summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author James Dong <jdong@google.com> 2011-05-31 18:53:46 -0700
committer James Dong <jdong@google.com> 2011-06-02 12:32:46 -0700
commit 0c1bc742181ded4930842b46e9507372f0b1b963 (patch)
tree c952bfcb03ff7cce5e0f91ad7d25c67a2fdd39cb
parent 92a746c3b18d035189f596ce32847bf26247aaca (diff)
download frameworks_av-0c1bc742181ded4930842b46e9507372f0b1b963.zip
frameworks_av-0c1bc742181ded4930842b46e9507372f0b1b963.tar.gz
frameworks_av-0c1bc742181ded4930842b46e9507372f0b1b963.tar.bz2
Initial-checkin for ON2 Software AVC/H264 decoder
o When NEON is present, the performance gain of the On2 AVC software decoder over the PV software decoder is more than 30%.
o In addition, it fixes some known PV software decoder issues, like missing output frames.
o Allow both the PV and On2 software AVC decoders to be available for easy comparison.
o Change output frames from 8 to 16.

Change-Id: I567ad1842025ead7092f0c47e3513d6d9ca232dd
-rw-r--r--media/libstagefright/OMXCodec.cpp1
-rw-r--r--media/libstagefright/codecs/on2/h264dec/Android.mk127
-rw-r--r--media/libstagefright/codecs/on2/h264dec/SoftAVC.cpp515
-rw-r--r--media/libstagefright/codecs/on2/h264dec/SoftAVC.h105
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/inc/H264SwDecApi.h192
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/inc/basetype.h52
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_DELIVERY.TXT63
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_MANIFEST.TXT91
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM.h785
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_BitDec_s.h670
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Bitstream.h212
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCTTable.h40
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCT_s.h1445
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_MaskTable.h27
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Version.h43
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_s.h1154
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armOMX.h274
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes.h252
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes_s.h77
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm11/build_vc.pl111
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/filelist_vc.txt74
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM.c936
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_Bitstream.c329
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_IDCTTable.c60
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_MaskTable.c45
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVC.h1153
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVCCOMM_s.h72
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC.h4381
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC_s.h129
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy16x16_s.s148
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy8x8_s.s72
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s189
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/api/armVCM4P10_CAVLCTables.h30
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s222
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_CAVLCTables.c327
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s20
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s366
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s325
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DequantTables_s.s123
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s236
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s149
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s178
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s296
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s276
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s239
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s185
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s273
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_QuantTables_s.s74
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s407
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s92
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c88
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c91
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c62
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c68
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s480
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s336
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s437
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s331
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s550
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c79
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s426
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s494
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s501
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s567
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s128
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s469
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h37
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h25
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s75
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s398
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c211
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c75
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s104
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c61
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c102
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c208
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s364
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s132
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s136
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s224
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s194
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s73
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s713
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s283
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s141
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s188
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/src/armVC_Version.c6
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_DELIVERY.TXT63
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_MANIFEST.TXT91
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM.h785
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_BitDec_s.h670
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Bitstream.h212
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCTTable.h40
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCT_s.h1451
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_MaskTable.h27
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Version.h43
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_s.h1157
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armOMX.h274
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes.h252
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes_s.h77
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/build_vc.pl113
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/filelist_vc.txt75
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM.c936
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_Bitstream.c329
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_IDCTTable.c60
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_MaskTable.c45
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVC.h1153
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVCCOMM_s.h72
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC.h4381
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC_s.h129
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy16x16_s.s95
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy8x8_s.s70
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s236
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/api/armVCM4P10_CAVLCTables.h30
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s222
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_CAVLCTables.c327
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s198
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s396
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s325
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DequantTables_s.s123
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s236
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s149
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s178
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s313
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s266
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s228
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s134
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s318
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_QuantTables_s.s74
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s186
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s92
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c88
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c91
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c62
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c68
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s396
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s202
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s282
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s288
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s436
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c79
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s553
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s436
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s424
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s531
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s140
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s264
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S134
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S54
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S102
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S272
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DequantTables_s.S103
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Align_unsafe_s.S123
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Copy_unsafe_s.S105
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S107
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S164
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S119
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S72
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S58
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S175
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_QuantTables_s.S68
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_TransformResidual4x4_s.S52
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S40
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DeblockLuma_I.S67
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S119
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S87
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S123
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S107
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S157
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S323
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S217
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S239
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S261
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantChromaDCFromPair_s.S54
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S76
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h37
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h25
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Clip8_s.s82
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s398
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c211
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Lookup_Tables.c75
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_SetPredDir_s.s104
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c61
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c102
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c214
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s364
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s132
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s136
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s224
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s194
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s73
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s444
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s320
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s162
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s210
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/src/armVC_Version.c6
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_DELIVERY.TXT63
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_MANIFEST.TXT140
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM.h785
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Bitstream.h212
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Version.h43
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armOMX.h274
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/omxtypes.h252
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/reference/build_vc.pl111
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/filelist_vc.txt123
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM.c936
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM_Bitstream.c329
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/armVC.h1153
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/omxVC.h4381
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_Average.c78
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_SAD.c75
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_16x.c86
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_8x.c87
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock.c88
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock_SAD.c93
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy16x16.c79
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy8x8.c79
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ExpandFrame_I.c126
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_LimitMVToRect.c81
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_16x.c80
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_8x.c80
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/api/armVCM4P10_CAVLCTables.h34
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CAVLCTables.c703
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CompareMotionCostToMV.c133
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DeBlockPixel.c151
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair.c267
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DequantTables.c45
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_FwdTransformResidual4x4.c78
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfDiag_Luma.c106
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfHor_Luma.c82
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfVer_Luma.c84
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Chroma.c109
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Luma.c195
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_PredictIntraDC4x4.c88
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_QuantTables.c31
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_SADQuar.c84
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_TransformResidual4x4.c80
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock2x2.c78
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock4x4.c78
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_Average_4x.c84
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Half.c191
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Integer.c196
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Quarter.c199
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c107
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c109
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c86
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c117
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd.c145
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c130
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c131
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c125
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c128
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_GetVLCInfo.c192
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c99
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfHor_Luma.c124
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfVer_Luma.c123
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateLuma.c99
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_ChromaDC.c102
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_LumaDC.c128
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformResidualAndAdd.c124
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEGetBufSize.c70
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEInit.c92
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MotionEstimationMB.c1892
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8.c284
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16.c198
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4.c338
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_16x.c86
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_4x.c85
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_8x.c87
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SAD_4x.c77
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SATD_4x4.c132
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SubAndTransformQDQResidual.c220
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair.c131
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair.c148
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_ChromaDC.c97
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_LumaDC.c119
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_DCT_Table.h30
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h42
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h25
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_ACDCPredict.c253
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Half.c187
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Integer.c167
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CheckVLCEscapeMode.c156
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CompareMV.c71
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DCT_Table.c112
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_intra.c144
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_EncodeVLCZigzag_intra.c145
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLCBuffer.c153
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLDBuffer.c84
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_GetVLCBits.c278
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c495
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_PutVLCBits.c200
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_SetPredDir.c89
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c58
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_16x16.c111
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_8x8.c109
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_16x16.c114
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_8x8.c110
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DCT8x8blk.c87
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c115
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c225
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP.c243
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter.c120
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c103
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c170
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeMV.c212
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_Inter.c112
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c97
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c160
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_FindMVpred.c188
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_IDCT8x8blk.c92
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MCReconBlock.c357
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEGetBufSize.c70
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEInit.c84
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MotionEstimationMB.c630
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra.c121
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInter_I.c117
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantIntra_I.c153
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvInter_I.c96
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I.c153
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_inter.c108
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_intra.c260
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/src/armVC_Version.c6
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/DecTestBench.c761
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/EvaluationTestBench.c350
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/H264SwDecApi.c567
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/TestBenchMultipleInstance.c531
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor.s298
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor_ver.s339
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_ver.s288
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_half.s251
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_quarter.s273
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_ver_quarter.s536
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_mid_hor.s163
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_half.s347
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_quarter.s374
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm11_asm/win_asm.bat15
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdClearMbLayer.s66
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdCountLeadingZeros.s49
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFillRow7.s180
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFlushBits.s82
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdWriteMacroblock.s152
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S41
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S68
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S48
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S143
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S78
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S157
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.c237
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.h54
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.c916
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.h58
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_cfg.h56
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.c626
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.h54
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_container.h69
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.c2417
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.h56
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.c961
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.h83
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.c1584
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.h149
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_image.c345
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_image.h68
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_inter_prediction.c1027
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_inter_prediction.h56
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.c1937
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.h81
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.c1446
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.h212
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.c117
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.h76
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.c382
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.h78
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.c347
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.h68
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.c335
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.h77
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.c2315
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.h210
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.c1692
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.h252
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.c577
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.h86
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.c354
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.h59
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.c589
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.h60
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.c1511
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.h186
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.c888
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.h174
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.c242
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.h67
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.c402
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.h54
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_util.c286
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_util.h178
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.c391
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.h63
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.c490
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.h130
-rw-r--r--media/libstagefright/omx/SoftOMXPlugin.cpp1
403 files changed, 112275 insertions, 0 deletions
diff --git a/media/libstagefright/OMXCodec.cpp b/media/libstagefright/OMXCodec.cpp
index 6339710..ee5abd1 100644
--- a/media/libstagefright/OMXCodec.cpp
+++ b/media/libstagefright/OMXCodec.cpp
@@ -200,6 +200,7 @@ static const CodecInfo kDecoderInfo[] = {
{ MEDIA_MIMETYPE_VIDEO_AVC, "OMX.qcom.video.decoder.avc" },
{ MEDIA_MIMETYPE_VIDEO_AVC, "OMX.TI.Video.Decoder" },
{ MEDIA_MIMETYPE_VIDEO_AVC, "OMX.SEC.AVC.Decoder" },
+ { MEDIA_MIMETYPE_VIDEO_AVC, "OMX.google.h264.decoder" },
{ MEDIA_MIMETYPE_VIDEO_AVC, "OMX.google.avc.decoder" },
{ MEDIA_MIMETYPE_VIDEO_AVC, "AVCDecoder" },
{ MEDIA_MIMETYPE_AUDIO_VORBIS, "OMX.google.vorbis.decoder" },
diff --git a/media/libstagefright/codecs/on2/h264dec/Android.mk b/media/libstagefright/codecs/on2/h264dec/Android.mk
new file mode 100644
index 0000000..5b3c876
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/Android.mk
@@ -0,0 +1,127 @@
+LOCAL_PATH := $(call my-dir)
+# Module 1: the libstagefright_soft_h264dec shared library (SoftAVC.cpp plus the h264bsd decoder sources).
+include $(CLEAR_VARS)
+
+LOCAL_ARM_MODE := arm
+# Sources that are always compiled, whether or not NEON is available.
+LOCAL_SRC_FILES := \
+ ./source/h264bsd_transform.c \
+ ./source/h264bsd_util.c \
+ ./source/h264bsd_byte_stream.c \
+ ./source/h264bsd_seq_param_set.c \
+ ./source/h264bsd_pic_param_set.c \
+ ./source/h264bsd_slice_header.c \
+ ./source/h264bsd_slice_data.c \
+ ./source/h264bsd_macroblock_layer.c \
+ ./source/h264bsd_stream.c \
+ ./source/h264bsd_vlc.c \
+ ./source/h264bsd_cavlc.c \
+ ./source/h264bsd_nal_unit.c \
+ ./source/h264bsd_neighbour.c \
+ ./source/h264bsd_storage.c \
+ ./source/h264bsd_slice_group_map.c \
+ ./source/h264bsd_intra_prediction.c \
+ ./source/h264bsd_inter_prediction.c \
+ ./source/h264bsd_reconstruct.c \
+ ./source/h264bsd_dpb.c \
+ ./source/h264bsd_image.c \
+ ./source/h264bsd_deblocking.c \
+ ./source/h264bsd_conceal.c \
+ ./source/h264bsd_vui.c \
+ ./source/h264bsd_pic_order_cnt.c \
+ ./source/h264bsd_decoder.c \
+ ./source/H264SwDecApi.c \
+ SoftAVC.cpp \
+
+LOCAL_C_INCLUDES := $(LOCAL_PATH)/./inc \
+ frameworks/base/media/libstagefright/include \
+ frameworks/base/include/media/stagefright/openmax \
+
+MY_ASM := \
+ ./source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S \
+ ./source/arm_neon_asm_gcc/h264bsdClearMbLayer.S \
+ ./source/arm_neon_asm_gcc/h264bsdFillRow7.S \
+ ./source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S \
+ ./source/arm_neon_asm_gcc/h264bsdFlushBits.S
+# MY_ASM above and the two MY_OMXDL_* lists below are only appended to LOCAL_SRC_FILES in the NEON branch further down.
+
+MY_OMXDL_C_SRC := \
+ ./omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c \
+ ./omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c \
+ ./omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c \
+ ./omxdl/arm_neon/vc/m4p10/src/armVCM4P10_CAVLCTables.c \
+ ./omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c \
+ ./omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c \
+ ./omxdl/arm_neon/src/armCOMM_Bitstream.c \
+ ./omxdl/arm_neon/src/armCOMM.c
+
+MY_OMXDL_ASM_SRC := \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Align_unsafe_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Copy_unsafe_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DequantTables_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_QuantTables_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_TransformResidual4x4_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S \
+ ./omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantChromaDCFromPair_s.S \
+
+# NEON targets: define H264DEC_NEON and H264DEC_OMXDL, and add the assembly/OMXDL sources and their include paths.
+ifeq ($(ARCH_ARM_HAVE_NEON),true)
+ LOCAL_ARM_NEON := true
+# LOCAL_CFLAGS := -std=c99 -D._NEON -D._OMXDL
+ LOCAL_CFLAGS := -DH264DEC_NEON -DH264DEC_OMXDL
+ LOCAL_SRC_FILES += $(MY_ASM) $(MY_OMXDL_C_SRC) $(MY_OMXDL_ASM_SRC)
+ LOCAL_C_INCLUDES += $(LOCAL_PATH)/./source/arm_neon_asm_gcc
+ LOCAL_C_INCLUDES += $(LOCAL_PATH)/./omxdl/arm_neon/api \
+ $(LOCAL_PATH)/./omxdl/arm_neon/vc/api \
+ $(LOCAL_PATH)/./omxdl/arm_neon/vc/m4p10/api
+endif
+
+LOCAL_SHARED_LIBRARIES := \
+ libstagefright libstagefright_omx libstagefright_foundation libutils \
+
+LOCAL_MODULE := libstagefright_soft_h264dec
+
+LOCAL_MODULE_TAGS := optional
+
+include $(BUILD_SHARED_LIBRARY)
+
+#####################################################################
+# test utility: decoder
+#####################################################################
+##
+## Test application: ./source/DecTestBench.c built as the executable "decoder",
+## linked against the libstagefright_soft_h264dec shared library defined above.
+include $(CLEAR_VARS)
+
+LOCAL_SRC_FILES := ./source/DecTestBench.c
+
+LOCAL_C_INCLUDES := $(LOCAL_PATH)/inc
+
+LOCAL_SHARED_LIBRARIES := libstagefright_soft_h264dec
+
+LOCAL_MODULE_TAGS := debug
+
+LOCAL_MODULE := decoder
+
+include $(BUILD_EXECUTABLE)
+
diff --git a/media/libstagefright/codecs/on2/h264dec/SoftAVC.cpp b/media/libstagefright/codecs/on2/h264dec/SoftAVC.cpp
new file mode 100644
index 0000000..259fbc9
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/SoftAVC.cpp
@@ -0,0 +1,515 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+//#define LOG_NDEBUG 0
+#define LOG_TAG "SoftAVC"
+#include <utils/Log.h>
+
+#include "SoftAVC.h"
+
+#include <media/stagefright/foundation/ADebug.h>
+#include <media/stagefright/MediaDefs.h>
+#include <media/stagefright/MediaErrors.h>
+
+
+namespace android {
+
+// Stamps the common OMX header onto a parameter struct: nSize becomes the
+// size of T and nVersion is set to 1.0.0.0. No other field is touched.
+template<class T>
+static void InitOMXParams(T *params) {
+    params->nVersion.s.nStep = 0;
+    params->nVersion.s.nRevision = 0;
+    params->nVersion.s.nVersionMinor = 0;
+    params->nVersion.s.nVersionMajor = 1;
+
+    params->nSize = sizeof(*params);
+}
+/*
+ * Constructs the component with a default 320x240 geometry, registers the
+ * input and output ports via initPorts(), and creates the decoder instance.
+ * Aborts (CHECK_EQ) if initDecoder() does not return OK.
+ */
+SoftAVC::SoftAVC(
+ const char *name,
+ const OMX_CALLBACKTYPE *callbacks,
+ OMX_PTR appData,
+ OMX_COMPONENTTYPE **component)
+ : SimpleSoftOMXComponent(name, callbacks, appData, component),
+ mHandle(NULL),
+ mInputBufferCount(0),
+ mWidth(320),
+ mHeight(240),
+ mPictureSize(mWidth * mHeight * 3 / 2), // reads mWidth/mHeight, which are initialized just above
+ mCropLeft(0),
+ mCropTop(0),
+ mFirstPicture(NULL),
+ mFirstPictureId(-1),
+ mPicId(0),
+ mHeadersDecoded(false),
+ mEOSStatus(INPUT_DATA_AVAILABLE),
+ mOutputPortSettingsChange(NONE) {
+ initPorts();
+ CHECK_EQ(initDecoder(), (status_t)OK);
+}
+
+// Releases the decoder instance, frees every header still stored in
+// mPicToHeaderMap and the saved first picture. Both port queues must already
+// be empty; this is enforced with CHECK.
+SoftAVC::~SoftAVC() {
+    H264SwDecRelease(mHandle);
+    mHandle = NULL;
+
+    // Headers recorded for inputs whose pictures were never emitted are
+    // still owned by the map.
+    for (size_t i = 0; i < mPicToHeaderMap.size(); ++i) {
+        delete mPicToHeaderMap.valueAt(i);
+    }
+    mPicToHeaderMap.clear();
+
+    List<BufferInfo *> &outQueue = getPortQueue(kOutputPortIndex);
+    List<BufferInfo *> &inQueue = getPortQueue(kInputPortIndex);
+    CHECK(outQueue.empty());
+    CHECK(inQueue.empty());
+
+    delete[] mFirstPicture;
+}
+
+// Sets the port-definition fields whose values are the same on the input and
+// output ports: enable/populate state, domain, contiguity and the frame
+// geometry (stride and slice height mirror width and height).
+static void setSharedPortFields(
+        OMX_PARAM_PORTDEFINITIONTYPE *def, uint32_t width, uint32_t height) {
+    def->bEnabled = OMX_TRUE;
+    def->bPopulated = OMX_FALSE;
+    def->eDomain = OMX_PortDomainVideo;
+    def->bBuffersContiguous = OMX_FALSE;
+
+    def->format.video.pNativeRender = NULL;
+    def->format.video.pNativeWindow = NULL;
+    def->format.video.nFrameWidth = width;
+    def->format.video.nFrameHeight = height;
+    def->format.video.nStride = def->format.video.nFrameWidth;
+    def->format.video.nSliceHeight = def->format.video.nFrameHeight;
+    def->format.video.nBitrate = 0;
+    def->format.video.xFramerate = 0;
+    def->format.video.bFlagErrorConcealment = OMX_FALSE;
+}
+
+// Registers the component's two ports using the current mWidth/mHeight:
+// an AVC input port with 8192-byte buffers and a YUV420-planar output port
+// whose buffers hold width * height * 3 / 2 bytes.
+void SoftAVC::initPorts() {
+    OMX_PARAM_PORTDEFINITIONTYPE def;
+    InitOMXParams(&def);
+
+    // Input port: compressed AVC.
+    setSharedPortFields(&def, mWidth, mHeight);
+    def.nPortIndex = kInputPortIndex;
+    def.eDir = OMX_DirInput;
+    def.nBufferCountMin = kNumInputBuffers;
+    def.nBufferCountActual = def.nBufferCountMin;
+    def.nBufferSize = 8192;
+    def.nBufferAlignment = 1;
+    def.format.video.cMIMEType = const_cast<char *>(MEDIA_MIMETYPE_VIDEO_AVC);
+    def.format.video.eCompressionFormat = OMX_VIDEO_CodingAVC;
+    def.format.video.eColorFormat = OMX_COLOR_FormatUnused;
+    addPort(def);
+
+    // Output port: raw YUV420 planar frames.
+    setSharedPortFields(&def, mWidth, mHeight);
+    def.nPortIndex = kOutputPortIndex;
+    def.eDir = OMX_DirOutput;
+    def.nBufferCountMin = kNumOutputBuffers;
+    def.nBufferCountActual = def.nBufferCountMin;
+    def.nBufferAlignment = 2;
+    def.format.video.cMIMEType = const_cast<char *>(MEDIA_MIMETYPE_VIDEO_RAW);
+    def.format.video.eCompressionFormat = OMX_VIDEO_CodingUnused;
+    def.format.video.eColorFormat = OMX_COLOR_FormatYUV420Planar;
+    def.nBufferSize =
+        (def.format.video.nFrameWidth * def.format.video.nFrameHeight * 3) / 2;
+    addPort(def);
+}
+
+// Creates the decoder instance behind mHandle.
+// Returns OK when H264SwDecInit() reports H264SWDEC_OK, UNKNOWN_ERROR otherwise.
+status_t SoftAVC::initDecoder() {
+    // NOTE(review): the literal 1 is H264SwDecInit()'s second argument; its
+    // meaning is defined in H264SwDecApi.h - confirm it is the intended mode.
+    const bool created = (H264SwDecInit(&mHandle, 1) == H264SWDEC_OK);
+    return created ? (status_t)OK : (status_t)UNKNOWN_ERROR;
+}
+
+// Answers OMX_IndexParamVideoPortFormat queries: each port advertises exactly
+// one format (index 0) - AVC on the input port, YUV420 planar on the output
+// port. Every other index is forwarded to SimpleSoftOMXComponent.
+//
+// Returns OMX_ErrorUndefined for a port index above kOutputPortIndex and
+// OMX_ErrorNoMore for any format index other than 0.
+OMX_ERRORTYPE SoftAVC::internalGetParameter(
+        OMX_INDEXTYPE index, OMX_PTR params) {
+    if (index != OMX_IndexParamVideoPortFormat) {
+        return SimpleSoftOMXComponent::internalGetParameter(index, params);
+    }
+
+    OMX_VIDEO_PARAM_PORTFORMATTYPE *formatParams =
+        static_cast<OMX_VIDEO_PARAM_PORTFORMATTYPE *>(params);
+
+    if (formatParams->nPortIndex > kOutputPortIndex) {
+        return OMX_ErrorUndefined;
+    }
+    if (formatParams->nIndex != 0) {
+        return OMX_ErrorNoMore;
+    }
+
+    if (formatParams->nPortIndex != kInputPortIndex) {
+        CHECK(formatParams->nPortIndex == kOutputPortIndex);
+
+        formatParams->eCompressionFormat = OMX_VIDEO_CodingUnused;
+        formatParams->eColorFormat = OMX_COLOR_FormatYUV420Planar;
+    } else {
+        formatParams->eCompressionFormat = OMX_VIDEO_CodingAVC;
+        formatParams->eColorFormat = OMX_COLOR_FormatUnused;
+    }
+    formatParams->xFramerate = 0;
+
+    return OMX_ErrorNone;
+}
+
+// Validates the two parameters this component handles itself; nothing is
+// stored for either of them.
+//  - OMX_IndexParamStandardComponentRole: only "video_decoder.avc" is
+//    accepted, anything else yields OMX_ErrorUndefined.
+//  - OMX_IndexParamVideoPortFormat: OMX_ErrorUndefined for a port index above
+//    kOutputPortIndex, OMX_ErrorNoMore for a format index other than 0.
+// All other indices are forwarded to SimpleSoftOMXComponent.
+OMX_ERRORTYPE SoftAVC::internalSetParameter(
+        OMX_INDEXTYPE index, const OMX_PTR params) {
+    if (index == OMX_IndexParamStandardComponentRole) {
+        const OMX_PARAM_COMPONENTROLETYPE *role =
+            static_cast<const OMX_PARAM_COMPONENTROLETYPE *>(params);
+
+        const bool roleMatches =
+            strncmp((const char *)role->cRole,
+                    "video_decoder.avc",
+                    OMX_MAX_STRINGNAME_SIZE - 1) == 0;
+
+        return roleMatches ? OMX_ErrorNone : OMX_ErrorUndefined;
+    }
+
+    if (index == OMX_IndexParamVideoPortFormat) {
+        const OMX_VIDEO_PARAM_PORTFORMATTYPE *format =
+            static_cast<const OMX_VIDEO_PARAM_PORTFORMATTYPE *>(params);
+
+        if (format->nPortIndex > kOutputPortIndex) {
+            return OMX_ErrorUndefined;
+        }
+        return (format->nIndex != 0) ? OMX_ErrorNoMore : OMX_ErrorNone;
+    }
+
+    return SimpleSoftOMXComponent::internalSetParameter(index, params);
+}
+
+// Reports the output crop rectangle (mCropLeft, mCropTop, mWidth, mHeight)
+// for OMX_IndexConfigCommonOutputCrop on the output port.
+//
+// Returns OMX_ErrorUnsupportedIndex for any other index and
+// OMX_ErrorUndefined when the request names a port other than the output port.
+OMX_ERRORTYPE SoftAVC::getConfig(
+        OMX_INDEXTYPE index, OMX_PTR params) {
+    if (index != OMX_IndexConfigCommonOutputCrop) {
+        return OMX_ErrorUnsupportedIndex;
+    }
+
+    OMX_CONFIG_RECTTYPE *rect = static_cast<OMX_CONFIG_RECTTYPE *>(params);
+    if (rect->nPortIndex != kOutputPortIndex) {
+        return OMX_ErrorUndefined;
+    }
+
+    rect->nWidth = mWidth;
+    rect->nHeight = mHeight;
+    rect->nLeft = mCropLeft;
+    rect->nTop = mCropTop;
+
+    return OMX_ErrorNone;
+}
+/*
+ * Main decode loop (portIndex is not consulted). While input or a pending
+ * EOS is available and all kNumOutputBuffers output buffers are queued, one
+ * input buffer at a time is fed to H264SwDecDecode() until it is consumed,
+ * then ready pictures are copied into output buffers. Returns immediately
+ * while a port reconfiguration is pending or after EOS has been emitted.
+ */
+void SoftAVC::onQueueFilled(OMX_U32 portIndex) {
+ if (mOutputPortSettingsChange != NONE) { // output port disable/enable cycle not finished yet
+ return;
+ }
+ // EOS has already been signalled on the output port.
+ if (mEOSStatus == OUTPUT_FRAMES_FLUSHED) {
+ return;
+ }
+
+ List<BufferInfo *> &inQueue = getPortQueue(kInputPortIndex);
+ List<BufferInfo *> &outQueue = getPortQueue(kOutputPortIndex);
+ H264SwDecRet ret = H264SWDEC_PIC_RDY;
+ status_t err = OK;
+ bool portSettingsChanged = false;
+ while ((mEOSStatus != INPUT_DATA_AVAILABLE || !inQueue.empty())
+ && outQueue.size() == kNumOutputBuffers) {
+ // Input EOS was seen on an earlier iteration: flush the decoder's remaining pictures and stop.
+ if (mEOSStatus == INPUT_EOS_SEEN) {
+ drainAllOutputBuffers();
+ return;
+ }
+ // From here on mEOSStatus is INPUT_DATA_AVAILABLE, so the loop condition guarantees inQueue is non-empty.
+ BufferInfo *inInfo = *inQueue.begin();
+ OMX_BUFFERHEADERTYPE *inHeader = inInfo->mHeader;
+ ++mPicId; // key into mPicToHeaderMap; also incremented for an EOS buffer
+ if (inHeader->nFlags & OMX_BUFFERFLAG_EOS) { // NOTE(review): any payload carried by the EOS buffer is returned undecoded
+ inQueue.erase(inQueue.begin());
+ inInfo->mOwnedByUs = false;
+ notifyEmptyBufferDone(inHeader);
+ mEOSStatus = INPUT_EOS_SEEN;
+ continue;
+ }
+ // Remember this input's timestamp and flags under mPicId so they can be copied to the matching output buffer.
+ OMX_BUFFERHEADERTYPE *header = new OMX_BUFFERHEADERTYPE;
+ memset(header, 0, sizeof(OMX_BUFFERHEADERTYPE));
+ header->nTimeStamp = inHeader->nTimeStamp;
+ header->nFlags = inHeader->nFlags;
+ mPicToHeaderMap.add(mPicId, header);
+ inQueue.erase(inQueue.begin());
+ // Describe the whole input buffer to the decoder.
+ H264SwDecInput inPicture;
+ H264SwDecOutput outPicture;
+ memset(&inPicture, 0, sizeof(inPicture));
+ inPicture.dataLen = inHeader->nFilledLen;
+ inPicture.pStream = inHeader->pBuffer + inHeader->nOffset;
+ inPicture.picId = mPicId;
+ inPicture.intraConcealmentMethod = 1;
+ H264SwDecPicture decodedPicture;
+ // The decoder may return before consuming the whole buffer; keep calling it with the remainder.
+ while (inPicture.dataLen > 0) {
+ ret = H264SwDecDecode(mHandle, &inPicture, &outPicture);
+ if (ret == H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY ||
+ ret == H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY) {
+ inPicture.dataLen -= (u32)(outPicture.pStrmCurrPos - inPicture.pStream); // skip the bytes already consumed
+ inPicture.pStream = outPicture.pStrmCurrPos;
+ if (ret == H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY) {
+ mHeadersDecoded = true;
+ H264SwDecInfo decoderInfo;
+ CHECK(H264SwDecGetInfo(mHandle, &decoderInfo) == H264SWDEC_OK);
+
+ if (handlePortSettingChangeEvent(&decoderInfo)) {
+ portSettingsChanged = true;
+ }
+
+ if (decoderInfo.croppingFlag &&
+ handleCropRectEvent(&decoderInfo.cropParams)) {
+ portSettingsChanged = true;
+ }
+ }
+ } else { // any other result ends decoding of this input buffer
+ if (portSettingsChanged) {
+ if (H264SwDecNextPicture(mHandle, &decodedPicture, 0)
+ == H264SWDEC_PIC_RDY) {
+
+ // Save this output buffer; otherwise, it will be
+ // lost during dynamic port reconfiguration because
+ // OpenMAX client will delete _all_ output buffers
+ // in the process.
+ saveFirstOutputBuffer(
+ decodedPicture.picId,
+ (uint8_t *)decodedPicture.pOutputPicture);
+ }
+ }
+ inPicture.dataLen = 0;
+ if (ret < 0) {
+ LOGE("Decoder failed: %d", ret);
+ err = ERROR_MALFORMED;
+ }
+ }
+ }
+ inInfo->mOwnedByUs = false;
+ notifyEmptyBufferDone(inHeader);
+ // A settings-changed event was sent for this buffer: stop here. NOTE(review): an error recorded in err is not reported on this path.
+ if (portSettingsChanged) {
+ portSettingsChanged = false;
+ return;
+ }
+ // Emit the picture that was saved across the port reconfiguration first.
+ if (mFirstPicture && !outQueue.empty()) {
+ drainOneOutputBuffer(mFirstPictureId, mFirstPicture);
+ delete[] mFirstPicture;
+ mFirstPicture = NULL;
+ mFirstPictureId = -1;
+ }
+ // Hand out every picture the decoder has ready, one output buffer each.
+ while (!outQueue.empty() &&
+ mHeadersDecoded &&
+ H264SwDecNextPicture(mHandle, &decodedPicture, 0)
+ == H264SWDEC_PIC_RDY) {
+
+ int32_t picId = decodedPicture.picId;
+ uint8_t *data = (uint8_t *) decodedPicture.pOutputPicture;
+ drainOneOutputBuffer(picId, data);
+ }
+ // NOTE(review): err is never reset, so once set it is reported again on every later iteration of this call.
+ if (err != OK) {
+ notify(OMX_EventError, OMX_ErrorUndefined, err, NULL);
+ }
+ }
+}
+
+// Compares the decoder-reported picture size with the cached mWidth/mHeight.
+// On a mismatch the cached geometry and both port definitions are updated,
+// OMX_EventPortSettingsChanged is sent for the output port, and the component
+// starts waiting for that port to be disabled.
+//
+// Returns true when a change was announced, false when the size is unchanged.
+bool SoftAVC::handlePortSettingChangeEvent(const H264SwDecInfo *info) {
+    const bool sameSize =
+        (mWidth == info->picWidth) && (mHeight == info->picHeight);
+    if (sameSize) {
+        return false;
+    }
+
+    mWidth = info->picWidth;
+    mHeight = info->picHeight;
+    mPictureSize = mWidth * mHeight * 3 / 2;
+    updatePortDefinitions();
+
+    notify(OMX_EventPortSettingsChanged, kOutputPortIndex, 0, NULL);
+    mOutputPortSettingsChange = AWAITING_DISABLED;
+    return true;
+}
+
+// Compares the decoder-reported crop rectangle with the cached values. On a
+// mismatch the cached offsets, width, height and picture size are replaced by
+// the crop values and OMX_EventPortSettingsChanged is sent for the output
+// port with OMX_IndexConfigCommonOutputCrop as the second argument.
+//
+// Returns true when an event was sent, false when nothing changed.
+//
+// NOTE(review): mWidth/mHeight are also compared against picWidth/picHeight
+// in handlePortSettingChangeEvent(), and mPictureSize is the byte count
+// copied out of decoded pictures; confirm that overwriting them with the
+// crop dimensions is intended.
+bool SoftAVC::handleCropRectEvent(const CropParams *crop) {
+    const bool unchanged =
+        mCropLeft == crop->cropLeftOffset &&
+        mCropTop == crop->cropTopOffset &&
+        mWidth == crop->cropOutWidth &&
+        mHeight == crop->cropOutHeight;
+    if (unchanged) {
+        return false;
+    }
+
+    mCropLeft = crop->cropLeftOffset;
+    mCropTop = crop->cropTopOffset;
+    mWidth = crop->cropOutWidth;
+    mHeight = crop->cropOutHeight;
+    mPictureSize = mWidth * mHeight * 3 / 2;
+
+    notify(OMX_EventPortSettingsChanged, kOutputPortIndex,
+           OMX_IndexConfigCommonOutputCrop, NULL);
+    return true;
+}
+
+// Keeps a private copy of one decoded picture (mPictureSize bytes read from
+// data) together with its picture id. Only one picture can be held at a time;
+// a second call before the first copy is released trips the CHECK.
+void SoftAVC::saveFirstOutputBuffer(int32_t picId, uint8_t *data) {
+    CHECK(mFirstPicture == NULL);
+
+    uint8_t *copy = new uint8_t[mPictureSize];
+    memcpy(copy, data, mPictureSize);
+
+    mFirstPicture = copy;
+    mFirstPictureId = picId;
+}
+
+// Delivers one decoded picture to the client.
+//
+// Takes the first buffer from the output queue, copies mPictureSize bytes
+// from data into it, stamps it with the timestamp and flags recorded for
+// picId in mPicToHeaderMap, frees that record, and returns the buffer through
+// notifyFillBufferDone().
+//
+// picId must have an entry in mPicToHeaderMap. This is now enforced with the
+// same CHECK that drainAllOutputBuffers() performs, instead of dereferencing
+// whatever the lookup yields for an unknown key. The caller must ensure that
+// the output queue is not empty.
+void SoftAVC::drainOneOutputBuffer(int32_t picId, uint8_t* data) {
+    CHECK(mPicToHeaderMap.indexOfKey(picId) >= 0);
+
+    List<BufferInfo *> &outQueue = getPortQueue(kOutputPortIndex);
+    BufferInfo *outInfo = *outQueue.begin();
+    outQueue.erase(outQueue.begin());
+    OMX_BUFFERHEADERTYPE *outHeader = outInfo->mHeader;
+
+    // Carry the originating input buffer's timestamp and flags over.
+    OMX_BUFFERHEADERTYPE *header = mPicToHeaderMap.valueFor(picId);
+    outHeader->nTimeStamp = header->nTimeStamp;
+    outHeader->nFlags = header->nFlags;
+    outHeader->nFilledLen = mPictureSize;
+    memcpy(outHeader->pBuffer + outHeader->nOffset,
+           data, mPictureSize);
+
+    // The record is single-use: drop it once its picture has been emitted.
+    mPicToHeaderMap.removeItem(picId);
+    delete header;
+
+    outInfo->mOwnedByUs = false;
+    notifyFillBufferDone(outHeader);
+}
+
+// Returns every queued output buffer to the client. While headers have been
+// decoded and H264SwDecNextPicture() (called with the flush argument set to
+// 1) still yields a picture, a buffer is filled with that picture and the
+// timestamp/flags recorded for its id. Otherwise the buffer is returned empty
+// with OMX_BUFFERFLAG_EOS and mEOSStatus becomes OUTPUT_FRAMES_FLUSHED.
+//
+// Always returns true.
+bool SoftAVC::drainAllOutputBuffers() {
+    List<BufferInfo *> &outQueue = getPortQueue(kOutputPortIndex);
+    H264SwDecPicture decodedPicture;
+
+    while (!outQueue.empty()) {
+        BufferInfo *outInfo = *outQueue.begin();
+        outQueue.erase(outQueue.begin());
+        OMX_BUFFERHEADERTYPE *outHeader = outInfo->mHeader;
+
+        const bool gotPicture =
+            mHeadersDecoded &&
+            H264SwDecNextPicture(mHandle, &decodedPicture, 1 /* flush */)
+                == H264SWDEC_PIC_RDY;
+
+        if (!gotPicture) {
+            // Nothing left in the decoder: hand back an empty EOS buffer.
+            outHeader->nFlags = OMX_BUFFERFLAG_EOS;
+            outHeader->nFilledLen = 0;
+            outHeader->nTimeStamp = 0;
+            mEOSStatus = OUTPUT_FRAMES_FLUSHED;
+        } else {
+            int32_t picId = decodedPicture.picId;
+            CHECK(mPicToHeaderMap.indexOfKey(picId) >= 0);
+
+            OMX_BUFFERHEADERTYPE *header = mPicToHeaderMap.valueFor(picId);
+            outHeader->nFilledLen = mPictureSize;
+            outHeader->nFlags = header->nFlags;
+            outHeader->nTimeStamp = header->nTimeStamp;
+
+            memcpy(outHeader->pBuffer + outHeader->nOffset,
+                   decodedPicture.pOutputPicture,
+                   mPictureSize);
+
+            mPicToHeaderMap.removeItem(picId);
+            delete header;
+        }
+
+        outInfo->mOwnedByUs = false;
+        notifyFillBufferDone(outHeader);
+    }
+
+    return true;
+}
+
+// After a flush of the input port, resets the EOS state to
+// INPUT_DATA_AVAILABLE. Flushes of any other port are ignored.
+void SoftAVC::onPortFlushCompleted(OMX_U32 portIndex) {
+    if (portIndex != kInputPortIndex) {
+        return;
+    }
+
+    mEOSStatus = INPUT_DATA_AVAILABLE;
+}
+
+// Advances the output-port reconfiguration state:
+//   NONE              -> unchanged
+//   AWAITING_DISABLED -> AWAITING_ENABLED (the port must just have been disabled)
+//   AWAITING_ENABLED  -> NONE             (the port must just have been enabled)
+// The expected value of `enabled` is enforced with CHECK; portIndex is not
+// consulted.
+void SoftAVC::onPortEnableCompleted(OMX_U32 portIndex, bool enabled) {
+    if (mOutputPortSettingsChange == NONE) {
+        return;
+    }
+
+    if (mOutputPortSettingsChange == AWAITING_DISABLED) {
+        CHECK(!enabled);
+        mOutputPortSettingsChange = AWAITING_ENABLED;
+        return;
+    }
+
+    CHECK_EQ((int)mOutputPortSettingsChange, (int)AWAITING_ENABLED);
+    CHECK(enabled);
+    mOutputPortSettingsChange = NONE;
+}
+
+// Pushes the current mWidth/mHeight into both port definitions (stride and
+// slice height mirror width and height) and resizes the output port's buffers
+// to width * height * 3 / 2 bytes.
+void SoftAVC::updatePortDefinitions() {
+    for (OMX_U32 port = kInputPortIndex; port <= kOutputPortIndex; ++port) {
+        OMX_VIDEO_PORTDEFINITIONTYPE *video =
+            &editPortInfo(port)->mDef.format.video;
+
+        video->nFrameWidth = mWidth;
+        video->nFrameHeight = mHeight;
+        video->nStride = video->nFrameWidth;
+        video->nSliceHeight = video->nFrameHeight;
+    }
+
+    OMX_PARAM_PORTDEFINITIONTYPE *outDef = &editPortInfo(kOutputPortIndex)->mDef;
+    outDef->nBufferSize =
+        (outDef->format.video.nFrameWidth
+            * outDef->format.video.nFrameHeight * 3) / 2;
+}
+
+} // namespace android
+
+// Factory function for this library: allocates a new SoftAVC component from
+// the given arguments and returns it as a SoftOMXComponent pointer.
+android::SoftOMXComponent *createSoftOMXComponent(
+        const char *name, const OMX_CALLBACKTYPE *callbacks,
+        OMX_PTR appData, OMX_COMPONENTTYPE **component) {
+    android::SoftOMXComponent *instance =
+        new android::SoftAVC(name, callbacks, appData, component);
+    return instance;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/SoftAVC.h b/media/libstagefright/codecs/on2/h264dec/SoftAVC.h
new file mode 100644
index 0000000..a7340c0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/SoftAVC.h
@@ -0,0 +1,105 @@
+/*
+ * Copyright (C) 2011 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef SOFT_AVC_H_
+
+#define SOFT_AVC_H_
+
+#include "SimpleSoftOMXComponent.h"
+#include <utils/KeyedVector.h>
+
+#include "H264SwDecApi.h"
+#include "basetype.h"
+
+namespace android {
+
+struct SoftAVC : public SimpleSoftOMXComponent {  // OMX component built on the H264SwDec software AVC decoder API
+    SoftAVC(const char *name,
+            const OMX_CALLBACKTYPE *callbacks,
+            OMX_PTR appData,
+            OMX_COMPONENTTYPE **component);
+
+protected:
+    virtual ~SoftAVC();
+
+    virtual OMX_ERRORTYPE internalGetParameter(
+            OMX_INDEXTYPE index, OMX_PTR params);
+
+    virtual OMX_ERRORTYPE internalSetParameter(
+            OMX_INDEXTYPE index, const OMX_PTR params);
+
+    virtual OMX_ERRORTYPE getConfig(OMX_INDEXTYPE index, OMX_PTR params);
+
+    virtual void onQueueFilled(OMX_U32 portIndex);
+    virtual void onPortFlushCompleted(OMX_U32 portIndex);  // an input-port flush resets mEOSStatus
+    virtual void onPortEnableCompleted(OMX_U32 portIndex, bool enabled);  // advances mOutputPortSettingsChange
+
+private:
+    enum {
+        kInputPortIndex = 0,
+        kOutputPortIndex = 1,
+        kNumInputBuffers = 8,
+        kNumOutputBuffers = 16,
+    };
+
+    enum EOSStatus {
+        INPUT_DATA_AVAILABLE,   // state restored after an input-port flush
+        INPUT_EOS_SEEN,
+        OUTPUT_FRAMES_FLUSHED,  // set once the empty EOS-flagged output buffer has been emitted
+    };
+
+    void *mHandle;  // NOTE(review): presumably the H264SwDecInst obtained from H264SwDecInit() -- confirm in SoftAVC.cpp
+
+    size_t mInputBufferCount;
+
+    uint32_t mWidth, mHeight, mPictureSize;  // mWidth/mHeight are copied into both port definitions
+    uint32_t mCropLeft, mCropTop;
+
+    uint8_t *mFirstPicture;
+    int32_t mFirstPictureId;
+
+    int32_t mPicId;  // Which output picture is for which input buffer?
+
+    // OMX_BUFFERHEADERTYPE may be overkill, but it is convenient
+    // for tracking the following fields: nFlags, nTimeStamp, etc.
+    KeyedVector<int32_t, OMX_BUFFERHEADERTYPE *> mPicToHeaderMap;  // keyed by picture id
+    bool mHeadersDecoded;
+
+    EOSStatus mEOSStatus;
+
+    enum OutputPortSettingChange {
+        NONE,               // no output-port reconfiguration in progress
+        AWAITING_DISABLED,  // waiting for the port-disable completion
+        AWAITING_ENABLED    // waiting for the port-enable completion
+    };
+    OutputPortSettingChange mOutputPortSettingsChange;
+
+    void initPorts();
+    status_t initDecoder();
+    void updatePortDefinitions();  // pushes mWidth/mHeight into the input and output port definitions
+    bool drainAllOutputBuffers();
+    void drainOneOutputBuffer(int32_t picId, uint8_t *data);
+    void saveFirstOutputBuffer(int32_t picId, uint8_t *data);  // parameter name aligned with drainOneOutputBuffer
+    bool handleCropRectEvent(const CropParams* crop);
+    bool handlePortSettingChangeEvent(const H264SwDecInfo *info);
+
+    DISALLOW_EVIL_CONSTRUCTORS(SoftAVC);
+};
+
+} // namespace android
+
+#endif // SOFT_AVC_H_
+
diff --git a/media/libstagefright/codecs/on2/h264dec/inc/H264SwDecApi.h b/media/libstagefright/codecs/on2/h264dec/inc/H264SwDecApi.h
new file mode 100755
index 0000000..fe112bc
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/inc/H264SwDecApi.h
@@ -0,0 +1,192 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include Headers
+
+ 2. Enumerations used as a return value or a parameter.
+ 2.1. API's return value enumerations.
+
+ 3. User Structures
+ 3.1. Structures for H264SwDecDecode() parameters.
+ 3.2. Structures for information interchange with
+ DEC API and user application.
+
+ 4. Prototypes of Decoder API functions
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDECAPI_H
+#define H264SWDECAPI_H
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+/*------------------------------------------------------------------------------
+ 1. Include Headers
+------------------------------------------------------------------------------*/
+
+ #include "basetype.h"
+
+/*------------------------------------------------------------------------------
+ 2.1. API's return value enumerations.
+------------------------------------------------------------------------------*/
+
+ typedef enum /* decoder API return codes: the *_ERR / *FAIL style codes are negative, all others are >= 0 */
+ {
+ H264SWDEC_OK = 0,
+ H264SWDEC_STRM_PROCESSED = 1,
+ H264SWDEC_PIC_RDY, /* implicitly 2 */
+ H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY, /* implicitly 3 */
+ H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY, /* implicitly 4 */
+ H264SWDEC_PARAM_ERR = -1,
+ H264SWDEC_STRM_ERR = -2,
+ H264SWDEC_NOT_INITIALIZED = -3,
+ H264SWDEC_MEMFAIL = -4,
+ H264SWDEC_INITFAIL = -5,
+ H264SWDEC_HDRS_NOT_RDY = -6,
+ H264SWDEC_EVALUATION_LIMIT_EXCEEDED = -7
+ } H264SwDecRet;
+
+/*------------------------------------------------------------------------------
+ 3.1. Structures for H264SwDecDecode() parameters.
+------------------------------------------------------------------------------*/
+
+ /* typedef of the Decoder instance */
+ typedef void *H264SwDecInst;
+
+ /* Input structure */
+ typedef struct
+ {
+ u8 *pStream; /* Pointer to stream to be decoded */
+ u32 dataLen; /* Number of bytes to be decoded */
+ u32 picId; /* Identifier for the picture to be decoded */
+ u32 intraConcealmentMethod; /* 0 = Gray concealment for intra
+ 1 = Reference concealment for intra */
+
+ } H264SwDecInput;
+
+
+ /* Output structure */
+ typedef struct
+ {
+ u8 *pStrmCurrPos; /* Pointer to stream position where decoder
+ ended up */
+ } H264SwDecOutput;
+
+ /* Output structure for H264SwDecNextPicture */
+ typedef struct
+ {
+ u32 *pOutputPicture; /* Pointer to the picture, YUV format */
+ u32 picId; /* Identifier of the picture to be displayed*/
+ u32 isIdrPicture; /* Flag to indicate if the picture is an
+ IDR picture */
+ u32 nbrOfErrMBs; /* Number of concealed MB's in the picture */
+ } H264SwDecPicture;
+
+/*------------------------------------------------------------------------------
+ 3.2. Structures for information interchange with DEC API
+ and user application.
+------------------------------------------------------------------------------*/
+
+ typedef struct /* cropping rectangle; embedded in H264SwDecInfo as cropParams */
+ {
+ u32 cropLeftOffset; /* NOTE(review): units look like luma pixels -- confirm against the decoder */
+ u32 cropOutWidth;
+ u32 cropTopOffset;
+ u32 cropOutHeight;
+ } CropParams;
+
+ typedef struct /* stream information block; this is the type H264SwDecGetInfo() takes as pDecInfo */
+ {
+ u32 profile;
+ u32 picWidth;
+ u32 picHeight;
+ u32 videoRange;
+ u32 matrixCoefficients;
+ u32 parWidth; /* NOTE(review): parWidth/parHeight look like a pixel aspect ratio -- confirm */
+ u32 parHeight;
+ u32 croppingFlag; /* presumably non-zero when cropParams is meaningful -- confirm */
+ CropParams cropParams;
+ } H264SwDecInfo;
+
+ /* Version information */
+ typedef struct
+ {
+ u32 major; /* Decoder API major version */
+ u32 minor; /* Decoder API minor version */
+ } H264SwDecApiVersion;
+
+/*------------------------------------------------------------------------------
+ 4. Prototypes of Decoder API functions
+------------------------------------------------------------------------------*/
+
+ H264SwDecRet H264SwDecDecode(H264SwDecInst decInst,
+ H264SwDecInput *pInput, /* stream pointer, byte count, picId and intra concealment method */
+ H264SwDecOutput *pOutput); /* pStrmCurrPos: stream position where the decoder ended up */
+
+ H264SwDecRet H264SwDecInit(H264SwDecInst *decInst, /* the only call that takes the instance handle by address */
+ u32 noOutputReordering);
+
+ H264SwDecRet H264SwDecNextPicture(H264SwDecInst decInst,
+ H264SwDecPicture *pOutput, /* picture pointer, picId, IDR flag and concealed-MB count */
+ u32 endOfStream);
+
+ H264SwDecRet H264SwDecGetInfo(H264SwDecInst decInst,
+ H264SwDecInfo *pDecInfo); /* profile, picture size, aspect and cropping fields */
+
+ void H264SwDecRelease(H264SwDecInst decInst); /* returns nothing, so no status is reported to the caller */
+
+ H264SwDecApiVersion H264SwDecGetAPIVersion(void); /* major/minor pair returned by value */
+
+ /* function prototype for API trace */
+ void H264SwDecTrace(char *);
+
+ /* function prototype for memory allocation */
+ void* H264SwDecMalloc(u32 size);
+
+ /* function prototype for memory free */
+ void H264SwDecFree(void *ptr);
+
+ /* function prototype for memory copy */
+ void H264SwDecMemcpy(void *dest, void *src, u32 count);
+
+ /* function prototype for memset */
+ void H264SwDecMemset(void *ptr, i32 value, u32 count);
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* H264SWDECAPI_H */
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/inc/basetype.h b/media/libstagefright/codecs/on2/h264dec/inc/basetype.h
new file mode 100755
index 0000000..63d5653
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/inc/basetype.h
@@ -0,0 +1,52 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+
+#ifndef BASETYPE_H_INCLUDED
+#define BASETYPE_H_INCLUDED
+
+
+#ifdef __arm /* NOTE(review): __arm is presumably predefined by ARM's own toolchain -- confirm */
+#define VOLATILE volatile
+#else
+#define VOLATILE /* expands to nothing when __arm is not defined */
+#endif
+
+typedef unsigned char u8;
+typedef signed char i8;
+typedef unsigned short u16; /* assumes short is 16 bits wide */
+typedef signed short i16;
+typedef unsigned int u32; /* assumes int is 32 bits wide */
+typedef signed int i32;
+
+#if defined(VC1SWDEC_16BIT) || defined(MP4ENC_ARM11) /* u16x/i16x are short only under these build flags */
+typedef unsigned short u16x;
+typedef signed short i16x;
+#else /* otherwise u16x/i16x are widened to int */
+typedef unsigned int u16x;
+typedef signed int i16x;
+#endif
+
+
+#ifndef NULL /* fallback definition, used only when no header has defined NULL yet */
+#ifdef __cplusplus
+#define NULL 0
+#else
+#define NULL ((void *)0)
+#endif
+#endif
+
+#endif /* BASETYPE_H_INCLUDED */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_DELIVERY.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_DELIVERY.TXT
new file mode 100644
index 0000000..5ce70ca
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_DELIVERY.TXT
@@ -0,0 +1,63 @@
+The contents of this transaction was created by Hedley Francis
+of ARM on 19-Feb-2008.
+
+It contains the ARM data versions listed below.
+
+This data, unless otherwise stated, is ARM Proprietary and access to it
+is subject to the agreements indicated below.
+
+If you experience problems with this data, please contact ARM support
+quoting transaction reference <97413>.
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+- OX001-SW-98010-r0p0-00bet1
+ Video codecs - optimised code
+ V6 optimized code release for Hantro (Ver 1.0.2)
+ internal access
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+This transaction contains deliverables which are designated as being of
+beta release status (BET).
+
+Beta release status has a particular meaning to ARM of which the recipient
+must be aware. Beta is a pre-release status indicating that the deliverable
+so described is believed to robustly demonstrate specified behaviour, to be
+consistent across its included aspects and be ready for general deployment.
+But Beta also indicates that pre-release reliability trials are ongoing and
+that it is possible residual defects or errors in operation, consistency
+and documentation may still be encountered. The recipient should consider
+this position when using this Beta material supplied. ARM will normally
+attempt to provide fixes or a work-around for defects identified by the
+recipient, but the provision or timeliness of this support cannot be
+guaranteed. ARM shall not be responsible for direct or consequential
+damages as a result of encountering one or more of these residual defects.
+By accepting a Beta release, the recipient agrees to these constraints and
+to providing reasonable information to ARM to enable the replication of the
+defects identified by the recipient. The specific Beta version supplied
+will not be supported after release of a later or higher status version.
+It should be noted that Support for the Beta release of the deliverable
+will only be provided by ARM to a recipient who has a current support and
+maintenance contract for the deliverable.
+
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+In addition to the data versions listed above, this transaction contains
+two additional files at the top level.
+
+The first is this file, ARM_DELIVERY_97413.TXT, which is the delivery
+note.
+
+The second is ARM_MANIFEST_97413.TXT which contains a manifest of all the
+files included in this transaction, together with their checksums.
+
+The checksums provided are calculated using the RSA Data Security, Inc.
+MD5 Message-Digest Algorithm.
+
+The checksums can be used to verify the integrity of this data using the
+"md5sum" tool (which is part of the GNU "textutils" package) by running:
+
+ % md5sum --check ARM_MANIFEST_97413.TXT
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_MANIFEST.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_MANIFEST.TXT
new file mode 100644
index 0000000..9b2238b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_MANIFEST.TXT
@@ -0,0 +1,91 @@
+ OX001-SW-98010-r0p0-00bet1/
+ OX001-SW-98010-r0p0-00bet1/api/
+e049791cfab6060a08cbac7b3ad767d6 OX001-SW-98010-r0p0-00bet1/api/armCOMM_s.h
+ed798face25497b2703ede736d6d52b6 OX001-SW-98010-r0p0-00bet1/api/omxtypes_s.h
+4eebd63af087376811d6749f0646b864 OX001-SW-98010-r0p0-00bet1/api/armCOMM_BitDec_s.h
+43cf46c2cf2fe1f93c615b57bcbe4809 OX001-SW-98010-r0p0-00bet1/api/armCOMM.h
+8f248ceaac8f602e277a521b679dcbbe OX001-SW-98010-r0p0-00bet1/api/armCOMM_IDCTTable.h
+53f2ae8a98495f05e26a4cf862a7f750 OX001-SW-98010-r0p0-00bet1/api/armCOMM_Version.h
+3a2f420ddf6a1b950470bd0f5ebd5c62 OX001-SW-98010-r0p0-00bet1/api/armCOMM_IDCT_s.h
+511c0bb534fe223599e2c84eff24c9ed OX001-SW-98010-r0p0-00bet1/api/armCOMM_MaskTable.h
+8971932d56eed6b1ad1ba507f0bff5f0 OX001-SW-98010-r0p0-00bet1/api/armCOMM_Bitstream.h
+f87fedd9ca432fefa757008176864ef8 OX001-SW-98010-r0p0-00bet1/api/armOMX.h
+8e49899a428822c36ef9dd94e0e05f18 OX001-SW-98010-r0p0-00bet1/api/omxtypes.h
+694281d11af52f88e6f9d4cb226ac8a7 OX001-SW-98010-r0p0-00bet1/build_vc.pl
+e72d96c0a415459748df9807f3dae72f OX001-SW-98010-r0p0-00bet1/filelist_vc.txt
+ OX001-SW-98010-r0p0-00bet1/src/
+5eeae659a29477f5c52296d24afffd3c OX001-SW-98010-r0p0-00bet1/src/armCOMM_IDCTTable.c
+d64cdcf38f7749dc7f77465e5b7d356d OX001-SW-98010-r0p0-00bet1/src/armCOMM_MaskTable.c
+ OX001-SW-98010-r0p0-00bet1/vc/
+ OX001-SW-98010-r0p0-00bet1/vc/m4p10/
+ OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/
+e7e0c320978564a7c9b2c723749a98d6 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_CAVLCTables.c
+4adcd0df081990bdfc4729041a2a9152 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
+852e0404142965dc1f3aa7f00ee5127b OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
+7054151c5bfea6b5e74feee86b2d7b01 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+38944c5e0bba01e32ff349c2c87c71b2 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DequantTables_s.s
+32ff4b8be62e2f0f3e764b83c1e5e2fd OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
+90b0e6a04e764902c0a0903640c10b32 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
+28a19ae4fe2258628080d6a89bb54b91 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
+98e196b9e1ffebaf91f62ea9d17fb97d OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
+01ba60eff66ea49a4f833ce6279f8e2f OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
+f301d5a95e07354f593ea5747c01cb0a OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
+44c9ef21e840a100301f7d7a4189957c OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
+a33b03bbd3352d24ed744769e12bb87d OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
+00c20bfda67bb86096b615fc17c94b35 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
+2ddcaf60a8ea1e6e6b77737f768bfb9d OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_QuantTables_s.s
+c3002aad5600f872b70a5d7fe3915846 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
+a2900f2c47f1c61d20bd6c1eda33d6d4 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
+e4fecd66bc47f07539bc308935e84a1f OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
+78815c9df50ba53131bb22d2b829e3c3 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
+1909ae312ac79a03a5fac1d1e8bc0291 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
+3d2c48580655928065de7839866d9bc4 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
+23aa2fdf155d4fa6ff745eab6e01f32b OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
+97f20a93c481d7f6173d919f41e415bd OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
+becd512da202436286811b6aec061f47 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
+dd24a99ae3cd842dcacaf31d47de88b3 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
+c2d995f787b6f44ef10c751c12d1935f OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
+3628fbdf0cd217c287b6ccc94135d06e OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
+4a52b3e9e268b8a8f07829bf500d03af OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
+11249f8a98c5d4b84cb5575b0e37ca9c OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
+3599b1074330965c8ca285d164efccff OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
+3339e026c7de655d9400949eb5e51451 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
+cc4a6f32db0b72a91d3f278f6855df69 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+ OX001-SW-98010-r0p0-00bet1/vc/m4p10/api/
+6e530ddaa7c2b57ffe88162c020cb662 OX001-SW-98010-r0p0-00bet1/vc/m4p10/api/armVCM4P10_CAVLCTables.h
+ OX001-SW-98010-r0p0-00bet1/vc/m4p2/
+ OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/
+cdf412920c2037a725d0420002b6752e OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Clip8_s.s
+dba9824e959b21d401cac925e68a11a6 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+b559b71d5c94627f10e616fb72c0cefc OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
+4fba4c431a783a78a2eb6497a94ac967 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
+1e4c3be8c5eddc00c9f05e83bcf315ef OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
+1b0b2990c2669dfb87cf6b810611c01b OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
+1c9b87abf3283e957816b3937c680701 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
+4fe1afca659a9055fc1172e58f78a506 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
+2ea067f0436f91ba1351edaf411cb4ea OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
+acb92be1dbcdb3ebe824cbe9e28d03bf OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
+a6b41f01b1df7dd656ebdba3084bfa2a OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
+293a48a648a3085456e6665bb7366fad OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
+ffe6b96c74d4881f4d3c8de8cc737797 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
+437dfa204508850d61d4b87091446e9f OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
+ff5915d181bfd2cd2f0bd588bd2300dc OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
+6775eb0c561dbab965c60f85b08c96fd OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
+a0d85f4f517c945a4c9317ac021f2d08 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
+386020dee8b725c7fe2526f1fc211d7d OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
+ OX001-SW-98010-r0p0-00bet1/vc/m4p2/api/
+4624e7c838e10a249abcc3d3f4f40748 OX001-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
+65e1057d04e2cb844559dc9f6e09795a OX001-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
+ OX001-SW-98010-r0p0-00bet1/vc/src/
+e627b3346b0dc9aff14446005ce0fa43 OX001-SW-98010-r0p0-00bet1/vc/src/armVC_Version.c
+ OX001-SW-98010-r0p0-00bet1/vc/api/
+7ca94b1c33ac0211e17d38baadd7d1dd OX001-SW-98010-r0p0-00bet1/vc/api/armVC.h
+12cf7596edbbf6048b626d15e8d0ed48 OX001-SW-98010-r0p0-00bet1/vc/api/omxVC.h
+11726e286a81257cb45f5547fb4d374c OX001-SW-98010-r0p0-00bet1/vc/api/omxVC_s.h
+a5b2af605c319cd2491319e430741377 OX001-SW-98010-r0p0-00bet1/vc/api/armVCCOMM_s.h
+ OX001-SW-98010-r0p0-00bet1/vc/comm/
+ OX001-SW-98010-r0p0-00bet1/vc/comm/src/
+50cca6954c447b012ab39ca7872e5e8f OX001-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy16x16_s.s
+d1c3bce77fc5774c899b447d13f02cd0 OX001-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy8x8_s.s
+fdac1d1bad3fd23c880beb39bc2e89aa OX001-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
+6d9adc2be5bd0311591030d0c6df771c ARM_DELIVERY_97413.TXT
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM.h
new file mode 100644
index 0000000..2ed86a4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM.h
@@ -0,0 +1,785 @@
+/**
+ *
+ * File Name: armCOMM.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armCOMM.h
+ * Brief: Declares Common APIs/Data Types used across OpenMAX API's
+ *
+ */
+
+
+#ifndef _armCommon_H_
+#define _armCommon_H_
+
+#include "omxtypes.h"
+
+typedef struct
+{
+ OMX_F32 Re; /** Real part */
+ OMX_F32 Im; /** Imaginary part */
+
+} OMX_FC32; /** single precision floating point complex number */
+
+typedef struct
+{
+ OMX_F64 Re; /** Real part */
+ OMX_F64 Im; /** Imaginary part */
+
+} OMX_FC64; /** double precision floating point complex number */
+
+
+/* Used by both IP and IC domains for 8x8 JPEG blocks. */
+typedef OMX_S16 ARM_BLOCK8x8[64];
+
+
+#include "armOMX.h"
+
+#define armPI (OMX_F64)(3.1415926535897932384626433832795)
+
+/***********************************************************************/
+
+/* Compiler extensions */
+#ifdef ARM_DEBUG
+/* debug version */
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#define armError(str) {printf((str)); printf("\n"); exit(-1);}
+#define armWarn(str) {printf((str)); printf("\n");}
+#define armIgnore(a) ((void)a)
+#define armAssert(a) assert(a)
+#else
+/* release version */
+#define armError(str) ((void) (str))
+#define armWarn(str) ((void) (str))
+#define armIgnore(a) ((void) (a))
+#define armAssert(a) ((void) (a))
+#endif /* ARM_DEBUG */
+
+/* Arithmetic operations */
+
+#define armMin(a,b) ( (a) > (b) ? (b):(a) )
+#define armMax(a,b) ( (a) > (b) ? (a):(b) )
+#define armAbs(a) ( (a) < 0 ? -(a):(a) )
+
+/* Alignment operation: advance Ptr to the next N-byte boundary (N must be a power of two); Ptr and N are evaluated more than once */
+
+#define armAlignToBytes(Ptr,N) ((Ptr) + ( (((N)-(unsigned long)(Ptr))&((N)-1)) / sizeof(*(Ptr)) ))
+#define armAlignTo2Bytes(Ptr) armAlignToBytes(Ptr,2)
+#define armAlignTo4Bytes(Ptr) armAlignToBytes(Ptr,4)
+#define armAlignTo8Bytes(Ptr) armAlignToBytes(Ptr,8)
+#define armAlignTo16Bytes(Ptr) armAlignToBytes(Ptr,16)
+
+/* Error and Alignment check */
+
+#define armRetArgErrIf(condition, code) if(condition) { return (code); }
+#define armRetDataErrIf(condition, code) if(condition) { return (code); }
+
+#ifndef ALIGNMENT_DOESNT_MATTER /* when defined, every alignment check collapses to a constant */
+#define armIsByteAligned(Ptr,N) ((((unsigned long)(Ptr)) % (N))==0)
+#define armNotByteAligned(Ptr,N) ((((unsigned long)(Ptr)) % (N))!=0)
+#else
+#define armIsByteAligned(Ptr,N) (1)
+#define armNotByteAligned(Ptr,N) (0)
+#endif
+
+#define armIs2ByteAligned(Ptr) armIsByteAligned(Ptr,2)
+#define armIs4ByteAligned(Ptr) armIsByteAligned(Ptr,4)
+#define armIs8ByteAligned(Ptr) armIsByteAligned(Ptr,8)
+#define armIs16ByteAligned(Ptr) armIsByteAligned(Ptr,16)
+
+#define armNot2ByteAligned(Ptr) armNotByteAligned(Ptr,2)
+#define armNot4ByteAligned(Ptr) armNotByteAligned(Ptr,4)
+#define armNot8ByteAligned(Ptr) armNotByteAligned(Ptr,8)
+#define armNot16ByteAligned(Ptr) armNotByteAligned(Ptr,16)
+#define armNot32ByteAligned(Ptr) armNotByteAligned(Ptr,32)
+
+/**
+ * Function: armRoundFloatToS16/armRoundFloatToS32/armRoundFloatToS64
+ *
+ * Description:
+ * Converts a double precision value into a short int/int after rounding
+ *
+ * Parameters:
+ * [in] Value Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16/OMX_S32/OMX_S64 format
+ *
+ */
+
+OMX_S16 armRoundFloatToS16 (OMX_F64 Value);
+OMX_S32 armRoundFloatToS32 (OMX_F64 Value);
+OMX_S64 armRoundFloatToS64 (OMX_F64 Value);
+
+/**
+ * Function: armSatRoundFloatToS16/armSatRoundFloatToS32
+ *
+ * Description:
+ * Converts a double precision value into a short int/int after rounding and saturation
+ *
+ * Parameters:
+ * [in] Value Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16/OMX_S32 format
+ *
+ */
+
+OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value);
+OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value);
+
+/**
+ * Function: armSatRoundFloatToU16/armSatRoundFloatToU32
+ *
+ * Description:
+ * Converts a double precision value into an unsigned short int/int after rounding and saturation
+ *
+ * Parameters:
+ * [in] Value Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_U16/OMX_U32 format
+ *
+ */
+
+OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value);
+OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value);
+
+/**
+ * Function: armSignCheck
+ *
+ * Description:
+ * Checks the sign of a variable:
+ * returns 1 if it is Positive
+ * returns 0 if it is 0
+ * returns -1 if it is Negative
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] var Variable to be checked
+ *
+ * Return Value:
+ * OMX_INT -- returns 1 if it is Positive
+ * returns 0 if it is 0
+ * returns -1 if it is Negative
+ */
+
+OMX_INT armSignCheck (OMX_S16 var);
+
+/**
+ * Function: armClip
+ *
+ * Description: Clips the input between MAX and MIN value
+ *
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] Min lower bound
+ * [in] Max upper bound
+ * [in] src variable to be clipped
+ *
+ * Return Value:
+ * OMX_S32 -- returns clipped value
+ */
+
+OMX_S32 armClip (
+ OMX_INT min,
+ OMX_INT max,
+ OMX_S32 src
+ );
+
+/**
+ * Function: armClip_F32
+ *
+ * Description: Clips the input between MAX and MIN value
+ *
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] Min lower bound
+ * [in] Max upper bound
+ * [in] src variable to be clipped
+ *
+ * Return Value:
+ * OMX_F32 -- returns clipped value
+ */
+
+OMX_F32 armClip_F32 (
+ OMX_F32 min,
+ OMX_F32 max,
+ OMX_F32 src
+ );
+
+/**
+ * Function: armShiftSat_F32
+ *
+ * Description: Divides a float value by 2^shift and
+ * saturates it for unsigned value range for satBits.
+ * Second parameter is like "shifting" the corresponding
+ * integer value. Takes care of rounding while clipping the final
+ * value.
+ *
+ * Parameters:
+ * [in] v Number to be operated upon
+ * [in] shift Divides the input "v" by "2^shift"
+ * [in] satBits Final range is [0, 2^satBits)
+ *
+ * Return Value:
+ * OMX_U32 -- returns "shifted" saturated value
+ */
+
+OMX_U32 armShiftSat_F32(
+ OMX_F32 v,
+ OMX_INT shift,
+ OMX_INT satBits
+ );
+
+/**
+ * Functions: armSwapElem
+ *
+ * Description:
+ * This function swaps two elements at the specified pointer locations.
+ * The size of each element could be anything as specified by <elemSize>
+ *
+ * Return Value:
+ * OMXResult -- Error status from the function
+ */
+OMXResult armSwapElem(OMX_U8 *pBuf1, OMX_U8 *pBuf2, OMX_INT elemSize);
+
+
+/**
+ * Function: armMedianOf3
+ *
+ * Description: Finds the median of three numbers
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] fEntry First entry
+ * [in] sEntry second entry
+ * [in] tEntry Third entry
+ *
+ * Return Value:
+ * OMX_S32 -- returns the median value
+ */
+
+OMX_S32 armMedianOf3 (
+ OMX_S32 fEntry,
+ OMX_S32 sEntry,
+ OMX_S32 tEntry
+ );
+
+/**
+ * Function: armLogSize
+ *
+ * Description: Finds the size of a positive value and returns the same
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] value Positive value
+ *
+ * Return Value:
+ * OMX_U8 -- returns the size of the positive value
+ */
+
+OMX_U8 armLogSize (
+ OMX_U16 value
+ );
+
+/***********************************************************************/
+ /* Saturating Arithmetic operations */
+
+/**
+ * Function :armSatAdd_S32()
+ *
+ * Description :
+ * Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ *
+ **/
+
+OMX_S32 armSatAdd_S32(
+ OMX_S32 Value1,
+ OMX_S32 Value2
+ );
+
+/**
+ * Function :armSatAdd_S64()
+ *
+ * Description :
+ * Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ *
+ **/
+
+OMX_S64 armSatAdd_S64(
+ OMX_S64 Value1,
+ OMX_S64 Value2
+ );
+
+/** Function :armSatSub_S32()
+ *
+ * Description :
+ * Returns the result of saturated subtraction of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S32 armSatSub_S32(
+ OMX_S32 Value1,
+ OMX_S32 Value2
+ );
+
+/**
+ * Function :armSatMac_S32()
+ *
+ * Description :
+ * Returns the result of Multiplication of Value1 and Value2 and subsequent saturated
+ * accumulation with Mac
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ * [in] Mac Accumulator
+ *
+ * Return:
+ * [out] Result of operation
+ **/
+
+OMX_S32 armSatMac_S32(
+ OMX_S32 Mac,
+ OMX_S16 Value1,
+ OMX_S16 Value2
+ );
+
+/**
+ * Function :armSatMac_S16S32_S32
+ *
+ * Description :
+ * Returns the result of saturated MAC operation of the three inputs delayElem, filTap , mac
+ *
+ * mac = mac + Saturate_in_32Bits(delayElem * filTap)
+ *
+ * Parameters:
+ * [in] delayElem First 32 bit Operand
+ * [in] filTap Second 16 bit Operand
+ * [in] mac Result of MAC operation
+ *
+ * Return:
+ * [out] mac Result of operation
+ *
+ **/
+
+OMX_S32 armSatMac_S16S32_S32(
+ OMX_S32 mac,
+ OMX_S32 delayElem,
+ OMX_S16 filTap );
+
+/**
+ * Function :armSatRoundRightShift_S32_S16
+ *
+ * Description :
+ * Returns the result of rounded right shift operation of input by the scalefactor
+ *
+ * output = Saturate_in_16Bits( RightShift( Round(input), scaleFactor ) )
+ *
+ * Parameters:
+ * [in] input The input to be operated on
+ * [in] scaleFactor The shift number
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+
+OMX_S16 armSatRoundRightShift_S32_S16(
+ OMX_S32 input,
+ OMX_INT scaleFactor);
+
+/**
+ * Function :armSatRoundLeftShift_S32()
+ *
+ * Description :
+ * Returns the result of saturating left-shift operation on input
+ * Or rounded Right shift if the input Shift is negative.
+ *
+ * Parameters:
+ * [in] Value Operand
+ * [in] shift Operand for shift operation
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S32 armSatRoundLeftShift_S32(
+ OMX_S32 Value,
+ OMX_INT shift
+ );
+
+/**
+ * Function :armSatRoundLeftShift_S64()
+ *
+ * Description :
+ * Returns the result of saturating left-shift operation on input
+ * Or rounded Right shift if the input Shift is negative.
+ *
+ * Parameters:
+ * [in] Value Operand
+ * [in] shift Operand for shift operation
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S64 armSatRoundLeftShift_S64(
+ OMX_S64 Value,
+ OMX_INT shift
+ );
+
+/**
+ * Function :armSatMulS16S32_S32()
+ *
+ * Description :
+ * Returns the result of a S16 data type multiplied with an S32 data type
+ * in a S32 container
+ * NOTE(review): the "Sat" prefix suggests the product is saturated to 32 bits;
+ * confirm against the implementation.
+ *
+ * Parameters:
+ * [in] input1 Operand 1 (16 bit)
+ * [in] input2 Operand 2 (32 bit)
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+
+OMX_S32 armSatMulS16S32_S32(
+ OMX_S16 input1,
+ OMX_S32 input2);
+
+/**
+ * Function :armSatMulS32S32_S32()
+ *
+ * Description :
+ * Returns the result of a S32 data type multiplied with an S32 data type
+ * in a S32 container
+ * NOTE(review): the "Sat" prefix suggests the product is saturated to 32 bits;
+ * confirm against the implementation.
+ *
+ * Parameters:
+ * [in] input1 Operand 1 (32 bit)
+ * [in] input2 Operand 2 (32 bit)
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S32 armSatMulS32S32_S32(
+ OMX_S32 input1,
+ OMX_S32 input2);
+
+
+/**
+ * Function :armIntDivAwayFromZero()
+ *
+ * Description : Integer division with rounding to the nearest integer.
+ * Half-integer values are rounded away from zero
+ * unless otherwise specified. For example 3//2 is rounded
+ * to 2, and -3//2 is rounded to -2.
+ *
+ * Parameters:
+ * [in] Num Operand 1 (the value being divided)
+ * [in] Deno Operand 2 (the divisor)
+ *
+ * Return:
+ * [out] Result of operation Num//Deno
+ *
+ **/
+
+OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno);
+
+
+/***********************************************************************/
+/*
+ * Debugging macros
+ *
+ */
+
+
+/*
+ * Definition of output stream - change to stderr if necessary.
+ * Defined unconditionally, but only expanded by the DEBUG_PRINTF_n macros
+ * when DEBUG_ON is defined (which is also when <stdio.h> is included).
+ */
+#define DEBUG_STREAM stdout
+
+/*
+ * Debug printf macros, one for each argument count (0 to 14 arguments
+ * following the format string "a"). Add more if needed.
+ * With DEBUG_ON defined they forward to fprintf(DEBUG_STREAM, ...);
+ * otherwise they expand to nothing, so their arguments are not evaluated.
+ */
+#ifdef DEBUG_ON
+#include <stdio.h>
+
+#define DEBUG_PRINTF_0(a) fprintf(DEBUG_STREAM, a)
+#define DEBUG_PRINTF_1(a, b) fprintf(DEBUG_STREAM, a, b)
+#define DEBUG_PRINTF_2(a, b, c) fprintf(DEBUG_STREAM, a, b, c)
+#define DEBUG_PRINTF_3(a, b, c, d) fprintf(DEBUG_STREAM, a, b, c, d)
+#define DEBUG_PRINTF_4(a, b, c, d, e) fprintf(DEBUG_STREAM, a, b, c, d, e)
+#define DEBUG_PRINTF_5(a, b, c, d, e, f) fprintf(DEBUG_STREAM, a, b, c, d, e, f)
+#define DEBUG_PRINTF_6(a, b, c, d, e, f, g) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g)
+#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h)
+#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i)
+#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j)
+#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k)
+#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l)
+#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m)
+#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n)
+#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
+#else /* DEBUG_ON not defined: every DEBUG_PRINTF_n expands to nothing */
+#define DEBUG_PRINTF_0(a)
+#define DEBUG_PRINTF_1(a, b)
+#define DEBUG_PRINTF_2(a, b, c)
+#define DEBUG_PRINTF_3(a, b, c, d)
+#define DEBUG_PRINTF_4(a, b, c, d, e)
+#define DEBUG_PRINTF_5(a, b, c, d, e, f)
+#define DEBUG_PRINTF_6(a, b, c, d, e, f, g)
+#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h)
+#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i)
+#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j)
+#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k)
+#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l)
+#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m)
+#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n)
+#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
+#endif /* DEBUG_ON */
+
+
+/*
+ * Domain and sub domain definitions
+ *
+ * In order to turn on debug for an entire domain or sub-domain
+ * at compile time, one of the DEBUG_DOMAIN_* below may be defined,
+ * which will activate debug in all of the defines it contains.
+ */
+
+#ifdef DEBUG_DOMAIN_AC /* defining this turns on every DEBUG_OMXAC* flag listed below */
+#define DEBUG_OMXACAAC_DECODECHANPAIRELT_MPEG4
+#define DEBUG_OMXACAAC_DECODECHANPAIRELT
+#define DEBUG_OMXACAAC_DECODEDATSTRELT
+#define DEBUG_OMXACAAC_DECODEFILLELT
+#define DEBUG_OMXACAAC_DECODEISSTEREO_S32
+#define DEBUG_OMXACAAC_DECODEMSPNS_S32
+#define DEBUG_OMXACAAC_DECODEMSSTEREO_S32_I
+#define DEBUG_OMXACAAC_DECODEPRGCFGELT
+#define DEBUG_OMXACAAC_DECODETNS_S32_I
+#define DEBUG_OMXACAAC_DEINTERLEAVESPECTRUM_S32
+#define DEBUG_OMXACAAC_ENCODETNS_S32_I
+#define DEBUG_OMXACAAC_LONGTERMPREDICT_S32
+#define DEBUG_OMXACAAC_LONGTERMRECONSTRUCT_S32
+#define DEBUG_OMXACAAC_MDCTFWD_S32
+#define DEBUG_OMXACAAC_MDCTINV_S32_S16
+#define DEBUG_OMXACAAC_NOISELESSDECODE
+#define DEBUG_OMXACAAC_QUANTINV_S32_I
+#define DEBUG_OMXACAAC_UNPACKADIFHEADER
+#define DEBUG_OMXACAAC_UNPACKADTSFRAMEHEADER
+#define DEBUG_OMXACMP3_HUFFMANDECODESFBMBP_S32
+#define DEBUG_OMXACMP3_HUFFMANDECODESFB_S32
+#define DEBUG_OMXACMP3_HUFFMANDECODE_S32
+#define DEBUG_OMXACMP3_MDCTINV_S32
+#define DEBUG_OMXACMP3_REQUANTIZESFB_S32_I
+#define DEBUG_OMXACMP3_REQUANTIZE_S32_I
+#define DEBUG_OMXACMP3_SYNTHPQMF_S32_S16
+#define DEBUG_OMXACMP3_UNPACKFRAMEHEADER
+#define DEBUG_OMXACMP3_UNPACKSCALEFACTORS_S8
+#define DEBUG_OMXACMP3_UNPACKSIDEINFO
+#endif /* DEBUG_DOMAIN_AC */
+
+
+#ifdef DEBUG_DOMAIN_VC /* defining this turns on every DEBUG_OMXVC* flag listed below */
+#define DEBUG_OMXVCM4P10_AVERAGE_16X
+#define DEBUG_OMXVCM4P10_AVERAGE_4X
+#define DEBUG_OMXVCM4P10_AVERAGE_8X
+#define DEBUG_OMXVCM4P10_DEBLOCKCHROMA_U8_C1IR
+#define DEBUG_OMXVCM4P10_DEBLOCKLUMA_U8_C1IR
+#define DEBUG_OMXVCM4P10_DECODECHROMADCCOEFFSTOPAIRCAVLC_U8
+#define DEBUG_OMXVCM4P10_DECODECOEFFSTOPAIRCAVLC_U8
+#define DEBUG_OMXVCM4P10_DEQUANTTRANSFORMACFROMPAIR_U8_S16_C1_DLX
+#define DEBUG_OMXVCM4P10_EXPANDFRAME
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_HOREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_VEREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_HOREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_VEREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_PREDICTINTRACHROMA8X8_U8_C1R
+#define DEBUG_OMXVCM4P10_PREDICTINTRA_16X16_U8_C1R
+#define DEBUG_OMXVCM4P10_PREDICTINTRA_4X4_U8_C1R
+#define DEBUG_OMXVCM4P10_SADQUAR_16X
+#define DEBUG_OMXVCM4P10_SADQUAR_4X
+#define DEBUG_OMXVCM4P10_SADQUAR_8X
+#define DEBUG_OMXVCM4P10_SAD_16X
+#define DEBUG_OMXVCM4P10_SAD_4X
+#define DEBUG_OMXVCM4P10_SAD_8X
+#define DEBUG_OMXVCM4P10_SATD_4X4
+#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTCHROMADCFROMPAIR_U8_S16_C1
+#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTLUMADCFROMPAIR_U8_S16_C1
+#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_CHROMADC
+#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_LUMADC
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_16X16
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_8X8
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_16X16
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_8X8
+#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_SAD_U8_S16
+#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_U8_S16
+#define DEBUG_OMXVCM4P2_DCT8X8BLKDLX
+#define DEBUG_OMXVCM4P2_DECODEBLOCKCOEF_INTER_S16
+#define DEBUG_OMXVCM4P2_DECODEPADMV_PVOP
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTER_S16
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRAACVLC_S16
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRADCVLC_S16
+#define DEBUG_OMXVCM4P2_ENCODEMV_U8_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTER_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRAACVLC_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRADCVLC_S16
+#define DEBUG_OMXVCM4P2_FINDMVPRED
+#define DEBUG_OMXVCM4P2_IDCT8X8BLKDLX
+#define DEBUG_OMXVCM4P2_LIMITMVTORECT
+#define DEBUG_OMXVCM4P2_MOTIONESTIMATIONMB
+#define DEBUG_OMXVCM4P2_PADMBGRAY_U8
+#define DEBUG_OMXVCM4P2_PADMBHORIZONTAL_U8
+#define DEBUG_OMXVCM4P2_PADMBVERTICAL_U8
+#define DEBUG_OMXVCM4P2_PADMV
+#define DEBUG_OMXVCM4P2_QUANTINTER_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINTRA_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINVINTER_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINVINTRA_S16_I
+#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTER /* NOTE(review): spelled CEOF, not COEF - confirm it matches the #ifdef in the implementation */
+#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTRA /* NOTE(review): spelled CEOF, not COEF - confirm it matches the #ifdef in the implementation */
+#endif /* DEBUG_DOMAIN_VC */
+
+
+#ifdef DEBUG_DOMAIN_IC
+/* To be filled in: no per-function debug flags are defined for this domain yet */
+#endif /* DEBUG_DOMAIN_IC */
+
+
+#ifdef DEBUG_DOMAIN_SP /* defining this turns on every DEBUG_OMXACSP_* flag listed below */
+#define DEBUG_OMXACSP_DOTPROD_S16
+#define DEBUG_OMXACSP_BLOCKEXP_S16
+#define DEBUG_OMXACSP_BLOCKEXP_S32
+#define DEBUG_OMXACSP_COPY_S16
+#define DEBUG_OMXACSP_DOTPROD_S16 /* NOTE(review): already defined at the top of this list; redundant but identical redefinition */
+#define DEBUG_OMXACSP_DOTPROD_S16_SFS
+#define DEBUG_OMXACSP_FFTFWD_CTOC_SC16_SFS
+#define DEBUG_OMXACSP_FFTFWD_CTOC_SC32_SFS
+#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S16S32_SFS
+#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S32_SFS
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC16
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC32
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S16_S32
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S32
+#define DEBUG_OMXACSP_FFTINIT_C_SC16
+#define DEBUG_OMXACSP_FFTINIT_C_SC32
+#define DEBUG_OMXACSP_FFTINIT_R_S16_S32
+#define DEBUG_OMXACSP_FFTINIT_R_S32
+#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32S16_SFS
+#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32_SFS
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC16_SFS
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC32_SFS
+#define DEBUG_OMXACSP_FILTERMEDIAN_S32_I
+#define DEBUG_OMXACSP_FILTERMEDIAN_S32
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_ISFS
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_I
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_SFS
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_ISFS
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_I
+#define DEBUG_OMXACSP_FIR_DIRECT_S16
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_SFS
+#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16_I
+#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16
+#define DEBUG_OMXACSP_IIRONE_DIRECT_S16_I
+#define DEBUG_OMXACSP_IIRONE_DIRECT_S16
+#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16_I
+#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16
+#define DEBUG_OMXACSP_IIR_DIRECT_S16_I
+#define DEBUG_OMXACSP_IIR_DIRECT_S16
+#endif /* DEBUG_DOMAIN_SP */
+
+
+#ifdef DEBUG_DOMAIN_IP /* defining this turns on every DEBUG_OMXIP* flag listed below */
+#define DEBUG_OMXIPBM_ADDC_U8_C1R_SFS
+#define DEBUG_OMXIPBM_COPY_U8_C1R
+#define DEBUG_OMXIPBM_COPY_U8_C3R
+#define DEBUG_OMXIPBM_MIRROR_U8_C1R
+#define DEBUG_OMXIPBM_MULC_U8_C1R_SFS
+#define DEBUG_OMXIPCS_COLORTWISTQ14_U8_C3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR420LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR422LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR444LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR420LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR422LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR444LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_YCBCR420RSZROT_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR420TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR420TORGB565_U8_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR420TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422RSZCSCROTRGB_U8_C2R
+#define DEBUG_OMXIPCS_YCBCR422RSZROT_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB565_U8_U16_C2C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB_U8_C2C3R
+#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_C2P3R
+#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR444TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR444TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB_U8_C3R
+#define DEBUG_OMXIPPP_GETCENTRALMOMENT_S64
+#define DEBUG_OMXIPPP_GETSPATIALMOMENT_S64
+#define DEBUG_OMXIPPP_MOMENTGETSTATESIZE_S64
+#define DEBUG_OMXIPPP_MOMENTINIT_S64
+#define DEBUG_OMXIPPP_MOMENTS64S_U8_C1R
+#define DEBUG_OMXIPPP_MOMENTS64S_U8_C3R
+#endif /* DEBUG_DOMAIN_IP */
+
+
+#endif /* _armCommon_H_ */
+
+/*End of File*/
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_BitDec_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_BitDec_s.h
new file mode 100644
index 0000000..abb98fc
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_BitDec_s.h
@@ -0,0 +1,670 @@
+;//
+;//
+;// File Name: armCOMM_BitDec_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// OpenMAX optimized bitstream decode module
+;//
+;// You must include armCOMM_s.h before including this file
+;//
+;// This module provides macros to perform assembly optimized fixed and
+;// variable length decoding from a read-only bitstream. The variable
+;// length decode modules take as input a pointer to a table of 16-bit
+;// entries of the following format.
+;//
+;// VLD Table Entry format
+;//
+;// 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
+;// +------------------------------------------------+
+;// | Len | Symbol | 1 |
+;// +------------------------------------------------+
+;// | Offset | 0 |
+;// +------------------------------------------------+
+;//
+;// If the table entry is a leaf entry then bit 0 set:
+;// Len = Number of bits overread (0 to 7)
+;// Symbol = Symbol payload (unsigned 12 bits)
+;//
+;// If the table entry is an internal node then bit 0 is clear:
+;// Offset = Number of (16-bit) half words from the table
+;// start to the next table node
+;//
+;// The table is accessed by successive lookup up on the
+;// next Step bits of the input bitstream until a leaf node
+;// is obtained. The Step sizes are supplied to the VLD macro.
+;//
+;// USAGE:
+;//
+;// To use any of the macros in this package, first call the three-part
+;// initialisation sequence defined in this file, in this order:
+;//
+;// M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+;// M_BD_INIT1 T1, T2, T3
+;// M_BD_INIT2 T1, T2, T3
+;//
+;// This caches the current bitstream position and next available
+;// bits in registers RBitStream, RBitBuffer, RBitCount. These registers
+;// are reserved for use by the bitstream decode package until you
+;// call M_BD_FINI.
+;//
+;// Next call the following macro(s) as many times as you need:
+;//
+;// M_BD_LOOK8 - Look ahead constant 1<=N<=8 bits into the bitstream
+;// M_BD_LOOK16 - Look ahead constant 1<=N<=16 bits into the bitstream
+;// M_BD_READ8 - Read constant 1<=N<=8 bits from the bitstream
+;// M_BD_READ16 - Read constant 1<=N<=16 bits from the bitstream
+;// M_BD_VREAD8 - Read variable 1<=N<=8 bits from the bitstream
+;// M_BD_VREAD16 - Read variable 1<=N<=16 bits from the bitstream
+;// M_BD_VLD - Perform variable length decode using lookup table
+;//
+;// Finally call the macro:
+;//
+;// M_BD_FINI ppBitStream, pBitOffset
+;//
+;// This writes the bitstream state back to memory.
+;//
+;// The three bitstream cache register names are assigned to the following global
+;// variables:
+;//
+
+ GBLS pBitStream ;// Register name for pBitStream
+ GBLS BitBuffer ;// Register name for BitBuffer
+ GBLS BitCount ;// Register name for BitCount
+
+;//
+;// These register variables must have a certain defined state on entry to every bitstream
+;// macro (except M_BD_INIT) and on exit from every bitstream macro (except M_BD_FINI).
+;// The state may depend on implementation.
+;//
+;// For the default (ARM11) implementation the following hold:
+;// pBitStream - points to the first byte not held in the BitBuffer
+;// BitBuffer - is a cache of (4 bytes) 32 bits, bit 31 the first bit
+;// BitCount - is offset (from the top bit) to the next unused bitstream bit
+;// 0<=BitCount<=15 (so BitBuffer holds at least 17 unused bits)
+;//
+;//
+
+ ;// Bitstream Decode initialise
+ ;//
+ ;// Initialises the bitstream decode global registers from
+ ;// bitstream pointers. This macro is split into 3 parts (M_BD_INIT0, M_BD_INIT1,
+ ;// M_BD_INIT2, to be used in that order) to enable scheduling.
+ ;// INIT0 binds the register names and loads the pointer and bit offset; INIT1 loads 3 bytes, advances $pBitStream by 3 and adds 8 to $BitCount; INIT2 packs the 3 bytes into bits 23..0 of $BitBuffer.
+ ;// Input Registers:
+ ;//
+ ;// $ppBitStream - pointer to pointer to the next bitstream byte
+ ;// $pBitOffset - pointer to the number of bits used in the current byte (0..7)
+ ;// $RBitStream - register to use for pBitStream (can be $ppBitStream)
+ ;// $RBitBuffer - register to use for BitBuffer
+ ;// $RBitCount - register to use for BitCount (can be $pBitOffset)
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $T1,$T2 - scratch registers written by M_BD_INIT1 that must be preserved until
+ ;// M_BD_INIT2 has consumed them ($T3 is accepted but not referenced by either macro)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_INIT0 $ppBitStream, $pBitOffset, $RBitStream, $RBitBuffer, $RBitCount
+
+pBitStream SETS "$RBitStream"
+BitBuffer SETS "$RBitBuffer"
+BitCount SETS "$RBitCount"
+
+ ;// load inputs: the bitstream byte pointer and the bit offset
+ LDR $pBitStream, [$ppBitStream]
+ LDR $BitCount, [$pBitOffset]
+ MEND
+
+ MACRO
+ M_BD_INIT1 $T1, $T2, $T3
+ LDRB $T2, [$pBitStream, #2]
+ LDRB $T1, [$pBitStream, #1]
+ LDRB $BitBuffer, [$pBitStream], #3
+ ADD $BitCount, $BitCount, #8
+ MEND
+
+ MACRO
+ M_BD_INIT2 $T1, $T2, $T3
+ ORR $T2, $T2, $T1, LSL #8
+ ORR $BitBuffer, $T2, $BitBuffer, LSL #16
+ MEND
+
+ ;//
+ ;// Look ahead fixed 1<=N<=8 bits without consuming any bits
+ ;// The next bits will be placed at bit 31..24 of destination register; the bits below them are not cleared
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to look (only range-checked by the ASSERT; it does not affect the generated code)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - $BitBuffer shifted left by $BitCount, so the next N bits of the bitstream are its top N bits
+ ;// $T1 - not a parameter of this macro; no scratch register is used
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_LOOK8 $Symbol, $N
+ ASSERT ($N>=1):LAND:($N<=8)
+ MOV $Symbol, $BitBuffer, LSL $BitCount
+ MEND
+
+ ;//
+ ;// Look ahead fixed 1<=N<=16 bits without consuming any bits
+ ;// The next bits will be placed at bit 31..16 of destination register; the bits below them are not cleared
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to look (only range-checked by the ASSERT; it does not affect the generated code)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - $BitBuffer shifted left by $BitCount, so the next N bits of the bitstream are its top N bits
+ ;// $T1 - accepted as a parameter but not referenced by this macro
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_LOOK16 $Symbol, $N, $T1
+ ASSERT ($N >= 1):LAND:($N <= 16)
+ MOV $Symbol, $BitBuffer, LSL $BitCount
+ MEND
+
+ ;//
+ ;// Skips fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer
+ ;// One byte is refilled into $BitBuffer (and $pBitStream advanced by 1) only when $BitCount+N reaches 8
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits (assembly-time constant, range-checked by the ASSERT)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $T1 - corrupted temp/scratch register (written only when a byte is refilled)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_SKIP8 $N, $T1
+ ASSERT ($N>=1):LAND:($N<=8)
+ SUBS $BitCount, $BitCount, #(8-$N)
+ LDRCSB $T1, [$pBitStream], #1
+ ADDCC $BitCount, $BitCount, #8
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ MEND
+
+
+ ;//
+ ;// Read fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer
+ ;// One byte is refilled into $BitBuffer (and $pBitStream advanced by 1) only when $BitCount+N reaches 8
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to read (assembly-time constant, range-checked by the ASSERT)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the next N bits of the bitstream, right-aligned (shifted down by 32-N)
+ ;// $T1 - corrupted temp/scratch register (written only when a byte is refilled)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_READ8 $Symbol, $N, $T1
+ ASSERT ($N>=1):LAND:($N<=8)
+ MOVS $Symbol, $BitBuffer, LSL $BitCount
+ SUBS $BitCount, $BitCount, #(8-$N)
+ LDRCSB $T1, [$pBitStream], #1
+ ADDCC $BitCount, $BitCount, #8
+ MOV $Symbol, $Symbol, LSR #(32-$N)
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ MEND
+
+ ;//
+ ;// Read fixed 1<=N<=16 bits from the bitstream, advancing the bitstream pointer
+ ;// For N<=8 this expands to M_BD_READ8; for N>8 one byte is always refilled and a second one conditionally
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to read (assembly-time constant, range-checked by the ASSERT)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the next N bits of the bitstream, right-aligned (must not be the same register as $T1)
+ ;// $T1 - corrupted temp/scratch register (receives the refill bytes)
+ ;// $T2 - accepted as a parameter but not referenced by this macro
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_READ16 $Symbol, $N, $T1, $T2
+ ASSERT ($N>=1):LAND:($N<=16)
+ ASSERT $Symbol<>$T1
+ IF ($N<=8)
+ M_BD_READ8 $Symbol, $N, $T1
+ ELSE
+ ;// N>8 so we will be able to refill at least one byte
+ LDRB $T1, [$pBitStream], #1
+ MOVS $Symbol, $BitBuffer, LSL $BitCount
+ ORR $BitBuffer, $T1, $BitBuffer, LSL #8
+ SUBS $BitCount, $BitCount, #(16-$N)
+ LDRCSB $T1, [$pBitStream], #1
+ MOV $Symbol, $Symbol, LSR #(32-$N)
+ ADDCC $BitCount, $BitCount, #8
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ ENDIF
+ MEND
+
+ ;//
+ ;// Skip variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer.
+ ;// One byte is refilled into $BitBuffer (and $pBitStream advanced by 1) only when $BitCount+N reaches 8
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits. 1<=N<=8 (added to $BitCount as a run-time operand; the range is not checked here)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $T1 - corrupted temp/scratch register (written only when a byte is refilled)
+ ;// $T2 - not a parameter of this macro
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_VSKIP8 $N, $T1
+ ADD $BitCount, $BitCount, $N
+ SUBS $BitCount, $BitCount, #8
+ LDRCSB $T1, [$pBitStream], #1
+ ADDCC $BitCount, $BitCount, #8
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ MEND
+
+ ;//
+ ;// Skip variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer.
+ ;// Up to two bytes are refilled into $BitBuffer, one for each whole 8 bits that $BitCount+N reaches
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits. 1<=N<=16 (added to $BitCount as a run-time operand; the range is not checked here)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $T1 - corrupted temp/scratch register (written only when a byte is refilled)
+ ;// $T2 - accepted as a parameter but not referenced by this macro
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_VSKIP16 $N, $T1, $T2
+ ADD $BitCount, $BitCount, $N
+ SUBS $BitCount, $BitCount, #8
+ LDRCSB $T1, [$pBitStream], #1
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ SUBCSS $BitCount, $BitCount, #8
+ LDRCSB $T1, [$pBitStream], #1
+ ADDCC $BitCount, $BitCount, #8
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ MEND
+
+ ;//
+ ;// Read variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer.
+ ;// One byte is refilled into $BitBuffer (and $pBitStream advanced by 1) only when $BitCount+N reaches 8
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to read. 1<=N<=8 (run-time operand; the range is not checked here)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the next N bits of the bitstream, right-aligned
+ ;// $T1 - corrupted temp/scratch register (written only when a byte is refilled)
+ ;// $T2 - corrupted temp/scratch register (set to 32-N, the shift that right-aligns $Symbol)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_VREAD8 $Symbol, $N, $T1, $T2
+ MOV $Symbol, $BitBuffer, LSL $BitCount
+ ADD $BitCount, $BitCount, $N
+ SUBS $BitCount, $BitCount, #8
+ LDRCSB $T1, [$pBitStream], #1
+ RSB $T2, $N, #32
+ ADDCC $BitCount, $BitCount, #8
+ MOV $Symbol, $Symbol, LSR $T2
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ MEND
+
+
+ ;//
+ ;// Read variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer.
+ ;// Up to two bytes are refilled into $BitBuffer, one for each whole 8 bits that $BitCount+N reaches
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to read. 1<=N<=16 (run-time operand; the range is not checked here)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the next N bits of the bitstream, right-aligned
+ ;// $T1 - corrupted temp/scratch register (written only when a byte is refilled)
+ ;// $T2 - corrupted temp/scratch register (set to 32-N, the shift that right-aligns $Symbol)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_VREAD16 $Symbol, $N, $T1, $T2
+ MOV $Symbol, $BitBuffer, LSL $BitCount
+ ADD $BitCount, $BitCount, $N
+ SUBS $BitCount, $BitCount, #8
+ LDRCSB $T1, [$pBitStream], #1
+ RSB $T2, $N, #32
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ SUBCSS $BitCount, $BitCount, #8
+ LDRCSB $T1, [$pBitStream], #1
+ ADDCC $BitCount, $BitCount, #8
+ MOV $Symbol, $Symbol, LSR $T2
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ MEND
+
+
+ ;//
+ ;// Decode a code of the form 0000...001 where there
+ ;// are N zeros before the 1 and N<=15 (code length<=16)
+ ;// The whole code (N zeros plus the terminating 1, i.e. N+1 bits) is consumed.
+ ;// Input Registers:
+ ;//
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the number of zeros before the next 1
+ ;// >=16 is an illegal code
+ ;// $T1 - corrupted temp/scratch register (written only when a byte is refilled)
+ ;// $T2 - accepted as a parameter but not referenced by this macro
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_CLZ16 $Symbol, $T1, $T2
+ MOVS $Symbol, $BitBuffer, LSL $BitCount
+ CLZ $Symbol, $Symbol
+ ADD $BitCount, $BitCount, $Symbol
+ SUBS $BitCount, $BitCount, #7 ;// length is Symbol+1, so this is BitCount+length-8
+ LDRCSB $T1, [$pBitStream], #1
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ SUBCSS $BitCount, $BitCount, #8
+ LDRCSB $T1, [$pBitStream], #1
+ ADDCC $BitCount, $BitCount, #8
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ MEND
+
+ ;//
+ ;// Decode a code of the form 1111...110 where there
+ ;// are N ones before the 0 and N<=15 (code length<=16)
+ ;// The whole code (N ones plus the terminating 0, i.e. N+1 bits) is consumed.
+ ;// Input Registers:
+ ;//
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the number of ones before the next 0
+ ;// >=16 is an illegal code
+ ;// $T1 - corrupted temp/scratch register (written only when a byte is refilled)
+ ;// $T2 - accepted as a parameter but not referenced by this macro
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_CLO16 $Symbol, $T1, $T2
+ MOV $Symbol, $BitBuffer, LSL $BitCount
+ MVN $Symbol, $Symbol
+ CLZ $Symbol, $Symbol
+ ADD $BitCount, $BitCount, $Symbol
+ SUBS $BitCount, $BitCount, #7 ;// length is Symbol+1, so this is BitCount+length-8
+ LDRCSB $T1, [$pBitStream], #1
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ SUBCSS $BitCount, $BitCount, #8
+ LDRCSB $T1, [$pBitStream], #1
+ ADDCC $BitCount, $BitCount, #8
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ MEND
+
+
+ ;//
+ ;// Variable Length Decode module
+ ;//
+ ;// Decodes one VLD symbol from the bitstream and refills the bitstream
+ ;// buffer.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $pVLDTable - pointer to VLD decode table of 16-bit entries.
+ ;// The format is described above at the start of
+ ;// this file.
+ ;// $S0 - The number of bits to look up for the first step
+ ;// 1<=$S0<=8
+ ;// $S1 - The number of bits to look up for each subsequent
+ ;// step 1<=$S1<=$S0.
+ ;//
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - decoded VLD symbol value (the leaf entry shifted right by 1 with bits 15..12 cleared)
+ ;// $T1 - corrupted temp/scratch register
+ ;// $T2 - accepted as a parameter but not referenced by this macro
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_VLD $Symbol, $T1, $T2, $pVLDTable, $S0, $S1
+ ASSERT (1<=$S0):LAND:($S0<=8)
+ ASSERT (1<=$S1):LAND:($S1<=$S0)
+
+ ;// Note 0<=BitCount<=15 on entry and exit
+
+ MOVS $T1, $BitBuffer, LSL $BitCount ;// left align next bits
+ MOVS $Symbol, #(2<<$S0)-2 ;// create mask
+ AND $Symbol, $Symbol, $T1, LSR #(31-$S0) ;// 2*(next $S0 bits)
+ SUBS $BitCount, $BitCount, #8 ;// CS if buffer can be filled
+01
+ LDRCSB $T1, [$pBitStream], #1 ;// load refill byte
+ LDRH $Symbol, [$pVLDTable, $Symbol] ;// load table entry
+ ADDCC $BitCount, $BitCount, #8 ;// refill not possible
+ ADD $BitCount, $BitCount, #$S0 ;// assume $S0 bits used
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in refill byte
+ MOVS $T1, $Symbol, LSR #1 ;// CS=leaf entry
+ BCS %FT02
+
+ MOVS $T1, $BitBuffer, LSL $BitCount ;// left align next bit
+ IF (2*$S0-$S1<=8)
+ ;// Can combine refill check and -S0+S1 and keep $BitCount<=15
+ SUBS $BitCount, $BitCount, #8+($S0-$S1)
+ ELSE
+ ;// Separate refill check and -S0+S1 offset
+ SUBS $BitCount, $BitCount, #8
+ SUB $BitCount, $BitCount, #($S0-$S1)
+ ENDIF
+ ADD $Symbol, $Symbol, $T1, LSR #(31-$S1) ;// add 2*(next $S1 bits) to
+ BIC $Symbol, $Symbol, #1 ;// table offset
+ B %BT01 ;// load next table entry
+02
+ ;// BitCount range now depends on the route here
+ ;// if (first step) S0 <= BitCount <= 7+S0 <=15
+ ;// else if (2*S0-S1<=8) S0 <= BitCount <= 7+(2*S0-S1) <=15
+ ;// else S1 <= BitCount <= 7+S1 <=15
+
+ SUB $BitCount, $BitCount, $Symbol, LSR#13
+ BIC $Symbol, $T1, #0xF000
+ MEND
+
+
+ ;// Add an offset number of bits
+ ;//
+ ;// Outputs destination byte and bit index values which correspond to an offset number of bits
+ ;// from the current location. This is used to compare bitstream positions using M_BD_CMP.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $Offset - Offset to be added in bits. NOTE: overwritten by this macro with $Offset+$BitCount.
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $ByteIndex - Destination pBitStream pointer after adding the Offset.
+ ;// This value is 4 bytes ahead of the exact pointer; subtract 4 to get the exact
+ ;// pointer (as in M_BD_FINI). For use with M_BD_CMP the subtraction is not needed.
+ ;// $BitIndex - Destination BitCount after the addition of Offset number of bits (low 3 bits, 0..7)
+ ;//
+ MACRO
+ M_BD_ADD $ByteIndex, $BitIndex, $Offset
+
+ ;// ($ByteIndex,$BitIndex) = Current position + $Offset bits
+ ADD $Offset, $Offset, $BitCount
+ AND $BitIndex, $Offset, #7
+ ADD $ByteIndex, $pBitStream, $Offset, ASR #3
+ MEND
+
+ ;// Move bitstream pointers to the location given
+ ;//
+ ;// Sets $pBitStream and $BitCount to the destination byte and bit index values
+ ;// given (calculated using M_BD_ADD). $BitBuffer is not modified by this macro.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;// $ByteIndex - Destination pBitStream pointer after move.
+ ;// This value is 4 bytes ahead of the exact pointer; subtract 4 to get the exact
+ ;// pointer (as in M_BD_FINI).
+ ;// $BitIndex - Destination BitCount after the move
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $pBitStream \
+ ;// } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_MOV $ByteIndex, $BitIndex
+
+ ;// ($pBitStream, $BitCount) = ($ByteIndex,$BitIndex)
+ MOV $BitCount, $BitIndex
+ MOV $pBitStream, $ByteIndex
+ MEND
+
+ ;// Bitstream Compare
+ ;//
+ ;// Compares the current bitstream position with a destination position. The destination position
+ ;// is held in two input registers which are calculated using the M_BD_ADD macro
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $ByteIndex - Destination pBitStream pointer, (4 bytes ahead as described in M_BD_ADD)
+ ;// $BitIndex - Destination BitCount
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// FLAGS - GE if destination is reached, LT if destination is still ahead
+ ;// $T1 - corrupted temp/scratch register
+ ;//
+ MACRO
+ M_BD_CMP $ByteIndex, $BitIndex, $T1
+
+ ;// Return flags set by (current position)-($ByteIndex,$BitIndex)
+ ;// so GE means that we have reached the indicated position
+
+ ADD $T1, $pBitStream, $BitCount, LSR #3
+ CMP $T1, $ByteIndex
+ AND $T1, $BitCount, #7
+ CMPEQ $T1, $BitIndex
+ MEND
+
+
+ ;// Bitstream Decode finalise
+ ;//
+ ;// Writes back the bitstream state to the bitstream pointers
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $ppBitStream - pointer to pointer to the next bitstream byte
+ ;// $pBitOffset - pointer to the number of bits used in the current byte (0..7)
+ ;// $pBitStream \
+ ;// $BitBuffer } these registers are corrupted
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_FINI $ppBitStream, $pBitOffset
+
+ ;// Advance pointer by the whole bytes already consumed ($BitCount/8) and keep the remaining bit offset (0..7)
+ ADD $pBitStream, $pBitStream, $BitCount, LSR#3
+ AND $BitCount, $BitCount, #7
+
+ ;// Now move back 32 bits (4 bytes) to reach the byte holding the first unused bit
+ SUB $pBitStream, $pBitStream, #4
+
+ ;// Store out bitstream state
+ STR $BitCount, [$pBitOffset]
+ STR $pBitStream, [$ppBitStream]
+ MEND
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Bitstream.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Bitstream.h
new file mode 100644
index 0000000..4f9bc3b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Bitstream.h
@@ -0,0 +1,212 @@
+/**
+ *
+ * File Name: armCOMM_Bitstream.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armCOMM_Bitstream.h
+ * Brief: Declares common APIs/Data types used across the OpenMax Encoders/Decoders.
+ *
+ */
+
+#ifndef _armCodec_H_
+#define _armCodec_H_
+
+#include "omxtypes.h"
+
+typedef struct {
+ OMX_U8 codeLen;
+ OMX_U32 codeWord;
+} ARM_VLC32;
+
+/* The above should be renamed as "ARM_VLC32" */
+
+/**
+ * Function: armLookAheadBits()
+ *
+ * Description:
+ * Get the next N bits from the bitstream without advancing the bitstream pointer
+ *
+ * Parameters:
+ * [in] **ppBitStream
+ * [in] *pOffset
+ * [in] N=1...32
+ *
+ * Returns Value
+ */
+
+OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N);
+
+/**
+ * Function: armGetBits()
+ *
+ * Description:
+ * Read N bits from the bitstream
+ *
+ * Parameters:
+ * [in] *ppBitStream
+ * [in] *pOffset
+ * [in] N=1..32
+ *
+ * [out] *ppBitStream
+ * [out] *pOffset
+ * Returns Value
+ */
+
+OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N);
+
+/**
+ * Function: armByteAlign()
+ *
+ * Description:
+ * Align the pointer *ppBitStream to the next byte boundary
+ *
+ * Parameters:
+ * [in] *ppBitStream
+ * [in] *pOffset
+ *
+ * [out] *ppBitStream
+ * [out] *pOffset
+ *
+ **/
+
+OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset);
+
+/**
+ * Function: armSkipBits()
+ *
+ * Description:
+ * Skip N bits from the value at *ppBitStream
+ *
+ * Parameters:
+ * [in] *ppBitStream
+ * [in] *pOffset
+ * [in] N
+ *
+ * [out] *ppBitStream
+ * [out] *pOffset
+ *
+ **/
+
+OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N);
+
+/***************************************
+ * Variable bit length Decode
+ ***************************************/
+
+/**
+ * Function: armUnPackVLC32()
+ *
+ * Description:
+ * Variable length decode of variable length symbol (max size 32 bits) read from
+ * the bit stream pointed by *ppBitStream at *pOffset by using the table
+ * pointed by pCodeBook
+ *
+ * Parameters:
+ * [in] **ppBitStream
+ * [in] *pOffset
+ * [in] pCodeBook
+ *
+ * [out] **ppBitStream
+ * [out] *pOffset
+ *
+ * Returns : Code Book Index if successful.
+ * : "ARM_NO_CODEBOOK_INDEX = 0xFFFF" if search fails.
+ **/
+
+#define ARM_NO_CODEBOOK_INDEX (OMX_U16)(0xFFFF)
+
+OMX_U16 armUnPackVLC32(
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pOffset,
+ const ARM_VLC32 *pCodeBook
+);
+
+/***************************************
+ * Fixed bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackBits
+ *
+ * Description:
+ * Pack a fixed-length code word (codeLength bits) into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte
+ * in the bit stream.
+ * [in] pOffset pointer to the bit position in the byte
+ * pointed by *ppBitStream. Valid within 0
+ * to 7.
+ * [in] codeWord Code word that need to be inserted in to the
+ * bitstream
+ * [in] codeLength Length of the code word valid range 1...32
+ *
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded,
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pOffset *pOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armPackBits (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pOffset,
+ OMX_U32 codeWord,
+ OMX_INT codeLength
+);
+
+/***************************************
+ * Variable bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackVLC32
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte
+ * in the bit stream.
+ * [in] pBitOffset pointer to the bit position in the byte
+ * pointed by *ppBitStream. Valid within 0
+ * to 7.
+ * [in] code VLC code word that need to be inserted in to the
+ * bitstream
+ *
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded,
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armPackVLC32 (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ ARM_VLC32 code
+);
+
+#endif /*_armCodec_H_*/
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCTTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCTTable.h
new file mode 100644
index 0000000..d5db32f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCTTable.h
@@ -0,0 +1,40 @@
+/**
+ *
+ *
+ * File Name: armCOMM_IDCTTable.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File : armCOMM_IDCTTable.h
+ * Description : Contains declarations of tables for IDCT calculation.
+ *
+ */
+
+#ifndef _armCOMM_IDCTTable_H_
+#define _armCOMM_IDCTTable_H_
+
+#include "omxtypes.h"
+
+ /* Table of s(u)*A(u)*A(v)/16 at Q15
+ * s(u)=1.0 0 <= u <= 5
+ * s(6)=2.0
+ * s(7)=4.0
+ * A(0) = 2*sqrt(2)
+ * A(u) = 4*cos(u*pi/16) for (u!=0)
+ */
+extern const OMX_U16 armCOMM_IDCTPreScale [64];
+extern const OMX_U16 armCOMM_IDCTCoef [4];
+
+#endif /* _armCOMM_IDCTTable_H_ */
+
+
+/* End of File */
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCT_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCT_s.h
new file mode 100644
index 0000000..03f7137
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCT_s.h
@@ -0,0 +1,1445 @@
+;//
+;// This confidential and proprietary software may be used only as
+;// authorised by a licensing agreement from ARM Limited
+;// (C) COPYRIGHT 2004 ARM Limited
+;// ALL RIGHTS RESERVED
+;// The entire notice above must be reproduced on all authorised
+;// copies and copies may only be made to the extent permitted
+;// by a licensing agreement from ARM Limited.
+;//
+;// IDCT_s.s
+;//
+;// Inverse DCT module
+;//
+;//
+;// ALGORITHM DESCRIPTION
+;//
+;// The 8x8 2D IDCT is performed by calculating a 1D IDCT for each
+;// column and then a 1D IDCT for each row.
+;//
+;// The 8-point 1D IDCT is defined by
+;// f(x) = (C(0)*T(0)*c(0,x) + ... + C(7)*T(7)*c(7,x))/2
+;//
+;// C(u) = 1/sqrt(2) if u=0 or 1 if u!=0
+;// c(u,x) = cos( (2x+1)*u*pi/16 )
+;//
+;// We compute the 8-point 1D IDCT using the reverse of
+;// the Arai-Agui-Nakajima flow graph which we split into
+;// 5 stages named in reverse order to identify with the
+;// forward DCT. Direct inversion of the forward formulae
+;// in file FDCT_s.s gives:
+;//
+;// IStage 5: j(u) = T(u)*A(u) [ A(u)=4*C(u)*c(u,0) ]
+;// [ A(0) = 2*sqrt(2)
+;// A(u) = 4*cos(u*pi/16) for (u!=0) ]
+;//
+;// IStage 4: i0 = j0 i1 = j4
+;// i3 = (j2+j6)/2 i2 = (j2-j6)/2
+;// i7 = (j5+j3)/2 i4 = (j5-j3)/2
+;// i5 = (j1+j7)/2 i6 = (j1-j7)/2
+;//
+;// IStage 3: h0 = (i0+i1)/2 h1 = (i0-i1)/2
+;// h2 = (i2*sqrt2)-i3 h3 = i3
+;// h4 = cos(pi/8)*i4 + sin(pi/8)*i6
+;// h6 = -sin(pi/8)*i4 + cos(pi/8)*i6
+;// [ The above two lines rotate by -(pi/8) ]
+;// h5 = (i5-i7)/sqrt2 h7 = (i5+i7)/2
+;//
+;// IStage 2: g0 = (h0+h3)/2 g3 = (h0-h3)/2
+;// g1 = (h1+h2)/2 g2 = (h1-h2)/2
+;// g7 = h7 g6 = h6 - h7
+;// g5 = h5 - g6 g4 = h4 - g5
+;//
+;// IStage 1: f0 = (g0+g7)/2 f7 = (g0-g7)/2
+;// f1 = (g1+g6)/2 f6 = (g1-g6)/2
+;// f2 = (g2+g5)/2 f5 = (g2-g5)/2
+;// f3 = (g3+g4)/2 f4 = (g3-g4)/2
+;//
+;// Note that most coefficients are halved 3 times during the
+;// above calculation. We can rescale the algorithm dividing
+;// the input by 8 to remove the halvings.
+;//
+;// IStage 5: j(u) = T(u)*A(u)/8
+;//
+;// IStage 4: i0 = j0 i1 = j4
+;// i3 = j2 + j6 i2 = j2 - j6
+;// i7 = j5 + j3 i4 = j5 - j3
+;// i5 = j1 + j7 i6 = j1 - j7
+;//
+;// IStage 3: h0 = i0 + i1 h1 = i0 - i1
+;// h2 = (i2*sqrt2)-i3 h3 = i3
+;// h4 = 2*( cos(pi/8)*i4 + sin(pi/8)*i6)
+;// h6 = 2*(-sin(pi/8)*i4 + cos(pi/8)*i6)
+;// h5 = (i5-i7)*sqrt2 h7 = i5 + i7
+;//
+;// IStage 2: g0 = h0 + h3 g3 = h0 - h3
+;// g1 = h1 + h2 g2 = h1 - h2
+;// g7 = h7 g6 = h6 - h7
+;// g5 = h5 - g6 g4 = h4 - g5
+;//
+;// IStage 1: f0 = g0 + g7 f7 = g0 - g7
+;// f1 = g1 + g6 f6 = g1 - g6
+;// f2 = g2 + g5 f5 = g2 - g5
+;// f3 = g3 + g4 f4 = g3 - g4
+;//
+;// Note:
+;// 1. The scaling by A(u)/8 can often be combined with inverse
+;// quantization. The column and row scalings can be combined.
+;// 2. The flowgraph in the AAN paper has h4,g6 negated compared
+;// to the above code but is otherwise identical.
+;// 3. The rotation by -pi/8 can be performed using three multiplies
+;// Eg c*i4+s*i6 = (i6-i4)*s + (c+s)*i4
+;// -s*i4+c*i6 = (i6-i4)*s + (c-s)*i6
+;// 4. If |T(u)|<=1 then from the IDCT definition,
+;// |f(x)| <= ((1/sqrt2) + |c(1,x)| + .. + |c(7,x)|)/2
+;// = ((1/sqrt2) + cos(pi/16) + ... + cos(7*pi/16))/2
+;// = ((1/sqrt2) + (cot(pi/32)-1)/2)/2
+;// = (1 + cos(pi/16) + cos(2pi/16) + cos(3pi/16))/sqrt(2)
+;// = (approx)2.64
+;// So the max gain of the 2D IDCT is ~x7.0 = 3 bits.
+;// The table below shows input patterns generating the maximum
+;// value of |f(x)| for input in the range |T(u)|<=1. M=-1, P=+1
+;// InputPattern Max |f(x)|
+;// PPPPPPPP |f0| = 2.64
+;// PPPMMMMM |f1| = 2.64
+;// PPMMMPPP |f2| = 2.64
+;// PPMMPPMM |f3| = 2.64
+;// PMMPPMMP |f4| = 2.64
+;// PMMPMMPM |f5| = 2.64
+;// PMPPMPMP |f6| = 2.64
+;// PMPMPMPM |f7| = 2.64
+;// Note that this input pattern is the transpose of the
+;// corresponding max input pattern for the FDCT.
+
+;// Arguments
+
+pSrc RN 0 ;// source data buffer
+Stride RN 1 ;// destination stride in bytes
+pDest RN 2 ;// destination data buffer
+pScale RN 3 ;// pointer to scaling table
+
+
+ ;// DCT Inverse Macro
+ ;// The DCT code should be parametrized according
+ ;// to the following inputs:
+ ;// $outsize = "u8" : 8-bit unsigned data saturated (0 to +255)
+ ;// "s9" : 16-bit signed data saturated to 9-bit (-256 to +255)
+ ;// "s16" : 16-bit signed data not saturated (max size ~+/-14273)
+ ;// $inscale = "s16" : signed 16-bit aan-scale table, Q15 format, with 4 byte alignment
+ ;// "s32" : signed 32-bit aan-scale table, Q23 format, with 4 byte alignment
+ ;//
+ ;// Inputs:
+ ;// pSrc = r0 = Pointer to input data
+ ;// Range is -256 to +255 (9-bit)
+ ;// Stride = r1 = Stride between output lines (destination stride in bytes)
+ ;// pDest = r2 = Pointer to output data
+ ;// pScale = r3 = Pointer to aan-scale table in the format defined by $inscale
+
+
+
+ MACRO
+ M_IDCT $outsize, $inscale, $stride
+ LCLA SHIFT
+
+
+ IF ARM1136JS
+
+;// REGISTER ALLOCATION
+;// This is hard since we have 8 values, 9 free registers and each
+;// butterfly requires a temporary register. We also want to
+;// maintain register order so we can use LDM/STM. The table below
+;// summarises the register allocation that meets all these criteria.
+;// a=1stcol, b=2ndcol, f,g,h,i are dataflow points described above.
+;//
+;// r1 a01 g0 h0
+;// r4 b01 f0 g1 h1 i0
+;// r5 a23 f1 g2 i1
+;// r6 b23 f2 g3 h2 i2
+;// r7 a45 f3 h3 i3
+;// r8 b45 f4 g4 h4 i4
+;// r9 a67 f5 g5 h5 i5
+;// r10 b67 f6 g6 h6 i6
+;// r11 f7 g7 h7 i7
+;//
+ra01 RN 1
+rb01 RN 4
+ra23 RN 5
+rb23 RN 6
+ra45 RN 7
+rb45 RN 8
+ra67 RN 9
+rb67 RN 10
+rtmp RN 11
+csPiBy8 RN 12 ;// [ (Sin(pi/8)@Q15), (Cos(pi/8)@Q15) ]
+LoopRR2 RN 14 ;// [ LoopNumber<<13 , (1/Sqrt(2))@Q15 ]
+;// Transpose allocation
+xft RN ra01
+xf0 RN rb01
+xf1 RN ra23
+xf2 RN rb23
+xf3 RN ra45
+xf4 RN rb45
+xf5 RN ra67
+xf6 RN rb67
+xf7 RN rtmp
+;// IStage 1 allocation
+xg0 RN xft
+xg1 RN xf0
+xg2 RN xf1
+xg3 RN xf2
+xgt RN xf3
+xg4 RN xf4
+xg5 RN xf5
+xg6 RN xf6
+xg7 RN xf7
+;// IStage 2 allocation
+xh0 RN xg0
+xh1 RN xg1
+xht RN xg2
+xh2 RN xg3
+xh3 RN xgt
+xh4 RN xg4
+xh5 RN xg5
+xh6 RN xg6
+xh7 RN xg7
+;// IStage 3,4 allocation
+xit RN xh0
+xi0 RN xh1
+xi1 RN xht
+xi2 RN xh2
+xi3 RN xh3
+xi4 RN xh4
+xi5 RN xh5
+xi6 RN xh6
+xi7 RN xh7
+
+ M_STR pDest, ppDest
+ IF "$stride"="s"
+ M_STR Stride, pStride
+ ENDIF
+ M_ADR pDest, pBlk
+ LDR csPiBy8, =0x30fc7642
+ LDR LoopRR2, =0x00005a82
+
+v6_idct_col$_F
+ ;// Load even values
+ LDR xi4, [pSrc], #4 ;// j0
+ LDR xi5, [pSrc, #4*16-4] ;// j4
+ LDR xi6, [pSrc, #2*16-4] ;// j2
+ LDR xi7, [pSrc, #6*16-4] ;// j6
+
+ ;// Scale Even Values
+ IF "$inscale"="s16" ;// 16x16 mul
+SHIFT SETA 12
+ LDR xi0, [pScale], #4
+ LDR xi1, [pScale, #4*16-4]
+ LDR xi2, [pScale, #2*16-4]
+ MOV xit, #1<<(SHIFT-1)
+ SMLABB xi3, xi0, xi4, xit
+ SMLATT xi4, xi0, xi4, xit
+ SMLABB xi0, xi1, xi5, xit
+ SMLATT xi5, xi1, xi5, xit
+ MOV xi3, xi3, ASR #SHIFT
+ PKHBT xi4, xi3, xi4, LSL #(16-SHIFT)
+ LDR xi3, [pScale, #6*16-4]
+ SMLABB xi1, xi2, xi6, xit
+ SMLATT xi6, xi2, xi6, xit
+ MOV xi0, xi0, ASR #SHIFT
+ PKHBT xi5, xi0, xi5, LSL #(16-SHIFT)
+ SMLABB xi2, xi3, xi7, xit
+ SMLATT xi7, xi3, xi7, xit
+ MOV xi1, xi1, ASR #SHIFT
+ PKHBT xi6, xi1, xi6, LSL #(16-SHIFT)
+ MOV xi2, xi2, ASR #SHIFT
+ PKHBT xi7, xi2, xi7, LSL #(16-SHIFT)
+ ENDIF
+ IF "$inscale"="s32" ;// 32x16 mul
+SHIFT SETA (12+8-16)
+ MOV xit, #1<<(SHIFT-1)
+ LDR xi0, [pScale], #8
+ LDR xi1, [pScale, #0*32+4-8]
+ LDR xi2, [pScale, #4*32-8]
+ LDR xi3, [pScale, #4*32+4-8]
+ SMLAWB xi0, xi0, xi4, xit
+ SMLAWT xi1, xi1, xi4, xit
+ SMLAWB xi2, xi2, xi5, xit
+ SMLAWT xi3, xi3, xi5, xit
+ MOV xi0, xi0, ASR #SHIFT
+ PKHBT xi4, xi0, xi1, LSL #(16-SHIFT)
+ MOV xi2, xi2, ASR #SHIFT
+ PKHBT xi5, xi2, xi3, LSL #(16-SHIFT)
+ LDR xi0, [pScale, #2*32-8]
+ LDR xi1, [pScale, #2*32+4-8]
+ LDR xi2, [pScale, #6*32-8]
+ LDR xi3, [pScale, #6*32+4-8]
+ SMLAWB xi0, xi0, xi6, xit
+ SMLAWT xi1, xi1, xi6, xit
+ SMLAWB xi2, xi2, xi7, xit
+ SMLAWT xi3, xi3, xi7, xit
+ MOV xi0, xi0, ASR #SHIFT
+ PKHBT xi6, xi0, xi1, LSL #(16-SHIFT)
+ MOV xi2, xi2, ASR #SHIFT
+ PKHBT xi7, xi2, xi3, LSL #(16-SHIFT)
+ ENDIF
+
+ ;// Load odd values
+ LDR xi0, [pSrc, #1*16-4] ;// j1
+ LDR xi1, [pSrc, #7*16-4] ;// j7
+ LDR xi2, [pSrc, #5*16-4] ;// j5
+ LDR xi3, [pSrc, #3*16-4] ;// j3
+
+ IF {TRUE}
+ ;// shortcut if odd values 0
+ TEQ xi0, #0
+ TEQEQ xi1, #0
+ TEQEQ xi2, #0
+ TEQEQ xi3, #0
+ BEQ v6OddZero$_F
+ ENDIF
+
+ ;// Store scaled even values
+ STMIA pDest, {xi4, xi5, xi6, xi7}
+
+ ;// Scale odd values
+ IF "$inscale"="s16"
+ ;// Perform AAN Scale
+ LDR xi4, [pScale, #1*16-4]
+ LDR xi5, [pScale, #7*16-4]
+ LDR xi6, [pScale, #5*16-4]
+ SMLABB xi7, xi0, xi4, xit
+ SMLATT xi0, xi0, xi4, xit
+ SMLABB xi4, xi1, xi5, xit
+ SMLATT xi1, xi1, xi5, xit
+ MOV xi7, xi7, ASR #SHIFT
+ PKHBT xi0, xi7, xi0, LSL #(16-SHIFT)
+ LDR xi7, [pScale, #3*16-4]
+ SMLABB xi5, xi2, xi6, xit
+ SMLATT xi2, xi2, xi6, xit
+ MOV xi4, xi4, ASR #SHIFT
+ PKHBT xi1, xi4, xi1, LSL #(16-SHIFT)
+ SMLABB xi6, xi3, xi7, xit
+ SMLATT xi3, xi3, xi7, xit
+ MOV xi5, xi5, ASR #SHIFT
+ PKHBT xi2, xi5, xi2, LSL #(16-SHIFT)
+ MOV xi6, xi6, ASR #SHIFT
+ PKHBT xi3, xi6, xi3, LSL #(16-SHIFT)
+ ENDIF
+ IF "$inscale"="s32" ;// 32x16 mul
+ LDR xi4, [pScale, #1*32-8]
+ LDR xi5, [pScale, #1*32+4-8]
+ LDR xi6, [pScale, #7*32-8]
+ LDR xi7, [pScale, #7*32+4-8]
+ SMLAWB xi4, xi4, xi0, xit
+ SMLAWT xi5, xi5, xi0, xit
+ SMLAWB xi6, xi6, xi1, xit
+ SMLAWT xi7, xi7, xi1, xit
+ MOV xi4, xi4, ASR #SHIFT
+ PKHBT xi0, xi4, xi5, LSL #(16-SHIFT)
+ MOV xi6, xi6, ASR #SHIFT
+ PKHBT xi1, xi6, xi7, LSL #(16-SHIFT)
+ LDR xi4, [pScale, #5*32-8]
+ LDR xi5, [pScale, #5*32+4-8]
+ LDR xi6, [pScale, #3*32-8]
+ LDR xi7, [pScale, #3*32+4-8]
+ SMLAWB xi4, xi4, xi2, xit
+ SMLAWT xi5, xi5, xi2, xit
+ SMLAWB xi6, xi6, xi3, xit
+ SMLAWT xi7, xi7, xi3, xit
+ MOV xi4, xi4, ASR #SHIFT
+ PKHBT xi2, xi4, xi5, LSL #(16-SHIFT)
+ MOV xi6, xi6, ASR #SHIFT
+ PKHBT xi3, xi6, xi7, LSL #(16-SHIFT)
+ ENDIF
+
+ SHADD16 xi5, xi0, xi1 ;// (j1+j7)/2
+ SSUB16 xi6, xi0, xi1 ;// j1-j7
+ SHADD16 xi7, xi2, xi3 ;// (j5+j3)/2
+ SSUB16 xi4, xi2, xi3 ;// j5-j3
+
+ SSUB16 xi3, xi5, xi7 ;// (i5-i7)/2
+
+ PKHBT xi0, xi6, xi4, LSL#16 ;// [i4,i6] row a
+ PKHTB xi1, xi4, xi6, ASR#16 ;// [i4,i6] row b
+
+ SMUADX xi2, xi0, csPiBy8 ;// rowa by [c,s]
+ SMUADX xi4, xi1, csPiBy8 ;// rowb by [c,s]
+ SMUSD xi0, xi0, csPiBy8 ;// rowa by [-s,c]
+ SMUSD xi6, xi1, csPiBy8 ;// rowb by [-s,c]
+
+ SMULBB xi1, xi3, LoopRR2
+ SMULTB xi3, xi3, LoopRR2
+
+ PKHTB xh4, xi4, xi2, ASR#16 ;// h4/4
+ PKHTB xh6, xi6, xi0, ASR#16 ;// h6/4
+ SHADD16 xh7, xi5, xi7 ;// (i5+i7)/4
+
+ ;// xi0,xi1,xi2,xi3 now free
+ ;// IStage 4,3, rows 2to3 x1/2
+
+ MOV xi3, xi3, LSL #1
+ PKHTB xh5, xi3, xi1, ASR#15 ;// h5/4
+ LDRD xi0, [pDest, #8] ;// j2,j6 scaled
+
+ ;// IStage 2, rows4to7
+ SSUB16 xg6, xh6, xh7
+ SSUB16 xg5, xh5, xg6
+ SSUB16 xg4, xh4, xg5
+
+ SSUB16 xi2, xi0, xi1 ;// (j2-j6)
+ SHADD16 xi3, xi0, xi1 ;// (j2+j6)/2
+
+ SMULBB xi0, xi2, LoopRR2
+ SMULTB xi2, xi2, LoopRR2
+
+ MOV xi2, xi2, LSL #1
+ PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4
+
+ ;// xi0, xi1 now free
+ ;// IStage 4,3 rows 0to1 x 1/2
+ LDRD xi0, [pDest] ;// j0, j4 scaled
+ SSUB16 xh2, xh2, xi3
+ ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows
+
+ SHADD16 xh0, xi0, xi1
+ SHSUB16 xh1, xi0, xi1
+
+ ;// IStage 2 rows 0to3 x 1/2
+ SHSUB16 xg2, xh1, xh2
+ SHADD16 xg1, xh1, xh2
+ SHSUB16 xg3, xh0, xh3
+ SHADD16 xg0, xh0, xh3
+
+ ;// IStage 1 all rows
+ SADD16 xf3, xg3, xg4
+ SSUB16 xf4, xg3, xg4
+ SADD16 xf2, xg2, xg5
+ SSUB16 xf5, xg2, xg5
+ SADD16 xf1, xg1, xg6
+ SSUB16 xf6, xg1, xg6
+ SADD16 xf0, xg0, xg7
+ SSUB16 xf7, xg0, xg7
+
+ ;// Transpose, store and loop
+ PKHBT ra01, xf0, xf1, LSL #16
+ PKHTB rb01, xf1, xf0, ASR #16
+
+ PKHBT ra23, xf2, xf3, LSL #16
+ PKHTB rb23, xf3, xf2, ASR #16
+
+ PKHBT ra45, xf4, xf5, LSL #16
+ PKHTB rb45, xf5, xf4, ASR #16
+
+ PKHBT ra67, xf6, xf7, LSL #16
+ STMIA pDest!, {ra01, ra23, ra45, ra67}
+ PKHTB rb67, xf7, xf6, ASR #16
+ STMIA pDest!, {rb01, rb23, rb45, rb67}
+ BCC v6_idct_col$_F
+
+ SUB pSrc, pDest, #(64*2)
+ M_LDR pDest, ppDest
+ IF "$stride"="s"
+ M_LDR pScale, pStride
+ ENDIF
+ B v6_idct_row$_F
+
+v6OddZero$_F
+ SSUB16 xi2, xi6, xi7 ;// (j2-j6)
+ SHADD16 xi3, xi6, xi7 ;// (j2+j6)/2
+
+ SMULBB xi0, xi2, LoopRR2
+ SMULTB xi2, xi2, LoopRR2
+
+ MOV xi2, xi2, LSL #1
+ PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4
+ SSUB16 xh2, xh2, xi3
+
+ ;// xi0, xi1 now free
+ ;// IStage 4,3 rows 0to1 x 1/2
+
+ SHADD16 xh0, xi4, xi5
+ SHSUB16 xh1, xi4, xi5
+
+ ;// IStage 2 rows 0to3 x 1/2
+ SHSUB16 xg2, xh1, xh2
+ SHADD16 xg1, xh1, xh2
+ SHSUB16 xg3, xh0, xh3
+ SHADD16 xg0, xh0, xh3
+
+ ;// IStage 1 all rows
+ MOV xf3, xg3
+ MOV xf4, xg3
+ MOV xf2, xg2
+ MOV xf5, xg2
+ MOV xf1, xg1
+ MOV xf6, xg1
+ MOV xf0, xg0
+ MOV xf7, xg0
+
+ ;// Transpose
+ PKHBT ra01, xf0, xf1, LSL #16
+ PKHTB rb01, xf1, xf0, ASR #16
+
+ PKHBT ra23, xf2, xf3, LSL #16
+ PKHTB rb23, xf3, xf2, ASR #16
+
+ PKHBT ra45, xf4, xf5, LSL #16
+ PKHTB rb45, xf5, xf4, ASR #16
+
+ PKHBT ra67, xf6, xf7, LSL #16
+ PKHTB rb67, xf7, xf6, ASR #16
+
+ STMIA pDest!, {ra01, ra23, ra45, ra67}
+ ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows
+ STMIA pDest!, {rb01, rb23, rb45, rb67}
+
+ BCC v6_idct_col$_F
+ SUB pSrc, pDest, #(64*2)
+ M_LDR pDest, ppDest
+ IF "$stride"="s"
+ M_LDR pScale, pStride
+ ENDIF
+
+
+v6_idct_row$_F
+ ;// IStage 4,3, rows4to7 x1/4
+ LDR xit, =0x00010001 ;// rounding constant
+ LDR xi0, [pSrc, #1*16] ;// j1
+ LDR xi1, [pSrc, #7*16] ;// 4*j7
+ LDR xi2, [pSrc, #5*16] ;// j5
+ LDR xi3, [pSrc, #3*16] ;// j3
+
+ SHADD16 xi1, xi1, xit ;// 2*j7
+ SHADD16 xi1, xi1, xit ;// j7
+
+ SHADD16 xi5, xi0, xi1 ;// (j1+j7)/2
+ SSUB16 xi6, xi0, xi1 ;// j1-j7
+ SHADD16 xi7, xi2, xi3 ;// (j5+j3)/2
+ SSUB16 xi4, xi2, xi3 ;// j5-j3
+
+ SSUB16 xi3, xi5, xi7 ;// (i5-i7)/2
+
+ PKHBT xi0, xi6, xi4, LSL#16 ;// [i4,i6] row a
+ PKHTB xi1, xi4, xi6, ASR#16 ;// [i4,i6] row b
+
+ SMUADX xi2, xi0, csPiBy8 ;// rowa by [c,s]
+ SMUADX xi4, xi1, csPiBy8 ;// rowb by [c,s]
+ SMUSD xi0, xi0, csPiBy8 ;// rowa by [-s,c]
+ SMUSD xi6, xi1, csPiBy8 ;// rowb by [-s,c]
+
+ SMULBB xi1, xi3, LoopRR2
+ SMULTB xi3, xi3, LoopRR2
+
+ PKHTB xh4, xi4, xi2, ASR#16 ;// h4/4
+ PKHTB xh6, xi6, xi0, ASR#16 ;// h6/4
+ SHADD16 xh7, xi5, xi7 ;// (i5+i7)/4
+
+ MOV xi3, xi3, LSL #1
+ PKHTB xh5, xi3, xi1, ASR#15 ;// h5/4
+
+ ;// xi0,xi1,xi2,xi3 now free
+ ;// IStage 4,3, rows 2to3 x1/2
+
+ LDR xi0, [pSrc, #2*16] ;// j2
+ LDR xi1, [pSrc, #6*16] ;// 2*j6
+
+ ;// IStage 2, rows4to7
+ SSUB16 xg6, xh6, xh7
+ SSUB16 xg5, xh5, xg6
+ SSUB16 xg4, xh4, xg5
+
+ SHADD16 xi1, xi1, xit ;// j6
+ SSUB16 xi2, xi0, xi1 ;// (j2-j6)
+ SHADD16 xi3, xi0, xi1 ;// (j2+j6)/2
+
+ SMULBB xi0, xi2, LoopRR2
+ SMULTB xi2, xi2, LoopRR2
+
+ MOV xi2, xi2, LSL #1
+
+ PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4
+
+ ;// xi0, xi1 now free
+ ;// IStage 4,3 rows 0to1 x 1/2
+ LDR xi1, [pSrc, #4*16] ;// j4
+ LDR xi0, [pSrc], #4 ;// j0
+
+ SSUB16 xh2, xh2, xi3
+ ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows
+
+ ADD xi0, xi0, xit, LSL #2 ;// ensure correct round
+ SHADD16 xh0, xi0, xi1 ;// of DC result
+ SHSUB16 xh1, xi0, xi1
+
+ ;// IStage 2 rows 0to3 x 1/2
+ SHSUB16 xg2, xh1, xh2
+ SHADD16 xg1, xh1, xh2
+ SHSUB16 xg3, xh0, xh3
+ SHADD16 xg0, xh0, xh3
+
+ ;// IStage 1 all rows
+ SHADD16 xf3, xg3, xg4
+ SHSUB16 xf4, xg3, xg4
+ SHADD16 xf2, xg2, xg5
+ SHSUB16 xf5, xg2, xg5
+ SHADD16 xf1, xg1, xg6
+ SHSUB16 xf6, xg1, xg6
+ SHADD16 xf0, xg0, xg7
+ SHSUB16 xf7, xg0, xg7
+
+ ;// Saturate
+ IF ("$outsize"="u8")
+ USAT16 xf0, #8, xf0
+ USAT16 xf1, #8, xf1
+ USAT16 xf2, #8, xf2
+ USAT16 xf3, #8, xf3
+ USAT16 xf4, #8, xf4
+ USAT16 xf5, #8, xf5
+ USAT16 xf6, #8, xf6
+ USAT16 xf7, #8, xf7
+ ENDIF
+ IF ("$outsize"="s9")
+ SSAT16 xf0, #9, xf0
+ SSAT16 xf1, #9, xf1
+ SSAT16 xf2, #9, xf2
+ SSAT16 xf3, #9, xf3
+ SSAT16 xf4, #9, xf4
+ SSAT16 xf5, #9, xf5
+ SSAT16 xf6, #9, xf6
+ SSAT16 xf7, #9, xf7
+ ENDIF
+
+ ;// Transpose to Row, Pack and store
+ IF ("$outsize"="u8")
+ ORR xf0, xf0, xf1, LSL #8 ;// [ b1 b0 a1 a0 ]
+ ORR xf2, xf2, xf3, LSL #8 ;// [ b3 b2 a3 a2 ]
+ ORR xf4, xf4, xf5, LSL #8 ;// [ b5 b4 a5 a4 ]
+ ORR xf6, xf6, xf7, LSL #8 ;// [ b7 b6 a7 a6 ]
+ PKHBT ra01, xf0, xf2, LSL #16
+ PKHTB rb01, xf2, xf0, ASR #16
+ PKHBT ra23, xf4, xf6, LSL #16
+ PKHTB rb23, xf6, xf4, ASR #16
+ STMIA pDest, {ra01, ra23}
+ IF "$stride"="s"
+ ADD pDest, pDest, pScale
+ STMIA pDest, {rb01, rb23}
+ ADD pDest, pDest, pScale
+ ELSE
+ ADD pDest, pDest, #($stride)
+ STMIA pDest, {rb01, rb23}
+ ADD pDest, pDest, #($stride)
+ ENDIF
+ ENDIF
+ IF ("$outsize"="s9"):LOR:("$outsize"="s16")
+ PKHBT ra01, xf0, xf1, LSL #16
+ PKHTB rb01, xf1, xf0, ASR #16
+
+ PKHBT ra23, xf2, xf3, LSL #16
+ PKHTB rb23, xf3, xf2, ASR #16
+
+ PKHBT ra45, xf4, xf5, LSL #16
+ PKHTB rb45, xf5, xf4, ASR #16
+
+ PKHBT ra67, xf6, xf7, LSL #16
+ PKHTB rb67, xf7, xf6, ASR #16
+
+ STMIA pDest, {ra01, ra23, ra45, ra67}
+ IF "$stride"="s"
+ ADD pDest, pDest, pScale
+ STMIA pDest, {rb01, rb23, rb45, rb67}
+ ADD pDest, pDest, pScale
+ ELSE
+ ADD pDest, pDest, #($stride)
+ STMIA pDest, {rb01, rb23, rb45, rb67}
+ ADD pDest, pDest, #($stride)
+ ENDIF
+ ENDIF
+
+ BCC v6_idct_row$_F
+ ENDIF ;// ARM1136JS
+
+
+ IF CortexA8
+
+Src0 EQU 7
+Src1 EQU 8
+Src2 EQU 9
+Src3 EQU 10
+Src4 EQU 11
+Src5 EQU 12
+Src6 EQU 13
+Src7 EQU 14
+Tmp EQU 15
+
+qXj0 QN Src0.S16
+qXj1 QN Src1.S16
+qXj2 QN Src2.S16
+qXj3 QN Src3.S16
+qXj4 QN Src4.S16
+qXj5 QN Src5.S16
+qXj6 QN Src6.S16
+qXj7 QN Src7.S16
+qXjt QN Tmp.S16
+
+dXj0lo DN (Src0*2).S16
+dXj0hi DN (Src0*2+1).S16
+dXj1lo DN (Src1*2).S16
+dXj1hi DN (Src1*2+1).S16
+dXj2lo DN (Src2*2).S16
+dXj2hi DN (Src2*2+1).S16
+dXj3lo DN (Src3*2).S16
+dXj3hi DN (Src3*2+1).S16
+dXj4lo DN (Src4*2).S16
+dXj4hi DN (Src4*2+1).S16
+dXj5lo DN (Src5*2).S16
+dXj5hi DN (Src5*2+1).S16
+dXj6lo DN (Src6*2).S16
+dXj6hi DN (Src6*2+1).S16
+dXj7lo DN (Src7*2).S16
+dXj7hi DN (Src7*2+1).S16
+dXjtlo DN (Tmp*2).S16
+dXjthi DN (Tmp*2+1).S16
+
+qXi0 QN qXj0
+qXi1 QN qXj4
+qXi2 QN qXj2
+qXi3 QN qXj7
+qXi4 QN qXj5
+qXi5 QN qXjt
+qXi6 QN qXj1
+qXi7 QN qXj6
+qXit QN qXj3
+
+dXi0lo DN dXj0lo
+dXi0hi DN dXj0hi
+dXi1lo DN dXj4lo
+dXi1hi DN dXj4hi
+dXi2lo DN dXj2lo
+dXi2hi DN dXj2hi
+dXi3lo DN dXj7lo
+dXi3hi DN dXj7hi
+dXi4lo DN dXj5lo
+dXi4hi DN dXj5hi
+dXi5lo DN dXjtlo
+dXi5hi DN dXjthi
+dXi6lo DN dXj1lo
+dXi6hi DN dXj1hi
+dXi7lo DN dXj6lo
+dXi7hi DN dXj6hi
+dXitlo DN dXj3lo
+dXithi DN dXj3hi
+
+qXh0 QN qXit
+qXh1 QN qXi0
+qXh2 QN qXi2
+qXh3 QN qXi3
+qXh4 QN qXi7
+qXh5 QN qXi5
+qXh6 QN qXi4
+qXh7 QN qXi1
+qXht QN qXi6
+
+dXh0lo DN dXitlo
+dXh0hi DN dXithi
+dXh1lo DN dXi0lo
+dXh1hi DN dXi0hi
+dXh2lo DN dXi2lo
+dXh2hi DN dXi2hi
+dXh3lo DN dXi3lo
+dXh3hi DN dXi3hi
+dXh4lo DN dXi7lo
+dXh4hi DN dXi7hi
+dXh5lo DN dXi5lo
+dXh5hi DN dXi5hi
+dXh6lo DN dXi4lo
+dXh6hi DN dXi4hi
+dXh7lo DN dXi1lo
+dXh7hi DN dXi1hi
+dXhtlo DN dXi6lo
+dXhthi DN dXi6hi
+
+qXg0 QN qXh2
+qXg1 QN qXht
+qXg2 QN qXh1
+qXg3 QN qXh0
+qXg4 QN qXh4
+qXg5 QN qXh5
+qXg6 QN qXh6
+qXg7 QN qXh7
+qXgt QN qXh3
+
+qXf0 QN qXg6
+qXf1 QN qXg5
+qXf2 QN qXg4
+qXf3 QN qXgt
+qXf4 QN qXg3
+qXf5 QN qXg2
+qXf6 QN qXg1
+qXf7 QN qXg0
+qXft QN qXg7
+
+
+qXt0 QN 1.S32
+qXt1 QN 2.S32
+qT0lo QN 1.S32
+qT0hi QN 2.S32
+qT1lo QN 3.S32
+qT1hi QN 4.S32
+qScalelo QN 5.S32 ;// used to read post scale values
+qScalehi QN 6.S32
+qTemp0 QN 5.S32
+qTemp1 QN 6.S32
+
+
+Scale1 EQU 6
+Scale2 EQU 15
+qScale1 QN Scale1.S16
+qScale2 QN Scale2.S16
+dScale1lo DN (Scale1*2).S16
+dScale1hi DN (Scale1*2+1).S16
+dScale2lo DN (Scale2*2).S16
+dScale2hi DN (Scale2*2+1).S16
+
+dCoefs DN 0.S16 ;// Scale coefficients in format {[0] [C] [S] [InvSqrt2]}
+InvSqrt2 DN dCoefs[0] ;// 1/sqrt(2) in Q15
+S DN dCoefs[1] ;// Sin(PI/8) in Q15
+C DN dCoefs[2] ;// Cos(PI/8) in Q15
+
+pTemp RN 12
+
+
+ IMPORT armCOMM_IDCTCoef
+
+ VLD1 {qXj0,qXj1}, [pSrc @64]!
+ VLD1 {qXj2,qXj3}, [pSrc @64]!
+ VLD1 {qXj4,qXj5}, [pSrc @64]!
+ VLD1 {qXj6,qXj7}, [pSrc @64]!
+
+ ;// Load PreScale and multiply with Src
+ ;// IStage 4
+
+ IF "$inscale"="s16" ;// 16X16 Mul
+ M_IDCT_PRESCALE16
+ ENDIF
+
+ IF "$inscale"="s32" ;// 32X32 Mul
+ M_IDCT_PRESCALE32
+ ENDIF
+
+ ;// IStage 3
+ VQRDMULH qXi2, qXi2, InvSqrt2 ;// i2/sqrt(2)
+ VHADD qXh0, qXi0, qXi1 ;// (i0+i1)/2
+ VHSUB qXh1, qXi0, qXi1 ;// (i0-i1)/2
+ VHADD qXh7, qXi5, qXi7 ;// (i5+i7)/4
+ VSUB qXh5, qXi5, qXi7 ;// (i5-i7)/2
+ VQRDMULH qXh5, qXh5, InvSqrt2 ;// h5/sqrt(2)
+ VSUB qXh2, qXi2, qXi3 ;// h2, h3
+
+ VMULL qXt0, dXi4lo, C ;// c*i4
+ VMLAL qXt0, dXi6lo, S ;// c*i4+s*i6
+ VMULL qXt1, dXi4hi, C ;// same for the high halves
+ VMLAL qXt1, dXi6hi, S
+ VSHRN dXh4lo, qXt0, #16 ;// h4
+ VSHRN dXh4hi, qXt1, #16
+
+ VMULL qXt0, dXi6lo, C ;// c*i6
+ VMLSL qXt0, dXi4lo, S ;// -s*i4 + c*i6
+ VMULL qXt1, dXi6hi, C ;// same for the high halves
+ VMLSL qXt1, dXi4hi, S
+ VSHRN dXh6lo, qXt0, #16 ;// h6
+ VSHRN dXh6hi, qXt1, #16
+
+ ;// IStage 2
+ VSUB qXg6, qXh6, qXh7
+ VSUB qXg5, qXh5, qXg6
+ VSUB qXg4, qXh4, qXg5
+ VHADD qXg1, qXh1, qXh2 ;// (h1+h2)/2
+ VHSUB qXg2, qXh1, qXh2 ;// (h1-h2)/2
+ VHADD qXg0, qXh0, qXh3 ;// (h0+h3)/2
+ VHSUB qXg3, qXh0, qXh3 ;// (h0-h3)/2
+
+ ;// IStage 1 all rows
+ VADD qXf3, qXg3, qXg4
+ VSUB qXf4, qXg3, qXg4
+ VADD qXf2, qXg2, qXg5
+ VSUB qXf5, qXg2, qXg5
+ VADD qXf1, qXg1, qXg6
+ VSUB qXf6, qXg1, qXg6
+ VADD qXf0, qXg0, qXg7
+ VSUB qXf7, qXg0, qXg7
+
+ ;// Transpose, store and loop
+XTR0 EQU Src5
+XTR1 EQU Tmp
+XTR2 EQU Src6
+XTR3 EQU Src7
+XTR4 EQU Src3
+XTR5 EQU Src0
+XTR6 EQU Src1
+XTR7 EQU Src2
+XTRt EQU Src4
+
+qA0 QN XTR0.S32 ;// for XTRpose
+qA1 QN XTR1.S32
+qA2 QN XTR2.S32
+qA3 QN XTR3.S32
+qA4 QN XTR4.S32
+qA5 QN XTR5.S32
+qA6 QN XTR6.S32
+qA7 QN XTR7.S32
+
+dB0 DN XTR0*2+1 ;// for using VSWP
+dB1 DN XTR1*2+1
+dB2 DN XTR2*2+1
+dB3 DN XTR3*2+1
+dB4 DN XTR4*2
+dB5 DN XTR5*2
+dB6 DN XTR6*2
+dB7 DN XTR7*2
+
+
+ VTRN qXf0, qXf1
+ VTRN qXf2, qXf3
+ VTRN qXf4, qXf5
+ VTRN qXf6, qXf7
+ VTRN qA0, qA2
+ VTRN qA1, qA3
+ VTRN qA4, qA6
+ VTRN qA5, qA7
+ VSWP dB0, dB4
+ VSWP dB1, dB5
+ VSWP dB2, dB6
+ VSWP dB3, dB7
+
+
+qYj0 QN qXf0
+qYj1 QN qXf1
+qYj2 QN qXf2
+qYj3 QN qXf3
+qYj4 QN qXf4
+qYj5 QN qXf5
+qYj6 QN qXf6
+qYj7 QN qXf7
+qYjt QN qXft
+
+dYj0lo DN (XTR0*2).S16
+dYj0hi DN (XTR0*2+1).S16
+dYj1lo DN (XTR1*2).S16
+dYj1hi DN (XTR1*2+1).S16
+dYj2lo DN (XTR2*2).S16
+dYj2hi DN (XTR2*2+1).S16
+dYj3lo DN (XTR3*2).S16
+dYj3hi DN (XTR3*2+1).S16
+dYj4lo DN (XTR4*2).S16
+dYj4hi DN (XTR4*2+1).S16
+dYj5lo DN (XTR5*2).S16
+dYj5hi DN (XTR5*2+1).S16
+dYj6lo DN (XTR6*2).S16
+dYj6hi DN (XTR6*2+1).S16
+dYj7lo DN (XTR7*2).S16
+dYj7hi DN (XTR7*2+1).S16
+dYjtlo DN (XTRt*2).S16
+dYjthi DN (XTRt*2+1).S16
+
+qYi0 QN qYj0
+qYi1 QN qYj4
+qYi2 QN qYj2
+qYi3 QN qYj7
+qYi4 QN qYj5
+qYi5 QN qYjt
+qYi6 QN qYj1
+qYi7 QN qYj6
+qYit QN qYj3
+
+dYi0lo DN dYj0lo
+dYi0hi DN dYj0hi
+dYi1lo DN dYj4lo
+dYi1hi DN dYj4hi
+dYi2lo DN dYj2lo
+dYi2hi DN dYj2hi
+dYi3lo DN dYj7lo
+dYi3hi DN dYj7hi
+dYi4lo DN dYj5lo
+dYi4hi DN dYj5hi
+dYi5lo DN dYjtlo
+dYi5hi DN dYjthi
+dYi6lo DN dYj1lo
+dYi6hi DN dYj1hi
+dYi7lo DN dYj6lo
+dYi7hi DN dYj6hi
+dYitlo DN dYj3lo
+dYithi DN dYj3hi
+
+qYh0 QN qYit
+qYh1 QN qYi0
+qYh2 QN qYi2
+qYh3 QN qYi3
+qYh4 QN qYi7
+qYh5 QN qYi5
+qYh6 QN qYi4
+qYh7 QN qYi1
+qYht QN qYi6
+
+dYh0lo DN dYitlo
+dYh0hi DN dYithi
+dYh1lo DN dYi0lo
+dYh1hi DN dYi0hi
+dYh2lo DN dYi2lo
+dYh2hi DN dYi2hi
+dYh3lo DN dYi3lo
+dYh3hi DN dYi3hi
+dYh4lo DN dYi7lo
+dYh4hi DN dYi7hi
+dYh5lo DN dYi5lo
+dYh5hi DN dYi5hi
+dYh6lo DN dYi4lo
+dYh6hi DN dYi4hi
+dYh7lo DN dYi1lo
+dYh7hi DN dYi1hi
+dYhtlo DN dYi6lo
+dYhthi DN dYi6hi
+
+qYg0 QN qYh2
+qYg1 QN qYht
+qYg2 QN qYh1
+qYg3 QN qYh0
+qYg4 QN qYh4
+qYg5 QN qYh5
+qYg6 QN qYh6
+qYg7 QN qYh7
+qYgt QN qYh3
+
+qYf0 QN qYg6
+qYf1 QN qYg5
+qYf2 QN qYg4
+qYf3 QN qYgt
+qYf4 QN qYg3
+qYf5 QN qYg2
+qYf6 QN qYg1
+qYf7 QN qYg0
+qYft QN qYg7
+
+ VRSHR qYj7, qYj7, #2
+ VRSHR qYj6, qYj6, #1
+
+ VHADD qYi5, qYj1, qYj7 ;// i5 = (j1+j7)/2
+ VSUB qYi6, qYj1, qYj7 ;// i6 = j1-j7
+ VHADD qYi3, qYj2, qYj6 ;// i3 = (j2+j6)/2
+ VSUB qYi2, qYj2, qYj6 ;// i2 = j2-j6
+ VHADD qYi7, qYj5, qYj3 ;// i7 = (j5+j3)/2
+ VSUB qYi4, qYj5, qYj3 ;// i4 = j5-j3
+
+ VQRDMULH qYi2, qYi2, InvSqrt2 ;// i2/sqrt(2)
+ ;// IStage 4,3 rows 0to1 x 1/2
+
+ MOV pTemp, #0x4 ;// ensure correct round
+ VDUP qScale1, pTemp ;// of DC result
+ VADD qYi0, qYi0, qScale1
+
+ VHADD qYh0, qYi0, qYi1 ;// (i0+i1)/2
+ VHSUB qYh1, qYi0, qYi1 ;// (i0-i1)/2
+
+ VHADD qYh7, qYi5, qYi7 ;// (i5+i7)/4
+ VSUB qYh5, qYi5, qYi7 ;// (i5-i7)/2
+ VSUB qYh2, qYi2, qYi3 ;// h2, h3
+ VQRDMULH qYh5, qYh5, InvSqrt2 ;// h5/sqrt(2)
+
+ VMULL qXt0, dYi4lo, C ;// c*i4
+ VMLAL qXt0, dYi6lo, S ;// c*i4+s*i6
+ VMULL qXt1, dYi4hi, C ;// same for the high halves
+ VMLAL qXt1, dYi6hi, S
+ VSHRN dYh4lo, qXt0, #16 ;// h4
+ VSHRN dYh4hi, qXt1, #16
+
+ VMULL qXt0, dYi6lo, C ;// c*i6
+ VMLSL qXt0, dYi4lo, S ;// -s*i4 + c*i6
+ VMULL qXt1, dYi6hi, C ;// same for the high halves
+ VMLSL qXt1, dYi4hi, S
+ VSHRN dYh6lo, qXt0, #16 ;// h6
+ VSHRN dYh6hi, qXt1, #16
+
+ VSUB qYg6, qYh6, qYh7
+ VSUB qYg5, qYh5, qYg6
+ VSUB qYg4, qYh4, qYg5
+
+ ;// IStage 2 rows 0to3 x 1/2
+ VHADD qYg1, qYh1, qYh2 ;// (h1+h2)/2
+ VHSUB qYg2, qYh1, qYh2 ;// (h1-h2)/2
+ VHADD qYg0, qYh0, qYh3 ;// (h0+h3)/2
+ VHSUB qYg3, qYh0, qYh3 ;// (h0-h3)/2
+
+
+ ;// IStage 1 all rows
+ VHADD qYf3, qYg3, qYg4
+ VHSUB qYf4, qYg3, qYg4
+ VHADD qYf2, qYg2, qYg5
+ VHSUB qYf5, qYg2, qYg5
+ VHADD qYf1, qYg1, qYg6
+ VHSUB qYf6, qYg1, qYg6
+ VHADD qYf0, qYg0, qYg7
+ VHSUB qYf7, qYg0, qYg7
+
+YTR0 EQU Src0
+YTR1 EQU Src4
+YTR2 EQU Src1
+YTR3 EQU Src2
+YTR4 EQU Src7
+YTR5 EQU Src5
+YTR6 EQU Tmp
+YTR7 EQU Src6
+YTRt EQU Src3
+
+qC0 QN YTR0.S32 ;// for YTRpose
+qC1 QN YTR1.S32
+qC2 QN YTR2.S32
+qC3 QN YTR3.S32
+qC4 QN YTR4.S32
+qC5 QN YTR5.S32
+qC6 QN YTR6.S32
+qC7 QN YTR7.S32
+
+dD0 DN YTR0*2+1 ;// for using VSWP
+dD1 DN YTR1*2+1
+dD2 DN YTR2*2+1
+dD3 DN YTR3*2+1
+dD4 DN YTR4*2
+dD5 DN YTR5*2
+dD6 DN YTR6*2
+dD7 DN YTR7*2
+
+ VTRN qYf0, qYf1
+ VTRN qYf2, qYf3
+ VTRN qYf4, qYf5
+ VTRN qYf6, qYf7
+ VTRN qC0, qC2
+ VTRN qC1, qC3
+ VTRN qC4, qC6
+ VTRN qC5, qC7
+ VSWP dD0, dD4
+ VSWP dD1, dD5
+ VSWP dD2, dD6
+ VSWP dD3, dD7
+
+
+dYf0U8 DN YTR0*2.U8
+dYf1U8 DN YTR1*2.U8
+dYf2U8 DN YTR2*2.U8
+dYf3U8 DN YTR3*2.U8
+dYf4U8 DN YTR4*2.U8
+dYf5U8 DN YTR5*2.U8
+dYf6U8 DN YTR6*2.U8
+dYf7U8 DN YTR7*2.U8
+
+ ;//
+ ;// Do saturation if outsize is other than S16
+ ;//
+
+ IF ("$outsize"="u8")
+ ;// Output range [0-255]
+ VQMOVN dYf0U8, qYf0
+ VQMOVN dYf1U8, qYf1
+ VQMOVN dYf2U8, qYf2
+ VQMOVN dYf3U8, qYf3
+ VQMOVN dYf4U8, qYf4
+ VQMOVN dYf5U8, qYf5
+ VQMOVN dYf6U8, qYf6
+ VQMOVN dYf7U8, qYf7
+ ENDIF
+
+ IF ("$outsize"="s9")
+ ;// Output range [-256 to +255]
+ VQSHL qYf0, qYf0, #16-9
+ VQSHL qYf1, qYf1, #16-9
+ VQSHL qYf2, qYf2, #16-9
+ VQSHL qYf3, qYf3, #16-9
+ VQSHL qYf4, qYf4, #16-9
+ VQSHL qYf5, qYf5, #16-9
+ VQSHL qYf6, qYf6, #16-9
+ VQSHL qYf7, qYf7, #16-9
+
+ VSHR qYf0, qYf0, #16-9
+ VSHR qYf1, qYf1, #16-9
+ VSHR qYf2, qYf2, #16-9
+ VSHR qYf3, qYf3, #16-9
+ VSHR qYf4, qYf4, #16-9
+ VSHR qYf5, qYf5, #16-9
+ VSHR qYf6, qYf6, #16-9
+ VSHR qYf7, qYf7, #16-9
+ ENDIF
+
+ ;// Store output depending on the Stride size
+ IF "$stride"="s"
+ VST1 qYf0, [pDest @64], Stride
+ VST1 qYf1, [pDest @64], Stride
+ VST1 qYf2, [pDest @64], Stride
+ VST1 qYf3, [pDest @64], Stride
+ VST1 qYf4, [pDest @64], Stride
+ VST1 qYf5, [pDest @64], Stride
+ VST1 qYf6, [pDest @64], Stride
+ VST1 qYf7, [pDest @64]
+ ELSE
+ IF ("$outsize"="u8")
+ VST1 dYf0U8, [pDest @64], #8
+ VST1 dYf1U8, [pDest @64], #8
+ VST1 dYf2U8, [pDest @64], #8
+ VST1 dYf3U8, [pDest @64], #8
+ VST1 dYf4U8, [pDest @64], #8
+ VST1 dYf5U8, [pDest @64], #8
+ VST1 dYf6U8, [pDest @64], #8
+ VST1 dYf7U8, [pDest @64]
+ ELSE
+ ;// ("$outsize"="s9") or ("$outsize"="s16")
+ VST1 qYf0, [pDest @64], #16
+ VST1 qYf1, [pDest @64], #16
+ VST1 qYf2, [pDest @64], #16
+ VST1 qYf3, [pDest @64], #16
+ VST1 qYf4, [pDest @64], #16
+ VST1 qYf5, [pDest @64], #16
+ VST1 qYf6, [pDest @64], #16
+ VST1 qYf7, [pDest @64]
+ ENDIF
+
+ ENDIF
+
+
+
+ ENDIF ;// CortexA8
+
+
+
+ MEND
+
+ ;// Scale TWO input rows with TWO rows of 16 bit scale values
+ ;//
+ ;// This macro is used by M_IDCT_PRESCALE16 to pre-scale two rows of
+ ;// input (eight S16 values each, rows n and n+1) with two rows of
+ ;// scale values. It also loads the next two rows of scale values from
+ ;// pScale, if the $LastRow flag is "0".
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $dAlo - Input D register with first four S16 values of row n
+ ;// $dAhi - Input D register with next four S16 values of row n
+ ;// $dBlo - Input D register with first four S16 values of row n+1
+ ;// $dBhi - Input D register with next four S16 values of row n+1
+ ;// pScale - Pointer to next row of scale values
+ ;// qT0lo - Temporary scratch register
+ ;// qT0hi - Temporary scratch register
+ ;// qT1lo - Temporary scratch register
+ ;// qT1hi - Temporary scratch register
+ ;// dScale1lo - Scale value of row n
+ ;// dScale1hi - Scale value of row n
+ ;// dScale2lo - Scale value of row n+1
+ ;// dScale2hi - Scale value of row n+1
+ ;//
+ ;// Input Flag
+ ;//
+ ;// $LastRow - Flag to indicate whether current row is last row
+ ;// ("0" = more rows follow, so reload the scale registers)
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $dAlo - Scaled output values (first four S16 of row n)
+ ;// $dAhi - Scaled output values (next four S16 of row n)
+ ;// $dBlo - Scaled output values (first four S16 of row n+1)
+ ;// $dBhi - Scaled output values (next four S16 of row n+1)
+ ;// qScale1 - Scale values for next row
+ ;// qScale2 - Scale values for next row+1
+ ;// pScale - Pointer to next row of scale values
+ ;// (advanced by 32 bytes when $LastRow is "0")
+ ;//
+ MACRO
+ M_IDCT_SCALE16 $dAlo, $dAhi, $dBlo, $dBhi, $LastRow
+ ;// Widening multiplies: input * scale into the scratch Q registers
+ VMULL qT0lo, $dAlo, dScale1lo
+ VMULL qT0hi, $dAhi, dScale1hi
+ VMULL qT1lo, $dBlo, dScale2lo
+ VMULL qT1hi, $dBhi, dScale2hi
+ IF "$LastRow"="0"
+ ;// The current scale values have been consumed by the VMULLs above,
+ ;// so the scale registers can be refilled for the next invocation.
+ VLD1 qScale1, [pScale], #16 ;// Load scale for row n+2
+ VLD1 qScale2, [pScale], #16 ;// Load scale for row n+3
+ ENDIF
+ ;// Narrow the products back into the input registers: rounding,
+ ;// saturating shift right by 12 bits
+ VQRSHRN $dAlo, qT0lo, #12
+ VQRSHRN $dAhi, qT0hi, #12
+ VQRSHRN $dBlo, qT1lo, #12
+ VQRSHRN $dBhi, qT1hi, #12
+ MEND
+
+ ;// Scale 8x8 block input values with 16 bit scale values
+ ;//
+ ;// This macro is used to pre-scale a block of 8x8 input.
+ ;// It also does the first stage transformations of the IDCT.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// dXjnlo - n th input D register with first four S16 values
+ ;// dXjnhi - n th input D register with next four S16 values
+ ;// qXjn - n th input Q register with eight S16 values
+ ;// pScale - Pointer to scale values
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// qXin - n th output Q register with eight S16 output values of 1st stage
+ ;//
+ ;// Side effects:
+ ;//
+ ;// pScale - advanced past all eight rows of scale values
+ ;// pSrc - overwritten with the address of armCOMM_IDCTCoef
+ ;// dCoefs - loaded with the constants at armCOMM_IDCTCoef
+ ;//
+ ;// NOTE(review): no instruction here writes qXi0 or qXi1; presumably
+ ;// they alias the scaled qXj0 and qXj4 registers - confirm against
+ ;// the register definitions.
+ ;//
+ MACRO
+ M_IDCT_PRESCALE16
+ VLD1 qScale1, [pScale], #16 ;// Load Pre scale for row 0
+ VLD1 qScale2, [pScale], #16 ;// Load Pre scale for row 1
+ M_IDCT_SCALE16 dXj0lo, dXj0hi, dXj1lo, dXj1hi, 0 ;// Pre scale row 0 & 1
+ M_IDCT_SCALE16 dXj2lo, dXj2hi, dXj3lo, dXj3hi, 0 ;// Pre scale row 2 & 3
+ M_IDCT_SCALE16 dXj4lo, dXj4hi, dXj5lo, dXj5hi, 0 ;// Pre scale row 4 & 5
+ M_IDCT_SCALE16 dXj6lo, dXj6hi, dXj7lo, dXj7hi, 1 ;// Pre scale row 6 & 7 (last: no reload)
+ ;// First stage butterflies; the constant loads are interleaved with them
+ VHADD qXi5, qXj1, qXj7 ;// (j1+j7)/2
+ VSUB qXi6, qXj1, qXj7 ;// j1-j7
+ LDR pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants
+ VHADD qXi3, qXj2, qXj6 ;// (j2+j6)/2
+ VSUB qXi2, qXj2, qXj6 ;// j2-j6
+ VLDR dCoefs, [pSrc] ;// Load DCT inverse AAN constants
+ VHADD qXi7, qXj5, qXj3 ;// (j5+j3)/2
+ VSUB qXi4, qXj5, qXj3 ;// j5-j3
+ MEND
+
+
+ ;// Scale 8x8 block input values with 32 bit scale values
+ ;//
+ ;// This macro is used to pre-scale a block of 8x8 input.
+ ;// It also does the first stage transformations of the IDCT.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// dXjnlo - n th input D register with first four S16 values
+ ;// dXjnhi - n th input D register with next four S16 values
+ ;// qXjn - n th input Q register with eight S16 values
+ ;// pScale - Pointer to 32bit scale values in Q23 format
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// dXinlo - n th output D register with first four S16 output values of 1st stage
+ ;// dXinhi - n th output D register with next four S16 output values of 1st stage
+ ;//
+ ;// Side effects:
+ ;//
+ ;// pScale - advanced by four rows (4 x 32 bytes); left pointing at the
+ ;// row 4 scale values
+ ;// pTemp - used to walk the scale rows 7, 6, 5 backwards
+ ;// pSrc - decremented to reload qXj4, then overwritten with the
+ ;// address of armCOMM_IDCTCoef
+ ;// dCoefs - loaded with the constants at armCOMM_IDCTCoef
+ ;//
+ MACRO
+ M_IDCT_PRESCALE32
+ ;// Scale value registers: only two Q-register pairs are used and are
+ ;// shared between rows (rows 0,5,6,7 use Q0/Q1, rows 1-4 use Q2/Q3)
+qScale0lo QN 0.S32
+qScale0hi QN 1.S32
+qScale1lo QN 2.S32
+qScale1hi QN 3.S32
+qScale2lo QN qScale1lo
+qScale2hi QN qScale1hi
+qScale3lo QN qScale1lo
+qScale3hi QN qScale1hi
+qScale4lo QN qScale1lo
+qScale4hi QN qScale1hi
+qScale5lo QN qScale0lo
+qScale5hi QN qScale0hi
+qScale6lo QN qScale0lo
+qScale6hi QN qScale0hi
+qScale7lo QN qScale0lo
+qScale7hi QN qScale0hi
+
+ ;// Widened (S32) source registers, likewise shared between rows.
+ ;// NOTE(review): qSrc1hi is numbered via the symbol Src4, which is
+ ;// defined outside this macro - confirm it does not overlap a live register.
+qSrc0lo QN 4.S32
+qSrc0hi QN 5.S32
+qSrc1lo QN 6.S32
+qSrc1hi QN Src4.S32
+qSrc2lo QN qSrc0lo
+qSrc2hi QN qSrc0hi
+qSrc3lo QN qSrc0lo
+qSrc3hi QN qSrc0hi
+qSrc4lo QN qSrc0lo
+qSrc4hi QN qSrc0hi
+qSrc5lo QN qSrc1lo
+qSrc5hi QN qSrc1hi
+qSrc6lo QN qSrc1lo
+qSrc6hi QN qSrc1hi
+qSrc7lo QN qSrc0lo
+qSrc7hi QN qSrc0hi
+
+ ;// Butterfly results reuse the Q0/Q1 scale registers
+qRes17lo QN qScale0lo
+qRes17hi QN qScale0hi
+qRes26lo QN qScale0lo
+qRes26hi QN qScale0hi
+qRes53lo QN qScale0lo
+qRes53hi QN qScale0hi
+
+ ADD pTemp, pScale, #4*8*7 ;// Address of pScale[7]
+
+ ;// Row 0
+ VLD1 {qScale0lo, qScale0hi}, [pScale]!
+ VSHLL qSrc0lo, dXj0lo, #(12-1)
+ VSHLL qSrc0hi, dXj0hi, #(12-1)
+ VLD1 {qScale1lo, qScale1hi}, [pScale]!
+ VQRDMULH qSrc0lo, qScale0lo, qSrc0lo
+ VQRDMULH qSrc0hi, qScale0hi, qSrc0hi
+ VLD1 {qScale7lo, qScale7hi}, [pTemp]!
+ VSHLL qSrc1lo, dXj1lo, #(12-1)
+ VSHLL qSrc1hi, dXj1hi, #(12-1)
+ VMOVN dXi0lo, qSrc0lo ;// Output i0
+ VMOVN dXi0hi, qSrc0hi
+ VSHLL qSrc7lo, dXj7lo, #(12-1)
+ VSHLL qSrc7hi, dXj7hi, #(12-1)
+ ;// Undo the 32-byte writeback and step back one more row: pTemp -> row 6
+ SUB pTemp, pTemp, #((16*2)+(4*8*1))
+ VQRDMULH qSrc1lo, qScale1lo, qSrc1lo
+ VQRDMULH qSrc1hi, qScale1hi, qSrc1hi
+ VQRDMULH qSrc7lo, qScale7lo, qSrc7lo
+ VQRDMULH qSrc7hi, qScale7hi, qSrc7hi
+ VLD1 {qScale2lo, qScale2hi}, [pScale]!
+
+ ;// Row 1 & 7
+ VHADD qRes17lo, qSrc1lo, qSrc7lo ;// (j1+j7)/2
+ VHADD qRes17hi, qSrc1hi, qSrc7hi ;// (j1+j7)/2
+ VMOVN dXi5lo, qRes17lo ;// Output i5
+ VMOVN dXi5hi, qRes17hi
+ VSUB qRes17lo, qSrc1lo, qSrc7lo ;// j1-j7
+ VSUB qRes17hi, qSrc1hi, qSrc7hi ;// j1-j7
+ VMOVN dXi6lo, qRes17lo ;// Output i6
+ VMOVN dXi6hi, qRes17hi
+ VSHLL qSrc2lo, dXj2lo, #(12-1)
+ VSHLL qSrc2hi, dXj2hi, #(12-1)
+ VLD1 {qScale6lo, qScale6hi}, [pTemp]!
+ VSHLL qSrc6lo, dXj6lo, #(12-1)
+ VSHLL qSrc6hi, dXj6hi, #(12-1)
+ ;// Step pTemp back again: pTemp -> row 5
+ SUB pTemp, pTemp, #((16*2)+(4*8*1))
+ VQRDMULH qSrc2lo, qScale2lo, qSrc2lo
+ VQRDMULH qSrc2hi, qScale2hi, qSrc2hi
+ VQRDMULH qSrc6lo, qScale6lo, qSrc6lo
+ VQRDMULH qSrc6hi, qScale6hi, qSrc6hi
+ VLD1 {qScale3lo, qScale3hi}, [pScale]!
+
+ ;// Row 2 & 6
+ VHADD qRes26lo, qSrc2lo, qSrc6lo ;// (j2+j6)/2
+ VHADD qRes26hi, qSrc2hi, qSrc6hi ;// (j2+j6)/2
+ VMOVN dXi3lo, qRes26lo ;// Output i3
+ VMOVN dXi3hi, qRes26hi
+ VSUB qRes26lo, qSrc2lo, qSrc6lo ;// j2-j6
+ VSUB qRes26hi, qSrc2hi, qSrc6hi ;// j2-j6
+ VMOVN dXi2lo, qRes26lo ;// Output i2
+ VMOVN dXi2hi, qRes26hi
+ VSHLL qSrc3lo, dXj3lo, #(12-1)
+ VSHLL qSrc3hi, dXj3hi, #(12-1)
+ VLD1 {qScale5lo, qScale5hi}, [pTemp]!
+ VSHLL qSrc5lo, dXj5lo, #(12-1)
+ VSHLL qSrc5hi, dXj5hi, #(12-1)
+ VQRDMULH qSrc3lo, qScale3lo, qSrc3lo
+ VQRDMULH qSrc3hi, qScale3hi, qSrc3hi
+ VQRDMULH qSrc5lo, qScale5lo, qSrc5lo
+ VQRDMULH qSrc5hi, qScale5hi, qSrc5hi
+
+ ;// Row 3 & 5
+ VHADD qRes53lo, qSrc5lo, qSrc3lo ;// (j5+j3)/2
+ VHADD qRes53hi, qSrc5hi, qSrc3hi ;// (j5+j3)/2
+ ;// NOTE(review): pSrc is moved back 64 bytes and qXj4 is reloaded from
+ ;// memory below; presumably the register holding row 4 was reused as
+ ;// scratch above - confirm against the caller's load sequence.
+ SUB pSrc, pSrc, #16*2*2
+ VMOVN dXi7lo, qRes53lo ;// Output i7
+ VMOVN dXi7hi, qRes53hi
+ VSUB qRes53lo, qSrc5lo, qSrc3lo ;// j5-j3
+ VSUB qRes53hi, qSrc5hi, qSrc3hi ;// j5-j3
+ VLD1 qXj4, [pSrc @64]
+ VMOVN dXi4lo, qRes53lo ;// Output i4
+ VMOVN dXi4hi, qRes53hi
+ VSHLL qSrc4lo, dXj4lo, #(12-1)
+ VSHLL qSrc4hi, dXj4hi, #(12-1)
+ VLD1 {qScale4lo, qScale4hi}, [pScale]
+ LDR pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants
+ VQRDMULH qSrc4lo, qScale4lo, qSrc4lo
+ VQRDMULH qSrc4hi, qScale4hi, qSrc4hi
+ VLDR dCoefs, [pSrc] ;// Load DCT inverse AAN constants
+ ;// Row 4
+ VMOVN dXi1lo, qSrc4lo ;// Output i1
+ VMOVN dXi1hi, qSrc4hi
+
+ MEND
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_MaskTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_MaskTable.h
new file mode 100644
index 0000000..b5da9dc
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_MaskTable.h
@@ -0,0 +1,27 @@
+/**
+ *
+ * File Name: armCOMM_MaskTable.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Mask Table to mask the end of array
+ */
+
+
+
+#ifndef _ARMCOMM_MASKTABLE_H_
+#define _ARMCOMM_MASKTABLE_H_
+
+/* Number of entries in each of the mask tables declared below. */
+#define MaskTableSize 72
+
+/* Mask table */
+/* NOTE(review): OMX_U16 and OMX_U8 are not declared by this header, so the
+ * includer must already have them in scope (presumably via omxtypes.h -
+ * confirm). The tables are only declared here; they are defined elsewhere. */
+
+extern const OMX_U16 armCOMM_qMaskTable16[MaskTableSize];
+extern const OMX_U8 armCOMM_qMaskTable8[MaskTableSize];
+
+#endif
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Version.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Version.h
new file mode 100644
index 0000000..13e5b2b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Version.h
@@ -0,0 +1,43 @@
+/* Guard the header against multiple inclusion. */
+#ifndef __ARM_COMM_VERSION_H__
+#define __ARM_COMM_VERSION_H__
+
+
+/* The following line should be in omxtypes.h but hasn't been approved by OpenMAX yet */
+#define OMX_VERSION 102
+
+/* We need to define these macros in order to convert a #define number into a #define string. */
+#define ARM_QUOTE(a) #a
+#define ARM_INDIRECT(A) ARM_QUOTE(A)
+
+/* Convert the OMX_VERSION number into a string that can be used, for example, to print it out. */
+#define ARM_VERSION_STRING ARM_INDIRECT(OMX_VERSION)
+
+
+/* Define this in order to turn on ARM version/release/build strings in each domain */
+#define ARM_INCLUDE_VERSION_DESCRIPTIONS
+
+#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS
+ extern const char * const omxAC_VersionDescription;
+ extern const char * const omxIC_VersionDescription;
+ extern const char * const omxIP_VersionDescription;
+ extern const char * const omxSP_VersionDescription;
+ extern const char * const omxVC_VersionDescription;
+#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */
+
+
+/* The following entries should be automatically updated by the release script */
+/* They are used in the ARM version strings defined for each domain. */
+
+/* The release tag associated with this release of the library. - used for source and object releases */
+#define OMX_ARM_RELEASE_TAG "r0p0-00bet1"
+
+/* The ARM architecture used to build any objects or executables in this release. */
+#define OMX_ARM_BUILD_ARCHITECTURE "ARM Architecture V6"
+
+/* The ARM Toolchain used to build any objects or executables in this release. */
+#define OMX_ARM_BUILD_TOOLCHAIN "ARM RVCT 3.1"
+
+
+#endif /* __ARM_COMM_VERSION_H__ */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_s.h
new file mode 100644
index 0000000..2df1fc8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_s.h
@@ -0,0 +1,1154 @@
+;//
+;//
+;// File Name: armCOMM_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// ARM optimized OpenMAX common header file
+;//
+
+;// Protect against multiple inclusion
+ IF :LNOT::DEF:ARMCOMM_S_H
+ GBLL ARMCOMM_S_H
+
+ REQUIRE8 ;// Requires 8-byte stack alignment
+ PRESERVE8 ;// Preserves 8-byte stack alignment
+
+ GBLL ARM_ERRORCHECK
+ARM_ERRORCHECK SETL {FALSE}
+
+;// Globals
+;// Assembly-time state shared by the macros in this file.
+
+ GBLS _RRegList ;// R saved register list
+ GBLS _DRegList ;// D saved register list
+ GBLS _Variant ;// Selected processor variant
+ GBLS _CPU ;// CPU name
+ GBLS _Struct ;// Structure name
+
+ GBLL _InFunc ;// Inside function assembly flag
+ GBLL _SwLong ;// Long switch flag
+
+ GBLA _RBytes ;// Number of register bytes on stack
+ GBLA _SBytes ;// Number of scratch bytes on stack
+ GBLA _ABytes ;// Stack offset of next argument
+ GBLA _Workspace ;// Stack offset of scratch workspace
+ GBLA _F ;// Function number
+ GBLA _StOff ;// Struct offset
+ GBLA _SwNum ;// Switch number
+ GBLS _32 ;// Suffix for 32 byte alignment
+ GBLS _16 ;// Suffix for 16 byte alignment
+
+_InFunc SETL {FALSE}
+_SBytes SETA 0
+_F SETA 0
+_SwNum SETA 0
+;// Appended to the symbol names created by M_ALLOC32 / M_ALLOC16 and
+;// looked up again by M_ADR32 / M_ADR16
+_32 SETS "ALIGN32"
+_16 SETS "ALIGN16"
+
+;/////////////////////////////////////////////////////////
+;// Override the tool's CPU setting if the symbol OVERRIDECPU
+;// is defined; otherwise use the CPU selected by the
+;// assembler settings ({CPU}).
+;/////////////////////////////////////////////////////////
+
+ IF :DEF: OVERRIDECPU
+_CPU SETS OVERRIDECPU
+ ELSE
+_CPU SETS {CPU}
+ ENDIF
+
+
+
+;/////////////////////////////////////////////////////////
+;// Work out which code to build
+;/////////////////////////////////////////////////////////
+
+ IF :DEF:ARM1136JS:LOR::DEF:CortexA8:LOR::DEF:ARM_GENERIC
+ INFO 1,"Please switch to using M_VARIANTS"
+ ENDIF
+
+ ;// Define and reset all officially recognised variants.
+ ;// Each variant becomes a global logical variable set to {FALSE}
+ ;// (see _M_DEF_VARIANT).
+ MACRO
+ _M_DEF_VARIANTS
+ _M_DEF_VARIANT ARM926EJS
+ _M_DEF_VARIANT ARM1136JS
+ _M_DEF_VARIANT ARM1136JS_U
+ _M_DEF_VARIANT CortexA8
+ _M_DEF_VARIANT ARM7TDMI
+ MEND
+
+ ;// Declare one variant flag and reset it to {FALSE}.
+ ;// $var = variant name; becomes a global logical variable
+ ;// _ok$var is only declared, never assigned here: _M_VARIANT tests
+ ;// whether it is defined to recognise official variant names.
+ MACRO
+ _M_DEF_VARIANT $var
+ GBLL $var
+ GBLL _ok$var
+$var SETL {FALSE}
+ MEND
+
+
+ ;// Variant declaration
+ ;//
+ ;// Define a list of code variants supported by this
+ ;// source file. This macro then chooses the most
+ ;// appropriate variant to build for the currently configured
+ ;// core.
+ ;//
+ ;// $v0..$v7 = names of the variants this source file implements
+ ;// (unused trailing parameters are left empty)
+ ;//
+ ;// On exit exactly one variant flag (the selected one) is {TRUE} and
+ ;// _Variant holds its name. If nothing matches, _Variant is "" and an
+ ;// INFO 1 diagnostic is raised.
+ ;//
+ MACRO
+ M_VARIANTS $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7
+ ;// Set to TRUE variants that are supported
+ _M_DEF_VARIANTS
+ _M_VARIANT $v0
+ _M_VARIANT $v1
+ _M_VARIANT $v2
+ _M_VARIANT $v3
+ _M_VARIANT $v4
+ _M_VARIANT $v5
+ _M_VARIANT $v6
+ _M_VARIANT $v7
+
+ ;// Look for first available variant to match a CPU
+ ;// _M_TRY cpu, variant fall back list
+_Variant SETS ""
+ _M_TRY ARM926EJ-S, ARM926EJS
+ _M_TRY ARM1176JZ-S, ARM1136JS
+ _M_TRY ARM1176JZF-S, ARM1136JS
+ _M_TRY ARM1156T2-S, ARM1136JS
+ _M_TRY ARM1156T2F-S, ARM1136JS
+ _M_TRY ARM1136J-S, ARM1136JS
+ _M_TRY ARM1136JF-S, ARM1136JS
+ _M_TRY MPCore, ARM1136JS
+ _M_TRY Cortex-A8, CortexA8, ARM1136JS
+ _M_TRY Cortex-R4, ARM1136JS
+ ;// ARM7TDMI needs its own variant in the fall back list; with an
+ ;// empty list no variant could ever be selected for this CPU.
+ _M_TRY ARM7TDMI, ARM7TDMI
+
+ ;// Select the correct variant
+ ;// (reset every flag again so that only the chosen one ends up TRUE)
+ _M_DEF_VARIANTS
+ IF _Variant=""
+ INFO 1, "No match found for CPU '$_CPU'"
+ ELSE
+$_Variant SETL {TRUE}
+ ENDIF
+ MEND
+
+ ;// Register a variant as available
+ ;// $var = variant name; an empty name is ignored.
+ ;// A name that was not declared by _M_DEF_VARIANTS (no _ok$var symbol)
+ ;// raises an INFO 1 diagnostic.
+ MACRO
+ _M_VARIANT $var
+ IF "$var"=""
+ MEXIT
+ ENDIF
+ IF :LNOT::DEF:_ok$var
+ INFO 1, "Unrecognized variant '$var'"
+ ENDIF
+$var SETL {TRUE}
+ MEND
+
+ ;// For a given CPU, see if any of the variants supporting
+ ;// this CPU are available. The first available variant is
+ ;// chosen
+ ;//
+ ;// $cpu = CPU name to compare against _CPU; the macro does
+ ;// nothing unless the two are equal
+ ;// $v0..$v7 = variants usable on that CPU, in order of preference
+ MACRO
+ _M_TRY $cpu, $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7
+ IF "$cpu"<>_CPU
+ MEXIT
+ ENDIF
+ _M_TRY1 $v0
+ _M_TRY1 $v1
+ _M_TRY1 $v2
+ _M_TRY1 $v3
+ _M_TRY1 $v4
+ _M_TRY1 $v5
+ _M_TRY1 $v6
+ _M_TRY1 $v7
+ ;// Check a match was found
+ IF _Variant=""
+ INFO 1, "No variant match found for CPU '$_CPU'"
+ ENDIF
+ MEND
+
+ ;// Select $var as the variant to build if none has been selected
+ ;// yet and $var was registered as available (its flag is TRUE).
+ ;// An empty $var is ignored.
+ MACRO
+ _M_TRY1 $var
+ IF "$var"=""
+ MEXIT
+ ENDIF
+ IF (_Variant=""):LAND:$var
+_Variant SETS "$var"
+ ENDIF
+ MEND
+
+;////////////////////////////////////////////////////////
+;// Structure definition
+;////////////////////////////////////////////////////////
+
+ ;// Declare a structure of given name
+ ;// Starts a new layout: records the name in _Struct and resets the
+ ;// running field offset _StOff to 0.
+ MACRO
+ M_STRUCT $sname
+_Struct SETS "$sname"
+_StOff SETA 0
+ MEND
+
+ ;// Declare a structure field
+ ;// The field is called $sname_$fname
+ ;// $size = the size of each entry, must be power of 2
+ ;// $number = (if provided) the number of entries for an array
+ ;//
+ ;// The field offset is first rounded up to a multiple of $size, then
+ ;// the symbol is defined and the running offset advanced.
+ MACRO
+ M_FIELD $fname, $size, $number
+ ;// Pad up to natural alignment (relies on $size being a power of 2)
+ IF (_StOff:AND:($size-1))!=0
+_StOff SETA _StOff + ($size - (_StOff:AND:($size-1)))
+ ENDIF
+$_Struct._$fname EQU _StOff
+ IF "$number"<>""
+_StOff SETA _StOff + $size*$number
+ ELSE
+_StOff SETA _StOff + $size
+ ENDIF
+ MEND
+
+
+ ;// End the current structure declaration
+ ;// Defines sizeof_<name> as the final offset (no tail padding is
+ ;// added) and clears the current structure name.
+ MACRO
+ M_ENDSTRUCT
+sizeof_$_Struct EQU _StOff
+_Struct SETS ""
+ MEND
+
+;//////////////////////////////////////////////////////////
+;// Switch and table macros
+;//////////////////////////////////////////////////////////
+
+ ;// Start a relative switch table with register to switch on
+ ;//
+ ;// $v = the register to switch on
+ ;// $s = if specified must be "L" to indicate long
+ ;// this allows a greater range to the case code
+ ;//
+ ;// Must be followed immediately by one M_CASE per index value and
+ ;// then M_ENDSWITCH.
+ MACRO
+ M_SWITCH $v, $s
+ ASSERT "$s"="":LOR:"$s"="L"
+_SwLong SETL {FALSE}
+ IF "$s"="L"
+_SwLong SETL {TRUE}
+ ENDIF
+_SwNum SETA _SwNum+1
+ IF {CONFIG}=16
+ ;// Thumb
+ ;// Table branch through the byte (TBB) or halfword (TBH) offsets
+ ;// that M_CASE emits right after this instruction
+ IF _SwLong
+ TBH [pc, $v, LSL#1]
+ ELSE
+ TBB [pc, $v]
+ ENDIF
+_Switch$_SwNum
+ ELSE
+ ;// ARM
+ ;// pc reads as this instruction + 8, so index 0 lands on the
+ ;// first M_CASE branch after the NOP; each case is one 4-byte B
+ ADD pc, pc, $v, LSL #2
+ NOP
+ ENDIF
+ MEND
+
+ ;// Add a case to the switch statement
+ ;// $label = code label to jump to for this index
+ ;// Thumb: emits a table entry holding the halfword offset of $label
+ ;// from the start of the table; ARM: emits a branch to $label.
+ MACRO
+ M_CASE $label
+ IF {CONFIG}=16
+ ;// Thumb
+ IF _SwLong
+ DCW ($label - _Switch$_SwNum)/2
+ ELSE
+ DCB ($label - _Switch$_SwNum)/2
+ ENDIF
+ ELSE
+ ;// ARM
+ B $label
+ ENDIF
+ MEND
+
+ ;// End of switch statement
+ ;// Re-aligns to a 2-byte boundary, since a Thumb byte table may
+ ;// contain an odd number of entries.
+ MACRO
+ M_ENDSWITCH
+ ALIGN 2
+ MEND
+
+
+;////////////////////////////////////////////////////////
+;// Data area allocation
+;////////////////////////////////////////////////////////
+
+ ;// Constant table allocator macro
+ ;//
+ ;// Switches to a read-only data area and defines the label $name
+ ;// at the current position; the table data follows the macro call.
+ ;// $name is symbol through which the table can be accessed.
+ ;// $align is the optional alignment of the table, log2 of
+ ;// the byte alignment - $align=4 is 16 byte aligned
+ ;// Must not be used between M_START and M_END.
+ MACRO
+ M_TABLE $name, $align
+ ASSERT :LNOT:_InFunc
+ IF "$align"=""
+ AREA |.constdata|, READONLY, DATA
+ ELSE
+ ;// AREAs inherit the alignment of the first declaration.
+ ;// Therefore for each alignment size we must have an area
+ ;// of a different name.
+ AREA constdata_a$align, READONLY, DATA, ALIGN=$align
+
+ ;// We also force alignment in case we are tagging onto
+ ;// an already started area.
+ ALIGN (1<<$align)
+ ENDIF
+$name
+ MEND
+
+;/////////////////////////////////////////////////////
+;// Macros to allocate space on the stack
+;//
+;// These all assume that the stack is 8-byte aligned
+;// at entry to the function, which means that the
+;// 32-byte alignment macro needs to work in a
+;// bit more of a special way...
+;/////////////////////////////////////////////////////
+
+
+
+
+ ;// Allocate 1-byte aligned area of name
+ ;// $name size $size bytes.
+ ;// Defines $name<function number> as the scratch offset of the area.
+ ;// Must be used before M_START of the function that uses it.
+ MACRO
+ M_ALLOC1 $name, $size
+ ASSERT :LNOT:_InFunc
+$name$_F EQU _SBytes
+_SBytes SETA _SBytes + ($size)
+ MEND
+
+ ;// Allocate 2-byte aligned area of name
+ ;// $name size $size bytes.
+ ;// Defines $name<function number> as the scratch offset of the area.
+ ;// Must be used before M_START of the function that uses it.
+ MACRO
+ M_ALLOC2 $name, $size
+ ASSERT :LNOT:_InFunc
+ ;// Round the running scratch size up to a multiple of 2
+ IF (_SBytes:AND:1)!=0
+_SBytes SETA _SBytes + (2 - (_SBytes:AND:1))
+ ENDIF
+$name$_F EQU _SBytes
+_SBytes SETA _SBytes + ($size)
+ MEND
+
+ ;// Allocate 4-byte aligned area of name
+ ;// $name size $size bytes.
+ ;// Defines $name<function number> as the scratch offset of the area.
+ ;// Must be used before M_START of the function that uses it.
+ MACRO
+ M_ALLOC4 $name, $size
+ ASSERT :LNOT:_InFunc
+ ;// Round the running scratch size up to a multiple of 4
+ IF (_SBytes:AND:3)!=0
+_SBytes SETA _SBytes + (4 - (_SBytes:AND:3))
+ ENDIF
+$name$_F EQU _SBytes
+_SBytes SETA _SBytes + ($size)
+ MEND
+
+ ;// Allocate 8-byte aligned area of name
+ ;// $name size $size bytes.
+ ;// Defines $name<function number> as the scratch offset of the area.
+ ;// Must be used before M_START of the function that uses it.
+ MACRO
+ M_ALLOC8 $name, $size
+ ASSERT :LNOT:_InFunc
+ ;// Round the running scratch size up to a multiple of 8
+ IF (_SBytes:AND:7)!=0
+_SBytes SETA _SBytes + (8 - (_SBytes:AND:7))
+ ENDIF
+$name$_F EQU _SBytes
+_SBytes SETA _SBytes + ($size)
+ MEND
+
+
+ ;// Allocate 8-byte aligned area of name
+ ;// $name size ($size+8) bytes.
+ ;// The symbol is defined 8 bytes into the area. M_ADR16 later clears
+ ;// the low 4 address bits, which (with an 8-byte aligned stack) moves
+ ;// the pointer down by at most those 8 spare bytes.
+
+ MACRO
+ M_ALLOC16 $name, $size
+ ASSERT :LNOT:_InFunc
+ IF (_SBytes:AND:7)!=0
+_SBytes SETA _SBytes + (8 - (_SBytes:AND:7))
+ ENDIF
+$name$_F$_16 EQU (_SBytes + 8)
+_SBytes SETA _SBytes + ($size) + 8
+ MEND
+
+ ;// Allocate 8-byte aligned area of name
+ ;// $name size ($size+24) bytes.
+ ;// The symbol is defined 24 bytes into the area. M_ADR32 later clears
+ ;// the low 5 address bits, which (with an 8-byte aligned stack) moves
+ ;// the pointer down by at most those 24 spare bytes.
+
+ MACRO
+ M_ALLOC32 $name, $size
+ ASSERT :LNOT:_InFunc
+ IF (_SBytes:AND:7)!=0
+_SBytes SETA _SBytes + (8 - (_SBytes:AND:7))
+ ENDIF
+$name$_F$_32 EQU (_SBytes + 24)
+_SBytes SETA _SBytes + ($size) + 24
+ MEND
+
+
+
+
+ ;// Argument Declaration Macro
+ ;//
+ ;// Allocate an argument name $name
+ ;// size $size bytes
+ ;//
+ ;// Must be used after M_START, which initialises _ABytes to the
+ ;// offset just above the scratch area and the saved registers.
+ ;// Arguments are declared in order; each call advances _ABytes.
+ MACRO
+ M_ARG $name, $size
+ ASSERT _InFunc
+$name$_F EQU _ABytes
+_ABytes SETA _ABytes + ($size)
+ MEND
+
+;///////////////////////////////////////////////
+;// Macros to access stacked variables
+;///////////////////////////////////////////////
+
+ ;// Macro to perform a data processing operation
+ ;// with a constant second operand
+ ;//
+ ;// $op = instruction (used in this file with ADD / SUB)
+ ;// $rd = destination register
+ ;// $rn = first operand register
+ ;// $const = assembly-time constant
+ ;//
+ ;// The constant is split into two immediates so that values too wide
+ ;// for a single immediate can still be applied. At most two
+ ;// instructions are emitted, so the high part must itself be a valid
+ ;// immediate for $op.
+ MACRO
+ _M_OPC $op,$rd,$rn,$const
+ LCLA _sh
+ LCLA _cst
+_sh SETA 0
+_cst SETA $const
+ IF _cst=0
+ $op $rd, $rn, #_cst
+ MEXIT
+ ENDIF
+ ;// Strip trailing zero bit-pairs, remembering the shift in _sh
+ WHILE (_cst:AND:3)=0
+_cst SETA _cst>>2
+_sh SETA _sh+2
+ WEND
+ ;// Low 8 significant bits first ...
+ $op $rd, $rn, #(_cst:AND:0x000000FF)<<_sh
+ ;// ... then the remaining high bits, applied to the result
+ IF _cst>=256
+ $op $rd, $rd, #(_cst:AND:0xFFFFFF00)<<_sh
+ ENDIF
+ MEND
+
+ ;// Macro to perform a data access operation
+ ;// Such as LDR or STR
+ ;// The addressing mode is modified such that
+ ;// 1. If no address is given then the name is taken
+ ;// as a stack offset
+ ;// 2. If the addressing mode is not available for the
+ ;// state being assembled for (eg Thumb) then a suitable
+ ;// addressing mode is substituted.
+ ;//
+ ;// On Entry:
+ ;// $i = Instruction to perform (eg "LDRB")
+ ;// $a = Required byte alignment
+ ;// $r = Register(s) to transfer (eg "r1")
+ ;// $a0,$a1,$a2,$a3. Addressing mode and condition. One of:
+ ;// label {,cc}
+ ;// [base] {,,,cc}
+ ;// [base, offset]{!} {,,cc}
+ ;// [base, offset, shift]{!} {,cc}
+ ;// [base], offset {,,cc}
+ ;// [base], offset, shift {,cc}
+ ;//
+ ;// For an explicit address the condition is always taken from $a3;
+ ;// for a stack label it is taken from $a1.
+ MACRO
+ _M_DATA $i,$a,$r,$a0,$a1,$a2,$a3
+ IF "$a0":LEFT:1="["
+ ;// Explicit addressing mode
+ IF "$a1"=""
+ ;// Plain [base]
+ $i$a3 $r, $a0
+ ELSE
+ IF "$a0":RIGHT:1="]"
+ ;// $a0 is a complete "[base]": post-indexed form
+ IF "$a2"=""
+ _M_POSTIND $i$a3, "$r", $a0, $a1
+ ELSE
+ _M_POSTIND $i$a3, "$r", $a0, "$a1,$a2"
+ ENDIF
+ ELSE
+ ;// $a0 is "[base" only: offset / pre-indexed form
+ IF "$a2"=""
+ _M_PREIND $i$a3, "$r", $a0, $a1
+ ELSE
+ _M_PREIND $i$a3, "$r", $a0, "$a1,$a2"
+ ENDIF
+ ENDIF
+ ENDIF
+ ELSE
+ ;// Stack variable: $a0 names an M_ALLOCn / M_ARG symbol of the
+ ;// current function; check it meets the access alignment
+ LCLA _Offset
+_Offset SETA _Workspace + $a0$_F
+ ASSERT (_Offset:AND:($a-1))=0
+ $i$a1 $r, [sp, #_Offset]
+ ENDIF
+ MEND
+
+ ;// Handle post indexed load/stores
+ ;// op reg, [base], offset
+ ;//
+ ;// $i = instruction (with any condition already appended)
+ ;// $r = register(s) to transfer
+ ;// $a0 = "[base]"
+ ;// $a1 = offset (optionally followed by a shift)
+ ;//
+ ;// In Thumb the access is emitted without writeback and followed by a
+ ;// separate ADD/SUB on the base; in ARM the native form is used.
+ MACRO
+ _M_POSTIND $i,$r,$a0,$a1
+ LCLS _base
+ LCLS _offset
+ IF {CONFIG}=16 ;// Thumb
+_base SETS ("$a0":LEFT:(:LEN:"$a0"-1)):RIGHT:(:LEN:"$a0"-2) ;// remove []
+_offset SETS "$a1"
+ ;// Drop an explicit leading "+"
+ IF _offset:LEFT:1="+"
+_offset SETS _offset:RIGHT:(:LEN:_offset-1)
+ ENDIF
+ $i $r, $a0
+ ;// A leading "-" turns the base update into a SUB
+ IF _offset:LEFT:1="-"
+_offset SETS _offset:RIGHT:(:LEN:_offset-1)
+ SUB $_base, $_base, $_offset
+ ELSE
+ ADD $_base, $_base, $_offset
+ ENDIF
+ ELSE ;// ARM
+ $i $r, $a0, $a1
+ ENDIF
+ MEND
+
+ ;// Handle pre indexed load/store
+ ;// op reg, [base, offset]{!}
+ ;//
+ ;// $i = instruction (with any condition already appended)
+ ;// $r = register(s) to transfer
+ ;// $a0 = "[base"
+ ;// $a1 = "offset]" or "offset]!"
+ ;//
+ ;// Only the Thumb writeback form ("]!") is rewritten: the access is
+ ;// done with a plain offset and the base is updated by a separate ADD.
+ MACRO
+ _M_PREIND $i,$r,$a0,$a1
+ LCLS _base
+ LCLS _offset
+ IF ({CONFIG}=16):LAND:(("$a1":RIGHT:2)="]!")
+_base SETS "$a0":RIGHT:(:LEN:("$a0")-1)
+_offset SETS "$a1":LEFT:(:LEN:("$a1")-2)
+ $i $r, [$_base, $_offset]
+ ADD $_base, $_base, $_offset
+ ELSE
+ $i $r, $a0, $a1
+ ENDIF
+ MEND
+
+ ;// Load/store wrappers around _M_DATA.
+ ;// $a0 is either the name of a stack variable (M_ALLOCn / M_ARG) or
+ ;// an explicit "[...]" addressing mode; see _M_DATA for $a0..$a3.
+ ;// The number passed to _M_DATA is the required byte alignment of a
+ ;// stack offset.
+
+ ;// Load unsigned byte from stack (or explicit address)
+ MACRO
+ M_LDRB $r,$a0,$a1,$a2,$a3
+ _M_DATA "LDRB",1,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Load signed byte from stack (or explicit address)
+ MACRO
+ M_LDRSB $r,$a0,$a1,$a2,$a3
+ _M_DATA "LDRSB",1,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Store byte to stack (or explicit address)
+ MACRO
+ M_STRB $r,$a0,$a1,$a2,$a3
+ _M_DATA "STRB",1,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Load unsigned half word from stack (or explicit address)
+ MACRO
+ M_LDRH $r,$a0,$a1,$a2,$a3
+ _M_DATA "LDRH",2,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Load signed half word from stack (or explicit address)
+ MACRO
+ M_LDRSH $r,$a0,$a1,$a2,$a3
+ _M_DATA "LDRSH",2,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Store half word to stack (or explicit address)
+ MACRO
+ M_STRH $r,$a0,$a1,$a2,$a3
+ _M_DATA "STRH",2,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Load word from stack (or explicit address)
+ MACRO
+ M_LDR $r,$a0,$a1,$a2,$a3
+ _M_DATA "LDR",4,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Store word to stack (or explicit address)
+ MACRO
+ M_STR $r,$a0,$a1,$a2,$a3
+ _M_DATA "STR",4,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Load double word from stack (or explicit address)
+ ;// $r0,$r1 = the register pair to transfer
+ MACRO
+ M_LDRD $r0,$r1,$a0,$a1,$a2,$a3
+ _M_DATA "LDRD",8,"$r0,$r1",$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Store double word to stack (or explicit address)
+ ;// $r0,$r1 = the register pair to transfer
+ MACRO
+ M_STRD $r0,$r1,$a0,$a1,$a2,$a3
+ _M_DATA "STRD",8,"$r0,$r1",$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Get absolute address of stack allocated location
+ ;// $a = destination register
+ ;// $b = name given to M_ALLOCn / M_ARG
+ ;// $cc = optional condition code
+ MACRO
+ M_ADR $a, $b, $cc
+ _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F)
+ MEND
+
+ ;// Get absolute address of stack allocated location and align the address to 16 bytes
+ ;// $a = destination register
+ ;// $b = name given to M_ALLOC16
+ ;// $cc = optional condition code
+ MACRO
+ M_ADR16 $a, $b, $cc
+ _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F$_16)
+
+ ;// Now align $a to 16 bytes
+ ;// (rounds down into the spare bytes reserved by M_ALLOC16)
+ BIC$cc $a,$a,#0x0F
+ MEND
+
+ ;// Get absolute address of stack allocated location and align the address to 32 bytes
+ ;// $a = destination register
+ ;// $b = name given to M_ALLOC32
+ ;// $cc = optional condition code
+ MACRO
+ M_ADR32 $a, $b, $cc
+ _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F$_32)
+
+ ;// Now align $a to 32 bytes
+ ;// (rounds down into the spare bytes reserved by M_ALLOC32)
+ BIC$cc $a,$a,#0x1F
+ MEND
+
+;//////////////////////////////////////////////////////////
+;// Function header and footer macros
+;//////////////////////////////////////////////////////////
+
+ ;// Function Header Macro
+ ;// Generates the function prologue
+ ;// Note that functions should all be "stack-moves-once"
+ ;// The FNSTART and FNEND macros should be the only places
+ ;// where the stack moves.
+ ;//
+ ;// $name = function name
+ ;// $rreg = "" don't stack any registers
+ ;// "lr" stack "r4,lr"
+ ;// "rN" stack registers "r4-rN,lr", with N rounded up to an
+ ;// even number (see _M_GETRREGLIST) so that an even
+ ;// number of registers is pushed
+ ;// $dreg = "" don't stack any D registers
+ ;// "dN" stack registers "d8-dN"
+ ;//
+ ;// Note: ARM Architecture procedure call standard AAPCS
+ ;// states that r4-r11, sp, d8-d15 must be preserved by
+ ;// a compliant function.
+ MACRO
+ M_START $name, $rreg, $dreg
+ ASSERT :LNOT:_InFunc
+ ASSERT "$name"!=""
+_InFunc SETL {TRUE}
+_RBytes SETA 0
+_Workspace SETA 0
+
+ ;// Create an area for the function
+ AREA |.text|, CODE
+ EXPORT $name
+$name FUNCTION
+
+ ;// Save R registers
+ _M_GETRREGLIST $rreg
+ IF _RRegList<>""
+ STMFD sp!, {$_RRegList, lr}
+ ENDIF
+
+ ;// Save D registers
+ _M_GETDREGLIST $dreg
+ IF _DRegList<>""
+ VSTMFD sp!, {$_DRegList}
+ ENDIF
+
+
+ ;// Ensure size claimed on stack is 8-byte aligned
+ IF ((_SBytes:AND:7)!=0)
+_SBytes SETA _SBytes + (8 - (_SBytes:AND:7))
+ ENDIF
+
+ ;// Claim the scratch space declared with M_ALLOCn
+ IF (_SBytes!=0)
+ _M_OPC SUB, sp, sp, _SBytes
+ ENDIF
+
+
+ ;// Offset of the first M_ARG: above the scratch area and the
+ ;// saved registers
+_ABytes SETA _SBytes + _RBytes - _Workspace
+
+
+ ;// Print function name if debug enabled
+ M_PRINTF "$name\n",
+ MEND
+
+ ;// Work out a list of R saved registers
+ ;//
+ ;// $rreg = highest register to save ("", "lr", or "r4".."r12")
+ ;//
+ ;// Sets _RRegList to the register range to push (lr is added by the
+ ;// caller) and adds the pushed size, including lr, to _RBytes.
+ ;// The range always ends on an even register so that, with lr, an
+ ;// even number of words (a multiple of 8 bytes) is pushed.
+ ;// Any other value raises an INFO 1 diagnostic.
+ MACRO
+ _M_GETRREGLIST $rreg
+ IF "$rreg"=""
+_RRegList SETS ""
+ MEXIT
+ ENDIF
+ IF "$rreg"="lr":LOR:"$rreg"="r4"
+_RRegList SETS "r4"
+_RBytes SETA _RBytes+8
+ MEXIT
+ ENDIF
+ IF "$rreg"="r5":LOR:"$rreg"="r6"
+_RRegList SETS "r4-r6"
+_RBytes SETA _RBytes+16
+ MEXIT
+ ENDIF
+ IF "$rreg"="r7":LOR:"$rreg"="r8"
+_RRegList SETS "r4-r8"
+_RBytes SETA _RBytes+24
+ MEXIT
+ ENDIF
+ IF "$rreg"="r9":LOR:"$rreg"="r10"
+_RRegList SETS "r4-r10"
+_RBytes SETA _RBytes+32
+ MEXIT
+ ENDIF
+ IF "$rreg"="r11":LOR:"$rreg"="r12"
+_RRegList SETS "r4-r12"
+_RBytes SETA _RBytes+40
+ MEXIT
+ ENDIF
+ INFO 1, "Unrecognized saved r register limit '$rreg'"
+ MEND
+
+ ;// Work out a list of D saved registers
+ ;//
+ ;// $dreg = highest D register to save ("", or "d8".."d15")
+ ;//
+ ;// Sets _DRegList to the range d8-$dreg and adds 8 bytes per saved
+ ;// register to _RBytes. Any other value raises an INFO 1 diagnostic.
+ MACRO
+ _M_GETDREGLIST $dreg
+ IF "$dreg"=""
+_DRegList SETS ""
+ MEXIT
+ ENDIF
+ IF "$dreg"="d8"
+_DRegList SETS "d8"
+_RBytes SETA _RBytes+8
+ MEXIT
+ ENDIF
+ IF "$dreg"="d9"
+_DRegList SETS "d8-d9"
+_RBytes SETA _RBytes+16
+ MEXIT
+ ENDIF
+ IF "$dreg"="d10"
+_DRegList SETS "d8-d10"
+_RBytes SETA _RBytes+24
+ MEXIT
+ ENDIF
+ IF "$dreg"="d11"
+_DRegList SETS "d8-d11"
+_RBytes SETA _RBytes+32
+ MEXIT
+ ENDIF
+ IF "$dreg"="d12"
+_DRegList SETS "d8-d12"
+_RBytes SETA _RBytes+40
+ MEXIT
+ ENDIF
+ IF "$dreg"="d13"
+_DRegList SETS "d8-d13"
+_RBytes SETA _RBytes+48
+ MEXIT
+ ENDIF
+ IF "$dreg"="d14"
+_DRegList SETS "d8-d14"
+_RBytes SETA _RBytes+56
+ MEXIT
+ ENDIF
+ IF "$dreg"="d15"
+_DRegList SETS "d8-d15"
+_RBytes SETA _RBytes+64
+ MEXIT
+ ENDIF
+ INFO 1, "Unrecognized saved d register limit '$dreg'"
+ MEND
+
+ ;// Produce function return instructions
+ ;// $cc = optional condition code
+ ;// Restores the D registers, then either returns with BX lr (no R
+ ;// registers were saved) or pops the saved R registers with the
+ ;// stacked lr loaded straight into pc.
+ ;// Assumes sp already points at the saved registers.
+ MACRO
+ _M_RET $cc
+ IF _DRegList<>""
+ VPOP$cc {$_DRegList}
+ ENDIF
+ IF _RRegList=""
+ BX$cc lr
+ ELSE
+ LDM$cc.FD sp!, {$_RRegList, pc}
+ ENDIF
+ MEND
+
+ ;// Early Function Exit Macro
+ ;// $cc = condition to exit with
+ ;// (Example: M_EXIT EQ)
+ ;// If the function has scratch space on the stack, branches to the
+ ;// common epilogue label placed by M_END; otherwise returns in place.
+ MACRO
+ M_EXIT $cc
+ ASSERT _InFunc
+ IF _SBytes!=0
+ ;// Restore stack frame and exit
+ B$cc _End$_F
+ ELSE
+ ;// Can return directly
+ _M_RET $cc
+ ENDIF
+ MEND
+
+ ;// Function Footer Macro
+ ;// Generates the function epilogue
+ ;// Places the _End<function number> label used by M_EXIT, releases
+ ;// the scratch space, returns, and closes the FUNCTION opened by
+ ;// M_START.
+ MACRO
+ M_END
+ ASSERT _InFunc
+_InFunc SETL {FALSE}
+_End$_F
+
+ ;// Restore the stack pointer to its original value on function entry
+ IF _SBytes!=0
+ _M_OPC ADD, sp, sp, _SBytes
+ ENDIF
+ _M_RET
+ ENDFUNC
+
+ ;// Reset the global stack tracking variables back to their
+ ;// initial values, and increment the function count
+_SBytes SETA 0
+_F SETA _F+1
+ MEND
+
+
+;//==========================================================================
+;// Debug Macros
+;//==========================================================================
+
+ GBLL DEBUG_ON
+DEBUG_ON SETL {FALSE}
+ GBLL DEBUG_STALLS_ON
+DEBUG_STALLS_ON SETL {FALSE}
+
+ ;//==========================================================================
+ ;// Debug call to printf
+ ;// M_PRINTF $format, $val0, $val1, $val2
+ ;//
+ ;// Examples:
+ ;// M_PRINTF "x=%08x\n", r0
+ ;//
+ ;// This macro preserves the value of all registers including the
+ ;// flags.
+ ;//
+ ;// Expands to nothing unless DEBUG_ON is {TRUE}.
+ ;// $val0..$val2 are optional registers to print; they must be given
+ ;// in order ($val1 requires $val0, $val2 requires $val1).
+ ;//==========================================================================
+
+ MACRO
+ M_PRINTF $format, $val0, $val1, $val2
+ IF DEBUG_ON
+
+ IMPORT printf
+ LCLA nArgs
+nArgs SETA 0
+
+ ;// save registers so we don't corrupt them
+ STMFD sp!, {r0-r12, lr}
+
+ ;// Drop stack to give us some workspace
+ SUB sp, sp, #16
+
+ ;// Save registers we need to print to the stack
+ ;// (sp has already been moved, so passing sp itself would print
+ ;// the adjusted value)
+ IF "$val2" <> ""
+ ASSERT "$val1" <> ""
+ STR $val2, [sp, #8]
+nArgs SETA nArgs+1
+ ENDIF
+ IF "$val1" <> ""
+ ASSERT "$val0" <> ""
+ STR $val1, [sp, #4]
+nArgs SETA nArgs+1
+ ENDIF
+ IF "$val0"<>""
+ STR $val0, [sp]
+nArgs SETA nArgs+1
+ ENDIF
+
+ ;// Now we are safe to corrupt registers
+ ;// r0 = format string, r1..r3 = the values saved above
+ ADR r0, %FT00
+ IF nArgs=1
+ LDR r1, [sp]
+ ENDIF
+ IF nArgs=2
+ LDMIA sp, {r1,r2}
+ ENDIF
+ IF nArgs=3
+ LDMIA sp, {r1,r2,r3}
+ ENDIF
+
+ ;// print the values
+ ;// r4 is callee-saved under the AAPCS, so it keeps the flags
+ ;// across the call
+ MRS r4, cpsr ;// preserve flags
+ BL printf
+ MSR cpsr_f, r4 ;// restore flags
+ B %FT01
+00 ;// string to print
+ DCB "$format", 0
+ ALIGN
+01 ;// Finished
+ ADD sp, sp, #16
+ ;// Restore registers
+ LDMFD sp!, {r0-r12,lr}
+
+ ENDIF ;// DEBUG_ON
+ MEND
+
+
+ ;// Stall Simulation Macro
+ ;// Inserts a given number of NOPs for the currently
+ ;// defined platform
+ ;//
+ ;// Expands to nothing unless DEBUG_STALLS_ON is {TRUE}.
+ ;// Each argument is a per-platform stall specification handled by
+ ;// _M_STALL_SUB; empty arguments are ignored.
+ MACRO
+ M_STALL $plat1stall, $plat2stall, $plat3stall, $plat4stall, $plat5stall, $plat6stall
+ IF DEBUG_STALLS_ON
+ _M_STALL_SUB $plat1stall
+ _M_STALL_SUB $plat2stall
+ _M_STALL_SUB $plat3stall
+ _M_STALL_SUB $plat4stall
+ _M_STALL_SUB $plat5stall
+ _M_STALL_SUB $plat6stall
+ ENDIF
+ MEND
+
+ ;// Emit the stall NOPs for one platform specification.
+ ;//
+ ;// $platstall = platform name, one separator character, then a
+ ;// single digit: the last character is the NOP count
+ ;// and everything except the last two characters is
+ ;// the platform name.
+ ;// NOTE(review): the separator character is not checked
+ ;// here - confirm the intended format with callers.
+ ;//
+ ;// NOPs ("MOV sp, sp") are emitted only if the platform name is a
+ ;// defined symbol whose value is {TRUE}.
+ MACRO
+ _M_STALL_SUB $platstall
+ IF "$platstall"!=""
+ LCLA _pllen
+ LCLS _pl
+ LCLL _pllog
+_pllen SETA :LEN:"$platstall"
+_pl SETS "$platstall":LEFT:(_pllen - 2)
+ IF :DEF:$_pl
+ IF $_pl
+ LCLS _st
+ LCLA _stnum
+_st SETS "$platstall":RIGHT:1
+_stnum SETA $_st
+ WHILE _stnum>0
+ MOV sp, sp
+_stnum SETA _stnum - 1
+ WEND
+ ENDIF
+ ENDIF
+ ENDIF
+ MEND
+
+
+
+;//==========================================================================
+;// Endian Invarience Macros
+;//
+;// The idea behind these macros is that if an array is
+;// loaded as words then the SMUL00 macro will multiply
+;// array elements 0 regardless of the endianess of the
+;// system. For little endian SMUL00=SMULBB, for big
+;// endian SMUL00=SMULTT and similarly for other packed operations.
+;//
+;//==========================================================================
+ ;// LIBI4: emit a 4-operand instruction, using mnemonic $combi when {ENDIAN} is "big" and $comli otherwise; $cc is appended to the mnemonic
+ MACRO
+ LIBI4 $comli, $combi, $a, $b, $c, $d, $cc
+ IF {ENDIAN}="big"
+ $combi.$cc $a, $b, $c, $d
+ ELSE
+ $comli.$cc $a, $b, $c, $d
+ ENDIF
+ MEND
+ ;// LIBI3: 3-operand form of LIBI4 - mnemonic $combi when {ENDIAN} is "big", $comli otherwise; $cc is appended to the mnemonic
+ MACRO
+ LIBI3 $comli, $combi, $a, $b, $c, $cc
+ IF {ENDIAN}="big"
+ $combi.$cc $a, $b, $c
+ ELSE
+ $comli.$cc $a, $b, $c
+ ENDIF
+ MEND
+
+ ;// SMLAxy macros. In the x/y positions, 0 or 1 names packed element 0 or 1
+ ;// (its B/T halfword swaps with {ENDIAN}); B or T always names that halfword.
+ MACRO
+ SMLA00 $a, $b, $c, $d, $cc
+ LIBI4 SMLABB, SMLATT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLA01 $a, $b, $c, $d, $cc
+ LIBI4 SMLABT, SMLATB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLA0B $a, $b, $c, $d, $cc
+ LIBI4 SMLABB, SMLATB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLA0T $a, $b, $c, $d, $cc
+ LIBI4 SMLABT, SMLATT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLA10 $a, $b, $c, $d, $cc
+ LIBI4 SMLATB, SMLABT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLA11 $a, $b, $c, $d, $cc
+ LIBI4 SMLATT, SMLABB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLA1B $a, $b, $c, $d, $cc
+ LIBI4 SMLATB, SMLABB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLA1T $a, $b, $c, $d, $cc
+ LIBI4 SMLATT, SMLABT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAB0 $a, $b, $c, $d, $cc
+ LIBI4 SMLABB, SMLABT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAB1 $a, $b, $c, $d, $cc
+ LIBI4 SMLABT, SMLABB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAT0 $a, $b, $c, $d, $cc
+ LIBI4 SMLATB, SMLATT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAT1 $a, $b, $c, $d, $cc
+ LIBI4 SMLATT, SMLATB, $a, $b, $c, $d, $cc
+ MEND
+
+ ;// SMULxy macros. In the x/y positions, 0 or 1 names packed element 0 or 1
+ ;// (its B/T halfword swaps with {ENDIAN}); B or T always names that halfword.
+ MACRO
+ SMUL00 $a, $b, $c, $cc
+ LIBI3 SMULBB, SMULTT, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMUL01 $a, $b, $c, $cc
+ LIBI3 SMULBT, SMULTB, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMUL0B $a, $b, $c, $cc
+ LIBI3 SMULBB, SMULTB, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMUL0T $a, $b, $c, $cc
+ LIBI3 SMULBT, SMULTT, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMUL10 $a, $b, $c, $cc
+ LIBI3 SMULTB, SMULBT, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMUL11 $a, $b, $c, $cc
+ LIBI3 SMULTT, SMULBB, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMUL1B $a, $b, $c, $cc
+ LIBI3 SMULTB, SMULBB, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMUL1T $a, $b, $c, $cc
+ LIBI3 SMULTT, SMULBT, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMULB0 $a, $b, $c, $cc
+ LIBI3 SMULBB, SMULBT, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMULB1 $a, $b, $c, $cc
+ LIBI3 SMULBT, SMULBB, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMULT0 $a, $b, $c, $cc
+ LIBI3 SMULTB, SMULTT, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMULT1 $a, $b, $c, $cc
+ LIBI3 SMULTT, SMULTB, $a, $b, $c, $cc
+ MEND
+
+ ;// SMLAWx, SMULWx macros: x = 0 selects the B form on little endian and the
+ ;// T form on big endian; x = 1 is the reverse.
+ MACRO
+ SMLAW0 $a, $b, $c, $d, $cc
+ LIBI4 SMLAWB, SMLAWT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAW1 $a, $b, $c, $d, $cc
+ LIBI4 SMLAWT, SMLAWB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMULW0 $a, $b, $c, $cc
+ LIBI3 SMULWB, SMULWT, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMULW1 $a, $b, $c, $cc
+ LIBI3 SMULWT, SMULWB, $a, $b, $c, $cc
+ MEND
+
+ ;// SMLALxy macros. In the x/y positions, 0 or 1 names packed element 0 or 1
+ ;// (its B/T halfword swaps with {ENDIAN}); B or T always names that halfword.
+ ;// Each macro forwards its operands and trailing suffix argument to LIBI4.
+ MACRO
+ SMLAL00 $a, $b, $c, $d, $cc
+ LIBI4 SMLALBB, SMLALTT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAL01 $a, $b, $c, $d, $cc
+ LIBI4 SMLALBT, SMLALTB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAL0B $a, $b, $c, $d, $cc
+ LIBI4 SMLALBB, SMLALTB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAL0T $a, $b, $c, $d, $cc
+ LIBI4 SMLALBT, SMLALTT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAL10 $a, $b, $c, $d, $cc
+ LIBI4 SMLALTB, SMLALBT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAL11 $a, $b, $c, $d, $cc
+ LIBI4 SMLALTT, SMLALBB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAL1B $a, $b, $c, $d, $cc
+ LIBI4 SMLALTB, SMLALBB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAL1T $a, $b, $c, $d, $cc
+ LIBI4 SMLALTT, SMLALBT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLALB0 $a, $b, $c, $d, $cc
+ LIBI4 SMLALBB, SMLALBT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLALB1 $a, $b, $c, $d, $cc
+ LIBI4 SMLALBT, SMLALBB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLALT0 $a, $b, $c, $d, $cc
+ LIBI4 SMLALTB, SMLALTT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLALT1 $a, $b, $c, $d, $cc
+ LIBI4 SMLALTT, SMLALTB, $a, $b, $c, $d, $cc
+ MEND
+
+ ENDIF ;// ARMCOMM_S_H
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armOMX.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armOMX.h
new file mode 100644
index 0000000..f629f72
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armOMX.h
@@ -0,0 +1,274 @@
+/*
+ *
+ * File Name: armOMX_ReleaseVersion.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * This file allows a version of the OMX DL libraries to be built where some or
+ * all of the function names can be given a user specified suffix.
+ *
+ * You might want to use it where:
+ *
+ * - you want to rename a function "out of the way" so that you could replace
+ * a function with a different version (the original version would still be
+ * in the library just with a different name - so you could debug the new
+ * version by comparing it to the output of the old)
+ *
+ * - you want to rename all the functions to versions with a suffix so that
+ * you can include two versions of the library and choose between functions
+ * at runtime.
+ *
+ * e.g. omxIPBM_Copy_U8_C1R could be renamed omxIPBM_Copy_U8_C1R_CortexA8
+ *
+ */
+
+
+#ifndef _armOMX_H_
+#define _armOMX_H_
+
+
+/* Two macros are needed: OMXCATBAR lets its arguments (e.g. a *_SUFFIX macro) expand first, then OMXCAT2BAR pastes omx, A and B into one name */
+#define OMXCAT2BAR(A, B) omx ## A ## B
+#define OMXCATBAR(A, B) OMXCAT2BAR(A, B)
+
+/* Suffix added to all function names; defined empty here, so by default the names are left unchanged */
+#define BARE_SUFFIX
+
+
+
+/* One suffix per sub-domain; each is BARE_SUFFIX here - edit an entry to give that sub-domain's functions their own suffix */
+#define OMXACAAC_SUFFIX BARE_SUFFIX
+#define OMXACMP3_SUFFIX BARE_SUFFIX
+#define OMXICJP_SUFFIX BARE_SUFFIX
+#define OMXIPBM_SUFFIX BARE_SUFFIX
+#define OMXIPCS_SUFFIX BARE_SUFFIX
+#define OMXIPPP_SUFFIX BARE_SUFFIX
+#define OMXSP_SUFFIX BARE_SUFFIX
+#define OMXVCCOMM_SUFFIX BARE_SUFFIX
+#define OMXVCM4P10_SUFFIX BARE_SUFFIX
+#define OMXVCM4P2_SUFFIX BARE_SUFFIX
+
+
+
+
+/* Define what each bare, un-suffixed OpenMAX API function name is renamed to (omxACAAC_* names take OMXACAAC_SUFFIX) */
+#define omxACAAC_DecodeChanPairElt OMXCATBAR(ACAAC_DecodeChanPairElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeDatStrElt OMXCATBAR(ACAAC_DecodeDatStrElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeFillElt OMXCATBAR(ACAAC_DecodeFillElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeIsStereo_S32 OMXCATBAR(ACAAC_DecodeIsStereo_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeMsPNS_S32_I OMXCATBAR(ACAAC_DecodeMsPNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeMsStereo_S32_I OMXCATBAR(ACAAC_DecodeMsStereo_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodePrgCfgElt OMXCATBAR(ACAAC_DecodePrgCfgElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeTNS_S32_I OMXCATBAR(ACAAC_DecodeTNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DeinterleaveSpectrum_S32 OMXCATBAR(ACAAC_DeinterleaveSpectrum_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_EncodeTNS_S32_I OMXCATBAR(ACAAC_EncodeTNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_LongTermPredict_S32 OMXCATBAR(ACAAC_LongTermPredict_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_LongTermReconstruct_S32_I OMXCATBAR(ACAAC_LongTermReconstruct_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_MDCTFwd_S32 OMXCATBAR(ACAAC_MDCTFwd_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_MDCTInv_S32_S16 OMXCATBAR(ACAAC_MDCTInv_S32_S16, OMXACAAC_SUFFIX)
+#define omxACAAC_NoiselessDecode OMXCATBAR(ACAAC_NoiselessDecode, OMXACAAC_SUFFIX)
+#define omxACAAC_QuantInv_S32_I OMXCATBAR(ACAAC_QuantInv_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_UnpackADIFHeader OMXCATBAR(ACAAC_UnpackADIFHeader, OMXACAAC_SUFFIX)
+#define omxACAAC_UnpackADTSFrameHeader OMXCATBAR(ACAAC_UnpackADTSFrameHeader, OMXACAAC_SUFFIX)
+
+/* omxACMP3_* names: each is rebuilt from its bare name plus OMXACMP3_SUFFIX */
+#define omxACMP3_HuffmanDecode_S32 OMXCATBAR(ACMP3_HuffmanDecode_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_HuffmanDecodeSfb_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfb_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_HuffmanDecodeSfbMbp_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfbMbp_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_MDCTInv_S32 OMXCATBAR(ACMP3_MDCTInv_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_ReQuantize_S32_I OMXCATBAR(ACMP3_ReQuantize_S32_I, OMXACMP3_SUFFIX)
+#define omxACMP3_ReQuantizeSfb_S32_I OMXCATBAR(ACMP3_ReQuantizeSfb_S32_I, OMXACMP3_SUFFIX)
+#define omxACMP3_SynthPQMF_S32_S16 OMXCATBAR(ACMP3_SynthPQMF_S32_S16, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackFrameHeader OMXCATBAR(ACMP3_UnpackFrameHeader, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackScaleFactors_S8 OMXCATBAR(ACMP3_UnpackScaleFactors_S8, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackSideInfo OMXCATBAR(ACMP3_UnpackSideInfo, OMXACMP3_SUFFIX)
+/* omxICJP_* names: each is rebuilt from its bare name plus OMXICJP_SUFFIX */
+#define omxICJP_CopyExpand_U8_C3 OMXCATBAR(ICJP_CopyExpand_U8_C3, OMXICJP_SUFFIX)
+#define omxICJP_DCTFwd_S16 OMXCATBAR(ICJP_DCTFwd_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTFwd_S16_I OMXCATBAR(ICJP_DCTFwd_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTInv_S16 OMXCATBAR(ICJP_DCTInv_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTInv_S16_I OMXCATBAR(ICJP_DCTInv_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_Multiple_S16 OMXCATBAR(ICJP_DCTQuantFwd_Multiple_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_S16 OMXCATBAR(ICJP_DCTQuantFwd_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_S16_I OMXCATBAR(ICJP_DCTQuantFwd_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwdTableInit OMXCATBAR(ICJP_DCTQuantFwdTableInit, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_Multiple_S16 OMXCATBAR(ICJP_DCTQuantInv_Multiple_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_S16 OMXCATBAR(ICJP_DCTQuantInv_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_S16_I OMXCATBAR(ICJP_DCTQuantInv_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInvTableInit OMXCATBAR(ICJP_DCTQuantInvTableInit, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffman8x8_Direct_S16_C1 OMXCATBAR(ICJP_DecodeHuffman8x8_Direct_S16_C1, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffman8x8_Direct_S16_U1_C1 OMXCATBAR(ICJP_EncodeHuffman8x8_Direct_S16_U1_C1, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
+/* omxIPBM_* names: each is rebuilt from its bare name plus OMXIPBM_SUFFIX */
+#define omxIPBM_AddC_U8_C1R_Sfs OMXCATBAR(IPBM_AddC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
+#define omxIPBM_Copy_U8_C1R OMXCATBAR(IPBM_Copy_U8_C1R, OMXIPBM_SUFFIX)
+#define omxIPBM_Copy_U8_C3R OMXCATBAR(IPBM_Copy_U8_C3R, OMXIPBM_SUFFIX)
+#define omxIPBM_Mirror_U8_C1R OMXCATBAR(IPBM_Mirror_U8_C1R, OMXIPBM_SUFFIX)
+#define omxIPBM_MulC_U8_C1R_Sfs OMXCATBAR(IPBM_MulC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
+/* omxIPCS_* names, built with OMXIPCS_SUFFIX. NOTE(review): omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R is defined twice below; the two definitions are identical, so this is harmless - confirm no other name was intended */
+#define omxIPCS_ColorTwistQ14_U8_C3R OMXCATBAR(IPCS_ColorTwistQ14_U8_C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr420RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr420RszRot_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr422RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R OMXCATBAR(IPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr422RszRot_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbYCr422ToBGR565_U8_U16_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR565_U8_U16_C2C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbYCr422ToBGR888_U8_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR888_U8_C2C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R OMXCATBAR(IPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToYCbCr420Rotate_U8_P3R OMXCATBAR(IPCS_YCbCr422ToYCbCr420Rotate_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565_U8_U16_C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR888_U8_C3R OMXCATBAR(IPCS_YCbCr444ToBGR888_U8_C3R, OMXIPCS_SUFFIX)
+/* omxIPPP_* names: each is rebuilt from its bare name plus OMXIPPP_SUFFIX */
+#define omxIPPP_Deblock_HorEdge_U8_I OMXCATBAR(IPPP_Deblock_HorEdge_U8_I, OMXIPPP_SUFFIX)
+#define omxIPPP_Deblock_VerEdge_U8_I OMXCATBAR(IPPP_Deblock_VerEdge_U8_I, OMXIPPP_SUFFIX)
+#define omxIPPP_FilterFIR_U8_C1R OMXCATBAR(IPPP_FilterFIR_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_FilterMedian_U8_C1R OMXCATBAR(IPPP_FilterMedian_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_GetCentralMoment_S64 OMXCATBAR(IPPP_GetCentralMoment_S64, OMXIPPP_SUFFIX)
+#define omxIPPP_GetSpatialMoment_S64 OMXCATBAR(IPPP_GetSpatialMoment_S64, OMXIPPP_SUFFIX)
+#define omxIPPP_MomentGetStateSize OMXCATBAR(IPPP_MomentGetStateSize, OMXIPPP_SUFFIX)
+#define omxIPPP_MomentInit OMXCATBAR(IPPP_MomentInit, OMXIPPP_SUFFIX)
+#define omxIPPP_Moments_U8_C1R OMXCATBAR(IPPP_Moments_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_Moments_U8_C3R OMXCATBAR(IPPP_Moments_U8_C3R, OMXIPPP_SUFFIX)
+/* omxSP_* names: each is rebuilt from its bare name plus OMXSP_SUFFIX */
+#define omxSP_BlockExp_S16 OMXCATBAR(SP_BlockExp_S16, OMXSP_SUFFIX)
+#define omxSP_BlockExp_S32 OMXCATBAR(SP_BlockExp_S32, OMXSP_SUFFIX)
+#define omxSP_Copy_S16 OMXCATBAR(SP_Copy_S16, OMXSP_SUFFIX)
+#define omxSP_DotProd_S16 OMXCATBAR(SP_DotProd_S16, OMXSP_SUFFIX)
+#define omxSP_DotProd_S16_Sfs OMXCATBAR(SP_DotProd_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_CToC_SC16_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_CToC_SC32_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_RToCCS_S16S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S16S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_RToCCS_S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_C_SC16 OMXCATBAR(SP_FFTGetBufSize_C_SC16, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_C_SC32 OMXCATBAR(SP_FFTGetBufSize_C_SC32, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_R_S16S32 OMXCATBAR(SP_FFTGetBufSize_R_S16S32, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_R_S32 OMXCATBAR(SP_FFTGetBufSize_R_S32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_C_SC16 OMXCATBAR(SP_FFTInit_C_SC16, OMXSP_SUFFIX)
+#define omxSP_FFTInit_C_SC32 OMXCATBAR(SP_FFTInit_C_SC32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_R_S16S32 OMXCATBAR(SP_FFTInit_R_S16S32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_R_S32 OMXCATBAR(SP_FFTInit_R_S32, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CCSToR_S32_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CCSToR_S32S16_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CToC_SC16_Sfs OMXCATBAR(SP_FFTInv_CToC_SC16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CToC_SC32_Sfs OMXCATBAR(SP_FFTInv_CToC_SC32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FilterMedian_S32 OMXCATBAR(SP_FilterMedian_S32, OMXSP_SUFFIX)
+#define omxSP_FilterMedian_S32_I OMXCATBAR(SP_FilterMedian_S32_I, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16 OMXCATBAR(SP_FIR_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_I OMXCATBAR(SP_FIR_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_ISfs OMXCATBAR(SP_FIR_Direct_S16_ISfs, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_Sfs OMXCATBAR(SP_FIR_Direct_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16 OMXCATBAR(SP_FIROne_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_I OMXCATBAR(SP_FIROne_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_ISfs OMXCATBAR(SP_FIROne_Direct_S16_ISfs, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_Sfs OMXCATBAR(SP_FIROne_Direct_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_IIR_BiQuadDirect_S16 OMXCATBAR(SP_IIR_BiQuadDirect_S16, OMXSP_SUFFIX)
+#define omxSP_IIR_BiQuadDirect_S16_I OMXCATBAR(SP_IIR_BiQuadDirect_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIR_Direct_S16 OMXCATBAR(SP_IIR_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_IIR_Direct_S16_I OMXCATBAR(SP_IIR_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIROne_BiQuadDirect_S16 OMXCATBAR(SP_IIROne_BiQuadDirect_S16, OMXSP_SUFFIX)
+#define omxSP_IIROne_BiQuadDirect_S16_I OMXCATBAR(SP_IIROne_BiQuadDirect_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIROne_Direct_S16 OMXCATBAR(SP_IIROne_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_IIROne_Direct_S16_I OMXCATBAR(SP_IIROne_Direct_S16_I, OMXSP_SUFFIX)
+/* omxVCCOMM_* names: each is rebuilt from its bare name plus OMXVCCOMM_SUFFIX */
+#define omxVCCOMM_Average_16x OMXCATBAR(VCCOMM_Average_16x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Average_8x OMXCATBAR(VCCOMM_Average_8x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ComputeTextureErrorBlock OMXCATBAR(VCCOMM_ComputeTextureErrorBlock, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ComputeTextureErrorBlock_SAD OMXCATBAR(VCCOMM_ComputeTextureErrorBlock_SAD, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Copy16x16 OMXCATBAR(VCCOMM_Copy16x16, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Copy8x8 OMXCATBAR(VCCOMM_Copy8x8, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ExpandFrame_I OMXCATBAR(VCCOMM_ExpandFrame_I, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_LimitMVToRect OMXCATBAR(VCCOMM_LimitMVToRect, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_SAD_16x OMXCATBAR(VCCOMM_SAD_16x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_SAD_8x OMXCATBAR(VCCOMM_SAD_8x, OMXVCCOMM_SUFFIX)
+/* omxVCM4P10_* names: each is rebuilt from its bare name plus OMXVCM4P10_SUFFIX */
+#define omxVCM4P10_Average_4x OMXCATBAR(VCM4P10_Average_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Half OMXCATBAR(VCM4P10_BlockMatch_Half, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Integer OMXCATBAR(VCM4P10_BlockMatch_Integer, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Quarter OMXCATBAR(VCM4P10_BlockMatch_Quarter, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DeblockChroma_I OMXCATBAR(VCM4P10_DeblockChroma_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DeblockLuma_I OMXCATBAR(VCM4P10_DeblockLuma_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeChromaDcCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DecodeCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DequantTransformResidualFromPairAndAdd OMXCATBAR(VCM4P10_DequantTransformResidualFromPairAndAdd, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingChroma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_HorEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingChroma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_VerEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingLuma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_HorEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingLuma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_VerEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_GetVLCInfo OMXCATBAR(VCM4P10_GetVLCInfo, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateChroma OMXCATBAR(VCM4P10_InterpolateChroma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateHalfHor_Luma OMXCATBAR(VCM4P10_InterpolateHalfHor_Luma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateHalfVer_Luma OMXCATBAR(VCM4P10_InterpolateHalfVer_Luma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateLuma OMXCATBAR(VCM4P10_InterpolateLuma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformDequant_ChromaDC OMXCATBAR(VCM4P10_InvTransformDequant_ChromaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformDequant_LumaDC OMXCATBAR(VCM4P10_InvTransformDequant_LumaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformResidualAndAdd OMXCATBAR(VCM4P10_InvTransformResidualAndAdd, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MEGetBufSize OMXCATBAR(VCM4P10_MEGetBufSize, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MEInit OMXCATBAR(VCM4P10_MEInit, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MotionEstimationMB OMXCATBAR(VCM4P10_MotionEstimationMB, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntra_16x16 OMXCATBAR(VCM4P10_PredictIntra_16x16, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntra_4x4 OMXCATBAR(VCM4P10_PredictIntra_4x4, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntraChroma_8x8 OMXCATBAR(VCM4P10_PredictIntraChroma_8x8, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SAD_4x OMXCATBAR(VCM4P10_SAD_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_16x OMXCATBAR(VCM4P10_SADQuar_16x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_4x OMXCATBAR(VCM4P10_SADQuar_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_8x OMXCATBAR(VCM4P10_SADQuar_8x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SATD_4x4 OMXCATBAR(VCM4P10_SATD_4x4, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SubAndTransformQDQResidual OMXCATBAR(VCM4P10_SubAndTransformQDQResidual, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformDequantChromaDCFromPair OMXCATBAR(VCM4P10_TransformDequantChromaDCFromPair, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformDequantLumaDCFromPair OMXCATBAR(VCM4P10_TransformDequantLumaDCFromPair, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformQuant_ChromaDC OMXCATBAR(VCM4P10_TransformQuant_ChromaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformQuant_LumaDC OMXCATBAR(VCM4P10_TransformQuant_LumaDC, OMXVCM4P10_SUFFIX)
+/* omxVCM4P2_* names: each is rebuilt from its bare name plus OMXVCM4P2_SUFFIX */
+#define omxVCM4P2_BlockMatch_Half_16x16 OMXCATBAR(VCM4P2_BlockMatch_Half_16x16, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Half_8x8 OMXCATBAR(VCM4P2_BlockMatch_Half_8x8, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Integer_16x16 OMXCATBAR(VCM4P2_BlockMatch_Integer_16x16, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Integer_8x8 OMXCATBAR(VCM4P2_BlockMatch_Integer_8x8, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DCT8x8blk OMXCATBAR(VCM4P2_DCT8x8blk, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeBlockCoef_Inter OMXCATBAR(VCM4P2_DecodeBlockCoef_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeBlockCoef_Intra OMXCATBAR(VCM4P2_DecodeBlockCoef_Intra, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodePadMV_PVOP OMXCATBAR(VCM4P2_DecodePadMV_PVOP, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_Inter OMXCATBAR(VCM4P2_DecodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeMV OMXCATBAR(VCM4P2_EncodeMV, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_Inter OMXCATBAR(VCM4P2_EncodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_FindMVpred OMXCATBAR(VCM4P2_FindMVpred, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_IDCT8x8blk OMXCATBAR(VCM4P2_IDCT8x8blk, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MCReconBlock OMXCATBAR(VCM4P2_MCReconBlock, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MEGetBufSize OMXCATBAR(VCM4P2_MEGetBufSize, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MEInit OMXCATBAR(VCM4P2_MEInit, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MotionEstimationMB OMXCATBAR(VCM4P2_MotionEstimationMB, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_PredictReconCoefIntra OMXCATBAR(VCM4P2_PredictReconCoefIntra, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInter_I OMXCATBAR(VCM4P2_QuantInter_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantIntra_I OMXCATBAR(VCM4P2_QuantIntra_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInvInter_I OMXCATBAR(VCM4P2_QuantInvInter_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInvIntra_I OMXCATBAR(VCM4P2_QuantInvIntra_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_TransRecBlockCoef_inter OMXCATBAR(VCM4P2_TransRecBlockCoef_inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_TransRecBlockCoef_intra OMXCATBAR(VCM4P2_TransRecBlockCoef_intra, OMXVCM4P2_SUFFIX)
+
+
+#endif /* _armOMX_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes.h
new file mode 100644
index 0000000..8b295a6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes.h
@@ -0,0 +1,252 @@
+/**
+ * File: omxtypes.h
+ * Brief: Defines basic Data types used in OpenMAX v1.0.2 header files.
+ *
+ * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved.
+ *
+ * These materials are protected by copyright laws and contain material
+ * proprietary to the Khronos Group, Inc. You may use these materials
+ * for implementing Khronos specifications, without altering or removing
+ * any trademark, copyright or other notice from the specification.
+ *
+ * Khronos Group makes no, and expressly disclaims any, representations
+ * or warranties, express or implied, regarding these materials, including,
+ * without limitation, any implied warranties of merchantability or fitness
+ * for a particular purpose or non-infringement of any intellectual property.
+ * Khronos Group makes no, and expressly disclaims any, warranties, express
+ * or implied, regarding the correctness, accuracy, completeness, timeliness,
+ * and reliability of these materials.
+ *
+ * Under no circumstances will the Khronos Group, or any of its Promoters,
+ * Contributors or Members or their respective partners, officers, directors,
+ * employees, agents or representatives be liable for any damages, whether
+ * direct, indirect, special or consequential damages for lost revenues,
+ * lost profits, or otherwise, arising from or in connection with these
+ * materials.
+ *
+ * Khronos and OpenMAX are trademarks of the Khronos Group Inc.
+ *
+ */
+
+#ifndef _OMXTYPES_H_
+#define _OMXTYPES_H_
+
+#include <limits.h>
+/* Annotation macros named for parameter direction; all three expand to nothing */
+#define OMX_IN
+#define OMX_OUT
+#define OMX_INOUT
+
+
+typedef enum {
+ /* Status codes: 0 means success; every other code except the final size placeholder is negative */
+ /* Mandatory return codes - use cases are explicitly described for each function */
+ OMX_Sts_NoErr = 0, /* No error, the function completed successfully */
+ OMX_Sts_Err = -2, /* Unknown/unspecified error */
+ OMX_Sts_InvalidBitstreamValErr = -182, /* Invalid value detected during bitstream processing */
+ OMX_Sts_MemAllocErr = -9, /* Not enough memory allocated for the operation */
+ OMX_StsACAAC_GainCtrErr = -159, /* AAC: Unsupported gain control data detected */
+ OMX_StsACAAC_PrgNumErr = -167, /* AAC: Invalid number of elements for one program */
+ OMX_StsACAAC_CoefValErr = -163, /* AAC: Invalid quantized coefficient value */
+ OMX_StsACAAC_MaxSfbErr = -162, /* AAC: Invalid maxSfb value in relation to numSwb */
+ OMX_StsACAAC_PlsDataErr = -160, /* AAC: pulse escape sequence data error */
+
+ /* Optional return codes - use cases are explicitly described for each function*/
+ OMX_Sts_BadArgErr = -5, /* Bad Arguments */
+
+ OMX_StsACAAC_TnsNumFiltErr = -157, /* AAC: Invalid number of TNS filters */
+ OMX_StsACAAC_TnsLenErr = -156, /* AAC: Invalid TNS region length */
+ OMX_StsACAAC_TnsOrderErr = -155, /* AAC: Invalid order of TNS filter */
+ OMX_StsACAAC_TnsCoefResErr = -154, /* AAC: Invalid bit-resolution for TNS filter coefficients */
+ OMX_StsACAAC_TnsCoefErr = -153, /* AAC: Invalid TNS filter coefficients */
+ OMX_StsACAAC_TnsDirectErr = -152, /* AAC: Invalid TNS filter direction */
+
+ OMX_StsICJP_JPEGMarkerErr = -183, /* JPEG marker encountered within an entropy-coded block; */
+ /* Huffman decoding operation terminated early. */
+ OMX_StsICJP_JPEGMarker = -181, /* JPEG marker encountered; Huffman decoding */
+ /* operation terminated early. */
+ OMX_StsIPPP_ContextMatchErr = -17, /* Context parameter doesn't match to the operation */
+
+ OMX_StsSP_EvenMedianMaskSizeErr = -180, /* Even size of the Median Filter mask was replaced by the odd one */
+
+ OMX_Sts_MaximumEnumeration = INT_MAX /* Placeholder, forces enum of size OMX_INT */
+
+ } OMXResult; /** Return value or error value returned from a function. Identical to OMX_INT */
+
+
+/* OMX_U8: unsigned 8-bit integer - the first standard type whose <limits.h> maximum is 0xff; build error if none */
+#if UCHAR_MAX == 0xff
+typedef unsigned char OMX_U8;
+#elif USHRT_MAX == 0xff
+typedef unsigned short int OMX_U8;
+#else
+#error OMX_U8 undefined
+#endif
+
+
+/* OMX_S8: signed 8-bit integer - the first standard type whose <limits.h> maximum is 0x7f; build error if none */
+#if SCHAR_MAX == 0x7f
+typedef signed char OMX_S8;
+#elif SHRT_MAX == 0x7f
+typedef signed short int OMX_S8;
+#else
+#error OMX_S8 undefined
+#endif
+
+
+/* OMX_U16: unsigned 16-bit integer - the first standard type whose <limits.h> maximum is 0xffff; build error if none */
+#if USHRT_MAX == 0xffff
+typedef unsigned short int OMX_U16;
+#elif UINT_MAX == 0xffff
+typedef unsigned int OMX_U16;
+#else
+#error OMX_U16 undefined
+#endif
+
+
+/* OMX_S16: signed 16-bit integer - the first standard type whose <limits.h> maximum is 0x7fff; build error if none */
+#if SHRT_MAX == 0x7fff
+typedef signed short int OMX_S16;
+#elif INT_MAX == 0x7fff
+typedef signed int OMX_S16;
+#else
+#error OMX_S16 undefined
+#endif
+
+
+/* OMX_U32: unsigned 32-bit integer. The fallback tests ULONG_MAX, the limit of unsigned long (LONG_MAX is 0x7fffffff when long is 32 bits, so it never matched) */
+#if UINT_MAX == 0xffffffff
+typedef unsigned int OMX_U32;
+#elif ULONG_MAX == 0xffffffff
+typedef unsigned long int OMX_U32;
+#else
+#error OMX_U32 undefined
+#endif
+
+
+/* OMX_S32: signed 32-bit integer - the first standard type whose <limits.h> maximum is 0x7fffffff; build error if none */
+#if INT_MAX == 0x7fffffff
+typedef signed int OMX_S32;
+#elif LONG_MAX == 0x7fffffff
+typedef long signed int OMX_S32;
+#else
+#error OMX_S32 undefined
+#endif
+
+
+/* OMX_U64 & OMX_S64: 64-bit integers and their limits. OMX_MIN_S64 is written as (-MAX - 1) because 0x8000000000000000 does not fit a signed 64-bit constant and would be typed unsigned (value +2^63) */
+#if defined( _WIN32 ) || defined ( _WIN64 )
+ typedef __int64 OMX_S64; /** Signed 64-bit integer */
+ typedef unsigned __int64 OMX_U64; /** Unsigned 64-bit integer */
+ #define OMX_MIN_S64 (-0x7FFFFFFFFFFFFFFFi64 - 1)
+ #define OMX_MIN_U64 (0x0000000000000000i64)
+ #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFi64)
+ #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFi64)
+#else
+ typedef long long OMX_S64; /** Signed 64-bit integer */
+ typedef unsigned long long OMX_U64; /** Unsigned 64-bit integer */
+ #define OMX_MIN_S64 (-0x7FFFFFFFFFFFFFFFLL - 1)
+ #define OMX_MIN_U64 (0x0000000000000000LL)
+ #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFLL)
+ #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFLL)
+#endif
+
+
+/* OMX_SC8: complex number stored as a pair of OMX_S8 values */
+typedef struct
+{
+ OMX_S8 Re; /** Real part */
+ OMX_S8 Im; /** Imaginary part */
+
+} OMX_SC8; /** Signed 8-bit complex number */
+
+
+/* OMX_SC16: complex number stored as a pair of OMX_S16 values */
+typedef struct
+{
+ OMX_S16 Re; /** Real part */
+ OMX_S16 Im; /** Imaginary part */
+
+} OMX_SC16; /** Signed 16-bit complex number */
+
+
+/* OMX_SC32: complex number stored as a pair of OMX_S32 values */
+typedef struct
+{
+ OMX_S32 Re; /** Real part */
+ OMX_S32 Im; /** Imaginary part */
+
+} OMX_SC32; /** Signed 32-bit complex number */
+
+
+/* OMX_SC64: complex number stored as a pair of OMX_S64 values */
+typedef struct
+{
+ OMX_S64 Re; /** Real part */
+ OMX_S64 Im; /** Imaginary part */
+
+} OMX_SC64; /** Signed 64-bit complex number */
+
+
+/* OMX_F32: alias for the C float type */
+typedef float OMX_F32; /** Single precision floating point, IEEE 754 */
+
+
+/* OMX_F64: alias for the C double type */
+typedef double OMX_F64; /** Double precision floating point, IEEE 754 */
+
+
+/* OMX_INT: alias for the C int type */
+typedef int OMX_INT; /** Signed integer corresponding to machine word length, has maximum signed value INT_MAX */
+
+/* Minimum and maximum values of the 8-, 16- and 32-bit OMX integer types */
+#define OMX_MIN_S8 (-128)
+#define OMX_MIN_U8 0
+#define OMX_MIN_S16 (-32768)
+#define OMX_MIN_U16 0
+#define OMX_MIN_S32 (-2147483647-1) /* written as (-MAX-1): 2147483648 does not fit a 32-bit int constant */
+#define OMX_MIN_U32 0
+
+#define OMX_MAX_S8 (127)
+#define OMX_MAX_U8 (255)
+#define OMX_MAX_S16 (32767)
+#define OMX_MAX_U16 (0xFFFF)
+#define OMX_MAX_S32 (2147483647)
+#define OMX_MAX_U32 (0xFFFFFFFF)
+
+typedef void OMXVoid; /* alias for void */
+/* Fallback definition of NULL, used only when no earlier header has defined it */
+#ifndef NULL
+#define NULL ((void*)0)
+#endif
+
+/** Defines the geometric position and size of a rectangle:
+ * (x, y) are the coordinates of its top left corner,
+ * width is its extent in the x-direction and
+ * height is its extent in the y-direction. All four fields are OMX_INT. */
+typedef struct {
+ OMX_INT x; /** x-coordinate of top left corner of rectangle */
+ OMX_INT y; /** y-coordinate of top left corner of rectangle */
+ OMX_INT width; /** Width in the x-direction. */
+ OMX_INT height; /** Height in the y-direction. */
+}OMXRect;
+
+
+/** Defines the geometric position of a point as a pair of OMX_INT coordinates. */
+typedef struct
+{
+ OMX_INT x; /** x-coordinate */
+ OMX_INT y; /** y-coordinate */
+
+} OMXPoint;
+
+
+/** Defines the dimensions (OMX_INT width and height) of a rectangle, or region of interest in an image */
+typedef struct
+{
+ OMX_INT width; /** Width of the rectangle, in the x-direction */
+ OMX_INT height; /** Height of the rectangle, in the y-direction */
+
+} OMXSize;
+
+#endif /* _OMXTYPES_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes_s.h
new file mode 100644
index 0000000..8d24b65
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes_s.h
@@ -0,0 +1,77 @@
+;//
+;//
+;// File Name: omxtypes_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Mandatory return codes - use cases are explicitly described for each function
+OMX_Sts_NoErr EQU 0 ;// No error: the function completed successfully
+OMX_Sts_Err EQU -2 ;// Unknown/unspecified error
+OMX_Sts_InvalidBitstreamValErr EQU -182 ;// Invalid value detected during bitstream processing
+OMX_Sts_MemAllocErr EQU -9 ;// Not enough memory allocated for the operation
+OMX_StsACAAC_GainCtrErr EQU -159 ;// AAC: Unsupported gain control data detected
+OMX_StsACAAC_PrgNumErr EQU -167 ;// AAC: Invalid number of elements for one program
+OMX_StsACAAC_CoefValErr EQU -163 ;// AAC: Invalid quantized coefficient value
+OMX_StsACAAC_MaxSfbErr EQU -162 ;// AAC: Invalid maxSfb value in relation to numSwb
+OMX_StsACAAC_PlsDataErr EQU -160 ;// AAC: Pulse escape sequence data error
+
+;// Optional return codes - use cases are explicitly described for each function
+OMX_Sts_BadArgErr EQU -5 ;// Bad arguments
+
+OMX_StsACAAC_TnsNumFiltErr EQU -157 ;// AAC: Invalid number of TNS filters
+OMX_StsACAAC_TnsLenErr EQU -156 ;// AAC: Invalid TNS region length
+OMX_StsACAAC_TnsOrderErr EQU -155 ;// AAC: Invalid order of TNS filter
+OMX_StsACAAC_TnsCoefResErr EQU -154 ;// AAC: Invalid bit-resolution for TNS filter coefficients
+OMX_StsACAAC_TnsCoefErr EQU -153 ;// AAC: Invalid TNS filter coefficients
+OMX_StsACAAC_TnsDirectErr EQU -152 ;// AAC: Invalid TNS filter direction
+
+OMX_StsICJP_JPEGMarkerErr EQU -183 ;// JPEG marker encountered within an entropy-coded block;
+ ;// Huffman decoding operation terminated early.
+OMX_StsICJP_JPEGMarker EQU -181 ;// JPEG marker encountered; Huffman decoding
+ ;// operation terminated early.
+OMX_StsIPPP_ContextMatchErr EQU -17 ;// Context parameter doesn't match the operation
+
+OMX_StsSP_EvenMedianMaskSizeErr EQU -180 ;// Even size of the Median Filter mask was replaced by the odd one
+
+OMX_Sts_MaximumEnumeration EQU 0x7FFFFFFF ;// Largest positive 32-bit signed value; NOTE(review): presumably mirrors the last enumerator of the C result enum - confirm
+
+
+
+OMX_MIN_S8 EQU (-128)
+OMX_MIN_U8 EQU 0
+OMX_MIN_S16 EQU (-32768)
+OMX_MIN_U16 EQU 0
+
+;// The limits in this group carry the same values as the OMX_MIN_*/OMX_MAX_* macros of omxtypes.h
+OMX_MIN_S32 EQU (-2147483647-1)
+OMX_MIN_U32 EQU 0
+
+OMX_MAX_S8 EQU (127)
+OMX_MAX_U8 EQU (255)
+OMX_MAX_S16 EQU (32767)
+OMX_MAX_U16 EQU (0xFFFF)
+OMX_MAX_S32 EQU (2147483647)
+OMX_MAX_U32 EQU (0xFFFFFFFF)
+;// Single-bit values; NOTE(review): presumably combined as a neighbour availability mask - confirm
+OMX_VC_UPPER EQU 0x1 ;// Used by the PredictIntra functions
+OMX_VC_LEFT EQU 0x2 ;// Used by the PredictIntra functions
+OMX_VC_UPPER_RIGHT EQU 0x40 ;// Used by the PredictIntra functions
+;// NULL for assembly sources, defined as the value 0
+NULL EQU 0
+
+;// Structures (NOTE(review): M_STRUCT/M_FIELD/M_ENDSTRUCT are not defined in this file - presumably provided by armCOMM_s.h)
+
+ INCLUDE armCOMM_s.h
+;// Assembly view of OMXPoint: fields x and y, each declared with the value 4 (presumably the field size in bytes)
+ M_STRUCT OMXPoint
+ M_FIELD x, 4
+ M_FIELD y, 4
+ M_ENDSTRUCT
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/build_vc.pl b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/build_vc.pl
new file mode 100755
index 0000000..1ae7005
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/build_vc.pl
@@ -0,0 +1,111 @@
+#!/usr/bin/perl
+#
+#
+# File Name: build_vc.pl
+# OpenMAX DL: v1.0.2
+# Revision: 9641
+# Date: Thursday, February 7, 2008
+#
+# (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+#
+#
+#
+# This file builds the OpenMAX DL vc domain library omxVC.o.
+#
+
+use File::Spec;
+use strict;
+
+my ($CC, $CC_OPTS, $AS, $AS_OPTS, $LIB, $LIB_OPTS, $LIB_TYPE);
+
+$CC = 'armcc';
+$CC_OPTS = '--no_unaligned_access --cpu ARM1136J-S -c';
+$AS = 'armasm';
+$AS_OPTS = '--no_unaligned_access --cpu ARM1136J-S';
+# $LIB = 'armlink';
+# $LIB_OPTS = '--partial -o';
+# $LIB_TYPE = '.o';
+$LIB = 'armar';
+$LIB_OPTS = '--create -r';
+$LIB_TYPE = '.a';
+
+#------------------------
+
+my (@headerlist, @filelist, $hd, $file, $command, $objlist, $libfile, $h);
+
+# Define the list of directories containing included header files.
+@headerlist = qw(api vc/api vc/m4p2/api vc/m4p10/api);
+
+# Define the list of source files to compile.
+open(my $files, '<', 'filelist_vc.txt') or die("Can't open source file list: $!\n");
+@filelist = <$files>;
+close($files);
+
+# Fix the file separators in the header paths
+foreach $h (@headerlist)
+{
+    $h = File::Spec->canonpath($h);
+}
+
+# Create the include path to be passed to the compiler
+$hd = '-I' . join(' -I', @headerlist);
+
+# Create the build directories "/lib/" and "/obj/" (if they are not there already)
+mkdir "obj", 0777 if (! -d "obj");
+mkdir "lib", 0777 if (! -d "lib");
+
+$objlist = '';
+
+# Compile each file. The build stops at the first failing command so that a
+# stale or missing object file can never end up in the library.
+foreach $file (@filelist)
+{
+    my ($f, $base, $ext, $objfile);
+
+    chomp($file);
+    $file = File::Spec->canonpath($file);
+
+    (undef, undef, $f) = File::Spec->splitpath($file);
+    if(($base, $ext) = $f =~ /(.+)\.(\w)$/)
+    {
+        $objfile = File::Spec->catfile('obj', $base.'.o');
+
+        if($ext eq 'c')
+        {
+            $objlist .= "$objfile ";
+            $command = $CC.' '.$CC_OPTS.' '.$hd.' -o '.$objfile.' '.$file;
+            print "$command\n";
+            (system($command) == 0) or die("Compilation failed: $file\n");
+        }
+        elsif($ext eq 's')
+        {
+            $objlist .= "$objfile ";
+            $command = $AS.' '.$AS_OPTS.' '.$hd.' -o '.$objfile.' '.$file;
+            print "$command\n";
+            (system($command) == 0) or die("Assembly failed: $file\n");
+        }
+        else
+        {
+            # The file list also names headers; they are listed but not built.
+            print "Ignoring file: $f\n";
+        }
+    }
+    else
+    {
+        die "No file extension found: $f\n";
+    }
+}
+
+# Do the final link stage to create the libraries.
+$libfile = File::Spec->catfile('lib', 'omxVC'.$LIB_TYPE);
+$command = $LIB.' '.$LIB_OPTS.' '.$libfile.' '.$objlist;
+print "$command\n";
+(system($command) == 0) or die("Library creation failed: $libfile\n");
+print "Build successful\n";
+
+
+
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/filelist_vc.txt b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/filelist_vc.txt
new file mode 100644
index 0000000..0f1623f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/filelist_vc.txt
@@ -0,0 +1,74 @@
+./api/armCOMM.h
+./api/armCOMM_BitDec_s.h
+./api/armCOMM_Bitstream.h
+./api/armCOMM_IDCT_s.h
+./api/armCOMM_IDCTTable.h
+./api/armCOMM_MaskTable.h
+./api/armCOMM_s.h
+./api/armCOMM_Version.h
+./api/armOMX_ReleaseVersion.h
+./api/omxtypes.h
+./api/omxtypes_s.h
+./src/armCOMM_IDCTTable.c
+./src/armCOMM_MaskTable.c
+./vc/api/armVC.h
+./vc/api/armVCCOMM_s.h
+./vc/api/omxVC.h
+./vc/api/omxVC_s.h
+./vc/comm/src/omxVCCOMM_Copy16x16_s.s
+./vc/comm/src/omxVCCOMM_Copy8x8_s.s
+./vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
+./vc/m4p10/api/armVCM4P10_CAVLCTables.h
+./vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_CAVLCTables.c
+./vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
+./vc/m4p10/src/armVCM4P10_DequantTables_s.s
+./vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_QuantTables_s.s
+./vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
+./vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
+./vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
+./vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
+./vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+./vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
+./vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
+./vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
+./vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
+./vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
+./vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
+./vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
+./vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
+./vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
+./vc/m4p2/src/armVCM4P2_Clip8_s.s
+./vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
+./vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
+./vc/m4p2/src/armVCM4P2_Lookup_Tables.c
+./vc/m4p2/src/armVCM4P2_SetPredDir_s.s
+./vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
+./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
+./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
+./vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
+./vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
+./vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
+./vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
+./vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
+./vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
+./vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
+./vc/src/armVC_Version.c \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM.c
new file mode 100644
index 0000000..e572a89
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM.c
@@ -0,0 +1,936 @@
+/**
+ *
+ * File Name: armCOMM.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Defines Common APIs used across OpenMAX API's
+ */
+
+#include "omxtypes.h"
+#include "armCOMM.h"
+
+/***********************************************************************/
+ /* Miscellaneous Arithmetic operations */
+
+/**
+ * Function: armRoundFloatToS16
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer, with halves
+ * going away from zero, and converts the result to OMX_S16.
+ *
+ * Remarks:
+ * No saturation is performed on the result.
+ *
+ * Parameters:
+ * [in] Value Floating point value to be converted
+ *
+ * Return Value:
+ * [out] Rounded value in OMX_S16 format
+ */
+
+OMX_S16 armRoundFloatToS16 (OMX_F64 Value)
+{
+    /* Strictly positive inputs are biased up by one half, all others
+     * down by one half; the cast then truncates towards zero. */
+    const OMX_F64 roundingBias = (Value > 0) ? .5 : -.5;
+
+    return (OMX_S16)(Value + roundingBias);
+}
+
+/**
+ * Function: armRoundFloatToS32
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer, with halves
+ * going away from zero, and converts the result to OMX_S32.
+ *
+ * Remarks:
+ * No saturation is performed on the result.
+ *
+ * Parameters:
+ * [in] Value Floating point value to be converted
+ *
+ * Return Value:
+ * [out] Rounded value in OMX_S32 format
+ */
+
+OMX_S32 armRoundFloatToS32 (OMX_F64 Value)
+{
+    /* Strictly positive inputs are biased up by one half, all others
+     * down by one half; the cast then truncates towards zero. */
+    const OMX_F64 roundingBias = (Value > 0) ? .5 : -.5;
+
+    return (OMX_S32)(Value + roundingBias);
+}
+/**
+ * Function: armSatRoundFloatToS16
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halves away
+ * from zero) and converts it to OMX_S16, saturating at the limits of
+ * the signed 16 bit range.
+ *
+ * Remarks:
+ * The rounding bias is applied before the range check.
+ *
+ * Parameters:
+ * [in] Value Floating point value to be converted
+ *
+ * Return Value:
+ * [out] OMX_MAX_S16 if the biased value is above the range,
+ *       OMX_MIN_S16 if it is below the range,
+ *       the rounded value in OMX_S16 format otherwise
+ *
+ */
+
+OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value)
+{
+    const OMX_S16 upperLimit = (OMX_S16)OMX_MAX_S16;
+    const OMX_S16 lowerLimit = (OMX_S16)OMX_MIN_S16;
+    OMX_S16 rounded;
+
+    if (Value > 0)
+    {
+        /* Positive input: bias upwards, only the upper limit can be hit */
+        Value += 0.5;
+        rounded = (Value > upperLimit) ? upperLimit : (OMX_S16)Value;
+    }
+    else
+    {
+        /* Zero or negative input: bias downwards, only the lower limit
+         * can be hit */
+        Value -= 0.5;
+        rounded = (Value < lowerLimit) ? lowerLimit : (OMX_S16)Value;
+    }
+
+    return rounded;
+}
+
+/**
+ * Function: armSatRoundFloatToS32
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halves away
+ * from zero) and converts it to OMX_S32, saturating at the limits of
+ * the signed 32 bit range.
+ *
+ * Remarks:
+ * The rounding bias is applied before the range check.
+ *
+ * Parameters:
+ * [in] Value Floating point value to be converted
+ *
+ * Return Value:
+ * [out] OMX_MAX_S32 if the biased value is above the range,
+ *       OMX_MIN_S32 if it is below the range,
+ *       the rounded value in OMX_S32 format otherwise
+ *
+ */
+
+OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value)
+{
+    const OMX_S32 upperLimit = (OMX_S32)OMX_MAX_S32;
+    const OMX_S32 lowerLimit = (OMX_S32)OMX_MIN_S32;
+    OMX_S32 rounded;
+
+    if (Value > 0)
+    {
+        /* Positive input: bias upwards, only the upper limit can be hit */
+        Value += 0.5;
+        rounded = (Value > upperLimit) ? upperLimit : (OMX_S32)Value;
+    }
+    else
+    {
+        /* Zero or negative input: bias downwards, only the lower limit
+         * can be hit */
+        Value -= 0.5;
+        rounded = (Value < lowerLimit) ? lowerLimit : (OMX_S32)Value;
+    }
+
+    return rounded;
+}
+
+/**
+ * Function: armSatRoundFloatToU16
+ *
+ * Description:
+ * Converts a double precision value into a unsigned short int after
+ * rounding, saturating the result to the range [0, OMX_MAX_U16]
+ *
+ * Parameters:
+ * [in] Value Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_U16 format
+ */
+
+OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value)
+{
+    Value += 0.5;
+
+    if(Value > (OMX_U16)OMX_MAX_U16 )
+    {
+        return (OMX_U16)OMX_MAX_U16;
+    }
+
+    /* Converting a value of -1 or less to an unsigned type is undefined
+     * behaviour in C, so the lower bound is clamped explicitly. */
+    return (Value < 0) ? (OMX_U16)0 : (OMX_U16)Value;
+}
+
+/**
+ * Function: armSatRoundFloatToU32
+ *
+ * Description:
+ * Converts a double precision value into a unsigned int after
+ * rounding, saturating the result to the range [0, OMX_MAX_U32]
+ *
+ * Parameters:
+ * [in] Value Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_U32 format
+ */
+
+OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value)
+{
+    Value += 0.5;
+
+    if(Value > (OMX_U32)OMX_MAX_U32 )
+    {
+        return (OMX_U32)OMX_MAX_U32;
+    }
+
+    /* Converting a value of -1 or less to an unsigned type is undefined
+     * behaviour in C, so the lower bound is clamped explicitly. */
+    return (Value < 0) ? (OMX_U32)0 : (OMX_U32)Value;
+}
+
+/**
+ * Function: armRoundFloatToS64
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer, with halves
+ * going away from zero, and converts the result to OMX_S64.
+ *
+ * Remarks:
+ * No saturation is performed on the result.
+ *
+ * Parameters:
+ * [in] Value Floating point value to be converted
+ *
+ * Return Value:
+ * [out] Rounded value in OMX_S64 format
+ */
+
+OMX_S64 armRoundFloatToS64 (OMX_F64 Value)
+{
+    /* Strictly positive inputs are biased up by one half, all others
+     * down by one half; the cast then truncates towards zero. */
+    const OMX_F64 roundingBias = (Value > 0) ? .5 : -.5;
+
+    return (OMX_S64)(Value + roundingBias);
+}
+
+/**
+ * Function: armSignCheck
+ *
+ * Description:
+ * Reports the sign of a variable:
+ * returns 1 if it is Positive
+ * returns 0 if it is 0
+ * returns -1 if it is Negative
+ *
+ * Remarks:
+ * Only the sign of the input is reported, never its magnitude.
+ *
+ * Parameters:
+ * [in] var Variable to be checked
+ *
+ * Return Value:
+ * OMX_INT -- returns 1 if it is Positive
+ * returns 0 if it is 0
+ * returns -1 if it is Negative
+ *
+ */
+
+OMX_INT armSignCheck (
+    OMX_S16 var
+)
+
+{
+    /* Zero is the answer unless one of the tests below says otherwise */
+    OMX_INT Sign = 0;
+
+    if (var > 0)
+    {
+        Sign = 1;
+    }
+
+    if (var < 0)
+    {
+        Sign = -1;
+    }
+
+    return Sign;
+}
+
+/**
+ * Function: armClip
+ *
+ * Description: Limits the input to the range [min, max]
+ *
+ *
+ * Remarks:
+ * The upper bound is checked before the lower bound.
+ *
+ * Parameters:
+ * [in] min lower bound
+ * [in] max upper bound
+ * [in] src value to be clipped
+ *
+ * Return Value:
+ * OMX_S32 -- max if src is greater than max, otherwise min if src is
+ *            less than min, otherwise src unchanged
+ *
+ */
+
+OMX_S32 armClip (
+    OMX_INT min,
+    OMX_INT max,
+    OMX_S32 src
+)
+
+{
+    /* The upper bound is tested first, so it wins if min > max */
+    if (src > max)
+    {
+        return max;
+    }
+
+    return (src < min) ? min : src;
+}
+
+/**
+ * Function: armClip_F32
+ *
+ * Description: Limits the floating point input to the range [min, max]
+ *
+ *
+ * Remarks:
+ * The upper bound is checked before the lower bound.
+ *
+ * Parameters:
+ * [in] min lower bound
+ * [in] max upper bound
+ * [in] src value to be clipped
+ *
+ * Return Value:
+ * OMX_F32 -- max if src is greater than max, otherwise min if src is
+ *            less than min, otherwise src unchanged
+ *
+ */
+
+OMX_F32 armClip_F32 (
+    OMX_F32 min,
+    OMX_F32 max,
+    OMX_F32 src
+)
+
+{
+    /* The upper bound is tested first, so it wins if min > max */
+    if (src > max)
+    {
+        return max;
+    }
+
+    return (src < min) ? min : src;
+}
+
+/**
+ * Function: armShiftSat_F32
+ *
+ * Description: Divides a float value by 2^shift, rounds the quotient to
+ * the nearest integer and saturates it to the unsigned range of satBits
+ * bits. Non-positive inputs give 0.
+ *
+ * Parameters:
+ * [in] v Number to be operated upon
+ * [in] shift Divides the input "v" by "2^shift"
+ * [in] satBits Final range is [0, 2^satBits); 32 or more means all 32 bits
+ *
+ * Return Value:
+ * OMX_U32 -- returns "shifted" saturated value
+ */
+
+OMX_U32 armShiftSat_F32(OMX_F32 v, OMX_INT shift, OMX_INT satBits)
+{
+    const OMX_U32 allOnes = (OMX_U32)(-1);
+    OMX_F32 vShifted, vRounded, shiftDiv = (OMX_F32)(1 << shift);
+    OMX_U32 maxV;
+
+    /* Non-positive input, or an empty output range, always gives 0 */
+    if(v <= 0 || satBits <= 0)
+        return 0;
+
+    /* A shift count of 32 or more is undefined for a 32 bit operand */
+    maxV = (satBits >= 32) ? allOnes : (allOnes >> (32-satBits));
+    vShifted = v / shiftDiv;
+    vRounded = (OMX_F32)(vShifted + 0.5);
+
+    /* Saturate before converting: a value outside the OMX_U32 range
+     * cannot be converted to OMX_U32 with defined behaviour */
+    if((OMX_F64)vRounded >= (OMX_F64)maxV)
+        return maxV;
+    return (OMX_U32)vRounded;
+}
+
+/**
+ * Function: armSwapElem
+ *
+ * Description:
+ * Exchanges the contents of two elements byte by byte. Each element is
+ * <elemSize> bytes long and starts at the given pointer location.
+ *
+ * Return Value:
+ * OMXResult -- OMX_Sts_NoErr once the elements have been exchanged
+ */
+OMXResult armSwapElem(
+    OMX_U8 *pBuf1,
+    OMX_U8 *pBuf2,
+    OMX_INT elemSize
+    )
+{
+    OMX_INT remaining;
+
+    armRetArgErrIf(!pBuf1 || !pBuf2, OMX_Sts_BadArgErr);
+
+    for(remaining = elemSize; remaining > 0; remaining--)
+    {
+        const OMX_U8 saved = *pBuf1;
+        *pBuf1++ = *pBuf2;
+        *pBuf2++ = saved;
+    }
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function: armMedianOf3
+ *
+ * Description: Finds the median (the middle value) of three numbers
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] fEntry First entry
+ * [in] sEntry Second entry
+ * [in] tEntry Third entry
+ *
+ * Return Value:
+ * OMX_S32 -- returns the median value
+ */
+
+OMX_S32 armMedianOf3 (
+    OMX_S32 fEntry,
+    OMX_S32 sEntry,
+    OMX_S32 tEntry
+)
+{
+    /* Order the first two entries, then fit the third between them */
+    const OMX_S32 lowOfPair = armMin (fEntry, sEntry);
+    const OMX_S32 highOfPair = armMax (fEntry, sEntry);
+    const OMX_S32 cappedThird = armMin (highOfPair, tEntry);
+
+    return armMax (lowOfPair, cappedThird);
+}
+
+/**
+ * Function: armLogSize
+ *
+ * Description: Counts the bits needed to represent a non-negative value
+ *
+ * Parameters:
+ * [in] value Value to be measured
+ *
+ * Return Value:
+ * OMX_U8 -- The smallest k>=0 such that value is less than (1<<k);
+ *           0 is returned for a value of 0.
+ */
+
+OMX_U8 armLogSize (
+    OMX_U16 value
+)
+{
+    OMX_U8 bitCount = 0;
+
+    while (value != 0)
+    {
+        value >>= 1;
+        bitCount++;
+    }
+    return bitCount;
+}
+
+/***********************************************************************/
+ /* Saturating Arithmetic operations */
+
+/**
+ * Function :armSatAdd_S32()
+ *
+ * Description :
+ * Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Remarks:
+ * The range check is made before the addition, so the sum itself is
+ * never allowed to overflow.
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ *
+ * Return:
+ * [out] Value1 + Value2 if the sum fits in OMX_S32,
+ *       OMX_MAX_S32 if the sum is too large,
+ *       OMX_MIN_S32 if the sum is too small
+ *
+ *
+ **/
+
+OMX_S32 armSatAdd_S32(OMX_S32 Value1,OMX_S32 Value2)
+{
+    const OMX_S32 maxS32 = (OMX_S32)OMX_MAX_S32;
+    const OMX_S32 minS32 = (OMX_S32)OMX_MIN_S32;
+
+    /*
+     * Signed overflow is undefined behaviour in C, so the range check
+     * must be made before the addition instead of inspecting the sign
+     * of an already overflowed sum.
+     */
+    if (Value2 > 0)
+    {
+        if (Value1 > maxS32 - Value2)
+        {
+            /*Result has saturated in positive side*/
+            return maxS32;
+        }
+    }
+    else if (Value2 < 0)
+    {
+        if (Value1 < minS32 - Value2)
+        {
+            /*Result has saturated in negative side*/
+            return minS32;
+        }
+    }
+
+    /*Result has not saturated*/
+    return Value1 + Value2;
+}
+
+/**
+ * Function :armSatAdd_S64()
+ *
+ * Description :
+ * Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Remarks:
+ * The range check is made before the addition, so the sum itself is
+ * never allowed to overflow.
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ *
+ * Return:
+ * [out] Value1 + Value2 if the sum fits in OMX_S64,
+ *       OMX_MAX_S64 if the sum is too large,
+ *       OMX_MIN_S64 if the sum is too small
+ *
+ **/
+
+OMX_S64 armSatAdd_S64(OMX_S64 Value1,OMX_S64 Value2)
+{
+    /* Limits are held in OMX_S64 objects so that the range checks are
+     * evaluated in OMX_S64 arithmetic; minS64 assumes two's complement */
+    const OMX_S64 maxS64 = OMX_MAX_S64;
+    const OMX_S64 minS64 = -maxS64 - 1;
+
+    /*
+     * Signed overflow is undefined behaviour in C, so the range check
+     * must be made before the addition instead of inspecting the sign
+     * of an already overflowed sum.
+     */
+    if (Value2 > 0)
+    {
+        if (Value1 > maxS64 - Value2)
+        {
+            /*Result has saturated in positive side*/
+            return maxS64;
+        }
+    }
+    else if (Value2 < 0)
+    {
+        if (Value1 < minS64 - Value2)
+        {
+            /*Result has saturated in negative side*/
+            return OMX_MIN_S64;
+        }
+    }
+
+    /*Result has not saturated*/
+    return Value1 + Value2;
+}
+
+/** Function :armSatSub_S32()
+ *
+ * Description :
+ * Returns the result of saturated subtraction of the two inputs Value1, Value2
+ *
+ * Remarks:
+ * The range check is made before the subtraction, so the difference
+ * itself is never allowed to overflow.
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ *
+ * Return:
+ * [out] Value1 - Value2 if the difference fits in OMX_S32,
+ *       OMX_MAX_S32 if the difference is too large,
+ *       OMX_MIN_S32 if the difference is too small
+ *
+ **/
+
+OMX_S32 armSatSub_S32(OMX_S32 Value1,OMX_S32 Value2)
+{
+    const OMX_S32 maxS32 = (OMX_S32)OMX_MAX_S32;
+    const OMX_S32 minS32 = (OMX_S32)OMX_MIN_S32;
+
+    /*
+     * Signed overflow is undefined behaviour in C, so the range check
+     * must be made before the subtraction instead of inspecting the
+     * sign of an already overflowed difference.
+     */
+    if (Value2 < 0)
+    {
+        if (Value1 > maxS32 + Value2)
+        {
+            /*Result has saturated in positive side*/
+            return maxS32;
+        }
+    }
+    else if (Value2 > 0)
+    {
+        if (Value1 < minS32 + Value2)
+        {
+            /*Result has saturated in negative side*/
+            return minS32;
+        }
+    }
+
+    /*Result has not saturated*/
+    return Value1 - Value2;
+}
+
+/**
+ * Function :armSatMac_S32()
+ *
+ * Description :
+ * Multiplies Value1 by Value2 and accumulates the product into Mac
+ * with a saturating addition (see armSatAdd_S32)
+ *
+ * Parameters:
+ * [in] Mac Accumulator
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ *
+ * Return:
+ * [out] Mac + Value1 * Value2, saturated by armSatAdd_S32
+ *
+ * Remarks:
+ * The product of two OMX_S16 operands always fits in 32 bits.
+ **/
+
+OMX_S32 armSatMac_S32(OMX_S32 Mac,OMX_S16 Value1,OMX_S16 Value2)
+{
+    const OMX_S32 product = (OMX_S32)(Value1*Value2);
+
+    return armSatAdd_S32( Mac , product );
+}
+
+/**
+ * Function :armSatMac_S16S32_S32
+ *
+ * Description :
+ * Returns the result of saturated MAC operation of the three inputs delayElem, filTap , mac
+ *
+ * mac = mac + Saturate_in_32Bits(delayElem * filTap)
+ *
+ * Remarks:
+ * armSatMulS16S32_S32 is used to detect whether the product leaves the
+ * 32 bit range before the full product is formed.
+ *
+ * Parameters:
+ * [in] mac Accumulator
+ * [in] delayElem 32 bit Operand
+ * [in] filTap 16 bit Operand
+ *
+ * Return:
+ * [out] mac Result of operation
+ **/
+
+OMX_S32 armSatMac_S16S32_S32(OMX_S32 mac, OMX_S32 delayElem, OMX_S16 filTap )
+{
+    /* Scaled-down product, as returned by armSatMulS16S32_S32 */
+    const OMX_S32 upperPart = armSatMulS16S32_S32(filTap,delayElem);
+    OMX_S32 term;
+
+    if ( upperPart > OMX_MAX_S16 )
+    {
+        term = OMX_MAX_S32;
+    }
+    else if( upperPart < OMX_MIN_S16 )
+    {
+        term = OMX_MIN_S32;
+    }
+    else
+    {
+        term = delayElem * filTap;
+    }
+
+    return armSatAdd_S32(mac,term);
+}
+
+
+/**
+ * Function :armSatRoundRightShift_S32_S16
+ *
+ * Description :
+ * Returns the result of rounded right shift operation of input by the
+ * scalefactor, saturated to 16 bits
+ *
+ * output = Saturate_in_16Bits( RoundedRightShift( input , shift ) )
+ *
+ * Remarks:
+ * The shift itself is delegated to armSatRoundLeftShift_S32 with the
+ * shift amount negated.
+ *
+ * Parameters:
+ * [in] input The input to be operated on
+ * [in] shift The shift number
+ *
+ * Return:
+ * [out] Result of operation
+ **/
+
+OMX_S16 armSatRoundRightShift_S32_S16(OMX_S32 input, OMX_INT shift)
+{
+    /* A right shift is requested as a left shift by the negated amount */
+    const OMX_S32 shifted = armSatRoundLeftShift_S32(input,-shift);
+
+    if ( shifted < OMX_MIN_S16 )
+    {
+        return (OMX_S16)OMX_MIN_S16;
+    }
+    if ( shifted > OMX_MAX_S16 )
+    {
+        return (OMX_S16)OMX_MAX_S16;
+    }
+    return (OMX_S16)shifted;
+}
+
+/**
+ * Function :armSatRoundLeftShift_S32()
+ *
+ * Description :
+ * Returns the result of saturating left-shift operation on input
+ * Or rounded Right shift if the input Shift is negative.
+ *
+ * Parameters:
+ * [in] Value Operand
+ * [in] Shift Number of bit positions: >= 0 shifts left, < 0 shifts right
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S32 armSatRoundLeftShift_S32(OMX_S32 Value, OMX_INT Shift)
+{
+    OMX_INT remaining;
+
+    if (Shift >= 0)
+    {
+        /* Left shift: double the value Shift times, saturating each time */
+        for (remaining = Shift; remaining > 0; remaining--)
+        {
+            Value = armSatAdd_S32(Value, Value);
+        }
+        return Value;
+    }
+
+    /* Right shift by -Shift, adding half of the divisor first to round */
+    Shift = -Shift;
+    Value = armSatAdd_S32(Value, (1 << (Shift - 1)));
+    return Value >> Shift;
+}
+
+/**
+ * Function :armSatRoundLeftShift_S64()
+ *
+ * Description :
+ * Returns the result of saturating left-shift operation on input
+ * Or rounded Right shift if the input Shift is negative.
+ *
+ * Parameters:
+ * [in] Value Operand
+ * [in] Shift Number of bit positions: >= 0 shifts left, < 0 shifts right
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S64 armSatRoundLeftShift_S64(OMX_S64 Value, OMX_INT Shift)
+{
+    OMX_INT remaining;
+
+    if (Shift >= 0)
+    {
+        /* Left shift: double the value Shift times, saturating each time */
+        for (remaining = Shift; remaining > 0; remaining--)
+        {
+            Value = armSatAdd_S64(Value, Value);
+        }
+        return Value;
+    }
+
+    /* Right shift by -Shift, adding half of the divisor first to round */
+    Shift = -Shift;
+    Value = armSatAdd_S64(Value, ((OMX_S64)1 << (Shift - 1)));
+    return Value >> Shift;
+}
+
+/**
+ * Function :armSatMulS16S32_S32()
+ *
+ * Description :
+ * Multiplies a S16 data type with an S32 data type and returns
+ * hi*input1 + ((lo*input1) >> 16) in a S32 container, where hi and lo
+ * are the upper and lower 16 bits of input2
+ *
+ * Parameters:
+ * [in] input1 Operand 1
+ * [in] input2 Operand 2
+ *
+ * Return:
+ * [out] Result of operation
+ **/
+
+OMX_S32 armSatMulS16S32_S32(OMX_S16 input1,OMX_S32 input2)
+{
+    OMX_S16 hi2,lo1;
+    OMX_U16 lo2;
+
+    OMX_S32 temp1,temp2;
+    OMX_S32 result;
+
+    lo1 = input1;
+
+    hi2 = ( input2 >> 16 );
+    /* Mask in the unsigned domain: left shifting a negative or large
+     * signed value is undefined behaviour */
+    lo2 = (OMX_U16)( (OMX_U32)input2 & 0xFFFF );
+
+    temp1 = hi2 * lo1;
+    temp2 = ( lo2* lo1 ) >> 16;
+
+    result = armSatAdd_S32(temp1,temp2);
+    return result;
+}
+
+/**
+ * Function :armSatMulS32S32_S32()
+ *
+ * Description :
+ * Multiplies two S32 data types and returns the sum of the partial
+ * products hi1*hi2, (hi1*lo2) >> 16 and (hi2*lo1) >> 16 in a S32 container
+ *
+ * Parameters:
+ * [in] input1 Operand 1
+ * [in] input2 Operand 2
+ *
+ * Return:
+ * [out] Result of operation
+ **/
+
+OMX_S32 armSatMulS32S32_S32(OMX_S32 input1,OMX_S32 input2)
+{
+    OMX_S16 hi1,hi2;
+    OMX_U16 lo1,lo2;
+
+    OMX_S32 temp1,temp2,temp3;
+    OMX_S32 result;
+
+    /* The low halves are masked in the unsigned domain: left shifting
+     * a negative or large signed value is undefined behaviour */
+    hi1 = ( input1 >> 16 );
+    lo1 = (OMX_U16)( (OMX_U32)input1 & 0xFFFF );
+
+    hi2 = ( input2 >> 16 );
+    lo2 = (OMX_U16)( (OMX_U32)input2 & 0xFFFF );
+
+    temp1 = hi1 * hi2;
+    temp2 = ( hi1* lo2 ) >> 16;
+    temp3 = ( hi2* lo1 ) >> 16;
+
+    result = armSatAdd_S32(temp1,temp2);
+    result = armSatAdd_S32(result,temp3);
+    return result;
+}
+
+/**
+ * Function :armIntDivAwayFromZero()
+ *
+ * Description : Integer division with rounding to the nearest integer.
+ * Half-integer values are rounded away from zero
+ * unless otherwise specified. For example 3//2 is rounded
+ * to 2, and -3//2 is rounded to -2.
+ *
+ * Remarks:
+ * The quotient is formed in double precision, biased by one half in
+ * the direction of its sign and then truncated by the conversion to
+ * OMX_S32.
+ * No check is made for Deno being zero.
+ *
+ * Parameters:
+ * [in] Num Operand 1
+ * [in] Deno Operand 2
+ *
+ * Return:
+ * [out] Result of operation Num//Deno
+ *
+ *
+ **/
+
+OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno)
+{
+    const OMX_F64 quotient = ((OMX_F64)Num)/((OMX_F64)Deno);
+
+    /* Half a unit in the direction of the quotient's sign, so that the
+     * truncating cast rounds to nearest with halves away from zero */
+    const OMX_F64 bias = (quotient >= 0) ? 0.5 : -0.5;
+
+    return (OMX_S32)(quotient + bias);
+}
+
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_Bitstream.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_Bitstream.c
new file mode 100644
index 0000000..9ef9319
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_Bitstream.c
@@ -0,0 +1,329 @@
+/**
+ *
+ * File Name: armCOMM_Bitstream.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Defines bitstream encode and decode functions common to all codecs
+ */
+
+#include "omxtypes.h"
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+
+/***************************************
+ * Fixed bit length Decode
+ ***************************************/
+
+/**
+ * Function: armLookAheadBits()
+ *
+ * Description:
+ * Get the next N bits from the bitstream without advancing the bitstream pointer
+ *
+ * Parameters:
+ * [in] **ppBitStream (five bytes are read starting at *ppBitStream)
+ * [in] *pOffset
+ * [in] N=1...32
+ *
+ * Returns Value
+ */
+
+OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+
+    armAssert(Offset>=0 && Offset<=7);
+    armAssert(N>=1 && N<=32);
+
+    /* Read next 32 bits; bytes are widened first so no int sign bit is shifted into */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16)
+          | ((OMX_U32)pBitStream[2] << 8) | ((OMX_U32)pBitStream[3]);
+    Value = (Value << Offset ) | ((OMX_U32)pBitStream[4] >> (8-Offset));
+    /* Return N bits */
+    return Value >> (32-N);
+}
+
+
+/**
+ * Function: armGetBits()
+ *
+ * Description:
+ * Read N bits from the bitstream
+ *
+ * Parameters:
+ * [in] *ppBitStream (five bytes are read starting at *ppBitStream)
+ * [in] *pOffset
+ * [in] N=1..32 (N=0 returns 0 and leaves the position unchanged)
+ *
+ * [out] *ppBitStream
+ * [out] *pOffset
+ * Returns Value
+ */
+
+OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+
+    if(N == 0)
+    {
+        return 0;
+    }
+
+    armAssert(Offset>=0 && Offset<=7);
+    armAssert(N>=1 && N<=32);
+
+    /* Read next 32 bits; bytes are widened first so no int sign bit is shifted into */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16)
+          | ((OMX_U32)pBitStream[2] << 8) | ((OMX_U32)pBitStream[3]);
+    Value = (Value << Offset ) | ((OMX_U32)pBitStream[4] >> (8-Offset));
+
+    /* Advance bitstream pointer by N bits */
+    Offset += N;
+    *ppBitStream = pBitStream + (Offset>>3);
+    *pOffset = Offset & 7;
+
+    /* Return N bits */
+    return Value >> (32-N);
+}
+
+/**
+ * Function: armByteAlign()
+ *
+ * Description:
+ * Align the pointer *ppBitStream to the next byte boundary
+ *
+ * Parameters:
+ * [in,out] *ppBitStream advanced by one byte if *pOffset is positive
+ * [in,out] *pOffset reset to 0 if it is positive
+ *
+ **/
+
+OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset)
+{
+    if(*pOffset <= 0)
+    {
+        /* Offset is not positive: leave pointer and offset untouched */
+        return;
+    }
+
+    *pOffset = 0;
+    *ppBitStream += 1;
+}
+
+/**
+ * Function: armSkipBits()
+ *
+ * Description:
+ * Skip N bits from the value at *ppBitStream
+ *
+ * Remarks:
+ * No data is read from the bitstream; only the position is updated.
+ *
+ * Parameters:
+ * [in] *ppBitStream
+ * [in] *pOffset
+ * [in] N
+ *
+ * [out] *ppBitStream
+ * [out] *pOffset
+ **/
+
+
+OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N)
+{
+    /* Bit position after the skip, counted from the start of the current byte */
+    const OMX_INT bitPosition = *pOffset + N;
+
+    *ppBitStream += (bitPosition>>3);
+    *pOffset = bitPosition & 7;
+}
+
+/***************************************
+ * Variable bit length Decode
+ ***************************************/
+
+/**
+ * Function: armUnPackVLC32()
+ *
+ * Description:
+ * Variable length decode of variable length symbol (max size 32 bits) read from
+ * the bit stream pointed by *ppBitStream at *pOffset by using the table
+ * pointed by pCodeBook (terminated by an entry whose codeLen is 0)
+ *
+ * Parameters:
+ * [in] *ppBitStream
+ * [in] *pOffset
+ * [in] pCodeBook
+ *
+ * [out] *ppBitStream, *pOffset advanced past the matched code word
+ *
+ * Returns : Code Book Index if successful.
+ * : ARM_NO_CODEBOOK_INDEX if search fails.
+ **/
+#ifndef C_OPTIMIZED_IMPLEMENTATION
+
+OMX_U16 armUnPackVLC32(
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pOffset,
+    const ARM_VLC32 *pCodeBook
+)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+    OMX_INT Index;
+
+    armAssert(Offset>=0 && Offset<=7);
+
+    /* Read next 32 bits; bytes are widened first so no int sign bit is shifted into */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16)
+          | ((OMX_U32)pBitStream[2] << 8) | ((OMX_U32)pBitStream[3]);
+    Value = (Value << Offset ) | ((OMX_U32)pBitStream[4] >> (8-Offset));
+
+    /* Search through the codebook */
+    for (Index=0; pCodeBook->codeLen != 0; Index++)
+    {
+        if (pCodeBook->codeWord == (Value >> (32 - pCodeBook->codeLen)))
+        {
+            Offset = Offset + pCodeBook->codeLen;
+            *ppBitStream = pBitStream + (Offset >> 3) ;
+            *pOffset = Offset & 7;
+
+            return Index;
+        }
+        pCodeBook++;
+    }
+
+    /* No code match found */
+    return ARM_NO_CODEBOOK_INDEX;
+}
+
+#endif
+
+/***************************************
+ * Fixed bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackBits
+ *
+ * Description:
+ * Pack the codeLength low-order bits of codeWord into the bitstream, most significant bit first
+ *
+ * Remarks: the byte at the final position is always written; its bits below the new position are written as zero
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte
+ * in the bit stream.
+ * [in] pOffset pointer to the bit position in the byte
+ * pointed by *ppBitStream. Valid within 0
+ * to 7.
+ * [in] codeWord Code word that needs to be inserted into the
+ * bitstream
+ * [in] codeLength Length of the code word, valid range 1...32
+ *
+ * [out] ppBitStream *ppBitStream is updated after the code word is written,
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pOffset *pOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr on success. The argument checks go through armRetArgErrIf with OMX_Sts_BadArgErr.
+ *
+ */
+
+OMXResult armPackBits (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pOffset,
+ OMX_U32 codeWord,
+ OMX_INT codeLength
+)
+{
+ OMX_U8 *pBitStream = *ppBitStream;
+ OMX_INT Offset = *pOffset;
+ OMX_U32 Value;
+
+ /* checking argument validity: Offset must be 0...7, codeLength 1...32 */
+ armRetArgErrIf(Offset < 0, OMX_Sts_BadArgErr);
+ armRetArgErrIf(Offset > 7, OMX_Sts_BadArgErr);
+ armRetArgErrIf(codeLength < 1, OMX_Sts_BadArgErr);
+ armRetArgErrIf(codeLength > 32, OMX_Sts_BadArgErr);
+
+ /* Prepare the first byte: left-align the code word, keep the top Offset bits already in the byte, append the first code bits */
+ codeWord = codeWord << (32-codeLength);
+ Value = (pBitStream[0] >> (8-Offset)) << (8-Offset);
+ Value = Value | (codeWord >> (24+Offset));
+
+ /* Write out whole bytes, for as long as the remaining bits fill the rest of the current byte */
+ while (8-Offset <= codeLength)
+ {
+ *pBitStream++ = (OMX_U8)Value;
+ codeWord = codeWord << (8-Offset);
+ codeLength = codeLength - (8-Offset);
+ Offset = 0;
+ Value = codeWord >> 24;
+ }
+
+ /* Write out final partial byte (stored even when no bits are left) and publish the new position */
+ *pBitStream = (OMX_U8)Value;
+ *ppBitStream = pBitStream;
+ *pOffset = Offset + codeLength;
+
+ return OMX_Sts_NoErr;
+}
+
+/***************************************
+ * Variable bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackVLC32
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream by handing its codeWord and
+ * codeLen fields to armPackBits
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte
+ * in the bit stream.
+ * [in] pBitOffset pointer to the bit position in the byte
+ * pointed by *ppBitStream. Valid within 0
+ * to 7.
+ * [in] code VLC code word that needs to be inserted into the
+ * bitstream
+ *
+ * [out] ppBitStream *ppBitStream is updated so that it points to the
+ * current byte in the bit stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in that byte.
+ *
+ * Return Value:
+ * The result of armPackBits, unchanged.
+ */
+
+OMXResult armPackVLC32 (
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    ARM_VLC32 code
+)
+{
+    OMXResult status;
+
+    /* The entry carries both the code bits and their count */
+    status = armPackBits(ppBitStream, pBitOffset, code.codeWord, code.codeLen);
+    return status;
+}
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_IDCTTable.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_IDCTTable.c
new file mode 100644
index 0000000..9e4679c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_IDCTTable.c
@@ -0,0 +1,60 @@
+/**
+ *
+ * File Name: armCOMM_IDCTTable.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armCOMM_IDCTTable.c
+ * Brief: Defines Tables used in IDCT computation
+ *
+ */
+
+#include "armCOMM_IDCTTable.h"
+
+ /* Table of s(u)*A(u)*A(v)/16 at Q15
+ * s(u)=1.0 0 <= u <= 5
+ * s(6)=2.0
+ * s(7)=4.0
+ * A(0) = 2*sqrt(2)
+ * A(u) = 4*cos(u*pi/16) for (u!=0)
+ */
+
+__align(4) const OMX_U16 armCOMM_IDCTPreScale [64] =
+{
+ 0x4000, 0x58c5, 0x539f, 0x4b42, 0x4000, 0x3249, 0x4546, 0x46a1,
+ 0x58c5, 0x7b21, 0x73fc, 0x6862, 0x58c5, 0x45bf, 0x6016, 0x61f8,
+ 0x539f, 0x73fc, 0x6d41, 0x6254, 0x539f, 0x41b3, 0x5a82, 0x5c48,
+ 0x4b42, 0x6862, 0x6254, 0x587e, 0x4b42, 0x3b21, 0x5175, 0x530d,
+ 0x4000, 0x58c5, 0x539f, 0x4b42, 0x4000, 0x3249, 0x4546, 0x46a1,
+ 0x3249, 0x45bf, 0x41b3, 0x3b21, 0x3249, 0x2782, 0x366d, 0x377e,
+ 0x22a3, 0x300b, 0x2d41, 0x28ba, 0x22a3, 0x1b37, 0x257e, 0x263a,
+ 0x11a8, 0x187e, 0x1712, 0x14c3, 0x11a8, 0x0de0, 0x131d, 0x137d
+};
+ /* Above array armCOMM_IDCTPreScale, in Q23 format */
+const OMX_U32 armCOMM_IDCTPreScaleU32 [64] =
+{
+ 0x400000, 0x58c543, 0x539eba, 0x4b418c, 0x400000, 0x3248d4, 0x4545ea, 0x46a157,
+ 0x58c543, 0x7b20d8, 0x73fbfc, 0x686214, 0x58c543, 0x45bf1f, 0x6015a5, 0x61f78b,
+ 0x539eba, 0x73fbfc, 0x6d413d, 0x6253a6, 0x539eba, 0x41b328, 0x5a827a, 0x5c4869,
+ 0x4b418c, 0x686214, 0x6253a6, 0x587de3, 0x4b418c, 0x3b20d8, 0x5174e0, 0x530d69,
+ 0x400000, 0x58c543, 0x539eba, 0x4b418c, 0x400000, 0x3248d4, 0x4545ea, 0x46a157,
+ 0x3248d4, 0x45bf1f, 0x41b328, 0x3b20d8, 0x3248d4, 0x27821d, 0x366d72, 0x377e6b,
+ 0x22a2f5, 0x300ad3, 0x2d413d, 0x28ba70, 0x22a2f5, 0x1b36b9, 0x257d86, 0x26398d,
+ 0x11a856, 0x187de3, 0x17121a, 0x14c35a, 0x11a856, 0x0ddf9b, 0x131cc7, 0x137ca2
+};
+
+const OMX_U16 armCOMM_IDCTCoef [4] =
+{
+ 0x5a82, /* InvSqrt2 */
+ 0x30fc, /* SinPIBy8 */
+ 0x7642, /* CosPIBy8 */
+ 0x0000
+};
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_MaskTable.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_MaskTable.c
new file mode 100644
index 0000000..3241db2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_MaskTable.c
@@ -0,0 +1,45 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armCOMM_MaskTable.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Mask Table to mask the end of array.
+ *
+ */
+
+#include "omxtypes.h"
+
+#define MaskTableSize 72
+
+const OMX_U16 armCOMM_qMaskTable16[MaskTableSize] =
+{
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
+};
+
+const OMX_U8 armCOMM_qMaskTable8[MaskTableSize] =
+{
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVC.h
new file mode 100644
index 0000000..7fa7716
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVC.h
@@ -0,0 +1,1153 @@
+/**
+ *
+ * File Name: armVC.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVC.h
+ * Brief: Declares APIs/basic data types used across the OpenMAX Video domain
+ *
+ */
+
+
+#ifndef _armVideo_H_
+#define _armVideo_H_
+
+#include "omxVC.h"
+#include "armCOMM_Bitstream.h"
+
+/**
+ * ARM specific state structure to hold Motion Estimation information.
+ */
+
+struct m4p2_MESpec
+{
+ OMXVCM4P2MEParams MEParams;
+ OMXVCM4P2MEMode MEMode;
+};
+
+struct m4p10_MESpec
+{
+ OMXVCM4P10MEParams MEParams;
+ OMXVCM4P10MEMode MEMode;
+};
+
+typedef struct m4p2_MESpec ARMVCM4P2_MESpec;
+typedef struct m4p10_MESpec ARMVCM4P10_MESpec;
+
+/**
+ * Function: armVCM4P2_CompareMV
+ *
+ * Description:
+ * Performs comparison of motion vectors and SADs to decide the
+ * best MV and SAD
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] mvX x coordinate of the candidate motion vector
+ * [in] mvY y coordinate of the candidate motion vector
+ * [in] candSAD Candidate SAD
+ * [in] bestMVX x coordinate of the best motion vector
+ * [in] bestMVY y coordinate of the best motion vector
+ * [in] bestSAD best SAD
+ *
+ * Return Value:
+ * OMX_INT -- 1 to indicate that the current sad is the best
+ * 0 to indicate that it is NOT the best SAD
+ */
+
+OMX_INT armVCM4P2_CompareMV (
+ OMX_S16 mvX,
+ OMX_S16 mvY,
+ OMX_INT candSAD,
+ OMX_S16 bestMVX,
+ OMX_S16 bestMVY,
+ OMX_INT bestSAD);
+
+/**
+ * Function: armVCM4P2_ACDCPredict
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+ * to the function call, prediction direction (predDir) should be selected
+ * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDst pointer to the coefficient buffer which contains
+ * the quantized coefficient residuals (PQF) of the
+ * current block
+ * [in] pPredBufRow pointer to the coefficient row buffer
+ * [in] pPredBufCol pointer to the coefficient column buffer
+ * [in] curQP quantization parameter of the current block. curQP
+ * may be equal to predQP, especially when the current
+ * block and the predictor block are in the same
+ * macroblock.
+ * [in] predQP quantization parameter of the predictor block
+ * [in] predDir indicates the prediction direction which takes one
+ * of the following values:
+ * OMX_VIDEO_HORIZONTAL predict horizontally
+ * OMX_VIDEO_VERTICAL predict vertically
+ * [in] ACPredFlag a flag indicating if AC prediction should be
+ * performed. It is equal to ac_pred_flag in the bit
+ * stream syntax of MPEG-4
+ * [in] videoComp video component type (luminance, chrominance or
+ * alpha) of the current block
+ * [in] flag This flag defines whether one wants to use this function to
+ * calculate PQF (set 1, prediction) or QF (set 0, reconstruction)
+ * [out] pPreACPredict pointer to the predicted coefficients buffer.
+ * Filled ONLY if it is not NULL
+ * [out] pSrcDst pointer to the coefficient buffer which contains
+ * the quantized coefficients (QF) of the current
+ * block
+ * [out] pPredBufRow pointer to the updated coefficient row buffer
+ * [out] pPredBufCol pointer to the updated coefficient column buffer
+ * [out] pSumErr pointer to the updated sum of the difference
+ * between predicted and unpredicted coefficients
+ * If this is NULL, do not update
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_ACDCPredict(
+ OMX_S16 * pSrcDst,
+ OMX_S16 * pPreACPredict,
+ OMX_S16 * pPredBufRow,
+ OMX_S16 * pPredBufCol,
+ OMX_INT curQP,
+ OMX_INT predQP,
+ OMX_INT predDir,
+ OMX_INT ACPredFlag,
+ OMXVCM4P2VideoComponent videoComp,
+ OMX_U8 flag,
+ OMX_INT *pSumErr
+);
+
+/**
+ * Function: armVCM4P2_SetPredDir
+ *
+ * Description:
+ * Detects the prediction direction
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] blockIndex block index indicating the component type and
+ * position as defined in subclause 6.1.3.8, of ISO/IEC
+ * 14496-2. Furthermore, indexes 6 to 9 indicate the
+ * alpha blocks spatially corresponding to luminance
+ * blocks 0 to 3 in the same macroblock.
+ * [in] pCoefBufRow pointer to the coefficient row buffer
+ * [in] pQpBuf pointer to the quantization parameter buffer
+ * [out] predQP quantization parameter of the predictor block
+ * [out] predDir indicates the prediction direction which takes one
+ * of the following values:
+ * OMX_VIDEO_HORIZONTAL predict horizontally
+ * OMX_VIDEO_VERTICAL predict vertically
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_SetPredDir(
+ OMX_INT blockIndex,
+ OMX_S16 *pCoefBufRow,
+ OMX_S16 *pCoefBufCol,
+ OMX_INT *predDir,
+ OMX_INT *predQP,
+ const OMX_U8 *pQpBuf
+);
+
+/**
+ * Function: armVCM4P2_EncodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs zigzag scanning and VLC encoding for one intra block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * by *ppBitStream. Valid within 0 to 7.
+ * [in] pQDctBlkCoef pointer to the quantized DCT coefficient
+ * [in] predDir AC prediction direction, which is used to decide
+ * the zigzag scan pattern. This takes one of the
+ * following values:
+ * OMX_VIDEO_NONE AC prediction not used.
+ * Performs classical zigzag
+ * scan.
+ * OMX_VIDEO_HORIZONTAL Horizontal prediction.
+ * Performs alternate-vertical
+ * zigzag scan.
+ * OMX_VIDEO_VERTICAL Vertical prediction.
+ * Performs alternate-horizontal
+ * zigzag scan.
+ * [in] pattern block pattern which is used to decide whether
+ * this block is encoded
+ * [in] start start indicates whether the encoding begins with 0th element
+ * or 1st.
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded,
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_EncodeVLCZigzag_Intra(
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ const OMX_S16 *pQDctBlkCoef,
+ OMX_U8 predDir,
+ OMX_U8 pattern,
+ OMX_INT shortVideoHeader,
+ OMX_U8 start
+);
+
+/**
+ * Function: armVCM4P2_DecodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bitstream buffer
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * to by *ppBitStream. *pBitOffset is valid within
+ * [0-7].
+ * [in] predDir AC prediction direction which is used to decide
+ * the zigzag scan pattern. It takes one of the
+ * following values:
+ * OMX_VIDEO_NONE AC prediction not used;
+ * perform classical zigzag scan;
+ * OMX_VIDEO_HORIZONTAL Horizontal prediction;
+ * perform alternate-vertical
+ * zigzag scan;
+ * OMX_VIDEO_VERTICAL Vertical prediction;
+ * thus perform
+ * alternate-horizontal
+ * zigzag scan.
+ * [in] videoComp video component type (luminance, chrominance or
+ * alpha) of the current block
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ * and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start start indicates whether the decoding begins with 0th element
+ * or 1st.
+ * [out] ppBitStream *ppBitStream is updated after the block is
+ * decoded, so that it points to the current byte
+ * in the bit stream buffer
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream
+ * [out] pDst pointer to the coefficient buffer of current
+ * block. Should be 32-bit aligned
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_DecodeVLCZigzag_Intra(
+ const OMX_U8 ** ppBitStream,
+ OMX_INT * pBitOffset,
+ OMX_S16 * pDst,
+ OMX_U8 predDir,
+ OMX_INT shortVideoHeader,
+ OMX_U8 start
+);
+
+/**
+ * Function: armVCM4P2_FillVLDBuffer
+ *
+ * Description:
+ * Performs filling of the coefficient buffer according to the run, level
+ * and sign, also updates the index
+ *
+ * Parameters:
+ * [in] storeRun Stored Run value (count of zeros)
+ * [in] storeLevel Stored Level value (non-zero value)
+ * [in] sign Flag indicating the sign of level
+ * [in] last status of the last flag
+ * [in] pIndex pointer to coefficient index in 8x8 matrix
+ * [out] pIndex pointer to updated coefficient index in 8x8
+ * matrix
+ * [in] pZigzagTable pointer to the zigzag tables
+ * [out] pDst pointer to the coefficient buffer of current
+ * block. Should be 32-bit aligned
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLDBuffer(
+ OMX_U32 storeRun,
+ OMX_S16 * pDst,
+ OMX_S16 storeLevel,
+ OMX_U8 sign,
+ OMX_U8 last,
+ OMX_U8 * index,
+ const OMX_U8 * pZigzagTable
+);
+
+/**
+ * Function: armVCM4P2_GetVLCBits
+ *
+ * Description:
+ * Performs escape mode decision based on the run, run+, level, level+ and
+ * last combinations.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * by *ppBitStream. Valid within 0 to 7
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ * and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start start indicates whether the decoding begins with
+ * 0th element or 1st.
+ * [in/out] pLast pointer to last status flag
+ * [in] runBeginSingleLevelEntriesL0 The run value from which level
+ * will be equal to 1: last == 0
+ * [in] IndexBeginSingleLevelEntriesL0 Array index in the VLC table
+ * pointing to the
+ * runBeginSingleLevelEntriesL0
+ * [in] runBeginSingleLevelEntriesL1 The run value from which level
+ * will be equal to 1: last == 1
+ * [in] IndexBeginSingleLevelEntriesL1 Array index in the VLC table
+ * pointing to the
+ * runBeginSingleLevelEntriesL1
+ * [in] pRunIndexTableL0 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pVlcTableL0 VLC table for last == 0
+ * [in] pRunIndexTableL1 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pVlcTableL1 VLC table for last == 1
+ * [in] pLMAXTableL0 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pLMAXTableL1 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pRMAXTableL0 Run MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pRMAXTableL1 Run MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [out]pDst pointer to the coefficient buffer of current
+ * block. Should be 32-bit aligned
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_GetVLCBits (
+ const OMX_U8 **ppBitStream,
+ OMX_INT * pBitOffset,
+ OMX_S16 * pDst,
+ OMX_INT shortVideoHeader,
+ OMX_U8 start,
+ OMX_U8 * pLast,
+ OMX_U8 runBeginSingleLevelEntriesL0,
+ OMX_U8 maxIndexForMultipleEntriesL0,
+ OMX_U8 maxRunForMultipleEntriesL1,
+ OMX_U8 maxIndexForMultipleEntriesL1,
+ const OMX_U8 * pRunIndexTableL0,
+ const ARM_VLC32 *pVlcTableL0,
+ const OMX_U8 * pRunIndexTableL1,
+ const ARM_VLC32 *pVlcTableL1,
+ const OMX_U8 * pLMAXTableL0,
+ const OMX_U8 * pLMAXTableL1,
+ const OMX_U8 * pRMAXTableL0,
+ const OMX_U8 * pRMAXTableL1,
+ const OMX_U8 * pZigzagTable
+);
+
+/**
+ * Function: armVCM4P2_PutVLCBits
+ *
+ * Description:
+ * Checks the type of Escape Mode and puts encoded bits for
+ * quantized DCT coefficients.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * by *ppBitStream. Valid within 0 to 7
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ * and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start start indicates whether the encoding begins with
+ * 0th element or 1st.
+ * [in] maxStoreRunL0 Max store possible (considering last and inter/intra)
+ * for last = 0
+ * [in] maxStoreRunL1 Max store possible (considering last and inter/intra)
+ * for last = 1
+ * [in] maxRunForMultipleEntriesL0
+ * The run value after which level
+ * will be equal to 1:
+ * (considering last and inter/intra status) for last = 0
+ * [in] maxRunForMultipleEntriesL1
+ * The run value after which level
+ * will be equal to 1:
+ * (considering last and inter/intra status) for last = 1
+ * [in] pRunIndexTableL0 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pVlcTableL0 VLC table for last == 0
+ * [in] pRunIndexTableL1 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pVlcTableL1 VLC table for last == 1
+ * [in] pLMAXTableL0 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pLMAXTableL1 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pRMAXTableL0 Run MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pRMAXTableL1 Run MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [out] pQDctBlkCoef pointer to the quantized DCT coefficient
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+
+OMXResult armVCM4P2_PutVLCBits (
+ OMX_U8 **ppBitStream,
+ OMX_INT * pBitOffset,
+ const OMX_S16 *pQDctBlkCoef,
+ OMX_INT shortVideoHeader,
+ OMX_U8 start,
+ OMX_U8 maxStoreRunL0,
+ OMX_U8 maxStoreRunL1,
+ OMX_U8 maxRunForMultipleEntriesL0,
+ OMX_U8 maxRunForMultipleEntriesL1,
+ const OMX_U8 * pRunIndexTableL0,
+ const ARM_VLC32 *pVlcTableL0,
+ const OMX_U8 * pRunIndexTableL1,
+ const ARM_VLC32 *pVlcTableL1,
+ const OMX_U8 * pLMAXTableL0,
+ const OMX_U8 * pLMAXTableL1,
+ const OMX_U8 * pRMAXTableL0,
+ const OMX_U8 * pRMAXTableL1,
+ const OMX_U8 * pZigzagTable
+);
+/**
+ * Function: armVCM4P2_FillVLCBuffer
+ *
+ * Description:
+ * Calculates the VLC bits depending on the escape type and inserts
+ * them into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * by *ppBitStream. Valid within 0 to 7
+ * [in] run Run value (count of zeros) to be encoded
+ * [in] level Level value (non-zero value) to be encoded
+ * [in] runPlus Calculated as runPlus = run - (RMAX + 1)
+ * [in] levelPlus Calculated as
+ * levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in] fMode Flag indicating the escape modes
+ * [in] last status of the last flag
+ * [in] maxRunForMultipleEntries
+ * The run value after which level will be equal to 1:
+ * (considering last and inter/intra status)
+ * [in] pRunIndexTable Run Index table defined in
+ * armVCM4P2_Huff_tables_VLC.h
+ * [in] pVlcTable VLC table defined in armVCM4P2_Huff_tables_VLC.h
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLCBuffer (
+ OMX_U8 **ppBitStream,
+ OMX_INT * pBitOffset,
+ OMX_U32 run,
+ OMX_S16 level,
+ OMX_U32 runPlus,
+ OMX_S16 levelPlus,
+ OMX_U8 fMode,
+ OMX_U8 last,
+ OMX_U8 maxRunForMultipleEntries,
+ const OMX_U8 *pRunIndexTable,
+ const ARM_VLC32 *pVlcTable
+);
+
+/**
+ * Function: armVCM4P2_CheckVLCEscapeMode
+ *
+ * Description:
+ * Performs escape mode decision based on the run, run+, level, level+ and
+ * last combinations.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] run Run value (count of zeros) to be encoded
+ * [in] level Level value (non-zero value) to be encoded
+ * [in] runPlus Calculated as runPlus = run - (RMAX + 1)
+ * [in] levelPlus Calculated as
+ * levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in] maxStoreRun Max store possible (considering last and inter/intra)
+ * [in] maxRunForMultipleEntries
+ * The run value after which level
+ * will be equal to 1:
+ * (considering last and inter/intra status)
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ * and escape mode 4 is used when shortVideoHeader==1.
+ * [in] pRunIndexTable Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c
+ * (considering last and inter/intra status)
+ *
+ *
+ * Return Value:
+ * Returns an Escape mode which can take values from 0 to 3
+ * 0 --> no escape mode, 1 --> escape type 1,
+ * 2 --> escape type 2, 3 --> escape type 3, check section 7.4.1.3
+ * in the MPEG ISO standard.
+ *
+ */
+
+OMX_U8 armVCM4P2_CheckVLCEscapeMode(
+ OMX_U32 run,
+ OMX_U32 runPlus,
+ OMX_S16 level,
+ OMX_S16 levelPlus,
+ OMX_U8 maxStoreRun,
+ OMX_U8 maxRunForMultipleEntries,
+ OMX_INT shortVideoHeader,
+ const OMX_U8 *pRunIndexTable
+);
+
+
+/**
+ * Function: armVCM4P2_BlockMatch_Integer
+ *
+ * Description:
+ * Performs a 16x16 block search; estimates motion vector and associated minimum SAD.
+ * Both the input and output motion vectors are represented using half-pixel units, and
+ * therefore a shift left or right by 1 bit may be required, respectively, to match the
+ * input or output MVs with other functions that either generate output MVs or expect
+ * input MVs represented using integer pixel units.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB that
+ * corresponds to the location of the current macroblock in the current
+ * plane.
+ * [in] refWidth width of the reference plane
+ * [in] pRefRect pointer to the valid rectangle in the reference plane, relative to the image origin.
+ * It is not limited to the image boundary, but depends on the padding. For example,
+ * if you pad 4 pixels outside the image border, then the value for the left border
+ * can be -4
+ * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane (linear array,
+ * 256 entries); must be aligned on an 8-byte boundary.
+ * [in] pCurrPointPos position of the current macroblock in the current plane
+ * [in] pSrcPreMV pointer to predicted motion vector; NULL indicates no predicted MV
+ * [in] pSrcPreSAD pointer to SAD associated with the predicted MV (referenced by pSrcPreMV)
+ * [in] searchRange search range for the 16x16 integer block, in units of full pixels; the search range
+ * is the same in all directions. It is inclusive of the boundary and specified in
+ * terms of integer pixel units.
+ * [in] pMESpec vendor-specific motion estimation specification structure; must have been allocated
+ * and then initialized using omxVCM4P2_MEInit prior to calling the block matching
+ * function.
+ * [in] BlockSize MacroBlock Size i.e either 16x16 or 8x8.
+ * [out] pDstMV pointer to estimated MV
+ * [out] pDstSAD pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error.
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Integer(
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ const OMXVCMotionVector *pSrcPreMV,
+ const OMX_INT *pSrcPreSAD,
+ void *pMESpec,
+ OMXVCMotionVector *pDstMV,
+ OMX_INT *pDstSAD,
+ OMX_U8 BlockSize
+);
+
+/**
+ * Function: armVCM4P2_BlockMatch_Half
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution. Returns the estimated
+ * motion vector and associated minimum SAD. This function estimates the half-pixel
+ * motion vector by interpolating the integer resolution motion vector referenced
+ * by the input parameter pSrcDstMV, i.e., the initial integer MV is generated
+ * externally. The input parameters pSrcRefBuf and pSearchPointRefPos should be
+ * shifted by the winning MV of 16x16 integer search prior to calling BlockMatch_Half_16x16.
+ * The function BlockMatch_Integer_16x16 may be used for integer motion estimation.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB
+ * that corresponds to the location of the current macroblock in
+ * the current plane.
+ * [in] refWidth width of the reference plane
+ * [in] pRefRect reference plane valid region rectangle
+ * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane
+ * (linear array, 256 entries); must be aligned on an 8-byte boundary.
+ * [in] pSearchPointRefPos position of the starting point for half pixel search (specified
+ * in terms of integer pixel units) in the reference plane.
+ * [in] rndVal rounding control bit for half pixel motion estimation;
+ * 0=rounding control disabled; 1=rounding control enabled
+ * [in] pSrcDstMV pointer to the initial MV estimate; typically generated during a prior
+ * 16X16 integer search and its unit is half pixel.
+ * [in] BlockSize MacroBlock Size i.e either 16x16 or 8x8.
+ * [out]pSrcDstMV pointer to estimated MV
+ * [out]pDstSAD pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Half(
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pSearchPointRefPos,
+ OMX_INT rndVal,
+ OMXVCMotionVector *pSrcDstMV,
+ OMX_INT *pDstSAD,
+ OMX_U8 BlockSize
+);
+/**
+ * Function: armVCM4P2_PadMV
+ *
+ * Description:
+ * Performs motion vector padding for a macroblock.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDstMV pointer to motion vector buffer of the current
+ * macroblock
+ * [in] pTransp pointer to transparent status buffer of the
+ * current macroblock
+ * [out] pSrcDstMV pointer to motion vector buffer in which the
+ * motion vectors have been padded
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_PadMV(
+ OMXVCMotionVector * pSrcDstMV,
+ OMX_U8 * pTransp
+);
+
+/*
+ * H.264 Specific Declarations
+ */
+/* Defines */
+#define ARM_M4P10_Q_OFFSET (15)
+
+
+/* Dequant tables */
+
+extern const OMX_U8 armVCM4P10_PosToVCol4x4[16];
+extern const OMX_U8 armVCM4P10_PosToVCol2x2[4];
+extern const OMX_U8 armVCM4P10_VMatrix[6][3];
+extern const OMX_U32 armVCM4P10_MFMatrix[6][3];
+
+
+/*
+ * Description:
+ * This function performs the work required by the OpenMAX
+ * DecodeCoeffsToPair function and DecodeChromaDCCoeffsToPair.
+ * Since most of the code is common we share it here.
+ *
+ * Parameters:
+ * [in] ppBitStream Double pointer to current byte in bit stream buffer
+ * [in] pOffset Pointer to current bit position in the byte pointed
+ * to by *ppBitStream
+ * [in] sMaxNumCoeff Maximum number of non-zero coefficients in current
+ * block (4,15 or 16)
+ * [in] nTable Table number (0 to 4) according to the five columns
+ * of Table 9-5 in the H.264 spec
+ * [out] ppBitStream *ppBitStream is updated after each block is decoded
+ * [out] pOffset *pOffset is updated after each block is decoded
+ * [out] pNumCoeff Pointer to the number of nonzero coefficients in
+ * this block
+ * [out] ppPosCoefbuf Double pointer to destination residual
+ * coefficient-position pair buffer
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+
+ */
+
+OMXResult armVCM4P10_DecodeCoeffsToPair(
+ const OMX_U8** ppBitStream,
+ OMX_S32* pOffset,
+ OMX_U8* pNumCoeff,
+ OMX_U8**ppPosCoefbuf,
+ OMX_INT nTable,
+ OMX_INT sMaxNumCoeff
+ );
+
+/*
+ * Description:
+ * Perform DC style intra prediction, averaging upper and left block
+ *
+ * Parameters:
+ * [in] pSrcLeft Pointer to the buffer of 4 left coefficients:
+ * p[x, y] (x = -1, y = 0..3)
+ * [in] pSrcAbove Pointer to the buffer of 4 above coefficients:
+ * p[x,y] (x = 0..3, y = -1)
+ * [in] leftStep Step of left coefficient buffer
+ * [in] dstStep Step of the destination buffer
+ * [in] availability Neighboring 16x16 MB availability flag
+ * [out] pDst Pointer to the destination buffer
+ *
+ * Return Value:
+ * None
+ */
+
+void armVCM4P10_PredictIntraDC4x4(
+ const OMX_U8* pSrcLeft,
+ const OMX_U8 *pSrcAbove,
+ OMX_U8* pDst,
+ OMX_INT leftStep,
+ OMX_INT dstStep,
+ OMX_S32 availability
+);
+
+/*
+ * Description
+ * Unpack a 4x4 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in] ppSrc Double pointer to residual coefficient-position pair
+ * buffer output by CAVLC decoding
+ * [out] ppSrc *ppSrc is updated to the start of next non empty block
+ * [out] pDst Pointer to unpacked 4x4 block
+ */
+
+void armVCM4P10_UnpackBlock4x4(
+ const OMX_U8 **ppSrc,
+ OMX_S16* pDst
+);
+
+/*
+ * Description
+ * Unpack a 2x2 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in] ppSrc Double pointer to residual coefficient-position pair
+ * buffer output by CAVLC decoding
+ * [out] ppSrc *ppSrc is updated to the start of next non empty block
+ * [out] pDst Pointer to unpacked 2x2 block
+ */
+
+void armVCM4P10_UnpackBlock2x2(
+ const OMX_U8 **ppSrc,
+ OMX_S16* pDst
+);
+
+/*
+ * Description
+ * Deblock one boundary pixel
+ *
+ * Parameters:
+ * [in] pQ0 Pointer to pixel q0
+ * [in] Step Step between pixels q0 and q1
+ * [in] tC0 Edge threshold value
+ * [in] alpha alpha threshold value
+ * [in] beta beta threshold value
+ * [in] bS deblocking strength
+ * [in] ChromaFlag True for chroma blocks
+ * [out] pQ0 Deblocked pixels
+ *
+ */
+
+void armVCM4P10_DeBlockPixel(
+ OMX_U8 *pQ0, /* pointer to the pixel q0 */
+ int Step, /* step between pixels q0 and q1 */
+ int tC0, /* edge threshold value */
+ int alpha, /* alpha */
+ int beta, /* beta */
+ int bS, /* deblocking strength */
+ int ChromaFlag
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfHor_Luma
+ *
+ * Description:
+ * This function performs interpolation for horizontal 1/2-pel positions
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to interpolate
+ * in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [out] pDst Pointer to the interpolation buffer of the 1/2-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfHor_Luma(
+ const OMX_U8* pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8* pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfVer_Luma
+ *
+ * Description:
+ * This function performs interpolation for vertical 1/2-pel positions
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to interpolate
+ * in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [out] pDst Pointer to the interpolation buffer of the 1/2-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfVer_Luma(
+ const OMX_U8* pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8* pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfDiag_Luma
+ *
+ * Description:
+ * This function performs interpolation for (1/2, 1/2) positions
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to interpolate
+ * in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [out] pDst Pointer to the interpolation buffer of the (1/2,1/2)-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfDiag_Luma(
+ const OMX_U8* pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8* pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+/*
+ * Description:
+ * Transform Residual 4x4 Coefficients
+ *
+ * Parameters:
+ * [in] pSrc Source 4x4 block
+ * [out] pDst Destination 4x4 block
+ *
+ */
+
+void armVCM4P10_TransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc);
+
+/*
+ * Description:
+ * Forward Transform Residual 4x4 Coefficients
+ *
+ * Parameters:
+ * [in] pSrc Source 4x4 block
+ * [out] pDst Destination 4x4 block
+ *
+ */
+
+void armVCM4P10_FwdTransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc);
+
+OMX_INT armVCM4P10_CompareMotionCostToMV (
+ OMX_S16 mvX,
+ OMX_S16 mvY,
+ OMXVCMotionVector diffMV,
+ OMX_INT candSAD,
+ OMXVCMotionVector *bestMV,
+ OMX_U32 nLamda,
+ OMX_S32 *pBestCost);
+
+/**
+ * Function: armVCCOMM_SAD
+ *
+ * Description:
+ * This function calculates the SAD for NxM blocks.
+ *
+ * Remarks:
+ *
+ * [in] pSrcOrg Pointer to the original block
+ * [in] iStepOrg Step of the original block buffer
+ * [in] pSrcRef Pointer to the reference block
+ * [in] iStepRef Step of the reference block buffer
+ * [in] iHeight Height of the block
+ * [in] iWidth Width of the block
+ * [out] pDstSAD Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCCOMM_SAD(
+ const OMX_U8* pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8* pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_S32* pDstSAD,
+ OMX_U32 iHeight,
+ OMX_U32 iWidth);
+
+/**
+ * Function: armVCCOMM_Average
+ *
+ * Description:
+ * This function calculates the average of two blocks and stores the result.
+ *
+ * Remarks:
+ *
+ * [in] pPred0 Pointer to the top-left corner of reference block 0
+ * [in] pPred1 Pointer to the top-left corner of reference block 1
+ * [in] iPredStep0 Step of reference block 0
+ * [in] iPredStep1 Step of reference block 1
+ * [in] iDstStep Step of the destination buffer
+ * [in] iWidth Width of the blocks
+ * [in] iHeight Height of the blocks
+ * [out] pDstPred Pointer to the destination buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCCOMM_Average (
+ const OMX_U8* pPred0,
+ const OMX_U8* pPred1,
+ OMX_U32 iPredStep0,
+ OMX_U32 iPredStep1,
+ OMX_U8* pDstPred,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+/**
+ * Function: armVCM4P10_SADQuar
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the
+ * average of the other two (pSrcRef0 and pSrcRef1)
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to the original block
+ * [in] pSrcRef0 Pointer to reference block 0
+ * [in] pSrcRef1 Pointer to reference block 1
+ * [in] iSrcStep Step of the original block buffer
+ * [in] iRefStep0 Step of reference block 0
+ * [in] iRefStep1 Step of reference block 1
+ * [in] iHeight Height of the block
+ * [in] iWidth Width of the block
+ * [out] pDstSAD Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCM4P10_SADQuar(
+ const OMX_U8* pSrc,
+ const OMX_U8* pSrcRef0,
+ const OMX_U8* pSrcRef1,
+ OMX_U32 iSrcStep,
+ OMX_U32 iRefStep0,
+ OMX_U32 iRefStep1,
+ OMX_U32* pDstSAD,
+ OMX_U32 iHeight,
+ OMX_U32 iWidth
+);
+
+/**
+ * Function: armVCM4P10_Interpolate_Chroma
+ *
+ * Description:
+ * This function performs interpolation for chroma components.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to
+ * interpolate in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [in] dx Fractional part of horizontal motion vector
+ * component in 1/8 pixel unit (0~7)
+ * [in] dy Fractional part of vertical motion vector
+ * component in 1/8 pixel unit (0~7)
+ * [out] pDst Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCM4P10_Interpolate_Chroma(
+ OMX_U8 *pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8 *pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight,
+ OMX_U32 dx,
+ OMX_U32 dy
+);
+
+/**
+ * Function: armVCM4P10_Interpolate_Luma
+ *
+ * Description:
+ * This function performs interpolation for luma components.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to
+ * interpolate in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [in] dx Fractional part of horizontal motion vector
+ * component in 1/4 pixel unit (0~3)
+ * [in] dy Fractional part of vertical motion vector
+ * component in 1/4 pixel unit (0~3)
+ * [out] pDst Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+ OMXResult armVCM4P10_Interpolate_Luma(
+ const OMX_U8 *pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8 *pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight,
+ OMX_U32 dx,
+ OMX_U32 dy
+);
+
+/**
+ * Function: armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx
+ *
+ * Description:
+ * Reconstruct the 4x4 residual block from coefficient-position pair buffer,
+ * perform dequantisation and integer inverse transformation for 4x4 block of
+ * residuals and update the pair buffer pointer to next non-empty block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppSrc Double pointer to residual coefficient-position
+ * pair buffer output by CAVLC decoding
+ * [in] pDC Pointer to the DC coefficient of this block, NULL
+ * if it doesn't exist
+ * [in] QP Quantization parameter
+ * [in] AC Flag indicating if at least one non-zero coefficient exists
+ * [out] pDst pointer to the reconstructed 4x4 block data
+ *
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx(
+ OMX_U8 **ppSrc,
+ OMX_S16 *pDst,
+ OMX_INT QP,
+ OMX_S16* pDC,
+ int AC
+);
+
+#endif /*_armVideo_H_*/
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVCCOMM_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVCCOMM_s.h
new file mode 100644
index 0000000..7f0a9b8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVCCOMM_s.h
@@ -0,0 +1,72 @@
+;//
+;//
+;// File Name: armVCCOMM_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// ARM optimized OpenMAX VC header file
+;//
+;// Formula used:
+;// MACRO for calculating median for three values.
+
+
+
+ IF :LNOT::DEF:ARMVCCOMM_S_H
+ INCLUDE armCOMM_s.h
+ M_VARIANTS CortexA8, ARM1136JS
+
+ IF ARM1136JS :LOR: CortexA8
+
+ ;///*
+ ;// * Macro: M_MEDIAN3
+ ;// *
+ ;// * Description: Finds the median of three numbers
+ ;// *
+ ;// * Remarks:
+ ;// *
+ ;// * Parameters:
+ ;// * [in] x First entry for the list of three numbers.
+ ;// * [in] y Second entry for the list of three numbers.
+ ;// * Input value may be corrupted at the end of
+ ;// * the execution of this macro.
+ ;// * [in] z Third entry of the list of three numbers.
+ ;// * Input value corrupted at the end of the
+ ;// * execution of this macro.
+ ;// * [in] t Temporary scratch register.
+ ;// * [out]z Median of the three numbers.
+ ;// */
+
+ MACRO
+
+ M_MEDIAN3 $x, $y, $z, $t
+
+ SUBS $t, $y, $z; // t = y - z; flags set for the (y < z) test
+ ADDLT $z, $z, $t; // if (y < z): z = z + t = old y
+ SUBLT $y, $y, $t; // if (y < z): y = y - t = old z (y, z swapped)
+
+ ;// Now z' <= y', so there are three cases for the
+ ;// median value, depending on x.
+
+ ;// 1) x <= z' <= y' : median value is z'
+ ;// 2) z' <= x <= y' : median value is x
+ ;// 3) z' <= y' <= x : median value is y'
+
+ CMP $z, $x; // if ( x > min(y,z) )
+ MOVLT $z, $x; // ans = x
+
+ CMP $x, $y; // if ( x > max(y,z) )
+ MOVGT $z, $y; // ans = max(y,z)
+
+ MEND
+ ENDIF
+
+
+
+ ENDIF ;// ARMVCCOMM_S_H
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC.h
new file mode 100644
index 0000000..7b3cc72
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC.h
@@ -0,0 +1,4381 @@
+/**
+ * File: omxVC.h
+ * Brief: OpenMAX DL v1.0.2 - Video Coding library
+ *
+ * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved.
+ *
+ * These materials are protected by copyright laws and contain material
+ * proprietary to the Khronos Group, Inc. You may use these materials
+ * for implementing Khronos specifications, without altering or removing
+ * any trademark, copyright or other notice from the specification.
+ *
+ * Khronos Group makes no, and expressly disclaims any, representations
+ * or warranties, express or implied, regarding these materials, including,
+ * without limitation, any implied warranties of merchantability or fitness
+ * for a particular purpose or non-infringement of any intellectual property.
+ * Khronos Group makes no, and expressly disclaims any, warranties, express
+ * or implied, regarding the correctness, accuracy, completeness, timeliness,
+ * and reliability of these materials.
+ *
+ * Under no circumstances will the Khronos Group, or any of its Promoters,
+ * Contributors or Members or their respective partners, officers, directors,
+ * employees, agents or representatives be liable for any damages, whether
+ * direct, indirect, special or consequential damages for lost revenues,
+ * lost profits, or otherwise, arising from or in connection with these
+ * materials.
+ *
+ * Khronos and OpenMAX are trademarks of the Khronos Group Inc.
+ *
+ */
+
+/* *****************************************************************************************/
+
+#ifndef _OMXVC_H_
+#define _OMXVC_H_
+
+#include "omxtypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* 6.1.1.1 Motion Vectors */
+/* In omxVC, motion vectors are represented as follows: */
+
+typedef struct {
+ OMX_S16 dx;
+ OMX_S16 dy;
+} OMXVCMotionVector;
+
+
+
+/**
+ * Function: omxVCCOMM_Average_8x (6.1.3.1.1)
+ *
+ * Description:
+ * This function calculates the average of two 8x4, 8x8, or 8x16 blocks. The
+ * result is rounded according to (a+b+1)/2. The block average function can
+ * be used in conjunction with half-pixel interpolation to obtain quarter
+ * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1.
+ *
+ * Input Arguments:
+ *
+ * pPred0 - Pointer to the top-left corner of reference block 0
+ * pPred1 - Pointer to the top-left corner of reference block 1
+ * iPredStep0 - Step of reference block 0
+ * iPredStep1 - Step of reference block 1
+ * iDstStep - Step of the destination buffer.
+ * iHeight - Height of the blocks
+ *
+ * Output Arguments:
+ *
+ * pDstPred - Pointer to the destination buffer. 8-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pPred0, pPred1, or
+ * pDstPred.
+ * - pDstPred is not aligned on an 8-byte boundary.
+ * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 8.
+ * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 8.
+ * - iDstStep <= 0 or iDstStep is not a multiple of 8.
+ * - iHeight is not 4, 8, or 16.
+ *
+ */
+OMXResult omxVCCOMM_Average_8x (
+ const OMX_U8 *pPred0,
+ const OMX_U8 *pPred1,
+ OMX_U32 iPredStep0,
+ OMX_U32 iPredStep1,
+ OMX_U8 *pDstPred,
+ OMX_U32 iDstStep,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCCOMM_Average_16x (6.1.3.1.2)
+ *
+ * Description:
+ * This function calculates the average of two 16x16 or 16x8 blocks. The
+ * result is rounded according to (a+b+1)/2. The block average function can
+ * be used in conjunction with half-pixel interpolation to obtain quarter
+ * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1.
+ *
+ * Input Arguments:
+ *
+ * pPred0 - Pointer to the top-left corner of reference block 0
+ * pPred1 - Pointer to the top-left corner of reference block 1
+ * iPredStep0 - Step of reference block 0
+ * iPredStep1 - Step of reference block 1
+ * iDstStep - Step of the destination buffer
+ * iHeight - Height of the blocks
+ *
+ * Output Arguments:
+ *
+ * pDstPred - Pointer to the destination buffer. 16-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pPred0, pPred1, or
+ * pDstPred.
+ * - pDstPred is not aligned on a 16-byte boundary.
+ * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 16.
+ * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 16.
+ * - iDstStep <= 0 or iDstStep is not a multiple of 16.
+ * - iHeight is not 8 or 16.
+ *
+ */
+OMXResult omxVCCOMM_Average_16x (
+ const OMX_U8 *pPred0,
+ const OMX_U8 *pPred1,
+ OMX_U32 iPredStep0,
+ OMX_U32 iPredStep1,
+ OMX_U8 *pDstPred,
+ OMX_U32 iDstStep,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCCOMM_ExpandFrame_I (6.1.3.2.1)
+ *
+ * Description:
+ * This function expands a reconstructed frame in-place. The unexpanded
+ * source frame should be stored in a plane buffer with sufficient space
+ * pre-allocated for edge expansion, and the input frame should be located in
+ * the plane buffer center. This function executes the pixel expansion by
+ * replicating source frame edge pixel intensities in the empty pixel
+ * locations (expansion region) between the source frame edge and the plane
+ * buffer edge. The width/height of the expansion regions on the
+ * horizontal/vertical edges is controlled by the parameter iExpandPels.
+ *
+ * Input Arguments:
+ *
+ * pSrcDstPlane - pointer to the top-left corner of the frame to be
+ * expanded; must be aligned on an 8-byte boundary.
+ * iFrameWidth - frame width; must be a multiple of 8.
+ * iFrameHeight - frame height; must be a multiple of 8.
+ * iExpandPels - number of pixels to be expanded in the horizontal and
+ * vertical directions; must be a multiple of 8.
+ * iPlaneStep - distance, in bytes, between the start of consecutive lines
+ * in the plane buffer; must be larger than or equal to
+ * (iFrameWidth + 2 * iExpandPels).
+ *
+ * Output Arguments:
+ *
+ * pSrcDstPlane - Pointer to the top-left corner of the frame (NOT the
+ * top-left corner of the plane); must be aligned on an 8-byte
+ * boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - pSrcDstPlane is NULL.
+ * - pSrcDstPlane is not aligned on an 8-byte boundary.
+ * - one of the following parameters is either equal to zero or is a
+ * non-multiple of 8: iFrameHeight, iFrameWidth, iPlaneStep, or
+ * iExpandPels.
+ * - iPlaneStep < (iFrameWidth + 2 * iExpandPels).
+ *
+ */
+OMXResult omxVCCOMM_ExpandFrame_I (
+ OMX_U8 *pSrcDstPlane,
+ OMX_U32 iFrameWidth,
+ OMX_U32 iFrameHeight,
+ OMX_U32 iExpandPels,
+ OMX_U32 iPlaneStep
+);
+
+
+
+/**
+ * Function: omxVCCOMM_Copy8x8 (6.1.3.3.1)
+ *
+ * Description:
+ * Copies the reference 8x8 block to the current block.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the reference block in the source frame; must be
+ * aligned on an 8-byte boundary.
+ * step - distance between the starts of consecutive lines in the reference
+ * frame, in bytes; must be a multiple of 8 and must be larger than
+ * or equal to 8.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination block; must be aligned on an 8-byte
+ * boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pSrc, pDst
+ * - one or more of the following pointers is not aligned on an 8-byte
+ * boundary: pSrc, pDst
+ * - step <8 or step is not a multiple of 8.
+ *
+ */
+OMXResult omxVCCOMM_Copy8x8 (
+ const OMX_U8 *pSrc,
+ OMX_U8 *pDst,
+ OMX_INT step
+);
+
+
+
+/**
+ * Function: omxVCCOMM_Copy16x16 (6.1.3.3.2)
+ *
+ * Description:
+ * Copies the reference 16x16 macroblock to the current macroblock.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the reference macroblock in the source frame; must be
+ * aligned on a 16-byte boundary.
+ * step - distance between the starts of consecutive lines in the reference
+ * frame, in bytes; must be a multiple of 16 and must be larger
+ * than or equal to 16.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination macroblock; must be aligned on a
+ * 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pSrc, pDst
+ * - one or more of the following pointers is not aligned on a 16-byte
+ * boundary: pSrc, pDst
+ * - step <16 or step is not a multiple of 16.
+ *
+ */
+OMXResult omxVCCOMM_Copy16x16 (
+ const OMX_U8 *pSrc,
+ OMX_U8 *pDst,
+ OMX_INT step
+);
+
+
+
+/**
+ * Function: omxVCCOMM_ComputeTextureErrorBlock_SAD (6.1.4.1.1)
+ *
+ * Description:
+ * Computes texture error of the block; also returns SAD.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the source plane; must be aligned on an 8-byte
+ * boundary.
+ * srcStep - step of the source plane
+ * pSrcRef - pointer to the reference buffer, an 8x8 block; must be aligned
+ * on an 8-byte boundary.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination buffer, an 8x8 block; must be aligned
+ * on an 8-byte boundary.
+ * pDstSAD - pointer to the Sum of Absolute Differences (SAD) value
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - At least one of the following
+ * pointers is NULL: pSrc, pSrcRef, pDst and pDstSAD.
+ * - pSrc is not 8-byte aligned.
+ * - srcStep <= 0 or srcStep is not a multiple of 8.
+ * - pSrcRef is not 8-byte aligned.
+ * - pDst is not 8-byte aligned.
+ *
+ */
+OMXResult omxVCCOMM_ComputeTextureErrorBlock_SAD (
+ const OMX_U8 *pSrc,
+ OMX_INT srcStep,
+ const OMX_U8 *pSrcRef,
+ OMX_S16 *pDst,
+ OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCCOMM_ComputeTextureErrorBlock (6.1.4.1.2)
+ *
+ * Description:
+ * Computes the texture error of the block.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the source plane. This should be aligned on an 8-byte
+ * boundary.
+ * srcStep - step of the source plane
+ * pSrcRef - pointer to the reference buffer, an 8x8 block. This should be
+ * aligned on an 8-byte boundary.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination buffer, an 8x8 block. This should be
+ * aligned on an 8-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is NULL:
+ * pSrc, pSrcRef, pDst.
+ * - pSrc is not 8-byte aligned.
+ * - srcStep <= 0 or srcStep is not a multiple of 8.
+ * - pSrcRef is not 8-byte aligned.
+ * - pDst is not 8-byte aligned
+ *
+ */
+OMXResult omxVCCOMM_ComputeTextureErrorBlock (
+ const OMX_U8 *pSrc,
+ OMX_INT srcStep,
+ const OMX_U8 *pSrcRef,
+ OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function: omxVCCOMM_LimitMVToRect (6.1.4.1.3)
+ *
+ * Description:
+ * Limits the motion vector associated with the current block/macroblock to
+ * prevent the motion compensated block/macroblock from moving outside a
+ * bounding rectangle as shown in Figure 6-1.
+ *
+ * Input Arguments:
+ *
+ * pSrcMV - pointer to the motion vector associated with the current block
+ * or macroblock
+ * pRectVOPRef - pointer to the bounding rectangle
+ * Xcoord, Ycoord - coordinates of the current block or macroblock
+ * size - size of the current block or macroblock; must be equal to 8 or
+ * 16.
+ *
+ * Output Arguments:
+ *
+ * pDstMV - pointer to the limited motion vector
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcMV, pDstMV, or pRectVOPRef.
+ * - size is not equal to either 8 or 16.
+ * - the width or height of the bounding rectangle is less than
+ * twice the block size.
+ */
+OMXResult omxVCCOMM_LimitMVToRect (
+ const OMXVCMotionVector *pSrcMV,
+ OMXVCMotionVector *pDstMV,
+ const OMXRect *pRectVOPRef,
+ OMX_INT Xcoord,
+ OMX_INT Ycoord,
+ OMX_INT size
+);
+
+
+
+/**
+ * Function: omxVCCOMM_SAD_16x (6.1.4.1.4)
+ *
+ * Description:
+ * This function calculates the SAD for 16x16 and 16x8 blocks.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to the original block; must be aligned on a 16-byte
+ * boundary.
+ * iStepOrg - Step of the original block buffer
+ * pSrcRef - Pointer to the reference block
+ * iStepRef - Step of the reference block buffer
+ * iHeight - Height of the block
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pDstSAD, or pSrcRef
+ * - pSrcOrg is not 16-byte aligned.
+ * - iStepOrg <= 0 or iStepOrg is not a multiple of 16
+ * - iStepRef <= 0 or iStepRef is not a multiple of 16
+ * - iHeight is not 8 or 16
+ *
+ */
+OMXResult omxVCCOMM_SAD_16x (
+ const OMX_U8 *pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8 *pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_S32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCCOMM_SAD_8x (6.1.4.1.5)
+ *
+ * Description:
+ * This function calculates the SAD for 8x16, 8x8, 8x4 blocks.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to the original block; must be aligned on an 8-byte
+ * boundary.
+ * iStepOrg - Step of the original block buffer
+ * pSrcRef - Pointer to the reference block
+ * iStepRef - Step of the reference block buffer
+ * iHeight - Height of the block
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pDstSAD, or pSrcRef
+ * - pSrcOrg is not 8-byte aligned.
+ * - iStepOrg <= 0 or iStepOrg is not a multiple of 8
+ * - iStepRef <= 0 or iStepRef is not a multiple of 8
+ * - iHeight is not 4, 8 or 16
+ *
+ */
+OMXResult omxVCCOMM_SAD_8x (
+ const OMX_U8 *pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8 *pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_S32*pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/* 6.2.1.1 Direction */
+/* The direction enumerator is used with functions that perform AC/DC prediction and zig-zag scan. */
+
+enum {
+ OMX_VC_NONE = 0,
+ OMX_VC_HORIZONTAL = 1,
+ OMX_VC_VERTICAL = 2
+};
+
+
+
+/* 6.2.1.2 Bilinear Interpolation */
+/* The bilinear interpolation enumerator is used with motion estimation, motion compensation, and reconstruction functions. */
+
+enum {
+ OMX_VC_INTEGER_PIXEL = 0, /* case a */
+ OMX_VC_HALF_PIXEL_X = 1, /* case b */
+ OMX_VC_HALF_PIXEL_Y = 2, /* case c */
+ OMX_VC_HALF_PIXEL_XY = 3 /* case d */
+};
+
+
+
+/* 6.2.1.3 Neighboring Macroblock Availability */
+/* Neighboring macroblock availability is indicated using the following flags: */
+
+enum {
+ OMX_VC_UPPER = 1, /** above macroblock is available */
+ OMX_VC_LEFT = 2, /** left macroblock is available */
+ OMX_VC_CENTER = 4,
+ OMX_VC_RIGHT = 8,
+ OMX_VC_LOWER = 16,
+ OMX_VC_UPPER_LEFT = 32, /** above-left macroblock is available */
+ OMX_VC_UPPER_RIGHT = 64, /** above-right macroblock is available */
+ OMX_VC_LOWER_LEFT = 128,
+ OMX_VC_LOWER_RIGHT = 256
+};
+
+
+
+/* 6.2.1.4 Video Components */
+/* A data type that enumerates video components is defined as follows: */
+
+typedef enum {
+ OMX_VC_LUMINANCE, /** Luminance component */
+ OMX_VC_CHROMINANCE /** chrominance component */
+} OMXVCM4P2VideoComponent;
+
+
+
+/* 6.2.1.5 MacroblockTypes */
+/* A data type that enumerates macroblock types is defined as follows: */
+
+typedef enum {
+ OMX_VC_INTER = 0, /** P picture or P-VOP */
+ OMX_VC_INTER_Q = 1, /** P picture or P-VOP */
+ OMX_VC_INTER4V = 2, /** P picture or P-VOP */
+ OMX_VC_INTRA = 3, /** I and P picture, I- and P-VOP */
+ OMX_VC_INTRA_Q = 4, /** I and P picture, I- and P-VOP */
+ OMX_VC_INTER4V_Q = 5 /** P picture or P-VOP (H.263)*/
+} OMXVCM4P2MacroblockType;
+
+
+
+/* 6.2.1.6 Coordinates */
+/* Coordinates are represented as follows: */
+
+typedef struct {
+ OMX_INT x;
+ OMX_INT y;
+} OMXVCM4P2Coordinate;
+
+
+
+/* 6.2.1.7 Motion Estimation Algorithms */
+/* A data type that enumerates motion estimation search methods is defined as follows: */
+
+typedef enum {
+ OMX_VC_M4P2_FAST_SEARCH = 0, /** Fast motion search */
+ OMX_VC_M4P2_FULL_SEARCH = 1 /** Full motion search */
+} OMXVCM4P2MEMode;
+
+
+
+/* 6.2.1.8 Motion Estimation Parameters */
+/* A data structure containing control parameters for
+ * motion estimation functions is defined as follows:
+ */
+
+typedef struct {
+ OMX_INT searchEnable8x8; /** enables 8x8 search */
+ OMX_INT halfPelSearchEnable; /** enables half-pel resolution */
+ OMX_INT searchRange; /** search range */
+ OMX_INT rndVal; /** rounding control; 0-disabled, 1-enabled*/
+} OMXVCM4P2MEParams;
+
+
+
+/* 6.2.1.9 Macroblock Information */
+/* A data structure containing macroblock parameters for
+ * motion estimation functions is defined as follows:
+ */
+
+typedef struct {
+ OMX_S32 sliceId; /* slice number */
+ OMXVCM4P2MacroblockType mbType; /* MB type: OMX_VC_INTRA, OMX_VC_INTER, or OMX_VC_INTER4V */
+ OMX_S32 qp; /* quantization parameter*/
+ OMX_U32 cbpy; /* CBP Luma */
+ OMX_U32 cbpc; /* CBP Chroma */
+ OMXVCMotionVector pMV0[2][2]; /* motion vector, represented using 1/2-pel units,
+ * pMV0[blocky][blockx] (blocky = 0~1, blockx =0~1)
+ */
+ OMXVCMotionVector pMVPred[2][2]; /* motion vector prediction, represented using 1/2-pel units,
+ * pMVPred[blocky][blockx] (blocky = 0~1, blockx = 0~1)
+ */
+ OMX_U8 pPredDir[2][2]; /* AC prediction direction:
+ * OMX_VC_NONE, OMX_VC_VERTICAL, OMX_VC_HORIZONTAL
+ */
+} OMXVCM4P2MBInfo, *OMXVCM4P2MBInfoPtr;
+
+
+
+/**
+ * Function: omxVCM4P2_FindMVpred (6.2.3.1.1)
+ *
+ * Description:
+ * Predicts a motion vector for the current block using the procedure
+ * specified in [ISO14496-2], subclause 7.6.5. The resulting predicted MV is
+ * returned in pDstMVPred. If the parameter pDstMVPredME is not NULL then
+ * the set of three MV candidates used for prediction is also returned,
+ * otherwise pDstMVPredME is NULL upon return.
+ *
+ * Input Arguments:
+ *
+ * pSrcMVCurMB - pointer to the MV buffer associated with the current Y
+ * macroblock; a value of NULL indicates unavailability.
+ * pSrcCandMV1 - pointer to the MV buffer containing the 4 MVs associated
+ * with the MB located to the left of the current MB; set to NULL
+ * if there is no MB to the left.
+ * pSrcCandMV2 - pointer to the MV buffer containing the 4 MVs associated
+ * with the MB located above the current MB; set to NULL if there
+ * is no MB located above the current MB.
+ * pSrcCandMV3 - pointer to the MV buffer containing the 4 MVs associated
+ * with the MB located to the right and above the current MB; set
+ * to NULL if there is no MB located to the above-right.
+ * iBlk - the index of block in the current macroblock
+ * pDstMVPredME - MV candidate return buffer; if set to NULL then
+ * prediction candidate MVs are not returned and pDstMVPredME will
+ * be NULL upon function return; if pDstMVPredME is non-NULL then it
+ * must point to a buffer containing sufficient space for three
+ * return MVs.
+ *
+ * Output Arguments:
+ *
+ * pDstMVPred - pointer to the predicted motion vector
+ * pDstMVPredME - if non-NULL upon input then pDstMVPredME points upon
+ * return to a buffer containing the three motion vector candidates
+ * used for prediction as specified in [ISO14496-2], subclause
+ * 7.6.5, otherwise if NULL upon input then pDstMVPredME is NULL
+ * upon output.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - the pointer pDstMVPred is NULL
+ * - the parameter iBlk does not fall into the range 0 <= iBlk<=3
+ *
+ */
+OMXResult omxVCM4P2_FindMVpred (
+ const OMXVCMotionVector *pSrcMVCurMB,
+ const OMXVCMotionVector *pSrcCandMV1,
+ const OMXVCMotionVector *pSrcCandMV2,
+ const OMXVCMotionVector *pSrcCandMV3,
+ OMXVCMotionVector *pDstMVPred,
+ OMXVCMotionVector *pDstMVPredME,
+ OMX_INT iBlk
+);
+
+
+
+/**
+ * Function: omxVCM4P2_IDCT8x8blk (6.2.3.2.1)
+ *
+ * Description:
+ * Computes a 2D inverse DCT for a single 8x8 block, as defined in
+ * [ISO14496-2].
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the start of the linearly arranged IDCT input buffer;
+ * must be aligned on a 16-byte boundary. According to
+ * [ISO14496-2], the input coefficient values should lie within the
+ * range [-2048, 2047].
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the start of the linearly arranged IDCT output buffer;
+ * must be aligned on a 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - pSrc or pDst is NULL.
+ * - pSrc or pDst is not 16-byte aligned.
+ *
+ */
+OMXResult omxVCM4P2_IDCT8x8blk (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function: omxVCM4P2_MEGetBufSize (6.2.4.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification
+ * structure for the following motion estimation functions:
+ * BlockMatch_Integer_8x8, BlockMatch_Integer_16x16, and MotionEstimationMB.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P2MEMode
+ * pMEParams - motion estimation parameters
+ *
+ * Output Arguments:
+ *
+ * pSize - pointer to the number of bytes required for the specification
+ * structure
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - one or more of the following is true:
+ * - an invalid value was specified for the parameter MEmode
+ * - a negative or zero value was specified for the
+ * parameter pMEParams->searchRange
+ *
+ */
+OMXResult omxVCM4P2_MEGetBufSize (
+ OMXVCM4P2MEMode MEmode,
+ const OMXVCM4P2MEParams *pMEParams,
+ OMX_U32 *pSize
+);
+
+
+
+/**
+ * Function: omxVCM4P2_MEInit (6.2.4.1.2)
+ *
+ * Description:
+ * Initializes the vendor-specific specification structure required for the
+ * following motion estimation functions: omxVCM4P2_BlockMatch_Integer_8x8,
+ * omxVCM4P2_BlockMatch_Integer_16x16, and omxVCM4P2_MotionEstimationMB.
+ * Memory for the specification structure *pMESpec must be allocated prior
+ * to calling the function, and should be aligned on a 4-byte boundary.
+ * Following initialization by this function, the vendor-specific structure
+ * *pMESpec should contain an implementation-specific representation of all
+ * motion estimation parameters received via the structure pMEParams, for
+ * example rndVal, searchRange, etc. The number of bytes required for the
+ * specification structure can be determined using the function
+ * omxVCM4P2_MEGetBufSize.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P2MEMode
+ * pMEParams - motion estimation parameters
+ * pMESpec - pointer to the uninitialized ME specification structure
+ *
+ * Output Arguments:
+ *
+ * pMESpec - pointer to the initialized ME specification structure
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - one or more of the following is true:
+ * - an invalid value was specified for the parameter MEmode
+ * - a negative or zero value was specified for the
+ * parameter pMEParams->searchRange
+ *
+ */
+OMXResult omxVCM4P2_MEInit (
+ OMXVCM4P2MEMode MEmode,
+ const OMXVCM4P2MEParams*pMEParams,
+ void *pMESpec
+);
+
+
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Integer_16x16 (6.2.4.2.1)
+ *
+ * Description:
+ * Performs a 16x16 block search; estimates motion vector and associated
+ * minimum SAD. Both the input and output motion vectors are represented using
+ * half-pixel units, and therefore a shift left or right by 1 bit may be
+ * required, respectively, to match the input or output MVs with other
+ * functions that either generate output MVs or expect input MVs represented
+ * using integer pixel units.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * MB that corresponds to the location of the current macroblock in
+ * the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - pointer to the valid reference plane rectangle; coordinates
+ * are specified relative to the image origin. Rectangle
+ * boundaries may extend beyond image boundaries if the image has
+ * been padded. For example, if padding extends 4 pixels beyond
+ * frame border, then the value for the left border could be set to
+ * -4.
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 256
+ * entries); must be aligned on a 16-byte boundary. The number of
+ * bytes between lines (step) is 16.
+ * pCurrPointPos - position of the current macroblock in the current plane
+ * pSrcPreMV - pointer to predicted motion vector; NULL indicates no
+ * predicted MV
+ * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced
+ * by pSrcPreMV); may be set to NULL if unavailable.
+ * pMESpec - vendor-specific motion estimation specification structure;
+ * must have been allocated and then initialized using
+ * omxVCM4P2_MEInit prior to calling the block matching function.
+ *
+ * Output Arguments:
+ *
+ * pDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL: pSrcRefBuf,
+ * pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or
+ * pMESpec, or
+ * - pSrcCurrBuf is not 16-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Integer_16x16 (
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ const OMXVCMotionVector*pSrcPreMV,
+ const OMX_INT *pSrcPreSAD,
+ void *pMESpec,
+ OMXVCMotionVector*pDstMV,
+ OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Integer_8x8 (6.2.4.2.2)
+ *
+ * Description:
+ * Performs an 8x8 block search; estimates motion vector and associated
+ * minimum SAD. Both the input and output motion vectors are represented
+ * using half-pixel units, and therefore a shift left or right by 1 bit may be
+ * required, respectively, to match the input or output MVs with other
+ * functions that either generate output MVs or expect input MVs represented
+ * using integer pixel units.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * block that corresponds to the location of the current 8x8 block
+ * in the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - pointer to the valid reference plane rectangle; coordinates
+ * are specified relative to the image origin. Rectangle
+ * boundaries may extend beyond image boundaries if the image has
+ * been padded.
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 128
+ * entries); must be aligned on an 8-byte boundary. The number of
+ * bytes between lines (step) is 16 bytes.
+ * pCurrPointPos - position of the current block in the current plane
+ * pSrcPreMV - pointer to predicted motion vector; NULL indicates no
+ * predicted MV
+ * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced
+ * by pSrcPreMV); may be set to NULL if unavailable.
+ * pMESpec - vendor-specific motion estimation specification structure;
+ * must have been allocated and then initialized using
+ * omxVCM4P2_MEInit prior to calling the block matching function.
+ *
+ * Output Arguments:
+ *
+ * pDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL: pSrcRefBuf,
+ * pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or
+ * pMESpec, or
+ * - pSrcCurrBuf is not 8-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Integer_8x8 (
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ const OMXVCMotionVector *pSrcPreMV,
+ const OMX_INT *pSrcPreSAD,
+ void *pMESpec,
+ OMXVCMotionVector *pDstMV,
+ OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Half_16x16 (6.2.4.2.3)
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution. Returns the
+ * estimated motion vector and associated minimum SAD. This function
+ * estimates the half-pixel motion vector by interpolating the integer
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e.,
+ * the initial integer MV is generated externally. The input parameters
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of
+ * 16x16 integer search prior to calling BlockMatch_Half_16x16. The function
+ * BlockMatch_Integer_16x16 may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * macroblock that corresponds to the location of the current
+ * macroblock in the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - reference plane valid region rectangle
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 256
+ * entries); must be aligned on a 16-byte boundary. The number of
+ * bytes between lines (step) is 16.
+ * pSearchPointRefPos - position of the starting point for half pixel
+ * search (specified in terms of integer pixel units) in the
+ * reference plane, i.e., the reference position pointed to by the
+ * predicted motion vector.
+ * rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ * pSrcDstMV - pointer to the initial MV estimate; typically generated
+ * during a prior 16x16 integer search; specified in terms of
+ * half-pixel units.
+ *
+ * Output Arguments:
+ *
+ * pSrcDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL: pSrcRefBuf,
+ * pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV, or
+ * - pSrcCurrBuf is not 16-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Half_16x16 (
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pSearchPointRefPos,
+ OMX_INT rndVal,
+ OMXVCMotionVector *pSrcDstMV,
+ OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Half_8x8 (6.2.4.2.4)
+ *
+ * Description:
+ * Performs an 8x8 block match with half-pixel resolution. Returns the
+ * estimated motion vector and associated minimum SAD. This function
+ * estimates the half-pixel motion vector by interpolating the integer
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e.,
+ * the initial integer MV is generated externally. The input parameters
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of
+ * 8x8 integer search prior to calling BlockMatch_Half_8x8. The function
+ * BlockMatch_Integer_8x8 may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * block that corresponds to the location of the current 8x8 block
+ * in the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - reference plane valid region rectangle
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 128
+ * entries); must be aligned on an 8-byte boundary. The number of
+ * bytes between lines (step) is 16.
+ * pSearchPointRefPos - position of the starting point for half pixel
+ * search (specified in terms of integer pixel units) in the
+ * reference plane.
+ * rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ * pSrcDstMV - pointer to the initial MV estimate; typically generated
+ * during a prior 8x8 integer search, specified in terms of
+ * half-pixel units.
+ *
+ * Output Arguments:
+ *
+ * pSrcDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcRefBuf, pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV
+ * - pSrcCurrBuf is not 8-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Half_8x8 (
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pSearchPointRefPos,
+ OMX_INT rndVal,
+ OMXVCMotionVector *pSrcDstMV,
+ OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P2_MotionEstimationMB (6.2.4.3.1)
+ *
+ * Description:
+ * Performs motion search for a 16x16 macroblock. Selects best motion search
+ * strategy from among inter-1MV, inter-4MV, and intra modes. Supports
+ * integer and half pixel resolution.
+ *
+ * Input Arguments:
+ *
+ * pSrcCurrBuf - pointer to the top-left corner of the current MB in the
+ * original picture plane; must be aligned on a 16-byte boundary.
+ * The function does not expect source data outside the region
+ * bounded by the MB to be available; for example it is not
+ * necessary for the caller to guarantee the availability of
+ * pSrcCurrBuf[-srcCurrStep], i.e., the row of pixels above the MB
+ * to be processed.
+ * srcCurrStep - width of the original picture plane, in terms of full
+ * pixels; must be a multiple of 16.
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * plane location corresponding to the location of the current
+ * macroblock in the current plane; must be aligned on a 16-byte
+ * boundary.
+ * srcRefStep - width of the reference picture plane, in terms of full
+ * pixels; must be a multiple of 16.
+ * pRefRect - reference plane valid region rectangle, specified relative to
+ * the image origin
+ * pCurrPointPos - position of the current macroblock in the current plane
+ * pMESpec - pointer to the vendor-specific motion estimation specification
+ * structure; must be allocated and then initialized using
+ * omxVCM4P2_MEInit prior to calling this function.
+ * pMBInfo - array, of dimension four, containing pointers to information
+ * associated with four nearby MBs:
+ * - pMBInfo[0] - pointer to left MB information
+ * - pMBInfo[1] - pointer to top MB information
+ * - pMBInfo[2] - pointer to top-left MB information
+ * - pMBInfo[3] - pointer to top-right MB information
+ * Any pointer in the array may be set equal to NULL if the
+ * corresponding MB doesn't exist. For each MB, the following structure
+ * members are used:
+ * - mbType - macroblock type, either OMX_VC_INTRA, OMX_VC_INTER, or
+ * OMX_VC_INTER4V
+ * - pMV0[2][2] - estimated motion vectors; represented
+ * in 1/2 pixel units
+ * - sliceID - number of the slice to which the MB belongs
+ * pSrcDstMBCurr - pointer to information structure for the current MB.
+ * The following entries should be set prior to calling the
+ * function: sliceID - the number of the slice to which the
+ * current MB belongs. The structure elements cbpy and cbpc are
+ * ignored.
+ *
+ * Output Arguments:
+ *
+ * pSrcDstMBCurr - pointer to updated information structure for the current
+ * MB after MB-level motion estimation has been completed. The
+ * following structure members are updated by the ME function:
+ * - mbType - macroblock type: OMX_VC_INTRA, OMX_VC_INTER, or
+ * OMX_VC_INTER4V.
+ * - pMV0[2][2] - estimated motion vectors; represented in
+ * terms of 1/2 pel units.
+ * - pMVPred[2][2] - predicted motion vectors; represented
+ * in terms of 1/2 pel units.
+ * The structure members cbpy and cbpc are not updated by the function.
+ * pDstSAD - pointer to the minimum SAD for INTER1V, or sum of minimum SADs
+ * for INTER4V
+ * pDstBlockSAD - pointer to an array of SAD values for each of the four
+ * 8x8 luma blocks in the MB. The block SADs are in scan order for
+ * each MB.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL: pSrcCurrBuf,
+ * pSrcRefBuf, pRefRect, pCurrPointPos, pMBInter, pMBIntra,
+ * pSrcDstMBCurr, or pDstSAD.
+ * NOTE(review): pMBInter and pMBIntra are not parameters of this
+ * prototype; confirm the intended pointer names against the spec.
+ *
+ */
+OMXResult omxVCM4P2_MotionEstimationMB (
+ const OMX_U8 *pSrcCurrBuf,
+ OMX_S32 srcCurrStep,
+ const OMX_U8 *pSrcRefBuf,
+ OMX_S32 srcRefStep,
+ const OMXRect*pRefRect,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ void *pMESpec,
+ const OMXVCM4P2MBInfoPtr *pMBInfo,
+ OMXVCM4P2MBInfo *pSrcDstMBCurr,
+ OMX_U16 *pDstSAD,
+ OMX_U16 *pDstBlockSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DCT8x8blk (6.2.4.4.1)
+ *
+ * Description:
+ * Computes a 2D forward DCT for a single 8x8 block, as defined in
+ * [ISO14496-2]. See also omxVCM4P2_IDCT8x8blk, the 8x8 IDCT declared
+ * earlier in this header with the same argument layout.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the start of the linearly arranged input buffer; must
+ * be aligned on a 16-byte boundary. Input values (pixel
+ * intensities) are valid in the range [-255,255].
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the start of the linearly arranged output buffer; must
+ * be aligned on a 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, returned if:
+ * - pSrc or pDst is NULL.
+ * - pSrc or pDst is not 16-byte aligned.
+ *
+ */
+OMXResult omxVCM4P2_DCT8x8blk (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function: omxVCM4P2_QuantIntra_I (6.2.4.4.2)
+ *
+ * Description:
+ * Performs quantization on intra block coefficients. This function supports
+ * bits_per_pixel == 8.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input intra block coefficients; must be aligned
+ * on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale).
+ * blockIndex - block index indicating the component type and position,
+ * valid in the range 0 to 5, as defined in [ISO14496-2], subclause
+ * 6.1.3.8.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (quantized) intra block coefficients.
+ * When shortVideoHeader==1, AC coefficients are saturated on the
+ * interval [-127, 127], and DC coefficients are saturated on the
+ * interval [1, 254]. When shortVideoHeader==0, AC coefficients
+ * are saturated on the interval [-2047, 2047].
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - pSrcDst is NULL.
+ * - blockIndex < 0 or blockIndex >= 10
+ * - QP <= 0 or QP >= 32.
+ *
+ */
+OMXResult omxVCM4P2_QuantIntra_I (
+ OMX_S16 *pSrcDst,
+ OMX_U8 QP,
+ OMX_INT blockIndex,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_QuantInter_I (6.2.4.4.3)
+ *
+ * Description:
+ * Performs quantization on an inter coefficient block; supports
+ * bits_per_pixel == 8.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input inter block coefficients; must be aligned
+ * on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale)
+ * shortVideoHeader - binary flag indicating presence of short_video_header;
+ * shortVideoHeader==1 selects linear intra DC mode, and
+ * shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (quantized) inter block coefficients.
+ * When shortVideoHeader==1, AC coefficients are saturated on the
+ * interval [-127, 127], and DC coefficients are saturated on the
+ * interval [1, 254]. When shortVideoHeader==0, AC coefficients
+ * are saturated on the interval [-2047, 2047].
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - pSrcDst is NULL.
+ * - QP <= 0 or QP >= 32.
+ *
+ */
+OMXResult omxVCM4P2_QuantInter_I (
+ OMX_S16 *pSrcDst,
+ OMX_U8 QP,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_TransRecBlockCoef_intra (6.2.4.4.4)
+ *
+ * Description:
+ * Quantizes the DCT coefficients, implements intra block AC/DC coefficient
+ * prediction, and reconstructs the current intra block texture for prediction
+ * on the next frame. Quantized row and column coefficients are returned in
+ * the updated coefficient buffers.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the pixels of current intra block; must be aligned on
+ * an 8-byte boundary.
+ * pPredBufRow - pointer to the coefficient row buffer containing
+ * ((num_mb_per_row * 2 + 1) * 8) elements of type OMX_S16.
+ * Coefficients are organized into blocks of eight as described
+ * below (Internal Prediction Coefficient Update Procedures). The
+ * DC coefficient is first, and the remaining buffer locations
+ * contain the quantized AC coefficients. Each group of eight row
+ * buffer elements combined with one element eight elements ahead
+ * contains the coefficient predictors of the neighboring block
+ * that is spatially above or to the left of the block currently to
+ * be decoded. A negative-valued DC coefficient indicates that this
+ * neighboring block is not INTRA-coded or out of bounds, and
+ * therefore the AC and DC coefficients are invalid. Pointer must
+ * be aligned on an 8-byte boundary.
+ * pPredBufCol - pointer to the prediction coefficient column buffer
+ * containing 16 elements of type OMX_S16. Coefficients are
+ * organized as described in section 6.2.2.5. Pointer must be
+ * aligned on an 8-byte boundary.
+ * pSumErr - pointer to a flag indicating whether or not AC prediction is
+ * required; AC prediction is enabled if *pSumErr >=0, but the
+ * value is not used for coefficient prediction, i.e., the sum of
+ * absolute differences starts from 0 for each call to this
+ * function. Otherwise AC prediction is disabled if *pSumErr < 0 .
+ * blockIndex - block index indicating the component type and position, as
+ * defined in [ISO14496-2], subclause 6.1.3.8.
+ * curQp - quantization parameter of the macroblock to which the current
+ * block belongs
+ * pQpBuf - pointer to a 2-element quantization parameter buffer; pQpBuf[0]
+ * contains the quantization parameter associated with the 8x8
+ * block left of the current block (QPa), and pQpBuf[1] contains
+ * the quantization parameter associated with the 8x8 block above
+ * the current block (QPc). In the event that the corresponding
+ * block is outside of the VOP bound, the Qp value will not affect
+ * the intra prediction process, as described in [ISO14496-2],
+ * sub-clause 7.4.3.3, Adaptive AC Coefficient Prediction.
+ * srcStep - width of the source buffer; must be a multiple of 8.
+ * dstStep - width of the reconstructed destination buffer; must be a
+ * multiple of 16.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the quantized DCT coefficient buffer; pDst[0] contains
+ * the predicted DC coefficient; the remaining entries contain the
+ * quantized AC coefficients (without prediction). The pointer
+ * pDst must be aligned on a 16-byte boundary.
+ * pRec - pointer to the reconstructed texture; must be aligned on an
+ * 8-byte boundary.
+ * pPredBufRow - pointer to the updated coefficient row buffer
+ * pPredBufCol - pointer to the updated coefficient column buffer
+ * pPreACPredict - if prediction is enabled, the parameter points to the
+ * start of the buffer containing the coefficient differences for
+ * VLC encoding. The entry pPreACPredict[0] indicates prediction
+ * direction for the current block and takes one of the following
+ * values: OMX_VC_NONE (prediction disabled), OMX_VC_HORIZONTAL, or
+ * OMX_VC_VERTICAL. The entries
+ * pPreACPredict[1]-pPreACPredict[7] contain predicted AC
+ * coefficients. If prediction is disabled (*pSumErr<0) then the
+ * contents of this buffer are undefined upon return from the
+ * function.
+ * pSumErr - pointer to the value of the accumulated AC coefficient errors,
+ * i.e., sum of the absolute differences between predicted and
+ * unpredicted AC coefficients
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ * - At least one of the following pointers is NULL: pSrc, pDst, pRec,
+ * pPredBufRow, pPredBufCol, pQpBuf, pPreACPredict, pSumErr.
+ * - blockIndex < 0 or blockIndex >= 10;
+ * - curQp <= 0 or curQp >= 32.
+ * - srcStep or dstStep <= 0 or not a multiple of 8.
+ * - pDst is not 16-byte aligned.
+ * - At least one of the following pointers is not 8-byte aligned:
+ * pSrc, pRec.
+ *
+ * Note: The coefficient buffers must be updated in accordance with the
+ * update procedures defined in section 6.2.2.
+ *
+ */
+OMXResult omxVCM4P2_TransRecBlockCoef_intra (
+ const OMX_U8 *pSrc,
+ OMX_S16 *pDst,
+ OMX_U8 *pRec,
+ OMX_S16 *pPredBufRow,
+ OMX_S16 *pPredBufCol,
+ OMX_S16 *pPreACPredict,
+ OMX_INT *pSumErr,
+ OMX_INT blockIndex,
+ OMX_U8 curQp,
+ const OMX_U8 *pQpBuf,
+ OMX_INT srcStep,
+ OMX_INT dstStep,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_TransRecBlockCoef_inter (6.2.4.4.5)
+ *
+ * Description:
+ * Implements DCT, and quantizes the DCT coefficients of the inter block
+ * while reconstructing the texture residual. There is no boundary check for
+ * the bit stream buffer.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the residuals to be encoded; must be aligned on a
+ * 16-byte boundary.
+ * QP - quantization parameter.
+ * shortVideoHeader - binary flag indicating presence of short_video_header;
+ * shortVideoHeader==1 selects linear intra DC mode, and
+ * shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the quantized DCT coefficients buffer; must be aligned
+ * on a 16-byte boundary.
+ * pRec - pointer to the reconstructed texture residuals; must be aligned
+ * on a 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is either NULL or
+ * not 16-byte aligned:
+ * - pSrc
+ * - pDst
+ * - pRec
+ * - QP <= 0 or QP >= 32.
+ *
+ */
+OMXResult omxVCM4P2_TransRecBlockCoef_inter (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst,
+ OMX_S16 *pRec,
+ OMX_U8 QP,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_EncodeVLCZigzag_IntraDCVLC (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one
+ * intra block. Two versions of the function (DCVLC and ACVLC) are provided
+ * in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding".
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the bitstream
+ * pBitOffset - pointer to the bit position in the byte pointed by
+ * *ppBitStream. Valid within 0 to 7.
+ * pQDctBlkCoef - pointer to the quantized DCT coefficient
+ * predDir - AC prediction direction, which is used to decide the zigzag
+ * scan pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used.
+ * Performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction.
+ * Performs alternate-vertical zigzag scan.
+ * - OMX_VC_VERTICAL - Vertical prediction.
+ * Performs alternate-horizontal zigzag scan.
+ * pattern - block pattern which is used to decide whether this block is
+ * encoded
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ * videoComp - video component type (luminance, chrominance) of the current
+ * block
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is encoded, so
+ * that it points to the current byte in the bit stream buffer.
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pQDctBlkCoef.
+ * - *pBitOffset < 0, or *pBitOffset >7.
+ * - predDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or
+ * OMX_VC_VERTICAL.
+ * - videoComp is not one component of enum OMXVCM4P2VideoComponent.
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraDCVLC (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ const OMX_S16 *pQDctBlkCoef,
+ OMX_U8 predDir,
+ OMX_U8 pattern,
+ OMX_INT shortVideoHeader,
+ OMXVCM4P2VideoComponent videoComp
+);
+
+
+
+/**
+ * Function: omxVCM4P2_EncodeVLCZigzag_IntraACVLC (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one
+ * intra block. Two versions of the function (DCVLC and ACVLC) are provided
+ * in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding".
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the bitstream
+ * pBitOffset - pointer to the bit position in the byte pointed by
+ * *ppBitStream. Valid within 0 to 7.
+ * pQDctBlkCoef - pointer to the quantized DCT coefficient
+ * predDir - AC prediction direction, which is used to decide the zigzag
+ * scan pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used.
+ * Performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction.
+ * Performs alternate-vertical zigzag scan.
+ * - OMX_VC_VERTICAL - Vertical prediction.
+ * Performs alternate-horizontal zigzag scan.
+ * pattern - block pattern which is used to decide whether this block is
+ * encoded
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is encoded, so
+ * that it points to the current byte in the bit stream buffer.
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pQDctBlkCoef.
+ * - *pBitOffset < 0, or *pBitOffset >7.
+ * - predDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or
+ * OMX_VC_VERTICAL.
+ * - (The videoComp check documented for the IntraDCVLC variant does not
+ * apply: this function takes no videoComp argument.)
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraACVLC (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ const OMX_S16 *pQDctBlkCoef,
+ OMX_U8 predDir,
+ OMX_U8 pattern,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_EncodeVLCZigzag_Inter (6.2.4.5.3)
+ *
+ * Description:
+ * Performs classical zigzag scanning and VLC encoding for one inter block.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream
+ * pBitOffset - pointer to the bit position in the byte pointed by
+ * *ppBitStream. Valid within 0 to 7
+ * pQDctBlkCoef - pointer to the quantized DCT coefficient
+ * pattern - block pattern which is used to decide whether this block is
+ * encoded
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is encoded so that
+ * it points to the current byte in the bit stream buffer.
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pQDctBlkCoef.
+ * - *pBitOffset < 0, or *pBitOffset >7.
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_Inter (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ const OMX_S16 *pQDctBlkCoef,
+ OMX_U8 pattern,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_EncodeMV (6.2.4.5.4)
+ *
+ * Description:
+ * Predicts a motion vector for the current macroblock, encodes the
+ * difference, and writes the output to the stream buffer. The input MVs
+ * pMVCurMB, pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB should lie
+ * within the ranges associated with the input parameter fcodeForward, as
+ * described in [ISO14496-2], subclause 7.6.3. This function provides a
+ * superset of the functionality associated with the function
+ * omxVCM4P2_FindMVpred.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the bitstream buffer
+ * pBitOffset - index of the first free (next available) bit in the stream
+ * buffer referenced by *ppBitStream, valid in the range 0 to 7.
+ * pMVCurMB - pointer to the current macroblock motion vector; a value of
+ * NULL indicates unavailability.
+ * pSrcMVLeftMB - pointer to the source left macroblock motion vector; a
+ * value of NULL indicates unavailability.
+ * pSrcMVUpperMB - pointer to source upper macroblock motion vector; a
+ * value of NULL indicates unavailability.
+ * pSrcMVUpperRightMB - pointer to source upper right MB motion vector; a
+ * value of NULL indicates unavailability.
+ * fcodeForward - an integer with values from 1 to 7; used in encoding
+ * motion vectors related to search range, as described in
+ * [ISO14496-2], subclause 7.6.3.
+ * MBType - macro block type, valid in the range 0 to 5
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - updated pointer to the current byte in the bit stream
+ * buffer
+ * pBitOffset - updated index of the next available bit position in stream
+ * buffer referenced by *ppBitStream
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pMVCurMB
+ * - *pBitOffset < 0, or *pBitOffset >7.
+ * - fcodeForward <= 0, or fcodeForward > 7, or MBType < 0.
+ *
+ */
+OMXResult omxVCM4P2_EncodeMV (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ const OMXVCMotionVector *pMVCurMB,
+ const OMXVCMotionVector*pSrcMVLeftMB,
+ const OMXVCMotionVector *pSrcMVUpperMB,
+ const OMXVCMotionVector *pSrcMVUpperRightMB,
+ OMX_INT fcodeForward,
+ OMXVCM4P2MacroblockType MBType
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodePadMV_PVOP (6.2.5.1.1)
+ *
+ * Description:
+ * Decodes and pads the four motion vectors associated with a non-intra P-VOP
+ * macroblock. For macroblocks of type OMX_VC_INTER4V, the output MV is
+ * padded as specified in [ISO14496-2], subclause 7.6.1.6. Otherwise, for
+ * macroblocks of types other than OMX_VC_INTER4V, the decoded MV is copied to
+ * all four output MV buffer entries.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream buffer
+ * pBitOffset - pointer to the bit position in the byte pointed to by
+ * *ppBitStream. *pBitOffset is valid within [0-7].
+ * pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB - pointers to the
+ * motion vector buffers of the macroblocks spatially at the left,
+ * upper, and upper-right side of the current macroblock,
+ * respectively; a value of NULL indicates unavailability. Note:
+ * Any neighborhood macroblock outside the current VOP or video
+ * packet or outside the current GOB (when short_video_header is
+ * 1 ) for which gob_header_empty is 0 is treated as
+ * transparent, according to [ISO14496-2], subclause 7.6.5.
+ * fcodeForward - a code equal to vop_fcode_forward in MPEG-4 bit stream
+ * syntax
+ * MBType - the type of the current macroblock. If MBType is not equal to
+ * OMX_VC_INTER4V, the destination motion vector buffer is still
+ * filled with the same decoded vector.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded, so
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream
+ * pDstMVCurMB - pointer to the motion vector buffer for the current
+ * macroblock; contains four decoded motion vectors
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDstMVCurMB
+ * - *pBitOffset exceeds [0,7]
+ * - fcodeForward exceeds (0,7]
+ * - MBType less than zero
+ * - motion vector buffer is not 4-byte aligned.
+ * OMX_Sts_Err - status error
+ *
+ */
+OMXResult omxVCM4P2_DecodePadMV_PVOP (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMXVCMotionVector *pSrcMVLeftMB,
+ OMXVCMotionVector*pSrcMVUpperMB,
+ OMXVCMotionVector *pSrcMVUpperRightMB,
+ OMXVCMotionVector*pDstMVCurMB,
+ OMX_INT fcodeForward,
+ OMXVCM4P2MacroblockType MBType
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients
+ * for one intra block. Two versions of the function (DCVLC and ACVLC) are
+ * provided in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the
+ * bitstream buffer
+ * pBitOffset - pointer to the bit position in the current byte referenced
+ * by *ppBitStream. The parameter *pBitOffset is valid in the
+ * range [0-7].
+ * Bit Position in one byte: |Most Least|
+ * *pBitOffset |0 1 2 3 4 5 6 7|
+ * predDir - AC prediction direction; used to select the zigzag scan
+ * pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used;
+ * performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction;
+ * performs alternate-vertical zigzag scan;
+ * - OMX_VC_VERTICAL - Vertical prediction;
+ * performs alternate-horizontal zigzag scan.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ * videoComp - video component type (luminance or chrominance) of the
+ * current block
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded such
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated such that it points to the current
+ * bit position in the byte pointed by *ppBitStream
+ * pDst - pointer to the coefficient buffer of current block; must be
+ * 4-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - *pBitOffset exceeds [0,7]
+ * - predDir exceeds [0,2]
+ * - pDst is not 4-byte aligned
+ * OMX_Sts_Err - if:
+ * - In DecodeVLCZigzag_IntraDCVLC, dc_size > 12
+ * - At least one of mark bits equals zero
+ * - Illegal stream encountered; code cannot be located in VLC table
+ * - Forbidden code encountered in the VLC FLC table.
+ * - The number of coefficients is greater than 64
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraDCVLC (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_S16 *pDst,
+ OMX_U8 predDir,
+ OMX_INT shortVideoHeader,
+ OMXVCM4P2VideoComponent videoComp
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients
+ * for one intra block. Two versions of the function (DCVLC and ACVLC) are
+ * provided in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the
+ * bitstream buffer
+ * pBitOffset - pointer to the bit position in the current byte referenced
+ * by *ppBitStream. The parameter *pBitOffset is valid in the
+ * range [0-7].
+ * Bit Position in one byte: |Most Least|
+ * *pBitOffset |0 1 2 3 4 5 6 7|
+ * predDir - AC prediction direction; used to select the zigzag scan
+ * pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used;
+ * performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction;
+ * performs alternate-vertical zigzag scan;
+ * - OMX_VC_VERTICAL - Vertical prediction;
+ * performs alternate-horizontal zigzag scan.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ * videoComp - video component type (luminance or chrominance) of the
+ * current block
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded such
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated such that it points to the current
+ * bit position in the byte pointed by *ppBitStream
+ * pDst - pointer to the coefficient buffer of current block; must be
+ * 4-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - *pBitOffset exceeds [0,7]
+ * - predDir exceeds [0,2]
+ * - pDst is not 4-byte aligned
+ * OMX_Sts_Err - if:
+ * - In DecodeVLCZigzag_IntraDCVLC, dc_size > 12
+ * - At least one of mark bits equals zero
+ * - Illegal stream encountered; code cannot be located in VLC table
+ * - Forbidden code encountered in the VLC FLC table.
+ * - The number of coefficients is greater than 64
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraACVLC (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_S16 *pDst,
+ OMX_U8 predDir,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeVLCZigzag_Inter (6.2.5.2.3)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one inter-coded block.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the stream buffer
+ * pBitOffset - pointer to the next available bit in the current stream
+ * byte referenced by *ppBitStream. The parameter *pBitOffset is
+ * valid within the range [0-7].
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded such
+ * that it points to the current byte in the stream buffer
+ * pBitOffset - *pBitOffset is updated after decoding such that it points
+ * to the next available bit in the stream byte referenced by
+ * *ppBitStream
+ * pDst - pointer to the coefficient buffer of current block; must be
+ * 4-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - pDst is not 4-byte aligned
+ * - *pBitOffset exceeds [0,7]
+ * OMX_Sts_Err - status error, if:
+ * - At least one mark bit is equal to zero
+ * - Encountered an illegal stream code that cannot be found in the VLC table
+ * - Encountered an illegal code in the VLC FLC table
+ * - The number of coefficients is greater than 64
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_Inter (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_S16 *pDst,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_QuantInvIntra_I (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an intra/inter coded
+ * block. Supports bits_per_pixel = 8. The output coefficients are clipped to
+ * the range [-2048, 2047].
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input (quantized) intra/inter block; must be
+ * aligned on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale)
+ * videoComp - video component type of the current block. Takes one of the
+ * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra
+ * version only).
+ * shortVideoHeader - binary flag indicating presence of short_video_header
+ * (intra version only).
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (dequantized) intra/inter block
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; one or more of the following is
+ * true:
+ * - pSrcDst is NULL
+ * - QP <= 0 or QP >=31
+ * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE.
+ *
+ */
+OMXResult omxVCM4P2_QuantInvIntra_I (
+ OMX_S16 *pSrcDst,
+ OMX_INT QP,
+ OMXVCM4P2VideoComponent videoComp,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_QuantInvInter_I (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an intra/inter coded
+ * block. Supports bits_per_pixel = 8. The output coefficients are clipped to
+ * the range [-2048, 2047].
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input (quantized) intra/inter block; must be
+ * aligned on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale)
+ * videoComp - video component type of the current block. Takes one of the
+ * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra
+ * version only).
+ * shortVideoHeader - binary flag indicating presence of short_video_header
+ * (intra version only).
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (dequantized) intra/inter block
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; one or more of the following is
+ * true:
+ * - pSrcDst is NULL
+ * - QP <= 0 or QP >=31
+ * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE.
+ *
+ */
+OMXResult omxVCM4P2_QuantInvInter_I (
+ OMX_S16 *pSrcDst,
+ OMX_INT QP
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Intra (6.2.5.4.1)
+ *
+ * Description:
+ * Decodes the INTRA block coefficients. Inverse quantization, inversely
+ * zigzag positioning, and IDCT, with appropriate clipping on each step, are
+ * performed on the coefficients. The results are then placed in the output
+ * frame/plane on a pixel basis. Note: This function will be used only when
+ * at least one non-zero AC coefficient of current block exists in the bit
+ * stream. The DC only condition will be handled in another function.
+ *
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream buffer. There is no boundary check for the bit stream
+ * buffer.
+ * pBitOffset - pointer to the bit position in the byte pointed to by
+ * *ppBitStream. *pBitOffset is valid within [0-7].
+ * step - width of the destination plane
+ * pCoefBufRow - pointer to the coefficient row buffer; must be aligned on
+ * an 8-byte boundary.
+ * pCoefBufCol - pointer to the coefficient column buffer; must be aligned
+ * on an 8-byte boundary.
+ * curQP - quantization parameter of the macroblock which the current block
+ * belongs to
+ * pQPBuf - pointer to the quantization parameter buffer
+ * blockIndex - block index indicating the component type and position as
+ * defined in [ISO14496-2], subclause 6.1.3.8, Figure 6-5.
+ * intraDCVLC - a code determined by intra_dc_vlc_thr and QP. This allows a
+ * mechanism to switch between two VLC for coding of Intra DC
+ * coefficients as per [ISO14496-2], Table 6-21.
+ * ACPredFlag - a flag equal to ac_pred_flag (of luminance) indicating if
+ * the ac coefficients of the first row or first column are
+ * differentially coded for intra coded macroblock.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded, so
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream
+ * pDst - pointer to the block in the destination plane; must be aligned on
+ * an 8-byte boundary.
+ * pCoefBufRow - pointer to the updated coefficient row buffer.
+ * pCoefBufCol - pointer to the updated coefficient column buffer Note:
+ * The coefficient buffers must be updated in accordance with the
+ * update procedure defined in section 6.2.2.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pCoefBufRow, pCoefBufCol,
+ * pQPBuf, pDst.
+ * - *pBitOffset exceeds [0,7]
+ * - curQP exceeds (1, 31)
+ * - blockIndex exceeds [0,5]
+ * - step is not a multiple of 8
+ * - a pointer alignment requirement was violated.
+ * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Intra.
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Intra (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_U8 *pDst,
+ OMX_INT step,
+ OMX_S16 *pCoefBufRow,
+ OMX_S16 *pCoefBufCol,
+ OMX_U8 curQP,
+ const OMX_U8 *pQPBuf,
+ OMX_INT blockIndex,
+ OMX_INT intraDCVLC,
+ OMX_INT ACPredFlag,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Inter (6.2.5.4.2)
+ *
+ * Description:
+ * Decodes the INTER block coefficients. This function performs inverse
+ * quantization, inverse zigzag positioning, and IDCT (with appropriate
+ * clipping on each step) on the coefficients. The results (residuals) are
+ * placed in a contiguous array of 64 elements. For INTER block, the output
+ * buffer holds the residuals for further reconstruction.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream buffer. There is no boundary check for the bit stream
+ * buffer.
+ * pBitOffset - pointer to the bit position in the byte pointed to by
+ * *ppBitStream. *pBitOffset is valid within [0-7]
+ * QP - quantization parameter
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded, so
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream
+ * pDst - pointer to the decoded residual buffer (a contiguous array of 64
+ * elements of OMX_S16 data type); must be aligned on a 16-byte
+ * boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - *pBitOffset exceeds [0,7]
+ * - QP <= 0.
+ * - pDst is not 16-byte aligned
+ * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Inter.
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Inter (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_S16 *pDst,
+ OMX_INT QP,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_PredictReconCoefIntra (6.2.5.4.3)
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+ * to the function call, prediction direction (predDir) should be selected as
+ * specified in [ISO14496-2], subclause 7.4.3.1.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the coefficient buffer which contains the quantized
+ * coefficient residuals (PQF) of the current block; must be
+ * aligned on a 4-byte boundary. The output coefficients are
+ * saturated to the range [-2048, 2047].
+ * pPredBufRow - pointer to the coefficient row buffer; must be aligned on
+ * a 4-byte boundary.
+ * pPredBufCol - pointer to the coefficient column buffer; must be aligned
+ * on a 4-byte boundary.
+ * curQP - quantization parameter of the current block. curQP may equal to
+ * predQP especially when the current block and the predictor block
+ * are in the same macroblock.
+ * predQP - quantization parameter of the predictor block
+ * predDir - indicates the prediction direction which takes one of the
+ * following values: OMX_VC_HORIZONTAL - predict horizontally
+ * OMX_VC_VERTICAL - predict vertically
+ * ACPredFlag - a flag indicating if AC prediction should be performed. It
+ * is equal to ac_pred_flag in the bit stream syntax of MPEG-4
+ * videoComp - video component type (luminance or chrominance) of the
+ * current block
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the coefficient buffer which contains the quantized
+ * coefficients (QF) of the current block
+ * pPredBufRow - pointer to the updated coefficient row buffer
+ * pPredBufCol - pointer to the updated coefficient column buffer Note:
+ * Buffer update: Update the AC prediction buffer (both row and
+ * column buffer).
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the pointers is NULL:
+ * pSrcDst, pPredBufRow, or pPredBufCol.
+ * - curQP <= 0,
+ * - predQP <= 0,
+ * - curQP >31,
+ * - predQP > 31,
+ * - predDir exceeds [1,2]
+ * - pSrcDst, pPredBufRow, or pPredBufCol is not 4-byte aligned.
+ *
+ */
+OMXResult omxVCM4P2_PredictReconCoefIntra (
+ OMX_S16 *pSrcDst,
+ OMX_S16 *pPredBufRow,
+ OMX_S16 *pPredBufCol,
+ OMX_INT curQP,
+ OMX_INT predQP,
+ OMX_INT predDir,
+ OMX_INT ACPredFlag,
+ OMXVCM4P2VideoComponent videoComp
+);
+
+
+
+/**
+ * Function: omxVCM4P2_MCReconBlock (6.2.5.5.1)
+ *
+ * Description:
+ * Performs motion compensation prediction for an 8x8 block using
+ * interpolation described in [ISO14496-2], subclause 7.6.2.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the block in the reference plane.
+ * srcStep - distance between the start of consecutive lines in the
+ * reference plane, in bytes; must be a multiple of 8.
+ * dstStep - distance between the start of consecutive lines in the
+ * destination plane, in bytes; must be a multiple of 8.
+ * pSrcResidue - pointer to a buffer containing the 16-bit prediction
+ * residuals; must be 16-byte aligned. If the pointer is NULL, then
+ * no prediction is done, only motion compensation, i.e., the block
+ * is moved with interpolation.
+ * predictType - bilinear interpolation type, as defined in section
+ * 6.2.1.2.
+ * rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination buffer; must be 8-byte aligned. If
+ * prediction residuals are added then output intensities are
+ * clipped to the range [0,255].
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - pDst is not 8-byte aligned.
+ * - pSrcResidue is not 16-byte aligned.
+ * - one or more of the following pointers is NULL: pSrc or pDst.
+ * - either srcStep or dstStep is not a multiple of 8.
+ * - invalid type specified for the parameter predictType.
+ * - the parameter rndVal is not equal either to 0 or 1.
+ *
+ */
+OMXResult omxVCM4P2_MCReconBlock (
+ const OMX_U8 *pSrc,
+ OMX_INT srcStep,
+ const OMX_S16 *pSrcResidue,
+ OMX_U8 *pDst,
+ OMX_INT dstStep,
+ OMX_INT predictType,
+ OMX_INT rndVal
+);
+
+
+
+/* 6.3.1.1 Intra 16x16 Prediction Modes */
+/* Enumerates the intra_16x16 macroblock prediction modes; this is the type of the predMode argument of omxVCM4P10_PredictIntra_16x16. */
+
+typedef enum {
+ OMX_VC_16X16_VERT = 0, /**< Intra_16x16_Vertical */
+ OMX_VC_16X16_HOR = 1, /**< Intra_16x16_Horizontal */
+ OMX_VC_16X16_DC = 2, /**< Intra_16x16_DC */
+ OMX_VC_16X16_PLANE = 3 /**< Intra_16x16_Plane */
+} OMXVCM4P10Intra16x16PredMode;
+
+
+
+/* 6.3.1.2 Intra 4x4 Prediction Modes */
+/* Enumerates the intra_4x4 macroblock prediction modes; this is the type of the predMode argument of omxVCM4P10_PredictIntra_4x4. */
+
+typedef enum {
+ OMX_VC_4X4_VERT = 0, /**< Intra_4x4_Vertical */
+ OMX_VC_4X4_HOR = 1, /**< Intra_4x4_Horizontal */
+ OMX_VC_4X4_DC = 2, /**< Intra_4x4_DC */
+ OMX_VC_4X4_DIAG_DL = 3, /**< Intra_4x4_Diagonal_Down_Left */
+ OMX_VC_4X4_DIAG_DR = 4, /**< Intra_4x4_Diagonal_Down_Right */
+ OMX_VC_4X4_VR = 5, /**< Intra_4x4_Vertical_Right */
+ OMX_VC_4X4_HD = 6, /**< Intra_4x4_Horizontal_Down */
+ OMX_VC_4X4_VL = 7, /**< Intra_4x4_Vertical_Left */
+ OMX_VC_4X4_HU = 8 /**< Intra_4x4_Horizontal_Up */
+} OMXVCM4P10Intra4x4PredMode;
+
+
+
+/* 6.3.1.3 Chroma Prediction Modes */
+/* Enumerates the intra chroma prediction modes. */
+
+typedef enum {
+ OMX_VC_CHROMA_DC = 0, /**< Intra_Chroma_DC */
+ OMX_VC_CHROMA_HOR = 1, /**< Intra_Chroma_Horizontal */
+ OMX_VC_CHROMA_VERT = 2, /**< Intra_Chroma_Vertical */
+ OMX_VC_CHROMA_PLANE = 3 /**< Intra_Chroma_Plane */
+} OMXVCM4P10IntraChromaPredMode;
+
+
+
+/* 6.3.1.4 Motion Estimation Modes */
+/* Enumerates the H.264 motion estimation (search) modes. */
+
+typedef enum {
+ OMX_VC_M4P10_FAST_SEARCH = 0, /**< Fast motion search */
+ OMX_VC_M4P10_FULL_SEARCH = 1 /**< Full motion search */
+} OMXVCM4P10MEMode;
+
+
+
+/* 6.3.1.5 Macroblock Types */
+/* Enumerates the H.264 macroblock types; the types are defined by [ISO14496-10]. */
+/* Note: the numbering is not contiguous - values 6 and 7 are not assigned, and the intra types start at 8. */
+
+typedef enum {
+ OMX_VC_P_16x16 = 0, /* defined by [ISO14496-10] */
+ OMX_VC_P_16x8 = 1,
+ OMX_VC_P_8x16 = 2,
+ OMX_VC_P_8x8 = 3,
+ OMX_VC_PREF0_8x8 = 4,
+ OMX_VC_INTER_SKIP = 5,
+ OMX_VC_INTRA_4x4 = 8,
+ OMX_VC_INTRA_16x16 = 9,
+ OMX_VC_INTRA_PCM = 10
+} OMXVCM4P10MacroblockType;
+
+
+
+/* 6.3.1.6 Sub-Macroblock Types */
+/* Enumerates the H.264 sub-macroblock types; the types are defined by [ISO14496-10]. */
+/* Used as the element type of OMXVCM4P10MBInfo.subMBType[]. */
+
+typedef enum {
+ OMX_VC_SUB_P_8x8 = 0, /* defined by [ISO14496-10] */
+ OMX_VC_SUB_P_8x4 = 1,
+ OMX_VC_SUB_P_4x8 = 2,
+ OMX_VC_SUB_P_4x4 = 3
+} OMXVCM4P10SubMacroblockType;
+
+
+
+/* 6.3.1.7 Variable Length Coding (VLC) Information */
+/* Coefficient information used for variable length coding: trailing ones, coefficient counts, levels and runs. */
+
+typedef struct {
+ OMX_U8 uTrailing_Ones; /* Number of trailing ones; 3 at most */
+ OMX_U8 uTrailing_One_Signs; /* Signs of the trailing ones (presumably packed as bit flags - confirm against the OpenMAX DL specification) */
+ OMX_U8 uNumCoeffs; /* Total number of non-zero coefs, including trailing ones */
+ OMX_U8 uTotalZeros; /* Total number of zero coefs */
+ OMX_S16 iLevels[16]; /* Levels of non-zero coefs, in reverse zig-zag order */
+ OMX_U8 uRuns[16]; /* Runs for levels and trailing ones, in reverse zig-zag order */
+} OMXVCM4P10VLCInfo;
+
+
+
+/* 6.3.1.8 Macroblock Information */
+/* Per-macroblock information: slice number, macroblock and sub-macroblock types, QPs, coded block patterns, */
+/* motion vectors with their predictions, reference indices and intra prediction modes. */
+/* OMXVCM4P10MBInfoPtr is a pointer to this structure. */
+
+typedef struct {
+ OMX_S32 sliceId; /* slice number */
+ OMXVCM4P10MacroblockType mbType; /* MB type */
+ OMXVCM4P10SubMacroblockType subMBType[4]; /* sub-block type */
+ OMX_S32 qpy; /* qp for luma */
+ OMX_S32 qpc; /* qp for chroma */
+ OMX_U32 cbpy; /* CBP Luma */
+ OMX_U32 cbpc; /* CBP Chroma */
+ OMXVCMotionVector pMV0[4][4]; /* motion vector, represented using 1/4-pel units, pMV0[blocky][blockx] (blocky = 0~3, blockx =0~3) */
+ OMXVCMotionVector pMVPred[4][4]; /* motion vector prediction, represented using 1/4-pel units, pMVPred[blocky][blockx] (blocky = 0~3, blockx = 0~3) */
+ OMX_U8 pRefL0Idx[4]; /* reference picture indices */
+ OMXVCM4P10Intra16x16PredMode Intra16x16PredMode; /* best intra 16x16 prediction mode */
+ OMXVCM4P10Intra4x4PredMode pIntra4x4PredMode[16]; /* best intra 4x4 prediction mode for each block; blocks are indexed as for pMV0 above (presumably flattened as blocky*4 + blockx - confirm) */
+} OMXVCM4P10MBInfo, *OMXVCM4P10MBInfoPtr;
+
+
+
+/* 6.3.1.9 Motion Estimation Parameters */
+/* Configuration for H.264 motion estimation: block-split enables, sub-pel search enables and search ranges. */
+
+typedef struct {
+ OMX_S32 blockSplitEnable8x8; /* enables 16x8, 8x16, 8x8 */
+ OMX_S32 blockSplitEnable4x4; /* enable splitting of 8x4, 4x8, 4x4 blocks */
+ OMX_S32 halfSearchEnable; /* presumably enables half-pel search (1=enable, 0=disable) - confirm */
+ OMX_S32 quarterSearchEnable; /* presumably enables quarter-pel search (1=enable, 0=disable) - confirm */
+ OMX_S32 intraEnable4x4; /* 1=enable, 0=disable */
+ OMX_S32 searchRange16x16; /* integer pixel units */
+ OMX_S32 searchRange8x8; /* presumably integer pixel units, as for searchRange16x16 - confirm */
+ OMX_S32 searchRange4x4; /* presumably integer pixel units, as for searchRange16x16 - confirm */
+} OMXVCM4P10MEParams;
+
+
+
+/**
+ * Function: omxVCM4P10_PredictIntra_4x4 (6.3.3.1.1)
+ *
+ * Description:
+ * Perform Intra_4x4 prediction for luma samples. If the upper-right block is
+ * not available, then duplication work should be handled inside the function.
+ * Users need not define them outside.
+ *
+ * Input Arguments:
+ *
+ * pSrcLeft - Pointer to the buffer of 4 left pixels:
+ * p[x, y] (x = -1, y = 0..3)
+ * pSrcAbove - Pointer to the buffer of 8 above pixels:
+ * p[x,y] (x = 0..7, y =-1);
+ * must be aligned on a 4-byte boundary.
+ * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ * leftStep - Step of left pixel buffer; must be a multiple of 4.
+ * dstStep - Step of the destination buffer; must be a multiple of 4.
+ * predMode - Intra_4x4 prediction mode.
+ * availability - Neighboring 4x4 block availability flag, refer to
+ * "Neighboring Macroblock Availability" .
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to the destination buffer; must be aligned on a 4-byte
+ * boundary.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pDst is NULL.
+ * dstStep < 4, or dstStep is not a multiple of 4.
+ * leftStep is not a multiple of 4.
+ * predMode is not in the valid range of enumeration
+ * OMXVCM4P10Intra4x4PredMode.
+ * predMode is OMX_VC_4X4_VERT, but availability doesn't set OMX_VC_UPPER
+ * indicating p[x,-1] (x = 0..3) is not available.
+ * predMode is OMX_VC_4X4_HOR, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..3) is not available.
+ * predMode is OMX_VC_4X4_DIAG_DL, but availability doesn't set
+ * OMX_VC_UPPER indicating p[x, -1] (x = 0..3) is not available.
+ * predMode is OMX_VC_4X4_DIAG_DR, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ * available.
+ * predMode is OMX_VC_4X4_VR, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ * available.
+ * predMode is OMX_VC_4X4_HD, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ * available.
+ * predMode is OMX_VC_4X4_VL, but availability doesn't set OMX_VC_UPPER
+ * indicating p[x,-1] (x = 0..3) is not available.
+ * predMode is OMX_VC_4X4_HU, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..3) is not available.
+ * availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ * availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ * either pSrcAbove or pDst is not aligned on a 4-byte boundary.
+ *
+ * Note:
+ * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ * they are not used by intra prediction as implied in predMode.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntra_4x4 (
+ const OMX_U8 *pSrcLeft,
+ const OMX_U8 *pSrcAbove,
+ const OMX_U8 *pSrcAboveLeft,
+ OMX_U8 *pDst,
+ OMX_INT leftStep,
+ OMX_INT dstStep,
+ OMXVCM4P10Intra4x4PredMode predMode,
+ OMX_S32 availability
+);
+
+
+
+/**
+ * Function: omxVCM4P10_PredictIntra_16x16 (6.3.3.1.2)
+ *
+ * Description:
+ * Perform Intra_16x16 prediction for luma samples. If the upper-right block
+ * is not available, then duplication work should be handled inside the
+ * function. Users need not define them outside.
+ *
+ * Input Arguments:
+ *
+ * pSrcLeft - Pointer to the buffer of 16 left pixels: p[x, y] (x = -1, y =
+ * 0..15)
+ * pSrcAbove - Pointer to the buffer of 16 above pixels: p[x,y] (x = 0..15,
+ * y= -1); must be aligned on a 16-byte boundary.
+ * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ * leftStep - Step of left pixel buffer; must be a multiple of 16.
+ * dstStep - Step of the destination buffer; must be a multiple of 16.
+ * predMode - Intra_16x16 prediction mode, please refer to section 3.4.1.
+ * availability - Neighboring 16x16 MB availability flag. Refer to
+ * section 3.4.4.
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to the destination buffer; must be aligned on a 16-byte
+ * boundary.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pDst is NULL.
+ * dstStep < 16, or dstStep is not a multiple of 16.
+ * leftStep is not a multiple of 16.
+ * predMode is not in the valid range of enumeration
+ * OMXVCM4P10Intra16x16PredMode
+ * predMode is OMX_VC_16X16_VERT, but availability doesn't set
+ * OMX_VC_UPPER indicating p[x,-1] (x = 0..15) is not available.
+ * predMode is OMX_VC_16X16_HOR, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..15) is not available.
+ * predMode is OMX_VC_16X16_PLANE, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1](x = 0..15), or p[-1,y] (y = 0..15), or p[-1,-1] is not
+ * available.
+ * availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ * availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ * either pSrcAbove or pDst is not aligned on a 16-byte boundary.
+ *
+ * Note:
+ * pSrcAbove, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ * they are not used by intra prediction implied in predMode.
+ * Note:
+ * OMX_VC_UPPER_RIGHT is not used in intra_16x16 luma prediction.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntra_16x16 (
+ const OMX_U8 *pSrcLeft,
+ const OMX_U8 *pSrcAbove,
+ const OMX_U8 *pSrcAboveLeft,
+ OMX_U8 *pDst,
+ OMX_INT leftStep,
+ OMX_INT dstStep,
+ OMXVCM4P10Intra16x16PredMode predMode,
+ OMX_S32 availability
+);
+
+
+
+/**
+ * Function: omxVCM4P10_PredictIntraChroma_8x8 (6.3.3.1.3)
+ *
+ * Description:
+ * Performs intra prediction for chroma samples.
+ *
+ * Input Arguments:
+ *
+ * pSrcLeft - Pointer to the buffer of 8 left pixels: p[x, y] (x = -1, y =
+ * 0..7).
+ * pSrcAbove - Pointer to the buffer of 8 above pixels: p[x,y] (x = 0..7, y
+ * = -1); must be aligned on an 8-byte boundary.
+ * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ * leftStep - Step of left pixel buffer; must be a multiple of 8.
+ * dstStep - Step of the destination buffer; must be a multiple of 8.
+ * predMode - Intra chroma prediction mode, please refer to section 3.4.3.
+ * availability - Neighboring chroma block availability flag, please refer
+ * to "Neighboring Macroblock Availability".
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to the destination buffer; must be aligned on an 8-byte
+ * boundary.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If any of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pDst is NULL.
+ * dstStep < 8 or dstStep is not a multiple of 8.
+ * leftStep is not a multiple of 8.
+ * predMode is not in the valid range of enumeration
+ * OMXVCM4P10IntraChromaPredMode.
+ * predMode is OMX_VC_CHROMA_VERT, but availability doesn't set
+ * OMX_VC_UPPER indicating p[x,-1] (x = 0..7) is not available.
+ * predMode is OMX_VC_CHROMA_HOR, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..7) is not available.
+ * predMode is OMX_VC_CHROMA_PLANE, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1](x = 0..7), or p[-1,y] (y = 0..7), or p[-1,-1] is not
+ * available.
+ * availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ * availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ * either pSrcAbove or pDst is not aligned on an 8-byte boundary.
+ *
+ * Note: pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ * they are not used by intra prediction implied in predMode.
+ *
+ * Note: OMX_VC_UPPER_RIGHT is not used in intra chroma prediction.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntraChroma_8x8 (
+ const OMX_U8 *pSrcLeft,
+ const OMX_U8 *pSrcAbove,
+ const OMX_U8 *pSrcAboveLeft,
+ OMX_U8 *pDst,
+ OMX_INT leftStep,
+ OMX_INT dstStep,
+ OMXVCM4P10IntraChromaPredMode predMode,
+ OMX_S32 availability
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InterpolateLuma (6.3.3.2.1)
+ *
+ * Description:
+ * Performs quarter-pixel interpolation for inter luma MB. It is assumed that
+ * the frame is already padded when calling this function.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the source reference frame buffer
+ * srcStep - Reference frame step, in bytes; must be a multiple of roi.width
+ * dstStep - Destination frame step, in bytes; must be a multiple of
+ * roi.width
+ * dx - Fractional part of horizontal motion vector component in 1/4 pixel
+ * unit; valid in the range [0,3]
+ * dy - Fractional part of vertical motion vector component in 1/4 pixel
+ * unit; valid in the range [0,3]
+ * roi - Dimension of the interpolation region; the parameters roi.width and
+ * roi.height must be equal to either 4, 8, or 16.
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to the destination frame buffer:
+ * if roi.width==4, 4-byte alignment required
+ * if roi.width==8, 8-byte alignment required
+ * if roi.width==16, 16-byte alignment required
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pSrc or pDst is NULL.
+ * srcStep or dstStep < roi.width.
+ * dx or dy is out of range [0,3].
+ * roi.width or roi.height is out of range {4, 8, 16}.
+ * roi.width is equal to 4, but pDst is not 4-byte aligned.
+ * roi.width is equal to 8 or 16, but pDst is not 8-byte aligned.
+ * srcStep or dstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_InterpolateLuma (
+ const OMX_U8 *pSrc,
+ OMX_S32 srcStep,
+ OMX_U8 *pDst,
+ OMX_S32 dstStep,
+ OMX_S32 dx,
+ OMX_S32 dy,
+ OMXSize roi
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InterpolateChroma (6.3.3.2.2)
+ *
+ * Description:
+ * Performs 1/8-pixel interpolation for inter chroma MB.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the source reference frame buffer
+ * srcStep - Reference frame step in bytes
+ * dstStep - Destination frame step in bytes; must be a multiple of
+ * roi.width.
+ * dx - Fractional part of horizontal motion vector component in 1/8 pixel
+ * unit; valid in the range [0,7]
+ * dy - Fractional part of vertical motion vector component in 1/8 pixel
+ * unit; valid in the range [0,7]
+ * roi - Dimension of the interpolation region; the parameters roi.width and
+ * roi.height must be equal to either 2, 4, or 8.
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to the destination frame buffer:
+ * if roi.width==2, 2-byte alignment required
+ * if roi.width==4, 4-byte alignment required
+ * if roi.width==8, 8-byte alignment required
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pSrc or pDst is NULL.
+ * srcStep or dstStep < 8.
+ * dx or dy is out of range [0,7].
+ * roi.width or roi.height is out of range {2,4,8}.
+ * roi.width is equal to 2, but pDst is not 2-byte aligned.
+ * roi.width is equal to 4, but pDst is not 4-byte aligned.
+ * roi.width is equal to 8, but pDst is not 8-byte aligned.
+ * srcStep or dstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_InterpolateChroma (
+ const OMX_U8 *pSrc,
+ OMX_S32 srcStep,
+ OMX_U8 *pDst,
+ OMX_S32 dstStep,
+ OMX_S32 dx,
+ OMX_S32 dy,
+ OMXSize roi
+);
+
+
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingLuma_VerEdge_I (6.3.3.3.1)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the luma
+ * macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the input macroblock; must be 16-byte aligned.
+ * srcdstStep - Step of the arrays; must be a multiple of 16.
+ * pAlpha - Array of size 2 of alpha thresholds (the first item is the alpha
+ * threshold for the external vertical edge, and the second item is
+ * for the internal vertical edge); per [ISO14496-10] alpha values
+ * must be in the range [0,255].
+ * pBeta - Array of size 2 of beta thresholds (the first item is the beta
+ * threshold for the external vertical edge, and the second item is
+ * for the internal vertical edge); per [ISO14496-10] beta values
+ * must be in the range [0,18].
+ * pThresholds - Array of size 16 of Thresholds (TC0) (values for the left
+ * edge of each 4x4 block, arranged in vertical block order); must
+ * be aligned on a 4-byte boundary. Per [ISO14496-10] values must
+ * be in the range [0,25].
+ * pBS - Array of size 16 of BS parameters (arranged in vertical block
+ * order); valid in the range [0,4] with the following
+ * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii)
+ * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * Any of the pointers pSrcDst, pAlpha, pBeta, pThresholds, or pBS
+ * is NULL.
+ * Either pThresholds or pBS is not aligned on a 4-byte boundary.
+ * pSrcDst is not 16-byte aligned.
+ * srcdstStep is not a multiple of 16.
+ * pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * One or more entries in the table pThresholds[0..15] is outside of the
+ * range [0,25].
+ * pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 &&
+ * pBS[i^3]!=4) for 0<=i<=3.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingLuma_VerEdge_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingLuma_HorEdge_I (6.3.3.3.2)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four horizontal edges of the luma
+ * macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 16-byte aligned.
+ * srcdstStep - step of the arrays; must be a multiple of 16.
+ * pAlpha - array of size 2 of alpha thresholds (the first item is the alpha
+ * threshold for the external horizontal edge, and the second item
+ * is for the internal horizontal edge); per [ISO14496-10] alpha
+ * values must be in the range [0,255].
+ * pBeta - array of size 2 of beta thresholds (the first item is the beta
+ * threshold for the external horizontal edge, and the second item
+ * is for the internal horizontal edge). Per [ISO14496-10] beta
+ * values must be in the range [0,18].
+ * pThresholds - array of size 16 containing thresholds, TC0, for the top
+ * horizontal edge of each 4x4 block, arranged in horizontal block
+ * order; must be aligned on a 4-byte boundary. Per [ISO14496-10]
+ * values must be in the range [0,25].
+ * pBS - array of size 16 of BS parameters (arranged in horizontal block
+ * order); valid in the range [0,4] with the following
+ * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii)
+ * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr, if one of the following cases occurs:
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds, or pBS.
+ * - either pThresholds or pBS is not aligned on a 4-byte boundary.
+ * - pSrcDst is not 16-byte aligned.
+ * - srcdstStep is not a multiple of 16.
+ * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * - pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * - One or more entries in the table pThresholds[0..15] is
+ * outside of the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingLuma_HorEdge_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingChroma_VerEdge_I (6.3.3.3.3)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the chroma
+ * macroblock (8x8).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the input macroblock; must be 8-byte aligned.
+ * srcdstStep - Step of the arrays; must be a multiple of 8.
+ * pAlpha - Array of size 2 of alpha thresholds (the first item is the alpha
+ * threshold for the external vertical edge, and the second item is
+ * for the internal vertical edge); per [ISO14496-10] alpha values
+ * must be in the range [0,255].
+ * pBeta - Array of size 2 of beta thresholds (the first item is the beta
+ * threshold for the external vertical edge, and the second item is
+ * for the internal vertical edge); per [ISO14496-10] beta values
+ * must be in the range [0,18].
+ * pThresholds - Array of size 8 containing thresholds, TC0, for the left
+ * vertical edge of each 4x2 chroma block, arranged in vertical
+ * block order; must be aligned on a 4-byte boundary. Per
+ * [ISO14496-10] values must be in the range [0,25].
+ * pBS - Array of size 16 of BS parameters (values for each 2x2 chroma
+ * block, arranged in vertical block order). This parameter is the
+ * same as the pBS parameter passed into FilterDeblockingLuma_VerEdge_I;
+ * valid in the range [0,4] with the following restrictions: i)
+ * pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and
+ * only if pBS[i^3]== 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds, or pBS.
+ * - pSrcDst is not 8-byte aligned.
+ * - srcdstStep is not a multiple of 8.
+ * - pThresholds is not 4-byte aligned.
+ * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * - pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * - One or more entries in the table pThresholds[0..7] is outside
+ * of the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - pBS is not 4-byte aligned.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingChroma_VerEdge_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingChroma_HorEdge_I (6.3.3.3.4)
+ *
+ * Description:
+ * Performs in-place deblock filtering on the horizontal edges of the chroma
+ * macroblock (8x8).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 8-byte aligned.
+ * srcdstStep - array step; must be a multiple of 8.
+ * pAlpha - array of size 2 containing alpha thresholds; the first element
+ * contains the threshold for the external horizontal edge, and the
+ * second element contains the threshold for the internal
+ * horizontal edge. Per [ISO14496-10] alpha values must be in the
+ * range [0,255].
+ * pBeta - array of size 2 containing beta thresholds; the first element
+ * contains the threshold for the external horizontal edge, and the
+ * second element contains the threshold for the internal
+ * horizontal edge. Per [ISO14496-10] beta values must be in the
+ * range [0,18].
+ * pThresholds - array of size 8 containing thresholds, TC0, for the top
+ * horizontal edge of each 2x4 chroma block, arranged in horizontal
+ * block order; must be aligned on a 4-byte boundary. Per
+ * [ISO14496-10] values must be in the range [0,25].
+ * pBS - array of size 16 containing BS parameters for each 2x2 chroma
+ * block, arranged in horizontal block order; valid in the range
+ * [0,4] with the following restrictions: i) pBS[i]== 4 may occur
+ * only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 4.
+ * Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr, if one of the following cases occurs:
+ * - any of the following pointers is NULL:
+ * pSrcDst, pAlpha, pBeta, pThresholds, or pBS.
+ * - pSrcDst is not 8-byte aligned.
+ * - srcdstStep is not a multiple of 8.
+ * - pThresholds is not 4-byte aligned.
+ * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * - pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * - One or more entries in the table pThresholds[0..7] is outside
+ * of the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - pBS is not 4-byte aligned.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingChroma_HorEdge_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DeblockLuma_I (6.3.3.3.5)
+ *
+ * Description:
+ * This function performs in-place deblock filtering on the horizontal and
+ * vertical edges of a luma macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 16-byte aligned.
+ * srcdstStep - image width; must be a multiple of 16.
+ * pAlpha - pointer to a 2x2 table of alpha thresholds, organized as
+ * follows: {external vertical edge, internal vertical edge,
+ * external horizontal edge, internal horizontal edge }. Per
+ * [ISO14496-10] alpha values must be in the range [0,255].
+ * pBeta - pointer to a 2x2 table of beta thresholds, organized as follows:
+ * {external vertical edge, internal vertical edge, external
+ * horizontal edge, internal horizontal edge }. Per [ISO14496-10]
+ * beta values must be in the range [0,18].
+ * pThresholds - pointer to a 16x2 table of threshold (TC0), organized as
+ * follows: {values for the left or above edge of each 4x4 block,
+ * arranged in vertical block order and then in horizontal block
+ * order}; must be aligned on a 4-byte boundary. Per [ISO14496-10]
+ * values must be in the range [0,25].
+ * pBS - pointer to a 16x2 table of BS parameters arranged in scan block
+ * order for vertical edges and then horizontal edges; valid in the
+ * range [0,4] with the following restrictions: i) pBS[i]== 4 may
+ * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]==
+ * 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds or pBS.
+ * - pSrcDst is not 16-byte aligned.
+ * - either pThresholds or pBS is not aligned on a 4-byte boundary.
+ * - one or more entries in the table pAlpha[0..3] is outside the range
+ * [0,255].
+ * - one or more entries in the table pBeta[0..3] is outside the range
+ * [0,18].
+ * - one or more entries in the table pThresholds[0..31] is outside of
+ * the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - srcdstStep is not a multiple of 16.
+ *
+ */
+OMXResult omxVCM4P10_DeblockLuma_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DeblockChroma_I (6.3.3.3.6)
+ *
+ * Description:
+ * Performs in-place deblocking filtering on all edges of the chroma
+ * macroblock (8x8).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 8-byte aligned.
+ * srcdstStep - step of the arrays; must be a multiple of 8.
+ * pAlpha - pointer to a 2x2 array of alpha thresholds, organized as
+ * follows: {external vertical edge, internal vertical edge,
+ * external horizontal edge, internal horizontal edge }. Per
+ * [ISO14496-10] alpha values must be in the range [0,255].
+ * pBeta - pointer to a 2x2 array of beta thresholds, organized as follows:
+ * { external vertical edge, internal vertical edge, external
+ * horizontal edge, internal horizontal edge }. Per [ISO14496-10]
+ * beta values must be in the range [0,18].
+ * pThresholds - array of size 8x2 of Thresholds (TC0) (values for the left
+ * or above edge of each 4x2 or 2x4 block, arranged in vertical
+ * block order and then in horizontal block order); must be aligned
+ * on a 4-byte boundary. Per [ISO14496-10] values must be in the
+ * range [0,25].
+ * pBS - array of size 16x2 of BS parameters (arranged in scan block order
+ * for vertical edges and then horizontal edges); valid in the
+ * range [0,4] with the following restrictions: i) pBS[i]== 4 may
+ * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]==
+ * 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds, or pBS.
+ * - pSrcDst is not 8-byte aligned.
+ * - either pThresholds or pBS is not 4-byte aligned.
+ * - one or more entries in the table pAlpha[0..3] is outside the range
+ * [0,255].
+ * - one or more entries in the table pBeta[0..3] is outside the range
+ * [0,18].
+ * - one or more entries in the table pThresholds[0..15] is outside of
+ * the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - srcdstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_DeblockChroma_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (6.3.4.1.1)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse raster scan for a 2x2 block of
+ * ChromaDCLevel. The decoded coefficients in the packed position-coefficient
+ * buffer are stored in reverse zig-zag order, i.e., the first buffer element
+ * contains the last non-zero position-coefficient pair of the block. Within
+ * each position-coefficient pair, the position entry indicates the
+ * raster-scan position of the coefficient, while the coefficient entry
+ * contains the coefficient value.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - Double pointer to current byte in bit stream buffer
+ * pOffset - Pointer to current bit position in the byte pointed to by
+ * *ppBitStream; valid in the range [0,7].
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after each block is decoded
+ * pOffset - *pOffset is updated after each block is decoded
+ * pNumCoeff - Pointer to the number of nonzero coefficients in this block
+ * ppPosCoefBuf - Double pointer to destination residual
+ * coefficient-position pair buffer. Buffer position
+ * (*ppPosCoefBuf) is updated upon return, unless there are only
+ * zero coefficients in the currently decoded block. In this case
+ * the caller is expected to bypass the transform/dequantization of
+ * the empty blocks.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppBitStream or pOffset is NULL.
+ * - ppPosCoefBuf or pNumCoeff is NULL.
+ * OMX_Sts_Err - if one of the following is true:
+ * - an illegal code is encountered in the bitstream
+ *
+ */
+OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (
+ const OMX_U8 **ppBitStream,
+ OMX_S32*pOffset,
+ OMX_U8 *pNumCoeff,
+ OMX_U8 **ppPosCoefbuf
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC (6.3.4.1.2)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of
+ * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel, and ChromaACLevel. Inverse
+ * field scan is not supported. The decoded coefficients in the packed
+ * position-coefficient buffer are stored in reverse zig-zag order, i.e., the
+ * first buffer element contains the last non-zero position-coefficient pair of
+ * the block. Within each position-coefficient pair, the position entry
+ * indicates the raster-scan position of the coefficient, while the
+ * coefficient entry contains the coefficient value.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - Double pointer to current byte in bit stream buffer
+ * pOffset - Pointer to current bit position in the byte pointed to by
+ * *ppBitStream; valid in the range [0,7].
+ * sMaxNumCoeff - Maximum number of non-zero coefficients in the current
+ * block
+ * sVLCSelect - VLC table selector, obtained from the number of non-zero
+ * coefficients contained in the above and left 4x4 blocks. It is
+ * equivalent to the variable nC described in H.264 standard Table
+ * 9-5, except its value can't be less than zero.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after each block is decoded.
+ * pOffset - *pOffset is updated after each block is decoded
+ * pNumCoeff - Pointer to the number of nonzero coefficients in this block
+ * ppPosCoefBuf - Double pointer to destination residual
+ * coefficient-position pair buffer. Buffer position
+ * (*ppPosCoefBuf) is updated upon return, unless there are only
+ * zero coefficients in the currently decoded block. In this case
+ * the caller is expected to bypass the transform/dequantization of
+ * the empty blocks.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppBitStream or pOffset is NULL.
+ * - ppPosCoefBuf or pNumCoeff is NULL.
+ * - sMaxNumCoeff is not equal to either 15 or 16.
+ * - sVLCSelect is less than 0.
+ *
+ * OMX_Sts_Err - if one of the following is true:
+ * - an illegal code is encountered in the bitstream
+ *
+ */
+OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC (
+ const OMX_U8 **ppBitStream,
+ OMX_S32 *pOffset,
+ OMX_U8 *pNumCoeff,
+ OMX_U8 **ppPosCoefbuf,
+ OMX_INT sVLCSelect,
+ OMX_INT sMaxNumCoeff
+);
+
+
+
+/**
+ * Function: omxVCM4P10_TransformDequantLumaDCFromPair (6.3.4.2.1)
+ *
+ * Description:
+ * Reconstructs the 4x4 LumaDC block from the coefficient-position pair
+ * buffer, performs integer inverse transformation and dequantization for
+ * 4x4 LumaDC coefficients, and updates the pair buffer pointer to the next
+ * non-empty block.
+ *
+ * Input Arguments:
+ *
+ * ppSrc - Double pointer to residual coefficient-position pair buffer
+ * output by CAVLC decoding
+ * QP - Quantization parameter QpY
+ *
+ * Output Arguments:
+ *
+ * ppSrc - *ppSrc is updated to the start of the next non-empty block
+ * pDst - Pointer to the reconstructed 4x4 LumaDC coefficients buffer; must
+ * be aligned on an 8-byte boundary.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppSrc or pDst is NULL.
+ * - pDst is not 8-byte aligned.
+ * - QP is not in the range of [0,51].
+ *
+ */
+OMXResult omxVCM4P10_TransformDequantLumaDCFromPair (
+ const OMX_U8 **ppSrc,
+ OMX_S16 *pDst,
+ OMX_INT QP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_TransformDequantChromaDCFromPair (6.3.4.2.2)
+ *
+ * Description:
+ * Reconstructs the 2x2 ChromaDC block from the coefficient-position pair
+ * buffer, performs integer inverse transformation and dequantization for
+ * 2x2 chroma DC coefficients, and updates the pair buffer pointer to the
+ * next non-empty block.
+ *
+ * Input Arguments:
+ *
+ * ppSrc - Double pointer to residual coefficient-position pair buffer
+ * output by CAVLC decoding
+ * QP - Quantization parameter QpC
+ *
+ * Output Arguments:
+ *
+ * ppSrc - *ppSrc is updated to the start of the next non-empty block
+ * pDst - Pointer to the reconstructed 2x2 ChromaDC coefficients buffer;
+ * must be aligned on a 4-byte boundary.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppSrc or pDst is NULL.
+ * - pDst is not 4-byte aligned.
+ * - QP is not in the range of [0,51].
+ *
+ */
+OMXResult omxVCM4P10_TransformDequantChromaDCFromPair (
+ const OMX_U8 **ppSrc,
+ OMX_S16 *pDst,
+ OMX_INT QP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd (6.3.4.2.3)
+ *
+ * Description:
+ * Reconstructs the 4x4 residual block from the coefficient-position pair
+ * buffer, performs dequantization and integer inverse transformation for a
+ * 4x4 block of residuals with previous intra prediction or motion
+ * compensation data, and updates the pair buffer pointer to the next
+ * non-empty block. If pDC == NULL, there are 16 non-zero AC coefficients at
+ * most in the packed buffer starting from 4x4 block position 0; if pDC !=
+ * NULL, there are 15 non-zero AC coefficients at most in the packed buffer
+ * starting from 4x4 block position 1.
+ *
+ * Input Arguments:
+ *
+ * ppSrc - Double pointer to residual coefficient-position pair buffer
+ * output by CAVLC decoding
+ * pPred - Pointer to the predicted 4x4 block; must be aligned on a 4-byte
+ * boundary
+ * predStep - Predicted frame step size in bytes; must be a multiple of 4
+ * dstStep - Destination frame step in bytes; must be a multiple of 4
+ * pDC - Pointer to the DC coefficient of this block, NULL if it doesn't
+ * exist
+ * QP - Quantization parameter. It should be QpC in chroma 4x4 block
+ * decoding, otherwise it should be QpY.
+ * AC - Flag indicating if at least one non-zero AC coefficient exists
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the reconstructed 4x4 block data; must be aligned on a
+ * 4-byte boundary
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - pPred or pDst is NULL.
+ * - pPred or pDst is not 4-byte aligned.
+ * - predStep or dstStep is not a multiple of 4.
+ * - AC !=0 and QP is not in the range of [0,51] or ppSrc == NULL.
+ * - AC ==0 && pDC ==NULL.
+ *
+ */
+OMXResult omxVCM4P10_DequantTransformResidualFromPairAndAdd (
+ const OMX_U8 **ppSrc,
+ const OMX_U8 *pPred,
+ const OMX_S16 *pDC,
+ OMX_U8 *pDst,
+ OMX_INT predStep,
+ OMX_INT dstStep,
+ OMX_INT QP,
+ OMX_INT AC
+);
+
+
+
+/**
+ * Function: omxVCM4P10_MEGetBufSize (6.3.5.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification
+ * structure for the omxVCM4P10 motion estimation functions BlockMatch_Integer
+ * and MotionEstimationMB.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P10MEMode
+ * pMEParams - motion estimation parameters
+ *
+ * Output Arguments:
+ *
+ * pSize - pointer to the number of bytes required for the motion
+ * estimation specification structure
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - pMEParams or pSize is NULL.
+ * - an invalid value was specified for the parameter MEmode.
+ *
+ */
+OMXResult omxVCM4P10_MEGetBufSize (
+ OMXVCM4P10MEMode MEmode,
+ const OMXVCM4P10MEParams *pMEParams,
+ OMX_U32 *pSize
+);
+
+
+
+/**
+ * Function: omxVCM4P10_MEInit (6.3.5.1.2)
+ *
+ * Description:
+ * Initializes the vendor-specific specification structure required for the
+ * omxVCM4P10 motion estimation functions: BlockMatch_Integer and
+ * MotionEstimationMB. Memory for the specification structure *pMESpec must be
+ * allocated prior to calling the function, and should be aligned on a 4-byte
+ * boundary. The number of bytes required for the specification structure can
+ * be determined using the function omxVCM4P10_MEGetBufSize. Following
+ * initialization by this function, the vendor-specific structure *pMESpec
+ * should contain an implementation-specific representation of all motion
+ * estimation parameters received via the structure pMEParams, for example
+ * searchRange16x16, searchRange8x8, etc.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P10MEMode
+ * pMEParams - motion estimation parameters
+ * pMESpec - pointer to the uninitialized ME specification structure
+ *
+ * Output Arguments:
+ *
+ * pMESpec - pointer to the initialized ME specification structure
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - pMEParams or pMESpec is NULL.
+ * - an invalid value was specified for the parameter MEmode
+ * - a negative or zero value was specified for one of the search ranges
+ * (e.g., pMEParams->searchRange8x8, pMEParams->searchRange16x16, etc.)
+ * - either in isolation or in combination, one or more of the enables or
+ * search ranges in the structure *pMEParams were configured such
+ * that the requested behavior fails to comply with [ISO14496-10].
+ *
+ */
+OMXResult omxVCM4P10_MEInit (
+ OMXVCM4P10MEMode MEmode,
+ const OMXVCM4P10MEParams *pMEParams,
+ void *pMESpec
+);
+
+
+
+/**
+ * Function: omxVCM4P10_BlockMatch_Integer (6.3.5.2.1)
+ *
+ * Description:
+ * Performs integer block match. Returns best MV and associated cost.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrgY - Pointer to the top-left corner of the current block:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the
+ * reference picture:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * nSrcOrgStep - Stride of the original picture plane, expressed in terms
+ * of integer pixels; must be a multiple of iBlockWidth.
+ * nSrcRefStep - Stride of the reference picture plane, expressed in terms
+ * of integer pixels
+ * pRefRect - pointer to the valid reference rectangle inside the reference
+ * picture plane
+ * pCurrPointPos - position of the current block in the current plane
+ * iBlockWidth - Width of the current block, expressed in terms of integer
+ * pixels; must be equal to either 4, 8, or 16.
+ * iBlockHeight - Height of the current block, expressed in terms of
+ * integer pixels; must be equal to either 4, 8, or 16.
+ * nLamda - Lamda factor; used to compute motion cost
+ * pMVPred - Predicted MV; used to compute motion cost, expressed in terms
+ * of 1/4-pel units
+ * pMVCandidate - Candidate MV; used to initialize the motion search,
+ * expressed in terms of integer pixels
+ * pMESpec - pointer to the ME specification structure
+ *
+ * Output Arguments:
+ *
+ * pBestMV - Best MV resulting from integer search, expressed in terms
+ * of 1/4-pel units
+ * pBestCost - Motion cost associated with the best MV; computed as
+ * SAD+Lamda*BitsUsedByMV
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - any of the following pointers are NULL:
+ * pSrcOrgY, pSrcRefY, pRefRect, pMVPred, pMVCandidate, or pMESpec.
+ * - Either iBlockWidth or iBlockHeight are values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Integer (
+ const OMX_U8 *pSrcOrgY,
+ OMX_S32 nSrcOrgStep,
+ const OMX_U8 *pSrcRefY,
+ OMX_S32 nSrcRefStep,
+ const OMXRect *pRefRect,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ OMX_U8 iBlockWidth,
+ OMX_U8 iBlockHeight,
+ OMX_U32 nLamda,
+ const OMXVCMotionVector *pMVPred,
+ const OMXVCMotionVector *pMVCandidate,
+ OMXVCMotionVector *pBestMV,
+ OMX_S32 *pBestCost,
+ void *pMESpec
+);
+
+
+
+/**
+ * Function: omxVCM4P10_BlockMatch_Half (6.3.5.2.2)
+ *
+ * Description:
+ * Performs a half-pel block match using results from a prior integer search.
+ * Returns the best MV and associated cost. This function estimates the
+ * half-pixel motion vector by interpolating the integer resolution motion
+ * vector referenced by the input parameter pSrcDstBestMV, i.e., the initial
+ * integer MV is generated externally. The function
+ * omxVCM4P10_BlockMatch_Integer may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrgY - Pointer to the current position in original picture plane:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the
+ * reference picture:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * nSrcOrgStep - Stride of the original picture plane in terms of full
+ * pixels; must be a multiple of iBlockWidth.
+ * nSrcRefStep - Stride of the reference picture plane in terms of full
+ * pixels
+ * iBlockWidth - Width of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * iBlockHeight - Height of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * nLamda - Lamda factor, used to compute motion cost
+ * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to
+ * compute motion cost
+ * pSrcDstBestMV - The best MV resulting from a prior integer search,
+ * represented in terms of 1/4-pel units
+ *
+ * Output Arguments:
+ *
+ * pSrcDstBestMV - Best MV resulting from the half-pel search, expressed in
+ * terms of 1/4-pel units
+ * pBestCost - Motion cost associated with the best MV; computed as
+ * SAD+Lamda*BitsUsedByMV
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - any of the following pointers is NULL: pSrcOrgY, pSrcRefY,
+ * pSrcDstBestMV, pMVPred, pBestCost
+ * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Half (
+ const OMX_U8 *pSrcOrgY,
+ OMX_S32 nSrcOrgStep,
+ const OMX_U8 *pSrcRefY,
+ OMX_S32 nSrcRefStep,
+ OMX_U8 iBlockWidth,
+ OMX_U8 iBlockHeight,
+ OMX_U32 nLamda,
+ const OMXVCMotionVector *pMVPred,
+ OMXVCMotionVector *pSrcDstBestMV,
+ OMX_S32 *pBestCost
+);
+
+
+
+/**
+ * Function: omxVCM4P10_BlockMatch_Quarter (6.3.5.2.3)
+ *
+ * Description:
+ * Performs a quarter-pel block match using results from a prior half-pel
+ * search. Returns the best MV and associated cost. This function estimates
+ * the quarter-pixel motion vector by interpolating the half-pel resolution
+ * motion vector referenced by the input parameter pSrcDstBestMV, i.e., the
+ * initial half-pel MV is generated externally. The function
+ * omxVCM4P10_BlockMatch_Half may be used for half-pel motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrgY - Pointer to the current position in original picture plane:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the
+ * reference picture:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * nSrcOrgStep - Stride of the original picture plane in terms of full
+ * pixels; must be a multiple of iBlockWidth.
+ * nSrcRefStep - Stride of the reference picture plane in terms of full
+ * pixels
+ * iBlockWidth - Width of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * iBlockHeight - Height of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * nLamda - Lamda factor, used to compute motion cost
+ * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to
+ * compute motion cost
+ * pSrcDstBestMV - The best MV resulting from a prior half-pel search,
+ * represented in terms of 1/4 pel units
+ *
+ * Output Arguments:
+ *
+ * pSrcDstBestMV - Best MV resulting from the quarter-pel search, expressed
+ * in terms of 1/4-pel units
+ * pBestCost - Motion cost associated with the best MV; computed as
+ * SAD+Lamda*BitsUsedByMV
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - One or more of the following pointers is NULL:
+ * pSrcOrgY, pSrcRefY, pSrcDstBestMV, pMVPred, pBestCost
+ * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Quarter (
+ const OMX_U8 *pSrcOrgY,
+ OMX_S32 nSrcOrgStep,
+ const OMX_U8 *pSrcRefY,
+ OMX_S32 nSrcRefStep,
+ OMX_U8 iBlockWidth,
+ OMX_U8 iBlockHeight,
+ OMX_U32 nLamda,
+ const OMXVCMotionVector *pMVPred,
+ OMXVCMotionVector *pSrcDstBestMV,
+ OMX_S32 *pBestCost
+);
+
+
+
+/**
+ * Function: omxVCM4P10_MotionEstimationMB (6.3.5.3.1)
+ *
+ * Description:
+ * Performs MB-level motion estimation and selects best motion estimation
+ * strategy from the set of modes supported in baseline profile [ISO14496-10].
+ *
+ * Input Arguments:
+ *
+ * pSrcCurrBuf - Pointer to the current position in original picture plane;
+ * 16-byte alignment required
+ * pSrcRefBufList - Pointer to an array with 16 entries. Each entry points
+ * to the top-left corner of the co-located MB in a reference
+ * picture. The array is filled from low-to-high with valid
+ * reference frame pointers; the unused high entries should be set
+ * to NULL. Ordering of the reference frames should follow
+ * [ISO14496-10] subclause 8.2.4 Decoding Process for Reference
+ * Picture Lists. The entries must be 16-byte aligned.
+ * pSrcRecBuf - Pointer to the top-left corner of the co-located MB in the
+ * reconstructed picture; must be 16-byte aligned.
+ * SrcCurrStep - Width of the original picture plane in terms of full
+ * pixels; must be a multiple of 16.
+ * SrcRefStep - Width of the reference picture plane in terms of full
+ * pixels; must be a multiple of 16.
+ * SrcRecStep - Width of the reconstructed picture plane in terms of full
+ * pixels; must be a multiple of 16.
+ * pRefRect - Pointer to the valid reference rectangle; relative to the
+ * image origin.
+ * pCurrPointPos - Position of the current macroblock in the current plane.
+ * Lambda - Lagrange factor for computing the cost function
+ * pMESpec - Pointer to the motion estimation specification structure; must
+ * have been allocated and initialized prior to calling this
+ * function.
+ * pMBInter - Array, of dimension four, containing pointers to information
+ * associated with four adjacent type INTER MBs (Left, Top,
+ * Top-Left, Top-Right). Any pointer in the array may be set equal
+ * to NULL if the corresponding MB doesn't exist or is not of type
+ * INTER.
+ * - pMBInter[0] - Pointer to left MB information
+ * - pMBInter[1] - Pointer to top MB information
+ * - pMBInter[2] - Pointer to top-left MB information
+ * - pMBInter[3] - Pointer to top-right MB information
+ * pMBIntra - Array, of dimension four, containing pointers to information
+ * associated with four adjacent type INTRA MBs (Left, Top,
+ * Top-Left, Top-Right). Any pointer in the array may be set equal
+ * to NULL if the corresponding MB doesn't exist or is not of type
+ * INTRA.
+ * - pMBIntra[0] - Pointer to left MB information
+ * - pMBIntra[1] - Pointer to top MB information
+ * - pMBIntra[2] - Pointer to top-left MB information
+ * - pMBIntra[3] - Pointer to top-right MB information
+ * pSrcDstMBCurr - Pointer to information structure for the current MB.
+ * The following entries should be set prior to calling the
+ * function: sliceID - the number of the slice to which the
+ * current MB belongs.
+ *
+ * Output Arguments:
+ *
+ * pDstCost - Pointer to the minimum motion cost for the current MB.
+ * pDstBlockSAD - Pointer to the array of SADs for each of the sixteen luma
+ * 4x4 blocks in each MB. The block SADs are in scan order for
+ * each MB. For implementations that cannot compute the SAD values
+ * individually, the maximum possible value (0xffff) is returned
+ * for each of the 16 block SAD entries.
+ * pSrcDstMBCurr - Pointer to updated information structure for the current
+ * MB after MB-level motion estimation has been completed. The
+ * following fields are updated by the ME function. The following
+ * parameter set quantifies the MB-level ME search results:
+ * - MbType
+ * - subMBType[4]
+ * - pMV0[4][4]
+ * - pMVPred[4][4]
+ * - pRefL0Idx[4]
+ * - Intra16x16PredMode
+ * - pIntra4x4PredMode[4][4]
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - One or more of the following pointers is NULL: pSrcCurrBuf,
+ * pSrcRefBufList, pSrcRecBuf, pRefRect, pCurrPointPos, pMESpec,
+ * pMBInter, pMBIntra,pSrcDstMBCurr, pDstCost, pSrcRefBufList[0]
+ * - SrcRefStep, SrcRecStep are not multiples of 16
+ * - iBlockWidth or iBlockHeight are values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_MotionEstimationMB (
+ const OMX_U8 *pSrcCurrBuf,
+ OMX_S32 SrcCurrStep,
+ const OMX_U8 *pSrcRefBufList[15],
+ OMX_S32 SrcRefStep,
+ const OMX_U8 *pSrcRecBuf,
+ OMX_S32 SrcRecStep,
+ const OMXRect *pRefRect,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ OMX_U32 Lambda,
+ void *pMESpec,
+ const OMXVCM4P10MBInfoPtr *pMBInter,
+ const OMXVCM4P10MBInfoPtr *pMBIntra,
+ OMXVCM4P10MBInfoPtr pSrcDstMBCurr,
+ OMX_INT *pDstCost,
+ OMX_U16 *pDstBlockSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SAD_4x (6.3.5.4.1)
+ *
+ * Description:
+ * This function calculates the SAD for 4x8 and 4x4 blocks.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg -Pointer to the original block; must be aligned on a 4-byte
+ * boundary.
+ * iStepOrg -Step of the original block buffer; must be a multiple of 4.
+ * pSrcRef -Pointer to the reference block
+ * iStepRef -Step of the reference block buffer
+ * iHeight -Height of the block; must be equal to either 4 or 8.
+ *
+ * Output Arguments:
+ *
+ * pDstSAD -Pointer of result SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - One or more of the following pointers is NULL:
+ * pSrcOrg, pSrcRef, or pDstSAD
+ * - iHeight is not equal to either 4 or 8.
+ * - iStepOrg is not a multiple of 4
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SAD_4x (
+ const OMX_U8 *pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8 *pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_S32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SADQuar_4x (6.3.5.4.2)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average
+ * of the other two (pSrcRef0 and pSrcRef1) for 4x8 or 4x4 blocks. Rounding
+ * is applied according to the convention (a+b+1)>>1.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the original block; must be aligned on a 4-byte
+ * boundary.
+ * pSrcRef0 - Pointer to reference block 0
+ * pSrcRef1 - Pointer to reference block 1
+ * iSrcStep - Step of the original block buffer; must be a multiple of 4.
+ * iRefStep0 - Step of reference block 0
+ * iRefStep1 - Step of reference block 1
+ * iHeight - Height of the block; must be equal to either 4 or 8.
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - iHeight is not equal to either 4 or 8.
+ * - One or more of the following pointers is NULL: pSrc, pSrcRef0,
+ * pSrcRef1, pDstSAD.
+ * - iSrcStep is not a multiple of 4
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_4x (
+ const OMX_U8 *pSrc,
+ const OMX_U8 *pSrcRef0,
+ const OMX_U8 *pSrcRef1,
+ OMX_U32 iSrcStep,
+ OMX_U32 iRefStep0,
+ OMX_U32 iRefStep1,
+ OMX_U32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SADQuar_8x (6.3.5.4.3)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average
+ * of the other two (pSrcRef0 and pSrcRef1) for 8x16, 8x8, or 8x4 blocks.
+ * Rounding is applied according to the convention (a+b+1)>>1.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the original block; must be aligned on an 8-byte
+ * boundary.
+ * pSrcRef0 - Pointer to reference block 0
+ * pSrcRef1 - Pointer to reference block 1
+ * iSrcStep - Step of the original block buffer; must be a multiple of 8.
+ * iRefStep0 - Step of reference block 0
+ * iRefStep1 - Step of reference block 1
+ * iHeight - Height of the block; must be equal to either 4, 8, or 16.
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - iHeight is not equal to either 4, 8, or 16.
+ * - One or more of the following pointers is NULL: pSrc, pSrcRef0,
+ * pSrcRef1, pDstSAD.
+ * - iSrcStep is not a multiple of 8
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_8x (
+ const OMX_U8 *pSrc,
+ const OMX_U8 *pSrcRef0,
+ const OMX_U8 *pSrcRef1,
+ OMX_U32 iSrcStep,
+ OMX_U32 iRefStep0,
+ OMX_U32 iRefStep1,
+ OMX_U32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SADQuar_16x (6.3.5.4.4)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average
+ * of the other two (pSrcRef0 and pSrcRef1) for 16x16 or 16x8 blocks.
+ * Rounding is applied according to the convention (a+b+1)>>1.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the original block; must be aligned on a 16-byte
+ * boundary.
+ * pSrcRef0 - Pointer to reference block 0
+ * pSrcRef1 - Pointer to reference block 1
+ * iSrcStep - Step of the original block buffer; must be a multiple of 16
+ * iRefStep0 - Step of reference block 0
+ * iRefStep1 - Step of reference block 1
+ * iHeight - Height of the block; must be equal to either 8 or 16
+ *
+ * Output Arguments:
+ *
+ * pDstSAD -Pointer of result SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - iHeight is not equal to either 8 or 16.
+ * - One or more of the following pointers is NULL: pSrc, pSrcRef0,
+ * pSrcRef1, pDstSAD.
+ * - iSrcStep is not a multiple of 16
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_16x (
+ const OMX_U8 *pSrc,
+ const OMX_U8 *pSrcRef0,
+ const OMX_U8 *pSrcRef1,
+ OMX_U32 iSrcStep,
+ OMX_U32 iRefStep0,
+ OMX_U32 iRefStep1,
+ OMX_U32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SATD_4x4 (6.3.5.4.5)
+ *
+ * Description:
+ * This function calculates the sum of absolute transform differences (SATD)
+ * for a 4x4 block by applying a Hadamard transform to the difference block
+ * and then calculating the sum of absolute coefficient values.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to the original block; must be aligned on a 4-byte
+ * boundary
+ * iStepOrg - Step of the original block buffer; must be a multiple of 4
+ * pSrcRef - Pointer to the reference block; must be aligned on a 4-byte
+ * boundary
+ * iStepRef - Step of the reference block buffer; must be a multiple of 4
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - pointer to the resulting SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pSrcRef, or pDstSAD
+ * - either pSrcOrg or pSrcRef is not aligned on a 4-byte boundary
+ * - iStepOrg <= 0 or iStepOrg is not a multiple of 4
+ * - iStepRef <= 0 or iStepRef is not a multiple of 4
+ *
+ */
+OMXResult omxVCM4P10_SATD_4x4 (
+ const OMX_U8 *pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8 *pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_U32 *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InterpolateHalfHor_Luma (6.3.5.5.1)
+ *
+ * Description:
+ * This function performs interpolation for two horizontal 1/2-pel positions -
+ * (-1/2, 0) and (1/2, 0) - around a full-pel position.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the top-left corner of the block used to interpolate in
+ * the reconstruction frame plane.
+ * iSrcStep - Step of the source buffer.
+ * iDstStep - Step of the destination(interpolation) buffer; must be a
+ * multiple of iWidth.
+ * iWidth - Width of the current block; must be equal to either 4, 8, or 16
+ * iHeight - Height of the current block; must be equal to 4, 8, or 16
+ *
+ * Output Arguments:
+ *
+ * pDstLeft - Pointer to the interpolation buffer of the left 1/2-pel position
+ * (-1/2, 0)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ * pDstRight - Pointer to the interpolation buffer of the right 1/2-pel
+ * position (1/2, 0)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrc, pDstLeft, or pDstRight
+ * - iWidth or iHeight have values other than 4, 8, or 16
+ * - iWidth==4 but pDstLeft and/or pDstRight is/are not aligned on a 4-byte boundary
+ * - iWidth==8 but pDstLeft and/or pDstRight is/are not aligned on an 8-byte boundary
+ * - iWidth==16 but pDstLeft and/or pDstRight is/are not aligned on a 16-byte boundary
+ * - any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_InterpolateHalfHor_Luma (
+ const OMX_U8 *pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8 *pDstLeft,
+ OMX_U8 *pDstRight,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InterpolateHalfVer_Luma (6.3.5.5.2)
+ *
+ * Description:
+ * This function performs interpolation for two vertical 1/2-pel positions -
+ * (0, -1/2) and (0, 1/2) - around a full-pel position.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to top-left corner of block used to interpolate in the
+ * reconstructed frame plane
+ * iSrcStep - Step of the source buffer.
+ * iDstStep - Step of the destination (interpolation) buffer; must be a
+ * multiple of iWidth.
+ * iWidth - Width of the current block; must be equal to either 4, 8, or 16
+ * iHeight - Height of the current block; must be equal to either 4, 8, or 16
+ *
+ * Output Arguments:
+ *
+ * pDstUp - Pointer to the interpolation buffer of the 1/2-pel position above
+ * the current full-pel position (0, -1/2)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ * pDstDown - Pointer to the interpolation buffer of the 1/2-pel position below
+ * the current full-pel position (0, 1/2)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrc, pDstUp, or pDstDown
+ * - iWidth or iHeight have values other than 4, 8, or 16
+ * - iWidth==4 but pDstUp and/or pDstDown is/are not aligned on a 4-byte boundary
+ * - iWidth==8 but pDstUp and/or pDstDown is/are not aligned on an 8-byte boundary
+ * - iWidth==16 but pDstUp and/or pDstDown is/are not aligned on a 16-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InterpolateHalfVer_Luma (
+ const OMX_U8 *pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8 *pDstUp,
+ OMX_U8 *pDstDown,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_Average_4x (6.3.5.5.3)
+ *
+ * Description:
+ * This function calculates the average of two 4x4 or 4x8 blocks. The result
+ * is rounded according to (a+b+1)/2.
+ *
+ * Input Arguments:
+ *
+ * pPred0 - Pointer to the top-left corner of reference block 0
+ * pPred1 - Pointer to the top-left corner of reference block 1
+ * iPredStep0 - Step of reference block 0; must be a multiple of 4.
+ * iPredStep1 - Step of reference block 1; must be a multiple of 4.
+ * iDstStep - Step of the destination buffer; must be a multiple of 4.
+ * iHeight - Height of the blocks; must be either 4 or 8.
+ *
+ * Output Arguments:
+ *
+ * pDstPred - Pointer to the destination buffer. 4-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pPred0, pPred1, or pDstPred
+ * - pDstPred is not aligned on a 4-byte boundary
+ * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 4
+ * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 4
+ * - iDstStep <= 0 or iDstStep is not a multiple of 4
+ * - iHeight is not equal to either 4 or 8
+ *
+ */
+OMXResult omxVCM4P10_Average_4x (
+ const OMX_U8 *pPred0,
+ const OMX_U8 *pPred1,
+ OMX_U32 iPredStep0,
+ OMX_U32 iPredStep1,
+ OMX_U8 *pDstPred,
+ OMX_U32 iDstStep,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_TransformQuant_ChromaDC (6.3.5.6.1)
+ *
+ * Description:
+ * This function performs 2x2 Hadamard transform of chroma DC coefficients
+ * and then quantizes the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the 2x2 array of chroma DC coefficients. 8-byte
+ * alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ * bIntra - Indicate whether this is an INTRA block. 1-INTRA, 0-INTER
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - Pointer to transformed and quantized coefficients. 8-byte
+ * alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcDst
+ * - pSrcDst is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_ChromaDC (
+ OMX_S16 *pSrcDst,
+ OMX_U32 iQP,
+ OMX_U8 bIntra
+);
+
+
+
+/**
+ * Function: omxVCM4P10_TransformQuant_LumaDC (6.3.5.6.2)
+ *
+ * Description:
+ * This function performs a 4x4 Hadamard transform of luma DC coefficients
+ * and then quantizes the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the 4x4 array of luma DC coefficients. 16-byte
+ * alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - Pointer to transformed and quantized coefficients. 16-byte
+ * alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL: pSrcDst
+ * - pSrcDst is not aligned on a 16-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_LumaDC (
+ OMX_S16 *pSrcDst,
+ OMX_U32 iQP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InvTransformDequant_LumaDC (6.3.5.6.3)
+ *
+ * Description:
+ * This function performs inverse 4x4 Hadamard transform and then dequantizes
+ * the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the 4x4 array of the 4x4 Hadamard-transformed and
+ * quantized coefficients. 16 byte alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to inverse-transformed and dequantized coefficients.
+ * 16-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL: pSrc
+ * - pSrc or pDst is not aligned on a 16-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InvTransformDequant_LumaDC (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst,
+ OMX_U32 iQP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InvTransformDequant_ChromaDC (6.3.5.6.4)
+ *
+ * Description:
+ * This function performs inverse 2x2 Hadamard transform and then dequantizes
+ * the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the 2x2 array of the 2x2 Hadamard-transformed and
+ * quantized coefficients. 8 byte alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to inverse-transformed and dequantized coefficients.
+ * 8-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL: pSrc
+ * - pSrc or pDst is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InvTransformDequant_ChromaDC (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst,
+ OMX_U32 iQP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InvTransformResidualAndAdd (6.3.5.7.1)
+ *
+ * Description:
+ * This function performs an inverse 4x4 integer transformation to produce
+ * the difference signal and then adds the difference to the prediction to get
+ * the reconstructed signal.
+ *
+ * Input Arguments:
+ *
+ * pSrcPred - Pointer to prediction signal. 4-byte alignment required.
+ * pDequantCoeff - Pointer to the transformed coefficients. 8-byte
+ * alignment required.
+ * iSrcPredStep - Step of the prediction buffer; must be a multiple of 4.
+ * iDstReconStep - Step of the destination reconstruction buffer; must be a
+ * multiple of 4.
+ * bAC - Indicates whether there are AC coefficients in the coefficient
+ * matrix.
+ *
+ * Output Arguments:
+ *
+ * pDstRecon -Pointer to the destination reconstruction buffer. 4-byte
+ * alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcPred, pDequantCoeff, pDstRecon
+ * - pSrcPred is not aligned on a 4-byte boundary
+ * - iSrcPredStep or iDstReconStep is not a multiple of 4.
+ * - pDequantCoeff is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InvTransformResidualAndAdd (
+ const OMX_U8 *pSrcPred,
+ const OMX_S16 *pDequantCoeff,
+ OMX_U8 *pDstRecon,
+ OMX_U32 iSrcPredStep,
+ OMX_U32 iDstReconStep,
+ OMX_U8 bAC
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SubAndTransformQDQResidual (6.3.5.8.1)
+ *
+ * Description:
+ * This function subtracts the prediction signal from the original signal to
+ * produce the difference signal and then performs a 4x4 integer transform and
+ * quantization. The quantized transformed coefficients are stored as
+ * pDstQuantCoeff. This function can also output dequantized coefficients or
+ * unquantized DC coefficients optionally by setting the pointers
+ * pDstDeQuantCoeff, pDCCoeff.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to original signal. 4-byte alignment required.
+ * pSrcPred - Pointer to prediction signal. 4-byte alignment required.
+ * iSrcOrgStep - Step of the original signal buffer; must be a multiple of
+ * 4.
+ * iSrcPredStep - Step of the prediction signal buffer; must be a multiple
+ * of 4.
+ * pNumCoeff -Number of non-zero coefficients after quantization. If this
+ * parameter is not required, it is set to NULL.
+ * nThreshSAD - Zero-block early detection threshold. If this parameter is
+ * not required, it is set to 0.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ * bIntra - Indicates whether this is an INTRA block, either 1-INTRA or
+ * 0-INTER
+ *
+ * Output Arguments:
+ *
+ * pDstQuantCoeff - Pointer to the quantized transformed coefficients.
+ * 8-byte alignment required.
+ * pDstDeQuantCoeff - Pointer to the dequantized transformed coefficients
+ * if this parameter is not equal to NULL. 8-byte alignment
+ * required.
+ * pDCCoeff - Pointer to the unquantized DC coefficient if this parameter
+ * is not equal to NULL.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pSrcPred, pNumCoeff, pDstQuantCoeff,
+ * pDstDeQuantCoeff, pDCCoeff
+ * - pSrcOrg is not aligned on a 4-byte boundary
+ * - pSrcPred is not aligned on a 4-byte boundary
+ * - iSrcOrgStep is not a multiple of 4
+ * - iSrcPredStep is not a multiple of 4
+ * - pDstQuantCoeff or pDstDeQuantCoeff is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_SubAndTransformQDQResidual (
+ const OMX_U8 *pSrcOrg,
+ const OMX_U8 *pSrcPred,
+ OMX_U32 iSrcOrgStep,
+ OMX_U32 iSrcPredStep,
+ OMX_S16 *pDstQuantCoeff,
+ OMX_S16 *pDstDeQuantCoeff,
+ OMX_S16 *pDCCoeff,
+ OMX_S8 *pNumCoeff,
+ OMX_U32 nThreshSAD,
+ OMX_U32 iQP,
+ OMX_U8 bIntra
+);
+
+
+
+/**
+ * Function: omxVCM4P10_GetVLCInfo (6.3.5.9.1)
+ *
+ * Description:
+ * This function extracts run-length encoding (RLE) information from the
+ * coefficient matrix. The results are returned in an OMXVCM4P10VLCInfo
+ * structure.
+ *
+ * Input Arguments:
+ *
+ * pSrcCoeff - pointer to the transform coefficient matrix. 8-byte
+ * alignment required.
+ * pScanMatrix - pointer to the scan order definition matrix. For a luma
+ * block the scan matrix should follow [ISO14496-10] section 8.5.4,
+ * and should contain the values 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13,
+ * 10, 7, 11, 14, 15. For a chroma block, the scan matrix should
+ * contain the values 0, 1, 2, 3.
+ * bAC - indicates presence of a DC coefficient; 0 = DC coefficient
+ * present, 1= DC coefficient absent.
+ * MaxNumCoef - specifies the number of coefficients contained in the
+ * transform coefficient matrix, pSrcCoeff. The value should be 16
+ * for blocks of type LUMADC, LUMAAC, LUMALEVEL, and CHROMAAC. The
+ * value should be 4 for blocks of type CHROMADC.
+ *
+ * Output Arguments:
+ *
+ * pDstVLCInfo - pointer to structure that stores information for
+ * run-length coding.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcCoeff, pScanMatrix, pDstVLCInfo
+ * - pSrcCoeff is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_GetVLCInfo (
+ const OMX_S16 *pSrcCoeff,
+ const OMX_U8 *pScanMatrix,
+ OMX_U8 bAC,
+ OMX_U32 MaxNumCoef,
+ OMXVCM4P10VLCInfo*pDstVLCInfo
+);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /** end of #define _OMXVC_H_ */
+
+/** EOF */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC_s.h
new file mode 100644
index 0000000..be974d5
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC_s.h
@@ -0,0 +1,129 @@
+;/******************************************************************************
+;// Copyright (c) 1999-2005 The Khronos Group Inc. All Rights Reserved
+;//
+;//
+;//
+;//
+;//
+;//
+;//
+;//
+;******************************************************************************/
+;// Assembler-side constants; each group sits between the commented-out C 'enum'/'typedef struct' lines it appears to mirror.
+;/** =============== Structure Definition for Sample Generation ============== */
+;/** transparent status */
+
+;enum {
+OMX_VIDEO_TRANSPARENT EQU 0; /** Wholly transparent */
+OMX_VIDEO_PARTIAL EQU 1; /** Partially transparent */
+OMX_VIDEO_OPAQUE EQU 2; /** Opaque */
+;}
+
+;/** direction */
+;enum {
+OMX_VIDEO_NONE EQU 0;
+OMX_VIDEO_HORIZONTAL EQU 1;
+OMX_VIDEO_VERTICAL EQU 2;
+;}
+
+;/** bilinear interpolation type */
+;enum {
+OMX_VIDEO_INTEGER_PIXEL EQU 0; /** case "a" */
+OMX_VIDEO_HALF_PIXEL_X EQU 1; /** case "b" */
+OMX_VIDEO_HALF_PIXEL_Y EQU 2; /** case "c" */
+OMX_VIDEO_HALF_PIXEL_XY EQU 3; /** case "d" */
+;}
+
+;enum { /** neighbour flags: every value is a distinct power of two */
+OMX_UPPER EQU 1; /** set if the above macroblock is available */
+OMX_LEFT EQU 2; /** set if the left macroblock is available */
+OMX_CENTER EQU 4;
+OMX_RIGHT EQU 8;
+OMX_LOWER EQU 16;
+OMX_UPPER_LEFT EQU 32; /** set if the above-left macroblock is available */
+OMX_UPPER_RIGHT EQU 64; /** set if the above-right macroblock is available */
+OMX_LOWER_LEFT EQU 128;
+OMX_LOWER_RIGHT EQU 256
+;}
+
+;enum {
+OMX_VIDEO_LUMINANCE EQU 0; /** Luminance component */
+OMX_VIDEO_CHROMINANCE EQU 1; /** chrominance component */
+OMX_VIDEO_ALPHA EQU 2; /** Alpha component */
+;}
+
+;enum {
+OMX_VIDEO_INTER EQU 0; /** P picture or P-VOP */
+OMX_VIDEO_INTER_Q EQU 1; /** P picture or P-VOP */
+OMX_VIDEO_INTER4V EQU 2; /** P picture or P-VOP */
+OMX_VIDEO_INTRA EQU 3; /** I and P picture; I- and P-VOP */
+OMX_VIDEO_INTRA_Q EQU 4; /** I and P picture; I- and P-VOP */
+OMX_VIDEO_INTER4V_Q EQU 5; /** P picture or P-VOP (H.263)*/
+OMX_VIDEO_DIRECT EQU 6; /** B picture or B-VOP (MPEG-4 only) */
+OMX_VIDEO_INTERPOLATE EQU 7; /** B picture or B-VOP */
+OMX_VIDEO_BACKWARD EQU 8; /** B picture or B-VOP */
+OMX_VIDEO_FORWARD EQU 9; /** B picture or B-VOP */
+OMX_VIDEO_NOTCODED EQU 10; /** B picture or B-VOP */
+;}
+
+;enum {
+OMX_16X16_VERT EQU 0; /** Intra_16x16_Vertical (prediction mode) */
+OMX_16X16_HOR EQU 1; /** Intra_16x16_Horizontal (prediction mode) */
+OMX_16X16_DC EQU 2; /** Intra_16x16_DC (prediction mode) */
+OMX_16X16_PLANE EQU 3; /** Intra_16x16_Plane (prediction mode) */
+;}
+
+;enum {
+OMX_4x4_VERT EQU 0; /** Intra_4x4_Vertical (prediction mode) */
+OMX_4x4_HOR EQU 1; /** Intra_4x4_Horizontal (prediction mode) */
+OMX_4x4_DC EQU 2; /** Intra_4x4_DC (prediction mode) */
+OMX_4x4_DIAG_DL EQU 3; /** Intra_4x4_Diagonal_Down_Left (prediction mode) */
+OMX_4x4_DIAG_DR EQU 4; /** Intra_4x4_Diagonal_Down_Right (prediction mode) */
+OMX_4x4_VR EQU 5; /** Intra_4x4_Vertical_Right (prediction mode) */
+OMX_4x4_HD EQU 6; /** Intra_4x4_Horizontal_Down (prediction mode) */
+OMX_4x4_VL EQU 7; /** Intra_4x4_Vertical_Left (prediction mode) */
+OMX_4x4_HU EQU 8; /** Intra_4x4_Horizontal_Up (prediction mode) */
+;}
+
+;enum {
+OMX_CHROMA_DC EQU 0; /** Intra_Chroma_DC (prediction mode) */
+OMX_CHROMA_HOR EQU 1; /** Intra_Chroma_Horizontal (prediction mode) */
+OMX_CHROMA_VERT EQU 2; /** Intra_Chroma_Vertical (prediction mode) */
+OMX_CHROMA_PLANE EQU 3; /** Intra_Chroma_Plane (prediction mode) */
+;}
+
+;typedef struct { NOTE(review): the values in the three struct groups look like byte offsets of the fields - confirm against the C structs
+x EQU 0;
+y EQU 4;
+;}OMXCoordinate;
+
+;typedef struct { (offsets 0 and 2: fields are 2 bytes apart)
+dx EQU 0;
+dy EQU 2;
+;}OMXMotionVector;
+
+;typedef struct { (offsets 0, 4, 8, 12: fields are 4 bytes apart; symbols are xx/yy here, x/y are already defined above)
+xx EQU 0;
+yy EQU 4;
+width EQU 8;
+height EQU 12;
+;}OMXiRect;
+
+;typedef enum { (same values as OMX_VIDEO_INTER .. OMX_VIDEO_INTER4V_Q above)
+OMX_VC_INTER EQU 0; /** P picture or P-VOP */
+OMX_VC_INTER_Q EQU 1; /** P picture or P-VOP */
+OMX_VC_INTER4V EQU 2; /** P picture or P-VOP */
+OMX_VC_INTRA EQU 3; /** I and P picture, I- and P-VOP */
+OMX_VC_INTRA_Q EQU 4; /** I and P picture, I- and P-VOP */
+OMX_VC_INTER4V_Q EQU 5; /** P picture or P-VOP (H.263)*/
+;} OMXVCM4P2MacroblockType;
+
+;enum { (same values as OMX_VIDEO_NONE/HORIZONTAL/VERTICAL above)
+OMX_VC_NONE EQU 0
+OMX_VC_HORIZONTAL EQU 1
+OMX_VC_VERTICAL EQU 2
+;};
+
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy16x16_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy16x16_s.s
new file mode 100644
index 0000000..2663a70
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy16x16_s.s
@@ -0,0 +1,148 @@
+ ;/**
+ ; * Function: omxVCCOMM_Copy16x16
+ ; *
+ ; * Description:
+ ; * Copies the reference 16x16 block to the current block.
+ ; * Parameters:
+ ; * [in] pSrc - pointer to the reference block in the source frame; must be aligned on a 16-byte boundary.
+ ; * [in] step - distance between the starts of consecutive lines in the reference frame, in bytes;
+ ; * must be a multiple of 16 and must be larger than or equal to 16.
+ ; * [out] pDst - pointer to the destination block; must be aligned on an 8-byte boundary.
+ ; * Return Value:
+ ; * OMX_Sts_NoErr - no error
+ ; * OMX_Sts_BadArgErr - bad arguments; returned under any of the following conditions:
+ ; * - one or more of the following pointers is NULL: pSrc, pDst
+ ; * - one or more of the following pointers is not aligned on a 16-byte boundary: pSrc, pDst (NOTE(review): pDst is documented as 8-byte aligned above - confirm)
+ ; * - step <16 or step is not a multiple of 16.
+ ; */
+ ;// NOTE: the code below contains no argument checks; the only value it places in Return is OMX_Sts_NoErr.
+ INCLUDE omxtypes_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+ IF ARM1136JS
+
+;//Input Arguments
+pSrc RN 0
+pDst RN 1
+step RN 2
+
+;//Local Variables
+Count RN 14 ;// step-8: moves pSrc from the middle of one source row to the start of the next
+X0 RN 2 ;// first 8 bytes of a row (LDRD/STRD register pair r2,r3); shares r2 with step
+X1 RN 4 ;// last 8 bytes of a row (LDRD/STRD register pair r4,r5)
+
+Return RN 0
+
+ M_START omxVCCOMM_Copy16x16,r5 ;// macro defined elsewhere; presumably saves r4-r5 and lr (Count) - confirm
+
+ ;// Count has to be derived from step first: the LDRD into X0 below overwrites r2.
+ ;// The copy is software-pipelined: row k is stored while row k+1 is being loaded.
+ SUB Count,step,#8 ;//Count=step-8
+ LDRD X0,[pSrc],#8 ;//pSrc after loading pSrc=pSrc+8
+ LDRD X1,[pSrc],Count ;//pSrc after loading pSrc=pSrc+step
+
+ ;// store row 0, load row 1
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store row 1, load row 2
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store row 2, load row 3
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store row 3, load row 4
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store row 4, load row 5
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store row 5, load row 6
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store row 6, load row 7
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store row 7, load row 8
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store row 8, load row 9
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store row 9, load row 10
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store row 10, load row 11
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store row 11, load row 12
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store row 12, load row 13
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store row 13, load row 14
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store row 14, load row 15
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+ ;// store row 15; Return (r0) aliases pSrc, which is no longer needed once the last load has been issued
+ STRD X0,[pDst],#8
+ MOV Return,#OMX_Sts_NoErr
+ STRD X1,[pDst],#8
+
+
+ M_END
+ ENDIF
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy8x8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy8x8_s.s
new file mode 100644
index 0000000..993873c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy8x8_s.s
@@ -0,0 +1,72 @@
+ ;/**
+ ; * Function: omxVCCOMM_Copy8x8
+ ; *
+ ; * Description:
+ ; * Copies the reference 8x8 block to the current block.
+ ; * Parameters:
+ ; * [in] pSrc - pointer to the reference block in the source frame; must be aligned on an 8-byte boundary.
+ ; * [in] step - distance between the starts of consecutive lines in the reference frame, in bytes;
+ ; * must be a multiple of 8 and must be larger than or equal to 8.
+ ; * [out] pDst - pointer to the destination block; must be aligned on an 8-byte boundary.
+ ; * Return Value:
+ ; * OMX_Sts_NoErr - no error
+ ; * OMX_Sts_BadArgErr - bad arguments; returned under any of the following conditions:
+ ; * - one or more of the following pointers is NULL: pSrc, pDst
+ ; * - one or more of the following pointers is not aligned on an 8-byte boundary: pSrc, pDst
+ ; * - step <8 or step is not a multiple of 8.
+ ; */
+ ;// NOTE: the code below contains no argument checks; the only value it places in Return is OMX_Sts_NoErr.
+ INCLUDE omxtypes_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+ IF ARM1136JS
+
+;//Input Arguments
+pSrc RN 0
+pDst RN 1
+step RN 2
+
+;//Local Variables
+Count RN 14 ;// copy of step, used as the post-increment for pSrc
+X0 RN 2 ;// one 8-byte row (LDRD/STRD register pair r2,r3); shares r2 with step
+X1 RN 4 ;// one 8-byte row (LDRD/STRD register pair r4,r5)
+Return RN 0
+ M_START omxVCCOMM_Copy8x8,r5 ;// macro defined elsewhere; presumably saves r4-r5 and lr (Count) - confirm
+
+ ;// step is copied to Count first: the LDRD into X0 below overwrites r2.
+ ;// Rows are stored two at a time while the next two rows are being loaded.
+ MOV Count,step ;//Count=step
+
+ LDRD X0,[pSrc],Count ;//pSrc after loading : pSrc=pSrc+step
+ LDRD X1,[pSrc],Count
+ ;// store rows 0-1, load rows 2-3
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],Count
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+ ;// store rows 2-3, load rows 4-5
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],Count
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+ ;// store rows 4-5, load rows 6-7
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],Count
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+ ;// store rows 6-7; Return (r0) aliases pSrc, which is no longer needed once the last load has been issued
+ STRD X0,[pDst],#8
+ MOV Return,#OMX_Sts_NoErr
+ STRD X1,[pDst],#8
+
+
+ M_END
+ ENDIF
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
new file mode 100644
index 0000000..02b4b08
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
@@ -0,0 +1,189 @@
+;//
+;//
+;// File Name: omxVCCOMM_ExpandFrame_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// Description:
+;// This function will Expand Frame boundary pixels into Plane
+;//
+;//
+
+;// Include standard headers
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+;// Import symbols required from other files
+;// (For example tables)
+
+
+;// Set debugging level
+DEBUG_ON SETL {FALSE}
+
+;// Overview of the routine below:
+;// phase 1 copies the first and last frame rows into the iExpandPels rows above and below the frame,
+;// phase 2 replicates the first and last pixel of every row (including the rows written by phase 1)
+;// into the iExpandPels columns to the left and right.
+;// NOTE: no argument checks are performed; the only value placed in result is OMX_Sts_NoErr.
+;// NOTE(review): both phases move 4 bytes per step, so iFrameWidth and iExpandPels look like they must be multiples of 4 - confirm.
+
+;// Guarding implementation by the processor name
+
+ IF ARM1136JS
+
+;//Input Registers
+
+pSrcDstPlane RN 0
+iFrameWidth RN 1
+iFrameHeight RN 2
+iExpandPels RN 3
+
+
+;//Output Registers
+
+result RN 0
+
+;//Local Scratch Registers
+;// Several names share a register: the phase 1 names (pTop, pBottom, ...) are reused by phase 2 (pLeft, pRight, ...).
+iPlaneStep RN 4
+pTop RN 5
+pBottom RN 6
+pBottomIndex RN 7
+x RN 8
+y RN 9
+tempTop RN 10
+tempBot RN 11
+ColStep RN 12
+pLeft RN 5
+pRight RN 6
+pRightIndex RN 7
+tempLeft1 RN 10
+tempRight1 RN 11
+tempLeft2 RN 14 ;// shares r14 with indexY; indexY is read for the last time before the phase 2 loop
+tempRight2 RN 2 ;// shares r2 with iFrameHeight; iFrameHeight is read for the last time when y is set up for phase 2
+indexY RN 14
+RowStep RN 12
+expandTo4bytes RN 1 ;// copy a byte to 4 bytes of a word (shares r1 with iFrameWidth, which is read before this is loaded)
+
+ ;// Allocate stack memory required by the function
+
+
+ ;// Write function header
+ M_START omxVCCOMM_ExpandFrame_I,r11
+
+ ;// Define stack arguments
+ M_ARG iPlaneStepOnStack, 4
+
+ ;// Load argument from the stack
+ M_LDR iPlaneStep, iPlaneStepOnStack
+
+ MUL pTop,iExpandPels,iPlaneStep ;// byte size of the iExpandPels border rows
+ MLA pBottom,iFrameHeight,iPlaneStep,pSrcDstPlane ;// first row below the frame
+ SUB x,iFrameWidth,#4 ;// start with the last 4-byte column
+ MOV indexY,pTop ;// keep iExpandPels*iPlaneStep for phase 2
+ ADD ColStep,indexY,#4 ;// rewinds one column's worth of row stores and steps 4 bytes to the left
+ SUB pBottomIndex,pBottom,iPlaneStep ;// last row of the frame
+ SUB pTop,pSrcDstPlane,pTop ;// topmost border row
+
+
+ ADD pTop,pTop,x
+ ADD pBottom,pBottom,x
+
+ ;//------------------------------------------------------------------------
+ ;// The following improves upon the C implementation
+ ;// The x and y loops are interchanged: This ensures that the values of
+ ;// pSrcDstPlane [x] and pSrcDstPlane [(iFrameHeight - 1) * iPlaneStep + x]
+ ;// which depend only on loop variable 'x' are loaded once and used in
+ ;// multiple stores in the 'Y' loop
+ ;//------------------------------------------------------------------------
+
+ ;// xloop: x runs from iFrameWidth-4 down to 0 in steps of 4
+ExpandFrameTopBotXloop
+
+ LDR tempTop,[pSrcDstPlane,x]
+ ;//------------------------------------------------------------------------
+ ;// pSrcDstPlane [(iFrameHeight - 1) * iPlaneStep + x] is simplified as:
+ ;// pSrcDstPlane + (iFrameHeight * iPlaneStep) - iPlaneStep + x ==
+ ;// pBottom - iPlaneStep + x == pBottomIndex [x]
+ ;// The value of pBottomIndex is calculated above this 'x' loop
+ ;//------------------------------------------------------------------------
+ LDR tempBot,[pBottomIndex,x]
+
+ ;// yloop: iExpandPels stores per column, one border row each
+ MOV y,iExpandPels
+
+ExpandFrameTopBotYloop
+ SUBS y,y,#1
+ M_STR tempTop,[pTop],iPlaneStep
+ M_STR tempBot,[pBottom],iPlaneStep
+ BGT ExpandFrameTopBotYloop
+
+ SUBS x,x,#4
+ SUB pTop,pTop,ColStep
+ SUB pBottom,pBottom,ColStep
+ BGE ExpandFrameTopBotXloop
+
+
+ ;// y loop
+ ;// The product is already calculated above : Reuse
+ ;//MUL indexY,iExpandPels,iPlaneStep
+
+ SUB pSrcDstPlane,pSrcDstPlane,indexY ;// move up to the topmost border row written by phase 1
+ SUB pLeft,pSrcDstPlane,iExpandPels ;// pLeft->points to the top left of the expanded block
+ ADD pRight,pSrcDstPlane,iFrameWidth ;// first byte to the right of the frame
+ SUB pRightIndex,pRight,#1 ;// last pixel of the row
+
+ ADD y,iFrameHeight,iExpandPels,LSL #1 ;// rows to process: frame rows plus both borders
+ LDR expandTo4bytes,=0x01010101
+
+ RSB RowStep,iExpandPels,iPlaneStep,LSL #1 ;// 2*iPlaneStep - iExpandPels: down two rows, back to the start of the border
+
+ ;// The Y Loop is unrolled twice
+ExpandFrameLeftRightYloop
+ LDRB tempLeft2,[pSrcDstPlane,iPlaneStep] ;// PreLoad the values
+ LDRB tempRight2,[pRightIndex,iPlaneStep]
+ M_LDRB tempLeft1,[pSrcDstPlane],iPlaneStep,LSL #1 ;// PreLoad the values
+ M_LDRB tempRight1,[pRightIndex],iPlaneStep,LSL #1
+
+ SUB x,iExpandPels,#4
+ MUL tempLeft2,tempLeft2,expandTo4bytes ;// Copy the single byte to 4 bytes
+ MUL tempRight2,tempRight2,expandTo4bytes
+ MUL tempLeft1,tempLeft1,expandTo4bytes ;// Copy the single byte to 4 bytes
+ MUL tempRight1,tempRight1,expandTo4bytes
+
+
+ ;// x loop: each pass writes 4 border bytes on both sides of two consecutive rows
+ExpandFrameLeftRightXloop
+ SUBS x,x,#4
+ STR tempLeft2,[pLeft,iPlaneStep] ;// Store the 4 bytes at one go
+ STR tempRight2,[pRight,iPlaneStep]
+ STR tempLeft1,[pLeft],#4 ;// Store the 4 bytes at one go
+ STR tempRight1,[pRight],#4
+ BGE ExpandFrameLeftRightXloop
+
+ SUBS y,y,#2
+ ADD pLeft,pLeft,RowStep
+ ADD pRight,pRight,RowStep
+ BGT ExpandFrameLeftRightYloop
+
+
+ ;// Set return value
+
+ MOV result,#OMX_Sts_NoErr
+End
+
+ ;// Write function tail
+
+ M_END
+
+ ENDIF ;//ARM1136JS
+
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/api/armVCM4P10_CAVLCTables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/api/armVCM4P10_CAVLCTables.h
new file mode 100644
index 0000000..4340f2a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/api/armVCM4P10_CAVLCTables.h
@@ -0,0 +1,30 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_CAVLCTables.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Header file for optimized H.264 CAVLC tables
+ *
+ */
+
+#ifndef ARMVCM4P10_CAVLCTABLES_H
+#define ARMVCM4P10_CAVLCTABLES_H
+/* The OMX_U8/OMX_S8/OMX_U16 types are not declared here; armVCM4P10_CAVLCTables.c includes "omxtypes.h" before this header. */
+/* CAVLC tables */
+
+extern const OMX_U16 *armVCM4P10_CAVLCCoeffTokenTables[18]; /* slots 0..16 are labelled nC=0..16, slot 17 nC=-1 */
+extern const OMX_U16 *armVCM4P10_CAVLCTotalZeroTables[15]; /* pointers to the TotalZeroTables_0.._14 arrays */
+extern const OMX_U16 *armVCM4P10_CAVLCTotalZeros2x2Tables[3]; /* pointers to the TotalZeros2x2Tables_0.._2 arrays */
+extern const OMX_U16 *armVCM4P10_CAVLCRunBeforeTables[15];
+extern const OMX_U8 armVCM4P10_ZigZag_4x4[16]; /* 4x4 de-zigzag (scan position -> block index) */
+extern const OMX_U8 armVCM4P10_ZigZag_2x2[4]; /* 2x2 de-zigzag (identity order) */
+extern const OMX_S8 armVCM4P10_SuffixToLevel[7]; /* per-SuffixLength increment thresholds; last entry is -1 */
+
+#endif
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
new file mode 100644
index 0000000..b2cd9d1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
@@ -0,0 +1,222 @@
+;//
+;//
+;// File Name: armVCM4P10_Average_4x_Align_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+;// Functions:
+;// armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe
+;// (ALIGNMENT = 0, 2 and 3 are the variants defined in this file)
+;// Implements Average of 4x4 with equation c = (a+b+1)>>1.
+;// First operand will be at offset ALIGNMENT from aligned address
+;// Second operand will be at aligned location and will be used as output.
+;// destination pointed by (pDst) for vertical interpolation.
+;// This function needs to copy 4 bytes in horizontal direction
+;//
+;// Registers used as input for this function
+;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 the step size
+;//
+;// Registers preserved for top level function
+;// r4,r5,r6,r8,r9,r14
+;//
+;// Registers modified by the function
+;// r7,r10,r11,r12
+;// NOTE(review): the bodies below also write r4, r5 and r14; they are only preserved if M_START <name>, r6 saves/restores them - confirm
+;// Output registers
+;// r2 - pointer to the aligned location
+;// r3 - step size to this aligned location
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_Average_4x4_Align0_unsafe
+ EXPORT armVCM4P10_Average_4x4_Align2_unsafe
+ EXPORT armVCM4P10_Average_4x4_Align3_unsafe
+
+DEBUG_ON SETL {FALSE}
+
+;// Declare input registers
+pPred0 RN 0
+iPredStep0 RN 1
+pPred1 RN 2
+iPredStep1 RN 3
+pDstPred RN 2 ;// same register as pPred1: each result overwrites the second operand in place
+iDstStep RN 3 ;// same register as iPredStep1
+
+;// Declare other intermediate registers
+iPredA0 RN 10
+iPredA1 RN 11
+iPredB0 RN 12
+iPredB1 RN 14
+Temp1 RN 4 ;// shares r4 with ResultB; Temp1 is consumed before ResultB is written
+Temp2 RN 5 ;// shares r5 with ResultA; Temp2 is consumed before ResultA is written
+ResultA RN 5
+ResultB RN 4
+r0x80808080 RN 7
+
+ IF ARM1136JS
+
+ ;// This function calculates average of 4x4 block
+ ;// pPred0 is at alignment offset 0 and pPred1 is alignment 4
+
+ ;// Function header
+ M_START armVCM4P10_Average_4x4_Align0_unsafe, r6
+
+ ;// Code start
+ LDR r0x80808080, =0x80808080
+
+ ;// 1st load (rows 0 and 1)
+ M_LDR iPredB0, [pPred1]
+ M_LDR iPredA0, [pPred0], iPredStep0
+ M_LDR iPredB1, [pPred1, iPredStep1]
+ M_LDR iPredA1, [pPred0], iPredStep0
+
+ ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
+ MVN iPredB0, iPredB0
+ MVN iPredB1, iPredB1
+ UHSUB8 ResultA, iPredA0, iPredB0
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080
+ M_STR ResultA, [pDstPred], iDstStep
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep
+
+ ;// 2nd load (rows 2 and 3; pPred1 has moved on because the stores above updated the shared register)
+ M_LDR iPredA0, [pPred0], iPredStep0
+ M_LDR iPredB0, [pPred1]
+ M_LDR iPredA1, [pPred0], iPredStep0
+ M_LDR iPredB1, [pPred1, iPredStep1]
+
+ MVN iPredB0, iPredB0
+ UHSUB8 ResultA, iPredA0, iPredB0
+ MVN iPredB1, iPredB1
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080
+ M_STR ResultA, [pDstPred], iDstStep
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep
+End0
+ M_END
+
+ ;// This function calculates average of 4x4 block
+ ;// pPred0 is at alignment offset 2 and pPred1 is alignment 4
+
+ ;// Function header
+ M_START armVCM4P10_Average_4x4_Align2_unsafe, r6
+
+ ;// Code start
+ LDR r0x80808080, =0x80808080
+
+ ;// 1st load (rows 0 and 1): two words per row of the first operand, at [pPred0] and [pPred0,#4]
+ LDR Temp1, [pPred0, #4]
+ M_LDR iPredA0, [pPred0], iPredStep0
+ M_LDR iPredB0, [pPred1]
+ M_LDR iPredB1, [pPred1, iPredStep1]
+ M_LDR Temp2, [pPred0, #4]
+ M_LDR iPredA1, [pPred0], iPredStep0
+ MVN iPredB0, iPredB0
+ MVN iPredB1, iPredB1
+ MOV iPredA0, iPredA0, LSR #16 ;// upper half of the first word ...
+ ORR iPredA0, iPredA0, Temp1, LSL #16 ;// ... joined with the lower half of the next word
+ MOV iPredA1, iPredA1, LSR #16
+ ORR iPredA1, iPredA1, Temp2, LSL #16
+
+ ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
+ UHSUB8 ResultA, iPredA0, iPredB0
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080
+ M_STR ResultA, [pDstPred], iDstStep
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep
+
+ ;// 2nd load (rows 2 and 3)
+ LDR Temp1, [pPred0, #4]
+ M_LDR iPredA0, [pPred0], iPredStep0
+ LDR iPredB0, [pPred1]
+ LDR iPredB1, [pPred1, iPredStep1]
+ LDR Temp2, [pPred0, #4]
+ M_LDR iPredA1, [pPred0], iPredStep0
+ MVN iPredB0, iPredB0
+ MVN iPredB1, iPredB1
+ MOV iPredA0, iPredA0, LSR #16
+ ORR iPredA0, iPredA0, Temp1, LSL #16
+ MOV iPredA1, iPredA1, LSR #16
+ ORR iPredA1, iPredA1, Temp2, LSL #16
+
+ UHSUB8 ResultA, iPredA0, iPredB0
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080
+ M_STR ResultA, [pDstPred], iDstStep
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep
+End2
+ M_END
+
+
+ ;// This function calculates average of 4x4 block
+ ;// pPred0 is at alignment offset 3 and pPred1 is alignment 4
+
+ ;// Function header
+ M_START armVCM4P10_Average_4x4_Align3_unsafe, r6
+
+ ;// Code start
+ LDR r0x80808080, =0x80808080
+
+ ;// 1st load (rows 0 and 1): two words per row of the first operand, at [pPred0] and [pPred0,#4]
+ LDR Temp1, [pPred0, #4]
+ M_LDR iPredA0, [pPred0], iPredStep0
+ LDR iPredB0, [pPred1]
+ LDR iPredB1, [pPred1, iPredStep1]
+ LDR Temp2, [pPred0, #4]
+ M_LDR iPredA1, [pPred0], iPredStep0
+
+ MVN iPredB0, iPredB0
+ MVN iPredB1, iPredB1
+ MOV iPredA0, iPredA0, LSR #24 ;// top byte of the first word ...
+ ORR iPredA0, iPredA0, Temp1, LSL #8 ;// ... joined with the low three bytes of the next word
+ MOV iPredA1, iPredA1, LSR #24
+ ORR iPredA1, iPredA1, Temp2, LSL #8
+ UHSUB8 ResultA, iPredA0, iPredB0
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080
+ M_STR ResultA, [pDstPred], iDstStep
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep
+
+ ;// 2nd load (rows 2 and 3)
+ LDR Temp1, [pPred0, #4]
+ M_LDR iPredA0, [pPred0], iPredStep0
+ LDR iPredB0, [pPred1]
+ LDR iPredB1, [pPred1, iPredStep1]
+ LDR Temp2, [pPred0, #4]
+ M_LDR iPredA1, [pPred0], iPredStep0
+
+ MVN iPredB0, iPredB0
+ MVN iPredB1, iPredB1
+ MOV iPredA0, iPredA0, LSR #24
+ ORR iPredA0, iPredA0, Temp1, LSL #8
+ MOV iPredA1, iPredA1, LSR #24
+ ORR iPredA1, iPredA1, Temp2, LSL #8
+
+ UHSUB8 ResultA, iPredA0, iPredB0
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080
+ M_STR ResultA, [pDstPred], iDstStep
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep
+End3
+ M_END
+
+ ENDIF
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_CAVLCTables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_CAVLCTables.c
new file mode 100644
index 0000000..17fe518
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_CAVLCTables.c
@@ -0,0 +1,327 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_CAVLCTables.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Optimized CAVLC tables for H.264
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVCM4P10_CAVLCTables.h"
+
+/* 4x4 DeZigZag table: entry i is the row-major index (row*4 + column) of the i-th coefficient in zig-zag scan order */
+
+const OMX_U8 armVCM4P10_ZigZag_4x4[16] =
+{
+ 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+/* 2x2 DeZigZag table: the four positions are visited in index order, so the mapping is the identity */
+
+const OMX_U8 armVCM4P10_ZigZag_2x2[4] =
+{
+ 0, 1, 2, 3
+};
+
+
+/*
+ * Suffix To Level table: each entry holds the threshold (3<<SuffixLength)+2 from the last line below
+ * We increment the suffix length if (each line restates the condition of the line before it)
+ * ((LevelCode>>1)+1)>(3<<(SuffixLength-1)) && SuffixLength<6
+ * (LevelCode>>1)>=(3<<(SuffixLength-1)) && SuffixLength<6
+ * LevelCode >= 3<<SuffixLength && SuffixLength<6
+ * (LevelCode+2) >= (3<<SuffixLength)+2 && SuffixLength<6
+ */
+const OMX_S8 armVCM4P10_SuffixToLevel[7] =
+{
+ (3<<1)+2, /* SuffixLength=0; same threshold as 1. NOTE(review): originally labelled SuffixLength=1 - confirm */
+ (3<<1)+2, /* SuffixLength=1 */
+ (3<<2)+2, /* SuffixLength=2 */
+ (3<<3)+2, /* SuffixLength=3 */
+ (3<<4)+2, /* SuffixLength=4 */
+ (3<<5)+2, /* SuffixLength=5 */
+ -1 /* SuffixLength=6 - never increment */
+};
+/* Coeff-token tables _0.._4, reached through armVCM4P10_CAVLCCoeffTokenTables; the entry bit layout is defined by the consuming decoder, which is not in this file. */
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_0[132] = {
+ 0x0020, 0x0100, 0x2015, 0x2015, 0x400b, 0x400b, 0x400b, 0x400b,
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001,
+ 0x0028, 0x00f0, 0x00f8, 0x0027, 0x0030, 0x00d8, 0x00e0, 0x00e8,
+ 0x0038, 0x00a0, 0x00c8, 0x00d0, 0x0040, 0x0068, 0x0090, 0x0098,
+ 0x0048, 0x0050, 0x0058, 0x0060, 0x27ff, 0x27ff, 0x206b, 0x206b,
+ 0x0081, 0x0085, 0x0083, 0x0079, 0x0087, 0x007d, 0x007b, 0x0071,
+ 0x007f, 0x0075, 0x0073, 0x0069, 0x0070, 0x0078, 0x0080, 0x0088,
+ 0x2077, 0x2077, 0x206d, 0x206d, 0x2063, 0x2063, 0x2061, 0x2061,
+ 0x206f, 0x206f, 0x2065, 0x2065, 0x205b, 0x205b, 0x2059, 0x2059,
+ 0x0067, 0x005d, 0x0053, 0x0051, 0x005f, 0x0055, 0x004b, 0x0049,
+ 0x00a8, 0x00b0, 0x00b8, 0x00c0, 0x2041, 0x2041, 0x204d, 0x204d,
+ 0x2043, 0x2043, 0x2039, 0x2039, 0x2057, 0x2057, 0x2045, 0x2045,
+ 0x203b, 0x203b, 0x2031, 0x2031, 0x204f, 0x204f, 0x203d, 0x203d,
+ 0x2033, 0x2033, 0x2029, 0x2029, 0x0047, 0x0035, 0x002b, 0x0021,
+ 0x203f, 0x203f, 0x202d, 0x202d, 0x2023, 0x2023, 0x2019, 0x2019,
+ 0x0037, 0x0025, 0x001b, 0x0011, 0x202f, 0x202f, 0x201d, 0x201d,
+ 0x0013, 0x0009, 0x201f, 0x201f
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_1[128] = {
+ 0x0020, 0x00e8, 0x00f0, 0x00f8, 0x0027, 0x001f, 0x2015, 0x2015,
+ 0x400b, 0x400b, 0x400b, 0x400b, 0x4001, 0x4001, 0x4001, 0x4001,
+ 0x0028, 0x00d0, 0x00d8, 0x00e0, 0x0030, 0x0098, 0x00c0, 0x00c8,
+ 0x0038, 0x0060, 0x0088, 0x0090, 0x0040, 0x0048, 0x0050, 0x0058,
+ 0x27ff, 0x27ff, 0x207f, 0x207f, 0x0087, 0x0085, 0x0083, 0x0081,
+ 0x007b, 0x0079, 0x007d, 0x0073, 0x2075, 0x2075, 0x2071, 0x2071,
+ 0x0068, 0x0070, 0x0078, 0x0080, 0x2077, 0x2077, 0x206d, 0x206d,
+ 0x206b, 0x206b, 0x2069, 0x2069, 0x206f, 0x206f, 0x2065, 0x2065,
+ 0x2063, 0x2063, 0x2061, 0x2061, 0x0059, 0x005d, 0x005b, 0x0051,
+ 0x0067, 0x0055, 0x0053, 0x0049, 0x00a0, 0x00a8, 0x00b0, 0x00b8,
+ 0x205f, 0x205f, 0x204d, 0x204d, 0x204b, 0x204b, 0x2041, 0x2041,
+ 0x2057, 0x2057, 0x2045, 0x2045, 0x2043, 0x2043, 0x2039, 0x2039,
+ 0x204f, 0x204f, 0x203d, 0x203d, 0x203b, 0x203b, 0x2031, 0x2031,
+ 0x0029, 0x0035, 0x0033, 0x0021, 0x2047, 0x2047, 0x202d, 0x202d,
+ 0x202b, 0x202b, 0x2019, 0x2019, 0x003f, 0x0025, 0x0023, 0x0011,
+ 0x0037, 0x001d, 0x001b, 0x0009, 0x202f, 0x202f, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_2[112] = {
+ 0x0020, 0x0088, 0x00b0, 0x00b8, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+ 0x003f, 0x0037, 0x002f, 0x0027, 0x001f, 0x0015, 0x000b, 0x0001,
+ 0x0028, 0x0050, 0x0078, 0x0080, 0x0030, 0x0038, 0x0040, 0x0048,
+ 0x07ff, 0x0081, 0x0087, 0x0085, 0x0083, 0x0079, 0x007f, 0x007d,
+ 0x007b, 0x0071, 0x0077, 0x0075, 0x0073, 0x0069, 0x206b, 0x206b,
+ 0x0058, 0x0060, 0x0068, 0x0070, 0x2061, 0x2061, 0x206d, 0x206d,
+ 0x2063, 0x2063, 0x2059, 0x2059, 0x206f, 0x206f, 0x2065, 0x2065,
+ 0x205b, 0x205b, 0x2051, 0x2051, 0x0067, 0x005d, 0x0053, 0x0049,
+ 0x005f, 0x0055, 0x004b, 0x0041, 0x0090, 0x0098, 0x00a0, 0x00a8,
+ 0x2039, 0x2039, 0x2031, 0x2031, 0x204d, 0x204d, 0x2029, 0x2029,
+ 0x2057, 0x2057, 0x2045, 0x2045, 0x2043, 0x2043, 0x2021, 0x2021,
+ 0x0019, 0x003d, 0x003b, 0x0011, 0x004f, 0x0035, 0x0033, 0x0009,
+ 0x202b, 0x202b, 0x202d, 0x202d, 0x2023, 0x2023, 0x2025, 0x2025,
+ 0x201b, 0x201b, 0x2047, 0x2047, 0x201d, 0x201d, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_3[80] = {
+ 0x0020, 0x0028, 0x0030, 0x0038, 0x0040, 0x0048, 0x0050, 0x0058,
+ 0x0060, 0x0068, 0x0070, 0x0078, 0x0080, 0x0088, 0x0090, 0x0098,
+ 0x0009, 0x000b, 0x07ff, 0x0001, 0x0011, 0x0013, 0x0015, 0x07ff,
+ 0x0019, 0x001b, 0x001d, 0x001f, 0x0021, 0x0023, 0x0025, 0x0027,
+ 0x0029, 0x002b, 0x002d, 0x002f, 0x0031, 0x0033, 0x0035, 0x0037,
+ 0x0039, 0x003b, 0x003d, 0x003f, 0x0041, 0x0043, 0x0045, 0x0047,
+ 0x0049, 0x004b, 0x004d, 0x004f, 0x0051, 0x0053, 0x0055, 0x0057,
+ 0x0059, 0x005b, 0x005d, 0x005f, 0x0061, 0x0063, 0x0065, 0x0067,
+ 0x0069, 0x006b, 0x006d, 0x006f, 0x0071, 0x0073, 0x0075, 0x0077,
+ 0x0079, 0x007b, 0x007d, 0x007f, 0x0081, 0x0083, 0x0085, 0x0087
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_4[32] = {
+ 0x0020, 0x0038, 0x2015, 0x2015, 0x4001, 0x4001, 0x4001, 0x4001,
+ 0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b,
+ 0x0028, 0x0030, 0x0021, 0x0019, 0x2027, 0x2027, 0x0025, 0x0023,
+ 0x201d, 0x201d, 0x201b, 0x201b, 0x0011, 0x001f, 0x0013, 0x0009
+};
+
+const OMX_U16 * armVCM4P10_CAVLCCoeffTokenTables[18] = { /* slot i serves nC=i for i=0..16; the extra last slot serves nC=-1 */
+ armVCM4P10_CAVLCCoeffTokenTables_0, /* nC=0 */
+ armVCM4P10_CAVLCCoeffTokenTables_0, /* nC=1 */
+ armVCM4P10_CAVLCCoeffTokenTables_1, /* nC=2 */
+ armVCM4P10_CAVLCCoeffTokenTables_1, /* nC=3 */
+ armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=4 */
+ armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=5 */
+ armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=6 */
+ armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=7 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=8 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=9 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=10 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=11 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=12 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=13 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=14 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=15 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=16 */
+ armVCM4P10_CAVLCCoeffTokenTables_4 /* nC=-1 */
+};
+/* Total-zeros tables _0.._14, reached through armVCM4P10_CAVLCTotalZeroTables; the entry bit layout is defined by the consuming decoder, which is not in this file. */
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_0[40] = {
+ 0x0020, 0x0048, 0x0009, 0x0007, 0x2005, 0x2005, 0x2003, 0x2003,
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001,
+ 0x0028, 0x0040, 0x0011, 0x000f, 0x0030, 0x0038, 0x0019, 0x0017,
+ 0x27ff, 0x27ff, 0x201f, 0x201f, 0x201d, 0x201d, 0x201b, 0x201b,
+ 0x2015, 0x2015, 0x2013, 0x2013, 0x200d, 0x200d, 0x200b, 0x200b
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_1[24] = {
+ 0x0020, 0x0028, 0x0011, 0x000f, 0x000d, 0x000b, 0x2009, 0x2009,
+ 0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001,
+ 0x001d, 0x001b, 0x0019, 0x0017, 0x2015, 0x2015, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_2[24] = {
+ 0x0020, 0x0028, 0x0011, 0x000b, 0x0009, 0x0001, 0x200f, 0x200f,
+ 0x200d, 0x200d, 0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003,
+ 0x001b, 0x0017, 0x2019, 0x2019, 0x2015, 0x2015, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_3[24] = {
+ 0x0020, 0x0028, 0x0013, 0x000f, 0x0007, 0x0005, 0x2011, 0x2011,
+ 0x200d, 0x200d, 0x200b, 0x200b, 0x2009, 0x2009, 0x2003, 0x2003,
+ 0x2019, 0x2019, 0x2017, 0x2017, 0x2015, 0x2015, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_4[20] = {
+ 0x0020, 0x0015, 0x0011, 0x0005, 0x0003, 0x0001, 0x200f, 0x200f,
+ 0x200d, 0x200d, 0x200b, 0x200b, 0x2009, 0x2009, 0x2007, 0x2007,
+ 0x2017, 0x2017, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_5[20] = {
+ 0x0020, 0x0011, 0x2013, 0x2013, 0x200f, 0x200f, 0x200d, 0x200d,
+ 0x200b, 0x200b, 0x2009, 0x2009, 0x2007, 0x2007, 0x2005, 0x2005,
+ 0x0015, 0x0001, 0x2003, 0x2003
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_6[20] = {
+ 0x0020, 0x000f, 0x2011, 0x2011, 0x200d, 0x200d, 0x2009, 0x2009,
+ 0x2007, 0x2007, 0x2005, 0x2005, 0x400b, 0x400b, 0x400b, 0x400b,
+ 0x0013, 0x0001, 0x2003, 0x2003
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_7[20] = {
+ 0x0020, 0x0003, 0x200f, 0x200f, 0x200d, 0x200d, 0x2007, 0x2007,
+ 0x400b, 0x400b, 0x400b, 0x400b, 0x4009, 0x4009, 0x4009, 0x4009,
+ 0x0011, 0x0001, 0x2005, 0x2005
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_8[20] = {
+ 0x0020, 0x0005, 0x200b, 0x200b, 0x400d, 0x400d, 0x400d, 0x400d,
+ 0x4009, 0x4009, 0x4009, 0x4009, 0x4007, 0x4007, 0x4007, 0x4007,
+ 0x0003, 0x0001, 0x200f, 0x200f
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_9[20] = {
+ 0x0020, 0x000d, 0x2005, 0x2005, 0x400b, 0x400b, 0x400b, 0x400b,
+ 0x4009, 0x4009, 0x4009, 0x4009, 0x4007, 0x4007, 0x4007, 0x4007,
+ 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_10[16] = {
+ 0x0001, 0x0003, 0x2005, 0x2005, 0x2007, 0x2007, 0x200b, 0x200b,
+ 0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_11[16] = {
+ 0x0001, 0x0003, 0x2009, 0x2009, 0x4005, 0x4005, 0x4005, 0x4005,
+ 0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_12[16] = {
+ 0x2001, 0x2001, 0x2003, 0x2003, 0x4007, 0x4007, 0x4007, 0x4007,
+ 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_13[16] = {
+ 0x4001, 0x4001, 0x4001, 0x4001, 0x4003, 0x4003, 0x4003, 0x4003,
+ 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_14[16] = {
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001,
+ 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003
+};
+
+const OMX_U16 * armVCM4P10_CAVLCTotalZeroTables[15] = {
+ armVCM4P10_CAVLCTotalZeroTables_0, /* TotalCoeff=1 */
+ armVCM4P10_CAVLCTotalZeroTables_1, /* TotalCoeff=2 */
+ armVCM4P10_CAVLCTotalZeroTables_2, /* TotalCoeff=3 */
+ armVCM4P10_CAVLCTotalZeroTables_3, /* TotalCoeff=4 */
+ armVCM4P10_CAVLCTotalZeroTables_4, /* TotalCoeff=5 */
+ armVCM4P10_CAVLCTotalZeroTables_5, /* TotalCoeff=6 */
+ armVCM4P10_CAVLCTotalZeroTables_6, /* TotalCoeff=7 */
+ armVCM4P10_CAVLCTotalZeroTables_7, /* TotalCoeff=8 */
+ armVCM4P10_CAVLCTotalZeroTables_8, /* TotalCoeff=9 */
+ armVCM4P10_CAVLCTotalZeroTables_9, /* TotalCoeff=10 */
+ armVCM4P10_CAVLCTotalZeroTables_10, /* TotalCoeff=11 */
+ armVCM4P10_CAVLCTotalZeroTables_11, /* TotalCoeff=12 */
+ armVCM4P10_CAVLCTotalZeroTables_12, /* TotalCoeff=13 */
+ armVCM4P10_CAVLCTotalZeroTables_13, /* TotalCoeff=14 */
+ armVCM4P10_CAVLCTotalZeroTables_14 /* TotalCoeff=15 */
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_0[16] = {
+ 0x2007, 0x2007, 0x2005, 0x2005, 0x4003, 0x4003, 0x4003, 0x4003,
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_1[16] = {
+ 0x4005, 0x4005, 0x4005, 0x4005, 0x4003, 0x4003, 0x4003, 0x4003,
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_2[16] = {
+ 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003,
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001
+};
+
+const OMX_U16 * armVCM4P10_CAVLCTotalZeros2x2Tables[3] = {
+ armVCM4P10_CAVLCTotalZeros2x2Tables_0, /* TotalCoeff=1 */
+ armVCM4P10_CAVLCTotalZeros2x2Tables_1, /* TotalCoeff=2 */
+ armVCM4P10_CAVLCTotalZeros2x2Tables_2 /* TotalCoeff=3 */
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_0[8] = {
+ 0x4003, 0x4003, 0x4003, 0x4003, 0x4001, 0x4001, 0x4001, 0x4001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_1[8] = {
+ 0x2005, 0x2005, 0x2003, 0x2003, 0x4001, 0x4001, 0x4001, 0x4001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_2[8] = {
+ 0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_3[8] = {
+ 0x0009, 0x0007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_4[8] = {
+ 0x000b, 0x0009, 0x0007, 0x0005, 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_5[8] = {
+ 0x0003, 0x0005, 0x0009, 0x0007, 0x000d, 0x000b, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_6[24] = {
+ 0x0010, 0x000d, 0x000b, 0x0009, 0x0007, 0x0005, 0x0003, 0x0001,
+ 0x0018, 0x0011, 0x200f, 0x200f, 0x0020, 0x0015, 0x2013, 0x2013,
+ 0x0028, 0x0019, 0x2017, 0x2017, 0x07ff, 0x001d, 0x201b, 0x201b
+};
+
+/* Tables 7 to 14 are duplicates of table 6 */
+
+const OMX_U16 * armVCM4P10_CAVLCRunBeforeTables[15] = {
+ armVCM4P10_CAVLCRunBeforeTables_0, /* ZerosLeft=1 */
+ armVCM4P10_CAVLCRunBeforeTables_1, /* ZerosLeft=2 */
+ armVCM4P10_CAVLCRunBeforeTables_2, /* ZerosLeft=3 */
+ armVCM4P10_CAVLCRunBeforeTables_3, /* ZerosLeft=4 */
+ armVCM4P10_CAVLCRunBeforeTables_4, /* ZerosLeft=5 */
+ armVCM4P10_CAVLCRunBeforeTables_5, /* ZerosLeft=6 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=7 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=8 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=9 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=10 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=11 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=12 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=13 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=14 */
+ armVCM4P10_CAVLCRunBeforeTables_6 /* ZerosLeft=15 */
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
new file mode 100644
index 0000000..dcbcd00
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
@@ -0,0 +1,20 @@
+;//
+;//
+;// File Name: armVCM4P10_DeblockingChroma_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
new file mode 100644
index 0000000..14b37fe
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
@@ -0,0 +1,366 @@
+;//
+;//
+;// File Name: armVCM4P10_DeblockingLuma_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+
+ IF ARM1136JS
+
+MASK_1 EQU 0x01010101
+
+;// Declare input registers
+
+pQ0 RN 0
+StepArg RN 1
+tC0Arg RN 2
+alpha RN 6
+
+beta RN 14
+bS RN 14
+tC0 RN 14
+ptC0 RN 1
+
+;// Declare Local/Temporary variables
+
+;// Pixels
+p_0 RN 3
+p_1 RN 5
+p_2 RN 4
+p_3 RN 2
+q_0 RN 8
+q_1 RN 9
+q_2 RN 10
+q_3 RN 12
+
+
+;// Filtering
+
+ap0q0 RN 1
+filt RN 2
+
+m00 RN 7
+m01 RN 11
+
+apflg RN 0
+aqflg RN 6
+
+tC RN 1
+
+
+;//Declarations for bSLT4 kernel
+
+pos RN 7
+neg RN 12
+
+P0a RN 1
+P1a RN 8
+Q0a RN 7
+Q1a RN 4
+
+u1 RN 3
+max RN 12
+min RN 2
+
+
+
+;//Declarations for bSGE4 kernel
+
+q_3b RN 9
+p_3b RN 0
+apqflg RN 12
+
+P0b RN 6
+P1b RN 7
+P2b RN 1
+
+Q0b RN 9
+Q1b RN 0
+Q2b RN 2
+
+;// Miscellaneous
+
+a RN 0
+t0 RN 3
+t1 RN 12
+t2 RN 7
+t3 RN 11
+t4 RN 4
+t5 RN 1
+t8 RN 6
+t9 RN 14
+t10 RN 5
+t11 RN 9
+
+;// Register usage for - armVCM4P10_DeblockingLumabSLT4_unsafe()
+;//
+;// Inputs - 3,4,5,8,9,10 - Input Pixels (p0-p2,q0-q2)
+;// - 2 - filt, 0 - apflg, 6 - aqflg
+;// - 11 - m01, 7 - tC0
+;//
+;// Outputs - 1,8,7,11 - Output Pixels(P0a,P1a,Q0a,Q1a)
+;//
+;// Registers Corrupted - 0-3,5-12,14
+
+
+ M_START armVCM4P10_DeblockingLumabSLT4_unsafe, lr
+
+ ;// Since beta <= 18 and alpha <= 255 we know
+ ;// -254 <= p0-q0 <= 254
+ ;// -17 <= q1-q0 <= 17
+ ;// -17 <= p1-p0 <= 17
+
+ ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
+ ;//
+ ;// Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
+ ;// = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
+ ;// = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
+
+ USUB8 t1, p_1, p_0
+ MUL tC0, t2, m01
+
+ USUB8 t2, q_1, q_0
+ SSUB8 t1, t1, t2
+
+ USUB8 t2, p_0, q_0
+ AND t2, t2, m01
+ SHSUB8 t1, t1, t2
+ UHSUB8 t5, p_0, q_0
+ SSUB8 t1, t1, t2
+ SHSUB8 t1, t1, t5
+ MOV m00, #0
+ SADD8 t1, t1, m01
+ SHSUB8 t1, t1, t5
+
+ ;// tC = tC0
+ ;// if (ap < beta) tC++;
+ ;// if (aq < beta) tC++;
+ USUB8 t5, filt, m01
+ SEL tC0, tC0, m00
+ UQADD8 tC, tC0, apflg
+ SSUB8 t1, t1, m00
+ UQADD8 tC, tC, aqflg
+
+ ;// Split into positive and negative part and clip
+ SEL pos, t1, m00
+ USUB8 neg, pos, t1
+ USUB8 t3, pos, tC
+ SEL pos, tC, pos
+ USUB8 t3, neg, tC
+ SEL neg, tC, neg
+
+ ;//Reload m01
+ LDR m01,=MASK_1
+
+ UQADD8 P0a, p_0, pos
+ UQSUB8 Q0a, q_0, pos
+ UQSUB8 P0a, P0a, neg
+ UQADD8 Q0a, Q0a, neg
+
+ ;// Choose to store the filtered
+ ;// value or the original pixel
+ USUB8 t1, filt, m01
+ SEL P0a, P0a, p_0
+ SEL Q0a, Q0a, q_0
+
+ ;// delta = (p2 + ((p0+q0+1)>>1) - (p1<<1))>>1;
+ ;// u1 = (p0 + q0 + 1)>>1
+ ;// u1 = ( (q_0 - p_0')>>1 ) ^ 0x80
+ MVN p_0, p_0
+ UHSUB8 u1, q_0, p_0
+ UQADD8 max, p_1, tC0
+ EOR u1, u1, m01 ,LSL #7
+
+ ;// Calculate A = (p2+u1)>>1
+ ;// Then delta = Clip3( -tC0, tC0, A - p1)
+
+ ;// Clip P1
+ UHADD8 P1a, p_2, u1
+ UQSUB8 min, p_1, tC0
+ USUB8 t4, P1a, max
+ SEL P1a, max, P1a
+ USUB8 t4, P1a, min
+ SEL P1a, P1a, min
+
+ ;// Clip Q1
+ UHADD8 Q1a, q_2, u1
+ UQADD8 max, q_1, tC0
+ UQSUB8 min, q_1, tC0
+ USUB8 t0, Q1a, max
+ SEL Q1a, max, Q1a
+ USUB8 t0, Q1a, min
+ SEL Q1a, Q1a, min
+
+ ;// Choose to store the filtered
+ ;// value or the original pixel
+ USUB8 t0, apflg, m01
+ SEL P1a, P1a, p_1
+ USUB8 t0, aqflg, m01
+ SEL t3, Q1a, q_1
+
+ M_END
+
+;// Register usage for - armVCM4P10_DeblockingLumabSGE4_unsafe()
+;//
+;// Inputs - 3,4,5,8,9,10 - Input Pixels (p0-p2,q0-q2)
+;// - 2 - filt, 0 - apflg,aqflg
+;// - 1 - ap0q0, 6 - alpha
+;// - 7 - m00, 11 - m01
+;//
+;// Outputs - 6,7,1,9,0,2 - Output Pixels(P0b,P1b,P2b, Q0b,Q1b,Q2b)
+;//
+;// Registers Corrupted - 0-3,5-12,14
+
+ M_START armVCM4P10_DeblockingLumabSGE4_unsafe, lr
+
+ ;// apflg = apflg && |p0-q0|<((alpha>>2)+2)
+ ;// aqflg = aqflg && |p0-q0|<((alpha>>2)+2)
+
+ M_ARG pDummy,4
+ M_ARG pQ_3,4
+ M_ARG pP_3,4
+
+ UHADD8 alpha, alpha, m00
+ USUB8 t9, p_2, p_0 ;//t9 = dp2p0
+ UHADD8 alpha, alpha, m00
+ ADD alpha, alpha, m01, LSL #1
+ USUB8 ap0q0, ap0q0, alpha
+ SEL apqflg, m00, apflg
+
+ ;// P0 = (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3
+ ;// = ((p2-p0) + 2*(p1-p0) + (q1-q0) + 3*(q0-p0) + 8*p0 + 4)>>3
+ ;// = p0 + (((p2-p0) + 2*(p1-p0) + (q1-q0) - 3*(p0-q0) + 4)>>3)
+
+ ;// P1 = (p2 + p1 + q0 + p0 + 2)>>2
+ ;// = p0 + (((p2-p0) + (p1-p0) - (p0-q0) + 2)>>2)
+
+ ;// P2 = (2*p3 + 3*p2 + p1 + p0 + q0 + 4)>>3
+ ;// = (2*(p3-p0) + 3*(p2-p0) + (p1-p0) - (p0-q0) + 8*p0 + 4)>>3
+ ;// = p0 + (((p3-p0) + (p2-p0) + t2 + 2)>>2)
+
+ ;// Compute P0b
+ USUB8 t2, p_0, q_0
+ SSUB8 t5, t9, t2
+
+ USUB8 t8, q_1, q_0
+ SHADD8 t8, t5, t8
+
+ USUB8 t9, p_1, p_0
+ SADD8 t8, t8, t9
+ SHSUB8 t8, t8, t2
+ SHADD8 t5, t5, t9
+ SHADD8 t8, t8, m01
+ SHADD8 t9, t5, m01
+ SADD8 P0b, p_0, t8
+ ;// P0b ready
+
+ ;// Compute P1b
+ M_LDR p_3b, pP_3
+ SADD8 P1b, p_0, t9
+ ;// P1b ready
+
+ ;// Compute P2b
+ USUB8 t9, p_2, p_0
+ SADD8 t5, t5, t9
+ UHSUB8 t9, p_3b, p_0
+ EOR a, p_3b, p_0
+ AND a, a, m01
+ SHADD8 t5, t5, a
+ UHADD8 a, p_0, q_1
+ SADD8 t5, t5, m01
+ SHADD8 t5, t5, t9
+ MVN t9, p_1
+ SADD8 P2b, p_0, t5
+ ;// P2b ready
+
+ UHSUB8 a, a, t9
+ ORR t9, apqflg, m01
+ USUB8 t9, apqflg, t9
+
+ EOR a, a, m01, LSL #7
+ SEL P0b, P0b, a
+ SEL P1b, P1b, p_1
+ SEL P2b, P2b, p_2
+
+ USUB8 t4, filt, m01
+ SEL P0b, P0b, p_0
+
+
+ ;// Q0 = (q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4)>>3
+ ;// = ((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 8*q0 + 4)>>3
+ ;// = q0 + (((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 4)>>3)
+
+ ;// Q1 = (q2 + q1 + p0 + q0 + 2)>>2
+ ;// = q0 + (((q2-q0) + (q1-q0) + (p0-q0) + 2)>>2)
+
+ ;// Q2 = (2*q3 + 3*q2 + q1 + q0 + p0 + 4)>>3
+ ;// = (2*(q3-q0) + 3*(q2-q0) + (q1-q0) + (p0-q0) + 8*q0 + 4)>>3
+ ;// = q0 + (((q3-q0) + (q2-q0) + t2 + 2)>>2)
+
+
+ ;// Compute Q0b Q1b
+ USUB8 t4, q_2, q_0
+ USUB8 a, p_0, q_0
+ USUB8 t9, p_1, p_0
+ SADD8 t0, t4, a
+ SHADD8 t9, t0, t9
+ UHADD8 t10, q_0, p_1
+ SADD8 t9, t9, a
+ USUB8 a, q_1, q_0
+ SHADD8 t9, t9, a
+ SHADD8 t0, t0, a
+ SHADD8 t9, t9, m01
+ SHADD8 a, t0, m01
+ SADD8 t9, q_0, t9
+ ;// Q0b ready - t9
+
+ MOV t4, #0
+ UHADD8 apqflg, apqflg, t4
+
+ SADD8 Q1b, q_0, a
+ ;// Q1b ready
+
+ USUB8 t4, apqflg, m01
+ SEL Q1b, Q1b, q_1
+ MVN t11, q_1
+ UHSUB8 t10, t10, t11
+ M_LDR q_3b, pQ_3
+ EOR t10, t10, m01, LSL #7
+ SEL t9, t9, t10
+
+ ;// Compute Q2b
+ USUB8 t4, q_2, q_0
+ SADD8 t4, t0, t4
+ EOR t0, q_3b, q_0
+ AND t0, t0, m01
+ SHADD8 t4, t4, t0
+ UHSUB8 t10, q_3b, q_0
+ SADD8 t4, t4, m01
+ SHADD8 t4, t4, t10
+
+ USUB8 t10, filt, m01
+ SEL Q0b, t9, q_0
+
+ SADD8 t4, q_0, t4
+ ;// Q2b ready - t4
+
+ USUB8 t10, apqflg, m01
+ SEL Q2b, t4, q_2
+
+ M_END
+
+ ENDIF
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
new file mode 100644
index 0000000..ac448a0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
@@ -0,0 +1,325 @@
+;//
+;//
+;// File Name: armVCM4P10_DecodeCoeffsToPair_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+ IMPORT armVCM4P10_CAVLCCoeffTokenTables
+ IMPORT armVCM4P10_CAVLCTotalZeroTables
+ IMPORT armVCM4P10_CAVLCTotalZeros2x2Tables
+ IMPORT armVCM4P10_CAVLCRunBeforeTables
+ IMPORT armVCM4P10_SuffixToLevel
+ IMPORT armVCM4P10_ZigZag_4x4
+ IMPORT armVCM4P10_ZigZag_2x2
+
+ M_VARIANTS ARM1136JS
+
+;//DEBUG_ON SETL {TRUE}
+
+LAST_COEFF EQU 0x20 ;// End of block flag
+TWO_BYTE_COEFF EQU 0x10
+
+;// Declare input registers
+
+ppBitStream RN 0
+pOffset RN 1
+pNumCoeff RN 2
+ppPosCoefbuf RN 3
+nC RN 4 ;// number of coeffs or 17 for chroma
+sMaxNumCoeff RN 5
+
+;// Declare inner loop registers
+
+;// Level loop
+Count RN 0
+TrailingOnes RN 1
+pLevel RN 2
+LevelSuffix RN 3
+SuffixLength RN 4
+TotalCoeff RN 5
+
+pVLDTable RN 6
+Symbol RN 7
+T1 RN 8
+T2 RN 9
+RBitStream RN 10
+RBitBuffer RN 11
+RBitCount RN 12
+lr RN 14
+
+;// Run loop
+Count RN 0
+ZerosLeft RN 1
+pLevel RN 2
+ppRunTable RN 3
+pRun RN 4
+TotalCoeff RN 5
+
+pVLDTable RN 6
+Symbol RN 7
+T1 RN 8
+T2 RN 9
+RBitStream RN 10
+RBitBuffer RN 11
+RBitCount RN 12
+lr RN 14
+
+;// Fill in coefficients loop
+pPosCoefbuf RN 0
+temp RN 1
+pLevel RN 2
+ppPosCoefbuf RN 3
+pRun RN 4
+TotalCoeff RN 5
+pZigZag RN 6
+
+T1 RN 8
+T2 RN 9
+RBitStream RN 10
+RBitBuffer RN 11
+RBitCount RN 12
+CoeffNum RN 14
+
+
+
+ IF ARM1136JS
+
+ ;// Allocate stack memory required by the function
+ M_ALLOC4 pppBitStream, 4
+ M_ALLOC4 ppOffset, 4
+ M_ALLOC4 pppPosCoefbuf, 4
+ M_ALLOC4 ppLevel, 16*2 ;// level buffer: halfword entries (written with STRH)
+ M_ALLOC4 ppRun, 16 ;// run buffer: byte entries (written with STRB)
+
+ ;// Write function header
+ M_START armVCM4P10_DecodeCoeffsToPair, r11
+
+ ;// Define stack arguments
+ M_ARG pNC, 4
+ M_ARG pSMaxNumCoeff,4
+
+ ;// Code start
+ M_BD_INIT0 ppBitStream, pOffset, RBitStream, RBitBuffer, RBitCount
+ LDR pVLDTable, =armVCM4P10_CAVLCCoeffTokenTables
+ M_LDR nC, pNC
+
+ M_BD_INIT1 T1, T2, lr
+ LDR pVLDTable, [pVLDTable, nC, LSL #2] ;// Find VLD table
+
+ M_BD_INIT2 T1, T2, lr
+
+ ;// Decode Symbol = TotalCoeff*4 + TrailingOnes
+ M_BD_VLD Symbol, T1, T2, pVLDTable, 4, 2
+
+ MOVS TotalCoeff, Symbol, LSR #2 ;// TotalCoeff = Symbol>>2, Z set when it is zero
+ STRB TotalCoeff, [pNumCoeff] ;// *pNumCoeff = TotalCoeff
+ M_PRINTF "TotalCoeff=%d\n", TotalCoeff
+ BEQ.W EndNoError ;// Finished if no coefficients
+
+ CMP Symbol, #17*4 ;// TotalCoeff must be at most 16
+ BGE.W EndBadSymbol ;// Error if bad symbol
+
+ ;// Save bitstream pointers
+ M_STR ppBitStream, pppBitStream
+ M_STR pOffset, ppOffset
+ M_STR ppPosCoefbuf, pppPosCoefbuf
+
+ ;// Decode Trailing Ones
+ ANDS TrailingOnes, Symbol, #3 ;// TrailingOnes = low two bits of Symbol
+ M_ADR pLevel, ppLevel
+ M_PRINTF "TrailingOnes=%d\n", TrailingOnes
+ BEQ TrailingOnesDone
+ MOV Count, TrailingOnes
+TrailingOnesLoop
+ M_BD_READ8 Symbol, 1, T1
+ SUBS Count, Count, #1
+ MOV T1, #1
+ SUB T1, T1, Symbol, LSL #1 ;// T1 = 1 - 2*Symbol
+ M_PRINTF "Level=%d\n", T1
+ STRH T1, [pLevel], #2
+ BGT TrailingOnesLoop ;// flags still come from the SUBS above
+TrailingOnesDone
+
+ ;// Decode level values
+ SUBS Count, TotalCoeff, TrailingOnes ;// Number of levels to read
+ BEQ DecodeRuns ;// None left
+
+ MOV SuffixLength, #1
+ CMP TotalCoeff, #10
+ MOVLE SuffixLength, #0 ;// SuffixLength = (TotalCoeff > 10) ? 1 : 0
+ CMP TrailingOnes, #3 ;// if (TrailingOnes<3)
+ MOVLT TrailingOnes, #4 ;// then TrailingOnes = +4
+ MOVGE TrailingOnes, #2 ;// else TrailingOnes = +2
+ MOVGE SuffixLength, #0 ;// SuffixLength = 0
+
+LevelLoop
+ M_BD_CLZ16 Symbol, T1, T2 ;// Symbol=LevelPrefix
+ CMP Symbol,#16
+ BGE EndBadSymbol ;// LevelPrefix of 16 or more is rejected
+
+ MOVS lr, SuffixLength ;// if LevelSuffixSize==0
+ TEQEQ Symbol, #14 ;// and LevelPrefix==14
+ MOVEQ lr, #4 ;// then LevelSuffixSize=4
+ TEQ Symbol, #15 ;// if LevelPrefix==15
+ MOVEQ lr, #12 ;// then LevelSuffixSize=12
+
+ TEQEQ SuffixLength,#0 ;// if LevelPrefix==15 and SuffixLength==0
+ ADDEQ Symbol,Symbol,#15 ;// then LevelPrefix += 15
+
+ TEQ lr, #0 ;// if LevelSuffixSize==0
+ BEQ LevelCodeRead ;// LevelCode = LevelPrefix
+
+ M_BD_VREAD16 LevelSuffix, lr, T1, T2 ;// Read Level Suffix
+
+ MOV Symbol, Symbol, LSL SuffixLength ;// LevelCode = (LevelPrefix<<SuffixLength)
+ ADD Symbol, LevelSuffix, Symbol ;// + LevelSuffix
+
+LevelCodeRead
+ ;// Symbol = LevelCode
+ ADD Symbol, Symbol, TrailingOnes ;// +4 if level cannot be +/-1, +2 o/w
+ MOV TrailingOnes, #2 ;// every later level uses the +2 offset
+ MOVS T1, Symbol, LSR #1 ;// T1 = Symbol>>1, carry = bit 0 of Symbol
+ RSBCS T1, T1, #0 ;// If Symbol odd then negate
+ M_PRINTF "Level=%d\n", T1
+ STRH T1, [pLevel], #2 ;// Store level.
+
+ LDR T2, =armVCM4P10_SuffixToLevel
+ LDRSB T1, [T2, SuffixLength] ;// Find increment level
+ TEQ SuffixLength, #0
+ MOVEQ SuffixLength, #1 ;// SuffixLength 0 always becomes 1
+ CMP Symbol, T1
+ ADDCS SuffixLength, SuffixLength, #1 ;// unsigned Symbol >= table value: SuffixLength++
+ SUBS Count, Count, #1
+ BGT LevelLoop
+
+DecodeRuns
+ ;// Find number of zeros
+ M_LDR T1, pSMaxNumCoeff ;// sMaxNumCoeff
+ SUB Count, TotalCoeff, #1 ;// Number of runs excluding last
+ SUBS ZerosLeft, T1, TotalCoeff ;// Maximum number of zeros there could be
+ M_ADR pRun, ppRun
+ MOV CoeffNum,TotalCoeff
+ SUB CoeffNum,CoeffNum,#1 ;// CoeffNum = TotalCoeff - 1
+ BEQ NoZerosLeft ;// flags still from the SUBS above: no zeros possible
+
+ ;// Unpack number of zeros from bitstream
+ TEQ T1, #4 ;// sMaxNumCoeff==4 selects the 2x2 tables
+ LDREQ pVLDTable, =(armVCM4P10_CAVLCTotalZeros2x2Tables-4) ;// -4: first entry is TotalCoeff=1
+ LDRNE pVLDTable, =(armVCM4P10_CAVLCTotalZeroTables-4) ;// -4: first entry is TotalCoeff=1
+ LDR pVLDTable, [pVLDTable, TotalCoeff, LSL #2]
+
+ M_BD_VLD Symbol, T1, T2, pVLDTable, 4, 2 ;// Symbol = ZerosLeft
+ CMP Symbol,#16
+ BGE EndBadSymbol
+
+ LDR ppRunTable, =(armVCM4P10_CAVLCRunBeforeTables-4) ;// -4: first entry is ZerosLeft=1
+ M_ADR pRun, ppRun
+ MOVS ZerosLeft, Symbol
+
+ ADD CoeffNum,CoeffNum,ZerosLeft ;// CoeffNum = TotalCoeff - 1 + ZerosLeft
+
+ BEQ NoZerosLeft ;// flags still from the MOVS above
+
+ ;// Decode runs while zeros are left and more than one coefficient
+RunLoop
+ SUBS Count, Count, #1
+ LDR pVLDTable, [ppRunTable, ZerosLeft, LSL#2]
+ BLT LastRun
+ M_BD_VLD Symbol, T1, T2, pVLDTable, 3, 2 ;// Symbol = Run
+ CMP Symbol,#15
+ BGE EndBadSymbol
+
+ SUBS ZerosLeft, ZerosLeft, Symbol
+ M_PRINTF "Run=%d\n", Symbol
+ STRB Symbol, [pRun], #1
+ BGT RunLoop ;// flags still from the SUBS on ZerosLeft
+
+ ;// Decode runs while no zeros are left
+NoZerosLeft
+ SUBS Count, Count, #1
+ M_PRINTF "Run=%d\n", ZerosLeft
+ STRGEB ZerosLeft, [pRun], #1
+ BGT NoZerosLeft
+
+LastRun
+ ;// Final run length is remaining zeros
+ M_PRINTF "LastRun=%d\n", ZerosLeft
+ STRB ZerosLeft, [pRun], #1
+
+ ;// Write coefficients to output array
+ M_LDR T1, pSMaxNumCoeff ;// sMaxNumCoeff
+ TEQ T1, #15
+ ADDEQ CoeffNum,CoeffNum,#1 ;// sMaxNumCoeff==15: positions shift up by one (presumably skips the DC slot - confirm)
+
+
+ SUB pRun,pRun,TotalCoeff ;// rewind pRun by TotalCoeff bytes
+ SUB pLevel,pLevel,TotalCoeff
+ SUB pLevel,pLevel,TotalCoeff ;// rewind pLevel by TotalCoeff halfwords
+
+ M_LDR ppPosCoefbuf, pppPosCoefbuf
+ LDR pPosCoefbuf, [ppPosCoefbuf]
+ TEQ T1, #4 ;// sMaxNumCoeff==4 selects the 2x2 zig-zag table
+ LDREQ pZigZag, =armVCM4P10_ZigZag_2x2
+ LDRNE pZigZag, =armVCM4P10_ZigZag_4x4
+
+
+
+OutputLoop
+
+ LDRB T2, [pRun],#1
+ LDRB T1, [pZigZag, CoeffNum] ;// T1 = zig-zag table entry for CoeffNum
+ SUB CoeffNum, CoeffNum, #1 ;// Skip Non zero
+ SUB CoeffNum, CoeffNum, T2 ;// Skip Zero run
+
+ LDRSH T2, [pLevel],#2
+
+ SUBS TotalCoeff, TotalCoeff, #1
+ ORREQ T1, T1, #LAST_COEFF ;// flag the final coefficient of the block
+
+ ADD temp, T2, #128
+ CMP temp, #256 ;// carry set when level is outside [-128,127]
+ ORRCS T1, T1, #TWO_BYTE_COEFF
+
+
+ TEQ TotalCoeff, #0 ;// Preserves carry
+
+ M_PRINTF "Output=%02x %04x\n", T1, T2
+ STRB T1, [pPosCoefbuf], #1 ;// position/flags byte
+ STRB T2, [pPosCoefbuf], #1 ;// low byte of level
+ MOV T2, T2, LSR #8
+ STRCSB T2, [pPosCoefbuf], #1 ;// high byte only for two-byte levels
+ BNE OutputLoop ;// Z from the TEQ above: loop until TotalCoeff==0
+
+ ;// Finished
+ STR pPosCoefbuf, [ppPosCoefbuf]
+ M_LDR ppBitStream, pppBitStream
+ M_LDR pOffset, ppOffset
+ B EndNoError
+
+EndBadSymbol
+ MOV r0, #OMX_Sts_Err
+ B End
+
+EndNoError
+ ;// Finished reading from the bitstream
+ M_BD_FINI ppBitStream, pOffset
+
+ ;// Set return value
+ MOV r0, #OMX_Sts_NoErr
+End
+ M_END
+
+ ENDIF
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DequantTables_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DequantTables_s.s
new file mode 100644
index 0000000..b16f188
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DequantTables_s.s
@@ -0,0 +1,123 @@
+;//
+;//
+;// File Name: armVCM4P10_DequantTables_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ EXPORT armVCM4P10_QPDivTable
+ EXPORT armVCM4P10_VMatrixQPModTable
+ EXPORT armVCM4P10_PosToVCol4x4
+ EXPORT armVCM4P10_PosToVCol2x2
+ EXPORT armVCM4P10_VMatrix
+ EXPORT armVCM4P10_QPModuloTable
+ EXPORT armVCM4P10_VMatrixU16
+
+;// Define the processor variants supported by this file
+
+ M_VARIANTS ARM1136JS
+
+
+;// Guarding implementation by the processor name
+
+
+ IF ARM1136JS :LOR: CortexA8
+
+
+ M_TABLE armVCM4P10_PosToVCol4x4
+ DCB 0, 2, 0, 2
+ DCB 2, 1, 2, 1
+ DCB 0, 2, 0, 2
+ DCB 2, 1, 2, 1
+
+
+ M_TABLE armVCM4P10_PosToVCol2x2
+ DCB 0, 2
+ DCB 2, 1
+
+
+ M_TABLE armVCM4P10_VMatrix
+ DCB 10, 16, 13
+ DCB 11, 18, 14
+ DCB 13, 20, 16
+ DCB 14, 23, 18
+ DCB 16, 25, 20
+ DCB 18, 29, 23
+
+;//-------------------------------------------------------
+;// This table evaluates the expression [(INT)(QP/6)],
+;// for values of QP from 0 to 51 (inclusive).
+;//-------------------------------------------------------
+
+ M_TABLE armVCM4P10_QPDivTable
+ DCB 0, 0, 0, 0, 0, 0
+ DCB 1, 1, 1, 1, 1, 1
+ DCB 2, 2, 2, 2, 2, 2
+ DCB 3, 3, 3, 3, 3, 3
+ DCB 4, 4, 4, 4, 4, 4
+ DCB 5, 5, 5, 5, 5, 5
+ DCB 6, 6, 6, 6, 6, 6
+ DCB 7, 7, 7, 7, 7, 7
+ DCB 8, 8, 8, 8, 8, 8
+
+;//----------------------------------------------------
+;// This table contains armVCM4P10_VMatrix[QP%6][0] entries,
+;// for values of QP from 0 to 51 (inclusive).
+;//----------------------------------------------------
+
+ M_TABLE armVCM4P10_VMatrixQPModTable
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+
+;//-------------------------------------------------------
+;// This table evaluates the modulus expression [QP%6]*6,
+;// for values of QP from 0 to 51 (inclusive).
+;//-------------------------------------------------------
+
+ M_TABLE armVCM4P10_QPModuloTable
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+
+;//-------------------------------------------------------
+;// This table contains the individual byte values stored as
+;// halfwords. This avoids unpacking inside the function
+;//-------------------------------------------------------
+
+ M_TABLE armVCM4P10_VMatrixU16
+ DCW 10, 16, 13
+ DCW 11, 18, 14
+ DCW 13, 20, 16
+ DCW 14, 23, 18
+ DCW 16, 25, 20
+ DCW 18, 29, 23
+
+ ENDIF ;//ARM1136JS
+
+
+
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
new file mode 100644
index 0000000..82b9542
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
@@ -0,0 +1,236 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_Align_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+
+DEBUG_ON SETL {FALSE}
+
+ IF ARM1136JS
+
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 8
+iHeight RN 9
+
+;// Declare inner loop registers
+x RN 7
+x0 RN 7
+x1 RN 10
+x2 RN 11
+Scratch RN 12
+
+;// Function:
+;// armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+;//
+;// Implements copy from an arbitrary aligned source memory location (pSrc) to a 4 byte aligned
+;// destination pointed by (pDst) for horizontal interpolation.
+;// This function needs to copy 9 bytes in horizontal direction.
+;//
+;// Registers used as input for this function
+;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy
+;//
+;// Registers preserved for top level function
+;// r2,r3,r4,r5,r6
+;//
+;// Registers modified by the function
+;// r7,r8,r9,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the new aligned location which will be used as pSrc
+;// r1 - step size to this aligned location
+
+ ;// Function header
+ M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+
+ ;// Copy pDst to scratch
+ MOV Scratch, pDst
+
+StartAlignedStackCopy
+ AND x, pSrc, #3 ;// x = byte offset of pSrc within its word (0..3)
+ BIC pSrc, pSrc, #3 ;// round pSrc down to a word boundary
+
+ M_SWITCH x ;// one copy loop per byte offset
+ M_CASE Copy0toAligned
+ M_CASE Copy1toAligned
+ M_CASE Copy2toAligned
+ M_CASE Copy3toAligned
+ M_ENDSWITCH
+
+Copy0toAligned
+ LDM pSrc, {x0, x1, x2} ;// load three words (12 bytes) of the row
+ SUBS iHeight, iHeight, #1
+ ADD pSrc, pSrc, srcStep ;// next source row (does not touch the flags)
+
+ ;// One cycle stall
+
+ STM pDst!, {x0, x1, x2} ;// Store aligned output row
+ BGT Copy0toAligned ;// flags from SUBS: loop while rows remain
+ B CopyEnd
+
+Copy1toAligned
+ LDM pSrc, {x0, x1, x2} ;// load three words (12 bytes) of the row
+ SUBS iHeight, iHeight, #1
+ ADD pSrc, pSrc, srcStep ;// next source row (does not touch the flags)
+
+ ;// One cycle stall
+
+ MOV x0, x0, LSR #8 ;// drop the low byte of word 0
+ ORR x0, x0, x1, LSL #24 ;// pull in the low byte of word 1
+ MOV x1, x1, LSR #8
+ ORR x1, x1, x2, LSL #24 ;// pull in the low byte of word 2
+ MOV x2, x2, LSR #8
+ STM pDst!, {x0, x1, x2} ;// Store aligned output row
+ BGT Copy1toAligned ;// flags from SUBS: loop while rows remain
+ B CopyEnd
+
+Copy2toAligned
+ LDM pSrc, {x0, x1, x2} ;// load three words (12 bytes) of the row
+ SUBS iHeight, iHeight, #1
+ ADD pSrc, pSrc, srcStep ;// next source row (does not touch the flags)
+
+ ;// One cycle stall
+
+ MOV x0, x0, LSR #16 ;// drop the low halfword of word 0
+ ORR x0, x0, x1, LSL #16 ;// pull in the low halfword of word 1
+ MOV x1, x1, LSR #16
+ ORR x1, x1, x2, LSL #16 ;// pull in the low halfword of word 2
+ MOV x2, x2, LSR #16
+ STM pDst!, {x0, x1, x2} ;// Store aligned output row
+ BGT Copy2toAligned ;// flags from SUBS: loop while rows remain
+ B CopyEnd
+
+Copy3toAligned
+ LDM pSrc, {x0, x1, x2} ;// load three words (12 bytes) of the row
+ SUBS iHeight, iHeight, #1
+ ADD pSrc, pSrc, srcStep ;// next source row (does not touch the flags)
+
+ ;// One cycle stall
+
+ MOV x0, x0, LSR #24 ;// keep only the top byte of word 0
+ ORR x0, x0, x1, LSL #8 ;// pull in the low three bytes of word 1
+ MOV x1, x1, LSR #24
+ ORR x1, x1, x2, LSL #8 ;// pull in the low three bytes of word 2
+ MOV x2, x2, LSR #24
+ STM pDst!, {x0, x1, x2} ;// Store aligned output row
+ BGT Copy3toAligned ;// flags from SUBS: loop while rows remain
+
+CopyEnd
+
+ MOV pSrc, Scratch ;// return the start of the aligned copy in pSrc
+ MOV srcStep, #12 ;// each aligned row is 12 bytes (three words)
+
+ M_END
+
+
+;// Function:
+;// armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+;//
+;// Implements copy from an arbitrary aligned source memory location (pSrc) to an aligned
+;// destination pointed by (pDst) for vertical interpolation.
+;// This function needs to copy 4 bytes in horizontal direction
+;//
+;// Registers used as input for this function
+;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy
+;//
+;// Registers preserved for top level function
+;// r2,r3,r4,r5,r6
+;//
+;// Registers modified by the function
+;// r7,r8,r9,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the new aligned location which will be used as pSrc
+;// r1 - step size to this aligned location
+
+ ;// Function header
+ M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+
+ ;// Copy rows from pSrc to the aligned buffer at pDst
+StartVAlignedStackCopy
+ AND x, pSrc, #3 ;// x = byte offset of pSrc within its word (0..3)
+ BIC pSrc, pSrc, #3 ;// round pSrc down to a word boundary
+
+
+ M_SWITCH x ;// one copy loop per byte offset
+ M_CASE Copy0toVAligned
+ M_CASE Copy1toVAligned
+ M_CASE Copy2toVAligned
+ M_CASE Copy3toVAligned
+ M_ENDSWITCH
+
+Copy0toVAligned
+ M_LDR x0, [pSrc], srcStep ;// load the row word, then step pSrc to the next row
+ SUBS iHeight, iHeight, #1
+
+ ;// One cycle stall
+
+ STR x0, [pDst], #4 ;// Store aligned output row
+ BGT Copy0toVAligned ;// flags from SUBS: loop while rows remain
+ B CopyVEnd
+
+Copy1toVAligned
+ LDR x1, [pSrc, #4] ;// word following the aligned word
+ M_LDR x0, [pSrc], srcStep ;// aligned word, then step pSrc to the next row
+ SUBS iHeight, iHeight, #1
+
+ ;// One cycle stall
+
+ MOV x1, x1, LSL #24 ;// low byte of the second word moves to the top
+ ORR x0, x1, x0, LSR #8 ;// combined with the upper three bytes of the first
+ STR x0, [pDst], #4 ;// Store aligned output row
+ BGT Copy1toVAligned ;// flags from SUBS: loop while rows remain
+ B CopyVEnd
+
+Copy2toVAligned
+ LDR x1, [pSrc, #4] ;// word following the aligned word
+ M_LDR x0, [pSrc], srcStep ;// aligned word, then step pSrc to the next row
+ SUBS iHeight, iHeight, #1
+
+ ;// One cycle stall
+
+ MOV x1, x1, LSL #16 ;// low halfword of the second word moves to the top
+ ORR x0, x1, x0, LSR #16 ;// combined with the upper halfword of the first
+ STR x0, [pDst], #4 ;// Store aligned output row
+ BGT Copy2toVAligned ;// flags from SUBS: loop while rows remain
+ B CopyVEnd
+
+Copy3toVAligned
+ LDR x1, [pSrc, #4] ;// word following the aligned word
+ M_LDR x0, [pSrc], srcStep ;// aligned word, then step pSrc to the next row
+ SUBS iHeight, iHeight, #1
+
+ ;// One cycle stall
+
+ MOV x1, x1, LSL #8 ;// low three bytes of the second word move up
+ ORR x0, x1, x0, LSR #24 ;// combined with the top byte of the first
+ STR x0, [pDst], #4 ;// Store aligned output row
+ BGT Copy3toVAligned ;// flags from SUBS: loop while rows remain
+
+CopyVEnd
+
+ SUB pSrc, pDst, #28 ;// pSrc = 28 bytes (seven 4-byte rows) before the end of the copy
+ MOV srcStep, #4 ;// each aligned row is 4 bytes (one word)
+
+ M_END
+
+
+ ENDIF
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
new file mode 100644
index 0000000..bc0b6ec
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
@@ -0,0 +1,149 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Function:
+;// armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+;//
+;// Implements copy from an arbitrary aligned source memory location (pSrc) to an aligned
+;// destination pointed by (pDst)
+;//
+;// Registers preserved for top level function
+;// r1,r3,r4,r5,r6,r7,r10,r11,r14
+;//
+;// Registers modified by the function
+;// r0,r2,r8,r9,r12
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2
+dstStep RN 3
+
+;// Declare other intermediate registers
+x0 RN 4
+x1 RN 5
+x2 RN 8
+x3 RN 9
+Temp RN 12
+
+ IF ARM1136JS
+
+ M_START armVCM4P10_InterpolateLuma_Copy4x4_unsafe, r6
+
+Copy4x4Start
+ ;// Do Copy and branch to EndOfInterpolation
+ AND Temp, pSrc, #3 ;// Temp = byte offset of pSrc within its word (0..3)
+ BIC pSrc, pSrc, #3 ;// round pSrc down to a word boundary
+
+ M_SWITCH Temp ;// one unrolled copy per byte offset
+ M_CASE Copy4x4Align0
+ M_CASE Copy4x4Align1
+ M_CASE Copy4x4Align2
+ M_CASE Copy4x4Align3
+ M_ENDSWITCH
+
+Copy4x4Align0
+ M_LDR x0, [pSrc], srcStep ;// row 0
+ M_LDR x1, [pSrc], srcStep ;// row 1
+ M_STR x0, [pDst], dstStep
+ M_LDR x2, [pSrc], srcStep ;// row 2
+ M_STR x1, [pDst], dstStep
+ M_LDR x3, [pSrc], srcStep ;// row 3
+ M_STR x2, [pDst], dstStep
+ M_STR x3, [pDst], dstStep
+ B Copy4x4End
+
+Copy4x4Align1
+ LDR x1, [pSrc, #4] ;// row 0: word following the aligned word
+ M_LDR x0, [pSrc], srcStep ;// row 0: aligned word, then step to the next row
+ LDR x3, [pSrc, #4] ;// row 1: word following the aligned word
+ M_LDR x2, [pSrc], srcStep ;// row 1: aligned word, then step to the next row
+ MOV x0, x0, LSR #8 ;// drop the low byte of the first word
+ ORR x0, x0, x1, LSL #24 ;// pull in the low byte of the second word
+ M_STR x0, [pDst], dstStep
+ MOV x2, x2, LSR #8
+ ORR x2, x2, x3, LSL #24
+ LDR x1, [pSrc, #4] ;// row 2
+ M_LDR x0, [pSrc], srcStep
+ M_STR x2, [pDst], dstStep
+ LDR x3, [pSrc, #4] ;// row 3
+ M_LDR x2, [pSrc], srcStep
+ MOV x0, x0, LSR #8
+ ORR x0, x0, x1, LSL #24
+ M_STR x0, [pDst], dstStep
+ MOV x2, x2, LSR #8
+ ORR x2, x2, x3, LSL #24
+ M_STR x2, [pDst], dstStep
+ B Copy4x4End
+
+Copy4x4Align2
+ LDR x1, [pSrc, #4] ;// row 0: word following the aligned word
+ M_LDR x0, [pSrc], srcStep ;// row 0: aligned word, then step to the next row
+ LDR x3, [pSrc, #4] ;// row 1: word following the aligned word
+ M_LDR x2, [pSrc], srcStep ;// row 1: aligned word, then step to the next row
+ MOV x0, x0, LSR #16 ;// drop the low halfword of the first word
+ ORR x0, x0, x1, LSL #16 ;// pull in the low halfword of the second word
+ M_STR x0, [pDst], dstStep
+ MOV x2, x2, LSR #16
+ ORR x2, x2, x3, LSL #16
+ M_STR x2, [pDst], dstStep
+
+ LDR x1, [pSrc, #4] ;// row 2
+ M_LDR x0, [pSrc], srcStep
+ LDR x3, [pSrc, #4] ;// row 3
+ M_LDR x2, [pSrc], srcStep
+ MOV x0, x0, LSR #16
+ ORR x0, x0, x1, LSL #16
+ M_STR x0, [pDst], dstStep
+ MOV x2, x2, LSR #16
+ ORR x2, x2, x3, LSL #16
+ M_STR x2, [pDst], dstStep
+ B Copy4x4End
+
+Copy4x4Align3
+ LDR x1, [pSrc, #4] ;// row 0: word following the aligned word
+ M_LDR x0, [pSrc], srcStep ;// row 0: aligned word, then step to the next row
+ LDR x3, [pSrc, #4] ;// row 1: word following the aligned word
+ M_LDR x2, [pSrc], srcStep ;// row 1: aligned word, then step to the next row
+ MOV x0, x0, LSR #24 ;// keep only the top byte of the first word
+ ORR x0, x0, x1, LSL #8 ;// pull in the low three bytes of the second word
+ M_STR x0, [pDst], dstStep
+ MOV x2, x2, LSR #24
+ ORR x2, x2, x3, LSL #8
+ M_STR x2, [pDst], dstStep
+
+ LDR x1, [pSrc, #4] ;// row 2
+ M_LDR x0, [pSrc], srcStep
+ LDR x3, [pSrc, #4] ;// row 3
+ M_LDR x2, [pSrc], srcStep
+ MOV x0, x0, LSR #24
+ ORR x0, x0, x1, LSL #8
+ M_STR x0, [pDst], dstStep
+ MOV x2, x2, LSR #24
+ ORR x2, x2, x3, LSL #8
+ M_STR x2, [pDst], dstStep
+ B Copy4x4End
+
+Copy4x4End
+ M_END
+
+ ENDIF
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
new file mode 100644
index 0000000..66cfe5e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
@@ -0,0 +1,178 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+ EXPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+
+;// Functions:
+;// armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe and
+;// armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+;//
+;// Implements re-arrangement of data from temporary buffer to a buffer pointed to by pBuf.
+;// This does the conversion of data from 16 bit to 8 bit and it also
+;// removes the offset and checks for saturation.
+;//
+;// Registers used as input for this function
+;// r0,r1,r7 where r0 is input pointer and r1 its step size, r7 is output pointer
+;//
+;// Registers preserved for top level function
+;// r4,r5,r6,r8,r9,r14
+;//
+;// Registers modified by the function
+;// r7,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the destination location
+;// r1 - step size to this destination location
+
+
+DEBUG_ON SETL {FALSE}
+
+MASK EQU 0x80808080 ;// Mask is used to implement (a+b+1)/2 (not referenced by the code in this file)
+
+;// Declare input registers
+
+pSrc0 RN 0 ;// source pointer; overwritten with the output pointer before return
+srcStep0 RN 1 ;// source step; overwritten with the output step before return
+
+;// Declare other intermediate registers
+Temp1 RN 4
+Temp2 RN 5
+Temp3 RN 10 ;// shares r10 with ValueA0
+Temp4 RN 11 ;// shares r11 with ValueA1
+pBuf RN 7 ;// output pointer, advanced by the stores
+r0x0fe00fe0 RN 6 ;// holds 0x0fe00fe0, subtracted from every halfword
+r0x00ff00ff RN 12 ;// holds 0x00ff00ff, keeps the low byte of every halfword
+Count RN 14 ;// loop counter
+ValueA0 RN 10 ;// shares r10 with Temp3
+ValueA1 RN 11 ;// shares r11 with Temp4
+
+ IF ARM1136JS
+
+
+ ;// Function header: narrows four 16-byte groups of 16-bit values read from pSrc0 to bytes stored at pBuf
+ M_START armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe, r6
+
+ ;// Code start
+ MOV Count, #4 ;// four passes, 16 source bytes (8 halfwords) each
+ LDR r0x0fe00fe0, =0x0fe00fe0
+ LDR r0x00ff00ff, =0x00ff00ff
+LoopStart1
+ LDR Temp4, [pSrc0, #12]
+ LDR Temp3, [pSrc0, #8]
+ LDR Temp2, [pSrc0, #4]
+ M_LDR Temp1, [pSrc0], srcStep0 ;// last load of the group; post-indexed by srcStep0
+ UQSUB16 Temp4, Temp4, r0x0fe00fe0 ;// subtract 0x0fe0 from each halfword, clamping at 0
+ UQSUB16 Temp3, Temp3, r0x0fe00fe0
+ UQSUB16 Temp2, Temp2, r0x0fe00fe0
+ UQSUB16 Temp1, Temp1, r0x0fe00fe0
+ USAT16 Temp4, #13, Temp4 ;// saturate each halfword to 13 bits (0..8191)
+ USAT16 Temp3, #13, Temp3
+ USAT16 Temp2, #13, Temp2
+ USAT16 Temp1, #13, Temp1
+ AND Temp4, r0x00ff00ff, Temp4, LSR #5 ;// shift right by 5 and keep the low byte of each halfword
+ AND Temp3, r0x00ff00ff, Temp3, LSR #5
+ AND Temp2, r0x00ff00ff, Temp2, LSR #5
+ AND Temp1, r0x00ff00ff, Temp1, LSR #5
+ ORR ValueA1, Temp3, Temp4, LSL #8 ;// must come first: the next ORR overwrites Temp3 (r10 = ValueA0)
+ ORR ValueA0, Temp1, Temp2, LSL #8 ;// bytes of Temp2 fill the empty byte lanes of Temp1
+ SUBS Count, Count, #1
+ STRD ValueA0, [pBuf], #8 ;// store ValueA0/ValueA1 (8 bytes); does not disturb the flags set by SUBS
+ BGT LoopStart1
+End1
+ SUB pSrc0, pBuf, #32 ;// r0 = start of the 32 bytes just written (4 passes x 8 bytes)
+ MOV srcStep0, #8 ;// r1 = 8, the byte step between the stored groups
+
+ M_END
+
+
+ ;// Function header: narrows four 16-byte groups of 16-bit values from pSrc0 to bytes and regroups them at pBuf
+ M_START armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe, r6
+
+ ;// Code start
+ LDR r0x0fe00fe0, =0x0fe00fe0
+ LDR r0x00ff00ff, =0x00ff00ff
+ MOV Count, #2 ;// two passes; each pass handles two 16-byte source groups
+
+LoopStart
+ LDR Temp4, [pSrc0, #12]
+ LDR Temp3, [pSrc0, #8]
+ LDR Temp2, [pSrc0, #4]
+ M_LDR Temp1, [pSrc0], srcStep0 ;// last load of the group; post-indexed by srcStep0
+
+ UQSUB16 Temp4, Temp4, r0x0fe00fe0 ;// subtract 0x0fe0 from each halfword, clamping at 0
+ UQSUB16 Temp3, Temp3, r0x0fe00fe0
+ UQSUB16 Temp2, Temp2, r0x0fe00fe0
+ UQSUB16 Temp1, Temp1, r0x0fe00fe0
+
+ USAT16 Temp4, #13, Temp4 ;// saturate each halfword to 13 bits (0..8191)
+ USAT16 Temp3, #13, Temp3
+ USAT16 Temp2, #13, Temp2
+ USAT16 Temp1, #13, Temp1
+
+ AND Temp4, r0x00ff00ff, Temp4, LSR #5 ;// shift right by 5 and keep the low byte of each halfword
+ AND Temp3, r0x00ff00ff, Temp3, LSR #5
+ AND Temp2, r0x00ff00ff, Temp2, LSR #5
+ AND Temp1, r0x00ff00ff, Temp1, LSR #5
+ ORR ValueA1, Temp3, Temp4, LSL #8 ;// [d2 c2 d0 c0]
+ ORR ValueA0, Temp1, Temp2, LSL #8 ;// [b2 a2 b0 a0]
+
+ PKHBT Temp1, ValueA0, ValueA1, LSL #16 ;// [d0 c0 b0 a0]
+
+ STR Temp1, [pBuf], #8 ;// written at offset 0 of this pass; pBuf -> offset 8
+ PKHTB Temp2, ValueA1, ValueA0, ASR #16 ;// [d2 c2 b2 a2]
+ STR Temp2, [pBuf], #-4 ;// written at offset 8; pBuf steps back to offset 4
+
+ LDR Temp4, [pSrc0, #12]
+ LDR Temp3, [pSrc0, #8]
+ LDR Temp2, [pSrc0, #4]
+ M_LDR Temp1, [pSrc0], srcStep0 ;// second source group of this pass
+
+ UQSUB16 Temp4, Temp4, r0x0fe00fe0
+ UQSUB16 Temp3, Temp3, r0x0fe00fe0
+ UQSUB16 Temp2, Temp2, r0x0fe00fe0
+ UQSUB16 Temp1, Temp1, r0x0fe00fe0
+
+ USAT16 Temp4, #13, Temp4
+ USAT16 Temp3, #13, Temp3
+ USAT16 Temp2, #13, Temp2
+ USAT16 Temp1, #13, Temp1
+
+ AND Temp4, r0x00ff00ff, Temp4, LSR #5
+ AND Temp3, r0x00ff00ff, Temp3, LSR #5
+ AND Temp2, r0x00ff00ff, Temp2, LSR #5
+ AND Temp1, r0x00ff00ff, Temp1, LSR #5
+ ORR ValueA1, Temp3, Temp4, LSL #8 ;// [d2 c2 d0 c0]
+ ORR ValueA0, Temp1, Temp2, LSL #8 ;// [b2 a2 b0 a0]
+
+ PKHBT Temp1, ValueA0, ValueA1, LSL #16 ;// [d0 c0 b0 a0]
+ SUBS Count, Count, #1 ;// flags are consumed by BGT; the stores below leave them intact
+ STR Temp1, [pBuf], #8 ;// written at offset 4 of this pass; pBuf -> offset 12
+ PKHTB Temp2, ValueA1, ValueA0, ASR #16 ;// [d2 c2 b2 a2]
+ STR Temp2, [pBuf], #4 ;// written at offset 12; pBuf -> offset 16 (start of next pass)
+
+ BGT LoopStart
+End2
+ SUB pSrc0, pBuf, #32-8 ;// r0 = pBuf - 24, i.e. 8 bytes past the first word written
+ MOV srcStep0, #4 ;// r1 = 4, the byte step reported for the output
+
+ M_END
+
+ ENDIF
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
new file mode 100644
index 0000000..851ff6a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
@@ -0,0 +1,296 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ EXPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+
+ M_VARIANTS ARM1136JS
+
+
+
+ IF ARM1136JS
+
+
+ M_ALLOC8 ppDstArgs, 8 ;// holds the caller's pDst/dstStep pair
+ M_ALLOC8 pTempResult1, 8 ;// Acc0/Acc1 kept from an even pass
+ M_ALLOC8 pTempResult2, 8 ;// Acc2/Acc3 kept from an even pass
+ M_ALLOC4 ppSrc, 4 ;// not referenced by this function
+ M_ALLOC4 ppDst, 4 ;// not referenced by this function
+ M_ALLOC4 pDstStep, 4 ;// intermediate-buffer step (r3 is reused as a temporary)
+ M_ALLOC4 pSrcStep, 4 ;// source step (r1 is reused as a temporary)
+ M_ALLOC4 pCounter, 4 ;// second-stage loop counter (r11 is reused as r0x0001fc00)
+
+ ;// Function header
+ ;// Function:
+ ;// armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+ ;//
+ ;// Implements diagonal interpolation for a block of size 4x4: a 6-tap pass along each row into the
+ ;// intermediate buffer, then a 6-tap pass across those rows into pDst. Input and output should be aligned.
+ ;//
+ ;// Registers used as input for this function
+ ;// r0,r1,r2,r3, r8 where r0 source pointer, r2 destination pointer, r1,r3 step sizes, r8 intermediate-buf pointer
+ ;//
+ ;// Registers preserved for top level function
+ ;// r0,r1,r2,r3,r4,r5,r6,r14
+ ;//
+ ;// Registers modified by the function
+ ;// r7,r8,r9,r10,r11,r12
+ ;//
+ ;// Output registers
+ ;// None. NOTE(review): r2/r3 are restored, but r0/r1 leave holding the intermediate buffer and step 16 - confirm callers
+
+ M_START armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe, r6
+
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2
+dstStep RN 3
+
+;// Declare inner loop registers
+Acc0 RN 4
+Acc1 RN 5
+Acc2 RN 6
+Acc3 RN 7
+
+ValA RN 4
+ValB RN 5
+ValC RN 6
+ValD RN 7
+ValE RN 8
+ValF RN 9
+ValG RN 12
+ValH RN 14
+ValI RN 1
+
+Temp1 RN 3
+Temp2 RN 1
+Temp3 RN 12
+Temp4 RN 7
+Temp5 RN 5
+r0x0fe00fe0 RN 3 ;// [0 (16*255 - 16) 0 (16*255 - 16)]
+r0x00ff00ff RN 10 ;// [0 255 0 255] where 255 is offset
+Counter RN 11
+pInterBuf RN 8
+
+ValCA RN 8
+ValDB RN 9
+ValGE RN 10
+ValHF RN 11
+r0x00140001 RN 12
+r0x0014fffb RN 14
+
+r0x0001fc00 RN 11
+
+Accx RN 8
+Accy RN 9
+Temp6 RN 14
+
+ M_STRD pDst, dstStep, ppDstArgs ;// save caller's destination; pDst/dstStep address the intermediate buffer below
+
+ MOV pDst, pInterBuf ;// copied before r8 is reused as ValE/Accx
+ MOV dstStep, #16
+
+ ;// Counter = 4: HeightLoop runs for Counter = 4..0 (BPL), five passes of two source rows each
+ MOV Counter, #4
+ M_STR dstStep, pDstStep
+ M_STR srcStep, pSrcStep
+ LDR r0x00ff00ff, =0x00ff00ff ;// [0 255 0 255] 255 is offset to avoid negative results
+
+HeightLoop
+NextTwoRowsLoop
+ LDR ValD, [pSrc, srcStep] ;// Load row 1 [d1 c1 b1 a1]
+ LDR ValA, [pSrc], #4 ;// Load row 0 [d0 c0 b0 a0]
+ LDR ValH, [pSrc, srcStep] ;// Load [h1 g1 f1 e1]
+ LDR ValE, [pSrc], #4 ;// Load [h0 g0 f0 e0]
+ LDRB Temp2, [pSrc, srcStep] ;// Load byte i1 of row 1 (overwrites srcStep in r1)
+ LDRB Temp1, [pSrc], #-8 ;// Load byte i0 of row 0 (overwrites dstStep in r3) and rewind pSrc by 8
+
+ PKHBT ValB, ValA, ValD, LSL #16 ;// [b1 a1 b0 a0]
+ PKHTB ValD, ValD, ValA, ASR #16 ;// [d1 c1 d0 c0]
+ UXTAB16 ValA, r0x00ff00ff, ValB ;// [00 a1 00 a0] + [0 255 0 255]
+ UXTAB16 ValC, r0x00ff00ff, ValD ;// [00 c1 00 c0] + [0 255 0 255]
+ PKHBT ValI, Temp1, Temp2, LSL #16 ;// [00 i1 00 i0]
+ PKHBT ValF, ValE, ValH, LSL #16 ;// [f1 e1 f0 e0]
+ PKHTB ValH, ValH, ValE, ASR #16 ;// [h1 g1 h0 g0]
+ UXTAB16 ValE, r0x00ff00ff, ValF ;// [00 e1 00 e0] + [0 255 0 255]
+
+ ;// Calculate Acc0
+ ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
+ UXTAB16 Temp1, ValC, ValD, ROR #8 ;// (Off+c) + d
+ UXTAB16 Temp3, ValE, ValB, ROR #8 ;// (Off+e) + b
+ RSB Temp1, Temp3, Temp1, LSL #2 ;// 4*(Off+c+d) - (Off+b+e)
+ UXTAB16 Acc0, ValA, ValF, ROR #8 ;// (Off+a) + f
+ ADD Temp1, Temp1, Temp1, LSL #2 ;// times 5
+ ADD Acc0, Acc0, Temp1
+
+ ;// Calculate Acc1
+ ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
+ UXTAB16 Temp1, ValE, ValD, ROR #8 ;// (Off+e) + d
+ UXTAB16 Temp3, ValC, ValF, ROR #8 ;// (Off+c) + f
+ RSB Temp1, Temp3, Temp1, LSL #2
+ UXTAB16 ValG, r0x00ff00ff, ValH ;// [00 g1 00 g0] + [0 255 0 255]
+ ADD Temp1, Temp1, Temp1, LSL #2
+ UXTAB16 Acc1, ValG, ValB, ROR #8 ;// (Off+g) + b
+ ADD Acc1, Acc1, Temp1
+
+ UXTAB16 Acc2, ValC, ValH, ROR #8 ;// (Off+c) + h, outer taps of Acc2
+ ADD ValI, r0x00ff00ff, ValI ;// [00 i1 00 i0] + [0 255 0 255]
+
+ ;// Calculate Acc2
+ ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
+ UXTAB16 Temp1, ValG, ValD, ROR #8 ;// (Off+g) + d
+ UXTAB16 Acc3, ValI, ValD, ROR #8 ;// (Off+i) + d, outer taps of Acc3
+ UXTAB16 Temp2, ValE, ValF, ROR #8 ;// (Off+e) + f
+
+ RSB Temp1, Temp1, Temp2, LSL #2
+ UXTAB16 Temp2, ValG, ValF, ROR #8 ;// (Off+g) + f, used for Acc3
+ ADD Temp1, Temp1, Temp1, LSL #2
+ ADD Acc2, Acc2, Temp1
+
+ ;// Calculate Acc3
+ ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
+ UXTAB16 Temp1, ValE, ValH, ROR #8 ;// (Off+e) + h
+ RSB Temp1, Temp1, Temp2, LSL #2
+ ADD Temp1, Temp1, Temp1, LSL #2
+ ADD Acc3, Acc3, Temp1
+
+ M_LDR dstStep, pDstStep ;// r3 and r1 were used as temporaries above; restore both steps
+ M_LDR srcStep, pSrcStep
+
+ ;// If Counter is even store Acc0-Acc3 in a temporary buffer
+ ;// If Counter is odd store Acc0-Acc3 and previous Acc0-Acc3 in a intermediate buf
+ ANDS Temp3, Counter, #1
+ BEQ NoProcessing
+
+ ;// Packing previous and current Acc0-Acc3 values
+ M_LDRD Accx, Accy, pTempResult1
+ PKHBT Temp6, Accx, Acc0, LSL #16 ;//[0 a2 0 a0] = [0 a3 0 a2] [0 a1 0 a0]
+ PKHTB Acc0, Acc0, Accx, ASR #16 ;//[0 a3 0 a1] = [0 a1 0 a0] [0 a3 0 a2]
+ STR Acc0, [pDst, dstStep]
+ STR Temp6, [pDst], #4
+ PKHBT Temp6, Accy, Acc1, LSL #16 ;//[0 b2 0 b0] = [0 b3 0 b2] [0 b1 0 b0]
+ PKHTB Acc1, Acc1, Accy, ASR #16 ;//[0 b3 0 b1] = [0 b1 0 b0] [0 b3 0 b2]
+ M_LDRD Accx, Accy, pTempResult2
+ STR Acc1, [pDst, dstStep]
+ STR Temp6, [pDst], #4
+
+ PKHBT Temp6, Accx, Acc2, LSL #16 ;//[0 c2 0 c0] = [0 c3 0 c2] [0 c1 0 c0]
+ PKHTB Acc2, Acc2, Accx, ASR #16 ;//[0 c3 0 c1] = [0 c1 0 c0] [0 c3 0 c2]
+ STR Acc2, [pDst, dstStep]
+ STR Temp6, [pDst], #4
+ PKHBT Temp6, Accy, Acc3, LSL #16 ;//[0 d2 0 d0] = [0 d3 0 d2] [0 d1 0 d0]
+ PKHTB Acc3, Acc3, Accy, ASR #16 ;//[0 d3 0 d1] = [0 d1 0 d0] [0 d3 0 d2]
+ STR Acc3, [pDst, dstStep]
+ STR Temp6, [pDst], #-12 ;// back to the start of the intermediate row
+ ADD pDst, pDst, dstStep, LSL #1 ;// two intermediate rows were written
+ B AfterStore
+
+NoProcessing
+ M_STRD Acc0, Acc1, pTempResult1
+ M_STRD Acc2, Acc3, pTempResult2
+AfterStore
+ SUBS Counter, Counter, #1 ;// Loop till height is 10 (five passes of two rows)
+ ADD pSrc, pSrc, srcStep, LSL #1
+ BPL HeightLoop
+
+ STR Acc0, [pDst], #4 ;//[0 a1 0 a0] results of the final (even) pass are stored unpacked
+ STR Acc1, [pDst], #4
+ STR Acc2, [pDst], #4
+ STR Acc3, [pDst], #-12
+
+ ;//
+ ;// Vertical interpolation using multiplication (filters across the intermediate rows)
+ ;//
+
+ SUB pSrc, pDst, dstStep, LSL #2 ;// pSrc = four intermediate rows before the row just stored
+ MOV srcStep, #16
+ M_LDRD pDst, dstStep, ppDstArgs ;// restore the caller's destination pointer and step
+
+ MOV Counter, #4
+ LDR r0x0014fffb, =0x0014fffb
+ LDR r0x00140001, =0x00140001
+
+HeightLoop1
+ M_STR Counter, pCounter ;// r11 is needed below for ValHF and r0x0001fc00
+
+ M_LDR ValCA, [pSrc], srcStep ;// Load [0 c 0 a]
+ M_LDR ValDB, [pSrc], srcStep ;// Load [0 d 0 b]
+ M_LDR ValGE, [pSrc], srcStep ;// Load [0 g 0 e]
+ M_LDR ValHF, [pSrc], srcStep ;// Load [0 h 0 f]
+
+
+ ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
+ ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
+ ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
+ ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
+
+ SMUAD Acc0, ValCA, r0x00140001 ;// Acc0 = [0 c 0 a] * [0 20 0 1] = a + 20*c
+ SMUAD Acc1, ValDB, r0x00140001 ;// Acc1 = [0 d 0 b] * [0 20 0 1] = b + 20*d
+ SMUADX Acc2, ValGE, r0x0014fffb ;// Acc2 = [0 g 0 e] x-multiplied by [0 20 0 -5] = 20*e - 5*g
+ SMUAD Acc3, ValGE, r0x0014fffb ;// Acc3 = [0 g 0 e] * [0 20 0 -5] = -5*e + 20*g
+
+ SMLAD Acc0, ValDB, r0x0014fffb, Acc0 ;// Acc0 += [0 d 0 b] * [0 20 0 -5] = -5*b + 20*d
+ SMLADX Acc1, ValGE, r0x00140001, Acc1 ;// Acc1 += [0 g 0 e] x-multiplied by [0 20 0 1] = 20*e + g
+ SMLADX Acc2, ValHF, r0x00140001, Acc2 ;// Acc2 += [0 h 0 f] x-multiplied by [0 20 0 1] = 20*f + h
+ SMLADX Acc3, ValHF, r0x0014fffb, Acc3 ;// Acc3 += [0 h 0 f] x-multiplied by [0 20 0 -5] = 20*f - 5*h
+
+ SMLABB Acc0, ValGE, r0x0014fffb, Acc0 ;// Acc0 += e * -5
+ SMLATB Acc1, ValCA, r0x0014fffb, Acc1 ;// Acc1 += c * -5
+ SMLATB Acc2, ValCA, r0x00140001, Acc2 ;// Acc2 += c * 1
+ SMLATB Acc3, ValDB, r0x00140001, Acc3 ;// Acc3 += d * 1
+
+ LDRH ValCA, [pSrc], #4 ;// Load i from the fifth row; the +4 is the net column step once pSrc is rewound below
+ SMLABB Acc0, ValHF, r0x00140001, Acc0 ;// Acc0 += f * 1
+ SMLABB Acc1, ValHF, r0x0014fffb, Acc1 ;// Acc1 += f * -5
+ SMLATB Acc2, ValDB, r0x0014fffb, Acc2 ;// Acc2 += d * -5
+ SMLABB Acc3, ValCA, r0x00140001, Acc3 ;// Acc3 += i * 1 (ValCA now holds the halfword just loaded)
+
+ LDR r0x0001fc00, =0x0001fc00 ;// (0xff * 16 * 32) - 512
+ SUB Acc0, Acc0, r0x0001fc00
+ SUB Acc1, Acc1, r0x0001fc00
+ SUB Acc2, Acc2, r0x0001fc00
+ SUB Acc3, Acc3, r0x0001fc00
+
+ USAT Acc0, #18, Acc0 ;// saturate to 18 bits (8 result bits above the 10 shifted out below)
+ USAT Acc1, #18, Acc1
+ USAT Acc2, #18, Acc2
+ USAT Acc3, #18, Acc3
+
+ MOV Acc0, Acc0, LSR #10
+ M_STRB Acc0, [pDst], dstStep ;// the four results go down one destination column
+ MOV Acc1, Acc1, LSR #10
+ M_STRB Acc1, [pDst], dstStep
+ MOV Acc2, Acc2, LSR #10
+ M_STRB Acc2, [pDst], dstStep
+ MOV Acc3, Acc3, LSR #10
+ M_STRB Acc3, [pDst], dstStep
+
+
+ M_LDR Counter, pCounter
+ SUB pDst, pDst, dstStep, LSL #2 ;// back to the top of the column
+ SUB pSrc, pSrc, srcStep, LSL #2 ;// back to the first intermediate row, 4 bytes further along
+ ADD pDst, pDst, #1 ;// next destination column
+ SUBS Counter, Counter, #1
+ BGT HeightLoop1
+End
+ SUB pDst, pDst, #4 ;// undo the four one-byte column steps
+ SUB pSrc, pSrc, #16 ;// undo the four 4-byte steps; pSrc points at the intermediate buffer start
+
+ M_END
+
+ ENDIF
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
new file mode 100644
index 0000000..2f48e13
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
@@ -0,0 +1,276 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ EXPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+
+ IF ARM1136JS
+
+ M_ALLOC8 ppDstArgs, 8 ;// holds the caller's pDst/dstStep pair
+ M_ALLOC4 ppSrc, 4 ;// pSrc at the top of the current 4-byte-wide strip
+ M_ALLOC4 ppDst, 4 ;// intermediate pointer at the top of the current strip
+ M_ALLOC4 pCounter, 4 ;// second-stage loop counter (r8 is reused as ValCA)
+
+ ;// Function header
+ ;// Function:
+ ;// armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+ ;//
+ ;// Implements diagonal interpolation for a block of size 4x4: a 6-tap pass down the columns into the
+ ;// intermediate buffer, then a 6-tap pass along each intermediate row into pDst. Input and output should be aligned.
+ ;//
+ ;// Registers used as input for this function
+ ;// r0,r1,r2,r3, r8 where r0 source pointer, r2 destination pointer, r1,r3 step sizes, r8 intermediate-buf pointer
+ ;//
+ ;// Registers preserved for top level function
+ ;// r0,r1,r2,r3,r4,r5,r6,r14
+ ;//
+ ;// Registers modified by the function
+ ;// r7,r8,r9,r10,r11,r12
+ ;//
+ ;// Output registers
+ ;// None. NOTE(review): r2/r3 are restored, but r0/r1 leave holding the intermediate buffer and step 24 - confirm callers
+
+ M_START armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe, r6
+
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2
+dstStep RN 3
+
+;// Declare inner loop registers
+ValA RN 5
+ValA0 RN 4
+ValA1 RN 5
+ValAF0 RN 4
+ValAF1 RN 5
+
+ValB RN 11
+
+ValC RN 5
+ValC0 RN 4
+ValC1 RN 5
+ValCD0 RN 12
+ValCD1 RN 14
+ValCF0 RN 4
+ValCF1 RN 5
+
+ValD RN 10
+
+ValE RN 7
+ValE0 RN 6
+ValE1 RN 7
+ValEB0 RN 10
+ValEB1 RN 11
+ValED0 RN 6
+ValED1 RN 7
+
+ValF RN 10
+
+ValG RN 14
+ValG0 RN 12
+ValG1 RN 14
+ValGB0 RN 12
+ValGB1 RN 14
+
+Acc0 RN 4
+Acc1 RN 5
+Acc2 RN 6
+Acc3 RN 7
+
+Temp RN 7
+Step RN 6
+
+pInterBuf RN 8
+Counter RN 8
+r0x00ff00ff RN 9 ;// [0 255 0 255] where 255 is offset
+r0x0001fc00 RN 10 ;// (0xff * 16 * 32) - 512, removed in the second stage
+
+
+;// Declare inner loop registers
+ValCA RN 8
+ValDB RN 9
+ValGE RN 10
+ValHF RN 11
+r0x00140001 RN 12
+r0x0014fffb RN 14
+
+r0x00000200 RN 12
+r0x000000ff RN 12
+
+ M_STRD pDst, dstStep, ppDstArgs ;// save caller's destination; pDst/dstStep address the intermediate buffer below
+ MOV pDst, pInterBuf ;// copied before r8 is reused as Counter
+ MOV dstStep, #24
+
+ ;// Set up counter of format, [0] [0] [1 (height)] [8 (width)]
+ MOV Counter, #1
+ MOV Temp, #8
+ ADD Counter, Temp, Counter, LSL #8 ;// [0 0 H W]
+
+ LDR r0x00ff00ff, =0x00ff00ff ;// [0 255 0 255] 255 is offset to avoid negative results
+WidthLoop
+ M_STR pSrc, ppSrc
+ M_STR pDst, ppDst
+HeightLoop
+TwoRowsLoop
+ M_LDR ValC, [pSrc], srcStep ;// Load [c3 c2 c1 c0]
+ M_LDR ValD, [pSrc], srcStep ;// Load [d3 d2 d1 d0]
+ M_LDR ValE, [pSrc], srcStep ;// Load [e3 e2 e1 e0]
+ SUB pSrc, pSrc, srcStep, LSL #2 ;// three steps forward, four back: pSrc is now on row b
+ UXTAB16 ValC0, r0x00ff00ff, ValC ;// [0 c2 0 c0] + [0 255 0 255]
+ UXTAB16 ValC1, r0x00ff00ff, ValC, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255]
+ LDR ValB, [pSrc] ;// Load [b3 b2 b1 b0]
+ UXTAB16 ValE0, r0x00ff00ff, ValE ;// [0 e2 0 e0] + [0 255 0 255]
+ UXTAB16 ValE1, r0x00ff00ff, ValE, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255]
+ UXTAB16 ValCD0, ValC0, ValD ;// [0 c2 0 c0] + [0 255 0 255] + [0 d2 0 d0]
+ UXTAB16 ValCD1, ValC1, ValD, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + [0 d3 0 d1]
+ UXTAB16 ValEB0, ValE0, ValB ;// [0 e2 0 e0] + [0 255 0 255] + [0 b2 0 b0]
+ RSB ValCD0, ValEB0, ValCD0, LSL #2 ;// 4*(Off+C+D) - (Off+B+E)
+
+ LDR ValD, [pSrc, srcStep, LSL #1] ;// Load [d3 d2 d1 d0] again: r10 was overwritten by ValEB0
+ UXTAB16 ValEB1, ValE1, ValB, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + [0 b3 0 b1]
+ RSB ValCD1, ValEB1, ValCD1, LSL #2
+
+ UXTAB16 ValED0, ValE0, ValD ;// [0 e2 0 e0] + [0 255 0 255] + [0 d2 0 d0]
+ UXTAB16 ValED1, ValE1, ValD, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + [0 d3 0 d1]
+ LDR ValF, [pSrc, srcStep, LSL #2] ;// Load [f3 f2 f1 f0]
+ M_LDR ValB, [pSrc], srcStep ;// Load [b3 b2 b1 b0] again: r11 was overwritten by ValEB1; pSrc moves to row c
+ ADD ValCD0, ValCD0, ValCD0, LSL #2 ;// 5 * [4*(Off+C+D) - (Off+B+E)]
+ ADD ValCD1, ValCD1, ValCD1, LSL #2
+ UXTAB16 ValCF1, ValC1, ValF, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + [0 f3 0 f1]
+ UXTAB16 ValCF0, ValC0, ValF ;// [0 c2 0 c0] + [0 255 0 255] + [0 f2 0 f0]
+ RSB ValED1, ValCF1, ValED1, LSL #2
+
+ SUB ValA, pSrc, srcStep, LSL #1 ;// address of row a, two rows above row c
+ LDR ValA, [ValA] ;// Load [a3 a2 a1 a0]
+ RSB ValED0, ValCF0, ValED0, LSL #2 ;// 4*(Off+E+D) - (Off+C+F)
+ ADD ValED1, ValED1, ValED1, LSL #2
+ ADD ValED0, ValED0, ValED0, LSL #2 ;// 5 * [4*(Off+E+D) - (Off+C+F)]
+ UXTAB16 ValA0, r0x00ff00ff, ValA ;// [0 a2 0 a0] + [0 255 0 255]
+ UXTAB16 ValA1, r0x00ff00ff, ValA, ROR #8 ;// [0 a3 0 a1] + [0 255 0 255]
+ UXTAB16 ValAF0, ValA0, ValF ;// [0 a2 0 a0] + [0 255 0 255] + [0 f2 0 f0]
+ UXTAB16 ValAF1, ValA1, ValF, ROR #8 ;// [0 a3 0 a1] + [0 255 0 255] + [0 f3 0 f1]
+ ADD Acc1, ValCD1, ValAF1
+
+ LDR ValG, [pSrc, srcStep, LSL #2] ;// Load [g3 g2 g1 g0]
+ ADD Acc0, ValCD0, ValAF0 ;// Acc0 = 16*Off + (A+F) + 20*(C+D) - 5*(B+E)
+ STR Acc1, [pDst, #4] ;// Store result & adjust pointer
+ M_STR Acc0, [pDst], dstStep ;// Store result & adjust pointer
+ UXTAB16 ValG0, r0x00ff00ff, ValG ;// [0 g2 0 g0] + [0 255 0 255]
+ UXTAB16 ValG1, r0x00ff00ff, ValG, ROR #8 ;// [0 g3 0 g1] + [0 255 0 255]
+ UXTAB16 ValGB0, ValG0, ValB ;// [0 g2 0 g0] + [0 255 0 255] + [0 b2 0 b0]
+ UXTAB16 ValGB1, ValG1, ValB, ROR #8 ;// [0 g3 0 g1] + [0 255 0 255] + [0 b3 0 b1]
+ ADD Acc2, ValED0, ValGB0 ;// Acc2 = 16*Off + (B+G) + 20*(D+E) - 5*(C+F)
+ ADD Acc3, ValED1, ValGB1
+
+ STR Acc3, [pDst, #4] ;// Store result & adjust pointer
+ M_STR Acc2, [pDst], dstStep ;// Store result & adjust pointer
+
+ SUBS Counter, Counter, #1 << 8 ;// height field 1 -> 0 -> negative: HeightLoop runs twice, two output rows per pass
+ ADD pSrc, pSrc, srcStep, LSL #1
+ BPL HeightLoop
+
+ M_LDR pSrc, ppSrc
+ M_LDR pDst, ppDst
+ ADDS Counter, Counter, #(1 << 8)-4 ;// Loop till width is 12 (flags for BPL reflect width - 4)
+ ADD pSrc, pSrc, #4 ;// next strip: 4 source bytes ...
+ ADD pDst, pDst, #8 ;// ... become 8 intermediate bytes (four halfwords)
+ ADD Counter, Counter, #1<<8 ;// re-arm the height field; no S suffix, so the ADDS flags survive
+ BPL WidthLoop
+
+ ;//
+ ;// Horizontal interpolation using multiplication
+ ;//
+
+ SUB pSrc, pDst, #24 ;// three strips of 8 bytes were written: back to the intermediate buffer start
+ MOV srcStep, #24
+ M_LDRD pDst, dstStep, ppDstArgs ;// restore the caller's destination pointer and step
+
+ MOV Counter, #4
+ LDR r0x0014fffb, =0x0014fffb
+ LDR r0x00140001, =0x00140001
+
+HeightLoop1
+ M_STR Counter, pCounter ;// r8 is needed below for ValCA
+
+
+ LDR ValCA, [pSrc], #4 ;// Load [0 c 0 a]
+ LDR ValDB, [pSrc], #4 ;// Load [0 d 0 b]
+ LDR ValGE, [pSrc], #4 ;// Load [0 g 0 e]
+ LDR ValHF, [pSrc], #4 ;// Load [0 h 0 f]
+
+ ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
+ ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
+ ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
+ ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
+ SMUAD Acc0, ValCA, r0x00140001 ;// Acc0 = [0 c 0 a] * [0 20 0 1] = a + 20*c
+ SMUAD Acc1, ValDB, r0x00140001 ;// Acc1 = [0 d 0 b] * [0 20 0 1] = b + 20*d
+ SMUADX Acc2, ValGE, r0x0014fffb ;// Acc2 = [0 g 0 e] x-multiplied by [0 20 0 -5] = 20*e - 5*g
+ SMUAD Acc3, ValGE, r0x0014fffb ;// Acc3 = [0 g 0 e] * [0 20 0 -5] = -5*e + 20*g
+
+ SMLAD Acc0, ValDB, r0x0014fffb, Acc0 ;// Acc0 += [0 d 0 b] * [0 20 0 -5] = -5*b + 20*d
+ SMLADX Acc1, ValGE, r0x00140001, Acc1 ;// Acc1 += [0 g 0 e] x-multiplied by [0 20 0 1] = 20*e + g
+ SMLADX Acc2, ValHF, r0x00140001, Acc2 ;// Acc2 += [0 h 0 f] x-multiplied by [0 20 0 1] = 20*f + h
+ SMLADX Acc3, ValHF, r0x0014fffb, Acc3 ;// Acc3 += [0 h 0 f] x-multiplied by [0 20 0 -5] = 20*f - 5*h
+
+ SMLABB Acc0, ValGE, r0x0014fffb, Acc0 ;// Acc0 += e * -5
+ SMLATB Acc1, ValCA, r0x0014fffb, Acc1 ;// Acc1 += c * -5
+ SMLATB Acc2, ValCA, r0x00140001, Acc2 ;// Acc2 += c * 1
+ SMLATB Acc3, ValDB, r0x00140001, Acc3 ;// Acc3 += d * 1
+
+ LDRH ValCA, [pSrc], #8 ;// Load i; 8 = srcStep - 16, so pSrc lands on the next intermediate row
+ SMLABB Acc0, ValHF, r0x00140001, Acc0 ;// Acc0 += f * 1
+ SMLABB Acc1, ValHF, r0x0014fffb, Acc1 ;// Acc1 += f * -5
+ SMLATB Acc2, ValDB, r0x0014fffb, Acc2 ;// Acc2 += d * -5
+ SMLABB Acc3, ValCA, r0x00140001, Acc3 ;// Acc3 += i * 1 (ValCA now holds the halfword just loaded)
+
+ LDR r0x0001fc00, =0x0001fc00 ;// (0xff * 16 * 32) - 512
+ SUB Acc0, Acc0, r0x0001fc00
+ SUB Acc1, Acc1, r0x0001fc00
+ SUB Acc2, Acc2, r0x0001fc00
+ SUB Acc3, Acc3, r0x0001fc00
+
+ USAT Acc0, #18, Acc0 ;// saturate to 18 bits (8 result bits above the 10 shifted out below)
+ USAT Acc1, #18, Acc1
+ USAT Acc2, #18, Acc2
+ USAT Acc3, #18, Acc3
+
+ MOV Acc0, Acc0, LSR #10
+ MOV Acc1, Acc1, LSR #10
+ MOV Acc2, Acc2, LSR #10
+ MOV Acc3, Acc3, LSR #10
+
+ M_LDR Counter, pCounter
+ ORR Acc0, Acc0, Acc1, LSL #8 ;// pack the four result bytes into one word
+ ORR Acc2, Acc2, Acc3, LSL #8
+ SUBS Counter, Counter, #1
+ ORR Acc0, Acc0, Acc2, LSL #16
+ M_STR Acc0, [pDst], dstStep ;// one destination row per pass
+ BGT HeightLoop1
+End
+ SUB pDst, pDst, dstStep, LSL #2 ;// undo the four row steps
+ SUB pSrc, pSrc, srcStep, LSL #2 ;// undo the four 24-byte steps; pSrc points at the intermediate buffer start
+
+ M_END
+
+ ENDIF
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
new file mode 100644
index 0000000..6690ced
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
@@ -0,0 +1,239 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+
+DEBUG_ON SETL {FALSE}
+
+
+ IF ARM1136JS
+
+;// Function:
+;// armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+;//
+;// Implements horizontal interpolation for a block of size 4x4. Input and output should
+;// be aligned.
+;//
+;// Registers used as input for this function
+;// r0,r1,r2,r3 where r0,r2 input pointer and r1,r3 corresponding step size
+;//
+;// Registers preserved for top level function
+;// r0,r1,r2,r3,r4,r5,r6,r14
+;//
+;// Registers modified by the function
+;// r7,r8,r9,r10,r11,r12
+;//
+;// Output registers
+;// None. Function will preserve r0-r3
+
+
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2
+dstStep RN 3
+
+;// Declare inner loop registers
+Acc0 RN 4
+Acc1 RN 5
+Acc2 RN 6
+Acc3 RN 7
+
+ValA RN 4
+ValB RN 5
+ValC RN 6
+ValD RN 7
+ValE RN 8
+ValF RN 9
+ValG RN 12
+ValH RN 14
+ValI RN 1
+
+Temp1 RN 3
+Temp2 RN 1
+Temp3 RN 12
+Temp4 RN 7
+Temp5 RN 5
+r0x0fe00fe0 RN 3 ;// [0 (16*255 - 16) 0 (16*255 - 16)]
+r0x00ff00ff RN 10 ;// [0 255 0 255] where 255 is offset
+Counter RN 11
+
+Height RN 3 ;// not referenced by the code below
+
+ M_ALLOC4 pDstStep, 4 ;// holds dstStep while r3 serves as Temp1/r0x0fe00fe0
+ M_ALLOC4 pSrcStep, 4 ;// holds srcStep while r1 serves as Temp2/ValI
+
+ ;// Function header
+ M_START armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe, r6
+
+ MOV Counter, #2 ;// two passes, two rows per pass
+ M_STR dstStep, pDstStep
+ M_STR srcStep, pSrcStep
+ LDR r0x00ff00ff, =0x00ff00ff ;// [0 255 0 255] 255 is offset to avoid negative results
+
+NextTwoRowsLoop
+ LDR ValD, [pSrc, srcStep] ;// Load row 1 [d1 c1 b1 a1]
+ LDR ValA, [pSrc], #4 ;// Load row 0 [d0 c0 b0 a0]
+ LDR ValH, [pSrc, srcStep] ;// Load [h1 g1 f1 e1]
+ LDR ValE, [pSrc], #4 ;// Load [h0 g0 f0 e0]
+ LDRB Temp2, [pSrc, srcStep] ;// Load byte i1 of row 1 (overwrites srcStep in r1)
+ LDRB Temp1, [pSrc], #-8 ;// Load byte i0 of row 0 (overwrites dstStep in r3) and rewind pSrc by 8
+
+ PKHBT ValB, ValA, ValD, LSL #16 ;// [b1 a1 b0 a0]
+ PKHTB ValD, ValD, ValA, ASR #16 ;// [d1 c1 d0 c0]
+ UXTAB16 ValA, r0x00ff00ff, ValB ;// [00 a1 00 a0] + [0 255 0 255]
+ UXTAB16 ValC, r0x00ff00ff, ValD ;// [00 c1 00 c0] + [0 255 0 255]
+ PKHBT ValI, Temp1, Temp2, LSL #16 ;// [00 i1 00 i0]
+ PKHBT ValF, ValE, ValH, LSL #16 ;// [f1 e1 f0 e0]
+ PKHTB ValH, ValH, ValE, ASR #16 ;// [h1 g1 h0 g0]
+ UXTAB16 ValE, r0x00ff00ff, ValF ;// [00 e1 00 e0] + [0 255 0 255]
+
+ ;// Calculate Acc0
+ ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
+ UXTAB16 Temp1, ValC, ValD, ROR #8 ;// (Off+c) + d
+ UXTAB16 Temp3, ValE, ValB, ROR #8 ;// (Off+e) + b
+ RSB Temp1, Temp3, Temp1, LSL #2 ;// 4*(Off+c+d) - (Off+b+e)
+ UXTAB16 Acc0, ValA, ValF, ROR #8 ;// (Off+a) + f
+ ADD Temp1, Temp1, Temp1, LSL #2 ;// times 5
+ ADD Acc0, Acc0, Temp1
+
+ ;// Calculate Acc1
+ ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
+ UXTAB16 Temp1, ValE, ValD, ROR #8 ;// (Off+e) + d
+ UXTAB16 Temp3, ValC, ValF, ROR #8 ;// (Off+c) + f
+ RSB Temp1, Temp3, Temp1, LSL #2
+ UXTAB16 ValG, r0x00ff00ff, ValH ;// [00 g1 00 g0] + [0 255 0 255]
+ ADD Temp1, Temp1, Temp1, LSL #2
+ UXTAB16 Acc1, ValG, ValB, ROR #8 ;// (Off+g) + b
+ ADD Acc1, Acc1, Temp1
+
+ LDR r0x0fe00fe0, =0x0fe00fe0 ;// 0x0fe00fe0 = (16 * Offset) - 16 where Offset is 255
+ UXTAB16 Acc2, ValC, ValH, ROR #8 ;// (Off+c) + h, outer taps of Acc2
+ ADD ValI, r0x00ff00ff, ValI ;// [00 i1 00 i0] + [0 255 0 255]
+ UQSUB16 Acc0, Acc0, r0x0fe00fe0 ;// remove the accumulated offset, clamping at 0
+ UQSUB16 Acc1, Acc1, r0x0fe00fe0
+ USAT16 Acc0, #13, Acc0 ;// saturate each halfword to 13 bits
+ USAT16 Acc1, #13, Acc1
+
+ ;// Calculate Acc2
+ ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
+ UXTAB16 Temp1, ValG, ValD, ROR #8 ;// (Off+g) + d; this overwrites the constant held in r3
+ UXTAB16 Acc3, ValI, ValD, ROR #8 ;// (Off+i) + d, outer taps of Acc3
+ UXTAB16 Temp2, ValE, ValF, ROR #8 ;// (Off+e) + f
+ AND Acc1, r0x00ff00ff, Acc1, LSR #5 ;// shift right by 5 and keep one byte per halfword
+ AND Acc0, r0x00ff00ff, Acc0, LSR #5
+ ORR Acc0, Acc0, Acc1, LSL #8 ;// Acc1 (r5) is free after this and is reused as Temp5
+ RSB Temp5, Temp1, Temp2, LSL #2
+ UXTAB16 Temp2, ValG, ValF, ROR #8 ;// (Off+g) + f, used for Acc3
+ ADD Temp5, Temp5, Temp5, LSL #2
+ ADD Acc2, Acc2, Temp5
+
+ ;// Calculate Acc3
+ ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
+ UXTAB16 Temp5, ValE, ValH, ROR #8 ;// (Off+e) + h
+ RSB Temp5, Temp5, Temp2, LSL #2
+ LDR r0x0fe00fe0, =0x0fe00fe0 ;// reload: r3 was used as Temp1 since the previous load
+ ADD Temp5, Temp5, Temp5, LSL #2
+ ADD Acc3, Acc3, Temp5
+
+ UQSUB16 Acc3, Acc3, r0x0fe00fe0
+ UQSUB16 Acc2, Acc2, r0x0fe00fe0
+ USAT16 Acc3, #13, Acc3
+ USAT16 Acc2, #13, Acc2
+
+ M_LDR dstStep, pDstStep ;// r3 is free again; restore dstStep
+ AND Acc3, r0x00ff00ff, Acc3, LSR #5
+ AND Acc2, r0x00ff00ff, Acc2, LSR #5
+ ORR Acc2, Acc2, Acc3, LSL #8
+
+ SUBS Counter, Counter, #1
+ M_LDR srcStep, pSrcStep ;// r1 is free again; restore srcStep
+ PKHBT Acc1, Acc0, Acc2, LSL #16 ;// low halves: the four result bytes of row 0
+ M_STR Acc1, [pDst], dstStep ;// Store result1
+ PKHTB Acc2, Acc2, Acc0, ASR #16 ;// high halves: the four result bytes of row 1
+ M_STR Acc2, [pDst], dstStep ;// Store result2
+ ADD pSrc, pSrc, srcStep, LSL #1
+
+ BGT NextTwoRowsLoop
+End
+ SUB pDst, pDst, dstStep, LSL #2 ;// undo the four row steps so r0-r3 leave unchanged
+ SUB pSrc, pSrc, srcStep, LSL #2
+
+ M_END
+
+ ENDIF
+
+ END
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
new file mode 100644
index 0000000..007cd0d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
@@ -0,0 +1,185 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+
+
+
+ IF ARM1136JS
+
+ ;// Function header
+
+ ;// Function:
+ ;// armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ ;//
+ ;// Implements vertical interpolation for a block of size 4x4. Input and output should
+ ;// be aligned.
+ ;//
+ ;// Registers used as input for this function
+ ;// r0,r1,r2,r3 where r0,r2 input pointer and r1,r3 corresponding step size
+ ;//
+ ;// Registers preserved for top level function
+ ;// r0,r1,r2,r3,r4,r5,r6,r14
+ ;//
+ ;// Registers modified by the function
+ ;// r7,r8,r9,r10,r11,r12
+ ;//
+ ;// Output registers
+ ;// None. Function will preserve r0-r3
+ M_START armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe, r6
+
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2
+dstStep RN 3
+
+;// Declare inner loop registers
+ValA RN 5
+ValA0 RN 4
+ValA1 RN 5
+ValAF0 RN 4
+ValAF1 RN 5
+
+ValB RN 11
+
+ValC RN 5
+ValC0 RN 4
+ValC1 RN 5
+ValCD0 RN 12
+ValCD1 RN 14
+ValCF0 RN 4
+ValCF1 RN 5
+
+ValD RN 10
+
+ValE RN 7
+ValE0 RN 6
+ValE1 RN 7
+ValEB0 RN 10
+ValEB1 RN 11
+ValED0 RN 6
+ValED1 RN 7
+
+ValF RN 10
+
+ValG RN 14
+ValG0 RN 12
+ValG1 RN 14
+ValGB0 RN 12
+ValGB1 RN 14
+
+Acc0 RN 4
+Acc1 RN 5
+Acc2 RN 6
+Acc3 RN 7
+
+Temp RN 7
+Height RN 3
+Step RN 6
+
+Counter RN 8
+r0x00ff00ff RN 9 ;// [0 255 0 255] where 255 is offset
+r0x0fe00fe0 RN 10 ;// [0 (16*255 - 16) 0 (16*255 - 16)]
+
+
+ LDR r0x00ff00ff, =0x00ff00ff ;// [0 255 0 255] 255 is offset to avoid negative results
+ MOV Counter, #2 ;// two passes, two output rows per pass
+
+TwoRowsLoop
+ M_LDR ValC, [pSrc], srcStep ;// Load [c3 c2 c1 c0]
+ M_LDR ValD, [pSrc], srcStep ;// Load [d3 d2 d1 d0]
+ M_LDR ValE, [pSrc], srcStep ;// Load [e3 e2 e1 e0]
+ SUB pSrc, pSrc, srcStep, LSL #2 ;// three steps forward, four back: pSrc is now on row b
+ LDR ValB, [pSrc] ;// Load [b3 b2 b1 b0]
+ UXTAB16 ValC0, r0x00ff00ff, ValC ;// [0 c2 0 c0] + [0 255 0 255]
+ UXTAB16 ValC1, r0x00ff00ff, ValC, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255]
+
+ UXTAB16 ValE0, r0x00ff00ff, ValE ;// [0 e2 0 e0] + [0 255 0 255]
+ UXTAB16 ValE1, r0x00ff00ff, ValE, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255]
+ UXTAB16 ValCD0, ValC0, ValD ;// [0 c2 0 c0] + [0 255 0 255] + [0 d2 0 d0]
+ UXTAB16 ValCD1, ValC1, ValD, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + [0 d3 0 d1]
+ UXTAB16 ValEB0, ValE0, ValB ;// [0 e2 0 e0] + [0 255 0 255] + [0 b2 0 b0]
+ RSB ValCD0, ValEB0, ValCD0, LSL #2 ;// 4*(Off+C+D) - (Off+B+E)
+
+ LDR ValD, [pSrc, srcStep, LSL #1] ;// Load [d3 d2 d1 d0] again: r10 was overwritten by ValEB0
+ UXTAB16 ValEB1, ValE1, ValB, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + [0 b3 0 b1]
+ RSB ValCD1, ValEB1, ValCD1, LSL #2
+ ;// One cycle stall
+ UXTAB16 ValED0, ValE0, ValD ;// [0 e2 0 e0] + [0 255 0 255] + [0 d2 0 d0]
+ UXTAB16 ValED1, ValE1, ValD, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + [0 d3 0 d1]
+
+ LDR ValF, [pSrc, srcStep, LSL #2] ;// Load [f3 f2 f1 f0]
+ M_LDR ValB, [pSrc], srcStep ;// Load [b3 b2 b1 b0] again: r11 was overwritten by ValEB1; pSrc moves to row c
+ ADD ValCD0, ValCD0, ValCD0, LSL #2 ;// 5 * [4*(Off+C+D) - (Off+B+E)]
+ ADD ValCD1, ValCD1, ValCD1, LSL #2
+ UXTAB16 ValCF1, ValC1, ValF, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + [0 f3 0 f1]
+ UXTAB16 ValCF0, ValC0, ValF ;// [0 c2 0 c0] + [0 255 0 255] + [0 f2 0 f0]
+ RSB ValED1, ValCF1, ValED1, LSL #2
+
+ SUB ValA, pSrc, srcStep, LSL #1 ;// address of row a, two rows above row c
+ LDR ValA, [ValA] ;// Load [a3 a2 a1 a0]
+ RSB ValED0, ValCF0, ValED0, LSL #2 ;// 4*(Off+E+D) - (Off+C+F)
+ ADD ValED1, ValED1, ValED1, LSL #2
+ ADD ValED0, ValED0, ValED0, LSL #2 ;// 5 * [4*(Off+E+D) - (Off+C+F)]
+ UXTAB16 ValA0, r0x00ff00ff, ValA ;// [0 a2 0 a0] + [0 255 0 255]
+ UXTAB16 ValA1, r0x00ff00ff, ValA, ROR #8 ;// [0 a3 0 a1] + [0 255 0 255]
+ UXTAB16 ValAF0, ValA0, ValF ;// [0 a2 0 a0] + [0 255 0 255] + [0 f2 0 f0]
+ UXTAB16 ValAF1, ValA1, ValF, ROR #8 ;// [0 a3 0 a1] + [0 255 0 255] + [0 f3 0 f1]
+
+ LDR r0x0fe00fe0, =0x0fe00fe0 ;// (16 * 255) - 16 per halfword; loaded each pass because r10 also serves as ValD/ValF
+ ADD Acc1, ValCD1, ValAF1
+
+ LDR ValG, [pSrc, srcStep, LSL #2] ;// Load [g3 g2 g1 g0]
+ ADD Acc0, ValCD0, ValAF0 ;// Acc0 = 16*Off + (A+F) + 20*(C+D) - 5*(B+E)
+ UQSUB16 Acc1, Acc1, r0x0fe00fe0 ;// Acc1 -= (16*Off - 16)
+ UQSUB16 Acc0, Acc0, r0x0fe00fe0
+ UXTAB16 ValG0, r0x00ff00ff, ValG ;// [0 g2 0 g0] + [0 255 0 255]
+ UXTAB16 ValG1, r0x00ff00ff, ValG, ROR #8 ;// [0 g3 0 g1] + [0 255 0 255]
+ UXTAB16 ValGB0, ValG0, ValB ;// [0 g2 0 g0] + [0 255 0 255] + [0 b2 0 b0]
+ UXTAB16 ValGB1, ValG1, ValB, ROR #8 ;// [0 g3 0 g1] + [0 255 0 255] + [0 b3 0 b1]
+ ADD Acc2, ValED0, ValGB0 ;// Acc2 = 16*Off + (B+G) + 20*(D+E) - 5*(C+F)
+ ADD Acc3, ValED1, ValGB1
+ UQSUB16 Acc3, Acc3, r0x0fe00fe0 ;// Acc3 -= (16*Off - 16)
+ UQSUB16 Acc2, Acc2, r0x0fe00fe0
+ USAT16 Acc1, #13, Acc1 ;// Saturate to 8+5 = 13 bits
+ USAT16 Acc0, #13, Acc0
+ USAT16 Acc3, #13, Acc3
+ USAT16 Acc2, #13, Acc2
+ AND Acc1, r0x00ff00ff, Acc1, LSR #5 ;// [0 a3 0 a1]
+ AND Acc0, r0x00ff00ff, Acc0, LSR #5 ;// [0 a2 0 a0]
+ ORR Acc0, Acc0, Acc1, LSL #8 ;// [a3 a2 a1 a0]
+ AND Acc3, r0x00ff00ff, Acc3, LSR #5 ;// [0 b3 0 b1]
+ AND Acc2, r0x00ff00ff, Acc2, LSR #5 ;// [0 b2 0 b0]
+
+ M_STR Acc0, [pDst], dstStep ;// Store result & adjust pointer
+ ORR Acc2, Acc2, Acc3, LSL #8 ;// [b3 b2 b1 b0]
+ M_STR Acc2, [pDst], dstStep ;// Store result & adjust pointer
+ ADD pSrc, pSrc, srcStep, LSL #1 ;// two rows were produced this pass
+
+ SUBS Counter, Counter, #1
+ BGT TwoRowsLoop
+End
+ SUB pDst, pDst, dstStep, LSL #2 ;// undo the four row steps so r0-r3 leave unchanged
+ SUB pSrc, pSrc, srcStep, LSL #2
+
+ M_END
+
+ ENDIF
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
new file mode 100644
index 0000000..b1ad17c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
@@ -0,0 +1,273 @@
+;//
+;//
+;// File Name: armVCM4P10_Interpolate_Chroma_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ IF ARM1136JS
+
+;// input registers
+
+pSrc RN 0
+iSrcStep RN 1
+pDst RN 2
+iDstStep RN 3
+iWidth RN 4
+iHeight RN 5
+dx RN 6
+dy RN 7
+
+
+;// local variable registers
+temp RN 11
+r0x20 RN 12
+tmp0x20 RN 14
+return RN 0
+dxPlusdy RN 10
+EightMinusdx RN 8
+EightMinusdy RN 9
+dxEightMinusdx RN 8
+BACoeff RN 6
+DCCoeff RN 7
+
+iDstStepx2MinusWidth RN 8
+iSrcStepx2MinusWidth RN 9
+iSrcStep1 RN 10
+
+pSrc1 RN 1
+pSrc2 RN 8
+pDst1 RN 8
+pDst2 RN 12
+
+pix00 RN 8
+pix01 RN 9
+pix10 RN 10
+pix11 RN 11
+
+Out0100 RN 8
+Out1110 RN 10
+
+x00 RN 8
+x01 RN 10
+x02 RN 12
+x10 RN 9
+x11 RN 11
+x12 RN 14
+x20 RN 10
+x21 RN 12
+x22 RN 14
+
+x01x00 RN 8
+x02x01 RN 10
+x11x10 RN 9
+x12x11 RN 11
+x21x20 RN 10
+x22x21 RN 12
+
+OutRow00 RN 12
+OutRow01 RN 14
+OutRow10 RN 10
+OutRow11 RN 12
+
+OutRow0100 RN 12
+OutRow1110 RN 12
+
+;//-----------------------------------------------------------------------------------------------
+;// armVCM4P10_Interpolate_Chroma_asm starts (weighted 2x2-tap chroma interpolation; plain copy when dx + dy == 0)
+;//-----------------------------------------------------------------------------------------------
+
+ ;// Write function header
+ M_START armVCM4P10_Interpolate_Chroma, r11
+
+ ;// Define stack arguments
+ M_ARG Width, 4
+ M_ARG Height, 4
+ M_ARG Dx, 4
+ M_ARG Dy, 4
+
+ ;// Load argument from the stack
+ ;// M_STALL ARM1136JS=4
+
+ M_LDR iWidth, Width
+ M_LDR iHeight, Height
+ M_LDR dx, Dx
+ M_LDR dy, Dy
+
+ ;// EightMinusdx = 8 - dx
+ ;// EightMinusdy = 8 - dy
+
+ ;// ACoeff = EightMinusdx * EightMinusdy
+ ;// BCoeff = dx * EightMinusdy
+ ;// CCoeff = EightMinusdx * dy
+ ;// DCoeff = dx * dy
+
+ ADD pSrc1, pSrc, iSrcStep ;// pSrc1 = second source row; it reuses the iSrcStep register (r1)
+ SUB temp, iWidth, #1
+ RSB EightMinusdx, dx, #8
+ RSB EightMinusdy, dy, #8
+ CMN dx,dy ;// Z flag set when dx + dy == 0; consumed by BEQ MVIsZero below
+ ADD dxEightMinusdx, EightMinusdx, dx, LSL #16 ;// [dx | 8-dx] packed as halfwords
+ ORR iWidth, iWidth, temp, LSL #16 ;// [iWidth-1 | iWidth]
+
+ ;// Packed Coeffs.
+
+ MUL BACoeff, dxEightMinusdx, EightMinusdy ;// [BCoeff | ACoeff]
+ MUL DCCoeff, dxEightMinusdx, dy ;// [DCoeff | CCoeff]
+
+
+ ;// Checking either of dx and dy being non-zero (flags still come from CMN; MUL/ADD/ORR above do not set them)
+
+ BEQ MVIsZero
+
+;// Pixel layout:
+;//
+;// x00 x01 x02
+;// x10 x11 x12
+;// x20 x21 x22
+
+;// If fractional mv is not (0, 0)
+
+OuterLoopMVIsNotZero
+
+InnerLoopMVIsNotZero
+
+ LDRB x00, [pSrc, #+0]
+ LDRB x10, [pSrc1, #+0]
+ LDRB x01, [pSrc, #+1]
+ LDRB x11, [pSrc1, #+1]
+ LDRB x02, [pSrc, #+2]! ;// Writeback: pSrc += 2
+ LDRB x12, [pSrc1, #+2]! ;// Writeback: pSrc1 += 2
+
+ ORR x01x00, x00, x01, LSL #16
+ ;// M_STALL ARM1136JS=1
+ ORR x02x01, x01, x02, LSL #16
+ MOV r0x20, #32
+ ORR x11x10, x10, x11, LSL #16
+ ORR x12x11, x11, x12, LSL #16
+
+ SMLAD x01x00, x01x00, BACoeff, r0x20 ;// x00*A + x01*B + 32
+ SMLAD x02x01, x02x01, BACoeff, r0x20 ;// x01*A + x02*B + 32
+
+ ;// iWidth packed with MSB (top 16 bits)
+ ;// as inner loop counter value i.e
+ ;// (iWidth -1) and LSB (lower 16 bits)
+ ;// as original width
+
+ SUBS iWidth, iWidth, #1<<17 ;// Counter -= 2 (two output columns per iteration)
+
+ SMLAD OutRow00, x11x10, DCCoeff, x01x00 ;// += x10*C + x11*D
+ SMLAD OutRow01, x12x11, DCCoeff, x02x01 ;// += x11*C + x12*D
+
+ RSB pSrc2, pSrc, pSrc1, LSL #1 ;// pSrc2 = 2*pSrc1 - pSrc, i.e. one row below pSrc1
+
+ MOV OutRow00, OutRow00, LSR #6
+ MOV OutRow01, OutRow01, LSR #6
+
+ LDRB x20,[pSrc2, #-2]
+
+ ORR OutRow0100, OutRow00, OutRow01, LSL #8
+ STRH OutRow0100, [pDst], #2 ;// Two first-row output pixels; pDst += 2
+
+ LDRB x21,[pSrc2, #-1]
+ LDRB x22,[pSrc2, #+0]
+
+ ADD pDst1, pDst, iDstStep ;// Second output row (pDst is already advanced by 2)
+
+ ;// M_STALL ARM1136JS=1
+
+ ORR x21x20, x20, x21, LSL #16
+ ORR x22x21, x21, x22, LSL #16
+
+ MOV tmp0x20, #32
+
+ ;// Reusing the packed data x11x10 and x12x11
+
+ SMLAD x11x10, x11x10, BACoeff, tmp0x20
+ SMLAD x12x11, x12x11, BACoeff, tmp0x20
+ SMLAD OutRow10, x21x20, DCCoeff, x11x10
+ SMLAD OutRow11, x22x21, DCCoeff, x12x11
+
+ MOV OutRow10, OutRow10, LSR #6
+ MOV OutRow11, OutRow11, LSR #6
+
+ ;// M_STALL ARM1136JS=1
+
+ ORR OutRow1110, OutRow10, OutRow11, LSL #8
+
+ STRH OutRow1110, [pDst1, #-2] ;// #-2 compensates for the pDst post-increment above
+
+ BGT InnerLoopMVIsNotZero ;// Flags from SUBS iWidth above
+
+ SUBS iHeight, iHeight, #2 ;// Two rows done; these flags drive the outer BGT
+ ADD iWidth, iWidth, #1<<16 ;// Undo the final counter step so the top halfword is clear (assumes even width - TODO confirm)
+ RSB iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1
+ SUB iSrcStep1, pSrc1, pSrc ;// Recover the source step (its register was reused as pSrc1)
+ SUB temp, iWidth, #1
+ RSB iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1
+ ADD pDst, pDst, iDstStepx2MinusWidth
+ ADD pSrc1, pSrc1, iSrcStepx2MinusWidth
+ ADD pSrc, pSrc, iSrcStepx2MinusWidth
+ ORR iWidth, iWidth, temp, LSL #16 ;// Re-pack [iWidth-1 | iWidth] for the next row pair
+ BGT OuterLoopMVIsNotZero
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+;// If fractional mv is (0, 0)
+
+MVIsZero
+ ;// M_STALL ARM1136JS=4
+OuterLoopMVIsZero
+
+InnerLoopMVIsZero
+
+ LDRB pix00, [pSrc], #+1
+ LDRB pix01, [pSrc], #+1
+ LDRB pix10, [pSrc1], #+1
+ LDRB pix11, [pSrc1], #+1
+
+ ADD pDst2, pDst, iDstStep
+ SUBS iWidth, iWidth, #1<<17 ;// Counter -= 2 (two pixels copied per row per iteration)
+
+ ORR Out0100, pix00, pix01, LSL #8
+ ORR Out1110, pix10, pix11, LSL #8
+
+ STRH Out0100, [pDst], #2
+ STRH Out1110, [pDst2], #2
+
+ BGT InnerLoopMVIsZero ;// Flags from SUBS iWidth above
+
+ SUBS iHeight, iHeight, #2 ;// Two rows done; these flags drive the outer BGT
+ ADD iWidth, iWidth, #1<<16 ;// Undo the final counter step so the top halfword is clear (assumes even width - TODO confirm)
+ RSB iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1
+ SUB iSrcStep1, pSrc1, pSrc ;// Recover the source step (its register was reused as pSrc1)
+ SUB temp, iWidth, #1
+ RSB iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1
+ ADD pDst, pDst, iDstStepx2MinusWidth
+ ADD pSrc1, pSrc1, iSrcStepx2MinusWidth
+ ADD pSrc, pSrc, iSrcStepx2MinusWidth
+ ORR iWidth, iWidth, temp, LSL #16 ;// Re-pack [iWidth-1 | iWidth] for the next row pair
+ BGT OuterLoopMVIsZero
+ MOV return, #OMX_Sts_NoErr
+ M_END
+
+ ENDIF ;// ARM1136JS
+
+
+ END
+
+;//-----------------------------------------------------------------------------------------------
+;// armVCM4P10_Interpolate_Chroma_asm ends
+;//-----------------------------------------------------------------------------------------------
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_QuantTables_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_QuantTables_s.s
new file mode 100644
index 0000000..f962f70
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_QuantTables_s.s
@@ -0,0 +1,74 @@
+;//
+;//
+;// File Name: armVCM4P10_QuantTables_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;// Description:
+;// This file contains quantization tables
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+
+ EXPORT armVCM4P10_MFMatrixQPModTable
+ EXPORT armVCM4P10_QPDivIntraTable
+ EXPORT armVCM4P10_QPDivPlusOneTable
+
+;//--------------------------------------------------------------
+;// This table contains armVCM4P10_MFMatrix [iQP % 6][0] entries,
+;// for values of iQP from 0 to 51 (inclusive); stored as 9 rows x 6 halfwords (DCW).
+;//--------------------------------------------------------------
+
+ M_TABLE armVCM4P10_MFMatrixQPModTable
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+
+;//---------------------------------------------------------------
+;// This table contains ARM_M4P10_Q_OFFSET + 1 + (iQP / 6) values,
+;// for values of iQP from 0 to 51 (inclusive); one byte (DCB) per iQP, each row holds one iQP / 6 value.
+;//---------------------------------------------------------------
+
+ M_TABLE armVCM4P10_QPDivPlusOneTable
+ DCB 16, 16, 16, 16, 16, 16
+ DCB 17, 17, 17, 17, 17, 17
+ DCB 18, 18, 18, 18, 18, 18
+ DCB 19, 19, 19, 19, 19, 19
+ DCB 20, 20, 20, 20, 20, 20
+ DCB 21, 21, 21, 21, 21, 21
+ DCB 22, 22, 22, 22, 22, 22
+ DCB 23, 23, 23, 23, 23, 23
+ DCB 24, 24, 24, 24, 24, 24
+
+;//------------------------------------------------------------------
+;// This table contains (1 << QbitsPlusOne) / 3 values (Intra case),
+;// for values of iQP from 0 to 51 (inclusive); the first row is (1 << 16) / 3 = 21845 and each row doubles the shift.
+;//------------------------------------------------------------------
+
+ M_TABLE armVCM4P10_QPDivIntraTable, 2
+ DCD 21845, 21845, 21845, 21845, 21845, 21845
+ DCD 43690, 43690, 43690, 43690, 43690, 43690
+ DCD 87381, 87381, 87381, 87381, 87381, 87381
+ DCD 174762, 174762, 174762, 174762, 174762, 174762
+ DCD 349525, 349525, 349525, 349525, 349525, 349525
+ DCD 699050, 699050, 699050, 699050, 699050, 699050
+ DCD 1398101, 1398101, 1398101, 1398101, 1398101, 1398101
+ DCD 2796202, 2796202, 2796202, 2796202, 2796202, 2796202
+ DCD 5592405, 5592405, 5592405, 5592405, 5592405, 5592405
+
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
new file mode 100644
index 0000000..241d188
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
@@ -0,0 +1,407 @@
+;//
+;//
+;// File Name: armVCM4P10_TransformResidual4x4_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// Description:
+;// Transform Residual 4x4 Coefficients
+;//
+;//
+
+
+;// Include standard headers
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+;// Import symbols required from other files
+;// (For example tables)
+
+
+
+
+;// Set debugging level
+;//DEBUG_ON SETL {TRUE}
+
+
+
+;// Guarding implementation by the processor name
+
+ IF ARM1136JS
+
+;//Input Registers
+pDst RN 0
+pSrc RN 1
+
+;//Output Registers
+
+
+;//Local Scratch Registers
+
+;// Packed Input pixels
+in00 RN 2 ;// Src[0] & Src[1]
+in02 RN 3 ;// Src[2] & Src[3]
+in10 RN 4 ;// Src[4] & Src[5]
+in12 RN 5 ;// Src[6] & Src[7]
+in20 RN 6 ;// Src[8] & Src[9]
+in22 RN 7 ;// Src[10] & Src[11]
+in30 RN 8 ;// Src[12] & Src[13]
+in32 RN 9 ;// Src[14] & Src[15]
+
+;// Transpose for Row operations (Rows to cols)
+trRow00 RN 2
+trRow10 RN 10
+trRow02 RN 3
+trRow12 RN 5
+trRow20 RN 11
+trRow30 RN 12
+trRow32 RN 14
+trRow22 RN 7
+
+;// Intermediate calculations
+e0 RN 4
+e1 RN 6
+e2 RN 8
+e3 RN 9
+constZero RN 1
+
+;// Row operated pixels
+rowOp00 RN 2
+rowOp10 RN 10
+rowOp20 RN 11
+rowOp30 RN 12
+rowOp02 RN 3
+rowOp12 RN 5
+rowOp22 RN 7
+rowOp32 RN 14
+
+;// Transpose for column operations
+trCol00 RN 2
+trCol02 RN 3
+trCol10 RN 4
+trCol12 RN 5
+trCol20 RN 6
+trCol22 RN 7
+trCol30 RN 8
+trCol32 RN 9
+
+;// Intermediate calculations
+g0 RN 10
+g1 RN 11
+g2 RN 12
+g3 RN 14
+
+;// Column operated pixels
+colOp00 RN 2
+colOp02 RN 3
+colOp10 RN 4
+colOp12 RN 5
+colOp20 RN 6
+colOp22 RN 7
+colOp30 RN 8
+colOp32 RN 9
+
+
+temp1 RN 10 ;// Temporary scratch variables
+const1 RN 11
+const2 RN 12
+mask RN 14
+
+;// Output pixels
+out00 RN 2
+out02 RN 3
+out10 RN 4
+out12 RN 5
+out20 RN 6
+out22 RN 7
+out30 RN 8
+out32 RN 9
+
+
+
+ ;// Allocate stack memory required by the function
+
+
+ ;// Write function header
+ M_START armVCM4P10_TransformResidual4x4,r11
+
+ ;******************************************************************
+ ;// The strategy used in implementing the transform is as follows:*
+ ;// Load the 4x4 block into 8 registers *
+ ;// Transpose the 4x4 matrix *
+ ;// Perform the row operations (on columns) using SIMD *
+ ;// Transpose the 4x4 result matrix *
+ ;// Perform the column operations *
+ ;// Store the 4x4 block at one go *
+ ;******************************************************************
+
+ ;// Load all the 4x4 pixels
+
+ LDMIA pSrc,{in00,in02,in10,in12,in20,in22,in30,in32}
+
+ MOV constZero,#0 ;// Zero operand: SHADD16 x,constZero halves each 16-bit lane (reuses the pSrc register)
+ ;LDR constZero,=0x00000000
+
+ ;*****************************************************************
+ ;//
+ ;// Transpose the matrix in order to perform row ops as column ops
+ ;// Input: in[][] = original matrix
+ ;// Output: trRow[][]= transposed matrix
+ ;// Step1: Obtain the LL part of the transposed matrix
+ ;// Step2: Obtain the HL part
+ ;// step3: Obtain the LH part
+ ;// Step4: Obtain the HH part
+ ;//
+ ;*****************************************************************
+
+ ;// LL 2x2 transposed matrix
+ ;// d0 d1 - -
+ ;// d4 d5 - -
+ ;// - - - -
+ ;// - - - -
+
+ PKHTB trRow10,in10,in00,ASR #16 ;// [5 4] = [f5:f1]
+ PKHBT trRow00,in00,in10,LSL #16 ;// [1 0] = [f4:f0]
+
+ ;// HL 2x2 transposed matrix
+ ;// - - - -
+ ;// - - - -
+ ;// d8 d9 - -
+ ;// d12 d13 - -
+
+
+ PKHTB trRow30,in12,in02,ASR #16 ;// [13 12] = [7 3]
+ PKHBT trRow20,in02,in12,LSL #16 ;// [9 8] = [6 2]
+
+ ;// LH 2x2 transposed matrix
+ ;// - - d2 d3
+ ;// - - d6 d7
+ ;// - - - -
+ ;// - - - -
+
+ PKHBT trRow02,in20,in30,LSL #16 ;// [3 2] = [f12:f8]
+ PKHTB trRow12,in30,in20,ASR #16 ;// [7 6] = [f13:f9]
+
+
+
+
+ ;// HH 2x2 transposed matrix
+ ;// - - - -
+ ;// - - - -
+ ;// - - d10 d11
+ ;// - - d14 d15
+
+ PKHTB trRow32,in32,in22,ASR #16 ;// [15 14] = [15 11]
+ PKHBT trRow22,in22,in32,LSL #16 ;// [11 10] = [14 10]
+
+
+ ;****************************************
+ ;// Row Operations (Performed on columns)
+ ;****************************************
+
+
+ ;// SIMD operations on first two columns(two rows of the original matrix)
+
+
+ SADD16 e0, trRow00,trRow20 ;// e0 = d0 + d2
+ SSUB16 e1, trRow00,trRow20 ;// e1 = d0 - d2
+ SHADD16 e2, trRow10,constZero ;// (d1>>1) constZero is a register holding 0
+ SHADD16 e3, trRow30,constZero ;// (d3>>1) computed early to avoid pipeline stalls for e2 and e3
+ SSUB16 e2, e2, trRow30 ;// e2 = (d1>>1) - d3
+ SADD16 e3, e3, trRow10 ;// e3 = d1 + (d3>>1)
+ SADD16 rowOp00, e0, e3 ;// f0 = e0 + e3
+ SADD16 rowOp10, e1, e2 ;// f1 = e1 + e2
+ SSUB16 rowOp20, e1, e2 ;// f2 = e1 - e2
+ SSUB16 rowOp30, e0, e3 ;// f3 = e0 - e3
+
+ ;// SIMD operations on next two columns(next two rows of the original matrix)
+
+ SADD16 e0, trRow02,trRow22
+ SSUB16 e1, trRow02,trRow22
+ SHADD16 e2, trRow12,constZero ;//(d1>>1) constZero is a register holding 0
+ SHADD16 e3, trRow32,constZero
+ SSUB16 e2, e2, trRow32
+ SADD16 e3, e3, trRow12
+ SADD16 rowOp02, e0, e3
+ SADD16 rowOp12, e1, e2
+ SSUB16 rowOp22, e1, e2
+ SSUB16 rowOp32, e0, e3
+
+
+ ;*****************************************************************
+ ;// Transpose the resultant matrix
+ ;// Input: rowOp[][]
+ ;// Output: trCol[][]
+ ;*****************************************************************
+
+ ;// LL 2x2 transposed matrix
+ ;// d0 d1 - -
+ ;// d4 d5 - -
+ ;// - - - -
+ ;// - - - -
+
+ PKHTB trCol10,rowOp10,rowOp00,ASR #16 ;// [5 4] = [f5:f1]
+ PKHBT trCol00,rowOp00,rowOp10,LSL #16 ;// [1 0] = [f4:f0]
+
+ ;// HL 2x2 transposed matrix
+ ;// - - - -
+ ;// - - - -
+ ;// d8 d9 - -
+ ;// d12 d13 - -
+
+
+ PKHTB trCol30,rowOp12,rowOp02,ASR #16 ;// [13 12] = [7 3]
+ PKHBT trCol20,rowOp02,rowOp12,LSL #16 ;// [9 8] = [6 2]
+
+ ;// LH 2x2 transposed matrix
+ ;// - - d2 d3
+ ;// - - d6 d7
+ ;// - - - -
+ ;// - - - -
+
+ PKHBT trCol02,rowOp20,rowOp30,LSL #16 ;// [3 2] = [f12:f8]
+ PKHTB trCol12,rowOp30,rowOp20,ASR #16 ;// [7 6] = [f13:f9]
+
+
+
+
+ ;// HH 2x2 transposed matrix
+ ;// - - - -
+ ;// - - - -
+ ;// - - d10 d11
+ ;// - - d14 d15
+
+ PKHTB trCol32,rowOp32,rowOp22,ASR #16 ;// [15 14] = [15 11]
+ PKHBT trCol22,rowOp22,rowOp32,LSL #16 ;// [11 10] = [14 10]
+
+
+ ;*******************************
+ ;// Column Operations
+ ;*******************************
+
+
+ ;// SIMD operations on first two columns
+
+
+ SADD16 g0, trCol00,trCol20
+ SSUB16 g1, trCol00,trCol20
+ SHADD16 g2, trCol10,constZero ;// (f1>>1) constZero is a register holding 0
+ SHADD16 g3, trCol30,constZero
+ SSUB16 g2, g2, trCol30
+ SADD16 g3, g3, trCol10
+ SADD16 colOp00, g0, g3
+ SADD16 colOp10, g1, g2
+ SSUB16 colOp20, g1, g2
+ SSUB16 colOp30, g0, g3
+
+ ;// SIMD operations on next two columns
+
+ SADD16 g0, trCol02,trCol22
+ SSUB16 g1, trCol02,trCol22
+ SHADD16 g2, trCol12,constZero ;// (f1>>1) constZero is a register holding 0
+ SHADD16 g3, trCol32,constZero
+ SSUB16 g2, g2, trCol32
+ SADD16 g3, g3, trCol12
+ SADD16 colOp02, g0, g3
+ SADD16 colOp12, g1, g2
+ SSUB16 colOp22, g1, g2
+ SSUB16 colOp32, g0, g3
+
+
+
+
+
+ ;************************************************
+ ;// Calculate final value (colOp[i][j] + 32)>>6
+ ;************************************************
+
+ ;// const1: Serves dual purpose
+ ;// (1) Add #32 to both the lower and higher 16bits of the SIMD result
+ ;// (2) Convert the lower 16 bit value to an unsigned number (Add 32768)
+
+ LDR const1, =0x00208020
+
+ LDR mask, =0xffff03ff ;// Clears the 6 bits shifted from the upper lane into the lower lane by the 32-bit ASR #6
+
+ ;// const2(#512): used to convert the lower 16bit number back to signed value
+
+ MOV const2,#0x200 ;// const2 = 2^9 = 32768 >> 6
+
+ ;// First Row
+
+ SADD16 colOp00, colOp00, const1
+ SADD16 colOp02, colOp02, const1
+ AND colOp00, mask, colOp00, ASR #6
+ AND colOp02, mask, colOp02, ASR #6
+ SSUB16 out00,colOp00,const2
+ SSUB16 out02,colOp02,const2
+
+
+ ;// Second Row
+
+ SADD16 colOp10, colOp10, const1
+ SADD16 colOp12, colOp12, const1
+ AND colOp10, mask, colOp10, ASR #6
+ AND colOp12, mask, colOp12, ASR #6
+ SSUB16 out10,colOp10,const2
+ SSUB16 out12,colOp12,const2
+
+
+ ;// Third Row
+
+ SADD16 colOp20, colOp20, const1
+ SADD16 colOp22, colOp22, const1
+ AND colOp20, mask, colOp20, ASR #6
+ AND colOp22, mask, colOp22, ASR #6
+ SSUB16 out20,colOp20,const2
+ SSUB16 out22,colOp22,const2
+
+
+ ;// Fourth Row
+
+ SADD16 colOp30, colOp30, const1
+ SADD16 colOp32, colOp32, const1
+ AND colOp30, mask, colOp30, ASR #6
+ AND colOp32, mask, colOp32, ASR #6
+ SSUB16 out30,colOp30,const2
+ SSUB16 out32,colOp32,const2
+
+
+
+
+ ;***************************
+ ;// Store all the 4x4 pixels
+ ;***************************
+
+ STMIA pDst,{out00,out02,out10,out12,out20,out22,out30,out32}
+
+
+
+ ;// No instruction in this body sets a return value
+
+End
+
+
+ ;// Write function tail
+ M_END
+
+ ENDIF ;//ARM1136JS
+
+
+
+
+
+
+
+;// Guarding implementation by the processor name
+
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
new file mode 100644
index 0000000..ad16d9c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
@@ -0,0 +1,92 @@
+;//
+;//
+;// File Name: armVCM4P10_UnpackBlock4x4_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+;// Define the processor variants supported by this file
+
+ M_VARIANTS ARM1136JS
+
+
+ IF ARM1136JS
+
+;//--------------------------------------
+;// Input Arguments and their scope/usage
+;//--------------------------------------
+ppSrc RN 0 ;// Persistent variable
+pDst RN 1 ;// Persistent variable
+
+;//--------------------------------
+;// Variables and their scope/usage
+;//--------------------------------
+pSrc RN 2 ;// Persistent variables
+Flag RN 3
+Value RN 4
+Value2 RN 5
+strOffset RN 6
+cstOffset RN 7
+
+
+ M_START armVCM4P10_UnpackBlock4x4, r7
+
+ LDR pSrc, [ppSrc] ;// Load pSrc
+ MOV cstOffset, #31 ;// To be used in the loop, to compute offset
+
+ ;//-----------------------------------------------------------------------
+ ;// Firstly, fill all the coefficient values on the <pDst> buffer by zero
+ ;//-----------------------------------------------------------------------
+
+ MOV Value, #0 ;// Initialize the zero value
+ MOV Value2, #0 ;// Initialize the zero value
+ LDRB Flag, [pSrc], #1 ;// Preload <Flag> before <unpackLoop>
+
+ STRD Value, [pDst, #0] ;// pDst[0] = pDst[1] = pDst[2] = pDst[3] = 0
+ STRD Value, [pDst, #8] ;// pDst[4] = pDst[5] = pDst[6] = pDst[7] = 0
+ STRD Value, [pDst, #16] ;// pDst[8] = pDst[9] = pDst[10] = pDst[11] = 0
+ STRD Value, [pDst, #24] ;// pDst[12] = pDst[13] = pDst[14] = pDst[15] = 0
+
+ ;//----------------------------------------------------------------------------
+ ;// The loop below parses and unpacks the input stream. The C-model has
+ ;// a somewhat complicated logic for sign extension. But in the v6 version,
+ ;// that can be easily taken care by loading the data from <pSrc> stream as
+ ;// SIGNED byte/halfword. So, based on the first TST instruction, 8-bits or
+ ;// 16-bits are read.
+ ;//
+ ;// Next, to compute the offset, where the unpacked value needs to be stored,
+ ;// we modify the computation to perform [(Flag & 15) << 1] as [(Flag << 1) & 31]
+ ;// This results in a saving of one cycle.
+ ;//----------------------------------------------------------------------------
+
+unpackLoop
+ TST Flag, #0x10 ;// Computing (Flag & 0x10): NE selects a 16-bit value, EQ an 8-bit value
+ LDRSBNE Value2,[pSrc,#1] ;// Load byte wise to avoid unaligned access (signed high byte)
+ LDRBNE Value, [pSrc], #2 ;// Unsigned low byte; pSrc += 2
+ AND strOffset, cstOffset, Flag, LSL #1 ;// strOffset = (Flag & 15) << 1;
+ LDRSBEQ Value, [pSrc], #1 ;// Value = (OMX_S8) *pSrc++ (sign-extended byte)
+ ORRNE Value,Value,Value2, LSL #8 ;// Value = low byte | (signed high byte << 8)
+
+ TST Flag, #0x20 ;// Computing (Flag & 0x20) to check, if we're done
+ LDRBEQ Flag, [pSrc], #1 ;// Flag = (OMX_U8) *pSrc++, for next iteration
+ STRH Value, [pDst, strOffset] ;// Store <Value> at offset <strOffset>
+ BEQ unpackLoop ;// Branch to the loop beginning
+
+ STR pSrc, [ppSrc] ;// Update the bitstream pointer
+ M_END
+
+ ENDIF
+
+
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
new file mode 100644
index 0000000..c2e6b60
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
@@ -0,0 +1,88 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DeblockChroma_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 intra chroma deblock
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DeblockChroma_I
+ *
+ * Description:
+ * In-place deblocking of every edge of one chroma macroblock. The
+ * vertical-edge filter runs first; the horizontal-edge filter follows and
+ * its status is returned.
+ *
+ * Remarks:
+ * NOTE(review): the previous header gave the block size as 16x16 - confirm
+ * against the OpenMAX DL specification.
+ *
+ * Parameters:
+ * [in,out] pSrcDst    macroblock to filter; overwritten with the filtered
+ *                     result. Must be non-NULL and 8-byte aligned.
+ * [in] srcdstStep     step of the pSrcDst array. Must be a multiple of 8.
+ * [in] pAlpha         2x2 array of alpha thresholds: { external vertical
+ *                     edge, internal vertical edge, external horizontal
+ *                     edge, internal horizontal edge }. The horizontal pass
+ *                     is given pAlpha + 2.
+ * [in] pBeta          2x2 array of beta thresholds, same layout as pAlpha.
+ *                     The horizontal pass is given pBeta + 2.
+ * [in] pThresholds    8x2 array of thresholds (TC0): values for the left or
+ *                     above edge of each 4x2 or 2x4 block, in vertical block
+ *                     order and then in horizontal block order. Must be
+ *                     4-byte aligned. The horizontal pass is given
+ *                     pThresholds + 8.
+ * [in] pBS            16x2 array of BS parameters, in scan block order for
+ *                     vertical edges and then horizontal edges. Valid range
+ *                     is [0,4], where pBS[i] == 4 may occur only for
+ *                     0<=i<=3 and pBS[i] == 4 if and only if pBS[i^1] == 4.
+ *                     Must be 4-byte aligned. The horizontal pass is given
+ *                     pBS + 16.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, checked here:
+ *   - any of pSrcDst, pAlpha, pBeta, pThresholds or pBS is NULL.
+ *   - pSrcDst is not 8-byte aligned.
+ *   - pThresholds or pBS is not 4-byte aligned.
+ *   - srcdstStep is not a multiple of 8.
+ * The pBS range restrictions are not tested in this function; presumably
+ * the edge filters reject them - verify against their implementation.
+ * Any other status comes from the edge filters and is passed through.
+ *
+ */
+OMXResult omxVCM4P10_DeblockChroma_I(
+    OMX_U8* pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8* pAlpha,
+    const OMX_U8* pBeta,
+    const OMX_U8* pThresholds,
+    const OMX_U8 *pBS
+)
+{
+    OMXResult status;
+
+    /* Picture buffer: present, 8-byte aligned, step a multiple of 8 */
+    armRetArgErrIf((pSrcDst == NULL) || armNot8ByteAligned(pSrcDst) || (srcdstStep & 7), OMX_Sts_BadArgErr);
+
+    /* Alpha / beta threshold tables: present */
+    armRetArgErrIf((pAlpha == NULL) || (pBeta == NULL), OMX_Sts_BadArgErr);
+
+    /* TC0 and BS tables: present and 4-byte aligned */
+    armRetArgErrIf((pThresholds == NULL) || armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
+    armRetArgErrIf((pBS == NULL) || armNot4ByteAligned(pBS), OMX_Sts_BadArgErr);
+
+    /* Vertical edges use the first half of every table */
+    status = omxVCM4P10_FilterDeblockingChroma_VerEdge_I(
+        pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS);
+    armRetArgErrIf(status != OMX_Sts_NoErr, status);
+
+    /* Horizontal edges use the second half of every table */
+    return omxVCM4P10_FilterDeblockingChroma_HorEdge_I(
+        pSrcDst, srcdstStep, pAlpha + 2, pBeta + 2, pThresholds + 8, pBS + 16);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
new file mode 100644
index 0000000..6023862
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
@@ -0,0 +1,91 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DeblockLuma_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 luma deblock
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+
+/**
+ * Function: omxVCM4P10_DeblockLuma_I
+ *
+ * Description:
+ * In-place deblock filtering of the vertical and then the horizontal edges
+ * of one luma macroblock (16x16). The status of the horizontal-edge filter
+ * is returned.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in,out] pSrcDst    macroblock to filter; overwritten with the filtered
+ *                     result. Must be non-NULL and 8-byte aligned.
+ * [in] srcdstStep     step of the pSrcDst array. Must be a multiple of 8.
+ * [in] pAlpha         2x2 table of alpha thresholds: { external vertical
+ *                     edge, internal vertical edge, external horizontal
+ *                     edge, internal horizontal edge }. The horizontal pass
+ *                     is given pAlpha + 2.
+ * [in] pBeta          2x2 table of beta thresholds, same layout as pAlpha.
+ *                     The horizontal pass is given pBeta + 2.
+ * [in] pThresholds    16x2 table of thresholds (TC0): values for the left or
+ *                     above edge of each 4x4 block, in vertical block order
+ *                     and then in horizontal block order. Must be 4-byte
+ *                     aligned. The horizontal pass is given
+ *                     pThresholds + 16.
+ * [in] pBS            16x2 table of BS parameters, in scan block order for
+ *                     vertical edges and then horizontal edges. Valid range
+ *                     is [0,4], where pBS[i] == 4 may occur only for
+ *                     0<=i<=3 and pBS[i] == 4 if and only if pBS[i^1] == 4.
+ *                     Must be 4-byte aligned. The horizontal pass is given
+ *                     pBS + 16.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, checked here:
+ *   - any of pSrcDst, pAlpha, pBeta, pThresholds or pBS is NULL.
+ *   - pSrcDst is not 8-byte aligned.
+ *   - pThresholds or pBS is not 4-byte aligned.
+ *   - srcdstStep is not a multiple of 8.
+ * The pBS range restrictions are not tested in this function; presumably
+ * the edge filters reject them - verify against their implementation.
+ * Any other status comes from the edge filters and is passed through.
+ *
+ */
+
+OMXResult omxVCM4P10_DeblockLuma_I(
+    OMX_U8* pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8* pAlpha,
+    const OMX_U8* pBeta,
+    const OMX_U8* pThresholds,
+    const OMX_U8 *pBS
+)
+{
+    OMXResult status;
+
+    /* Picture buffer: present, 8-byte aligned, step a multiple of 8 */
+    armRetArgErrIf((pSrcDst == NULL) || armNot8ByteAligned(pSrcDst) || (srcdstStep & 7), OMX_Sts_BadArgErr);
+
+    /* Alpha / beta threshold tables: present */
+    armRetArgErrIf((pAlpha == NULL) || (pBeta == NULL), OMX_Sts_BadArgErr);
+
+    /* TC0 and BS tables: present and 4-byte aligned */
+    armRetArgErrIf((pThresholds == NULL) || armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
+    armRetArgErrIf((pBS == NULL) || armNot4ByteAligned(pBS), OMX_Sts_BadArgErr);
+
+    /* Vertical edges use the first half of every table */
+    status = omxVCM4P10_FilterDeblockingLuma_VerEdge_I(
+        pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS);
+    armRetArgErrIf(status != OMX_Sts_NoErr, status);
+
+    /* Horizontal edges use the second half of every table */
+    return omxVCM4P10_FilterDeblockingLuma_HorEdge_I(
+        pSrcDst, srcdstStep, pAlpha + 2, pBeta + 2, pThresholds + 16, pBS + 16);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
new file mode 100644
index 0000000..a19f277
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
@@ -0,0 +1,62 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 decode coefficients module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC
+ *
+ * Description:
+ * CAVLC decoding and inverse raster scan for a 2x2 block of ChromaDCLevel.
+ * The decoded coefficients are written to the packed position-coefficient
+ * buffer in increasing raster scan order, namely position order.
+ *
+ * Remarks:
+ * Thin wrapper: forwards to armVCM4P10_DecodeCoeffsToPair with the two
+ * trailing arguments fixed to 17 and 4.
+ *
+ * Parameters:
+ * [in,out] ppBitStream   Double pointer to the current byte in the bit
+ *                        stream buffer; *ppBitStream is updated after each
+ *                        block is decoded
+ * [in,out] pOffset       Pointer to the current bit position in the byte
+ *                        pointed to by *ppBitStream; *pOffset is updated
+ *                        after each block is decoded
+ * [out] pNumCoeff        Pointer to the number of nonzero coefficients
+ *                        in this block
+ * [out] ppPosCoefbuf     Double pointer to the destination residual
+ *                        coefficient-position pair buffer
+ *
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (
+    const OMX_U8** ppBitStream,
+    OMX_S32* pOffset,
+    OMX_U8* pNumCoeff,
+    OMX_U8** ppPosCoefbuf
+ )
+
+{
+    /* Fixed trailing arguments for the chroma DC case */
+    enum { chromaDcVlcSelect = 17, chromaDcMaxNumCoeff = 4 };
+
+    return armVCM4P10_DecodeCoeffsToPair(ppBitStream, pOffset, pNumCoeff,
+                                         ppPosCoefbuf, chromaDcVlcSelect,
+                                         chromaDcMaxNumCoeff);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
new file mode 100644
index 0000000..99bb4ce
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
@@ -0,0 +1,68 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 decode coefficients module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC
+ *
+ * Description:
+ * CAVLC decoding and inverse zigzag scan for a 4x4 block of
+ * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel or ChromaACLevel.
+ * Inverse field scan is not supported. The decoded coefficients are written
+ * to the packed position-coefficient buffer in increasing zigzag order
+ * instead of position order.
+ *
+ * Remarks:
+ * Thin wrapper: every argument is forwarded unchanged to
+ * armVCM4P10_DecodeCoeffsToPair.
+ *
+ * Parameters:
+ * [in,out] ppBitStream   Double pointer to the current byte in the bit
+ *                        stream buffer; *ppBitStream is updated after each
+ *                        block is decoded
+ * [in,out] pOffset       Pointer to the current bit position in the byte
+ *                        pointed to by *ppBitStream; *pOffset is updated
+ *                        after each block is decoded
+ * [in] sMaxNumCoeff      Maximum number of non-zero coefficients in the
+ *                        current block
+ * [in] sVLCSelect        VLC table selector, obtained from the number of
+ *                        non-zero AC coefficients of the above and left 4x4
+ *                        blocks. It is equivalent to the variable nC
+ *                        described in H.264 standard table 9-5, except its
+ *                        value can't be less than zero.
+ * [out] pNumCoeff        Pointer to the number of nonzero coefficients in
+ *                        this block
+ * [out] ppPosCoefbuf     Double pointer to the destination residual
+ *                        coefficient-position pair buffer
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC(
+    const OMX_U8** ppBitStream,
+    OMX_S32* pOffset,
+    OMX_U8* pNumCoeff,
+    OMX_U8**ppPosCoefbuf,
+    OMX_INT sVLCSelect,
+    OMX_INT sMaxNumCoeff
+ )
+{
+    OMXResult result;
+
+    /* Shared decoder handles both the 4x4 and the chroma DC entry points */
+    result = armVCM4P10_DecodeCoeffsToPair(ppBitStream, pOffset, pNumCoeff,
+                                           ppPosCoefbuf, sVLCSelect,
+                                           sMaxNumCoeff);
+    return result;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
new file mode 100644
index 0000000..2b71486
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
@@ -0,0 +1,480 @@
+;//
+;// (c) Copyright 2007 ARM Limited. All Rights Reserved.
+;//
+;// Description:
+;// H.264 inverse quantize and transform module
+;//
+;//
+
+
+
+;// Include standard headers
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+;// Import symbols required from other files
+;// (For example tables)
+
+ IMPORT armVCM4P10_UnpackBlock4x4
+ IMPORT armVCM4P10_TransformResidual4x4
+ IMPORT armVCM4P10_QPDivTable
+ IMPORT armVCM4P10_VMatrixU16
+ IMPORT armVCM4P10_QPModuloTable
+
+ M_VARIANTS ARM1136JS, ARM1136JS_U
+
+;// Set debugging level
+;//DEBUG_ON SETL {TRUE}
+
+
+;// Static Function: armVCM4P10_DequantLumaAC4x4
+
+;// Guarding implementation by the processor name
+
+ IF ARM1136JS
+
+;//Input Registers
+pSrcDst RN 0
+QP RN 1
+
+
+;//Output Registers
+
+
+;//Local Scratch Registers
+pQPdiv RN 4
+pQPmod RN 5
+pVRow RN 2
+QPmod RN 6
+shift RN 3
+rowLuma01 RN 1
+rowLuma23 RN 4
+
+SrcDst00 RN 5
+SrcDst02 RN 6
+SrcDst10 RN 7
+SrcDst12 RN 8
+SrcDst20 RN 9
+SrcDst22 RN 10
+SrcDst30 RN 11
+SrcDst32 RN 12
+
+temp1 RN 2
+temp2 RN 3
+temp3 RN 14
+
+
+ ;// Allocate stack memory required by the function
+
+ ;// Write function header: armVCM4P10_DequantLumaAC4x4 scales, in place, the 16 halfwords at pSrcDst (r0) by factors selected from QP (r1)
+ M_START armVCM4P10_DequantLumaAC4x4,r11
+
+ LDR pQPmod,=armVCM4P10_QPModuloTable ;// Addresses of the three lookup tables
+ LDR pQPdiv,=armVCM4P10_QPDivTable
+ LDR pVRow,=armVCM4P10_VMatrixU16
+
+ LDRSB QPmod,[pQPmod,QP] ;// (QP%6) * 6
+ LDRSB shift,[pQPdiv,QP] ;// Shift = QP / 6
+
+ LDRH rowLuma01,[pVRow,QPmod]! ;// rowLuma01 = [00|0a]; writeback leaves pVRow at the selected row; rowLuma01 is r1, so QP is overwritten here
+ LDRH temp3,[pVRow,#2] ;// temp3 = [00|0b]; temp3 is r14, used as scratch (presumably saved/restored by M_START/M_END - confirm in armCOMM_s.h)
+ LDRH rowLuma23,[pVRow,#4] ;// rowLuma23 = [00|0c]; rowLuma23 is r4, so pQPdiv is overwritten here
+ ORR rowLuma01,rowLuma01,temp3,LSL #16 ;// rowLuma01 = [0b|0a]
+
+ ;// Load all the 16 'src' values (SrcDst00/SrcDst02 reuse r5/r6, i.e. pQPmod/QPmod, which are no longer needed)
+ LDMIA pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32}
+
+
+ ;//*********************************************************************************************
+ ;//
+ ;// 'Shift' ranges between [0,8]
+ ;// So we can shift the packed rowLuma values [0b|0a] with a single LSL operation
+ ;//
+ ;//*********************************************************************************************
+
+ LSL rowLuma01,rowLuma01,shift
+ LSL rowLuma23,rowLuma23,shift
+
+
+ ;//**********************************************************************************************
+ ;//
+ ;// The idea is to unroll the Loop completely
+ ;// All the 16 src values are loaded at once into 8 registers : SrcDst<y><x> (above)
+ ;// 0<= armVCM4P10_PosToVCol4x4[i] <=2 for any 'i<16'
+ ;// So the only values of pVRow[i] that need to be loaded are for i=0,1,2
+ ;// These 3 values are loaded into rowLuma01 and rowLuma23 (above)
+ ;// We first calculate pVRow[armVCM4P10_PosToVCol4x4[i]]) << Shift which fits into 16 bits (above)
+ ;// Then the product pSrcDst[i] * (pVRow[armVCM4P10_PosToVCol4x4[i]] << Shift) is calculated
+ ;// Here we interleave the PKHBT operations for various rows to avoid pipeline stalls
+ ;//
+ ;// We then pack the two 16 bit multiplication results into a word and store them in one go
+ ;//
+ ;//**********************************************************************************************
+
+
+ ;// Row 1 (temp1/temp2 are r2/r3: pVRow and shift are no longer needed from here on)
+
+
+ SMULTB temp1,SrcDst00,rowLuma23 ;// pSrcDst[1] * (pVRow[2]<<Shift)
+ SMULBB SrcDst00,SrcDst00,rowLuma01 ;// pSrcDst[0] * (pVRow[0]<<Shift)
+
+ SMULTB temp2,SrcDst02,rowLuma23 ;// pSrcDst[3] * (pVRow[2]<<Shift)
+ SMULBB SrcDst02,SrcDst02,rowLuma01 ;// pSrcDst[2] * (pVRow[0]<<Shift)
+
+ PKHBT SrcDst00,SrcDst00,temp1,LSL #16 ;// Pack the first two product values
+
+
+ ;// Row 2
+ SMULTT temp1,SrcDst10,rowLuma01 ;// pSrcDst[5] * (pVRow[1]<<Shift)
+ SMULBB SrcDst10,SrcDst10,rowLuma23 ;// pSrcDst[4] * (pVRow[2]<<Shift)
+
+ PKHBT SrcDst02,SrcDst02,temp2,LSL #16 ;// Pack the next two product values
+ SMULTT temp2,SrcDst12,rowLuma01 ;// pSrcDst[7] * (pVRow[1]<<Shift)
+ SMULBB SrcDst12,SrcDst12,rowLuma23 ;// pSrcDst[6] * (pVRow[2]<<Shift)
+
+ PKHBT SrcDst10,SrcDst10,temp1,LSL #16 ;// Pack the next two product values
+
+
+ ;// Row 3
+
+ SMULTB temp1,SrcDst20,rowLuma23 ;// pSrcDst[9] * (pVRow[2]<<Shift)
+ SMULBB SrcDst20,SrcDst20,rowLuma01 ;// pSrcDst[8] * (pVRow[0]<<Shift)
+
+ PKHBT SrcDst12,SrcDst12,temp2,LSL #16 ;// Pack the next two product values
+ SMULTB temp2,SrcDst22,rowLuma23 ;// pSrcDst[11] * (pVRow[2]<<Shift)
+ SMULBB SrcDst22,SrcDst22,rowLuma01 ;// pSrcDst[10] * (pVRow[0]<<Shift)
+
+ PKHBT SrcDst20,SrcDst20,temp1,LSL #16 ;// Pack the next two product values
+
+
+
+ ;// Row 4
+
+ SMULTT temp1,SrcDst30,rowLuma01 ;// pSrcDst[13] * (pVRow[1]<<Shift)
+ SMULBB SrcDst30,SrcDst30,rowLuma23 ;// pSrcDst[12] * (pVRow[2]<<Shift)
+
+ SMULTT temp3,SrcDst32,rowLuma01 ;// pSrcDst[15] * (pVRow[1]<<Shift)
+ SMULBB SrcDst32,SrcDst32,rowLuma23 ;// pSrcDst[14] * (pVRow[2]<<Shift)
+
+ PKHBT SrcDst22,SrcDst22,temp2,LSL #16 ;// Pack the remaining product values
+ PKHBT SrcDst30,SrcDst30,temp1,LSL #16 ;// [pSrcDst[13] product | pSrcDst[12] product]
+ PKHBT SrcDst32,SrcDst32,temp3,LSL #16 ;// [pSrcDst[15] product | pSrcDst[14] product]
+
+
+ STMIA pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32} ;// Write all 16 products back over the input block
+
+
+ ;// Set return value (no instruction here writes r0)
+
+
+
+ ;// Write function tail
+ M_END
+
+ ENDIF ;//ARM1136JS
+
+
+;// Guarding implementation by the processor name
+
+ IF ARM1136JS_U
+
+;//Input Registers
+pSrcDst RN 0
+QP RN 1
+
+
+;//Output Registers
+
+
+;//Local Scratch Registers
+pQPdiv RN 4
+pQPmod RN 5
+pVRow RN 2
+QPmod RN 6
+shift RN 3
+rowLuma01 RN 1
+rowLuma23 RN 4
+
+SrcDst00 RN 5
+SrcDst02 RN 6
+SrcDst10 RN 7
+SrcDst12 RN 8
+SrcDst20 RN 9
+SrcDst22 RN 10
+SrcDst30 RN 11
+SrcDst32 RN 12
+
+temp1 RN 2
+temp2 RN 3
+temp3 RN 14
+
+
+ ;// Allocate stack memory required by the function
+
+ ;// Write function header: ARM1136JS_U build of armVCM4P10_DequantLumaAC4x4; scales, in place, the 16 halfwords at pSrcDst (r0) by factors selected from QP (r1)
+ M_START armVCM4P10_DequantLumaAC4x4,r11
+
+ LDR pQPmod,=armVCM4P10_QPModuloTable ;// Addresses of the three lookup tables
+ LDR pQPdiv,=armVCM4P10_QPDivTable
+ LDR pVRow,=armVCM4P10_VMatrixU16
+
+ LDRSB QPmod,[pQPmod,QP] ;// (QP%6) * 6
+ LDRSB shift,[pQPdiv,QP] ;// Shift = QP / 6
+
+ LDR rowLuma01,[pVRow,QPmod]! ;// rowLuma01 = [0b|0a]; one word load replaces the two LDRH + ORR of the ARM1136JS variant (NOTE(review): presumably relies on unaligned word access being permitted - confirm)
+ LDR rowLuma23,[pVRow,#4] ;// rowLuma23 = [0d|0c]; only the bottom halfword (c) is read by the multiplies below
+
+ ;// Load all the 16 'src' values (SrcDst00/SrcDst02 reuse r5/r6, i.e. pQPmod/QPmod, which are no longer needed)
+ LDMIA pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32}
+
+
+ ;//*********************************************************************************************
+ ;//
+ ;// 'Shift' ranges between [0,8]
+ ;// So we can shift the packed rowLuma values [0b|0a] with a single LSL operation
+ ;//
+ ;//*********************************************************************************************
+
+ LSL rowLuma01,rowLuma01,shift
+ LSL rowLuma23,rowLuma23,shift
+
+
+ ;//**********************************************************************************************
+ ;//
+ ;// The idea is to unroll the Loop completely
+ ;// All the 16 src values are loaded at once into 8 registers : SrcDst<y><x> (above)
+ ;// 0<= armVCM4P10_PosToVCol4x4[i] <=2 for any 'i<16'
+ ;// So the only values of pVRow[i] that need to be loaded are for i=0,1,2
+ ;// These 3 values are loaded into rowLuma01 and rowLuma23 (above)
+ ;// We first calculate pVRow[armVCM4P10_PosToVCol4x4[i]]) << Shift which fits into 16 bits (above)
+ ;// Then the product pSrcDst[i] * (pVRow[armVCM4P10_PosToVCol4x4[i]] << Shift) is calculated
+ ;// Here we interleave the PKHBT operations for various rows to avoid pipeline stalls
+ ;//
+ ;// We then pack the two 16 bit multiplication results into a word and store them in one go
+ ;//
+ ;//**********************************************************************************************
+
+
+ ;// Row 1 (temp1/temp2 are r2/r3: pVRow and shift are no longer needed from here on)
+
+
+ SMULTB temp1,SrcDst00,rowLuma23 ;// pSrcDst[1] * (pVRow[2]<<Shift)
+ SMULBB SrcDst00,SrcDst00,rowLuma01 ;// pSrcDst[0] * (pVRow[0]<<Shift)
+
+ SMULTB temp2,SrcDst02,rowLuma23 ;// pSrcDst[3] * (pVRow[2]<<Shift)
+ SMULBB SrcDst02,SrcDst02,rowLuma01 ;// pSrcDst[2] * (pVRow[0]<<Shift)
+
+ PKHBT SrcDst00,SrcDst00,temp1,LSL #16 ;// Pack the first two product values
+
+
+ ;// Row 2
+ SMULTT temp1,SrcDst10,rowLuma01 ;// pSrcDst[5] * (pVRow[1]<<Shift)
+ SMULBB SrcDst10,SrcDst10,rowLuma23 ;// pSrcDst[4] * (pVRow[2]<<Shift)
+
+ PKHBT SrcDst02,SrcDst02,temp2,LSL #16 ;// Pack the next two product values
+ SMULTT temp2,SrcDst12,rowLuma01 ;// pSrcDst[7] * (pVRow[1]<<Shift)
+ SMULBB SrcDst12,SrcDst12,rowLuma23 ;// pSrcDst[6] * (pVRow[2]<<Shift)
+
+ PKHBT SrcDst10,SrcDst10,temp1,LSL #16 ;// Pack the next two product values
+
+
+ ;// Row 3
+
+ SMULTB temp1,SrcDst20,rowLuma23 ;// pSrcDst[9] * (pVRow[2]<<Shift)
+ SMULBB SrcDst20,SrcDst20,rowLuma01 ;// pSrcDst[8] * (pVRow[0]<<Shift)
+
+ PKHBT SrcDst12,SrcDst12,temp2,LSL #16 ;// Pack the next two product values
+ SMULTB temp2,SrcDst22,rowLuma23 ;// pSrcDst[11] * (pVRow[2]<<Shift)
+ SMULBB SrcDst22,SrcDst22,rowLuma01 ;// pSrcDst[10] * (pVRow[0]<<Shift)
+
+ PKHBT SrcDst20,SrcDst20,temp1,LSL #16 ;// Pack the next two product values
+
+
+
+ ;// Row 4
+
+ SMULTT temp1,SrcDst30,rowLuma01 ;// pSrcDst[13] * (pVRow[1]<<Shift)
+ SMULBB SrcDst30,SrcDst30,rowLuma23 ;// pSrcDst[12] * (pVRow[2]<<Shift)
+
+ SMULTT temp3,SrcDst32,rowLuma01 ;// pSrcDst[15] * (pVRow[1]<<Shift)
+ SMULBB SrcDst32,SrcDst32,rowLuma23 ;// pSrcDst[14] * (pVRow[2]<<Shift)
+
+ PKHBT SrcDst22,SrcDst22,temp2,LSL #16 ;// Pack the remaining product values
+ PKHBT SrcDst30,SrcDst30,temp1,LSL #16 ;// [pSrcDst[13] product | pSrcDst[12] product]
+ PKHBT SrcDst32,SrcDst32,temp3,LSL #16 ;// [pSrcDst[15] product | pSrcDst[14] product]
+
+
+ STMIA pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32} ;// Write all 16 products back over the input block
+
+
+ ;// Set return value (no instruction here writes r0)
+
+
+
+ ;// Write function tail
+ M_END
+
+ ENDIF ;//ARM1136JS_U
+
+
+
+
+
+;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd
+
+;// Guarding implementation by the processor name
+
+ IF ARM1136JS
+
+;//Input Registers
+ppSrc RN 0
+pPred RN 1
+pDC RN 2
+pDst RN 3
+
+
+;//Output Registers
+result RN 0
+
+;//Local Scratch Registers
+pDelta RN 4
+pDeltaTmp RN 6
+AC RN 5 ;//Load from stack
+pPredTemp RN 7
+pDCTemp RN 8
+pDstTemp RN 9
+pDeltaArg1 RN 1
+pDeltaArg0 RN 0
+QP RN 1 ;//Load from stack
+DCval RN 10
+DCvalCopy RN 11
+predstep RN 1
+dstStep RN 10
+ycounter RN 0
+PredVal1 RN 3
+PredVal2 RN 5
+DeltaVal1 RN 2
+DeltaVal2 RN 11
+PredVal RN 8
+tmpDeltaVal RN 6
+sum1 RN 12
+sum2 RN 14
+
+
+
+ ;// Allocate stack memory required by the function
+ M_ALLOC8 pBuffer, 32 ;// 32-byte scratch block: the 16 halfword residuals addressed through pDelta
+
+
+ ;// Write function header: r0 = ppSrc, r1 = pPred, r2 = pDC, r3 = pDst; predStep, dstStep, QP and AC are read from the stack
+ M_START omxVCM4P10_DequantTransformResidualFromPairAndAdd,r11
+
+ ;// Define stack arguments
+ M_ARG predStepOnStack, 4
+ M_ARG dstStepOnStack,4
+ M_ARG QPOnStack, 4
+ M_ARG ACOnStack,4
+
+
+ M_ADR pDelta,pBuffer ;// pDelta = address of the scratch block
+ M_LDR AC,ACOnStack
+
+
+ ;// Save registers r1,r2,r3 before function call
+ MOV pPredTemp,pPred
+ MOV pDCTemp,pDC
+ MOV pDstTemp,pDst
+
+ CMP AC,#0 ;// AC == 0: skip unpack/dequant/transform and fill the block from the DC value only
+ BEQ DCcase
+ MOV pDeltaArg1,pDelta ;// Set up r1 for armVCM4P10_UnpackBlock4x4 (r0 still holds ppSrc from entry)
+
+ BL armVCM4P10_UnpackBlock4x4
+
+ M_LDR QP,QPOnStack ;// Set up r1 for DequantLumaAC4x4
+ MOV pDeltaArg0,pDelta ;// Set up r0 for DequantLumaAC4x4
+
+ BL armVCM4P10_DequantLumaAC4x4
+
+
+ CMP pDCTemp,#0 ;// pDC is optional on this path
+ LDRSHNE DCval,[pDCTemp] ;// pDC != NULL: fetch the DC value ...
+ MOV pDeltaArg0,pDelta ;// Set up r0 for armVCM4P10_TransformResidual4x4
+ MOV pDeltaArg1,pDelta ;// Set up r1 for armVCM4P10_TransformResidual4x4
+ STRHNE DCval,[pDelta] ;// ... and overwrite pDelta[0] with it before the transform
+
+ BL armVCM4P10_TransformResidual4x4
+ B OutDCcase
+
+
+DCcase
+ LDRSH DCval,[pDCTemp] ;// pDC is dereferenced without a NULL check here (NOTE(review): presumably callers guarantee pDC != NULL when AC == 0 - confirm)
+ ADD DCval,DCval,#32
+ ASR DCval,DCval,#6 ;// DCval = (DC + 32) >> 6
+ PKHBT DCval,DCval,DCval,LSL #16 ;// Duplicating the Lower halfword
+ MOV DCvalCopy, DCval ;// Needed for STRD
+ STRD DCval, [pDelta, #0] ;// pDelta[0] = pDelta[1] = pDelta[2] = pDelta[3] = DCval
+ STRD DCval, [pDelta, #8] ;// pDelta[4] = pDelta[5] = pDelta[6] = pDelta[7] = DCval
+ STRD DCval, [pDelta, #16] ;// pDelta[8] = pDelta[9] = pDelta[10] = pDelta[11] = DCval
+ STRD DCval, [pDelta, #24] ;// pDelta[12] = pDelta[13] = pDelta[14] = pDelta[15] = DCval
+
+
+OutDCcase
+ M_LDR predstep,predStepOnStack
+ M_LDR dstStep,dstStepOnStack ;// dstStep is r10, so DCval is overwritten here
+
+ LDMIA pDelta!,{tmpDeltaVal,DeltaVal2} ;// Pre load
+ MOV ycounter,#4 ;// Counter for the PredPlusDeltaLoop
+ LDR PredVal,[pPredTemp] ;// Pre load
+
+PredPlusDeltaLoop
+
+
+ SUBS ycounter,ycounter,#1 ;// These flags drive LDRGT, LDMGTIA and BGT below
+ ADD pPredTemp,pPredTemp,predstep ;// Increment pPred ptr
+
+ PKHBT DeltaVal1,tmpDeltaVal,DeltaVal2,LSL #16 ;// Deltaval1 = [C A]
+ PKHTB DeltaVal2,DeltaVal2,tmpDeltaVal,ASR #16 ;// DeltaVal2 = [D B]
+
+ UXTB16 PredVal1,PredVal ;// PredVal1 = [0c0a]
+ UXTB16 PredVal2,PredVal,ROR #8 ;// PredVal2 = [0d0b]
+
+ LDRGT PredVal,[pPredTemp] ;// Pre load
+
+ QADD16 sum2,DeltaVal2,PredVal2 ;// Add and saturate to 16 bits
+ QADD16 sum1,DeltaVal1,PredVal1
+
+ USAT16 sum2,#8,sum2 ;// armClip(0,255,sum2)
+ USAT16 sum1,#8,sum1
+
+ LDMGTIA pDelta!,{tmpDeltaVal,DeltaVal2} ;// Pre load
+
+ ORR sum1,sum1,sum2,LSL #8 ;// sum1 = [dcba]
+ STR sum1,[pDstTemp] ;// Store one row of four output bytes
+
+ ADD pDstTemp,pDstTemp,dstStep ;// Increment pDst ptr
+ BGT PredPlusDeltaLoop
+
+
+ ;// Set return value
+ MOV result,#OMX_Sts_NoErr
+
+End
+
+
+ ;// Write function tail
+
+ M_END
+
+ ENDIF ;//ARM1136JS
+
+
+;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd
+
+;// Guarding implementation by the processor name
+
+
+
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
new file mode 100644
index 0000000..6d960f0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
@@ -0,0 +1,336 @@
+;//
+;//
+;// File Name: omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+ IF ARM1136JS
+
+MASK_0 EQU 0x00000000
+MASK_1 EQU 0x01010101
+LOOP_COUNT EQU 0x50000000
+
+;// Declare input registers
+
+pSrcDst RN 0
+srcdstStep RN 1
+pAlphaArg RN 2
+pBetaArg RN 3
+
+pThresholds RN 6
+pBS RN 9
+pQ0 RN 0
+bS RN 10
+
+alpha RN 6
+alpha0 RN 6
+alpha1 RN 8
+
+beta RN 7
+beta0 RN 7
+beta1 RN 9
+
+;// Declare Local/Temporary variables
+
+;// Pixels
+p_0 RN 3
+p_1 RN 5
+q_0 RN 8
+q_1 RN 9
+
+;// Filtering
+
+dp0q0 RN 12
+dp1p0 RN 12
+dq1q0 RN 12
+
+ap0q0 RN 4
+filt RN 2
+
+m00 RN 14
+m01 RN 11
+
+pQ0 RN 0
+Step RN 1
+
+;// Output
+
+P_0 RN 6
+Q_0 RN 7
+
+;//Declarations for bSLT4 kernel
+
+tC RN 12
+tC0 RN 5
+tC1 RN 12
+pos RN 5
+neg RN 9
+
+;//Declarations for bSGE4 kernel
+
+
+;// Miscellaneous
+XY RN 8
+
+a RN 10
+t1 RN 10
+t2 RN 12
+t3 RN 14
+t4 RN 6
+t5 RN 5
+
+
+ ;// Allocate stack memory
+ M_ALLOC4 ppThresholds,4 ;// saved running pThresholds pointer
+ M_ALLOC8 pAlphaBeta0,8 ;// alpha/beta pair reloaded at the end of every inner pass
+ M_ALLOC8 pAlphaBeta1,8 ;// alpha[1]/beta[1] pair, copied into pAlphaBeta0 at ExitLoopY
+ M_ALLOC8 pXYBS,4 ;// loop counter XY; M_STRD/M_LDRD on pXYBS move XY and pBS together (presumably this slot and ppBS are adjacent - confirm in armCOMM_s.h)
+ M_ALLOC4 ppBS,4 ;// saved pBS
+
+ ;// Function header: r0 = pSrcDst, r1 = srcdstStep, r2 = pAlpha, r3 = pBeta; pThresholds and pBS are read from the stack
+ M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r11
+
+ ;//Input arguments on the stack
+ M_ARG ppThresholdsArg, 4
+ M_ARG ppBSArg, 4
+
+ LDRB alpha1, [pAlphaArg,#1]
+ LDRB beta1, [pBetaArg,#1]
+ M_LDR pThresholds, ppThresholdsArg ;// pThresholds shares r6 with alpha0; it is saved to ppThresholds below before alpha0 is loaded
+ LDR a,=MASK_1
+ LDRB beta0, [pBetaArg]
+ M_STR pThresholds, ppThresholds
+ LDRB alpha0, [pAlphaArg]
+
+ MUL alpha1, alpha1, a ;// Multiplying a byte by 0x01010101 replicates it into all four byte lanes
+ MUL beta1, beta1, a
+ MUL alpha0, alpha0, a
+ MUL beta0, beta0, a
+
+ M_STRD alpha1, beta1, pAlphaBeta1
+ M_LDR pBS, ppBSArg ;// pBS shares r9 with beta1, which has just been stored
+ M_STRD alpha0, beta0, pAlphaBeta0
+
+ LDR XY,=LOOP_COUNT ;// 0x50000000: each ADDS XY,XY,XY shifts one bit out; carry set ends the inner loop, a zero result ends the outer loop (2 x 2 passes)
+ M_STRD XY, pBS, pXYBS
+
+ SUB pQ0, pQ0, srcdstStep, LSL #1 ;// Start two rows above pSrcDst (the p1 row)
+LoopY
+LoopX
+;//---------------Load Pixels-------------------
+ LDRH bS, [pBS], #2 ;// Two bS bytes per 4-pixel group
+
+ M_STR pBS, ppBS
+ M_LDR p_1, [pQ0],srcdstStep
+
+ CMP bS, #0 ;// Z is consumed by BEQ NoFilterBS0 after the pixel loads
+
+ M_LDR p_0, [pQ0],srcdstStep
+ M_LDR q_0, [pQ0],srcdstStep
+ M_LDR q_1, [pQ0] ;// pQ0 is left on the q1 row
+ LDR m01, =MASK_1 ;// 01010101 mask
+ BEQ NoFilterBS0
+
+
+ ;// p_0 = [r3p0 r2p0 r1p0 r0p0]
+ ;// p_1 = [r3p1 r2p1 r1p1 r0p1]
+ ;// q_0 = [r3q0 r2q0 r1q0 r0q0]
+ ;// q_1 = [r3q1 r2q1 r1q1 r0q1]
+
+;//--------------Filtering Decision -------------------
+ MOV m00, #MASK_0 ;// 00000000 mask
+
+ MOV filt, m01 ;// filt: one 0x01 flag byte per pixel lane
+ TST bS, #0xff00
+ MOVEQ filt, filt, LSR #16 ;// High bS byte is zero: clear the upper two lanes
+ TST bS, #0xff
+ MOVEQ filt, filt, LSL #16 ;// Low bS byte is zero: clear the lower two lanes
+ TST bS, #4 ;// Z is consumed by BEQ bSLT4 at the end of the decision code
+
+
+ ;// Check |p0-q0|<Alpha
+ USUB8 dp0q0, p_0, q_0
+ USUB8 a, q_0, p_0
+ SEL ap0q0, a, dp0q0
+ USUB8 a, ap0q0, alpha
+ SEL filt, m00, filt
+
+ ;// Check |p1-p0|<Beta
+ USUB8 dp1p0, p_1, p_0
+ USUB8 a, p_0, p_1
+ SEL a, a, dp1p0
+ USUB8 a, a, beta
+ SEL filt, m00, filt
+
+ ;// Check |q1-q0|<Beta
+ USUB8 dq1q0, q_1, q_0
+ USUB8 a, q_0, q_1
+ SEL a, a, dq1q0
+ USUB8 a, a, beta
+ SEL filt, m00, filt
+
+ BEQ bSLT4
+;//-------------------Filter--------------------
+bSGE4
+ ;//---------bSGE4 Execution---------------
+ CMP filt, #0 ;// No lane left to filter: leave through NoFilterFilt0
+
+ M_LDR pThresholds, ppThresholds
+
+ ;// Compute P0b
+ UHADD8 t1, p_0, q_1
+ BEQ NoFilterFilt0
+ MVN t2, p_1
+ UHSUB8 t1, t1, t2
+ USUB8 t2, filt, m01 ;// t2 is overwritten below; this instruction is here for the per-lane GE flags used by the SELs
+ EOR t1, t1, m01, LSL #7
+
+ ADD pThresholds,pThresholds, #2 ;// Thresholds are not read on this path; step past this group's two entries
+
+ ;// Compute Q0b
+ UHADD8 t2, q_0, p_1
+ MVN t3, q_1
+ UHSUB8 t2, t2, t3
+ M_STR pThresholds, ppThresholds
+ SEL P_0, t1, p_0
+ EOR t2, t2, m01, LSL #7
+ SEL Q_0, t2, q_0
+
+ SUB pQ0, pQ0, srcdstStep, LSL #1 ;// Back from the q1 row to the p0 row for the stores
+ B StoreResultAndExit
+
+;//---------- Exit of LoopX --------------
+;//---- for the case of no filtering -----
+
+NoFilterFilt0
+NoFilterBS0
+ M_LDR pThresholds, ppThresholds
+ SUB pQ0, pQ0, srcdstStep, LSL #1
+ SUB pQ0, pQ0, srcdstStep ;// Three rows back in total: from the q1 row to the p1 row
+ ADD pQ0, pQ0, #4 ;// Next group of four pixels
+ ADD pThresholds, pThresholds, #2
+
+ ;// Load counter for LoopX
+ M_LDRD XY, pBS, pXYBS
+ M_STR pThresholds, ppThresholds
+ M_LDRD alpha, beta, pAlphaBeta0
+
+ ;// Align the pointer
+ ADDS XY, XY, XY ;// Carry clear: another inner pass (LoopY and LoopX label the same address)
+ M_STR XY, pXYBS
+ BCC LoopY
+ B ExitLoopY
+
+bSLT4
+ ;//---------bSLT4 Execution---------------
+ M_LDR pThresholds, ppThresholds
+ CMP filt, #0
+
+ ;// Since beta <= 18 and alpha <= 255 we know
+ ;// -254 <= p0-q0 <= 254
+ ;// -17 <= q1-q0 <= 17
+ ;// -17 <= p1-p0 <= 17
+
+ ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
+ ;//
+ ;// Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
+ ;// = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
+ ;// = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
+
+ USUB8 t1, p_1, p_0
+ USUB8 t2, q_1, q_0
+ BEQ NoFilterFilt0
+
+ LDRB tC0, [pThresholds],#1
+ SSUB8 t1, t1, t2
+ LDRB tC1, [pThresholds],#1
+ M_STR pThresholds, ppThresholds
+ UHSUB8 t4, p_0, q_0
+ ORR tC, tC0, tC1, LSL #16 ;// tC0 in byte 0, tC1 in byte 2
+ USUB8 t5, p_0, q_0
+ AND t5, t5, m01
+ SHSUB8 t1, t1, t5
+ ORR tC, tC, LSL #8 ;// Duplicate each threshold into the neighbouring byte lane
+ SSUB8 t1, t1, t5
+ SHSUB8 t1, t1, t4
+ UQADD8 tC, tC, m01 ;// tC + 1 in every lane (saturating)
+ SADD8 t1, t1, m01
+ USUB8 t5, filt, m01 ;// Sets the per-lane GE flags from filt for the SEL below
+ SHSUB8 t1, t1, t4
+ SEL tC, tC, m00 ;// Keep tC only in lanes selected by filt
+
+ ;// Split into positive and negative part and clip
+
+ SSUB8 t1, t1, m00
+ SEL pos, t1, m00
+ USUB8 neg, pos, t1
+ USUB8 t3, pos, tC
+ SEL pos, tC, pos
+ USUB8 t3, neg, tC
+ SEL neg, tC, neg
+ UQADD8 P_0, p_0, pos
+ UQSUB8 Q_0, q_0, pos
+ UQSUB8 P_0, P_0, neg
+ UQADD8 Q_0, Q_0, neg
+
+ SUB pQ0, pQ0, srcdstStep, LSL #1 ;// Back from the q1 row to the p0 row for the stores
+
+ ;// Choose to store the filtered
+ ;// value or the original pixel
+ USUB8 t1, filt, m01
+ SEL P_0, P_0, p_0
+ SEL Q_0, Q_0, q_0
+
+StoreResultAndExit
+
+ ;//---------Store result---------------
+
+ ;// P_0 = [r0p0 r1p0 r2p0 r3p0]
+ ;// Q_0 = [r0q0 r1q0 r2q0 r3q0]
+
+ M_STR P_0, [pQ0], srcdstStep ;// Write the p0 row, advance to the q0 row
+ STR Q_0, [pQ0], #4 ;// Write the q0 row, advance four pixels to the right
+
+ M_LDRD XY, pBS, pXYBS
+ M_LDRD alpha, beta, pAlphaBeta0
+
+ SUB pQ0, pQ0, srcdstStep, LSL #1 ;// Rewind to the p1 row for the next group
+
+ ADDS XY, XY, XY ;// Carry clear: another inner pass
+ M_STR XY, pXYBS
+ BCC LoopX
+
+;//-------- Common Exit of LoopY -----------------
+ ;// Align the pointers
+
+ExitLoopY
+ ADD pBS, pBS, #4
+ M_LDRD alpha, beta, pAlphaBeta1 ;// Later passes use the alpha[1]/beta[1] pair
+ SUB pQ0, pQ0, #8 ;// Back to the first column ...
+ ADD pQ0, pQ0, srcdstStep, LSL #2 ;// ... and four rows down
+ M_STRD alpha, beta, pAlphaBeta0
+
+ BNE LoopY ;// Z still comes from the last ADDS XY,XY,XY: loop until XY has shifted to zero
+ MOV r0, #OMX_Sts_NoErr
+
+;//-----------------End Filter--------------------
+ M_END
+
+ ENDIF
+
+ END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
new file mode 100644
index 0000000..00c8354
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
@@ -0,0 +1,437 @@
+;//
+;//
+;// File Name: omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+ IF ARM1136JS
+
+
+MASK_0 EQU 0x00000000
+MASK_1 EQU 0x01010101
+MASK_2 EQU 0x0000ff00
+LOOP_COUNT EQU 0x50000000
+
+;// Declare input registers
+
+pSrcDst RN 0
+srcdstStep RN 1
+pAlphaArg RN 2
+pBetaArg RN 3
+
+pThresholds RN 6
+pBS RN 9
+pQ0 RN 0
+bS RN 2
+bSTemp RN 10
+
+alpha RN 6
+alpha0 RN 6
+alpha1 RN 8
+
+beta RN 7
+beta0 RN 7
+beta1 RN 9
+
+;// Declare Local/Temporary variables
+
+;// Pixels
+p_0 RN 3
+p_1 RN 5
+q_0 RN 8
+q_1 RN 9
+
+;// Unpacking
+mask RN 11
+
+row0 RN 2
+row1 RN 4
+row2 RN 5
+row3 RN 3
+
+row4 RN 8
+row5 RN 9
+row6 RN 10
+row7 RN 12
+
+tunpk0 RN 2
+tunpk2 RN 10
+tunpk3 RN 12
+
+tunpk4 RN 4
+tunpk5 RN 5
+tunpk6 RN 14
+tunpk7 RN 2
+
+;// Filtering
+
+dp0q0 RN 12
+dp1p0 RN 12
+dq1q0 RN 12
+
+ap0q0 RN 4
+filt RN 2
+
+m00 RN 14
+m01 RN 11
+
+pQ0 RN 0
+Step RN 1
+
+;// Output
+
+P_0 RN 6
+Q_0 RN 7
+
+;//Declarations for bSLT4 kernel
+
+tC RN 12
+tC0 RN 5
+tC1 RN 12
+pos RN 5
+neg RN 9
+
+;//Declarations for bSGE4 kernel
+
+
+;// Miscellaneous
+XY RN 8
+
+a RN 10
+t1 RN 10
+t2 RN 12
+t3 RN 14
+t4 RN 6
+t5 RN 5
+
+
+ ;// Allocate stack memory
+ M_ALLOC4 ppThresholds,4 ;// saved running pThresholds pointer
+ M_ALLOC8 pAlphaBeta0,8 ;// alpha[0]/beta[0] pair, reloaded at ExitLoopY
+ M_ALLOC8 pAlphaBeta1,8 ;// alpha[1]/beta[1] pair, loaded at the end of every inner pass
+ M_ALLOC8 pXYBS,4 ;// loop counter XY; M_STRD/M_LDRD on pXYBS move XY and pBS together (presumably this slot and ppBS are adjacent - confirm in armCOMM_s.h)
+ M_ALLOC4 ppBS,4 ;// saved pBS
+
+ ;// Function header: r0 = pSrcDst, r1 = srcdstStep, r2 = pAlpha, r3 = pBeta; pThresholds and pBS are read from the stack
+ M_START omxVCM4P10_FilterDeblockingChroma_VerEdge_I, r11
+
+ ;//Input arguments on the stack
+ M_ARG ppThresholdsArg, 4
+ M_ARG ppBSArg, 4
+
+ LDRB alpha1, [pAlphaArg,#1]
+ LDRB beta1, [pBetaArg,#1]
+ M_LDR pThresholds, ppThresholdsArg ;// pThresholds shares r6 with alpha0; it is saved to ppThresholds below before alpha0 is loaded
+ LDR a,=MASK_1
+ LDRB beta0, [pBetaArg]
+ M_STR pThresholds, ppThresholds
+ LDRB alpha0, [pAlphaArg]
+
+ MUL alpha1, alpha1, a ;// Multiplying a byte by 0x01010101 replicates it into all four byte lanes
+ MUL beta1, beta1, a
+ MUL alpha0, alpha0, a
+ MUL beta0, beta0, a
+
+ M_STRD alpha1, beta1, pAlphaBeta1
+ M_LDR pBS, ppBSArg ;// pBS shares r9 with beta1, which has just been stored
+ M_STRD alpha0, beta0, pAlphaBeta0
+
+ LDR XY,=LOOP_COUNT ;// 0x50000000: each ADDS XY,XY,XY shifts one bit out; carry set ends the inner loop, a zero result ends the outer loop (2 x 2 passes)
+ M_STRD XY, pBS, pXYBS
+
+
+LoopY
+LoopX
+;//---------------Load Pixels-------------------
+
+;//----------------Pack q0-q1-----------------------
+ LDRH bS, [pBS], #8 ;// Two bS bytes; the pointer advances by 8 per inner pass
+ LDR mask, =MASK_2
+
+ M_LDRH row4, [pQ0], srcdstStep
+ CMP bS, #0 ;// Z is consumed by BEQ.W NoFilterBS0 below
+ M_STR pBS, ppBS
+ M_LDRH row5, [pQ0], srcdstStep
+ BEQ.W NoFilterBS0
+ LDRH row6, [pQ0]
+ LDRH row7, [pQ0, srcdstStep]
+
+ ;// row4 = [0 0 r0q0 r0q1]
+ ;// row5 = [0 0 r1q0 r1q1]
+ ;// row6 = [0 0 r2q0 r2q1]
+ ;// row7 = [0 0 r3q0 r3q1]
+
+ AND tunpk4, mask, row4
+ AND tunpk5, mask, row4, LSL#8
+ UXTAB tunpk4, tunpk4, row5, ROR#8
+ UXTAB tunpk5, tunpk5, row5
+ AND tunpk6, mask, row6
+ AND tunpk7, mask, row6, LSL#8
+ UXTAB tunpk6, tunpk6, row7, ROR#8
+ UXTAB tunpk7, tunpk7, row7
+
+ ;// tunpk4 = [0 0 r0q0 r1q0]
+ ;// tunpk5 = [0 0 r0q1 r1q1]
+ ;// tunpk6 = [0 0 r2q0 r3q0]
+ ;// tunpk7 = [0 0 r2q1 r3q1]
+
+ SUB pQ0, pQ0, srcdstStep, LSL #1 ;// Undo the two row advances ...
+ SUB pQ0, pQ0, #2 ;// ... and step two bytes left, to the p-side pixel pair
+
+ PKHBT q_1, tunpk6, tunpk4, LSL#16
+ PKHBT q_0, tunpk7, tunpk5, LSL#16
+
+ ;// q_0 = [r0q0 r1q0 r2q0 r3q0]
+ ;// q_1 = [r0q1 r1q1 r2q1 r3q1]
+
+
+;//----------------Pack p0-p1-----------------------
+
+ M_LDRH row0, [pQ0], srcdstStep
+ M_LDRH row1, [pQ0], srcdstStep
+ LDRH row2, [pQ0]
+ LDRH row3, [pQ0, srcdstStep]
+
+ ;// row0 = [0 0 r0p0 r0p1]
+ ;// row1 = [0 0 r1p0 r1p1]
+ ;// row2 = [0 0 r2p0 r2p1]
+ ;// row3 = [0 0 r3p0 r3p1]
+
+ AND tunpk2, mask, row0
+ AND tunpk6, mask, row0, LSL#8
+ UXTAB tunpk2, tunpk2, row1, ROR#8
+ UXTAB tunpk6, tunpk6, row1
+
+ AND tunpk0, mask, row2
+ AND tunpk3, mask, row2, LSL#8
+ UXTAB tunpk0, tunpk0, row3, ROR#8
+ UXTAB tunpk3, tunpk3, row3
+
+ ;// tunpk2 = [0 0 r0p0 r1p0]
+ ;// tunpk6 = [0 0 r0p1 r1p1]
+ ;// tunpk0 = [0 0 r2p0 r3p0]
+ ;// tunpk3 = [0 0 r2p1 r3p1]
+
+ PKHBT p_0, tunpk0, tunpk2, LSL#16
+ M_LDR bSTemp, ppBS ;// bS (r2) was reused as row0/tunpk0 during unpacking, so the saved pBS is fetched to re-read it
+ PKHBT p_1, tunpk3, tunpk6, LSL#16
+
+ ;// p_0 = [r0p0 r1p0 r2p0 r3p0]
+ ;// p_1 = [r0p1 r1p1 r2p1 r3p1]
+
+;//--------------Filtering Decision -------------------
+ USUB8 dp0q0, p_0, q_0
+ LDR m01, =MASK_1
+ LDRH bSTemp, [bSTemp ,#-8] ;// The saved pointer is already 8 bytes past this pass's bS pair
+ MOV m00, #MASK_0 ;// 00000000 mask
+
+ MOV filt, m01 ;// filt: one 0x01 flag byte per pixel lane
+ TST bSTemp, #0xff00
+ MOVEQ filt, filt, LSL #16 ;// High bS byte is zero: clear the lower two lanes
+ TST bSTemp, #0xff
+ MOVEQ filt, filt, LSR #16 ;// Low bS byte is zero: clear the upper two lanes
+ TST bSTemp, #4 ;// Z is consumed by BEQ bSLT4 at the end of the decision code
+
+ ;// Check |p0-q0|<Alpha
+ USUB8 a, q_0, p_0
+ SEL ap0q0, a, dp0q0
+ USUB8 a, ap0q0, alpha
+ SEL filt, m00, filt
+
+ ;// Check |p1-p0|<Beta
+ USUB8 dp1p0, p_1, p_0
+ USUB8 a, p_0, p_1
+ SEL a, a, dp1p0
+ USUB8 a, a, beta
+ SEL filt, m00, filt
+
+ ;// Check |q1-q0|<Beta
+ USUB8 dq1q0, q_1, q_0
+ USUB8 a, q_0, q_1
+ SEL a, a, dq1q0
+ USUB8 a, a, beta
+ SEL filt, m00, filt
+
+ BEQ bSLT4
+;//-------------------Filter--------------------
+bSGE4
+ ;//---------bSGE4 Execution---------------
+ CMP filt, #0 ;// No lane left to filter: leave through NoFilterFilt0
+
+ M_LDR pThresholds, ppThresholds
+
+ ;// Compute P0b
+ UHADD8 t1, p_0, q_1
+ BEQ NoFilterFilt0
+ MVN t2, p_1
+ UHSUB8 t1, t1, t2
+ USUB8 t2, filt, m01 ;// t2 is overwritten below; this instruction is here for the per-lane GE flags used by the SELs
+ EOR t1, t1, m01, LSL #7
+
+ ADD pThresholds,pThresholds, #4 ;// Thresholds are not read on this path; advance by the same 4 bytes as the other paths
+
+ ;// Compute Q0b
+ UHADD8 t2, q_0, p_1
+ MVN t3, q_1
+ UHSUB8 t2, t2, t3
+ M_STR pThresholds, ppThresholds
+ SEL P_0, t1, p_0
+ EOR t2, t2, m01, LSL #7
+ SEL Q_0, t2, q_0
+
+ B StoreResultAndExit
+
+;//---------- Exit of LoopX --------------
+;//---- for the case of no filtering -----
+
+NoFilterFilt0
+ ADD pQ0, pQ0, #2 ;// Undo the two-byte step left made before the p-side loads
+NoFilterBS0
+ M_LDR pThresholds, ppThresholds
+ SUB pQ0, pQ0, srcdstStep, LSL #1 ;// Undo the two row advances
+ ADD pQ0, pQ0, #4 ;// Four bytes to the right for the next inner pass
+ ADD pThresholds, pThresholds, #4
+ ;// Load counter for LoopX
+ M_LDRD XY, pBS, pXYBS
+ M_STR pThresholds, ppThresholds
+ M_LDRD alpha, beta, pAlphaBeta1
+
+ ;// Align the pointer
+ ADDS XY, XY, XY ;// Carry clear: another inner pass (LoopY and LoopX label the same address)
+ M_STR XY, pXYBS
+ BCC LoopY
+ B ExitLoopY
+
+bSLT4
+ ;//---------bSLT4 Execution---------------
+ M_LDR pThresholds, ppThresholds
+ CMP filt, #0
+
+
+ ;// Since beta <= 18 and alpha <= 255 we know
+ ;// -254 <= p0-q0 <= 254
+ ;// -17 <= q1-q0 <= 17
+ ;// -17 <= p1-p0 <= 17
+
+ ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
+ ;//
+ ;// Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
+ ;// = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
+ ;// = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
+
+ USUB8 t1, p_1, p_0
+ USUB8 t2, q_1, q_0
+ BEQ NoFilterFilt0
+
+ LDRB tC0, [pThresholds], #1
+ SSUB8 t1, t1, t2
+ LDRB tC1, [pThresholds], #3 ;// 1 + 3: the pointer moves 4 bytes per inner pass
+ M_STR pThresholds, ppThresholds
+ UHSUB8 t4, p_0, q_0
+ ORR tC, tC1, tC0, LSL #16 ;// tC1 in byte 0, tC0 in byte 2
+ USUB8 t5, p_0, q_0
+ AND t5, t5, m01
+ SHSUB8 t1, t1, t5
+ ORR tC, tC, LSL #8 ;// Duplicate each threshold into the neighbouring byte lane
+ SSUB8 t1, t1, t5
+ SHSUB8 t1, t1, t4
+ UQADD8 tC, tC, m01 ;// tC + 1 in every lane (saturating)
+ SADD8 t1, t1, m01
+ USUB8 t5, filt, m01 ;// Sets the per-lane GE flags from filt for the SEL below
+ SHSUB8 t1, t1, t4
+ SEL tC, tC, m00 ;// Keep tC only in lanes selected by filt
+
+ ;// Split into positive and negative part and clip
+
+ SSUB8 t1, t1, m00
+ SEL pos, t1, m00
+ USUB8 neg, pos, t1
+ USUB8 t3, pos, tC
+ SEL pos, tC, pos
+ USUB8 t3, neg, tC
+ SEL neg, tC, neg
+ UQADD8 P_0, p_0, pos
+ UQSUB8 Q_0, q_0, pos
+ UQSUB8 P_0, P_0, neg
+ UQADD8 Q_0, Q_0, neg
+
+ ;// Choose to store the filtered
+ ;// value or the original pixel
+ USUB8 t1, filt, m01
+ SEL P_0, P_0, p_0
+ SEL Q_0, Q_0, q_0
+
+StoreResultAndExit
+
+ ;//---------Store result---------------
+
+ ;// P_0 = [r0p0 r1p0 r2p0 r3p0]
+ ;// Q_0 = [r0q0 r1q0 r2q0 r3q0]
+
+ SUB pQ0, pQ0, srcdstStep, LSL #1 ;// Back to the first of the four rows
+ ADD pQ0, pQ0, #1 ;// pQ0 -> p0 byte; the q0 byte is at offset #1
+
+ MOV t1, Q_0, LSR #24
+ STRB t1, [pQ0, #1]
+ MOV t1, P_0, LSR #24
+ M_STRB t1, [pQ0], srcdstStep ;// First row written, advance one row
+
+ MOV t1, Q_0, LSR #16
+ STRB t1, [pQ0, #1]
+ MOV t1, P_0, LSR #16
+ M_STRB t1, [pQ0], srcdstStep ;// Second row written, advance one row
+
+ MOV t1, P_0, LSR #8
+ STRB t1, [pQ0]
+ STRB P_0, [pQ0, srcdstStep]
+ MOV t1, Q_0, LSR #8
+ STRB t1, [pQ0, #1]! ;// Writeback moves pQ0 onto the q0 byte of the third row
+ STRB Q_0, [pQ0, srcdstStep]
+
+ M_LDRD XY, pBS, pXYBS
+ M_LDRD alpha, beta, pAlphaBeta1
+
+ SUB pQ0, pQ0, srcdstStep, LSL #1 ;// Back to the first row ...
+ ADD pQ0, pQ0, #4 ;// ... and four bytes to the right for the next inner pass
+
+ ADDS XY, XY, XY ;// Carry clear: another inner pass
+ M_STR XY, pXYBS
+ BCC LoopX
+
+;//-------- Common Exit of LoopY -----------------
+ ;// Align the pointers
+
+ExitLoopY
+
+ M_LDR pThresholds, ppThresholds
+ SUB pQ0, pQ0, #8 ;// Back to the first column ...
+ ADD pQ0, pQ0, srcdstStep, LSL #2 ;// ... and four rows down
+ SUB pBS, pBS, #14 ;// Two inner passes advanced pBS by 16: net +2 per outer pass
+ SUB pThresholds, pThresholds, #6 ;// Two inner passes advanced pThresholds by 8: net +2 per outer pass
+ M_STR pThresholds, ppThresholds
+
+ M_LDRD alpha, beta, pAlphaBeta0
+
+ BNE LoopY ;// Z still comes from the last ADDS XY,XY,XY: loop until XY has shifted to zero
+ MOV r0, #OMX_Sts_NoErr
+;//-----------------End Filter--------------------
+
+ M_END
+
+ ENDIF
+
+ END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
new file mode 100644
index 0000000..1b84080
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
@@ -0,0 +1,331 @@
+;//
+;//
+;// File Name: omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ IMPORT armVCM4P10_DeblockingLumabSLT4_unsafe
+ IMPORT armVCM4P10_DeblockingLumabSGE4_unsafe
+
+
+
+ IF ARM1136JS
+
+
+MASK_0 EQU 0x00000000
+MASK_1 EQU 0x01010101
+MASK_2 EQU 0xff00ff00
+LOOP_COUNT EQU 0x11110000
+
+;// Declare input registers
+
+pSrcDst RN 0
+srcdstStep RN 1
+pAlphaArg RN 2
+pBetaArg RN 3
+
+pThresholds RN 14
+pBS RN 9
+pQ0 RN 0
+bS RN 2
+
+alpha RN 6
+alpha0 RN 6
+alpha1 RN 8
+
+beta RN 7
+beta0 RN 7
+beta1 RN 9
+
+;// Declare Local/Temporary variables
+
+;// Pixels
+p_0 RN 3
+p_1 RN 5
+p_2 RN 4
+p_3 RN 2
+q_0 RN 8
+q_1 RN 9
+q_2 RN 10
+q_3 RN 12
+
+;// Filtering
+
+dp0q0 RN 12
+dp1p0 RN 12
+dq1q0 RN 12
+dp2p0 RN 12
+dq2q0 RN 12
+
+ap0q0 RN 1
+filt RN 2
+
+m00 RN 14
+m01 RN 11
+
+apflg RN 0
+aqflg RN 6
+apqflg RN 0
+
+
+;//Declarations for bSLT4 kernel
+
+tC0 RN 7
+ptC0 RN 1
+
+pQ0a RN 0
+Stepa RN 1
+maska RN 14
+
+P0a RN 1
+P1a RN 8
+Q0a RN 7
+Q1a RN 11
+
+;//Declarations for bSGE4 kernel
+
+pQ0b RN 0
+Stepb RN 1
+maskb RN 14
+
+P0b RN 6
+P1b RN 7
+P2b RN 1
+P3b RN 3
+
+Q0b RN 9
+Q1b RN 0
+Q2b RN 2
+Q3b RN 3
+
+;// Miscellaneous
+XY RN 8
+t0 RN 3
+t1 RN 12
+t2 RN 14
+t7 RN 7
+t4 RN 4
+t5 RN 1
+t8 RN 6
+a RN 0
+
+
+
+
+ ;// Allocate stack memory
+ M_ALLOC4 ppThresholds,4 ;// saved running pThresholds pointer
+ M_ALLOC4 pQ_3,4 ;// spilled q_3 pixels
+ M_ALLOC4 pP_3,4 ;// spilled p_3 pixels
+ M_ALLOC8 pAlphaBeta0,8 ;// alpha/beta pair reloaded at the end of every inner pass
+ M_ALLOC8 pAlphaBeta1,8 ;// alpha[1]/beta[1] pair, copied into pAlphaBeta0 at ExitLoopY
+ M_ALLOC8 pXYBS,4 ;// loop counter XY; M_STRD/M_LDRD on pXYBS move XY and pBS together (presumably this slot and ppBS are adjacent - confirm in armCOMM_s.h)
+ M_ALLOC4 ppBS,4 ;// saved pBS
+ M_ALLOC8 ppQ0Step,4 ;// saved pQ0; M_LDRD on ppQ0Step reloads pQ0 and the step together (presumably this slot and pStep are adjacent - confirm)
+ M_ALLOC4 pStep,4 ;// saved srcdstStep
+
+ ;// Function header: r0 = pSrcDst, r1 = srcdstStep, r2 = pAlpha, r3 = pBeta; pThresholds and pBS are read from the stack
+ M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11
+
+ ;//Input arguments on the stack
+ M_ARG ppThresholdsArg, 4
+ M_ARG ppBSArg, 4
+
+ LDR t4,=MASK_1
+
+ LDRB alpha0, [pAlphaArg]
+ LDRB beta0, [pBetaArg]
+ LDRB alpha1, [pAlphaArg,#1]
+ LDRB beta1, [pBetaArg,#1]
+
+ MUL alpha0, alpha0, t4 ;// Multiplying a byte by 0x01010101 replicates it into all four byte lanes
+ MUL beta0, beta0, t4
+ MUL alpha1, alpha1, t4
+ MUL beta1, beta1, t4
+
+ M_STRD alpha0, beta0, pAlphaBeta0
+ M_STRD alpha1, beta1, pAlphaBeta1
+
+ LDR XY,=LOOP_COUNT ;// 0x11110000: each ADDS XY,XY,XY shifts one bit out; carry set ends the inner loop, a zero result ends the outer loop (4 x 4 passes)
+ M_LDR pBS, ppBSArg
+ M_LDR pThresholds, ppThresholdsArg
+ M_STR srcdstStep, pStep
+ M_STRD XY, pBS, pXYBS
+ SUB pQ0, pQ0, srcdstStep, LSL #2 ;// Start four rows above pSrcDst (the p3 row)
+ M_STR pThresholds, ppThresholds
+LoopY
+LoopX
+;//---------------Load Pixels-------------------
+ M_STR pQ0, ppQ0Step ;// Remember the p3-row address of this 4-pixel column group
+ M_LDR p_3, [pQ0], srcdstStep
+ M_LDR p_2, [pQ0], srcdstStep
+ M_STR p_3, pP_3 ;// p_3 shares r2 with bS, so it is spilled before bS is loaded
+ LDRB bS, [pBS], #1 ;// One bS byte per 4-pixel group
+ M_STR pBS, ppBS
+ M_LDR p_1, [pQ0], srcdstStep
+ CMP bS, #0 ;// Z is consumed by BEQ NoFilterBS0 after the pixel loads
+ M_LDR p_0, [pQ0], srcdstStep
+ M_LDR q_0, [pQ0], srcdstStep
+ M_LDR q_1, [pQ0], srcdstStep
+ M_LDR q_2, [pQ0], srcdstStep
+ M_LDR q_3, [pQ0], srcdstStep ;// pQ0 is now eight rows below the p3 row
+ BEQ NoFilterBS0
+ CMP bS, #4 ;// These flags are consumed by BLT bSLT4 at the end of the decision code
+ M_STR q_3, pQ_3
+
+;//--------------Filtering Decision -------------------
+ LDR m01, =MASK_1 ;// 01010101 mask
+ MOV m00, #MASK_0 ;// 00000000 mask
+
+ ;// Check |p0-q0|<Alpha
+ USUB8 dp0q0, p_0, q_0
+ USUB8 a, q_0, p_0
+ SEL ap0q0, a, dp0q0
+ USUB8 a, ap0q0, alpha
+ SEL filt, m00, m01
+
+ ;// Check |p1-p0|<Beta
+ USUB8 dp1p0, p_1, p_0
+ USUB8 a, p_0, p_1
+ SEL a, a, dp1p0
+ USUB8 a, a, beta
+ SEL filt, m00, filt
+
+ ;// Check |q1-q0|<Beta
+ USUB8 dq1q0, q_1, q_0
+ USUB8 a, q_0, q_1
+ SEL a, a, dq1q0
+ USUB8 a, a, beta
+ SEL filt, m00, filt
+
+ ;// Check ap<Beta
+ USUB8 dp2p0, p_2, p_0
+ USUB8 a, p_0, p_2
+ SEL a, a, dp2p0
+ USUB8 a, a, beta
+ SEL apflg, m00, filt ;// apflg = filt && (ap<beta)
+
+ ;// Check aq<Beta
+ USUB8 dq2q0, q_2, q_0
+ USUB8 t2, q_0, q_2
+ SEL t2, t2, dq2q0
+ USUB8 t2, t2, beta
+ MOV t7,#0 ;// t7 shares r7 with beta; it becomes the zero operand of the SEL at the top of either branch
+
+ BLT bSLT4
+;//-------------------Filter--------------------
+bSGE4
+ ;//---------bSGE4 Execution---------------
+ SEL t1, t7, filt ;// aqflg = filt && (aq<beta)
+ CMP filt, #0
+ ORR apqflg, apflg, t1, LSL #1
+ M_LDRD pQ0, srcdstStep, ppQ0Step, EQ ;// Nothing to filter: restore pQ0/srcdstStep before leaving
+ BEQ NoFilterFilt0
+
+ BL armVCM4P10_DeblockingLumabSGE4_unsafe ;// Helper is defined elsewhere; its results come back in the P*b/Q*b registers used below
+
+ ;//---------Store result---------------
+ M_LDR pThresholds, ppThresholds
+ MOV p_2, Q1b ;// Q1b is r0, which M_LDRD below reloads as pQ0b: move it out first
+ MOV p_1, P2b ;// P2b is r1, which M_LDRD below reloads as Stepb: move it out first
+ M_LDRD pQ0b, Stepb, ppQ0Step
+ ADD pThresholds, #1
+ M_STR pThresholds, ppThresholds
+ M_STR p_1, [pQ0b, Stepb]! ;// P2b -> p2 row
+ M_STR P1b, [pQ0b, Stepb]! ;// p1 row
+ M_STR P0b, [pQ0b, Stepb]! ;// p0 row
+ M_STR Q0b, [pQ0b, Stepb]! ;// q0 row
+ STR p_2, [pQ0b, Stepb] ;// Q1b -> q1 row
+ STR Q2b, [pQ0b, Stepb, LSL #1] ;// q2 row
+
+
+ M_LDRD XY, pBS, pXYBS
+ SUB pQ0, pQ0b, Stepb, LSL #2 ;// Back from the q0 row to the p3 row ...
+ ADD pQ0, pQ0, #4 ;// ... and four pixels to the right
+ M_LDRD alpha, beta, pAlphaBeta0
+ ADDS XY, XY, XY ;// Carry clear: another inner pass
+ M_STR XY, pXYBS
+ BCC LoopX
+ B ExitLoopY
+
+;//---------- Exit of LoopX --------------
+;//---- for the case of no filtering -----
+
+NoFilterBS0
+ SUB pQ0, pQ0, srcdstStep, LSL #3 ;// Undo the eight row advances of the pixel loads
+NoFilterFilt0
+ ADD pQ0, pQ0, #4 ;// Four pixels to the right
+ ;// Load counter for LoopX
+ M_LDRD XY, pBS, pXYBS
+ M_LDR pThresholds, ppThresholds
+ M_LDRD alpha, beta, pAlphaBeta0
+
+ ;// Align the pointers
+ ADDS XY, XY, XY ;// Carry clear: another inner pass
+ ADD pThresholds, pThresholds, #1
+ M_STR pThresholds, ppThresholds
+ M_STR XY, pXYBS
+ BCC LoopX
+ B ExitLoopY
+
+bSLT4
+ ;//---------bSLT4 Execution---------------
+ SEL aqflg, t7, filt ;// aqflg = filt && (aq<beta)
+ M_LDR ptC0, ppThresholds
+ CMP filt, #0
+ M_LDRD pQ0, srcdstStep, ppQ0Step, EQ ;// Nothing to filter: restore pQ0/srcdstStep before leaving
+ BEQ NoFilterFilt0
+
+ LDRB tC0, [ptC0], #1
+ M_STR ptC0, ppThresholds
+
+ BL armVCM4P10_DeblockingLumabSLT4_unsafe ;// Helper is defined elsewhere; its results come back in the P*a/Q*a registers used below
+
+ ;//---------Store result---------------
+ MOV p_2, P0a ;// P0a is r1, which M_LDRD below reloads as Stepa: move it out first
+ M_LDRD pQ0a, Stepa, ppQ0Step
+ M_STR P1a, [pQ0a, Stepa, LSL #1]! ;// p1 row (two rows below the saved p3 row)
+ M_STR p_2, [pQ0a, Stepa]! ;// P0a -> p0 row
+ M_STR Q0a, [pQ0a, Stepa]! ;// q0 row
+ STR Q1a, [pQ0a, Stepa] ;// q1 row
+
+ ;// Load counter
+ M_LDRD XY, pBS, pXYBS
+ M_LDRD alpha, beta, pAlphaBeta0
+
+ SUB pQ0, pQ0a, Stepa, LSL #2 ;// Back from the q0 row to the p3 row ...
+ ADD pQ0, pQ0, #4 ;// ... and four pixels to the right
+
+ ADDS XY, XY, XY ;// Carry clear: another inner pass
+ M_STR XY, pXYBS
+ BCC LoopX
+
+;//-------- Common Exit of LoopY -----------------
+ ;// Align the pointers
+ExitLoopY
+ M_LDRD alpha, beta, pAlphaBeta1 ;// Later passes use the alpha[1]/beta[1] pair
+ SUB pQ0, pQ0, #16 ;// Back to the first column ...
+ ADD pQ0, pQ0, srcdstStep, LSL #2 ;// ... and four rows down
+ M_STRD alpha, beta, pAlphaBeta0
+
+ BNE LoopY ;// Z still comes from the last ADDS XY,XY,XY: loop until XY has shifted to zero
+ MOV r0, #OMX_Sts_NoErr
+;//-----------------End Filter--------------------
+ M_END
+
+ ENDIF
+
+
+ END
+
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
new file mode 100644
index 0000000..417ddc2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
@@ -0,0 +1,550 @@
+;//
+;//
+;// File Name: omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ IMPORT armVCM4P10_DeblockingLumabSLT4_unsafe
+ IMPORT armVCM4P10_DeblockingLumabSGE4_unsafe
+
+
+ IF ARM1136JS
+
+MASK_0 EQU 0x00000000 ;// all-zero word; loaded into m00 for the SEL-based threshold tests
+MASK_1 EQU 0x01010101 ;// 0x01 in every byte: MUL by it replicates a byte into all four lanes; also the initial per-byte filt flag (m01)
+MASK_2 EQU 0xff00ff00 ;// keeps bytes 1 and 3 of a word; used by the AND/UXTAB16 transposes
+LOOP_COUNT EQU 0x11110000 ;// doubled by ADDS once per edge: carry-out on every 4th doubling, zero result after the 16th
+
+;// Declare input registers (first four arguments; the remaining two are read with M_ARG)
+
+pSrcDst RN 0
+srcdstStep RN 1
+pAlphaArg RN 2
+pBetaArg RN 3
+
+pThresholds RN 14
+pBS RN 9
+pQ0 RN 0 ;// same register as pSrcDst
+bS RN 2
+
+alpha RN 6
+alpha0 RN 6
+alpha1 RN 8
+
+beta RN 7
+beta0 RN 7
+beta1 RN 9
+
+;// Declare Local/Temporary variables
+;// NOTE: many names below alias one physical register (e.g. r8 is alpha1, q_0, row4, tunpk0, P1a and XY); a name is only valid until one of its aliases is written
+;// Pixels
+p_0 RN 3
+p_1 RN 5
+p_2 RN 4
+p_3 RN 2
+q_0 RN 8
+q_1 RN 9
+q_2 RN 10
+q_3 RN 12
+
+;// Unpacking
+mask RN 11
+
+row0 RN 2
+row1 RN 4
+row2 RN 5
+row3 RN 3
+
+row4 RN 8
+row5 RN 9
+row6 RN 10
+row7 RN 12
+row8 RN 14
+row9 RN 7
+
+tunpk0 RN 8
+tunpk1 RN 9
+tunpk2 RN 10
+tunpk3 RN 12
+tunpk4 RN 0 ;// overwrites pQ0 (r0)
+
+tunpk5 RN 1 ;// overwrites srcdstStep (r1)
+tunpk6 RN 14
+tunpk7 RN 2
+tunpk8 RN 5
+tunpk9 RN 6
+
+
+;// Filtering
+
+dp0q0 RN 12
+dp1p0 RN 12
+dq1q0 RN 12
+dp2p0 RN 12
+dq2q0 RN 12
+
+ap0q0 RN 1
+filt RN 2
+
+m00 RN 14
+m01 RN 11
+
+apflg RN 0
+aqflg RN 6
+apqflg RN 0
+
+
+;//Declarations for bSLT4 kernel (names used around the BL to armVCM4P10_DeblockingLumabSLT4_unsafe; the kernel's register interface is defined in its own file)
+
+tC0 RN 7
+ptC0 RN 1
+
+pQ0a RN 0
+Stepa RN 1
+maska RN 14
+
+P0a RN 1
+P1a RN 8
+Q0a RN 7
+Q1a RN 11
+
+;//Declarations for bSGE4 kernel (names used around the BL to armVCM4P10_DeblockingLumabSGE4_unsafe; the kernel's register interface is defined in its own file)
+
+pQ0b RN 0
+Stepb RN 1
+maskb RN 14
+
+P0b RN 6
+P1b RN 7
+P2b RN 1
+P3b RN 3
+
+Q0b RN 9
+Q1b RN 0
+Q2b RN 2
+Q3b RN 3
+
+;// Miscellaneous
+XY RN 8 ;// loop counter; shares r8 with q_0/row4/tunpk0, so it is kept in the pXYBS stack slot between uses
+t0 RN 3
+t1 RN 12
+t2 RN 14
+t7 RN 7
+t4 RN 4
+t5 RN 1
+t8 RN 6
+a RN 0
+
+
+
+ ;// Allocate stack memory
+ M_ALLOC4 ppThresholds,4 ;// running pThresholds pointer (its register, r14, is reused as scratch)
+ M_ALLOC4 pQ_3,4 ;// q_3 column saved in LoopX
+ M_ALLOC4 pP_3,4 ;// p_3 column for the current edge
+ M_ALLOC8 pAlphaBeta0,8 ;// byte-replicated alpha[0], beta[0]
+ M_ALLOC8 pAlphaBeta1,8 ;// byte-replicated alpha[1], beta[1]
+ M_ALLOC8 pXYBS,4 ;// loop counter XY; accessed as a pair with ppBS by M_STRD/M_LDRD XY, pBS, pXYBS
+ M_ALLOC4 ppBS,4 ;// running pBS pointer. NOTE(review): the paired access assumes this slot directly follows pXYBS - confirm against M_ALLOC in armCOMM_s.h
+ M_ALLOC8 ppQ0Step,4 ;// saved pQ0; accessed as a pair with pStep by M_LDRD pQ0, srcdstStep, ppQ0Step
+ M_ALLOC4 pStep,4 ;// saved srcdstStep. NOTE(review): same adjacency assumption as pXYBS/ppBS
+
+ ;// Function header: filters vertical luma edges in place as 4 edges (LoopX) x 4 row groups (LoopY), see LOOP_COUNT; returns OMX_Sts_NoErr in r0
+ M_START omxVCM4P10_FilterDeblockingLuma_VerEdge_I, r11
+
+ ;//Input arguments on the stack
+ M_ARG ppThresholdsArg, 4
+ M_ARG ppBSArg, 4
+
+ LDR t4,=MASK_1 ;// byte-replication multiplier
+
+ LDRB alpha0, [pAlphaArg] ;// alpha[0]
+ LDRB beta0, [pBetaArg] ;// beta[0]
+ LDRB alpha1, [pAlphaArg,#1] ;// alpha[1]
+ LDRB beta1, [pBetaArg,#1] ;// beta[1]
+
+ MUL alpha0, alpha0, t4 ;// copy each threshold byte into all four lanes for the USUB8 compares
+ MUL beta0, beta0, t4
+ MUL alpha1, alpha1, t4
+ MUL beta1, beta1, t4
+
+ M_STRD alpha0, beta0, pAlphaBeta0 ;// alpha/beta (r6/r7) also stay live as the [0] pair for the first edge
+ M_STRD alpha1, beta1, pAlphaBeta1 ;// the [1] pair is reloaded after every edge; the [0] pair again at ExitLoopY
+
+ LDR XY,=LOOP_COUNT
+ M_LDR pBS, ppBSArg
+ M_LDR pThresholds, ppThresholdsArg
+ M_STR srcdstStep, pStep ;// r1 is reused as scratch later
+ M_STRD XY, pBS, pXYBS ;// stores XY and pBS together (see pXYBS/ppBS above)
+ M_STR pThresholds, ppThresholds
+
+ SUB pQ0, pQ0, #4 ;// start 4 bytes left of pSrcDst, where LoopY reads the p pixels
+LoopY
+;//---------------Load Pixels-------------------
+;// Reads four rows of four bytes at pQ0 and transposes them into the packed columns p_0..p_3 (one byte per row in each register)
+;//----------------Pack p0-p3-----------------------
+ LDR mask, =MASK_2
+
+ M_LDR row0, [pQ0], srcdstStep ;// post-indexed: pQ0 moves down one row
+ M_LDR row1, [pQ0], srcdstStep
+ LDR row2, [pQ0]
+ LDR row3, [pQ0, srcdstStep]
+ SUB pQ0, pQ0, srcdstStep, LSL #1 ;// back to the first of the four rows
+
+ ;// row0 = [r0p0 r0p1 r0p2 r0p3]
+ ;// row1 = [r1p0 r1p1 r1p2 r1p3]
+ ;// row2 = [r2p0 r2p1 r2p2 r2p3]
+ ;// row3 = [r3p0 r3p1 r3p2 r3p3]
+
+ AND tunpk0, mask, row0
+ AND tunpk6, mask, row0, LSL#8
+ UXTAB16 tunpk0, tunpk0, row1, ROR#8
+ UXTAB16 tunpk6, tunpk6, row1
+ AND tunpk2, mask, row2
+ AND tunpk3, mask, row2, LSL#8
+ UXTAB16 tunpk2, tunpk2, row3, ROR#8
+ UXTAB16 tunpk3, tunpk3, row3
+
+ ;// tunpk0 = [r0p0 r1p0 r0p2 r1p2]
+ ;// tunpk6 = [r0p1 r1p1 r0p3 r1p3]
+ ;// tunpk2 = [r2p0 r3p0 r2p2 r3p2]
+ ;// tunpk3 = [r2p1 r3p1 r2p3 r3p3]
+
+ PKHTB p_0, tunpk0, tunpk2, ASR#16
+ PKHTB p_1, tunpk6, tunpk3, ASR#16
+ PKHBT p_2, tunpk2, tunpk0, LSL#16
+ PKHBT p_3, tunpk3, tunpk6, LSL#16
+
+
+ ;// p_0 = [r0p0 r1p0 r2p0 r3p0]
+ ;// p_1 = [r0p1 r1p1 r2p1 r3p1]
+ ;// p_2 = [r0p2 r1p2 r2p2 r3p2]
+ ;// p_3 = [r0p3 r1p3 r2p3 r3p3]
+
+ M_STR p_3, pP_3 ;// p_3 shares r2 with bS/filt, so it is kept on the stack
+
+;//----------------Pack q0-q3-----------------------
+LoopX
+ LDRB bS, [pBS], #4 ;// bS for this edge; pBS advances 4 bytes per edge
+ M_STR pQ0, ppQ0Step ;// save pQ0: r0 is overwritten below (tunpk4, a, apflg)
+ LDR mask, =MASK_2
+ CMP bS, #0
+ M_STR pBS, ppBS ;// save pBS: r9 is overwritten below (row5, q_1)
+
+ LDR row4, [pQ0, #4]! ;// pre-indexed with writeback: pQ0 now points at the q pixels
+ BEQ.W NoFilterBS0 ;// bS == 0: nothing to filter on this edge
+ M_LDR row5, [pQ0, srcdstStep]!
+ M_LDR row6, [pQ0, srcdstStep]!
+ M_LDR row7, [pQ0, srcdstStep]
+
+ ;// row4 = [r0q3 r0q2 r0q1 r0q0]
+ ;// row5 = [r1q3 r1q2 r1q1 r1q0]
+ ;// row6 = [r2q3 r2q2 r2q1 r2q0]
+ ;// row7 = [r3q3 r3q2 r3q1 r3q0]
+
+ AND tunpk4, mask, row4 ;// tunpk4/tunpk5 overwrite r0/r1 (pQ0/srcdstStep); both are reloaded later from ppQ0Step
+ CMP bS, #4 ;// consumed by BLT bSLT4 below; no S-suffixed instruction appears in between
+ AND tunpk5, mask, row4, LSL#8
+ UXTAB16 tunpk4, tunpk4, row5, ROR#8
+ UXTAB16 tunpk5, tunpk5, row5
+ AND tunpk6, mask, row6
+ AND tunpk7, mask, row6, LSL#8
+ UXTAB16 tunpk6, tunpk6, row7, ROR#8
+ UXTAB16 tunpk7, tunpk7, row7
+
+ ;// tunpk4 = [r0q0 r1q0 r0q2 r1q2]
+ ;// tunpk5 = [r0q1 r1q1 r0q3 r1q3]
+ ;// tunpk6 = [r2q0 r3q0 r2q2 r3q2]
+ ;// tunpk7 = [r2q1 r3q1 r2q3 r3q3]
+
+ PKHTB q_3, tunpk4, tunpk6, ASR#16
+ PKHTB q_2, tunpk5, tunpk7, ASR#16
+ PKHBT q_1, tunpk6, tunpk4, LSL#16
+ M_STR q_3, pQ_3 ;// q_3 shares r12 with the d* temporaries, so it is kept on the stack
+ PKHBT q_0, tunpk7, tunpk5, LSL#16
+
+
+ ;// q_0 = [r0q0 r1q0 r2q0 r3q0]
+ ;// q_1 = [r0q1 r1q1 r2q1 r3q1]
+ ;// q_2 = [r0q2 r1q2 r2q2 r3q2]
+ ;// q_3 = [r0q3 r1q3 r2q3 r3q3]
+
+
+;//--------------Filtering Decision -------------------
+ LDR m01, =MASK_1 ;// 01010101 mask
+ MOV m00, #MASK_0 ;// 00000000 mask
+
+ ;// Check |p0-q0|<Alpha
+ USUB8 dp0q0, p_0, q_0
+ USUB8 a, q_0, p_0 ;// sets the per-byte GE flags where q0 >= p0
+ SEL ap0q0, a, dp0q0 ;// per byte: the non-negative difference, i.e. |p0-q0|
+ USUB8 a, ap0q0, alpha ;// GE set where |p0-q0| >= alpha
+ SEL filt, m00, m01 ;// filt byte = 0x01 where the test passed, 0 otherwise
+
+ ;// Check |p1-p0|<Beta
+ USUB8 dp1p0, p_1, p_0
+ USUB8 a, p_0, p_1
+ SEL a, a, dp1p0
+ USUB8 a, a, beta
+ SEL filt, m00, filt ;// clear filt bytes that fail this test
+
+ ;// Check |q1-q0|<Beta
+ USUB8 dq1q0, q_1, q_0
+ USUB8 a, q_0, q_1
+ SEL a, a, dq1q0
+ USUB8 a, a, beta
+ SEL filt, m00, filt
+
+ ;// Check ap<Beta
+ USUB8 dp2p0, p_2, p_0
+ USUB8 a, p_0, p_2
+ SEL a, a, dp2p0
+ USUB8 a, a, beta
+ SEL apflg, m00, filt ;// apflg = filt && (ap<beta)
+
+ ;// Check aq<Beta
+ USUB8 dq2q0, q_2, q_0
+ USUB8 t2, q_0, q_2 ;// t2 overwrites m00 (r14), hence the fresh zero in t7 below
+ SEL t2, t2, dq2q0
+ USUB8 t2, t2, beta ;// GE flags left here are consumed by the first SEL at bSGE4 / bSLT4
+ MOV t7,#0
+
+
+ BLT bSLT4 ;// uses the flags from CMP bS, #4 above
+;//-------------------Filter--------------------
+bSGE4
+ ;//---------bSGE4 Execution---------------
+ SEL t1, t7, filt ;// aqflg = filt && (aq<beta)
+ CMP filt, #0
+ ORR apqflg, apflg, t1, LSL #1 ;// per byte: bit 0 = ap flag, bit 1 = aq flag
+ M_LDRD pQ0, srcdstStep, ppQ0Step, EQ ;// filt == 0 in all lanes: restore pQ0/srcdstStep and skip
+ BEQ NoFilterFilt0
+
+ BL armVCM4P10_DeblockingLumabSGE4_unsafe
+
+ ;//---------Store result---------------
+ ;// Transposes the filtered columns back into rows and writes p3..p0 and q0..q3 of all four rows
+ LDR maskb,=MASK_2
+
+ ;// P0b = [r0p0 r1p0 r2p0 r3p0]
+ ;// P1b = [r0p1 r1p1 r2p1 r3p1]
+ ;// P2b = [r0p2 r1p2 r2p2 r3p2]
+ ;// P3b = [r0p3 r1p3 r2p3 r3p3]
+
+ M_LDR P3b, pP_3
+ M_STR Q0b, pP_3 ;// this edge's q0 column is stored as the p_3 column for the next edge
+
+ ;//------Pack p0-p3------
+ AND tunpk0, maskb, P0b
+ AND tunpk2, maskb, P0b, LSL#8
+ UXTAB16 tunpk0, tunpk0, P1b, ROR#8
+ UXTAB16 tunpk2, tunpk2, P1b
+
+ AND tunpk3, maskb, P2b
+ AND tunpk8, maskb, P2b, LSL#8
+ UXTAB16 tunpk3, tunpk3, P3b, ROR#8
+ UXTAB16 tunpk8, tunpk8, P3b
+
+ ;// tunpk0 = [r0p0 r0p1 r2p0 r2p1]
+ ;// tunpk2 = [r1p0 r1p1 r3p0 r3p1]
+ ;// tunpk3 = [r0p2 r0p3 r2p2 r2p3]
+ ;// tunpk8 = [r1p2 r1p3 r3p2 r3p3]
+
+ MOV p_2, Q1b ;// q1 column becomes p_2 of the next edge (and frees r0 for pQ0b)
+ M_LDRD pQ0b, Stepb, ppQ0Step ;// reload the pointer saved at LoopX and the step
+
+ PKHTB row9, tunpk0, tunpk3, ASR#16
+ PKHBT row7, tunpk3, tunpk0, LSL#16
+ PKHTB row3, tunpk2, tunpk8, ASR#16
+ PKHBT row6, tunpk8, tunpk2, LSL#16
+
+ ;// row9 = [r0p0 r0p1 r0p2 r0p3]
+ ;// row3 = [r1p0 r1p1 r1p2 r1p3]
+ ;// row7 = [r2p0 r2p1 r2p2 r2p3]
+ ;// row6 = [r3p0 r3p1 r3p2 r3p3]
+
+ M_STR row9, [pQ0b], Stepb ;// row 0; pQ0b then moves to row 1
+ STR row7, [pQ0b, Stepb] ;// row 2
+ STR row6, [pQ0b, Stepb, LSL #1] ;// row 3
+ STR row3, [pQ0b], #4 ;// row 1; pQ0b then moves right to the q pixels
+
+ M_LDR Q3b, pQ_3
+
+ ;// Q0b = [r0q0 r1q0 r2q0 r3q0]
+ ;// Q1b = [r0q1 r1q1 r2q1 r3q1]
+ ;// Q2b = [r0q2 r1q2 r2q2 r3q2]
+ ;// Q3b = [r0q3 r1q3 r2q3 r3q3]
+
+ ;//------Pack q0-q3------
+ AND tunpk0, maskb, p_2
+ AND tunpk2, maskb, p_2, LSL#8
+ UXTAB16 tunpk0, tunpk0, Q0b, ROR#8
+ UXTAB16 tunpk2, tunpk2, Q0b
+
+ AND tunpk3, maskb, Q3b
+ AND tunpk8, maskb, Q3b, LSL#8
+ UXTAB16 tunpk3, tunpk3, Q2b, ROR#8
+ UXTAB16 tunpk8, tunpk8, Q2b
+
+ ;// tunpk0 = [r0q1 r0q0 r2q1 r2q0]
+ ;// tunpk2 = [r1q1 r1q0 r3q1 r3q0]
+ ;// tunpk3 = [r0q3 r0q2 r2q3 r2q2]
+ ;// tunpk8 = [r1q3 r1q2 r3q3 r3q2]
+
+ PKHTB row8, tunpk3, tunpk0, ASR#16
+ PKHBT row7, tunpk0, tunpk3, LSL#16
+ PKHTB row4, tunpk8, tunpk2, ASR#16
+ PKHBT row6, tunpk2, tunpk8, LSL#16
+
+ ;// row8 = [r0q0 r0q1 r0q2 r0q3]
+ ;// row4 = [r1q0 r1q1 r1q2 r1q3]
+ ;// row7 = [r2q0 r2q1 r2q2 r2q3]
+ ;// row6 = [r3q0 r3q1 r3q2 r3q3]
+
+ STR row4, [pQ0b] ;// row 1
+ STR row7, [pQ0b, Stepb] ;// row 2
+ STR row6, [pQ0b, Stepb, LSL #1] ;// row 3
+
+ SUB pQ0, pQ0b, Stepb ;// back to row 0: pQ0 = saved pQ0 + 4
+ MOV p_1, Q2b ;// q2 column becomes p_1 of the next edge; Q3b (r3) is already p_0
+
+ STR row8, [pQ0] ;// row 0
+
+ M_LDRD XY, pBS, pXYBS
+ M_LDR pThresholds, ppThresholds
+ M_LDRD alpha, beta, pAlphaBeta1 ;// the [1] thresholds apply to the remaining edges of this row group
+
+ ADDS XY, XY, XY ;// carry set after the 4th edge of a row group
+ ADD pThresholds, #4 ;// keep pThresholds in step with pBS (no tC0 is loaded on this path)
+ M_STR pThresholds, ppThresholds
+ M_STR XY, pXYBS
+ BCC LoopX ;// next edge reuses the p_0..p_3 set up above
+ B ExitLoopY
+
+;//---------- Exit of LoopX --------------
+;//---- for the case of no filtering -----
+;// NoFilterFilt0: entered with pQ0 restored from ppQ0Step. NoFilterBS0: entered with pQ0 already advanced by 4 in LoopX.
+NoFilterFilt0
+ ADD pQ0, pQ0, #4 ;// move to the next edge's p pixels
+NoFilterBS0
+ ;// Load counter for LoopX
+ M_LDRD XY, pBS, pXYBS
+ M_LDR pThresholds, ppThresholds
+ M_LDRD alpha, beta, pAlphaBeta1
+
+ ;// Align the pointer
+ ADDS XY, XY, XY
+ ADD pThresholds, pThresholds, #4
+ M_STR pThresholds, ppThresholds
+ M_STR XY, pXYBS
+ BCC LoopY ;// LoopY (not LoopX): this path did not set up p_0..p_3 for the next edge, so they are re-read from memory at the new pQ0
+ B ExitLoopY
+
+bSLT4
+ ;//---------bSLT4 Execution---------------
+ SEL aqflg, t7, filt ;// aqflg = filt && (aq<beta)
+ M_LDR ptC0, ppThresholds
+ CMP filt, #0
+ M_LDRD pQ0, srcdstStep, ppQ0Step, EQ ;// filt == 0 in all lanes: restore pQ0/srcdstStep and skip
+ BEQ NoFilterFilt0 ;// ppThresholds is still un-advanced here; NoFilterFilt0 adds the 4
+
+ LDRB tC0, [ptC0], #4 ;// one tC0 byte for this edge; pointer advances 4 like pBS. tC0 overwrites beta (r7)
+ M_STR ptC0, ppThresholds
+
+ BL armVCM4P10_DeblockingLumabSLT4_unsafe
+
+ ;//---------Store result---------------
+ ;//--------Pack p1,p0,q1,q0------------
+
+ ;//Load destination pointer
+ LDR maska,=MASK_2
+ M_STR Q0a, pP_3 ;// this edge's q0 column is stored as the p_3 column for the next edge
+ MOV p_1, q_2 ;// q2 column becomes p_1 of the next edge. NOTE(review): assumes the kernel leaves q_2 (r10) intact - confirm
+
+ ;// P1a = [r0p1 r1p1 r2p1 r3p1]
+ ;// P0a = [r0p0 r1p0 r2p0 r3p0]
+ ;// Q0a = [r0q0 r1q0 r2q0 r3q0]
+ ;// Q1a = [r0q1 r1q1 r2q1 r3q1]
+
+ AND tunpk1, maska, P0a
+ AND tunpk2, maska, P0a, LSL#8
+ UXTAB16 tunpk1, tunpk1, P1a, ROR#8
+ UXTAB16 tunpk2, tunpk2, P1a
+
+ M_LDRD pQ0a, Stepa, ppQ0Step ;// reload the pointer saved at LoopX and the step (P0a in r1 is no longer needed)
+
+ AND tunpk9, maska, Q1a
+ AND tunpk3, maska, Q1a, LSL#8
+ UXTAB16 tunpk9, tunpk9, Q0a, ROR#8
+ UXTAB16 tunpk3, tunpk3, Q0a
+
+ ;// tunpk1 = [r0p0 r0p1 r2p0 r2p1]
+ ;// tunpk2 = [r1p0 r1p1 r3p0 r3p1]
+ ;// tunpk9 = [r0q1 r0q0 r2q1 r2q0]
+ ;// tunpk3 = [r1q1 r1q0 r3q1 r3q0]
+
+ MOV t4, tunpk1, LSR #16
+ MOV t0, tunpk9, LSR #16
+
+ STRH t4,[pQ0a, #2]! ;//Stores [r0p0 r0p1]
+ STRH t0,[pQ0a, #2] ;//Stores [r0q0 r0q1]
+
+ MOV t4, tunpk2, LSR #16
+ MOV t0, tunpk3, LSR #16
+
+ M_STRH t4,[pQ0a, Stepa]! ;//Stores [r1p0 r1p1]
+ STRH t0,[pQ0a, #2] ;//Stores [r1q0 r1q1]
+
+ M_STRH tunpk1,[pQ0a, Stepa]! ;//Stores [r2p0 r2p1]
+ STRH tunpk2,[pQ0a, Stepa] ;//Stores [r3p0 r3p1]
+ STRH tunpk9,[pQ0a, #2]! ;//Stores [r2q0 r2q1]
+ STRH tunpk3,[pQ0a, Stepa] ;//Stores [r3q0 r3q1]
+
+ SUB pQ0, pQ0a, Stepa, LSL #1 ;// back to row 0: pQ0 = saved pQ0 + 4
+
+ ;// Load counter
+ M_LDRD XY, pBS, pXYBS
+
+ ;// Reload Pixels
+ M_LDR p_0, pQ_3 ;// saved q3 column becomes p_0 of the next edge
+ MOV p_2, Q1a ;// q1 column becomes p_2 of the next edge
+
+ M_LDRD alpha, beta, pAlphaBeta1 ;// the [1] thresholds apply to the remaining edges of this row group
+
+ ADDS XY, XY, XY ;// carry set after the 4th edge of a row group
+ M_STR XY, pXYBS
+ BCC LoopX ;// otherwise fall through to the common exit below
+
+;//-------- Common Exit of LoopY -----------------
+ ;// Align the pointers (the load below is only for the bSLT4 fall-through; the other paths arrive with pThresholds already loaded)
+ M_LDR pThresholds, ppThresholds
+ExitLoopY
+ SUB pQ0, pQ0, #16 ;// undo the four 4-byte steps taken along the row
+ ADD pQ0, pQ0, srcdstStep, LSL #2 ;// and move down four rows
+ SUB pBS, pBS, #15 ;// four +4 steps minus 15: net +1 per row group
+ SUB pThresholds, pThresholds, #15 ;// same net +1 for the thresholds pointer
+ M_STR pThresholds, ppThresholds
+
+ M_LDRD alpha, beta, pAlphaBeta0 ;// the [0] thresholds apply to the first edge of the next row group
+
+ BNE LoopY ;// Z comes from the last ADDS XY, XY, XY: XY reaches zero after 16 edges
+ MOV r0, #OMX_Sts_NoErr
+
+ M_END
+;//-----------------End Filter--------------------
+
+ ENDIF
+
+ END
+
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
new file mode 100644
index 0000000..de835bd
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
@@ -0,0 +1,79 @@
+/**
+ *
+ * File Name: omxVCM4P10_InterpolateChroma.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate 1/8 Pixel interpolation for Chroma Block
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P10_InterpolateChroma
+ *
+ * Description:
+ * Public entry point for 1/8-pixel chroma interpolation of an inter MB.
+ * The work is delegated to armVCM4P10_Interpolate_Chroma; this wrapper only
+ * unpacks roi and reorders the arguments for that helper.
+ *
+ * Parameters:
+ * [in]  pSrc     Pointer to the source reference frame buffer
+ * [in]  srcStep  Reference frame step in bytes
+ * [out] pDst     Pointer to the destination frame buffer.
+ *                  roi.width==2 requires 2-byte alignment
+ *                  roi.width==4 requires 4-byte alignment
+ *                  roi.width==8 requires 8-byte alignment
+ * [in]  dstStep  Destination frame step in bytes. Must be a multiple of roi.width.
+ * [in]  dx       Fractional part of the horizontal motion vector component,
+ *                in 1/8 pixel units; valid range [0,7]
+ * [in]  dy       Fractional part of the vertical motion vector component,
+ *                in 1/8 pixel units; valid range [0,7]
+ * [in]  roi      Dimension of the interpolation region; roi.width and
+ *                roi.height must each be 2, 4, or 8.
+ *
+ * Return Value:
+ * Whatever armVCM4P10_Interpolate_Chroma returns. The documented contract is
+ * OMX_Sts_NoErr on success and OMX_Sts_BadArgErr when:
+ *   - pSrc or pDst is NULL;
+ *   - srcStep or dstStep is < 8 or not a multiple of 8;
+ *   - dx or dy is outside [0,7];
+ *   - roi.width or roi.height is not one of {2,4,8};
+ *   - pDst is not aligned to roi.width bytes (2, 4 or 8).
+ * No argument is checked in this wrapper itself.
+ */
+
+OMXResult omxVCM4P10_InterpolateChroma (
+ const OMX_U8* pSrc,
+ OMX_S32 srcStep,
+ OMX_U8* pDst,
+ OMX_S32 dstStep,
+ OMX_S32 dx,
+ OMX_S32 dy,
+ OMXSize roi
+ )
+{
+    /* Same const-dropping cast as before; the helper is declared elsewhere. */
+    OMX_U8 *pRef = (OMX_U8 *)pSrc;
+    OMXResult status = armVCM4P10_Interpolate_Chroma(
+        pRef, srcStep, pDst, dstStep, roi.width, roi.height, dx, dy);
+    return status;
+}
+
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
new file mode 100644
index 0000000..cf611a3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
@@ -0,0 +1,426 @@
+;//
+;//
+;// File Name: omxVCM4P10_InterpolateLuma_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Function:
+;// omxVCM4P10_InterpolateLuma
+;//
+;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly.
+;// Performs quarter pel interpolation of inter luma MB.
+;// It's assumed that the frame is already padded when calling this function.
+;// Parameters:
+;// [in] pSrc Pointer to the source reference frame buffer
+;// [in] srcStep Reference frame step in byte
+;// [in] dstStep Destination frame step in byte. Must be multiple of roi.width
+;// [in] dx Fractional part of horizontal motion vector
+;// component in 1/4 pixel unit; valid in the range [0,3]
+;// [in] dy Fractional part of vertical motion vector
+;// component in 1/4 pixel unit; valid in the range [0,3]
+;// [in] roi Dimension of the interpolation region;the parameters roi.width and roi.height must
+;// be equal to either 4, 8, or 16.
+;// [out] pDst Pointer to the destination frame buffer.
+;// if roi.width==4, 4-byte alignment required
+;// if roi.width==8, 8-byte alignment required
+;// if roi.width==16, 16-byte alignment required
+;//
+;// Return Value:
+;// If the function runs without error, it returns OMX_Sts_NoErr.
+;// It is assumed that the following conditions are satisfied before calling this function:
+;// pSrc or pDst is not NULL.
+;// srcStep or dstStep >= roi.width.
+;// dx or dy is in the range [0-3].
+;// roi.width or roi.height is not out of range {4, 8, 16}.
+;// If roi.width is equal to 4, Dst is 4 byte aligned.
+;// If roi.width is equal to 8, pDst is 8 byte aligned.
+;// If roi.width is equal to 16, pDst is 16 byte aligned.
+;// srcStep and dstStep is multiple of 8.
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT omxVCM4P10_InterpolateLuma
+
+ IF ARM1136JS
+ IMPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+ IMPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ IMPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+ IMPORT armVCM4P10_Average_4x4_Align0_unsafe
+ IMPORT armVCM4P10_Average_4x4_Align2_unsafe
+ IMPORT armVCM4P10_Average_4x4_Align3_unsafe
+ IMPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+ IMPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+ ENDIF
+
+ IF ARM1136JS
+ IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+ IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+ ENDIF
+
+
+
+;// Declare input registers (r0-r3 hold the first four arguments; iHeight/iWidth are loaded from the stack arguments after M_START)
+pSrc RN 0
+srcStep RN 1
+pDst RN 2
+dstStep RN 3
+iHeight RN 4
+iWidth RN 5
+
+;// Declare other intermediate registers
+idx RN 6
+idy RN 7
+index RN 6 ;// same register as idx: the combined index replaces idx in place
+Temp RN 12
+pArgs RN 11
+
+
+ ;// End of CortexA8
+
+;//-------------------------------------------------------------------------------------------------------------------------
+;//-------------------------------------------------------------------------------------------------------------------------
+ IF ARM1136JS
+
+
+ M_ALLOC4 ppDst, 8 ;// not referenced anywhere else in this file
+ M_ALLOC4 ppSrc, 8 ;// not referenced anywhere else in this file
+ M_ALLOC4 ppArgs, 16 ;// holds {pSrc, srcStep, pDst, dstStep} for the current 4x4 block (STM/LDM via pArgs)
+ M_ALLOC4 pBuffer, 120 ;// 120 = 12x10
+ M_ALLOC8 pInterBuf, 120 ;// 120 = 12*5*2
+ M_ALLOC8 pTempBuf, 32 ;// 32 = 8*4
+
+ ;// Function header
+ ;// Interpolation of luma is implemented by processing block of pixels, size 4x4 at a time.
+ ;// Depending on the values of motion vector fractional parts (dx,dy), one out of 16 cases will be processed.
+ ;// Registers r4, r5, r6 to be preserved by internal unsafe functions
+ ;// r4 - iHeight
+ ;// r5 - iWidth
+ ;// r6 - index
+ M_START omxVCM4P10_InterpolateLuma, r11
+
+;// Declare other intermediate registers (the first five repeat the file-level declarations with the same register numbers)
+idx RN 6
+idy RN 7
+index RN 6
+Temp RN 12
+pArgs RN 11
+
+pBuf RN 8
+Height RN 9 ;// set to 4 or 9 before the HorAlign9x/VerAlign4x helpers are called
+bufStep RN 9
+
+ ;// Define stack arguments
+ M_ARG ptridx, 4
+ M_ARG ptridy, 4
+ M_ARG ptrWidth, 4
+ M_ARG ptrHeight, 4
+
+ ;// Load dx, dy and the structure elements of roi
+ M_LDR idx, ptridx
+ M_LDR idy, ptridy
+ M_LDR iWidth, ptrWidth
+ M_LDR iHeight, ptrHeight
+
+ M_PRINTF "roi.width %d\n", iWidth
+ M_PRINTF "roi.height %d\n", iHeight
+
+ ADD index, idx, idy, LSL #2 ;// [index] = [idy][idx], i.e. index = idx + 4*idy; computed once, reused for every 4x4 block
+ M_ADR pArgs, ppArgs
+
+InterpolateLuma
+Block4x4WidthLoop
+Block4x4HeightLoop
+;// The three labels above mark the same address: both loops re-enter here, once per 4x4 block
+ STM pArgs, {pSrc,srcStep,pDst,dstStep} ;// save this block's arguments; the cases and the loop tail reload them
+ M_ADR pBuf, pBuffer
+
+ ;// switch table using motion vector as index (Case_n handles dx = n & 3, dy = n >> 2)
+ M_SWITCH index, L
+ M_CASE Case_0
+ M_CASE Case_1
+ M_CASE Case_2
+ M_CASE Case_3
+ M_CASE Case_4
+ M_CASE Case_5
+ M_CASE Case_6
+ M_CASE Case_7
+ M_CASE Case_8
+ M_CASE Case_9
+ M_CASE Case_a
+ M_CASE Case_b
+ M_CASE Case_c
+ M_CASE Case_d
+ M_CASE Case_e
+ M_CASE Case_f
+ M_ENDSWITCH
+
+Case_0
+ ;// Case G (index 0: dx=0, dy=0)
+ M_PRINTF "Case 0 \n"
+
+ BL armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+ B Block4x4LoopEnd
+
+Case_1
+ ;// Case a (index 1: dx=1, dy=0)
+ M_PRINTF "Case 1 \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes to the left of the block
+ MOV Height, #4
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ BL armVCM4P10_Average_4x4_Align2_unsafe
+ B Block4x4LoopEnd
+Case_2
+ ;// Case b (index 2: dx=2, dy=0)
+ M_PRINTF "Case 2 \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes to the left of the block
+ MOV Height, #4
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ B Block4x4LoopEnd
+Case_3
+ ;// Case c (index 3: dx=3, dy=0)
+ M_PRINTF "Case 3 \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes to the left of the block
+ MOV Height, #4
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ BL armVCM4P10_Average_4x4_Align3_unsafe
+ B Block4x4LoopEnd
+Case_4
+ ;// Case d (index 4: dx=0, dy=1)
+ M_PRINTF "Case 4 \n"
+
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// start 2 rows above the block
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ BL armVCM4P10_Average_4x4_Align0_unsafe
+
+ B Block4x4LoopEnd
+Case_5
+ ;// Case e (index 5: dx=1, dy=1)
+ M_PRINTF "Case 5 \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes to the left of the block
+ MOV Height, #4
+ M_ADR pDst, pTempBuf ;// first pass writes to the scratch block instead of the real destination
+ MOV dstStep, #4
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ M_ADR pArgs, ppArgs
+ LDM pArgs, {pSrc, srcStep, pDst, dstStep} ;// restore the block arguments saved at the loop head
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// start 2 rows above the block
+ M_ADR pBuf, pBuffer
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ M_ADR pSrc, pTempBuf ;// source for the final call is the scratch block from the first pass
+ MOV srcStep, #4
+ BL armVCM4P10_Average_4x4_Align0_unsafe
+
+
+ B Block4x4LoopEnd
+Case_6
+ ;// Case f (index 6: dx=2, dy=1)
+ M_PRINTF "Case 6 \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes left and
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// 2 rows above the block
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ M_ADR pBuf, pInterBuf
+ BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+ M_ADR idy, pTempBuf ;// r7 (idy) is reused as a pointer; index in r6 is unaffected
+ BL armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+ BL armVCM4P10_Average_4x4_Align0_unsafe
+ B Block4x4LoopEnd
+Case_7
+ ;// Case g (index 7: dx=3, dy=1)
+ M_PRINTF "Case 7 \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes to the left of the block
+ MOV Height, #4
+ M_ADR pDst, pTempBuf ;// first pass writes to the scratch block instead of the real destination
+ MOV dstStep, #4
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ M_ADR pArgs, ppArgs
+ LDM pArgs, {pSrc, srcStep, pDst, dstStep} ;// restore the block arguments saved at the loop head
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// start 2 rows above the block
+ ADD pSrc, pSrc, #1 ;// and one byte to the right (differs from Case_5 only here)
+ M_ADR pBuf, pBuffer
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ M_ADR pSrc, pTempBuf ;// source for the final call is the scratch block from the first pass
+ MOV srcStep, #4
+ BL armVCM4P10_Average_4x4_Align0_unsafe
+
+ B Block4x4LoopEnd
+Case_8
+ ;// Case h (index 8: dx=0, dy=2)
+ M_PRINTF "Case 8 \n"
+
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// start 2 rows above the block
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ B Block4x4LoopEnd
+Case_9
+ ;// Case i (index 9: dx=1, dy=2)
+ M_PRINTF "Case 9 \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes left and
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// 2 rows above the block
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ ADD pSrc, pSrc, srcStep, LSL #1 ;// advance pSrc by two steps before the diagonal pass
+ M_ADR pBuf, pInterBuf
+ BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+ M_ADR idy, pTempBuf ;// r7 (idy) is reused as a pointer; index in r6 is unaffected
+ BL armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+ BL armVCM4P10_Average_4x4_Align2_unsafe
+ B Block4x4LoopEnd
+Case_a
+ ;// Case j (index 10: dx=2, dy=2)
+ M_PRINTF "Case a \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes left and
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// 2 rows above the block
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ ADD pSrc, pSrc, srcStep, LSL #1 ;// advance pSrc by two steps before the diagonal pass
+ M_ADR pBuf, pInterBuf
+ BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+ B Block4x4LoopEnd
+Case_b
+ ;// Case k (index 11: dx=3, dy=2)
+ M_PRINTF "Case b \n"
+ SUB pSrc, pSrc, #2 ;// start 2 bytes left and
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// 2 rows above the block
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ ADD pSrc, pSrc, srcStep, LSL #1 ;// advance pSrc by two steps before the diagonal pass
+ M_ADR pBuf, pInterBuf
+ BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+ M_ADR idy, pTempBuf ;// r7 (idy) is reused as a pointer; index in r6 is unaffected
+ BL armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+ BL armVCM4P10_Average_4x4_Align3_unsafe
+ B Block4x4LoopEnd
+Case_c
+ ;// Case n (index 12: dx=0, dy=3)
+ M_PRINTF "Case c \n"
+
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// start 2 rows above the block
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ ADD pSrc, pSrc, srcStep ;// Update pSrc to one row down
+ BL armVCM4P10_Average_4x4_Align0_unsafe
+ B Block4x4LoopEnd
+Case_d
+ ;// Case p (index 13: dx=1, dy=3)
+ M_PRINTF "Case d \n"
+ SUB pSrc, pSrc, #2 ;// start 2 bytes to the left and
+ ADD pSrc, pSrc, srcStep ;// one row below the block origin
+ MOV Height, #4
+ M_ADR pDst, pTempBuf ;// first pass writes to the scratch block instead of the real destination
+ MOV dstStep, #4
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ M_ADR pArgs, ppArgs
+ LDM pArgs, {pSrc, srcStep, pDst, dstStep} ;// restore the block arguments saved at the loop head
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// start 2 rows above the block
+ M_ADR pBuf, pBuffer
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ M_ADR pSrc, pTempBuf ;// source for the final call is the scratch block from the first pass
+ MOV srcStep, #4
+ BL armVCM4P10_Average_4x4_Align0_unsafe
+ B Block4x4LoopEnd
+Case_e
+ ;// Case q (index 14: dx=2, dy=3)
+ M_PRINTF "Case e \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes left and
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// 2 rows above the block
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ M_ADR pBuf, pInterBuf
+ BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+ M_ADR idy, pTempBuf ;// r7 (idy) is reused as a pointer; index in r6 is unaffected
+ BL armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+ ADD pSrc, pSrc, #4 ;// differs from Case_6 only by this pSrc adjustment before the final call
+ BL armVCM4P10_Average_4x4_Align0_unsafe
+
+ B Block4x4LoopEnd
+Case_f
+ ;// Case r (index 15: dx=3, dy=3); no trailing branch: falls through to Block4x4LoopEnd
+ M_PRINTF "Case f \n"
+ SUB pSrc, pSrc, #2 ;// start 2 bytes to the left and
+ ADD pSrc, pSrc, srcStep ;// one row below the block origin
+ MOV Height, #4
+ M_ADR pDst, pTempBuf ;// first pass writes to the scratch block instead of the real destination
+ MOV dstStep, #4
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ M_ADR pArgs, ppArgs
+ LDM pArgs, {pSrc, srcStep, pDst, dstStep} ;// restore the block arguments saved at the loop head
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// start 2 rows above the block
+ ADD pSrc, pSrc, #1 ;// and one byte to the right (differs from Case_d only here)
+ M_ADR pBuf, pBuffer
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ M_ADR pSrc, pTempBuf ;// source for the final call is the scratch block from the first pass
+ MOV srcStep, #4
+ BL armVCM4P10_Average_4x4_Align0_unsafe
+
+Block4x4LoopEnd
+;// Common tail of every case: step to the next 4x4 block, row-major over roi.width x roi.height
+ ;// Width Loop
+ SUBS iWidth, iWidth, #4 ;// flags from here feed the BGT below
+ M_ADR pArgs, ppArgs
+ LDM pArgs, {pSrc,srcStep,pDst,dstStep} ;// Load arguments
+ ADD pSrc, pSrc, #4 ;// next block to the right
+ ADD pDst, pDst, #4
+ BGT Block4x4WidthLoop
+
+ ;// Height Loop
+ SUBS iHeight, iHeight, #4 ;// flags from here feed the BGT below
+ M_LDR iWidth, ptrWidth ;// restart the column counter from roi.width
+ M_ADR pArgs, ppArgs
+ ADD pSrc, pSrc, srcStep, LSL #2 ;// four rows down
+ ADD pDst, pDst, dstStep, LSL #2
+ SUB pSrc, pSrc, iWidth ;// and back to the first column
+ SUB pDst, pDst, iWidth
+ BGT Block4x4HeightLoop
+
+EndOfInterpolation
+ MOV r0, #0 ;// return value is always 0
+ M_END
+
+ ENDIF
+
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
new file mode 100644
index 0000000..34fedd8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
@@ -0,0 +1,494 @@
+;//
+;//
+;// File Name: omxVCM4P10_PredictIntraChroma_8x8_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ EXPORT armVCM4P10_pIndexTable8x8
+
+;// Define the processor variants supported by this file
+
+ M_VARIANTS ARM1136JS
+
+ AREA table, DATA
+;//-------------------------------------------------------
+;// Jump table implementing a C-style switch in asm: entry i is the
+;// address of the handler label for predMode i (loaded straight into pc).
+;//-------------------------------------------------------
+
+ M_TABLE armVCM4P10_pIndexTable8x8
+ DCD OMX_VC_CHROMA_DC, OMX_VC_CHROMA_HOR ;// entries for predMode 0, 1
+ DCD OMX_VC_CHROMA_VERT, OMX_VC_CHROMA_PLANE ;// entries for predMode 2, 3
+
+ M_TABLE armVCM4P10_MultiplierTableChroma8x8,1 ;// NOTE(review): not referenced by the code in this file - confirm it is still needed
+ DCW 3, 2, 1,4
+ DCW -3,-2,-1,0
+ DCW 1, 2, 3,4
+
+ IF ARM1136JS
+
+;//--------------------------------------------
+;// Constants
+;//--------------------------------------------
+
+BLK_SIZE EQU 0x8 ;// rows per block; row-loop count in plane mode
+MUL_CONST0 EQU 0x01010101 ;// multiply a byte value by this to replicate it into all four bytes
+MASK_CONST EQU 0x00FF00FF ;// keeps the low byte of each halfword (plane-mode packing)
+MUL_CONST1 EQU 0x80808080 ;// four bytes of 128: DC fill when neither neighbour is available
+
+;//--------------------------------------------
+;// Scratch variable
+;//--------------------------------------------
+y RN 12 ;// set to BLK_SIZE on entry; r12 is reused as tVal12 and c
+pc RN 15 ;// program counter; loaded from the jump table to dispatch
+return RN 0 ;// status code returned in r0 (same register as pSrcLeft)
+pSrcLeft2 RN 1 ;// pSrcLeft + leftStep (same register as pSrcAbove)
+pDst2 RN 2 ;// pDst + dstStep, horizontal mode (same register as pSrcAboveLeft)
+sum1 RN 6 ;// DC sum over samples 0..3 (same register as tVal6 / predMode)
+sum2 RN 7 ;// DC sum over samples 4..7 (same register as tVal7 / availability)
+pTable RN 9 ;// jump table base
+dstStepx2 RN 11 ;// horizontal mode: 2*dstStep - 4
+leftStepx2 RN 14 ;// 2*leftStep
+outerCount RN 14 ;// plane-mode row counter (r14 also serves as leftStepx2, tVal14, b)
+r0x01010101 RN 10 ;// holds MUL_CONST0
+r0x00FF00FF RN 11 ;// holds MASK_CONST (r11 also serves as dstStepx2, tVal11)
+
+tVal0 RN 0 ;// tValN is simply rN: general temporaries that overlap the
+tVal1 RN 1 ;// named scratch registers above and the input argument
+tVal2 RN 2 ;// registers below, so writing one retires the other
+tVal3 RN 3
+tVal4 RN 4
+tVal5 RN 5
+tVal6 RN 6
+tVal7 RN 7
+tVal8 RN 8
+tVal9 RN 9
+tVal10 RN 10
+tVal11 RN 11
+tVal12 RN 12
+tVal14 RN 14
+
+b RN 14 ;// plane mode: horizontal gradient, (17*H + 16) >> 5
+c RN 12 ;// plane mode: vertical gradient, (17*V + 16) >> 5
+
+p2p0 RN 0 ;// plane mode: 16-bit accumulators for pixels 2 (high half) and 0 (low half)
+p3p1 RN 1 ;// plane mode: accumulators for pixels 3 and 1
+p6p4 RN 2 ;// plane mode: accumulators for pixels 6 and 4
+p7p5 RN 4 ;// plane mode: accumulators for pixels 7 and 5
+
+pp2pp0 RN 6 ;// plane mode: 8-bit results for pixels 2 and 0, one per halfword
+pp3pp1 RN 7 ;// plane mode: 8-bit results for pixels 3 and 1
+pp6pp4 RN 8 ;// plane mode: 8-bit results for pixels 6 and 4
+pp7pp5 RN 9 ;// plane mode: 8-bit results for pixels 7 and 5
+
+p3210 RN 10 ;// four packed output bytes, pixels 0..3
+p7654 RN 10 ;// four packed output bytes, pixels 4..7 (same register, used after p3210 is stored)
+
+;//--------------------------------------------
+;// Input Arguments
+;//--------------------------------------------
+pSrcLeft RN 0 ;// r0: left neighbour column, one byte per row, rows leftStep apart
+pSrcAbove RN 1 ;// r1: the 8 bytes of the row above the block
+pSrcAboveLeft RN 2 ;// r2: above-left corner byte (read in plane mode only)
+pDst RN 3 ;// r3: output pointer, rows dstStep apart
+leftStep RN 4 ;// loaded from stack argument LeftStep
+dstStep RN 5 ;// loaded from stack argument DstStep
+predMode RN 6 ;// loaded from stack argument PredMode; index into the jump table
+availability RN 7 ;// loaded from stack argument Availability; OMX_VC_UPPER / OMX_VC_LEFT flags, examined in DC mode only
+
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntraChroma_8x8 starts
+;//-----------------------------------------------------------------------------------------------
+
+ ;// Entry point: load the four stack arguments, then dispatch on predMode through the jump table
+ M_START omxVCM4P10_PredictIntraChroma_8x8, r11
+
+ ;// Define stack arguments
+ M_ARG LeftStep, 4
+ M_ARG DstStep, 4
+ M_ARG PredMode, 4
+ M_ARG Availability, 4
+
+ ;// M_STALL ARM1136JS=4
+
+ LDR pTable,=armVCM4P10_pIndexTable8x8 ;// Base of the 4-entry jump table
+
+
+ ;// Load argument from the stack
+ M_LDR predMode, PredMode ;// Arg predMode loaded from stack to reg
+ M_LDR leftStep, LeftStep ;// Arg leftStep loaded from stack to reg
+ M_LDR dstStep, DstStep ;// Arg dstStep loaded from stack to reg
+ M_LDR availability, Availability ;// Arg availability loaded from stack to reg
+ ;// NOTE(review): predMode indexes the table unchecked; a value outside 0..3 loads pc from beyond the table
+ MOV y, #BLK_SIZE ;// y = 8; NOTE(review): every handler overwrites r12 before reading it, so this looks dead
+ LDR pc, [pTable, predMode, LSL #2] ;// Jump to the handler for predMode (table entry predMode*4)
+
+OMX_VC_CHROMA_DC ;// DC mode; the code up to M_EXIT handles "upper and left both available", one DC value per 4x4 quadrant
+ AND availability, availability,#(OMX_VC_UPPER + OMX_VC_LEFT) ;// keep only the UPPER and LEFT flags
+ CMP availability, #(OMX_VC_UPPER + OMX_VC_LEFT) ;// are UPPER and LEFT both set?
+ LDR r0x01010101, =MUL_CONST0 ;// byte-replication constant; TST_UPPER / TST_LEFT rely on it being loaded here
+ BNE TST_UPPER ;// Jump to Upper if not both
+ LDM pSrcAbove,{tVal8,tVal9} ;// tVal 8 to 9 = pSrcAbove[0 to 7]
+
+ ADD leftStepx2, leftStep,leftStep ;// leftStepx2 = 2 * leftStep
+ ADD pSrcLeft2, pSrcLeft, leftStep ;// pSrcLeft2 = pSrcLeft + leftStep (r1: pSrcAbove already consumed)
+
+ ;// M_STALL ARM1136JS=1
+
+ UXTB16 tVal7, tVal8 ;// pSrcAbove[0, 2]
+ UXTB16 tVal8, tVal8, ROR #8 ;// pSrcAbove[1, 3]
+ UADD16 sum1, tVal7, tVal8 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
+
+ UXTB16 tVal7, tVal9 ;// pSrcAbove[4, 6]
+ UXTB16 tVal9, tVal9, ROR #8 ;// pSrcAbove[5, 7]
+ UADD16 sum2, tVal7, tVal9 ;// pSrcAbove[4, 6] + pSrcAbove[5, 7]
+ ADD sum1, sum1, sum1, LSR #16 ;// low half = sum(pSrcAbove[0] to pSrcAbove[3])
+ ADD sum2, sum2, sum2, LSR #16 ;// low half = sum(pSrcAbove[4] to pSrcAbove[7])
+ UXTH sum1, sum1 ;// upsum1 (Clear the top junk bits)
+ UXTH sum2, sum2 ;// upsum2 (Clear the top junk bits)
+
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0]
+ M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1]
+ M_LDRB tVal4, [pSrcLeft], +leftStepx2 ;// tVal4 = pSrcLeft[2] (r4: leftStep no longer needed)
+ M_LDRB tVal12,[pSrcLeft2], +leftStepx2 ;// tVal12= pSrcLeft[3]
+ ADD tVal2, tVal8, tVal9 ;// tVal2 = tVal8 + tVal9
+
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[4]
+ M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[5]
+ ADD tVal14, tVal4, tVal12 ;// tVal14 = tVal4 + tVal12 (r14: leftStepx2 no longer needed)
+
+ LDRB tVal4, [pSrcLeft] ;// tVal4 = pSrcLeft[6]
+ LDRB tVal12,[pSrcLeft2] ;// tVal12= pSrcLeft[7]
+ ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9
+ ADD tVal2, tVal2, tVal14 ;// leftsum1 = sum(pSrcLeft[0] to pSrcLeft[3])
+ ADD tVal4, tVal4, tVal12 ;// tVal4 = tVal4 + tVal12
+ ADD tVal14, tVal8, tVal4 ;// leftsum2 = sum(pSrcLeft[4] to pSrcLeft[7])
+ ADD tVal8, tVal14, #2 ;// tVal8 = leftsum2 + 2
+ ADD tVal9, sum2, #2 ;// tVal9 = upsum2 + 2
+ ADD sum1, sum1, tVal2 ;// sum1 = upsum1 + leftsum1
+ ADD sum2, sum2, tVal14 ;// sum2 = upsum2 + leftsum2
+ ADD sum1, sum1, #4 ;// (sum1 + 4)
+ ADD sum2, sum2, #4 ;// (sum2 + 4)
+ MOV sum1, sum1, LSR #3 ;// top-left DC = (upsum1 + leftsum1 + 4)>>3
+ MOV tVal9, tVal9, LSR #2 ;// top-right DC = (upsum2 + 2)>>2
+ MOV tVal8, tVal8, LSR #2 ;// bottom-left DC = (leftsum2 + 2)>>2
+ MOV sum2, sum2, LSR #3 ;// bottom-right DC = (upsum2 + leftsum2 + 4)>>3
+
+ MUL tVal0, sum1, r0x01010101 ;// top-left DC replicated into 4 bytes
+ MUL tVal1, tVal9,r0x01010101 ;// top-right DC replicated into 4 bytes
+ MUL tVal8, tVal8,r0x01010101 ;// bottom-left DC replicated into 4 bytes
+ MUL tVal9, sum2, r0x01010101 ;// bottom-right DC replicated into 4 bytes
+
+ M_STRD tVal0, tVal1, [pDst], dstStep ;// row 0 = tVal0 (bytes 0-3), tVal1 (bytes 4-7)
+ M_STRD tVal0, tVal1, [pDst], dstStep ;// row 1 = tVal0, tVal1
+ M_STRD tVal0, tVal1, [pDst], dstStep ;// row 2 = tVal0, tVal1
+ M_STRD tVal0, tVal1, [pDst], dstStep ;// row 3 = tVal0, tVal1
+
+ M_STRD tVal8, tVal9, [pDst], dstStep ;// row 4 = tVal8 (bytes 0-3), tVal9 (bytes 4-7)
+ M_STRD tVal8, tVal9, [pDst], dstStep ;// row 5 = tVal8, tVal9
+ M_STRD tVal8, tVal9, [pDst], dstStep ;// row 6 = tVal8, tVal9
+ M_STRD tVal8, tVal9, [pDst], dstStep ;// row 7 = tVal8, tVal9
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+TST_UPPER ;// DC mode, only the row above available: left half uses above[0..3], right half above[4..7]
+
+ ;// M_STALL ARM1136JS=3
+
+ CMP availability, #OMX_VC_UPPER ;// availability was masked to UPPER|LEFT by the caller path: is only UPPER set?
+
+ BNE TST_LEFT ;// Jump to Left if not upper
+ LDM pSrcAbove,{tVal8,tVal9} ;// tVal 8 to 9 = pSrcAbove[0 to 7]
+
+ ;// M_STALL ARM1136JS=3
+
+ UXTB16 tVal7, tVal8 ;// pSrcAbove[0, 2]
+ UXTB16 tVal8, tVal8, ROR #8 ;// pSrcAbove[1, 3]
+ UADD16 sum1, tVal7, tVal8 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
+
+ UXTB16 tVal7, tVal9 ;// pSrcAbove[4, 6]
+ UXTB16 tVal9, tVal9, ROR #8 ;// pSrcAbove[5, 7]
+ UADD16 sum2, tVal7, tVal9 ;// pSrcAbove[4, 6] + pSrcAbove[5, 7]
+
+ ADD sum1, sum1, sum1, LSR #16 ;// low half = sum(pSrcAbove[0] to pSrcAbove[3])
+ ADD sum2, sum2, sum2, LSR #16 ;// low half = sum(pSrcAbove[4] to pSrcAbove[7])
+
+ UXTH sum1, sum1 ;// upsum1 (Clear the top junk bits)
+ UXTH sum2, sum2 ;// upsum2 (Clear the top junk bits)
+
+ ADD sum1, sum1, #2 ;// sum1 + 2
+ ADD sum2, sum2, #2 ;// sum2 + 2
+
+ MOV sum1, sum1, LSR #2 ;// (sum1 + 2)>>2
+ MOV sum2, sum2, LSR #2 ;// (sum2 + 2)>>2
+
+ MUL sum1, sum1,r0x01010101 ;// replicate into 4 bytes; sum1 is r6, i.e. tVal6 below
+ MUL sum2, sum2,r0x01010101 ;// replicate into 4 bytes; sum2 is r7, i.e. tVal7 below
+
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 0 = tVal6 (bytes 0-3), tVal7 (bytes 4-7)
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 1 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 2 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 3 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 4 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 5 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 6 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 7 = tVal6, tVal7
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+TST_LEFT ;// DC mode, only the left column available: rows 0-3 use left[0..3], rows 4-7 use left[4..7]
+ ;// M_STALL ARM1136JS=3
+
+ CMP availability, #OMX_VC_LEFT ;// is only LEFT set?
+ BNE TST_COUNT0 ;// neither neighbour available
+ ADD leftStepx2, leftStep,leftStep ;// leftStepx2 = 2 * leftStep
+ ADD pSrcLeft2, pSrcLeft, leftStep ;// pSrcLeft2 = pSrcLeft + leftStep
+
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0]
+ M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1]
+ M_LDRB tVal4, [pSrcLeft], +leftStepx2 ;// tVal4 = pSrcLeft[2] (r4: leftStep no longer needed)
+ M_LDRB tVal12,[pSrcLeft2], +leftStepx2 ;// tVal12= pSrcLeft[3]
+
+ ADD tVal6, tVal8, tVal9 ;// tVal6 = tVal8 + tVal9
+
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[4]
+ ADD tVal7, tVal4, tVal12 ;// tVal7 = tVal4 + tVal12
+ M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[5]
+ M_LDRB tVal4, [pSrcLeft], +leftStepx2 ;// tVal4 = pSrcLeft[6]
+ M_LDRB tVal12,[pSrcLeft2], +leftStepx2 ;// tVal12= pSrcLeft[7]
+
+ ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9
+ ADD sum1, tVal6, tVal7 ;// sum1 = sum(pSrcLeft[0] to pSrcLeft[3])
+ ADD tVal4, tVal4, tVal12 ;// tVal4 = tVal4 + tVal12
+ ADD sum2, tVal8, tVal4 ;// sum2 = sum(pSrcLeft[4] to pSrcLeft[7])
+
+ ADD sum1, sum1, #2 ;// sum1 + 2
+ ADD sum2, sum2, #2 ;// sum2 + 2
+
+ MOV sum1, sum1, LSR #2 ;// (sum1 + 2)>>2
+ MOV sum2, sum2, LSR #2 ;// (sum2 + 2)>>2
+
+ MUL tVal6, sum1,r0x01010101 ;// DC for rows 0-3 replicated into 4 bytes
+ MUL tVal8, sum2,r0x01010101 ;// DC for rows 4-7 replicated into 4 bytes
+
+ ;// M_STALL ARM1136JS=1
+ MOV tVal7,tVal6 ;// tVal7 = tVal6: same value for bytes 4-7
+ MOV tVal9,tVal8 ;// tVal9 = tVal8: same value for bytes 4-7
+
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 0 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 1 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 2 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 3 = tVal6, tVal7
+
+ M_STRD tVal8, tVal9, [pDst], dstStep ;// row 4 = tVal8, tVal9
+ M_STRD tVal8, tVal9, [pDst], dstStep ;// row 5 = tVal8, tVal9
+ M_STRD tVal8, tVal9, [pDst], dstStep ;// row 6 = tVal8, tVal9
+ M_STRD tVal8, tVal9, [pDst], dstStep ;// row 7 = tVal8, tVal9
+
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Macro to exit midway-break frm case
+
+TST_COUNT0 ;// DC mode, neither neighbour available: fill the block with 128
+ LDR sum1, =MUL_CONST1 ;// sum1 (r6, i.e. tVal6) = 0x80808080
+
+ ;// M_STALL ARM1136JS=2
+
+ MOV tVal7, sum1 ;// tVal7 = 0x80808080 as well
+
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 0 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 1 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 2 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 3 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 4 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 5 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 6 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 7 = tVal6, tVal7
+
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Macro to exit midway-break frm case
+
+OMX_VC_CHROMA_HOR ;// Horizontal mode: every row is filled with that row's left-neighbour byte
+
+ ;// M_STALL ARM1136JS=2
+
+ ADD pSrcLeft2, pSrcLeft, leftStep ;// pSrcLeft2 = pSrcLeft + leftStep (odd rows)
+ ADD leftStepx2, leftStep, leftStep ;// leftStepx2 = leftStep * 2
+ ADD pDst2, pDst, dstStep ;// pDst2 = pDst + dstStep (odd rows)
+ ADD dstStepx2, dstStep, dstStep ;// double dstStep
+ SUB dstStepx2, dstStepx2, #4 ;// double dstStep minus 4: the first store of each row already advanced by 4
+ LDR r0x01010101, =MUL_CONST0 ;// Const to repeat the byte in reg 4 times
+ M_LDRB tVal6, [pSrcLeft], +leftStepx2 ;// tVal6 = pSrcLeft[0]
+ M_LDRB tVal7, [pSrcLeft2],+leftStepx2 ;// tVal7 = pSrcLeft[1]
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[2]
+ M_LDRB tVal9, [pSrcLeft2],+leftStepx2 ;// tVal9 = pSrcLeft[3]
+ MUL tVal6, tVal6, r0x01010101 ;// replicate the val in all the bytes
+ MUL tVal7, tVal7, r0x01010101 ;// replicate the val in all the bytes
+ MUL tVal8, tVal8, r0x01010101 ;// replicate the val in all the bytes
+ MUL tVal9, tVal9, r0x01010101 ;// replicate the val in all the bytes
+ STR tVal6, [pDst], #+4 ;// row 0, bytes 0-3 = tVal6
+ STR tVal7, [pDst2], #+4 ;// row 1, bytes 0-3 = tVal7
+ M_STR tVal6, [pDst], dstStepx2 ;// row 0, bytes 4-7 = tVal6; pDst -> row 2
+ M_STR tVal7, [pDst2], dstStepx2 ;// row 1, bytes 4-7 = tVal7; pDst2 -> row 3
+ STR tVal8, [pDst], #+4 ;// row 2, bytes 0-3 = tVal8
+ STR tVal9, [pDst2], #+4 ;// row 3, bytes 0-3 = tVal9
+ M_STR tVal8, [pDst], dstStepx2 ;// row 2, bytes 4-7 = tVal8; pDst -> row 4
+ M_STR tVal9, [pDst2], dstStepx2 ;// row 3, bytes 4-7 = tVal9; pDst2 -> row 5
+ M_LDRB tVal6, [pSrcLeft], +leftStepx2 ;// tVal6 = pSrcLeft[4]
+ M_LDRB tVal7, [pSrcLeft2],+leftStepx2 ;// tVal7 = pSrcLeft[5]
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[6]
+ M_LDRB tVal9, [pSrcLeft2],+leftStepx2 ;// tVal9 = pSrcLeft[7]
+ MUL tVal6, tVal6, r0x01010101 ;// replicate the val in all the bytes
+ MUL tVal7, tVal7, r0x01010101 ;// replicate the val in all the bytes
+ MUL tVal8, tVal8, r0x01010101 ;// replicate the val in all the bytes
+ MUL tVal9, tVal9, r0x01010101 ;// replicate the val in all the bytes
+ STR tVal6, [pDst], #+4 ;// row 4, bytes 0-3 = tVal6
+ STR tVal7, [pDst2], #+4 ;// row 5, bytes 0-3 = tVal7
+ M_STR tVal6, [pDst], dstStepx2 ;// row 4, bytes 4-7 = tVal6; pDst -> row 6
+ M_STR tVal7, [pDst2], dstStepx2 ;// row 5, bytes 4-7 = tVal7; pDst2 -> row 7
+ STR tVal8, [pDst], #+4 ;// row 6, bytes 0-3 = tVal8
+ STR tVal9, [pDst2], #+4 ;// row 7, bytes 0-3 = tVal9
+ M_STR tVal8, [pDst], dstStepx2 ;// row 6, bytes 4-7 = tVal8
+ M_STR tVal9, [pDst2], dstStepx2 ;// row 7, bytes 4-7 = tVal9
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+OMX_VC_CHROMA_VERT ;// Vertical mode: the 8 bytes above the block are copied into each of the 8 rows
+
+ ;// M_STALL ARM1136JS=4
+
+ LDMIA pSrcAbove, {tVal6,tVal7} ;// tVal 6 to 7 = pSrcAbove[0 to 7]
+ MOV return, #OMX_Sts_NoErr ;// r0 is free here: pSrcLeft is not used in this mode
+
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 0 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 1 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 2 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 3 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 4 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 5 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 6 = tVal6, tVal7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 7 = tVal6, tVal7
+
+ M_EXIT ;// Macro to exit midway-break frm case
+
+OMX_VC_CHROMA_PLANE
+ ;// Plane mode: pDst[y][x] = clip8((a + 16 + b*(x-3) + c*(y-3)) >> 5), x,y = 0..7
+ ;// M_STALL ARM1136JS=3
+
+ RSB tVal14, leftStep, leftStep, LSL #3 ;// 7*leftStep
+ LDRB tVal7, [pSrcAbove, #+7] ;// pSrcAbove[7]
+ LDRB tVal6, [pSrcLeft, +tVal14] ;// pSrcLeft[7*leftStep]
+ LDRB tVal8, [pSrcAboveLeft] ;// pSrcAboveLeft[0]
+ LDRB tVal9, [pSrcAbove, #+6 ] ;// pSrcAbove[6]
+ LDRB tVal10,[pSrcAbove] ;// pSrcAbove[0]
+ ADD tVal2, tVal7, tVal6 ;// pSrcAbove[7] + pSrcLeft[7*leftStep]
+ SUB tVal6, tVal6, tVal8 ;// V0 = pSrcLeft[7*leftStep] - pSrcAboveLeft[0]
+ SUB tVal7, tVal7, tVal8 ;// H0 = pSrcAbove[7] - pSrcAboveLeft[0]
+ LSL tVal2, tVal2, #4 ;// a = 16 * (pSrcAbove[7] + pSrcLeft[7*leftStep])
+ ADD tVal2, tVal2, #16 ;// a + 16
+ SUB tVal9, tVal9,tVal10 ;// pSrcAbove[6] - pSrcAbove[0]
+ LDRB tVal8, [pSrcAbove,#+5] ;// pSrcAbove[5]
+ LDRB tVal10,[pSrcAbove,#+1] ;// pSrcAbove[1]
+ ADD tVal9, tVal9, tVal9, LSL #1 ;// H1 = 3 * (pSrcAbove[6] - pSrcAbove[0])
+ ADD tVal7, tVal9, tVal7, LSL #2 ;// H = H1 + 4*H0
+ SUB tVal8, tVal8, tVal10 ;// pSrcAbove[5] - pSrcAbove[1]
+ LDRB tVal9, [pSrcAbove,#+4] ;// pSrcAbove[4]
+ LDRB tVal10,[pSrcAbove,#+2] ;// pSrcAbove[2]
+ ADD tVal7, tVal7, tVal8, LSL #1 ;// H = H + 2*(pSrcAbove[5] - pSrcAbove[1])
+ SUB tVal11, tVal14,leftStep ;// 6*leftStep
+ ADD tVal11, pSrcLeft, tVal11 ;// pSrcLeft + 6*leftStep (walks upwards)
+ MOV tVal12, pSrcLeft ;// pSrcLeft (walks downwards)
+ SUB tVal9, tVal9, tVal10 ;// pSrcAbove[4] - pSrcAbove[2]
+ ADD tVal7, tVal7, tVal9 ;// H = H + (pSrcAbove[4] - pSrcAbove[2])
+ M_LDRB tVal8, [tVal11],-leftStep ;// pSrcLeft[6*leftStep]
+ M_LDRB tVal10,[tVal12],+leftStep ;// pSrcLeft[0]
+ ADD tVal7, tVal7, tVal7, LSL #4 ;// 17 * H
+ ADD tVal7, tVal7, #16 ;// 17 * H + 16
+ SUB tVal8, tVal8, tVal10 ;// pSrcLeft[6*leftStep] - pSrcLeft[0]
+ ASR b, tVal7, #5 ;// b = (17 * H + 16) >> 5 (r14: 7*leftStep no longer needed)
+ ADD tVal8, tVal8, tVal8, LSL #1 ;// V1 = 3 * (pSrcLeft[6*leftStep] - pSrcLeft[0])
+ ADD tVal6, tVal8, tVal6, LSL #2 ;// V = V1 + 4*V0
+ M_LDRB tVal8, [tVal11],-leftStep ;// pSrcLeft[5*leftStep]
+ M_LDRB tVal10,[tVal12],+leftStep ;// pSrcLeft[leftStep]
+ ADD tVal7, b, b, LSL #1 ;// 3*b
+ SUB tVal2, tVal2, tVal7 ;// a + 16 - 3*b
+ SUB tVal7, tVal8, tVal10 ;// pSrcLeft[5*leftStep] - pSrcLeft[leftStep]
+ M_LDRB tVal8, [tVal11],-leftStep ;// pSrcLeft[4*leftStep]
+ M_LDRB tVal10,[tVal12],+leftStep ;// pSrcLeft[2*leftStep]
+ ADD tVal6, tVal6, tVal7, LSL #1 ;// V = V + 2*(pSrcLeft[5*leftStep] - pSrcLeft[leftStep])
+ LDR r0x00FF00FF, =MASK_CONST ;// r0x00FF00FF = 0x00FF00FF (r11: tVal11 pointer no longer needed)
+ SUB tVal7, tVal8, tVal10 ;// pSrcLeft[4*leftStep] - pSrcLeft[2*leftStep]
+ ADD tVal6, tVal6, tVal7 ;// V = V + (pSrcLeft[4*leftStep] - pSrcLeft[2*leftStep])
+ SUB dstStep, dstStep, #4 ;// dstStep - 4: the first store of each row already advances by 4
+ ADD tVal6, tVal6, tVal6, LSL #4 ;// 17*V
+ ADD tVal6, tVal6, #16 ;// 17*V + 16
+
+ ;// M_STALL ARM1136JS=1
+
+ ASR c, tVal6, #5 ;// c = (17*V + 16)>>5 (r12: tVal12 pointer no longer needed)
+
+ ;// M_STALL ARM1136JS=1
+
+ ADD tVal6, c, c, LSL #1 ;// 3*c
+ UXTH c, c ;// only in half word
+ SUB tVal6, tVal2, tVal6 ;// p0 = a - 3*b - 3*c + 16 (may be negative)
+ ORR c, c, c, LSL #16 ;// {c,c}
+ ADD tVal7, b, b ;// 2b
+ ADD tVal2, tVal6, tVal7 ;// p2 = p0 + 2*b
+ ADD tVal7, tVal7, b ;// 3b
+ PKHBT p2p0, tVal6, tVal2, LSL #16 ;// p2p0 = pack {p2, p0}; PKHBT takes only the low half of p0, so a negative p0 cannot smear its sign bits over p2
+ UXTH b, b
+ UXTH tVal7, tVal7
+ ORR b, b, b, LSL #16 ;// {b,b}
+ ORR tVal7, tVal7, tVal7, LSL #16 ;// {3b,3b}
+ SADD16 p3p1, p2p0, b ;// p3p1 = p2p0 + {b,b}
+ SADD16 p6p4, p3p1, tVal7 ;// p6p4 = p3p1 + {3b,3b}
+ SADD16 p7p5, p6p4, b ;// p7p5 = p6p4 + {b,b}
+ MOV outerCount, #BLK_SIZE ;// Outer Loop Count (r14: b no longer needed)
+
+LOOP_PLANE
+ ;// Saturate into the pp scratch registers so the accumulators keep their unclipped values for the next row's +c
+ USAT16 pp7pp5, #13, p7p5 ;// clip13(p7) clip13(p5)
+ USAT16 pp6pp4, #13, p6p4 ;// clip13(p6) clip13(p4)
+ USAT16 pp3pp1, #13, p3p1 ;// clip13(p3) clip13(p1)
+ USAT16 pp2pp0, #13, p2p0 ;// clip13(p2) clip13(p0)
+
+ AND pp7pp5, r0x00FF00FF, pp7pp5, ASR #5 ;// clip8(p7) clip8(p5)
+ AND pp6pp4, r0x00FF00FF, pp6pp4, ASR #5 ;// clip8(p6) clip8(p4)
+ AND pp3pp1, r0x00FF00FF, pp3pp1, ASR #5 ;// clip8(p3) clip8(p1)
+ AND pp2pp0, r0x00FF00FF, pp2pp0, ASR #5 ;// clip8(p2) clip8(p0)
+
+ SUBS outerCount, outerCount, #1 ;// outerCount--; flags survive until the BNE (nothing below sets them)
+
+ ORR p3210, pp2pp0, pp3pp1, LSL #8 ;// pack {p3,p2, p1, p0}
+ STR p3210, [pDst], #4 ;// store {pDst[0] to pDst[3]}
+
+ ORR p7654, pp6pp4, pp7pp5, LSL #8 ;// pack {p7,p6, p5, p4}
+ M_STR p7654, [pDst], dstStep ;// store {pDst[4] to pDst[7]}; pDst -> next row
+
+ SADD16 p7p5, p7p5, c ;// {p7 + c}, {p5 + c}
+ SADD16 p6p4, p6p4, c ;// {p6 + c}, {p4 + c}
+ SADD16 p3p1, p3p1, c ;// {p3 + c}, {p1 + c}
+ SADD16 p2p0, p2p0, c ;// {p2 + c}, {p0 + c}
+
+ BNE LOOP_PLANE ;// Loop for 8 times
+ MOV return, #OMX_Sts_NoErr
+ M_END
+
+ ENDIF ;// ARM1136JS
+
+
+
+ END
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntraChroma_8x8 ends
+;//-----------------------------------------------------------------------------------------------
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
new file mode 100644
index 0000000..1557208
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
@@ -0,0 +1,501 @@
+;//
+;//
+;// File Name: omxVCM4P10_PredictIntra_16x16_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+;//-------------------------------------------------------
+;// Jump table implementing a C-style switch in asm: entry i is the
+;// address of the handler label for predMode i (loaded straight into pc).
+;//-------------------------------------------------------
+
+ M_TABLE armVCM4P10_pIndexTable16x16
+ DCD OMX_VC_16X16_VERT, OMX_VC_16X16_HOR ;// entries for predMode 0, 1
+ DCD OMX_VC_16X16_DC, OMX_VC_16X16_PLANE ;// entries for predMode 2, 3
+
+ IF ARM1136JS
+
+;//--------------------------------------------
+;// Constants
+;//--------------------------------------------
+BLK_SIZE EQU 0x10 ;// rows per block; row-loop count (the VERT / DC loops store two rows per pass)
+MUL_CONST0 EQU 0x01010101 ;// multiply a byte value by this to replicate it into all four bytes
+MUL_CONST1 EQU 0x00060004 ;// presumably a pair of 16-bit multipliers {6,4} for plane mode - TODO confirm
+MUL_CONST2 EQU 0x00070005 ;// presumably a pair of 16-bit multipliers {7,5} for plane mode - TODO confirm
+MUL_CONST3 EQU 0x00030001 ;// presumably a pair of 16-bit multipliers {3,1} for plane mode - TODO confirm
+MASK_CONST EQU 0x00FF00FF ;// mask selecting the low byte of each halfword
+
+;//--------------------------------------------
+;// Scratch variable
+;//--------------------------------------------
+y RN 12 ;// row-loop counter (r12 is shared with temp2, cMul2, count, tVal12, b)
+pc RN 15 ;// program counter; loaded from the jump table to dispatch
+
+return RN 0 ;// status code returned in r0 (same register as pSrcLeft)
+innerCount RN 0
+outerCount RN 1
+pSrcLeft2 RN 1 ;// pSrcLeft + leftStep (same register as pSrcAbove)
+pDst2 RN 2 ;// pDst + dstStep (same register as pSrcAboveLeft)
+sum RN 6 ;// DC accumulator (same register as tVal6 / predMode)
+pTable RN 9 ;// jump table base
+temp1 RN 10
+temp2 RN 12
+cMul1 RN 11
+cMul2 RN 12
+count RN 12 ;// DC mode: number of available neighbours (0, 1 or 2)
+dstStepx2 RN 11 ;// 2*dstStep (horizontal mode subtracts 12 from it)
+leftStepx2 RN 14 ;// 2*leftStep
+r0x01010101 RN 10 ;// holds MUL_CONST0
+r0x00FF00FF RN 11
+
+tVal0 RN 0 ;// tValN is simply rN: general temporaries that overlap the
+tVal1 RN 1 ;// named scratch registers above and the input argument
+tVal2 RN 2 ;// registers below, so writing one retires the other
+tVal3 RN 3
+tVal4 RN 4
+tVal5 RN 5
+tVal6 RN 6
+tVal7 RN 7
+tVal8 RN 8
+tVal9 RN 9
+tVal10 RN 10
+tVal11 RN 11
+tVal12 RN 12
+tVal14 RN 14
+
+b RN 12 ;// NOTE: b / c are r12 / r14 here, the reverse of the chroma 8x8 file
+c RN 14
+
+p2p0 RN 0 ;// pXpY: pixels X (high half) and Y (low half) packed as two halfwords
+p3p1 RN 1
+p6p4 RN 2
+p7p5 RN 4
+p10p8 RN 6
+p11p9 RN 7
+p14p12 RN 8
+p15p13 RN 9
+
+p3210 RN 10 ;// all four packed-output names share r10
+p7654 RN 10
+p111098 RN 10
+p15141312 RN 10
+
+;//--------------------------------------------
+;// Declare input registers
+;//--------------------------------------------
+pSrcLeft RN 0 ;// r0: left neighbour column, one byte per row, rows leftStep apart
+pSrcAbove RN 1 ;// r1: the 16 bytes of the row above the block
+pSrcAboveLeft RN 2 ;// r2: above-left corner byte
+pDst RN 3 ;// r3: output pointer, rows dstStep apart
+leftStep RN 4 ;// loaded from stack argument LeftStep
+dstStep RN 5 ;// loaded from stack argument DstStep
+predMode RN 6 ;// loaded from stack argument PredMode; index into the jump table
+availability RN 7 ;// loaded from stack argument Availability; tested against OMX_VC_UPPER / OMX_VC_LEFT in DC mode
+
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntra_16x16 starts
+;//-----------------------------------------------------------------------------------------------
+
+ ;// Write function header
+ M_START omxVCM4P10_PredictIntra_16x16, r11
+
+ ;// Define stack arguments
+ M_ARG LeftStep, 4
+ M_ARG DstStep, 4
+ M_ARG PredMode, 4
+ M_ARG Availability, 4
+
+ ;// M_STALL ARM1136JS=4
+
+ LDR pTable,=armVCM4P10_pIndexTable16x16 ;// Load index table for switch case
+
+ ;// Load argument from the stack
+ M_LDR predMode, PredMode ;// Arg predMode loaded from stack to reg
+ M_LDR leftStep, LeftStep ;// Arg leftStep loaded from stack to reg
+ M_LDR dstStep, DstStep ;// Arg dstStep loaded from stack to reg
+ M_LDR availability, Availability ;// Arg availability loaded from stack to reg
+
+ MOV y, #BLK_SIZE ;// Outer Loop Count
+ LDR pc, [pTable, predMode, LSL #2] ;// Branch to the case based on preMode
+
+OMX_VC_16X16_VERT
+ LDM pSrcAbove, {tVal6,tVal7,tVal8,tVal9};// tVal 6 to 9 = pSrcAbove[0 to 15]
+ ADD dstStepx2, dstStep, dstStep ;// double dstStep
+ ADD pDst2, pDst, dstStep ;// pDst2- pDst advanced by dstStep
+
+ ;// M_STALL ARM1136JS=2 ;// Stall outside the loop
+
+LOOP_VERT
+ STM pDst, {tVal6,tVal7,tVal8,tVal9} ;// pDst[0 to 15] = tVal 6 to 9
+ SUBS y, y, #2 ;// y--
+ ADD pDst, pDst, dstStepx2 ;// pDst advanced by dstStep
+ STM pDst2, {tVal6,tVal7,tVal8,tVal9} ;// pDst2[16 to 31] = tVal 6 to 9
+ ADD pDst2, pDst2, dstStepx2 ;// pDst advanced by dstStep
+ BNE LOOP_VERT ;// Loop for 8 times
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+
+OMX_VC_16X16_HOR
+
+ ;// M_STALL ARM1136JS=6
+
+ LDR r0x01010101, =MUL_CONST0 ;// Const to repeat the byte in reg 4 times
+ MOV y, #4 ;// Outer Loop Count
+ M_LDRB tVal6, [pSrcLeft], +leftStep ;// tVal6 = pSrcLeft[0 to 3]
+ ADD pDst2, pDst, dstStep ;// pDst2- pDst advanced by dstStep
+ M_LDRB tVal7, [pSrcLeft], +leftStep ;// tVal1 = pSrcLeft[4 to 7]
+ ADD dstStepx2, dstStep, dstStep ;// double dstStep
+ SUB dstStepx2, dstStepx2, #12 ;// double dstStep minus 12
+
+LOOP_HOR
+ M_LDRB tVal8, [pSrcLeft], +leftStep ;// tVal8 = pSrcLeft[0 to 3]
+ MUL tVal6, tVal6, r0x01010101 ;// replicate the val in all the bytes
+ M_LDRB tVal9, [pSrcLeft], +leftStep ;// tVal9 = pSrcLeft[4 to 7]
+ MUL tVal7, tVal7, r0x01010101 ;// replicate the val in all the bytes
+ SUBS y, y, #1 ;// y--
+ STR tVal6, [pDst], #+4 ;// store {tVal6} at pDst[0 to 3]
+ STR tVal7, [pDst2], #+4 ;// store {tVal7} at pDst2[0 to 3]
+ STR tVal6, [pDst], #+4 ;// store {tVal6} at pDst[4 to 7]
+ STR tVal7, [pDst2], #+4 ;// store {tVal7} at pDst2[4 to 7]
+ MUL tVal8, tVal8, r0x01010101 ;// replicate the val in all the bytes
+ STR tVal6, [pDst], #+4 ;// store {tVal6} at pDst[8 to 11]
+ STR tVal7, [pDst2], #+4 ;// store {tVal7} at pDst2[8 to 11]
+ MUL tVal9, tVal9, r0x01010101 ;// replicate the val in all the bytes
+ M_STR tVal6, [pDst], dstStepx2 ;// store {tVal6} at pDst[12 to 15]
+ M_STR tVal7, [pDst2], dstStepx2 ;// store {tVal7} at pDst2[12 to 15]
+ STR tVal8, [pDst], #+4 ;// store {tVal6} at pDst[0 to 3]
+ STR tVal9, [pDst2], #+4 ;// store {tVal7} at pDst2[0 to 3]
+ STR tVal8, [pDst], #+4 ;// store {tVal6} at pDst[4 to 7]
+ STR tVal9, [pDst2], #+4 ;// store {tVal7} at pDst2[4 to 7]
+ STR tVal8, [pDst], #+4 ;// store {tVal6} at pDst[8 to 11]
+ STR tVal9, [pDst2], #+4 ;// store {tVal7} at pDst2[8 to 11]
+ M_STR tVal8, [pDst], dstStepx2 ;// store {tVal6} at pDst[12 to 15]
+ M_LDRB tVal6, [pSrcLeft], +leftStep ;// tVal6 = pSrcLeft[0 to 3]
+ M_STR tVal9, [pDst2], dstStepx2 ;// store {tVal7} at pDst2[12 to 15]
+ M_LDRB tVal7, [pSrcLeft], +leftStep ;// tVal7 = pSrcLeft[4 to 7]
+ BNE LOOP_HOR ;// Loop for 3 times
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+OMX_VC_16X16_DC
+
+ ;// M_STALL ARM1136JS=2
+
+ MOV count, #0 ;// count = 0
+ TST availability, #OMX_VC_UPPER ;// if(availability & #OMX_VC_UPPER)
+ BEQ TST_LEFT ;// Jump to Left if not upper
+ LDM pSrcAbove,{tVal8,tVal9,tVal10,tVal11};// tVal 8 to 11 = pSrcAbove[0 to 15]
+ ADD count, count, #1 ;// if upper inc count by 1
+
+ ;// M_STALL ARM1136JS=2
+
+ UXTB16 tVal2, tVal8 ;// pSrcAbove[0, 2]
+ UXTB16 tVal6, tVal9 ;// pSrcAbove[4, 6]
+ UADD16 tVal2, tVal2, tVal6 ;// pSrcAbove[0, 2] + pSrcAbove[4, 6]
+ UXTB16 tVal8, tVal8, ROR #8 ;// pSrcAbove[1, 3]
+ UXTB16 tVal9, tVal9, ROR #8 ;// pSrcAbove[5, 7]
+ UADD16 tVal8, tVal8, tVal9 ;// pSrcAbove[1, 3] + pSrcAbove[5, 7]
+ UADD16 tVal2, tVal2, tVal8 ;// sum(pSrcAbove[0] to pSrcAbove[7])
+
+ UXTB16 tVal8, tVal10 ;// pSrcAbove[8, 10]
+ UXTB16 tVal9, tVal11 ;// pSrcAbove[12, 14]
+ UADD16 tVal8, tVal8, tVal9 ;// pSrcAbove[8, 10] + pSrcAbove[12, 14]
+ UXTB16 tVal10, tVal10, ROR #8 ;// pSrcAbove[9, 11]
+ UXTB16 tVal11, tVal11, ROR #8 ;// pSrcAbove[13, 15]
+ UADD16 tVal10, tVal10, tVal11 ;// pSrcAbove[9, 11] + pSrcAbove[13, 15]
+ UADD16 tVal8, tVal8, tVal10 ;// sum(pSrcAbove[8] to pSrcAbove[15])
+
+ UADD16 tVal2, tVal2, tVal8 ;// sum(pSrcAbove[0] to pSrcAbove[15])
+
+ ;// M_STALL ARM1136JS=1
+
+ ADD tVal2, tVal2, tVal2, LSR #16 ;// sum(pSrcAbove[0] to pSrcAbove[15])
+
+ ;// M_STALL ARM1136JS=1
+
+ UXTH sum, tVal2 ;// Extract the lower half for result
+
+TST_LEFT
+ TST availability, #OMX_VC_LEFT
+ BEQ TST_COUNT
+ ADD leftStepx2, leftStep,leftStep ;// leftStepx2 = 2 * leftStep
+ ADD pSrcLeft2, pSrcLeft, leftStep ;// pSrcLeft2 = pSrcLeft + leftStep
+
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0]
+ M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1]
+ M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= pSrcLeft[2]
+ M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= pSrcLeft[3]
+ ADD tVal7, tVal8, tVal9 ;// tVal7 = tVal8 + tVal9
+ ADD count, count, #1 ;// Inc Counter if Left is available
+ ADD tVal6, tVal10, tVal11 ;// tVal6 = tVal10 + tVal11
+
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0]
+ M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1]
+ M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= pSrcLeft[2]
+ M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= pSrcLeft[3]
+ ADD sum, tVal7, tVal6 ;// sum = tVal8 + tVal10
+ ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9
+ ADD tVal10, tVal10, tVal11 ;// tVal10= tVal10 + tVal11
+ ADD tVal7, tVal8, tVal10 ;// tVal7 = tVal8 + tVal10
+
+
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0]
+ M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1]
+ M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= pSrcLeft[2]
+ M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= pSrcLeft[3]
+ ADD sum, sum, tVal7 ;// sum = sum + tVal7
+ ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9
+ ADD tVal10, tVal10, tVal11 ;// tVal10= tVal10 + tVal11
+ ADD tVal7, tVal8, tVal10 ;// tVal7 = tVal8 + tVal10
+
+
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0]
+ M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1]
+ M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= pSrcLeft[2]
+ M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= pSrcLeft[3]
+ ADD sum, sum, tVal7 ;// sum = sum + tVal7
+ ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9
+ ADD tVal10, tVal10, tVal11 ;// tVal10= tVal10 + tVal11
+ ADD tVal7, tVal8, tVal10 ;// tVal7 = tVal8 + tVal10
+ ADD sum, sum, tVal7 ;// sum = sum + tVal7
+
+TST_COUNT
+ CMP count, #0 ;// if(count == 0)
+ MOVEQ sum, #128 ;// sum = 128 if(count == 0)
+ BEQ TST_COUNT0 ;// if(count == 0)
+ CMP count, #1 ;// if(count == 1)
+ ADDEQ sum, sum, #8 ;// sum += 8 if(count == 1)
+ ADDNE sum, sum, tVal2 ;// sum = sumleft + sumupper
+ ADDNE sum, sum, #16 ;// sum += 16 if(count == 2)
+
+ ;// M_STALL ARM1136JS=1
+
+ UXTH sum, sum ;// sum only byte rest cleared
+
+ ;// M_STALL ARM1136JS=1
+
+ LSREQ sum, sum, #4 ;// sum >> 4 if(count == 1)
+
+ ;// M_STALL ARM1136JS=1
+
+ LSRNE sum, sum, #5 ;// sum >> 5 if(count == 2)
+
+TST_COUNT0
+
+ ;// M_STALL ARM1136JS=1
+
+ ORR sum, sum, sum, LSL #8 ;// sum replicated in two halfword
+
+ ;// M_STALL ARM1136JS=1
+
+ ORR tVal6, sum, sum, LSL #16 ;// sum replicated in all bytes
+ CPY tVal7, tVal6 ;// tVal1 = tVal0
+ CPY tVal8, tVal6 ;// tVal2 = tVal0
+ CPY tVal9, tVal6 ;// tVal3 = tVal0
+ ADD dstStepx2, dstStep, dstStep ;// double dstStep
+ ADD pDst2, pDst, dstStep ;// pDst2- pDst advanced by dstStep
+ MOV y, #BLK_SIZE ;// Outer Loop Count
+
+LOOP_DC
+ STM pDst, {tVal6,tVal7,tVal8,tVal9} ;// pDst[0 to 15] = tVal 6 to 9
+ SUBS y, y, #2 ;// y--
+ ADD pDst, pDst, dstStepx2 ;// pDst advanced by dstStep
+ STM pDst2, {tVal6,tVal7,tVal8,tVal9} ;// pDst2[16 to 31] = tVal 6 to 9
+ ADD pDst2, pDst2, dstStepx2 ;// pDst advanced by dstStep
+ BNE LOOP_DC ;// Loop for 8 times
+
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+OMX_VC_16X16_PLANE
+
+ ;// M_STALL ARM1136JS=3
+ RSB tVal14, leftStep, leftStep, LSL #4 ;// tVal14 = 15*leftStep
+
+ ;// M_STALL ARM1136JS=2
+ LDRB tVal10, [pSrcLeft, tVal14] ;// tVal10 = pSrcLeft[15*leftStep]
+ LDRB tVal11, [pSrcAboveLeft] ;// tVal11 = pSrcAboveLeft[0]
+ LDRB tVal12, [pSrcAbove, #15]
+
+ ADD tVal2, tVal12, tVal10 ;// tVal2 = pSrcAbove[15] + pSrcLeft[15*leftStep]
+ SUB tVal10, tVal10, tVal11 ;// tVal10 = V0 = pSrcLeft[15*leftStep] - pSrcAboveLeft[0]
+ SUB tVal11, tVal12, tVal11 ;// tVal11 = H0 = pSrcAbove[15] - pSrcAboveLeft[0]
+ MOV tVal2, tVal2, LSL #4 ;// tVal2 = a = 16 * (pSrcAbove[15] + pSrcLeft[15*leftStep])
+
+ MOV tVal11, tVal11, LSL #3 ;// 8*[15]-[-1]
+ LDRB tVal6, [pSrcAbove, #0]
+ LDRB tVal7, [pSrcAbove, #14]
+ SUB tVal8, tVal7, tVal6
+ RSB tVal8, tVal8, tVal8, LSL #3 ;// 7*[14]-[0]
+ ADD tVal11, tVal11, tVal8
+ LDRB tVal6, [pSrcAbove, #1]
+ LDRB tVal7, [pSrcAbove, #13]
+ SUB tVal8, tVal7, tVal6
+ ADD tVal8, tVal8, tVal8
+ ADD tVal8, tVal8, tVal8, LSL #1 ;// 6*[13]-[1]
+ ADD tVal11, tVal11, tVal8
+ LDRB tVal6, [pSrcAbove, #2]
+ LDRB tVal7, [pSrcAbove, #12]
+ SUB tVal8, tVal7, tVal6
+ ADD tVal8, tVal8, tVal8, LSL #2 ;// 5*[12]-[2]
+ ADD tVal11, tVal11, tVal8
+ LDRB tVal6, [pSrcAbove, #3]
+ LDRB tVal7, [pSrcAbove, #11]
+ SUB tVal8, tVal7, tVal6
+ ADD tVal11, tVal11, tVal8, LSL #2 ;// + 4*[11]-[3]
+ LDRB tVal6, [pSrcAbove, #4]
+ LDRB tVal7, [pSrcAbove, #10]
+ SUB tVal8, tVal7, tVal6
+ ADD tVal8, tVal8, tVal8, LSL #1 ;// 3*[10]-[4]
+ ADD tVal11, tVal11, tVal8
+ LDRB tVal6, [pSrcAbove, #5]
+ LDRB tVal7, [pSrcAbove, #9]
+ SUB tVal8, tVal7, tVal6
+ ADD tVal11, tVal11, tVal8, LSL #1 ;// + 2*[9]-[5]
+ LDRB tVal6, [pSrcAbove, #6]
+ LDRB tVal7, [pSrcAbove, #8]
+ SUB tVal8, tVal7, tVal6 ;// 1*[8]-[6]
+ ADD tVal7, tVal11, tVal8
+
+ ADD tVal2, tVal2, #16 ;// tVal2 = a + 16
+ MOV tVal1, pSrcLeft ;// tVal4 = pSrcLeft
+ SUB tVal9, tVal14, leftStep ;// tVal9 = 14*leftStep
+ ADD tVal9, pSrcLeft, tVal9 ;// tVal9 = pSrcLeft + 14*leftStep
+
+ M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[14*leftStep]
+ M_LDRB tVal11, [tVal1], +leftStep ;// tVal11 = pSrcLeft[0]
+ ADD tVal7, tVal7, tVal7, LSL #2 ;// tVal7 = 5 * H
+ ADD tVal7, tVal7, #32 ;// tVal7 = 5 * H + 32
+ SUB tVal8, tVal8, tVal11 ;// tVal8 = pSrcLeft[14*leftStep] - pSrcLeft[0]
+ ASR tVal12, tVal7, #6 ;// tVal12 = b = (5 * H + 32) >> 6
+
+ RSB tVal8, tVal8, tVal8, LSL #3 ;// tVal8 = V1 = 7* (pSrcLeft[14*leftStep]-pSrcLeft[0])
+ ADD tVal6, tVal8, tVal10, LSL #3 ;// tVal6 = V = V0 +V1
+ M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[13*leftStep]
+ M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[leftStep]
+ RSB tVal7, tVal12, tVal12, LSL #3 ;// tVal7 = 7*b
+ SUB tVal2, tVal2, tVal7 ;// tVal2 = a + 16 - 7*b
+ SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[13*leftStep] - pSrcLeft[leftStep]
+ M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[12*lS]
+ ADD tVal7, tVal7, tVal7 ;// tVal7 = 2 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
+ M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[2*leftStep]
+ ADD tVal7, tVal7, tVal7, LSL #1 ;// tVal7 = 6 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
+ ADD tVal6, tVal6, tVal7 ;// tVal6 = V = V + V2
+ SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep]
+ M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[11*leftStep]
+ M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[3*leftStep]
+ ADD tVal7, tVal7, tVal7, LSL #2 ;// tVal7 = 5 * (pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep])
+ ADD tVal6, tVal6, tVal7 ;// tVal6 = V = V + V3
+ SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[11*leftStep] - pSrcLeft[3*leftStep]
+ M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[10*leftStep]
+ M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[4*leftStep]
+ ADD tVal6, tVal6, tVal7, LSL #2 ;// tVal6 = V = V + V4
+ SUB dstStep, dstStep, #16 ;// tVal5 = dstStep - 16
+ SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep]
+ M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[9*leftStep]
+ M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[5*leftStep]
+ ADD tVal7, tVal7, tVal7, LSL #1 ;// tVal7 = 3 * (pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep])
+ ADD tVal6, tVal6, tVal7 ;// tVal6 = V = V + V5
+ SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[9*leftStep] - pSrcLeft[5*leftStep]
+ M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[8*leftStep]
+ M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[6*leftStep]
+ ADD tVal6, tVal6, tVal7, LSL #1 ;// tVal6 = V = V + V6
+
+ ;// M_STALL ARM1136JS=1
+ SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[8*leftStep] - pSrcLeft[6*leftStep]
+ ADD tVal6, tVal6, tVal7 ;// tVal6 = V = V + V7
+
+ ;// M_STALL ARM1136JS=1
+ ADD tVal6, tVal6, tVal6, LSL #2 ;// tVal6 = 5*V
+ ADD tVal6, tVal6, #32 ;// tVal6 = 5*V + 32
+
+ ;// M_STALL ARM1136JS=1
+ ASR tVal14, tVal6, #6 ;// tVal14 = c = (5*V + 32)>>6
+
+ ;// M_STALL ARM1136JS=1
+ RSB tVal6, tVal14, tVal14, LSL #3 ;// tVal6 = 7*c
+ UXTH tVal14, tVal14 ;// tVal14 = Cleared the upper half word
+ ADD tVal10, tVal12, tVal12 ;// tVal10 = 2*b
+ ORR tVal14, tVal14, tVal14, LSL #16 ;// tVal14 = {c , c}
+ SUB tVal6, tVal2, tVal6 ;// tVal6 = d = a - 7*b - 7*c + 16
+ ADD tVal1, tVal6, tVal10 ;// tVal1 = pp2 = d + 2*b
+ ADD tVal10, tVal10, tVal12 ;// tVal10 =3*b
+ ORR tVal0, tVal6, tVal1, LSL #16 ;// tval0 = p2p0 = pack {p2, p0}
+ UXTH tVal12, tVal12 ;// tVal12 = Cleared the upper half word
+ UXTH tVal10, tVal10 ;// tVal12 = Cleared the upper half word
+ ORR tVal12, tVal12, tVal12, LSL #16 ;// tVal12 = {b , b}
+ ORR tVal10, tVal10, tVal10, LSL #16 ;// tVal10 = {3b , 3b}
+ SADD16 tVal1, tVal0, tVal12 ;// tVal1 = p3p1 = p2p0 + {b,b}
+ SADD16 tVal2, tVal1, tVal10 ;// tVal2 = p6p4 = p3p1 + {3b,3b}
+ SADD16 tVal4, tVal2, tVal12 ;// tVal4 = p7p5 = p6p4 + {b,b}
+ SADD16 tVal6, tVal4, tVal10 ;// tVal6 = p10p8 = p7p5 + {3b,3b}
+ SADD16 tVal7, tVal6, tVal12 ;// tVal7 = p11p9 = p10p8 + {b,b}
+ SADD16 tVal8, tVal7, tVal10 ;// tVal8 = p14p12 = p11p9 + {3b,3b}
+ SADD16 tVal9, tVal8, tVal12 ;// tVal9 = p15p13 = p14p12 + {b,b}
+ LDR r0x00FF00FF, =MASK_CONST ;// r0x00FF00FF = 0x00FF00FF
+
+LOOP_PLANE
+
+ USAT16 temp2, #13, p3p1
+ USAT16 temp1, #13, p2p0
+ SADD16 p3p1, p3p1, c
+ SADD16 p2p0, p2p0, c
+ AND temp2, r0x00FF00FF, temp2, ASR #5
+ AND temp1, r0x00FF00FF, temp1, ASR #5
+ ORR temp1, temp1, temp2, LSL #8
+ STR temp1, [pDst], #4
+
+ USAT16 temp2, #13, p7p5
+ USAT16 temp1, #13, p6p4
+ SADD16 p7p5, p7p5, c
+ SADD16 p6p4, p6p4, c
+ AND temp2, r0x00FF00FF, temp2, ASR #5
+ AND temp1, r0x00FF00FF, temp1, ASR #5
+ ORR temp1, temp1, temp2, LSL #8
+ STR temp1, [pDst], #4
+
+ USAT16 temp2, #13, p11p9
+ USAT16 temp1, #13, p10p8
+ SADD16 p11p9, p11p9, c
+ SADD16 p10p8, p10p8, c
+ AND temp2, r0x00FF00FF, temp2, ASR #5
+ AND temp1, r0x00FF00FF, temp1, ASR #5
+ ORR temp1, temp1, temp2, LSL #8
+ STR temp1, [pDst], #4
+
+ USAT16 temp2, #13, p15p13
+ USAT16 temp1, #13, p14p12
+ SADD16 p15p13, p15p13, c
+ SADD16 p14p12, p14p12, c
+ AND temp2, r0x00FF00FF, temp2, ASR #5
+ AND temp1, r0x00FF00FF, temp1, ASR #5
+ ORR temp1, temp1, temp2, LSL #8
+ STR temp1, [pDst], #4
+
+ ADDS r0x00FF00FF, r0x00FF00FF, #1<<28 ;// Loop counter value in top 4 bits
+
+ ADD pDst, pDst, dstStep
+
+ BCC LOOP_PLANE ;// Loop for 16 times
+ MOV return, #OMX_Sts_NoErr
+ M_END
+
+ ENDIF ;// ARM1136JS
+
+
+ END
+;-----------------------------------------------------------------------------------------------
+; omxVCM4P10_PredictIntra_16x16 ends
+;-----------------------------------------------------------------------------------------------
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
new file mode 100644
index 0000000..a90f460
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
@@ -0,0 +1,567 @@
+;//
+;//
+;// File Name: omxVCM4P10_PredictIntra_4x4_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+;// Define the processor variants supported by this file
+
+ M_VARIANTS ARM1136JS
+
+;//-------------------------------------------------------
+;// This table for implementing switch case of C in asm by
+;// the method of two levels of indexing.
+;//-------------------------------------------------------
+
+ M_TABLE armVCM4P10_pSwitchTable4x4 ;// Case addresses, one word each, indexed by predMode
+ DCD OMX_VC_4x4_VERT, OMX_VC_4x4_HOR ;// predMode 0, 1
+ DCD OMX_VC_4x4_DC, OMX_VC_4x4_DIAG_DL ;// predMode 2, 3
+ DCD OMX_VC_4x4_DIAG_DR, OMX_VC_4x4_VR ;// predMode 4, 5
+ DCD OMX_VC_4x4_HD, OMX_VC_4x4_VL ;// predMode 6, 7
+ DCD OMX_VC_4x4_HU ;// predMode 8
+
+ IF ARM1136JS
+
+;//--------------------------------------------
+;// Constants
+;//--------------------------------------------
+BLK_SIZE EQU 0x8 ;// Not referenced by the code in this file
+MUL_CONST0 EQU 0x01010101 ;// Multiplying a byte value by this replicates it into all four byte lanes
+ADD_CONST1 EQU 0x80808080 ;// 0x80 in every byte lane; added with UADD8 after the UHADD8/UHSUB8 steps
+
+;//--------------------------------------------
+;// Scratch variable
+;//--------------------------------------------
+return RN 0 ;// r0: set to OMX_Sts_NoErr on every path before exit (shares r0 with pSrcLeft)
+pTable RN 9 ;// r9: base of armVCM4P10_pSwitchTable4x4, needed only for the dispatch
+pc RN 15
+r0x01010101 RN 1 ;// shares r1 with pSrcAbove and tVal1
+r0x80808080 RN 0 ;// shares r0 with pSrcLeft, tVal0 and return
+
+tVal0 RN 0 ;// tValN is simply rN; the names below alias the same registers
+tVal1 RN 1
+tVal2 RN 2
+tVal4 RN 4
+tVal6 RN 6
+tVal7 RN 7
+tVal8 RN 8
+tVal9 RN 9
+tVal10 RN 10
+tVal11 RN 11
+tVal12 RN 12
+tVal14 RN 14
+
+Out0 RN 6 ;// Out0-Out3 (output rows) share r6-r9 with Left0-Left3 and tVal6-tVal9
+Out1 RN 7
+Out2 RN 8
+Out3 RN 9
+
+Left0 RN 6 ;// Left0-Left3 (left neighbour bytes) share r6-r9 with Out0-Out3
+Left1 RN 7
+Left2 RN 8
+Left3 RN 9
+
+Above0123 RN 12 ;// shares r12 with tVal12
+Above4567 RN 14 ;// shares r14 with tVal14
+
+AboveLeft RN 10 ;// shares r10 with tVal10
+
+;//--------------------------------------------
+;// Declare input registers
+;//--------------------------------------------
+pSrcLeft RN 0 ;// input pointer (shares r0 with return, tVal0, r0x80808080)
+pSrcAbove RN 1 ;// input pointer (shares r1 with tVal1, r0x01010101)
+pSrcAboveLeft RN 2 ;// input pointer (shares r2 with tVal2)
+pDst RN 3 ;// output pointer
+leftStep RN 4 ;// input variable, loaded from stack argument LeftStep (shares r4 with tVal4)
+dstStep RN 5 ;// input variable, loaded from stack argument DstStep
+predMode RN 6 ;// input variable, loaded from stack argument PredMode (shares r6 with Left0/Out0)
+availability RN 7 ;// input variable, loaded from stack argument Availability (shares r7 with Left1/Out1)
+
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntra_4x4 starts
+;//-----------------------------------------------------------------------------------------------
+
+ ;// Write function header
+ M_START omxVCM4P10_PredictIntra_4x4, r11
+
+ ;// Define the four stack arguments (4 bytes each)
+ M_ARG LeftStep, 4
+ M_ARG DstStep, 4
+ M_ARG PredMode, 4
+ M_ARG Availability, 4
+
+ ;// M_STALL ARM1136JS=4
+
+ LDR pTable,=armVCM4P10_pSwitchTable4x4 ;// Load base address of the case table
+
+ ;// Load the stack arguments into their registers
+ M_LDR predMode, PredMode ;// Arg predMode loaded from stack to reg
+ M_LDR leftStep, LeftStep ;// Arg leftStep loaded from stack to reg
+ M_LDR dstStep, DstStep ;// Arg dstStep loaded from stack to reg
+ M_LDR availability, Availability ;// Arg availability loaded from stack to reg
+
+ LDR pc, [pTable, predMode, LSL #2] ;// pc = pTable[predMode]: jump to the case; predMode is not range-checked here
+
+OMX_VC_4x4_VERT
+ ;// Vertical: every output row is a copy of the four bytes at pSrcAbove
+ LDR Above0123, [pSrcAbove] ;// Above0123 = pSrcAbove[0 to 3] (one word load)
+ M_STR Above0123, [pDst], dstStep ;// row 0 = Above0123, pDst += dstStep
+ M_STR Above0123, [pDst], dstStep ;// row 1 = Above0123, pDst += dstStep
+ M_STR Above0123, [pDst], dstStep ;// row 2 = Above0123, pDst += dstStep
+ STR Above0123, [pDst] ;// row 3 = Above0123
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Macro to exit midway-break from case
+
+OMX_VC_4x4_HOR
+ ;// Horizontal: row i is the left neighbour byte i repeated four times
+ ;// M_STALL ARM1136JS=6
+
+ LDR r0x01010101, =MUL_CONST0 ;// Const to repeat the byte in reg 4 times (overwrites pSrcAbove, unused here)
+ M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0*leftStep]
+ M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1*leftStep]
+ M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2*leftStep]
+ LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3*leftStep]
+ MUL Out0, Left0, r0x01010101 ;// Out0 = Left0 in all four bytes (Out0 and Left0 are both r6)
+ MUL Out1, Left1, r0x01010101 ;// Out1 = Left1 in all four bytes
+ MUL Out2, Left2, r0x01010101 ;// Out2 = Left2 in all four bytes
+ MUL Out3, Left3, r0x01010101 ;// Out3 = Left3 in all four bytes
+ M_STR Out0, [pDst], dstStep ;// row 0 = Out0, pDst += dstStep
+ M_STR Out1, [pDst], dstStep ;// row 1 = Out1, pDst += dstStep
+ M_STR Out2, [pDst], dstStep ;// row 2 = Out2, pDst += dstStep
+ STR Out3, [pDst] ;// row 3 = Out3
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Macro to exit midway-break from case
+
+OMX_VC_4x4_DC
+ ;// DC: fill the block with the rounded mean of the available neighbours
+ ;// M_STALL ARM1136JS=6
+
+ AND availability, availability, #(OMX_VC_UPPER + OMX_VC_LEFT) ;// keep only the UPPER and LEFT bits
+ CMP availability, #(OMX_VC_UPPER + OMX_VC_LEFT)
+ BNE UpperOrLeftOrNoneAvailable ;// Branch unless both upper and left are available
+ LDR Above0123, [pSrcAbove] ;// Above0123 = pSrcAbove[0 to 3]
+
+ ;// M_STALL ARM1136JS=1
+
+ UXTB16 tVal7, Above0123 ;// pSrcAbove[0, 2] as two halfwords
+ UXTB16 tVal6, Above0123, ROR #8 ;// pSrcAbove[1, 3] as two halfwords
+ UADD16 tVal11, tVal6, tVal7 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
+ M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0*leftStep] (r6: tVal6 is no longer needed)
+ M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1*leftStep] (r7: tVal7 is no longer needed)
+ ADD tVal11, tVal11, LSR #16 ;// low halfword = sum(pSrcAbove[0] to pSrcAbove[3])
+ M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2*leftStep]
+ LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3*leftStep]
+ UXTH tVal11, tVal11 ;// upsum1 (Clear the top junk bits)
+ ADD tVal6, Left0, Left1 ;// tVal6 = Left0 + Left1
+ ADD tVal7, Left2, Left3 ;// tVal7 = Left2 + Left3
+ ADD tVal6, tVal6, tVal7 ;// tVal6 = sum of the four left bytes
+ ADD Out0, tVal6, tVal11 ;// Out0 = left sum + above sum
+ ADD Out0, Out0, #4 ;// Out0 = Out0 + 4 (rounding)
+ LDR r0x01010101, =MUL_CONST0 ;// 0x01010101
+ MOV Out0, Out0, LSR #3 ;// Out0 = (sum of 8 + 4)>>3
+
+ ;// M_STALL ARM1136JS=1
+
+ MUL Out0, Out0, r0x01010101 ;// replicate the val in all the bytes
+
+ ;// M_STALL ARM1136JS=1
+
+ MOV return, #OMX_Sts_NoErr
+ M_STR Out0, [pDst], dstStep ;// row 0 = Out0, pDst += dstStep
+ M_STR Out0, [pDst], dstStep ;// row 1 = Out0, pDst += dstStep
+ M_STR Out0, [pDst], dstStep ;// row 2 = Out0, pDst += dstStep
+ STR Out0, [pDst] ;// row 3 = Out0
+ M_EXIT ;// Macro to exit midway-break from case
+
+UpperOrLeftOrNoneAvailable
+ ;// M_STALL ARM1136JS=3
+
+ CMP availability, #OMX_VC_UPPER ;// availability holds only the UPPER/LEFT bits here: test for UPPER alone
+ BNE LeftOrNoneAvailable ;// Branch if upper is not available
+ LDR Above0123, [pSrcAbove] ;// Above0123 = pSrcAbove[0 to 3]
+
+ ;// M_STALL ARM1136JS=3
+
+ UXTB16 tVal7, Above0123 ;// pSrcAbove[0, 2] as two halfwords
+ UXTB16 tVal6, Above0123, ROR #8 ;// pSrcAbove[1, 3] as two halfwords
+ UADD16 Out0, tVal6, tVal7 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
+ LDR r0x01010101, =MUL_CONST0 ;// 0x01010101
+ ADD Out0, Out0, LSR #16 ;// low halfword = sum(pSrcAbove[0] to pSrcAbove[3])
+
+ ;// M_STALL ARM1136JS=1
+
+ UXTH Out0, Out0 ;// upsum1 (Clear the top junk bits)
+ ADD Out0, Out0, #2 ;// Out0 = Out0 + 2 (rounding)
+
+ ;// M_STALL ARM1136JS=1
+
+ MOV Out0, Out0, LSR #2 ;// Out0 = (sum of 4 + 2)>>2
+
+ ;// M_STALL ARM1136JS=1
+
+ MUL Out0, Out0, r0x01010101 ;// replicate the val in all the bytes
+
+ ;// M_STALL ARM1136JS=1
+
+ MOV return, #OMX_Sts_NoErr
+ M_STR Out0, [pDst], dstStep ;// row 0 = Out0, pDst += dstStep
+ M_STR Out0, [pDst], dstStep ;// row 1 = Out0, pDst += dstStep
+ M_STR Out0, [pDst], dstStep ;// row 2 = Out0, pDst += dstStep
+ STR Out0, [pDst] ;// row 3 = Out0
+
+ M_EXIT ;// Macro to exit midway-break from case
+
+LeftOrNoneAvailable
+ ;// M_STALL ARM1136JS=3
+
+ LDR r0x01010101, =MUL_CONST0 ;// 0x01010101 (loaded before the branch: NoneAvailable uses it too)
+ CMP availability, #OMX_VC_LEFT ;// availability holds only the UPPER/LEFT bits here: test for LEFT alone
+ BNE NoneAvailable
+ M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0*leftStep]
+ M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1*leftStep]
+ M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2*leftStep]
+ LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3*leftStep]
+ ADD Out0, Left0, Left1 ;// Out0 = Left0 + Left1
+
+ ;// M_STALL ARM1136JS=1
+
+ ADD Out1, Left2, Left3 ;// Out1 = Left2 + Left3
+ ADD Out0, Out0, Out1 ;// Out0 = sum of the four left bytes
+ ADD Out0, Out0, #2 ;// Out0 = Out0 + 2 (rounding)
+
+ ;// M_STALL ARM1136JS=1
+
+ MOV Out0, Out0, LSR #2 ;// Out0 = (sum of 4 + 2)>>2
+
+ ;// M_STALL ARM1136JS=1
+
+ MUL Out0, Out0, r0x01010101 ;// replicate the val in all the bytes
+
+ ;// M_STALL ARM1136JS=1
+
+ MOV return, #OMX_Sts_NoErr
+ M_STR Out0, [pDst], dstStep ;// row 0 = Out0, pDst += dstStep
+ M_STR Out0, [pDst], dstStep ;// row 1 = Out0, pDst += dstStep
+ M_STR Out0, [pDst], dstStep ;// row 2 = Out0, pDst += dstStep
+ STR Out0, [pDst] ;// row 3 = Out0
+ M_EXIT ;// Macro to exit midway-break from case
+
+NoneAvailable
+ MOV Out0, #128 ;// Out0 = 128: neither upper nor left is available
+
+ ;// M_STALL ARM1136JS=5
+
+ MUL Out0, Out0, r0x01010101 ;// replicate 128 in all the bytes (constant was loaded at LeftOrNoneAvailable)
+
+ ;// M_STALL ARM1136JS=1
+
+ MOV return, #OMX_Sts_NoErr
+ M_STR Out0, [pDst], dstStep ;// row 0 = Out0, pDst += dstStep
+ M_STR Out0, [pDst], dstStep ;// row 1 = Out0, pDst += dstStep
+ M_STR Out0, [pDst], dstStep ;// row 2 = Out0, pDst += dstStep
+ STR Out0, [pDst] ;// row 3 = Out0
+ M_EXIT ;// Macro to exit midway-break from case
+
+OMX_VC_4x4_DIAG_DL
+
+ ;//------------------------------------------------------------------
+ ;// f = (a+2*b+c+2)>>2
+ ;// Calculate as:
+ ;// d = (a + c )>>1
+ ;// e = (d - b')>>1
+ ;// f = e + 128
+ ;//------------------------------------------------------------------
+
+ ;// M_STALL ARM1136JS=3
+
+ TST availability, #OMX_VC_UPPER_RIGHT ;// flags stay valid up to the BNE below
+ LDMIA pSrcAbove, {Above0123, Above4567} ;// loads pSrcAbove[0 to 7] on both paths; Above4567 is unused on this one
+ LDR r0x80808080, =ADD_CONST1 ;// 0x80808080
+ BNE DLUpperRightAvailable
+ LDR r0x01010101, =MUL_CONST0 ;// 0x01010101
+ MOV tVal7, Above0123, LSR #24 ;// {00, 00, 00, U3 }
+ MOV tVal11, tVal7, LSL #24 ;// {U3, 00, 00, 00 }
+ MUL Out3, tVal7, r0x01010101 ;// {U3, U3, U3, U3 }
+ MOV tVal8, Above0123, LSR #16 ;// {00, 00, U3, U2 }
+ MOV tVal10, Above0123, LSR #8 ;// {00, U3, U2, U1 }
+ MVN tVal10, tVal10 ;// {00', U3', U2', U1'}
+ UHADD8 tVal8, tVal8, Above0123 ;// {xx, xx, d1, d0 }
+ UHADD8 tVal6, Above0123, tVal9 ;// {xx, d2, xx, xx } (tVal9 is r9 = Out3 = {U3,U3,U3,U3})
+ UHSUB8 tVal8, tVal8, tVal10 ;// {xx, xx, e1, e0 }
+ UHSUB8 tVal6, tVal6, tVal10 ;// {xx, e2, xx, xx }
+ UADD8 tVal8, tVal8, r0x80808080 ;// {xx, xx, f1, f0 }
+ UADD8 tVal6, tVal6, r0x80808080 ;// {xx, f2, xx, xx }
+
+ ;// M_STALL ARM1136JS=1
+
+ PKHBT tVal6, tVal8, tVal6 ;// {xx, f2, f1, f0 }
+ BIC tVal6, tVal6, #0xFF000000 ;// {00, f2, f1, f0 }
+ ORR Out0, tVal6, tVal11 ;// {U3, f2, f1, f0 }
+
+ ;// M_STALL ARM1136JS=1
+
+ PKHTB Out1, Out3, Out0, ASR #8 ;// {U3, U3, f2, f1 }
+ MOV return, #OMX_Sts_NoErr ;// r0 is free: the 0x80808080 constant is no longer needed
+ PKHTB Out2, Out3, Out1, ASR #8 ;// {U3, U3, U3, f2 }
+
+ M_STR Out0, [pDst], dstStep ;// row 0 = {U3, f2, f1, f0}, pDst += dstStep
+ M_STR Out1, [pDst], dstStep ;// row 1 = {U3, U3, f2, f1}, pDst += dstStep
+ M_STR Out2, [pDst], dstStep ;// row 2 = {U3, U3, U3, f2}, pDst += dstStep
+ STR Out3, [pDst] ;// row 3 = {U3, U3, U3, U3}
+ M_EXIT ;// Macro to exit midway-break from case
+
+DLUpperRightAvailable
+ ;// Diagonal down-left using all eight bytes above; r0x80808080 was loaded before the branch here
+ MOV tVal8, Above0123, LSR #24 ;// {00, 00, 00, U3 }
+ MOV tVal9, Above0123, LSR #16 ;// {00, 00, U3, U2 }
+ MOV tVal10, Above0123, LSR #8 ;// {00, U3, U2, U1 }
+ ORR tVal8, tVal8, Above4567, LSL #8 ;// {U6, U5, U4, U3 }
+ ORR tVal10, tVal10, Above4567, LSL #24 ;// {U4, U3, U2, U1 }
+ PKHBT tVal9, tVal9, Above4567, LSL #16 ;// {U5, U4, U3, U2 }
+ MVN tVal1, tVal8 ;// {U6', U5', U4', U3'}
+ MVN tVal10, tVal10 ;// {U4', U3', U2', U1'}
+ MVN tVal2, Above4567 ;// {U7', U6', U5', U4'}
+ UHADD8 tVal6, Above0123, tVal9 ;// {d3, d2, d1, d0 }
+ UHADD8 tVal9, tVal9, Above4567 ;// {d5, d4, d3, d2 }
+ UHADD8 tVal8, Above4567, tVal8 ;// {d6, xx, xx, xx } (d6 pairs U6 with U7)
+ UHSUB8 tVal6, tVal6, tVal10 ;// {e3, e2, e1, e0 }
+ UHSUB8 tVal12, tVal9, tVal1 ;// {e5, e4, e3, e2 }
+ UHSUB8 tVal8, tVal8, tVal2 ;// {e6, xx, xx, xx } (b' is U7', giving (U6 + 3*U7 + 2)>>2)
+ UADD8 Out0, tVal6, r0x80808080 ;// {f3, f2, f1, f0 }
+ UADD8 tVal9, tVal8, r0x80808080 ;// {f6, xx, xx, xx }
+ UADD8 Out2, tVal12, r0x80808080 ;// {f5, f4, f3, f2 }
+ MOV tVal7, Out0, LSR #8 ;// {00, f3, f2, f1 }
+ AND tVal9, tVal9, #0xFF000000 ;// {f6, 00, 00, 00 }
+ PKHBT Out1, tVal7, Out2, LSL #8 ;// {f4, f3, f2, f1 }
+ ORR Out3, tVal9, Out2, LSR #8 ;// {f6, f5, f4, f3 }
+ M_STR Out0, [pDst], dstStep ;// row 0 = {f3 to f0}, pDst += dstStep
+ M_STR Out1, [pDst], dstStep ;// row 1 = {f4 to f1}, pDst += dstStep
+ M_STR Out2, [pDst], dstStep ;// row 2 = {f5 to f2}, pDst += dstStep
+ STR Out3, [pDst] ;// row 3 = {f6 to f3}
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Macro to exit midway-break from case
+
+
+OMX_VC_4x4_DIAG_DR
+ ;// Diagonal down-right: (a + 2*b + c + 2)>>2 over the run L3 L2 L1 L0 UL U0 U1 U2 U3
+ ;// M_STALL ARM1136JS=4
+
+ M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0*leftStep]
+ M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1*leftStep]
+ M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2*leftStep]
+ LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3*leftStep]
+ LDRB AboveLeft, [pSrcAboveLeft] ;// AboveLeft = pSrcAboveLeft[0]
+ ORR tVal7, Left1, Left0, LSL #8 ;// tVal7 = 00 00 L0 L1
+ LDR Above0123, [pSrcAbove] ;// Above0123 = U3 U2 U1 U0
+ LDR r0x80808080, =ADD_CONST1 ;// 0x80808080 (r0: pSrcLeft is no longer needed)
+ ORR tVal8, Left3, Left2, LSL #8 ;// tVal8 = 00 00 L2 L3
+ PKHBT tVal7, tVal8, tVal7, LSL #16 ;// tVal7 = L0 L1 L2 L3
+ MOV tVal8, Above0123, LSL #8 ;// tVal8 = U2 U1 U0 00
+ MOV tVal9, tVal7, LSR #8 ;// tVal9 = 00 L0 L1 L2
+ ORR tVal8, tVal8, AboveLeft ;// tVal8 = U2 U1 U0 UL
+ ORR tVal9, tVal9, AboveLeft, LSL #24 ;// tVal9 = UL L0 L1 L2
+ MOV tVal10, Above0123, LSL #24 ;// tVal10= U0 00 00 00
+ UXTB tVal11, tVal7, ROR #24 ;// tVal11= 00 00 00 L0
+ ORR tVal10, tVal10, tVal9, LSR #8 ;// tVal10= U0 UL L0 L1
+ ORR tVal11, tVal11, tVal8, LSL #8 ;// tVal11= U1 U0 UL L0
+ UHADD8 tVal11, Above0123, tVal11 ;// tVal11= d1 d0 dL g0
+ UHADD8 tVal10, tVal7, tVal10 ;// tVal10= g0 g1 g2 g3
+ MVN tVal8, tVal8 ;// tVal8 = U2'U1'U0'UL'
+ MVN tVal9, tVal9 ;// tVal9 = UL'L0'L1'L2'
+ UHSUB8 tVal11, tVal11, tVal8 ;// tVal11= e1 e0 eL h0
+ UHSUB8 tVal10, tVal10, tVal9 ;// tVal10= h0 h1 h2 h3
+ UADD8 Out3, tVal10, r0x80808080 ;// Out3 = i0 i1 i2 i3
+ UADD8 Out0, tVal11, r0x80808080 ;// Out0 = f1 f0 fL i0
+ UXTH tVal11, Out3, ROR #8 ;// tVal11= 00 00 i1 i2
+ MOV tVal7, Out0, LSL #8 ;// tVal7 = f0 fL i0 00
+ ORR Out1, tVal7, tVal11, LSR #8 ;// Out1 = f0 fL i0 i1
+ PKHBT Out2, tVal11, Out0, LSL #16 ;// Out2 = fL i0 i1 i2
+ M_STR Out0, [pDst], dstStep ;// row 0 = {f1 to i0}, pDst += dstStep
+ M_STR Out1, [pDst], dstStep ;// row 1 = {f0 to i1}, pDst += dstStep
+ M_STR Out2, [pDst], dstStep ;// row 2 = {fL to i2}, pDst += dstStep
+ STR Out3, [pDst] ;// row 3 = {i0 to i3}
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Macro to exit midway-break from case
+
+OMX_VC_4x4_VR
+ ;// Vertical-right. In the lane comments, a 00 lane is FF after MVN, 80 after UHSUB8 and 00 again after UADD8
+ ;// M_STALL ARM1136JS=4
+
+ LDR Above0123, [pSrcAbove] ;// Above0123 = U3 U2 U1 U0
+ LDRB AboveLeft, [pSrcAboveLeft] ;// AboveLeft = 00 00 00 UL
+ M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = 00 00 00 L0
+ M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = 00 00 00 L1
+ LDRB Left2, [pSrcLeft] ;// Left2 = 00 00 00 L2
+ MOV tVal0, Above0123, LSL #8 ;// tVal0 = U2 U1 U0 00
+ MOV tVal9, Above0123 ;// tVal9 = U3 U2 U1 U0 (copy: r12 is overwritten as tVal12 below)
+ ORR tVal14, tVal0, AboveLeft ;// tVal14 = U2 U1 U0 UL
+ MVN tVal11, tVal14 ;// tVal11 = U2'U1'U0'UL'
+ MOV tVal2, tVal14, LSL #8 ;// tVal2 = U1 U0 UL 00
+ UHSUB8 tVal1, Above0123, tVal11 ;// tVal1 = d2 d1 d0 dL
+ UHADD8 tVal10, AboveLeft, Left1 ;// tVal10 = 00 00 00 j1
+ MVN tVal4, Left0 ;// tVal4 = FF FF FF L0'
+ UHSUB8 tVal4, tVal10, tVal4 ;// tVal4 = 80 80 80 k1
+ ORR tVal12, tVal0, Left0 ;// tVal12 = U2 U1 U0 L0
+ ORR tVal14, tVal2, Left0 ;// tVal14 = U1 U0 UL L0
+ LDR r0x80808080, =ADD_CONST1 ;// 0x80808080 (r0: tVal0 is no longer needed)
+ UHADD8 tVal10, tVal9, tVal14 ;// tVal10 = g3 g2 g1 g0
+ UADD8 Out0, tVal1, r0x80808080 ;// Out0 = e2 e1 e0 eL (r6: Left0 is no longer needed)
+ UHSUB8 tVal10, tVal10, tVal11 ;// tVal10 = h3 h2 h1 h0
+ M_STR Out0, [pDst], dstStep ;// row 0 = {e2 to eL}, pDst += dstStep
+ MOV tVal1, tVal14, LSL #8 ;// tVal1 = U0 UL L0 00
+ MOV tVal6, Out0, LSL #8 ;// tVal6 = e1 e0 eL 00
+ ORR tVal2, tVal2, Left1 ;// tVal2 = U1 U0 UL L1
+ UADD8 tVal4, tVal4, r0x80808080 ;// tVal4 = 00 00 00 l1
+ UADD8 Out1, tVal10, r0x80808080 ;// Out1 = i3 i2 i1 i0
+ MVN tVal2, tVal2 ;// tVal2 = U1'U0'UL'L1'
+ ORR tVal1, tVal1, Left2 ;// tVal1 = U0 UL L0 L2
+ ORR Out2, tVal6, tVal4 ;// Out2 = e1 e0 eL l1
+ UHADD8 tVal1, tVal1, tVal12 ;// tVal1 = g2 g1 g0 j2
+ M_STR Out1, [pDst], dstStep ;// row 1 = {i3 to i0}, pDst += dstStep
+ M_STR Out2, [pDst], dstStep ;// row 2 = {e1 to l1}, pDst += dstStep
+ UHSUB8 tVal9, tVal1, tVal2 ;// tVal9 = h2 h1 h0 k2
+ UADD8 Out3, tVal9, r0x80808080 ;// Out3 = i2 i1 i0 l2
+ STR Out3, [pDst] ;// row 3 = {i2 to l2}
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Macro to exit midway-break from case
+
+OMX_VC_4x4_HD
+ ;// Horizontal-down. In the lane comments, a 00 lane is FF after MVN, 80 after UHSUB8 and 00 again after UADD8
+ ;// M_STALL ARM1136JS=4
+
+ LDR Above0123, [pSrcAbove] ;// Above0123 = U3 U2 U1 U0
+ LDRB AboveLeft, [pSrcAboveLeft] ;// AboveLeft = 00 00 00 UL
+ M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = 00 00 00 L0
+ M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = 00 00 00 L1
+ M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = 00 00 00 L2
+ LDRB Left3, [pSrcLeft] ;// Left3 = 00 00 00 L3
+ LDR r0x80808080, =ADD_CONST1 ;// 0x80808080 (r0: pSrcLeft is no longer needed)
+ ORR tVal2, AboveLeft, Above0123, LSL #8;// tVal2 = U2 U1 U0 UL
+ MVN tVal1, Left0 ;// tVal1 = FF FF FF L0'
+ ORR tVal4, Left0, tVal2, LSL #8 ;// tVal4 = U1 U0 UL L0
+ MVN tVal2, tVal2 ;// tVal2 = U2'U1'U0'UL'
+ UHADD8 tVal4, tVal4, Above0123 ;// tVal4 = g3 g2 g1 g0
+ UHSUB8 tVal1, AboveLeft, tVal1 ;// tVal1 = 80 80 80 dL
+ UHSUB8 tVal4, tVal4, tVal2 ;// tVal4 = h3 h2 h1 h0
+ UADD8 tVal1, tVal1, r0x80808080 ;// tVal1 = 00 00 00 eL
+ UADD8 tVal4, tVal4, r0x80808080 ;// tVal4 = i3 i2 i1 i0
+ ORR tVal2, Left0, AboveLeft, LSL #16 ;// tVal2 = 00 UL 00 L0
+ MOV tVal4, tVal4, LSL #8 ;// tVal4 = i2 i1 i0 00
+ ORR tVal11, Left1, Left0, LSL #16 ;// tVal11= 00 L0 00 L1
+ ORR tVal7, Left2, Left1, LSL #16 ;// tVal7 = 00 L1 00 L2
+ ORR tVal10, Left3, Left2, LSL #16 ;// tVal10= 00 L2 00 L3
+ ORR Out0, tVal4, tVal1 ;// Out0 = i2 i1 i0 eL
+ M_STR Out0, [pDst], dstStep ;// row 0 = Out0, pDst += dstStep
+ MOV tVal4, Out0, LSL #16 ;// tVal4 = i0 eL 00 00
+ UHADD8 tVal2, tVal2, tVal7 ;// tVal2 = 00 j1 00 j2
+ UHADD8 tVal6, tVal11, tVal10 ;// tVal6 = 00 j2 00 j3
+ MVN tVal12, tVal11 ;// tVal12= FF L0'FF L1'
+ MVN tVal14, tVal7 ;// tVal14= FF L1'FF L2'
+ UHSUB8 tVal2, tVal2, tVal12 ;// tVal2 = 80 k1 80 k2
+ UHSUB8 tVal8, tVal7, tVal12 ;// tVal8 = 80 d1 80 d2
+ UHSUB8 tVal11, tVal6, tVal14 ;// tVal11= 80 k2 80 k3
+ UHSUB8 tVal9, tVal10, tVal14 ;// tVal9 = 80 d2 80 d3
+ UADD8 tVal2, tVal2, r0x80808080 ;// tVal2 = 00 l1 00 l2
+ UADD8 tVal8, tVal8, r0x80808080 ;// tVal8 = 00 e1 00 e2
+ UADD8 tVal11, tVal11, r0x80808080 ;// tVal11= 00 l2 00 l3
+ UADD8 tVal9, tVal9, r0x80808080 ;// tVal9 = 00 e2 00 e3
+ ORR Out2, tVal8, tVal2, LSL #8 ;// Out2 = l1 e1 l2 e2
+ ORR Out3, tVal9, tVal11, LSL #8 ;// Out3 = l2 e2 l3 e3
+ PKHTB Out1, tVal4, Out2, ASR #16 ;// Out1 = i0 eL l1 e1
+ M_STR Out1, [pDst], dstStep ;// row 1 = Out1, pDst += dstStep
+ M_STR Out2, [pDst], dstStep ;// row 2 = Out2, pDst += dstStep
+ STR Out3, [pDst] ;// row 3 = Out3
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Macro to exit midway-break from case
+
+OMX_VC_4x4_VL
+ ;// Vertical-left: uses pSrcAbove[0 to 7]; U4-U7 are replaced by U3 when upper-right is unavailable
+ ;// M_STALL ARM1136JS=3
+
+ LDMIA pSrcAbove, {Above0123, Above4567} ;// Above0123, Above4567 = pSrcAbove[0 to 7]
+ TST availability, #OMX_VC_UPPER_RIGHT ;// flags stay valid up to the MULEQ below
+ LDR r0x80808080, =ADD_CONST1 ;// 0x80808080
+ LDR r0x01010101, =MUL_CONST0 ;// 0x01010101
+ MOV tVal11, Above0123, LSR #24 ;// tVal11= 00 00 00 U3
+ MULEQ Above4567, tVal11, r0x01010101 ;// upper-right unavailable: Above4567 = U3 U3 U3 U3
+ MOV tVal9, Above0123, LSR #8 ;// tVal9 = 00 U3 U2 U1
+ MVN tVal10, Above0123 ;// tVal10= U3'U2'U1'U0'
+ ORR tVal2, tVal9, Above4567, LSL #24 ;// tVal2 = U4 U3 U2 U1
+ UHSUB8 tVal8, tVal2, tVal10 ;// tVal8 = d4 d3 d2 d1
+ UADD8 Out0, tVal8, r0x80808080 ;// Out0 = e4 e3 e2 e1
+ M_STR Out0, [pDst], dstStep ;// row 0 = Out0, pDst += dstStep
+ MOV tVal9, tVal9, LSR #8 ;// tVal9 = 00 00 U3 U2
+ MOV tVal10, Above4567, LSL #8 ;// tVal10= U6 U5 U4 00
+ PKHBT tVal9, tVal9, Above4567, LSL #16 ;// tVal9 = U5 U4 U3 U2
+ ORR tVal10, tVal10, tVal11 ;// tVal10= U6 U5 U4 U3
+ UHADD8 tVal11, tVal9, Above0123 ;// tVal11= g5 g4 g3 g2
+ UHADD8 tVal14, tVal2, tVal10 ;// tVal14= g6 g5 g4 g3 (r14: Above4567 is no longer needed)
+ MVN tVal8, tVal2 ;// tVal8 = U4'U3'U2'U1'
+ MVN tVal7, tVal9 ;// tVal7 = U5'U4'U3'U2'
+ UHSUB8 tVal12, tVal9, tVal8 ;// tVal12= d5 d4 d3 d2 (r12: Above0123 is no longer needed)
+ UHSUB8 tVal11, tVal11, tVal8 ;// tVal11= h5 h4 h3 h2
+ UHSUB8 tVal2, tVal14, tVal7 ;// tVal2 = h6 h5 h4 h3
+ UADD8 Out1, tVal11, r0x80808080 ;// Out1 = i5 i4 i3 i2
+ UADD8 Out2, tVal12, r0x80808080 ;// Out2 = e5 e4 e3 e2
+ UADD8 Out3, tVal2, r0x80808080 ;// Out3 = i6 i5 i4 i3
+ M_STR Out1, [pDst], dstStep ;// row 1 = Out1, pDst += dstStep
+ M_STR Out2, [pDst], dstStep ;// row 2 = Out2, pDst += dstStep
+ M_STR Out3, [pDst], dstStep ;// row 3 = Out3 (pDst is advanced once more but not used again)
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Macro to exit midway-break from case
+
+OMX_VC_4x4_HU
+ ;// Horizontal-up: uses only the left column. A 00 lane is FF after MVN, 80 after UHSUB8, 00 again after UADD8
+ ;// M_STALL ARM1136JS=2
+
+ LDR r0x01010101, =MUL_CONST0 ;// 0x01010101
+ M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0*leftStep]
+ M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1*leftStep]
+ M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2*leftStep]
+ LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3*leftStep]
+ MOV r0x80808080, r0x01010101, LSL #7 ;// 0x80808080 (r0: pSrcLeft is no longer needed)
+ ORR tVal6, Left0, Left1, LSL #16 ;// tVal6 = 00 L1 00 L0
+ ORR tVal7, Left1, Left2, LSL #16 ;// tVal7 = 00 L2 00 L1
+ ORR tVal11, Left2, Left3, LSL #16 ;// tVal11= 00 L3 00 L2
+ MUL Out3, Left3, r0x01010101 ;// Out3 = L3 L3 L3 L3
+ MVN tVal8, tVal7 ;// tVal8 = FF L2'FF L1'
+ MVN tVal10, tVal11 ;// tVal10= FF L3'FF L2'
+ UHADD8 tVal4, tVal6, tVal11 ;// tVal4 = 00 g3 00 g2
+ UXTB16 tVal12, Out3 ;// tVal12= 00 L3 00 L3
+ UHSUB8 tVal4, tVal4, tVal8 ;// tVal4 = 80 h3 80 h2
+ UHSUB8 tVal6, tVal6, tVal8 ;// tVal6 = 80 d2 80 d1
+ UHSUB8 tVal11, tVal11, tVal8 ;// tVal11= 80 d3 80 d2
+ UHADD8 tVal12, tVal12, tVal7 ;// tVal12= 00 g4 00 g3
+ UADD8 tVal4, tVal4, r0x80808080 ;// tVal4 = 00 i3 00 i2
+ UHSUB8 tVal12, tVal12, tVal10 ;// tVal12= 80 h4 80 h3
+ UADD8 tVal8, tVal6, r0x80808080 ;// tVal8 = 00 e2 00 e1
+ UADD8 tVal11, tVal11, r0x80808080 ;// tVal11= 00 e3 00 e2
+ UADD8 tVal12, tVal12, r0x80808080 ;// tVal12= 00 i4 00 i3
+ ORR Out0, tVal8, tVal4, LSL #8 ;// Out0 = i3 e2 i2 e1
+ ORR Out1, tVal11, tVal12, LSL #8 ;// Out1 = i4 e3 i3 e2
+ M_STR Out0, [pDst], dstStep ;// row 0 = Out0, pDst += dstStep
+ PKHTB Out2, Out3, Out1, ASR #16 ;// Out2 = L3 L3 i4 e3
+ M_STR Out1, [pDst], dstStep ;// row 1 = Out1, pDst += dstStep
+ M_STR Out2, [pDst], dstStep ;// row 2 = Out2, pDst += dstStep
+ STR Out3, [pDst] ;// row 3 = Out3
+ MOV return, #OMX_Sts_NoErr
+ M_END
+
+ ENDIF ;// ARM1136JS
+
+
+ END
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntra_4x4 ends
+;//-----------------------------------------------------------------------------------------------
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
new file mode 100644
index 0000000..53597a8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
@@ -0,0 +1,128 @@
+;//
+;//
+;// File Name: omxVCM4P10_TransformDequantChromaDCFromPair_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ IMPORT armVCM4P10_QPDivTable
+ IMPORT armVCM4P10_VMatrixQPModTable
+
+ M_VARIANTS ARM1136JS
+
+
+ IF ARM1136JS
+
+;//--------------------------------------
+;// Declare input registers
+;//--------------------------------------
+ppSrc RN 0 ;// address of the source byte pointer; read at entry, written back after unpacking
+pDst RN 1 ;// 8-byte coefficient block: zeroed, filled by the unpack loop, then rewritten with the result
+QP RN 2 ;// index into armVCM4P10_QPDivTable and armVCM4P10_VMatrixQPModTable
+
+;//--------------------------------
+;// Scratch variable for Unpack2x2
+;//--------------------------------
+pSrc RN 9 ;// running source byte pointer (shares r9 with Shift)
+Value RN 4 ;// coefficient being unpacked (shares r4 with r0w1, Temp2)
+Value2 RN 5 ;// high byte of a two-byte coefficient (shares r5 with c0w0, pQPDivTable)
+Flag RN 6 ;// control byte: bits 0-3 index, 0x10 two-byte value, 0x20 last pair
+strOffset RN 7 ;// byte offset of the store into pDst
+cstOffset RN 8 ;// constant mask 31
+
+;//--------------------------------
+;// Scratch variable
+;//--------------------------------
+r0w0 RN 3 ;// first word of the block, |c1|c0|
+r0w1 RN 4 ;// second word of the block, |c3|c2|
+
+c0w0 RN 5 ;// first output word
+c1w0 RN 6 ;// second output word
+
+return RN 0 ;// shares r0 with ppSrc
+pQPDivTable RN 5 ;// shares r5 with c0w0
+pQPModTable RN 6 ;// shares r6 with c1w0
+Shift RN 9 ;// shares r9 with pSrc
+Scale RN 2 ;// shares r2 with QP
+
+Temp1 RN 3 ;// shares r3 with r0w0
+Temp2 RN 4 ;// shares r4 with r0w1
+Temp3 RN 7 ;// shares r7 with strOffset
+Temp4 RN 8 ;// shares r8 with cstOffset
+
+ ;// Write function header
+ M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9
+ ;// Unpacks (flag, value) pairs from *ppSrc into the 2x2 block at pDst, then
+ ;// transforms the block, scales it by the QP tables and stores it back to pDst
+ LDR pSrc, [ppSrc] ;// Load pSrc
+ MOV cstOffset, #31 ;// Mask applied to (Flag << 1) in the loop to form the store offset
+
+ ;//-----------------------------------------------------------------------
+ ;// Firstly, fill all the coefficient values on the <pDst> buffer by zero
+ ;//-----------------------------------------------------------------------
+
+ MOV Value, #0 ;// Initialize the zero value
+ MOV Value2, #0 ;// Initialize the zero value
+ LDRB Flag, [pSrc], #1 ;// Preload <Flag> before <unpackLoop>
+ STRD Value, [pDst, #0] ;// pDst[0] = pDst[1] = pDst[2] = pDst[3] = 0
+
+
+unpackLoop
+ TST Flag, #0x10 ;// Computing (Flag & 0x10): set means a two-byte value follows
+ LDRSBNE Value2,[pSrc,#1] ;// Two-byte value: Value2 = sign-extended high byte
+ LDRBNE Value, [pSrc], #2 ;// Two-byte value: Value = low byte; loaded byte wise to avoid unaligned access
+ AND strOffset, cstOffset, Flag, LSL #1 ;// strOffset = (Flag & 15) << 1; NOTE(review): can reach 30, nothing here limits it to the 8-byte block
+ LDRSBEQ Value, [pSrc], #1 ;// One-byte value: Value = sign-extended *pSrc++
+ ORRNE Value,Value,Value2, LSL #8 ;// Two-byte value: Value = low byte | (high byte << 8)
+
+ TST Flag, #0x20 ;// Computing (Flag & 0x20) to check, if we're done
+ LDRBEQ Flag, [pSrc], #1 ;// Flag = (OMX_U8) *pSrc++, for next iteration
+ STRH Value, [pDst, strOffset] ;// Store low halfword of <Value> at offset <strOffset>
+ BEQ unpackLoop ;// Loop while the 0x20 bit of the stored pair's flag was clear
+
+ LDMIA pDst, {r0w0, r0w1} ;// r0w0 = |c1|c0| & r0w1 = |c3|c2|
+
+
+ STR pSrc, [ppSrc] ;// Update the bitstream pointer
+
+ LDR pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer
+ LDR pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer
+
+ SADDSUBX r0w0, r0w0, r0w0 ;// [ c00+c01, c00-c01 ]
+ SADDSUBX r0w1, r0w1, r0w1 ;// [ c10+c11, c10-c11 ]
+
+ LDRSB Shift, [pQPDivTable, QP] ;// Shift = pQPDivTable[QP] (r9: pSrc has already been written back)
+ LDRSB Scale, [pQPModTable, QP] ;// Scale = pQPModTable[QP] (overwrites QP, which shares r2)
+
+ SADD16 c0w0, r0w0, r0w1 ;// [ d00+d10, d01+d11 ]
+ SSUB16 c1w0, r0w0, r0w1 ;// [ d00-d10, d01-d11 ]
+
+ LSL Scale, Scale, Shift ;// Scale = Scale << Shift
+
+ SMULTB Temp2, c0w0, Scale ;// Temp2 = T(c0w0) * Scale
+ SMULTB Temp4, c1w0, Scale ;// Temp4 = T(c1w0) * Scale
+ SMULBB Temp1, c0w0, Scale ;// Temp1 = B(c0w0) * Scale
+ SMULBB Temp3, c1w0, Scale ;// Temp3 = B(c1w0) * Scale
+ MOV Temp2, Temp2, ASR #1 ;// Temp2 = Temp2 >> 1 (Temp1 is halved by the LSL #15 in PKHBT below)
+ MOV Temp4, Temp4, ASR #1 ;// Temp4 = Temp4 >> 1 (Temp3 is halved by the LSL #15 in PKHBT below)
+ PKHBT c0w0, Temp2, Temp1, LSL #15 ;// c0w0 = | Temp1 >> 1 | Temp2 |
+ PKHBT c1w0, Temp4, Temp3, LSL #15 ;// c1w0 = | Temp3 >> 1 | Temp4 |
+ STMIA pDst, {c0w0, c1w0} ;// Storing all the coefficients at once
+ MOV return, #OMX_Sts_NoErr
+ M_END
+
+ ENDIF ;// ARM1136JS
+
+
+
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
new file mode 100644
index 0000000..73caec2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
@@ -0,0 +1,469 @@
+;//
+;//
+;// File Name: omxVCM4P10_TransformDequantLumaDCFromPair_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// Description:
+;// H.264 inverse quantize and transform module
+;//
+;//
+
+;// Include standard headers
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+;// Import/Export symbols required from/to other files
+;// (For example tables)
+
+ IMPORT armVCM4P10_UnpackBlock4x4
+ IMPORT armVCM4P10_QPDivTable
+ IMPORT armVCM4P10_VMatrixQPModTable
+
+ M_VARIANTS ARM1136JS
+
+;// Set debugging level
+;//DEBUG_ON SETL {TRUE}
+
+
+;// Static Function: armVCM4P10_InvTransformDequantLumaDC4x4
+
+
+;// Guarding implementation by the processor name
+
+ IF ARM1136JS
+
+
+;// Input registers: pData = 4x4 block of 16-bit coefficients (read and written in place), QP = index into the QP tables
+pData RN 0
+QP RN 1
+
+;//Output Registers
+
+
+;//Local Scratch Registers
+
+;// Packed input coefficients (two 16-bit values per register)
+in00 RN 2 ;// Src[0] & Src[1]
+in02 RN 3 ;// Src[2] & Src[3]
+in10 RN 4 ;// Src[4] & Src[5]
+in12 RN 5 ;// Src[6] & Src[7]
+in20 RN 6 ;// Src[8] & Src[9]
+in22 RN 7 ;// Src[10] & Src[11]
+in30 RN 8 ;// Src[12] & Src[13]
+in32 RN 9 ;// Src[14] & Src[15]
+
+;// Transpose for row operations (rows to columns)
+trRow00 RN 2
+trRow10 RN 10
+trRow02 RN 3
+trRow12 RN 5
+trRow20 RN 11
+trRow30 RN 12
+trRow32 RN 14
+trRow22 RN 7
+
+;// Intermediate sums and differences of the row pass
+rowSum1 RN 4
+rowSum2 RN 6
+rowDiff1 RN 8
+rowDiff2 RN 9
+
+
+;// Results of the row pass
+rowOp00 RN 2
+rowOp10 RN 10
+rowOp20 RN 11
+rowOp30 RN 12
+rowOp02 RN 3
+rowOp12 RN 5
+rowOp22 RN 7
+rowOp32 RN 14
+
+;// Transpose for column operations
+trCol00 RN 2
+trCol02 RN 3
+trCol10 RN 4
+trCol12 RN 5
+trCol20 RN 6
+trCol22 RN 7
+trCol30 RN 8
+trCol32 RN 9
+
+;// Intermediate sums and differences of the column pass
+colSum1 RN 10
+colSum2 RN 11
+colDiff1 RN 12
+colDiff2 RN 14
+
+
+;// Results of the column pass
+colOp00 RN 2
+colOp02 RN 3
+colOp10 RN 4
+colOp12 RN 5
+colOp20 RN 6
+colOp22 RN 7
+colOp30 RN 8
+colOp32 RN 9
+
+;// Temporary scratch variables (these alias registers already named above, so they are only usable once those values are dead)
+pQPDivTable RN 0
+pQPModTable RN 11
+Shift RN 10
+Scale RN 14
+Round RN 0
+
+temp1 RN 10
+temp2 RN 11
+temp3 RN 12
+temp4 RN 1
+
+
+
+;// Inverse-transformed and dequantized output, packed like the input
+out00 RN 2
+out02 RN 3
+out10 RN 4
+out12 RN 5
+out20 RN 6
+out22 RN 7
+out30 RN 8
+out32 RN 9
+
+
+
+
+ ;// Allocate one stack word, used to park pData while r0 is reused as scratch
+ M_ALLOC4 pDataOnStack, 4
+
+ ;// Write function header
+ M_START armVCM4P10_InvTransformDequantLumaDC4x4,r11
+
+ ;******************************************************************
+ ;// The strategy used in implementing the transform is as follows:*
+ ;// Load the 4x4 block into 8 registers *
+ ;// Transpose the 4x4 matrix *
+ ;// Perform the row operations (on columns) using SIMD *
+ ;// Transpose the 4x4 result matrix *
+ ;// Perform the column operations *
+ ;// Store the 4x4 block at one go *
+ ;******************************************************************
+
+ ;// Load all 16 coefficients of the 4x4 block (two per register)
+
+ LDMIA pData,{in00,in02,in10,in12,in20,in22,in30,in32}
+
+ ;//*****************************************************************
+ ;//
+ ;// Transpose the matrix in order to perform row ops as column ops
+ ;// Input: in[][] = original matrix
+ ;// Output: trRow[][]= transposed matrix
+ ;// Step1: Obtain the LL part of the transposed matrix
+ ;// Step2: Obtain the HL part
+ ;// Step3: Obtain the LH part
+ ;// Step4: Obtain the HH part
+ ;//
+ ;//*****************************************************************
+
+ ;// LL 2x2 transposed matrix
+ ;// d0 d1 - -
+ ;// d4 d5 - -
+ ;// - - - -
+ ;// - - - -
+
+ PKHTB trRow10,in10,in00,ASR #16 ;// [5 4] = [f5:f1]
+ PKHBT trRow00,in00,in10,LSL #16 ;// [1 0] = [f4:f0]
+
+ ;// HL 2x2 transposed matrix
+ ;// - - - -
+ ;// - - - -
+ ;// d8 d9 - -
+ ;// d12 d13 - -
+
+
+ PKHTB trRow30,in12,in02,ASR #16 ;// [13 12] = [7 3]
+ PKHBT trRow20,in02,in12,LSL #16 ;// [9 8] = [6 2]
+
+ ;// LH 2x2 transposed matrix
+ ;// - - d2 d3
+ ;// - - d6 d7
+ ;// - - - -
+ ;// - - - -
+
+ PKHBT trRow02,in20,in30,LSL #16 ;// [3 2] = [f12:f8]
+ PKHTB trRow12,in30,in20,ASR #16 ;// [7 6] = [f13:f9]
+
+
+
+
+ ;// HH 2x2 transposed matrix
+ ;// - - - -
+ ;// - - - -
+ ;// - - d10 d11
+ ;// - - d14 d15
+
+ PKHTB trRow32,in32,in22,ASR #16 ;// [15 14] = [15 11]
+ PKHBT trRow22,in22,in32,LSL #16 ;// [11 10] = [14 10]
+
+
+ ;****************************************
+ ;// Row Operations (Performed on columns)
+ ;****************************************
+
+
+ ;// SIMD operations on first two columns(two rows of the original matrix)
+
+ SADD16 rowSum1,trRow00,trRow10 ;// (c0+c1)
+ SADD16 rowSum2,trRow20,trRow30 ;// (c2+c3)
+ SSUB16 rowDiff1,trRow00,trRow10 ;// (c0-c1)
+ SSUB16 rowDiff2,trRow20,trRow30 ;// (c2-c3)
+ SADD16 rowOp00,rowSum1,rowSum2 ;// (c0+c1+c2+c3)
+ SSUB16 rowOp10,rowSum1,rowSum2 ;// (c0+c1-c2-c3)
+ SSUB16 rowOp20,rowDiff1,rowDiff2 ;// (c0-c1-c2+c3)
+ SADD16 rowOp30,rowDiff1,rowDiff2 ;// (c0-c1+c2-c3)
+
+
+ ;// SIMD operations on next two columns(next two rows of the original matrix)
+
+ SADD16 rowSum1,trRow02,trRow12 ;// (c0+c1)
+ SADD16 rowSum2,trRow22,trRow32 ;// (c2+c3)
+ SSUB16 rowDiff1,trRow02,trRow12 ;// (c0-c1)
+ SSUB16 rowDiff2,trRow22,trRow32 ;// (c2-c3)
+ SADD16 rowOp02,rowSum1,rowSum2 ;// (c0+c1+c2+c3)
+ SSUB16 rowOp12,rowSum1,rowSum2 ;// (c0+c1-c2-c3)
+ SSUB16 rowOp22,rowDiff1,rowDiff2 ;// (c0-c1-c2+c3)
+ SADD16 rowOp32,rowDiff1,rowDiff2 ;// (c0-c1+c2-c3)
+
+
+
+ ;*****************************************************************
+ ;// Transpose the resultant matrix
+ ;// Input: rowOp[][]
+ ;// Output: trCol[][]
+ ;*****************************************************************
+
+ ;// LL 2x2 transposed matrix
+ ;// d0 d1 - -
+ ;// d4 d5 - -
+ ;// - - - -
+ ;// - - - -
+
+ PKHTB trCol10,rowOp10,rowOp00,ASR #16 ;// [5 4] = [f5:f1]
+ PKHBT trCol00,rowOp00,rowOp10,LSL #16 ;// [1 0] = [f4:f0]
+
+ ;// HL 2x2 transposed matrix
+ ;// - - - -
+ ;// - - - -
+ ;// d8 d9 - -
+ ;// d12 d13 - -
+
+
+ PKHTB trCol30,rowOp12,rowOp02,ASR #16 ;// [13 12] = [7 3]
+ PKHBT trCol20,rowOp02,rowOp12,LSL #16 ;// [9 8] = [6 2]
+
+ ;// LH 2x2 transposed matrix
+ ;// - - d2 d3
+ ;// - - d6 d7
+ ;// - - - -
+ ;// - - - -
+
+ PKHBT trCol02,rowOp20,rowOp30,LSL #16 ;// [3 2] = [f12:f8]
+ PKHTB trCol12,rowOp30,rowOp20,ASR #16 ;// [7 6] = [f13:f9]
+
+
+
+
+ ;// HH 2x2 transposed matrix
+ ;// - - - -
+ ;// - - - -
+ ;// - - d10 d11
+ ;// - - d14 d15
+
+ PKHTB trCol32,rowOp32,rowOp22,ASR #16 ;// [15 14] = [15 11]
+ PKHBT trCol22,rowOp22,rowOp32,LSL #16 ;// [11 10] = [14 10]
+
+
+ ;*******************************
+ ;// Column Operations
+ ;*******************************
+
+ ;//--------------------------------------------------------------------------------------
+ ;// Store pData(RN0) on stack and restore it only at the final store back
+ ;// This frees up a register (RN0) which is then reused for pQPDivTable and Round
+ ;//--------------------------------------------------------------------------------------
+ M_STR pData,pDataOnStack
+
+
+ ;// SIMD operations on first two columns(two rows of the original matrix)
+
+ SADD16 colSum1,trCol00,trCol10 ;// (c0+c1)
+ SADD16 colSum2,trCol20,trCol30 ;// (c2+c3)
+ SSUB16 colDiff1,trCol00,trCol10 ;// (c0-c1)
+ SSUB16 colDiff2,trCol20,trCol30 ;// (c2-c3)
+ SADD16 colOp00,colSum1,colSum2 ;// (c0+c1+c2+c3)
+ SSUB16 colOp10,colSum1,colSum2 ;// (c0+c1-c2-c3)
+ SSUB16 colOp20,colDiff1,colDiff2 ;// (c0-c1-c2+c3)
+ SADD16 colOp30,colDiff1,colDiff2 ;// (c0-c1+c2-c3)
+
+
+ ;// SIMD operations on next two columns(next two rows of the original matrix)
+
+ LDR pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer
+ SADD16 colSum1,trCol02,trCol12 ;// (c0+c1)
+ SADD16 colSum2,trCol22,trCol32 ;// (c2+c3)
+ SSUB16 colDiff1,trCol02,trCol12 ;// (c0-c1)
+ SSUB16 colDiff2,trCol22,trCol32 ;// (c2-c3)
+ SADD16 colOp02,colSum1,colSum2 ;// (c0+c1+c2+c3)
+ SSUB16 colOp12,colSum1,colSum2 ;// (c0+c1-c2-c3)
+ LDR pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer (overwrites colSum2, which is dead from here)
+ LDRSB Shift, [pQPDivTable, QP] ;// Shift = pQPDivTable[QP] (overwrites colSum1, which is dead from here)
+ SSUB16 colOp22,colDiff1,colDiff2 ;// (c0-c1-c2+c3)
+ SADD16 colOp32,colDiff1,colDiff2 ;// (c0-c1+c2-c3)
+
+
+ LDRSB Scale, [pQPModTable, QP] ;// Scale = pQPModTable[QP]
+
+ ;//----------------------------------------------------------------------
+ ;//
+ ;// <Dequantize> improves on the c-reference code
+ ;// Both the cases i.e., Shift>=0 and Shift<0 cases are covered together
+ ;// We do not subtract 2 from Shift as in C reference, instead perform a
+ ;// Scale << Shift once in the beginning and do a right shift by a
+ ;// constant 2 after the Multiplication. The value of Round would be 2
+ ;//
+ ;// By doing this we avoid the branches required and also
+ ;// reduce the code size substantially
+ ;//
+ ;//----------------------------------------------------------------------
+
+ MOV Round, #2 ;// Round = 2
+ LSL Scale, Scale, Shift ;// Scale = Scale << Shift
+
+
+ ;// Row 1
+ SMLABB temp1, colOp00, Scale, Round ;// temp1 = B(colOp00) * Scale + Round
+ SMLABB temp3, colOp02, Scale, Round ;// temp3 = B(colOp02) * Scale + Round
+ SMLATB temp2, colOp00, Scale, Round ;// temp2 = T(colOp00) * Scale + Round
+ SMLATB temp4, colOp02, Scale, Round ;// temp4 = T(colOp02) * Scale + Round
+
+ ASR temp1, temp1, #2 ;// temp1 = temp1 >> 2
+ ASR temp3, temp3, #2 ;// temp3 = temp3 >> 2
+ PKHBT out00, temp1, temp2, LSL #14 ;// out00 = | temp2 >> 2 | temp1 |
+ PKHBT out02, temp3, temp4, LSL #14 ;// out02 = | temp4 >> 2 | temp3 |
+
+
+ ;// Row 2
+ SMLABB temp1, colOp10, Scale, Round ;// temp1 = B(colOp10) * Scale + Round
+ SMLABB temp3, colOp12, Scale, Round ;// temp3 = B(colOp12) * Scale + Round
+ SMLATB temp2, colOp10, Scale, Round ;// temp2 = T(colOp10) * Scale + Round
+ SMLATB temp4, colOp12, Scale, Round ;// temp4 = T(colOp12) * Scale + Round
+
+ ASR temp1, temp1, #2 ;// temp1 = temp1 >> 2
+ ASR temp3, temp3, #2 ;// temp3 = temp3 >> 2
+ PKHBT out10, temp1, temp2, LSL #14 ;// out10 = | temp2 >> 2 | temp1 |
+ PKHBT out12, temp3, temp4, LSL #14 ;// out12 = | temp4 >> 2 | temp3 |
+
+ ;// Row 3
+ SMLABB temp1, colOp20, Scale, Round ;// temp1 = B(colOp20) * Scale + Round
+ SMLABB temp3, colOp22, Scale, Round ;// temp3 = B(colOp22) * Scale + Round
+ SMLATB temp2, colOp20, Scale, Round ;// temp2 = T(colOp20) * Scale + Round
+ SMLATB temp4, colOp22, Scale, Round ;// temp4 = T(colOp22) * Scale + Round
+
+ ASR temp1, temp1, #2 ;// temp1 = temp1 >> 2
+ ASR temp3, temp3, #2 ;// temp3 = temp3 >> 2
+ PKHBT out20, temp1, temp2, LSL #14 ;// out20 = | temp2 >> 2 | temp1 |
+ PKHBT out22, temp3, temp4, LSL #14 ;// out22 = | temp4 >> 2 | temp3 |
+
+ ;// Row 4
+ SMLABB temp1, colOp30, Scale, Round ;// temp1 = B(colOp30) * Scale + Round
+ SMLABB temp3, colOp32, Scale, Round ;// temp3 = B(colOp32) * Scale + Round
+ SMLATB temp2, colOp30, Scale, Round ;// temp2 = T(colOp30) * Scale + Round
+ SMLATB temp4, colOp32, Scale, Round ;// temp4 = T(colOp32) * Scale + Round
+
+ M_LDR pData,pDataOnStack ;// Restore pData pointer from stack (Round in r0 is no longer needed)
+ ASR temp1, temp1, #2 ;// temp1 = temp1 >> 2
+ ASR temp3, temp3, #2 ;// temp3 = temp3 >> 2
+ PKHBT out30, temp1, temp2, LSL #14 ;// out30 = | temp2 >> 2 | temp1 |
+ PKHBT out32, temp3, temp4, LSL #14 ;// out32 = | temp4 >> 2 | temp3 |
+
+
+
+ ;***************************
+ ;// Store all 16 results back over the input block
+ ;***************************
+
+store_coeff
+
+ STMIA pData,{out00,out02,out10,out12,out20,out22,out30,out32}
+
+
+
+ ;// No return value is set by this routine
+
+
+ ;// Write function tail
+ M_END
+
+ ENDIF ;//ARM1136JS
+
+
+;// Static Function: armVCM4P10_InvTransformDequantLumaDC4x4
+
+;// Guarding implementation by the processor name
+
+
+
+
+;// Function: omxVCM4P10_TransformDequantLumaDCFromPair (calls armVCM4P10_UnpackBlock4x4, then armVCM4P10_InvTransformDequantLumaDC4x4 on pDst)
+
+;// Input registers (QPR2 is the QP argument as received in r2)
+ppSrc RN 0
+pDst RN 1
+QPR2 RN 2
+
+;// Output register (always written with OMX_Sts_NoErr)
+result RN 0
+
+;// Local scratch registers: r4/r5 hold pDst/QP across the first call, r0/r1 carry them into the second
+pDstR4 RN 4
+pDstR0 RN 0
+QPR1 RN 1
+QPR5 RN 5
+
+;// Guarding implementation by the processor name
+
+ IF ARM1136JS
+
+ ;// No stack memory is allocated by this function
+
+
+ ;// Write function header
+ M_START omxVCM4P10_TransformDequantLumaDCFromPair,r5
+
+ MOV pDstR4,pDst ;// Keep pDst (r1) in r4 across the call below
+ MOV QPR5,QPR2 ;// Keep QP (r2) in r5 across the call below
+ BL armVCM4P10_UnpackBlock4x4
+
+ MOV pDstR0,pDstR4 ;// r0 = pDst, the pData argument of the transform
+ MOV QPR1,QPR5 ;// r1 = QP argument of the transform
+ BL armVCM4P10_InvTransformDequantLumaDC4x4
+
+
+ ;// Set return value (unconditionally OMX_Sts_NoErr)
+ MOV result,#OMX_Sts_NoErr
+
+ ;// Write function tail
+ M_END
+
+
+ ENDIF ;//ARM1136JS
+
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
new file mode 100644
index 0000000..22115d3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
@@ -0,0 +1,37 @@
+/**
+ *
+ * File Name: armVCM4P2_Huff_Tables_VLC.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ *
+ * File: armVCM4P2_Huff_Tables_VLC.h
+ * Description: Declares tables used for Huffman coding and decoding
+ * in MP4P2 codec.
+ *
+ */
+
+#ifndef _OMXHUFFTAB_H_
+#define _OMXHUFFTAB_H_
+
+/* Packed VLC table for Intra coefficients; holds symbols with both Last=0 and Last=1. */
+extern const OMX_U16 armVCM4P2_IntraVlcL0L1[200];
+
+/* Packed VLC table for Inter coefficients; holds symbols with both Last=0 and Last=1. */
+extern const OMX_U16 armVCM4P2_InterVlcL0L1[200];
+/* Further packed tables defined in armVCM4P2_Huff_Tables_VLC.c. */
+extern const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64];
+//extern const OMX_U16 armVCM4P2_aIntraDCChromaIndex[32];
+extern const OMX_U16 armVCM4P2_aVlcMVD[124];
+/* LMAX/RMAX tables: Last=0 entries first, zero padding up to index 31, Last=1 entries from index 32. */
+extern const OMX_U8 armVCM4P2_InterL0L1LMAX[73];
+extern const OMX_U8 armVCM4P2_InterL0L1RMAX[35];
+extern const OMX_U8 armVCM4P2_IntraL0L1LMAX[53];
+extern const OMX_U8 armVCM4P2_IntraL0L1RMAX[40];
+
+#endif /* _OMXHUFFTAB_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
new file mode 100644
index 0000000..d5f865c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
@@ -0,0 +1,25 @@
+/**
+ *
+ * File Name: armVCM4P2_ZigZag_Tables.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ *
+ * File: armVCM4P2_Zigzag_Tables.h
+ * Description: Declares Tables used for Zigzag scan in MP4P2 codec.
+ *
+ */
+
+#ifndef _OMXZIGZAGTAB_H
+#define _OMXZIGZAGTAB_H
+/* One 192-entry scan table; presumably the classical, horizontal and vertical 64-entry scans back to back (the separate declarations below are retired) - confirm against the definition. */
+extern const OMX_U8 armVCM4P2_aClassicalZigzagScan [192];
+//extern const OMX_U8 armVCM4P2_aHorizontalZigzagScan [64];
+//extern const OMX_U8 armVCM4P2_aVerticalZigzagScan [64];
+
+#endif /* _OMXZIGZAGTAB_H */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s
new file mode 100644
index 0000000..7801e57
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s
@@ -0,0 +1,75 @@
+; /**
+; *
+; * File Name: armVCM4P2_Clip8_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains module for Clipping 16 bit value to [0,255] Range
+; */
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+ IF ARM1136JS
+
+;// Input arguments: pSrc = packed 16-bit samples, pDst = byte output, step = byte offset added to pDst after each group of 8 outputs
+
+pSrc RN 0
+pDst RN 1
+step RN 2
+
+;// Local variables: each of x0-x3 holds two packed 16-bit samples
+
+x0 RN 3
+x1 RN 4
+x2 RN 5
+x3 RN 6
+
+Count RN 14
+
+
+ M_START armVCM4P2_Clip8,r6
+
+
+ MOV Count,#8
+loop
+
+ LDMIA pSrc!,{x0, x1}
+ SUBS Count,Count, #1 ;// count down; these flags are consumed by the BGT at the end of the loop
+ LDMIA pSrc!,{x2, x3}
+ USAT16 x0, #8, x0 ;// clip two samples to [0,255]
+ USAT16 x1, #8, x1 ;// clip two samples to [0,255]
+ STRB x0, [pDst]
+ MOV x0, x0, LSR #16
+ STRB x0, [pDst,#1]
+ STRB x1, [pDst,#2]
+ MOV x1, x1, LSR #16
+ STRB x1, [pDst,#3]
+
+ USAT16 x2, #8, x2 ;// clip two samples to [0,255]
+ USAT16 x3, #8, x3 ;// clip two samples to [0,255]
+ STRB x2, [pDst,#4]
+ MOV x2, x2, LSR #16
+ STRB x2, [pDst,#5]
+ STRB x3, [pDst,#6]
+ MOV x3, x3, LSR #16
+ STRB x3, [pDst,#7]
+ ADD pDst,pDst,step ;// Increment pDst by step value
+
+ BGT loop ;// Loop while Count > 0: 8 iterations of 8 samples each (64 samples in total)
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
new file mode 100644
index 0000000..9e30900
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
@@ -0,0 +1,398 @@
+;/**
+; *
+; * File Name: armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for zigzag scanning and VLC decoding
+; * for inter, intra block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_AC_unsafe
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan
+; *
+; *
+; *
+; *
+; */
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+
+ IF ARM1136JS
+
+
+
+
+
+;//Input Arguments
+
+ppBitStream RN 0
+pBitOffset RN 1
+pDst RN 2
+shortVideoHeader RN 3
+
+
+;//Local Variables
+
+Return RN 0
+
+pVlcTableL0L1 RN 4
+pLMAXTableL0L1 RN 4
+pRMAXTableL0L1 RN 4
+pZigzagTable RN 4
+
+ftype RN 0
+temp3 RN 4
+temp RN 5
+Count RN 6
+Escape RN 5
+
+;// armVCM4P2_FillVLDBuffer
+zigzag RN 0
+storeLevel RN 1
+temp2 RN 4
+temp1 RN 5
+sign RN 5
+Last RN 7
+storeRun RN 14
+
+
+packRetIndex RN 5
+
+
+markerbit RN 5
+
+;// Scratch Registers
+
+RBitStream RN 8
+RBitBuffer RN 9
+RBitCount RN 10
+
+T1 RN 11
+T2 RN 12
+LR RN 14
+
+
+
+ M_ALLOC4 pppBitStream,4
+ M_ALLOC4 ppOffset,4
+ M_ALLOC4 pLinkRegister,4
+
+ M_START armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+ ;// get the table addresses from stack
+ M_ARG ppVlcTableL0L1,4
+ M_ARG ppLMAXTableL0L1,4
+ M_ARG ppRMAXTableL0L1,4
+ M_ARG ppZigzagTable,4
+
+ ;// Store ALL zeros at pDst (Count itself is not initialised in this routine)
+
+ MOV temp1,#0 ;// temp1 = 0, one of the four zero words stored to pDst below
+ MOV Last,#0
+ M_STR LR,pLinkRegister ;// Store Link Register on Stack
+ MOV temp2,#0
+ MOV LR,#0
+
+ ;// Initialize the Macro and Store all zeros to pDst
+
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_BD_INIT1 T1, T2, T2
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_BD_INIT2 T1, T2, T2
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_STR ppBitStream,pppBitStream ;// Store ppBitstream on stack
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_STR pBitOffset,ppOffset ;// Store pBitOffset on stack
+ STM pDst!,{temp2,temp1,Last,LR}
+
+ STM pDst!,{temp2,temp1,Last,LR}
+ STM pDst!,{temp2,temp1,Last,LR}
+
+
+ SUB pDst,pDst,#128 ;// Restore pDst (8 stores of 16 bytes advanced it by 128)
+
+ ;// The armVCM4P2_GetVLCBits begins
+
+getVLCbits
+
+ M_BD_LOOK8 Escape,7 ;// Load Escape Value
+ LSR Escape,Escape,#25
+ CMP Escape,#3 ;// check for escape mode
+ MOVNE ftype,#0
+ BNE notEscapemode ;// Branch if the next 7 bits are not the escape code (ftype = 0)
+
+ M_BD_VSKIP8 #7,T1
+ CMP shortVideoHeader,#0 ;// Check shortVideoHeader flag to know the type of Escape mode
+ BEQ endFillVLD
+
+ ;// Escape Mode 4
+
+ M_BD_READ8 Last,1,T1
+ M_BD_READ8 storeRun,6,T1
+ M_BD_READ8 storeLevel,8,T1
+
+
+ ;// Check whether the Reserved values for Level are used and Exit with an Error Message if it is so
+
+ TEQ storeLevel,#0
+ TEQNE storeLevel,#128
+ BEQ ExitError
+
+ ADD temp2,storeRun,Count
+ CMP temp2,#64
+ BGE ExitError ;// error if Count+storeRun >= 64
+
+
+ ;// Load address of zigzagTable
+
+ M_LDR pZigzagTable,ppZigzagTable ;// Loading the Address of Zigzag table
+
+
+ ;// armVCM4P2_FillVLDBuffer
+
+ SXTB storeLevel,storeLevel ;// Sign Extend storeLevel to 32 bits
+
+
+ ;// To reflect run length
+
+ ADD Count,Count,storeRun
+ LDRB zigzag,[pZigzagTable,Count]
+ ADD Count,Count,#1
+ STRH storeLevel,[pDst,zigzag] ;// store Level. NOTE(review): the branch below is unconditional, so with Last==0 ExitOk reports OMX_Sts_Err after one coefficient - confirm intended
+
+ B ExitOk
+
+
+
+endFillVLD
+
+
+ ;// Load ftype (escape mode) from the next one or two bits: 0 -> 1, 10 -> 2, 11 -> 3
+
+ M_BD_READ8 temp1,1,T1
+ CMP temp1,#0
+ MOVEQ ftype,#1
+ BEQ notEscapemode
+ M_BD_READ8 temp1,1,T1
+ CMP temp1,#1
+ MOVEQ ftype,#3
+ MOVNE ftype,#2
+
+
+notEscapemode
+
+ ;// Load optimized packed VLC table with last=0 and Last=1
+
+ M_LDR pVlcTableL0L1,ppVlcTableL0L1 ;// Load Combined VLC Table
+
+
+ CMP ftype,#3 ;// If ftype >= 3 perform fixed-length decoding (Escape Mode 3)
+ BGE EscapeMode3 ;// Else continue normal VLC Decoding
+
+ ;// Variable length decoding, "armUnPackVLC32"
+
+
+ M_BD_VLD packRetIndex,T1,T2,pVlcTableL0L1,4,2
+
+
+ LDR temp3,=0xFFF
+
+ CMP packRetIndex,temp3 ;// Check for invalid symbol
+ BEQ ExitError ;// if invalid symbol occurs exit with an error message
+
+ AND Last,packRetIndex,#2 ;// Get Last from packed index (bit 1, so Last is 0 or 2 here)
+
+
+
+
+ LSR storeRun,packRetIndex,#7 ;// Get Run Value from Packed index
+ AND storeLevel,packRetIndex,#0x7c ;// storeLevel=packRetIndex[2-6],storeLevel[0-1]=0
+
+
+ M_LDR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Load LMAX table
+
+
+ LSR storeLevel,storeLevel,#2 ;// Level value
+
+ CMP ftype,#1
+ BNE ftype2
+
+ ;// ftype==1; Escape mode =1
+
+
+ ADD temp1, pLMAXTableL0L1, Last, LSL#4 ;// Last flag set (Last==2): add 2<<4 = 32 to table address
+ LDRB temp1,[temp1,storeRun]
+
+
+ ADD storeLevel,temp1,storeLevel
+
+ftype2
+
+ ;// ftype =2; Escape mode =2
+
+ M_LDR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Load RMAX Table
+
+ CMP ftype,#2
+ BNE FillVLDL1
+
+ ADD temp1, pRMAXTableL0L1, Last, LSL#4 ;// Last flag set (Last==2): add 2<<4 = 32 to table address
+ SUB temp2,storeLevel,#1
+ LDRB temp1,[temp1,temp2]
+
+
+ ADD storeRun,storeRun,#1
+ ADD storeRun,temp1
+
+FillVLDL1
+
+
+ ;// armVCM4P2_FillVLDBuffer
+
+ M_LDR pZigzagTable,ppZigzagTable ;// Load address of zigzagTable
+
+ M_BD_READ8 sign,1,T1
+
+ CMP sign,#1
+ RSBEQ storeLevel,storeLevel,#0
+
+ ADD temp1,storeRun,Count ;// Exit with an error message if Run + Count exceeds 63
+ CMP temp1,#64
+ BGE ExitError
+
+
+
+
+
+
+ ;// To reflect run length
+
+ ADD Count,Count,storeRun
+
+storeLevelL1
+
+ LDRB zigzag,[pZigzagTable,Count]
+ CMP Last,#2 ;// Z set if this was the last non-zero coefficient (Last==2); consumed by BNE below
+ ADD Count,Count,#1
+ LSR Last,Last,#1
+ STRH storeLevel,[pDst,zigzag]
+
+ BNE end
+
+ B ExitOk
+
+
+
+ ;// Fixed length decoding, Escape Mode 3
+
+EscapeMode3
+
+ M_BD_READ8 Last,1,T1
+ M_BD_READ8 storeRun,6,T1
+
+ ADD temp2,storeRun,Count ;// Exit with an error message if Run + Count exceeds 63
+ CMP temp2,#64
+ BGE ExitError
+
+ M_BD_READ8 markerbit,1,T1
+ TEQ markerbit,#0 ;// Exit with an error message if marker bit is zero
+ BEQ ExitError
+
+ M_BD_READ16 storeLevel,12,T1
+
+ TST storeLevel,#0x800 ;// test if the level is negative
+ SUBNE storeLevel,storeLevel,#4096
+ CMP storeLevel,#0
+ CMPNE storeLevel,#-2048
+ BEQ ExitError ;// Exit with an error message if Level==0 or -2048
+
+ M_LDR pZigzagTable,ppZigzagTable ;// Load address of zigzagTable
+
+ M_BD_READ8 markerbit,1,T1
+
+
+ ;// armVCM4P2_FillVLDBuffer ( Sign not used as storeLevel is preprocessed)
+
+
+
+ ;// To Reflect Run Length
+
+ ADD Count,Count,storeRun
+
+
+
+storeLevelLast
+
+ LDRB zigzag,[pZigzagTable,Count]
+ CMP Last,#1
+ ADD Count,Count,#1
+ STRH storeLevel,[pDst,zigzag]
+
+ BNE end
+
+ B ExitOk
+
+end
+
+ CMP Count,#64 ;// Run the loop until Count reaches 64
+
+ BLT getVLCbits
+
+
+ExitOk
+ ;// Normal exit: returns OMX_Sts_NoErr only if Last==1, otherwise OMX_Sts_Err
+
+ ;// Loading ppBitStream and pBitOffset from stack
+
+ CMP Last,#1
+ M_LDR ppBitStream,pppBitStream
+ M_LDR pBitOffset,ppOffset
+
+ ;// Ending the macro (MOVEQ/MOVNE below consume the CMP flags above, so this presumably relies on M_BD_FINI leaving them intact - confirm)
+
+ M_BD_FINI ppBitStream,pBitOffset
+
+ MOVEQ Return,#OMX_Sts_NoErr
+ MOVNE Return,#OMX_Sts_Err
+ M_LDR LR,pLinkRegister ;// Load the Link Register Back
+ B exit2
+
+ExitError
+ ;// Exit When an Error occurs
+
+ M_LDR ppBitStream,pppBitStream
+ M_LDR pBitOffset,ppOffset
+ ;//Ending the macro
+
+ M_BD_FINI ppBitStream,pBitOffset
+ M_LDR LR,pLinkRegister
+ MOV Return,#OMX_Sts_Err
+
+exit2
+
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
new file mode 100644
index 0000000..ba4d058
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
@@ -0,0 +1,211 @@
+ /**
+ *
+ * File Name: armVCM4P2_Huff_Tables_VLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVCM4P2_Huff_Tables_VLC.c
+ * Description: Contains all the Huffman tables used in MPEG4 codec
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armCOMM_Bitstream.h"
+
+
+
+
+// Contains optimized and packed VLC tables with Last=0 and Last=1
+
+// Optimized packed VLC table entry format
+// ---------------------------------------
+//
+// 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
+// +------------------------------------------------+
+// | Len | Run | Level |L | 1 |
+// +------------------------------------------------+
+// | Offset | 0 |
+// +------------------------------------------------+
+// If the table entry is a leaf entry then bit 0 is set:
+// Len = Number of bits overread (0 to 7) 3 bits
+// Run = RunLength of the Symbol (0 to 63) 6 bits
+// Level = Level of the Symbol (0 to 31) 5 bits
+// L = Last Value of the Symbol (0 or 1) 1 bit
+//
+// If the table entry is an internal node then bit 0 is clear:
+// Offset = Number of (16-bit) half words from the table
+// start to the next table node
+//
+// The table is accessed by successive lookups on the
+// next Step bits of the input bitstream until a leaf node
+// is obtained. The Step sizes are supplied to the VLD macro.
+
+// The VLC tables used for Intra and non-intra coefficients in non-escape mode
+// contain symbols with both Last=0 and Last=1.
+// If a symbol is not found in the table it will be coded as 0xFFF
+
+// Packed VLC table for Inter (non-intra) coefficients, in the entry format described above.
+const OMX_U16 armVCM4P2_InterVlcL0L1[200] = {
+ 0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x1b09,
+ 0x4009, 0x4009, 0x4009, 0x4009, 0x2109, 0x2109, 0x0209, 0x0011,
+ 0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058,
+ 0x3fff, 0x3fff, 0x0038, 0x0040, 0x2115, 0x2115, 0x201d, 0x201d,
+ 0x2059, 0x2059, 0x2051, 0x2051, 0x1c0d, 0x1b0d, 0x1a0d, 0x190d,
+ 0x0911, 0x0811, 0x0711, 0x0611, 0x0511, 0x0319, 0x0219, 0x0121,
+ 0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088,
+ 0x2061, 0x2061, 0x2129, 0x2129, 0x3709, 0x3709, 0x3809, 0x3809,
+ 0x3d0d, 0x3d0d, 0x3e0d, 0x3e0d, 0x3f0d, 0x3f0d, 0x200d, 0x200d,
+ 0x0098, 0x00a0, 0x00a8, 0x00b0, 0x0131, 0x0221, 0x0419, 0x0519,
+ 0x0619, 0x0a11, 0x1909, 0x1a09, 0x210d, 0x220d, 0x230d, 0x240d,
+ 0x250d, 0x260d, 0x270d, 0x280d, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+ 0x0049, 0x0041, 0x380d, 0x380d, 0x370d, 0x370d, 0x360d, 0x360d,
+ 0x350d, 0x350d, 0x340d, 0x340d, 0x330d, 0x330d, 0x320d, 0x320d,
+ 0x00e8, 0x00f0, 0x00f8, 0x0100, 0x310d, 0x310d, 0x2015, 0x2015,
+ 0x3609, 0x3609, 0x3509, 0x3509, 0x3409, 0x3409, 0x3309, 0x3309,
+ 0x3209, 0x3209, 0x3109, 0x3109, 0x0110, 0x0130, 0x0138, 0x0140,
+ 0x0118, 0x0120, 0x0128, 0x100d, 0x3009, 0x3009, 0x2f09, 0x2f09,
+ 0x2411, 0x2411, 0x2311, 0x2311, 0x2039, 0x2039, 0x2031, 0x2031,
+ 0x0f0d, 0x0e0d, 0x0d0d, 0x0c0d, 0x0b0d, 0x0a0d, 0x090d, 0x0e09,
+ 0x0d09, 0x0211, 0x0119, 0x0029, 0x0150, 0x0158, 0x0160, 0x0168,
+ 0x280d, 0x280d, 0x270d, 0x270d, 0x260d, 0x260d, 0x250d, 0x250d,
+ 0x2c09, 0x2c09, 0xb759, 0xb759, 0x2a09, 0x2a09, 0x2021, 0x2021,
+ 0x040d, 0x030d, 0x0b35, 0x010d, 0x0909, 0x0809, 0x0709, 0x0609,
+ 0x0111, 0x0019, 0x2509, 0x2509, 0x2409, 0x2409, 0x2309, 0x2309
+};
+
+// Packed VLC table for Intra coefficients (Last=0 and Last=1 symbols combined), in the packed entry format documented earlier in this file.
+const OMX_U16 armVCM4P2_IntraVlcL0L1[200] = {
+ 0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x0f09,
+ 0x4009, 0x4009, 0x4009, 0x4009, 0x2011, 0x2011, 0x0109, 0x0019,
+ 0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058,
+ 0x3fff, 0x3fff, 0x0038, 0x0040, 0x203d, 0x203d, 0x2035, 0x2035,
+ 0x20b1, 0x20b1, 0x20a9, 0x20a9, 0x0215, 0x011d, 0x002d, 0x0d09,
+ 0x0519, 0x0811, 0x0419, 0x0321, 0x0221, 0x0139, 0x00a1, 0x0099,
+ 0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088,
+ 0x20b9, 0x20b9, 0x20c1, 0x20c1, 0x2141, 0x2141, 0x2911, 0x2911,
+ 0x2315, 0x2315, 0x2415, 0x2415, 0x2f0d, 0x2f0d, 0x300d, 0x300d,
+ 0x0098, 0x00a0, 0x00a8, 0x00b0, 0x00c9, 0x00d1, 0x00d9, 0x0149,
+ 0x0619, 0x0151, 0x0229, 0x0719, 0x0e09, 0x0045, 0x0515, 0x0615,
+ 0x110d, 0x120d, 0x130d, 0x140d, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+ 0x0091, 0x0089, 0x2e0d, 0x2e0d, 0x2d0d, 0x2d0d, 0x2c0d, 0x2c0d,
+ 0x2b0d, 0x2b0d, 0x2a0d, 0x2a0d, 0x2115, 0x2115, 0x2025, 0x2025,
+ 0x00e8, 0x00f0, 0x00f8, 0x0100, 0x2c09, 0x2c09, 0x2b09, 0x2b09,
+ 0x2711, 0x2711, 0x2611, 0x2611, 0x2511, 0x2511, 0x2319, 0x2319,
+ 0x2219, 0x2219, 0x2131, 0x2131, 0x0110, 0x0130, 0x0138, 0x0140,
+ 0x0118, 0x0120, 0x0128, 0x080d, 0x2129, 0x2129, 0x2081, 0x2081,
+ 0x2411, 0x2411, 0x2079, 0x2079, 0x2071, 0x2071, 0x2069, 0x2069,
+ 0x1bb5, 0x060d, 0x001d, 0xd3f9, 0x0909, 0x0809, 0x090d, 0x0311,
+ 0x0121, 0x0061, 0x0059, 0x0051, 0x0150, 0x0158, 0x0160, 0x0168,
+ 0x240d, 0x240d, 0x230d, 0x230d, 0x2609, 0x2609, 0x250d, 0x250d,
+ 0x2709, 0x2709, 0x2211, 0x2211, 0x2119, 0x2119, 0x2049, 0x2049,
+ 0x0015, 0x0509, 0x020d, 0x010d, 0x0409, 0x0309, 0x0041, 0x0039,
+ 0x0111, 0x0031, 0x2209, 0x2209, 0x2029, 0x2029, 0x2021, 0x2021
+};
+/* Two 32-entry halves; presumably the luma DC table followed by the chroma DC table, as the name suggests - confirm against the DC decoder. */
+const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64] = {
+ 0x0020, 0x000b, 0x2009, 0x2009, 0x2007, 0x2007, 0x2001, 0x2001,
+ 0x4005, 0x4005, 0x4005, 0x4005, 0x4003, 0x4003, 0x4003, 0x4003,
+ 0x0028, 0x000f, 0x200d, 0x200d, 0x0030, 0x0013, 0x2011, 0x2011,
+ 0x0038, 0x0017, 0x2015, 0x2015, 0x3fff, 0x3fff, 0x2019, 0x2019,
+
+ 0x0020, 0x0009, 0x2007, 0x2007, 0x4005, 0x4005, 0x4005, 0x4005,
+ 0x4003, 0x4003, 0x4003, 0x4003, 0x4001, 0x4001, 0x4001, 0x4001,
+ 0x0028, 0x000d, 0x200b, 0x200b, 0x0030, 0x0011, 0x200f, 0x200f,
+ 0x0038, 0x0015, 0x2013, 0x2013, 0x1fff, 0x0019, 0x2017, 0x2017
+};
+
+/* Packed 16-bit lookup table; presumably the VLC table for motion vector differences, going by the name - confirm against its user. */
+const OMX_U16 armVCM4P2_aVlcMVD[124] = {
+ 0x0010, 0x00f0, 0x0043, 0x003f, 0x4041, 0x4041, 0x4041, 0x4041,
+ 0x0018, 0x00d8, 0x0047, 0x003b, 0x0020, 0x0080, 0x00a8, 0x00d0,
+ 0x0028, 0x0048, 0x0070, 0x0078, 0x1fff, 0x0030, 0x0038, 0x0040,
+ 0x0081, 0x0001, 0x007f, 0x0003, 0x207d, 0x207d, 0x2005, 0x2005,
+ 0x207b, 0x207b, 0x2007, 0x2007, 0x0050, 0x0058, 0x0060, 0x0068,
+ 0x2079, 0x2079, 0x2009, 0x2009, 0x2077, 0x2077, 0x200b, 0x200b,
+ 0x2075, 0x2075, 0x200d, 0x200d, 0x2073, 0x2073, 0x200f, 0x200f,
+ 0x0071, 0x0011, 0x006f, 0x0013, 0x006d, 0x0015, 0x006b, 0x0017,
+ 0x0088, 0x0090, 0x0098, 0x00a0, 0x0069, 0x0019, 0x0067, 0x001b,
+ 0x0065, 0x001d, 0x0063, 0x001f, 0x0061, 0x0021, 0x005f, 0x0023,
+ 0x005d, 0x0025, 0x005b, 0x0027, 0x00b0, 0x00b8, 0x00c0, 0x00c8,
+ 0x0059, 0x0029, 0x0057, 0x002b, 0x2055, 0x2055, 0x202d, 0x202d,
+ 0x2053, 0x2053, 0x202f, 0x202f, 0x2051, 0x2051, 0x2031, 0x2031,
+ 0x204f, 0x204f, 0x2033, 0x2033, 0x00e0, 0x00e8, 0x0049, 0x0039,
+ 0x204d, 0x204d, 0x2035, 0x2035, 0x204b, 0x204b, 0x2037, 0x2037,
+ 0x2045, 0x2045, 0x203d, 0x203d
+};
+
+/* LMAX table for Inter blocks: Last == 0 entries followed by Last == 1 entries.
+ NOTE(review): the original note said "Level - 1 Indexed"; an LMAX table is presumably indexed by run - confirm against the decoder.
+ Entries [27-31] are zero padding so that the Last == 1 entries start at index 32.
+
+*/
+const OMX_U8 armVCM4P2_InterL0L1LMAX[73] =
+{
+ 12, 6, 4, 3, 3, 3, 3, 2,
+ 2, 2, 2, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 0, 0, 0, 0, 0,
+ 3, 2, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1
+};
+
+/* RMAX table for Inter blocks: Last == 0 entries followed by Last == 1 entries.
+ Indexed by Level - 1.
+ Entries [12-31] are zero padding so that the Last == 1 entries start at index 32. */
+
+
+const OMX_U8 armVCM4P2_InterL0L1RMAX[35] =
+{
+ 26, 10, 6, 2, 1, 1,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0, 40, 1, 0
+};
+
+/* LMAX table for Intra blocks: Last == 0 entries followed by Last == 1 entries.
+ NOTE(review): the original note said "Level - 1 Indexed"; an LMAX table is presumably indexed by run - confirm against the decoder.
+ Entries [15-31] are zero padding so that the Last == 1 entries start at index 32.
+
+*/
+const OMX_U8 armVCM4P2_IntraL0L1LMAX[53] =
+{
+ 27, 10, 5, 4, 3, 3, 3,
+ 3, 2, 2, 1, 1, 1, 1, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 8, 3, 2, 2, 2, 2, 2, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1
+};
+
+
+/* RMAX table for Intra blocks: Last == 0 entries followed by Last == 1 entries.
+ Indexed by Level - 1.
+ Entries [27-31] are zero padding so that the Last == 1 entries start at index 32. */
+
+
+const OMX_U8 armVCM4P2_IntraL0L1RMAX[40] =
+{
+ 14, 9, 7, 3, 2, 1, 1,
+ 1, 1, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,
+
+ 20, 6, 1, 0, 0, 0, 0, 0
+
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
new file mode 100644
index 0000000..25cf8db
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
@@ -0,0 +1,75 @@
+ /**
+ *
+ * File Name: armVCM4P2_Lookup_Tables.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVCM4P2_Lookup_Tables.c
+ * Description: Contains all the Lookup tables used in MPEG4 codec
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+ /* * Table Entries contain Dc Scaler values
+ * armVCM4P2_DCScaler[i]= 8 for i=1 to 4 and i=33 to 36
+ * = 2*i for i=5 to 8
+ * = i+8 for i=9 to 24
+ * = 2*i-16 for i=25 to 31
+ * = (i-32+13)/2 for i=37 to 56
+ * = i-6-32 for i=57 to 63
+ * = 255 for i=0 and i=32
+ */
+
+const OMX_U8 armVCM4P2_DCScaler[64]={
+ 0xff, 0x8, 0x8, 0x8, 0x8, 0xa, 0xc, 0xe,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e,
+ 0xff, 0x8, 0x8, 0x8, 0x8, 0x9, 0x9, 0xa,
+ 0xa, 0xb, 0xb, 0xc, 0xc, 0xd, 0xd, 0xe,
+ 0xe, 0xf, 0xf, 0x10, 0x10, 0x11, 0x11, 0x12,
+ 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
+
+};
+
+
+ /* Table entries contain the scaled reciprocals of 1 to 63 (scale factor 32767)
+ * armVCM4P2_Reciprocal_QP_S16[i]=round(32767/i)
+ * armVCM4P2_Reciprocal_QP_S16[0]= 0 (placeholder, there is no reciprocal of 0)
+ */
+
+const OMX_S16 armVCM4P2_Reciprocal_QP_S16[64]={
+ 0x0000,0x7fff,0x4000,0x2aaa,0x2000,0x1999,0x1555,0x1249,
+ 0x1000,0x0e39,0x0ccd,0x0ba3,0x0aab,0x09d9,0x0925,0x0888,
+ 0x0800,0x0787,0x071c,0x06bd,0x0666,0x0618,0x05d1,0x0591,
+ 0x0555,0x051f,0x04ec,0x04be,0x0492,0x046a,0x0444,0x0421,
+ 0x0400,0x03e1,0x03c4,0x03a8,0x038e,0x0376,0x035e,0x0348,
+ 0x0333,0x031f,0x030c,0x02fa,0x02e9,0x02d8,0x02c8,0x02b9,
+ 0x02ab,0x029d,0x028f,0x0282,0x0276,0x026a,0x025f,0x0254,
+ 0x0249,0x023f,0x0235,0x022b,0x0222,0x0219,0x0211,0x0208
+
+};
+
+ /* Table entries contain the scaled reciprocals of 1 to 63 (scale factor 131071)
+ * armVCM4P2_Reciprocal_QP_S32[i]=round(131071/i)
+ * armVCM4P2_Reciprocal_QP_S32[0]= 0 (placeholder, there is no reciprocal of 0)
+ */
+
+const OMX_S32 armVCM4P2_Reciprocal_QP_S32[64]={
+ 0x00000000,0x0001ffff,0x00010000,0x0000aaaa, 0x00008000, 0x00006666, 0x00005555, 0x00004924,
+ 0x00004000,0x000038e3,0x00003333,0x00002e8c, 0x00002aab, 0x00002762, 0x00002492, 0x00002222,
+ 0x00002000,0x00001e1e,0x00001c72,0x00001af2, 0x0000199a, 0x00001861, 0x00001746, 0x00001643,
+ 0x00001555,0x0000147b,0x000013b1,0x000012f6, 0x00001249, 0x000011a8, 0x00001111, 0x00001084,
+ 0x00001000,0x00000f84,0x00000f0f,0x00000ea1, 0x00000e39, 0x00000dd6, 0x00000d79, 0x00000d21,
+ 0x00000ccd,0x00000c7d,0x00000c31,0x00000be8, 0x00000ba3, 0x00000b61, 0x00000b21, 0x00000ae5,
+ 0x00000aab,0x00000a73,0x00000a3d,0x00000a0a, 0x000009d9, 0x000009a9, 0x0000097b, 0x0000094f,
+ 0x00000925,0x000008fb,0x000008d4,0x000008ae, 0x00000889, 0x00000865, 0x00000842, 0x00000820
+
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
new file mode 100644
index 0000000..3f92d85
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
@@ -0,0 +1,104 @@
+;//
+;//
+;// File Name: armVCM4P2_SetPredDir_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+; **
+; * Function: armVCM4P2_SetPredDir
+; *
+; * Description:
+; * Performs detecting the prediction direction
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] blockIndex block index indicating the component type and
+; * position as defined in subclause 6.1.3.8, of ISO/IEC
+; * 14496-2. Furthermore, indexes 6 to 9 indicate the
+; * alpha blocks spatially corresponding to luminance
+; * blocks 0 to 3 in the same macroblock.
+; * [in] pCoefBufRow pointer to the coefficient row buffer
+; * [in] pCoefBufCol pointer to the coefficient column buffer
+; * [in] pQpBuf pointer to the quantization parameter buffer
+; * [out]predQP quantization parameter of the predictor block
+; * [out]predDir indicates the prediction direction which takes one
+; * of the following values:
+; * OMX_VC_HORIZONTAL predict horizontally
+; * OMX_VC_VERTICAL predict vertically
+; *
+; * Return Value:
+; * Standard OMXResult result. See enumeration for possible result codes.
+; *
+; */
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE omxVC_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+ IF ARM1136JS
+
+;// Input Arguments
+BlockIndex RN 0
+pCoefBufRow RN 1
+pCoefBufCol RN 2
+predDir RN 3
+predQP RN 4
+pQpBuf RN 5
+
+;// Local Variables
+
+Return RN 0
+blockDCLeft RN 6
+blockDCTop RN 7
+blockDCTopLeft RN 8
+temp1 RN 9
+temp2 RN 14
+
+ M_START armVCM4P2_SetPredDir,r9
+
+ M_ARG ppredQP,4 ;// 5th argument (predQP pointer), beyond the four register arguments
+ M_ARG ppQpBuf,4 ;// 6th argument (pQpBuf pointer)
+ ;// Fetch the DC values of the top-left and left neighbours
+ LDRH blockDCTopLeft,[pCoefBufRow,#-16] ;// halfword located 16 bytes before pCoefBufRow
+ LDRH blockDCLeft,[pCoefBufCol]
+ ;// Block 3 reads its top neighbour from the column buffer, every other block from the row buffer
+ TEQ BlockIndex,#3
+ LDREQH blockDCTop,[pCoefBufCol,#-16]
+ LDRNEH blockDCTop,[pCoefBufRow]
+ ;// temp1 = abs(left - topLeft), temp2 = abs(topLeft - top)
+ SUBS temp1,blockDCLeft,blockDCTopLeft
+ RSBLT temp1,temp1,#0 ;// negate when the difference was negative
+ SUBS temp2,blockDCTopLeft,blockDCTop
+ RSBLT temp2,temp2,#0 ;// negate when the difference was negative
+ ;// The flags set by the CMP below stay live until M_END: none of the following instructions updates them
+ M_LDR pQpBuf,ppQpBuf
+ M_LDR predQP,ppredQP
+ CMP temp1,temp2 ;// LT: predict vertically, GE: predict horizontally
+ MOV temp2,#OMX_VC_VERTICAL
+ LDRLTB temp1,[pQpBuf,#1] ;// vertical case: predictor QP is pQpBuf[1]
+ STRLT temp2,[predDir]
+ STRLT temp1,[predQP] ;// word store of the byte just loaded
+ MOV temp2,#OMX_VC_HORIZONTAL
+ LDRGEB temp1,[pQpBuf] ;// horizontal case: predictor QP is pQpBuf[0]
+ STRGE temp2,[predDir]
+ MOV Return,#OMX_Sts_NoErr ;// this routine always reports success
+ STRGE temp1,[predQP] ;// word store of the byte just loaded
+
+
+
+ M_END
+
+ ENDIF
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
new file mode 100644
index 0000000..ed17f9b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
@@ -0,0 +1,61 @@
+/**
+ *
+ * File Name: armVCM4P2_Zigzag_Tables.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVCM4P2_ZigZag_Tables.c
+ * Description: Contains the zigzag tables
+ *
+ */
+
+#include "omxtypes.h"
+
+/* Contains Double the values in the reference Zigzag Table
+ * Contains Classical, Vertical and Horizontal Zigzag scan tables in one array:
+ * entries [0-63], [64-127] and [128-191] respectively. Each 64-entry section
+ * must be a permutation of the even values 0..126 (every coefficient position
+ * visited exactly once).
+ */
+
+const OMX_U8 armVCM4P2_aClassicalZigzagScan [192] =
+{
+ 0, 2, 16, 32, 18, 4, 6, 20,
+ 34, 48, 64, 50, 36, 22, 8, 10,
+ 24, 38, 52, 66, 80, 96, 82, 68,
+ 54, 40, 26, 12, 14, 28, 42, 56,
+ 70, 84, 98, 112, 114, 100, 86, 72,
+ 58, 44, 30, 46, 60, 74, 88, 102,
+ 116, 118, 104, 90, 76, 62, 78, 92,
+ 106, 120, 122, 108, 94, 110, 124, 126, /* entry 59 is 2*54 = 108 (was a duplicate 104) */
+
+ 0, 16, 32, 48, 2, 18, 4, 20,
+ 34, 50, 64, 80, 96, 112, 114, 98,
+ 82, 66, 52, 36, 6, 22, 8, 24,
+ 38, 54, 68, 84, 100, 116, 70, 86,
+ 102, 118, 40, 56, 10, 26, 12, 28,
+ 42, 58, 72, 88, 104, 120, 74, 90,
+ 106, 122, 44, 60, 14, 30, 46, 62,
+ 76, 92, 108, 124, 78, 94, 110, 126,
+
+ 0, 2, 4, 6, 16, 18, 32, 34,
+ 20, 22, 8, 10, 12, 14, 30, 28,
+ 26, 24, 38, 36, 48, 50, 64, 66,
+ 52, 54, 40, 42, 44, 46, 56, 58,
+ 60, 62, 68, 70, 80, 82, 96, 98,
+ 84, 86, 72, 74, 76, 78, 88, 90,
+ 92, 94, 100, 102, 112, 114, 116, 118,
+ 104, 106, 108, 110, 120, 122, 124, 126
+
+
+};
+
+
+
+
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
new file mode 100644
index 0000000..b63d295
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
@@ -0,0 +1,102 @@
+/**
+ *
+ * File Name: omxVCM4P2_DecodeBlockCoef_Inter.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for inter reconstruction
+ *
+ */
+
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Inter
+ *
+ * Description:
+ * Decodes the INTER block coefficients. Inverse quantization, inversely zigzag
+ * positioning and IDCT, with appropriate clipping on each step, are performed
+ * on the coefficients. The results (residuals) are placed in a contiguous array
+ * of 64 elements. For INTER block, the output buffer holds the residuals for
+ * further reconstruction.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream buffer. There is no boundary
+ * check for the bit stream buffer.
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * to by *ppBitStream. *pBitOffset is valid within
+ * [0-7]
+ * [in] QP quantization parameter
+ * [in] shortVideoHeader a flag indicating presence of short_video_header;
+ * shortVideoHeader==1 indicates using quantization method defined in short
+ * video header mode, and shortVideoHeader==0 indicates normal quantization method.
+ * [out] ppBitStream *ppBitStream is updated after the block is decoded, so that it points to the
+ * current byte in the bit stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the current bit position in the
+ * byte pointed by *ppBitStream
+ * [out] pDst pointer to the decoded residual buffer (a contiguous array of 64 elements of
+ * OMX_S16 data type). Must be 16-byte aligned.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - At least one of the following pointers is Null: ppBitStream, *ppBitStream, pBitOffset , pDst
+ * - At least one of the below case:
+ * - *pBitOffset exceeds [0,7], QP <= 0;
+ * - pDst not 16-byte aligned
+ * OMX_Sts_Err - status error
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Inter(
+ const OMX_U8 ** ppBitStream,
+ OMX_INT * pBitOffset,
+ OMX_S16 * pDst,
+ OMX_INT QP,
+ OMX_INT shortVideoHeader
+)
+{
+ /* Scratch coefficient storage: 64 elements are used, the 15 spare
+ elements leave room to pick a 16-byte aligned start inside the array */
+ OMX_S16 coefStore[79];
+ OMX_S16 *pCoef = armAlignTo16Bytes(coefStore);
+ OMXResult status;
+
+ /* Stage 1: VLC decode and inverse zigzag into the aligned scratch buffer */
+ status = omxVCM4P2_DecodeVLCZigzag_Inter(
+ ppBitStream,
+ pBitOffset,
+ pCoef,
+ shortVideoHeader);
+ armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+ /* Stage 2: inverse quantisation of the scratch buffer */
+ status = omxVCM4P2_QuantInvInter_I(pCoef, QP);
+ armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+ /* Stage 3: 8x8 inverse DCT from the scratch buffer into pDst */
+ status = omxVCM4P2_IDCT8x8blk(pCoef, pDst);
+ armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+ /* All three stages succeeded */
+ return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
new file mode 100644
index 0000000..c609a60
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
@@ -0,0 +1,208 @@
+/**
+ *
+ * File Name: omxVCM4P2_DecodeBlockCoef_Intra.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for intra reconstruction
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Intra
+ *
+ * Description:
+ * Decodes the INTRA block coefficients. Inverse quantization, inversely zigzag
+ * positioning, and IDCT, with appropriate clipping on each step, are performed
+ * on the coefficients. The results are then placed in the output frame/plane on
+ * a pixel basis. For INTRA block, the output values are clipped to [0, 255] and
+ * written to corresponding block buffer within the destination plane.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream buffer. There is no boundary
+ * check for the bit stream buffer.
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * to by *ppBitStream. *pBitOffset is valid within
+ * [0-7].
+ * [in] step width of the destination plane
+ * [in/out] pCoefBufRow [in] pointer to the coefficient row buffer
+ * [out] updated coefficient row buffer
+ * [in/out] pCoefBufCol [in] pointer to the coefficient column buffer
+ * [out] updated coefficient column buffer
+ * [in] curQP quantization parameter of the macroblock which
+ * the current block belongs to
+ * [in] pQpBuf Pointer to a 2-element QP array. pQpBuf[0] holds the QP of the 8x8 block left to
+ * the current block(QPa). pQpBuf[1] holds the QP of the 8x8 block just above the
+ * current block(QPc).
+ * Note, in case the corresponding block is out of VOP bound, the QP value will have
+ * no effect to the intra-prediction process. Refer to subclause "7.4.3.3 Adaptive
+ * ac coefficient prediction" of ISO/IEC 14496-2(MPEG4 Part2) for accurate description.
+ * [in] blockIndex block index indicating the component type and
+ * position as defined in subclause 6.1.3.8,
+ * Figure 6-5 of ISO/IEC 14496-2.
+ * [in] intraDCVLC a code determined by intra_dc_vlc_thr and QP.
+ * This allows a mechanism to switch between two VLC
+ * for coding of Intra DC coefficients as per Table
+ * 6-21 of ISO/IEC 14496-2.
+ * [in] ACPredFlag a flag equal to ac_pred_flag (of luminance) indicating
+ * if the ac coefficients of the first row or first
+ * column are differentially coded for intra coded
+ * macroblock.
+ * [in] shortVideoHeader a flag indicating presence of short_video_header;
+ * shortVideoHeader==1 selects linear intra DC mode,
+ * and shortVideoHeader==0 selects nonlinear intra DC mode.
+ * [out] ppBitStream *ppBitStream is updated after the block is
+ * decoded, so that it points to the current byte
+ * in the bit stream buffer
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream
+ * [out] pDst pointer to the block in the destination plane.
+ * pDst should be 16-byte aligned.
+ * [out] pCoefBufRow pointer to the updated coefficient row buffer.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset,
+ * pCoefBufRow, pCoefBufCol, pQPBuf, pDst.
+ * or
+ * - At least one of the below case: *pBitOffset exceeds [0,7], curQP exceeds (1, 31),
+ * blockIndex exceeds [0,9], step is not the multiple of 8, intraDCVLC is zero while
+ * blockIndex greater than 5.
+ * or
+ * - pDst is not 16-byte aligned
+ * OMX_Sts_Err - status error
+ *
+ */
+
+OMXResult omxVCM4P2_DecodeBlockCoef_Intra(
+ const OMX_U8 ** ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_U8 *pDst,
+ OMX_INT step,
+ OMX_S16 *pCoefBufRow,
+ OMX_S16 *pCoefBufCol,
+ OMX_U8 curQP,
+ const OMX_U8 *pQPBuf,
+ OMX_INT blockIndex,
+ OMX_INT intraDCVLC,
+ OMX_INT ACPredFlag,
+ OMX_INT shortVideoHeader
+ )
+{
+ /* Two scratch arrays, each oversized (79 elements for a 64-element
+ block) so that a 16-byte aligned start can be picked inside them */
+ OMX_S16 coefStore[79];
+ OMX_S16 idctStore[79];
+ OMX_S16 *pCoef = armAlignTo16Bytes(coefStore);
+ OMX_S16 *pIdctOut = armAlignTo16Bytes(idctStore);
+ OMX_INT predDir;
+ OMX_INT predQP;
+ OMX_INT acPredDir;
+ OMXVCM4P2VideoComponent videoComp;
+ OMXResult status;
+
+ /* Choose the prediction direction and fetch the predictor block's QP */
+ armVCM4P2_SetPredDir(blockIndex, pCoefBufRow, pCoefBufCol,
+ &predDir, &predQP, pQPBuf);
+
+ /* AC prediction uses the same direction, but only when ACPredFlag is set */
+ acPredDir = (ACPredFlag == 0) ? OMX_VC_NONE : predDir;
+
+ /* Block indices up to 3 are luminance, everything else is chrominance */
+ videoComp = (blockIndex <= 3) ? OMX_VC_LUMINANCE : OMX_VC_CHROMINANCE;
+
+ /* Stage 1: VLC decode and inverse zigzag; intraDCVLC == 1 selects the
+ IntraDCVLC decoder, any other value the IntraACVLC decoder */
+ if (intraDCVLC != 1)
+ {
+ status = omxVCM4P2_DecodeVLCZigzag_IntraACVLC(
+ ppBitStream,
+ pBitOffset,
+ pCoef,
+ acPredDir,
+ shortVideoHeader);
+ }
+ else
+ {
+ status = omxVCM4P2_DecodeVLCZigzag_IntraDCVLC(
+ ppBitStream,
+ pBitOffset,
+ pCoef,
+ acPredDir,
+ shortVideoHeader,
+ videoComp);
+ }
+ armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+ /* Stage 2: AC/DC prediction and reconstruction of the coefficients */
+ status = omxVCM4P2_PredictReconCoefIntra(
+ pCoef,
+ pCoefBufRow,
+ pCoefBufCol,
+ curQP,
+ predQP,
+ predDir,
+ ACPredFlag,
+ videoComp);
+ armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+ /* Stage 3: inverse quantisation of the scratch buffer */
+ status = omxVCM4P2_QuantInvIntra_I(pCoef, curQP, videoComp, shortVideoHeader);
+ armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+ /* Stage 4: 8x8 inverse DCT into the second scratch buffer */
+ status = omxVCM4P2_IDCT8x8blk(pCoef, pIdctOut);
+ armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+ /* Stage 5: clip to 0..255 and place the block into the destination
+ plane, using step as the plane width */
+ armVCM4P2_Clip8(pIdctOut, pDst, step);
+
+ return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
new file mode 100644
index 0000000..a1861da
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
@@ -0,0 +1,364 @@
+; **********
+; *
+; * File Name: omxVCM4P2_DecodePadMV_PVOP_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; **
+; * Function: omxVCM4P2_DecodePadMV_PVOP
+; *
+; * Description:
+; * Decodes and pads four motion vectors of the non-intra macroblock in P-VOP.
+; * The motion vector padding process is specified in subclause 7.6.1.6 of
+; * ISO/IEC 14496-2.
+; *
+; * Remarks:
+; *
+; *
+; * Parameters:
+; * [in] ppBitStream pointer to the pointer to the current byte in
+; * the bit stream buffer
+; * [in] pBitOffset pointer to the bit position in the byte pointed
+; * to by *ppBitStream. *pBitOffset is valid within
+; * [0-7].
+; * [in] pSrcMVLeftMB pointers to the motion vector buffers of the
+; * macroblocks specially at the left side of the current macroblock
+; * respectively.
+; * [in] pSrcMVUpperMB pointers to the motion vector buffers of the
+; * macroblocks specially at the upper side of the current macroblock
+; * respectively.
+; * [in] pSrcMVUpperRightMB pointers to the motion vector buffers of the
+; * macroblocks specially at the upper-right side of the current macroblock
+; * respectively.
+; * [in] fcodeForward a code equal to vop_fcode_forward in MPEG-4
+; * bit stream syntax
+; * [in] MBType the type of the current macroblock. If MBType
+; * is not equal to OMX_VC_INTER4V, the destination
+; * motion vector buffer is still filled with the
+; * same decoded vector.
+; * [out] ppBitStream *ppBitStream is updated after the block is decoded,
+; * so that it points to the current byte in the bit
+; * stream buffer
+; * [out] pBitOffset *pBitOffset is updated so that it points to the
+; * current bit position in the byte pointed by
+; * *ppBitStream
+; * [out] pDstMVCurMB pointer to the motion vector buffer of the current
+; * macroblock which contains four decoded motion vectors
+; *
+; * Return Value:
+; * OMX_Sts_NoErr -no error
+; *
+; *
+; * OMX_Sts_Err - status error
+; *
+; *
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+ INCLUDE omxVC_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+ IF ARM1136JS
+
+;//Input Arguments
+
+ppBitStream RN 0
+pBitOffset RN 1
+pSrcMVLeftMB RN 2
+pSrcMVUpperMB RN 3
+pSrcMVUpperRightMB RN 4
+pDstMVCurMB RN 5
+fcodeForward RN 6
+MBType RN 7
+
+;//Local Variables
+
+zero RN 4
+one RN 4
+scaleFactor RN 1
+
+
+Return RN 0
+
+VlcMVD RN 0
+index RN 4
+Count RN 7
+
+mvHorData RN 4
+mvHorResidual RN 0
+
+mvVerData RN 4
+mvVerResidual RN 0
+
+temp RN 1
+
+temp1 RN 3
+High RN 4
+Low RN 2
+Range RN 1
+
+BlkCount RN 14
+
+diffMVdx RN 0
+diffMVdy RN 1
+
+;// Scratch Registers
+
+RBitStream RN 8
+RBitCount RN 9
+RBitBuffer RN 10
+
+T1 RN 11
+T2 RN 12
+LR RN 14
+
+ IMPORT armVCM4P2_aVlcMVD
+ IMPORT omxVCM4P2_FindMVpred
+
+ ;// Allocate stack memory
+
+ M_ALLOC4 ppDstMVCurMB,4
+ M_ALLOC4 pDstMVPredME,4
+ M_ALLOC4 pBlkCount,4
+
+ M_ALLOC4 pppBitStream,4
+ M_ALLOC4 ppBitOffset,4
+ M_ALLOC4 ppSrcMVLeftMB,4
+ M_ALLOC4 ppSrcMVUpperMB,4
+
+ M_ALLOC4 pdiffMVdx,4
+ M_ALLOC4 pdiffMVdy,4
+ M_ALLOC4 pHigh,4
+
+
+
+
+ M_START omxVCM4P2_DecodePadMV_PVOP,r11
+
+ M_ARG pSrcMVUpperRightMBonStack,4 ;// pointer to pSrcMVUpperRightMB on stack
+ M_ARG pDstMVCurMBonStack,4 ;// pointer to pDstMVCurMB on stack
+ M_ARG fcodeForwardonStack,4 ;// pointer to fcodeForward on stack
+ M_ARG MBTypeonStack,4 ;// pointer to MBType on stack
+
+
+
+
+
+ ;// Initializing the BitStream Macro
+
+ M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+ M_LDR MBType,MBTypeonStack ;// Load MBType from stack
+ M_LDR pDstMVCurMB,pDstMVCurMBonStack ;// Load pDstMVCurMB from stack
+ MOV zero,#0
+
+ ;// INTRA / INTRA_Q macroblocks: zero all four motion vectors (4 bytes each) and exit.
+ ;// The conditional stores are interleaved with the bitstream init macros, which are
+ ;// relied upon not to alter the flags set by the TEQ pair.
+ TEQ MBType,#OMX_VC_INTRA ;// Check if MBType=OMX_VC_INTRA
+ TEQNE MBType,#OMX_VC_INTRA_Q ;// check if MBType=OMX_VC_INTRA_Q
+ STREQ zero,[pDstMVCurMB] ;// pDstMVCurMB[0] = 0
+ M_BD_INIT1 T1, T2, T2
+ STREQ zero,[pDstMVCurMB,#4] ;// pDstMVCurMB[1] = 0
+ M_BD_INIT2 T1, T2, T2
+ STREQ zero,[pDstMVCurMB,#8] ;// pDstMVCurMB[2] = 0 (offset was #4)
+ MOVEQ Return,#OMX_Sts_NoErr
+ MOV BlkCount,#0
+ STREQ zero,[pDstMVCurMB,#12] ;// pDstMVCurMB[3] = 0 (offset was #4)
+
+ BEQ ExitOK
+
+ ;// Count shares r7 with MBType: once Count is written MBType is lost and is
+ ;// reloaded from the stack after the loop.
+ TEQ MBType,#OMX_VC_INTER4V ;// Check if MBType=OMX_VC_INTER4V
+ TEQNE MBType,#OMX_VC_INTER4V_Q ;// Check if MBType=OMX_VC_INTER4V_Q
+ MOVEQ Count,#4
+
+ TEQ MBType,#OMX_VC_INTER ;// Check if MBType=OMX_VC_INTER
+ TEQNE MBType,#OMX_VC_INTER_Q ;// Check if MBType=OMX_VC_INTER_Q
+ MOVEQ Count,#1
+
+ M_LDR fcodeForward,fcodeForwardonStack ;// Load fcodeForward from stack
+
+ ;// Storing the values temporarily on stack
+
+ M_STR ppBitStream,pppBitStream
+ M_STR pBitOffset,ppBitOffset
+
+
+ SUB temp,fcodeForward,#1 ;// temp=fcodeForward-1
+ MOV one,#1
+ M_STR pSrcMVLeftMB,ppSrcMVLeftMB
+ LSL scaleFactor,one,temp ;// scaleFactor=1<<(fcodeForward-1)
+ M_STR pSrcMVUpperMB,ppSrcMVUpperMB
+ LSL scaleFactor,scaleFactor,#5
+ M_STR scaleFactor,pHigh ;// [pHigh]=32*scaleFactor
+
+ ;// VLD Decoding
+
+
+Loop
+
+ LDR VlcMVD, =armVCM4P2_aVlcMVD ;// Load the optimized MVD VLC table
+
+ ;// Horizontal Data and Residual calculation
+
+ LDR temp,=0xFFF
+ M_BD_VLD index,T1,T2,VlcMVD,3,2 ;// variable length decoding using the macro
+
+ TEQ index,temp
+ BEQ ExitError ;// Exit with an Error Message if the decoded symbol is an invalid symbol
+
+ SUB mvHorData,index,#32 ;// mvHorData=index-32
+ MOV mvHorResidual,#1 ;// mvHorResidual=1
+ CMP fcodeForward,#1
+ TEQNE mvHorData,#0
+ MOVEQ diffMVdx,mvHorData ;// if scaleFactor=1(fcodeForward=1) or mvHorData=0 diffMVdx=mvHorData
+ BEQ VerticalData
+
+ SUB temp,fcodeForward,#1
+ M_BD_VREAD8 mvHorResidual,temp,T1,T2 ;// get mvHorResidual from bitstream if fcodeForward>1 and mvHorData!=0
+
+ ;// NOTE(review): the multiplier below is fcodeForward; ISO/IEC 14496-2 7.6.3 uses
+ ;// f = 1<<(fcodeForward-1), which differs once fcodeForward >= 3 - confirm intent
+ CMP mvHorData,#0
+ RSBLT mvHorData,mvHorData,#0 ;// mvHorData=abs(mvHorData)
+ SUB mvHorResidual,mvHorResidual,fcodeForward
+ SMLABB diffMVdx,mvHorData,fcodeForward,mvHorResidual ;// diffMVdx=abs(mvHorData)*fcodeForward+mvHorResidual-fcodeForward
+ ADD diffMVdx,diffMVdx,#1
+ RSBLT diffMVdx,diffMVdx,#0 ;// restore the sign (flags still from the CMP above)
+
+ ;// Vertical Data and Residual calculation
+
+VerticalData
+
+ M_STR diffMVdx,pdiffMVdx ;// Store the diffMVdx on stack
+ LDR VlcMVD, =armVCM4P2_aVlcMVD ;// Loading the address of optimized VLC tables
+
+ LDR temp,=0xFFF
+ M_BD_VLD index,T1,T2,VlcMVD,3,2 ;// VLC decoding using the macro
+
+ TEQ index,temp
+ BEQ ExitError ;// Exit with an Error Message if an Invalid Symbol occurs
+
+ SUB mvVerData,index,#32 ;// mvVerData=index-32
+ MOV mvVerResidual,#1
+ CMP fcodeForward,#1
+ TEQNE mvVerData,#0
+ MOVEQ diffMVdy,mvVerData ;// diffMVdy = mvVerData if scaleFactor=1(fcodeForward=1) or mvVerData=0
+ BEQ FindMVPred
+
+ SUB temp,fcodeForward,#1
+ M_BD_VREAD8 mvVerResidual,temp,T1,T2 ;// Get mvVerResidual from bit stream if fcodeForward>1 and mvVerData!=0
+
+ ;// NOTE(review): same fcodeForward-versus-scale-factor question as for the horizontal component
+ CMP mvVerData,#0
+ RSBLT mvVerData,mvVerData,#0 ;// mvVerData=abs(mvVerData)
+ SUB mvVerResidual,mvVerResidual,fcodeForward
+ SMLABB diffMVdy,mvVerData,fcodeForward,mvVerResidual ;// diffMVdy=abs(mvVerData)*fcodeForward+mvVerResidual-fcodeForward
+ ADD diffMVdy,diffMVdy,#1
+ RSBLT diffMVdy,diffMVdy,#0 ;// restore the sign (flags still from the CMP above)
+
+ ;//Calling the Function omxVCM4P2_FindMVpred
+
+FindMVPred
+
+ M_STR diffMVdy,pdiffMVdy
+ ADD temp,pDstMVCurMB,BlkCount,LSL #2 ;// temp=&pDstMVCurMB[BlkCount]
+ M_STR temp,ppDstMVCurMB ;// store temp on stack for passing as an argument to FindMVPred
+
+ MOV temp,#0
+ M_STR temp,pDstMVPredME ;// Pass pDstMVPredME=NULL as an argument
+ M_STR BlkCount,pBlkCount ;// Pass BlkCount as Argument through stack
+
+ MOV temp,pSrcMVLeftMB ;// temp (RN 1)=pSrcMVLeftMB
+ M_LDR pSrcMVUpperRightMB,pSrcMVUpperRightMBonStack
+ MOV pSrcMVLeftMB,pSrcMVUpperMB ;// pSrcMVLeftMB ( RN 2) = pSrcMVUpperMB
+ MOV ppBitStream,pDstMVCurMB ;// ppBitStream ( RN 0) = pDstMVCurMB
+ MOV pSrcMVUpperMB,pSrcMVUpperRightMB ;// pSrcMVUpperMB( RN 3) = pSrcMVUpperRightMB
+ BL omxVCM4P2_FindMVpred ;// Branch to subroutine omxVCM4P2_FindMVpred
+
+ ;// Store Horizontal Motion Vector
+
+ M_LDR BlkCount,pBlkCount ;// Load BlkCount from stack
+ M_LDR High,pHigh ;// High=32*scaleFactor
+ LSL temp1,BlkCount,#2 ;// temp1=BlkCount*4
+ M_LDR diffMVdx,pdiffMVdx ;// Load diffMVdx
+
+ LDRSH temp,[pDstMVCurMB,temp1] ;// temp=pDstMVCurMB[BlkCount] (horizontal component)
+
+
+ RSB Low,High,#0 ;// Low = -32*scaleFactor
+ ADD diffMVdx,temp,diffMVdx ;// diffMVdx=pDstMVCurMB[BlkCount]+diffMVdx
+ ADD Range,High,High ;// Range=64*ScaleFactor
+ SUB High,High,#1 ;// High= 32*scaleFactor-1
+
+ CMP diffMVdx,Low ;// If diffMVdx<Low
+ ADDLT diffMVdx,diffMVdx,Range ;// diffMVdx+=Range
+
+ CMP diffMVdx,High
+ SUBGT diffMVdx,diffMVdx,Range ;// If diffMVdx > High diffMVdx-=Range
+ STRH diffMVdx,[pDstMVCurMB,temp1]
+
+ ;// Store Vertical (the diffMVdx register is reused to hold the vertical value)
+
+ ADD temp1,temp1,#2 ;// temp1=4*BlkCount+2
+ M_LDR diffMVdx,pdiffMVdy ;// Load diffMVdy
+ LDRSH temp,[pDstMVCurMB,temp1] ;// temp=pDstMVCurMB[BlkCount].diffMVdy
+ ADD BlkCount,BlkCount,#1 ;// BlkCount=BlkCount+1
+ ADD diffMVdx,temp,diffMVdx
+ CMP diffMVdx,Low
+ ADDLT diffMVdx,diffMVdx,Range ;// If diffMVdy<Low diffMVdy+=Range
+ CMP diffMVdx,High
+ SUBGT diffMVdx,diffMVdx,Range ;// If diffMVdy > High diffMVdy-=Range
+ STRH diffMVdx,[pDstMVCurMB,temp1]
+
+ CMP BlkCount,Count
+ M_LDR pSrcMVLeftMB,ppSrcMVLeftMB
+ M_LDR pSrcMVUpperMB,ppSrcMVUpperMB
+
+ BLT Loop ;// If BlkCount<Count Continue the Loop
+
+
+ ;// If MBType=OMX_VC_INTER or MBtype=OMX_VC_INTER_Q copy pDstMVCurMB[0] to
+ ;// pDstMVCurMB[1], pDstMVCurMB[2], pDstMVCurMB[3]
+
+ M_LDR MBType,MBTypeonStack
+
+ TEQ MBType,#OMX_VC_INTER
+ TEQNE MBType,#OMX_VC_INTER_Q
+ LDREQ temp,[pDstMVCurMB]
+ M_LDR ppBitStream,pppBitStream
+ STREQ temp,[pDstMVCurMB,#4]
+
+ STREQ temp,[pDstMVCurMB,#8]
+ STREQ temp,[pDstMVCurMB,#12]
+
+
+ M_LDR pBitOffset,ppBitOffset
+ ;//Ending the macro
+ M_BD_FINI ppBitStream,pBitOffset ;// Finishing the Macro
+
+
+ MOV Return,#OMX_Sts_NoErr
+ B ExitOK
+
+ExitError
+
+ M_LDR ppBitStream,pppBitStream
+ M_LDR pBitOffset,ppBitOffset
+ ;//Ending the macro
+ M_BD_FINI ppBitStream,pBitOffset
+
+ MOV Return,#OMX_Sts_Err
+
+ExitOK
+
+ M_END
+ ENDIF
+ END
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
new file mode 100644
index 0000000..c43b253
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
@@ -0,0 +1,132 @@
+;/**
+; *
+; * File Name: omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for zigzag scanning and VLC decoding
+; * for inter block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_Inter
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one inter coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] ppBitStream pointer to the pointer to the current byte in
+; * the bitstream buffer
+; * [in] pBitOffset pointer to the bit position in the byte pointed
+; * to by *ppBitStream. *pBitOffset is valid within [0-7].
+; * [in] shortVideoHeader binary flag indicating presence of short_video_header;
+; * escape modes 0-3 are used if shortVideoHeader==0,
+; * and escape mode 4 is used when shortVideoHeader==1.
+; * [out] ppBitStream *ppBitStream is updated after the block is
+; * decoded, so that it points to the current byte
+; * in the bit stream buffer
+; * [out] pBitOffset *pBitOffset is updated so that it points to the
+; * current bit position in the byte pointed by
+; * *ppBitStream
+; * [out] pDst pointer to the coefficient buffer of current
+; * block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; * -pDst is not 16-byte aligned, or
+; * -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; * -At least one mark bit is equal to zero
+; * -Encountered an illegal stream code that cannot be found in the VLC table
+; * -Encountered and illegal code in the VLC FLC table
+; * -The number of coefficients is greater than 64
+; *
+; */
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+
+ IF ARM1136JS
+
+ ;// Import various tables needed for the function
+
+
+ IMPORT armVCM4P2_InterVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+ ;// Packed in Run:Level:Last format
+ IMPORT armVCM4P2_InterL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_InterL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_aClassicalZigzagScan ;// contains classical Zigzag table entries with double the original values
+ IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+
+
+;//Input Arguments
+
+ppBitStream RN 0
+pBitOffset RN 1
+pDst RN 2
+shortVideoHeader RN 3
+
+;//Local Variables
+
+Return RN 0
+
+pVlcTableL0L1 RN 4
+pLMAXTableL0L1 RN 4
+pRMAXTableL0L1 RN 4
+pZigzagTable RN 4
+Count RN 6
+
+
+
+ ;// Allocate stack memory to store the VLC,Zigzag,LMAX and RMAX tables
+
+
+ M_ALLOC4 ppVlcTableL0L1,4
+ M_ALLOC4 ppLMAXTableL0L1,4
+ M_ALLOC4 ppRMAXTableL0L1,4
+ M_ALLOC4 ppZigzagTable,4
+
+
+ M_START omxVCM4P2_DecodeVLCZigzag_Inter,r12
+
+ ;// Thin wrapper: stores four table addresses in the stack slots allocated above,
+ ;// clears Count and calls the shared AC decoding routine. All four table
+ ;// pointers share one register, so each is stored before the next is loaded.
+ LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load address of the classical zigzag table
+ M_STR pZigzagTable,ppZigzagTable ;// Store zigzag table address on stack to pass as argument to unsafe function
+ LDR pVlcTableL0L1, =armVCM4P2_InterVlcL0L1 ;// Load optimized VLC table with both L=0 and L=1 entries
+ M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store optimized VLC table address on stack
+ LDR pLMAXTableL0L1, =armVCM4P2_InterL0L1LMAX ;// Load combined L=0 and L=1 LMAX table
+ M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX table address on stack
+ LDR pRMAXTableL0L1, =armVCM4P2_InterL0L1RMAX ;// Load combined L=0 and L=1 RMAX table
+ MOV Count,#0 ;// set start=0
+ M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// store RMAX table address on stack
+
+
+ BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// call Unsafe Function for VLC Zigzag Decoding
+
+ ;// r0 (Return) is not modified after the call - presumably the callee's status is returned as is
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
new file mode 100644
index 0000000..166729e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
@@ -0,0 +1,136 @@
+;/**
+; *
+; * File Name: omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for zigzag scanning and VLC decoding
+; * for intra block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+; *
+; * Remarks:
+; * PredDir (r3) selects the zigzag scan table; shortVideoHeader is read from the stack.
+; * Parameters:
+; * [in] ppBitStream pointer to the pointer to the current byte in
+; * the bitstream buffer
+; * [in] pBitOffset pointer to the bit position in the byte pointed
+; * to by *ppBitStream. *pBitOffset is valid within [0-7].
+; * [in] shortVideoHeader binary flag indicating presence of short_video_header;
+; * escape modes 0-3 are used if shortVideoHeader==0,
+; * and escape mode 4 is used when shortVideoHeader==1.
+; * [out] ppBitStream *ppBitStream is updated after the block is
+; * decoded, so that it points to the current byte
+; * in the bit stream buffer
+; * [out] pBitOffset *pBitOffset is updated so that it points to the
+; * current bit position in the byte pointed by
+; * *ppBitStream
+; * [out] pDst pointer to the coefficient buffer of current
+; * block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; * -pDst is not 16-byte aligned, or
+; * -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; * -At least one mark bit is equal to zero
+; * -Encountered an illegal stream code that cannot be found in the VLC table
+; * -Encountered an illegal code in the VLC FLC table
+; * -The number of coefficients is greater than 64
+; *
+; */
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+
+ IF ARM1136JS
+
+ ;// Import various tables needed for the function
+
+
+ IMPORT armVCM4P2_IntraVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+ ;// Packed in Run:Level:Last format
+ IMPORT armVCM4P2_IntraL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_IntraL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_aClassicalZigzagScan ;// contains classical Zigzag table entries with double the original values
+ IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+;//Input Arguments
+;// (PredDir and shortVideoHeader share r3: PredDir on entry, shortVideoHeader just before the call)
+ppBitStream RN 0
+pBitOffset RN 1
+pDst RN 2
+PredDir RN 3
+shortVideoHeader RN 3
+
+;//Local Variables
+
+Return RN 0
+;// The four table-pointer names below all alias r4; only one is live at a time
+pVlcTableL0L1 RN 4
+pLMAXTableL0L1 RN 4
+pRMAXTableL0L1 RN 4
+pZigzagTable RN 4
+Count RN 6
+
+
+
+ ;// Allocate stack memory to store optimized VLC,Zigzag, RMAX, LMAX Table Addresses
+
+ M_ALLOC4 ppVlcTableL0L1,4
+ M_ALLOC4 ppLMAXTableL0L1,4
+ M_ALLOC4 ppRMAXTableL0L1,4
+ M_ALLOC4 ppZigzagTable,4
+
+
+ M_START omxVCM4P2_DecodeVLCZigzag_IntraACVLC,r12
+ ;// Thin wrapper: publish the Intra table addresses on the stack, set start=0, call the shared AC routine
+ M_ARG shortVideoHeaderonStack,4 ;// pointer to Input Argument on stack
+
+ LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load Address of the Zigzag table
+ ADD pZigzagTable, pZigzagTable, PredDir, LSL #6 ;// Advance PredDir*64 bytes to pick the scan table for this prediction direction
+
+ M_STR pZigzagTable,ppZigzagTable ;// Store Zigzag table address on stack
+ LDR pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1 ;// Load optimized packed VLC Table with both L=0 and L=1 entries
+ M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store VLC Table address on stack
+ LDR pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX ;// Load LMAX Table
+ M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX Table address on Stack
+ LDR pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX ;// Load RMAX Table
+ MOV Count,#0 ;// Set Start=0
+
+ M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Store RMAX Table address on stack
+ ;// PredDir was consumed by the ADD above, so r3 is free to be overwritten:
+ ;// shortVideoHeader is fetched from the caller's stack into r3 only now,
+ ;// immediately before the call.
+ M_LDR shortVideoHeader,shortVideoHeaderonStack ;// get the Input Argument from stack
+
+ BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// Call Unsafe Function
+ ;// No instruction runs between the call and M_END, so r0 (Return) keeps
+ ;// whatever value the unsafe routine left in it.
+
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
new file mode 100644
index 0000000..d19cb13
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
@@ -0,0 +1,224 @@
+;/**
+; *
+; * File Name: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for zigzag scanning and VLC decoding
+; * for intra block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+; *
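+; * Remarks:
+; * PredDir (r3) selects the zigzag scan table; shortVideoHeader and videoComp are read from the stack.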
+; * Parameters:
+; * [in] ppBitStream pointer to the pointer to the current byte in
+; * the bitstream buffer
+; * [in] pBitOffset pointer to the bit position in the byte pointed
+; * to by *ppBitStream. *pBitOffset is valid within [0-7].
+; * [in] shortVideoHeader binary flag indicating presence of short_video_header;
+; * escape modes 0-3 are used if shortVideoHeader==0,
+; * and escape mode 4 is used when shortVideoHeader==1.
+; * [out] ppBitStream *ppBitStream is updated after the block is
+; * decoded, so that it points to the current byte
+; * in the bit stream buffer
+; * [out] pBitOffset *pBitOffset is updated so that it points to the
+; * current bit position in the byte pointed by
+; * *ppBitStream
+; * [out] pDst pointer to the coefficient buffer of current
+; * block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; * -pDst is not 16-byte aligned, or
+; * -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; * -At least one mark bit is equal to zero
+; * -Encountered an illegal stream code that cannot be found in the VLC table
+; * -Encountered an illegal code in the VLC FLC table
+; * -The number of coefficients is greater than 64
+; *
+; */
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+
+ IF ARM1136JS :LOR: CortexA8
+
+
+ ;// Import various tables needed for the function
+
+
+ IMPORT armVCM4P2_IntraVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+ ;// Packed in Run:Level:Last format
+ IMPORT armVCM4P2_IntraL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_IntraL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_aClassicalZigzagScan ;// contains CLassical, Horizontal, Vertical Zigzag table entries with double the original values
+ IMPORT armVCM4P2_aIntraDCLumaChromaIndex ;// Contains Optimized DCLuma and DCChroma Index table Entries
+
+
+ IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+;//Input Arguments
+;// (r3 is PredDir on entry; shortVideoHeader and videoComp are loaded from the stack later)
+ppBitStream RN 0
+pBitOffset RN 1
+pDst RN 2
+PredDir RN 3
+shortVideoHeader RN 3
+videoComp RN 5
+;//Local Variables
+
+Return RN 0
+;// Several names below share a register (r4, r5, r6, r7); each pair is used at different points of the routine
+pDCLumaChromaIndex RN 4
+pDCChromaIndex RN 7 ;// NOTE(review): not referenced in the routine body visible in this file
+pVlcTableL0L1 RN 4
+pLMAXTableL0L1 RN 4
+pRMAXTableL0L1 RN 4
+pZigzagTable RN 4
+Count RN 6
+DCValueSize RN 6
+powOfSize RN 7
+temp1 RN 5
+
+
+;// Scratch Registers
+;// (RBitStream/RBitBuffer/RBitCount are the bitstream state handed to the M_BD_* macros)
+RBitStream RN 8
+RBitBuffer RN 9
+RBitCount RN 10
+
+T1 RN 11
+T2 RN 12
+DCVal RN 14 ;// r14 is the link register: DCVal must be saved before any BL
+
+
+ ;// Allocate stack memory to store optimized VLC,Zigzag, RMAX, LMAX Table Addresses
+ ;// plus one extra 4-byte slot (pDCCoeff) that holds the decoded DC value across the AC call
+ M_ALLOC4 ppVlcTableL0L1,4
+ M_ALLOC4 ppLMAXTableL0L1,4
+ M_ALLOC4 ppRMAXTableL0L1,4
+ M_ALLOC4 ppZigzagTable,4
+ M_ALLOC4 pDCCoeff,4
+
+ ;// Flow: decode the DC size and DC value, validate the marker bit, then run
+ ;// the shared AC routine with start=1 and finally store the DC value at pDst[0].
+ M_START omxVCM4P2_DecodeVLCZigzag_IntraDCVLC,r12
+ ;// Two arguments are taken from the caller's stack, declared in this order
+ M_ARG shortVideoHeaderonStack,4 ;// Pointer to argument on stack
+ M_ARG videoComponstack,4 ;// Pointer to argument on stack
+
+
+ ;// Decode DC Coefficient
+ ;// The DC size is VLC coded; the lookup table base is advanced by
+ ;// videoComp*64 bytes (LSL #6) to pick the table for this component.
+ LDR pDCLumaChromaIndex, =armVCM4P2_aIntraDCLumaChromaIndex ;// Load Optimized VLC Table for Luminance and Chrominance
+
+ ;// Initializing the Bitstream Macro
+ ;// (the three M_BD_INIT stages are interleaved with the videoComp load and table offset)
+ M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+ M_LDR videoComp,videoComponstack
+ M_BD_INIT1 T1, T2, T2
+ ADD pDCLumaChromaIndex,pDCLumaChromaIndex,videoComp, LSL #6
+ M_BD_INIT2 T1, T2, T2
+
+ ;// Result of the table lookup is the number of bits in the DC value
+ M_BD_VLD DCValueSize,T1,T2,pDCLumaChromaIndex,4,2 ;// VLC Decode using optimized Luminance and Chrominance VLC Table
+
+
+
+
+DecodeDC
+ ;// A DC size above 12 is treated as an illegal code
+ CMP DCValueSize,#12
+ BGT ExitError
+
+ CMP DCValueSize,#0
+ MOVEQ DCVal,#0 ;// If DCValueSize is zero then DC coeff =0
+ BEQ ACDecode ;// Branch to perform AC Coeff Decoding
+
+ M_BD_VREAD16 DCVal,DCValueSize,T1,T2 ;// Get DC Value From Bit stream
+
+ ;// Sign reconstruction: a raw value below 2^(size-1) encodes a negative number
+ MOV powOfSize,#1
+ LSL powOfSize,DCValueSize ;// powOfSize=pow(2,DCValueSize)
+ CMP DCVal,powOfSize,LSR #1 ;// Compare DCVal with powOfSize/2
+ ADDLT DCVal,DCVal,#1
+ SUBLT DCVal,DCVal,powOfSize ;// If Lessthan powOfSize/2 DCVal=DCVal-powOfSize+1
+ ;// Else DCVal= fetchbits from bit stream
+
+CheckDCValueSize
+
+ CMP DCValueSize,#8 ;// If DCValueSize greater than 8 check marker bit
+
+ BLE ACDecode
+ ;// Read the single marker bit that follows the DC bits; temp1 reuses r5 (videoComp is no longer needed)
+ M_BD_READ8 temp1,1,T1
+ TEQ temp1,#0 ;// If Marker bit is zero Exit with an Error Message
+ BEQ ExitError
+
+
+
+ ;// Decode AC Coefficient
+
+ACDecode
+ ;// DCVal lives in r14 (lr), which the BL below overwrites, so park it on the stack first
+ M_STR DCVal,pDCCoeff ;// Store Decoded DC Coeff on Stack
+ M_BD_FINI ppBitStream,pBitOffset ;// Terminating the Bit stream Macro
+
+ LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load Zigzag table address
+ ADD pZigzagTable, pZigzagTable, PredDir, LSL #6 ;// Modify the Zigzag table address based on PredDir (PredDir*64 bytes)
+
+ M_STR pZigzagTable,ppZigzagTable ;// Store zigzag table on stack
+ LDR pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1 ;// Load Optimized VLC Table With both Last=0 and Last=1 Entries
+ M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store Optimized VLC Table on stack
+ LDR pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX ;// Load LMAX Table
+ M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX table on stack
+ LDR pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX ;// Load RMAX Table
+ MOV Count,#1 ;// Set Start =1 (position 0 is the DC term written after the call)
+
+ M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Store RMAX Table on Stack
+
+ ;// PredDir was consumed above; r3 is now reloaded with shortVideoHeader for the call
+ M_LDR shortVideoHeader,shortVideoHeaderonStack ;// Load the Input Argument From Stack
+
+ BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// Call the Unsafe Function
+ ;// r0 from the call is returned unchanged; the DC store below happens regardless of that status
+ M_LDR DCVal,pDCCoeff ;// Get the Decoded DC Value From Stack
+ STRH DCVal,[pDst] ;// Store the DC Value (assumes the unsafe routine preserves pDst/r2 - TODO confirm)
+ B ExitOK
+
+
+
+ExitError
+ ;// Reached for an oversized DC size or a zero marker bit
+ M_BD_FINI ppBitStream,pBitOffset ;// Terminating the Bit Stream Macro in case of an Error
+ MOV Return,#OMX_Sts_Err ;// Exit with an Error Message
+ExitOK
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
new file mode 100644
index 0000000..a4bfa71
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
@@ -0,0 +1,194 @@
+;//
+;//
+;// File Name: omxVCM4P2_FindMVpred_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Function:
+;// omxVCM4P2_FindMVpred
+;//
+ ;// Include headers
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armVCCOMM_s.h
+
+ ;// Define cpu variants
+ M_VARIANTS ARM1136JS
+
+
+ IF ARM1136JS
+
+ M_TABLE armVCM4P2_pBlkIndexTable ;// Table of the four per-block handler labels defined in the routine below
+ DCD OMXVCBlk0, OMXVCBlk1 ;// NOTE(review): the routine in this file dispatches with M_SWITCH and does not read this table
+ DCD OMXVCBlk2, OMXVCBlk3
+
+;//--------------------------------------------
+;// Declare input registers
+;//--------------------------------------------
+;// r0-r3 arrive in registers; pDstMVPred, pDstMVPredME and iBlk are loaded from stack arguments
+pSrcMVCurMB RN 0
+pSrcCandMV1 RN 1
+pSrcCandMV2 RN 2
+pSrcCandMV3 RN 3
+pDstMVPred RN 4
+pDstMVPredME RN 5
+iBlk RN 6
+
+pTable RN 4
+CandMV RN 12
+;// Working pointers to the three selected candidate vectors
+pCandMV1 RN 7
+pCandMV2 RN 8
+pCandMV3 RN 9
+;// Candidate components: the first four reuse r0-r3, overwriting the input pointers once loaded
+CandMV1dx RN 0
+CandMV1dy RN 1
+CandMV2dx RN 2
+CandMV2dy RN 3
+CandMV3dx RN 10
+CandMV3dy RN 11
+
+temp RN 14
+;// zero and temp share r14: zero is used during setup, temp by the median macro later
+zero RN 14
+return RN 0
+
+; ----------------------------------------------
+; Main routine
+; ----------------------------------------------
+
+ M_ALLOC4 MV, 4
+ ;// MV is a stack slot that is zeroed below and serves as the default (0,0) candidate
+ ;// Function header
+ M_START omxVCM4P2_FindMVpred, r11
+
+ ;// Define stack arguments
+ M_ARG ppDstMVPred, 4
+ M_ARG ppDstMVPredME, 4
+ M_ARG Blk, 4
+
+ M_ADR CandMV, MV
+ MOV zero, #0
+ M_LDR iBlk, Blk
+
+ ;// Set the default value for these
+ ;// to be used if pSrcCandMV[1|2|3] == NULL
+ MOV pCandMV1, CandMV
+ MOV pCandMV2, CandMV
+ MOV pCandMV3, CandMV
+ ;// One word store clears both halfwords (dx and dy) of the default candidate
+ STR zero, [CandMV]
+
+ ;// Branch to the case based on blk number
+ M_SWITCH iBlk
+ M_CASE OMXVCBlk0 ;// iBlk=0
+ M_CASE OMXVCBlk1 ;// iBlk=1
+ M_CASE OMXVCBlk2 ;// iBlk=2
+ M_CASE OMXVCBlk3 ;// iBlk=3
+ M_ENDSWITCH
+ ;// Block 0: all three candidates come from the pSrcCandMV1/2/3 arrays (4 bytes per entry)
+OMXVCBlk0
+ CMP pSrcCandMV1, #0
+ ADDNE pCandMV1, pSrcCandMV1, #4
+ ;// Each ADDNE leaves the default zero-vector pointer in place when its source pointer is NULL
+ CMP pSrcCandMV2, #0
+ ADDNE pCandMV2, pSrcCandMV2, #8
+
+ CMP pSrcCandMV3, #0
+ ADDNE pCandMV3, pSrcCandMV3, #8
+ CMPEQ pSrcCandMV1, #0
+ ;// EQ here means MV3 and MV1 are both NULL: candidate 2 stands in for all three
+ MOVEQ pCandMV3, pCandMV2
+ MOVEQ pCandMV1, pCandMV2
+
+ CMP pSrcCandMV1, #0
+ CMPEQ pSrcCandMV2, #0
+ ;// EQ here means MV1 and MV2 are both NULL: candidate 3 stands in for all three
+ MOVEQ pCandMV1, pCandMV3
+ MOVEQ pCandMV2, pCandMV3
+
+ CMP pSrcCandMV2, #0
+ CMPEQ pSrcCandMV3, #0
+ ;// EQ here means MV2 and MV3 are both NULL: candidate 1 stands in for all three
+ MOVEQ pCandMV2, pCandMV1
+ MOVEQ pCandMV3, pCandMV1
+
+ B BlkEnd
+ ;// Block 1: candidate 1 is the first entry of pSrcMVCurMB; 2 and 3 come from pSrcCandMV2/3
+OMXVCBlk1
+ MOV pCandMV1, pSrcMVCurMB
+ CMP pSrcCandMV3, #0
+ ADDNE pCandMV3, pSrcCandMV3, #8
+
+ CMP pSrcCandMV2, #0
+ ADDNE pCandMV2, pSrcCandMV2, #12
+ ;// ADDNE does not set flags, so the CMPEQ below still chains on the MV2 test
+ CMPEQ pSrcCandMV3, #0
+ ;// EQ here means MV2 and MV3 are both NULL: use candidate 1 for all three
+ MOVEQ pCandMV2, pCandMV1
+ MOVEQ pCandMV3, pCandMV1
+
+ B BlkEnd
+ ;// Block 2: candidates 2 and 3 are entries 0 and 1 of pSrcMVCurMB; candidate 1 comes from pSrcCandMV1
+OMXVCBlk2
+ CMP pSrcCandMV1, #0
+ MOV pCandMV2, pSrcMVCurMB
+ ADD pCandMV3, pSrcMVCurMB, #4
+ ADDNE pCandMV1, pSrcCandMV1, #12 ;// uses the flags of the CMP above; the MOV/ADD in between do not change them
+ B BlkEnd
+ ;// Block 3: all three candidates are entries of pSrcMVCurMB (byte offsets 8, 0 and 4)
+OMXVCBlk3
+ ADD pCandMV1, pSrcMVCurMB, #8
+ MOV pCandMV2, pSrcMVCurMB
+ ADD pCandMV3, pSrcMVCurMB, #4
+
+BlkEnd
+ ;// From here r0-r3 are reused for candidate components; the input pointers are no longer needed.
+ ;// Load the x components as signed halfwords, post-incrementing each pointer to its y component.
+ ;// NOTE(review): no transparency-based zeroing is performed in the code visible here.
+ LDRSH CandMV1dx, [pCandMV1], #2
+ LDRSH CandMV2dx, [pCandMV2], #2
+ LDRSH CandMV3dx, [pCandMV3], #2
+
+ ;// Load argument from the stack
+ M_LDR pDstMVPredME, ppDstMVPredME
+ ;// Load the y components
+ LDRSH CandMV1dy, [pCandMV1]
+ LDRSH CandMV2dy, [pCandMV2]
+ LDRSH CandMV3dy, [pCandMV3]
+
+ CMP pDstMVPredME, #0
+
+ ;// Store the candidate MV's into the pDstMVPredME,
+ ;// these can be used in the fast algorithm if implemented
+ ;// (six halfwords: dx,dy for candidates 1, 2, 3; all skipped when pDstMVPredME is NULL)
+ STRHNE CandMV1dx, [pDstMVPredME], #2
+ STRHNE CandMV1dy, [pDstMVPredME], #2
+ STRHNE CandMV2dx, [pDstMVPredME], #2
+ STRHNE CandMV2dy, [pDstMVPredME], #2
+ STRHNE CandMV3dx, [pDstMVPredME], #2
+ STRHNE CandMV3dy, [pDstMVPredME]
+
+ ; Find the median of the 3 candidate MV's (the stores below read it from the third operand)
+ M_MEDIAN3 CandMV1dx, CandMV2dx, CandMV3dx, temp
+
+ ;// Load argument from the stack
+ M_LDR pDstMVPred, ppDstMVPred
+
+ M_MEDIAN3 CandMV1dy, CandMV2dy, CandMV3dy, temp
+ ;// NOTE(review): unlike pDstMVPredME, pDstMVPred is written without a NULL check
+ STRH CandMV3dx, [pDstMVPred], #2
+ STRH CandMV3dy, [pDstMVPred]
+
+ MOV return, #OMX_Sts_NoErr
+
+ M_END
+ ENDIF ;// ARM1136JS
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
new file mode 100644
index 0000000..bfeb540
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
@@ -0,0 +1,73 @@
+;//
+;//
+;// File Name: omxVCM4P2_IDCT8x8blk_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Function:
+;// omxVCM4P2_IDCT8x8blk
+;//
+ ;// Include headers
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ ;// Define cpu variants
+ M_VARIANTS ARM1136JS
+
+ INCLUDE armCOMM_IDCT_s.h
+
+ IMPORT armCOMM_IDCTPreScale
+ ;//
+ ;// Function prototype
+ ;//
+ ;// OMXResult
+ ;// omxVCM4P2_IDCT8x8blk(const OMX_S16* pSrc,
+ ;// OMX_S16* pDst)
+ ;//
+
+ IF ARM1136JS :LOR: CortexA8
+ M_ALLOC4 ppDest, 4
+ M_ALLOC4 pStride, 4
+ M_ALLOC8 pBlk, 2*8*8 ;// 128 bytes: 8x8 entries of 2 bytes each
+ ENDIF
+ ;// The three stack slots above are not referenced by name in this file; presumably M_IDCT uses them - confirm in armCOMM_IDCT_s.h
+ IF ARM1136JS
+ M_START omxVCM4P2_IDCT8x8blk, r11
+ ENDIF
+
+
+ IF ARM1136JS :LOR: CortexA8
+
+;// Declare input registers
+pSrc RN 0
+pDst RN 1
+
+;// Declare other intermediate registers
+Result RN 0
+
+;// Prototype for macro M_IDCT
+;// pSrc RN 0 ;// source data buffer
+;// Stride RN 1 ;// destination stride in bytes
+;// pDest RN 2 ;// destination data buffer
+;// pScale RN 3 ;// pointer to scaling table
+;// NOTE(review): Stride aliases r1, which still holds pDst here and is never given a stride value in this file
+pSrc RN 0
+Stride RN 1
+pDest RN 2
+pScale RN 3
+ ;// Arrange the registers the way the M_IDCT prototype above expects them
+ MOV pDest, pDst ;// destination pointer moves from r1 to r2
+ LDR pScale, =armCOMM_IDCTPreScale
+ M_IDCT s9, s16, 16 ;// argument meanings are defined in armCOMM_IDCT_s.h (not visible here)
+ MOV Result, #OMX_Sts_NoErr ;// always reports success
+ M_END
+ ENDIF
+ ;// ARM1136JS :LOR: CortexA8
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
new file mode 100644
index 0000000..20965bf
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
@@ -0,0 +1,713 @@
+;//
+;//
+;// File Name: omxVCM4P2_MCReconBlock_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// Description:
+;//
+;//
+
+;// Include standard headers
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+;// Import symbols required from other files
+
+ M_VARIANTS ARM1136JS
+
+;// ***************************************************************************
+;// ARM1136JS implementation
+;// ***************************************************************************
+ IF ARM1136JS
+
+;// ***************************************************************************
+;// MACRO DEFINITIONS
+;// ***************************************************************************
+ ;// Description:
+ ;//
+ ;// dest[j] = (x[j] + y[j] + round) >> 1, j=0..3
+ ;//
+ ;// Similar to UHADD8 instruction, but with a rounding value of 1 added to
+ ;// each sum before dividing by two, if round is 1
+ ;//
+ ;// Syntax:
+ ;// M_UHADD8R $dest, $x, $y, $round, $mask
+ ;//
+ ;// Inputs:
+ ;// $x four packed bytes, x[3] : x[2] : x[1] : x[0]
+ ;// $y four packed bytes, y[3] : y[2] : y[1] : y[0]
+ ;// $round 0 if no rounding to be added, 1 if rounding to be done
+ ;// $mask some register set to 0x80808080
+ ;//
+ ;// Outputs:
+ ;// $dest four packed bytes, z[3] : z[2] : z[1] : z[0]
+
+ MACRO
+ M_UHADD8R $dest, $x, $y, $round, $mask
+ IF $round = 1 ;// rounded average, built from UHSUB8 because UHADD8 truncates
+ IF $dest /= $y ;// dest may be the same register as x here, but not y
+ MVN $dest, $x ;// dest = ~x = 255 - x in every byte lane
+ UHSUB8 $dest, $y, $dest ;// dest = (y - (255 - x)) >> 1 = ((x + y + 1) >> 1) - 128, modulo 256
+ EOR $dest, $dest, $mask ;// toggling bit 7 of each lane adds the 128 back
+ ELSE ;// dest is y: complement y instead, so x is still intact for the subtract
+ MVN $dest, $y
+ UHSUB8 $dest, $x, $dest
+ EOR $dest, $dest, $mask
+ ENDIF
+ ELSE ;// truncating average maps directly onto UHADD8; mask is not used
+ UHADD8 $dest, $x, $y
+ ENDIF
+ MEND
+;// ***************************************************************************
+ ;// Description:
+ ;// Load 8 bytes from $pSrc (aligned or unaligned locations)
+ ;//
+ ;// Syntax:
+ ;// M_LOAD_X $pSrc, $srcStep, $out0, $out1, $scratch, $offset
+ ;//
+ ;// Inputs:
+ ;// $pSrc 4 byte aligned source pointer to an address just less than
+ ;// or equal to the data location
+ ;// $srcStep The stride on source
+ ;// $scratch A scratch register, used internally for temp calculations
+ ;// $offset Difference of source data location to the source pointer
+ ;// Use when $offset != 0 (unaligned load)
+ ;//
+ ;// Outputs:
+ ;// $pSrc In case the macro accepts stride, it increments the pSrc by
+ ;// that value, else unchanged
+ ;// $out0 four packed bytes, z[3] : z[2] : z[1] : z[0]
+ ;// $out1 four packed bytes, z[7] : z[6] : z[5] : z[4]
+ ;//
+ ;// Note: {$out0, $out1, $scratch} should be registers with ascending
+ ;// register numbering. In case offset is 0, $scratch is not modified.
+
+ MACRO
+ M_LOAD_X $pSrc, $srcStep, $out0, $out1, $scratch, $offset
+ IF $offset = 0 ;// aligned: the two loaded words are exactly the eight bytes wanted
+ LDM $pSrc, {$out0, $out1}
+ ADD $pSrc, $pSrc, $srcStep ;// step to the next row
+ ELSE ;// unaligned: fetch three words, then shift the wanted eight bytes down
+ LDM $pSrc, {$out0, $out1, $scratch}
+ ADD $pSrc, $pSrc, $srcStep ;// step to the next row
+ ;// each output = (word >> 8*offset) | (next word << (32 - 8*offset))
+ MOV $out0, $out0, LSR #8 * $offset
+ ORR $out0, $out0, $out1, LSL #(32 - 8 * ($offset))
+ MOV $out1, $out1, LSR #8 * $offset
+ ORR $out1, $out1, $scratch, LSL #(32 - 8 * ($offset))
+ ENDIF
+ MEND
+
+;// ***************************************************************************
+ ;// Description:
+ ;// Loads three words for X interpolation, update pointer to next row. For
+ ;// X interpolation, given a truncated-4byteAligned source pointer,
+ ;// invariably three continuous words are required from there to get the
+ ;// nine bytes from the source pointer for filtering.
+ ;//
+ ;// Syntax:
+ ;// M_LOAD_XINT $pSrc, $srcStep, $offset, $word0, $word1, $word2, $word3
+ ;//
+ ;// Inputs:
+ ;// $pSrc 4 byte aligned source pointer to an address just less than
+ ;// or equal to the data location
+ ;//
+ ;// $srcStep The stride on source
+ ;//
+ ;// $offset Difference of source data location to the source pointer
+ ;// Use when $offset != 0 (unaligned load)
+ ;//
+ ;// Outputs:
+ ;// $pSrc Incremented by $srcStep
+ ;//
+ ;// $word0, $word1, $word2, $word3
+ ;// Three of these are outputs based on the $offset parameter.
+ ;// The outputs are specifically generated to be processed by
+ ;// the M_EXT_XINT macro. Following is the illustration to show
+ ;// how the nine bytes are spanned for different offsets from
+ ;// notTruncatedForAlignmentSourcePointer.
+ ;//
+ ;// ------------------------------------------------------
+ ;// | Offset | Aligned Ptr | word0 | word1 | word2 | word3 |
+ ;// |------------------------------------------------------|
+ ;// | 0 | 0 | 0123 | 4567 | 8xxx | |
+ ;// | 1 | -1 | x012 | 3456 | 78xx | |
+ ;// | 2 | -2 | xx01 | 2345 | 678x | |
+ ;// | 3 | -3 | xxx0 | | 1234 | 5678 |
+ ;// ------------------------------------------------------
+ ;//
+ ;// where the numbering (0-8) is to designate the 9 bytes from
+ ;// start of a particular row. The illustration doesn't take in
+ ;// account the positioning of bytes with in the word and the
+ ;// macro combination with M_EXT_XINT will work only in little
+ ;// endian environs
+ ;//
+ ;// Note: {$word0, $word1, $word2, $word3} should be registers with ascending
+ ;// register numbering
+
+ MACRO
+ M_LOAD_XINT $pSrc, $srcStep, $offset, $word0, $word1, $word2, $word3
+ IF $offset /= 3 ;// offsets 0-2: the three loaded words go to word0, word1, word2
+ LDM $pSrc, {$word0, $word1, $word2}
+ ELSE ;// offset 3: word1 is skipped so the 2nd and 3rd loaded words land directly in word2 and word3
+ LDM $pSrc, {$word0, $word2, $word3}
+ ENDIF
+ ADD $pSrc, $pSrc, $srcStep ;// step to the next row
+ MEND
+
+;// ***************************************************************************
+ ;// Description:
+ ;// Extract four registers of four pixels for X interpolation
+ ;//
+ ;// Syntax:
+ ;// M_EXT_XINT $offset, $word0, $word1, $word2, $word3
+ ;//
+ ;// Inputs:
+ ;// $offset Difference of source data location to the source pointer
+ ;// Use when $offset != 0 (unaligned load)
+ ;//
+ ;// $word0, $word1, $word2, $word3
+ ;// Three of these are inputs based on the $offset parameter.
+ ;// The inputs are specifically selected to be processed by
+ ;// the M_EXT_XINT macro.
+ ;//
+ ;// ------------------------------------------------------
+ ;// | Offset | Aligned Ptr | word0 | word1 | word2 | word3 |
+ ;// |------------------------------------------------------|
+ ;// | 0 | 0 | 0123 | 4567 | 8xxx | yyyy |
+ ;// | 1 | -1 | x012 | 3456 | 78xx | yyyy |
+ ;// | 2 | -2 | xx01 | 2345 | 678x | yyyy |
+ ;// | 3 | -3 | xxx0 | yyyy | 1234 | 5678 |
+ ;// ------------------------------------------------------
+ ;//
+ ;// Outputs:
+ ;// $word0, $word1, $word2, $word3
+ ;// Bytes from the original source pointer (not truncated for
+ ;// 4 byte alignment) as shown in the table.
+ ;// -------------------------------
+ ;// | word0 | word1 | word2 | word3 |
+ ;// |-------------------------------|
+ ;// | 0123 | 4567 | 1234 | 5678 |
+ ;// -------------------------------
+ ;//
+ ;// Note: {$word0, $word1, $word2, $word3} should be registers with ascending
+ ;// register numbering
+
+ MACRO
+ M_EXT_XINT $offset, $word0, $word1, $word2, $word3
+ IF $offset = 0
+ ; $word0 and $word1 are ok
+ ; $word2, $word3 are just 8 shifted versions
+ MOV $word3, $word1, LSR #8 ;// word3 is built first: it still needs the loaded word2
+ ORR $word3, $word3, $word2, LSL #24 ;// word3 = bytes 5678
+ MOV $word2, $word0, LSR #8 ;// only now may word2 be overwritten
+ ORR $word2, $word2, $word1, LSL #24 ;// word2 = bytes 1234
+ ELIF $offset = 3
+ ; $word2 and $word3 are ok (taken care while loading itself)
+ ; set $word0 & $word1
+ MOV $word0, $word0, LSR #24 ;// keep byte 0, the top byte of the first loaded word
+ ORR $word0, $word0, $word2, LSL #8 ;// word0 = bytes 0123
+ MOV $word1, $word2, LSR #24 ;// byte 4
+ ORR $word1, $word1, $word3, LSL #8 ;// word1 = bytes 4567
+ ELSE
+ MOV $word0, $word0, LSR #8 * $offset ;// offsets 1 and 2: first shift word0/word1 down to bytes 0123 / 4567
+ ORR $word0, $word0, $word1, LSL #(32 - 8 * ($offset))
+ MOV $word1, $word1, LSR #8 * $offset
+ ORR $word1, $word1, $word2, LSL #(32 - 8 * ($offset))
+ ;// then derive the one-pixel-shifted pair; word3 first, while word2 still holds the loaded data
+ MOV $word3, $word1, LSR #8
+ ORR $word3, $word3, $word2, LSL #(32 - 8 * (($offset)+1)) ;// word3 = bytes 5678
+ MOV $word2, $word0, LSR #8
+ ORR $word2, $word2, $word1, LSL #24 ;// word2 = bytes 1234
+ ENDIF
+ MEND
+
+;// ***************************************************************************
+ ;// Description:
+ ;// Computes half-sum and xor of two inputs and puts them in the input
+ ;// registers in that order
+ ;//
+ ;// Syntax:
+ ;// M_HSUM_XOR $v0, $v1, $tmp
+ ;//
+ ;// Inputs:
+ ;// $v0 a, first input
+ ;// $v1 b, second input
+ ;// $tmp scratch register
+ ;//
+ ;// Outputs:
+ ;// $v0 (a + b)/2
+ ;// $v1 a ^ b
+
+ MACRO
+ M_HSUM_XOR $v0, $v1, $tmp
+ UHADD8 $tmp, $v0, $v1 ;// s0 = (a + b)/2 per byte, held in tmp because a is still needed
+ EOR $v1, $v0, $v1 ;// l0 = a ^ b
+ MOV $v0, $tmp ;// s0
+ MEND
+;// ***************************************************************************
+ ;// Description:
+ ;// Calculates average of 4 values (a,b,c,d) for HalfPixelXY predict type in
+ ;// mcReconBlock module. Very specific to the implementation of
+ ;// M_MCRECONBLOCK_HalfPixelXY done here. Uses "tmp" as scratch register and
+ ;// "yMask" for mask variable "0x1010101x" set in it. In yMask 4 lsbs are
+ ;// not significant and are used by the callee for row counter (y)
+ ;//
+ ;// Some points to note are:
+ ;// 1. Input is pair of pair-averages and Xors
+ ;// 2. $sum1 and $lsb1 are not modified and hence can be reused in another
+ ;// running average
+ ;// 3. Output is in the first argument
+ ;//
+ ;// Syntax:
+ ;// M_AVG4 $sum0, $lsb0, $sum1, $lsb1, $rndVal
+ ;//
+ ;// Inputs:
+ ;// $sum0 (a + b) >> 1, where a and b are 1st and 2nd inputs to be averaged
+ ;// $lsb0 (a ^ b)
+ ;// $sum1 (c + d) >> 1. Not modified
+ ;// $lsb1 (c ^ d) Not modified
+ ;// $rndVal Assembler Variable. 0 for rounding, 1 for no rounding
+ ;//
+ ;// Outputs:
+ ;// $sum0 (a + b + c + d + 1) / 4 : If no rounding
+ ;// (a + b + c + d + 2) / 4 : If rounding
+
+ MACRO
+ M_AVG4 $sum0, $lsb0, $sum1, $lsb1, $rndVal
+ LCLS OP1
+ LCLS OP2
+ IF $rndVal = 0 ;// rounding case
+OP1 SETS "AND"
+OP2 SETS "ORR"
+ ELSE ;// Not rounding case
+OP1 SETS "ORR"
+OP2 SETS "AND"
+ ENDIF
+ ;// Correction bit per lane: rounding -> (s0^s1) | (l0 & l1); no rounding -> (s0^s1) & (l0 | l1)
+ LCLS lsb2
+ LCLS sum2
+ LCLS dest
+ ;// tmp and yMask are not macro parameters: they must be register names defined where the macro is expanded
+lsb2 SETS "tmp"
+sum2 SETS "$lsb0"
+dest SETS "$sum0"
+ ;// The register passed as lsb0 is reused to hold s2, so its incoming value is destroyed
+ $OP1 $lsb0, $lsb0, $lsb1 ;// e0 = e0 OP1 e1 (AND when rounding, ORR otherwise)
+ EOR $lsb2, $sum0, $sum1 ;// e2 = s0 ^ s1
+ $OP2 $lsb2, $lsb2, $lsb0 ;// e2 = e2 OP2 e0 (ORR when rounding, AND otherwise)
+ AND $lsb2, $lsb2, yMask, LSR # 4 ;// e2 = e2 & mask (the shift drops the low nibble of yMask)
+ UHADD8 $sum2, $sum0, $sum1 ;// s2 = (s0 + s1)/2
+ UADD8 $dest, $sum2, $lsb2 ;// dest = s2 + e2
+ MEND
+;// ***************************************************************************
+;// Motion compensation handler macros
+;// ***************************************************************************
+ ;// Description:
+ ;// Implement motion compensation routines using the named registers in
+ ;// callee function. Each of the following 4 implement the 4 predict type
+ ;// Each handles 8 cases each ie all the combinations of 4 types of source
+ ;// alignment offsets and 2 types of rounding flag
+ ;//
+ ;// Syntax:
+ ;// M_MCRECONBLOCK_IntegerPixel $rndVal, $offset
+ ;// M_MCRECONBLOCK_HalfPixelX $rndVal, $offset
+ ;// M_MCRECONBLOCK_HalfPixelY $rndVal, $offset
+ ;// M_MCRECONBLOCK_HalfPixelXY $rndVal, $offset
+ ;//
+ ;// Inputs:
+ ;// $rndVal Assembler Variable. 0 for rounding, 1 for no rounding
+ ;// $offset $pSrc MOD 4 value. Offset from 4 byte aligned location.
+ ;//
+ ;// Outputs:
+ ;// Outputs come in the named registers of the callee functions
+ ;// The macro loads the data from the source pointer, processes it and
+ ;// stores in the destination pointer. Does the whole prediction cycle
+ ;// of Motion Compensation routine for a particular predictType
+ ;// After this only residue addition to the predicted values remain
+
+ MACRO
+ M_MCRECONBLOCK_IntegerPixel $rndVal, $offset
+ ;// Algorithmic Description:
+ ;// This handles motion compensation for IntegerPixel predictType. Both
+ ;// rounding cases are handled by the same code base. It is just a copy
+ ;// from source to destination. Two lines are done per loop to reduce
+ ;// stalls. Loop has been software pipelined as well for that purpose.
+ ;//
+ ;// M_LOAD_X loads a whole row in two registers and then they are stored
+ ;// Both rounding labels below mark the same code, since a copy has nothing to round
+CaseIntegerPixelRnd0Offset$offset
+CaseIntegerPixelRnd1Offset$offset
+ M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp3, $offset
+ M_LOAD_X pSrc, srcStep, tmp3, tmp4, tmp5, $offset
+YloopIntegerPixelOffset$offset
+ SUBS y, y, #2 ;// two rows per pass; nothing before the BGT below changes these flags
+ STRD tmp1, tmp2, [pDst], dstStep
+ STRD tmp3, tmp4, [pDst], dstStep
+ M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp3, $offset ;// tmp3 is scratch here, so this must follow the store of tmp3 above
+ M_LOAD_X pSrc, srcStep, tmp3, tmp4, tmp5, $offset
+ BGT YloopIntegerPixelOffset$offset
+ ;// NOTE(review): the loads run ahead of the stores, so the final pass reads two source rows that are never stored
+ B SwitchPredictTypeEnd
+ MEND
+;// ***************************************************************************
+ MACRO
+ M_MCRECONBLOCK_HalfPixelX $rndVal, $offset
+ ;// Algorithmic Description:
+ ;// This handles motion compensation for HalfPixelX predictType. The two
+ ;// rounding cases are handled by the different code base and spanned by
+ ;// different macro calls. Loop has been software pipelined to reduce
+ ;// stalls.
+ ;//
+ ;// Filtering involves averaging a pixel with the next horizontal pixel.
+ ;// M_LOAD_XINT and M_EXT_XINT combination generate 4 registers, 2 with
+ ;// all pixels in a row with 4 pixel in each register and another 2
+ ;// registers with pixels corresponding to one horizontally shifted pixel
+ ;// corresponding to the initial row pixels. These are set of packed
+ ;// registers appropriate to do 4 lane SIMD.
+ ;// After that M_UHADD8R macro does the averaging taking care of the
+ ;// rounding as required
+ ;// The 0x80808080 mask is only consumed by the rounding form of M_UHADD8R, hence the conditional load
+CaseHalfPixelXRnd$rndVal.Offset$offset
+ IF $rndVal = 0
+ LDR mask, =0x80808080
+ ENDIF
+ ;// Fetch the first row before entering the loop
+ M_LOAD_XINT pSrc, srcStep, $offset, tmp1, tmp2, tmp3, tmp4
+YloopHalfPixelXRnd$rndVal.Offset$offset
+ SUBS y, y, #1 ;// one row per pass; nothing before the BGT below changes these flags
+ M_EXT_XINT $offset, tmp1, tmp2, tmp3, tmp4
+ M_UHADD8R tmp5, tmp1, tmp3, (1-$rndVal), mask ;// the 1-rndVal argument converts to the M_UHADD8R convention, where 1 means round
+ M_UHADD8R tmp6, tmp2, tmp4, (1-$rndVal), mask
+ STRD tmp5, tmp6, [pDst], dstStep
+ M_LOAD_XINT pSrc, srcStep, $offset, tmp1, tmp2, tmp3, tmp4
+ BGT YloopHalfPixelXRnd$rndVal.Offset$offset
+ ;// NOTE(review): the reload at the bottom also runs on the final pass, so one row past the last stored row is read
+ B SwitchPredictTypeEnd
+ MEND
+;// ***************************************************************************
+ MACRO
+ M_MCRECONBLOCK_HalfPixelY $rndVal, $offset
+ ;// Algorithmic Description:
+ ;// This handles motion compensation for HalfPixelY predictType. The two
+ ;// rounding cases are handled by the different code base and spanned by
+ ;// different macro calls. PreLoading is used to avoid reload of same data.
+ ;//
+ ;// Filtering involves averaging a pixel with the next vertical pixel.
+ ;// M_LOAD_X generates 2 registers with all pixels in a row with 4 pixel in
+ ;// each register. These are set of packed registers appropriate to do
+ ;// 4 lane SIMD. After that M_UHADD8R macro does the averaging taking care
+ ;// of the rounding as required
+ ;// The 0x80808080 mask is only consumed by the rounding form of M_UHADD8R, hence the conditional load
+CaseHalfPixelYRnd$rndVal.Offset$offset
+ IF $rndVal = 0
+ LDR mask, =0x80808080
+ ENDIF
+ ;// Rows alternate between tmp1/tmp2 and tmp3/tmp4, so each row is loaded once and used in two averages
+ M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp5, $offset ;// Pre-load
+YloopHalfPixelYRnd$rndVal.Offset$offset
+ SUBS y, y, #2 ;// two rows per pass; nothing before the BGT below changes these flags
+ ;// Processing one line
+ M_LOAD_X pSrc, srcStep, tmp3, tmp4, tmp5, $offset
+ M_UHADD8R tmp1, tmp1, tmp3, (1-$rndVal), mask ;// average of previous row and new row, written over the previous row
+ M_UHADD8R tmp2, tmp2, tmp4, (1-$rndVal), mask
+ STRD tmp1, tmp2, [pDst], dstStep
+ ;// Processing another line
+ M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp5, $offset
+ M_UHADD8R tmp3, tmp3, tmp1, (1-$rndVal), mask ;// tmp1/tmp2 keep the newest row for the next pass
+ M_UHADD8R tmp4, tmp4, tmp2, (1-$rndVal), mask
+ STRD tmp3, tmp4, [pDst], dstStep
+
+ BGT YloopHalfPixelYRnd$rndVal.Offset$offset
+
+ B SwitchPredictTypeEnd
+ MEND
+;// ***************************************************************************
+ MACRO
+ M_MCRECONBLOCK_HalfPixelXY $rndVal, $offset
+ ;// Algorithmic Description:
+ ;// Emits the motion compensation handler for the HalfPixelXY predictType
+ ;// for one ($rndVal, $offset) combination; each combination gets its own
+ ;// copy of the code. Pre-loading is used to avoid reloading the same data.
+ ;//
+ ;// Filtering involves averaging a pixel with the next vertical, horizontal
+ ;// and right-down diagonal pixels. Just as in HalfPixelX case, M_LOAD_XINT
+ ;// and M_EXT_XINT combination generates 4 registers with a row and its
+ ;// 1 pixel right shifted version, with 4 pixels in one register. Another
+ ;// call of that macro-combination gets another row. Then M_HSUM_XOR is
+ ;// called to get mutual half-sum and xor combinations of a row with its
+ ;// shifted version as they are inputs to the M_AVG4 macro which computes
+ ;// the 4 element average with rounding. Note that it is the half-sum/xor
+ ;// values that are preserved for the next row as they can be re-used in
+ ;// the next call to M_AVG4, which saves recomputation.
+ ;// Due to the lack of registers, the row counter and a masking value
+ ;// required in M_AVG4 are packed into a single register yMask: the low
+ ;// nibble holds the row counter and the rest holds the masking value
+ ;// left shifted by 4
+
+CaseHalfPixelXYRnd$rndVal.Offset$offset
+ LDR yMask, =((0x01010101 << 4) + 8) ;// row counter starts at 8 in the low nibble
+
+ M_LOAD_XINT pSrc, srcStep, $offset, t00, t01, t10, t11 ;// Load a, a', b, b'
+ M_EXT_XINT $offset, t00, t01, t10, t11
+ M_HSUM_XOR t00, t10, tmp ;// s0, l0
+ M_HSUM_XOR t01, t11, tmp ;// s0', l0'
+
+YloopHalfPixelXYRnd$rndVal.Offset$offset
+ ;// Processing one line
+ ;// t00, t01, t10, t11 required from previous loop
+ M_LOAD_XINT pSrc, srcStep, $offset, t20, t21, t30, t31 ;// Load c, c', d, d'
+ SUB yMask, yMask, #2 ;// two rows per iteration; plain SUB, so no flags are set here
+ M_EXT_XINT $offset, t20, t21, t30, t31
+ M_HSUM_XOR t20, t30, tmp ;// s1, l1
+ M_HSUM_XOR t21, t31, tmp ;// s1', l1'
+ M_AVG4 t00, t10, t20, t30, $rndVal ;// s0, l0, s1, l1
+ M_AVG4 t01, t11, t21, t31, $rndVal ;// s0', l0', s1', l1'
+ STRD t00, t01, [pDst], dstStep ;// store the average
+
+ ;// Processing another line
+ ;// t20, t21, t30, t31 required from above
+ M_LOAD_XINT pSrc, srcStep, $offset, t00, t01, t10, t11 ;// Load a, a', b, b'
+ TST yMask, #7 ;// Z is set once the row counter bits reach 0
+ M_EXT_XINT $offset, t00, t01, t10, t11
+ M_HSUM_XOR t00, t10, tmp
+ M_HSUM_XOR t01, t11, tmp
+ M_AVG4 t20, t30, t00, t10, $rndVal
+ M_AVG4 t21, t31, t01, t11, $rndVal
+ STRD t20, t21, [pDst], dstStep
+ ;// NOTE(review): BGT needs Z clear and N == V; TST does not change V and the macros above must keep the flags - confirm
+ BGT YloopHalfPixelXYRnd$rndVal.Offset$offset
+
+ IF $offset/=3 :LOR: $rndVal/=1 ;// every expansion except ($rndVal=1, $offset=3) branches to the end label
+ B SwitchPredictTypeEnd
+ ENDIF
+ MEND
+;// ***************************************************************************
+;// Motion compensation handler macros end here
+;// ***************************************************************************
+ ;// Description:
+ ;// Populates the "cases" for all 4 pSrc offsets of one predictType and
+ ;// rndVal combination in the "switch" to prediction processing code segment
+ ;//
+ ;// Syntax:
+ ;// M_CASE_OFFSET $rnd, $predictType
+ ;//
+ ;// Inputs:
+ ;// $rnd 0 for rounding, 1 for no rounding
+ ;// $predictType The prediction mode (used verbatim in the label name)
+ ;//
+ ;// Outputs:
+ ;// Four "M_CASE"s, labels Case<predictType>Rnd<rnd>Offset0..3, for "M_SWITCH"
+
+ MACRO
+ M_CASE_OFFSET $rnd, $predictType
+ M_CASE Case$predictType.Rnd$rnd.Offset0
+ M_CASE Case$predictType.Rnd$rnd.Offset1
+ M_CASE Case$predictType.Rnd$rnd.Offset2
+ M_CASE Case$predictType.Rnd$rnd.Offset3
+ MEND
+;// ***************************************************************************
+ ;// Description:
+ ;// Populates the "cases" for both rounding values of one predictType in the
+ ;// "switch" to prediction processing code segment
+ ;//
+ ;// Syntax:
+ ;// M_CASE_MCRECONBLOCK $predictType
+ ;//
+ ;// Inputs:
+ ;// $predictType The prediction mode
+ ;//
+ ;// Outputs:
+ ;// Two "M_CASE_OFFSET" expansions (rnd 0 first, then rnd 1), i.e. 8 "M_CASE"s
+
+ MACRO
+ M_CASE_MCRECONBLOCK $predictType
+ M_CASE_OFFSET 0, $predictType ;// 0 for rounding
+ M_CASE_OFFSET 1, $predictType ;// 1 for no rounding
+ MEND
+;// ***************************************************************************
+ ;// Description:
+ ;// Populates the handling macros for all 8 rounding and offset combinations
+ ;// of the specified predictType. In case of "IntegerPixel" predictType,
+ ;// rounding is not required, so only the 4 rounding-0 segments are emitted
+ ;//
+ ;// Syntax:
+ ;// M_MCRECONBLOCK $predictType
+ ;//
+ ;// Inputs:
+ ;// $predictType The prediction mode
+ ;//
+ ;// Outputs:
+ ;// Populated list of "M_MCRECONBLOCK_<predictType>" macros for specified
+ ;// predictType. Each
+ ;// M_MCRECONBLOCK_<predictType> $rnd, $offset
+ ;// is a code segment (starting with a label indicating the predictType,
+ ;// rounding and offset combination)
+ ;// Four calls of this macro with the 4 prediction modes populate the
+ ;// handlers for the switch cases
+
+ MACRO
+ M_MCRECONBLOCK $predictType
+ M_MCRECONBLOCK_$predictType 0, 0
+ M_MCRECONBLOCK_$predictType 0, 1
+ M_MCRECONBLOCK_$predictType 0, 2
+ M_MCRECONBLOCK_$predictType 0, 3
+ IF "$predictType" /= "IntegerPixel" ;// If not IntegerPixel then rounding makes a difference
+ M_MCRECONBLOCK_$predictType 1, 0
+ M_MCRECONBLOCK_$predictType 1, 1
+ M_MCRECONBLOCK_$predictType 1, 2
+ M_MCRECONBLOCK_$predictType 1, 3
+ ENDIF
+ MEND
+;// ***************************************************************************
+;// Input/Output Registers
+pSrc RN 0
+srcStep RN 1
+arg_pSrcResidue RN 2 ;// incoming pSrcResidue argument; saved to the stack on entry
+pSrcResidue RN 12 ;// pSrcResidue as reloaded from the stack for the residue addition
+pDst RN 3
+dstStep RN 2 ;// shares r2 with arg_pSrcResidue
+predictType RN 10
+rndVal RN 11
+mask RN 11 ;// shares r11 with rndVal
+
+;// Local Scratch Registers
+zero RN 12
+y RN 14 ;// row counter
+
+tmp1 RN 4
+tmp2 RN 5
+tmp3 RN 6
+tmp4 RN 7
+tmp5 RN 8
+tmp6 RN 9
+tmp7 RN 10
+tmp8 RN 11
+tmp9 RN 12
+
+t00 RN 4
+t01 RN 5
+t10 RN 6
+t11 RN 7
+t20 RN 8
+t21 RN 9
+t30 RN 10 ;// shares r10 with predictType
+t31 RN 11 ;// shares r11 with rndVal/mask
+tmp RN 12
+
+yMask RN 14 ;// shares r14 with y
+
+dst RN 1 ;// shares r1 with srcStep; only used in the residue addition
+return RN 0
+
+ ;// Allocate stack slots used to keep pDst and pSrcResidue across the prediction code
+ M_ALLOC4 Stk_pDst, 4
+ M_ALLOC4 Stk_pSrcResidue, 4
+ ;// Function header
+ M_START omxVCM4P2_MCReconBlock, r11
+ ;// Define stack arguments
+ M_ARG Arg_dstStep, 4
+ M_ARG Arg_predictType, 4
+ M_ARG Arg_rndVal, 4
+ ;// Save on stack
+ M_STR pDst, Stk_pDst
+ M_STR arg_pSrcResidue, Stk_pSrcResidue ;// must happen before r2 is reloaded as dstStep
+ ;// Load argument from the stack
+ M_LDR dstStep, Arg_dstStep
+ M_LDR predictType, Arg_predictType
+ M_LDR rndVal, Arg_rndVal
+
+ MOV y, #8 ;// row counter for the prediction loops
+ ;// Build the switch index: predictType*8 + rndVal*4 + (pSrc & 3)
+ AND tmp1, pSrc, #3 ;// tmp1 = byte offset of pSrc within its 4-byte word
+ ORR predictType, tmp1, predictType, LSL #3
+ ORR predictType, predictType, rndVal, LSL #2
+ ;// Truncating source pointer to align to 4 byte location
+ BIC pSrc, pSrc, #3
+
+ ;// Implementation takes care of all combinations of different
+ ;// predictTypes, rounding cases and source pointer offsets to alignment
+ ;// of 4 bytes in different code bases unless one of these parameters
+ ;// makes no difference to the implementation. The M_CASE_MCRECONBLOCK
+ ;// macros below each expand into 8 M_CASE macros for all combinations of
+ ;// the 2 rounding cases and 4 offsets of the pSrc pointer to the 4 byte
+ ;// alignment; their order must match the index built above.
+ M_SWITCH predictType
+ M_CASE_MCRECONBLOCK IntegerPixel
+ M_CASE_MCRECONBLOCK HalfPixelX
+ M_CASE_MCRECONBLOCK HalfPixelY
+ M_CASE_MCRECONBLOCK HalfPixelXY
+ M_ENDSWITCH
+
+ ;// The M_MCRECONBLOCK macros populate the code bases by calling all 8
+ ;// particular macros (4 in case of IntegerPixel as rounding makes no
+ ;// difference there) to generate the code for all cases of rounding and
+ ;// offsets. LTORG is used to segment the code as code size bloated beyond
+ ;// 4KB.
+ M_MCRECONBLOCK IntegerPixel
+ M_MCRECONBLOCK HalfPixelX
+ LTORG
+ M_MCRECONBLOCK HalfPixelY
+ M_MCRECONBLOCK HalfPixelXY
+SwitchPredictTypeEnd
+
+ ;// Residue Addition (skipped when pSrcResidue is NULL)
+ ;// This is done in 2 lane SIMD though loads are further optimized and
+ ;// 4 bytes are loaded in case of destination buffer. Algorithmic
+ ;// details are in inlined comments
+ M_LDR pSrcResidue, Stk_pSrcResidue
+ CMP pSrcResidue, #0
+ BEQ pSrcResidueConditionEnd
+pSrcResidueNotNull
+ M_LDR pDst, Stk_pDst ;// restart from the first destination row
+ MOV y, #8
+ SUB dstStep, dstStep, #4 ;// the first store of each row already advances pDst by 4
+Yloop_pSrcResidueNotNull
+ SUBS y, y, #1 ;// flags are consumed by the BGT at the bottom of the loop
+ LDR dst, [pDst] ;// dst = [dcba]
+ LDMIA pSrcResidue!, {tmp1, tmp2} ;// tmp1 = [B A], tmp2 = [D C] (16-bit residues, A at the lowest address)
+ PKHBT tmp3, tmp1, tmp2, LSL #16 ;// Deltaval1 = [C A]
+ PKHTB tmp4, tmp2, tmp1, ASR #16 ;// DeltaVal2 = [D B]
+ UXTB16 tmp1, dst ;// tmp1 = [0c0a]
+ UXTB16 tmp2, dst, ROR #8 ;// tmp2 = [0d0b]
+ QADD16 tmp1, tmp1, tmp3 ;// Add and saturate to 16 bits
+ QADD16 tmp2, tmp2, tmp4
+ USAT16 tmp1, #8, tmp1 ;// armClip(0, 255, tmp1)
+ USAT16 tmp2, #8, tmp2 ;// armClip(0, 255, tmp2)
+ ORR tmp1, tmp1, tmp2, LSL #8 ;// tmp1 = [dcba]
+ STR tmp1, [pDst], #4
+ ;// Second group of 4 pixels of the row: same steps as above
+ LDR dst, [pDst]
+ LDMIA pSrcResidue!, {tmp1, tmp2}
+ PKHBT tmp3, tmp1, tmp2, LSL #16
+ PKHTB tmp4, tmp2, tmp1, ASR #16
+ UXTB16 tmp1, dst
+ UXTB16 tmp2, dst, ROR #8
+ QADD16 tmp1, tmp1, tmp3
+ QADD16 tmp2, tmp2, tmp4
+ USAT16 tmp1, #8, tmp1
+ USAT16 tmp2, #8, tmp2
+ ORR tmp1, tmp1, tmp2, LSL #8
+ STR tmp1, [pDst], dstStep ;// dstStep was reduced by 4 above, so pDst lands on the next row
+
+ BGT Yloop_pSrcResidueNotNull
+pSrcResidueConditionEnd
+
+ MOV return, #OMX_Sts_NoErr
+
+ M_END
+ ENDIF ;// ARM1136JS
+
+;// ***************************************************************************
+;// CortexA8 implementation
+;// ***************************************************************************
+ END
+;// ***************************************************************************
+;// omxVCM4P2_MCReconBlock ends
+;// ***************************************************************************
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
new file mode 100644
index 0000000..213444a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
@@ -0,0 +1,283 @@
+; **********
+; *
+; * File Name: omxVCM4P2_PredictReconCoefIntra_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains module for DC/AC coefficient prediction
+; *
+; *
+; * Function: omxVCM4P2_PredictReconCoefIntra
+; *
+; * Description:
+; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+; * to the function call, prediction direction (predDir) should be selected
+; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst pointer to the coefficient buffer which contains the
+; * quantized coefficient residuals (PQF) of the current
+; * block; must be aligned on a 4-byte boundary. The
+; * output coefficients are saturated to the range
+; * [-2048, 2047].
+; * [in] pPredBufRow pointer to the coefficient row buffer; must be aligned
+; * on a 4-byte boundary.
+; * [in] pPredBufCol pointer to the coefficient column buffer; must be
+; * aligned on a 4-byte boundary.
+; * [in] curQP quantization parameter of the current block. curQP may
+; * equal to predQP especially when the current block and
+; * the predictor block are in the same macroblock.
+; * [in] predQP quantization parameter of the predictor block
+; * [in] predDir indicates the prediction direction which takes one
+; * of the following values:
+; * OMX_VIDEO_HORIZONTAL predict horizontally
+; * OMX_VIDEO_VERTICAL predict vertically
+; * [in] ACPredFlag a flag indicating if AC prediction should be
+; * performed. It is equal to ac_pred_flag in the bit
+; * stream syntax of MPEG-4
+; * [in] videoComp video component type (luminance, chrominance or
+; * alpha) of the current block
+; * [out] pSrcDst pointer to the coefficient buffer which contains
+; * the quantized coefficients (QF) of the current
+; * block
+; * [out] pPredBufRow pointer to the updated coefficient row buffer
+; * [out] pPredBufCol pointer to the updated coefficient column buffer
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - Bad arguments
+; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol.
+; * - At least one of the following cases: curQP <= 0, predQP <= 0, curQP > 31,
+; * predQP > 31, predDir is outside [1,2].
+; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not
+; * 4-byte aligned.
+; *
+; *********
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+
+ IMPORT armVCM4P2_Reciprocal_QP_S32
+ IMPORT armVCM4P2_Reciprocal_QP_S16
+ IMPORT armVCM4P2_DCScaler
+
+
+
+ IF ARM1136JS
+
+
+;// Input Arguments
+
+pSrcDst RN 0
+pPredBufRow RN 1
+pPredBufCol RN 2
+curQP RN 3
+QP RN 3
+predQP RN 4 ;// shares r4 with dcScaler; loaded only after the DC part is done
+predDir RN 5
+ACPredFlag RN 6
+videoComp RN 7
+
+;// Local Variables
+
+temp2 RN 5 ;// shares r5 with predDir; only written after the last predDir test
+negCurQP RN 7
+negdcScaler RN 7
+tempPred RN 8
+
+dcScaler RN 4
+CoeffTable RN 9
+absCoeffDC RN 9
+temp3 RN 6
+absCoeffAC RN 6
+
+shortVideoHeader RN 9
+predCoeffTable RN 10
+Count RN 10 ;// shares r10 with predCoeffTable; set after the table entry is read
+temp1 RN 12
+index RN 12
+Rem RN 14
+temp RN 11
+Return RN 0
+
+ ;// omxVCM4P2_PredictReconCoefIntra: adds the scaled DC predictor to pSrcDst[0]
+ ;// and, when ACPredFlag == 1, the scaled AC predictors to the first row
+ ;// (predDir == 2) or first column (otherwise) of pSrcDst. Returns OMX_Sts_NoErr.
+ M_START omxVCM4P2_PredictReconCoefIntra,r12
+
+ ;// Assigning pointers to Input arguments on Stack
+
+ M_ARG predQPonStack,4
+ M_ARG predDironStack,4
+ M_ARG ACPredFlagonStack,4
+ M_ARG videoComponStack,4
+
+ ;// DC Prediction
+
+ M_LDR videoComp,videoComponStack ;// Load videoComp From Stack
+
+ M_LDR predDir,predDironStack ;// Load Prediction direction
+
+ ;// dcScaler Calculation: byte table lookup at armVCM4P2_DCScaler[videoComp*32 + QP]
+
+ LDR index, =armVCM4P2_DCScaler
+ ADD index,index,videoComp,LSL #5
+ LDRB dcScaler,[index,QP]
+
+
+calDCVal
+
+
+ LDR predCoeffTable, =armVCM4P2_Reciprocal_QP_S16 ;// Loading the table with entries 32767/(1 to 63)
+
+ CMP predDir,#2 ;// Check if the Prediction direction is vertical
+
+ ;// Calculate tempPred by performing the division as a reciprocal multiply
+
+ LDREQSH absCoeffDC,[pPredBufRow] ;// If vertical load the coeff from Row Prediction Buffer
+ LDRNESH absCoeffDC,[pPredBufCol] ;// If horizontal load the coeff from column Prediction Buffer
+
+ RSB negdcScaler,dcScaler,#0 ;// negdcScaler=-dcScaler
+
+ MOV temp1,absCoeffDC ;// temp1=prediction coeff (signed copy kept for the sign test)
+ CMP temp1,#0
+ RSBLT absCoeffDC,temp1,#0 ;// absCoeffDC=abs(temp1)
+
+ ADD temp,dcScaler,dcScaler ;// byte offset of the 16-bit table entry for dcScaler
+ LDRH temp,[predCoeffTable,temp] ;// Load value from coeff table for performing division using multiplication
+
+ SMULBB tempPred,temp,absCoeffDC ;// tempPred=abs(pPredBufRow(Col)[0])*32767/dcScaler
+ ADD temp3,dcScaler,#1
+ LSR tempPred,tempPred,#15 ;// tempPred=abs(pPredBufRow(Col)[0])/dcScaler
+ LSR temp3,temp3,#1 ;// temp3=round(dcScaler/2)
+
+ MLA Rem,negdcScaler,tempPred,absCoeffDC ;// Rem = abs(pPredBufRow(Col)[0])-tempPred*dcScaler
+
+
+ LDRH temp,[pPredBufCol] ;// old pPredBufCol[0], copied to pPredBufRow[-8] below
+ CMP Rem,temp3
+ ADDGE tempPred,#1 ;// If Rem>=round(dcScaler/2);tempPred=tempPred+1
+ CMP temp1,#0
+ RSBLT tempPred,tempPred,#0 ;// if pPredBufRow(Col)[0]<0; tempPred=-tempPred
+
+
+ STRH temp,[pPredBufRow,#-16] ;// pPredBufRow[-8]=old pPredBufCol[0]
+
+ LDRH temp,[pSrcDst] ;// temp=pSrcDst[0]
+ M_LDR ACPredFlag,ACPredFlagonStack
+ ADD temp,temp,tempPred ;// temp=pSrcDst[0]+tempPred
+ SSAT16 temp,#12,temp ;// clip temp to [-2048,2047]
+
+ SMULBB temp1,temp,dcScaler ;// temp1=clipped(pSrcDst[0])*dcScaler
+ M_LDR predQP,predQPonStack ;// r4 is free now: dcScaler is not used after this point
+ STRH temp,[pSrcDst]
+ CMP ACPredFlag,#1 ;// Check if the AC prediction flag is set or not
+ STRH temp1,[pPredBufCol] ;// store temp1 to pPredBufCol
+
+ ;// AC Prediction
+
+
+ BNE Exit ;// If not set Exit
+
+ LDR predCoeffTable, =armVCM4P2_Reciprocal_QP_S32 ;// Loading the table with entries 0x1ffff/(1 to 63)
+ MOV temp1,#4
+ MUL temp1,curQP,temp1 ;// byte offset of the 32-bit table entry for curQP
+ CMP predDir,#2 ;// Check the Prediction direction
+ RSB negCurQP,curQP,#0 ;// negCurQP=-curQP (taken before curQP is halved)
+ LDR CoeffTable,[predCoeffTable,temp1] ;// CoeffTable=0x1ffff/curQP
+ ADD curQP,curQP,#1 ;// curQP=curQP+1
+ LSR curQP,curQP,#1 ;// curQP=round(curQP/2)
+ MOV Count,#2 ;// Initializing the Loop Count
+ BNE Horizontal ;// If the Prediction direction is horizontal branch to Horizontal
+
+
+ ;// Vertical prediction: Count is the byte offset of coefficient i (2..14) in the first row
+loop1
+ ;// Calculate tempPred
+
+ LDRSH absCoeffAC,[pPredBufRow,Count] ;// absCoeffAC=pPredBufRow[i], 1=<i<=7
+ MOV temp1,absCoeffAC
+ CMP temp1,#0 ;// compare pPredBufRow[i] with zero, 1=<i<=7
+ RSBLT absCoeffAC,temp1,#0 ;// absCoeffAC= abs(pPredBufRow[i])
+
+ SMULBB absCoeffAC,absCoeffAC,predQP ;// absCoeffAC=abs(pPredBufRow[i])*predQP
+ MUL tempPred,absCoeffAC,CoeffTable ;// tempPred=abs(pPredBufRow[i])*predQP*0x1ffff/curQP
+ LSR tempPred,tempPred,#17 ;// tempPred=abs(pPredBufRow[i])*predQP/curQP
+
+ MLA Rem,negCurQP,tempPred,absCoeffAC ;// Rem=abs(pPredBufRow[i])*predQP-tempPred*curQP
+ LDRH temp,[pSrcDst,Count] ;// temp=pSrcDst[i],1<=i<8
+
+ CMP Rem,curQP
+ ADDGE tempPred,#1 ;// if Rem>=round(curQP/2); tempPred=tempPred+1
+ CMP temp1,#0
+ RSBLT tempPred,tempPred,#0 ;// if pPredBufRow[i]<0 ; tempPred=-tempPred
+
+ ;// Update source and Row Prediction buffers
+
+ ADD temp,temp,tempPred ;// temp=tempPred+pSrcDst[i]
+ SSAT16 temp,#12,temp ;// Clip temp to [-2048,2047]
+ STRH temp,[pSrcDst,Count]
+ STRH temp,[pPredBufRow,Count] ;// pPredBufRow[i]=temp
+ ADD Count,Count,#2 ;// i=i+1
+ CMP Count,#16 ;// compare if i=8
+ BLT loop1
+ B Exit ;// Branch to exit
+
+Horizontal
+ ;// Horizontal prediction: Count is the byte offset of pSrcDst[i], i=8,16,..,56
+ MOV Count,#16 ;// Initializing i=8
+
+loop2
+
+ LSR temp2,Count,#3 ;// temp2=byte offset of pPredBufCol[i>>3]
+
+ ;// Calculate tempPred
+ ;// Signed load, as in loop1: with a zero-extending LDRH the abs/sign-restore below could never fire
+ LDRSH absCoeffAC,[pPredBufCol,temp2] ;// absCoeffAC=pPredBufCol[i>>3]
+ MOV temp1,absCoeffAC
+ CMP temp1,#0 ;// compare pPredBufCol[i>>3] with zero
+ RSBLT absCoeffAC,temp1,#0 ;// absCoeffAC=abs(pPredBufCol[i>>3])
+
+ SMULBB absCoeffAC,absCoeffAC,predQP ;// absCoeffAC=abs(pPredBufCol[i>>3])*predQP
+ MUL tempPred,absCoeffAC,CoeffTable ;// tempPred=abs(pPredBufCol[i>>3])*predQP*0x1ffff/curQP
+ LSR tempPred,tempPred,#17 ;// tempPred=abs(pPredBufCol[i>>3])*predQP/curQP
+
+ MLA Rem,negCurQP,tempPred,absCoeffAC ;// Rem=abs(pPredBufCol[i>>3])*predQP-tempPred*curQP
+ LDRH temp,[pSrcDst,Count] ;// temp=pSrcDst[i]
+
+ CMP Rem,curQP ;// Compare Rem with round(curQP/2)
+ ADDGE tempPred,#1 ;// tempPred=tempPred+1 if Rem>=round(curQP/2)
+ CMP temp1,#0
+ RSBLT tempPred,tempPred,#0 ;// if pPredBufCol[i>>3]<0 tempPred=-tempPred
+
+ ;// Update source and Column Prediction buffers
+
+ ADD temp,temp,tempPred ;// temp=pSrcDst[i]+tempPred
+ SSAT16 temp,#12,temp ;// Clip temp to [-2048,2047]
+ STRH temp,[pSrcDst,Count] ;// pSrcDst[i]= clipped value
+ STRH temp,[pPredBufCol,temp2] ;// pPredBufCol[i>>3]=temp
+ ADD Count,Count,#16 ;// i=i+8
+ CMP Count,#128 ;// compare i with 64
+ BLT loop2
+
+
+Exit
+
+ MOV Return,#OMX_Sts_NoErr
+
+ M_END
+ ENDIF
+ END
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
new file mode 100644
index 0000000..c9591cb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
@@ -0,0 +1,141 @@
+;/**
+; *
+; * File Name: omxVCM4P2_QuantInvInter_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for inter reconstruction
+; *
+; *
+; *
+; *
+; *
+; * Function: omxVCM4P2_QuantInvInter_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
+; * [in] QP quantization parameter (quantiser_scale)
+; * [in] videoComp (Intra version only.) Video component type of the
+; * current block. Takes one of the following flags:
+; * OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; * OMX_VC_ALPHA.
+; * [in] shortVideoHeader a flag indicating presence of short_video_header;
+; * shortVideoHeader==1 selects linear intra DC mode,
+; * and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out] pSrcDst pointer to the output (dequantized) intra/inter block. Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; * - If pSrcDst is NULL or is not 16-byte aligned.
+; * or
+; * - If QP <= 0.
+; * or
+; * - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+; */
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+
+ IF ARM1136JS
+
+;//Input Arguments
+pSrcDst RN 0
+QP RN 1
+
+;//Local Variables
+Return RN 0
+Count RN 4
+tempVal21 RN 2 ;// LDRD fills tempVal21 (r2) and tempVal43 (r3) together
+tempVal43 RN 3
+QP1 RN 5
+X2 RN 6
+X3 RN 14
+Result1 RN 8 ;// STRD stores Result1 (r8) and Result2 (r9) together
+Result2 RN 9
+two RN 7
+
+ M_START omxVCM4P2_QuantInvInter_I,r9
+ ;// In-place inverse quantization of 64 16-bit values at pSrcDst, 4 per loop iteration
+ MOV Count,#64
+ TST QP,#1
+ LDRD tempVal21,[pSrcDst] ;// Loads first two values of pSrcDst to tempVal21,
+ ;// next two values to tempVal43
+ SUBEQ QP1,QP,#1 ;// QP1=QP if QP is odd , QP1=QP-1 if QP is even
+ MOVNE QP1,QP
+ MOV two,#2
+
+ ;// Per value v: 0 stays 0, otherwise result = sign(v)*(2*abs(v)*QP + QP1), then clipped
+ ;// Each RSBLT/SMLABBNE below reuses the flags of the preceding CMP
+Loop
+ ;// NOTE(review): PKHBT keeps only the low 16 bits of X2/X3 before SSAT16 clips them,
+ ;// so a result outside the signed 16-bit range would wrap first - confirm inputs cannot reach it
+ SMULBB X2,tempVal21,two ;// X2= first val(lower 16 bits of tempVal21)*2
+ CMP X2,#0
+
+ RSBLT X2,X2,#0 ;// X2=2*absoluteval(first val)
+ SMLABBNE X2,QP,X2,QP1 ;// X2=2*absval(first val)*QP+QP if QP is odd
+ ;// X2=2*absval(first val)*QP+QP-1 if QP is even
+ SMULTB X3,tempVal21,two ;// X3= second val(top 16 bits of tempVal21)*2
+ RSBLT X2,X2,#0 ;// restore the sign of the first val
+
+ CMP X3,#0
+
+ RSBLT X3,X3,#0
+ SMLABBNE X3,QP,X3,QP1
+
+ RSBLT X3,X3,#0
+ PKHBT Result1,X2,X3,LSL #16 ;// Result1[0-15]=X2[0-15],Result1[16-31]=X3[0-15]
+ SMULBB X2,tempVal43,two ;// X2= third val(lower 16 bits of tempVal43)*2
+ SSAT16 Result1,#12,Result1 ;// clip to range [-2048,2047]
+ CMP X2,#0
+
+
+ ;// LE instead of LT makes no difference here: negating 0 still gives 0
+ RSBLE X2,X2,#0
+ SMLABBNE X2,QP,X2,QP1
+ SMULTB X3,tempVal43,two ;// X3= fourth val(top 16 bits of tempVal43)*2
+ RSBLT X2,X2,#0
+ CMP X3,#0
+ ;// NOTE(review): on the last iteration this load reads the 8 bytes just past the 64 values (result unused)
+ LDRD tempVal21,[pSrcDst,#8] ;// Load next four Values to tempVal21,tempVal43
+
+ RSBLT X3,X3,#0
+ SMLABBNE X3,QP,X3,QP1
+ RSBLT X3,X3,#0
+ PKHBT Result2,X2,X3,LSL #16 ;// Result2[0-15]=X2[0-15],Result2[16-31]=X3[0-15]
+ SSAT16 Result2,#12,Result2 ;// clip to range [-2048,2047]
+
+ SUBS Count,Count,#4 ;// Decrement Count by 4 and continue if it has not reached 0
+ STRD Result1,[pSrcDst],#8 ;// Store Double words and increment the pointer to point the next store address
+
+
+
+ BGT Loop
+
+ MOV Return,#OMX_Sts_NoErr
+
+ M_END
+ ENDIF
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
new file mode 100644
index 0000000..6328e01
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
@@ -0,0 +1,188 @@
+;/**
+; *
+; * File Name: omxVCM4P2_QuantInvIntra_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for intra reconstruction
+; *
+; *
+; *
+; *
+; *
+; *
+; * Function: omxVCM4P2_QuantInvIntra_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
+; * [in] QP quantization parameter (quantiser_scale)
+; * [in] videoComp (Intra version only.) Video component type of the
+; * current block. Takes one of the following flags:
+; * OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; * OMX_VC_ALPHA.
+; * [in] shortVideoHeader a flag indicating presence of short_video_header;
+; * shortVideoHeader==1 selects linear intra DC mode,
+; * and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out] pSrcDst pointer to the output (dequantized) intra/inter block. Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; * - If pSrcDst is NULL or is not 16-byte aligned.
+; * or
+; * - If QP <= 0.
+; * or
+; * - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+; */
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+ IMPORT armVCM4P2_DCScaler
+
+
+
+ IF ARM1136JS
+
+;//Input Arguments
+pSrcDst RN 0
+QP RN 1
+videoComp RN 2
+shortVideoHeader RN 3
+
+;//Local Variables
+Return RN 0
+dcScaler RN 4
+temp RN 12 ;// holds the dequantized DC value until the end of the function
+index RN 6
+
+tempVal21 RN 4 ;// shares r4 with dcScaler; LDRD fills tempVal21 (r4) and tempVal43 (r5)
+tempVal43 RN 5
+QP1 RN 6 ;// shares r6 with index
+X2 RN 7
+X3 RN 14
+Result1 RN 8 ;// STRD stores Result1 (r8) and Result2 (r9) together
+Result2 RN 9
+two RN 10
+Count RN 11
+
+
+ ;// omxVCM4P2_QuantInvIntra_I: in-place inverse quantization of 64 16-bit values at pSrcDst.
+ ;// The DC value is computed first, all 64 values go through the loop, then the DC value is stored over entry 0.
+ M_START omxVCM4P2_QuantInvIntra_I,r11
+
+
+
+ ;// Perform Inverse Quantization for DC coefficient
+
+ TEQ shortVideoHeader,#0 ;// Test if short Video Header flag =0
+ MOVNE dcScaler,#8 ;// if shortVideoHeader is non zero dcScaler=8
+ BNE calDCVal
+ LDR index, =armVCM4P2_DCScaler ;// otherwise dcScaler = byte at armVCM4P2_DCScaler[videoComp*32 + QP]
+ ADD index,index,videoComp,LSL #5
+ LDRB dcScaler,[index,QP]
+
+
+ ;//M_CalDCScalar shortVideoHeader,videoComp, QP
+
+calDCVal
+
+ LDRH temp,[pSrcDst]
+ SMULBB temp,temp,dcScaler ;// dcCoeff = dcScaler * Quantized DC coefficient(from memory)
+ SSAT temp,#12,temp ;// Saturating to 12 bits
+
+
+ MOV Count,#64
+ TST QP,#1
+ LDRD tempVal21,[pSrcDst] ;// Loads first two values of pSrcDst to tempVal21,
+ ;// next two values to tempVal43
+ SUBEQ QP1,QP,#1 ;// QP1=QP if QP is odd , QP1=QP-1 if QP is even
+ MOVNE QP1,QP
+ MOV two,#2
+
+
+
+ ;// Per value v: 0 stays 0, otherwise result = sign(v)*(2*abs(v)*QP + QP1), then clipped
+ ;// Each RSBLT/SMLABBNE below reuses the flags of the preceding CMP
+Loop
+ ;// NOTE(review): PKHBT keeps only the low 16 bits of X2/X3 before SSAT16 clips them,
+ ;// so a result outside the signed 16-bit range would wrap first - confirm inputs cannot reach it
+ SMULBB X2,tempVal21,two ;// X2= first val(lower 16 bits of tempVal21)*2
+ CMP X2,#0
+
+ RSBLT X2,X2,#0 ;// X2=2*absoluteval(first val)
+ SMLABBNE X2,QP,X2,QP1 ;// X2=2*absval(first val)*QP+QP if QP is odd
+ ;// X2=2*absval(first val)*QP+QP-1 if QP is even
+ SMULTB X3,tempVal21,two ;// X3= second val(top 16 bits of tempVal21)*2
+ RSBLT X2,X2,#0 ;// restore the sign of the first val
+
+ CMP X3,#0
+
+ RSBLT X3,X3,#0
+ SMLABBNE X3,QP,X3,QP1
+
+ RSBLT X3,X3,#0
+ PKHBT Result1,X2,X3,LSL #16 ;// Result1[0-15]=X2[0-15],Result1[16-31]=X3[0-15]
+ SMULBB X2,tempVal43,two ;// X2= third val(lower 16 bits of tempVal43)*2
+ SSAT16 Result1,#12,Result1 ;// clip to range [-2048,2047]
+ CMP X2,#0
+
+
+ ;// LE instead of LT makes no difference here: negating 0 still gives 0
+ RSBLE X2,X2,#0
+ SMLABBNE X2,QP,X2,QP1
+ SMULTB X3,tempVal43,two ;// X3= fourth val(top 16 bits of tempVal43)*2
+ RSBLT X2,X2,#0
+ CMP X3,#0
+ ;// NOTE(review): on the last iteration this load reads the 8 bytes just past the 64 values (result unused)
+ LDRD tempVal21,[pSrcDst,#8] ;// Load next four Values to tempVal21,tempVal43
+
+ RSBLT X3,X3,#0
+ SMLABBNE X3,QP,X3,QP1
+ RSBLT X3,X3,#0
+ PKHBT Result2,X2,X3,LSL #16 ;// Result2[0-15]=X2[0-15],Result2[16-31]=X3[0-15]
+ SSAT16 Result2,#12,Result2 ;// clip to range [-2048,2047]
+
+ SUBS Count,Count,#4 ;// Decrement Count by 4 and continue if it has not reached 0
+ STRD Result1,[pSrcDst],#8 ;// Store Double words and increment the pointer to point the next store address
+
+
+
+ BGT Loop
+
+ SUB pSrcDst,pSrcDst,#128 ;// rewind to the start of the block (16 iterations * 8 bytes)
+
+ ;// Storing the Inverse Quantized DC coefficient (replaces the loop's result for entry 0)
+
+ STRH temp,[pSrcDst],#2
+
+
+
+ MOV Return,#OMX_Sts_NoErr
+
+
+
+
+ M_END
+ ENDIF
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/src/armVC_Version.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/src/armVC_Version.c
new file mode 100644
index 0000000..5d93681
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/src/armVC_Version.c
@@ -0,0 +1,6 @@
+#include "omxtypes.h"
+#include "armCOMM_Version.h"
+
+#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS /* the version string is only compiled in when this macro is defined */
+const char * const omxVC_VersionDescription = "ARM OpenMAX DL v" ARM_VERSION_STRING " Rel=" OMX_ARM_RELEASE_TAG " Arch=" OMX_ARM_BUILD_ARCHITECTURE " Tools=" OMX_ARM_BUILD_TOOLCHAIN ;
+#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_DELIVERY.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_DELIVERY.TXT
new file mode 100755
index 0000000..cc2d70a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_DELIVERY.TXT
@@ -0,0 +1,63 @@
+The contents of this transaction was created by Hedley Francis
+of ARM on 19-Feb-2008.
+
+It contains the ARM data versions listed below.
+
+This data, unless otherwise stated, is ARM Proprietary and access to it
+is subject to the agreements indicated below.
+
+If you experience problems with this data, please contact ARM support
+quoting transaction reference <97414>.
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+- OX002-SW-98010-r0p0-00bet1
+ Video codecs - optimised code
+ V7 code release for Hantro (Ver 1.0.2)
+ internal access
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+This transaction contains deliverables which are designated as being of
+beta release status (BET).
+
+Beta release status has a particular meaning to ARM of which the recipient
+must be aware. Beta is a pre-release status indicating that the deliverable
+so described is believed to robustly demonstrate specified behaviour, to be
+consistent across its included aspects and be ready for general deployment.
+But Beta also indicates that pre-release reliability trials are ongoing and
+that it is possible residual defects or errors in operation, consistency
+and documentation may still be encountered. The recipient should consider
+this position when using this Beta material supplied. ARM will normally
+attempt to provide fixes or a work-around for defects identified by the
+recipient, but the provision or timeliness of this support cannot be
+guaranteed. ARM shall not be responsible for direct or consequential
+damages as a result of encountering one or more of these residual defects.
+By accepting a Beta release, the recipient agrees to these constraints and
+to providing reasonable information to ARM to enable the replication of the
+defects identified by the recipient. The specific Beta version supplied
+will not be supported after release of a later or higher status version.
+It should be noted that Support for the Beta release of the deliverable
+will only be provided by ARM to a recipient who has a current support and
+maintenance contract for the deliverable.
+
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+In addition to the data versions listed above, this transaction contains
+two additional files at the top level.
+
+The first is this file, ARM_DELIVERY_97414.TXT, which is the delivery
+note.
+
+The second is ARM_MANIFEST_97414.TXT which contains a manifest of all the
+files included in this transaction, together with their checksums.
+
+The checksums provided are calculated using the RSA Data Security, Inc.
+MD5 Message-Digest Algorithm.
+
+The checksums can be used to verify the integrity of this data using the
+"md5sum" tool (which is part of the GNU "textutils" package) by running:
+
+ % md5sum --check ARM_MANIFEST_97414.TXT
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_MANIFEST.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_MANIFEST.TXT
new file mode 100755
index 0000000..8310f67
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/ARM_MANIFEST.TXT
@@ -0,0 +1,91 @@
+ OX002-SW-98010-r0p0-00bet1/
+ OX002-SW-98010-r0p0-00bet1/api/
+e049791cfab6060a08cbac7b3ad767d6 OX002-SW-98010-r0p0-00bet1/api/armCOMM_s.h
+ed798face25497b2703ede736d6d52b6 OX002-SW-98010-r0p0-00bet1/api/omxtypes_s.h
+4eebd63af087376811d6749f0646b864 OX002-SW-98010-r0p0-00bet1/api/armCOMM_BitDec_s.h
+43cf46c2cf2fe1f93c615b57bcbe4809 OX002-SW-98010-r0p0-00bet1/api/armCOMM.h
+8f248ceaac8f602e277a521b679dcbbe OX002-SW-98010-r0p0-00bet1/api/armCOMM_IDCTTable.h
+8ac5fa80ea98e391f5730a375280b5bd OX002-SW-98010-r0p0-00bet1/api/armCOMM_Version.h
+3a2f420ddf6a1b950470bd0f5ebd5c62 OX002-SW-98010-r0p0-00bet1/api/armCOMM_IDCT_s.h
+511c0bb534fe223599e2c84eff24c9ed OX002-SW-98010-r0p0-00bet1/api/armCOMM_MaskTable.h
+8971932d56eed6b1ad1ba507f0bff5f0 OX002-SW-98010-r0p0-00bet1/api/armCOMM_Bitstream.h
+f87fedd9ca432fefa757008176864ef8 OX002-SW-98010-r0p0-00bet1/api/armOMX.h
+8e49899a428822c36ef9dd94e0e05f18 OX002-SW-98010-r0p0-00bet1/api/omxtypes.h
+323008b72e9f04099a8cb42e99a1face OX002-SW-98010-r0p0-00bet1/build_vc.pl
+e72d96c0a415459748df9807f3dae72f OX002-SW-98010-r0p0-00bet1/filelist_vc.txt
+ OX002-SW-98010-r0p0-00bet1/src/
+5eeae659a29477f5c52296d24afffd3c OX002-SW-98010-r0p0-00bet1/src/armCOMM_IDCTTable.c
+d64cdcf38f7749dc7f77465e5b7d356d OX002-SW-98010-r0p0-00bet1/src/armCOMM_MaskTable.c
+ OX002-SW-98010-r0p0-00bet1/vc/
+ OX002-SW-98010-r0p0-00bet1/vc/m4p10/
+ OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/
+e7e0c320978564a7c9b2c723749a98d6 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_CAVLCTables.c
+4adcd0df081990bdfc4729041a2a9152 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
+852e0404142965dc1f3aa7f00ee5127b OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
+7054151c5bfea6b5e74feee86b2d7b01 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+5f7213a4f37627b3c58f6294ba477e30 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DequantTables_s.s
+32ff4b8be62e2f0f3e764b83c1e5e2fd OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
+d066e3c81d82616f37ec1810ea49e7b7 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
+fe629a3e9d55395a6098bdf2431b5f02 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
+5b13fb954b7679de20076bb6a7f4ee1d OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
+01ba60eff66ea49a4f833ce6279f8e2f OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
+fa1072cf1d17e9666c9f1e215fa302b1 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
+db387b9e66d32787f47ef9cf0347da2a OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
+ea537e4e2ad03a1940981055fa3ace01 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
+29a4283885b9473a3550a81eff2559d2 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
+2ddcaf60a8ea1e6e6b77737f768bfb9d OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_QuantTables_s.s
+c3002aad5600f872b70a5d7fe3915846 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
+a2900f2c47f1c61d20bd6c1eda33d6d4 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
+c921df73397a32c947dc996ba6858553 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
+3769e14f2fc3f514d025fe6ab73ff67a OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
+c029d1cebea0a09e1d235a37e2155002 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
+076a033f8161750a685756f9f51f04c9 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
+c5b5d22842822e6e5e31094882cbeb46 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
+f6bdf6d914a4a1479f524951a3409846 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
+ebeb0713a9b2ea25986360ef262138c4 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
+78ed9ea200faa7be665445a713859af1 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
+c2d995f787b6f44ef10c751c12d1935f OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
+40bed679a9f6e0d3efe216b7d4a9cf45 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
+4a52b3e9e268b8a8f07829bf500d03af OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
+11249f8a98c5d4b84cb5575b0e37ca9c OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
+2513b60559ba71ae495c6053fb779fa9 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
+2fb1ee17c36e3c1469c170f6dac11bf1 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
+cc4a6f32db0b72a91d3f278f6855df69 OX002-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+ OX002-SW-98010-r0p0-00bet1/vc/m4p10/api/
+6e530ddaa7c2b57ffe88162c020cb662 OX002-SW-98010-r0p0-00bet1/vc/m4p10/api/armVCM4P10_CAVLCTables.h
+ OX002-SW-98010-r0p0-00bet1/vc/m4p2/
+ OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/
+bec6de348b113438498867b869001622 OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Clip8_s.s
+dba9824e959b21d401cac925e68a11a6 OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+dfa7e5b58027be3542dda0593b77b2d3 OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
+4fba4c431a783a78a2eb6497a94ac967 OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
+39991961179ca03b6381b6e653b1f14b OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
+1b0b2990c2669dfb87cf6b810611c01b OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
+1c9b87abf3283e957816b3937c680701 OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
+4fe1afca659a9055fc1172e58f78a506 OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
+2ea067f0436f91ba1351edaf411cb4ea OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
+6ce363aadc9d65c308b40cca8902e4f6 OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
+bf212f786772aed2bc705d22ff4e74f5 OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
+293a48a648a3085456e6665bb7366fad OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
+2bb47ed9c9e25c5709c6d9b4ad39a38a OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
+437dfa204508850d61d4b87091446e9f OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
+bc9778898dd41101dc0fb0139eaf83cc OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
+fc191eeae43f8ce735dbd311cc7bcb8d OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
+a0d85f4f517c945a4c9317ac021f2d08 OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
+386020dee8b725c7fe2526f1fc211d7d OX002-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
+ OX002-SW-98010-r0p0-00bet1/vc/m4p2/api/
+4624e7c838e10a249abcc3d3f4f40748 OX002-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
+65e1057d04e2cb844559dc9f6e09795a OX002-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
+ OX002-SW-98010-r0p0-00bet1/vc/src/
+e627b3346b0dc9aff14446005ce0fa43 OX002-SW-98010-r0p0-00bet1/vc/src/armVC_Version.c
+ OX002-SW-98010-r0p0-00bet1/vc/api/
+7ca94b1c33ac0211e17d38baadd7d1dd OX002-SW-98010-r0p0-00bet1/vc/api/armVC.h
+12cf7596edbbf6048b626d15e8d0ed48 OX002-SW-98010-r0p0-00bet1/vc/api/omxVC.h
+11726e286a81257cb45f5547fb4d374c OX002-SW-98010-r0p0-00bet1/vc/api/omxVC_s.h
+a5b2af605c319cd2491319e430741377 OX002-SW-98010-r0p0-00bet1/vc/api/armVCCOMM_s.h
+ OX002-SW-98010-r0p0-00bet1/vc/comm/
+ OX002-SW-98010-r0p0-00bet1/vc/comm/src/
+1f81187b48487a8ea6dbc327648e3e4f OX002-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy16x16_s.s
+936d3f2038a6f8613ec25e50cc601fe8 OX002-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy8x8_s.s
+8f6708a249130962e0bc5c044ac6dd93 OX002-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
+aab7713414428e95de0ba799a2679b36 ARM_DELIVERY_97414.TXT
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM.h
new file mode 100755
index 0000000..64c1958
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM.h
@@ -0,0 +1,785 @@
+/**
+ *
+ * File Name: armCOMM.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armCOMM.h
+ * Brief: Declares Common APIs/Data Types used across OpenMAX API's
+ *
+ */
+
+
+#ifndef _armCommon_H_
+#define _armCommon_H_
+
+#include "omxtypes.h"
+
+typedef struct
+{
+ OMX_F32 Re; /** Real part */
+ OMX_F32 Im; /** Imaginary part */
+
+} OMX_FC32; /** single precision floating point complex number */
+
+typedef struct
+{
+ OMX_F64 Re; /** Real part */
+ OMX_F64 Im; /** Imaginary part */
+
+} OMX_FC64; /** double precision floating point complex number */
+
+
+/* Used by both IP and IC domains for 8x8 JPEG blocks. */
+typedef OMX_S16 ARM_BLOCK8x8[64];
+
+
+#include "armOMX.h"
+
+#define armPI (OMX_F64)(3.1415926535897932384626433832795)
+
+/***********************************************************************/
+
+/* Compiler extensions */
+#ifdef ARM_DEBUG
+/* debug version */
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#define armError(str) {printf((str)); printf("\n"); exit(-1);}
+#define armWarn(str) {printf((str)); printf("\n");}
+#define armIgnore(a) ((void)a)
+#define armAssert(a) assert(a)
+#else
+/* release version */
+#define armError(str) ((void) (str))
+#define armWarn(str) ((void) (str))
+#define armIgnore(a) ((void) (a))
+#define armAssert(a) ((void) (a))
+#endif /* ARM_DEBUG */
+
+/* Arithmetic operations */
+
+#define armMin(a,b) ( (a) > (b) ? (b):(a) )
+#define armMax(a,b) ( (a) > (b) ? (a):(b) )
+#define armAbs(a) ( (a) < 0 ? -(a):(a) )
+
+/* Alignment operation */
+
+#define armAlignToBytes(Ptr,N) (Ptr + ( ((N-(int)Ptr)&(N-1)) / sizeof(*Ptr) ))
+#define armAlignTo2Bytes(Ptr) armAlignToBytes(Ptr,2)
+#define armAlignTo4Bytes(Ptr) armAlignToBytes(Ptr,4)
+#define armAlignTo8Bytes(Ptr) armAlignToBytes(Ptr,8)
+#define armAlignTo16Bytes(Ptr) armAlignToBytes(Ptr,16)
+
+/* Error and Alignment check */
+
+#define armRetArgErrIf(condition, code) if(condition) { return (code); }
+#define armRetDataErrIf(condition, code) if(condition) { return (code); }
+
+#ifndef ALIGNMENT_DOESNT_MATTER
+#define armIsByteAligned(Ptr,N) ((((int)(Ptr)) % N)==0)
+#define armNotByteAligned(Ptr,N) ((((int)(Ptr)) % N)!=0)
+#else
+#define armIsByteAligned(Ptr,N) (1)
+#define armNotByteAligned(Ptr,N) (0)
+#endif
+
+#define armIs2ByteAligned(Ptr) armIsByteAligned(Ptr,2)
+#define armIs4ByteAligned(Ptr) armIsByteAligned(Ptr,4)
+#define armIs8ByteAligned(Ptr) armIsByteAligned(Ptr,8)
+#define armIs16ByteAligned(Ptr) armIsByteAligned(Ptr,16)
+
+#define armNot2ByteAligned(Ptr) armNotByteAligned(Ptr,2)
+#define armNot4ByteAligned(Ptr) armNotByteAligned(Ptr,4)
+#define armNot8ByteAligned(Ptr) armNotByteAligned(Ptr,8)
+#define armNot16ByteAligned(Ptr) armNotByteAligned(Ptr,16)
+#define armNot32ByteAligned(Ptr) armNotByteAligned(Ptr,32)
+
+/**
+ * Function: armRoundFloatToS16/armRoundFloatToS32/armRoundFloatToS64
+ *
+ * Description:
+ * Converts a double precision value into a short int/int after rounding
+ *
+ * Parameters:
+ * [in] Value Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16/OMX_S32/OMX_S64 format
+ *
+ */
+
+OMX_S16 armRoundFloatToS16 (OMX_F64 Value);
+OMX_S32 armRoundFloatToS32 (OMX_F64 Value);
+OMX_S64 armRoundFloatToS64 (OMX_F64 Value);
+
+/**
+ * Function: armSatRoundFloatToS16/armSatRoundFloatToS32
+ *
+ * Description:
+ * Converts a double precision value into a short int/int after rounding and saturation
+ *
+ * Parameters:
+ * [in] Value Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16/OMX_S32 format
+ *
+ */
+
+OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value);
+OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value);
+
+/**
+ * Function: armSatRoundFloatToU16/armSatRoundFloatToU32
+ *
+ * Description:
+ * Converts a double precision value into an unsigned short int/int after rounding and saturation
+ *
+ * Parameters:
+ * [in] Value Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_U16/OMX_U32 format
+ *
+ */
+
+OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value);
+OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value);
+
+/**
+ * Function: armSignCheck
+ *
+ * Description:
+ * Checks the sign of a variable:
+ * returns 1 if it is Positive
+ * returns 0 if it is 0
+ * returns -1 if it is Negative
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] var Variable to be checked
+ *
+ * Return Value:
+ * OMX_INT -- returns 1 if it is Positive
+ * returns 0 if it is 0
+ * returns -1 if it is Negative
+ */
+
+OMX_INT armSignCheck (OMX_S16 var);
+
+/**
+ * Function: armClip
+ *
+ * Description: Clips the input between MAX and MIN value
+ *
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] Min lower bound
+ * [in] Max upper bound
+ * [in] src variable to be clipped
+ *
+ * Return Value:
+ * OMX_S32 -- returns clipped value
+ */
+
+OMX_S32 armClip (
+ OMX_INT min,
+ OMX_INT max,
+ OMX_S32 src
+ );
+
+/**
+ * Function: armClip_F32
+ *
+ * Description: Clips the input between MAX and MIN value
+ *
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] Min lower bound
+ * [in] Max upper bound
+ * [in] src variable to be clipped
+ *
+ * Return Value:
+ * OMX_F32 -- returns clipped value
+ */
+
+OMX_F32 armClip_F32 (
+ OMX_F32 min,
+ OMX_F32 max,
+ OMX_F32 src
+ );
+
+/**
+ * Function: armShiftSat_F32
+ *
+ * Description: Divides a float value by 2^shift and
+ * saturates it for unsigned value range for satBits.
+ * Second parameter is like "shifting" the corresponding
+ * integer value. Takes care of rounding while clipping the final
+ * value.
+ *
+ * Parameters:
+ * [in] v Number to be operated upon
+ * [in] shift Divides the input "v" by "2^shift"
+ * [in] satBits Final range is [0, 2^satBits)
+ *
+ * Return Value:
+ * OMX_U32 -- returns "shifted" saturated value
+ */
+
+OMX_U32 armShiftSat_F32(
+ OMX_F32 v,
+ OMX_INT shift,
+ OMX_INT satBits
+ );
+
+/**
+ * Functions: armSwapElem
+ *
+ * Description:
+ * This function swaps two elements at the specified pointer locations.
+ * The size of each element could be anything as specified by <elemSize>
+ *
+ * Return Value:
+ * OMXResult -- Error status from the function
+ */
+OMXResult armSwapElem(OMX_U8 *pBuf1, OMX_U8 *pBuf2, OMX_INT elemSize);
+
+
+/**
+ * Function: armMedianOf3
+ *
+ * Description: Finds the median of three numbers
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] fEntry First entry
+ * [in] sEntry second entry
+ * [in] tEntry Third entry
+ *
+ * Return Value:
+ * OMX_S32 -- returns the median value
+ */
+
+OMX_S32 armMedianOf3 (
+ OMX_S32 fEntry,
+ OMX_S32 sEntry,
+ OMX_S32 tEntry
+ );
+
+/**
+ * Function: armLogSize
+ *
+ * Description: Finds the size of a positive value and returns the same
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] value Positive value
+ *
+ * Return Value:
+ * OMX_U8 -- returns the size of the positive value
+ */
+
+OMX_U8 armLogSize (
+ OMX_U16 value
+ );
+
+/***********************************************************************/
+ /* Saturating Arithmetic operations */
+
+/**
+ * Function :armSatAdd_S32()
+ *
+ * Description :
+ * Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ *
+ **/
+
+OMX_S32 armSatAdd_S32(
+ OMX_S32 Value1,
+ OMX_S32 Value2
+ );
+
+/**
+ * Function :armSatAdd_S64()
+ *
+ * Description :
+ * Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ *
+ **/
+
+OMX_S64 armSatAdd_S64(
+ OMX_S64 Value1,
+ OMX_S64 Value2
+ );
+
+/** Function :armSatSub_S32()
+ *
+ * Description :
+ * Returns the result of saturated subtraction of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S32 armSatSub_S32(
+ OMX_S32 Value1,
+ OMX_S32 Value2
+ );
+
+/**
+ * Function :armSatMac_S32()
+ *
+ * Description :
+ * Returns the result of Multiplication of Value1 and Value2 and subsequent saturated
+ * accumulation with Mac
+ *
+ * Parameters:
+ * [in] Mac Accumulator
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ *
+ * Return:
+ * [out] Result of operation
+ **/
+
+OMX_S32 armSatMac_S32(
+ OMX_S32 Mac,
+ OMX_S16 Value1,
+ OMX_S16 Value2
+ );
+
+/**
+ * Function :armSatMac_S16S32_S32
+ *
+ * Description :
+ * Returns the result of saturated MAC operation of the three inputs delayElem, filTap , mac
+ *
+ * mac = mac + Saturate_in_32Bits(delayElem * filTap)
+ *
+ * Parameters:
+ * [in] delayElem First 32 bit Operand
+ * [in] filTap Second 16 bit Operand
+ * [in] mac Accumulator to which the saturated product is added
+ *
+ * Return:
+ * [out] mac Result of operation
+ *
+ **/
+
+OMX_S32 armSatMac_S16S32_S32(
+ OMX_S32 mac,
+ OMX_S32 delayElem,
+ OMX_S16 filTap );
+
+/**
+ * Function :armSatRoundRightShift_S32_S16
+ *
+ * Description :
+ * Returns the result of rounded right shift operation of input by the scalefactor
+ *
+ * output = Saturate_in_16Bits( RightShift( Round(input), scaleFactor ) )
+ *
+ * Parameters:
+ * [in] input The input to be operated on
+ * [in] scaleFactor The shift number
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+
+OMX_S16 armSatRoundRightShift_S32_S16(
+ OMX_S32 input,
+ OMX_INT scaleFactor);
+
+/**
+ * Function :armSatRoundLeftShift_S32()
+ *
+ * Description :
+ * Returns the result of saturating left-shift operation on input
+ * Or rounded Right shift if the input Shift is negative.
+ *
+ * Parameters:
+ * [in] Value Operand
+ * [in] shift Operand for shift operation
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S32 armSatRoundLeftShift_S32(
+ OMX_S32 Value,
+ OMX_INT shift
+ );
+
+/**
+ * Function :armSatRoundLeftShift_S64()
+ *
+ * Description :
+ * Returns the result of saturating left-shift operation on input
+ * Or rounded Right shift if the input Shift is negative.
+ *
+ * Parameters:
+ * [in] Value Operand
+ * [in] shift Operand for shift operation
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S64 armSatRoundLeftShift_S64(
+ OMX_S64 Value,
+ OMX_INT shift
+ );
+
+/**
+ * Function :armSatMulS16S32_S32()
+ *
+ * Description :
+ * Returns the result of a S16 data type multiplied with an S32 data type
+ * in a S32 container
+ *
+ * Parameters:
+ * [in] input1 Operand 1
+ * [in] input2 Operand 2
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+
+OMX_S32 armSatMulS16S32_S32(
+ OMX_S16 input1,
+ OMX_S32 input2);
+
+/**
+ * Function :armSatMulS32S32_S32()
+ *
+ * Description :
+ * Returns the result of a S32 data type multiplied with an S32 data type
+ * in a S32 container
+ *
+ * Parameters:
+ * [in] input1 Operand 1
+ * [in] input2 Operand 2
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S32 armSatMulS32S32_S32(
+ OMX_S32 input1,
+ OMX_S32 input2);
+
+
+/**
+ * Function :armIntDivAwayFromZero()
+ *
+ * Description : Integer division with rounding to the nearest integer.
+ * Half-integer values are rounded away from zero
+ * unless otherwise specified. For example 3//2 is rounded
+ * to 2, and -3//2 is rounded to -2.
+ *
+ * Parameters:
+ * [in] Num Operand 1
+ * [in] Deno Operand 2
+ *
+ * Return:
+ * [out] Result of operation Num//Deno
+ *
+ **/
+
+OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno);
+
+
+/***********************************************************************/
+/*
+ * Debugging macros
+ *
+ */
+
+
+/*
+ * Definition of output stream - change to stderr if necessary
+ */
+#define DEBUG_STREAM stdout
+
+/*
+ * Debug printf macros, one for each argument count.
+ * Add more if needed.
+ */
+#ifdef DEBUG_ON
+#include <stdio.h>
+
+#define DEBUG_PRINTF_0(a) fprintf(DEBUG_STREAM, a)
+#define DEBUG_PRINTF_1(a, b) fprintf(DEBUG_STREAM, a, b)
+#define DEBUG_PRINTF_2(a, b, c) fprintf(DEBUG_STREAM, a, b, c)
+#define DEBUG_PRINTF_3(a, b, c, d) fprintf(DEBUG_STREAM, a, b, c, d)
+#define DEBUG_PRINTF_4(a, b, c, d, e) fprintf(DEBUG_STREAM, a, b, c, d, e)
+#define DEBUG_PRINTF_5(a, b, c, d, e, f) fprintf(DEBUG_STREAM, a, b, c, d, e, f)
+#define DEBUG_PRINTF_6(a, b, c, d, e, f, g) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g)
+#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h)
+#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i)
+#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j)
+#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k)
+#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l)
+#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m)
+#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n)
+#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
+#else /* DEBUG_ON */
+#define DEBUG_PRINTF_0(a)
+#define DEBUG_PRINTF_1(a, b)
+#define DEBUG_PRINTF_2(a, b, c)
+#define DEBUG_PRINTF_3(a, b, c, d)
+#define DEBUG_PRINTF_4(a, b, c, d, e)
+#define DEBUG_PRINTF_5(a, b, c, d, e, f)
+#define DEBUG_PRINTF_6(a, b, c, d, e, f, g)
+#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h)
+#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i)
+#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j)
+#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k)
+#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l)
+#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m)
+#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n)
+#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
+#endif /* DEBUG_ON */
+
+
+/*
+ * Domain and sub domain definitions
+ *
+ * In order to turn on debug for an entire domain or sub-domain
+ * at compile time, one of the DEBUG_DOMAIN_* below may be defined,
+ * which will activate debug in all of the defines it contains.
+ */
+
+#ifdef DEBUG_DOMAIN_AC
+#define DEBUG_OMXACAAC_DECODECHANPAIRELT_MPEG4
+#define DEBUG_OMXACAAC_DECODECHANPAIRELT
+#define DEBUG_OMXACAAC_DECODEDATSTRELT
+#define DEBUG_OMXACAAC_DECODEFILLELT
+#define DEBUG_OMXACAAC_DECODEISSTEREO_S32
+#define DEBUG_OMXACAAC_DECODEMSPNS_S32
+#define DEBUG_OMXACAAC_DECODEMSSTEREO_S32_I
+#define DEBUG_OMXACAAC_DECODEPRGCFGELT
+#define DEBUG_OMXACAAC_DECODETNS_S32_I
+#define DEBUG_OMXACAAC_DEINTERLEAVESPECTRUM_S32
+#define DEBUG_OMXACAAC_ENCODETNS_S32_I
+#define DEBUG_OMXACAAC_LONGTERMPREDICT_S32
+#define DEBUG_OMXACAAC_LONGTERMRECONSTRUCT_S32
+#define DEBUG_OMXACAAC_MDCTFWD_S32
+#define DEBUG_OMXACAAC_MDCTINV_S32_S16
+#define DEBUG_OMXACAAC_NOISELESSDECODE
+#define DEBUG_OMXACAAC_QUANTINV_S32_I
+#define DEBUG_OMXACAAC_UNPACKADIFHEADER
+#define DEBUG_OMXACAAC_UNPACKADTSFRAMEHEADER
+#define DEBUG_OMXACMP3_HUFFMANDECODESFBMBP_S32
+#define DEBUG_OMXACMP3_HUFFMANDECODESFB_S32
+#define DEBUG_OMXACMP3_HUFFMANDECODE_S32
+#define DEBUG_OMXACMP3_MDCTINV_S32
+#define DEBUG_OMXACMP3_REQUANTIZESFB_S32_I
+#define DEBUG_OMXACMP3_REQUANTIZE_S32_I
+#define DEBUG_OMXACMP3_SYNTHPQMF_S32_S16
+#define DEBUG_OMXACMP3_UNPACKFRAMEHEADER
+#define DEBUG_OMXACMP3_UNPACKSCALEFACTORS_S8
+#define DEBUG_OMXACMP3_UNPACKSIDEINFO
+#endif /* DEBUG_DOMAIN_AC */
+
+
+#ifdef DEBUG_DOMAIN_VC
+#define DEBUG_OMXVCM4P10_AVERAGE_16X
+#define DEBUG_OMXVCM4P10_AVERAGE_4X
+#define DEBUG_OMXVCM4P10_AVERAGE_8X
+#define DEBUG_OMXVCM4P10_DEBLOCKCHROMA_U8_C1IR
+#define DEBUG_OMXVCM4P10_DEBLOCKLUMA_U8_C1IR
+#define DEBUG_OMXVCM4P10_DECODECHROMADCCOEFFSTOPAIRCAVLC_U8
+#define DEBUG_OMXVCM4P10_DECODECOEFFSTOPAIRCAVLC_U8
+#define DEBUG_OMXVCM4P10_DEQUANTTRANSFORMACFROMPAIR_U8_S16_C1_DLX
+#define DEBUG_OMXVCM4P10_EXPANDFRAME
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_HOREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_VEREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_HOREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_VEREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_PREDICTINTRACHROMA8X8_U8_C1R
+#define DEBUG_OMXVCM4P10_PREDICTINTRA_16X16_U8_C1R
+#define DEBUG_OMXVCM4P10_PREDICTINTRA_4X4_U8_C1R
+#define DEBUG_OMXVCM4P10_SADQUAR_16X
+#define DEBUG_OMXVCM4P10_SADQUAR_4X
+#define DEBUG_OMXVCM4P10_SADQUAR_8X
+#define DEBUG_OMXVCM4P10_SAD_16X
+#define DEBUG_OMXVCM4P10_SAD_4X
+#define DEBUG_OMXVCM4P10_SAD_8X
+#define DEBUG_OMXVCM4P10_SATD_4X4
+#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTCHROMADCFROMPAIR_U8_S16_C1
+#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTLUMADCFROMPAIR_U8_S16_C1
+#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_CHROMADC
+#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_LUMADC
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_16X16
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_8X8
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_16X16
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_8X8
+#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_SAD_U8_S16
+#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_U8_S16
+#define DEBUG_OMXVCM4P2_DCT8X8BLKDLX
+#define DEBUG_OMXVCM4P2_DECODEBLOCKCOEF_INTER_S16
+#define DEBUG_OMXVCM4P2_DECODEPADMV_PVOP
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTER_S16
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRAACVLC_S16
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRADCVLC_S16
+#define DEBUG_OMXVCM4P2_ENCODEMV_U8_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTER_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRAACVLC_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRADCVLC_S16
+#define DEBUG_OMXVCM4P2_FINDMVPRED
+#define DEBUG_OMXVCM4P2_IDCT8X8BLKDLX
+#define DEBUG_OMXVCM4P2_LIMITMVTORECT
+#define DEBUG_OMXVCM4P2_MOTIONESTIMATIONMB
+#define DEBUG_OMXVCM4P2_PADMBGRAY_U8
+#define DEBUG_OMXVCM4P2_PADMBHORIZONTAL_U8
+#define DEBUG_OMXVCM4P2_PADMBVERTICAL_U8
+#define DEBUG_OMXVCM4P2_PADMV
+#define DEBUG_OMXVCM4P2_QUANTINTER_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINTRA_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINVINTER_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINVINTRA_S16_I
+#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTER
+#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTRA
+#endif /* DEBUG_DOMAIN_VC */
+
+
+#ifdef DEBUG_DOMAIN_IC
+/* To be filled in */
+#endif /* DEBUG_DOMAIN_IC */
+
+
+#ifdef DEBUG_DOMAIN_SP
+#define DEBUG_OMXACSP_DOTPROD_S16
+#define DEBUG_OMXACSP_BLOCKEXP_S16
+#define DEBUG_OMXACSP_BLOCKEXP_S32
+#define DEBUG_OMXACSP_COPY_S16
+#define DEBUG_OMXACSP_DOTPROD_S16
+#define DEBUG_OMXACSP_DOTPROD_S16_SFS
+#define DEBUG_OMXACSP_FFTFWD_CTOC_SC16_SFS
+#define DEBUG_OMXACSP_FFTFWD_CTOC_SC32_SFS
+#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S16S32_SFS
+#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S32_SFS
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC16
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC32
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S16_S32
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S32
+#define DEBUG_OMXACSP_FFTINIT_C_SC16
+#define DEBUG_OMXACSP_FFTINIT_C_SC32
+#define DEBUG_OMXACSP_FFTINIT_R_S16_S32
+#define DEBUG_OMXACSP_FFTINIT_R_S32
+#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32S16_SFS
+#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32_SFS
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC16_SFS
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC32_SFS
+#define DEBUG_OMXACSP_FILTERMEDIAN_S32_I
+#define DEBUG_OMXACSP_FILTERMEDIAN_S32
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_ISFS
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_I
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_SFS
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_ISFS
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_I
+#define DEBUG_OMXACSP_FIR_DIRECT_S16
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_SFS
+#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16_I
+#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16
+#define DEBUG_OMXACSP_IIRONE_DIRECT_S16_I
+#define DEBUG_OMXACSP_IIRONE_DIRECT_S16
+#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16_I
+#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16
+#define DEBUG_OMXACSP_IIR_DIRECT_S16_I
+#define DEBUG_OMXACSP_IIR_DIRECT_S16
+#endif /* DEBUG_DOMAIN_SP */
+
+
+#ifdef DEBUG_DOMAIN_IP
+#define DEBUG_OMXIPBM_ADDC_U8_C1R_SFS
+#define DEBUG_OMXIPBM_COPY_U8_C1R
+#define DEBUG_OMXIPBM_COPY_U8_C3R
+#define DEBUG_OMXIPBM_MIRROR_U8_C1R
+#define DEBUG_OMXIPBM_MULC_U8_C1R_SFS
+#define DEBUG_OMXIPCS_COLORTWISTQ14_U8_C3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR420LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR422LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR444LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR420LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR422LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR444LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_YCBCR420RSZROT_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR420TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR420TORGB565_U8_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR420TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422RSZCSCROTRGB_U8_C2R
+#define DEBUG_OMXIPCS_YCBCR422RSZROT_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB565_U8_U16_C2C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB_U8_C2C3R
+#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_C2P3R
+#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR444TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR444TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB_U8_C3R
+#define DEBUG_OMXIPPP_GETCENTRALMOMENT_S64
+#define DEBUG_OMXIPPP_GETSPATIALMOMENT_S64
+#define DEBUG_OMXIPPP_MOMENTGETSTATESIZE_S64
+#define DEBUG_OMXIPPP_MOMENTINIT_S64
+#define DEBUG_OMXIPPP_MOMENTS64S_U8_C1R
+#define DEBUG_OMXIPPP_MOMENTS64S_U8_C3R
+#endif /* DEBUG_DOMAIN_IP */
+
+
+#endif /* _armCommon_H_ */
+
+/*End of File*/
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_BitDec_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_BitDec_s.h
new file mode 100755
index 0000000..c738f72
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_BitDec_s.h
@@ -0,0 +1,670 @@
+;//
+;//
+;// File Name: armCOMM_BitDec_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// OpenMAX optimized bitstream decode module
+;//
+;// You must include armCOMM_s.h before including this file
+;//
+;// This module provides macros to perform assembly optimized fixed and
+;// variable length decoding from a read-only bitstream. The variable
+;// length decode modules take as input a pointer to a table of 16-bit
+;// entries of the following format.
+;//
+;// VLD Table Entry format
+;//
+;// 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
+;// +------------------------------------------------+
+;// | Len | Symbol | 1 |
+;// +------------------------------------------------+
+;// | Offset | 0 |
+;// +------------------------------------------------+
+;//
+;// If the table entry is a leaf entry then bit 0 set:
+;// Len = Number of bits overread (0 to 7)
+;// Symbol = Symbol payload (unsigned 12 bits)
+;//
+;// If the table entry is an internal node then bit 0 is clear:
+;// Offset = Number of (16-bit) half words from the table
+;// start to the next table node
+;//
+;// The table is accessed by successive lookups on the
+;// next Step bits of the input bitstream until a leaf node
+;// is obtained. The Step sizes are supplied to the VLD macro.
+;//
+;// USAGE:
+;//
+;// To use any of the macros in this package, first call M_BD_INIT0 as
+;// shown, then M_BD_INIT1 and M_BD_INIT2 (each taking T1, T2, T3):
+;// M_BD_INIT0 ppBitStream, pBitOffset, pBitStream, RBitBuffer, RBitCount
+;//
+;// This caches the current bitstream position and next available
+;// bits in registers pBitStream, RBitBuffer, RBitCount. These registers
+;// are reserved for use by the bitstream decode package until you
+;// call M_BD_FINI.
+;//
+;// Next call the following macro(s) as many times as you need:
+;//
+;// M_BD_LOOK8 - Look ahead constant 1<=N<=8 bits into the bitstream
+;// M_BD_LOOK16 - Look ahead constant 1<=N<=16 bits into the bitstream
+;// M_BD_READ8 - Read constant 1<=N<=8 bits from the bitstream
+;// M_BD_READ16 - Read constant 1<=N<=16 bits from the bitstream
+;// M_BD_VREAD8 - Read variable 1<=N<=8 bits from the bitstream
+;// M_BD_VREAD16 - Read variable 1<=N<=16 bits from the bitstream
+;// M_BD_VLD - Perform variable length decode using lookup table
+;//
+;// Finally call the macro:
+;//
+;// M_BD_FINI ppBitStream, pBitOffset
+;//
+;// This writes the bitstream state back to memory.
+;//
+;// The three bitstream cache register names are assigned to the following global
+;// variables:
+;//
+
+ GBLS pBitStream ;// Register name for pBitStream
+ GBLS BitBuffer ;// Register name for BitBuffer
+ GBLS BitCount ;// Register name for BitCount
+
+;//
+;// These register variables must have a certain defined state on entry to every bitstream
+;// macro (except M_BD_INIT) and on exit from every bitstream macro (except M_BD_FINI).
+;// The state may depend on implementation.
+;//
+;// For the default (ARM11) implementation the following hold:
+;// pBitStream - points to the first byte not held in the BitBuffer
+;// BitBuffer - is a cache of (4 bytes) 32 bits, bit 31 the first bit
+;// BitCount - is offset (from the top bit) to the next unused bitstream bit
+;// 0<=BitCount<=15 (so BitBuffer holds at least 17 unused bits)
+;//
+;//
+
+ ;// Bitstream Decode initialise
+ ;//
+ ;// Initialises the bitstream decode global registers from
+ ;// bitstream pointers. This macro is split into 3 parts to enable
+ ;// scheduling.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $ppBitStream - pointer to pointer to the next bitstream byte
+ ;// $pBitOffset - pointer to the number of bits used in the current byte (0..7)
+ ;// $RBitStream - register to use for pBitStream (can be $ppBitStream)
+ ;// $RBitBuffer - register to use for BitBuffer
+ ;// $RBitCount - register to use for BitCount (can be $pBitOffset)
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $T1,$T2,$T3 - registers that must be preserved between calls to
+ ;// M_BD_INIT1 and M_BD_INIT2
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_INIT0 $ppBitStream, $pBitOffset, $RBitStream, $RBitBuffer, $RBitCount
+
+pBitStream SETS "$RBitStream"
+BitBuffer SETS "$RBitBuffer"
+BitCount SETS "$RBitCount"
+
+ ;// load inputs
+ LDR $pBitStream, [$ppBitStream]
+ LDR $BitCount, [$pBitOffset]
+ MEND
+
+ MACRO
+ M_BD_INIT1 $T1, $T2, $T3
+ LDRB $T2, [$pBitStream, #2]
+ LDRB $T1, [$pBitStream, #1]
+ LDRB $BitBuffer, [$pBitStream], #3
+ ADD $BitCount, $BitCount, #8
+ MEND
+
+ MACRO
+ M_BD_INIT2 $T1, $T2, $T3
+ ORR $T2, $T2, $T1, LSL #8
+ ORR $BitBuffer, $T2, $BitBuffer, LSL #16
+ MEND
+
+ ;//
+ ;// Look ahead fixed 1<=N<=8 bits without consuming any bits
+ ;// The next bits will be placed at bit 31..24 of destination register
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to look
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the next N bits of the bitstream
+ ;// (none - this macro takes no temp/scratch register)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_LOOK8 $Symbol, $N
+ ASSERT ($N>=1):LAND:($N<=8)
+ MOV $Symbol, $BitBuffer, LSL $BitCount
+ MEND
+
+ ;//
+ ;// Look ahead fixed 1<=N<=16 bits without consuming any bits
+ ;// The next bits will be placed at bit 31..16 of destination register
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to look
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the next N bits of the bitstream
+ ;// $T1 - corrupted temp/scratch register
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_LOOK16 $Symbol, $N, $T1
+ ASSERT ($N >= 1):LAND:($N <= 16)
+ MOV $Symbol, $BitBuffer, LSL $BitCount
+ MEND
+
+ ;//
+ ;// Skips fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $T1 - corrupted temp/scratch register
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_SKIP8 $N, $T1
+ ASSERT ($N>=1):LAND:($N<=8)
+ SUBS $BitCount, $BitCount, #(8-$N)
+ LDRCSB $T1, [$pBitStream], #1
+ ADDCC $BitCount, $BitCount, #8
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ MEND
+
+
+ ;//
+ ;// Read fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to read
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the next N bits of the bitstream
+ ;// $T1 - corrupted temp/scratch register
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_READ8 $Symbol, $N, $T1
+ ASSERT ($N>=1):LAND:($N<=8)
+ MOVS $Symbol, $BitBuffer, LSL $BitCount
+ SUBS $BitCount, $BitCount, #(8-$N)
+ LDRCSB $T1, [$pBitStream], #1
+ ADDCC $BitCount, $BitCount, #8
+ MOV $Symbol, $Symbol, LSR #(32-$N)
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ MEND
+
+ ;//
+ ;// Read fixed 1<=N<=16 bits from the bitstream, advancing the bitstream pointer
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to read
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the next N bits of the bitstream
+ ;// $T1 - corrupted temp/scratch register
+ ;// $T2 - corrupted temp/scratch register
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_READ16 $Symbol, $N, $T1, $T2
+ ASSERT ($N>=1):LAND:($N<=16)
+ ASSERT $Symbol<>$T1
+ IF ($N<=8)
+ M_BD_READ8 $Symbol, $N, $T1
+ ELSE
+ ;// N>8 so we will be able to refill at least one byte
+ LDRB $T1, [$pBitStream], #1
+ MOVS $Symbol, $BitBuffer, LSL $BitCount
+ ORR $BitBuffer, $T1, $BitBuffer, LSL #8
+ SUBS $BitCount, $BitCount, #(16-$N)
+ LDRCSB $T1, [$pBitStream], #1
+ MOV $Symbol, $Symbol, LSR #(32-$N)
+ ADDCC $BitCount, $BitCount, #8
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ ENDIF
+ MEND
+
+ ;//
+ ;// Skip variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits. 1<=N<=8
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $T1 - corrupted temp/scratch register
+ ;// (only $T1 is used as scratch; this macro takes no $T2)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_VSKIP8 $N, $T1
+ ADD $BitCount, $BitCount, $N
+ SUBS $BitCount, $BitCount, #8
+ LDRCSB $T1, [$pBitStream], #1
+ ADDCC $BitCount, $BitCount, #8
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ MEND
+
+ ;//
+ ;// Skip variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits. 1<=N<=16
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $T1 - corrupted temp/scratch register
+ ;// $T2 - corrupted temp/scratch register
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_VSKIP16 $N, $T1, $T2
+ ADD $BitCount, $BitCount, $N
+ SUBS $BitCount, $BitCount, #8
+ LDRCSB $T1, [$pBitStream], #1
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ SUBCSS $BitCount, $BitCount, #8
+ LDRCSB $T1, [$pBitStream], #1
+ ADDCC $BitCount, $BitCount, #8
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ MEND
+
+ ;//
+ ;// Read variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to read. 1<=N<=8
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the next N bits of the bitstream
+ ;// $T1 - corrupted temp/scratch register
+ ;// $T2 - corrupted temp/scratch register
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_VREAD8 $Symbol, $N, $T1, $T2
+ MOV $Symbol, $BitBuffer, LSL $BitCount
+ ADD $BitCount, $BitCount, $N
+ SUBS $BitCount, $BitCount, #8
+ LDRCSB $T1, [$pBitStream], #1
+ RSB $T2, $N, #32
+ ADDCC $BitCount, $BitCount, #8
+ MOV $Symbol, $Symbol, LSR $T2
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ MEND
+
+
+ ;//
+ ;// Read variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to read. 1<=N<=16
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the next N bits of the bitstream
+ ;// $T1 - corrupted temp/scratch register
+ ;// $T2 - corrupted temp/scratch register
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_VREAD16 $Symbol, $N, $T1, $T2
+ MOV $Symbol, $BitBuffer, LSL $BitCount
+ ADD $BitCount, $BitCount, $N
+ SUBS $BitCount, $BitCount, #8
+ LDRCSB $T1, [$pBitStream], #1
+ RSB $T2, $N, #32
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ SUBCSS $BitCount, $BitCount, #8
+ LDRCSB $T1, [$pBitStream], #1
+ ADDCC $BitCount, $BitCount, #8
+ MOV $Symbol, $Symbol, LSR $T2
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ MEND
+
+
+ ;//
+ ;// Decode a code of the form 0000...001 where there
+ ;// are N zeros before the 1 and N<=15 (code length<=16)
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the number of zeros before the next 1
+ ;// >=16 is an illegal code
+ ;// $T1 - corrupted temp/scratch register
+ ;// $T2 - corrupted temp/scratch register
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_CLZ16 $Symbol, $T1, $T2
+ MOVS $Symbol, $BitBuffer, LSL $BitCount
+ CLZ $Symbol, $Symbol
+ ADD $BitCount, $BitCount, $Symbol
+ SUBS $BitCount, $BitCount, #7 ;// length is Symbol+1
+ LDRCSB $T1, [$pBitStream], #1
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ SUBCSS $BitCount, $BitCount, #8
+ LDRCSB $T1, [$pBitStream], #1
+ ADDCC $BitCount, $BitCount, #8
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ MEND
+
+ ;//
+ ;// Decode a code of the form 1111...110 where there
+ ;// are N ones before the 0 and N<=15 (code length<=16)
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the number of ones before the next 0
+ ;// >=16 is an illegal code
+ ;// $T1 - corrupted temp/scratch register
+ ;// $T2 - corrupted temp/scratch register
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_CLO16 $Symbol, $T1, $T2
+ MOV $Symbol, $BitBuffer, LSL $BitCount
+ MVN $Symbol, $Symbol
+ CLZ $Symbol, $Symbol
+ ADD $BitCount, $BitCount, $Symbol
+ SUBS $BitCount, $BitCount, #7 ;// length is Symbol+1
+ LDRCSB $T1, [$pBitStream], #1
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ SUBCSS $BitCount, $BitCount, #8
+ LDRCSB $T1, [$pBitStream], #1
+ ADDCC $BitCount, $BitCount, #8
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8
+ MEND
+
+
+ ;//
+ ;// Variable Length Decode module
+ ;//
+ ;// Decodes one VLD Symbol from a bitstream and refill the bitstream
+ ;// buffer.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $pVLDTable - pointer to VLD decode table of 16-bit entries.
+ ;// The format is described above at the start of
+ ;// this file.
+ ;// $S0 - The number of bits to look up for the first step
+ ;// 1<=$S0<=8
+ ;// $S1 - The number of bits to look up for each subsequent
+ ;// step 1<=$S1<=$S0.
+ ;//
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - decoded VLD symbol value
+ ;// $T1 - corrupted temp/scratch register
+ ;// $T2 - corrupted temp/scratch register
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_VLD $Symbol, $T1, $T2, $pVLDTable, $S0, $S1
+ ASSERT (1<=$S0):LAND:($S0<=8)
+ ASSERT (1<=$S1):LAND:($S1<=$S0)
+
+ ;// Note 0<=BitCount<=15 on entry and exit
+
+ MOVS $T1, $BitBuffer, LSL $BitCount ;// left align next bits
+ MOVS $Symbol, #(2<<$S0)-2 ;// create mask
+ AND $Symbol, $Symbol, $T1, LSR #(31-$S0) ;// 2*(next $S0 bits)
+ SUBS $BitCount, $BitCount, #8 ;// CS if buffer can be filled
+01
+ LDRCSB $T1, [$pBitStream], #1 ;// load refill byte
+ LDRH $Symbol, [$pVLDTable, $Symbol] ;// load table entry
+ ADDCC $BitCount, $BitCount, #8 ;// refill not possible
+ ADD $BitCount, $BitCount, #$S0 ;// assume $S0 bits used
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in refill byte
+ MOVS $T1, $Symbol, LSR #1 ;// CS=leaf entry
+ BCS %FT02
+
+ MOVS $T1, $BitBuffer, LSL $BitCount ;// left align next bit
+ IF (2*$S0-$S1<=8)
+ ;// Can combine refill check and -S0+S1 and keep $BitCount<=15
+ SUBS $BitCount, $BitCount, #8+($S0-$S1)
+ ELSE
+ ;// Separate refill check and -S0+S1 offset
+ SUBS $BitCount, $BitCount, #8
+ SUB $BitCount, $BitCount, #($S0-$S1)
+ ENDIF
+ ADD $Symbol, $Symbol, $T1, LSR #(31-$S1) ;// add 2*(next $S1 bits) to
+ BIC $Symbol, $Symbol, #1 ;// table offset
+ B %BT01 ;// load next table entry
+02
+ ;// BitCount range now depend on the route here
+ ;// if (first step) S0 <= BitCount <= 7+S0 <=15
+ ;// else if (2*S0-S1<=8) S0 <= BitCount <= 7+(2*S0-S1) <=15
+ ;// else S1 <= BitCount <= 7+S1 <=15
+
+ SUB $BitCount, $BitCount, $Symbol, LSR#13
+ BIC $Symbol, $T1, #0xF000
+ MEND
+
+
+ ;// Add an offset number of bits
+ ;//
+ ;// Outputs destination byte and bit index values which correspond to an offset number of bits
+ ;// from the current location. These are used to compare bitstream positions using M_BD_CMP.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $Offset - Offset to be added in bits (overwritten: $BitCount is added to it).
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $ByteIndex - Destination pBitStream pointer after adding the Offset.
+ ;// This value is 4 bytes ahead; subtract 4 to get the exact
+ ;// pointer (as in M_BD_FINI). For use with M_BD_CMP the subtraction is not needed.
+ ;// $BitIndex - Destination BitCount after the addition of Offset number of bits
+ ;//
+ MACRO
+ M_BD_ADD $ByteIndex, $BitIndex, $Offset
+
+ ;// ($ByteIndex,$BitIndex) = Current position + $Offset bits
+ ADD $Offset, $Offset, $BitCount
+ AND $BitIndex, $Offset, #7
+ ADD $ByteIndex, $pBitStream, $Offset, ASR #3
+ MEND
+
+ ;// Move bitstream pointers to the location given
+ ;//
+ ;// Sets $pBitStream and $BitCount to the destination byte and bit index
+ ;// values given (calculated using M_BD_ADD). $BitBuffer is not modified.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;// $ByteIndex - Destination pBitStream pointer after move.
+ ;// This value is 4 bytes ahead; subtract 4 to get the exact
+ ;// pointer (as in M_BD_FINI).
+ ;// $BitIndex - Destination BitCount after the move
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $pBitStream \
+ ;// } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_MOV $ByteIndex, $BitIndex
+
+ ;// ($pBitStream, $BitCount) = ($ByteIndex,$BitIndex)
+ MOV $BitCount, $BitIndex
+ MOV $pBitStream, $ByteIndex
+ MEND
+
+ ;// Bitstream Compare
+ ;//
+ ;// Compares bitstream position with that of a destination position. Destination position
+ ;// is held in two input registers which are calculated using M_BD_ADD macro
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $ByteIndex - Destination pBitStream pointer, (4 byte ahead as described in M_BD_ADD)
+ ;// $BitIndex - Destination BitCount
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// FLAGS - GE if destination is reached, LT if destination is ahead
+ ;// $T1 - corrupted temp/scratch register
+ ;//
+ MACRO
+ M_BD_CMP $ByteIndex, $BitIndex, $T1
+
+ ;// Return flags set by (current position)-($ByteIndex,$BitIndex)
+ ;// so GE means that we have reached the indicated position
+
+ ADD $T1, $pBitStream, $BitCount, LSR #3
+ CMP $T1, $ByteIndex
+ AND $T1, $BitCount, #7
+ CMPEQ $T1, $BitIndex
+ MEND
+
+
+ ;// Bitstream Decode finalise
+ ;//
+ ;// Writes back the bitstream state to the bitstream pointers
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $ppBitStream - pointer to pointer to the next bitstream byte
+ ;// $pBitOffset - pointer to the number of bits used in the current byte (0..7)
+ ;// $pBitStream \
+ ;// $BitBuffer } these register are corrupted
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_FINI $ppBitStream, $pBitOffset
+
+ ;// Advance pointer by the whole bytes consumed (BitCount/8), keep the bit remainder
+ ADD $pBitStream, $pBitStream, $BitCount, LSR#3
+ AND $BitCount, $BitCount, #7
+
+ ;// Now move back 32 bits (4 bytes) to reach the first unused bit
+ SUB $pBitStream, $pBitStream, #4
+
+ ;// Store out bitstream state
+ STR $BitCount, [$pBitOffset]
+ STR $pBitStream, [$ppBitStream]
+ MEND
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Bitstream.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Bitstream.h
new file mode 100755
index 0000000..b699034
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Bitstream.h
@@ -0,0 +1,212 @@
+/**
+ *
+ * File Name: armCOMM_Bitstream.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armCOMM_Bitstream.h
+ * Brief: Declares common API's/Data types used across the OpenMax Encoders/Decoders.
+ *
+ */
+
+#ifndef _armCodec_H_
+#define _armCodec_H_
+
+#include "omxtypes.h"
+
+typedef struct {
+ OMX_U8 codeLen;
+ OMX_U32 codeWord;
+} ARM_VLC32;
+
+/* The above should be renamed as "ARM_VLC32" */
+
+/**
+ * Function: armLookAheadBits()
+ *
+ * Description:
+ * Get the next N bits from the bitstream without advancing the bitstream pointer
+ *
+ * Parameters:
+ * [in] **ppBitStream
+ * [in] *pOffset
+ * [in] N=1...32
+ *
+ * Returns Value
+ */
+
+OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N);
+
+/**
+ * Function: armGetBits()
+ *
+ * Description:
+ * Read N bits from the bitstream
+ *
+ * Parameters:
+ * [in] *ppBitStream
+ * [in] *pOffset
+ * [in] N=1..32
+ *
+ * [out] *ppBitStream
+ * [out] *pOffset
+ * Returns Value
+ */
+
+OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N);
+
+/**
+ * Function: armByteAlign()
+ *
+ * Description:
+ * Align the pointer *ppBitStream to the next byte boundary
+ *
+ * Parameters:
+ * [in] *ppBitStream
+ * [in] *pOffset
+ *
+ * [out] *ppBitStream
+ * [out] *pOffset
+ *
+ **/
+
+OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset);
+
+/**
+ * Function: armSkipBits()
+ *
+ * Description:
+ * Skip N bits from the value at *ppBitStream
+ *
+ * Parameters:
+ * [in] *ppBitStream
+ * [in] *pOffset
+ * [in] N
+ *
+ * [out] *ppBitStream
+ * [out] *pOffset
+ *
+ **/
+
+OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N);
+
+/***************************************
+ * Variable bit length Decode
+ ***************************************/
+
+/**
+ * Function: armUnPackVLC32()
+ *
+ * Description:
+ * Variable length decode of variable length symbol (max size 32 bits) read from
+ * the bit stream pointed by *ppBitStream at *pOffset by using the table
+ * pointed by pCodeBook
+ *
+ * Parameters:
+ * [in] **ppBitStream
+ * [in] *pOffset
+ * [in] pCodeBook
+ *
+ * [out] **ppBitStream
+ * [out] *pOffset
+ *
+ * Returns : Code Book Index if successful.
+ * : "ARM_NO_CODEBOOK_INDEX = 0xFFFF" if search fails.
+ **/
+
+#define ARM_NO_CODEBOOK_INDEX ((OMX_U16)(0xFFFF)) /* "search failed" sentinel; fully parenthesised so it expands safely in any expression */
+
+OMX_U16 armUnPackVLC32(
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pOffset,
+ const ARM_VLC32 *pCodeBook
+);
+
+/***************************************
+ * Fixed bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackBits
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte
+ * in the bit stream.
+ * [in] pOffset pointer to the bit position in the byte
+ * pointed by *ppBitStream. Valid within 0
+ * to 7.
+ * [in] codeWord Code word that need to be inserted in to the
+ * bitstream
+ * [in] codeLength Length of the code word valid range 1...32
+ *
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded,
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pOffset *pOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armPackBits (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pOffset,
+ OMX_U32 codeWord,
+ OMX_INT codeLength
+);
+
+/***************************************
+ * Variable bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackVLC32
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte
+ * in the bit stream.
+ * [in] pBitOffset pointer to the bit position in the byte
+ * pointed by *ppBitStream. Valid within 0
+ * to 7.
+ * [in] code VLC code word that need to be inserted in to the
+ * bitstream
+ *
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded,
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armPackVLC32 (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ ARM_VLC32 code
+);
+
+#endif /*_armCodec_H_*/
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCTTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCTTable.h
new file mode 100755
index 0000000..e0cfdaa
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCTTable.h
@@ -0,0 +1,40 @@
+/**
+ *
+ *
+ * File Name: armCOMM_IDCTTable.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File : armCOMM_IDCTTable.h
+ * Description : Contains declarations of tables for IDCT calculation.
+ *
+ */
+
+#ifndef _armCOMM_IDCTTable_H_
+#define _armCOMM_IDCTTable_H_
+
+#include "omxtypes.h"
+
+ /* Table of s(u)*A(u)*A(v)/16 at Q15
+ * s(u)=1.0 0 <= u <= 5
+ * s(6)=2.0
+ * s(7)=4.0
+ * A(0) = 2*sqrt(2)
+ * A(u) = 4*cos(u*pi/16) for (u!=0)
+ */
+extern const OMX_U16 armCOMM_IDCTPreScale [64];
+extern const OMX_U16 armCOMM_IDCTCoef [4];
+
+#endif /* _armCOMM_IDCTTable_H_ */
+
+
+/* End of File */
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCT_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCT_s.h
new file mode 100755
index 0000000..0baa087
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCT_s.h
@@ -0,0 +1,1451 @@
+;//
+;// This confidential and proprietary software may be used only as
+;// authorised by a licensing agreement from ARM Limited
+;// (C) COPYRIGHT 2004 ARM Limited
+;// ALL RIGHTS RESERVED
+;// The entire notice above must be reproduced on all authorised
+;// copies and copies may only be made to the extent permitted
+;// by a licensing agreement from ARM Limited.
+;//
+;// IDCT_s.s
+;//
+;// Inverse DCT module
+;//
+;//
+;// ALGORITHM DESCRIPTION
+;//
+;// The 8x8 2D IDCT is performed by calculating a 1D IDCT for each
+;// column and then a 1D IDCT for each row.
+;//
+;// The 8-point 1D IDCT is defined by
+;// f(x) = (C(0)*T(0)*c(0,x) + ... + C(7)*T(7)*c(7,x))/2
+;//
+;// C(u) = 1/sqrt(2) if u=0 or 1 if u!=0
+;// c(u,x) = cos( (2x+1)*u*pi/16 )
+;//
+;// We compute the 8-point 1D IDCT using the reverse of
+;// the Arai-Agui-Nakajima flow graph which we split into
+;// 5 stages named in reverse order to identify with the
+;// forward DCT. Direct inversion of the forward formulae
+;// in file FDCT_s.s gives:
+;//
+;// IStage 5: j(u) = T(u)*A(u) [ A(u)=4*C(u)*c(u,0) ]
+;// [ A(0) = 2*sqrt(2)
+;// A(u) = 4*cos(u*pi/16) for (u!=0) ]
+;//
+;// IStage 4: i0 = j0 i1 = j4
+;// i3 = (j2+j6)/2 i2 = (j2-j6)/2
+;// i7 = (j5+j3)/2 i4 = (j5-j3)/2
+;// i5 = (j1+j7)/2 i6 = (j1-j7)/2
+;//
+;// IStage 3: h0 = (i0+i1)/2 h1 = (i0-i1)/2
+;// h2 = (i2*sqrt2)-i3 h3 = i3
+;// h4 = cos(pi/8)*i4 + sin(pi/8)*i6
+;// h6 = -sin(pi/8)*i4 + cos(pi/8)*i6
+;// [ The above two lines rotate by -(pi/8) ]
+;// h5 = (i5-i7)/sqrt2 h7 = (i5+i7)/2
+;//
+;// IStage 2: g0 = (h0+h3)/2 g3 = (h0-h3)/2
+;// g1 = (h1+h2)/2 g2 = (h1-h2)/2
+;// g7 = h7 g6 = h6 - h7
+;// g5 = h5 - g6 g4 = h4 - g5
+;//
+;// IStage 1: f0 = (g0+g7)/2 f7 = (g0-g7)/2
+;// f1 = (g1+g6)/2 f6 = (g1-g6)/2
+;// f2 = (g2+g5)/2 f5 = (g2-g5)/2
+;// f3 = (g3+g4)/2 f4 = (g3-g4)/2
+;//
+;// Note that most coefficients are halved 3 times during the
+;// above calculation. We can rescale the algorithm dividing
+;// the input by 8 to remove the halvings.
+;//
+;// IStage 5: j(u) = T(u)*A(u)/8
+;//
+;// IStage 4: i0 = j0 i1 = j4
+;// i3 = j2 + j6 i2 = j2 - j6
+;// i7 = j5 + j3 i4 = j5 - j3
+;// i5 = j1 + j7 i6 = j1 - j7
+;//
+;// IStage 3: h0 = i0 + i1 h1 = i0 - i1
+;// h2 = (i2*sqrt2)-i3 h3 = i3
+;// h4 = 2*( cos(pi/8)*i4 + sin(pi/8)*i6)
+;// h6 = 2*(-sin(pi/8)*i4 + cos(pi/8)*i6)
+;// h5 = (i5-i7)*sqrt2 h7 = i5 + i7
+;//
+;// IStage 2: g0 = h0 + h3 g3 = h0 - h3
+;// g1 = h1 + h2 g2 = h1 - h2
+;// g7 = h7 g6 = h6 - h7
+;// g5 = h5 - g6 g4 = h4 - g5
+;//
+;// IStage 1: f0 = g0 + g7 f7 = g0 - g7
+;// f1 = g1 + g6 f6 = g1 - g6
+;// f2 = g2 + g5 f5 = g2 - g5
+;// f3 = g3 + g4 f4 = g3 - g4
+;//
+;// Note:
+;// 1. The scaling by A(u)/8 can often be combined with inverse
+;// quantization. The column and row scalings can be combined.
+;// 2. The flowgraph in the AAN paper has h4,g6 negated compared
+;// to the above code but is otherwise identical.
+;// 3. The rotation by -pi/8 can be performed using three multiplies
+;// Eg c*i4+s*i6 = (i6-i4)*s + (c+s)*i4
+;// -s*i4+c*i6 = (i6-i4)*s + (c-s)*i6
+;// 4. If |T(u)|<=1 then from the IDCT definition,
+;// |f(x)| <= ((1/sqrt2) + |c(1,x)| + .. + |c(7,x)|)/2
+;// = ((1/sqrt2) + cos(pi/16) + ... + cos(7*pi/16))/2
+;// = ((1/sqrt2) + (cot(pi/32)-1)/2)/2
+;// = (1 + cos(pi/16) + cos(2pi/16) + cos(3pi/16))/sqrt(2)
+;// = (approx)2.64
+;// So the max gain of the 2D IDCT is ~x7.0 = 3 bits.
+;// The table below shows input patterns generating the maximum
+;// value of |f(x)| for input in the range |T(u)|<=1. M=-1, P=+1
+;// InputPattern Max |f(x)|
+;// PPPPPPPP |f0| = 2.64
+;// PPPMMMMM |f1| = 2.64
+;// PPMMMPPP |f2| = 2.64
+;// PPMMPPMM |f3| = 2.64
+;// PMMPPMMP |f4| = 2.64
+;// PMMPMMPM |f5| = 2.64
+;// PMPPMPMP |f6| = 2.64
+;// PMPMPMPM |f7| = 2.64
+;// Note that this input pattern is the transpose of the
+;// corresponding max input pattern for the FDCT.
+
+;// Arguments
+
+pSrc RN 0 ;// source data buffer
+Stride RN 1 ;// destination stride in bytes
+pDest RN 2 ;// destination data buffer
+pScale RN 3 ;// pointer to scaling table
+
+
+ ;// DCT Inverse Macro
+ ;// The DCT code should be parametrized according
+ ;// to the following inputs:
+ ;// $outsize = "u8" : 8-bit unsigned data saturated (0 to +255)
+ ;// "s9" : 16-bit signed data saturated to 9-bit (-256 to +255)
+ ;// "s16" : 16-bit signed data not saturated (max size ~+/-14273)
+ ;// $inscale = "s16" : signed 16-bit aan-scale table, Q15 format, with 4 byte alignment
+ ;// "s32" : signed 32-bit aan-scale table, Q23 format, with 4 byte alignment
+ ;//
+ ;// Inputs:
+ ;// pSrc = r0 = Pointer to input data
+ ;// Range is -256 to +255 (9-bit)
+ ;// Stride = r1 = Stride between output (destination) lines, in bytes
+ ;// pDest = r2 = Pointer to output data
+ ;// pScale = r3 = Pointer to aan-scale table in the format defined by $inscale
+
+
+
+ MACRO
+ M_IDCT $outsize, $inscale, $stride
+ LCLA SHIFT
+
+
+ IF ARM1136JS
+
+;// REGISTER ALLOCATION
+;// This is hard since we have 8 values, 9 free registers and each
+;// butterfly requires a temporary register. We also want to
+;// maintain register order so we can use LDM/STM. The table below
+;// summarises the register allocation that meets all these criteria.
+;// a=1stcol, b=2ndcol, f,g,h,i are dataflow points described above.
+;//
+;// r1 a01 g0 h0
+;// r4 b01 f0 g1 h1 i0
+;// r5 a23 f1 g2 i1
+;// r6 b23 f2 g3 h2 i2
+;// r7 a45 f3 h3 i3
+;// r8 b45 f4 g4 h4 i4
+;// r9 a67 f5 g5 h5 i5
+;// r10 b67 f6 g6 h6 i6
+;// r11 f7 g7 h7 i7
+;//
+ra01 RN 1
+rb01 RN 4
+ra23 RN 5
+rb23 RN 6
+ra45 RN 7
+rb45 RN 8
+ra67 RN 9
+rb67 RN 10
+rtmp RN 11
+csPiBy8 RN 12 ;// [ (Sin(pi/8)@Q15), (Cos(pi/8)@Q15) ]
+LoopRR2 RN 14 ;// [ LoopNumber<<13 , (1/Sqrt(2))@Q15 ]
+;// Transpose allocation
+xft RN ra01
+xf0 RN rb01
+xf1 RN ra23
+xf2 RN rb23
+xf3 RN ra45
+xf4 RN rb45
+xf5 RN ra67
+xf6 RN rb67
+xf7 RN rtmp
+;// IStage 1 allocation
+xg0 RN xft
+xg1 RN xf0
+xg2 RN xf1
+xg3 RN xf2
+xgt RN xf3
+xg4 RN xf4
+xg5 RN xf5
+xg6 RN xf6
+xg7 RN xf7
+;// IStage 2 allocation
+xh0 RN xg0
+xh1 RN xg1
+xht RN xg2
+xh2 RN xg3
+xh3 RN xgt
+xh4 RN xg4
+xh5 RN xg5
+xh6 RN xg6
+xh7 RN xg7
+;// IStage 3,4 allocation
+xit RN xh0
+xi0 RN xh1
+xi1 RN xht
+xi2 RN xh2
+xi3 RN xh3
+xi4 RN xh4
+xi5 RN xh5
+xi6 RN xh6
+xi7 RN xh7
+
+ M_STR pDest, ppDest
+ IF "$stride"="s"
+ M_STR Stride, pStride
+ ENDIF
+ M_ADR pDest, pBlk
+ LDR csPiBy8, =0x30fc7642
+ LDR LoopRR2, =0x00005a82
+
+v6_idct_col$_F
+ ;// Load even values
+ LDR xi4, [pSrc], #4 ;// j0
+ LDR xi5, [pSrc, #4*16-4] ;// j4
+ LDR xi6, [pSrc, #2*16-4] ;// j2
+ LDR xi7, [pSrc, #6*16-4] ;// j6
+
+ ;// Scale Even Values
+ IF "$inscale"="s16" ;// 16x16 mul
+SHIFT SETA 12
+ LDR xi0, [pScale], #4
+ LDR xi1, [pScale, #4*16-4]
+ LDR xi2, [pScale, #2*16-4]
+ MOV xit, #1<<(SHIFT-1)
+ SMLABB xi3, xi0, xi4, xit
+ SMLATT xi4, xi0, xi4, xit
+ SMLABB xi0, xi1, xi5, xit
+ SMLATT xi5, xi1, xi5, xit
+ MOV xi3, xi3, ASR #SHIFT
+ PKHBT xi4, xi3, xi4, LSL #(16-SHIFT)
+ LDR xi3, [pScale, #6*16-4]
+ SMLABB xi1, xi2, xi6, xit
+ SMLATT xi6, xi2, xi6, xit
+ MOV xi0, xi0, ASR #SHIFT
+ PKHBT xi5, xi0, xi5, LSL #(16-SHIFT)
+ SMLABB xi2, xi3, xi7, xit
+ SMLATT xi7, xi3, xi7, xit
+ MOV xi1, xi1, ASR #SHIFT
+ PKHBT xi6, xi1, xi6, LSL #(16-SHIFT)
+ MOV xi2, xi2, ASR #SHIFT
+ PKHBT xi7, xi2, xi7, LSL #(16-SHIFT)
+ ENDIF
+ IF "$inscale"="s32" ;// 32x16 mul
+SHIFT SETA (12+8-16)
+ MOV xit, #1<<(SHIFT-1)
+ LDR xi0, [pScale], #8
+ LDR xi1, [pScale, #0*32+4-8]
+ LDR xi2, [pScale, #4*32-8]
+ LDR xi3, [pScale, #4*32+4-8]
+ SMLAWB xi0, xi0, xi4, xit
+ SMLAWT xi1, xi1, xi4, xit
+ SMLAWB xi2, xi2, xi5, xit
+ SMLAWT xi3, xi3, xi5, xit
+ MOV xi0, xi0, ASR #SHIFT
+ PKHBT xi4, xi0, xi1, LSL #(16-SHIFT)
+ MOV xi2, xi2, ASR #SHIFT
+ PKHBT xi5, xi2, xi3, LSL #(16-SHIFT)
+ LDR xi0, [pScale, #2*32-8]
+ LDR xi1, [pScale, #2*32+4-8]
+ LDR xi2, [pScale, #6*32-8]
+ LDR xi3, [pScale, #6*32+4-8]
+ SMLAWB xi0, xi0, xi6, xit
+ SMLAWT xi1, xi1, xi6, xit
+ SMLAWB xi2, xi2, xi7, xit
+ SMLAWT xi3, xi3, xi7, xit
+ MOV xi0, xi0, ASR #SHIFT
+ PKHBT xi6, xi0, xi1, LSL #(16-SHIFT)
+ MOV xi2, xi2, ASR #SHIFT
+ PKHBT xi7, xi2, xi3, LSL #(16-SHIFT)
+ ENDIF
+
+ ;// Load odd values
+ LDR xi0, [pSrc, #1*16-4] ;// j1
+ LDR xi1, [pSrc, #7*16-4] ;// j7
+ LDR xi2, [pSrc, #5*16-4] ;// j5
+ LDR xi3, [pSrc, #3*16-4] ;// j3
+
+ IF {TRUE}
+ ;// shortcut if odd values 0
+ TEQ xi0, #0
+ TEQEQ xi1, #0
+ TEQEQ xi2, #0
+ TEQEQ xi3, #0
+ BEQ v6OddZero$_F
+ ENDIF
+
+ ;// Store scaled even values
+ STMIA pDest, {xi4, xi5, xi6, xi7}
+
+ ;// Scale odd values
+ IF "$inscale"="s16"
+ ;// Perform AAN Scale
+ LDR xi4, [pScale, #1*16-4]
+ LDR xi5, [pScale, #7*16-4]
+ LDR xi6, [pScale, #5*16-4]
+ SMLABB xi7, xi0, xi4, xit
+ SMLATT xi0, xi0, xi4, xit
+ SMLABB xi4, xi1, xi5, xit
+ SMLATT xi1, xi1, xi5, xit
+ MOV xi7, xi7, ASR #SHIFT
+ PKHBT xi0, xi7, xi0, LSL #(16-SHIFT)
+ LDR xi7, [pScale, #3*16-4]
+ SMLABB xi5, xi2, xi6, xit
+ SMLATT xi2, xi2, xi6, xit
+ MOV xi4, xi4, ASR #SHIFT
+ PKHBT xi1, xi4, xi1, LSL #(16-SHIFT)
+ SMLABB xi6, xi3, xi7, xit
+ SMLATT xi3, xi3, xi7, xit
+ MOV xi5, xi5, ASR #SHIFT
+ PKHBT xi2, xi5, xi2, LSL #(16-SHIFT)
+ MOV xi6, xi6, ASR #SHIFT
+ PKHBT xi3, xi6, xi3, LSL #(16-SHIFT)
+ ENDIF
+ IF "$inscale"="s32" ;// 32x16 mul
+ LDR xi4, [pScale, #1*32-8]
+ LDR xi5, [pScale, #1*32+4-8]
+ LDR xi6, [pScale, #7*32-8]
+ LDR xi7, [pScale, #7*32+4-8]
+ SMLAWB xi4, xi4, xi0, xit
+ SMLAWT xi5, xi5, xi0, xit
+ SMLAWB xi6, xi6, xi1, xit
+ SMLAWT xi7, xi7, xi1, xit
+ MOV xi4, xi4, ASR #SHIFT
+ PKHBT xi0, xi4, xi5, LSL #(16-SHIFT)
+ MOV xi6, xi6, ASR #SHIFT
+ PKHBT xi1, xi6, xi7, LSL #(16-SHIFT)
+ LDR xi4, [pScale, #5*32-8]
+ LDR xi5, [pScale, #5*32+4-8]
+ LDR xi6, [pScale, #3*32-8]
+ LDR xi7, [pScale, #3*32+4-8]
+ SMLAWB xi4, xi4, xi2, xit
+ SMLAWT xi5, xi5, xi2, xit
+ SMLAWB xi6, xi6, xi3, xit
+ SMLAWT xi7, xi7, xi3, xit
+ MOV xi4, xi4, ASR #SHIFT
+ PKHBT xi2, xi4, xi5, LSL #(16-SHIFT)
+ MOV xi6, xi6, ASR #SHIFT
+ PKHBT xi3, xi6, xi7, LSL #(16-SHIFT)
+ ENDIF
+
+ LDR xit, =0x00010001 ;// rounding constant
+ SADD16 xi5, xi0, xi1 ;// (j1+j7)/2
+ SHADD16 xi5, xi5, xit
+
+ SSUB16 xi6, xi0, xi1 ;// j1-j7
+ SADD16 xi7, xi2, xi3 ;// (j5+j3)/2
+ SHADD16 xi7, xi7, xit
+
+ SSUB16 xi4, xi2, xi3 ;// j5-j3
+
+ SSUB16 xi3, xi5, xi7 ;// (i5-i7)/2
+
+ PKHBT xi0, xi6, xi4, LSL#16 ;// [i4,i6] row a
+ PKHTB xi1, xi4, xi6, ASR#16 ;// [i4,i6] row b
+
+ SMUADX xi2, xi0, csPiBy8 ;// rowa by [c,s]
+ SMUADX xi4, xi1, csPiBy8 ;// rowb by [c,s]
+ SMUSD xi0, xi0, csPiBy8 ;// rowa by [-s,c]
+ SMUSD xi6, xi1, csPiBy8 ;// rowb by [-s,c]
+
+ SMULBB xi1, xi3, LoopRR2
+ SMULTB xi3, xi3, LoopRR2
+
+ PKHTB xh4, xi4, xi2, ASR#16 ;// h4/4
+ PKHTB xh6, xi6, xi0, ASR#16 ;// h6/4
+ SHADD16 xh7, xi5, xi7 ;// (i5+i7)/4
+
+ ;// xi0,xi1,xi2,xi3 now free
+ ;// IStage 4,3, rows 2to3 x1/2
+
+ MOV xi3, xi3, LSL #1
+ PKHTB xh5, xi3, xi1, ASR#15 ;// h5/4
+ LDRD xi0, [pDest, #8] ;// j2,j6 scaled
+
+ ;// IStage 2, rows4to7
+ SSUB16 xg6, xh6, xh7
+ SSUB16 xg5, xh5, xg6
+ SSUB16 xg4, xh4, xg5
+
+ SSUB16 xi2, xi0, xi1 ;// (j2-j6)
+
+ SHADD16 xi3, xi0, xi1 ;// (j2+j6)/2
+
+ SMULBB xi0, xi2, LoopRR2
+ SMULTB xi2, xi2, LoopRR2
+
+ MOV xi2, xi2, LSL #1
+ PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4
+
+ ;// xi0, xi1 now free
+ ;// IStage 4,3 rows 0to1 x 1/2
+ LDRD xi0, [pDest] ;// j0, j4 scaled
+ SSUB16 xh2, xh2, xi3
+ ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows
+
+ SHADD16 xh0, xi0, xi1
+ SHSUB16 xh1, xi0, xi1
+
+ ;// IStage 2 rows 0to3 x 1/2
+ SHSUB16 xg2, xh1, xh2
+ SHADD16 xg1, xh1, xh2
+ SHSUB16 xg3, xh0, xh3
+ SHADD16 xg0, xh0, xh3
+
+ ;// IStage 1 all rows
+ SADD16 xf3, xg3, xg4
+ SSUB16 xf4, xg3, xg4
+ SADD16 xf2, xg2, xg5
+ SSUB16 xf5, xg2, xg5
+ SADD16 xf1, xg1, xg6
+ SSUB16 xf6, xg1, xg6
+ SADD16 xf0, xg0, xg7
+ SSUB16 xf7, xg0, xg7
+
+ ;// Transpose, store and loop
+ PKHBT ra01, xf0, xf1, LSL #16
+ PKHTB rb01, xf1, xf0, ASR #16
+
+ PKHBT ra23, xf2, xf3, LSL #16
+ PKHTB rb23, xf3, xf2, ASR #16
+
+ PKHBT ra45, xf4, xf5, LSL #16
+ PKHTB rb45, xf5, xf4, ASR #16
+
+ PKHBT ra67, xf6, xf7, LSL #16
+ STMIA pDest!, {ra01, ra23, ra45, ra67}
+ PKHTB rb67, xf7, xf6, ASR #16
+ STMIA pDest!, {rb01, rb23, rb45, rb67}
+ BCC v6_idct_col$_F
+
+ SUB pSrc, pDest, #(64*2)
+ M_LDR pDest, ppDest
+ IF "$stride"="s"
+ M_LDR pScale, pStride
+ ENDIF
+ B v6_idct_row$_F
+
+v6OddZero$_F
+ SSUB16 xi2, xi6, xi7 ;// (j2-j6)
+ SHADD16 xi3, xi6, xi7 ;// (j2+j6)/2
+
+ SMULBB xi0, xi2, LoopRR2
+ SMULTB xi2, xi2, LoopRR2
+
+ MOV xi2, xi2, LSL #1
+ PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4
+ SSUB16 xh2, xh2, xi3
+
+ ;// xi0, xi1 now free
+ ;// IStage 4,3 rows 0to1 x 1/2
+
+ SHADD16 xh0, xi4, xi5
+ SHSUB16 xh1, xi4, xi5
+
+ ;// IStage 2 rows 0to3 x 1/2
+ SHSUB16 xg2, xh1, xh2
+ SHADD16 xg1, xh1, xh2
+ SHSUB16 xg3, xh0, xh3
+ SHADD16 xg0, xh0, xh3
+
+ ;// IStage 1 all rows
+ MOV xf3, xg3
+ MOV xf4, xg3
+ MOV xf2, xg2
+ MOV xf5, xg2
+ MOV xf1, xg1
+ MOV xf6, xg1
+ MOV xf0, xg0
+ MOV xf7, xg0
+
+ ;// Transpose
+ PKHBT ra01, xf0, xf1, LSL #16
+ PKHTB rb01, xf1, xf0, ASR #16
+
+ PKHBT ra23, xf2, xf3, LSL #16
+ PKHTB rb23, xf3, xf2, ASR #16
+
+ PKHBT ra45, xf4, xf5, LSL #16
+ PKHTB rb45, xf5, xf4, ASR #16
+
+ PKHBT ra67, xf6, xf7, LSL #16
+ PKHTB rb67, xf7, xf6, ASR #16
+
+ STMIA pDest!, {ra01, ra23, ra45, ra67}
+ ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows
+ STMIA pDest!, {rb01, rb23, rb45, rb67}
+
+ BCC v6_idct_col$_F
+ SUB pSrc, pDest, #(64*2)
+ M_LDR pDest, ppDest
+ IF "$stride"="s"
+ M_LDR pScale, pStride
+ ENDIF
+
+
+v6_idct_row$_F
+ ;// IStage 4,3, rows4to7 x1/4
+ LDR xit, =0x00010001 ;// rounding constant
+ LDR xi0, [pSrc, #1*16] ;// j1
+ LDR xi1, [pSrc, #7*16] ;// 4*j7
+ LDR xi2, [pSrc, #5*16] ;// j5
+ LDR xi3, [pSrc, #3*16] ;// j3
+
+ SHADD16 xi1, xi1, xit ;// 2*j7
+ SHADD16 xi1, xi1, xit ;// j7
+
+ SHADD16 xi5, xi0, xi1 ;// (j1+j7)/2
+ SSUB16 xi6, xi0, xi1 ;// j1-j7
+ SHADD16 xi7, xi2, xi3 ;// (j5+j3)/2
+ SSUB16 xi4, xi2, xi3 ;// j5-j3
+
+ SSUB16 xi3, xi5, xi7 ;// (i5-i7)/2
+
+ PKHBT xi0, xi6, xi4, LSL#16 ;// [i4,i6] row a
+ PKHTB xi1, xi4, xi6, ASR#16 ;// [i4,i6] row b
+
+ SMUADX xi2, xi0, csPiBy8 ;// rowa by [c,s]
+ SMUADX xi4, xi1, csPiBy8 ;// rowb by [c,s]
+ SMUSD xi0, xi0, csPiBy8 ;// rowa by [-s,c]
+ SMUSD xi6, xi1, csPiBy8 ;// rowb by [-s,c]
+
+ SMULBB xi1, xi3, LoopRR2
+ SMULTB xi3, xi3, LoopRR2
+
+ PKHTB xh4, xi4, xi2, ASR#16 ;// h4/4
+ PKHTB xh6, xi6, xi0, ASR#16 ;// h6/4
+ SHADD16 xh7, xi5, xi7 ;// (i5+i7)/4
+
+ MOV xi3, xi3, LSL #1
+ PKHTB xh5, xi3, xi1, ASR#15 ;// h5/4
+
+ ;// xi0,xi1,xi2,xi3 now free
+ ;// IStage 4,3, rows 2to3 x1/2
+
+ LDR xi0, [pSrc, #2*16] ;// j2
+ LDR xi1, [pSrc, #6*16] ;// 2*j6
+
+ ;// IStage 2, rows4to7
+ SSUB16 xg6, xh6, xh7
+ SSUB16 xg5, xh5, xg6
+ SSUB16 xg4, xh4, xg5
+
+ SHADD16 xi1, xi1, xit ;// j6
+ SSUB16 xi2, xi0, xi1 ;// (j2-j6)
+ SHADD16 xi3, xi0, xi1 ;// (j2+j6)/2
+
+ SMULBB xi0, xi2, LoopRR2
+ SMULTB xi2, xi2, LoopRR2
+
+ MOV xi2, xi2, LSL #1
+
+ PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4
+
+ ;// xi0, xi1 now free
+ ;// IStage 4,3 rows 0to1 x 1/2
+ LDR xi1, [pSrc, #4*16] ;// j4
+ LDR xi0, [pSrc], #4 ;// j0
+
+ SSUB16 xh2, xh2, xi3
+ ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows
+
+ ADD xi0, xi0, xit, LSL #2 ;// ensure correct round
+ SHADD16 xh0, xi0, xi1 ;// of DC result
+ SHSUB16 xh1, xi0, xi1
+
+ ;// IStage 2 rows 0to3 x 1/2
+ SHSUB16 xg2, xh1, xh2
+ SHADD16 xg1, xh1, xh2
+ SHSUB16 xg3, xh0, xh3
+ SHADD16 xg0, xh0, xh3
+
+ ;// IStage 1 all rows
+ SHADD16 xf3, xg3, xg4
+ SHSUB16 xf4, xg3, xg4
+ SHADD16 xf2, xg2, xg5
+ SHSUB16 xf5, xg2, xg5
+ SHADD16 xf1, xg1, xg6
+ SHSUB16 xf6, xg1, xg6
+ SHADD16 xf0, xg0, xg7
+ SHSUB16 xf7, xg0, xg7
+
+ ;// Saturate
+ IF ("$outsize"="u8")
+ USAT16 xf0, #8, xf0
+ USAT16 xf1, #8, xf1
+ USAT16 xf2, #8, xf2
+ USAT16 xf3, #8, xf3
+ USAT16 xf4, #8, xf4
+ USAT16 xf5, #8, xf5
+ USAT16 xf6, #8, xf6
+ USAT16 xf7, #8, xf7
+ ENDIF
+ IF ("$outsize"="s9")
+ SSAT16 xf0, #9, xf0
+ SSAT16 xf1, #9, xf1
+ SSAT16 xf2, #9, xf2
+ SSAT16 xf3, #9, xf3
+ SSAT16 xf4, #9, xf4
+ SSAT16 xf5, #9, xf5
+ SSAT16 xf6, #9, xf6
+ SSAT16 xf7, #9, xf7
+ ENDIF
+
+ ;// Transpose to Row, Pack and store
+ IF ("$outsize"="u8")
+ ORR xf0, xf0, xf1, LSL #8 ;// [ b1 b0 a1 a0 ]
+ ORR xf2, xf2, xf3, LSL #8 ;// [ b3 b2 a3 a2 ]
+ ORR xf4, xf4, xf5, LSL #8 ;// [ b5 b4 a5 a4 ]
+ ORR xf6, xf6, xf7, LSL #8 ;// [ b7 b6 a7 a6 ]
+ PKHBT ra01, xf0, xf2, LSL #16
+ PKHTB rb01, xf2, xf0, ASR #16
+ PKHBT ra23, xf4, xf6, LSL #16
+ PKHTB rb23, xf6, xf4, ASR #16
+ STMIA pDest, {ra01, ra23}
+ IF "$stride"="s"
+ ADD pDest, pDest, pScale
+ STMIA pDest, {rb01, rb23}
+ ADD pDest, pDest, pScale
+ ELSE
+ ADD pDest, pDest, #($stride)
+ STMIA pDest, {rb01, rb23}
+ ADD pDest, pDest, #($stride)
+ ENDIF
+ ENDIF
+ IF ("$outsize"="s9"):LOR:("$outsize"="s16")
+ PKHBT ra01, xf0, xf1, LSL #16
+ PKHTB rb01, xf1, xf0, ASR #16
+
+ PKHBT ra23, xf2, xf3, LSL #16
+ PKHTB rb23, xf3, xf2, ASR #16
+
+ PKHBT ra45, xf4, xf5, LSL #16
+ PKHTB rb45, xf5, xf4, ASR #16
+
+ PKHBT ra67, xf6, xf7, LSL #16
+ PKHTB rb67, xf7, xf6, ASR #16
+
+ STMIA pDest, {ra01, ra23, ra45, ra67}
+ IF "$stride"="s"
+ ADD pDest, pDest, pScale
+ STMIA pDest, {rb01, rb23, rb45, rb67}
+ ADD pDest, pDest, pScale
+ ELSE
+ ADD pDest, pDest, #($stride)
+ STMIA pDest, {rb01, rb23, rb45, rb67}
+ ADD pDest, pDest, #($stride)
+ ENDIF
+ ENDIF
+
+ BCC v6_idct_row$_F
+ ENDIF ;// ARM1136JS
+
+
+ IF CortexA8
+
+Src0 EQU 7
+Src1 EQU 8
+Src2 EQU 9
+Src3 EQU 10
+Src4 EQU 11
+Src5 EQU 12
+Src6 EQU 13
+Src7 EQU 14
+Tmp EQU 15
+
+qXj0 QN Src0.S16
+qXj1 QN Src1.S16
+qXj2 QN Src2.S16
+qXj3 QN Src3.S16
+qXj4 QN Src4.S16
+qXj5 QN Src5.S16
+qXj6 QN Src6.S16
+qXj7 QN Src7.S16
+qXjt QN Tmp.S16
+
+dXj0lo DN (Src0*2).S16
+dXj0hi DN (Src0*2+1).S16
+dXj1lo DN (Src1*2).S16
+dXj1hi DN (Src1*2+1).S16
+dXj2lo DN (Src2*2).S16
+dXj2hi DN (Src2*2+1).S16
+dXj3lo DN (Src3*2).S16
+dXj3hi DN (Src3*2+1).S16
+dXj4lo DN (Src4*2).S16
+dXj4hi DN (Src4*2+1).S16
+dXj5lo DN (Src5*2).S16
+dXj5hi DN (Src5*2+1).S16
+dXj6lo DN (Src6*2).S16
+dXj6hi DN (Src6*2+1).S16
+dXj7lo DN (Src7*2).S16
+dXj7hi DN (Src7*2+1).S16
+dXjtlo DN (Tmp*2).S16
+dXjthi DN (Tmp*2+1).S16
+
+qXi0 QN qXj0
+qXi1 QN qXj4
+qXi2 QN qXj2
+qXi3 QN qXj7
+qXi4 QN qXj5
+qXi5 QN qXjt
+qXi6 QN qXj1
+qXi7 QN qXj6
+qXit QN qXj3
+
+dXi0lo DN dXj0lo
+dXi0hi DN dXj0hi
+dXi1lo DN dXj4lo
+dXi1hi DN dXj4hi
+dXi2lo DN dXj2lo
+dXi2hi DN dXj2hi
+dXi3lo DN dXj7lo
+dXi3hi DN dXj7hi
+dXi4lo DN dXj5lo
+dXi4hi DN dXj5hi
+dXi5lo DN dXjtlo
+dXi5hi DN dXjthi
+dXi6lo DN dXj1lo
+dXi6hi DN dXj1hi
+dXi7lo DN dXj6lo
+dXi7hi DN dXj6hi
+dXitlo DN dXj3lo
+dXithi DN dXj3hi
+
+qXh0 QN qXit
+qXh1 QN qXi0
+qXh2 QN qXi2
+qXh3 QN qXi3
+qXh4 QN qXi7
+qXh5 QN qXi5
+qXh6 QN qXi4
+qXh7 QN qXi1
+qXht QN qXi6
+
+dXh0lo DN dXitlo
+dXh0hi DN dXithi
+dXh1lo DN dXi0lo
+dXh1hi DN dXi0hi
+dXh2lo DN dXi2lo
+dXh2hi DN dXi2hi
+dXh3lo DN dXi3lo
+dXh3hi DN dXi3hi
+dXh4lo DN dXi7lo
+dXh4hi DN dXi7hi
+dXh5lo DN dXi5lo
+dXh5hi DN dXi5hi
+dXh6lo DN dXi4lo
+dXh6hi DN dXi4hi
+dXh7lo DN dXi1lo
+dXh7hi DN dXi1hi
+dXhtlo DN dXi6lo
+dXhthi DN dXi6hi
+
+qXg0 QN qXh2
+qXg1 QN qXht
+qXg2 QN qXh1
+qXg3 QN qXh0
+qXg4 QN qXh4
+qXg5 QN qXh5
+qXg6 QN qXh6
+qXg7 QN qXh7
+qXgt QN qXh3
+
+qXf0 QN qXg6
+qXf1 QN qXg5
+qXf2 QN qXg4
+qXf3 QN qXgt
+qXf4 QN qXg3
+qXf5 QN qXg2
+qXf6 QN qXg1
+qXf7 QN qXg0
+qXft QN qXg7
+
+
+qXt0 QN 1.S32
+qXt1 QN 2.S32
+qT0lo QN 1.S32
+qT0hi QN 2.S32
+qT1lo QN 3.S32
+qT1hi QN 4.S32
+qScalelo QN 5.S32 ;// used to read post scale values
+qScalehi QN 6.S32
+qTemp0 QN 5.S32
+qTemp1 QN 6.S32
+
+
+Scale1 EQU 6
+Scale2 EQU 15
+qScale1 QN Scale1.S16
+qScale2 QN Scale2.S16
+dScale1lo DN (Scale1*2).S16
+dScale1hi DN (Scale1*2+1).S16
+dScale2lo DN (Scale2*2).S16
+dScale2hi DN (Scale2*2+1).S16
+
+dCoefs DN 0.S16 ;// Scale coefficients in format {[0] [C] [S] [InvSqrt2]}
+InvSqrt2 DN dCoefs[0] ;// 1/sqrt(2) in Q15
+S DN dCoefs[1] ;// Sin(PI/8) in Q15
+C DN dCoefs[2] ;// Cos(PI/8) in Q15
+
+pTemp RN 12
+
+
+ IMPORT armCOMM_IDCTCoef
+
+ VLD1 {qXj0,qXj1}, [pSrc @64]!
+ VLD1 {qXj2,qXj3}, [pSrc @64]!
+ VLD1 {qXj4,qXj5}, [pSrc @64]!
+ VLD1 {qXj6,qXj7}, [pSrc @64]!
+
+ ;// Load PreScale and multiply with Src
+ ;// IStage 4 (the prescale macros also do the IStage 4 butterflies, load dCoefs and overwrite pSrc)
+
+ IF "$inscale"="s16" ;// 16X16 Mul
+ M_IDCT_PRESCALE16
+ ENDIF
+
+ IF "$inscale"="s32" ;// 32X32 Mul
+ M_IDCT_PRESCALE32
+ ENDIF
+
+ ;// IStage 3 (column pass)
+ VQDMULH qXi2, qXi2, InvSqrt2 ;// i2/sqrt(2)
+ VHADD qXh0, qXi0, qXi1 ;// (i0+i1)/2
+ VHSUB qXh1, qXi0, qXi1 ;// (i0-i1)/2
+ VHADD qXh7, qXi5, qXi7 ;// (i5+i7)/4
+ VSUB qXh5, qXi5, qXi7 ;// (i5-i7)/2
+ VQDMULH qXh5, qXh5, InvSqrt2 ;// h5/sqrt(2)
+ VSUB qXh2, qXi2, qXi3 ;// h2; h3 is i3 left in place (qXh3 aliases qXi3)
+
+ VMULL qXt0, dXi4lo, C ;// c*i4
+ VMLAL qXt0, dXi6lo, S ;// c*i4+s*i6
+ VMULL qXt1, dXi4hi, C ;// same for the upper four values
+ VMLAL qXt1, dXi6hi, S
+ VSHRN dXh4lo, qXt0, #16 ;// h4
+ VSHRN dXh4hi, qXt1, #16
+
+ VMULL qXt0, dXi6lo, C ;// c*i6
+ VMLSL qXt0, dXi4lo, S ;// -s*i4 + c*i6
+ VMULL qXt1, dXi6hi, C ;// same for the upper four values
+ VMLSL qXt1, dXi4hi, S
+ VSHRN dXh6lo, qXt0, #16 ;// h6
+ VSHRN dXh6hi, qXt1, #16
+
+ ;// IStage 2
+ VSUB qXg6, qXh6, qXh7
+ VSUB qXg5, qXh5, qXg6
+ VSUB qXg4, qXh4, qXg5
+ VHADD qXg1, qXh1, qXh2 ;// (h1+h2)/2
+ VHSUB qXg2, qXh1, qXh2 ;// (h1-h2)/2
+ VHADD qXg0, qXh0, qXh3 ;// (h0+h3)/2
+ VHSUB qXg3, qXh0, qXh3 ;// (h0-h3)/2
+
+ ;// IStage 1 all rows
+ VADD qXf3, qXg3, qXg4
+ VSUB qXf4, qXg3, qXg4
+ VADD qXf2, qXg2, qXg5
+ VSUB qXf5, qXg2, qXg5
+ VADD qXf1, qXg1, qXg6
+ VSUB qXf6, qXg1, qXg6
+ VADD qXf0, qXg0, qXg7
+ VSUB qXf7, qXg0, qXg7
+
+ ;// Transpose, store and loop
+XTR0 EQU Src5
+XTR1 EQU Tmp
+XTR2 EQU Src6
+XTR3 EQU Src7
+XTR4 EQU Src3
+XTR5 EQU Src0
+XTR6 EQU Src1
+XTR7 EQU Src2
+XTRt EQU Src4
+
+qA0 QN XTR0.S32 ;// for XTRpose
+qA1 QN XTR1.S32
+qA2 QN XTR2.S32
+qA3 QN XTR3.S32
+qA4 QN XTR4.S32
+qA5 QN XTR5.S32
+qA6 QN XTR6.S32
+qA7 QN XTR7.S32
+
+dB0 DN XTR0*2+1 ;// for using VSWP
+dB1 DN XTR1*2+1
+dB2 DN XTR2*2+1
+dB3 DN XTR3*2+1
+dB4 DN XTR4*2
+dB5 DN XTR5*2
+dB6 DN XTR6*2
+dB7 DN XTR7*2
+
+
+ VTRN qXf0, qXf1
+ VTRN qXf2, qXf3
+ VTRN qXf4, qXf5
+ VTRN qXf6, qXf7
+ VTRN qA0, qA2
+ VTRN qA1, qA3
+ VTRN qA4, qA6
+ VTRN qA5, qA7
+ VSWP dB0, dB4
+ VSWP dB1, dB5
+ VSWP dB2, dB6
+ VSWP dB3, dB7
+
+
+qYj0 QN qXf0
+qYj1 QN qXf1
+qYj2 QN qXf2
+qYj3 QN qXf3
+qYj4 QN qXf4
+qYj5 QN qXf5
+qYj6 QN qXf6
+qYj7 QN qXf7
+qYjt QN qXft
+
+dYj0lo DN (XTR0*2).S16
+dYj0hi DN (XTR0*2+1).S16
+dYj1lo DN (XTR1*2).S16
+dYj1hi DN (XTR1*2+1).S16
+dYj2lo DN (XTR2*2).S16
+dYj2hi DN (XTR2*2+1).S16
+dYj3lo DN (XTR3*2).S16
+dYj3hi DN (XTR3*2+1).S16
+dYj4lo DN (XTR4*2).S16
+dYj4hi DN (XTR4*2+1).S16
+dYj5lo DN (XTR5*2).S16
+dYj5hi DN (XTR5*2+1).S16
+dYj6lo DN (XTR6*2).S16
+dYj6hi DN (XTR6*2+1).S16
+dYj7lo DN (XTR7*2).S16
+dYj7hi DN (XTR7*2+1).S16
+dYjtlo DN (XTRt*2).S16
+dYjthi DN (XTRt*2+1).S16
+
+qYi0 QN qYj0
+qYi1 QN qYj4
+qYi2 QN qYj2
+qYi3 QN qYj7
+qYi4 QN qYj5
+qYi5 QN qYjt
+qYi6 QN qYj1
+qYi7 QN qYj6
+qYit QN qYj3
+
+dYi0lo DN dYj0lo
+dYi0hi DN dYj0hi
+dYi1lo DN dYj4lo
+dYi1hi DN dYj4hi
+dYi2lo DN dYj2lo
+dYi2hi DN dYj2hi
+dYi3lo DN dYj7lo
+dYi3hi DN dYj7hi
+dYi4lo DN dYj5lo
+dYi4hi DN dYj5hi
+dYi5lo DN dYjtlo
+dYi5hi DN dYjthi
+dYi6lo DN dYj1lo
+dYi6hi DN dYj1hi
+dYi7lo DN dYj6lo
+dYi7hi DN dYj6hi
+dYitlo DN dYj3lo
+dYithi DN dYj3hi
+
+qYh0 QN qYit
+qYh1 QN qYi0
+qYh2 QN qYi2
+qYh3 QN qYi3
+qYh4 QN qYi7
+qYh5 QN qYi5
+qYh6 QN qYi4
+qYh7 QN qYi1
+qYht QN qYi6
+
+dYh0lo DN dYitlo
+dYh0hi DN dYithi
+dYh1lo DN dYi0lo
+dYh1hi DN dYi0hi
+dYh2lo DN dYi2lo
+dYh2hi DN dYi2hi
+dYh3lo DN dYi3lo
+dYh3hi DN dYi3hi
+dYh4lo DN dYi7lo
+dYh4hi DN dYi7hi
+dYh5lo DN dYi5lo
+dYh5hi DN dYi5hi
+dYh6lo DN dYi4lo
+dYh6hi DN dYi4hi
+dYh7lo DN dYi1lo
+dYh7hi DN dYi1hi
+dYhtlo DN dYi6lo
+dYhthi DN dYi6hi
+
+qYg0 QN qYh2
+qYg1 QN qYht
+qYg2 QN qYh1
+qYg3 QN qYh0
+qYg4 QN qYh4
+qYg5 QN qYh5
+qYg6 QN qYh6
+qYg7 QN qYh7
+qYgt QN qYh3
+
+qYf0 QN qYg6
+qYf1 QN qYg5
+qYf2 QN qYg4
+qYf3 QN qYgt
+qYf4 QN qYg3
+qYf5 QN qYg2
+qYf6 QN qYg1
+qYf7 QN qYg0
+qYft QN qYg7
+
+ VRSHR qYj7, qYj7, #2
+ VRSHR qYj6, qYj6, #1
+
+ VHADD qYi5, qYj1, qYj7 ;// i5 = (j1+j7)/2
+ VSUB qYi6, qYj1, qYj7 ;// i6 = j1-j7
+ VHADD qYi3, qYj2, qYj6 ;// i3 = (j2+j6)/2
+ VSUB qYi2, qYj2, qYj6 ;// i2 = j2-j6
+ VHADD qYi7, qYj5, qYj3 ;// i7 = (j5+j3)/2
+ VSUB qYi4, qYj5, qYj3 ;// i4 = j5-j3
+
+ VQDMULH qYi2, qYi2, InvSqrt2 ;// i2/sqrt(2)
+ ;// IStage 4,3 rows 0to1 x 1/2
+
+ MOV pTemp, #0x4 ;// ensure correct round
+ VDUP qScale1, pTemp ;// of DC result
+ VADD qYi0, qYi0, qScale1
+
+ VHADD qYh0, qYi0, qYi1 ;// (i0+i1)/2
+ VHSUB qYh1, qYi0, qYi1 ;// (i0-i1)/2
+
+ VHADD qYh7, qYi5, qYi7 ;// (i5+i7)/4
+ VSUB qYh5, qYi5, qYi7 ;// (i5-i7)/2
+ VSUB qYh2, qYi2, qYi3 ;// h2, h3
+ VQDMULH qYh5, qYh5, InvSqrt2 ;// h5/sqrt(2)
+
+ VMULL qXt0, dYi4lo, C ;// c*i4
+ VMLAL qXt0, dYi6lo, S ;// c*i4+s*i6
+ VMULL qXt1, dYi4hi, C ;// same for the upper four values
+ VMLAL qXt1, dYi6hi, S
+ VSHRN dYh4lo, qXt0, #16 ;// h4
+ VSHRN dYh4hi, qXt1, #16
+
+ VMULL qXt0, dYi6lo, C ;// c*i6
+ VMLSL qXt0, dYi4lo, S ;// -s*i4 + c*i6
+ VMULL qXt1, dYi6hi, C ;// same for the upper four values
+ VMLSL qXt1, dYi4hi, S
+ VSHRN dYh6lo, qXt0, #16 ;// h6
+ VSHRN dYh6hi, qXt1, #16
+
+ VSUB qYg6, qYh6, qYh7
+ VSUB qYg5, qYh5, qYg6
+ VSUB qYg4, qYh4, qYg5
+
+ ;// IStage 2 rows 0to3 x 1/2
+ VHADD qYg1, qYh1, qYh2 ;// (h1+h2)/2
+ VHSUB qYg2, qYh1, qYh2 ;// (h1-h2)/2
+ VHADD qYg0, qYh0, qYh3 ;// (h0+h3)/2
+ VHSUB qYg3, qYh0, qYh3 ;// (h0-h3)/2
+
+
+ ;// IStage 1 all rows
+ VHADD qYf3, qYg3, qYg4
+ VHSUB qYf4, qYg3, qYg4
+ VHADD qYf2, qYg2, qYg5
+ VHSUB qYf5, qYg2, qYg5
+ VHADD qYf1, qYg1, qYg6
+ VHSUB qYf6, qYg1, qYg6
+ VHADD qYf0, qYg0, qYg7
+ VHSUB qYf7, qYg0, qYg7
+
+YTR0 EQU Src0
+YTR1 EQU Src4
+YTR2 EQU Src1
+YTR3 EQU Src2
+YTR4 EQU Src7
+YTR5 EQU Src5
+YTR6 EQU Tmp
+YTR7 EQU Src6
+YTRt EQU Src3
+
+qC0 QN YTR0.S32 ;// for YTRpose
+qC1 QN YTR1.S32
+qC2 QN YTR2.S32
+qC3 QN YTR3.S32
+qC4 QN YTR4.S32
+qC5 QN YTR5.S32
+qC6 QN YTR6.S32
+qC7 QN YTR7.S32
+
+dD0 DN YTR0*2+1 ;// for using VSWP
+dD1 DN YTR1*2+1
+dD2 DN YTR2*2+1
+dD3 DN YTR3*2+1
+dD4 DN YTR4*2
+dD5 DN YTR5*2
+dD6 DN YTR6*2
+dD7 DN YTR7*2
+
+ VTRN qYf0, qYf1
+ VTRN qYf2, qYf3
+ VTRN qYf4, qYf5
+ VTRN qYf6, qYf7
+ VTRN qC0, qC2
+ VTRN qC1, qC3
+ VTRN qC4, qC6
+ VTRN qC5, qC7
+ VSWP dD0, dD4
+ VSWP dD1, dD5
+ VSWP dD2, dD6
+ VSWP dD3, dD7
+
+
+dYf0U8 DN YTR0*2.U8
+dYf1U8 DN YTR1*2.U8
+dYf2U8 DN YTR2*2.U8
+dYf3U8 DN YTR3*2.U8
+dYf4U8 DN YTR4*2.U8
+dYf5U8 DN YTR5*2.U8
+dYf6U8 DN YTR6*2.U8
+dYf7U8 DN YTR7*2.U8
+
+ ;//
+ ;// Do saturation if outsize is other than S16
+ ;//
+
+ IF ("$outsize"="u8")
+ ;// Output range [0-255]
+ VQMOVN dYf0U8, qYf0
+ VQMOVN dYf1U8, qYf1
+ VQMOVN dYf2U8, qYf2
+ VQMOVN dYf3U8, qYf3
+ VQMOVN dYf4U8, qYf4
+ VQMOVN dYf5U8, qYf5
+ VQMOVN dYf6U8, qYf6
+ VQMOVN dYf7U8, qYf7
+ ENDIF
+
+ IF ("$outsize"="s9")
+ ;// Output range [-256 to +255]
+ VQSHL qYf0, qYf0, #16-9
+ VQSHL qYf1, qYf1, #16-9
+ VQSHL qYf2, qYf2, #16-9
+ VQSHL qYf3, qYf3, #16-9
+ VQSHL qYf4, qYf4, #16-9
+ VQSHL qYf5, qYf5, #16-9
+ VQSHL qYf6, qYf6, #16-9
+ VQSHL qYf7, qYf7, #16-9
+
+ VSHR qYf0, qYf0, #16-9
+ VSHR qYf1, qYf1, #16-9
+ VSHR qYf2, qYf2, #16-9
+ VSHR qYf3, qYf3, #16-9
+ VSHR qYf4, qYf4, #16-9
+ VSHR qYf5, qYf5, #16-9
+ VSHR qYf6, qYf6, #16-9
+ VSHR qYf7, qYf7, #16-9
+ ENDIF
+
+ ;// Store the eight output rows; the layout depends on $stride ("s" = stride held in register Stride) and on $outsize
+ IF "$stride"="s" ;// NOTE(review): this branch stores whole Q registers (16 bytes per row) for every $outsize, while the fixed-stride branch stores only the narrowed D register for u8 -- confirm u8 with a register stride is intended/unused
+ VST1 qYf0, [pDest @64], Stride
+ VST1 qYf1, [pDest @64], Stride
+ VST1 qYf2, [pDest @64], Stride
+ VST1 qYf3, [pDest @64], Stride
+ VST1 qYf4, [pDest @64], Stride
+ VST1 qYf5, [pDest @64], Stride
+ VST1 qYf6, [pDest @64], Stride
+ VST1 qYf7, [pDest @64]
+ ELSE
+ IF ("$outsize"="u8")
+ VST1 dYf0U8, [pDest @64], #8 ;// rows packed back to back, 8 bytes each
+ VST1 dYf1U8, [pDest @64], #8
+ VST1 dYf2U8, [pDest @64], #8
+ VST1 dYf3U8, [pDest @64], #8
+ VST1 dYf4U8, [pDest @64], #8
+ VST1 dYf5U8, [pDest @64], #8
+ VST1 dYf6U8, [pDest @64], #8
+ VST1 dYf7U8, [pDest @64]
+ ELSE
+ ;// ("$outsize"="s9") or ("$outsize"="s16"): rows packed back to back, 16 bytes each
+ VST1 qYf0, [pDest @64], #16
+ VST1 qYf1, [pDest @64], #16
+ VST1 qYf2, [pDest @64], #16
+ VST1 qYf3, [pDest @64], #16
+ VST1 qYf4, [pDest @64], #16
+ VST1 qYf5, [pDest @64], #16
+ VST1 qYf6, [pDest @64], #16
+ VST1 qYf7, [pDest @64]
+ ENDIF
+
+ ENDIF
+
+
+
+ ENDIF ;// CortexA8
+
+
+
+ MEND
+
+ ;// Scale TWO input rows with TWO rows of 16 bit scale values
+ ;//
+ ;// This macro is used by M_IDCT_PRESCALE16 to pre-scale two rows of
+ ;// input (eight S16 values each) with two rows of scale values. It also
+ ;// loads the next two rows of scale values from pScale when $LastRow is 0.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $dAlo - Input D register with first four S16 values of row n
+ ;// $dAhi - Input D register with next four S16 values of row n
+ ;// $dBlo - Input D register with first four S16 values of row n+1
+ ;// $dBhi - Input D register with next four S16 values of row n+1
+ ;// pScale - Pointer to the scale values of row n+2
+ ;// qT0lo - Temporary scratch register
+ ;// qT0hi - Temporary scratch register
+ ;// qT1lo - Temporary scratch register
+ ;// qT1hi - Temporary scratch register
+ ;// dScale1lo - Scale values of row n (first four)
+ ;// dScale1hi - Scale values of row n (next four)
+ ;// dScale2lo - Scale values of row n+1 (first four)
+ ;// dScale2hi - Scale values of row n+1 (next four)
+ ;//
+ ;// Input Flag
+ ;//
+ ;// $LastRow - "0" if more rows follow (next scale rows are loaded); any other value skips the load
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $dAlo - Scaled output values (first four S16 of row n)
+ ;// $dAhi - Scaled output values (next four S16 of row n)
+ ;// $dBlo - Scaled output values (first four S16 of row n+1)
+ ;// $dBhi - Scaled output values (next four S16 of row n+1)
+ ;// qScale1 - Scale values for row n+2 (unchanged if $LastRow is not 0)
+ ;// qScale2 - Scale values for row n+3 (unchanged if $LastRow is not 0)
+ ;// pScale - Advanced by two rows (32 bytes) if $LastRow is 0
+ ;//
+ MACRO
+ M_IDCT_SCALE16 $dAlo, $dAhi, $dBlo, $dBhi, $LastRow
+ VMULL qT0lo, $dAlo, dScale1lo ;// widening multiply, row n
+ VMULL qT0hi, $dAhi, dScale1hi
+ VMULL qT1lo, $dBlo, dScale2lo ;// widening multiply, row n+1
+ VMULL qT1hi, $dBhi, dScale2hi
+ IF "$LastRow"="0"
+ VLD1 qScale1, [pScale], #16 ;// Load scale for row n+2
+ VLD1 qScale2, [pScale], #16 ;// Load scale for row n+3
+ ENDIF
+ VQRSHRN $dAlo, qT0lo, #12 ;// round, shift right by 12 and saturate back to 16 bits
+ VQRSHRN $dAhi, qT0hi, #12
+ VQRSHRN $dBlo, qT1lo, #12
+ VQRSHRN $dBhi, qT1hi, #12
+ MEND
+
+ ;// Scale 8x8 block input values with 16 bit scale values
+ ;//
+ ;// This macro is used to pre-scale a block of 8x8 input.
+ ;// It also does the first stage transformations (IStage 4) of the IDCT.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// dXjnlo - n th input D register with first four S16 values
+ ;// dXjnhi - n th input D register with next four S16 values
+ ;// qXjn - n th input Q register with eight S16 values
+ ;// pScale - Pointer to scale values
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// qXin - n th output Q register with eight S16 output values of 1st stage; also loads dCoefs and overwrites pSrc
+ ;//
+ MACRO
+ M_IDCT_PRESCALE16
+ VLD1 qScale1, [pScale], #16 ;// Load Pre scale for row 0
+ VLD1 qScale2, [pScale], #16 ;// Load Pre scale for row 1
+ M_IDCT_SCALE16 dXj0lo, dXj0hi, dXj1lo, dXj1hi, 0 ;// Pre scale row 0 & 1
+ M_IDCT_SCALE16 dXj2lo, dXj2hi, dXj3lo, dXj3hi, 0 ;// Pre scale row 2 & 3
+ M_IDCT_SCALE16 dXj4lo, dXj4hi, dXj5lo, dXj5hi, 0 ;// Pre scale row 4 & 5
+ M_IDCT_SCALE16 dXj6lo, dXj6hi, dXj7lo, dXj7hi, 1 ;// Pre scale row 6 & 7, no further scale load
+ VHADD qXi5, qXj1, qXj7 ;// (j1+j7)/2
+ VSUB qXi6, qXj1, qXj7 ;// j1-j7
+ LDR pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants (pSrc is reused as a scratch pointer)
+ VHADD qXi3, qXj2, qXj6 ;// (j2+j6)/2
+ VSUB qXi2, qXj2, qXj6 ;// j2-j6
+ VLDR dCoefs, [pSrc] ;// Load DCT inverse AAN constants
+ VHADD qXi7, qXj5, qXj3 ;// (j5+j3)/2
+ VSUB qXi4, qXj5, qXj3 ;// j5-j3; i0 and i1 are j0 and j4 left in place (register aliases)
+ MEND
+
+
+ ;// Scale 8x8 block input values with 32 bit scale values
+ ;//
+ ;// This macro is used to pre-scale a block of 8x8 input.
+ ;// It also does the first stage transformations (IStage 4) of the IDCT.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// dXjnlo - n th input D register with first four S16 values
+ ;// dXjnhi - n th input D register with next four S16 values
+ ;// qXjn - n th input Q register with eight S16 values
+ ;// pScale - Pointer to 32bit scale values in Q23 format
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// dXinlo - n th output D register with first four S16 output values of 1st stage
+ ;// dXinhi - n th output D register with next four S16 output values of 1st stage
+ ;//
+ MACRO
+ M_IDCT_PRESCALE32
+qScale0lo QN 0.S32
+qScale0hi QN 1.S32
+qScale1lo QN 2.S32
+qScale1hi QN 3.S32
+qScale2lo QN qScale1lo
+qScale2hi QN qScale1hi
+qScale3lo QN qScale1lo
+qScale3hi QN qScale1hi
+qScale4lo QN qScale1lo
+qScale4hi QN qScale1hi
+qScale5lo QN qScale0lo
+qScale5hi QN qScale0hi
+qScale6lo QN qScale0lo
+qScale6hi QN qScale0hi
+qScale7lo QN qScale0lo
+qScale7hi QN qScale0hi
+
+qSrc0lo QN 4.S32
+qSrc0hi QN 5.S32
+qSrc1lo QN 6.S32
+qSrc1hi QN Src4.S32
+qSrc2lo QN qSrc0lo
+qSrc2hi QN qSrc0hi
+qSrc3lo QN qSrc0lo
+qSrc3hi QN qSrc0hi
+qSrc4lo QN qSrc0lo
+qSrc4hi QN qSrc0hi
+qSrc5lo QN qSrc1lo
+qSrc5hi QN qSrc1hi
+qSrc6lo QN qSrc1lo
+qSrc6hi QN qSrc1hi
+qSrc7lo QN qSrc0lo
+qSrc7hi QN qSrc0hi
+
+qRes17lo QN qScale0lo
+qRes17hi QN qScale0hi
+qRes26lo QN qScale0lo
+qRes26hi QN qScale0hi
+qRes53lo QN qScale0lo
+qRes53hi QN qScale0hi
+
+ ADD pTemp, pScale, #4*8*7 ;// Address of pScale[7]
+
+ ;// Row 0
+ VLD1 {qScale0lo, qScale0hi}, [pScale]!
+ VSHLL qSrc0lo, dXj0lo, #(12-1)
+ VSHLL qSrc0hi, dXj0hi, #(12-1)
+ VLD1 {qScale1lo, qScale1hi}, [pScale]!
+ VQRDMULH qSrc0lo, qScale0lo, qSrc0lo
+ VQRDMULH qSrc0hi, qScale0hi, qSrc0hi
+ VLD1 {qScale7lo, qScale7hi}, [pTemp]!
+ VSHLL qSrc1lo, dXj1lo, #(12-1)
+ VSHLL qSrc1hi, dXj1hi, #(12-1)
+ VMOVN dXi0lo, qSrc0lo ;// Output i0
+ VMOVN dXi0hi, qSrc0hi
+ VSHLL qSrc7lo, dXj7lo, #(12-1)
+ VSHLL qSrc7hi, dXj7hi, #(12-1)
+ SUB pTemp, pTemp, #((16*2)+(4*8*1))
+ VQRDMULH qSrc1lo, qScale1lo, qSrc1lo
+ VQRDMULH qSrc1hi, qScale1hi, qSrc1hi
+ VQRDMULH qSrc7lo, qScale7lo, qSrc7lo
+ VQRDMULH qSrc7hi, qScale7hi, qSrc7hi
+ VLD1 {qScale2lo, qScale2hi}, [pScale]!
+
+ ;// Row 1 & 7
+ VHADD qRes17lo, qSrc1lo, qSrc7lo ;// (j1+j7)/2
+ VHADD qRes17hi, qSrc1hi, qSrc7hi ;// (j1+j7)/2
+ VMOVN dXi5lo, qRes17lo ;// Output i5
+ VMOVN dXi5hi, qRes17hi
+ VSUB qRes17lo, qSrc1lo, qSrc7lo ;// j1-j7
+ VSUB qRes17hi, qSrc1hi, qSrc7hi ;// j1-j7
+ VMOVN dXi6lo, qRes17lo ;// Output i6
+ VMOVN dXi6hi, qRes17hi
+ VSHLL qSrc2lo, dXj2lo, #(12-1)
+ VSHLL qSrc2hi, dXj2hi, #(12-1)
+ VLD1 {qScale6lo, qScale6hi}, [pTemp]!
+ VSHLL qSrc6lo, dXj6lo, #(12-1)
+ VSHLL qSrc6hi, dXj6hi, #(12-1)
+ SUB pTemp, pTemp, #((16*2)+(4*8*1))
+ VQRDMULH qSrc2lo, qScale2lo, qSrc2lo
+ VQRDMULH qSrc2hi, qScale2hi, qSrc2hi
+ VQRDMULH qSrc6lo, qScale6lo, qSrc6lo
+ VQRDMULH qSrc6hi, qScale6hi, qSrc6hi
+ VLD1 {qScale3lo, qScale3hi}, [pScale]!
+
+ ;// Row 2 & 6
+ VHADD qRes26lo, qSrc2lo, qSrc6lo ;// (j2+j6)/2
+ VHADD qRes26hi, qSrc2hi, qSrc6hi ;// (j2+j6)/2
+ VMOVN dXi3lo, qRes26lo ;// Output i3
+ VMOVN dXi3hi, qRes26hi
+ VSUB qRes26lo, qSrc2lo, qSrc6lo ;// j2-j6
+ VSUB qRes26hi, qSrc2hi, qSrc6hi ;// j2-j6
+ VMOVN dXi2lo, qRes26lo ;// Output i2
+ VMOVN dXi2hi, qRes26hi
+ VSHLL qSrc3lo, dXj3lo, #(12-1)
+ VSHLL qSrc3hi, dXj3hi, #(12-1)
+ VLD1 {qScale5lo, qScale5hi}, [pTemp]!
+ VSHLL qSrc5lo, dXj5lo, #(12-1)
+ VSHLL qSrc5hi, dXj5hi, #(12-1)
+ VQRDMULH qSrc3lo, qScale3lo, qSrc3lo
+ VQRDMULH qSrc3hi, qScale3hi, qSrc3hi
+ VQRDMULH qSrc5lo, qScale5lo, qSrc5lo
+ VQRDMULH qSrc5hi, qScale5hi, qSrc5hi
+
+ ;// Row 3 & 5
+ VHADD qRes53lo, qSrc5lo, qSrc3lo ;// (j5+j3)/2
+ VHADD qRes53hi, qSrc5hi, qSrc3hi ;// (j5+j3)/2
+ SUB pSrc, pSrc, #16*2*2 ;// step pSrc back 64 bytes; M_IDCT advanced it by 128 while loading the block, so this addresses row 4
+ VMOVN dXi7lo, qRes53lo ;// Output i7
+ VMOVN dXi7hi, qRes53hi
+ VSUB qRes53lo, qSrc5lo, qSrc3lo ;// j5-j3
+ VSUB qRes53hi, qSrc5hi, qSrc3hi ;// j5-j3
+ VLD1 qXj4, [pSrc @64] ;// reload row 4: its register was used as scratch above (qSrc1hi aliases Src4)
+ VMOVN dXi4lo, qRes53lo ;// Output i4
+ VMOVN dXi4hi, qRes53hi
+ VSHLL qSrc4lo, dXj4lo, #(12-1)
+ VSHLL qSrc4hi, dXj4hi, #(12-1)
+ VLD1 {qScale4lo, qScale4hi}, [pScale] ;// pScale has been advanced past rows 0-3, so this is the row 4 scale
+ LDR pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants (pSrc is reused as a scratch pointer)
+ VQRDMULH qSrc4lo, qScale4lo, qSrc4lo
+ VQRDMULH qSrc4hi, qScale4hi, qSrc4hi
+ VLDR dCoefs, [pSrc] ;// Load DCT inverse AAN constants
+ ;// Row 4
+ VMOVN dXi1lo, qSrc4lo ;// Output i1
+ VMOVN dXi1hi, qSrc4hi
+
+ MEND
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_MaskTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_MaskTable.h
new file mode 100755
index 0000000..51118fd
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_MaskTable.h
@@ -0,0 +1,27 @@
+/**
+ *
+ * File Name: armCOMM_MaskTable.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Mask Table to mask the end of array
+ */
+
+
+
+#ifndef _ARMCOMM_MASKTABLE_H_
+#define _ARMCOMM_MASKTABLE_H_
+
+/* Number of entries in each of the mask tables declared below */
+#define MaskTableSize 72
+
+/* Mask table */
+/* Declarations only; the tables are defined in another translation unit. */
+/* NOTE: this header includes nothing itself, so OMX_U16 and OMX_U8 must */
+/* already be declared by the file that includes it. */
+
+extern const OMX_U16 armCOMM_qMaskTable16[MaskTableSize];
+extern const OMX_U8 armCOMM_qMaskTable8[MaskTableSize];
+
+#endif
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Version.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Version.h
new file mode 100755
index 0000000..41b3e1e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Version.h
@@ -0,0 +1,43 @@
+/* Guard the header against multiple inclusion. */
+#ifndef __ARM_COMM_VERSION_H__
+#define __ARM_COMM_VERSION_H__
+
+
+/* The following line should be in omxtypes.h but hasn't been approved by OpenMAX yet */
+#define OMX_VERSION 102
+
+/* We need to define these macros in order to convert a #define number into a #define string. */
+/* ARM_QUOTE stringizes its argument exactly as written; ARM_INDIRECT adds one level of */
+/* expansion so that a macro argument is replaced by its value before it is stringized. */
+#define ARM_QUOTE(a) #a
+#define ARM_INDIRECT(A) ARM_QUOTE(A)
+
+/* Convert the OMX_VERSION number into a string that can be used, for example, to print it out. */
+/* With the definitions above this expands to the string literal "102". */
+#define ARM_VERSION_STRING ARM_INDIRECT(OMX_VERSION)
+
+
+/* Define this in order to turn on ARM version/release/build strings in each domain */
+#define ARM_INCLUDE_VERSION_DESCRIPTIONS
+
+/* ARM_INCLUDE_VERSION_DESCRIPTIONS is defined unconditionally just above, so these */
+/* declarations are always visible. They are declarations only; each string is */
+/* defined elsewhere. */
+#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS
+ extern const char * const omxAC_VersionDescription;
+ extern const char * const omxIC_VersionDescription;
+ extern const char * const omxIP_VersionDescription;
+ extern const char * const omxSP_VersionDescription;
+ extern const char * const omxVC_VersionDescription;
+#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */
+
+
+/* The following entries should be automatically updated by the release script */
+/* They are used in the ARM version strings defined for each domain. */
+
+/* The release tag associated with this release of the library. - used for source and object releases */
+#define OMX_ARM_RELEASE_TAG "r1p0-00bet0"
+
+/* The ARM architecture used to build any objects or executables in this release. */
+#define OMX_ARM_BUILD_ARCHITECTURE "ARM Architecture V7 with NEON"
+
+/* The ARM Toolchain used to build any objects or executables in this release. */
+#define OMX_ARM_BUILD_TOOLCHAIN "ARM RVCT 3.1"
+
+
+#endif /* __ARM_COMM_VERSION_H__ */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_s.h
new file mode 100755
index 0000000..0956bd1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_s.h
@@ -0,0 +1,1157 @@
+;//
+;//
+;// File Name: armCOMM_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// ARM optimized OpenMAX common header file
+;//
+
+;// Protect against multiple inclusion
+ IF :LNOT::DEF:ARMCOMM_S_H
+ GBLL ARMCOMM_S_H
+
+ REQUIRE8 ;// Requires 8-byte stack alignment
+ PRESERVE8 ;// Preserves 8-byte stack alignment
+
+ GBLL ARM_ERRORCHECK
+ARM_ERRORCHECK SETL {FALSE}
+
+;// Globals
+
+ GBLS _RRegList ;// R saved register list
+ GBLS _DRegList ;// D saved register list
+ GBLS _Variant ;// Selected processor variant
+ GBLS _CPU ;// CPU name
+ GBLS _Struct ;// Structure name
+
+ GBLL _InFunc ;// Inside function assembly flag
+ GBLL _SwLong ;// Long switch flag
+
+ GBLA _RBytes ;// Number of register bytes on stack
+ GBLA _SBytes ;// Number of scratch bytes on stack
+ GBLA _ABytes ;// Stack offset of next argument
+ GBLA _Workspace ;// Stack offset of scratch workspace
+ GBLA _F ;// Function number
+ GBLA _StOff ;// Struct offset
+ GBLA _SwNum ;// Switch number
+ GBLS _32 ;// Suffix for 32 byte alignment
+ GBLS _16 ;// Suffix for 16 byte alignment
+
+;// Initial values. _F and _SwNum are counters used to build per-function
+;// and per-switch symbol names; _32 and _16 are appended to the names of
+;// the symbols created by M_ALLOC32 and M_ALLOC16.
+_InFunc SETL {FALSE}
+_SBytes SETA 0
+_F SETA 0
+_SwNum SETA 0
+_32 SETS "ALIGN32"
+_16 SETS "ALIGN16"
+
+;/////////////////////////////////////////////////////////
+;// Override the tools settings of the CPU if the symbol
+;// OVERRIDECPU is defined, otherwise use the CPU defined
+;// by the assembler settings ({CPU}).
+;/////////////////////////////////////////////////////////
+
+ IF :DEF: OVERRIDECPU
+_CPU SETS OVERRIDECPU
+ ELSE
+_CPU SETS {CPU}
+ ENDIF
+
+
+
+;/////////////////////////////////////////////////////////
+;// Work out which code to build
+;/////////////////////////////////////////////////////////
+
+ ;// Reject the legacy style of selecting a build by pre-defining one of
+ ;// these symbols; sources are expected to call M_VARIANTS instead.
+ IF :DEF:ARM1136JS:LOR::DEF:CortexA8:LOR::DEF:ARM_GENERIC
+ INFO 1,"Please switch to using M_VARIANTS"
+ ENDIF
+
+ ;// Define and reset all officially recognised variants.
+ ;// Expands _M_DEF_VARIANT once for every variant name known to this header.
+ MACRO
+ _M_DEF_VARIANTS
+ _M_DEF_VARIANT ARM926EJS
+ _M_DEF_VARIANT ARM1136JS
+ _M_DEF_VARIANT ARM1136JS_U
+ _M_DEF_VARIANT CortexA8
+ _M_DEF_VARIANT ARM7TDMI
+ MEND
+
+ ;// Declare the logical global $var together with a marker global
+ ;// _ok$var (tested by _M_VARIANT to recognise valid names), then
+ ;// set $var to FALSE.
+ MACRO
+ _M_DEF_VARIANT $var
+ GBLL $var
+ GBLL _ok$var
+$var SETL {FALSE}
+ MEND
+
+
+ ;// Variant declaration
+ ;//
+ ;// Define a list of code variants supported by this
+ ;// source file. This macro then chooses the most
+ ;// appropriate variant to build for the currently configured
+ ;// core.
+ ;//
+ ;// $v0..$v7 = up to eight variant names; unused slots may be left empty.
+ ;// On exit _Variant holds the chosen variant name ("" if none was found)
+ ;// and only the logical global named by _Variant is TRUE; every other
+ ;// recognised variant global is FALSE.
+ ;//
+ MACRO
+ M_VARIANTS $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7
+ ;// Set to TRUE variants that are supported
+ _M_DEF_VARIANTS
+ _M_VARIANT $v0
+ _M_VARIANT $v1
+ _M_VARIANT $v2
+ _M_VARIANT $v3
+ _M_VARIANT $v4
+ _M_VARIANT $v5
+ _M_VARIANT $v6
+ _M_VARIANT $v7
+
+ ;// Look for first available variant to match a CPU
+ ;// _M_TRY cpu, variant fall back list
+_Variant SETS ""
+ _M_TRY ARM926EJ-S, ARM926EJS
+ _M_TRY ARM1176JZ-S, ARM1136JS
+ _M_TRY ARM1176JZF-S, ARM1136JS
+ _M_TRY ARM1156T2-S, ARM1136JS
+ _M_TRY ARM1156T2F-S, ARM1136JS
+ _M_TRY ARM1136J-S, ARM1136JS
+ _M_TRY ARM1136JF-S, ARM1136JS
+ _M_TRY MPCore, ARM1136JS
+ _M_TRY falcon-vfp, ARM1136JS
+ _M_TRY falcon-full-neon, CortexA8
+ _M_TRY Cortex-A8NoNeon, ARM1136JS
+ _M_TRY Cortex-A8, CortexA8, ARM1136JS
+ _M_TRY Cortex-R4, ARM1136JS
+ ;// NOTE(review): the ARM7TDMI entry below has no variant list, so
+ ;// _M_TRY cannot select a variant for that CPU - confirm this is intended.
+ _M_TRY ARM7TDMI
+
+ ;// Select the correct variant
+ ;// (all variant flags are cleared again, then only the chosen one is set)
+ _M_DEF_VARIANTS
+ IF _Variant=""
+ INFO 1, "No match found for CPU '$_CPU'"
+ ELSE
+$_Variant SETL {TRUE}
+ ENDIF
+ MEND
+
+ ;// Register a variant as available
+ ;// $var = variant name; an empty argument is ignored.
+ ;// A name with no _ok$var marker (i.e. one that was not declared by
+ ;// _M_DEF_VARIANT) is reported through INFO as unrecognized.
+ MACRO
+ _M_VARIANT $var
+ IF "$var"=""
+ MEXIT
+ ENDIF
+ IF :LNOT::DEF:_ok$var
+ INFO 1, "Unrecognized variant '$var'"
+ ENDIF
+$var SETL {TRUE}
+ MEND
+
+ ;// For a given CPU, see if any of the variants supporting
+ ;// this CPU are available. The first available variant is
+ ;// chosen
+ ;// $cpu = CPU name to compare against _CPU
+ ;// $v0..$v7 = variants to try, in order of preference
+ ;// Does nothing unless $cpu equals _CPU. When it does match and no
+ ;// listed variant is available, the failure is reported through INFO.
+ MACRO
+ _M_TRY $cpu, $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7
+ IF "$cpu"<>_CPU
+ MEXIT
+ ENDIF
+ _M_TRY1 $v0
+ _M_TRY1 $v1
+ _M_TRY1 $v2
+ _M_TRY1 $v3
+ _M_TRY1 $v4
+ _M_TRY1 $v5
+ _M_TRY1 $v6
+ _M_TRY1 $v7
+ ;// Check a match was found
+ IF _Variant=""
+ INFO 1, "No variant match found for CPU '$_CPU'"
+ ENDIF
+ MEND
+
+ ;// Try a single variant: if no variant has been chosen yet and the
+ ;// logical global $var is TRUE, record $var in _Variant.
+ ;// An empty argument is ignored.
+ MACRO
+ _M_TRY1 $var
+ IF "$var"=""
+ MEXIT
+ ENDIF
+ IF (_Variant=""):LAND:$var
+_Variant SETS "$var"
+ ENDIF
+ MEND
+
+;////////////////////////////////////////////////////////
+;// Structure definition
+;////////////////////////////////////////////////////////
+
+ ;// Declare a structure of given name
+ ;// Records $sname in _Struct and resets the running field offset
+ ;// _StOff to 0; follow with M_FIELD entries and M_ENDSTRUCT.
+ MACRO
+ M_STRUCT $sname
+_Struct SETS "$sname"
+_StOff SETA 0
+ MEND
+
+ ;// Declare a structure field
+ ;// The field is called $sname_$fname
+ ;// $size = the size of each entry, must be power of 2
+ ;// $number = (if provided) the number of entries for an array
+ ;// The running offset _StOff is first rounded up to a multiple of
+ ;// $size, the field symbol is set to that offset, and _StOff is then
+ ;// advanced past the field.
+ MACRO
+ M_FIELD $fname, $size, $number
+ IF (_StOff:AND:($size-1))!=0
+_StOff SETA _StOff + ($size - (_StOff:AND:($size-1)))
+ ENDIF
+$_Struct._$fname EQU _StOff
+ IF "$number"<>""
+_StOff SETA _StOff + $size*$number
+ ELSE
+_StOff SETA _StOff + $size
+ ENDIF
+ MEND
+
+
+ ;// End a structure declaration
+ ;// Defines sizeof_<structure name> as the final offset and clears _Struct.
+ MACRO
+ M_ENDSTRUCT
+sizeof_$_Struct EQU _StOff
+_Struct SETS ""
+ MEND
+
+;//////////////////////////////////////////////////////////
+;// Switch and table macros
+;//////////////////////////////////////////////////////////
+
+ ;// Start a relative switch table with register to switch on
+ ;//
+ ;// $v = the register to switch on
+ ;// $s = if specified must be "L" to indicate long
+ ;// this allows a greater range to the case code
+ ;// ($s only changes the Thumb form: TBH halfword table
+ ;// instead of TBB byte table; the ARM form ignores it)
+ ;// Must be followed immediately by the M_CASE entries.
+ MACRO
+ M_SWITCH $v, $s
+ ASSERT "$s"="":LOR:"$s"="L"
+_SwLong SETL {FALSE}
+ IF "$s"="L"
+_SwLong SETL {TRUE}
+ ENDIF
+_SwNum SETA _SwNum+1
+ IF {CONFIG}=16
+ ;// Thumb
+ IF _SwLong
+ TBH [pc, $v, LSL#1]
+ ELSE
+ TBB [pc, $v]
+ ENDIF
+ ;// Label at the start of the table; M_CASE offsets are relative to it
+_Switch$_SwNum
+ ELSE
+ ;// ARM
+ ;// Computed jump into the table of branch instructions emitted by M_CASE
+ ADD pc, pc, $v, LSL #2
+ NOP
+ ENDIF
+ MEND
+
+ ;// Add a case to the switch statement
+ ;// $label = code label to jump to for this case
+ ;// Thumb: emits a halfword (long switch) or byte table entry holding
+ ;// half the distance from the table start to $label.
+ ;// ARM: emits a branch instruction to $label.
+ MACRO
+ M_CASE $label
+ IF {CONFIG}=16
+ ;// Thumb
+ IF _SwLong
+ DCW ($label - _Switch$_SwNum)/2
+ ELSE
+ DCB ($label - _Switch$_SwNum)/2
+ ENDIF
+ ELSE
+ ;// ARM
+ B $label
+ ENDIF
+ MEND
+
+ ;// End of switch statement
+ ;// Re-aligns the location counter with ALIGN 2 after the table entries.
+ MACRO
+ M_ENDSWITCH
+ ALIGN 2
+ MEND
+
+
+;////////////////////////////////////////////////////////
+;// Data area allocation
+;////////////////////////////////////////////////////////
+
+ ;// Constant table allocator macro
+ ;//
+ ;// Creates a new section for each constant table
+ ;// $name is symbol through which the table can be accessed.
+ ;// $align is the optional alignment of the table, log2 of
+ ;// the byte alignment - $align=4 is 16 byte aligned
+ ;// Must be used outside a function (between M_END and M_START).
+ MACRO
+ M_TABLE $name, $align
+ ASSERT :LNOT:_InFunc
+ IF "$align"=""
+ AREA |.constdata|, READONLY, DATA
+ ELSE
+ ;// AREAs inherit the alignment of the first declaration.
+ ;// Therefore for each alignment size we must have an area
+ ;// of a different name.
+ AREA constdata_a$align, READONLY, DATA, ALIGN=$align
+
+ ;// We also force alignment in case we are tagging onto
+ ;// an already started area.
+ ALIGN (1<<$align)
+ ENDIF
+ ;// Label for the table data that follows the macro call
+$name
+ MEND
+
+;/////////////////////////////////////////////////////
+;// Macros to allocate space on the stack
+;//
+;// These all assume that the stack is 8-byte aligned
+;// at entry to the function, which means that the
+;// 32-byte alignment macro needs to work in a
+;// bit more of a special way...
+;/////////////////////////////////////////////////////
+
+
+
+
+ ;// Allocate 1-byte aligned area of name
+ ;// $name size $size bytes.
+ ;// Must be used before M_START; defines $name (suffixed with the
+ ;// function number) as the current scratch offset and grows _SBytes.
+ MACRO
+ M_ALLOC1 $name, $size
+ ASSERT :LNOT:_InFunc
+$name$_F EQU _SBytes
+_SBytes SETA _SBytes + ($size)
+ MEND
+
+ ;// Allocate 2-byte aligned area of name
+ ;// $name size $size bytes.
+ ;// Must be used before M_START; _SBytes is rounded up to a multiple
+ ;// of 2 before the offset symbol is defined.
+ MACRO
+ M_ALLOC2 $name, $size
+ ASSERT :LNOT:_InFunc
+ IF (_SBytes:AND:1)!=0
+_SBytes SETA _SBytes + (2 - (_SBytes:AND:1))
+ ENDIF
+$name$_F EQU _SBytes
+_SBytes SETA _SBytes + ($size)
+ MEND
+
+ ;// Allocate 4-byte aligned area of name
+ ;// $name size $size bytes.
+ ;// Must be used before M_START; _SBytes is rounded up to a multiple
+ ;// of 4 before the offset symbol is defined.
+ MACRO
+ M_ALLOC4 $name, $size
+ ASSERT :LNOT:_InFunc
+ IF (_SBytes:AND:3)!=0
+_SBytes SETA _SBytes + (4 - (_SBytes:AND:3))
+ ENDIF
+$name$_F EQU _SBytes
+_SBytes SETA _SBytes + ($size)
+ MEND
+
+ ;// Allocate 8-byte aligned area of name
+ ;// $name size $size bytes.
+ ;// Must be used before M_START; _SBytes is rounded up to a multiple
+ ;// of 8 before the offset symbol is defined.
+ MACRO
+ M_ALLOC8 $name, $size
+ ASSERT :LNOT:_InFunc
+ IF (_SBytes:AND:7)!=0
+_SBytes SETA _SBytes + (8 - (_SBytes:AND:7))
+ ENDIF
+$name$_F EQU _SBytes
+_SBytes SETA _SBytes + ($size)
+ MEND
+
+
+ ;// Reserve space for a 16-byte aligned area of name
+ ;// $name size $size bytes: ($size+8) bytes are reserved, starting
+ ;// at an 8-byte aligned offset.
+ ;// The symbol is placed 8 bytes into the reservation so that M_ADR16
+ ;// can round the resulting address down to a 16-byte boundary.
+
+ MACRO
+ M_ALLOC16 $name, $size
+ ASSERT :LNOT:_InFunc
+ IF (_SBytes:AND:7)!=0
+_SBytes SETA _SBytes + (8 - (_SBytes:AND:7))
+ ENDIF
+$name$_F$_16 EQU (_SBytes + 8)
+_SBytes SETA _SBytes + ($size) + 8
+ MEND
+
+ ;// Reserve space for a 32-byte aligned area of name
+ ;// $name size $size bytes: ($size+24) bytes are reserved, starting
+ ;// at an 8-byte aligned offset.
+ ;// The symbol is placed 24 bytes into the reservation so that M_ADR32
+ ;// can round the resulting address down to a 32-byte boundary.
+
+ MACRO
+ M_ALLOC32 $name, $size
+ ASSERT :LNOT:_InFunc
+ IF (_SBytes:AND:7)!=0
+_SBytes SETA _SBytes + (8 - (_SBytes:AND:7))
+ ENDIF
+$name$_F$_32 EQU (_SBytes + 24)
+_SBytes SETA _SBytes + ($size) + 24
+ MEND
+
+
+
+
+ ;// Argument Declaration Macro
+ ;//
+ ;// Allocate an argument name $name
+ ;// size $size bytes
+ ;// Must be used inside a function (after M_START). Defines $name
+ ;// (suffixed with the function number) as the stack offset of the
+ ;// next argument, _ABytes, and advances _ABytes by $size.
+ MACRO
+ M_ARG $name, $size
+ ASSERT _InFunc
+$name$_F EQU _ABytes
+_ABytes SETA _ABytes + ($size)
+ MEND
+
+;///////////////////////////////////////////////
+;// Macros to access stacked variables
+;///////////////////////////////////////////////
+
+ ;// Macro to perform a data processing operation
+ ;// with a constant second operand
+ ;// $op = instruction (with any condition), $rd = destination,
+ ;// $rn = first operand, $const = constant second operand.
+ ;// The constant is applied with one instruction for its lowest 8
+ ;// significant bits and, if bits remain above those, a second
+ ;// instruction on $rd for the remainder.
+ MACRO
+ _M_OPC $op,$rd,$rn,$const
+ LCLA _sh
+ LCLA _cst
+_sh SETA 0
+_cst SETA $const
+ IF _cst=0
+ $op $rd, $rn, #_cst
+ MEXIT
+ ENDIF
+ ;// Strip trailing pairs of zero bits, remembering the shift in _sh
+ WHILE (_cst:AND:3)=0
+_cst SETA _cst>>2
+_sh SETA _sh+2
+ WEND
+ $op $rd, $rn, #(_cst:AND:0x000000FF)<<_sh
+ IF _cst>=256
+ $op $rd, $rd, #(_cst:AND:0xFFFFFF00)<<_sh
+ ENDIF
+ MEND
+
+ ;// Macro to perform a data access operation
+ ;// Such as LDR or STR
+ ;// The addressing mode is modified such that
+ ;// 1. If no address is given then the name is taken
+ ;// as a stack offset
+ ;// 2. If the addressing mode is not available for the
+ ;// state being assembled for (eg Thumb) then a suitable
+ ;// addressing mode is substituted.
+ ;//
+ ;// On Entry:
+ ;// $i = Instruction to perform (eg "LDRB")
+ ;// $a = Required byte alignment
+ ;// $r = Register(s) to transfer (eg "r1")
+ ;// $a0,$a1,$a2. Addressing mode and condition. One of:
+ ;// label {,cc}
+ ;// [base] {,,,cc}
+ ;// [base, offset]{!} {,,cc}
+ ;// [base, offset, shift]{!} {,cc}
+ ;// [base], offset {,,cc}
+ ;// [base], offset, shift {,cc}
+ MACRO
+ _M_DATA $i,$a,$r,$a0,$a1,$a2,$a3
+ IF "$a0":LEFT:1="["
+ ;// Explicit address given
+ IF "$a1"=""
+ ;// [base] only
+ $i$a3 $r, $a0
+ ELSE
+ IF "$a0":RIGHT:1="]"
+ ;// [base], offset {, shift} : post-indexed
+ IF "$a2"=""
+ _M_POSTIND $i$a3, "$r", $a0, $a1
+ ELSE
+ _M_POSTIND $i$a3, "$r", $a0, "$a1,$a2"
+ ENDIF
+ ELSE
+ ;// [base, offset {, shift}]{!} : pre-indexed
+ IF "$a2"=""
+ _M_PREIND $i$a3, "$r", $a0, $a1
+ ELSE
+ _M_PREIND $i$a3, "$r", $a0, "$a1,$a2"
+ ENDIF
+ ENDIF
+ ENDIF
+ ELSE
+ ;// $a0 is the name of a stack location of the current function;
+ ;// $a1 is the optional condition. The offset must satisfy the
+ ;// alignment $a required by the instruction.
+ LCLA _Offset
+_Offset SETA _Workspace + $a0$_F
+ ASSERT (_Offset:AND:($a-1))=0
+ $i$a1 $r, [sp, #_Offset]
+ ENDIF
+ MEND
+
+ ;// Handle post indexed load/stores
+ ;// op reg, [base], offset
+ ;// $i = instruction, $r = register(s), $a0 = "[base]", $a1 = offset.
+ ;// For Thumb the access is split into a plain "[base]" access followed
+ ;// by an explicit ADD or SUB that updates the base register.
+ MACRO
+ _M_POSTIND $i,$r,$a0,$a1
+ LCLS _base
+ LCLS _offset
+ IF {CONFIG}=16 ;// Thumb
+_base SETS ("$a0":LEFT:(:LEN:"$a0"-1)):RIGHT:(:LEN:"$a0"-2) ;// remove []
+_offset SETS "$a1"
+ ;// Drop a leading "+" from the offset
+ IF _offset:LEFT:1="+"
+_offset SETS _offset:RIGHT:(:LEN:_offset-1)
+ ENDIF
+ $i $r, $a0
+ ;// A leading "-" selects SUB (with the sign removed), otherwise ADD
+ IF _offset:LEFT:1="-"
+_offset SETS _offset:RIGHT:(:LEN:_offset-1)
+ SUB $_base, $_base, $_offset
+ ELSE
+ ADD $_base, $_base, $_offset
+ ENDIF
+ ELSE ;// ARM
+ $i $r, $a0, $a1
+ ENDIF
+ MEND
+
+ ;// Handle pre indexed load/store
+ ;// op reg, [base, offset]{!}
+ ;// $i = instruction, $r = register(s), $a0 = "[base", $a1 = "offset]{!}".
+ ;// For Thumb with writeback ("]!") the access is emitted without
+ ;// writeback and followed by an explicit ADD that updates the base.
+ MACRO
+ _M_PREIND $i,$r,$a0,$a1
+ LCLS _base
+ LCLS _offset
+ IF ({CONFIG}=16):LAND:(("$a1":RIGHT:2)="]!")
+_base SETS "$a0":RIGHT:(:LEN:("$a0")-1) ;// strip leading "["
+_offset SETS "$a1":LEFT:(:LEN:("$a1")-2) ;// strip trailing "]!"
+ $i $r, [$_base, $_offset]
+ ADD $_base, $_base, $_offset
+ ELSE
+ $i $r, $a0, $a1
+ ENDIF
+ MEND
+
+ ;// Load/store wrappers around _M_DATA.
+ ;// Each passes the instruction name and the byte alignment that
+ ;// _M_DATA asserts when the address is given as a stack location name;
+ ;// $a0..$a3 are the addressing mode and condition as described there.
+
+ ;// Load unsigned byte from stack
+ MACRO
+ M_LDRB $r,$a0,$a1,$a2,$a3
+ _M_DATA "LDRB",1,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Load signed byte from stack
+ MACRO
+ M_LDRSB $r,$a0,$a1,$a2,$a3
+ _M_DATA "LDRSB",1,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Store byte to stack
+ MACRO
+ M_STRB $r,$a0,$a1,$a2,$a3
+ _M_DATA "STRB",1,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Load unsigned half word from stack
+ MACRO
+ M_LDRH $r,$a0,$a1,$a2,$a3
+ _M_DATA "LDRH",2,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Load signed half word from stack
+ MACRO
+ M_LDRSH $r,$a0,$a1,$a2,$a3
+ _M_DATA "LDRSH",2,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Store half word to stack
+ MACRO
+ M_STRH $r,$a0,$a1,$a2,$a3
+ _M_DATA "STRH",2,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Load word from stack
+ MACRO
+ M_LDR $r,$a0,$a1,$a2,$a3
+ _M_DATA "LDR",4,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Store word to stack
+ MACRO
+ M_STR $r,$a0,$a1,$a2,$a3
+ _M_DATA "STR",4,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Load double word from stack
+ ;// ($r0,$r1 are passed to _M_DATA as one register list)
+ MACRO
+ M_LDRD $r0,$r1,$a0,$a1,$a2,$a3
+ _M_DATA "LDRD",8,"$r0,$r1",$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Store double word to stack
+ ;// ($r0,$r1 are passed to _M_DATA as one register list)
+ MACRO
+ M_STRD $r0,$r1,$a0,$a1,$a2,$a3
+ _M_DATA "STRD",8,"$r0,$r1",$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Get absolute address of stack allocated location
+ ;// $a = destination register, $b = stack location name,
+ ;// $cc = optional condition
+ MACRO
+ M_ADR $a, $b, $cc
+ _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F)
+ MEND
+
+ ;// Get absolute address of stack allocated location and align the address to 16 bytes
+ ;// $a = destination register, $b = name declared with M_ALLOC16,
+ ;// $cc = optional condition
+ MACRO
+ M_ADR16 $a, $b, $cc
+ _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F$_16)
+
+ ;// Now align $a to 16 bytes
+ ;// (rounds down by clearing the low four address bits)
+ BIC$cc $a,$a,#0x0F
+ MEND
+
+ ;// Get absolute address of stack allocated location and align the address to 32 bytes
+ ;// $a = destination register, $b = name declared with M_ALLOC32,
+ ;// $cc = optional condition
+ MACRO
+ M_ADR32 $a, $b, $cc
+ _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F$_32)
+
+ ;// Now align $a to 32 bytes
+ ;// (rounds down by clearing the low five address bits)
+ BIC$cc $a,$a,#0x1F
+ MEND
+
+;//////////////////////////////////////////////////////////
+;// Function header and footer macros
+;//////////////////////////////////////////////////////////
+
+ ;// Function Header Macro
+ ;// Generates the function prologue
+ ;// Note that functions should all be "stack-moves-once"
+ ;// The FNSTART and FNEND macros should be the only places
+ ;// where the stack moves.
+ ;//
+ ;// $name = function name
+ ;// $rreg = "" don't stack any registers
+ ;// "lr" stack "r4,lr" (same as "r4")
+ ;// "rN" stack registers "r4-rM,lr", where M is N rounded
+ ;// up to an even register number (see _M_GETRREGLIST)
+ ;// $dreg = "" don't stack any D registers
+ ;// "dN" stack registers "d8-dN"
+ ;//
+ ;// Note: ARM Architecture procedure call standard AAPCS
+ ;// states that r4-r11, sp, d8-d15 must be preserved by
+ ;// a compliant function.
+ MACRO
+ M_START $name, $rreg, $dreg
+ ASSERT :LNOT:_InFunc
+ ASSERT "$name"!=""
+_InFunc SETL {TRUE}
+_RBytes SETA 0
+_Workspace SETA 0
+
+ ;// Create an area for the function
+ AREA |.text|, CODE
+ EXPORT $name
+$name FUNCTION
+
+ ;// Save R registers
+ _M_GETRREGLIST $rreg
+ IF _RRegList<>""
+ STMFD sp!, {$_RRegList, lr}
+ ENDIF
+
+ ;// Save D registers
+ _M_GETDREGLIST $dreg
+ IF _DRegList<>""
+ VSTMFD sp!, {$_DRegList}
+ ENDIF
+
+
+ ;// Ensure size claimed on stack is 8-byte aligned
+ IF ((_SBytes:AND:7)!=0)
+_SBytes SETA _SBytes + (8 - (_SBytes:AND:7))
+ ENDIF
+
+ ;// Claim the scratch space declared with the M_ALLOCn macros
+ IF (_SBytes!=0)
+ _M_OPC SUB, sp, sp, _SBytes
+ ENDIF
+
+
+ ;// Offset from sp of the first M_ARG: skip scratch and saved registers
+_ABytes SETA _SBytes + _RBytes - _Workspace
+
+
+ ;// Print function name if debug enabled
+ M_PRINTF "$name\n",
+ MEND
+
+ ;// Work out a list of R saved registers
+ ;// $rreg = "", "lr" or "r4".."r12" (highest register to save).
+ ;// Sets _RRegList to the register range to stack (lr is added by the
+ ;// caller) and adds the stacked size, including lr, to _RBytes. Each
+ ;// pair of limits shares a list so that the size is a multiple of 8.
+ ;// Any other value is reported through INFO.
+ MACRO
+ _M_GETRREGLIST $rreg
+ IF "$rreg"=""
+_RRegList SETS ""
+ MEXIT
+ ENDIF
+ IF "$rreg"="lr":LOR:"$rreg"="r4"
+_RRegList SETS "r4"
+_RBytes SETA _RBytes+8
+ MEXIT
+ ENDIF
+ IF "$rreg"="r5":LOR:"$rreg"="r6"
+_RRegList SETS "r4-r6"
+_RBytes SETA _RBytes+16
+ MEXIT
+ ENDIF
+ IF "$rreg"="r7":LOR:"$rreg"="r8"
+_RRegList SETS "r4-r8"
+_RBytes SETA _RBytes+24
+ MEXIT
+ ENDIF
+ IF "$rreg"="r9":LOR:"$rreg"="r10"
+_RRegList SETS "r4-r10"
+_RBytes SETA _RBytes+32
+ MEXIT
+ ENDIF
+ IF "$rreg"="r11":LOR:"$rreg"="r12"
+_RRegList SETS "r4-r12"
+_RBytes SETA _RBytes+40
+ MEXIT
+ ENDIF
+ INFO 1, "Unrecognized saved r register limit '$rreg'"
+ MEND
+
+ ;// Work out a list of D saved registers
+ ;// $dreg = "" or "d8".."d15" (highest D register to save).
+ ;// Sets _DRegList to the range d8-$dreg and adds 8 bytes per saved
+ ;// register to _RBytes. Any other value is reported through INFO.
+ MACRO
+ _M_GETDREGLIST $dreg
+ IF "$dreg"=""
+_DRegList SETS ""
+ MEXIT
+ ENDIF
+ IF "$dreg"="d8"
+_DRegList SETS "d8"
+_RBytes SETA _RBytes+8
+ MEXIT
+ ENDIF
+ IF "$dreg"="d9"
+_DRegList SETS "d8-d9"
+_RBytes SETA _RBytes+16
+ MEXIT
+ ENDIF
+ IF "$dreg"="d10"
+_DRegList SETS "d8-d10"
+_RBytes SETA _RBytes+24
+ MEXIT
+ ENDIF
+ IF "$dreg"="d11"
+_DRegList SETS "d8-d11"
+_RBytes SETA _RBytes+32
+ MEXIT
+ ENDIF
+ IF "$dreg"="d12"
+_DRegList SETS "d8-d12"
+_RBytes SETA _RBytes+40
+ MEXIT
+ ENDIF
+ IF "$dreg"="d13"
+_DRegList SETS "d8-d13"
+_RBytes SETA _RBytes+48
+ MEXIT
+ ENDIF
+ IF "$dreg"="d14"
+_DRegList SETS "d8-d14"
+_RBytes SETA _RBytes+56
+ MEXIT
+ ENDIF
+ IF "$dreg"="d15"
+_DRegList SETS "d8-d15"
+_RBytes SETA _RBytes+64
+ MEXIT
+ ENDIF
+ INFO 1, "Unrecognized saved d register limit '$dreg'"
+ MEND
+
+ ;// Produce function return instructions
+ ;// $cc = optional condition applied to every emitted instruction.
+ ;// Restores the D registers saved by M_START (if any), then returns:
+ ;// with BX lr when no R registers were stacked, otherwise by loading
+ ;// the stacked registers with pc in place of the saved lr.
+ MACRO
+ _M_RET $cc
+ IF _DRegList<>""
+ VPOP$cc {$_DRegList}
+ ENDIF
+ IF _RRegList=""
+ BX$cc lr
+ ELSE
+ LDM$cc.FD sp!, {$_RRegList, pc}
+ ENDIF
+ MEND
+
+ ;// Early Function Exit Macro
+ ;// $cc = condition to exit with
+ ;// (Example: M_EXIT EQ)
+ ;// When the function has scratch space on the stack this branches to
+ ;// the _End label emitted by M_END, which releases it; otherwise the
+ ;// return sequence is emitted in place.
+ MACRO
+ M_EXIT $cc
+ ASSERT _InFunc
+ IF _SBytes!=0
+ ;// Restore stack frame and exit
+ B$cc _End$_F
+ ELSE
+ ;// Can return directly
+ _M_RET $cc
+ ENDIF
+ MEND
+
+ ;// Function Footer Macro
+ ;// Generates the function epilogue
+ ;// Must follow a matching M_START. Emits the _End label used by
+ ;// M_EXIT, releases the scratch space, returns, and closes the
+ ;// FUNCTION opened by M_START.
+ MACRO
+ M_END
+ ASSERT _InFunc
+_InFunc SETL {FALSE}
+_End$_F
+
+ ;// Restore the stack pointer to its original value on function entry
+ IF _SBytes!=0
+ _M_OPC ADD, sp, sp, _SBytes
+ ENDIF
+ _M_RET
+ ENDFUNC
+
+ ;// Reset the global stack tracking variables back to their
+ ;// initial values, and increment the function count
+_SBytes SETA 0
+_F SETA _F+1
+ MEND
+
+
+;//==========================================================================
+;// Debug Macros
+;//==========================================================================
+
+ ;// DEBUG_ON enables the code emitted by M_PRINTF;
+ ;// DEBUG_STALLS_ON enables the code emitted by M_STALL.
+ ;// Both are off by default.
+ GBLL DEBUG_ON
+DEBUG_ON SETL {FALSE}
+ GBLL DEBUG_STALLS_ON
+DEBUG_STALLS_ON SETL {FALSE}
+
+ ;//==========================================================================
+ ;// Debug call to printf
+ ;// M_PRINTF $format, $val0, $val1, $val2
+ ;//
+ ;// Examples:
+ ;// M_PRINTF "x=%08x\n", r0
+ ;//
+ ;// This macro preserves the value of all registers including the
+ ;// flags.
+ ;//
+ ;// $format = format string, $val0..$val2 = up to three registers to
+ ;// print. Emits nothing unless DEBUG_ON is TRUE.
+ ;//==========================================================================
+
+ MACRO
+ M_PRINTF $format, $val0, $val1, $val2
+ IF DEBUG_ON
+
+ IMPORT printf
+ LCLA nArgs
+nArgs SETA 0
+
+ ;// save registers so we don't corrupt them
+ STMFD sp!, {r0-r12, lr}
+
+ ;// Drop stack to give us some workspace
+ SUB sp, sp, #16
+
+ ;// Save registers we need to print to the stack
+ ;// (a later value may only be given if the earlier ones are too)
+ IF "$val2" <> ""
+ ASSERT "$val1" <> ""
+ STR $val2, [sp, #8]
+nArgs SETA nArgs+1
+ ENDIF
+ IF "$val1" <> ""
+ ASSERT "$val0" <> ""
+ STR $val1, [sp, #4]
+nArgs SETA nArgs+1
+ ENDIF
+ IF "$val0"<>""
+ STR $val0, [sp]
+nArgs SETA nArgs+1
+ ENDIF
+
+ ;// Now we are safe to corrupt registers
+ ;// r0 = format string, r1-r3 = the saved values
+ ADR r0, %FT00
+ IF nArgs=1
+ LDR r1, [sp]
+ ENDIF
+ IF nArgs=2
+ LDMIA sp, {r1,r2}
+ ENDIF
+ IF nArgs=3
+ LDMIA sp, {r1,r2,r3}
+ ENDIF
+
+ ;// print the values
+ MRS r4, cpsr ;// preserve flags
+ BL printf
+ MSR cpsr_f, r4 ;// restore flags
+ B %FT01
+00 ;// string to print
+ DCB "$format", 0
+ ALIGN
+01 ;// Finished
+ ADD sp, sp, #16
+ ;// Restore registers
+ LDMFD sp!, {r0-r12,lr}
+
+ ENDIF ;// DEBUG_ON
+ MEND
+
+
+ ;// Stall Simulation Macro
+ ;// Inserts a given number of NOPs for the currently
+ ;// defined platform
+ ;// $plat1stall..$plat6stall = up to six per-platform stall
+ ;// specifications, each handled by _M_STALL_SUB.
+ ;// Emits nothing unless DEBUG_STALLS_ON is TRUE.
+ MACRO
+ M_STALL $plat1stall, $plat2stall, $plat3stall, $plat4stall, $plat5stall, $plat6stall
+ IF DEBUG_STALLS_ON
+ _M_STALL_SUB $plat1stall
+ _M_STALL_SUB $plat2stall
+ _M_STALL_SUB $plat3stall
+ _M_STALL_SUB $plat4stall
+ _M_STALL_SUB $plat5stall
+ _M_STALL_SUB $plat6stall
+ ENDIF
+ MEND
+
+ ;// Handle one stall specification for M_STALL.
+ ;// $platstall = platform symbol name, one separator character, then a
+ ;// single digit stall count (the last character); empty arguments are
+ ;// ignored. If the platform symbol is defined and TRUE, that many
+ ;// "MOV sp, sp" instructions are emitted.
+ ;// NOTE(review): _pllog is declared but not used in this macro.
+ MACRO
+ _M_STALL_SUB $platstall
+ IF "$platstall"!=""
+ LCLA _pllen
+ LCLS _pl
+ LCLL _pllog
+_pllen SETA :LEN:"$platstall"
+_pl SETS "$platstall":LEFT:(_pllen - 2)
+ IF :DEF:$_pl
+ IF $_pl
+ LCLS _st
+ LCLA _stnum
+_st SETS "$platstall":RIGHT:1
+_stnum SETA $_st
+ WHILE _stnum>0
+ MOV sp, sp
+_stnum SETA _stnum - 1
+ WEND
+ ENDIF
+ ENDIF
+ ENDIF
+ MEND
+
+
+
+;//==========================================================================
+;// Endian Invariance Macros
+;//
+;// The idea behind these macros is that if an array is
+;// loaded as words then the SMUL00 macro will multiply
+;// array elements 0 regardless of the endianness of the
+;// system. For little endian SMUL00=SMULBB, for big
+;// endian SMUL00=SMULTT and similarly for other packed operations.
+;//
+;//==========================================================================
+
+ ;// Emit a four-operand instruction chosen by endianness:
+ ;// $comli when assembling little endian, $combi when big endian.
+ ;// $a..$d = operands, $cc = optional condition.
+ MACRO
+ LIBI4 $comli, $combi, $a, $b, $c, $d, $cc
+ IF {ENDIAN}="big"
+ $combi.$cc $a, $b, $c, $d
+ ELSE
+ $comli.$cc $a, $b, $c, $d
+ ENDIF
+ MEND
+
+ ;// Emit a three-operand instruction chosen by endianness:
+ ;// $comli when assembling little endian, $combi when big endian.
+ ;// $a..$c = operands, $cc = optional condition.
+ MACRO
+ LIBI3 $comli, $combi, $a, $b, $c, $cc
+ IF {ENDIAN}="big"
+ $combi.$cc $a, $b, $c
+ ELSE
+ $comli.$cc $a, $b, $c
+ ENDIF
+ MEND
+
+ ;// SMLAxy macros
+ ;// x selects the halfword of the first source, y that of the second:
+ ;// 0 = B (bottom) on little endian, T (top) on big endian
+ ;// 1 = T (top) on little endian, B (bottom) on big endian
+ ;// B/T = always bottom / always top, regardless of endianness
+ ;// $a..$d are the operands of the SMLA instruction, $cc the condition.
+
+ MACRO
+ SMLA00 $a, $b, $c, $d, $cc
+ LIBI4 SMLABB, SMLATT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLA01 $a, $b, $c, $d, $cc
+ LIBI4 SMLABT, SMLATB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLA0B $a, $b, $c, $d, $cc
+ LIBI4 SMLABB, SMLATB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLA0T $a, $b, $c, $d, $cc
+ LIBI4 SMLABT, SMLATT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLA10 $a, $b, $c, $d, $cc
+ LIBI4 SMLATB, SMLABT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLA11 $a, $b, $c, $d, $cc
+ LIBI4 SMLATT, SMLABB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLA1B $a, $b, $c, $d, $cc
+ LIBI4 SMLATB, SMLABB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLA1T $a, $b, $c, $d, $cc
+ LIBI4 SMLATT, SMLABT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAB0 $a, $b, $c, $d, $cc
+ LIBI4 SMLABB, SMLABT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAB1 $a, $b, $c, $d, $cc
+ LIBI4 SMLABT, SMLABB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAT0 $a, $b, $c, $d, $cc
+ LIBI4 SMLATB, SMLATT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAT1 $a, $b, $c, $d, $cc
+ LIBI4 SMLATT, SMLATB, $a, $b, $c, $d, $cc
+ MEND
+
+ ;// SMULxy macros
+ ;// x selects the halfword of the first source, y that of the second:
+ ;// 0 = B (bottom) on little endian, T (top) on big endian
+ ;// 1 = T (top) on little endian, B (bottom) on big endian
+ ;// B/T = always bottom / always top, regardless of endianness
+ ;// $a..$c are the operands of the SMUL instruction, $cc the condition.
+
+ MACRO
+ SMUL00 $a, $b, $c, $cc
+ LIBI3 SMULBB, SMULTT, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMUL01 $a, $b, $c, $cc
+ LIBI3 SMULBT, SMULTB, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMUL0B $a, $b, $c, $cc
+ LIBI3 SMULBB, SMULTB, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMUL0T $a, $b, $c, $cc
+ LIBI3 SMULBT, SMULTT, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMUL10 $a, $b, $c, $cc
+ LIBI3 SMULTB, SMULBT, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMUL11 $a, $b, $c, $cc
+ LIBI3 SMULTT, SMULBB, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMUL1B $a, $b, $c, $cc
+ LIBI3 SMULTB, SMULBB, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMUL1T $a, $b, $c, $cc
+ LIBI3 SMULTT, SMULBT, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMULB0 $a, $b, $c, $cc
+ LIBI3 SMULBB, SMULBT, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMULB1 $a, $b, $c, $cc
+ LIBI3 SMULBT, SMULBB, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMULT0 $a, $b, $c, $cc
+ LIBI3 SMULTB, SMULTT, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMULT1 $a, $b, $c, $cc
+ LIBI3 SMULTT, SMULTB, $a, $b, $c, $cc
+ MEND
+
+ ;// SMLAWx, SMULWx macros
+ ;// x selects the halfword of the second source:
+ ;// 0 = B (bottom) on little endian, T (top) on big endian
+ ;// 1 = T (top) on little endian, B (bottom) on big endian
+
+ MACRO
+ SMLAW0 $a, $b, $c, $d, $cc
+ LIBI4 SMLAWB, SMLAWT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAW1 $a, $b, $c, $d, $cc
+ LIBI4 SMLAWT, SMLAWB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMULW0 $a, $b, $c, $cc
+ LIBI3 SMULWB, SMULWT, $a, $b, $c, $cc
+ MEND
+
+ MACRO
+ SMULW1 $a, $b, $c, $cc
+ LIBI3 SMULWT, SMULWB, $a, $b, $c, $cc
+ MEND
+
+ ;// SMLALxy macros
+ ;// x selects the halfword of the first source, y that of the second:
+ ;// 0 = B (bottom) on little endian, T (top) on big endian
+ ;// 1 = T (top) on little endian, B (bottom) on big endian
+ ;// B/T = always bottom / always top, regardless of endianness
+ ;// $a..$d are the operands of the SMLAL instruction, $cc the condition.
+
+
+ MACRO
+ SMLAL00 $a, $b, $c, $d, $cc
+ LIBI4 SMLALBB, SMLALTT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAL01 $a, $b, $c, $d, $cc
+ LIBI4 SMLALBT, SMLALTB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAL0B $a, $b, $c, $d, $cc
+ LIBI4 SMLALBB, SMLALTB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAL0T $a, $b, $c, $d, $cc
+ LIBI4 SMLALBT, SMLALTT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAL10 $a, $b, $c, $d, $cc
+ LIBI4 SMLALTB, SMLALBT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAL11 $a, $b, $c, $d, $cc
+ LIBI4 SMLALTT, SMLALBB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAL1B $a, $b, $c, $d, $cc
+ LIBI4 SMLALTB, SMLALBB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLAL1T $a, $b, $c, $d, $cc
+ LIBI4 SMLALTT, SMLALBT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLALB0 $a, $b, $c, $d, $cc
+ LIBI4 SMLALBB, SMLALBT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLALB1 $a, $b, $c, $d, $cc
+ LIBI4 SMLALBT, SMLALBB, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLALT0 $a, $b, $c, $d, $cc
+ LIBI4 SMLALTB, SMLALTT, $a, $b, $c, $d, $cc
+ MEND
+
+ MACRO
+ SMLALT1 $a, $b, $c, $d, $cc
+ LIBI4 SMLALTT, SMLALTB, $a, $b, $c, $d, $cc
+ MEND
+
+ ENDIF ;// ARMCOMM_S_H
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armOMX.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armOMX.h
new file mode 100755
index 0000000..7a68d14
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armOMX.h
@@ -0,0 +1,274 @@
+/*
+ *
+ * File Name: armOMX_ReleaseVersion.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * This file allows a version of the OMX DL libraries to be built where some or
+ * all of the function names can be given a user specified suffix.
+ *
+ * You might want to use it where:
+ *
+ * - you want to rename a function "out of the way" so that you could replace
+ * a function with a different version (the original version would still be
+ * in the library just with a different name - so you could debug the new
+ * version by comparing it to the output of the old)
+ *
+ * - you want to rename all the functions to versions with a suffix so that
+ * you can include two versions of the library and choose between functions
+ * at runtime.
+ *
+ * e.g. omxIPBM_Copy_U8_C1R could be renamed omxIPBM_Copy_U8_C1R_CortexA8
+ *
+ */
+
+
+#ifndef _armOMX_H_
+#define _armOMX_H_
+
+
+/* We need to define these two macros in order to expand and concatenate the names.
+ * Two levels are required because operands of ## are not macro-expanded:
+ * OMXCATBAR lets its arguments expand first (e.g. OMXSP_SUFFIX -> BARE_SUFFIX ->
+ * its replacement text), then OMXCAT2BAR pastes "omx", the name and the suffix
+ * into a single identifier. */
+#define OMXCAT2BAR(A, B) omx ## A ## B
+#define OMXCATBAR(A, B) OMXCAT2BAR(A, B)
+
+/* Define the suffix to add to all functions - the default is no suffix
+ * (BARE_SUFFIX expands to nothing, so the pasted name equals the bare name). */
+#define BARE_SUFFIX
+
+
+
+/* Define what happens to the bare suffix-less functions, down to the sub-domain accuracy */
+#define OMXACAAC_SUFFIX BARE_SUFFIX
+#define OMXACMP3_SUFFIX BARE_SUFFIX
+#define OMXICJP_SUFFIX BARE_SUFFIX
+#define OMXIPBM_SUFFIX BARE_SUFFIX
+#define OMXIPCS_SUFFIX BARE_SUFFIX
+#define OMXIPPP_SUFFIX BARE_SUFFIX
+#define OMXSP_SUFFIX BARE_SUFFIX
+#define OMXVCCOMM_SUFFIX BARE_SUFFIX
+#define OMXVCM4P10_SUFFIX BARE_SUFFIX
+#define OMXVCM4P2_SUFFIX BARE_SUFFIX
+
+
+
+
+/* Define what each bare, un-suffixed OpenMAX API function name is to be renamed to */
+#define omxACAAC_DecodeChanPairElt OMXCATBAR(ACAAC_DecodeChanPairElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeDatStrElt OMXCATBAR(ACAAC_DecodeDatStrElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeFillElt OMXCATBAR(ACAAC_DecodeFillElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeIsStereo_S32 OMXCATBAR(ACAAC_DecodeIsStereo_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeMsPNS_S32_I OMXCATBAR(ACAAC_DecodeMsPNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeMsStereo_S32_I OMXCATBAR(ACAAC_DecodeMsStereo_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodePrgCfgElt OMXCATBAR(ACAAC_DecodePrgCfgElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeTNS_S32_I OMXCATBAR(ACAAC_DecodeTNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DeinterleaveSpectrum_S32 OMXCATBAR(ACAAC_DeinterleaveSpectrum_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_EncodeTNS_S32_I OMXCATBAR(ACAAC_EncodeTNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_LongTermPredict_S32 OMXCATBAR(ACAAC_LongTermPredict_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_LongTermReconstruct_S32_I OMXCATBAR(ACAAC_LongTermReconstruct_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_MDCTFwd_S32 OMXCATBAR(ACAAC_MDCTFwd_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_MDCTInv_S32_S16 OMXCATBAR(ACAAC_MDCTInv_S32_S16, OMXACAAC_SUFFIX)
+#define omxACAAC_NoiselessDecode OMXCATBAR(ACAAC_NoiselessDecode, OMXACAAC_SUFFIX)
+#define omxACAAC_QuantInv_S32_I OMXCATBAR(ACAAC_QuantInv_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_UnpackADIFHeader OMXCATBAR(ACAAC_UnpackADIFHeader, OMXACAAC_SUFFIX)
+#define omxACAAC_UnpackADTSFrameHeader OMXCATBAR(ACAAC_UnpackADTSFrameHeader, OMXACAAC_SUFFIX)
+
+
+#define omxACMP3_HuffmanDecode_S32 OMXCATBAR(ACMP3_HuffmanDecode_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_HuffmanDecodeSfb_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfb_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_HuffmanDecodeSfbMbp_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfbMbp_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_MDCTInv_S32 OMXCATBAR(ACMP3_MDCTInv_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_ReQuantize_S32_I OMXCATBAR(ACMP3_ReQuantize_S32_I, OMXACMP3_SUFFIX)
+#define omxACMP3_ReQuantizeSfb_S32_I OMXCATBAR(ACMP3_ReQuantizeSfb_S32_I, OMXACMP3_SUFFIX)
+#define omxACMP3_SynthPQMF_S32_S16 OMXCATBAR(ACMP3_SynthPQMF_S32_S16, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackFrameHeader OMXCATBAR(ACMP3_UnpackFrameHeader, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackScaleFactors_S8 OMXCATBAR(ACMP3_UnpackScaleFactors_S8, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackSideInfo OMXCATBAR(ACMP3_UnpackSideInfo, OMXACMP3_SUFFIX)
+
+#define omxICJP_CopyExpand_U8_C3 OMXCATBAR(ICJP_CopyExpand_U8_C3, OMXICJP_SUFFIX)
+#define omxICJP_DCTFwd_S16 OMXCATBAR(ICJP_DCTFwd_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTFwd_S16_I OMXCATBAR(ICJP_DCTFwd_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTInv_S16 OMXCATBAR(ICJP_DCTInv_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTInv_S16_I OMXCATBAR(ICJP_DCTInv_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_Multiple_S16 OMXCATBAR(ICJP_DCTQuantFwd_Multiple_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_S16 OMXCATBAR(ICJP_DCTQuantFwd_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_S16_I OMXCATBAR(ICJP_DCTQuantFwd_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwdTableInit OMXCATBAR(ICJP_DCTQuantFwdTableInit, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_Multiple_S16 OMXCATBAR(ICJP_DCTQuantInv_Multiple_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_S16 OMXCATBAR(ICJP_DCTQuantInv_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_S16_I OMXCATBAR(ICJP_DCTQuantInv_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInvTableInit OMXCATBAR(ICJP_DCTQuantInvTableInit, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffman8x8_Direct_S16_C1 OMXCATBAR(ICJP_DecodeHuffman8x8_Direct_S16_C1, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffman8x8_Direct_S16_U1_C1 OMXCATBAR(ICJP_EncodeHuffman8x8_Direct_S16_U1_C1, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
+
+#define omxIPBM_AddC_U8_C1R_Sfs OMXCATBAR(IPBM_AddC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
+#define omxIPBM_Copy_U8_C1R OMXCATBAR(IPBM_Copy_U8_C1R, OMXIPBM_SUFFIX)
+#define omxIPBM_Copy_U8_C3R OMXCATBAR(IPBM_Copy_U8_C3R, OMXIPBM_SUFFIX)
+#define omxIPBM_Mirror_U8_C1R OMXCATBAR(IPBM_Mirror_U8_C1R, OMXIPBM_SUFFIX)
+#define omxIPBM_MulC_U8_C1R_Sfs OMXCATBAR(IPBM_MulC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
+
+#define omxIPCS_ColorTwistQ14_U8_C3R OMXCATBAR(IPCS_ColorTwistQ14_U8_C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr420RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr420RszRot_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr422RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R OMXCATBAR(IPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr422RszRot_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbYCr422ToBGR565_U8_U16_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR565_U8_U16_C2C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbYCr422ToBGR888_U8_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR888_U8_C2C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R OMXCATBAR(IPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToYCbCr420Rotate_U8_P3R OMXCATBAR(IPCS_YCbCr422ToYCbCr420Rotate_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565_U8_U16_C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR888_U8_C3R OMXCATBAR(IPCS_YCbCr444ToBGR888_U8_C3R, OMXIPCS_SUFFIX)
+
+#define omxIPPP_Deblock_HorEdge_U8_I OMXCATBAR(IPPP_Deblock_HorEdge_U8_I, OMXIPPP_SUFFIX)
+#define omxIPPP_Deblock_VerEdge_U8_I OMXCATBAR(IPPP_Deblock_VerEdge_U8_I, OMXIPPP_SUFFIX)
+#define omxIPPP_FilterFIR_U8_C1R OMXCATBAR(IPPP_FilterFIR_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_FilterMedian_U8_C1R OMXCATBAR(IPPP_FilterMedian_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_GetCentralMoment_S64 OMXCATBAR(IPPP_GetCentralMoment_S64, OMXIPPP_SUFFIX)
+#define omxIPPP_GetSpatialMoment_S64 OMXCATBAR(IPPP_GetSpatialMoment_S64, OMXIPPP_SUFFIX)
+#define omxIPPP_MomentGetStateSize OMXCATBAR(IPPP_MomentGetStateSize, OMXIPPP_SUFFIX)
+#define omxIPPP_MomentInit OMXCATBAR(IPPP_MomentInit, OMXIPPP_SUFFIX)
+#define omxIPPP_Moments_U8_C1R OMXCATBAR(IPPP_Moments_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_Moments_U8_C3R OMXCATBAR(IPPP_Moments_U8_C3R, OMXIPPP_SUFFIX)
+
+#define omxSP_BlockExp_S16 OMXCATBAR(SP_BlockExp_S16, OMXSP_SUFFIX)
+#define omxSP_BlockExp_S32 OMXCATBAR(SP_BlockExp_S32, OMXSP_SUFFIX)
+#define omxSP_Copy_S16 OMXCATBAR(SP_Copy_S16, OMXSP_SUFFIX)
+#define omxSP_DotProd_S16 OMXCATBAR(SP_DotProd_S16, OMXSP_SUFFIX)
+#define omxSP_DotProd_S16_Sfs OMXCATBAR(SP_DotProd_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_CToC_SC16_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_CToC_SC32_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_RToCCS_S16S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S16S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_RToCCS_S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_C_SC16 OMXCATBAR(SP_FFTGetBufSize_C_SC16, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_C_SC32 OMXCATBAR(SP_FFTGetBufSize_C_SC32, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_R_S16S32 OMXCATBAR(SP_FFTGetBufSize_R_S16S32, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_R_S32 OMXCATBAR(SP_FFTGetBufSize_R_S32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_C_SC16 OMXCATBAR(SP_FFTInit_C_SC16, OMXSP_SUFFIX)
+#define omxSP_FFTInit_C_SC32 OMXCATBAR(SP_FFTInit_C_SC32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_R_S16S32 OMXCATBAR(SP_FFTInit_R_S16S32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_R_S32 OMXCATBAR(SP_FFTInit_R_S32, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CCSToR_S32_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CCSToR_S32S16_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CToC_SC16_Sfs OMXCATBAR(SP_FFTInv_CToC_SC16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CToC_SC32_Sfs OMXCATBAR(SP_FFTInv_CToC_SC32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FilterMedian_S32 OMXCATBAR(SP_FilterMedian_S32, OMXSP_SUFFIX)
+#define omxSP_FilterMedian_S32_I OMXCATBAR(SP_FilterMedian_S32_I, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16 OMXCATBAR(SP_FIR_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_I OMXCATBAR(SP_FIR_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_ISfs OMXCATBAR(SP_FIR_Direct_S16_ISfs, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_Sfs OMXCATBAR(SP_FIR_Direct_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16 OMXCATBAR(SP_FIROne_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_I OMXCATBAR(SP_FIROne_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_ISfs OMXCATBAR(SP_FIROne_Direct_S16_ISfs, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_Sfs OMXCATBAR(SP_FIROne_Direct_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_IIR_BiQuadDirect_S16 OMXCATBAR(SP_IIR_BiQuadDirect_S16, OMXSP_SUFFIX)
+#define omxSP_IIR_BiQuadDirect_S16_I OMXCATBAR(SP_IIR_BiQuadDirect_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIR_Direct_S16 OMXCATBAR(SP_IIR_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_IIR_Direct_S16_I OMXCATBAR(SP_IIR_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIROne_BiQuadDirect_S16 OMXCATBAR(SP_IIROne_BiQuadDirect_S16, OMXSP_SUFFIX)
+#define omxSP_IIROne_BiQuadDirect_S16_I OMXCATBAR(SP_IIROne_BiQuadDirect_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIROne_Direct_S16 OMXCATBAR(SP_IIROne_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_IIROne_Direct_S16_I OMXCATBAR(SP_IIROne_Direct_S16_I, OMXSP_SUFFIX)
+
+#define omxVCCOMM_Average_16x OMXCATBAR(VCCOMM_Average_16x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Average_8x OMXCATBAR(VCCOMM_Average_8x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ComputeTextureErrorBlock OMXCATBAR(VCCOMM_ComputeTextureErrorBlock, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ComputeTextureErrorBlock_SAD OMXCATBAR(VCCOMM_ComputeTextureErrorBlock_SAD, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Copy16x16 OMXCATBAR(VCCOMM_Copy16x16, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Copy8x8 OMXCATBAR(VCCOMM_Copy8x8, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ExpandFrame_I OMXCATBAR(VCCOMM_ExpandFrame_I, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_LimitMVToRect OMXCATBAR(VCCOMM_LimitMVToRect, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_SAD_16x OMXCATBAR(VCCOMM_SAD_16x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_SAD_8x OMXCATBAR(VCCOMM_SAD_8x, OMXVCCOMM_SUFFIX)
+
+#define omxVCM4P10_Average_4x OMXCATBAR(VCM4P10_Average_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Half OMXCATBAR(VCM4P10_BlockMatch_Half, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Integer OMXCATBAR(VCM4P10_BlockMatch_Integer, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Quarter OMXCATBAR(VCM4P10_BlockMatch_Quarter, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DeblockChroma_I OMXCATBAR(VCM4P10_DeblockChroma_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DeblockLuma_I OMXCATBAR(VCM4P10_DeblockLuma_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeChromaDcCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DecodeCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DequantTransformResidualFromPairAndAdd OMXCATBAR(VCM4P10_DequantTransformResidualFromPairAndAdd, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingChroma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_HorEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingChroma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_VerEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingLuma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_HorEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingLuma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_VerEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_GetVLCInfo OMXCATBAR(VCM4P10_GetVLCInfo, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateChroma OMXCATBAR(VCM4P10_InterpolateChroma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateHalfHor_Luma OMXCATBAR(VCM4P10_InterpolateHalfHor_Luma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateHalfVer_Luma OMXCATBAR(VCM4P10_InterpolateHalfVer_Luma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateLuma OMXCATBAR(VCM4P10_InterpolateLuma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformDequant_ChromaDC OMXCATBAR(VCM4P10_InvTransformDequant_ChromaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformDequant_LumaDC OMXCATBAR(VCM4P10_InvTransformDequant_LumaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformResidualAndAdd OMXCATBAR(VCM4P10_InvTransformResidualAndAdd, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MEGetBufSize OMXCATBAR(VCM4P10_MEGetBufSize, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MEInit OMXCATBAR(VCM4P10_MEInit, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MotionEstimationMB OMXCATBAR(VCM4P10_MotionEstimationMB, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntra_16x16 OMXCATBAR(VCM4P10_PredictIntra_16x16, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntra_4x4 OMXCATBAR(VCM4P10_PredictIntra_4x4, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntraChroma_8x8 OMXCATBAR(VCM4P10_PredictIntraChroma_8x8, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SAD_4x OMXCATBAR(VCM4P10_SAD_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_16x OMXCATBAR(VCM4P10_SADQuar_16x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_4x OMXCATBAR(VCM4P10_SADQuar_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_8x OMXCATBAR(VCM4P10_SADQuar_8x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SATD_4x4 OMXCATBAR(VCM4P10_SATD_4x4, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SubAndTransformQDQResidual OMXCATBAR(VCM4P10_SubAndTransformQDQResidual, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformDequantChromaDCFromPair OMXCATBAR(VCM4P10_TransformDequantChromaDCFromPair, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformDequantLumaDCFromPair OMXCATBAR(VCM4P10_TransformDequantLumaDCFromPair, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformQuant_ChromaDC OMXCATBAR(VCM4P10_TransformQuant_ChromaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformQuant_LumaDC OMXCATBAR(VCM4P10_TransformQuant_LumaDC, OMXVCM4P10_SUFFIX)
+
+#define omxVCM4P2_BlockMatch_Half_16x16 OMXCATBAR(VCM4P2_BlockMatch_Half_16x16, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Half_8x8 OMXCATBAR(VCM4P2_BlockMatch_Half_8x8, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Integer_16x16 OMXCATBAR(VCM4P2_BlockMatch_Integer_16x16, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Integer_8x8 OMXCATBAR(VCM4P2_BlockMatch_Integer_8x8, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DCT8x8blk OMXCATBAR(VCM4P2_DCT8x8blk, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeBlockCoef_Inter OMXCATBAR(VCM4P2_DecodeBlockCoef_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeBlockCoef_Intra OMXCATBAR(VCM4P2_DecodeBlockCoef_Intra, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodePadMV_PVOP OMXCATBAR(VCM4P2_DecodePadMV_PVOP, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_Inter OMXCATBAR(VCM4P2_DecodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeMV OMXCATBAR(VCM4P2_EncodeMV, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_Inter OMXCATBAR(VCM4P2_EncodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_FindMVpred OMXCATBAR(VCM4P2_FindMVpred, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_IDCT8x8blk OMXCATBAR(VCM4P2_IDCT8x8blk, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MCReconBlock OMXCATBAR(VCM4P2_MCReconBlock, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MEGetBufSize OMXCATBAR(VCM4P2_MEGetBufSize, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MEInit OMXCATBAR(VCM4P2_MEInit, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MotionEstimationMB OMXCATBAR(VCM4P2_MotionEstimationMB, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_PredictReconCoefIntra OMXCATBAR(VCM4P2_PredictReconCoefIntra, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInter_I OMXCATBAR(VCM4P2_QuantInter_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantIntra_I OMXCATBAR(VCM4P2_QuantIntra_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInvInter_I OMXCATBAR(VCM4P2_QuantInvInter_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInvIntra_I OMXCATBAR(VCM4P2_QuantInvIntra_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_TransRecBlockCoef_inter OMXCATBAR(VCM4P2_TransRecBlockCoef_inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_TransRecBlockCoef_intra OMXCATBAR(VCM4P2_TransRecBlockCoef_intra, OMXVCM4P2_SUFFIX)
+
+
+#endif /* _armOMX_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes.h
new file mode 100755
index 0000000..8b295a6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes.h
@@ -0,0 +1,252 @@
+/**
+ * File: omxtypes.h
+ * Brief: Defines basic Data types used in OpenMAX v1.0.2 header files.
+ *
+ * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved.
+ *
+ * These materials are protected by copyright laws and contain material
+ * proprietary to the Khronos Group, Inc. You may use these materials
+ * for implementing Khronos specifications, without altering or removing
+ * any trademark, copyright or other notice from the specification.
+ *
+ * Khronos Group makes no, and expressly disclaims any, representations
+ * or warranties, express or implied, regarding these materials, including,
+ * without limitation, any implied warranties of merchantability or fitness
+ * for a particular purpose or non-infringement of any intellectual property.
+ * Khronos Group makes no, and expressly disclaims any, warranties, express
+ * or implied, regarding the correctness, accuracy, completeness, timeliness,
+ * and reliability of these materials.
+ *
+ * Under no circumstances will the Khronos Group, or any of its Promoters,
+ * Contributors or Members or their respective partners, officers, directors,
+ * employees, agents or representatives be liable for any damages, whether
+ * direct, indirect, special or consequential damages for lost revenues,
+ * lost profits, or otherwise, arising from or in connection with these
+ * materials.
+ *
+ * Khronos and OpenMAX are trademarks of the Khronos Group Inc.
+ *
+ */
+
+#ifndef _OMXTYPES_H_
+#define _OMXTYPES_H_
+
+#include <limits.h>
+
+#define OMX_IN
+#define OMX_OUT
+#define OMX_INOUT
+
+
+typedef enum {
+
+ /* Mandatory return codes - use cases are explicitly described for each function */
+ OMX_Sts_NoErr = 0, /* No error, the function completed successfully */
+ OMX_Sts_Err = -2, /* Unknown/unspecified error */
+ OMX_Sts_InvalidBitstreamValErr = -182, /* Invalid value detected during bitstream processing */
+ OMX_Sts_MemAllocErr = -9, /* Not enough memory allocated for the operation */
+ OMX_StsACAAC_GainCtrErr = -159, /* AAC: Unsupported gain control data detected */
+ OMX_StsACAAC_PrgNumErr = -167, /* AAC: Invalid number of elements for one program */
+ OMX_StsACAAC_CoefValErr = -163, /* AAC: Invalid quantized coefficient value */
+ OMX_StsACAAC_MaxSfbErr = -162, /* AAC: Invalid maxSfb value in relation to numSwb */
+ OMX_StsACAAC_PlsDataErr = -160, /* AAC: pulse escape sequence data error */
+
+ /* Optional return codes - use cases are explicitly described for each function*/
+ OMX_Sts_BadArgErr = -5, /* Bad Arguments */
+
+ OMX_StsACAAC_TnsNumFiltErr = -157, /* AAC: Invalid number of TNS filters */
+ OMX_StsACAAC_TnsLenErr = -156, /* AAC: Invalid TNS region length */
+ OMX_StsACAAC_TnsOrderErr = -155, /* AAC: Invalid order of TNS filter */
+ OMX_StsACAAC_TnsCoefResErr = -154, /* AAC: Invalid bit-resolution for TNS filter coefficients */
+ OMX_StsACAAC_TnsCoefErr = -153, /* AAC: Invalid TNS filter coefficients */
+ OMX_StsACAAC_TnsDirectErr = -152, /* AAC: Invalid TNS filter direction */
+
+ OMX_StsICJP_JPEGMarkerErr = -183, /* JPEG marker encountered within an entropy-coded block; */
+ /* Huffman decoding operation terminated early. */
+ OMX_StsICJP_JPEGMarker = -181, /* JPEG marker encountered; Huffman decoding */
+ /* operation terminated early. */
+ OMX_StsIPPP_ContextMatchErr = -17, /* Context parameter doesn't match to the operation */
+
+ OMX_StsSP_EvenMedianMaskSizeErr = -180, /* Even size of the Median Filter mask was replaced by the odd one */
+
+ OMX_Sts_MaximumEnumeration = INT_MAX /*Placeholder, forces enum of size OMX_INT*/
+
+ } OMXResult; /** Return value or error value returned from a function. Identical to OMX_INT */
+
+
+/* OMX_U8 */
+#if UCHAR_MAX == 0xff
+typedef unsigned char OMX_U8;
+#elif USHRT_MAX == 0xff
+typedef unsigned short int OMX_U8;
+#else
+#error OMX_U8 undefined
+#endif
+
+
+/* OMX_S8 */
+#if SCHAR_MAX == 0x7f
+typedef signed char OMX_S8;
+#elif SHRT_MAX == 0x7f
+typedef signed short int OMX_S8;
+#else
+#error OMX_S8 undefined
+#endif
+
+
+/* OMX_U16 */
+#if USHRT_MAX == 0xffff
+typedef unsigned short int OMX_U16;
+#elif UINT_MAX == 0xffff
+typedef unsigned int OMX_U16;
+#else
+#error OMX_U16 undefined
+#endif
+
+
+/* OMX_S16 */
+#if SHRT_MAX == 0x7fff
+typedef signed short int OMX_S16;
+#elif INT_MAX == 0x7fff
+typedef signed int OMX_S16;
+#else
+#error OMX_S16 undefined
+#endif
+
+
+/* OMX_U32: unsigned 32-bit integer, chosen from the <limits.h> maxima.
+ * The fallback must test ULONG_MAX: LONG_MAX is 0x7fffffff for a 32-bit long,
+ * so a LONG_MAX == 0xffffffff test could never select unsigned long. */
+#if UINT_MAX == 0xffffffff
+typedef unsigned int OMX_U32;
+#elif ULONG_MAX == 0xffffffff
+typedef unsigned long int OMX_U32;
+#else
+#error OMX_U32 undefined
+#endif
+
+
+/* OMX_S32 */
+#if INT_MAX == 0x7fffffff
+typedef signed int OMX_S32;
+#elif LONG_MAX == 0x7fffffff
+typedef long signed int OMX_S32;
+#else
+#error OMX_S32 undefined
+#endif
+
+
+/* OMX_U64 & OMX_S64 */
+#if defined( _WIN32 ) || defined ( _WIN64 )
+ typedef __int64 OMX_S64; /** Signed 64-bit integer */
+ typedef unsigned __int64 OMX_U64; /** Unsigned 64-bit integer */
+ #define OMX_MIN_S64 (0x8000000000000000i64)
+ #define OMX_MIN_U64 (0x0000000000000000i64)
+ #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFi64)
+ #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFi64)
+#else
+ typedef long long OMX_S64; /** Signed 64-bit integer */
+ typedef unsigned long long OMX_U64; /** Unsigned 64-bit integer */
+ #define OMX_MIN_S64 (0x8000000000000000LL)
+ #define OMX_MIN_U64 (0x0000000000000000LL)
+ #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFLL)
+ #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFLL)
+#endif
+
+
+/* OMX_SC8 */
+typedef struct
+{
+ OMX_S8 Re; /** Real part */
+ OMX_S8 Im; /** Imaginary part */
+
+} OMX_SC8; /** Signed 8-bit complex number */
+
+
+/* OMX_SC16 */
+typedef struct
+{
+ OMX_S16 Re; /** Real part */
+ OMX_S16 Im; /** Imaginary part */
+
+} OMX_SC16; /** Signed 16-bit complex number */
+
+
+/* OMX_SC32 */
+typedef struct
+{
+ OMX_S32 Re; /** Real part */
+ OMX_S32 Im; /** Imaginary part */
+
+} OMX_SC32; /** Signed 32-bit complex number */
+
+
+/* OMX_SC64 */
+typedef struct
+{
+ OMX_S64 Re; /** Real part */
+ OMX_S64 Im; /** Imaginary part */
+
+} OMX_SC64; /** Signed 64-bit complex number */
+
+
+/* OMX_F32 */
+typedef float OMX_F32; /** Single precision floating point,IEEE 754 */
+
+
+/* OMX_F64 */
+typedef double OMX_F64; /** Double precision floating point,IEEE 754 */
+
+
+/* OMX_INT */
+typedef int OMX_INT; /** signed integer corresponding to machine word length, has maximum signed value INT_MAX*/
+
+
+#define OMX_MIN_S8 (-128)
+#define OMX_MIN_U8 0
+#define OMX_MIN_S16 (-32768)
+#define OMX_MIN_U16 0
+#define OMX_MIN_S32 (-2147483647-1)
+#define OMX_MIN_U32 0
+
+#define OMX_MAX_S8 (127)
+#define OMX_MAX_U8 (255)
+#define OMX_MAX_S16 (32767)
+#define OMX_MAX_U16 (0xFFFF)
+#define OMX_MAX_S32 (2147483647)
+#define OMX_MAX_U32 (0xFFFFFFFF)
+
+typedef void OMXVoid;
+
+#ifndef NULL
+#define NULL ((void*)0)
+#endif
+
+/** Defines the geometric position and size of a rectangle,
+ * where x,y defines the coordinates of the top left corner
+ * of the rectangle, with dimensions width in the x-direction
+ * and height in the y-direction */
+typedef struct {
+ OMX_INT x; /** x-coordinate of top left corner of rectangle */
+ OMX_INT y; /** y-coordinate of top left corner of rectangle */
+ OMX_INT width; /** Width in the x-direction. */
+ OMX_INT height; /** Height in the y-direction. */
+}OMXRect;
+
+
+/** Defines the geometric position of a point, */
+typedef struct
+{
+ OMX_INT x; /** x-coordinate */
+ OMX_INT y; /** y-coordinate */
+
+} OMXPoint;
+
+
+/** Defines the dimensions of a rectangle, or region of interest in an image */
+typedef struct
+{
+ OMX_INT width; /** Width of the rectangle, in the x-direction */
+ OMX_INT height; /** Height of the rectangle, in the y-direction */
+
+} OMXSize;
+
+#endif /* _OMXTYPES_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes_s.h
new file mode 100755
index 0000000..48703d1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes_s.h
@@ -0,0 +1,77 @@
+;//
+;//
+;// File Name: omxtypes_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Mandatory return codes - use cases are explicitly described for each function
+OMX_Sts_NoErr EQU 0 ;// No error the function completed successfully
+OMX_Sts_Err EQU -2 ;// Unknown/unspecified error
+OMX_Sts_InvalidBitstreamValErr EQU -182 ;// Invalid value detected during bitstream processing
+OMX_Sts_MemAllocErr EQU -9 ;// Not enough memory allocated for the operation
+OMX_StsACAAC_GainCtrErr EQU -159 ;// AAC: Unsupported gain control data detected
+OMX_StsACAAC_PrgNumErr EQU -167 ;// AAC: Invalid number of elements for one program
+OMX_StsACAAC_CoefValErr EQU -163 ;// AAC: Invalid quantized coefficient value
+OMX_StsACAAC_MaxSfbErr EQU -162 ;// AAC: Invalid maxSfb value in relation to numSwb
+OMX_StsACAAC_PlsDataErr EQU -160 ;// AAC: pulse escape sequence data error
+
+;// Optional return codes - use cases are explicitly described for each function
+OMX_Sts_BadArgErr EQU -5 ;// Bad Arguments
+
+OMX_StsACAAC_TnsNumFiltErr EQU -157 ;// AAC: Invalid number of TNS filters
+OMX_StsACAAC_TnsLenErr EQU -156 ;// AAC: Invalid TNS region length
+OMX_StsACAAC_TnsOrderErr EQU -155 ;// AAC: Invalid order of TNS filter
+OMX_StsACAAC_TnsCoefResErr EQU -154 ;// AAC: Invalid bit-resolution for TNS filter coefficients
+OMX_StsACAAC_TnsCoefErr EQU -153 ;// AAC: Invalid TNS filter coefficients
+OMX_StsACAAC_TnsDirectErr EQU -152 ;// AAC: Invalid TNS filter direction
+
+OMX_StsICJP_JPEGMarkerErr EQU -183 ;// JPEG marker encountered within an entropy-coded block;
+ ;// Huffman decoding operation terminated early.
+OMX_StsICJP_JPEGMarker EQU -181 ;// JPEG marker encountered; Huffman decoding
+ ;// operation terminated early.
+OMX_StsIPPP_ContextMatchErr EQU -17 ;// Context parameter doesn't match to the operation
+
+OMX_StsSP_EvenMedianMaskSizeErr EQU -180 ;// Even size of the Median Filter mask was replaced by the odd one
+
+OMX_Sts_MaximumEnumeration EQU 0x7FFFFFFF
+
+
+
+OMX_MIN_S8 EQU (-128)
+OMX_MIN_U8 EQU 0
+OMX_MIN_S16 EQU (-32768)
+OMX_MIN_U16 EQU 0
+
+
+OMX_MIN_S32 EQU (-2147483647-1)
+OMX_MIN_U32 EQU 0
+
+OMX_MAX_S8 EQU (127)
+OMX_MAX_U8 EQU (255)
+OMX_MAX_S16 EQU (32767)
+OMX_MAX_U16 EQU (0xFFFF)
+OMX_MAX_S32 EQU (2147483647)
+OMX_MAX_U32 EQU (0xFFFFFFFF)
+
+OMX_VC_UPPER EQU 0x1 ;// Used by the PredictIntra functions
+OMX_VC_LEFT EQU 0x2 ;// Used by the PredictIntra functions
+OMX_VC_UPPER_RIGHT EQU 0x40 ;// Used by the PredictIntra functions
+
+NULL EQU 0
+
+;// Structures
+
+ INCLUDE armCOMM_s.h
+
+ M_STRUCT OMXPoint
+ M_FIELD x, 4
+ M_FIELD y, 4
+ M_ENDSTRUCT
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/build_vc.pl b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/build_vc.pl
new file mode 100755
index 0000000..649e74c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/build_vc.pl
@@ -0,0 +1,113 @@
+#!/usr/bin/perl
+#
+#
+# File Name: build_vc.pl
+# OpenMAX DL: v1.0.2
+# Revision: 12290
+# Date: Wednesday, April 9, 2008
+#
+# (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+#
+#
+#
+# This file builds the OpenMAX DL vc domain library omxVC.o.
+#
+
+use File::Spec;
+use strict;
+use warnings;
+
+# Tool configuration: compiler, assembler and librarian with their options.
+my ($CC, $CC_OPTS, $AS, $AS_OPTS, $LIB, $LIB_OPTS, $LIB_TYPE);
+
+$CC = 'armcc';
+$CC_OPTS = '--no_unaligned_access --cpu Cortex-A8 -c';
+$AS = 'armasm';
+$AS_OPTS = '--no_unaligned_access --cpu Cortex-A8';
+# $LIB = 'armlink';
+# $LIB_OPTS = '--partial -o';
+# $LIB_TYPE = '.o';
+$LIB = 'armar';
+$LIB_OPTS = '--create -r';
+$LIB_TYPE = '.a';
+
+#------------------------
+
+my (@headerlist, @filelist, $hd, $command, $objlist, $libfile, %tool_for);
+my $failures = 0;
+
+# Command prefix used for each recognised source file extension.
+%tool_for = (
+    'c' => $CC.' '.$CC_OPTS,
+    's' => $AS.' '.$AS_OPTS,
+);
+
+# Directories containing included header files, with the file separators
+# fixed for the host platform.
+@headerlist = map { File::Spec->canonpath($_) } qw(api vc/api vc/m4p2/api vc/m4p10/api);
+
+# Read the list of source files to compile.
+open(my $files, '<', 'filelist_vc.txt') or die("Can't open source file list: $!\n");
+@filelist = <$files>;
+close($files);
+
+# Create the include path to be passed to the compiler
+$hd = '-I' . join(' -I', @headerlist);
+
+# Create the build directories "obj" and "lib" (if they are not there already)
+mkdir "obj", 0777 if (! -d "obj");
+mkdir "lib", 0777 if (! -d "lib");
+
+$objlist = '';
+
+# Compile or assemble each listed file
+foreach my $entry (@filelist)
+{
+    my $file = $entry;
+
+    # Remove every end-of-line character from the path itself, so that a
+    # list file with CR/LF line endings does not leak a CR into the command.
+    $file =~ s/[\n\f\r]//g;
+
+    # Blank lines in the list carry no file name; skip them.
+    next if ($file =~ /^\s*$/);
+
+    $file = File::Spec->canonpath($file);
+
+    my (undef, undef, $f) = File::Spec->splitpath($file);
+
+    if (my ($base, $ext) = $f =~ /(.+)\.(\w+)$/)
+    {
+        if (exists $tool_for{$ext})
+        {
+            my $objfile = File::Spec->catfile('obj', $base.'.o');
+
+            $objlist .= "$objfile ";
+            $command = $tool_for{$ext}.' '.$hd.' -o '.$objfile.' '.$file;
+            print "$command\n";
+
+            # Remember failed steps; the library must not be reported as
+            # built when one of its objects could not be produced.
+            if (system($command) != 0)
+            {
+                print STDERR "Build step failed: $command\n";
+                $failures++;
+            }
+        }
+        else
+        {
+            print "Ignoring file: $f\n";
+        }
+    }
+    else
+    {
+        die "No file extension found: $f\n";
+    }
+}
+
+if ($failures > 0)
+{
+    die "Build failed: $failures file(s) could not be built\n";
+}
+
+# Do the final link stage to create the libraries.
+$libfile = File::Spec->catfile('lib', 'omxVC'.$LIB_TYPE);
+$command = $LIB.' '.$LIB_OPTS.' '.$libfile.' '.$objlist;
+print "$command\n";
+(system($command) == 0) or die "Build failed: could not create $libfile\n";
+print "Build successful\n";
+
+
+
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/filelist_vc.txt b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/filelist_vc.txt
new file mode 100755
index 0000000..8db8eeb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/filelist_vc.txt
@@ -0,0 +1,75 @@
+./api/armCOMM.h
+./api/armCOMM_BitDec_s.h
+./api/armCOMM_Bitstream.h
+./api/armCOMM_IDCT_s.h
+./api/armCOMM_IDCTTable.h
+./api/armCOMM_MaskTable.h
+./api/armCOMM_s.h
+./api/armCOMM_Version.h
+./api/armOMX_ReleaseVersion.h
+./api/omxtypes.h
+./api/omxtypes_s.h
+./src/armCOMM_IDCTTable.c
+./src/armCOMM_MaskTable.c
+./vc/api/armVC.h
+./vc/api/armVCCOMM_s.h
+./vc/api/omxVC.h
+./vc/api/omxVC_s.h
+./vc/comm/src/omxVCCOMM_Copy16x16_s.s
+./vc/comm/src/omxVCCOMM_Copy8x8_s.s
+./vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
+./vc/m4p10/api/armVCM4P10_CAVLCTables.h
+./vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_CAVLCTables.c
+./vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
+./vc/m4p10/src/armVCM4P10_DequantTables_s.s
+./vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_QuantTables_s.s
+./vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
+./vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
+./vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
+./vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
+./vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+./vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+./vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
+./vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
+./vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
+./vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
+./vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
+./vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
+./vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
+./vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
+./vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
+./vc/m4p2/src/armVCM4P2_Clip8_s.s
+./vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
+./vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
+./vc/m4p2/src/armVCM4P2_Lookup_Tables.c
+./vc/m4p2/src/armVCM4P2_SetPredDir_s.s
+./vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
+./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
+./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
+./vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
+./vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
+./vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
+./vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
+./vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
+./vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
+./vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
+./vc/src/armVC_Version.c \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM.c
new file mode 100755
index 0000000..e572a89
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM.c
@@ -0,0 +1,936 @@
+/**
+ *
+ * File Name: armCOMM.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Defines Common APIs used across OpenMAX API's
+ */
+
+#include "omxtypes.h"
+#include "armCOMM.h"
+
+/***********************************************************************/
+ /* Miscellaneous Arithmetic operations */
+
+/**
+ * Function: armRoundFloatToS16
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halves are
+ * rounded away from zero) and converts it to a 16 bit signed integer.
+ * No saturation is applied.
+ *
+ * Parameters:
+ * [in]  Value   Value to be converted
+ *
+ * Return Value:
+ * Rounded value in OMX_S16 format
+ *
+ */
+
+OMX_S16 armRoundFloatToS16 (OMX_F64 Value)
+{
+    /* Push the value half a unit away from zero; the cast then truncates */
+    const OMX_F64 bias = (Value > 0) ? .5 : -.5;
+
+    return (OMX_S16)(Value + bias);
+}
+
+/**
+ * Function: armRoundFloatToS32
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halves are
+ * rounded away from zero) and converts it to a 32 bit signed integer.
+ * No saturation is applied.
+ *
+ * Parameters:
+ * [in]  Value   Value to be converted
+ *
+ * Return Value:
+ * Rounded value in OMX_S32 format
+ *
+ */
+
+OMX_S32 armRoundFloatToS32 (OMX_F64 Value)
+{
+    /* Push the value half a unit away from zero; the cast then truncates */
+    const OMX_F64 bias = (Value > 0) ? .5 : -.5;
+
+    return (OMX_S32)(Value + bias);
+}
+/**
+ * Function: armSatRoundFloatToS16
+ *
+ * Description:
+ * Rounds a double precision value (halves away from zero) and converts
+ * it to a 16 bit signed integer, saturating at OMX_MAX_S16 / OMX_MIN_S16.
+ *
+ * Parameters:
+ * [in]  Value   Value to be converted
+ *
+ * Return Value:
+ * Rounded and saturated value in OMX_S16 format
+ *
+ */
+
+OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value)
+{
+    OMX_F64 rounded;
+
+    if (Value > 0)
+    {
+        /* Positive side: only the upper bound can be exceeded */
+        rounded = Value + 0.5;
+        return (rounded > (OMX_S16)OMX_MAX_S16) ? (OMX_S16)OMX_MAX_S16
+                                                : (OMX_S16)rounded;
+    }
+
+    /* Zero or negative side: only the lower bound can be exceeded */
+    rounded = Value - 0.5;
+    return (rounded < (OMX_S16)OMX_MIN_S16) ? (OMX_S16)OMX_MIN_S16
+                                            : (OMX_S16)rounded;
+}
+
+/**
+ * Function: armSatRoundFloatToS32
+ *
+ * Description:
+ * Rounds a double precision value (halves away from zero) and converts
+ * it to a 32 bit signed integer, saturating at OMX_MAX_S32 / OMX_MIN_S32.
+ *
+ * Parameters:
+ * [in]  Value   Value to be converted
+ *
+ * Return Value:
+ * Rounded and saturated value in OMX_S32 format
+ *
+ */
+
+OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value)
+{
+    OMX_F64 rounded;
+
+    if (Value > 0)
+    {
+        /* Positive side: only the upper bound can be exceeded */
+        rounded = Value + 0.5;
+        return (rounded > (OMX_S32)OMX_MAX_S32) ? (OMX_S32)OMX_MAX_S32
+                                                : (OMX_S32)rounded;
+    }
+
+    /* Zero or negative side: only the lower bound can be exceeded */
+    rounded = Value - 0.5;
+    return (rounded < (OMX_S32)OMX_MIN_S32) ? (OMX_S32)OMX_MIN_S32
+                                            : (OMX_S32)rounded;
+}
+
+/**
+ * Function: armSatRoundFloatToU16
+ *
+ * Description:
+ * Converts a double precision value into an unsigned short int after
+ * rounding (0.5 is added, then the fraction is discarded) and
+ * saturation to the range [0, OMX_MAX_U16].
+ *
+ * Parameters:
+ * [in]  Value   Float value to be converted
+ *
+ * Return Value:
+ * Rounded value in OMX_U16 format; OMX_MAX_U16 when the rounded input
+ * lies above the range and 0 when it lies below it
+ *
+ */
+
+OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value)
+{
+    Value += 0.5;
+
+    if(Value > (OMX_U16)OMX_MAX_U16 )
+    {
+        return (OMX_U16)OMX_MAX_U16;
+    }
+
+    if(Value < 0)
+    {
+        /* Saturate at the lower bound: converting a floating point value
+         * of -1 or less to an unsigned type is undefined behaviour in C. */
+        return 0;
+    }
+
+    return (OMX_U16)Value;
+}
+
+/**
+ * Function: armSatRoundFloatToU32
+ *
+ * Description:
+ * Converts a double precision value into an unsigned int after
+ * rounding (0.5 is added, then the fraction is discarded) and
+ * saturation to the range [0, OMX_MAX_U32].
+ *
+ * Parameters:
+ * [in]  Value   Float value to be converted
+ *
+ * Return Value:
+ * Rounded value in OMX_U32 format; OMX_MAX_U32 when the rounded input
+ * lies above the range and 0 when it lies below it
+ *
+ */
+
+OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value)
+{
+    Value += 0.5;
+
+    if(Value > (OMX_U32)OMX_MAX_U32 )
+    {
+        return (OMX_U32)OMX_MAX_U32;
+    }
+
+    if(Value < 0)
+    {
+        /* Saturate at the lower bound: converting a floating point value
+         * of -1 or less to an unsigned type is undefined behaviour in C. */
+        return 0;
+    }
+
+    return (OMX_U32)Value;
+}
+
+/**
+ * Function: armRoundFloatToS64
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halves are
+ * rounded away from zero) and converts it to a 64 bit signed integer.
+ * No saturation is applied.
+ *
+ * Parameters:
+ * [in]  Value   Value to be converted
+ *
+ * Return Value:
+ * Rounded value in OMX_S64 format
+ *
+ */
+
+OMX_S64 armRoundFloatToS64 (OMX_F64 Value)
+{
+    /* Push the value half a unit away from zero; the cast then truncates */
+    const OMX_F64 bias = (Value > 0) ? .5 : -.5;
+
+    return (OMX_S64)(Value + bias);
+}
+
+/**
+ * Function: armSignCheck
+ *
+ * Description:
+ * Reports the sign of a 16 bit value.
+ *
+ * Parameters:
+ * [in]  var   Variable to be checked
+ *
+ * Return Value:
+ * OMX_INT --  1 if var is positive
+ *             0 if var is zero
+ *            -1 if var is negative
+ */
+
+OMX_INT armSignCheck (
+    OMX_S16 var
+)
+
+{
+    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
+    return (var > 0) - (var < 0);
+}
+
+/**
+ * Function: armClip
+ *
+ * Description:
+ * Limits src to the interval [min, max]. The upper bound is tested
+ * first, so max is returned whenever src exceeds it.
+ *
+ * Parameters:
+ * [in]  min   lower bound
+ * [in]  max   upper bound
+ * [in]  src   value to be clipped
+ *
+ * Return Value:
+ * OMX_S32 -- the clipped value
+ */
+
+OMX_S32 armClip (
+    OMX_INT min,
+    OMX_INT max,
+    OMX_S32 src
+)
+
+{
+    if (src > max)
+    {
+        return max;
+    }
+
+    return (src < min) ? min : src;
+}
+
+/**
+ * Function: armClip_F32
+ *
+ * Description:
+ * Limits src to the interval [min, max]. The upper bound is tested
+ * first, so max is returned whenever src exceeds it.
+ *
+ * Parameters:
+ * [in]  min   lower bound
+ * [in]  max   upper bound
+ * [in]  src   value to be clipped
+ *
+ * Return Value:
+ * OMX_F32 -- the clipped value
+ */
+
+OMX_F32 armClip_F32 (
+    OMX_F32 min,
+    OMX_F32 max,
+    OMX_F32 src
+)
+
+{
+    if (src > max)
+    {
+        return max;
+    }
+
+    return (src < min) ? min : src;
+}
+
+/**
+ * Function: armShiftSat_F32
+ *
+ * Description:
+ * Divides a float value by 2^shift, rounds the quotient (0.5 is added,
+ * then the fraction is discarded) and saturates it to the unsigned
+ * range [0, 2^satBits).
+ *
+ * Parameters:
+ * [in]  v         Number to be operated upon
+ * [in]  shift     Divides the input "v" by "2^shift". The divisor is
+ *                 formed as (1 << shift) on an int, so shift has to lie
+ *                 in 0..30; this is not checked.
+ * [in]  satBits   Final range is [0, 2^satBits). Values of 32 or more
+ *                 select the full OMX_U32 range, values of 0 or less
+ *                 leave only 0.
+ *
+ * Return Value:
+ * OMX_U32 -- the "shifted" saturated value; 0 when v is not positive
+ */
+
+OMX_U32 armShiftSat_F32(OMX_F32 v, OMX_INT shift, OMX_INT satBits)
+{
+    OMX_U32 maxV;
+    OMX_F32 vShifted, vRounded, shiftDiv;
+
+    /* Non-positive input (the negated test also catches NaN) and an empty
+     * output range both produce 0. */
+    if(!(v > 0) || satBits <= 0)
+        return 0;
+
+    /* Largest representable result. Formed without shifting by the full
+     * width of the type, which would be undefined. */
+    if(satBits >= 32)
+        maxV = (OMX_U32)OMX_MAX_U32;
+    else
+        maxV = ((OMX_U32)1 << satBits) - 1;
+
+    shiftDiv = (OMX_F32)(1 << shift);
+    vShifted = v / shiftDiv;
+    vRounded = (OMX_F32)(vShifted + 0.5);
+
+    /* Saturate while still in floating point: converting a float that does
+     * not fit into OMX_U32 would be undefined. */
+    if(vRounded >= (OMX_F32)maxV)
+        return maxV;
+
+    return (OMX_U32)vRounded;
+}
+
+/**
+ * Function: armSwapElem
+ *
+ * Description:
+ * Exchanges, byte by byte, the two elements of <elemSize> bytes that
+ * start at pBuf1 and pBuf2.
+ *
+ * Parameters:
+ * [in,out]  pBuf1      first element
+ * [in,out]  pBuf2      second element
+ * [in]      elemSize   number of bytes to exchange
+ *
+ * Return Value:
+ * OMXResult -- OMX_Sts_BadArgErr if either pointer is NULL,
+ *              OMX_Sts_NoErr otherwise
+ */
+OMXResult armSwapElem(
+    OMX_U8 *pBuf1,
+    OMX_U8 *pBuf2,
+    OMX_INT elemSize
+    )
+{
+    OMX_INT remaining;
+    OMX_U8 held;
+    armRetArgErrIf(!(pBuf1 && pBuf2), OMX_Sts_BadArgErr);
+
+    for(remaining = elemSize; remaining > 0; remaining--)
+    {
+        held = *pBuf1;
+        *pBuf1++ = *pBuf2;
+        *pBuf2++ = held;
+    }
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function: armMedianOf3
+ *
+ * Description:
+ * Returns the middle one of three values.
+ *
+ * Parameters:
+ * [in]  fEntry   First entry
+ * [in]  sEntry   Second entry
+ * [in]  tEntry   Third entry
+ *
+ * Return Value:
+ * OMX_S32 -- the median value
+ */
+
+OMX_S32 armMedianOf3 (
+    OMX_S32 fEntry,
+    OMX_S32 sEntry,
+    OMX_S32 tEntry
+)
+{
+    OMX_S32 lower = fEntry;
+    OMX_S32 upper = sEntry;
+
+    /* Order the first two entries */
+    if (lower > upper)
+    {
+        lower = sEntry;
+        upper = fEntry;
+    }
+
+    /* The third entry can only pull the upper candidate down */
+    if (tEntry < upper)
+    {
+        upper = tEntry;
+    }
+
+    return (lower > upper) ? lower : upper;
+}
+
+/**
+ * Function: armLogSize
+ *
+ * Description:
+ * Counts the bits needed to represent an unsigned 16 bit value.
+ *
+ * Parameters:
+ * [in]  value   Value to be measured
+ *
+ * Return Value:
+ * OMX_U8 -- the smallest k>=0 such that value is less than (1<<k);
+ *           0 is returned for value 0.
+ */
+
+OMX_U8 armLogSize (
+    OMX_U16 value
+)
+{
+    OMX_U8 bits = 0;
+
+    /* Drop one bit per pass until nothing is left */
+    while (value != 0)
+    {
+        value = value >> 1;
+        bits++;
+    }
+    return bits;
+}
+
+/***********************************************************************/
+ /* Saturating Arithmetic operations */
+
+/**
+ * Function: armSatAdd_S32()
+ *
+ * Description:
+ * Returns Value1 + Value2 saturated to the OMX_S32 range.
+ * The range is checked before the addition is carried out, so the
+ * signed addition itself can never overflow (signed overflow is
+ * undefined behaviour in C and must not be detected after the fact).
+ *
+ * Parameters:
+ * [in]  Value1   First operand
+ * [in]  Value2   Second operand
+ *
+ * Return Value:
+ * The sum, or OMX_MAX_S32 / OMX_MIN_S32 when the exact sum lies
+ * above / below the OMX_S32 range
+ *
+ **/
+
+OMX_S32 armSatAdd_S32(OMX_S32 Value1,OMX_S32 Value2)
+{
+    const OMX_S32 maxVal = OMX_MAX_S32;
+    const OMX_S32 minVal = OMX_MIN_S32;
+
+    if (Value2 > 0 && Value1 > maxVal - Value2)
+    {
+        /*Result would saturate on the positive side*/
+        return maxVal;
+    }
+
+    if (Value2 < 0 && Value1 < minVal - Value2)
+    {
+        /*Result would saturate on the negative side*/
+        return minVal;
+    }
+
+    return Value1 + Value2;
+}
+
+/**
+ * Function: armSatAdd_S64()
+ *
+ * Description:
+ * Returns Value1 + Value2 saturated to the OMX_S64 range.
+ * The range is checked before the addition is carried out, so the
+ * signed addition itself can never overflow (signed overflow is
+ * undefined behaviour in C and must not be detected after the fact).
+ *
+ * Parameters:
+ * [in]  Value1   First operand
+ * [in]  Value2   Second operand
+ *
+ * Return Value:
+ * The sum, or OMX_MAX_S64 / OMX_MIN_S64 when the exact sum lies
+ * above / below the OMX_S64 range
+ *
+ **/
+
+OMX_S64 armSatAdd_S64(OMX_S64 Value1,OMX_S64 Value2)
+{
+    const OMX_S64 maxVal = OMX_MAX_S64;
+    const OMX_S64 minVal = OMX_MIN_S64;
+
+    if (Value2 > 0 && Value1 > maxVal - Value2)
+    {
+        /*Result would saturate on the positive side*/
+        return maxVal;
+    }
+
+    if (Value2 < 0 && Value1 < minVal - Value2)
+    {
+        /*Result would saturate on the negative side*/
+        return minVal;
+    }
+
+    return Value1 + Value2;
+}
+
+/**
+ * Function: armSatSub_S32()
+ *
+ * Description:
+ * Returns Value1 - Value2 saturated to the OMX_S32 range.
+ * The range is checked before the subtraction is carried out, so the
+ * signed subtraction itself can never overflow (signed overflow is
+ * undefined behaviour in C and must not be detected after the fact).
+ *
+ * Parameters:
+ * [in]  Value1   First operand
+ * [in]  Value2   Second operand
+ *
+ * Return Value:
+ * The difference, or OMX_MAX_S32 / OMX_MIN_S32 when the exact
+ * difference lies above / below the OMX_S32 range
+ *
+ **/
+
+OMX_S32 armSatSub_S32(OMX_S32 Value1,OMX_S32 Value2)
+{
+    const OMX_S32 maxVal = OMX_MAX_S32;
+    const OMX_S32 minVal = OMX_MIN_S32;
+
+    if (Value2 < 0 && Value1 > maxVal + Value2)
+    {
+        /*Result would saturate on the positive side*/
+        return maxVal;
+    }
+
+    if (Value2 > 0 && Value1 < minVal + Value2)
+    {
+        /*Result would saturate on the negative side*/
+        return minVal;
+    }
+
+    return Value1 - Value2;
+}
+
+/**
+ * Function: armSatMac_S32()
+ *
+ * Description:
+ * Multiplies Value1 by Value2 and accumulates the product into Mac
+ * with a saturating 32 bit addition (armSatAdd_S32).
+ *
+ * Parameters:
+ * [in]  Mac      Accumulator
+ * [in]  Value1   First operand
+ * [in]  Value2   Second operand
+ *
+ * Return Value:
+ * Saturated value of Mac + Value1 * Value2
+ **/
+
+OMX_S32 armSatMac_S32(OMX_S32 Mac,OMX_S16 Value1,OMX_S16 Value2)
+{
+    const OMX_S32 product = (OMX_S32)(Value1*Value2);
+
+    return armSatAdd_S32( Mac , product );
+}
+
+/**
+ * Function: armSatMac_S16S32_S32
+ *
+ * Description:
+ * Saturated multiply-accumulate of a 32 bit and a 16 bit operand:
+ *
+ * mac = Saturate_in_32Bits(mac + Saturate_in_32Bits(delayElem * filTap))
+ *
+ * Parameters:
+ * [in]  mac         Accumulator
+ * [in]  delayElem   32 bit operand
+ * [in]  filTap      16 bit operand
+ *
+ * Return Value:
+ * Updated accumulator
+ *
+ **/
+
+OMX_S32 armSatMac_S16S32_S32(OMX_S32 mac, OMX_S32 delayElem, OMX_S16 filTap )
+{
+    /* The exact product needs at most 48 bits, so it fits a 64 bit integer */
+    const OMX_S64 product = (OMX_S64)delayElem * filTap;
+    OMX_S32 term;
+
+    if ( product > OMX_MAX_S32 )
+    {
+        term = OMX_MAX_S32;
+    }
+    else if ( product < OMX_MIN_S32 )
+    {
+        term = OMX_MIN_S32;
+    }
+    else
+    {
+        term = (OMX_S32)product;
+    }
+
+    return armSatAdd_S32(mac,term);
+}
+
+
+/**
+ * Function: armSatRoundRightShift_S32_S16
+ *
+ * Description:
+ * Shifts input by "shift" positions through
+ * armSatRoundLeftShift_S32(input, -shift), i.e. a rounded right shift
+ * for positive shift values, and saturates the result to 16 bits.
+ *
+ * Parameters:
+ * [in]  input   The input to be operated on
+ * [in]  shift   The shift number
+ *
+ * Return Value:
+ * Shifted value saturated to the OMX_S16 range
+ *
+ **/
+
+
+OMX_S16 armSatRoundRightShift_S32_S16(OMX_S32 input, OMX_INT shift)
+{
+    const OMX_S32 shifted = armSatRoundLeftShift_S32(input,-shift);
+
+    if ( shifted > OMX_MAX_S16 )
+    {
+        return (OMX_S16)OMX_MAX_S16;
+    }
+
+    if ( shifted < OMX_MIN_S16 )
+    {
+        return (OMX_S16)OMX_MIN_S16;
+    }
+
+    return (OMX_S16)shifted;
+}
+
+/**
+ * Function: armSatRoundLeftShift_S32()
+ *
+ * Description:
+ * For a non-negative Shift: saturating left shift of Value, carried
+ * out as Shift saturating doublings.
+ * For a negative Shift: right shift by -Shift after adding half of the
+ * divisor (saturating add) so that the result is rounded.
+ *
+ * Parameters:
+ * [in]  Value   Operand
+ * [in]  Shift   Shift count; negative values shift right
+ *
+ * Return Value:
+ * Shifted value
+ *
+ **/
+
+OMX_S32 armSatRoundLeftShift_S32(OMX_S32 Value, OMX_INT Shift)
+{
+    if (Shift < 0)
+    {
+        const OMX_INT rightShift = -Shift;
+
+        /* Add 2^(rightShift-1) first so that the shift rounds */
+        return armSatAdd_S32(Value, (1 << (rightShift - 1))) >> rightShift;
+    }
+
+    /* Each saturating doubling is one position of left shift */
+    while (Shift > 0)
+    {
+        Value = armSatAdd_S32(Value, Value);
+        Shift--;
+    }
+    return Value;
+}
+
+/**
+ * Function: armSatRoundLeftShift_S64()
+ *
+ * Description:
+ * For a non-negative Shift: saturating left shift of Value, carried
+ * out as Shift saturating doublings.
+ * For a negative Shift: right shift by -Shift after adding half of the
+ * divisor (saturating add) so that the result is rounded.
+ *
+ * Parameters:
+ * [in]  Value   Operand
+ * [in]  Shift   Shift count; negative values shift right
+ *
+ * Return Value:
+ * Shifted value
+ *
+ **/
+
+OMX_S64 armSatRoundLeftShift_S64(OMX_S64 Value, OMX_INT Shift)
+{
+    if (Shift < 0)
+    {
+        const OMX_INT rightShift = -Shift;
+
+        /* Add 2^(rightShift-1) first so that the shift rounds */
+        return armSatAdd_S64(Value, ((OMX_S64)1 << (rightShift - 1))) >> rightShift;
+    }
+
+    /* Each saturating doubling is one position of left shift */
+    while (Shift > 0)
+    {
+        Value = armSatAdd_S64(Value, Value);
+        Shift--;
+    }
+    return Value;
+}
+
+/**
+ * Function: armSatMulS16S32_S32()
+ *
+ * Description:
+ * Multiplies an S16 value with an S32 value and returns the product
+ * scaled down by 16 bits in an S32 container. input2 is split into a
+ * signed upper and an unsigned lower 16 bit half; the result is
+ *
+ *     hi(input2) * input1 + ((lo(input2) * input1) >> 16)
+ *
+ * Parameters:
+ * [in]  input1   Operand 1
+ * [in]  input2   Operand 2
+ *
+ * Return Value:
+ * Result of operation
+ *
+ **/
+
+
+OMX_S32 armSatMulS16S32_S32(OMX_S16 input1,OMX_S32 input2)
+{
+    OMX_S16 hi2,lo1;
+    OMX_U16 lo2;
+
+    OMX_S32 temp1,temp2;
+    OMX_S32 result;
+
+    lo1 = input1;
+
+    hi2 = ( input2 >> 16 );
+    /* Take the lower half with a mask on the unsigned value: left shifting
+     * a negative (or large) signed value is undefined behaviour. */
+    lo2 = (OMX_U16)( (OMX_U32)input2 & 0xFFFFu );
+
+    temp1 = hi2 * lo1;
+    temp2 = ( lo2* lo1 ) >> 16;
+
+    result = armSatAdd_S32(temp1,temp2);
+
+    return result;
+}
+
+/**
+ * Function: armSatMulS32S32_S32()
+ *
+ * Description:
+ * Multiplies two S32 values and returns the upper part of the product
+ * in an S32 container. Both operands are split into a signed upper and
+ * an unsigned lower 16 bit half; the result is the saturated sum
+ *
+ *     hi1 * hi2 + ((hi1 * lo2) >> 16) + ((hi2 * lo1) >> 16)
+ *
+ * The lo1 * lo2 term is not taken into account.
+ *
+ * Parameters:
+ * [in]  input1   Operand 1
+ * [in]  input2   Operand 2
+ *
+ * Return Value:
+ * Result of operation
+ *
+ **/
+
+OMX_S32 armSatMulS32S32_S32(OMX_S32 input1,OMX_S32 input2)
+{
+    OMX_S16 hi1,hi2;
+    OMX_U16 lo1,lo2;
+
+    OMX_S32 temp1,temp2,temp3;
+    OMX_S32 result;
+
+    /* The lower halves are taken with a mask on the unsigned value: left
+     * shifting a negative (or large) signed value is undefined behaviour. */
+    hi1 = ( input1 >> 16 );
+    lo1 = (OMX_U16)( (OMX_U32)input1 & 0xFFFFu );
+
+    hi2 = ( input2 >> 16 );
+    lo2 = (OMX_U16)( (OMX_U32)input2 & 0xFFFFu );
+
+    temp1 = hi1 * hi2;
+    temp2 = ( hi1* lo2 ) >> 16;
+    temp3 = ( hi2* lo1 ) >> 16;
+
+    result = armSatAdd_S32(temp1,temp2);
+    result = armSatAdd_S32(result,temp3);
+
+    return result;
+}
+
+/**
+ * Function: armIntDivAwayFromZero()
+ *
+ * Description:
+ * Integer division with rounding to the nearest integer, carried out
+ * in double precision. Half-integer quotients are rounded away from
+ * zero: 3//2 gives 2 and -3//2 gives -2.
+ *
+ * Parameters:
+ * [in]  Num    Numerator
+ * [in]  Deno   Denominator
+ *
+ * Return Value:
+ * Rounded quotient Num//Deno
+ *
+ **/
+
+OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno)
+{
+    const OMX_F64 quotient = ((OMX_F64)Num)/((OMX_F64)Deno);
+
+    /* Move half a unit away from zero; the cast then truncates */
+    return (OMX_S32)(quotient + ((quotient >= 0) ? 0.5 : -0.5));
+}
+
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_Bitstream.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_Bitstream.c
new file mode 100755
index 0000000..9ef9319
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_Bitstream.c
@@ -0,0 +1,329 @@
+/**
+ *
+ * File Name: armCOMM_Bitstream.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Defines bitstream encode and decode functions common to all codecs
+ */
+
+#include "omxtypes.h"
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+
+/***************************************
+ * Fixed bit length Decode
+ ***************************************/
+
+/**
+ * Function: armLookAheadBits()
+ *
+ * Description:
+ * Get the next N bits from the bitstream without advancing the
+ * bitstream pointer. Four bytes are always read from *ppBitStream; a
+ * fifth byte is read only when *pOffset is not zero.
+ *
+ * Parameters:
+ * [in]  **ppBitStream   pointer to the pointer to the current byte
+ * [in]  *pOffset        bit position inside the current byte, 0..7
+ * [in]  N=1...32        number of bits to return
+ *
+ * Returns Value: the next N bits, right aligned
+ */
+
+OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+
+    armAssert(Offset>=0 && Offset<=7);
+    armAssert(N>=1 && N<=32);
+
+    /* Read next 32 bits from stream. The bytes are widened to OMX_U32
+     * before shifting: a promoted int shifted into its sign bit would be
+     * undefined behaviour. */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16) |
+            ((OMX_U32)pBitStream[2] << 8)  |  (OMX_U32)pBitStream[3];
+
+    /* A byte aligned position needs nothing from the fifth byte, so it is
+     * not touched in that case. */
+    if (Offset != 0)
+    {
+        Value = (Value << Offset) | ((OMX_U32)pBitStream[4] >> (8-Offset));
+    }
+
+    /* Return N bits */
+    return Value >> (32-N);
+}
+
+
+/**
+ * Function: armGetBits()
+ *
+ * Description:
+ * Read N bits from the bitstream and advance the bitstream position by
+ * N bits. Four bytes are always read from *ppBitStream (unless N is 0);
+ * a fifth byte is read only when *pOffset is not zero.
+ *
+ * Parameters:
+ * [in]  *ppBitStream   pointer to the current byte
+ * [in]  *pOffset       bit position inside the current byte, 0..7
+ * [in]  N=1..32        number of bits to read; 0 returns 0 and leaves
+ *                      the position unchanged
+ *
+ * [out] *ppBitStream   advanced to the byte holding the next unread bit
+ * [out] *pOffset       bit position of the next unread bit, 0..7
+ * Returns Value: the N bits read, right aligned
+ */
+
+
+OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+
+    if(N == 0)
+    {
+        return 0;
+    }
+
+    armAssert(Offset>=0 && Offset<=7);
+    armAssert(N>=1 && N<=32);
+
+    /* Read next 32 bits from stream. The bytes are widened to OMX_U32
+     * before shifting: a promoted int shifted into its sign bit would be
+     * undefined behaviour. */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16) |
+            ((OMX_U32)pBitStream[2] << 8)  |  (OMX_U32)pBitStream[3];
+
+    /* A byte aligned position needs nothing from the fifth byte, so it is
+     * not touched in that case. */
+    if (Offset != 0)
+    {
+        Value = (Value << Offset) | ((OMX_U32)pBitStream[4] >> (8-Offset));
+    }
+
+    /* Advance bitstream pointer by N bits */
+    Offset += N;
+    *ppBitStream = pBitStream + (Offset>>3);
+    *pOffset = Offset & 7;
+
+    /* Return N bits */
+    return Value >> (32-N);
+}
+
+/**
+ * Function: armByteAlign()
+ *
+ * Description:
+ * Moves the bitstream position to the next byte boundary. A position
+ * that already has a bit offset of 0 is left unchanged.
+ *
+ * Parameters:
+ * [in]  *ppBitStream   pointer to the current byte
+ * [in]  *pOffset       bit position inside the current byte
+ *
+ * [out] *ppBitStream   advanced by one byte if *pOffset was positive
+ * [out] *pOffset       0 if *pOffset was positive
+ *
+ **/
+
+OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset)
+{
+    if(*pOffset <= 0)
+    {
+        /* Nothing to skip */
+        return;
+    }
+
+    ++(*ppBitStream);
+    *pOffset = 0;
+}
+
+/**
+ * Function: armSkipBits()
+ *
+ * Description:
+ * Advances the bitstream position by N bits without reading any data.
+ *
+ * Parameters:
+ * [in]  *ppBitStream   pointer to the current byte
+ * [in]  *pOffset       bit position inside the current byte
+ * [in]  N              number of bits to skip
+ *
+ * [out] *ppBitStream   advanced by (*pOffset + N) >> 3 bytes
+ * [out] *pOffset       (*pOffset + N) & 7
+ *
+ **/
+
+
+OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N)
+{
+    /* Bit position of the target, counted from the start of the current byte */
+    const OMX_INT bitPos = *pOffset + N;
+
+    *ppBitStream += (bitPos>>3);
+    *pOffset = bitPos & 7;
+}
+
+/***************************************
+ * Variable bit length Decode
+ ***************************************/
+
+/**
+ * Function: armUnPackVLC32()
+ *
+ * Description:
+ * Variable length decode of a variable length symbol (max size 32 bits)
+ * read from the bit stream pointed by *ppBitStream at *pOffset by using
+ * the table pointed by pCodeBook. The table is searched in order up to
+ * its terminating entry, which has a codeLen of 0; the first entry whose
+ * codeWord equals the leading codeLen bits of the stream is taken.
+ * Four bytes are always read from *ppBitStream; a fifth byte is read
+ * only when *pOffset is not zero.
+ *
+ * Parameters:
+ * [in]  *ppBitStream   pointer to the current byte
+ * [in]  *pOffset       bit position inside the current byte, 0..7
+ * [in]  pCodeBook      code table, terminated by an entry with codeLen 0
+ *
+ * [out] *ppBitStream   advanced past the decoded code (on a match only)
+ * [out] *pOffset       bit position after the decoded code (on a match only)
+ *
+ * Returns : Code Book Index if successful.
+ *         : ARM_NO_CODEBOOK_INDEX if search fails.
+ **/
+#ifndef C_OPTIMIZED_IMPLEMENTATION
+
+OMX_U16 armUnPackVLC32(
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pOffset,
+    const ARM_VLC32 *pCodeBook
+)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+    OMX_INT Index;
+
+    armAssert(Offset>=0 && Offset<=7);
+
+    /* Read next 32 bits from stream. The bytes are widened to OMX_U32
+     * before shifting: a promoted int shifted into its sign bit would be
+     * undefined behaviour. */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16) |
+            ((OMX_U32)pBitStream[2] << 8)  |  (OMX_U32)pBitStream[3];
+
+    /* A byte aligned position needs nothing from the fifth byte, so it is
+     * not touched in that case. */
+    if (Offset != 0)
+    {
+        Value = (Value << Offset) | ((OMX_U32)pBitStream[4] >> (8-Offset));
+    }
+
+    /* Search through the codebook */
+    for (Index=0; pCodeBook->codeLen != 0; Index++)
+    {
+        if (pCodeBook->codeWord == (Value >> (32 - pCodeBook->codeLen)))
+        {
+            /* Consume the matched code */
+            Offset = Offset + pCodeBook->codeLen;
+            *ppBitStream = pBitStream + (Offset >> 3) ;
+            *pOffset = Offset & 7;
+
+            return Index;
+        }
+        pCodeBook++;
+    }
+
+    /* No code match found */
+    return ARM_NO_CODEBOOK_INDEX;
+}
+
+#endif
+
+/***************************************
+ * Fixed bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackBits
+ *
+ * Description:
+ * Pack a fixed length code word into the bitstream, most significant
+ * bit first, starting at bit position *pOffset of the byte **ppBitStream.
+ * The bits of the first byte that lie before *pOffset are preserved.
+ *
+ * Remarks:
+ * The byte at the final position is always stored. When the code ends
+ * exactly on a byte boundary this is a zero byte written at the new
+ * *ppBitStream position.
+ * NOTE(review): the caller's buffer therefore presumably needs one
+ * writable byte beyond the packed data - confirm against callers.
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte
+ * in the bit stream.
+ * [in] pOffset pointer to the bit position in the byte
+ * pointed by *ppBitStream. Valid within 0
+ * to 7.
+ * [in] codeWord Code word that need to be inserted in to the
+ * bitstream
+ * [in] codeLength Length of the code word valid range 1...32
+ *
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded,
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pOffset *pOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * OMX_Sts_BadArgErr if *pOffset is outside 0..7 or codeLength is
+ * outside 1..32, OMX_Sts_NoErr otherwise.
+ *
+ */
+
+OMXResult armPackBits (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pOffset,
+ OMX_U32 codeWord,
+ OMX_INT codeLength
+)
+{
+ OMX_U8 *pBitStream = *ppBitStream;
+ OMX_INT Offset = *pOffset;
+ OMX_U32 Value;
+
+ /* checking argument validity */
+ armRetArgErrIf(Offset < 0, OMX_Sts_BadArgErr);
+ armRetArgErrIf(Offset > 7, OMX_Sts_BadArgErr);
+ armRetArgErrIf(codeLength < 1, OMX_Sts_BadArgErr);
+ armRetArgErrIf(codeLength > 32, OMX_Sts_BadArgErr);
+
+ /* Prepare the first byte */
+ /* Left-justify the code in 32 bits; bits above codeLength are discarded */
+ codeWord = codeWord << (32-codeLength);
+ /* Keep the Offset leading bits already in the byte, clear the rest */
+ Value = (pBitStream[0] >> (8-Offset)) << (8-Offset);
+ /* Fill the remaining 8-Offset bits with the top bits of the code */
+ Value = Value | (codeWord >> (24+Offset));
+
+ /* Write out whole bytes */
+ /* Each pass completes the current byte, consumes the 8-Offset code bits
+ * it took and continues byte aligned with the next 8 code bits */
+ while (8-Offset <= codeLength)
+ {
+ *pBitStream++ = (OMX_U8)Value;
+ codeWord = codeWord << (8-Offset);
+ codeLength = codeLength - (8-Offset);
+ Offset = 0;
+ Value = codeWord >> 24;
+ }
+
+ /* Write out final partial byte */
+ /* codeLength now holds the number of code bits placed in this byte */
+ *pBitStream = (OMX_U8)Value;
+ *ppBitStream = pBitStream;
+ *pOffset = Offset + codeLength;
+
+ return OMX_Sts_NoErr;
+}
+
+/***************************************
+ * Variable bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackVLC32
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream by handing its codeWord and
+ * codeLen fields to armPackBits.
+ *
+ * Parameters:
+ * [in]  ppBitStream   pointer to the pointer to the current byte
+ *                     in the bit stream.
+ * [in]  pBitOffset    pointer to the bit position in the byte
+ *                     pointed by *ppBitStream. Valid within 0
+ *                     to 7.
+ * [in]  code          VLC code word that needs to be inserted into the
+ *                     bitstream
+ *
+ * [out] ppBitStream   *ppBitStream is updated after the code is packed,
+ *                     so that it points to the current byte in the bit
+ *                     stream buffer.
+ * [out] pBitOffset    *pBitOffset is updated so that it points to the
+ *                     current bit position in the byte pointed by
+ *                     *ppBitStream.
+ *
+ * Return Value:
+ * The result code returned by armPackBits.
+ *
+ */
+
+OMXResult armPackVLC32 (
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    ARM_VLC32 code
+)
+{
+    const OMX_U32 word = code.codeWord;
+    const OMX_INT length = code.codeLen;
+
+    return armPackBits(ppBitStream, pBitOffset, word, length);
+}
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_IDCTTable.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_IDCTTable.c
new file mode 100755
index 0000000..3f5e279
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_IDCTTable.c
@@ -0,0 +1,60 @@
+/**
+ *
+ * File Name: armCOMM_IDCTTable.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armCOMM_IDCTTable.c
+ * Brief: Defines Tables used in IDCT computation
+ *
+ */
+
+#include "armCOMM_IDCTTable.h"
+
+ /* Table of s(u)*A(u)*A(v)/16 at Q15
+ * s(u)=1.0 0 <= u <= 5
+ * s(6)=2.0
+ * s(7)=4.0
+ * A(0) = 2*sqrt(2)
+ * A(u) = 4*cos(u*pi/16) for (u!=0)
+ *
+ * 64 entries, written below as 8 rows of 8 values. The first entry
+ * 0x4000 is 0.5 in Q15, i.e. A(0)*A(0)/16.
+ * NOTE(review): in the data the x2 / x4 factors show up in the 7th and
+ * 8th entry of every row (e.g. 0x4546 = 2 * 0x22a3), so u appears to be
+ * the position within a row - confirm against the IDCT code that reads
+ * this table.
+ * NOTE(review): __align(4) is presumably the ARM compiler keyword
+ * requesting 4 byte alignment - confirm against the toolchain manual.
+ */
+
+__align(4) const OMX_U16 armCOMM_IDCTPreScale [64] =
+{
+ 0x4000, 0x58c5, 0x539f, 0x4b42, 0x4000, 0x3249, 0x4546, 0x46a1,
+ 0x58c5, 0x7b21, 0x73fc, 0x6862, 0x58c5, 0x45bf, 0x6016, 0x61f8,
+ 0x539f, 0x73fc, 0x6d41, 0x6254, 0x539f, 0x41b3, 0x5a82, 0x5c48,
+ 0x4b42, 0x6862, 0x6254, 0x587e, 0x4b42, 0x3b21, 0x5175, 0x530d,
+ 0x4000, 0x58c5, 0x539f, 0x4b42, 0x4000, 0x3249, 0x4546, 0x46a1,
+ 0x3249, 0x45bf, 0x41b3, 0x3b21, 0x3249, 0x2782, 0x366d, 0x377e,
+ 0x22a3, 0x300b, 0x2d41, 0x28ba, 0x22a3, 0x1b37, 0x257e, 0x263a,
+ 0x11a8, 0x187e, 0x1712, 0x14c3, 0x11a8, 0x0de0, 0x131d, 0x137d
+};
+ /* The armCOMM_IDCTPreScale factors in Q23 format: same 8 x 8 layout,
+ * with 8 more fraction bits per entry (first entry 0x400000 is 0.5). */
+const OMX_U32 armCOMM_IDCTPreScaleU32 [64] =
+{
+ 0x400000, 0x58c543, 0x539eba, 0x4b418c, 0x400000, 0x3248d4, 0x4545ea, 0x46a157,
+ 0x58c543, 0x7b20d8, 0x73fbfc, 0x686214, 0x58c543, 0x45bf1f, 0x6015a5, 0x61f78b,
+ 0x539eba, 0x73fbfc, 0x6d413d, 0x6253a6, 0x539eba, 0x41b328, 0x5a827a, 0x5c4869,
+ 0x4b418c, 0x686214, 0x6253a6, 0x587de3, 0x4b418c, 0x3b20d8, 0x5174e0, 0x530d69,
+ 0x400000, 0x58c543, 0x539eba, 0x4b418c, 0x400000, 0x3248d4, 0x4545ea, 0x46a157,
+ 0x3248d4, 0x45bf1f, 0x41b328, 0x3b20d8, 0x3248d4, 0x27821d, 0x366d72, 0x377e6b,
+ 0x22a2f5, 0x300ad3, 0x2d413d, 0x28ba70, 0x22a2f5, 0x1b36b9, 0x257d86, 0x26398d,
+ 0x11a856, 0x187de3, 0x17121a, 0x14c35a, 0x11a856, 0x0ddf9b, 0x131cc7, 0x137ca2
+};
+
+/* Trigonometric constants for the IDCT, stored in Q15 format
+ * (e.g. 0x5a82 / 32768 = 0.7071 = 1/sqrt(2)).
+ * NOTE(review): the fourth entry is 0 and presumably only pads the table
+ * to four elements - confirm against the code that loads it. */
+const OMX_U16 armCOMM_IDCTCoef [4] =
+{
+ 0x5a82, /* InvSqrt2 */
+ 0x30fc, /* SinPIBy8 */
+ 0x7642, /* CosPIBy8 */
+ 0x0000
+};
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_MaskTable.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_MaskTable.c
new file mode 100755
index 0000000..09f88c3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/src/armCOMM_MaskTable.c
@@ -0,0 +1,45 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armCOMM_MaskTable.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Mask Table to mask the end of array.
+ *
+ */
+
+#include "omxtypes.h"
+
+#define MaskTableSize 72
+
+const OMX_U16 armCOMM_qMaskTable16[MaskTableSize] =
+{
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000,
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF
+};
+
+const OMX_U8 armCOMM_qMaskTable8[MaskTableSize] =
+{
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00,
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVC.h
new file mode 100755
index 0000000..35b510b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVC.h
@@ -0,0 +1,1153 @@
+/**
+ *
+ * File Name: armVC.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVideo.h
+ * Brief: Declares API's/Basic Data types used across the OpenMAX Video domain
+ *
+ */
+
+
+#ifndef _armVideo_H_
+#define _armVideo_H_
+
+#include "omxVC.h"
+#include "armCOMM_Bitstream.h"
+
+/**
+ * ARM specific state structure to hold Motion Estimation information.
+ */
+
+struct m4p2_MESpec
+{
+ OMXVCM4P2MEParams MEParams;
+ OMXVCM4P2MEMode MEMode;
+};
+
+struct m4p10_MESpec
+{
+ OMXVCM4P10MEParams MEParams;
+ OMXVCM4P10MEMode MEMode;
+};
+
+typedef struct m4p2_MESpec ARMVCM4P2_MESpec;
+typedef struct m4p10_MESpec ARMVCM4P10_MESpec;
+
+/**
+ * Function: armVCM4P2_CompareMV
+ *
+ * Description:
+ * Performs comparison of motion vectors and SADs to decide the
+ * best MV and SAD
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] mvX x coordinate of the candidate motion vector
+ * [in] mvY y coordinate of the candidate motion vector
+ * [in] candSAD Candidate SAD
+ * [in] bestMVX x coordinate of the best motion vector
+ * [in] bestMVY y coordinate of the best motion vector
+ * [in] bestSAD best SAD
+ *
+ * Return Value:
+ * OMX_INT -- 1 to indicate that the current sad is the best
+ * 0 to indicate that it is NOT the best SAD
+ */
+
+OMX_INT armVCM4P2_CompareMV (
+ OMX_S16 mvX,
+ OMX_S16 mvY,
+ OMX_INT candSAD,
+ OMX_S16 bestMVX,
+ OMX_S16 bestMVY,
+ OMX_INT bestSAD);
+
+/**
+ * Function: armVCM4P2_ACDCPredict
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+ * to the function call, prediction direction (predDir) should be selected
+ * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDst pointer to the coefficient buffer which contains
+ * the quantized coefficient residuals (PQF) of the
+ * current block
+ * [in] pPredBufRow pointer to the coefficient row buffer
+ * [in] pPredBufCol pointer to the coefficient column buffer
+ * [in] curQP quantization parameter of the current block. curQP
+ * may equal to predQP especially when the current
+ * block and the predictor block are in the same
+ * macroblock.
+ * [in] predQP quantization parameter of the predictor block
+ * [in] predDir indicates the prediction direction which takes one
+ * of the following values:
+ * OMX_VIDEO_HORIZONTAL predict horizontally
+ * OMX_VIDEO_VERTICAL predict vertically
+ * [in] ACPredFlag a flag indicating if AC prediction should be
+ * performed. It is equal to ac_pred_flag in the bit
+ * stream syntax of MPEG-4
+ * [in] videoComp video component type (luminance, chrominance or
+ * alpha) of the current block
+ * [in] flag This flag defines whether one wants to use this function to
+ * calculate PQF (set 1, prediction) or QF (set 0, reconstruction)
+ * [out] pPreACPredict pointer to the predicted coefficients buffer.
+ * Filled ONLY if it is not NULL
+ * [out] pSrcDst pointer to the coefficient buffer which contains
+ * the quantized coefficients (QF) of the current
+ * block
+ * [out] pPredBufRow pointer to the updated coefficient row buffer
+ * [out] pPredBufCol pointer to the updated coefficient column buffer
+ * [out] pSumErr pointer to the updated sum of the difference
+ * between predicted and unpredicted coefficients
+ * If this is NULL, do not update
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_ACDCPredict(
+ OMX_S16 * pSrcDst,
+ OMX_S16 * pPreACPredict,
+ OMX_S16 * pPredBufRow,
+ OMX_S16 * pPredBufCol,
+ OMX_INT curQP,
+ OMX_INT predQP,
+ OMX_INT predDir,
+ OMX_INT ACPredFlag,
+ OMXVCM4P2VideoComponent videoComp,
+ OMX_U8 flag,
+ OMX_INT *pSumErr
+);
+
+/**
+ * Function: armVCM4P2_SetPredDir
+ *
+ * Description:
+ * Detects the prediction direction
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] blockIndex block index indicating the component type and
+ * position as defined in subclause 6.1.3.8, of ISO/IEC
+ * 14496-2. Furthermore, indexes 6 to 9 indicate the
+ * alpha blocks spatially corresponding to luminance
+ * blocks 0 to 3 in the same macroblock.
+ * [in] pCoefBufRow pointer to the coefficient row buffer
+ * [in] pQpBuf pointer to the quantization parameter buffer
+ * [out] predQP quantization parameter of the predictor block
+ * [out] predDir indicates the prediction direction which takes one
+ * of the following values:
+ * OMX_VIDEO_HORIZONTAL predict horizontally
+ * OMX_VIDEO_VERTICAL predict vertically
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_SetPredDir(
+ OMX_INT blockIndex,
+ OMX_S16 *pCoefBufRow,
+ OMX_S16 *pCoefBufCol,
+ OMX_INT *predDir,
+ OMX_INT *predQP,
+ const OMX_U8 *pQpBuf
+);
+
+/**
+ * Function: armVCM4P2_EncodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs zigzag scanning and VLC encoding for one intra block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * by *ppBitStream. Valid within 0 to 7.
+ * [in] pQDctBlkCoef pointer to the quantized DCT coefficient
+ * [in] predDir AC prediction direction, which is used to decide
+ * the zigzag scan pattern. This takes one of the
+ * following values:
+ * OMX_VIDEO_NONE AC prediction not used.
+ * Performs classical zigzag
+ * scan.
+ * OMX_VIDEO_HORIZONTAL Horizontal prediction.
+ * Performs alternate-vertical
+ * zigzag scan.
+ * OMX_VIDEO_VERTICAL Vertical prediction.
+ * Performs alternate-horizontal
+ * zigzag scan.
+ * [in] pattern block pattern which is used to decide whether
+ * this block is encoded
+ * [in] start start indicates whether the encoding begins with 0th element
+ * or 1st.
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded,
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_EncodeVLCZigzag_Intra(
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ const OMX_S16 *pQDctBlkCoef,
+ OMX_U8 predDir,
+ OMX_U8 pattern,
+ OMX_INT shortVideoHeader,
+ OMX_U8 start
+);
+
+/**
+ * Function: armVCM4P2_DecodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bitstream buffer
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * to by *ppBitStream. *pBitOffset is valid within
+ * [0-7].
+ * [in] predDir AC prediction direction which is used to decide
+ * the zigzag scan pattern. It takes one of the
+ * following values:
+ * OMX_VIDEO_NONE AC prediction not used;
+ * perform classical zigzag scan;
+ * OMX_VIDEO_HORIZONTAL Horizontal prediction;
+ * perform alternate-vertical
+ * zigzag scan;
+ * OMX_VIDEO_VERTICAL Vertical prediction;
+ * thus perform
+ * alternate-horizontal
+ * zigzag scan.
+ * [in] videoComp video component type (luminance, chrominance or
+ * alpha) of the current block
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ * and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start start indicates whether the decoding begins with 0th element
+ * or 1st.
+ * [out] ppBitStream *ppBitStream is updated after the block is
+ * decoded, so that it points to the current byte
+ * in the bit stream buffer
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream
+ * [out] pDst pointer to the coefficient buffer of current
+ * block. Should be 32-bit aligned
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_DecodeVLCZigzag_Intra(
+ const OMX_U8 ** ppBitStream,
+ OMX_INT * pBitOffset,
+ OMX_S16 * pDst,
+ OMX_U8 predDir,
+ OMX_INT shortVideoHeader,
+ OMX_U8 start
+);
+
+/**
+ * Function: armVCM4P2_FillVLDBuffer
+ *
+ * Description:
+ * Performs filling of the coefficient buffer according to the run, level
+ * and sign, also updates the index
+ *
+ * Parameters:
+ * [in] storeRun Stored Run value (count of zeros)
+ * [in] storeLevel Stored Level value (non-zero value)
+ * [in] sign Flag indicating the sign of level
+ * [in] last status of the last flag
+ * [in] pIndex pointer to coefficient index in 8x8 matrix
+ * [out] pIndex pointer to updated coefficient index in 8x8
+ * matrix
+ * [in] pZigzagTable pointer to the zigzag tables
+ * [out] pDst pointer to the coefficient buffer of current
+ * block. Should be 32-bit aligned
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLDBuffer(
+ OMX_U32 storeRun,
+ OMX_S16 * pDst,
+ OMX_S16 storeLevel,
+ OMX_U8 sign,
+ OMX_U8 last,
+ OMX_U8 * index,
+ const OMX_U8 * pZigzagTable
+);
+
+/**
+ * Function: armVCM4P2_GetVLCBits
+ *
+ * Description:
+ * Performs escape mode decision based on the run, run+, level, level+ and
+ * last combinations.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * by *ppBitStream. Valid within 0 to 7
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ * and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start start indicates whether the decoding begins with
+ * 0th element or 1st.
+ * [in/out] pLast pointer to last status flag
+ * [in] runBeginSingleLevelEntriesL0 The run value from which level
+ * will be equal to 1: last == 0
+ * [in] IndexBeginSingleLevelEntriesL0 Array index in the VLC table
+ * pointing to the
+ * runBeginSingleLevelEntriesL0
+ * [in] runBeginSingleLevelEntriesL1 The run value from which level
+ * will be equal to 1: last == 1
+ * [in] IndexBeginSingleLevelEntriesL1 Array index in the VLC table
+ * pointing to the
+ * runBeginSingleLevelEntriesL1
+ * [in] pRunIndexTableL0 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pVlcTableL0 VLC table for last == 0
+ * [in] pRunIndexTableL1 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pVlcTableL1 VLC table for last == 1
+ * [in] pLMAXTableL0 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pLMAXTableL1 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pRMAXTableL0 Run MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pRMAXTableL1 Run MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [out]pDst pointer to the coefficient buffer of current
+ * block. Should be 32-bit aligned
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_GetVLCBits (
+ const OMX_U8 **ppBitStream,
+ OMX_INT * pBitOffset,
+ OMX_S16 * pDst,
+ OMX_INT shortVideoHeader,
+ OMX_U8 start,
+ OMX_U8 * pLast,
+ OMX_U8 runBeginSingleLevelEntriesL0,
+ OMX_U8 maxIndexForMultipleEntriesL0,
+ OMX_U8 maxRunForMultipleEntriesL1,
+ OMX_U8 maxIndexForMultipleEntriesL1,
+ const OMX_U8 * pRunIndexTableL0,
+ const ARM_VLC32 *pVlcTableL0,
+ const OMX_U8 * pRunIndexTableL1,
+ const ARM_VLC32 *pVlcTableL1,
+ const OMX_U8 * pLMAXTableL0,
+ const OMX_U8 * pLMAXTableL1,
+ const OMX_U8 * pRMAXTableL0,
+ const OMX_U8 * pRMAXTableL1,
+ const OMX_U8 * pZigzagTable
+);
+
+/**
+ * Function: armVCM4P2_PutVLCBits
+ *
+ * Description:
+ * Checks the type of Escape Mode and puts encoded bits for
+ * quantized DCT coefficients.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * by *ppBitStream. Valid within 0 to 7
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ * and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start start indicates whether the encoding begins with
+ * 0th element or 1st.
+ * [in] maxStoreRunL0 Max store possible (considering last and inter/intra)
+ * for last = 0
+ * [in] maxStoreRunL1 Max store possible (considering last and inter/intra)
+ * for last = 1
+ * [in] maxRunForMultipleEntriesL0
+ * The run value after which level
+ * will be equal to 1:
+ * (considering last and inter/intra status) for last = 0
+ * [in] maxRunForMultipleEntriesL1
+ * The run value after which level
+ * will be equal to 1:
+ * (considering last and inter/intra status) for last = 1
+ * [in] pRunIndexTableL0 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pVlcTableL0 VLC table for last == 0
+ * [in] pRunIndexTableL1 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pVlcTableL1 VLC table for last == 1
+ * [in] pLMAXTableL0 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pLMAXTableL1 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pRMAXTableL0 Run MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pRMAXTableL1 Run MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pQDctBlkCoef pointer to the quantized DCT coefficient
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+
+OMXResult armVCM4P2_PutVLCBits (
+ OMX_U8 **ppBitStream,
+ OMX_INT * pBitOffset,
+ const OMX_S16 *pQDctBlkCoef,
+ OMX_INT shortVideoHeader,
+ OMX_U8 start,
+ OMX_U8 maxStoreRunL0,
+ OMX_U8 maxStoreRunL1,
+ OMX_U8 maxRunForMultipleEntriesL0,
+ OMX_U8 maxRunForMultipleEntriesL1,
+ const OMX_U8 * pRunIndexTableL0,
+ const ARM_VLC32 *pVlcTableL0,
+ const OMX_U8 * pRunIndexTableL1,
+ const ARM_VLC32 *pVlcTableL1,
+ const OMX_U8 * pLMAXTableL0,
+ const OMX_U8 * pLMAXTableL1,
+ const OMX_U8 * pRMAXTableL0,
+ const OMX_U8 * pRMAXTableL1,
+ const OMX_U8 * pZigzagTable
+);
+/**
+ * Function: armVCM4P2_FillVLCBuffer
+ *
+ * Description:
+ * Calculates the VLC bits depending on the escape type and inserts
+ * them into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * by *ppBitStream. Valid within 0 to 7
+ * [in] run Run value (count of zeros) to be encoded
+ * [in] level Level value (non-zero value) to be encoded
+ * [in] runPlus Calculated as runPlus = run - (RMAX + 1)
+ * [in] levelPlus Calculated as
+ * levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in] fMode Flag indicating the escape modes
+ * [in] last status of the last flag
+ * [in] maxRunForMultipleEntries
+ * The run value after which level will be equal to 1:
+ * (considering last and inter/intra status)
+ * [in] pRunIndexTable Run Index table defined in
+ * armVCM4P2_Huff_tables_VLC.h
+ * [in] pVlcTable VLC table defined in armVCM4P2_Huff_tables_VLC.h
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLCBuffer (
+ OMX_U8 **ppBitStream,
+ OMX_INT * pBitOffset,
+ OMX_U32 run,
+ OMX_S16 level,
+ OMX_U32 runPlus,
+ OMX_S16 levelPlus,
+ OMX_U8 fMode,
+ OMX_U8 last,
+ OMX_U8 maxRunForMultipleEntries,
+ const OMX_U8 *pRunIndexTable,
+ const ARM_VLC32 *pVlcTable
+);
+
+/**
+ * Function: armVCM4P2_CheckVLCEscapeMode
+ *
+ * Description:
+ * Performs escape mode decision based on the run, run+, level, level+ and
+ * last combinations.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] run Run value (count of zeros) to be encoded
+ * [in] level Level value (non-zero value) to be encoded
+ * [in] runPlus Calculated as runPlus = run - (RMAX + 1)
+ * [in] levelPlus Calculated as
+ * levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in] maxStoreRun Max store possible (considering last and inter/intra)
+ * [in] maxRunForMultipleEntries
+ * The run value after which level
+ * will be equal to 1:
+ * (considering last and inter/intra status)
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ * and escape mode 4 is used when shortVideoHeader==1.
+ * [in] pRunIndexTable Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c
+ * (considering last and inter/intra status)
+ *
+ *
+ * Return Value:
+ * Returns an Escape mode which can take values from 0 to 3
+ * 0 --> no escape mode, 1 --> escape type 1,
+ * 2 --> escape type 2, 3 --> escape type 3, check section 7.4.1.3
+ * in the MPEG ISO standard.
+ *
+ */
+
+OMX_U8 armVCM4P2_CheckVLCEscapeMode(
+ OMX_U32 run,
+ OMX_U32 runPlus,
+ OMX_S16 level,
+ OMX_S16 levelPlus,
+ OMX_U8 maxStoreRun,
+ OMX_U8 maxRunForMultipleEntries,
+ OMX_INT shortVideoHeader,
+ const OMX_U8 *pRunIndexTable
+);
+
+
+/**
+ * Function: armVCM4P2_BlockMatch_Integer
+ *
+ * Description:
+ * Performs a 16x16 block search; estimates motion vector and associated minimum SAD.
+ * Both the input and output motion vectors are represented using half-pixel units, and
+ * therefore a shift left or right by 1 bit may be required, respectively, to match the
+ * input or output MVs with other functions that either generate output MVs or expect
+ * input MVs represented using integer pixel units.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB that
+ * corresponds to the location of the current macroblock in the current
+ * plane.
+ * [in] refWidth width of the reference plane
+ * [in] pRefRect pointer to the valid rectangle in the reference plane. Relative to image origin.
+ * It's not limited to the image boundary, but depends on the padding. For example,
+ * if you pad 4 pixels outside the image border, then the value for left border
+ * can be -4
+ * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane (linear array,
+ * 256 entries); must be aligned on an 8-byte boundary.
+ * [in] pCurrPointPos position of the current macroblock in the current plane
+ * [in] pSrcPreMV pointer to predicted motion vector; NULL indicates no predicted MV
+ * [in] pSrcPreSAD pointer to SAD associated with the predicted MV (referenced by pSrcPreMV)
+ * [in] searchRange search range for 16X16 integer block, in units of full pixels; the search range
+ * is the same in all directions. It is inclusive of the boundary and specified in
+ * terms of integer pixel units.
+ * [in] pMESpec vendor-specific motion estimation specification structure; must have been allocated
+ * and then initialized using omxVCM4P2_MEInit prior to calling the block matching
+ * function.
+ * [in] BlockSize MacroBlock Size i.e either 16x16 or 8x8.
+ * [out] pDstMV pointer to estimated MV
+ * [out] pDstSAD pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error.
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Integer(
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ const OMXVCMotionVector *pSrcPreMV,
+ const OMX_INT *pSrcPreSAD,
+ void *pMESpec,
+ OMXVCMotionVector *pDstMV,
+ OMX_INT *pDstSAD,
+ OMX_U8 BlockSize
+);
+
+/**
+ * Function: armVCM4P2_BlockMatch_Half
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution. Returns the estimated
+ * motion vector and associated minimum SAD. This function estimates the half-pixel
+ * motion vector by interpolating the integer resolution motion vector referenced
+ * by the input parameter pSrcDstMV, i.e., the initial integer MV is generated
+ * externally. The input parameters pSrcRefBuf and pSearchPointRefPos should be
+ * shifted by the winning MV of 16x16 integer search prior to calling BlockMatch_Half_16x16.
+ * The function BlockMatch_Integer_16x16 may be used for integer motion estimation.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB
+ * that corresponds to the location of the current macroblock in
+ * the current plane.
+ * [in] refWidth width of the reference plane
+ * [in] pRefRect reference plane valid region rectangle
+ * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane
+ * (linear array, 256 entries); must be aligned on an 8-byte boundary.
+ * [in] pSearchPointRefPos position of the starting point for half pixel search (specified
+ * in terms of integer pixel units) in the reference plane.
+ * [in] rndVal rounding control bit for half pixel motion estimation;
+ * 0=rounding control disabled; 1=rounding control enabled
+ * [in] pSrcDstMV pointer to the initial MV estimate; typically generated during a prior
+ * 16X16 integer search and its unit is half pixel.
+ * [in] BlockSize MacroBlock Size i.e either 16x16 or 8x8.
+ * [out]pSrcDstMV pointer to estimated MV
+ * [out]pDstSAD pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Half(
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pSearchPointRefPos,
+ OMX_INT rndVal,
+ OMXVCMotionVector *pSrcDstMV,
+ OMX_INT *pDstSAD,
+ OMX_U8 BlockSize
+);
+/**
+ * Function: armVCM4P2_PadMV
+ *
+ * Description:
+ * Performs motion vector padding for a macroblock.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDstMV pointer to motion vector buffer of the current
+ * macroblock
+ * [in] pTransp pointer to transparent status buffer of the
+ * current macroblock
+ * [out] pSrcDstMV pointer to motion vector buffer in which the
+ * motion vectors have been padded
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_PadMV(
+ OMXVCMotionVector * pSrcDstMV,
+ OMX_U8 * pTransp
+);
+
+/*
+ * H.264 Specific Declarations
+ */
+/* Defines */
+#define ARM_M4P10_Q_OFFSET (15)
+
+
+/* Dequant tables */
+
+extern const OMX_U8 armVCM4P10_PosToVCol4x4[16];
+extern const OMX_U8 armVCM4P10_PosToVCol2x2[4];
+extern const OMX_U8 armVCM4P10_VMatrix[6][3];
+extern const OMX_U32 armVCM4P10_MFMatrix[6][3];
+
+
+/*
+ * Description:
+ * This function performs the work required by the OpenMAX
+ * DecodeCoeffsToPair function and DecodeChromaDCCoeffsToPair.
+ * Since most of the code is common we share it here.
+ *
+ * Parameters:
+ * [in] ppBitStream Double pointer to current byte in bit stream buffer
+ * [in] pOffset Pointer to current bit position in the byte pointed
+ * to by *ppBitStream
+ * [in] sMaxNumCoeff Maximum number of non-zero coefficients in current
+ * block (4,15 or 16)
+ * [in] nTable Table number (0 to 4) according to the five columns
+ * of Table 9-5 in the H.264 spec
+ * [out] ppBitStream *ppBitStream is updated after each block is decoded
+ * [out] pOffset *pOffset is updated after each block is decoded
+ * [out] pNumCoeff Pointer to the number of nonzero coefficients in
+ * this block
+ * [out] ppPosCoefbuf Double pointer to destination residual
+ * coefficient-position pair buffer
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+
+ */
+
+OMXResult armVCM4P10_DecodeCoeffsToPair(
+ const OMX_U8** ppBitStream,
+ OMX_S32* pOffset,
+ OMX_U8* pNumCoeff,
+ OMX_U8**ppPosCoefbuf,
+ OMX_INT nTable,
+ OMX_INT sMaxNumCoeff
+ );
+
+/*
+ * Description:
+ * Perform DC style intra prediction, averaging upper and left block
+ *
+ * Parameters:
+ * [in] pSrcLeft Pointer to the buffer of 4 left coefficients:
+ * p[x, y] (x = -1, y = 0..3)
+ * [in] pSrcAbove Pointer to the buffer of 4 above coefficients:
+ * p[x,y] (x = 0..3, y = -1)
+ * [in] leftStep Step of left coefficient buffer
+ * [in] dstStep Step of the destination buffer
+ * [in] availability Neighboring 16x16 MB availability flag
+ * [out] pDst Pointer to the destination buffer
+ *
+ * Return Value:
+ * None
+ */
+
+void armVCM4P10_PredictIntraDC4x4(
+ const OMX_U8* pSrcLeft,
+ const OMX_U8 *pSrcAbove,
+ OMX_U8* pDst,
+ OMX_INT leftStep,
+ OMX_INT dstStep,
+ OMX_S32 availability
+);
+
+/*
+ * Description
+ * Unpack a 4x4 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in] ppSrc Double pointer to residual coefficient-position pair
+ * buffer output by CAVLC decoding
+ * [out] ppSrc *ppSrc is updated to the start of next non empty block
+ * [out] pDst Pointer to unpacked 4x4 block
+ */
+
+void armVCM4P10_UnpackBlock4x4(
+ const OMX_U8 **ppSrc,
+ OMX_S16* pDst
+);
+
+/*
+ * Description
+ * Unpack a 2x2 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in] ppSrc Double pointer to residual coefficient-position pair
+ * buffer output by CAVLC decoding
+ * [out] ppSrc *ppSrc is updated to the start of next non empty block
+ * [out] pDst Pointer to unpacked 2x2 block
+ */
+
+void armVCM4P10_UnpackBlock2x2(
+ const OMX_U8 **ppSrc,
+ OMX_S16* pDst
+);
+
+/*
+ * Description
+ * Deblock one boundary pixel
+ *
+ * Parameters:
+ * [in] pQ0 Pointer to pixel q0
+ * [in] Step Step between pixels q0 and q1
+ * [in] tC0 Edge threshold value
+ * [in] alpha alpha threshold value
+ * [in] beta beta threshold value
+ * [in] bS deblocking strength
+ * [in] ChromaFlag True for chroma blocks
+ * [out] pQ0 Deblocked pixels
+ *
+ */
+
+void armVCM4P10_DeBlockPixel(
+ OMX_U8 *pQ0, /* pointer to the pixel q0 */
+ int Step, /* step between pixels q0 and q1 */
+ int tC0, /* edge threshold value */
+ int alpha, /* alpha */
+ int beta, /* beta */
+ int bS, /* deblocking strength */
+ int ChromaFlag
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfHor_Luma
+ *
+ * Description:
+ * This function performs interpolation for horizontal 1/2-pel positions
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to interpolate
+ * in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [out] pDst Pointer to the interpolation buffer of the 1/2-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfHor_Luma(
+ const OMX_U8* pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8* pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfVer_Luma
+ *
+ * Description:
+ * This function performs interpolation for vertical 1/2-pel positions
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to interpolate
+ * in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [out] pDst Pointer to the interpolation buffer of the 1/2-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfVer_Luma(
+ const OMX_U8* pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8* pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfDiag_Luma
+ *
+ * Description:
+ * This function performs interpolation for (1/2, 1/2) positions
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to interpolate
+ * in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [out] pDst Pointer to the interpolation buffer of the (1/2,1/2)-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfDiag_Luma(
+ const OMX_U8* pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8* pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+/*
+ * Description:
+ * Transform Residual 4x4 Coefficients
+ *
+ * Parameters:
+ * [in] pSrc Source 4x4 block
+ * [out] pDst Destination 4x4 block
+ *
+ */
+
+void armVCM4P10_TransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc);
+
+/*
+ * Description:
+ * Forward Transform Residual 4x4 Coefficients
+ *
+ * Parameters:
+ * [in] pSrc Source 4x4 block
+ * [out] pDst Destination 4x4 block
+ *
+ */
+
+void armVCM4P10_FwdTransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc);
+
+OMX_INT armVCM4P10_CompareMotionCostToMV (
+ OMX_S16 mvX,
+ OMX_S16 mvY,
+ OMXVCMotionVector diffMV,
+ OMX_INT candSAD,
+ OMXVCMotionVector *bestMV,
+ OMX_U32 nLamda,
+ OMX_S32 *pBestCost);
+
+/**
+ * Function: armVCCOMM_SAD
+ *
+ * Description:
+ * This function calculates the SAD for NxM blocks.
+ *
+ * Remarks:
+ *
+ * [in] pSrcOrg Pointer to the original block
+ * [in] iStepOrg Step of the original block buffer
+ * [in] pSrcRef Pointer to the reference block
+ * [in] iStepRef Step of the reference block buffer
+ * [in] iHeight Height of the block
+ * [in] iWidth Width of the block
+ * [out] pDstSAD Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCCOMM_SAD(
+ const OMX_U8* pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8* pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_S32* pDstSAD,
+ OMX_U32 iHeight,
+ OMX_U32 iWidth);
+
+/**
+ * Function: armVCCOMM_Average
+ *
+ * Description:
+ * This function calculates the average of two blocks and stores the result.
+ *
+ * Remarks:
+ *
+ * [in] pPred0 Pointer to the top-left corner of reference block 0
+ * [in] pPred1 Pointer to the top-left corner of reference block 1
+ * [in] iPredStep0 Step of reference block 0
+ * [in] iPredStep1 Step of reference block 1
+ * [in] iDstStep Step of the destination buffer
+ * [in] iWidth Width of the blocks
+ * [in] iHeight Height of the blocks
+ * [out] pDstPred Pointer to the destination buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCCOMM_Average (
+ const OMX_U8* pPred0,
+ const OMX_U8* pPred1,
+ OMX_U32 iPredStep0,
+ OMX_U32 iPredStep1,
+ OMX_U8* pDstPred,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+/**
+ * Function: armVCM4P10_SADQuar
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the
+ * average of the other two (pSrcRef0 and pSrcRef1)
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to the original block
+ * [in] pSrcRef0 Pointer to reference block 0
+ * [in] pSrcRef1 Pointer to reference block 1
+ * [in] iSrcStep Step of the original block buffer
+ * [in] iRefStep0 Step of reference block 0
+ * [in] iRefStep1 Step of reference block 1
+ * [in] iHeight Height of the block
+ * [in] iWidth Width of the block
+ * [out] pDstSAD Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCM4P10_SADQuar(
+ const OMX_U8* pSrc,
+ const OMX_U8* pSrcRef0,
+ const OMX_U8* pSrcRef1,
+ OMX_U32 iSrcStep,
+ OMX_U32 iRefStep0,
+ OMX_U32 iRefStep1,
+ OMX_U32* pDstSAD,
+ OMX_U32 iHeight,
+ OMX_U32 iWidth
+);
+
+/**
+ * Function: armVCM4P10_Interpolate_Chroma
+ *
+ * Description:
+ * This function performs interpolation for chroma components.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to
+ * interpolate in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [in] dx Fractional part of horizontal motion vector
+ * component in 1/8 pixel unit (0~7)
+ * [in] dy Fractional part of vertical motion vector
+ * component in 1/8 pixel unit (0~7)
+ * [out] pDst Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCM4P10_Interpolate_Chroma(
+ OMX_U8 *pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8 *pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight,
+ OMX_U32 dx,
+ OMX_U32 dy
+);
+
+/**
+ * Function: armVCM4P10_Interpolate_Luma
+ *
+ * Description:
+ * This function performs interpolation for luma components.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to
+ * interpolate in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [in] dx Fractional part of horizontal motion vector
+ * component in 1/4 pixel unit (0~3)
+ * [in] dy Fractional part of vertical motion vector
+ * component in 1/4 pixel unit (0~3)
+ * [out] pDst Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+ OMXResult armVCM4P10_Interpolate_Luma(
+ const OMX_U8 *pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8 *pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight,
+ OMX_U32 dx,
+ OMX_U32 dy
+);
+
+/**
+ * Function: armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx
+ *
+ * Description:
+ * Reconstruct the 4x4 residual block from coefficient-position pair buffer,
+ * perform dequantisation and integer inverse transformation for 4x4 block of
+ * residuals and update the pair buffer pointer to next non-empty block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in,out] ppSrc Double pointer to residual coefficient-position
+ * pair buffer output by CAVLC decoding
+ * [in] pDC Pointer to the DC coefficient of this block, NULL
+ * if it doesn't exist
+ * [in] QP Quantization parameter
+ * [in] AC Flag indicating if at least one non-zero coefficient exists
+ * [out] pDst pointer to the reconstructed 4x4 block data
+ *
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx(
+ OMX_U8 **ppSrc,
+ OMX_S16 *pDst,
+ OMX_INT QP,
+ OMX_S16* pDC,
+ int AC
+);
+
+#endif /*_armVideo_H_*/
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVCCOMM_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVCCOMM_s.h
new file mode 100755
index 0000000..32a0166
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/armVCCOMM_s.h
@@ -0,0 +1,72 @@
+;//
+;//
+;// File Name: armVCCOMM_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// ARM optimized OpenMAX VC header file
+;//
+;// Formula used:
+;// MACRO for calculating median for three values.
+
+
+
+ IF :LNOT::DEF:ARMVCCOMM_S_H
+ INCLUDE armCOMM_s.h
+ M_VARIANTS CortexA8, ARM1136JS
+
+ IF ARM1136JS :LOR: CortexA8
+
+ ;///*
+ ;// * Macro: M_MEDIAN3
+ ;// *
+ ;// * Description: Finds the median of three numbers
+ ;// *
+ ;// * Remarks: Uses signed comparisons (LT/GT); updates the condition flags.
+ ;// *
+ ;// * Parameters:
+ ;// * [in] x First entry for the list of three numbers (not written).
+ ;// * [in] y Second entry for the list of three numbers.
+ ;// * Input value may be corrupted at the end of
+ ;// * the execution of this macro.
+ ;// * [in] z Third entry of the list of three numbers.
+ ;// * Input value corrupted at the end of the
+ ;// * execution of this macro.
+ ;// * [in] t Temporary scratch register; overwritten with (y - z).
+ ;// * [out]z Median of the three numbers.
+ ;// */
+
+ MACRO
+
+ M_MEDIAN3 $x, $y, $z, $t
+
+ SUBS $t, $y, $z; // if (y < z)
+ ADDLT $z, $z, $t; // swap y and z
+ SUBLT $y, $y, $t;
+
+ ;// Now z' <= y', so there are three cases for the
+ ;// median value, depending on x.
+
+ ;// 1) x <= z' <= y' : median value is z'
+ ;// 2) z' <= x <= y' : median value is x
+ ;// 3) z' <= y' <= x : median value is y'
+
+ CMP $z, $x; // if ( x > min(y,z) )
+ MOVLT $z, $x; // ans = x
+
+ CMP $x, $y; // if ( x > max(y,z) )
+ MOVGT $z, $y; // ans = max(y,z)
+
+ MEND
+ ENDIF
+
+
+
+ ENDIF ;// ARMVCCOMM_S_H
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC.h
new file mode 100755
index 0000000..7b3cc72
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC.h
@@ -0,0 +1,4381 @@
+/**
+ * File: omxVC.h
+ * Brief: OpenMAX DL v1.0.2 - Video Coding library
+ *
+ * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved.
+ *
+ * These materials are protected by copyright laws and contain material
+ * proprietary to the Khronos Group, Inc. You may use these materials
+ * for implementing Khronos specifications, without altering or removing
+ * any trademark, copyright or other notice from the specification.
+ *
+ * Khronos Group makes no, and expressly disclaims any, representations
+ * or warranties, express or implied, regarding these materials, including,
+ * without limitation, any implied warranties of merchantability or fitness
+ * for a particular purpose or non-infringement of any intellectual property.
+ * Khronos Group makes no, and expressly disclaims any, warranties, express
+ * or implied, regarding the correctness, accuracy, completeness, timeliness,
+ * and reliability of these materials.
+ *
+ * Under no circumstances will the Khronos Group, or any of its Promoters,
+ * Contributors or Members or their respective partners, officers, directors,
+ * employees, agents or representatives be liable for any damages, whether
+ * direct, indirect, special or consequential damages for lost revenues,
+ * lost profits, or otherwise, arising from or in connection with these
+ * materials.
+ *
+ * Khronos and OpenMAX are trademarks of the Khronos Group Inc.
+ *
+ */
+
+/* *****************************************************************************************/
+
+#ifndef _OMXVC_H_
+#define _OMXVC_H_
+
+#include "omxtypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* 6.1.1.1 Motion Vectors */
+/* In omxVC, motion vectors are represented as follows: */
+
+typedef struct {
+ OMX_S16 dx;
+ OMX_S16 dy;
+} OMXVCMotionVector;
+
+
+
+/**
+ * Function: omxVCCOMM_Average_8x (6.1.3.1.1)
+ *
+ * Description:
+ * This function calculates the average of two 8x4, 8x8, or 8x16 blocks. The
+ * result is rounded according to (a+b+1)/2. The block average function can
+ * be used in conjunction with half-pixel interpolation to obtain quarter
+ * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1.
+ *
+ * Input Arguments:
+ *
+ * pPred0 - Pointer to the top-left corner of reference block 0
+ * pPred1 - Pointer to the top-left corner of reference block 1
+ * iPredStep0 - Step of reference block 0
+ * iPredStep1 - Step of reference block 1
+ * iDstStep - Step of the destination buffer.
+ * iHeight - Height of the blocks
+ *
+ * Output Arguments:
+ *
+ * pDstPred - Pointer to the destination buffer. 8-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pPred0, pPred1, or
+ * pDstPred.
+ * - pDstPred is not aligned on an 8-byte boundary.
+ * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 8.
+ * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 8.
+ * - iDstStep <= 0 or iDstStep is not a multiple of 8.
+ * - iHeight is not 4, 8, or 16.
+ *
+ */
+OMXResult omxVCCOMM_Average_8x (
+ const OMX_U8 *pPred0,
+ const OMX_U8 *pPred1,
+ OMX_U32 iPredStep0,
+ OMX_U32 iPredStep1,
+ OMX_U8 *pDstPred,
+ OMX_U32 iDstStep,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCCOMM_Average_16x (6.1.3.1.2)
+ *
+ * Description:
+ * This function calculates the average of two 16x16 or 16x8 blocks. The
+ * result is rounded according to (a+b+1)/2. The block average function can
+ * be used in conjunction with half-pixel interpolation to obtain quarter
+ * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1.
+ *
+ * Input Arguments:
+ *
+ * pPred0 - Pointer to the top-left corner of reference block 0
+ * pPred1 - Pointer to the top-left corner of reference block 1
+ * iPredStep0 - Step of reference block 0
+ * iPredStep1 - Step of reference block 1
+ * iDstStep - Step of the destination buffer
+ * iHeight - Height of the blocks
+ *
+ * Output Arguments:
+ *
+ * pDstPred - Pointer to the destination buffer. 16-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pPred0, pPred1, or
+ * pDstPred.
+ * - pDstPred is not aligned on a 16-byte boundary.
+ * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 16.
+ * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 16.
+ * - iDstStep <= 0 or iDstStep is not a multiple of 16.
+ * - iHeight is not 8 or 16.
+ *
+ */
+OMXResult omxVCCOMM_Average_16x (
+ const OMX_U8 *pPred0,
+ const OMX_U8 *pPred1,
+ OMX_U32 iPredStep0,
+ OMX_U32 iPredStep1,
+ OMX_U8 *pDstPred,
+ OMX_U32 iDstStep,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCCOMM_ExpandFrame_I (6.1.3.2.1)
+ *
+ * Description:
+ * This function expands a reconstructed frame in-place. The unexpanded
+ * source frame should be stored in a plane buffer with sufficient space
+ * pre-allocated for edge expansion, and the input frame should be located in
+ * the plane buffer center. This function executes the pixel expansion by
+ * replicating source frame edge pixel intensities in the empty pixel
+ * locations (expansion region) between the source frame edge and the plane
+ * buffer edge. The width/height of the expansion regions on the
+ * horizontal/vertical edges is controlled by the parameter iExpandPels.
+ *
+ * Input Arguments:
+ *
+ * pSrcDstPlane - pointer to the top-left corner of the frame to be
+ * expanded; must be aligned on an 8-byte boundary.
+ * iFrameWidth - frame width; must be a multiple of 8.
+ * iFrameHeight - frame height; must be a multiple of 8.
+ * iExpandPels - number of pixels to be expanded in the horizontal and
+ * vertical directions; must be a multiple of 8.
+ * iPlaneStep - distance, in bytes, between the start of consecutive lines
+ * in the plane buffer; must be larger than or equal to
+ * (iFrameWidth + 2 * iExpandPels).
+ *
+ * Output Arguments:
+ *
+ * pSrcDstPlane - Pointer to the top-left corner of the frame (NOT the
+ * top-left corner of the plane); must be aligned on an 8-byte
+ * boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - pSrcDstPlane is NULL.
+ * - pSrcDstPlane is not aligned on an 8-byte boundary.
+ * - one of the following parameters is either equal to zero or is a
+ * non-multiple of 8: iFrameHeight, iFrameWidth, iPlaneStep, or
+ * iExpandPels.
+ * - iPlaneStep < (iFrameWidth + 2 * iExpandPels).
+ *
+ */
+OMXResult omxVCCOMM_ExpandFrame_I (
+ OMX_U8 *pSrcDstPlane,
+ OMX_U32 iFrameWidth,
+ OMX_U32 iFrameHeight,
+ OMX_U32 iExpandPels,
+ OMX_U32 iPlaneStep
+);
+
+
+
+/**
+ * Function: omxVCCOMM_Copy8x8 (6.1.3.3.1)
+ *
+ * Description:
+ * Copies the reference 8x8 block to the current block.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the reference block in the source frame; must be
+ * aligned on an 8-byte boundary.
+ * step - distance between the starts of consecutive lines in the reference
+ * frame, in bytes; must be a multiple of 8 and must be larger than
+ * or equal to 8.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination block; must be aligned on an 8-byte
+ * boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pSrc, pDst
+ * - one or more of the following pointers is not aligned on an 8-byte
+ * boundary: pSrc, pDst
+ * - step <8 or step is not a multiple of 8.
+ *
+ */
+OMXResult omxVCCOMM_Copy8x8 (
+ const OMX_U8 *pSrc,
+ OMX_U8 *pDst,
+ OMX_INT step
+);
+
+
+
+/**
+ * Function: omxVCCOMM_Copy16x16 (6.1.3.3.2)
+ *
+ * Description:
+ * Copies the reference 16x16 macroblock to the current macroblock.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the reference macroblock in the source frame; must be
+ * aligned on a 16-byte boundary.
+ * step - distance between the starts of consecutive lines in the reference
+ * frame, in bytes; must be a multiple of 16 and must be larger
+ * than or equal to 16.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination macroblock; must be aligned on a
+ * 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pSrc, pDst
+ * - one or more of the following pointers is not aligned on a 16-byte
+ * boundary: pSrc, pDst
+ * - step <16 or step is not a multiple of 16.
+ *
+ */
+OMXResult omxVCCOMM_Copy16x16 (
+ const OMX_U8 *pSrc,
+ OMX_U8 *pDst,
+ OMX_INT step
+);
+
+
+
+/**
+ * Function: omxVCCOMM_ComputeTextureErrorBlock_SAD (6.1.4.1.1)
+ *
+ * Description:
+ * Computes texture error of the block; also returns SAD.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the source plane; must be aligned on an 8-byte
+ * boundary.
+ * srcStep - step of the source plane
+ * pSrcRef - pointer to the reference buffer, an 8x8 block; must be aligned
+ * on an 8-byte boundary.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination buffer, an 8x8 block; must be aligned
+ * on an 8-byte boundary.
+ * pDstSAD - pointer to the Sum of Absolute Differences (SAD) value
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - At least one of the following
+ * pointers is NULL: pSrc, pSrcRef, pDst and pDstSAD.
+ * - pSrc is not 8-byte aligned.
+ * - srcStep <= 0 or srcStep is not a multiple of 8.
+ * - pSrcRef is not 8-byte aligned.
+ * - pDst is not 8-byte aligned.
+ *
+ */
+OMXResult omxVCCOMM_ComputeTextureErrorBlock_SAD (
+ const OMX_U8 *pSrc,
+ OMX_INT srcStep,
+ const OMX_U8 *pSrcRef,
+ OMX_S16 *pDst,
+ OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCCOMM_ComputeTextureErrorBlock (6.1.4.1.2)
+ *
+ * Description:
+ * Computes the texture error of the block.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the source plane. This should be aligned on an 8-byte
+ * boundary.
+ * srcStep - step of the source plane
+ * pSrcRef - pointer to the reference buffer, an 8x8 block. This should be
+ * aligned on an 8-byte boundary.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination buffer, an 8x8 block. This should be
+ * aligned on an 8-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is NULL:
+ * pSrc, pSrcRef, pDst.
+ * - pSrc is not 8-byte aligned.
+ * - srcStep <= 0 or srcStep is not a multiple of 8.
+ * - pSrcRef is not 8-byte aligned.
+ * - pDst is not 8-byte aligned
+ *
+ */
+OMXResult omxVCCOMM_ComputeTextureErrorBlock (
+ const OMX_U8 *pSrc,
+ OMX_INT srcStep,
+ const OMX_U8 *pSrcRef,
+ OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function: omxVCCOMM_LimitMVToRect (6.1.4.1.3)
+ *
+ * Description:
+ * Limits the motion vector associated with the current block/macroblock to
+ * prevent the motion compensated block/macroblock from moving outside a
+ * bounding rectangle as shown in Figure 6-1.
+ *
+ * Input Arguments:
+ *
+ * pSrcMV - pointer to the motion vector associated with the current block
+ * or macroblock
+ * pRectVOPRef - pointer to the bounding rectangle
+ * Xcoord, Ycoord - coordinates of the current block or macroblock
+ * size - size of the current block or macroblock; must be equal to 8 or
+ * 16.
+ *
+ * Output Arguments:
+ *
+ * pDstMV - pointer to the limited motion vector
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcMV, pDstMV, or pRectVOPRef.
+ * - size is not equal to either 8 or 16.
+ * - the width or height of the bounding rectangle is less than
+ * twice the block size.
+ */
+OMXResult omxVCCOMM_LimitMVToRect (
+ const OMXVCMotionVector *pSrcMV,
+ OMXVCMotionVector *pDstMV,
+ const OMXRect *pRectVOPRef,
+ OMX_INT Xcoord,
+ OMX_INT Ycoord,
+ OMX_INT size
+);
+
+
+
+/**
+ * Function: omxVCCOMM_SAD_16x (6.1.4.1.4)
+ *
+ * Description:
+ * This function calculates the SAD for 16x16 and 16x8 blocks.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to the original block; must be aligned on a 16-byte
+ * boundary.
+ * iStepOrg - Step of the original block buffer
+ * pSrcRef - Pointer to the reference block
+ * iStepRef - Step of the reference block buffer
+ * iHeight - Height of the block
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pDstSAD, or pSrcRef
+ * - pSrcOrg is not 16-byte aligned.
+ * - iStepOrg <= 0 or iStepOrg is not a multiple of 16
+ * - iStepRef <= 0 or iStepRef is not a multiple of 16
+ * - iHeight is not 8 or 16
+ *
+ */
+OMXResult omxVCCOMM_SAD_16x (
+ const OMX_U8 *pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8 *pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_S32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCCOMM_SAD_8x (6.1.4.1.5)
+ *
+ * Description:
+ * This function calculates the SAD for 8x16, 8x8, 8x4 blocks.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to the original block; must be aligned on an 8-byte
+ * boundary.
+ * iStepOrg - Step of the original block buffer
+ * pSrcRef - Pointer to the reference block
+ * iStepRef - Step of the reference block buffer
+ * iHeight - Height of the block
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pDstSAD, or pSrcRef
+ * - pSrcOrg is not 8-byte aligned.
+ * - iStepOrg <= 0 or iStepOrg is not a multiple of 8
+ * - iStepRef <= 0 or iStepRef is not a multiple of 8
+ * - iHeight is not 4, 8 or 16
+ *
+ */
+OMXResult omxVCCOMM_SAD_8x (
+ const OMX_U8 *pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8 *pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_S32*pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/* 6.2.1.1 Direction */
+/* The direction enumerator is used with functions that perform AC/DC prediction and zig-zag scan. */
+
+enum {
+ OMX_VC_NONE = 0,
+ OMX_VC_HORIZONTAL = 1,
+ OMX_VC_VERTICAL = 2
+};
+
+
+
+/* 6.2.1.2 Bilinear Interpolation */
+/* The bilinear interpolation enumerator is used with motion estimation, motion compensation, and reconstruction functions. */
+
+enum {
+ OMX_VC_INTEGER_PIXEL = 0, /* case a */
+ OMX_VC_HALF_PIXEL_X = 1, /* case b */
+ OMX_VC_HALF_PIXEL_Y = 2, /* case c */
+ OMX_VC_HALF_PIXEL_XY = 3 /* case d */
+};
+
+
+
+/* 6.2.1.3 Neighboring Macroblock Availability */
+/* Neighboring macroblock availability is indicated using the following flags: */
+
+enum {
+ OMX_VC_UPPER = 1, /** above macroblock is available */
+ OMX_VC_LEFT = 2, /** left macroblock is available */
+ OMX_VC_CENTER = 4,
+ OMX_VC_RIGHT = 8,
+ OMX_VC_LOWER = 16,
+ OMX_VC_UPPER_LEFT = 32, /** above-left macroblock is available */
+ OMX_VC_UPPER_RIGHT = 64, /** above-right macroblock is available */
+ OMX_VC_LOWER_LEFT = 128,
+ OMX_VC_LOWER_RIGHT = 256
+};
+
+
+
+/* 6.2.1.4 Video Components */
+/* A data type that enumerates video components is defined as follows: */
+
+typedef enum {
+ OMX_VC_LUMINANCE, /** Luminance component */
+ OMX_VC_CHROMINANCE /** chrominance component */
+} OMXVCM4P2VideoComponent;
+
+
+
+/* 6.2.1.5 MacroblockTypes */
+/* A data type that enumerates macroblock types is defined as follows: */
+
+typedef enum {
+ OMX_VC_INTER = 0, /** P picture or P-VOP */
+ OMX_VC_INTER_Q = 1, /** P picture or P-VOP */
+ OMX_VC_INTER4V = 2, /** P picture or P-VOP */
+ OMX_VC_INTRA = 3, /** I and P picture, I- and P-VOP */
+ OMX_VC_INTRA_Q = 4, /** I and P picture, I- and P-VOP */
+ OMX_VC_INTER4V_Q = 5 /** P picture or P-VOP (H.263)*/
+} OMXVCM4P2MacroblockType;
+
+
+
+/* 6.2.1.6 Coordinates */
+/* Coordinates are represented as follows: */
+
+typedef struct {
+ OMX_INT x;
+ OMX_INT y;
+} OMXVCM4P2Coordinate;
+
+
+
+/* 6.2.1.7 Motion Estimation Algorithms */
+/* A data type that enumerates motion estimation search methods is defined as follows: */
+
+typedef enum {
+ OMX_VC_M4P2_FAST_SEARCH = 0, /** Fast motion search */
+ OMX_VC_M4P2_FULL_SEARCH = 1 /** Full motion search */
+} OMXVCM4P2MEMode;
+
+
+
+/* 6.2.1.8 Motion Estimation Parameters */
+/* A data structure containing control parameters for
+ * motion estimation functions is defined as follows:
+ */
+
+typedef struct {
+ OMX_INT searchEnable8x8; /** enables 8x8 search */
+ OMX_INT halfPelSearchEnable; /** enables half-pel resolution */
+ OMX_INT searchRange; /** search range */
+ OMX_INT rndVal; /** rounding control; 0-disabled, 1-enabled*/
+} OMXVCM4P2MEParams;
+
+
+
+/* 6.2.1.9 Macroblock Information */
+/* A data structure containing macroblock parameters for
+ * motion estimation functions is defined as follows:
+ */
+
+typedef struct {
+ OMX_S32 sliceId; /* slice number */
+ OMXVCM4P2MacroblockType mbType; /* MB type: OMX_VC_INTRA, OMX_VC_INTER, or OMX_VC_INTER4V */
+ OMX_S32 qp; /* quantization parameter*/
+ OMX_U32 cbpy; /* CBP Luma */
+ OMX_U32 cbpc; /* CBP Chroma */
+ OMXVCMotionVector pMV0[2][2]; /* motion vector, represented using 1/2-pel units,
+ * pMV0[blocky][blockx] (blocky = 0~1, blockx =0~1)
+ */
+ OMXVCMotionVector pMVPred[2][2]; /* motion vector prediction, represented using 1/2-pel units,
+ * pMVPred[blocky][blockx] (blocky = 0~1, blockx = 0~1)
+ */
+ OMX_U8 pPredDir[2][2]; /* AC prediction direction:
+ * OMX_VC_NONE, OMX_VC_VERTICAL, OMX_VC_HORIZONTAL
+ */
+} OMXVCM4P2MBInfo, *OMXVCM4P2MBInfoPtr;
+
+
+
+/**
+ * Function: omxVCM4P2_FindMVpred (6.2.3.1.1)
+ *
+ * Description:
+ * Predicts a motion vector for the current block using the procedure
+ * specified in [ISO14496-2], subclause 7.6.5. The resulting predicted MV is
+ * returned in pDstMVPred. If the parameter pDstMVPredME is not NULL then
+ * the set of three MV candidates used for prediction is also returned,
+ * otherwise pDstMVPredME is NULL upon return.
+ *
+ * Input Arguments:
+ *
+ * pSrcMVCurMB - pointer to the MV buffer associated with the current Y
+ * macroblock; a value of NULL indicates unavailability.
+ * pSrcCandMV1 - pointer to the MV buffer containing the 4 MVs associated
+ * with the MB located to the left of the current MB; set to NULL
+ * if there is no MB to the left.
+ * pSrcCandMV2 - pointer to the MV buffer containing the 4 MVs associated
+ * with the MB located above the current MB; set to NULL if there
+ * is no MB located above the current MB.
+ * pSrcCandMV3 - pointer to the MV buffer containing the 4 MVs associated
+ * with the MB located to the right and above the current MB; set
+ * to NULL if there is no MB located to the above-right.
+ * iBlk - the index of block in the current macroblock
+ * pDstMVPredME - MV candidate return buffer; if set to NULL then
+ * prediction candidate MVs are not returned and pDstMVPredME will
+ * be NULL upon function return; if pDstMVPredME is non-NULL then it
+ * must point to a buffer containing sufficient space for three
+ * return MVs.
+ *
+ * Output Arguments:
+ *
+ * pDstMVPred - pointer to the predicted motion vector
+ * pDstMVPredME - if non-NULL upon input then pDstMVPredME points upon
+ * return to a buffer containing the three motion vector candidates
+ * used for prediction as specified in [ISO14496-2], subclause
+ * 7.6.5, otherwise if NULL upon input then pDstMVPredME is NULL
+ * upon output.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - the pointer pDstMVPred is NULL
+ * - the parameter iBlk does not fall into the range 0 <= iBlk<=3
+ *
+ */
+OMXResult omxVCM4P2_FindMVpred (
+ const OMXVCMotionVector *pSrcMVCurMB,
+ const OMXVCMotionVector *pSrcCandMV1,
+ const OMXVCMotionVector *pSrcCandMV2,
+ const OMXVCMotionVector *pSrcCandMV3,
+ OMXVCMotionVector *pDstMVPred,
+ OMXVCMotionVector *pDstMVPredME,
+ OMX_INT iBlk
+);
+
+
+
+/**
+ * Function: omxVCM4P2_IDCT8x8blk (6.2.3.2.1)
+ *
+ * Description:
+ * Computes a 2D inverse DCT for a single 8x8 block, as defined in
+ * [ISO14496-2].
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the start of the linearly arranged IDCT input buffer;
+ * must be aligned on a 16-byte boundary. According to
+ * [ISO14496-2], the input coefficient values should lie within the
+ * range [-2048, 2047].
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the start of the linearly arranged IDCT output buffer;
+ * must be aligned on a 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - pSrc or pDst is NULL.
+ * - pSrc or pDst is not 16-byte aligned.
+ *
+ */
+OMXResult omxVCM4P2_IDCT8x8blk (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function: omxVCM4P2_MEGetBufSize (6.2.4.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification
+ * structure for the following motion estimation functions:
+ * BlockMatch_Integer_8x8, BlockMatch_Integer_16x16, and MotionEstimationMB.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P2MEMode
+ * pMEParams - motion estimation parameters
+ *
+ * Output Arguments:
+ *
+ * pSize - pointer to the number of bytes required for the specification
+ * structure
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - one or more of the following is true:
+ * - an invalid value was specified for the parameter MEmode
+ * - a negative or zero value was specified for the
+ * parameter pMEParams->searchRange
+ *
+ */
+OMXResult omxVCM4P2_MEGetBufSize (
+ OMXVCM4P2MEMode MEmode,
+ const OMXVCM4P2MEParams *pMEParams,
+ OMX_U32 *pSize
+);
+
+
+
+/**
+ * Function: omxVCM4P2_MEInit (6.2.4.1.2)
+ *
+ * Description:
+ * Initializes the vendor-specific specification structure required for the
+ * following motion estimation functions: BlockMatch_Integer_8x8,
+ * BlockMatch_Integer_16x16, and MotionEstimationMB. Memory for the
+ * specification structure *pMESpec must be allocated prior to calling the
+ * function, and should be aligned on a 4-byte boundary. Following
+ * initialization by this function, the vendor-specific structure *pMESpec
+ * should contain an implementation-specific representation of all motion
+ * estimation parameters received via the structure pMEParams, for example
+ * rndVal, searchRange, etc. The number of bytes required for the
+ * specification structure can be determined using the function
+ * omxVCM4P2_MEGetBufSize.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P2MEMode
+ * pMEParams - motion estimation parameters
+ * pMESpec - pointer to the uninitialized ME specification structure
+ *
+ * Output Arguments:
+ *
+ * pMESpec - pointer to the initialized ME specification structure
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - one or more of the following is true:
+ * - an invalid value was specified for the parameter MEmode
+ * - a negative or zero value was specified for the
+ * parameter pMEParams->searchRange
+ *
+ */
+OMXResult omxVCM4P2_MEInit (
+ OMXVCM4P2MEMode MEmode,
+ const OMXVCM4P2MEParams*pMEParams,
+ void *pMESpec
+);
+
+
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Integer_16x16 (6.2.4.2.1)
+ *
+ * Description:
+ * Performs a 16x16 block search; estimates motion vector and associated
+ * minimum SAD. Both the input and output motion vectors are represented using
+ * half-pixel units, and therefore a shift left or right by 1 bit may be
+ * required, respectively, to match the input or output MVs with other
+ * functions that either generate output MVs or expect input MVs represented
+ * using integer pixel units.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * MB that corresponds to the location of the current macroblock in
+ * the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - pointer to the valid reference plane rectangle; coordinates
+ * are specified relative to the image origin. Rectangle
+ * boundaries may extend beyond image boundaries if the image has
+ * been padded. For example, if padding extends 4 pixels beyond
+ * frame border, then the value for the left border could be set to
+ * -4.
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 256
+ * entries); must be aligned on a 16-byte boundary. The number of
+ * bytes between lines (step) is 16.
+ * pCurrPointPos - position of the current macroblock in the current plane
+ * pSrcPreMV - pointer to predicted motion vector; NULL indicates no
+ * predicted MV
+ * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced
+ * by pSrcPreMV); may be set to NULL if unavailable.
+ * pMESpec - vendor-specific motion estimation specification structure;
+ * must have been allocated and then initialized using
+ * omxVCM4P2_MEInit prior to calling the block matching function.
+ *
+ * Output Arguments:
+ *
+ * pDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL: pSrcRefBuf,
+ * pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or
+ * pMESpec, or
+ * - pSrcCurrBuf is not 16-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Integer_16x16 (
+ const OMX_U8 *pSrcRefBuf, /* [in] reference Y plane, at the MB co-located with the current MB */
+ OMX_INT refWidth, /* [in] width of the reference plane */
+ const OMXRect *pRefRect, /* [in] valid reference plane rectangle */
+ const OMX_U8 *pSrcCurrBuf, /* [in] current MB buffer (256 entries, step 16); 16-byte aligned */
+ const OMXVCM4P2Coordinate *pCurrPointPos, /* [in] position of the current MB in the current plane */
+ const OMXVCMotionVector *pSrcPreMV, /* [in] predicted MV; NULL indicates no predicted MV */
+ const OMX_INT *pSrcPreSAD, /* [in] SAD associated with the predicted MV; may be NULL */
+ void *pMESpec, /* [in] ME specification initialized by omxVCM4P2_MEInit */
+ OMXVCMotionVector *pDstMV, /* [out] estimated MV */
+ OMX_INT *pDstSAD /* [out] minimum SAD */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Integer_8x8 (6.2.4.2.2)
+ *
+ * Description:
+ * Performs an 8x8 block search; estimates motion vector and associated
+ * minimum SAD. Both the input and output motion vectors are represented
+ * using half-pixel units, and therefore a shift left or right by 1 bit may be
+ * required, respectively, to match the input or output MVs with other
+ * functions that either generate output MVs or expect input MVs represented
+ * using integer pixel units.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * block that corresponds to the location of the current 8x8 block
+ * in the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - pointer to the valid reference plane rectangle; coordinates
+ * are specified relative to the image origin. Rectangle
+ * boundaries may extend beyond image boundaries if the image has
+ * been padded.
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 128
+ * entries); must be aligned on an 8-byte boundary. The number of
+ * bytes between lines (step) is 16 bytes.
+ * pCurrPointPos - position of the current block in the current plane
+ * pSrcPreMV - pointer to predicted motion vector; NULL indicates no
+ * predicted MV
+ * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced
+ * by pSrcPreMV); may be set to NULL if unavailable.
+ * pMESpec - vendor-specific motion estimation specification structure;
+ * must have been allocated and then initialized using
+ * omxVCM4P2_MEInit prior to calling the block matching function.
+ *
+ * Output Arguments:
+ *
+ * pDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL: pSrcRefBuf,
+ * pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or
+ * pMESpec, or
+ * - pSrcCurrBuf is not 8-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Integer_8x8 (
+ const OMX_U8 *pSrcRefBuf, /* [in] reference Y plane, at the block co-located with the current 8x8 block */
+ OMX_INT refWidth, /* [in] width of the reference plane */
+ const OMXRect *pRefRect, /* [in] valid reference plane rectangle */
+ const OMX_U8 *pSrcCurrBuf, /* [in] current block buffer (128 entries, step 16); 8-byte aligned */
+ const OMXVCM4P2Coordinate *pCurrPointPos, /* [in] position of the current block in the current plane */
+ const OMXVCMotionVector *pSrcPreMV, /* [in] predicted MV; NULL indicates no predicted MV */
+ const OMX_INT *pSrcPreSAD, /* [in] SAD associated with the predicted MV; may be NULL */
+ void *pMESpec, /* [in] ME specification initialized by omxVCM4P2_MEInit */
+ OMXVCMotionVector *pDstMV, /* [out] estimated MV */
+ OMX_INT *pDstSAD /* [out] minimum SAD */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Half_16x16 (6.2.4.2.3)
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution. Returns the
+ * estimated motion vector and associated minimum SAD. This function
+ * estimates the half-pixel motion vector by interpolating the integer
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e.,
+ * the initial integer MV is generated externally. The input parameters
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of
+ * 16x16 integer search prior to calling BlockMatch_Half_16x16. The function
+ * BlockMatch_Integer_16x16 may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * macroblock that corresponds to the location of the current
+ * macroblock in the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - reference plane valid region rectangle
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 256
+ * entries); must be aligned on a 16-byte boundary. The number of
+ * bytes between lines (step) is 16.
+ * pSearchPointRefPos - position of the starting point for half pixel
+ * search (specified in terms of integer pixel units) in the
+ * reference plane, i.e., the reference position pointed to by the
+ * predicted motion vector.
+ * rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ * pSrcDstMV - pointer to the initial MV estimate; typically generated
+ * during a prior 16X16 integer search; specified in terms of
+ * half-pixel units.
+ *
+ * Output Arguments:
+ *
+ * pSrcDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL: pSrcRefBuf,
+ * pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV, or
+ * - pSrcCurrBuf is not 16-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Half_16x16 (
+ const OMX_U8 *pSrcRefBuf, /* [in] reference Y plane, at the MB co-located with the current MB */
+ OMX_INT refWidth, /* [in] width of the reference plane */
+ const OMXRect *pRefRect, /* [in] reference plane valid region rectangle */
+ const OMX_U8 *pSrcCurrBuf, /* [in] current MB buffer (256 entries, step 16); 16-byte aligned */
+ const OMXVCM4P2Coordinate *pSearchPointRefPos, /* [in] half-pixel search start point, in integer pixel units */
+ OMX_INT rndVal, /* [in] rounding control: 0 - disabled; 1 - enabled */
+ OMXVCMotionVector *pSrcDstMV, /* [in,out] in: initial MV estimate (half-pixel units); out: estimated MV */
+ OMX_INT *pDstSAD /* [out] minimum SAD */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Half_8x8 (6.2.4.2.4)
+ *
+ * Description:
+ * Performs an 8x8 block match with half-pixel resolution. Returns the
+ * estimated motion vector and associated minimum SAD. This function
+ * estimates the half-pixel motion vector by interpolating the integer
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e.,
+ * the initial integer MV is generated externally. The input parameters
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of
+ * 8x8 integer search prior to calling BlockMatch_Half_8x8. The function
+ * BlockMatch_Integer_8x8 may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * block that corresponds to the location of the current 8x8 block
+ * in the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - reference plane valid region rectangle
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 128
+ * entries); must be aligned on an 8-byte boundary. The number of
+ * bytes between lines (step) is 16.
+ * pSearchPointRefPos - position of the starting point for half pixel
+ * search (specified in terms of integer pixel units) in the
+ * reference plane.
+ * rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ * pSrcDstMV - pointer to the initial MV estimate; typically generated
+ * during a prior 8x8 integer search, specified in terms of
+ * half-pixel units.
+ *
+ * Output Arguments:
+ *
+ * pSrcDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcRefBuf, pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV
+ * - pSrcCurrBuf is not 8-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Half_8x8 (
+ const OMX_U8 *pSrcRefBuf, /* [in] reference Y plane, at the block co-located with the current 8x8 block */
+ OMX_INT refWidth, /* [in] width of the reference plane */
+ const OMXRect *pRefRect, /* [in] reference plane valid region rectangle */
+ const OMX_U8 *pSrcCurrBuf, /* [in] current block buffer (128 entries, step 16); 8-byte aligned */
+ const OMXVCM4P2Coordinate *pSearchPointRefPos, /* [in] half-pixel search start point, in integer pixel units */
+ OMX_INT rndVal, /* [in] rounding control: 0 - disabled; 1 - enabled */
+ OMXVCMotionVector *pSrcDstMV, /* [in,out] in: initial MV estimate (half-pixel units); out: estimated MV */
+ OMX_INT *pDstSAD /* [out] minimum SAD */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_MotionEstimationMB (6.2.4.3.1)
+ *
+ * Description:
+ * Performs motion search for a 16x16 macroblock. Selects best motion search
+ * strategy from among inter-1MV, inter-4MV, and intra modes. Supports
+ * integer and half pixel resolution.
+ *
+ * Input Arguments:
+ *
+ * pSrcCurrBuf - pointer to the top-left corner of the current MB in the
+ * original picture plane; must be aligned on a 16-byte boundary.
+ * The function does not expect source data outside the region
+ * bounded by the MB to be available; for example it is not
+ * necessary for the caller to guarantee the availability of
+ * pSrcCurrBuf[-srcCurrStep], i.e., the row of pixels above the MB
+ * to be processed.
+ * srcCurrStep - width of the original picture plane, in terms of full
+ * pixels; must be a multiple of 16.
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * plane location corresponding to the location of the current
+ * macroblock in the current plane; must be aligned on a 16-byte
+ * boundary.
+ * srcRefStep - width of the reference picture plane, in terms of full
+ * pixels; must be a multiple of 16.
+ * pRefRect - reference plane valid region rectangle, specified relative to
+ * the image origin
+ * pCurrPointPos - position of the current macroblock in the current plane
+ * pMESpec - pointer to the vendor-specific motion estimation specification
+ * structure; must be allocated and then initialized using
+ * omxVCM4P2_MEInit prior to calling this function.
+ * pMBInfo - array, of dimension four, containing pointers to information
+ * associated with four nearby MBs:
+ * - pMBInfo[0] - pointer to left MB information
+ * - pMBInfo[1] - pointer to top MB information
+ * - pMBInfo[2] - pointer to top-left MB information
+ * - pMBInfo[3] - pointer to top-right MB information
+ * Any pointer in the array may be set equal to NULL if the
+ * corresponding MB doesn't exist. For each MB, the following structure
+ * members are used:
+ * - mbType - macroblock type, either OMX_VC_INTRA, OMX_VC_INTER, or
+ * OMX_VC_INTER4V
+ * - pMV0[2][2] - estimated motion vectors; represented
+ * in 1/2 pixel units
+ * - sliceID - number of the slice to which the MB belongs
+ * pSrcDstMBCurr - pointer to information structure for the current MB.
+ * The following entries should be set prior to calling the
+ * function: sliceID - the number of the slice to which the
+ * current MB belongs. The structure elements cbpy and cbpc are
+ * ignored.
+ *
+ * Output Arguments:
+ *
+ * pSrcDstMBCurr - pointer to updated information structure for the current
+ * MB after MB-level motion estimation has been completed. The
+ * following structure members are updated by the ME function:
+ * - mbType - macroblock type: OMX_VC_INTRA, OMX_VC_INTER, or
+ * OMX_VC_INTER4V.
+ * - pMV0[2][2] - estimated motion vectors; represented in
+ * terms of 1/2 pel units.
+ * - pMVPred[2][2] - predicted motion vectors; represented
+ * in terms of 1/2 pel units.
+ * The structure members cbpy and cbpc are not updated by the function.
+ * pDstSAD - pointer to the minimum SAD for INTER1V, or sum of minimum SADs
+ * for INTER4V
+ * pDstBlockSAD - pointer to an array of SAD values for each of the four
+ * 8x8 luma blocks in the MB. The block SADs are in scan order for
+ * each MB.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL: pSrcCurrBuf,
+ * pSrcRefBuf, pRefRect, pCurrPointPos, pMBInter, pMBIntra,
+ * pSrcDstMBCurr, or pDstSAD.
+ *
+ */
+OMXResult omxVCM4P2_MotionEstimationMB (
+ const OMX_U8 *pSrcCurrBuf, /* [in] top-left corner of the current MB in the original plane; 16-byte aligned */
+ OMX_S32 srcCurrStep, /* [in] width of the original picture plane in full pixels; multiple of 16 */
+ const OMX_U8 *pSrcRefBuf, /* [in] reference Y plane at the co-located MB position; 16-byte aligned */
+ OMX_S32 srcRefStep, /* [in] width of the reference picture plane in full pixels; multiple of 16 */
+ const OMXRect *pRefRect, /* [in] reference plane valid region, relative to the image origin */
+ const OMXVCM4P2Coordinate *pCurrPointPos, /* [in] position of the current MB in the current plane */
+ void *pMESpec, /* [in] ME specification initialized by omxVCM4P2_MEInit */
+ const OMXVCM4P2MBInfoPtr *pMBInfo, /* [in] four neighbour MB info pointers: left, top, top-left, top-right; entries may be NULL */
+ OMXVCM4P2MBInfo *pSrcDstMBCurr, /* [in,out] current MB info; sliceID set by caller, mbType/pMV0/pMVPred updated */
+ OMX_U16 *pDstSAD, /* [out] minimum SAD for INTER1V, or sum of minimum SADs for INTER4V */
+ OMX_U16 *pDstBlockSAD /* [out] SAD values for the four 8x8 luma blocks, in scan order */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DCT8x8blk (6.2.4.4.1)
+ *
+ * Description:
+ * Computes a 2D forward DCT for a single 8x8 block, as defined in
+ * [ISO14496-2].
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the start of the linearly arranged input buffer; must
+ * be aligned on a 16-byte boundary. Input values (pixel
+ * intensities) are valid in the range [-255,255].
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the start of the linearly arranged output buffer; must
+ * be aligned on a 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, returned if:
+ * - pSrc or pDst is NULL.
+ * - pSrc or pDst is not 16-byte aligned.
+ *
+ */
+OMXResult omxVCM4P2_DCT8x8blk (
+ const OMX_S16 *pSrc, /* [in] linearly arranged input buffer; 16-byte aligned */
+ OMX_S16 *pDst /* [out] linearly arranged output buffer; 16-byte aligned */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_QuantIntra_I (6.2.4.4.2)
+ *
+ * Description:
+ * Performs quantization on intra block coefficients. This function supports
+ * bits_per_pixel == 8.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input intra block coefficients; must be aligned
+ * on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale).
+ * blockIndex - block index indicating the component type and position,
+ * valid in the range 0 to 5, as defined in [ISO14496-2], subclause
+ * 6.1.3.8.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (quantized) intra block coefficients.
+ * When shortVideoHeader==1, AC coefficients are saturated on the
+ * interval [-127, 127], and DC coefficients are saturated on the
+ * interval [1, 254]. When shortVideoHeader==0, AC coefficients
+ * are saturated on the interval [-2047, 2047].
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - pSrcDst is NULL.
+ * - blockIndex < 0 or blockIndex >= 10
+ * - QP <= 0 or QP >= 32.
+ *
+ */
+OMXResult omxVCM4P2_QuantIntra_I (
+ OMX_S16 *pSrcDst, /* [in,out] intra block coefficients in, quantized coefficients out; 16-byte aligned */
+ OMX_U8 QP, /* [in] quantization parameter (quantizer_scale) */
+ OMX_INT blockIndex, /* [in] block index: component type and position */
+ OMX_INT shortVideoHeader /* [in] 1 selects linear intra DC mode; 0 selects non linear intra DC mode */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_QuantInter_I (6.2.4.4.3)
+ *
+ * Description:
+ * Performs quantization on an inter coefficient block; supports
+ * bits_per_pixel == 8.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input inter block coefficients; must be aligned
+ * on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale)
+ * shortVideoHeader - binary flag indicating presence of short_video_header;
+ * shortVideoHeader==1 selects linear intra DC mode, and
+ * shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (quantized) interblock coefficients.
+ * When shortVideoHeader==1, AC coefficients are saturated on the
+ * interval [-127, 127], and DC coefficients are saturated on the
+ * interval [1, 254]. When shortVideoHeader==0, AC coefficients
+ * are saturated on the interval [-2047, 2047].
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - pSrcDst is NULL.
+ * - QP <= 0 or QP >= 32.
+ *
+ */
+OMXResult omxVCM4P2_QuantInter_I (
+ OMX_S16 *pSrcDst, /* [in,out] inter block coefficients in, quantized coefficients out; 16-byte aligned */
+ OMX_U8 QP, /* [in] quantization parameter (quantizer_scale) */
+ OMX_INT shortVideoHeader /* [in] binary flag indicating presence of short_video_header */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_TransRecBlockCoef_intra (6.2.4.4.4)
+ *
+ * Description:
+ * Quantizes the DCT coefficients, implements intra block AC/DC coefficient
+ * prediction, and reconstructs the current intra block texture for prediction
+ * on the next frame. Quantized row and column coefficients are returned in
+ * the updated coefficient buffers.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the pixels of current intra block; must be aligned on
+ * an 8-byte boundary.
+ * pPredBufRow - pointer to the coefficient row buffer containing
+ * ((num_mb_per_row * 2 + 1) * 8) elements of type OMX_S16.
+ * Coefficients are organized into blocks of eight as described
+ * below (Internal Prediction Coefficient Update Procedures). The
+ * DC coefficient is first, and the remaining buffer locations
+ * contain the quantized AC coefficients. Each group of eight row
+ * buffer elements combined with one element eight elements ahead
+ * contains the coefficient predictors of the neighboring block
+ * that is spatially above or to the left of the block currently to
+ * be decoded. A negative-valued DC coefficient indicates that this
+ * neighboring block is not INTRA-coded or out of bounds, and
+ * therefore the AC and DC coefficients are invalid. Pointer must
+ * be aligned on an 8-byte boundary.
+ * pPredBufCol - pointer to the prediction coefficient column buffer
+ * containing 16 elements of type OMX_S16. Coefficients are
+ * organized as described in section 6.2.2.5. Pointer must be
+ * aligned on an 8-byte boundary.
+ * pSumErr - pointer to a flag indicating whether or not AC prediction is
+ * required; AC prediction is enabled if *pSumErr >=0, but the
+ * value is not used for coefficient prediction, i.e., the sum of
+ * absolute differences starts from 0 for each call to this
+ * function. Otherwise AC prediction is disabled if *pSumErr < 0 .
+ * blockIndex - block index indicating the component type and position, as
+ * defined in [ISO14496-2], subclause 6.1.3.8.
+ * curQp - quantization parameter of the macroblock to which the current
+ * block belongs
+ * pQpBuf - pointer to a 2-element quantization parameter buffer; pQpBuf[0]
+ * contains the quantization parameter associated with the 8x8
+ * block left of the current block (QPa), and pQpBuf[1] contains
+ * the quantization parameter associated with the 8x8 block above
+ * the current block (QPc). In the event that the corresponding
+ * block is outside of the VOP bound, the Qp value will not affect
+ * the intra prediction process, as described in [ISO14496-2],
+ * sub-clause 7.4.3.3, Adaptive AC Coefficient Prediction.
+ * srcStep - width of the source buffer; must be a multiple of 8.
+ * dstStep - width of the reconstructed destination buffer; must be a
+ * multiple of 16.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the quantized DCT coefficient buffer; pDst[0] contains
+ * the predicted DC coefficient; the remaining entries contain the
+ * quantized AC coefficients (without prediction). The pointer
+ * pDst must be aligned on a 16-byte boundary.
+ * pRec - pointer to the reconstructed texture; must be aligned on an
+ * 8-byte boundary.
+ * pPredBufRow - pointer to the updated coefficient row buffer
+ * pPredBufCol - pointer to the updated coefficient column buffer
+ * pPreACPredict - if prediction is enabled, the parameter points to the
+ * start of the buffer containing the coefficient differences for
+ * VLC encoding. The entry pPreACPredict[0] indicates prediction
+ * direction for the current block and takes one of the following
+ * values: OMX_VC_NONE (prediction disabled), OMX_VC_HORIZONTAL, or
+ * OMX_VC_VERTICAL. The entries
+ * pPreACPredict[1]-pPreACPredict[7] contain predicted AC
+ * coefficients. If prediction is disabled (*pSumErr<0) then the
+ * contents of this buffer are undefined upon return from the
+ * function
+ * pSumErr - pointer to the value of the accumulated AC coefficient errors,
+ * i.e., sum of the absolute differences between predicted and
+ * unpredicted AC coefficients
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ * - At least one of the following pointers is NULL: pSrc, pDst, pRec,
+ * pPredBufRow, pPredBufCol, pQpBuf, pPreACPredict, pSumErr.
+ * - blockIndex < 0 or blockIndex >= 10.
+ * - curQp <= 0 or curQp >= 32.
+ * - srcStep or dstStep <= 0 or not a multiple of 8.
+ * - pDst is not 16-byte aligned.
+ * - At least one of the following pointers is not 8-byte aligned:
+ * pSrc, pRec.
+ *
+ * Note: The coefficient buffers must be updated in accordance with the
+ * update procedures defined in section 6.2.2.
+ *
+ */
+OMXResult omxVCM4P2_TransRecBlockCoef_intra (
+ const OMX_U8 *pSrc, /* [in] pixels of the current intra block; 8-byte aligned */
+ OMX_S16 *pDst, /* [out] quantized DCT coefficient buffer; 16-byte aligned */
+ OMX_U8 *pRec, /* [out] reconstructed texture; 8-byte aligned */
+ OMX_S16 *pPredBufRow, /* [in,out] coefficient row buffer, updated on return; 8-byte aligned */
+ OMX_S16 *pPredBufCol, /* [in,out] coefficient column buffer (16 elements), updated on return; 8-byte aligned */
+ OMX_S16 *pPreACPredict, /* [out] coefficient differences for VLC encoding; undefined if prediction is disabled */
+ OMX_INT *pSumErr, /* [in,out] in: AC prediction enabled if *pSumErr >= 0; out: accumulated AC coefficient errors */
+ OMX_INT blockIndex, /* [in] block index: component type and position */
+ OMX_U8 curQp, /* [in] quantization parameter of the MB containing the current block */
+ const OMX_U8 *pQpBuf, /* [in] 2 entries: [0] QP of the left 8x8 block, [1] QP of the 8x8 block above */
+ OMX_INT srcStep, /* [in] width of the source buffer; multiple of 8 */
+ OMX_INT dstStep, /* [in] width of the reconstructed destination buffer; multiple of 16 */
+ OMX_INT shortVideoHeader /* [in] 1 selects linear intra DC mode; 0 selects non linear intra DC mode */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_TransRecBlockCoef_inter (6.2.4.4.5)
+ *
+ * Description:
+ * Implements DCT, and quantizes the DCT coefficients of the inter block
+ * while reconstructing the texture residual. There is no boundary check for
+ * the bit stream buffer.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the residuals to be encoded; must be aligned on a
+ * 16-byte boundary.
+ * QP - quantization parameter.
+ * shortVideoHeader - binary flag indicating presence of short_video_header;
+ * shortVideoHeader==1 selects linear intra DC mode, and
+ * shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the quantized DCT coefficients buffer; must be aligned
+ * on a 16-byte boundary.
+ * pRec - pointer to the reconstructed texture residuals; must be aligned
+ * on a 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is either NULL or
+ * not 16-byte aligned:
+ * - pSrc
+ * - pDst
+ * - pRec
+ * - QP <= 0 or QP >= 32.
+ *
+ */
+OMXResult omxVCM4P2_TransRecBlockCoef_inter (
+ const OMX_S16 *pSrc, /* [in] residuals to be encoded; 16-byte aligned */
+ OMX_S16 *pDst, /* [out] quantized DCT coefficients buffer; 16-byte aligned */
+ OMX_S16 *pRec, /* [out] reconstructed texture residuals; 16-byte aligned */
+ OMX_U8 QP, /* [in] quantization parameter */
+ OMX_INT shortVideoHeader /* [in] binary flag indicating presence of short_video_header */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_EncodeVLCZigzag_IntraDCVLC (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one
+ * intra block. Two versions of the function (DCVLC and ACVLC) are provided
+ * in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding".
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the bitstream
+ * pBitOffset - pointer to the bit position in the byte pointed by
+ * *ppBitStream. Valid within 0 to 7.
+ * pQDctBlkCoef - pointer to the quantized DCT coefficient
+ * predDir - AC prediction direction, which is used to decide the zigzag
+ * scan pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used.
+ * Performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction.
+ * Performs alternate-vertical zigzag scan.
+ * - OMX_VC_VERTICAL - Vertical prediction.
+ * Performs alternate-horizontal zigzag scan.
+ * pattern - block pattern which is used to decide whether this block is
+ * encoded
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ * videoComp - video component type (luminance, chrominance) of the current
+ * block
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is encoded, so
+ * that it points to the current byte in the bit stream buffer.
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pQDctBlkCoef.
+ * - *pBitOffset < 0, or *pBitOffset >7.
+ * - PredDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or
+ * OMX_VC_VERTICAL.
+ * - VideoComp is not one component of enum OMXVCM4P2VideoComponent.
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraDCVLC (
+ OMX_U8 **ppBitStream, /* [in,out] current byte in the bitstream; *ppBitStream updated after encoding */
+ OMX_INT *pBitOffset, /* [in,out] bit position in the byte at *ppBitStream; valid within 0 to 7 */
+ const OMX_S16 *pQDctBlkCoef, /* [in] quantized DCT coefficients */
+ OMX_U8 predDir, /* [in] AC prediction direction; decides the zigzag scan pattern */
+ OMX_U8 pattern, /* [in] block pattern; decides whether this block is encoded */
+ OMX_INT shortVideoHeader, /* [in] 0: escape modes 0-3 are used; 1: escape mode 4 is used */
+ OMXVCM4P2VideoComponent videoComp /* [in] video component type (luminance, chrominance) */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_EncodeVLCZigzag_IntraACVLC (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one
+ * intra block. Two versions of the function (DCVLC and ACVLC) are provided
+ * in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the bitstream
+ * pBitOffset - pointer to the bit position in the byte pointed by
+ * *ppBitStream. Valid within 0 to 7.
+ * pQDctBlkCoef - pointer to the quantized DCT coefficient
+ * predDir - AC prediction direction, which is used to decide the zigzag
+ * scan pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used.
+ * Performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction.
+ * Performs alternate-vertical zigzag scan.
+ * - OMX_VC_VERTICAL - Vertical prediction.
+ * Performs alternate-horizontal zigzag scan.
+ * pattern - block pattern which is used to decide whether this block is
+ * encoded
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is encoded, so
+ * that it points to the current byte in the bit stream buffer.
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pQDctBlkCoef.
+ * - *pBitOffset < 0, or *pBitOffset >7.
+ * - PredDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or
+ * OMX_VC_VERTICAL.
+ * - VideoComp is not one component of enum OMXVCM4P2VideoComponent
+ * (DCVLC variant only; this function takes no videoComp argument).
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraACVLC (
+ OMX_U8 **ppBitStream, /* [in,out] current byte in the bitstream; *ppBitStream updated after encoding */
+ OMX_INT *pBitOffset, /* [in,out] bit position in the byte at *ppBitStream; valid within 0 to 7 */
+ const OMX_S16 *pQDctBlkCoef, /* [in] quantized DCT coefficients */
+ OMX_U8 predDir, /* [in] AC prediction direction; decides the zigzag scan pattern */
+ OMX_U8 pattern, /* [in] block pattern; decides whether this block is encoded */
+ OMX_INT shortVideoHeader /* [in] 0: escape modes 0-3 are used; 1: escape mode 4 is used */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_EncodeVLCZigzag_Inter (6.2.4.5.3)
+ *
+ * Description:
+ * Performs classical zigzag scanning and VLC encoding for one inter block.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream
+ * pBitOffset - pointer to the bit position in the byte pointed by
+ * *ppBitStream. Valid within 0 to 7
+ * pQDctBlkCoef - pointer to the quantized DCT coefficient
+ * pattern - block pattern which is used to decide whether this block is
+ * encoded
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is encoded so that
+ * it points to the current byte in the bit stream buffer.
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pQDctBlkCoef
+ * - *pBitOffset < 0, or *pBitOffset >7.
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_Inter (
+ OMX_U8 **ppBitStream, /* [in,out] current byte in the bit stream; *ppBitStream updated after encoding */
+ OMX_INT *pBitOffset, /* [in,out] bit position in the byte at *ppBitStream; valid within 0 to 7 */
+ const OMX_S16 *pQDctBlkCoef, /* [in] quantized DCT coefficients */
+ OMX_U8 pattern, /* [in] block pattern; decides whether this block is encoded */
+ OMX_INT shortVideoHeader /* [in] 0: escape modes 0-3 are used; 1: escape mode 4 is used */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_EncodeMV (6.2.4.5.4)
+ *
+ * Description:
+ * Predicts a motion vector for the current macroblock, encodes the
+ * difference, and writes the output to the stream buffer. The input MVs
+ * pMVCurMB, pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB should lie
+ * within the ranges associated with the input parameter fcodeForward, as
+ * described in [ISO14496-2], subclause 7.6.3. This function provides a
+ * superset of the functionality associated with the function
+ * omxVCM4P2_FindMVpred.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the bitstream buffer
+ * pBitOffset - index of the first free (next available) bit in the stream
+ * buffer referenced by *ppBitStream, valid in the range 0 to 7.
+ * pMVCurMB - pointer to the current macroblock motion vector; a value of
+ * NULL indicates unavailability.
+ * pSrcMVLeftMB - pointer to the source left macroblock motion vector; a
+ * value of NULL indicates unavailability.
+ * pSrcMVUpperMB - pointer to source upper macroblock motion vector; a
+ * value of NULL indicates unavailability.
+ * pSrcMVUpperRightMB - pointer to source upper right MB motion vector; a
+ * value of NULL indicates unavailability.
+ * fcodeForward - an integer with values from 1 to 7; used in encoding
+ * motion vectors related to search range, as described in
+ * [ISO14496-2], subclause 7.6.3.
+ * MBType - macro block type, valid in the range 0 to 5
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - updated pointer to the current byte in the bit stream
+ * buffer
+ * pBitOffset - updated index of the next available bit position in stream
+ * buffer referenced by *ppBitStream
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pMVCurMB
+ * - *pBitOffset < 0, or *pBitOffset >7.
+ * - fcodeForward <= 0, or fcodeForward > 7, or MBType < 0.
+ *
+ */
+OMXResult omxVCM4P2_EncodeMV (
+ OMX_U8 **ppBitStream, /* [in,out] current byte in the bitstream buffer; updated on return */
+ OMX_INT *pBitOffset, /* [in,out] next available bit in the buffer at *ppBitStream; valid in the range 0 to 7 */
+ const OMXVCMotionVector *pMVCurMB, /* [in] current macroblock motion vector */
+ const OMXVCMotionVector *pSrcMVLeftMB, /* [in] left macroblock MV; NULL indicates unavailability */
+ const OMXVCMotionVector *pSrcMVUpperMB, /* [in] upper macroblock MV; NULL indicates unavailability */
+ const OMXVCMotionVector *pSrcMVUpperRightMB, /* [in] upper right macroblock MV; NULL indicates unavailability */
+ OMX_INT fcodeForward, /* [in] integer with values from 1 to 7; see [ISO14496-2], subclause 7.6.3 */
+ OMXVCM4P2MacroblockType MBType /* [in] macro block type */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodePadMV_PVOP (6.2.5.1.1)
+ *
+ * Description:
+ * Decodes and pads the four motion vectors associated with a non-intra P-VOP
+ * macroblock. For macroblocks of type OMX_VC_INTER4V, the output MV is
+ * padded as specified in [ISO14496-2], subclause 7.6.1.6. Otherwise, for
+ * macroblocks of types other than OMX_VC_INTER4V, the decoded MV is copied to
+ * all four output MV buffer entries.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream buffer
+ * pBitOffset - pointer to the bit position in the byte pointed to by
+ * *ppBitStream. *pBitOffset is valid within [0-7].
+ * pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB - pointers to the
+ * motion vector buffers of the macroblocks spatially at the left,
+ * upper, and upper-right side of the current macroblock,
+ * respectively; a value of NULL indicates unavailability. Note:
+ * Any neighborhood macroblock outside the current VOP or video
+ * packet or outside the current GOB (when short_video_header is
+ * 1) for which gob_header_empty is 0 is treated as
+ * transparent, according to [ISO14496-2], subclause 7.6.5.
+ * fcodeForward - a code equal to vop_fcode_forward in MPEG-4 bit stream
+ * syntax
+ * MBType - the type of the current macroblock. If MBType is not equal to
+ * OMX_VC_INTER4V, the destination motion vector buffer is still
+ * filled with the same decoded vector.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded, so
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream
+ * pDstMVCurMB - pointer to the motion vector buffer for the current
+ * macroblock; contains four decoded motion vectors
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDstMVCurMB
+ * - *pBitOffset exceeds [0,7]
+ * - fcodeForward exceeds (0,7]
+ * - MBType less than zero
+ * - motion vector buffer is not 4-byte aligned.
+ * OMX_Sts_Err - status error
+ *
+ */
+OMXResult omxVCM4P2_DecodePadMV_PVOP (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMXVCMotionVector *pSrcMVLeftMB,
+ OMXVCMotionVector*pSrcMVUpperMB,
+ OMXVCMotionVector *pSrcMVUpperRightMB,
+ OMXVCMotionVector*pDstMVCurMB,
+ OMX_INT fcodeForward,
+ OMXVCM4P2MacroblockType MBType
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients
+ * for one intra block. Two versions of the function (DCVLC and ACVLC) are
+ * provided in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding. This is the
+ * DCVLC version.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the
+ * bitstream buffer
+ * pBitOffset - pointer to the bit position in the current byte referenced
+ * by *ppBitStream. The parameter *pBitOffset is valid in the
+ * range [0-7].
+ * Bit Position in one byte: |Most Least|
+ * *pBitOffset |0 1 2 3 4 5 6 7|
+ * predDir - AC prediction direction; used to select the zigzag scan
+ * pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used;
+ * performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction;
+ * performs alternate-vertical zigzag scan;
+ * - OMX_VC_VERTICAL - Vertical prediction;
+ * performs alternate-horizontal zigzag scan.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ * videoComp - video component type (luminance or chrominance) of the
+ * current block
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded such
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated such that it points to the current
+ * bit position in the byte pointed to by *ppBitStream
+ * pDst - pointer to the coefficient buffer of current block; must be
+ * 4-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - *pBitOffset is outside [0,7]
+ * - predDir is outside [0,2]
+ * - pDst is not 4-byte aligned
+ * OMX_Sts_Err - if:
+ * - dc_size > 12
+ * - At least one of the mark bits equals zero
+ * - Illegal stream encountered; code cannot be located in VLC table
+ * - Forbidden code encountered in the VLC FLC table.
+ * - The number of coefficients is greater than 64
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraDCVLC (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_S16 *pDst,
+ OMX_U8 predDir,
+ OMX_INT shortVideoHeader,
+ OMXVCM4P2VideoComponent videoComp
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients
+ * for one intra block. Two versions of the function (DCVLC and ACVLC) are
+ * provided in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding. This is the
+ * ACVLC version.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the
+ * bitstream buffer
+ * pBitOffset - pointer to the bit position in the current byte referenced
+ * by *ppBitStream. The parameter *pBitOffset is valid in the
+ * range [0-7].
+ * Bit Position in one byte: |Most Least|
+ * *pBitOffset |0 1 2 3 4 5 6 7|
+ * predDir - AC prediction direction; used to select the zigzag scan
+ * pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used;
+ * performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction;
+ * performs alternate-vertical zigzag scan;
+ * - OMX_VC_VERTICAL - Vertical prediction;
+ * performs alternate-horizontal zigzag scan.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ *
+ * Note: the shared description of the two versions also lists videoComp
+ * (video component type, luminance or chrominance, of the current block),
+ * but this ACVLC prototype has no videoComp parameter; only
+ * omxVCM4P2_DecodeVLCZigzag_IntraDCVLC takes it.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded such
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated such that it points to the current
+ * bit position in the byte pointed to by *ppBitStream
+ * pDst - pointer to the coefficient buffer of current block; must be
+ * 4-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - *pBitOffset is outside [0,7]
+ * - predDir is outside [0,2]
+ * - pDst is not 4-byte aligned
+ * OMX_Sts_Err - if:
+ * - In DecodeVLCZigzag_IntraDCVLC, dc_size > 12
+ * - At least one of the mark bits equals zero
+ * - Illegal stream encountered; code cannot be located in VLC table
+ * - Forbidden code encountered in the VLC FLC table
+ * - The number of coefficients is greater than 64
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraACVLC (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_S16 *pDst,
+ OMX_U8 predDir,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeVLCZigzag_Inter (6.2.5.2.3)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one inter-coded block.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the stream buffer
+ * pBitOffset - pointer to the next available bit in the current stream
+ * byte referenced by *ppBitStream. The parameter *pBitOffset is
+ * valid within the range [0-7].
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded such
+ * that it points to the current byte in the stream buffer
+ * pBitOffset - *pBitOffset is updated after decoding such that it points
+ * to the next available bit in the stream byte referenced by
+ * *ppBitStream
+ * pDst - pointer to the coefficient buffer of current block; must be
+ * 4-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - pDst is not 4-byte aligned
+ * - *pBitOffset is outside [0,7]
+ * OMX_Sts_Err - status error, if:
+ * - At least one mark bit is equal to zero
+ * - Encountered an illegal stream code that cannot be found in the VLC table
+ * - Encountered an illegal code in the VLC FLC table
+ * - The number of coefficients is greater than 64
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_Inter (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_S16 *pDst,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_QuantInvIntra_I (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an intra coded block,
+ * in place. (The description is shared with omxVCM4P2_QuantInvInter_I,
+ * which handles inter coded blocks.) Supports bits_per_pixel = 8. The
+ * output coefficients are clipped to the range [-2048, 2047].
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input (quantized) intra block; must be
+ * aligned on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale)
+ * videoComp - video component type of the current block. Takes one of the
+ * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE.
+ * shortVideoHeader - binary flag indicating presence of short_video_header.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (dequantized) intra block
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; one or more of the following is
+ * true:
+ * - pSrcDst is NULL
+ * - QP <= 0 or QP >= 31
+ * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE.
+ *
+ * NOTE(review): other functions in this header reject QP only when it is
+ * greater than 31; confirm whether QP == 31 is really invalid here.
+ *
+ */
+OMXResult omxVCM4P2_QuantInvIntra_I (
+ OMX_S16 *pSrcDst,
+ OMX_INT QP,
+ OMXVCM4P2VideoComponent videoComp,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_QuantInvInter_I (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an inter coded block,
+ * in place. (The description is shared with omxVCM4P2_QuantInvIntra_I,
+ * which handles intra coded blocks.) Supports bits_per_pixel = 8. The
+ * output coefficients are clipped to the range [-2048, 2047].
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input (quantized) inter block; must be
+ * aligned on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale)
+ *
+ * Note: videoComp and shortVideoHeader are parameters of the intra version
+ * only; this function does not take them.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (dequantized) inter block
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; one or more of the following is
+ * true:
+ * - pSrcDst is NULL
+ * - QP <= 0 or QP >= 31
+ * (The videoComp check listed for the intra version does not apply,
+ * since this function has no videoComp parameter.)
+ *
+ * NOTE(review): other functions in this header reject QP only when it is
+ * greater than 31; confirm whether QP == 31 is really invalid here.
+ *
+ */
+OMXResult omxVCM4P2_QuantInvInter_I (
+ OMX_S16 *pSrcDst,
+ OMX_INT QP
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Intra (6.2.5.4.1)
+ *
+ * Description:
+ * Decodes the INTRA block coefficients. Inverse quantization, inverse
+ * zigzag positioning, and IDCT, with appropriate clipping on each step, are
+ * performed on the coefficients. The results are then placed in the output
+ * frame/plane on a pixel basis. Note: This function will be used only when
+ * at least one non-zero AC coefficient of the current block exists in the
+ * bit stream. The DC only condition will be handled in another function.
+ *
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream buffer. There is no boundary check for the bit stream
+ * buffer.
+ * pBitOffset - pointer to the bit position in the byte pointed to by
+ * *ppBitStream. *pBitOffset is valid within [0-7].
+ * step - width of the destination plane
+ * pCoefBufRow - pointer to the coefficient row buffer; must be aligned on
+ * an 8-byte boundary.
+ * pCoefBufCol - pointer to the coefficient column buffer; must be aligned
+ * on an 8-byte boundary.
+ * curQP - quantization parameter of the macroblock which the current block
+ * belongs to
+ * pQPBuf - pointer to the quantization parameter buffer
+ * blockIndex - block index indicating the component type and position as
+ * defined in [ISO14496-2], subclause 6.1.3.8, Figure 6-5.
+ * intraDCVLC - a code determined by intra_dc_vlc_thr and QP. This allows a
+ * mechanism to switch between two VLC for coding of Intra DC
+ * coefficients as per [ISO14496-2], Table 6-21.
+ * ACPredFlag - a flag equal to ac_pred_flag (of luminance) indicating if
+ * the ac coefficients of the first row or first column are
+ * differentially coded for intra coded macroblock.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded, so
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed to by *ppBitStream
+ * pDst - pointer to the block in the destination plane; must be aligned on
+ * an 8-byte boundary.
+ * pCoefBufRow - pointer to the updated coefficient row buffer.
+ * pCoefBufCol - pointer to the updated coefficient column buffer.
+ * Note: The coefficient buffers must be updated in accordance with
+ * the update procedure defined in section 6.2.2.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pCoefBufRow, pCoefBufCol,
+ * pQPBuf, pDst.
+ * - *pBitOffset is outside [0,7]
+ * - curQP is outside (1, 31)
+ * NOTE(review): as written this open interval excludes both 1 and 31;
+ * confirm the intended inclusive bounds.
+ * - blockIndex is outside [0,5]
+ * - step is not a multiple of 8
+ * - a pointer alignment requirement was violated.
+ * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Intra.
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Intra (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_U8 *pDst,
+ OMX_INT step,
+ OMX_S16 *pCoefBufRow,
+ OMX_S16 *pCoefBufCol,
+ OMX_U8 curQP,
+ const OMX_U8 *pQPBuf,
+ OMX_INT blockIndex,
+ OMX_INT intraDCVLC,
+ OMX_INT ACPredFlag,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Inter (6.2.5.4.2)
+ *
+ * Description:
+ * Decodes the INTER block coefficients. This function performs inverse
+ * quantization, inverse zigzag positioning, and IDCT (with appropriate
+ * clipping on each step) on the coefficients. The results (residuals) are
+ * placed in a contiguous array of 64 elements. For INTER block, the output
+ * buffer holds the residuals for further reconstruction.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream buffer. There is no boundary check for the bit stream
+ * buffer.
+ * pBitOffset - pointer to the bit position in the byte pointed to by
+ * *ppBitStream. *pBitOffset is valid within [0-7].
+ * QP - quantization parameter
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ * NOTE(review): this wording is identical to the intra function's;
+ * confirm what the flag selects for an inter block.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded, so
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed to by *ppBitStream
+ * pDst - pointer to the decoded residual buffer (a contiguous array of 64
+ * elements of OMX_S16 data type); must be aligned on a 16-byte
+ * boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - *pBitOffset is outside [0,7]
+ * - QP <= 0.
+ * - pDst is not 16-byte aligned
+ * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Inter.
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Inter (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_S16 *pDst,
+ OMX_INT QP,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_PredictReconCoefIntra (6.2.5.4.3)
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+ * to the function call, prediction direction (predDir) should be selected as
+ * specified in [ISO14496-2], subclause 7.4.3.1.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the coefficient buffer which contains the quantized
+ * coefficient residuals (PQF) of the current block; must be
+ * aligned on a 4-byte boundary. The output coefficients are
+ * saturated to the range [-2048, 2047].
+ * pPredBufRow - pointer to the coefficient row buffer; must be aligned on
+ * a 4-byte boundary.
+ * pPredBufCol - pointer to the coefficient column buffer; must be aligned
+ * on a 4-byte boundary.
+ * curQP - quantization parameter of the current block. curQP may be equal
+ * to predQP, especially when the current block and the predictor
+ * block are in the same macroblock.
+ * predQP - quantization parameter of the predictor block
+ * predDir - indicates the prediction direction, which takes one of the
+ * following values:
+ * - OMX_VC_HORIZONTAL - predict horizontally
+ * - OMX_VC_VERTICAL - predict vertically
+ * ACPredFlag - a flag indicating if AC prediction should be performed. It
+ * is equal to ac_pred_flag in the bit stream syntax of MPEG-4
+ * videoComp - video component type (luminance or chrominance) of the
+ * current block
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the coefficient buffer which contains the quantized
+ * coefficients (QF) of the current block
+ * pPredBufRow - pointer to the updated coefficient row buffer
+ * pPredBufCol - pointer to the updated coefficient column buffer.
+ * Note: Buffer update: Update the AC prediction buffer (both row
+ * and column buffer).
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the pointers is NULL:
+ * pSrcDst, pPredBufRow, or pPredBufCol.
+ * - curQP <= 0,
+ * - predQP <= 0,
+ * - curQP > 31,
+ * - predQP > 31,
+ * - predDir is outside [1,2]
+ * - pSrcDst, pPredBufRow, or pPredBufCol is not 4-byte aligned.
+ *
+ */
+OMXResult omxVCM4P2_PredictReconCoefIntra (
+ OMX_S16 *pSrcDst,
+ OMX_S16 *pPredBufRow,
+ OMX_S16 *pPredBufCol,
+ OMX_INT curQP,
+ OMX_INT predQP,
+ OMX_INT predDir,
+ OMX_INT ACPredFlag,
+ OMXVCM4P2VideoComponent videoComp
+);
+
+
+
+/**
+ * Function: omxVCM4P2_MCReconBlock (6.2.5.5.1)
+ *
+ * Description:
+ * Performs motion compensation prediction for an 8x8 block using
+ * interpolation described in [ISO14496-2], subclause 7.6.2.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the block in the reference plane.
+ * srcStep - distance between the start of consecutive lines in the
+ * reference plane, in bytes; must be a multiple of 8.
+ * dstStep - distance between the start of consecutive lines in the
+ * destination plane, in bytes; must be a multiple of 8.
+ * pSrcResidue - pointer to a buffer containing the 16-bit prediction
+ * residuals; must be 16-byte aligned. If the pointer is NULL, then
+ * no prediction is done, only motion compensation, i.e., the block
+ * is moved with interpolation.
+ * predictType - bilinear interpolation type, as defined in section
+ * 6.2.1.2.
+ * rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination buffer; must be 8-byte aligned. If
+ * prediction residuals are added then output intensities are
+ * clipped to the range [0,255].
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - pDst is not 8-byte aligned.
+ * - pSrcResidue is not 16-byte aligned (a NULL pSrcResidue is
+ * permitted, see above).
+ * - one or more of the following pointers is NULL: pSrc or pDst.
+ * - either srcStep or dstStep is not a multiple of 8.
+ * - invalid type specified for the parameter predictType.
+ * - the parameter rndVal is neither 0 nor 1.
+ *
+ */
+OMXResult omxVCM4P2_MCReconBlock (
+ const OMX_U8 *pSrc,
+ OMX_INT srcStep,
+ const OMX_S16 *pSrcResidue,
+ OMX_U8 *pDst,
+ OMX_INT dstStep,
+ OMX_INT predictType,
+ OMX_INT rndVal
+);
+
+
+
+/* 6.3.1.1 Intra 16x16 Prediction Modes */
+/* Enumerates the intra_16x16 macroblock prediction modes. This is the type
+ * of the predMode argument of omxVCM4P10_PredictIntra_16x16. */
+
+typedef enum {
+ OMX_VC_16X16_VERT = 0, /** Intra_16x16_Vertical */
+ OMX_VC_16X16_HOR = 1, /** Intra_16x16_Horizontal */
+ OMX_VC_16X16_DC = 2, /** Intra_16x16_DC */
+ OMX_VC_16X16_PLANE = 3 /** Intra_16x16_Plane */
+} OMXVCM4P10Intra16x16PredMode;
+
+
+
+/* 6.3.1.2 Intra 4x4 Prediction Modes */
+/* Enumerates the intra_4x4 macroblock prediction modes. This is the type of
+ * the predMode argument of omxVCM4P10_PredictIntra_4x4. */
+
+typedef enum {
+ OMX_VC_4X4_VERT = 0, /** Intra_4x4_Vertical */
+ OMX_VC_4X4_HOR = 1, /** Intra_4x4_Horizontal */
+ OMX_VC_4X4_DC = 2, /** Intra_4x4_DC */
+ OMX_VC_4X4_DIAG_DL = 3, /** Intra_4x4_Diagonal_Down_Left */
+ OMX_VC_4X4_DIAG_DR = 4, /** Intra_4x4_Diagonal_Down_Right */
+ OMX_VC_4X4_VR = 5, /** Intra_4x4_Vertical_Right */
+ OMX_VC_4X4_HD = 6, /** Intra_4x4_Horizontal_Down */
+ OMX_VC_4X4_VL = 7, /** Intra_4x4_Vertical_Left */
+ OMX_VC_4X4_HU = 8 /** Intra_4x4_Horizontal_Up */
+} OMXVCM4P10Intra4x4PredMode;
+
+
+
+/* 6.3.1.3 Chroma Prediction Modes */
+/* Enumerates the intra chroma prediction modes. This is the type of the
+ * predMode argument of omxVCM4P10_PredictIntraChroma_8x8. Note that the
+ * numbering differs from OMXVCM4P10Intra16x16PredMode: here DC is 0 and
+ * vertical is 2. */
+
+typedef enum {
+ OMX_VC_CHROMA_DC = 0, /** Intra_Chroma_DC */
+ OMX_VC_CHROMA_HOR = 1, /** Intra_Chroma_Horizontal */
+ OMX_VC_CHROMA_VERT = 2, /** Intra_Chroma_Vertical */
+ OMX_VC_CHROMA_PLANE = 3 /** Intra_Chroma_Plane */
+} OMXVCM4P10IntraChromaPredMode;
+
+
+
+/* 6.3.1.4 Motion Estimation Modes */
+/* Enumerates the H.264 motion estimation (motion search) modes. */
+
+typedef enum {
+ OMX_VC_M4P10_FAST_SEARCH = 0, /** Fast motion search */
+ OMX_VC_M4P10_FULL_SEARCH = 1 /** Full motion search */
+} OMXVCM4P10MEMode;
+
+
+
+/* 6.3.1.5 Macroblock Types */
+/* Enumerates the H.264 macroblock types; the types themselves are defined
+ * by [ISO14496-10]. The numbering is not contiguous: values 6 and 7 are not
+ * assigned, so the intra types start at 8. */
+
+typedef enum {
+ OMX_VC_P_16x16 = 0, /* defined by [ISO14496-10] */
+ OMX_VC_P_16x8 = 1,
+ OMX_VC_P_8x16 = 2,
+ OMX_VC_P_8x8 = 3,
+ OMX_VC_PREF0_8x8 = 4,
+ OMX_VC_INTER_SKIP = 5,
+ OMX_VC_INTRA_4x4 = 8, /* first intra type; 6 and 7 are skipped */
+ OMX_VC_INTRA_16x16 = 9,
+ OMX_VC_INTRA_PCM = 10
+} OMXVCM4P10MacroblockType;
+
+
+
+/* 6.3.1.6 Sub-Macroblock Types */
+/* Enumerates the H.264 sub-macroblock types; the types themselves are
+ * defined by [ISO14496-10]. Used for the subMBType[] entries of
+ * OMXVCM4P10MBInfo. */
+
+typedef enum {
+ OMX_VC_SUB_P_8x8 = 0, /* defined by [ISO14496-10] */
+ OMX_VC_SUB_P_8x4 = 1,
+ OMX_VC_SUB_P_4x8 = 2,
+ OMX_VC_SUB_P_4x4 = 3
+} OMXVCM4P10SubMacroblockType;
+
+
+
+/* 6.3.1.7 Variable Length Coding (VLC) Information */
+/* Holds the level/run description of one coded block of coefficients.
+ * The level and run arrays have room for at most 16 entries each. */
+
+typedef struct {
+ OMX_U8 uTrailing_Ones; /* Number of trailing ones; 3 at most */
+ OMX_U8 uTrailing_One_Signs; /* Trailing ones signal (NOTE(review): presumably the signs of the trailing ones - confirm encoding) */
+ OMX_U8 uNumCoeffs; /* Total number of non-zero coefs, including trailing ones */
+ OMX_U8 uTotalZeros; /* Total number of zero coefs */
+ OMX_S16 iLevels[16]; /* Levels of non-zero coefs, in reverse zig-zag order */
+ OMX_U8 uRuns[16]; /* Runs for levels and trailing ones, in reverse zig-zag order */
+} OMXVCM4P10VLCInfo;
+
+
+
+/* 6.3.1.8 Macroblock Information */
+/* Per-macroblock record: slice membership, macroblock and sub-macroblock
+ * types, quantization parameters, coded block patterns, motion vectors with
+ * their predictions, reference indices and the chosen intra prediction
+ * modes. OMXVCM4P10MBInfoPtr is a pointer to this structure. */
+
+typedef struct {
+ OMX_S32 sliceId; /* slice number */
+ OMXVCM4P10MacroblockType mbType; /* MB type */
+ OMXVCM4P10SubMacroblockType subMBType[4]; /* sub-block type, one entry per array element (4 in total) */
+ OMX_S32 qpy; /* qp for luma */
+ OMX_S32 qpc; /* qp for chroma */
+ OMX_U32 cbpy; /* CBP Luma */
+ OMX_U32 cbpc; /* CBP Chroma */
+ OMXVCMotionVector pMV0[4][4]; /* motion vector, represented using 1/4-pel units, pMV0[blocky][blockx] (blocky = 0~3, blockx = 0~3) */
+ OMXVCMotionVector pMVPred[4][4]; /* motion vector prediction, represented using 1/4-pel units, pMVPred[blocky][blockx] (blocky = 0~3, blockx = 0~3) */
+ OMX_U8 pRefL0Idx[4]; /* reference picture indices */
+ OMXVCM4P10Intra16x16PredMode Intra16x16PredMode; /* best intra 16x16 prediction mode */
+ OMXVCM4P10Intra4x4PredMode pIntra4x4PredMode[16]; /* best intra 4x4 prediction mode for each block; NOTE(review): original says "pMV0 indexed as above" - presumably the 16 entries follow the pMV0[blocky][blockx] block order, confirm */
+} OMXVCM4P10MBInfo, *OMXVCM4P10MBInfoPtr;
+
+
+
+/* 6.3.1.9 Motion Estimation Parameters */
+/* Configuration for H.264 motion estimation: which block splits and
+ * search refinements are enabled, and the search range per block size. */
+
+typedef struct {
+ OMX_S32 blockSplitEnable8x8; /* enables 16x8, 8x16, 8x8 */
+ OMX_S32 blockSplitEnable4x4; /* enable splitting of 8x4, 4x8, 4x4 blocks */
+ OMX_S32 halfSearchEnable; /* presumably 1=enable, 0=disable half-pel search - TODO confirm */
+ OMX_S32 quarterSearchEnable; /* presumably 1=enable, 0=disable quarter-pel search - TODO confirm */
+ OMX_S32 intraEnable4x4; /* 1=enable, 0=disable */
+ OMX_S32 searchRange16x16; /* integer pixel units */
+ OMX_S32 searchRange8x8; /* presumably integer pixel units, as for searchRange16x16 - TODO confirm */
+ OMX_S32 searchRange4x4; /* presumably integer pixel units, as for searchRange16x16 - TODO confirm */
+} OMXVCM4P10MEParams;
+
+
+
+/**
+ * Function: omxVCM4P10_PredictIntra_4x4 (6.3.3.1.1)
+ *
+ * Description:
+ * Perform Intra_4x4 prediction for luma samples. If the upper-right block is
+ * not available, then duplication work should be handled inside the function.
+ * Users need not define them outside.
+ *
+ * Input Arguments:
+ *
+ * pSrcLeft - Pointer to the buffer of 4 left pixels:
+ * p[x, y] (x = -1, y = 0..3)
+ * pSrcAbove - Pointer to the buffer of 8 above pixels:
+ * p[x,y] (x = 0..7, y = -1);
+ * must be aligned on a 4-byte boundary.
+ * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ * leftStep - Step of left pixel buffer; must be a multiple of 4.
+ * dstStep - Step of the destination buffer; must be a multiple of 4.
+ * predMode - Intra_4x4 prediction mode (OMXVCM4P10Intra4x4PredMode).
+ * availability - Neighboring 4x4 block availability flag, refer to
+ * "Neighboring Macroblock Availability".
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to the destination buffer; must be aligned on a 4-byte
+ * boundary.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pDst is NULL.
+ * dstStep < 4, or dstStep is not a multiple of 4.
+ * leftStep is not a multiple of 4.
+ * predMode is not in the valid range of enumeration
+ * OMXVCM4P10Intra4x4PredMode.
+ * predMode is OMX_VC_4X4_VERT, but availability doesn't set OMX_VC_UPPER
+ * indicating p[x,-1] (x = 0..3) is not available.
+ * predMode is OMX_VC_4X4_HOR, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..3) is not available.
+ * predMode is OMX_VC_4X4_DIAG_DL, but availability doesn't set
+ * OMX_VC_UPPER indicating p[x, -1] (x = 0..3) is not available.
+ * predMode is OMX_VC_4X4_DIAG_DR, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ * available.
+ * predMode is OMX_VC_4X4_VR, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ * available.
+ * predMode is OMX_VC_4X4_HD, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ * available.
+ * predMode is OMX_VC_4X4_VL, but availability doesn't set OMX_VC_UPPER
+ * indicating p[x,-1] (x = 0..3) is not available.
+ * predMode is OMX_VC_4X4_HU, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..3) is not available.
+ * availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ * availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ * either pSrcAbove or pDst is not aligned on a 4-byte boundary.
+ *
+ * Note:
+ * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ * they are not used by intra prediction as implied in predMode.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntra_4x4 (
+ const OMX_U8 *pSrcLeft,
+ const OMX_U8 *pSrcAbove,
+ const OMX_U8 *pSrcAboveLeft,
+ OMX_U8 *pDst,
+ OMX_INT leftStep,
+ OMX_INT dstStep,
+ OMXVCM4P10Intra4x4PredMode predMode,
+ OMX_S32 availability
+);
+
+
+
+/**
+ * Function: omxVCM4P10_PredictIntra_16x16 (6.3.3.1.2)
+ *
+ * Description:
+ * Perform Intra_16x16 prediction for luma samples. If the upper-right block
+ * is not available, then duplication work should be handled inside the
+ * function. Users need not define them outside.
+ *
+ * Input Arguments:
+ *
+ * pSrcLeft - Pointer to the buffer of 16 left pixels: p[x, y] (x = -1, y =
+ * 0..15)
+ * pSrcAbove - Pointer to the buffer of 16 above pixels: p[x,y] (x = 0..15,
+ * y = -1); must be aligned on a 16-byte boundary.
+ * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ * leftStep - Step of left pixel buffer; must be a multiple of 16.
+ * dstStep - Step of the destination buffer; must be a multiple of 16.
+ * predMode - Intra_16x16 prediction mode (OMXVCM4P10Intra16x16PredMode),
+ * please refer to section 3.4.1.
+ * availability - Neighboring 16x16 MB availability flag. Refer to
+ * section 3.4.4.
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to the destination buffer; must be aligned on a 16-byte
+ * boundary.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pDst is NULL.
+ * dstStep < 16, or dstStep is not a multiple of 16.
+ * leftStep is not a multiple of 16.
+ * predMode is not in the valid range of enumeration
+ * OMXVCM4P10Intra16x16PredMode.
+ * predMode is OMX_VC_16X16_VERT, but availability doesn't set
+ * OMX_VC_UPPER indicating p[x,-1] (x = 0..15) is not available.
+ * predMode is OMX_VC_16X16_HOR, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..15) is not available.
+ * predMode is OMX_VC_16X16_PLANE, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1] (x = 0..15), or p[-1,y] (y = 0..15), or p[-1,-1] is not
+ * available.
+ * availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ * availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ * either pSrcAbove or pDst is not aligned on a 16-byte boundary.
+ *
+ * Note:
+ * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ * they are not used by intra prediction implied in predMode.
+ * Note:
+ * OMX_VC_UPPER_RIGHT is not used in intra_16x16 luma prediction.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntra_16x16 (
+ const OMX_U8 *pSrcLeft,
+ const OMX_U8 *pSrcAbove,
+ const OMX_U8 *pSrcAboveLeft,
+ OMX_U8 *pDst,
+ OMX_INT leftStep,
+ OMX_INT dstStep,
+ OMXVCM4P10Intra16x16PredMode predMode,
+ OMX_S32 availability
+);
+
+
+
+/**
+ * Function: omxVCM4P10_PredictIntraChroma_8x8 (6.3.3.1.3)
+ *
+ * Description:
+ * Performs intra prediction for chroma samples.
+ *
+ * Input Arguments:
+ *
+ * pSrcLeft - Pointer to the buffer of 8 left pixels: p[x, y] (x = -1, y=
+ * 0..7).
+ * pSrcAbove - Pointer to the buffer of 8 above pixels: p[x,y] (x = 0..7, y
+ * = -1); must be aligned on an 8-byte boundary.
+ * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ * leftStep - Step of left pixel buffer; must be a multiple of 8.
+ * dstStep - Step of the destination buffer; must be a multiple of 8.
+ * predMode - Intra chroma prediction mode, please refer to section 3.4.3.
+ * availability - Neighboring chroma block availability flag, please refer
+ * to "Neighboring Macroblock Availability".
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to the destination buffer; must be aligned on an 8-byte
+ * boundary.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If any of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pDst is NULL.
+ * dstStep < 8 or dstStep is not a multiple of 8.
+ * leftStep is not a multiple of 8.
+ * predMode is not in the valid range of enumeration
+ * OMXVCM4P10IntraChromaPredMode.
+ * predMode is OMX_VC_CHROMA_VERT, but availability doesn't set
+ * OMX_VC_UPPER indicating p[x,-1] (x = 0..7) is not available.
+ * predMode is OMX_VC_CHROMA_HOR, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..7) is not available.
+ * predMode is OMX_VC_CHROMA_PLANE, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1](x = 0..7), or p[-1,y] (y = 0..7), or p[-1,-1] is not
+ * available.
+ * availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ * availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ * either pSrcAbove or pDst is not aligned on a 8-byte boundary.
+ *
+ * Note: pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ * they are not used by intra prediction implied in predMode.
+ *
+ * Note: OMX_VC_UPPER_RIGHT is not used in intra chroma prediction.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntraChroma_8x8 (
+ const OMX_U8 *pSrcLeft, /* in: 8 left pixels p[-1,y], y = 0..7 */
+ const OMX_U8 *pSrcAbove, /* in: 8 above pixels p[x,-1], x = 0..7; 8-byte aligned */
+ const OMX_U8 *pSrcAboveLeft, /* in: above-left pixel p[-1,-1] */
+ OMX_U8 *pDst, /* out: destination buffer; 8-byte aligned */
+ OMX_INT leftStep, /* in: step of the left pixel buffer; multiple of 8 */
+ OMX_INT dstStep, /* in: step of the destination buffer; >= 8 and a multiple of 8 */
+ OMXVCM4P10IntraChromaPredMode predMode, /* in: intra chroma prediction mode */
+ OMX_S32 availability /* in: neighboring chroma block availability flags; OMX_VC_UPPER_RIGHT is not used */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InterpolateLuma (6.3.3.2.1)
+ *
+ * Description:
+ * Performs quarter-pixel interpolation for inter luma MB. It is assumed that
+ * the frame is already padded when calling this function.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the source reference frame buffer
+ * srcStep - reference frame step, in bytes; must be a multiple of roi.width
+ * dstStep - destination frame step, in bytes; must be a multiple of
+ * roi.width
+ * dx - Fractional part of horizontal motion vector component in 1/4 pixel
+ * unit; valid in the range [0,3]
+ * dy - Fractional part of vertical motion vector y component in 1/4 pixel
+ * unit; valid in the range [0,3]
+ * roi - Dimension of the interpolation region; the parameters roi.width and
+ * roi.height must be equal to either 4, 8, or 16.
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to the destination frame buffer:
+ * if roi.width==4, 4-byte alignment required
+ * if roi.width==8, 8-byte alignment required
+ * if roi.width==16, 16-byte alignment required
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pSrc or pDst is NULL.
+ * srcStep or dstStep < roi.width.
+ * dx or dy is out of range [0,3].
+ * roi.width or roi.height is out of range {4, 8, 16}.
+ * roi.width is equal to 4, but pDst is not 4 byte aligned.
+ * roi.width is equal to 8 or 16, but pDst is not 8 byte aligned.
+ * srcStep or dstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_InterpolateLuma (
+ const OMX_U8 *pSrc, /* in: source reference frame buffer (assumed already padded) */
+ OMX_S32 srcStep, /* in: reference frame step, in bytes */
+ OMX_U8 *pDst, /* out: destination frame buffer; required alignment depends on roi.width */
+ OMX_S32 dstStep, /* in: destination frame step, in bytes */
+ OMX_S32 dx, /* in: horizontal MV fraction in 1/4 pixel units; [0,3] */
+ OMX_S32 dy, /* in: vertical MV fraction in 1/4 pixel units; [0,3] */
+ OMXSize roi /* in: interpolation region; width and height each 4, 8, or 16 */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InterpolateChroma (6.3.3.2.2)
+ *
+ * Description:
+ * Performs 1/8-pixel interpolation for inter chroma MB.
+ *
+ * Input Arguments:
+ *
+ * pSrc -Pointer to the source reference frame buffer
+ * srcStep -Reference frame step in bytes
+ * dstStep -Destination frame step in bytes; must be a multiple of
+ * roi.width.
+ * dx -Fractional part of horizontal motion vector component in 1/8 pixel
+ * unit; valid in the range [0,7]
+ * dy -Fractional part of vertical motion vector component in 1/8 pixel
+ * unit; valid in the range [0,7]
+ * roi -Dimension of the interpolation region; the parameters roi.width and
+ * roi.height must be equal to either 2, 4, or 8.
+ *
+ * Output Arguments:
+ *
+ * pDst -Pointer to the destination frame buffer:
+ * if roi.width==2, 2-byte alignment required
+ * if roi.width==4, 4-byte alignment required
+ * if roi.width==8, 8-byte alignment required
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pSrc or pDst is NULL.
+ * srcStep or dstStep < 8.
+ * dx or dy is out of range [0-7].
+ * roi.width or roi.height is out of range {2,4,8}.
+ * roi.width is equal to 2, but pDst is not 2-byte aligned.
+ * roi.width is equal to 4, but pDst is not 4-byte aligned.
+ * roi.width is equal to 8, but pDst is not 8 byte aligned.
+ * srcStep or dstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_InterpolateChroma (
+ const OMX_U8 *pSrc, /* in: source reference frame buffer */
+ OMX_S32 srcStep, /* in: reference frame step, in bytes */
+ OMX_U8 *pDst, /* out: destination frame buffer; required alignment depends on roi.width */
+ OMX_S32 dstStep, /* in: destination frame step, in bytes */
+ OMX_S32 dx, /* in: horizontal MV fraction in 1/8 pixel units; [0,7] */
+ OMX_S32 dy, /* in: vertical MV fraction in 1/8 pixel units; [0,7] */
+ OMXSize roi /* in: interpolation region; width and height each 2, 4, or 8 */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingLuma_VerEdge_I (6.3.3.3.1)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the luma
+ * macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the input macroblock; must be 16-byte aligned.
+ * srcdstStep -Step of the arrays; must be a multiple of 16.
+ * pAlpha -Array of size 2 of alpha thresholds (the first item is the alpha
+ * threshold for the external vertical edge, and the second item is
+ * for the internal vertical edge); per [ISO14496-10] alpha values
+ * must be in the range [0,255].
+ * pBeta -Array of size 2 of beta thresholds (the first item is the beta
+ * threshold for the external vertical edge, and the second item is
+ * for the internal vertical edge); per [ISO14496-10] beta values
+ * must be in the range [0,18].
+ * pThresholds -Array of size 16 of Thresholds (TC0) (values for the left
+ * edge of each 4x4 block, arranged in vertical block order); must
+ * be aligned on a 4-byte boundary. Per [ISO14496-10] values must
+ * be in the range [0,25].
+ * pBS -Array of size 16 of BS parameters (arranged in vertical block
+ * order); valid in the range [0,4] with the following
+ * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii)
+ * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst -Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * Either of the pointers in pSrcDst, pAlpha, pBeta, pThresholds, or pBS
+ * is NULL.
+ * Either pThresholds or pBS is not aligned on a 4-byte boundary.
+ * pSrcDst is not 16-byte aligned.
+ * srcdstStep is not a multiple of 16.
+ * pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * One or more entries in the table pThresholds[0..15] is outside of the
+ * range [0,25].
+ * pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 &&
+ * pBS[i^3]!=4) for 0<=i<=3.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingLuma_VerEdge_I (
+ OMX_U8 *pSrcDst, /* in/out: 16x16 luma macroblock, filtered in place; 16-byte aligned */
+ OMX_S32 srcdstStep, /* in: step of the arrays; multiple of 16 */
+ const OMX_U8 *pAlpha, /* in: 2 alpha thresholds: external, then internal vertical edge */
+ const OMX_U8 *pBeta, /* in: 2 beta thresholds: external, then internal vertical edge; [0,18] */
+ const OMX_U8 *pThresholds, /* in: 16 TC0 values in vertical block order; [0,25]; 4-byte aligned */
+ const OMX_U8 *pBS /* in: 16 BS values in vertical block order; [0,4]; 4-byte aligned */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingLuma_HorEdge_I (6.3.3.3.2)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four horizontal edges of the luma
+ * macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 16-byte aligned.
+ * srcdstStep - step of the arrays; must be a multiple of 16.
+ * pAlpha - array of size 2 of alpha thresholds (the first item is the alpha
+ * threshold for the external horizontal edge, and the second item is
+ * for the internal horizontal edge); per [ISO14496-10] alpha
+ * values must be in the range [0,255].
+ * pBeta - array of size 2 of beta thresholds (the first item is the beta
+ * threshold for the external horizontal edge, and the second item
+ * is for the internal horizontal edge). Per [ISO14496-10] beta
+ * values must be in the range [0,18].
+ * pThresholds - array of size 16 containing thresholds, TC0, for the top
+ * horizontal edge of each 4x4 block, arranged in horizontal block
+ * order; must be aligned on a 4-byte boundary. Per [ISO14496-10]
+ * values must be in the range [0,25].
+ * pBS - array of size 16 of BS parameters (arranged in horizontal block
+ * order); valid in the range [0,4] with the following
+ * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii)
+ * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst -Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr, if one of the following cases occurs:
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds, or pBS.
+ * - either pThresholds or pBS is not aligned on a 4-byte boundary.
+ * - pSrcDst is not 16-byte aligned.
+ * - srcdstStep is not a multiple of 16.
+ * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * - pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * - One or more entries in the table pThresholds[0..15] is
+ * outside of the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingLuma_HorEdge_I (
+ OMX_U8 *pSrcDst, /* in/out: 16x16 luma macroblock, filtered in place; 16-byte aligned */
+ OMX_S32 srcdstStep, /* in: step of the arrays; multiple of 16 */
+ const OMX_U8 *pAlpha, /* in: 2 alpha thresholds: external edge, then internal edge */
+ const OMX_U8 *pBeta, /* in: 2 beta thresholds: external, then internal horizontal edge; [0,18] */
+ const OMX_U8 *pThresholds, /* in: 16 TC0 values in horizontal block order; [0,25]; 4-byte aligned */
+ const OMX_U8 *pBS /* in: 16 BS values in horizontal block order; [0,4]; 4-byte aligned */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingChroma_VerEdge_I (6.3.3.3.3)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the chroma
+ * macroblock (8x8).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the input macroblock; must be 8-byte aligned.
+ * srcdstStep - Step of the arrays; must be a multiple of 8.
+ * pAlpha - Array of size 2 of alpha thresholds (the first item is alpha
+ * threshold for external vertical edge, and the second item is for
+ * internal vertical edge); per [ISO14496-10] alpha values must be
+ * in the range [0,255].
+ * pBeta - Array of size 2 of beta thresholds (the first item is the beta
+ * threshold for the external vertical edge, and the second item is
+ * for the internal vertical edge); per [ISO14496-10] beta values
+ * must be in the range [0,18].
+ * pThresholds - Array of size 8 containing thresholds, TC0, for the left
+ * vertical edge of each 4x2 chroma block, arranged in vertical
+ * block order; must be aligned on a 4-byte boundary. Per
+ * [ISO14496-10] values must be in the range [0,25].
+ * pBS - Array of size 16 of BS parameters (values for each 2x2 chroma
+ * block, arranged in vertical block order). This parameter is the
+ * same as the pBS parameter passed into omxVCM4P10_FilterDeblockingLuma_VerEdge_I;
+ * valid in the range [0,4] with the following restrictions: i)
+ * pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and
+ * only if pBS[i^3]== 4. Must be 4 byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst -Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds, or pBS.
+ * - pSrcDst is not 8-byte aligned.
+ * - srcdstStep is not a multiple of 8.
+ * - pThresholds is not 4-byte aligned.
+ * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * - pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * - One or more entries in the table pThresholds[0..7] is outside
+ * of the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - pBS is not 4-byte aligned.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingChroma_VerEdge_I (
+ OMX_U8 *pSrcDst, /* in/out: 8x8 chroma macroblock, filtered in place; 8-byte aligned */
+ OMX_S32 srcdstStep, /* in: step of the arrays; multiple of 8 */
+ const OMX_U8 *pAlpha, /* in: 2 alpha thresholds: external, then internal vertical edge */
+ const OMX_U8 *pBeta, /* in: 2 beta thresholds: external, then internal vertical edge; [0,18] */
+ const OMX_U8 *pThresholds, /* in: 8 TC0 values in vertical block order; [0,25]; 4-byte aligned */
+ const OMX_U8 *pBS /* in: 16 BS values in vertical block order; [0,4]; 4-byte aligned */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingChroma_HorEdge_I (6.3.3.3.4)
+ *
+ * Description:
+ * Performs in-place deblock filtering on the horizontal edges of the chroma
+ * macroblock (8x8).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 8-byte aligned.
+ * srcdstStep - array step; must be a multiple of 8.
+ * pAlpha - array of size 2 containing alpha thresholds; the first element
+ * contains the threshold for the external horizontal edge, and the
+ * second element contains the threshold for internal horizontal
+ * edge. Per [ISO14496-10] alpha values must be in the range
+ * [0,255].
+ * pBeta - array of size 2 containing beta thresholds; the first element
+ * contains the threshold for the external horizontal edge, and the
+ * second element contains the threshold for the internal
+ * horizontal edge. Per [ISO14496-10] beta values must be in the
+ * range [0,18].
+ * pThresholds - array of size 8 containing thresholds, TC0, for the top
+ * horizontal edge of each 2x4 chroma block, arranged in horizontal
+ * block order; must be aligned on a 4-byte boundary. Per
+ * [ISO14496-10] values must be in the range [0,25].
+ * pBS - array of size 16 containing BS parameters for each 2x2 chroma
+ * block, arranged in horizontal block order; valid in the range
+ * [0,4] with the following restrictions: i) pBS[i]== 4 may occur
+ * only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 4.
+ * Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst -Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr, if one of the following cases occurs:
+ * - any of the following pointers is NULL:
+ * pSrcDst, pAlpha, pBeta, pThresholds, or pBS.
+ * - pSrcDst is not 8-byte aligned.
+ * - srcdstStep is not a multiple of 8.
+ * - pThresholds is not 4-byte aligned.
+ * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * - pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * - One or more entries in the table pThresholds[0..7] is outside
+ * of the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - pBS is not 4-byte aligned.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingChroma_HorEdge_I (
+ OMX_U8 *pSrcDst, /* in/out: 8x8 chroma macroblock, filtered in place; 8-byte aligned */
+ OMX_S32 srcdstStep, /* in: array step; multiple of 8 */
+ const OMX_U8 *pAlpha, /* in: 2 alpha thresholds: external, then internal horizontal edge */
+ const OMX_U8 *pBeta, /* in: 2 beta thresholds: external, then internal horizontal edge; [0,18] */
+ const OMX_U8 *pThresholds, /* in: 8 TC0 values in horizontal block order; [0,25]; 4-byte aligned */
+ const OMX_U8 *pBS /* in: 16 BS values in horizontal block order; [0,4]; 4-byte aligned */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DeblockLuma_I (6.3.3.3.5)
+ *
+ * Description:
+ * This function performs in-place deblock filtering on the horizontal and
+ * vertical edges of a luma macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 16-byte aligned.
+ * srcdstStep - image width; must be a multiple of 16.
+ * pAlpha - pointer to a 2x2 table of alpha thresholds, organized as
+ * follows: {external vertical edge, internal vertical edge,
+ * external horizontal edge, internal horizontal edge }. Per
+ * [ISO14496-10] alpha values must be in the range [0,255].
+ * pBeta - pointer to a 2x2 table of beta thresholds, organized as follows:
+ * {external vertical edge, internal vertical edge, external
+ * horizontal edge, internal horizontal edge }. Per [ISO14496-10]
+ * beta values must be in the range [0,18].
+ * pThresholds - pointer to a 16x2 table of threshold (TC0), organized as
+ * follows: {values for the left or above edge of each 4x4 block,
+ * arranged in vertical block order and then in horizontal block
+ * order}; must be aligned on a 4-byte boundary. Per [ISO14496-10]
+ * values must be in the range [0,25].
+ * pBS - pointer to a 16x2 table of BS parameters arranged in scan block
+ * order for vertical edges and then horizontal edges; valid in the
+ * range [0,4] with the following restrictions: i) pBS[i]== 4 may
+ * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]==
+ * 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds or pBS.
+ * - pSrcDst is not 16-byte aligned.
+ * - either pThresholds or pBS is not aligned on a 4-byte boundary.
+ * - one or more entries in the table pAlpha[0..3] is outside the range
+ * [0,255].
+ * - one or more entries in the table pBeta[0..3] is outside the range
+ * [0,18].
+ * - one or more entries in the table pThresholds[0..31] is outside of
+ * the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - srcdstStep is not a multiple of 16.
+ *
+ */
+OMXResult omxVCM4P10_DeblockLuma_I (
+ OMX_U8 *pSrcDst, /* in/out: 16x16 luma macroblock, filtered in place; 16-byte aligned */
+ OMX_S32 srcdstStep, /* in: image width; multiple of 16 */
+ const OMX_U8 *pAlpha, /* in: 2x2 alpha table: {ext. vertical, int. vertical, ext. horizontal, int. horizontal} */
+ const OMX_U8 *pBeta, /* in: 2x2 beta table in the same edge order; [0,18] */
+ const OMX_U8 *pThresholds, /* in: 16x2 TC0 table, vertical then horizontal block order; [0,25]; 4-byte aligned */
+ const OMX_U8 *pBS /* in: 16x2 BS table, vertical edges then horizontal edges; [0,4]; 4-byte aligned */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DeblockChroma_I (6.3.3.3.6)
+ *
+ * Description:
+ * Performs in-place deblocking filtering on all edges of the chroma
+ * macroblock (8x8).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 8-byte aligned.
+ * srcdstStep - step of the arrays; must be a multiple of 8.
+ * pAlpha - pointer to a 2x2 array of alpha thresholds, organized as
+ * follows: {external vertical edge, internal vertical edge,
+ * external horizontal edge, internal horizontal edge }. Per
+ * [ISO14496-10] alpha values must be in the range [0,255].
+ * pBeta - pointer to a 2x2 array of Beta Thresholds, organized as follows:
+ * { external vertical edge, internal vertical edge, external
+ * horizontal edge, internal horizontal edge }. Per [ISO14496-10]
+ * beta values must be in the range [0,18].
+ * pThresholds - array of size 8x2 of Thresholds (TC0) (values for the left
+ * or above edge of each 4x2 or 2x4 block, arranged in vertical
+ * block order and then in horizontal block order); must be aligned
+ * on a 4-byte boundary. Per [ISO14496-10] values must be in the
+ * range [0,25].
+ * pBS - array of size 16x2 of BS parameters (arranged in scan block order
+ * for vertical edges and then horizontal edges); valid in the
+ * range [0,4] with the following restrictions: i) pBS[i]== 4 may
+ * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]==
+ * 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds, or pBS.
+ * - pSrcDst is not 8-byte aligned.
+ * - either pThresholds or pBS is not 4-byte aligned.
+ * - one or more entries in the table pAlpha[0..3] is outside the range
+ * [0,255].
+ * - one or more entries in the table pBeta[0..3] is outside the range
+ * [0,18].
+ * - one or more entries in the table pThresholds[0..15] is outside of
+ * the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - srcdstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_DeblockChroma_I (
+ OMX_U8 *pSrcDst, /* in/out: chroma macroblock, filtered in place; 8-byte aligned */
+ OMX_S32 srcdstStep, /* in: step of the arrays; multiple of 8 */
+ const OMX_U8 *pAlpha, /* in: 2x2 alpha array: {ext. vertical, int. vertical, ext. horizontal, int. horizontal} */
+ const OMX_U8 *pBeta, /* in: 2x2 beta array in the same edge order; [0,18] */
+ const OMX_U8 *pThresholds, /* in: 8x2 TC0 array, vertical then horizontal block order; [0,25]; 4-byte aligned */
+ const OMX_U8 *pBS /* in: 16x2 BS array, vertical edges then horizontal edges; [0,4]; 4-byte aligned */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (6.3.4.1.1)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse raster scan for a 2x2 block of
+ * ChromaDCLevel. The decoded coefficients in the packed position-coefficient
+ * buffer are stored in reverse zig-zag order, i.e., the first buffer element
+ * contains the last non-zero position-coefficient pair of the block. Within
+ * each position-coefficient pair, the position entry indicates the
+ * raster-scan position of the coefficient, while the coefficient entry
+ * contains the coefficient value.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - Double pointer to current byte in bit stream buffer
+ * pOffset - Pointer to current bit position in the byte pointed to by
+ * *ppBitStream; valid in the range [0,7].
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after each block is decoded
+ * pOffset - *pOffset is updated after each block is decoded
+ * pNumCoeff - Pointer to the number of nonzero coefficients in this block
+ * ppPosCoefBuf - Double pointer to destination residual
+ * coefficient-position pair buffer. Buffer position
+ * (*ppPosCoefBuf) is updated upon return, unless there are only
+ * zero coefficients in the currently decoded block. In this case
+ * the caller is expected to bypass the transform/dequantization of
+ * the empty blocks.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppBitStream or pOffset is NULL.
+ * - ppPosCoefBuf or pNumCoeff is NULL.
+ * OMX_Sts_Err - if one of the following is true:
+ * - an illegal code is encountered in the bitstream
+ *
+ */
+OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (
+ const OMX_U8 **ppBitStream, /* in/out: current byte in the bit stream buffer; updated after each block */
+ OMX_S32*pOffset, /* in/out: bit position within **ppBitStream, [0,7]; updated after each block */
+ OMX_U8 *pNumCoeff, /* out: number of nonzero coefficients in this block */
+ OMX_U8 **ppPosCoefbuf /* out: coefficient-position pair buffer (documented above as ppPosCoefBuf) */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC (6.3.4.1.2)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of
+ * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel, and ChromaACLevel. Inverse
+ * field scan is not supported. The decoded coefficients in the packed
+ * position-coefficient buffer are stored in reverse zig-zag order, i.e., the
+ * first buffer element contains the last non-zero position-coefficient pair of
+ * the block. Within each position-coefficient pair, the position entry
+ * indicates the raster-scan position of the coefficient, while the
+ * coefficient entry contains the coefficient value.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream -Double pointer to current byte in bit stream buffer
+ * pOffset - Pointer to current bit position in the byte pointed to by
+ * *ppBitStream; valid in the range [0,7].
+ * sMaxNumCoeff - Maximum number of non-zero coefficients in current
+ * block
+ * sVLCSelect - VLC table selector, obtained from the number of non-zero
+ * coefficients contained in the above and left 4x4 blocks. It is
+ * equivalent to the variable nC described in H.264 standard table
+ * 9-5, except its value can't be less than zero.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after each block is decoded.
+ * pOffset - *pOffset is updated after each block is decoded
+ * pNumCoeff - Pointer to the number of nonzero coefficients in this block
+ * ppPosCoefBuf - Double pointer to destination residual
+ * coefficient-position pair buffer. Buffer position
+ * (*ppPosCoefBuf) is updated upon return, unless there are only
+ * zero coefficients in the currently decoded block. In this case
+ * the caller is expected to bypass the transform/dequantization of
+ * the empty blocks.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppBitStream or pOffset is NULL.
+ * - ppPosCoefBuf or pNumCoeff is NULL.
+ * - sMaxNumCoeff is not equal to either 15 or 16.
+ * - sVLCSelect is less than 0.
+ *
+ * OMX_Sts_Err - if one of the following is true:
+ * - an illegal code is encountered in the bitstream
+ *
+ */
+OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC (
+ const OMX_U8 **ppBitStream, /* in/out: current byte in the bit stream buffer; updated after each block */
+ OMX_S32 *pOffset, /* in/out: bit position within **ppBitStream, [0,7]; updated after each block */
+ OMX_U8 *pNumCoeff, /* out: number of nonzero coefficients in this block */
+ OMX_U8 **ppPosCoefbuf, /* out: coefficient-position pair buffer (documented above as ppPosCoefBuf) */
+ OMX_INT sVLCSelect, /* in: VLC table selector; must not be less than 0 */
+ OMX_INT sMaxNumCoeff /* in: maximum number of non-zero coefficients; must be 15 or 16 */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_TransformDequantLumaDCFromPair (6.3.4.2.1)
+ *
+ * Description:
+ * Reconstructs the 4x4 LumaDC block from the coefficient-position pair
+ * buffer, performs integer inverse transformation, and dequantization for 4x4 LumaDC
+ * coefficients, and updates the pair buffer pointer to the next non-empty
+ * block.
+ *
+ * Input Arguments:
+ *
+ * ppSrc - Double pointer to residual coefficient-position pair buffer
+ * output by CAVLC decoding
+ * QP - Quantization parameter QpY
+ *
+ * Output Arguments:
+ *
+ * ppSrc - *ppSrc is updated to the start of next non empty block
+ * pDst - Pointer to the reconstructed 4x4 LumaDC coefficients buffer; must
+ * be aligned on a 8-byte boundary.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppSrc or pDst is NULL.
+ * - pDst is not 8 byte aligned.
+ * - QP is not in the range of [0-51].
+ *
+ */
+OMXResult omxVCM4P10_TransformDequantLumaDCFromPair (
+ const OMX_U8 **ppSrc, /* in/out: coefficient-position pair buffer; advanced to the next non-empty block */
+ OMX_S16 *pDst, /* out: reconstructed 4x4 LumaDC coefficients; 8-byte aligned */
+ OMX_INT QP /* in: quantization parameter QpY; [0,51] */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_TransformDequantChromaDCFromPair (6.3.4.2.2)
+ *
+ * Description:
+ * Reconstruct the 2x2 ChromaDC block from coefficient-position pair buffer,
+ * perform integer inverse transformation, and dequantization for 2x2 chroma
+ * DC coefficients, and update the pair buffer pointer to next non-empty
+ * block.
+ *
+ * Input Arguments:
+ *
+ * ppSrc - Double pointer to residual coefficient-position pair buffer
+ * output by CAVLC decoding
+ * QP - Quantization parameter QpC
+ *
+ * Output Arguments:
+ *
+ * ppSrc - *ppSrc is updated to the start of next non empty block
+ * pDst - Pointer to the reconstructed 2x2 ChromaDC coefficients buffer;
+ * must be aligned on a 4-byte boundary.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppSrc or pDst is NULL.
+ * - pDst is not 4-byte aligned.
+ * - QP is not in the range of [0-51].
+ *
+ */
+OMXResult omxVCM4P10_TransformDequantChromaDCFromPair (
+ const OMX_U8 **ppSrc, /* in/out: coefficient-position pair buffer; advanced to the next non-empty block */
+ OMX_S16 *pDst, /* out: reconstructed 2x2 ChromaDC coefficients; 4-byte aligned */
+ OMX_INT QP /* in: quantization parameter QpC; [0,51] */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd (6.3.4.2.3)
+ *
+ * Description:
+ * Reconstruct the 4x4 residual block from coefficient-position pair buffer,
+ * perform dequantization and integer inverse transformation for 4x4 block of
+ * residuals with previous intra prediction or motion compensation data, and
+ * update the pair buffer pointer to next non-empty block. If pDC == NULL,
+ * there are 16 non-zero AC coefficients at most in the packed buffer starting
+ * from 4x4 block position 0; If pDC != NULL, there are 15 non-zero AC
+ * coefficients at most in the packed buffer starting from 4x4 block position
+ * 1.
+ *
+ * Input Arguments:
+ *
+ * ppSrc - Double pointer to residual coefficient-position pair buffer
+ * output by CAVLC decoding
+ * pPred - Pointer to the predicted 4x4 block; must be aligned on a 4-byte
+ * boundary
+ * predStep - Predicted frame step size in bytes; must be a multiple of 4
+ * dstStep - Destination frame step in bytes; must be a multiple of 4
+ * pDC - Pointer to the DC coefficient of this block, NULL if it doesn't
+ * exist
+ * QP - Quantization parameter. It should be QpC in chroma 4x4 block
+ * decoding, otherwise it should be QpY.
+ * AC - Flag indicating if at least one non-zero AC coefficient exists
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the reconstructed 4x4 block data; must be aligned on a
+ * 4-byte boundary
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - pPred or pDst is NULL.
+ * - pPred or pDst is not 4-byte aligned.
+ * - predStep or dstStep is not a multiple of 4.
+ * - AC !=0 and QP is not in the range of [0-51] or ppSrc == NULL.
+ * - AC ==0 && pDC ==NULL.
+ *
+ */
+OMXResult omxVCM4P10_DequantTransformResidualFromPairAndAdd (
+ const OMX_U8 **ppSrc, /* in/out: coefficient-position pair buffer; advanced to the next non-empty block */
+ const OMX_U8 *pPred, /* in: predicted 4x4 block; 4-byte aligned */
+ const OMX_S16 *pDC, /* in: DC coefficient of this block, or NULL if it doesn't exist */
+ OMX_U8 *pDst, /* out: reconstructed 4x4 block data; 4-byte aligned */
+ OMX_INT predStep, /* in: predicted frame step in bytes; multiple of 4 */
+ OMX_INT dstStep, /* in: destination frame step in bytes; multiple of 4 */
+ OMX_INT QP, /* in: QpC for chroma 4x4 blocks, otherwise QpY */
+ OMX_INT AC /* in: flag, at least one non-zero AC coefficient exists; if 0, pDC must be non-NULL */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_MEGetBufSize (6.3.5.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification
+ * structure for the omxVCM4P10 motion estimation functions BlockMatch_Integer
+ * and MotionEstimationMB.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P10MEMode
+ * pMEParams -motion estimation parameters
+ *
+ * Output Arguments:
+ *
+ * pSize - pointer to the number of bytes required for the motion
+ * estimation specification structure
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - pMEParams or pSize is NULL.
+ * - an invalid MEMode is specified.
+ *
+ */
+OMXResult omxVCM4P10_MEGetBufSize (
+ OMXVCM4P10MEMode MEmode, /* in: motion estimation mode */
+ const OMXVCM4P10MEParams *pMEParams, /* in: motion estimation parameters; must not be NULL */
+ OMX_U32 *pSize /* out: bytes required for the ME specification structure; must not be NULL */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_MEInit (6.3.5.1.2)
+ *
+ * Description:
+ * Initializes the vendor-specific specification structure required for the
+ * omxVCM4P10 motion estimation functions: BlockMatch_Integer and
+ * MotionEstimationMB. Memory for the specification structure *pMESpec must be
+ * allocated prior to calling the function, and should be aligned on a 4-byte
+ * boundary. The number of bytes required for the specification structure can
+ * be determined using the function omxVCM4P10_MEGetBufSize. Following
+ * initialization by this function, the vendor-specific structure *pMESpec
+ * should contain an implementation-specific representation of all motion
+ * estimation parameters received via the structure pMEParams, for example
+ * searchRange16x16, searchRange8x8, etc.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P10MEMode
+ * pMEParams - motion estimation parameters
+ * pMESpec - pointer to the uninitialized ME specification structure
+ *
+ * Output Arguments:
+ *
+ * pMESpec - pointer to the initialized ME specification structure
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - pMEParams or pMESpec is NULL.
+ * - an invalid value was specified for the parameter MEmode
+ * - a negative or zero value was specified for one of the search ranges
+ * (e.g., pMEParams->searchRange8x8, pMEParams->searchRange16x16, etc.)
+ * - either in isolation or in combination, one or more of the enables or
+ * search ranges in the structure *pMEParams were configured such
+ * that the requested behavior fails to comply with [ISO14496-10].
+ *
+ */
+OMXResult omxVCM4P10_MEInit (
+ OMXVCM4P10MEMode MEmode, /* in: motion estimation mode */
+ const OMXVCM4P10MEParams *pMEParams, /* in: motion estimation parameters */
+ void *pMESpec /* in/out: caller-allocated ME specification structure (size from omxVCM4P10_MEGetBufSize; should be 4-byte aligned); initialized on return */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_BlockMatch_Integer (6.3.5.2.1)
+ *
+ * Description:
+ * Performs integer block match. Returns best MV and associated cost.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrgY - Pointer to the top-left corner of the current block:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the
+ * reference picture:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * nSrcOrgStep - Stride of the original picture plane, expressed in terms
+ * of integer pixels; must be a multiple of iBlockWidth.
+ * nSrcRefStep - Stride of the reference picture plane, expressed in terms
+ * of integer pixels
+ * pRefRect - pointer to the valid reference rectangle inside the reference
+ * picture plane
+ * pCurrPointPos - position of the current block in the current plane
+ * iBlockWidth - Width of the current block, expressed in terms of integer
+ * pixels; must be equal to either 4, 8, or 16.
+ * iBlockHeight - Height of the current block, expressed in terms of
+ * integer pixels; must be equal to either 4, 8, or 16.
+ * nLamda - Lamda factor; used to compute motion cost
+ * pMVPred - Predicted MV; used to compute motion cost, expressed in terms
+ * of 1/4-pel units
+ * pMVCandidate - Candidate MV; used to initialize the motion search,
+ * expressed in terms of integer pixels
+ * pMESpec - pointer to the ME specification structure
+ *
+ * Output Arguments:
+ *
+ * pBestMV - Best MV resulting from integer search, expressed in terms
+ * of 1/4-pel units
+ * pBestCost - Motion cost associated with the best MV; computed as
+ * SAD+Lamda*BitsUsedByMV
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - any of the following pointers are NULL:
+ * pSrcOrgY, pSrcRefY, pRefRect, pMVPred, pMVCandidate, or pMESpec.
+ * - Either iBlockWidth or iBlockHeight are values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Integer (
+ const OMX_U8 *pSrcOrgY,
+ OMX_S32 nSrcOrgStep,
+ const OMX_U8 *pSrcRefY,
+ OMX_S32 nSrcRefStep,
+ const OMXRect *pRefRect,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ OMX_U8 iBlockWidth,
+ OMX_U8 iBlockHeight,
+ OMX_U32 nLamda,
+ const OMXVCMotionVector *pMVPred,
+ const OMXVCMotionVector *pMVCandidate,
+ OMXVCMotionVector *pBestMV,
+ OMX_S32 *pBestCost,
+ void *pMESpec
+);
+
+
+
+/**
+ * Function: omxVCM4P10_BlockMatch_Half (6.3.5.2.2)
+ *
+ * Description:
+ * Performs a half-pel block match using results from a prior integer search.
+ * Returns the best MV and associated cost. This function estimates the
+ * half-pixel motion vector by interpolating the integer resolution motion
+ * vector referenced by the input parameter pSrcDstBestMV, i.e., the initial
+ * integer MV is generated externally. The function
+ * omxVCM4P10_BlockMatch_Integer may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrgY - Pointer to the current position in original picture plane:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the
+ * reference picture:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * nSrcOrgStep - Stride of the original picture plane in terms of full
+ * pixels; must be a multiple of iBlockWidth.
+ * nSrcRefStep - Stride of the reference picture plane in terms of full
+ * pixels
+ * iBlockWidth - Width of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * iBlockHeight - Height of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * nLamda - Lamda factor, used to compute motion cost
+ * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to
+ * compute motion cost
+ * pSrcDstBestMV - The best MV resulting from a prior integer search,
+ * represented in terms of 1/4-pel units
+ *
+ * Output Arguments:
+ *
+ * pSrcDstBestMV - Best MV resulting from the half-pel search, expressed in
+ * terms of 1/4-pel units
+ * pBestCost - Motion cost associated with the best MV; computed as
+ * SAD+Lamda*BitsUsedByMV
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - any of the following pointers is NULL: pSrcOrgY, pSrcRefY,
+ * pSrcDstBestMV, pMVPred, pBestCost
+ * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Half (
+ const OMX_U8 *pSrcOrgY,
+ OMX_S32 nSrcOrgStep,
+ const OMX_U8 *pSrcRefY,
+ OMX_S32 nSrcRefStep,
+ OMX_U8 iBlockWidth,
+ OMX_U8 iBlockHeight,
+ OMX_U32 nLamda,
+ const OMXVCMotionVector *pMVPred,
+ OMXVCMotionVector *pSrcDstBestMV,
+ OMX_S32 *pBestCost
+);
+
+
+
+/**
+ * Function: omxVCM4P10_BlockMatch_Quarter (6.3.5.2.3)
+ *
+ * Description:
+ * Performs a quarter-pel block match using results from a prior half-pel
+ * search. Returns the best MV and associated cost. This function estimates
+ * the quarter-pixel motion vector by interpolating the half-pel resolution
+ * motion vector referenced by the input parameter pSrcDstBestMV, i.e., the
+ * initial half-pel MV is generated externally. The function
+ * omxVCM4P10_BlockMatch_Half may be used for half-pel motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrgY - Pointer to the current position in original picture plane:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the
+ * reference picture:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * nSrcOrgStep - Stride of the original picture plane in terms of full
+ * pixels; must be a multiple of iBlockWidth.
+ * nSrcRefStep - Stride of the reference picture plane in terms of full
+ * pixels
+ * iBlockWidth - Width of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * iBlockHeight - Height of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * nLamda - Lamda factor, used to compute motion cost
+ * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to
+ * compute motion cost
+ * pSrcDstBestMV - The best MV resulting from a prior half-pel search,
+ * represented in terms of 1/4 pel units
+ *
+ * Output Arguments:
+ *
+ * pSrcDstBestMV - Best MV resulting from the quarter-pel search, expressed
+ * in terms of 1/4-pel units
+ * pBestCost - Motion cost associated with the best MV; computed as
+ * SAD+Lamda*BitsUsedByMV
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - One or more of the following pointers is NULL:
+ * pSrcOrgY, pSrcRefY, pSrcDstBestMV, pMVPred, pBestCost
+ * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Quarter (
+ const OMX_U8 *pSrcOrgY,
+ OMX_S32 nSrcOrgStep,
+ const OMX_U8 *pSrcRefY,
+ OMX_S32 nSrcRefStep,
+ OMX_U8 iBlockWidth,
+ OMX_U8 iBlockHeight,
+ OMX_U32 nLamda,
+ const OMXVCMotionVector *pMVPred,
+ OMXVCMotionVector *pSrcDstBestMV,
+ OMX_S32 *pBestCost
+);
+
+
+
+/**
+ * Function: omxVCM4P10_MotionEstimationMB (6.3.5.3.1)
+ *
+ * Description:
+ * Performs MB-level motion estimation and selects best motion estimation
+ * strategy from the set of modes supported in baseline profile [ISO14496-10].
+ *
+ * Input Arguments:
+ *
+ * pSrcCurrBuf - Pointer to the current position in original picture plane;
+ * 16-byte alignment required
+ * pSrcRefBufList - Pointer to an array with 16 entries. Each entry points
+ * to the top-left corner of the co-located MB in a reference
+ * picture. The array is filled from low-to-high with valid
+ * reference frame pointers; the unused high entries should be set
+ * to NULL. Ordering of the reference frames should follow
+ * [ISO14496-10] subclause 8.2.4 Decoding Process for Reference
+ * Picture Lists. The entries must be 16-byte aligned.
+ * pSrcRecBuf - Pointer to the top-left corner of the co-located MB in the
+ * reconstructed picture; must be 16-byte aligned.
+ * SrcCurrStep - Width of the original picture plane in terms of full
+ * pixels; must be a multiple of 16.
+ * SrcRefStep - Width of the reference picture plane in terms of full
+ * pixels; must be a multiple of 16.
+ * SrcRecStep - Width of the reconstructed picture plane in terms of full
+ * pixels; must be a multiple of 16.
+ * pRefRect - Pointer to the valid reference rectangle; relative to the
+ * image origin.
+ * pCurrPointPos - Position of the current macroblock in the current plane.
+ * Lambda - Lagrange factor for computing the cost function
+ * pMESpec - Pointer to the motion estimation specification structure; must
+ * have been allocated and initialized prior to calling this
+ * function.
+ * pMBInter - Array, of dimension four, containing pointers to information
+ * associated with four adjacent type INTER MBs (Left, Top,
+ * Top-Left, Top-Right). Any pointer in the array may be set equal
+ * to NULL if the corresponding MB doesn't exist or is not of type
+ * INTER.
+ * - pMBInter[0] - Pointer to left MB information
+ * - pMBInter[1] - Pointer to top MB information
+ * - pMBInter[2] - Pointer to top-left MB information
+ * - pMBInter[3] - Pointer to top-right MB information
+ * pMBIntra - Array, of dimension four, containing pointers to information
+ * associated with four adjacent type INTRA MBs (Left, Top,
+ * Top-Left, Top-Right). Any pointer in the array may be set equal
+ * to NULL if the corresponding MB doesn't exist or is not of type
+ * INTRA.
+ * - pMBIntra[0] - Pointer to left MB information
+ * - pMBIntra[1] - Pointer to top MB information
+ * - pMBIntra[2] - Pointer to top-left MB information
+ * - pMBIntra[3] - Pointer to top-right MB information
+ * pSrcDstMBCurr - Pointer to information structure for the current MB.
+ * The following entries should be set prior to calling the
+ * function: sliceID - the number of the slice to which the
+ * current MB belongs.
+ *
+ * Output Arguments:
+ *
+ * pDstCost - Pointer to the minimum motion cost for the current MB.
+ * pDstBlockSAD - Pointer to the array of SADs for each of the sixteen luma
+ * 4x4 blocks in each MB. The block SADs are in scan order for
+ * each MB. For implementations that cannot compute the SAD values
+ * individually, the maximum possible value (0xffff) is returned
+ * for each of the 16 block SAD entries.
+ * pSrcDstMBCurr - Pointer to updated information structure for the current
+ * MB after MB-level motion estimation has been completed. The
+ * following fields are updated by the ME function. The following
+ * parameter set quantifies the MB-level ME search results:
+ * - MbType
+ * - subMBType[4]
+ * - pMV0[4][4]
+ * - pMVPred[4][4]
+ * - pRefL0Idx[4]
+ * - Intra16x16PredMode
+ * - pIntra4x4PredMode[4][4]
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - One or more of the following pointers is NULL: pSrcCurrBuf,
+ * pSrcRefBufList, pSrcRecBuf, pRefRect, pCurrPointPos, pMESpec,
+ * pMBInter, pMBIntra,pSrcDstMBCurr, pDstCost, pSrcRefBufList[0]
+ * - SrcRefStep, SrcRecStep are not multiples of 16
+ * - iBlockWidth or iBlockHeight are values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_MotionEstimationMB (
+ const OMX_U8 *pSrcCurrBuf,
+ OMX_S32 SrcCurrStep,
+ const OMX_U8 *pSrcRefBufList[15],
+ OMX_S32 SrcRefStep,
+ const OMX_U8 *pSrcRecBuf,
+ OMX_S32 SrcRecStep,
+ const OMXRect *pRefRect,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ OMX_U32 Lambda,
+ void *pMESpec,
+ const OMXVCM4P10MBInfoPtr *pMBInter,
+ const OMXVCM4P10MBInfoPtr *pMBIntra,
+ OMXVCM4P10MBInfoPtr pSrcDstMBCurr,
+ OMX_INT *pDstCost,
+ OMX_U16 *pDstBlockSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SAD_4x (6.3.5.4.1)
+ *
+ * Description:
+ * This function calculates the SAD for 4x8 and 4x4 blocks.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg -Pointer to the original block; must be aligned on a 4-byte
+ * boundary.
+ * iStepOrg -Step of the original block buffer; must be a multiple of 4.
+ * pSrcRef -Pointer to the reference block
+ * iStepRef -Step of the reference block buffer
+ * iHeight -Height of the block; must be equal to either 4 or 8.
+ *
+ * Output Arguments:
+ *
+ * pDstSAD -Pointer of result SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - One or more of the following pointers is NULL:
+ * pSrcOrg, pSrcRef, or pDstSAD
+ * - iHeight is not equal to either 4 or 8.
+ * - iStepOrg is not a multiple of 4
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SAD_4x (
+ const OMX_U8 *pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8 *pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_S32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SADQuar_4x (6.3.5.4.2)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average
+ * of the other two (pSrcRef0 and pSrcRef1) for 4x8 or 4x4 blocks. Rounding
+ * is applied according to the convention (a+b+1)>>1.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the original block; must be aligned on a 4-byte
+ * boundary.
+ * pSrcRef0 - Pointer to reference block 0
+ * pSrcRef1 - Pointer to reference block 1
+ * iSrcStep - Step of the original block buffer; must be a multiple of 4.
+ * iRefStep0 - Step of reference block 0
+ * iRefStep1 - Step of reference block 1
+ * iHeight - Height of the block; must be equal to either 4 or 8.
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - iHeight is not equal to either 4 or 8.
+ * - One or more of the following pointers is NULL: pSrc, pSrcRef0,
+ * pSrcRef1, pDstSAD.
+ * - iSrcStep is not a multiple of 4
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_4x (
+ const OMX_U8 *pSrc,
+ const OMX_U8 *pSrcRef0,
+ const OMX_U8 *pSrcRef1,
+ OMX_U32 iSrcStep,
+ OMX_U32 iRefStep0,
+ OMX_U32 iRefStep1,
+ OMX_U32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SADQuar_8x (6.3.5.4.3)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average
+ * of the other two (pSrcRef0 and pSrcRef1) for 8x16, 8x8, or 8x4 blocks.
+ * Rounding is applied according to the convention (a+b+1)>>1.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the original block; must be aligned on an 8-byte
+ * boundary.
+ * pSrcRef0 - Pointer to reference block 0
+ * pSrcRef1 - Pointer to reference block 1
+ * iSrcStep - Step of the original block buffer; must be a multiple of 8.
+ * iRefStep0 - Step of reference block 0
+ * iRefStep1 - Step of reference block 1
+ * iHeight - Height of the block; must be equal to either 4, 8, or 16.
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - iHeight is not equal to either 4, 8, or 16.
+ * - One or more of the following pointers is NULL: pSrc, pSrcRef0,
+ * pSrcRef1, pDstSAD.
+ * - iSrcStep is not a multiple of 8
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_8x (
+ const OMX_U8 *pSrc,
+ const OMX_U8 *pSrcRef0,
+ const OMX_U8 *pSrcRef1,
+ OMX_U32 iSrcStep,
+ OMX_U32 iRefStep0,
+ OMX_U32 iRefStep1,
+ OMX_U32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SADQuar_16x (6.3.5.4.4)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average
+ * of the other two (pSrcRef0 and pSrcRef1) for 16x16 or 16x8 blocks.
+ * Rounding is applied according to the convention (a+b+1)>>1.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the original block; must be aligned on a 16-byte
+ * boundary.
+ * pSrcRef0 - Pointer to reference block 0
+ * pSrcRef1 - Pointer to reference block 1
+ * iSrcStep - Step of the original block buffer; must be a multiple of 16
+ * iRefStep0 - Step of reference block 0
+ * iRefStep1 - Step of reference block 1
+ * iHeight - Height of the block; must be equal to either 8 or 16
+ *
+ * Output Arguments:
+ *
+ * pDstSAD -Pointer of result SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - iHeight is not equal to either 8 or 16.
+ * - One or more of the following pointers is NULL: pSrc, pSrcRef0,
+ * pSrcRef1, pDstSAD.
+ * - iSrcStep is not a multiple of 16
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_16x (
+ const OMX_U8 *pSrc,
+ const OMX_U8 *pSrcRef0,
+ const OMX_U8 *pSrcRef1,
+ OMX_U32 iSrcStep,
+ OMX_U32 iRefStep0,
+ OMX_U32 iRefStep1,
+ OMX_U32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SATD_4x4 (6.3.5.4.5)
+ *
+ * Description:
+ * This function calculates the sum of absolute transform differences (SATD)
+ * for a 4x4 block by applying a Hadamard transform to the difference block
+ * and then calculating the sum of absolute coefficient values.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to the original block; must be aligned on a 4-byte
+ * boundary
+ * iStepOrg - Step of the original block buffer; must be a multiple of 4
+ * pSrcRef - Pointer to the reference block; must be aligned on a 4-byte
+ * boundary
+ * iStepRef - Step of the reference block buffer; must be a multiple of 4
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - pointer to the resulting SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pSrcRef, or pDstSAD
+ * - either pSrcOrg or pSrcRef is not aligned on a 4-byte boundary
+ * - iStepOrg <= 0 or iStepOrg is not a multiple of 4
+ * - iStepRef <= 0 or iStepRef is not a multiple of 4
+ *
+ */
+OMXResult omxVCM4P10_SATD_4x4 (
+ const OMX_U8 *pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8 *pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_U32 *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InterpolateHalfHor_Luma (6.3.5.5.1)
+ *
+ * Description:
+ * This function performs interpolation for two horizontal 1/2-pel positions -
+ * (-1/2, 0) and (1/2, 0) - around a full-pel position.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the top-left corner of the block used to interpolate in
+ * the reconstruction frame plane.
+ * iSrcStep - Step of the source buffer.
+ * iDstStep - Step of the destination(interpolation) buffer; must be a
+ * multiple of iWidth.
+ * iWidth - Width of the current block; must be equal to either 4, 8, or 16
+ * iHeight - Height of the current block; must be equal to 4, 8, or 16
+ *
+ * Output Arguments:
+ *
+ * pDstLeft - Pointer to the interpolation buffer of the left 1/2-pel position
+ * (-1/2, 0)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ * pDstRight - Pointer to the interpolation buffer of the right 1/2-pel
+ * position (1/2, 0)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrc, pDstLeft, or pDstRight
+ * - iWidth or iHeight have values other than 4, 8, or 16
+ * - iWidth==4 but pDstLeft and/or pDstRight is/are not aligned on a 4-byte boundary
+ * - iWidth==8 but pDstLeft and/or pDstRight is/are not aligned on a 8-byte boundary
+ * - iWidth==16 but pDstLeft and/or pDstRight is/are not aligned on a 16-byte boundary
+ * - any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_InterpolateHalfHor_Luma (
+ const OMX_U8 *pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8 *pDstLeft,
+ OMX_U8 *pDstRight,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InterpolateHalfVer_Luma (6.3.5.5.2)
+ *
+ * Description:
+ * This function performs interpolation for two vertical 1/2-pel positions -
+ * (0, -1/2) and (0, 1/2) - around a full-pel position.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to top-left corner of block used to interpolate in the
+ * reconstructed frame plane
+ * iSrcStep - Step of the source buffer.
+ * iDstStep - Step of the destination (interpolation) buffer; must be a
+ * multiple of iWidth.
+ * iWidth - Width of the current block; must be equal to either 4, 8, or 16
+ * iHeight - Height of the current block; must be equal to either 4, 8, or 16
+ *
+ * Output Arguments:
+ *
+ * pDstUp - Pointer to the interpolation buffer of the 1/2-pel position above
+ * the current full-pel position (0, -1/2)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ * pDstDown - Pointer to the interpolation buffer of the 1/2-pel position below
+ * the current full-pel position (0, 1/2)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrc, pDstUp, or pDstDown
+ * - iWidth or iHeight have values other than 4, 8, or 16
+ * - iWidth==4 but pDstUp and/or pDstDown is/are not aligned on a 4-byte boundary
+ * - iWidth==8 but pDstUp and/or pDstDown is/are not aligned on a 8-byte boundary
+ * - iWidth==16 but pDstUp and/or pDstDown is/are not aligned on a 16-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InterpolateHalfVer_Luma (
+ const OMX_U8 *pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8 *pDstUp,
+ OMX_U8 *pDstDown,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_Average_4x (6.3.5.5.3)
+ *
+ * Description:
+ * This function calculates the average of two 4x4 or 4x8 blocks. The result
+ * is rounded according to (a+b+1)/2.
+ *
+ * Input Arguments:
+ *
+ * pPred0 - Pointer to the top-left corner of reference block 0
+ * pPred1 - Pointer to the top-left corner of reference block 1
+ * iPredStep0 - Step of reference block 0; must be a multiple of 4.
+ * iPredStep1 - Step of reference block 1; must be a multiple of 4.
+ * iDstStep - Step of the destination buffer; must be a multiple of 4.
+ * iHeight - Height of the blocks; must be either 4 or 8.
+ *
+ * Output Arguments:
+ *
+ * pDstPred - Pointer to the destination buffer. 4-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pPred0, pPred1, or pDstPred
+ * - pDstPred is not aligned on a 4-byte boundary
+ * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 4
+ * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 4
+ * - iDstStep <= 0 or iDstStep is not a multiple of 4
+ * - iHeight is not equal to either 4 or 8
+ *
+ */
+OMXResult omxVCM4P10_Average_4x (
+ const OMX_U8 *pPred0,
+ const OMX_U8 *pPred1,
+ OMX_U32 iPredStep0,
+ OMX_U32 iPredStep1,
+ OMX_U8 *pDstPred,
+ OMX_U32 iDstStep,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_TransformQuant_ChromaDC (6.3.5.6.1)
+ *
+ * Description:
+ * This function performs 2x2 Hadamard transform of chroma DC coefficients
+ * and then quantizes the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the 2x2 array of chroma DC coefficients. 8-byte
+ * alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ * bIntra - Indicates whether this is an INTRA block. 1-INTRA, 0-INTER
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - Pointer to transformed and quantized coefficients. 8-byte
+ * alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcDst
+ * - pSrcDst is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_ChromaDC (
+ OMX_S16 *pSrcDst,
+ OMX_U32 iQP,
+ OMX_U8 bIntra
+);
+
+
+
+/**
+ * Function: omxVCM4P10_TransformQuant_LumaDC (6.3.5.6.2)
+ *
+ * Description:
+ * This function performs a 4x4 Hadamard transform of luma DC coefficients
+ * and then quantizes the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the 4x4 array of luma DC coefficients. 16-byte
+ * alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - Pointer to transformed and quantized coefficients. 16-byte
+ * alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL: pSrcDst
+ * - pSrcDst is not aligned on a 16-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_LumaDC (
+ OMX_S16 *pSrcDst,
+ OMX_U32 iQP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InvTransformDequant_LumaDC (6.3.5.6.3)
+ *
+ * Description:
+ * This function performs inverse 4x4 Hadamard transform and then dequantizes
+ * the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the 4x4 array of the 4x4 Hadamard-transformed and
+ * quantized coefficients. 16 byte alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to inverse-transformed and dequantized coefficients.
+ * 16-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL: pSrc
+ * - pSrc or pDst is not aligned on a 16-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InvTransformDequant_LumaDC (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst,
+ OMX_U32 iQP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InvTransformDequant_ChromaDC (6.3.5.6.4)
+ *
+ * Description:
+ * This function performs inverse 2x2 Hadamard transform and then dequantizes
+ * the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the 2x2 array of the 2x2 Hadamard-transformed and
+ * quantized coefficients. 8 byte alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to inverse-transformed and dequantized coefficients.
+ * 8-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL: pSrc
+ * - pSrc or pDst is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InvTransformDequant_ChromaDC (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst,
+ OMX_U32 iQP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InvTransformResidualAndAdd (6.3.5.7.1)
+ *
+ * Description:
+ * This function performs an inverse 4x4 integer transformation to produce
+ * the difference signal and then adds the difference to the prediction to get
+ * the reconstructed signal.
+ *
+ * Input Arguments:
+ *
+ * pSrcPred - Pointer to prediction signal. 4-byte alignment required.
+ * pDequantCoeff - Pointer to the transformed coefficients. 8-byte
+ * alignment required.
+ * iSrcPredStep - Step of the prediction buffer; must be a multiple of 4.
+ * iDstReconStep - Step of the destination reconstruction buffer; must be a
+ * multiple of 4.
+ * bAC - Indicates whether there are AC coefficients in the coefficient
+ * matrix.
+ *
+ * Output Arguments:
+ *
+ * pDstRecon -Pointer to the destination reconstruction buffer. 4-byte
+ * alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcPred, pDequantCoeff, pDstRecon
+ * - pSrcPred is not aligned on a 4-byte boundary
+ * - iSrcPredStep or iDstReconStep is not a multiple of 4.
+ * - pDequantCoeff is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InvTransformResidualAndAdd (
+ const OMX_U8 *pSrcPred,
+ const OMX_S16 *pDequantCoeff,
+ OMX_U8 *pDstRecon,
+ OMX_U32 iSrcPredStep,
+ OMX_U32 iDstReconStep,
+ OMX_U8 bAC
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SubAndTransformQDQResidual (6.3.5.8.1)
+ *
+ * Description:
+ * This function subtracts the prediction signal from the original signal to
+ * produce the difference signal and then performs a 4x4 integer transform and
+ * quantization. The quantized transformed coefficients are stored as
+ * pDstQuantCoeff. This function can also output dequantized coefficients or
+ * unquantized DC coefficients optionally by setting the pointers
+ * pDstDeQuantCoeff, pDCCoeff.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to original signal. 4-byte alignment required.
+ * pSrcPred - Pointer to prediction signal. 4-byte alignment required.
+ * iSrcOrgStep - Step of the original signal buffer; must be a multiple of
+ * 4.
+ * iSrcPredStep - Step of the prediction signal buffer; must be a multiple
+ * of 4.
+ * pNumCoeff -Number of non-zero coefficients after quantization. If this
+ * parameter is not required, it is set to NULL.
+ * nThreshSAD - Zero-block early detection threshold. If this parameter is
+ * not required, it is set to 0.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ * bIntra - Indicates whether this is an INTRA block, either 1-INTRA or
+ * 0-INTER
+ *
+ * Output Arguments:
+ *
+ * pDstQuantCoeff - Pointer to the quantized transformed coefficients.
+ * 8-byte alignment required.
+ * pDstDeQuantCoeff - Pointer to the dequantized transformed coefficients
+ * if this parameter is not equal to NULL. 8-byte alignment
+ * required.
+ * pDCCoeff - Pointer to the unquantized DC coefficient if this parameter
+ * is not equal to NULL.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pSrcPred, pNumCoeff, pDstQuantCoeff,
+ * pDstDeQuantCoeff, pDCCoeff
+ * - pSrcOrg is not aligned on a 4-byte boundary
+ * - pSrcPred is not aligned on a 4-byte boundary
+ * - iSrcOrgStep is not a multiple of 4
+ * - iSrcPredStep is not a multiple of 4
+ * - pDstQuantCoeff or pDstDeQuantCoeff is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_SubAndTransformQDQResidual (
+ const OMX_U8 *pSrcOrg,
+ const OMX_U8 *pSrcPred,
+ OMX_U32 iSrcOrgStep,
+ OMX_U32 iSrcPredStep,
+ OMX_S16 *pDstQuantCoeff,
+ OMX_S16 *pDstDeQuantCoeff,
+ OMX_S16 *pDCCoeff,
+ OMX_S8 *pNumCoeff,
+ OMX_U32 nThreshSAD,
+ OMX_U32 iQP,
+ OMX_U8 bIntra
+);
+
+
+
+/**
+ * Function: omxVCM4P10_GetVLCInfo (6.3.5.9.1)
+ *
+ * Description:
+ * This function extracts run-length encoding (RLE) information from the
+ * coefficient matrix. The results are returned in an OMXVCM4P10VLCInfo
+ * structure.
+ *
+ * Input Arguments:
+ *
+ * pSrcCoeff - pointer to the transform coefficient matrix. 8-byte
+ * alignment required.
+ * pScanMatrix - pointer to the scan order definition matrix. For a luma
+ * block the scan matrix should follow [ISO14496-10] section 8.5.4,
+ * and should contain the values 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13,
+ * 10, 7, 11, 14, 15. For a chroma block, the scan matrix should
+ * contain the values 0, 1, 2, 3.
+ * bAC - indicates presence of a DC coefficient; 0 = DC coefficient
+ * present, 1= DC coefficient absent.
+ * MaxNumCoef - specifies the number of coefficients contained in the
+ * transform coefficient matrix, pSrcCoeff. The value should be 16
+ * for blocks of type LUMADC, LUMAAC, LUMALEVEL, and CHROMAAC. The
+ * value should be 4 for blocks of type CHROMADC.
+ *
+ * Output Arguments:
+ *
+ * pDstVLCInfo - pointer to structure that stores information for
+ * run-length coding.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcCoeff, pScanMatrix, pDstVLCInfo
+ * - pSrcCoeff is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_GetVLCInfo (
+ const OMX_S16 *pSrcCoeff,
+ const OMX_U8 *pScanMatrix,
+ OMX_U8 bAC,
+ OMX_U32 MaxNumCoef,
+ OMXVCM4P10VLCInfo*pDstVLCInfo
+);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /** end of #define _OMXVC_H_ */
+
+/** EOF */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC_s.h
new file mode 100755
index 0000000..89f3040
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/api/omxVC_s.h
@@ -0,0 +1,129 @@
+;/******************************************************************************
+;// Copyright (c) 1999-2005 The Khronos Group Inc. All Rights Reserved
+;//
+;//
+;//
+;//
+;//
+;//
+;//
+;//
+;******************************************************************************/
+
+;/** =============== Structure Definition for Sample Generation ============== */
+;/** transparent status */
+
+;enum {
+OMX_VIDEO_TRANSPARENT EQU 0; /** Wholly transparent */
+OMX_VIDEO_PARTIAL EQU 1; /** Partially transparent */
+OMX_VIDEO_OPAQUE EQU 2; /** Opaque */
+;}
+
+;/** direction */
+;enum {
+OMX_VIDEO_NONE EQU 0;
+OMX_VIDEO_HORIZONTAL EQU 1;
+OMX_VIDEO_VERTICAL EQU 2;
+;}
+
+;/** bilinear interpolation type */
+;enum {
+OMX_VIDEO_INTEGER_PIXEL EQU 0; /** case a */
+OMX_VIDEO_HALF_PIXEL_X EQU 1; /** case b */
+OMX_VIDEO_HALF_PIXEL_Y EQU 2; /** case c */
+OMX_VIDEO_HALF_PIXEL_XY EQU 3; /** case d */
+;}
+
+;enum {
+OMX_UPPER EQU 1; /** set if the above macroblock is available */
+OMX_LEFT EQU 2; /** set if the left macroblock is available */
+OMX_CENTER EQU 4;
+OMX_RIGHT EQU 8;
+OMX_LOWER EQU 16;
+OMX_UPPER_LEFT EQU 32; /** set if the above-left macroblock is available */
+OMX_UPPER_RIGHT EQU 64; /** set if the above-right macroblock is available */
+OMX_LOWER_LEFT EQU 128;
+OMX_LOWER_RIGHT EQU 256
+;}
+
+;enum {
+OMX_VIDEO_LUMINANCE EQU 0; /** Luminance component */
+OMX_VIDEO_CHROMINANCE EQU 1; /** chrominance component */
+OMX_VIDEO_ALPHA EQU 2; /** Alpha component */
+;}
+
+;enum {
+OMX_VIDEO_INTER EQU 0; /** P picture or P-VOP */
+OMX_VIDEO_INTER_Q EQU 1; /** P picture or P-VOP */
+OMX_VIDEO_INTER4V EQU 2; /** P picture or P-VOP */
+OMX_VIDEO_INTRA EQU 3; /** I and P picture; I- and P-VOP */
+OMX_VIDEO_INTRA_Q EQU 4; /** I and P picture; I- and P-VOP */
+OMX_VIDEO_INTER4V_Q EQU 5; /** P picture or P-VOP (H.263)*/
+OMX_VIDEO_DIRECT EQU 6; /** B picture or B-VOP (MPEG-4 only) */
+OMX_VIDEO_INTERPOLATE EQU 7; /** B picture or B-VOP */
+OMX_VIDEO_BACKWARD EQU 8; /** B picture or B-VOP */
+OMX_VIDEO_FORWARD EQU 9; /** B picture or B-VOP */
+OMX_VIDEO_NOTCODED EQU 10; /** B picture or B-VOP */
+;}
+
+;enum {
+OMX_16X16_VERT EQU 0; /** Intra_16x16_Vertical (prediction mode) */
+OMX_16X16_HOR EQU 1; /** Intra_16x16_Horizontal (prediction mode) */
+OMX_16X16_DC EQU 2; /** Intra_16x16_DC (prediction mode) */
+OMX_16X16_PLANE EQU 3; /** Intra_16x16_Plane (prediction mode) */
+;}
+
+;enum {
+OMX_4x4_VERT EQU 0; /** Intra_4x4_Vertical (prediction mode) */
+OMX_4x4_HOR EQU 1; /** Intra_4x4_Horizontal (prediction mode) */
+OMX_4x4_DC EQU 2; /** Intra_4x4_DC (prediction mode) */
+OMX_4x4_DIAG_DL EQU 3; /** Intra_4x4_Diagonal_Down_Left (prediction mode) */
+OMX_4x4_DIAG_DR EQU 4; /** Intra_4x4_Diagonal_Down_Right (prediction mode) */
+OMX_4x4_VR EQU 5; /** Intra_4x4_Vertical_Right (prediction mode) */
+OMX_4x4_HD EQU 6; /** Intra_4x4_Horizontal_Down (prediction mode) */
+OMX_4x4_VL EQU 7; /** Intra_4x4_Vertical_Left (prediction mode) */
+OMX_4x4_HU EQU 8; /** Intra_4x4_Horizontal_Up (prediction mode) */
+;}
+
+;enum {
+OMX_CHROMA_DC EQU 0; /** Intra_Chroma_DC (prediction mode) */
+OMX_CHROMA_HOR EQU 1; /** Intra_Chroma_Horizontal (prediction mode) */
+OMX_CHROMA_VERT EQU 2; /** Intra_Chroma_Vertical (prediction mode) */
+OMX_CHROMA_PLANE EQU 3; /** Intra_Chroma_Plane (prediction mode) */
+;}
+
+;typedef struct {
+x EQU 0;
+y EQU 4;
+;}OMXCoordinate;
+
+;typedef struct {
+dx EQU 0;
+dy EQU 2;
+;}OMXMotionVector;
+
+;typedef struct {
+xx EQU 0;
+yy EQU 4;
+width EQU 8;
+height EQU 12;
+;}OMXiRect;
+
+;typedef enum {
+OMX_VC_INTER EQU 0; /** P picture or P-VOP */
+OMX_VC_INTER_Q EQU 1; /** P picture or P-VOP */
+OMX_VC_INTER4V EQU 2; /** P picture or P-VOP */
+OMX_VC_INTRA EQU 3; /** I and P picture, I- and P-VOP */
+OMX_VC_INTRA_Q EQU 4; /** I and P picture, I- and P-VOP */
+OMX_VC_INTER4V_Q EQU 5; /** P picture or P-VOP (H.263)*/
+;} OMXVCM4P2MacroblockType;
+
+;enum {
+OMX_VC_NONE EQU 0
+OMX_VC_HORIZONTAL EQU 1
+OMX_VC_VERTICAL EQU 2
+;};
+
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy16x16_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy16x16_s.s
new file mode 100755
index 0000000..296d59d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy16x16_s.s
@@ -0,0 +1,95 @@
+ ;/**
+ ; * Function: omxVCCOMM_Copy16x16
+ ; *
+ ; * Description:
+ ; * Copies the reference 16x16 block to the current block.
+ ; * Parameters:
+ ; * [in] pSrc - pointer to the reference block in the source frame; must be aligned on a 16-byte boundary.
+ ; * [in] step - distance between the starts of consecutive lines in the reference frame, in bytes;
+ ; * must be a multiple of 16 and must be larger than or equal to 16.
+ ; * [out] pDst - pointer to the destination block; must be aligned on a 16-byte boundary (the stores below use [pDst@128]).
+ ; * Return Value:
+ ; * OMX_Sts_NoErr - no error
+ ; * OMX_Sts_BadArgErr - bad arguments; returned under any of the following conditions:
+ ; * - one or more of the following pointers is NULL: pSrc, pDst
+ ; * - one or more of the following pointers is not aligned on a 16-byte boundary: pSrc, pDst
+ ; * - step <16 or step is not a multiple of 16.
+ ; */
+
+ INCLUDE omxtypes_s.h
+
+
+ M_VARIANTS CortexA8
+
+ IF CortexA8
+
+
+ ;//Input Arguments
+pSrc RN 0 ;// r0: source row pointer; post-incremented by step on every load
+pDst RN 1 ;// r1: destination pointer; post-incremented by 32 bytes on every store
+step RN 2 ;// r2: byte distance between consecutive source rows
+
+;//Local Variables
+Return RN 0 ;// shares r0 with pSrc; written only after the last load
+;// Neon Registers (D0-D7, typed as vectors of signed bytes)
+
+X0 DN D0.S8
+X1 DN D1.S8
+X2 DN D2.S8
+X3 DN D3.S8
+X4 DN D4.S8
+X5 DN D5.S8
+X6 DN D6.S8
+X7 DN D7.S8
+
+ M_START omxVCCOMM_Copy16x16 ;// Copies 16 strided source rows of 16 bytes each into 256 contiguous destination bytes
+
+
+ VLD1 {X0,X1},[pSrc@128],step ;// Rows 0-3: load 16 bytes from 16 byte aligned pSrc and pSrc=pSrc + step after loading
+ VLD1 {X2,X3},[pSrc@128],step
+ VLD1 {X4,X5},[pSrc@128],step
+ VLD1 {X6,X7},[pSrc@128],step
+
+ VST1 {X0,X1,X2,X3},[pDst@128]! ;// Store 32 bytes (two rows) to 16 byte aligned pDst; "!" advances pDst past them
+ VST1 {X4,X5,X6,X7},[pDst@128]!
+
+
+ VLD1 {X0,X1},[pSrc@128],step ;// Rows 4-7: same load/store pattern, reusing D0-D7
+ VLD1 {X2,X3},[pSrc@128],step
+ VLD1 {X4,X5},[pSrc@128],step
+ VLD1 {X6,X7},[pSrc@128],step
+
+ VST1 {X0,X1,X2,X3},[pDst@128]!
+ VST1 {X4,X5,X6,X7},[pDst@128]!
+
+
+ VLD1 {X0,X1},[pSrc@128],step ;// Rows 8-11
+ VLD1 {X2,X3},[pSrc@128],step
+ VLD1 {X4,X5},[pSrc@128],step
+ VLD1 {X6,X7},[pSrc@128],step
+
+ VST1 {X0,X1,X2,X3},[pDst@128]!
+ VST1 {X4,X5,X6,X7},[pDst@128]!
+
+
+ VLD1 {X0,X1},[pSrc@128],step ;// Rows 12-15
+ VLD1 {X2,X3},[pSrc@128],step
+ VLD1 {X4,X5},[pSrc@128],step
+ VLD1 {X6,X7},[pSrc@128],step
+
+ VST1 {X0,X1,X2,X3},[pDst@128]!
+ VST1 {X4,X5,X6,X7},[pDst@128]!
+
+
+ MOV Return,#OMX_Sts_NoErr ;// Status in r0; this body contains no NULL/alignment/step checks, so success is the only value set here
+
+
+
+ M_END
+ ENDIF
+
+
+
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy8x8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy8x8_s.s
new file mode 100755
index 0000000..db9e5ef
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_Copy8x8_s.s
@@ -0,0 +1,70 @@
+ ;/**
+ ; * Function: omxVCCOMM_Copy8x8
+ ; *
+ ; * Description:
+ ; * Copies the reference 8x8 block to the current block.
+ ; * Parameters:
+ ; * [in] pSrc - pointer to the reference block in the source frame; must be aligned on an 8-byte boundary.
+ ; * [in] step - distance between the starts of consecutive lines in the reference frame, in bytes;
+ ; * must be a multiple of 8 and must be larger than or equal to 8.
+ ; * [out] pDst - pointer to the destination block; must be aligned on an 8-byte boundary.
+ ; * Return Value:
+ ; * OMX_Sts_NoErr - no error
+ ; * OMX_Sts_BadArgErr - bad arguments; returned under any of the following conditions:
+ ; * - one or more of the following pointers is NULL: pSrc, pDst
+ ; * - one or more of the following pointers is not aligned on an 8-byte boundary: pSrc, pDst
+ ; * - step <8 or step is not a multiple of 8.
+ ; */
+
+ INCLUDE omxtypes_s.h
+
+
+ M_VARIANTS CortexA8
+
+ IF CortexA8
+
+
+ ;//Input Arguments
+pSrc RN 0 ;// r0: source row pointer; post-incremented by step on every load
+pDst RN 1 ;// r1: destination pointer; post-incremented by 16 bytes on every store
+step RN 2 ;// r2: byte distance between consecutive source rows
+
+;//Local Variables
+Count RN 3 ;// alias for r3; not referenced anywhere in this routine
+Return RN 0 ;// shares r0 with pSrc; written only after the last load
+;// Neon Registers (D0-D3, typed as vectors of signed bytes)
+
+X0 DN D0.S8
+X1 DN D1.S8
+X2 DN D2.S8
+X3 DN D3.S8
+ M_START omxVCCOMM_Copy8x8 ;// Copies 8 strided source rows of 8 bytes each into 64 contiguous destination bytes
+
+
+
+ VLD1 {X0},[pSrc],step ;// Rows 0-3: load 8 bytes from pSrc, pSrc=pSrc+step after load (no alignment qualifier is given on these accesses)
+ VLD1 {X1},[pSrc],step
+ VLD1 {X2},[pSrc],step
+ VLD1 {X3},[pSrc],step
+
+ VST1 {X0,X1},[pDst]! ;// Store 16 bytes (two rows) to pDst; "!" advances pDst past them
+ VST1 {X2,X3},[pDst]!
+
+ VLD1 {X0},[pSrc],step ;// Rows 4-7: same load/store pattern, reusing D0-D3
+ VLD1 {X1},[pSrc],step
+ VLD1 {X2},[pSrc],step
+ VLD1 {X3},[pSrc],step
+
+ VST1 {X0,X1},[pDst]!
+ VST1 {X2,X3},[pDst]!
+
+ MOV Return,#OMX_Sts_NoErr ;// Status in r0; this body contains no argument checks, so success is the only value set here
+
+ M_END
+ ENDIF
+
+
+
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
new file mode 100755
index 0000000..5c5b7d8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
@@ -0,0 +1,236 @@
+;//
+;//
+;// File Name: omxVCCOMM_ExpandFrame_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// Description:
+;// This function will Expand Frame boundary pixels into Plane
+;//
+;//
+
+;// Include standard headers
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+;// Import symbols required from other files
+;// (For example tables)
+
+
+;// Set debugging level
+DEBUG_ON SETL {FALSE}
+
+
+
+ IF CortexA8
+
+ M_START omxVCCOMM_ExpandFrame_I,r11 ;// Replicates the frame's edge pixels outward by iExpandPels on all four sides, in place
+
+;//Input registers
+
+pSrcDstPlane RN 0 ;// r0: address of the frame's top-left pixel
+iFrameWidth RN 1 ;// r1: frame width
+iFrameHeight RN 2 ;// r2: frame height; decremented in place as a row counter later on
+iExpandPels RN 3 ;// r3: border size (E) to fill on each side
+iPlaneStep RN 4 ;// r4: byte distance between rows; loaded from the stack below
+pTop RN 5 ;// r5 during the top/bottom phase (shared with pLeft)
+pBot RN 6 ;// r6 during the top/bottom phase (shared with pRight)
+pDstTop RN 7 ;// r7 (shared with Tmp)
+pDstBot RN 8
+pLeft RN 5 ;// r5 during the left/right phase
+pRight RN 6 ;// r6 during the left/right phase
+pDstLeft RN 9
+pDstRight RN 10
+Offset RN 11 ;// r11: first E*Step, later redefined as Step-E
+Temp RN 14 ;// r14: scratch and outer loop counter
+Counter RN 12 ;// r12: inner loop counter
+Tmp RN 7 ;// alias for r7; not referenced anywhere in this routine
+;//Output registers
+
+result RN 0 ;// alias for r0; the status is written through "r0" directly at End
+;// Neon registers
+qData0 QN 0.U8 ;// Q0 overlaps D0/D1 (dData0/dData1)
+qData1 QN 1.U8 ;// Q1 overlaps D2/D3 (dData2/dData3)
+dData0 DN 0.U8
+dData1 DN 1.U8
+dData2 DN 2.U8
+dData3 DN 3.U8
+
+ ;// Define stack arguments
+ M_ARG pPlaneStep, 4
+
+ ;// Load argument from the stack
+ M_LDR iPlaneStep, pPlaneStep
+
+ SUB pTop, pSrcDstPlane, #0 ;// Top row pointer of the frame
+ MUL Offset, iExpandPels, iPlaneStep ;// E*Step
+ SUB Temp, iFrameHeight, #1 ;// H-1
+ MUL Temp, iPlaneStep, Temp ;// (H-1)*Step
+ ADD pBot, Temp, pSrcDstPlane ;// BPtr = TPtr + (H-1)*Step
+ MOV Temp, iFrameWidth ;// Outer loop counter
+
+ ;// Check if pSrcDstPlane and iPlaneStep are 16 byte aligned
+ TST pSrcDstPlane, #0xf
+ TSTEQ iPlaneStep, #0xf ;// Only tested when the pointer itself passed
+ BNE Hor8Loop00 ;// Either one misaligned -> take the @64 variant below
+
+ ;//
+ ;// Copy top and bottom region of the plane as follows
+ ;// top region = top row elements from the frame
+ ;// bottom region = last row elements from the frame
+ ;//
+
+ ;// Case for 16 byte alignment
+Hor16Loop00
+ SUB pDstTop, pTop, Offset ;// Same columns, E rows above the frame
+ VLD1 qData0, [pTop @128]! ;// 16 pixels of the top row; pTop += 16
+ MOV Counter, iExpandPels ;// Inner loop counter
+ ADD pDstBot, pBot, iPlaneStep ;// Same columns, first row below the frame
+ VLD1 qData1, [pBot @128]! ;// 16 pixels of the bottom row; pBot += 16
+Ver16Loop0
+ VST1 qData0, [pDstTop @128], iPlaneStep ;// 8 rows per pass, so the rows written are E rounded up to a multiple of 8
+ VST1 qData0, [pDstTop @128], iPlaneStep
+ VST1 qData0, [pDstTop @128], iPlaneStep
+ VST1 qData0, [pDstTop @128], iPlaneStep
+ VST1 qData0, [pDstTop @128], iPlaneStep
+ VST1 qData0, [pDstTop @128], iPlaneStep
+ VST1 qData0, [pDstTop @128], iPlaneStep
+ VST1 qData0, [pDstTop @128], iPlaneStep
+ SUBS Counter, Counter, #8 ;// Flags set here are consumed by the BGT after the bottom stores
+ VST1 qData1, [pDstBot @128], iPlaneStep
+ VST1 qData1, [pDstBot @128], iPlaneStep
+ VST1 qData1, [pDstBot @128], iPlaneStep
+ VST1 qData1, [pDstBot @128], iPlaneStep
+ VST1 qData1, [pDstBot @128], iPlaneStep
+ VST1 qData1, [pDstBot @128], iPlaneStep
+ VST1 qData1, [pDstBot @128], iPlaneStep
+ VST1 qData1, [pDstBot @128], iPlaneStep
+ BGT Ver16Loop0
+
+ SUBS Temp, Temp, #16 ;// 16 columns handled per outer pass (width effectively rounded up to 16)
+ BGT Hor16Loop00
+ B EndAlignedLoop
+
+ ;// Case for 8 byte alignment
+Hor8Loop00
+ SUB pDstTop, pTop, Offset ;// Same structure as Hor16Loop00, with @64 instead of @128 on every access
+ VLD1 qData0, [pTop @64]!
+ MOV Counter, iExpandPels ;// Inner loop counter
+ ADD pDstBot, pBot, iPlaneStep
+ VLD1 qData1, [pBot @64]!
+Ver8Loop0
+ VST1 qData0, [pDstTop @64], iPlaneStep
+ VST1 qData0, [pDstTop @64], iPlaneStep
+ VST1 qData0, [pDstTop @64], iPlaneStep
+ VST1 qData0, [pDstTop @64], iPlaneStep
+ VST1 qData0, [pDstTop @64], iPlaneStep
+ VST1 qData0, [pDstTop @64], iPlaneStep
+ VST1 qData0, [pDstTop @64], iPlaneStep
+ VST1 qData0, [pDstTop @64], iPlaneStep
+ SUBS Counter, Counter, #8
+ VST1 qData1, [pDstBot @64], iPlaneStep
+ VST1 qData1, [pDstBot @64], iPlaneStep
+ VST1 qData1, [pDstBot @64], iPlaneStep
+ VST1 qData1, [pDstBot @64], iPlaneStep
+ VST1 qData1, [pDstBot @64], iPlaneStep
+ VST1 qData1, [pDstBot @64], iPlaneStep
+ VST1 qData1, [pDstBot @64], iPlaneStep
+ VST1 qData1, [pDstBot @64], iPlaneStep
+ BGT Ver8Loop0
+
+ SUBS Temp, Temp, #16
+ BGT Hor8Loop00
+
+EndAlignedLoop
+ ADD Temp, pSrcDstPlane, iFrameWidth ;// One past the last pixel of frame row 0
+ SUB pDstRight, Temp, Offset ;// Same column, E rows up (Offset still holds E*Step): start of the top-right corner
+ SUB pRight, Temp, #1 ;// Last pixel of frame row 0
+ SUB pDstLeft, pSrcDstPlane, Offset ;// E rows above the frame's first pixel ...
+ SUB pDstLeft, pDstLeft, iExpandPels ;// ... and E columns to the left: start of the top-left corner
+ ADD pLeft, pSrcDstPlane, #0 ;// First pixel of frame row 0 (r5 was advanced while it served as pTop)
+
+ VLD1 {dData0 []}, [pLeft], iPlaneStep ;// Top-Left corner pixel from frame duplicated in dData0
+ SUB Offset, iPlaneStep, iExpandPels ;// Offset redefined as Step-E: added after a row's stores to reach the next row (lands exactly one row down when E is a multiple of 8)
+ VLD1 {dData1 []}, [pRight], iPlaneStep ;// Top-Right corner pixel from frame duplicated in dData1
+ MOV Temp, iExpandPels ;// Row counter for HorLoop11
+
+ ;//
+ ;// Copy top-left and top-right region of the plane as follows
+ ;// top-left region = top-left corner pixel from the frame
+ ;// top-right region = top-right corner pixel from the frame
+ ;//
+HorLoop11
+ MOV Counter, iExpandPels
+VerLoop1
+ VST1 dData0, [pDstLeft], #8 ;// 8 bytes per store on each side
+ SUBS Counter, Counter, #8
+ VST1 dData1, [pDstRight], #8
+ BGT VerLoop1
+
+ SUBS Temp, Temp, #1
+ ADD pDstLeft, pDstLeft, Offset
+ ADD pDstRight, pDstRight, Offset
+ BPL HorLoop11 ;// BPL rather than BGT: runs E+1 times, i.e. the E rows above the frame plus frame row 0, whose edge pixels are the ones already loaded
+
+ SUB iFrameHeight, iFrameHeight, #1 ;// Row 0 was covered above; H-1 rows remain
+ ;//
+ ;// Copy left and right region of the plane as follows
+ ;// Left region = copy the row with left start pixel from the frame
+ ;// Right region = copy the row with right end pixel from the frame
+ ;//
+HorLoop22
+ VLD1 {dData0 []}, [pLeft], iPlaneStep ;// Leftmost pixel of this row, replicated; pLeft moves down one row
+ MOV Counter, iExpandPels
+ VLD1 {dData1 []}, [pRight], iPlaneStep ;// Rightmost pixel of this row, replicated; pRight moves down one row
+VerLoop2
+ VST1 dData0, [pDstLeft], #8
+ SUBS Counter, Counter, #8
+ VST1 dData1, [pDstRight], #8
+ BGT VerLoop2
+
+ SUBS iFrameHeight, iFrameHeight, #1
+ ADD pDstLeft, pDstLeft, Offset
+ ADD pDstRight, pDstRight, Offset
+ BGT HorLoop22 ;// NOTE(review): the body runs before this test, so iFrameHeight is presumably expected to be >= 2
+
+ MOV Temp, iExpandPels ;// Row counter for HorLoop33
+ ;//
+ ;// Copy bottom-left and bottom-right region of the plane as follows
+ ;// bottom-left region = bottom-left corner pixel from the frame
+ ;// bottom-right region = bottom-right corner pixel from the frame
+ ;//
+HorLoop33
+ MOV Counter, iExpandPels
+VerLoop3
+ VST1 dData0, [pDstLeft], #8 ;// dData0/dData1 still hold the pixels loaded in the final HorLoop22 pass
+ SUBS Counter, Counter, #8
+ VST1 dData1, [pDstRight], #8
+ BGT VerLoop3
+
+ SUBS Temp, Temp, #1
+ ADD pDstLeft, pDstLeft, Offset
+ ADD pDstRight, pDstRight, Offset
+ BGT HorLoop33 ;// BGT: exactly E rows below the frame
+End
+ MOV r0, #OMX_Sts_NoErr ;// Success is the only status set in this body
+
+ M_END
+
+ ENDIF
+
+
+
+
+;// Guarding implementation by the processor name
+
+
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/api/armVCM4P10_CAVLCTables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/api/armVCM4P10_CAVLCTables.h
new file mode 100755
index 0000000..547a2d9
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/api/armVCM4P10_CAVLCTables.h
@@ -0,0 +1,30 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_CAVLCTables.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Header file for optimized H.264 CAVLC tables
+ *
+ */
+
+#ifndef ARMVCM4P10_CAVLCTABLES_H
+#define ARMVCM4P10_CAVLCTABLES_H
+
+/* CAVLC tables */
+
+extern const OMX_U16 *armVCM4P10_CAVLCCoeffTokenTables[18];
+extern const OMX_U16 *armVCM4P10_CAVLCTotalZeroTables[15];
+extern const OMX_U16 *armVCM4P10_CAVLCTotalZeros2x2Tables[3];
+extern const OMX_U16 *armVCM4P10_CAVLCRunBeforeTables[15];
+extern const OMX_U8 armVCM4P10_ZigZag_4x4[16];
+extern const OMX_U8 armVCM4P10_ZigZag_2x2[4];
+extern const OMX_S8 armVCM4P10_SuffixToLevel[7];
+
+#endif
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
new file mode 100755
index 0000000..4f0892d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
@@ -0,0 +1,222 @@
+;//
+;//
+;// File Name: armVCM4P10_Average_4x_Align_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+;// Functions:
+;// armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe
+;//
+;// Implements Average of 4x4 with equation c = (a+b+1)>>1.
+;// First operand will be at offset ALIGNMENT from aligned address
+;// Second operand will be at aligned location and will be used as output.
+;// destination pointed by (pDst) for vertical interpolation.
+;// This function needs to copy 4 bytes in horizontal direction
+;//
+;// Registers used as input for this function
+;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 its step size
+;//
+;// Registers preserved for top level function
+;// r4,r5,r6,r8,r9,r14
+;//
+;// Registers modified by the function
+;// r7,r10,r11,r12
+;//
+;// Output registers
+;// r2 - pointer to the aligned location
+;// r3 - step size to this aligned location
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_Average_4x4_Align0_unsafe
+ EXPORT armVCM4P10_Average_4x4_Align2_unsafe
+ EXPORT armVCM4P10_Average_4x4_Align3_unsafe
+
+DEBUG_ON SETL {FALSE}
+
+;// Declare input registers
+pPred0 RN 0
+iPredStep0 RN 1
+pPred1 RN 2
+iPredStep1 RN 3
+pDstPred RN 2
+iDstStep RN 3
+
+;// Declare other intermediate registers
+iPredA0 RN 10
+iPredA1 RN 11
+iPredB0 RN 12
+iPredB1 RN 14
+Temp1 RN 4
+Temp2 RN 5
+ResultA RN 5
+ResultB RN 4
+r0x80808080 RN 7
+
+ IF ARM1136JS
+
+ ;// Computes the rounded byte-wise average (a+b+1)>>1 of two 4x4 blocks, one 32-bit word per row, writing each result row through pDstPred (same register as pPred1)
+ ;// pPred0 is at alignment offset 0 and pPred1 is alignment 4
+
+ ;// Function header
+ M_START armVCM4P10_Average_4x4_Align0_unsafe, r6
+
+ ;// Code start
+ LDR r0x80808080, =0x80808080 ;// 0x80 in every byte lane: the "+128" term of the identity below
+
+ ;// 1st load
+ M_LDR iPredB0, [pPred1] ;// Row 0 of the second operand
+ M_LDR iPredA0, [pPred0], iPredStep0 ;// Row 0 of the first operand; pPred0 += iPredStep0
+ M_LDR iPredB1, [pPred1, iPredStep1] ;// Row 1 of the second operand (offset addressing, pointer unchanged)
+ M_LDR iPredA1, [pPred0], iPredStep0 ;// Row 1 of the first operand
+
+ ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
+ MVN iPredB0, iPredB0 ;// 255-b in every byte
+ MVN iPredB1, iPredB1
+ UHSUB8 ResultA, iPredA0, iPredB0 ;// Per-byte halving subtract: (a-(255-b))/2
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080 ;// Toggling bit 7 supplies the +128 in each byte
+ M_STR ResultA, [pDstPred], iDstStep ;// Store row 0; r2 advances by r3, so it now addresses row 1
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep ;// Store row 1; r2 now addresses row 2
+
+ ;// 2nd load
+ M_LDR iPredA0, [pPred0], iPredStep0 ;// Row 2 of the first operand
+ M_LDR iPredB0, [pPred1] ;// Row 2 of the second operand (r2 was advanced by the two stores above)
+ M_LDR iPredA1, [pPred0], iPredStep0 ;// Row 3 of the first operand
+ M_LDR iPredB1, [pPred1, iPredStep1] ;// Row 3 of the second operand
+
+ MVN iPredB0, iPredB0
+ UHSUB8 ResultA, iPredA0, iPredB0
+ MVN iPredB1, iPredB1
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080
+ M_STR ResultA, [pDstPred], iDstStep ;// Store row 2
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep ;// Store row 3
+End0
+ M_END
+
+ ;// Computes the rounded byte-wise average (a+b+1)>>1 of two 4x4 blocks, one 32-bit word per row, writing each result row through pDstPred (same register as pPred1)
+ ;// pPred0 is at alignment offset 2 and pPred1 is alignment 4
+
+ ;// Function header
+ M_START armVCM4P10_Average_4x4_Align2_unsafe, r6
+
+ ;// Code start
+ LDR r0x80808080, =0x80808080 ;// 0x80 in every byte lane: the "+128" term of the identity below
+
+ ;// 1st load
+ LDR Temp1, [pPred0, #4] ;// Word following row 0's word of the first operand
+ M_LDR iPredA0, [pPred0], iPredStep0 ;// Row 0's own word; pPred0 += iPredStep0
+ M_LDR iPredB0, [pPred1] ;// Row 0 of the second operand
+ M_LDR iPredB1, [pPred1, iPredStep1] ;// Row 1 of the second operand (offset addressing, pointer unchanged)
+ M_LDR Temp2, [pPred0, #4] ;// Word following row 1's word of the first operand
+ M_LDR iPredA1, [pPred0], iPredStep0 ;// Row 1's own word
+ MVN iPredB0, iPredB0 ;// 255-b in every byte
+ MVN iPredB1, iPredB1
+ MOV iPredA0, iPredA0, LSR #16 ;// Keep the upper halfword of the first word in the low half ...
+ ORR iPredA0, iPredA0, Temp1, LSL #16 ;// ... and merge the lower halfword of the next word above it: the 4 bytes at offset 2 (presumably little-endian; confirm)
+ MOV iPredA1, iPredA1, LSR #16
+ ORR iPredA1, iPredA1, Temp2, LSL #16
+
+ ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
+ UHSUB8 ResultA, iPredA0, iPredB0 ;// Per-byte halving subtract: (a-(255-b))/2
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080 ;// Toggling bit 7 supplies the +128 in each byte
+ M_STR ResultA, [pDstPred], iDstStep ;// Store row 0; r2 advances by r3
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep ;// Store row 1; r2 now addresses row 2
+
+ ;// 2nd load
+ LDR Temp1, [pPred0, #4] ;// Rows 2 and 3: same sequence as the 1st load
+ M_LDR iPredA0, [pPred0], iPredStep0
+ LDR iPredB0, [pPred1]
+ LDR iPredB1, [pPred1, iPredStep1]
+ LDR Temp2, [pPred0, #4]
+ M_LDR iPredA1, [pPred0], iPredStep0
+ MVN iPredB0, iPredB0
+ MVN iPredB1, iPredB1
+ MOV iPredA0, iPredA0, LSR #16
+ ORR iPredA0, iPredA0, Temp1, LSL #16
+ MOV iPredA1, iPredA1, LSR #16
+ ORR iPredA1, iPredA1, Temp2, LSL #16
+
+ UHSUB8 ResultA, iPredA0, iPredB0
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080
+ M_STR ResultA, [pDstPred], iDstStep ;// Store row 2
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep ;// Store row 3
+End2
+ M_END
+
+
+ ;// Computes the rounded byte-wise average (a+b+1)>>1 of two 4x4 blocks, one 32-bit word per row, writing each result row through pDstPred (same register as pPred1)
+ ;// pPred0 is at alignment offset 3 and pPred1 is alignment 4
+
+ ;// Function header
+ M_START armVCM4P10_Average_4x4_Align3_unsafe, r6
+
+ ;// Code start
+ LDR r0x80808080, =0x80808080 ;// 0x80 in every byte lane: the "+128" term of (a+b+1)/2 = (a-(255-b))/2 + 128
+
+ ;// 1st load
+ LDR Temp1, [pPred0, #4] ;// Word following row 0's word of the first operand
+ M_LDR iPredA0, [pPred0], iPredStep0 ;// Row 0's own word; pPred0 += iPredStep0
+ LDR iPredB0, [pPred1] ;// Row 0 of the second operand
+ LDR iPredB1, [pPred1, iPredStep1] ;// Row 1 of the second operand (offset addressing, pointer unchanged)
+ LDR Temp2, [pPred0, #4] ;// Word following row 1's word of the first operand
+ M_LDR iPredA1, [pPred0], iPredStep0 ;// Row 1's own word
+
+ MVN iPredB0, iPredB0 ;// 255-b in every byte
+ MVN iPredB1, iPredB1
+ MOV iPredA0, iPredA0, LSR #24 ;// Keep only the top byte of the first word in the lowest lane ...
+ ORR iPredA0, iPredA0, Temp1, LSL #8 ;// ... and merge the low three bytes of the next word above it: the 4 bytes at offset 3 (presumably little-endian; confirm)
+ MOV iPredA1, iPredA1, LSR #24
+ ORR iPredA1, iPredA1, Temp2, LSL #8
+ UHSUB8 ResultA, iPredA0, iPredB0 ;// Per-byte halving subtract: (a-(255-b))/2
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080 ;// Toggling bit 7 supplies the +128 in each byte
+ M_STR ResultA, [pDstPred], iDstStep ;// Store row 0; r2 advances by r3
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep ;// Store row 1; r2 now addresses row 2
+
+ ;// 2nd load
+ LDR Temp1, [pPred0, #4] ;// Rows 2 and 3: same sequence as the 1st load
+ M_LDR iPredA0, [pPred0], iPredStep0
+ LDR iPredB0, [pPred1]
+ LDR iPredB1, [pPred1, iPredStep1]
+ LDR Temp2, [pPred0, #4]
+ M_LDR iPredA1, [pPred0], iPredStep0
+
+ MVN iPredB0, iPredB0
+ MVN iPredB1, iPredB1
+ MOV iPredA0, iPredA0, LSR #24
+ ORR iPredA0, iPredA0, Temp1, LSL #8
+ MOV iPredA1, iPredA1, LSR #24
+ ORR iPredA1, iPredA1, Temp2, LSL #8
+
+ UHSUB8 ResultA, iPredA0, iPredB0
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080
+ M_STR ResultA, [pDstPred], iDstStep ;// Store row 2
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep ;// Store row 3
+End3
+ M_END
+
+ ENDIF
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_CAVLCTables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_CAVLCTables.c
new file mode 100755
index 0000000..137495d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_CAVLCTables.c
@@ -0,0 +1,327 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_CAVLCTables.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Optimized CAVLC tables for H.264
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVCM4P10_CAVLCTables.h"
+
+/* 4x4 DeZigZag table */
+
+const OMX_U8 armVCM4P10_ZigZag_4x4[16] =
+{
+ 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+/* 2x2 DeZigZag table */
+
+const OMX_U8 armVCM4P10_ZigZag_2x2[4] =
+{
+ 0, 1, 2, 3
+};
+
+
+/*
+ * Suffix To Level table: entry [SuffixLength] holds the right-hand side of the last condition below
+ * We increment the suffix length if (each line is an equivalent rewrite of the one before it)
+ * ((LevelCode>>1)+1)>(3<<(SuffixLength-1)) && SuffixLength<6
+ * (LevelCode>>1)>=(3<<(SuffixLength-1)) && SuffixLength<6
+ * LevelCode >= 3<<SuffixLength && SuffixLength<6
+ * (LevelCode+2) >= (3<<SuffixLength)+2 && SuffixLength<6
+ */
+const OMX_S8 armVCM4P10_SuffixToLevel[7] =
+{
+ (3<<1)+2, /* SuffixLength=0 (index 0; same threshold as SuffixLength=1) */
+ (3<<1)+2, /* SuffixLength=1 */
+ (3<<2)+2, /* SuffixLength=2 */
+ (3<<3)+2, /* SuffixLength=3 */
+ (3<<4)+2, /* SuffixLength=4 */
+ (3<<5)+2, /* SuffixLength=5 */
+ -1 /* SuffixLength=6 - never increment */
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_0[132] = {
+ 0x0020, 0x0100, 0x2015, 0x2015, 0x400b, 0x400b, 0x400b, 0x400b,
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001,
+ 0x0028, 0x00f0, 0x00f8, 0x0027, 0x0030, 0x00d8, 0x00e0, 0x00e8,
+ 0x0038, 0x00a0, 0x00c8, 0x00d0, 0x0040, 0x0068, 0x0090, 0x0098,
+ 0x0048, 0x0050, 0x0058, 0x0060, 0x27ff, 0x27ff, 0x206b, 0x206b,
+ 0x0081, 0x0085, 0x0083, 0x0079, 0x0087, 0x007d, 0x007b, 0x0071,
+ 0x007f, 0x0075, 0x0073, 0x0069, 0x0070, 0x0078, 0x0080, 0x0088,
+ 0x2077, 0x2077, 0x206d, 0x206d, 0x2063, 0x2063, 0x2061, 0x2061,
+ 0x206f, 0x206f, 0x2065, 0x2065, 0x205b, 0x205b, 0x2059, 0x2059,
+ 0x0067, 0x005d, 0x0053, 0x0051, 0x005f, 0x0055, 0x004b, 0x0049,
+ 0x00a8, 0x00b0, 0x00b8, 0x00c0, 0x2041, 0x2041, 0x204d, 0x204d,
+ 0x2043, 0x2043, 0x2039, 0x2039, 0x2057, 0x2057, 0x2045, 0x2045,
+ 0x203b, 0x203b, 0x2031, 0x2031, 0x204f, 0x204f, 0x203d, 0x203d,
+ 0x2033, 0x2033, 0x2029, 0x2029, 0x0047, 0x0035, 0x002b, 0x0021,
+ 0x203f, 0x203f, 0x202d, 0x202d, 0x2023, 0x2023, 0x2019, 0x2019,
+ 0x0037, 0x0025, 0x001b, 0x0011, 0x202f, 0x202f, 0x201d, 0x201d,
+ 0x0013, 0x0009, 0x201f, 0x201f
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_1[128] = {
+ 0x0020, 0x00e8, 0x00f0, 0x00f8, 0x0027, 0x001f, 0x2015, 0x2015,
+ 0x400b, 0x400b, 0x400b, 0x400b, 0x4001, 0x4001, 0x4001, 0x4001,
+ 0x0028, 0x00d0, 0x00d8, 0x00e0, 0x0030, 0x0098, 0x00c0, 0x00c8,
+ 0x0038, 0x0060, 0x0088, 0x0090, 0x0040, 0x0048, 0x0050, 0x0058,
+ 0x27ff, 0x27ff, 0x207f, 0x207f, 0x0087, 0x0085, 0x0083, 0x0081,
+ 0x007b, 0x0079, 0x007d, 0x0073, 0x2075, 0x2075, 0x2071, 0x2071,
+ 0x0068, 0x0070, 0x0078, 0x0080, 0x2077, 0x2077, 0x206d, 0x206d,
+ 0x206b, 0x206b, 0x2069, 0x2069, 0x206f, 0x206f, 0x2065, 0x2065,
+ 0x2063, 0x2063, 0x2061, 0x2061, 0x0059, 0x005d, 0x005b, 0x0051,
+ 0x0067, 0x0055, 0x0053, 0x0049, 0x00a0, 0x00a8, 0x00b0, 0x00b8,
+ 0x205f, 0x205f, 0x204d, 0x204d, 0x204b, 0x204b, 0x2041, 0x2041,
+ 0x2057, 0x2057, 0x2045, 0x2045, 0x2043, 0x2043, 0x2039, 0x2039,
+ 0x204f, 0x204f, 0x203d, 0x203d, 0x203b, 0x203b, 0x2031, 0x2031,
+ 0x0029, 0x0035, 0x0033, 0x0021, 0x2047, 0x2047, 0x202d, 0x202d,
+ 0x202b, 0x202b, 0x2019, 0x2019, 0x003f, 0x0025, 0x0023, 0x0011,
+ 0x0037, 0x001d, 0x001b, 0x0009, 0x202f, 0x202f, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_2[112] = {
+ 0x0020, 0x0088, 0x00b0, 0x00b8, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+ 0x003f, 0x0037, 0x002f, 0x0027, 0x001f, 0x0015, 0x000b, 0x0001,
+ 0x0028, 0x0050, 0x0078, 0x0080, 0x0030, 0x0038, 0x0040, 0x0048,
+ 0x07ff, 0x0081, 0x0087, 0x0085, 0x0083, 0x0079, 0x007f, 0x007d,
+ 0x007b, 0x0071, 0x0077, 0x0075, 0x0073, 0x0069, 0x206b, 0x206b,
+ 0x0058, 0x0060, 0x0068, 0x0070, 0x2061, 0x2061, 0x206d, 0x206d,
+ 0x2063, 0x2063, 0x2059, 0x2059, 0x206f, 0x206f, 0x2065, 0x2065,
+ 0x205b, 0x205b, 0x2051, 0x2051, 0x0067, 0x005d, 0x0053, 0x0049,
+ 0x005f, 0x0055, 0x004b, 0x0041, 0x0090, 0x0098, 0x00a0, 0x00a8,
+ 0x2039, 0x2039, 0x2031, 0x2031, 0x204d, 0x204d, 0x2029, 0x2029,
+ 0x2057, 0x2057, 0x2045, 0x2045, 0x2043, 0x2043, 0x2021, 0x2021,
+ 0x0019, 0x003d, 0x003b, 0x0011, 0x004f, 0x0035, 0x0033, 0x0009,
+ 0x202b, 0x202b, 0x202d, 0x202d, 0x2023, 0x2023, 0x2025, 0x2025,
+ 0x201b, 0x201b, 0x2047, 0x2047, 0x201d, 0x201d, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_3[80] = {
+ 0x0020, 0x0028, 0x0030, 0x0038, 0x0040, 0x0048, 0x0050, 0x0058,
+ 0x0060, 0x0068, 0x0070, 0x0078, 0x0080, 0x0088, 0x0090, 0x0098,
+ 0x0009, 0x000b, 0x07ff, 0x0001, 0x0011, 0x0013, 0x0015, 0x07ff,
+ 0x0019, 0x001b, 0x001d, 0x001f, 0x0021, 0x0023, 0x0025, 0x0027,
+ 0x0029, 0x002b, 0x002d, 0x002f, 0x0031, 0x0033, 0x0035, 0x0037,
+ 0x0039, 0x003b, 0x003d, 0x003f, 0x0041, 0x0043, 0x0045, 0x0047,
+ 0x0049, 0x004b, 0x004d, 0x004f, 0x0051, 0x0053, 0x0055, 0x0057,
+ 0x0059, 0x005b, 0x005d, 0x005f, 0x0061, 0x0063, 0x0065, 0x0067,
+ 0x0069, 0x006b, 0x006d, 0x006f, 0x0071, 0x0073, 0x0075, 0x0077,
+ 0x0079, 0x007b, 0x007d, 0x007f, 0x0081, 0x0083, 0x0085, 0x0087
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_4[32] = {
+ 0x0020, 0x0038, 0x2015, 0x2015, 0x4001, 0x4001, 0x4001, 0x4001,
+ 0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b,
+ 0x0028, 0x0030, 0x0021, 0x0019, 0x2027, 0x2027, 0x0025, 0x0023,
+ 0x201d, 0x201d, 0x201b, 0x201b, 0x0011, 0x001f, 0x0013, 0x0009
+};
+
+const OMX_U16 * armVCM4P10_CAVLCCoeffTokenTables[18] = {
+ armVCM4P10_CAVLCCoeffTokenTables_0, /* nC=0 */
+ armVCM4P10_CAVLCCoeffTokenTables_0, /* nC=1 */
+ armVCM4P10_CAVLCCoeffTokenTables_1, /* nC=2 */
+ armVCM4P10_CAVLCCoeffTokenTables_1, /* nC=3 */
+ armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=4 */
+ armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=5 */
+ armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=6 */
+ armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=7 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=8 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=9 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=10 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=11 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=12 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=13 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=14 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=15 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=16 */
+ armVCM4P10_CAVLCCoeffTokenTables_4 /* nC=-1 */
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_0[40] = {
+ 0x0020, 0x0048, 0x0009, 0x0007, 0x2005, 0x2005, 0x2003, 0x2003,
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001,
+ 0x0028, 0x0040, 0x0011, 0x000f, 0x0030, 0x0038, 0x0019, 0x0017,
+ 0x27ff, 0x27ff, 0x201f, 0x201f, 0x201d, 0x201d, 0x201b, 0x201b,
+ 0x2015, 0x2015, 0x2013, 0x2013, 0x200d, 0x200d, 0x200b, 0x200b
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_1[24] = {
+ 0x0020, 0x0028, 0x0011, 0x000f, 0x000d, 0x000b, 0x2009, 0x2009,
+ 0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001,
+ 0x001d, 0x001b, 0x0019, 0x0017, 0x2015, 0x2015, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_2[24] = {
+ 0x0020, 0x0028, 0x0011, 0x000b, 0x0009, 0x0001, 0x200f, 0x200f,
+ 0x200d, 0x200d, 0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003,
+ 0x001b, 0x0017, 0x2019, 0x2019, 0x2015, 0x2015, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_3[24] = {
+ 0x0020, 0x0028, 0x0013, 0x000f, 0x0007, 0x0005, 0x2011, 0x2011,
+ 0x200d, 0x200d, 0x200b, 0x200b, 0x2009, 0x2009, 0x2003, 0x2003,
+ 0x2019, 0x2019, 0x2017, 0x2017, 0x2015, 0x2015, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_4[20] = {
+ 0x0020, 0x0015, 0x0011, 0x0005, 0x0003, 0x0001, 0x200f, 0x200f,
+ 0x200d, 0x200d, 0x200b, 0x200b, 0x2009, 0x2009, 0x2007, 0x2007,
+ 0x2017, 0x2017, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_5[20] = {
+ 0x0020, 0x0011, 0x2013, 0x2013, 0x200f, 0x200f, 0x200d, 0x200d,
+ 0x200b, 0x200b, 0x2009, 0x2009, 0x2007, 0x2007, 0x2005, 0x2005,
+ 0x0015, 0x0001, 0x2003, 0x2003
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_6[20] = {
+ 0x0020, 0x000f, 0x2011, 0x2011, 0x200d, 0x200d, 0x2009, 0x2009,
+ 0x2007, 0x2007, 0x2005, 0x2005, 0x400b, 0x400b, 0x400b, 0x400b,
+ 0x0013, 0x0001, 0x2003, 0x2003
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_7[20] = {
+ 0x0020, 0x0003, 0x200f, 0x200f, 0x200d, 0x200d, 0x2007, 0x2007,
+ 0x400b, 0x400b, 0x400b, 0x400b, 0x4009, 0x4009, 0x4009, 0x4009,
+ 0x0011, 0x0001, 0x2005, 0x2005
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_8[20] = {
+ 0x0020, 0x0005, 0x200b, 0x200b, 0x400d, 0x400d, 0x400d, 0x400d,
+ 0x4009, 0x4009, 0x4009, 0x4009, 0x4007, 0x4007, 0x4007, 0x4007,
+ 0x0003, 0x0001, 0x200f, 0x200f
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_9[20] = {
+ 0x0020, 0x000d, 0x2005, 0x2005, 0x400b, 0x400b, 0x400b, 0x400b,
+ 0x4009, 0x4009, 0x4009, 0x4009, 0x4007, 0x4007, 0x4007, 0x4007,
+ 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_10[16] = {
+ 0x0001, 0x0003, 0x2005, 0x2005, 0x2007, 0x2007, 0x200b, 0x200b,
+ 0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_11[16] = {
+ 0x0001, 0x0003, 0x2009, 0x2009, 0x4005, 0x4005, 0x4005, 0x4005,
+ 0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_12[16] = {
+ 0x2001, 0x2001, 0x2003, 0x2003, 0x4007, 0x4007, 0x4007, 0x4007,
+ 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_13[16] = {
+ 0x4001, 0x4001, 0x4001, 0x4001, 0x4003, 0x4003, 0x4003, 0x4003,
+ 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_14[16] = {
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001,
+ 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003
+};
+
+const OMX_U16 * armVCM4P10_CAVLCTotalZeroTables[15] = { /* total_zeros VLD tables; entry [TotalCoeff-1] is used */
+ armVCM4P10_CAVLCTotalZeroTables_0, /* TotalCoeff=1 */
+ armVCM4P10_CAVLCTotalZeroTables_1, /* TotalCoeff=2 */
+ armVCM4P10_CAVLCTotalZeroTables_2, /* TotalCoeff=3 */
+ armVCM4P10_CAVLCTotalZeroTables_3, /* TotalCoeff=4 */
+ armVCM4P10_CAVLCTotalZeroTables_4, /* TotalCoeff=5 */
+ armVCM4P10_CAVLCTotalZeroTables_5, /* TotalCoeff=6 */
+ armVCM4P10_CAVLCTotalZeroTables_6, /* TotalCoeff=7 */
+ armVCM4P10_CAVLCTotalZeroTables_7, /* TotalCoeff=8 */
+ armVCM4P10_CAVLCTotalZeroTables_8, /* TotalCoeff=9 */
+ armVCM4P10_CAVLCTotalZeroTables_9, /* TotalCoeff=10 */
+ armVCM4P10_CAVLCTotalZeroTables_10, /* TotalCoeff=11 */
+ armVCM4P10_CAVLCTotalZeroTables_11, /* TotalCoeff=12 */
+ armVCM4P10_CAVLCTotalZeroTables_12, /* TotalCoeff=13 */
+ armVCM4P10_CAVLCTotalZeroTables_13, /* TotalCoeff=14 */
+ armVCM4P10_CAVLCTotalZeroTables_14 /* TotalCoeff=15 */
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_0[16] = {
+ 0x2007, 0x2007, 0x2005, 0x2005, 0x4003, 0x4003, 0x4003, 0x4003,
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_1[16] = {
+ 0x4005, 0x4005, 0x4005, 0x4005, 0x4003, 0x4003, 0x4003, 0x4003,
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_2[16] = {
+ 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003,
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001
+};
+
+const OMX_U16 * armVCM4P10_CAVLCTotalZeros2x2Tables[3] = { /* used when sMaxNumCoeff==4; entry [TotalCoeff-1] */
+ armVCM4P10_CAVLCTotalZeros2x2Tables_0, /* TotalCoeff=1 */
+ armVCM4P10_CAVLCTotalZeros2x2Tables_1, /* TotalCoeff=2 */
+ armVCM4P10_CAVLCTotalZeros2x2Tables_2 /* TotalCoeff=3 */
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_0[8] = {
+ 0x4003, 0x4003, 0x4003, 0x4003, 0x4001, 0x4001, 0x4001, 0x4001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_1[8] = {
+ 0x2005, 0x2005, 0x2003, 0x2003, 0x4001, 0x4001, 0x4001, 0x4001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_2[8] = {
+ 0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_3[8] = {
+ 0x0009, 0x0007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_4[8] = {
+ 0x000b, 0x0009, 0x0007, 0x0005, 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_5[8] = {
+ 0x0003, 0x0005, 0x0009, 0x0007, 0x000d, 0x000b, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_6[24] = {
+ 0x0010, 0x000d, 0x000b, 0x0009, 0x0007, 0x0005, 0x0003, 0x0001,
+ 0x0018, 0x0011, 0x200f, 0x200f, 0x0020, 0x0015, 0x2013, 0x2013,
+ 0x0028, 0x0019, 0x2017, 0x2017, 0x07ff, 0x001d, 0x201b, 0x201b
+};
+
+/* Tables 7 to 14 are duplicates of table 6 */
+
+const OMX_U16 * armVCM4P10_CAVLCRunBeforeTables[15] = { /* run_before VLD tables; entry [ZerosLeft-1] is used */
+ armVCM4P10_CAVLCRunBeforeTables_0, /* ZerosLeft=1 */
+ armVCM4P10_CAVLCRunBeforeTables_1, /* ZerosLeft=2 */
+ armVCM4P10_CAVLCRunBeforeTables_2, /* ZerosLeft=3 */
+ armVCM4P10_CAVLCRunBeforeTables_3, /* ZerosLeft=4 */
+ armVCM4P10_CAVLCRunBeforeTables_4, /* ZerosLeft=5 */
+ armVCM4P10_CAVLCRunBeforeTables_5, /* ZerosLeft=6 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=7 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=8 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=9 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=10 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=11 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=12 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=13 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=14 */
+ armVCM4P10_CAVLCRunBeforeTables_6 /* ZerosLeft=15 */
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
new file mode 100755
index 0000000..4c3a77c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
@@ -0,0 +1,198 @@
+;//
+;//
+;// File Name: armVCM4P10_DeblockingChroma_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+
+ IF CortexA8
+
+pAlpha RN 2
+pBeta RN 3
+
+pThresholds RN 5
+pBS RN 4
+bS3210 RN 6
+
+;// Pixels
+dP_0 DN D4.U8
+dP_1 DN D5.U8
+dP_2 DN D6.U8
+dP_3 DN D7.U8
+dQ_0 DN D8.U8
+dQ_1 DN D9.U8
+dQ_2 DN D10.U8
+dQ_3 DN D11.U8
+
+
+;// Filtering Decision
+dAlpha DN D0.U8
+dBeta DN D2.U8
+
+dFilt DN D16.U8
+dAqflg DN D12.U8
+dApflg DN D17.U8
+
+dAp0q0 DN D13.U8
+
+;// bSLT4
+dTC3210 DN D18.U8
+dTCs DN D31.S8
+dTC DN D31.U8
+
+dMask_0 DN D14.U8
+dMask_1 DN D15.U8
+dMask_4 DN D26.U16
+
+dTemp DN D28.U8
+dDummy DN D17.U8
+
+;// Computing P0,Q0
+qDq0p0 QN Q10.S16
+qDp1q1 QN Q11.S16
+qDelta QN Q10.S16 ; reuse qDq0p0
+dDelta DN D20.S8
+
+
+;// Computing P1,Q1
+qP_0n QN Q14.S16
+qQ_0n QN Q12.S16
+
+dQ_0n DN D24.U8
+dP_0n DN D29.U8
+
+;// bSGE4
+
+dHSp0q1 DN D13.U8
+dHSq0p1 DN D31.U8
+
+dBS3210 DN D28.U16
+
+dP_0t DN D13.U8 ;dHSp0q1
+dQ_0t DN D31.U8 ;Temp1
+
+dP_0n DN D29.U8
+dQ_0n DN D24.U8 ;Temp2
+
+;// Register usage for - armVCM4P10_DeblockingChromabSLT4_unsafe
+;//
+;// Inputs - Pixels - p0-p1: D4-D5, q0-q1: D8-D9
+;// - Filter masks - filt: D16, dMask_0: D14, dMask_1: D15
+;// - Additional Params - pThresholds: r5, pAlpha: r2, pBeta: r3
+;//
+;// Outputs - Pixels - P0: D29, Q0: D24
+;// - Additional Params - pThresholds: r5 (advanced by 4), alpha: D0, beta: D2
+
+;// Registers Corrupted - D0, D2, D18, D20-D25, D28-D29, D31
+
+
+ M_START armVCM4P10_DeblockingChromabSLT4_unsafe
+
+
+ ;dTC3210 -18
+ ;dTemp-28
+
+ VLD1 d18.U32[0], [pThresholds]! ;here
+
+ ;// delta = (((q0-p0)<<2) + (p1-q1) + 4) >> 3;
+ ;// dDelta = (qDp1q1 >> 2 + qDq0p0 + 1)>> 1
+
+ ;// qDp1q1-11
+ ;// qDq0p0-10
+ VSUBL qDp1q1, dP_1, dQ_1
+ VMOV dTemp, dTC3210
+ VSUBL qDq0p0, dQ_0, dP_0
+ VSHR qDp1q1, qDp1q1, #2
+ VZIP.8 dTC3210, dTemp
+
+ ;// qDelta-qDq0p0-10
+
+ ;// dTC = dTC3210 + dMask_1
+
+ ;// dTC3210-18
+ ;// dTemp-28
+ ;// dTC-31
+ VBIF dTC3210, dMask_0, dFilt
+ VRHADD qDelta, qDp1q1, qDq0p0
+ VADD dTC, dTC3210, dMask_1
+ VQMOVN dDelta, qDelta
+ ;// dDelta-d20
+
+ ;// dDelta = armClip(-dTC, dTC, dDelta);
+ VLD1 {dAlpha[]}, [pAlpha]
+ VMIN dDelta, dDelta, dTCs
+ VNEG dTCs, dTCs
+ VLD1 {dBeta[]}, [pBeta]
+ ;1
+ VMAX dDelta, dDelta, dTCs
+
+ ;// dP_0n - 29
+ ;// dQ_0n - 24
+
+ ;// pQ0[-1*Step] = (OMX_U8)armClip(0, 255, dP_0 + delta);
+ ;// pQ0[0*Step] = (OMX_U8)armClip(0, 255, dQ_0 - delta);
+
+ ;// dP_0n = (OMX_U8)armClip(0, 255, dP_0 + dDelta);
+ ;// dQ_0n = (OMX_U8)armClip(0, 255, dQ_0 - dDelta);
+
+ ;// qP_0n - 14
+ ;// qQ_0n - 12
+
+ VMOVL qP_0n, dP_0
+ VMOVL qQ_0n, dQ_0
+
+ ;1
+ VADDW qP_0n, qP_0n, dDelta
+ VSUBW qQ_0n, qQ_0n, dDelta
+
+ VQMOVUN dP_0n, qP_0n
+ VQMOVUN dQ_0n, qQ_0n
+
+ M_END
+
+;// Register usage for - armVCM4P10_DeblockingChromabSGE4_unsafe()
+;//
+;// Inputs - Pixels - p0-p1: D4-D5, q0-q1: D8-D9
+;// - Additional Params - pThresholds: r5, pAlpha: r2, pBeta: r3
+;//
+;// Outputs - Pixels - P0: D13, Q0: D31
+;// - Additional Params - pThresholds: r5 (advanced by 4), alpha: D0, beta: D2
+
+;// Registers Corrupted - D0, D2, D13, D31
+
+ M_START armVCM4P10_DeblockingChromabSGE4_unsafe
+
+ ;dHSq0p1 - 31
+ ;dHSp0q1 - 13
+ VHADD dHSp0q1, dP_0, dQ_1
+ VHADD dHSq0p1, dQ_0, dP_1
+
+ ;// Prepare the bS mask
+
+ ;// dHSp0q1-13
+ ;// dP_0t-dHSp0q1-13
+ ;// dHSq0p1-31
+ ;// dQ_0t-Temp1-31
+ VLD1 {dAlpha[]}, [pAlpha]
+ ADD pThresholds, pThresholds, #4
+ VLD1 {dBeta[]}, [pBeta]
+
+ VRHADD dP_0t, dHSp0q1, dP_1
+ VRHADD dQ_0t, dHSq0p1, dQ_1
+
+ M_END
+
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
new file mode 100755
index 0000000..0afe4fd
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
@@ -0,0 +1,396 @@
+;//
+;//
+;// File Name: armVCM4P10_DeblockingLuma_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+
+ IF CortexA8
+
+pThresholds RN 5
+
+;// Pixels
+dP_0 DN D4.U8
+dP_1 DN D5.U8
+dP_2 DN D6.U8
+dP_3 DN D7.U8
+dQ_0 DN D8.U8
+dQ_1 DN D9.U8
+dQ_2 DN D10.U8
+dQ_3 DN D11.U8
+
+
+;// Filtering Decision
+dAlpha DN D0.U8
+
+dFilt DN D16.U8
+dAqflg DN D12.U8
+dApflg DN D17.U8
+
+dAp0q0 DN D13.U8
+
+;// bSLT4
+dTC0 DN D18.U8
+dTC1 DN D19.U8
+dTC01 DN D18.U8
+
+dTCs DN D31.S8
+dTC DN D31.U8
+
+dMask_0 DN D14.U8
+dMask_1 DN D15.U8
+
+dTemp DN D19.U8
+
+;// Computing P0,Q0
+qDq0p0 QN Q10.S16
+qDp1q1 QN Q11.S16
+qDelta QN Q10.S16 ; reuse qDq0p0
+dDelta DN D20.S8
+
+
+;// Computing P1,Q1
+dRp0q0 DN D24.U8
+
+dMaxP DN D23.U8
+dMinP DN D22.U8
+
+dMaxQ DN D19.U8
+dMinQ DN D21.U8
+
+dDeltaP DN D26.U8
+dDeltaQ DN D27.U8
+
+qP_0n QN Q14.S16
+qQ_0n QN Q12.S16
+
+dQ_0n DN D24.U8
+dQ_1n DN D25.U8
+dP_0n DN D29.U8
+dP_1n DN D30.U8
+
+;// bSGE4
+
+qSp0q0 QN Q10.U16
+
+qSp2q1 QN Q11.U16
+qSp0q0p1 QN Q12.U16
+qSp3p2 QN Q13.U16
+dHSp0q1 DN D28.U8
+
+qSq2p1 QN Q11.U16
+qSp0q0q1 QN Q12.U16
+qSq3q2 QN Q13.U16 ;!!
+dHSq0p1 DN D28.U8 ;!!
+
+qTemp1 QN Q11.U16 ;!!;qSp2q1
+qTemp2 QN Q12.U16 ;!!;qSp0q0p1
+
+dP_0t DN D28.U8 ;!!;dHSp0q1
+dQ_0t DN D22.U8 ;!!;Temp1
+
+dP_0n DN D29.U8
+dP_1n DN D30.U8
+dP_2n DN D31.U8
+
+dQ_0n DN D24.U8 ;!!;Temp2
+dQ_1n DN D25.U8 ;!!;Temp2
+dQ_2n DN D28.U8 ;!!;dQ_0t
+
+;// Register usage for - armVCM4P10_DeblockingLumabSLT4_unsafe
+;//
+;// Inputs - Pixels - p0-p3: D4-D7, q0-q3: D8-D11
+;// - Filter masks - filt: D16, aqflg: D12, apflg: D17
+;// - Additional Params - pThresholds: r5
+;//
+;// Outputs - Pixels - P0-P1: D29-D30, Q0-Q1: D24-D25
+;// - Additional Params - pThresholds: r5
+
+;// Registers Corrupted - D18-D31
+
+
+ M_START armVCM4P10_DeblockingLumabSLT4_unsafe
+
+
+ ;// qDq0p0-10
+ VSUBL qDp1q1, dP_1, dQ_1
+ VLD1 {dTC0[]}, [pThresholds]!
+ ;// qDp1q1-11
+ VSUBL qDq0p0, dQ_0, dP_0
+ VLD1 {dTC1[]}, [pThresholds]!
+
+ ;// dRp0q0-24
+ VSHR qDp1q1, qDp1q1, #2
+
+ ;// dTC01 = (dTC1 << 4) | dTC0
+ ;// dTC01-18
+ VEXT dTC01, dTC0, dTC1, #4
+ ;// dTemp-19
+ VAND dTemp, dApflg, dMask_1
+
+ VBIF dTC01, dMask_0, dFilt
+
+
+ ;// delta = (((q0-p0)<<2) + (p1-q1) + 4) >> 3;
+ ;// dDelta = (qDp1q1 >> 2 + qDq0p0 + 1)>> 1
+
+ ;// qDelta-qDq0p0-10
+ VRHADD qDelta, qDp1q1, qDq0p0
+ VRHADD dRp0q0, dP_0, dQ_0
+ VADD dTC, dTC01, dTemp
+
+ ;// dTC = dTC01 + (dApflg & 1) + (dAqflg & 1)
+
+ VAND dTemp, dAqflg, dMask_1
+ VQADD dMaxP, dP_1, dTC01
+ VQMOVN dDelta, qDelta
+ VADD dTC, dTC, dTemp
+
+ ;// dMaxP = QADD(dP_1, dTC01)
+ ;// dMinP = QSUB(dP_1, dTC01)
+
+ ;// dMaxP-d23
+ ;// dMinP-d22
+ VQSUB dMinP, dP_1, dTC01
+
+ ;// dDelta-d20
+
+ ;// dMaxQ = QADD(dQ_1, dTC01)
+ ;// dMinQ = QSUB(dQ_1, dTC01)
+
+ ;// dMaxQ-19
+ ;// dMinQ-21
+ VQADD dMaxQ, dQ_1, dTC01
+ VHADD dDeltaP, dRp0q0, dP_2
+ VMIN dDelta, dDelta, dTCs
+
+ ;// dDelta = armClip(-dTC, dTC, dDelta);
+ VNEG dTCs, dTCs
+
+ VQSUB dMinQ, dQ_1, dTC01
+
+ ;// delta = (p2 + ((p0+q0+1)>>1) - (p1<<1))>>1;
+ ;// delta = armClip(-tC0, tC0, delta);
+ ;// pQ0[-2*Step] = (OMX_U8)(p1 + delta);
+
+ ;// dDeltaP = (dP_2 + dRp0q0)>>1;
+ ;// dP_1n = armClip(dP_1 - dTC01, dP_1 + dTC01, dDeltaP);
+ ;// dP_1n = armClip(MinP, MaxP, dDeltaP);
+
+ ;// delta = (q2 + ((p0+q0+1)>>1) - (q1<<1))>>1;
+ ;// delta = armClip(-tC0, tC0, delta);
+ ;// pQ0[1*Step] = (OMX_U8)(q1 + delta);
+
+ ;// dDeltaQ = (dQ_2 + dRp0q0)>>1;
+ ;// dQ_1n = armClip(dQ_1 - dTC01, dQ_1 + dTC01, dDeltaQ);
+ ;// dQ_1n = armClip(MinQ, MaxQ, dDeltaQ);
+
+ ;// dDeltaP-26
+ VHADD dDeltaQ, dRp0q0, dQ_2
+
+ ;// dDeltaQ-27
+
+ ;// dP_0n - 29
+ ;// dP_1n - 30
+ ;// dQ_0n - 24
+ ;// dQ_1n - 25
+
+ ;// delta = (q2 + ((p0+q0+1)>>1) - (q1<<1))>>1;
+ ;// dDeltaQ = (dQ_2 + dRp0q0)>>1;
+
+ VMAX dP_1n, dDeltaP, dMinP
+ VMAX dDelta, dDelta, dTCs
+
+ ;// pQ0[-1*Step] = (OMX_U8)armClip(0, 255, dP_0 + delta);
+ ;// pQ0[0*Step] = (OMX_U8)armClip(0, 255, dQ_0 - delta);
+
+ ;// dP_0n = (OMX_U8)armClip(0, 255, dP_0 + dDelta);
+ ;// dQ_0n = (OMX_U8)armClip(0, 255, dQ_0 - dDelta);
+
+ ;// qP_0n - 14
+ ;// qQ_0n - 12
+
+ VMOVL qP_0n, dP_0
+ VMOVL qQ_0n, dQ_0
+
+ VADDW qP_0n, qP_0n, dDelta
+ VSUBW qQ_0n, qQ_0n, dDelta
+
+ VQMOVUN dP_0n, qP_0n
+ VQMOVUN dQ_0n, qQ_0n
+
+ VMAX dQ_1n, dDeltaQ, dMinQ
+
+ VMIN dP_1n, dP_1n, dMaxP
+ VMIN dQ_1n, dQ_1n, dMaxQ
+ VBIF dP_0n, dP_0, dFilt
+
+ VBIF dP_1n, dP_1, dApflg
+ VBIF dQ_0n, dQ_0, dFilt
+ VBIF dQ_1n, dQ_1, dAqflg
+
+ M_END
+
+;// Register usage for - armVCM4P10_DeblockingLumabSGE4_unsafe()
+;//
+;// Inputs - Pixels - p0-p3: D4-D7, q0-q3: D8-D11
+;// - Filter masks - filt: D16, aqflg: D12, apflg: D17
+;// - Additional Params - alpha: D0, dMask_1: D15
+;//
+;// Outputs - Pixels - P0-P2: D29-D31, Q0-Q2: D24,D25,D28
+
+;// Registers Corrupted - D18-D31
+
+ M_START armVCM4P10_DeblockingLumabSGE4_unsafe
+
+
+ ;// ap<beta && armAbs(p0-q0)<((alpha>>2)+2)
+ ;// aq<beta && armAbs(p0-q0)<((alpha>>2)+2)
+
+ ;// ( dApflg & dAp0q0 < (dAlpha >> 2 + 2) )
+ ;// ( dAqflg & dAp0q0 < (dAlpha >> 2 + 2) )
+
+ ;// ( dApflg = dApflg & dAp0q0 < (dTemp + dMask_1 + dMask_1) )
+ ;// ( dAqflg = dAqflg & dAp0q0 < (dTemp + dMask_1 + dMask_1) )
+
+ ;// P Filter
+
+ VSHR dTemp, dAlpha, #2
+ VADD dTemp, dTemp, dMask_1
+
+ ;// qSp0q0-10
+ VADDL qSp0q0, dQ_0, dP_0
+ VADD dTemp, dTemp, dMask_1
+
+ ;// qSp2q1-11
+ ;// qSp0q0p1-12
+ VADDL qSp2q1, dP_2, dQ_1
+ VADDW qSp0q0p1, qSp0q0, dP_1
+
+ VCGT dTemp, dTemp, dAp0q0
+ VSHR qSp2q1, #1
+
+ ;// pQ0[-1*Step] = (OMX_U8)((p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3);
+ ;// pQ0[-1*Step] = ( ( (p0 + q0 + p1) + (p2 + q1)>>1 ) >> 1 + 1 ) >> 1
+
+ ;// dP_0n = ( ( (qSp0q0 + dP_1) + qSp2q1>>1 ) >> 1 + 1 ) >> 1
+ ;// dP_0n = ( ( qSp0q0p1 + qSp2q1>>1 ) >> 1 + 1 ) >> 1
+ ;// dP_0n = ( qTemp1 + 1 ) >> 1
+
+ ;// pQ0[-2*Step] = (OMX_U8)((p2 + p1 + p0 + q0 + 2)>>2);
+
+ ;// dP_1n = (OMX_U8)((dP_2 + qSp0q0p1 + 2)>>2);
+ ;// dP_1n = (OMX_U8)((qTemp2 + 2)>>2);
+
+ ;// pQ0[-3*Step] = (OMX_U8)((2*p3 + 3*p2 + p1 + p0 + q0 + 4)>>3);
+ ;// pQ0[-3*Step] = (OMX_U8)(( (p3 + p2) + (p1 + p0 + q0 + p2) >> 1 + 2)>>2);
+
+ ;// dP_2n = (OMX_U8)(( qSp3p2 + (dP_2 + qSp0q0p1) >> 1 + 2) >> 2);
+ ;// dP_2n = (OMX_U8)(( qSp3p2 + qTemp2 >> 1 + 2) >> 2);
+
+ ;// qTemp1-qSp2q1-11
+ ;// qTemp2-qSp0q0p1-12
+ VHADD qTemp1, qSp0q0p1, qSp2q1
+ VADDW qTemp2, qSp0q0p1, dP_2
+
+ ;// qSp3p2-13
+ VADDL qSp3p2, dP_3, dP_2
+
+ VAND dApflg, dApflg, dTemp
+ VHADD dHSp0q1, dP_0, dQ_1
+ VSRA qSp3p2, qTemp2, #1
+ ;// dHSp0q1-28
+ VAND dAqflg, dAqflg, dTemp
+
+ ;// dP_0n-29
+ ;// dP_0t-dHSp0q1-28
+ VQRSHRN dP_0n, qTemp1, #1
+ VRHADD dP_0t, dHSp0q1, dP_1
+
+ ;// dP_1n-30
+ VQRSHRN dP_1n, qTemp2, #2
+
+ VADDL qSq2p1, dQ_2, dP_1
+ VADDW qSp0q0q1, qSp0q0, dQ_1
+
+ VBIF dP_0n, dP_0t, dApflg
+
+ ;// Q Filter
+
+ ;// pQ0[0*Step] = (OMX_U8)((q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4)>>3);
+ ;// pQ0[0*Step] = ( ( (p0 + q0 + q1) + (q2 + p1)>>1 ) >> 1 + 1 ) >> 1
+
+ ;// dQ_0n = ( ( (qSp0q0 + dQ_1) + qSq2p1>>1 ) >> 1 + 1 ) >> 1
+ ;// dQ_0n = ( ( qSp0q0q1 + qSq2p1>>1 ) >> 1 + 1 ) >> 1
+ ;// dQ_0n = ( qTemp1 + 1 ) >> 1
+
+ ;// pQ0[1*Step] = (OMX_U8)((q2 + q1 + q0 + p0 + 2)>>2);
+
+ ;// dQ_1n = (OMX_U8)((dQ_2 + qSp0q0q1 + 2)>>2);
+ ;// dQ_1n = (OMX_U8)((qTemp2 + 2)>>2);
+
+ ;// pQ0[2*Step] = (OMX_U8)((2*q3 + 3*q2 + q1 + q0 + p0 + 4)>>3);
+ ;// pQ0[2*Step] = (OMX_U8)(( (q3 + q2) + (q1 + p0 + q0 + q2) >> 1 + 2)>>2);
+
+ ;// dQ_2n = (OMX_U8)(( qSq3q2 + (dQ_2 + qSp0q0q1) >> 1 + 2) >> 2);
+ ;// dQ_2n = (OMX_U8)(( qSq3q2 + qTemp2 >> 1 + 2) >> 2);
+
+ ;// qTemp1-qSp2q1-11
+ ;// qTemp2-qSp0q0p1-12
+ ;// qSq2p1-11
+ ;// qSp0q0q1-12
+
+
+ ;// qTemp2-qSp0q0p1-12
+ ;// qTemp1-qSq2p1-11
+ ;// qSq3q2-13
+ ;// dP_2n-31
+
+ VQRSHRN dP_2n, qSp3p2, #2
+ VADDL qSq3q2, dQ_3, dQ_2
+
+ VSHR qSq2p1, #1
+
+ VHADD qTemp1, qSp0q0q1, qSq2p1
+ VADDW qTemp2, qSp0q0q1, dQ_2
+
+ ;// dHSq0p1-28
+ VHADD dHSq0p1, dQ_0, dP_1
+
+ VBIF dP_0n, dP_0, dFilt
+ VBIF dP_1n, dP_1, dApflg
+
+ VSRA qSq3q2, qTemp2, #1
+
+ ;// dQ_1-Temp2-25
+ ;// dQ_0-Temp2-24
+ VQRSHRN dQ_1n, qTemp2, #2
+ VQRSHRN dQ_0n, qTemp1, #1
+
+ ;// dQ_0t-Temp1-22
+ VRHADD dQ_0t, dHSq0p1, dQ_1
+ VBIF dQ_1n, dQ_1, dAqflg
+
+ VBIF dP_2n, dP_2, dApflg
+ VBIF dQ_0n, dQ_0t, dAqflg
+ VQRSHRN dQ_2n, qSq3q2, #2
+ VBIF dQ_0n, dQ_0, dFilt
+ VBIF dQ_2n, dQ_2, dAqflg
+
+ M_END
+
+ ENDIF
+
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
new file mode 100755
index 0000000..10a89e9
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
@@ -0,0 +1,325 @@
+;//
+;//
+;// File Name: armVCM4P10_DecodeCoeffsToPair_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+ IMPORT armVCM4P10_CAVLCCoeffTokenTables
+ IMPORT armVCM4P10_CAVLCTotalZeroTables
+ IMPORT armVCM4P10_CAVLCTotalZeros2x2Tables
+ IMPORT armVCM4P10_CAVLCRunBeforeTables
+ IMPORT armVCM4P10_SuffixToLevel
+ IMPORT armVCM4P10_ZigZag_4x4
+ IMPORT armVCM4P10_ZigZag_2x2
+
+ M_VARIANTS ARM1136JS
+
+;//DEBUG_ON SETL {TRUE}
+
+LAST_COEFF EQU 0x20 ;// End of block flag
+TWO_BYTE_COEFF EQU 0x10
+
+;// Declare input registers
+
+ppBitStream RN 0
+pOffset RN 1
+pNumCoeff RN 2
+ppPosCoefbuf RN 3
+nC RN 4 ;// number of coeffs or 17 for chroma
+sMaxNumCoeff RN 5
+
+;// Declare inner loop registers
+
+;// Level loop
+Count RN 0
+TrailingOnes RN 1
+pLevel RN 2
+LevelSuffix RN 3
+SuffixLength RN 4
+TotalCoeff RN 5
+
+pVLDTable RN 6
+Symbol RN 7
+T1 RN 8
+T2 RN 9
+RBitStream RN 10
+RBitBuffer RN 11
+RBitCount RN 12
+lr RN 14
+
+;// Run loop
+Count RN 0
+ZerosLeft RN 1
+pLevel RN 2
+ppRunTable RN 3
+pRun RN 4
+TotalCoeff RN 5
+
+pVLDTable RN 6
+Symbol RN 7
+T1 RN 8
+T2 RN 9
+RBitStream RN 10
+RBitBuffer RN 11
+RBitCount RN 12
+lr RN 14
+
+;// Fill in coefficients loop
+pPosCoefbuf RN 0
+temp RN 1
+pLevel RN 2
+ppPosCoefbuf RN 3
+pRun RN 4
+TotalCoeff RN 5
+pZigZag RN 6
+
+T1 RN 8
+T2 RN 9
+RBitStream RN 10
+RBitBuffer RN 11
+RBitCount RN 12
+CoeffNum RN 14
+
+
+
+ IF ARM1136JS
+
+ ;// Allocate stack memory required by the function
+ M_ALLOC4 pppBitStream, 4
+ M_ALLOC4 ppOffset, 4
+ M_ALLOC4 pppPosCoefbuf, 4
+ M_ALLOC4 ppLevel, 16*2
+ M_ALLOC4 ppRun, 16
+
+ ;// Write function header
+ M_START armVCM4P10_DecodeCoeffsToPair, r11
+
+ ;// Define stack arguments
+ M_ARG pNC, 4
+ M_ARG pSMaxNumCoeff,4
+
+ ;// Code start
+ M_BD_INIT0 ppBitStream, pOffset, RBitStream, RBitBuffer, RBitCount
+ LDR pVLDTable, =armVCM4P10_CAVLCCoeffTokenTables
+ M_LDR nC, pNC
+
+ M_BD_INIT1 T1, T2, lr
+ LDR pVLDTable, [pVLDTable, nC, LSL #2] ;// Find VLD table
+
+ M_BD_INIT2 T1, T2, lr
+
+ ;// Decode Symbol = TotalCoeff*4 + TrailingOnes
+ M_BD_VLD Symbol, T1, T2, pVLDTable, 4, 2
+
+ MOVS TotalCoeff, Symbol, LSR #2
+ STRB TotalCoeff, [pNumCoeff]
+ M_PRINTF "TotalCoeff=%d\n", TotalCoeff
+ BEQ.W EndNoError ;// Finished if no coefficients
+
+ CMP Symbol, #17*4
+ BGE.W EndBadSymbol ;// Error if bad symbol
+
+ ;// Save bitstream pointers
+ M_STR ppBitStream, pppBitStream
+ M_STR pOffset, ppOffset
+ M_STR ppPosCoefbuf, pppPosCoefbuf
+
+ ;// Decode Trailing Ones
+ ANDS TrailingOnes, Symbol, #3
+ M_ADR pLevel, ppLevel
+ M_PRINTF "TrailingOnes=%d\n", TrailingOnes
+ BEQ TrailingOnesDone
+ MOV Count, TrailingOnes
+TrailingOnesLoop
+ M_BD_READ8 Symbol, 1, T1
+ SUBS Count, Count, #1
+ MOV T1, #1
+ SUB T1, T1, Symbol, LSL #1
+ M_PRINTF "Level=%d\n", T1
+ STRH T1, [pLevel], #2
+ BGT TrailingOnesLoop
+TrailingOnesDone
+
+ ;// Decode level values
+ SUBS Count, TotalCoeff, TrailingOnes ;// Number of levels to read
+ BEQ DecodeRuns ;// None left
+
+ MOV SuffixLength, #1
+ CMP TotalCoeff, #10
+ MOVLE SuffixLength, #0
+ CMP TrailingOnes, #3 ;// if (TrailingOnes<3)
+ MOVLT TrailingOnes, #4 ;// then TrailingOnes = +4
+ MOVGE TrailingOnes, #2 ;// else TrailingOnes = +2
+ MOVGE SuffixLength, #0 ;// SuffixLength = 0
+
+LevelLoop
+ M_BD_CLZ16 Symbol, T1, T2 ;// Symbol=LevelPrefix
+ CMP Symbol,#16
+ BGE EndBadSymbol
+
+ MOVS lr, SuffixLength ;// if LevelSuffixSize==0
+ TEQEQ Symbol, #14 ;// and LevelPrefix==14
+ MOVEQ lr, #4 ;// then LevelSuffixSize=4
+ TEQ Symbol, #15 ;// if LevelPrefix==15
+ MOVEQ lr, #12 ;// then LevelSuffixSize=12
+
+ TEQEQ SuffixLength,#0
+ ADDEQ Symbol,Symbol,#15
+
+ TEQ lr, #0 ;// if LevelSuffixSize==0
+ BEQ LevelCodeRead ;// LevelCode = LevelPrefix
+
+ M_BD_VREAD16 LevelSuffix, lr, T1, T2 ;// Read Level Suffix
+
+ MOV Symbol, Symbol, LSL SuffixLength
+ ADD Symbol, LevelSuffix, Symbol
+
+LevelCodeRead
+ ;// Symbol = LevelCode
+ ADD Symbol, Symbol, TrailingOnes ;// +4 if level cannot be +/-1, +2 o/w
+ MOV TrailingOnes, #2
+ MOVS T1, Symbol, LSR #1
+ RSBCS T1, T1, #0 ;// If Symbol odd then negate
+ M_PRINTF "Level=%d\n", T1
+ STRH T1, [pLevel], #2 ;// Store level.
+
+ LDR T2, =armVCM4P10_SuffixToLevel
+ LDRSB T1, [T2, SuffixLength] ;// Find increment level
+ TEQ SuffixLength, #0
+ MOVEQ SuffixLength, #1
+ CMP Symbol, T1
+ ADDCS SuffixLength, SuffixLength, #1
+ SUBS Count, Count, #1
+ BGT LevelLoop
+
+DecodeRuns
+ ;// Find number of zeros
+ M_LDR T1, pSMaxNumCoeff ;// sMaxNumCoeff
+ SUB Count, TotalCoeff, #1 ;// Number of runs excluding last
+ SUBS ZerosLeft, T1, TotalCoeff ;// Maximum number of zeros there could be
+ M_ADR pRun, ppRun
+ MOV CoeffNum,TotalCoeff
+ SUB CoeffNum,CoeffNum,#1
+ BEQ NoZerosLeft
+
+ ;// Unpack number of zeros from bitstream
+ TEQ T1, #4
+ LDREQ pVLDTable, =(armVCM4P10_CAVLCTotalZeros2x2Tables-4)
+ LDRNE pVLDTable, =(armVCM4P10_CAVLCTotalZeroTables-4)
+ LDR pVLDTable, [pVLDTable, TotalCoeff, LSL #2]
+
+ M_BD_VLD Symbol, T1, T2, pVLDTable, 4, 2 ;// Symbol = ZerosLeft
+ CMP Symbol,#16
+ BGE EndBadSymbol
+
+ LDR ppRunTable, =(armVCM4P10_CAVLCRunBeforeTables-4)
+ M_ADR pRun, ppRun
+ MOVS ZerosLeft, Symbol
+
+ ADD CoeffNum,CoeffNum,ZerosLeft
+
+ BEQ NoZerosLeft
+
+ ;// Decode runs while zeros are left and more than one coefficient
+RunLoop
+ SUBS Count, Count, #1
+ LDR pVLDTable, [ppRunTable, ZerosLeft, LSL#2]
+ BLT LastRun
+ M_BD_VLD Symbol, T1, T2, pVLDTable, 3, 2 ;// Symbol = Run
+ CMP Symbol,#15
+ BGE EndBadSymbol
+
+ SUBS ZerosLeft, ZerosLeft, Symbol
+ M_PRINTF "Run=%d\n", Symbol
+ STRB Symbol, [pRun], #1
+ BGT RunLoop
+
+ ;// Decode runs while no zeros are left
+NoZerosLeft
+ SUBS Count, Count, #1
+ M_PRINTF "Run=%d\n", ZerosLeft
+ STRGEB ZerosLeft, [pRun], #1
+ BGT NoZerosLeft
+
+LastRun
+ ;// Final run length is remaining zeros
+ M_PRINTF "LastRun=%d\n", ZerosLeft
+ STRB ZerosLeft, [pRun], #1
+
+ ;// Write coefficients to output array
+ M_LDR T1, pSMaxNumCoeff ;// sMaxNumCoeff
+ TEQ T1, #15
+ ADDEQ CoeffNum,CoeffNum,#1
+
+
+ SUB pRun,pRun,TotalCoeff
+ SUB pLevel,pLevel,TotalCoeff
+ SUB pLevel,pLevel,TotalCoeff
+
+ M_LDR ppPosCoefbuf, pppPosCoefbuf
+ LDR pPosCoefbuf, [ppPosCoefbuf]
+ TEQ T1, #4
+ LDREQ pZigZag, =armVCM4P10_ZigZag_2x2
+ LDRNE pZigZag, =armVCM4P10_ZigZag_4x4
+
+
+
+OutputLoop
+
+ LDRB T2, [pRun],#1
+ LDRB T1, [pZigZag, CoeffNum]
+ SUB CoeffNum, CoeffNum, #1 ;// Skip Non zero
+ SUB CoeffNum, CoeffNum, T2 ;// Skip Zero run
+
+ LDRSH T2, [pLevel],#2
+
+ SUBS TotalCoeff, TotalCoeff, #1
+ ORREQ T1, T1, #LAST_COEFF
+
+ ADD temp, T2, #128
+ CMP temp, #256
+ ORRCS T1, T1, #TWO_BYTE_COEFF
+
+
+ TEQ TotalCoeff, #0 ;// Preserves carry
+
+ M_PRINTF "Output=%02x %04x\n", T1, T2
+ STRB T1, [pPosCoefbuf], #1
+ STRB T2, [pPosCoefbuf], #1
+ MOV T2, T2, LSR #8
+ STRCSB T2, [pPosCoefbuf], #1
+ BNE OutputLoop
+
+ ;// Finished
+ STR pPosCoefbuf, [ppPosCoefbuf]
+ M_LDR ppBitStream, pppBitStream
+ M_LDR pOffset, ppOffset
+ B EndNoError
+
+EndBadSymbol
+ MOV r0, #OMX_Sts_Err
+ B End
+
+EndNoError
+ ;// Finished reading from the bitstream
+ M_BD_FINI ppBitStream, pOffset
+
+ ;// Set return value
+ MOV r0, #OMX_Sts_NoErr
+End
+ M_END
+
+ ENDIF
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DequantTables_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DequantTables_s.s
new file mode 100755
index 0000000..2761600
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DequantTables_s.s
@@ -0,0 +1,123 @@
+;//
+;//
+;// File Name: armVCM4P10_DequantTables_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ EXPORT armVCM4P10_QPDivTable
+ EXPORT armVCM4P10_VMatrixQPModTable
+ EXPORT armVCM4P10_PosToVCol4x4
+ EXPORT armVCM4P10_PosToVCol2x2
+ EXPORT armVCM4P10_VMatrix
+ EXPORT armVCM4P10_QPModuloTable
+ EXPORT armVCM4P10_VMatrixU16
+
+;// Define the processor variants supported by this file
+
+ M_VARIANTS CortexA8
+
+
+;// Guarding implementation by the processor name
+
+
+ IF CortexA8
+
+
+ M_TABLE armVCM4P10_PosToVCol4x4
+ DCB 0, 2, 0, 2
+ DCB 2, 1, 2, 1
+ DCB 0, 2, 0, 2
+ DCB 2, 1, 2, 1
+
+
+ M_TABLE armVCM4P10_PosToVCol2x2
+ DCB 0, 2
+ DCB 2, 1
+
+
+ M_TABLE armVCM4P10_VMatrix
+ DCB 10, 16, 13
+ DCB 11, 18, 14
+ DCB 13, 20, 16
+ DCB 14, 23, 18
+ DCB 16, 25, 20
+ DCB 18, 29, 23
+
+;//-------------------------------------------------------
+;// This table evaluates the expression [(INT)(QP/6)],
+;// for values of QP from 0 to 51 (inclusive).
+;//-------------------------------------------------------
+
+ M_TABLE armVCM4P10_QPDivTable
+ DCB 0, 0, 0, 0, 0, 0
+ DCB 1, 1, 1, 1, 1, 1
+ DCB 2, 2, 2, 2, 2, 2
+ DCB 3, 3, 3, 3, 3, 3
+ DCB 4, 4, 4, 4, 4, 4
+ DCB 5, 5, 5, 5, 5, 5
+ DCB 6, 6, 6, 6, 6, 6
+ DCB 7, 7, 7, 7, 7, 7
+ DCB 8, 8, 8, 8, 8, 8
+
+;//----------------------------------------------------
+;// This table contains armVCM4P10_VMatrix[QP%6][0] entries,
+;// for values of QP from 0 to 51 (inclusive).
+;//----------------------------------------------------
+
+ M_TABLE armVCM4P10_VMatrixQPModTable
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+
+;//-------------------------------------------------------
+;// This table evaluates the modulus expression [QP%6]*6,
+;// for values of QP from 0 to 51 (inclusive).
+;//-------------------------------------------------------
+
+ M_TABLE armVCM4P10_QPModuloTable
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+
+;//-------------------------------------------------------
+;// This table contains the individual byte values stored as
+;// halfwords. This avoids unpacking inside the function
+;//-------------------------------------------------------
+
+ M_TABLE armVCM4P10_VMatrixU16
+ DCW 10, 16, 13
+ DCW 11, 18, 14
+ DCW 13, 20, 16
+ DCW 14, 23, 18
+ DCW 16, 25, 20
+ DCW 18, 29, 23
+
+ ENDIF ;//CortexA8
+
+
+
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
new file mode 100755
index 0000000..6e912d7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
@@ -0,0 +1,236 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_Align_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+
+DEBUG_ON SETL {FALSE}
+
+ IF ARM1136JS
+
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 8
+iHeight RN 9
+
+;// Declare inner loop registers
+x RN 7
+x0 RN 7
+x1 RN 10
+x2 RN 11
+Scratch RN 12
+
+;// Function:
+;// armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+;//
+;// Implements copy from an arbitrarily aligned source memory location (pSrc) to a 4 byte aligned
+;// destination pointed to by (pDst) for horizontal interpolation.
+;// This function needs to copy 9 bytes in horizontal direction.
+;//
+;// Registers used as input for this function
+;// r0,r1,r8,r9 where r8 contains the aligned destination pointer and r9 the number of rows to copy
+;//
+;// Registers preserved for top level function
+;// r2,r3,r4,r5,r6
+;//
+;// Registers modified by the function
+;// r7,r8,r9,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the new aligned location which will be used as pSrc
+;// r1 - step size to this aligned location
+
+ ;// Function header
+ M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+
+ ;// Copy pDst to scratch
+ MOV Scratch, pDst
+
+StartAlignedStackCopy
+ AND x, pSrc, #3
+ BIC pSrc, pSrc, #3
+
+ M_SWITCH x
+ M_CASE Copy0toAligned
+ M_CASE Copy1toAligned
+ M_CASE Copy2toAligned
+ M_CASE Copy3toAligned
+ M_ENDSWITCH
+
+Copy0toAligned
+ LDM pSrc, {x0, x1, x2}
+ SUBS iHeight, iHeight, #1
+ ADD pSrc, pSrc, srcStep
+
+ ;// One cycle stall
+
+ STM pDst!, {x0, x1, x2} ;// Store aligned output row
+ BGT Copy0toAligned
+ B CopyEnd
+
+Copy1toAligned
+ LDM pSrc, {x0, x1, x2}
+ SUBS iHeight, iHeight, #1
+ ADD pSrc, pSrc, srcStep
+
+ ;// One cycle stall
+
+ MOV x0, x0, LSR #8
+ ORR x0, x0, x1, LSL #24
+ MOV x1, x1, LSR #8
+ ORR x1, x1, x2, LSL #24
+ MOV x2, x2, LSR #8
+ STM pDst!, {x0, x1, x2} ;// Store aligned output row
+ BGT Copy1toAligned
+ B CopyEnd
+
+Copy2toAligned
+ LDM pSrc, {x0, x1, x2}
+ SUBS iHeight, iHeight, #1
+ ADD pSrc, pSrc, srcStep
+
+ ;// One cycle stall
+
+ MOV x0, x0, LSR #16
+ ORR x0, x0, x1, LSL #16
+ MOV x1, x1, LSR #16
+ ORR x1, x1, x2, LSL #16
+ MOV x2, x2, LSR #16
+ STM pDst!, {x0, x1, x2} ;// Store aligned output row
+ BGT Copy2toAligned
+ B CopyEnd
+
+Copy3toAligned
+ LDM pSrc, {x0, x1, x2}
+ SUBS iHeight, iHeight, #1
+ ADD pSrc, pSrc, srcStep
+
+ ;// One cycle stall
+
+ MOV x0, x0, LSR #24
+ ORR x0, x0, x1, LSL #8
+ MOV x1, x1, LSR #24
+ ORR x1, x1, x2, LSL #8
+ MOV x2, x2, LSR #24
+ STM pDst!, {x0, x1, x2} ;// Store aligned output row
+ BGT Copy3toAligned
+
+CopyEnd
+
+ MOV pSrc, Scratch
+ MOV srcStep, #12
+
+ M_END
+
+
+;// Function:
+;// armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+;//
+;// Implements copy from an arbitrarily aligned source memory location (pSrc) to an aligned
+;// destination pointed to by (pDst) for vertical interpolation.
+;// This function needs to copy 4 bytes in horizontal direction
+;//
+;// Registers used as input for this function
+;// r0,r1,r8,r9 where r8 contains the aligned destination pointer and r9 the number of rows to copy
+;//
+;// Registers preserved for top level function
+;// r2,r3,r4,r5,r6
+;//
+;// Registers modified by the function
+;// r7,r8,r9,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the new aligned location which will be used as pSrc
+;// r1 - step size to this aligned location
+
+ ;// Function header
+ M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+
+ ;// Copy pSrc to stack
+StartVAlignedStackCopy
+ AND x, pSrc, #3
+ BIC pSrc, pSrc, #3
+
+
+ M_SWITCH x
+ M_CASE Copy0toVAligned
+ M_CASE Copy1toVAligned
+ M_CASE Copy2toVAligned
+ M_CASE Copy3toVAligned
+ M_ENDSWITCH
+
+Copy0toVAligned
+ M_LDR x0, [pSrc], srcStep
+ SUBS iHeight, iHeight, #1
+
+ ;// One cycle stall
+
+ STR x0, [pDst], #4 ;// Store aligned output row
+ BGT Copy0toVAligned
+ B CopyVEnd
+
+Copy1toVAligned
+ LDR x1, [pSrc, #4]
+ M_LDR x0, [pSrc], srcStep
+ SUBS iHeight, iHeight, #1
+
+ ;// One cycle stall
+
+ MOV x1, x1, LSL #24
+ ORR x0, x1, x0, LSR #8
+ STR x0, [pDst], #4 ;// Store aligned output row
+ BGT Copy1toVAligned
+ B CopyVEnd
+
+Copy2toVAligned
+ LDR x1, [pSrc, #4]
+ M_LDR x0, [pSrc], srcStep
+ SUBS iHeight, iHeight, #1
+
+ ;// One cycle stall
+
+ MOV x1, x1, LSL #16
+ ORR x0, x1, x0, LSR #16
+ STR x0, [pDst], #4 ;// Store aligned output row
+ BGT Copy2toVAligned
+ B CopyVEnd
+
+Copy3toVAligned
+ LDR x1, [pSrc, #4]
+ M_LDR x0, [pSrc], srcStep
+ SUBS iHeight, iHeight, #1
+
+ ;// One cycle stall
+
+ MOV x1, x1, LSL #8
+ ORR x0, x1, x0, LSR #24
+ STR x0, [pDst], #4 ;// Store aligned output row
+ BGT Copy3toVAligned
+
+CopyVEnd
+
+ SUB pSrc, pDst, #28
+ MOV srcStep, #4
+
+ M_END
+
+
+ ENDIF
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
new file mode 100755
index 0000000..d275891
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
@@ -0,0 +1,149 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Function:
+;// armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+;//
+;// Implements a copy from an arbitrarily aligned source memory location (pSrc) to an aligned
+;// destination pointed to by (pDst)
+;//
+;// Registers preserved for top level function
+;// r1,r3,r4,r5,r6,r7,r10,r11,r14
+;//
+;// Registers modified by the function
+;// r0,r2,r8,r9,r12
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2
+dstStep RN 3
+
+;// Declare other intermediate registers
+x0 RN 4
+x1 RN 5
+x2 RN 8
+x3 RN 9
+Temp RN 12
+
+ IF ARM1136JS
+
+ M_START armVCM4P10_InterpolateLuma_Copy4x4_unsafe, r6
+ ;// Copies four rows of 4 bytes from pSrc (any byte alignment, stride srcStep) to pDst (stride dstStep)
+Copy4x4Start
+ ;// Do the copy for the detected source alignment, then branch to Copy4x4End
+ AND Temp, pSrc, #3 ;// Temp = byte offset of pSrc inside its word (0..3)
+ BIC pSrc, pSrc, #3 ;// Round pSrc down to the containing word boundary
+
+ M_SWITCH Temp ;// Branch to the unrolled copy for this byte offset
+ M_CASE Copy4x4Align0
+ M_CASE Copy4x4Align1
+ M_CASE Copy4x4Align2
+ M_CASE Copy4x4Align3
+ M_ENDSWITCH
+
+Copy4x4Align0
+ M_LDR x0, [pSrc], srcStep ;// Row 0 (aligned: one word per row)
+ M_LDR x1, [pSrc], srcStep ;// Row 1
+ M_STR x0, [pDst], dstStep ;// Store row 0
+ M_LDR x2, [pSrc], srcStep ;// Row 2
+ M_STR x1, [pDst], dstStep ;// Store row 1
+ M_LDR x3, [pSrc], srcStep ;// Row 3
+ M_STR x2, [pDst], dstStep ;// Store row 2
+ M_STR x3, [pDst], dstStep ;// Store row 3
+ B Copy4x4End
+
+Copy4x4Align1
+ LDR x1, [pSrc, #4] ;// Row 0: second word, read before pSrc is stepped
+ M_LDR x0, [pSrc], srcStep ;// Row 0: first word
+ LDR x3, [pSrc, #4] ;// Row 1: second word
+ M_LDR x2, [pSrc], srcStep ;// Row 1: first word
+ MOV x0, x0, LSR #8 ;// Row 0: drop the lowest byte of the first word
+ ORR x0, x0, x1, LSL #24 ;// Row 0: top byte comes from the second word
+ M_STR x0, [pDst], dstStep ;// Store row 0
+ MOV x2, x2, LSR #8 ;// Row 1: same realignment
+ ORR x2, x2, x3, LSL #24
+ LDR x1, [pSrc, #4] ;// Row 2: second word
+ M_LDR x0, [pSrc], srcStep ;// Row 2: first word
+ M_STR x2, [pDst], dstStep ;// Store row 1
+ LDR x3, [pSrc, #4] ;// Row 3: second word
+ M_LDR x2, [pSrc], srcStep ;// Row 3: first word
+ MOV x0, x0, LSR #8 ;// Row 2: same realignment
+ ORR x0, x0, x1, LSL #24
+ M_STR x0, [pDst], dstStep ;// Store row 2
+ MOV x2, x2, LSR #8 ;// Row 3: same realignment
+ ORR x2, x2, x3, LSL #24
+ M_STR x2, [pDst], dstStep ;// Store row 3
+ B Copy4x4End
+
+Copy4x4Align2
+ LDR x1, [pSrc, #4] ;// Row 0: second word, read before pSrc is stepped
+ M_LDR x0, [pSrc], srcStep ;// Row 0: first word
+ LDR x3, [pSrc, #4] ;// Row 1: second word
+ M_LDR x2, [pSrc], srcStep ;// Row 1: first word
+ MOV x0, x0, LSR #16 ;// Row 0: upper halfword of the first word moves down
+ ORR x0, x0, x1, LSL #16 ;// Row 0: lower halfword of the second word moves up
+ M_STR x0, [pDst], dstStep ;// Store row 0
+ MOV x2, x2, LSR #16 ;// Row 1: same realignment
+ ORR x2, x2, x3, LSL #16
+ M_STR x2, [pDst], dstStep ;// Store row 1
+
+ LDR x1, [pSrc, #4] ;// Row 2: second word
+ M_LDR x0, [pSrc], srcStep ;// Row 2: first word
+ LDR x3, [pSrc, #4] ;// Row 3: second word
+ M_LDR x2, [pSrc], srcStep ;// Row 3: first word
+ MOV x0, x0, LSR #16 ;// Row 2: same realignment
+ ORR x0, x0, x1, LSL #16
+ M_STR x0, [pDst], dstStep ;// Store row 2
+ MOV x2, x2, LSR #16 ;// Row 3: same realignment
+ ORR x2, x2, x3, LSL #16
+ M_STR x2, [pDst], dstStep ;// Store row 3
+ B Copy4x4End
+
+Copy4x4Align3
+ LDR x1, [pSrc, #4] ;// Row 0: second word, read before pSrc is stepped
+ M_LDR x0, [pSrc], srcStep ;// Row 0: first word
+ LDR x3, [pSrc, #4] ;// Row 1: second word
+ M_LDR x2, [pSrc], srcStep ;// Row 1: first word
+ MOV x0, x0, LSR #24 ;// Row 0: only the top byte of the first word is kept
+ ORR x0, x0, x1, LSL #8 ;// Row 0: lower three bytes of the second word move up
+ M_STR x0, [pDst], dstStep ;// Store row 0
+ MOV x2, x2, LSR #24 ;// Row 1: same realignment
+ ORR x2, x2, x3, LSL #8
+ M_STR x2, [pDst], dstStep ;// Store row 1
+
+ LDR x1, [pSrc, #4] ;// Row 2: second word
+ M_LDR x0, [pSrc], srcStep ;// Row 2: first word
+ LDR x3, [pSrc, #4] ;// Row 3: second word
+ M_LDR x2, [pSrc], srcStep ;// Row 3: first word
+ MOV x0, x0, LSR #24 ;// Row 2: same realignment
+ ORR x0, x0, x1, LSL #8
+ M_STR x0, [pDst], dstStep ;// Store row 2
+ MOV x2, x2, LSR #24 ;// Row 3: same realignment
+ ORR x2, x2, x3, LSL #8
+ M_STR x2, [pDst], dstStep ;// Store row 3
+ B Copy4x4End
+
+Copy4x4End
+ M_END
+
+ ENDIF
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
new file mode 100755
index 0000000..4e5a39d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
@@ -0,0 +1,178 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+ EXPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+
+;// Functions:
+;// armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe and
+;// armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+;//
+;// Implements re-arrangement of data from a temporary buffer to a buffer pointed to by pBuf.
+;// This converts the data from 16 bit to 8 bit; it also
+;// removes the offset and saturates the result.
+;//
+;// Registers used as input for this function
+;// r0,r1,r7 where r0 is input pointer and r1 its step size, r7 is output pointer
+;//
+;// Registers preserved for top level function
+;// r4,r5,r6,r8,r9,r14
+;//
+;// Registers modified by the function
+;// r7,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the destination location
+;// r1 - step size to this destination location
+
+
+DEBUG_ON SETL {FALSE}
+
+MASK EQU 0x80808080 ;// Mask is used to implement (a+b+1)/2
+
+;// Declare input registers
+
+pSrc0 RN 0
+srcStep0 RN 1
+
+;// Declare other intermediate registers
+Temp1 RN 4
+Temp2 RN 5
+Temp3 RN 10
+Temp4 RN 11
+pBuf RN 7
+r0x0fe00fe0 RN 6
+r0x00ff00ff RN 12
+Count RN 14
+ValueA0 RN 10
+ValueA1 RN 11
+
+ IF ARM1136JS
+
+
+ ;// Function header: convert four rows of eight 16-bit values at pSrc0 to bytes stored at pBuf
+ M_START armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe, r6
+
+ ;// Code start
+ MOV Count, #4 ;// Four input rows are processed
+ LDR r0x0fe00fe0, =0x0fe00fe0 ;// Offset subtracted from each halfword
+ LDR r0x00ff00ff, =0x00ff00ff ;// Mask keeping the low byte of each halfword
+LoopStart1
+ LDR Temp4, [pSrc0, #12] ;// Fourth word of the row
+ LDR Temp3, [pSrc0, #8] ;// Third word of the row
+ LDR Temp2, [pSrc0, #4] ;// Second word of the row
+ M_LDR Temp1, [pSrc0], srcStep0 ;// First word; post-indexed form steps pSrc0 by srcStep0
+ UQSUB16 Temp4, Temp4, r0x0fe00fe0 ;// Remove the 0x0fe0 offset per halfword (unsigned saturating)
+ UQSUB16 Temp3, Temp3, r0x0fe00fe0
+ UQSUB16 Temp2, Temp2, r0x0fe00fe0
+ UQSUB16 Temp1, Temp1, r0x0fe00fe0
+ USAT16 Temp4, #13, Temp4 ;// Saturate each halfword to 13 bits (0..0x1fff)
+ USAT16 Temp3, #13, Temp3
+ USAT16 Temp2, #13, Temp2
+ USAT16 Temp1, #13, Temp1
+ AND Temp4, r0x00ff00ff, Temp4, LSR #5 ;// Shift right by 5 and keep 8 bits per halfword
+ AND Temp3, r0x00ff00ff, Temp3, LSR #5
+ AND Temp2, r0x00ff00ff, Temp2, LSR #5
+ AND Temp1, r0x00ff00ff, Temp1, LSR #5
+ ORR ValueA1, Temp3, Temp4, LSL #8 ;// Must run first: ValueA0 shares r10 with Temp3
+ ORR ValueA0, Temp1, Temp2, LSL #8 ;// Bytes of Temp2 fill the odd byte positions
+ SUBS Count, Count, #1 ;// Flags drive the BGT below (STRD leaves them untouched)
+ STRD ValueA0, [pBuf], #8 ;// Store the r10/r11 pair as 8 output bytes, advance pBuf
+ BGT LoopStart1
+End1
+ SUB pSrc0, pBuf, #32 ;// Output r0 = start of the 4 x 8 bytes just written
+ MOV srcStep0, #8 ;// Output r1 = 8, the spacing of the stored rows
+
+ M_END
+
+
+ ;// Function header: convert four rows of eight 16-bit values at pSrc0 to bytes, regrouped into 4-byte words at pBuf
+ M_START armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe, r6
+
+ ;// Code start
+ LDR r0x0fe00fe0, =0x0fe00fe0 ;// Offset subtracted from each halfword
+ LDR r0x00ff00ff, =0x00ff00ff ;// Mask keeping the low byte of each halfword
+ MOV Count, #2 ;// Two passes, each converting two input rows
+
+LoopStart
+ LDR Temp4, [pSrc0, #12] ;// Fourth word of the row
+ LDR Temp3, [pSrc0, #8] ;// Third word of the row
+ LDR Temp2, [pSrc0, #4] ;// Second word of the row
+ M_LDR Temp1, [pSrc0], srcStep0 ;// First word; post-indexed form steps pSrc0 by srcStep0
+
+ UQSUB16 Temp4, Temp4, r0x0fe00fe0 ;// Remove the 0x0fe0 offset per halfword (unsigned saturating)
+ UQSUB16 Temp3, Temp3, r0x0fe00fe0
+ UQSUB16 Temp2, Temp2, r0x0fe00fe0
+ UQSUB16 Temp1, Temp1, r0x0fe00fe0
+
+ USAT16 Temp4, #13, Temp4 ;// Saturate each halfword to 13 bits (0..0x1fff)
+ USAT16 Temp3, #13, Temp3
+ USAT16 Temp2, #13, Temp2
+ USAT16 Temp1, #13, Temp1
+
+ AND Temp4, r0x00ff00ff, Temp4, LSR #5 ;// Shift right by 5 and keep 8 bits per halfword
+ AND Temp3, r0x00ff00ff, Temp3, LSR #5
+ AND Temp2, r0x00ff00ff, Temp2, LSR #5
+ AND Temp1, r0x00ff00ff, Temp1, LSR #5
+ ORR ValueA1, Temp3, Temp4, LSL #8 ;// [d2 c2 d0 c0]
+ ORR ValueA0, Temp1, Temp2, LSL #8 ;// [b2 a2 b0 a0]
+
+ PKHBT Temp1, ValueA0, ValueA1, LSL #16 ;// [d0 c0 b0 a0]
+
+ STR Temp1, [pBuf], #8 ;// Written at pass offset 0; pBuf moves to offset 8
+ PKHTB Temp2, ValueA1, ValueA0, ASR #16 ;// [d2 c2 b2 a2]
+ STR Temp2, [pBuf], #-4 ;// Written at pass offset 8; pBuf moves back to offset 4
+
+ LDR Temp4, [pSrc0, #12] ;// Next input row, same conversion as above
+ LDR Temp3, [pSrc0, #8]
+ LDR Temp2, [pSrc0, #4]
+ M_LDR Temp1, [pSrc0], srcStep0
+
+ UQSUB16 Temp4, Temp4, r0x0fe00fe0
+ UQSUB16 Temp3, Temp3, r0x0fe00fe0
+ UQSUB16 Temp2, Temp2, r0x0fe00fe0
+ UQSUB16 Temp1, Temp1, r0x0fe00fe0
+
+ USAT16 Temp4, #13, Temp4
+ USAT16 Temp3, #13, Temp3
+ USAT16 Temp2, #13, Temp2
+ USAT16 Temp1, #13, Temp1
+
+ AND Temp4, r0x00ff00ff, Temp4, LSR #5
+ AND Temp3, r0x00ff00ff, Temp3, LSR #5
+ AND Temp2, r0x00ff00ff, Temp2, LSR #5
+ AND Temp1, r0x00ff00ff, Temp1, LSR #5
+ ORR ValueA1, Temp3, Temp4, LSL #8 ;// [d2 c2 d0 c0]
+ ORR ValueA0, Temp1, Temp2, LSL #8 ;// [b2 a2 b0 a0]
+
+ PKHBT Temp1, ValueA0, ValueA1, LSL #16 ;// [d0 c0 b0 a0]
+ SUBS Count, Count, #1 ;// Flags drive the BGT below (STR/PKHTB leave them untouched)
+ STR Temp1, [pBuf], #8 ;// Written at pass offset 4; pBuf moves to offset 12
+ PKHTB Temp2, ValueA1, ValueA0, ASR #16 ;// [d2 c2 b2 a2]
+ STR Temp2, [pBuf], #4 ;// Written at pass offset 12; pBuf ends 16 bytes past the pass start
+
+ BGT LoopStart
+End2
+ SUB pSrc0, pBuf, #32-8 ;// Output r0 = 8 bytes past the start of the 32 bytes written
+ MOV srcStep0, #4 ;// Output r1 = 4, the spacing of the stored words
+
+ M_END
+
+ ENDIF
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
new file mode 100755
index 0000000..d1684cb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
@@ -0,0 +1,313 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ EXPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+
+ M_VARIANTS CortexA8
+
+ IF CortexA8
+
+ M_START armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe, r11
+ ;// Applies the (1,-5,20,20,-5,1) filter along each of 9 source rows, then across those rows, leaving 4 rows of 4 bytes in dAcc0..dAcc3
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2 ;// Not referenced by any instruction in this routine
+dstStep RN 3 ;// Not referenced by any instruction in this routine
+
+;// Declare Neon registers
+dCoeff5 DN 30.S16
+dCoeff20 DN 31.S16
+qCoeff5 QN 14.S32
+qCoeff20 QN 15.S32 ;// Q15 overlaps D30/D31, i.e. dCoeff5 and dCoeff20
+
+qSrc01 QN 0.U8
+dSrc0 DN 0.U8
+dSrc1 DN 1.U8
+
+dSrcb DN 4.U8
+dSrcc DN 2.U8
+dSrcd DN 3.U8
+dSrce DN 5.U8
+dSrcf DN 1.U8 ;// Same D1 as dSrc1, so the #5 VEXT has to be the last VEXT of each row
+
+qSrcb QN 2.S16
+qSrcc QN 1.S16
+dSrcB DN 4.S16 ;// Low half of qSrcb
+dSrcC DN 2.S16 ;// Low half of qSrcc
+
+qRes0 QN 5.S16
+qRes1 QN 6.S16
+qRes2 QN 7.S16
+qRes3 QN 8.S16
+qRes4 QN 9.S16
+qRes5 QN 10.S16
+qRes6 QN 11.S16
+qRes7 QN 12.S16
+qRes8 QN 13.S16
+
+dRes0 DN 10.S16 ;// dResN is the low half of qResN
+dRes1 DN 12.S16
+dRes2 DN 14.S16
+dRes3 DN 16.S16
+dRes4 DN 18.S16
+dRes5 DN 20.S16
+dRes6 DN 22.S16
+dRes7 DN 24.S16
+dRes8 DN 26.S16
+
+qAcc01 QN 5.S32 ;// Re-uses Q5 (dRes0), which is only read by the first second-pass VADDL
+qAcc23 QN 6.S32 ;// Re-uses Q6 (dRes1)
+qAcc45 QN 2.S32
+qAcc67 QN 3.S32
+qSumBE QN 0.S32
+qSumCD QN 1.S32
+
+dTempAcc0 DN 0.U16
+dTempAcc1 DN 2.U16
+dTempAcc2 DN 4.U16
+dTempAcc3 DN 6.U16
+
+qTAcc0 QN 0.U16
+qTAcc1 QN 1.U16
+qTAcc2 QN 2.U16
+qTAcc3 QN 3.U16
+
+dAcc0 DN 0.U8
+dAcc1 DN 2.U8
+dAcc2 DN 4.U8
+dAcc3 DN 6.U8
+
+dTmp0 DN 8.S16
+dTmp1 DN 9.S16
+qTmp0 QN 4.S32
+
+ VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..]
+ VMOV dCoeff20, #20
+ VMOV dCoeff5, #5
+
+ ;// Row0
+ VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..]
+ VEXT dSrcc, dSrc0, dSrc1, #2
+ VEXT dSrcd, dSrc0, dSrc1, #3
+ VEXT dSrce, dSrc0, dSrc1, #4
+ VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..]
+ VADDL qSrcc, dSrcc, dSrcd ;// c+d
+ VADDL qSrcb, dSrcb, dSrce ;// b+e
+ VADDL qRes0, dSrc0, dSrcf ;// Acc=a+f
+ VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..]
+ VMLA dRes0, dSrcC, dCoeff20 ;// Acc += 20*(c+d)
+; VMLS dRes0, dSrcB, dCoeff5 ;// Acc -= 5*(b+e)
+ VMUL dTmp0, dSrcB, dCoeff5 ;// Tmp = 5*(b+e); subtracted from Row0 Acc by the VSUB in the Row1 section
+
+ ;// Row1
+ VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..]
+ VEXT dSrcc, dSrc0, dSrc1, #2
+ VEXT dSrcd, dSrc0, dSrc1, #3
+ VEXT dSrce, dSrc0, dSrc1, #4
+ VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..]
+ VADDL qSrcc, dSrcc, dSrcd ;// c+d
+ VADDL qSrcb, dSrcb, dSrce ;// b+e
+ VADDL qRes1, dSrc0, dSrcf ;// Acc=a+f
+ VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..]
+
+ VSUB dRes0, dRes0, dTmp0 ;// TeRi
+
+ VMLA dRes1, dSrcC, dCoeff20 ;// Acc += 20*(c+d)
+; VMLS dRes1, dSrcB, dCoeff5 ;// Acc -= 5*(b+e)
+ VMUL dTmp0, dSrcB, dCoeff5 ;// Tmp = 5*(b+e); subtracted from Row1 Acc by the VSUB in the Row2 section
+
+ ;// Row2
+ VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..]
+ VEXT dSrcc, dSrc0, dSrc1, #2
+ VEXT dSrcd, dSrc0, dSrc1, #3
+ VEXT dSrce, dSrc0, dSrc1, #4
+ VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..]
+ VADDL qSrcc, dSrcc, dSrcd ;// c+d
+ VADDL qSrcb, dSrcb, dSrce ;// b+e
+ VADDL qRes2, dSrc0, dSrcf ;// Acc=a+f
+ VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..]
+
+ VSUB dRes1, dRes1, dTmp0 ;// Row1 Acc -= 5*(b+e)
+
+ VMLA dRes2, dSrcC, dCoeff20 ;// Acc += 20*(c+d)
+; VMLS dRes2, dSrcB, dCoeff5 ;// Acc -= 5*(b+e)
+ VMUL dTmp0, dSrcB, dCoeff5 ;// Tmp = 5*(b+e); subtracted from Row2 Acc by the VSUB in the Row3 section
+
+ ;// Row3
+ VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..]
+ VEXT dSrcc, dSrc0, dSrc1, #2
+ VEXT dSrcd, dSrc0, dSrc1, #3
+ VEXT dSrce, dSrc0, dSrc1, #4
+ VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..]
+ VADDL qSrcc, dSrcc, dSrcd ;// c+d
+ VADDL qSrcb, dSrcb, dSrce ;// b+e
+ VADDL qRes3, dSrc0, dSrcf ;// Acc=a+f
+ VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..]
+
+ VSUB dRes2, dRes2, dTmp0 ;// Row2 Acc -= 5*(b+e)
+
+ VMLA dRes3, dSrcC, dCoeff20 ;// Acc += 20*(c+d)
+; VMLS dRes3, dSrcB, dCoeff5 ;// Acc -= 5*(b+e)
+ VMUL dTmp0, dSrcB, dCoeff5 ;// Tmp = 5*(b+e); subtracted from Row3 Acc by the VSUB in the Row4 section
+
+ ;// Row4
+ VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..]
+ VEXT dSrcc, dSrc0, dSrc1, #2
+ VEXT dSrcd, dSrc0, dSrc1, #3
+ VEXT dSrce, dSrc0, dSrc1, #4
+ VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..]
+ VADDL qSrcc, dSrcc, dSrcd ;// c+d
+ VADDL qSrcb, dSrcb, dSrce ;// b+e
+ VADDL qRes4, dSrc0, dSrcf ;// Acc=a+f
+ VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..]
+
+ VSUB dRes3, dRes3, dTmp0 ;// Row3 Acc -= 5*(b+e)
+
+ VMLA dRes4, dSrcC, dCoeff20 ;// Acc += 20*(c+d)
+; VMLS dRes4, dSrcB, dCoeff5 ;// Acc -= 5*(b+e)
+ VMUL dTmp0, dSrcB, dCoeff5 ;// Tmp = 5*(b+e); subtracted from Row4 Acc by the VSUB in the Row5 section
+
+ ;// Row5
+ VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..]
+ VEXT dSrcc, dSrc0, dSrc1, #2
+ VEXT dSrcd, dSrc0, dSrc1, #3
+ VEXT dSrce, dSrc0, dSrc1, #4
+ VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..]
+ VADDL qSrcc, dSrcc, dSrcd ;// c+d
+ VADDL qSrcb, dSrcb, dSrce ;// b+e
+ VADDL qRes5, dSrc0, dSrcf ;// Acc=a+f
+ VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..]
+
+ VSUB dRes4, dRes4, dTmp0 ;// Row4 Acc -= 5*(b+e)
+
+ VMLA dRes5, dSrcC, dCoeff20 ;// Acc += 20*(c+d)
+; VMLS dRes5, dSrcB, dCoeff5 ;// Acc -= 5*(b+e)
+ VMUL dTmp0, dSrcB, dCoeff5 ;// Tmp = 5*(b+e); subtracted from Row5 Acc by the VSUB in the Row6 section
+
+ ;// Row6
+ VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..]
+ VEXT dSrcc, dSrc0, dSrc1, #2
+ VEXT dSrcd, dSrc0, dSrc1, #3
+ VEXT dSrce, dSrc0, dSrc1, #4
+ VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..]
+ VADDL qSrcc, dSrcc, dSrcd ;// c+d
+ VADDL qSrcb, dSrcb, dSrce ;// b+e
+ VADDL qRes6, dSrc0, dSrcf ;// Acc=a+f
+ VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..]
+
+ VSUB dRes5, dRes5, dTmp0 ;// Row5 Acc -= 5*(b+e)
+
+ VMLA dRes6, dSrcC, dCoeff20 ;// Acc += 20*(c+d)
+; VMLS dRes6, dSrcB, dCoeff5 ;// Acc -= 5*(b+e)
+ VMUL dTmp0, dSrcB, dCoeff5 ;// Tmp = 5*(b+e); subtracted from Row6 Acc by the VSUB in the Row7 section
+
+ ;// Row7
+ VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..]
+ VEXT dSrcc, dSrc0, dSrc1, #2
+ VEXT dSrcd, dSrc0, dSrc1, #3
+ VEXT dSrce, dSrc0, dSrc1, #4
+ VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..]
+ VADDL qSrcc, dSrcc, dSrcd ;// c+d
+ VADDL qSrcb, dSrcb, dSrce ;// b+e
+ VADDL qRes7, dSrc0, dSrcf ;// Acc=a+f
+ VLD1 qSrc01, [pSrc], srcStep ;// [a0 a1 a2 a3 ..]
+
+ VSUB dRes6, dRes6, dTmp0 ;// Row6 Acc -= 5*(b+e)
+
+ VMLA dRes7, dSrcC, dCoeff20 ;// Acc += 20*(c+d)
+; VMLS dRes7, dSrcB, dCoeff5 ;// Acc -= 5*(b+e)
+ VMUL dTmp0, dSrcB, dCoeff5 ;// Tmp = 5*(b+e); subtracted from Row7 Acc by the VSUB in the Row8 section
+
+ ;// Row8
+ VEXT dSrcb, dSrc0, dSrc1, #1 ;// [b0 b1 b2 b3 ..]
+ VEXT dSrcc, dSrc0, dSrc1, #2
+ VEXT dSrcd, dSrc0, dSrc1, #3
+ VEXT dSrce, dSrc0, dSrc1, #4
+ VEXT dSrcf, dSrc0, dSrc1, #5 ;// [f0 f1 f2 f3 ..]
+ VADDL qSrcc, dSrcc, dSrcd ;// c+d
+ VADDL qSrcb, dSrcb, dSrce ;// b+e
+ VADDL qRes8, dSrc0, dSrcf ;// Acc=a+f
+
+ VSUB dRes7, dRes7, dTmp0 ;// Row7 Acc -= 5*(b+e)
+
+ VMLA dRes8, dSrcC, dCoeff20 ;// Acc += 20*(c+d)
+; VMLS dRes8, dSrcB, dCoeff5 ;// Acc -= 5*(b+e)
+ VMUL dTmp0, dSrcB, dCoeff5 ;// Tmp = 5*(b+e); subtracted from Row8 Acc by the VSUB in the Col0 section
+
+ VMOV qCoeff20, #20 ;// Overwrites D30/D31; the 16-bit dCoeff5/dCoeff20 are not read after this point
+ VMOV qCoeff5, #5
+
+ ;// Col0
+ VADDL qAcc01, dRes0, dRes5 ;// Acc = a+f
+ VADDL qSumCD, dRes2, dRes3 ;// c+d
+ VADDL qSumBE, dRes1, dRes4 ;// b+e
+
+ VSUB dRes8, dRes8, dTmp0 ;// Row8 Acc -= 5*(b+e); must precede the qTmp0 write below (same D8)
+
+ VMLA qAcc01, qSumCD, qCoeff20 ;// Acc += 20*(c+d)
+; VMLS qAcc01, qSumBE, qCoeff5 ;// Acc -= 5*(b+e)
+ VMUL qTmp0, qSumBE, qCoeff5 ;// Tmp = 5*(b+e); subtracted from qAcc01 by the VSUB in the Col1 section
+
+ ;// Col1
+ VADDL qAcc23, dRes1, dRes6 ;// Acc = a+f
+ VADDL qSumCD, dRes3, dRes4 ;// c+d
+ VADDL qSumBE, dRes2, dRes5 ;// b+e
+ VMLA qAcc23, qSumCD, qCoeff20 ;// Acc += 20*(c+d)
+
+ VSUB qAcc01, qAcc01, qTmp0 ;// qAcc01 -= 5*(b+e)
+
+; VMLS qAcc23, qSumBE, qCoeff5 ;// Acc -= 5*(b+e)
+ VMUL qTmp0, qSumBE, qCoeff5 ;// Tmp = 5*(b+e); subtracted from qAcc23 by the VSUB in the Col2 section
+
+ ;// Col2
+ VADDL qAcc45, dRes2, dRes7 ;// Acc = a+f
+ VADDL qSumCD, dRes4, dRes5 ;// c+d
+ VADDL qSumBE, dRes3, dRes6 ;// b+e
+ VMLA qAcc45, qSumCD, qCoeff20 ;// Acc += 20*(c+d)
+
+ VSUB qAcc23, qAcc23, qTmp0 ;// qAcc23 -= 5*(b+e)
+
+; VMLS qAcc45, qSumBE, qCoeff5 ;// Acc -= 5*(b+e)
+ VMUL qTmp0, qSumBE, qCoeff5 ;// Tmp = 5*(b+e); subtracted from qAcc45 by the VSUB in the Col3 section
+
+ ;// Col3
+ VADDL qAcc67, dRes3, dRes8 ;// Acc = a+f
+ VADDL qSumCD, dRes5, dRes6 ;// c+d
+ VADDL qSumBE, dRes4, dRes7 ;// b+e
+ VMLA qAcc67, qSumCD, qCoeff20 ;// Acc += 20*(c+d)
+
+ VSUB qAcc45, qAcc45, qTmp0 ;// qAcc45 -= 5*(b+e)
+
+ VMLS qAcc67, qSumBE, qCoeff5 ;// Acc -= 5*(b+e)
+
+ VQRSHRUN dTempAcc0, qAcc01, #10 ;// Rounding shift right by 10, saturating narrow to unsigned 16 bit
+ VQRSHRUN dTempAcc1, qAcc23, #10
+ VQRSHRUN dTempAcc2, qAcc45, #10
+ VQRSHRUN dTempAcc3, qAcc67, #10
+
+ VQMOVN dAcc0, qTAcc0 ;// Saturating narrow to 8 bit; only the lanes from dTempAccN hold results
+ VQMOVN dAcc1, qTAcc1
+ VQMOVN dAcc2, qTAcc2
+ VQMOVN dAcc3, qTAcc3
+ ;// Results stay in dAcc0..dAcc3 (D0, D2, D4, D6); no store is issued in this routine
+ M_END
+
+ ENDIF
+
+
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
new file mode 100755
index 0000000..7bc091f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
@@ -0,0 +1,266 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ EXPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+
+ M_VARIANTS CortexA8
+
+ IF CortexA8
+ M_START armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe, r11
+ ;// Applies the (1,-5,20,20,-5,1) filter across 9 source rows first, then along each result row, leaving 4 rows of 4 bytes in dAcc0..dAcc3
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2 ;// Not referenced by any instruction in this routine
+dstStep RN 3 ;// Not referenced by any instruction in this routine
+
+;// Declare Neon registers
+dTCoeff5 DN 30.U8 ;// 8-bit view of D30, used by the widening first pass
+dTCoeff20 DN 31.U8 ;// 8-bit view of D31, used by the widening first pass
+dCoeff5 DN 30.S16 ;// 16-bit view of the same D30, used by the second pass
+dCoeff20 DN 31.S16 ;// 16-bit view of the same D31, used by the second pass
+
+qSrcA01 QN 0.U8
+qSrcB23 QN 1.U8
+qSrcC45 QN 2.U8
+qSrcD67 QN 3.U8
+qSrcE89 QN 4.U8
+qSrcF1011 QN 5.U8
+qSrcG1213 QN 6.U8
+qSrcH1415 QN 7.U8
+qSrcI1617 QN 8.U8
+
+dSrcA0 DN 0.U8
+dSrcB2 DN 2.U8
+dSrcC4 DN 4.U8
+dSrcD6 DN 6.U8
+dSrcE8 DN 8.U8
+dSrcF10 DN 10.U8
+dSrcG12 DN 12.U8
+dSrcH14 DN 14.U8
+dSrcI16 DN 16.U8
+
+dSrcA1 DN 1.U8
+dSrcB3 DN 3.U8
+dSrcC5 DN 5.U8
+dSrcD7 DN 7.U8
+dSrcE9 DN 9.U8
+dSrcF11 DN 11.U8
+dSrcG13 DN 13.U8
+dSrcH15 DN 15.U8
+dSrcI17 DN 17.U8
+
+qTempP01 QN 9.S16
+qTempQ01 QN 10.S16
+qTempR01 QN 11.S16
+qTempS01 QN 12.S16
+
+qTempP23 QN 0.S16 ;// Re-uses Q0 (source row A)
+qTempQ23 QN 1.S16 ;// Re-uses Q1 (source row B)
+qTempR23 QN 2.S16 ;// Re-uses Q2 (source row C)
+qTempS23 QN 3.S16 ;// Re-uses Q3 (source row D)
+
+dTempP0 DN 18.S16
+dTempP1 DN 19.S16
+dTempP2 DN 0.S16
+
+dTempQ0 DN 20.S16
+dTempQ1 DN 21.S16
+dTempQ2 DN 2.S16
+
+dTempR0 DN 22.S16
+dTempR1 DN 23.S16
+dTempR2 DN 4.S16
+
+dTempS0 DN 24.S16
+dTempS1 DN 25.S16
+dTempS2 DN 6.S16
+
+dTempB0 DN 26.S16
+dTempC0 DN 27.S16
+dTempD0 DN 28.S16
+dTempF0 DN 29.S16
+
+dTempAcc0 DN 0.U16
+dTempAcc1 DN 2.U16
+dTempAcc2 DN 4.U16
+dTempAcc3 DN 6.U16
+
+dAcc0 DN 0.U8
+dAcc1 DN 2.U8
+dAcc2 DN 4.U8
+dAcc3 DN 6.U8
+
+qAcc0 QN 0.S32 ;// Re-uses Q0; dTempP2 (D0) is read before qAcc0 is written
+qAcc1 QN 1.S32 ;// Re-uses Q1; dTempQ2 (D2) is read before qAcc1 is written
+qAcc2 QN 2.S32 ;// Re-uses Q2; dTempR2 (D4) is read before qAcc2 is written
+qAcc3 QN 3.S32 ;// Re-uses Q3; dTempS2 (D6) is read before qAcc3 is written
+
+qTAcc0 QN 0.U16
+qTAcc1 QN 1.U16
+qTAcc2 QN 2.U16
+qTAcc3 QN 3.U16
+
+qTmp QN 4.S16
+dTmp DN 8.S16 ;// Same D8 as dSrcE8; only written after the last read of dSrcE8
+
+ VLD1 qSrcA01, [pSrc], srcStep ;// [a0 a1 a2 a3 .. a15]
+ ADD r12, pSrc, srcStep, LSL #2 ;// r12 = 4 rows past the updated pSrc, i.e. row F
+ VMOV dTCoeff5, #5
+ VMOV dTCoeff20, #20
+ VLD1 qSrcF1011, [r12], srcStep ;// Row F
+ VLD1 qSrcB23, [pSrc], srcStep ;// [b0 b1 b2 b3 .. b15]
+
+ VLD1 qSrcG1213, [r12], srcStep ;// Row G
+ VADDL qTempP01, dSrcA0, dSrcF10 ;// P = A+F (first 8 columns)
+ VLD1 qSrcC45, [pSrc], srcStep ;// [c0 c1 c2 c3 .. c15]
+ VADDL qTempP23, dSrcA1, dSrcF11 ;// P = A+F (last 8 columns); overwrites Q0, row A is not read again
+ VLD1 qSrcD67, [pSrc], srcStep ;// Row D
+ VADDL qTempQ01, dSrcB2, dSrcG12 ;// Q = B+G (first 8 columns)
+ VLD1 qSrcE89, [pSrc], srcStep ;// Row E
+
+ ;//t0
+ VMLAL qTempP01, dSrcC4, dTCoeff20 ;// P += 20*C
+
+ VLD1 qSrcH1415, [r12], srcStep ;// Row H
+
+ VMLAL qTempP23, dSrcC5, dTCoeff20 ;// P += 20*C
+
+ VLD1 qSrcI1617, [r12], srcStep ;// [i0 i1 i2 i3 .. ]
+
+ VMLAL qTempP01, dSrcD6, dTCoeff20 ;// P += 20*D
+ VMLAL qTempQ01, dSrcD6, dTCoeff20 ;// Q += 20*D
+ VMLSL qTempP23, dSrcB3, dTCoeff5 ;// P -= 5*B
+
+ VADDL qTempR01, dSrcC4, dSrcH14 ;// R = C+H (first 8 columns)
+
+ VMLSL qTempP01, dSrcB2, dTCoeff5 ;// P -= 5*B
+
+ VADDL qTempQ23, dSrcB3, dSrcG13 ;// Q = B+G (last 8 columns); overwrites Q1, row B is not read again
+
+ VMLAL qTempP23, dSrcD7, dTCoeff20 ;// P += 20*D
+ VMLAL qTempQ01, dSrcE8, dTCoeff20 ;// Q += 20*E
+
+ VMLSL qTempP01, dSrcE8, dTCoeff5 ;// P -= 5*E
+ VMLAL qTempQ23, dSrcD7, dTCoeff20 ;// Q += 20*D
+
+ VMLSL qTempP23, dSrcE9, dTCoeff5 ;// P -= 5*E
+
+ ;//t1
+
+ VMLAL qTempR01, dSrcE8, dTCoeff20 ;// R += 20*E
+ VMLSL qTempQ01, dSrcC4, dTCoeff5 ;// Q -= 5*C
+ VMLSL qTempQ23, dSrcC5, dTCoeff5 ;// Q -= 5*C
+ VADDL qTempR23, dSrcC5, dSrcH15 ;// R = C+H (last 8 columns); overwrites Q2, row C is not read again
+
+ VMLAL qTempR01, dSrcF10, dTCoeff20 ;// R += 20*F
+ VMLSL qTempQ01, dSrcF10, dTCoeff5 ;// Q -= 5*F
+ VMLAL qTempQ23, dSrcE9, dTCoeff20 ;// Q += 20*E
+ VMLAL qTempR23, dSrcE9, dTCoeff20 ;// R += 20*E
+ VADDL qTempS01, dSrcD6, dSrcI16 ;// S = D+I (first 8 columns)
+
+
+ VMLSL qTempR01, dSrcD6, dTCoeff5 ;// R -= 5*D
+ VMLSL qTempQ23, dSrcF11, dTCoeff5 ;// Q -= 5*F
+ VMLSL qTempR23, dSrcD7, dTCoeff5 ;// R -= 5*D
+
+ ;//t2
+ VADDL qTempS23, dSrcD7, dSrcI17 ;// S = D+I (last 8 columns); overwrites Q3, row D is not read again
+ VMLAL qTempS01, dSrcF10, dTCoeff20 ;// S += 20*F
+ VMLSL qTempR01, dSrcG12, dTCoeff5 ;// R -= 5*G
+ VMLSL qTempR23, dSrcG13, dTCoeff5 ;// R -= 5*G
+
+ VMLAL qTempS23, dSrcF11, dTCoeff20 ;// S += 20*F
+ VMLAL qTempS01, dSrcG12, dTCoeff20 ;// S += 20*G
+ VEXT dTempB0, dTempP0, dTempP1, #1 ;// Second pass on P: lanes 1..4 (tap b)
+ VMLAL qTempR23, dSrcF11, dTCoeff20 ;// R += 20*F
+
+
+ ;//t3
+ VMLAL qTempS23, dSrcG13, dTCoeff20 ;// S += 20*G
+ VMLSL qTempS01, dSrcE8, dTCoeff5 ;// S -= 5*E
+ VEXT dTempC0, dTempP0, dTempP1, #2 ;// P lanes 2..5 (tap c)
+ VMOV dCoeff20, #20 ;// Reload D31 as 16-bit lanes; the 8-bit dTCoeff20 is not read after this
+ VMLSL qTempS23, dSrcE9, dTCoeff5 ;// S -= 5*E
+ VMLSL qTempS01, dSrcH14, dTCoeff5 ;// S -= 5*H
+ VEXT dTempF0, dTempP1, dTempP2, #1 ;// P lanes 5..8 (tap f)
+ VEXT dTempD0, dTempP0, dTempP1, #3 ;// P lanes 3..6 (tap d)
+ VMLSL qTempS23, dSrcH15, dTCoeff5 ;// S -= 5*H; last read of the 8-bit dTCoeff5
+
+ VADDL qAcc0, dTempP0, dTempF0 ;// Acc0 = a+f
+ VADD dTempC0, dTempC0, dTempD0 ;// c+d
+ ;//h
+ VMOV dCoeff5, #5 ;// Reload D30 as 16-bit lanes
+
+ ;// res0
+ VADD dTempB0, dTempB0, dTempP1 ;// b+e (dTempP1 holds lanes 4..7)
+ VMLAL qAcc0, dTempC0, dCoeff20 ;// Acc0 += 20*(c+d)
+ VEXT dTempC0, dTempQ0, dTempQ1, #2 ;// Q lanes 2..5 (tap c)
+ VEXT dTempD0, dTempQ0, dTempQ1, #3 ;// Q lanes 3..6 (tap d)
+ VEXT dTempF0, dTempQ1, dTempQ2, #1 ;// Q lanes 5..8 (tap f)
+ VMLSL qAcc0, dTempB0, dCoeff5 ;// Acc0 -= 5*(b+e)
+
+ ;// res1
+ VEXT dTempB0, dTempQ0, dTempQ1, #1 ;// Q lanes 1..4 (tap b)
+ VADDL qAcc1, dTempQ0, dTempF0 ;// Acc1 = a+f
+ VADD dTempC0, dTempC0, dTempD0 ;// c+d
+ VADD dTempB0, dTempB0, dTempQ1 ;// b+e
+ VEXT dTempD0, dTempR0, dTempR1, #3 ;// R lanes 3..6 (tap d)
+ VMLAL qAcc1, dTempC0, dCoeff20 ;// Acc1 += 20*(c+d)
+ VEXT dTempF0, dTempR1, dTempR2, #1 ;// R lanes 5..8 (tap f)
+ VEXT dTempC0, dTempR0, dTempR1, #2 ;// R lanes 2..5 (tap c)
+ VEXT dTmp, dTempR0, dTempR1, #1 ;// R lanes 1..4 (tap b), held in dTmp while dTempB0 is still in use
+ VADDL qAcc2, dTempR0, dTempF0 ;// Acc2 = a+f
+ VMLSL qAcc1, dTempB0, dCoeff5 ;// Acc1 -= 5*(b+e)
+; VEXT dTempB0, dTempR0, dTempR1, #1
+ VADD dTempC0, dTempC0, dTempD0 ;// c+d
+
+ ;// res2
+ VADD dTempB0, dTmp, dTempR1 ;// b+e
+ VEXT dTempD0, dTempS0, dTempS1, #3 ;// S lanes 3..6 (tap d)
+ VMLAL qAcc2, dTempC0, dCoeff20 ;// Acc2 += 20*(c+d)
+; VADD dTempB0, dTempB0, dTempR1
+
+ ;// res3
+ VEXT dTempC0, dTempS0, dTempS1, #2 ;// S lanes 2..5 (tap c)
+ VEXT dTempF0, dTempS1, dTempS2, #1 ;// S lanes 5..8 (tap f)
+ VADD dTempC0, dTempC0, dTempD0 ;// c+d
+ VEXT dTmp, dTempS0, dTempS1, #1 ;// S lanes 1..4 (tap b)
+ VADDL qAcc3, dTempS0, dTempF0 ;// Acc3 = a+f
+ VMLSL qAcc2, dTempB0, dCoeff5 ;// Acc2 -= 5*(b+e)
+ VMLAL qAcc3, dTempC0, dCoeff20 ;// Acc3 += 20*(c+d)
+ VADD dTmp, dTmp, dTempS1 ;// b+e
+ VMLSL qAcc3, dTmp, dCoeff5 ;// Acc3 -= 5*(b+e)
+
+ VQRSHRUN dTempAcc0, qAcc0, #10 ;// Rounding shift right by 10, saturating narrow to unsigned 16 bit
+ VQRSHRUN dTempAcc1, qAcc1, #10
+ VQRSHRUN dTempAcc2, qAcc2, #10
+ VQRSHRUN dTempAcc3, qAcc3, #10
+
+ VQMOVN dAcc0, qTAcc0 ;// Saturating narrow to 8 bit; only the lanes from dTempAccN hold results
+ VQMOVN dAcc1, qTAcc1
+ VQMOVN dAcc2, qTAcc2
+ VQMOVN dAcc3, qTAcc3
+ ;// Results stay in dAcc0..dAcc3 (D0, D2, D4, D6); no store is issued in this routine
+ M_END
+
+ ENDIF
+
+
+
+
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
new file mode 100755
index 0000000..babe8ad
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
@@ -0,0 +1,228 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+ EXPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+
+DEBUG_ON SETL {FALSE}
+
+ IF CortexA8
+
+ M_START armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe, r11
+ ;// Applies the (1,-5,20,20,-5,1) filter along each of 4 source rows, leaving 4 rows of 4 bytes in dAcc0/dAcc2/dAcc4/dAcc6
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2 ;// Not referenced by any instruction in this routine
+dstStep RN 3 ;// Not referenced by any instruction in this routine
+
+;// Declare Neon registers
+dCoeff5 DN 30.S16 ;// NOTE(review): never loaded in this routine; presumably set to 5 by the caller - confirm
+dCoeff20 DN 31.S16 ;// NOTE(review): never loaded in this routine; presumably set to 20 by the caller - confirm
+
+qSrcA01 QN 11.U8
+qSrcB01 QN 12.U8
+qSrcC01 QN 13.U8
+qSrcD01 QN 14.U8
+
+dSrcA0 DN 22.U8
+dSrcA1 DN 23.U8
+dSrcB0 DN 24.U8
+dSrcB1 DN 25.U8
+dSrcC0 DN 26.U8
+dSrcC1 DN 27.U8
+dSrcD0 DN 28.U8
+dSrcD1 DN 29.U8
+
+dSrcb DN 12.U8
+dSrce DN 13.U8
+dSrcf DN 10.U8
+
+dSrc0c DN 14.U8
+dSrc1c DN 16.U8
+dSrc2c DN 18.U8
+dSrc3c DN 20.U8
+
+dSrc0d DN 15.U8
+dSrc1d DN 17.U8
+dSrc2d DN 19.U8
+dSrc3d DN 21.U8
+
+qTemp01 QN 4.S16
+qTemp23 QN 6.S16 ;// Q6 overlaps dSrcb/dSrce (D12/D13): b+e is formed in place
+dTemp0 DN 8.S16 ;// Low half of qTemp01
+dTemp2 DN 12.S16 ;// Low half of qTemp23
+
+qRes01 QN 11.S16 ;// qResNN re-uses the Q register of the source row it is computed from
+qRes23 QN 12.S16
+qRes45 QN 13.S16
+qRes67 QN 14.S16
+
+dRes0 DN 22.S16
+dRes2 DN 24.S16
+dRes4 DN 26.S16
+dRes6 DN 28.S16
+
+dAcc0 DN 22.U8
+dAcc2 DN 24.U8
+dAcc4 DN 26.U8
+dAcc6 DN 28.U8
+
+dResult0 DN 22.U32
+dResult2 DN 24.U32
+dResult4 DN 26.U32
+dResult6 DN 28.U32
+
+ VLD1 qSrcA01, [pSrc], srcStep ;// Load A register [a0 a1 a2 a3 ..]
+ ;// One cycle stall
+ VEXT dSrcf, dSrcA0, dSrcA1, #5 ;// [f0 f1 f2 f3 ..]
+ VEXT dSrcb, dSrcA0, dSrcA1, #1 ;// [b0 b1 b2 b3 ..]
+; VLD1 qSrcB01, [pSrc], srcStep ;// Load B register [a0 a1 a2 a3 ..]
+ VEXT dSrc0c, dSrcA0, dSrcA1, #2
+ VEXT dSrc0d, dSrcA0, dSrcA1, #3
+ VEXT dSrce, dSrcA0, dSrcA1, #4
+ VADDL qRes01, dSrcA0, dSrcf ;// Acc=a+f
+ VADDL qTemp01, dSrc0c, dSrc0d ;// c+d
+ VADDL qTemp23, dSrcb, dSrce ;// b+e
+
+ VLD1 qSrcB01, [pSrc], srcStep ;// Load B register [a0 a1 a2 a3 ..]
+; VLD1 qSrcC01, [pSrc], srcStep ;// Load C register [a0 a1 a2 a3 ..]
+ VMLA dRes0, dTemp0, dCoeff20 ;// Acc += 20*(c+d)
+; VMLS dRes0, dTemp2, dCoeff5 ;// Acc -= 5*(b+e)
+ VMUL dTemp0, dTemp2, dCoeff5 ;// TeRi: dTemp0 = 5*(b+e), subtracted from Acc by the next VSUB
+
+ VEXT dSrcf, dSrcB0, dSrcB1, #5 ;// [f0 f1 f2 f3 ..]
+ VEXT dSrcb, dSrcB0, dSrcB1, #1 ;// [b0 b1 b2 b3 ..]
+ VEXT dSrc1c, dSrcB0, dSrcB1, #2
+ VEXT dSrc1d, dSrcB0, dSrcB1, #3
+ VEXT dSrce, dSrcB0, dSrcB1, #4
+ VADDL qRes23, dSrcB0, dSrcf ;// Acc=a+f
+
+ VSUB dRes0, dRes0, dTemp0 ;// TeRi
+
+ VADDL qTemp01, dSrc1c, dSrc1d ;// c+d
+ VADDL qTemp23, dSrcb, dSrce ;// b+e
+
+ VLD1 qSrcC01, [pSrc], srcStep ;// Load C register [a0 a1 a2 a3 ..]
+; VLD1 qSrcD01, [pSrc], srcStep ;// Load D register [a0 a1 a2 a3 ..]
+
+ VMLA dRes2, dTemp0, dCoeff20 ;// Acc += 20*(c+d)
+; VMLS dRes2, dTemp2, dCoeff5 ;// Acc -= 5*(b+e)
+ VMUL dTemp0, dTemp2, dCoeff5 ;// TeRi: dTemp0 = 5*(b+e), subtracted from Acc by the next VSUB
+
+ VEXT dSrcf, dSrcC0, dSrcC1, #5 ;// [f0 f1 f2 f3 ..]
+ VEXT dSrcb, dSrcC0, dSrcC1, #1 ;// [b0 b1 b2 b3 ..]
+ VEXT dSrc2c, dSrcC0, dSrcC1, #2
+ VEXT dSrc2d, dSrcC0, dSrcC1, #3
+ VEXT dSrce, dSrcC0, dSrcC1, #4
+ VADDL qRes45, dSrcC0, dSrcf ;// Acc=a+f
+
+ VSUB dRes2, dRes2, dTemp0 ;// TeRi
+
+ VADDL qTemp01, dSrc2c, dSrc2d ;// c+d
+ VADDL qTemp23, dSrcb, dSrce ;// b+e
+
+ VLD1 qSrcD01, [pSrc], srcStep ;// Load D register [a0 a1 a2 a3 ..]
+
+ VMLA dRes4, dTemp0, dCoeff20 ;// Acc += 20*(c+d)
+; VMLS dRes4, dTemp2, dCoeff5 ;// Acc -= 5*(b+e)
+ VMUL dTemp0, dTemp2, dCoeff5 ;// TeRi: dTemp0 = 5*(b+e), subtracted from Acc by the next VSUB
+
+
+ VEXT dSrcf, dSrcD0, dSrcD1, #5 ;// [f0 f1 f2 f3 ..]
+ VEXT dSrcb, dSrcD0, dSrcD1, #1 ;// [b0 b1 b2 b3 ..]
+ VEXT dSrc3c, dSrcD0, dSrcD1, #2
+ VEXT dSrc3d, dSrcD0, dSrcD1, #3
+ VEXT dSrce, dSrcD0, dSrcD1, #4
+ VADDL qRes67, dSrcD0, dSrcf ;// Acc=a+f
+
+ VSUB dRes4, dRes4, dTemp0 ;// TeRi
+
+ VADDL qTemp01, dSrc3c, dSrc3d ;// c+d
+ VADDL qTemp23, dSrcb, dSrce ;// b+e
+ VMLA dRes6, dTemp0, dCoeff20 ;// Acc += 20*(c+d)
+ VMLS dRes6, dTemp2, dCoeff5 ;// Acc -= 5*(b+e)
+
+ VQRSHRUN dAcc0, qRes01, #5 ;// Acc = Sat ((Acc + 16) / 32)
+ VQRSHRUN dAcc2, qRes23, #5 ;// Acc = Sat ((Acc + 16) / 32)
+ VQRSHRUN dAcc4, qRes45, #5 ;// Acc = Sat ((Acc + 16) / 32)
+ VQRSHRUN dAcc6, qRes67, #5 ;// Acc = Sat ((Acc + 16) / 32)
+ ;// Results stay in dAcc0/dAcc2/dAcc4/dAcc6 (D22, D24, D26, D28); no store is issued in this routine
+ M_END
+
+ ENDIF
+
+
+ END
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
new file mode 100755
index 0000000..89c90aa
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
@@ -0,0 +1,134 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+ EXPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+
+ IF CortexA8
+
+ M_START armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe, r11
+ ;// Applies the (1,-5,20,20,-5,1) filter across 9 source rows, leaving 4 rows of 4 bytes in dAcc0..dAcc3
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2 ;// Not referenced by any instruction in this routine
+dstStep RN 3 ;// Not referenced by any instruction in this routine
+
+Temp RN 12
+
+;// Declare Neon registers
+dCoeff5 DN 30.S16 ;// NOTE(review): never loaded in this routine; presumably set to 5 by the caller - confirm
+dCoeff20 DN 31.S16 ;// NOTE(review): never loaded in this routine; presumably set to 20 by the caller - confirm
+
+dSrc0 DN 7.U8
+dSrc1 DN 8.U8
+dSrc2 DN 9.U8
+dSrc3 DN 10.U8
+dSrc4 DN 11.U8
+dSrc5 DN 12.U8
+dSrc6 DN 13.U8
+dSrc7 DN 14.U8
+dSrc8 DN 15.U8
+
+qSumBE01 QN 8.S16
+qSumCD01 QN 9.S16
+dSumBE0 DN 16.S16 ;// Low half of qSumBE01
+dSumCD0 DN 18.S16 ;// Low half of qSumCD01
+
+qAcc01 QN 0.S16
+qAcc23 QN 1.S16
+qAcc45 QN 2.S16
+qAcc67 QN 3.S16
+
+dRes0 DN 0.S16 ;// dResN is the low half of the matching qAcc register
+dRes1 DN 2.S16
+dRes2 DN 4.S16
+dRes3 DN 6.S16
+
+dAcc0 DN 0.U8
+dAcc1 DN 2.U8
+dAcc2 DN 4.U8
+dAcc3 DN 6.U8
+
+
+dTmp0 DN 20.S16
+dTmp1 DN 21.S16
+dTmp2 DN 22.S16
+dTmp3 DN 23.S16
+
+
+ VLD1 dSrc0, [pSrc], srcStep ;// [a0 a1 a2 a3 .. ]
+ ADD Temp, pSrc, srcStep, LSL #2 ;// Temp = 4 rows past the updated pSrc, i.e. row 5
+ VLD1 dSrc1, [pSrc], srcStep ;// [b0 b1 b2 b3 .. ]
+ ;// One cycle stall
+ VLD1 dSrc5, [Temp], srcStep ;// Row 5
+ ;// One cycle stall
+ VLD1 dSrc2, [pSrc], srcStep ;// [c0 c1 c2 c3 .. ]
+ VADDL qAcc01, dSrc0, dSrc5 ;// Acc = a+f
+ VLD1 dSrc3, [pSrc], srcStep ;// Row 3
+ ;// One cycle stall
+ VLD1 dSrc6, [Temp], srcStep ;// TeRi
+
+ VLD1 dSrc4, [pSrc], srcStep ;// Row 4
+ VLD1 dSrc7, [Temp], srcStep ;// TeRi
+ VADDL qSumBE01, dSrc1, dSrc4 ;// b+e
+ VADDL qSumCD01, dSrc2, dSrc3 ;// c+d
+ VLD1 dSrc8, [Temp], srcStep ;// TeRi
+ VMLS dRes0, dSumBE0, dCoeff5 ;// Acc -= 5*(b+e)
+; VMLA dRes0, dSumCD0, dCoeff20 ;// Acc += 20*(c+d)
+ VMUL dTmp0, dSumCD0, dCoeff20 ;// Tmp0 = 20*(c+d); added to dRes0 by a VADD further down
+
+; VLD1 dSrc6, [Temp], srcStep
+ VADDL qSumBE01, dSrc2, dSrc5 ;// b+e
+ VADDL qSumCD01, dSrc3, dSrc4 ;// c+d
+ VADDL qAcc23, dSrc1, dSrc6 ;// Acc = a+f
+ VMLS dRes1, dSumBE0, dCoeff5 ;// Acc -= 5*(b+e)
+; VMLA dRes1, dSumCD0, dCoeff20 ;// Acc += 20*(c+d)
+ VMUL dTmp1, dSumCD0, dCoeff20 ;// Tmp1 = 20*(c+d); added to dRes1 by a VADD further down
+
+; VLD1 dSrc7, [Temp], srcStep
+ VADDL qSumBE01, dSrc3, dSrc6 ;// b+e
+ VADDL qSumCD01, dSrc4, dSrc5 ;// c+d
+ VADDL qAcc45, dSrc2, dSrc7 ;// Acc = a+f
+ VMLS dRes2, dSumBE0, dCoeff5 ;// Acc -= 5*(b+e)
+; VMLA dRes2, dSumCD0, dCoeff20 ;// Acc += 20*(c+d)
+ VMUL dTmp2, dSumCD0, dCoeff20 ;// Tmp2 = 20*(c+d); added to dRes2 by a VADD further down
+
+; VLD1 dSrc8, [Temp], srcStep ;// [i0 i1 i2 i3 .. ]
+ VADDL qSumBE01, dSrc4, dSrc7 ;// b+e
+ VADDL qAcc67, dSrc3, dSrc8 ;// Acc = a+f
+ VADDL qSumCD01, dSrc5, dSrc6 ;// c+d
+ VMLS dRes3, dSumBE0, dCoeff5 ;// Acc -= 5*(b+e)
+ VADD dRes0, dRes0, dTmp0 ;// Row 0 Acc += 20*(c+d)
+ VADD dRes1, dRes1, dTmp1 ;// Row 1 Acc += 20*(c+d)
+ VADD dRes2, dRes2, dTmp2 ;// Row 2 Acc += 20*(c+d)
+ VMLA dRes3, dSumCD0, dCoeff20 ;// Acc += 20*(c+d)
+; VMUL dTmp3, dSumCD0, dCoeff20 ;// Acc += 20*(c+d)
+; VADD dRes3, dRes3, dTmp3
+
+ VQRSHRUN dAcc0, qAcc01, #5 ;// Rounding shift right by 5, saturating narrow to unsigned 8 bit
+ VQRSHRUN dAcc1, qAcc23, #5
+ VQRSHRUN dAcc2, qAcc45, #5
+ VQRSHRUN dAcc3, qAcc67, #5
+ ;// Results stay in dAcc0..dAcc3 (D0, D2, D4, D6); no store is issued in this routine
+ M_END
+
+ ENDIF
+
+
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
new file mode 100755
index 0000000..0f0ec78
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
@@ -0,0 +1,318 @@
+;//
+;//
+;// File Name: armVCM4P10_Interpolate_Chroma_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+
+ IF CortexA8
+
+ M_TABLE armVCM4P10_WidthBranchTableMVIsNotZero ;// Jump table used when dx + dy != 0; read with byte offset iWidth*2
+ ;// Entries are 4 bytes each, so entry index = iWidth/2: width 2 -> entry 1, width 4 -> entry 2, width 8 -> entry 4
+ DCD WidthIs2MVIsNotZero, WidthIs2MVIsNotZero ;// entries 0-1 (entry 0 is not selected by widths 2/4/8)
+ DCD WidthIs4MVIsNotZero, WidthIs4MVIsNotZero ;// entries 2-3 (entry 3 is not selected by widths 2/4/8)
+ DCD WidthIs8MVIsNotZero ;// entry 4
+
+ M_TABLE armVCM4P10_WidthBranchTableMVIsZero ;// Same layout, used when dx + dy == 0 (plain copy paths)
+
+ DCD WidthIs2MVIsZero, WidthIs2MVIsZero ;// entries 0-1
+ DCD WidthIs4MVIsZero, WidthIs4MVIsZero ;// entries 2-3
+ DCD WidthIs8MVIsZero ;// entry 4
+
+
+;// input registers (r0-r3 arrive in registers; iWidth, iHeight, dx, dy are loaded from stack arguments by the function)
+
+pSrc RN 0
+iSrcStep RN 1
+pDst RN 2
+iDstStep RN 3
+iWidth RN 4
+iHeight RN 5
+dx RN 6
+dy RN 7
+
+;// local variable registers
+pc RN 15
+return RN 0 ;// shares r0 with pSrc
+EightMinusdx RN 8
+EightMinusdy RN 9
+
+ACoeff RN 12
+BCoeff RN 9 ;// shares r9 with EightMinusdy
+CCoeff RN 8 ;// shares r8 with EightMinusdx
+DCoeff RN 6 ;// shares r6 with dx
+
+pTable RN 11
+
+Step1 RN 10
+SrcStepMinus1 RN 14
+
+dACoeff DN D12.U8
+dBCoeff DN D13.U8
+dCCoeff DN D14.U8
+dDCoeff DN D15.U8
+
+dRow0a DN D0.U8
+dRow0b DN D1.U8
+dRow1a DN D2.U8
+dRow1b DN D3.U8
+
+qRow0a QN Q2.S16 ;// accumulator for output row 0
+qRow0b QN Q3.S16 ;// accumulator for output row 1
+
+;//dIndex DN D16.U8
+qRow1a QN Q11.S16 ;// accumulator for output row 2
+qRow1b QN Q12.S16 ;// accumulator for output row 3
+
+dRow2a DN D16.U8
+dRow2b DN D17.U8
+dRow3a DN D18.U8
+dRow3b DN D19.U8
+
+qOutRow2 QN Q11.U16 ;// same register as qRow1a, viewed as U16
+qOutRow3 QN Q12.U16 ;// same register as qRow1b, viewed as U16
+dOutRow2 DN D20.U8
+dOutRow3 DN D21.U8
+dOutRow2U64 DN D20.U64
+dOutRow3U64 DN D21.U64
+
+qOutRow0 QN Q2.U16 ;// same register as qRow0a, viewed as U16
+qOutRow1 QN Q3.U16 ;// same register as qRow0b, viewed as U16
+dOutRow0 DN D8.U8
+dOutRow1 DN D9.U8
+
+dOutRow0U64 DN D8.U64
+dOutRow1U64 DN D9.U64
+
+dOutRow0U32 DN D8.U32
+dOutRow1U32 DN D9.U32
+
+dOutRow0U16 DN D8.U16
+dOutRow1U16 DN D9.U16
+
+;// The dOut* aliases below are alternate views of D0-D3, i.e. the same registers as dRow0a/dRow0b/dRow1a/dRow1b
+dOut0U64 DN D0.U64
+dOut1U64 DN D1.U64
+
+dOut00U32 DN D0.U32
+dOut01U32 DN D1.U32
+dOut10U32 DN D2.U32
+dOut11U32 DN D3.U32
+
+dOut0U16 DN D0.U16
+dOut1U16 DN D1.U16
+
+;//-----------------------------------------------------------------------------------------------
+;// armVCM4P10_Interpolate_Chroma_asm starts
+;//-----------------------------------------------------------------------------------------------
+
+ ;// armVCM4P10_Interpolate_Chroma: out = (A*x00 + B*x01 + C*x10 + D*x11), rounded, shifted right by 6 and saturated to 8 bits
+ M_START armVCM4P10_Interpolate_Chroma, r11, d15
+
+ ;// Define stack arguments
+ M_ARG Width, 4
+ M_ARG Height, 4
+ M_ARG Dx, 4
+ M_ARG Dy, 4
+
+ ;// Load argument from the stack
+ ;// M_STALL ARM1136JS=4
+
+ M_LDRD dx, dy, Dx
+ M_LDRD iWidth, iHeight, Width
+
+ ;// EightMinusdx = 8 - dx
+ ;// EightMinusdy = 8 - dy
+
+ ;// ACoeff = EightMinusdx * EightMinusdy
+ ;// BCoeff = dx * EightMinusdy
+ ;// CCoeff = EightMinusdx * dy
+ ;// DCoeff = dx * dy
+
+ RSB EightMinusdx, dx, #8
+ RSB EightMinusdy, dy, #8
+ CMN dx,dy ;// Flags from dx + dy: EQ when the sum is zero (dx = dy = 0 for non-negative offsets)
+ MOV Step1, #1
+ LDREQ pTable, =armVCM4P10_WidthBranchTableMVIsZero
+ SUB SrcStepMinus1, iSrcStep, Step1 ;// iSrcStep - 1, so the "a" + "b" loads of one row advance pSrc by iSrcStep in total
+ LDRNE pTable, =armVCM4P10_WidthBranchTableMVIsNotZero
+
+ VLD1 dRow0a, [pSrc], Step1 ;// 0a: row 0 starting at column 0; pSrc += 1
+ ;// The multiply order below matters: BCoeff/CCoeff/DCoeff overwrite EightMinusdy/EightMinusdx/dx (shared registers)
+ SMULBB ACoeff, EightMinusdx, EightMinusdy
+ SMULBB BCoeff, dx, EightMinusdy ;// overwrites EightMinusdy (r9)
+ VLD1 dRow0b, [pSrc], SrcStepMinus1 ;// 0b: row 0 starting at column 1; pSrc now at the start of row 1
+ SMULBB CCoeff, EightMinusdx, dy ;// overwrites EightMinusdx (r8)
+ SMULBB DCoeff, dx, dy ;// overwrites dx (r6)
+
+ VDUP dACoeff, ACoeff ;// Replicate each weight into every U8 lane
+ VDUP dBCoeff, BCoeff
+ VDUP dCCoeff, CCoeff
+ VDUP dDCoeff, DCoeff
+
+ LDR pc, [pTable, iWidth, LSL #1] ;// Branch to the case based on iWidth (byte offset iWidth*2 into the 4-byte-entry table)
+
+;// Pixel layout:
+;//
+;// x00 x01 x02
+;// x10 x11 x12
+;// x20 x21 x22
+
+;// If fractional mv is not (0, 0)
+WidthIs8MVIsNotZero
+ ;// Four output rows per pass; dRow0a/dRow0b already hold the first source row on entry (prologue or previous pass)
+ VLD1 dRow1a, [pSrc], Step1 ;// 1a
+ VMULL qRow0a, dRow0a, dACoeff
+ VLD1 dRow1b, [pSrc], SrcStepMinus1 ;// 1b
+ VMULL qRow0b, dRow1a, dACoeff
+ VLD1 dRow2a, [pSrc], Step1 ;// 2a
+ VMLAL qRow0a, dRow0b, dBCoeff
+ VLD1 dRow2b, [pSrc], SrcStepMinus1 ;// 2b
+ VMULL qRow1a, dRow2a, dACoeff
+ VMLAL qRow0b, dRow1b, dBCoeff
+ VLD1 dRow3a, [pSrc], Step1 ;// 3a
+ VMLAL qRow0a, dRow1a, dCCoeff
+ VMLAL qRow1a, dRow2b, dBCoeff
+ VMULL qRow1b, dRow3a, dACoeff
+ VLD1 dRow3b, [pSrc], SrcStepMinus1 ;// 3b
+ VMLAL qRow0b, dRow2a, dCCoeff
+ VLD1 dRow0a, [pSrc], Step1 ;// 0a: fifth source row, also the first row of the next pass
+ VMLAL qRow1b, dRow3b, dBCoeff
+ VMLAL qRow1a, dRow3a, dCCoeff
+ VMLAL qRow0a, dRow1b, dDCoeff
+ VLD1 dRow0b, [pSrc], SrcStepMinus1 ;// 0b
+ VMLAL qRow1b, dRow0a, dCCoeff
+ VMLAL qRow0b, dRow2b, dDCoeff
+ VMLAL qRow1a, dRow3b, dDCoeff
+
+
+ SUBS iHeight, iHeight, #4 ;// Sets the flags consumed by the BGT below (NEON ops in between do not change them)
+ VMLAL qRow1b, dRow0b, dDCoeff
+
+ VQRSHRN dOutRow0, qOutRow0, #6 ;// Round, shift right by 6, saturate-narrow to U8
+ VQRSHRN dOutRow1, qOutRow1, #6
+ VQRSHRN dOutRow2, qOutRow2, #6
+ VST1 dOutRow0U64, [pDst], iDstStep
+ VQRSHRN dOutRow3, qOutRow3, #6
+
+ VST1 dOutRow1U64, [pDst], iDstStep
+ VST1 dOutRow2U64, [pDst], iDstStep
+ VST1 dOutRow3U64, [pDst], iDstStep
+
+
+ BGT WidthIs8MVIsNotZero
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+WidthIs4MVIsNotZero
+ ;// Two output rows per pass; all 8 lanes are computed but only the low 4 bytes of each row are stored
+ VLD1 dRow1a, [pSrc], Step1
+ VMULL qRow0a, dRow0a, dACoeff
+ VMULL qRow0b, dRow1a, dACoeff
+ VLD1 dRow1b, [pSrc], SrcStepMinus1
+ VMLAL qRow0a, dRow0b, dBCoeff
+ VMLAL qRow0b, dRow1b, dBCoeff
+ VLD1 dRow0a, [pSrc], Step1 ;// third source row, also the first row of the next pass
+ VMLAL qRow0a, dRow1a, dCCoeff
+ VMLAL qRow0b, dRow0a, dCCoeff
+ VLD1 dRow0b, [pSrc], SrcStepMinus1
+ SUBS iHeight, iHeight, #2
+ VMLAL qRow0b, dRow0b, dDCoeff
+ VMLAL qRow0a, dRow1b, dDCoeff
+
+ VQRSHRN dOutRow1, qOutRow1, #6
+ VQRSHRN dOutRow0, qOutRow0, #6
+
+ VST1 dOutRow0U32[0], [pDst], iDstStep ;// store 4 bytes (U32 lane 0)
+ VST1 dOutRow1U32[0], [pDst], iDstStep
+
+ BGT WidthIs4MVIsNotZero
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+WidthIs2MVIsNotZero
+ ;// Same computation as the width-4 case; only the low 2 bytes of each row are stored
+ VLD1 dRow1a, [pSrc], Step1
+ VMULL qRow0a, dRow0a, dACoeff
+ VMULL qRow0b, dRow1a, dACoeff
+ VLD1 dRow1b, [pSrc], SrcStepMinus1
+ VMLAL qRow0a, dRow0b, dBCoeff
+ VMLAL qRow0b, dRow1b, dBCoeff
+ VLD1 dRow0a, [pSrc], Step1
+ VMLAL qRow0a, dRow1a, dCCoeff
+ VMLAL qRow0b, dRow0a, dCCoeff
+ VLD1 dRow0b, [pSrc], SrcStepMinus1
+ SUBS iHeight, iHeight, #2
+ VMLAL qRow0b, dRow0b, dDCoeff
+ VMLAL qRow0a, dRow1b, dDCoeff
+
+ VQRSHRN dOutRow1, qOutRow1, #6
+ VQRSHRN dOutRow0, qOutRow0, #6
+
+ VST1 dOutRow0U16[0], [pDst], iDstStep ;// store 2 bytes (U16 lane 0)
+ VST1 dOutRow1U16[0], [pDst], iDstStep
+
+ BGT WidthIs2MVIsNotZero
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+;// If fractional mv is (0, 0)
+WidthIs8MVIsZero
+ SUB pSrc, pSrc, iSrcStep ;// Undo the prologue loads (Step1 + SrcStepMinus1 = iSrcStep): back to row 0
+
+WidthIs8LoopMVIsZero
+ VLD1 dRow0a, [pSrc], iSrcStep ;// Copy two 8-byte rows per pass; dOut0U64/dOut1U64 are D0/D1, the registers just loaded
+ SUBS iHeight, iHeight, #2
+ VLD1 dRow0b, [pSrc], iSrcStep
+ VST1 dOut0U64, [pDst], iDstStep
+ VST1 dOut1U64, [pDst], iDstStep
+ BGT WidthIs8LoopMVIsZero
+
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+WidthIs4MVIsZero
+ VLD1 dRow0b, [pSrc], iSrcStep ;// D0 still holds row 0 from the prologue; pSrc is already at row 1, loaded here into D1
+
+ SUBS iHeight, iHeight, #2
+
+ VST1 dOut00U32[0], [pDst], iDstStep ;// store 4 bytes from D0
+ VLD1 dRow0a, [pSrc], iSrcStep ;// preload the first row of the next pass into D0
+ VST1 dOut01U32[0], [pDst], iDstStep ;// store 4 bytes from D1
+
+ BGT WidthIs4MVIsZero
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+WidthIs2MVIsZero
+ VLD1 dRow0b, [pSrc], iSrcStep ;// Same scheme as the width-4 copy, storing 2 bytes per row
+ SUBS iHeight, iHeight, #2
+
+ VST1 dOut0U16[0], [pDst], iDstStep
+ VLD1 dRow0a, [pSrc], iSrcStep
+ VST1 dOut1U16[0], [pDst], iDstStep
+
+ BGT WidthIs2MVIsZero
+ MOV return, #OMX_Sts_NoErr
+ M_END
+
+ ENDIF ;// CortexA8
+
+ END
+
+;//-----------------------------------------------------------------------------------------------
+;// armVCM4P10_Interpolate_Chroma_asm ends
+;//-----------------------------------------------------------------------------------------------
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_QuantTables_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_QuantTables_s.s
new file mode 100755
index 0000000..7e2642b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_QuantTables_s.s
@@ -0,0 +1,74 @@
+;//
+;//
+;// File Name: armVCM4P10_QuantTables_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;// Description:
+;// This file contains quantization tables
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+
+ EXPORT armVCM4P10_MFMatrixQPModTable
+ EXPORT armVCM4P10_QPDivIntraTable
+ EXPORT armVCM4P10_QPDivPlusOneTable
+
+;//--------------------------------------------------------------
+;// This table contains armVCM4P10_MFMatrix [iQP % 6][0] entries,
+;// for values of iQP from 0 to 51 (inclusive).
+;//--------------------------------------------------------------
+
+ M_TABLE armVCM4P10_MFMatrixQPModTable
+ DCW 13107, 11916, 10082, 9362, 8192, 7282 ;// iQP 0..5; 16-bit entries, the same six values repeat on every row
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282 ;// iQP 48..53; 9 rows x 6 = 54 entries, two more than the documented 0..51 range
+
+;//---------------------------------------------------------------
+;// This table contains ARM_M4P10_Q_OFFSET + 1 + (iQP / 6) values,
+;// for values of iQP from 0 to 51 (inclusive).
+;//---------------------------------------------------------------
+
+ M_TABLE armVCM4P10_QPDivPlusOneTable
+ DCB 16, 16, 16, 16, 16, 16 ;// iQP 0..5; byte entries, one row per value of iQP / 6
+ DCB 17, 17, 17, 17, 17, 17
+ DCB 18, 18, 18, 18, 18, 18
+ DCB 19, 19, 19, 19, 19, 19
+ DCB 20, 20, 20, 20, 20, 20
+ DCB 21, 21, 21, 21, 21, 21
+ DCB 22, 22, 22, 22, 22, 22
+ DCB 23, 23, 23, 23, 23, 23
+ DCB 24, 24, 24, 24, 24, 24 ;// iQP 48..53; the last two entries lie beyond the documented 0..51 range
+
+;//------------------------------------------------------------------
+;// This table contains (1 << QbitsPlusOne) / 3 Values (Intra case) ,
+;// for values of iQP from 0 to 51 (inclusive).
+;//------------------------------------------------------------------
+
+ M_TABLE armVCM4P10_QPDivIntraTable, 2 ;// NOTE(review): the second macro argument is presumably an alignment request - confirm in armCOMM_s.h
+ DCD 21845, 21845, 21845, 21845, 21845, 21845 ;// iQP 0..5: (1 << 16) / 3; 32-bit entries, the value doubles on each following row
+ DCD 43690, 43690, 43690, 43690, 43690, 43690
+ DCD 87381, 87381, 87381, 87381, 87381, 87381
+ DCD 174762, 174762, 174762, 174762, 174762, 174762
+ DCD 349525, 349525, 349525, 349525, 349525, 349525
+ DCD 699050, 699050, 699050, 699050, 699050, 699050
+ DCD 1398101, 1398101, 1398101, 1398101, 1398101, 1398101
+ DCD 2796202, 2796202, 2796202, 2796202, 2796202, 2796202
+ DCD 5592405, 5592405, 5592405, 5592405, 5592405, 5592405 ;// iQP 48..53: (1 << 24) / 3; last two entries are beyond the documented range
+
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
new file mode 100755
index 0000000..ee9c339
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
@@ -0,0 +1,186 @@
+;//
+;//
+;// File Name: armVCM4P10_TransformResidual4x4_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// Description:
+;// Transform Residual 4x4 Coefficients
+;//
+;//
+
+
+;// Include standard headers
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+;// Import symbols required from other files
+;// (For example tables)
+
+
+
+
+;// Set debugging level
+;//DEBUG_ON SETL {TRUE}
+
+
+
+;// Guarding implementation by the processor name
+
+
+
+
+
+
+
+
+;// Guarding implementation by the processor name
+
+ IF CortexA8
+
+;// ARM Registers
+
+;//Input Registers
+pDst RN 0
+pSrc RN 1
+
+
+;// Neon Registers
+
+;// Packed Input pixels
+dIn0 DN D0.S16
+dIn1 DN D1.S16
+dIn2 DN D2.S16
+dIn3 DN D3.S16
+
+;// Intermediate calculations (several names deliberately share a register; see the notes on each line)
+dZero DN D4.S16
+de0 DN D5.S16
+de1 DN D6.S16
+de2 DN D7.S16
+de3 DN D8.S16
+dIn1RS DN D7.S16 ;// shares D7 with de2
+dIn3RS DN D8.S16 ;// shares D8 with de3
+df0 DN D0.S16 ;// df0..df3 reuse D0..D3 (the input registers)
+df1 DN D1.S16
+df2 DN D2.S16
+df3 DN D3.S16
+qf01 QN Q0.32 ;// D0:D1 viewed as 32-bit elements for the transpose
+qf23 QN Q1.32 ;// D2:D3 viewed as 32-bit elements for the transpose
+dg0 DN D5.S16 ;// dg0..dg3 reuse D5..D8 (the registers of de0..de3)
+dg1 DN D6.S16
+dg2 DN D7.S16
+dg3 DN D8.S16
+df1RS DN D7.S16 ;// shares D7 with dg2
+df3RS DN D8.S16 ;// shares D8 with dg3
+
+;// Output pixels (reuse D0..D3 again)
+dh0 DN D0.S16
+dh1 DN D1.S16
+dh2 DN D2.S16
+dh3 DN D3.S16
+
+
+ ;// Allocate stack memory required by the function (no stack allocation is made here)
+
+
+ ;// Write function header
+ M_START armVCM4P10_TransformResidual4x4, ,d8 ;// NOTE(review): d8 presumably names the highest NEON register to preserve - confirm in armCOMM_s.h
+
+ ;******************************************************************
+ ;// The strategy used in implementing the transform is as follows:*
+ ;// Load the 4x4 block into 4 D registers with VLD4 *
+ ;// (VLD4 de-interleaves, so the block arrives transposed) *
+ ;// Perform the row operations (on columns) using SIMD *
+ ;// Transpose the 4x4 result matrix *
+ ;// Perform the column operations *
+ ;// Store the 4x4 block at one go *
+ ;******************************************************************
+
+ ;// Load all the 4x4 pixels in transposed form
+
+ VLD4 {dIn0,dIn1,dIn2,dIn3},[pSrc]
+
+ VMOV dZero,#0 ;// Used to right shift by 1 (halving add with zero)
+
+
+ ;****************************************
+ ;// Row Operations (Performed on columns)
+ ;****************************************
+
+
+ VADD de0,dIn0,dIn2 ;// e0 = d0 + d2
+ VSUB de1,dIn0,dIn2 ;// e1 = d0 - d2
+ VHADD dIn1RS,dIn1,dZero ;// (d1>>1) dZero is a register holding 0
+ VHADD dIn3RS,dIn3,dZero ;// (d3>>1)
+ VSUB de2,dIn1RS,dIn3 ;// e2 = (d1>>1) - d3
+ VADD de3,dIn1,dIn3RS ;// e3 = d1 + (d3>>1)
+ VADD df0,de0,de3 ;// f0 = e0 + e3
+ VADD df1,de1,de2 ;// f1 = e1 + e2
+ VSUB df2,de1,de2 ;// f2 = e1 - e2
+ VSUB df3,de0,de3 ;// f3 = e0 - e3
+
+
+
+ ;*****************************************************************
+ ;// Transpose the resultant matrix
+ ;*****************************************************************
+
+ VTRN df0,df1 ;// 16-bit element transpose within each register pair...
+ VTRN df2,df3
+ VTRN qf01,qf23 ;// ...then 32-bit element transpose across the pairs completes the 4x4 transpose
+
+
+ ;*******************************
+ ;// Column Operations
+ ;*******************************
+
+
+ VADD dg0,df0,df2 ;// g0 = f0 + f2
+ VSUB dg1,df0,df2 ;// g1 = f0 - f2
+ VHADD df1RS,df1,dZero ;// (f1>>1) dZero is a register holding 0
+ VHADD df3RS,df3,dZero ;// (f3>>1)
+ VSUB dg2,df1RS,df3 ;// g2 = (f1>>1) - f3
+ VADD dg3,df1,df3RS ;// g3 = f1 + (f3>>1)
+ VADD dh0,dg0,dg3 ;// h0 = g0 + g3
+ VADD dh1,dg1,dg2 ;// h1 = g1 + g2
+ VSUB dh2,dg1,dg2 ;// h2 = g1 - g2
+ VSUB dh3,dg0,dg3 ;// h3 = g0 - g3
+
+
+ ;************************************************
+ ;// Calculate final value (colOp[i][j] + 32)>>6
+ ;************************************************
+
+ VRSHR dh0,#6 ;// rounding shift right: adds 32 before shifting by 6
+ VRSHR dh1,#6
+ VRSHR dh2,#6
+ VRSHR dh3,#6
+
+
+ ;***************************
+ ;// Store all the 4x4 pixels
+ ;***************************
+
+ VST1 {dh0,dh1,dh2,dh3},[pDst]
+
+
+ ;// Set return value (no instruction follows; no return register is written here)
+
+End
+
+
+ ;// Write function tail
+ M_END
+
+ ENDIF ;//CortexA8
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
new file mode 100755
index 0000000..4c52e22
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
@@ -0,0 +1,92 @@
+;//
+;//
+;// File Name: armVCM4P10_UnpackBlock4x4_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+;// Define the processor variants supported by this file
+
+ M_VARIANTS ARM1136JS
+
+
+ IF ARM1136JS
+
+;//--------------------------------------
+;// Input Arguments and their scope/usage
+;//--------------------------------------
+ppSrc RN 0 ;// Persistent variable
+pDst RN 1 ;// Persistent variable
+
+;//--------------------------------
+;// Variables and their scope/usage
+;//--------------------------------
+pSrc RN 2 ;// Persistent variables
+Flag RN 3 ;// Control byte read from the stream before each value
+Value RN 4 ;// Also the first register of the r4:r5 pair written by the STRD instructions
+Value2 RN 5 ;// High byte of a 16-bit value; second register of the STRD pair
+strOffset RN 6 ;// Byte offset into pDst for the current value
+cstOffset RN 7 ;// Constant mask 31 used to compute strOffset
+
+
+ M_START armVCM4P10_UnpackBlock4x4, r7 ;// Zero a 32-byte destination block, then scatter packed (flag, value) entries into it
+
+ LDR pSrc, [ppSrc] ;// Load pSrc
+ MOV cstOffset, #31 ;// To be used in the loop, to compute offset
+
+ ;//-----------------------------------------------------------------------
+ ;// First, fill all the coefficient values in the <pDst> buffer with zero
+ ;//-----------------------------------------------------------------------
+
+ MOV Value, #0 ;// Initialize the zero value
+ MOV Value2, #0 ;// Initialize the zero value
+ LDRB Flag, [pSrc], #1 ;// Preload <Flag> before <unpackLoop>
+
+ STRD Value, [pDst, #0] ;// pDst[0] = pDst[1] = pDst[2] = pDst[3] = 0
+ STRD Value, [pDst, #8] ;// pDst[4] = pDst[5] = pDst[6] = pDst[7] = 0
+ STRD Value, [pDst, #16] ;// pDst[8] = pDst[9] = pDst[10] = pDst[11] = 0
+ STRD Value, [pDst, #24] ;// pDst[12] = pDst[13] = pDst[14] = pDst[15] = 0
+
+ ;//----------------------------------------------------------------------------
+ ;// The loop below parses and unpacks the input stream. The C-model has
+ ;// a somewhat complicated logic for sign extension. But in the v6 version,
+ ;// that can be easily taken care by loading the data from <pSrc> stream as
+ ;// SIGNED byte/halfword. So, based on the first TST instruction, 8-bits or
+ ;// 16-bits are read.
+ ;//
+ ;// Next, to compute the offset, where the unpacked value needs to be stored,
+ ;// we modify the computation to perform [(Flag & 15) << 1] as [(Flag << 1) & 31]
+ ;// This results in a saving of one cycle.
+ ;//----------------------------------------------------------------------------
+
+unpackLoop
+ TST Flag, #0x10 ;// Computing (Flag & 0x10): NE selects a 16-bit value, EQ an 8-bit value
+ LDRSBNE Value2,[pSrc,#1] ;// Load byte wise to avoid unaligned access (sign-extended high byte)
+ LDRBNE Value, [pSrc], #2 ;// Zero-extended low byte; pSrc += 2
+ AND strOffset, cstOffset, Flag, LSL #1 ;// strOffset = (Flag & 15) << 1; flags are left untouched
+ LDRSBEQ Value, [pSrc], #1 ;// Value = sign-extended 8-bit *pSrc++
+ ORRNE Value,Value,Value2, LSL #8 ;// Value = low byte | (sign-extended high byte << 8)
+
+ TST Flag, #0x20 ;// Computing (Flag & 0x20) to check, if we're done
+ LDRBEQ Flag, [pSrc], #1 ;// Flag = (OMX_U8) *pSrc++, for next iteration
+ STRH Value, [pDst, strOffset] ;// Store <Value> at offset <strOffset>
+ BEQ unpackLoop ;// Branch to the loop beginning (bit 0x20 clear means more entries follow)
+
+ STR pSrc, [ppSrc] ;// Update the bitstream pointer
+ M_END
+
+ ENDIF
+
+
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
new file mode 100755
index 0000000..40d4d5e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
@@ -0,0 +1,88 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DeblockChroma_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 intra chroma deblock
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DeblockChroma_I
+ *
+ * Description:
+ * Performs deblocking filtering on all edges of the chroma macroblock (16x16).
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDst pointer to the input macroblock. Must be 8-byte aligned.
+ * [in] srcdstStep Step of the arrays
+ * [in] pAlpha pointer to a 2x2 array of alpha thresholds, organized as follows: { external
+ * vertical edge, internal vertical edge, external
+ * horizontal edge, internal horizontal edge }
+ * [in] pBeta pointer to a 2x2 array of beta thresholds, organized as follows: { external
+ * vertical edge, internal vertical edge, external horizontal edge,
+ * internal horizontal edge }
+ * [in] pThresholds Array of size 8x2 of Thresholds (TC0) (values for the left or
+ * above edge of each 4x2 or 2x4 block, arranged in vertical block order
+ * and then in horizontal block order)
+ * [in] pBS array of size 16x2 of BS parameters (arranged in scan block order for vertical edges and then horizontal edges);
+ * valid in the range [0,4] with the following restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^1]== 4. Must be 4-byte aligned.
+ * [out] pSrcDst pointer to filtered output macroblock
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - Either of the pointers in pSrcDst, pAlpha, pBeta, pThresholds, or pBS is NULL.
+ * - pSrcDst is not 8-byte aligned.
+ * - either pThresholds or pBS is not 4-byte aligned.
+ * - pBS is out of range, i.e., one of the following conditions is true: pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && pBS[i^1]!=4) for 0<=i<=3.
+ * - srcdstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_DeblockChroma_I(
+ OMX_U8* pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8* pAlpha,
+ const OMX_U8* pBeta,
+ const OMX_U8* pThresholds,
+ const OMX_U8 *pBS
+)
+{
+ OMXResult errorCode;
+ /* Validate arguments: non-NULL pointers, 8-byte aligned pSrcDst, step multiple of 8, 4-byte aligned pThresholds and pBS */
+ armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf(armNot8ByteAligned(pSrcDst), OMX_Sts_BadArgErr);
+ armRetArgErrIf(srcdstStep & 7, OMX_Sts_BadArgErr);
+ armRetArgErrIf(pAlpha == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf(pBeta == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf(pThresholds == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
+ armRetArgErrIf(pBS == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr);
+ /* Vertical edges first, using the first half of each parameter array */
+ errorCode = omxVCM4P10_FilterDeblockingChroma_VerEdge_I(
+ pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS);
+ /* Stop and propagate the status if the vertical pass failed */
+ armRetArgErrIf(errorCode != OMX_Sts_NoErr, errorCode)
+ /* Horizontal edges use the second half: alpha/beta +2, thresholds +8, BS +16 */
+ errorCode = omxVCM4P10_FilterDeblockingChroma_HorEdge_I(
+ pSrcDst, srcdstStep, pAlpha+2, pBeta+2, pThresholds+8, pBS+16);
+
+ return errorCode;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
new file mode 100755
index 0000000..619365f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
@@ -0,0 +1,91 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DeblockLuma_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 luma deblock
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+
+/**
+ * Function: omxVCM4P10_DeblockLuma_I
+ *
+ * Description:
+ * This function performs deblock filtering the horizontal and vertical edges of a luma macroblock
+ *(16x16).
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDst pointer to the input macroblock. Must be 8-byte aligned.
+ * [in] srcdstStep image width
+ * [in] pAlpha pointer to a 2x2 table of alpha thresholds, organized as follows: { external
+ * vertical edge, internal vertical edge, external horizontal
+ * edge, internal horizontal edge }
+ * [in] pBeta pointer to a 2x2 table of beta thresholds, organized as follows: { external
+ * vertical edge, internal vertical edge, external horizontal edge,
+ * internal horizontal edge }
+ * [in] pThresholds pointer to a 16x2 table of threshold (TC0), organized as follows: { values for
+ * the left or above edge of each 4x4 block, arranged in vertical block order
+ * and then in horizontal block order)
+ * [in] pBS pointer to a 16x2 table of BS parameters arranged in scan block order for vertical edges and then horizontal edges;
+ * valid in the range [0,4] with the following restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^1]== 4. Must be 4-byte aligned.
+ * [out] pSrcDst pointer to filtered output macroblock.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - Either of the pointers in pSrcDst, pAlpha, pBeta, pThresholds or pBS is NULL.
+ * - pSrcDst is not 8-byte aligned.
+ * - srcdstStep is not a multiple of 8
+ * - pBS is out of range, i.e., one of the following conditions is true: pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && pBS[i^1]!=4) for 0<=i<=3.
+ * - either pThresholds or pBS is not 4-byte aligned.
+ *
+ */
+
+OMXResult omxVCM4P10_DeblockLuma_I(
+ OMX_U8* pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8* pAlpha,
+ const OMX_U8* pBeta,
+ const OMX_U8* pThresholds,
+ const OMX_U8 *pBS
+)
+{
+ OMXResult errorCode;
+ /* Validate arguments: non-NULL pointers, 8-byte aligned pSrcDst, step multiple of 8, 4-byte aligned pThresholds and pBS */
+ armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf(armNot8ByteAligned(pSrcDst), OMX_Sts_BadArgErr);
+ armRetArgErrIf(srcdstStep & 7, OMX_Sts_BadArgErr);
+ armRetArgErrIf(pAlpha == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf(pBeta == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf(pThresholds == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
+ armRetArgErrIf(pBS == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr);
+ /* Vertical edges first, using the first half of each parameter array */
+ errorCode = omxVCM4P10_FilterDeblockingLuma_VerEdge_I(
+ pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS);
+ /* Stop and propagate the status if the vertical pass failed */
+ armRetArgErrIf(errorCode != OMX_Sts_NoErr, errorCode)
+ /* Horizontal edges use the second half: alpha/beta +2, thresholds +16, BS +16 */
+ errorCode = omxVCM4P10_FilterDeblockingLuma_HorEdge_I(
+ pSrcDst, srcdstStep, pAlpha+2, pBeta+2, pThresholds+16, pBS+16);
+
+ return errorCode;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
new file mode 100755
index 0000000..4e871bf
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
@@ -0,0 +1,62 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 decode coefficients module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse raster scan for 2x2 block of
+ * ChromaDCLevel. The decoded coefficients in packed position-coefficient
+ * buffer are stored in increasing raster scan order, namely position order.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream Double pointer to current byte in bit stream
+ * buffer
+ * [in] pOffset Pointer to current bit position in the byte
+ * pointed to by *ppBitStream
+ * [out] ppBitStream *ppBitStream is updated after each block is decoded
+ * [out] pOffset *pOffset is updated after each block is decoded
+ * [out] pNumCoeff Pointer to the number of nonzero coefficients
+ * in this block
+ * [out] ppPosCoefbuf Double pointer to destination residual
+ * coefficient-position pair buffer
+ *
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (
+ const OMX_U8** ppBitStream,
+ OMX_S32* pOffset,
+ OMX_U8* pNumCoeff,
+ OMX_U8** ppPosCoefbuf
+ )
+ /* Thin wrapper: forwards to the shared CAVLC helper with fixed selector arguments */
+{
+ return armVCM4P10_DecodeCoeffsToPair(ppBitStream, pOffset, pNumCoeff,
+ ppPosCoefbuf, 17, 4); /* 17 and 4 fill the helper's sVLCSelect and sMaxNumCoeff positions (4 = coefficients in a 2x2 block) */
+
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
new file mode 100755
index 0000000..b29e576
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
@@ -0,0 +1,68 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 decode coefficients module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of
+ * Intra16x16DCLevel, Intra16x16ACLevel,LumaLevel, and ChromaACLevel.
+ * Inverse field scan is not supported. The decoded coefficients in packed
+ * position-coefficient buffer are stored in increasing zigzag order instead
+ * of position order.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream Double pointer to current byte in bit stream buffer
+ * [in] pOffset Pointer to current bit position in the byte pointed
+ * to by *ppBitStream
+ * [in] sMaxNumCoeff Maximum number of non-zero coefficients in current
+ * block
+ * [in] sVLCSelect VLC table selector, obtained from number of non-zero
+ * AC coefficients of above and left 4x4 blocks. It is
+ * equivalent to the variable nC described in H.264 standard
+ * table 9-5, except its value can't be less than zero.
+ * [out] ppBitStream *ppBitStream is updated after each block is decoded
+ * [out] pOffset *pOffset is updated after each block is decoded
+ * [out] pNumCoeff Pointer to the number of nonzero coefficients in
+ * this block
+ * [out] ppPosCoefbuf Double pointer to destination residual
+ * coefficient-position pair buffer
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC(
+ const OMX_U8** ppBitStream,
+ OMX_S32* pOffset,
+ OMX_U8* pNumCoeff,
+ OMX_U8**ppPosCoefbuf,
+ OMX_INT sVLCSelect,
+ OMX_INT sMaxNumCoeff
+ )
+{ /* Thin wrapper: performs no checks of its own and passes every argument straight to the shared CAVLC helper */
+ return armVCM4P10_DecodeCoeffsToPair(ppBitStream, pOffset, pNumCoeff,
+ ppPosCoefbuf, sVLCSelect, sMaxNumCoeff); /* returns the helper's status unchanged */
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
new file mode 100755
index 0000000..485a488
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
@@ -0,0 +1,396 @@
+;//
+;//
+;// File Name: omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// Description:
+;// H.264 inverse quantize and transform module
+;//
+;//
+
+
+
+;// Include standard headers
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+;// Import symbols required from other files
+;// (For example tables)
+
+ IMPORT armVCM4P10_UnpackBlock4x4
+ IMPORT armVCM4P10_TransformResidual4x4
+ IMPORT armVCM4P10_QPDivTable
+ IMPORT armVCM4P10_VMatrixU16
+ IMPORT armVCM4P10_QPModuloTable
+
+ M_VARIANTS CortexA8
+
+;// Set debugging level
+;//DEBUG_ON SETL {TRUE}
+
+
+;// Static Function: armVCM4P10_DequantLumaAC4x4
+
+;// Guarding implementation by the processor name
+
+
+
+;// Guarding implementation by the processor name
+
+
+
+
+
+
+;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd
+
+;// Guarding implementation by the processor name
+
+
+
+;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd
+
+;// Guarding implementation by the processor name
+
+ IF CortexA8
+
+
+;// ARM Registers
+
+;//Input Registers
+ppSrc RN 0
+pPred RN 1
+pDC RN 2
+pDst RN 3
+
+
+;//Output Registers
+result RN 0
+
+;//Local Scratch Registers
+
+;//Registers used in armVCM4P10_DequantLumaAC4x4
+pQPdiv RN 10
+pQPmod RN 11
+pVRow RN 2
+QPmod RN 12
+shift RN 14
+index0 RN 1
+index1 RN 10
+
+;//Registers used in DequantTransformResidualFromPairAndAdd
+pDelta RN 4
+pDeltaTmp RN 6
+AC RN 5 ;//Load from stack
+pPredTemp RN 7
+pDCTemp RN 8
+pDstTemp RN 9
+pDeltaArg1 RN 1
+pDeltaArg0 RN 0
+QP RN 1 ;//Load from stack
+DCval RN 10
+predstep RN 1
+dstStep RN 10
+PredVal1 RN 3
+PredVal2 RN 5
+
+
+
+
+;// Neon Registers
+
+;// Registers used in armVCM4P10_DequantLumaAC4x4
+
+dVmatrix DN D6.8
+dindexRow0 DN D7.32
+dindexRow1 DN D9.32
+dByteIndexRow0 DN D7.8
+dByteIndexRow1 DN D9.8
+dVRow0 DN D8.8
+dVRow1 DN D4.8
+dVRow0U16 DN D8.U16
+dVRow1U16 DN D4.U16
+dVRow2U16 DN D8.U16
+dVRow3U16 DN D4.U16
+
+dShift DN D5.U16
+dSrcRow0 DN D0.I16
+dSrcRow1 DN D1.I16
+dSrcRow2 DN D2.I16
+dSrcRow3 DN D3.I16
+dDqntRow0 DN D0.I16
+dDqntRow1 DN D1.I16
+dDqntRow2 DN D2.I16
+dDqntRow3 DN D3.I16
+
+;// Registers used in TransformResidual4x4
+
+;// Packed Input pixels
+dIn0 DN D0.S16
+dIn1 DN D1.S16
+dIn2 DN D2.S16
+dIn3 DN D3.S16
+qIn01 QN Q0.32
+qIn23 QN Q1.32
+
+;// Intermediate calculations
+dZero DN D4.S16
+de0 DN D5.S16
+de1 DN D6.S16
+de2 DN D7.S16
+de3 DN D8.S16
+dIn1RS DN D7.S16
+dIn3RS DN D8.S16
+df0 DN D0.S16
+df1 DN D1.S16
+df2 DN D2.S16
+df3 DN D3.S16
+qf01 QN Q0.32
+qf23 QN Q1.32
+dg0 DN D5.S16
+dg1 DN D6.S16
+dg2 DN D7.S16
+dg3 DN D8.S16
+df1RS DN D7.S16
+df3RS DN D8.S16
+
+;// Output pixels
+dh0 DN D0.S16
+dh1 DN D1.S16
+dh2 DN D2.S16
+dh3 DN D3.S16
+
+;// Registers used in DequantTransformResidualFromPairAndAdd
+
+dDeltaRow0 DN D0.S16
+dDeltaRow1 DN D1.S16
+dDeltaRow2 DN D2.S16
+dDeltaRow3 DN D3.S16
+qDeltaRow01 QN Q0.S16
+qDeltaRow23 QN Q1.S16
+
+dPredValRow01 DN D4.U8
+dPredValRow23 DN D5.U8
+
+qSumRow01 QN Q3.S16
+qSumRow23 QN Q4.S16
+dDstRow01 DN D0.U8
+dDstRow23 DN D1.U8
+dDstRow0 DN D0.32[0]
+dDstRow1 DN D0.32[1]
+dDstRow2 DN D1.32[0]
+dDstRow3 DN D1.32[1]
+
+
+ ;// Allocate stack memory required by the function
+ M_ALLOC8 pBuffer, 32 ;// 32-byte scratch block, later read back as four D registers of 16-bit coefficients
+ ;// Register arguments: r0 = ppSrc, r1 = pPred, r2 = pDC, r3 = pDst; predStep, dstStep, QP and AC come from the stack.
+ ;// Flow: AC == 0 -> DCcase (residual built from the DC value only); otherwise unpack, dequantise and inverse-transform.
+ ;// Write function header
+ M_START omxVCM4P10_DequantTransformResidualFromPairAndAdd,r11,d9
+
+ ;// Define stack arguments
+ M_ARG predStepOnStack, 4
+ M_ARG dstStepOnStack,4
+ M_ARG QPOnStack, 4
+ M_ARG ACOnStack,4
+
+
+ M_ADR pDelta,pBuffer ;// pDelta -> scratch block allocated above
+ M_LDR AC,ACOnStack
+
+
+ ;// Save registers r1,r2,r3 before function call
+ MOV pPredTemp,pPred
+ MOV pDCTemp,pDC
+ MOV pDstTemp,pDst
+
+ CMP AC,#0
+ BEQ DCcase ;// AC == 0: skip unpack / dequant / transform entirely
+ MOV pDeltaArg1,pDelta ;// Set up r1 for armVCM4P10_UnpackBlock4x4
+
+ BL armVCM4P10_UnpackBlock4x4 ;// r0 still holds ppSrc, r1 = pDelta (destination scratch block)
+
+ ;//--------------------------------------------------------
+ ;// armVCM4P10_DequantLumaAC4x4 : static function inlined
+ ;//--------------------------------------------------------
+
+ ;//BL armVCM4P10_DequantLumaAC4x4
+ M_LDR QP,QPOnStack ;// Set up r1 for armVCM4P10_DequantLumaAC4x4
+
+ LDR pQPmod,=armVCM4P10_QPModuloTable
+ LDR pQPdiv,=armVCM4P10_QPDivTable
+ LDR pVRow,=armVCM4P10_VMatrixU16
+
+
+ LDRSB QPmod,[pQPmod,QP] ;// (QP%6) * 6
+ LDRSB shift,[pQPdiv,QP] ;// Shift = QP / 6
+ ;// index1 reuses r10 (pQPdiv) and index0 reuses r1 (QP); both are dead after the two LDRSB above
+ LDR index1,=0x03020504
+ LDR index0,=0x05040100 ;// Indexes into dVmatrix
+ ADD pVRow,pVRow,QPmod ;// Advance pVRow by QPmod bytes to the entry for this QP
+ VDUP dindexRow0,index0
+ VDUP dindexRow1,index1
+ VDUP dShift,shift
+
+ ;// Load all 4x4 pVRow[] values
+ VLD1 dVmatrix,[pVRow] ;// dVmatrix = [0d|0c|0b|0a]
+
+
+ VTBL dVRow0,dVmatrix,dByteIndexRow0 ;// row0 = row2 = [pVRow[2] | pVRow[0] | pVRow[2] | pVRow[0]]
+ VTBL dVRow1,dVmatrix,dByteIndexRow1 ;// row1 = row3 = [pVRow[1] | pVRow[2] | pVRow[1] | pVRow[2]]
+ CMP pDCTemp,#0 ;// Flags are consumed by LDRSHNE and VMOVNE below (nothing in between sets flags)
+ ;// Load all the 4x4 'src' values
+ VLD1 { dSrcRow0,dSrcRow1,dSrcRow2,dSrcRow3 },[pDelta]
+
+ VSHL dVRow0U16,dVRow0U16,dShift
+ VSHL dVRow1U16,dVRow1U16,dShift
+ LDRSHNE DCval,[pDCTemp] ;// pDC != NULL: fetch the signed 16-bit DC value
+
+
+ ;// Multiply src[] with pVRow[]
+ VMUL dDqntRow0,dSrcRow0,dVRow0U16
+ VMUL dDqntRow1,dSrcRow1,dVRow1U16
+ VMUL dDqntRow2,dSrcRow2,dVRow2U16
+ VMUL dDqntRow3,dSrcRow3,dVRow3U16
+
+
+
+ ;//-------------------------------------------------------------
+ ;// TransformResidual4x4 : Inlined to avoid Load/Stores
+ ;//-------------------------------------------------------------
+
+
+ ;//BL armVCM4P10_TransformResidual4x4
+ ;//STRHNE DCval,[pDelta]
+ VMOVNE dIn0[0],DCval ;// pDC != NULL: lane 0 of row 0 is overwritten with the DC value
+
+
+
+ ;//*****************************************************************
+ ;// Transpose the input pixels : perform Row ops as Col ops
+ ;//*****************************************************************
+
+ VTRN dIn0,dIn1
+ VTRN dIn2,dIn3
+ VTRN qIn01,qIn23
+
+
+ VMOV dZero,#0 ;// Used to right shift by 1
+
+
+ ;//****************************************
+ ;// Row Operations (Performed on columns)
+ ;//****************************************
+
+
+ VADD de0,dIn0,dIn2 ;// e0 = d0 + d2
+ VSUB de1,dIn0,dIn2 ;// e1 = d0 - d2
+ VHADD dIn1RS,dIn1,dZero ;// (d1>>1) : halving add with dZero, a register holding 0
+ VHADD dIn3RS,dIn3,dZero ;// (d3>>1)
+ VSUB de2,dIn1RS,dIn3 ;// e2 = (d1>>1) - d3
+ VADD de3,dIn1,dIn3RS ;// e3 = d1 + (d3>>1)
+ VADD df0,de0,de3 ;// f0 = e0 + e3
+ VADD df1,de1,de2 ;// f1 = e1 + e2
+ VSUB df2,de1,de2 ;// f2 = e1 - e2
+ VSUB df3,de0,de3 ;// f3 = e0 - e3
+
+
+
+ ;//*****************************************************************
+ ;// Transpose the resultant matrix
+ ;//*****************************************************************
+
+ VTRN df0,df1
+ VTRN df2,df3
+ VTRN qf01,qf23
+
+
+ ;//*******************************
+ ;// Column Operations
+ ;//*******************************
+
+
+ VADD dg0,df0,df2 ;// g0 = f0 + f2
+ VSUB dg1,df0,df2 ;// g1 = f0 - f2
+ VHADD df1RS,df1,dZero ;// (f1>>1) : halving add with dZero, a register holding 0
+ VHADD df3RS,df3,dZero ;// (f3>>1)
+ VSUB dg2,df1RS,df3 ;// g2 = (f1>>1) - f3
+ VADD dg3,df1,df3RS ;// g3 = f1 + (f3>>1)
+ VADD dh0,dg0,dg3 ;// h0 = g0 + g3
+ VADD dh1,dg1,dg2 ;// h1 = g1 + g2
+ VSUB dh2,dg1,dg2 ;// h2 = g1 - g2
+ VSUB dh3,dg0,dg3 ;// h3 = g0 - g3
+
+
+ ;//************************************************
+ ;// Calculate final value (colOp[i][j] + 32)>>6
+ ;//************************************************
+
+ VRSHR dh0,#6
+ VRSHR dh1,#6
+ VRSHR dh2,#6
+ VRSHR dh3,#6
+
+
+ B OutDCcase
+
+
+DCcase
+ ;// Calculate the Transformed DCvalue : (DCval+32)>>6
+ LDRSH DCval,[pDCTemp] ;// NOTE(review): pDC is dereferenced here without the NULL test used on the AC path - confirm callers guarantee it
+ ADD DCval,DCval,#32
+ ASR DCval,DCval,#6
+
+ VDUP dDeltaRow0, DCval ;// pDelta[0] = pDelta[1] = pDelta[2] = pDelta[3] = DCval
+ VDUP dDeltaRow1, DCval ;// pDelta[4] = pDelta[5] = pDelta[6] = pDelta[7] = DCval
+ VDUP dDeltaRow2, DCval ;// pDelta[8] = pDelta[9] = pDelta[10] = pDelta[11] = DCval
+ VDUP dDeltaRow3, DCval ;// pDelta[12] = pDelta[13] = pDelta[14] = pDelta[15] = DCval
+
+
+OutDCcase
+ M_LDR predstep,predStepOnStack
+ M_LDR dstStep,dstStepOnStack
+ ;// Fetch four 4-byte prediction rows (post-incrementing by predstep) and pack two rows per D register
+ LDR PredVal1,[pPredTemp],predstep
+ LDR PredVal2,[pPredTemp],predstep
+ VMOV dPredValRow01,PredVal1,PredVal2
+
+ LDR PredVal1,[pPredTemp],predstep
+ LDR PredVal2,[pPredTemp]
+ VMOV dPredValRow23,PredVal1,PredVal2
+
+ ;// Widen the prediction bytes and add them to the 16-bit residual, then saturate-narrow to unsigned 8-bit
+ VADDW qSumRow01,qDeltaRow01,dPredValRow01
+ VADDW qSumRow23,qDeltaRow23,dPredValRow23
+ VQMOVUN dDstRow01,qSumRow01
+ VQMOVUN dDstRow23,qSumRow23
+
+ ;// Store the four 4-byte result rows through pDstTemp, post-incrementing by dstStep
+ VST1 dDstRow0,[pDstTemp],dstStep
+ VST1 dDstRow1,[pDstTemp],dstStep
+ VST1 dDstRow2,[pDstTemp],dstStep
+ VST1 dDstRow3,[pDstTemp]
+
+ ;// Set return value
+ MOV result,#OMX_Sts_NoErr
+
+End
+
+
+ ;// Write function tail
+
+ M_END
+
+ ENDIF ;//CORTEXA8
+
+
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
new file mode 100644
index 0000000..4606197
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
@@ -0,0 +1,202 @@
+;//
+;//
+;// File Name: omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+ IF CortexA8
+
+ IMPORT armVCM4P10_DeblockingChromabSGE4_unsafe
+ IMPORT armVCM4P10_DeblockingChromabSLT4_unsafe
+
+LOOP_COUNT EQU 0x40000000
+MASK_3 EQU 0x03030303
+MASK_4 EQU 0x04040404
+
+;// Function arguments
+
+pSrcDst RN 0
+srcdstStep RN 1
+pAlpha RN 2
+pBeta RN 3
+
+pThresholds RN 5
+pBS RN 4
+bS3210 RN 6
+
+;// Loop
+
+XY RN 7
+
+;// Pixels
+dP_0 DN D4.U8
+dP_1 DN D5.U8
+dP_2 DN D6.U8
+dQ_0 DN D8.U8
+dQ_1 DN D9.U8
+dQ_2 DN D10.U8
+
+;// Filtering Decision
+dAlpha DN D0.U8
+dBeta DN D2.U8
+
+dFilt DN D16.U8
+dAqflg DN D12.U8
+dApflg DN D17.U8
+
+dAp0q0 DN D13.U8
+dAp1p0 DN D12.U8
+dAq1q0 DN D18.U8
+dAp2p0 DN D19.U8
+dAq2q0 DN D17.U8
+
+qBS3210 QN Q13.U16
+dBS3210 DN D26
+dMask_bs DN D27
+dFilt_bs DN D26.U16
+
+;// bSLT4
+dMask_0 DN D14.U8
+dMask_1 DN D15.U8
+dMask_4 DN D1.U16
+
+Mask_4 RN 8
+Mask_3 RN 9
+
+dTemp DN D19.U8
+
+;// Result
+dP_0t DN D13.U8
+dQ_0t DN D31.U8
+
+dP_0n DN D29.U8
+dQ_0n DN D24.U8
+
+
+ ;// Function header
+ M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r9, d15
+ ;// Register arguments: r0 = pSrcDst, r1 = srcdstStep, r2 = pAlpha, r3 = pBeta
+ ;//Arguments on the stack
+ M_ARG ppThresholds, 4
+ M_ARG ppBS, 4
+
+ ;// d0-dAlpha_0
+ ;// d2-dBeta_0
+
+ ;load alpha1,beta1 somewhere to avoid more loads
+ VLD1 {dAlpha[]}, [pAlpha]! ;// Broadcast pAlpha[0] to every lane; pAlpha now points at pAlpha[1]
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #1 ;// With the next SUB: pSrcDst -= 3*srcdstStep, so the first load below fetches the dP_2 row
+ SUB pSrcDst, pSrcDst, srcdstStep
+ VLD1 {dBeta[]}, [pBeta]! ;// Broadcast pBeta[0] to every lane; pBeta now points at pBeta[1]
+
+ M_LDR pBS, ppBS
+ M_LDR pThresholds, ppThresholds
+
+ LDR Mask_3, =MASK_3
+ LDR Mask_4, =MASK_4
+
+ VMOV dMask_0, #0
+ VMOV dMask_1, #1
+ VMOV dMask_4, #4
+
+ LDR XY, =LOOP_COUNT
+ ;// XY starts at 0x40000000 and is doubled by ADDS each pass: 0x80000000 (NE), then 0 (EQ) -> LoopY runs twice
+ ;// p0-p2 - d4-d6
+ ;// q0-q2 - d8-d10
+LoopY
+ LDR bS3210, [pBS], #8 ;// Four bS bytes for this edge; pBS advances by 8
+
+ VLD1 dP_2, [pSrcDst], srcdstStep
+ ;1
+ VLD1 dP_1, [pSrcDst], srcdstStep
+ CMP bS3210, #0 ;// All four bS zero? Flags are consumed by BEQ NoFilterBS0 below
+ VLD1 dP_0, [pSrcDst], srcdstStep
+ ;1
+ VLD1 dQ_0, [pSrcDst], srcdstStep
+ VABD dAp2p0, dP_2, dP_0
+ VLD1 dQ_1, [pSrcDst], srcdstStep
+ VABD dAp0q0, dP_0, dQ_0
+ VLD1 dQ_2, [pSrcDst], srcdstStep
+ BEQ NoFilterBS0 ;// pSrcDst is now six rows past where this pass started
+
+ VABD dAp1p0, dP_1, dP_0
+ VABD dAq1q0, dQ_1, dQ_0
+
+ VCGT dFilt, dAlpha, dAp0q0 ;// dFilt = (|p0-q0| < alpha)
+ VMOV.U32 dBS3210[0], bS3210
+ VMAX dAp1p0, dAq1q0, dAp1p0
+ VMOVL qBS3210, dBS3210.U8 ;// Widen each bS byte to 16 bits, i.e. one bS value spans two 8-bit pixel lanes
+ VABD dAq2q0, dQ_2, dQ_0
+ VCGT dMask_bs.S16, dBS3210.S16, #0 ;// Per-lane mask: bS > 0
+
+ VCGT dAp1p0, dBeta, dAp1p0 ;// max(|p1-p0|, |q1-q0|) < beta
+ VCGT dAp2p0, dBeta, dAp2p0
+
+ VAND dFilt, dMask_bs.U8
+
+ TST bS3210, Mask_3 ;// NE if any bS byte has bit 0 or bit 1 set; consumed by the first BLNE below
+
+ VCGT dAq2q0, dBeta, dAq2q0
+ VAND dFilt, dFilt, dAp1p0
+
+ VAND dAqflg, dFilt, dAq2q0
+ VAND dApflg, dFilt, dAp2p0
+
+ ;// bS < 4 Filtering
+ BLNE armVCM4P10_DeblockingChromabSLT4_unsafe
+
+ TST bS3210, Mask_4 ;// NE if any bS byte has bit 2 set; consumed by the second BLNE below
+
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #2 ;// Back four rows: from six rows past the start to the dP_0 row
+ VTST dFilt_bs, dFilt_bs, dMask_4 ;// Per-lane mask: bS has bit 2 set
+
+ ;// bS == 4 Filtering
+ BLNE armVCM4P10_DeblockingChromabSGE4_unsafe
+ ;// NOTE(review): dP_0t/dQ_0t and dP_0n/dQ_0n are presumably produced by the two helpers above (not visible here)
+ VBIT dP_0n, dP_0t, dFilt_bs ;// Where dFilt_bs is set, take dP_0t
+ VBIT dQ_0n, dQ_0t, dFilt_bs
+
+ VBIF dP_0n, dP_0, dFilt ;// Where dFilt is clear, keep the original pixel
+ VBIF dQ_0n, dQ_0, dFilt
+
+ ;// Result Storage
+ VST1 dP_0n, [pSrcDst], srcdstStep
+ ADDS XY, XY, XY ;// Advance the loop counter; flags are consumed by BNE LoopY
+ VST1 dQ_0n, [pSrcDst], srcdstStep
+ ;// NOTE(review): this path does not visibly reload dAlpha/dBeta or advance pThresholds, unlike NoFilterBS0 - presumably done inside the helpers; confirm
+ BNE LoopY
+
+ MOV r0, #OMX_Sts_NoErr
+
+ M_EXIT
+
+NoFilterBS0
+ ;// Nothing to filter on this edge: reload alpha/beta through the post-incremented pointers and step to the next edge
+ VLD1 {dAlpha[]}, [pAlpha]
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #1 ;// Six rows were consumed by the loads; net advance is four rows
+ ADDS XY, XY, XY
+ VLD1 {dBeta[]}, [pBeta]
+ ADD pThresholds, pThresholds, #4
+ BNE LoopY
+
+ MOV r0, #OMX_Sts_NoErr
+ M_END
+
+ ENDIF
+
+
+ END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
new file mode 100644
index 0000000..18e6c1d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
@@ -0,0 +1,282 @@
+;//
+;//
+;// File Name: omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+ IF CortexA8
+
+ IMPORT armVCM4P10_DeblockingChromabSGE4_unsafe
+ IMPORT armVCM4P10_DeblockingChromabSLT4_unsafe
+
+LOOP_COUNT EQU 0x40000000
+MASK_3 EQU 0x03030303
+MASK_4 EQU 0x04040404
+
+;// Function arguments
+
+pSrcDst RN 0
+srcdstStep RN 1
+pAlpha RN 2
+pBeta RN 3
+
+pThresholds RN 5
+pBS RN 4
+bS3210 RN 6
+pSrcDst_P RN 10
+pSrcDst_Q RN 12
+
+pTmp RN 10
+pTmp2 RN 12
+step RN 14
+
+;// Loop
+
+XY RN 7
+
+;// Rows input
+dRow0 DN D7.U8
+dRow1 DN D8.U8
+dRow2 DN D5.U8
+dRow3 DN D10.U8
+dRow4 DN D6.U8
+dRow5 DN D9.U8
+dRow6 DN D4.U8
+dRow7 DN D11.U8
+
+
+;// Pixels
+dP_0 DN D4.U8
+dP_1 DN D5.U8
+dP_2 DN D6.U8
+dQ_0 DN D8.U8
+dQ_1 DN D9.U8
+dQ_2 DN D10.U8
+
+;// Filtering Decision
+dAlpha DN D0.U8
+dBeta DN D2.U8
+
+dFilt DN D16.U8
+dAqflg DN D12.U8
+dApflg DN D17.U8
+
+dAp0q0 DN D13.U8
+dAp1p0 DN D12.U8
+dAq1q0 DN D18.U8
+dAp2p0 DN D19.U8
+dAq2q0 DN D17.U8
+
+qBS3210 QN Q13.U16
+dBS3210 DN D26
+dMask_bs DN D27
+dFilt_bs DN D26.U16
+
+;// bSLT4
+dMask_0 DN D14.U8
+dMask_1 DN D15.U8
+dMask_4 DN D1.U16
+
+Mask_4 RN 8
+Mask_3 RN 9
+
+dTemp DN D19.U8
+
+;// Result
+dP_0t DN D13.U8
+dQ_0t DN D31.U8
+
+dP_0n DN D29.U8
+dQ_0n DN D24.U8
+
+
+ ;// Function header
+ M_START omxVCM4P10_FilterDeblockingChroma_VerEdge_I, r12, d15
+ ;// Register arguments: r0 = pSrcDst, r1 = srcdstStep, r2 = pAlpha, r3 = pBeta
+ ;//Arguments on the stack
+ M_ARG ppThresholds, 4
+ M_ARG ppBS, 4
+
+ ;// d0-dAlpha_0
+ ;// d2-dBeta_0
+
+ ;load alpha1,beta1 somewhere to avoid more loads
+ VLD1 {dAlpha[]}, [pAlpha]! ;// Broadcast pAlpha[0] to every lane; pAlpha now points at pAlpha[1]
+ SUB pSrcDst, pSrcDst, #4 ;// Start 4 bytes to the left so each 8-byte row load straddles the edge
+ VLD1 {dBeta[]}, [pBeta]! ;// Broadcast pBeta[0] to every lane; pBeta now points at pBeta[1]
+
+ M_LDR pBS, ppBS
+ M_LDR pThresholds, ppThresholds
+
+ LDR Mask_4, =MASK_4
+ LDR Mask_3, =MASK_3
+
+ ;dMask_0-14
+ ;dMask_1-15
+ ;dMask_4-1
+
+ VMOV dMask_0, #0
+ VMOV dMask_1, #1
+ VMOV dMask_4, #4
+
+ LDR XY, =LOOP_COUNT
+ ;// XY starts at 0x40000000 and is doubled by ADDS each pass: 0x80000000 (NE), then 0 (EQ) -> LoopY runs twice
+ ;// p0-p3 - d4-d7
+ ;// q0-q3 - d8-d11
+
+
+LoopY
+ LDR bS3210, [pBS], #8 ;// Four bS bytes for this edge; pBS advances by 8
+ ADD pTmp, pSrcDst, srcdstStep ;// pTmp walks the odd rows, pSrcDst the even rows
+ ADD step, srcdstStep, srcdstStep ;// step = 2 * srcdstStep
+
+ ;1
+ VLD1 dRow0, [pSrcDst], step
+ ;1
+ VLD1 dRow1, [pTmp], step
+ VLD1 dRow2, [pSrcDst], step
+ VLD1 dRow3, [pTmp], step
+ VLD1 dRow4, [pSrcDst], step
+ VLD1 dRow5, [pTmp], step
+ VLD1 dRow6, [pSrcDst], step
+ VLD1 dRow7, [pTmp], step
+
+
+ ;// dRow0 = [q3r0 q2r0 q1r0 q0r0 p0r0 p1r0 p2r0 p3r0]
+ ;// dRow1 = [q3r1 q2r1 q1r1 q0r1 p0r1 p1r1 p2r1 p3r1]
+ ;// dRow2 = [q3r2 q2r2 q1r2 q0r2 p0r2 p1r2 p2r2 p3r2]
+ ;// dRow3 = [q3r3 q2r3 q1r3 q0r3 p0r3 p1r3 p2r3 p3r3]
+ ;// dRow4 = [q3r4 q2r4 q1r4 q0r4 p0r4 p1r4 p2r4 p3r4]
+ ;// dRow5 = [q3r5 q2r5 q1r5 q0r5 p0r5 p1r5 p2r5 p3r5]
+ ;// dRow6 = [q3r6 q2r6 q1r6 q0r6 p0r6 p1r6 p2r6 p3r6]
+ ;// dRow7 = [q3r7 q2r7 q1r7 q0r7 p0r7 p1r7 p2r7 p3r7]
+
+ ;// 8x8 Transpose
+ VZIP.8 dRow0, dRow1
+ VZIP.8 dRow2, dRow3
+ VZIP.8 dRow4, dRow5
+ VZIP.8 dRow6, dRow7
+
+ VZIP.16 dRow0, dRow2
+ VZIP.16 dRow1, dRow3
+ VZIP.16 dRow4, dRow6
+ VZIP.16 dRow5, dRow7
+
+ VZIP.32 dRow0, dRow4
+ VZIP.32 dRow2, dRow6
+ VZIP.32 dRow3, dRow7
+ VZIP.32 dRow1, dRow5
+
+ ;// From here the same D registers are read through the dP_x / dQ_x aliases
+ ;Realign the pointers
+
+ CMP bS3210, #0 ;// All four bS zero? Flags are consumed by BEQ NoFilterBS0 below
+ VABD dAp2p0, dP_2, dP_0
+ VABD dAp0q0, dP_0, dQ_0
+ BEQ NoFilterBS0
+
+ VABD dAp1p0, dP_1, dP_0
+ VABD dAq1q0, dQ_1, dQ_0
+
+ VMOV.U32 dBS3210[0], bS3210
+ VCGT dFilt, dAlpha, dAp0q0 ;// dFilt = (|p0-q0| < alpha)
+ VMAX dAp1p0, dAq1q0, dAp1p0
+ VMOVL qBS3210, dBS3210.U8 ;// Widen each bS byte to 16 bits, i.e. one bS value spans two 8-bit pixel lanes
+ VABD dAq2q0, dQ_2, dQ_0
+ VCGT dMask_bs.S16, dBS3210.S16, #0 ;// Per-lane mask: bS > 0
+
+ VCGT dAp1p0, dBeta, dAp1p0 ;// max(|p1-p0|, |q1-q0|) < beta
+ VCGT dAp2p0, dBeta, dAp2p0
+ VAND dFilt, dMask_bs.U8
+
+ TST bS3210, Mask_3 ;// NE if any bS byte has bit 0 or bit 1 set; consumed by the first BLNE below
+
+ VCGT dAq2q0, dBeta, dAq2q0
+ VAND dFilt, dFilt, dAp1p0
+
+ VAND dAqflg, dFilt, dAq2q0
+ VAND dApflg, dFilt, dAp2p0
+
+ ;// bS < 4 Filtering
+ BLNE armVCM4P10_DeblockingChromabSLT4_unsafe
+
+ TST bS3210, Mask_4 ;// NE if any bS byte has bit 2 set; consumed by the second BLNE below
+
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #3 ;// Rewind the eight rows consumed by the loads
+ VTST dFilt_bs, dFilt_bs, dMask_4 ;// Per-lane mask: bS has bit 2 set
+
+ ;// bS == 4 Filtering
+ BLNE armVCM4P10_DeblockingChromabSGE4_unsafe
+ ;// NOTE(review): dP_0t/dQ_0t and dP_0n/dQ_0n are presumably produced by the two helpers above (not visible here)
+ VBIT dP_0n, dP_0t, dFilt_bs ;// Where dFilt_bs is set, take dP_0t
+ VBIT dQ_0n, dQ_0t, dFilt_bs
+
+ ;// Result Storage
+ ADD pSrcDst_P, pSrcDst, #3 ;// Byte offset 3 in each row (pSrcDst_P shares r10 with pTmp)
+ VBIF dP_0n, dP_0, dFilt ;// Where dFilt is clear, keep the original pixel
+
+ ADD pTmp2, pSrcDst_P, srcdstStep
+ ADD step, srcdstStep, srcdstStep ;// step lives in r14 (lr), which a BL overwrites, so it is recomputed here
+ VBIF dQ_0n, dQ_0, dFilt
+
+ ADDS XY, XY, XY ;// Advance the loop counter; nothing below sets flags before BNE LoopY
+ ;// Scatter one byte per row: even rows through pSrcDst_P, odd rows through pTmp2
+ VST1 {dP_0n[0]}, [pSrcDst_P], step
+ VST1 {dP_0n[1]}, [pTmp2], step
+ VST1 {dP_0n[2]}, [pSrcDst_P], step
+ VST1 {dP_0n[3]}, [pTmp2], step
+ VST1 {dP_0n[4]}, [pSrcDst_P], step
+ VST1 {dP_0n[5]}, [pTmp2], step
+ VST1 {dP_0n[6]}, [pSrcDst_P], step
+ VST1 {dP_0n[7]}, [pTmp2], step
+
+ ADD pSrcDst_Q, pSrcDst, #4 ;// Byte offset 4 in each row (pSrcDst_Q shares r12 with pTmp2)
+ ADD pTmp, pSrcDst_Q, srcdstStep
+
+ VST1 {dQ_0n[0]}, [pSrcDst_Q], step
+ VST1 {dQ_0n[1]}, [pTmp], step
+ VST1 {dQ_0n[2]}, [pSrcDst_Q], step
+ VST1 {dQ_0n[3]}, [pTmp], step
+ VST1 {dQ_0n[4]}, [pSrcDst_Q], step
+ VST1 {dQ_0n[5]}, [pTmp], step
+ VST1 {dQ_0n[6]}, [pSrcDst_Q], step
+ VST1 {dQ_0n[7]}, [pTmp], step
+
+ ADD pSrcDst, pSrcDst, #4 ;// Move four bytes right for the next pass
+ ;// NOTE(review): this path does not visibly reload dAlpha/dBeta or advance pThresholds, unlike NoFilterBS0 - presumably done inside the helpers; confirm
+ BNE LoopY
+
+ MOV r0, #OMX_Sts_NoErr
+
+ M_EXIT
+
+NoFilterBS0
+ VLD1 {dAlpha[]}, [pAlpha] ;// Reload alpha through the post-incremented pointer
+ ADD pSrcDst, pSrcDst, #4 ;// Move four bytes right for the next pass
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #3 ;// Rewind the eight rows consumed by the loads
+ ADDS XY, XY, XY
+ VLD1 {dBeta[]}, [pBeta]
+ ADD pThresholds, pThresholds, #4
+ BNE LoopY
+
+ MOV r0, #OMX_Sts_NoErr
+
+ M_END
+
+ ENDIF
+
+
+ END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
new file mode 100755
index 0000000..0c3f4f2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
@@ -0,0 +1,288 @@
+;//
+;//
+;// File Name: omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+ IMPORT armVCM4P10_DeblockingLumabSLT4_unsafe
+ IMPORT armVCM4P10_DeblockingLumabSGE4_unsafe
+
+ IF CortexA8
+
+LOOP_COUNT EQU 0x55000000
+
+
+;// Function arguments
+
+pSrcDst RN 0
+srcdstStep RN 1
+pAlpha RN 2
+pBeta RN 3
+
+pThresholds RN 5
+pBS RN 4
+bS10 RN 12
+
+pAlpha_0 RN 2
+pBeta_0 RN 3
+
+pAlpha_1 RN 7
+pBeta_1 RN 8
+
+
+
+;// Loop
+
+XY RN 9
+
+pTmp RN 6
+step RN 10
+
+;// Pixels
+dP_0 DN D4.U8
+dP_1 DN D5.U8
+dP_2 DN D6.U8
+dP_3 DN D7.U8
+dQ_0 DN D8.U8
+dQ_1 DN D9.U8
+dQ_2 DN D10.U8
+dQ_3 DN D11.U8
+
+
+;// Filtering Decision
+dAlpha DN D0.U8
+dBeta DN D2.U8
+
+dFilt DN D16.U8
+dAqflg DN D12.U8
+dApflg DN D17.U8
+
+dAp0q0 DN D13.U8
+dAp1p0 DN D12.U8
+dAq1q0 DN D18.U8
+dAp2p0 DN D19.U8
+dAq2q0 DN D17.U8
+
+;// bSLT4
+dTC0 DN D18.U8
+dTC1 DN D19.U8
+dTC01 DN D18.U8
+
+dTCs DN D31.S8
+dTC DN D31.U8
+
+dMask_0 DN D14.U8
+dMask_1 DN D15.U8
+
+Mask_0 RN 11
+
+dTemp DN D19.U8
+
+;// Computing P0,Q0
+qDq0p0 QN Q10.S16
+qDp1q1 QN Q11.S16
+qDelta QN Q10.S16 ; reuse qDq0p0
+dDelta DN D20.S8
+
+
+;// Computing P1,Q1
+dRp0q0 DN D24.U8
+
+dMaxP DN D23.U8
+dMinP DN D22.U8
+
+dMaxQ DN D19.U8
+dMinQ DN D21.U8
+
+dDeltaP DN D26.U8
+dDeltaQ DN D27.U8
+
+qP_0n QN Q14.S16
+qQ_0n QN Q12.S16
+
+dQ_0n DN D24.U8
+dQ_1n DN D25.U8
+dP_0n DN D29.U8
+dP_1n DN D30.U8
+
+;// bSGE4
+
+qSp0q0 QN Q10.U16
+
+qSp2q1 QN Q11.U16
+qSp0q0p1 QN Q12.U16
+qSp3p2 QN Q13.U16
+dHSp0q1 DN D28.U8
+
+qSq2p1 QN Q11.U16
+qSp0q0q1 QN Q12.U16
+qSq3q2 QN Q13.U16 ;!!
+dHSq0p1 DN D28.U8 ;!!
+
+qTemp1 QN Q11.U16 ;!!;qSp2q1
+qTemp2 QN Q12.U16 ;!!;qSp0q0p1
+
+dP_0t DN D28.U8 ;!!;dHSp0q1
+dQ_0t DN D22.U8 ;!!;Temp1
+
+dP_0n DN D29.U8
+dP_1n DN D30.U8
+dP_2n DN D31.U8
+
+dQ_0n DN D24.U8 ;!!;Temp2
+dQ_1n DN D25.U8 ;!!;Temp2
+dQ_2n DN D28.U8 ;!!;dQ_0t
+
+
+ ;// Function header
+ M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11, d15
+ ;// Register arguments: r0 = pSrcDst, r1 = srcdstStep, r2 = pAlpha, r3 = pBeta
+ ;//Arguments on the stack
+ M_ARG ppThresholds, 4
+ M_ARG ppBS, 4
+
+ ;// d0-dAlpha_0
+ ;// d2-dBeta_0
+
+ ADD pAlpha_1, pAlpha_0, #1 ;// Address of pAlpha[1]
+ ADD pBeta_1, pBeta_0, #1 ;// Address of pBeta[1]
+
+ VLD1 {dAlpha[]}, [pAlpha_0]
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #2 ;// Start four rows up, so the first load below fetches the dP_3 row
+ VLD1 {dBeta[]}, [pBeta_0]
+
+ M_LDR pBS, ppBS
+ M_LDR pThresholds, ppThresholds
+
+ MOV Mask_0,#0
+
+ ;dMask_0-14
+ ;dMask_1-15
+
+ VMOV dMask_0, #0
+ VMOV dMask_1, #1
+
+ ADD step, srcdstStep, srcdstStep ;// step = 2 * srcdstStep
+
+ LDR XY,=LOOP_COUNT
+ ;// XY = 0x55000000 is doubled by ADDS each pass: carry-out on every 2nd pass ends LoopX, zero after the 8th ends LoopY (4 x 2 passes)
+ ;// p0-p3 - d4-d7
+ ;// q0-q3 - d8-d11
+LoopY
+LoopX
+ LDRH bS10, [pBS], #2 ;// Two bS bytes for this 8-pixel span
+ ADD pTmp, pSrcDst, srcdstStep ;// pTmp walks the odd rows, pSrcDst the even rows
+ CMP bS10, #0
+ BEQ NoFilterBS0
+
+ VLD1 dP_3, [pSrcDst], step
+ VLD1 dP_2, [pTmp], step
+ VLD1 dP_1, [pSrcDst], step
+ VLD1 dP_0, [pTmp], step
+ VLD1 dQ_0, [pSrcDst], step
+ VABD dAp1p0, dP_0, dP_1
+ VLD1 dQ_1, [pTmp]
+ VABD dAp0q0, dQ_0, dP_0
+ VLD1 dQ_2, [pSrcDst], srcdstStep ;// pSrcDst now points at the dQ_3 row (seven rows past the start of this span)
+
+ VABD dAq1q0, dQ_1, dQ_0
+ VABD dAp2p0, dP_2, dP_0
+ VCGT dFilt, dAlpha, dAp0q0 ;// dFilt = (|p0-q0| < alpha)
+
+ TST bS10, #0xff ;// EQ if the low bS byte is zero
+ VMAX dAp1p0, dAq1q0, dAp1p0
+ VABD dAq2q0, dQ_2, dQ_0
+
+ VMOVEQ.U32 dFilt[0], Mask_0 ;// Low bS byte zero: clear the filter mask for lanes 0-3
+ TST bS10, #0xff00 ;// EQ if the high bS byte is zero
+
+ VCGT dAp2p0, dBeta, dAp2p0
+ VCGT dAp1p0, dBeta, dAp1p0 ;// max(|p1-p0|, |q1-q0|) < beta
+
+ VMOVEQ.U32 dFilt[1], Mask_0 ;// High bS byte zero: clear the filter mask for lanes 4-7
+
+ VCGT dAq2q0, dBeta, dAq2q0
+ VLD1 dQ_3, [pSrcDst]
+ VAND dFilt, dFilt, dAp1p0
+ TST bS10, #4 ;// Tests bit 2 of the low bS byte only; consumed by BNE bSGE4
+
+ VAND dAqflg, dFilt, dAq2q0
+ VAND dApflg, dFilt, dAp2p0
+
+ BNE bSGE4
+bSLT4
+ ;// bS < 4 Filtering
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #2 ;// With the next SUB: back five rows, from the dQ_3 row to the dP_1 row
+ SUB pSrcDst, pSrcDst, srcdstStep
+
+ BL armVCM4P10_DeblockingLumabSLT4_unsafe
+
+ ;// Result Storage
+ VST1 dP_1n, [pSrcDst], srcdstStep
+ VST1 dP_0n, [pSrcDst], srcdstStep
+ SUB pTmp, pSrcDst, srcdstStep, LSL #2 ;// pTmp = first (dP_3) row of this span
+ VST1 dQ_0n, [pSrcDst], srcdstStep
+ ADDS XY, XY, XY ;// Advance the loop counter; C is consumed by BCC LoopX, Z by BNE LoopY
+ VST1 dQ_1n, [pSrcDst]
+ ADD pSrcDst, pTmp, #8 ;// Next 8-pixel span to the right; NOTE(review): pThresholds is not advanced on this path - presumably the helper does it, confirm
+
+ BCC LoopX
+ B ExitLoopY
+
+NoFilterBS0
+ ADD pSrcDst, pSrcDst, #8 ;// Nothing was loaded, so just step to the next 8-pixel span
+ ADDS XY, XY, XY
+ ADD pThresholds, pThresholds, #2
+ BCC LoopX
+ B ExitLoopY
+bSGE4
+ ;// bS >= 4 Filtering
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #2 ;// With the next SUB: back six rows, from the dQ_3 row to the dP_2 row
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #1
+ BL armVCM4P10_DeblockingLumabSGE4_unsafe
+
+ ;// Result Storage
+ VST1 dP_2n, [pSrcDst], srcdstStep
+ VST1 dP_1n, [pSrcDst], srcdstStep
+ VST1 dP_0n, [pSrcDst], srcdstStep
+ SUB pTmp, pSrcDst, srcdstStep, LSL #2 ;// pTmp = first (dP_3) row of this span
+ VST1 dQ_0n, [pSrcDst], srcdstStep
+ ADDS XY,XY,XY
+ VST1 dQ_1n, [pSrcDst], srcdstStep
+ ADD pThresholds, pThresholds, #2
+ VST1 dQ_2n, [pSrcDst]
+
+ ADD pSrcDst, pTmp, #8
+ BCC LoopX
+
+ExitLoopY
+ ;// Both 8-pixel spans done: return to the left column, drop four rows, and switch to the second alpha/beta entry
+ SUB pSrcDst, pSrcDst, #16
+ VLD1 {dAlpha[]}, [pAlpha_1]
+ ADD pSrcDst, pSrcDst, srcdstStep, LSL #2
+ VLD1 {dBeta[]}, [pBeta_1]
+ BNE LoopY ;// Z still reflects the last ADDS XY (nothing in between sets flags)
+
+ MOV r0, #OMX_Sts_NoErr
+
+ M_END
+
+ ENDIF
+
+
+
+
+ END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
new file mode 100755
index 0000000..e6fbb34
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
@@ -0,0 +1,436 @@
+;//
+;//
+;// File Name: omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+ IMPORT armVCM4P10_DeblockingLumabSLT4_unsafe
+ IMPORT armVCM4P10_DeblockingLumabSGE4_unsafe
+
+ IF CortexA8
+
+LOOP_COUNT EQU 0x11000000
+
+
+;// Function arguments
+
+pSrcDst RN 0
+srcdstStep RN 1
+pAlpha RN 2
+pBeta RN 3
+
+pThresholds RN 5
+pBS RN 4
+bS10 RN 12
+
+pAlpha_0 RN 2
+pBeta_0 RN 3
+
+pAlpha_1 RN 7
+pBeta_1 RN 8
+
+pTmp RN 10
+pTmpStep RN 11
+
+;// Loop
+
+XY RN 9
+
+;// Rows input
+dRow0 DN D7.U8
+dRow1 DN D8.U8
+dRow2 DN D5.U8
+dRow3 DN D10.U8
+dRow4 DN D6.U8
+dRow5 DN D9.U8
+dRow6 DN D4.U8
+dRow7 DN D11.U8
+
+;// dRow0 - dP_3, dRow1 - dQ_0, dRow2 - dP_1, dRow3 - dQ_2
+;// dRow4 - dP_2, dRow5 - dQ_1, dRow6 - dP_0, dRow7 - dQ_3
+
+;// Rows output
+dRown0 DN D7.U8
+dRown1 DN D24.U8
+dRown2 DN D30.U8
+dRown3 DN D10.U8
+dRown4 DN D6.U8
+dRown5 DN D25.U8
+dRown6 DN D29.U8
+dRown7 DN D11.U8
+
+;// dP_0n DN D29.U8
+;// dP_1n DN D30.U8
+;// dP_2n DN D31.U8
+;//
+;// dQ_0n DN D24.U8 ;!!;Temp2
+;// dQ_1n DN D25.U8 ;!!;Temp2
+;// dQ_2n DN D28.U8 ;!!;dQ_0t
+;//
+;// dRown0 - dP_3, dRown1 - dQ_0n
+;// dRown2 - dP_1n, dRown3 - dQ_2
+;// dRown4 - dP_2, dRown5 - dQ_1n
+;// dRown6 - dP_0n, dRown7 - dQ_3
+
+dRow0n DN D7.U8
+dRow1n DN D24.U8
+dRow2n DN D30.U8
+dRow3n DN D28.U8
+dRow4n DN D31.U8
+dRow5n DN D25.U8
+dRow6n DN D29.U8
+dRow7n DN D11.U8
+
+;// dRow0n - dP_3, dRow1n - dQ_0n, dRow2n - dP_1n, dRow3n - dQ_2n
+;// dRow4n - dP_2n, dRow5n - dQ_1n, dRow6n - dP_0n, dRow7n - dQ_3
+
+;// Pixels
+dP_0 DN D4.U8
+dP_1 DN D5.U8
+dP_2 DN D6.U8
+dP_3 DN D7.U8
+dQ_0 DN D8.U8
+dQ_1 DN D9.U8
+dQ_2 DN D10.U8
+dQ_3 DN D11.U8
+
+
+;// Filtering Decision
+dAlpha DN D0.U8
+dBeta DN D2.U8
+
+dFilt DN D16.U8
+dAqflg DN D12.U8
+dApflg DN D17.U8
+
+dAp0q0 DN D13.U8
+dAp1p0 DN D12.U8
+dAq1q0 DN D18.U8
+dAp2p0 DN D19.U8
+dAq2q0 DN D17.U8
+
+;// bSLT4
+dTC0 DN D18.U8
+dTC1 DN D19.U8
+dTC01 DN D18.U8
+
+dTCs DN D31.S8
+dTC DN D31.U8
+
+dMask_0 DN D14.U8
+dMask_1 DN D15.U8
+
+Mask_0 RN 6
+
+dTemp DN D19.U8
+
+;// Computing P0,Q0
+qDq0p0 QN Q10.S16
+qDp1q1 QN Q11.S16
+qDelta QN Q10.S16 ; reuse qDq0p0
+dDelta DN D20.S8
+
+
+;// Computing P1,Q1
+dRp0q0 DN D24.U8
+
+dMaxP DN D23.U8
+dMinP DN D22.U8
+
+dMaxQ DN D19.U8
+dMinQ DN D21.U8
+
+dDeltaP DN D26.U8
+dDeltaQ DN D27.U8
+
+qP_0n QN Q14.S16
+qQ_0n QN Q12.S16
+
+dQ_0n DN D24.U8
+dQ_1n DN D25.U8
+dP_0n DN D29.U8
+dP_1n DN D30.U8
+
+;// bSGE4
+
+qSp0q0 QN Q10.U16
+
+qSp2q1 QN Q11.U16
+qSp0q0p1 QN Q12.U16
+qSp3p2 QN Q13.U16
+dHSp0q1 DN D28.U8
+
+qSq2p1 QN Q11.U16
+qSp0q0q1 QN Q12.U16
+qSq3q2 QN Q13.U16 ;!!
+dHSq0p1 DN D28.U8 ;!!
+
+qTemp1 QN Q11.U16 ;!!;qSp2q1
+qTemp2 QN Q12.U16 ;!!;qSp0q0p1
+
+dP_0t DN D28.U8 ;!!;dHSp0q1
+dQ_0t DN D22.U8 ;!!;Temp1
+
+dP_0n DN D29.U8
+dP_1n DN D30.U8
+dP_2n DN D31.U8
+
+dQ_0n DN D24.U8 ;!!;Temp2
+dQ_1n DN D25.U8 ;!!;Temp2
+dQ_2n DN D28.U8 ;!!;dQ_0t
+
+
+ ;// Function header
+ M_START omxVCM4P10_FilterDeblockingLuma_VerEdge_I, r11, d15
+ ;// Register arguments: r0 = pSrcDst, r1 = srcdstStep, r2 = pAlpha, r3 = pBeta
+ ;//Arguments on the stack
+ M_ARG ppThresholds, 4
+ M_ARG ppBS, 4
+
+ ;// d0-dAlpha_0
+ ;// d2-dBeta_0
+
+ ADD pAlpha_1, pAlpha_0, #1 ;// Address of pAlpha[1]
+ ADD pBeta_1, pBeta_0, #1 ;// Address of pBeta[1]
+
+ VLD1 {dAlpha[]}, [pAlpha_0]
+ SUB pSrcDst, pSrcDst, #4 ;// Start 4 bytes to the left so each 8-byte row load straddles the edge
+ VLD1 {dBeta[]}, [pBeta_0]
+
+ M_LDR pBS, ppBS
+ M_LDR pThresholds, ppThresholds
+
+ MOV Mask_0,#0
+
+ ;dMask_0-14
+ ;dMask_1-15
+
+ VMOV dMask_0, #0
+ VMOV dMask_1, #1
+
+ LDR XY,=LOOP_COUNT
+ ;// XY = 0x11000000 is doubled by ADDS each pass: carry-out on every 4th pass ends LoopX, zero after the 8th ends LoopY (2 x 4 passes)
+ ADD pTmpStep, srcdstStep, srcdstStep ;// pTmpStep = 2 * srcdstStep
+
+ ;// p0-p3 - d4-d7
+ ;// q0-q3 - d8-d11
+LoopY
+LoopX
+ LDRH bS10, [pBS], #4 ;// Two bS bytes for these eight rows; pBS advances by 4
+
+ CMP bS10, #0
+ BEQ NoFilterBS0
+
+ ;// Load 8 rows of data
+ ADD pTmp, pSrcDst, srcdstStep ;// pTmp walks the odd rows, pSrcDst the even rows
+ VLD1 dRow0, [pSrcDst], pTmpStep
+ VLD1 dRow1, [pTmp], pTmpStep
+ VLD1 dRow2, [pSrcDst], pTmpStep
+ VZIP.8 dRow0, dRow1
+ VLD1 dRow3, [pTmp], pTmpStep
+ VLD1 dRow4, [pSrcDst], pTmpStep
+ VZIP.8 dRow2, dRow3
+ VLD1 dRow5, [pTmp], pTmpStep
+ VLD1 dRow6, [pSrcDst], pTmpStep
+ VLD1 dRow7, [pTmp], pTmpStep
+ VZIP.8 dRow4, dRow5
+ VZIP.16 dRow1, dRow3
+
+
+ ;// dRow0 = [q3r0 q2r0 q1r0 q0r0 p0r0 p1r0 p2r0 p3r0]
+ ;// dRow1 = [q3r1 q2r1 q1r1 q0r1 p0r1 p1r1 p2r1 p3r1]
+ ;// dRow2 = [q3r2 q2r2 q1r2 q0r2 p0r2 p1r2 p2r2 p3r2]
+ ;// dRow3 = [q3r3 q2r3 q1r3 q0r3 p0r3 p1r3 p2r3 p3r3]
+ ;// dRow4 = [q3r4 q2r4 q1r4 q0r4 p0r4 p1r4 p2r4 p3r4]
+ ;// dRow5 = [q3r5 q2r5 q1r5 q0r5 p0r5 p1r5 p2r5 p3r5]
+ ;// dRow6 = [q3r6 q2r6 q1r6 q0r6 p0r6 p1r6 p2r6 p3r6]
+ ;// dRow7 = [q3r7 q2r7 q1r7 q0r7 p0r7 p1r7 p2r7 p3r7]
+
+ ;// 8x8 Transpose
+
+ VZIP.8 dRow6, dRow7
+
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #3 ;// Rewind the eight rows consumed by the loads
+ VZIP.16 dRow0, dRow2
+ VZIP.16 dRow5, dRow7
+
+
+ VZIP.16 dRow4, dRow6
+ VZIP.32 dRow1, dRow5
+ VZIP.32 dRow2, dRow6
+ VZIP.32 dRow3, dRow7
+ VZIP.32 dRow0, dRow4
+
+
+ ;// dRow0 - dP_3, dRow1 - dQ_0, dRow2 - dP_1, dRow3 - dQ_2
+ ;// dRow4 - dP_2, dRow5 - dQ_1, dRow6 - dP_0, dRow7 - dQ_3
+
+ ;// dQ_0 = [q0r7 q0r6 q0r5 q0r4 q0r3 q0r2 q0r1 q0r0]
+ ;// dQ_1 = [q1r7 q1r6 q1r5 q1r4 q1r3 q1r2 q1r1 q1r0]
+ ;// dQ_2 = [q2r7 q2r6 q2r5 q2r4 q2r3 q2r2 q2r1 q2r0]
+ ;// dQ_3 = [q3r7 q3r6 q3r5 q3r4 q3r3 q3r2 q3r1 q3r0]
+
+ ;// dP_0 = [p0r7 p0r6 p0r5 p0r4 p0r3 p0r2 p0r1 p0r0]
+ ;// dP_1 = [p1r7 p1r6 p1r5 p1r4 p1r3 p1r2 p1r1 p1r0]
+ ;// dP_2 = [p2r7 p2r6 p2r5 p2r4 p2r3 p2r2 p2r1 p2r0]
+ ;// dP_3 = [p3r7 p3r6 p3r5 p3r4 p3r3 p3r2 p3r1 p3r0]
+
+ VABD dAp0q0, dP_0, dQ_0
+ VABD dAp1p0, dP_1, dP_0
+
+ VABD dAq1q0, dQ_1, dQ_0
+ VABD dAp2p0, dP_2, dP_0
+
+ TST bS10, #0xff ;// EQ if the low bS byte is zero
+ VCGT dFilt, dAlpha, dAp0q0 ;// dFilt = (|p0-q0| < alpha)
+
+ VMAX dAp1p0, dAq1q0, dAp1p0
+ VABD dAq2q0, dQ_2, dQ_0
+
+ VMOVEQ.U32 dFilt[0], Mask_0 ;// Low bS byte zero: clear the filter mask for lanes 0-3
+ TST bS10, #0xff00 ;// EQ if the high bS byte is zero
+
+ VCGT dAp2p0, dBeta, dAp2p0
+ VCGT dAp1p0, dBeta, dAp1p0 ;// max(|p1-p0|, |q1-q0|) < beta
+
+ VMOVEQ.U32 dFilt[1], Mask_0 ;// High bS byte zero: clear the filter mask for lanes 4-7
+
+ VCGT dAq2q0, dBeta, dAq2q0
+ VAND dFilt, dFilt, dAp1p0
+ TST bS10, #4 ;// Tests bit 2 of the low bS byte only; consumed by BNE bSGE4
+
+ VAND dAqflg, dFilt, dAq2q0
+ VAND dApflg, dFilt, dAp2p0
+
+ BNE bSGE4
+bSLT4
+ ;// bS < 4 Filtering
+
+ BL armVCM4P10_DeblockingLumabSLT4_unsafe
+
+ ;// Transpose
+
+ VZIP.8 dP_3, dP_2
+ VZIP.8 dP_1n, dP_0n
+ VZIP.8 dQ_0n, dQ_1n
+ VZIP.8 dQ_2, dQ_3
+
+
+ VZIP.16 dP_3, dP_1n
+ ADD pTmp, pSrcDst, srcdstStep ;// pTmp walks the odd rows for the stores below
+ VZIP.16 dQ_0n, dQ_2
+ VZIP.16 dQ_1n, dQ_3
+ VZIP.16 dP_2, dP_0n
+
+ VZIP.32 dP_3, dQ_0n
+ VZIP.32 dP_1n, dQ_2
+ VZIP.32 dP_2, dQ_1n
+ VZIP.32 dP_0n, dQ_3
+
+ ;// dRown0 - dP_3, dRown1 - dQ_0n
+ ;// dRown2 - dP_1n, dRown3 - dQ_2
+ ;// dRown4 - dP_2, dRown5 - dQ_1n
+ ;// dRown6 - dP_0n, dRown7 - dQ_3
+
+ VST1 dRown0, [pSrcDst], pTmpStep
+ VST1 dRown1, [pTmp], pTmpStep
+ VST1 dRown2, [pSrcDst], pTmpStep
+ VST1 dRown3, [pTmp], pTmpStep
+ ;1
+ VST1 dRown4, [pSrcDst], pTmpStep
+ VST1 dRown5, [pTmp], pTmpStep
+ ADDS XY, XY, XY ;// Advance the loop counter; C is consumed by BCC LoopX, Z by BNE LoopY
+ VST1 dRown6, [pSrcDst], pTmpStep
+ ADD pThresholds, pThresholds, #2 ;// NOTE(review): +2 here versus +4 on the other two paths - presumably the bSLT4 helper advances the remaining 2; confirm
+ VST1 dRown7, [pTmp], srcdstStep ;// Post-increment differs from the other stores, but pTmp is recomputed before its next use
+
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #3 ;// Rewind the eight rows written by the stores
+ VLD1 {dAlpha[]}, [pAlpha_1] ;// Later edges of this row band use the second alpha/beta entry
+ ADD pSrcDst, pSrcDst, #4 ;// Move four bytes right to the next edge
+ VLD1 {dBeta[]}, [pBeta_1]
+
+ BCC LoopX
+ B ExitLoopY
+
+NoFilterBS0
+ ADD pSrcDst, pSrcDst, #4 ;// Nothing was loaded, so just step four bytes right
+ ADDS XY, XY, XY
+ VLD1 {dAlpha[]}, [pAlpha_1]
+ ADD pThresholds, pThresholds, #4
+ VLD1 {dBeta[]}, [pBeta_1]
+ BCC LoopX
+ B ExitLoopY
+bSGE4
+ ;// bS >= 4 Filtering
+
+ BL armVCM4P10_DeblockingLumabSGE4_unsafe
+
+ ;// Transpose
+
+ VZIP.8 dP_3, dP_2n
+ VZIP.8 dP_1n, dP_0n
+ VZIP.8 dQ_0n, dQ_1n
+ VZIP.8 dQ_2n, dQ_3
+
+ VZIP.16 dP_3, dP_1n
+ ADD pTmp, pSrcDst, srcdstStep ;// pTmp walks the odd rows for the stores below
+ VZIP.16 dQ_0n, dQ_2n
+ VZIP.16 dQ_1n, dQ_3
+ VZIP.16 dP_2n, dP_0n
+
+ VZIP.32 dP_3, dQ_0n
+ VZIP.32 dP_1n, dQ_2n
+ VZIP.32 dP_2n, dQ_1n
+ VZIP.32 dP_0n, dQ_3
+
+ ;// dRow0n - dP_3, dRow1n - dQ_0n, dRow2n - dP_1n, dRow3n - dQ_2n
+ ;// dRow4n - dP_2n, dRow5n - dQ_1n, dRow6n - dP_0n, dRow7n - dQ_3
+
+ VST1 dRow0n, [pSrcDst], pTmpStep
+ VST1 dRow1n, [pTmp], pTmpStep
+ VST1 dRow2n, [pSrcDst], pTmpStep
+ VST1 dRow3n, [pTmp], pTmpStep
+ VST1 dRow4n, [pSrcDst], pTmpStep
+ VST1 dRow5n, [pTmp], pTmpStep
+ ADDS XY,XY,XY
+ VST1 dRow6n, [pSrcDst], pTmpStep
+ ADD pThresholds, pThresholds, #4
+ VST1 dRow7n, [pTmp], pTmpStep
+
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #3 ;// Rewind the eight rows written by the stores
+ VLD1 {dAlpha[]}, [pAlpha_1]
+ ADD pSrcDst, pSrcDst, #4 ;// Move four bytes right to the next edge
+ VLD1 {dBeta[]}, [pBeta_1]
+
+ BCC LoopX
+
+ExitLoopY
+ SUB pBS, pBS, #14 ;// Four LDRH post-increments of 4 = 16; 16 - 14 leaves pBS two bytes past where this row band started
+ SUB pThresholds, pThresholds, #14
+ SUB pSrcDst, pSrcDst, #16 ;// Undo the four +4 column steps
+ VLD1 {dAlpha[]}, [pAlpha_0] ;// The next row band starts again with the first alpha/beta entry
+ ADD pSrcDst, pSrcDst, srcdstStep, LSL #3 ;// Down eight rows to the next row band
+ VLD1 {dBeta[]}, [pBeta_0]
+ BNE LoopY ;// Z still reflects the last ADDS XY (nothing in between sets flags)
+
+ MOV r0, #OMX_Sts_NoErr
+
+ M_END
+
+ ENDIF
+
+
+ END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
new file mode 100755
index 0000000..3ce41be
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
@@ -0,0 +1,79 @@
+/**
+ *
+ * File Name: omxVCM4P10_InterpolateChroma.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate 1/8 Pixel interpolation for Chroma Block
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P10_InterpolateChroma,
+ *
+ * Description:
+ * Performs 1/8-pixel interpolation for inter chroma MB.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrc Pointer to the source reference frame buffer
+ * [in] srcStep Reference frame step in byte
+ * [in] dstStep Destination frame step in byte. Must be multiple of roi.width.
+ * [in] dx Fractional part of horizontal motion vector component
+ * in 1/8 pixel unit;valid in the range [0,7]
+ * [in] dy Fractional part of vertical motion vector component
+ * in 1/8 pixel unit;valid in the range [0,7]
+ * [in] roi Dimension of the interpolation region;the parameters roi.width and roi.height must
+ * be equal to either 2, 4, or 8.
+ * [out] pDst Pointer to the destination frame buffer.
+ * if roi.width==2, 2-byte alignment required
+ * if roi.width==4, 4-byte alignment required
+ * if roi.width==8, 8-byte alignment required
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns OMX_Sts_BadArgErr:
+ * pSrc or pDst is NULL.
+ * srcStep or dstStep < 8.
+ * dx or dy is out of range [0-7].
+ * roi.width or roi.height is out of range {2,4,8}.
+ * roi.width is equal to 2, but pDst is not 2-byte aligned.
+ * roi.width is equal to 4, but pDst is not 4-byte aligned.
+ * roi.width is equal to 8, but pDst is not 8 byte aligned.
+ * srcStep or dstStep is not a multiple of 8.
+ *
+ */
+
+OMXResult omxVCM4P10_InterpolateChroma(const OMX_U8* pSrc, OMX_S32 srcStep,
+                                       OMX_U8* pDst, OMX_S32 dstStep,
+                                       OMX_S32 dx, OMX_S32 dy, OMXSize roi)
+{
+    /* The helper is called with a non-const pointer to the same source data. */
+    OMX_U8* pRef = (OMX_U8*)pSrc;
+    OMXResult status;
+
+    /* No argument checks are made here; roi is passed on as width and height. */
+    status = armVCM4P10_Interpolate_Chroma(pRef, srcStep, pDst, dstStep,
+                                           roi.width, roi.height, dx, dy);
+    return status;
+}
+
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
new file mode 100755
index 0000000..942ebc6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
@@ -0,0 +1,553 @@
+;//
+;//
+;// File Name: omxVCM4P10_InterpolateLuma_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Function:
+;// omxVCM4P10_InterpolateLuma
+;//
+;// This function implements omxVCM4P10_InterpolateLuma in ARM assembly (this variant is built for CortexA8 and uses NEON instructions).
+;// Performs quarter pel interpolation of inter luma MB.
+;// It's assumed that the frame is already padded when calling this function.
+;// Parameters:
+;// [in] pSrc Pointer to the source reference frame buffer
+;// [in] srcStep Reference frame step in byte
+;// [in] dstStep Destination frame step in byte. Must be multiple of roi.width
+;// [in] dx Fractional part of horizontal motion vector
+;// component in 1/4 pixel unit; valid in the range [0,3]
+;// [in] dy Fractional part of vertical motion vector
+;// component in 1/4 pixel unit; valid in the range [0,3]
+;// [in] roi Dimension of the interpolation region;the parameters roi.width and roi.height must
+;// be equal to either 4, 8, or 16.
+;// [out] pDst Pointer to the destination frame buffer.
+;// if roi.width==4, 4-byte alignment required
+;// if roi.width==8, 8-byte alignment required
+;// if roi.width==16, 16-byte alignment required
+;//
+;// Return Value:
+;// If the function runs without error, it returns OMX_Sts_NoErr.
+;// It is assumed that the following conditions are satisfied before calling this function:
+;// pSrc and pDst are not NULL.
+;// srcStep and dstStep are >= roi.width.
+;// dx and dy are in the range [0-3].
+;// roi.width and roi.height are each one of {4, 8, 16}.
+;// If roi.width is equal to 4, pDst is 4 byte aligned.
+;// If roi.width is equal to 8, pDst is 8 byte aligned.
+;// If roi.width is equal to 16, pDst is 16 byte aligned.
+;// srcStep and dstStep are multiples of 8.
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+ EXPORT omxVCM4P10_InterpolateLuma
+
+
+ IF CortexA8
+ IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+ IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+ ENDIF
+
+
+
+;// Declare input registers
+pSrc RN 0 ;// r0: source pointer
+srcStep RN 1 ;// r1: source step in bytes
+pDst RN 2 ;// r2: destination pointer
+dstStep RN 3 ;// r3: destination step in bytes
+iHeight RN 4 ;// remaining rows, loaded from the ptrHeight stack slot
+iWidth RN 5 ;// remaining columns, loaded from the ptrWidth stack slot
+
+;// Declare other intermediate registers
+idx RN 6 ;// loaded from the ptridx stack slot
+idy RN 7 ;// loaded from the ptridy stack slot
+index RN 6 ;// same register as idx: computing index overwrites idx
+Temp RN 12 ;// second row pointer (base + 2*step) used by the loads/stores
+pArgs RN 11 ;// address of the ppArgs save area
+
+
+ IF CortexA8
+
+ ;//
+ ;// Luma interpolation is done one 4x4 block of pixels at a time.
+ ;//
+ M_ALLOC4 ppArgs, 16 ;// 16-byte local: holds {pSrc,srcStep,pDst,dstStep} between the STM and LDM on pArgs
+ ;// NOTE(review): M_ALLOC4/M_START come from armCOMM_s.h (not visible here); r11/d15 are presumably the highest registers the prologue preserves - confirm.
+ ;// Function header
+ M_START omxVCM4P10_InterpolateLuma, r11, d15
+
+pSrcBK RN 8 ;// copy of pSrc kept across the first helper call in the two-helper cases (5, 7, d, f)
+
+;// Declare Neon registers
+dCoeff5 DN 30.S16 ;// set to #5 before the block loop
+dCoeff20 DN 31.S16 ;// set to #20 before the block loop
+
+;// Registers used for implementing Horizontal interpolation
+dSrc0c DN 14.U8
+dSrc1c DN 16.U8
+dSrc2c DN 18.U8
+dSrc3c DN 20.U8
+dSrc0d DN 15.U8
+dSrc1d DN 17.U8
+dSrc2d DN 19.U8
+dSrc3d DN 21.U8
+dAccH0 DN 22.U8
+dAccH1 DN 24.U8
+dAccH2 DN 26.U8
+dAccH3 DN 28.U8
+dResultH0 DN 22.U32 ;// dResultH* are the dAccH* registers viewed as 32-bit lanes (used for the 4-pixel stores)
+dResultH1 DN 24.U32
+dResultH2 DN 26.U32
+dResultH3 DN 28.U32
+
+;// Registers used for implementing Vertical interpolation
+dSrc0 DN 9.U8
+dSrc1 DN 10.U8
+dSrc2 DN 11.U8
+dSrc3 DN 12.U8
+dSrc4 DN 13.U8
+dAccV0 DN 0.U8
+dAccV1 DN 2.U8
+dAccV2 DN 4.U8
+dAccV3 DN 6.U8
+dResultV0 DN 0.U32 ;// dResultV* are the dAccV* registers viewed as 32-bit lanes
+dResultV1 DN 2.U32
+dResultV2 DN 4.U32
+dResultV3 DN 6.U32
+
+;// Registers used for implementing Diagonal interpolation
+dTAcc0 DN 0.U8 ;// dTAcc*/dTRes* share D0/D2/D4/D6 with dAccV*/dResultV*
+dTAcc1 DN 2.U8
+dTAcc2 DN 4.U8
+dTAcc3 DN 6.U8
+dTRes0 DN 0.32
+dTRes1 DN 2.32
+dTRes2 DN 4.32
+dTRes3 DN 6.32
+dTResult0 DN 14.U8 ;// dTResult* share D14/D16/D18/D20 with dSrc*c
+dTResult1 DN 16.U8
+dTResult2 DN 18.U8 ;// also the register named dTempP0 below
+dTResult3 DN 20.U8 ;// also the register named dTempQ0 below
+dTempP0 DN 18.S16
+dTempP1 DN 19.S16
+dTempQ0 DN 20.S16
+dTempQ1 DN 21.S16
+dTempR0 DN 22.S16
+dTempR1 DN 23.S16
+dTempS0 DN 24.S16
+dTempS1 DN 25.S16
+qTempP01 QN 9.S16 ;// Q9 = D18:D19 = dTempP0:dTempP1
+qTempQ01 QN 10.S16 ;// Q10 = D20:D21 = dTempQ0:dTempQ1
+qTempR01 QN 11.S16 ;// Q11 = D22:D23 = dTempR0:dTempR1
+qTempS01 QN 12.S16 ;// Q12 = D24:D25 = dTempS0:dTempS1
+
+;// Intermediate values for averaging
+qRes2 QN 7.S16 ;// Q7 = D14:D15 (low half is dTResult0)
+qRes3 QN 8.S16 ;// Q8 = D16:D17 (low half is dTResult1)
+qRes4 QN 9.S16 ;// Q9 = D18:D19 (low half is dTResult2)
+qRes5 QN 10.S16 ;// Q10 = D20:D21 (low half is dTResult3)
+qRes6 QN 11.S16 ;// Q11 = D22:D23
+
+;// For implementing copy
+dDst0 DN 9.32 ;// dDst* are the dSrc0..dSrc3 registers viewed as 32-bit lanes
+dDst1 DN 10.32
+dDst2 DN 11.32
+dDst3 DN 12.32
+
+ ;// Define stack arguments
+ M_ARG ptridx, 4
+ M_ARG ptridy, 4
+ M_ARG ptrWidth, 4
+ M_ARG ptrHeight, 4
+
+ ;// Load the four stack arguments (presumably dx, dy, roi.width, roi.height in that order - confirm against the C prototype)
+ M_LDR idx, ptridx
+ M_LDR idy, ptridy
+ M_LDR iWidth, ptrWidth
+ M_LDR iHeight, ptrHeight
+
+ ADD index, idx, idy, LSL #2 ;// index = idx + 4*idy; index shares r6 with idx, so idx is gone after this
+ M_ADR pArgs, ppArgs
+
+ ;// Move coefficients Neon registers
+ VMOV dCoeff20, #20
+ VMOV dCoeff5, #5
+
+Block4x4WidthLoop
+Block4x4HeightLoop
+ ;// Both loop labels mark the same address: every 4x4 block starts here.
+ STM pArgs, {pSrc,srcStep,pDst,dstStep} ;// save the block's arguments; reloaded by the LDM at Block4x4LoopEnd
+
+ ;// switch table using motion vector as index
+ ADD pc, pc, index, LSL #2 ;// in ARM state pc reads as this instruction + 8, so index 0 lands on "B Case_0"
+ B Case_f ;// filler slot at +4: the ADD above cannot target it for index >= 0
+ B Case_0
+ B Case_1
+ B Case_2
+ B Case_3
+ B Case_4
+ B Case_5
+ B Case_6
+ B Case_7
+ B Case_8
+ B Case_9
+ B Case_a
+ B Case_b
+ B Case_c
+ B Case_d
+ B Case_e
+ B Case_f
+
+Case_0
+ ;// Case G: index 0 (idx=0, idy=0), plain copy with no filtering
+ M_PRINTF "Case 0 \n"
+
+ ;// Loads a 4x4 block of .8 and stores as .32
+ ADD Temp, pSrc, srcStep, LSL #1 ;// Temp = pSrc + 2*srcStep (row 2)
+ VLD1 dSrc0, [pSrc], srcStep ;// row 0, then pSrc advances to row 1
+ VLD1 dSrc2, [Temp], srcStep ;// row 2, then Temp advances to row 3
+ VLD1 dSrc1, [pSrc] ;// row 1
+ VLD1 dSrc3, [Temp] ;// row 3
+
+ ADD Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep
+ VST1 dDst0[0], [pDst], dstStep ;// dDst* name the same registers as dSrc*; only 32-bit lane 0 (4 pixels) is written
+ VST1 dDst2[0], [Temp], dstStep
+ VST1 dDst1[0], [pDst]
+ VST1 dDst3[0], [Temp]
+ M_ADR pArgs, ppArgs ;// pArgs is the base of the LDM at Block4x4LoopEnd
+ B Block4x4LoopEnd
+Case_1
+ ;// Case a: index 1 (idx=1, idy=0)
+ M_PRINTF "Case 1 \n"
+ ;// NOTE(review): the helper is outside this view; it presumably leaves its result in dAccH* and source rows in dSrc*c - confirm.
+ SUB pSrc, pSrc, #2 ;// helper is called with a pointer 2 bytes left of the block
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ VRHADD dAccH0, dAccH0, dSrc0c ;// rounding halving add: (dAccH + dSrc*c + 1) >> 1 per byte
+ VRHADD dAccH2, dAccH2, dSrc2c
+ VRHADD dAccH1, dAccH1, dSrc1c
+ VRHADD dAccH3, dAccH3, dSrc3c
+ ADD Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep
+ VST1 dResultH0[0], [pDst], dstStep ;// store 32-bit lane 0 (4 pixels) of each row
+ VST1 dResultH2[0], [Temp], dstStep
+ VST1 dResultH1[0], [pDst]
+ VST1 dResultH3[0], [Temp]
+ M_ADR pArgs, ppArgs ;// pArgs is the base of the LDM at Block4x4LoopEnd
+ B Block4x4LoopEnd
+Case_2
+ ;// Case b: index 2 (idx=2, idy=0); helper output is stored without averaging
+ M_PRINTF "Case 2 \n"
+ ;// NOTE(review): the helper is outside this view; it presumably leaves its result in dResultH* - confirm.
+ SUB pSrc, pSrc, #2 ;// helper is called with a pointer 2 bytes left of the block
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ ADD Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep
+ VST1 dResultH0[0], [pDst], dstStep ;// store 32-bit lane 0 (4 pixels) of each row
+ VST1 dResultH2[0], [Temp], dstStep
+ VST1 dResultH1[0], [pDst]
+ VST1 dResultH3[0], [Temp]
+ M_ADR pArgs, ppArgs ;// pArgs is the base of the LDM at Block4x4LoopEnd
+ B Block4x4LoopEnd
+Case_3
+ ;// Case c: index 3 (idx=3, idy=0); same as Case_1 but averages with dSrc*d instead of dSrc*c
+ M_PRINTF "Case 3 \n"
+ ;// NOTE(review): the helper is outside this view; it presumably leaves its result in dAccH* and source rows in dSrc*d - confirm.
+ SUB pSrc, pSrc, #2 ;// helper is called with a pointer 2 bytes left of the block
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ VRHADD dAccH0, dAccH0, dSrc0d ;// rounding halving add: (dAccH + dSrc*d + 1) >> 1 per byte
+ VRHADD dAccH2, dAccH2, dSrc2d
+ VRHADD dAccH1, dAccH1, dSrc1d
+ VRHADD dAccH3, dAccH3, dSrc3d
+ ADD Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep
+ VST1 dResultH0[0], [pDst], dstStep ;// store 32-bit lane 0 (4 pixels) of each row
+ VST1 dResultH2[0], [Temp], dstStep
+ VST1 dResultH1[0], [pDst]
+ VST1 dResultH3[0], [Temp]
+ M_ADR pArgs, ppArgs ;// pArgs is the base of the LDM at Block4x4LoopEnd
+ B Block4x4LoopEnd
+Case_4
+ ;// Case d: index 4 (idx=0, idy=1)
+ M_PRINTF "Case 4 \n"
+ ;// NOTE(review): the helper is outside this view; it presumably leaves its result in dAccV* and source rows in dSrc0..dSrc4 - confirm.
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// helper is called with a pointer 2 rows above the block
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ VRHADD dAccV0, dAccV0, dSrc0 ;// rounding halving add of output row k with dSrc<k>
+ VRHADD dAccV2, dAccV2, dSrc2
+ VRHADD dAccV1, dAccV1, dSrc1
+ VRHADD dAccV3, dAccV3, dSrc3
+ ADD Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep
+ VST1 dResultV0[0], [pDst], dstStep ;// store 32-bit lane 0 (4 pixels) of each row
+ VST1 dResultV2[0], [Temp], dstStep
+ VST1 dResultV1[0], [pDst]
+ VST1 dResultV3[0], [Temp]
+ M_ADR pArgs, ppArgs ;// pArgs is the base of the LDM at Block4x4LoopEnd
+ B Block4x4LoopEnd
+Case_5
+ ;// Case e: index 5 (idx=1, idy=1); average of the vertical and horizontal helper outputs
+ M_PRINTF "Case 5 \n"
+ ;// NOTE(review): relies on the second helper leaving dAccV* intact; both helpers are outside this view - confirm.
+ MOV pSrcBK, pSrc ;// keep the block origin for the second helper call
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// vertical helper: pointer 2 rows above the block
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ SUB pSrc, pSrcBK, #2 ;// horizontal helper: block origin minus 2 bytes
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ VRHADD dAccH0, dAccH0, dAccV0 ;// (dAccH + dAccV + 1) >> 1 per byte
+ VRHADD dAccH2, dAccH2, dAccV2
+ VRHADD dAccH1, dAccH1, dAccV1
+ VRHADD dAccH3, dAccH3, dAccV3
+ ADD Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep
+ VST1 dResultH0[0], [pDst], dstStep ;// store 32-bit lane 0 (4 pixels) of each row
+ VST1 dResultH2[0], [Temp], dstStep
+ VST1 dResultH1[0], [pDst]
+ VST1 dResultH3[0], [Temp]
+
+ M_ADR pArgs, ppArgs ;// pArgs is the base of the LDM at Block4x4LoopEnd
+ B Block4x4LoopEnd
+Case_6
+ ;// Case f: index 6 (idx=2, idy=1)
+ M_PRINTF "Case 6 \n"
+ ;// NOTE(review): the helper is outside this view; it presumably leaves 16-bit rows in qRes2..qRes5 and its 8-bit result in dTAcc* - confirm.
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// 2 rows above the block ...
+ SUB pSrc, pSrc, #2 ;// ... and 2 bytes to the left
+ BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+ VQRSHRUN dTResult0, qRes2, #5 ;// (x + 16) >> 5, saturated to unsigned 8 bits; dTResult<k> is the low half of its own source Q register
+ VQRSHRUN dTResult1, qRes3, #5
+ VQRSHRUN dTResult2, qRes4, #5
+ VQRSHRUN dTResult3, qRes5, #5
+ VRHADD dTAcc0, dTAcc0, dTResult0 ;// (dTAcc + dTResult + 1) >> 1 per byte
+ VRHADD dTAcc2, dTAcc2, dTResult2
+ VRHADD dTAcc1, dTAcc1, dTResult1
+ VRHADD dTAcc3, dTAcc3, dTResult3
+ ADD Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep
+ VST1 dTRes0[0], [pDst], dstStep ;// dTRes* are dTAcc* viewed as 32-bit lanes; lane 0 = 4 pixels
+ VST1 dTRes2[0], [Temp], dstStep
+ VST1 dTRes1[0], [pDst]
+ VST1 dTRes3[0], [Temp]
+
+ M_ADR pArgs, ppArgs ;// pArgs is the base of the LDM at Block4x4LoopEnd
+ B Block4x4LoopEnd
+Case_7
+ ;// Case g: index 7 (idx=3, idy=1); like Case_5 but the vertical helper starts one byte to the right
+ M_PRINTF "Case 7 \n"
+ MOV pSrcBK, pSrc ;// keep the block origin for the second helper call
+ ADD pSrc, pSrc, #1 ;// vertical helper: one byte right of the block origin ...
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// ... and 2 rows above
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ SUB pSrc, pSrcBK, #2 ;// horizontal helper: block origin minus 2 bytes
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe ;// NOTE(review): assumed to leave dAccV* intact - helper not visible here
+ VRHADD dAccH0, dAccH0, dAccV0 ;// (dAccH + dAccV + 1) >> 1 per byte
+ VRHADD dAccH2, dAccH2, dAccV2
+ VRHADD dAccH1, dAccH1, dAccV1
+ VRHADD dAccH3, dAccH3, dAccV3
+ ADD Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep
+ VST1 dResultH0[0], [pDst], dstStep ;// store 32-bit lane 0 (4 pixels) of each row
+ VST1 dResultH2[0], [Temp], dstStep
+ VST1 dResultH1[0], [pDst]
+ VST1 dResultH3[0], [Temp]
+
+ M_ADR pArgs, ppArgs ;// pArgs is the base of the LDM at Block4x4LoopEnd
+ B Block4x4LoopEnd
+Case_8
+ ;// Case h: index 8 (idx=0, idy=2); helper output is stored without averaging
+ M_PRINTF "Case 8 \n"
+ ;// NOTE(review): the helper is outside this view; it presumably leaves its result in dResultV* - confirm.
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// helper is called with a pointer 2 rows above the block
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ ADD Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep
+ VST1 dResultV0[0], [pDst], dstStep ;// store 32-bit lane 0 (4 pixels) of each row
+ VST1 dResultV2[0], [Temp], dstStep
+ VST1 dResultV1[0], [pDst]
+ VST1 dResultV3[0], [Temp]
+ M_ADR pArgs, ppArgs ;// pArgs is the base of the LDM at Block4x4LoopEnd
+ B Block4x4LoopEnd
+Case_9
+ ;// Case i: index 9 (idx=1, idy=2)
+ M_PRINTF "Case 9 \n"
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// 2 rows above the block ...
+ SUB pSrc, pSrc, #2 ;// ... and 2 bytes to the left
+ BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe ;// NOTE(review): presumably leaves 16-bit rows in dTemp* and its 8-bit result in dTAcc* - confirm
+ VEXT dTempP0, dTempP0, dTempP1, #2 ;// low half of each row pair now starts at element 2 (presumably counted in 16-bit lanes - confirm)
+ VEXT dTempQ0, dTempQ0, dTempQ1, #2
+ VEXT dTempR0, dTempR0, dTempR1, #2
+ VEXT dTempS0, dTempS0, dTempS1, #2
+ ;// Order matters below: dTResult2/dTResult3 overwrite dTempP0/dTempQ0 after qTempP01/qTempQ01 have been read.
+ VQRSHRUN dTResult0, qTempP01, #5 ;// (x + 16) >> 5, saturated to unsigned 8 bits
+ VQRSHRUN dTResult1, qTempQ01, #5
+ VQRSHRUN dTResult2, qTempR01, #5
+ VQRSHRUN dTResult3, qTempS01, #5
+
+ VRHADD dTAcc0, dTAcc0, dTResult0 ;// (dTAcc + dTResult + 1) >> 1 per byte
+ VRHADD dTAcc2, dTAcc2, dTResult2
+ VRHADD dTAcc1, dTAcc1, dTResult1
+ VRHADD dTAcc3, dTAcc3, dTResult3
+ ADD Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep
+ VST1 dTRes0[0], [pDst], dstStep ;// dTRes* are dTAcc* viewed as 32-bit lanes; lane 0 = 4 pixels
+ VST1 dTRes2[0], [Temp], dstStep
+ VST1 dTRes1[0], [pDst]
+ VST1 dTRes3[0], [Temp]
+ M_ADR pArgs, ppArgs ;// pArgs is the base of the LDM at Block4x4LoopEnd
+ B Block4x4LoopEnd
+Case_a
+ ;// Case j: index 10 (idx=2, idy=2); helper output is stored without averaging
+ M_PRINTF "Case a \n"
+ ;// NOTE(review): the helper is outside this view; it presumably leaves its 8-bit result in dTRes* - confirm.
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// 2 rows above the block ...
+ SUB pSrc, pSrc, #2 ;// ... and 2 bytes to the left
+ BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+ ADD Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep
+ VST1 dTRes0[0], [pDst], dstStep ;// store 32-bit lane 0 (4 pixels) of each row
+ VST1 dTRes2[0], [Temp], dstStep
+ VST1 dTRes1[0], [pDst]
+ VST1 dTRes3[0], [Temp]
+ M_ADR pArgs, ppArgs ;// pArgs is the base of the LDM at Block4x4LoopEnd
+ B Block4x4LoopEnd
+Case_b
+ ;// Case k: index 11 (idx=3, idy=2); same as Case_9 except the VEXT offset is 3 instead of 2
+ M_PRINTF "Case b \n"
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// 2 rows above the block ...
+ SUB pSrc, pSrc, #2 ;// ... and 2 bytes to the left
+ BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe ;// NOTE(review): presumably leaves 16-bit rows in dTemp* and its 8-bit result in dTAcc* - confirm
+ VEXT dTempP0, dTempP0, dTempP1, #3 ;// low half of each row pair now starts at element 3 (presumably counted in 16-bit lanes - confirm)
+ VEXT dTempQ0, dTempQ0, dTempQ1, #3
+ VEXT dTempR0, dTempR0, dTempR1, #3
+ VEXT dTempS0, dTempS0, dTempS1, #3
+ ;// Order matters below: dTResult2/dTResult3 overwrite dTempP0/dTempQ0 after qTempP01/qTempQ01 have been read.
+ VQRSHRUN dTResult0, qTempP01, #5 ;// (x + 16) >> 5, saturated to unsigned 8 bits
+ VQRSHRUN dTResult1, qTempQ01, #5
+ VQRSHRUN dTResult2, qTempR01, #5
+ VQRSHRUN dTResult3, qTempS01, #5
+
+ VRHADD dTAcc0, dTAcc0, dTResult0 ;// (dTAcc + dTResult + 1) >> 1 per byte
+ VRHADD dTAcc2, dTAcc2, dTResult2
+ VRHADD dTAcc1, dTAcc1, dTResult1
+ VRHADD dTAcc3, dTAcc3, dTResult3
+ ADD Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep
+ VST1 dTRes0[0], [pDst], dstStep ;// dTRes* are dTAcc* viewed as 32-bit lanes; lane 0 = 4 pixels
+ VST1 dTRes2[0], [Temp], dstStep
+ VST1 dTRes1[0], [pDst]
+ VST1 dTRes3[0], [Temp]
+ M_ADR pArgs, ppArgs ;// pArgs is the base of the LDM at Block4x4LoopEnd
+ B Block4x4LoopEnd
+Case_c
+ ;// Case n: index 12 (idx=0, idy=3); same as Case_4 but output row k is averaged with dSrc<k+1>
+ M_PRINTF "Case c \n"
+ ;// NOTE(review): the helper is outside this view; it presumably leaves its result in dAccV* and source rows in dSrc0..dSrc4 - confirm.
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// helper is called with a pointer 2 rows above the block
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ VRHADD dAccV0, dAccV0, dSrc1 ;// rounding halving add of output row k with dSrc<k+1>
+ VRHADD dAccV2, dAccV2, dSrc3
+ VRHADD dAccV1, dAccV1, dSrc2
+ VRHADD dAccV3, dAccV3, dSrc4
+ ADD Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep
+ VST1 dResultV0[0], [pDst], dstStep ;// store 32-bit lane 0 (4 pixels) of each row
+ VST1 dResultV2[0], [Temp], dstStep
+ VST1 dResultV1[0], [pDst]
+ VST1 dResultV3[0], [Temp]
+ M_ADR pArgs, ppArgs ;// pArgs is the base of the LDM at Block4x4LoopEnd
+ B Block4x4LoopEnd
+Case_d
+ ;// Case p: index 13 (idx=1, idy=3); like Case_5 but the horizontal helper starts one row lower
+ M_PRINTF "Case d \n"
+ ;// NOTE(review): relies on the second helper leaving dAccV* intact; both helpers are outside this view - confirm.
+ MOV pSrcBK, pSrc ;// keep the block origin for the second helper call
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// vertical helper: pointer 2 rows above the block
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ ADD pSrc, pSrcBK, srcStep ;// horizontal helper: one row below the block origin ...
+ SUB pSrc, pSrc, #2 ;// ... and 2 bytes to the left
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ VRHADD dAccH0, dAccH0, dAccV0 ;// (dAccH + dAccV + 1) >> 1 per byte
+ VRHADD dAccH2, dAccH2, dAccV2
+ VRHADD dAccH1, dAccH1, dAccV1
+ VRHADD dAccH3, dAccH3, dAccV3
+ ADD Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep
+ VST1 dResultH0[0], [pDst], dstStep ;// store 32-bit lane 0 (4 pixels) of each row
+ VST1 dResultH2[0], [Temp], dstStep
+ VST1 dResultH1[0], [pDst]
+ VST1 dResultH3[0], [Temp]
+ M_ADR pArgs, ppArgs ;// pArgs is the base of the LDM at Block4x4LoopEnd
+ B Block4x4LoopEnd
+Case_e
+ ;// Case q: index 14 (idx=2, idy=3); same as Case_6 but narrows qRes3..qRes6 instead of qRes2..qRes5
+ M_PRINTF "Case e \n"
+ ;// NOTE(review): the helper is outside this view; it presumably leaves 16-bit rows in qRes3..qRes6 and its 8-bit result in dTAcc* - confirm.
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// 2 rows above the block ...
+ SUB pSrc, pSrc, #2 ;// ... and 2 bytes to the left
+ BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+ VQRSHRUN dTResult0, qRes3, #5 ;// (x + 16) >> 5, saturated to unsigned 8 bits
+ VQRSHRUN dTResult1, qRes4, #5 ;// order matters: dTResult1 is the low half of qRes3, which was read on the previous line
+ VQRSHRUN dTResult2, qRes5, #5 ;// likewise dTResult2 is the low half of qRes4
+ VQRSHRUN dTResult3, qRes6, #5 ;// likewise dTResult3 is the low half of qRes5
+
+ VRHADD dTAcc0, dTAcc0, dTResult0 ;// (dTAcc + dTResult + 1) >> 1 per byte
+ VRHADD dTAcc2, dTAcc2, dTResult2
+ VRHADD dTAcc1, dTAcc1, dTResult1
+ VRHADD dTAcc3, dTAcc3, dTResult3
+ ADD Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep
+ VST1 dTRes0[0], [pDst], dstStep ;// dTRes* are dTAcc* viewed as 32-bit lanes; lane 0 = 4 pixels
+ VST1 dTRes2[0], [Temp], dstStep
+ VST1 dTRes1[0], [pDst]
+ VST1 dTRes3[0], [Temp]
+ M_ADR pArgs, ppArgs ;// pArgs is the base of the LDM at Block4x4LoopEnd
+ B Block4x4LoopEnd
+Case_f
+ ;// Case r: index 15 (idx=3, idy=3); vertical helper one byte right, horizontal helper one row lower
+ M_PRINTF "Case f \n"
+ MOV pSrcBK, pSrc ;// keep the block origin for the second helper call
+ ADD pSrc, pSrc, #1 ;// vertical helper: one byte right of the block origin ...
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// ... and 2 rows above
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ ADD pSrc, pSrcBK, srcStep ;// horizontal helper: one row below the block origin ...
+ SUB pSrc, pSrc, #2 ;// ... and 2 bytes to the left
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe ;// NOTE(review): assumed to leave dAccV* intact - helper not visible here
+ VRHADD dAccH0, dAccH0, dAccV0 ;// (dAccH + dAccV + 1) >> 1 per byte
+ VRHADD dAccH2, dAccH2, dAccV2
+ VRHADD dAccH1, dAccH1, dAccV1
+ VRHADD dAccH3, dAccH3, dAccV3
+ ADD Temp, pDst, dstStep, LSL #1 ;// Temp = pDst + 2*dstStep
+ VST1 dResultH0[0], [pDst], dstStep ;// store 32-bit lane 0 (4 pixels) of each row
+ VST1 dResultH2[0], [Temp], dstStep
+ VST1 dResultH1[0], [pDst]
+ VST1 dResultH3[0], [Temp]
+ M_ADR pArgs, ppArgs ;// no branch needed: execution falls through to Block4x4LoopEnd
+
+
+Block4x4LoopEnd
+
+ ;// Width Loop
+ ;// pArgs was already set to ppArgs by the case that branched or fell through to here.
+ LDM pArgs, {pSrc,srcStep,pDst,dstStep} ;// Load arguments
+ SUBS iWidth, iWidth, #4 ;// one 4-pixel column of blocks done; flags feed the BGT below
+ ADD pSrc, pSrc, #4 ;// next block to the right
+ ADD pDst, pDst, #4
+ BGT Block4x4WidthLoop
+
+ ;// Height Loop
+ SUBS iHeight, iHeight, #4 ;// NOTE(review): BGT below needs these flags to survive M_LDR/M_ADR (macros not visible here) - confirm
+ M_LDR iWidth, ptrWidth ;// restore the full width for the next row of blocks
+ M_ADR pArgs, ppArgs
+ ADD pSrc, pSrc, srcStep, LSL #2 ;// down 4 rows
+ ADD pDst, pDst, dstStep, LSL #2
+ SUB pSrc, pSrc, iWidth ;// back to the left edge (pointers were advanced by the full width above)
+ SUB pDst, pDst, iWidth
+ BGT Block4x4HeightLoop
+
+EndOfInterpolation
+ MOV r0, #0 ;// return value 0; the file header documents OMX_Sts_NoErr as the success return
+ M_END
+
+ ENDIF
+ ;// End of CortexA8
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
new file mode 100755
index 0000000..3a60705
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
@@ -0,0 +1,436 @@
+;//
+;//
+;// File Name: omxVCM4P10_PredictIntraChroma_8x8_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ EXPORT armVCM4P10_pIndexTable8x8
+
+;// Define the processor variants supported by this file
+
+ M_VARIANTS CortexA8
+
+ AREA table, DATA
+;//-------------------------------------------------------
+;// This table implements a C-style switch in asm by
+;// the method of two levels of indexing.
+;//-------------------------------------------------------
+;// Entries are code labels; the dispatch loads entry [predMode] into pc, so the order is 0=DC, 1=HOR, 2=VERT, 3=PLANE.
+ M_TABLE armVCM4P10_pIndexTable8x8
+ DCD OMX_VC_CHROMA_DC, OMX_VC_CHROMA_HOR
+ DCD OMX_VC_CHROMA_VERT, OMX_VC_CHROMA_PLANE
+ ;// 16-bit multipliers for plane mode: row 1 is loaded on its own, rows 2-3 are loaded together as [-3..4].
+ M_TABLE armVCM4P10_MultiplierTableChroma8x8,1
+ DCW 3, 2, 1,4
+ DCW -3,-2,-1,0
+ DCW 1, 2, 3,4
+
+
+
+ IF CortexA8
+
+;//--------------------------------------------
+;// Scratch variable
+;//--------------------------------------------
+
+pc RN 15
+return RN 0 ;// shares r0 with pSrcLeft: only written once pSrcLeft is no longer needed on that path
+pTable RN 8 ;// address of armVCM4P10_pIndexTable8x8
+
+;//--------------------------------------------
+;// Input Arguments
+;//--------------------------------------------
+pSrcLeft RN 0 ;// input pointer
+pSrcAbove RN 1 ;// input pointer
+pSrcAboveLeft RN 2 ;// input pointer
+pDst RN 3 ;// output pointer
+leftStep RN 4 ;// input variable
+dstStep RN 5 ;// input variable
+predMode RN 6 ;// input variable
+availability RN 7 ;// input variable
+pMultiplierTable RN 2 ;// shares r2 with pSrcAboveLeft (plane mode loads it after reading pSrcAboveLeft)
+
+pTmp RN 9 ;// second row pointer (base + one step), used for odd rows
+step RN 10 ;// twice leftStep or dstStep, so each pointer skips every other row
+
+;//---------------------
+;// Neon Registers
+;//---------------------
+
+;// OMX_VC_CHROMA_HOR
+;// dLeftVal0..7 are D0..D7, the same registers that dSum0..7 name for the shared store.
+dLeftVal0 DN D0.8
+dLeftVal1 DN D1.8
+dLeftVal2 DN D2.8
+dLeftVal3 DN D3.8
+dLeftVal4 DN D4.8
+dLeftVal5 DN D5.8
+dLeftVal6 DN D6.8
+dLeftVal7 DN D7.8
+
+;// OMX_VC_CHROMA_VERT
+
+dAboveVal DN D0.U8 ;// same register as dDstRow0
+
+;// OMX_VC_CHROMA_DC
+
+dLeftVal DN D1.U8
+dSumAboveValU16 DN D2.U16
+dSumAboveValU32 DN D3.U32
+dSumAboveValU8 DN D3.U8
+dSumLeftValU16 DN D2.U16 ;// D2 is reused: above pair-sums, then left pair-sums, then the combined sum
+dSumLeftValU32 DN D1.U32
+dSumLeftValU8 DN D1.U8
+dSumAboveLeft DN D2.U32
+dSumAboveLeftU8 DN D2.U8
+dIndexRow0U8 DN D5.U8
+dIndexRow0 DN D5.U64
+dIndexRow4U8 DN D6.U8
+dIndexRow4 DN D6.U64
+dDstRow0 DN D0.U8
+dDstRow4 DN D4.U8
+dConst128U8 DN D0.U8 ;// same register as dDstRow0
+
+;// OMX_VC_CHROMA_PLANE
+
+dRevAboveVal DN D3.U8
+dRevAboveValU64 DN D3.U64
+dAboveLeftVal DN D2.U8
+qAbove7minus0 QN Q3.S16 ;// Q3 = D6:D7
+qAboveDiff QN Q2.S16 ;// Q2 = D4:D5
+dIndex DN D8.U8 ;// NOTE(review): no instruction in this function appears to reference dIndex
+dDiffAboveU8 DN D9.U8
+dDiffAboveS16 DN D9.S16
+dAboveDiff0U8 DN D4.U8 ;// low half of qAboveDiff
+dAboveDiff0U64 DN D4.U64
+dAbove7minus0U8 DN D6.U8 ;// low half of qAbove7minus0
+dMultiplier DN D10.S16
+dHorPred DN D11.S16
+dRevLeftVal DN D3.U8 ;// D3 is reused after the above-edge differences have been formed
+dRevLeftValU64 DN D3.U64
+qLeft7minus0 QN Q7.S16 ;// Q7 = D14:D15
+qLeftDiff QN Q6.S16 ;// Q6 = D12:D13
+dDiffLeftU8 DN D16.U8
+dDiffLeftS16 DN D16.S16
+dLeftDiff0U8 DN D12.U8 ;// low half of qLeftDiff
+dLeftDiff0U64 DN D12.U64
+dLeft7minus0U8 DN D14.U8 ;// low half of qLeft7minus0
+dVerPred DN D3.S16
+dHVValS16 DN D3.S16
+dHVValS32 DN D3.S32
+dHVTempS32 DN D2.S32
+qA QN Q0.S16
+qB QN Q2.S16
+qC QN Q3.S16
+qMultiplier QN Q5.S16 ;// Q5 = D10:D11 = dMultiplier0:dMultiplier1
+dMultiplier0 DN D10.S16
+dMultiplier1 DN D11.S16
+qC0 QN Q0.S16 ;// same register as qA
+qC1 QN Q1.S16
+qC2 QN Q4.S16
+qC3 QN Q5.S16 ;// same register as qMultiplier
+qC4 QN Q6.S16
+qC5 QN Q7.S16
+qC6 QN Q8.S16
+qC7 QN Q9.S16
+qSum0 QN Q0.S16 ;// qSum<k> is the same register as qC<k>
+qSum1 QN Q1.S16
+qSum2 QN Q4.S16
+qSum3 QN Q5.S16
+qSum4 QN Q6.S16
+qSum5 QN Q7.S16
+qSum6 QN Q8.S16
+qSum7 QN Q9.S16
+dSum0 DN D0.U8
+dSum1 DN D1.U8
+dSum2 DN D2.U8
+dSum3 DN D3.U8
+dSum4 DN D4.U8
+dSum5 DN D5.U8
+dSum6 DN D6.U8
+dSum7 DN D7.U8
+
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntraChroma_8x8 starts
+;//-----------------------------------------------------------------------------------------------
+
+ ;// Write function header
+ M_START omxVCM4P10_PredictIntraChroma_8x8, r10, d15
+
+ ;// Define stack arguments
+ M_ARG LeftStep, 4
+ M_ARG DstStep, 4
+ M_ARG PredMode, 4
+ M_ARG Availability, 4
+
+ LDR pTable,=armVCM4P10_pIndexTable8x8 ;// Load index table for switch case
+
+ ;// Load argument from the stack
+ M_LDR predMode, PredMode ;// Arg predMode loaded from stack to reg
+ M_LDR leftStep, LeftStep ;// Arg leftStep loaded from stack to reg
+ M_LDR dstStep, DstStep ;// Arg dstStep loaded from stack to reg
+ M_LDR availability, Availability ;// Arg availability loaded from stack to reg
+
+ ;// NOTE(review): predMode is not range-checked here; the table has 4 entries, so a value outside 0..3 loads pc from past the table.
+ LDR pc, [pTable, predMode, LSL #2] ;// Branch to the case selected by predMode
+
+OMX_VC_CHROMA_DC
+ ;// DC prediction; this path handles "left and upper available" and branches away for the other cases.
+ TST availability, #OMX_VC_LEFT
+ BEQ DCChroma8x8LeftNotAvailable
+
+ ADD pTmp, pSrcLeft, leftStep ;// pTmp walks the odd rows, pSrcLeft the even rows
+ ADD step, leftStep, leftStep ;// step = 2*leftStep
+
+ ;// Load Left Edge
+ VLD1 {dLeftVal[0]},[pSrcLeft],step ;// pSrcLeft[0*leftStep]
+ VLD1 {dLeftVal[1]},[pTmp],step ;// pSrcLeft[1*leftStep]
+ VLD1 {dLeftVal[2]},[pSrcLeft],step ;// pSrcLeft[2*leftStep]
+ VLD1 {dLeftVal[3]},[pTmp],step ;// pSrcLeft[3*leftStep]
+ VLD1 {dLeftVal[4]},[pSrcLeft],step ;// pSrcLeft[4*leftStep]
+ VLD1 {dLeftVal[5]},[pTmp],step ;// pSrcLeft[5*leftStep]
+ VLD1 {dLeftVal[6]},[pSrcLeft],step ;// pSrcLeft[6*leftStep]
+ VLD1 {dLeftVal[7]},[pTmp] ;// pSrcLeft[7*leftStep]
+
+ TST availability, #OMX_VC_UPPER
+ BEQ DCChroma8x8LeftOnlyAvailable
+
+ ;// Load Upper Edge also
+ VLD1 dAboveVal,[pSrcAbove] ;// pSrcAbove[0 to 7]
+
+ MOV return, #OMX_Sts_NoErr ;// returnNoError
+
+ VPADDL dSumAboveValU16, dAboveVal ;// pSrcAbove[ 6+7 | 4+5 | 2+3 | 0+1 ]
+ VPADDL dSumAboveValU32, dSumAboveValU16 ;// pSrcAbove[ 4+5+6+7 | 0+1+2+3 ]
+
+ VPADDL dSumLeftValU16, dLeftVal ;// pSrcLeft[ 6+7 | 4+5 | 2+3 | 0+1 ]
+ VPADDL dSumLeftValU32, dSumLeftValU16 ;// pSrcLeft[ 4+5+6+7 | 0+1+2+3 ]
+
+ VADD dSumAboveLeft,dSumAboveValU32,dSumLeftValU32 ;// per 32-bit lane: above half-sum + left half-sum
+ VRSHR dSumAboveLeft,dSumAboveLeft,#3 ;// Sum = (Sum + 4) >> 3
+ VRSHR dSumAboveValU32,dSumAboveValU32,#2 ;// Sum = (Sum + 2) >> 2
+ VRSHR dSumLeftValU32,dSumLeftValU32,#2 ;// Sum = (Sum + 2) >> 2
+
+ VMOV dIndexRow0U8,#0x0c
+ VMOV dIndexRow4U8,#0x04
+ VSHL dIndexRow0,dIndexRow0,#32 ;// index0 = 0x0c0c0c0c00000000
+ VSHR dIndexRow4,dIndexRow4,#32 ;// index4 = 0x0000000004040404
+ VADD dIndexRow4U8,dIndexRow4U8,dIndexRow0U8 ;// index4 = 0x0c0c0c0c04040404
+ VTBL dDstRow0,{dSumAboveLeftU8,dSumAboveValU8},dIndexRow0U8 ;// rows 0-3: bytes 0-3 = above+left DC (lane 0), bytes 4-7 = above-only DC (lane 1)
+ VTBL dDstRow4,{dSumLeftValU8,dSumAboveLeftU8},dIndexRow4U8 ;// rows 4-7: bytes 0-3 = left-only DC (lane 1), bytes 4-7 = above+left DC (lane 1)
+
+DCChroma8x8LeftStore
+ ADD pTmp, pDst, dstStep ;// pTmp walks the odd rows, pDst the even rows
+ ADD step, dstStep, dstStep ;// step = 2*dstStep
+
+ VST1 dDstRow0,[pDst],step ;// pDst[0*dstStep+x] :0<= x <= 7
+ VST1 dDstRow0,[pTmp],step ;// pDst[1*dstStep+x] :0<= x <= 7
+ VST1 dDstRow0,[pDst],step ;// pDst[2*dstStep+x] :0<= x <= 7
+ VST1 dDstRow0,[pTmp],step ;// pDst[3*dstStep+x] :0<= x <= 7
+ VST1 dDstRow4,[pDst],step ;// pDst[4*dstStep+x] :0<= x <= 7
+ VST1 dDstRow4,[pTmp],step ;// pDst[5*dstStep+x] :0<= x <= 7
+ VST1 dDstRow4,[pDst],step ;// pDst[6*dstStep+x] :0<= x <= 7
+ VST1 dDstRow4,[pTmp] ;// pDst[7*dstStep+x] :0<= x <= 7
+
+ M_EXIT
+
+
+DCChroma8x8LeftOnlyAvailable
+ ;// Reached with dLeftVal already loaded; only the left edge contributes to the DC values.
+ MOV return, #OMX_Sts_NoErr
+
+ VPADDL dSumLeftValU16, dLeftVal ;// pSrcLeft[ 6+7 | 4+5 | 2+3 | 0+1 ]
+ VPADDL dSumLeftValU32, dSumLeftValU16 ;// pSrcLeft[ 4+5+6+7 | 0+1+2+3 ]
+ VRSHR dSumLeftValU32,dSumLeftValU32,#2 ;// Sum = (Sum + 2) >> 2
+
+ VDUP dDstRow0,dSumLeftValU8[0] ;// rows 0-3: all 8 bytes = DC of left rows 0-3
+ VDUP dDstRow4,dSumLeftValU8[4] ;// rows 4-7: all 8 bytes = DC of left rows 4-7
+
+ B DCChroma8x8LeftStore
+
+
+DCChroma8x8LeftNotAvailable
+ ;// Left edge missing: use the upper edge alone if it is available, else the constant fill.
+ TST availability, #OMX_VC_UPPER
+ BEQ DCChroma8x8NoneAvailable
+
+ ;// Load Upper Edge
+ VLD1 dAboveVal,[pSrcAbove] ;// pSrcAbove[0 to 7]
+ MOV return, #OMX_Sts_NoErr ;// returnNoError
+
+ VPADDL dSumAboveValU16, dAboveVal ;// pSrcAbove[ 6+7 | 4+5 | 2+3 | 0+1 ]
+ VPADDL dSumAboveValU32, dSumAboveValU16 ;// pSrcAbove[ 4+5+6+7 | 0+1+2+3 ]
+ VRSHR dSumAboveValU32,dSumAboveValU32,#2 ;// Sum = (Sum + 2) >> 2
+ VMOV dIndexRow0U8,#0x04
+ VSHL dIndexRow0,dIndexRow0,#32 ;// index = 0x0404040400000000
+ VTBL dDstRow0,{dSumAboveValU8},dIndexRow0U8 ;// bytes 0-3 = DC of above[0..3], bytes 4-7 = DC of above[4..7]
+
+ B DCChroma8x8UpperStore
+
+
+DCChroma8x8NoneAvailable
+ ;// dConst128U8 is the same register as dDstRow0, so the store below writes 0x80 to every pixel.
+ VMOV dConst128U8,#0x80 ;// 0x8080808080808080 if(count == 0)
+ MOV return, #OMX_Sts_NoErr ;// returnNoError
+
+DCChroma8x8UpperStore
+ ;// Shared store: writes the same 8 bytes (dDstRow0) to all 8 rows.
+ ADD pTmp, pDst, dstStep
+ ADD step, dstStep, dstStep
+
+ VST1 dDstRow0,[pDst],step ;// pDst[0*dstStep+x] :0<= x <= 7
+ VST1 dDstRow0,[pTmp],step ;// pDst[1*dstStep+x] :0<= x <= 7
+ VST1 dDstRow0,[pDst],step ;// pDst[2*dstStep+x] :0<= x <= 7
+ VST1 dDstRow0,[pTmp],step ;// pDst[3*dstStep+x] :0<= x <= 7
+ VST1 dDstRow0,[pDst],step ;// pDst[4*dstStep+x] :0<= x <= 7
+ VST1 dDstRow0,[pTmp],step ;// pDst[5*dstStep+x] :0<= x <= 7
+ VST1 dDstRow0,[pDst],step ;// pDst[6*dstStep+x] :0<= x <= 7
+ VST1 dDstRow0,[pTmp] ;// pDst[7*dstStep+x] :0<= x <= 7
+
+ M_EXIT
+
+
+OMX_VC_CHROMA_VERT
+ ;// Vertical prediction: dAboveVal is the same register as dDstRow0, so the shared store copies the upper row into all 8 rows.
+ VLD1 dAboveVal,[pSrcAbove] ;// pSrcAbove[x] :0<= x <= 7
+ MOV return, #OMX_Sts_NoErr
+
+ B DCChroma8x8UpperStore
+
+
+OMX_VC_CHROMA_HOR
+ ;// Horizontal prediction: each left pixel is replicated across its own row register (D0..D7).
+ ADD pTmp, pSrcLeft, leftStep ;// pTmp walks the odd rows, pSrcLeft the even rows
+ ADD step, leftStep, leftStep ;// step = 2*leftStep
+
+ VLD1 {dLeftVal0[]},[pSrcLeft],step ;// pSrcLeft[0*leftStep]
+ VLD1 {dLeftVal1[]},[pTmp],step ;// pSrcLeft[1*leftStep]
+ VLD1 {dLeftVal2[]},[pSrcLeft],step ;// pSrcLeft[2*leftStep]
+ VLD1 {dLeftVal3[]},[pTmp],step ;// pSrcLeft[3*leftStep]
+ VLD1 {dLeftVal4[]},[pSrcLeft],step ;// pSrcLeft[4*leftStep]
+ VLD1 {dLeftVal5[]},[pTmp],step ;// pSrcLeft[5*leftStep]
+ VLD1 {dLeftVal6[]},[pSrcLeft],step ;// pSrcLeft[6*leftStep]
+ VLD1 {dLeftVal7[]},[pTmp] ;// pSrcLeft[7*leftStep]
+
+ B DCChroma8x8PlaneStore ;// dLeftVal0..7 are the registers stored there as dSum0..7; return is set after the store
+
+
+OMX_VC_CHROMA_PLANE
+ ADD pTmp, pSrcLeft, leftStep ;// pTmp walks the odd rows, pSrcLeft the even rows
+ ADD step, leftStep, leftStep ;// step = 2*leftStep
+
+ VLD1 dAboveVal,[pSrcAbove] ;// pSrcAbove[x] :0<= x <= 7
+ VLD1 dAboveLeftVal[0],[pSrcAboveLeft] ;// only byte lane 0 is loaded; the other lanes are not used meaningfully
+
+ VLD1 {dLeftVal[0]},[pSrcLeft],step ;// pSrcLeft[0*leftStep]
+ VLD1 {dLeftVal[1]},[pTmp],step ;// pSrcLeft[1*leftStep]
+ VLD1 {dLeftVal[2]},[pSrcLeft],step ;// pSrcLeft[2*leftStep]
+ VLD1 {dLeftVal[3]},[pTmp],step ;// pSrcLeft[3*leftStep]
+ VLD1 {dLeftVal[4]},[pSrcLeft],step ;// pSrcLeft[4*leftStep]
+ VLD1 {dLeftVal[5]},[pTmp],step ;// pSrcLeft[5*leftStep]
+ VLD1 {dLeftVal[6]},[pSrcLeft],step ;// pSrcLeft[6*leftStep]
+ VLD1 {dLeftVal[7]},[pTmp] ;// pSrcLeft[7*leftStep]
+
+ ;// Build the four weighted edge differences for the above edge, then the same for the left edge.
+ VREV64 dRevAboveVal,dAboveVal ;// Reverse order of bytes = pSrcAbove[0:1:2:3:4:5:6:7]
+ VSUBL qAbove7minus0,dRevAboveVal,dAboveLeftVal ;// qAbove7minus0[0] = pSrcAbove[7] - pSrcAboveLeft[0]
+ VSHR dRevAboveValU64,dRevAboveValU64,#8 ;// pSrcAbove[X:0:1:2:3:4:5:6]
+ VSUBL qAboveDiff,dRevAboveVal,dAboveVal ;// pSrcAbove[6] - pSrcAbove[0]
+ ;// pSrcAbove[5] - pSrcAbove[1]
+ ;// pSrcAbove[4] - pSrcAbove[2]
+
+ VREV64 dRevLeftVal,dLeftVal ;// Reverse order of bytes = pSrcLeft[0:1:2:3:4:5:6:7]
+ VSUBL qLeft7minus0,dRevLeftVal,dAboveLeftVal ;// qLeft7minus0[0] = pSrcLeft[7] - pSrcAboveLeft[0]
+ VSHR dRevLeftValU64,dRevLeftValU64,#8 ;// pSrcLeft[X:0:1:2:3:4:5:6]
+ VSUBL qLeftDiff,dRevLeftVal,dLeftVal ;// pSrcLeft[6] - pSrcLeft[0]
+ ;// pSrcLeft[5] - pSrcLeft[1]
+ ;// pSrcLeft[4] - pSrcLeft[2]
+
+ LDR pMultiplierTable,=armVCM4P10_MultiplierTableChroma8x8 ;// Used to calculate Hval & Vval
+ VSHL dAboveDiff0U64,dAboveDiff0U64,#16 ;// make room so the VEXT below appends the "7 minus above-left" term as lane 3
+ VEXT dDiffAboveU8,dAboveDiff0U8,dAbove7minus0U8,#2 ;// pSrcAbove[ 7-0 | 4-2 | 5-1 | 6-0 ]
+ VLD1 dMultiplier,[pMultiplierTable]! ;// first table row (4 halfwords); pointer advances to the second row
+ VSHL dLeftDiff0U64,dLeftDiff0U64,#16
+ VEXT dDiffLeftU8,dLeftDiff0U8,dLeft7minus0U8,#2 ;// pSrcLeft[ 7-0 | 4-2 | 5-1 | 6-0 ]
+
+
+ VMUL dHorPred,dDiffAboveS16,dMultiplier ;// pSrcAbove[ 4*(7-0) | 1*(4-2) | 2*(5-1) | 3*(6-0) ]
+ VMUL dVerPred,dDiffLeftS16,dMultiplier
+ VPADD dHVValS16,dHorPred,dVerPred ;// pairwise sums: two partial H sums, then two partial V sums
+
+
+ VPADDL dHVValS32,dHVValS16 ;// [V|H] in 32 bits each
+ VSHL dHVTempS32,dHVValS32,#4 ;// 17*H = 16*H + H = (H<<4)+H
+ VADD dHVValS32,dHVValS32,dHVTempS32 ;// [ 17*V | 17*H ]in 32 bits each
+ VLD1 {dMultiplier0,dMultiplier1},[pMultiplierTable] ;// qMultiplier = [ 4|3|2|1|0|-1|-2|-3 ]
+ VRSHR dHVValS32,dHVValS32,#5 ;// (17*x + 16) >> 5 per 32-bit lane = [c|b]; the low halfwords are read as S16 lanes 0 and 2 below
+ VADDL qA,dAboveVal,dLeftVal ;// lane k = pSrcAbove[k] + pSrcLeft[k*leftStep]
+ VDUP qA,qA[7] ;// broadcast lane 7
+ VSHL qA,qA,#4 ;// [a|a|a|a|a|a|a|a]
+ VDUP qB,dHVValS16[0] ;// [b|b|b|b|b|b|b|b]
+ VDUP qC,dHVValS16[2] ;// [c|c|c|c|c|c|c|c]
+
+
+ VMUL qB,qB,qMultiplier ;// lane x = b*(x-3)
+ VMUL qC,qC,qMultiplier ;// lane y = c*(y-3)
+ VADD qB,qB,qA ;// lane x = a + b*(x-3)
+ ;// qC0 reuses qA's register and qC3 reuses qMultiplier's; both are no longer needed from here on.
+ VDUP qC0,qC[0]
+ VDUP qC1,qC[1]
+ VDUP qC2,qC[2]
+ VDUP qC3,qC[3]
+ VDUP qC4,qC[4]
+ VDUP qC5,qC[5]
+ VDUP qC6,qC[6]
+ VDUP qC7,qC[7]
+
+ VADD qSum0,qB,qC0 ;// row y: a + b*(x-3) + c*(y-3)
+ VADD qSum1,qB,qC1
+ VADD qSum2,qB,qC2
+ VADD qSum3,qB,qC3
+ VADD qSum4,qB,qC4
+ VADD qSum5,qB,qC5
+ VADD qSum6,qB,qC6
+ VADD qSum7,qB,qC7
+ ;// Order matters below: each dSum<k> overwrites half of a Q register that has already been narrowed or is no longer needed.
+ VQRSHRUN dSum0,qSum0,#5 ;// (OMX_U8)armClip(0,255,(Sum+16)>>5)
+ VQRSHRUN dSum1,qSum1,#5
+ VQRSHRUN dSum2,qSum2,#5
+ VQRSHRUN dSum3,qSum3,#5
+ VQRSHRUN dSum4,qSum4,#5
+ VQRSHRUN dSum5,qSum5,#5
+ VQRSHRUN dSum6,qSum6,#5
+ VQRSHRUN dSum7,qSum7,#5
+
+DCChroma8x8PlaneStore
+ ADD pTmp, pDst, dstStep ;// pTmp walks the odd rows, pDst the even rows
+ ADD step, dstStep, dstStep ;// step = 2*dstStep
+
+ VST1 dSum0,[pDst],step ;// pDst[0*dstStep+x] :0<= x <= 7
+ VST1 dSum1,[pTmp],step ;// pDst[1*dstStep+x] :0<= x <= 7
+ VST1 dSum2,[pDst],step ;// pDst[2*dstStep+x] :0<= x <= 7
+ VST1 dSum3,[pTmp],step ;// pDst[3*dstStep+x] :0<= x <= 7
+ VST1 dSum4,[pDst],step ;// pDst[4*dstStep+x] :0<= x <= 7
+ VST1 dSum5,[pTmp],step ;// pDst[5*dstStep+x] :0<= x <= 7
+ VST1 dSum6,[pDst],step ;// pDst[6*dstStep+x] :0<= x <= 7
+ VST1 dSum7,[pTmp] ;// pDst[7*dstStep+x] :0<= x <= 7
+
+ MOV return, #OMX_Sts_NoErr
+ M_END
+
+ ENDIF ;// CortexA8
+
+ END
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntraChroma_8x8 ends
+;//-----------------------------------------------------------------------------------------------
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
new file mode 100755
index 0000000..e9c0eee
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
@@ -0,0 +1,424 @@
+;//
+;//
+;// File Name: omxVCM4P10_PredictIntra_16x16_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+
+;//-------------------------------------------------------
+;// Jump table implementing a C-style switch on predMode:
+;// entry i holds the address of the handler label for mode i.
+;//-------------------------------------------------------
+
+ M_TABLE armVCM4P10_pIndexTable16x16
+ DCD OMX_VC_16X16_VERT, OMX_VC_16X16_HOR ;// entries 0 and 1
+ DCD OMX_VC_16X16_DC, OMX_VC_16X16_PLANE ;// entries 2 and 3
+
+
+ IF CortexA8
+
+ M_TABLE armVCM4P10_MultiplierTable16x16,1 ;// Three rows of eight 16-bit constants, read in order by the PLANE case
+ DCW 7, 6, 5, 4, 3, 2, 1, 8 ;// row 0: weights multiplied with the edge differences to form H and V
+ DCW 0, 1, 2, 3, 4, 5, 6, 7 ;// row 1: column indices x = 0..7, multiplied by b
+ DCW 8, 9, 10, 11, 12, 13, 14, 15 ;// row 2: column indices x = 8..15, multiplied by b
+
+;//--------------------------------------------
+;// Constants
+;//--------------------------------------------
+BLK_SIZE EQU 0x10 ;// 16: initial value of the row counter y
+MUL_CONST0 EQU 0x01010101 ;// NOTE(review): MUL_CONST0..3 and MASK_CONST are not
+MUL_CONST1 EQU 0x00060004 ;// referenced anywhere else in this file; presumably
+MUL_CONST2 EQU 0x00070005 ;// left over from a non-NEON variant -- confirm
+MUL_CONST3 EQU 0x00030001 ;// before removing them.
+MASK_CONST EQU 0x00FF00FF
+
+;//--------------------------------------------
+;// Scratch variables (ARM core registers)
+;//--------------------------------------------
+y RN 12 ;// row counter for the HOR and PLANE loops
+pc RN 15
+
+return RN 0 ;// status code; same register as pSrcLeft
+pTable RN 9 ;// jump-table base; r9 is reused as pMultTable in the PLANE case
+count RN 11 ;// DC case: number of available edges; r11 is also pTmp2 (HOR case)
+pMultTable RN 9
+; ----------------------------------------------
+; Neon registers
+; ----------------------------------------------
+qAbove QN Q0.U8 ;// above row; Q0 is also qSum8, qSum0 and qTmp, so each value must be dead before the next use
+qLeft QN Q1.U8 ;// left column; Q1 is also qSum1
+qSum8 QN Q0.U16
+dSum80 DN D0.U16
+dSum81 DN D1.U16
+dSum4 DN D0.U16
+dSum2 DN D0.U32
+dSum1 DN D0.U64 ;// not referenced by the code in this file
+qOut QN Q3.U8 ;// output row; Q3 = D6:D7, overlapping dSumLeft/dSumAbove, dOut0/dOut1 and qAbove15minus0
+dSumLeft DN D6.U64
+dSumAbove DN D7.U64
+dSum DN D8.U64
+dSum0 DN D8.U8[0] ;// low byte of dSum, broadcast to form the DC output row
+
+qH QN Q11.S32 ;// Q11 is reused as qA, qHV and qHV0
+qV QN Q12.S32 ;// Q12 is reused as qHV1 and qMultiplier1
+qA QN Q11.S16
+qB QN Q6.S16 ;// same register as qB1
+qC QN Q7.S16 ;// same register as qLeft15minus0
+
+qB0 QN Q5.S16
+qB1 QN Q6.S16
+dA1 DN D23.S16 ;// high half of qA
+
+dH0 DN D22.S32
+dH1 DN D23.S32
+dV0 DN D24.S32
+dV1 DN D25.S32
+
+qHV QN Q11.S64
+qHV0 QN Q11.S32
+qHV1 QN Q12.S64
+
+dHV00 DN D22.S32
+dHV01 DN D23.S32
+
+dHV0 DN D22.S16[0] ;// low 16 bits of the low half of qHV (b in the PLANE case)
+dHV1 DN D23.S16[0] ;// low 16 bits of the high half of qHV (c in the PLANE case)
+dHV10 DN D24.S64
+dHV11 DN D25.S64
+
+qSum0 QN Q0.S16
+qSum1 QN Q1.S16
+
+dOut0 DN D6.U8 ;// low half of qOut
+dOut1 DN D7.U8 ;// high half of qOut
+
+dLeft0 DN D2.U8 ;// low half of qLeft
+dLeft1 DN D3.U8 ;// high half of qLeft
+qConst QN Q13.S16
+
+dAbove0 DN D0.U8 ;// low half of qAbove
+dAbove1 DN D1.U8 ;// high half of qAbove
+
+dRevLeft64 DN D12.U64
+dRevLeft DN D12.U8
+dRevAbove64 DN D5.U64
+dRevAbove DN D5.U8
+qLeftDiff QN Q8.S16
+dLeftDiff1 DN D17.S16
+dLeftDiff64 DN D17.S64
+qDiffLeft QN Q8.S16 ;// not referenced by the code in this file (qLeftDiff is used instead)
+qDiffAbove QN Q4.S16 ;// not referenced by the code in this file (qAboveDiff is used instead)
+dAboveDiff1 DN D9.S16
+dAboveDiff64 DN D9.S64
+qAboveDiff QN Q4.S16
+
+dAboveLeft DN D4.U8
+
+dDiffLeft0 DN D16.S16
+dDiffLeft1 DN D17.S16
+dDiffAbove0 DN D8.S16
+dDiffAbove1 DN D9.S16
+
+qLeft15minus0 QN Q7.S16
+dLeft15minus0 DN D14.S16
+qAbove15minus0 QN Q3.S16
+dAbove15minus0 DN D6.S16
+
+qMultiplier QN Q10.S16 ;// same register as qMultiplier0
+qMultiplier0 QN Q10.S16
+qMultiplier1 QN Q12.S16
+dMultiplier0 DN D20.S16
+dMultiplier1 DN D21.S16
+
+dBPlusCMult7 DN D1.S64 ;// D1 is also dAbove1/dSum81
+dBPlusCMult7S16 DN D1.S16
+
+qTmp QN Q0.U8
+
+;//--------------------------------------------
+;// Declare input registers
+;//--------------------------------------------
+pSrcLeft RN 0 ;// input pointer: left edge, one byte read every leftStep bytes
+pSrcAbove RN 1 ;// input pointer: 16 bytes of the row above
+pSrcAboveLeft RN 2 ;// input pointer: above-left byte (read by the PLANE case only)
+pDst RN 3 ;// output pointer: rows are dstStep bytes apart
+leftStep RN 4 ;// input variable, loaded from the stack at entry
+dstStep RN 5 ;// input variable, loaded from the stack at entry
+predMode RN 6 ;// input variable, loaded from the stack at entry
+availability RN 7 ;// input variable, loaded from the stack at entry (tested by the DC case)
+
+pTmp RN 8 ;// second row pointer (odd rows)
+step RN 10 ;// doubled stride
+pTmp2 RN 11 ;// second output pointer in the HOR case; same register as count
+
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntra_16x16 starts
+;//-----------------------------------------------------------------------------------------------
+
+ ;// omxVCM4P10_PredictIntra_16x16: writes a 16x16 predicted block to pDst using the routine selected by predMode
+ M_START omxVCM4P10_PredictIntra_16x16, r11, d15 ;// presumably saves r4-r11 and d8-d15 -- confirm against M_START in armCOMM_s.h
+
+ ;// Stack arguments, 4 bytes each (pSrcLeft, pSrcAbove, pSrcAboveLeft and pDst are aliased to r0-r3)
+ M_ARG LeftStep, 4
+ M_ARG DstStep, 4
+ M_ARG PredMode, 4
+ M_ARG Availability, 4
+
+ ;// M_STALL ARM1136JS=4
+
+ LDR pTable,=armVCM4P10_pIndexTable16x16 ;// Base address of the per-mode jump table
+
+ ;// Load argument from the stack
+ M_LDR predMode, PredMode ;// Arg predMode loaded from stack to reg
+ M_LDR leftStep, LeftStep ;// Arg leftStep loaded from stack to reg
+ M_LDR dstStep, DstStep ;// Arg dstStep loaded from stack to reg
+ M_LDR availability, Availability ;// Arg availability loaded from stack to reg
+
+ MOV y, #BLK_SIZE ;// Row counter = 16, consumed by the HOR and PLANE loops
+ LDR pc, [pTable, predMode, LSL #2] ;// Jump to table[predMode]; NOTE(review): predMode is not range-checked here
+
+OMX_VC_16X16_VERT ;// Vertical mode: every output row is a copy of the 16 bytes at pSrcAbove
+ VLD1 qAbove, [pSrcAbove] ;// Load the above row once
+ ADD pTmp, pDst, dstStep ;// pTmp writes the odd rows, pDst the even rows
+ ADD step, dstStep, dstStep ;// step = 2*dstStep
+ VST1 qAbove, [pDst], step ;// row 0
+ VST1 qAbove, [pTmp], step ;// row 1
+ VST1 qAbove, [pDst], step
+ VST1 qAbove, [pTmp], step
+ VST1 qAbove, [pDst], step
+ VST1 qAbove, [pTmp], step
+ VST1 qAbove, [pDst], step
+ VST1 qAbove, [pTmp], step
+ VST1 qAbove, [pDst], step
+ VST1 qAbove, [pTmp], step
+ VST1 qAbove, [pDst], step
+ VST1 qAbove, [pTmp], step
+ VST1 qAbove, [pDst], step
+ VST1 qAbove, [pTmp], step
+ VST1 qAbove, [pDst] ;// row 14 (no post-increment needed)
+ VST1 qAbove, [pTmp] ;// row 15
+ MOV return, #OMX_Sts_NoErr ;// return OMX_Sts_NoErr
+ M_EXIT
+
+OMX_VC_16X16_HOR ;// Horizontal mode: output row r is filled with the byte at pSrcLeft + r*leftStep
+ ADD pTmp, pSrcLeft, leftStep ;// pTmp reads the odd left-edge rows, pSrcLeft the even ones
+ ADD leftStep, leftStep, leftStep ;// leftStep is doubled in place
+ ADD pTmp2, pDst, dstStep ;// pTmp2 writes the odd output rows, pDst the even ones
+ ADD dstStep, dstStep, dstStep ;// dstStep is doubled in place
+LoopHor ;// Each pass produces 8 rows (4 even/odd pairs)
+ VLD1 {qLeft[]}, [pSrcLeft], leftStep ;// Load one byte and replicate it to all 16 lanes
+ VLD1 {qTmp[]}, [pTmp], leftStep
+ SUBS y, y, #8 ;// y: 16 -> 8 -> 0; sets the flags tested by BNE below
+ VST1 qLeft, [pDst], dstStep
+ VST1 qTmp, [pTmp2], dstStep
+ VLD1 {qLeft[]}, [pSrcLeft], leftStep
+ VLD1 {qTmp[]}, [pTmp], leftStep
+ VST1 qLeft, [pDst], dstStep
+ VST1 qTmp, [pTmp2], dstStep
+ VLD1 {qLeft[]}, [pSrcLeft], leftStep
+ VLD1 {qTmp[]}, [pTmp], leftStep
+ VST1 qLeft, [pDst], dstStep
+ VST1 qTmp, [pTmp2], dstStep
+ VLD1 {qLeft[]}, [pSrcLeft], leftStep
+ VLD1 {qTmp[]}, [pTmp], leftStep
+ VST1 qLeft, [pDst], dstStep
+ VST1 qTmp, [pTmp2], dstStep
+
+ BNE LoopHor ;// Two passes of 8 rows = 16 rows
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+OMX_VC_16X16_DC ;// DC mode: fill the block with the rounded mean of the available edges, or 128 if none
+ MOV count, #0 ;// count = number of available edges seen so far
+ TST availability, #OMX_VC_LEFT
+ BEQ UpperOrNoneAvailable ;// Skip the left-edge sum if the left edge is not available
+
+ ADD pTmp, pSrcLeft, leftStep ;// pTmp reads the odd left-edge rows, pSrcLeft the even ones
+ ADD step, leftStep, leftStep ;// step = 2*leftStep
+
+ VLD1 {qLeft[0]}, [pSrcLeft],step ;// Gather the 16 left-edge bytes into lanes 0..15
+ VLD1 {qLeft[1]}, [pTmp],step
+ VLD1 {qLeft[2]}, [pSrcLeft],step
+ VLD1 {qLeft[3]}, [pTmp],step
+ VLD1 {qLeft[4]}, [pSrcLeft],step
+ VLD1 {qLeft[5]}, [pTmp],step
+ VLD1 {qLeft[6]}, [pSrcLeft],step
+ VLD1 {qLeft[7]}, [pTmp],step
+ VLD1 {qLeft[8]}, [pSrcLeft],step
+ VLD1 {qLeft[9]}, [pTmp],step
+ VLD1 {qLeft[10]},[pSrcLeft],step
+ VLD1 {qLeft[11]},[pTmp],step
+ VLD1 {qLeft[12]},[pSrcLeft],step
+ VLD1 {qLeft[13]},[pTmp],step
+ VLD1 {qLeft[14]},[pSrcLeft],step
+ VLD1 {qLeft[15]},[pTmp]
+
+ VPADDL qSum8, qLeft ;// 16 bytes -> 8 halfword pair sums
+ ADD count, count, #1 ;// left edge counted
+ VPADD dSum4, dSum80, dSum81 ;// 8 -> 4 halfword sums
+ VPADDL dSum2, dSum4 ;// 4 -> 2 word sums
+ VPADDL dSumLeft, dSum2 ;// 2 -> 1: sum of the 16 left-edge bytes
+ VRSHR dSum, dSumLeft, #4 ;// (sumLeft + 8) >> 4, the result if only the left edge is available
+
+UpperOrNoneAvailable
+ TST availability, #OMX_VC_UPPER ;// if(availability & #OMX_VC_UPPER)
+ BEQ BothOrNoneAvailable ;// Skip the above-row sum if the upper edge is not available
+ VLD1 qAbove, [pSrcAbove] ;// Load the 16 bytes of the above row
+ ADD count, count, #1 ;// if upper inc count by 1
+ VPADDL qSum8, qAbove ;// Same reduction as for the left edge
+ VPADD dSum4, dSum80, dSum81
+ VPADDL dSum2, dSum4
+ VPADDL dSumAbove, dSum2 ;// Sum of the 16 above bytes
+ VRSHR dSum, dSumAbove, #4 ;// (sumAbove + 8) >> 4, the result if only the upper edge is available
+
+BothOrNoneAvailable
+ CMP count, #2 ;// check if both available
+ BNE NoneAvailable ;// Zero or one edge: dSum (if any) is already final
+ VADD dSum, dSumAbove, dSumLeft
+ VRSHR dSum, dSum, #5 ;// (sumAbove + sumLeft + 16) >> 5
+
+
+NoneAvailable ;// Reached on every path, not only when no edge is available
+ VDUP qOut, dSum0 ;// Broadcast the DC byte to all 16 lanes
+ CMP count, #0 ;// check if none available
+ ADD pTmp, pDst, dstStep ;// pTmp writes the odd rows, pDst the even rows
+ ADD step, dstStep, dstStep ;// step = 2*dstStep
+ BNE LoopDC
+ VMOV qOut, #128 ;// No edge available: use the constant 128 instead
+LoopDC ;// Not a loop: 16 unrolled row stores
+ VST1 qOut, [pDst], step
+ VST1 qOut, [pTmp], step
+ VST1 qOut, [pDst], step
+ VST1 qOut, [pTmp], step
+ VST1 qOut, [pDst], step
+ VST1 qOut, [pTmp], step
+ VST1 qOut, [pDst], step
+ VST1 qOut, [pTmp], step
+ VST1 qOut, [pDst], step
+ VST1 qOut, [pTmp], step
+ VST1 qOut, [pDst], step
+ VST1 qOut, [pTmp], step
+ VST1 qOut, [pDst], step
+ VST1 qOut, [pTmp], step
+ VST1 qOut, [pDst], step
+ VST1 qOut, [pTmp], step
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+OMX_VC_16X16_PLANE ;// Plane mode: out[y][x] = clip8((a + b*(x-7) + c*(y-7) + 16) >> 5)
+ LDR pMultTable, =armVCM4P10_MultiplierTable16x16
+ VLD1 qAbove, [pSrcAbove] ;// pSrcAbove[x] : 0 <= x <= 15
+ VLD1 dAboveLeft[0],[pSrcAboveLeft] ;// Lane 0 = pSrcAboveLeft[0]; the other lanes are not loaded
+ ADD pTmp, pSrcLeft, leftStep ;// pTmp reads the odd left-edge rows, pSrcLeft the even ones
+ ADD step, leftStep, leftStep ;// step = 2*leftStep
+ VLD1 {qLeft[0]}, [pSrcLeft],step ;// Gather the 16 left-edge bytes into lanes 0..15
+ VLD1 {qLeft[1]}, [pTmp],step
+ VLD1 {qLeft[2]}, [pSrcLeft],step
+ VLD1 {qLeft[3]}, [pTmp],step
+ VLD1 {qLeft[4]}, [pSrcLeft],step
+ VLD1 {qLeft[5]}, [pTmp],step
+ VLD1 {qLeft[6]}, [pSrcLeft],step
+ VLD1 {qLeft[7]}, [pTmp],step
+ VLD1 {qLeft[8]}, [pSrcLeft],step
+ VLD1 {qLeft[9]}, [pTmp],step
+ VLD1 {qLeft[10]}, [pSrcLeft],step
+ VLD1 {qLeft[11]}, [pTmp],step
+ VLD1 {qLeft[12]}, [pSrcLeft],step
+ VLD1 {qLeft[13]}, [pTmp],step
+ VLD1 {qLeft[14]}, [pSrcLeft],step
+ VLD1 {qLeft[15]}, [pTmp]
+
+ VREV64 dRevAbove, dAbove1 ;// pSrcAbove[15:14:13:12:11:10:9:8] (lane 0 first)
+ VSUBL qAbove15minus0, dRevAbove, dAboveLeft ;// qAbove15minus0[0] = pSrcAbove[15] - pSrcAboveLeft[0]; other lanes are not used
+ VSHR dRevAbove64, dRevAbove64, #8 ;// pSrcAbove[14:13:12:11:10:9:8:X]
+ VSUBL qAboveDiff, dRevAbove, dAbove0 ;// lane i = pSrcAbove[14-i] - pSrcAbove[i] for i = 0..6; lane 7 is junk
+
+ VSHL dAboveDiff64, dAboveDiff64, #16 ;// Shift the high half up one lane, dropping the junk lane
+ VEXT dDiffAbove1, dAboveDiff1, dAbove15minus0, #1 ;// High half = [a10-a4, a9-a5, a8-a6, a15-aboveLeft]
+
+ VREV64 dRevLeft,dLeft1 ;// pSrcLeft[15:14:13:12:11:10:9:8] (row indices, lane 0 first)
+ VSUBL qLeft15minus0,dRevLeft, dAboveLeft ;// qLeft15minus0[0] = left[15] - pSrcAboveLeft[0]; other lanes are not used
+ VSHR dRevLeft64, dRevLeft64, #8 ;// pSrcLeft[14:13:12:11:10:9:8:X]
+ VSUBL qLeftDiff,dRevLeft, dLeft0 ;// lane i = left[14-i] - left[i] for i = 0..6; lane 7 is junk
+
+ ;// Multiplier = [7|6|5|4|3|2|1|8] (lane 0 first, table row 0)
+ VLD1 qMultiplier, [pMultTable]!
+
+ VSHL dLeftDiff64, dLeftDiff64, #16 ;// Same lane fix-up as for the above differences
+ VEXT dDiffLeft1, dLeftDiff1, dLeft15minus0, #1
+
+ VMULL qH,dDiffAbove0, dMultiplier0 ;// Weighted above differences: four partial sums of H
+ VMULL qV,dDiffLeft0, dMultiplier0 ;// Weighted left differences: four partial sums of V
+ VMLAL qH,dDiffAbove1, dMultiplier1
+ VMLAL qV,dDiffLeft1, dMultiplier1
+
+ VPADD dHV00,dH1,dH0 ;// Two partial sums of H
+ VPADD dHV01,dV1,dV0 ;// Two partial sums of V
+ VPADDL qHV, qHV0 ;// qHV = [V | H] as 64-bit values (H in the low half)
+ VSHL qHV1,qHV,#2 ;// 4*H, 4*V
+ VADD qHV,qHV,qHV1 ;// 5*H, 5*V
+
+ ;// HV = [c = ((5*V+32)>>6) | b = ((5*H+32)>>6)]
+ VRSHR qHV,qHV,#6
+
+ ;// HV1 = [c*7|b*7]
+ VSHL qHV1,qHV,#3
+ VSUB qHV1,qHV1,qHV
+
+ ;// Multiplier0 = [0|1|2|...|7] (table row 1)
+ VLD1 qMultiplier0, [pMultTable]!
+ VDUP qB, dHV0 ;// b in every lane; must happen before qA overwrites Q11
+ VDUP qC, dHV1 ;// c in every lane
+
+ VADDL qA,dAbove1,dLeft1 ;// lane i = pSrcAbove[8+i] + left[8+i]
+ VSHL qA,qA, #4 ;// * 16
+ VDUP qA,dA1[3] ;// a = 16*(pSrcAbove[15] + left[15]) in every lane
+ VADD dBPlusCMult7, dHV10, dHV11 ;// 7*b + 7*c
+
+ ;// Multiplier1 = [8|9|10|...|15]
+ VLD1 qMultiplier1, [pMultTable]
+ ;// Const = a - 7*(b+c)
+ VDUP qConst, dBPlusCMult7S16[0]
+ VSUB qConst, qA, qConst
+
+ ;// B0 = [0*b|1*b|2*b|3*b|......|7*b]
+ VMUL qB0,qB,qMultiplier0
+
+ ;// B1 = [8*b|9*b|10*b|11*b|....|15*b]
+ VMUL qB1,qB,qMultiplier1
+
+ VADD qSum0, qB0, qConst ;// Row 0, columns 0..7 (before rounding and shift)
+ VADD qSum1, qB1, qConst ;// Row 0, columns 8..15
+
+ ;// One output row per iteration, 16 iterations (y counts down from 16)
+LoopPlane
+ ;// (b*x + c*y + C)>>5
+ VQRSHRUN dOut0, qSum0,#5 ;// Rounding shift by 5 with unsigned saturation to 8 bits
+ VQRSHRUN dOut1, qSum1,#5
+ SUBS y, y, #1
+ VST1 qOut,[pDst],dstStep ;// Store the 16-byte row and advance to the next row
+ VADD qSum0,qSum0,qC ;// Next row: add c to every column
+ VADD qSum1,qSum1,qC
+ BNE LoopPlane
+
+ MOV return, #OMX_Sts_NoErr
+
+ M_END
+
+ ENDIF ;// CortexA8
+
+ END
+;-----------------------------------------------------------------------------------------------
+; omxVCM4P10_PredictIntra_16x16 ends
+;-----------------------------------------------------------------------------------------------
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
new file mode 100755
index 0000000..39eb8a4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
@@ -0,0 +1,531 @@
+;//
+;//
+;// File Name: omxVCM4P10_PredictIntra_4x4_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+;// Define the processor variants supported by this file
+
+ M_VARIANTS CortexA8
+
+;//-------------------------------------------------------
+;// Jump table implementing a C-style switch on predMode:
+;// entry i holds the address of the handler label for mode i.
+;//-------------------------------------------------------
+
+ M_TABLE armVCM4P10_pSwitchTable4x4
+ DCD OMX_VC_4x4_VERT, OMX_VC_4x4_HOR ;// entries 0 and 1
+ DCD OMX_VC_4x4_DC, OMX_VC_4x4_DIAG_DL ;// entries 2 and 3
+ DCD OMX_VC_4x4_DIAG_DR, OMX_VC_4x4_VR ;// entries 4 and 5
+ DCD OMX_VC_4x4_HD, OMX_VC_4x4_VL ;// entries 6 and 7
+ DCD OMX_VC_4x4_HU ;// entry 8
+
+
+ IF CortexA8
+
+;//--------------------------------------------
+;// Scratch variables
+;//--------------------------------------------
+return RN 0 ;// status code; same register as pSrcLeft
+pTable RN 8 ;// jump-table base
+pc RN 15
+
+;//--------------------------------------------
+;// Declare input registers
+;//--------------------------------------------
+pSrcLeft RN 0 ;// input pointer: left edge, one byte read every leftStep bytes
+pSrcAbove RN 1 ;// input pointer: row above
+pSrcAboveLeft RN 2 ;// input pointer: above-left byte
+pDst RN 3 ;// output pointer: rows are dstStep bytes apart
+leftStep RN 4 ;// input variable, loaded from the stack at entry
+dstStep RN 5 ;// input variable, loaded from the stack at entry
+predMode RN 6 ;// input variable, loaded from the stack at entry
+availability RN 7 ;// input variable, loaded from the stack at entry
+pDst1 RN 1 ;// DIAG_DR row pointer; reuses r1 (pSrcAbove)
+pDst2 RN 4 ;// DIAG_DR row pointer; reuses r4 (leftStep)
+pDst3 RN 6 ;// DIAG_DR row pointer; reuses r6 (predMode)
+
+pSrcTmp RN 9 ;// second left-edge pointer (odd rows)
+srcStep RN 10 ;// 2*leftStep
+pDstTmp RN 11 ;// second output pointer (odd rows)
+dstep RN 12 ;// 2*dstStep
+
+;//-------------------
+;// Neon registers (many aliases name the same D register with a different element type)
+;//-------------------
+
+;// OMX_VC_4x4_VERT
+dAboveU32 DN D0.U32
+
+;// OMX_VC_4x4_HOR
+dLeftVal0 DN D0.8
+dLeftVal1 DN D1.8
+dLeftVal2 DN D2.8
+dLeftVal3 DN D3.8
+dLeftVal0U32 DN D0.U32
+dLeftVal1U32 DN D1.U32
+dLeftVal2U32 DN D2.U32
+dLeftVal3U32 DN D3.U32
+
+;// OMX_VC_4x4_DC
+dLeftVal DN D0.U8
+dLeftValU32 DN D0.U32
+dSumAboveLeftU16 DN D1.U16
+dSumAboveLeftU32 DN D1.U32
+dSumAboveLeftU64 DN D1.U64
+dSumAboveLeftU8 DN D1.U8
+dSum DN D0.U8 ;// DC result; D0 is the register stored at DCPredict4x4VertStore (as dAboveU32)
+
+dSumLeftValU16 DN D1.U16
+dSumLeftValU32 DN D1.U32
+dSumLeftValU64 DN D1.U64
+dSumLeftValU8 DN D1.U8
+
+dAboveVal DN D0.U8
+dSumAboveValU16 DN D1.U16
+dSumAboveValU32 DN D1.U32
+dSumAboveValU64 DN D1.U64
+dSumAboveValU8 DN D1.U8
+dConst128U8 DN D0.U8
+
+
+;//OMX_VC_4x4_DIAG_DL
+
+dAbove DN D0.U8
+dU7 DN D2.U8
+dU3 DN D2.U8
+dAbove0 DN D3.U8
+dAbove1 DN D4.U8
+dAbove2 DN D5.U8
+dTmp DN D6.U8
+dTmp0 DN D7.U8
+dTmp1 DN D8.U8
+dTmp2 DN D9.U8
+dTmp3 DN D10.U8
+dTmpU32 DN D6.U32
+
+
+;//OMX_VC_4x4_DIAG_DR
+dLeft DN D1.U8
+dUL DN D2.U8 ;// not referenced by the code in this file
+
+;//OMX_VC_4x4_VR
+dLeft0 DN D1.U8
+dLeft1 DN D2.U8
+dEven0 DN D3.U8
+dEven1 DN D4.U8
+dEven2 DN D5.U8
+dOdd0 DN D6.U8
+dOdd1 DN D11.U8
+dOdd2 DN D12.U8
+dTmp3U32 DN D10.U32
+dTmp2U32 DN D9.U32
+
+
+;//OMX_VC_4x4_HD
+dTmp1U64 DN D8.U64
+dTmp0U64 DN D7.U64
+dTmpU64 DN D6.U64
+dTmpU32 DN D6.U32 ;// NOTE(review): repeats the identical dTmpU32 definition in the DIAG_DL group
+dTmp1U32 DN D8.U32
+
+;//OMX_VC_4x4_HU
+dL3 DN D2.U8
+dLeftHU0 DN D3.U8
+dLeftHU1 DN D4.U8
+dLeftHU2 DN D5.U8
+dTmp0U32 DN D7.U32
+
+
+
+
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntra_4x4 starts
+;//-----------------------------------------------------------------------------------------------
+
+ ;// omxVCM4P10_PredictIntra_4x4: writes a 4x4 predicted block to pDst using the routine selected by predMode
+ M_START omxVCM4P10_PredictIntra_4x4, r12,d12 ;// presumably saves r4-r12 and d8-d12 -- confirm against M_START in armCOMM_s.h
+
+ ;// Stack arguments, 4 bytes each (pSrcLeft, pSrcAbove, pSrcAboveLeft and pDst are aliased to r0-r3)
+ M_ARG LeftStep, 4
+ M_ARG DstStep, 4
+ M_ARG PredMode, 4
+ M_ARG Availability, 4
+
+
+ LDR pTable,=armVCM4P10_pSwitchTable4x4 ;// Base address of the per-mode jump table
+
+ ;// Load argument from the stack
+ M_LDRD predMode,availability,PredMode ;// Arg predMode & availability loaded from stack to reg
+ M_LDRD leftStep,dstStep,LeftStep ;// Arg leftStep & dstStep loaded from stack to reg
+
+
+ LDR pc, [pTable, predMode, LSL #2] ;// Jump to table[predMode]; NOTE(review): predMode is not range-checked here
+
+
+OMX_VC_4x4_HOR ;// Horizontal mode: output row r is filled with the byte at pSrcLeft + r*leftStep
+
+ ADD pSrcTmp, pSrcLeft, leftStep ;// pSrcTmp reads the odd left-edge rows, pSrcLeft the even ones
+ ADD srcStep, leftStep, leftStep ;// srcStep = 2*leftStep
+ ;// Load Left Edge, replicating each byte to all 8 lanes
+ VLD1 {dLeftVal0[]},[pSrcLeft],srcStep ;// pSrcLeft[0*leftStep]
+ VLD1 {dLeftVal1[]},[pSrcTmp],srcStep ;// pSrcLeft[1*leftStep]
+ VLD1 {dLeftVal2[]},[pSrcLeft] ;// pSrcLeft[2*leftStep]
+ VLD1 {dLeftVal3[]},[pSrcTmp] ;// pSrcLeft[3*leftStep]
+
+ ADD pDstTmp, pDst, dstStep ;// pDstTmp writes the odd rows, pDst the even rows
+ ADD dstep, dstStep, dstStep ;// dstep = 2*dstStep
+
+ VST1 dLeftVal0U32[0],[pDst],dstep ;// pDst[0*dstStep+x] : 0 <= x <= 3
+ VST1 dLeftVal1U32[0],[pDstTmp],dstep ;// pDst[1*dstStep+x] : 0 <= x <= 3
+ VST1 dLeftVal2U32[0],[pDst] ;// pDst[2*dstStep+x] : 0 <= x <= 3
+ VST1 dLeftVal3U32[0],[pDstTmp] ;// pDst[3*dstStep+x] : 0 <= x <= 3
+
+ B ExitPredict4x4 ;// Branch to exit code
+
+OMX_VC_4x4_VERT ;// Vertical mode: every output row is a copy of the 4 bytes at pSrcAbove
+
+ ;// Load Upper Edge (4 bytes into 32-bit lane 0)
+ VLD1 dAboveU32[0],[pSrcAbove]
+ ADD pDstTmp, pDst, dstStep ;// pDstTmp writes the odd rows, pDst the even rows
+ ADD dstep, dstStep, dstStep ;// dstep = 2*dstStep
+
+DCPredict4x4VertStore ;// Shared store tail, also entered from the DC paths; expects D0 lane 0, pDstTmp and dstep to be set
+
+ VST1 dAboveU32[0],[pDst],dstep ;// row 0
+ VST1 dAboveU32[0],[pDstTmp],dstep ;// row 1
+ VST1 dAboveU32[0],[pDst] ;// row 2
+ VST1 dAboveU32[0],[pDstTmp] ;// row 3
+
+ B ExitPredict4x4 ;// Branch to exit code
+
+OMX_VC_4x4_DC ;// DC mode: fill the block with the rounded mean of the available edges, or 0x80 if none
+
+
+ TST availability, #OMX_VC_LEFT
+ BEQ DCPredict4x4LeftNotAvailable
+
+ ADD pSrcTmp, pSrcLeft, leftStep ;// pSrcTmp reads the odd left-edge rows, pSrcLeft the even ones
+ ADD srcStep, leftStep, leftStep ;// srcStep = 2*leftStep
+ ;// Load Left Edge into byte lanes 0..3
+ VLD1 {dLeftVal[0]},[pSrcLeft],srcStep ;// pSrcLeft[0*leftStep]
+ VLD1 {dLeftVal[1]},[pSrcTmp],srcStep ;// pSrcLeft[1*leftStep]
+ VLD1 {dLeftVal[2]},[pSrcLeft] ;// pSrcLeft[2*leftStep]
+ VLD1 {dLeftVal[3]},[pSrcTmp] ;// pSrcLeft[3*leftStep]
+
+ TST availability, #OMX_VC_UPPER
+ BEQ DCPredict4x4LeftOnlyAvailable
+
+ ;// Load Upper Edge also, into byte lanes 4..7 of the same register
+ VLD1 dLeftValU32[1],[pSrcAbove] ;// pSrcAbove[0 to 3]
+ MOV return, #OMX_Sts_NoErr ;// r0 (pSrcLeft) is no longer needed on this path
+
+ VPADDL dSumAboveLeftU16, dLeftVal ;// [pSrcAbove[2+3 | 0+1] | pSrcLeft[2+3 | 0+1]]
+ VPADDL dSumAboveLeftU32, dSumAboveLeftU16 ;// [pSrcAbove[2+3+0+1] | pSrcLeft[2+3+0+1]]
+ VPADDL dSumAboveLeftU64, dSumAboveLeftU32 ;// [pSrcAbove[2+3+0+1] + pSrcLeft[2+3+0+1]]
+ VRSHR dSumAboveLeftU64,dSumAboveLeftU64,#3 ;// Sum = (Sum + 4) >> 3
+ ADD pDstTmp, pDst, dstStep ;// Set up the pointers expected by the shared store tail
+ ADD dstep, dstStep, dstStep
+ VDUP dSum,dSumAboveLeftU8[0] ;// Broadcast the DC byte into D0
+
+ B DCPredict4x4VertStore
+
+DCPredict4x4LeftOnlyAvailable
+
+ MOV return, #OMX_Sts_NoErr ;// return OMX_Sts_NoErr
+
+ VPADDL dSumLeftValU16, dLeftVal ;// [ XX | pSrcLeft[2+3 | 0+1]]
+ VPADDL dSumLeftValU32, dSumLeftValU16 ;// [ XXXX | pSrcLeft[2+3+0+1]]
+
+ VRSHR dSumLeftValU32,dSumLeftValU32,#2 ;// Sum = (Sum + 2) >> 2
+ ADD pDstTmp, pDst, dstStep ;// Set up the pointers expected by the shared store tail
+ ADD dstep, dstStep, dstStep
+ VDUP dSum,dSumLeftValU8[0] ;// Broadcast the DC byte into D0
+
+ B DCPredict4x4VertStore
+
+DCPredict4x4LeftNotAvailable
+
+ TST availability, #OMX_VC_UPPER
+ BEQ DCPredict4x4NoneAvailable
+
+ ;// Load Upper Edge
+ VLD1 dAboveU32[0],[pSrcAbove] ;// pSrcAbove[0 to 3]
+ MOV return, #OMX_Sts_NoErr
+
+ VPADDL dSumAboveValU16, dAboveVal ;// [ XX | pSrcAbove[2+3 | 0+1]]
+ VPADDL dSumAboveValU32, dSumAboveValU16 ;// [ XXXX | pSrcAbove[2+3+0+1]]
+
+ VRSHR dSumAboveValU32,dSumAboveValU32,#2 ;// Sum = (Sum + 2) >> 2
+ ADD pDstTmp, pDst, dstStep ;// Set up the pointers expected by the shared store tail
+ ADD dstep, dstStep, dstStep
+ VDUP dSum,dSumAboveValU8[0] ;// Broadcast the DC byte into D0
+
+ B DCPredict4x4VertStore
+
+DCPredict4x4NoneAvailable
+
+ VMOV dConst128U8,#0x80 ;// 0x8080808080808080: neither edge is available
+ MOV return, #OMX_Sts_NoErr
+
+ ADD pDstTmp, pDst, dstStep ;// Set up the pointers expected by the shared store tail
+ ADD dstep, dstStep, dstStep
+ B DCPredict4x4VertStore
+
+
+
+OMX_VC_4x4_DIAG_DL ;// Diagonal down-left: 3-tap filter along the above row; row y uses filtered samples y..y+3
+
+ TST availability, #OMX_VC_UPPER_RIGHT
+ BEQ DiagDLUpperRightNotAvailable
+
+ VLD1 dAbove0,[pSrcAbove] ;// [U7|U6|U5|U4|U3|U2|U1|U0] (8 bytes, lane 0 rightmost)
+ VDUP dU7, dAbove0[7] ;// [U7|U7|U7|U7|U7|U7|U7|U7]
+ VEXT dAbove1, dAbove0, dU7, #1 ;// [U7|U7|U6|U5|U4|U3|U2|U1]
+ VEXT dAbove2, dAbove0, dU7, #2 ;// [U7|U7|U7|U6|U5|U4|U3|U2]
+ B DiagDLPredict4x4Store
+
+DiagDLUpperRightNotAvailable ;// Only U0..U3 are read; U3 stands in for the missing U4..U7
+ VLD1 dAboveU32[1],[pSrcAbove] ;// [U3|U2|U1|U0|-|-|-|-]
+ VDUP dU3, dAbove[7] ;// [U3 U3 U3 U3 U3 U3 U3 U3]
+
+ VEXT dAbove0, dAbove, dU3, #4 ;// [U3 U3 U3 U3 U3 U2 U1 U0]
+ VEXT dAbove1, dAbove, dU3, #5 ;// [U3 U3 U3 U3 U3 U3 U2 U1]
+ VEXT dAbove2, dAbove, dU3, #6 ;// [U3 U3 U3 U3 U3 U3 U3 U2]
+
+DiagDLPredict4x4Store
+
+ VHADD dTmp, dAbove0, dAbove2 ;// (a+c)>>1
+ VRHADD dTmp, dTmp, dAbove1 ;// (a+2*b+c+2)>>2
+
+
+ VST1 dTmpU32[0],[pDst],dstStep ;// row 0: filtered samples 0..3
+ VEXT dTmp,dTmp,dTmp,#1 ;// Rotate down by one byte so the next row starts at lane 0
+ VST1 dTmpU32[0],[pDst],dstStep ;// row 1: filtered samples 1..4
+ VEXT dTmp,dTmp,dTmp,#1
+ VST1 dTmpU32[0],[pDst],dstStep ;// row 2: filtered samples 2..5
+ VEXT dTmp,dTmp,dTmp,#1
+ VST1 dTmpU32[0],[pDst] ;// row 3: filtered samples 3..6
+
+ B ExitPredict4x4 ;// Branch to exit code
+
+
+OMX_VC_4x4_DIAG_DR ;// Diagonal down-right: 3-tap filter over the edge sequence L3..L0, UL, U0..U3
+
+
+ ;// Load U0,U1,U2,U3
+
+ VLD1 dAboveU32[0],[pSrcAbove] ;// [X|X|X|X|U3|U2|U1|U0]
+
+ ;// Load UL,L0,L1,L2,L3 ;// dLeft = [UL|L0|L1|L2|L3|X|X|X]
+ VLD1 {dLeft[7]},[pSrcAboveLeft]
+ ADD pSrcTmp, pSrcLeft, leftStep ;// pSrcTmp reads the odd left-edge rows, pSrcLeft the even ones
+ ADD srcStep, leftStep, leftStep ;// srcStep = 2*leftStep
+ ADD pDst1,pDst,dstStep ;// Row 1 pointer; overwrites r1 (pSrcAbove), which has already been read
+
+ VLD1 {dLeft[6]},[pSrcLeft],srcStep ;// pSrcLeft[0*leftStep]
+ VLD1 {dLeft[5]},[pSrcTmp],srcStep ;// pSrcLeft[1*leftStep]
+ VLD1 {dLeft[4]},[pSrcLeft] ;// pSrcLeft[2*leftStep]
+ VLD1 {dLeft[3]},[pSrcTmp] ;// pSrcLeft[3*leftStep]
+
+
+ VEXT dAbove0,dLeft,dAbove,#3 ;// [U2|U1|U0|UL|L0|L1|L2|L3]
+ ADD pDst2,pDst1,dstStep ;// Row 2 pointer; overwrites r4 (leftStep), no longer needed
+ VEXT dAbove1,dLeft,dAbove,#4 ;// [U3|U2|U1|U0|UL|L0|L1|L2]
+ ADD pDst3,pDst2,dstStep ;// Row 3 pointer; overwrites r6 (predMode), no longer needed
+ VEXT dAbove2,dLeft,dAbove,#5 ;// [ X|U3|U2|U1|U0|UL|L0|L1]
+
+ VHADD dTmp, dAbove0, dAbove2 ;// (a+c)>>1
+ VRHADD dTmp, dTmp, dAbove1 ;// (a+2*b+c+2)>>2
+
+
+ VST1 dTmpU32[0],[pDst3] ;// Store pTmp[0],[1],[2],[3] @ pDst3
+ VEXT dTmp,dTmp,dTmp,#1 ;// Rotate down by one byte for the row above
+ VST1 dTmpU32[0],[pDst2] ;// Store pTmp[1],[2],[3],[4] @ pDst2
+ VEXT dTmp,dTmp,dTmp,#1
+ VST1 dTmpU32[0],[pDst1] ;// Store pTmp[2],[3],[4],[5] @ pDst1
+ VEXT dTmp,dTmp,dTmp,#1
+ VST1 dTmpU32[0],[pDst] ;// Store pTmp[3],[4],[5],[6] @ pDst
+
+ B ExitPredict4x4 ;// Branch to exit code
+
+OMX_VC_4x4_VR ;// Vertical-right mode; reads UL, U0..U3 and L0..L2
+
+
+ ;// Load UL,U0,U1,U2,U3
+ VLD1 dAboveU32[0],[pSrcAbove]
+ VLD1 dAbove[7],[pSrcAboveLeft] ;// [UL|X|X|X|U3|U2|U1|U0]
+
+ ;// Load L0,L1,L2 ;// dLeft0 = [L0|L2|X|X|X|X|X|X]
+ ;// dLeft1 = [L1| X|X|X|X|X|X|X]
+ VLD1 {dLeft0[7]},[pSrcLeft],leftStep ;// pSrcLeft[0*leftStep]
+ VLD1 {dLeft1[7]},[pSrcLeft],leftStep ;// pSrcLeft[1*leftStep]
+ VLD1 {dLeft0[6]},[pSrcLeft] ;// pSrcLeft[2*leftStep]
+
+
+ VEXT dOdd2,dAbove,dAbove,#7 ;// [ x x x U3 U2 U1 U0 UL ]
+ VEXT dEven0,dLeft0,dOdd2,#6 ;// [ x x x U1 U0 UL L0 L2 ]
+ VEXT dEven1,dLeft1,dOdd2,#7 ;// [ x x x U2 U1 U0 UL L1 ]
+ VEXT dEven2,dLeft0,dAbove,#7 ;// [ x x x U3 U2 U1 U0 L0 ]
+ VEXT dOdd0,dLeft1,dAbove,#7 ;// [ x x x U3 U2 U1 U0 L1 ]
+ VEXT dOdd1,dLeft0,dOdd2,#7 ;// [ x x x U2 U1 U0 UL L0 ]
+
+ VHADD dTmp1, dOdd0, dOdd2
+ VRHADD dTmp1, dTmp1, dOdd1 ;// Tmp[ x x x 9 7 5 3 1 ]
+
+ VHADD dTmp0, dEven0, dEven2
+ VRHADD dTmp0, dTmp0, dEven1 ;// Tmp[ x x x 8 6 4 2 0 ]
+
+
+ VEXT dTmp3,dTmp1,dTmp1,#1 ;// Tmp[ x x x x 9 7 5 3 ]
+ ADD pDstTmp, pDst, dstStep ;// pDstTmp writes the odd rows, pDst the even rows
+ ADD dstep, dstStep, dstStep ;// dstep = 2*dstStep
+ VEXT dTmp2,dTmp0,dTmp0,#1 ;// Tmp[ x x x x 8 6 4 2 ]
+
+
+ VST1 dTmp3U32[0],[pDst],dstep ;// row 0: Tmp[9],[7],[5],[3]
+ VST1 dTmp2U32[0],[pDstTmp],dstep ;// row 1: Tmp[8],[6],[4],[2]
+ VST1 dTmp1U32[0],[pDst],dstep ;// row 2: Tmp[7],[5],[3],[1]
+ VST1 dTmp0U32[0],[pDstTmp] ;// row 3: Tmp[6],[4],[2],[0]
+
+ B ExitPredict4x4 ;// Branch to exit code
+
+OMX_VC_4x4_HD ;// Horizontal-down mode; uses UL, U0..U2 and L0..L3
+
+
+ ;// Load the above row; NOTE(review): this reads 8 bytes from pSrcAbove although only U0..U2 feed the lanes that are stored
+ VLD1 dAbove,[pSrcAbove] ;// dAbove = [U7|U6|U5|U4|U3|U2|U1|U0]
+
+ ;// Load UL,L0,L1,L2,L3 ;// dLeft = [UL|L0|L1|L2|L3|X|X|X]
+ VLD1 {dLeft[7]},[pSrcAboveLeft]
+ ADD pSrcTmp, pSrcLeft, leftStep ;// pSrcTmp reads the odd left-edge rows, pSrcLeft the even ones
+ ADD srcStep, leftStep, leftStep ;// srcStep = 2*leftStep
+
+ VLD1 {dLeft[6]},[pSrcLeft],srcStep ;// pSrcLeft[0*leftStep]
+ VLD1 {dLeft[5]},[pSrcTmp],srcStep ;// pSrcLeft[1*leftStep]
+ VLD1 {dLeft[4]},[pSrcLeft] ;// pSrcLeft[2*leftStep]
+ VLD1 {dLeft[3]},[pSrcTmp] ;// pSrcLeft[3*leftStep]
+
+ VEXT dAbove0,dLeft,dAbove,#3 ;// [ U2|U1|U0|UL|L0|L1|L2|L3 ]
+ VEXT dAbove1,dLeft,dAbove,#2 ;// [ U1|U0|UL|L0|L1|L2|L3|X ]
+ VEXT dAbove2,dLeft,dAbove,#1 ;// [ U0|UL|L0|L1|L2|L3|X|X ]
+
+ VHADD dTmp0, dAbove0, dAbove2
+ VRHADD dTmp0, dTmp0, dAbove1 ;// Tmp[ 0 | 1 | 2 | 4 | 6 | 8 | X | X ]
+
+
+ VRHADD dTmp1, dAbove1, dAbove0 ;// (a+b+1)>>1
+ VSHL dTmp1U64,dTmp1U64,#24 ;// Tmp[ 3|5| 7 |9 | X | X | X | X ]
+
+
+ VSHL dTmpU64,dTmp0U64,#16 ;// Tmp[ 2|4|6|8| X | X | X | X ]
+ VZIP dTmp1,dTmp ;// dTmp = [ 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 ]
+ VEXT dTmp0,dTmp0,dTmp0,#6 ;// Tmp[ X| X| X| X| X| X| 0 | 1 ]
+ VEXT dTmp1,dTmp,dTmp0,#2 ;// Tmp[ 0 | 1 | 2 | 3 | 4 | 5 | 6 |7 ]
+
+ ADD pDstTmp, pDst, dstStep ;// pDstTmp writes the odd rows, pDst the even rows
+ ADD dstep, dstStep, dstStep ;// dstep = 2*dstStep
+
+ VST1 dTmp1U32[1],[pDst],dstep ;// row 0: Store pTmp[0|1|2|3]
+ VST1 dTmpU32[1],[pDstTmp],dstep ;// row 1: Store pTmp[2|3|4|5]
+ VST1 dTmp1U32[0],[pDst] ;// row 2: Store pTmp[4|5|6|7]
+ VST1 dTmpU32[0],[pDstTmp] ;// row 3: Store pTmp[6|7|8|9]
+
+ B ExitPredict4x4 ;// Branch to exit code
+
+OMX_VC_4x4_VL ;// Vertical-left mode: even rows use 2-tap averages, odd rows the 3-tap filter of the above row
+
+
+ TST availability, #OMX_VC_UPPER_RIGHT
+ BEQ DiagVLUpperRightNotAvailable
+
+ VLD1 dAbove0,[pSrcAbove] ;// [U7|U6|U5|U4|U3|U2|U1|U0]
+ VEXT dAbove1,dAbove0,dAbove0,#1 ;// [ X|U7|U6|U5|U4|U3|U2|U1]
+ VEXT dAbove2,dAbove1,dAbove1,#1 ;// [ X| X|U7|U6|U5|U4|U3|U2]
+
+ B DiagVLPredict4x4Store
+
+DiagVLUpperRightNotAvailable ;// Only U0..U3 are read; U3 stands in for the missing U4..U7
+ VLD1 dAboveU32[1],[pSrcAbove] ;// [U3|U2|U1|U0|-|-|-|-]
+ VDUP dU3, dAbove[7] ;// [U3 U3 U3 U3 U3 U3 U3 U3]
+
+ VEXT dAbove0, dAbove, dU3, #4 ;// [U3 U3 U3 U3 U3 U2 U1 U0]
+ VEXT dAbove1, dAbove, dU3, #5 ;// [U3 U3 U3 U3 U3 U3 U2 U1]
+ VEXT dAbove2, dAbove, dU3, #6 ;// [U3 U3 U3 U3 U3 U3 U3 U2]
+
+DiagVLPredict4x4Store
+
+ VRHADD dTmp0, dAbove1, dAbove0 ;// (a+b+1)>>1
+ ;// Tmp[ X| X| X| 8| 6| 4| 2| 0 ]
+
+ VHADD dTmp3, dAbove0, dAbove2
+ VRHADD dTmp3, dTmp3, dAbove1 ;// (a+2*b+c+2)>>2
+ ;// Tmp[ X| X| X| 9| 7| 5| 3| 1 ]
+
+ VEXT dTmp1,dTmp0,dTmp0,#1 ;// Tmp[ X| X| X| X| 8| 6| 4| 2 ]
+ ADD pDstTmp, pDst, dstStep ;// pDstTmp writes the odd rows, pDst the even rows
+ ADD dstep, dstStep, dstStep ;// dstep = 2*dstStep
+ VEXT dTmp2,dTmp3,dTmp1,#1 ;// Tmp[ X| X| X| X| 9| 7| 5| 3 ]
+
+ VST1 dTmp0U32[0],[pDst],dstep ;// row 0: Tmp[6],[4],[2],[0]
+ VST1 dTmp3U32[0],[pDstTmp],dstep ;// row 1: Tmp[7],[5],[3],[1]
+ VST1 dTmp1U32[0],[pDst] ;// row 2: Tmp[8],[6],[4],[2]
+ VST1 dTmp2U32[0],[pDstTmp] ;// row 3: Tmp[9],[7],[5],[3]
+
+ B ExitPredict4x4 ;// Branch to exit code
+
+OMX_VC_4x4_HU ;// Horizontal-up mode: interleaves 2-tap and 3-tap filtered left-edge samples, padding with L3
+ ADD pSrcTmp, pSrcLeft, leftStep ;// pSrcTmp reads the odd left-edge rows, pSrcLeft the even ones
+ ADD srcStep, leftStep, leftStep ;// srcStep = 2*leftStep
+
+ ;// Load Left Edge ;// [L3|L2|L1|L0|X|X|X|X]
+ VLD1 {dLeft[4]},[pSrcLeft],srcStep ;// pSrcLeft[0*leftStep]
+ VLD1 {dLeft[5]},[pSrcTmp],srcStep ;// pSrcLeft[1*leftStep]
+ VLD1 {dLeft[6]},[pSrcLeft] ;// pSrcLeft[2*leftStep]
+ VLD1 {dLeft[7]},[pSrcTmp] ;// pSrcLeft[3*leftStep]
+
+ VDUP dL3,dLeft[7] ;// [L3|L3|L3|L3|L3|L3|L3|L3]
+
+ VEXT dLeftHU0,dLeft,dL3,#4 ;// [L3|L3|L3|L3|L3|L2|L1|L0]
+ VEXT dLeftHU1,dLeft,dL3,#5 ;// [L3|L3|L3|L3|L3|L3|L2|L1]
+ VEXT dLeftHU2,dLeft,dL3,#6 ;// [L3|L3|L3|L3|L3|L3|L3|L2]
+
+ VHADD dTmp0, dLeftHU0, dLeftHU2
+ VRHADD dTmp0, dTmp0, dLeftHU1 ;// Tmp[ L3 | L3 | L3 | L3 | L3 | 5 | 3 | 1 ]
+
+ VRHADD dTmp1, dLeftHU1, dLeftHU0 ;// (a+b+1)>>1
+ ;// Tmp[ L3 | L3 | L3 | L3 | L3 | 4 | 2 | 0 ]
+
+ VZIP dTmp1,dTmp0 ;// dTmp1 = Tmp[7| 6| 5| 4| 3| 2| 1| 0]
+ ;// dTmp0 = [L3|L3|L3|L3|L3|L3|L3|L3]
+
+
+ VST1 dTmp1U32[0],[pDst],dstStep ;// row 0: [3|2|1|0]
+ VEXT dTmp1,dTmp1,dTmp1,#2 ;// Rotate down by two bytes for the next row
+ VST1 dTmp1U32[0],[pDst],dstStep ;// row 1: [5|4|3|2]
+ VEXT dTmp1,dTmp1,dTmp1,#2
+ VST1 dTmp1U32[0],[pDst],dstStep ;// row 2: [7|6|5|4]
+ VST1 dTmp0U32[0],[pDst] ;// row 3: [9|8|7|6], all equal to L3; falls through to ExitPredict4x4
+
+
+ExitPredict4x4 ;// Common exit for every 4x4 prediction mode
+
+ MOV return, #OMX_Sts_NoErr ;// r0 = OMX_Sts_NoErr (the DC paths have already set it)
+ M_END
+
+ ENDIF ;// CortexA8
+
+ END
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntra_4x4 ends
+;//-----------------------------------------------------------------------------------------------
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
new file mode 100755
index 0000000..e394339
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
@@ -0,0 +1,140 @@
+;//
+;//
+;// File Name: omxVCM4P10_TransformDequantChromaDCFromPair_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ IMPORT armVCM4P10_QPDivTable
+ IMPORT armVCM4P10_VMatrixQPModTable
+
+ M_VARIANTS CortexA8
+
+
+
+
+ IF CortexA8
+
+;// ARM Registers (several aliases share a core register; see the notes on each line)
+;//--------------------------------------
+;// Declare input registers
+;//--------------------------------------
+ppSrc RN 0 ;// in/out: address of the stream pointer; *ppSrc is read at entry and written back after unpacking
+pDst RN 1 ;// output: four 16-bit coefficients
+QP RN 2 ;// index into the QP div/mod tables; r2 is overwritten by Scale
+
+;//--------------------------------
+;// Scratch variable for Unpack2x2
+;//--------------------------------
+pSrc RN 9 ;// running stream pointer; r9 is reused as Shift after pSrc is stored back
+Value RN 4
+Value2 RN 5
+Flag RN 6
+strOffset RN 7
+cstOffset RN 8
+
+;//--------------------------------
+;// Scratch variable
+;//--------------------------------
+r0w0 RN 3
+r0w1 RN 4 ;// same register as Value
+
+c0w0 RN 5 ;// same register as Value2 and pQPDivTable
+c1w0 RN 6 ;// same register as Flag and pQPModTable
+
+return RN 0 ;// same register as ppSrc
+pQPDivTable RN 5
+pQPModTable RN 6
+Shift RN 9
+Scale RN 2
+
+
+
+;// Neon Registers
+
+dZero DN D0.U16
+dInvTrCoeff DN D0.S16 ;// same register as dZero
+dScale DN D1.S16
+qDqntCoeff QN Q1.S32 ;// Q1 = D2:D3, so it overlaps dDqntCoeff
+dDqntCoeff DN D2.S16
+
+
+ ;// omxVCM4P10_TransformDequantChromaDCFromPair: unpacks (flag, value) pairs into pDst, then applies the 2x2 inverse transform and dequantization
+ M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9 ;// presumably saves r4-r9 -- confirm against M_START in armCOMM_s.h
+
+ LDR pSrc, [ppSrc] ;// Load pSrc
+ VMOV dZero, #0
+ MOV cstOffset, #31 ;// Mask used in the loop to compute the store offset
+
+ ;//-----------------------------------------------------------------------
+ ;// Firstly, fill all the coefficient values on the <pDst> buffer by zero
+ ;//-----------------------------------------------------------------------
+
+ VST1 dZero,[pDst] ;// pDst[0] = pDst[1] = pDst[2] = pDst[3] = 0
+ LDRB Flag, [pSrc], #1 ;// Preload <Flag> before <unpackLoop>
+
+
+unpackLoop ;// Flag: bits 0-3 = coefficient index, 0x10 = 16-bit value follows, 0x20 = last pair
+ TST Flag, #0x10 ;// Computing (Flag & 0x10)
+ LDRSBNE Value2,[pSrc,#1] ;// 16-bit case: high byte, sign-extended
+ LDRBNE Value, [pSrc], #2 ;// 16-bit case: low byte; loaded byte-wise to avoid unaligned access
+ AND strOffset, cstOffset, Flag, LSL #1 ;// strOffset = (Flag & 15) << 1; NOTE(review): may exceed the 8 bytes zeroed above if the index is > 3
+ LDRSBEQ Value, [pSrc], #1 ;// 8-bit case: Value = sign-extended byte *pSrc++
+ ORRNE Value,Value,Value2, LSL #8 ;// 16-bit case: Value = low | (high << 8)
+
+ TST Flag, #0x20 ;// Computing (Flag & 0x20) to check, if we're done
+ LDRBEQ Flag, [pSrc], #1 ;// Flag = (OMX_U8) *pSrc++, for next iteration
+ STRH Value, [pDst, strOffset] ;// Store <Value> at offset <strOffset>
+ BEQ unpackLoop ;// Loop while the last-pair bit was clear (flags still from the TST above)
+
+ ;//--------------------------------------------------
+ ;//InvTransformDC2x2: Inlined (Implemented in ARM V6)
+ ;//--------------------------------------------------
+
+ LDMIA pDst, {r0w0, r0w1} ;// r0w0 = |c1|c0| & r0w1 = |c3|c2|
+
+ STR pSrc, [ppSrc] ;// Update the bitstream pointer
+
+ LDR pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer
+ LDR pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer
+
+ SADDSUBX r0w0, r0w0, r0w0 ;// [ c00+c01, c00-c01 ]
+ SADDSUBX r0w1, r0w1, r0w1 ;// [ c10+c11, c10-c11 ]
+
+ LDRSB Shift, [pQPDivTable, QP] ;// Shift = pQPDivTable[QP]
+ LDRSB Scale, [pQPModTable, QP] ;// Scale = pQPModTable[QP]; overwrites QP (same register)
+
+ SADD16 c0w0, r0w0, r0w1 ;// [ d00+d10, d01+d11 ]; overwrites the table pointer in r5, already used
+ SSUB16 c1w0, r0w0, r0w1 ;// [ d00-d10, d01-d11 ]; overwrites the table pointer in r6, already used
+
+ ;//-------------------------------------------------
+ ;//DequantChromaDC2x2: Inlined (Neon Implementation)
+ ;//-------------------------------------------------
+
+ LSL Scale, Scale, Shift ;// Scale = Scale << Shift
+ VMOV dInvTrCoeff, c0w0, c1w0 ;// Low word = c0w0, high word = c1w0
+ VREV32 dInvTrCoeff,dInvTrCoeff ;// Swap the two halfwords inside each word
+ VDUP dScale,Scale ;// Scale in every 16-bit lane
+
+ VMULL qDqntCoeff,dInvTrCoeff,dScale ;// 32-bit products coefficient * Scale
+ VSHRN dDqntCoeff,qDqntCoeff,#1 ;// >> 1 and narrow back to 16 bits
+
+
+ VST1 dDqntCoeff,[pDst] ;// Storing all the coefficients at once
+
+ MOV return, #OMX_Sts_NoErr
+ M_END
+
+ ENDIF ;// CortexA8
+
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
new file mode 100755
index 0000000..2529959
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
@@ -0,0 +1,264 @@
+;//
+;//
+;// File Name: omxVCM4P10_TransformDequantLumaDCFromPair_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// Description:
+;// H.264 inverse quantize and transform module
+;//
+;//
+
+;// Include standard headers
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+;// Import/Export symbols required from/to other files
+;// (For example tables)
+
+ IMPORT armVCM4P10_UnpackBlock4x4
+ IMPORT armVCM4P10_QPDivTable
+ IMPORT armVCM4P10_VMatrixQPModTable
+
+ M_VARIANTS CortexA8
+
+;// Set debugging level
+;//DEBUG_ON SETL {TRUE}
+
+
+;// Static Function: armVCM4P10_InvTransformDequantLumaDC4x4
+
+
+;// Guarding implementation by the processor name
+
+
+
+;// Static Function: armVCM4P10_InvTransformDequantLumaDC4x4
+
+;// Guarding implementation by the processor name
+
+ IF CortexA8
+
+;//Input Registers
+pData RN 0
+QP RN 1
+
+
+;//Local Scratch Registers
+
+;// ARM Registers
+
+pQPDivTable RN 2
+pQPModTable RN 3
+Shift RN 4
+Scale RN 5
+
+;// NEON Registers
+
+;// Packed Input pixels
+dIn0 DN D0.S16
+dIn1 DN D1.S16
+dIn2 DN D2.S16
+dIn3 DN D3.S16
+
+;// Intermediate calculations
+dRowSum1 DN D4.S16
+dRowSum2 DN D5.S16
+dRowDiff1 DN D6.S16
+dRowDiff2 DN D7.S16
+
+;// Row operated pixels
+dRowOp0 DN D0.S16
+dRowOp1 DN D1.S16
+dRowOp2 DN D2.S16
+dRowOp3 DN D3.S16
+qRowOp01 QN Q0.32
+qRowOp23 QN Q1.32
+
+;// Intermediate calculations
+dColSum1 DN D4.S16
+dColSum2 DN D5.S16
+dColDiff1 DN D6.S16
+dColDiff2 DN D7.S16
+
+;// Column operated pixels
+dColOp0 DN D0.S16
+dColOp1 DN D1.S16
+dColOp2 DN D2.S16
+dColOp3 DN D3.S16
+
+;// Temporary scratch variables
+
+dScale DN D5.S16
+qRound0 QN Q3.S32
+qRound1 QN Q4.S32
+qRound2 QN Q5.S32
+qRound3 QN Q6.S32
+
+;// InvTransformed and Dequantized pixels
+dOut0 DN D0.S16
+dOut1 DN D1.S16
+dOut2 DN D2.S16
+dOut3 DN D3.S16
+
+
+ ;// Allocate stack memory required by the function
+
+
+ ;// Write function header
+ M_START armVCM4P10_InvTransformDequantLumaDC4x4,r5,d13 ;// In-place transform + dequantisation of the 4x4 S16 block at pData (r0) for quantiser QP (r1)
+
+ ;******************************************************************
+ ;// The strategy used in implementing the transform is as follows:*
+ ;// Load the 4x4 block into 4 D-registers *
+ ;// Transpose the 4x4 matrix *
+ ;// Perform the row operations (on columns) using SIMD *
+ ;// Transpose the 4x4 result matrix *
+ ;// Perform the column operations *
+ ;******************************************************************
+
+ ;// Load all the 4x4 pixels in Transposed form
+
+ VLD4 {dIn0,dIn1,dIn2,dIn3},[pData] ;// VLD4 de-interleaves: dInN receives element N of each group of four
+ LDR pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer
+ LDR pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer
+
+ ;****************************************
+ ;// Row Operations (Performed on columns)
+ ;****************************************
+ ;// Scale factor calculation is done using ARM instructions
+ ;// Interleaved with NEON instructions in order to dual issue
+
+ VADD dRowSum1,dIn0,dIn1 ;// Sum1 = in0 + in1
+ VADD dRowSum2,dIn2,dIn3 ;// Sum2 = in2 + in3
+ VSUB dRowDiff1,dIn0,dIn1 ;// Diff1 = in0 - in1
+ LDRSB Shift, [pQPDivTable, QP] ;// ARM CODE: Shift = pQPDivTable[QP]
+ VSUB dRowDiff2,dIn2,dIn3 ;// Diff2 = in2 - in3
+ LDRSB Scale, [pQPModTable, QP] ;// ARM CODE: Scale = pQPModTable[QP]
+ VADD dRowOp0,dRowSum1,dRowSum2 ;// op0 = Sum1 + Sum2 (overwrites dIn0, same register)
+ VSUB dRowOp1,dRowSum1,dRowSum2 ;// op1 = Sum1 - Sum2
+ VSUB dRowOp2,dRowDiff1,dRowDiff2 ;// op2 = Diff1 - Diff2
+ LSL Scale, Scale, Shift ;// ARM CODE: Scale = Scale << Shift
+ VADD dRowOp3,dRowDiff1,dRowDiff2 ;// op3 = Diff1 + Diff2
+
+ ;****************************************
+ ;// Transpose the resultant matrix
+ ;****************************************
+
+ VTRN dRowOp0,dRowOp1 ;// 16-bit swap inside D0/D1 ...
+ VTRN dRowOp2,dRowOp3 ;// ... and inside D2/D3 ...
+ VTRN qRowOp01,qRowOp23 ;// ... then a 32-bit swap across Q0/Q1 completes the 4x4 transpose
+
+ ;****************************************
+ ;// Column Operations
+ ;****************************************
+
+ VADD dColSum1,dRowOp0,dRowOp1 ;// Same butterfly as above, applied to the transposed data
+ VADD dColSum2,dRowOp2,dRowOp3
+ VSUB dColDiff1,dRowOp0,dRowOp1
+ VSUB dColDiff2,dRowOp2,dRowOp3
+ VADD dColOp0,dColSum1,dColSum2
+ VSUB dColOp1,dColSum1,dColSum2
+ VSUB dColOp2,dColDiff1,dColDiff2
+ VADD dColOp3,dColDiff1,dColDiff2
+
+ ;//----------------------------------------------------------------------
+ ;//
+ ;// <Dequantize> improves on the c-reference code
+ ;// Both the cases i.e., Shift>=0 and Shift<0 cases are covered together
+ ;// We do not subtract 2 from Shift as in C reference, instead perform a
+ ;// Scale << Shift once in the beginning and do a right shift by a
+ ;// constant 2 after the Multiplication. The value of Round would be 2
+ ;//
+ ;// By doing this we avoid the branches required and also
+ ;// reduce the code size substantially
+ ;//
+ ;//----------------------------------------------------------------------
+
+
+ VDUP dScale, Scale ;// ARM -> NEON copy 'scale' to vector
+
+
+ VMOV qRound0,#2 ;// Set the Round Value
+ VMOV qRound1,#2 ;// (the accumulators are seeded with the rounding constant)
+ VMOV qRound2,#2
+ VMOV qRound3,#2
+
+ VMLAL qRound0,dColOp0,dScale ;// pDst[i] * Scale + Round
+ VMLAL qRound1,dColOp1,dScale ;// widening multiply-accumulate: S16 x S16 -> S32
+ VMLAL qRound2,dColOp2,dScale
+ VMLAL qRound3,dColOp3,dScale
+
+ VSHRN dOut0,qRound0,#2 ;// Right shift by 2 & (OMX_S16)Value
+ VSHRN dOut1,qRound1,#2 ;// VSHRN narrows by truncation, it does not saturate
+ VSHRN dOut2,qRound2,#2
+ VSHRN dOut3,qRound3,#2
+
+ ;***************************
+ ;// Store all the 4x4 pixels
+ ;***************************
+
+ VST1 {dOut0,dOut1,dOut2,dOut3}, [pData] ;// Result overwrites the input block
+
+
+ ;// Set return value (no instruction sets one here)
+
+ ;// Write function tail
+ M_END
+
+ ENDIF ;//CORTEXA8
+
+
+
+;// Function: omxVCM4P10_TransformDequantLumaDCFromPair
+
+;//Input Registers
+ppSrc RN 0
+pDst RN 1
+QPR2 RN 2
+
+;//Output Registers
+result RN 0
+
+;//Local Scratch Registers
+pDstR4 RN 4
+pDstR0 RN 0
+QPR1 RN 1
+QPR5 RN 5
+
+;// Guarding implementation by the processor name
+
+ IF CortexA8
+
+ ;// Allocate stack memory required by the function
+
+
+ ;// Write function header
+ M_START omxVCM4P10_TransformDequantLumaDCFromPair,r5 ;// Unpack a coefficient-pair stream into pDst, then transform/dequantise pDst in place
+
+ MOV pDstR4,pDst ;// Saving register r1
+ MOV QPR5,QPR2 ;// Saving register r2
+ BL armVCM4P10_UnpackBlock4x4 ;// Called with r0 = ppSrc and r1 = pDst, still as passed in
+
+ MOV pDstR0,pDstR4 ;// Setting up register r0
+ MOV QPR1,QPR5 ;// Setting up register r1
+ BL armVCM4P10_InvTransformDequantLumaDC4x4 ;// r0 = block pointer, r1 = QP
+
+
+ ;// Set return value
+ MOV result,#OMX_Sts_NoErr ;// Always reports success; neither callee's result is inspected
+
+ ;// Write function tail
+ M_END
+
+
+ ENDIF ;// CortexA8
+
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S
new file mode 100644
index 0000000..aca2df4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Average_4x_Align_unsafe_s.S
@@ -0,0 +1,134 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global armVCM4P10_Average_4x4_Align0_unsafe
+ .func armVCM4P10_Average_4x4_Align0_unsafe
+armVCM4P10_Average_4x4_Align0_unsafe: /* Averages four 4-byte rows read via r0 (step r1) with four rows at r2 (step r3); result overwrites the r2 rows */
+ PUSH {r4-r6,lr} /* note: r7, r10, r11 and r12 are written below but not saved here */
+ LDR r7, =0x80808080 /* per-byte 0x80, used to undo the bias left by UHSUB8 */
+ LDR r12,[r2,#0] /* row 0 of the r2 block */
+ LDR r10,[r0],r1 /* row 0 of the r0 block, r0 += r1 */
+ LDR lr,[r2,r3] /* row 1 of the r2 block */
+ LDR r11,[r0],r1 /* row 1 of the r0 block */
+ MVN r12,r12 /* ~b == 255 - b in every byte */
+ MVN lr,lr
+ UHSUB8 r5,r10,r12 /* per byte (a - (255 - b)) >> 1 */
+ UHSUB8 r4,r11,lr
+ EOR r5,r5,r7 /* flipping bit 7 adds 128 back: per byte (a + b + 1) >> 1 */
+ STR r5,[r2],r3 /* store row 0, r2 += r3 */
+ EOR r4,r4,r7
+ STR r4,[r2],r3 /* store row 1 */
+ LDR r10,[r0],r1 /* rows 2 and 3: same sequence */
+ LDR r12,[r2,#0]
+ LDR r11,[r0],r1
+ LDR lr,[r2,r3]
+ MVN r12,r12
+ UHSUB8 r5,r10,r12
+ MVN lr,lr
+ UHSUB8 r4,r11,lr
+ EOR r5,r5,r7
+ STR r5,[r2],r3
+ EOR r4,r4,r7
+ STR r4,[r2],r3
+ POP {r4-r6,pc} /* returns with r0 and r2 each advanced by four rows */
+ .endfunc
+
+ .global armVCM4P10_Average_4x4_Align2_unsafe
+ .func armVCM4P10_Average_4x4_Align2_unsafe
+armVCM4P10_Average_4x4_Align2_unsafe: /* Same rounded 4x4 average as the Align0 variant, but each source row is assembled from bytes 2..5 of two words at r0 */
+ PUSH {r4-r6,lr} /* note: r7, r10, r11 and r12 are written below but not saved here */
+ LDR r7, =0x80808080 /* per-byte 0x80, used to undo the bias left by UHSUB8 */
+ LDR r4,[r0,#4] /* second word of source row 0 */
+ LDR r10,[r0],r1 /* first word of source row 0, r0 += r1 */
+ LDR r12,[r2,#0] /* row 0 of the r2 block */
+ LDR lr,[r2,r3] /* row 1 of the r2 block */
+ LDR r5,[r0,#4] /* second word of source row 1 */
+ LDR r11,[r0],r1 /* first word of source row 1 */
+ MVN r12,r12 /* ~b == 255 - b in every byte */
+ MVN lr,lr
+ LSR r10,r10,#16 /* drop the two low bytes of the first word ... */
+ ORR r10,r10,r4,LSL #16 /* ... and append the two low bytes of the second (assumes little-endian, r0 word-aligned: TODO confirm with caller) */
+ LSR r11,r11,#16
+ ORR r11,r11,r5,LSL #16
+ UHSUB8 r5,r10,r12 /* per byte (a - (255 - b)) >> 1 */
+ UHSUB8 r4,r11,lr
+ EOR r5,r5,r7 /* per byte (a + b + 1) >> 1 */
+ STR r5,[r2],r3 /* store row 0, r2 += r3 */
+ EOR r4,r4,r7
+ STR r4,[r2],r3 /* store row 1 */
+ LDR r4,[r0,#4] /* rows 2 and 3: same sequence */
+ LDR r10,[r0],r1
+ LDR r12,[r2,#0]
+ LDR lr,[r2,r3]
+ LDR r5,[r0,#4]
+ LDR r11,[r0],r1
+ MVN r12,r12
+ MVN lr,lr
+ LSR r10,r10,#16
+ ORR r10,r10,r4,LSL #16
+ LSR r11,r11,#16
+ ORR r11,r11,r5,LSL #16
+ UHSUB8 r5,r10,r12
+ UHSUB8 r4,r11,lr
+ EOR r5,r5,r7
+ STR r5,[r2],r3
+ EOR r4,r4,r7
+ STR r4,[r2],r3
+ POP {r4-r6,pc}
+ .endfunc
+
+ .global armVCM4P10_Average_4x4_Align3_unsafe
+ .func armVCM4P10_Average_4x4_Align3_unsafe
+armVCM4P10_Average_4x4_Align3_unsafe: /* Same rounded 4x4 average as the Align0 variant, but each source row is assembled from bytes 3..6 of two words at r0 */
+ PUSH {r4-r6,lr} /* note: r7, r10, r11 and r12 are written below but not saved here */
+ LDR r7, =0x80808080 /* per-byte 0x80, used to undo the bias left by UHSUB8 */
+ LDR r4,[r0,#4] /* second word of source row 0 */
+ LDR r10,[r0],r1 /* first word of source row 0, r0 += r1 */
+ LDR r12,[r2,#0] /* row 0 of the r2 block */
+ LDR lr,[r2,r3] /* row 1 of the r2 block */
+ LDR r5,[r0,#4] /* second word of source row 1 */
+ LDR r11,[r0],r1 /* first word of source row 1 */
+ MVN r12,r12 /* ~b == 255 - b in every byte */
+ MVN lr,lr
+ LSR r10,r10,#24 /* keep only the top byte of the first word ... */
+ ORR r10,r10,r4,LSL #8 /* ... and append the three low bytes of the second (assumes little-endian, r0 word-aligned: TODO confirm with caller) */
+ LSR r11,r11,#24
+ ORR r11,r11,r5,LSL #8
+ UHSUB8 r5,r10,r12 /* per byte (a - (255 - b)) >> 1 */
+ UHSUB8 r4,r11,lr
+ EOR r5,r5,r7 /* per byte (a + b + 1) >> 1 */
+ STR r5,[r2],r3 /* store row 0, r2 += r3 */
+ EOR r4,r4,r7
+ STR r4,[r2],r3 /* store row 1 */
+ LDR r4,[r0,#4] /* rows 2 and 3: same sequence */
+ LDR r10,[r0],r1
+ LDR r12,[r2,#0]
+ LDR lr,[r2,r3]
+ LDR r5,[r0,#4]
+ LDR r11,[r0],r1
+ MVN r12,r12
+ MVN lr,lr
+ LSR r10,r10,#24
+ ORR r10,r10,r4,LSL #8
+ LSR r11,r11,#24
+ ORR r11,r11,r5,LSL #8
+ UHSUB8 r5,r10,r12
+ UHSUB8 r4,r11,lr
+ EOR r5,r5,r7
+ STR r5,[r2],r3
+ EOR r4,r4,r7
+ STR r4,[r2],r3
+ POP {r4-r6,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S
new file mode 100644
index 0000000..b9ee221
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingChroma_unsafe_s.S
@@ -0,0 +1,54 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global armVCM4P10_DeblockingChromabSLT4_unsafe
+ .func armVCM4P10_DeblockingChromabSLT4_unsafe
+armVCM4P10_DeblockingChromabSLT4_unsafe: /* Register-only helper: inputs arrive in d4/d5/d8/d9/d14/d15/d16 and r2/r3/r5, outputs leave in d29 and d24. Presumably d4=p0, d5=p1, d8=q0, d9=q1: TODO confirm with caller */
+ VLD1.32 {d18[0]},[r5]! /* four clip bytes from [r5], r5 += 4 */
+ VSUBL.U8 q11,d5,d9 /* q11 = d5 - d9, widened to 16 bits */
+ VMOV d28,d18
+ VSUBL.U8 q10,d8,d4 /* q10 = d8 - d4, widened to 16 bits */
+ VSHR.S16 q11,q11,#2 /* q11 = (d5 - d9) >> 2 */
+ VZIP.8 d18,d28 /* interleave d18 with its copy: each clip byte now appears twice in a row */
+ VBIF d18,d14,d16 /* take d14 bits wherever mask d16 is clear */
+ VRHADD.S16 q10,q11,q10 /* delta = ((d8 - d4) + ((d5 - d9) >> 2) + 1) >> 1 */
+ VADD.I8 d31,d18,d15 /* clip limit = d18 + d15 */
+ VQMOVN.S16 d20,q10 /* saturate delta to signed 8 bits */
+ VLD1.8 {d0[]},[r2] /* byte at [r2] replicated across d0 (not used again in this routine) */
+ VMIN.S8 d20,d20,d31 /* delta = min(delta, limit) */
+ VNEG.S8 d31,d31
+ VLD1.8 {d2[]},[r3] /* byte at [r3] replicated across d2 (not used again in this routine) */
+ VMAX.S8 d20,d20,d31 /* delta = max(delta, -limit) */
+ VMOVL.U8 q14,d4
+ VMOVL.U8 q12,d8
+ VADDW.S8 q14,q14,d20 /* d4 + delta */
+ VSUBW.S8 q12,q12,d20 /* d8 - delta */
+ VQMOVUN.S16 d29,q14 /* d29 = clamp(d4 + delta) to 0..255 */
+ VQMOVUN.S16 d24,q12 /* d24 = clamp(d8 - delta) to 0..255 */
+ BX lr
+ .endfunc
+
+ .global armVCM4P10_DeblockingChromabSGE4_unsafe
+ .func armVCM4P10_DeblockingChromabSGE4_unsafe
+armVCM4P10_DeblockingChromabSGE4_unsafe: /* Register-only helper: inputs in d4/d5/d8/d9, outputs in d13 and d31. Presumably d4=p0, d5=p1, d8=q0, d9=q1: TODO confirm with caller */
+ VHADD.U8 d13,d4,d9 /* (d4 + d9) >> 1 */
+ VHADD.U8 d31,d8,d5 /* (d8 + d5) >> 1 */
+ VLD1.8 {d0[]},[r2] /* byte at [r2] replicated across d0 (not used again in this routine) */
+ ADD r5,r5,#4 /* step r5 by the same 4 bytes the bSLT4 variant consumes with its post-incremented load */
+ VLD1.8 {d2[]},[r3] /* byte at [r3] replicated across d2 (not used again in this routine) */
+ VRHADD.U8 d13,d13,d5 /* d13 = (((d4 + d9) >> 1) + d5 + 1) >> 1 */
+ VRHADD.U8 d31,d31,d9 /* d31 = (((d8 + d5) >> 1) + d9 + 1) >> 1 */
+ BX lr
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S
new file mode 100644
index 0000000..47f3d44
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DeblockingLuma_unsafe_s.S
@@ -0,0 +1,102 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global armVCM4P10_DeblockingLumabSLT4_unsafe
+ .func armVCM4P10_DeblockingLumabSLT4_unsafe
+armVCM4P10_DeblockingLumabSLT4_unsafe: /* Register-only helper: pixel rows in d4/d5/d6 and d8/d9/d10, masks in d12/d16/d17, outputs in d29/d30/d24/d25. Presumably d4..d6 = p0..p2 and d8..d10 = q0..q2: TODO confirm with caller */
+ VSUBL.U8 q11,d5,d9 /* q11 = d5 - d9, widened */
+ VLD1.8 {d18[]},[r5]! /* first clip byte from [r5], replicated, r5 += 1 */
+ VSUBL.U8 q10,d8,d4 /* q10 = d8 - d4, widened */
+ VLD1.8 {d19[]},[r5]! /* second clip byte, replicated, r5 += 1 */
+ VSHR.S16 q11,q11,#2 /* q11 = (d5 - d9) >> 2 */
+ VEXT.8 d18,d18,d19,#4 /* low four lanes use the first clip byte, high four the second */
+ VAND d19,d17,d15 /* d15 contribution only where mask d17 is set */
+ VBIF d18,d14,d16 /* take d14 bits wherever mask d16 is clear */
+ VRHADD.S16 q10,q11,q10 /* delta = ((d8 - d4) + ((d5 - d9) >> 2) + 1) >> 1 */
+ VRHADD.U8 d24,d4,d8 /* d24 = (d4 + d8 + 1) >> 1 */
+ VADD.I8 d31,d18,d19 /* limit = clip + (d17 & d15) ... */
+ VAND d19,d12,d15
+ VQADD.U8 d23,d5,d18 /* upper bound for the new d5 value: d5 + clip (saturating) */
+ VQMOVN.S16 d20,q10 /* saturate delta to signed 8 bits */
+ VADD.I8 d31,d31,d19 /* ... + (d12 & d15) */
+ VQSUB.U8 d22,d5,d18 /* lower bound for the new d5 value: d5 - clip */
+ VQADD.U8 d19,d9,d18 /* upper bound for the new d9 value */
+ VHADD.U8 d26,d24,d6 /* (avg(d4,d8) + d6) >> 1 */
+ VMIN.S8 d20,d20,d31 /* delta = min(delta, limit) */
+ VNEG.S8 d31,d31
+ VQSUB.U8 d21,d9,d18 /* lower bound for the new d9 value */
+ VHADD.U8 d27,d24,d10 /* (avg(d4,d8) + d10) >> 1 */
+ VMAX.U8 d30,d26,d22 /* clamp from below */
+ VMAX.S8 d20,d20,d31 /* delta = max(delta, -limit) */
+ VMOVL.U8 q14,d4
+ VMOVL.U8 q12,d8
+ VADDW.S8 q14,q14,d20 /* d4 + delta */
+ VSUBW.S8 q12,q12,d20 /* d8 - delta */
+ VQMOVUN.S16 d29,q14 /* d29 = clamp(d4 + delta) to 0..255 */
+ VQMOVUN.S16 d24,q12 /* d24 = clamp(d8 - delta) to 0..255 */
+ VMAX.U8 d25,d27,d21 /* clamp from below */
+ VMIN.U8 d30,d30,d23 /* clamp from above */
+ VMIN.U8 d25,d25,d19 /* clamp from above */
+ VBIF d29,d4,d16 /* keep the original d4 where mask d16 is clear */
+ VBIF d30,d5,d17 /* keep the original d5 where mask d17 is clear */
+ VBIF d24,d8,d16 /* keep the original d8 where mask d16 is clear */
+ VBIF d25,d9,d12 /* keep the original d9 where mask d12 is clear */
+ BX lr
+ .endfunc
+
+ .global armVCM4P10_DeblockingLumabSGE4_unsafe
+ .func armVCM4P10_DeblockingLumabSGE4_unsafe
+armVCM4P10_DeblockingLumabSGE4_unsafe: /* Register-only helper: pixel rows in d4..d7 and d8..d11, masks in d12/d16/d17, outputs in d29/d30/d31 and d24/d25/d28. Presumably d4..d7 = p0..p3 and d8..d11 = q0..q3: TODO confirm with caller */
+ VSHR.U8 d19,d0,#2 /* d0 >> 2 ... */
+ VADD.I8 d19,d19,d15 /* ... + d15 ... */
+ VADDL.U8 q10,d8,d4 /* q10 = d8 + d4, widened */
+ VADD.I8 d19,d19,d15 /* ... + d15 again: threshold = (d0 >> 2) + 2*d15 */
+ VADDL.U8 q11,d6,d9 /* q11 = d6 + d9 */
+ VADDW.U8 q12,q10,d5 /* q12 = d4 + d8 + d5 */
+ VCGT.U8 d19,d19,d13 /* mask: threshold > d13 */
+ VSHR.U16 q11,q11,#1
+ VHADD.U16 q11,q12,q11
+ VADDW.U8 q12,q12,d6 /* q12 = d4 + d8 + d5 + d6 */
+ VADDL.U8 q13,d7,d6 /* q13 = d7 + d6 */
+ VAND d17,d17,d19 /* narrow mask d17 by the threshold test */
+ VHADD.U8 d28,d4,d9
+ VSRA.U16 q13,q12,#1 /* q13 += q12 >> 1 */
+ VAND d12,d12,d19 /* narrow mask d12 by the threshold test */
+ VQRSHRN.U16 d29,q11,#1
+ VRHADD.U8 d28,d28,d5 /* alternative value: (((d4 + d9) >> 1) + d5 + 1) >> 1 */
+ VQRSHRN.U16 d30,q12,#2 /* d30 = (d4 + d8 + d5 + d6 + 2) >> 2 */
+ VADDL.U8 q11,d10,d5 /* second side: same steps with d8..d11 taking the place of d4..d7 */
+ VADDW.U8 q12,q10,d9
+ VBIF d29,d28,d17 /* use the alternative value where mask d17 is clear */
+ VQRSHRN.U16 d31,q13,#2
+ VADDL.U8 q13,d11,d10
+ VSHR.U16 q11,q11,#1
+ VHADD.U16 q11,q12,q11
+ VADDW.U8 q12,q12,d10
+ VHADD.U8 d28,d8,d5
+ VBIF d29,d4,d16 /* keep the original d4 where mask d16 is clear */
+ VBIF d30,d5,d17 /* keep the original d5 where mask d17 is clear */
+ VSRA.U16 q13,q12,#1
+ VQRSHRN.U16 d25,q12,#2
+ VQRSHRN.U16 d24,q11,#1
+ VRHADD.U8 d22,d28,d9 /* alternative value: (((d8 + d5) >> 1) + d9 + 1) >> 1 */
+ VBIF d25,d9,d12 /* keep the original d9 where mask d12 is clear */
+ VBIF d31,d6,d17 /* keep the original d6 where mask d17 is clear */
+ VBIF d24,d22,d12 /* use the alternative value where mask d12 is clear */
+ VQRSHRN.U16 d28,q13,#2
+ VBIF d24,d8,d16 /* keep the original d8 where mask d16 is clear */
+ VBIF d28,d10,d12 /* keep the original d10 where mask d12 is clear */
+ BX lr
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S
new file mode 100644
index 0000000..e68bd8e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DecodeCoeffsToPair_s.S
@@ -0,0 +1,272 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global armVCM4P10_DecodeCoeffsToPair
+ .func armVCM4P10_DecodeCoeffsToPair
+armVCM4P10_DecodeCoeffsToPair: /* Table-driven VLC decode of one coefficient block. r0 -> byte pointer, r1 -> bit offset, r2 -> count byte out, r3 -> output pointer; two more args on the stack. Returns r0 = 0, or -2 on a bad code. Presumably H.264 CAVLC residual decoding: TODO confirm against the OpenMAX DL spec */
+ PUSH {r4-r12,lr} /* 10 registers = 0x28 bytes */
+ SUB sp,sp,#0x40 /* scratch: [0..8] saved r0/r1/r3, [0xc..] 16-bit levels, [0x2c..] run bytes */
+ LDR r10,[r0,#0] /* r10 = current stream byte pointer */
+ LDR r12,[r1,#0] /* r12 = current bit offset */
+ LDR r6, =armVCM4P10_CAVLCCoeffTokenTables
+ LDR r4,[sp,#0x68] /* 0x68 = 0x40 + 0x28: first stack-passed argument, used as table selector */
+ LDRB r9,[r10,#2]
+ LDRB r8,[r10,#1]
+ LDRB r11,[r10],#3 /* r10 += 3 */
+ ADD r12,r12,#8
+ LDR r6,[r6,r4,LSL #2] /* r6 = selected token table */
+ ORR r9,r9,r8,LSL #8
+ ORR r11,r9,r11,LSL #16 /* r11 = first three stream bytes, most significant first */
+ LSLS r8,r11,r12 /* move the next unread bit up to bit 31 */
+ MOVS r7,#0x1e
+ AND r7,r7,r8,LSR #27 /* r7 = 2 * (next 4 bits): halfword table offset */
+ SUBS r12,r12,#8 /* carry set when a whole byte has been used up */
+L0x44: /* token table walk; entry flags come from the preceding SUBS */
+ BCC L1
+ LDRB r8,[r10],#1 /* refill: fetch the next stream byte */
+L1:
+ LDRH r7,[r6,r7] /* table entry */
+ ADDCC r12,r12,#8 /* no refill: undo the trial subtraction */
+ ADD r12,r12,#4 /* account for the 4 bits just examined */
+ ORRCS r11,r8,r11,LSL #8 /* refill: append the fetched byte */
+ LSRS r8,r7,#1 /* bit 0 of the entry marks a leaf */
+ BCS L0x74
+ LSLS r8,r11,r12 /* not a leaf: index onward with 2 more bits */
+ SUBS r12,r12,#0xa
+ ADD r7,r7,r8,LSR #29
+ BIC r7,r7,#1
+ B L0x44
+L0x74: /* leaf reached */
+ SUB r12,r12,r7,LSR #13 /* hand back bits that were examined but not part of the code */
+ BIC r7,r8,#0xf000 /* decoded symbol */
+ LSRS r5,r7,#2 /* r5 = symbol >> 2: number of coefficients */
+ STRB r5,[r2,#0] /* report the count through r2 */
+ BEQ L0x344 /* zero coefficients: only the stream position is written back */
+ CMP r7,#0x44 /* a count of 17 or more is invalid */
+ BGE L0x33c
+ STR r0,[sp,#0] /* r0, r1 and r3 are reused below, keep the originals */
+ STR r1,[sp,#4]
+ STR r3,[sp,#8]
+ ANDS r1,r7,#3 /* r1 = symbol & 3 (presumably TrailingOnes) */
+ ADD r2,sp,#0xc /* r2 = write pointer into the level scratch array */
+ BEQ L0xd8
+ MOV r0,r1
+L0xac: /* one sign bit per trailing coefficient */
+ LSLS r7,r11,r12
+ SUBS r12,r12,#7
+ BCC L2
+ LDRB r8,[r10],#1
+L2:
+ ADDCC r12,r12,#8
+ LSR r7,r7,#31 /* the sign bit */
+ ORRCS r11,r8,r11,LSL #8
+ SUBS r0,r0,#1
+ MOV r8,#1
+ SUB r8,r8,r7,LSL #1 /* level = 1 - 2*bit, i.e. +1 or -1 */
+ STRH r8,[r2],#2
+ BGT L0xac
+L0xd8: /* remaining levels */
+ SUBS r0,r5,r1 /* r0 = levels still to decode */
+ BEQ L0x1b8
+ MOV r4,#1 /* r4 = suffix length: 1 when the count exceeds 10 ... */
+ CMP r5,#0xa
+ MOVLE r4,#0
+ CMP r1,#3
+ MOVLT r1,#4 /* r1 = offset added to the first level code: 4 when fewer than three trailing ones */
+ MOVGE r1,#2
+ MOVGE r4,#0 /* ... and 0 when there are three trailing ones */
+L0xfc: /* decode one level */
+ LSLS r7,r11,r12
+ CLZ r7,r7 /* prefix = number of leading zero bits */
+ ADD r12,r12,r7
+ SUBS r12,r12,#7
+ BCC L3
+ LDRB r8,[r10],#1 /* up to two refill bytes may be needed */
+ ORR r11,r8,r11,LSL #8
+ SUBS r12,r12,#8
+ BCC L3
+ LDRB r8,[r10],#1
+L3:
+ ADDCC r12,r12,#8
+ ORRCS r11,r8,r11,LSL #8
+ CMP r7,#0x10 /* a prefix of 16 or more is rejected */
+ BGE L0x33c
+ MOVS lr,r4 /* lr = number of suffix bits to read */
+ TEQEQ r7,#0xe
+ MOVEQ lr,#4 /* suffix length 0 with prefix 14: 4-bit suffix */
+ TEQ r7,#0xf
+ MOVEQ lr,#0xc /* prefix 15: 12-bit escape suffix */
+ TEQEQ r4,#0
+ ADDEQ r7,r7,#0xf
+ TEQ lr,#0
+ BEQ L0x184
+ LSL r3,r11,r12
+ ADD r12,r12,lr
+ SUBS r12,r12,#8
+ RSB r9,lr,#0x20
+ BCC L4
+ LDRB r8,[r10],#1
+ ORR r11,r8,r11,LSL #8
+ SUBS r12,r12,#8
+ BCC L4
+ LDRB r8,[r10],#1
+L4:
+ ADDCC r12,r12,#8
+ LSR r3,r3,r9 /* r3 = suffix value */
+ ORRCS r11,r8,r11,LSL #8
+ LSL r7,r7,r4
+ ADD r7,r3,r7 /* level code = (prefix << suffix length) + suffix */
+L0x184:
+ ADD r7,r7,r1
+ MOV r1,#2 /* later levels always use offset 2 */
+ LSRS r8,r7,#1 /* magnitude = code >> 1, carry = low bit */
+ RSBCS r8,r8,#0 /* odd code: negative level */
+ STRH r8,[r2],#2
+ LDR r9, =armVCM4P10_SuffixToLevel
+ LDRSB r8,[r9,r4] /* threshold for the current suffix length */
+ TEQ r4,#0
+ MOVEQ r4,#1
+ CMP r7,r8
+ ADDCS r4,r4,#1 /* grow the suffix length once the code reaches the threshold */
+ SUBS r0,r0,#1
+ BGT L0xfc
+L0x1b8: /* zero runs */
+ LDR r8,[sp,#0x6c] /* second stack-passed argument (presumably the maximum coefficient count) */
+ SUB r0,r5,#1
+ SUBS r1,r8,r5 /* r1 = maximum - count */
+ ADD r4,sp,#0x2c /* r4 = write pointer into the run scratch array */
+ MOV lr,r5
+ SUB lr,lr,#1 /* lr = count - 1 */
+ BEQ L0x2b0 /* block is full: every run is zero */
+ TEQ r8,#4
+ LDREQ r6, =(armVCM4P10_CAVLCTotalZeros2x2Tables - 4) /* the -4 lets the 1-based count index the table array directly */
+ LDRNE r6, =(armVCM4P10_CAVLCTotalZeroTables - 4)
+ LDR r6,[r6,r5,LSL #2]
+ LSLS r8,r11,r12
+ MOVS r7,#0x1e
+ AND r7,r7,r8,LSR #27
+ SUBS r12,r12,#8
+L0x1f4: /* same table walk as L0x44, on the total-zeros table */
+ BCC L5
+ LDRB r8,[r10],#1
+L5:
+ LDRH r7,[r6,r7]
+ ADDCC r12,r12,#8
+ ADD r12,r12,#4
+ ORRCS r11,r8,r11,LSL #8
+ LSRS r8,r7,#1
+ BCS L0x224
+ LSLS r8,r11,r12
+ SUBS r12,r12,#0xa
+ ADD r7,r7,r8,LSR #29
+ BIC r7,r7,#1
+ B L0x1f4
+L0x224:
+ SUB r12,r12,r7,LSR #13
+ BIC r7,r8,#0xf000 /* decoded zero total */
+ CMP r7,#0x10
+ BGE L0x33c
+ LDR r3, =(armVCM4P10_CAVLCRunBeforeTables - 4) /* indexed by zeros left, starting at 1 */
+ ADD r4,sp,#0x2c
+ MOVS r1,r7 /* r1 = zeros still to distribute */
+ ADD lr,lr,r1 /* lr = scan index of the last coefficient: count - 1 + zero total */
+ BEQ L0x2b0
+L0x248: /* one run per coefficient while zeros remain */
+ SUBS r0,r0,#1
+ LDR r6,[r3,r1,LSL #2]
+ BLT L0x2bc
+ LSLS r8,r11,r12
+ MOVS r7,#0xe
+ AND r7,r7,r8,LSR #28 /* this table is entered with 3 bits */
+ SUBS r12,r12,#8
+L0x264:
+ BCC L6
+ LDRB r8,[r10],#1
+L6:
+ LDRH r7,[r6,r7]
+ ADDCC r12,r12,#8
+ ADD r12,r12,#3
+ ORRCS r11,r8,r11,LSL #8
+ LSRS r8,r7,#1
+ BCS L0x294
+ LSLS r8,r11,r12
+ SUBS r12,r12,#9
+ ADD r7,r7,r8,LSR #29
+ BIC r7,r7,#1
+ B L0x264
+L0x294:
+ SUB r12,r12,r7,LSR #13
+ BIC r7,r8,#0xf000 /* decoded run */
+ CMP r7,#0xf
+ BGE L0x33c
+ SUBS r1,r1,r7
+ STRB r7,[r4],#1
+ BGT L0x248
+L0x2b0: /* store r1 as the run for each remaining coefficient but the last */
+ SUBS r0,r0,#1
+ BLT L7
+ STRB r1,[r4],#1
+L7:
+ BGT L0x2b0
+L0x2bc: /* emit the packed output */
+ STRB r1,[r4],#1 /* the last coefficient takes whatever zeros are left */
+ LDR r8,[sp,#0x6c]
+ TEQ r8,#0xf
+ ADDEQ lr,lr,#1 /* a maximum of 15 shifts every position up by one */
+ SUB r4,r4,r5 /* rewind the run pointer ... */
+ SUB r2,r2,r5
+ SUB r2,r2,r5 /* ... and the level pointer (two bytes per level) */
+ LDR r3,[sp,#8]
+ LDR r0,[r3,#0] /* r0 = output write pointer */
+ TEQ r8,#4
+ LDREQ r6, =armVCM4P10_ZigZag_2x2
+ LDRNE r6, =armVCM4P10_ZigZag_4x4
+L0x2ec: /* one record per coefficient: flag/position byte, then a 1- or 2-byte value */
+ LDRB r9,[r4],#1
+ LDRB r8,[r6,lr] /* position from the scan table */
+ SUB lr,lr,#1
+ SUB lr,lr,r9 /* step back over this coefficient and its run */
+ LDRSH r9,[r2],#2
+ SUBS r5,r5,#1
+ ORREQ r8,r8,#0x20 /* 0x20 marks the final record */
+ ADD r1,r9,#0x80
+ CMP r1,#0x100
+ ORRCS r8,r8,#0x10 /* 0x10 marks a value outside -128..127, stored as 2 bytes */
+ TEQ r5,#0
+ STRB r8,[r0],#1
+ STRB r9,[r0],#1 /* low byte */
+ LSR r9,r9,#8
+ BCC L8
+ STRB r9,[r0],#1 /* high byte, 16-bit values only */
+L8:
+ BNE L0x2ec
+ STR r0,[r3,#0] /* write back the advanced output pointer */
+ LDR r0,[sp,#0]
+ LDR r1,[sp,#4]
+ B L0x344
+L0x33c: /* invalid code */
+ MVN r0,#1 /* r0 = -2; stream pointer and offset are not written back */
+ B L0x35c
+L0x344: /* success: write back the stream position */
+ ADD r10,r10,r12,LSR #3
+ AND r12,r12,#7
+ SUB r10,r10,#4
+ STR r12,[r1,#0]
+ STR r10,[r0,#0]
+ MOV r0,#0
+L0x35c:
+ ADD sp,sp,#0x40
+ POP {r4-r12,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DequantTables_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DequantTables_s.S
new file mode 100644
index 0000000..44eb428
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_DequantTables_s.S
@@ -0,0 +1,103 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .section .rodata
+ .align 4
+
+
+ .global armVCM4P10_QPDivTable
+ .global armVCM4P10_VMatrixQPModTable
+ .global armVCM4P10_PosToVCol4x4
+ .global armVCM4P10_PosToVCol2x2
+ .global armVCM4P10_VMatrix
+ .global armVCM4P10_QPModuloTable
+ .global armVCM4P10_VMatrixU16
+
+armVCM4P10_PosToVCol4x4: /* 16 bytes, one per 4x4 position; every value is 0, 1 or 2 */
+ .byte 0, 2, 0, 2
+ .byte 2, 1, 2, 1
+ .byte 0, 2, 0, 2
+ .byte 2, 1, 2, 1
+
+armVCM4P10_PosToVCol2x2: /* 4 bytes, one per 2x2 position */
+ .byte 0, 2
+ .byte 2, 1
+
+armVCM4P10_VMatrix: /* 6 rows of 3 bytes */
+ .byte 10, 16, 13
+ .byte 11, 18, 14
+ .byte 13, 20, 16
+ .byte 14, 23, 18
+ .byte 16, 25, 20
+ .byte 18, 29, 23
+
+;//-------------------------------------------------------
+;// This table evaluates the expression [(INT)(QP/6)],
+;// for values of QP from 0 to 51 (inclusive).
+;//-------------------------------------------------------
+
+armVCM4P10_QPDivTable: /* holds 54 entries, i.e. indices 0..53, two more than the range named above */
+ .byte 0, 0, 0, 0, 0, 0
+ .byte 1, 1, 1, 1, 1, 1
+ .byte 2, 2, 2, 2, 2, 2
+ .byte 3, 3, 3, 3, 3, 3
+ .byte 4, 4, 4, 4, 4, 4
+ .byte 5, 5, 5, 5, 5, 5
+ .byte 6, 6, 6, 6, 6, 6
+ .byte 7, 7, 7, 7, 7, 7
+ .byte 8, 8, 8, 8, 8, 8
+
+;//----------------------------------------------------
+;// This table contains armVCM4P10_VMatrix[QP%6][0] entries,
+;// for values of QP from 0 to 51 (inclusive).
+;//----------------------------------------------------
+
+armVCM4P10_VMatrixQPModTable: /* first column of armVCM4P10_VMatrix repeated 9 times (54 entries) */
+ .byte 10, 11, 13, 14, 16, 18
+ .byte 10, 11, 13, 14, 16, 18
+ .byte 10, 11, 13, 14, 16, 18
+ .byte 10, 11, 13, 14, 16, 18
+ .byte 10, 11, 13, 14, 16, 18
+ .byte 10, 11, 13, 14, 16, 18
+ .byte 10, 11, 13, 14, 16, 18
+ .byte 10, 11, 13, 14, 16, 18
+ .byte 10, 11, 13, 14, 16, 18
+
+;//-------------------------------------------------------
+;// This table evaluates the modulus expression [QP%6]*6,
+;// for values of QP from 0 to 51 (inclusive).
+;//-------------------------------------------------------
+
+armVCM4P10_QPModuloTable: /* 54 entries */
+ .byte 0, 6, 12, 18, 24, 30
+ .byte 0, 6, 12, 18, 24, 30
+ .byte 0, 6, 12, 18, 24, 30
+ .byte 0, 6, 12, 18, 24, 30
+ .byte 0, 6, 12, 18, 24, 30
+ .byte 0, 6, 12, 18, 24, 30
+ .byte 0, 6, 12, 18, 24, 30
+ .byte 0, 6, 12, 18, 24, 30
+ .byte 0, 6, 12, 18, 24, 30
+
+;//-------------------------------------------------------
+;// This table contains the individual byte values stored as
+;// halfwords. This avoids unpacking inside the function
+;//-------------------------------------------------------
+
+armVCM4P10_VMatrixU16: /* same 6x3 values as armVCM4P10_VMatrix, 16 bits each */
+ .hword 10, 16, 13
+ .hword 11, 18, 14
+ .hword 13, 20, 16
+ .hword 14, 23, 18
+ .hword 16, 25, 20
+ .hword 18, 29, 23
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Align_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Align_unsafe_s.S
new file mode 100644
index 0000000..37bc69b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Align_unsafe_s.S
@@ -0,0 +1,123 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ .func armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+armVCM4P10_InterpolateLuma_HorAlign9x_unsafe: /* Copies r9 rows from r0 (any byte alignment, step r1) into the buffer at r8 as 12-byte rows. Returns r0 = buffer start, r1 = 12. Writes r7, r10, r11, r12 and saves nothing */
+ MOV r12,r8 /* remember the buffer start for the return value */
+ AND r7,r0,#3 /* r7 = byte misalignment of the source, 0..3 */
+ BIC r0,r0,#3 /* round the source down to a word boundary */
+ ADD pc,pc,r7,LSL #2 /* pc reads as this instruction + 8, so this lands on branch number r7 below */
+ NOP /* padding so the branch table starts at +8 */
+ B Copy0toAligned
+ B Copy1toAligned
+ B Copy2toAligned
+ B Copy3toAligned
+Copy0toAligned: /* already aligned: plain 3-word copy per row */
+ LDM r0,{r7,r10,r11}
+ SUBS r9,r9,#1
+ ADD r0,r0,r1
+ STM r8!,{r7,r10,r11}
+ BGT Copy0toAligned
+ B CopyEnd
+Copy1toAligned: /* shift the 12 loaded bytes down by one byte */
+ LDM r0,{r7,r10,r11}
+ SUBS r9,r9,#1
+ ADD r0,r0,r1
+ LSR r7,r7,#8
+ ORR r7,r7,r10,LSL #24
+ LSR r10,r10,#8
+ ORR r10,r10,r11,LSL #24
+ LSR r11,r11,#8 /* the top byte of the third word is left as zero */
+ STM r8!,{r7,r10,r11}
+ BGT Copy1toAligned
+ B CopyEnd
+Copy2toAligned: /* shift down by two bytes */
+ LDM r0,{r7,r10,r11}
+ SUBS r9,r9,#1
+ ADD r0,r0,r1
+ LSR r7,r7,#16
+ ORR r7,r7,r10,LSL #16
+ LSR r10,r10,#16
+ ORR r10,r10,r11,LSL #16
+ LSR r11,r11,#16 /* the top two bytes of the third word are left as zero */
+ STM r8!,{r7,r10,r11}
+ BGT Copy2toAligned
+ B CopyEnd
+Copy3toAligned: /* shift down by three bytes */
+ LDM r0,{r7,r10,r11}
+ SUBS r9,r9,#1
+ ADD r0,r0,r1
+ LSR r7,r7,#24
+ ORR r7,r7,r10,LSL #8
+ LSR r10,r10,#24
+ ORR r10,r10,r11,LSL #8
+ LSR r11,r11,#24 /* only the low byte of the third word is valid */
+ STM r8!,{r7,r10,r11}
+ BGT Copy3toAligned
+CopyEnd:
+ MOV r0,r12 /* hand the aligned copy back as the new source ... */
+ MOV r1,#0xc /* ... with a 12-byte row step */
+ BX lr
+ .endfunc
+
+ .global armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+ .func armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+armVCM4P10_InterpolateLuma_VerAlign4x_unsafe: /* Copies r9 rows of 4 bytes from r0 (any byte alignment, step r1) into the buffer at r8 as packed words. Returns r0 = r8 - 0x1c, r1 = 4. Writes r7 and r10 and saves nothing */
+ AND r7,r0,#3 /* r7 = byte misalignment of the source, 0..3 */
+ BIC r0,r0,#3 /* round the source down to a word boundary */
+ ADD pc,pc,r7,LSL #2 /* pc reads as this instruction + 8, so this lands on branch number r7 below */
+ NOP /* padding so the branch table starts at +8 */
+ B Copy0toVAligned
+ B Copy1toVAligned
+ B Copy2toVAligned
+ B Copy3toVAligned
+Copy0toVAligned: /* already aligned: one word per row */
+ LDR r7,[r0],r1
+ SUBS r9,r9,#1
+ STR r7,[r8],#4
+ BGT Copy0toVAligned
+ B CopyVEnd
+Copy1toVAligned: /* take bytes 1..4 of the two words at r0 */
+ LDR r10,[r0,#4]
+ LDR r7,[r0],r1
+ SUBS r9,r9,#1
+ LSL r10,r10,#24
+ ORR r7,r10,r7,LSR #8
+ STR r7,[r8],#4
+ BGT Copy1toVAligned
+ B CopyVEnd
+Copy2toVAligned: /* take bytes 2..5 */
+ LDR r10,[r0,#4]
+ LDR r7,[r0],r1
+ SUBS r9,r9,#1
+ LSL r10,r10,#16
+ ORR r7,r10,r7,LSR #16
+ STR r7,[r8],#4
+ BGT Copy2toVAligned
+ B CopyVEnd
+Copy3toVAligned: /* take bytes 3..6 */
+ LDR r10,[r0,#4]
+ LDR r7,[r0],r1
+ SUBS r9,r9,#1
+ LSL r10,r10,#8
+ ORR r7,r10,r7,LSR #24
+ STR r7,[r8],#4
+ BGT Copy3toVAligned
+CopyVEnd:
+ SUB r0,r8,#0x1c /* 0x1c = 7 rows back from the end; with a 9-row copy that is row 2 (presumably the caller passes r9 = 9: TODO confirm) */
+ MOV r1,#4 /* the copy uses a 4-byte row step */
+ BX lr
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Copy_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Copy_unsafe_s.S
new file mode 100644
index 0000000..fe92201
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_Copy_unsafe_s.S
@@ -0,0 +1,105 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+ .func armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+armVCM4P10_InterpolateLuma_Copy4x4_unsafe: /* Copies a 4x4 byte block from r0 (any byte alignment, step r1) to r2 (word stores, step r3) */
+ PUSH {r4-r6,lr} /* note: r8, r9 and r12 are written below but not saved here */
+ AND r12,r0,#3 /* r12 = byte misalignment of the source, 0..3 */
+ BIC r0,r0,#3 /* round the source down to a word boundary */
+ ADD pc,pc,r12,LSL #2 /* pc reads as this instruction + 8, so this lands on branch number r12 below */
+ NOP /* padding so the branch table starts at +8 */
+ B Copy4x4Align0
+ B Copy4x4Align1
+ B Copy4x4Align2
+ B Copy4x4Align3
+Copy4x4Align0: /* aligned source: four word loads and stores */
+ LDR r4,[r0],r1
+ LDR r5,[r0],r1
+ STR r4,[r2],r3
+ LDR r8,[r0],r1
+ STR r5,[r2],r3
+ LDR r9,[r0],r1
+ STR r8,[r2],r3
+ STR r9,[r2],r3
+ B Copy4x4End
+Copy4x4Align1: /* each row = bytes 1..4 of the two words at r0 */
+ LDR r5,[r0,#4]
+ LDR r4,[r0],r1
+ LDR r9,[r0,#4]
+ LDR r8,[r0],r1
+ LSR r4,r4,#8
+ ORR r4,r4,r5,LSL #24
+ STR r4,[r2],r3
+ LSR r8,r8,#8
+ ORR r8,r8,r9,LSL #24
+ LDR r5,[r0,#4]
+ LDR r4,[r0],r1
+ STR r8,[r2],r3
+ LDR r9,[r0,#4]
+ LDR r8,[r0],r1
+ LSR r4,r4,#8
+ ORR r4,r4,r5,LSL #24
+ STR r4,[r2],r3
+ LSR r8,r8,#8
+ ORR r8,r8,r9,LSL #24
+ STR r8,[r2],r3
+ B Copy4x4End
+Copy4x4Align2: /* each row = bytes 2..5 */
+ LDR r5,[r0,#4]
+ LDR r4,[r0],r1
+ LDR r9,[r0,#4]
+ LDR r8,[r0],r1
+ LSR r4,r4,#16
+ ORR r4,r4,r5,LSL #16
+ STR r4,[r2],r3
+ LSR r8,r8,#16
+ ORR r8,r8,r9,LSL #16
+ STR r8,[r2],r3
+ LDR r5,[r0,#4]
+ LDR r4,[r0],r1
+ LDR r9,[r0,#4]
+ LDR r8,[r0],r1
+ LSR r4,r4,#16
+ ORR r4,r4,r5,LSL #16
+ STR r4,[r2],r3
+ LSR r8,r8,#16
+ ORR r8,r8,r9,LSL #16
+ STR r8,[r2],r3
+ B Copy4x4End
+Copy4x4Align3: /* each row = bytes 3..6 */
+ LDR r5,[r0,#4]
+ LDR r4,[r0],r1
+ LDR r9,[r0,#4]
+ LDR r8,[r0],r1
+ LSR r4,r4,#24
+ ORR r4,r4,r5,LSL #8
+ STR r4,[r2],r3
+ LSR r8,r8,#24
+ ORR r8,r8,r9,LSL #8
+ STR r8,[r2],r3
+ LDR r5,[r0,#4]
+ LDR r4,[r0],r1
+ LDR r9,[r0,#4]
+ LDR r8,[r0],r1
+ LSR r4,r4,#24
+ ORR r4,r4,r5,LSL #8
+ STR r4,[r2],r3
+ LSR r8,r8,#24
+ ORR r8,r8,r9,LSL #8
+ STR r8,[r2],r3
+Copy4x4End:
+ POP {r4-r6,pc} /* r0 and r2 are left four rows further on */
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S
new file mode 100644
index 0000000..544abe8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S
@@ -0,0 +1,107 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+ .func armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe: /* Converts 4 rows of eight 16-bit values at r0 (step r1) into 8 bytes per row stored at r7. Returns r0 = start of the stored data, r1 = 8 */
+ PUSH {r4-r6,lr} /* note: r10, r11 and r12 are written below but not saved here */
+ MOV lr,#4 /* row counter */
+ LDR r6, =0xfe00fe0 /* 0x0fe0 in each halfword: bias removed before scaling */
+ LDR r12, =0xff00ff /* keeps the low byte of each halfword */
+LoopStart1:
+ LDR r11,[r0,#0xc] /* four words = eight halfwords per row */
+ LDR r10,[r0,#8]
+ LDR r5,[r0,#4]
+ LDR r4,[r0],r1 /* r0 += r1 */
+ UQSUB16 r11,r11,r6 /* per halfword: max(x - 0x0fe0, 0) */
+ UQSUB16 r10,r10,r6
+ UQSUB16 r5,r5,r6
+ UQSUB16 r4,r4,r6
+ USAT16 r11,#13,r11 /* per halfword: saturate to 0..0x1fff */
+ USAT16 r10,#13,r10
+ USAT16 r5,#13,r5
+ USAT16 r4,#13,r4
+ AND r11,r12,r11,LSR #5 /* >> 5 turns 0..0x1fff into a 0..255 byte in each halfword */
+ AND r10,r12,r10,LSR #5
+ AND r5,r12,r5,LSR #5
+ AND r4,r12,r4,LSR #5
+ ORR r11,r10,r11,LSL #8 /* bytes of the +8 word go to lanes 0 and 2, bytes of the +12 word to lanes 1 and 3 */
+ ORR r10,r4,r5,LSL #8 /* likewise for the +0 and +4 words */
+ SUBS lr,lr,#1
+ STRD r10,r11,[r7],#8 /* store 8 bytes, r7 += 8 */
+ BGT LoopStart1
+ SUB r0,r7,#0x20 /* 4 rows x 8 bytes = 0x20: back to the first stored row */
+ MOV r1,#8
+ POP {r4-r6,pc}
+ .endfunc
+
+ .global armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+ .func armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe: /* Converts 4 rows of eight 16-bit values at r0 (step r1) to bytes and stores them at r7 with the rows of each pair interleaved word by word. Returns r0 = r7 start + 8, r1 = 4 */
+ PUSH {r4-r6,lr} /* note: r10, r11 and r12 are written below but not saved here */
+ LDR r6, =0xfe00fe0 /* 0x0fe0 in each halfword: bias removed before scaling */
+ LDR r12, =0xff00ff /* keeps the low byte of each halfword */
+ MOV lr,#2 /* two passes, two source rows each */
+LoopStart:
+ LDR r11,[r0,#0xc] /* first row of the pair */
+ LDR r10,[r0,#8]
+ LDR r5,[r0,#4]
+ LDR r4,[r0],r1
+ UQSUB16 r11,r11,r6 /* per halfword: max(x - 0x0fe0, 0) */
+ UQSUB16 r10,r10,r6
+ UQSUB16 r5,r5,r6
+ UQSUB16 r4,r4,r6
+ USAT16 r11,#13,r11 /* per halfword: saturate to 0..0x1fff */
+ USAT16 r10,#13,r10
+ USAT16 r5,#13,r5
+ USAT16 r4,#13,r4
+ AND r11,r12,r11,LSR #5 /* >> 5 turns 0..0x1fff into a 0..255 byte in each halfword */
+ AND r10,r12,r10,LSR #5
+ AND r5,r12,r5,LSR #5
+ AND r4,r12,r4,LSR #5
+ ORR r11,r10,r11,LSL #8
+ ORR r10,r4,r5,LSL #8
+ PKHBT r4,r10,r11,LSL #16 /* r4 = low halfword of r10 | low halfword of r11 << 16 */
+ STR r4,[r7],#8 /* word 0 of this pair, r7 += 8 */
+ PKHTB r5,r11,r10,ASR #16 /* r5 = high halfword of r11 : high halfword of r10 */
+ STR r5,[r7],#-4 /* word 2, then step back to word 1 */
+ LDR r11,[r0,#0xc] /* second row of the pair */
+ LDR r10,[r0,#8]
+ LDR r5,[r0,#4]
+ LDR r4,[r0],r1
+ UQSUB16 r11,r11,r6
+ UQSUB16 r10,r10,r6
+ UQSUB16 r5,r5,r6
+ UQSUB16 r4,r4,r6
+ USAT16 r11,#13,r11
+ USAT16 r10,#13,r10
+ USAT16 r5,#13,r5
+ USAT16 r4,#13,r4
+ AND r11,r12,r11,LSR #5
+ AND r10,r12,r10,LSR #5
+ AND r5,r12,r5,LSR #5
+ AND r4,r12,r4,LSR #5
+ ORR r11,r10,r11,LSL #8
+ ORR r10,r4,r5,LSL #8
+ PKHBT r4,r10,r11,LSL #16
+ SUBS lr,lr,#1
+ STR r4,[r7],#8 /* word 1 of this pair */
+ PKHTB r5,r11,r10,ASR #16
+ STR r5,[r7],#4 /* word 3; r7 now points past the 16 bytes of this pair */
+ BGT LoopStart
+ SUB r0,r7,#0x18 /* 32 bytes were written, so r0 = start + 8 */
+ MOV r1,#4
+ POP {r4-r6,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S
new file mode 100644
index 0000000..a330972
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.S
@@ -0,0 +1,164 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+ .func armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe: /* Applies the 6-tap filter (1,-5,20,20,-5,1) horizontally to 9 rows read from r0 (step r1), then vertically across them. The four 4-byte result rows are left in the low halves of d0, d2, d4 and d6 */
+ PUSH {r4-r12,lr}
+ VLD1.8 {d0,d1},[r0],r1 /* row 0: 16 bytes, r0 += r1 */
+ VMOV.I16 d31,#0x14 /* tap weight 20 */
+ VMOV.I16 d30,#0x5 /* tap weight 5 */
+ VEXT.8 d4,d0,d1,#1 /* with a..f = six neighbouring bytes: d0 = a, d4 = b */
+ VEXT.8 d2,d0,d1,#2 /* d2 = c */
+ VEXT.8 d3,d0,d1,#3 /* d3 = d */
+ VEXT.8 d5,d0,d1,#4 /* d5 = e */
+ VEXT.8 d1,d0,d1,#5 /* d1 = f */
+ VADDL.U8 q1,d2,d3 /* c + d */
+ VADDL.U8 q2,d4,d5 /* b + e */
+ VADDL.U8 q5,d0,d1 /* a + f */
+ VLD1.8 {d0,d1},[r0],r1 /* row 1 (loaded early; row 0 arithmetic continues below) */
+ VMLA.I16 d10,d2,d31 /* row 0: += 20*(c + d), low four lanes only */
+ VMUL.I16 d8,d4,d30 /* row 0: 5*(b + e) */
+ VEXT.8 d4,d0,d1,#1
+ VEXT.8 d2,d0,d1,#2
+ VEXT.8 d3,d0,d1,#3
+ VEXT.8 d5,d0,d1,#4
+ VEXT.8 d1,d0,d1,#5
+ VADDL.U8 q1,d2,d3
+ VADDL.U8 q2,d4,d5
+ VADDL.U8 q6,d0,d1
+ VLD1.8 {d0,d1},[r0],r1 /* row 2 */
+ VSUB.I16 d10,d10,d8 /* d10 = filtered row 0 */
+ VMLA.I16 d12,d2,d31
+ VMUL.I16 d8,d4,d30
+ VEXT.8 d4,d0,d1,#1
+ VEXT.8 d2,d0,d1,#2
+ VEXT.8 d3,d0,d1,#3
+ VEXT.8 d5,d0,d1,#4
+ VEXT.8 d1,d0,d1,#5
+ VADDL.U8 q1,d2,d3
+ VADDL.U8 q2,d4,d5
+ VADDL.U8 q7,d0,d1
+ VLD1.8 {d0,d1},[r0],r1 /* row 3 */
+ VSUB.I16 d12,d12,d8 /* d12 = filtered row 1 */
+ VMLA.I16 d14,d2,d31
+ VMUL.I16 d8,d4,d30
+ VEXT.8 d4,d0,d1,#1
+ VEXT.8 d2,d0,d1,#2
+ VEXT.8 d3,d0,d1,#3
+ VEXT.8 d5,d0,d1,#4
+ VEXT.8 d1,d0,d1,#5
+ VADDL.U8 q1,d2,d3
+ VADDL.U8 q2,d4,d5
+ VADDL.U8 q8,d0,d1
+ VLD1.8 {d0,d1},[r0],r1 /* row 4 */
+ VSUB.I16 d14,d14,d8 /* d14 = filtered row 2 */
+ VMLA.I16 d16,d2,d31
+ VMUL.I16 d8,d4,d30
+ VEXT.8 d4,d0,d1,#1
+ VEXT.8 d2,d0,d1,#2
+ VEXT.8 d3,d0,d1,#3
+ VEXT.8 d5,d0,d1,#4
+ VEXT.8 d1,d0,d1,#5
+ VADDL.U8 q1,d2,d3
+ VADDL.U8 q2,d4,d5
+ VADDL.U8 q9,d0,d1
+ VLD1.8 {d0,d1},[r0],r1 /* row 5 */
+ VSUB.I16 d16,d16,d8 /* d16 = filtered row 3 */
+ VMLA.I16 d18,d2,d31
+ VMUL.I16 d8,d4,d30
+ VEXT.8 d4,d0,d1,#1
+ VEXT.8 d2,d0,d1,#2
+ VEXT.8 d3,d0,d1,#3
+ VEXT.8 d5,d0,d1,#4
+ VEXT.8 d1,d0,d1,#5
+ VADDL.U8 q1,d2,d3
+ VADDL.U8 q2,d4,d5
+ VADDL.U8 q10,d0,d1
+ VLD1.8 {d0,d1},[r0],r1 /* row 6 */
+ VSUB.I16 d18,d18,d8 /* d18 = filtered row 4 */
+ VMLA.I16 d20,d2,d31
+ VMUL.I16 d8,d4,d30
+ VEXT.8 d4,d0,d1,#1
+ VEXT.8 d2,d0,d1,#2
+ VEXT.8 d3,d0,d1,#3
+ VEXT.8 d5,d0,d1,#4
+ VEXT.8 d1,d0,d1,#5
+ VADDL.U8 q1,d2,d3
+ VADDL.U8 q2,d4,d5
+ VADDL.U8 q11,d0,d1
+ VLD1.8 {d0,d1},[r0],r1 /* row 7 */
+ VSUB.I16 d20,d20,d8 /* d20 = filtered row 5 */
+ VMLA.I16 d22,d2,d31
+ VMUL.I16 d8,d4,d30
+ VEXT.8 d4,d0,d1,#1
+ VEXT.8 d2,d0,d1,#2
+ VEXT.8 d3,d0,d1,#3
+ VEXT.8 d5,d0,d1,#4
+ VEXT.8 d1,d0,d1,#5
+ VADDL.U8 q1,d2,d3
+ VADDL.U8 q2,d4,d5
+ VADDL.U8 q12,d0,d1
+ VLD1.8 {d0,d1},[r0],r1 /* row 8 (last) */
+ VSUB.I16 d22,d22,d8 /* d22 = filtered row 6 */
+ VMLA.I16 d24,d2,d31
+ VMUL.I16 d8,d4,d30
+ VEXT.8 d4,d0,d1,#1
+ VEXT.8 d2,d0,d1,#2
+ VEXT.8 d3,d0,d1,#3
+ VEXT.8 d5,d0,d1,#4
+ VEXT.8 d1,d0,d1,#5
+ VADDL.U8 q1,d2,d3
+ VADDL.U8 q2,d4,d5
+ VADDL.U8 q13,d0,d1
+ VSUB.I16 d24,d24,d8 /* d24 = filtered row 7 */
+ VMLA.I16 d26,d2,d31
+ VMUL.I16 d8,d4,d30
+ VMOV.I32 q15,#0x14 /* vertical pass works in 32 bits: weight 20 */
+ VMOV.I32 q14,#0x5 /* weight 5 */
+ VADDL.S16 q5,d10,d20 /* output 0: rows 0 + 5 */
+ VADDL.S16 q1,d14,d16 /* rows 2 + 3 */
+ VADDL.S16 q0,d12,d18 /* rows 1 + 4 */
+ VSUB.I16 d26,d26,d8 /* d26 = filtered row 8 */
+ VMLA.I32 q5,q1,q15
+ VMUL.I32 q4,q0,q14
+ VADDL.S16 q6,d12,d22 /* output 1: rows 1 + 6 */
+ VADDL.S16 q1,d16,d18 /* rows 3 + 4 */
+ VADDL.S16 q0,d14,d20 /* rows 2 + 5 */
+ VMLA.I32 q6,q1,q15
+ VSUB.I32 q5,q5,q4 /* output 0 complete */
+ VMUL.I32 q4,q0,q14
+ VADDL.S16 q2,d14,d24 /* output 2: rows 2 + 7 */
+ VADDL.S16 q1,d18,d20 /* rows 4 + 5 */
+ VADDL.S16 q0,d16,d22 /* rows 3 + 6 */
+ VMLA.I32 q2,q1,q15
+ VSUB.I32 q6,q6,q4 /* output 1 complete */
+ VMUL.I32 q4,q0,q14
+ VADDL.S16 q3,d16,d26 /* output 3: rows 3 + 8 */
+ VADDL.S16 q1,d20,d22 /* rows 5 + 6 */
+ VADDL.S16 q0,d18,d24 /* rows 4 + 7 */
+ VMLA.I32 q3,q1,q15
+ VSUB.I32 q2,q2,q4 /* output 2 complete */
+ VMLS.I32 q3,q0,q14 /* output 3 complete */
+ VQRSHRUN.S32 d0,q5,#10 /* (x + 512) >> 10, saturated to unsigned 16 bits */
+ VQRSHRUN.S32 d2,q6,#10
+ VQRSHRUN.S32 d4,q2,#10
+ VQRSHRUN.S32 d6,q3,#10
+ VQMOVN.U16 d0,q0 /* saturate to bytes; only the low 4 bytes of each result register come from the filter */
+ VQMOVN.U16 d2,q1
+ VQMOVN.U16 d4,q2
+ VQMOVN.U16 d6,q3
+ POP {r4-r12,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S
new file mode 100644
index 0000000..991c33f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.S
@@ -0,0 +1,119 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+ .func armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+;// armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+;// Internal helper. Reads nine 16-byte rows starting at [r0] (row stride
+;// r1), applies the 6-tap kernel (1,-5,20,20,-5,1) down the columns into
+;// 16-bit sums, then applies the same taps across those sums using 32-bit
+;// accumulators. Each of the four output rows is rounded, shifted right by
+;// 10, saturated, and left in the low four bytes of d0, d2, d4 and d6.
+;// Nothing is written to memory.
+;// In:  r0 = source pointer, r1 = source stride in bytes.
+;// Out: d0, d2, d4, d6 as described; r0 is advanced by five rows.
+;// r4-r12 are saved and restored. All of d0-d31 are overwritten and
+;// d8-d15 are NOT preserved here, so the caller has to save them.
+;// On exit d30/d31 hold the 16-bit constants 5 and 20.
+;// NOTE(review): presumably the H.264 centre half-sample case - confirm.
+armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe:
+ PUSH {r4-r12,lr}
+ VLD1.8 {d0,d1},[r0],r1 ;// row 0
+ ADD r12,r0,r1,LSL #2 ;// r12 -> row 5 (r0 already points at row 1)
+ VMOV.I8 d30,#0x5 ;// byte-wide taps for the vertical pass
+ VMOV.I8 d31,#0x14
+ VLD1.8 {d10,d11},[r12],r1 ;// row 5
+ VLD1.8 {d2,d3},[r0],r1 ;// row 1
+ VLD1.8 {d12,d13},[r12],r1 ;// row 6
+ VADDL.U8 q9,d0,d10 ;// q9 = row0 + row5 (outer taps of output row 0)
+ VLD1.8 {d4,d5},[r0],r1 ;// row 2
+ VADDL.U8 q0,d1,d11
+ VLD1.8 {d6,d7},[r0],r1 ;// row 3
+ VADDL.U8 q10,d2,d12
+ VLD1.8 {d8,d9},[r0],r1 ;// row 4
+ VMLAL.U8 q9,d4,d31
+ VLD1.8 {d14,d15},[r12],r1 ;// row 7
+ VMLAL.U8 q0,d5,d31
+ VLD1.8 {d16,d17},[r12],r1 ;// row 8
+ VMLAL.U8 q9,d6,d31
+ VMLAL.U8 q10,d6,d31
+ VMLSL.U8 q0,d3,d30
+ VADDL.U8 q11,d4,d14
+ VMLSL.U8 q9,d2,d30
+ VADDL.U8 q1,d3,d13
+ VMLAL.U8 q0,d7,d31
+ VMLAL.U8 q10,d8,d31
+ VMLSL.U8 q9,d8,d30
+ VMLAL.U8 q1,d7,d31
+ VMLSL.U8 q0,d9,d30
+ VMLAL.U8 q11,d8,d31
+ VMLSL.U8 q10,d4,d30
+ VMLSL.U8 q1,d5,d30
+ VADDL.U8 q2,d5,d15
+ VMLAL.U8 q11,d10,d31
+ VMLSL.U8 q10,d10,d30
+ VMLAL.U8 q1,d9,d31
+ VMLAL.U8 q2,d9,d31
+ VADDL.U8 q12,d6,d16
+ VMLSL.U8 q11,d6,d30
+ VMLSL.U8 q1,d11,d30
+ VMLSL.U8 q2,d7,d30
+ VADDL.U8 q3,d7,d17
+ VMLAL.U8 q12,d10,d31
+ VMLSL.U8 q11,d12,d30
+ VMLSL.U8 q2,d13,d30
+ VMLAL.U8 q3,d11,d31
+ VMLAL.U8 q12,d12,d31
+ VEXT.8 d26,d18,d19,#2 ;// horizontal pass starts: 16-bit sums shifted by one lane
+ VMLAL.U8 q2,d11,d31
+ VMLAL.U8 q3,d13,d31
+ VMLSL.U8 q12,d8,d30
+ VEXT.8 d27,d18,d19,#4
+ VMOV.I16 d31,#0x14 ;// switch d31 to a 16-bit tap; d30 stays byte-wide until its last U8 use
+ VMLSL.U8 q3,d9,d30
+ VMLSL.U8 q12,d14,d30
+ VEXT.8 d29,d19,d0,#2 ;// lanes 5..8: d0 supplies the ninth column sum
+ VEXT.8 d28,d18,d19,#6
+ VMLSL.U8 q3,d15,d30
+ VADDL.S16 q0,d18,d29 ;// outer taps, widened to 32 bits
+ VADD.I16 d27,d27,d28
+ VMOV.I16 d30,#0x5
+ VADD.I16 d26,d26,d19
+ VMLAL.S16 q0,d27,d31
+ VEXT.8 d27,d20,d21,#4
+ VEXT.8 d28,d20,d21,#6
+ VEXT.8 d29,d21,d2,#2
+ VMLSL.S16 q0,d26,d30
+ VEXT.8 d26,d20,d21,#2
+ VADDL.S16 q1,d20,d29
+ VADD.I16 d27,d27,d28
+ VADD.I16 d26,d26,d21
+ VEXT.8 d28,d22,d23,#6
+ VMLAL.S16 q1,d27,d31
+ VEXT.8 d29,d23,d4,#2
+ VEXT.8 d27,d22,d23,#4
+ VEXT.8 d8,d22,d23,#2
+ VADDL.S16 q2,d22,d29
+ VMLSL.S16 q1,d26,d30
+ VADD.I16 d27,d27,d28
+ VADD.I16 d26,d8,d23
+ VEXT.8 d28,d24,d25,#6
+ VMLAL.S16 q2,d27,d31
+ VEXT.8 d27,d24,d25,#4
+ VEXT.8 d29,d25,d6,#2
+ VADD.I16 d27,d27,d28
+ VEXT.8 d8,d24,d25,#2
+ VADDL.S16 q3,d24,d29
+ VMLSL.S16 q2,d26,d30
+ VMLAL.S16 q3,d27,d31
+ VADD.I16 d8,d8,d25
+ VMLSL.S16 q3,d8,d30
+ VQRSHRUN.S32 d0,q0,#10 ;// round, >>10, saturate to unsigned 16-bit
+ VQRSHRUN.S32 d2,q1,#10
+ VQRSHRUN.S32 d4,q2,#10
+ VQRSHRUN.S32 d6,q3,#10
+ VQMOVN.U16 d0,q0 ;// narrow to bytes; only the low four bytes are results
+ VQMOVN.U16 d2,q1
+ VQMOVN.U16 d4,q2
+ VQMOVN.U16 d6,q3
+ POP {r4-r12,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S
new file mode 100644
index 0000000..40e141b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.S
@@ -0,0 +1,72 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ .func armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+;// armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+;// Internal helper. Reads four 16-byte rows from [r0] (row stride r1) and,
+;// for the first four positions of each row, computes
+;//   (x[0] + x[5]) + d31 * (x[2] + x[3]) - d30 * (x[1] + x[4])
+;// in 16-bit lanes, then rounds, shifts right by 5 and saturates to bytes.
+;// In:  r0 = source pointer, r1 = source stride in bytes,
+;//      d30/d31 = 16-bit tap multipliers. They are NOT set here; the
+;//      caller omxVCM4P10_InterpolateLuma loads them with 5 and 20.
+;// Out: low four bytes of d22, d24, d26, d28 = filtered rows 0..3.
+;//      d14/d15, d16/d17, d18/d19, d20/d21 keep each source row shifted
+;//      by 2 and 3 bytes; the caller averages against them afterwards.
+;// r0 is advanced by four rows; r4-r12 are saved and restored. d8-d15
+;// are overwritten without being saved.
+armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe:
+ PUSH {r4-r12,lr}
+ VLD1.8 {d22,d23},[r0],r1 ;// row 0
+ VEXT.8 d10,d22,d23,#5
+ VEXT.8 d12,d22,d23,#1
+ VEXT.8 d14,d22,d23,#2
+ VEXT.8 d15,d22,d23,#3
+ VEXT.8 d13,d22,d23,#4
+ VADDL.U8 q11,d22,d10 ;// x[0] + x[5]
+ VADDL.U8 q4,d14,d15 ;// x[2] + x[3]
+ VADDL.U8 q6,d12,d13 ;// x[1] + x[4]
+ VLD1.8 {d24,d25},[r0],r1 ;// row 1
+ VMLA.I16 d22,d8,d31
+ VMUL.I16 d8,d12,d30 ;// product is subtracted a few instructions later
+ VEXT.8 d10,d24,d25,#5
+ VEXT.8 d12,d24,d25,#1
+ VEXT.8 d16,d24,d25,#2
+ VEXT.8 d17,d24,d25,#3
+ VEXT.8 d13,d24,d25,#4
+ VADDL.U8 q12,d24,d10
+ VSUB.I16 d22,d22,d8 ;// finish row 0
+ VADDL.U8 q4,d16,d17
+ VADDL.U8 q6,d12,d13
+ VLD1.8 {d26,d27},[r0],r1 ;// row 2
+ VMLA.I16 d24,d8,d31
+ VMUL.I16 d8,d12,d30
+ VEXT.8 d10,d26,d27,#5
+ VEXT.8 d12,d26,d27,#1
+ VEXT.8 d18,d26,d27,#2
+ VEXT.8 d19,d26,d27,#3
+ VEXT.8 d13,d26,d27,#4
+ VADDL.U8 q13,d26,d10
+ VSUB.I16 d24,d24,d8 ;// finish row 1
+ VADDL.U8 q4,d18,d19
+ VADDL.U8 q6,d12,d13
+ VLD1.8 {d28,d29},[r0],r1 ;// row 3
+ VMLA.I16 d26,d8,d31
+ VMUL.I16 d8,d12,d30
+ VEXT.8 d10,d28,d29,#5
+ VEXT.8 d12,d28,d29,#1
+ VEXT.8 d20,d28,d29,#2
+ VEXT.8 d21,d28,d29,#3
+ VEXT.8 d13,d28,d29,#4
+ VADDL.U8 q14,d28,d10
+ VSUB.I16 d26,d26,d8 ;// finish row 2
+ VADDL.U8 q4,d20,d21
+ VADDL.U8 q6,d12,d13
+ VMLA.I16 d28,d8,d31
+ VMLS.I16 d28,d12,d30 ;// finish row 3
+ VQRSHRUN.S16 d22,q11,#5 ;// round, >>5, saturate to unsigned bytes
+ VQRSHRUN.S16 d24,q12,#5
+ VQRSHRUN.S16 d26,q13,#5
+ VQRSHRUN.S16 d28,q14,#5
+ POP {r4-r12,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S
new file mode 100644
index 0000000..955846f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.S
@@ -0,0 +1,58 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ .func armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+;// armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+;// Internal helper. Reads nine 8-byte rows from [r0] (row stride r1) and,
+;// for four consecutive output rows n = 0..3, computes in 16-bit lanes
+;//   (row[n] + row[n+5]) + d31 * (row[n+2] + row[n+3])
+;//                       - d30 * (row[n+1] + row[n+4])
+;// on the first four columns, then rounds, shifts right by 5 and
+;// saturates to bytes.
+;// In:  r0 = source pointer, r1 = source stride in bytes,
+;//      d30/d31 = 16-bit tap multipliers. They are NOT set here; the
+;//      caller omxVCM4P10_InterpolateLuma loads them with 5 and 20.
+;// Out: low four bytes of d0, d2, d4, d6 = filtered rows 0..3.
+;//      d7..d15 still hold source rows 0..8 on return (d7-d11 = rows 0-4,
+;//      d12-d15 = rows 5-8).
+;// r0 is advanced by five rows; r4-r12 are saved and restored. d8-d15
+;// are overwritten without being saved.
+armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe:
+ PUSH {r4-r12,lr}
+ VLD1.8 {d7},[r0],r1 ;// row 0
+ ADD r12,r0,r1,LSL #2 ;// r12 -> row 5
+ VLD1.8 {d8},[r0],r1 ;// row 1
+ VLD1.8 {d12},[r12],r1 ;// row 5
+ VLD1.8 {d9},[r0],r1 ;// row 2
+ VADDL.U8 q0,d7,d12
+ VLD1.8 {d10},[r0],r1 ;// row 3
+ VLD1.8 {d13},[r12],r1 ;// row 6
+ VLD1.8 {d11},[r0],r1 ;// row 4
+ VLD1.8 {d14},[r12],r1 ;// row 7
+ VADDL.U8 q8,d8,d11
+ VADDL.U8 q9,d9,d10
+ VLD1.8 {d15},[r12],r1 ;// row 8
+ VMLS.I16 d0,d16,d30
+ VMUL.I16 d20,d18,d31 ;// centre-tap product, added into d0 below
+ VADDL.U8 q8,d9,d12
+ VADDL.U8 q9,d10,d11
+ VADDL.U8 q1,d8,d13
+ VMLS.I16 d2,d16,d30
+ VMUL.I16 d21,d18,d31
+ VADDL.U8 q8,d10,d13
+ VADDL.U8 q9,d11,d12
+ VADDL.U8 q2,d9,d14
+ VMLS.I16 d4,d16,d30
+ VMUL.I16 d22,d18,d31
+ VADDL.U8 q8,d11,d14
+ VADDL.U8 q3,d10,d15
+ VADDL.U8 q9,d12,d13
+ VMLS.I16 d6,d16,d30
+ VADD.I16 d0,d0,d20
+ VADD.I16 d2,d2,d21
+ VADD.I16 d4,d4,d22
+ VMLA.I16 d6,d18,d31
+ VQRSHRUN.S16 d0,q0,#5 ;// round, >>5, saturate to unsigned bytes
+ VQRSHRUN.S16 d2,q1,#5
+ VQRSHRUN.S16 d4,q2,#5
+ VQRSHRUN.S16 d6,q3,#5
+ POP {r4-r12,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S
new file mode 100644
index 0000000..66520da
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S
@@ -0,0 +1,175 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+
+ .section .rodata
+ .align 4
+
+;// Jump tables for armVCM4P10_Interpolate_Chroma. The function indexes
+;// them with a byte offset of (width << 1), so width 2 selects word 1,
+;// width 4 selects word 2 and width 8 selects word 4. Words 0 and 3 are
+;// never reached by those widths and simply repeat their neighbour.
+armVCM4P10_WidthBranchTableMVIsNotZero:
+ .word WidthIs2MVIsNotZero, WidthIs2MVIsNotZero
+ .word WidthIs4MVIsNotZero, WidthIs4MVIsNotZero
+ .word WidthIs8MVIsNotZero
+
+;// Same layout, used when both fractional offsets are zero (plain copy).
+armVCM4P10_WidthBranchTableMVIsZero:
+ .word WidthIs2MVIsZero, WidthIs2MVIsZero
+ .word WidthIs4MVIsZero, WidthIs4MVIsZero
+ .word WidthIs8MVIsZero
+
+ .text
+
+ .global armVCM4P10_Interpolate_Chroma
+ .func armVCM4P10_Interpolate_Chroma
+;// armVCM4P10_Interpolate_Chroma
+;// Bilinear interpolation of a block, or a plain copy when both
+;// fractional offsets are zero.
+;// In:  r0 = source pointer, r1 = source stride,
+;//      r2 = destination pointer, r3 = destination stride,
+;//      [sp,#0] = block width (2, 4 or 8; selects the jump-table entry),
+;//      [sp,#4] = number of rows,
+;//      [sp,#8], [sp,#12] = fractional offsets a and b.
+;//      NOTE(review): presumably dx and dy in 1/8 sample units - confirm.
+;// Out: r0 = 0.
+;// Each output byte is
+;//   ((8-a)(8-b)*p[x] + a(8-b)*p[x+1] + (8-a)b*q[x] + ab*q[x+1] + 32) >> 6
+;// where p and q are two consecutive source rows. Every load is 8 bytes
+;// wide, even for widths 2 and 4.
+;// The row loops test the counter after the first pass, so the row count
+;// is expected to be a positive multiple of the rows handled per pass
+;// (4 for WidthIs8MVIsNotZero, 2 for the others).
+armVCM4P10_Interpolate_Chroma:
+ PUSH {r4-r12,lr}
+ VPUSH {d8-d15}
+ LDRD r6,r7,[sp,#0x70] ;// a, b (40 + 64 bytes were just pushed, so stack args start at 0x68)
+ LDRD r4,r5,[sp,#0x68] ;// width, rows
+ RSB r8,r6,#8 ;// 8 - a
+ RSB r9,r7,#8 ;// 8 - b
+ CMN r6,r7 ;// Z set when a + b == 0
+ MOV r10,#1
+ LDREQ r11, =armVCM4P10_WidthBranchTableMVIsZero
+ SUB lr,r1,r10 ;// stride - 1: steps from x+1 on this row to x on the next
+ LDRNE r11, =armVCM4P10_WidthBranchTableMVIsNotZero
+ VLD1.8 {d0},[r0],r10 ;// first row at x
+ SMULBB r12,r8,r9 ;// (8-a)(8-b)
+ SMULBB r9,r6,r9 ;// a(8-b)
+ VLD1.8 {d1},[r0],lr ;// first row at x+1
+ SMULBB r8,r8,r7 ;// (8-a)b
+ SMULBB r6,r6,r7 ;// ab
+ VDUP.8 d12,r12
+ VDUP.8 d13,r9
+ VDUP.8 d14,r8
+ VDUP.8 d15,r6
+ LDR pc,[r11,r4,LSL #1] ;// dispatch on width
+
+;// Four output rows per pass; d0/d1 carry the last source row into the next pass.
+WidthIs8MVIsNotZero:
+ VLD1.8 {d2},[r0],r10
+ VMULL.U8 q2,d0,d12
+ VLD1.8 {d3},[r0],lr
+ VMULL.U8 q3,d2,d12
+ VLD1.8 {d16},[r0],r10
+ VMLAL.U8 q2,d1,d13
+ VLD1.8 {d17},[r0],lr
+ VMULL.U8 q11,d16,d12
+ VMLAL.U8 q3,d3,d13
+ VLD1.8 {d18},[r0],r10
+ VMLAL.U8 q2,d2,d14
+ VMLAL.U8 q11,d17,d13
+ VMULL.U8 q12,d18,d12
+ VLD1.8 {d19},[r0],lr
+ VMLAL.U8 q3,d16,d14
+ VLD1.8 {d0},[r0],r10
+ VMLAL.U8 q12,d19,d13
+ VMLAL.U8 q11,d18,d14
+ VMLAL.U8 q2,d3,d15
+ VLD1.8 {d1},[r0],lr
+ VMLAL.U8 q12,d0,d14
+ VMLAL.U8 q3,d17,d15
+ VMLAL.U8 q11,d19,d15
+ SUBS r5,r5,#4
+ VMLAL.U8 q12,d1,d15
+ VQRSHRN.U16 d8,q2,#6 ;// (+32) >> 6 with saturation
+ VQRSHRN.U16 d9,q3,#6
+ VQRSHRN.U16 d20,q11,#6
+ VST1.64 {d8},[r2],r3
+ VQRSHRN.U16 d21,q12,#6
+ VST1.64 {d9},[r2],r3
+ VST1.64 {d20},[r2],r3
+ VST1.64 {d21},[r2],r3
+ BGT WidthIs8MVIsNotZero
+ MOV r0,#0
+ VPOP {d8-d15}
+ POP {r4-r12,pc}
+
+;// Two output rows per pass; only the low 32 bits of each result are stored.
+WidthIs4MVIsNotZero:
+ VLD1.8 {d2},[r0],r10
+ VMULL.U8 q2,d0,d12
+ VMULL.U8 q3,d2,d12
+ VLD1.8 {d3},[r0],lr
+ VMLAL.U8 q2,d1,d13
+ VMLAL.U8 q3,d3,d13
+ VLD1.8 {d0},[r0],r10
+ VMLAL.U8 q2,d2,d14
+ VMLAL.U8 q3,d0,d14
+ VLD1.8 {d1},[r0],lr
+ SUBS r5,r5,#2
+ VMLAL.U8 q3,d1,d15
+ VMLAL.U8 q2,d3,d15
+ VQRSHRN.U16 d9,q3,#6
+ VQRSHRN.U16 d8,q2,#6
+ VST1.32 {d8[0]},[r2],r3
+ VST1.32 {d9[0]},[r2],r3
+ BGT WidthIs4MVIsNotZero
+ MOV r0,#0
+ VPOP {d8-d15}
+ POP {r4-r12,pc}
+
+;// Same arithmetic as the width-4 loop; only the low 16 bits are stored.
+WidthIs2MVIsNotZero:
+ VLD1.8 {d2},[r0],r10
+ VMULL.U8 q2,d0,d12
+ VMULL.U8 q3,d2,d12
+ VLD1.8 {d3},[r0],lr
+ VMLAL.U8 q2,d1,d13
+ VMLAL.U8 q3,d3,d13
+ VLD1.8 {d0},[r0],r10
+ VMLAL.U8 q2,d2,d14
+ VMLAL.U8 q3,d0,d14
+ VLD1.8 {d1},[r0],lr
+ SUBS r5,r5,#2
+ VMLAL.U8 q3,d1,d15
+ VMLAL.U8 q2,d3,d15
+ VQRSHRN.U16 d9,q3,#6
+ VQRSHRN.U16 d8,q2,#6
+ VST1.16 {d8[0]},[r2],r3
+ VST1.16 {d9[0]},[r2],r3
+ BGT WidthIs2MVIsNotZero
+ MOV r0,#0
+ VPOP {d8-d15}
+ POP {r4-r12,pc}
+
+;// Copy paths. The prologue already advanced r0 by one row.
+WidthIs8MVIsZero:
+ SUB r0,r0,r1 ;// step back to the first row and reload it in the loop
+WidthIs8LoopMVIsZero:
+ VLD1.8 {d0},[r0],r1
+ SUBS r5,r5,#2
+ VLD1.8 {d1},[r0],r1
+ VST1.64 {d0},[r2],r3
+ VST1.64 {d1},[r2],r3
+ BGT WidthIs8LoopMVIsZero
+ MOV r0,#0
+ VPOP {d8-d15}
+ POP {r4-r12,pc}
+
+;// d0 already holds the current row (from the prologue or the previous pass).
+WidthIs4MVIsZero:
+ VLD1.8 {d1},[r0],r1
+ SUBS r5,r5,#2
+ VST1.32 {d0[0]},[r2],r3
+ VLD1.8 {d0},[r0],r1
+ VST1.32 {d1[0]},[r2],r3
+ BGT WidthIs4MVIsZero
+ MOV r0,#0
+ VPOP {d8-d15}
+ POP {r4-r12,pc}
+
+;// As the width-4 copy, storing 16 bits per row.
+WidthIs2MVIsZero:
+ VLD1.8 {d1},[r0],r1
+ SUBS r5,r5,#2
+ VST1.16 {d0[0]},[r2],r3
+ VLD1.8 {d0},[r0],r1
+ VST1.16 {d1[0]},[r2],r3
+ BGT WidthIs2MVIsZero
+ MOV r0,#0
+ VPOP {d8-d15}
+ POP {r4-r12,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_QuantTables_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_QuantTables_s.S
new file mode 100644
index 0000000..f5d6d1f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_QuantTables_s.S
@@ -0,0 +1,68 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .section .rodata
+ .align 4
+
+ .global armVCM4P10_MFMatrixQPModTable
+ .global armVCM4P10_QPDivIntraTable
+ .global armVCM4P10_QPDivPlusOneTable
+
+;//------------------------------------------------------------------
+;// This table contains (1 << QbitsPlusOne) / 3 Values (Intra case) ,
+;// for values of iQP from 0 to 51 (inclusive).
+;//------------------------------------------------------------------
+
+
+;// One word per iQP: floor(2^(16 + iQP / 6) / 3), six identical entries
+;// per iQP / 6 group. Nine groups (54 words) are required so that every
+;// iQP in 0..51 indexes inside the table, matching the nine groups of
+;// armVCM4P10_MFMatrixQPModTable and armVCM4P10_QPDivPlusOneTable. The
+;// last group (iQP 48..53, 2^24 / 3) was previously missing, so iQP 48..51
+;// indexed past the end of this table.
+armVCM4P10_QPDivIntraTable:
+ .word 21845, 21845, 21845, 21845, 21845, 21845
+ .word 43690, 43690, 43690, 43690, 43690, 43690
+ .word 87381, 87381, 87381, 87381, 87381, 87381
+ .word 174762, 174762, 174762, 174762, 174762, 174762
+ .word 349525, 349525, 349525, 349525, 349525, 349525
+ .word 699050, 699050, 699050, 699050, 699050, 699050
+ .word 1398101, 1398101, 1398101, 1398101, 1398101, 1398101
+ .word 2796202, 2796202, 2796202, 2796202, 2796202, 2796202
+ .word 5592405, 5592405, 5592405, 5592405, 5592405, 5592405
+
+
+;//--------------------------------------------------------------
+;// This table contains armVCM4P10_MFMatrix [iQP % 6][0] entries,
+;// for values of iQP from 0 to 51 (inclusive).
+;//--------------------------------------------------------------
+
+;// Six halfwords (one per iQP % 6), emitted nine times in a row so the
+;// table can be indexed directly by iQP without computing the modulo.
+armVCM4P10_MFMatrixQPModTable:
+ .rept 9
+ .hword 13107, 11916, 10082, 9362, 8192, 7282
+ .endr
+
+;//---------------------------------------------------------------
+;// This table contains ARM_M4P10_Q_OFFSET + 1 + (iQP / 6) values,
+;// for values of iQP from 0 to 51 (inclusive).
+;//---------------------------------------------------------------
+
+;// One byte per iQP; each value is repeated six times (one iQP / 6 group),
+;// for groups 16 through 24.
+armVCM4P10_QPDivPlusOneTable:
+ .fill 6, 1, 16
+ .fill 6, 1, 17
+ .fill 6, 1, 18
+ .fill 6, 1, 19
+ .fill 6, 1, 20
+ .fill 6, 1, 21
+ .fill 6, 1, 22
+ .fill 6, 1, 23
+ .fill 6, 1, 24
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_TransformResidual4x4_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_TransformResidual4x4_s.S
new file mode 100644
index 0000000..c24d717
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_TransformResidual4x4_s.S
@@ -0,0 +1,52 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global armVCM4P10_TransformResidual4x4
+ .func armVCM4P10_TransformResidual4x4
+;// armVCM4P10_TransformResidual4x4
+;// Applies a two-pass butterfly transform to sixteen 16-bit values.
+;// In:  r0 = destination (16 halfwords written), r1 = source (16 halfwords).
+;// Each pass computes, per lane, from inputs a,b,c,d (d0..d3):
+;//   e = a + c, f = a - c, g = (b >> 1) - d, h = b + (d >> 1)
+;//   out = { e + h, f + g, f - g, e - h }
+;// The first pass works on the de-interleaved load, the data is then
+;// transposed, and the second pass is followed by a rounding shift
+;// right by 6.
+;// d8 is saved and restored; d0-d7 are overwritten. No core registers
+;// are modified.
+;// NOTE(review): presumably the H.264 4x4 inverse integer transform - confirm.
+armVCM4P10_TransformResidual4x4:
+ VPUSH {d8}
+ VLD4.16 {d0,d1,d2,d3},[r1] ;// de-interleaving load: d0 = elements 0,4,8,12, d1 = 1,5,9,13, ...
+ VMOV.I16 d4,#0 ;// zero operand so VHADD acts as a halving shift
+ VADD.I16 d5,d0,d2
+ VSUB.I16 d6,d0,d2
+ VHADD.S16 d7,d1,d4 ;// b >> 1
+ VHADD.S16 d8,d3,d4 ;// d >> 1
+ VSUB.I16 d7,d7,d3
+ VADD.I16 d8,d1,d8
+ VADD.I16 d0,d5,d8
+ VADD.I16 d1,d6,d7
+ VSUB.I16 d2,d6,d7
+ VSUB.I16 d3,d5,d8
+ VTRN.16 d0,d1 ;// 4x4 transpose of the 16-bit lanes
+ VTRN.16 d2,d3
+ VTRN.32 q0,q1
+ VADD.I16 d5,d0,d2
+ VSUB.I16 d6,d0,d2
+ VHADD.S16 d7,d1,d4
+ VHADD.S16 d8,d3,d4
+ VSUB.I16 d7,d7,d3
+ VADD.I16 d8,d1,d8
+ VADD.I16 d0,d5,d8
+ VADD.I16 d1,d6,d7
+ VSUB.I16 d2,d6,d7
+ VSUB.I16 d3,d5,d8
+ VRSHR.S16 d0,d0,#6 ;// (x + 32) >> 6
+ VRSHR.S16 d1,d1,#6
+ VRSHR.S16 d2,d2,#6
+ VRSHR.S16 d3,d3,#6
+ VST1.16 {d0,d1,d2,d3},[r0]
+ VPOP {d8}
+ BX lr
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S
new file mode 100644
index 0000000..c552f8d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_UnpackBlock4x4_s.S
@@ -0,0 +1,40 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global armVCM4P10_UnpackBlock4x4
+ .func armVCM4P10_UnpackBlock4x4
+;// armVCM4P10_UnpackBlock4x4
+;// Expands a packed (flag, value) byte stream into a zero-filled block of
+;// sixteen 16-bit entries.
+;// In:  r0 = address of the stream pointer (read on entry, updated on exit),
+;//      r1 = destination block, 32 bytes.
+;// Stream format, as decoded below:
+;//   flag byte: bits 0-3 = destination index, bit 4 = value is 16 bits
+;//              (low byte first), bit 5 = this is the last entry.
+;//   value:     signed 8 bits when bit 4 is clear, otherwise 16 bits.
+;// At least one entry is always consumed. r4-r8 are saved and restored;
+;// r2 and r3 are overwritten.
+armVCM4P10_UnpackBlock4x4:
+ PUSH {r4-r8,lr}
+ LDR r2,[r0,#0] ;// r2 = current stream position
+ MOV r7,#0x1f ;// mask for the byte offset (index * 2)
+ MOV r4,#0
+ MOV r5,#0
+ LDRB r3,[r2],#1 ;// first flag byte
+ STRD r4,r5,[r1,#0] ;// clear all 32 destination bytes
+ STRD r4,r5,[r1,#8]
+ STRD r4,r5,[r1,#0x10]
+ STRD r4,r5,[r1,#0x18]
+unpackLoop:
+ TST r3,#0x10 ;// NE: 16-bit value follows, EQ: 8-bit value
+ LDRNESB r5,[r2,#1] ;// high byte, sign-extended
+ LDRNEB r4,[r2],#2 ;// low byte
+ AND r6,r7,r3,LSL #1 ;// byte offset = (flag << 1) & 0x1f
+ LDREQSB r4,[r2],#1 ;// 8-bit signed value
+ ORRNE r4,r4,r5,LSL #8
+ TST r3,#0x20 ;// EQ: more entries follow
+ LDREQB r3,[r2],#1 ;// fetch the next flag before storing (r6 is already computed)
+ STRH r4,[r1,r6]
+ BEQ unpackLoop
+ STR r2,[r0,#0] ;// hand the advanced stream position back to the caller
+ POP {r4-r8,pc}
+ .endfunc
+ .end
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DeblockLuma_I.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DeblockLuma_I.S
new file mode 100644
index 0000000..ba61059
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DeblockLuma_I.S
@@ -0,0 +1,67 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global omxVCM4P10_DeblockLuma_I
+ .func omxVCM4P10_DeblockLuma_I
+;// omxVCM4P10_DeblockLuma_I
+;// Validates its arguments, then runs the vertical-edge filter followed by
+;// the horizontal-edge filter on the same block.
+;// In:  r0 = block pointer (must be non-NULL and 8-byte aligned),
+;//      r1 = step (must be a multiple of 8),
+;//      r2, r3 = two non-NULL pointers passed through to the edge filters,
+;//      [sp,#0], [sp,#4] = two non-NULL, 4-byte-aligned pointers.
+;//      NOTE(review): presumably pAlpha, pBeta, pThresholds and pBS from
+;//      the OpenMAX DL prototype - confirm against the header.
+;// Out: r0 = -5 (MVN of 4) when any check fails; otherwise the status of
+;//      the vertical filter if it is non-zero, else the status of the
+;//      horizontal filter.
+;// The horizontal filter receives r2 + 2, r3 + 2 and both stack pointers
+;// advanced by 16 bytes.
+omxVCM4P10_DeblockLuma_I:
+ PUSH {r4-r9,lr}
+ MOVS r6,r0 ;// Z set when the block pointer is NULL; nothing before the BEQ changes the flags
+ SUB sp,sp,#0xc ;// room for two outgoing stack arguments (plus padding)
+ MOV r9,r1
+ MOV r7,r2
+ MOV r8,r3
+ LDR r4,[sp,#0x28] ;// first stack argument (28 bytes pushed + 12 reserved = 0x28)
+ LDR r5,[sp,#0x2c] ;// second stack argument
+ BEQ L0x58
+ TST r6,#7
+ TSTEQ r9,#7
+ BNE L0x58 ;// pointer or step not a multiple of 8
+ CMP r7,#0
+ CMPNE r8,#0
+ CMPNE r4,#0
+ BEQ L0x58 ;// r2, r3 or first stack pointer is NULL
+ TST r4,#3
+ BNE L0x58
+ CMP r5,#0
+ BEQ L0x58
+ TST r5,#3
+ BEQ L0x64 ;// all checks passed
+L0x58:
+ MVN r0,#4 ;// r0 = -5
+L0x5c:
+ ADD sp,sp,#0xc
+ POP {r4-r9,pc}
+L0x64:
+ STR r4,[sp,#0]
+ MOV r3,r8
+ STR r5,[sp,#4]
+ MOV r2,r7
+ MOV r1,r9
+ MOV r0,r6
+ BL omxVCM4P10_FilterDeblockingLuma_VerEdge_I
+ CMP r0,#0
+ BNE L0x5c ;// propagate a non-zero status unchanged
+ ADD r3,r5,#0x10
+ ADD r2,r4,#0x10
+ STR r3,[sp,#4]
+ STR r2,[sp,#0]
+ ADD r3,r8,#2
+ ADD r2,r7,#2
+ MOV r1,r9
+ MOV r0,r6
+ BL omxVCM4P10_FilterDeblockingLuma_HorEdge_I
+ ADD sp,sp,#0xc
+ POP {r4-r9,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S
new file mode 100644
index 0000000..be21ee7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.S
@@ -0,0 +1,119 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global omxVCM4P10_DequantTransformResidualFromPairAndAdd
+ .func omxVCM4P10_DequantTransformResidualFromPairAndAdd
+;// omxVCM4P10_DequantTransformResidualFromPairAndAdd
+;// Builds a 4x4 block of 16-bit residuals, adds it to a 4x4 block of
+;// prediction bytes and stores the saturated sum.
+;// In:  r0 = passed unchanged to armVCM4P10_UnpackBlock4x4 (address of
+;//           the packed-coefficient stream pointer),
+;//      r1 = prediction pointer, r2 = pointer to a 16-bit DC value
+;//           (may be NULL on the full path), r3 = destination pointer,
+;//      [sp,#0] = prediction step, [sp,#4] = destination step,
+;//      [sp,#8] = index into the QP tables, [sp,#12] = flag selecting
+;//           the full path (non-zero) or the DC-only path (zero).
+;// Out: r0 = 0.
+;// Full path: unpack into a 32-byte stack buffer, scale by table-derived
+;// factors, optionally replace element 0 with *r2, then run the same
+;// two-pass butterfly and rounding >>6 used by
+;// armVCM4P10_TransformResidual4x4.
+;// DC-only path: every residual is (*r2 + 32) >> 6. This path reads *r2
+;// without a NULL check.
+;// NOTE(review): contents of armVCM4P10_QPModuloTable, armVCM4P10_QPDivTable
+;// and armVCM4P10_VMatrixU16 are defined elsewhere - confirm their layout.
+omxVCM4P10_DequantTransformResidualFromPairAndAdd:
+ PUSH {r4-r12,lr}
+ VPUSH {d8-d9}
+ SUB sp,sp,#0x20 ;// 32-byte scratch block for the unpacked coefficients
+ ADD r4,sp,#0
+ LDR r5,[sp,#0x64] ;// fourth stack argument (40 + 16 + 32 bytes below the args => base 0x58)
+ MOV r7,r1
+ MOV r8,r2
+ MOV r9,r3
+ CMP r5,#0
+ BEQ L0x114 ;// DC-only path
+ MOV r1,r4
+ BL armVCM4P10_UnpackBlock4x4 ;//
+ LDR r1,[sp,#0x60] ;// third stack argument: QP table index
+ LDR r11, =armVCM4P10_QPModuloTable
+ LDR r10, =armVCM4P10_QPDivTable
+ LDR r2, =armVCM4P10_VMatrixU16
+ LDRSB r12,[r11,r1]
+ LDRSB lr,[r10,r1]
+ LDR r10, =0x3020504 ;// VTBL byte indices 4,5,2,3 (halfwords 2,1)
+ LDR r1, =0x5040100 ;// VTBL byte indices 0,1,4,5 (halfwords 0,2)
+ ADD r2,r2,r12 ;// byte offset into the V matrix taken from the modulo table
+ VDUP.32 d7,r1
+ VDUP.32 d9,r10
+ VDUP.16 d5,lr ;// per-lane shift count from the div table
+ VLD1.8 {d6},[r2]
+ VTBL.8 d8,{d6},d7 ;// scale pattern for d0 and d2
+ VTBL.8 d4,{d6},d9 ;// scale pattern for d1 and d3
+ CMP r8,#0 ;// flags stay live until the VMOVNE below
+ VLD1.16 {d0,d1,d2,d3},[r4]
+ VSHL.U16 d8,d8,d5
+ VSHL.U16 d4,d4,d5
+ BEQ L1
+ LDRSH r10,[r8,#0] ;// caller-supplied DC value
+L1:
+ VMUL.I16 d0,d0,d8
+ VMUL.I16 d1,d1,d4
+ VMUL.I16 d2,d2,d8
+ VMUL.I16 d3,d3,d4
+ VMOVNE.16 d0[0],r10 ;// overwrite element 0 only when r2 was non-NULL
+ VTRN.16 d0,d1
+ VTRN.16 d2,d3
+ VTRN.32 q0,q1
+ VMOV.I16 d4,#0
+ VADD.I16 d5,d0,d2
+ VSUB.I16 d6,d0,d2
+ VHADD.S16 d7,d1,d4
+ VHADD.S16 d8,d3,d4
+ VSUB.I16 d7,d7,d3
+ VADD.I16 d8,d1,d8
+ VADD.I16 d0,d5,d8
+ VADD.I16 d1,d6,d7
+ VSUB.I16 d2,d6,d7
+ VSUB.I16 d3,d5,d8
+ VTRN.16 d0,d1
+ VTRN.16 d2,d3
+ VTRN.32 q0,q1
+ VADD.I16 d5,d0,d2
+ VSUB.I16 d6,d0,d2
+ VHADD.S16 d7,d1,d4
+ VHADD.S16 d8,d3,d4
+ VSUB.I16 d7,d7,d3
+ VADD.I16 d8,d1,d8
+ VADD.I16 d0,d5,d8
+ VADD.I16 d1,d6,d7
+ VSUB.I16 d2,d6,d7
+ VSUB.I16 d3,d5,d8
+ VRSHR.S16 d0,d0,#6
+ VRSHR.S16 d1,d1,#6
+ VRSHR.S16 d2,d2,#6
+ VRSHR.S16 d3,d3,#6
+ B L0x130
+L0x114:
+ LDRSH r10,[r8,#0]
+ ADD r10,r10,#0x20
+ ASR r10,r10,#6 ;// (dc + 32) >> 6
+ VDUP.16 d0,r10
+ VDUP.16 d1,r10
+ VDUP.16 d2,r10
+ VDUP.16 d3,r10
+L0x130:
+ LDR r1,[sp,#0x58] ;// prediction step
+ LDR r10,[sp,#0x5c] ;// destination step
+ LDR r3,[r7],r1 ;// four prediction rows, 4 bytes each
+ LDR r5,[r7],r1
+ VMOV d4,r3,r5
+ LDR r3,[r7],r1
+ LDR r5,[r7,#0]
+ VMOV d5,r3,r5
+ VADDW.U8 q3,q0,d4 ;// residual + prediction in 16 bits
+ VADDW.U8 q4,q1,d5
+ VQMOVUN.S16 d0,q3 ;// saturate to 0..255
+ VQMOVUN.S16 d1,q4
+ VST1.32 {d0[0]},[r9],r10
+ VST1.32 {d0[1]},[r9],r10
+ VST1.32 {d1[0]},[r9],r10
+ VST1.32 {d1[1]},[r9]
+ MOV r0,#0
+ ADD sp,sp,#0x20
+ VPOP {d8-d9}
+ POP {r4-r12,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S
new file mode 100644
index 0000000..79ba538
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.S
@@ -0,0 +1,87 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global omxVCM4P10_FilterDeblockingChroma_HorEdge_I
+ .func omxVCM4P10_FilterDeblockingChroma_HorEdge_I
+;// omxVCM4P10_FilterDeblockingChroma_HorEdge_I
+;// Filters two horizontal edges of an 8-pixel-wide block, four rows apart.
+;// In:  r0 = pointer to the row just below the first edge, r1 = step,
+;//      r2 = pointer to two threshold bytes broadcast into d0 (one per edge),
+;//      r3 = pointer to two threshold bytes broadcast into d2,
+;//      [sp,#0] = pointer kept in r5 (advanced by 4 when an edge is skipped),
+;//      [sp,#4] = strength bytes: four are read per edge, 8 bytes apart.
+;//      NOTE(review): presumably pAlpha, pBeta, pThresholds, pBS - confirm.
+;// Out: r0 = 0.
+;// r7 is a two-pass loop counter: it starts at 0x40000000 and is doubled
+;// once per edge, reaching zero after the second edge.
+;// NOTE(review): the register contract of the two *_unsafe helpers (which
+;// registers they read, write and advance, including d0/d2 and r5) is
+;// defined elsewhere - confirm before changing register usage here.
+omxVCM4P10_FilterDeblockingChroma_HorEdge_I:
+ PUSH {r4-r10,lr}
+ VPUSH {d8-d15}
+ VLD1.8 {d0[]},[r2]!
+ SUB r0,r0,r1,LSL #1
+ SUB r0,r0,r1 ;// r0 now points three rows above the edge
+ VLD1.8 {d2[]},[r3]!
+ LDR r4,[sp,#0x64] ;// second stack argument (32 + 64 bytes pushed => base 0x60)
+ LDR r5,[sp,#0x60] ;// first stack argument
+ LDR r9, =0x3030303 ;// mask: any strength byte with bit 0 or 1 set
+ LDR r8, =0x4040404 ;// mask: any strength byte with bit 2 set
+ VMOV.I8 d14,#0
+ VMOV.I8 d15,#0x1
+ VMOV.I16 d1,#0x4
+ MOV r7,#0x40000000
+L0x38:
+ LDR r6,[r4],#8 ;// four strength bytes for this edge
+ VLD1.8 {d6},[r0],r1 ;// rows -3, -2, -1 above the edge ...
+ VLD1.8 {d5},[r0],r1
+ CMP r6,#0
+ VLD1.8 {d4},[r0],r1
+ VLD1.8 {d8},[r0],r1 ;// ... and rows 0, +1, +2 below it
+ VABD.U8 d19,d6,d4
+ VLD1.8 {d9},[r0],r1
+ VABD.U8 d13,d4,d8
+ VLD1.8 {d10},[r0],r1
+ BEQ L0xe4 ;// all four strengths zero: skip this edge
+ VABD.U8 d12,d5,d4
+ VABD.U8 d18,d9,d8
+ VCGT.U8 d16,d0,d13
+ VMOV.32 d26[0],r6
+ VMAX.U8 d12,d18,d12
+ VMOVL.U8 q13,d26 ;// widen each strength byte to 16 bits (covers two pixels)
+ VABD.U8 d17,d10,d8
+ VCGT.S16 d27,d26,#0
+ VCGT.U8 d12,d2,d12
+ VCGT.U8 d19,d2,d19
+ VAND d16,d16,d27
+ TST r6,r9
+ VCGT.U8 d17,d2,d17
+ VAND d16,d16,d12 ;// d16 = per-pixel "filter this sample" mask
+ VAND d12,d16,d17
+ VAND d17,d16,d19
+ BLNE armVCM4P10_DeblockingChromabSLT4_unsafe
+ TST r6,r8
+ SUB r0,r0,r1,LSL #2 ;// back to the row just above the edge
+ VTST.16 d26,d26,d1 ;// lanes whose strength has bit 2 set
+ BLNE armVCM4P10_DeblockingChromabSGE4_unsafe
+ VBIT d29,d13,d26
+ VBIT d24,d31,d26
+ VBIF d29,d4,d16 ;// keep the original row where the mask is clear
+ VBIF d24,d8,d16
+ VST1.8 {d29},[r0],r1
+ ADDS r7,r7,r7
+ VST1.8 {d24},[r0],r1
+ BNE L0x38
+ MOV r0,#0
+ VPOP {d8-d15}
+ POP {r4-r10,pc}
+L0xe4:
+ VLD1.8 {d0[]},[r2] ;// thresholds for the second edge (r2/r3 were post-incremented at entry)
+ SUB r0,r0,r1,LSL #1 ;// six rows were read; step back so the next pass starts 4 rows down
+ ADDS r7,r7,r7
+ VLD1.8 {d2[]},[r3]
+ ADD r5,r5,#4
+ BNE L0x38
+ MOV r0,#0
+ VPOP {d8-d15}
+ POP {r4-r10,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S
new file mode 100644
index 0000000..dcdddbe
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.S
@@ -0,0 +1,123 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global omxVCM4P10_FilterDeblockingChroma_VerEdge_I
+ .func omxVCM4P10_FilterDeblockingChroma_VerEdge_I
+;// omxVCM4P10_FilterDeblockingChroma_VerEdge_I
+;// Filters two vertical edges of an 8-row block, four columns apart.
+;// In:  r0 = pointer to the first column right of the first edge,
+;//      r1 = step,
+;//      r2 = pointer to two threshold bytes broadcast into d0 (one per edge),
+;//      r3 = pointer to two threshold bytes broadcast into d2,
+;//      [sp,#0] = pointer kept in r5 (advanced by 4 when an edge is skipped),
+;//      [sp,#4] = strength bytes: four are read per edge, 8 bytes apart.
+;//      NOTE(review): presumably pAlpha, pBeta, pThresholds, pBS - confirm.
+;// Out: r0 = 0.
+;// Each pass loads eight 8-byte rows starting four columns left of the
+;// edge, transposes them so that every D register holds one column,
+;// filters, and writes back only the two columns adjacent to the edge one
+;// byte at a time.
+;// r7 is a two-pass loop counter (0x40000000 doubled until zero).
+;// NOTE(review): the register contract of the two *_unsafe helpers is
+;// defined elsewhere - confirm before changing register usage here.
+omxVCM4P10_FilterDeblockingChroma_VerEdge_I:
+ PUSH {r4-r12,lr}
+ VPUSH {d8-d15}
+ VLD1.8 {d0[]},[r2]!
+ SUB r0,r0,#4 ;// start four columns left of the edge
+ VLD1.8 {d2[]},[r3]!
+ LDR r4,[sp,#0x6c] ;// second stack argument (40 + 64 bytes pushed => base 0x68)
+ LDR r5,[sp,#0x68] ;// first stack argument
+ LDR r8, =0x4040404 ;// mask: any strength byte with bit 2 set
+ LDR r9, =0x3030303 ;// mask: any strength byte with bit 0 or 1 set
+ VMOV.I8 d14,#0
+ VMOV.I8 d15,#0x1
+ VMOV.I16 d1,#0x4
+ MOV r7,#0x40000000
+L0x34:
+ LDR r6,[r4],#8 ;// four strength bytes for this edge
+ ADD r10,r0,r1 ;// r0 walks even rows, r10 odd rows
+ ADD lr,r1,r1
+ VLD1.8 {d7},[r0],lr
+ VLD1.8 {d8},[r10],lr
+ VLD1.8 {d5},[r0],lr
+ VLD1.8 {d10},[r10],lr
+ VLD1.8 {d6},[r0],lr
+ VLD1.8 {d9},[r10],lr
+ VLD1.8 {d4},[r0],lr
+ VLD1.8 {d11},[r10],lr
+ VZIP.8 d7,d8 ;// 8x8 byte transpose: rows -> columns
+ VZIP.8 d5,d10
+ VZIP.8 d6,d9
+ VZIP.8 d4,d11
+ VZIP.16 d7,d5
+ VZIP.16 d8,d10
+ VZIP.16 d6,d4
+ VZIP.16 d9,d11
+ VTRN.32 d7,d6
+ VTRN.32 d5,d4
+ VTRN.32 d10,d11
+ VTRN.32 d8,d9
+ CMP r6,#0
+ VABD.U8 d19,d6,d4
+ VABD.U8 d13,d4,d8
+ BEQ L0x170 ;// all four strengths zero: skip this edge
+ VABD.U8 d12,d5,d4
+ VABD.U8 d18,d9,d8
+ VMOV.32 d26[0],r6
+ VCGT.U8 d16,d0,d13
+ VMAX.U8 d12,d18,d12
+ VMOVL.U8 q13,d26 ;// widen each strength byte to 16 bits (covers two rows)
+ VABD.U8 d17,d10,d8
+ VCGT.S16 d27,d26,#0
+ VCGT.U8 d12,d2,d12
+ VCGT.U8 d19,d2,d19
+ VAND d16,d16,d27
+ TST r6,r9
+ VCGT.U8 d17,d2,d17
+ VAND d16,d16,d12 ;// d16 = per-sample "filter this sample" mask
+ VAND d12,d16,d17
+ VAND d17,d16,d19
+ BLNE armVCM4P10_DeblockingChromabSLT4_unsafe
+ TST r6,r8
+ SUB r0,r0,r1,LSL #3 ;// rewind the eight rows consumed by the loads
+ VTST.16 d26,d26,d1 ;// lanes whose strength has bit 2 set
+ BLNE armVCM4P10_DeblockingChromabSGE4_unsafe
+ VBIT d29,d13,d26
+ VBIT d24,d31,d26
+ ADD r10,r0,#3 ;// column just left of the edge
+ VBIF d29,d4,d16 ;// keep the original column where the mask is clear
+ ADD r12,r10,r1
+ ADD lr,r1,r1
+ VBIF d24,d8,d16
+ ADDS r7,r7,r7 ;// flags consumed by the BNE after the stores
+ VST1.8 {d29[0]},[r10],lr
+ VST1.8 {d29[1]},[r12],lr
+ VST1.8 {d29[2]},[r10],lr
+ VST1.8 {d29[3]},[r12],lr
+ VST1.8 {d29[4]},[r10],lr
+ VST1.8 {d29[5]},[r12],lr
+ VST1.8 {d29[6]},[r10],lr
+ VST1.8 {d29[7]},[r12],lr
+ ADD r12,r0,#4 ;// column just right of the edge
+ ADD r10,r12,r1
+ VST1.8 {d24[0]},[r12],lr
+ VST1.8 {d24[1]},[r10],lr
+ VST1.8 {d24[2]},[r12],lr
+ VST1.8 {d24[3]},[r10],lr
+ VST1.8 {d24[4]},[r12],lr
+ VST1.8 {d24[5]},[r10],lr
+ VST1.8 {d24[6]},[r12],lr
+ VST1.8 {d24[7]},[r10],lr
+ ADD r0,r0,#4 ;// next edge is four columns to the right
+ BNE L0x34
+ MOV r0,#0
+ VPOP {d8-d15}
+ POP {r4-r12,pc}
+L0x170:
+ VLD1.8 {d0[]},[r2] ;// thresholds for the second edge (r2/r3 were post-incremented at entry)
+ ADD r0,r0,#4
+ SUB r0,r0,r1,LSL #3
+ ADDS r7,r7,r7
+ VLD1.8 {d2[]},[r3]
+ ADD r5,r5,#4
+ BNE L0x34
+ MOV r0,#0
+ VPOP {d8-d15}
+ POP {r4-r12,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S
new file mode 100644
index 0000000..9755899
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.S
@@ -0,0 +1,107 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global omxVCM4P10_FilterDeblockingLuma_HorEdge_I
+ .func omxVCM4P10_FilterDeblockingLuma_HorEdge_I
+;// omxVCM4P10_FilterDeblockingLuma_HorEdge_I
+;// Filters four horizontal edges of a 16-pixel-wide block, four rows
+;// apart, in two 8-pixel halves per edge (eight passes in total).
+;// In:  r0 = pointer to the row just below the first edge, r1 = step,
+;//      r2 = pointer to two threshold bytes for d0: byte 0 is used for
+;//           the first edge, byte 1 for the remaining edges,
+;//      r3 = the same arrangement for d2,
+;//      [sp,#0] = pointer kept in r5 (advanced by 2 on the skip and
+;//           strong-filter paths),
+;//      [sp,#4] = strength bytes, read two at a time.
+;//      NOTE(review): presumably pAlpha, pBeta, pThresholds, pBS - confirm.
+;// Out: r0 = 0.
+;// r9 is a shift-register loop counter. Each ADDS r9,r9,r9 doubles it:
+;// starting from 0x55000000 the carry comes out on every second pass (end
+;// of an edge) and the value reaches zero after the eighth pass.
+;// NOTE(review): the register contract of the two *_unsafe helpers is
+;// defined elsewhere - confirm before changing register usage here.
+omxVCM4P10_FilterDeblockingLuma_HorEdge_I:
+ PUSH {r4-r12,lr}
+ VPUSH {d8-d15}
+ ADD r7,r2,#1 ;// thresholds used for edges after the first
+ ADD r8,r3,#1
+ VLD1.8 {d0[]},[r2]
+ SUB r0,r0,r1,LSL #2 ;// start four rows above the edge
+ VLD1.8 {d2[]},[r3]
+ LDR r4,[sp,#0x6c] ;// second stack argument (40 + 64 bytes pushed => base 0x68)
+ LDR r5,[sp,#0x68] ;// first stack argument
+ MOV r11,#0
+ VMOV.I8 d14,#0
+ VMOV.I8 d15,#0x1
+ ADD r10,r1,r1
+ MOV r9,#0x55000000
+L0x38:
+ LDRH r12,[r4],#2 ;// two strength bytes: one per group of four pixels
+ ADD r6,r0,r1
+ CMP r12,#0
+ BEQ L0xe4 ;// both strengths zero: skip these 8 pixels
+ VLD1.8 {d7},[r0],r10 ;// r0 walks rows -4,-2,0,+2; r6 walks rows -3,-1,+1
+ VLD1.8 {d6},[r6],r10
+ VLD1.8 {d5},[r0],r10
+ VLD1.8 {d4},[r6],r10
+ VLD1.8 {d8},[r0],r10
+ VABD.U8 d12,d4,d5
+ VLD1.8 {d9},[r6]
+ VABD.U8 d13,d8,d4
+ VLD1.8 {d10},[r0],r1
+ VABD.U8 d18,d9,d8
+ VABD.U8 d19,d6,d4
+ VCGT.U8 d16,d0,d13
+ TST r12,#0xff
+ VMAX.U8 d12,d18,d12
+ VABD.U8 d17,d10,d8
+ VMOVEQ.32 d16[0],r11 ;// first strength is zero: clear the mask for pixels 0-3
+ TST r12,#0xff00
+ VCGT.U8 d19,d2,d19
+ VCGT.U8 d12,d2,d12
+ VMOVEQ.32 d16[1],r11 ;// second strength is zero: clear the mask for pixels 4-7
+ VCGT.U8 d17,d2,d17
+ VLD1.8 {d11},[r0] ;// row +3
+ VAND d16,d16,d12
+ TST r12,#4 ;// bit 2 of the first strength selects the strong filter
+ VAND d12,d16,d17
+ VAND d17,d16,d19
+ BNE L0xf8
+ SUB r0,r0,r1,LSL #2
+ SUB r0,r0,r1 ;// r0 -> row -2
+ BL armVCM4P10_DeblockingLumabSLT4_unsafe
+ VST1.8 {d30},[r0],r1
+ VST1.8 {d29},[r0],r1
+ SUB r6,r0,r1,LSL #2 ;// remember row -4 for the next pass
+ VST1.8 {d24},[r0],r1
+ ADDS r9,r9,r9
+ VST1.8 {d25},[r0]
+ ADD r0,r6,#8 ;// next 8 pixels of the same edge
+ BCC L0x38
+ B L0x130
+L0xe4:
+ ADD r0,r0,#8
+ ADDS r9,r9,r9
+ ADD r5,r5,#2
+ BCC L0x38
+ B L0x130
+L0xf8:
+ SUB r0,r0,r1,LSL #2
+ SUB r0,r0,r1,LSL #1 ;// r0 -> row -3
+ BL armVCM4P10_DeblockingLumabSGE4_unsafe
+ VST1.8 {d31},[r0],r1
+ VST1.8 {d30},[r0],r1
+ VST1.8 {d29},[r0],r1
+ SUB r6,r0,r1,LSL #2 ;// remember row -4 for the next pass
+ VST1.8 {d24},[r0],r1
+ ADDS r9,r9,r9
+ VST1.8 {d25},[r0],r1
+ ADD r5,r5,#2
+ VST1.8 {d28},[r0]
+ ADD r0,r6,#8
+ BCC L0x38
+L0x130:
+ SUB r0,r0,#0x10 ;// end of an edge: back to the left column ...
+ VLD1.8 {d0[]},[r7]
+ ADD r0,r0,r1,LSL #2 ;// ... and four rows down
+ VLD1.8 {d2[]},[r8]
+ BNE L0x38 ;// Z still reflects the last ADDS r9: zero after the eighth pass
+ MOV r0,#0
+ VPOP {d8-d15}
+ POP {r4-r12,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S
new file mode 100644
index 0000000..66cc32e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S
@@ -0,0 +1,157 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global omxVCM4P10_FilterDeblockingLuma_VerEdge_I
+ .func omxVCM4P10_FilterDeblockingLuma_VerEdge_I
+;// omxVCM4P10_FilterDeblockingLuma_VerEdge_I
+;// Filters four vertical edges of a 16-row block, four columns apart. The
+;// upper eight rows of all four edges are processed first, then the lower
+;// eight rows (eight passes in total).
+;// In:  r0 = pointer to the first column right of the first edge,
+;//      r1 = step,
+;//      r2 = pointer to two threshold bytes for d0: byte 0 is used for
+;//           the first edge, byte 1 for the remaining edges,
+;//      r3 = the same arrangement for d2,
+;//      [sp,#0] = pointer kept in r5,
+;//      [sp,#4] = strength bytes: two are read per pass, 4 bytes apart.
+;//      NOTE(review): presumably pAlpha, pBeta, pThresholds, pBS - confirm.
+;// Out: r0 = 0.
+;// r9 is a shift-register loop counter. Each ADDS r9,r9,r9 doubles it:
+;// starting from 0x11000000 the carry comes out on every fourth pass (end
+;// of a row of edges) and the value reaches zero after the eighth pass.
+;// NOTE(review): the register contract of the two *_unsafe helpers is
+;// defined elsewhere - confirm before changing register usage here.
+omxVCM4P10_FilterDeblockingLuma_VerEdge_I:
+ PUSH {r4-r12,lr}
+ VPUSH {d8-d15}
+ ADD r7,r2,#1 ;// thresholds used for edges after the first
+ ADD r8,r3,#1
+ VLD1.8 {d0[]},[r2]
+ SUB r0,r0,#4 ;// start four columns left of the edge
+ VLD1.8 {d2[]},[r3]
+ LDR r4,[sp,#0x6c] ;// second stack argument (40 + 64 bytes pushed => base 0x68)
+ LDR r5,[sp,#0x68] ;// first stack argument
+ MOV r6,#0
+ VMOV.I8 d14,#0
+ VMOV.I8 d15,#0x1
+ MOV r9,#0x11000000
+ ADD r11,r1,r1
+L0x38:
+ LDRH r12,[r4],#4 ;// two strength bytes (one per four rows); next edge is 4 bytes on
+ CMP r12,#0
+ BEQ L0x160 ;// both strengths zero: skip this edge
+ ADD r10,r0,r1 ;// r0 walks even rows, r10 odd rows
+ VLD1.8 {d7},[r0],r11
+ VLD1.8 {d8},[r10],r11
+ VLD1.8 {d5},[r0],r11
+ VZIP.8 d7,d8 ;// 8x8 byte transpose, interleaved with the loads
+ VLD1.8 {d10},[r10],r11
+ VLD1.8 {d6},[r0],r11
+ VZIP.8 d5,d10
+ VLD1.8 {d9},[r10],r11
+ VLD1.8 {d4},[r0],r11
+ VLD1.8 {d11},[r10],r11
+ VZIP.8 d6,d9
+ VZIP.16 d8,d10
+ VZIP.8 d4,d11
+ SUB r0,r0,r1,LSL #3 ;// rewind the eight rows just read
+ VZIP.16 d7,d5
+ VZIP.16 d9,d11
+ VZIP.16 d6,d4
+ VTRN.32 d8,d9
+ VTRN.32 d5,d4
+ VTRN.32 d10,d11
+ VTRN.32 d7,d6
+ VABD.U8 d13,d4,d8
+ VABD.U8 d12,d5,d4
+ VABD.U8 d18,d9,d8
+ VABD.U8 d19,d6,d4
+ TST r12,#0xff
+ VCGT.U8 d16,d0,d13
+ VMAX.U8 d12,d18,d12
+ VABD.U8 d17,d10,d8
+ VMOVEQ.32 d16[0],r6 ;// first strength is zero: clear the mask for rows 0-3
+ TST r12,#0xff00
+ VCGT.U8 d19,d2,d19
+ VCGT.U8 d12,d2,d12
+ VMOVEQ.32 d16[1],r6 ;// second strength is zero: clear the mask for rows 4-7
+ VCGT.U8 d17,d2,d17
+ VAND d16,d16,d12
+ TST r12,#4 ;// bit 2 of the first strength selects the strong filter
+ VAND d12,d16,d17
+ VAND d17,d16,d19
+ BNE L0x17c
+ BL armVCM4P10_DeblockingLumabSLT4_unsafe
+ VZIP.8 d7,d6 ;// transpose columns back into rows
+ VZIP.8 d30,d29
+ VZIP.8 d24,d25
+ VZIP.8 d10,d11
+ VZIP.16 d7,d30
+ ADD r10,r0,r1
+ VZIP.16 d24,d10
+ VZIP.16 d25,d11
+ VZIP.16 d6,d29
+ VTRN.32 d7,d24
+ VTRN.32 d30,d10
+ VTRN.32 d6,d25
+ VTRN.32 d29,d11
+ VST1.8 {d7},[r0],r11
+ VST1.8 {d24},[r10],r11
+ VST1.8 {d30},[r0],r11
+ VST1.8 {d10},[r10],r11
+ VST1.8 {d6},[r0],r11
+ VST1.8 {d25},[r10],r11
+ ADDS r9,r9,r9
+ VST1.8 {d29},[r0],r11
+ ADD r5,r5,#2
+ VST1.8 {d11},[r10],r1 ;// last use of r10 in this pass, so the increment amount is irrelevant
+ SUB r0,r0,r1,LSL #3
+ VLD1.8 {d0[]},[r7]
+ ADD r0,r0,#4 ;// next edge is four columns to the right
+ VLD1.8 {d2[]},[r8]
+ BCC L0x38
+ B L0x1f0
+L0x160:
+ ADD r0,r0,#4
+ ADDS r9,r9,r9
+ VLD1.8 {d0[]},[r7]
+ ADD r5,r5,#4
+ VLD1.8 {d2[]},[r8]
+ BCC L0x38
+ B L0x1f0
+L0x17c:
+ BL armVCM4P10_DeblockingLumabSGE4_unsafe
+ VZIP.8 d7,d31 ;// transpose columns back into rows
+ VZIP.8 d30,d29
+ VZIP.8 d24,d25
+ VZIP.8 d28,d11
+ VZIP.16 d7,d30
+ ADD r10,r0,r1
+ VZIP.16 d24,d28
+ VZIP.16 d25,d11
+ VZIP.16 d31,d29
+ VTRN.32 d7,d24
+ VTRN.32 d30,d28
+ VTRN.32 d31,d25
+ VTRN.32 d29,d11
+ VST1.8 {d7},[r0],r11
+ VST1.8 {d24},[r10],r11
+ VST1.8 {d30},[r0],r11
+ VST1.8 {d28},[r10],r11
+ VST1.8 {d31},[r0],r11
+ VST1.8 {d25},[r10],r11
+ ADDS r9,r9,r9
+ VST1.8 {d29},[r0],r11
+ ADD r5,r5,#4
+ VST1.8 {d11},[r10],r11
+ SUB r0,r0,r1,LSL #3
+ VLD1.8 {d0[]},[r7]
+ ADD r0,r0,#4
+ VLD1.8 {d2[]},[r8]
+ BCC L0x38
+L0x1f0:
+ SUB r4,r4,#0xe ;// four passes advanced r4 by 16; net +2 selects the lower-half strengths
+ SUB r5,r5,#0xe
+ SUB r0,r0,#0x10 ;// back to the first edge ...
+ VLD1.8 {d0[]},[r2] ;// ... which uses the first threshold bytes again
+ ADD r0,r0,r1,LSL #3 ;// ... eight rows further down
+ VLD1.8 {d2[]},[r3]
+ BNE L0x38 ;// Z still reflects the last ADDS r9: zero after the eighth pass
+ MOV r0,#0
+ VPOP {d8-d15}
+ POP {r4-r12,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S
new file mode 100644
index 0000000..76c3d7d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_InterpolateLuma_s.S
@@ -0,0 +1,323 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global omxVCM4P10_InterpolateLuma
+ .func omxVCM4P10_InterpolateLuma
+omxVCM4P10_InterpolateLuma: /* Luma interpolation driver: walks the region in 4x4 tiles and dispatches every tile through a 16-way branch table indexed by (1st stack arg + 4 * 2nd stack arg). r0 = source, r1 = source stride, r2 = destination, r3 = destination stride. Returns 0 in r0. */
+ PUSH {r4-r12,lr} /* 10 core registers = 40 bytes */
+ VPUSH {d8-d15} /* 8 NEON D registers = 64 bytes */
+ SUB sp,sp,#0x10 /* 16-byte scratch slot at sp, used to park r0-r3 for each tile */
+ LDR r6,[sp,#0x78] /* 1st stack argument (0x78 = 40 + 64 + 16); dx in the OpenMAX DL prototype -- TODO confirm */
+ LDR r7,[sp,#0x7c] /* 2nd stack argument; dy in the OpenMAX DL prototype -- TODO confirm */
+ LDR r5,[sp,#0x80] /* 3rd stack word: column counter, reduced by 4 per tile (presumably roi.width) */
+ LDR r4,[sp,#0x84] /* 4th stack word: row counter, reduced by 4 per tile row (presumably roi.height) */
+ ADD r6,r6,r7,LSL #2 /* branch-table index = r6 + 4 * r7 */
+ ADD r11,sp,#0 /* r11 -> scratch slot */
+ VMOV.I16 d31,#0x14 /* 20 in every 16-bit lane; not read in this function, presumably a filter tap for the helpers -- confirm */
+ VMOV.I16 d30,#0x5 /* 5 in every 16-bit lane; same note as d31 */
+L0x2c: /* per-tile entry point */
+ STM r11,{r0-r3} /* park the tile pointers and strides; reloaded at L0x434 */
+ ADD pc,pc,r6,LSL #2 /* computed branch: pc reads as this instruction + 8, so index 0 selects the second B below */
+ B L0x3f0 /* filler slot at pc + 4; skipped by the computed branch for index >= 0 */
+ B L0x78 /* index 0 */
+ B L0xa8 /* index 1 */
+ B L0xdc /* index 2 */
+ B L0x100 /* index 3 */
+ B L0x134 /* index 4 */
+ B L0x168 /* index 5 */
+ B L0x1a8 /* index 6 */
+ B L0x1f0 /* index 7 */
+ B L0x234 /* index 8 */
+ B L0x258 /* index 9 */
+ B L0x2b0 /* index 10 */
+ B L0x2d8 /* index 11 */
+ B L0x330 /* index 12 */
+ B L0x364 /* index 13 */
+ B L0x3a8 /* index 14 */
+ B L0x3f0 /* index 15 */
+L0x78: /* index 0: plain copy of the 4x4 tile */
+ ADD r12,r0,r1,LSL #1 /* r12 -> source row 2 */
+ VLD1.8 {d9},[r0],r1 /* row 0 */
+ VLD1.8 {d11},[r12],r1 /* row 2 */
+ VLD1.8 {d10},[r0] /* row 1 */
+ VLD1.8 {d12},[r12] /* row 3 */
+ ADD r12,r2,r3,LSL #1 /* r12 -> destination row 2 */
+ VST1.32 {d9[0]},[r2],r3 /* only the low 4 bytes of each loaded row are written */
+ VST1.32 {d11[0]},[r12],r3
+ VST1.32 {d10[0]},[r2]
+ VST1.32 {d12[0]},[r12]
+ ADD r11,sp,#0
+ B L0x434
+L0xa8: /* index 1: HalfHor result rounding-averaged with d14/d16/d18/d20 */
+ SUB r0,r0,#2 /* helper is entered two bytes left of the tile */
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ VRHADD.U8 d22,d22,d14 /* d14/d16/d18/d20 are not written here; presumably left by the helper with integer-position samples -- confirm */
+ VRHADD.U8 d26,d26,d18
+ VRHADD.U8 d24,d24,d16
+ VRHADD.U8 d28,d28,d20
+ ADD r12,r2,r3,LSL #1
+ VST1.32 {d22[0]},[r2],r3 /* rows are stored as d22 (row 0), d24 (row 1), d26 (row 2), d28 (row 3) */
+ VST1.32 {d26[0]},[r12],r3
+ VST1.32 {d24[0]},[r2]
+ VST1.32 {d28[0]},[r12]
+ ADD r11,sp,#0
+ B L0x434
+L0xdc: /* index 2: HalfHor result stored unchanged */
+ SUB r0,r0,#2
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ ADD r12,r2,r3,LSL #1
+ VST1.32 {d22[0]},[r2],r3
+ VST1.32 {d26[0]},[r12],r3
+ VST1.32 {d24[0]},[r2]
+ VST1.32 {d28[0]},[r12]
+ ADD r11,sp,#0
+ B L0x434
+L0x100: /* index 3: HalfHor result rounding-averaged with d15/d17/d19/d21 */
+ SUB r0,r0,#2
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ VRHADD.U8 d22,d22,d15
+ VRHADD.U8 d26,d26,d19
+ VRHADD.U8 d24,d24,d17
+ VRHADD.U8 d28,d28,d21
+ ADD r12,r2,r3,LSL #1
+ VST1.32 {d22[0]},[r2],r3
+ VST1.32 {d26[0]},[r12],r3
+ VST1.32 {d24[0]},[r2]
+ VST1.32 {d28[0]},[r12]
+ ADD r11,sp,#0
+ B L0x434
+L0x134: /* index 4: HalfVer result rounding-averaged with d9/d10/d11/d12 */
+ SUB r0,r0,r1,LSL #1 /* helper is entered two rows above the tile */
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ VRHADD.U8 d0,d0,d9
+ VRHADD.U8 d4,d4,d11
+ VRHADD.U8 d2,d2,d10
+ VRHADD.U8 d6,d6,d12
+ ADD r12,r2,r3,LSL #1
+ VST1.32 {d0[0]},[r2],r3 /* rows are stored as d0 (row 0), d2 (row 1), d4 (row 2), d6 (row 3) */
+ VST1.32 {d4[0]},[r12],r3
+ VST1.32 {d2[0]},[r2]
+ VST1.32 {d6[0]},[r12]
+ ADD r11,sp,#0
+ B L0x434
+L0x168: /* index 5: rounding average of the HalfVer and HalfHor results */
+ MOV r8,r0 /* keep the tile origin across the first helper call */
+ SUB r0,r0,r1,LSL #1
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ SUB r0,r8,#2
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ VRHADD.U8 d22,d22,d0
+ VRHADD.U8 d26,d26,d4
+ VRHADD.U8 d24,d24,d2
+ VRHADD.U8 d28,d28,d6
+ ADD r12,r2,r3,LSL #1
+ VST1.32 {d22[0]},[r2],r3
+ VST1.32 {d26[0]},[r12],r3
+ VST1.32 {d24[0]},[r2]
+ VST1.32 {d28[0]},[r12]
+ ADD r11,sp,#0
+ B L0x434
+L0x1a8: /* index 6: HalfDiagHorVer result averaged with q7-q10 narrowed to bytes */
+ SUB r0,r0,r1,LSL #1 /* helper is entered two rows above and two bytes left of the tile */
+ SUB r0,r0,#2
+ BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+ VQRSHRUN.S16 d14,q7,#5 /* saturating rounding shift right by 5, narrowed to unsigned bytes */
+ VQRSHRUN.S16 d16,q8,#5
+ VQRSHRUN.S16 d18,q9,#5
+ VQRSHRUN.S16 d20,q10,#5
+ VRHADD.U8 d0,d0,d14
+ VRHADD.U8 d4,d4,d18
+ VRHADD.U8 d2,d2,d16
+ VRHADD.U8 d6,d6,d20
+ ADD r12,r2,r3,LSL #1
+ VST1.32 {d0[0]},[r2],r3
+ VST1.32 {d4[0]},[r12],r3
+ VST1.32 {d2[0]},[r2]
+ VST1.32 {d6[0]},[r12]
+ ADD r11,sp,#0
+ B L0x434
+L0x1f0: /* index 7: as index 5, but HalfVer is taken one byte to the right */
+ MOV r8,r0
+ ADD r0,r0,#1
+ SUB r0,r0,r1,LSL #1
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ SUB r0,r8,#2
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ VRHADD.U8 d22,d22,d0
+ VRHADD.U8 d26,d26,d4
+ VRHADD.U8 d24,d24,d2
+ VRHADD.U8 d28,d28,d6
+ ADD r12,r2,r3,LSL #1
+ VST1.32 {d22[0]},[r2],r3
+ VST1.32 {d26[0]},[r12],r3
+ VST1.32 {d24[0]},[r2]
+ VST1.32 {d28[0]},[r12]
+ ADD r11,sp,#0
+ B L0x434
+L0x234: /* index 8: HalfVer result stored unchanged */
+ SUB r0,r0,r1,LSL #1
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ ADD r12,r2,r3,LSL #1
+ VST1.32 {d0[0]},[r2],r3
+ VST1.32 {d4[0]},[r12],r3
+ VST1.32 {d2[0]},[r2]
+ VST1.32 {d6[0]},[r12]
+ ADD r11,sp,#0
+ B L0x434
+L0x258: /* index 9: HalfDiagVerHor result averaged with q9-q12 realigned by 4 bytes and narrowed */
+ SUB r0,r0,r1,LSL #1
+ SUB r0,r0,#2
+ BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+ VEXT.8 d18,d18,d19,#4 /* drop the first two 16-bit lanes of each row of intermediates */
+ VEXT.8 d20,d20,d21,#4
+ VEXT.8 d22,d22,d23,#4
+ VEXT.8 d24,d24,d25,#4
+ VQRSHRUN.S16 d14,q9,#5
+ VQRSHRUN.S16 d16,q10,#5
+ VQRSHRUN.S16 d18,q11,#5
+ VQRSHRUN.S16 d20,q12,#5
+ VRHADD.U8 d0,d0,d14
+ VRHADD.U8 d4,d4,d18
+ VRHADD.U8 d2,d2,d16
+ VRHADD.U8 d6,d6,d20
+ ADD r12,r2,r3,LSL #1
+ VST1.32 {d0[0]},[r2],r3
+ VST1.32 {d4[0]},[r12],r3
+ VST1.32 {d2[0]},[r2]
+ VST1.32 {d6[0]},[r12]
+ ADD r11,sp,#0
+ B L0x434
+L0x2b0: /* index 10: HalfDiagHorVer result stored unchanged */
+ SUB r0,r0,r1,LSL #1
+ SUB r0,r0,#2
+ BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+ ADD r12,r2,r3,LSL #1
+ VST1.32 {d0[0]},[r2],r3
+ VST1.32 {d4[0]},[r12],r3
+ VST1.32 {d2[0]},[r2]
+ VST1.32 {d6[0]},[r12]
+ ADD r11,sp,#0
+ B L0x434
+L0x2d8: /* index 11: as index 9, but the intermediates are realigned by 6 bytes (three 16-bit lanes) */
+ SUB r0,r0,r1,LSL #1
+ SUB r0,r0,#2
+ BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+ VEXT.8 d18,d18,d19,#6
+ VEXT.8 d20,d20,d21,#6
+ VEXT.8 d22,d22,d23,#6
+ VEXT.8 d24,d24,d25,#6
+ VQRSHRUN.S16 d14,q9,#5
+ VQRSHRUN.S16 d16,q10,#5
+ VQRSHRUN.S16 d18,q11,#5
+ VQRSHRUN.S16 d20,q12,#5
+ VRHADD.U8 d0,d0,d14
+ VRHADD.U8 d4,d4,d18
+ VRHADD.U8 d2,d2,d16
+ VRHADD.U8 d6,d6,d20
+ ADD r12,r2,r3,LSL #1
+ VST1.32 {d0[0]},[r2],r3
+ VST1.32 {d4[0]},[r12],r3
+ VST1.32 {d2[0]},[r2]
+ VST1.32 {d6[0]},[r12]
+ ADD r11,sp,#0
+ B L0x434
+L0x330: /* index 12: HalfVer result rounding-averaged with d10/d11/d12/d13 (one register later than index 4) */
+ SUB r0,r0,r1,LSL #1
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ VRHADD.U8 d0,d0,d10
+ VRHADD.U8 d4,d4,d12
+ VRHADD.U8 d2,d2,d11
+ VRHADD.U8 d6,d6,d13
+ ADD r12,r2,r3,LSL #1
+ VST1.32 {d0[0]},[r2],r3
+ VST1.32 {d4[0]},[r12],r3
+ VST1.32 {d2[0]},[r2]
+ VST1.32 {d6[0]},[r12]
+ ADD r11,sp,#0
+ B L0x434
+L0x364: /* index 13: as index 5, but HalfHor is taken one row lower */
+ MOV r8,r0
+ SUB r0,r0,r1,LSL #1
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ ADD r0,r8,r1
+ SUB r0,r0,#2
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ VRHADD.U8 d22,d22,d0
+ VRHADD.U8 d26,d26,d4
+ VRHADD.U8 d24,d24,d2
+ VRHADD.U8 d28,d28,d6
+ ADD r12,r2,r3,LSL #1
+ VST1.32 {d22[0]},[r2],r3
+ VST1.32 {d26[0]},[r12],r3
+ VST1.32 {d24[0]},[r2]
+ VST1.32 {d28[0]},[r12]
+ ADD r11,sp,#0
+ B L0x434
+L0x3a8: /* index 14: as index 6, but q8-q11 are narrowed (one register later than index 6) */
+ SUB r0,r0,r1,LSL #1
+ SUB r0,r0,#2
+ BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+ VQRSHRUN.S16 d14,q8,#5
+ VQRSHRUN.S16 d16,q9,#5
+ VQRSHRUN.S16 d18,q10,#5
+ VQRSHRUN.S16 d20,q11,#5
+ VRHADD.U8 d0,d0,d14
+ VRHADD.U8 d4,d4,d18
+ VRHADD.U8 d2,d2,d16
+ VRHADD.U8 d6,d6,d20
+ ADD r12,r2,r3,LSL #1
+ VST1.32 {d0[0]},[r2],r3
+ VST1.32 {d4[0]},[r12],r3
+ VST1.32 {d2[0]},[r2]
+ VST1.32 {d6[0]},[r12]
+ ADD r11,sp,#0
+ B L0x434
+L0x3f0: /* index 15: HalfVer one byte to the right, HalfHor one row lower, rounding-averaged */
+ MOV r8,r0
+ ADD r0,r0,#1
+ SUB r0,r0,r1,LSL #1
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ ADD r0,r8,r1
+ SUB r0,r0,#2
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ VRHADD.U8 d22,d22,d0
+ VRHADD.U8 d26,d26,d4
+ VRHADD.U8 d24,d24,d2
+ VRHADD.U8 d28,d28,d6
+ ADD r12,r2,r3,LSL #1
+ VST1.32 {d22[0]},[r2],r3
+ VST1.32 {d26[0]},[r12],r3
+ VST1.32 {d24[0]},[r2]
+ VST1.32 {d28[0]},[r12]
+ ADD r11,sp,#0
+L0x434: /* common tail: advance to the next tile */
+ LDM r11,{r0-r3} /* restore the pointers and strides parked at L0x2c */
+ SUBS r5,r5,#4 /* four columns done; the following ADDs leave the flags alone */
+ ADD r0,r0,#4
+ ADD r2,r2,#4
+ BGT L0x2c /* more tiles in this tile row */
+ SUBS r4,r4,#4 /* four rows done; flags survive until the BGT below */
+ LDR r5,[sp,#0x80] /* reload the column counter */
+ ADD r11,sp,#0
+ ADD r0,r0,r1,LSL #2 /* down four source rows ... */
+ ADD r2,r2,r3,LSL #2 /* ... and four destination rows ... */
+ SUB r0,r0,r5 /* ... then back to the left edge (subtracts the reloaded column counter) */
+ SUB r2,r2,r5
+ BGT L0x2c
+ MOV r0,#0 /* return value 0 */
+ ADD sp,sp,#0x10 /* release the scratch slot */
+ VPOP {d8-d15}
+ POP {r4-r12,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S
new file mode 100644
index 0000000..0d49e4b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntraChroma_8x8_s.S
@@ -0,0 +1,217 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .section .rodata
+ .align 4
+
+armVCM4P10_pIndexTable8x8: /* Jump table read by omxVCM4P10_PredictIntraChroma_8x8 via LDR pc,[table, mode, LSL #2]; entry order is DC, HOR, VERT, PLANE. NOTE(review): the entries are absolute code addresses, which need load-time relocation in a position-independent build. */
+ .word OMX_VC_CHROMA_DC, OMX_VC_CHROMA_HOR
+ .word OMX_VC_CHROMA_VERT, OMX_VC_CHROMA_PLANE
+
+armVCM4P10_MultiplierTableChroma8x8: /* 16-bit multipliers for the PLANE handler: the first row is loaded alone, the last two rows are loaded together as one 8-lane vector */
+ .hword 3, 2, 1,4 /* weights applied to the neighbour differences */
+ .hword -3,-2,-1,0 /* lanes 0-3 of the 8-lane ramp */
+ .hword 1, 2, 3,4 /* lanes 4-7 of the 8-lane ramp */
+
+
+ .text
+ .global omxVCM4P10_PredictIntraChroma_8x8
+ .func omxVCM4P10_PredictIntraChroma_8x8
+omxVCM4P10_PredictIntraChroma_8x8: /* 8x8 intra prediction. r0 = left-neighbour column (one byte per row, stride = 1st stack arg), r1 = 8 above-neighbour bytes, r2 = above-left byte, r3 = destination (stride = 2nd stack arg), 3rd stack arg = mode index, 4th stack arg = availability flags. Returns 0 in r0. */
+ PUSH {r4-r10,lr} /* 8 core registers = 32 bytes */
+ VPUSH {d8-d15} /* 64 bytes, so the stack arguments start at sp + 0x60 */
+ LDR r8, =armVCM4P10_pIndexTable8x8
+ LDR r6,[sp,#0x68] /* mode index (3rd stack argument) */
+ LDR r4,[sp,#0x60] /* left-column stride (1st stack argument) */
+ LDR r5,[sp,#0x64] /* destination stride (2nd stack argument) */
+ LDR r7,[sp,#0x6c] /* availability flags (4th stack argument) */
+ LDR pc,[r8,r6,LSL #2] /* jump to the handler stored in table entry r6 */
+OMX_VC_CHROMA_DC: /* mode 0: fill with means of the available neighbours */
+ TST r7,#2 /* flag bit 1 gates every read through r0 (left column) */
+ BEQ L0xe8
+ ADD r9,r0,r4 /* r0 walks the even rows, r9 the odd rows, both by 2 * stride */
+ ADD r10,r4,r4
+ VLD1.8 {d1[0]},[r0],r10 /* gather the 8 left bytes into d1 */
+ VLD1.8 {d1[1]},[r9],r10
+ VLD1.8 {d1[2]},[r0],r10
+ VLD1.8 {d1[3]},[r9],r10
+ VLD1.8 {d1[4]},[r0],r10
+ VLD1.8 {d1[5]},[r9],r10
+ VLD1.8 {d1[6]},[r0],r10
+ VLD1.8 {d1[7]},[r9]
+ TST r7,#1 /* flag bit 0 gates the read through r1 (above row) */
+ BEQ L0xcc
+ VLD1.8 {d0},[r1] /* both neighbours present */
+ MOV r0,#0 /* return value; r0 is no longer needed as a pointer */
+ VPADDL.U8 d2,d0
+ VPADDL.U16 d3,d2 /* d3 = sums of above bytes 0-3 and 4-7 */
+ VPADDL.U8 d2,d1
+ VPADDL.U16 d1,d2 /* d1 = sums of left bytes 0-3 and 4-7 */
+ VADD.I32 d2,d3,d1 /* d2 = above + left sums per half */
+ VRSHR.U32 d2,d2,#3 /* rounded mean of 8 samples */
+ VRSHR.U32 d3,d3,#2 /* rounded mean of 4 above samples */
+ VRSHR.U32 d1,d1,#2 /* rounded mean of 4 left samples */
+ VMOV.I8 d5,#0xc /* d5 and d6 become VTBL byte-index vectors selecting one mean per 4-byte half */
+ VMOV.I8 d6,#0x4
+ VSHL.I64 d5,d5,#32
+ VSHR.U64 d6,d6,#32
+ VADD.I8 d6,d6,d5
+ VTBL.8 d0,{d2-d3},d5 /* d0 = pattern for rows 0-3 */
+ VTBL.8 d4,{d1-d2},d6 /* d4 = pattern for rows 4-7 */
+L0x9c: /* store d0 to rows 0-3 and d4 to rows 4-7 */
+ ADD r9,r3,r5
+ ADD r10,r5,r5
+ VST1.8 {d0},[r3],r10
+ VST1.8 {d0},[r9],r10
+ VST1.8 {d0},[r3],r10
+ VST1.8 {d0},[r9],r10
+ VST1.8 {d4},[r3],r10
+ VST1.8 {d4},[r9],r10
+ VST1.8 {d4},[r3],r10
+ VST1.8 {d4},[r9]
+ VPOP {d8-d15}
+ POP {r4-r10,pc}
+L0xcc: /* DC with only the left column available */
+ MOV r0,#0
+ VPADDL.U8 d2,d1
+ VPADDL.U16 d1,d2
+ VRSHR.U32 d1,d1,#2 /* rounded means of left bytes 0-3 and 4-7 */
+ VDUP.8 d0,d1[0] /* rows 0-3 take the upper-half mean */
+ VDUP.8 d4,d1[4] /* rows 4-7 take the lower-half mean */
+ B L0x9c
+L0xe8: /* DC without the left column */
+ TST r7,#1
+ BEQ L0x114
+ VLD1.8 {d0},[r1] /* only the above row available */
+ MOV r0,#0
+ VPADDL.U8 d2,d0
+ VPADDL.U16 d3,d2
+ VRSHR.U32 d3,d3,#2 /* rounded means of above bytes 0-3 and 4-7 */
+ VMOV.I8 d5,#0x4
+ VSHL.I64 d5,d5,#32 /* byte indices 0,0,0,0,4,4,4,4 */
+ VTBL.8 d0,{d3},d5 /* left half of each row = first mean, right half = second mean */
+ B L0x11c
+L0x114: /* neither neighbour available: constant 0x80 */
+ VMOV.I8 d0,#0x80
+ MOV r0,#0
+L0x11c: /* store the same 8 bytes (d0) to all 8 rows */
+ ADD r9,r3,r5
+ ADD r10,r5,r5
+ VST1.8 {d0},[r3],r10
+ VST1.8 {d0},[r9],r10
+ VST1.8 {d0},[r3],r10
+ VST1.8 {d0},[r9],r10
+ VST1.8 {d0},[r3],r10
+ VST1.8 {d0},[r9],r10
+ VST1.8 {d0},[r3],r10
+ VST1.8 {d0},[r9]
+ VPOP {d8-d15}
+ POP {r4-r10,pc}
+OMX_VC_CHROMA_VERT: /* mode 2: replicate the above row down all 8 rows */
+ VLD1.8 {d0},[r1]
+ MOV r0,#0
+ B L0x11c
+OMX_VC_CHROMA_HOR: /* mode 1: fill each row with its left-neighbour byte */
+ ADD r9,r0,r4
+ ADD r10,r4,r4
+ VLD1.8 {d0[]},[r0],r10 /* load one byte and replicate it to all 8 lanes */
+ VLD1.8 {d1[]},[r9],r10
+ VLD1.8 {d2[]},[r0],r10
+ VLD1.8 {d3[]},[r9],r10
+ VLD1.8 {d4[]},[r0],r10
+ VLD1.8 {d5[]},[r9],r10
+ VLD1.8 {d6[]},[r0],r10
+ VLD1.8 {d7[]},[r9]
+ B L0x28c
+OMX_VC_CHROMA_PLANE: /* mode 3: plane fit through the above row, left column and above-left byte */
+ ADD r9,r0,r4
+ ADD r10,r4,r4
+ VLD1.8 {d0},[r1] /* above row */
+ VLD1.8 {d2[0]},[r2] /* above-left byte */
+ VLD1.8 {d1[0]},[r0],r10 /* left column gathered into d1 */
+ VLD1.8 {d1[1]},[r9],r10
+ VLD1.8 {d1[2]},[r0],r10
+ VLD1.8 {d1[3]},[r9],r10
+ VLD1.8 {d1[4]},[r0],r10
+ VLD1.8 {d1[5]},[r9],r10
+ VLD1.8 {d1[6]},[r0],r10
+ VLD1.8 {d1[7]},[r9]
+ VREV64.8 d3,d0 /* reversed above row, used to form mirrored differences */
+ VSUBL.U8 q3,d3,d2
+ VSHR.U64 d3,d3,#8
+ VSUBL.U8 q2,d3,d0
+ VREV64.8 d3,d1 /* same construction for the left column */
+ VSUBL.U8 q7,d3,d2
+ VSHR.U64 d3,d3,#8
+ VSUBL.U8 q6,d3,d1
+ LDR r2, =armVCM4P10_MultiplierTableChroma8x8 /* r2 is free again after the above-left load */
+ VSHL.I64 d4,d4,#16
+ VEXT.8 d9,d4,d6,#2
+ VLD1.16 {d10},[r2]! /* weights 3,2,1,4 */
+ VSHL.I64 d12,d12,#16
+ VEXT.8 d16,d12,d14,#2
+ VMUL.I16 d11,d9,d10 /* weighted horizontal differences */
+ VMUL.I16 d3,d16,d10 /* weighted vertical differences */
+ VPADD.I16 d3,d11,d3
+ VPADDL.S16 d3,d3 /* d3 = two 32-bit weighted sums, horizontal then vertical */
+ VSHL.I32 d2,d3,#4
+ VADD.I32 d3,d3,d2 /* multiply by 17 */
+ VLD1.16 {d10,d11},[r2] /* q5 = ramp -3..4 */
+ VRSHR.S32 d3,d3,#5 /* rounded (17 * sum) >> 5 */
+ VADDL.U8 q0,d0,d1
+ VDUP.16 q0,d1[3] /* broadcast above[7] + left[7] */
+ VSHL.I16 q0,q0,#4 /* times 16 */
+ VDUP.16 q2,d3[0] /* horizontal slope in every lane */
+ VDUP.16 q3,d3[2] /* vertical slope in every lane */
+ VMUL.I16 q2,q2,q5 /* per-column term */
+ VMUL.I16 q3,q3,q5 /* per-row term, one lane per row */
+ VADD.I16 q2,q2,q0 /* per-column term plus base */
+ VDUP.16 q0,d6[0] /* broadcast each row term ... */
+ VDUP.16 q1,d6[1]
+ VDUP.16 q4,d6[2]
+ VDUP.16 q5,d6[3]
+ VDUP.16 q6,d7[0]
+ VDUP.16 q7,d7[1]
+ VDUP.16 q8,d7[2]
+ VDUP.16 q9,d7[3]
+ VADD.I16 q0,q2,q0 /* ... and add the shared column vector */
+ VADD.I16 q1,q2,q1
+ VADD.I16 q4,q2,q4
+ VADD.I16 q5,q2,q5
+ VADD.I16 q6,q2,q6
+ VADD.I16 q7,q2,q7
+ VADD.I16 q8,q2,q8
+ VADD.I16 q9,q2,q9
+ VQRSHRUN.S16 d0,q0,#5 /* rounded >> 5 with saturation to unsigned bytes, one D register per row */
+ VQRSHRUN.S16 d1,q1,#5
+ VQRSHRUN.S16 d2,q4,#5
+ VQRSHRUN.S16 d3,q5,#5
+ VQRSHRUN.S16 d4,q6,#5
+ VQRSHRUN.S16 d5,q7,#5
+ VQRSHRUN.S16 d6,q8,#5
+ VQRSHRUN.S16 d7,q9,#5
+L0x28c: /* store d0-d7 as rows 0-7 */
+ ADD r9,r3,r5
+ ADD r10,r5,r5
+ VST1.8 {d0},[r3],r10
+ VST1.8 {d1},[r9],r10
+ VST1.8 {d2},[r3],r10
+ VST1.8 {d3},[r9],r10
+ VST1.8 {d4},[r3],r10
+ VST1.8 {d5},[r9],r10
+ VST1.8 {d6},[r3],r10
+ VST1.8 {d7},[r9]
+ MOV r0,#0
+ VPOP {d8-d15}
+ POP {r4-r10,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S
new file mode 100644
index 0000000..53268f6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_16x16_s.S
@@ -0,0 +1,239 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+
+ .section .rodata
+ .align 4
+;//-------------------------------------------------------
+;// This table for implementing switch case of C in asm by
+;// the method of two levels of indexing.
+;//-------------------------------------------------------
+
+armVCM4P10_pIndexTable16x16: /* Jump table read by omxVCM4P10_PredictIntra_16x16 via LDR pc,[table, mode, LSL #2]; entry order is VERT, HOR, DC, PLANE. NOTE(review): the entries are absolute code addresses, which need load-time relocation in a position-independent build. */
+ .word OMX_VC_16X16_VERT, OMX_VC_16X16_HOR
+ .word OMX_VC_16X16_DC, OMX_VC_16X16_PLANE
+
+
+
+armVCM4P10_MultiplierTable16x16: /* 16-bit multipliers for the PLANE handler, loaded one 8-lane row at a time */
+ .hword 7, 6, 5, 4, 3, 2, 1, 8 /* weights applied to the neighbour differences */
+ .hword 0, 1, 2, 3, 4, 5, 6, 7 /* column ramp for output bytes 0-7 */
+ .hword 8, 9, 10, 11, 12, 13, 14, 15 /* column ramp for output bytes 8-15 */
+
+ .text
+
+ .global omxVCM4P10_PredictIntra_16x16
+ .func omxVCM4P10_PredictIntra_16x16
+omxVCM4P10_PredictIntra_16x16: /* 16x16 intra prediction. r0 = left-neighbour column (one byte per row, stride = 1st stack arg), r1 = 16 above-neighbour bytes, r2 = above-left byte, r3 = destination (stride = 2nd stack arg), 3rd stack arg = mode index, 4th stack arg = availability flags. Returns 0 in r0. */
+ PUSH {r4-r12,lr} /* 10 core registers = 40 bytes */
+ VPUSH {d8-d15} /* 64 bytes, so the stack arguments start at sp + 0x68 */
+ LDR r9, =armVCM4P10_pIndexTable16x16
+ LDR r6,[sp,#0x70] /* mode index (3rd stack argument) */
+ LDR r4,[sp,#0x68] /* left-column stride (1st stack argument) */
+ LDR r5,[sp,#0x6c] /* destination stride (2nd stack argument) */
+ LDR r7,[sp,#0x74] /* availability flags (4th stack argument) */
+ MOV r12,#0x10 /* row counter used by the HOR and PLANE loops */
+ LDR pc,[r9,r6,LSL #2] /* jump to the handler stored in table entry r6 */
+OMX_VC_16X16_VERT: /* mode 0: replicate the 16 above bytes down all 16 rows */
+ VLD1.8 {d0,d1},[r1]
+ ADD r8,r3,r5 /* r3 walks the even rows, r8 the odd rows, both by 2 * stride */
+ ADD r10,r5,r5
+ VST1.8 {d0,d1},[r3],r10
+ VST1.8 {d0,d1},[r8],r10
+ VST1.8 {d0,d1},[r3],r10
+ VST1.8 {d0,d1},[r8],r10
+ VST1.8 {d0,d1},[r3],r10
+ VST1.8 {d0,d1},[r8],r10
+ VST1.8 {d0,d1},[r3],r10
+ VST1.8 {d0,d1},[r8],r10
+ VST1.8 {d0,d1},[r3],r10
+ VST1.8 {d0,d1},[r8],r10
+ VST1.8 {d0,d1},[r3],r10
+ VST1.8 {d0,d1},[r8],r10
+ VST1.8 {d0,d1},[r3],r10
+ VST1.8 {d0,d1},[r8],r10
+ VST1.8 {d0,d1},[r3]
+ VST1.8 {d0,d1},[r8]
+ MOV r0,#0
+ VPOP {d8-d15}
+ POP {r4-r12,pc}
+OMX_VC_16X16_HOR: /* mode 1: fill each row with its left-neighbour byte */
+ ADD r8,r0,r4 /* r0/r8 read even/odd left bytes, r3/r11 write even/odd rows */
+ ADD r4,r4,r4
+ ADD r11,r3,r5
+ ADD r5,r5,r5
+L0x8c: /* 8 rows per iteration, two iterations (r12 goes 16 -> 8 -> 0) */
+ VLD1.8 {d2[],d3[]},[r0],r4 /* load one byte and replicate it to all 16 lanes */
+ VLD1.8 {d0[],d1[]},[r8],r4
+ SUBS r12,r12,#8 /* flags survive until the BNE: the NEON loads and stores leave them alone */
+ VST1.8 {d2,d3},[r3],r5
+ VST1.8 {d0,d1},[r11],r5
+ VLD1.8 {d2[],d3[]},[r0],r4
+ VLD1.8 {d0[],d1[]},[r8],r4
+ VST1.8 {d2,d3},[r3],r5
+ VST1.8 {d0,d1},[r11],r5
+ VLD1.8 {d2[],d3[]},[r0],r4
+ VLD1.8 {d0[],d1[]},[r8],r4
+ VST1.8 {d2,d3},[r3],r5
+ VST1.8 {d0,d1},[r11],r5
+ VLD1.8 {d2[],d3[]},[r0],r4
+ VLD1.8 {d0[],d1[]},[r8],r4
+ VST1.8 {d2,d3},[r3],r5
+ VST1.8 {d0,d1},[r11],r5
+ BNE L0x8c
+ MOV r0,#0
+ VPOP {d8-d15}
+ POP {r4-r12,pc}
+OMX_VC_16X16_DC: /* mode 2: fill with the mean of the available neighbours */
+ MOV r11,#0 /* r11 counts how many neighbour sets were summed */
+ TST r7,#2 /* flag bit 1 gates every read through r0 (left column) */
+ BEQ L0x14c
+ ADD r8,r0,r4
+ ADD r10,r4,r4
+ VLD1.8 {d2[0]},[r0],r10 /* gather the 16 left bytes into q1 */
+ VLD1.8 {d2[1]},[r8],r10
+ VLD1.8 {d2[2]},[r0],r10
+ VLD1.8 {d2[3]},[r8],r10
+ VLD1.8 {d2[4]},[r0],r10
+ VLD1.8 {d2[5]},[r8],r10
+ VLD1.8 {d2[6]},[r0],r10
+ VLD1.8 {d2[7]},[r8],r10
+ VLD1.8 {d3[0]},[r0],r10
+ VLD1.8 {d3[1]},[r8],r10
+ VLD1.8 {d3[2]},[r0],r10
+ VLD1.8 {d3[3]},[r8],r10
+ VLD1.8 {d3[4]},[r0],r10
+ VLD1.8 {d3[5]},[r8],r10
+ VLD1.8 {d3[6]},[r0],r10
+ VLD1.8 {d3[7]},[r8]
+ VPADDL.U8 q0,q1
+ ADD r11,r11,#1
+ VPADD.I16 d0,d0,d1
+ VPADDL.U16 d0,d0
+ VPADDL.U32 d6,d0 /* d6 = sum of the 16 left bytes */
+ VRSHR.U64 d8,d6,#4 /* left-only mean, rounded */
+L0x14c:
+ TST r7,#1 /* flag bit 0 gates the read through r1 (above row) */
+ BEQ L0x170
+ VLD1.8 {d0,d1},[r1]
+ ADD r11,r11,#1
+ VPADDL.U8 q0,q0
+ VPADD.I16 d0,d0,d1
+ VPADDL.U16 d0,d0
+ VPADDL.U32 d7,d0 /* d7 = sum of the 16 above bytes */
+ VRSHR.U64 d8,d7,#4 /* above-only mean, rounded */
+L0x170:
+ CMP r11,#2
+ BNE L0x180
+ VADD.I64 d8,d7,d6 /* both present: mean over all 32 neighbours */
+ VRSHR.U64 d8,d8,#5
+L0x180:
+ VDUP.8 q3,d8[0] /* broadcast the mean; overwritten below when nothing was available */
+ CMP r11,#0
+ ADD r8,r3,r5
+ ADD r10,r5,r5
+ BNE L0x198
+ VMOV.I8 q3,#0x80 /* no neighbours: constant 0x80 */
+L0x198: /* store q3 to all 16 rows */
+ VST1.8 {d6,d7},[r3],r10
+ VST1.8 {d6,d7},[r8],r10
+ VST1.8 {d6,d7},[r3],r10
+ VST1.8 {d6,d7},[r8],r10
+ VST1.8 {d6,d7},[r3],r10
+ VST1.8 {d6,d7},[r8],r10
+ VST1.8 {d6,d7},[r3],r10
+ VST1.8 {d6,d7},[r8],r10
+ VST1.8 {d6,d7},[r3],r10
+ VST1.8 {d6,d7},[r8],r10
+ VST1.8 {d6,d7},[r3],r10
+ VST1.8 {d6,d7},[r8],r10
+ VST1.8 {d6,d7},[r3],r10
+ VST1.8 {d6,d7},[r8],r10
+ VST1.8 {d6,d7},[r3],r10
+ VST1.8 {d6,d7},[r8],r10
+ MOV r0,#0
+ VPOP {d8-d15}
+ POP {r4-r12,pc}
+OMX_VC_16X16_PLANE: /* mode 3: plane fit through the above row, left column and above-left byte */
+ LDR r9, =armVCM4P10_MultiplierTable16x16
+ VLD1.8 {d0,d1},[r1] /* above row */
+ VLD1.8 {d4[0]},[r2] /* above-left byte */
+ ADD r8,r0,r4
+ ADD r10,r4,r4
+ VLD1.8 {d2[0]},[r0],r10 /* left column gathered into q1 */
+ VLD1.8 {d2[1]},[r8],r10
+ VLD1.8 {d2[2]},[r0],r10
+ VLD1.8 {d2[3]},[r8],r10
+ VLD1.8 {d2[4]},[r0],r10
+ VLD1.8 {d2[5]},[r8],r10
+ VLD1.8 {d2[6]},[r0],r10
+ VLD1.8 {d2[7]},[r8],r10
+ VLD1.8 {d3[0]},[r0],r10
+ VLD1.8 {d3[1]},[r8],r10
+ VLD1.8 {d3[2]},[r0],r10
+ VLD1.8 {d3[3]},[r8],r10
+ VLD1.8 {d3[4]},[r0],r10
+ VLD1.8 {d3[5]},[r8],r10
+ VLD1.8 {d3[6]},[r0],r10
+ VLD1.8 {d3[7]},[r8]
+ VREV64.8 d5,d1 /* reversed upper half of the above row, used to form mirrored differences */
+ VSUBL.U8 q3,d5,d4
+ VSHR.U64 d5,d5,#8
+ VSUBL.U8 q4,d5,d0
+ VSHL.I64 d9,d9,#16
+ VEXT.8 d9,d9,d6,#2
+ VREV64.8 d12,d3 /* same construction for the left column */
+ VSUBL.U8 q7,d12,d4
+ VSHR.U64 d12,d12,#8
+ VSUBL.U8 q8,d12,d2
+ VLD1.16 {d20,d21},[r9]! /* q10 = weights 7,6,5,4,3,2,1,8 */
+ VSHL.I64 d17,d17,#16
+ VEXT.8 d17,d17,d14,#2
+ VMULL.S16 q11,d8,d20 /* weighted horizontal differences */
+ VMULL.S16 q12,d16,d20 /* weighted vertical differences */
+ VMLAL.S16 q11,d9,d21
+ VMLAL.S16 q12,d17,d21
+ VPADD.I32 d22,d23,d22
+ VPADD.I32 d23,d25,d24
+ VPADDL.S32 q11,q11 /* d22 = horizontal weighted sum, d23 = vertical weighted sum */
+ VSHL.I64 q12,q11,#2
+ VADD.I64 q11,q11,q12 /* multiply by 5 */
+ VRSHR.S64 q11,q11,#6 /* rounded (5 * sum) >> 6: d22 = horizontal slope, d23 = vertical slope */
+ VSHL.I64 q12,q11,#3
+ VSUB.I64 q12,q12,q11 /* 7 * slope, for the offset to the top-left sample */
+ VLD1.16 {d20,d21},[r9]! /* q10 = column ramp 0..7 */
+ VDUP.16 q6,d22[0] /* horizontal slope in every lane */
+ VDUP.16 q7,d23[0] /* vertical slope in every lane; added once per row in the loop */
+ VADDL.U8 q11,d1,d3
+ VSHL.I16 q11,q11,#4
+ VDUP.16 q11,d23[3] /* 16 * (above[15] + left[15]) in every lane */
+ VADD.I64 d1,d24,d25 /* 7 * (horizontal slope + vertical slope) */
+ VLD1.16 {d24,d25},[r9] /* q12 = column ramp 8..15 */
+ VDUP.16 q13,d1[0]
+ VSUB.I16 q13,q11,q13 /* value term at row 0, column 0 */
+ VMUL.I16 q5,q6,q10 /* slope * column for columns 0-7 */
+ VMUL.I16 q6,q6,q12 /* slope * column for columns 8-15 */
+ VADD.I16 q0,q5,q13 /* row 0, columns 0-7 */
+ VADD.I16 q1,q6,q13 /* row 0, columns 8-15 */
+L0x2d4: /* one output row per iteration, 16 iterations (r12 counts down from 16) */
+ VQRSHRUN.S16 d6,q0,#5 /* rounded >> 5 with saturation to unsigned bytes */
+ VQRSHRUN.S16 d7,q1,#5
+ SUBS r12,r12,#1
+ VST1.8 {d6,d7},[r3],r5
+ VADD.I16 q0,q0,q7 /* step to the next row */
+ VADD.I16 q1,q1,q7
+ BNE L0x2d4
+ MOV r0,#0
+ VPOP {d8-d15}
+ POP {r4-r12,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S
new file mode 100644
index 0000000..aa6d7ef
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_PredictIntra_4x4_s.S
@@ -0,0 +1,261 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+
+ .section .rodata
+ .align 4
+
+armVCM4P10_pSwitchTable4x4: /* Jump table read by omxVCM4P10_PredictIntra_4x4 via LDR pc,[table, mode, LSL #2]; nine entries, mode 0 = VERT through mode 8 = HU. NOTE(review): the entries are absolute code addresses, which need load-time relocation in a position-independent build. */
+ .word OMX_VC_4x4_VERT, OMX_VC_4x4_HOR
+ .word OMX_VC_4x4_DC, OMX_VC_4x4_DIAG_DL
+ .word OMX_VC_4x4_DIAG_DR, OMX_VC_4x4_VR
+ .word OMX_VC_4x4_HD, OMX_VC_4x4_VL
+ .word OMX_VC_4x4_HU
+
+ .text
+
+ .global omxVCM4P10_PredictIntra_4x4
+ .func omxVCM4P10_PredictIntra_4x4
+omxVCM4P10_PredictIntra_4x4: /* 4x4 intra prediction. r0 = left-neighbour column (one byte per row, stride = 1st stack arg), r1 = above-neighbour bytes, r2 = above-left byte, r3 = destination (stride = 2nd stack arg), 3rd stack arg = mode index, 4th stack arg = availability flags. Returns 0 in r0. */
+ PUSH {r4-r12,lr} /* 10 core registers = 40 bytes */
+ VPUSH {d8-d12} /* 5 NEON D registers = 40 bytes, so the stack arguments start at sp + 0x50 */
+ LDR r8, =armVCM4P10_pSwitchTable4x4
+ LDRD r6,r7,[sp,#0x58] /* r6 = mode index, r7 = availability flags */
+ LDRD r4,r5,[sp,#0x50] /* r4 = left-column stride, r5 = destination stride */
+ LDR pc,[r8,r6,LSL #2] /* jump to the handler stored in table entry r6 */
+OMX_VC_4x4_HOR: /* mode 1: fill each row with its left-neighbour byte */
+ ADD r9,r0,r4 /* r0 walks the even rows, r9 the odd rows */
+ ADD r10,r4,r4
+ VLD1.8 {d0[]},[r0],r10 /* load one byte and replicate it to all lanes */
+ VLD1.8 {d1[]},[r9],r10
+ VLD1.8 {d2[]},[r0]
+ VLD1.8 {d3[]},[r9]
+ ADD r11,r3,r5
+ ADD r12,r5,r5
+ VST1.32 {d0[0]},[r3],r12
+ VST1.32 {d1[0]},[r11],r12
+ VST1.32 {d2[0]},[r3]
+ VST1.32 {d3[0]},[r11]
+ B L0x348
+OMX_VC_4x4_VERT: /* mode 0: replicate the 4 above bytes down all 4 rows */
+ VLD1.32 {d0[0]},[r1]
+ ADD r11,r3,r5
+ ADD r12,r5,r5
+L0x58: /* shared store: low 4 bytes of d0 to all 4 rows; expects r11 = r3 + stride and r12 = 2 * stride */
+ VST1.32 {d0[0]},[r3],r12
+ VST1.32 {d0[0]},[r11],r12
+ VST1.32 {d0[0]},[r3]
+ VST1.32 {d0[0]},[r11]
+ B L0x348
+OMX_VC_4x4_DC: /* mode 2: fill with the mean of the available neighbours */
+ TST r7,#2 /* flag bit 1 gates every read through r0 (left column) */
+ BEQ L0xdc
+ ADD r9,r0,r4
+ ADD r10,r4,r4
+ VLD1.8 {d0[0]},[r0],r10 /* left bytes into d0 bytes 0-3 */
+ VLD1.8 {d0[1]},[r9],r10
+ VLD1.8 {d0[2]},[r0]
+ VLD1.8 {d0[3]},[r9]
+ TST r7,#1 /* flag bit 0 gates the read through r1 (above row) */
+ BEQ L0xbc
+ VLD1.32 {d0[1]},[r1] /* above bytes into d0 bytes 4-7 */
+ MOV r0,#0
+ VPADDL.U8 d1,d0
+ VPADDL.U16 d1,d1
+ VPADDL.U32 d1,d1 /* sum of all 8 neighbours */
+ VRSHR.U64 d1,d1,#3 /* rounded mean of 8 */
+ ADD r11,r3,r5
+ ADD r12,r5,r5
+ VDUP.8 d0,d1[0]
+ B L0x58
+L0xbc: /* left column only */
+ MOV r0,#0
+ VPADDL.U8 d1,d0
+ VPADDL.U16 d1,d1 /* 32-bit lane 0 = sum of the 4 left bytes */
+ VRSHR.U32 d1,d1,#2 /* rounded mean of 4 */
+ ADD r11,r3,r5
+ ADD r12,r5,r5
+ VDUP.8 d0,d1[0]
+ B L0x58
+L0xdc: /* no left column */
+ TST r7,#1
+ BEQ L0x108
+ VLD1.32 {d0[0]},[r1] /* above row only */
+ MOV r0,#0
+ VPADDL.U8 d1,d0
+ VPADDL.U16 d1,d1
+ VRSHR.U32 d1,d1,#2 /* rounded mean of 4 */
+ ADD r11,r3,r5
+ ADD r12,r5,r5
+ VDUP.8 d0,d1[0]
+ B L0x58
+L0x108: /* neither neighbour available: constant 0x80 */
+ VMOV.I8 d0,#0x80
+ MOV r0,#0
+ ADD r11,r3,r5
+ ADD r12,r5,r5
+ B L0x58
+OMX_VC_4x4_DIAG_DL: /* mode 3: each row is the 3-tap filtered above row shifted one byte further left */
+ TST r7,#0x40 /* flag bit 6 selects an 8-byte above row; otherwise 4 bytes padded with the last one */
+ BEQ L0x138
+ VLD1.8 {d3},[r1]
+ VDUP.8 d2,d3[7] /* padding = last above byte */
+ VEXT.8 d4,d3,d2,#1 /* above row shifted by one byte */
+ VEXT.8 d5,d3,d2,#2 /* above row shifted by two bytes */
+ B L0x14c
+L0x138: /* only 4 above bytes: load them into the top half and pad with byte 7 */
+ VLD1.32 {d0[1]},[r1]
+ VDUP.8 d2,d0[7]
+ VEXT.8 d3,d0,d2,#4
+ VEXT.8 d4,d0,d2,#5
+ VEXT.8 d5,d0,d2,#6
+L0x14c:
+ VHADD.U8 d6,d3,d5 /* VHADD followed by VRHADD forms the 3-tap average of d3, d4 (weighted twice) and d5 */
+ VRHADD.U8 d6,d6,d4
+ VST1.32 {d6[0]},[r3],r5
+ VEXT.8 d6,d6,d6,#1 /* rotate by one byte for the next row */
+ VST1.32 {d6[0]},[r3],r5
+ VEXT.8 d6,d6,d6,#1
+ VST1.32 {d6[0]},[r3],r5
+ VEXT.8 d6,d6,d6,#1
+ VST1.32 {d6[0]},[r3]
+ B L0x348
+OMX_VC_4x4_DIAG_DR: /* mode 4: 3-tap filter over left column, above-left and above row; rows are written bottom-up */
+ VLD1.32 {d0[0]},[r1] /* above bytes into d0 bytes 0-3 */
+ VLD1.8 {d1[7]},[r2] /* above-left into d1 byte 7 */
+ ADD r9,r0,r4
+ ADD r10,r4,r4
+ ADD r1,r3,r5 /* r1, r4 and r6 are reused as pointers to destination rows 1, 2 and 3 */
+ VLD1.8 {d1[6]},[r0],r10 /* left column stored in reverse below the above-left byte */
+ VLD1.8 {d1[5]},[r9],r10
+ VLD1.8 {d1[4]},[r0]
+ VLD1.8 {d1[3]},[r9]
+ VEXT.8 d3,d1,d0,#3
+ ADD r4,r1,r5
+ VEXT.8 d4,d1,d0,#4
+ ADD r6,r4,r5
+ VEXT.8 d5,d1,d0,#5
+ VHADD.U8 d6,d3,d5
+ VRHADD.U8 d6,d6,d4
+ VST1.32 {d6[0]},[r6] /* row 3 */
+ VEXT.8 d6,d6,d6,#1
+ VST1.32 {d6[0]},[r4] /* row 2 */
+ VEXT.8 d6,d6,d6,#1
+ VST1.32 {d6[0]},[r1] /* row 1 */
+ VEXT.8 d6,d6,d6,#1
+ VST1.32 {d6[0]},[r3] /* row 0 */
+ B L0x348
+OMX_VC_4x4_VR: /* mode 5; reads 4 above bytes, the above-left byte and 3 left bytes */
+ VLD1.32 {d0[0]},[r1]
+ VLD1.8 {d0[7]},[r2]
+ VLD1.8 {d1[7]},[r0],r4
+ VLD1.8 {d2[7]},[r0],r4
+ VLD1.8 {d1[6]},[r0]
+ VEXT.8 d12,d0,d0,#7
+ VEXT.8 d3,d1,d12,#6
+ VEXT.8 d4,d2,d12,#7
+ VEXT.8 d5,d1,d0,#7
+ VEXT.8 d6,d2,d0,#7
+ VEXT.8 d11,d1,d12,#7
+ VHADD.U8 d8,d6,d12 /* d8 = 3-tap filtered values */
+ VRHADD.U8 d8,d8,d11
+ VHADD.U8 d7,d3,d5 /* d7 = 3-tap filtered values */
+ VRHADD.U8 d7,d7,d4
+ VEXT.8 d10,d8,d8,#1
+ ADD r11,r3,r5
+ ADD r12,r5,r5
+ VEXT.8 d9,d7,d7,#1
+ VST1.32 {d10[0]},[r3],r12 /* row 0 */
+ VST1.32 {d9[0]},[r11],r12 /* row 1 */
+ VST1.32 {d8[0]},[r3],r12 /* row 2 */
+ VST1.32 {d7[0]},[r11] /* row 3 */
+ B L0x348
+OMX_VC_4x4_HD: /* mode 6; reads the above row, the above-left byte and 4 left bytes */
+ VLD1.8 {d0},[r1] /* NOTE(review): loads 8 above bytes with no availability check, unlike DIAG_DL and VL -- confirm callers always provide them */
+ VLD1.8 {d1[7]},[r2]
+ ADD r9,r0,r4
+ ADD r10,r4,r4
+ VLD1.8 {d1[6]},[r0],r10
+ VLD1.8 {d1[5]},[r9],r10
+ VLD1.8 {d1[4]},[r0]
+ VLD1.8 {d1[3]},[r9]
+ VEXT.8 d3,d1,d0,#3
+ VEXT.8 d4,d1,d0,#2
+ VEXT.8 d5,d1,d0,#1
+ VHADD.U8 d7,d3,d5 /* d7 = 3-tap filtered values */
+ VRHADD.U8 d7,d7,d4
+ VRHADD.U8 d8,d4,d3 /* d8 = 2-tap rounded averages */
+ VSHL.I64 d8,d8,#24
+ VSHL.I64 d6,d7,#16
+ VZIP.8 d8,d6 /* interleave the 2-tap and 3-tap results */
+ VEXT.8 d7,d7,d7,#6
+ VEXT.8 d8,d6,d7,#2
+ ADD r11,r3,r5
+ ADD r12,r5,r5
+ VST1.32 {d8[1]},[r3],r12 /* row 0 */
+ VST1.32 {d6[1]},[r11],r12 /* row 1 */
+ VST1.32 {d8[0]},[r3] /* row 2 */
+ VST1.32 {d6[0]},[r11] /* row 3 */
+ B L0x348
+OMX_VC_4x4_VL: /* mode 7: alternating 2-tap and 3-tap rows built from the above row */
+ TST r7,#0x40 /* flag bit 6 selects an 8-byte above row; otherwise 4 bytes padded with the last one */
+ BEQ L0x2b4
+ VLD1.8 {d3},[r1]
+ VEXT.8 d4,d3,d3,#1
+ VEXT.8 d5,d4,d4,#1
+ B L0x2c8
+L0x2b4: /* only 4 above bytes: load them into the top half and pad with byte 7 */
+ VLD1.32 {d0[1]},[r1]
+ VDUP.8 d2,d0[7]
+ VEXT.8 d3,d0,d2,#4
+ VEXT.8 d4,d0,d2,#5
+ VEXT.8 d5,d0,d2,#6
+L0x2c8:
+ VRHADD.U8 d7,d4,d3 /* d7 = 2-tap rounded averages */
+ VHADD.U8 d10,d3,d5 /* d10 = 3-tap filtered values */
+ VRHADD.U8 d10,d10,d4
+ VEXT.8 d8,d7,d7,#1
+ ADD r11,r3,r5
+ ADD r12,r5,r5
+ VEXT.8 d9,d10,d8,#1
+ VST1.32 {d7[0]},[r3],r12 /* row 0 */
+ VST1.32 {d10[0]},[r11],r12 /* row 1 */
+ VST1.32 {d8[0]},[r3] /* row 2 */
+ VST1.32 {d9[0]},[r11] /* row 3 */
+ B L0x348
+OMX_VC_4x4_HU: /* mode 8: built from the left column only, padded with its last byte */
+ ADD r9,r0,r4
+ ADD r10,r4,r4
+ VLD1.8 {d1[4]},[r0],r10 /* left bytes into d1 bytes 4-7 */
+ VLD1.8 {d1[5]},[r9],r10
+ VLD1.8 {d1[6]},[r0]
+ VLD1.8 {d1[7]},[r9]
+ VDUP.8 d2,d1[7] /* padding = last left byte */
+ VEXT.8 d3,d1,d2,#4
+ VEXT.8 d4,d1,d2,#5
+ VEXT.8 d5,d1,d2,#6
+ VHADD.U8 d7,d3,d5 /* d7 = 3-tap filtered values */
+ VRHADD.U8 d7,d7,d4
+ VRHADD.U8 d8,d4,d3 /* d8 = 2-tap rounded averages */
+ VZIP.8 d8,d7 /* interleave the 2-tap and 3-tap results */
+ VST1.32 {d8[0]},[r3],r5
+ VEXT.8 d8,d8,d8,#2 /* each following row starts two bytes later */
+ VST1.32 {d8[0]},[r3],r5
+ VEXT.8 d8,d8,d8,#2
+ VST1.32 {d8[0]},[r3],r5
+ VST1.32 {d7[0]},[r3]
+L0x348: /* common exit */
+ MOV r0,#0
+ VPOP {d8-d12}
+ POP {r4-r12,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantChromaDCFromPair_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantChromaDCFromPair_s.S
new file mode 100644
index 0000000..28a89cb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantChromaDCFromPair_s.S
@@ -0,0 +1,54 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global omxVCM4P10_TransformDequantChromaDCFromPair
+ .func omxVCM4P10_TransformDequantChromaDCFromPair
+omxVCM4P10_TransformDequantChromaDCFromPair: /* Unpacks up to four coefficients from a packed byte stream into the 4-halfword buffer at r1, applies a 2x2 add/subtract transform, scales by a factor looked up with r2, and writes the result back to r1. r0 = address of the stream pointer (updated on exit). Returns 0 in r0. */
+ push {r4-r10, lr}
+ ldr r9, [r0,#0] /* r9 = current stream position */
+ vmov.i16 d0, #0
+ mov r8, #0x1f /* mask used for the store offset below */
+ vst1.16 {d0}, [r1] /* clear the 4 output halfwords */
+ ldrb r6, [r9], #1 /* first flag byte */
+unpackLoop:
+ tst r6, #0x10 /* flag bit 4 set: a 16-bit value follows; clear: an 8-bit value follows */
+ ldrnesb r5, [r9, #1] /* 16-bit case: signed high byte */
+ ldrneb r4, [r9], #2 /* 16-bit case: low byte, advance by 2 */
+ and r7, r8, r6, lsl #1 /* store offset in bytes = (flag byte << 1) & 0x1f */
+ ldreqsb r4, [r9], #1 /* 8-bit case: signed value, advance by 1 */
+ orrne r4, r4, r5, lsl #8 /* 16-bit case: combine the two bytes */
+ tst r6, #0x20 /* flag bit 5 set: this was the last value */
+ ldreqb r6, [r9], #1 /* not last: fetch the next flag byte (r7 was already derived from the old one) */
+ strh r4, [r1, r7] /* strh and ldrb leave the flags alone, so the beq still tests bit 5 */
+ beq unpackLoop
+ ldmia r1, {r3, r4} /* r3 = coefficients 0 and 1, r4 = coefficients 2 and 3 (two halfwords each) */
+ str r9, [r0, #0] /* publish the advanced stream position */
+ ldr r5, =armVCM4P10_QPDivTable
+ ldr r6, =armVCM4P10_VMatrixQPModTable
+ saddsubx r3, r3, r3 /* pre-UAL name of SASX: halfword sum and difference of the pair in r3 */
+ saddsubx r4, r4, r4 /* same for the pair in r4 */
+ ldrsb r9, [r5, r2] /* shift amount from the first table, indexed by r2 */
+ ldrsb r2, [r6, r2] /* multiplier from the second table, indexed by r2 */
+ sadd16 r5, r3, r4 /* halfword-wise sums of the two pairs */
+ ssub16 r6, r3, r4 /* halfword-wise differences of the two pairs */
+ lsl r2, r2, r9 /* scale = multiplier << shift */
+ vmov d0, r5, r6
+ vrev32.16 d0, d0 /* swap the halfwords inside each word to restore coefficient order */
+ vdup.16 d1, r2
+ vmull.s16 q1, d0, d1 /* 32-bit products */
+ vshrn.i32 d2, q1, #1 /* (value * scale) >> 1, narrowed to 16 bits */
+ vst1.16 {d2}, [r1]
+ mov r0, #0
+ pop {r4-r10, pc}
+ .endfunc
+
+ .end
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S
new file mode 100644
index 0000000..a3a0715
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S
@@ -0,0 +1,76 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global armVCM4P10_InvTransformDequantLumaDC4x4
+ .func armVCM4P10_InvTransformDequantLumaDC4x4
+armVCM4P10_InvTransformDequantLumaDC4x4: /* In-place 4x4 add/subtract transform and scaling of the 16 halfwords at r0; r1 indexes the two lookup tables that form the scale factor. r0 is not set as a return value. */
+ PUSH {r4-r6,lr}
+ VPUSH {d8-d13}
+ VLD4.16 {d0,d1,d2,d3},[r0] /* de-interleaving load: d0-d3 each receive every fourth halfword */
+ LDR r2, =armVCM4P10_QPDivTable
+ LDR r3, =armVCM4P10_VMatrixQPModTable
+ VADD.I16 d4,d0,d1 /* first butterfly pass */
+ VADD.I16 d5,d2,d3
+ VSUB.I16 d6,d0,d1
+ LDRSB r4,[r2,r1] /* shift amount from the first table */
+ VSUB.I16 d7,d2,d3
+ LDRSB r5,[r3,r1] /* multiplier from the second table */
+ VADD.I16 d0,d4,d5
+ VSUB.I16 d1,d4,d5
+ VSUB.I16 d2,d6,d7
+ LSL r5,r5,r4 /* scale = multiplier << shift */
+ VADD.I16 d3,d6,d7
+ VTRN.16 d0,d1 /* 4x4 transpose of 16-bit elements */
+ VTRN.16 d2,d3
+ VTRN.32 q0,q1
+ VADD.I16 d4,d0,d1 /* second butterfly pass on the transposed data */
+ VADD.I16 d5,d2,d3
+ VSUB.I16 d6,d0,d1
+ VSUB.I16 d7,d2,d3
+ VADD.I16 d0,d4,d5
+ VSUB.I16 d1,d4,d5
+ VSUB.I16 d2,d6,d7
+ VADD.I16 d3,d6,d7
+ VDUP.16 d5,r5 /* scale in every 16-bit lane */
+ VMOV.I32 q3,#0x2 /* accumulators preloaded with the rounding constant 2 */
+ VMOV.I32 q4,#0x2
+ VMOV.I32 q5,#0x2
+ VMOV.I32 q6,#0x2
+ VMLAL.S16 q3,d0,d5 /* 2 + value * scale */
+ VMLAL.S16 q4,d1,d5
+ VMLAL.S16 q5,d2,d5
+ VMLAL.S16 q6,d3,d5
+ VSHRN.I32 d0,q3,#2 /* >> 2 and narrow back to 16 bits */
+ VSHRN.I32 d1,q4,#2
+ VSHRN.I32 d2,q5,#2
+ VSHRN.I32 d3,q6,#2
+ VST1.16 {d0,d1,d2,d3},[r0] /* write the 16 results back over the input */
+ VPOP {d8-d13}
+ POP {r4-r6,pc}
+ .endfunc
+
+.global omxVCM4P10_TransformDequantLumaDCFromPair
+.func omxVCM4P10_TransformDequantLumaDCFromPair
+omxVCM4P10_TransformDequantLumaDCFromPair: /* Wrapper: unpack a 4x4 block with armVCM4P10_UnpackBlock4x4, then run armVCM4P10_InvTransformDequantLumaDC4x4 on the buffer passed in r1 with the index passed in r2. Returns 0 in r0. */
+ PUSH {r4-r6,lr}
+ MOV r4,r1 /* keep the buffer pointer across the first call */
+ MOV r5,r2 /* keep the table index across the first call */
+ BL armVCM4P10_UnpackBlock4x4 /* called with r0 and r1 exactly as received */
+ MOV r0,r4 /* second call: r0 = buffer */
+ MOV r1,r5 /* second call: r1 = table index */
+ BL armVCM4P10_InvTransformDequantLumaDC4x4
+ MOV r0,#0
+ POP {r4-r6,pc}
+ .endfunc
+
+ .end
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
new file mode 100755
index 0000000..74b5505
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
@@ -0,0 +1,37 @@
+/**
+ *
+ * File Name: armVCM4P2_Huff_Tables_VLC.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ *
+ * File: armVCM4P2_Huff_Tables_VLC.h
+ * Description: Declares tables used for Huffman coding and decoding
+ * in MP4P2 codec.
+ *
+ */
+
+#ifndef _OMXHUFFTAB_H_
+#define _OMXHUFFTAB_H_ /* The includer must already have OMX_U8 and OMX_U16 declared; this header includes nothing itself. */
+
+
+extern const OMX_U16 armVCM4P2_IntraVlcL0L1[200]; /* presumably the intra-block VLC table -- confirm against its definition */
+
+
+extern const OMX_U16 armVCM4P2_InterVlcL0L1[200]; /* presumably the inter-block VLC table -- confirm against its definition */
+
+extern const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64]; /* presumably a combined luma/chroma intra-DC index table */
+//extern const OMX_U16 armVCM4P2_aIntraDCChromaIndex[32];
+extern const OMX_U16 armVCM4P2_aVlcMVD[124]; /* presumably the motion-vector-difference VLC table */
+
+extern const OMX_U8 armVCM4P2_InterL0L1LMAX[73]; /* LMAX/RMAX tables: presumably maximum level and run limits -- confirm */
+extern const OMX_U8 armVCM4P2_InterL0L1RMAX[35];
+extern const OMX_U8 armVCM4P2_IntraL0L1LMAX[53];
+extern const OMX_U8 armVCM4P2_IntraL0L1RMAX[40]; /* this declaration previously lacked its terminating semicolon */
+
+#endif /* _OMXHUFFTAB_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
new file mode 100755
index 0000000..e95203a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
@@ -0,0 +1,25 @@
+/**
+ *
+ * File Name: armVCM4P2_ZigZag_Tables.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ *
+ * File: armVCM4P2_Zigzag_Tables.h
+ * Description: Declares Tables used for Zigzag scan in MP4P2 codec.
+ *
+ */
+
+#ifndef _OMXZIGZAGTAB_H
+#define _OMXZIGZAGTAB_H /* The includer must already have OMX_U8 declared; this header includes nothing itself. */
+
+extern const OMX_U8 armVCM4P2_aClassicalZigzagScan [192]; /* 192 = 3 * 64 entries; presumably the three scan orders stored back to back, replacing the two commented-out tables below -- confirm against the definition */
+//extern const OMX_U8 armVCM4P2_aHorizontalZigzagScan [64];
+//extern const OMX_U8 armVCM4P2_aVerticalZigzagScan [64];
+
+#endif /* _OMXZIGZAGTAB_H */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Clip8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Clip8_s.s
new file mode 100755
index 0000000..95fe6d2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Clip8_s.s
@@ -0,0 +1,82 @@
+; /**
+; *
+; * File Name: armVCM4P2_Clip8_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 12290
+; * Date: Wednesday, April 9, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains module for Clipping 16 bit value to [0,255] Range
+; */
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+
+ M_VARIANTS CortexA8
+
+ IF CortexA8
+;//Input Arguments
+
+pSrc RN 0 ;// r0: source of signed 16-bit values, post-incremented by every load
+pDst RN 1 ;// r1: destination of the clipped bytes
+step RN 2 ;// r2: byte increment added to pDst after each 8-byte store
+
+;// Neon Registers
+
+qx0 QN Q0.S16
+dx00 DN D0.S16
+dx01 DN D1.S16
+qx1 QN Q1.S16
+dx10 DN D2.S16
+dx11 DN D3.S16
+
+qx2 QN Q2.S16
+dx20 DN D4.S16
+dx21 DN D5.S16
+qx3 QN Q3.S16
+dx30 DN D6.S16
+dx31 DN D7.S16
+
+
+dclip0 DN D0.U8 ;// each dclipN aliases the low half of qxN, so every narrowing below overwrites its own input
+dclip1 DN D2.U8
+dclip2 DN D4.U8
+dclip3 DN D6.U8
+
+ M_START armVCM4P2_Clip8 ;// Clips 64 signed 16-bit values from pSrc to [0,255] and stores them as 8 rows of 8 bytes, pDst advancing by step per row
+
+ VLD1 {dx00,dx01,dx10,dx11},[pSrc]! ;// Load 16 entries from pSrc
+ VLD1 {dx20,dx21,dx30,dx31},[pSrc]! ;// Load next 16 entries from pSrc
+ VQSHRUN dclip0,qx0,#0 ;// dclip0[i]=clip qx0[i] to [0,255]
+ VQSHRUN dclip1,qx1,#0 ;// dclip1[i]=clip qx1[i] to [0,255]
+ VST1 {dclip0},[pDst],step ;// store 8 bytes and pDst=pDst+step
+ VST1 {dclip1},[pDst],step ;// store 8 bytes and pDst=pDst+step
+ VQSHRUN dclip2,qx2,#0 ;// dclip2[i]=clip qx2[i] to [0,255]
+ VQSHRUN dclip3,qx3,#0 ;// dclip3[i]=clip qx3[i] to [0,255]
+ VST1 {dclip2},[pDst],step ;// store 8 bytes and pDst=pDst+step
+ VST1 {dclip3},[pDst],step ;// store 8 bytes and pDst=pDst+step
+
+ VLD1 {dx00,dx01,dx10,dx11},[pSrc]! ;// Load 16 entries from pSrc
+ VLD1 {dx20,dx21,dx30,dx31},[pSrc]! ;// Load next 16 entries from pSrc
+ VQSHRUN dclip0,qx0,#0 ;// dclip0[i]=clip qx0[i] to [0,255]
+ VQSHRUN dclip1,qx1,#0 ;// dclip1[i]=clip qx1[i] to [0,255]
+ VST1 {dclip0},[pDst],step ;// store 8 bytes and pDst=pDst+step
+ VST1 {dclip1},[pDst],step ;// store 8 bytes and pDst=pDst+step
+ VQSHRUN dclip2,qx2,#0 ;// dclip2[i]=clip qx2[i] to [0,255]
+ VQSHRUN dclip3,qx3,#0 ;// dclip3[i]=clip qx3[i] to [0,255]
+ VST1 {dclip2},[pDst],step ;// store 8 bytes and pDst=pDst+step
+ VST1 {dclip3},[pDst],step ;// store 8 bytes and pDst=pDst+step
+
+
+
+ M_END
+ ENDIF
+
+
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
new file mode 100755
index 0000000..e4a7f33
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
@@ -0,0 +1,398 @@
+;/**
+; *
+; * File Name: armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 12290
+; * Date: Wednesday, April 9, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for zigzag scanning and VLC decoding
+; * for inter, intra block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_AC_unsafe
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan
+; *
+; *
+; *
+; *
+; */
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+
+ IF ARM1136JS
+
+
+
+
+
+;//Input Arguments
+
+ppBitStream RN 0
+pBitOffset RN 1
+pDst RN 2
+shortVideoHeader RN 3
+
+
+;//Local Variables
+
+Return RN 0
+
+pVlcTableL0L1 RN 4
+pLMAXTableL0L1 RN 4
+pRMAXTableL0L1 RN 4
+pZigzagTable RN 4
+
+ftype RN 0
+temp3 RN 4
+temp RN 5
+Count RN 6
+Escape RN 5
+
+;// armVCM4P2_FillVLDBuffer
+zigzag RN 0
+storeLevel RN 1
+temp2 RN 4
+temp1 RN 5
+sign RN 5
+Last RN 7
+storeRun RN 14
+
+
+packRetIndex RN 5
+
+
+markerbit RN 5
+
+;// Scratch Registers
+
+RBitStream RN 8
+RBitBuffer RN 9
+RBitCount RN 10
+
+T1 RN 11
+T2 RN 12
+LR RN 14
+
+
+
+ M_ALLOC4 pppBitStream,4
+ M_ALLOC4 ppOffset,4
+ M_ALLOC4 pLinkRegister,4
+
+ M_START armVCM4P2_DecodeVLCZigzag_AC_unsafe
+ ;// Zero-fills 128 bytes at pDst, then decodes (Last, Run, Level) symbols in a loop and stores each Level as a halfword at pDst + zigzag[Count]
+ ;// get the table addresses from stack
+ M_ARG ppVlcTableL0L1,4
+ M_ARG ppLMAXTableL0L1,4
+ M_ARG ppRMAXTableL0L1,4
+ M_ARG ppZigzagTable,4
+
+ ;// Store ALL zeros at pDst
+ ;// NOTE(review): Count (r6) is read below but never initialised in this routine - presumably supplied by the caller; confirm
+ MOV temp1,#0 ;// temp1 (r5) = 0, one of the four zero-fill registers (this is not Count, which is r6)
+ MOV Last,#0
+ M_STR LR,pLinkRegister ;// Store Link Register on Stack
+ MOV temp2,#0
+ MOV LR,#0 ;// r14 is reused as a zero-fill register here and as storeRun later
+
+ ;// Initialize the Macro and Store all zeros to pDst
+
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_BD_INIT1 T1, T2, T2
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_BD_INIT2 T1, T2, T2
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_STR ppBitStream,pppBitStream ;// Store ppBitstream on stack
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_STR pBitOffset,ppOffset ;// Store pBitOffset on stack
+ STM pDst!,{temp2,temp1,Last,LR}
+ ;// 8 STMs x 4 registers x 4 bytes = 128 bytes cleared in total
+ STM pDst!,{temp2,temp1,Last,LR}
+ STM pDst!,{temp2,temp1,Last,LR}
+
+
+ SUB pDst,pDst,#128 ;// Restore pDst
+
+ ;// The armVCM4P2_GetVLCBits begins
+
+getVLCbits
+
+ M_BD_LOOK8 Escape,7 ;// Load Escape Value
+ LSR Escape,Escape,#25
+ CMP Escape,#3 ;// check for escape mode
+ MOVNE ftype,#0
+ BNE notEscapemode ;// Branch if not in Escape mode 3
+
+ M_BD_VSKIP8 #7,T1
+ CMP shortVideoHeader,#0 ;// Check shortVideoHeader flag to know the type of Escape mode
+ BEQ endFillVLD
+
+ ;// Escape Mode 4
+
+ M_BD_READ8 Last,1,T1
+ M_BD_READ8 storeRun,6,T1
+ M_BD_READ8 storeLevel,8,T1
+
+
+ ;// Check whether the Reserved values for Level are used and Exit with an Error Message if it is so
+
+ TEQ storeLevel,#0
+ TEQNE storeLevel,#128
+ BEQ ExitError
+
+ ADD temp2,storeRun,Count
+ CMP temp2,#64
+ BGE ExitError ;// error if Count+storeRun >= 64
+
+
+ ;// Load address of zigzagTable
+
+ M_LDR pZigzagTable,ppZigzagTable ;// Loading the Address of Zigzag table
+
+
+ ;// armVCM4P2_FillVLDBuffer
+
+ SXTB storeLevel,storeLevel ;// Sign Extend storeLevel to 32 bits
+
+
+ ;// To Reflect Runlength
+
+ ADD Count,Count,storeRun
+ LDRB zigzag,[pZigzagTable,Count]
+ ADD Count,Count,#1
+ STRH storeLevel,[pDst,zigzag] ;// store Level
+
+ B ExitOk ;// NOTE(review): unconditional exit after one Escape Mode 4 symbol; ExitOk selects OMX_Sts_Err unless Last==1 - confirm this is intended when Last==0
+
+
+
+endFillVLD
+
+
+ ;// Load Ftype( Escape Mode) value based on the two successive bits in the bitstream
+
+ M_BD_READ8 temp1,1,T1
+ CMP temp1,#0
+ MOVEQ ftype,#1
+ BEQ notEscapemode
+ M_BD_READ8 temp1,1,T1
+ CMP temp1,#1
+ MOVEQ ftype,#3
+ MOVNE ftype,#2
+
+
+notEscapemode
+
+ ;// Load optimized packed VLC table with last=0 and Last=1
+
+ M_LDR pVlcTableL0L1,ppVlcTableL0L1 ;// Load Combined VLC Table
+
+
+ CMP ftype,#3 ;// If ftype >= 3 perform Fixed Length Decoding (Escape Mode 3)
+ BGE EscapeMode3 ;// Else continue normal VLC Decoding
+
+ ;// Variable length decoding, "armUnPackVLC32"
+
+
+ M_BD_VLD packRetIndex,T1,T2,pVlcTableL0L1,4,2
+
+
+ LDR temp3,=0xFFF
+
+ CMP packRetIndex,temp3 ;// Check for invalid symbol
+ BEQ ExitError ;// if invalid symbol occurs exit with an error message
+
+ AND Last,packRetIndex,#2 ;// Get Last from packed Index (Last is 0 or 2 at this point)
+
+
+
+
+ LSR storeRun,packRetIndex,#7 ;// Get Run Value from Packed index
+ AND storeLevel,packRetIndex,#0x7c ;// storeLevel=packRetIndex[2-6],storeLevel[0-1]=0
+
+
+ M_LDR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Load LMAX table
+
+
+ LSR storeLevel,storeLevel,#2 ;// Level value
+
+ CMP ftype,#1
+ BNE ftype2
+
+ ;// ftype==1; Escape mode =1
+
+
+ ADD temp1, pLMAXTableL0L1, Last, LSL#4 ;// If the Last=1 add 32 to table address (Last is 0 or 2, so Last<<4 is 0 or 32)
+ LDRB temp1,[temp1,storeRun] ;// LMAX is indexed by Run
+
+
+ ADD storeLevel,temp1,storeLevel
+
+ftype2
+
+ ;// ftype =2; Escape mode =2
+
+ M_LDR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Load RMAX Table
+
+ CMP ftype,#2
+ BNE FillVLDL1
+
+ ADD temp1, pRMAXTableL0L1, Last, LSL#4 ;// If Last=1 add 32 to table address
+ SUB temp2,storeLevel,#1 ;// RMAX is indexed by Level - 1
+ LDRB temp1,[temp1,temp2]
+
+
+ ADD storeRun,storeRun,#1
+ ADD storeRun,temp1
+
+FillVLDL1
+
+
+ ;// armVCM4P2_FillVLDBuffer
+
+ M_LDR pZigzagTable,ppZigzagTable ;// Load address of zigzagTable
+
+ M_BD_READ8 sign,1,T1
+
+ CMP sign,#1
+ RSBEQ storeLevel,storeLevel,#0
+
+ ADD temp1,storeRun,Count ;// Exit with an error message if Run + Count exceeds 63
+ CMP temp1,#64
+ BGE ExitError
+
+
+
+
+
+
+ ;// To Reflect Runlength
+
+ ADD Count,Count,storeRun
+
+storeLevelL1
+
+ LDRB zigzag,[pZigzagTable,Count]
+ CMP Last,#2 ;// Check if the Level val is Last non zero val
+ ADD Count,Count,#1
+ LSR Last,Last,#1 ;// normalise Last to 0/1; no S suffix, so the flags from CMP Last,#2 still drive the BNE below
+ STRH storeLevel,[pDst,zigzag]
+
+ BNE end
+
+ B ExitOk
+
+
+
+ ;// Fixed Length Decoding Escape Mode 3
+
+EscapeMode3
+
+ M_BD_READ8 Last,1,T1
+ M_BD_READ8 storeRun,6,T1
+
+ ADD temp2,storeRun,Count ;// Exit with an error message if Run + Count exceeds 63
+ CMP temp2,#64
+ BGE ExitError
+
+ M_BD_READ8 markerbit,1,T1
+ TEQ markerbit,#0 ;// Exit with an error message if marker bit is zero
+ BEQ ExitError
+
+ M_BD_READ16 storeLevel,12,T1
+
+ TST storeLevel,#0x800 ;// test if the level is negative
+ SUBNE storeLevel,storeLevel,#4096
+ CMP storeLevel,#0
+ CMPNE storeLevel,#-2048
+ BEQ ExitError ;// Exit with an error message if Level==0 or -2048
+
+ M_LDR pZigzagTable,ppZigzagTable ;// Load address of zigzagTable
+
+ M_BD_READ8 markerbit,1,T1 ;// second marker bit is read but its value is not checked
+
+
+ ;// armVCM4P2_FillVLDBuffer ( Sign not used as storeLevel is preprocessed)
+
+
+
+ ;// To Reflect Run Length
+
+ ADD Count,Count,storeRun
+
+
+
+storeLevelLast
+
+ LDRB zigzag,[pZigzagTable,Count]
+ CMP Last,#1
+ ADD Count,Count,#1
+ STRH storeLevel,[pDst,zigzag]
+
+ BNE end
+
+ B ExitOk
+
+end
+
+ CMP Count,#64 ;//Run the Loop until Count reaches 64
+
+ BLT getVLCbits
+
+
+ExitOk
+ ;// Exit When VLC Decoding is done Successfully
+
+ ;// Loading ppBitStream and pBitOffset from stack
+
+ CMP Last,#1 ;// flags are consumed by MOVEQ/MOVNE below; assumes M_LDR and M_BD_FINI leave the flags untouched - TODO confirm
+ M_LDR ppBitStream,pppBitStream
+ M_LDR pBitOffset,ppOffset
+
+ ;//Ending the macro
+
+ M_BD_FINI ppBitStream,pBitOffset
+
+ MOVEQ Return,#OMX_Sts_NoErr
+ MOVNE Return,#OMX_Sts_Err
+ M_LDR LR,pLinkRegister ;// Load the Link Register Back
+ B exit2
+
+ExitError
+ ;// Exit When an Error occurs
+
+ M_LDR ppBitStream,pppBitStream
+ M_LDR pBitOffset,ppOffset
+ ;//Ending the macro
+
+ M_BD_FINI ppBitStream,pBitOffset
+ M_LDR LR,pLinkRegister
+ MOV Return,#OMX_Sts_Err
+
+exit2
+
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
new file mode 100755
index 0000000..38af975
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
@@ -0,0 +1,211 @@
+ /**
+ *
+ * File Name: armVCM4P2_Huff_Tables_VLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVCM4P2_Huff_Tables_VLC.c
+ * Description: Contains all the Huffman tables used in MPEG4 codec
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armCOMM_Bitstream.h"
+
+
+
+
+// Contains optimized and Packed VLC tables with Last=0 and Last=1
+
+// optimized Packed VLC table Entry Format
+// ---------------------------------------
+//
+// 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
+// +------------------------------------------------+
+// | Len | Run | Level |L | 1 |
+// +------------------------------------------------+
+// | Offset | 0 |
+// +------------------------------------------------+
+// If the table entry is a leaf entry then bit 0 set:
+// Len = Number of bits overread (0 to 7) 3 bits
+// Run = RunLength of the Symbol (0 to 63) 6 bits
+// Level = Level of the Symbol (0 to 31) 5 bits
+// L = Last Value of the Symbol (0 or 1) 1 bit
+//
+// If the table entry is an internal node then bit 0 is clear:
+// Offset = Number of (16-bit) half words from the table
+// start to the next table node
+//
+// The table is accessed by successive lookup up on the
+// next Step bits of the input bitstream until a leaf node
+// is obtained. The Step sizes are supplied to the VLD macro.
+
+// The VLC tables used for Intra and non intra coefficients in non Escape mode
+// contains symbols with both Last=0 and Last=1.
+// If a symbol is not found in the table it will be coded as 0xFFF
+
+
+const OMX_U16 armVCM4P2_InterVlcL0L1[200] = { /* packed VLC decode tree for inter coefficients (Last=0 and Last=1 combined); entries use the leaf / internal-node format described in the comment above */
+ 0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x1b09,
+ 0x4009, 0x4009, 0x4009, 0x4009, 0x2109, 0x2109, 0x0209, 0x0011,
+ 0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058,
+ 0x3fff, 0x3fff, 0x0038, 0x0040, 0x2115, 0x2115, 0x201d, 0x201d,
+ 0x2059, 0x2059, 0x2051, 0x2051, 0x1c0d, 0x1b0d, 0x1a0d, 0x190d,
+ 0x0911, 0x0811, 0x0711, 0x0611, 0x0511, 0x0319, 0x0219, 0x0121,
+ 0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088,
+ 0x2061, 0x2061, 0x2129, 0x2129, 0x3709, 0x3709, 0x3809, 0x3809,
+ 0x3d0d, 0x3d0d, 0x3e0d, 0x3e0d, 0x3f0d, 0x3f0d, 0x200d, 0x200d,
+ 0x0098, 0x00a0, 0x00a8, 0x00b0, 0x0131, 0x0221, 0x0419, 0x0519,
+ 0x0619, 0x0a11, 0x1909, 0x1a09, 0x210d, 0x220d, 0x230d, 0x240d,
+ 0x250d, 0x260d, 0x270d, 0x280d, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+ 0x0049, 0x0041, 0x380d, 0x380d, 0x370d, 0x370d, 0x360d, 0x360d,
+ 0x350d, 0x350d, 0x340d, 0x340d, 0x330d, 0x330d, 0x320d, 0x320d,
+ 0x00e8, 0x00f0, 0x00f8, 0x0100, 0x310d, 0x310d, 0x2015, 0x2015,
+ 0x3609, 0x3609, 0x3509, 0x3509, 0x3409, 0x3409, 0x3309, 0x3309,
+ 0x3209, 0x3209, 0x3109, 0x3109, 0x0110, 0x0130, 0x0138, 0x0140,
+ 0x0118, 0x0120, 0x0128, 0x100d, 0x3009, 0x3009, 0x2f09, 0x2f09,
+ 0x2411, 0x2411, 0x2311, 0x2311, 0x2039, 0x2039, 0x2031, 0x2031,
+ 0x0f0d, 0x0e0d, 0x0d0d, 0x0c0d, 0x0b0d, 0x0a0d, 0x090d, 0x0e09,
+ 0x0d09, 0x0211, 0x0119, 0x0029, 0x0150, 0x0158, 0x0160, 0x0168,
+ 0x280d, 0x280d, 0x270d, 0x270d, 0x260d, 0x260d, 0x250d, 0x250d,
+ 0x2c09, 0x2c09, 0xb759, 0xb759, 0x2a09, 0x2a09, 0x2021, 0x2021,
+ 0x040d, 0x030d, 0x0b35, 0x010d, 0x0909, 0x0809, 0x0709, 0x0609,
+ 0x0111, 0x0019, 0x2509, 0x2509, 0x2409, 0x2409, 0x2309, 0x2309
+};
+
+
+const OMX_U16 armVCM4P2_IntraVlcL0L1[200] = { /* packed VLC decode tree for intra coefficients (Last=0 and Last=1 combined); entries use the leaf / internal-node format described in the comment above */
+ 0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x0f09,
+ 0x4009, 0x4009, 0x4009, 0x4009, 0x2011, 0x2011, 0x0109, 0x0019,
+ 0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058,
+ 0x3fff, 0x3fff, 0x0038, 0x0040, 0x203d, 0x203d, 0x2035, 0x2035,
+ 0x20b1, 0x20b1, 0x20a9, 0x20a9, 0x0215, 0x011d, 0x002d, 0x0d09,
+ 0x0519, 0x0811, 0x0419, 0x0321, 0x0221, 0x0139, 0x00a1, 0x0099,
+ 0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088,
+ 0x20b9, 0x20b9, 0x20c1, 0x20c1, 0x2141, 0x2141, 0x2911, 0x2911,
+ 0x2315, 0x2315, 0x2415, 0x2415, 0x2f0d, 0x2f0d, 0x300d, 0x300d,
+ 0x0098, 0x00a0, 0x00a8, 0x00b0, 0x00c9, 0x00d1, 0x00d9, 0x0149,
+ 0x0619, 0x0151, 0x0229, 0x0719, 0x0e09, 0x0045, 0x0515, 0x0615,
+ 0x110d, 0x120d, 0x130d, 0x140d, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+ 0x0091, 0x0089, 0x2e0d, 0x2e0d, 0x2d0d, 0x2d0d, 0x2c0d, 0x2c0d,
+ 0x2b0d, 0x2b0d, 0x2a0d, 0x2a0d, 0x2115, 0x2115, 0x2025, 0x2025,
+ 0x00e8, 0x00f0, 0x00f8, 0x0100, 0x2c09, 0x2c09, 0x2b09, 0x2b09,
+ 0x2711, 0x2711, 0x2611, 0x2611, 0x2511, 0x2511, 0x2319, 0x2319,
+ 0x2219, 0x2219, 0x2131, 0x2131, 0x0110, 0x0130, 0x0138, 0x0140,
+ 0x0118, 0x0120, 0x0128, 0x080d, 0x2129, 0x2129, 0x2081, 0x2081,
+ 0x2411, 0x2411, 0x2079, 0x2079, 0x2071, 0x2071, 0x2069, 0x2069,
+ 0x1bb5, 0x060d, 0x001d, 0xd3f9, 0x0909, 0x0809, 0x090d, 0x0311,
+ 0x0121, 0x0061, 0x0059, 0x0051, 0x0150, 0x0158, 0x0160, 0x0168,
+ 0x240d, 0x240d, 0x230d, 0x230d, 0x2609, 0x2609, 0x250d, 0x250d,
+ 0x2709, 0x2709, 0x2211, 0x2211, 0x2119, 0x2119, 0x2049, 0x2049,
+ 0x0015, 0x0509, 0x020d, 0x010d, 0x0409, 0x0309, 0x0041, 0x0039,
+ 0x0111, 0x0031, 0x2209, 0x2209, 0x2029, 0x2029, 0x2021, 0x2021
+};
+
+const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64] = { /* two 32-entry packed VLC tables stored back to back */
+ 0x0020, 0x000b, 0x2009, 0x2009, 0x2007, 0x2007, 0x2001, 0x2001,
+ 0x4005, 0x4005, 0x4005, 0x4005, 0x4003, 0x4003, 0x4003, 0x4003,
+ 0x0028, 0x000f, 0x200d, 0x200d, 0x0030, 0x0013, 0x2011, 0x2011,
+ 0x0038, 0x0017, 0x2015, 0x2015, 0x3fff, 0x3fff, 0x2019, 0x2019,
+ /* NOTE(review): second half starts here; presumably luma first, then chroma (per the array name) - confirm */
+ 0x0020, 0x0009, 0x2007, 0x2007, 0x4005, 0x4005, 0x4005, 0x4005,
+ 0x4003, 0x4003, 0x4003, 0x4003, 0x4001, 0x4001, 0x4001, 0x4001,
+ 0x0028, 0x000d, 0x200b, 0x200b, 0x0030, 0x0011, 0x200f, 0x200f,
+ 0x0038, 0x0015, 0x2013, 0x2013, 0x1fff, 0x0019, 0x2017, 0x2017
+};
+
+
+const OMX_U16 armVCM4P2_aVlcMVD[124] = { /* packed VLC table, presumably for motion vector differences (per the name); the entry layout is not documented in this file - TODO confirm */
+ 0x0010, 0x00f0, 0x0043, 0x003f, 0x4041, 0x4041, 0x4041, 0x4041,
+ 0x0018, 0x00d8, 0x0047, 0x003b, 0x0020, 0x0080, 0x00a8, 0x00d0,
+ 0x0028, 0x0048, 0x0070, 0x0078, 0x1fff, 0x0030, 0x0038, 0x0040,
+ 0x0081, 0x0001, 0x007f, 0x0003, 0x207d, 0x207d, 0x2005, 0x2005,
+ 0x207b, 0x207b, 0x2007, 0x2007, 0x0050, 0x0058, 0x0060, 0x0068,
+ 0x2079, 0x2079, 0x2009, 0x2009, 0x2077, 0x2077, 0x200b, 0x200b,
+ 0x2075, 0x2075, 0x200d, 0x200d, 0x2073, 0x2073, 0x200f, 0x200f,
+ 0x0071, 0x0011, 0x006f, 0x0013, 0x006d, 0x0015, 0x006b, 0x0017,
+ 0x0088, 0x0090, 0x0098, 0x00a0, 0x0069, 0x0019, 0x0067, 0x001b,
+ 0x0065, 0x001d, 0x0063, 0x001f, 0x0061, 0x0021, 0x005f, 0x0023,
+ 0x005d, 0x0025, 0x005b, 0x0027, 0x00b0, 0x00b8, 0x00c0, 0x00c8,
+ 0x0059, 0x0029, 0x0057, 0x002b, 0x2055, 0x2055, 0x202d, 0x202d,
+ 0x2053, 0x2053, 0x202f, 0x202f, 0x2051, 0x2051, 0x2031, 0x2031,
+ 0x204f, 0x204f, 0x2033, 0x2033, 0x00e0, 0x00e8, 0x0049, 0x0039,
+ 0x204d, 0x204d, 0x2035, 0x2035, 0x204b, 0x204b, 0x2037, 0x2037,
+ 0x2045, 0x2045, 0x203d, 0x203d
+};
+
+/* LMAX table for Inter, per the array name (Last == 0 at [0..31], Last == 1 from [32])
+ Run indexed. NOTE(review): this comment used to say "Level - 1 Indexed"; armVCM4P2_DecodeVLCZigzag_AC_unsafe indexes its LMAX table by Run - confirm
+ padded armVCM4P2_InterL0L1LMAX[27-31] with zeros to access entries for Last=1 effectively
+ (27 Last=0 entries + 5 zero pads + 41 Last=1 entries = 73)
+*/
+const OMX_U8 armVCM4P2_InterL0L1LMAX[73] =
+{
+ 12, 6, 4, 3, 3, 3, 3, 2,
+ 2, 2, 2, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 0, 0, 0, 0, 0,
+ 3, 2, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1
+};
+
+/* RMAX table for Inter, per the array name (Last == 0 at [0..31], Last == 1 from [32])
+ Level - 1 Indexed
+ padded armVCM4P2_InterL0L1RMAX[12-31] with zeros to access entries for Last=1 table effectively */
+
+
+const OMX_U8 armVCM4P2_InterL0L1RMAX[35] =
+{
+ 26, 10, 6, 2, 1, 1,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0, 40, 1, 0
+};
+
+/* LMAX table for Intra, per the array name (Last == 0 at [0..31], Last == 1 from [32])
+ Run indexed. NOTE(review): this comment used to say "Level - 1 Indexed"; armVCM4P2_DecodeVLCZigzag_AC_unsafe indexes its LMAX table by Run - confirm
+ padded armVCM4P2_IntraL0L1LMAX[15-31] with zeros to access entries for Last=1 effectively
+ (15 Last=0 entries + 17 zero pads + 21 Last=1 entries = 53)
+*/
+const OMX_U8 armVCM4P2_IntraL0L1LMAX[53] =
+{
+ 27, 10, 5, 4, 3, 3, 3,
+ 3, 2, 2, 1, 1, 1, 1, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 8, 3, 2, 2, 2, 2, 2, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1
+};
+
+
+/* RMAX table for Intra, per the array name (Last == 0 at [0..31], Last == 1 from [32])
+ Level - 1 Indexed
+ padded armVCM4P2_IntraL0L1RMAX[27-31] with zeros to access entries for Last=1 table effectively */
+
+
+const OMX_U8 armVCM4P2_IntraL0L1RMAX[40] =
+{
+ 14, 9, 7, 3, 2, 1, 1,
+ 1, 1, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,
+
+ 20, 6, 1, 0, 0, 0, 0, 0
+
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Lookup_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
new file mode 100755
index 0000000..6948f80
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
@@ -0,0 +1,75 @@
+ /**
+ *
+ * File Name: armVCM4P2_Lookup_Tables.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVCM4P2_Lookup_Tables.c
+ * Description: Contains all the Lookup tables used in MPEG4 codec
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+ /* * Table Entries contain Dc Scaler values
+ * armVCM4P2_DCScaler[i]= 8 for i=1 to 4 and i=33 to 36
+ * = 2*i for i=5 to 8
+ * = i+8 for i=9 to 24
+ * = 2*i-16 for i=25 to 31
+ * = (i-32+13)/2 for i=37 to 56
+ * = i-6-32 for i=57 to 63
+ * = 255 for i=0 and i=32
+ */
+
+const OMX_U8 armVCM4P2_DCScaler[64]={
+ 0xff, 0x8, 0x8, 0x8, 0x8, 0xa, 0xc, 0xe,
+ 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+ 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+ 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e,
+ 0xff, 0x8, 0x8, 0x8, 0x8, 0x9, 0x9, 0xa,
+ 0xa, 0xb, 0xb, 0xc, 0xc, 0xd, 0xd, 0xe,
+ 0xe, 0xf, 0xf, 0x10, 0x10, 0x11, 0x11, 0x12,
+ 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
+
+};
+
+
+ /* Table Entries Contain scaled reciprocals of 1 to 63 (scale factor 32767)
+ * armVCM4P2_Reciprocal_QP_S16[i]=round(32767/i)
+ * armVCM4P2_Reciprocal_QP_S16[0]= 0
+ */
+
+const OMX_S16 armVCM4P2_Reciprocal_QP_S16[64]={
+ 0x0000,0x7fff,0x4000,0x2aaa,0x2000,0x1999,0x1555,0x1249,
+ 0x1000,0x0e39,0x0ccd,0x0ba3,0x0aab,0x09d9,0x0925,0x0888,
+ 0x0800,0x0787,0x071c,0x06bd,0x0666,0x0618,0x05d1,0x0591,
+ 0x0555,0x051f,0x04ec,0x04be,0x0492,0x046a,0x0444,0x0421,
+ 0x0400,0x03e1,0x03c4,0x03a8,0x038e,0x0376,0x035e,0x0348,
+ 0x0333,0x031f,0x030c,0x02fa,0x02e9,0x02d8,0x02c8,0x02b9,
+ 0x02ab,0x029d,0x028f,0x0282,0x0276,0x026a,0x025f,0x0254,
+ 0x0249,0x023f,0x0235,0x022b,0x0222,0x0219,0x0211,0x0208
+
+};
+
+ /* Table Entries Contain scaled reciprocals of 1 to 63 (scale factor 131071)
+ * armVCM4P2_Reciprocal_QP_S32[i]=round(131071/i)
+ * armVCM4P2_Reciprocal_QP_S32[0]= 0
+ */
+
+const OMX_S32 armVCM4P2_Reciprocal_QP_S32[64]={
+ 0x00000000,0x0001ffff,0x00010000,0x0000aaaa, 0x00008000, 0x00006666, 0x00005555, 0x00004924,
+ 0x00004000,0x000038e3,0x00003333,0x00002e8c, 0x00002aab, 0x00002762, 0x00002492, 0x00002222,
+ 0x00002000,0x00001e1e,0x00001c72,0x00001af2, 0x0000199a, 0x00001861, 0x00001746, 0x00001643,
+ 0x00001555,0x0000147b,0x000013b1,0x000012f6, 0x00001249, 0x000011a8, 0x00001111, 0x00001084,
+ 0x00001000,0x00000f84,0x00000f0f,0x00000ea1, 0x00000e39, 0x00000dd6, 0x00000d79, 0x00000d21,
+ 0x00000ccd,0x00000c7d,0x00000c31,0x00000be8, 0x00000ba3, 0x00000b61, 0x00000b21, 0x00000ae5,
+ 0x00000aab,0x00000a73,0x00000a3d,0x00000a0a, 0x000009d9, 0x000009a9, 0x0000097b, 0x0000094f,
+ 0x00000925,0x000008fb,0x000008d4,0x000008ae, 0x00000889, 0x00000865, 0x00000842, 0x00000820
+
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_SetPredDir_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
new file mode 100755
index 0000000..44f2460
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
@@ -0,0 +1,104 @@
+;//
+;//
+;// File Name: armVCM4P2_SetPredDir_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+; **
+; * Function: armVCM4P2_SetPredDir
+; *
+; * Description:
+; * Performs detecting the prediction direction
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] blockIndex block index indicating the component type and
+; * position as defined in subclause 6.1.3.8, of ISO/IEC
+; * 14496-2. Furthermore, indexes 6 to 9 indicate the
+; * alpha blocks spatially corresponding to luminance
+; * blocks 0 to 3 in the same macroblock.
+; * [in] pCoefBufRow pointer to the coefficient row buffer
+; * [in] pQpBuf pointer to the quantization parameter buffer
+; * [out]predQP quantization parameter of the predictor block
+; * [out]predDir indicates the prediction direction which takes one
+; * of the following values:
+; * OMX_VC_HORIZONTAL predict horizontally
+; * OMX_VC_VERTICAL predict vertically
+; *
+; * Return Value:
+; * Standard OMXResult result. See enumeration for possible result codes.
+; *
+; */
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE omxVC_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+ IF ARM1136JS
+
+;// Input Arguments
+BlockIndex RN 0
+pCoefBufRow RN 1
+pCoefBufCol RN 2
+predDir RN 3
+predQP RN 4
+pQpBuf RN 5
+
+;// Local Variables
+
+Return RN 0
+blockDCLeft RN 6
+blockDCTop RN 7
+blockDCTopLeft RN 8
+temp1 RN 9
+temp2 RN 14
+
+ M_START armVCM4P2_SetPredDir,r9
+ ;// Picks the prediction direction from the neighbouring DC values and writes it to [predDir], with the matching QP to [predQP]
+ M_ARG ppredQP,4
+ M_ARG ppQpBuf,4
+ ;// NOTE(review): LDRH zero-extends, so the DC values are compared as unsigned 16-bit - presumably never negative here; confirm
+ LDRH blockDCTopLeft,[pCoefBufRow,#-16]
+ LDRH blockDCLeft,[pCoefBufCol]
+ ;// Top DC is read from pCoefBufCol - 16 bytes when BlockIndex is 3, otherwise from pCoefBufRow
+ TEQ BlockIndex,#3
+ LDREQH blockDCTop,[pCoefBufCol,#-16]
+ LDRNEH blockDCTop,[pCoefBufRow]
+ ;// temp1 = |left - topLeft|, temp2 = |topLeft - top|
+ SUBS temp1,blockDCLeft,blockDCTopLeft
+ RSBLT temp1,temp1,#0
+ SUBS temp2,blockDCTopLeft,blockDCTop
+ RSBLT temp2,temp2,#0
+ ;// temp1 < temp2: vertical prediction with QP from pQpBuf[1]; otherwise horizontal with QP from pQpBuf[0]
+ M_LDR pQpBuf,ppQpBuf
+ M_LDR predQP,ppredQP
+ CMP temp1,temp2
+ MOV temp2,#OMX_VC_VERTICAL
+ LDRLTB temp1,[pQpBuf,#1]
+ STRLT temp2,[predDir]
+ STRLT temp1,[predQP]
+ MOV temp2,#OMX_VC_HORIZONTAL ;// MOV leaves the flags alone, so the GE instructions below still act on the CMP above
+ LDRGEB temp1,[pQpBuf]
+ STRGE temp2,[predDir]
+ MOV Return,#OMX_Sts_NoErr ;// always reports success
+ STRGE temp1,[predQP]
+
+
+
+ M_END
+
+ ENDIF
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
new file mode 100755
index 0000000..21fa715
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
@@ -0,0 +1,61 @@
+/**
+ *
+ * File Name: armVCM4P2_Zigzag_Tables.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVCM4P2_ZigZag_Tables.c
+ * Description: Contains the zigzag tables
+ *
+ */
+
+#include "omxtypes.h"
+
+/* Contains double the values in the reference zigzag table, so each entry
+ * can be used directly as a byte offset into a block of 16-bit coefficients.
+ * Holds the classical, vertical and horizontal zigzag scan tables in one
+ * array, 64 entries each. Every 64-entry table must be a permutation of the
+ * even values 0..126 (each coefficient position visited exactly once).
+ */
+
+const OMX_U8 armVCM4P2_aClassicalZigzagScan [192] =
+{
+ /* classical zigzag scan */
+ 0, 2, 16, 32, 18, 4, 6, 20,
+ 34, 48, 64, 50, 36, 22, 8, 10,
+ 24, 38, 52, 66, 80, 96, 82, 68,
+ 54, 40, 26, 12, 14, 28, 42, 56,
+ 70, 84, 98, 112, 114, 100, 86, 72,
+ 58, 44, 30, 46, 60, 74, 88, 102,
+ 116, 118, 104, 90, 76, 62, 78, 92,
+ 106, 120, 122, 108, 94, 110, 124, 126, /* entry 59 is 108 (position 54); it was a duplicate 104 */
+
+ /* alternate vertical scan */
+ 0, 16, 32, 48, 2, 18, 4, 20,
+ 34, 50, 64, 80, 96, 112, 114, 98,
+ 82, 66, 52, 36, 6, 22, 8, 24,
+ 38, 54, 68, 84, 100, 116, 70, 86,
+ 102, 118, 40, 56, 10, 26, 12, 28,
+ 42, 58, 72, 88, 104, 120, 74, 90,
+ 106, 122, 44, 60, 14, 30, 46, 62,
+ 76, 92, 108, 124, 78, 94, 110, 126,
+
+ /* alternate horizontal scan */
+ 0, 2, 4, 6, 16, 18, 32, 34,
+ 20, 22, 8, 10, 12, 14, 30, 28,
+ 26, 24, 38, 36, 48, 50, 64, 66,
+ 52, 54, 40, 42, 44, 46, 56, 58,
+ 60, 62, 68, 70, 80, 82, 96, 98,
+ 84, 86, 72, 74, 76, 78, 88, 90,
+ 92, 94, 100, 102, 112, 114, 116, 118,
+ 104, 106, 108, 110, 120, 122, 124, 126
+
+
+};
+
+
+
+
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
new file mode 100755
index 0000000..796ad6e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
@@ -0,0 +1,102 @@
+/**
+ *
+ * File Name: omxVCM4P2_DecodeBlockCoef_Inter.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for inter reconstruction
+ *
+ */
+
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Inter
+ *
+ * Description:
+ * Decodes the INTER block coefficients. Inverse quantization, inversely zigzag
+ * positioning and IDCT, with appropriate clipping on each step, are performed
+ * on the coefficients. The results (residuals) are placed in a contiguous array
+ * of 64 elements. For INTER block, the output buffer holds the residuals for
+ * further reconstruction.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream buffer. There is no boundary
+ * check for the bit stream buffer.
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * to by *ppBitStream. *pBitOffset is valid within
+ * [0-7]
+ * [in] QP quantization parameter
+ * [in] shortVideoHeader a flag indicating presence of short_video_header;
+ * shortVideoHeader==1 indicates using quantization method defined in short
+ * video header mode, and shortVideoHeader==0 indicates normal quantization method.
+ * [out] ppBitStream *ppBitStream is updated after the block is decoded, so that it points to the
+ * current byte in the bit stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the current bit position in the
+ * byte pointed by *ppBitStream
+ * [out] pDst pointer to the decoded residual buffer (a contiguous array of 64 elements of
+ * OMX_S16 data type). Must be 16-byte aligned.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - At least one of the following pointers is Null: ppBitStream, *ppBitStream, pBitOffset , pDst
+ * - At least one of the below case:
+ * - *pBitOffset exceeds [0,7], QP <= 0;
+ * - pDst not 16-byte aligned
+ * OMX_Sts_Err - status error
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Inter(
+ const OMX_U8 ** ppBitStream,
+ OMX_INT * pBitOffset,
+ OMX_S16 * pDst,
+ OMX_INT QP,
+ OMX_INT shortVideoHeader
+)
+{
+    /* Backing store for the 64 coefficients. It is over-allocated so that a
+       16-byte aligned window of 64 OMX_S16 fits wherever the array lands
+       (assuming armAlignTo16Bytes rounds the address up). */
+    OMX_S16 scratch[79];
+    OMX_S16 *pCoef = armAlignTo16Bytes(scratch);
+    OMXResult status;
+
+    /* Stage 1: variable-length decode and inverse zigzag into pCoef */
+    status = omxVCM4P2_DecodeVLCZigzag_Inter(
+        ppBitStream,
+        pBitOffset,
+        pCoef,
+        shortVideoHeader);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 2: in-place inverse quantisation */
+    status = omxVCM4P2_QuantInvInter_I(pCoef, QP);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 3: 8x8 inverse DCT, residuals written to the caller's buffer */
+    status = omxVCM4P2_IDCT8x8blk(pCoef, pDst);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
new file mode 100755
index 0000000..b28657c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
@@ -0,0 +1,214 @@
+/**
+ *
+ * File Name: omxVCM4P2_DecodeBlockCoef_Intra.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 12290
+ * Date: Wednesday, April 9, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for intra reconstruction
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/* Function for saturating 16 bit values to the [0,255] range and */
+/* writing out as 8 bit values. Does 64 entries */
+void armVCM4P2_Clip8(OMX_S16 *pSrc, OMX_U8 *pDst, OMX_INT dstStep );
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Intra
+ *
+ * Description:
+ * Decodes the INTRA block coefficients. Inverse quantization, inversely zigzag
+ * positioning, and IDCT, with appropriate clipping on each step, are performed
+ * on the coefficients. The results are then placed in the output frame/plane on
+ * a pixel basis. For INTRA block, the output values are clipped to [0, 255] and
+ * written to corresponding block buffer within the destination plane.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream buffer. There is no boundary
+ * check for the bit stream buffer.
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * to by *ppBitStream. *pBitOffset is valid within
+ * [0-7].
+ * [in] step width of the destination plane
+ * [in/out] pCoefBufRow [in] pointer to the coefficient row buffer
+ * [out] updated coefficient row buffer
+ * [in/out] pCoefBufCol [in] pointer to the coefficient column buffer
+ * [out] updated coefficient column buffer
+ * [in] curQP quantization parameter of the macroblock which
+ * the current block belongs to
+ * [in] pQpBuf Pointer to a 2-element QP array. pQpBuf[0] holds the QP of the 8x8 block left to
+ * the current block(QPa). pQpBuf[1] holds the QP of the 8x8 block just above the
+ * current block(QPc).
+ * Note, in case the corresponding block is out of VOP bound, the QP value will have
+ * no effect to the intra-prediction process. Refer to subclause "7.4.3.3 Adaptive
+ * ac coefficient prediction" of ISO/IEC 14496-2(MPEG4 Part2) for accurate description.
+ * [in] blockIndex block index indicating the component type and
+ * position as defined in subclause 6.1.3.8,
+ * Figure 6-5 of ISO/IEC 14496-2.
+ * [in] intraDCVLC a code determined by intra_dc_vlc_thr and QP.
+ * This allows a mechanism to switch between two VLC
+ * for coding of Intra DC coefficients as per Table
+ * 6-21 of ISO/IEC 14496-2.
+ * [in] ACPredFlag a flag equal to ac_pred_flag (of luminance) indicating
+ * if the ac coefficients of the first row or first
+ * column are differentially coded for intra coded
+ * macroblock.
+ * [in] shortVideoHeader a flag indicating presence of short_video_header;
+ * shortVideoHeader==1 selects linear intra DC mode,
+ * and shortVideoHeader==0 selects nonlinear intra DC mode.
+ * [out] ppBitStream *ppBitStream is updated after the block is
+ * decoded, so that it points to the current byte
+ * in the bit stream buffer
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream
+ * [out] pDst pointer to the block in the destination plane.
+ * pDst should be 16-byte aligned.
+ * [out] pCoefBufRow pointer to the updated coefficient row buffer.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset,
+ * pCoefBufRow, pCoefBufCol, pQPBuf, pDst.
+ * or
+ * - At least one of the below case: *pBitOffset exceeds [0,7], curQP exceeds (1, 31),
+ * blockIndex exceeds [0,9], step is not the multiple of 8, intraDCVLC is zero while
+ * blockIndex greater than 5.
+ * or
+ * - pDst is not 16-byte aligned
+ * OMX_Sts_Err - status error
+ *
+ */
+
+OMXResult omxVCM4P2_DecodeBlockCoef_Intra(
+ const OMX_U8 ** ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_U8 *pDst,
+ OMX_INT step,
+ OMX_S16 *pCoefBufRow,
+ OMX_S16 *pCoefBufCol,
+ OMX_U8 curQP,
+ const OMX_U8 *pQPBuf,
+ OMX_INT blockIndex,
+ OMX_INT intraDCVLC,
+ OMX_INT ACPredFlag,
+ OMX_INT shortVideoHeader
+ )
+{
+    /* Two over-allocated scratch arrays, each providing a 16-byte aligned
+       window of 64 OMX_S16: one for the coefficients, one for the IDCT
+       output before clipping. */
+    OMX_S16 coefStore[79];
+    OMX_S16 residualStore[79];
+    OMX_S16 *pCoef = armAlignTo16Bytes(coefStore);
+    OMX_S16 *pResidual = armAlignTo16Bytes(residualStore);
+    OMX_INT dcPredDir;
+    OMX_INT acPredDir;
+    OMX_INT neighbourQP;
+    OMXVCM4P2VideoComponent component;
+    OMXResult status;
+
+    /* Derive the prediction direction and the predictor block's QP */
+    armVCM4P2_SetPredDir(blockIndex, pCoefBufRow, pCoefBufCol,
+                         &dcPredDir, &neighbourQP, pQPBuf);
+
+    /* AC prediction follows the DC direction unless it is switched off */
+    acPredDir = (ACPredFlag == 0) ? OMX_VC_NONE : dcPredDir;
+
+    /* Blocks 0..3 are luminance, everything else is treated as chrominance */
+    component = (blockIndex <= 3) ? OMX_VC_LUMINANCE : OMX_VC_CHROMINANCE;
+
+    /* Variable-length decode and inverse zigzag; the DC coefficient uses
+       its own VLC only when intraDCVLC is exactly 1 */
+    if (intraDCVLC != 1)
+    {
+        status = omxVCM4P2_DecodeVLCZigzag_IntraACVLC(
+            ppBitStream, pBitOffset, pCoef, acPredDir, shortVideoHeader);
+        armRetDataErrIf((status != OMX_Sts_NoErr), status);
+    }
+    else
+    {
+        status = omxVCM4P2_DecodeVLCZigzag_IntraDCVLC(
+            ppBitStream, pBitOffset, pCoef, acPredDir, shortVideoHeader,
+            component);
+        armRetDataErrIf((status != OMX_Sts_NoErr), status);
+    }
+
+    /* AC/DC prediction and reconstruction of the coefficients */
+    status = omxVCM4P2_PredictReconCoefIntra(
+        pCoef, pCoefBufRow, pCoefBufCol, curQP, neighbourQP, dcPredDir,
+        ACPredFlag, component);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* In-place inverse quantisation */
+    status = omxVCM4P2_QuantInvIntra_I(pCoef, curQP, component,
+                                       shortVideoHeader);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* 8x8 inverse DCT into the second scratch buffer */
+    status = omxVCM4P2_IDCT8x8blk(pCoef, pResidual);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Clip the 64 results to [0,255] and scatter them into the destination
+       plane, one 8-pixel row every step bytes */
+    armVCM4P2_Clip8(pResidual, pDst, step);
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
new file mode 100755
index 0000000..cc16f5a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
@@ -0,0 +1,364 @@
+; **********
+; *
+; * File Name: omxVCM4P2_DecodePadMV_PVOP_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 12290
+; * Date: Wednesday, April 9, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; **
+; * Function: omxVCM4P2_DecodePadMV_PVOP
+; *
+; * Description:
+; * Decodes and pads four motion vectors of the non-intra macroblock in P-VOP.
+; * The motion vector padding process is specified in subclause 7.6.1.6 of
+; * ISO/IEC 14496-2.
+; *
+; * Remarks:
+; *
+; *
+; * Parameters:
+; * [in] ppBitStream pointer to the pointer to the current byte in
+; * the bit stream buffer
+; * [in] pBitOffset pointer to the bit position in the byte pointed
+; * to by *ppBitStream. *pBitOffset is valid within
+; * [0-7].
+; * [in] pSrcMVLeftMB pointer to the motion vector buffer of the
+; * macroblock located spatially to the left of the current
+; * macroblock.
+; * [in] pSrcMVUpperMB pointer to the motion vector buffer of the
+; * macroblock located spatially above the current
+; * macroblock.
+; * [in] pSrcMVUpperRightMB pointer to the motion vector buffer of the
+; * macroblock located spatially above and to the right of the current
+; * macroblock.
+; * [in] fcodeForward a code equal to vop_fcode_forward in MPEG-4
+; * bit stream syntax
+; * [in] MBType the type of the current macroblock. If MBType
+; * is not equal to OMX_VC_INTER4V, the destination
+; * motion vector buffer is still filled with the
+; * same decoded vector.
+; * [out] ppBitStream *ppBitStream is updated after the block is decoded,
+; * so that it points to the current byte in the bit
+; * stream buffer
+; * [out] pBitOffset *pBitOffset is updated so that it points to the
+; * current bit position in the byte pointed by
+; * *ppBitStream
+; * [out] pDstMVCurMB pointer to the motion vector buffer of the current
+; * macroblock which contains four decoded motion vectors
+; *
+; * Return Value:
+; * OMX_Sts_NoErr -no error
+; *
+; *
+; * OMX_Sts_Err - status error
+; *
+; *
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+ INCLUDE omxVC_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+ IF ARM1136JS
+
+;//Input Arguments
+;// r0-r3 hold the first four arguments on entry; the r4-r7 aliases are filled by M_LDR from the M_ARG stack slots.
+ppBitStream RN 0
+pBitOffset RN 1
+pSrcMVLeftMB RN 2
+pSrcMVUpperMB RN 3
+pSrcMVUpperRightMB RN 4
+pDstMVCurMB RN 5
+fcodeForward RN 6
+MBType RN 7
+
+;//Local Variables
+;// Several locals alias one physical register (r0, r1, r4, r7, r14); writing one name overwrites every other name on that register.
+zero RN 4
+one RN 4
+scaleFactor RN 1
+
+
+Return RN 0
+
+VlcMVD RN 0
+index RN 4
+Count RN 7 ;// same register as MBType
+
+mvHorData RN 4
+mvHorResidual RN 0
+
+mvVerData RN 4
+mvVerResidual RN 0
+
+temp RN 1
+
+temp1 RN 3
+High RN 4
+Low RN 2
+Range RN 1 ;// NOTE(review): same register as temp, scaleFactor and diffMVdy - loading temp destroys Range
+;// BlkCount shares r14 with LR (declared below); it is saved to pBlkCount before the BL to omxVCM4P2_FindMVpred and reloaded afterwards.
+BlkCount RN 14
+
+diffMVdx RN 0
+diffMVdy RN 1
+
+;// Scratch Registers
+;// RBitStream/RBitCount/RBitBuffer are handed to M_BD_INIT0; T1/T2 are the temporaries passed to the M_BD_* macros.
+RBitStream RN 8
+RBitCount RN 9
+RBitBuffer RN 10
+
+T1 RN 11
+T2 RN 12
+LR RN 14
+
+ IMPORT armVCM4P2_aVlcMVD
+ IMPORT omxVCM4P2_FindMVpred
+
+ ;// Allocate stack memory
+ ;// These three locals are written immediately before the call to omxVCM4P2_FindMVpred (see the FindMVPred label).
+ M_ALLOC4 ppDstMVCurMB,4
+ M_ALLOC4 pDstMVPredME,4
+ M_ALLOC4 pBlkCount,4
+ ;// Spill slots for argument registers that are reused inside the decode loop.
+ M_ALLOC4 pppBitStream,4
+ M_ALLOC4 ppBitOffset,4
+ M_ALLOC4 ppSrcMVLeftMB,4
+ M_ALLOC4 ppSrcMVUpperMB,4
+ ;// Decoded differentials and the 32*scaleFactor bound used for range wrapping.
+ M_ALLOC4 pdiffMVdx,4
+ M_ALLOC4 pdiffMVdy,4
+ M_ALLOC4 pHigh,4
+
+
+
+
+
+
+ M_START omxVCM4P2_DecodePadMV_PVOP,r11
+ ;// Decodes the motion vector differential(s) of one P-VOP macroblock, adds the predictor and stores four vectors in pDstMVCurMB.
+ M_ARG pSrcMVUpperRightMBonStack,4 ;// pointer to pSrcMVUpperRightMB on stack
+ M_ARG pDstMVCurMBonStack,4 ;// pointer to pDstMVCurMB on stack
+ M_ARG fcodeForwardonStack,4 ;// pointer to fcodeForward on stack
+ M_ARG MBTypeonStack,4 ;// pointer to MBType on stack
+
+ ;// Intra macroblocks carry no motion vectors: all four 4-byte entries of
+ ;// pDstMVCurMB are zeroed and the routine returns OMX_Sts_NoErr. The conditional
+ ;// stores are interleaved with the M_BD_INIT* macros, so they rely on those
+ ;// macros leaving the flags set by the TEQ pair untouched.
+ ;// Initializing the BitStream Macro
+
+ M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+ M_LDR MBType,MBTypeonStack ;// Load MBType from stack
+ M_LDR pDstMVCurMB,pDstMVCurMBonStack ;// Load pDstMVCurMB from stack
+ MOV zero,#0
+
+ TEQ MBType,#OMX_VC_INTRA ;// Check if MBType=OMX_VC_INTRA
+ TEQNE MBType,#OMX_VC_INTRA_Q ;// check if MBType=OMX_VC_INTRA_Q
+ STREQ zero,[pDstMVCurMB] ;// pDstMVCurMB[0]=0
+ M_BD_INIT1 T1, T2, T2
+ STREQ zero,[pDstMVCurMB,#4] ;// pDstMVCurMB[1]=0
+ M_BD_INIT2 T1, T2, T2
+ STREQ zero,[pDstMVCurMB,#8] ;// pDstMVCurMB[2]=0 (offset was #4, leaving entry 2 unwritten)
+ MOVEQ Return,#OMX_Sts_NoErr
+ MOV BlkCount,#0
+ STREQ zero,[pDstMVCurMB,#12] ;// pDstMVCurMB[3]=0 (offset was #4, leaving entry 3 unwritten)
+
+ BEQ ExitOK
+
+ TEQ MBType,#OMX_VC_INTER4V ;// Check if MBType=OMX_VC_INTER4V
+ TEQNE MBType,#OMX_VC_INTER4V_Q ;// Check if MBType=OMX_VC_INTER4V_Q
+ MOVEQ Count,#4
+ ;// Count and MBType are both r7, so MOVEQ Count overwrites MBType; MBType is reloaded from the stack after the loop.
+ TEQ MBType,#OMX_VC_INTER ;// Check if MBType=OMX_VC_INTER
+ TEQNE MBType,#OMX_VC_INTER_Q ;// Check if MBType=OMX_VC_INTER_Q
+ MOVEQ Count,#1
+
+ M_LDR fcodeForward,fcodeForwardonStack ;// Load fcodeForward from stack
+
+ ;// Storing the values temporarily on stack
+
+ M_STR ppBitStream,pppBitStream
+ M_STR pBitOffset,ppBitOffset
+
+
+ SUB temp,fcodeForward,#1 ;// temp=fcodeForward-1
+ MOV one,#1
+ M_STR pSrcMVLeftMB,ppSrcMVLeftMB
+ LSL scaleFactor,one,temp ;// scaleFactor=1<<(fcodeForward-1)
+ M_STR pSrcMVUpperMB,ppSrcMVUpperMB
+ LSL scaleFactor,scaleFactor,#5
+ M_STR scaleFactor,pHigh ;// [pHigh]=32*scaleFactor
+
+ ;// VLD Decoding
+
+ ;// One iteration per motion vector: Count is 4 for INTER4V/INTER4V_Q and 1 for INTER/INTER_Q.
+Loop
+
+ LDR VlcMVD, =armVCM4P2_aVlcMVD ;// Load the optimized MVD VLC table
+
+ ;// Horizontal Data and Residual calculation
+
+ LDR temp,=0xFFF ;// value compared against the decoded index to detect an invalid symbol
+ M_BD_VLD index,T1,T2,VlcMVD,3,2 ;// variable length decoding using the macro
+
+ TEQ index,temp
+ BEQ ExitError ;// Exit with an Error Message if the decoded symbol is an invalid symbol
+
+ SUB mvHorData,index,#32 ;// mvHorData=index-32
+ MOV mvHorResidual,#1 ;// mvHorResidual=1
+ CMP fcodeForward,#1
+ TEQNE mvHorData,#0
+ MOVEQ diffMVdx,mvHorData ;// if scaleFactor=1(fcodeForward=1) or mvHorData=0 diffMVdx=mvHorData
+ BEQ VerticalData
+
+ SUB temp,fcodeForward,#1
+ M_BD_VREAD8 mvHorResidual,temp,T1,T2 ;// get mvHorResidual from bitstream if fcodeForward>1 and mvHorData!=0
+
+ CMP mvHorData,#0 ;// the LT result is reused by the final RSBLT; nothing in between updates the flags
+ RSBLT mvHorData,mvHorData,#0 ;// mvHorData=abs(mvHorData)
+ SUB mvHorResidual,mvHorResidual,fcodeForward
+ SMLABB diffMVdx,mvHorData,fcodeForward,mvHorResidual ;// diffMVdx=abs(mvHorData)*fcodeForward+mvHorResidual-fcodeForward
+ ADD diffMVdx,diffMVdx,#1
+ RSBLT diffMVdx,diffMVdx,#0
+ ;// NOTE(review): ISO/IEC 14496-2 scales by f=1<<(fcodeForward-1); multiplying by fcodeForward itself differs once fcodeForward>2 - confirm (same for the vertical path).
+ ;// Vertical Data and Residual calculation
+
+VerticalData
+
+ M_STR diffMVdx,pdiffMVdx ;// Store the diffMVdx on stack
+ LDR VlcMVD, =armVCM4P2_aVlcMVD ;// Loading the address of optimized VLC tables
+
+ LDR temp,=0xFFF
+ M_BD_VLD index,T1,T2,VlcMVD,3,2 ;// VLC decoding using the macro
+
+ TEQ index,temp
+ BEQ ExitError ;// Exit with an Error Message if an Invalid Symbol occurs
+
+ SUB mvVerData,index,#32 ;// mvVerData=index-32
+ MOV mvVerResidual,#1
+ CMP fcodeForward,#1
+ TEQNE mvVerData,#0
+ MOVEQ diffMVdy,mvVerData ;// diffMVdy = mvVerData if scaleFactor=1(fcodeForward=1) or mvVerData=0
+ BEQ FindMVPred
+
+ SUB temp,fcodeForward,#1
+ M_BD_VREAD8 mvVerResidual,temp,T1,T2 ;// Get mvVerResidual from bit stream if fcodeForward>1 and mvVerData!=0
+
+
+ CMP mvVerData,#0
+ RSBLT mvVerData,mvVerData,#0
+ SUB mvVerResidual,mvVerResidual,fcodeForward
+ SMLABB diffMVdy,mvVerData,fcodeForward,mvVerResidual ;// diffMVdy=abs(mvVerData)*fcodeForward+mvVerResidual-fcodeForward
+ ADD diffMVdy,diffMVdy,#1
+ RSBLT diffMVdy,diffMVdy,#0
+
+ ;//Calling the Function omxVCM4P2_FindMVpred
+
+FindMVPred
+
+ M_STR diffMVdy,pdiffMVdy
+ ADD temp,pDstMVCurMB,BlkCount,LSL #2 ;// temp=&pDstMVCurMB[BlkCount]
+ M_STR temp,ppDstMVCurMB ;// store temp on stack for passing as an argument to FindMVPred
+
+ MOV temp,#0
+ M_STR temp,pDstMVPredME ;// Pass pDstMVPredME=NULL as an argument
+ M_STR BlkCount,pBlkCount ;// Pass BlkCount as Argument through stack
+
+ MOV temp,pSrcMVLeftMB ;// temp (RN 1)=pSrcMVLeftMB
+ M_LDR pSrcMVUpperRightMB,pSrcMVUpperRightMBonStack
+ MOV pSrcMVLeftMB,pSrcMVUpperMB ;// pSrcMVLeftMB ( RN 2) = pSrcMVUpperMB
+ MOV ppBitStream,pDstMVCurMB ;// ppBitStream ( RN 0) = pDstMVCurMB
+ MOV pSrcMVUpperMB,pSrcMVUpperRightMB ;// pSrcMVUpperMB( RN 3) = pSrcMVUpperRightMB
+ BL omxVCM4P2_FindMVpred ;// Branch to subroutine omxVCM4P2_FindMVpred
+
+ ;// Store Horizontal Motion Vector
+
+ M_LDR BlkCount,pBlkCount ;// Load BlkCount from stack
+ M_LDR High,pHigh ;// High=32*scaleFactor
+ LSL temp1,BlkCount,#2 ;// temp1=BlkCount*4
+ M_LDR diffMVdx,pdiffMVdx ;// Load diffMVdx
+
+ LDRSH temp,[pDstMVCurMB,temp1] ;// temp=pDstMVCurMB[BlkCount].dx (the slot whose address was passed to omxVCM4P2_FindMVpred)
+
+ RSB Low,High,#0 ;// Low = -32*scaleFactor
+ ADD diffMVdx,temp,diffMVdx ;// diffMVdx=pDstMVCurMB[BlkCount]+diffMVdx
+ ADD Range,High,High ;// Range=64*ScaleFactor
+ SUB High,High,#1 ;// High= 32*scaleFactor-1
+
+ CMP diffMVdx,Low ;// If diffMVdx<Low
+ ADDLT diffMVdx,diffMVdx,Range ;// diffMVdx+=Range
+
+ CMP diffMVdx,High
+ SUBGT diffMVdx,diffMVdx,Range ;// If diffMVdx > High diffMVdx-=Range
+ STRH diffMVdx,[pDstMVCurMB,temp1]
+
+ ;// Store Vertical
+ ADD temp1,temp1,#2 ;// temp1=4*BlkCount+2
+ M_LDR diffMVdx,pdiffMVdy ;// Load diffMVdy (kept in the diffMVdx register from here on)
+ LDRSH temp,[pDstMVCurMB,temp1] ;// temp=pDstMVCurMB[BlkCount].dy; temp and Range are both r1, so Range is lost here
+ ADD BlkCount,BlkCount,#1 ;// BlkCount=BlkCount+1
+ ADD diffMVdx,temp,diffMVdx
+ SUB Range,High,Low ;// rebuild Range: (32*scaleFactor-1)-(-32*scaleFactor)=64*scaleFactor-1
+ ADD Range,Range,#1 ;// Range=64*scaleFactor again (previously r1 still held the predicted dy)
+ CMP diffMVdx,Low
+ ADDLT diffMVdx,diffMVdx,Range ;// If diffMVdy<Low diffMVdy+=Range
+ CMP diffMVdx,High
+ SUBGT diffMVdx,diffMVdx,Range ;// If diffMVdy > High diffMVdy-=Range
+ STRH diffMVdx,[pDstMVCurMB,temp1]
+
+ CMP BlkCount,Count
+ M_LDR pSrcMVLeftMB,ppSrcMVLeftMB
+ M_LDR pSrcMVUpperMB,ppSrcMVUpperMB
+
+ BLT Loop ;// If BlkCount<Count Continue the Loop
+
+
+ ;// If MBType=OMX_VC_INTER or MBtype=OMX_VC_INTER_Q copy pDstMVCurMB[0] to
+ ;// pDstMVCurMB[1], pDstMVCurMB[2], pDstMVCurMB[3]
+
+ M_LDR MBType,MBTypeonStack
+
+ TEQ MBType,#OMX_VC_INTER
+ TEQNE MBType,#OMX_VC_INTER_Q
+ LDREQ temp,[pDstMVCurMB]
+ M_LDR ppBitStream,pppBitStream
+ STREQ temp,[pDstMVCurMB,#4]
+
+ STREQ temp,[pDstMVCurMB,#8]
+ STREQ temp,[pDstMVCurMB,#12]
+
+
+ M_LDR pBitOffset,ppBitOffset
+ ;//Ending the macro
+ M_BD_FINI ppBitStream,pBitOffset ;// Finishing the Macro
+
+
+ MOV Return,#OMX_Sts_NoErr
+ B ExitOK
+
+ExitError
+ ;// Invalid VLC symbol: r0/r1 were reused as locals, so the bitstream pointers are reloaded before M_BD_FINI.
+ M_LDR ppBitStream,pppBitStream
+ M_LDR pBitOffset,ppBitOffset
+ ;//Ending the macro
+ M_BD_FINI ppBitStream,pBitOffset
+
+ MOV Return,#OMX_Sts_Err
+
+ExitOK
+
+ M_END
+ ENDIF
+ END
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
new file mode 100755
index 0000000..7208c21
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
@@ -0,0 +1,132 @@
+;/**
+; *
+; * File Name: omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 12290
+; * Date: Wednesday, April 9, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for zigzag scanning and VLC decoding
+; * for inter block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_Inter
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one inter coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] ppBitStream pointer to the pointer to the current byte in
+; * the bitstream buffer
+; * [in] pBitOffset pointer to the bit position in the byte pointed
+; * to by *ppBitStream. *pBitOffset is valid within [0-7].
+; * [in] shortVideoHeader binary flag indicating presence of short_video_header;
+; * escape modes 0-3 are used if shortVideoHeader==0,
+; * and escape mode 4 is used when shortVideoHeader==1.
+; * [out] ppBitStream *ppBitStream is updated after the block is
+; * decoded, so that it points to the current byte
+; * in the bit stream buffer
+; * [out] pBitOffset *pBitOffset is updated so that it points to the
+; * current bit position in the byte pointed by
+; * *ppBitStream
+; * [out] pDst pointer to the coefficient buffer of current
+; * block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; * -pDst is not 16-byte aligned, or
+; * -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; * -At least one mark bit is equal to zero
+; * -Encountered an illegal stream code that cannot be found in the VLC table
+; * -Encountered an illegal code in the VLC FLC table
+; * -The number of coefficients is greater than 64
+; *
+; */
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+
+ IF ARM1136JS
+
+ ;// Import various tables needed for the function
+
+
+ IMPORT armVCM4P2_InterVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+ ;// Packed in Run:Level:Last format
+ IMPORT armVCM4P2_InterL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_InterL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_aClassicalZigzagScan ;// contains classical Zigzag table entries with double the original values
+ IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+
+
+;//Input Arguments
+;// All four arguments arrive in r0-r3; this routine does not write r0-r3 before calling the helper.
+ppBitStream RN 0
+pBitOffset RN 1
+pDst RN 2
+shortVideoHeader RN 3
+
+;//Local Variables
+
+Return RN 0
+;// The four table pointers share r4: each address is stored to its stack slot before the next one is loaded.
+pVlcTableL0L1 RN 4
+pLMAXTableL0L1 RN 4
+pRMAXTableL0L1 RN 4
+pZigzagTable RN 4
+Count RN 6
+
+
+
+ ;// Allocate stack memory to store the VLC,Zigzag,LMAX and RMAX tables
+ ;// (the table addresses are handed to armVCM4P2_DecodeVLCZigzag_AC_unsafe through these slots)
+
+ M_ALLOC4 ppVlcTableL0L1,4
+ M_ALLOC4 ppLMAXTableL0L1,4
+ M_ALLOC4 ppRMAXTableL0L1,4
+ M_ALLOC4 ppZigzagTable,4
+
+
+ M_START omxVCM4P2_DecodeVLCZigzag_Inter,r12
+ ;// Thin wrapper: publish the inter table addresses on the stack, set Count=0 and
+ ;// call the shared AC decoder.
+
+
+ LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load zigzag table
+ M_STR pZigzagTable,ppZigzagTable ;// Store zigzag table on stack to pass as argument to unsafe function
+ LDR pVlcTableL0L1, =armVCM4P2_InterVlcL0L1 ;// Load optimized VLC table with both L=0 and L=1 entries
+ M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store optimized VLC table address on stack
+ LDR pLMAXTableL0L1, =armVCM4P2_InterL0L1LMAX ;// Load Interleaved L=0 and L=1 LMAX Tables
+ M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX table address on stack
+ LDR pRMAXTableL0L1, =armVCM4P2_InterL0L1RMAX ;// Load Interleaved L=0 and L=1 RMAX Tables
+ MOV Count,#0 ;// set start=0
+ M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// store RMAX table address on stack
+
+
+ BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// call Unsafe Function for VLC Zigzag Decoding
+
+ ;// r0 is not written after the call; presumably the helper leaves the OMXResult there - confirm.
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
new file mode 100755
index 0000000..9a37ec9
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
@@ -0,0 +1,136 @@
+;/**
+; *
+; * File Name: omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 12290
+; * Date: Wednesday, April 9, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for zigzag scanning and VLC decoding
+; * for intra block (intra AC VLC path).
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] ppBitStream pointer to the pointer to the current byte in
+; * the bitstream buffer
+; * [in] pBitOffset pointer to the bit position in the byte pointed
+; * to by *ppBitStream. *pBitOffset is valid within [0-7].
+; * [in] PredDir AC prediction direction; selects which zigzag scan table is used
+; * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3
+; * are used if shortVideoHeader==0, and escape mode 4 is used when shortVideoHeader==1.
+; * [out] ppBitStream *ppBitStream is updated after the block is
+; * decoded, so that it points to the current byte
+; * in the bit stream buffer
+; * [out] pBitOffset *pBitOffset is updated so that it points to the
+; * current bit position in the byte pointed by
+; * *ppBitStream
+; * [out] pDst pointer to the coefficient buffer of current
+; * block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; * -pDst is not 16-byte aligned, or
+; * -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; * -At least one mark bit is equal to zero
+; * -Encountered an illegal stream code that cannot be found in the VLC table
+; * -Encountered an illegal code in the VLC FLC table
+; * -The number of coefficients is greater than 64
+; *
+; */
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+
+ IF ARM1136JS
+
+ ;// Import various tables needed for the function
+
+
+ IMPORT armVCM4P2_IntraVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+ ;// Packed in Run:Level:Last format
+ IMPORT armVCM4P2_IntraL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_IntraL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_aClassicalZigzagScan ;// contains classical Zigzag table entries with double the original values
+ IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+;//Input Arguments
+;// PredDir and shortVideoHeader share r3: PredDir is in r3 on entry and is consumed first; shortVideoHeader is then loaded from the stack into r3.
+ppBitStream RN 0
+pBitOffset RN 1
+pDst RN 2
+PredDir RN 3
+shortVideoHeader RN 3
+
+;//Local Variables
+
+Return RN 0
+;// The four table pointers share r4: each address is stored to its stack slot before the next one is loaded.
+pVlcTableL0L1 RN 4
+pLMAXTableL0L1 RN 4
+pRMAXTableL0L1 RN 4
+pZigzagTable RN 4
+Count RN 6
+
+
+
+ ;// Allocate stack memory to store optimized VLC,Zigzag, RMAX, LMAX Table Addresses
+
+ M_ALLOC4 ppVlcTableL0L1,4
+ M_ALLOC4 ppLMAXTableL0L1,4
+ M_ALLOC4 ppRMAXTableL0L1,4
+ M_ALLOC4 ppZigzagTable,4
+
+
+ M_START omxVCM4P2_DecodeVLCZigzag_IntraACVLC,r12
+ ;// Thin wrapper: publish the intra table addresses on the stack, set Count=0 and call the shared AC decoder.
+ M_ARG shortVideoHeaderonStack,4 ;// pointer to Input Argument on stack
+
+ LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load Address of the Zigzag table
+ ADD pZigzagTable, pZigzagTable, PredDir, LSL #6 ;// Select the zigzag table: base + PredDir*64 bytes
+
+ M_STR pZigzagTable,ppZigzagTable ;// Store Zigzag table address on stack
+ LDR pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1 ;// Load optimized packed VLC Table with both L=0 and L=1 entries
+ M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store VLC Table address on stack
+ LDR pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX ;// Load LMAX Table
+ M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX Table address on Stack
+ LDR pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX ;// Load RMAX Table
+ MOV Count,#0 ;// Set Start=0
+
+ M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Store RMAX Table address on stack
+
+
+ ;// r3 is free now that PredDir has been folded into the zigzag table address.
+ M_LDR shortVideoHeader,shortVideoHeaderonStack ;// get the Input Argument from stack
+
+ BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// Call Unsafe Function
+
+ ;// r0 is not written after the call; presumably the helper leaves the OMXResult there - confirm.
+
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
new file mode 100755
index 0000000..778aaf2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
@@ -0,0 +1,224 @@
+;/**
+; *
+; * File Name: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 12290
+; * Date: Wednesday, April 9, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for zigzag scanning and VLC decoding
+; * for intra block (intra DC VLC path).
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] ppBitStream pointer to the pointer to the current byte in
+; * the bitstream buffer
+; * [in] pBitOffset pointer to the bit position in the byte pointed
+; * to by *ppBitStream. *pBitOffset is valid within [0-7].
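+; * [in] PredDir AC prediction direction; selects which zigzag scan table is used
+; * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, and escape mode 4 is used when shortVideoHeader==1.
+; * [in] videoComp video component type; selects the luminance or chrominance section of the intra DC VLC table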
+; * [out] ppBitStream *ppBitStream is updated after the block is
+; * decoded, so that it points to the current byte
+; * in the bit stream buffer
+; * [out] pBitOffset *pBitOffset is updated so that it points to the
+; * current bit position in the byte pointed by
+; * *ppBitStream
+; * [out] pDst pointer to the coefficient buffer of current
+; * block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; * -pDst is not 16-byte aligned, or
+; * -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; * -At least one mark bit is equal to zero
+; * -Encountered an illegal stream code that cannot be found in the VLC table
+; * -Encountered an illegal code in the VLC FLC table
+; * -The number of coefficients is greater than 64
+; *
+; */
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+
+ M_VARIANTS CortexA8
+
+
+
+
+
+ IF CortexA8
+
+
+ ;// Import various tables needed for the function
+
+
+ IMPORT armVCM4P2_IntraVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+ ;// Packed in Run:Level:Last format
+ IMPORT armVCM4P2_IntraL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_IntraL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_aClassicalZigzagScan ;// contains CLassical, Horizontal, Vertical Zigzag table entries with double the original values
+ IMPORT armVCM4P2_aIntraDCLumaChromaIndex ;// Contains Optimized DCLuma and DCChroma Index table Entries
+
+
+ IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+;//Input Arguments
+;// PredDir is in r3 on entry; shortVideoHeader and videoComp are stack arguments loaded with M_LDR (shortVideoHeader reuses r3).
+ppBitStream RN 0
+pBitOffset RN 1
+pDst RN 2
+PredDir RN 3
+shortVideoHeader RN 3
+videoComp RN 5
+;//Local Variables
+;// Several aliases share a register (r4, r5, r6, r7); each is only valid in the phase that sets it.
+Return RN 0
+
+pDCLumaChromaIndex RN 4
+pDCChromaIndex RN 7
+pVlcTableL0L1 RN 4
+pLMAXTableL0L1 RN 4
+pRMAXTableL0L1 RN 4
+pZigzagTable RN 4
+Count RN 6
+DCValueSize RN 6
+powOfSize RN 7
+temp1 RN 5
+
+
+;// Scratch Registers
+
+RBitStream RN 8
+RBitBuffer RN 9
+RBitCount RN 10
+
+T1 RN 11
+T2 RN 12
+DCVal RN 14
+
+
+ ;// Allocate stack memory to store optimized VLC,Zigzag, RMAX, LMAX Table Addresses
+ ;// pDCCoeff additionally keeps the decoded DC value across the call to the AC decoder.
+ M_ALLOC4 ppVlcTableL0L1,4
+ M_ALLOC4 ppLMAXTableL0L1,4
+ M_ALLOC4 ppRMAXTableL0L1,4
+ M_ALLOC4 ppZigzagTable,4
+ M_ALLOC4 pDCCoeff,4
+
+
+
+ M_START omxVCM4P2_DecodeVLCZigzag_IntraDCVLC,r12
+ ;// Decodes the DC coefficient here, then calls the shared AC decoder with Count=1 and finally stores the DC value at pDst[0].
+ M_ARG shortVideoHeaderonStack,4 ;// Pointer to argument on stack
+ M_ARG videoComponstack,4 ;// Pointer to argument on stack
+
+
+ ;// Decode DC Coefficient
+
+
+ LDR pDCLumaChromaIndex, =armVCM4P2_aIntraDCLumaChromaIndex ;// Load Optimized VLC Table for Luminance and Chrominance
+
+ ;// Initializing the Bitstream Macro
+
+ M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+ M_LDR videoComp,videoComponstack
+ M_BD_INIT1 T1, T2, T2
+ ADD pDCLumaChromaIndex,pDCLumaChromaIndex,videoComp, LSL #6 ;// table base + videoComp*64 bytes
+ M_BD_INIT2 T1, T2, T2
+
+
+ M_BD_VLD DCValueSize,T1,T2,pDCLumaChromaIndex,4,2 ;// VLC Decode using optimized Luminance and Chrominance VLC Table
+
+
+
+
+DecodeDC
+
+ CMP DCValueSize,#12 ;// a decoded size above 12 is rejected as an error
+ BGT ExitError
+
+ CMP DCValueSize,#0
+ MOVEQ DCVal,#0 ;// If DCValueSize is zero then DC coeff =0
+ BEQ ACDecode ;// Branch to perform AC Coeff Decoding
+
+ M_BD_VREAD16 DCVal,DCValueSize,T1,T2 ;// Get DC Value From Bit stream
+
+
+ MOV powOfSize,#1
+ LSL powOfSize,DCValueSize ;// powOfSize=pow(2,DCValueSize)
+ CMP DCVal,powOfSize,LSR #1 ;// Compare DCVal with powOfSize/2
+ ADDLT DCVal,DCVal,#1
+ SUBLT DCVal,DCVal,powOfSize ;// If Lessthan powOfSize/2 DCVal=DCVal-powOfSize+1
+ ;// Else DCVal= fetchbits from bit stream
+
+CheckDCValueSize
+
+ CMP DCValueSize,#8 ;// If DCValueSize greater than 8 check marker bit
+
+ BLE ACDecode
+
+ M_BD_READ8 temp1,1,T1
+ TEQ temp1,#0 ;// If Marker bit is zero Exit with an Error Message
+ BEQ ExitError
+
+
+
+ ;// Decode AC Coefficient
+
+ACDecode
+
+ M_STR DCVal,pDCCoeff ;// Store Decoded DC Coeff on Stack
+ M_BD_FINI ppBitStream,pBitOffset ;// Terminating the Bit stream Macro
+
+ LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load Zigzag table address
+ ADD pZigzagTable, pZigzagTable, PredDir, LSL #6 ;// Modify the Zigzag table address based on PredDir (base + PredDir*64 bytes)
+
+ M_STR pZigzagTable,ppZigzagTable ;// Store zigzag table on stack
+ LDR pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1 ;// Load Optimized VLC Table With both Last=0 and Last=1 Entries
+ M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store Optimized VLC Table on stack
+ LDR pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX ;// Load LMAX Table
+ M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX table on stack
+ LDR pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX ;// Load RMAX Table
+ MOV Count,#1 ;// Set Start =1
+
+ M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Store RMAX Table on Stack
+
+ ;// r3 is free now that PredDir has been folded into the zigzag table address.
+ M_LDR shortVideoHeader,shortVideoHeaderonStack ;// Load the Input Argument From Stack
+
+ BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// Call the Unsafe Function
+ ;// NOTE(review): the store below uses pDst (r2) after the call, so it assumes the helper leaves r2 intact; r0 is passed through as the result - confirm.
+ M_LDR DCVal,pDCCoeff ;// Get the Decoded DC Value From Stack
+ STRH DCVal,[pDst] ;// Store the DC Value
+ B ExitOK
+
+
+
+ExitError
+
+ M_BD_FINI ppBitStream,pBitOffset ;// Terminating the Bit Stream Macro in case of an Error
+ MOV Return,#OMX_Sts_Err ;// Exit with an Error Message
+ExitOK
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
new file mode 100755
index 0000000..caf7121
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
@@ -0,0 +1,194 @@
+;//
+;//
+;// File Name: omxVCM4P2_FindMVpred_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Function:
+;// omxVCM4P2_FindMVpred
+;//
+ ;// Include headers
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armVCCOMM_s.h
+
+ ;// Define cpu variants
+ M_VARIANTS CortexA8
+
+
+ IF CortexA8
+ ;// Table of the four case labels. NOTE(review): it is not referenced by name below - presumably consumed by M_SWITCH or unused; confirm.
+ M_TABLE armVCM4P2_pBlkIndexTable
+ DCD OMXVCBlk0, OMXVCBlk1
+ DCD OMXVCBlk2, OMXVCBlk3
+
+;//--------------------------------------------
+;// Declare input registers
+;//--------------------------------------------
+;// r0-r3 carry the four source pointers; pDstMVPred, pDstMVPredME and iBlk are loaded from the stack arguments.
+pSrcMVCurMB RN 0
+pSrcCandMV1 RN 1
+pSrcCandMV2 RN 2
+pSrcCandMV3 RN 3
+pDstMVPred RN 4
+pDstMVPredME RN 5
+iBlk RN 6
+
+pTable RN 4
+CandMV RN 12
+
+pCandMV1 RN 7
+pCandMV2 RN 8
+pCandMV3 RN 9
+;// CandMV1dx..CandMV2dy reuse r0-r3, so the source pointers are dead once the candidates are loaded at BlkEnd.
+CandMV1dx RN 0
+CandMV1dy RN 1
+CandMV2dx RN 2
+CandMV2dy RN 3
+CandMV3dx RN 10
+CandMV3dy RN 11
+
+temp RN 14
+
+zero RN 14
+return RN 0
+
+; ----------------------------------------------
+; Main routine
+; ----------------------------------------------
+
+ M_ALLOC4 MV, 4
+
+ ;// Function header
+ M_START omxVCM4P2_FindMVpred, r11
+
+ ;// Define stack arguments
+ M_ARG ppDstMVPred, 4
+ M_ARG ppDstMVPredME, 4
+ M_ARG Blk, 4
+
+ M_ADR CandMV, MV
+ MOV zero, #0
+ M_LDR iBlk, Blk
+
+ ;// Set the default value for these
+ ;// to be used if pSrcCandMV[1|2|3] == NULL
+ MOV pCandMV1, CandMV
+ MOV pCandMV2, CandMV
+ MOV pCandMV3, CandMV
+
+ STR zero, [CandMV] ;// the default candidate is the zero vector held in the local MV
+
+ ;// Branch to the case based on blk number
+ M_SWITCH iBlk
+ M_CASE OMXVCBlk0 ;// iBlk=0
+ M_CASE OMXVCBlk1 ;// iBlk=1
+ M_CASE OMXVCBlk2 ;// iBlk=2
+ M_CASE OMXVCBlk3 ;// iBlk=3
+ M_ENDSWITCH
+ ;// Block 0: candidates come from the three neighbouring MBs at byte offsets 4, 8 and 8; a NULL neighbour keeps the zero default.
+OMXVCBlk0
+ CMP pSrcCandMV1, #0
+ ADDNE pCandMV1, pSrcCandMV1, #4
+
+ CMP pSrcCandMV2, #0
+ ADDNE pCandMV2, pSrcCandMV2, #8
+
+ CMP pSrcCandMV3, #0
+ ADDNE pCandMV3, pSrcCandMV3, #8
+ CMPEQ pSrcCandMV1, #0
+ ;// EQ here means candidates 3 and 1 are both NULL: all three then point at candidate 2.
+ MOVEQ pCandMV3, pCandMV2
+ MOVEQ pCandMV1, pCandMV2
+
+ CMP pSrcCandMV1, #0
+ CMPEQ pSrcCandMV2, #0
+ ;// EQ here means candidates 1 and 2 are both NULL: all three then point at candidate 3.
+ MOVEQ pCandMV1, pCandMV3
+ MOVEQ pCandMV2, pCandMV3
+
+ CMP pSrcCandMV2, #0
+ CMPEQ pSrcCandMV3, #0
+ ;// EQ here means candidates 2 and 3 are both NULL: all three then point at candidate 1.
+ MOVEQ pCandMV2, pCandMV1
+ MOVEQ pCandMV3, pCandMV1
+
+ B BlkEnd
+ ;// Block 1: candidate 1 is the current MB at offset 0; candidates 2 and 3 come from neighbours at offsets 12 and 8.
+OMXVCBlk1
+ MOV pCandMV1, pSrcMVCurMB
+ CMP pSrcCandMV3, #0
+ ADDNE pCandMV3, pSrcCandMV3, #8
+
+ CMP pSrcCandMV2, #0
+ ADDNE pCandMV2, pSrcCandMV2, #12
+
+ CMPEQ pSrcCandMV3, #0
+ ;// EQ here means candidates 2 and 3 are both NULL: both then reuse candidate 1.
+ MOVEQ pCandMV2, pCandMV1
+ MOVEQ pCandMV3, pCandMV1
+
+ B BlkEnd
+ ;// Block 2: candidate 1 comes from pSrcCandMV1 at offset 12 (zero default if NULL); candidates 2 and 3 are the current MB at offsets 0 and 4.
+OMXVCBlk2
+ CMP pSrcCandMV1, #0
+ MOV pCandMV2, pSrcMVCurMB
+ ADD pCandMV3, pSrcMVCurMB, #4
+ ADDNE pCandMV1, pSrcCandMV1, #12
+ B BlkEnd
+ ;// Block 3: all candidates come from the current MB (offsets 8, 0 and 4).
+OMXVCBlk3
+ ADD pCandMV1, pSrcMVCurMB, #8
+ MOV pCandMV2, pSrcMVCurMB
+ ADD pCandMV3, pSrcMVCurMB, #4
+
+BlkEnd
+
+ ;// Load the first halfword (dx) of each candidate; the post-increment
+ ;// by 2 leaves each pointer on the second halfword (dy)
+ LDRSH CandMV1dx, [pCandMV1], #2
+ LDRSH CandMV2dx, [pCandMV2], #2
+ LDRSH CandMV3dx, [pCandMV3], #2
+
+ ;// Load argument from the stack
+ M_LDR pDstMVPredME, ppDstMVPredME
+
+ LDRSH CandMV1dy, [pCandMV1]
+ LDRSH CandMV2dy, [pCandMV2]
+ LDRSH CandMV3dy, [pCandMV3]
+
+ CMP pDstMVPredME, #0
+
+ ;// Store the candidate MV's into the pDstMVPredME,
+ ;// these can be used in the fast algorithm if implemented
+ ;// (skipped when pDstMVPredME is NULL)
+ STRHNE CandMV1dx, [pDstMVPredME], #2
+ STRHNE CandMV1dy, [pDstMVPredME], #2
+ STRHNE CandMV2dx, [pDstMVPredME], #2
+ STRHNE CandMV2dy, [pDstMVPredME], #2
+ STRHNE CandMV3dx, [pDstMVPredME], #2
+ STRHNE CandMV3dy, [pDstMVPredME]
+
+ ; Find the median of the 3 candidate MV's
+ M_MEDIAN3 CandMV1dx, CandMV2dx, CandMV3dx, temp
+
+ ;// Load argument from the stack
+ M_LDR pDstMVPred, ppDstMVPred
+
+ M_MEDIAN3 CandMV1dy, CandMV2dy, CandMV3dy, temp
+ ;// The third M_MEDIAN3 operand is what gets stored, so it presumably holds the median - confirm against the macro.
+ STRH CandMV3dx, [pDstMVPred], #2
+ STRH CandMV3dy, [pDstMVPred]
+
+ MOV return, #OMX_Sts_NoErr
+
+ M_END
+ ENDIF ;// CortexA8
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
new file mode 100755
index 0000000..b5e3d0d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
@@ -0,0 +1,73 @@
+;//
+;//
+;// File Name: omxVCM4P2_IDCT8x8blk_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Function:
+;// omxVCM4P2_IDCT8x8blk
+;//
+ ;// Include headers
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ ;// Define cpu variants
+ M_VARIANTS CortexA8
+
+ INCLUDE armCOMM_IDCT_s.h
+
+ IMPORT armCOMM_IDCTPreScale
+ ;//
+ ;// Function prototype
+ ;//
+ ;// OMXResult
+ ;// omxVCM4P2_IDCT8x8blk(const OMX_S16* pSrc,
+ ;// OMX_S16* pDst)
+ ;//
+
+ IF CortexA8
+ M_ALLOC4 ppDest, 4
+ M_ALLOC4 pStride, 4
+ M_ALLOC8 pBlk, 2*8*8
+ ENDIF
+ ;// NOTE(review): ppDest, pStride and pBlk are not referenced in this file; presumably they are used inside M_IDCT - confirm.
+
+ IF CortexA8
+ M_START omxVCM4P2_IDCT8x8blk, r11, d15
+ ENDIF
+
+ IF CortexA8
+
+;// Declare input registers
+pSrc RN 0
+pDst RN 1
+
+;// Declare other intermediate registers
+Result RN 0
+
+;// Prototype for macro M_IDCT
+;// pSrc RN 0 ;// source data buffer
+;// Stride RN 1 ;// destination stride in bytes
+;// pDest RN 2 ;// destination data buffer
+;// pScale RN 3 ;// pointer to scaling table
+;// NOTE(review): Stride aliases pDst (both r1) and is never assigned below; presumably M_IDCT takes the stride from its literal argument 16 - confirm.
+pSrc RN 0
+Stride RN 1
+pDest RN 2
+pScale RN 3
+
+ MOV pDest, pDst
+ LDR pScale, =armCOMM_IDCTPreScale
+ M_IDCT s9, s16, 16
+ MOV Result, #OMX_Sts_NoErr
+ M_END
+ ENDIF
+ ;// CortexA8
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
new file mode 100755
index 0000000..dd00df5
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
@@ -0,0 +1,444 @@
+;//
+;//
+;// File Name: omxVCM4P2_MCReconBlock_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// Description:
+;//
+;//
+
+;// Include standard headers
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+;// Import symbols required from other files
+
+ M_VARIANTS CortexA8
+
+;// ***************************************************************************
+;// ARM1136JS implementation
+;// ***************************************************************************
+
+;// ***************************************************************************
+;// CortexA8 implementation
+;// ***************************************************************************
+ IF CortexA8
+;// ***************************************************************************
+;// MACRO DEFINITIONS
+;// ***************************************************************************
+ ;// Description:
+ ;// Handles the "IntegerPixel" predictType. No interpolation is needed, so
+ ;// both rounding cases share this code: it copies 8 rows of 8 bytes from pSrc to pDst
+ ;//
+ ;// Syntax:
+ ;// M_MCRECONBLOCK_IntegerPixel
+ ;//
+ ;// Inputs: None (works on the pSrc, srcStep, pDst and dstStep registers)
+ ;// Outputs: None
+
+ MACRO
+ M_MCRECONBLOCK_IntegerPixel
+CaseIntegerPixel_Rnd0 ;// both switch targets land on the same code
+CaseIntegerPixel_Rnd1
+
+ VLD1 dRow0, [pSrc], srcStep ;// load one 8-byte row, then advance pSrc by srcStep
+ VLD1 dRow1, [pSrc], srcStep
+ VLD1 dRow2, [pSrc], srcStep
+ VLD1 dRow3, [pSrc], srcStep
+ VLD1 dRow4, [pSrc], srcStep
+ VLD1 dRow5, [pSrc], srcStep
+ VLD1 dRow6, [pSrc], srcStep
+ VLD1 dRow7, [pSrc], srcStep
+
+ VST1 dRow0, [pDst@64], dstStep ;// @64: the store specifies 64-bit alignment for each destination row
+ VST1 dRow1, [pDst@64], dstStep
+ VST1 dRow2, [pDst@64], dstStep
+ VST1 dRow3, [pDst@64], dstStep
+ VST1 dRow4, [pDst@64], dstStep
+ VST1 dRow5, [pDst@64], dstStep
+ VST1 dRow6, [pDst@64], dstStep
+ VST1 dRow7, [pDst@64], dstStep
+
+ B SwitchPredictTypeEnd ;// skip the other switch cases that follow this macro's expansion
+ MEND
+;// ***************************************************************************
+ ;// Description:
+ ;// Does interpolation for the case of "HalfPixelX" predictType. The two
+ ;// rounding cases are handled by the parameter "$rndVal". Averages between
+ ;// a pixel and pixel right to it, rounding it based on $rndVal. The
+ ;// rounding is implemented by using opCode switching between "VRHADD" and
+ ;// "VHADD" instructions.
+ ;//
+ ;// Syntax:
+ ;// M_MCRECONBLOCK_HalfPixelX $rndVal
+ ;//
+ ;// Inputs:
+ ;// $rndVal: 0 selects VRHADD (rounding), 1 selects VHADD (no rounding)
+ ;// Outputs: None
+
+ MACRO
+ M_MCRECONBLOCK_HalfPixelX $rndVal
+
+ LCLS M_VHADDR ;// local string variable holding the averaging mnemonic
+ IF $rndVal = 0
+M_VHADDR SETS "VRHADD"
+ ELSE
+M_VHADDR SETS "VHADD"
+ ENDIF
+
+CaseHalfPixelX_Rnd$rndVal
+
+ VLD1 {dRow0, dRow0Shft}, [pSrc], srcStep ;// load 16 bytes: the 8-pixel row plus the 8 bytes to its right
+ VEXT dRow0Shft, dRow0, dRow0Shft, #1 ;// dRow0Shft = the row shifted left by one pixel
+ VLD1 {dRow1, dRow1Shft}, [pSrc], srcStep
+ VEXT dRow1Shft, dRow1, dRow1Shft, #1
+ VLD1 {dRow2, dRow2Shft}, [pSrc], srcStep
+ VEXT dRow2Shft, dRow2, dRow2Shft, #1
+ VLD1 {dRow3, dRow3Shft}, [pSrc], srcStep
+ VEXT dRow3Shft, dRow3, dRow3Shft, #1
+ VLD1 {dRow4, dRow4Shft}, [pSrc], srcStep
+ VEXT dRow4Shft, dRow4, dRow4Shft, #1
+ VLD1 {dRow5, dRow5Shft}, [pSrc], srcStep
+ VEXT dRow5Shft, dRow5, dRow5Shft, #1
+ VLD1 {dRow6, dRow6Shft}, [pSrc], srcStep
+ VEXT dRow6Shft, dRow6, dRow6Shft, #1
+ VLD1 {dRow7, dRow7Shft}, [pSrc], srcStep
+ VEXT dRow7Shft, dRow7, dRow7Shft, #1
+ $M_VHADDR dRow0, dRow0, dRow0Shft ;// average each pixel with its right-hand neighbour
+ $M_VHADDR dRow1, dRow1, dRow1Shft
+ VST1 dRow0, [pDst@64], dstStep ;// stores are interleaved with the remaining averages
+ $M_VHADDR dRow2, dRow2, dRow2Shft
+ VST1 dRow1, [pDst@64], dstStep
+ $M_VHADDR dRow3, dRow3, dRow3Shft
+ VST1 dRow2, [pDst@64], dstStep
+ $M_VHADDR dRow4, dRow4, dRow4Shft
+ VST1 dRow3, [pDst@64], dstStep
+ $M_VHADDR dRow5, dRow5, dRow5Shft
+ VST1 dRow4, [pDst@64], dstStep
+ $M_VHADDR dRow6, dRow6, dRow6Shft
+ VST1 dRow5, [pDst@64], dstStep
+ $M_VHADDR dRow7, dRow7, dRow7Shft
+ VST1 dRow6, [pDst@64], dstStep
+ VST1 dRow7, [pDst@64], dstStep
+
+ B SwitchPredictTypeEnd ;// skip the other switch cases that follow this macro's expansion
+ MEND
+;// ***************************************************************************
+ ;// Description:
+ ;// Does interpolation for the case of "HalfPixelY" predictType. The two
+ ;// rounding cases are handled by the parameter "$rndVal". Averages between
+ ;// a pixel and pixel below it, rounding it based on $rndVal. The
+ ;// rounding is implemented by using opCode switching between "VRHADD" and
+ ;// "VHADD" instructions.
+ ;//
+ ;// Syntax:
+ ;// M_MCRECONBLOCK_HalfPixelY $rndVal
+ ;//
+ ;// Inputs:
+ ;// $rndVal: 0 selects VRHADD (rounding), 1 selects VHADD (no rounding)
+ ;// Outputs: None
+
+ MACRO
+ M_MCRECONBLOCK_HalfPixelY $rndVal
+
+ LCLS M_VHADDR ;// local string variable holding the averaging mnemonic
+ IF $rndVal = 0
+M_VHADDR SETS "VRHADD"
+ ELSE
+M_VHADDR SETS "VHADD"
+ ENDIF
+
+CaseHalfPixelY_Rnd$rndVal
+ VLD1 dRow0, [pSrc], srcStep
+ VLD1 dRow1, [pSrc], srcStep
+ VLD1 dRow2, [pSrc], srcStep
+ VLD1 dRow3, [pSrc], srcStep
+ VLD1 dRow4, [pSrc], srcStep
+ VLD1 dRow5, [pSrc], srcStep
+ VLD1 dRow6, [pSrc], srcStep
+ VLD1 dRow7, [pSrc], srcStep
+ $M_VHADDR dRow0, dRow0, dRow1 ;// average each row with the row below it
+ VLD1 dRow8, [pSrc], srcStep ;// ninth source row: the row below row 7
+ $M_VHADDR dRow1, dRow1, dRow2
+ VST1 dRow0, [pDst@64], dstStep ;// stores are interleaved with the remaining averages
+ $M_VHADDR dRow2, dRow2, dRow3
+ VST1 dRow1, [pDst@64], dstStep
+ $M_VHADDR dRow3, dRow3, dRow4
+ VST1 dRow2, [pDst@64], dstStep
+ $M_VHADDR dRow4, dRow4, dRow5
+ VST1 dRow3, [pDst@64], dstStep
+ $M_VHADDR dRow5, dRow5, dRow6
+ VST1 dRow4, [pDst@64], dstStep
+ $M_VHADDR dRow6, dRow6, dRow7
+ VST1 dRow5, [pDst@64], dstStep
+ $M_VHADDR dRow7, dRow7, dRow8
+ VST1 dRow6, [pDst@64], dstStep
+ VST1 dRow7, [pDst@64], dstStep
+
+ B SwitchPredictTypeEnd ;// skip the other switch cases that follow this macro's expansion
+ MEND
+;// ***************************************************************************
+ ;// Description:
+ ;// Does interpolation for the case of "HalfPixelXY" predictType. Both
+ ;// rounding cases are handled.
+ ;// Typical computation for a row goes like this
+ ;// 1. VLD1 {dRow0, dRow0Shft}, [pSrc], srcStep ;// Load the row and next 8 bytes
+ ;// 2. VEXT dRow0Shft, dRow0, dRow0Shft, #1 ;// Generate the shifted row
+ ;// 3. VADDL qSum0, dRow0, dRow0Shft ;// Generate the sum of row and shifted row
+ ;// 5. VADD qSum0, qSum0, qSum1 ;// Add to the sum of next row (odd row sum has rounding value added to it)
+ ;// 6. VSHRN dRow0, qSum0, #2 ;// Divide by 4
+ ;// 7. VST1 dRow0, [pDst@64], dstStep ;// Store
+ ;// Odd rows undergo following computation after step 3
+ ;// 4. VADD qSum1, qSum1, qRound
+ ;// This saves for adding rounding value to each final sum (overall saves 4
+ ;// instructions).
+ ;// There is reuse of registers for qSum6, qSum7 & qSum8. Overall scheduling takes
+ ;// care of this and also minimizes stalls. Rounding value was modified in
+ ;// ARM register rndVal (originally used for rounding flag) before the switch.
+ ;// It is then populated into all lanes in this macro. No branching out to
+ ;// label "SwitchPredictTypeEnd" is required in the end of the macro as these
+ ;// are the last of switch cases.
+ ;//
+ ;// Syntax:
+ ;// M_MCRECONBLOCK_HalfPixelXY
+ ;//
+ ;// Inputs: None
+ ;// Outputs: None
+
+ MACRO
+ M_MCRECONBLOCK_HalfPixelXY
+
+CaseHalfPixelXY_Rnd0
+CaseHalfPixelXY_Rnd1
+ VLD1 {dRow0, dRow0Shft}, [pSrc], srcStep
+ VDUP qRound, rndVal ;// broadcast the rounding constant to every 16-bit lane
+ VLD1 {dRow1, dRow1Shft}, [pSrc], srcStep
+ VEXT dRow0Shft, dRow0, dRow0Shft, #1
+ VLD1 {dRow2, dRow2Shft}, [pSrc], srcStep
+ VEXT dRow1Shft, dRow1, dRow1Shft, #1
+ VLD1 {dRow3, dRow3Shft}, [pSrc], srcStep
+ VEXT dRow2Shft, dRow2, dRow2Shft, #1
+ VLD1 {dRow4, dRow4Shft}, [pSrc], srcStep
+ VADDL qSum0, dRow0, dRow0Shft
+ VLD1 {dRow5, dRow5Shft}, [pSrc], srcStep
+ VADDL qSum1, dRow1, dRow1Shft
+ VLD1 {dRow6, dRow6Shft}, [pSrc], srcStep
+ VEXT dRow3Shft, dRow3, dRow3Shft, #1
+ VLD1 {dRow7, dRow7Shft}, [pSrc], srcStep
+ VEXT dRow4Shft, dRow4, dRow4Shft, #1
+ VLD1 {dRow8, dRow8Shft}, [pSrc], srcStep ;// ninth source row: the row below row 7
+ VADD qSum1, qSum1, qRound ;// odd-row sums carry the rounding constant
+ VADDL qSum2, dRow2, dRow2Shft
+ VEXT dRow5Shft, dRow5, dRow5Shft, #1
+ VADD qSum0, qSum0, qSum1
+ VADDL qSum3, dRow3, dRow3Shft
+ VEXT dRow6Shft, dRow6, dRow6Shft, #1
+ VADD qSum1, qSum1, qSum2
+ VSHRN dRow0, qSum0, #2
+ VADDL qSum4, dRow4, dRow4Shft
+ VSHRN dRow1, qSum1, #2
+ VADD qSum3, qSum3, qRound
+ VADDL qSum5, dRow5, dRow5Shft
+ VST1 dRow0, [pDst@64], dstStep
+ VEXT dRow7Shft, dRow7, dRow7Shft, #1
+ VST1 dRow1, [pDst@64], dstStep
+ VEXT dRow8Shft, dRow8, dRow8Shft, #1
+ VADD qSum5, qSum5, qRound
+ VADD qSum2, qSum2, qSum3
+ VADD qSum3, qSum3, qSum4
+ VADD qSum4, qSum4, qSum5
+ VSHRN dRow2, qSum2, #2
+ VSHRN dRow3, qSum3, #2
+ VSHRN dRow4, qSum4, #2
+ VADDL qSum6, dRow6, dRow6Shft ;// qSum6 is Q0 (D0/D1); dRow0 was already stored above
+ VADDL qSum7, dRow7, dRow7Shft ;// qSum7 is Q1 (D2/D3); dRow1 was already stored above
+ VST1 dRow2, [pDst@64], dstStep
+ VADDL qSum8, dRow8, dRow8Shft ;// qSum8 is Q2 (D4/D5); dRow2 was stored on the previous line
+ VADD qSum7, qSum7, qRound
+ VST1 dRow3, [pDst@64], dstStep
+ VST1 dRow4, [pDst@64], dstStep
+ VADD qSum5, qSum5, qSum6
+ VADD qSum6, qSum6, qSum7
+ VADD qSum7, qSum7, qSum8
+ VSHRN dRow5, qSum5, #2
+ VSHRN dRow6, qSum6, #2
+ VSHRN dRow7, qSum7, #2
+ VST1 dRow5, [pDst@64], dstStep
+ VST1 dRow6, [pDst@64], dstStep
+ VST1 dRow7, [pDst@64], dstStep
+
+ MEND
+;// ***************************************************************************
+
+;// Input/Output Registers
+pSrc RN 0
+srcStep RN 1
+pSrcResidue RN 2
+pDst RN 3
+dstStep RN 4
+predictType RN 5
+rndVal RN 6
+
+;// Local Scratch Registers (both share r0 with pSrc, which is no longer needed once the rows are loaded)
+pDstCopy RN 0
+return RN 0
+
+;// Neon Registers
+dRow0 DN D0.U8
+dRow0Shft DN D1.U8
+dRow1 DN D2.U8
+dRow1Shft DN D3.U8
+dRow2 DN D4.U8
+dRow2Shft DN D5.U8
+dRow3 DN D6.U8
+dRow3Shft DN D7.U8
+dRow4 DN D8.U8
+dRow4Shft DN D9.U8
+dRow5 DN D10.U8
+dRow5Shft DN D11.U8
+dRow6 DN D12.U8
+dRow6Shft DN D13.U8
+dRow7 DN D14.U8
+dRow7Shft DN D15.U8
+dRow8 DN D16.U8
+dRow8Shft DN D17.U8
+
+;// 16-bit row sums for HalfPixelXY; qSum6..qSum8 reuse Q0..Q2, which overlap dRow0..dRow2Shft
+qSum0 QN Q9.U16
+qSum1 QN Q10.U16
+qSum2 QN Q11.U16
+qSum3 QN Q12.U16
+qSum4 QN Q13.U16
+qSum5 QN Q14.U16
+qSum6 QN Q0.U16
+qSum7 QN Q1.U16
+qSum8 QN Q2.U16
+
+qRound QN Q15.U16
+
+dDst0 DN D0.U8
+dDst1 DN D1.U8
+dDst2 DN D2.U8
+dDst3 DN D3.U8
+dDst4 DN D4.U8
+dDst5 DN D5.U8
+dDst6 DN D6.U8
+dDst7 DN D7.U8
+
+qRes0 QN Q4.S16
+qRes1 QN Q5.S16
+qRes2 QN Q6.S16
+qRes3 QN Q7.S16
+qRes4 QN Q8.S16
+qRes5 QN Q9.S16
+qRes6 QN Q10.S16
+qRes7 QN Q11.S16
+
+ ;// Function header
+ M_START omxVCM4P2_MCReconBlock, r6, d15
+ ;// Define stack arguments
+ M_ARG Arg_dstStep, 4
+ M_ARG Arg_predictType, 4
+ M_ARG Arg_rndVal, 4
+ ;// Load argument from the stack
+ M_LDR dstStep, Arg_dstStep
+ M_LDR predictType, Arg_predictType
+ M_LDR rndVal, Arg_rndVal
+ ADD predictType, rndVal, predictType, LSL #1 ;// switch index = 2*predictType + rndVal
+ RSB rndVal, rndVal, #2 ;// preparing rndVal for HalfPixelXY: rounding constant = 2 - rndVal
+
+ ;// The following is implementation of switching to different code segments
+ ;// based on different predictType and rndVal flags. The corresponding
+ ;// labels (e.g. CaseIntegerPixel_Rnd0) are embedded in the macros following
+ ;// M_ENDSWITCH (e.g. M_MCRECONBLOCK_IntegerPixel). While "M_MCRECONBLOCK_IntegerPixel"
+ ;// and "M_MCRECONBLOCK_HalfPixelXY" handle for both rounding cases;
+ ;// "M_MCRECONBLOCK_HalfPixelX" and "M_MCRECONBLOCK_HalfPixelY" macros handle
+ ;// the two rounding cases in separate code bases.
+ ;// All these together implement the interpolation functionality
+
+ M_SWITCH predictType
+ M_CASE CaseIntegerPixel_Rnd0
+ M_CASE CaseIntegerPixel_Rnd1
+ M_CASE CaseHalfPixelX_Rnd0
+ M_CASE CaseHalfPixelX_Rnd1
+ M_CASE CaseHalfPixelY_Rnd0
+ M_CASE CaseHalfPixelY_Rnd1
+ M_CASE CaseHalfPixelXY_Rnd0
+ M_CASE CaseHalfPixelXY_Rnd1
+ M_ENDSWITCH
+
+ M_MCRECONBLOCK_IntegerPixel
+ M_MCRECONBLOCK_HalfPixelX 0
+ M_MCRECONBLOCK_HalfPixelX 1
+ M_MCRECONBLOCK_HalfPixelY 0
+ M_MCRECONBLOCK_HalfPixelY 1
+ M_MCRECONBLOCK_HalfPixelXY
+SwitchPredictTypeEnd
+
+ ;// After interpolation is done, residue needs to be added. This is done
+ ;// only in case "pSrcResidue" parameter to the function is not NULL.
+ ;// Following is a completely unrolled code to do so. Each row and
+ ;// corresponding residue is loaded and residue is added and value
+ ;// stored
+
+ CMP pSrcResidue, #0
+ SUBNE pDst, pDst, dstStep, LSL #3 ;// Restoring pDst: step back over the 8 rows just written
+ MOVNE pDstCopy, pDst ;// second pointer for the stores; pDst itself is used for the loads
+ BEQ pSrcResidueConditionEnd
+pSrcResidueNotNull
+ VLD1 dDst0, [pDst@64], dstStep ;// reload one predicted row (8 x U8)
+ VLD1 qRes0, [pSrcResidue@128]! ;// load its 8 residues (S16); the load specifies 128-bit alignment and advances pSrcResidue by 16 bytes
+ VLD1 dDst1, [pDst@64], dstStep
+ VLD1 qRes1, [pSrcResidue@128]!
+ VLD1 dDst2, [pDst@64], dstStep
+ VLD1 qRes2, [pSrcResidue@128]!
+ VADDW qRes0, qRes0, dDst0 ;// widen the prediction and add it to the residue
+ VLD1 dDst3, [pDst@64], dstStep
+ VADDW qRes1, qRes1, dDst1
+ VLD1 qRes3, [pSrcResidue@128]!
+ VADDW qRes2, qRes2, dDst2
+ VLD1 dDst4, [pDst@64], dstStep
+ VQMOVUN dDst0, qRes0 ;// saturate the signed 16-bit sum to unsigned 8 bits
+ VLD1 qRes4, [pSrcResidue@128]!
+ VADDW qRes3, qRes3, dDst3
+ VLD1 dDst5, [pDst@64], dstStep
+ VQMOVUN dDst1, qRes1
+ VLD1 qRes5, [pSrcResidue@128]!
+ VADDW qRes4, qRes4, dDst4
+ VLD1 dDst6, [pDst@64], dstStep
+ VQMOVUN dDst2, qRes2
+ VLD1 qRes6, [pSrcResidue@128]!
+ VADDW qRes5, qRes5, dDst5
+ VLD1 dDst7, [pDst@64], dstStep
+ VQMOVUN dDst3, qRes3
+ VLD1 qRes7, [pSrcResidue@128]!
+ VADDW qRes6, qRes6, dDst6
+ VST1 dDst0, [pDstCopy@64], dstStep ;// write the reconstructed rows back over the prediction
+ VQMOVUN dDst4, qRes4
+ VST1 dDst1, [pDstCopy@64], dstStep
+ VADDW qRes7, qRes7, dDst7
+ VST1 dDst2, [pDstCopy@64], dstStep
+ VQMOVUN dDst5, qRes5
+ VST1 dDst3, [pDstCopy@64], dstStep
+ VQMOVUN dDst6, qRes6
+ VST1 dDst4, [pDstCopy@64], dstStep
+ VQMOVUN dDst7, qRes7
+ VST1 dDst5, [pDstCopy@64], dstStep
+ VST1 dDst6, [pDstCopy@64], dstStep
+ VST1 dDst7, [pDstCopy@64], dstStep
+
+pSrcResidueConditionEnd
+ MOV return, #OMX_Sts_NoErr ;// always reports success
+
+ M_END
+ ENDIF ;// CortexA8
+ END
+;// ***************************************************************************
+;// omxVCM4P2_MCReconBlock ends
+;// ***************************************************************************
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
new file mode 100755
index 0000000..a73f64a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
@@ -0,0 +1,320 @@
+; **********
+; *
+; * File Name: omxVCM4P2_PredictReconCoefIntra_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 12290
+; * Date: Wednesday, April 9, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains module for DC/AC coefficient prediction
+; *
+; *
+; * Function: omxVCM4P2_PredictReconCoefIntra
+; *
+; * Description:
+; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+; * to the function call, prediction direction (predDir) should be selected
+; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst pointer to the coefficient buffer which contains the
+; * quantized coefficient residuals (PQF) of the current
+; * block; must be aligned on a 4-byte boundary. The
+; * output coefficients are saturated to the range
+; * [-2048, 2047].
+; * [in] pPredBufRow pointer to the coefficient row buffer; must be aligned
+; * on a 4-byte boundary.
+; * [in] pPredBufCol pointer to the coefficient column buffer; must be
+; * aligned on a 4-byte boundary.
+; * [in] curQP quantization parameter of the current block. curQP may
+; * be equal to predQP, especially when the current block and
+; * the predictor block are in the same macroblock.
+; * [in] predQP quantization parameter of the predictor block
+; * [in] predDir indicates the prediction direction which takes one
+; * of the following values:
+; * OMX_VIDEO_HORIZONTAL predict horizontally
+; * OMX_VIDEO_VERTICAL predict vertically
+; * [in] ACPredFlag a flag indicating if AC prediction should be
+; * performed. It is equal to ac_pred_flag in the bit
+; * stream syntax of MPEG-4
+; * [in] videoComp video component type (luminance, chrominance or
+; * alpha) of the current block
+; * [out] pSrcDst pointer to the coefficient buffer which contains
+; * the quantized coefficients (QF) of the current
+; * block
+; * [out] pPredBufRow pointer to the updated coefficient row buffer
+; * [out] pPredBufCol pointer to the updated coefficient column buffer
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - Bad arguments
+; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol.
+; * - At least one of the following cases: curQP <= 0, predQP <= 0, curQP > 31,
+; * predQP > 31, predDir exceeds [1,2].
+; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not
+; * 4-byte aligned.
+; *
+; *********
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+
+
+ IMPORT armVCM4P2_Reciprocal_QP_S32
+ IMPORT armVCM4P2_Reciprocal_QP_S16
+ IMPORT armVCM4P2_DCScaler
+
+ IF CortexA8
+;// Input Arguments (the first four arrive in r0-r3; the other four are read from the stack below)
+
+pSrcDst RN 0
+pPredBufRow RN 1
+pPredBufCol RN 2
+curQP RN 3
+QP RN 3
+predQP RN 4
+predDir RN 5
+ACPredFlag RN 6
+videoComp RN 7
+
+;// Local Variables (several names share one core register; each is used only while its siblings are dead)
+
+shortVideoHeader RN 4
+dcScaler RN 4
+index RN 6
+predCoeffTable RN 7
+temp1 RN 6
+temp2 RN 9
+temp RN 14
+Const RN 8
+temppPredColBuf RN 8
+tempPred RN 9
+
+absCoeffDC RN 8
+negdcScaler RN 10
+Rem RN 11
+temp3 RN 12
+
+dcRowbufCoeff RN 10
+dcColBuffCoeff RN 11
+Return RN 0
+
+;//NEON Registers
+
+qPredRowBuf QN Q0.S16
+dPredRowBuf0 DN D0.S16
+dPredRowBuf1 DN D1.S16
+
+
+
+
+qCoeffTab QN Q1.S32
+
+qPredQP QN Q2.S16
+dPredQP0 DN D4.S16
+dPredQP1 DN D5.S16
+
+
+qtemp1 QN Q3.S32
+qtemp QN Q3.S16
+
+dtemp0 DN D6.S16
+dtemp1 DN D7.S16
+
+dtemp2 DN D8.S16
+dtemp3 DN D9.S16
+
+dtemp4 DN D2.S16
+dtemp5 DN D3.S16
+dtemp6 DN D4.S16
+dtemp7 DN D5.S16
+
+qtempPred1 QN Q5.S32
+qtempPred QN Q5.S16
+
+dtempPred0 DN D10.S16
+dtempPred1 DN D11.S16
+
+
+;// Entry point. The DC coefficient is predicted with scalar code; AC prediction (only when ACPredFlag == 1) uses NEON.
+ M_START omxVCM4P2_PredictReconCoefIntra,r11,d11
+
+ ;// Assigning pointers to Input arguments on Stack
+
+ M_ARG predQPonStack,4
+ M_ARG predDironStack,4
+ M_ARG ACPredFlagonStack,4
+ M_ARG videoComponStack,4
+
+ ;// DC Prediction
+
+ M_LDR videoComp,videoComponStack ;// Load videoComp From Stack
+
+ M_LDR predDir,predDironStack ;// Load Prediction direction
+ ;// DC Scaler calculation
+ LDR index, =armVCM4P2_DCScaler
+ ADD index,index,videoComp,LSL #5 ;// table row for this component: 32 bytes per videoComp value
+ LDRB dcScaler,[index,QP] ;// dcScaler = byte at offset QP within that row
+
+
+ LDR predCoeffTable, =armVCM4P2_Reciprocal_QP_S16 ;// Loading the table with entries 32767/(1 to 63)
+ CMP predDir,#2 ;// Check if the Prediction direction is vertical
+
+ ;// Calculate tempPred
+
+ LDREQSH absCoeffDC,[pPredBufRow] ;// If vertical, load the coeff from Row Prediction Buffer
+ LDRNESH absCoeffDC,[pPredBufCol] ;// If horizontal load the coeff from column Prediction Buffer
+
+ RSB negdcScaler,dcScaler,#0 ;// negdcScaler=-dcScaler
+ MOV temp1,absCoeffDC ;// Keep the signed prediction coeff in temp1 for the sign comparisons
+ CMP temp1,#0
+ RSBLT absCoeffDC,temp1,#0 ;// calculate absolute val of prediction coeff
+
+ ADD temp,dcScaler,dcScaler ;// byte offset of the 16-bit table entry = 2*dcScaler
+ LDRH temp,[predCoeffTable,temp] ;// Load value from coeff table for performing division using multiplication
+ SMULBB tempPred,temp,absCoeffDC ;// tempPred=abs(pPredBufRow(Col)[0])*32767/dcScaler
+ ADD temp3,dcScaler,#1
+ LSR tempPred,tempPred,#15 ;// tempPred=abs(pPredBufRow(Col)[0])/dcScaler
+ LSR temp3,temp3,#1 ;// temp3=round(dcScaler/2)
+ MLA Rem,negdcScaler,tempPred,absCoeffDC ;// Remainder Rem=abs(pPredBufRow(Col)[0])-tempPred*dcScaler
+
+ LDRH dcRowbufCoeff,[pPredBufCol] ;// DC entry of the column buffer, copied to pPredBufRow[-8] below
+
+ CMP Rem,temp3 ;// compare Rem with (dcScaler/2)
+ ADDGE tempPred,#1 ;// tempPred=tempPred+1 if Rem>=(dcScaler/2)
+ CMP temp1,#0
+ RSBLT tempPred,tempPred,#0 ;// tempPred=-tempPred if the prediction coeff was negative
+
+ STRH dcRowbufCoeff,[pPredBufRow,#-16] ;// byte offset -16 = halfword index -8
+
+
+ LDRH temp,[pSrcDst] ;// temp=pSrcDst[0]
+ ADD temp,temp,tempPred ;// temp=pSrcDst[0]+tempPred
+ SSAT16 temp,#12,temp ;// clip temp to [-2048,2047]
+ SMULBB dcColBuffCoeff,temp,dcScaler ;// dcColBuffCoeff=clipped(pSrcDst[0])*dcScaler
+ M_LDR ACPredFlag,ACPredFlagonStack
+ STRH dcColBuffCoeff,[pPredBufCol]
+
+
+ ;// AC Prediction
+
+ M_LDR predQP,predQPonStack
+
+ CMP ACPredFlag,#1 ;// Check if the AC prediction flag is set or not
+ BNE Exit ;// If not set Exit
+ CMP predDir,#2 ;// Check the Prediction direction
+ LDR predCoeffTable, =armVCM4P2_Reciprocal_QP_S32 ;// Loading the table with entries 0x1ffff/(1 to 63)
+ MOV Const,#4
+ MUL curQP,curQP,Const ;// curQP=4*curQP
+ VDUP dPredQP0,predQP
+ LDR temp2,[predCoeffTable,curQP] ;// temp2=0x1ffff/curQP
+ VDUP qCoeffTab,temp2
+ BNE Horizontal ;// If the Prediction direction is horizontal branch to Horizontal
+
+
+
+ ;// Vertical
+ ;//Calculating tempPred
+
+ VLD1 {dPredRowBuf0,dPredRowBuf1},[pPredBufRow] ;// Loading pPredBufRow[i]:i=0 to 7
+
+ VMULL qtemp1,dPredRowBuf0,dPredQP0 ;//qtemp1[i]=pPredBufRow[i]*dPredQP[i]: i=0 to 3
+ VMUL qtempPred1,qtemp1,qCoeffTab ;//qtempPred1[i]=pPredBufRow[i]*dPredQP[i]*0x1ffff/curQP : i=0 to 3
+
+ VMULL qtemp1,dPredRowBuf1,dPredQP0 ;//qtemp1[i]=pPredBufRow[i]*dPredQP[i] : i=4 to 7
+
+ VRSHR qtempPred1,qtempPred1,#17 ;//qtempPred1[i]=round(pPredBufRow[i]*dPredQP[i]/curQP) : i=0 to 3
+ VSHRN dPredQP1,qtempPred1,#0 ;// narrow qtempPred1[i] to 16 bits (dPredQP1 is used as a temporary here)
+
+
+ VMUL qtempPred1,qtemp1,qCoeffTab ;//qtempPred1[i]=pPredBufRow[i]*dPredQP[i]*0x1ffff/curQP : i=4 to 7
+ VRSHR qtempPred1,qtempPred1,#17 ;//qtempPred1[i]=round(pPredBufRow[i]*dPredQP[i]/curQP) : i=4 to 7
+ VLD1 {dtemp0,dtemp1},[pSrcDst] ;//Loading pSrcDst[i] : i=0 to 7
+ VSHRN dtempPred1,qtempPred1,#0 ;// narrow qtempPred1[i] to 16 bits
+ VMOV dtempPred0,dPredQP1
+
+ ;//updating source and row prediction buffer contents
+ VADD qtemp,qtemp,qtempPred ;//pSrcDst[i]=pSrcDst[i]+qtempPred[i]: i=0 to 7
+ VQSHL qtemp,qtemp,#4 ;//Clip to [-2048,2047]
+ LDRH dcRowbufCoeff,[pPredBufRow] ;//Loading Dc Value of Row Prediction buffer
+ VSHR qtemp,qtemp,#4
+
+ VST1 {dtemp0,dtemp1},[pSrcDst] ;//storing back the updated values
+ VST1 {dtemp0,dtemp1},[pPredBufRow] ;//storing back the updated row prediction values
+ STRH dcRowbufCoeff,[pPredBufRow] ;// restore the DC entry of the row buffer that the VST1 above overwrote
+
+ B Exit
+
+Horizontal
+
+ ;// Calculating tempPred
+
+
+
+ VLD1 {dPredRowBuf0,dPredRowBuf1},[pPredBufCol] ;// Loading pPredBufCol[i]:i=0 to 7
+ VMULL qtemp1,dPredRowBuf0,dPredQP0 ;//qtemp1[i]=pPredBufCol[i]*dPredQP[i]: i=0 to 3
+ VMUL qtempPred1,qtemp1,qCoeffTab ;//qtempPred1[i]=pPredBufCol[i]*dPredQP[i]*0x1ffff/curQP : i=0 to 3
+
+ VMULL qtemp1,dPredRowBuf1,dPredQP0 ;//qtemp1[i]=pPredBufCol[i]*dPredQP[i] : i=4 to 7
+
+ VRSHR qtempPred1,qtempPred1,#17 ;//qtempPred1[i]=round(pPredBufCol[i]*dPredQP[i]/curQP) : i=0 to 3
+ VSHRN dPredQP1,qtempPred1,#0 ;// narrow qtempPred1[i] to 16 bits (dPredQP1 is used as a temporary here)
+
+
+ VMUL qtempPred1,qtemp1,qCoeffTab ;//qtempPred1[i]=pPredBufCol[i]*dPredQP[i]*0x1ffff/curQP : i=4 to 7
+
+ MOV temppPredColBuf,pPredBufCol ;// keep the buffer start; pPredBufCol is advanced by the stores below
+ VRSHR qtempPred1,qtempPred1,#17 ;//qtempPred1[i]=round(pPredBufCol[i]*dPredQP[i]/curQP) : i=4 to 7
+ VLD4 {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst] ;// Load 16 coefficients, de-interleaved by 4
+ VSHRN dtempPred1,qtempPred1,#0 ;// narrow qtempPred1[i] to 16 bits
+ VMOV dtempPred0,dPredQP1
+
+ ;// Updating source and column prediction buffer contents
+ ADD temp2,pSrcDst,#32
+ VLD4 {dtemp4,dtemp5,dtemp6,dtemp7},[temp2] ;// Load the next 16 coefficients, de-interleaved by 4
+ VUZP dtemp0,dtemp4 ;// Unzip: dtemp0 now holds every 8th coefficient (indices 0,8,16,24)
+ VADD dtemp0,dtemp0,dtempPred0 ;// Adding tempPred to coeffs
+ VQSHL dtemp0,dtemp0,#4 ;// Clip to [-2048,2047]
+ VSHR dtemp0,dtemp0,#4
+ VST1 {dtemp0},[pPredBufCol]! ;// Updating Prediction column buffer
+ VZIP dtemp0,dtemp4 ;// Zip: restore the VLD4 register layout before storing back
+ VST4 {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst] ;// Updating source coeffs
+ VST4 {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]!
+
+ MOV temp1,temp2 ;// remember where the next group of 16 coefficients starts
+ VLD4 {dtemp0,dtemp1,dtemp2,dtemp3},[temp2]! ;// Load 16 coefficients, de-interleaved by 4
+
+ VLD4 {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]
+ VUZP dtemp0,dtemp4 ;// Unzip: dtemp0 now holds every 8th coefficient of this half (indices 32,40,48,56)
+ VADD dtemp0,dtemp0,dtempPred1
+ VQSHL dtemp0,dtemp0,#4 ;// Clip to [-2048,2047]
+ VSHR dtemp0,dtemp0,#4
+ VST1 {dtemp0},[pPredBufCol]!
+ VZIP dtemp0,dtemp4 ;// Zip: restore the VLD4 register layout before storing back
+ VST4 {dtemp0,dtemp1,dtemp2,dtemp3},[temp1]
+ STRH dcColBuffCoeff,[temppPredColBuf] ;// re-store the DC entry of the column buffer that the VST1 above overwrote
+ VST4 {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]
+
+Exit
+
+ STRH temp,[pSrcDst] ;// write the clipped DC coefficient (computed in the DC section) to pSrcDst[0]
+
+
+ MOV Return,#OMX_Sts_NoErr
+
+ M_END
+ ENDIF
+
+
+ END
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
new file mode 100755
index 0000000..bd0ad1f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
@@ -0,0 +1,162 @@
+;/**
+; *
+; * File Name: omxVCM4P2_QuantInvInter_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 12290
+; * Date: Wednesday, April 9, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for inter reconstruction
+; *
+; *
+; *
+; *
+; *
+; * Function: omxVCM4P2_QuantInvInter_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
+; * [in] QP quantization parameter (quantiser_scale)
+; * [in] videoComp (Intra version only.) Video component type of the
+; * current block. Takes one of the following flags:
+; * OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; * OMX_VC_ALPHA.
+; * [in] shortVideoHeader a flag indicating presence of short_video_header;
+; * shortVideoHeader==1 selects linear intra DC mode,
+; * and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out] pSrcDst pointer to the output (dequantized) intra/inter block. Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; * - If pSrcDst is NULL or is not 16-byte aligned.
+; * or
+; * - If QP <= 0.
+; * or
+; * - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+; */
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+ IF CortexA8
+
+;// omxVCM4P2_QuantInvInter_I: in-place inverse quantization of the 64 16-bit coefficients at pSrcDst.
+;//Input Arguments
+pSrcDst RN 0
+QP RN 1
+
+
+;//Local Variables
+Count RN 3
+doubleQP RN 4
+Return RN 0
+;// Neon registers
+;// For each coefficient c: result = 0 if c == 0, else clip12(2*QP*c + sign(c)*QP1),
+;// where QP1 = QP when QP is odd and QP-1 when QP is even.
+dQP10 DN D0.S32[0]
+qQP1 QN Q0.S32
+
+dQP1 DN D0.S16
+dMinusQP1 DN D1.S16
+
+dCoeff0 DN D2.S16
+dCoeff1 DN D3.S16
+
+qResult0 QN Q3.S32
+dResult0 DN D7.S16
+qSign0 QN Q3.S32
+dSign0 DN D6.S16
+
+qResult1 QN Q4.S32
+dResult1 DN D8.S16
+qSign1 QN Q4.S32
+dSign1 DN D8.S16
+
+d2QP0 DN D10.S32[0]
+q2QP0 QN Q5.S32
+d2QP DN D10.S16
+
+dZero0 DN D11.S16
+dZero1 DN D12.S16
+dConst0 DN D13.S16
+
+
+ M_START omxVCM4P2_QuantInvInter_I,r4,d13
+
+
+
+ ADD doubleQP,QP,QP ;// doubleQP= 2*QP
+ VMOV d2QP0,doubleQP
+ VDUP q2QP0,d2QP0 ;// Broadcast doubleQP to all four 32-bit lanes
+ TST QP,#1
+ ;// (the first coefficient load now happens at the top of Loop)
+ SUBEQ QP,QP,#1
+ VMOV dQP10,QP ;// If QP is even then QP1=QP-1 else QP1=QP
+ MOV Count,#64
+ VDUP qQP1,dQP10 ;// Broadcast QP1 to all four 32-bit lanes
+ VSHRN d2QP,q2QP0,#0 ;// doubleQP truncated to 16 bits
+ VEOR dConst0,dConst0,dConst0 ;// dConst0 = 0
+ VSHRN dQP1,qQP1,#0 ;// QP1 truncated to 16 bits
+ VSUB dMinusQP1,dConst0,dQP1 ;// dMinusQP1=-QP1
+
+Loop
+ VLD1 {dCoeff0,dCoeff1},[pSrcDst] ;// Load the 8 coefficients of this iteration; loading here (not at the loop tail) never reads past the 64th coefficient
+ ;//Performing Inverse Quantization
+
+ VCLT dSign0,dCoeff0, #0 ;// Lane mask: Coefficient 0 < 0
+ VCLT dSign1,dCoeff1, #0 ;// Lane mask: Coefficient 1 < 0
+ VCEQ dZero0,dCoeff0,#0 ;// Lane mask: Coefficient 0 == 0
+ VBSL dSign0,dMinusQP1,dQP1 ;// dSign0 = -QP1 if Coeff0< 0 else QP1
+ VCEQ dZero1,dCoeff1,#0 ;// Lane mask: Coefficient 1 == 0
+ VBSL dSign1,dMinusQP1,dQP1 ;// dSign1 = -QP1 if Coeff1< 0 else QP1
+ VMOVL qSign0,dSign0 ;// Sign extend qSign0 to 32 bits
+ VMOVL qSign1,dSign1
+ VMLAL qResult0,dCoeff0,d2QP ;// qResult0 is the same register as qSign0, so
+ ;// qResult0[i] = (+/-QP1) + Coeff0[i]*2*QP
+ VMLAL qResult1,dCoeff1,d2QP ;// qResult1 is the same register as qSign1, so
+ ;// qResult1[i] = (+/-QP1) + Coeff1[i]*2*QP
+ ;// Clip Result to [-2048,2047]
+
+ VQSHL qResult0,qResult0,#20 ;// clip to [-2048,2047]: saturating shift left by 20 ...
+ VQSHL qResult1,qResult1,#20
+
+ VSHR qResult0,qResult0,#4 ;// ... then shift back down by 4 + 16 = 20 bits in two steps
+ VSHR qResult1,qResult1,#4
+ VSHRN dResult0,qResult0,#16 ;// Narrow the clipped Value to Halfword
+ VSHRN dResult1,qResult1,#16
+ VBIT dResult0,dConst0,dZero0 ;// Coefficients that were zero stay zero
+ VBIT dResult1,dConst0,dZero1
+
+ VST1 {dResult0,dResult1},[pSrcDst]! ;// Store the result and advance to the next 8 coefficients
+ SUBS Count,Count,#8
+ ;// No preload of the next group here: after the last store it would read 16 bytes beyond the block
+
+
+ BGT Loop
+
+ MOV Return,#OMX_Sts_NoErr
+
+
+ M_END
+ ENDIF
+
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
new file mode 100755
index 0000000..e00591f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
@@ -0,0 +1,210 @@
+;/**
+; *
+; * File Name: omxVCM4P2_QuantInvIntra_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 12290
+; * Date: Wednesday, April 9, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for intra reconstruction
+; *
+; *
+; *
+; *
+; *
+; *
+; * Function: omxVCM4P2_QuantInvIntra_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
+; * [in] QP quantization parameter (quantiser_scale)
+; * [in] videoComp (Intra version only.) Video component type of the
+; * current block. Takes one of the following flags:
+; * OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; * OMX_VC_ALPHA.
+; * [in] shortVideoHeader a flag indicating presence of short_video_header;
+; * shortVideoHeader==1 selects linear intra DC mode,
+; * and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out] pSrcDst pointer to the output (dequantized) intra/inter block. Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; * - If pSrcDst is NULL or is not 16-byte aligned.
+; * or
+; * - If QP <= 0.
+; * or
+; * - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; */
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+
+ IMPORT armVCM4P2_DCScaler
+
+ IF CortexA8
+
+;// omxVCM4P2_QuantInvIntra_I: in-place inverse quantization of the 64 16-bit coefficients at pSrcDst; the DC term is scaled by dcScaler.
+;//Input Arguments
+pSrcDst RN 0
+QP RN 1
+videoComp RN 2
+shortVideoHeader RN 3
+
+
+;//Local Variables
+
+dcScaler RN 4
+temp RN 14
+index RN 5
+
+;// Count and doubleQP reuse r5 and r4; index and dcScaler are dead by the time they are written
+Count RN 5
+doubleQP RN 4
+Return RN 0
+
+
+;// Neon registers
+;// For each AC coefficient c: result = 0 if c == 0, else clip12(2*QP*c + sign(c)*QP1),
+;// where QP1 = QP when QP is odd and QP-1 when QP is even.
+dQP10 DN D0.S32[0]
+qQP1 QN Q0.S32
+
+dQP1 DN D0.S16
+dMinusQP1 DN D1.S16
+
+dCoeff0 DN D2.S16
+dCoeff1 DN D3.S16
+
+qResult0 QN Q3.S32
+dResult0 DN D7.S16
+qSign0 QN Q3.S32
+dSign0 DN D6.S16
+
+qResult1 QN Q4.S32
+dResult1 DN D8.S16
+qSign1 QN Q4.S32
+dSign1 DN D8.S16
+
+d2QP0 DN D10.S32[0]
+q2QP0 QN Q5.S32
+d2QP DN D10.S16
+
+dZero0 DN D11.S16
+dZero1 DN D4.S16
+dConst0 DN D5.S16
+
+
+
+
+
+
+ M_START omxVCM4P2_QuantInvIntra_I,r5,d11
+
+
+ ;// Perform Inverse Quantization for DC coefficient
+
+ TEQ shortVideoHeader,#0 ;// Test if short Video Header flag =0
+ MOVNE dcScaler,#8 ;// if shortVideoHeader is non zero dcScaler=8
+ BNE calDCVal
+
+ LDR index, =armVCM4P2_DCScaler
+ ADD index,index,videoComp,LSL #5 ;// table row for this component: 32 bytes per videoComp value
+ LDRB dcScaler,[index,QP] ;// dcScaler = byte at offset QP within that row
+
+ ;//M_CalDCScalar shortVideoHeader,videoComp, QP
+
+calDCVal
+
+ LDRH temp,[pSrcDst]
+ SMULBB temp,temp,dcScaler ;// dcCoeff = dcScaler * Quantized DC coefficient(from memory)
+ SSAT temp,#12,temp ;// Saturating to 12 bits
+
+
+
+ ;// Perform Inverse Quantization for Ac Coefficients
+ ;// (the loop also processes index 0; that slot is overwritten with the DC result after the loop)
+
+
+ ADD doubleQP,QP,QP ;// doubleQP= 2*QP
+ VMOV d2QP0,doubleQP
+ VDUP q2QP0,d2QP0 ;// Broadcast doubleQP to all four 32-bit lanes
+ TST QP,#1
+ ;// (the first coefficient load now happens at the top of Loop)
+ SUBEQ QP,QP,#1
+ VMOV dQP10,QP ;// If QP is even then QP1=QP-1 else QP1=QP
+ MOV Count,#64
+ VDUP qQP1,dQP10 ;// Broadcast QP1 to all four 32-bit lanes
+ VSHRN d2QP,q2QP0,#0 ;// doubleQP truncated to 16 bits
+ VEOR dConst0,dConst0,dConst0 ;// dConst0 = 0
+ VSHRN dQP1,qQP1,#0 ;// QP1 truncated to 16 bits
+ VSUB dMinusQP1,dConst0,dQP1 ;// dMinusQP1=-QP1
+
+Loop
+ VLD1 {dCoeff0,dCoeff1},[pSrcDst] ;// Load the 8 coefficients of this iteration; loading here (not at the loop tail) never reads past the 64th coefficient
+ ;//Performing Inverse Quantization
+
+ VCLT dSign0,dCoeff0, #0 ;// Lane mask: Coefficient 0 < 0
+ VCLT dSign1,dCoeff1, #0 ;// Lane mask: Coefficient 1 < 0
+ VCEQ dZero0,dCoeff0,#0 ;// Lane mask: Coefficient 0 == 0
+ VBSL dSign0,dMinusQP1,dQP1 ;// dSign0 = -QP1 if Coeff0< 0 else QP1
+ VCEQ dZero1,dCoeff1,#0 ;// Lane mask: Coefficient 1 == 0
+ VBSL dSign1,dMinusQP1,dQP1 ;// dSign1 = -QP1 if Coeff1< 0 else QP1
+ VMOVL qSign0,dSign0 ;// Sign extend qSign0 to 32 bits
+ VMOVL qSign1,dSign1
+ VMLAL qResult0,dCoeff0,d2QP ;// qResult0 is the same register as qSign0, so
+ ;// qResult0[i] = (+/-QP1) + Coeff0[i]*2*QP
+ VMLAL qResult1,dCoeff1,d2QP ;// qResult1 is the same register as qSign1, so
+ ;// qResult1[i] = (+/-QP1) + Coeff1[i]*2*QP
+ ;// Clip Result to [-2048,2047]
+
+ VQSHL qResult0,qResult0,#20 ;// clip to [-2048,2047]: saturating shift left by 20 ...
+ VQSHL qResult1,qResult1,#20
+
+ VSHR qResult0,qResult0,#4 ;// ... then shift back down by 4 + 16 = 20 bits in two steps
+ VSHR qResult1,qResult1,#4
+ VSHRN dResult0,qResult0,#16 ;// Narrow the clipped Value to Halfword
+ VSHRN dResult1,qResult1,#16
+ VBIT dResult0,dConst0,dZero0 ;// Coefficients that were zero stay zero
+ VBIT dResult1,dConst0,dZero1
+
+ VST1 {dResult0,dResult1},[pSrcDst]! ;// Store the result and advance to the next 8 coefficients
+ SUBS Count,Count,#8
+ ;// No preload of the next group here: after the last store it would read 16 bytes beyond the block
+
+
+ BGT Loop
+
+ SUB pSrcDst,pSrcDst,#128 ;// rewind to the start of the block (64 coefficients * 2 bytes)
+
+ ;// Store the Inverse quantized Dc coefficient
+
+ STRH temp,[pSrcDst],#2
+
+ MOV Return,#OMX_Sts_NoErr
+
+
+
+ M_END
+ ENDIF
+
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/src/armVC_Version.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/src/armVC_Version.c
new file mode 100755
index 0000000..5d93681
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/src/armVC_Version.c
@@ -0,0 +1,6 @@
+#include "omxtypes.h"
+#include "armCOMM_Version.h" /* presumably supplies ARM_VERSION_STRING and the OMX_ARM_* macros used below - TODO confirm */
+
+#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS /* the description string is only compiled in when this macro is defined */
+const char * const omxVC_VersionDescription = "ARM OpenMAX DL v" ARM_VERSION_STRING " Rel=" OMX_ARM_RELEASE_TAG " Arch=" OMX_ARM_BUILD_ARCHITECTURE " Tools=" OMX_ARM_BUILD_TOOLCHAIN ;
+#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_DELIVERY.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_DELIVERY.TXT
new file mode 100644
index 0000000..7801f3d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_DELIVERY.TXT
@@ -0,0 +1,63 @@
+The contents of this transaction was created by Hedley Francis
+of ARM on 19-Feb-2008.
+
+It contains the ARM data versions listed below.
+
+This data, unless otherwise stated, is ARM Proprietary and access to it
+is subject to the agreements indicated below.
+
+If you experience problems with this data, please contact ARM support
+quoting transaction reference <97412>.
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+- OX000-SW-98010-r0p0-00bet1
+ Video codecs - sample code
+ Sample code release for Hantro (Ver 1.0.2)
+ internal access
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+This transaction contains deliverables which are designated as being of
+beta release status (BET).
+
+Beta release status has a particular meaning to ARM of which the recipient
+must be aware. Beta is a pre-release status indicating that the deliverable
+so described is believed to robustly demonstrate specified behaviour, to be
+consistent across its included aspects and be ready for general deployment.
+But Beta also indicates that pre-release reliability trials are ongoing and
+that it is possible residual defects or errors in operation, consistency
+and documentation may still be encountered. The recipient should consider
+this position when using this Beta material supplied. ARM will normally
+attempt to provide fixes or a work-around for defects identified by the
+recipient, but the provision or timeliness of this support cannot be
+guaranteed. ARM shall not be responsible for direct or consequential
+damages as a result of encountering one or more of these residual defects.
+By accepting a Beta release, the recipient agrees to these constraints and
+to providing reasonable information to ARM to enable the replication of the
+defects identified by the recipient. The specific Beta version supplied
+will not be supported after release of a later or higher status version.
+It should be noted that Support for the Beta release of the deliverable
+will only be provided by ARM to a recipient who has a current support and
+maintenance contract for the deliverable.
+
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+In addition to the data versions listed above, this transaction contains
+two additional files at the top level.
+
+The first is this file, ARM_DELIVERY_97412.TXT, which is the delivery
+note.
+
+The second is ARM_MANIFEST_97412.TXT which contains a manifest of all the
+files included in this transaction, together with their checksums.
+
+The checksums provided are calculated using the RSA Data Security, Inc.
+MD5 Message-Digest Algorithm.
+
+The checksums can be used to verify the integrity of this data using the
+"md5sum" tool (which is part of the GNU "textutils" package) by running:
+
+ % md5sum --check ARM_MANIFEST_97412.TXT
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_MANIFEST.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_MANIFEST.TXT
new file mode 100644
index 0000000..8e01b1e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/ARM_MANIFEST.TXT
@@ -0,0 +1,140 @@
+ OX000-SW-98010-r0p0-00bet1/
+ OX000-SW-98010-r0p0-00bet1/api/
+8971932d56eed6b1ad1ba507f0bff5f0 OX000-SW-98010-r0p0-00bet1/api/armCOMM_Bitstream.h
+e88ec84e122534092b90c67841549d6f OX000-SW-98010-r0p0-00bet1/api/armCOMM_Version.h
+43cf46c2cf2fe1f93c615b57bcbe4809 OX000-SW-98010-r0p0-00bet1/api/armCOMM.h
+f87fedd9ca432fefa757008176864ef8 OX000-SW-98010-r0p0-00bet1/api/armOMX.h
+8e49899a428822c36ef9dd94e0e05f18 OX000-SW-98010-r0p0-00bet1/api/omxtypes.h
+a06983abb39c476b081e87ea271361a5 OX000-SW-98010-r0p0-00bet1/build_vc.pl
+c01f8b93ab73d8c00ddf2499f01da5ff OX000-SW-98010-r0p0-00bet1/filelist_vc.txt
+ OX000-SW-98010-r0p0-00bet1/src/
+26e2ff3f633764eb720deb340978dc2d OX000-SW-98010-r0p0-00bet1/src/armCOMM_Bitstream.c
+79aa23d9817efd11d0c4c2be36ec1e5c OX000-SW-98010-r0p0-00bet1/src/armCOMM.c
+ OX000-SW-98010-r0p0-00bet1/vc/
+ OX000-SW-98010-r0p0-00bet1/vc/m4p10/
+ OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/
+e45297704d72302d4a947d0798c666fb OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_CAVLCTables.c
+205dfafe1fe7bb160bf36d2600e1100a OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
+bf92641e8548577b77e04e03ec04c358 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_MEGetBufSize.c
+f5ee6f7be822d87471cef3b1801dbfc2 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c
+28110b3a13cecf4f216d10bcc761c401 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Interpolate_Chroma.c
+9a1a25245c975d641e1c6378834aea4d OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateHalfVer_Luma.c
+3a643eaaaeb12e8d274dc59a7357a586 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c
+4c4de5973a6b74250ce91ac0b317a617 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InvTransformDequant_LumaDC.c
+4ecdbe9193aaba1f9bb0e24c938b34f9 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_UnpackBlock2x2.c
+66e912f8c88f6019cba3ede27150a407 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateLuma.c
+266da42f4e3015e67b2cbb58169d437f OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateHalfDiag_Luma.c
+d905247eeaa52d4e2cf5f6bc3f61b348 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair.c
+5b29448db0495cd1717a4b925f13377c OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16.c
+f6451df27f6dcc99036b4b1253c23bb6 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c
+892787d850eef09dc2148d45b416b062 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformQuant_ChromaDC.c
+33da1c01a31f47c0f3aea9a7a5eaa9be OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_BlockMatch_Quarter.c
+e9fb11b066775283dcfeae8d12a6c97a OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateHalfVer_Luma.c
+add97bec08e5e1a538aa8607168e61ba OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8.c
+b695ecfc917b39470d1f40773b923972 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_PredictIntraDC4x4.c
+51bc596fd2ff61ad5450d7138461f4a1 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_MEInit.c
+dc6baa0a388dc5ea8ff65c24b179e670 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_GetVLCInfo.c
+a5499902996576f6712849db34d5ad65 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c
+0c3b76745d53e74a8e64e80def31faba OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4.c
+4f2742ba5a3c2208f53bc0f6a443be14 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SADQuar_4x.c
+b4ae2dc948e8ca64831fe3bbfbd89523 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
+e15118cbe372db7cadba225c9456f189 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_SADQuar.c
+623cf336cfce7d0174f4e54072456f33 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InvTransformResidualAndAdd.c
+89e452c80e30357cadfb04c05b6fe00c OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateHalfHor_Luma.c
+3a5551cc54e85bbe34fc966c7dc00f1c OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_BlockMatch_Integer.c
+114030fa0d8f00af6d3289f47a5e85bf OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+9e373ab296fb85bb45565a6c384f6ed8 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DequantTables.c
+2d200f7cc230f302da48c589da42c02f OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_UnpackBlock4x4.c
+ea3f1d1d1507b55610b1349c7b5946e8 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair.c
+bd2bf1743aef2a9396545ed025362be2 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_Average_4x.c
+ca68e809567bf89044631b67d228c7ce OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
+77caf2b5cbee96d360a919f27e1f14f4 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_QuantTables.c
+26081e384ec627fedad474a0e7dad877 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SADQuar_8x.c
+1c83ae9207a54944936f4a63c665bd99 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_MotionEstimationMB.c
+4c36e04db20200f4ec72e5aba57446fd OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SATD_4x4.c
+f75b7c5a80d8bf33e315380e4ef0ab8a OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateHalfHor_Luma.c
+488925bb7aeeae0ccf93ec44af9fce35 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SADQuar_16x.c
+c91a5345b5f877b3831ed1abcc60d579 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_BlockMatch_Half.c
+35515a115a32fcac8479072a9a5b0db9 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Interpolate_Luma.c
+fdcf4622bc5f0ae75bdb0a51dcd03397 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformQuant_LumaDC.c
+74c9278177400a1f7cc6d799a8c8ab34 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SAD_4x.c
+56aa2d506d0cfdb4ebd366c07adb2d85 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeBlockPixel.c
+36b2165fd4d2a7f3f3e1f8daff4f94e5 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_SubAndTransformQDQResidual.c
+4b6b1b933fc7bc8f14a184c02c028085 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_TransformResidual4x4.c
+cf0ff093a9b372dd3271e3e5c28984d4 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair.c
+9ccad9f894fbd32194f5b53da217072a OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InvTransformDequant_ChromaDC.c
+4943a7a2df7e9d700675f8c1debf4d90 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_CompareMotionCostToMV.c
+29e4a7f38f8c2e8246ed756db03c012e OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_FwdTransformResidual4x4.c
+27bc64e7c18da0aab9c987a388f61608 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+859185614bb9d0013861e454d7b918f2 OX000-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd.c
+ OX000-SW-98010-r0p0-00bet1/vc/m4p10/api/
+63e3b64b96cc42a235c04f3a0f991316 OX000-SW-98010-r0p0-00bet1/vc/m4p10/api/armVCM4P10_CAVLCTables.h
+ OX000-SW-98010-r0p0-00bet1/vc/m4p2/
+ OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/
+0aae4f683d8903cba9956d3301ed9ffe OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_ACDCPredict.c
+8d6c1b44915329165df643081cc11a97 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MCReconBlock.c
+0435eca930eacda0f2a59e843d405eff OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MotionEstimationMB.c
+9a82dd0b1f05f798567436a009d02969 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP.c
+e1e24646c4bd03f5df78295452dd4eb2 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_8x8.c
+746e6b334e4a26d4a9bfae6d735826f6 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DCT8x8blk.c
+8b1d87b74d80ff13a16215b61d5e52ba OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_IDCT8x8blk.c
+309358d357baafc38d2b37bf1e9768a9 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvInter_I.c
+cc77c7242b53c153f8d09527583f2771 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_FindMVpred.c
+7cd8e7796017e3dd00b494d34f629f3f OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MEGetBufSize.c
+a4905cb5f8d4b244454ee4f60d18358b OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_PutVLCBits.c
+5596b31e433222c1e4860deebfa98ef2 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_DCT_Table.c
+365d072be6eab201f6e040058a3bacfc OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_BlockMatch_Integer.c
+78ed2212585b0cca75913a473b2ec430 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c
+50b2d8da8f20f6b1d39b8d3df38af55d OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_EncodeMV.c
+4a851a2ad6d357cdc233d9c0bf475e02 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_BlockMatch_Half.c
+0d6d63878f2827e00e5f85b1e8e26017 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
+48b865a983fe5bf3075eddf652950722 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra.c
+5f48fa7941835c46ac767e63fc29403b OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c
+bbaf454b64b32b2c42a76a7ec393d977 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_CompareMV.c
+eebff772f87a414436c5c5286f2cd213 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MEInit.c
+65ae242eb8cb6d1027677c8ef8f77ca0 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_FillVLCBuffer.c
+125642b1ea0c1256d79af1e0ddecae93 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInter_I.c
+ce24ba3d83da4cb791485d3128268bf6 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_intra.c
+09bc09a2e6fd962e719944582e38a8fd OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
+6b0ee7a116471a4dadbe5bc8dbf425b0 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_FillVLDBuffer.c
+21322dca027c28353e3e7eb8f3620062 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_CheckVLCEscapeMode.c
+ef353d83244288d8c37e0f70249177cc OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_GetVLCBits.c
+541de824f8aebe4a5cac6f15da943efa OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_8x8.c
+0b40b154b591c7f8842cffe4042d17c5 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
+2ffcec88d3fcb372543a8f4508ea1ac6 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantIntra_I.c
+e06d85ca000afcbb50580f98f0203ac8 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c
+ae82b6fcfcf731a61d70e1aa42e6277a OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_16x16.c
+1d04395e231b597562257e98cda6cfb0 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
+72c0a36327b6b9b436d3bce7c896c520 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c
+1b65aa7f311124ea6fb47e384ec06a50 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I.c
+714957104a6ef71341fbe6a9ec65c136 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_SetPredDir.c
+86493f0ee853f653354a7389f1727f73 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_inter.c
+5de8afcfb3052968794782a7c3a0b41a OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_Inter.c
+50bcc228cc660dbda037725309de3f8b OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_EncodeVLCZigzag_intra.c
+4f5cfa1ecc668913dde94e3caf97a2e1 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_intra.c
+c2ec804ddf64ee841146e39c3a783451 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_16x16.c
+4087f6a827912ee5b45ed4217f1a6d77 OX000-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter.c
+ OX000-SW-98010-r0p0-00bet1/vc/m4p2/api/
+5c711702dddcec85298003860d760cec OX000-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_DCT_Table.h
+1b92c94b785c03ec76d4fae2f2bbdb8a OX000-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
+ad9c6986d2a3200dd5e1f6103a54a99b OX000-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
+ OX000-SW-98010-r0p0-00bet1/vc/src/
+e627b3346b0dc9aff14446005ce0fa43 OX000-SW-98010-r0p0-00bet1/vc/src/armVC_Version.c
+ OX000-SW-98010-r0p0-00bet1/vc/api/
+7ca94b1c33ac0211e17d38baadd7d1dd OX000-SW-98010-r0p0-00bet1/vc/api/armVC.h
+12cf7596edbbf6048b626d15e8d0ed48 OX000-SW-98010-r0p0-00bet1/vc/api/omxVC.h
+ OX000-SW-98010-r0p0-00bet1/vc/comm/
+ OX000-SW-98010-r0p0-00bet1/vc/comm/src/
+3a6df0085736cbcbe2e3f45d08af4221 OX000-SW-98010-r0p0-00bet1/vc/comm/src/armVCCOMM_Average.c
+0bf3cb52863c829b28c0352835170211 OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy8x8.c
+538b62f510b5a8bdced4a39fa12d9a23 OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock_SAD.c
+66993edd9d441bf3b5b6c912f6400b6e OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_ExpandFrame_I.c
+8e526a9007eb0d43ebf362c498b37415 OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_LimitMVToRect.c
+87f8f26e6e9178df0ab7419334d5a3db OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_SAD_16x.c
+1a8577646132ad9b63a1477fdaec2464 OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Average_16x.c
+48529c4f70c7e954e832eece1aee57bd OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_SAD_8x.c
+252977764d4f38282b6a56c59ccf4f09 OX000-SW-98010-r0p0-00bet1/vc/comm/src/armVCCOMM_SAD.c
+cc78cfaed9502c2e0282c91fb95eeac4 OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Average_8x.c
+e468751c15a581ebd22da031e22117d1 OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy16x16.c
+3f448d191eaeb82ecb7e27ef8ba27875 OX000-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock.c
+b1291c307808631fa833684abb9c34ce ARM_DELIVERY_97412.TXT
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM.h
new file mode 100644
index 0000000..2ed86a4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM.h
@@ -0,0 +1,785 @@
+/**
+ *
+ * File Name: armCOMM.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armCOMM.h
+ * Brief: Declares Common APIs/Data Types used across OpenMAX API's
+ *
+ */
+
+
+#ifndef _armCommon_H_
+#define _armCommon_H_
+
+#include "omxtypes.h"
+
+typedef struct
+{
+ OMX_F32 Re; /** Real part */
+ OMX_F32 Im; /** Imaginary part */
+
+} OMX_FC32; /** single precision floating point complex number */
+
+typedef struct
+{
+ OMX_F64 Re; /** Real part */
+ OMX_F64 Im; /** Imaginary part */
+
+} OMX_FC64; /** double precision floating point complex number */
+
+
+/* Used by both IP and IC domains for 8x8 JPEG blocks. */
+typedef OMX_S16 ARM_BLOCK8x8[64];
+
+
+#include "armOMX.h"
+
+#define armPI (OMX_F64)(3.1415926535897932384626433832795)
+
+/***********************************************************************/
+
+/* Diagnostic helpers: with ARM_DEBUG they print, exit or assert; otherwise they only evaluate and discard their argument */
+#ifdef ARM_DEBUG
+/* debug version: str is printed through "%s" so a '%' inside the message is never parsed as a conversion */
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#define armError(str) {printf("%s\n", (str)); exit(-1);}
+#define armWarn(str) {printf("%s\n", (str));}
+#define armIgnore(a) ((void)(a))
+#define armAssert(a) assert(a)
+#else
+/* release version */
+#define armError(str) ((void) (str))
+#define armWarn(str) ((void) (str))
+#define armIgnore(a) ((void) (a))
+#define armAssert(a) ((void) (a))
+#endif /* ARM_DEBUG */
+
+/* Arithmetic operations: plain macros, so each argument may be evaluated more than once - avoid side effects */
+
+#define armMin(a,b) ( (a) > (b) ? (b):(a) )
+#define armMax(a,b) ( (a) > (b) ? (a):(b) )
+#define armAbs(a) ( (a) < 0 ? -(a):(a) )
+
+/* Alignment operation: advance Ptr to the next N-byte boundary (the N-1 mask assumes N is a power of two); result has the type of Ptr */
+
+#define armAlignToBytes(Ptr,N) ((Ptr) + ( (((N)-(unsigned int)(Ptr))&((N)-1)) / sizeof(*(Ptr)) ))
+#define armAlignTo2Bytes(Ptr) armAlignToBytes(Ptr,2)
+#define armAlignTo4Bytes(Ptr) armAlignToBytes(Ptr,4)
+#define armAlignTo8Bytes(Ptr) armAlignToBytes(Ptr,8)
+#define armAlignTo16Bytes(Ptr) armAlignToBytes(Ptr,16)
+
+/* Error and Alignment check: armRet*ErrIf expand to a bare if-statement that returns `code` from the calling function when `condition` is true */
+
+#define armRetArgErrIf(condition, code) if(condition) { return (code); }
+#define armRetDataErrIf(condition, code) if(condition) { return (code); }
+
+#ifndef ALIGNMENT_DOESNT_MATTER /* real checks: test the address modulo N; N is parenthesised so expressions such as 4+4 work */
+#define armIsByteAligned(Ptr,N) ((((int)(Ptr)) % (N))==0)
+#define armNotByteAligned(Ptr,N) ((((int)(Ptr)) % (N))!=0)
+#else /* checks compiled out: every pointer is reported as aligned */
+#define armIsByteAligned(Ptr,N) (1)
+#define armNotByteAligned(Ptr,N) (0)
+#endif
+
+#define armIs2ByteAligned(Ptr) armIsByteAligned(Ptr,2)
+#define armIs4ByteAligned(Ptr) armIsByteAligned(Ptr,4)
+#define armIs8ByteAligned(Ptr) armIsByteAligned(Ptr,8)
+#define armIs16ByteAligned(Ptr) armIsByteAligned(Ptr,16)
+
+#define armNot2ByteAligned(Ptr) armNotByteAligned(Ptr,2)
+#define armNot4ByteAligned(Ptr) armNotByteAligned(Ptr,4)
+#define armNot8ByteAligned(Ptr) armNotByteAligned(Ptr,8)
+#define armNot16ByteAligned(Ptr) armNotByteAligned(Ptr,16)
+#define armNot32ByteAligned(Ptr) armNotByteAligned(Ptr,32)
+
+/**
+ * Function: armRoundFloatToS16/armRoundFloatToS32/armRoundFloatToS64
+ *
+ * Description:
+ * Converts a double precision value into a 16-, 32- or 64-bit signed integer after rounding
+ *
+ * Parameters:
+ * [in] Value Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16/OMX_S32/OMX_S64 format
+ *
+ */
+
+OMX_S16 armRoundFloatToS16 (OMX_F64 Value);
+OMX_S32 armRoundFloatToS32 (OMX_F64 Value);
+OMX_S64 armRoundFloatToS64 (OMX_F64 Value);
+
+/**
+ * Function: armSatRoundFloatToS16/armSatRoundFloatToS32
+ *
+ * Description:
+ * Converts a double precision value into a short int/int after rounding and saturation
+ *
+ * Parameters:
+ * [in] Value Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16/OMX_S32 format
+ *
+ */
+
+OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value);
+OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value);
+
+/**
+ * Function: armSatRoundFloatToU16/armSatRoundFloatToU32
+ *
+ * Description:
+ * Converts a double precision value into an unsigned short int/int after rounding and saturation
+ *
+ * Parameters:
+ * [in] Value Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_U16/OMX_U32 format
+ *
+ */
+
+OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value);
+OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value);
+
+/**
+ * Function: armSignCheck
+ *
+ * Description:
+ * Checks the sign of a variable:
+ * returns 1 if it is Positive
+ * returns 0 if it is 0
+ * returns -1 if it is Negative
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] var Variable to be checked
+ *
+ * Return Value:
+ * OMX_INT -- returns 1 if it is Positive
+ * returns 0 if it is 0
+ * returns -1 if it is Negative
+ */
+
+OMX_INT armSignCheck (OMX_S16 var);
+
+/**
+ * Function: armClip
+ *
+ * Description: Clips the input between MAX and MIN value
+ *
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] Min lower bound
+ * [in] Max upper bound
+ * [in] src variable to be clipped
+ *
+ * Return Value:
+ * OMX_S32 -- returns clipped value
+ */
+
+OMX_S32 armClip (
+ OMX_INT min,
+ OMX_INT max,
+ OMX_S32 src
+ );
+
+/**
+ * Function: armClip_F32
+ *
+ * Description: Clips the input between MAX and MIN value
+ *
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] Min lower bound
+ * [in] Max upper bound
+ * [in] src variable to be clipped
+ *
+ * Return Value:
+ * OMX_F32 -- returns clipped value
+ */
+
+OMX_F32 armClip_F32 (
+ OMX_F32 min,
+ OMX_F32 max,
+ OMX_F32 src
+ );
+
+/**
+ * Function: armShiftSat_F32
+ *
+ * Description: Divides a float value by 2^shift and
+ * saturates it for unsigned value range for satBits.
+ * Second parameter is like "shifting" the corresponding
+ * integer value. Takes care of rounding while clipping the final
+ * value.
+ *
+ * Parameters:
+ * [in] v Number to be operated upon
+ * [in] shift Divides the input "v" by "2^shift"
+ * [in] satBits Final range is [0, 2^satBits)
+ *
+ * Return Value:
+ * OMX_U32 -- returns "shifted" saturated value
+ */
+
+OMX_U32 armShiftSat_F32(
+ OMX_F32 v,
+ OMX_INT shift,
+ OMX_INT satBits
+ );
+
+/**
+ * Functions: armSwapElem
+ *
+ * Description:
+ * This function swaps two elements at the specified pointer locations.
+ * The size of each element could be anything as specified by <elemSize>
+ *
+ * Return Value:
+ * OMXResult -- Error status from the function
+ */
+OMXResult armSwapElem(OMX_U8 *pBuf1, OMX_U8 *pBuf2, OMX_INT elemSize);
+
+
+/**
+ * Function: armMedianOf3
+ *
+ * Description: Finds the median of three numbers
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] fEntry First entry
+ * [in] sEntry second entry
+ * [in] tEntry Third entry
+ *
+ * Return Value:
+ * OMX_S32 -- returns the median value
+ */
+
+OMX_S32 armMedianOf3 (
+ OMX_S32 fEntry,
+ OMX_S32 sEntry,
+ OMX_S32 tEntry
+ );
+
+/**
+ * Function: armLogSize
+ *
+ * Description: Finds the size of a positive value and returns the same
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] value Positive value
+ *
+ * Return Value:
+ * OMX_U8 -- returns the size of the positive value
+ */
+
+OMX_U8 armLogSize (
+ OMX_U16 value
+ );
+
+/***********************************************************************/
+ /* Saturating Arithmetic operations */
+
+/**
+ * Function :armSatAdd_S32()
+ *
+ * Description :
+ * Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ *
+ **/
+
+OMX_S32 armSatAdd_S32(
+ OMX_S32 Value1,
+ OMX_S32 Value2
+ );
+
+/**
+ * Function :armSatAdd_S64()
+ *
+ * Description :
+ * Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ *
+ **/
+
+OMX_S64 armSatAdd_S64(
+ OMX_S64 Value1,
+ OMX_S64 Value2
+ );
+
+/** Function :armSatSub_S32()
+ *
+ * Description :
+ * Returns the result of saturated subtraction of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S32 armSatSub_S32(
+ OMX_S32 Value1,
+ OMX_S32 Value2
+ );
+
+/**
+ * Function :armSatMac_S32()
+ *
+ * Description :
+ * Returns the result of Multiplication of Value1 and Value2 and subsequent saturated
+ * accumulation with Mac
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ * [in] Mac Accumulator
+ *
+ * Return:
+ * [out] Result of operation
+ **/
+
+OMX_S32 armSatMac_S32(
+ OMX_S32 Mac,
+ OMX_S16 Value1,
+ OMX_S16 Value2
+ );
+
+/**
+ * Function :armSatMac_S16S32_S32
+ *
+ * Description :
+ * Returns the result of saturated MAC operation of the three inputs delayElem, filTap , mac
+ *
+ * mac = mac + Saturate_in_32Bits(delayElem * filTap)
+ *
+ * Parameters:
+ * [in] delayElem First 32 bit Operand
+ * [in] filTap Second 16 bit Operand
+ * [in] mac Result of MAC operation
+ *
+ * Return:
+ * [out] mac Result of operation
+ *
+ **/
+
+OMX_S32 armSatMac_S16S32_S32(
+ OMX_S32 mac,
+ OMX_S32 delayElem,
+ OMX_S16 filTap );
+
+/**
+ * Function :armSatRoundRightShift_S32_S16
+ *
+ * Description :
+ * Returns the result of rounded right shift operation of input by the scalefactor
+ *
+ * output = Saturate_in_16Bits( RightShift( Round(input), scaleFactor ) )
+ *
+ * Parameters:
+ * [in] input The input to be operated on
+ * [in] scaleFactor The shift number
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+
+OMX_S16 armSatRoundRightShift_S32_S16(
+ OMX_S32 input,
+ OMX_INT scaleFactor);
+
+/**
+ * Function :armSatRoundLeftShift_S32()
+ *
+ * Description :
+ * Returns the result of saturating left-shift operation on input
+ * Or rounded Right shift if the input Shift is negative.
+ *
+ * Parameters:
+ * [in] Value Operand
+ * [in] shift Operand for shift operation
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S32 armSatRoundLeftShift_S32(
+ OMX_S32 Value,
+ OMX_INT shift
+ );
+
+/**
+ * Function :armSatRoundLeftShift_S64()
+ *
+ * Description :
+ * Returns the result of saturating left-shift operation on input
+ * Or rounded Right shift if the input Shift is negative.
+ *
+ * Parameters:
+ * [in] Value Operand
+ * [in] shift Operand for shift operation
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S64 armSatRoundLeftShift_S64(
+ OMX_S64 Value,
+ OMX_INT shift
+ );
+
+/**
+ * Function :armSatMulS16S32_S32()
+ *
+ * Description :
+ * Returns the result of a S16 data type multiplied with an S32 data type
+ * in a S32 container
+ *
+ * Parameters:
+ * [in] input1 Operand 1
+ * [in] input2 Operand 2
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+
+OMX_S32 armSatMulS16S32_S32(
+ OMX_S16 input1,
+ OMX_S32 input2);
+
+/**
+ * Function :armSatMulS32S32_S32()
+ *
+ * Description :
+ * Returns the result of a S32 data type multiplied with an S32 data type
+ * in a S32 container
+ *
+ * Parameters:
+ * [in] input1 Operand 1
+ * [in] input2 Operand 2
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S32 armSatMulS32S32_S32(
+ OMX_S32 input1,
+ OMX_S32 input2);
+
+
+/**
+ * Function :armIntDivAwayFromZero()
+ *
+ * Description : Integer division with rounding to the nearest integer.
+ * Half-integer values are rounded away from zero
+ * unless otherwise specified. For example 3//2 is rounded
+ * to 2, and -3//2 is rounded to -2.
+ *
+ * Parameters:
+ * [in] Num Operand 1
+ * [in] Deno Operand 2
+ *
+ * Return:
+ * [out] Result of operation Num//Deno
+ *
+ **/
+
+OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno);
+
+
+/***********************************************************************/
+/*
+ * Debugging macros
+ *
+ */
+
+
+/*
+ * Definition of output stream - change to stderr if necessary
+ */
+#define DEBUG_STREAM stdout
+
+/*
+ * Debug printf macros, one for each argument count.
+ * Add more if needed.
+ */
+#ifdef DEBUG_ON
+#include <stdio.h>
+
+#define DEBUG_PRINTF_0(a) fprintf(DEBUG_STREAM, a)
+#define DEBUG_PRINTF_1(a, b) fprintf(DEBUG_STREAM, a, b)
+#define DEBUG_PRINTF_2(a, b, c) fprintf(DEBUG_STREAM, a, b, c)
+#define DEBUG_PRINTF_3(a, b, c, d) fprintf(DEBUG_STREAM, a, b, c, d)
+#define DEBUG_PRINTF_4(a, b, c, d, e) fprintf(DEBUG_STREAM, a, b, c, d, e)
+#define DEBUG_PRINTF_5(a, b, c, d, e, f) fprintf(DEBUG_STREAM, a, b, c, d, e, f)
+#define DEBUG_PRINTF_6(a, b, c, d, e, f, g) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g)
+#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h)
+#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i)
+#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j)
+#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k)
+#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l)
+#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m)
+#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n)
+#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
+#else /* DEBUG_ON */
+#define DEBUG_PRINTF_0(a)
+#define DEBUG_PRINTF_1(a, b)
+#define DEBUG_PRINTF_2(a, b, c)
+#define DEBUG_PRINTF_3(a, b, c, d)
+#define DEBUG_PRINTF_4(a, b, c, d, e)
+#define DEBUG_PRINTF_5(a, b, c, d, e, f)
+#define DEBUG_PRINTF_6(a, b, c, d, e, f, g)
+#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h)
+#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i)
+#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j)
+#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k)
+#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l)
+#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m)
+#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n)
+#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
+#endif /* DEBUG_ON */
+
+
+/*
+ * Domain and sub domain definitions
+ *
+ * In order to turn on debug for an entire domain or sub-domain
+ * at compile time, one of the DEBUG_DOMAIN_* below may be defined,
+ * which will activate debug in all of the defines it contains.
+ */
+
+#ifdef DEBUG_DOMAIN_AC
+#define DEBUG_OMXACAAC_DECODECHANPAIRELT_MPEG4
+#define DEBUG_OMXACAAC_DECODECHANPAIRELT
+#define DEBUG_OMXACAAC_DECODEDATSTRELT
+#define DEBUG_OMXACAAC_DECODEFILLELT
+#define DEBUG_OMXACAAC_DECODEISSTEREO_S32
+#define DEBUG_OMXACAAC_DECODEMSPNS_S32
+#define DEBUG_OMXACAAC_DECODEMSSTEREO_S32_I
+#define DEBUG_OMXACAAC_DECODEPRGCFGELT
+#define DEBUG_OMXACAAC_DECODETNS_S32_I
+#define DEBUG_OMXACAAC_DEINTERLEAVESPECTRUM_S32
+#define DEBUG_OMXACAAC_ENCODETNS_S32_I
+#define DEBUG_OMXACAAC_LONGTERMPREDICT_S32
+#define DEBUG_OMXACAAC_LONGTERMRECONSTRUCT_S32
+#define DEBUG_OMXACAAC_MDCTFWD_S32
+#define DEBUG_OMXACAAC_MDCTINV_S32_S16
+#define DEBUG_OMXACAAC_NOISELESSDECODE
+#define DEBUG_OMXACAAC_QUANTINV_S32_I
+#define DEBUG_OMXACAAC_UNPACKADIFHEADER
+#define DEBUG_OMXACAAC_UNPACKADTSFRAMEHEADER
+#define DEBUG_OMXACMP3_HUFFMANDECODESFBMBP_S32
+#define DEBUG_OMXACMP3_HUFFMANDECODESFB_S32
+#define DEBUG_OMXACMP3_HUFFMANDECODE_S32
+#define DEBUG_OMXACMP3_MDCTINV_S32
+#define DEBUG_OMXACMP3_REQUANTIZESFB_S32_I
+#define DEBUG_OMXACMP3_REQUANTIZE_S32_I
+#define DEBUG_OMXACMP3_SYNTHPQMF_S32_S16
+#define DEBUG_OMXACMP3_UNPACKFRAMEHEADER
+#define DEBUG_OMXACMP3_UNPACKSCALEFACTORS_S8
+#define DEBUG_OMXACMP3_UNPACKSIDEINFO
+#endif /* DEBUG_DOMAIN_AC */
+
+
+#ifdef DEBUG_DOMAIN_VC
+#define DEBUG_OMXVCM4P10_AVERAGE_16X
+#define DEBUG_OMXVCM4P10_AVERAGE_4X
+#define DEBUG_OMXVCM4P10_AVERAGE_8X
+#define DEBUG_OMXVCM4P10_DEBLOCKCHROMA_U8_C1IR
+#define DEBUG_OMXVCM4P10_DEBLOCKLUMA_U8_C1IR
+#define DEBUG_OMXVCM4P10_DECODECHROMADCCOEFFSTOPAIRCAVLC_U8
+#define DEBUG_OMXVCM4P10_DECODECOEFFSTOPAIRCAVLC_U8
+#define DEBUG_OMXVCM4P10_DEQUANTTRANSFORMACFROMPAIR_U8_S16_C1_DLX
+#define DEBUG_OMXVCM4P10_EXPANDFRAME
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_HOREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_VEREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_HOREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_VEREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_PREDICTINTRACHROMA8X8_U8_C1R
+#define DEBUG_OMXVCM4P10_PREDICTINTRA_16X16_U8_C1R
+#define DEBUG_OMXVCM4P10_PREDICTINTRA_4X4_U8_C1R
+#define DEBUG_OMXVCM4P10_SADQUAR_16X
+#define DEBUG_OMXVCM4P10_SADQUAR_4X
+#define DEBUG_OMXVCM4P10_SADQUAR_8X
+#define DEBUG_OMXVCM4P10_SAD_16X
+#define DEBUG_OMXVCM4P10_SAD_4X
+#define DEBUG_OMXVCM4P10_SAD_8X
+#define DEBUG_OMXVCM4P10_SATD_4X4
+#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTCHROMADCFROMPAIR_U8_S16_C1
+#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTLUMADCFROMPAIR_U8_S16_C1
+#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_CHROMADC
+#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_LUMADC
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_16X16
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_8X8
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_16X16
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_8X8
+#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_SAD_U8_S16
+#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_U8_S16
+#define DEBUG_OMXVCM4P2_DCT8X8BLKDLX
+#define DEBUG_OMXVCM4P2_DECODEBLOCKCOEF_INTER_S16
+#define DEBUG_OMXVCM4P2_DECODEPADMV_PVOP
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTER_S16
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRAACVLC_S16
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRADCVLC_S16
+#define DEBUG_OMXVCM4P2_ENCODEMV_U8_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTER_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRAACVLC_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRADCVLC_S16
+#define DEBUG_OMXVCM4P2_FINDMVPRED
+#define DEBUG_OMXVCM4P2_IDCT8X8BLKDLX
+#define DEBUG_OMXVCM4P2_LIMITMVTORECT
+#define DEBUG_OMXVCM4P2_MOTIONESTIMATIONMB
+#define DEBUG_OMXVCM4P2_PADMBGRAY_U8
+#define DEBUG_OMXVCM4P2_PADMBHORIZONTAL_U8
+#define DEBUG_OMXVCM4P2_PADMBVERTICAL_U8
+#define DEBUG_OMXVCM4P2_PADMV
+#define DEBUG_OMXVCM4P2_QUANTINTER_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINTRA_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINVINTER_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINVINTRA_S16_I
+#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTER
+#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTRA
+#endif /* DEBUG_DOMAIN_VC */
+
+
+#ifdef DEBUG_DOMAIN_IC
+/* To be filled in */
+#endif /* DEBUG_DOMAIN_IC */
+
+
+#ifdef DEBUG_DOMAIN_SP
+/* Defining DEBUG_DOMAIN_SP activates every per-function SP debug switch below.
+ * Each switch is listed exactly once. */
+#define DEBUG_OMXACSP_BLOCKEXP_S16
+#define DEBUG_OMXACSP_BLOCKEXP_S32
+#define DEBUG_OMXACSP_COPY_S16
+#define DEBUG_OMXACSP_DOTPROD_S16
+#define DEBUG_OMXACSP_DOTPROD_S16_SFS
+#define DEBUG_OMXACSP_FFTFWD_CTOC_SC16_SFS
+#define DEBUG_OMXACSP_FFTFWD_CTOC_SC32_SFS
+#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S16S32_SFS
+#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S32_SFS
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC16
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC32
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S16_S32
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S32
+#define DEBUG_OMXACSP_FFTINIT_C_SC16
+#define DEBUG_OMXACSP_FFTINIT_C_SC32
+#define DEBUG_OMXACSP_FFTINIT_R_S16_S32
+#define DEBUG_OMXACSP_FFTINIT_R_S32
+#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32S16_SFS
+#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32_SFS
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC16_SFS
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC32_SFS
+#define DEBUG_OMXACSP_FILTERMEDIAN_S32_I
+#define DEBUG_OMXACSP_FILTERMEDIAN_S32
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_ISFS
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_I
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_SFS
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_ISFS
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_I
+#define DEBUG_OMXACSP_FIR_DIRECT_S16
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_SFS
+#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16_I
+#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16
+#define DEBUG_OMXACSP_IIRONE_DIRECT_S16_I
+#define DEBUG_OMXACSP_IIRONE_DIRECT_S16
+#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16_I
+#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16
+#define DEBUG_OMXACSP_IIR_DIRECT_S16_I
+#define DEBUG_OMXACSP_IIR_DIRECT_S16
+#endif /* DEBUG_DOMAIN_SP */
+
+
+#ifdef DEBUG_DOMAIN_IP
+#define DEBUG_OMXIPBM_ADDC_U8_C1R_SFS
+#define DEBUG_OMXIPBM_COPY_U8_C1R
+#define DEBUG_OMXIPBM_COPY_U8_C3R
+#define DEBUG_OMXIPBM_MIRROR_U8_C1R
+#define DEBUG_OMXIPBM_MULC_U8_C1R_SFS
+#define DEBUG_OMXIPCS_COLORTWISTQ14_U8_C3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR420LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR422LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR444LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR420LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR422LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR444LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_YCBCR420RSZROT_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR420TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR420TORGB565_U8_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR420TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422RSZCSCROTRGB_U8_C2R
+#define DEBUG_OMXIPCS_YCBCR422RSZROT_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB565_U8_U16_C2C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB_U8_C2C3R
+#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_C2P3R
+#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR444TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR444TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB_U8_C3R
+#define DEBUG_OMXIPPP_GETCENTRALMOMENT_S64
+#define DEBUG_OMXIPPP_GETSPATIALMOMENT_S64
+#define DEBUG_OMXIPPP_MOMENTGETSTATESIZE_S64
+#define DEBUG_OMXIPPP_MOMENTINIT_S64
+#define DEBUG_OMXIPPP_MOMENTS64S_U8_C1R
+#define DEBUG_OMXIPPP_MOMENTS64S_U8_C3R
+#endif /* DEBUG_DOMAIN_IP */
+
+
+#endif /* _armCommon_H_ */
+
+/*End of File*/
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Bitstream.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Bitstream.h
new file mode 100644
index 0000000..4f9bc3b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Bitstream.h
@@ -0,0 +1,212 @@
+/**
+ *
+ * File Name: armCOMM_Bitstream.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armCOMM_Bitstream.h
+ * Brief: Declares common APIs/data types used across the OpenMAX encoders/decoders.
+ *
+ */
+
+#ifndef _armCodec_H_
+#define _armCodec_H_
+
+#include "omxtypes.h"
+
+/* One variable-length-code (VLC) entry: the element type of the pCodeBook
+ * table taken by armUnPackVLC32() and the value type packed by armPackVLC32(). */
+typedef struct {
+ OMX_U8 codeLen; /* length of codeWord -- presumably in bits (the VLC32 routines document a 32-bit maximum); TODO confirm */
+ OMX_U32 codeWord; /* the code word itself; NOTE(review): bit alignment within the word is not visible from this header */
+} ARM_VLC32;
+
+/* NOTE(review): an earlier remark here asked for a rename to "ARM_VLC32"; the type already carries that name. */
+
+/**
+ * Function: armLookAheadBits()
+ *
+ * Description:
+ * Get the next N bits from the bitstream without advancing the bitstream pointer
+ *
+ * Parameters:
+ * [in] **ppBitStream
+ * [in] *pOffset
+ * [in] N=1...32
+ *
+ * Returns Value
+ */
+
+OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N);
+
+/**
+ * Function: armGetBits()
+ *
+ * Description:
+ * Read N bits from the bitstream
+ *
+ * Parameters:
+ * [in] *ppBitStream
+ * [in] *pOffset
+ * [in] N=1..32
+ *
+ * [out] *ppBitStream
+ * [out] *pOffset
+ * Returns Value
+ */
+
+OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N);
+
+/**
+ * Function: armByteAlign()
+ *
+ * Description:
+ * Align the pointer *ppBitStream to the next byte boundary
+ *
+ * Parameters:
+ * [in] *ppBitStream
+ * [in] *pOffset
+ *
+ * [out] *ppBitStream
+ * [out] *pOffset
+ *
+ **/
+
+OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset);
+
+/**
+ * Function: armSkipBits()
+ *
+ * Description:
+ * Skip N bits from the value at *ppBitStream
+ *
+ * Parameters:
+ * [in] *ppBitStream
+ * [in] *pOffset
+ * [in] N
+ *
+ * [out] *ppBitStream
+ * [out] *pOffset
+ *
+ **/
+
+OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N);
+
+/***************************************
+ * Variable bit length Decode
+ ***************************************/
+
+/**
+ * Function: armUnPackVLC32()
+ *
+ * Description:
+ * Variable length decode of variable length symbol (max size 32 bits) read from
+ * the bit stream pointed by *ppBitStream at *pOffset by using the table
+ * pointed by pCodeBook
+ *
+ * Parameters:
+ * [in] **ppBitStream
+ * [in] *pOffset
+ * [in] pCodeBook
+ *
+ * [out] **ppBitStream
+ * [out] *pOffset
+ *
+ * Returns : Code Book Index if successful.
+ * : "ARM_NO_CODEBOOK_INDEX = 0xFFFF" if search fails.
+ **/
+
+/* Sentinel returned by armUnPackVLC32() when the code-book search fails.
+ * The whole expansion is parenthesized so the macro behaves as a single
+ * operand (e.g. under sizeof or next to postfix operators). */
+#define ARM_NO_CODEBOOK_INDEX ((OMX_U16)0xFFFF)
+
+OMX_U16 armUnPackVLC32(
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pOffset,
+ const ARM_VLC32 *pCodeBook
+);
+
+/***************************************
+ * Fixed bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackBits
+ *
+ * Description:
+ * Pack a fixed-length code word (codeLength bits) into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte
+ * in the bit stream.
+ * [in] pOffset pointer to the bit position in the byte
+ * pointed by *ppBitStream. Valid within 0
+ * to 7.
+ * [in] codeWord Code word that need to be inserted in to the
+ * bitstream
+ * [in] codeLength Length of the code word valid range 1...32
+ *
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded,
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pOffset *pOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armPackBits (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pOffset,
+ OMX_U32 codeWord,
+ OMX_INT codeLength
+);
+
+/***************************************
+ * Variable bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackVLC32
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte
+ * in the bit stream.
+ * [in] pBitOffset pointer to the bit position in the byte
+ * pointed by *ppBitStream. Valid within 0
+ * to 7.
+ * [in] code VLC code word that need to be inserted in to the
+ * bitstream
+ *
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded,
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armPackVLC32 (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ ARM_VLC32 code
+);
+
+#endif /*_armCodec_H_*/
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Version.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Version.h
new file mode 100644
index 0000000..e99a450
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armCOMM_Version.h
@@ -0,0 +1,43 @@
+/* Guard the header against multiple inclusion. */
+#ifndef __ARM_COMM_VERSION_H__
+#define __ARM_COMM_VERSION_H__
+
+
+/* The following line should be in omxtypes.h but hasn't been approved by OpenMAX yet */
+#define OMX_VERSION 102
+
+/* We need to define these macros in order to convert a #define number into a #define string. */
+#define ARM_QUOTE(a) #a
+#define ARM_INDIRECT(A) ARM_QUOTE(A)
+
+/* Convert the OMX_VERSION number into a string that can be used, for example, to print it out. */
+#define ARM_VERSION_STRING ARM_INDIRECT(OMX_VERSION)
+
+
+/* Define this in order to turn on ARM version/release/build strings in each domain */
+#define ARM_INCLUDE_VERSION_DESCRIPTIONS
+
+#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS
+ extern const char * const omxAC_VersionDescription;
+ extern const char * const omxIC_VersionDescription;
+ extern const char * const omxIP_VersionDescription;
+ extern const char * const omxSP_VersionDescription;
+ extern const char * const omxVC_VersionDescription;
+#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */
+
+
+/* The following entries should be automatically updated by the release script */
+/* They are used in the ARM version strings defined for each domain. */
+
+/* The release tag associated with this release of the library. - used for source and object releases */
+#define OMX_ARM_RELEASE_TAG "r0p0-00bet1"
+
+/* The ARM architecture used to build any objects or executables in this release. */
+#define OMX_ARM_BUILD_ARCHITECTURE "ANSI C"
+
+/* The ARM Toolchain used to build any objects or executables in this release. */
+#define OMX_ARM_BUILD_TOOLCHAIN "ARM RVCT 3.1"
+
+
+#endif /* __ARM_COMM_VERSION_H__ */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armOMX.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armOMX.h
new file mode 100644
index 0000000..f629f72
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/armOMX.h
@@ -0,0 +1,274 @@
+/*
+ *
+ * File Name: armOMX.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * This file allows a version of the OMX DL libraries to be built where some or
+ * all of the function names can be given a user specified suffix.
+ *
+ * You might want to use it where:
+ *
+ * - you want to rename a function "out of the way" so that you could replace
+ * a function with a different version (the original version would still be
+ * in the library just with a different name - so you could debug the new
+ * version by comparing it to the output of the old)
+ *
+ * - you want to rename all the functions to versions with a suffix so that
+ * you can include two versions of the library and choose between functions
+ * at runtime.
+ *
+ * e.g. omxIPBM_Copy_U8_C1R could be renamed omxIPBM_Copy_U8_C1R_CortexA8
+ *
+ */
+
+
+#ifndef _armOMX_H_
+#define _armOMX_H_
+
+
+/*
+ * Two-level token pasting used to build the (optionally suffixed) function names.
+ * OMXCATBAR is the macro to invoke: its arguments are macro-expanded first
+ * (e.g. OMXSP_SUFFIX -> BARE_SUFFIX -> nothing) and only then handed to
+ * OMXCAT2BAR, which pastes them onto the "omx" prefix. Invoking OMXCAT2BAR
+ * directly would paste the unexpanded suffix macro name instead.
+ */
+#define OMXCAT2BAR(A, B) omx ## A ## B
+#define OMXCATBAR(A, B) OMXCAT2BAR(A, B)
+
+/* Define the suffix to add to all functions - the default is no suffix (empty expansion) */
+#define BARE_SUFFIX
+
+
+
+/* Define what happens to the bare suffix-less functions, down to the sub-domain accuracy */
+#define OMXACAAC_SUFFIX BARE_SUFFIX
+#define OMXACMP3_SUFFIX BARE_SUFFIX
+#define OMXICJP_SUFFIX BARE_SUFFIX
+#define OMXIPBM_SUFFIX BARE_SUFFIX
+#define OMXIPCS_SUFFIX BARE_SUFFIX
+#define OMXIPPP_SUFFIX BARE_SUFFIX
+#define OMXSP_SUFFIX BARE_SUFFIX
+#define OMXVCCOMM_SUFFIX BARE_SUFFIX
+#define OMXVCM4P10_SUFFIX BARE_SUFFIX
+#define OMXVCM4P2_SUFFIX BARE_SUFFIX
+
+
+
+
+/* Define what each bare, un-suffixed OpenMAX API function name is to be renamed to */
+#define omxACAAC_DecodeChanPairElt OMXCATBAR(ACAAC_DecodeChanPairElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeDatStrElt OMXCATBAR(ACAAC_DecodeDatStrElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeFillElt OMXCATBAR(ACAAC_DecodeFillElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeIsStereo_S32 OMXCATBAR(ACAAC_DecodeIsStereo_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeMsPNS_S32_I OMXCATBAR(ACAAC_DecodeMsPNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeMsStereo_S32_I OMXCATBAR(ACAAC_DecodeMsStereo_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodePrgCfgElt OMXCATBAR(ACAAC_DecodePrgCfgElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeTNS_S32_I OMXCATBAR(ACAAC_DecodeTNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DeinterleaveSpectrum_S32 OMXCATBAR(ACAAC_DeinterleaveSpectrum_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_EncodeTNS_S32_I OMXCATBAR(ACAAC_EncodeTNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_LongTermPredict_S32 OMXCATBAR(ACAAC_LongTermPredict_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_LongTermReconstruct_S32_I OMXCATBAR(ACAAC_LongTermReconstruct_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_MDCTFwd_S32 OMXCATBAR(ACAAC_MDCTFwd_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_MDCTInv_S32_S16 OMXCATBAR(ACAAC_MDCTInv_S32_S16, OMXACAAC_SUFFIX)
+#define omxACAAC_NoiselessDecode OMXCATBAR(ACAAC_NoiselessDecode, OMXACAAC_SUFFIX)
+#define omxACAAC_QuantInv_S32_I OMXCATBAR(ACAAC_QuantInv_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_UnpackADIFHeader OMXCATBAR(ACAAC_UnpackADIFHeader, OMXACAAC_SUFFIX)
+#define omxACAAC_UnpackADTSFrameHeader OMXCATBAR(ACAAC_UnpackADTSFrameHeader, OMXACAAC_SUFFIX)
+
+
+#define omxACMP3_HuffmanDecode_S32 OMXCATBAR(ACMP3_HuffmanDecode_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_HuffmanDecodeSfb_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfb_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_HuffmanDecodeSfbMbp_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfbMbp_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_MDCTInv_S32 OMXCATBAR(ACMP3_MDCTInv_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_ReQuantize_S32_I OMXCATBAR(ACMP3_ReQuantize_S32_I, OMXACMP3_SUFFIX)
+#define omxACMP3_ReQuantizeSfb_S32_I OMXCATBAR(ACMP3_ReQuantizeSfb_S32_I, OMXACMP3_SUFFIX)
+#define omxACMP3_SynthPQMF_S32_S16 OMXCATBAR(ACMP3_SynthPQMF_S32_S16, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackFrameHeader OMXCATBAR(ACMP3_UnpackFrameHeader, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackScaleFactors_S8 OMXCATBAR(ACMP3_UnpackScaleFactors_S8, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackSideInfo OMXCATBAR(ACMP3_UnpackSideInfo, OMXACMP3_SUFFIX)
+
+#define omxICJP_CopyExpand_U8_C3 OMXCATBAR(ICJP_CopyExpand_U8_C3, OMXICJP_SUFFIX)
+#define omxICJP_DCTFwd_S16 OMXCATBAR(ICJP_DCTFwd_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTFwd_S16_I OMXCATBAR(ICJP_DCTFwd_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTInv_S16 OMXCATBAR(ICJP_DCTInv_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTInv_S16_I OMXCATBAR(ICJP_DCTInv_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_Multiple_S16 OMXCATBAR(ICJP_DCTQuantFwd_Multiple_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_S16 OMXCATBAR(ICJP_DCTQuantFwd_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_S16_I OMXCATBAR(ICJP_DCTQuantFwd_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwdTableInit OMXCATBAR(ICJP_DCTQuantFwdTableInit, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_Multiple_S16 OMXCATBAR(ICJP_DCTQuantInv_Multiple_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_S16 OMXCATBAR(ICJP_DCTQuantInv_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_S16_I OMXCATBAR(ICJP_DCTQuantInv_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInvTableInit OMXCATBAR(ICJP_DCTQuantInvTableInit, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffman8x8_Direct_S16_C1 OMXCATBAR(ICJP_DecodeHuffman8x8_Direct_S16_C1, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffman8x8_Direct_S16_U1_C1 OMXCATBAR(ICJP_EncodeHuffman8x8_Direct_S16_U1_C1, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
+
+#define omxIPBM_AddC_U8_C1R_Sfs OMXCATBAR(IPBM_AddC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
+#define omxIPBM_Copy_U8_C1R OMXCATBAR(IPBM_Copy_U8_C1R, OMXIPBM_SUFFIX)
+#define omxIPBM_Copy_U8_C3R OMXCATBAR(IPBM_Copy_U8_C3R, OMXIPBM_SUFFIX)
+#define omxIPBM_Mirror_U8_C1R OMXCATBAR(IPBM_Mirror_U8_C1R, OMXIPBM_SUFFIX)
+#define omxIPBM_MulC_U8_C1R_Sfs OMXCATBAR(IPBM_MulC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
+
+#define omxIPCS_ColorTwistQ14_U8_C3R OMXCATBAR(IPCS_ColorTwistQ14_U8_C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr420RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr420RszRot_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr422RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R OMXCATBAR(IPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr422RszRot_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbYCr422ToBGR565_U8_U16_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR565_U8_U16_C2C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbYCr422ToBGR888_U8_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR888_U8_C2C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+/* 4:4:4 counterpart of the entry above. This slot previously repeated the 4:2:2 define verbatim, leaving the 444 -> BGR888 LS_MCU function without a suffix mapping. */
+#define omxIPCS_YCbCr444ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R OMXCATBAR(IPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToYCbCr420Rotate_U8_P3R OMXCATBAR(IPCS_YCbCr422ToYCbCr420Rotate_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565_U8_U16_C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR888_U8_C3R OMXCATBAR(IPCS_YCbCr444ToBGR888_U8_C3R, OMXIPCS_SUFFIX)
+
+#define omxIPPP_Deblock_HorEdge_U8_I OMXCATBAR(IPPP_Deblock_HorEdge_U8_I, OMXIPPP_SUFFIX)
+#define omxIPPP_Deblock_VerEdge_U8_I OMXCATBAR(IPPP_Deblock_VerEdge_U8_I, OMXIPPP_SUFFIX)
+#define omxIPPP_FilterFIR_U8_C1R OMXCATBAR(IPPP_FilterFIR_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_FilterMedian_U8_C1R OMXCATBAR(IPPP_FilterMedian_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_GetCentralMoment_S64 OMXCATBAR(IPPP_GetCentralMoment_S64, OMXIPPP_SUFFIX)
+#define omxIPPP_GetSpatialMoment_S64 OMXCATBAR(IPPP_GetSpatialMoment_S64, OMXIPPP_SUFFIX)
+#define omxIPPP_MomentGetStateSize OMXCATBAR(IPPP_MomentGetStateSize, OMXIPPP_SUFFIX)
+#define omxIPPP_MomentInit OMXCATBAR(IPPP_MomentInit, OMXIPPP_SUFFIX)
+#define omxIPPP_Moments_U8_C1R OMXCATBAR(IPPP_Moments_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_Moments_U8_C3R OMXCATBAR(IPPP_Moments_U8_C3R, OMXIPPP_SUFFIX)
+
+#define omxSP_BlockExp_S16 OMXCATBAR(SP_BlockExp_S16, OMXSP_SUFFIX)
+#define omxSP_BlockExp_S32 OMXCATBAR(SP_BlockExp_S32, OMXSP_SUFFIX)
+#define omxSP_Copy_S16 OMXCATBAR(SP_Copy_S16, OMXSP_SUFFIX)
+#define omxSP_DotProd_S16 OMXCATBAR(SP_DotProd_S16, OMXSP_SUFFIX)
+#define omxSP_DotProd_S16_Sfs OMXCATBAR(SP_DotProd_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_CToC_SC16_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_CToC_SC32_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_RToCCS_S16S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S16S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_RToCCS_S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_C_SC16 OMXCATBAR(SP_FFTGetBufSize_C_SC16, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_C_SC32 OMXCATBAR(SP_FFTGetBufSize_C_SC32, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_R_S16S32 OMXCATBAR(SP_FFTGetBufSize_R_S16S32, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_R_S32 OMXCATBAR(SP_FFTGetBufSize_R_S32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_C_SC16 OMXCATBAR(SP_FFTInit_C_SC16, OMXSP_SUFFIX)
+#define omxSP_FFTInit_C_SC32 OMXCATBAR(SP_FFTInit_C_SC32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_R_S16S32 OMXCATBAR(SP_FFTInit_R_S16S32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_R_S32 OMXCATBAR(SP_FFTInit_R_S32, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CCSToR_S32_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CCSToR_S32S16_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CToC_SC16_Sfs OMXCATBAR(SP_FFTInv_CToC_SC16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CToC_SC32_Sfs OMXCATBAR(SP_FFTInv_CToC_SC32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FilterMedian_S32 OMXCATBAR(SP_FilterMedian_S32, OMXSP_SUFFIX)
+#define omxSP_FilterMedian_S32_I OMXCATBAR(SP_FilterMedian_S32_I, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16 OMXCATBAR(SP_FIR_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_I OMXCATBAR(SP_FIR_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_ISfs OMXCATBAR(SP_FIR_Direct_S16_ISfs, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_Sfs OMXCATBAR(SP_FIR_Direct_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16 OMXCATBAR(SP_FIROne_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_I OMXCATBAR(SP_FIROne_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_ISfs OMXCATBAR(SP_FIROne_Direct_S16_ISfs, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_Sfs OMXCATBAR(SP_FIROne_Direct_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_IIR_BiQuadDirect_S16 OMXCATBAR(SP_IIR_BiQuadDirect_S16, OMXSP_SUFFIX)
+#define omxSP_IIR_BiQuadDirect_S16_I OMXCATBAR(SP_IIR_BiQuadDirect_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIR_Direct_S16 OMXCATBAR(SP_IIR_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_IIR_Direct_S16_I OMXCATBAR(SP_IIR_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIROne_BiQuadDirect_S16 OMXCATBAR(SP_IIROne_BiQuadDirect_S16, OMXSP_SUFFIX)
+#define omxSP_IIROne_BiQuadDirect_S16_I OMXCATBAR(SP_IIROne_BiQuadDirect_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIROne_Direct_S16 OMXCATBAR(SP_IIROne_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_IIROne_Direct_S16_I OMXCATBAR(SP_IIROne_Direct_S16_I, OMXSP_SUFFIX)
+
+#define omxVCCOMM_Average_16x OMXCATBAR(VCCOMM_Average_16x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Average_8x OMXCATBAR(VCCOMM_Average_8x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ComputeTextureErrorBlock OMXCATBAR(VCCOMM_ComputeTextureErrorBlock, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ComputeTextureErrorBlock_SAD OMXCATBAR(VCCOMM_ComputeTextureErrorBlock_SAD, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Copy16x16 OMXCATBAR(VCCOMM_Copy16x16, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Copy8x8 OMXCATBAR(VCCOMM_Copy8x8, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ExpandFrame_I OMXCATBAR(VCCOMM_ExpandFrame_I, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_LimitMVToRect OMXCATBAR(VCCOMM_LimitMVToRect, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_SAD_16x OMXCATBAR(VCCOMM_SAD_16x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_SAD_8x OMXCATBAR(VCCOMM_SAD_8x, OMXVCCOMM_SUFFIX)
+
+#define omxVCM4P10_Average_4x OMXCATBAR(VCM4P10_Average_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Half OMXCATBAR(VCM4P10_BlockMatch_Half, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Integer OMXCATBAR(VCM4P10_BlockMatch_Integer, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Quarter OMXCATBAR(VCM4P10_BlockMatch_Quarter, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DeblockChroma_I OMXCATBAR(VCM4P10_DeblockChroma_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DeblockLuma_I OMXCATBAR(VCM4P10_DeblockLuma_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeChromaDcCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DecodeCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DequantTransformResidualFromPairAndAdd OMXCATBAR(VCM4P10_DequantTransformResidualFromPairAndAdd, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingChroma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_HorEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingChroma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_VerEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingLuma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_HorEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingLuma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_VerEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_GetVLCInfo OMXCATBAR(VCM4P10_GetVLCInfo, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateChroma OMXCATBAR(VCM4P10_InterpolateChroma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateHalfHor_Luma OMXCATBAR(VCM4P10_InterpolateHalfHor_Luma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateHalfVer_Luma OMXCATBAR(VCM4P10_InterpolateHalfVer_Luma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateLuma OMXCATBAR(VCM4P10_InterpolateLuma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformDequant_ChromaDC OMXCATBAR(VCM4P10_InvTransformDequant_ChromaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformDequant_LumaDC OMXCATBAR(VCM4P10_InvTransformDequant_LumaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformResidualAndAdd OMXCATBAR(VCM4P10_InvTransformResidualAndAdd, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MEGetBufSize OMXCATBAR(VCM4P10_MEGetBufSize, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MEInit OMXCATBAR(VCM4P10_MEInit, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MotionEstimationMB OMXCATBAR(VCM4P10_MotionEstimationMB, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntra_16x16 OMXCATBAR(VCM4P10_PredictIntra_16x16, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntra_4x4 OMXCATBAR(VCM4P10_PredictIntra_4x4, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntraChroma_8x8 OMXCATBAR(VCM4P10_PredictIntraChroma_8x8, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SAD_4x OMXCATBAR(VCM4P10_SAD_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_16x OMXCATBAR(VCM4P10_SADQuar_16x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_4x OMXCATBAR(VCM4P10_SADQuar_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_8x OMXCATBAR(VCM4P10_SADQuar_8x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SATD_4x4 OMXCATBAR(VCM4P10_SATD_4x4, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SubAndTransformQDQResidual OMXCATBAR(VCM4P10_SubAndTransformQDQResidual, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformDequantChromaDCFromPair OMXCATBAR(VCM4P10_TransformDequantChromaDCFromPair, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformDequantLumaDCFromPair OMXCATBAR(VCM4P10_TransformDequantLumaDCFromPair, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformQuant_ChromaDC OMXCATBAR(VCM4P10_TransformQuant_ChromaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformQuant_LumaDC OMXCATBAR(VCM4P10_TransformQuant_LumaDC, OMXVCM4P10_SUFFIX)
+
+#define omxVCM4P2_BlockMatch_Half_16x16 OMXCATBAR(VCM4P2_BlockMatch_Half_16x16, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Half_8x8 OMXCATBAR(VCM4P2_BlockMatch_Half_8x8, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Integer_16x16 OMXCATBAR(VCM4P2_BlockMatch_Integer_16x16, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Integer_8x8 OMXCATBAR(VCM4P2_BlockMatch_Integer_8x8, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DCT8x8blk OMXCATBAR(VCM4P2_DCT8x8blk, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeBlockCoef_Inter OMXCATBAR(VCM4P2_DecodeBlockCoef_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeBlockCoef_Intra OMXCATBAR(VCM4P2_DecodeBlockCoef_Intra, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodePadMV_PVOP OMXCATBAR(VCM4P2_DecodePadMV_PVOP, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_Inter OMXCATBAR(VCM4P2_DecodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeMV OMXCATBAR(VCM4P2_EncodeMV, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_Inter OMXCATBAR(VCM4P2_EncodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_FindMVpred OMXCATBAR(VCM4P2_FindMVpred, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_IDCT8x8blk OMXCATBAR(VCM4P2_IDCT8x8blk, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MCReconBlock OMXCATBAR(VCM4P2_MCReconBlock, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MEGetBufSize OMXCATBAR(VCM4P2_MEGetBufSize, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MEInit OMXCATBAR(VCM4P2_MEInit, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MotionEstimationMB OMXCATBAR(VCM4P2_MotionEstimationMB, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_PredictReconCoefIntra OMXCATBAR(VCM4P2_PredictReconCoefIntra, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInter_I OMXCATBAR(VCM4P2_QuantInter_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantIntra_I OMXCATBAR(VCM4P2_QuantIntra_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInvInter_I OMXCATBAR(VCM4P2_QuantInvInter_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInvIntra_I OMXCATBAR(VCM4P2_QuantInvIntra_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_TransRecBlockCoef_inter OMXCATBAR(VCM4P2_TransRecBlockCoef_inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_TransRecBlockCoef_intra OMXCATBAR(VCM4P2_TransRecBlockCoef_intra, OMXVCM4P2_SUFFIX)
+
+
+#endif /* _armOMX_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/omxtypes.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/omxtypes.h
new file mode 100644
index 0000000..8b295a6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/api/omxtypes.h
@@ -0,0 +1,252 @@
+/**
+ * File: omxtypes.h
+ * Brief: Defines basic Data types used in OpenMAX v1.0.2 header files.
+ *
+ * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved.
+ *
+ * These materials are protected by copyright laws and contain material
+ * proprietary to the Khronos Group, Inc. You may use these materials
+ * for implementing Khronos specifications, without altering or removing
+ * any trademark, copyright or other notice from the specification.
+ *
+ * Khronos Group makes no, and expressly disclaims any, representations
+ * or warranties, express or implied, regarding these materials, including,
+ * without limitation, any implied warranties of merchantability or fitness
+ * for a particular purpose or non-infringement of any intellectual property.
+ * Khronos Group makes no, and expressly disclaims any, warranties, express
+ * or implied, regarding the correctness, accuracy, completeness, timeliness,
+ * and reliability of these materials.
+ *
+ * Under no circumstances will the Khronos Group, or any of its Promoters,
+ * Contributors or Members or their respective partners, officers, directors,
+ * employees, agents or representatives be liable for any damages, whether
+ * direct, indirect, special or consequential damages for lost revenues,
+ * lost profits, or otherwise, arising from or in connection with these
+ * materials.
+ *
+ * Khronos and OpenMAX are trademarks of the Khronos Group Inc.
+ *
+ */
+
+#ifndef _OMXTYPES_H_
+#define _OMXTYPES_H_
+
+#include <limits.h>
+
+#define OMX_IN
+#define OMX_OUT
+#define OMX_INOUT
+
+
+/**
+ * OMXResult: status codes returned by OpenMAX DL functions.
+ * OMX_Sts_NoErr (0) means success; every other code defined here is
+ * negative. The final enumerator is INT_MAX and is only a placeholder
+ * that forces the enum to be as wide as OMX_INT.
+ */
+typedef enum {
+
+ /* Mandatory return codes - use cases are explicitly described for each function */
+ OMX_Sts_NoErr = 0, /* No error, the function completed successfully */
+ OMX_Sts_Err = -2, /* Unknown/unspecified error */
+ OMX_Sts_InvalidBitstreamValErr = -182, /* Invalid value detected during bitstream processing */
+ OMX_Sts_MemAllocErr = -9, /* Not enough memory allocated for the operation */
+ OMX_StsACAAC_GainCtrErr = -159, /* AAC: Unsupported gain control data detected */
+ OMX_StsACAAC_PrgNumErr = -167, /* AAC: Invalid number of elements for one program */
+ OMX_StsACAAC_CoefValErr = -163, /* AAC: Invalid quantized coefficient value */
+ OMX_StsACAAC_MaxSfbErr = -162, /* AAC: Invalid maxSfb value in relation to numSwb */
+ OMX_StsACAAC_PlsDataErr = -160, /* AAC: pulse escape sequence data error */
+
+ /* Optional return codes - use cases are explicitly described for each function*/
+ OMX_Sts_BadArgErr = -5, /* Bad Arguments */
+
+ OMX_StsACAAC_TnsNumFiltErr = -157, /* AAC: Invalid number of TNS filters */
+ OMX_StsACAAC_TnsLenErr = -156, /* AAC: Invalid TNS region length */
+ OMX_StsACAAC_TnsOrderErr = -155, /* AAC: Invalid order of TNS filter */
+ OMX_StsACAAC_TnsCoefResErr = -154, /* AAC: Invalid bit-resolution for TNS filter coefficients */
+ OMX_StsACAAC_TnsCoefErr = -153, /* AAC: Invalid TNS filter coefficients */
+ OMX_StsACAAC_TnsDirectErr = -152, /* AAC: Invalid TNS filter direction */
+
+ OMX_StsICJP_JPEGMarkerErr = -183, /* JPEG marker encountered within an entropy-coded block; */
+ /* Huffman decoding operation terminated early. */
+ OMX_StsICJP_JPEGMarker = -181, /* JPEG marker encountered; Huffman decoding */
+ /* operation terminated early. */
+ OMX_StsIPPP_ContextMatchErr = -17, /* Context parameter doesn't match to the operation */
+
+ OMX_StsSP_EvenMedianMaskSizeErr = -180, /* Even size of the Median Filter mask was replaced by the odd one */
+
+ OMX_Sts_MaximumEnumeration = INT_MAX /*Placeholder, forces enum of size OMX_INT*/
+
+ } OMXResult; /** Return value or error value returned from a function. Identical to OMX_INT */
+
+
+/* OMX_U8 */
+#if UCHAR_MAX == 0xff
+typedef unsigned char OMX_U8;
+#elif USHRT_MAX == 0xff
+typedef unsigned short int OMX_U8;
+#else
+#error OMX_U8 undefined
+#endif
+
+
+/* OMX_S8 */
+#if SCHAR_MAX == 0x7f
+typedef signed char OMX_S8;
+#elif SHRT_MAX == 0x7f
+typedef signed short int OMX_S8;
+#else
+#error OMX_S8 undefined
+#endif
+
+
+/* OMX_U16 */
+#if USHRT_MAX == 0xffff
+typedef unsigned short int OMX_U16;
+#elif UINT_MAX == 0xffff
+typedef unsigned int OMX_U16;
+#else
+#error OMX_U16 undefined
+#endif
+
+
+/* OMX_S16 */
+#if SHRT_MAX == 0x7fff
+typedef signed short int OMX_S16;
+#elif INT_MAX == 0x7fff
+typedef signed int OMX_S16;
+#else
+#error OMX_S16 undefined
+#endif
+
+
+/* OMX_U32: unsigned integer type that is exactly 32 bits wide.
+ * The fallback must be checked against ULONG_MAX (the limit of the
+ * unsigned type being selected); LONG_MAX can never equal 0xffffffff
+ * for a 32-bit long. */
+#if UINT_MAX == 0xffffffff
+typedef unsigned int OMX_U32;
+#elif ULONG_MAX == 0xffffffff
+typedef unsigned long int OMX_U32;
+#else
+#error OMX_U32 undefined
+#endif
+
+
+/* OMX_S32 */
+#if INT_MAX == 0x7fffffff
+typedef signed int OMX_S32;
+#elif LONG_MAX == 0x7fffffff
+typedef long signed int OMX_S32;
+#else
+#error OMX_S32 undefined
+#endif
+
+
+/* OMX_U64 & OMX_S64 */
+#if defined( _WIN32 ) || defined ( _WIN64 )
+ typedef __int64 OMX_S64; /** Signed 64-bit integer */
+ typedef unsigned __int64 OMX_U64; /** Unsigned 64-bit integer */
+ #define OMX_MIN_S64 (0x8000000000000000i64)
+ #define OMX_MIN_U64 (0x0000000000000000i64)
+ #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFi64)
+ #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFi64)
+#else
+ typedef long long OMX_S64; /** Signed 64-bit integer */
+ typedef unsigned long long OMX_U64; /** Unsigned 64-bit integer */
+ #define OMX_MIN_S64 (0x8000000000000000LL)
+ #define OMX_MIN_U64 (0x0000000000000000LL)
+ #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFLL)
+ #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFLL)
+#endif
+
+
+/* OMX_SC8 */
+typedef struct
+{
+ OMX_S8 Re; /** Real part */
+ OMX_S8 Im; /** Imaginary part */
+
+} OMX_SC8; /** Signed 8-bit complex number */
+
+
+/* OMX_SC16 */
+typedef struct
+{
+ OMX_S16 Re; /** Real part */
+ OMX_S16 Im; /** Imaginary part */
+
+} OMX_SC16; /** Signed 16-bit complex number */
+
+
+/* OMX_SC32 */
+typedef struct
+{
+ OMX_S32 Re; /** Real part */
+ OMX_S32 Im; /** Imaginary part */
+
+} OMX_SC32; /** Signed 32-bit complex number */
+
+
+/* OMX_SC64 */
+typedef struct
+{
+ OMX_S64 Re; /** Real part */
+ OMX_S64 Im; /** Imaginary part */
+
+} OMX_SC64; /** Signed 64-bit complex number */
+
+
+/* OMX_F32 */
+typedef float OMX_F32; /** Single precision floating point,IEEE 754 */
+
+
+/* OMX_F64 */
+typedef double OMX_F64; /** Double precision floating point,IEEE 754 */
+
+
+/* OMX_INT */
+typedef int OMX_INT; /** signed integer corresponding to machine word length, has maximum signed value INT_MAX*/
+
+
+#define OMX_MIN_S8 (-128)
+#define OMX_MIN_U8 0
+#define OMX_MIN_S16 (-32768)
+#define OMX_MIN_U16 0
+#define OMX_MIN_S32 (-2147483647-1)
+#define OMX_MIN_U32 0
+
+#define OMX_MAX_S8 (127)
+#define OMX_MAX_U8 (255)
+#define OMX_MAX_S16 (32767)
+#define OMX_MAX_U16 (0xFFFF)
+#define OMX_MAX_S32 (2147483647)
+#define OMX_MAX_U32 (0xFFFFFFFF)
+
+typedef void OMXVoid;
+
+#ifndef NULL
+#define NULL ((void*)0)
+#endif
+
+/** Defines the geometric position and size of a rectangle,
+ * where x,y defines the coordinates of the top left corner
+ * of the rectangle, with dimensions width in the x-direction
+ * and height in the y-direction */
+typedef struct {
+ OMX_INT x; /** x-coordinate of top left corner of rectangle */
+ OMX_INT y; /** y-coordinate of top left corner of rectangle */
+ OMX_INT width; /** Width in the x-direction. */
+ OMX_INT height; /** Height in the y-direction. */
+}OMXRect;
+
+
+/** Defines the geometric position of a point, */
+typedef struct
+{
+ OMX_INT x; /** x-coordinate */
+ OMX_INT y; /** y-coordinate */
+
+} OMXPoint;
+
+
+/** Defines the dimensions of a rectangle, or region of interest in an image */
+typedef struct
+{
+ OMX_INT width; /** Width of the rectangle, in the x-direction */
+ OMX_INT height; /** Height of the rectangle, in the y-direction */
+
+} OMXSize;
+
+#endif /* _OMXTYPES_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/build_vc.pl b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/build_vc.pl
new file mode 100755
index 0000000..f0b43e0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/build_vc.pl
@@ -0,0 +1,111 @@
+#!/usr/bin/perl
+#
+#
+# File Name: build_vc.pl
+# OpenMAX DL: v1.0.2
+# Revision: 9641
+# Date: Thursday, February 7, 2008
+#
+# (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+#
+#
+#
+# This file builds the OpenMAX DL vc domain library omxVC.o.
+#
+
+use File::Spec;
+use strict;
+
+my ($CC, $CC_OPTS, $AS, $AS_OPTS, $LIB, $LIB_OPTS, $LIB_TYPE);
+
+$CC = 'armcc';
+$CC_OPTS = '--no_unaligned_access --cpu ARM7TDMI -c';
+$AS = 'armasm';
+$AS_OPTS = '--no_unaligned_access --cpu ARM7TDMI';
+# $LIB = 'armlink';
+# $LIB_OPTS = '--partial -o';
+# $LIB_TYPE = '.o';
+$LIB = 'armar';
+$LIB_OPTS = '--create -r';
+$LIB_TYPE = '.a';
+
+#------------------------
+
+my (@headerlist, @filelist, $hd, $file, $ofile, $command, $objlist, $libfile, $h);
+
+# Define the list of directories containing included header files.
+@headerlist = qw(api vc/api vc/m4p2/api vc/m4p10/api);
+
+# Define the list of source files to compile.
+open(FILES, '<filelist_vc.txt') or die("Can't open source file list\n");
+@filelist = <FILES>;
+close(FILES);
+
+# Fix the file separators in the header paths
+foreach $h (@headerlist)
+{
+ $h = File::Spec->canonpath($h);
+}
+
+# Create the include path to be passed to the compiler
+$hd = '-I' . join(' -I', @headerlist);
+
+# Create the build directories "/lib/" and "/obj/" (if they are not there already)
+mkdir "obj", 0777 if (! -d "obj");
+mkdir "lib", 0777 if (! -d "lib");
+
+$objlist = '';
+
+# Compile each file. Failures are counted rather than ignored so that the
+# final status message reflects what actually happened.
+my $failures = 0;
+
+# Maps a source file extension to the tool invocation that builds it.
+my %tool_for = (
+    'c' => $CC.' '.$CC_OPTS,
+    's' => $AS.' '.$AS_OPTS,
+);
+
+foreach $file (@filelist)
+{
+    my $f;
+    my $base;
+    my $ext;
+    my $objfile;
+
+    chomp($file);
+    $file = File::Spec->canonpath($file);
+
+    (undef, undef, $f) = File::Spec->splitpath($file);
+    # Accept extensions of any length so that a file such as "notes.txt" is
+    # ignored below instead of being reported as having no extension.
+    if(($base, $ext) = $f =~ /(.+)\.(\w+)$/)
+    {
+        if(exists $tool_for{$ext})
+        {
+            $objfile = File::Spec->catfile('obj', $base.'.o');
+            $objlist .= "$objfile ";
+            $command = $tool_for{$ext}.' '.$hd.' -o '.$objfile.' '.$file;
+            print "$command\n";
+            if(system($command) != 0)
+            {
+                print STDERR "Failed to build: $file\n";
+                $failures++;
+            }
+        }
+        else
+        {
+            print "Ignoring file: $f\n";
+        }
+    }
+    else
+    {
+        die "No file extension found: $f\n";
+    }
+}
+
+# Do not archive a partial (or stale) set of objects.
+die "Build failed: $failures file(s) did not compile or assemble\n" if ($failures > 0);
+
+# Do the final link stage to create the libraries.
+$libfile = File::Spec->catfile('lib', 'omxVC'.$LIB_TYPE);
+$command = $LIB.' '.$LIB_OPTS.' '.$libfile.' '.$objlist;
+print "$command\n";
+(system($command) == 0) or die "Build failed: could not create $libfile\n";
+print "Build successful\n";
+
+
+
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/filelist_vc.txt b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/filelist_vc.txt
new file mode 100644
index 0000000..66f34ac
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/filelist_vc.txt
@@ -0,0 +1,123 @@
+./api/armCOMM.h
+./api/armCOMM_Bitstream.h
+./api/armCOMM_Version.h
+./api/armOMX_ReleaseVersion.h
+./api/omxtypes.h
+./src/armCOMM.c
+./src/armCOMM_Bitstream.c
+./vc/api/armVC.h
+./vc/api/omxVC.h
+./vc/comm/src/armVCCOMM_Average.c
+./vc/comm/src/armVCCOMM_SAD.c
+./vc/comm/src/omxVCCOMM_Average_16x.c
+./vc/comm/src/omxVCCOMM_Average_8x.c
+./vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock.c
+./vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock_SAD.c
+./vc/comm/src/omxVCCOMM_Copy16x16.c
+./vc/comm/src/omxVCCOMM_Copy8x8.c
+./vc/comm/src/omxVCCOMM_ExpandFrame_I.c
+./vc/comm/src/omxVCCOMM_LimitMVToRect.c
+./vc/comm/src/omxVCCOMM_SAD_16x.c
+./vc/comm/src/omxVCCOMM_SAD_8x.c
+./vc/m4p10/api/armVCM4P10_CAVLCTables.h
+./vc/m4p10/src/armVCM4P10_CAVLCTables.c
+./vc/m4p10/src/armVCM4P10_CompareMotionCostToMV.c
+./vc/m4p10/src/armVCM4P10_DeBlockPixel.c
+./vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair.c
+./vc/m4p10/src/armVCM4P10_DequantTables.c
+./vc/m4p10/src/armVCM4P10_FwdTransformResidual4x4.c
+./vc/m4p10/src/armVCM4P10_Interpolate_Chroma.c
+./vc/m4p10/src/armVCM4P10_Interpolate_Luma.c
+./vc/m4p10/src/armVCM4P10_InterpolateHalfDiag_Luma.c
+./vc/m4p10/src/armVCM4P10_InterpolateHalfHor_Luma.c
+./vc/m4p10/src/armVCM4P10_InterpolateHalfVer_Luma.c
+./vc/m4p10/src/armVCM4P10_PredictIntraDC4x4.c
+./vc/m4p10/src/armVCM4P10_QuantTables.c
+./vc/m4p10/src/armVCM4P10_SADQuar.c
+./vc/m4p10/src/armVCM4P10_TransformResidual4x4.c
+./vc/m4p10/src/armVCM4P10_UnpackBlock2x2.c
+./vc/m4p10/src/armVCM4P10_UnpackBlock4x4.c
+./vc/m4p10/src/omxVCM4P10_Average_4x.c
+./vc/m4p10/src/omxVCM4P10_BlockMatch_Half.c
+./vc/m4p10/src/omxVCM4P10_BlockMatch_Integer.c
+./vc/m4p10/src/omxVCM4P10_BlockMatch_Quarter.c
+./vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
+./vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
+./vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+./vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+./vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd.c
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c
+./vc/m4p10/src/omxVCM4P10_GetVLCInfo.c
+./vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
+./vc/m4p10/src/omxVCM4P10_InterpolateHalfHor_Luma.c
+./vc/m4p10/src/omxVCM4P10_InterpolateHalfVer_Luma.c
+./vc/m4p10/src/omxVCM4P10_InterpolateLuma.c
+./vc/m4p10/src/omxVCM4P10_InvTransformDequant_ChromaDC.c
+./vc/m4p10/src/omxVCM4P10_InvTransformDequant_LumaDC.c
+./vc/m4p10/src/omxVCM4P10_InvTransformResidualAndAdd.c
+./vc/m4p10/src/omxVCM4P10_MEGetBufSize.c
+./vc/m4p10/src/omxVCM4P10_MEInit.c
+./vc/m4p10/src/omxVCM4P10_MotionEstimationMB.c
+./vc/m4p10/src/omxVCM4P10_PredictIntra_16x16.c
+./vc/m4p10/src/omxVCM4P10_PredictIntra_4x4.c
+./vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8.c
+./vc/m4p10/src/omxVCM4P10_SAD_4x.c
+./vc/m4p10/src/omxVCM4P10_SADQuar_16x.c
+./vc/m4p10/src/omxVCM4P10_SADQuar_4x.c
+./vc/m4p10/src/omxVCM4P10_SADQuar_8x.c
+./vc/m4p10/src/omxVCM4P10_SATD_4x4.c
+./vc/m4p10/src/omxVCM4P10_SubAndTransformQDQResidual.c
+./vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair.c
+./vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair.c
+./vc/m4p10/src/omxVCM4P10_TransformQuant_ChromaDC.c
+./vc/m4p10/src/omxVCM4P10_TransformQuant_LumaDC.c
+./vc/m4p2/api/armVCM4P2_DCT_Table.h
+./vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
+./vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
+./vc/m4p2/src/armVCM4P2_ACDCPredict.c
+./vc/m4p2/src/armVCM4P2_BlockMatch_Half.c
+./vc/m4p2/src/armVCM4P2_BlockMatch_Integer.c
+./vc/m4p2/src/armVCM4P2_CheckVLCEscapeMode.c
+./vc/m4p2/src/armVCM4P2_CompareMV.c
+./vc/m4p2/src/armVCM4P2_DCT_Table.c
+./vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_intra.c
+./vc/m4p2/src/armVCM4P2_EncodeVLCZigzag_intra.c
+./vc/m4p2/src/armVCM4P2_FillVLCBuffer.c
+./vc/m4p2/src/armVCM4P2_FillVLDBuffer.c
+./vc/m4p2/src/armVCM4P2_GetVLCBits.c
+./vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
+./vc/m4p2/src/armVCM4P2_PutVLCBits.c
+./vc/m4p2/src/armVCM4P2_SetPredDir.c
+./vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
+./vc/m4p2/src/omxVCM4P2_BlockMatch_Half_16x16.c
+./vc/m4p2/src/omxVCM4P2_BlockMatch_Half_8x8.c
+./vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_16x16.c
+./vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_8x8.c
+./vc/m4p2/src/omxVCM4P2_DCT8x8blk.c
+./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
+./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
+./vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP.c
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter.c
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c
+./vc/m4p2/src/omxVCM4P2_EncodeMV.c
+./vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_Inter.c
+./vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c
+./vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c
+./vc/m4p2/src/omxVCM4P2_FindMVpred.c
+./vc/m4p2/src/omxVCM4P2_IDCT8x8blk.c
+./vc/m4p2/src/omxVCM4P2_MCReconBlock.c
+./vc/m4p2/src/omxVCM4P2_MEGetBufSize.c
+./vc/m4p2/src/omxVCM4P2_MEInit.c
+./vc/m4p2/src/omxVCM4P2_MotionEstimationMB.c
+./vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra.c
+./vc/m4p2/src/omxVCM4P2_QuantInter_I.c
+./vc/m4p2/src/omxVCM4P2_QuantIntra_I.c
+./vc/m4p2/src/omxVCM4P2_QuantInvInter_I.c
+./vc/m4p2/src/omxVCM4P2_QuantInvIntra_I.c
+./vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_inter.c
+./vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_intra.c
+./vc/src/armVC_Version.c \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM.c
new file mode 100644
index 0000000..e572a89
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM.c
@@ -0,0 +1,936 @@
+/**
+ *
+ * File Name: armCOMM.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Defines Common APIs used across OpenMAX API's
+ */
+
+#include "omxtypes.h"
+#include "armCOMM.h"
+
+/***********************************************************************/
+ /* Miscellaneous Arithmetic operations */
+
+/**
+ * Function: armRoundFloatToS16
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halves are moved
+ * away from zero) and converts it to a 16-bit signed integer.
+ * No saturation is applied.
+ *
+ * Parameters:
+ * [in]  Value    Floating point value to be converted
+ *
+ * Return Value:
+ * Rounded value in OMX_S16 format
+ *
+ */
+
+OMX_S16 armRoundFloatToS16 (OMX_F64 Value)
+{
+    /* Bias towards the sign of the input, then let the cast truncate */
+    const OMX_F64 biased = (Value > 0) ? (Value + .5) : (Value - .5);
+
+    return (OMX_S16)biased;
+}
+
+/**
+ * Function: armRoundFloatToS32
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halves are moved
+ * away from zero) and converts it to a 32-bit signed integer.
+ * No saturation is applied.
+ *
+ * Parameters:
+ * [in]  Value    Floating point value to be converted
+ *
+ * Return Value:
+ * Rounded value in OMX_S32 format
+ *
+ */
+
+OMX_S32 armRoundFloatToS32 (OMX_F64 Value)
+{
+    /* Bias towards the sign of the input, then let the cast truncate */
+    const OMX_F64 biased = (Value > 0) ? (Value + .5) : (Value - .5);
+
+    return (OMX_S32)biased;
+}
+/**
+ * Function: armSatRoundFloatToS16
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halves are moved
+ * away from zero) and saturates the result to the OMX_S16 range.
+ *
+ * Parameters:
+ * [in]  Value    Floating point value to be converted
+ *
+ * Return Value:
+ * Rounded and saturated value in OMX_S16 format
+ *
+ */
+
+OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value)
+{
+    const OMX_F64 rounded = (Value > 0) ? (Value + 0.5) : (Value - 0.5);
+
+    /* A positive input can only exceed the upper bound and a non-positive
+     * input can only fall below the lower bound, so testing both bounds
+     * unconditionally is equivalent to testing one per sign. */
+    if (rounded > (OMX_S16)OMX_MAX_S16)
+    {
+        return (OMX_S16)OMX_MAX_S16;
+    }
+    if (rounded < (OMX_S16)OMX_MIN_S16)
+    {
+        return (OMX_S16)OMX_MIN_S16;
+    }
+    return (OMX_S16)rounded;
+}
+
+/**
+ * Function: armSatRoundFloatToS32
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halves are moved
+ * away from zero) and saturates the result to the OMX_S32 range.
+ *
+ * Parameters:
+ * [in]  Value    Floating point value to be converted
+ *
+ * Return Value:
+ * Rounded and saturated value in OMX_S32 format
+ *
+ */
+
+OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value)
+{
+    const OMX_F64 rounded = (Value > 0) ? (Value + 0.5) : (Value - 0.5);
+
+    /* A positive input can only exceed the upper bound and a non-positive
+     * input can only fall below the lower bound, so testing both bounds
+     * unconditionally is equivalent to testing one per sign. */
+    if (rounded > (OMX_S32)OMX_MAX_S32)
+    {
+        return (OMX_S32)OMX_MAX_S32;
+    }
+    if (rounded < (OMX_S32)OMX_MIN_S32)
+    {
+        return (OMX_S32)OMX_MIN_S32;
+    }
+    return (OMX_S32)rounded;
+}
+
+/**
+ * Function: armSatRoundFloatToU16
+ *
+ * Description:
+ * Converts a double precision value into an unsigned short int after
+ * rounding (adding 0.5 and truncating) and saturation to [0, OMX_MAX_U16].
+ *
+ * Parameters:
+ * [in]  Value    Floating point value to be converted
+ *
+ * Return Value:
+ * Rounded and saturated value in OMX_U16 format. Negative inputs (and
+ * NaN) saturate to 0.
+ *
+ */
+
+OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value)
+{
+    Value += 0.5;
+
+    /* Converting a negative floating value to an unsigned type is undefined
+     * behaviour, so saturate at the lower bound explicitly. The negated
+     * comparison also catches NaN. */
+    if (!(Value > 0))
+    {
+        return 0;
+    }
+
+    if (Value > (OMX_U16)OMX_MAX_U16)
+    {
+        return (OMX_U16)OMX_MAX_U16;
+    }
+
+    return (OMX_U16)Value;
+}
+
+/**
+ * Function: armSatRoundFloatToU32
+ *
+ * Description:
+ * Converts a double precision value into an unsigned int after rounding
+ * (adding 0.5 and truncating) and saturation to [0, OMX_MAX_U32].
+ *
+ * Parameters:
+ * [in]  Value    Floating point value to be converted
+ *
+ * Return Value:
+ * Rounded and saturated value in OMX_U32 format. Negative inputs (and
+ * NaN) saturate to 0.
+ *
+ */
+
+OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value)
+{
+    Value += 0.5;
+
+    /* Converting a negative floating value to an unsigned type is undefined
+     * behaviour, so saturate at the lower bound explicitly. The negated
+     * comparison also catches NaN. */
+    if (!(Value > 0))
+    {
+        return 0;
+    }
+
+    if (Value > (OMX_U32)OMX_MAX_U32)
+    {
+        return (OMX_U32)OMX_MAX_U32;
+    }
+
+    return (OMX_U32)Value;
+}
+
+/**
+ * Function: armRoundFloatToS64
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halves are moved
+ * away from zero) and converts it to a 64-bit signed integer.
+ * No saturation is applied.
+ *
+ * Parameters:
+ * [in]  Value    Floating point value to be converted
+ *
+ * Return Value:
+ * Rounded value in OMX_S64 format
+ *
+ */
+
+OMX_S64 armRoundFloatToS64 (OMX_F64 Value)
+{
+    /* Bias towards the sign of the input, then let the cast truncate */
+    const OMX_F64 biased = (Value > 0) ? (Value + .5) : (Value - .5);
+
+    return (OMX_S64)biased;
+}
+
+/**
+ * Function: armSignCheck
+ *
+ * Description:
+ * Reports the sign of a 16-bit value.
+ *
+ * Parameters:
+ * [in]  var      Variable to be checked
+ *
+ * Return Value:
+ * OMX_INT --   1 if var is positive
+ *              0 if var is zero
+ *             -1 if var is negative
+ */
+
+OMX_INT armSignCheck (
+    OMX_S16 var
+)
+
+{
+    /* Each comparison yields 0 or 1, so the difference is -1, 0 or 1 */
+    return (var > 0) - (var < 0);
+}
+
+/**
+ * Function: armClip
+ *
+ * Description: Clips the input to the range [min, max]
+ *
+ * Parameters:
+ * [in]  min      lower bound
+ * [in]  max      upper bound
+ * [in]  src      value to be clipped
+ *
+ * Return Value:
+ * OMX_S32 -- max if src is above max, otherwise min if src is below min,
+ *            otherwise src unchanged. The upper bound is tested first.
+ */
+
+OMX_S32 armClip (
+    OMX_INT min,
+    OMX_INT max,
+    OMX_S32 src
+)
+
+{
+    if (src > max)
+    {
+        return max;
+    }
+    if (src < min)
+    {
+        return min;
+    }
+    return src;
+}
+
+/**
+ * Function: armClip_F32
+ *
+ * Description: Clips the floating point input to the range [min, max]
+ *
+ * Parameters:
+ * [in]  min      lower bound
+ * [in]  max      upper bound
+ * [in]  src      value to be clipped
+ *
+ * Return Value:
+ * OMX_F32 -- max if src is above max, otherwise min if src is below min,
+ *            otherwise src unchanged. The upper bound is tested first.
+ */
+
+OMX_F32 armClip_F32 (
+    OMX_F32 min,
+    OMX_F32 max,
+    OMX_F32 src
+)
+
+{
+    if (src > max)
+    {
+        return max;
+    }
+    if (src < min)
+    {
+        return min;
+    }
+    return src;
+}
+
+/**
+ * Function: armShiftSat_F32
+ *
+ * Description: Divides a float value by 2^shift, rounds it (adds 0.5 and
+ * truncates) and saturates it to the unsigned range given by satBits.
+ * The second parameter acts like a right shift of the
+ * corresponding integer value.
+ *
+ * Parameters:
+ * [in]  v        Number to be operated upon
+ * [in]  shift    Divides the input "v" by "2^shift"; must be in the
+ *                range 0..30 because the divisor is built with (1 << shift)
+ * [in]  satBits  Final range is [0, 2^satBits)
+ *
+ * Return Value:
+ * OMX_U32 -- returns the "shifted" saturated value. Non-positive (or NaN)
+ *            inputs and satBits <= 0 yield 0.
+ */
+
+OMX_U32 armShiftSat_F32(OMX_F32 v, OMX_INT shift, OMX_INT satBits)
+{
+    const OMX_U32 allOnes = (OMX_U32)(-1);
+    OMX_U32 maxV;
+    OMX_F32 vShifted, vRounded, shiftDiv;
+
+    /* The negated comparison also rejects NaN */
+    if(!(v > 0))
+        return 0;
+
+    /* [0, 2^0) only contains 0; this also avoids a shift by 32 or more below */
+    if(satBits <= 0)
+        return 0;
+
+    maxV = (satBits >= 32) ? allOnes : (allOnes >> (32-satBits));
+
+    shiftDiv = (OMX_F32)(1 << shift);
+    vShifted = v / shiftDiv;
+    vRounded = (OMX_F32)(vShifted + 0.5);
+
+    /* Saturate before converting: converting a float that does not fit in
+     * OMX_U32 is undefined. The comparison is done in double precision,
+     * where every OMX_U32 value is exactly representable. */
+    if((OMX_F64)vRounded >= (OMX_F64)maxV)
+        return maxV;
+
+    return (OMX_U32)vRounded;
+}
+
+/**
+ * Function: armSwapElem
+ *
+ * Description:
+ * Swaps two elements located at the given pointers, one byte at a time.
+ * The size of each element in bytes is given by <elemSize>.
+ *
+ * Parameters:
+ * [in,out]  pBuf1     First element
+ * [in,out]  pBuf2     Second element
+ * [in]      elemSize  Number of bytes to exchange
+ *
+ * Return Value:
+ * OMXResult -- OMX_Sts_BadArgErr is passed to armRetArgErrIf when either
+ *              pointer is NULL; OMX_Sts_NoErr otherwise
+ */
+OMXResult armSwapElem(
+    OMX_U8 *pBuf1,
+    OMX_U8 *pBuf2,
+    OMX_INT elemSize
+    )
+{
+    OMX_INT remaining;
+    armRetArgErrIf(!pBuf1 || !pBuf2, OMX_Sts_BadArgErr);
+
+    /* Walk both buffers front to back, exchanging one byte per step */
+    for(remaining = elemSize; remaining > 0; remaining--)
+    {
+        const OMX_U8 saved = *pBuf1;
+        *pBuf1++ = *pBuf2;
+        *pBuf2++ = saved;
+    }
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function: armMedianOf3
+ *
+ * Description: Finds the median of three numbers
+ *
+ * Parameters:
+ * [in]  fEntry   First entry
+ * [in]  sEntry   Second entry
+ * [in]  tEntry   Third entry
+ *
+ * Return Value:
+ * OMX_S32 -- returns the median value
+ */
+
+OMX_S32 armMedianOf3 (
+    OMX_S32 fEntry,
+    OMX_S32 sEntry,
+    OMX_S32 tEntry
+)
+{
+    /* Order the first two entries, then place the third relative to them */
+    const OMX_S32 lower = armMin (fEntry, sEntry);
+    const OMX_S32 upper = armMax (fEntry, sEntry);
+
+    if (tEntry <= lower)
+    {
+        return lower;
+    }
+    if (tEntry >= upper)
+    {
+        return upper;
+    }
+    return tEntry;
+}
+
+/**
+ * Function: armLogSize
+ *
+ * Description: Finds the size in bits of a non-negative value
+ *
+ * Parameters:
+ * [in]  value    Value to be measured
+ *
+ * Return Value:
+ * OMX_U8 -- Returns the minimum number of bits required to represent the value.
+ *           This is the smallest k>=0 such that value is less than (1<<k).
+ */
+
+OMX_U8 armLogSize (
+    OMX_U16 value
+)
+{
+    OMX_U8 bits = 0;
+
+    /* Count how many right shifts it takes to clear the value */
+    while (value != 0)
+    {
+        value = value >> 1;
+        bits++;
+    }
+    return bits;
+}
+
+/***********************************************************************/
+ /* Saturating Arithmetic operations */
+
+/**
+ * Function: armSatAdd_S32()
+ *
+ * Description:
+ * Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in]  Value1   First Operand
+ * [in]  Value2   Second Operand
+ *
+ * Return:
+ * [out] Value1 + Value2, clamped to [OMX_MIN_S32, OMX_MAX_S32]
+ *
+ **/
+
+OMX_S32 armSatAdd_S32(OMX_S32 Value1,OMX_S32 Value2)
+{
+    /* Add in 64 bits: a 32-bit signed add that overflows is undefined
+     * behaviour, so the overflow must be detected without performing it. */
+    const OMX_S64 sum = (OMX_S64)Value1 + (OMX_S64)Value2;
+
+    if (sum > OMX_MAX_S32)
+    {
+        /*Result has saturated in positive side*/
+        return OMX_MAX_S32;
+    }
+    if (sum < OMX_MIN_S32)
+    {
+        /*Result has saturated in negative side*/
+        return OMX_MIN_S32;
+    }
+    return (OMX_S32)sum;
+}
+
+/**
+ * Function: armSatAdd_S64()
+ *
+ * Description:
+ * Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in]  Value1   First Operand
+ * [in]  Value2   Second Operand
+ *
+ * Return:
+ * [out] Value1 + Value2, clamped to the OMX_S64 range
+ *
+ **/
+
+OMX_S64 armSatAdd_S64(OMX_S64 Value1,OMX_S64 Value2)
+{
+    /* Derive both limits from OMX_MAX_S64 so that they are guaranteed to
+     * have signed 64-bit type. */
+    const OMX_S64 maxS64 = OMX_MAX_S64;
+    const OMX_S64 minS64 = -maxS64 - 1;
+
+    /* Test for overflow before adding: a signed add that overflows is
+     * undefined behaviour. */
+    if (Value2 > 0 && Value1 > maxS64 - Value2)
+    {
+        /*Result has saturated in positive side*/
+        return maxS64;
+    }
+    if (Value2 < 0 && Value1 < minS64 - Value2)
+    {
+        /*Result has saturated in negative side*/
+        return minS64;
+    }
+    return Value1 + Value2;
+}
+
+/**
+ * Function: armSatSub_S32()
+ *
+ * Description:
+ * Returns the result of saturated subtraction of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in]  Value1   First Operand
+ * [in]  Value2   Second Operand
+ *
+ * Return:
+ * [out] Value1 - Value2, clamped to [OMX_MIN_S32, OMX_MAX_S32]
+ *
+ **/
+
+OMX_S32 armSatSub_S32(OMX_S32 Value1,OMX_S32 Value2)
+{
+    /* Subtract in 64 bits: a 32-bit signed subtraction that overflows is
+     * undefined behaviour, so it must be detected without performing it. */
+    const OMX_S64 diff = (OMX_S64)Value1 - (OMX_S64)Value2;
+
+    if (diff > OMX_MAX_S32)
+    {
+        /*Result has saturated in positive side*/
+        return OMX_MAX_S32;
+    }
+    if (diff < OMX_MIN_S32)
+    {
+        /*Result has saturated in negative side*/
+        return OMX_MIN_S32;
+    }
+    return (OMX_S32)diff;
+}
+
+/**
+ * Function: armSatMac_S32()
+ *
+ * Description:
+ * Multiplies Value1 by Value2 and accumulates the product into Mac with
+ * saturation.
+ *
+ * Parameters:
+ * [in]  Value1   First Operand
+ * [in]  Value2   Second Operand
+ * [in]  Mac      Accumulator
+ *
+ * Return:
+ * [out] Result of armSatAdd_S32(Mac, Value1 * Value2)
+ **/
+
+OMX_S32 armSatMac_S32(OMX_S32 Mac,OMX_S16 Value1,OMX_S16 Value2)
+{
+    const OMX_S32 product = (OMX_S32)(Value1*Value2);
+
+    return armSatAdd_S32( Mac , product );
+}
+
+/**
+ * Function: armSatMac_S16S32_S32
+ *
+ * Description:
+ * Returns the result of the saturated MAC operation on the three inputs
+ * delayElem, filTap, mac
+ *
+ * mac = mac + Saturate_in_32Bits(delayElem * filTap)
+ *
+ * Parameters:
+ * [in]  delayElem  32 bit Operand
+ * [in]  filTap     16 bit Operand
+ * [in]  mac        Accumulator
+ *
+ * Return:
+ * [out] mac        Result of operation
+ *
+ **/
+
+OMX_S32 armSatMac_S16S32_S32(OMX_S32 mac, OMX_S32 delayElem, OMX_S16 filTap )
+{
+    /* The value from armSatMulS16S32_S32 is only used as an overflow probe:
+     * when it lies outside the OMX_S16 range the product is replaced by
+     * the matching 32-bit limit; otherwise the product is computed directly. */
+    const OMX_S32 probe = armSatMulS16S32_S32(filTap,delayElem);
+
+    if ( probe > OMX_MAX_S16 )
+    {
+        return armSatAdd_S32(mac, OMX_MAX_S32);
+    }
+    if ( probe < OMX_MIN_S16 )
+    {
+        return armSatAdd_S32(mac, OMX_MIN_S32);
+    }
+    return armSatAdd_S32(mac, delayElem * filTap);
+}
+
+
+/**
+ * Function: armSatRoundRightShift_S32_S16
+ *
+ * Description:
+ * Shifts the input with armSatRoundLeftShift_S32(input, -shift) and
+ * saturates the result to 16 bits.
+ *
+ * output = Saturate_in_16Bits( armSatRoundLeftShift_S32(input, -shift) )
+ *
+ * Parameters:
+ * [in]  input    The input to be operated on
+ * [in]  shift    The shift number
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+
+OMX_S16 armSatRoundRightShift_S32_S16(OMX_S32 input, OMX_INT shift)
+{
+    const OMX_S32 shifted = armSatRoundLeftShift_S32(input,-shift);
+
+    if ( shifted > OMX_MAX_S16 )
+    {
+        return (OMX_S16)OMX_MAX_S16;
+    }
+    if ( shifted < OMX_MIN_S16 )
+    {
+        return (OMX_S16)OMX_MIN_S16;
+    }
+    return (OMX_S16)shifted;
+}
+
+/**
+ * Function: armSatRoundLeftShift_S32()
+ *
+ * Description:
+ * Returns the result of a saturating left shift of the input,
+ * or of a rounded right shift if Shift is negative.
+ *
+ * Parameters:
+ * [in]  Value    Operand
+ * [in]  Shift    Shift amount; negative values shift right
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S32 armSatRoundLeftShift_S32(OMX_S32 Value, OMX_INT Shift)
+{
+    if (Shift < 0)
+    {
+        /* Rounded right shift: add half of the divisor (with saturation)
+         * before shifting */
+        const OMX_INT rightShift = -Shift;
+
+        return armSatAdd_S32(Value, (1 << (rightShift - 1))) >> rightShift;
+    }
+
+    /* Saturating left shift: double the value once per bit position */
+    while (Shift-- > 0)
+    {
+        Value = armSatAdd_S32(Value, Value);
+    }
+    return Value;
+}
+
+/**
+ * Function: armSatRoundLeftShift_S64()
+ *
+ * Description:
+ * Returns the result of a saturating left shift of the input,
+ * or of a rounded right shift if Shift is negative.
+ *
+ * Parameters:
+ * [in]  Value    Operand
+ * [in]  Shift    Shift amount; negative values shift right
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S64 armSatRoundLeftShift_S64(OMX_S64 Value, OMX_INT Shift)
+{
+    if (Shift < 0)
+    {
+        /* Rounded right shift: add half of the divisor (with saturation)
+         * before shifting */
+        const OMX_INT rightShift = -Shift;
+
+        return armSatAdd_S64(Value, ((OMX_S64)1 << (rightShift - 1))) >> rightShift;
+    }
+
+    /* Saturating left shift: double the value once per bit position */
+    while (Shift-- > 0)
+    {
+        Value = armSatAdd_S64(Value, Value);
+    }
+    return Value;
+}
+
+/**
+ * Function: armSatMulS16S32_S32()
+ *
+ * Description:
+ * Multiplies an S16 value by an S32 value and returns the result in an
+ * S32 container. The 32-bit operand is split into a signed high half and
+ * an unsigned low half; the returned value is
+ * (hi2 * input1) + ((lo2 * input1) >> 16), combined with a saturating add.
+ *
+ * Parameters:
+ * [in]  input1   Operand 1
+ * [in]  input2   Operand 2
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+
+OMX_S32 armSatMulS16S32_S32(OMX_S16 input1,OMX_S32 input2)
+{
+    OMX_S16 hi2,lo1;
+    OMX_U16 lo2;
+
+    OMX_S32 temp1,temp2;
+    OMX_S32 result;
+
+    lo1 = input1;
+
+    hi2 = ( input2 >> 16 );
+    /* Extract the low half with an unsigned mask: left-shifting a signed
+     * value that is negative, or whose shifted result does not fit, is
+     * undefined behaviour. */
+    lo2 = (OMX_U16)( (OMX_U32)input2 & 0xFFFFu );
+
+    temp1 = hi2 * lo1;
+    temp2 = ( lo2* lo1 ) >> 16;
+
+    result = armSatAdd_S32(temp1,temp2);
+
+    return result;
+}
+
+/**
+ * Function: armSatMulS32S32_S32()
+ *
+ * Description:
+ * Multiplies an S32 value by an S32 value and returns the result in an
+ * S32 container. Each operand is split into a signed high half and an
+ * unsigned low half; the returned value is
+ * (hi1 * hi2) + ((hi1 * lo2) >> 16) + ((hi2 * lo1) >> 16), combined with
+ * saturating adds. The lo1 * lo2 term is not included.
+ *
+ * Parameters:
+ * [in]  input1   Operand 1
+ * [in]  input2   Operand 2
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S32 armSatMulS32S32_S32(OMX_S32 input1,OMX_S32 input2)
+{
+    OMX_S16 hi1,hi2;
+    OMX_U16 lo1,lo2;
+
+    OMX_S32 temp1,temp2,temp3;
+    OMX_S32 result;
+
+    /* The low halves are extracted with an unsigned mask: left-shifting a
+     * signed value that is negative, or whose shifted result does not fit,
+     * is undefined behaviour. */
+    hi1 = ( input1 >> 16 );
+    lo1 = (OMX_U16)( (OMX_U32)input1 & 0xFFFFu );
+
+    hi2 = ( input2 >> 16 );
+    lo2 = (OMX_U16)( (OMX_U32)input2 & 0xFFFFu );
+
+    temp1 = hi1 * hi2;
+    temp2 = ( hi1* lo2 ) >> 16;
+    temp3 = ( hi2* lo1 ) >> 16;
+
+    result = armSatAdd_S32(temp1,temp2);
+    result = armSatAdd_S32(result,temp3);
+
+    return result;
+}
+
+/**
+ * Function: armIntDivAwayFromZero()
+ *
+ * Description: Integer division with rounding to the nearest integer.
+ *              Half-integer values are rounded away from zero.
+ *              For example 3//2 is rounded to 2, and -3//2 is
+ *              rounded to -2.
+ *
+ * Parameters:
+ * [in]  Num      Numerator
+ * [in]  Deno     Denominator
+ *
+ * Return:
+ * [out] Result of operation Num//Deno
+ *
+ **/
+
+OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno)
+{
+    /* Divide in double precision, then bias by half a unit in the
+     * direction of the quotient's sign so the cast rounds to nearest */
+    const OMX_F64 quotient = ((OMX_F64)Num)/((OMX_F64)Deno);
+    const OMX_F64 biased = (quotient >= 0) ? (quotient + 0.5) : (quotient - 0.5);
+
+    return (OMX_S32)(biased);
+}
+
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM_Bitstream.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM_Bitstream.c
new file mode 100644
index 0000000..9ef9319
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/src/armCOMM_Bitstream.c
@@ -0,0 +1,329 @@
+/**
+ *
+ * File Name: armCOMM_Bitstream.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Defines bitstream encode and decode functions common to all codecs
+ */
+
+#include "omxtypes.h"
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+
+/***************************************
+ * Fixed bit length Decode
+ ***************************************/
+
+/**
+ * Function: armLookAheadBits()
+ *
+ * Description:
+ * Get the next N bits from the bitstream without advancing the bitstream
+ * pointer. Bits are taken most-significant first, starting *pOffset bits
+ * into the byte at *ppBitStream. Five bytes starting at *ppBitStream are
+ * always read, whatever the value of N, so they must all be readable.
+ *
+ * Parameters:
+ * [in] **ppBitStream  pointer to the pointer to the current byte
+ * [in] *pOffset       bit position inside the current byte, 0...7
+ * [in] N=1...32       number of bits to return
+ *
+ * Returns Value: the N bits, right-aligned
+ */
+
+OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+
+    armAssert(Offset>=0 && Offset<=7);
+    armAssert(N>=1 && N<=32);
+
+    /*
+     * Read next 32 bits from stream. Each byte is widened to OMX_U32
+     * before shifting: shifting the int-promoted byte by 24 would move a
+     * set top bit into the sign bit, which is undefined behaviour.
+     */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16) |
+            ((OMX_U32)pBitStream[2] << 8 ) |  (OMX_U32)pBitStream[3];
+    Value = (Value << Offset) | ((OMX_U32)pBitStream[4] >> (8-Offset));
+
+    /* Return N bits */
+    return Value >> (32-N);
+}
+
+
+/**
+ * Function: armGetBits()
+ *
+ * Description:
+ * Read N bits from the bitstream and advance the stream position by N
+ * bits. Bits are taken most-significant first. N == 0 returns 0 and
+ * leaves the stream position untouched. For any other N, five bytes
+ * starting at *ppBitStream are read, so they must all be readable.
+ *
+ * Parameters:
+ * [in] *ppBitStream   pointer to the current byte
+ * [in] *pOffset       bit position inside the current byte, 0...7
+ * [in] N=1..32        number of bits to read
+ *
+ * [out] *ppBitStream  advanced to the byte holding the next unread bit
+ * [out] *pOffset      bit position of the next unread bit, 0...7
+ * Returns Value: the N bits, right-aligned
+ */
+
+
+OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+
+    if(N == 0)
+    {
+        return 0;
+    }
+
+    armAssert(Offset>=0 && Offset<=7);
+    armAssert(N>=1 && N<=32);
+
+    /*
+     * Read next 32 bits from stream. Each byte is widened to OMX_U32
+     * before shifting: shifting the int-promoted byte by 24 would move a
+     * set top bit into the sign bit, which is undefined behaviour.
+     */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16) |
+            ((OMX_U32)pBitStream[2] << 8 ) |  (OMX_U32)pBitStream[3];
+    Value = (Value << Offset) | ((OMX_U32)pBitStream[4] >> (8-Offset));
+
+    /* Advance bitstream pointer by N bits */
+    Offset += N;
+    *ppBitStream = pBitStream + (Offset>>3);
+    *pOffset = Offset & 7;
+
+    /* Return N bits */
+    return Value >> (32-N);
+}
+
+/**
+ * Function: armByteAlign()
+ *
+ * Description:
+ * Align the pointer *ppBitStream to the next byte boundary. When the bit
+ * offset is already zero (or not positive) nothing is changed.
+ *
+ * Parameters:
+ * [in] *ppBitStream   pointer to the current byte
+ * [in] *pOffset       bit position inside the current byte
+ *
+ * [out] *ppBitStream  moved on by one byte if *pOffset was positive
+ * [out] *pOffset      reset to 0 if it was positive
+ *
+ **/
+
+OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset)
+{
+    /* Already on a byte boundary: leave both outputs as they are */
+    if (*pOffset <= 0)
+    {
+        return;
+    }
+
+    /* Drop the rest of the partially consumed byte */
+    ++(*ppBitStream);
+    *pOffset = 0;
+}
+
+/**
+ * Function: armSkipBits()
+ *
+ * Description:
+ * Skip N bits of the stream, starting from bit *pOffset of the byte at
+ * *ppBitStream. No stream data is read.
+ *
+ * Parameters:
+ * [in] *ppBitStream   pointer to the current byte
+ * [in] *pOffset       bit position inside the current byte
+ * [in] N              number of bits to skip
+ *
+ * [out] *ppBitStream  advanced by the whole bytes contained in offset + N
+ * [out] *pOffset      remaining bit position, (offset + N) & 7
+ *
+ **/
+
+
+OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N)
+{
+    /* Bit position after the skip, counted from the current byte */
+    const OMX_INT bitPos = *pOffset + N;
+
+    *ppBitStream += (bitPos >> 3);
+    *pOffset = bitPos & 7;
+}
+
+/***************************************
+ * Variable bit length Decode
+ ***************************************/
+
+/**
+ * Function: armUnPackVLC32()
+ *
+ * Description:
+ * Variable length decode of variable length symbol (max size 32 bits) read from
+ * the bit stream pointed by *ppBitStream at *pOffset by using the table
+ * pointed by pCodeBook. The table is searched linearly from its first
+ * entry and must be terminated by an entry whose codeLen is 0. Five bytes
+ * starting at *ppBitStream are always read. The stream position is
+ * advanced only when a matching code word is found.
+ *
+ * Parameters:
+ * [in] *ppBitStream   pointer to the current byte
+ * [in] *pOffset       bit position inside the current byte, 0...7
+ * [in] pCodeBook      code book to search
+ *
+ * [out] *ppBitStream  advanced past the decoded code word
+ * [out] *pOffset      bit position after the decoded code word
+ *
+ * Returns : Code Book Index if successful.
+ *         : ARM_NO_CODEBOOK_INDEX if search fails.
+ **/
+#ifndef C_OPTIMIZED_IMPLEMENTATION
+
+OMX_U16 armUnPackVLC32(
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pOffset,
+    const ARM_VLC32 *pCodeBook
+)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+    OMX_INT Index;
+
+    armAssert(Offset>=0 && Offset<=7);
+
+    /*
+     * Read next 32 bits from stream. Each byte is widened to OMX_U32
+     * before shifting: shifting the int-promoted byte by 24 would move a
+     * set top bit into the sign bit, which is undefined behaviour.
+     */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16) |
+            ((OMX_U32)pBitStream[2] << 8 ) |  (OMX_U32)pBitStream[3];
+    Value = (Value << Offset) | ((OMX_U32)pBitStream[4] >> (8-Offset));
+
+    /* Search through the codebook */
+    for (Index=0; pCodeBook->codeLen != 0; Index++)
+    {
+        /* Compare the entry with the leading codeLen bits of the stream */
+        if (pCodeBook->codeWord == (Value >> (32 - pCodeBook->codeLen)))
+        {
+            Offset = Offset + pCodeBook->codeLen;
+            *ppBitStream = pBitStream + (Offset >> 3) ;
+            *pOffset = Offset & 7;
+
+            return Index;
+        }
+        pCodeBook++;
+    }
+
+    /* No code match found */
+    return ARM_NO_CODEBOOK_INDEX;
+}
+
+#endif
+
+/***************************************
+ * Fixed bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackBits
+ *
+ * Description:
+ * Pack a code word of codeLength bits into the bitstream, most
+ * significant bit first, starting at bit *pOffset of the byte at
+ * *ppBitStream.
+ *
+ * Remarks:
+ * The bits of the first byte that lie before *pOffset are preserved.
+ * The byte at the final stream position is always stored, with the bits
+ * after the new offset cleared; when the code word ends exactly on a
+ * byte boundary this stores a zero byte at the new *ppBitStream.
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte
+ *                  in the bit stream.
+ * [in] pOffset     pointer to the bit position in the byte
+ *                  pointed by *ppBitStream. Valid within 0
+ *                  to 7.
+ * [in] codeWord    Code word that needs to be inserted into the
+ *                  bitstream
+ * [in] codeLength  Length of the code word, valid range 1...32
+ *
+ * [out] ppBitStream *ppBitStream is updated so that it points to the
+ *                   current byte in the bit stream buffer.
+ * [out] pOffset     *pOffset is updated so that it points to the
+ *                   current bit position in the byte pointed by
+ *                   *ppBitStream.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr on success; the argument checks report
+ * OMX_Sts_BadArgErr through armRetArgErrIf when *pOffset or codeLength
+ * is out of range.
+ *
+ */
+
+OMXResult armPackBits (
+    OMX_U8 **ppBitStream,
+    OMX_INT *pOffset,
+    OMX_U32 codeWord,
+    OMX_INT codeLength
+)
+{
+    OMX_U8 *pOut = *ppBitStream;
+    OMX_INT bitPos = *pOffset;
+    OMX_INT room;       /* free bits left in the byte being assembled */
+    OMX_U32 pending;    /* byte being assembled */
+
+    /* checking argument validity */
+    armRetArgErrIf(bitPos < 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf(bitPos > 7, OMX_Sts_BadArgErr);
+    armRetArgErrIf(codeLength < 1, OMX_Sts_BadArgErr);
+    armRetArgErrIf(codeLength > 32, OMX_Sts_BadArgErr);
+
+    /* Left-align the code word in 32 bits */
+    codeWord <<= (32 - codeLength);
+
+    /* Prepare the first byte: keep the bits already in it, append new ones */
+    room = 8 - bitPos;
+    pending = (pOut[0] >> room) << room;
+    pending |= codeWord >> (24 + bitPos);
+
+    /* Write out whole bytes */
+    while (codeLength >= room)
+    {
+        *pOut++ = (OMX_U8)pending;
+        codeWord <<= room;
+        codeLength -= room;
+        bitPos = 0;
+        room = 8;
+        pending = codeWord >> 24;
+    }
+
+    /* Write out final partial byte */
+    *pOut = (OMX_U8)pending;
+    *ppBitStream = pOut;
+    *pOffset = bitPos + codeLength;
+
+    return OMX_Sts_NoErr;
+}
+
+/***************************************
+ * Variable bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackVLC32
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream by handing its codeWord and
+ * codeLen members to armPackBits().
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte
+ *                  in the bit stream.
+ * [in] pBitOffset  pointer to the bit position in the byte
+ *                  pointed by *ppBitStream. Valid within 0
+ *                  to 7.
+ * [in] code        VLC code word that needs to be inserted into the
+ *                  bitstream
+ *
+ * [out] ppBitStream *ppBitStream is updated after the code is packed,
+ *                   so that it points to the current byte in the bit
+ *                   stream buffer.
+ * [out] pBitOffset  *pBitOffset is updated so that it points to the
+ *                   current bit position in the byte pointed by
+ *                   *ppBitStream.
+ *
+ * Return Value:
+ * The result of armPackBits().
+ *
+ */
+
+OMXResult armPackVLC32 (
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    ARM_VLC32 code
+)
+{
+    /* Locals carry the same types as the armPackBits() parameters */
+    const OMX_U32 word = code.codeWord;
+    const OMX_INT length = code.codeLen;
+
+    return armPackBits(ppBitStream, pBitOffset, word, length);
+}
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/armVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/armVC.h
new file mode 100644
index 0000000..7fa7716
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/armVC.h
@@ -0,0 +1,1153 @@
+/**
+ *
+ * File Name: armVC.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVideo.h
+ * Brief: Declares API's/Basic Data types used across the OpenMAX Video domain
+ *
+ */
+
+
+#ifndef _armVideo_H_
+#define _armVideo_H_
+
+#include "omxVC.h"
+#include "armCOMM_Bitstream.h"
+
+/**
+ * ARM specific state structures to hold Motion Estimation information,
+ * one for the M4P2 and one for the M4P10 functions. Each pairs a
+ * parameter block (MEParams) with a mode value (MEMode); the member
+ * types are presumably declared by omxVC.h - confirm there.
+ */
+
+typedef struct m4p2_MESpec
+{
+    OMXVCM4P2MEParams MEParams;
+    OMXVCM4P2MEMode   MEMode;
+} ARMVCM4P2_MESpec;
+
+typedef struct m4p10_MESpec
+{
+    OMXVCM4P10MEParams MEParams;
+    OMXVCM4P10MEMode   MEMode;
+} ARMVCM4P10_MESpec;
+
+/**
+ * Function: armVCM4P2_CompareMV
+ *
+ * Description:
+ * Performs comparison of motion vectors and SADs to decide the
+ * best MV and SAD
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] mvX x coordinate of the candidate motion vector
+ * [in] mvY y coordinate of the candidate motion vector
+ * [in] candSAD Candidate SAD
+ * [in] bestMVX x coordinate of the best motion vector
+ * [in] bestMVY y coordinate of the best motion vector
+ * [in] bestSAD best SAD
+ *
+ * Return Value:
+ * OMX_INT -- 1 to indicate that the current sad is the best
+ * 0 to indicate that it is NOT the best SAD
+ */
+
+OMX_INT armVCM4P2_CompareMV (
+ OMX_S16 mvX,
+ OMX_S16 mvY,
+ OMX_INT candSAD,
+ OMX_S16 bestMVX,
+ OMX_S16 bestMVY,
+ OMX_INT bestSAD);
+
+/**
+ * Function: armVCM4P2_ACDCPredict
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+ * to the function call, prediction direction (predDir) should be selected
+ * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDst pointer to the coefficient buffer which contains
+ * the quantized coefficient residuals (PQF) of the
+ * current block
+ * [in] pPredBufRow pointer to the coefficient row buffer
+ * [in] pPredBufCol pointer to the coefficient column buffer
+ * [in] curQP quantization parameter of the current block. curQP
+ * may equal to predQP especially when the current
+ * block and the predictor block are in the same
+ * macroblock.
+ * [in] predQP quantization parameter of the predictor block
+ * [in] predDir indicates the prediction direction which takes one
+ * of the following values:
+ * OMX_VIDEO_HORIZONTAL predict horizontally
+ * OMX_VIDEO_VERTICAL predict vertically
+ * [in] ACPredFlag a flag indicating if AC prediction should be
+ * performed. It is equal to ac_pred_flag in the bit
+ * stream syntax of MPEG-4
+ * [in] videoComp video component type (luminance, chrominance or
+ * alpha) of the current block
+ * [in] flag Selects whether this function is used to
+ * calculate PQF (set 1, prediction) or QF (set 0, reconstruction)
+ * [out] pPreACPredict pointer to the predicted coefficients buffer.
+ * Filled ONLY if it is not NULL
+ * [out] pSrcDst pointer to the coefficient buffer which contains
+ * the quantized coefficients (QF) of the current
+ * block
+ * [out] pPredBufRow pointer to the updated coefficient row buffer
+ * [out] pPredBufCol pointer to the updated coefficient column buffer
+ * [out] pSumErr pointer to the updated sum of the difference
+ * between predicted and unpredicted coefficients
+ * If this is NULL, do not update
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_ACDCPredict(
+ OMX_S16 * pSrcDst,
+ OMX_S16 * pPreACPredict,
+ OMX_S16 * pPredBufRow,
+ OMX_S16 * pPredBufCol,
+ OMX_INT curQP,
+ OMX_INT predQP,
+ OMX_INT predDir,
+ OMX_INT ACPredFlag,
+ OMXVCM4P2VideoComponent videoComp,
+ OMX_U8 flag,
+ OMX_INT *pSumErr
+);
+
+/**
+ * Function: armVCM4P2_SetPredDir
+ *
+ * Description:
+ * Performs detecting the prediction direction
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] blockIndex block index indicating the component type and
+ * position as defined in subclause 6.1.3.8, of ISO/IEC
+ * 14496-2. Furthermore, indexes 6 to 9 indicate the
+ * alpha blocks spatially corresponding to luminance
+ * blocks 0 to 3 in the same macroblock.
+ * [in] pCoefBufRow pointer to the coefficient row buffer
+ * [in] pCoefBufCol pointer to the coefficient column buffer
+ * [in] pQpBuf pointer to the quantization parameter buffer
+ * [out] predQP quantization parameter of the predictor block
+ * [out] predDir indicates the prediction direction which takes one
+ * of the following values:
+ * OMX_VIDEO_HORIZONTAL predict horizontally
+ * OMX_VIDEO_VERTICAL predict vertically
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_SetPredDir(
+ OMX_INT blockIndex,
+ OMX_S16 *pCoefBufRow,
+ OMX_S16 *pCoefBufCol,
+ OMX_INT *predDir,
+ OMX_INT *predQP,
+ const OMX_U8 *pQpBuf
+);
+
+/**
+ * Function: armVCM4P2_EncodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs zigzag scanning and VLC encoding for one intra block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * by *ppBitStream. Valid within 0 to 7.
+ * [in] pQDctBlkCoef pointer to the quantized DCT coefficient
+ * [in] predDir AC prediction direction, which is used to decide
+ * the zigzag scan pattern. This takes one of the
+ * following values:
+ * OMX_VIDEO_NONE AC prediction not used.
+ * Performs classical zigzag
+ * scan.
+ * OMX_VIDEO_HORIZONTAL Horizontal prediction.
+ * Performs alternate-vertical
+ * zigzag scan.
+ * OMX_VIDEO_VERTICAL Vertical prediction.
+ * Performs alternate-horizontal
+ * zigzag scan.
+ * [in] pattern block pattern which is used to decide whether
+ * this block is encoded
+ * [in] start start indicates whether the encoding begins with 0th element
+ * or 1st.
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded,
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_EncodeVLCZigzag_Intra(
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ const OMX_S16 *pQDctBlkCoef,
+ OMX_U8 predDir,
+ OMX_U8 pattern,
+ OMX_INT shortVideoHeader,
+ OMX_U8 start
+);
+
+/**
+ * Function: armVCM4P2_DecodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bitstream buffer
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * to by *ppBitStream. *pBitOffset is valid within
+ * [0-7].
+ * [in] predDir AC prediction direction which is used to decide
+ * the zigzag scan pattern. It takes one of the
+ * following values:
+ * OMX_VIDEO_NONE AC prediction not used;
+ * perform classical zigzag scan;
+ * OMX_VIDEO_HORIZONTAL Horizontal prediction;
+ * perform alternate-vertical
+ * zigzag scan;
+ * OMX_VIDEO_VERTICAL Vertical prediction;
+ * thus perform
+ * alternate-horizontal
+ * zigzag scan.
+ * [in] videoComp video component type (luminance, chrominance or
+ * alpha) of the current block
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ * and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start start indicates whether the decoding begins with 0th element
+ * or 1st.
+ * [out] ppBitStream *ppBitStream is updated after the block is
+ * decoded, so that it points to the current byte
+ * in the bit stream buffer
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream
+ * [out] pDst pointer to the coefficient buffer of current
+ * block. Should be 32-bit aligned
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_DecodeVLCZigzag_Intra(
+ const OMX_U8 ** ppBitStream,
+ OMX_INT * pBitOffset,
+ OMX_S16 * pDst,
+ OMX_U8 predDir,
+ OMX_INT shortVideoHeader,
+ OMX_U8 start
+);
+
+/**
+ * Function: armVCM4P2_FillVLDBuffer
+ *
+ * Description:
+ * Performs filling of the coefficient buffer according to the run, level
+ * and sign, also updates the index
+ *
+ * Parameters:
+ * [in] storeRun Stored Run value (count of zeros)
+ * [in] storeLevel Stored Level value (non-zero value)
+ * [in] sign Flag indicating the sign of level
+ * [in] last status of the last flag
+ * [in] pIndex pointer to coefficient index in 8x8 matrix
+ * [out] pIndex pointer to updated coefficient index in 8x8
+ * matrix
+ * [in] pZigzagTable pointer to the zigzag tables
+ * [out] pDst pointer to the coefficient buffer of current
+ * block. Should be 32-bit aligned
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLDBuffer(
+ OMX_U32 storeRun,
+ OMX_S16 * pDst,
+ OMX_S16 storeLevel,
+ OMX_U8 sign,
+ OMX_U8 last,
+ OMX_U8 * index,
+ const OMX_U8 * pZigzagTable
+);
+
+/**
+ * Function: armVCM4P2_GetVLCBits
+ *
+ * Description:
+ * Performs escape mode decision based on the run, run+, level, level+ and
+ * last combinations.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * by *ppBitStream. Valid within 0 to 7
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ * and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start start indicates whether the decoding begins with
+ * 0th element or 1st.
+ * [in/out] pLast pointer to last status flag
+ * [in] runBeginSingleLevelEntriesL0 The run value from which level
+ * will be equal to 1: last == 0
+ * [in] IndexBeginSingleLevelEntriesL0 Array index in the VLC table
+ * pointing to the
+ * runBeginSingleLevelEntriesL0
+ * [in] runBeginSingleLevelEntriesL1 The run value from which level
+ * will be equal to 1: last == 1
+ * [in] IndexBeginSingleLevelEntriesL1 Array index in the VLC table
+ * pointing to the
+ * runBeginSingleLevelEntriesL1
+ * [in] pRunIndexTableL0 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pVlcTableL0 VLC table for last == 0
+ * [in] pRunIndexTableL1 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pVlcTableL1 VLC table for last == 1
+ * [in] pLMAXTableL0 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pLMAXTableL1 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pRMAXTableL0 Run MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pRMAXTableL1 Run MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [out]pDst pointer to the coefficient buffer of current
+ * block. Should be 32-bit aligned
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_GetVLCBits (
+ const OMX_U8 **ppBitStream,
+ OMX_INT * pBitOffset,
+ OMX_S16 * pDst,
+ OMX_INT shortVideoHeader,
+ OMX_U8 start,
+ OMX_U8 * pLast,
+ OMX_U8 runBeginSingleLevelEntriesL0,
+ OMX_U8 maxIndexForMultipleEntriesL0,
+ OMX_U8 maxRunForMultipleEntriesL1,
+ OMX_U8 maxIndexForMultipleEntriesL1,
+ const OMX_U8 * pRunIndexTableL0,
+ const ARM_VLC32 *pVlcTableL0,
+ const OMX_U8 * pRunIndexTableL1,
+ const ARM_VLC32 *pVlcTableL1,
+ const OMX_U8 * pLMAXTableL0,
+ const OMX_U8 * pLMAXTableL1,
+ const OMX_U8 * pRMAXTableL0,
+ const OMX_U8 * pRMAXTableL1,
+ const OMX_U8 * pZigzagTable
+);
+
+/**
+ * Function: armVCM4P2_PutVLCBits
+ *
+ * Description:
+ * Checks the type of Escape Mode and put encoded bits for
+ * quantized DCT coefficients.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * by *ppBitStream. Valid within 0 to 7
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ * and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start start indicates whether the encoding begins with
+ * 0th element or 1st.
+ * [in] maxStoreRunL0 Max store possible (considering last and inter/intra)
+ * for last = 0
+ * [in] maxStoreRunL1 Max store possible (considering last and inter/intra)
+ * for last = 1
+ * [in] maxRunForMultipleEntriesL0
+ * The run value after which level
+ * will be equal to 1:
+ * (considering last and inter/intra status) for last = 0
+ * [in] maxRunForMultipleEntriesL1
+ * The run value after which level
+ * will be equal to 1:
+ * (considering last and inter/intra status) for last = 1
+ * [in] pRunIndexTableL0 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pVlcTableL0 VLC table for last == 0
+ * [in] pRunIndexTableL1 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pVlcTableL1 VLC table for last == 1
+ * [in] pLMAXTableL0 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pLMAXTableL1 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pRMAXTableL0 Run MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pRMAXTableL1 Run MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [out] pQDctBlkCoef pointer to the quantized DCT coefficient
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+
+OMXResult armVCM4P2_PutVLCBits (
+ OMX_U8 **ppBitStream,
+ OMX_INT * pBitOffset,
+ const OMX_S16 *pQDctBlkCoef,
+ OMX_INT shortVideoHeader,
+ OMX_U8 start,
+ OMX_U8 maxStoreRunL0,
+ OMX_U8 maxStoreRunL1,
+ OMX_U8 maxRunForMultipleEntriesL0,
+ OMX_U8 maxRunForMultipleEntriesL1,
+ const OMX_U8 * pRunIndexTableL0,
+ const ARM_VLC32 *pVlcTableL0,
+ const OMX_U8 * pRunIndexTableL1,
+ const ARM_VLC32 *pVlcTableL1,
+ const OMX_U8 * pLMAXTableL0,
+ const OMX_U8 * pLMAXTableL1,
+ const OMX_U8 * pRMAXTableL0,
+ const OMX_U8 * pRMAXTableL1,
+ const OMX_U8 * pZigzagTable
+);
+/**
+ * Function: armVCM4P2_FillVLCBuffer
+ *
+ * Description:
+ * Performs calculating the VLC bits depending on the escape type and insert
+ * the same in the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * by *ppBitStream. Valid within 0 to 7
+ * [in] run Run value (count of zeros) to be encoded
+ * [in] level Level value (non-zero value) to be encoded
+ * [in] runPlus Calculated as runPlus = run - (RMAX + 1)
+ * [in] levelPlus Calculated as
+ * levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in] fMode Flag indicating the escape modes
+ * [in] last status of the last flag
+ * [in] maxRunForMultipleEntries
+ * The run value after which level will be equal to 1:
+ * (considering last and inter/intra status)
+ * [in] pRunIndexTable Run Index table defined in
+ * armVCM4P2_Huff_tables_VLC.h
+ * [in] pVlcTable VLC table defined in armVCM4P2_Huff_tables_VLC.h
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLCBuffer (
+ OMX_U8 **ppBitStream,
+ OMX_INT * pBitOffset,
+ OMX_U32 run,
+ OMX_S16 level,
+ OMX_U32 runPlus,
+ OMX_S16 levelPlus,
+ OMX_U8 fMode,
+ OMX_U8 last,
+ OMX_U8 maxRunForMultipleEntries,
+ const OMX_U8 *pRunIndexTable,
+ const ARM_VLC32 *pVlcTable
+);
+
+/**
+ * Function: armVCM4P2_CheckVLCEscapeMode
+ *
+ * Description:
+ * Performs escape mode decision based on the run, run+, level, level+ and
+ * last combinations.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] run Run value (count of zeros) to be encoded
+ * [in] level Level value (non-zero value) to be encoded
+ * [in] runPlus Calculated as runPlus = run - (RMAX + 1)
+ * [in] levelPlus Calculated as
+ * levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in] maxStoreRun Max store possible (considering last and inter/intra)
+ * [in] maxRunForMultipleEntries
+ * The run value after which level
+ * will be equal to 1:
+ * (considering last and inter/intra status)
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ * and escape mode 4 is used when shortVideoHeader==1.
+ * [in] pRunIndexTable Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c
+ * (considering last and inter/intra status)
+ *
+ *
+ * Return Value:
+ * Returns an Escape mode which can take values from 0 to 3
+ * 0 --> no escape mode, 1 --> escape type 1,
+ * 2 --> escape type 2, 3 --> escape type 3, check section 7.4.1.3
+ * in the MPEG ISO standard.
+ *
+ */
+
+OMX_U8 armVCM4P2_CheckVLCEscapeMode(
+ OMX_U32 run,
+ OMX_U32 runPlus,
+ OMX_S16 level,
+ OMX_S16 levelPlus,
+ OMX_U8 maxStoreRun,
+ OMX_U8 maxRunForMultipleEntries,
+ OMX_INT shortVideoHeader,
+ const OMX_U8 *pRunIndexTable
+);
+
+
+/**
+ * Function: armVCM4P2_BlockMatch_Integer
+ *
+ * Description:
+ * Performs a 16x16 block search; estimates motion vector and associated minimum SAD.
+ * Both the input and output motion vectors are represented using half-pixel units, and
+ * therefore a shift left or right by 1 bit may be required, respectively, to match the
+ * input or output MVs with other functions that either generate output MVs or expect
+ * input MVs represented using integer pixel units.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB that
+ * corresponds to the location of the current macroblock in the current
+ * plane.
+ * [in] refWidth width of the reference plane
+ * [in] pRefRect pointer to the valid rectangular in reference plane. Relative to image origin.
+ * It is not limited to the image boundary, but depends on the padding. For example,
+ * if you pad 4 pixels outside the image border, then the value for left border
+ * can be -4
+ * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane (linear array,
+ * 256 entries); must be aligned on an 8-byte boundary.
+ * [in] pCurrPointPos position of the current macroblock in the current plane
+ * [in] pSrcPreMV pointer to predicted motion vector; NULL indicates no predicted MV
+ * [in] pSrcPreSAD pointer to SAD associated with the predicted MV (referenced by pSrcPreMV)
+ * [in] searchRange search range for 16X16 integer block,the units of it is full pixel,the search range
+ * is the same in all directions.It is in inclusive of the boundary and specified in
+ * terms of integer pixel units.
+ * [in] pMESpec vendor-specific motion estimation specification structure; must have been allocated
+ * and then initialized using omxVCM4P2_MEInit prior to calling the block matching
+ * function.
+ * [in] BlockSize MacroBlock Size i.e either 16x16 or 8x8.
+ * [out] pDstMV pointer to estimated MV
+ * [out] pDstSAD pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error.
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Integer(
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ const OMXVCMotionVector *pSrcPreMV,
+ const OMX_INT *pSrcPreSAD,
+ void *pMESpec,
+ OMXVCMotionVector *pDstMV,
+ OMX_INT *pDstSAD,
+ OMX_U8 BlockSize
+);
+
+/**
+ * Function: armVCM4P2_BlockMatch_Half
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution. Returns the estimated
+ * motion vector and associated minimum SAD. This function estimates the half-pixel
+ * motion vector by interpolating the integer resolution motion vector referenced
+ * by the input parameter pSrcDstMV, i.e., the initial integer MV is generated
+ * externally. The input parameters pSrcRefBuf and pSearchPointRefPos should be
+ * shifted by the winning MV of 16x16 integer search prior to calling BlockMatch_Half_16x16.
+ * The function BlockMatch_Integer_16x16 may be used for integer motion estimation.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB
+ * that corresponds to the location of the current macroblock in
+ * the current plane.
+ * [in] refWidth width of the reference plane
+ * [in] pRefRect reference plane valid region rectangle
+ * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane
+ * (linear array, 256 entries); must be aligned on an 8-byte boundary.
+ * [in] pSearchPointRefPos position of the starting point for half pixel search (specified
+ * in terms of integer pixel units) in the reference plane.
+ * [in] rndVal rounding control bit for half pixel motion estimation;
+ * 0=rounding control disabled; 1=rounding control enabled
+ * [in] pSrcDstMV pointer to the initial MV estimate; typically generated during a prior
+ * 16X16 integer search and its unit is half pixel.
+ * [in] BlockSize MacroBlock Size i.e either 16x16 or 8x8.
+ * [out]pSrcDstMV pointer to estimated MV
+ * [out]pDstSAD pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Half(
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pSearchPointRefPos,
+ OMX_INT rndVal,
+ OMXVCMotionVector *pSrcDstMV,
+ OMX_INT *pDstSAD,
+ OMX_U8 BlockSize
+);
+/**
+ * Function: armVCM4P2_PadMV
+ *
+ * Description:
+ * Performs motion vector padding for a macroblock.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDstMV pointer to motion vector buffer of the current
+ * macroblock
+ * [in] pTransp pointer to transparent status buffer of the
+ * current macroblock
+ * [out] pSrcDstMV pointer to motion vector buffer in which the
+ * motion vectors have been padded
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_PadMV(
+ OMXVCMotionVector * pSrcDstMV,
+ OMX_U8 * pTransp
+);
+
+/*
+ * H.264 Specific Declarations
+ */
+/* Defines */
+#define ARM_M4P10_Q_OFFSET (15)
+
+
+/* Dequant tables */
+
+extern const OMX_U8 armVCM4P10_PosToVCol4x4[16];
+extern const OMX_U8 armVCM4P10_PosToVCol2x2[4];
+extern const OMX_U8 armVCM4P10_VMatrix[6][3];
+extern const OMX_U32 armVCM4P10_MFMatrix[6][3];
+
+
+/*
+ * Description:
+ * This function perform the work required by the OpenMAX
+ * DecodeCoeffsToPair function and DecodeChromaDCCoeffsToPair.
+ * Since most of the code is common we share it here.
+ *
+ * Parameters:
+ * [in] ppBitStream Double pointer to current byte in bit stream buffer
+ * [in] pOffset Pointer to current bit position in the byte pointed
+ * to by *ppBitStream
+ * [in] sMaxNumCoeff Maximum number of non-zero coefficients in current
+ * block (4,15 or 16)
+ * [in] nTable Table number (0 to 4) according to the five columns
+ * of Table 9-5 in the H.264 spec
+ * [out] ppBitStream *ppBitStream is updated after each block is decoded
+ * [out] pOffset *pOffset is updated after each block is decoded
+ * [out] pNumCoeff Pointer to the number of nonzero coefficients in
+ * this block
+ * [out] ppPosCoefbuf Double pointer to destination residual
+ * coefficient-position pair buffer
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P10_DecodeCoeffsToPair(
+ const OMX_U8** ppBitStream,
+ OMX_S32* pOffset,
+ OMX_U8* pNumCoeff,
+ OMX_U8**ppPosCoefbuf,
+ OMX_INT nTable,
+ OMX_INT sMaxNumCoeff
+ );
+
+/*
+ * Description:
+ * Perform DC style intra prediction, averaging upper and left block
+ *
+ * Parameters:
+ * [in] pSrcLeft Pointer to the buffer of 4 left coefficients:
+ * p[x, y] (x = -1, y = 0..3)
+ * [in] pSrcAbove Pointer to the buffer of 4 above coefficients:
+ * p[x,y] (x = 0..3, y = -1)
+ * [in] leftStep Step of left coefficient buffer
+ * [in] dstStep Step of the destination buffer
+ * [in] availability Neighboring 16x16 MB availability flag
+ * [out] pDst Pointer to the destination buffer
+ *
+ * Return Value:
+ * None
+ */
+
+void armVCM4P10_PredictIntraDC4x4(
+ const OMX_U8* pSrcLeft,
+ const OMX_U8 *pSrcAbove,
+ OMX_U8* pDst,
+ OMX_INT leftStep,
+ OMX_INT dstStep,
+ OMX_S32 availability
+);
+
+/*
+ * Description
+ * Unpack a 4x4 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in] ppSrc Double pointer to residual coefficient-position pair
+ * buffer output by CAVLC decoding
+ * [out] ppSrc *ppSrc is updated to the start of next non empty block
+ * [out] pDst Pointer to unpacked 4x4 block
+ */
+
+void armVCM4P10_UnpackBlock4x4(
+ const OMX_U8 **ppSrc,
+ OMX_S16* pDst
+);
+
+/*
+ * Description
+ * Unpack a 2x2 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in] ppSrc Double pointer to residual coefficient-position pair
+ * buffer output by CAVLC decoding
+ * [out] ppSrc *ppSrc is updated to the start of next non empty block
+ * [out] pDst Pointer to unpacked 2x2 block
+ */
+
+void armVCM4P10_UnpackBlock2x2(
+ const OMX_U8 **ppSrc,
+ OMX_S16* pDst
+);
+
+/*
+ * Description
+ * Deblock one boundary pixel
+ *
+ * Parameters:
+ * [in] pQ0 Pointer to pixel q0
+ * [in] Step Step between pixels q0 and q1
+ * [in] tC0 Edge threshold value
+ * [in] alpha alpha threshold value
+ * [in] beta beta threshold value
+ * [in] bS deblocking strength
+ * [in] ChromaFlag True for chroma blocks
+ * [out] pQ0 Deblocked pixels
+ *
+ */
+
+void armVCM4P10_DeBlockPixel(
+ OMX_U8 *pQ0, /* pointer to the pixel q0 */
+ int Step, /* step between pixels q0 and q1 */
+ int tC0, /* edge threshold value */
+ int alpha, /* alpha */
+ int beta, /* beta */
+ int bS, /* deblocking strength */
+ int ChromaFlag
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfHor_Luma
+ *
+ * Description:
+ * This function performs interpolation for horizontal 1/2-pel positions
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to interpolate
+ * in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [out] pDst Pointer to the interpolation buffer of the 1/2-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfHor_Luma(
+ const OMX_U8* pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8* pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfVer_Luma
+ *
+ * Description:
+ * This function performs interpolation for vertical 1/2-pel positions
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to interpolate
+ * in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [out] pDst Pointer to the interpolation buffer of the 1/2-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfVer_Luma(
+ const OMX_U8* pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8* pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfDiag_Luma
+ *
+ * Description:
+ * This function performs interpolation for (1/2, 1/2) positions
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to interpolate
+ * in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [out] pDst Pointer to the interpolation buffer of the (1/2,1/2)-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfDiag_Luma(
+ const OMX_U8* pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8* pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+/*
+ * Description:
+ * Transform Residual 4x4 Coefficients
+ *
+ * Parameters:
+ * [in] pSrc Source 4x4 block
+ * [out] pDst Destination 4x4 block
+ *
+ */
+
+void armVCM4P10_TransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc);
+
+/*
+ * Description:
+ * Forward Transform Residual 4x4 Coefficients
+ *
+ * Parameters:
+ * [in] pSrc Source 4x4 block
+ * [out] pDst Destination 4x4 block
+ *
+ */
+
+void armVCM4P10_FwdTransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc);
+
+OMX_INT armVCM4P10_CompareMotionCostToMV ( /* NOTE(review): no description in this header; parameter roles below are inferred from names only - confirm against the implementation */
+ OMX_S16 mvX, /* presumably the candidate motion vector, horizontal component */
+ OMX_S16 mvY, /* presumably the candidate motion vector, vertical component */
+ OMXVCMotionVector diffMV, /* presumably the MV difference used for the rate term - TODO confirm */
+ OMX_INT candSAD, /* presumably the SAD of the candidate position */
+ OMXVCMotionVector *bestMV, /* non-const pointer; presumably updated with the best MV found so far */
+ OMX_U32 nLamda, /* presumably the Lagrangian weight ("lambda") applied to the MV cost */
+ OMX_S32 *pBestCost); /* non-const pointer; presumably the best cost so far (in/out) */
+
+/**
+ * Function: armVCCOMM_SAD
+ *
+ * Description:
+ * This function calculates the SAD for NxM blocks.
+ *
+ * Remarks:
+ *
+ * [in] pSrcOrg Pointer to the original block
+ * [in] iStepOrg Step of the original block buffer
+ * [in] pSrcRef Pointer to the reference block
+ * [in] iStepRef Step of the reference block buffer
+ * [in] iHeight Height of the block
+ * [in] iWidth Width of the block
+ * [out] pDstSAD Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCCOMM_SAD(
+ const OMX_U8* pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8* pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_S32* pDstSAD,
+ OMX_U32 iHeight,
+ OMX_U32 iWidth);
+
+/**
+ * Function: armVCCOMM_Average
+ *
+ * Description:
+ * This function calculates the average of two blocks and stores the result.
+ *
+ * Remarks:
+ *
+ * [in] pPred0 Pointer to the top-left corner of reference block 0
+ * [in] pPred1 Pointer to the top-left corner of reference block 1
+ * [in] iPredStep0 Step of reference block 0
+ * [in] iPredStep1 Step of reference block 1
+ * [in] iDstStep Step of the destination buffer
+ * [in] iWidth Width of the blocks
+ * [in] iHeight Height of the blocks
+ * [out] pDstPred Pointer to the destination buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCCOMM_Average (
+ const OMX_U8* pPred0,
+ const OMX_U8* pPred1,
+ OMX_U32 iPredStep0,
+ OMX_U32 iPredStep1,
+ OMX_U8* pDstPred,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+/**
+ * Function: armVCM4P10_SADQuar
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the
+ * average of the other two (pSrcRef0 and pSrcRef1)
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to the original block
+ * [in] pSrcRef0 Pointer to reference block 0
+ * [in] pSrcRef1 Pointer to reference block 1
+ * [in] iSrcStep Step of the original block buffer
+ * [in] iRefStep0 Step of reference block 0
+ * [in] iRefStep1 Step of reference block 1
+ * [in] iHeight Height of the block
+ * [in] iWidth Width of the block
+ * [out] pDstSAD Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCM4P10_SADQuar(
+ const OMX_U8* pSrc,
+ const OMX_U8* pSrcRef0,
+ const OMX_U8* pSrcRef1,
+ OMX_U32 iSrcStep,
+ OMX_U32 iRefStep0,
+ OMX_U32 iRefStep1,
+ OMX_U32* pDstSAD,
+ OMX_U32 iHeight,
+ OMX_U32 iWidth
+);
+
+/**
+ * Function: armVCM4P10_Interpolate_Chroma
+ *
+ * Description:
+ * This function performs interpolation for chroma components.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to
+ * interpolate in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [in] dx Fractional part of horizontal motion vector
+ * component in 1/8 pixel unit (0~7)
+ * [in] dy Fractional part of vertical motion vector
+ * component in 1/8 pixel unit (0~7)
+ * [out] pDst Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCM4P10_Interpolate_Chroma(
+ OMX_U8 *pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8 *pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight,
+ OMX_U32 dx,
+ OMX_U32 dy
+);
+
+/**
+ * Function: armVCM4P10_Interpolate_Luma
+ *
+ * Description:
+ * This function performs interpolation for luma components.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to
+ * interpolate in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [in] dx Fractional part of horizontal motion vector
+ * component in 1/4 pixel unit (0~3)
+ * [in] dy Fractional part of vertical motion vector
+ * component in 1/4 pixel unit (0~3)
+ * [out] pDst Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+ OMXResult armVCM4P10_Interpolate_Luma(
+ const OMX_U8 *pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8 *pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight,
+ OMX_U32 dx,
+ OMX_U32 dy
+);
+
+/**
+ * Function: armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx
+ *
+ * Description:
+ * Reconstruct the 4x4 residual block from coefficient-position pair buffer,
+ * perform dequantisation and integer inverse transformation for 4x4 block of
+ * residuals and update the pair buffer pointer to next non-empty block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppSrc Double pointer to residual coefficient-position
+ * pair buffer output by CAVLC decoding
+ * [in] pDC Pointer to the DC coefficient of this block, NULL
+ * if it doesn't exist
+ * [in] QP Quantization parameter
+ * [in] AC Flag indicating if at least one non-zero coefficient exists
+ * [out] pDst pointer to the reconstructed 4x4 block data
+ *
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx(
+ OMX_U8 **ppSrc,
+ OMX_S16 *pDst,
+ OMX_INT QP,
+ OMX_S16* pDC,
+ int AC
+);
+
+#endif /*_armVideo_H_*/
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/omxVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/omxVC.h
new file mode 100644
index 0000000..7b3cc72
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/api/omxVC.h
@@ -0,0 +1,4381 @@
+/**
+ * File: omxVC.h
+ * Brief: OpenMAX DL v1.0.2 - Video Coding library
+ *
+ * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved.
+ *
+ * These materials are protected by copyright laws and contain material
+ * proprietary to the Khronos Group, Inc. You may use these materials
+ * for implementing Khronos specifications, without altering or removing
+ * any trademark, copyright or other notice from the specification.
+ *
+ * Khronos Group makes no, and expressly disclaims any, representations
+ * or warranties, express or implied, regarding these materials, including,
+ * without limitation, any implied warranties of merchantability or fitness
+ * for a particular purpose or non-infringement of any intellectual property.
+ * Khronos Group makes no, and expressly disclaims any, warranties, express
+ * or implied, regarding the correctness, accuracy, completeness, timeliness,
+ * and reliability of these materials.
+ *
+ * Under no circumstances will the Khronos Group, or any of its Promoters,
+ * Contributors or Members or their respective partners, officers, directors,
+ * employees, agents or representatives be liable for any damages, whether
+ * direct, indirect, special or consequential damages for lost revenues,
+ * lost profits, or otherwise, arising from or in connection with these
+ * materials.
+ *
+ * Khronos and OpenMAX are trademarks of the Khronos Group Inc.
+ *
+ */
+
+/* *****************************************************************************************/
+
+#ifndef _OMXVC_H_
+#define _OMXVC_H_
+
+#include "omxtypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* 6.1.1.1 Motion Vectors */
+/* In omxVC, motion vectors are represented as follows: */
+
+typedef struct {
+ OMX_S16 dx;
+ OMX_S16 dy;
+} OMXVCMotionVector;
+
+
+
+/**
+ * Function: omxVCCOMM_Average_8x (6.1.3.1.1)
+ *
+ * Description:
+ * This function calculates the average of two 8x4, 8x8, or 8x16 blocks. The
+ * result is rounded according to (a+b+1)/2. The block average function can
+ * be used in conjunction with half-pixel interpolation to obtain quarter
+ * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1.
+ *
+ * Input Arguments:
+ *
+ * pPred0 - Pointer to the top-left corner of reference block 0
+ * pPred1 - Pointer to the top-left corner of reference block 1
+ * iPredStep0 - Step of reference block 0
+ * iPredStep1 - Step of reference block 1
+ * iDstStep - Step of the destination buffer.
+ * iHeight - Height of the blocks
+ *
+ * Output Arguments:
+ *
+ * pDstPred - Pointer to the destination buffer. 8-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pPred0, pPred1, or
+ * pDstPred.
+ * - pDstPred is not aligned on an 8-byte boundary.
+ * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 8.
+ * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 8.
+ * - iDstStep <= 0 or iDstStep is not a multiple of 8.
+ * - iHeight is not 4, 8, or 16.
+ *
+ */
+OMXResult omxVCCOMM_Average_8x (
+ const OMX_U8 *pPred0,
+ const OMX_U8 *pPred1,
+ OMX_U32 iPredStep0,
+ OMX_U32 iPredStep1,
+ OMX_U8 *pDstPred,
+ OMX_U32 iDstStep,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCCOMM_Average_16x (6.1.3.1.2)
+ *
+ * Description:
+ * This function calculates the average of two 16x16 or 16x8 blocks. The
+ * result is rounded according to (a+b+1)/2. The block average function can
+ * be used in conjunction with half-pixel interpolation to obtain quarter
+ * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1.
+ *
+ * Input Arguments:
+ *
+ * pPred0 - Pointer to the top-left corner of reference block 0
+ * pPred1 - Pointer to the top-left corner of reference block 1
+ * iPredStep0 - Step of reference block 0
+ * iPredStep1 - Step of reference block 1
+ * iDstStep - Step of the destination buffer
+ * iHeight - Height of the blocks
+ *
+ * Output Arguments:
+ *
+ * pDstPred - Pointer to the destination buffer. 16-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pPred0, pPred1, or
+ * pDstPred.
+ * - pDstPred is not aligned on a 16-byte boundary.
+ * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 16.
+ * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 16.
+ * - iDstStep <= 0 or iDstStep is not a multiple of 16.
+ * - iHeight is not 8 or 16.
+ *
+ */
+OMXResult omxVCCOMM_Average_16x (
+ const OMX_U8 *pPred0,
+ const OMX_U8 *pPred1,
+ OMX_U32 iPredStep0,
+ OMX_U32 iPredStep1,
+ OMX_U8 *pDstPred,
+ OMX_U32 iDstStep,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCCOMM_ExpandFrame_I (6.1.3.2.1)
+ *
+ * Description:
+ * This function expands a reconstructed frame in-place. The unexpanded
+ * source frame should be stored in a plane buffer with sufficient space
+ * pre-allocated for edge expansion, and the input frame should be located in
+ * the plane buffer center. This function executes the pixel expansion by
+ * replicating source frame edge pixel intensities in the empty pixel
+ * locations (expansion region) between the source frame edge and the plane
+ * buffer edge. The width/height of the expansion regions on the
+ * horizontal/vertical edges is controlled by the parameter iExpandPels.
+ *
+ * Input Arguments:
+ *
+ * pSrcDstPlane - pointer to the top-left corner of the frame to be
+ * expanded; must be aligned on an 8-byte boundary.
+ * iFrameWidth - frame width; must be a multiple of 8.
+ * iFrameHeight - frame height; must be a multiple of 8.
+ * iExpandPels - number of pixels to be expanded in the horizontal and
+ * vertical directions; must be a multiple of 8.
+ * iPlaneStep - distance, in bytes, between the start of consecutive lines
+ * in the plane buffer; must be larger than or equal to
+ * (iFrameWidth + 2 * iExpandPels).
+ *
+ * Output Arguments:
+ *
+ * pSrcDstPlane - Pointer to the top-left corner of the frame (NOT the
+ * top-left corner of the plane); must be aligned on an 8-byte
+ * boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - pSrcDstPlane is NULL.
+ * - pSrcDstPlane is not aligned on an 8-byte boundary.
+ * - one of the following parameters is either equal to zero or is a
+ * non-multiple of 8: iFrameHeight, iFrameWidth, iPlaneStep, or
+ * iExpandPels.
+ * - iPlaneStep < (iFrameWidth + 2 * iExpandPels).
+ *
+ */
+OMXResult omxVCCOMM_ExpandFrame_I (
+ OMX_U8 *pSrcDstPlane,
+ OMX_U32 iFrameWidth,
+ OMX_U32 iFrameHeight,
+ OMX_U32 iExpandPels,
+ OMX_U32 iPlaneStep
+);
+
+
+
+/**
+ * Function: omxVCCOMM_Copy8x8 (6.1.3.3.1)
+ *
+ * Description:
+ * Copies the reference 8x8 block to the current block.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the reference block in the source frame; must be
+ * aligned on an 8-byte boundary.
+ * step - distance between the starts of consecutive lines in the reference
+ * frame, in bytes; must be a multiple of 8 and must be larger than
+ * or equal to 8.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination block; must be aligned on an 8-byte
+ * boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pSrc, pDst
+ * - one or more of the following pointers is not aligned on an 8-byte
+ * boundary: pSrc, pDst
+ * - step <8 or step is not a multiple of 8.
+ *
+ */
+OMXResult omxVCCOMM_Copy8x8 (
+ const OMX_U8 *pSrc,
+ OMX_U8 *pDst,
+ OMX_INT step
+);
+
+
+
+/**
+ * Function: omxVCCOMM_Copy16x16 (6.1.3.3.2)
+ *
+ * Description:
+ * Copies the reference 16x16 macroblock to the current macroblock.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the reference macroblock in the source frame; must be
+ * aligned on a 16-byte boundary.
+ * step - distance between the starts of consecutive lines in the reference
+ * frame, in bytes; must be a multiple of 16 and must be larger
+ * than or equal to 16.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination macroblock; must be aligned on a
+ * 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pSrc, pDst
+ * - one or more of the following pointers is not aligned on a 16-byte
+ * boundary: pSrc, pDst
+ * - step <16 or step is not a multiple of 16.
+ *
+ */
+OMXResult omxVCCOMM_Copy16x16 (
+ const OMX_U8 *pSrc,
+ OMX_U8 *pDst,
+ OMX_INT step
+);
+
+
+
+/**
+ * Function: omxVCCOMM_ComputeTextureErrorBlock_SAD (6.1.4.1.1)
+ *
+ * Description:
+ * Computes texture error of the block; also returns SAD.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the source plane; must be aligned on an 8-byte
+ * boundary.
+ * srcStep - step of the source plane
+ * pSrcRef - pointer to the reference buffer, an 8x8 block; must be aligned
+ * on an 8-byte boundary.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination buffer, an 8x8 block; must be aligned
+ * on an 8-byte boundary.
+ * pDstSAD - pointer to the Sum of Absolute Differences (SAD) value
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - At least one of the following
+ * pointers is NULL: pSrc, pSrcRef, pDst and pDstSAD.
+ * - pSrc is not 8-byte aligned.
+ * - srcStep <= 0 or srcStep is not a multiple of 8.
+ * - pSrcRef is not 8-byte aligned.
+ * - pDst is not 8-byte aligned.
+ *
+ */
+OMXResult omxVCCOMM_ComputeTextureErrorBlock_SAD (
+ const OMX_U8 *pSrc,
+ OMX_INT srcStep,
+ const OMX_U8 *pSrcRef,
+ OMX_S16 *pDst,
+ OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCCOMM_ComputeTextureErrorBlock (6.1.4.1.2)
+ *
+ * Description:
+ * Computes the texture error of the block.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the source plane. This should be aligned on an 8-byte
+ * boundary.
+ * srcStep - step of the source plane
+ * pSrcRef - pointer to the reference buffer, an 8x8 block. This should be
+ * aligned on an 8-byte boundary.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination buffer, an 8x8 block. This should be
+ * aligned on an 8-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is NULL:
+ * pSrc, pSrcRef, pDst.
+ * - pSrc is not 8-byte aligned.
+ * - srcStep <= 0 or srcStep is not a multiple of 8.
+ * - pSrcRef is not 8-byte aligned.
+ * - pDst is not 8-byte aligned
+ *
+ */
+OMXResult omxVCCOMM_ComputeTextureErrorBlock (
+ const OMX_U8 *pSrc,
+ OMX_INT srcStep,
+ const OMX_U8 *pSrcRef,
+ OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function: omxVCCOMM_LimitMVToRect (6.1.4.1.3)
+ *
+ * Description:
+ * Limits the motion vector associated with the current block/macroblock to
+ * prevent the motion compensated block/macroblock from moving outside a
+ * bounding rectangle as shown in Figure 6-1.
+ *
+ * Input Arguments:
+ *
+ * pSrcMV - pointer to the motion vector associated with the current block
+ * or macroblock
+ * pRectVOPRef - pointer to the bounding rectangle
+ * Xcoord, Ycoord - coordinates of the current block or macroblock
+ * size - size of the current block or macroblock; must be equal to 8 or
+ * 16.
+ *
+ * Output Arguments:
+ *
+ * pDstMV - pointer to the limited motion vector
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcMV, pDstMV, or pRectVOPRef.
+ * - size is not equal to either 8 or 16.
+ * - the width or height of the bounding rectangle is less than
+ * twice the block size.
+ */
+OMXResult omxVCCOMM_LimitMVToRect (
+ const OMXVCMotionVector *pSrcMV,
+ OMXVCMotionVector *pDstMV,
+ const OMXRect *pRectVOPRef,
+ OMX_INT Xcoord,
+ OMX_INT Ycoord,
+ OMX_INT size
+);
+
+
+
+/**
+ * Function: omxVCCOMM_SAD_16x (6.1.4.1.4)
+ *
+ * Description:
+ * This function calculates the SAD for 16x16 and 16x8 blocks.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to the original block; must be aligned on a 16-byte
+ * boundary.
+ * iStepOrg - Step of the original block buffer
+ * pSrcRef - Pointer to the reference block
+ * iStepRef - Step of the reference block buffer
+ * iHeight - Height of the block
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pDstSAD, or pSrcRef
+ * - pSrcOrg is not 16-byte aligned.
+ * - iStepOrg <= 0 or iStepOrg is not a multiple of 16
+ * - iStepRef <= 0 or iStepRef is not a multiple of 16
+ * - iHeight is not 8 or 16
+ *
+ */
+OMXResult omxVCCOMM_SAD_16x (
+ const OMX_U8 *pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8 *pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_S32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCCOMM_SAD_8x (6.1.4.1.5)
+ *
+ * Description:
+ * This function calculates the SAD for 8x16, 8x8, 8x4 blocks.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to the original block; must be aligned on an 8-byte
+ * boundary.
+ * iStepOrg - Step of the original block buffer
+ * pSrcRef - Pointer to the reference block
+ * iStepRef - Step of the reference block buffer
+ * iHeight - Height of the block
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pDstSAD, or pSrcRef
+ * - pSrcOrg is not 8-byte aligned.
+ * - iStepOrg <= 0 or iStepOrg is not a multiple of 8
+ * - iStepRef <= 0 or iStepRef is not a multiple of 8
+ * - iHeight is not 4, 8 or 16
+ *
+ */
+OMXResult omxVCCOMM_SAD_8x (
+ const OMX_U8 *pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8 *pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_S32*pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/* 6.2.1.1 Direction */
+/* The direction enumerator is used with functions that perform AC/DC prediction and zig-zag scan. */
+
+enum {
+ OMX_VC_NONE = 0,
+ OMX_VC_HORIZONTAL = 1,
+ OMX_VC_VERTICAL = 2
+};
+
+
+
+/* 6.2.1.2 Bilinear Interpolation */
+/* The bilinear interpolation enumerator is used with motion estimation, motion compensation, and reconstruction functions. */
+
+enum {
+ OMX_VC_INTEGER_PIXEL = 0, /* case a */
+ OMX_VC_HALF_PIXEL_X = 1, /* case b */
+ OMX_VC_HALF_PIXEL_Y = 2, /* case c */
+ OMX_VC_HALF_PIXEL_XY = 3 /* case d */
+};
+
+
+
+/* 6.2.1.3 Neighboring Macroblock Availability */
+/* Neighboring macroblock availability is indicated using the following flags: */
+
+enum {
+ OMX_VC_UPPER = 1, /** above macroblock is available */
+ OMX_VC_LEFT = 2, /** left macroblock is available */
+ OMX_VC_CENTER = 4,
+ OMX_VC_RIGHT = 8,
+ OMX_VC_LOWER = 16,
+ OMX_VC_UPPER_LEFT = 32, /** above-left macroblock is available */
+ OMX_VC_UPPER_RIGHT = 64, /** above-right macroblock is available */
+ OMX_VC_LOWER_LEFT = 128,
+ OMX_VC_LOWER_RIGHT = 256
+};
+
+
+
+/* 6.2.1.4 Video Components */
+/* A data type that enumerates video components is defined as follows: */
+
+typedef enum {
+ OMX_VC_LUMINANCE, /** Luminance component */
+ OMX_VC_CHROMINANCE /** chrominance component */
+} OMXVCM4P2VideoComponent;
+
+
+
+/* 6.2.1.5 MacroblockTypes */
+/* A data type that enumerates macroblock types is defined as follows: */
+
+typedef enum {
+ OMX_VC_INTER = 0, /** P picture or P-VOP */
+ OMX_VC_INTER_Q = 1, /** P picture or P-VOP */
+ OMX_VC_INTER4V = 2, /** P picture or P-VOP */
+ OMX_VC_INTRA = 3, /** I and P picture, I- and P-VOP */
+ OMX_VC_INTRA_Q = 4, /** I and P picture, I- and P-VOP */
+ OMX_VC_INTER4V_Q = 5 /** P picture or P-VOP (H.263)*/
+} OMXVCM4P2MacroblockType;
+
+
+
+/* 6.2.1.6 Coordinates */
+/* Coordinates are represented as follows: */
+
+typedef struct {
+ OMX_INT x;
+ OMX_INT y;
+} OMXVCM4P2Coordinate;
+
+
+
+/* 6.2.1.7 Motion Estimation Algorithms */
+/* A data type that enumerates motion estimation search methods is defined as follows: */
+
+typedef enum {
+ OMX_VC_M4P2_FAST_SEARCH = 0, /** Fast motion search */
+ OMX_VC_M4P2_FULL_SEARCH = 1 /** Full motion search */
+} OMXVCM4P2MEMode;
+
+
+
+/* 6.2.1.8 Motion Estimation Parameters */
+/* A data structure containing control parameters for
+ * motion estimation functions is defined as follows:
+ */
+
+typedef struct {
+ OMX_INT searchEnable8x8; /** enables 8x8 search */
+ OMX_INT halfPelSearchEnable; /** enables half-pel resolution */
+ OMX_INT searchRange; /** search range */
+ OMX_INT rndVal; /** rounding control; 0-disabled, 1-enabled*/
+} OMXVCM4P2MEParams;
+
+
+
+/* 6.2.1.9 Macroblock Information */
+/* A data structure containing macroblock parameters for
+ * motion estimation functions is defined as follows:
+ */
+
+typedef struct {
+ OMX_S32 sliceId; /* slice number */
+ OMXVCM4P2MacroblockType mbType; /* MB type: OMX_VC_INTRA, OMX_VC_INTER, or OMX_VC_INTER4V */
+ OMX_S32 qp; /* quantization parameter*/
+ OMX_U32 cbpy; /* CBP Luma */
+ OMX_U32 cbpc; /* CBP Chroma */
+ OMXVCMotionVector pMV0[2][2]; /* motion vector, represented using 1/2-pel units,
+ * pMV0[blocky][blockx] (blocky = 0~1, blockx =0~1)
+ */
+ OMXVCMotionVector pMVPred[2][2]; /* motion vector prediction, represented using 1/2-pel units,
+ * pMVPred[blocky][blockx] (blocky = 0~1, blockx = 0~1)
+ */
+ OMX_U8 pPredDir[2][2]; /* AC prediction direction:
+ * OMX_VC_NONE, OMX_VC_VERTICAL, OMX_VC_HORIZONTAL
+ */
+} OMXVCM4P2MBInfo, *OMXVCM4P2MBInfoPtr;
+
+
+
+/**
+ * Function: omxVCM4P2_FindMVpred (6.2.3.1.1)
+ *
+ * Description:
+ * Predicts a motion vector for the current block using the procedure
+ * specified in [ISO14496-2], subclause 7.6.5. The resulting predicted MV is
+ * returned in pDstMVPred. If the parameter pDstMVPredME is not NULL then
+ * the set of three MV candidates used for prediction is also returned,
+ * otherwise pDstMVPredME is NULL upon return.
+ *
+ * Input Arguments:
+ *
+ * pSrcMVCurMB - pointer to the MV buffer associated with the current Y
+ * macroblock; a value of NULL indicates unavailability.
+ * pSrcCandMV1 - pointer to the MV buffer containing the 4 MVs associated
+ * with the MB located to the left of the current MB; set to NULL
+ * if there is no MB to the left.
+ * pSrcCandMV2 - pointer to the MV buffer containing the 4 MVs associated
+ * with the MB located above the current MB; set to NULL if there
+ * is no MB located above the current MB.
+ * pSrcCandMV3 - pointer to the MV buffer containing the 4 MVs associated
+ * with the MB located to the right and above the current MB; set
+ * to NULL if there is no MB located to the above-right.
+ * iBlk - the index of block in the current macroblock
+ * pDstMVPredME - MV candidate return buffer; if set to NULL then
+ * prediction candidate MVs are not returned and pDstMVPredME will
+ * be NULL upon function return; if pDstMVPredME is non-NULL then it
+ * must point to a buffer containing sufficient space for three
+ * return MVs.
+ *
+ * Output Arguments:
+ *
+ * pDstMVPred - pointer to the predicted motion vector
+ * pDstMVPredME - if non-NULL upon input then pDstMVPredME points upon
+ * return to a buffer containing the three motion vector candidates
+ * used for prediction as specified in [ISO14496-2], subclause
+ * 7.6.5, otherwise if NULL upon input then pDstMVPredME is NULL
+ * upon output.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - the pointer pDstMVPred is NULL
+ * - the parameter iBlk does not fall into the range 0 <= iBlk<=3
+ *
+ */
+OMXResult omxVCM4P2_FindMVpred (
+ const OMXVCMotionVector *pSrcMVCurMB,
+ const OMXVCMotionVector *pSrcCandMV1,
+ const OMXVCMotionVector *pSrcCandMV2,
+ const OMXVCMotionVector *pSrcCandMV3,
+ OMXVCMotionVector *pDstMVPred,
+ OMXVCMotionVector *pDstMVPredME,
+ OMX_INT iBlk
+);
+
+
+
+/**
+ * Function: omxVCM4P2_IDCT8x8blk (6.2.3.2.1)
+ *
+ * Description:
+ * Computes a 2D inverse DCT for a single 8x8 block, as defined in
+ * [ISO14496-2].
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the start of the linearly arranged IDCT input buffer
+ * (elements of type OMX_S16); must be aligned on a 16-byte
+ * boundary. According to [ISO14496-2], the input coefficient
+ * values should lie within the range [-2048, 2047].
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the start of the linearly arranged IDCT output buffer
+ * (elements of type OMX_S16); must be aligned on a 16-byte
+ * boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - pSrc or pDst is NULL.
+ * - pSrc or pDst is not 16-byte aligned.
+ *
+ */
+OMXResult omxVCM4P2_IDCT8x8blk (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function: omxVCM4P2_MEGetBufSize (6.2.4.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification
+ * structure for the following motion estimation functions:
+ * omxVCM4P2_BlockMatch_Integer_8x8, omxVCM4P2_BlockMatch_Integer_16x16, and
+ * omxVCM4P2_MotionEstimationMB. The structure itself is allocated by the
+ * caller and initialized with omxVCM4P2_MEInit.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P2MEMode
+ * pMEParams - motion estimation parameters
+ *
+ * Output Arguments:
+ *
+ * pSize - pointer to the number of bytes required for the specification
+ * structure
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - one or more of the following is true:
+ * - an invalid value was specified for the parameter MEmode
+ * - a negative or zero value was specified for the
+ * parameter pMEParams->searchRange
+ *
+ */
+OMXResult omxVCM4P2_MEGetBufSize (
+ OMXVCM4P2MEMode MEmode,
+ const OMXVCM4P2MEParams *pMEParams,
+ OMX_U32 *pSize
+);
+
+
+
+/**
+ * Function: omxVCM4P2_MEInit (6.2.4.1.2)
+ *
+ * Description:
+ * Initializes the vendor-specific specification structure required for the
+ * following motion estimation functions: omxVCM4P2_BlockMatch_Integer_8x8,
+ * omxVCM4P2_BlockMatch_Integer_16x16, and omxVCM4P2_MotionEstimationMB.
+ * Memory for the specification structure *pMESpec must be allocated prior to
+ * calling the function, and should be aligned on a 4-byte boundary.
+ * Following initialization by this function, the vendor-specific structure
+ * *pMESpec should contain an implementation-specific representation of all
+ * motion estimation parameters received via the structure pMEParams, for
+ * example rndVal, searchRange, etc. The number of bytes required for the
+ * specification structure can be determined using the function
+ * omxVCM4P2_MEGetBufSize.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P2MEMode
+ * pMEParams - motion estimation parameters
+ * pMESpec - pointer to the uninitialized, caller-allocated ME specification
+ * structure
+ *
+ * Output Arguments:
+ *
+ * pMESpec - pointer to the initialized ME specification structure
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - one or more of the following is true:
+ * - an invalid value was specified for the parameter MEmode
+ * - a negative or zero value was specified for the
+ * parameter pMEParams->searchRange
+ *
+ */
+OMXResult omxVCM4P2_MEInit (
+ OMXVCM4P2MEMode MEmode,
+ const OMXVCM4P2MEParams*pMEParams,
+ void *pMESpec
+);
+
+
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Integer_16x16 (6.2.4.2.1)
+ *
+ * Description:
+ * Performs a 16x16 block search; estimates motion vector and associated
+ * minimum SAD. Both the input and output motion vectors are represented using
+ * half-pixel units, and therefore a shift left or right by 1 bit may be
+ * required, respectively, to match the input or output MVs with other
+ * functions that either generate output MVs or expect input MVs represented
+ * using integer pixel units.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * MB that corresponds to the location of the current macroblock in
+ * the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - pointer to the valid reference plane rectangle; coordinates
+ * are specified relative to the image origin. Rectangle
+ * boundaries may extend beyond image boundaries if the image has
+ * been padded. For example, if padding extends 4 pixels beyond
+ * frame border, then the value for the left border could be set to
+ * -4.
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 256
+ * entries); must be aligned on a 16-byte boundary. The number of
+ * bytes between lines (step) is 16.
+ * pCurrPointPos - position of the current macroblock in the current plane
+ * pSrcPreMV - pointer to predicted motion vector; NULL indicates no
+ * predicted MV
+ * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced
+ * by pSrcPreMV); may be set to NULL if unavailable.
+ * pMESpec - vendor-specific motion estimation specification structure;
+ * must have been allocated and then initialized using
+ * omxVCM4P2_MEInit prior to calling the block matching function.
+ *
+ * Output Arguments:
+ *
+ * pDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL: pSrcRefBuf,
+ * pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or
+ * pMESpec, or
+ * - pSrcCurrBuf is not 16-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Integer_16x16 (
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ const OMXVCMotionVector*pSrcPreMV,
+ const OMX_INT *pSrcPreSAD,
+ void *pMESpec,
+ OMXVCMotionVector*pDstMV,
+ OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Integer_8x8 (6.2.4.2.2)
+ *
+ * Description:
+ * Performs an 8x8 block search; estimates motion vector and associated
+ * minimum SAD. Both the input and output motion vectors are represented
+ * using half-pixel units, and therefore a shift left or right by 1 bit may be
+ * required, respectively, to match the input or output MVs with other
+ * functions that either generate output MVs or expect input MVs represented
+ * using integer pixel units.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * block that corresponds to the location of the current 8x8 block
+ * in the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - pointer to the valid reference plane rectangle; coordinates
+ * are specified relative to the image origin. Rectangle
+ * boundaries may extend beyond image boundaries if the image has
+ * been padded.
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 128
+ * entries); must be aligned on an 8-byte boundary. The number of
+ * bytes between lines (step) is 16 bytes.
+ * pCurrPointPos - position of the current block in the current plane
+ * pSrcPreMV - pointer to predicted motion vector; NULL indicates no
+ * predicted MV
+ * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced
+ * by pSrcPreMV); may be set to NULL if unavailable.
+ * pMESpec - vendor-specific motion estimation specification structure;
+ * must have been allocated and then initialized using
+ * omxVCM4P2_MEInit prior to calling the block matching function.
+ *
+ * Output Arguments:
+ *
+ * pDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL: pSrcRefBuf,
+ * pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or
+ * pMESpec, or
+ * - pSrcCurrBuf is not 8-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Integer_8x8 (
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ const OMXVCMotionVector *pSrcPreMV,
+ const OMX_INT *pSrcPreSAD,
+ void *pMESpec,
+ OMXVCMotionVector *pDstMV,
+ OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Half_16x16 (6.2.4.2.3)
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution. Returns the
+ * estimated motion vector and associated minimum SAD. This function
+ * estimates the half-pixel motion vector by interpolating the integer
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e.,
+ * the initial integer MV is generated externally. The input parameters
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of
+ * 16x16 integer search prior to calling BlockMatch_Half_16x16. The function
+ * BlockMatch_Integer_16x16 may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * macroblock that corresponds to the location of the current
+ * macroblock in the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - reference plane valid region rectangle
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 256
+ * entries); must be aligned on a 16-byte boundary. The number of
+ * bytes between lines (step) is 16.
+ * pSearchPointRefPos - position of the starting point for half pixel
+ * search (specified in terms of integer pixel units) in the
+ * reference plane, i.e., the reference position pointed to by the
+ * predicted motion vector.
+ * rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ * pSrcDstMV - pointer to the initial MV estimate; typically generated
+ * during a prior 16X16 integer search; specified in terms of
+ * half-pixel units.
+ *
+ * Output Arguments:
+ *
+ * pSrcDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL: pSrcRefBuf,
+ * pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV, or
+ * - pSrcCurrBuf is not 16-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Half_16x16 (
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pSearchPointRefPos,
+ OMX_INT rndVal,
+ OMXVCMotionVector *pSrcDstMV,
+ OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Half_8x8 (6.2.4.2.4)
+ *
+ * Description:
+ * Performs an 8x8 block match with half-pixel resolution. Returns the
+ * estimated motion vector and associated minimum SAD. This function
+ * estimates the half-pixel motion vector by interpolating the integer
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e.,
+ * the initial integer MV is generated externally. The input parameters
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of
+ * 8x8 integer search prior to calling BlockMatch_Half_8x8. The function
+ * BlockMatch_Integer_8x8 may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * block that corresponds to the location of the current 8x8 block
+ * in the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - reference plane valid region rectangle
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 128
+ * entries); must be aligned on an 8-byte boundary. The number of
+ * bytes between lines (step) is 16.
+ * pSearchPointRefPos - position of the starting point for half pixel
+ * search (specified in terms of integer pixel units) in the
+ * reference plane.
+ * rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ * pSrcDstMV - pointer to the initial MV estimate; typically generated
+ * during a prior 8x8 integer search, specified in terms of
+ * half-pixel units.
+ *
+ * Output Arguments:
+ *
+ * pSrcDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcRefBuf, pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV
+ * - pSrcCurrBuf is not 8-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Half_8x8 (
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pSearchPointRefPos,
+ OMX_INT rndVal,
+ OMXVCMotionVector *pSrcDstMV,
+ OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P2_MotionEstimationMB (6.2.4.3.1)
+ *
+ * Description:
+ * Performs motion search for a 16x16 macroblock. Selects best motion search
+ * strategy from among inter-1MV, inter-4MV, and intra modes. Supports
+ * integer and half pixel resolution.
+ *
+ * Input Arguments:
+ *
+ * pSrcCurrBuf - pointer to the top-left corner of the current MB in the
+ * original picture plane; must be aligned on a 16-byte boundary.
+ * The function does not expect source data outside the region
+ * bounded by the MB to be available; for example it is not
+ * necessary for the caller to guarantee the availability of
+ * pSrcCurrBuf[-srcCurrStep], i.e., the row of pixels above the MB
+ * to be processed.
+ * srcCurrStep - width of the original picture plane, in terms of full
+ * pixels; must be a multiple of 16.
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * plane location corresponding to the location of the current
+ * macroblock in the current plane; must be aligned on a 16-byte
+ * boundary.
+ * srcRefStep - width of the reference picture plane, in terms of full
+ * pixels; must be a multiple of 16.
+ * pRefRect - reference plane valid region rectangle, specified relative to
+ * the image origin
+ * pCurrPointPos - position of the current macroblock in the current plane
+ * pMESpec - pointer to the vendor-specific motion estimation specification
+ * structure; must be allocated and then initialized using
+ * omxVCM4P2_MEInit prior to calling this function.
+ * pMBInfo - array, of dimension four, containing pointers to information
+ * associated with four nearby MBs:
+ * - pMBInfo[0] - pointer to left MB information
+ * - pMBInfo[1] - pointer to top MB information
+ * - pMBInfo[2] - pointer to top-left MB information
+ * - pMBInfo[3] - pointer to top-right MB information
+ * Any pointer in the array may be set equal to NULL if the
+ * corresponding MB doesn't exist. For each MB, the following structure
+ * members are used:
+ * - mbType - macroblock type, either OMX_VC_INTRA, OMX_VC_INTER, or
+ * OMX_VC_INTER4V
+ * - pMV0[2][2] - estimated motion vectors; represented
+ * in 1/2 pixel units
+ * - sliceID - number of the slice to which the MB belongs
+ * pSrcDstMBCurr - pointer to information structure for the current MB.
+ * The following entries should be set prior to calling the
+ * function: sliceID - the number of the slice to which the
+ * current MB belongs. The structure elements cbpy and cbpc are
+ * ignored.
+ *
+ * Output Arguments:
+ *
+ * pSrcDstMBCurr - pointer to updated information structure for the current
+ * MB after MB-level motion estimation has been completed. The
+ * following structure members are updated by the ME function:
+ * - mbType - macroblock type: OMX_VC_INTRA, OMX_VC_INTER, or
+ * OMX_VC_INTER4V.
+ * - pMV0[2][2] - estimated motion vectors; represented in
+ * terms of 1/2 pel units.
+ * - pMVPred[2][2] - predicted motion vectors; represented
+ * in terms of 1/2 pel units.
+ * The structure members cbpy and cbpc are not updated by the function.
+ * pDstSAD - pointer to the minimum SAD for INTER1V, or sum of minimum SADs
+ * for INTER4V
+ * pDstBlockSAD - pointer to an array of SAD values for each of the four
+ * 8x8 luma blocks in the MB. The block SADs are in scan order for
+ * each MB.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL: pSrcCurrBuf,
+ * pSrcRefBuf, pRefRect, pCurrPointPos, pMBInter, pMBIntra,
+ * pSrcDstMBCurr, or pDstSAD.
+ * NOTE(review): pMBInter and pMBIntra are not parameters of the
+ * prototype below; confirm which of the actual pointer arguments
+ * (e.g. pMESpec, pMBInfo, pDstBlockSAD) are NULL-checked.
+ *
+ */
+OMXResult omxVCM4P2_MotionEstimationMB (
+ const OMX_U8 *pSrcCurrBuf,
+ OMX_S32 srcCurrStep,
+ const OMX_U8 *pSrcRefBuf,
+ OMX_S32 srcRefStep,
+ const OMXRect*pRefRect,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ void *pMESpec,
+ const OMXVCM4P2MBInfoPtr *pMBInfo,
+ OMXVCM4P2MBInfo *pSrcDstMBCurr,
+ OMX_U16 *pDstSAD,
+ OMX_U16 *pDstBlockSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DCT8x8blk (6.2.4.4.1)
+ *
+ * Description:
+ * Computes a 2D forward DCT for a single 8x8 block, as defined in
+ * [ISO14496-2].
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the start of the linearly arranged input buffer
+ * (elements of type OMX_S16); must be aligned on a 16-byte
+ * boundary. Input values (pixel intensities) are valid in the
+ * range [-255,255].
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the start of the linearly arranged output buffer
+ * (elements of type OMX_S16); must be aligned on a 16-byte
+ * boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, returned if:
+ * - pSrc or pDst is NULL.
+ * - pSrc or pDst is not 16-byte aligned.
+ *
+ */
+OMXResult omxVCM4P2_DCT8x8blk (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function: omxVCM4P2_QuantIntra_I (6.2.4.4.2)
+ *
+ * Description:
+ * Performs quantization on intra block coefficients. This function supports
+ * bits_per_pixel == 8.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input intra block coefficients; must be aligned
+ * on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale).
+ * blockIndex - block index indicating the component type and position,
+ * valid in the range 0 to 5, as defined in [ISO14496-2], subclause
+ * 6.1.3.8.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (quantized) intra block coefficients.
+ * When shortVideoHeader==1, AC coefficients are saturated on the
+ * interval [-127, 127], and DC coefficients are saturated on the
+ * interval [1, 254]. When shortVideoHeader==0, AC coefficients
+ * are saturated on the interval [-2047, 2047].
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - pSrcDst is NULL.
+ * - blockIndex < 0 or blockIndex >= 10
+ * - QP <= 0 or QP >= 32.
+ * NOTE(review): the blockIndex description above gives the valid range
+ * as 0 to 5, while the error condition uses an upper bound of 10;
+ * confirm which bound the implementation enforces.
+ *
+ */
+OMXResult omxVCM4P2_QuantIntra_I (
+ OMX_S16 *pSrcDst,
+ OMX_U8 QP,
+ OMX_INT blockIndex,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_QuantInter_I (6.2.4.4.3)
+ *
+ * Description:
+ * Performs quantization on an inter coefficient block; supports
+ * bits_per_pixel == 8.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input inter block coefficients; must be aligned
+ * on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale)
+ * shortVideoHeader - binary flag indicating presence of short_video_header;
+ * shortVideoHeader==1 selects linear intra DC mode, and
+ * shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (quantized) inter block coefficients.
+ * When shortVideoHeader==1, AC coefficients are saturated on the
+ * interval [-127, 127], and DC coefficients are saturated on the
+ * interval [1, 254]. When shortVideoHeader==0, AC coefficients
+ * are saturated on the interval [-2047, 2047].
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - pSrcDst is NULL.
+ * - QP <= 0 or QP >= 32.
+ *
+ */
+OMXResult omxVCM4P2_QuantInter_I (
+ OMX_S16 *pSrcDst,
+ OMX_U8 QP,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_TransRecBlockCoef_intra (6.2.4.4.4)
+ *
+ * Description:
+ * Quantizes the DCT coefficients, implements intra block AC/DC coefficient
+ * prediction, and reconstructs the current intra block texture for prediction
+ * on the next frame. Quantized row and column coefficients are returned in
+ * the updated coefficient buffers.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the pixels of current intra block; must be aligned on
+ * an 8-byte boundary.
+ * pPredBufRow - pointer to the coefficient row buffer containing
+ * ((num_mb_per_row * 2 + 1) * 8) elements of type OMX_S16.
+ * Coefficients are organized into blocks of eight as described
+ * below (Internal Prediction Coefficient Update Procedures). The
+ * DC coefficient is first, and the remaining buffer locations
+ * contain the quantized AC coefficients. Each group of eight row
+ * buffer elements combined with one element eight elements ahead
+ * contains the coefficient predictors of the neighboring block
+ * that is spatially above or to the left of the block currently to
+ * be decoded. A negative-valued DC coefficient indicates that this
+ * neighboring block is not INTRA-coded or out of bounds, and
+ * therefore the AC and DC coefficients are invalid. Pointer must
+ * be aligned on an 8-byte boundary.
+ * pPredBufCol - pointer to the prediction coefficient column buffer
+ * containing 16 elements of type OMX_S16. Coefficients are
+ * organized as described in section 6.2.2.5. Pointer must be
+ * aligned on an 8-byte boundary.
+ * pSumErr - pointer to a flag indicating whether or not AC prediction is
+ * required; AC prediction is enabled if *pSumErr >=0, but the
+ * value is not used for coefficient prediction, i.e., the sum of
+ * absolute differences starts from 0 for each call to this
+ * function. Otherwise AC prediction is disabled if *pSumErr < 0.
+ * blockIndex - block index indicating the component type and position, as
+ * defined in [ISO14496-2], subclause 6.1.3.8.
+ * curQp - quantization parameter of the macroblock to which the current
+ * block belongs
+ * pQpBuf - pointer to a 2-element quantization parameter buffer; pQpBuf[0]
+ * contains the quantization parameter associated with the 8x8
+ * block left of the current block (QPa), and pQpBuf[1] contains
+ * the quantization parameter associated with the 8x8 block above
+ * the current block (QPc). In the event that the corresponding
+ * block is outside of the VOP bound, the Qp value will not affect
+ * the intra prediction process, as described in [ISO14496-2],
+ * sub-clause 7.4.3.3, Adaptive AC Coefficient Prediction.
+ * srcStep - width of the source buffer; must be a multiple of 8.
+ * dstStep - width of the reconstructed destination buffer; must be a
+ * multiple of 16.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the quantized DCT coefficient buffer; pDst[0] contains
+ * the predicted DC coefficient; the remaining entries contain the
+ * quantized AC coefficients (without prediction). The pointer
+ * pDst must be aligned on a 16-byte boundary.
+ * pRec - pointer to the reconstructed texture; must be aligned on an
+ * 8-byte boundary.
+ * pPredBufRow - pointer to the updated coefficient row buffer
+ * pPredBufCol - pointer to the updated coefficient column buffer
+ * pPreACPredict - if prediction is enabled, the parameter points to the
+ * start of the buffer containing the coefficient differences for
+ * VLC encoding. The entry pPreACPredict[0] indicates prediction
+ * direction for the current block and takes one of the following
+ * values: OMX_VC_NONE (prediction disabled), OMX_VC_HORIZONTAL, or
+ * OMX_VC_VERTICAL. The entries
+ * pPreACPredict[1]-pPreACPredict[7] contain predicted AC
+ * coefficients. If prediction is disabled (*pSumErr<0) then the
+ * contents of this buffer are undefined upon return from the
+ * function.
+ * pSumErr - pointer to the value of the accumulated AC coefficient errors,
+ * i.e., sum of the absolute differences between predicted and
+ * unpredicted AC coefficients
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ * - At least one of the following pointers is NULL: pSrc, pDst, pRec,
+ * pPredBufRow, pPredBufCol, pQpBuf, pPreACPredict, pSumErr.
+ * - blockIndex < 0 or blockIndex >= 10;
+ * - curQp <= 0 or curQp >= 32.
+ * - srcStep, or dstStep <= 0 or not a multiple of 8.
+ * - pDst is not 16-byte aligned.
+ * - At least one of the following pointers is not 8-byte aligned:
+ * pSrc, pRec.
+ * NOTE(review): the dstStep description above requires a multiple of
+ * 16, while the error condition only mentions a multiple of 8; confirm
+ * which constraint the implementation enforces.
+ *
+ * Note: The coefficient buffers must be updated in accordance with the
+ * update procedures defined in section 6.2.2.
+ *
+ */
+OMXResult omxVCM4P2_TransRecBlockCoef_intra (
+ const OMX_U8 *pSrc,
+ OMX_S16 *pDst,
+ OMX_U8 *pRec,
+ OMX_S16 *pPredBufRow,
+ OMX_S16 *pPredBufCol,
+ OMX_S16 *pPreACPredict,
+ OMX_INT *pSumErr,
+ OMX_INT blockIndex,
+ OMX_U8 curQp,
+ const OMX_U8 *pQpBuf,
+ OMX_INT srcStep,
+ OMX_INT dstStep,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_TransRecBlockCoef_inter (6.2.4.4.5)
+ *
+ * Description:
+ * Implements DCT, and quantizes the DCT coefficients of the inter block
+ * while reconstructing the texture residual. There is no boundary check for
+ * the bit stream buffer.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the residuals to be encoded; must be aligned on a
+ * 16-byte boundary.
+ * QP - quantization parameter.
+ * shortVideoHeader - binary flag indicating presence of short_video_header;
+ * shortVideoHeader==1 selects linear intra DC mode, and
+ * shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the quantized DCT coefficients buffer; must be aligned
+ * on a 16-byte boundary.
+ * pRec - pointer to the reconstructed texture residuals; must be aligned
+ * on a 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is either NULL or
+ * not 16-byte aligned:
+ * - pSrc
+ * - pDst
+ * - pRec
+ * - QP <= 0 or QP >= 32.
+ *
+ */
+OMXResult omxVCM4P2_TransRecBlockCoef_inter (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst,
+ OMX_S16 *pRec,
+ OMX_U8 QP,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_EncodeVLCZigzag_IntraDCVLC (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one
+ * intra block. Two versions of the function (DCVLC and ACVLC) are provided
+ * in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding".
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the bitstream
+ * pBitOffset - pointer to the bit position in the byte pointed by
+ * *ppBitStream. Valid within 0 to 7.
+ * pQDctBlkCoef - pointer to the quantized DCT coefficient
+ * predDir - AC prediction direction, which is used to decide the zigzag
+ * scan pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used.
+ * Performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction.
+ * Performs alternate-vertical zigzag scan.
+ * - OMX_VC_VERTICAL - Vertical prediction.
+ * Performs alternate-horizontal zigzag scan.
+ * pattern - block pattern which is used to decide whether this block is
+ * encoded
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ * videoComp - video component type (luminance, chrominance) of the current
+ * block
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is encoded, so
+ * that it points to the current byte in the bit stream buffer.
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pQDctBlkCoef.
+ * - *pBitOffset < 0, or *pBitOffset > 7.
+ * - predDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or
+ * OMX_VC_VERTICAL.
+ * - videoComp is not one component of enum OMXVCM4P2VideoComponent.
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraDCVLC (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ const OMX_S16 *pQDctBlkCoef,
+ OMX_U8 predDir,
+ OMX_U8 pattern,
+ OMX_INT shortVideoHeader,
+ OMXVCM4P2VideoComponent videoComp
+);
+
+
+
+/**
+ * Function: omxVCM4P2_EncodeVLCZigzag_IntraACVLC (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one
+ * intra block. Two versions of the function (DCVLC and ACVLC) are provided
+ * in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding".
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the bitstream
+ * pBitOffset - pointer to the bit position in the byte pointed by
+ * *ppBitStream. Valid within 0 to 7.
+ * pQDctBlkCoef - pointer to the quantized DCT coefficient
+ * predDir - AC prediction direction, which is used to decide the zigzag
+ * scan pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used.
+ * Performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction.
+ * Performs alternate-vertical zigzag scan.
+ * - OMX_VC_VERTICAL - Vertical prediction.
+ * Performs alternate-horizontal zigzag scan.
+ * pattern - block pattern which is used to decide whether this block is
+ * encoded
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is encoded, so
+ * that it points to the current byte in the bit stream buffer.
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pQDctBlkCoef.
+ * - *pBitOffset < 0, or *pBitOffset > 7.
+ * - predDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or
+ * OMX_VC_VERTICAL.
+ * - VideoComp is not one component of enum OMXVCM4P2VideoComponent.
+ * NOTE(review): the prototype below has no videoComp parameter, so
+ * this condition appears to be carried over from the DCVLC variant;
+ * confirm against the specification.
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraACVLC (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ const OMX_S16 *pQDctBlkCoef,
+ OMX_U8 predDir,
+ OMX_U8 pattern,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_EncodeVLCZigzag_Inter (6.2.4.5.3)
+ *
+ * Description:
+ * Performs classical zigzag scanning and VLC encoding for one inter block.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream
+ * pBitOffset - pointer to the bit position in the byte pointed by
+ * *ppBitStream. Valid within 0 to 7
+ * pQDctBlkCoef - pointer to the quantized DCT coefficient
+ * pattern - block pattern which is used to decide whether this block is
+ * encoded
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is encoded so that
+ * it points to the current byte in the bit stream buffer.
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pQDctBlkCoef
+ * - *pBitOffset < 0, or *pBitOffset >7.
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_Inter (
+ OMX_U8 **ppBitStream, /* in/out: *ppBitStream is updated to the current byte after encoding */
+ OMX_INT *pBitOffset, /* in/out: bit position in the byte at *ppBitStream; valid within 0 to 7 */
+ const OMX_S16 *pQDctBlkCoef, /* in: quantized DCT coefficients; NULL yields OMX_Sts_BadArgErr */
+ OMX_U8 pattern, /* in: block pattern used to decide whether this block is encoded */
+ OMX_INT shortVideoHeader /* in: 0 = escape modes 0-3, 1 = escape mode 4 */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_EncodeMV (6.2.4.5.4)
+ *
+ * Description:
+ * Predicts a motion vector for the current macroblock, encodes the
+ * difference, and writes the output to the stream buffer. The input MVs
+ * pMVCurMB, pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB should lie
+ * within the ranges associated with the input parameter fcodeForward, as
+ * described in [ISO14496-2], subclause 7.6.3. This function provides a
+ * superset of the functionality associated with the function
+ * omxVCM4P2_FindMVpred.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the bitstream buffer
+ * pBitOffset - index of the first free (next available) bit in the stream
+ * buffer referenced by *ppBitStream, valid in the range 0 to 7.
+ * pMVCurMB - pointer to the current macroblock motion vector; a value of
+ * NULL indicates unavailability.
+ * pSrcMVLeftMB - pointer to the source left macroblock motion vector; a
+ * value of NULL indicates unavailability.
+ * pSrcMVUpperMB - pointer to source upper macroblock motion vector; a
+ * value of NULL indicates unavailability.
+ * pSrcMVUpperRightMB - pointer to source upper right MB motion vector; a
+ * value of NULL indicates unavailability.
+ * fcodeForward - an integer with values from 1 to 7; used in encoding
+ * motion vectors related to search range, as described in
+ * [ISO14496-2], subclause 7.6.3.
+ * MBType - macro block type, valid in the range 0 to 5
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - updated pointer to the current byte in the bit stream
+ * buffer
+ * pBitOffset - updated index of the next available bit position in stream
+ * buffer referenced by *ppBitStream
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pMVCurMB
+ * - *pBitOffset < 0, or *pBitOffset >7.
+ * - fcodeForward <= 0, or fcodeForward > 7, or MBType < 0.
+ *
+ */
+OMXResult omxVCM4P2_EncodeMV (
+ OMX_U8 **ppBitStream, /* in/out: current byte in the bitstream buffer */
+ OMX_INT *pBitOffset, /* in/out: index of the next available bit at *ppBitStream; 0..7 */
+ const OMXVCMotionVector *pMVCurMB, /* in: current macroblock motion vector */
+ const OMXVCMotionVector *pSrcMVLeftMB, /* in: left macroblock MV; NULL if unavailable */
+ const OMXVCMotionVector *pSrcMVUpperMB, /* in: upper macroblock MV; NULL if unavailable */
+ const OMXVCMotionVector *pSrcMVUpperRightMB, /* in: upper-right macroblock MV; NULL if unavailable */
+ OMX_INT fcodeForward, /* in: 1..7, see [ISO14496-2] subclause 7.6.3 */
+ OMXVCM4P2MacroblockType MBType /* in: macroblock type, valid in the range 0 to 5 */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodePadMV_PVOP (6.2.5.1.1)
+ *
+ * Description:
+ * Decodes and pads the four motion vectors associated with a non-intra P-VOP
+ * macroblock. For macroblocks of type OMX_VC_INTER4V, the output MV is
+ * padded as specified in [ISO14496-2], subclause 7.6.1.6. Otherwise, for
+ * macroblocks of types other than OMX_VC_INTER4V, the decoded MV is copied to
+ * all four output MV buffer entries.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream buffer
+ * pBitOffset - pointer to the bit position in the byte pointed to by
+ * *ppBitStream. *pBitOffset is valid within [0-7].
+ * pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB - pointers to the
+ * motion vector buffers of the macroblocks spatially at the left,
+ * upper, and upper-right side of the current macroblock,
+ * respectively; a value of NULL indicates unavailability. Note:
+ * Any neighborhood macroblock outside the current VOP or video
+ * packet or outside the current GOB (when short_video_header is
+ * 1) for which gob_header_empty is 0 is treated as
+ * transparent, according to [ISO14496-2], subclause 7.6.5.
+ * fcodeForward - a code equal to vop_fcode_forward in MPEG-4 bit stream
+ * syntax
+ * MBType - the type of the current macroblock. If MBType is not equal to
+ * OMX_VC_INTER4V, the destination motion vector buffer is still
+ * filled with the same decoded vector.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded, so
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream
+ * pDstMVCurMB - pointer to the motion vector buffer for the current
+ * macroblock; contains four decoded motion vectors
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDstMVCurMB
+ * - *pBitOffset exceeds [0,7]
+ * - fcodeForward exceeds (0,7]
+ * - MBType less than zero
+ * - motion vector buffer is not 4-byte aligned.
+ * OMX_Sts_Err - status error
+ *
+ */
+OMXResult omxVCM4P2_DecodePadMV_PVOP (
+ const OMX_U8 **ppBitStream, /* in/out: *ppBitStream is updated to the current byte after decoding */
+ OMX_INT *pBitOffset, /* in/out: bit position in the byte at *ppBitStream; valid within [0-7] */
+ OMXVCMotionVector *pSrcMVLeftMB, /* in: left macroblock MV buffer; NULL if unavailable */
+ OMXVCMotionVector *pSrcMVUpperMB, /* in: upper macroblock MV buffer; NULL if unavailable */
+ OMXVCMotionVector *pSrcMVUpperRightMB, /* in: upper-right macroblock MV buffer; NULL if unavailable */
+ OMXVCMotionVector *pDstMVCurMB, /* out: receives four decoded motion vectors */
+ OMX_INT fcodeForward, /* in: vop_fcode_forward; must lie in (0,7] */
+ OMXVCM4P2MacroblockType MBType /* in: type of the current macroblock */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients
+ * for one intra block. Two versions of the function (DCVLC and ACVLC) are
+ * provided in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the
+ * bitstream buffer
+ * pBitOffset - pointer to the bit position in the current byte referenced
+ * by *ppBitStream. The parameter *pBitOffset is valid in the
+ * range [0-7].
+ * Bit Position in one byte: |Most Least|
+ * *pBitOffset |0 1 2 3 4 5 6 7|
+ * predDir - AC prediction direction; used to select the zigzag scan
+ * pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used;
+ * performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction;
+ * performs alternate-vertical zigzag scan;
+ * - OMX_VC_VERTICAL - Vertical prediction;
+ * performs alternate-horizontal zigzag scan.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ * videoComp - video component type (luminance or chrominance) of the
+ * current block
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded such
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated such that it points to the current
+ * bit position in the byte pointed by *ppBitStream
+ * pDst - pointer to the coefficient buffer of current block; must be
+ * 4-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - *pBitOffset exceeds [0,7]
+ * - predDir exceeds [0,2]
+ * - pDst is not 4-byte aligned
+ * OMX_Sts_Err - if:
+ * - In DecodeVLCZigzag_IntraDCVLC, dc_size > 12
+ * - At least one of mark bits equals zero
+ * - Illegal stream encountered; code cannot be located in VLC table
+ * - Forbidden code encountered in the VLC FLC table.
+ * - The number of coefficients is greater than 64
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraDCVLC (
+ const OMX_U8 **ppBitStream, /* in/out: *ppBitStream is updated to the current byte after decoding */
+ OMX_INT *pBitOffset, /* in/out: bit position in the byte at *ppBitStream; valid in [0-7] */
+ OMX_S16 *pDst, /* out: coefficient buffer of the current block; must be 4-byte aligned */
+ OMX_U8 predDir, /* in: AC prediction direction, selects the zigzag scan pattern; [0,2] */
+ OMX_INT shortVideoHeader, /* in: 0 = escape modes 0-3, 1 = escape mode 4 */
+ OMXVCM4P2VideoComponent videoComp /* in: luminance or chrominance component of the current block */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients
+ * for one intra block. Two versions of the function (DCVLC and ACVLC) are
+ * provided in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the
+ * bitstream buffer
+ * pBitOffset - pointer to the bit position in the current byte referenced
+ * by *ppBitStream. The parameter *pBitOffset is valid in the
+ * range [0-7].
+ * Bit Position in one byte: |Most Least|
+ * *pBitOffset |0 1 2 3 4 5 6 7|
+ * predDir - AC prediction direction; used to select the zigzag scan
+ * pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used;
+ * performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction;
+ * performs alternate-vertical zigzag scan;
+ * - OMX_VC_VERTICAL - Vertical prediction;
+ * performs alternate-horizontal zigzag scan.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ * videoComp - video component type (luminance or chrominance) of the
+ * current block (IntraDCVLC version only; the IntraACVLC
+ * prototype below takes no videoComp argument)
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded such
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated such that it points to the current
+ * bit position in the byte pointed by *ppBitStream
+ * pDst - pointer to the coefficient buffer of current block; must be
+ * 4-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - *pBitOffset exceeds [0,7]
+ * - predDir exceeds [0,2]
+ * - pDst is not 4-byte aligned
+ * OMX_Sts_Err - if:
+ * - In DecodeVLCZigzag_IntraDCVLC, dc_size > 12
+ * - At least one of mark bits equals zero
+ * - Illegal stream encountered; code cannot be located in VLC table
+ * - Forbidden code encountered in the VLC FLC table.
+ * - The number of coefficients is greater than 64
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraACVLC (
+ const OMX_U8 **ppBitStream, /* in/out: *ppBitStream is updated to the current byte after decoding */
+ OMX_INT *pBitOffset, /* in/out: bit position in the byte at *ppBitStream; valid in [0-7] */
+ OMX_S16 *pDst, /* out: coefficient buffer of the current block; must be 4-byte aligned */
+ OMX_U8 predDir, /* in: AC prediction direction, selects the zigzag scan pattern; [0,2] */
+ OMX_INT shortVideoHeader /* in: 0 = escape modes 0-3, 1 = escape mode 4 */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeVLCZigzag_Inter (6.2.5.2.3)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one inter-coded block.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the stream buffer
+ * pBitOffset - pointer to the next available bit in the current stream
+ * byte referenced by *ppBitStream. The parameter *pBitOffset is
+ * valid within the range [0-7].
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded such
+ * that it points to the current byte in the stream buffer
+ * pBitOffset - *pBitOffset is updated after decoding such that it points
+ * to the next available bit in the stream byte referenced by
+ * *ppBitStream
+ * pDst - pointer to the coefficient buffer of current block; must be
+ * 4-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - pDst is not 4-byte aligned
+ * - *pBitOffset exceeds [0,7]
+ * OMX_Sts_Err - status error, if:
+ * - At least one mark bit is equal to zero
+ * - Encountered an illegal stream code that cannot be found in the VLC table
+ * - Encountered an illegal code in the VLC FLC table
+ * - The number of coefficients is greater than 64
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_Inter (
+ const OMX_U8 **ppBitStream, /* in/out: *ppBitStream is updated to the current byte after decoding */
+ OMX_INT *pBitOffset, /* in/out: next available bit in the byte at *ppBitStream; valid in [0-7] */
+ OMX_S16 *pDst, /* out: coefficient buffer of the current block; must be 4-byte aligned */
+ OMX_INT shortVideoHeader /* in: 0 = escape modes 0-3, 1 = escape mode 4 */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_QuantInvIntra_I (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an intra/inter coded
+ * block. Supports bits_per_pixel = 8. The output coefficients are clipped to
+ * the range [-2048, 2047].
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input (quantized) intra/inter block; must be
+ * aligned on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale)
+ * videoComp - video component type of the current block. Takes one of the
+ * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra
+ * version only).
+ * shortVideoHeader - binary flag indicating presence of short_video_header
+ * (intra version only).
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (dequantized) intra/inter block
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; one or more of the following is
+ * true:
+ * - pSrcDst is NULL
+ * - QP <= 0 or QP >=31
+ * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE.
+ *
+ */
+OMXResult omxVCM4P2_QuantInvIntra_I (
+ OMX_S16 *pSrcDst, /* in/out: quantized block in, dequantized block out; 16-byte aligned */
+ OMX_INT QP, /* in: quantization parameter (quantizer_scale) */
+ OMXVCM4P2VideoComponent videoComp, /* in: OMX_VC_LUMINANCE or OMX_VC_CHROMINANCE */
+ OMX_INT shortVideoHeader /* in: binary flag indicating presence of short_video_header */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_QuantInvInter_I (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an intra/inter coded
+ * block. Supports bits_per_pixel = 8. The output coefficients are clipped to
+ * the range [-2048, 2047].
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input (quantized) intra/inter block; must be
+ * aligned on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale)
+ * videoComp - video component type of the current block. Takes one of the
+ * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra
+ * version only).
+ * shortVideoHeader - binary flag indicating presence of short_video_header
+ * (intra version only).
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (dequantized) intra/inter block
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; one or more of the following is
+ * true:
+ * - pSrcDst is NULL
+ * - QP <= 0 or QP >=31
+ * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE
+ * (intra version only).
+ *
+ */
+OMXResult omxVCM4P2_QuantInvInter_I (
+ OMX_S16 *pSrcDst, /* in/out: quantized block in, dequantized block out; 16-byte aligned */
+ OMX_INT QP /* in: quantization parameter (quantizer_scale) */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Intra (6.2.5.4.1)
+ *
+ * Description:
+ * Decodes the INTRA block coefficients. Inverse quantization, inversely
+ * zigzag positioning, and IDCT, with appropriate clipping on each step, are
+ * performed on the coefficients. The results are then placed in the output
+ * frame/plane on a pixel basis. Note: This function will be used only when
+ * at least one non-zero AC coefficient of current block exists in the bit
+ * stream. The DC only condition will be handled in another function.
+ *
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream buffer. There is no boundary check for the bit stream
+ * buffer.
+ * pBitOffset - pointer to the bit position in the byte pointed to by
+ * *ppBitStream. *pBitOffset is valid within [0-7].
+ * step - width of the destination plane
+ * pCoefBufRow - pointer to the coefficient row buffer; must be aligned on
+ * an 8-byte boundary.
+ * pCoefBufCol - pointer to the coefficient column buffer; must be aligned
+ * on an 8-byte boundary.
+ * curQP - quantization parameter of the macroblock which the current block
+ * belongs to
+ * pQPBuf - pointer to the quantization parameter buffer
+ * blockIndex - block index indicating the component type and position as
+ * defined in [ISO14496-2], subclause 6.1.3.8, Figure 6-5.
+ * intraDCVLC - a code determined by intra_dc_vlc_thr and QP. This allows a
+ * mechanism to switch between two VLC for coding of Intra DC
+ * coefficients as per [ISO14496-2], Table 6-21.
+ * ACPredFlag - a flag equal to ac_pred_flag (of luminance) indicating if
+ * the ac coefficients of the first row or first column are
+ * differentially coded for intra coded macroblock.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded, so
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream
+ * pDst - pointer to the block in the destination plane; must be aligned on
+ * an 8-byte boundary.
+ * pCoefBufRow - pointer to the updated coefficient row buffer.
+ * pCoefBufCol - pointer to the updated coefficient column buffer.
+ * Note: The coefficient buffers must be updated in accordance with the
+ * update procedure defined in section 6.2.2.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pCoefBufRow, pCoefBufCol,
+ * pQPBuf, pDst.
+ * - *pBitOffset exceeds [0,7]
+ * - curQP exceeds (1, 31)
+ * - blockIndex exceeds [0,5]
+ * - step is not the multiple of 8
+ * - a pointer alignment requirement was violated.
+ * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Intra.
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Intra (
+ const OMX_U8 **ppBitStream, /* in/out: current byte in the bit stream buffer; no boundary check is done */
+ OMX_INT *pBitOffset, /* in/out: bit position in the byte at *ppBitStream; valid within [0-7] */
+ OMX_U8 *pDst, /* out: block in the destination plane; 8-byte aligned */
+ OMX_INT step, /* in: width of the destination plane; must be a multiple of 8 */
+ OMX_S16 *pCoefBufRow, /* in/out: coefficient row buffer; 8-byte aligned */
+ OMX_S16 *pCoefBufCol, /* in/out: coefficient column buffer; 8-byte aligned */
+ OMX_U8 curQP, /* in: quantization parameter of the macroblock containing this block */
+ const OMX_U8 *pQPBuf, /* in: quantization parameter buffer */
+ OMX_INT blockIndex, /* in: component type and position; must lie in [0,5] */
+ OMX_INT intraDCVLC, /* in: code determined by intra_dc_vlc_thr and QP */
+ OMX_INT ACPredFlag, /* in: equal to ac_pred_flag (of luminance) */
+ OMX_INT shortVideoHeader /* in: 1 = linear intra DC mode, 0 = non linear intra DC mode */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Inter (6.2.5.4.2)
+ *
+ * Description:
+ * Decodes the INTER block coefficients. This function performs inverse
+ * quantization, inverse zigzag positioning, and IDCT (with appropriate
+ * clipping on each step) on the coefficients. The results (residuals) are
+ * placed in a contiguous array of 64 elements. For INTER block, the output
+ * buffer holds the residuals for further reconstruction.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream buffer. There is no boundary check for the bit stream
+ * buffer.
+ * pBitOffset - pointer to the bit position in the byte pointed to by
+ * *ppBitStream. *pBitOffset is valid within [0-7]
+ * QP - quantization parameter
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded, so
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream
+ * pDst - pointer to the decoded residual buffer (a contiguous array of 64
+ * elements of OMX_S16 data type); must be aligned on a 16-byte
+ * boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - *pBitOffset exceeds [0,7]
+ * - QP <= 0.
+ * - pDst is not 16-byte aligned
+ * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Inter .
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Inter (
+ const OMX_U8 **ppBitStream, /* in/out: current byte in the bit stream buffer; no boundary check is done */
+ OMX_INT *pBitOffset, /* in/out: bit position in the byte at *ppBitStream; valid within [0-7] */
+ OMX_S16 *pDst, /* out: 64 contiguous OMX_S16 residuals; 16-byte aligned */
+ OMX_INT QP, /* in: quantization parameter; QP <= 0 yields OMX_Sts_BadArgErr */
+ OMX_INT shortVideoHeader /* in: binary flag indicating presence of short_video_header */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_PredictReconCoefIntra (6.2.5.4.3)
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+ * to the function call, prediction direction (predDir) should be selected as
+ * specified in [ISO14496-2], subclause 7.4.3.1.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the coefficient buffer which contains the quantized
+ * coefficient residuals (PQF) of the current block; must be
+ * aligned on a 4-byte boundary. The output coefficients are
+ * saturated to the range [-2048, 2047].
+ * pPredBufRow - pointer to the coefficient row buffer; must be aligned on
+ * a 4-byte boundary.
+ * pPredBufCol - pointer to the coefficient column buffer; must be aligned
+ * on a 4-byte boundary.
+ * curQP - quantization parameter of the current block. curQP may equal to
+ * predQP especially when the current block and the predictor block
+ * are in the same macroblock.
+ * predQP - quantization parameter of the predictor block
+ * predDir - indicates the prediction direction which takes one of the
+ * following values:
+ * - OMX_VC_HORIZONTAL - predict horizontally
+ * - OMX_VC_VERTICAL - predict vertically
+ * ACPredFlag - a flag indicating if AC prediction should be performed. It
+ * is equal to ac_pred_flag in the bit stream syntax of MPEG-4
+ * videoComp - video component type (luminance or chrominance) of the
+ * current block
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the coefficient buffer which contains the quantized
+ * coefficients (QF) of the current block
+ * pPredBufRow - pointer to the updated coefficient row buffer
+ * pPredBufCol - pointer to the updated coefficient column buffer.
+ * Note: Buffer update: Update the AC prediction buffer (both row and
+ * column buffer).
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the pointers is NULL:
+ * pSrcDst, pPredBufRow, or pPredBufCol.
+ * - curQP <= 0,
+ * - predQP <= 0,
+ * - curQP >31,
+ * - predQP > 31,
+ * - predDir exceeds [1,2]
+ * - pSrcDst, pPredBufRow, or pPredBufCol is not 4-byte aligned.
+ *
+ */
+OMXResult omxVCM4P2_PredictReconCoefIntra (
+ OMX_S16 *pSrcDst, /* in/out: coefficient residuals (PQF) in, coefficients (QF) out; 4-byte aligned */
+ OMX_S16 *pPredBufRow, /* in/out: coefficient row buffer; 4-byte aligned */
+ OMX_S16 *pPredBufCol, /* in/out: coefficient column buffer; 4-byte aligned */
+ OMX_INT curQP, /* in: quantization parameter of the current block; 1..31 */
+ OMX_INT predQP, /* in: quantization parameter of the predictor block; 1..31 */
+ OMX_INT predDir, /* in: OMX_VC_HORIZONTAL or OMX_VC_VERTICAL */
+ OMX_INT ACPredFlag, /* in: equal to ac_pred_flag in the MPEG-4 bit stream syntax */
+ OMXVCM4P2VideoComponent videoComp /* in: luminance or chrominance component of the current block */
+);
+
+
+
+/**
+ * Function: omxVCM4P2_MCReconBlock (6.2.5.5.1)
+ *
+ * Description:
+ * Performs motion compensation prediction for an 8x8 block using
+ * interpolation described in [ISO14496-2], subclause 7.6.2.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the block in the reference plane.
+ * srcStep - distance between the start of consecutive lines in the
+ * reference plane, in bytes; must be a multiple of 8.
+ * dstStep - distance between the start of consecutive lines in the
+ * destination plane, in bytes; must be a multiple of 8.
+ * pSrcResidue - pointer to a buffer containing the 16-bit prediction
+ * residuals; must be 16-byte aligned. If the pointer is NULL, then
+ * no prediction is done, only motion compensation, i.e., the block
+ * is moved with interpolation.
+ * predictType - bilinear interpolation type, as defined in section
+ * 6.2.1.2.
+ * rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination buffer; must be 8-byte aligned. If
+ * prediction residuals are added then output intensities are
+ * clipped to the range [0,255].
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - pDst is not 8-byte aligned.
+ * - pSrcResidue is not 16-byte aligned.
+ * - one or more of the following pointers is NULL: pSrc or pDst.
+ * - either srcStep or dstStep is not a multiple of 8.
+ * - invalid type specified for the parameter predictType.
+ * - the parameter rndVal is not equal either to 0 or 1.
+ *
+ */
+OMXResult omxVCM4P2_MCReconBlock (
+ const OMX_U8 *pSrc, /* in: block in the reference plane */
+ OMX_INT srcStep, /* in: bytes between consecutive reference lines; multiple of 8 */
+ const OMX_S16 *pSrcResidue, /* in: 16-bit residuals, 16-byte aligned; NULL = motion compensation only */
+ OMX_U8 *pDst, /* out: destination buffer; 8-byte aligned */
+ OMX_INT dstStep, /* in: bytes between consecutive destination lines; multiple of 8 */
+ OMX_INT predictType, /* in: bilinear interpolation type (section 6.2.1.2) */
+ OMX_INT rndVal /* in: rounding control: 0 - disabled; 1 - enabled */
+);
+
+
+
+/* 6.3.1.1 Intra 16x16 Prediction Modes */
+/* Enumerates the intra_16x16 macroblock prediction modes; this is the type of OMXVCM4P10MBInfo.Intra16x16PredMode. */
+
+typedef enum {
+ OMX_VC_16X16_VERT = 0, /** Intra_16x16_Vertical */
+ OMX_VC_16X16_HOR = 1, /** Intra_16x16_Horizontal */
+ OMX_VC_16X16_DC = 2, /** Intra_16x16_DC */
+ OMX_VC_16X16_PLANE = 3 /** Intra_16x16_Plane */
+} OMXVCM4P10Intra16x16PredMode;
+
+
+
+/* 6.3.1.2 Intra 4x4 Prediction Modes */
+/* Enumerates the intra_4x4 prediction modes; this is the type of the predMode argument of omxVCM4P10_PredictIntra_4x4. */
+
+typedef enum {
+ OMX_VC_4X4_VERT = 0, /** Intra_4x4_Vertical */
+ OMX_VC_4X4_HOR = 1, /** Intra_4x4_Horizontal */
+ OMX_VC_4X4_DC = 2, /** Intra_4x4_DC */
+ OMX_VC_4X4_DIAG_DL = 3, /** Intra_4x4_Diagonal_Down_Left */
+ OMX_VC_4X4_DIAG_DR = 4, /** Intra_4x4_Diagonal_Down_Right */
+ OMX_VC_4X4_VR = 5, /** Intra_4x4_Vertical_Right */
+ OMX_VC_4X4_HD = 6, /** Intra_4x4_Horizontal_Down */
+ OMX_VC_4X4_VL = 7, /** Intra_4x4_Vertical_Left */
+ OMX_VC_4X4_HU = 8 /** Intra_4x4_Horizontal_Up */
+} OMXVCM4P10Intra4x4PredMode;
+
+
+
+/* 6.3.1.3 Chroma Prediction Modes */
+/* Enumerates the intra chroma prediction modes. The numbering differs from the 16x16 luma enum: here DC is 0 and VERT is 2. */
+
+typedef enum {
+ OMX_VC_CHROMA_DC = 0, /** Intra_Chroma_DC */
+ OMX_VC_CHROMA_HOR = 1, /** Intra_Chroma_Horizontal */
+ OMX_VC_CHROMA_VERT = 2, /** Intra_Chroma_Vertical */
+ OMX_VC_CHROMA_PLANE = 3 /** Intra_Chroma_Plane */
+} OMXVCM4P10IntraChromaPredMode;
+
+
+
+/* 6.3.1.4 Motion Estimation Modes */
+/* Enumerates the two H.264 motion estimation search modes (fast or full). */
+
+typedef enum {
+ OMX_VC_M4P10_FAST_SEARCH = 0, /** Fast motion search */
+ OMX_VC_M4P10_FULL_SEARCH = 1 /** Full motion search */
+} OMXVCM4P10MEMode;
+
+
+
+/* 6.3.1.5 Macroblock Types */
+/* Enumerates the H.264 macroblock types; this is the type of OMXVCM4P10MBInfo.mbType. */
+
+typedef enum {
+ OMX_VC_P_16x16 = 0, /* names follow the macroblock types defined by [ISO14496-10] */
+ OMX_VC_P_16x8 = 1,
+ OMX_VC_P_8x16 = 2,
+ OMX_VC_P_8x8 = 3,
+ OMX_VC_PREF0_8x8 = 4,
+ OMX_VC_INTER_SKIP = 5,
+ OMX_VC_INTRA_4x4 = 8, /* note: values 6 and 7 are not assigned in this enum */
+ OMX_VC_INTRA_16x16 = 9,
+ OMX_VC_INTRA_PCM = 10
+} OMXVCM4P10MacroblockType;
+
+
+
+/* 6.3.1.6 Sub-Macroblock Types */
+/* Enumerates the H.264 sub-macroblock types; this is the element type of OMXVCM4P10MBInfo.subMBType[]. */
+
+typedef enum {
+ OMX_VC_SUB_P_8x8 = 0, /* names follow the sub-macroblock types defined by [ISO14496-10] */
+ OMX_VC_SUB_P_8x4 = 1,
+ OMX_VC_SUB_P_4x8 = 2,
+ OMX_VC_SUB_P_4x4 = 3
+} OMXVCM4P10SubMacroblockType;
+
+
+
+/* 6.3.1.7 Variable Length Coding (VLC) Information: coefficient data for one block */
+
+typedef struct {
+ OMX_U8 uTrailing_Ones; /* Number of trailing ones; 3 at most */
+ OMX_U8 uTrailing_One_Signs; /* Signs of the trailing ones (NOTE(review): bit layout not stated here -- confirm against the spec) */
+ OMX_U8 uNumCoeffs; /* Total number of non-zero coefs, including trailing ones */
+ OMX_U8 uTotalZeros; /* Total number of zero coefs */
+ OMX_S16 iLevels[16]; /* Levels of non-zero coefs, in reverse zig-zag order */
+ OMX_U8 uRuns[16]; /* Runs for levels and trailing ones, in reverse zig-zag order */
+} OMXVCM4P10VLCInfo;
+
+
+
+/* 6.3.1.8 Macroblock Information: per-macroblock state (type, QPs, CBPs, motion vectors, intra modes) */
+
+typedef struct {
+ OMX_S32 sliceId; /* slice number */
+ OMXVCM4P10MacroblockType mbType; /* MB type */
+ OMXVCM4P10SubMacroblockType subMBType[4]; /* sub-block type, four entries per macroblock */
+ OMX_S32 qpy; /* qp for luma */
+ OMX_S32 qpc; /* qp for chroma */
+ OMX_U32 cbpy; /* CBP Luma */
+ OMX_U32 cbpc; /* CBP Chroma */
+ OMXVCMotionVector pMV0[4][4]; /* motion vectors in 1/4-pel units, pMV0[blocky][blockx] (blocky = 0~3, blockx = 0~3) */
+ OMXVCMotionVector pMVPred[4][4]; /* motion vector predictions in 1/4-pel units, pMVPred[blocky][blockx] (blocky = 0~3, blockx = 0~3) */
+ OMX_U8 pRefL0Idx[4]; /* reference picture indices */
+ OMXVCM4P10Intra16x16PredMode Intra16x16PredMode; /* best intra 16x16 prediction mode */
+ OMXVCM4P10Intra4x4PredMode pIntra4x4PredMode[16]; /* best intra 4x4 prediction mode for each block; block order as for pMV0 above (NOTE(review): flat [16] vs [4][4] mapping -- confirm) */
+} OMXVCM4P10MBInfo, *OMXVCM4P10MBInfoPtr;
+
+
+
+/* 6.3.1.9 Motion Estimation Parameters: block-split, sub-pel and search-range settings */
+
+typedef struct {
+ OMX_S32 blockSplitEnable8x8; /* enables 16x8, 8x16, 8x8 */
+ OMX_S32 blockSplitEnable4x4; /* enables splitting of 8x4, 4x8, 4x4 blocks */
+ OMX_S32 halfSearchEnable; /* NOTE(review): presumably 1=enable, 0=disable half-pel search -- confirm */
+ OMX_S32 quarterSearchEnable; /* NOTE(review): presumably 1=enable, 0=disable quarter-pel search -- confirm */
+ OMX_S32 intraEnable4x4; /* 1=enable, 0=disable */
+ OMX_S32 searchRange16x16; /* integer pixel units */
+ OMX_S32 searchRange8x8; /* NOTE(review): presumably integer pixel units like searchRange16x16 -- confirm */
+ OMX_S32 searchRange4x4; /* NOTE(review): presumably integer pixel units like searchRange16x16 -- confirm */
+} OMXVCM4P10MEParams;
+
+
+
+/**
+ * Function: omxVCM4P10_PredictIntra_4x4 (6.3.3.1.1)
+ *
+ * Description:
+ * Perform Intra_4x4 prediction for luma samples. If the upper-right block is
+ * not available, then duplication work should be handled inside the function.
+ * Users need not define them outside.
+ *
+ * Input Arguments:
+ *
+ * pSrcLeft - Pointer to the buffer of 4 left pixels:
+ * p[x, y] (x = -1, y = 0..3)
+ * pSrcAbove - Pointer to the buffer of 8 above pixels:
+ * p[x,y] (x = 0..7, y =-1);
+ * must be aligned on a 4-byte boundary.
+ * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ * leftStep - Step of left pixel buffer; must be a multiple of 4.
+ * dstStep - Step of the destination buffer; must be a multiple of 4.
+ * predMode - Intra_4x4 prediction mode.
+ * availability - Neighboring 4x4 block availability flag, refer to
+ * "Neighboring Macroblock Availability" .
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to the destination buffer; must be aligned on a 4-byte
+ * boundary.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pDst is NULL.
+ * dstStep < 4, or dstStep is not a multiple of 4.
+ * leftStep is not a multiple of 4.
+ * predMode is not in the valid range of enumeration
+ * OMXVCM4P10Intra4x4PredMode.
+ * predMode is OMX_VC_4X4_VERT, but availability doesn't set OMX_VC_UPPER
+ * indicating p[x,-1] (x = 0..3) is not available.
+ * predMode is OMX_VC_4X4_HOR, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..3) is not available.
+ * predMode is OMX_VC_4X4_DIAG_DL, but availability doesn't set
+ * OMX_VC_UPPER indicating p[x, -1] (x = 0..3) is not available.
+ * predMode is OMX_VC_4X4_DIAG_DR, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ * available.
+ * predMode is OMX_VC_4X4_VR, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ * available.
+ * predMode is OMX_VC_4X4_HD, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ * available.
+ * predMode is OMX_VC_4X4_VL, but availability doesn't set OMX_VC_UPPER
+ * indicating p[x,-1] (x = 0..3) is not available.
+ * predMode is OMX_VC_4X4_HU, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..3) is not available.
+ * availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ * availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ * either pSrcAbove or pDst is not aligned on a 4-byte boundary.
+ *
+ * Note:
+ * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ * they are not used by intra prediction as implied in predMode.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntra_4x4 (
+ const OMX_U8 *pSrcLeft, /* in: 4 left pixels p[-1, y], y = 0..3 */
+ const OMX_U8 *pSrcAbove, /* in: 8 above pixels p[x, -1], x = 0..7; 4-byte aligned */
+ const OMX_U8 *pSrcAboveLeft, /* in: above-left pixel p[-1, -1] */
+ OMX_U8 *pDst, /* out: destination buffer; 4-byte aligned */
+ OMX_INT leftStep, /* in: step of the left pixel buffer; multiple of 4 */
+ OMX_INT dstStep, /* in: step of the destination buffer; >= 4 and a multiple of 4 */
+ OMXVCM4P10Intra4x4PredMode predMode, /* in: Intra_4x4 prediction mode */
+ OMX_S32 availability /* in: neighboring 4x4 block availability flags (OMX_VC_UPPER, OMX_VC_LEFT, OMX_VC_UPPER_LEFT) */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_PredictIntra_16x16 (6.3.3.1.2)
+ *
+ * Description:
+ * Perform Intra_16x16 prediction for luma samples. If the upper-right block
+ * is not available, then duplication work should be handled inside the
+ * function. Users need not define them outside.
+ *
+ * Input Arguments:
+ *
+ * pSrcLeft - Pointer to the buffer of 16 left pixels: p[x, y] (x = -1, y =
+ * 0..15)
+ * pSrcAbove - Pointer to the buffer of 16 above pixels: p[x,y] (x = 0..15,
+ * y= -1); must be aligned on a 16-byte boundary.
+ * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ * leftStep - Step of left pixel buffer; must be a multiple of 16.
+ * dstStep - Step of the destination buffer; must be a multiple of 16.
+ * predMode - Intra_16x16 prediction mode, please refer to section 3.4.1.
+ * availability - Neighboring 16x16 MB availability flag. Refer to
+ * section 3.4.4.
+ *
+ * Output Arguments:
+ *
+ * pDst -Pointer to the destination buffer; must be aligned on a 16-byte
+ * boundary.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pDst is NULL.
+ * dstStep < 16, or dstStep is not a multiple of 16.
+ * leftStep is not a multiple of 16.
+ * predMode is not in the valid range of enumeration
+ * OMXVCM4P10Intra16x16PredMode
+ * predMode is OMX_VC_16X16_VERT, but availability doesn't set
+ * OMX_VC_UPPER indicating p[x,-1] (x = 0..15) is not available.
+ * predMode is OMX_VC_16X16_HOR, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..15) is not available.
+ * predMode is OMX_VC_16X16_PLANE, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1](x = 0..15), or p[-1,y] (y = 0..15), or p[-1,-1] is not
+ * available.
+ * availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ * availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ * either pSrcAbove or pDst is not aligned on a 16-byte boundary.
+ *
+ * Note:
+ * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ * they are not used by intra prediction implied in predMode.
+ * Note:
+ * OMX_VC_UPPER_RIGHT is not used in intra_16x16 luma prediction.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntra_16x16 (
+ const OMX_U8 *pSrcLeft,
+ const OMX_U8 *pSrcAbove,
+ const OMX_U8 *pSrcAboveLeft,
+ OMX_U8 *pDst,
+ OMX_INT leftStep,
+ OMX_INT dstStep,
+ OMXVCM4P10Intra16x16PredMode predMode,
+ OMX_S32 availability
+);
+
+
+
+/**
+ * Function: omxVCM4P10_PredictIntraChroma_8x8 (6.3.3.1.3)
+ *
+ * Description:
+ * Performs intra prediction for chroma samples.
+ *
+ * Input Arguments:
+ *
+ * pSrcLeft - Pointer to the buffer of 8 left pixels: p[x, y] (x = -1, y=
+ * 0..7).
+ * pSrcAbove - Pointer to the buffer of 8 above pixels: p[x,y] (x = 0..7, y
+ * = -1); must be aligned on an 8-byte boundary.
+ * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ * leftStep - Step of left pixel buffer; must be a multiple of 8.
+ * dstStep - Step of the destination buffer; must be a multiple of 8.
+ * predMode - Intra chroma prediction mode, please refer to section 3.4.3.
+ * availability - Neighboring chroma block availability flag, please refer
+ * to "Neighboring Macroblock Availability".
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to the destination buffer; must be aligned on an 8-byte
+ * boundary.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If any of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pDst is NULL.
+ * dstStep < 8 or dstStep is not a multiple of 8.
+ * leftStep is not a multiple of 8.
+ * predMode is not in the valid range of enumeration
+ * OMXVCM4P10IntraChromaPredMode.
+ * predMode is OMX_VC_CHROMA_VERT, but availability doesn't set
+ * OMX_VC_UPPER indicating p[x,-1] (x = 0..7) is not available.
+ * predMode is OMX_VC_CHROMA_HOR, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..7) is not available.
+ * predMode is OMX_VC_CHROMA_PLANE, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1](x = 0..7), or p[-1,y] (y = 0..7), or p[-1,-1] is not
+ * available.
+ * availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ * availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ * either pSrcAbove or pDst is not aligned on an 8-byte boundary.
+ *
+ * Note: pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ * they are not used by intra prediction implied in predMode.
+ *
+ * Note: OMX_VC_UPPER_RIGHT is not used in intra chroma prediction.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntraChroma_8x8 (
+ const OMX_U8 *pSrcLeft,
+ const OMX_U8 *pSrcAbove,
+ const OMX_U8 *pSrcAboveLeft,
+ OMX_U8 *pDst,
+ OMX_INT leftStep,
+ OMX_INT dstStep,
+ OMXVCM4P10IntraChromaPredMode predMode,
+ OMX_S32 availability
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InterpolateLuma (6.3.3.2.1)
+ *
+ * Description:
+ * Performs quarter-pixel interpolation for inter luma MB. It is assumed that
+ * the frame is already padded when calling this function.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the source reference frame buffer
+ * srcStep - reference frame step, in bytes; must be a multiple of roi.width
+ * dstStep - destination frame step, in bytes; must be a multiple of
+ * roi.width
+ * dx - Fractional part of horizontal motion vector component in 1/4 pixel
+ * unit; valid in the range [0,3]
+ * dy - Fractional part of vertical motion vector y component in 1/4 pixel
+ * unit; valid in the range [0,3]
+ * roi - Dimension of the interpolation region; the parameters roi.width and
+ * roi.height must be equal to either 4, 8, or 16.
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to the destination frame buffer:
+ * if roi.width==4, 4-byte alignment required
+ * if roi.width==8, 8-byte alignment required
+ * if roi.width==16, 16-byte alignment required
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pSrc or pDst is NULL.
+ * srcStep or dstStep < roi.width.
+ * dx or dy is out of range [0,3].
+ * roi.width or roi.height is out of range {4, 8, 16}.
+ * roi.width is equal to 4, but pDst is not 4 byte aligned.
+ * roi.width is equal to 8 or 16, but pDst is not 8 byte aligned.
+ * srcStep or dstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_InterpolateLuma (
+ const OMX_U8 *pSrc,
+ OMX_S32 srcStep,
+ OMX_U8 *pDst,
+ OMX_S32 dstStep,
+ OMX_S32 dx,
+ OMX_S32 dy,
+ OMXSize roi
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InterpolateChroma (6.3.3.2.2)
+ *
+ * Description:
+ * Performs 1/8-pixel interpolation for inter chroma MB.
+ *
+ * Input Arguments:
+ *
+ * pSrc -Pointer to the source reference frame buffer
+ * srcStep -Reference frame step in bytes
+ * dstStep -Destination frame step in bytes; must be a multiple of
+ * roi.width.
+ * dx -Fractional part of horizontal motion vector component in 1/8 pixel
+ * unit; valid in the range [0,7]
+ * dy -Fractional part of vertical motion vector component in 1/8 pixel
+ * unit; valid in the range [0,7]
+ * roi -Dimension of the interpolation region; the parameters roi.width and
+ * roi.height must be equal to either 2, 4, or 8.
+ *
+ * Output Arguments:
+ *
+ * pDst -Pointer to the destination frame buffer:
+ * if roi.width==2, 2-byte alignment required
+ * if roi.width==4, 4-byte alignment required
+ * if roi.width==8, 8-byte alignment required
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pSrc or pDst is NULL.
+ * srcStep or dstStep < 8.
+ * dx or dy is out of range [0-7].
+ * roi.width or roi.height is out of range {2,4,8}.
+ * roi.width is equal to 2, but pDst is not 2-byte aligned.
+ * roi.width is equal to 4, but pDst is not 4-byte aligned.
+ * roi.width is equal to 8, but pDst is not 8 byte aligned.
+ * srcStep or dstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_InterpolateChroma (
+ const OMX_U8 *pSrc,
+ OMX_S32 srcStep,
+ OMX_U8 *pDst,
+ OMX_S32 dstStep,
+ OMX_S32 dx,
+ OMX_S32 dy,
+ OMXSize roi
+);
+
+
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingLuma_VerEdge_I (6.3.3.3.1)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the luma
+ * macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the input macroblock; must be 16-byte aligned.
+ * srcdstStep -Step of the arrays; must be a multiple of 16.
+ * pAlpha -Array of size 2 of alpha thresholds (the first item is the alpha
+ * threshold for the external vertical edge, and the second item is
+ * for the internal vertical edge); per [ISO14496-10] alpha values
+ * must be in the range [0,255].
+ * pBeta -Array of size 2 of beta thresholds (the first item is the beta
+ * threshold for the external vertical edge, and the second item is
+ * for the internal vertical edge); per [ISO14496-10] beta values
+ * must be in the range [0,18].
+ * pThresholds -Array of size 16 of Thresholds (TC0) (values for the left
+ * edge of each 4x4 block, arranged in vertical block order); must
+ * be aligned on a 4-byte boundary. Per [ISO14496-10] values must
+ * be in the range [0,25].
+ * pBS -Array of size 16 of BS parameters (arranged in vertical block
+ * order); valid in the range [0,4] with the following
+ * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii)
+ * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst -Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * Either of the pointers in pSrcDst, pAlpha, pBeta, pThresholds, or pBS
+ * is NULL.
+ * Either pThresholds or pBS is not aligned on a 4-byte boundary.
+ * pSrcDst is not 16-byte aligned.
+ * srcdstStep is not a multiple of 16.
+ * pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * One or more entries in the table pThresholds[0..15] is outside of the
+ * range [0,25].
+ * pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 &&
+ * pBS[i^3]!=4) for 0<=i<=3.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingLuma_VerEdge_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingLuma_HorEdge_I (6.3.3.3.2)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four horizontal edges of the luma
+ * macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 16-byte aligned.
+ * srcdstStep - step of the arrays; must be a multiple of 16.
+ * pAlpha - array of size 2 of alpha thresholds (the first item is the alpha
+ * threshold for the external horizontal edge, and the second item is
+ * for the internal horizontal edge); per [ISO14496-10] alpha
+ * values must be in the range [0,255].
+ * pBeta - array of size 2 of beta thresholds (the first item is the beta
+ * threshold for the external horizontal edge, and the second item
+ * is for the internal horizontal edge). Per [ISO14496-10] beta
+ * values must be in the range [0,18].
+ * pThresholds - array of size 16 containing thresholds, TC0, for the top
+ * horizontal edge of each 4x4 block, arranged in horizontal block
+ * order; must be aligned on a 4-byte boundary. Per [ISO14496-10]
+ * values must be in the range [0,25].
+ * pBS - array of size 16 of BS parameters (arranged in horizontal block
+ * order); valid in the range [0,4] with the following
+ * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii)
+ * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst -Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr, if one of the following cases occurs:
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds, or pBS.
+ * - either pThresholds or pBS is not aligned on a 4-byte boundary.
+ * - pSrcDst is not 16-byte aligned.
+ * - srcdstStep is not a multiple of 16.
+ * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * - pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * - One or more entries in the table pThresholds[0..15] is
+ * outside of the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingLuma_HorEdge_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingChroma_VerEdge_I (6.3.3.3.3)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the chroma
+ * macroblock (8x8).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the input macroblock; must be 8-byte aligned.
+ * srcdstStep - Step of the arrays; must be a multiple of 8.
+ * pAlpha - Array of size 2 of alpha thresholds (the first item is alpha
+ * threshold for external vertical edge, and the second item is for
+ * internal vertical edge); per [ISO14496-10] alpha values must be
+ * in the range [0,255].
+ * pBeta - Array of size 2 of beta thresholds (the first item is the beta
+ * threshold for the external vertical edge, and the second item is
+ * for the internal vertical edge); per [ISO14496-10] beta values
+ * must be in the range [0,18].
+ * pThresholds - Array of size 8 containing thresholds, TC0, for the left
+ * vertical edge of each 4x2 chroma block, arranged in vertical
+ * block order; must be aligned on a 4-byte boundary. Per
+ * [ISO14496-10] values must be in the range [0,25].
+ * pBS - Array of size 16 of BS parameters (values for each 2x2 chroma
+ * block, arranged in vertical block order). This parameter is the
+ * same as the pBS parameter passed into FilterDeblockingLuma_VerEdge_I;
+ * valid in the range [0,4] with the following restrictions: i)
+ * pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and
+ * only if pBS[i^3]== 4. Must be 4 byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst -Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds, or pBS.
+ * - pSrcDst is not 8-byte aligned.
+ * - srcdstStep is not a multiple of 8.
+ * - pThresholds is not 4-byte aligned.
+ * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * - pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * - One or more entries in the table pThresholds[0..7] is outside
+ * of the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - pBS is not 4-byte aligned.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingChroma_VerEdge_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingChroma_HorEdge_I (6.3.3.3.4)
+ *
+ * Description:
+ * Performs in-place deblock filtering on the horizontal edges of the chroma
+ * macroblock (8x8).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 8-byte aligned.
+ * srcdstStep - array step; must be a multiple of 8.
+ * pAlpha - array of size 2 containing alpha thresholds; the first element
+ * contains the threshold for the external horizontal edge, and the
+ * second element contains the threshold for internal horizontal
+ * edge. Per [ISO14496-10] alpha values must be in the range
+ * [0,255].
+ * pBeta - array of size 2 containing beta thresholds; the first element
+ * contains the threshold for the external horizontal edge, and the
+ * second element contains the threshold for the internal
+ * horizontal edge. Per [ISO14496-10] beta values must be in the
+ * range [0,18].
+ * pThresholds - array of size 8 containing thresholds, TC0, for the top
+ * horizontal edge of each 2x4 chroma block, arranged in horizontal
+ * block order; must be aligned on a 4-byte boundary. Per
+ * [ISO14496-10] values must be in the range [0,25].
+ * pBS - array of size 16 containing BS parameters for each 2x2 chroma
+ * block, arranged in horizontal block order; valid in the range
+ * [0,4] with the following restrictions: i) pBS[i]== 4 may occur
+ * only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 4.
+ * Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst -Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr, if one of the following cases occurs:
+ * - any of the following pointers is NULL:
+ * pSrcDst, pAlpha, pBeta, pThresholds, or pBS.
+ * - pSrcDst is not 8-byte aligned.
+ * - srcdstStep is not a multiple of 8.
+ * - pThresholds is not 4-byte aligned.
+ * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * - pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * - One or more entries in the table pThresholds[0..7] is outside
+ * of the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - pBS is not 4-byte aligned.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingChroma_HorEdge_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DeblockLuma_I (6.3.3.3.5)
+ *
+ * Description:
+ * This function performs in-place deblock filtering on the horizontal and
+ * vertical edges of a luma macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 16-byte aligned.
+ * srcdstStep - image width; must be a multiple of 16.
+ * pAlpha - pointer to a 2x2 table of alpha thresholds, organized as
+ * follows: {external vertical edge, internal vertical edge,
+ * external horizontal edge, internal horizontal edge }. Per
+ * [ISO14496-10] alpha values must be in the range [0,255].
+ * pBeta - pointer to a 2x2 table of beta thresholds, organized as follows:
+ * {external vertical edge, internal vertical edge, external
+ * horizontal edge, internal horizontal edge }. Per [ISO14496-10]
+ * beta values must be in the range [0,18].
+ * pThresholds - pointer to a 16x2 table of threshold (TC0), organized as
+ * follows: {values for the left or above edge of each 4x4 block,
+ * arranged in vertical block order and then in horizontal block
+ * order}; must be aligned on a 4-byte boundary. Per [ISO14496-10]
+ * values must be in the range [0,25].
+ * pBS - pointer to a 16x2 table of BS parameters arranged in scan block
+ * order for vertical edges and then horizontal edges; valid in the
+ * range [0,4] with the following restrictions: i) pBS[i]== 4 may
+ * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]==
+ * 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds or pBS.
+ * - pSrcDst is not 16-byte aligned.
+ * - either pThresholds or pBS is not aligned on a 4-byte boundary.
+ * - one or more entries in the table pAlpha[0..3] is outside the range
+ * [0,255].
+ * - one or more entries in the table pBeta[0..3] is outside the range
+ * [0,18].
+ * - one or more entries in the table pThresholds[0..31] is outside of
+ * the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - srcdstStep is not a multiple of 16.
+ *
+ */
+OMXResult omxVCM4P10_DeblockLuma_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DeblockChroma_I (6.3.3.3.6)
+ *
+ * Description:
+ * Performs in-place deblocking filtering on all edges of the chroma
+ * macroblock (8x8).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 8-byte aligned.
+ * srcdstStep - step of the arrays; must be a multiple of 8.
+ * pAlpha - pointer to a 2x2 array of alpha thresholds, organized as
+ * follows: {external vertical edge, internal vertical edge,
+ * external horizontal edge, internal horizontal edge }. Per
+ * [ISO14496-10] alpha values must be in the range [0,255].
+ * pBeta - pointer to a 2x2 array of Beta Thresholds, organized as follows:
+ * { external vertical edge, internal vertical edge, external
+ * horizontal edge, internal horizontal edge }. Per [ISO14496-10]
+ * beta values must be in the range [0,18].
+ * pThresholds - array of size 8x2 of Thresholds (TC0) (values for the left
+ * or above edge of each 4x2 or 2x4 block, arranged in vertical
+ * block order and then in horizontal block order); must be aligned
+ * on a 4-byte boundary. Per [ISO14496-10] values must be in the
+ * range [0,25].
+ * pBS - array of size 16x2 of BS parameters (arranged in scan block order
+ * for vertical edges and then horizontal edges); valid in the
+ * range [0,4] with the following restrictions: i) pBS[i]== 4 may
+ * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]==
+ * 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds, or pBS.
+ * - pSrcDst is not 8-byte aligned.
+ * - either pThresholds or pBS is not 4-byte aligned.
+ * - one or more entries in the table pAlpha[0..3] is outside the range
+ * [0,255].
+ * - one or more entries in the table pBeta[0..3] is outside the range
+ * [0,18].
+ * - one or more entries in the table pThresholds[0..15] is outside of
+ * the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - srcdstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_DeblockChroma_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (6.3.4.1.1)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse raster scan for a 2x2 block of
+ * ChromaDCLevel. The decoded coefficients in the packed position-coefficient
+ * buffer are stored in reverse zig-zag order, i.e., the first buffer element
+ * contains the last non-zero position-coefficient pair of the block. Within
+ * each position-coefficient pair, the position entry indicates the
+ * raster-scan position of the coefficient, while the coefficient entry
+ * contains the coefficient value.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - Double pointer to current byte in bit stream buffer
+ * pOffset - Pointer to current bit position in the byte pointed to by
+ * *ppBitStream; valid in the range [0,7].
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after each block is decoded
+ * pOffset - *pOffset is updated after each block is decoded
+ * pNumCoeff - Pointer to the number of nonzero coefficients in this block
+ * ppPosCoefBuf - Double pointer to destination residual
+ * coefficient-position pair buffer. Buffer position
+ * (*ppPosCoefBuf) is updated upon return, unless there are only
+ * zero coefficients in the currently decoded block. In this case
+ * the caller is expected to bypass the transform/dequantization of
+ * the empty blocks.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppBitStream or pOffset is NULL.
+ * - ppPosCoefBuf or pNumCoeff is NULL.
+ * OMX_Sts_Err - if one of the following is true:
+ * - an illegal code is encountered in the bitstream
+ *
+ */
+OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (
+ const OMX_U8 **ppBitStream,
+ OMX_S32*pOffset,
+ OMX_U8 *pNumCoeff,
+ OMX_U8 **ppPosCoefbuf
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC (6.3.4.1.2)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of
+ * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel, and ChromaACLevel. Inverse
+ * field scan is not supported. The decoded coefficients in the packed
+ * position-coefficient buffer are stored in reverse zig-zag order, i.e., the
+ * first buffer element contains the last non-zero position-coefficient pair of
+ * the block. Within each position-coefficient pair, the position entry
+ * indicates the raster-scan position of the coefficient, while the
+ * coefficient entry contains the coefficient value.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream -Double pointer to current byte in bit stream buffer
+ * pOffset - Pointer to current bit position in the byte pointed to by
+ * *ppBitStream; valid in the range [0,7].
+ * sMaxNumCoeff - Maximum number of non-zero coefficients in current
+ * block
+ * sVLCSelect - VLC table selector, obtained from the number of non-zero
+ * coefficients contained in the above and left 4x4 blocks. It is
+ * equivalent to the variable nC described in H.264 standard Table
+ * 9-5, except its value can't be less than zero.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after each block is decoded.
+ * Buffer position (*ppPosCoefBuf) is updated upon return, unless
+ * there are only zero coefficients in the currently decoded block.
+ * In this case the caller is expected to bypass the
+ * transform/dequantization of the empty blocks.
+ * pOffset - *pOffset is updated after each block is decoded
+ * pNumCoeff - Pointer to the number of nonzero coefficients in this block
+ * ppPosCoefBuf - Double pointer to destination residual
+ * coefficient-position pair buffer
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppBitStream or pOffset is NULL.
+ * - ppPosCoefBuf or pNumCoeff is NULL.
+ * - sMaxNumCoeff is not equal to either 15 or 16.
+ * - sVLCSelect is less than 0.
+ *
+ * OMX_Sts_Err - if one of the following is true:
+ * - an illegal code is encountered in the bitstream
+ *
+ */
+OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC (
+ const OMX_U8 **ppBitStream,
+ OMX_S32 *pOffset,
+ OMX_U8 *pNumCoeff,
+ OMX_U8 **ppPosCoefbuf,
+ OMX_INT sVLCSelect,
+ OMX_INT sMaxNumCoeff
+);
+
+
+
+/**
+ * Function: omxVCM4P10_TransformDequantLumaDCFromPair (6.3.4.2.1)
+ *
+ * Description:
+ * Reconstructs the 4x4 LumaDC block from the coefficient-position pair
+ * buffer, performs integer inverse transformation and dequantization for
+ * 4x4 LumaDC coefficients, and updates the pair buffer pointer to the next
+ * non-empty block.
+ *
+ * Input Arguments:
+ *
+ * ppSrc - Double pointer to residual coefficient-position pair buffer
+ * output by CAVLC decoding
+ * QP - Quantization parameter QpY
+ *
+ * Output Arguments:
+ *
+ * ppSrc - *ppSrc is updated to the start of next non empty block
+ * pDst - Pointer to the reconstructed 4x4 LumaDC coefficients buffer; must
+ * be aligned on an 8-byte boundary.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppSrc or pDst is NULL.
+ * - pDst is not 8 byte aligned.
+ * - QP is not in the range of [0-51].
+ *
+ */
+OMXResult omxVCM4P10_TransformDequantLumaDCFromPair (
+ const OMX_U8 **ppSrc,
+ OMX_S16 *pDst,
+ OMX_INT QP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_TransformDequantChromaDCFromPair (6.3.4.2.2)
+ *
+ * Description:
+ * Reconstruct the 2x2 ChromaDC block from coefficient-position pair buffer,
+ * perform integer inverse transformation, and dequantization for 2x2 chroma
+ * DC coefficients, and update the pair buffer pointer to next non-empty
+ * block.
+ *
+ * Input Arguments:
+ *
+ * ppSrc - Double pointer to residual coefficient-position pair buffer
+ * output by CAVLC decoding
+ * QP - Quantization parameter QpC
+ *
+ * Output Arguments:
+ *
+ * ppSrc - *ppSrc is updated to the start of next non empty block
+ * pDst - Pointer to the reconstructed 2x2 ChromaDC coefficients buffer;
+ * must be aligned on a 4-byte boundary.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppSrc or pDst is NULL.
+ * - pDst is not 4-byte aligned.
+ * - QP is not in the range of [0-51].
+ *
+ */
+OMXResult omxVCM4P10_TransformDequantChromaDCFromPair (
+ const OMX_U8 **ppSrc,
+ OMX_S16 *pDst,
+ OMX_INT QP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd (6.3.4.2.3)
+ *
+ * Description:
+ * Reconstruct the 4x4 residual block from coefficient-position pair buffer,
+ * perform dequantization and integer inverse transformation for 4x4 block of
+ * residuals with previous intra prediction or motion compensation data, and
+ * update the pair buffer pointer to next non-empty block. If pDC == NULL,
+ * there are 16 non-zero AC coefficients at most in the packed buffer starting
+ * from 4x4 block position 0; If pDC != NULL, there are 15 non-zero AC
+ * coefficients at most in the packed buffer starting from 4x4 block position
+ * 1.
+ *
+ * Input Arguments:
+ *
+ * ppSrc - Double pointer to residual coefficient-position pair buffer
+ * output by CAVLC decoding
+ * pPred - Pointer to the predicted 4x4 block; must be aligned on a 4-byte
+ * boundary
+ * predStep - Predicted frame step size in bytes; must be a multiple of 4
+ * dstStep - Destination frame step in bytes; must be a multiple of 4
+ * pDC - Pointer to the DC coefficient of this block, NULL if it doesn't
+ * exist
+ * QP - QP Quantization parameter. It should be QpC in chroma 4x4 block
+ * decoding, otherwise it should be QpY.
+ * AC - Flag indicating if at least one non-zero AC coefficient exists
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the reconstructed 4x4 block data; must be aligned on a
+ * 4-byte boundary
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - pPred or pDst is NULL.
+ * - pPred or pDst is not 4-byte aligned.
+ * - predStep or dstStep is not a multiple of 4.
+ * - AC !=0 and QP is not in the range of [0-51] or ppSrc == NULL.
+ * - AC ==0 && pDC ==NULL.
+ *
+ */
+OMXResult omxVCM4P10_DequantTransformResidualFromPairAndAdd (
+ const OMX_U8 **ppSrc,
+ const OMX_U8 *pPred,
+ const OMX_S16 *pDC,
+ OMX_U8 *pDst,
+ OMX_INT predStep,
+ OMX_INT dstStep,
+ OMX_INT QP,
+ OMX_INT AC
+);
+
+
+
+/**
+ * Function: omxVCM4P10_MEGetBufSize (6.3.5.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification
+ * structure for the omxVCM4P10 motion estimation functions BlockMatch_Integer
+ * and MotionEstimationMB.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P10MEMode
+ * pMEParams - motion estimation parameters
+ *
+ * Output Arguments:
+ *
+ * pSize - pointer to the number of bytes required for the motion
+ * estimation specification structure
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - pMEParams or pSize is NULL.
+ * - an invalid MEmode is specified.
+ *
+ */
+OMXResult omxVCM4P10_MEGetBufSize (
+ OMXVCM4P10MEMode MEmode,
+ const OMXVCM4P10MEParams *pMEParams,
+ OMX_U32 *pSize
+);
+
+
+
+/**
+ * Function: omxVCM4P10_MEInit (6.3.5.1.2)
+ *
+ * Description:
+ * Initializes the vendor-specific specification structure required for the
+ * omxVCM4P10 motion estimation functions: BlockMatch_Integer and
+ * MotionEstimationMB. Memory for the specification structure *pMESpec must be
+ * allocated prior to calling the function, and should be aligned on a 4-byte
+ * boundary. The number of bytes required for the specification structure can
+ * be determined using the function omxVCM4P10_MEGetBufSize. Following
+ * initialization by this function, the vendor-specific structure *pMESpec
+ * should contain an implementation-specific representation of all motion
+ * estimation parameters received via the structure pMEParams, for example
+ * searchRange16x16, searchRange8x8, etc.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P10MEMode
+ * pMEParams - motion estimation parameters
+ * pMESpec - pointer to the uninitialized ME specification structure
+ *
+ * Output Arguments:
+ *
+ * pMESpec - pointer to the initialized ME specification structure
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - pMEParams or pMESpec is NULL.
+ * NOTE(review): the specification text names pSize here, but this
+ * function has no such parameter; pMESpec is presumably intended -
+ * confirm.
+ * - an invalid value was specified for the parameter MEmode
+ * - a negative or zero value was specified for one of the search ranges
+ * (e.g., pMEParams->searchRange8x8, pMEParams->searchRange16x16, etc.)
+ * - either in isolation or in combination, one or more of the enables or
+ * search ranges in the structure *pMEParams were configured such
+ * that the requested behavior fails to comply with [ISO14496-10].
+ *
+ */
+OMXResult omxVCM4P10_MEInit (
+ OMXVCM4P10MEMode MEmode,
+ const OMXVCM4P10MEParams *pMEParams,
+ void *pMESpec
+);
+
+
+
+/**
+ * Function: omxVCM4P10_BlockMatch_Integer (6.3.5.2.1)
+ *
+ * Description:
+ * Performs integer block match. Returns best MV and associated cost.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrgY - Pointer to the top-left corner of the current block:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the
+ * reference picture:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * nSrcOrgStep - Stride of the original picture plane, expressed in terms
+ * of integer pixels; must be a multiple of iBlockWidth.
+ * nSrcRefStep - Stride of the reference picture plane, expressed in terms
+ * of integer pixels
+ * pRefRect - pointer to the valid reference rectangle inside the reference
+ * picture plane
+ * pCurrPointPos - position of the current block in the current plane
+ * iBlockWidth - Width of the current block, expressed in terms of integer
+ * pixels; must be equal to either 4, 8, or 16.
+ * iBlockHeight - Height of the current block, expressed in terms of
+ * integer pixels; must be equal to either 4, 8, or 16.
+ * nLamda - Lambda factor; used to compute motion cost
+ * pMVPred - Predicted MV; used to compute motion cost, expressed in terms
+ * of 1/4-pel units
+ * pMVCandidate - Candidate MV; used to initialize the motion search,
+ * expressed in terms of integer pixels
+ * pMESpec - pointer to the ME specification structure
+ *
+ * Output Arguments:
+ *
+ * pBestMV - Best MV resulting from integer search, expressed in terms
+ * of 1/4-pel units
+ * pBestCost - Motion cost associated with the best MV; computed as
+ * SAD+Lamda*BitsUsedByMV
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - any of the following pointers are NULL:
+ * pSrcOrgY, pSrcRefY, pRefRect, pMVPred, pMVCandidate, or pMESpec.
+ * - Either iBlockWidth or iBlockHeight are values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Integer (
+ const OMX_U8 *pSrcOrgY,
+ OMX_S32 nSrcOrgStep,
+ const OMX_U8 *pSrcRefY,
+ OMX_S32 nSrcRefStep,
+ const OMXRect *pRefRect,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ OMX_U8 iBlockWidth,
+ OMX_U8 iBlockHeight,
+ OMX_U32 nLamda,
+ const OMXVCMotionVector *pMVPred,
+ const OMXVCMotionVector *pMVCandidate,
+ OMXVCMotionVector *pBestMV,
+ OMX_S32 *pBestCost,
+ void *pMESpec
+);
+
+
+
+/**
+ * Function: omxVCM4P10_BlockMatch_Half (6.3.5.2.2)
+ *
+ * Description:
+ * Performs a half-pel block match using results from a prior integer search.
+ * Returns the best MV and associated cost. This function estimates the
+ * half-pixel motion vector by interpolating the integer resolution motion
+ * vector referenced by the input parameter pSrcDstBestMV, i.e., the initial
+ * integer MV is generated externally. The function
+ * omxVCM4P10_BlockMatch_Integer may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrgY - Pointer to the current position in original picture plane:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the
+ * reference picture:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * nSrcOrgStep - Stride of the original picture plane in terms of full
+ * pixels; must be a multiple of iBlockWidth.
+ * nSrcRefStep - Stride of the reference picture plane in terms of full
+ * pixels
+ * iBlockWidth - Width of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * iBlockHeight - Height of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * nLamda - Lambda factor, used to compute motion cost
+ * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to
+ * compute motion cost
+ * pSrcDstBestMV - (in/out) On entry, the best MV resulting from a prior
+ * integer search, represented in terms of 1/4-pel units
+ *
+ * Output Arguments:
+ *
+ * pSrcDstBestMV - (in/out) On return, the best MV resulting from the
+ * half-pel search, expressed in terms of 1/4-pel units
+ * pBestCost - Motion cost associated with the best MV; computed as
+ * SAD+Lamda*BitsUsedByMV
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - any of the following pointers is NULL: pSrcOrgY, pSrcRefY,
+ * pSrcDstBestMV, pMVPred, pBestCost
+ * - iBlockWidth or iBlockHeight is equal to a value other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Half (
+ const OMX_U8 *pSrcOrgY,
+ OMX_S32 nSrcOrgStep,
+ const OMX_U8 *pSrcRefY,
+ OMX_S32 nSrcRefStep,
+ OMX_U8 iBlockWidth,
+ OMX_U8 iBlockHeight,
+ OMX_U32 nLamda,
+ const OMXVCMotionVector *pMVPred,
+ OMXVCMotionVector *pSrcDstBestMV,
+ OMX_S32 *pBestCost
+);
+
+
+
+/**
+ * Function: omxVCM4P10_BlockMatch_Quarter (6.3.5.2.3)
+ *
+ * Description:
+ * Performs a quarter-pel block match using results from a prior half-pel
+ * search. Returns the best MV and associated cost. This function estimates
+ * the quarter-pixel motion vector by interpolating the half-pel resolution
+ * motion vector referenced by the input parameter pSrcDstBestMV, i.e., the
+ * initial half-pel MV is generated externally. The function
+ * omxVCM4P10_BlockMatch_Half may be used for half-pel motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrgY - Pointer to the current position in original picture plane:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the
+ * reference picture:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * nSrcOrgStep - Stride of the original picture plane in terms of full
+ * pixels; must be a multiple of iBlockWidth.
+ * nSrcRefStep - Stride of the reference picture plane in terms of full
+ * pixels
+ * iBlockWidth - Width of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * iBlockHeight - Height of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * nLamda - Lambda factor, used to compute motion cost
+ * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to
+ * compute motion cost
+ * pSrcDstBestMV - (in/out) On entry, the best MV resulting from a prior
+ * half-pel search, represented in terms of 1/4-pel units
+ *
+ * Output Arguments:
+ *
+ * pSrcDstBestMV - (in/out) On return, the best MV resulting from the
+ * quarter-pel search, expressed in terms of 1/4-pel units
+ * pBestCost - Motion cost associated with the best MV; computed as
+ * SAD+Lamda*BitsUsedByMV
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - One or more of the following pointers is NULL:
+ * pSrcOrgY, pSrcRefY, pSrcDstBestMV, pMVPred, pBestCost
+ * - iBlockWidth or iBlockHeight is equal to a value other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Quarter (
+ const OMX_U8 *pSrcOrgY,
+ OMX_S32 nSrcOrgStep,
+ const OMX_U8 *pSrcRefY,
+ OMX_S32 nSrcRefStep,
+ OMX_U8 iBlockWidth,
+ OMX_U8 iBlockHeight,
+ OMX_U32 nLamda,
+ const OMXVCMotionVector *pMVPred,
+ OMXVCMotionVector *pSrcDstBestMV,
+ OMX_S32 *pBestCost
+);
+
+
+
+/**
+ * Function: omxVCM4P10_MotionEstimationMB (6.3.5.3.1)
+ *
+ * Description:
+ * Performs MB-level motion estimation and selects best motion estimation
+ * strategy from the set of modes supported in baseline profile [ISO14496-10].
+ *
+ * Input Arguments:
+ *
+ * pSrcCurrBuf - Pointer to the current position in original picture plane;
+ * 16-byte alignment required
+ * pSrcRefBufList - Pointer to an array with 16 entries. Each entry points
+ * to the top-left corner of the co-located MB in a reference
+ * picture. The array is filled from low-to-high with valid
+ * reference frame pointers; the unused high entries should be set
+ * to NULL. Ordering of the reference frames should follow
+ * [ISO14496-10] subclause 8.2.4 Decoding Process for Reference
+ * Picture Lists. The entries must be 16-byte aligned.
+ * NOTE(review): the prototype below declares this parameter as
+ * pSrcRefBufList[15], which disagrees with the "16 entries"
+ * stated here. An array bound on a function parameter is not
+ * enforced by the compiler, so confirm the intended entry count.
+ * pSrcRecBuf - Pointer to the top-left corner of the co-located MB in the
+ * reconstructed picture; must be 16-byte aligned.
+ * SrcCurrStep - Width of the original picture plane in terms of full
+ * pixels; must be a multiple of 16.
+ * SrcRefStep - Width of the reference picture plane in terms of full
+ * pixels; must be a multiple of 16.
+ * SrcRecStep - Width of the reconstructed picture plane in terms of full
+ * pixels; must be a multiple of 16.
+ * pRefRect - Pointer to the valid reference rectangle; relative to the
+ * image origin.
+ * pCurrPointPos - Position of the current macroblock in the current plane.
+ * Lambda - Lagrange factor for computing the cost function
+ * pMESpec - Pointer to the motion estimation specification structure; must
+ * have been allocated and initialized prior to calling this
+ * function.
+ * pMBInter - Array, of dimension four, containing pointers to information
+ * associated with four adjacent type INTER MBs (Left, Top,
+ * Top-Left, Top-Right). Any pointer in the array may be set equal
+ * to NULL if the corresponding MB doesn't exist or is not of type
+ * INTER.
+ * - pMBInter[0] - Pointer to left MB information
+ * - pMBInter[1] - Pointer to top MB information
+ * - pMBInter[2] - Pointer to top-left MB information
+ * - pMBInter[3] - Pointer to top-right MB information
+ * pMBIntra - Array, of dimension four, containing pointers to information
+ * associated with four adjacent type INTRA MBs (Left, Top,
+ * Top-Left, Top-Right). Any pointer in the array may be set equal
+ * to NULL if the corresponding MB doesn't exist or is not of type
+ * INTRA.
+ * - pMBIntra[0] - Pointer to left MB information
+ * - pMBIntra[1] - Pointer to top MB information
+ * - pMBIntra[2] - Pointer to top-left MB information
+ * - pMBIntra[3] - Pointer to top-right MB information
+ * pSrcDstMBCurr - Pointer to information structure for the current MB.
+ * The following entries should be set prior to calling the
+ * function: sliceID - the number of the slice to which the
+ * current MB belongs.
+ *
+ * Output Arguments:
+ *
+ * pDstCost - Pointer to the minimum motion cost for the current MB.
+ * pDstBlockSAD - Pointer to the array of SADs for each of the sixteen luma
+ * 4x4 blocks in each MB. The block SADs are in scan order for
+ * each MB. For implementations that cannot compute the SAD values
+ * individually, the maximum possible value (0xffff) is returned
+ * for each of the 16 block SAD entries.
+ * pSrcDstMBCurr - Pointer to updated information structure for the current
+ * MB after MB-level motion estimation has been completed. The
+ * following fields are updated by the ME function. The following
+ * parameter set quantifies the MB-level ME search results:
+ * - MbType
+ * - subMBType[4]
+ * - pMV0[4][4]
+ * - pMVPred[4][4]
+ * - pRefL0Idx[4]
+ * - Intra16x16PredMode
+ * - pIntra4x4PredMode[4][4]
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - One or more of the following pointers is NULL: pSrcCurrBuf,
+ * pSrcRefBufList, pSrcRecBuf, pRefRect, pCurrPointPos, pMESpec,
+ * pMBInter, pMBIntra, pSrcDstMBCurr, pDstCost, pSrcRefBufList[0]
+ * - SrcRefStep, SrcRecStep are not multiples of 16
+ * - iBlockWidth or iBlockHeight are values other than 4, 8, or 16.
+ * NOTE(review): this function has no iBlockWidth or iBlockHeight
+ * parameter; this item looks carried over from the BlockMatch
+ * functions - confirm against the specification.
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_MotionEstimationMB (
+ const OMX_U8 *pSrcCurrBuf,
+ OMX_S32 SrcCurrStep,
+ const OMX_U8 *pSrcRefBufList[15],
+ OMX_S32 SrcRefStep,
+ const OMX_U8 *pSrcRecBuf,
+ OMX_S32 SrcRecStep,
+ const OMXRect *pRefRect,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ OMX_U32 Lambda,
+ void *pMESpec,
+ const OMXVCM4P10MBInfoPtr *pMBInter,
+ const OMXVCM4P10MBInfoPtr *pMBIntra,
+ OMXVCM4P10MBInfoPtr pSrcDstMBCurr,
+ OMX_INT *pDstCost,
+ OMX_U16 *pDstBlockSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SAD_4x (6.3.5.4.1)
+ *
+ * Description:
+ * This function calculates the SAD for 4x8 and 4x4 blocks.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to the original block; must be aligned on a 4-byte
+ * boundary.
+ * iStepOrg - Step of the original block buffer; must be a multiple of 4.
+ * pSrcRef - Pointer to the reference block
+ * iStepRef - Step of the reference block buffer
+ * iHeight - Height of the block; must be equal to either 4 or 8.
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer to the resulting SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - One or more of the following pointers is NULL:
+ * pSrcOrg, pSrcRef, or pDstSAD
+ * - iHeight is not equal to either 4 or 8.
+ * - iStepOrg is not a multiple of 4
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SAD_4x (
+ const OMX_U8 *pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8 *pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_S32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SADQuar_4x (6.3.5.4.2)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average
+ * of the other two (pSrcRef0 and pSrcRef1) for 4x8 or 4x4 blocks. Rounding
+ * is applied according to the convention (a+b+1)>>1.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the original block; must be aligned on a 4-byte
+ * boundary.
+ * pSrcRef0 - Pointer to reference block 0
+ * pSrcRef1 - Pointer to reference block 1
+ * iSrcStep - Step of the original block buffer; must be a multiple of 4.
+ * iRefStep0 - Step of reference block 0
+ * iRefStep1 - Step of reference block 1
+ * iHeight - Height of the block; must be equal to either 4 or 8.
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer to the resulting SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - iHeight is not equal to either 4 or 8.
+ * - One or more of the following pointers is NULL: pSrc, pSrcRef0,
+ * pSrcRef1, pDstSAD.
+ * - iSrcStep is not a multiple of 4
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_4x (
+ const OMX_U8 *pSrc,
+ const OMX_U8 *pSrcRef0,
+ const OMX_U8 *pSrcRef1,
+ OMX_U32 iSrcStep,
+ OMX_U32 iRefStep0,
+ OMX_U32 iRefStep1,
+ OMX_U32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SADQuar_8x (6.3.5.4.3)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average
+ * of the other two (pSrcRef0 and pSrcRef1) for 8x16, 8x8, or 8x4 blocks.
+ * Rounding is applied according to the convention (a+b+1)>>1.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the original block; must be aligned on an 8-byte
+ * boundary.
+ * pSrcRef0 - Pointer to reference block 0
+ * pSrcRef1 - Pointer to reference block 1
+ * iSrcStep - Step of the original block buffer; must be a multiple of 8.
+ * iRefStep0 - Step of reference block 0
+ * iRefStep1 - Step of reference block 1
+ * iHeight - Height of the block; must be equal to either 4, 8, or 16.
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer to the resulting SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - iHeight is not equal to either 4, 8, or 16.
+ * - One or more of the following pointers is NULL: pSrc, pSrcRef0,
+ * pSrcRef1, pDstSAD.
+ * - iSrcStep is not a multiple of 8
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_8x (
+ const OMX_U8 *pSrc,
+ const OMX_U8 *pSrcRef0,
+ const OMX_U8 *pSrcRef1,
+ OMX_U32 iSrcStep,
+ OMX_U32 iRefStep0,
+ OMX_U32 iRefStep1,
+ OMX_U32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SADQuar_16x (6.3.5.4.4)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average
+ * of the other two (pSrcRef0 and pSrcRef1) for 16x16 or 16x8 blocks.
+ * Rounding is applied according to the convention (a+b+1)>>1.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the original block; must be aligned on a 16-byte
+ * boundary.
+ * pSrcRef0 - Pointer to reference block 0
+ * pSrcRef1 - Pointer to reference block 1
+ * iSrcStep - Step of the original block buffer; must be a multiple of 16
+ * iRefStep0 - Step of reference block 0
+ * iRefStep1 - Step of reference block 1
+ * iHeight - Height of the block; must be equal to either 8 or 16
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer to the resulting SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - iHeight is not equal to either 8 or 16.
+ * - One or more of the following pointers is NULL: pSrc, pSrcRef0,
+ * pSrcRef1, pDstSAD.
+ * - iSrcStep is not a multiple of 16
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_16x (
+ const OMX_U8 *pSrc,
+ const OMX_U8 *pSrcRef0,
+ const OMX_U8 *pSrcRef1,
+ OMX_U32 iSrcStep,
+ OMX_U32 iRefStep0,
+ OMX_U32 iRefStep1,
+ OMX_U32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SATD_4x4 (6.3.5.4.5)
+ *
+ * Description:
+ * This function calculates the sum of absolute transform differences (SATD)
+ * for a 4x4 block by applying a Hadamard transform to the difference block
+ * and then calculating the sum of absolute coefficient values.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to the original block; must be aligned on a 4-byte
+ * boundary
+ * iStepOrg - Step of the original block buffer; must be a multiple of 4
+ * pSrcRef - Pointer to the reference block; must be aligned on a 4-byte
+ * boundary
+ * iStepRef - Step of the reference block buffer; must be a multiple of 4
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - pointer to the resulting SATD value
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pSrcRef, or pDstSAD
+ * - either pSrcOrg or pSrcRef is not aligned on a 4-byte boundary
+ * - iStepOrg <= 0 or iStepOrg is not a multiple of 4
+ * - iStepRef <= 0 or iStepRef is not a multiple of 4
+ *
+ */
+OMXResult omxVCM4P10_SATD_4x4 (
+ const OMX_U8 *pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8 *pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_U32 *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InterpolateHalfHor_Luma (6.3.5.5.1)
+ *
+ * Description:
+ * This function performs interpolation for two horizontal 1/2-pel positions -
+ * (-1/2, 0) and (1/2, 0) - around a full-pel position.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the top-left corner of the block used to interpolate in
+ * the reconstruction frame plane.
+ * iSrcStep - Step of the source buffer.
+ * iDstStep - Step of the destination (interpolation) buffer; must be a
+ * multiple of iWidth.
+ * iWidth - Width of the current block; must be equal to either 4, 8, or 16
+ * iHeight - Height of the current block; must be equal to 4, 8, or 16
+ *
+ * Output Arguments:
+ *
+ * pDstLeft - Pointer to the interpolation buffer of the left 1/2-pel
+ * position (-1/2, 0)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ * pDstRight - Pointer to the interpolation buffer of the right 1/2-pel
+ * position (1/2, 0)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrc, pDstLeft, or pDstRight
+ * - iWidth or iHeight have values other than 4, 8, or 16
+ * - iWidth==4 but pDstLeft and/or pDstRight is/are not aligned on a 4-byte boundary
+ * - iWidth==8 but pDstLeft and/or pDstRight is/are not aligned on an 8-byte boundary
+ * - iWidth==16 but pDstLeft and/or pDstRight is/are not aligned on a 16-byte boundary
+ * - any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_InterpolateHalfHor_Luma (
+ const OMX_U8 *pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8 *pDstLeft,
+ OMX_U8 *pDstRight,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InterpolateHalfVer_Luma (6.3.5.5.2)
+ *
+ * Description:
+ * This function performs interpolation for two vertical 1/2-pel positions -
+ * (0, -1/2) and (0, 1/2) - around a full-pel position.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to top-left corner of block used to interpolate in the
+ * reconstructed frame plane
+ * iSrcStep - Step of the source buffer.
+ * iDstStep - Step of the destination (interpolation) buffer; must be a
+ * multiple of iWidth.
+ * iWidth - Width of the current block; must be equal to either 4, 8, or 16
+ * iHeight - Height of the current block; must be equal to either 4, 8, or 16
+ *
+ * Output Arguments:
+ *
+ * pDstUp - Pointer to the interpolation buffer of the 1/2-pel position above
+ * the current full-pel position (0, -1/2)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ * pDstDown - Pointer to the interpolation buffer of the 1/2-pel position
+ * below the current full-pel position (0, 1/2)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrc, pDstUp, or pDstDown
+ * - iWidth or iHeight have values other than 4, 8, or 16
+ * - iWidth==4 but pDstUp and/or pDstDown is/are not aligned on a 4-byte boundary
+ * - iWidth==8 but pDstUp and/or pDstDown is/are not aligned on an 8-byte boundary
+ * - iWidth==16 but pDstUp and/or pDstDown is/are not aligned on a 16-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InterpolateHalfVer_Luma (
+ const OMX_U8 *pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8 *pDstUp,
+ OMX_U8 *pDstDown,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_Average_4x (6.3.5.5.3)
+ *
+ * Description:
+ * This function calculates the average of two 4x4 or 4x8 blocks. The result
+ * is rounded according to (a+b+1)/2.
+ *
+ * Input Arguments:
+ *
+ * pPred0 - Pointer to the top-left corner of reference block 0
+ * pPred1 - Pointer to the top-left corner of reference block 1
+ * iPredStep0 - Step of reference block 0; must be a multiple of 4.
+ * iPredStep1 - Step of reference block 1; must be a multiple of 4.
+ * iDstStep - Step of the destination buffer; must be a multiple of 4.
+ * iHeight - Height of the blocks; must be either 4 or 8.
+ *
+ * Output Arguments:
+ *
+ * pDstPred - Pointer to the destination buffer. 4-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pPred0, pPred1, or pDstPred
+ * - pDstPred is not aligned on a 4-byte boundary
+ * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 4
+ * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 4
+ * - iDstStep <= 0 or iDstStep is not a multiple of 4
+ * - iHeight is not equal to either 4 or 8
+ *
+ */
+OMXResult omxVCM4P10_Average_4x (
+ const OMX_U8 *pPred0,
+ const OMX_U8 *pPred1,
+ OMX_U32 iPredStep0,
+ OMX_U32 iPredStep1,
+ OMX_U8 *pDstPred,
+ OMX_U32 iDstStep,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_TransformQuant_ChromaDC (6.3.5.6.1)
+ *
+ * Description:
+ * This function performs a 2x2 Hadamard transform of chroma DC coefficients
+ * and then quantizes the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the 2x2 array of chroma DC coefficients. 8-byte
+ * alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ * bIntra - Indicates whether this is an INTRA block. 1-INTRA, 0-INTER
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - Pointer to transformed and quantized coefficients. 8-byte
+ * alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - pSrcDst is NULL
+ * - pSrcDst is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_ChromaDC (
+ OMX_S16 *pSrcDst,
+ OMX_U32 iQP,
+ OMX_U8 bIntra
+);
+
+
+
+/**
+ * Function: omxVCM4P10_TransformQuant_LumaDC (6.3.5.6.2)
+ *
+ * Description:
+ * This function performs a 4x4 Hadamard transform of luma DC coefficients
+ * and then quantizes the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the 4x4 array of luma DC coefficients. 16-byte
+ * alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - Pointer to transformed and quantized coefficients. 16-byte
+ * alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - pSrcDst is NULL
+ * - pSrcDst is not aligned on a 16-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_LumaDC (
+ OMX_S16 *pSrcDst,
+ OMX_U32 iQP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InvTransformDequant_LumaDC (6.3.5.6.3)
+ *
+ * Description:
+ * This function performs an inverse 4x4 Hadamard transform and then
+ * dequantizes the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the 4x4 array of the 4x4 Hadamard-transformed and
+ * quantized coefficients. 16-byte alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to inverse-transformed and dequantized coefficients.
+ * 16-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL: pSrc
+ * - pSrc or pDst is not aligned on a 16-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InvTransformDequant_LumaDC (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst,
+ OMX_U32 iQP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InvTransformDequant_ChromaDC (6.3.5.6.4)
+ *
+ * Description:
+ * This function performs an inverse 2x2 Hadamard transform and then
+ * dequantizes the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the 2x2 array of the 2x2 Hadamard-transformed and
+ * quantized coefficients. 8-byte alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to inverse-transformed and dequantized coefficients.
+ * 8-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL: pSrc
+ * - pSrc or pDst is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InvTransformDequant_ChromaDC (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst,
+ OMX_U32 iQP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InvTransformResidualAndAdd (6.3.5.7.1)
+ *
+ * Description:
+ * This function performs inverse an 4x4 integer transformation to produce
+ * the difference signal and then adds the difference to the prediction to get
+ * the reconstructed signal.
+ *
+ * Input Arguments:
+ *
+ * pSrcPred - Pointer to prediction signal. 4-byte alignment required.
+ * pDequantCoeff - Pointer to the transformed coefficients. 8-byte
+ * alignment required.
+ * iSrcPredStep - Step of the prediction buffer; must be a multiple of 4.
+ * iDstReconStep - Step of the destination reconstruction buffer; must be a
+ * multiple of 4.
+ * bAC - Indicate whether there is AC coefficients in the coefficients
+ * matrix.
+ *
+ * Output Arguments:
+ *
+ * pDstRecon -Pointer to the destination reconstruction buffer. 4-byte
+ * alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcPred, pDequantCoeff, pDstRecon
+ * - pSrcPred is not aligned on a 4-byte boundary
+ * - iSrcPredStep or iDstReconStep is not a multiple of 4.
+ * - pDequantCoeff is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InvTransformResidualAndAdd (
+ const OMX_U8 *pSrcPred, /* in: prediction signal; 4-byte aligned */
+ const OMX_S16 *pDequantCoeff, /* in: transformed coefficients; 8-byte aligned */
+ OMX_U8 *pDstRecon, /* out: destination reconstruction buffer; 4-byte aligned */
+ OMX_U32 iSrcPredStep, /* in: step of the prediction buffer; multiple of 4 */
+ OMX_U32 iDstReconStep, /* in: step of the reconstruction buffer; multiple of 4 */
+ OMX_U8 bAC /* in: indicates whether the coefficient matrix holds AC coefficients */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SubAndTransformQDQResidual (6.3.5.8.1)
+ *
+ * Description:
+ * This function subtracts the prediction signal from the original signal to
+ * produce the difference signal and then performs a 4x4 integer transform and
+ * quantization. The quantized transformed coefficients are stored as
+ * pDstQuantCoeff. This function can also output dequantized coefficients or
+ * unquantized DC coefficients optionally by setting the pointers
+ * pDstDeQuantCoeff, pDCCoeff.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to original signal. 4-byte alignment required.
+ * pSrcPred - Pointer to prediction signal. 4-byte alignment required.
+ * iSrcOrgStep - Step of the original signal buffer; must be a multiple of
+ * 4.
+ * iSrcPredStep - Step of the prediction signal buffer; must be a multiple
+ * of 4.
+ * pNumCoeff -Number of non-zero coefficients after quantization. If this
+ * parameter is not required, it is set to NULL.
+ * nThreshSAD - Zero-block early detection threshold. If this parameter is
+ * not required, it is set to 0.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ * bIntra - Indicates whether this is an INTRA block, either 1-INTRA or
+ * 0-INTER
+ *
+ * Output Arguments:
+ *
+ * pDstQuantCoeff - Pointer to the quantized transformed coefficients.
+ * 8-byte alignment required.
+ * pDstDeQuantCoeff - Pointer to the dequantized transformed coefficients
+ * if this parameter is not equal to NULL. 8-byte alignment
+ * required.
+ * pDCCoeff - Pointer to the unquantized DC coefficient if this parameter
+ * is not equal to NULL.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pSrcPred, pNumCoeff, pDstQuantCoeff,
+ * pDstDeQuantCoeff, pDCCoeff
+ * - pSrcOrg is not aligned on a 4-byte boundary
+ * - pSrcPred is not aligned on a 4-byte boundary
+ * - iSrcOrgStep is not a multiple of 4
+ * - iSrcPredStep is not a multiple of 4
+ * - pDstQuantCoeff or pDstDeQuantCoeff is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_SubAndTransformQDQResidual (
+ const OMX_U8 *pSrcOrg, /* in: original signal; 4-byte aligned */
+ const OMX_U8 *pSrcPred, /* in: prediction signal; 4-byte aligned */
+ OMX_U32 iSrcOrgStep, /* in: step of the original signal buffer; multiple of 4 */
+ OMX_U32 iSrcPredStep, /* in: step of the prediction signal buffer; multiple of 4 */
+ OMX_S16 *pDstQuantCoeff, /* out: quantized transformed coefficients; 8-byte aligned */
+ OMX_S16 *pDstDeQuantCoeff, /* out: dequantized transformed coefficients; 8-byte aligned */
+ OMX_S16 *pDCCoeff, /* out: unquantized DC coefficient */
+ OMX_S8 *pNumCoeff, /* number of non-zero coefficients after quantization */
+ OMX_U32 nThreshSAD, /* in: zero-block early detection threshold; 0 when not required */
+ OMX_U32 iQP, /* in: quantization parameter; must be in the range [0,51] */
+ OMX_U8 bIntra /* in: 1 = INTRA block, 0 = INTER block. NOTE(review): the header comment both allows NULL for pDstDeQuantCoeff/pDCCoeff/pNumCoeff and lists NULL as an error - confirm */
+);
+
+
+
+/**
+ * Function: omxVCM4P10_GetVLCInfo (6.3.5.9.1)
+ *
+ * Description:
+ * This function extracts run-length encoding (RLE) information from the
+ * coefficient matrix. The results are returned in an OMXVCM4P10VLCInfo
+ * structure.
+ *
+ * Input Arguments:
+ *
+ * pSrcCoeff - pointer to the transform coefficient matrix. 8-byte
+ * alignment required.
+ * pScanMatrix - pointer to the scan order definition matrix. For a luma
+ * block the scan matrix should follow [ISO14496-10] section 8.5.4,
+ * and should contain the values 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13,
+ * 10, 7, 11, 14, 15. For a chroma block, the scan matrix should
+ * contain the values 0, 1, 2, 3.
+ * bAC - indicates presence of a DC coefficient; 0 = DC coefficient
+ * present, 1= DC coefficient absent.
+ * MaxNumCoef - specifies the number of coefficients contained in the
+ * transform coefficient matrix, pSrcCoeff. The value should be 16
+ * for blocks of type LUMADC, LUMAAC, LUMALEVEL, and CHROMAAC. The
+ * value should be 4 for blocks of type CHROMADC.
+ *
+ * Output Arguments:
+ *
+ * pDstVLCInfo - pointer to structure that stores information for
+ * run-length coding.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcCoeff, pScanMatrix, pDstVLCInfo
+ * - pSrcCoeff is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_GetVLCInfo (
+ const OMX_S16 *pSrcCoeff, /* in: transform coefficient matrix; 8-byte aligned */
+ const OMX_U8 *pScanMatrix, /* in: scan order definition matrix */
+ OMX_U8 bAC, /* in: 0 = DC coefficient present, 1 = DC coefficient absent */
+ OMX_U32 MaxNumCoef, /* in: number of coefficients in pSrcCoeff; 16, or 4 for CHROMADC blocks */
+ OMXVCM4P10VLCInfo*pDstVLCInfo /* out: structure receiving the run-length coding information */
+);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /** end of #define _OMXVC_H_ */
+
+/** EOF */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_Average.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_Average.c
new file mode 100644
index 0000000..1e51077
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_Average.c
@@ -0,0 +1,78 @@
+/**
+ *
+ * File Name: armVCCOMM_Average.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate Average of two blocks of size iWidth X iHeight
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: armVCCOMM_Average
+ *
+ * Description:
+ * This function calculates the average of two blocks and stores the result.
+ *
+ * Remarks:
+ *
+ * [in] pPred0 Pointer to the top-left corner of reference block 0
+ * [in] pPred1 Pointer to the top-left corner of reference block 1
+ * [in] iPredStep0 Step of reference block 0
+ * [in] iPredStep1 Step of reference block 1
+ * [in] iDstStep Step of the destination buffer
+ * [in] iWidth Width of the blocks
+ * [in] iHeight Height of the blocks
+ * [out] pDstPred Pointer to the destination buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCCOMM_Average (
+    const OMX_U8*   pPred0,
+    const OMX_U8*   pPred1,
+    OMX_U32         iPredStep0,
+    OMX_U32         iPredStep1,
+    OMX_U8*         pDstPred,
+    OMX_U32         iDstStep,
+    OMX_U32         iWidth,
+    OMX_U32         iHeight
+)
+{
+    OMX_U32 row, col;
+    /* Reject missing buffers before any pixel is touched */
+    armRetArgErrIf(pPred0 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pPred1 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstPred == NULL, OMX_Sts_BadArgErr)
+
+    for (row = 0; row < iHeight; row++)
+    {
+        const OMX_U8 *pLine0 = pPred0 + row * iPredStep0;
+        const OMX_U8 *pLine1 = pPred1 + row * iPredStep1;
+        OMX_U8 *pLineDst = pDstPred + row * iDstStep;
+        for (col = 0; col < iWidth; col++)
+        {
+            /* Rounded mean (a + b + 1) / 2, with the sum formed in 32 bits */
+            pLineDst[col] = (OMX_U8)(((OMX_U32)pLine0[col] + pLine1[col] + 1) >> 1);
+        }
+    }
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_SAD.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_SAD.c
new file mode 100644
index 0000000..d41ac9a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/armVCCOMM_SAD.c
@@ -0,0 +1,75 @@
+/**
+ *
+ * File Name: armVCCOMM_SAD.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate SAD for NxM blocks
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function: armVCCOMM_SAD
+ *
+ * Description:
+ * This function calculates the SAD for NxM blocks.
+ *
+ * Remarks:
+ *
+ * [in] pSrcOrg Pointer to the original block
+ * [in] iStepOrg Step of the original block buffer
+ * [in] pSrcRef Pointer to the reference block
+ * [in] iStepRef Step of the reference block buffer
+ * [in] iHeight Height of the block
+ * [in] iWidth Width of the block
+ * [out] pDstSAD Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCCOMM_SAD(
+    const OMX_U8*   pSrcOrg,
+    OMX_U32         iStepOrg,
+    const OMX_U8*   pSrcRef,
+    OMX_U32         iStepRef,
+    OMX_S32*        pDstSAD,
+    OMX_U32         iHeight,
+    OMX_U32         iWidth
+)
+{
+    OMX_U32 x, y;       /* unsigned, like the OMX_U32 bounds they are compared against */
+    OMX_S32 sad = 0;    /* accumulated locally, stored through pDstSAD once at the end */
+
+    /* check for argument error */
+    armRetArgErrIf(pSrcOrg == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRef == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr)
+
+    for (y = 0; y < iHeight; y++)
+    {
+        for (x = 0; x < iWidth; x++)
+        {
+            OMX_INT diff = pSrcOrg[(y * iStepOrg) + x] - pSrcRef[(y * iStepRef) + x];
+            sad += armAbs(diff);
+        }
+    }
+    *pDstSAD = sad;
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_16x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_16x.c
new file mode 100644
index 0000000..6d1447e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_16x.c
@@ -0,0 +1,86 @@
+/**
+ *
+ * File Name: omxVCCOMM_Average_16x.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate Average of two 16x16 or 16x8 blocks
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCCOMM_Average_16x (6.1.3.1.2)
+ *
+ * Description:
+ * This function calculates the average of two 16x16 or 16x8 blocks. The
+ * result is rounded according to (a+b+1)/2. The block average function can
+ * be used in conjunction with half-pixel interpolation to obtain quarter
+ * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1.
+ *
+ * Input Arguments:
+ *
+ * pPred0 - Pointer to the top-left corner of reference block 0
+ * pPred1 - Pointer to the top-left corner of reference block 1
+ * iPredStep0 - Step of reference block 0
+ * iPredStep1 - Step of reference block 1
+ * iDstStep - Step of the destination buffer
+ * iHeight - Height of the blocks
+ *
+ * Output Arguments:
+ *
+ * pDstPred - Pointer to the destination buffer. 16-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pPred0, pPred1, or
+ * pDstPred.
+ * - pDstPred is not aligned on a 16-byte boundary.
+ * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 16.
+ * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 16.
+ * - iDstStep <= 0 or iDstStep is not a multiple of 16.
+ * - iHeight is not 8 or 16.
+ *
+ */
+ OMXResult omxVCCOMM_Average_16x (
+    const OMX_U8*   pPred0,
+    const OMX_U8*   pPred1,
+    OMX_U32         iPredStep0,
+    OMX_U32         iPredStep1,
+    OMX_U8*         pDstPred,
+    OMX_U32         iDstStep,
+    OMX_U32         iHeight
+)
+{
+    /* Buffers: all three are required and the destination is 16-byte aligned */
+    armRetArgErrIf(pPred0 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pPred1 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstPred == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot16ByteAligned(pDstPred), OMX_Sts_BadArgErr)
+    /* Geometry: strides are non-zero multiples of 16 and the height is 8 or 16 */
+    armRetArgErrIf((iPredStep0 == 0) || ((iPredStep0 % 16) != 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iPredStep1 == 0) || ((iPredStep1 % 16) != 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iDstStep == 0) || ((iDstStep % 16) != 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf(!((iHeight == 8) || (iHeight == 16)), OMX_Sts_BadArgErr)
+
+    return armVCCOMM_Average(pPred0, pPred1, iPredStep0, iPredStep1, pDstPred, iDstStep, 16, iHeight);
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_8x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_8x.c
new file mode 100644
index 0000000..17b1326
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Average_8x.c
@@ -0,0 +1,87 @@
+/**
+ *
+ * File Name: omxVCCOMM_Average_8x.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate Average of two 8x4 or 8x8 or 8x16 blocks
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCCOMM_Average_8x (6.1.3.1.1)
+ *
+ * Description:
+ * This function calculates the average of two 8x4, 8x8, or 8x16 blocks. The
+ * result is rounded according to (a+b+1)/2. The block average function can
+ * be used in conjunction with half-pixel interpolation to obtain quarter
+ * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1.
+ *
+ * Input Arguments:
+ *
+ * pPred0 - Pointer to the top-left corner of reference block 0
+ * pPred1 - Pointer to the top-left corner of reference block 1
+ * iPredStep0 - Step of reference block 0
+ * iPredStep1 - Step of reference block 1
+ * iDstStep - Step of the destination buffer.
+ * iHeight - Height of the blocks
+ *
+ * Output Arguments:
+ *
+ * pDstPred - Pointer to the destination buffer. 8-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pPred0, pPred1, or
+ * pDstPred.
+ * - pDstPred is not aligned on an 8-byte boundary.
+ * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 8.
+ * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 8.
+ * - iDstStep <= 0 or iDstStep is not a multiple of 8.
+ * - iHeight is not 4, 8, or 16.
+ *
+ */
+ OMXResult omxVCCOMM_Average_8x (
+    const OMX_U8*   pPred0,
+    const OMX_U8*   pPred1,
+    OMX_U32         iPredStep0,
+    OMX_U32         iPredStep1,
+    OMX_U8*         pDstPred,
+    OMX_U32         iDstStep,
+    OMX_U32         iHeight
+)
+{
+    /* Buffers: all three are required and the destination is 8-byte aligned */
+    armRetArgErrIf(pPred0 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pPred1 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstPred == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot8ByteAligned(pDstPred), OMX_Sts_BadArgErr)
+    /* Geometry: strides are non-zero multiples of 8 and the height is 4, 8 or 16 */
+    armRetArgErrIf((iPredStep0 == 0) || ((iPredStep0 % 8) != 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iPredStep1 == 0) || ((iPredStep1 % 8) != 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iDstStep == 0) || ((iDstStep % 8) != 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf(!((iHeight == 4) || (iHeight == 8) || (iHeight == 16)), OMX_Sts_BadArgErr)
+
+    return armVCCOMM_Average(pPred0, pPred1, iPredStep0, iPredStep1, pDstPred, iDstStep, 8, iHeight);
+}
+
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock.c
new file mode 100644
index 0000000..e559adf
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock.c
@@ -0,0 +1,88 @@
+/**
+ *
+ * File Name: omxVCCOMM_ComputeTextureErrorBlock.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains module computing the error for a MB of size 8x8
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCCOMM_ComputeTextureErrorBlock (6.1.4.1.2)
+ *
+ * Description:
+ * Computes the texture error of the block.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the source plane. This should be aligned on an 8-byte
+ * boundary.
+ * srcStep - step of the source plane
+ * pSrcRef - pointer to the reference buffer, an 8x8 block. This should be
+ * aligned on an 8-byte boundary.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination buffer, an 8x8 block. This should be
+ * aligned on an 8-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is NULL:
+ * pSrc, pSrcRef, pDst.
+ * - pSrc is not 8-byte aligned.
+ * - srcStep <= 0 or srcStep is not a multiple of 8.
+ * - pSrcRef is not 8-byte aligned.
+ * - pDst is not 8-byte aligned
+ *
+ */
+
+OMXResult omxVCCOMM_ComputeTextureErrorBlock(
+    const OMX_U8 *pSrc,
+    OMX_INT srcStep,
+    const OMX_U8 *pSrcRef,
+    OMX_S16 * pDst
+)
+{
+    OMX_INT row, col;
+    OMX_INT pos = 0;    /* running index into the packed 8x8 pSrcRef and pDst blocks */
+
+    /* Every buffer must be present and 8-byte aligned */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcRef == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pSrcRef), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pDst), OMX_Sts_BadArgErr);
+    /* The source stride must be a positive multiple of 8 */
+    armRetArgErrIf((srcStep <= 0) || (srcStep & 7), OMX_Sts_BadArgErr);
+
+    /* Residual = source pixel - reference pixel, stored in raster order */
+    for (row = 0; row < 8; row++)
+    {
+        for (col = 0; col < 8; col++)
+        {
+            pDst[pos] = pSrc[col] - pSrcRef[pos];
+            pos++;
+        }
+        pSrc += srcStep;    /* advance to the next line of the source plane */
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock_SAD.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock_SAD.c
new file mode 100644
index 0000000..c4731aa
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ComputeTextureErrorBlock_SAD.c
@@ -0,0 +1,93 @@
+/**
+ *
+ * File Name: omxVCCOMM_ComputeTextureErrorBlock_SAD.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains module computing the error for a MB of size 8x8
+ *
+ */
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCCOMM_ComputeTextureErrorBlock_SAD (6.1.4.1.1)
+ *
+ * Description:
+ * Computes texture error of the block; also returns SAD.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the source plane; must be aligned on an 8-byte
+ * boundary.
+ * srcStep - step of the source plane
+ * pSrcRef - pointer to the reference buffer, an 8x8 block; must be aligned
+ * on an 8-byte boundary.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination buffer, an 8x8 block; must be aligned
+ * on an 8-byte boundary.
+ * pDstSAD - pointer to the Sum of Absolute Differences (SAD) value
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - At least one of the following
+ * pointers is NULL: pSrc, pSrcRef, pDst and pDstSAD.
+ * - pSrc is not 8-byte aligned.
+ * - srcStep <= 0 or srcStep is not a multiple of 8.
+ * - pSrcRef is not 8-byte aligned.
+ * - pDst is not 8-byte aligned.
+ *
+ */
+
+OMXResult omxVCCOMM_ComputeTextureErrorBlock_SAD(
+    const OMX_U8 *pSrc,
+    OMX_INT srcStep,
+    const OMX_U8 *pSrcRef,
+    OMX_S16 * pDst,
+    OMX_INT *pDstSAD
+)
+{
+    OMX_INT row, col;
+    OMX_INT pos = 0;    /* running index into the packed 8x8 pSrcRef and pDst blocks */
+
+    /* Every pointer must be present; the three pixel buffers must be 8-byte aligned */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcRef == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pSrcRef), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pDst), OMX_Sts_BadArgErr);
+    /* The source stride must be a positive multiple of 8 */
+    armRetArgErrIf((srcStep <= 0) || (srcStep & 7), OMX_Sts_BadArgErr);
+
+    /* Store each residual and add its magnitude to the running SAD */
+    *pDstSAD = 0;
+    for (row = 0; row < 8; row++)
+    {
+        for (col = 0; col < 8; col++)
+        {
+            pDst[pos] = pSrc[col] - pSrcRef[pos];
+            *pDstSAD += armAbs(pDst[pos]);
+            pos++;
+        }
+        pSrc += srcStep;    /* advance to the next line of the source plane */
+    }
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy16x16.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy16x16.c
new file mode 100644
index 0000000..4857024
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy16x16.c
@@ -0,0 +1,79 @@
+/**
+ *
+ * File Name: omxVCCOMM_Copy16x16.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * MPEG4 16x16 Copy module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCCOMM_Copy16x16 (6.1.3.3.2)
+ *
+ * Description:
+ * Copies the reference 16x16 macroblock to the current macroblock.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the reference macroblock in the source frame; must be
+ * aligned on a 16-byte boundary.
+ * step - distance between the starts of consecutive lines in the reference
+ * frame, in bytes; must be a multiple of 16 and must be larger
+ * than or equal to 16.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination macroblock; must be aligned on a
+ * 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pSrc, pDst
+ * - one or more of the following pointers is not aligned on a 16-byte
+ * boundary: pSrc, pDst
+ * - step <16 or step is not a multiple of 16.
+ *
+ */
+
+OMXResult omxVCCOMM_Copy16x16(
+    const OMX_U8 *pSrc,
+    OMX_U8 *pDst,
+    OMX_INT step)
+{
+    OMX_INT row, col;
+
+    /* Both blocks must exist and start on a 16-byte boundary */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
+    /* The stride must span at least one block row and be a multiple of 16 */
+    armRetArgErrIf(((step < 16) || (step % 16)), OMX_Sts_BadArgErr);
+
+    /* Source rows lie `step` bytes apart; destination rows are packed contiguously */
+    for (row = 0; row < 16; row++)
+    {
+        const OMX_U8 *pSrcRow = pSrc + row * step;
+        OMX_U8 *pDstRow = pDst + row * 16;
+        for (col = 0; col < 16; col++)
+        {
+            pDstRow[col] = pSrcRow[col];
+        }
+    }
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy8x8.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy8x8.c
new file mode 100644
index 0000000..a4f9dde
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_Copy8x8.c
@@ -0,0 +1,79 @@
+/**
+ *
+ * File Name: omxVCCOMM_Copy8x8.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * MPEG4 8x8 Copy module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCCOMM_Copy8x8 (6.1.3.3.1)
+ *
+ * Description:
+ * Copies the reference 8x8 block to the current block.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the reference block in the source frame; must be
+ * aligned on an 8-byte boundary.
+ * step - distance between the starts of consecutive lines in the reference
+ * frame, in bytes; must be a multiple of 8 and must be larger than
+ * or equal to 8.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination block; must be aligned on an 8-byte
+ * boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pSrc, pDst
+ * - one or more of the following pointers is not aligned on an 8-byte
+ * boundary: pSrc, pDst
+ * - step <8 or step is not a multiple of 8.
+ *
+ */
+
+OMXResult omxVCCOMM_Copy8x8(
+    const OMX_U8 *pSrc,
+    OMX_U8 *pDst,
+    OMX_INT step)
+{
+    OMX_INT row, col;
+
+    /* Both blocks must exist and start on an 8-byte boundary */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pDst), OMX_Sts_BadArgErr);
+    /* The stride must span at least one block row and be a multiple of 8 */
+    armRetArgErrIf(((step < 8) || (step % 8)), OMX_Sts_BadArgErr);
+
+    /* Source rows lie `step` bytes apart; destination rows are packed contiguously */
+    for (row = 0; row < 8; row++)
+    {
+        const OMX_U8 *pSrcRow = pSrc + row * step;
+        OMX_U8 *pDstRow = pDst + row * 8;
+        for (col = 0; col < 8; col++)
+        {
+            pDstRow[col] = pSrcRow[col];
+        }
+    }
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ExpandFrame_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ExpandFrame_I.c
new file mode 100644
index 0000000..9536df7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_ExpandFrame_I.c
@@ -0,0 +1,126 @@
+/**
+ *
+ * File Name: omxVCCOMM_ExpandFrame_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will Expand Frame boundary pixels into Plane
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCCOMM_ExpandFrame_I (6.1.3.2.1)
+ *
+ * Description:
+ * This function expands a reconstructed frame in-place. The unexpanded
+ * source frame should be stored in a plane buffer with sufficient space
+ * pre-allocated for edge expansion, and the input frame should be located in
+ * the plane buffer center. This function executes the pixel expansion by
+ * replicating source frame edge pixel intensities in the empty pixel
+ * locations (expansion region) between the source frame edge and the plane
+ * buffer edge. The width/height of the expansion regions on the
+ * horizontal/vertical edges is controlled by the parameter iExpandPels.
+ *
+ * Input Arguments:
+ *
+ * pSrcDstPlane - pointer to the top-left corner of the frame to be
+ * expanded; must be aligned on an 8-byte boundary.
+ * iFrameWidth - frame width; must be a multiple of 8.
+ * iFrameHeight -frame height; must be a multiple of 8.
+ * iExpandPels - number of pixels to be expanded in the horizontal and
+ * vertical directions; must be a multiple of 8.
+ * iPlaneStep - distance, in bytes, between the start of consecutive lines
+ * in the plane buffer; must be larger than or equal to
+ * (iFrameWidth + 2 * iExpandPels).
+ *
+ * Output Arguments:
+ *
+ * pSrcDstPlane -Pointer to the top-left corner of the frame (NOT the
+ * top-left corner of the plane); must be aligned on an 8-byte
+ * boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - pSrcDstPlane is NULL.
+ * - pSrcDstPlane is not aligned on an 8-byte boundary.
+ * - one of the following parameters is either equal to zero or is a
+ * non-multiple of 8: iFrameHeight, iFrameWidth, iPlaneStep, or
+ * iExpandPels.
+ * - iPlaneStep < (iFrameWidth + 2 * iExpandPels).
+ *
+ */
+OMXResult omxVCCOMM_ExpandFrame_I(
+    OMX_U8* pSrcDstPlane,
+    OMX_U32 iFrameWidth,
+    OMX_U32 iFrameHeight,
+    OMX_U32 iExpandPels,
+    OMX_U32 iPlaneStep
+)
+{
+    OMX_U32 x, y;
+    OMX_U8* pTop;               /* first line of the top border, at frame column 0 */
+    OMX_U8* pBottom;            /* first line of the bottom border, at frame column 0 */
+    const OMX_U8* pLastLine;    /* last line of the frame itself */
+
+    /* check for argument error */
+    armRetArgErrIf(pSrcDstPlane == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot8ByteAligned(pSrcDstPlane), OMX_Sts_BadArgErr)
+    armRetArgErrIf(iFrameWidth == 0 || iFrameWidth & 7, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iFrameHeight== 0 || iFrameHeight & 7, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iExpandPels == 0 || iExpandPels & 7, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iPlaneStep == 0 || iPlaneStep & 7, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iPlaneStep < (iFrameWidth + 2 * iExpandPels),
+                   OMX_Sts_BadArgErr)
+
+    /* Top and Bottom: replicate the first and last frame lines vertically */
+    pTop = pSrcDstPlane - (iExpandPels * iPlaneStep);
+    pBottom = pSrcDstPlane + (iFrameHeight * iPlaneStep);
+    pLastLine = pSrcDstPlane + ((iFrameHeight - 1) * iPlaneStep);
+
+    for (y = 0; y < iExpandPels; y++)
+    {
+        for (x = 0; x < iFrameWidth; x++)
+        {
+            pTop[y * iPlaneStep + x] = pSrcDstPlane[x];
+            pBottom[y * iPlaneStep + x] = pLastLine[x];
+        }
+    }
+
+    /* Left, Right and Corners: visit every line from the top border downwards.
+     * Offsets are taken from pTop and are never negative: a negative row index
+     * times the unsigned iPlaneStep wraps to a huge unsigned offset, which
+     * lands outside the plane wherever pointers are wider than 32 bits.
+     */
+    for (y = 0; y < iFrameHeight + 2 * iExpandPels; y++)
+    {
+        OMX_U8* pRow = pTop + y * iPlaneStep;   /* frame column 0 of this line */
+        OMX_U8* pLeft = pRow - iExpandPels;
+        OMX_U8* pRight = pRow + iFrameWidth;
+        for (x = 0; x < iExpandPels; x++)
+        {
+            pLeft[x] = pRow[0];
+            pRight[x] = pRow[iFrameWidth - 1];
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_LimitMVToRect.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_LimitMVToRect.c
new file mode 100644
index 0000000..af04582
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_LimitMVToRect.c
@@ -0,0 +1,81 @@
+/**
+ *
+ * File Name: omxVCCOMM_LimitMVToRect.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains module for limiting the MV
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCCOMM_LimitMVToRect (6.1.4.1.3)
+ *
+ * Description:
+ * Limits the motion vector associated with the current block/macroblock to
+ * prevent the motion compensated block/macroblock from moving outside a
+ * bounding rectangle as shown in Figure 6-1.
+ *
+ * Input Arguments:
+ *
+ * pSrcMV - pointer to the motion vector associated with the current block
+ * or macroblock
+ * pRectVOPRef - pointer to the bounding rectangle
+ * Xcoord, Ycoord - coordinates of the current block or macroblock
+ * size - size of the current block or macroblock; must be equal to 8 or
+ * 16.
+ *
+ * Output Arguments:
+ *
+ * pDstMV - pointer to the limited motion vector
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcMV, pDstMV, or pRectVOPRef.
+ * - size is not equal to either 8 or 16.
+ * - the width or height of the bounding rectangle is less than
+ * twice the block size.
+ */
+OMXResult omxVCCOMM_LimitMVToRect(
+    const OMXVCMotionVector * pSrcMV,
+    OMXVCMotionVector *pDstMV,
+    const OMXRect * pRectVOPRef,
+    OMX_INT Xcoord,
+    OMX_INT Ycoord,
+    OMX_INT size
+)
+{
+    OMX_INT lowest, highest;    /* clamping bounds for one vector component */
+
+    armRetArgErrIf(pSrcMV == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstMV == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pRectVOPRef == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((size != 8) && (size != 16), OMX_Sts_BadArgErr);
+    armRetArgErrIf((pRectVOPRef->width < (2* size)) || (pRectVOPRef->height < (2* size)), OMX_Sts_BadArgErr);
+
+    lowest = 2*pRectVOPRef->x - Xcoord;    /* horizontal component */
+    highest = 2*pRectVOPRef->x + pRectVOPRef->width - Xcoord - size;
+    pDstMV->dx = armMin (armMax (pSrcMV->dx, lowest), highest);
+    lowest = 2*pRectVOPRef->y - Ycoord;    /* vertical component */
+    highest = 2*pRectVOPRef->y + pRectVOPRef->height - Ycoord - size;
+    pDstMV->dy = armMin (armMax (pSrcMV->dy, lowest), highest);
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_16x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_16x.c
new file mode 100644
index 0000000..0f0cedb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_16x.c
@@ -0,0 +1,80 @@
+/**
+ *
+ * File Name: omxVCCOMM_SAD_16x.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate SAD for 16x16 and 16x8 blocks
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCCOMM_SAD_16x (6.1.4.1.4)
+ *
+ * Description:
+ * Computes the sum of absolute differences (SAD) between an original and a
+ * reference block that is 16 pixels wide and 8 or 16 rows high. After the
+ * arguments are validated the work is delegated to armVCCOMM_SAD with a
+ * fixed width of 16.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to the original block; must be aligned on a 16-byte
+ * boundary.
+ * iStepOrg - Step of the original block buffer
+ * pSrcRef - Pointer to the reference block
+ * iStepRef - Step of the reference block buffer
+ * iHeight - Height of the block (8 or 16)
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer to the resulting SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pDstSAD, or pSrcRef
+ * - pSrcOrg is not 16-byte aligned.
+ * - iStepOrg is 0 or is not a multiple of 16
+ * - iStepRef is 0 or is not a multiple of 16
+ * - iHeight is not 8 or 16
+ * Otherwise the value returned by armVCCOMM_SAD is passed through.
+ *
+ */
+OMXResult omxVCCOMM_SAD_16x(
+    const OMX_U8* pSrcOrg,
+    OMX_U32 iStepOrg,
+    const OMX_U8* pSrcRef,
+    OMX_U32 iStepRef,
+    OMX_S32* pDstSAD,
+    OMX_U32 iHeight
+)
+{
+    /* Pointer checks */
+    armRetArgErrIf(pSrcOrg == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcRef == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr);
+
+    /* Geometry checks: supported height, 16-byte alignment of the original
+     * block, non-zero steps that are multiples of 16 */
+    armRetArgErrIf((iHeight != 16) && (iHeight != 8), OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot16ByteAligned(pSrcOrg), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iStepOrg == 0) || (iStepOrg & 15), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iStepRef == 0) || (iStepRef & 15), OMX_Sts_BadArgErr);
+
+    /* Last argument is the block width */
+    return armVCCOMM_SAD(pSrcOrg, iStepOrg,
+                         pSrcRef, iStepRef,
+                         pDstSAD, iHeight, 16);
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_8x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_8x.c
new file mode 100644
index 0000000..1421d99
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/comm/src/omxVCCOMM_SAD_8x.c
@@ -0,0 +1,80 @@
+/**
+ *
+ * File Name: omxVCCOMM_SAD_8x.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate SAD for 8x16, 8x8, 8x4 blocks
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCCOMM_SAD_8x (6.1.4.1.5)
+ *
+ * Description:
+ * Computes the sum of absolute differences (SAD) between an original and a
+ * reference block that is 8 pixels wide and 4, 8 or 16 rows high. After the
+ * arguments are validated the work is delegated to armVCCOMM_SAD with a
+ * fixed width of 8.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to the original block; must be aligned on an 8-byte
+ * boundary.
+ * iStepOrg - Step of the original block buffer
+ * pSrcRef - Pointer to the reference block
+ * iStepRef - Step of the reference block buffer
+ * iHeight - Height of the block (4, 8 or 16)
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer to the resulting SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pDstSAD, or pSrcRef
+ * - pSrcOrg is not 8-byte aligned.
+ * - iStepOrg is 0 or is not a multiple of 8
+ * - iStepRef is 0 or is not a multiple of 8
+ * - iHeight is not 4, 8 or 16
+ * Otherwise the value returned by armVCCOMM_SAD is passed through.
+ *
+ */
+OMXResult omxVCCOMM_SAD_8x(
+    const OMX_U8* pSrcOrg,
+    OMX_U32 iStepOrg,
+    const OMX_U8* pSrcRef,
+    OMX_U32 iStepRef,
+    OMX_S32* pDstSAD,
+    OMX_U32 iHeight
+)
+{
+    /* Pointer checks */
+    armRetArgErrIf(pSrcOrg == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcRef == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr);
+
+    /* Geometry checks: supported height, 8-byte alignment of the original
+     * block, non-zero steps that are multiples of 8 */
+    armRetArgErrIf((iHeight != 16) && (iHeight != 8) && (iHeight != 4), OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pSrcOrg), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iStepOrg == 0) || (iStepOrg & 7), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iStepRef == 0) || (iStepRef & 7), OMX_Sts_BadArgErr);
+
+    /* Last argument is the block width */
+    return armVCCOMM_SAD(pSrcOrg, iStepOrg,
+                         pSrcRef, iStepRef,
+                         pDstSAD, iHeight, 8);
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/api/armVCM4P10_CAVLCTables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/api/armVCM4P10_CAVLCTables.h
new file mode 100644
index 0000000..8d18a8f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/api/armVCM4P10_CAVLCTables.h
@@ -0,0 +1,34 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_CAVLCTables.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * ----------------------------------------------------------------
+ * File: armVCM4P10_CAVLCTables.h
+ * ----------------------------------------------------------------
+ *
+ * Header file for ARM implementation of OpenMAX VCM4P10
+ *
+ */
+
+#ifndef ARMVCM4P10_CAVLCTABLES_H
+#define ARMVCM4P10_CAVLCTABLES_H
+
+/* CAVLC tables */
+
+/* TrailingOnes / TotalCoeff looked up by the decoded coefficient-token index */
+extern const OMX_U8 armVCM4P10_CAVLCTrailingOnes[62];
+extern const OMX_U8 armVCM4P10_CAVLCTotalCoeff[62];
+/* Coefficient-token code tables, selected by table number (0 to 4) */
+extern const ARM_VLC32 *armVCM4P10_CAVLCCoeffTokenTables[5];
+/* level_prefix code table */
+extern const ARM_VLC32 armVCM4P10_CAVLCLevelPrefix[17];
+/* total_zeros code tables, selected by TotalCoeff-1 */
+extern const ARM_VLC32 *armVCM4P10_CAVLCTotalZeroTables[15];
+/* total_zeros code tables for blocks of at most 4 coefficients, selected by TotalCoeff-1 */
+extern const ARM_VLC32 *armVCM4P10_CAVLCTotalZeros2x2Tables[3];
+/* run_before code tables, selected by min(zeros left, 7)-1 */
+extern const ARM_VLC32 *armVCM4P10_CAVLCRunBeforeTables[7];
+
+#endif /* ARMVCM4P10_CAVLCTABLES_H */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CAVLCTables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CAVLCTables.c
new file mode 100644
index 0000000..f4e36ad
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CAVLCTables.c
@@ -0,0 +1,703 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_CAVLCTables.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * CAVLC tables for H.264
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM_Bitstream.h"
+#include "armVC.h"
+#include "armVCM4P10_CAVLCTables.h"
+
+/* Tables mapping a code to TrailingOnes and TotalCoeff */
+
+/*
+ * TrailingOnes for each coefficient-token code index. The index is the value
+ * obtained by decoding against one of the coefficient-token tables; the rows
+ * below line up with the rows of armVCM4P10_CAVLCTotalCoeff (one row per
+ * TotalCoeff value).
+ */
+const OMX_U8 armVCM4P10_CAVLCTrailingOnes[62] = {
+ 0,
+ 0, 1,
+ 0, 1, 2,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3,
+ 0, 1, 2, 3
+};
+
+/*
+ * TotalCoeff (0..16) for each coefficient-token code index; companion of
+ * armVCM4P10_CAVLCTrailingOnes and indexed the same way.
+ */
+const OMX_U8 armVCM4P10_CAVLCTotalCoeff[62] = {
+ 0,
+ 1, 1,
+ 2, 2, 2,
+ 3, 3, 3, 3,
+ 4, 4, 4, 4,
+ 5, 5, 5, 5,
+ 6, 6, 6, 6,
+ 7, 7, 7, 7,
+ 8, 8, 8, 8,
+ 9, 9, 9, 9,
+ 10, 10, 10, 10,
+ 11, 11, 11, 11,
+ 12, 12, 12, 12,
+ 13, 13, 13, 13,
+ 14, 14, 14, 14,
+ 15, 15, 15, 15,
+ 16, 16, 16, 16
+};
+
+/*
+ * Coefficient-token codes for table number 0. Entry i is the code whose
+ * TrailingOnes/TotalCoeff are stored at index i of the two mapping tables.
+ * NOTE(review): each pair looks like { code length in bits, code value },
+ * with { 0, 0 } as end marker - confirm against the ARM_VLC32 declaration.
+ */
+static const ARM_VLC32 armVCM4P10_CAVLCCoeffToken0[63] = {
+ { 1, 0x0001 },
+ { 6, 0x0005 },
+ { 2, 0x0001 },
+ { 8, 0x0007 },
+ { 6, 0x0004 },
+ { 3, 0x0001 },
+ { 9, 0x0007 },
+ { 8, 0x0006 },
+ { 7, 0x0005 },
+ { 5, 0x0003 },
+ { 10, 0x0007 },
+ { 9, 0x0006 },
+ { 8, 0x0005 },
+ { 6, 0x0003 },
+ { 11, 0x0007 },
+ { 10, 0x0006 },
+ { 9, 0x0005 },
+ { 7, 0x0004 },
+ { 13, 0x000f },
+ { 11, 0x0006 },
+ { 10, 0x0005 },
+ { 8, 0x0004 },
+ { 13, 0x000b },
+ { 13, 0x000e },
+ { 11, 0x0005 },
+ { 9, 0x0004 },
+ { 13, 0x0008 },
+ { 13, 0x000a },
+ { 13, 0x000d },
+ { 10, 0x0004 },
+ { 14, 0x000f },
+ { 14, 0x000e },
+ { 13, 0x0009 },
+ { 11, 0x0004 },
+ { 14, 0x000b },
+ { 14, 0x000a },
+ { 14, 0x000d },
+ { 13, 0x000c },
+ { 15, 0x000f },
+ { 15, 0x000e },
+ { 14, 0x0009 },
+ { 14, 0x000c },
+ { 15, 0x000b },
+ { 15, 0x000a },
+ { 15, 0x000d },
+ { 14, 0x0008 },
+ { 16, 0x000f },
+ { 15, 0x0001 },
+ { 15, 0x0009 },
+ { 15, 0x000c },
+ { 16, 0x000b },
+ { 16, 0x000e },
+ { 16, 0x000d },
+ { 15, 0x0008 },
+ { 16, 0x0007 },
+ { 16, 0x000a },
+ { 16, 0x0009 },
+ { 16, 0x000c },
+ { 16, 0x0004 },
+ { 16, 0x0006 },
+ { 16, 0x0005 },
+ { 16, 0x0008 },
+ { 0, 0x0000 }
+};
+
+/*
+ * Coefficient-token codes for table number 1; same layout and indexing as
+ * armVCM4P10_CAVLCCoeffToken0, terminated by a { 0, 0 } entry.
+ */
+static const ARM_VLC32 armVCM4P10_CAVLCCoeffToken1[63] = {
+ { 2, 0x0003 },
+ { 6, 0x000b },
+ { 2, 0x0002 },
+ { 6, 0x0007 },
+ { 5, 0x0007 },
+ { 3, 0x0003 },
+ { 7, 0x0007 },
+ { 6, 0x000a },
+ { 6, 0x0009 },
+ { 4, 0x0005 },
+ { 8, 0x0007 },
+ { 6, 0x0006 },
+ { 6, 0x0005 },
+ { 4, 0x0004 },
+ { 8, 0x0004 },
+ { 7, 0x0006 },
+ { 7, 0x0005 },
+ { 5, 0x0006 },
+ { 9, 0x0007 },
+ { 8, 0x0006 },
+ { 8, 0x0005 },
+ { 6, 0x0008 },
+ { 11, 0x000f },
+ { 9, 0x0006 },
+ { 9, 0x0005 },
+ { 6, 0x0004 },
+ { 11, 0x000b },
+ { 11, 0x000e },
+ { 11, 0x000d },
+ { 7, 0x0004 },
+ { 12, 0x000f },
+ { 11, 0x000a },
+ { 11, 0x0009 },
+ { 9, 0x0004 },
+ { 12, 0x000b },
+ { 12, 0x000e },
+ { 12, 0x000d },
+ { 11, 0x000c },
+ { 12, 0x0008 },
+ { 12, 0x000a },
+ { 12, 0x0009 },
+ { 11, 0x0008 },
+ { 13, 0x000f },
+ { 13, 0x000e },
+ { 13, 0x000d },
+ { 12, 0x000c },
+ { 13, 0x000b },
+ { 13, 0x000a },
+ { 13, 0x0009 },
+ { 13, 0x000c },
+ { 13, 0x0007 },
+ { 14, 0x000b },
+ { 13, 0x0006 },
+ { 13, 0x0008 },
+ { 14, 0x0009 },
+ { 14, 0x0008 },
+ { 14, 0x000a },
+ { 13, 0x0001 },
+ { 14, 0x0007 },
+ { 14, 0x0006 },
+ { 14, 0x0005 },
+ { 14, 0x0004 },
+ { 0, 0x0000 }
+};
+
+/*
+ * Coefficient-token codes for table number 2; same layout and indexing as
+ * armVCM4P10_CAVLCCoeffToken0, terminated by a { 0, 0 } entry.
+ */
+static const ARM_VLC32 armVCM4P10_CAVLCCoeffToken2[63] = {
+ { 4, 0x000f },
+ { 6, 0x000f },
+ { 4, 0x000e },
+ { 6, 0x000b },
+ { 5, 0x000f },
+ { 4, 0x000d },
+ { 6, 0x0008 },
+ { 5, 0x000c },
+ { 5, 0x000e },
+ { 4, 0x000c },
+ { 7, 0x000f },
+ { 5, 0x000a },
+ { 5, 0x000b },
+ { 4, 0x000b },
+ { 7, 0x000b },
+ { 5, 0x0008 },
+ { 5, 0x0009 },
+ { 4, 0x000a },
+ { 7, 0x0009 },
+ { 6, 0x000e },
+ { 6, 0x000d },
+ { 4, 0x0009 },
+ { 7, 0x0008 },
+ { 6, 0x000a },
+ { 6, 0x0009 },
+ { 4, 0x0008 },
+ { 8, 0x000f },
+ { 7, 0x000e },
+ { 7, 0x000d },
+ { 5, 0x000d },
+ { 8, 0x000b },
+ { 8, 0x000e },
+ { 7, 0x000a },
+ { 6, 0x000c },
+ { 9, 0x000f },
+ { 8, 0x000a },
+ { 8, 0x000d },
+ { 7, 0x000c },
+ { 9, 0x000b },
+ { 9, 0x000e },
+ { 8, 0x0009 },
+ { 8, 0x000c },
+ { 9, 0x0008 },
+ { 9, 0x000a },
+ { 9, 0x000d },
+ { 8, 0x0008 },
+ { 10, 0x000d },
+ { 9, 0x0007 },
+ { 9, 0x0009 },
+ { 9, 0x000c },
+ { 10, 0x0009 },
+ { 10, 0x000c },
+ { 10, 0x000b },
+ { 10, 0x000a },
+ { 10, 0x0005 },
+ { 10, 0x0008 },
+ { 10, 0x0007 },
+ { 10, 0x0006 },
+ { 10, 0x0001 },
+ { 10, 0x0004 },
+ { 10, 0x0003 },
+ { 10, 0x0002 },
+ { 0, 0x0000 }
+};
+
+/*
+ * Coefficient-token codes for table number 3. Every entry has the same
+ * first field (6); same indexing as armVCM4P10_CAVLCCoeffToken0, terminated
+ * by a { 0, 0 } entry.
+ */
+static const ARM_VLC32 armVCM4P10_CAVLCCoeffToken3[63] = {
+ { 6, 0x0003 },
+ { 6, 0x0000 },
+ { 6, 0x0001 },
+ { 6, 0x0004 },
+ { 6, 0x0005 },
+ { 6, 0x0006 },
+ { 6, 0x0008 },
+ { 6, 0x0009 },
+ { 6, 0x000a },
+ { 6, 0x000b },
+ { 6, 0x000c },
+ { 6, 0x000d },
+ { 6, 0x000e },
+ { 6, 0x000f },
+ { 6, 0x0010 },
+ { 6, 0x0011 },
+ { 6, 0x0012 },
+ { 6, 0x0013 },
+ { 6, 0x0014 },
+ { 6, 0x0015 },
+ { 6, 0x0016 },
+ { 6, 0x0017 },
+ { 6, 0x0018 },
+ { 6, 0x0019 },
+ { 6, 0x001a },
+ { 6, 0x001b },
+ { 6, 0x001c },
+ { 6, 0x001d },
+ { 6, 0x001e },
+ { 6, 0x001f },
+ { 6, 0x0020 },
+ { 6, 0x0021 },
+ { 6, 0x0022 },
+ { 6, 0x0023 },
+ { 6, 0x0024 },
+ { 6, 0x0025 },
+ { 6, 0x0026 },
+ { 6, 0x0027 },
+ { 6, 0x0028 },
+ { 6, 0x0029 },
+ { 6, 0x002a },
+ { 6, 0x002b },
+ { 6, 0x002c },
+ { 6, 0x002d },
+ { 6, 0x002e },
+ { 6, 0x002f },
+ { 6, 0x0030 },
+ { 6, 0x0031 },
+ { 6, 0x0032 },
+ { 6, 0x0033 },
+ { 6, 0x0034 },
+ { 6, 0x0035 },
+ { 6, 0x0036 },
+ { 6, 0x0037 },
+ { 6, 0x0038 },
+ { 6, 0x0039 },
+ { 6, 0x003a },
+ { 6, 0x003b },
+ { 6, 0x003c },
+ { 6, 0x003d },
+ { 6, 0x003e },
+ { 6, 0x003f },
+ { 0, 0x0000 }
+};
+
+/*
+ * Coefficient-token codes for table number 4. Only 14 codes are present, so
+ * the decoded index maps to at most TotalCoeff == 4 in the mapping tables.
+ * NOTE(review): presumably the chroma DC table - confirm against the caller.
+ */
+static const ARM_VLC32 armVCM4P10_CAVLCCoeffToken4[15] = {
+ { 2, 0x0001 },
+ { 6, 0x0007 },
+ { 1, 0x0001 },
+ { 6, 0x0004 },
+ { 6, 0x0006 },
+ { 3, 0x0001 },
+ { 6, 0x0003 },
+ { 7, 0x0003 },
+ { 7, 0x0002 },
+ { 6, 0x0005 },
+ { 6, 0x0002 },
+ { 8, 0x0003 },
+ { 8, 0x0002 },
+ { 7, 0x0000 },
+ { 0, 0x0000 }
+};
+
+
+/* Coefficient-token code tables, indexed by table number (0 to 4) */
+const ARM_VLC32 *armVCM4P10_CAVLCCoeffTokenTables[5] = {
+ armVCM4P10_CAVLCCoeffToken0,
+ armVCM4P10_CAVLCCoeffToken1,
+ armVCM4P10_CAVLCCoeffToken2,
+ armVCM4P10_CAVLCCoeffToken3,
+ armVCM4P10_CAVLCCoeffToken4
+};
+
+/* Table for level_prefix */
+
+/*
+ * level_prefix codes: entry k (k = 0..15) is { k+1, 1 }, followed by a
+ * { 0, 0 } end marker. The decoded index is used as the level prefix.
+ * NOTE(review): presumably k zero bits followed by a one bit - confirm
+ * against the ARM_VLC32 field meaning.
+ */
+const ARM_VLC32 armVCM4P10_CAVLCLevelPrefix[17] = {
+ { 1, 1},
+ { 2, 1},
+ { 3, 1},
+ { 4, 1},
+ { 5, 1},
+ { 6, 1},
+ { 7, 1},
+ { 8, 1},
+ { 9, 1},
+ { 10, 1},
+ { 11, 1},
+ { 12, 1},
+ { 13, 1},
+ { 14, 1},
+ { 15, 1},
+ { 16, 1},
+ { 0, 0}
+};
+
+/* Tables for total_zeros */
+
+/*
+ * total_zeros codes used when TotalCoeff == 1 (16 codes plus { 0, 0 } end
+ * marker). The decoded index is used as the total number of zeros.
+ */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros1[17] = {
+ { 1, 0x0001 },
+ { 3, 0x0003 },
+ { 3, 0x0002 },
+ { 4, 0x0003 },
+ { 4, 0x0002 },
+ { 5, 0x0003 },
+ { 5, 0x0002 },
+ { 6, 0x0003 },
+ { 6, 0x0002 },
+ { 7, 0x0003 },
+ { 7, 0x0002 },
+ { 8, 0x0003 },
+ { 8, 0x0002 },
+ { 9, 0x0003 },
+ { 9, 0x0002 },
+ { 9, 0x0001 },
+ { 0, 0x0000 }
+};
+
+/* total_zeros codes used when TotalCoeff == 2; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros2[16] = {
+ { 3, 0x0007 },
+ { 3, 0x0006 },
+ { 3, 0x0005 },
+ { 3, 0x0004 },
+ { 3, 0x0003 },
+ { 4, 0x0005 },
+ { 4, 0x0004 },
+ { 4, 0x0003 },
+ { 4, 0x0002 },
+ { 5, 0x0003 },
+ { 5, 0x0002 },
+ { 6, 0x0003 },
+ { 6, 0x0002 },
+ { 6, 0x0001 },
+ { 6, 0x0000 },
+ { 0, 0x0000 }
+};
+
+/* total_zeros codes used when TotalCoeff == 3; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros3[15] = {
+ { 4, 0x0005 },
+ { 3, 0x0007 },
+ { 3, 0x0006 },
+ { 3, 0x0005 },
+ { 4, 0x0004 },
+ { 4, 0x0003 },
+ { 3, 0x0004 },
+ { 3, 0x0003 },
+ { 4, 0x0002 },
+ { 5, 0x0003 },
+ { 5, 0x0002 },
+ { 6, 0x0001 },
+ { 5, 0x0001 },
+ { 6, 0x0000 },
+ { 0, 0x0000 }
+};
+
+/* total_zeros codes used when TotalCoeff == 4; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros4[14] = {
+ { 5, 0x0003 },
+ { 3, 0x0007 },
+ { 4, 0x0005 },
+ { 4, 0x0004 },
+ { 3, 0x0006 },
+ { 3, 0x0005 },
+ { 3, 0x0004 },
+ { 4, 0x0003 },
+ { 3, 0x0003 },
+ { 4, 0x0002 },
+ { 5, 0x0002 },
+ { 5, 0x0001 },
+ { 5, 0x0000 },
+ { 0, 0x0000 }
+};
+
+/* total_zeros codes used when TotalCoeff == 5; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros5[13] = {
+ { 4, 0x0005 },
+ { 4, 0x0004 },
+ { 4, 0x0003 },
+ { 3, 0x0007 },
+ { 3, 0x0006 },
+ { 3, 0x0005 },
+ { 3, 0x0004 },
+ { 3, 0x0003 },
+ { 4, 0x0002 },
+ { 5, 0x0001 },
+ { 4, 0x0001 },
+ { 5, 0x0000 },
+ { 0, 0x0000 }
+};
+
+/* total_zeros codes used when TotalCoeff == 6; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros6[12] = {
+ { 6, 0x0001 },
+ { 5, 0x0001 },
+ { 3, 0x0007 },
+ { 3, 0x0006 },
+ { 3, 0x0005 },
+ { 3, 0x0004 },
+ { 3, 0x0003 },
+ { 3, 0x0002 },
+ { 4, 0x0001 },
+ { 3, 0x0001 },
+ { 6, 0x0000 },
+ { 0, 0x0000 }
+};
+
+/* total_zeros codes used when TotalCoeff == 7; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros7[11] = {
+ { 6, 0x0001 },
+ { 5, 0x0001 },
+ { 3, 0x0005 },
+ { 3, 0x0004 },
+ { 3, 0x0003 },
+ { 2, 0x0003 },
+ { 3, 0x0002 },
+ { 4, 0x0001 },
+ { 3, 0x0001 },
+ { 6, 0x0000 },
+ { 0, 0x0000 }
+};
+
+/* total_zeros codes used when TotalCoeff == 8; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros8[10] = {
+ { 6, 0x0001 },
+ { 4, 0x0001 },
+ { 5, 0x0001 },
+ { 3, 0x0003 },
+ { 2, 0x0003 },
+ { 2, 0x0002 },
+ { 3, 0x0002 },
+ { 3, 0x0001 },
+ { 6, 0x0000 },
+ { 0, 0x0000 }
+};
+
+/* total_zeros codes used when TotalCoeff == 9; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros9[9] = {
+ { 6, 0x0001 },
+ { 6, 0x0000 },
+ { 4, 0x0001 },
+ { 2, 0x0003 },
+ { 2, 0x0002 },
+ { 3, 0x0001 },
+ { 2, 0x0001 },
+ { 5, 0x0001 },
+ { 0, 0x0000 }
+};
+
+/* total_zeros codes used when TotalCoeff == 10; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros10[8] = {
+ { 5, 0x0001 },
+ { 5, 0x0000 },
+ { 3, 0x0001 },
+ { 2, 0x0003 },
+ { 2, 0x0002 },
+ { 2, 0x0001 },
+ { 4, 0x0001 },
+ { 0, 0x0000 }
+};
+
+/* total_zeros codes used when TotalCoeff == 11; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros11[7] = {
+ { 4, 0x0000 },
+ { 4, 0x0001 },
+ { 3, 0x0001 },
+ { 3, 0x0002 },
+ { 1, 0x0001 },
+ { 3, 0x0003 },
+ { 0, 0x0000 }
+};
+
+/* total_zeros codes used when TotalCoeff == 12; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros12[6] = {
+ { 4, 0x0000 },
+ { 4, 0x0001 },
+ { 2, 0x0001 },
+ { 1, 0x0001 },
+ { 3, 0x0001 },
+ { 0, 0x0000 }
+};
+
+/* total_zeros codes used when TotalCoeff == 13; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros13[5] = {
+ { 3, 0x0000 },
+ { 3, 0x0001 },
+ { 1, 0x0001 },
+ { 2, 0x0001 },
+ { 0, 0x0000 }
+};
+
+/* total_zeros codes used when TotalCoeff == 14; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros14[4] = {
+ { 2, 0x0000 },
+ { 2, 0x0001 },
+ { 1, 0x0001 },
+ { 0, 0x0000 }
+};
+
+/* total_zeros codes used when TotalCoeff == 15; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros15[3] = {
+ { 1, 0x0000 },
+ { 1, 0x0001 },
+ { 0, 0x0000 }
+};
+
+/*
+ * total_zeros code tables; the decoder indexes this array with TotalCoeff-1
+ * when the block can hold more than 4 coefficients.
+ */
+const ARM_VLC32 *armVCM4P10_CAVLCTotalZeroTables[15] = {
+ armVCM4P10_CAVLCTotalZeros1,
+ armVCM4P10_CAVLCTotalZeros2,
+ armVCM4P10_CAVLCTotalZeros3,
+ armVCM4P10_CAVLCTotalZeros4,
+ armVCM4P10_CAVLCTotalZeros5,
+ armVCM4P10_CAVLCTotalZeros6,
+ armVCM4P10_CAVLCTotalZeros7,
+ armVCM4P10_CAVLCTotalZeros8,
+ armVCM4P10_CAVLCTotalZeros9,
+ armVCM4P10_CAVLCTotalZeros10,
+ armVCM4P10_CAVLCTotalZeros11,
+ armVCM4P10_CAVLCTotalZeros12,
+ armVCM4P10_CAVLCTotalZeros13,
+ armVCM4P10_CAVLCTotalZeros14,
+ armVCM4P10_CAVLCTotalZeros15
+};
+
+/* total_zeros codes for a 4-coefficient block with TotalCoeff == 1 */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros2x2_1[5] = {
+ { 1, 1 },
+ { 2, 1 },
+ { 3, 1 },
+ { 3, 0 },
+ { 0, 0 }
+};
+
+/* total_zeros codes for a 4-coefficient block with TotalCoeff == 2 */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros2x2_2[4] = {
+ { 1, 1 },
+ { 2, 1 },
+ { 2, 0 },
+ { 0, 0 }
+};
+
+/* total_zeros codes for a 4-coefficient block with TotalCoeff == 3 */
+static const ARM_VLC32 armVCM4P10_CAVLCTotalZeros2x2_3[3] = {
+ { 1, 1 },
+ { 1, 0 },
+ { 0, 0 }
+};
+
+/*
+ * total_zeros code tables used when the block holds at most 4 coefficients;
+ * the decoder indexes this array with TotalCoeff-1.
+ */
+const ARM_VLC32 *armVCM4P10_CAVLCTotalZeros2x2Tables[3] = {
+ armVCM4P10_CAVLCTotalZeros2x2_1,
+ armVCM4P10_CAVLCTotalZeros2x2_2,
+ armVCM4P10_CAVLCTotalZeros2x2_3
+};
+
+
+/* Tables for run_before */
+
+/*
+ * run_before codes used while 1 zero is left; the decoded index is used as
+ * the run length. { 0, 0 } ends the table.
+ */
+static const ARM_VLC32 armVCM4P10_CAVLCRunBefore1[3] = {
+ { 1, 0x0001 },
+ { 1, 0x0000 },
+ { 0, 0x0000 }
+};
+
+/* run_before codes used while 2 zeros are left; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCRunBefore2[4] = {
+ { 1, 0x0001 },
+ { 2, 0x0001 },
+ { 2, 0x0000 },
+ { 0, 0x0000 }
+};
+
+/* run_before codes used while 3 zeros are left; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCRunBefore3[5] = {
+ { 2, 0x0003 },
+ { 2, 0x0002 },
+ { 2, 0x0001 },
+ { 2, 0x0000 },
+ { 0, 0x0000 }
+};
+
+/* run_before codes used while 4 zeros are left; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCRunBefore4[6] = {
+ { 2, 0x0003 },
+ { 2, 0x0002 },
+ { 2, 0x0001 },
+ { 3, 0x0001 },
+ { 3, 0x0000 },
+ { 0, 0x0000 }
+};
+
+/* run_before codes used while 5 zeros are left; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCRunBefore5[7] = {
+ { 2, 0x0003 },
+ { 2, 0x0002 },
+ { 3, 0x0003 },
+ { 3, 0x0002 },
+ { 3, 0x0001 },
+ { 3, 0x0000 },
+ { 0, 0x0000 }
+};
+
+/* run_before codes used while 6 zeros are left; { 0, 0 } ends the table */
+static const ARM_VLC32 armVCM4P10_CAVLCRunBefore6[8] = {
+ { 2, 0x0003 },
+ { 3, 0x0000 },
+ { 3, 0x0001 },
+ { 3, 0x0003 },
+ { 3, 0x0002 },
+ { 3, 0x0005 },
+ { 3, 0x0004 },
+ { 0, 0x0000 }
+};
+
+/*
+ * run_before codes used while more than 6 zeros are left. The table holds
+ * 15 codes (runs 0..14), so a decoded run can be larger than the number of
+ * zeros actually left; the table itself does not bound it.
+ */
+static const ARM_VLC32 armVCM4P10_CAVLCRunBefore7[16] = {
+ { 3, 0x0007 },
+ { 3, 0x0006 },
+ { 3, 0x0005 },
+ { 3, 0x0004 },
+ { 3, 0x0003 },
+ { 3, 0x0002 },
+ { 3, 0x0001 },
+ { 4, 0x0001 },
+ { 5, 0x0001 },
+ { 6, 0x0001 },
+ { 7, 0x0001 },
+ { 8, 0x0001 },
+ { 9, 0x0001 },
+ { 10, 0x0001 },
+ { 11, 0x0001 },
+ { 0, 0x0000 }
+};
+
+/*
+ * run_before code tables; the decoder indexes this array with
+ * min(zeros left, 7) - 1.
+ */
+const ARM_VLC32 *armVCM4P10_CAVLCRunBeforeTables[7] = {
+ armVCM4P10_CAVLCRunBefore1,
+ armVCM4P10_CAVLCRunBefore2,
+ armVCM4P10_CAVLCRunBefore3,
+ armVCM4P10_CAVLCRunBefore4,
+ armVCM4P10_CAVLCRunBefore5,
+ armVCM4P10_CAVLCRunBefore6,
+ armVCM4P10_CAVLCRunBefore7
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CompareMotionCostToMV.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CompareMotionCostToMV.c
new file mode 100644
index 0000000..e4bedc2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_CompareMotionCostToMV.c
@@ -0,0 +1,133 @@
+/**
+ *
+ * File Name: armVCM4P10_CompareMotionCostToMV.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains module for comparing motion vectors and SAD's to decide
+ * the best MV and SAD
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P10_ExpGolBitsUsed
+ *
+ * Description:
+ * Computes how many bits the signed Exp-Golomb code of a value occupies.
+ *
+ * Remarks:
+ * The signed value is first mapped to an unsigned code number: a positive
+ * value v becomes 2*|v| - 1, zero or a negative value becomes 2*|v|.
+ *
+ * Parameters:
+ * [in] val Signed number whose Exp-Golomb code length is wanted
+ *
+ * Return Value:
+ * Length of the Exp-Golomb code for val, computed as
+ * 2 * armLogSize(codeNum + 1) - 1
+ */
+
+static OMX_U16 armVCM4P10_ExpGolBitsUsed (OMX_S16 val)
+{
+    OMX_U16 magnitude = armAbs (val);
+    OMX_U16 mapped;
+
+    /* Signed-to-unsigned mapping of the value */
+    mapped = (val > 0) ? ((2 * magnitude) - 1) : (2 * magnitude);
+
+    /* Code length derived from the size of (code number + 1) */
+    return (2 * armLogSize (mapped + 1)) - 1;
+}
+
+
+/**
+ * Function: armVCM4P10_CompareMotionCostToMV
+ *
+ * Description:
+ * Compares the motion cost of a candidate motion vector with the best cost
+ * found so far and keeps whichever is better. On equal cost the shorter
+ * vector (smaller dx*dx + dy*dy) wins.
+ *
+ * Remarks:
+ * Motion cost = candSAD + nLamda * (bits(diffMV.dx) + bits(diffMV.dy)),
+ * where bits() is the Exp-Golomb code length of the component.
+ *
+ * Parameters:
+ * [in] mvX x coordinate of the candidate motion vector in 1/4 pel units
+ * [in] mvY y coordinate of the candidate motion vector in 1/4 pel units
+ * [in] diffMV differential MV
+ * [in] candSAD Candidate SAD
+ * [in] bestMV Best MV up to the previous iteration
+ * [in] nLamda Lamda factor; used to compute the motion cost
+ * [in] *pBestCost Current best motion cost
+ * [out] *pBestCost Replaced by the candidate cost when the candidate wins
+ * [out] bestMV Replaced by (mvX, mvY) when the candidate wins
+ *
+ * Return Value:
+ * OMX_INT -- 1 to indicate that the candidate is the new best
+ * 0 to indicate that the previous best was kept
+ */
+
+OMX_INT armVCM4P10_CompareMotionCostToMV (
+    OMX_S16 mvX,
+    OMX_S16 mvY,
+    OMXVCMotionVector diffMV,
+    OMX_INT candSAD,
+    OMXVCMotionVector *bestMV,
+    OMX_U32 nLamda,
+    OMX_S32 *pBestCost
+)
+{
+    OMX_U16 mvBits;
+    OMX_S32 cost;
+    OMX_INT candidateWins;
+
+    /* Bits needed to code both components of the differential MV */
+    mvBits = armVCM4P10_ExpGolBitsUsed (diffMV.dx);
+    mvBits += armVCM4P10_ExpGolBitsUsed (diffMV.dy);
+
+    /* Motion cost = SAD + lamda * bits used */
+    cost = candSAD + (nLamda * mvBits);
+
+    if (cost != *pBestCost)
+    {
+        candidateWins = (cost < *pBestCost);
+    }
+    else
+    {
+        /* Tie on cost: prefer the shorter motion vector */
+        candidateWins = ( (mvX * mvX + mvY * mvY) <
+                          ((bestMV->dx * bestMV->dx) + (bestMV->dy * bestMV->dy)) );
+    }
+
+    if (!candidateWins)
+    {
+        return 0;
+    }
+
+    *pBestCost = cost;
+    bestMV->dx = mvX;
+    bestMV->dy = mvY;
+    return 1;
+}
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DeBlockPixel.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DeBlockPixel.c
new file mode 100644
index 0000000..f4fb1d9
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DeBlockPixel.c
@@ -0,0 +1,151 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_DeBlockPixel.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 luma deblock module
+ *
+ */
+
+#ifdef DEBUG_ARMVCM4P10_DEBLOCKPIXEL
+#undef DEBUG_ON
+#define DEBUG_ON
+#endif /* DEBUG_ARMVCM4P10_DEBLOCKPIXEL */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/*
+ * Description
+ * Deblocks one line of pixels across an edge: p3..p0 on one side at
+ * pQ0[-4*Step]..pQ0[-1*Step], q0..q3 on the other at pQ0[0]..pQ0[3*Step].
+ * Nothing is written when bS is 0 or when any of |p0-q0| >= alpha,
+ * |p1-p0| >= beta, |q1-q0| >= beta holds.
+ *
+ * Parameters:
+ * [in] pQ0 Pointer to pixel q0
+ * [in] Step Step between pixels q0 and q1
+ * [in] tC0 Edge threshold value
+ * [in] alpha alpha threshold value
+ * [in] beta beta threshold value
+ * [in] bS deblocking strength
+ * [in] ChromaFlag True for chroma blocks
+ * [out] pQ0 Deblocked pixels
+ *
+ */
+
+void armVCM4P10_DeBlockPixel(
+    OMX_U8 *pQ0,    /* pointer to the pixel q0 */
+    int Step,       /* step between pixels q0 and q1 */
+    int tC0,        /* edge threshold value */
+    int alpha,      /* alpha */
+    int beta,       /* beta */
+    int bS,         /* deblocking strength */
+    int ChromaFlag
+)
+{
+    int p3, p2, p1, p0, q0, q1, q2, q3;
+    int actP, actQ, edgeGap, adjust;
+    int isLuma;
+
+    /* Strength 0: edge is not filtered */
+    if (bS == 0)
+    {
+        return;
+    }
+
+    /* Snapshot all eight pixels before any of them is modified */
+    q3 = pQ0[ 3*Step];
+    q2 = pQ0[ 2*Step];
+    q1 = pQ0[ 1*Step];
+    q0 = pQ0[ 0*Step];
+    p0 = pQ0[-1*Step];
+    p1 = pQ0[-2*Step];
+    p2 = pQ0[-3*Step];
+    p3 = pQ0[-4*Step];
+
+    /* Filter only when all three differences are below their thresholds */
+    edgeGap = armAbs(p0-q0);
+    if (!(edgeGap < alpha && armAbs(p1-p0) < beta && armAbs(q1-q0) < beta))
+    {
+        DEBUG_PRINTF_10("DeBlockPixel: %02x %02x %02x %02x | %02x %02x %02x %02x alpha=%d beta=%d\n",
+            p3, p2, p1, p0, q0, q1, q2, q3, alpha, beta);
+        return;
+    }
+
+    isLuma = (ChromaFlag == 0);
+    actP = armAbs(p2 - p0);
+    actQ = armAbs(q2 - q0);
+
+    if (bS >= 4)
+    {
+        /* bS==4: averaging filters; luma uses the longer filters only when
+         * the step across the edge is small */
+        int smallGap = (edgeGap < ((alpha>>2)+2));
+
+        if (isLuma && smallGap && actP < beta)
+        {
+            pQ0[-1*Step] = (OMX_U8)((p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3);
+            pQ0[-2*Step] = (OMX_U8)((p2 + p1 + p0 + q0 + 2)>>2);
+            pQ0[-3*Step] = (OMX_U8)((2*p3 + 3*p2 + p1 + p0 + q0 + 4)>>3);
+        }
+        else
+        {
+            pQ0[-1*Step] = (OMX_U8)((2*p1 + p0 + q1 + 2)>>2);
+        }
+
+        if (isLuma && smallGap && actQ < beta)
+        {
+            pQ0[ 0*Step] = (OMX_U8)((q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4)>>3);
+            pQ0[ 1*Step] = (OMX_U8)((q2 + q1 + p0 + q0 + 2)>>2);
+            pQ0[ 2*Step] = (OMX_U8)((2*q3 + 3*q2 + q1 + q0 + p0 + 4)>>3);
+        }
+        else
+        {
+            pQ0[ 0*Step] = (OMX_U8)((2*q1 + q0 + p1 + 2)>>2);
+        }
+    }
+    else
+    {
+        /* bS<4: clipped correction of p0/q0, plus p1/q1 for luma */
+        int tC = tC0;
+
+        if (isLuma)
+        {
+            if (actP < beta)
+            {
+                tC++;
+            }
+            if (actQ < beta)
+            {
+                tC++;
+            }
+        }
+        else
+        {
+            tC++;
+        }
+
+        adjust = (((q0-p0)<<2) + (p1-q1) + 4) >> 3;
+        adjust = armClip(-tC, tC, adjust);
+
+        pQ0[-1*Step] = (OMX_U8)armClip(0, 255, p0 + adjust);
+        pQ0[ 0*Step] = (OMX_U8)armClip(0, 255, q0 - adjust);
+
+        if (isLuma)
+        {
+            /* p1/q1 corrections are limited by tC0, not the widened tC */
+            if (actP < beta)
+            {
+                adjust = (p2 + ((p0+q0+1)>>1) - (p1<<1))>>1;
+                adjust = armClip(-tC0, tC0, adjust);
+                pQ0[-2*Step] = (OMX_U8)(p1 + adjust);
+            }
+            if (actQ < beta)
+            {
+                adjust = (q2 + ((p0+q0+1)>>1) - (q1<<1))>>1;
+                adjust = armClip(-tC0, tC0, adjust);
+                pQ0[ 1*Step] = (OMX_U8)(q1 + adjust);
+            }
+        }
+    }
+
+    DEBUG_PRINTF_13("DeBlockPixel: %02x %02x %02x %02x | %02x %02x %02x %02x bS=%d -> %02x %02x %02x %02x\n",
+        p3, p2, p1, p0, q0, q1, q2, q3, bS,
+        pQ0[-2*Step], pQ0[-1*Step],pQ0[0*Step],pQ0[1*Step]);
+
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair.c
new file mode 100644
index 0000000..7616add
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair.c
@@ -0,0 +1,267 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_DecodeCoeffsToPair.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 decode coefficients module
+ *
+ */
+
+#ifdef DEBUG_ARMVCM4P10_DECODECOEFFSTOPAIR
+#undef DEBUG_ON
+#define DEBUG_ON
+#endif
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+#include "armVCM4P10_CAVLCTables.h"
+
+/* 4x4 DeZigZag table */
+
+/*
+ * Maps a scan-order coefficient number (0..15) to its position in the 4x4
+ * block. Only indices 0..15 are valid; it is applied when the block can
+ * hold more than 4 coefficients.
+ */
+static const OMX_U8 armVCM4P10_ZigZag[16] =
+{
+ 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+/*
+ * Description:
+ * Decodes one CAVLC-coded residual block into position-coefficient pairs.
+ * This function performs the work required by the OpenMAX
+ * DecodeCoeffsToPair and DecodeChromaDCCoeffsToPair functions; most of the
+ * code is common, so it is shared here.
+ *
+ * The bitstream is treated as untrusted. Besides unknown VLC codes, the
+ * function rejects a coefficient count, a total_zeros value or a run_before
+ * value that does not fit into the block, because such values would move
+ * the coefficient number outside 0..15 and index past the de-zigzag table.
+ *
+ * Output format, written through *ppPosCoefbuf for each coefficient from
+ * the last one in scan order to the first:
+ *   byte 0: position (de-zigzagged when sMaxNumCoeff > 4),
+ *           plus 0x20 on the final pair written,
+ *           plus 0x10 when the level is outside -128..127
+ *   byte 1: low 8 bits of the level
+ *   byte 2: bits 8 and up of the level (present only when 0x10 is set)
+ *
+ * Parameters:
+ * [in] ppBitStream Double pointer to current byte in bit stream buffer
+ * [in] pOffset Pointer to current bit position in the byte pointed
+ * to by *ppBitStream
+ * [in] sMaxNumCoeff Maximum number of non-zero coefficients in current
+ * block (4,15 or 16)
+ * [in] nTable Table number (0 to 4) according to the five columns
+ * of Table 9-5 in the H.264 spec
+ * [out] ppBitStream *ppBitStream is updated after each block is decoded
+ * [out] pOffset *pOffset is updated after each block is decoded
+ * [out] pNumCoeff Pointer to the number of nonzero coefficients in
+ * this block
+ * [out] ppPosCoefbuf Double pointer to destination residual
+ * coefficient-position pair buffer; advanced only on success
+ * Return Value:
+ * OMX_Sts_NoErr on success. OMX_Sts_Err when the bitstream holds an
+ * unknown code or values inconsistent with sMaxNumCoeff; in that case the
+ * bitstream position may already have been advanced.
+ */
+
+OMXResult armVCM4P10_DecodeCoeffsToPair(
+    const OMX_U8** ppBitStream,
+    OMX_S32* pOffset,
+    OMX_U8* pNumCoeff,
+    OMX_U8 **ppPosCoefbuf,
+    OMX_INT nTable,
+    OMX_INT sMaxNumCoeff
+    )
+{
+    int CoeffToken, TotalCoeff, TrailingOnes;
+    int Level, LevelCode, LevelPrefix, LevelSuffix, LevelSuffixSize;
+    int SuffixLength, Run, ZerosLeft, CoeffNum;
+    int i, Flags;
+    OMX_U8 *pPosCoefbuf = *ppPosCoefbuf;
+    OMX_S16 pLevel[16];
+    OMX_U8 pRun[16];
+
+    CoeffToken = armUnPackVLC32(ppBitStream, pOffset, armVCM4P10_CAVLCCoeffTokenTables[nTable]);
+    armRetDataErrIf(CoeffToken == ARM_NO_CODEBOOK_INDEX, OMX_Sts_Err);
+
+    TrailingOnes = armVCM4P10_CAVLCTrailingOnes[CoeffToken];
+    TotalCoeff = armVCM4P10_CAVLCTotalCoeff[CoeffToken];
+
+    /* A corrupt stream can signal more coefficients than the block holds
+     * (e.g. 16 for a 15-coefficient block) */
+    armRetDataErrIf(TotalCoeff > sMaxNumCoeff, OMX_Sts_Err);
+
+    *pNumCoeff = (OMX_U8)TotalCoeff;
+
+    DEBUG_PRINTF_2("TotalCoeff = %d, TrailingOnes = %d\n", TotalCoeff, TrailingOnes);
+
+    if (TotalCoeff == 0)
+    {
+        /* Nothing to do */
+        return OMX_Sts_NoErr;
+    }
+
+    /* Decode trailing ones: one sign bit each, magnitude is always 1 */
+    for (i=TotalCoeff-1; i>=TotalCoeff-TrailingOnes; i--)
+    {
+        if (armGetBits(ppBitStream, pOffset, 1))
+        {
+            Level = -1;
+        }
+        else
+        {
+            Level = +1;
+        }
+        pLevel[i] = (OMX_S16)Level;
+
+        DEBUG_PRINTF_2("Level[%d] = %d\n", i, pLevel[i]);
+    }
+
+    /* Decode (non zero) level values; i continues where the loop above
+     * stopped */
+    SuffixLength = 0;
+    if (TotalCoeff>10 && TrailingOnes<3)
+    {
+        SuffixLength=1;
+    }
+    for ( ; i>=0; i--)
+    {
+        LevelPrefix = armUnPackVLC32(ppBitStream, pOffset, armVCM4P10_CAVLCLevelPrefix);
+        armRetDataErrIf(LevelPrefix == ARM_NO_CODEBOOK_INDEX, OMX_Sts_Err);
+
+        LevelSuffixSize = SuffixLength;
+        if (LevelPrefix==14 && SuffixLength==0)
+        {
+            LevelSuffixSize = 4;
+        }
+        if (LevelPrefix==15)
+        {
+            LevelSuffixSize = 12;
+        }
+
+        LevelSuffix = 0;
+        if (LevelSuffixSize > 0)
+        {
+            LevelSuffix = armGetBits(ppBitStream, pOffset, LevelSuffixSize);
+        }
+
+        LevelCode = (LevelPrefix << SuffixLength) + LevelSuffix;
+
+
+        if (LevelPrefix==15 && SuffixLength==0)
+        {
+            LevelCode += 15;
+        }
+
+        /* LevelCode = 2*(magnitude-1) + sign */
+
+        if (i==TotalCoeff-1-TrailingOnes && TrailingOnes<3)
+        {
+            /* Level magnitude can't be 1 */
+            LevelCode += 2;
+        }
+        if (LevelCode & 1)
+        {
+            /* 2a+1 maps to -a-1 */
+            Level = (-LevelCode-1)>>1;
+        }
+        else
+        {
+            /* 2a+0 maps to +a+1 */
+            Level = (LevelCode+2)>>1;
+        }
+        pLevel[i] = (OMX_S16)Level;
+
+        DEBUG_PRINTF_2("Level[%d] = %d\n", i, pLevel[i]);
+
+        /* Adapt the suffix length to the magnitude just decoded */
+        if (SuffixLength==0)
+        {
+            SuffixLength=1;
+        }
+        if ( ((LevelCode>>1)+1)>(3<<(SuffixLength-1)) && SuffixLength<6 )
+        {
+            SuffixLength++;
+        }
+    }
+
+    /* Decode run values */
+    ZerosLeft = 0;
+    if (TotalCoeff < sMaxNumCoeff)
+    {
+        /* Decode TotalZeros VLC */
+        if (sMaxNumCoeff==4)
+        {
+            ZerosLeft = armUnPackVLC32(ppBitStream, pOffset, armVCM4P10_CAVLCTotalZeros2x2Tables[TotalCoeff-1]);
+            armRetDataErrIf(ZerosLeft ==ARM_NO_CODEBOOK_INDEX , OMX_Sts_Err);
+        }
+        else
+        {
+            ZerosLeft = armUnPackVLC32(ppBitStream, pOffset, armVCM4P10_CAVLCTotalZeroTables[TotalCoeff-1]);
+            armRetDataErrIf(ZerosLeft ==ARM_NO_CODEBOOK_INDEX , OMX_Sts_Err);
+        }
+
+        /* The total_zeros tables allow up to 16-TotalCoeff zeros, which is
+         * one too many for a 15-coefficient block; reject anything that
+         * does not fit */
+        armRetDataErrIf(ZerosLeft + TotalCoeff > sMaxNumCoeff, OMX_Sts_Err);
+    }
+
+    DEBUG_PRINTF_1("TotalZeros = %d\n", ZerosLeft);
+
+    /* Scan-order number of the last coefficient */
+    CoeffNum=ZerosLeft+TotalCoeff-1;
+
+    for (i=TotalCoeff-1; i>0; i--)
+    {
+        Run = 0;
+        if (ZerosLeft > 0)
+        {
+            int Table = ZerosLeft;
+            if (Table > 6)
+            {
+                Table = 7;
+            }
+            Run = armUnPackVLC32(ppBitStream, pOffset, armVCM4P10_CAVLCRunBeforeTables[Table-1]);
+            armRetDataErrIf(Run == ARM_NO_CODEBOOK_INDEX, OMX_Sts_Err);
+
+            /* The table used for more than 6 zeros can yield runs up to 14;
+             * a run longer than the zeros left would make the coefficient
+             * number go negative below */
+            armRetDataErrIf(Run > ZerosLeft, OMX_Sts_Err);
+        }
+        pRun[i] = (OMX_U8)Run;
+
+        DEBUG_PRINTF_2("Run[%d] = %d\n", i, pRun[i]);
+
+        ZerosLeft -= Run;
+    }
+    /* Whatever zeros remain precede the first coefficient */
+    pRun[0] = (OMX_U8)ZerosLeft;
+
+    DEBUG_PRINTF_1("Run[0] = %d\n", pRun[0]);
+
+
+    /* Fill in coefficients */
+
+    if (sMaxNumCoeff==15)
+    {
+        CoeffNum++; /* Skip the DC position */
+    }
+
+    /* Walk from the last coefficient back to the first; with the checks
+     * above CoeffNum stays within 0..15 whenever it is used as a position */
+    for (i=(TotalCoeff-1); i>=0; i--)
+    {
+        Level = pLevel[i];
+
+        DEBUG_PRINTF_2("Coef[%d] = %d\n", CoeffNum, Level);
+
+        Flags = CoeffNum;
+        CoeffNum -= (pRun[i]+1);
+        if (sMaxNumCoeff>4)
+        {
+            /* Perform 4x4 DeZigZag */
+            Flags = armVCM4P10_ZigZag[Flags];
+        }
+        if (i==0)
+        {
+            /* End of block flag */
+            Flags += 0x20;
+        }
+        if (Level<-128 || Level>127)
+        {
+            /* Overflow flag */
+            Flags += 0x10;
+        }
+
+        *pPosCoefbuf++ = (OMX_U8)(Flags);
+        *pPosCoefbuf++ = (OMX_U8)(Level & 0xFF);
+        if (Flags & 0x10)
+        {
+            *pPosCoefbuf++ = (OMX_U8)(Level>>8);
+        }
+    }
+
+    *ppPosCoefbuf = pPosCoefbuf;
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DequantTables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DequantTables.c
new file mode 100644
index 0000000..d9c2541
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_DequantTables.c
@@ -0,0 +1,45 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_DequantTables.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 inverse quantize tables
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+#include "armVC.h"
+
+
+const OMX_U8 armVCM4P10_PosToVCol4x4[16] = /* one selector (0..2) per position of a 4x4 block; presumably picks a column of armVCM4P10_VMatrix - confirm against users */
+{
+ 0, 2, 0, 2,
+ 2, 1, 2, 1,
+ 0, 2, 0, 2,
+ 2, 1, 2, 1
+};
+
+const OMX_U8 armVCM4P10_PosToVCol2x2[4] = /* same selector for a 2x2 block; equal to the top-left 2x2 corner of the 4x4 table */
+{
+ 0, 2,
+ 2, 1
+};
+
+const OMX_U8 armVCM4P10_VMatrix[6][3] = /* six rows of three scale factors; presumably the H.264 dequantisation scales with row = QP % 6 - confirm against users */
+{
+ { 10, 16, 13 },
+ { 11, 18, 14 },
+ { 13, 20, 16 },
+ { 14, 23, 18 },
+ { 16, 25, 20 },
+ { 18, 29, 23 }
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_FwdTransformResidual4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_FwdTransformResidual4x4.c
new file mode 100644
index 0000000..93d54c3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_FwdTransformResidual4x4.c
@@ -0,0 +1,78 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_FwdTransformResidual4x4.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 transform module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/*
+ * Description:
+ * Forward 4x4 integer transform of a residual block: rows first, then columns
+ *
+ * Parameters:
+ * [in]  pSrc    Source 4x4 block (16 values; each group of 4 is one row)
+ * [out] pDst    Destination 4x4 block, same layout; also holds the row-pass result
+ *
+ */
+void armVCM4P10_FwdTransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc)
+{
+    int i;
+
+    /* Transform rows: all four inputs of a row are loaded before it is stored */
+    for (i=0; i<16; i+=4)
+    {
+        int d0 = pSrc[i+0];
+        int d1 = pSrc[i+1];
+        int d2 = pSrc[i+2];
+        int d3 = pSrc[i+3];
+        int e0 = d0 + d3;
+        int e1 = d0 - d3;
+        int e2 = d1 + d2;
+        int e3 = d1 - d2;
+        int f0 = e0 + e2;
+        int f1 = (e1 * 2) + e3; /* * 2, not << 1: left-shifting a negative int is undefined in C */
+        int f2 = e0 - e2;
+        int f3 = e1 - (e3 * 2);
+        pDst[i+0] = (OMX_S16)f0;
+        pDst[i+1] = (OMX_S16)f1;
+        pDst[i+2] = (OMX_S16)f2;
+        pDst[i+3] = (OMX_S16)f3;
+    }
+
+    /* Transform columns: same butterfly, in place on pDst with a stride of 4 */
+    for (i=0; i<4; i++)
+    {
+        int f0 = pDst[i+0];
+        int f1 = pDst[i+4];
+        int f2 = pDst[i+8];
+        int f3 = pDst[i+12];
+        int g0 = f0 + f3;
+        int g1 = f0 - f3;
+        int g2 = f1 + f2;
+        int g3 = f1 - f2;
+        int h0 = g0 + g2;
+        int h1 = (g1 * 2) + g3; /* see the row pass: multiply instead of shifting a signed value */
+        int h2 = g0 - g2;
+        int h3 = g1 - (g3 * 2);
+        pDst[i+0] = (OMX_S16) h0;
+        pDst[i+4] = (OMX_S16) h1;
+        pDst[i+8] = (OMX_S16) h2;
+        pDst[i+12] = (OMX_S16) h3;
+    }
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfDiag_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfDiag_Luma.c
new file mode 100644
index 0000000..8732f4f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfDiag_Luma.c
@@ -0,0 +1,106 @@
+/**
+ *
+ * File Name: armVCM4P10_InterpolateHalfDiag_Luma.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function calculates half-pel luma interpolation at the diagonal (1/2, 1/2) position
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+
+/**
+ * Function: armVCM4P10_InterpolateHalfDiag_Luma
+ *
+ * Description:
+ * This function performs interpolation for (1/2, 1/2) positions
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to interpolate
+ * in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [out] pDst Pointer to the interpolation buffer of the (1/2,1/2)-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfDiag_Luma(
+ const OMX_U8* pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8* pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+)
+{
+ OMX_S32 HalfCoeff, pos;
+ OMX_S16 Buf [21 * 16]; /* 21 rows by 16 pixels per row; must hold (iHeight + 5) * iWidth entries, i.e. at most 336 - this is not checked here */
+ OMX_U32 y, x;
+
+ /* check for argument error */
+ armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+ armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr)
+
+ /*
+ * Horizontal pass: Buf receives the unscaled 6-tap half-pel sums for
+ * source rows -2 .. iHeight + 2 (iHeight + 5 rows in total), stored as
+ * a dense 2D array of iWidth columns; the vertical pass below filters it.
+ */
+ for (y = 0; y < iHeight + 5; y++)
+ {
+ for (x = 0; x < iWidth; x++)
+ {
+ pos = (y-2) * iSrcStep + x; /* y is unsigned, so the product wraps for y < 2; storing it in the signed pos presumably yields the intended negative row offset - confirm OMX_U32/OMX_S32 are both 32 bits */
+ HalfCoeff =
+ pSrc [pos - 2] -
+ 5 * pSrc [pos - 1] +
+ 20 * pSrc [pos] +
+ 20 * pSrc [pos + 1] -
+ 5 * pSrc [pos + 2] +
+ pSrc [pos + 3];
+ Buf [y * iWidth + x] = (OMX_S16)HalfCoeff; /* kept unrounded; scaling happens once after the vertical pass */
+ } /* x */
+ } /* y */
+
+ /* Vertical interpolate: the same 6-tap filter applied down the columns of Buf */
+ for (y = 0; y < iHeight; y++)
+ {
+ for (x = 0; x < iWidth; x++)
+ {
+ pos = y * iWidth + x;
+ HalfCoeff =
+ Buf [pos] -
+ 5 * Buf [pos + 1 * iWidth] +
+ 20 * Buf [pos + 2 * iWidth] +
+ 20 * Buf [pos + 3 * iWidth] -
+ 5 * Buf [pos + 4 * iWidth] +
+ Buf [pos + 5 * iWidth];
+
+ HalfCoeff = (HalfCoeff + 512) >> 10; /* each pass has a gain of 32 (taps sum to 32), so round and divide by 32 * 32 */
+ HalfCoeff = armClip(0, 255, HalfCoeff);
+
+ pDst [y * iDstStep + x] = (OMX_U8) HalfCoeff;
+ }
+ }
+
+ return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfHor_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfHor_Luma.c
new file mode 100644
index 0000000..89c0079
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfHor_Luma.c
@@ -0,0 +1,82 @@
+/**
+ *
+ * File Name: armVCM4P10_InterpolateHalfHor_Luma.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function calculates horizontal half-pel luma interpolation
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: armVCM4P10_InterpolateHalfHor_Luma
+ *
+ * Description:
+ * This function performs interpolation for horizontal 1/2-pel positions
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to interpolate
+ * in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [out] pDst Pointer to the interpolation buffer of the 1/2-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfHor_Luma(
+        const OMX_U8* pSrc,
+        OMX_U32 iSrcStep,
+        OMX_U8* pDst,
+        OMX_U32 iDstStep,
+        OMX_U32 iWidth,
+        OMX_U32 iHeight
+)
+{
+    OMX_S32 acc, idx;
+    OMX_INT col, row;
+
+    /* a NULL source or destination is an argument error */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr)
+
+    for (row = 0; row < iHeight; row++)
+    {
+        for (col = 0; col < iWidth; col++)
+        {
+            /* source index of the pixel just left of the half-pel sample */
+            idx = row * iSrcStep + col;
+
+            /* symmetric 6-tap kernel (1, -5, 20, 20, -5, 1) along the row */
+            acc  = 20 * (pSrc [idx] + pSrc [idx + 1]);
+            acc -= 5 * (pSrc [idx - 1] + pSrc [idx + 2]);
+            acc += pSrc [idx - 2] + pSrc [idx + 3];
+
+            /* the kernel gain is 32: round, divide, then clamp to 0..255 */
+            acc = (acc + 16) >> 5;
+            acc = armClip(0, 255, acc);
+
+            pDst [row * iDstStep + col] = acc;
+        } /* col */
+    } /* row */
+
+    return OMX_Sts_NoErr;
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfVer_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfVer_Luma.c
new file mode 100644
index 0000000..f7ecfc5
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_InterpolateHalfVer_Luma.c
@@ -0,0 +1,84 @@
+/**
+ *
+ * File Name: armVCM4P10_InterpolateHalfVer_Luma.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function calculates vertical half-pel luma interpolation
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: armVCM4P10_InterpolateHalfVer_Luma
+ *
+ * Description:
+ * This function performs interpolation for vertical 1/2-pel positions
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to interpolate
+ * in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [out] pDst Pointer to the interpolation buffer of the 1/2-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfVer_Luma(
+        const OMX_U8* pSrc,
+        OMX_U32 iSrcStep,
+        OMX_U8* pDst,
+        OMX_U32 iDstStep,
+        OMX_U32 iWidth,
+        OMX_U32 iHeight
+)
+{
+    OMX_S32 HalfCoeff, pos;
+    OMX_INT y, x;
+    /* signed stride: an unsigned "pos - 2 * iSrcStep" wraps and breaks indexing with 64-bit pointers */
+    const OMX_S32 step = (OMX_S32)iSrcStep;
+
+    /* check for argument error */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr)
+
+    for (y = 0; y < iHeight; y++)
+    {
+        for (x = 0; x < iWidth; x++)
+        {
+            /* 6-tap (1, -5, 20, 20, -5, 1) filter over source rows y-2 .. y+3 */
+            pos = y * iSrcStep + x;
+            HalfCoeff =
+                pSrc [pos - 2 * step] -
+                5 * pSrc [pos - 1 * step] +
+                20 * pSrc [pos] +
+                20 * pSrc [pos + 1 * step] -
+                5 * pSrc [pos + 2 * step] +
+                pSrc [pos + 3 * step];
+            HalfCoeff = (HalfCoeff + 16) >> 5; /* the taps sum to 32: round and divide */
+            HalfCoeff = armClip(0, 255, HalfCoeff);
+            pDst [y * iDstStep + x] = (OMX_U8) HalfCoeff;
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Chroma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Chroma.c
new file mode 100644
index 0000000..1507d23
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Chroma.c
@@ -0,0 +1,109 @@
+/**
+ *
+ * File Name: armVCM4P10_Interpolate_Chroma.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate interpolation for chroma components
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P10_Interpolate_Chroma
+ *
+ * Description:
+ * This function performs interpolation for chroma components.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to
+ * interpolate in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [in] dx Fractional part of horizontal motion vector
+ * component in 1/8 pixel unit (0~7)
+ * [in] dy Fractional part of vertical motion vector
+ * component in 1/8 pixel unit (0~7)
+ * [out] pDst Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCM4P10_Interpolate_Chroma(
+        OMX_U8 *pSrc,
+        OMX_U32 iSrcStep,
+        OMX_U8 *pDst,
+        OMX_U32 iDstStep,
+        OMX_U32 iWidth,
+        OMX_U32 iHeight,
+        OMX_U32 dx,
+        OMX_U32 dy
+)
+{
+    OMX_U32 wTopLeft, wTopRight, wBotLeft, wBotRight;
+    OMX_U32 row, col, top, bot, out;
+
+    /* validate pointers, fractional offsets (0..7) and block geometry */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(dx > 7, OMX_Sts_BadArgErr)
+    armRetArgErrIf(dy > 7, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iSrcStep == 0, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iDstStep == 0, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iWidth == 0, OMX_Sts_BadArgErr)
+    armRetArgErrIf(iHeight == 0, OMX_Sts_BadArgErr)
+
+    if (dx == 0 && dy == 0)
+    {
+        /* fractional mv is (0, 0): straight block copy */
+        for (row = 0; row < iHeight; row++)
+        {
+            top = row * iSrcStep;
+            out = row * iDstStep;
+            for (col = 0; col < iWidth; col++)
+            {
+                pDst [out + col] = pSrc [top + col];
+            }
+        }
+        return OMX_Sts_NoErr;
+    }
+
+    /* bilinear weights in 1/8-pel units; the four of them always sum to 64 */
+    wTopLeft  = (8 - dx) * (8 - dy);
+    wTopRight = dx * (8 - dy);
+    wBotLeft  = (8 - dx) * dy;
+    wBotRight = dx * dy;
+
+    for (row = 0; row < iHeight; row++)
+    {
+        top = row * iSrcStep;
+        bot = (row + 1) * iSrcStep;
+        out = row * iDstStep;
+        for (col = 0; col < iWidth; col++)
+        {
+            pDst [out + col] = (wTopLeft * pSrc [top + col] +
+                                wTopRight * pSrc [top + col + 1] +
+                                wBotLeft * pSrc [bot + col] +
+                                wBotRight * pSrc [bot + col + 1] + 32) >> 6;
+        }
+    }
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Luma.c
new file mode 100644
index 0000000..89978dd
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_Interpolate_Luma.c
@@ -0,0 +1,195 @@
+/**
+ *
+ * File Name: armVCM4P10_Interpolate_Luma.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate interpolation for luma components
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: armM4P10_Copy
+ *
+ * Description:
+ * This function copies a block of data from source to destination
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [out] pDst Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+static OMXResult armM4P10_Copy(
+        const OMX_U8* pSrc,
+        OMX_U32 iSrcStep,
+        OMX_U8* pDst,
+        OMX_U32 iDstStep,
+        OMX_U32 iWidth,
+        OMX_U32 iHeight
+)
+{
+    OMX_U32 row, col, srcOff, dstOff;
+    for (row = 0; row < iHeight; row++) /* one row per pass, honouring both strides */
+    {
+        srcOff = row * iSrcStep;
+        dstOff = row * iDstStep;
+        for (col = 0; col < iWidth; col++)
+        {
+            pDst [dstOff + col] = pSrc [srcOff + col];
+        }
+    }
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function: armVCM4P10_Interpolate_Luma
+ *
+ * Description:
+ * This function performs interpolation for luma components.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to
+ * interpolate in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [in] dx Fractional part of horizontal motion vector
+ * component in 1/4 pixel unit (0~3)
+ * [in] dy Fractional part of vertical motion vector
+ * component in 1/4 pixel unit (0~3)
+ * [out] pDst Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+ OMXResult armVCM4P10_Interpolate_Luma(
+ const OMX_U8 *pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8 *pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight,
+ OMX_U32 dx,
+ OMX_U32 dy
+)
+{
+ OMX_U8 pBuf1 [16*16]; /* scratch block used with stride iWidth; needs iWidth * iHeight <= 256, which is not checked here */
+ const OMX_U8 *pSrcHalfHor = pSrc; /* input of the horizontal half-pel filter; also the full-pel operand averaged in when dx == 0 */
+ const OMX_U8 *pSrcHalfVer = pSrc; /* input of the vertical half-pel filter; also the full-pel operand averaged in when dy == 0 */
+
+ /* check for argument error: NULL buffers or a quarter-pel offset outside 0..3 */
+ armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+ armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr)
+ armRetArgErrIf(dx > 3, OMX_Sts_BadArgErr)
+ armRetArgErrIf(dy > 3, OMX_Sts_BadArgErr)
+
+ /* Work out positions for half pixel interpolation */
+ if (dx == 3)
+ {
+ pSrcHalfVer += 1; /* 3/4 horizontal offset: use the column to the right */
+ }
+ if (dy == 3)
+ {
+ pSrcHalfHor += iSrcStep; /* 3/4 vertical offset: use the row below */
+ }
+
+ /* Switch on type of pixel
+ * Pixels are named 'a' to 's' as in the H.264 standard
+ */
+ if (dx == 0 && dy == 0)
+ {
+ /* G: full-pel position, plain copy */
+ armM4P10_Copy(pSrc, iSrcStep, pDst, iDstStep, iWidth, iHeight);
+ }
+ else if (dy == 0)
+ {
+ /* a, b, c: horizontal half-pel first; dx == 2 (b) stops here */
+ armVCM4P10_InterpolateHalfHor_Luma
+ (pSrcHalfHor, iSrcStep, pDst, iDstStep, iWidth, iHeight);
+
+ if (dx == 1 || dx == 3)
+ {
+ armVCCOMM_Average /* a, c: average in place with the full-pel pixel at pSrc (dx == 1) or pSrc + 1 (dx == 3) */
+ (pDst, pSrcHalfVer, iDstStep, iSrcStep, pDst, iDstStep, iWidth, iHeight);
+ }
+ }
+ else if (dx == 0)
+ {
+ /* d, h, n: vertical half-pel first; dy == 2 (h) stops here */
+ armVCM4P10_InterpolateHalfVer_Luma
+ (pSrcHalfVer, iSrcStep, pDst, iDstStep, iWidth, iHeight);
+
+ if (dy == 1 || dy == 3)
+ {
+ armVCCOMM_Average /* d, n: average in place with the full-pel pixel at pSrc (dy == 1) or one row below (dy == 3) */
+ (pDst, pSrcHalfHor, iDstStep, iSrcStep, pDst, iDstStep, iWidth, iHeight);
+ }
+ }
+ else if (dx == 2 || dy == 2)
+ {
+ /* j: diagonal half-pel, written to pDst; it is also one operand of i, k, f and q */
+ armVCM4P10_InterpolateHalfDiag_Luma
+ (pSrc, iSrcStep, pDst, iDstStep, iWidth, iHeight);
+
+ if (dx == 1 || dx == 3)
+ {
+ /* i, k: vertical half-pel into pBuf1, then average with j in place */
+ armVCM4P10_InterpolateHalfVer_Luma
+ (pSrcHalfVer, iSrcStep, pBuf1, iWidth, iWidth, iHeight);
+
+ armVCCOMM_Average
+ (pDst, pBuf1, iDstStep, iWidth, pDst, iDstStep, iWidth, iHeight);
+ }
+ if (dy == 1 || dy == 3)
+ {
+ /* f, q: horizontal half-pel into pBuf1, then average with j in place */
+ armVCM4P10_InterpolateHalfHor_Luma
+ (pSrcHalfHor, iSrcStep, pBuf1, iWidth, iWidth, iHeight);
+
+ armVCCOMM_Average
+ (pDst, pBuf1, iDstStep, iWidth, pDst, iDstStep, iWidth, iHeight);
+ }
+ }
+ else /* dx=1,3 and dy=1,3 */
+ {
+ /* e, g, p, r: average of a horizontal half-pel (pBuf1) and a vertical half-pel (pDst) */
+ armVCM4P10_InterpolateHalfHor_Luma
+ (pSrcHalfHor, iSrcStep, pBuf1, iWidth, iWidth, iHeight);
+
+ armVCM4P10_InterpolateHalfVer_Luma
+ (pSrcHalfVer, iSrcStep, pDst, iDstStep, iWidth, iHeight);
+
+ armVCCOMM_Average
+ (pBuf1, pDst, iWidth, iDstStep, pDst, iDstStep, iWidth, iHeight);
+ }
+
+ return OMX_Sts_NoErr; /* results of the helper calls above are not inspected */
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_PredictIntraDC4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_PredictIntraDC4x4.c
new file mode 100644
index 0000000..b713073
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_PredictIntraDC4x4.c
@@ -0,0 +1,88 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_PredictIntraDC4x4.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 4x4 intra prediction module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/*
+ * Description:
+ * Perform DC style intra prediction, averaging upper and left block
+ *
+ * Parameters:
+ * [in] pSrcLeft Pointer to the buffer of 4 left coefficients:
+ * p[x, y] (x = -1, y = 0..3)
+ * [in] pSrcAbove Pointer to the buffer of 4 above coefficients:
+ * p[x,y] (x = 0..3, y = -1)
+ * [in] leftStep Step of left coefficient buffer
+ * [in] dstStep Step of the destination buffer
+ * [in] availability Neighboring 16x16 MB availability flag
+ * [out] pDst Pointer to the destination buffer
+ *
+ * Return Value:
+ * None
+ */
+
+void armVCM4P10_PredictIntraDC4x4(
+        const OMX_U8* pSrcLeft,
+        const OMX_U8 *pSrcAbove,
+        OMX_U8* pDst,
+        OMX_INT leftStep,
+        OMX_INT dstStep,
+        OMX_S32 availability
+)
+{
+    int k, row, col;
+    int total = 0, nEdges = 0;
+    OMX_U8 dc;
+
+    /* left column: four samples, leftStep apart */
+    if (availability & OMX_VC_LEFT)
+    {
+        nEdges++;
+        for (k = 0; k < 4; k++)
+        {
+            total += pSrcLeft[k*leftStep];
+        }
+    }
+    /* row above: four consecutive samples */
+    if (availability & OMX_VC_UPPER)
+    {
+        nEdges++;
+        for (k = 0; k < 4; k++)
+        {
+            total += pSrcAbove[k];
+        }
+    }
+
+    /* rounded mean over 8 samples (both edges) or 4 samples (one edge); */
+    /* with no edge available the prediction is the mid-level 128 */
+    dc = (OMX_U8)((nEdges == 2) ? ((total + 4) >> 3) :
+                  (nEdges == 1) ? ((total + 2) >> 2) :
+                                  128);
+
+    /* every pixel of the 4x4 destination gets the same value */
+    for (row = 0; row < 4; row++)
+    {
+        for (col = 0; col < 4; col++)
+        {
+            pDst[row*dstStep+col] = dc;
+        }
+    }
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_QuantTables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_QuantTables.c
new file mode 100644
index 0000000..f0b5bb0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_QuantTables.c
@@ -0,0 +1,31 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_QuantTables.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 forward quantize tables
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+
+const OMX_U32 armVCM4P10_MFMatrix[6][3] = /* six rows of three multipliers; presumably the H.264 forward-quantisation factors with row = QP % 6 - confirm against users */
+{
+ {13107, 5243, 8066},
+ {11916, 4660, 7490},
+ {10082, 4194, 6554},
+ { 9362, 3647, 5825},
+ { 8192, 3355, 5243},
+ { 7282, 2893, 4559}
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_SADQuar.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_SADQuar.c
new file mode 100644
index 0000000..a41e04b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_SADQuar.c
@@ -0,0 +1,84 @@
+/**
+ *
+ * File Name: armVCM4P10_SADQuar.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate SAD of pSrc with average of two Ref blocks
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P10_SADQuar
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the
+ * average of the other two (pSrcRef0 and pSrcRef1)
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to the original block
+ * [in] pSrcRef0 Pointer to reference block 0
+ * [in] pSrcRef1 Pointer to reference block 1
+ * [in] iSrcStep Step of the original block buffer
+ * [in] iRefStep0 Step of reference block 0
+ * [in] iRefStep1 Step of reference block 1
+ * [in] iHeight Height of the block
+ * [in] iWidth Width of the block
+ * [out] pDstSAD Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCM4P10_SADQuar(
+        const OMX_U8* pSrc,
+        const OMX_U8* pSrcRef0,
+        const OMX_U8* pSrcRef1,
+        OMX_U32 iSrcStep,
+        OMX_U32 iRefStep0,
+        OMX_U32 iRefStep1,
+        OMX_U32* pDstSAD,
+        OMX_U32 iHeight,
+        OMX_U32 iWidth
+)
+{
+    OMX_S32 total = 0;
+    OMX_INT col, row;
+
+    /* every buffer pointer is mandatory */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRef0 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRef1 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr)
+
+    for (row = 0; row < iHeight; row++)
+    {
+        for (col = 0; col < iWidth; col++)
+        {
+            /* rounded mean of the two references, then |source - mean| */
+            int avg = (pSrcRef0 [row * iRefStep0 + col] +
+                       pSrcRef1 [row * iRefStep1 + col] + 1) >> 1;
+            total += armAbs(pSrc [row * iSrcStep + col] - avg);
+        }
+    }
+
+    *pDstSAD = total;
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_TransformResidual4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_TransformResidual4x4.c
new file mode 100644
index 0000000..f9f756a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_TransformResidual4x4.c
@@ -0,0 +1,80 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_TransformResidual4x4.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 transform module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/*
+ * Description:
+ * Transform Residual 4x4 Coefficients
+ *
+ * Parameters:
+ * [in] pSrc Source 4x4 block
+ * [out] pDst Destination 4x4 block
+ *
+ */
+
+void armVCM4P10_TransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc)
+{
+    int k;
+
+    /* Pass 1: 1-D inverse butterfly over each row of four coefficients */
+    for (k = 0; k < 4; k++)
+    {
+        const int r  = 4 * k;
+        const int c0 = pSrc[r];
+        const int c1 = pSrc[r + 1];
+        const int c2 = pSrc[r + 2];
+        const int c3 = pSrc[r + 3];
+
+        /* even part from c0/c2, odd part from c1/c3 with halved cross terms */
+        const int evenSum  = c0 + c2;
+        const int evenDiff = c0 - c2;
+        const int oddDiff  = (c1>>1) - c3;
+        const int oddSum   = c1 + (c3>>1);
+
+        /* recombine; the whole row was read before any of it is written */
+        pDst[r]     = (OMX_S16)(evenSum + oddSum);
+        pDst[r + 1] = (OMX_S16)(evenDiff + oddDiff);
+        pDst[r + 2] = (OMX_S16)(evenDiff - oddDiff);
+        pDst[r + 3] = (OMX_S16)(evenSum - oddSum);
+    }
+
+    /* Pass 2: same butterfly down each column, then round and scale by 1/64 */
+    for (k = 0; k < 4; k++)
+    {
+        const int c0 = pDst[k];
+        const int c1 = pDst[k + 4];
+        const int c2 = pDst[k + 8];
+        const int c3 = pDst[k + 12];
+
+        /* same even/odd split as in the row pass */
+        const int evenSum  = c0 + c2;
+        const int evenDiff = c0 - c2;
+        const int oddDiff  = (c1>>1) - c3;
+        const int oddSum   = c1 + (c3>>1);
+
+        pDst[k]      = (OMX_S16)((evenSum + oddSum + 32)>>6);
+        pDst[k + 4]  = (OMX_S16)((evenDiff + oddDiff + 32)>>6);
+        pDst[k + 8]  = (OMX_S16)((evenDiff - oddDiff + 32)>>6);
+        pDst[k + 12] = (OMX_S16)((evenSum - oddSum + 32)>>6);
+    }
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock2x2.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock2x2.c
new file mode 100644
index 0000000..dda49f6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock2x2.c
@@ -0,0 +1,78 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_UnpackBlock2x2.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 inverse quantize and transform helper module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+
+/*
+ * Description
+ * Unpack a 2x2 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in] ppSrc Double pointer to residual coefficient-position pair
+ * buffer output by CAVLC decoding
+ * [out] ppSrc *ppSrc is updated to the start of next non empty block
+ * [out] pDst Pointer to unpacked 2x2 block
+ */
+
+void armVCM4P10_UnpackBlock2x2(
+        const OMX_U8 **ppSrc,
+        OMX_S16* pDst
+)
+{
+    const OMX_U8 *pIn = *ppSrc;
+    int pos, hdr, coef;
+
+    /* start from an all-zero 2x2 block */
+    for (pos = 0; pos < 4; pos++)
+    {
+        pDst[pos] = 0;
+    }
+
+    for (;;)
+    {
+        /* header byte: position in bits 0-3, 0x10 = 16-bit value, 0x20 = last entry */
+        hdr = *pIn++;
+        coef = *pIn++;
+        if (hdr & 0x10)
+        {
+            /* 16-bit value, low byte first; sign-extend from bit 15 */
+            coef |= (*pIn++) << 8;
+            if (coef & 0x8000)
+            {
+                coef -= 0x10000;
+            }
+        }
+        else if (coef & 0x80)
+        {
+            /* 8-bit value; sign-extend from bit 7 */
+            coef -= 0x100;
+        }
+
+        pDst[hdr & 15] = (OMX_S16)coef;
+        if (hdr & 0x20)
+        {
+            break;
+        }
+    }
+
+    *ppSrc = pIn;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock4x4.c
new file mode 100644
index 0000000..3c0dcbd
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/armVCM4P10_UnpackBlock4x4.c
@@ -0,0 +1,78 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_UnpackBlock4x4.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 inverse quantize and transform helper module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+
+/*
+ * Description
+ * Unpack a 4x4 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in] ppSrc Double pointer to residual coefficient-position pair
+ * buffer output by CAVLC decoding
+ * [out] ppSrc *ppSrc is updated to the start of next non empty block
+ * [out] pDst Pointer to unpacked 4x4 block
+ */
+
+void armVCM4P10_UnpackBlock4x4(
+        const OMX_U8 **ppSrc,
+        OMX_S16* pDst
+)
+{
+    const OMX_U8 *pIn = *ppSrc;
+    int pos, hdr, coef;
+
+    /* start from an all-zero 4x4 block */
+    for (pos = 0; pos < 16; pos++)
+    {
+        pDst[pos] = 0;
+    }
+
+    for (;;)
+    {
+        /* header byte: position in bits 0-3, 0x10 = 16-bit value, 0x20 = last entry */
+        hdr = *pIn++;
+        coef = *pIn++;
+        if (hdr & 0x10)
+        {
+            /* 16-bit value, low byte first; sign-extend from bit 15 */
+            coef |= (*pIn++) << 8;
+            if (coef & 0x8000)
+            {
+                coef -= 0x10000;
+            }
+        }
+        else if (coef & 0x80)
+        {
+            /* 8-bit value; sign-extend from bit 7 */
+            coef -= 0x100;
+        }
+
+        pDst[hdr & 15] = (OMX_S16)coef;
+        if (hdr & 0x20)
+        {
+            break;
+        }
+    }
+
+    *ppSrc = pIn;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_Average_4x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_Average_4x.c
new file mode 100644
index 0000000..ac0d523
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_Average_4x.c
@@ -0,0 +1,84 @@
+/**
+ *
+ * File Name: omxVCM4P10_Average_4x.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate Average of two 4x4 or 4x8 blocks
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_Average_4x (6.3.5.5.3)
+ *
+ * Description:
+ * This function calculates the average of two 4x4, 4x8 blocks. The result
+ * is rounded according to (a+b+1)/2.
+ *
+ * Input Arguments:
+ *
+ * pPred0 - Pointer to the top-left corner of reference block 0
+ * pPred1 - Pointer to the top-left corner of reference block 1
+ * iPredStep0 - Step of reference block 0; must be a multiple of 4.
+ * iPredStep1 - Step of reference block 1; must be a multiple of 4.
+ * iDstStep - Step of the destination buffer; must be a multiple of 4.
+ * iHeight - Height of the blocks; must be either 4 or 8.
+ *
+ * Output Arguments:
+ *
+ * pDstPred - Pointer to the destination buffer. 4-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pPred0, pPred1, or pDstPred
+ * - pDstPred is not aligned on a 4-byte boundary
+ * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 4
+ * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 4
+ * - iDstStep <= 0 or iDstStep is not a multiple of 4
+ * - iHeight is not equal to either 4 or 8
+ *
+ */
+ OMXResult omxVCM4P10_Average_4x (
+        const OMX_U8* pPred0,
+        const OMX_U8* pPred1,
+        OMX_U32 iPredStep0,
+        OMX_U32 iPredStep1,
+        OMX_U8* pDstPred,
+        OMX_U32 iDstStep,
+        OMX_U32 iHeight
+)
+{
+    /* NULL pointers, a height other than 4 or 8, a zero or non-multiple-of-4 */
+    /* step and a misaligned destination are all rejected as bad arguments */
+    armRetArgErrIf(pPred0 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pPred1 == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstPred == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(!((iHeight == 4) || (iHeight == 8)), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iPredStep0 == 0) || ((iPredStep0 & 3) != 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iPredStep1 == 0) || ((iPredStep1 & 3) != 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iDstStep == 0) || ((iDstStep & 3) != 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot4ByteAligned(pDstPred), OMX_Sts_BadArgErr)
+
+    return armVCCOMM_Average(pPred0, pPred1, iPredStep0, iPredStep1, pDstPred, iDstStep, 4, iHeight);
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Half.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Half.c
new file mode 100644
index 0000000..c490e10
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Half.c
@@ -0,0 +1,191 @@
+/**
+ *
+ * File Name: omxVCM4P10_BlockMatch_Half.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for half pel Block matching,
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P10_BlockMatch_Half (6.3.5.2.2)
+ *
+ * Description:
+ * Performs a half-pel block match using results from a prior integer search.
+ * Returns the best MV and associated cost. This function estimates the
+ * half-pixel motion vector by interpolating the integer resolution motion
+ * vector referenced by the input parameter pSrcDstBestMV, i.e., the initial
+ * integer MV is generated externally. The function
+ * omxVCM4P10_BlockMatch_Integer may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrgY - Pointer to the current position in original picture plane. If
+ * iBlockWidth==4, 4-byte alignment required. If iBlockWidth==8,
+ * 8-byte alignment required. If iBlockWidth==16, 16-byte alignment
+ * required.
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the
+ * reference picture If iBlockWidth==4, 4-byte alignment
+ * required. If iBlockWidth==8, 8-byte alignment required. If
+ * iBlockWidth==16, 16-byte alignment required.
+ * nSrcOrgStep - Stride of the original picture plane in terms of full
+ * pixels; must be a multiple of iBlockWidth.
+ * nSrcRefStep - Stride of the reference picture plane in terms of full
+ * pixels
+ * iBlockWidth - Width of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * iBlockHeight - Height of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * nLamda - Lamda factor, used to compute motion cost
+ * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to
+ * compute motion cost
+ * pSrcDstBestMV - The best MV resulting from a prior integer search,
+ * represented in terms of 1/4-pel units
+ *
+ * Output Arguments:
+ *
+ * pSrcDstBestMV - Best MV resulting from the half-pel search, expressed in
+ * terms of 1/4-pel units
+ * pBestCost - Motion cost associated with the best MV; computed as
+ * SAD+Lamda*BitsUsedByMV
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - any of the following pointers is NULL: pSrcOrgY, pSrcRefY,
+ * pSrcDstBestMV, pMVPred, pBestCost
+ * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+
+OMXResult omxVCM4P10_BlockMatch_Half(
+    const OMX_U8* pSrcOrgY,
+    OMX_S32 nSrcOrgStep,
+    const OMX_U8* pSrcRefY,
+    OMX_S32 nSrcRefStep,
+    OMX_U8 iBlockWidth,
+    OMX_U8 iBlockHeight,
+    OMX_U32 nLamda,
+    const OMXVCMotionVector* pMVPred,
+    OMXVCMotionVector* pSrcDstBestMV,
+    OMX_S32* pBestCost
+)
+{
+    /* Half-pel refinement: tries the nine candidates at -2/0/+2 quarter-pel units
+     * around the incoming MV.  Returns OMX_Sts_BadArgErr on invalid arguments. */
+    OMX_INT candSAD;
+    OMX_INT fromX, toX, fromY, toY;
+    /* Reference and original block pointers for the candidate being tested */
+    const OMX_U8 *pTempSrcRefY, *pTempSrcOrgY;
+    OMX_S16 x, y;
+    OMXVCMotionVector diffMV, candMV, integerMV;
+    /* Interpolated candidate block, written with a stride of iBlockWidth */
+    OMX_U8 interpolY[256];
+
+    /* Argument error checks.  Block dimensions are validated first so that the
+     * stride test further down can never evaluate a modulo by zero. */
+    armRetArgErrIf(((iBlockWidth!=4)&&(iBlockWidth!=8)&&(iBlockWidth!=16)) , OMX_Sts_BadArgErr);
+    armRetArgErrIf(((iBlockHeight!=4)&&(iBlockHeight!=8)&&(iBlockHeight!=16)) , OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcOrgY == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcRefY == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMVPred == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcDstBestMV == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBestCost == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 4) && (!armIs4ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 8) && (!armIs8ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 4) && (!armIs4ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 8) && (!armIs8ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((nSrcOrgStep % iBlockWidth), OMX_Sts_BadArgErr);
+
+    /* Search window: one half-pel step on either side of the starting MV */
+    fromX = 1;
+    toX = 1;
+    fromY = 1;
+    toY = 1;
+
+    /* Initialize to max value as a start point */
+    *pBestCost = 0x7fffffff;
+
+    integerMV.dx = pSrcDstBestMV->dx;
+    integerMV.dy = pSrcDstBestMV->dy;
+
+    /* Looping on y- axis */
+    for (y = -fromY; y <= toY; y++)
+    {
+        /* Looping on x- axis */
+        for (x = -fromX; x <= toX; x++)
+        {
+            /* Positioning the pointer: a negative step is one full pixel back plus a +2 fraction */
+            pTempSrcRefY = pSrcRefY + (nSrcRefStep * (integerMV.dy/4)) + (integerMV.dx/4);
+            if (x < 0)
+            {
+                pTempSrcRefY = pTempSrcRefY + x;
+            }
+            if (y < 0)
+            {
+                pTempSrcRefY = pTempSrcRefY + (y * nSrcRefStep);
+            }
+            pTempSrcOrgY = pSrcOrgY;
+
+            /* Prepare cand MV (quarter-pel units, so a half-pel step is 2) */
+            candMV.dx = integerMV.dx + x * 2;
+            candMV.dy = integerMV.dy + y * 2;
+
+            /* Interpolate half pel for the current position */
+            armVCM4P10_Interpolate_Luma(
+                pTempSrcRefY,
+                nSrcRefStep,
+                interpolY,
+                iBlockWidth,
+                iBlockWidth,
+                iBlockHeight,
+                armAbs(x) * 2,
+                armAbs(y) * 2);
+
+            /* Calculate the SAD between the original block and the interpolated one */
+            armVCCOMM_SAD(
+                pTempSrcOrgY,
+                nSrcOrgStep,
+                interpolY,
+                iBlockWidth,
+                &candSAD,
+                iBlockHeight,
+                iBlockWidth);
+
+            diffMV.dx = candMV.dx - pMVPred->dx;
+            diffMV.dy = candMV.dy - pMVPred->dy;
+
+            /* Result calculations: presumably keeps the cheaper MV/cost - confirm in helper */
+            armVCM4P10_CompareMotionCostToMV (
+                candMV.dx,
+                candMV.dy,
+                diffMV,
+                candSAD,
+                pSrcDstBestMV,
+                nLamda,
+                pBestCost);
+        } /* End of x- axis */
+    } /* End of y-axis */
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Integer.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Integer.c
new file mode 100644
index 0000000..f7764e1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Integer.c
@@ -0,0 +1,196 @@
+/**
+ *
+ * File Name: omxVCM4P10_BlockMatch_Integer.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for Block matching, a full search algorithm
+ * is implemented
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P10_BlockMatch_Integer (6.3.5.2.1)
+ *
+ * Description:
+ * Performs integer block match. Returns best MV and associated cost.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrgY - Pointer to the top-left corner of the current block. If
+ * iBlockWidth==4, 4-byte alignment required. If iBlockWidth==8,
+ * 8-byte alignment required. If iBlockWidth==16, 16-byte alignment
+ * required.
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the
+ * reference picture. If iBlockWidth==4, 4-byte alignment
+ * required. If iBlockWidth==8, 8-byte alignment required. If
+ * iBlockWidth==16, 16-byte alignment required.
+ * nSrcOrgStep - Stride of the original picture plane, expressed in terms
+ * of integer pixels; must be a multiple of iBlockWidth.
+ * nSrcRefStep - Stride of the reference picture plane, expressed in terms
+ * of integer pixels
+ * pRefRect - pointer to the valid reference rectangle inside the reference
+ * picture plane
+ * pCurrPointPos - position of the current block in the current plane
+ * iBlockWidth - Width of the current block, expressed in terms of integer
+ * pixels; must be equal to either 4, 8, or 16.
+ * iBlockHeight - Height of the current block, expressed in terms of
+ * integer pixels; must be equal to either 4, 8, or 16.
+ * nLamda - Lamda factor; used to compute motion cost
+ * pMVPred - Predicted MV; used to compute motion cost, expressed in terms
+ * of 1/4-pel units
+ * pMVCandidate - Candidate MV; used to initialize the motion search,
+ * expressed in terms of integer pixels
+ * pMESpec - pointer to the ME specification structure
+ *
+ * Output Arguments:
+ *
+ * pBestMV - Best MV resulting from integer search, expressed in terms
+ * of 1/4-pel units
+ * pBestCost - Motion cost associated with the best MV; computed as
+ * SAD+Lamda*BitsUsedByMV
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - any of the following pointers are NULL:
+ * pSrcOrgY, pSrcRefY, pRefRect, pMVPred, pMVCandidate, or pMESpec.
+ * - Either iBlockWidth or iBlockHeight are values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+
+ OMXResult omxVCM4P10_BlockMatch_Integer (
+    const OMX_U8 *pSrcOrgY,
+    OMX_S32 nSrcOrgStep,
+    const OMX_U8 *pSrcRefY,
+    OMX_S32 nSrcRefStep,
+    const OMXRect *pRefRect,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    OMX_U8 iBlockWidth,
+    OMX_U8 iBlockHeight,
+    OMX_U32 nLamda,
+    const OMXVCMotionVector *pMVPred,
+    const OMXVCMotionVector *pMVCandidate,
+    OMXVCMotionVector *pBestMV,
+    OMX_S32 *pBestCost,
+    void *pMESpec
+)
+{
+    /* Exhaustive integer-pel search over a window clamped to pRefRect. */
+    OMX_INT candSAD;
+    OMX_INT fromX, toX, fromY, toY;
+    /* Reference and original block pointers for the candidate being tested */
+    const OMX_U8 *pTempSrcRefY, *pTempSrcOrgY;
+    OMX_S16 x, y;
+    OMXVCMotionVector diffMV;
+    OMX_S32 nSearchRange;
+    ARMVCM4P10_MESpec *armMESpec = (ARMVCM4P10_MESpec *) pMESpec;
+
+    /* Argument error checks (pMVCandidate is validated but not otherwise used here) */
+    armRetArgErrIf((iBlockWidth == 4) && (!armIs4ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 8) && (!armIs8ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 4) && (!armIs4ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 8) && (!armIs8ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcOrgY == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcRefY == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pCurrPointPos == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMVPred == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMVCandidate == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBestMV == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBestCost == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(((iBlockWidth!=4)&&(iBlockWidth!=8)&&(iBlockWidth!=16)) , OMX_Sts_BadArgErr);
+    armRetArgErrIf(((iBlockHeight!=4)&&(iBlockHeight!=8)&&(iBlockHeight!=16)) , OMX_Sts_BadArgErr);
+
+    /* The search range is selected by block width from the ME specification */
+    if(iBlockWidth == 4)
+    {
+        nSearchRange = armMESpec->MEParams.searchRange4x4;
+    }
+    else if(iBlockWidth == 8)
+    {
+        nSearchRange = armMESpec->MEParams.searchRange8x8;
+    }
+    else
+    {
+        nSearchRange = armMESpec->MEParams.searchRange16x16;
+    }
+    /* Check for valid region: start from the full range, then clamp each side to pRefRect */
+    fromX = nSearchRange;
+    toX = nSearchRange;
+    fromY = nSearchRange;
+    toY = nSearchRange;
+
+    if ((pCurrPointPos->x - nSearchRange) < pRefRect->x)
+    {
+        fromX = pCurrPointPos->x - pRefRect->x;
+    }
+
+    if ((pCurrPointPos->x + iBlockWidth + nSearchRange) > (pRefRect->x + pRefRect->width))
+    {
+        toX = pRefRect->width - (pCurrPointPos->x - pRefRect->x) - iBlockWidth;
+    }
+
+    if ((pCurrPointPos->y - nSearchRange) < pRefRect->y)
+    {
+        fromY = pCurrPointPos->y - pRefRect->y;
+    }
+
+    /* The bottom limit depends on the block HEIGHT and the rectangle HEIGHT */
+    if ((pCurrPointPos->y + iBlockHeight + nSearchRange) > (pRefRect->y + pRefRect->height))
+    {
+        toY = pRefRect->height - (pCurrPointPos->y - pRefRect->y) - iBlockHeight;
+    }
+
+    pBestMV->dx = -fromX * 4;
+    pBestMV->dy = -fromY * 4;
+    /* Initialize to max value as a start point */
+    *pBestCost = 0x7fffffff;
+
+    /* Looping on y- axis */
+    for (y = -fromY; y <= toY; y++)
+    {
+        /* Looping on x- axis */
+        for (x = -fromX; x <= toX; x++)
+        {
+            /* Positioning the pointer at the candidate's integer offset */
+            pTempSrcRefY = pSrcRefY + (nSrcRefStep * y) + x;
+            pTempSrcOrgY = pSrcOrgY;
+            /* Calculate the SAD */
+            armVCCOMM_SAD(
+                pTempSrcOrgY,
+                nSrcOrgStep,
+                pTempSrcRefY,
+                nSrcRefStep,
+                &candSAD,
+                iBlockHeight,
+                iBlockWidth);
+
+            diffMV.dx = (x * 4) - pMVPred->dx;
+            diffMV.dy = (y * 4) - pMVPred->dy;
+            /* Result calculations: candidate MV is passed in 1/4-pel units (x * 4, y * 4) */
+            armVCM4P10_CompareMotionCostToMV ((x * 4), (y * 4), diffMV, candSAD, pBestMV, nLamda, pBestCost);
+        } /* End of x- axis */
+    } /* End of y-axis */
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Quarter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Quarter.c
new file mode 100644
index 0000000..513ee25
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_BlockMatch_Quarter.c
@@ -0,0 +1,199 @@
+/**
+ *
+ * File Name: omxVCM4P10_BlockMatch_Quarter.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for quarter pel Block matching,
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P10_BlockMatch_Quarter (6.3.5.2.3)
+ *
+ * Description:
+ * Performs a quarter-pel block match using results from a prior half-pel
+ * search. Returns the best MV and associated cost. This function estimates
+ * the quarter-pixel motion vector by interpolating the half-pel resolution
+ * motion vector referenced by the input parameter pSrcDstBestMV, i.e., the
+ * initial half-pel MV is generated externally. The function
+ * omxVCM4P10_BlockMatch_Half may be used for half-pel motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrgY - Pointer to the current position in original picture plane. If
+ * iBlockWidth==4, 4-byte alignment required. If iBlockWidth==8,
+ * 8-byte alignment required. If iBlockWidth==16, 16-byte alignment
+ * required.
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the
+ * reference picture If iBlockWidth==4, 4-byte alignment
+ * required. If iBlockWidth==8, 8-byte alignment required. If
+ * iBlockWidth==16, 16-byte alignment required.
+ * nSrcOrgStep - Stride of the original picture plane in terms of full
+ * pixels; must be a multiple of iBlockWidth.
+ * nSrcRefStep - Stride of the reference picture plane in terms of full
+ * pixels
+ * iBlockWidth - Width of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * iBlockHeight - Height of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * nLamda - Lamda factor, used to compute motion cost
+ * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to
+ * compute motion cost
+ * pSrcDstBestMV - The best MV resulting from a prior half-pel search,
+ * represented in terms of 1/4 pel units
+ *
+ * Output Arguments:
+ *
+ * pSrcDstBestMV - Best MV resulting from the quarter-pel search, expressed
+ * in terms of 1/4-pel units
+ * pBestCost - Motion cost associated with the best MV; computed as
+ * SAD+Lamda*BitsUsedByMV
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - One or more of the following pointers is NULL:
+ * pSrcOrgY, pSrcRefY, pSrcDstBestMV, pMVPred, pBestCost
+ * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+
+OMXResult omxVCM4P10_BlockMatch_Quarter(
+    const OMX_U8* pSrcOrgY,
+    OMX_S32 nSrcOrgStep,
+    const OMX_U8* pSrcRefY,
+    OMX_S32 nSrcRefStep,
+    OMX_U8 iBlockWidth,
+    OMX_U8 iBlockHeight,
+    OMX_U32 nLamda,
+    const OMXVCMotionVector* pMVPred,
+    OMXVCMotionVector* pSrcDstBestMV,
+    OMX_S32* pBestCost
+)
+{
+    /* Quarter-pel refinement: tries the nine candidates at -1/0/+1 around the incoming MV. */
+    OMX_INT candSAD;
+    OMX_INT fromX, toX, fromY, toY;
+    /* Reference and original block pointers for the candidate being tested */
+    const OMX_U8 *pTempSrcRefY, *pTempSrcOrgY;
+    OMX_S16 x, y;
+    OMXVCMotionVector diffMV, candMV, initialMV;
+    /* Interpolated candidate block, written with a stride of iBlockWidth */
+    OMX_U8 interpolY[256];
+    OMX_S32 pelPosX, pelPosY;
+
+    /* Argument error checks.  Block dimensions are validated first so that the
+     * stride test further down can never evaluate a modulo by zero. */
+    armRetArgErrIf(((iBlockWidth!=4)&&(iBlockWidth!=8)&&(iBlockWidth!=16)) , OMX_Sts_BadArgErr);
+    armRetArgErrIf(((iBlockHeight!=4)&&(iBlockHeight!=8)&&(iBlockHeight!=16)) , OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcOrgY == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcRefY == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMVPred == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcDstBestMV == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBestCost == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 4) && (!armIs4ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 8) && (!armIs8ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcOrgY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 4) && (!armIs4ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 8) && (!armIs8ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlockWidth == 16) && (!armIs16ByteAligned(pSrcRefY)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((nSrcOrgStep % iBlockWidth), OMX_Sts_BadArgErr);
+
+    /* Search window: one quarter-pel step on either side of the starting MV */
+    fromX = 1;
+    toX = 1;
+    fromY = 1;
+    toY = 1;
+
+    /* Initialize to max value as a start point */
+    *pBestCost = 0x7fffffff;
+
+    initialMV.dx = pSrcDstBestMV->dx;
+    initialMV.dy = pSrcDstBestMV->dy;
+
+    /* Looping on y- axis */
+    for (y = -fromY; y <= toY; y++)
+    {
+        /* Looping on x- axis */
+        for (x = -fromX; x <= toX; x++)
+        {
+            /* Positioning the pointer at the full-pel part of the starting MV */
+            pTempSrcRefY = pSrcRefY + (nSrcRefStep * (initialMV.dy/4)) + (initialMV.dx/4);
+
+            /* Calculating the fract pel position.  (mv % 4) + step spans [-4,4]; it is
+             * folded into [0,3] by moving the reference pointer one full pixel. */
+            pelPosX = (initialMV.dx % 4) + x;
+            if (pelPosX < 0)
+            {
+                pTempSrcRefY = pTempSrcRefY - 1;
+                pelPosX += 4;
+            }
+            else if (pelPosX > 3)
+            {
+                pTempSrcRefY = pTempSrcRefY + 1;
+                pelPosX -= 4;
+            }
+            pelPosY = (initialMV.dy % 4) + y;
+            if (pelPosY < 0)
+            {
+                pTempSrcRefY = pTempSrcRefY - (1 * nSrcRefStep);
+                pelPosY += 4;
+            }
+            else if (pelPosY > 3)
+            {
+                pTempSrcRefY = pTempSrcRefY + (1 * nSrcRefStep);
+                pelPosY -= 4;
+            }
+
+            pTempSrcOrgY = pSrcOrgY;
+
+            /* Prepare cand MV */
+            candMV.dx = initialMV.dx + x;
+            candMV.dy = initialMV.dy + y;
+
+            /* Interpolate quarter pel for the current position */
+            armVCM4P10_Interpolate_Luma(
+                pTempSrcRefY,
+                nSrcRefStep,
+                interpolY,
+                iBlockWidth,
+                iBlockWidth,
+                iBlockHeight,
+                pelPosX,
+                pelPosY);
+
+            /* Calculate the SAD between the original block and the interpolated one */
+            armVCCOMM_SAD(pTempSrcOrgY, nSrcOrgStep, interpolY, iBlockWidth,
+                          &candSAD, iBlockHeight, iBlockWidth);
+
+            diffMV.dx = candMV.dx - pMVPred->dx;
+            diffMV.dy = candMV.dy - pMVPred->dy;
+
+            /* Result calculations: presumably keeps the cheaper MV/cost - confirm in helper */
+            armVCM4P10_CompareMotionCostToMV (candMV.dx, candMV.dy, diffMV, candSAD,
+                                              pSrcDstBestMV, nLamda, pBestCost);
+
+        } /* End of x- axis */
+    } /* End of y-axis */
+
+    return OMX_Sts_NoErr;
+
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
new file mode 100644
index 0000000..a07b1bb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
@@ -0,0 +1,107 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DeblockChroma_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 intra chroma deblock
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DeblockChroma_I (6.3.3.3.6)
+ *
+ * Description:
+ * Performs in-place deblocking filtering on all edges of the chroma
+ * macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 8-byte aligned.
+ * srcdstStep - step of the arrays; must be a multiple of 8.
+ * pAlpha - pointer to a 2x2 array of alpha thresholds, organized as
+ * follows: {external vertical edge, internal vertical edge,
+ * external horizontal edge, internal horizontal edge }. Per
+ * [ISO14496-10] alpha values must be in the range [0,255].
+ * pBeta - pointer to a 2x2 array of Beta Thresholds, organized as follows:
+ * { external vertical edge, internal vertical edge, external
+ * horizontal edge, internal horizontal edge }. Per [ISO14496-10]
+ * beta values must be in the range [0,18].
+ * pThresholds - array of size 8x2 of Thresholds (TC0) (values for the left
+ * or above edge of each 4x2 or 2x4 block, arranged in vertical
+ * block order and then in horizontal block order); must be aligned
+ * on a 4-byte boundary. Per [ISO14496-10] values must be in the
+ * range [0,25].
+ * pBS - array of size 16x2 of BS parameters (arranged in scan block order
+ * for vertical edges and then horizontal edges); valid in the
+ * range [0,4] with the following restrictions: i) pBS[i]== 4 may
+ * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]==
+ * 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds, or pBS. pSrcDst is not 8-byte aligned.
+ * either pThresholds or pBS is not 4-byte aligned.
+ * - one or more entries in the table pAlpha[0..3] is outside the range
+ * [0,255].
+ * - one or more entries in the table pBeta[0..3] is outside the range
+ * [0,18].
+ * - one or more entries in the table pThresholds[0..15]is outside of
+ * the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - srcdstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_DeblockChroma_I(
+    OMX_U8* pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8* pAlpha,
+    const OMX_U8* pBeta,
+    const OMX_U8* pThresholds,
+    const OMX_U8 *pBS
+)
+{
+    OMXResult status;
+
+    /* Every pointer argument must be supplied. */
+    armRetArgErrIf(!pSrcDst, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!pAlpha, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!pBeta, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!pThresholds, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!pBS, OMX_Sts_BadArgErr);
+    /* Pixels: 8-byte aligned base with a step that is a multiple of 8;
+     * the threshold and BS tables need 4-byte alignment. */
+    armRetArgErrIf(armNot8ByteAligned(pSrcDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf((srcdstStep & 7) != 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr);
+    /* Vertical edges are filtered first, reading each table from its start ... */
+    status = omxVCM4P10_FilterDeblockingChroma_VerEdge_I(
+        pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS);
+    armRetArgErrIf(status != OMX_Sts_NoErr, status);
+    /* ... then horizontal edges, from pAlpha[2], pBeta[2], pThresholds[8] and pBS[16] onward. */
+    return omxVCM4P10_FilterDeblockingChroma_HorEdge_I(
+        pSrcDst, srcdstStep, &pAlpha[2], &pBeta[2], &pThresholds[8], &pBS[16]);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
new file mode 100644
index 0000000..1f3a646
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
@@ -0,0 +1,109 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DeblockLuma_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 luma deblock
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+
+/**
+ * Function: omxVCM4P10_DeblockLuma_I (6.3.3.3.5)
+ *
+ * Description:
+ * This function performs in-place deblock filtering the horizontal and
+ * vertical edges of a luma macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 16-byte aligned.
+ * srcdstStep - image width; must be a multiple of 16.
+ * pAlpha - pointer to a 2x2 table of alpha thresholds, organized as
+ * follows: {external vertical edge, internal vertical edge,
+ * external horizontal edge, internal horizontal edge }. Per
+ * [ISO14496-10] alpha values must be in the range [0,255].
+ * pBeta - pointer to a 2x2 table of beta thresholds, organized as follows:
+ * {external vertical edge, internal vertical edge, external
+ * horizontal edge, internal horizontal edge }. Per [ISO14496-10]
+ * beta values must be in the range [0,18].
+ * pThresholds - pointer to a 16x2 table of threshold (TC0), organized as
+ * follows: {values for the left or above edge of each 4x4 block,
+ * arranged in vertical block order and then in horizontal block
+ * order}; must be aligned on a 4-byte boundary. Per [ISO14496-10]
+ * values must be in the range [0,25].
+ * pBS - pointer to a 16x2 table of BS parameters arranged in scan block
+ * order for vertical edges and then horizontal edges; valid in the
+ * range [0,4] with the following restrictions: i) pBS[i]== 4 may
+ * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]==
+ * 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds or pBS. pSrcDst is not 16-byte aligned.
+ * either pThresholds or pBS is not aligned on a 4-byte boundary.
+ * - one or more entries in the table pAlpha[0..3] is outside the range
+ * [0,255].
+ * - one or more entries in the table pBeta[0..3] is outside the range
+ * [0,18].
+ * - one or more entries in the table pThresholds[0..31]is outside of
+ * the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - srcdstStep is not a multiple of 16.
+ *
+ */
+
+OMXResult omxVCM4P10_DeblockLuma_I(
+    OMX_U8* pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8* pAlpha,
+    const OMX_U8* pBeta,
+    const OMX_U8* pThresholds,
+    const OMX_U8 *pBS
+)
+{
+    OMXResult verticalPass;
+
+    /* Picture data: non-NULL, 16-byte aligned, step a multiple of 16. */
+    armRetArgErrIf(NULL == pSrcDst, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot16ByteAligned(pSrcDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf((srcdstStep & 15) != 0, OMX_Sts_BadArgErr);
+    /* Alpha / beta tables: only required to be present. */
+    armRetArgErrIf((NULL == pAlpha) || (NULL == pBeta), OMX_Sts_BadArgErr);
+    /* Threshold and BS tables: present and 4-byte aligned. */
+    armRetArgErrIf(NULL == pThresholds, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
+    armRetArgErrIf(NULL == pBS, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr);
+
+    /* Vertical edges first; a non-OMX_Sts_NoErr result is handed straight back. */
+    verticalPass = omxVCM4P10_FilterDeblockingLuma_VerEdge_I(
+        pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS);
+    armRetArgErrIf(verticalPass != OMX_Sts_NoErr, verticalPass);
+    /* Horizontal edges read from pAlpha[2], pBeta[2], pThresholds[16] and pBS[16] onward. */
+    return omxVCM4P10_FilterDeblockingLuma_HorEdge_I(
+        pSrcDst, srcdstStep, &pAlpha[2], &pBeta[2], &pThresholds[16], &pBS[16]);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
new file mode 100644
index 0000000..830ddc7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
@@ -0,0 +1,86 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 decode coefficients module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (6.3.4.1.1)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse raster scan for a 2x2 block of
+ * ChromaDCLevel. The decoded coefficients in the packed position-coefficient
+ * buffer are stored in reverse zig-zag order, i.e., the first buffer element
+ * contains the last non-zero position-coefficient pair of the block. Within
+ * each position-coefficient pair, the position entry indicates the
+ * raster-scan position of the coefficient, while the coefficient entry
+ * contains the coefficient value.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - Double pointer to current byte in bit stream buffer
+ * pOffset - Pointer to current bit position in the byte pointed to by
+ * *ppBitStream; valid in the range [0,7].
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after each block is decoded
+ * pOffset - *pOffset is updated after each block is decoded
+ * pNumCoeff - Pointer to the number of nonzero coefficients in this block
+ * ppPosCoefBuf - Double pointer to destination residual
+ * coefficient-position pair buffer. Buffer position
+ * (*ppPosCoefBuf) is updated upon return, unless there are only
+ * zero coefficients in the currently decoded block. In this case
+ * the caller is expected to bypass the transform/dequantization of
+ * the empty blocks.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppBitStream or pOffset is NULL.
+ * - ppPosCoefBuf or pNumCoeff is NULL.
+ * OMX_Sts_Err - if one of the following is true:
+ * - an illegal code is encountered in the bitstream
+ *
+ */
+
+OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (
+    const OMX_U8** ppBitStream,
+    OMX_S32* pOffset,
+    OMX_U8* pNumCoeff,
+    OMX_U8** ppPosCoefbuf
+    )
+{
+    /* Bitstream cursor: both pointer levels must be non-NULL. */
+    armRetArgErrIf((ppBitStream == NULL) || (*ppBitStream == NULL), OMX_Sts_BadArgErr);
+    /* Bit offset: must be present and lie in [0,7]. */
+    armRetArgErrIf(pOffset == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((*pOffset < 0) || (*pOffset > 7), OMX_Sts_BadArgErr);
+    /* Outputs: coefficient count and the position-coefficient pair buffer. */
+    armRetArgErrIf(pNumCoeff == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((ppPosCoefbuf == NULL) || (*ppPosCoefbuf == NULL), OMX_Sts_BadArgErr);
+
+    /* Hand off to the shared decoder; both trailing arguments are 4 here. */
+    return armVCM4P10_DecodeCoeffsToPair(
+        ppBitStream, pOffset, pNumCoeff, ppPosCoefbuf,
+        4, 4);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
new file mode 100644
index 0000000..7e83d1e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
@@ -0,0 +1,117 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 decode coefficients module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC (6.3.4.1.2)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of
+ * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel, and ChromaACLevel. Inverse
+ * field scan is not supported. The decoded coefficients in the packed
+ * position-coefficient buffer are stored in reverse zig-zag order, i.e., the
+ * first buffer element contains the last non-zero position-coefficient pair of
+ * the block. Within each position-coefficient pair, the position entry
+ * indicates the raster-scan position of the coefficient, while the
+ * coefficient entry contains the coefficient value.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream -Double pointer to current byte in bit stream buffer
+ * pOffset - Pointer to current bit position in the byte pointed to by
+ * *ppBitStream; valid in the range [0,7].
+ * sMaxNumCoeff - Maximum the number of non-zero coefficients in current
+ * block
+ * sVLCSelect - VLC table selector, obtained from the number of non-zero
+ * coefficients contained in the above and left 4x4 blocks. It is
+ * equivalent to the variable nC described in H.264 standard Table
+ * 9-5, except its value can't be less than zero.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after each block is decoded.
+ * pOffset - *pOffset is updated after each block is decoded
+ * pNumCoeff - Pointer to the number of nonzero coefficients in this block
+ * ppPosCoefBuf - Double pointer to destination residual
+ * coefficient-position pair buffer. Buffer position
+ * (*ppPosCoefBuf) is updated upon return, unless there are only
+ * zero coefficients in the currently decoded block. In this case
+ * the caller is expected to bypass the transform/dequantization of
+ * the empty blocks.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppBitStream or pOffset is NULL.
+ * - ppPosCoefBuf or pNumCoeff is NULL.
+ * - sMaxNumCoeff is not equal to either 15 or 16.
+ * - sVLCSelect is less than 0.
+ *
+ * OMX_Sts_Err - if one of the following is true:
+ * - an illegal code is encountered in the bitstream
+ *
+ */
+
+OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC(
+    const OMX_U8** ppBitStream,
+    OMX_S32* pOffset,
+    OMX_U8* pNumCoeff,
+    OMX_U8**ppPosCoefbuf,
+    OMX_INT sVLCSelect,
+    OMX_INT sMaxNumCoeff
+ )
+{
+    int nTable;
+
+    /* Each pointer is checked for NULL before anything dereferences it. */
+    armRetArgErrIf((ppBitStream == NULL || *ppBitStream == NULL), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pOffset == NULL, OMX_Sts_BadArgErr);
+    /* The bit offset has to address a bit inside the current byte. */
+    armRetArgErrIf((*pOffset < 0 || *pOffset > 7), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pNumCoeff == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((ppPosCoefbuf == NULL || *ppPosCoefbuf == NULL), OMX_Sts_BadArgErr);
+    armRetArgErrIf(sVLCSelect < 0, OMX_Sts_BadArgErr);
+    /* Only 15 and 16 are accepted as the maximum coefficient count. */
+    armRetArgErrIf((sMaxNumCoeff < 15 || sMaxNumCoeff > 16), OMX_Sts_BadArgErr);
+
+    /*
+     * Map the selector onto VLC table 0..3:
+     * 0-1 -> table 0, 2-3 -> table 1, 4-7 -> table 2, 8 and above -> table 3.
+     */
+    nTable = (sVLCSelect < 2) ? 0 :
+             (sVLCSelect < 4) ? 1 :
+             (sVLCSelect < 8) ? 2 : 3;
+
+    /* All decoding work, including the pointer updates, happens in the helper. */
+    return armVCM4P10_DecodeCoeffsToPair(ppBitStream, pOffset, pNumCoeff,
+                                         ppPosCoefbuf, nTable, sMaxNumCoeff);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd.c
new file mode 100644
index 0000000..ed5a158
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd.c
@@ -0,0 +1,145 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DequantTransformResidualFromPairAndAdd.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 inverse quantize and transform module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/*
+ * Description:
+ * Dequantize Luma AC block
+ */
+
+/*
+ * Dequantizes a 4x4 coefficient block in place.
+ *
+ * pSrcDst - 16 coefficients; each one is multiplied by its entry from row
+ *           QP%6 of armVCM4P10_VMatrix (the column is picked through
+ *           armVCM4P10_PosToVCol4x4) and then scaled by 2^(QP/6).
+ * QP      - quantization parameter. The only caller in this file validates
+ *           it to [0,51] before reaching this helper.
+ */
+static void DequantLumaAC4x4(
+    OMX_S16* pSrcDst,
+    OMX_INT QP
+)
+{
+    const OMX_U8 *pVRow = &armVCM4P10_VMatrix[QP%6][0];
+    /*
+     * The scaling is done with a multiplication rather than "<< (QP/6)":
+     * coefficients may be negative, and left-shifting a negative signed value
+     * is undefined behaviour in C. With QP <= 51 the factor is at most 256,
+     * so |S16| * U8 * 256 <= 32768 * 255 * 256 still fits in 32 bits.
+     */
+    const OMX_S32 Scale = (OMX_S32)1 << (QP / 6);
+    int i;
+    OMX_S32 Value;
+
+    for (i=0; i<16; i++)
+    {
+        Value = (OMX_S32)pSrcDst[i] * pVRow[armVCM4P10_PosToVCol4x4[i]] * Scale;
+        /* Stored back at 16 bits, exactly as the coefficient buffer holds them. */
+        pSrcDst[i] = (OMX_S16)Value;
+    }
+}
+
+/**
+ * Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd (6.3.4.2.3)
+ *
+ * Description:
+ * Reconstruct the 4x4 residual block from coefficient-position pair buffer,
+ * perform dequantization and integer inverse transformation for 4x4 block of
+ * residuals with previous intra prediction or motion compensation data, and
+ * update the pair buffer pointer to next non-empty block. If pDC == NULL,
+ * there are 16 non-zero AC coefficients at most in the packed buffer starting
+ * from 4x4 block position 0; If pDC != NULL, there are 15 non-zero AC
+ * coefficients at most in the packed buffer starting from 4x4 block position
+ * 1.
+ *
+ * Input Arguments:
+ *
+ * ppSrc - Double pointer to residual coefficient-position pair buffer
+ * output by CAVLC decoding
+ * pPred - Pointer to the predicted 4x4 block; must be aligned on a 4-byte
+ * boundary
+ * predStep - Predicted frame step size in bytes; must be a multiple of 4
+ * dstStep - Destination frame step in bytes; must be a multiple of 4
+ * pDC - Pointer to the DC coefficient of this block, NULL if it doesn't
+ * exist
+ * QP - QP Quantization parameter. It should be QpC in chroma 4x4 block
+ * decoding, otherwise it should be QpY.
+ * AC - Flag indicating if at least one non-zero AC coefficient exists
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the reconstructed 4x4 block data; must be aligned on a
+ * 4-byte boundary
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - pPred or pDst is NULL.
+ * - pPred or pDst is not 4-byte aligned.
+ * - predStep or dstStep is not a multiple of 4.
+ * - AC !=0 and Qp is not in the range of [0-51] or ppSrc == NULL.
+ * - AC ==0 && pDC ==NULL.
+ *
+ */
+
+OMXResult omxVCM4P10_DequantTransformResidualFromPairAndAdd(
+    const OMX_U8 **ppSrc,
+    const OMX_U8 *pPred,
+    const OMX_S16 *pDC,
+    OMX_U8 *pDst,
+    OMX_INT predStep,
+    OMX_INT dstStep,
+    OMX_INT QP,
+    OMX_INT AC
+)
+{
+    /* Four spare entries leave room for the 8-byte alignment adjustment. */
+    OMX_S16 Scratch[16+4];
+    OMX_S16 *pResidual;
+    int Idx, Row, Col;
+
+    /* Prediction and destination blocks: non-NULL, 4-byte aligned, steps % 4 == 0. */
+    armRetArgErrIf(pPred == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pPred),OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predStep & 3, OMX_Sts_BadArgErr);
+    armRetArgErrIf(dstStep & 3, OMX_Sts_BadArgErr);
+    /* QP and the pair buffer only matter when AC coefficients are present. */
+    armRetArgErrIf(AC!=0 && (QP<0 || QP>51), OMX_Sts_BadArgErr);
+    armRetArgErrIf(AC!=0 && (ppSrc==NULL || *ppSrc==NULL), OMX_Sts_BadArgErr);
+    /* Without AC data there must at least be a DC coefficient. */
+    armRetArgErrIf(AC==0 && pDC==NULL, OMX_Sts_BadArgErr);
+
+    pResidual = armAlignTo8Bytes(Scratch);
+
+    /* Start from an all-zero residual block. */
+    for (Idx=15; Idx>=0; Idx--)
+    {
+        pResidual[Idx] = 0;
+    }
+
+    if (AC != 0)
+    {
+        /* Expand the packed pairs, then dequantize the block in place. */
+        armVCM4P10_UnpackBlock4x4(ppSrc, pResidual);
+        DequantLumaAC4x4(pResidual, QP);
+    }
+
+    if (pDC != NULL)
+    {
+        /* A separately supplied DC value replaces position 0. */
+        pResidual[0] = pDC[0];
+    }
+
+    armVCM4P10_TransformResidual4x4(pResidual,pResidual);
+
+    /* Add the residual to the prediction and saturate to 8 bits. */
+    for (Row=0; Row<4; Row++)
+    {
+        const OMX_U8 *pPredRow = pPred + Row*predStep;
+        const OMX_S16 *pResRow = pResidual + 4*Row;
+        OMX_U8 *pDstRow = pDst + Row*dstStep;
+
+        for (Col=0; Col<4; Col++)
+        {
+            pDstRow[Col] = (OMX_U8)armClip(0,255,pPredRow[Col] + pResRow[Col]);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c
new file mode 100644
index 0000000..75edee2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c
@@ -0,0 +1,130 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_FilterDeblockingChroma_HorEdge_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 chroma deblock module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingChroma_HorEdge_I (6.3.3.3.4)
+ *
+ * Description:
+ * Performs in-place deblock filtering on the horizontal edges of the chroma
+ * macroblock (8x8).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 8-byte aligned.
+ * srcdstStep - array step; must be a multiple of 8.
+ * pAlpha - array of size 2 containing alpha thresholds; the first element
+ * contains the threshold for the external horizontal edge, and the
+ * second element contains the threshold for internal horizontal
+ * edge. Per [ISO14496-10] alpha values must be in the range
+ * [0,255].
+ * pBeta - array of size 2 containing beta thresholds; the first element
+ * contains the threshold for the external horizontal edge, and the
+ * second element contains the threshold for the internal
+ * horizontal edge. Per [ISO14496-10] beta values must be in the
+ * range [0,18].
+ * pThresholds - array of size 8 containing thresholds, TC0, for the top
+ * horizontal edge of each 2x4 chroma block, arranged in horizontal
+ * block order; must be aligned on a 4-byte boundary. Per
+ * [ISO14496-10] values must be in the range [0,25].
+ * pBS - array of size 16 containing BS parameters for each 2x2 chroma
+ * block, arranged in horizontal block order; valid in the range
+ * [0,4] with the following restrictions: i) pBS[i]== 4 may occur
+ * only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 4.
+ * Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst -Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr, if one of the following cases occurs:
+ * - any of the following pointers is NULL:
+ * pSrcDst, pAlpha, pBeta, pThresholds, or pBS.
+ * - pSrcDst is not 8-byte aligned.
+ * - srcdstStep is not a multiple of 8.
+ * - pThresholds is not 4-byte aligned.
+ * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * - pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * - One or more entries in the table pThresholds[0..7] is outside
+ * of the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - pBS is not 4-byte aligned.
+ *
+ */
+
+OMXResult omxVCM4P10_FilterDeblockingChroma_HorEdge_I(
+    OMX_U8* pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8* pAlpha,
+    const OMX_U8* pBeta,
+    const OMX_U8* pThresholds,
+    const OMX_U8 *pBS
+ )
+{
+    int I, T, X, Y, Internal=0;
+
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pSrcDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(srcdstStep & 7, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pAlpha == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pThresholds == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBS == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr);
+    /* Documented beta range is [0,18]; the vertical-edge variant checks it too. */
+    armRetArgErrIf(pBeta[0] > 18, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta[1] > 18, OMX_Sts_BadArgErr);
+
+    /*
+     * Two horizontal edges are filtered: row 0 (external edge, index 0 of
+     * pAlpha/pBeta) and row 4 (internal edge, index 1). Arguments are
+     * validated pixel by pixel, so an error detected part-way through returns
+     * with the pixels before it already filtered.
+     */
+    for (Y=0; Y<8; Y+=4, Internal=1)
+    {
+        for (X=0; X<8; X++)
+        {
+            /* BS entry of the 2x2 block, and TC0 entry, covering pixel (X,Y). */
+            I = (X>>1)+4*(Y>>1);
+            T = (X>>1)+4*(Y>>2);
+
+            armRetArgErrIf(pBS[I] > 4, OMX_Sts_BadArgErr);
+
+            /* Strength 4 is only legal on the external edge (entries 0..3). */
+            armRetArgErrIf( (I > 3) && (pBS[I] == 4),
+                            OMX_Sts_BadArgErr);
+
+            /*
+             * Documented contract: pBS[i]==4 if and only if pBS[i^3]==4.
+             * I^3 stays inside the same group of four entries.
+             */
+            armRetArgErrIf( ( (pBS[I] == 4) && (pBS[I^3] != 4) ),
+                            OMX_Sts_BadArgErr);
+
+            /* Documented TC0 range is [0,25]. */
+            armRetArgErrIf(pThresholds[T] > 25, OMX_Sts_BadArgErr);
+
+            /* Filter horizontal edge with q0 at (X,Y) */
+            armVCM4P10_DeBlockPixel(
+                pSrcDst + Y*srcdstStep + X,
+                srcdstStep,
+                pThresholds[T],
+                pAlpha[Internal],
+                pBeta[Internal],
+                pBS[I],
+                1);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c
new file mode 100644
index 0000000..10b2592
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c
@@ -0,0 +1,131 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_FilterDeblockingChroma_VerEdge_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 deblocking module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingChroma_VerEdge_I (6.3.3.3.3)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the chroma
+ * macroblock (8x8).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the input macroblock; must be 8-byte aligned.
+ * srcdstStep - Step of the arrays; must be a multiple of 8.
+ * pAlpha - Array of size 2 of alpha thresholds (the first item is alpha
+ * threshold for external vertical edge, and the second item is for
+ * internal vertical edge); per [ISO14496-10] alpha values must be
+ * in the range [0,255].
+ * pBeta - Array of size 2 of beta thresholds (the first item is the beta
+ * threshold for the external vertical edge, and the second item is
+ * for the internal vertical edge); per [ISO14496-10] beta values
+ * must be in the range [0,18].
+ * pThresholds - Array of size 8 containing thresholds, TC0, for the left
+ * vertical edge of each 4x2 chroma block, arranged in vertical
+ * block order; must be aligned on a 4-byte boundary. Per
+ * [ISO14496-10] values must be in the range [0,25].
+ * pBS - Array of size 16 of BS parameters (values for each 2x2 chroma
+ * block, arranged in vertical block order). This parameter is the
+ * same as the pBS parameter passed into FilterDeblockLuma_VerEdge;
+ * valid in the range [0,4] with the following restrictions: i)
+ * pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and
+ * only if pBS[i^3]== 4. Must be 4 byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst -Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds, or pBS.
+ * - pSrcDst is not 8-byte aligned.
+ * - srcdstStep is not a multiple of 8.
+ * - pThresholds is not 4-byte aligned.
+ * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * - pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * - One or more entries in the table pThresholds[0..7] is outside
+ * of the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - pBS is not 4-byte aligned.
+ *
+ */
+
+OMXResult omxVCM4P10_FilterDeblockingChroma_VerEdge_I(
+    OMX_U8* pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8* pAlpha,
+    const OMX_U8* pBeta,
+    const OMX_U8* pThresholds,
+    const OMX_U8 *pBS
+ )
+{
+    int Edge, Col, Row, BsIdx, TcIdx;
+
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pSrcDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(srcdstStep & 7, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pAlpha == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pThresholds == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBS == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta[0] > 18, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta[1] > 18, OMX_Sts_BadArgErr);
+
+    /*
+     * Edge 0 is the external edge at column 0, edge 1 the internal edge at
+     * column 4; the edge number doubles as the index into pAlpha/pBeta.
+     * Validation is interleaved with filtering, one pixel row at a time.
+     */
+    for (Edge=0; Edge<2; Edge++)
+    {
+        Col = 4*Edge;
+
+        for (Row=0; Row<8; Row++)
+        {
+            /* BS entry and TC0 entry that apply to pixel (Col,Row). */
+            BsIdx = (Row>>1)+4*(Col>>1);
+            TcIdx = (Row>>1)+4*(Col>>2);
+
+            armRetArgErrIf(pBS[BsIdx] > 4, OMX_Sts_BadArgErr);
+
+            /* Strength 4 is rejected for any entry past the first four. */
+            armRetArgErrIf( (BsIdx > 3) && (pBS[BsIdx] == 4),
+                            OMX_Sts_BadArgErr);
+
+            /* A strength-4 entry must be mirrored by entry BsIdx^3. */
+            armRetArgErrIf( ( (pBS[BsIdx] == 4) && (pBS[BsIdx^3] != 4) ),
+                            OMX_Sts_BadArgErr);
+
+            /* Indexed by row, so the first edge sweeps all eight thresholds. */
+            armRetArgErrIf(pThresholds[Row] > 25, OMX_Sts_BadArgErr);
+
+            /* Filter vertical edge with q0 at (Col,Row) */
+            armVCM4P10_DeBlockPixel(
+                pSrcDst + Row*srcdstStep + Col,
+                1,
+                pThresholds[TcIdx],
+                pAlpha[Edge],
+                pBeta[Edge],
+                pBS[BsIdx],
+                1);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c
new file mode 100644
index 0000000..30a37da
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c
@@ -0,0 +1,125 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_FilterDeblockingLuma_HorEdge_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 luma deblock module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingLuma_HorEdge_I (6.3.3.3.2)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four horizontal edges of the luma
+ * macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 16-byte aligned.
+ * srcdstStep - step of the arrays; must be a multiple of 16.
+ * pAlpha - array of size 2 of alpha thresholds (the first item is the alpha
+ * threshold for the external horizontal edge, and the second item is
+ * for the internal horizontal edge); per [ISO14496-10] alpha
+ * values must be in the range [0,255].
+ * pBeta - array of size 2 of beta thresholds (the first item is the beta
+ * threshold for the external horizontal edge, and the second item
+ * is for the internal horizontal edge). Per [ISO14496-10] beta
+ * values must be in the range [0,18].
+ * pThresholds - array of size 16 containing thresholds, TC0, for the top
+ * horizontal edge of each 4x4 block, arranged in horizontal block
+ * order; must be aligned on a 4-byte boundary. Per [ISO14496 10]
+ * values must be in the range [0,25].
+ * pBS - array of size 16 of BS parameters (arranged in horizontal block
+ * order); valid in the range [0,4] with the following
+ * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii)
+ * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst -Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr, if one of the following cases occurs:
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds, or pBS.
+ * - either pThresholds or pBS is not aligned on a 4-byte boundary.
+ * - pSrcDst is not 16-byte aligned.
+ * - srcdstStep is not a multiple of 16.
+ * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * - pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * - One or more entries in the table pThresholds[0..15] is
+ * outside of the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ *
+ */
+
+OMXResult omxVCM4P10_FilterDeblockingLuma_HorEdge_I(
+    OMX_U8* pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8* pAlpha,
+    const OMX_U8* pBeta,
+    const OMX_U8* pThresholds,
+    const OMX_U8 *pBS
+ )
+{
+    int I, X, Y, Internal=0;
+
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    /*
+     * A luma macroblock needs 16-byte alignment and a step that is a multiple
+     * of 16, as documented and as the vertical-edge variant enforces. The
+     * 8-byte checks belong to the chroma functions.
+     */
+    armRetArgErrIf(armNot16ByteAligned(pSrcDst),OMX_Sts_BadArgErr);
+    armRetArgErrIf(srcdstStep & 15, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pAlpha == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pThresholds == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBS == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr);
+    /* Documented beta range is [0,18]. */
+    armRetArgErrIf(pBeta[0] > 18, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta[1] > 18, OMX_Sts_BadArgErr);
+
+    /*
+     * Four horizontal edges at rows 0, 4, 8 and 12. Row 0 is the external
+     * edge (index 0 of pAlpha/pBeta); the remaining rows use index 1.
+     * Arguments are validated pixel by pixel, so an error detected part-way
+     * through returns with the pixels before it already filtered.
+     */
+    for (Y=0; Y<16; Y+=4, Internal=1)
+    {
+        for (X=0; X<16; X++)
+        {
+            /* 4x4 block containing pixel (X,Y); indexes both pBS and pThresholds. */
+            I = (X>>2)+4*(Y>>2);
+
+            armRetArgErrIf(pBS[I] > 4, OMX_Sts_BadArgErr);
+
+            /* Strength 4 is only legal on the external edge (entries 0..3). */
+            armRetArgErrIf( (I > 3) && (pBS[I] == 4),
+                            OMX_Sts_BadArgErr);
+
+            /*
+             * Documented contract: pBS[i]==4 if and only if pBS[i^3]==4.
+             * I^3 stays inside the same group of four entries.
+             */
+            armRetArgErrIf( ( (pBS[I] == 4) && (pBS[I^3] != 4) ),
+                            OMX_Sts_BadArgErr);
+
+            /* Documented TC0 range is [0,25]. */
+            armRetArgErrIf(pThresholds[I] > 25, OMX_Sts_BadArgErr);
+
+            /* Filter horizontal edge with q0 at (X,Y) */
+            armVCM4P10_DeBlockPixel(
+                pSrcDst + Y*srcdstStep + X,
+                srcdstStep,
+                pThresholds[I],
+                pAlpha[Internal],
+                pBeta[Internal],
+                pBS[I],
+                0);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c
new file mode 100644
index 0000000..8733427
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c
@@ -0,0 +1,128 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_FilterDeblockingLuma_VerEdge_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 luma deblock module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingLuma_VerEdge_I (6.3.3.3.1)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the luma
+ * macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the input macroblock; must be 16-byte aligned.
+ * srcdstStep -Step of the arrays; must be a multiple of 16.
+ * pAlpha -Array of size 2 of alpha thresholds (the first item is the alpha
+ * threshold for the external vertical edge, and the second item is
+ * for the internal vertical edge); per [ISO14496-10] alpha values
+ * must be in the range [0,255].
+ * pBeta -Array of size 2 of beta thresholds (the first item is the beta
+ * threshold for the external vertical edge, and the second item is
+ * for the internal vertical edge); per [ISO14496-10] beta values
+ * must be in the range [0,18].
+ * pThresholds -Array of size 16 of Thresholds (TC0) (values for the left
+ * edge of each 4x4 block, arranged in vertical block order); must
+ * be aligned on a 4-byte boundary. Per [ISO14496-10] values must
+ * be in the range [0,25].
+ * pBS -Array of size 16 of BS parameters (arranged in vertical block
+ * order); valid in the range [0,4] with the following
+ * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii)
+ * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst -Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * Either of the pointers in pSrcDst, pAlpha, pBeta, pThresholds, or pBS
+ * is NULL.
+ * Either pThresholds or pBS is not aligned on a 4-byte boundary.
+ * pSrcDst is not 16-byte aligned.
+ * srcdstStep is not a multiple of 16.
+ * pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * One or more entries in the table pThresholds[0..15]is outside of the
+ * range [0,25].
+ * pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 &&
+ * pBS[i^3]!=4) for 0<=i<=3.
+ *
+ */
+
+OMXResult omxVCM4P10_FilterDeblockingLuma_VerEdge_I(
+    OMX_U8* pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8* pAlpha,
+    const OMX_U8* pBeta,
+    const OMX_U8* pThresholds,
+    const OMX_U8 *pBS
+ )
+{
+    int Edge, Col, Row, Block, Internal;
+
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot16ByteAligned(pSrcDst),OMX_Sts_BadArgErr);
+    armRetArgErrIf(srcdstStep & 15, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pAlpha == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pThresholds == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBS == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta[0] > 18, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBeta[1] > 18, OMX_Sts_BadArgErr);
+
+    /*
+     * Four vertical edges at columns 0, 4, 8 and 12. Only the first one is
+     * the external edge (index 0 of pAlpha/pBeta); the others use index 1.
+     */
+    for (Edge=0; Edge<4; Edge++)
+    {
+        Col = 4*Edge;
+        Internal = (Edge != 0) ? 1 : 0;
+
+        for (Row=0; Row<16; Row++)
+        {
+            /* 4x4 block containing pixel (Col,Row), in vertical block order. */
+            Block = (Row>>2)+4*(Col>>2);
+
+            /*
+             * The range checks are indexed by the row number rather than by
+             * Block, so one pass over the 16 rows visits every entry of pBS
+             * and pThresholds.
+             */
+            armRetArgErrIf(pBS[Row] > 4, OMX_Sts_BadArgErr);
+
+            armRetArgErrIf((pBS[Row] == 4) && (Row > 3),
+                           OMX_Sts_BadArgErr);
+
+            armRetArgErrIf(( (pBS[Row] == 4) && (pBS[Row^3] != 4) ),
+                           OMX_Sts_BadArgErr);
+
+            armRetArgErrIf(pThresholds[Row] > 25, OMX_Sts_BadArgErr);
+
+            /* Filter vertical edge with q0 at (Col,Row) */
+            armVCM4P10_DeBlockPixel(
+                pSrcDst + Row*srcdstStep + Col,
+                1,
+                pThresholds[Block],
+                pAlpha[Internal],
+                pBeta[Internal],
+                pBS[Block],
+                0);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_GetVLCInfo.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_GetVLCInfo.c
new file mode 100644
index 0000000..81c59d6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_GetVLCInfo.c
@@ -0,0 +1,192 @@
+/**
+ *
+ * File Name: omxVCM4P10_GetVLCInfo.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ *
+ * This function extracts run-length encoding (RLE) information
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_GetVLCInfo (6.3.5.9.1)
+ *
+ * Description:
+ * This function extracts run-length encoding (RLE) information from the
+ * coefficient matrix. The results are returned in an OMXVCM4P10VLCInfo
+ * structure.
+ *
+ * Input Arguments:
+ *
+ * pSrcCoeff - pointer to the transform coefficient matrix. 8-byte
+ * alignment required.
+ * pScanMatrix - pointer to the scan order definition matrix. For a luma
+ * block the scan matrix should follow [ISO14496-10] section 8.5.4,
+ * and should contain the values 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13,
+ * 10, 7, 11, 14, 15. For a chroma block, the scan matrix should
+ * contain the values 0, 1, 2, 3.
+ * bAC - indicates presence of a DC coefficient; 0 = DC coefficient
+ * present, 1= DC coefficient absent.
+ * MaxNumCoef - specifies the number of coefficients contained in the
+ * transform coefficient matrix, pSrcCoeff. The value should be 16
+ * for blocks of type LUMADC, LUMAAC, LUMALEVEL, and CHROMAAC. The
+ * value should be 4 for blocks of type CHROMADC.
+ *
+ * Output Arguments:
+ *
+ * pDstVLCInfo - pointer to structure that stores information for
+ * run-length coding.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcCoeff, pScanMatrix, pDstVLCInfo
+ * - pSrcCoeff is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_GetVLCInfo (
+    const OMX_S16* pSrcCoeff,
+    const OMX_U8* pScanMatrix,
+    OMX_U8 bAC,
+    OMX_U32 MaxNumCoef,
+    OMXVCM4P10VLCInfo* pDstVLCInfo
+)
+{
+    OMX_INT i, MinIndex;
+    OMX_S32 Value;
+    /* Mask selects the sign bit for the next trailing one: 4, then 2, then 1. */
+    OMX_U32 Mask = 4, RunBefore;
+    OMX_S16 *pLevel;
+    OMX_U8 *pRun;
+    OMX_S16 Buf [16];
+
+    /* check for argument error */
+    armRetArgErrIf(pSrcCoeff == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pSrcCoeff), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pScanMatrix == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstVLCInfo == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(bAC > 1, OMX_Sts_BadArgErr);
+    /* Buf holds 16 entries, so larger blocks cannot be scanned. */
+    armRetArgErrIf(MaxNumCoef > 16, OMX_Sts_BadArgErr);
+
+    /* Initialize RLE Info structure */
+    pDstVLCInfo->uTrailing_Ones = 0;
+    pDstVLCInfo->uTrailing_One_Signs = 0;
+    pDstVLCInfo->uNumCoeffs = 0;
+    pDstVLCInfo->uTotalZeros = 0;
+
+    for (i = 0; i < 16; i++)
+    {
+        pDstVLCInfo->iLevels [i] = 0;
+        pDstVLCInfo->uRuns [i] = 0;
+    }
+
+    /*
+     * Copy the coefficients into Buf in scan order. For a 15-coefficient
+     * block with bAC == 0 the first scan position is skipped.
+     */
+    MinIndex = (bAC == 0 && MaxNumCoef == 15) ? 1 : 0;
+    for (i = MinIndex; i < (MaxNumCoef + MinIndex); i++)
+    {
+        /* Scan */
+        Buf [i - MinIndex] = pSrcCoeff [pScanMatrix [i]];
+    }
+
+    /*
+     * Skip zeros at the end. The index is tested before Buf is read so that
+     * an all-zero block (or MaxNumCoef == 0) never reads Buf[-1].
+     */
+    i = (OMX_INT)MaxNumCoef - 1;
+    while (i >= 0 && !Buf [i])
+    {
+        i--;
+    }
+
+    /* Nothing but zeros: the cleared structure is the result. */
+    if (i < 0)
+    {
+        return OMX_Sts_NoErr;
+    }
+
+    /* Fill RLE Info structure */
+    pLevel = pDstVLCInfo->iLevels;
+    pRun = pDstVLCInfo->uRuns;
+    RunBefore = 0;
+
+    /* Handle the last non-zero coefficient (first in reverse order) separately */
+    pDstVLCInfo->uNumCoeffs++;
+    Value = Buf [i];
+    if (Value == 1 || Value == -1)
+    {
+        pDstVLCInfo->uTrailing_Ones++;
+
+        pDstVLCInfo->uTrailing_One_Signs |=
+            Value == -1 ? Mask : 0;
+        Mask >>= 1;
+    }
+    else
+    {
+        /* Not a trailing one: store the level with its magnitude reduced by 1 */
+        Value -= (Value > 0 ? 1 : -1);
+        *pLevel++ = Value;
+        /* No trailing ones can follow once a larger level has been seen. */
+        Mask = 0;
+    }
+
+    /* Remaining coefficients, walking towards the start of the scan */
+    while (--i >= 0)
+    {
+        Value = Buf [i];
+        if (Value)
+        {
+            pDstVLCInfo->uNumCoeffs++;
+
+            /* Mask reaches zero after three trailing ones, or after any level */
+            if (Mask &&
+                (Value == 1 ||
+                 Value == -1))
+            {
+                pDstVLCInfo->uTrailing_Ones++;
+
+                pDstVLCInfo->uTrailing_One_Signs |=
+                    Value == -1 ? Mask : 0;
+                Mask >>= 1;
+                *pRun++ = RunBefore;
+                RunBefore = 0;
+            }
+            else
+            {
+                /* If 3 trailing ones are not completed */
+                if (Mask)
+                {
+                    Mask = 0;
+                    Value -= (Value > 0 ? 1 : -1);
+                }
+                *pLevel++ = Value;
+                *pRun++ = RunBefore;
+                RunBefore = 0;
+            }
+        }
+        else
+        {
+            /* A zero below the last non-zero coefficient. */
+            pDstVLCInfo->uTotalZeros++;
+            RunBefore++;
+        }
+    }
+
+    /* Record the zeros counted since the last non-zero coefficient, if any */
+    if (RunBefore)
+    {
+        *pRun = RunBefore;
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
new file mode 100644
index 0000000..8824de2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
@@ -0,0 +1,99 @@
+/**
+ *
+ * File Name: omxVCM4P10_InterpolateChroma.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate 1/8 Pixel interpolation for Chroma Block
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P10_InterpolateChroma (6.3.3.2.2)
+ *
+ * Description:
+ * Performs 1/8-pixel interpolation for inter chroma MB.
+ *
+ * Input Arguments:
+ *
+ * pSrc -Pointer to the source reference frame buffer
+ * srcStep -Reference frame step in bytes
+ * dstStep -Destination frame step in bytes; must be a multiple of
+ * roi.width.
+ * dx -Fractional part of horizontal motion vector component in 1/8 pixel
+ * unit; valid in the range [0,7]
+ * dy -Fractional part of vertical motion vector component in 1/8 pixel
+ * unit; valid in the range [0,7]
+ * roi -Dimension of the interpolation region; the parameters roi.width and
+ * roi.height must be equal to either 2, 4, or 8.
+ *
+ * Output Arguments:
+ *
+ * pDst -Pointer to the destination frame buffer; if roi.width==2, 2-byte
+ * alignment required; if roi.width==4, 4-byte alignment required;
+ * if roi.width==8, 8-byte alignment required
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pSrc or pDst is NULL.
+ * srcStep or dstStep < 8.
+ * dx or dy is out of range [0-7].
+ * roi.width or roi.height is out of range {2,4,8}.
+ * roi.width is equal to 2, but pDst is not 2-byte aligned.
+ * roi.width is equal to 4, but pDst is not 4-byte aligned.
+ * roi.width is equal to 8, but pDst is not 8 byte aligned.
+ * srcStep or dstStep is not a multiple of 8.
+ *
+ */
+
+OMXResult omxVCM4P10_InterpolateChroma (
+    const OMX_U8* pSrc,
+    OMX_S32 srcStep,
+    OMX_U8* pDst,
+    OMX_S32 dstStep,
+    OMX_S32 dx,
+    OMX_S32 dy,
+    OMXSize roi
+    )
+{
+    /* Both buffers are mandatory */
+    armRetArgErrIf((NULL == pSrc) || (NULL == pDst), OMX_Sts_BadArgErr)
+    /* Each step must be at least 8 and a multiple of 8 */
+    armRetArgErrIf((srcStep < 8) || (srcStep & 7), OMX_Sts_BadArgErr)
+    armRetArgErrIf((dstStep < 8) || (dstStep & 7), OMX_Sts_BadArgErr)
+    /* Fractional offsets are limited to [0,7] */
+    armRetArgErrIf((dx < 0) || (dx > 7), OMX_Sts_BadArgErr)
+    armRetArgErrIf((dy < 0) || (dy > 7), OMX_Sts_BadArgErr)
+    /* Region dimensions must each be 2, 4 or 8 */
+    armRetArgErrIf(!((roi.width == 2) || (roi.width == 4) || (roi.width == 8)), OMX_Sts_BadArgErr)
+    armRetArgErrIf(!((roi.height == 2) || (roi.height == 4) || (roi.height == 8)), OMX_Sts_BadArgErr)
+    /* pDst alignment must match the region width */
+    armRetArgErrIf((2 == roi.width) && armNot2ByteAligned(pDst), OMX_Sts_BadArgErr)
+    armRetArgErrIf((4 == roi.width) && armNot4ByteAligned(pDst), OMX_Sts_BadArgErr)
+    armRetArgErrIf((8 == roi.width) && armNot8ByteAligned(pDst), OMX_Sts_BadArgErr)
+
+    /* Arguments are valid: run the interpolation */
+    return armVCM4P10_Interpolate_Chroma(
+        (OMX_U8*)pSrc, srcStep, pDst, dstStep, roi.width, roi.height, dx, dy);
+}
+
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfHor_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfHor_Luma.c
new file mode 100644
index 0000000..ef0befa
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfHor_Luma.c
@@ -0,0 +1,124 @@
+/**
+ *
+ * File Name: omxVCM4P10_InterpolateHalfHor_Luma.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate Half horizontal luma interpolation
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_InterpolateHalfHor_Luma (6.3.5.5.1)
+ *
+ * Description:
+ * This function performs interpolation for two horizontal 1/2-pel positions
+ * (-1/2,0) and (1/2, 0) - around a full-pel position.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the top-left corner of the block used to interpolate in
+ * the reconstruction frame plane.
+ * iSrcStep - Step of the source buffer.
+ * iDstStep - Step of the destination(interpolation) buffer; must be a
+ * multiple of iWidth.
+ * iWidth - Width of the current block; must be equal to either 4, 8, or 16
+ * iHeight - Height of the current block; must be equal to 4, 8, or 16
+ *
+ * Output Arguments:
+ *
+ * pDstLeft -Pointer to the interpolation buffer of the left 1/2-pel position
+ * (-1/2, 0)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ * pDstRight -Pointer to the interpolation buffer of the right 1/2-pel
+ * position (1/2, 0)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrc, pDstLeft, or pDstRight
+ * - iWidth or iHeight have values other than 4, 8, or 16
+ * - iWidth==4 but pDstLeft and/or pDstRight is/are not aligned on a 4-byte boundary
+ * - iWidth==8 but pDstLeft and/or pDstRight is/are not aligned on a 8-byte boundary
+ * - iWidth==16 but pDstLeft and/or pDstRight is/are not aligned on a 16-byte boundary
+ * - any alignment restrictions are violated
+ *
+ */
+
+OMXResult omxVCM4P10_InterpolateHalfHor_Luma(
+    const OMX_U8* pSrc,
+    OMX_U32 iSrcStep,
+    OMX_U8* pDstLeft,
+    OMX_U8* pDstRight,
+    OMX_U32 iDstStep,
+    OMX_U32 iWidth,
+    OMX_U32 iHeight
+)
+{
+    OMXResult RetValue;
+
+    /* check for argument error; a misaligned pDstLeft OR pDstRight is rejected */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstLeft == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstRight == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth == 4) &&
+        (armNot4ByteAligned(pDstLeft) ||
+         armNot4ByteAligned(pDstRight)), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth == 8) &&
+        (armNot8ByteAligned(pDstLeft) ||
+         armNot8ByteAligned(pDstRight)), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth == 16) &&
+        (armNot16ByteAligned(pDstLeft) ||
+         armNot16ByteAligned(pDstRight)), OMX_Sts_BadArgErr)
+
+    armRetArgErrIf((iHeight != 16) && (iHeight != 8) && (iHeight != 4), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth != 16) && (iWidth != 8) && (iWidth != 4), OMX_Sts_BadArgErr)
+
+    RetValue = armVCM4P10_InterpolateHalfHor_Luma (
+        pSrc - 1,               /* left position (-1/2, 0): start one pixel earlier */
+        iSrcStep,
+        pDstLeft,
+        iDstStep,
+        iWidth,
+        iHeight);
+
+    if (RetValue != OMX_Sts_NoErr)
+    {
+        return RetValue;
+    }
+
+    RetValue = armVCM4P10_InterpolateHalfHor_Luma (
+        pSrc,                   /* right position (1/2, 0) */
+        iSrcStep,
+        pDstRight,
+        iDstStep,
+        iWidth,
+        iHeight);
+
+    return RetValue;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfVer_Luma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfVer_Luma.c
new file mode 100644
index 0000000..3560ff8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateHalfVer_Luma.c
@@ -0,0 +1,123 @@
+/**
+ *
+ * File Name: omxVCM4P10_InterpolateHalfVer_Luma.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate Half vertical luma interpolation
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+
+/**
+ * Function: omxVCM4P10_InterpolateHalfVer_Luma (6.3.5.5.2)
+ *
+ * Description:
+ * This function performs interpolation for two vertical 1/2-pel positions -
+ * (0, -1/2) and (0, 1/2) - around a full-pel position.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to top-left corner of block used to interpolate in the
+ * reconstructed frame plane
+ * iSrcStep - Step of the source buffer.
+ * iDstStep - Step of the destination (interpolation) buffer; must be a
+ * multiple of iWidth.
+ * iWidth - Width of the current block; must be equal to either 4, 8, or 16
+ * iHeight - Height of the current block; must be equal to either 4, 8, or 16
+ *
+ * Output Arguments:
+ *
+ * pDstUp -Pointer to the interpolation buffer of the 1/2-pel position above
+ * the current full-pel position (0, -1/2)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ * pDstDown -Pointer to the interpolation buffer of the 1/2-pel position below
+ * the current full-pel position (0, 1/2)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrc, pDstUp, or pDstDown
+ * - iWidth or iHeight have values other than 4, 8, or 16
+ * - iWidth==4 but pDstUp and/or pDstDown is/are not aligned on a 4-byte boundary
+ * - iWidth==8 but pDstUp and/or pDstDown is/are not aligned on a 8-byte boundary
+ * - iWidth==16 but pDstUp and/or pDstDown is/are not aligned on a 16-byte boundary
+ *
+ */
+ OMXResult omxVCM4P10_InterpolateHalfVer_Luma(
+    const OMX_U8* pSrc,
+    OMX_U32 iSrcStep,
+    OMX_U8* pDstUp,
+    OMX_U8* pDstDown,
+    OMX_U32 iDstStep,
+    OMX_U32 iWidth,
+    OMX_U32 iHeight
+)
+{
+    OMXResult RetValue;
+
+    /* check for argument error; a misaligned pDstUp OR pDstDown is rejected */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstUp == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstDown == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth == 4) &&
+        (armNot4ByteAligned(pDstUp) ||
+         armNot4ByteAligned(pDstDown)), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth == 8) &&
+        (armNot8ByteAligned(pDstUp) ||
+         armNot8ByteAligned(pDstDown)), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth == 16) &&
+        (armNot16ByteAligned(pDstUp) ||
+         armNot16ByteAligned(pDstDown)), OMX_Sts_BadArgErr)
+
+    armRetArgErrIf((iHeight != 16) && (iHeight != 8) && (iHeight != 4), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iWidth != 16) && (iWidth != 8) && (iWidth != 4), OMX_Sts_BadArgErr)
+
+    RetValue = armVCM4P10_InterpolateHalfVer_Luma(
+        pSrc - iSrcStep,        /* upper position (0, -1/2): start one row earlier */
+        iSrcStep,
+        pDstUp,
+        iDstStep,
+        iWidth,
+        iHeight);
+
+    if (RetValue != OMX_Sts_NoErr)
+    {
+        return RetValue;
+    }
+
+    RetValue = armVCM4P10_InterpolateHalfVer_Luma(
+        pSrc,                   /* lower position (0, 1/2) */
+        iSrcStep,
+        pDstDown,
+        iDstStep,
+        iWidth,
+        iHeight);
+
+    return RetValue;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateLuma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateLuma.c
new file mode 100644
index 0000000..d233735
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InterpolateLuma.c
@@ -0,0 +1,99 @@
+/**
+ *
+ * File Name: omxVCM4P10_InterpolateLuma.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function performs quarter-pixel interpolation for luma
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P10_InterpolateLuma (6.3.3.2.1)
+ *
+ * Description:
+ * Performs quarter-pixel interpolation for inter luma MB. It is assumed that
+ * the frame is already padded when calling this function.
+ *
+ * Input Arguments:
+ *
+ * pSrc -Pointer to the source reference frame buffer
+ * srcStep -reference frame step, in bytes; must be a multiple of roi.width
+ * dstStep -destination frame step, in bytes; must be a multiple of
+ * roi.width
+ * dx -Fractional part of horizontal motion vector component in 1/4 pixel
+ * unit; valid in the range [0,3]
+ * dy -Fractional part of vertical motion vector y component in 1/4 pixel
+ * unit; valid in the range [0,3]
+ * roi -Dimension of the interpolation region; the parameters roi.width and
+ * roi.height must be equal to either 4, 8, or 16.
+ *
+ * Output Arguments:
+ *
+ * pDst -Pointer to the destination frame buffer; if roi.width==4, 4-byte
+ * alignment required; if roi.width==8, 8-byte alignment required;
+ * if roi.width==16, 16-byte alignment required
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pSrc or pDst is NULL.
+ * srcStep or dstStep < roi.width.
+ * dx or dy is out of range [0,3].
+ * roi.width or roi.height is out of range {4, 8, 16}.
+ * roi.width is equal to 4, but pDst is not 4 byte aligned.
+ * roi.width is equal to 8 (or 16), but pDst is not 8 (or 16) byte aligned.
+ * srcStep or dstStep is not a multiple of 8.
+ *
+ */
+
+OMXResult omxVCM4P10_InterpolateLuma (
+    const OMX_U8* pSrc,
+    OMX_S32 srcStep,
+    OMX_U8* pDst,
+    OMX_S32 dstStep,
+    OMX_S32 dx,
+    OMX_S32 dy,
+    OMXSize roi
+    )
+{
+    /* Both buffers are mandatory */
+    armRetArgErrIf((NULL == pSrc) || (NULL == pDst), OMX_Sts_BadArgErr)
+    /* Each step must cover roi.width and be a multiple of 8 */
+    armRetArgErrIf((srcStep < roi.width) || (srcStep & 7), OMX_Sts_BadArgErr)
+    armRetArgErrIf((dstStep < roi.width) || (dstStep & 7), OMX_Sts_BadArgErr)
+    /* Fractional offsets are limited to [0,3] */
+    armRetArgErrIf((dx < 0) || (dx > 3), OMX_Sts_BadArgErr)
+    armRetArgErrIf((dy < 0) || (dy > 3), OMX_Sts_BadArgErr)
+    /* Region dimensions must each be 4, 8 or 16 */
+    armRetArgErrIf(!((roi.width == 4) || (roi.width == 8) || (roi.width == 16)), OMX_Sts_BadArgErr)
+    armRetArgErrIf(!((roi.height == 4) || (roi.height == 8) || (roi.height == 16)), OMX_Sts_BadArgErr)
+    /* pDst alignment must match the region width */
+    armRetArgErrIf((4 == roi.width) && armNot4ByteAligned(pDst), OMX_Sts_BadArgErr)
+    armRetArgErrIf((8 == roi.width) && armNot8ByteAligned(pDst), OMX_Sts_BadArgErr)
+    armRetArgErrIf((16 == roi.width) && armNot16ByteAligned(pDst), OMX_Sts_BadArgErr)
+
+    /* Arguments are valid: run the interpolation */
+    return armVCM4P10_Interpolate_Luma(
+        pSrc, srcStep, pDst, dstStep, roi.width, roi.height, dx, dy);
+
+}
+
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_ChromaDC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_ChromaDC.c
new file mode 100644
index 0000000..92ba031
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_ChromaDC.c
@@ -0,0 +1,102 @@
+/**
+ *
+ * File Name: omxVCM4P10_InvTransformDequant_ChromaDC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate the inverse 2x2 Hadamard transform of chroma DC
+ * coefficients and dequantize them
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P10_InvTransformDequant_ChromaDC (6.3.5.6.4)
+ *
+ * Description:
+ * This function performs inverse 2x2 Hadamard transform and then dequantizes
+ * the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the 2x2 array of the 2x2 Hadamard-transformed and
+ * quantized coefficients. 8 byte alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to inverse-transformed and dequantized coefficients.
+ * 8-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL: pSrc, pDst
+ * - pSrc or pDst is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InvTransformDequant_ChromaDC(
+    const OMX_S16* pSrc,
+    OMX_S16* pDst,
+    OMX_U32 iQP
+)
+{
+    OMX_INT i, j;
+    OMX_S32 m[2][2];
+    OMX_S32 QPer, V00, Value;
+
+    /* check for argument error */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot8ByteAligned(pSrc), OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot8ByteAligned(pDst), OMX_Sts_BadArgErr)
+    armRetArgErrIf(iQP > 51, OMX_Sts_BadArgErr)
+
+    /* Inv Hadamard Transform for 2x2 block (all of pSrc is read before pDst is written) */
+    m[0][0] = pSrc[0] + pSrc[1] + pSrc[2] + pSrc[3];
+    m[0][1] = pSrc[0] - pSrc[1] + pSrc[2] - pSrc[3];
+    m[1][0] = pSrc[0] + pSrc[1] - pSrc[2] - pSrc[3];
+    m[1][1] = pSrc[0] - pSrc[1] - pSrc[2] + pSrc[3];
+
+    /* Dequantization */
+    /* Scaling by V00 * 2^(QPer - 1) */
+    QPer = iQP / 6;
+    V00 = armVCM4P10_VMatrix [iQP % 6][0];
+
+    for (j = 0; j < 2; j++)
+    {
+        for (i = 0; i < 2; i++)
+        {
+            if (QPer < 1)
+            {
+                Value = (m[j][i] * V00) >> 1;
+            }
+            else
+            {
+                Value = (m[j][i] * V00) * (1 << (QPer - 1)); /* multiply: "<<" on a negative product is undefined */
+            }
+
+            pDst[j * 2 + i] = (OMX_S16) Value;
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_LumaDC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_LumaDC.c
new file mode 100644
index 0000000..a3b1200
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformDequant_LumaDC.c
@@ -0,0 +1,128 @@
+/**
+ *
+ * File Name: omxVCM4P10_InvTransformDequant_LumaDC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate the inverse 4x4 Hadamard transform of luma DC
+ * coefficients and dequantize them
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_InvTransformDequant_LumaDC (6.3.5.6.3)
+ *
+ * Description:
+ * This function performs inverse 4x4 Hadamard transform and then dequantizes
+ * the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the 4x4 array of the 4x4 Hadamard-transformed and
+ * quantized coefficients. 16 byte alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to inverse-transformed and dequantized coefficients.
+ * 16-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL: pSrc, pDst
+ * - pSrc or pDst is not aligned on a 16-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InvTransformDequant_LumaDC(
+    const OMX_S16* pSrc,
+    OMX_S16* pDst,
+    OMX_U32 iQP
+)
+{
+    OMX_INT row, col;
+    OMX_S32 Coef[4][4];         /* transform workspace, updated in place */
+    OMX_S32 Sum02, Sum13, Diff02, Diff13;
+    OMX_S32 QPer, V, Value;
+
+    /* Reject NULL or misaligned buffers and an out-of-range QP */
+    armRetArgErrIf((pSrc == NULL) || (pDst == NULL), OMX_Sts_BadArgErr)
+    armRetArgErrIf(iQP > 51, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot16ByteAligned(pSrc) || armNot16ByteAligned(pDst), OMX_Sts_BadArgErr)
+
+    /* Inverse 4x4 Hadamard, pass 1: butterfly along each row of pSrc */
+    for (row = 0; row < 4; row++)
+    {
+        const OMX_S16 *pRow = pSrc + row * 4;
+        Sum02  = pRow[0] + pRow[2];     /* a+c */
+        Sum13  = pRow[1] + pRow[3];     /* b+d */
+        Diff02 = pRow[0] - pRow[2];     /* a-c */
+        Diff13 = pRow[1] - pRow[3];     /* b-d */
+        Coef[row][0] = Sum02 + Sum13;   /* a+b+c+d */
+        Coef[row][1] = Diff02 + Diff13; /* a+b-c-d */
+        Coef[row][2] = Diff02 - Diff13; /* a-b-c+d */
+        Coef[row][3] = Sum02 - Sum13;   /* a-b+c-d */
+    }
+
+    /* Pass 2: the same butterfly down each column of the workspace */
+    for (col = 0; col < 4; col++)
+    {
+        Sum02  = Coef[0][col] + Coef[2][col];
+        Sum13  = Coef[1][col] + Coef[3][col];
+        Diff02 = Coef[0][col] - Coef[2][col];
+        Diff13 = Coef[1][col] - Coef[3][col];
+        Coef[0][col] = Sum02 + Sum13;
+        Coef[1][col] = Diff02 + Diff13;
+        Coef[2][col] = Diff02 - Diff13;
+        Coef[3][col] = Sum02 - Sum13;
+    }
+
+    /* Scaling */
+    QPer = iQP / 6;
+    V = armVCM4P10_VMatrix [iQP % 6][0];
+
+    if (QPer < 2)
+    {
+        /* Small QP: rounded right shift by (2 - QPer) */
+        for (row = 0; row < 4; row++)
+        {
+            for (col = 0; col < 4; col++)
+            {
+                Value = (Coef[row][col] * V + (1 << (1 - QPer))) >> (2 - QPer);
+                pDst[row * 4 + col] = (OMX_S16) Value;
+            }
+        }
+    }
+    else
+    {
+        /* Larger QP: scale up by 2^(QPer - 2) */
+        for (row = 0; row < 4; row++)
+        {
+            for (col = 0; col < 4; col++)
+            {
+                Value = Coef[row][col] * V * (1 << (QPer - 2));
+                pDst[row * 4 + col] = (OMX_S16) Value;
+            }
+        }
+    }
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformResidualAndAdd.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformResidualAndAdd.c
new file mode 100644
index 0000000..3303997
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_InvTransformResidualAndAdd.c
@@ -0,0 +1,124 @@
+/**
+ *
+ * File Name: omxVCM4P10_InvTransformResidualAndAdd.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will perform the inverse integer 4x4 transform
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_InvTransformResidualAndAdd (6.3.5.7.1)
+ *
+ * Description:
+ * This function performs an inverse 4x4 integer transformation to produce
+ * the difference signal and then adds the difference to the prediction to get
+ * the reconstructed signal.
+ *
+ * Input Arguments:
+ *
+ * pSrcPred - Pointer to prediction signal. 4-byte alignment required.
+ * pDequantCoeff - Pointer to the transformed coefficients. 8-byte
+ * alignment required.
+ * iSrcPredStep - Step of the prediction buffer; must be a multiple of 4.
+ * iDstReconStep - Step of the destination reconstruction buffer; must be a
+ * multiple of 4.
+ * bAC - Indicate whether there is AC coefficients in the coefficients
+ * matrix.
+ *
+ * Output Arguments:
+ *
+ * pDstRecon -Pointer to the destination reconstruction buffer. 4-byte
+ * alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcPred, pDequantCoeff, pDstRecon
+ * - pSrcPred is not aligned on a 4-byte boundary
+ * - iSrcPredStep or iDstReconStep is not a multiple of 4.
+ * - pDequantCoeff is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InvTransformResidualAndAdd(
+    const OMX_U8* pSrcPred,
+    const OMX_S16* pDequantCoeff,
+    OMX_U8* pDstRecon,
+    OMX_U32 iSrcPredStep,
+    OMX_U32 iDstReconStep,
+    OMX_U8 bAC
+)
+{
+    OMX_INT row, col, k;
+    OMX_S16 Coef[16], Residual[16];
+    OMX_S32 Pixel;
+
+    /* Pointers must be non-NULL and suitably aligned */
+    armRetArgErrIf((pSrcPred == NULL) || armNot4ByteAligned(pSrcPred), OMX_Sts_BadArgErr)
+    armRetArgErrIf((pDequantCoeff == NULL) || armNot8ByteAligned(pDequantCoeff), OMX_Sts_BadArgErr)
+    armRetArgErrIf((pDstRecon == NULL) || armNot4ByteAligned(pDstRecon), OMX_Sts_BadArgErr)
+
+    /* bAC is a 0/1 flag */
+    armRetArgErrIf(bAC > 1, OMX_Sts_BadArgErr)
+
+    /* Steps must be non-zero multiples of 4 */
+    armRetArgErrIf((iSrcPredStep == 0) || (iSrcPredStep & 3), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iDstReconStep == 0) || (iDstReconStep & 3), OMX_Sts_BadArgErr)
+
+    /* The DC coefficient is always used */
+    Coef[0] = pDequantCoeff[0];
+
+    /* AC coefficients are copied only when bAC is set, otherwise zeroed */
+    for (k = 1; k < 16; k++)
+    {
+        if (bAC)
+        {
+            Coef[k] = pDequantCoeff[k];
+        }
+        else
+        {
+            Coef[k] = 0;
+        }
+    }
+
+    /* Residual Transform */
+    armVCM4P10_TransformResidual4x4 (Residual, Coef);
+
+    /* Add the prediction to each residual sample, clip, and store */
+    for (row = 0; row < 4; row++)
+    {
+        /* row-major 4x4 block; each buffer advances by its own step per row */
+        for (col = 0; col < 4; col++)
+        {
+            Pixel = (OMX_S32) Residual[row * 4 + col]
+                  + pSrcPred[row * iSrcPredStep + col];
+
+            /* Saturate to the OMX_U8 range before storing */
+            Pixel = armClip (0, 255, Pixel);
+            pDstRecon[row * iDstReconStep + col] = (OMX_U8) Pixel;
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEGetBufSize.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEGetBufSize.c
new file mode 100644
index 0000000..8c3a5c3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEGetBufSize.c
@@ -0,0 +1,70 @@
+/**
+ *
+ * File Name: omxVCM4P10_MEGetBufSize.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Initialization modules for the vendor specific Motion Estimation structure.
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P10_MEGetBufSize (6.3.5.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification
+ * structure for the omxVCM4P10 motion estimation functions BlockMatch_Integer
+ * and MotionEstimationMB.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P10MEMode
+ * pMEParams -motion estimation parameters
+ *
+ * Output Arguments:
+ *
+ * pSize - pointer to the number of bytes required for the motion
+ * estimation specification structure
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - pMEParams or pSize is NULL.
+ * - an invalid MEMode is specified.
+ *
+ */
+
+OMXResult omxVCM4P10_MEGetBufSize(
+    OMXVCM4P10MEMode MEMode,
+    const OMXVCM4P10MEParams *pMEParams,
+    OMX_U32 *pSize
+    )
+{
+    /* Both pointers are required */
+    armRetArgErrIf((NULL == pMEParams) || (NULL == pSize), OMX_Sts_BadArgErr);
+    /* Only the fast and full search modes are accepted */
+    armRetArgErrIf(!((OMX_VC_M4P10_FAST_SEARCH == MEMode) ||
+                     (OMX_VC_M4P10_FULL_SEARCH == MEMode)), OMX_Sts_BadArgErr);
+    /* Every search range must be strictly positive */
+    armRetArgErrIf(!((pMEParams->searchRange16x16 > 0) &&
+                     (pMEParams->searchRange8x8 > 0) &&
+                     (pMEParams->searchRange4x4 > 0)), OMX_Sts_BadArgErr);
+    *pSize = (OMX_INT) sizeof(ARMVCM4P10_MESpec);
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEInit.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEInit.c
new file mode 100644
index 0000000..58ecc88
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MEInit.c
@@ -0,0 +1,92 @@
+/**
+ *
+ * File Name: omxVCM4P10_MEInit.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Initialization modules for the vendor specific Motion Estimation structure.
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P10_MEInit (6.3.5.1.2)
+ *
+ * Description:
+ * Initializes the vendor-specific specification structure required for the
+ * omxVCM4P10 motion estimation functions: BlockMatch_Integer and
+ * MotionEstimationMB. Memory for the specification structure *pMESpec must be
+ * allocated prior to calling the function, and should be aligned on a 4-byte
+ * boundary. The number of bytes required for the specification structure can
+ * be determined using the function omxVCM4P10_MEGetBufSize. Following
+ * initialization by this function, the vendor-specific structure *pMESpec
+ * should contain an implementation-specific representation of all motion
+ * estimation parameters received via the structure pMEParams, for example
+ * searchRange16x16, searchRange8x8, etc.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P10MEMode
+ * pMEParams - motion estimation parameters
+ * pMESpec - pointer to the uninitialized ME specification structure
+ *
+ * Output Arguments:
+ *
+ * pMESpec - pointer to the initialized ME specification structure
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - pMEParams or pMESpec is NULL.
+ * - an invalid value was specified for the parameter MEmode
+ * - a negative or zero value was specified for one of the search ranges
+ * (e.g., pMEParams->searchRange8x8, pMEParams->searchRange16x16, etc.)
+ * - either in isolation or in combination, one or more of the enables or
+ * search ranges in the structure *pMEParams were configured such
+ * that the requested behavior fails to comply with [ISO14496-10].
+ *
+ */
+
+OMXResult omxVCM4P10_MEInit(
+    OMXVCM4P10MEMode MEMode,
+    const OMXVCM4P10MEParams *pMEParams,
+    void *pMESpec
+    )
+{
+    ARMVCM4P10_MESpec *armMESpec = (ARMVCM4P10_MESpec *) pMESpec;
+
+    armRetArgErrIf((NULL == pMEParams) || (NULL == pMESpec), OMX_Sts_BadArgErr);
+    /* Only the fast and full search modes are accepted */
+    armRetArgErrIf(!((OMX_VC_M4P10_FAST_SEARCH == MEMode) ||
+                     (OMX_VC_M4P10_FULL_SEARCH == MEMode)), OMX_Sts_BadArgErr);
+    /* Every search range must be strictly positive */
+    armRetArgErrIf(!((pMEParams->searchRange16x16 > 0) &&
+                     (pMEParams->searchRange8x8 > 0) &&
+                     (pMEParams->searchRange4x4 > 0)), OMX_Sts_BadArgErr);
+    /* Mirror every caller parameter into the spec, then record the mode */
+    armMESpec->MEParams.blockSplitEnable8x8 = pMEParams->blockSplitEnable8x8;
+    armMESpec->MEParams.blockSplitEnable4x4 = pMEParams->blockSplitEnable4x4;
+    armMESpec->MEParams.halfSearchEnable    = pMEParams->halfSearchEnable;
+    armMESpec->MEParams.quarterSearchEnable = pMEParams->quarterSearchEnable;
+    armMESpec->MEParams.intraEnable4x4      = pMEParams->intraEnable4x4;
+    armMESpec->MEParams.searchRange16x16    = pMEParams->searchRange16x16;
+    armMESpec->MEParams.searchRange8x8      = pMEParams->searchRange8x8;
+    armMESpec->MEParams.searchRange4x4      = pMEParams->searchRange4x4;
+    armMESpec->MEMode = MEMode;
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MotionEstimationMB.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MotionEstimationMB.c
new file mode 100644
index 0000000..33dbf3f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_MotionEstimationMB.c
@@ -0,0 +1,1892 @@
+/**
+ *
+ * File Name: omxVCM4P10_MotionEstimationMB.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function performs MB level motion estimation
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+#define ARM_VCM4P10_MAX_FRAMES (15)             /* entries in a reference buffer list */
+#define ARM_VCM4P10_MAX_4x4_SAD (0xffff)        /* SAD stored for a 4x4 block that was not evaluated */
+#define ARM_VCM4P10_MAX_MODE_VALUE (0xffffffff)
+#define ARM_VCM4P10_MAX_MODES (16)
+#define ARM_VCM4P10_MB_BLOCK_SIZE (16)
+#define ARM_VCM4P10_MEDIAN(a,b,c) (((a)>(b)) ? (((a)>(c)) ? (((b)>(c)) ? (b) : (c)) : (a)) : (((b)>(c)) ? (((a)>(c)) ? (a) : (c)) : (b)))
+#define ARM_VCM4P10_SHIFT_QP (12)
+/* ARM_VCM4P10_MEDIAN evaluates its arguments several times: pass side-effect-free expressions only */
+#define ARM_VCM4P10_MVPRED_MEDIAN (0)           /* MV predictor: median of left, upper, upper-right */
+#define ARM_VCM4P10_MVPRED_L (1)                /* MV predictor: left neighbour */
+#define ARM_VCM4P10_MVPRED_U (2)                /* MV predictor: upper neighbour */
+#define ARM_VCM4P10_MVPRED_UR (3)               /* MV predictor: upper-right neighbour */
+
+/* (second, identical definition of ARM_VCM4P10_MB_BLOCK_SIZE dropped; it is defined above) */
+#define ARM_VCM4P10_BLOCK_SIZE (4)
+#define ARM_VCM4P10_MAX_COST (1 << 30)          /* initial value of every best-cost search */
+#define ARM_VCM4P10_INVALID_BLOCK (-2)          /* marks an unavailable mode / reference entry */
+
+
+/**
+ * Function: armVCM4P10_CalculateBlockSAD
+ *
+ * Description:
+ * Builds the luma prediction for the MB mode described by pSrcMBInfo and
+ * writes the SAD of each 4x4 block against the current MB to pDstBlockSAD:
+ * 8x8 blocks in raster order, the four 4x4 blocks of each in raster order.
+ *
+ * Remarks:
+ * NULL pSrcMBInfo or an unsupported MB type: all 16 entries are set to 0xffff.
+ * Parameters:
+ * [in] pSrcMBInfo - MB type, MVs, reference indices and intra modes to evaluate
+ * [in] pSrcCurrBuf - luma of the current MB
+ * [in] SrcCurrStep - step of pSrcCurrBuf
+ * [in] pSrcRefBufList- reference buffers, selected through pSrcMBInfo->pRefL0Idx
+ * [in] SrcRefStep - step of the reference buffers
+ * [in] pSrcRecBuf - reconstructed luma used for intra prediction
+ * [in] SrcRecStep - step of pSrcRecBuf
+ * [in] pRefRect - not used by this function
+ * [in] pCurrPointPos - position of the current MB; used for Intra16x16 availability
+ * [in] pMBInter - neighbouring inter MB info; used for Intra16x16 availability
+ * [in] pMBIntra - neighbouring intra MB info; used for intra availability
+ * [out] pDstBlockSAD - pointer to 16 element array for SAD corresponding to 4x4 blocks
+ * SADs of 0x7fff and above are stored as 0xffff.
+ * Return Value:
+ * OMX_Sts_NoErr - always
+ *
+ * Quarter-pel MVs are split into a floor integer part and a 0..3 fraction.
+ */
+
+static OMXResult armVCM4P10_CalculateBlockSAD(
+    OMXVCM4P10MBInfo *pSrcMBInfo,
+    const OMX_U8 *pSrcCurrBuf,
+    OMX_S32 SrcCurrStep,
+    const OMX_U8 *pSrcRefBufList[ARM_VCM4P10_MAX_FRAMES],
+    OMX_S32 SrcRefStep,
+    const OMX_U8 *pSrcRecBuf,
+    OMX_S32 SrcRecStep,
+    const OMXRect *pRefRect,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    const OMXVCM4P10MBInfoPtr *pMBInter,
+    const OMXVCM4P10MBInfoPtr *pMBIntra,
+    OMX_U16 *pDstBlockSAD)
+{
+    OMX_INT InvalidSAD = 0;
+    OMX_INT i;
+
+    OMX_U8 Buffer [16*16 + 15];
+    OMX_U8 *pTempDstBuf;
+    OMX_S32 TempDstStep;
+    OMX_U8 *pTempRefBuf;
+    OMX_S32 TempRefStep;
+
+    /* Temporary buffer to store the predicted mb coefficients */
+    pTempDstBuf = armAlignTo16Bytes(Buffer);
+    TempDstStep = 16;
+
+    /* Update pDstBlockSAD if MB is a valid type */
+    if (pSrcMBInfo)
+    {
+        OMX_U32 Width=0, Height=0, MaxXPart, MaxYPart,MaxSubXPart,MaxSubYPart;
+
+        /* Depending on type of MB, do prediction and fill temp buffer */
+        switch (pSrcMBInfo->mbType)
+        {
+        case OMX_VC_P_16x16:
+            Width = 16;
+            Height = 16;
+            break;
+        case OMX_VC_P_16x8:
+            Width = 16;
+            Height = 8;
+            break;
+        case OMX_VC_P_8x16:
+            Width = 8;
+            Height = 16;
+            break;
+        case OMX_VC_P_8x8:
+            Width = 8;
+            Height = 8;
+            break;
+        case OMX_VC_INTRA_4x4:
+        {
+            /* Create predicted MB Intra4x4 mode */
+            OMX_S32 PredIntra4x4Mode [5][9];
+            OMX_S32 x, y, Block8x8, Block4x4, BlockX, BlockY;
+            OMX_U8 pSrcYBuff [(16*3)*(16*2)];
+            OMX_U8 *pSrcY;
+            OMX_S32 StepSrcY;
+            OMX_S32 availability;
+
+            for (y = 0; y < 5; y++)
+            {
+                for (x = 0; x < 9; x++)
+                {
+                    /*
+                     * Initialize with value of ARM_VCM4P10_INVALID_BLOCK, to mean this
+                     * 4x4 block is not available
+                     */
+                    PredIntra4x4Mode [y][x] = ARM_VCM4P10_INVALID_BLOCK;
+                }
+            }
+
+            /* Replace ARM_VCM4P10_INVALID_BLOCK value with available MBs values*/
+            for (x = 0; x < 4; x++)
+            {
+                /* Store values of b0, b1, b2, b3 */
+                if (pMBIntra[1] != NULL)
+                {
+                    PredIntra4x4Mode [0][x + 1] =
+                        pMBIntra[1]->pIntra4x4PredMode[3*4 + x];
+                }
+
+                /* Store values of d0, d1, d2, d3 */
+                if (pMBIntra[3] != NULL)
+                {
+                    PredIntra4x4Mode [0][x + 5] =
+                        pMBIntra[3]->pIntra4x4PredMode[3*4 + x];
+                }
+            }
+
+            /* Store values of c3 */
+            if (pMBIntra[2] != NULL)
+            {
+                PredIntra4x4Mode [0][0] = pMBIntra[2]->pIntra4x4PredMode[15];
+            }
+
+            for (y = 0; y < 4; y++)
+            {
+                /* Store values of a0, a1, a2, a3 */
+                if (pMBIntra[0] != NULL)
+                {
+                    PredIntra4x4Mode [y + 1][0] =
+                        pMBIntra[0]->pIntra4x4PredMode[y*4 + 3];
+                }
+            }
+
+            /*
+             * Copy the 3MB x 2MB window around the current MB into a local
+             * buffer; the current MB starts at row 16, column 16 of the copy.
+             */
+
+            pSrcY = pSrcYBuff;
+            StepSrcY = 16 * 3;
+            for (y = 0; y < (16 * 2); y++)
+            {
+                for (x = 0; x < (16 * 3); x++)
+                {
+                    pSrcY [StepSrcY * y + x] =
+                        pSrcRecBuf [SrcRecStep * (y - 16) + x - 16];
+                }
+            }
+
+
+            /* for each 8x8 block */
+            for (Block8x8 = 0; Block8x8 < 4; Block8x8++)
+            {
+                /* for each 4x4 block inside 8x8 block */
+                for (Block4x4 = 0; Block4x4 < 4; Block4x4++)
+                {
+                    /* Get block coordinates from 8x8 block index and 4x4 block index */
+                    BlockX = ((Block8x8 & 1) << 1) + (Block4x4 & 1);
+                    BlockY = ((Block8x8 >> 1) << 1) + (Block4x4 >> 1);
+
+                    /* Add offset to point to start of current MB in the array PredIntra4x4Mode */
+                    x = BlockX + 1;
+                    y = BlockY + 1;
+
+                    availability = 0;
+
+                    /* Check for availability of LEFT Block */
+                    if (PredIntra4x4Mode [y][x - 1] != ARM_VCM4P10_INVALID_BLOCK)
+                    {
+                        availability |= OMX_VC_LEFT;
+                    }
+
+                    /* Check for availability of UPPER Block */
+                    if (PredIntra4x4Mode [y - 1][x] != ARM_VCM4P10_INVALID_BLOCK)
+                    {
+                        availability |= OMX_VC_UPPER;
+                    }
+
+                    /* Check for availability of UPPER LEFT Block */
+                    if (PredIntra4x4Mode [y - 1][x - 1] != ARM_VCM4P10_INVALID_BLOCK)
+                    {
+                        availability |= OMX_VC_UPPER_LEFT;
+                    }
+
+                    PredIntra4x4Mode [y][x] = pSrcMBInfo->pIntra4x4PredMode[BlockY*4+BlockX];
+                    x = BlockX * 4;
+                    y = BlockY * 4;
+
+                    pSrcY = pSrcYBuff + 16 * StepSrcY + 16 + y * StepSrcY + x;
+
+                    omxVCM4P10_PredictIntra_4x4(
+                        pSrcY - 1,
+                        pSrcY - StepSrcY,
+                        pSrcY - StepSrcY - 1,
+                        pTempDstBuf + x + y * TempDstStep,
+                        StepSrcY,
+                        TempDstStep,
+                        pSrcMBInfo->pIntra4x4PredMode[BlockY*4+BlockX],
+                        availability);
+
+                    for (BlockY=0;BlockY<4;BlockY++)
+                    {
+                        for(BlockX=0;BlockX<4;BlockX++)
+                        {
+                            pSrcY [BlockY * StepSrcY + BlockX] =
+                                (OMX_U8)(*(pTempDstBuf + x + y * TempDstStep + BlockY * TempDstStep + BlockX));
+                        }
+                    }
+
+                }
+            }
+            break;
+        }
+        case OMX_VC_INTRA_16x16:
+        {
+            OMX_U32 MBPosX = pCurrPointPos->x >> 4;
+            OMX_U32 MBPosY = pCurrPointPos->y >> 4;
+            OMX_U32 availability = 0;
+
+            /* Check for availability of LEFT MB */
+            if ((MBPosX != 0) && (pMBIntra [0] != 0 || pMBInter [0] != 0))
+            {
+                availability |= OMX_VC_LEFT;
+            }
+
+            /* Check for availability of UP MB */
+            if ((MBPosY != 0) && (pMBIntra [1] != 0 || pMBInter [1] != 0))
+            {
+                availability |= OMX_VC_UPPER;
+            }
+
+            /* Check for availability of UP-LEFT MB */
+            if ((MBPosX > 0) && (MBPosY > 0) &&
+                (pMBIntra [2] != 0 || pMBInter [2] != 0))
+            {
+                availability |= OMX_VC_UPPER_LEFT;
+            }
+
+            omxVCM4P10_PredictIntra_16x16(
+                pSrcRecBuf - 1,
+                pSrcRecBuf - SrcRecStep,
+                pSrcRecBuf - SrcRecStep - 1,
+                pTempDstBuf,
+                SrcRecStep,
+                TempDstStep,
+                pSrcMBInfo->Intra16x16PredMode,
+                availability);
+
+            break;
+        }
+
+        case OMX_VC_INTER_SKIP:
+        case OMX_VC_PREF0_8x8:
+        case OMX_VC_INTRA_PCM:
+        default:
+            /* These cases will update pDstBlockSAD with MAX value */
+            InvalidSAD = 1;
+            break;
+        }
+
+        /* INTER MB */
+        if ((pSrcMBInfo->mbType == OMX_VC_P_16x16) ||
+            (pSrcMBInfo->mbType == OMX_VC_P_8x16) ||
+            (pSrcMBInfo->mbType == OMX_VC_P_16x8) ||
+            (pSrcMBInfo->mbType == OMX_VC_P_8x8))
+        {
+            const OMX_U8 *pTempSrcBuf;
+            OMX_S32 TempSrcStep;
+            OMX_S32 mvx,mvy;
+            OMX_U32 PartX, PartY, SubPartX, SubPartY;
+            OMX_S32 BlkX, BlkY, RefX, RefY;
+            TempSrcStep = SrcRefStep;
+
+            MaxXPart = 16/Width;
+            MaxYPart = 16/Height;
+
+
+            for (PartY = 0; PartY < MaxYPart; PartY++)
+            {
+                for (PartX = 0; PartX < MaxXPart; PartX++)
+                {
+
+                    pTempSrcBuf = pSrcRefBufList[pSrcMBInfo->pRefL0Idx[PartY * 2 + PartX]];
+
+                    if (MaxXPart == 2 && MaxYPart == 2)
+                    {
+                        switch (pSrcMBInfo->subMBType[PartY*2+PartX])
+                        {
+                        case OMX_VC_SUB_P_8x8:
+                            Width = 8;
+                            Height = 8;
+                            break;
+                        case OMX_VC_SUB_P_8x4:
+                            Width = 8;
+                            Height = 4;
+                            break;
+                        case OMX_VC_SUB_P_4x8:
+                            Width = 4;
+                            Height = 8;
+                            break;
+                        case OMX_VC_SUB_P_4x4:
+                            Width = 4;
+                            Height = 4;
+                            break;
+                        default:
+                            /* Default */
+                            Width = 4;
+                            Height = 4;
+                            break;
+                        }
+
+                        MaxSubXPart = 8/Width;
+                        MaxSubYPart = 8/Height;
+
+                        for (SubPartY = 0; SubPartY < MaxSubYPart; SubPartY++)
+                        {
+                            for (SubPartX = 0; SubPartX < MaxSubXPart; SubPartX++)
+                            {
+                                mvx = pSrcMBInfo->pMV0 [2*PartY + SubPartY][2*PartX + SubPartX].dx;
+                                mvy = pSrcMBInfo->pMV0 [2*PartY + SubPartY][2*PartX + SubPartX].dy;
+                                /* mv - (mv & 3) is a multiple of 4, so the division yields the floor pel offset */
+                                BlkX = (OMX_S32) (8*PartX + 4*SubPartX);
+                                BlkY = (OMX_S32) (8*PartY + 4*SubPartY);
+                                RefX = BlkX + (mvx - (mvx & 3)) / 4;
+                                RefY = BlkY + (mvy - (mvy & 3)) / 4;
+                                armVCM4P10_Interpolate_Luma(
+                                    pTempSrcBuf + (RefY * TempSrcStep + RefX), TempSrcStep,
+                                    pTempDstBuf + (BlkY * TempDstStep + BlkX), TempDstStep,
+                                    Width, Height,
+                                    mvx & 3, mvy & 3);
+                            }
+                        }
+                    }
+                    else
+                    {
+                        /* The partition uses the MV stored at its top-left 4x4 block */
+                        mvx = pSrcMBInfo->pMV0 [2*PartY][2*PartX].dx;
+                        mvy = pSrcMBInfo->pMV0 [2*PartY][2*PartX].dy;
+                        /* mv - (mv & 3) is a multiple of 4, so the division yields the floor pel offset */
+                        BlkX = (OMX_S32) (8*PartX);
+                        BlkY = (OMX_S32) (8*PartY);
+                        RefX = BlkX + (mvx - (mvx & 3)) / 4;
+                        RefY = BlkY + (mvy - (mvy & 3)) / 4;
+                        armVCM4P10_Interpolate_Luma(
+                            pTempSrcBuf + (RefY * TempSrcStep + RefX), TempSrcStep,
+                            pTempDstBuf + (BlkY * TempDstStep + BlkX), TempDstStep,
+                            Width, Height,
+                            mvx & 3, mvy & 3);
+
+                    }
+                }
+            }
+        }
+    }
+    else
+    {
+        InvalidSAD = 1;
+    }
+
+    /* Calculate SAD from predicted buffer */
+    if (!InvalidSAD)
+    {
+        OMX_U32 x8x8, y8x8, x4x4, y4x4, Block8x8, Block4x4;
+        OMX_S32 SAD;
+
+        pTempRefBuf = pTempDstBuf;
+        TempRefStep = 16;
+
+        /* SAD for each 4x4 block in scan order */
+        for (Block8x8 = 0; Block8x8 < 4; Block8x8++)
+        {
+            x8x8 = 8*(Block8x8 & 1);
+            y8x8 = 8*(Block8x8 >> 1);
+            for (Block4x4 = 0; Block4x4 < 4; Block4x4++)
+            {
+                x4x4 = 4*(Block4x4 & 1);
+                y4x4 = 4*(Block4x4 >> 1);
+
+                armVCCOMM_SAD(
+                    pSrcCurrBuf + (x8x8 + x4x4) + (y8x8 + y4x4) * SrcCurrStep,
+                    SrcCurrStep,
+                    pTempRefBuf + (x8x8 + x4x4) + (y8x8 + y4x4) * TempRefStep,
+                    TempRefStep,
+                    &SAD,
+                    4, /* Height */
+                    4); /* Width */
+                *(pDstBlockSAD + 4 * Block8x8 + Block4x4) = (SAD < 0x7fff) ? (OMX_U16) SAD : ARM_VCM4P10_MAX_4x4_SAD;
+            }
+        }
+    }
+    else
+    {
+        /* Fill SADs with max values and return*/
+        for (i = 0; i < 16; i++)
+        {
+            pDstBlockSAD [i] = ARM_VCM4P10_MAX_4x4_SAD;
+        }
+    }
+    return OMX_Sts_NoErr;
+}
+
+
+
+/**
+ * Function: armVCM4P10_Mode4x4Decision
+ *
+ * Description:
+ * Tries all nine Intra 4x4 prediction modes for one 4x4 block, keeps the
+ * mode with the smallest SAD and leaves its prediction in the local buffer.
+ *
+ * Remarks:
+ * Modes for which omxVCM4P10_PredictIntra_4x4 reports an error are skipped.
+ * Parameters:
+ * [in] pSrcCurrBuf - Pointer to the start of current Macroblock
+ * [in] SrcCurrStep - Step size of the pointer pSrcCurrBuf
+ * [in/out] pSrcDstMBCurr - MB info whose pIntra4x4PredMode entry for this
+ * block is set to the winning mode.
+ * [in] Block8x8 - Index of the 8x8 block containing the 4x4 block
+ * [in] Block4x4 - Index of the 4x4 block inside that 8x8 block
+ * [in/out] pPredIntra4x4SrcY - Top-left of the current MB inside the buffer
+ * of reconstructed values; the 4x4 block is overwritten
+ * with the prediction of the winning mode
+ * [in] StepPredIntra4x4SrcY - Step size of the pointer pPredIntra4x4SrcY
+ * [in/out] pIntra4x4PredMode - Intra 4x4 prediction modes around the MB.
+ * Current MB modes start at [1][1].
+ * [out] pBestCost - SAD of the winning mode (ARM_VCM4P10_MAX_COST if none)
+ * Return Value:
+ * None
+ *
+ */
+static OMXVoid armVCM4P10_Mode4x4Decision (
+    const OMX_U8* pSrcCurrBuf,
+    OMX_S32 SrcCurrStep,
+    OMXVCM4P10MBInfo *pSrcDstMBCurr,
+    OMX_S32 Block8x8,
+    OMX_S32 Block4x4,
+    OMX_U8 *pPredIntra4x4SrcY,
+    OMX_S32 StepPredIntra4x4SrcY,
+    OMX_S32 pIntra4x4PredMode [][9],
+    OMX_S32 *pBestCost
+)
+{
+    OMX_S32 BlkCol, BlkRow, ModeCol, ModeRow;
+    OMX_S32 Mode, Pel;
+    OMX_S32 ModeCost = ARM_VCM4P10_MAX_COST;
+    OMX_S32 LowestCost = ARM_VCM4P10_MAX_COST;
+    OMX_S32 NeighbourMask = 0;
+    OMX_S32 RecStep = StepPredIntra4x4SrcY;
+    OMX_U8 *pRec;
+    const OMX_U8 *pCurr;
+    OMX_U8 Candidate [4*4];
+    OMXResult Status = OMX_Sts_Err;
+
+    /* Position of the block inside the MB, counted in 4x4 blocks */
+    BlkCol = ((Block8x8 & 1) << 1) + (Block4x4 & 1);
+    BlkRow = ((Block8x8 >> 1) << 1) + (Block4x4 >> 1);
+
+    /* The MB's own entries start at [1][1] of pIntra4x4PredMode */
+    ModeCol = BlkCol + 1;
+    ModeRow = BlkRow + 1;
+
+    /* A neighbour is available unless its entry still holds the
+     * ARM_VCM4P10_INVALID_BLOCK marker. LEFT first: */
+    if (pIntra4x4PredMode [ModeRow][ModeCol - 1] != ARM_VCM4P10_INVALID_BLOCK)
+    {
+        NeighbourMask |= OMX_VC_LEFT;
+    }
+
+    /* UPPER */
+    if (pIntra4x4PredMode [ModeRow - 1][ModeCol] != ARM_VCM4P10_INVALID_BLOCK)
+    {
+        NeighbourMask |= OMX_VC_UPPER;
+    }
+
+    /* UPPER LEFT */
+    if (pIntra4x4PredMode [ModeRow - 1][ModeCol - 1] != ARM_VCM4P10_INVALID_BLOCK)
+    {
+        NeighbourMask |= OMX_VC_UPPER_LEFT;
+    }
+
+    /* Top-left pel of the block in the reconstruction and in the source */
+    pRec = pPredIntra4x4SrcY + RecStep * (BlkRow << 2) + (BlkCol << 2);
+    pCurr = pSrcCurrBuf + ((BlkRow * 4) * SrcCurrStep) + (BlkCol * 4);
+
+    /* Evaluate every prediction mode and remember the cheapest one */
+    for (Mode = 0; Mode < 9; Mode++)
+    {
+        Status = omxVCM4P10_PredictIntra_4x4(
+            pRec - 1,
+            pRec - RecStep,
+            pRec - RecStep - 1,
+            Candidate,
+            RecStep,
+            4,
+            (OMXVCM4P10Intra4x4PredMode) Mode,
+            NeighbourMask);
+
+        if (Status != OMX_Sts_NoErr)
+        {
+            /* Prediction was rejected for this mode */
+            continue;
+        }
+
+        armVCCOMM_SAD(
+            pCurr,
+            SrcCurrStep,
+            Candidate,
+            4,
+            &ModeCost,
+            4,
+            4);
+
+        if (ModeCost >= LowestCost)
+        {
+            continue;
+        }
+
+        /* New winner: record the cost and the mode in both mode tables */
+        LowestCost = ModeCost;
+        pIntra4x4PredMode [ModeRow][ModeCol] =
+            (OMXVCM4P10Intra4x4PredMode) Mode;
+        pSrcDstMBCurr->pIntra4x4PredMode [BlkRow * 4 + BlkCol] =
+            (OMXVCM4P10Intra4x4PredMode) Mode;
+
+        /* Overwrite the block in the reconstruction with the winning prediction */
+        for (Pel = 0; Pel < 16; Pel++)
+        {
+            pRec [RecStep * (Pel >> 2) + (Pel & 3)] = Candidate [Pel];
+        }
+    }
+
+    *pBestCost = LowestCost;
+    return;
+}
+
+/**
+ * Function: armVCM4P10_SetMotionVectorPredictor
+ *
+ * Description:
+ * Selects a motion vector predictor for one block from pMVArr / pRefFrArr
+ *
+ * Parameters:
+ * [in] BlockStartX - Start X index in integer pels in current Block
+ * [in] BlockStartY - Start Y index in integer pels in current Block
+ * [in] BlockSizex - Width of current block
+ * [in] BlockSizey - Height of current block
+ * [in] RefFrame - Index of the reference frame for prediction
+ * [in] pRefFrArr - Reference indices of the current and neighbouring blocks (8x8 units)
+ * [in] pMVArr - MVs of the current and neighbouring blocks (4x4 units)
+ * [out] pMVPred - Pointer to predicted MVs
+ * Remarks:
+ * The caller keeps ownership of both arrays; only *pMVPred is written.
+ * Return Value:
+ * None
+ *
+ */
+static OMXVoid armVCM4P10_SetMotionVectorPredictor(
+ OMX_U32 BlockStartX,
+ OMX_U32 BlockStartY,
+ OMX_U32 BlockSizex,
+ OMX_U32 BlockSizey,
+ OMX_S32 RefFrame,
+ OMX_S32 pRefFrArr[][6],
+ OMXVCMotionVector pMVArr[][12],
+ OMXVCMotionVector *pMVPred
+)
+{
+ OMX_S32 RFrameL; /* Left */
+ OMX_S32 RFrameU; /* Up */
+ OMX_S32 RFrameUR; /* Up-Right */
+
+ OMX_S32 BlockX, BlockY, BlockXFr, BlockYFr, MVPredType;
+ OMX_S32 BlockXPlusOff, BlockXPlusOffFr, BlockXMin1Fr, BlockYMin1Fr;
+ /* Block position in 4x4 units, offset by 4 into pMVArr; BlockXPlusOff is the column right of the block */
+ BlockX = 4 + (BlockStartX >> 2);
+ BlockY = 4 + (BlockStartY >> 2);
+ BlockXPlusOff = BlockX + (BlockSizex >> 2);
+ /* The same positions halved are used as indices into pRefFrArr */
+ BlockXFr = BlockX >> 1;
+ BlockYFr = BlockY >> 1;
+ BlockXMin1Fr = (BlockX - 1) >> 1;
+ BlockYMin1Fr = (BlockY - 1) >> 1;
+ BlockXPlusOffFr = BlockXPlusOff >> 1;
+
+ MVPredType = ARM_VCM4P10_MVPRED_MEDIAN;
+ /* Reference indices stored for the left, upper and upper-right positions */
+ RFrameL = pRefFrArr [BlockYFr][BlockXMin1Fr];
+ RFrameU = pRefFrArr [BlockYMin1Fr][BlockXFr];
+ RFrameUR = pRefFrArr [BlockYMin1Fr][BlockXPlusOffFr];
+ /* Upper-right entry marked invalid: use the upper-left entry instead */
+ if (RFrameUR == ARM_VCM4P10_INVALID_BLOCK)
+ {
+ RFrameUR = pRefFrArr [BlockYMin1Fr][BlockXMin1Fr];
+ }
+
+ /*
+ * Prediction if only one of the neighbors uses the reference frame
+ * we are checking
+ */
+
+ if (RFrameL == RefFrame && RFrameU != RefFrame && RFrameUR != RefFrame)
+ {
+ MVPredType = ARM_VCM4P10_MVPRED_L;
+ }
+ else if(RFrameL != RefFrame && RFrameU == RefFrame && RFrameUR != RefFrame)
+ {
+ MVPredType = ARM_VCM4P10_MVPRED_U;
+ }
+ else if(RFrameL != RefFrame && RFrameU != RefFrame && RFrameUR == RefFrame)
+ {
+ MVPredType = ARM_VCM4P10_MVPRED_UR;
+ }
+
+ /* Directional predictions */
+ else if(BlockSizex == 8 && BlockSizey == 16)
+ {
+ if(BlockStartX == 0)
+ {
+ if(RFrameL == RefFrame)
+ {
+ MVPredType = ARM_VCM4P10_MVPRED_L;
+ }
+ }
+ else
+ {
+ if (RFrameUR == RefFrame)
+ {
+ MVPredType = ARM_VCM4P10_MVPRED_UR;
+ }
+ }
+ }
+ else if(BlockSizex == 16 && BlockSizey == 8)
+ {
+ if(BlockStartY == 0)
+ {
+ if(RFrameU == RefFrame)
+ {
+ MVPredType = ARM_VCM4P10_MVPRED_U;
+ }
+ }
+ else
+ {
+ if(RFrameL == RefFrame)
+ {
+ MVPredType = ARM_VCM4P10_MVPRED_L;
+ }
+ }
+ }
+ /* NOTE(review): MEDIAN below takes the left MV when none of the three upper entries is invalid and the median otherwise - looks inverted; confirm against [ISO14496-10] 8.4.1.3 */
+ switch (MVPredType)
+ {
+ case ARM_VCM4P10_MVPRED_MEDIAN:
+ if (!(pRefFrArr [BlockYMin1Fr][BlockXMin1Fr] == ARM_VCM4P10_INVALID_BLOCK ||
+ pRefFrArr [BlockYMin1Fr][BlockXFr] == ARM_VCM4P10_INVALID_BLOCK ||
+ pRefFrArr [BlockYMin1Fr][BlockXPlusOffFr] == ARM_VCM4P10_INVALID_BLOCK))
+ {
+ pMVPred->dx = pMVArr [BlockY][BlockX - 1].dx;
+ pMVPred->dy = pMVArr [BlockY][BlockX - 1].dy;
+ }
+ else
+ {
+ pMVPred->dx =
+ ARM_VCM4P10_MEDIAN(pMVArr [BlockY][BlockX - 1].dx,
+ pMVArr [BlockY - 1][BlockX].dx,
+ pMVArr [BlockY - 1][BlockXPlusOff].dx);
+ pMVPred->dy =
+ ARM_VCM4P10_MEDIAN(pMVArr [BlockY][BlockX - 1].dy,
+ pMVArr [BlockY - 1][BlockX].dy,
+ pMVArr [BlockY - 1][BlockXPlusOff].dy);
+ }
+ break;
+
+ case ARM_VCM4P10_MVPRED_L:
+ pMVPred->dx = pMVArr [BlockY][BlockX - 1].dx;
+ pMVPred->dy = pMVArr [BlockY][BlockX - 1].dy;
+ break;
+ case ARM_VCM4P10_MVPRED_U:
+ pMVPred->dx = pMVArr [BlockY - 1][BlockX].dx;
+ pMVPred->dy = pMVArr [BlockY - 1][BlockX].dy;
+ break;
+ case ARM_VCM4P10_MVPRED_UR:
+ if (pRefFrArr [BlockYMin1Fr][BlockXPlusOffFr] != ARM_VCM4P10_INVALID_BLOCK)
+ {
+ pMVPred->dx = pMVArr [BlockY - 1][BlockXPlusOff].dx;
+ pMVPred->dy = pMVArr [BlockY - 1][BlockXPlusOff].dy;
+ }
+ else
+ {
+ pMVPred->dx = pMVArr [BlockY - 1][BlockX - 1].dx;
+ pMVPred->dy = pMVArr [BlockY - 1][BlockX - 1].dy;
+ }
+ break;
+ default:
+ break;
+ }
+
+ return;
+}
+
+/**
+ * Function: armVCM4P10_BlockMotionSearch
+ *
+ * Description:
+ * Finds the best MV for one block: predictor derivation, integer-pel
+ * search, then optional half-pel and quarter-pel refinement.
+ * Parameters:
+ * [in] pSrcCurrBuf - Pointer to the start of luma component of current Macroblock
+ * [in] SrcCurrStep - Step size for the pointer pSrcCurrBuf
+ * [in] pSrcRefY - Pointer to the start of luma component of co-located reference MB
+ * [in] nSrcRefStep - Step size for the pointer pSrcRefY
+ * [in] pRefRect Pointer to the valid reference rectangle; relative to the image origin.
+ * [in] pCurrPointPos Position of the current macroblock in the current plane.
+ * [in] pMESpec - Motion estimation structure; halfSearchEnable and
+ * quarterSearchEnable are read from it, and it is
+ * forwarded to the integer-pel search.
+ * [in] nLamda - For calculating the cost
+ * [out] pBestCost - Minimum cost for encoding current block
+ * [out] pBestMV - MV corresponding to best cost
+ * [in] BlockStartX - Block start X index in integer pels
+ * [in] BlockStartY - Block start Y index in integer pels
+ * [in] BlockSizeX - Width of current block
+ * [in] BlockSizeY - Height of current block
+ * [in] RefFrame - Index of the reference frame for prediction
+ * [in] pRefFrArr - Pointer to reference frame array storing neighbouring MVs for prediction
+ * [in] pMVArr - Pointer to MV array storing neighbouring MVs for MV prediction
+ * [out] pMVPred - Receives the MV predicted from the neighbour MVs
+ * Remarks:
+ * *pBestCost and *pBestMV are written whatever the search stages return.
+ * Return Value:
+ * OMXResult - result of the last search stage that was executed
+ *
+ */
+static OMXResult armVCM4P10_BlockMotionSearch(
+    const OMX_U8* pSrcCurrBuf,
+    OMX_S32 SrcCurrStep,
+    const OMX_U8* pSrcRefY,
+    OMX_S32 nSrcRefStep,
+    const OMXRect *pRefRect,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    void* pMESpec,
+
+    OMX_S32 nLamda,
+    OMX_S32* pBestCost,
+    OMXVCMotionVector *pBestMV,
+
+    OMX_U32 BlockStartX,
+    OMX_U32 BlockStartY,
+    OMX_U32 BlockSizeX,
+    OMX_U32 BlockSizeY,
+    OMX_S32 RefFrame,
+    OMX_S32 pRefFrArr [][6],
+    OMXVCMotionVector pMVArr [][12],
+    OMXVCMotionVector *pMVPred
+ )
+{
+    /*
+     * The refinement switches are read from pMESpec through the
+     * OMXVCM4P10MEParams layout.
+     */
+    const OMXVCM4P10MEParams *pSearchCfg = (const OMXVCM4P10MEParams *) pMESpec;
+    OMXVCM4P2Coordinate BlockPos;
+    OMXVCMotionVector ZeroMV;
+    OMXVCMotionVector FoundMV;
+    OMX_S32 FoundCost;
+    OMXResult Status;
+
+    /* Step 1: derive the MV predictor from the neighbouring blocks;
+     * the result is stored in *pMVPred */
+    armVCM4P10_SetMotionVectorPredictor (
+        BlockStartX,
+        BlockStartY,
+        BlockSizeX,
+        BlockSizeY,
+        RefFrame,
+        pRefFrArr,
+        pMVArr,
+        pMVPred);
+
+    /* Step 2: integer-pel search. The candidate MV handed to the
+     * search is the zero vector. */
+    ZeroMV.dx = 0;
+    ZeroMV.dy = 0;
+
+    /* Block position = MB position + offset of the block inside the MB */
+    BlockPos.x = pCurrPointPos->x + BlockStartX;
+    BlockPos.y = pCurrPointPos->y + BlockStartY;
+
+    Status = omxVCM4P10_BlockMatch_Integer (
+        pSrcCurrBuf,
+        SrcCurrStep,
+        pSrcRefY,
+        nSrcRefStep,
+        pRefRect,
+        &BlockPos,
+        BlockSizeX,
+        BlockSizeY,
+        nLamda,
+        pMVPred,
+        &ZeroMV,
+        &FoundMV,
+        &FoundCost,
+        pMESpec);
+
+    /* Step 3: optional half-pel refinement; FoundMV and FoundCost are
+     * both input and output */
+    if (pSearchCfg->halfSearchEnable)
+    {
+        Status = omxVCM4P10_BlockMatch_Half(
+            pSrcCurrBuf,
+            SrcCurrStep,
+            pSrcRefY,
+            nSrcRefStep,
+            BlockSizeX,
+            BlockSizeY,
+            nLamda,
+            pMVPred,
+            &FoundMV,
+            &FoundCost);
+    }
+
+    /* Step 4: optional quarter-pel refinement, same in/out use of
+     * FoundMV and FoundCost */
+    if (pSearchCfg->quarterSearchEnable)
+    {
+        Status = omxVCM4P10_BlockMatch_Quarter(
+            pSrcCurrBuf,
+            SrcCurrStep,
+            pSrcRefY,
+            nSrcRefStep,
+            BlockSizeX,
+            BlockSizeY,
+            nLamda,
+            pMVPred,
+            &FoundMV,
+            &FoundCost);
+    }
+
+    /* Hand the final cost and MV back to the caller */
+    *pBestCost = FoundCost;
+    pBestMV->dx = FoundMV.dx;
+    pBestMV->dy = FoundMV.dy;
+
+    /*
+     * Skip MB cost calculations of 16x16 inter mode
+     */
+    return Status;
+}
+
+/**
+ * Function: armVCM4P10_PartitionME
+ *
+ * Description:
+ * Gets best cost for the current partition over all reference frames
+ * NULL entries of pSrcRefBufList are skipped.
+ * Parameters:
+ * [in] pSrcCurrBuf - Pointer to the start of luma component of current Macroblock
+ * [in] SrcCurrStep - Step size for the pointer pSrcCurrBuf
+ * [in] pSrcRefBufList - Pointer to List of ref buffer of co-located reference MB
+ * [in] SrcRefStep - Step size for the reference buffers
+ * [in] pRefRect Pointer to the valid reference rectangle; relative to the image origin.
+ * [in] pCurrPointPos Position of the current macroblock in the current plane.
+ * [in] pMESpec - Motion estimation structure
+ * [in] PartWidth - Width of current partition
+ * [in] PartHeight - Height of current partition
+ * [in] BlockWidth - Width of current block
+ * [in] BlockHeight - Height of current block
+ * [in] PartStartX - Partition start X index in integer pels
+ * [in] PartStartY - Partition start Y index in integer pels
+ * [in/out] pMVArr - MV array; receives the best MVs of the partition
+ * [in/out] pRefFrArr - Reference index array; receives the best reference index
+ * [in] Lambda - For calculating the cost
+ * [out] pCost - Pointer to cost for Inter MB (ARM_VCM4P10_MAX_COST if nothing was evaluated)
+ * [out] pMVPredArr - Receives the MV predictors that belong to the best MVs
+ * Return Value:
+ * OMX_Sts_NoErr - always
+ *
+ */
+static OMXResult armVCM4P10_PartitionME (
+    const OMX_U8* pSrcCurrBuf,
+    OMX_S32 SrcCurrStep,
+    const OMX_U8 *pSrcRefBufList[ARM_VCM4P10_MAX_FRAMES],
+    OMX_S32 SrcRefStep,
+    const OMXRect *pRefRect,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    void* pMESpec,
+
+    OMX_S32 PartWidth,
+    OMX_S32 PartHeight,
+    OMX_S32 BlockWidth,
+    OMX_S32 BlockHeight,
+    OMX_S32 PartStartX,
+    OMX_S32 PartStartY,
+
+    OMXVCMotionVector pMVArr [][12],
+    OMX_S32 pRefFrArr [][6],
+    OMXVCMotionVector pMVPredArr [][4],
+
+    OMX_S32 Lambda,
+    OMX_S32 *pCost
+)
+{
+    OMX_S32 x, y, i, j, ref, OffX, OffY, OffSrc, OffRef;   /* signed, like the dimensions and steps */
+    OMX_S32 BlockCost, PartitionCost, BestCost;
+    OMX_S32 BestRefFrame = -1;   /* stays negative until a reference frame has won */
+    OMXVCMotionVector BestMV [4][4];
+    OMXVCMotionVector BestMVPred [4][4];
+    OMXVCMotionVector MVPred;
+    OMXVCMotionVector DstMV;
+
+    BestCost = ARM_VCM4P10_MAX_COST;
+
+    for (ref = 0; ref < ARM_VCM4P10_MAX_FRAMES; ref++)
+    {
+        if (pSrcRefBufList [ref] == NULL)
+        {
+            /* No reference frame, continue */
+            continue;
+        }
+
+        PartitionCost = 0;
+
+        for (y = 0; y < PartHeight; y += BlockHeight)
+        {
+            for (x = 0; x < PartWidth; x += BlockWidth)
+            {
+                OffSrc = SrcCurrStep * (PartStartY + y) + PartStartX + x;
+                OffRef = SrcRefStep * (PartStartY + y) + PartStartX + x;
+                armVCM4P10_BlockMotionSearch (
+                    pSrcCurrBuf + OffSrc,
+                    SrcCurrStep,
+                    pSrcRefBufList [ref] + OffRef,
+                    SrcRefStep,
+                    pRefRect,
+                    pCurrPointPos,
+                    pMESpec,
+
+                    Lambda,
+                    &BlockCost,
+                    &DstMV,
+
+                    x + PartStartX,
+                    y + PartStartY,
+                    BlockWidth,
+                    BlockHeight,
+                    ref,
+                    pRefFrArr,
+                    pMVArr,
+                    &MVPred);
+
+                PartitionCost += BlockCost;
+
+                OffX = (PartStartX + x) >> 2;
+                OffY = (PartStartY + y) >> 2;
+
+                for (j = 0; j < (BlockHeight >> 2); j++)
+                {
+                    for (i = 0; i < (BlockWidth >> 2); i++)
+                    {
+                        pMVArr [4 + OffY + j][4 + OffX + i].dx = DstMV.dx;
+                        pMVArr [4 + OffY + j][4 + OffX + i].dy = DstMV.dy;
+                        pMVPredArr [OffY + j][OffX + i].dx = MVPred.dx;
+                        pMVPredArr [OffY + j][OffX + i].dy = MVPred.dy;
+                    }
+                }
+                /* Entry holding the block's top-left corner; the loops below are empty for dimensions under 8 */
+                pRefFrArr [2 + (OffY >> 1)][2 + (OffX >> 1)] = ref;
+                for (j = 0; j < (BlockHeight >> 3); j++)
+                {
+                    for (i = 0; i < (BlockWidth >> 3); i++)
+                    {
+                        pRefFrArr [2 + (OffY >> 1) + j][2 + (OffX >> 1) + i] = ref;
+                    }
+                }
+
+            }
+        }
+
+        /*
+         * If PartitionCost is less for this reference frame, motion vectors need to be backed up
+         */
+        if (PartitionCost <= BestCost)
+        {
+            BestCost = PartitionCost;
+            BestRefFrame = ref;
+
+            for (y = 0; y < (PartHeight/BlockHeight); y++)
+            {
+                for (x = 0; x < (PartWidth/BlockWidth); x++)
+                {
+                    OffX = (PartStartX + x * BlockWidth) >> 2;
+                    OffY = (PartStartY + y * BlockHeight) >> 2;
+
+                    BestMV[y][x].dx = pMVArr [4 + OffY][4 + OffX].dx;
+                    BestMV[y][x].dy = pMVArr [4 + OffY][4 + OffX].dy;
+                    BestMVPred[y][x].dx = pMVPredArr [OffY][OffX].dx;
+                    BestMVPred[y][x].dy = pMVPredArr [OffY][OffX].dy;
+                }
+            }
+        }
+
+    }
+    /* Copy back best reference frame, motion vectors and predictors. */
+    /* Skipped while BestRefFrame is negative: BestMV / BestMVPred were never filled. */
+    if (BestRefFrame >= 0)
+    {
+        for (y = 0; y < (PartHeight/BlockHeight); y++)
+        {
+            for (x = 0; x < (PartWidth/BlockWidth); x++)
+            {
+                OffX = (PartStartX + x * BlockWidth) >> 2;
+                OffY = (PartStartY + y * BlockHeight) >> 2;
+
+                for (j = 0; j < (BlockHeight >> 2); j++)
+                {
+                    for (i = 0; i < (BlockWidth >> 2); i++)
+                    {
+                        pMVArr [4 + OffY + j][4 + OffX + i].dx = BestMV[y][x].dx;
+                        pMVArr [4 + OffY + j][4 + OffX + i].dy = BestMV[y][x].dy;
+                        pMVPredArr [OffY + j][OffX + i].dx = BestMVPred[y][x].dx;
+                        pMVPredArr [OffY + j][OffX + i].dy = BestMVPred[y][x].dy;
+                    }
+                }
+                pRefFrArr [2 + (OffY >> 1)][2 + (OffX >> 1)] = BestRefFrame;   /* also covers dimensions under 8 */
+                for (j = 0; j < (BlockHeight >> 3); j++)
+                {
+                    for (i = 0; i < (BlockWidth >> 3); i++)
+                    {
+                        pRefFrArr [2 + (OffY >> 1) + j][2 + (OffX >> 1) + i] = BestRefFrame;
+                    }
+                }
+            }
+        }
+    }
+    *pCost = BestCost;
+    return OMX_Sts_NoErr;
+
+}
+
+/**
+ * Function: armVCM4P10_Intra16x16Estimation
+ *
+ * Description:
+ * Evaluates the four Intra 16x16 prediction modes for the current MB and
+ * records the mode with the smallest SAD together with that SAD.
+ *
+ * Remarks:
+ * Modes for which omxVCM4P10_PredictIntra_16x16 reports an error are skipped.
+ * Parameters:
+ * [in] pSrcCurrBuf - Pointer to the start of luma component of current Macroblock
+ * [in] SrcCurrStep - Step size for the pointer pSrcCurrBuf
+ * [in] pSrcRecBuf - Pointer to the start of luma component of co-located reconstructed MB
+ * [in] SrcRecStep - Step size for the pointer pSrcRecBuf
+ * [in] pCurrPointPos - Position of the current MB; x >> 4 and y >> 4 give
+ * the MB column and row used for the availability checks
+ * [in] pMBInter - Array, of dimension four, containing pointers to information associated with four
+ * adjacent type INTER MBs (Left, Top, Top-Left, Top-Right).
+ * [in] pMBIntra - Array, of dimension four, containing pointers to information associated with four
+ * adjacent type INTRA MBs (Left, Top, Top-Left, Top-Right).
+ * [in/out] pSrcDstMBCurr - Information structure for the current MB; its
+ * Intra16x16PredMode is set to the best mode
+ * [out] pCost - Pointer to cost for Intra16x16
+ * (ARM_VCM4P10_MAX_COST when no mode could be evaluated)
+ * Return Value:
+ * OMX_Sts_NoErr - always
+ *
+ *
+ */
+
+static OMXResult armVCM4P10_Intra16x16Estimation(
+    const OMX_U8* pSrcCurrBuf,
+    OMX_S32 SrcCurrStep,
+    const OMX_U8* pSrcRecBuf,
+    OMX_S32 SrcRecStep,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    const OMXVCM4P10MBInfoPtr *pMBInter,
+    const OMXVCM4P10MBInfoPtr *pMBIntra,
+    OMXVCM4P10MBInfo *pSrcDstMBCurr,
+    OMX_U32 *pCost)
+{
+    /* Candidate modes in the order they are evaluated */
+    OMXVCM4P10Intra16x16PredMode Candidates [4] =
+        {OMX_VC_16X16_VERT, OMX_VC_16X16_HOR,
+         OMX_VC_16X16_DC, OMX_VC_16X16_PLANE};
+    OMX_U8 Scratch [16*16 + 16];
+    OMX_U8 *pPrediction;
+    OMX_U32 MBCol = pCurrPointPos->x >> 4;
+    OMX_U32 MBRow = pCurrPointPos->y >> 4;
+    OMX_S32 Neighbours = 0;
+    OMX_S32 ModeSAD;
+    OMX_S32 Idx;
+    OMXResult Status;
+
+    pPrediction = armAlignTo16Bytes(Scratch);
+    /* A neighbour counts when the MB is off that edge and intra or inter info exists for it */
+    if (MBCol != 0)
+    {
+        if (pMBIntra [0] != 0 || pMBInter [0] != 0)
+        {
+            Neighbours |= OMX_VC_LEFT;
+        }
+    }
+
+    if (MBRow != 0)
+    {
+        if (pMBIntra [1] != 0 || pMBInter [1] != 0)
+        {
+            Neighbours |= OMX_VC_UPPER;
+        }
+    }
+
+    if ((MBCol != 0) && (MBRow != 0))
+    {
+        if (pMBIntra [2] != 0 || pMBInter [2] != 0)
+        {
+            Neighbours |= OMX_VC_UPPER_LEFT;
+        }
+    }
+
+    /* Start from the maximum cost so that the first usable mode wins */
+    *pCost = ARM_VCM4P10_MAX_COST;
+    for (Idx = 0; Idx < 4; Idx++)
+    {
+        Status = omxVCM4P10_PredictIntra_16x16(
+            pSrcRecBuf - 1,
+            pSrcRecBuf - SrcRecStep,
+            pSrcRecBuf - SrcRecStep - 1,
+            pPrediction,
+            SrcRecStep,
+            16,
+            Candidates [Idx],
+            Neighbours);
+        if (Status != OMX_Sts_NoErr)
+        {
+            continue;
+        }
+
+        armVCCOMM_SAD(pSrcCurrBuf, SrcCurrStep, pPrediction, 16, &ModeSAD, 16, 16);
+        if (ModeSAD < *pCost)
+        {
+            *pCost = ModeSAD;
+            pSrcDstMBCurr->Intra16x16PredMode = Candidates [Idx];
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function: armVCM4P10_Intra4x4Estimation
+ *
+ * Description:
+ * Chooses the best Intra 4x4 prediction mode for each of the 16 blocks of
+ * the current MB and sums the per-block costs.
+ *
+ * Parameters:
+ * [in] pSrcCurrBuf - Pointer to the start of luma component of current Macroblock
+ * [in] SrcCurrStep - Step size for the pointer pSrcCurrBuf
+ * [in] pSrcRecBuf - Pointer to the start of luma component of co-located reconstructed MB
+ * [in] SrcRecStep - Step size for the pointer pSrcRecBuf
+ * The 16 rows above and the 16 columns on either side
+ * of pSrcRecBuf are read as well.
+ * [in] pMBIntra - Array, of dimension four, containing pointers to information associated with four
+ * adjacent type INTRA MBs (Left, Top, Top-Left, Top-Right).
+ * [in/out] pSrcDstMBCurr - Information structure for the current MB; its
+ * pIntra4x4PredMode entries are set to the best modes
+ * [out] pCost - Pointer to cost for Intra4x4
+ * (sum of the best cost of each 4x4 block)
+ * Return Value:
+ * OMX_Sts_NoErr - always
+ *
+ *
+ */
+
+static OMXResult armVCM4P10_Intra4x4Estimation(
+    const OMX_U8* pSrcCurrBuf,
+    OMX_S32 SrcCurrStep,
+    const OMX_U8* pSrcRecBuf,
+    OMX_S32 SrcRecStep,
+    const OMXVCM4P10MBInfoPtr *pMBIntra,
+    OMXVCM4P10MBInfo *pSrcDstMBCurr,
+    OMX_U32 *pCost)
+{
+    /*
+     * Mode table with a border around the current MB. The MB's own modes
+     * start at [1][1]; border entries hold the neighbours' modes, or
+     * ARM_VCM4P10_INVALID_BLOCK when that neighbour is not available:
+     *
+     * c3 b0 b1 b2 b3 d0 d1 d2 d3
+     * a0 xx xx xx xx - - - -
+     * a1 xx xx xx xx - - - -
+     * a2 xx xx xx xx - - - -
+     * a3 xx xx xx xx - - - -
+     *
+     * a: pMBIntra[0], b: pMBIntra[1], c: pMBIntra[2], d: pMBIntra[3]
+     */
+    OMX_S32 ModeTable [5][9];
+
+    /*
+     * Local copy of the reconstruction, 3 MBs wide and 2 MBs high:
+     *
+     * MB11 MB12 MB13
+     * MB21 MB22 MB23
+     *
+     * MB22 is the current MB. Every 4x4 block in it is replaced by its
+     * best prediction, so later blocks predict from earlier ones.
+     */
+    OMX_U8 LocalRec [(16*3)*(16*2)];
+    const OMX_S32 LocalStep = 16 * 3;
+    OMX_S32 Row, Col;
+    OMX_S32 Blk;
+    OMX_S32 BlockCost;
+
+    /* The MB cost is accumulated block by block */
+    *pCost = 0;
+
+    /* Every entry starts out as "not available"; available neighbours
+     * are filled in afterwards */
+    for (Row = 0; Row < 5; Row++)
+    {
+        for (Col = 0; Col < 9; Col++)
+        {
+            ModeTable [Row][Col] = ARM_VCM4P10_INVALID_BLOCK;
+        }
+    }
+
+    /* b0..b3: entries 12..15 of pMBIntra[1], stored above the
+     * current MB */
+    if (pMBIntra[1] != NULL)
+    {
+        for (Col = 0; Col < 4; Col++)
+        {
+            ModeTable [0][Col + 1] =
+                pMBIntra[1]->pIntra4x4PredMode[3*4 + Col];
+        }
+    }
+
+    /* d0..d3: entries 12..15 of pMBIntra[3], stored above and to the
+     * right of the current MB */
+    if (pMBIntra[3] != NULL)
+    {
+        for (Col = 0; Col < 4; Col++)
+        {
+            ModeTable [0][Col + 5] =
+                pMBIntra[3]->pIntra4x4PredMode[3*4 + Col];
+        }
+    }
+
+    /* c3: entry 15 of pMBIntra[2], stored at the upper-left
+     * corner */
+    if (pMBIntra[2] != NULL)
+    {
+        ModeTable [0][0] = pMBIntra[2]->pIntra4x4PredMode[15];
+    }
+
+    /* a0..a3: entries 3, 7, 11, 15 of pMBIntra[0], stored left of
+     * the current MB */
+    if (pMBIntra[0] != NULL)
+    {
+        for (Row = 0; Row < 4; Row++)
+        {
+            ModeTable [Row + 1][0] =
+                pMBIntra[0]->pIntra4x4PredMode[Row*4 + 3];
+        }
+    }
+
+    /*
+     * Copy the 48x32 window that starts 16 rows above and 16 columns
+     * left of pSrcRecBuf into the local buffer.
+     */
+    for (Row = 0; Row < (16 * 2); Row++)
+    {
+        for (Col = 0; Col < (16 * 3); Col++)
+        {
+            LocalRec [LocalStep * Row + Col] =
+                pSrcRecBuf [SrcRecStep * (Row - 16) + Col - 16];
+        }
+    }
+
+    /* Visit the 16 blocks: Blk >> 2 is the 8x8 block index and
+     * Blk & 3 the 4x4 block index inside it. The current MB starts at
+     * row 16, column 16 of the local buffer. */
+    for (Blk = 0; Blk < 16; Blk++)
+    {
+        armVCM4P10_Mode4x4Decision (
+            pSrcCurrBuf,
+            SrcCurrStep,
+            pSrcDstMBCurr,
+            Blk >> 2,
+            Blk & 3,
+            LocalRec + 16 * LocalStep + 16,
+            LocalStep,
+            ModeTable,
+            &BlockCost);
+
+        *pCost += BlockCost;
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function: armVCM4P10_InterMEMB
+ *
+ * Description:
+ * Performs MB-level motion estimation for INTER MB type and selects best motion estimation strategy from
+ * the set of modes supported in baseline profile ISO/IEC 14496-10.
+ *
+ * Remarks:
+ * - No argument checking is done in this function: pMESpec is cast to OMXVCM4P10MEParams and dereferenced,
+ * and pMBInter[0..3] are read, without any NULL test on pMESpec or pMBInter themselves.
+ * - If blockSplitEnable8x8 and blockSplitEnable4x4 are both 1, every 8x8 quadrant is first searched with
+ * 4x4, 4x8 and 8x4 blocks; the sum of the four best quadrant costs is the cost the MB partitions must beat.
+ * - MB partitions 8x8, 8x16, 16x8 and 16x16 are then searched (16x16 only when blockSplitEnable8x8 is not 1).
+ * - Candidates are accepted with "<=", so on equal cost the candidate tried later is kept.
+ *
+ * Parameters:
+ * [in] pSrcCurrBuf - Pointer to the start of luma component of current Macroblock
+ * [in] SrcCurrStep - Step size for the pointer pSrcCurrBuf
+ * [in] pSrcRefBufList - Array of ARM_VCM4P10_MAX_FRAMES pointers, each to the start of luma component of
+ * a co-located reference MB; forwarded unchanged to armVCM4P10_PartitionME
+ * [in] SrcRefStep - Step size for the pointers in pSrcRefBufList
+ * [in] pRefRect Pointer to the valid reference rectangle; relative to the image origin.
+ * [in] pCurrPointPos Position of the current macroblock in the current plane.
+ * [in] Lambda - For calculating the cost
+ * [in] pMESpec - Motion estimation structure (used here as OMXVCM4P10MEParams)
+ * [in] pMBInter - Array, of dimension four, containing pointers to information associated with four
+ * adjacent type INTER MBs (Left, Top, Top-Left, Top-Right).
+ * [in/out] pSrcDstMBCurr - Pointer to information structure for the current MB. This function only writes
+ * to it: mbType, subMBType[], pRefL0Idx[], pMV0[][] and pMVPred[][]
+ * [out] pDstCost - Pointer to cost for Inter MB
+ * Return Value:
+ * OMX_Sts_NoErr - No Error (the only value returned by this body)
+ * OMX_Sts_BadArgErr - Bad arguments: never returned here, no checks are made
+ *
+ */
+
+static OMXResult armVCM4P10_InterMEMB(
+ const OMX_U8 *pSrcCurrBuf,
+ OMX_S32 SrcCurrStep,
+ const OMX_U8 *pSrcRefBufList[ARM_VCM4P10_MAX_FRAMES],
+ OMX_S32 SrcRefStep,
+ const OMXRect *pRefRect,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ OMX_U32 Lambda,
+ void *pMESpec,
+ const OMXVCM4P10MBInfoPtr *pMBInter,
+ OMXVCM4P10MBInfoPtr pSrcDstMBCurr,
+ OMX_U32 *pDstCost)
+{
+ OMX_S32 i, j, x, y, mode;
+ OMX_U32 Block8x8, XPerMB, YPerMB, Block2x, Block2y;
+ OMX_S32 PartStartX = 0, PartStartY = 0;
+ OMX_S32 PartWidth = 8, PartHeight = 8, BlockWidth = 4, BlockHeight = 4;
+ const OMX_U32 BlkSz [4][2] = {{4,4}, {4,8}, {8,4}}; /* {width, height}; only rows 0..2 are listed and read (mode < 3) */
+ const OMX_U32 PartSz [4][2] = {{8,8}, {8,16}, {16,8}, {16,16}}; /* {width, height}, indexed by mode */
+ const OMXVCM4P10SubMacroblockType
+ ModeSubMBType4x4 [] = {OMX_VC_SUB_P_4x4, OMX_VC_SUB_P_4x8,
+ OMX_VC_SUB_P_8x4, OMX_VC_SUB_P_8x8}; /* parallel to BlkSz; entry 3 is not reached by the mode < 3 loop */
+ const OMXVCM4P10MacroblockType
+ ModeMBType [] = {OMX_VC_P_8x8, OMX_VC_P_8x16, OMX_VC_P_16x8, OMX_VC_P_16x16}; /* parallel to PartSz */
+
+ OMXVCM4P10MEParams *pMBOptions;
+ /*
+ * RefFrArr and MVArr will be used for temporary storage of Reference frame index and MVs
+ * It will store RefIndex and MVs of 6 MBs as shown below
+ *
+ * |------|------|------|
+ * |Tp-Lt |Top |Tp-R |
+ * | MB | MB | MB |
+ * |------|------|------|
+ * |Left | Curr | |
+ * | MB | MB | |
+ * |------|------|------|
+ *
+ * MVArr is 8 rows x 12 columns: a 4x4 grid of entries per MB, so the
+ * current MB starts at MVArr[4][4]. RefFrArr is 4 rows x 6 columns:
+ * a 2x2 grid per MB, so the current MB starts at RefFrArr[2][2].
+ * MVPredArr covers the current MB only (4x4 entries).
+ */
+ OMX_S32 RefFrArr [4][6];
+ OMXVCMotionVector MVArr [8][12];
+ OMXVCMotionVector MVPredArr [4][4];
+
+ /*
+ * IndexToLoc will translate pMBInter index into spacial arrangement of MBs
+ *
+ * It is indexed by the region number computed below (0 = top-left,
+ * 1 = top, 2 = top-right, 3 = left) and yields the pMBInter slot
+ * (0 = left, 1 = top, 2 = top-left, 3 = top-right).
+ */
+ OMX_S32 IndexToLoc [] = {2,1,3,0};
+ OMX_U32 part, MaxPart;
+ OMX_S32 Cost, MotionCost8x8 [4], MBCost, BestCost;
+
+ /*
+ * Update neighbouring MV array and Ref frame array which will be used for
+ * prediction of MVs and Ref frames.
+ */
+
+ /* Set cost to a high value */
+ Cost = BestCost = ARM_VCM4P10_MAX_COST;
+
+ for (y = 0; y < 8; y++)
+ {
+ for (x = 0; x < 12; x++)
+ {
+ i = 3 * (y >> 2) + (x >> 2); /* MB region: 0..2 upper row, 3..5 lower row */
+ if ((y < 4 || x < 4) && (pMBInter[IndexToLoc[i]] != NULL)) /* first test fails for regions 4 and 5, so IndexToLoc is only read with i <= 3 */
+ {
+ MVArr [y][x].dx =
+ pMBInter[IndexToLoc[i]]->pMV0[y % 4][x % 4].dx;
+ MVArr [y][x].dy =
+ pMBInter[IndexToLoc[i]]->pMV0[y % 4][x % 4].dy;
+ }
+ else
+ {
+ MVArr [y][x].dx = 0; /* current MB, the slot right of it, or a missing neighbour */
+ MVArr [y][x].dy = 0;
+ }
+ }
+ }
+
+ for (y = 0; y < 4; y++)
+ {
+ for (x = 0; x < 6; x++)
+ {
+ i = 3 * (y >> 1) + (x >> 1); /* same region numbering as above, 2x2 entries per MB */
+ if ((y < 2 || x < 2) && (pMBInter[IndexToLoc[i]] != NULL)) /* again keeps i <= 3 whenever IndexToLoc is read */
+ {
+ RefFrArr [y][x] =
+ pMBInter[IndexToLoc[i]]->pRefL0Idx [(y % 2) * 2 + (x % 2)];
+ }
+ else
+ {
+ RefFrArr [y][x] = ARM_VCM4P10_INVALID_BLOCK;
+ }
+ }
+ }
+
+ for (y = 0; y < 4; y++)
+ {
+ for (x = 0; x < 4; x++)
+ {
+ MVPredArr [y][x].dx = 0;
+ MVPredArr [y][x].dy = 0;
+ }
+ }
+ /*
+ * Motion Estimation for 8x8 MB Partition
+ *
+ * This stage only runs when both split flags are 1; otherwise all four
+ * subMBType entries are set to OMX_VC_SUB_P_8x8 and BestCost stays at
+ * ARM_VCM4P10_MAX_COST.
+ */
+
+ for (i = 0; i < 4; i++)
+ {
+ MotionCost8x8 [i] = 0;
+ }
+
+ pMBOptions = (OMXVCM4P10MEParams *) pMESpec;
+
+ if (pMBOptions->blockSplitEnable8x8 == 1 &&
+ pMBOptions->blockSplitEnable4x4 == 1)
+ {
+ pSrcDstMBCurr->mbType = OMX_VC_P_8x8; /* provisional; may be replaced by the partition search further down */
+
+ PartWidth = PartSz [0][0];
+ PartHeight = PartSz [0][1];
+
+ /* For each 8x8 partitions */
+ for (Block8x8 = 0; Block8x8 < 4; Block8x8++)
+ {
+ PartStartX = (Block8x8 % 2) << 3; /* pixel offset of the quadrant: 0 or 8 */
+ PartStartY = (Block8x8 / 2) << 3;
+
+ Block2x = (Block8x8 & 1) << 1; /* same offset in 4x4-block units: 0 or 2 */
+ Block2y = (Block8x8 >> 1) << 1;
+
+ BestCost = ARM_VCM4P10_MAX_COST; /* restart the comparison for this quadrant */
+ for (mode = 0; mode < 3; mode++)
+ {
+ BlockWidth = BlkSz [mode][0];
+ BlockHeight = BlkSz [mode][1];
+
+ armVCM4P10_PartitionME (
+ pSrcCurrBuf,
+ SrcCurrStep,
+ pSrcRefBufList,
+ SrcRefStep,
+ pRefRect,
+ pCurrPointPos,
+ pMESpec,
+
+ PartWidth,
+ PartHeight,
+ BlockWidth,
+ BlockHeight,
+ PartStartX,
+ PartStartY,
+
+ MVArr,
+ RefFrArr,
+ MVPredArr,
+
+ Lambda,
+ &Cost); /* NOTE(review): presumably writes its result into MVArr/RefFrArr/MVPredArr and Cost - confirm against armVCM4P10_PartitionME */
+
+ if (Cost <= BestCost)
+ {
+ /* Update cost */
+ BestCost = Cost;
+
+ /* Update MBCurr struct */
+ pSrcDstMBCurr->subMBType [Block8x8] = ModeSubMBType4x4 [mode];
+
+ pSrcDstMBCurr->pRefL0Idx [Block8x8] = RefFrArr [2 + (PartStartY >> 3)][2 + (PartStartX >> 3)];
+
+ /* Update pMV0 and pMVPred of MBCurr struct
+ * (only the 2x2 group of 4x4 entries belonging to this quadrant) */
+ for (j = 0; j < 2; j++)
+ {
+ for (i = 0; i < 2; i++)
+ {
+ pSrcDstMBCurr->pMV0 [Block2y + j][Block2x + i].dx =
+ MVArr [4 + Block2y + j][4 + Block2x + i].dx;
+ pSrcDstMBCurr->pMV0 [Block2y + j][Block2x + i].dy =
+ MVArr [4 + Block2y + j][4 + Block2x + i].dy;
+
+ pSrcDstMBCurr->pMVPred [Block2y + j][Block2x + i].dx =
+ MVPredArr [Block2y + j][Block2x + i].dx;
+ pSrcDstMBCurr->pMVPred [Block2y + j][Block2x + i].dy =
+ MVPredArr [Block2y + j][Block2x + i].dy;
+ }
+ }
+ }
+ }
+
+ /* Update cost */
+ MotionCost8x8 [Block8x8] = BestCost;
+ }
+
+ /* Cost for mbType OMX_VC_P_8x8
+ * (sum of the four quadrant winners; this is the value MBCost is
+ * compared against in the partition search below) */
+ BestCost = 0;
+ for (i = 0; i < 4; i++)
+ {
+ BestCost += MotionCost8x8 [i];
+ }
+ }
+ else
+ {
+ /* Set sub MB type to 8x8 */
+ for (i = 0; i < 4; i++)
+ {
+ pSrcDstMBCurr->subMBType [i] = OMX_VC_SUB_P_8x8;
+ }
+ }
+
+ /*
+ * Motion Estimation for 8x8, 8x16, 16x8 and 16x16 MB Partition
+ * If pMBOptions->blockSplitEnable8x8 is not 1, do only 16x16 ME (mode 3)
+ */
+ for (mode = (pMBOptions->blockSplitEnable8x8 == 1 ? 0 : 3); mode < 4; mode++)
+ {
+ BlockWidth = PartWidth = PartSz [mode][0]; /* one block per partition at this level */
+ BlockHeight = PartHeight = PartSz [mode][1];
+
+ XPerMB = 16 / PartWidth;
+ YPerMB = 16 / PartHeight;
+ MaxPart = XPerMB * YPerMB;
+
+ MBCost = 0;
+
+ /* part size 4, 2, 2 and 1 corresponding to 8x8, 8x16, 16x8 and 16x16 MB */
+ for (part = 0; part < MaxPart; part++)
+ {
+ PartStartX = (part % XPerMB) * PartWidth;
+ PartStartY = (part / XPerMB) * PartHeight;
+
+ armVCM4P10_PartitionME (
+ pSrcCurrBuf,
+ SrcCurrStep,
+ pSrcRefBufList,
+ SrcRefStep,
+ pRefRect,
+ pCurrPointPos,
+ pMESpec,
+
+ PartWidth,
+ PartHeight,
+ BlockWidth,
+ BlockHeight,
+ PartStartX,
+ PartStartY,
+
+ MVArr,
+ RefFrArr,
+ MVPredArr,
+
+ Lambda,
+ &Cost);
+
+ MBCost += Cost;
+ }
+
+ if (MBCost <= BestCost)
+ {
+ /* Update cost */
+ BestCost = MBCost;
+
+ /* Update mbType of MBCurr struct */
+ pSrcDstMBCurr->mbType = ModeMBType [mode];
+
+ /* Update pMV0 and pMVPred of MBCurr struct
+ * (all 4x4 entries of the current MB, i.e. MVArr[4..7][4..7]) */
+ for (j = 0; j < 4; j++)
+ {
+ for (i = 0; i < 4; i++)
+ {
+ pSrcDstMBCurr->pMV0 [j][i].dx = MVArr [4+j][4+i].dx;
+ pSrcDstMBCurr->pMV0 [j][i].dy = MVArr [4+j][4+i].dy;
+ pSrcDstMBCurr->pMVPred [j][i].dx = MVPredArr [j][i].dx;
+ pSrcDstMBCurr->pMVPred [j][i].dy = MVPredArr [j][i].dy;
+ }
+ }
+ for (j = 0; j < 2; j++)
+ {
+ for (i = 0; i < 2; i++)
+ {
+ pSrcDstMBCurr->pRefL0Idx [j*2+i] = RefFrArr [2+j][2+i];
+ }
+ }
+ }
+
+ }
+
+ /* Update Best Cost
+ * (still ARM_VCM4P10_MAX_COST if no candidate cost was <= it) */
+ *pDstCost = BestCost;
+
+ return OMX_Sts_NoErr;
+}
+
+/**
+ * Function: omxVCM4P10_MotionEstimationMB (6.3.5.3.1)
+ *
+ * Description:
+ * Performs MB-level motion estimation and selects best motion estimation
+ * strategy from the set of modes supported in baseline profile [ISO14496-10].
+ *
+ * Input Arguments:
+ *
+ * pSrcCurrBuf - Pointer to the current position in original picture plane;
+ * 16-byte alignment required
+ * pSrcRefBufList - Pointer to an array with 16 entries. Each entry points
+ * to the top-left corner of the co-located MB in a reference
+ * picture. The array is filled from low-to-high with valid
+ * reference frame pointers; the unused high entries should be set
+ * to NULL. Ordering of the reference frames should follow
+ * [ISO14496-10] subclause 8.2.4 Decoding Process for Reference
+ * Picture Lists. The entries must be 16-byte aligned.
+ * pSrcRecBuf - Pointer to the top-left corner of the co-located MB in the
+ * reconstructed picture; must be 16-byte aligned.
+ * SrcCurrStep - Width of the original picture plane in terms of full
+ * pixels; must be a multiple of 16.
+ * SrcRefStep - Width of the reference picture plane in terms of full
+ * pixels; must be a multiple of 16.
+ * SrcRecStep - Width of the reconstructed picture plane in terms of full
+ * pixels; must be a multiple of 16.
+ * pRefRect - Pointer to the valid reference rectangle; relative to the
+ * image origin.
+ * pCurrPointPos - Position of the current macroblock in the current plane.
+ * Lambda - Lagrange factor for computing the cost function
+ * pMESpec - Pointer to the motion estimation specification structure; must
+ * have been allocated and initialized prior to calling this
+ * function.
+ * pMBInter - Array, of dimension four, containing pointers to information
+ * associated with four adjacent type INTER MBs (Left, Top,
+ * Top-Left, Top-Right). Any pointer in the array may be set equal
+ * to NULL if the corresponding MB doesn't exist or is not of type
+ * INTER. pMBInter[0] - Pointer to left MB information pMBInter[1]
+ * - Pointer to top MB information pMBInter[2] - Pointer to
+ * top-left MB information pMBInter[3] - Pointer to top-right MB
+ * information
+ * pMBIntra - Array, of dimension four, containing pointers to information
+ * associated with four adjacent type INTRA MBs (Left, Top,
+ * Top-Left, Top-Right). Any pointer in the array may be set equal
+ * to NULL if the corresponding MB doesn't exist or is not of type
+ * INTRA. pMBIntra[0] - Pointer to left MB information pMBIntra[1]
+ * - Pointer to top MB information pMBIntra[2] - Pointer to
+ * top-left MB information pMBIntra[3] - Pointer to top-right MB
+ * information
+ * pSrcDstMBCurr - Pointer to information structure for the current MB.
+ * The following entries should be set prior to calling the
+ * function: sliceID - the number of the slice to which the
+ * current MB belongs.
+ *
+ * Output Arguments:
+ *
+ * pDstCost - Pointer to the minimum motion cost for the current MB.
+ * pDstBlockSAD - Pointer to the array of SADs for each of the sixteen luma
+ * 4x4 blocks in each MB. The block SADs are in scan order for
+ * each MB. For implementations that cannot compute the SAD values
+ * individually, the maximum possible value (0xffff) is returned
+ * for each of the 16 block SAD entries.
+ * pSrcDstMBCurr - Pointer to updated information structure for the current
+ * MB after MB-level motion estimation has been completed. The
+ * following fields are updated by the ME function. The following
+ * parameter set quantifies the MB-level ME search results: MbType
+ * subMBType[4] pMV0[4][4] pMVPred[4][4] pRefL0Idx[4]
+ * Intra16x16PredMode pIntra4x4PredMode[4][4]
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - One or more of the following pointers is NULL: pSrcCurrBuf,
+ * pSrcRefBufList, pSrcRecBuf, pRefRect, pCurrPointPos, pMESpec,
+ * pMBInter, pMBIntra,pSrcDstMBCurr, pDstCost, pSrcRefBufList[0]
+ * - SrcRefStep, SrcRecStep are not multiples of 16
+ * - iBlockWidth or iBlockHeight are values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+
+OMXResult omxVCM4P10_MotionEstimationMB(
+    const OMX_U8 *pSrcCurrBuf,
+    OMX_S32 SrcCurrStep,
+    const OMX_U8 *pSrcRefBufList[ARM_VCM4P10_MAX_FRAMES],
+    OMX_S32 SrcRefStep,
+    const OMX_U8 *pSrcRecBuf,
+    OMX_S32 SrcRecStep,
+    const OMXRect *pRefRect,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    OMX_U32 Lambda,
+    void *pMESpec,
+    const OMXVCM4P10MBInfoPtr *pMBInter,
+    const OMXVCM4P10MBInfoPtr *pMBIntra,
+    OMXVCM4P10MBInfo *pSrcDstMBCurr,
+    OMX_INT *pDstCost,
+    OMX_U16 *pDstBlockSAD)
+{
+    OMX_U32 ModeCost;           /* cost reported by the estimator run last */
+    OMX_U32 RefIdx;             /* index into pSrcRefBufList */
+    OMX_U32 HaveRefFrame = 0;   /* becomes 1 once a non-NULL reference is seen */
+
+    /* Mandatory pointers (pDstBlockSAD is not tested here) */
+    armRetArgErrIf(pSrcCurrBuf == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRefBufList == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcRecBuf == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pCurrPointPos == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pMBInter == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pMBIntra == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcDstMBCurr == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstCost == NULL, OMX_Sts_BadArgErr)
+
+    /* Every step must be positive and a multiple of 16 */
+    armRetArgErrIf(SrcRefStep <= 0 || (SrcRefStep & 15), OMX_Sts_BadArgErr)
+    armRetArgErrIf(SrcRecStep <= 0 || (SrcRecStep & 15), OMX_Sts_BadArgErr)
+    armRetArgErrIf(SrcCurrStep <= 0 || (SrcCurrStep & 15), OMX_Sts_BadArgErr)
+
+    /* Current and reconstructed MB pointers must be 16-byte aligned */
+    armRetArgErrIf(armNot16ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot16ByteAligned(pSrcRecBuf), OMX_Sts_BadArgErr)
+
+    /*
+     * Walk the reference list: reject any misaligned non-NULL entry and
+     * remember whether at least one reference frame is present.
+     */
+    for (RefIdx = 0; RefIdx < ARM_VCM4P10_MAX_FRAMES; RefIdx++)
+    {
+        armRetArgErrIf(pSrcRefBufList [RefIdx] != NULL &&
+            armNot16ByteAligned(pSrcRefBufList [RefIdx]), OMX_Sts_BadArgErr)
+
+        if (pSrcRefBufList [RefIdx] != NULL)
+        {
+            HaveRefFrame = 1;
+        }
+    }
+
+    /* Start from the worst cost so that any estimator below can win */
+    *pDstCost = ARM_VCM4P10_MAX_COST;
+
+    /* Inter estimation is attempted only when a reference frame exists */
+    if (HaveRefFrame)
+    {
+        armVCM4P10_InterMEMB(
+            pSrcCurrBuf,
+            SrcCurrStep,
+            pSrcRefBufList,
+            SrcRefStep,
+            pRefRect,
+            pCurrPointPos,
+            Lambda,
+            pMESpec,
+            pMBInter,
+            pSrcDstMBCurr,
+            &ModeCost
+            );
+
+        *pDstCost = ModeCost;
+    }
+
+    /* Intra 4x4 estimation, run only if the ME parameters enable it */
+    if (((OMXVCM4P10MEParams *)pMESpec)->intraEnable4x4 == 1)
+    {
+        armVCM4P10_Intra4x4Estimation(
+            pSrcCurrBuf,
+            SrcCurrStep,
+            pSrcRecBuf,
+            SrcRecStep,
+            pMBIntra,
+            pSrcDstMBCurr,
+            &ModeCost
+            );
+
+        /* "<=" lets Intra 4x4 replace an equally expensive earlier choice */
+        if (ModeCost <= *pDstCost)
+        {
+            *pDstCost = ModeCost;
+            pSrcDstMBCurr->mbType = OMX_VC_INTRA_4x4;
+        }
+    }
+
+    /* Intra 16x16 estimation is always run */
+    armVCM4P10_Intra16x16Estimation(
+        pSrcCurrBuf,
+        SrcCurrStep,
+        pSrcRecBuf,
+        SrcRecStep,
+        pCurrPointPos,
+        pMBInter,
+        pMBIntra,
+        pSrcDstMBCurr,
+        &ModeCost
+        );
+
+    if (ModeCost <= *pDstCost)
+    {
+        *pDstCost = ModeCost;
+        pSrcDstMBCurr->mbType = OMX_VC_INTRA_16x16;
+    }
+
+    /*
+     * Hand pDstBlockSAD to armVCM4P10_CalculateBlockSAD.
+     * NOTE(review): the original comment here said the SADs are set to the
+     * maximum value; confirm against the helper's implementation.
+     */
+    armVCM4P10_CalculateBlockSAD( pSrcDstMBCurr,
+        pSrcCurrBuf,
+        SrcCurrStep,
+        pSrcRefBufList,
+        SrcRefStep,
+        pSrcRecBuf,
+        SrcRecStep,
+        pRefRect,
+        pCurrPointPos,
+        pMBInter,
+        pMBIntra,
+        pDstBlockSAD);
+
+    return OMX_Sts_NoErr;
+}
+
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8.c
new file mode 100644
index 0000000..d6ca783
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8.c
@@ -0,0 +1,284 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_PredictIntraChroma_8x8.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 Chroma 8x8 intra prediction module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/*
+ * Description:
+ * DC intra prediction of one 4x4 block in which the row above takes
+ * priority: the four above samples are used when OMX_VC_UPPER is set,
+ * otherwise the four left samples when OMX_VC_LEFT is set, otherwise the
+ * constant 128. The rounded mean ((sum + 2) >> 2) fills the whole block.
+ *
+ * Parameters:
+ * [in]  pSrcLeft      Left neighbours, read at pSrcLeft[y*leftStep], y = 0..3
+ * [in]  pSrcAbove     Above neighbours, read at pSrcAbove[x], x = 0..3
+ * [out] pDst          Destination block, written at pDst[y*dstStep + x]
+ * [in]  leftStep      Step between consecutive left samples
+ * [in]  dstStep       Step between destination rows
+ * [in]  availability  Flags tested against OMX_VC_UPPER and OMX_VC_LEFT
+ *
+ * Return Value:
+ * None
+ */
+
+static void armVCM4P10_PredictIntraDCUp4x4(
+    const OMX_U8* pSrcLeft,
+    const OMX_U8 *pSrcAbove,
+    OMX_U8* pDst,
+    OMX_INT leftStep,
+    OMX_INT dstStep,
+    OMX_S32 availability
+)
+{
+    int Row, Col;
+    int DcValue;
+
+    if (availability & OMX_VC_UPPER)
+    {
+        /* Above row wins whenever it is available */
+        DcValue = (pSrcAbove[0] + pSrcAbove[1] +
+                   pSrcAbove[2] + pSrcAbove[3] + 2) >> 2;
+    }
+    else if (availability & OMX_VC_LEFT)
+    {
+        /* Fall back to the left column */
+        DcValue = (pSrcLeft[0] + pSrcLeft[leftStep] +
+                   pSrcLeft[2*leftStep] + pSrcLeft[3*leftStep] + 2) >> 2;
+    }
+    else
+    {
+        /* Neither neighbour available */
+        DcValue = 128;
+    }
+
+    for (Row = 0; Row < 4; Row++)
+    {
+        for (Col = 0; Col < 4; Col++)
+        {
+            pDst[Row*dstStep + Col] = (OMX_U8)DcValue;
+        }
+    }
+}
+
+/*
+ * Description:
+ * DC intra prediction of one 4x4 block in which the left column takes
+ * priority: the four left samples are used when OMX_VC_LEFT is set,
+ * otherwise the four above samples when OMX_VC_UPPER is set, otherwise the
+ * constant 128. The rounded mean ((sum + 2) >> 2) fills the whole block.
+ *
+ * Parameters:
+ * [in]  pSrcLeft      Left neighbours, read at pSrcLeft[y*leftStep], y = 0..3
+ * [in]  pSrcAbove     Above neighbours, read at pSrcAbove[x], x = 0..3
+ * [out] pDst          Destination block, written at pDst[y*dstStep + x]
+ * [in]  leftStep      Step between consecutive left samples
+ * [in]  dstStep       Step between destination rows
+ * [in]  availability  Flags tested against OMX_VC_LEFT and OMX_VC_UPPER
+ *
+ * Return Value:
+ * None
+ */
+
+static void armVCM4P10_PredictIntraDCLeft4x4(
+    const OMX_U8* pSrcLeft,
+    const OMX_U8 *pSrcAbove,
+    OMX_U8* pDst,
+    OMX_INT leftStep,
+    OMX_INT dstStep,
+    OMX_S32 availability
+)
+{
+    int Row, Col;
+    int DcValue;
+
+    if (availability & OMX_VC_LEFT)
+    {
+        /* Left column wins whenever it is available */
+        DcValue = (pSrcLeft[0] + pSrcLeft[leftStep] +
+                   pSrcLeft[2*leftStep] + pSrcLeft[3*leftStep] + 2) >> 2;
+    }
+    else if (availability & OMX_VC_UPPER)
+    {
+        /* Fall back to the row above */
+        DcValue = (pSrcAbove[0] + pSrcAbove[1] +
+                   pSrcAbove[2] + pSrcAbove[3] + 2) >> 2;
+    }
+    else
+    {
+        /* Neither neighbour available */
+        DcValue = 128;
+    }
+
+    for (Row = 0; Row < 4; Row++)
+    {
+        for (Col = 0; Col < 4; Col++)
+        {
+            pDst[Row*dstStep + Col] = (OMX_U8)DcValue;
+        }
+    }
+}
+
+/**
+ * Function: omxVCM4P10_PredictIntraChroma_8x8 (6.3.3.1.3)
+ *
+ * Description:
+ * Performs intra prediction for chroma samples.
+ *
+ * Input Arguments:
+ *
+ * pSrcLeft - Pointer to the buffer of 8 left pixels: p[x, y] (x = -1, y=
+ * 0..7).
+ * pSrcAbove - Pointer to the buffer of 8 above pixels: p[x,y] (x = 0..7, y
+ * = -1); must be aligned on an 8-byte boundary.
+ * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ * leftStep - Step of left pixel buffer; must be a multiple of 8.
+ * dstStep - Step of the destination buffer; must be a multiple of 8.
+ * predMode - Intra chroma prediction mode, please refer to section 3.4.3.
+ * availability - Neighboring chroma block availability flag, please refer
+ * to "Neighboring Macroblock Availability".
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to the destination buffer; must be aligned on an 8-byte
+ * boundary.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If any of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pDst is NULL.
+ * dstStep < 8 or dstStep is not a multiple of 8.
+ * leftStep is not a multiple of 8.
+ * predMode is not in the valid range of enumeration
+ * OMXVCM4P10IntraChromaPredMode.
+ * predMode is OMX_VC_CHROMA_VERT, but availability doesn't set
+ * OMX_VC_UPPER indicating p[x,-1] (x = 0..7) is not available.
+ * predMode is OMX_VC_CHROMA_HOR, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..7) is not available.
+ * predMode is OMX_VC_CHROMA_PLANE, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1](x = 0..7), or p[-1,y] (y = 0..7), or p[-1,-1] is not
+ * available.
+ * availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ * availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ * either pSrcAbove or pDst is not aligned on an 8-byte boundary. Note:
+ * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ * they are not used by intra prediction implied in predMode.
+ * Note: OMX_VC_UPPER_RIGHT is not used in intra chroma
+ * prediction.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntraChroma_8x8(
+    const OMX_U8* pSrcLeft,
+    const OMX_U8 *pSrcAbove,
+    const OMX_U8 *pSrcAboveLeft,
+    OMX_U8* pDst,
+    OMX_INT leftStep,
+    OMX_INT dstStep,
+    OMXVCM4P10IntraChromaPredMode predMode,
+    OMX_S32 availability
+ )
+{
+    int Pix, Row, Col, k;
+    int GradH, GradV;               /* weighted horizontal / vertical gradients */
+    int Base, SlopeX, SlopeY;       /* plane parameters */
+    int RowBase, Value;
+
+    /* Destination pointer, steps and alignment */
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(dstStep < 8, OMX_Sts_BadArgErr);
+    armRetArgErrIf((dstStep % 8) != 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf((leftStep % 8) != 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pSrcAbove), OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pDst), OMX_Sts_BadArgErr);
+
+    /* A neighbour flagged as available must come with a pointer */
+    armRetArgErrIf((availability & OMX_VC_UPPER) && pSrcAbove == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((availability & OMX_VC_LEFT ) && pSrcLeft == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((availability & OMX_VC_UPPER_LEFT) && pSrcAboveLeft == NULL, OMX_Sts_BadArgErr);
+
+    /* The requested mode must have the neighbours it reads */
+    armRetArgErrIf(predMode==OMX_VC_CHROMA_VERT && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_CHROMA_HOR && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_CHROMA_PLANE && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_CHROMA_PLANE && !(availability & OMX_VC_UPPER_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_CHROMA_PLANE && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf((unsigned)predMode > OMX_VC_CHROMA_PLANE, OMX_Sts_BadArgErr);
+
+    if (predMode == OMX_VC_CHROMA_DC)
+    {
+        /*
+         * One DC predictor per 4x4 quadrant. The top-right quadrant prefers
+         * the row above, the bottom-left quadrant prefers the left column.
+         */
+        armVCM4P10_PredictIntraDC4x4( pSrcLeft, pSrcAbove, pDst, leftStep, dstStep, availability);
+        armVCM4P10_PredictIntraDCUp4x4( pSrcLeft, pSrcAbove+4, pDst+4, leftStep, dstStep, availability);
+        armVCM4P10_PredictIntraDCLeft4x4( pSrcLeft+4*leftStep, pSrcAbove, pDst+4*dstStep, leftStep, dstStep, availability);
+        armVCM4P10_PredictIntraDC4x4( pSrcLeft+4*leftStep, pSrcAbove+4, pDst+4+4*dstStep, leftStep, dstStep, availability);
+    }
+    else if (predMode == OMX_VC_CHROMA_HOR)
+    {
+        /* Each row repeats its left neighbour; pixels visited in raster order */
+        for (Pix = 0; Pix < 64; Pix++)
+        {
+            Row = Pix >> 3;
+            Col = Pix & 7;
+            pDst[Row*dstStep + Col] = pSrcLeft[Row*leftStep];
+        }
+    }
+    else if (predMode == OMX_VC_CHROMA_VERT)
+    {
+        /* Each column repeats its above neighbour; raster order */
+        for (Pix = 0; Pix < 64; Pix++)
+        {
+            Row = Pix >> 3;
+            Col = Pix & 7;
+            pDst[Row*dstStep + Col] = pSrcAbove[Col];
+        }
+    }
+    else if (predMode == OMX_VC_CHROMA_PLANE)
+    {
+        /* Gradients: the outermost pair (weight 4) uses the above-left sample */
+        GradH = 4*(pSrcAbove[7] - pSrcAboveLeft[0]);
+        GradV = 4*(pSrcLeft[7*leftStep] - pSrcAboveLeft[0]);
+        for (k = 1; k <= 3; k++)
+        {
+            GradH += k*(pSrcAbove[3+k] - pSrcAbove[3-k]);
+            GradV += k*(pSrcLeft[(3+k)*leftStep] - pSrcLeft[(3-k)*leftStep]);
+        }
+
+        Base   = 16*(pSrcAbove[7] + pSrcLeft[7*leftStep]);
+        SlopeX = (17*GradH+16)>>5;
+        SlopeY = (17*GradV+16)>>5;
+
+        for (Row = 0; Row < 8; Row++)
+        {
+            /* Part of the plane equation that is constant along a row */
+            RowBase = Base + SlopeY*(Row-3) + 16;
+            for (Col = 0; Col < 8; Col++)
+            {
+                Value = (RowBase + SlopeX*(Col-3))>>5;
+                pDst[Row*dstStep + Col] = (OMX_U8)armClip(0,255,Value);
+            }
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16.c
new file mode 100644
index 0000000..c90cb4c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16.c
@@ -0,0 +1,198 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_PredictIntra_16x16.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 16x16 intra prediction module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_PredictIntra_16x16 (6.3.3.1.2)
+ *
+ * Description:
+ * Perform Intra_16x16 prediction for luma samples. If the upper-right block
+ * is not available, then duplication work should be handled inside the
+ * function. Users need not define them outside.
+ *
+ * Input Arguments:
+ *
+ * pSrcLeft - Pointer to the buffer of 16 left pixels: p[x, y] (x = -1, y =
+ * 0..15)
+ * pSrcAbove - Pointer to the buffer of 16 above pixels: p[x,y] (x = 0..15,
+ * y= -1); must be aligned on a 16-byte boundary.
+ * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ * leftStep - Step of left pixel buffer; must be a multiple of 16.
+ * dstStep - Step of the destination buffer; must be a multiple of 16.
+ * predMode - Intra_16x16 prediction mode, please refer to section 3.4.1.
+ * availability - Neighboring 16x16 MB availability flag. Refer to
+ * section 3.4.4.
+ *
+ * Output Arguments:
+ *
+ * pDst -Pointer to the destination buffer; must be aligned on a 16-byte
+ * boundary.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pDst is NULL.
+ * dstStep < 16. or dstStep is not a multiple of 16.
+ * leftStep is not a multiple of 16.
+ * predMode is not in the valid range of enumeration
+ * OMXVCM4P10Intra16x16PredMode
+ * predMode is OMX_VC_16X16_VERT, but availability doesn't set
+ * OMX_VC_UPPER indicating p[x,-1] (x = 0..15) is not available.
+ * predMode is OMX_VC_16X16_HOR, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..15) is not available.
+ * predMode is OMX_VC_16X16_PLANE, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1](x = 0..15), or p[-1,y] (y = 0..15), or p[-1,-1] is not
+ * available.
+ * availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ * availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ * either pSrcAbove or pDst is not aligned on a 16-byte boundary.
+ *
+ * Note:
+ * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ * they are not used by intra prediction implied in predMode.
+ * Note:
+ * OMX_VC_UPPER_RIGHT is not used in intra_16x16 luma prediction.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntra_16x16(
+    const OMX_U8* pSrcLeft,
+    const OMX_U8 *pSrcAbove,
+    const OMX_U8 *pSrcAboveLeft,
+    OMX_U8* pDst,
+    OMX_INT leftStep,
+    OMX_INT dstStep,
+    OMXVCM4P10Intra16x16PredMode predMode,
+    OMX_S32 availability)
+{
+    int Row, Col, k;
+    int HaveLeft, HaveUpper;
+    int Total, DcValue;
+    int GradH, GradV;               /* weighted horizontal / vertical gradients */
+    int Base, SlopeX, SlopeY;       /* plane parameters */
+    int RowBase, Value;
+
+    /* Destination pointer, steps and alignment */
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(dstStep < 16, OMX_Sts_BadArgErr);
+    armRetArgErrIf((dstStep % 16) != 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf((leftStep % 16) != 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot16ByteAligned(pSrcAbove), OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot16ByteAligned(pDst), OMX_Sts_BadArgErr);
+
+    /* A neighbour flagged as available must come with a pointer */
+    armRetArgErrIf((availability & OMX_VC_UPPER) && pSrcAbove == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((availability & OMX_VC_LEFT ) && pSrcLeft == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((availability & OMX_VC_UPPER_LEFT) && pSrcAboveLeft == NULL, OMX_Sts_BadArgErr);
+
+    /* The requested mode must have the neighbours it reads */
+    armRetArgErrIf(predMode==OMX_VC_16X16_VERT && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_16X16_HOR && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_16X16_PLANE && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_16X16_PLANE && !(availability & OMX_VC_UPPER_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf(predMode==OMX_VC_16X16_PLANE && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+    armRetArgErrIf((unsigned)predMode > OMX_VC_16X16_PLANE, OMX_Sts_BadArgErr);
+
+    if (predMode == OMX_VC_16X16_VERT)
+    {
+        /* Every row is a copy of the row above the macroblock */
+        for (Row = 0; Row < 16; Row++)
+        {
+            for (Col = 0; Col < 16; Col++)
+            {
+                pDst[Row*dstStep + Col] = pSrcAbove[Col];
+            }
+        }
+    }
+    else if (predMode == OMX_VC_16X16_HOR)
+    {
+        /* Every row repeats its left neighbour */
+        for (Row = 0; Row < 16; Row++)
+        {
+            for (Col = 0; Col < 16; Col++)
+            {
+                pDst[Row*dstStep + Col] = pSrcLeft[Row*leftStep];
+            }
+        }
+    }
+    else if (predMode == OMX_VC_16X16_DC)
+    {
+        /* DC works with any combination of available neighbours */
+        HaveLeft  = (availability & OMX_VC_LEFT) != 0;
+        HaveUpper = (availability & OMX_VC_UPPER) != 0;
+        Total = 0;
+
+        if (HaveLeft)
+        {
+            for (Row = 0; Row < 16; Row++)
+            {
+                Total += pSrcLeft[Row*leftStep];
+            }
+        }
+        if (HaveUpper)
+        {
+            for (Col = 0; Col < 16; Col++)
+            {
+                Total += pSrcAbove[Col];
+            }
+        }
+
+        if (HaveLeft && HaveUpper)
+        {
+            DcValue = (Total + 16) >> 5;    /* rounded mean of 32 samples */
+        }
+        else if (HaveLeft || HaveUpper)
+        {
+            DcValue = (Total + 8) >> 4;     /* rounded mean of 16 samples */
+        }
+        else
+        {
+            DcValue = 128;                  /* no neighbour available */
+        }
+
+        for (Row = 0; Row < 16; Row++)
+        {
+            for (Col = 0; Col < 16; Col++)
+            {
+                pDst[Row*dstStep + Col] = (OMX_U8)DcValue;
+            }
+        }
+    }
+    else if (predMode == OMX_VC_16X16_PLANE)
+    {
+        /* Gradients: the outermost pair (weight 8) uses the above-left sample */
+        GradH = 8*(pSrcAbove[15] - pSrcAboveLeft[0]);
+        GradV = 8*(pSrcLeft[15*leftStep] - pSrcAboveLeft[0]);
+        for (k = 1; k <= 7; k++)
+        {
+            GradH += k*(pSrcAbove[7+k] - pSrcAbove[7-k]);
+            GradV += k*(pSrcLeft[(7+k)*leftStep] - pSrcLeft[(7-k)*leftStep]);
+        }
+
+        Base   = 16*(pSrcAbove[15] + pSrcLeft[15*leftStep]);
+        SlopeX = (5*GradH+32)>>6;
+        SlopeY = (5*GradV+32)>>6;
+
+        for (Row = 0; Row < 16; Row++)
+        {
+            /* Part of the plane equation that is constant along a row */
+            RowBase = Base + SlopeY*(Row-7) + 16;
+            for (Col = 0; Col < 16; Col++)
+            {
+                Value = (RowBase + SlopeX*(Col-7))>>5;
+                pDst[Row*dstStep + Col] = (OMX_U8)armClip(0,255,Value);
+            }
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4.c
new file mode 100644
index 0000000..3fa8212
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4.c
@@ -0,0 +1,338 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_PredictIntra_4x4.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 4x4 intra prediction module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_PredictIntra_4x4 (6.3.3.1.1)
+ *
+ * Description:
+ * Perform Intra_4x4 prediction for luma samples. If the upper-right block is
+ * not available, then duplication work should be handled inside the function.
+ * Users need not define them outside.
+ *
+ * Input Arguments:
+ *
+ * pSrcLeft - Pointer to the buffer of 4 left pixels:
+ * p[x, y] (x = -1, y = 0..3)
+ * pSrcAbove - Pointer to the buffer of 8 above pixels:
+ * p[x,y] (x = 0..7, y =-1);
+ * must be aligned on a 4-byte boundary.
+ * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ * leftStep - Step of left pixel buffer; must be a multiple of 4.
+ * dstStep - Step of the destination buffer; must be a multiple of 4.
+ * predMode - Intra_4x4 prediction mode.
+ * availability - Neighboring 4x4 block availability flag, refer to
+ * "Neighboring Macroblock Availability" .
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to the destination buffer; must be aligned on a 4-byte
+ * boundary.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pDst is NULL.
+ * dstStep < 4, or dstStep is not a multiple of 4.
+ * leftStep is not a multiple of 4.
+ * predMode is not in the valid range of enumeration
+ * OMXVCM4P10Intra4x4PredMode.
+ * predMode is OMX_VC_4x4_VERT, but availability doesn't set OMX_VC_UPPER
+ * indicating p[x,-1] (x = 0..3) is not available.
+ * predMode is OMX_VC_4x4_HOR, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..3) is not available.
+ * predMode is OMX_VC_4x4_DIAG_DL, but availability doesn't set
+ * OMX_VC_UPPER indicating p[x,-1] (x = 0..3) is not available.
+ * predMode is OMX_VC_4x4_DIAG_DR, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ * available.
+ * predMode is OMX_VC_4x4_VR, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ * available.
+ * predMode is OMX_VC_4x4_HD, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ * available.
+ * predMode is OMX_VC_4x4_VL, but availability doesn't set OMX_VC_UPPER
+ * indicating p[x,-1] (x = 0..3) is not available.
+ * predMode is OMX_VC_4x4_HU, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..3) is not available.
+ * availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ * availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ * either pSrcAbove or pDst is not aligned on a 4-byte boundary.
+ *
+ * Note:
+ * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ * they are not used by intra prediction as implied in predMode.
+ *
+ */
+
+OMXResult omxVCM4P10_PredictIntra_4x4(
+ const OMX_U8* pSrcLeft, /* left column; read as L0..L3 with stride leftStep */
+ const OMX_U8 *pSrcAbove, /* above row; read as U0..U7 (U4..U7 only when upper-right is available) */
+ const OMX_U8 *pSrcAboveLeft, /* single above-left sample, read as UL */
+ OMX_U8* pDst,
+ OMX_INT leftStep,
+ OMX_INT dstStep,
+ OMXVCM4P10Intra4x4PredMode predMode,
+ OMX_S32 availability
+ )
+{ /* Fills the 4x4 block at pDst (row stride dstStep) according to predMode */
+ int x, y;
+ OMX_U8 pTmp[10]; /* filtered neighbour samples; each mode maps (x,y) to an index into this */
+
+ armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf((leftStep % 4) != 0, OMX_Sts_BadArgErr);
+ armRetArgErrIf((dstStep % 4) != 0, OMX_Sts_BadArgErr);
+ armRetArgErrIf((dstStep < 4), OMX_Sts_BadArgErr);
+ armRetArgErrIf(armNot4ByteAligned(pSrcAbove), OMX_Sts_BadArgErr);
+ armRetArgErrIf(armNot4ByteAligned(pDst), OMX_Sts_BadArgErr);
+ armRetArgErrIf((availability & OMX_VC_UPPER) && pSrcAbove == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf((availability & OMX_VC_LEFT ) && pSrcLeft == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf((availability & OMX_VC_UPPER_LEFT) && pSrcAboveLeft == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf(predMode==OMX_VC_4X4_VERT && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+ armRetArgErrIf(predMode==OMX_VC_4X4_HOR && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+ armRetArgErrIf(predMode==OMX_VC_4X4_DIAG_DL && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+ armRetArgErrIf(predMode==OMX_VC_4X4_DIAG_DR && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+ armRetArgErrIf(predMode==OMX_VC_4X4_DIAG_DR && !(availability & OMX_VC_UPPER_LEFT), OMX_Sts_BadArgErr);
+ armRetArgErrIf(predMode==OMX_VC_4X4_DIAG_DR && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+ armRetArgErrIf(predMode==OMX_VC_4X4_VR && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+ armRetArgErrIf(predMode==OMX_VC_4X4_VR && !(availability & OMX_VC_UPPER_LEFT), OMX_Sts_BadArgErr);
+ armRetArgErrIf(predMode==OMX_VC_4X4_VR && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+ armRetArgErrIf(predMode==OMX_VC_4X4_HD && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+ armRetArgErrIf(predMode==OMX_VC_4X4_HD && !(availability & OMX_VC_UPPER_LEFT), OMX_Sts_BadArgErr);
+ armRetArgErrIf(predMode==OMX_VC_4X4_HD && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+ armRetArgErrIf(predMode==OMX_VC_4X4_VL && !(availability & OMX_VC_UPPER), OMX_Sts_BadArgErr);
+ armRetArgErrIf(predMode==OMX_VC_4X4_HU && !(availability & OMX_VC_LEFT), OMX_Sts_BadArgErr);
+ armRetArgErrIf((unsigned)predMode > OMX_VC_4X4_HU, OMX_Sts_BadArgErr); /* the unsigned cast makes negative values fail too */
+
+ /* Note: This code must not read the pSrc arrays unless the corresponding
+ * block is marked as available. If the block is not available then pSrc
+ * may not be a valid pointer.
+ *
+ * Note: To make the code more readable we refer to the neighbouring pixels
+ * in variables named as below:
+ *
+ * UL U0 U1 U2 U3 U4 U5 U6 U7
+ * L0 xx xx xx xx
+ * L1 xx xx xx xx
+ * L2 xx xx xx xx
+ * L3 xx xx xx xx
+ */
+
+#define UL pSrcAboveLeft[0]
+#define U0 pSrcAbove[0]
+#define U1 pSrcAbove[1]
+#define U2 pSrcAbove[2]
+#define U3 pSrcAbove[3]
+#define U4 pSrcAbove[4]
+#define U5 pSrcAbove[5]
+#define U6 pSrcAbove[6]
+#define U7 pSrcAbove[7]
+#define L0 pSrcLeft[0*leftStep]
+#define L1 pSrcLeft[1*leftStep]
+#define L2 pSrcLeft[2*leftStep]
+#define L3 pSrcLeft[3*leftStep]
+
+ switch (predMode)
+ {
+ case OMX_VC_4X4_VERT: /* every row is a copy of U0..U3 */
+ for (y=0; y<4; y++)
+ {
+ pDst[y*dstStep+0] = U0;
+ pDst[y*dstStep+1] = U1;
+ pDst[y*dstStep+2] = U2;
+ pDst[y*dstStep+3] = U3;
+ }
+ break;
+
+ case OMX_VC_4X4_HOR: /* row y is filled with the left sample Ly */
+ for (x=0; x<4; x++)
+ {
+ pDst[0*dstStep+x] = L0;
+ pDst[1*dstStep+x] = L1;
+ pDst[2*dstStep+x] = L2;
+ pDst[3*dstStep+x] = L3;
+ }
+ break;
+
+ case OMX_VC_4X4_DC:
+ /* This can always be used even if no blocks available */
+ armVCM4P10_PredictIntraDC4x4(pSrcLeft, pSrcAbove, pDst, leftStep, dstStep, availability);
+ break;
+
+ case OMX_VC_4X4_DIAG_DL: /* pTmp index is x+y; (1,2,1)/4 filter along the above row */
+ pTmp[0] = (OMX_U8)((U0 + 2*U1 + U2 + 2)>>2);
+ pTmp[1] = (OMX_U8)((U1 + 2*U2 + U3 + 2)>>2);
+ if (availability & OMX_VC_UPPER_RIGHT) /* U4..U7 are read only in this branch */
+ {
+ pTmp[2] = (OMX_U8)((U2 + 2*U3 + U4 + 2)>>2);
+ pTmp[3] = (OMX_U8)((U3 + 2*U4 + U5 + 2)>>2);
+ pTmp[4] = (OMX_U8)((U4 + 2*U5 + U6 + 2)>>2);
+ pTmp[5] = (OMX_U8)((U5 + 2*U6 + U7 + 2)>>2);
+ pTmp[6] = (OMX_U8)((U6 + 3*U7 + 2)>>2);
+ }
+ else /* same filter with U3 standing in for U4..U7 */
+ {
+ pTmp[2] = (OMX_U8)((U2 + 3*U3 + 2)>>2);
+ pTmp[3] = U3;
+ pTmp[4] = U3;
+ pTmp[5] = U3;
+ pTmp[6] = U3;
+ }
+ for (y=0; y<4; y++)
+ {
+ for (x=0; x<4; x++)
+ {
+ pDst[y*dstStep+x] = pTmp[x+y];
+ }
+ }
+ break;
+
+ case OMX_VC_4X4_DIAG_DR: /* pTmp runs from the bottom of the left column, through UL, along the above row */
+ /* x-y = -3, -2, -1, 0, 1, 2, 3 */
+ pTmp[0] = (OMX_U8)((L1 + 2*L2 + L3 + 2)>>2);
+ pTmp[1] = (OMX_U8)((L0 + 2*L1 + L2 + 2)>>2);
+ pTmp[2] = (OMX_U8)((UL + 2*L0 + L1 + 2)>>2);
+ pTmp[3] = (OMX_U8)((U0 + 2*UL + L0 + 2)>>2);
+ pTmp[4] = (OMX_U8)((U1 + 2*U0 + UL + 2)>>2);
+ pTmp[5] = (OMX_U8)((U2 + 2*U1 + U0 + 2)>>2);
+ pTmp[6] = (OMX_U8)((U3 + 2*U2 + U1 + 2)>>2);
+ for (y=0; y<4; y++)
+ {
+ for (x=0; x<4; x++)
+ {
+ pDst[y*dstStep+x] = pTmp[3+x-y]; /* +3 shifts x-y from -3..3 to 0..6 */
+ }
+ }
+ break;
+
+ case OMX_VC_4X4_VR:
+ /* zVR=2x-y = -3, -2, -1, 0, 1, 2, 3, 4, 5, 6
+ * x-(y>>1) = -1, -1, 0, 0, 1, 1, 2, 2, 3, 3
+ * y = 3, 2, ?, ?, ?, ?, ?, ?, 1, 0
+ */
+ pTmp[0] = (OMX_U8)((L2 + 2*L1 + L0 + 2)>>2);
+ pTmp[1] = (OMX_U8)((L1 + 2*L0 + UL + 2)>>2);
+ pTmp[2] = (OMX_U8)((L0 + 2*UL + U0 + 2)>>2);
+ pTmp[3] = (OMX_U8)((UL + U0 + 1)>>1);
+ pTmp[4] = (OMX_U8)((UL + 2*U0 + U1 + 2)>>2);
+ pTmp[5] = (OMX_U8)((U0 + U1 + 1)>>1);
+ pTmp[6] = (OMX_U8)((U0 + 2*U1 + U2 + 2)>>2);
+ pTmp[7] = (OMX_U8)((U1 + U2 + 1)>>1);
+ pTmp[8] = (OMX_U8)((U1 + 2*U2 + U3 + 2)>>2);
+ pTmp[9] = (OMX_U8)((U2 + U3 + 1)>>1);
+ for (y=0; y<4; y++)
+ {
+ for (x=0; x<4; x++)
+ {
+ pDst[y*dstStep+x] = pTmp[3+2*x-y]; /* +3 shifts zVR from -3..6 to 0..9 */
+ }
+ }
+ break;
+
+ case OMX_VC_4X4_HD:
+ /* zHD=2y-x = -3 -2 -1 0 1 2 3 4 5 6
+ * y-(x>>1) = -1 -1 0 0 1 1 2 2 3 3
+ * x = 3 2 1 0
+ */
+ pTmp[0] = (OMX_U8)((U2 + 2*U1 + U0 + 2)>>2);
+ pTmp[1] = (OMX_U8)((U1 + 2*U0 + UL + 2)>>2);
+ pTmp[2] = (OMX_U8)((U0 + 2*UL + L0 + 2)>>2);
+ pTmp[3] = (OMX_U8)((UL + L0 + 1)>>1);
+ pTmp[4] = (OMX_U8)((UL + 2*L0 + L1 + 2)>>2);
+ pTmp[5] = (OMX_U8)((L0 + L1 + 1)>>1);
+ pTmp[6] = (OMX_U8)((L0 + 2*L1 + L2 + 2)>>2);
+ pTmp[7] = (OMX_U8)((L1 + L2 + 1)>>1);
+ pTmp[8] = (OMX_U8)((L1 + 2*L2 + L3 + 2)>>2);
+ pTmp[9] = (OMX_U8)((L2 + L3 + 1)>>1);
+ for (y=0; y<4; y++)
+ {
+ for (x=0; x<4; x++)
+ {
+ pDst[y*dstStep+x] = pTmp[3+2*y-x]; /* +3 shifts zHD from -3..6 to 0..9 */
+ }
+ }
+ break;
+
+ case OMX_VC_4X4_VL:
+ /* Note: x+(y>>1) = (2*x+y)>>1
+ * 2x+y = 0 1 2 3 4 5 6 7 8 9
+ */
+ pTmp[0] = (OMX_U8)((U0 + U1 + 1)>>1);
+ pTmp[1] = (OMX_U8)((U0 + 2*U1 + U2 + 2)>>2);
+ pTmp[2] = (OMX_U8)((U1 + U2 + 1)>>1);
+ pTmp[3] = (OMX_U8)((U1 + 2*U2 + U3 + 2)>>2);
+ pTmp[4] = (OMX_U8)((U2 + U3 + 1)>>1);
+ if (availability & OMX_VC_UPPER_RIGHT) /* U4..U6 are read only in this branch */
+ {
+ pTmp[5] = (OMX_U8)((U2 + 2*U3 + U4 + 2)>>2);
+ pTmp[6] = (OMX_U8)((U3 + U4 + 1)>>1);
+ pTmp[7] = (OMX_U8)((U3 + 2*U4 + U5 + 2)>>2);
+ pTmp[8] = (OMX_U8)((U4 + U5 + 1)>>1);
+ pTmp[9] = (OMX_U8)((U4 + 2*U5 + U6 + 2)>>2);
+ }
+ else /* same filters with U3 standing in for U4..U6 */
+ {
+ pTmp[5] = (OMX_U8)((U2 + 3*U3 + 2)>>2);
+ pTmp[6] = U3;
+ pTmp[7] = U3;
+ pTmp[8] = U3;
+ pTmp[9] = U3;
+ }
+ for (y=0; y<4; y++)
+ {
+ for (x=0; x<4; x++)
+ {
+ pDst[y*dstStep+x] = pTmp[2*x+y];
+ }
+ }
+ break;
+
+ case OMX_VC_4X4_HU: /* uses the left column only; indices 6..9 all hold L3 */
+ /* zHU = x+2*y */
+ pTmp[0] = (OMX_U8)((L0 + L1 + 1)>>1);
+ pTmp[1] = (OMX_U8)((L0 + 2*L1 + L2 + 2)>>2);
+ pTmp[2] = (OMX_U8)((L1 + L2 + 1)>>1);
+ pTmp[3] = (OMX_U8)((L1 + 2*L2 + L3 + 2)>>2);
+ pTmp[4] = (OMX_U8)((L2 + L3 + 1)>>1);
+ pTmp[5] = (OMX_U8)((L2 + 3*L3 + 2)>>2);
+ pTmp[6] = L3;
+ pTmp[7] = L3;
+ pTmp[8] = L3;
+ pTmp[9] = L3;
+ for (y=0; y<4; y++)
+ {
+ for (x=0; x<4; x++)
+ {
+ pDst[y*dstStep+x] = pTmp[x+2*y];
+ }
+ }
+ break;
+ }
+
+ return OMX_Sts_NoErr;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_16x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_16x.c
new file mode 100644
index 0000000..c8114ee
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_16x.c
@@ -0,0 +1,86 @@
+/**
+ *
+ * File Name: omxVCM4P10_SADQuar_16x.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate SAD of pSrc with average of two Ref blocks
+ * of 16x16 or 16x8
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P10_SADQuar_16x (6.3.5.4.4)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average
+ * of the other two (pSrcRef0 and pSrcRef1) for 16x16 or 16x8 blocks.
+ * Rounding is applied according to the convention (a+b+1)>>1.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the original block; must be aligned on a 16-byte
+ * boundary.
+ * pSrcRef0 - Pointer to reference block 0
+ * pSrcRef1 - Pointer to reference block 1
+ * iSrcStep - Step of the original block buffer; must be a multiple of 16
+ * iRefStep0 - Step of reference block 0
+ * iRefStep1 - Step of reference block 1
+ * iHeight - Height of the block; must be equal to either 8 or 16
+ *
+ * Output Arguments:
+ *
+ * pDstSAD -Pointer of result SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - iHeight is not equal to either 8 or 16.
+ * - One or more of the following pointers is NULL: pSrc, pSrcRef0,
+ * pSrcRef1, pDstSAD.
+ * - iSrcStep is not a multiple of 16
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_16x(
+    const OMX_U8* pSrc,
+    const OMX_U8* pSrcRef0,
+    const OMX_U8* pSrcRef1,
+    OMX_U32 iSrcStep,
+    OMX_U32 iRefStep0,
+    OMX_U32 iRefStep1,
+    OMX_U32* pDstSAD,
+    OMX_U32 iHeight
+)
+{
+    /* Argument validation: any failure returns OMX_Sts_BadArgErr */
+    armRetArgErrIf((pSrc == NULL) || (pSrcRef0 == NULL) ||
+                   (pSrcRef1 == NULL) || (pDstSAD == NULL), OMX_Sts_BadArgErr)
+    armRetArgErrIf(!((iHeight == 16) || (iHeight == 8)), OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot16ByteAligned(pSrc), OMX_Sts_BadArgErr)
+    armRetArgErrIf(((iSrcStep & 15) != 0) || (iSrcStep == 0), OMX_Sts_BadArgErr)
+
+    /*
+     * Hand over to the helper shared by the SADQuar entry points,
+     * with 16 as the block width.
+     */
+    return armVCM4P10_SADQuar(pSrc, pSrcRef0, pSrcRef1, iSrcStep,
+                              iRefStep0, iRefStep1, pDstSAD, iHeight, 16);
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_4x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_4x.c
new file mode 100644
index 0000000..4b330ba
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_4x.c
@@ -0,0 +1,85 @@
+/**
+ *
+ * File Name: omxVCM4P10_SADQuar_4x.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate SAD of pSrc with average of two Ref blocks
+ * of 4x8 or 4x4 blocks
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P10_SADQuar_4x (6.3.5.4.2)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average
+ * of the other two (pSrcRef0 and pSrcRef1) for 4x8 or 4x4 blocks. Rounding
+ * is applied according to the convention (a+b+1)>>1.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the original block; must be aligned on a 4-byte
+ * boundary.
+ * pSrcRef0 - Pointer to reference block 0
+ * pSrcRef1 - Pointer to reference block 1
+ * iSrcStep - Step of the original block buffer; must be a multiple of 4.
+ * iRefStep0 - Step of reference block 0
+ * iRefStep1 - Step of reference block 1
+ * iHeight - Height of the block; must be equal to either 4 or 8.
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - iHeight is not equal to either 4 or 8.
+ * - One or more of the following pointers is NULL: pSrc, pSrcRef0,
+ * pSrcRef1, pDstSAD.
+ * - iSrcStep is not a multiple of 4
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_4x(
+    const OMX_U8* pSrc,
+    const OMX_U8* pSrcRef0,
+    const OMX_U8* pSrcRef1,
+    OMX_U32 iSrcStep,
+    OMX_U32 iRefStep0,
+    OMX_U32 iRefStep1,
+    OMX_U32* pDstSAD,
+    OMX_U32 iHeight
+)
+{
+    /* Reject NULL pointers first; all failures report OMX_Sts_BadArgErr */
+    armRetArgErrIf((pSrc == NULL) || (pSrcRef0 == NULL) ||
+                   (pSrcRef1 == NULL) || (pDstSAD == NULL), OMX_Sts_BadArgErr);
+
+    /* Only heights of 4 and 8 rows are handled */
+    armRetArgErrIf(!((iHeight == 4) || (iHeight == 8)), OMX_Sts_BadArgErr);
+    /* pSrc must be 4-byte aligned, its step a non-zero multiple of 4 */
+    armRetArgErrIf(armNot4ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((iSrcStep & 3) != 0) || (iSrcStep == 0), OMX_Sts_BadArgErr);
+
+    return armVCM4P10_SADQuar(pSrc, pSrcRef0, pSrcRef1, iSrcStep,
+                              iRefStep0, iRefStep1, pDstSAD, iHeight, 4);
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_8x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_8x.c
new file mode 100644
index 0000000..c9e9c24
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SADQuar_8x.c
@@ -0,0 +1,87 @@
+/**
+ *
+ * File Name: omxVCM4P10_SADQuar_8x.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate SAD of pSrc with average of two Ref blocks
+ * of 8x16 or 8x8 or 8x4
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P10_SADQuar_8x (6.3.5.4.3)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average
+ * of the other two (pSrcRef0 and pSrcRef1) for 8x16, 8x8, or 8x4 blocks.
+ * Rounding is applied according to the convention (a+b+1)>>1.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the original block; must be aligned on an 8-byte
+ * boundary.
+ * pSrcRef0 - Pointer to reference block 0
+ * pSrcRef1 - Pointer to reference block 1
+ * iSrcStep - Step of the original block buffer; must be a multiple of 8.
+ * iRefStep0 - Step of reference block 0
+ * iRefStep1 - Step of reference block 1
+ * iHeight - Height of the block; must be equal either 4, 8, or 16.
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - iHeight is not equal to either 4, 8, or 16.
+ * - One or more of the following pointers is NULL: pSrc, pSrcRef0,
+ * pSrcRef1, pDstSAD.
+ * - iSrcStep is not a multiple of 8
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_8x(
+    const OMX_U8* pSrc,
+    const OMX_U8* pSrcRef0,
+    const OMX_U8* pSrcRef1,
+    OMX_U32 iSrcStep,
+    OMX_U32 iRefStep0,
+    OMX_U32 iRefStep1,
+    OMX_U32* pDstSAD,
+    OMX_U32 iHeight
+)
+{
+    /* Argument validation: any failure returns OMX_Sts_BadArgErr */
+    armRetArgErrIf((pSrc == NULL) || (pSrcRef0 == NULL) ||
+                   (pSrcRef1 == NULL) || (pDstSAD == NULL), OMX_Sts_BadArgErr)
+
+    /* Accepted heights are 4, 8 and 16 rows */
+    armRetArgErrIf(!((iHeight == 4) || (iHeight == 8) ||
+                     (iHeight == 16)), OMX_Sts_BadArgErr)
+
+    /* pSrc must be 8-byte aligned, its step a non-zero multiple of 8 */
+    armRetArgErrIf(armNot8ByteAligned(pSrc), OMX_Sts_BadArgErr)
+    armRetArgErrIf(((iSrcStep & 7) != 0) || (iSrcStep == 0), OMX_Sts_BadArgErr)
+
+    return armVCM4P10_SADQuar(pSrc, pSrcRef0, pSrcRef1, iSrcStep,
+                              iRefStep0, iRefStep1, pDstSAD, iHeight, 8);
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SAD_4x.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SAD_4x.c
new file mode 100644
index 0000000..927c454
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SAD_4x.c
@@ -0,0 +1,77 @@
+/**
+ *
+ * File Name: omxVCM4P10_SAD_4x.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate SAD for 4x8 and 4x4 blocks
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P10_SAD_4x (6.3.5.4.1)
+ *
+ * Description:
+ * This function calculates the SAD for 4x8 and 4x4 blocks.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg -Pointer to the original block; must be aligned on a 4-byte
+ * boundary.
+ * iStepOrg -Step of the original block buffer; must be a multiple of 4.
+ * pSrcRef -Pointer to the reference block
+ * iStepRef -Step of the reference block buffer
+ * iHeight -Height of the block; must be equal to either 4 or 8.
+ *
+ * Output Arguments:
+ *
+ * pDstSAD -Pointer of result SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - One or more of the following pointers is NULL:
+ * pSrcOrg, pSrcRef, or pDstSAD
+ * - iHeight is not equal to either 4 or 8.
+ * - iStepOrg is not a multiple of 4
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SAD_4x(
+    const OMX_U8* pSrcOrg,
+    OMX_U32 iStepOrg,
+    const OMX_U8* pSrcRef,
+    OMX_U32 iStepRef,
+    OMX_S32* pDstSAD,
+    OMX_U32 iHeight
+)
+{
+    /* Argument validation: any failure returns OMX_Sts_BadArgErr */
+    armRetArgErrIf((pSrcOrg == NULL) || (pSrcRef == NULL) ||
+                   (pDstSAD == NULL), OMX_Sts_BadArgErr)
+    armRetArgErrIf(!((iHeight == 4) || (iHeight == 8)), OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot4ByteAligned(pSrcOrg), OMX_Sts_BadArgErr)
+    /* Both steps have to be non-zero multiples of 4 */
+    armRetArgErrIf(((iStepOrg & 3) != 0) || (iStepOrg == 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf(((iStepRef & 3) != 0) || (iStepRef == 0), OMX_Sts_BadArgErr)
+
+    /* The common SAD helper does the pixel work, with 4 as the block width */
+    return armVCCOMM_SAD(pSrcOrg, iStepOrg, pSrcRef, iStepRef, pDstSAD, iHeight, 4);
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SATD_4x4.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SATD_4x4.c
new file mode 100644
index 0000000..a91ae66
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SATD_4x4.c
@@ -0,0 +1,132 @@
+/**
+ *
+ * File Name: omxVCM4P10_SATD_4x4.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate SATD (sum of absolute transform differences) for 4x4 blocks
+ *
+ */
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P10_SATD_4x4 (6.3.5.4.5)
+ *
+ * Description:
+ * This function calculates the sum of absolute transform differences (SATD)
+ * for a 4x4 block by applying a Hadamard transform to the difference block
+ * and then calculating the sum of absolute coefficient values.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to the original block; must be aligned on a 4-byte
+ * boundary
+ * iStepOrg - Step of the original block buffer; must be a multiple of 4
+ * pSrcRef - Pointer to the reference block; must be aligned on a 4-byte
+ * boundary
+ * iStepRef - Step of the reference block buffer; must be a multiple of 4
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - pointer to the resulting SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pSrcRef, or pDstSAD
+ * - either pSrcOrg or pSrcRef is not aligned on a 4-byte boundary
+ * - iStepOrg <= 0 or iStepOrg is not a multiple of 4
+ * - iStepRef <= 0 or iStepRef is not a multiple of 4
+ *
+ */
+OMXResult omxVCM4P10_SATD_4x4(
+    const OMX_U8* pSrcOrg,
+    OMX_U32 iStepOrg,
+    const OMX_U8* pSrcRef,
+    OMX_U32 iStepRef,
+    OMX_U32* pDstSAD
+)
+{
+    OMX_INT row, col;
+    OMX_S32 sumAbs = 0;
+    OMX_S32 coef[4][4];
+    OMX_S32 sum02, sum13, dif02, dif13;
+
+    /*
+     * Argument validation: all pointers must be non-NULL, both steps
+     * non-zero multiples of 4, and both source pointers 4-byte aligned.
+     * Any violation is reported as OMX_Sts_BadArgErr.
+     */
+    armRetArgErrIf((pSrcOrg == NULL) || (pSrcRef == NULL) ||
+                   (pDstSAD == NULL), OMX_Sts_BadArgErr)
+    armRetArgErrIf(((iStepOrg & 3) != 0) || (iStepOrg == 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf(((iStepRef & 3) != 0) || (iStepRef == 0), OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot4ByteAligned(pSrcOrg), OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot4ByteAligned(pSrcRef), OMX_Sts_BadArgErr)
+
+    /* Residual block: original minus reference, sample by sample */
+    for (row = 0; row < 4; row++)
+    {
+        for (col = 0; col < 4; col++)
+        {
+            coef[row][col] = pSrcOrg[row * iStepOrg + col] -
+                             pSrcRef[row * iStepRef + col];
+        }
+    }
+
+    /*
+     * 4x4 Hadamard transform, done in place as a pair of butterfly
+     * stages along every row and then along every column.
+     */
+    for (row = 0; row < 4; row++)
+    {
+        sum02 = coef[row][0] + coef[row][2];
+        sum13 = coef[row][1] + coef[row][3];
+        dif02 = coef[row][0] - coef[row][2];
+        dif13 = coef[row][1] - coef[row][3];
+
+        coef[row][0] = sum02 + sum13;
+        coef[row][1] = dif02 + dif13;
+        coef[row][2] = dif02 - dif13;
+        coef[row][3] = sum02 - sum13;
+    }
+
+    for (col = 0; col < 4; col++)
+    {
+        sum02 = coef[0][col] + coef[2][col];
+        sum13 = coef[1][col] + coef[3][col];
+        dif02 = coef[0][col] - coef[2][col];
+        dif13 = coef[1][col] - coef[3][col];
+
+        coef[0][col] = sum02 + sum13;
+        coef[1][col] = dif02 + dif13;
+        coef[2][col] = dif02 - dif13;
+        coef[3][col] = sum02 - sum13;
+        /* This column is final, so fold its magnitudes into the total */
+        for (row = 0; row < 4; row++)
+        {
+            sumAbs += armAbs(coef[row][col]);
+        }
+    }
+
+    /* Report half of the absolute sum, rounded up */
+    *pDstSAD = (sumAbs + 1) / 2;
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SubAndTransformQDQResidual.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SubAndTransformQDQResidual.c
new file mode 100644
index 0000000..23a5662
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_SubAndTransformQDQResidual.c
@@ -0,0 +1,220 @@
+/**
+ *
+ * File Name: omxVCM4P10_SubAndTransformQDQResidual.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function subtracts the prediction from the original 4x4 block, then transforms, quantizes and dequantizes the residual
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_SubAndTransformQDQResidual (6.3.5.8.1)
+ *
+ * Description:
+ * This function subtracts the prediction signal from the original signal to
+ * produce the difference signal and then performs a 4x4 integer transform and
+ * quantization. The quantized transformed coefficients are stored as
+ * pDstQuantCoeff. This function can also output dequantized coefficients or
+ * unquantized DC coefficients optionally by setting the pointers
+ * pDstDeQuantCoeff, pDCCoeff.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to original signal. 4-byte alignment required.
+ * pSrcPred - Pointer to prediction signal. 4-byte alignment required.
+ * iSrcOrgStep - Step of the original signal buffer; must be a multiple of
+ * 4.
+ * iSrcPredStep - Step of the prediction signal buffer; must be a multiple
+ * of 4.
+ * pNumCoeff -Number of non-zero coefficients after quantization. If this
+ * parameter is not required, it is set to NULL.
+ * nThreshSAD - Zero-block early detection threshold. If this parameter is
+ * not required, it is set to 0.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ * bIntra - Indicates whether this is an INTRA block, either 1-INTRA or
+ * 0-INTER
+ *
+ * Output Arguments:
+ *
+ * pDstQuantCoeff - Pointer to the quantized transformed coefficients.
+ * 8-byte alignment required.
+ * pDstDeQuantCoeff - Pointer to the dequantized transformed coefficients
+ * if this parameter is not equal to NULL. 8-byte alignment
+ * required.
+ * pDCCoeff - Pointer to the unquantized DC coefficient if this parameter
+ * is not equal to NULL.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pSrcPred, pNumCoeff, pDstQuantCoeff,
+ * pDstDeQuantCoeff, pDCCoeff
+ * - pSrcOrg is not aligned on a 4-byte boundary
+ * - pSrcPred is not aligned on a 4-byte boundary
+ * - iSrcOrgStep is not a multiple of 4
+ * - iSrcPredStep is not a multiple of 4
+ * - pDstQuantCoeff or pDstDeQuantCoeff is not aligned on an 8-byte boundary
+ *
+ */
+ OMXResult omxVCM4P10_SubAndTransformQDQResidual (
+    const OMX_U8* pSrcOrg,
+    const OMX_U8* pSrcPred,
+    OMX_U32 iSrcOrgStep,
+    OMX_U32 iSrcPredStep,
+    OMX_S16* pDstQuantCoeff,
+    OMX_S16* pDstDeQuantCoeff,
+    OMX_S16* pDCCoeff,
+    OMX_S8* pNumCoeff,
+    OMX_U32 nThreshSAD,
+    OMX_U32 iQP,
+    OMX_U8 bIntra
+)
+{
+    OMX_INT row, col, k;
+    OMX_S8 nonZero = 0;
+    OMX_S16 work[16], coef[16];
+    OMX_S16 level;
+    OMX_U32 QBits, QPper, QPmod, f;
+    OMX_S32 Value, MF, ThreshDC;
+
+    /*
+     * Argument validation; every failure reports OMX_Sts_BadArgErr.
+     * NOTE(review): NULL is rejected for pDstDeQuantCoeff, pDCCoeff and
+     * pNumCoeff here although later code still tests them against NULL.
+     */
+    armRetArgErrIf(pSrcOrg == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pSrcPred == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstQuantCoeff == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDstDeQuantCoeff == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pDCCoeff == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(pNumCoeff == NULL, OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot4ByteAligned(pSrcOrg), OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot4ByteAligned(pSrcPred), OMX_Sts_BadArgErr)
+    armRetArgErrIf(armNot8ByteAligned(pDstQuantCoeff), OMX_Sts_BadArgErr)
+    armRetArgErrIf((pDstDeQuantCoeff != NULL) &&
+                   armNot8ByteAligned(pDstDeQuantCoeff), OMX_Sts_BadArgErr)
+    armRetArgErrIf((bIntra != 0) && (bIntra != 1), OMX_Sts_BadArgErr)
+    armRetArgErrIf(iQP > 51, OMX_Sts_BadArgErr)
+    armRetArgErrIf((iSrcOrgStep == 0) || (iSrcOrgStep & 3), OMX_Sts_BadArgErr)
+    armRetArgErrIf((iSrcPredStep == 0) || (iSrcPredStep & 3), OMX_Sts_BadArgErr)
+
+    /* Quantiser parameters derived from iQP */
+    QPper = iQP / 6;
+    QPmod = iQP % 6;
+    QBits = 15 + QPper;
+
+    /* Rounding offset: 2^QBits / 3 for intra blocks, 2^QBits / 6 otherwise */
+    f = (1 << QBits) / (bIntra ? 3 : 6);
+
+    /*
+     * Optional zero-block early-out: when nThreshSAD is non-zero and below
+     * the DC threshold, every output is written as zero and the transform
+     * is skipped.
+     */
+    if (nThreshSAD)
+    {
+        ThreshDC = ((1 << QBits) - f) / armVCM4P10_MFMatrix[QPmod][0];
+        if (nThreshSAD < ThreshDC)
+        {
+            if (pDCCoeff != NULL)
+            {
+                *pDCCoeff = 0;
+            }
+            for (k = 0; k < 16; k++)
+            {
+                pDstQuantCoeff[k] = 0;
+                if (pDstDeQuantCoeff != NULL)
+                {
+                    pDstDeQuantCoeff[k] = 0;
+                }
+            }
+
+            if (pNumCoeff != NULL)
+            {
+                *pNumCoeff = 0;
+            }
+            return OMX_Sts_NoErr;
+        }
+    }
+
+    /* Prediction residual, stored as 16 values in raster order */
+    for (row = 0; row < 4; row++)
+    {
+        for (col = 0; col < 4; col++)
+        {
+            work[row * 4 + col] = pSrcOrg[row * iSrcOrgStep + col] -
+                                  pSrcPred[row * iSrcPredStep + col];
+        }
+    }
+
+    /* Residual transform of the difference block */
+    armVCM4P10_FwdTransformResidual4x4 (coef, work);
+
+    /* Hand back the DC term before it is quantised */
+    if (pDCCoeff != NULL)
+    {
+        *pDCCoeff = coef[0];
+    }
+
+    /*
+     * Quantisation: scale the magnitude, add the rounding offset, shift
+     * down by QBits and restore the sign. Each level goes to the caller's
+     * buffer and is also kept in work[] for the rescale step below.
+     */
+    for (k = 0; k < 16; k++)
+    {
+        MF = armVCM4P10_MFMatrix[QPmod][armVCM4P10_PosToVCol4x4[k]];
+        Value = armAbs(coef[k]) * MF + f;
+        Value >>= QBits;
+        if (coef[k] < 0)
+        {
+            Value = -Value;
+        }
+        level = (OMX_S16)Value;
+        pDstQuantCoeff[k] = level;
+        work[k] = level;
+        if ((pNumCoeff != NULL) && Value)
+        {
+            nonZero++;
+        }
+    }
+
+    if (pNumCoeff != NULL)
+    {
+        *pNumCoeff = nonZero;
+    }
+
+    /* Rescale the levels and run the residual inverse transform on them */
+    if (pDstDeQuantCoeff != NULL)
+    {
+        for (k = 0; k < 16; k++)
+        {
+            coef[k] = work[k] * (1 << QPper) *
+                armVCM4P10_VMatrix[QPmod][armVCM4P10_PosToVCol4x4[k]];
+        }
+        armVCM4P10_TransformResidual4x4 (pDstDeQuantCoeff, coef);
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair.c
new file mode 100644
index 0000000..9ad0e81
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair.c
@@ -0,0 +1,131 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_TransformDequantChromaDCFromPair.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 inverse quantize and transform module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/*
+ * Description:
+ * Dequantize the four coefficients of a chroma 2x2 DC block in place.
+ *
+ * Every coefficient is multiplied by armVCM4P10_VMatrix[QP%6][0] and the
+ * product is then scaled by 2^((QP/6)-1): a multiplication by a power of
+ * two when QP/6 >= 1, or a right shift by one bit otherwise.
+ *
+ * Parameters:
+ * [in,out] pDst  the four chroma DC coefficients, overwritten with the
+ *                dequantized values (truncated to OMX_S16)
+ * [in]     QP    quantization parameter; the public entry point of this
+ *                file rejects values outside [0,51] before calling
+ */
+
+static void DequantChromaDC2x2(
+    OMX_S16* pDst,
+    OMX_INT QP
+)
+{
+    int Shift = (QP/6)-1 ;
+    int Scale = armVCM4P10_VMatrix[QP%6][0];
+    int i, Value;
+
+    if (Shift >= 0)
+    {
+        /* Scale by 2^Shift with a multiplication rather than "<<":
+         * left-shifting a negative product is undefined behaviour in C,
+         * while the multiplication gives the same value wherever the
+         * shift was defined. */
+        const int Factor = 1 << Shift;
+
+        for (i=0; i<4; i++)
+        {
+            Value = (pDst[i] * Scale) * Factor;
+            pDst[i] = (OMX_S16)Value;
+        }
+    }
+    else
+    {
+        /* For QP in [0,5] Shift is -1: halve the product */
+        for (i=0; i<4; i++)
+        {
+            Value = (pDst[i] * Scale) >> 1;
+            pDst[i] = (OMX_S16)Value;
+        }
+    }
+}
+
+
+/*
+ * Description:
+ * In-place inverse transform of the four chroma DC coefficients.
+ *
+ * With a..d = pData[0..3] on entry, the buffer holds on return:
+ *   pData[0] = a + b + c + d
+ *   pData[1] = a - b + c - d
+ *   pData[2] = a + b - c - d
+ *   pData[3] = a - b - c + d
+ * Each result is truncated to OMX_S16.
+ */
+
+static void InvTransformDC2x2(OMX_S16* pData)
+{
+    /* First stage: sum and difference of each coefficient pair */
+    const int sumTop = pData[0] + pData[1];
+    const int difTop = pData[0] - pData[1];
+    const int sumBot = pData[2] + pData[3];
+    const int difBot = pData[2] - pData[3];
+
+    /* Second stage: combine the two pairs */
+    pData[0] = (OMX_S16)(sumTop + sumBot);
+    pData[1] = (OMX_S16)(difTop + difBot);
+    pData[2] = (OMX_S16)(sumTop - sumBot);
+    pData[3] = (OMX_S16)(difTop - difBot);
+}
+
+
+/**
+ * Function: omxVCM4P10_TransformDequantChromaDCFromPair (6.3.4.2.2)
+ *
+ * Description:
+ * Reconstruct the 2x2 ChromaDC block from coefficient-position pair buffer,
+ * perform integer inverse transformation, and dequantization for 2x2 chroma
+ * DC coefficients, and update the pair buffer pointer to next non-empty
+ * block.
+ *
+ * The work is done in three steps, all operating on pDst:
+ * armVCM4P10_UnpackBlock2x2, then InvTransformDC2x2, then
+ * DequantChromaDC2x2.
+ *
+ * Input Arguments:
+ *
+ * ppSrc - Double pointer to residual coefficient-position pair buffer
+ * output by CAVLC decoding
+ * QP - Quantization parameter QpC
+ *
+ * Output Arguments:
+ *
+ * ppSrc - *ppSrc is updated to the start of next non empty block
+ * pDst - Pointer to the reconstructed 2x2 ChromaDC coefficients buffer;
+ * must be aligned on a 4-byte boundary.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppSrc, *ppSrc or pDst is NULL.
+ * - pDst is not 4-byte aligned.
+ * - QP is not in the range of [0-51].
+ *
+ */
+
+OMXResult omxVCM4P10_TransformDequantChromaDCFromPair(
+ const OMX_U8 **ppSrc,
+ OMX_S16* pDst,
+ OMX_INT QP
+ )
+{
+ armRetArgErrIf(ppSrc == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf(*ppSrc == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf(armNot4ByteAligned(pDst), OMX_Sts_BadArgErr);
+ armRetArgErrIf(QP<0, OMX_Sts_BadArgErr);
+ armRetArgErrIf(QP>51, OMX_Sts_BadArgErr);
+
+ armVCM4P10_UnpackBlock2x2(ppSrc, pDst); /* fills pDst; ppSrc is handed over, presumably so the helper can advance *ppSrc */
+ InvTransformDC2x2(pDst); /* inverse transform first ... */
+ DequantChromaDC2x2(pDst, QP); /* ... then scale the transformed values */
+
+ return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair.c
new file mode 100644
index 0000000..16c8be1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair.c
@@ -0,0 +1,148 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_TransformDequantLumaDCFromPair.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 inverse quantize and transform module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/*
+ * Description:
+ * Dequantize the sixteen coefficients of a luma 4x4 DC block in place.
+ *
+ * Every coefficient is multiplied by armVCM4P10_VMatrix[QP%6][0] and the
+ * product is then scaled by 2^((QP/6)-2): a multiplication by a power of
+ * two when QP/6 >= 2, otherwise a rounded right shift.
+ *
+ * Parameters:
+ * [in,out] pDst  the sixteen luma DC coefficients, overwritten with the
+ *                dequantized values (truncated to OMX_S16)
+ * [in]     QP    quantization parameter; the public entry point of this
+ *                file rejects values outside [0,51] before calling
+ */
+
+static void DequantLumaDC4x4(
+    OMX_S16* pDst,
+    OMX_INT QP
+)
+{
+    int Shift = (QP/6)-2 ;
+    int Scale = armVCM4P10_VMatrix[QP%6][0];
+    int i, Round, Value;
+
+    if (Shift >= 0)
+    {
+        /* Scale by 2^Shift with a multiplication rather than "<<":
+         * left-shifting a negative product is undefined behaviour in C,
+         * while the multiplication gives the same value wherever the
+         * shift was defined. */
+        const int Factor = 1 << Shift;
+
+        for (i=0; i<16; i++)
+        {
+            Value = (pDst[i] * Scale) * Factor;
+            pDst[i] = (OMX_S16)Value;
+        }
+    }
+    else
+    {
+        /* Negative exponent: shift right instead, adding half of the
+         * divisor first so that the result is rounded */
+        Shift = -Shift;
+        Round = 1<<(Shift-1);
+
+        for (i=0; i<16; i++)
+        {
+            Value = (pDst[i] * Scale + Round) >> Shift;
+            pDst[i] = (OMX_S16)Value;
+        }
+    }
+}
+
+
+
+/*
+ * Description:
+ * Apply one 4-point inverse transform butterfly in place to the samples
+ * pVec[0], pVec[stride], pVec[2*stride] and pVec[3*stride]. Each result
+ * is truncated to OMX_S16.
+ */
+static void InvTransformDC4x4Vector(OMX_S16* pVec, int stride)
+{
+    const int s0 = pVec[0];
+    const int s1 = pVec[stride];
+    const int s2 = pVec[2*stride];
+    const int s3 = pVec[3*stride];
+
+    pVec[0]        = (OMX_S16)(s0+s1+s2+s3);
+    pVec[stride]   = (OMX_S16)(s0+s1-s2-s3);
+    pVec[2*stride] = (OMX_S16)(s0-s1-s2+s3);
+    pVec[3*stride] = (OMX_S16)(s0-s1+s2-s3);
+}
+
+/*
+ * Description:
+ * Inverse Transform DC 4x4 Coefficients, in place. The sixteen entries of
+ * pData are treated as a 4x4 array stored row after row: all rows are
+ * transformed first, then all columns of the row-transformed data.
+ */
+static void InvTransformDC4x4(OMX_S16* pData)
+{
+    int row, col;
+
+    /* Row pass: four consecutive samples per vector */
+    for (row=0; row<4; row++)
+    {
+        InvTransformDC4x4Vector(pData + 4*row, 1);
+    }
+
+    /* Column pass: samples of one vector are four entries apart */
+    for (col=0; col<4; col++)
+    {
+        InvTransformDC4x4Vector(pData + col, 4);
+    }
+}
+
+
+/**
+ * Function: omxVCM4P10_TransformDequantLumaDCFromPair (6.3.4.2.1)
+ *
+ * Description:
+ * Reconstructs the 4x4 LumaDC block from the coefficient-position pair
+ * buffer, performs integer inverse transformation, and dequantization for
+ * 4x4 LumaDC coefficients, and updates the pair buffer pointer to the next
+ * non-empty block.
+ *
+ * The work is done in three steps, all operating on pDst:
+ * armVCM4P10_UnpackBlock4x4, then InvTransformDC4x4, then
+ * DequantLumaDC4x4.
+ *
+ * Input Arguments:
+ *
+ * ppSrc - Double pointer to residual coefficient-position pair buffer
+ * output by CAVLC decoding
+ * QP - Quantization parameter QpY
+ *
+ * Output Arguments:
+ *
+ * ppSrc - *ppSrc is updated to the start of next non empty block
+ * pDst - Pointer to the reconstructed 4x4 LumaDC coefficients buffer; must
+ * be aligned on a 8-byte boundary.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppSrc, *ppSrc or pDst is NULL.
+ * - pDst is not 8 byte aligned.
+ * - QP is not in the range of [0-51].
+ *
+ */
+
+OMXResult omxVCM4P10_TransformDequantLumaDCFromPair(
+ const OMX_U8 **ppSrc,
+ OMX_S16* pDst,
+ OMX_INT QP
+ )
+{
+ armRetArgErrIf(ppSrc == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf(*ppSrc == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+ armRetArgErrIf(armNot8ByteAligned(pDst), OMX_Sts_BadArgErr);
+ armRetArgErrIf(QP<0, OMX_Sts_BadArgErr);
+ armRetArgErrIf(QP>51, OMX_Sts_BadArgErr);
+
+ armVCM4P10_UnpackBlock4x4(ppSrc, pDst); /* fills pDst; ppSrc is handed over, presumably so the helper can advance *ppSrc */
+ /* Inverse transform and dequantization run as two separate passes (a fused InvTransformDequantLumaDC4x4 call was left disabled here) */
+ InvTransformDC4x4(pDst);
+ DequantLumaDC4x4(pDst, QP);
+
+ return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_ChromaDC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_ChromaDC.c
new file mode 100644
index 0000000..b5544dd
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_ChromaDC.c
@@ -0,0 +1,97 @@
+/**
+ *
+ * File Name: omxVCM4P10_TransformQuant_ChromaDC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function calculates the 2x2 Hadamard transform of the chroma DC
+ * coefficients and quantizes the result
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P10_TransformQuant_ChromaDC (6.3.5.6.1)
+ *
+ * Description:
+ * Applies a 2x2 Hadamard transform to the four chroma DC coefficients and
+ * quantizes the transformed values, overwriting the input buffer.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the 2x2 array of chroma DC coefficients (four
+ *           entries). 8-byte alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ * bIntra - Selects the rounding offset: non-zero for an INTRA block,
+ *          zero for an INTER block.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - Pointer to transformed and quantized coefficients. 8-byte
+ *           alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ *              conditions are true:
+ *    - pSrcDst is NULL
+ *    - pSrcDst is not aligned on an 8-byte boundary
+ *    - iQP is greater than 51
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_ChromaDC(
+    OMX_S16* pSrcDst,
+    OMX_U32 iQP,
+    OMX_U8 bIntra
+)
+{
+    OMX_INT k;
+    OMX_S32 Coef[4];
+    OMX_S32 SumTop, DifTop, SumBot, DifBot;
+    OMX_S32 Level;
+    OMX_S32 Shift, Rounding, Scale;
+
+    /* Check for argument error */
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot8ByteAligned(pSrcDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(iQP > 51, OMX_Sts_BadArgErr);
+
+    /* 2x2 Hadamard transform, built from pairwise sums and differences */
+    SumTop = pSrcDst[0] + pSrcDst[1];
+    DifTop = pSrcDst[0] - pSrcDst[1];
+    SumBot = pSrcDst[2] + pSrcDst[3];
+    DifBot = pSrcDst[2] - pSrcDst[3];
+
+    Coef[0] = SumTop + SumBot;
+    Coef[1] = DifTop + DifBot;
+    Coef[2] = SumTop - SumBot;
+    Coef[3] = DifTop - DifBot;
+
+    /* Quantizer set-up: shift amount, multiplier and rounding offset */
+    Shift = ARM_M4P10_Q_OFFSET + 1 + (iQP / 6);
+    Scale = armVCM4P10_MFMatrix [iQP % 6][0];
+    Rounding = (1 << Shift) / (bIntra ? 3 : 6); /* 3->INTRA, 6->INTER */
+
+    /* Quantize the magnitude, then restore the sign */
+    for (k = 0; k < 4; k++)
+    {
+        Level = (armAbs(Coef[k]) * Scale + Rounding) >> Shift;
+        if (Coef[k] < 0)
+        {
+            Level = -Level;
+        }
+        pSrcDst[k] = (OMX_S16)Level;
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_LumaDC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_LumaDC.c
new file mode 100644
index 0000000..2ccf7f0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p10/src/omxVCM4P10_TransformQuant_LumaDC.c
@@ -0,0 +1,119 @@
+/**
+ *
+ * File Name: omxVCM4P10_TransformQuant_LumaDC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate 4x4 hadamard transform of luma DC coefficients
+ * and quantization
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_TransformQuant_LumaDC (6.3.5.6.2)
+ *
+ * Description:
+ * Applies a 4x4 Hadamard transform (rows first, then columns) to the luma
+ * DC coefficients and quantizes the transformed values, overwriting the
+ * input buffer.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the 4x4 array of luma DC coefficients (sixteen
+ *           entries, one row after the other). 16-byte alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - Pointer to transformed and quantized coefficients. 16-byte
+ *           alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ *              conditions are true:
+ *    - pSrcDst is NULL
+ *    - pSrcDst is not aligned on a 16-byte boundary
+ *    - iQP is greater than 51
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_LumaDC(
+    OMX_S16* pSrcDst,
+    OMX_U32 iQP
+)
+{
+    OMX_INT row, col;
+    OMX_S32 Work[4][4];
+    OMX_S32 Level;
+    OMX_U32 Shift, Rounding, Scale;
+
+    /* Check for argument error */
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(armNot16ByteAligned(pSrcDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(iQP > 51, OMX_Sts_BadArgErr);
+
+    /* Horizontal Hadamard: transform each input row into Work */
+    for (row = 0; row < 4; row++)
+    {
+        const OMX_S16 *pRow = pSrcDst + row * 4;
+        const OMX_S32 Sum02 = pRow[0] + pRow[2];    /* a+c */
+        const OMX_S32 Sum13 = pRow[1] + pRow[3];    /* b+d */
+        const OMX_S32 Dif02 = pRow[0] - pRow[2];    /* a-c */
+        const OMX_S32 Dif13 = pRow[1] - pRow[3];    /* b-d */
+
+        Work[row][0] = Sum02 + Sum13;               /* a+b+c+d */
+        Work[row][1] = Dif02 + Dif13;               /* a+b-c-d */
+        Work[row][2] = Dif02 - Dif13;               /* a-b-c+d */
+        Work[row][3] = Sum02 - Sum13;               /* a-b+c-d */
+    }
+
+    /* Vertical Hadamard: same butterfly down each column, in place */
+    for (col = 0; col < 4; col++)
+    {
+        const OMX_S32 Sum02 = Work[0][col] + Work[2][col];
+        const OMX_S32 Sum13 = Work[1][col] + Work[3][col];
+        const OMX_S32 Dif02 = Work[0][col] - Work[2][col];
+        const OMX_S32 Dif13 = Work[1][col] - Work[3][col];
+
+        Work[0][col] = Sum02 + Sum13;
+        Work[1][col] = Dif02 + Dif13;
+        Work[2][col] = Dif02 - Dif13;
+        Work[3][col] = Sum02 - Sum13;
+    }
+
+    /* Quantizer set-up: shift amount, rounding offset and multiplier */
+    Shift = ARM_M4P10_Q_OFFSET + 1 + (iQP / 6);
+    Rounding = (1 << Shift) / 3;
+    Scale = armVCM4P10_MFMatrix [iQP % 6][0];
+
+    /* Halve each transformed value, quantize its magnitude, restore sign */
+    for (row = 0; row < 4; row++)
+    {
+        for (col = 0; col < 4; col++)
+        {
+            Level = (armAbs(Work[row][col] / 2) * Scale + Rounding) >> Shift;
+            if (Work[row][col] < 0)
+            {
+                Level = -Level;
+            }
+            pSrcDst[row * 4 + col] = (OMX_S16)Level;
+        }
+    }
+    return OMX_Sts_NoErr;
+}
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_DCT_Table.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_DCT_Table.h
new file mode 100644
index 0000000..3255b61
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_DCT_Table.h
@@ -0,0 +1,30 @@
+/**
+ *
+ * File Name: armVCM4P2_DCT_Table.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ *
+ * File: armVCM4P2_DCT_Table.h
+ * Description: Declares Tables used for DCT/IDCT module
+ * in MP4P2 codec.
+ *
+ */
+
+#ifndef _OMXDCTTAB_H_
+#define _OMXDCTTAB_H_
+
+extern const OMX_F64 armVCM4P2_preCalcDCTCos[8][8]; /* read-only 8x8 table, only declared here; this header has no #include of its own, so OMX_F64 must already be declared by the including file. NOTE(review): presumably the pre-computed cosine terms of the DCT - confirm against the definition */
+
+#endif /* _OMXDCTTAB_H_ */
+
+
+/* End of file */
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
new file mode 100644
index 0000000..92ecc05
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
@@ -0,0 +1,42 @@
+/**
+ *
+ * File Name: armVCM4P2_Huff_Tables_VLC.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ *
+ * File: armVCM4P2_Huff_Tables_VLC.h
+ * Description: Declares Tables used for Huffman coding and decoding
+ * in MP4P2 codec.
+ *
+ */
+
+#ifndef _OMXHUFFTAB_H_
+#define _OMXHUFFTAB_H_
+
+extern const OMX_U8 armVCM4P2_IntraL0RunIdx[11]; /* all tables below are only declared here; this header has no #include of its own, so OMX_U8 and ARM_VLC32 must already be declared by the including file */
+extern const ARM_VLC32 armVCM4P2_IntraVlcL0[68]; /* NOTE(review): Intra/Inter and L0/L1 presumably select the block type and the LAST=0 / LAST=1 code sets - confirm against the definitions */
+extern const OMX_U8 armVCM4P2_IntraL1RunIdx[7];
+extern const ARM_VLC32 armVCM4P2_IntraVlcL1[36];
+extern const OMX_U8 armVCM4P2_IntraL0LMAX[15]; /* NOTE(review): LMAX / RMAX presumably hold the maximum level per run and maximum run per level - confirm */
+extern const OMX_U8 armVCM4P2_IntraL1LMAX[21];
+extern const OMX_U8 armVCM4P2_IntraL0RMAX[27];
+extern const OMX_U8 armVCM4P2_IntraL1RMAX[8];
+extern const OMX_U8 armVCM4P2_InterL0RunIdx[12];
+extern const ARM_VLC32 armVCM4P2_InterVlcL0[59];
+extern const OMX_U8 armVCM4P2_InterL1RunIdx[3];
+extern const ARM_VLC32 armVCM4P2_InterVlcL1[45];
+extern const OMX_U8 armVCM4P2_InterL0LMAX[27];
+extern const OMX_U8 armVCM4P2_InterL1LMAX[41];
+extern const OMX_U8 armVCM4P2_InterL0RMAX[12];
+extern const OMX_U8 armVCM4P2_InterL1RMAX[3];
+extern const ARM_VLC32 armVCM4P2_aIntraDCLumaIndex[14]; /* NOTE(review): by name, VLC tables for the intra DC size of luma and chroma blocks - confirm */
+extern const ARM_VLC32 armVCM4P2_aIntraDCChromaIndex[14];
+extern const ARM_VLC32 armVCM4P2_aVlcMVD[66]; /* NOTE(review): by name, VLC table for motion vector differences - confirm */
+
+#endif /* _OMXHUFFTAB_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
new file mode 100644
index 0000000..c75ed89
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
@@ -0,0 +1,25 @@
+/**
+ *
+ * File Name: armVCM4P2_ZigZag_Tables.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ *
+ * File: armVCM4P2_ZigZag_Tables.h
+ * Description: Declares Tables used for Zigzag scan in MP4P2 codec.
+ *
+ */
+
+#ifndef _OMXZIGZAGTAB_H_
+#define _OMXZIGZAGTAB_H_
+
+extern const OMX_U8 armVCM4P2_aClassicalZigzagScan [64]; /* three 64-entry scan tables, only declared here; this header has no #include of its own, so OMX_U8 must already be declared by the including file */
+extern const OMX_U8 armVCM4P2_aHorizontalZigzagScan [64]; /* NOTE(review): presumably each entry is a coefficient position inside an 8x8 block - confirm against the definitions */
+extern const OMX_U8 armVCM4P2_aVerticalZigzagScan [64];
+
+#endif /* _OMXZIGZAGTAB_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_ACDCPredict.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_ACDCPredict.c
new file mode 100644
index 0000000..b6a396a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_ACDCPredict.c
@@ -0,0 +1,253 @@
+/**
+ *
+ * File Name: armVCM4P2_ACDCPredict.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains module for DC/AC coefficient prediction
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P2_ACDCPredict
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+ * to the function call, prediction direction (predDir) should be selected
+ * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDst pointer to the coefficient buffer which contains
+ * the quantized coefficient residuals (PQF) of the
+ * current block
+ * [in] pPredBufRow pointer to the coefficient row buffer
+ * [in] pPredBufCol pointer to the coefficient column buffer
+ * [in] curQP quantization parameter of the current block. curQP
+ * may equal to predQP especially when the current
+ * block and the predictor block are in the same
+ * macroblock.
+ * [in] predQP quantization parameter of the predictor block
+ * [in] predDir indicates the prediction direction which takes one
+ * of the following values:
+ * OMX_VC_HORIZONTAL predict horizontally
+ * OMX_VC_VERTICAL predict vertically
+ * [in] ACPredFlag a flag indicating if AC prediction should be
+ * performed. It is equal to ac_pred_flag in the bit
+ * stream syntax of MPEG-4
+ * [in] videoComp video component type (luminance, chrominance or
+ * alpha) of the current block
+ * [in] flag selects the operating mode of this function: 1 calculates
+ * PQF (prediction), 0 calculates QF (reconstruction)
+ * [out] pPreACPredict pointer to the predicted coefficients buffer.
+ * Filled ONLY if it is not NULL
+ * [out] pSrcDst pointer to the coefficient buffer which contains
+ * the quantized coefficients (QF) of the current
+ * block
+ * [out] pPredBufRow pointer to the updated coefficient row buffer
+ * [out] pPredBufCol pointer to the updated coefficient column buffer
+ * [out] pSumErr pointer to the updated sum of the difference
+ * between predicted and unpredicted coefficients
+ * If this is NULL, do not update
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_ACDCPredict(
+    OMX_S16 * pSrcDst,
+    OMX_S16 * pPreACPredict,
+    OMX_S16 * pPredBufRow,
+    OMX_S16 * pPredBufCol,
+    OMX_INT curQP,
+    OMX_INT predQP,
+    OMX_INT predDir,
+    OMX_INT ACPredFlag,
+    OMXVCM4P2VideoComponent videoComp,
+    OMX_U8 flag,
+    OMX_INT *pSumErr
+)
+{
+    OMX_INT dcScaler, i;
+    OMX_S16 tempPred;
+
+    /* Argument error checks. pPreACPredict and pSumErr are optional and
+     * are therefore not checked here; every use below is NULL-guarded. */
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pPredBufRow == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pPredBufCol == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(curQP <= 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf(predQP <= 0, OMX_Sts_BadArgErr);
+    armRetArgErrIf((predDir != 1) && (predDir != 2), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs4ByteAligned(pSrcDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs4ByteAligned(pPredBufRow), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs4ByteAligned(pPredBufCol), OMX_Sts_BadArgErr);
+
+
+    /* Set DC scaler value to avoid some compilers giving a warning.
+     * NOTE(review): dcScaler stays 0 when videoComp is neither
+     * OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE and is then used as the
+     * divisor of the DC prediction below - confirm that callers never
+     * pass another component. */
+    dcScaler=0;
+
+    /* Calculate the DC scaler value from curQP */
+    if (videoComp == OMX_VC_LUMINANCE)
+    {
+        if (curQP >= 1 && curQP <= 4)
+        {
+            dcScaler = 8;
+        }
+        else if (curQP >= 5 && curQP <= 8)
+        {
+            dcScaler = 2 * curQP;
+        }
+        else if (curQP >= 9 && curQP <= 24)
+        {
+            dcScaler = curQP + 8;
+        }
+        else
+        {
+            dcScaler = (2 * curQP) - 16;
+        }
+    }
+    else if (videoComp == OMX_VC_CHROMINANCE)
+    {
+        if (curQP >= 1 && curQP <= 4)
+        {
+            dcScaler = 8;
+        }
+        else if (curQP >= 5 && curQP <= 24)
+        {
+            dcScaler = (curQP + 13)/2;
+        }
+        else
+        {
+            dcScaler = curQP - 6;
+        }
+    }
+
+    /* Entry 0 of the optional output buffer records the direction used */
+    if (pPreACPredict != NULL)
+    {
+        pPreACPredict[0] = predDir;
+    }
+
+    if (predDir == OMX_VC_VERTICAL)
+    {
+        /* F[0][0]//dc_scaler */
+        tempPred = armIntDivAwayFromZero(pPredBufRow[0], dcScaler);
+    }
+    else
+    {
+        /* F[0][0]//dc_scaler */
+        tempPred = armIntDivAwayFromZero(pPredBufCol[0], dcScaler);
+    }
+
+    /* Updating the DC value to the row and col buffer: the previous
+     * column-buffer DC is saved eight entries before pPredBufRow, so the
+     * caller's row buffer must extend at least that far back */
+    *(pPredBufRow - 8) = *pPredBufCol;
+
+    if (flag)
+    {
+        /* Cal and store F[0][0] into the col buffer */
+        *pPredBufCol = pSrcDst[0] * dcScaler;
+
+        /* PQF = QF - F[0][0]//dc_scaler */
+        pSrcDst[0] -= tempPred;
+    }
+    else
+    {
+        /* QF = PQF + F[0][0]//dc_scaler */
+        pSrcDst[0] += tempPred;
+
+        /* Saturate */
+        pSrcDst[0] = armClip (-2048, 2047, pSrcDst[0]);
+
+        /* Cal and store F[0][0] into the col buffer */
+        *pPredBufCol = pSrcDst[0] * dcScaler;
+    }
+
+
+    if (ACPredFlag == 1)
+    {
+        if (predDir == OMX_VC_VERTICAL)
+        {
+            /* Vertical prediction: entries 1..7 of pSrcDst are predicted
+             * from the row buffer */
+            for (i = 1; i < 8; i++)
+            {
+                tempPred = armIntDivAwayFromZero(pPredBufRow[i] * predQP, curQP);
+                if (flag)
+                {
+                    /* Updating QF to the row buff */
+                    pPredBufRow[i] = pSrcDst[i];
+                    /*PQFX[v][0] = QFX[v][0] - (QFA[v][0] * QPA) // QPX */
+                    pSrcDst[i] -= tempPred;
+                    /* Sum of absolute values of AC prediction error, this can
+                       be used as a reference to choose whether to use
+                       AC prediction. Skipped when the caller passes NULL. */
+                    if (pSumErr != NULL)
+                    {
+                        *pSumErr += armAbs(pSrcDst[i]);
+                    }
+                    /* pPreACPredict[1~7] store the error signal
+                       after AC prediction. Skipped when the caller
+                       passes NULL. */
+                    if (pPreACPredict != NULL)
+                    {
+                        pPreACPredict[i] = pSrcDst[i];
+                    }
+                }
+                else
+                {
+                    /*QFX[v][0] = PQFX[v][0] + (QFA[v][0] * QPA) // QPX */
+                    pSrcDst[i] += tempPred;
+
+                    /* Saturate */
+                    pSrcDst[i] = armClip (-2048, 2047, pSrcDst[i]);
+
+                    /* Updating QF to the row buff */
+                    pPredBufRow[i] = pSrcDst[i];
+                }
+            }
+        }
+        else
+        {
+            /* Horizontal prediction: entries 8, 16, ..., 56 of pSrcDst are
+             * predicted from entries 1..7 of the column buffer */
+            for (i = 8; i < 64; i += 8)
+            {
+                tempPred = armIntDivAwayFromZero(pPredBufCol[i>>3] * predQP, curQP);
+                if (flag)
+                {
+                    /* Updating QF to col buff */
+                    pPredBufCol[i>>3] = pSrcDst[i];
+                    /*PQFX[0][u] = QFX[0][u] - (QFA[0][u] * QPA) // QPX */
+                    pSrcDst[i] -= tempPred;
+                    /* Sum of absolute values of AC prediction error, this can
+                       be used as a reference to choose whether to use AC
+                       prediction. Skipped when the caller passes NULL. */
+                    if (pSumErr != NULL)
+                    {
+                        *pSumErr += armAbs(pSrcDst[i]);
+                    }
+                    /* pPreACPredict[1~7] store the error signal
+                       after AC prediction. Skipped when the caller
+                       passes NULL. */
+                    if (pPreACPredict != NULL)
+                    {
+                        pPreACPredict[i>>3] = pSrcDst[i];
+                    }
+                }
+                else
+                {
+                    /*QFX[0][u] = PQFX[0][u] + (QFA[0][u] * QPA) // QPX */
+                    pSrcDst[i] += tempPred;
+
+                    /* Saturate */
+                    pSrcDst[i] = armClip (-2048, 2047, pSrcDst[i]);
+
+                    /* Updating QF to col buff */
+                    pPredBufCol[i>>3] = pSrcDst[i];
+                }
+            }
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Half.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Half.c
new file mode 100644
index 0000000..1b69a33
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Half.c
@@ -0,0 +1,187 @@
+/**
+ *
+ * File Name: armVCM4P2_BlockMatch_Half.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for Block matching, a full search algorithm
+ * is implemented
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P2_BlockMatch_Half
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution. Returns the estimated
+ * motion vector and associated minimum SAD. This function estimates the half-pixel
+ * motion vector by interpolating the integer resolution motion vector referenced
+ * by the input parameter pSrcDstMV, i.e., the initial integer MV is generated
+ * externally. The input parameters pSrcRefBuf and pSearchPointRefPos should be
+ * shifted by the winning MV of 16x16 integer search prior to calling BlockMatch_Half_16x16.
+ * The function BlockMatch_Integer_16x16 may be used for integer motion estimation.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB
+ * that corresponds to the location of the current macroblock in
+ * the current plane.
+ * [in] refWidth width of the reference plane
+ * [in] pRefRect reference plane valid region rectangle
+ * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane
+ * (linear array, 256 entries); must be aligned on an 8-byte boundary.
+ * [in] pSearchPointRefPos position of the starting point for half pixel search (specified
+ * in terms of integer pixel units) in the reference plane.
+ * [in] rndVal rounding control bit for half pixel motion estimation;
+ * 0=rounding control disabled; 1=rounding control enabled
+ * [in] pSrcDstMV pointer to the initial MV estimate; typically generated during a prior
+ * 16X16 integer search and its unit is half pixel.
+ * [in] BlockSize MacroBlock Size i.e either 16x16 or 8x8.
+ * [out]pSrcDstMV pointer to estimated MV
+ * [out]pDstSAD pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Half(
+    const OMX_U8 *pSrcRefBuf,
+    OMX_INT refWidth,
+    const OMXRect *pRefRect,
+    const OMX_U8 *pSrcCurrBuf,
+    const OMXVCM4P2Coordinate *pSearchPointRefPos,
+    OMX_INT rndVal,
+    OMXVCMotionVector *pSrcDstMV,
+    OMX_INT *pDstSAD,
+    OMX_U8 BlockSize
+)
+{
+    OMX_INT row, col;
+    OMX_S16 bestX = 0, bestY = 0, x, y;
+    OMX_INT candSAD, bestSAD;
+    OMX_INT fromX, toX, fromY, toY;
+    int absX, absY, absXY, divisor;
+    /* Reference block addressed by the integer part of the input MV */
+    const OMX_U8 *pRefBase;
+    const OMX_U8 *pRefRow;
+    const OMX_U8 *pCurRow;
+    OMX_U8 tempPel;
+
+    /* Argument error checks */
+    armRetArgErrIf(pSrcRefBuf == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcCurrBuf == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSearchPointRefPos == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcDstMV == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr);
+
+    /* The MV is in half-pixel units: halve it to reach the integer position */
+    pRefBase = pSrcRefBuf + (refWidth * (pSrcDstMV->dy/2)) + (pSrcDstMV->dx/2);
+
+    /* SAD at the integer position; it is the starting minimum */
+    bestSAD = 0;
+    for (row = 0; row < BlockSize; row++)
+    {
+        pRefRow = pRefBase + row * refWidth;
+        pCurRow = pSrcCurrBuf + row * BlockSize;
+
+        for (col = 0; col < BlockSize; col++)
+        {
+            bestSAD += armAbs (pRefRow[col] - pCurRow[col]);
+        }
+    }
+    *pDstSAD = bestSAD;
+
+    /* Restrict the +/-1 half-pixel search so that it stays inside pRefRect */
+    fromX = ((pSearchPointRefPos->x - 1) < pRefRect->x) ? 0 : 1;
+    toX   = ((pSearchPointRefPos->x + BlockSize + 1) > (pRefRect->x + pRefRect->width)) ? 0 : 1;
+    fromY = ((pSearchPointRefPos->y - 1) < pRefRect->y) ? 0 : 1;
+    toY   = ((pSearchPointRefPos->y + BlockSize + 1) > (pRefRect->y + pRefRect->height)) ? 0 : 1;
+
+    /* Try every half-pixel offset around the integer position */
+    for (y = -fromY; y <= toY; y++)
+    {
+        for (x = -fromX; x <= toX; x++)
+        {
+            /* The integer position itself was evaluated above */
+            if (x == 0 && y == 0)
+            {
+                continue;
+            }
+
+            /* Interpolation weights and divisor are fixed per offset */
+            absX = armAbs (x);
+            absY = armAbs (y);
+            absXY = armAbs (x*y);
+            divisor = 2 * (absX + absY);
+
+            /* Interpolate every pixel and accumulate the SAD */
+            candSAD = 0;
+            for (row = 0; row < BlockSize; row++)
+            {
+                pRefRow = pRefBase + row * refWidth;
+                pCurRow = pSrcCurrBuf + row * BlockSize;
+
+                for (col = 0; col < BlockSize; col++)
+                {
+                    tempPel = (
+                                pRefRow[col]
+                                + pRefRow[col + x] * absX
+                                + pRefRow[col + refWidth * y] * absY
+                                + pRefRow[col + refWidth * y + x] * absXY
+                                + absX + absY - rndVal
+                              ) / divisor;
+                    candSAD += armAbs (tempPel - pCurRow[col]);
+                }
+            }
+
+            /* Keep this offset if armVCM4P2_CompareMV prefers it */
+            if (armVCM4P2_CompareMV (x, y, candSAD, bestX, bestY, bestSAD))
+            {
+                *pDstSAD = candSAD;
+                bestSAD = candSAD;
+                bestX = x;
+                bestY = y;
+            }
+        }
+    }
+
+    /* Add the winning half-pixel offset to the input MV */
+    pSrcDstMV->dx += bestX;
+    pSrcDstMV->dy += bestY;
+
+    return OMX_Sts_NoErr;
+
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Integer.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Integer.c
new file mode 100644
index 0000000..77fe358
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_BlockMatch_Integer.c
@@ -0,0 +1,167 @@
+/**
+ *
+ * File Name: armVCM4P2_BlockMatch_Integer.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for Block matching, a full search algorithm
+ * is implemented
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P2_BlockMatch_Integer
+ *
+ * Description:
+ * Performs a full-search, integer-pixel block match for a BlockSize x BlockSize
+ * block and reports the motion vector with the minimum SAD. Every integer
+ * offset within +/- searchRange is evaluated, after the range has been clipped
+ * so that the candidate block stays inside *pRefRect. Equal SADs are resolved
+ * by armVCM4P2_CompareMV. The output motion vector is written in half-pixel
+ * units, i.e. twice the integer-pixel offset.
+ *
+ * Remarks:
+ * pSrcPreMV and pSrcPreSAD are part of the interface but are not read by this
+ * implementation. The search range is taken from pMESpec.
+ *
+ * Parameters:
+ * [in] pSrcRefBuf    pointer to the reference Y plane; points to the reference
+ *                    block that corresponds to the location of the current
+ *                    block in the current plane.
+ * [in] refWidth      width (line stride) of the reference plane
+ * [in] pRefRect      pointer to the valid rectangle in the reference plane,
+ *                    relative to the image origin. It is not limited to the
+ *                    image boundary but depends on the padding. For example,
+ *                    if 4 pixels are padded outside the image border, the
+ *                    left border can be -4.
+ * [in] pSrcCurrBuf   pointer to the current block extracted from the original
+ *                    plane (linear array of BlockSize * BlockSize entries)
+ * [in] pCurrPointPos position of the current block in the current plane
+ * [in] pSrcPreMV     pointer to predicted motion vector (unused here)
+ * [in] pSrcPreSAD    pointer to SAD associated with the predicted MV
+ *                    (unused here)
+ * [in] pMESpec       motion estimation specification structure; read as an
+ *                    OMXVCM4P2MEParams to obtain searchRange, which is
+ *                    expressed in integer pixel units and applies equally in
+ *                    all directions.
+ * [out] pDstMV       pointer to estimated MV (half-pixel units)
+ * [out] pDstSAD      pointer to minimum SAD
+ * [in] BlockSize     edge length of the square block, in pixels
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error.
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Integer(
+    const OMX_U8 *pSrcRefBuf,
+    OMX_INT refWidth,
+    const OMXRect *pRefRect,
+    const OMX_U8 *pSrcCurrBuf,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    const OMXVCMotionVector *pSrcPreMV,
+    const OMX_INT *pSrcPreSAD,
+    void *pMESpec,
+    OMXVCMotionVector *pDstMV,
+    OMX_INT *pDstSAD,
+    OMX_U8 BlockSize
+)
+{
+
+    /* Definitions and Initializations*/
+
+    OMX_INT outer, inner, count, index;
+    OMX_INT candSAD;
+    /* (256*256 + 1): above 16*16*255, the largest SAD a 16x16 block of 8-bit
+     * samples can produce, so the first candidate always becomes the best */
+    OMX_INT minSAD = 0x10001, fromX, toX, fromY, toY;
+    /* Reference pointer displaced to the candidate position */
+    const OMX_U8 *pTempSrcRefBuf;
+    OMX_S16 x, y;
+    OMX_INT searchRange;
+
+    /* Argument error checks */
+    armRetArgErrIf(pSrcRefBuf == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcCurrBuf == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pCurrPointPos == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstMV == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr);
+
+    searchRange = ((OMXVCM4P2MEParams *)pMESpec)->searchRange;
+
+    /* Start from the full range, then shrink each side that would leave
+     * the valid rectangle */
+    fromX = searchRange;
+    toX = searchRange;
+    fromY = searchRange;
+    toY = searchRange;
+
+    if ((pCurrPointPos->x - searchRange) < pRefRect->x)
+    {
+        fromX = pCurrPointPos->x - pRefRect->x;
+    }
+
+    if ((pCurrPointPos->x + BlockSize + searchRange) > (pRefRect->x + pRefRect->width))
+    {
+        toX = pRefRect->width - (pCurrPointPos->x - pRefRect->x) - BlockSize;
+    }
+
+    if ((pCurrPointPos->y - searchRange) < pRefRect->y)
+    {
+        fromY = pCurrPointPos->y - pRefRect->y;
+    }
+
+    if ((pCurrPointPos->y + BlockSize + searchRange) > (pRefRect->y + pRefRect->height))
+    {
+        /* The vertical limit is derived from the rectangle height (the
+         * horizontal limit above uses the width) */
+        toY = pRefRect->height - (pCurrPointPos->y - pRefRect->y) - BlockSize;
+    }
+
+    pDstMV->dx = -fromX;
+    pDstMV->dy = -fromY;
+    /* Looping on y- axis */
+    for (y = -fromY; y <= toY; y++)
+    {
+
+        /* Looping on x- axis */
+        for (x = -fromX; x <= toX; x++)
+        {
+            /* Positioning the pointer */
+            pTempSrcRefBuf = pSrcRefBuf + (refWidth * y) + x;
+
+            /* Calculate the SAD; 'index' walks the strided reference plane,
+             * 'count' walks the linear current block */
+            for (outer = 0, count = 0, index = 0, candSAD = 0;
+                 outer < BlockSize;
+                 outer++, index += refWidth - BlockSize)
+            {
+                for (inner = 0; inner < BlockSize; inner++, count++, index++)
+                {
+                    candSAD += armAbs (pTempSrcRefBuf[index] - pSrcCurrBuf[count]);
+                }
+            }
+
+            /* Result calculations: pDstMV holds half-pixel units, so it is
+             * halved before being compared with the integer candidate */
+            if (armVCM4P2_CompareMV (x, y, candSAD, pDstMV->dx/2, pDstMV->dy/2, minSAD))
+            {
+                *pDstSAD = candSAD;
+                minSAD = candSAD;
+                pDstMV->dx = x*2;
+                pDstMV->dy = y*2;
+            }
+
+        } /* End of x- axis */
+    } /* End of y-axis */
+
+    return OMX_Sts_NoErr;
+
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CheckVLCEscapeMode.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CheckVLCEscapeMode.c
new file mode 100644
index 0000000..94e8639
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CheckVLCEscapeMode.c
@@ -0,0 +1,156 @@
+/**
+ *
+ * File Name: armVCM4P2_CheckVLCEscapeMode.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains module for VLC escape mode check
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P2_CheckVLCEscapeMode
+ *
+ * Description:
+ * Selects the VLC escape mode for one (run, level) event by testing, in
+ * order, whether (run, level), (run, levelPlus) and (runPlus, level) have an
+ * entry in the run index table.
+ *
+ * Remarks:
+ * The magnitudes of level and levelPlus are used; their signs are ignored.
+ *
+ * Parameters:
+ * [in] run                 Run value (count of zeros) to be encoded
+ * [in] runPlus             Calculated as runPlus = run - (RMAX + 1)
+ * [in] level               Level value (non-zero value) to be encoded
+ * [in] levelPlus           Calculated as
+ *                          levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in] maxStoreRun         Max store possible (considering last and
+ *                          inter/intra)
+ * [in] maxRunForMultipleEntries
+ *                          The run value after which level
+ *                          will be equal to 1:
+ *                          (considering last and inter/intra status)
+ * [in] shortVideoHeader    Non-zero selects mode 4 whenever (run, level)
+ *                          has no direct table entry
+ * [in] pRunIndexTable      Run Index table defined in
+ *                          armVCM4P2_Huff_Tables_VLC.c
+ *                          (considering last and inter/intra status)
+ *
+ * Return Value:
+ * Returns the escape mode:
+ * 0 --> no escape mode, 1 --> escape type 1,
+ * 2 --> escape type 2, 3 --> escape type 3 (FLC), check section 7.4.1.3
+ * in the MPEG ISO standard,
+ * 4 --> escape needed while shortVideoHeader is non-zero.
+ *
+ */
+
+OMX_U8 armVCM4P2_CheckVLCEscapeMode(
+    OMX_U32 run,
+    OMX_U32 runPlus,
+    OMX_S16 level,
+    OMX_S16 levelPlus,
+    OMX_U8 maxStoreRun,
+    OMX_U8 maxRunForMultipleEntries,
+    OMX_INT shortVideoHeader,
+    const OMX_U8 *pRunIndexTable
+)
+{
+    /* Number of levels the table holds for the run under test */
+    OMX_U8 levelCount;
+
+    level = armAbs (level);
+    levelPlus = armAbs (levelPlus);
+
+    /* Mode 0: (run, level) is directly representable */
+    if (run <= maxStoreRun)
+    {
+        levelCount = pRunIndexTable[run + 1] - pRunIndexTable[run];
+        if (run > maxRunForMultipleEntries)
+        {
+            levelCount = 1;
+        }
+        if (level <= levelCount)
+        {
+            return 0;
+        }
+    }
+
+    /* An escape is required; short video header has a single escape form */
+    if (shortVideoHeader)
+    {
+        return 4;
+    }
+
+    /* Mode 1: (run, levelPlus) is representable */
+    if (run <= maxStoreRun)
+    {
+        levelCount = pRunIndexTable[run + 1] - pRunIndexTable[run];
+        if (run > maxRunForMultipleEntries)
+        {
+            levelCount = 1;
+        }
+        if (levelPlus <= levelCount)
+        {
+            return 1;
+        }
+    }
+
+    /* Mode 2: (runPlus, level) is representable */
+    if (runPlus <= maxStoreRun)
+    {
+        levelCount = pRunIndexTable[runPlus + 1] - pRunIndexTable[runPlus];
+        if (runPlus > maxRunForMultipleEntries)
+        {
+            levelCount = 1;
+        }
+        if (level <= levelCount)
+        {
+            return 2;
+        }
+    }
+
+    /* Mode 3: fall back to the fixed-length code */
+    return 3;
+}
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CompareMV.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CompareMV.c
new file mode 100644
index 0000000..3b8845e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_CompareMV.c
@@ -0,0 +1,71 @@
+/**
+ *
+ * File Name: armVCM4P2_CompareMV.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains module for comparing motion vectors and SAD's to decide
+ * the best MV and SAD
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P2_CompareMV
+ *
+ * Description:
+ * Decides whether a candidate motion vector should replace the current best
+ * one. A lower SAD always wins; when the SADs are equal the vector with the
+ * smaller squared length wins.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] mvX       x coordinate of the candidate motion vector
+ * [in] mvY       y coordinate of the candidate motion vector
+ * [in] candSAD   Candidate SAD
+ * [in] bestMVX   x coordinate of the best motion vector
+ * [in] bestMVY   y coordinate of the best motion vector
+ * [in] bestSAD   best SAD
+ *
+ * Return Value:
+ * OMX_INT -- 1 to indicate that the candidate is the new best
+ *            0 to indicate that the current best is kept
+ */
+
+OMX_INT armVCM4P2_CompareMV (
+    OMX_S16 mvX,
+    OMX_S16 mvY,
+    OMX_INT candSAD,
+    OMX_S16 bestMVX,
+    OMX_S16 bestMVY,
+    OMX_INT bestSAD
+)
+{
+    /* Different SADs decide the outcome on their own */
+    if (candSAD != bestSAD)
+    {
+        return (candSAD < bestSAD) ? 1 : 0;
+    }
+
+    /* Equal SADs: prefer the shorter motion vector */
+    return ((mvX * mvX + mvY * mvY) <
+            (bestMVX * bestMVX + bestMVY * bestMVY)) ? 1 : 0;
+}
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DCT_Table.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DCT_Table.c
new file mode 100644
index 0000000..a6f713e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DCT_Table.c
@@ -0,0 +1,112 @@
+ /**
+ *
+ * File Name: armVCM4P2_DCT_Table.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVCM4P2_DCT_Table.c
+ * Description: Contains the DCT/IDCT coefficient matrix
+ *
+ */
+
+#ifndef _OMXDCTCOSTAB_C_
+#define _OMXDCTCOSTAB_C_
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+const OMX_F64 armVCM4P2_preCalcDCTCos[8][8] = /* entry [n][k] = (c(k)/2) * cos((2n+1)*k*pi/16), with c(0) = 1/sqrt(2) and c(k>0) = 1 */
+{
+ { /* n = 0 */
+ 0.353553390593273730,
+ 0.490392640201615220,
+ 0.461939766255643370,
+ 0.415734806151272620,
+ 0.353553390593273790,
+ 0.277785116509801140,
+ 0.191341716182544920,
+ 0.097545161008064152
+ },
+ { /* n = 1 */
+ 0.353553390593273730,
+ 0.415734806151272620,
+ 0.191341716182544920,
+ -0.097545161008064096,
+ -0.353553390593273730,
+ -0.490392640201615220,
+ -0.461939766255643420,
+ -0.277785116509801090
+ },
+ { /* n = 2 */
+ 0.353553390593273730,
+ 0.277785116509801140,
+ -0.191341716182544860,
+ -0.490392640201615220,
+ -0.353553390593273840,
+ 0.097545161008064138,
+ 0.461939766255643260,
+ 0.415734806151272730
+ },
+ { /* n = 3 */
+ 0.353553390593273730,
+ 0.097545161008064152,
+ -0.461939766255643370,
+ -0.277785116509801090,
+ 0.353553390593273680,
+ 0.415734806151272730,
+ -0.191341716182544920,
+ -0.490392640201615330
+ },
+ { /* n = 4 */
+ 0.353553390593273730,
+ -0.097545161008064096,
+ -0.461939766255643420,
+ 0.277785116509800920,
+ 0.353553390593273840,
+ -0.415734806151272620,
+ -0.191341716182545280,
+ 0.490392640201615220
+ },
+ { /* n = 5 */
+ 0.353553390593273730,
+ -0.277785116509800980,
+ -0.191341716182545170,
+ 0.490392640201615220,
+ -0.353553390593273340,
+ -0.097545161008064013,
+ 0.461939766255643370,
+ -0.415734806151272510
+ },
+ { /* n = 6 */
+ 0.353553390593273730,
+ -0.415734806151272670,
+ 0.191341716182545000,
+ 0.097545161008064388,
+ -0.353553390593273620,
+ 0.490392640201615330,
+ -0.461939766255643200,
+ 0.277785116509800760
+ },
+ { /* n = 7 */
+ 0.353553390593273730,
+ -0.490392640201615220,
+ 0.461939766255643260,
+ -0.415734806151272620,
+ 0.353553390593273290,
+ -0.277785116509800760,
+ 0.191341716182544780,
+ -0.097545161008064277
+ }
+};
+
+#endif /*_OMXDCTCOSTAB_C_*/
+
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_intra.c
new file mode 100644
index 0000000..a2572e0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_intra.c
@@ -0,0 +1,144 @@
+/**
+ *
+ * File Name: armVCM4P2_DecodeVLCZigzag_intra.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for filling of the coefficient buffer
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM_Bitstream.h"
+#include "armCOMM.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+#include "armVCM4P2_ZigZag_Tables.h"
+
+
+
+/**
+ * Function: armVCM4P2_DecodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+ * The scan table is chosen from predDir and the decoding itself is delegated
+ * to armVCM4P2_GetVLCBits with the intra VLC tables.
+ *
+ * Remarks:
+ * An error is returned when decoding ends without an event whose last flag
+ * is set.
+ *
+ * Parameters:
+ * [in] ppBitStream       pointer to the pointer to the current byte in
+ *                        the bitstream buffer
+ * [in] pBitOffset        pointer to the bit position in the byte pointed
+ *                        to by *ppBitStream. *pBitOffset is valid within
+ *                        [0-7].
+ * [in] predDir           AC prediction direction which is used to decide
+ *                        the zigzag scan pattern. It takes one of the
+ *                        following values:
+ *                        OMX_VC_NONE        AC prediction not used;
+ *                                           perform classical zigzag scan;
+ *                        OMX_VC_HORIZONTAL  Horizontal prediction;
+ *                                           perform alternate-vertical
+ *                                           zigzag scan;
+ *                        OMX_VC_VERTICAL    Vertical prediction;
+ *                                           thus perform
+ *                                           alternate-horizontal
+ *                                           zigzag scan.
+ * [in] shortVideoHeader  passed through to armVCM4P2_GetVLCBits, where a
+ *                        non-zero value selects the short-video-header
+ *                        escape format
+ * [in] start             start indicates whether the decoding begins with
+ *                        the 0th element or the 1st.
+ * [out] ppBitStream      *ppBitStream is updated after the block is
+ *                        decoded, so that it points to the current byte
+ *                        in the bit stream buffer
+ * [out] pBitOffset       *pBitOffset is updated so that it points to the
+ *                        current bit position in the byte pointed by
+ *                        *ppBitStream
+ * [out] pDst             pointer to the coefficient buffer of current
+ *                        block. Must be 4-byte aligned.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr     - block decoded and a last == 1 event was seen
+ * OMX_Sts_BadArgErr - bad arguments
+ * OMX_Sts_Err       - no last == 1 event was decoded, or the error reported
+ *                     by armVCM4P2_GetVLCBits
+ *
+ */
+
+OMXResult armVCM4P2_DecodeVLCZigzag_Intra(
+    const OMX_U8 ** ppBitStream,
+    OMX_INT * pBitOffset,
+    OMX_S16 * pDst,
+    OMX_U8 predDir,
+    OMX_INT shortVideoHeader,
+    OMX_U8 start
+)
+{
+    OMX_U8 last = 0;
+    const OMX_U8 *pZigzagTable = armVCM4P2_aClassicalZigzagScan;
+    OMXResult errorCode;
+
+    /* Argument error checks */
+    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs4ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset >7), OMX_Sts_BadArgErr);
+    armRetArgErrIf((predDir > 2), OMX_Sts_BadArgErr);
+
+    /* The scan runs across the prediction direction */
+    switch (predDir)
+    {
+        case OMX_VC_NONE:
+        {
+            pZigzagTable = armVCM4P2_aClassicalZigzagScan;
+            break;
+        }
+
+        case OMX_VC_HORIZONTAL:
+        {
+            pZigzagTable = armVCM4P2_aVerticalZigzagScan;
+            break;
+        }
+
+        case OMX_VC_VERTICAL:
+        {
+            pZigzagTable = armVCM4P2_aHorizontalZigzagScan;
+            break;
+        }
+
+        default:
+        {
+            /* Keeps the classical scan selected at initialisation */
+            break;
+        }
+    }
+
+    /*
+     * 9/61 (last == 0) and 6/20 (last == 1) are the last run and the last
+     * table index that still carry several levels. For any larger index
+     * armVCM4P2_GetVLCBits computes run = (index - maxIndex) + run, so index
+     * 62 decodes to run 10 and index 21 to run 7, both with level 1.
+     * Passing the first single-level entry instead (10/62, 7/21) decodes the
+     * same values, but routes index 62 / 21 through the run index table
+     * scan, which then reads armVCM4P2_IntraL0RunIdx[11] /
+     * armVCM4P2_IntraL1RunIdx[8], one element past the end of the array.
+     */
+    errorCode = armVCM4P2_GetVLCBits (
+                    ppBitStream,
+                    pBitOffset,
+                    pDst,
+                    shortVideoHeader,
+                    start,
+                    &last,
+                    9,
+                    61,
+                    6,
+                    20,
+                    armVCM4P2_IntraL0RunIdx,
+                    armVCM4P2_IntraVlcL0,
+                    armVCM4P2_IntraL1RunIdx,
+                    armVCM4P2_IntraVlcL1,
+                    armVCM4P2_IntraL0LMAX,
+                    armVCM4P2_IntraL1LMAX,
+                    armVCM4P2_IntraL0RMAX,
+                    armVCM4P2_IntraL1RMAX,
+                    pZigzagTable );
+    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);
+
+    /* A block must be terminated by an event with last == 1 */
+    if (last == 0)
+    {
+        return OMX_Sts_Err;
+    }
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_EncodeVLCZigzag_intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_EncodeVLCZigzag_intra.c
new file mode 100644
index 0000000..cd6b56d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_EncodeVLCZigzag_intra.c
@@ -0,0 +1,145 @@
+/**
+ *
+ * File Name: armVCM4P2_EncodeVLCZigzag_intra.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for zigzag scanning and VLC encoding
+ * for intra block.
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM_Bitstream.h"
+#include "armCOMM.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+#include "armVCM4P2_ZigZag_Tables.h"
+
+
+
+/**
+ * Function: armVCM4P2_EncodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs zigzag scanning and VLC encoding for one intra block. Nothing is
+ * written when pattern is zero.
+ *
+ * Remarks:
+ * The encoding itself is delegated to armVCM4P2_PutVLCBits with the intra
+ * VLC tables.
+ *
+ * Parameters:
+ * [in] ppBitStream       pointer to the pointer to the current byte in
+ *                        the bit stream
+ * [in] pBitOffset        pointer to the bit position in the byte pointed
+ *                        by *ppBitStream. Valid within 0 to 7.
+ * [in] pQDctBlkCoef      pointer to the quantized DCT coefficient
+ * [in] predDir           AC prediction direction, which is used to decide
+ *                        the zigzag scan pattern. This takes one of the
+ *                        following values:
+ *                        OMX_VC_NONE        AC prediction not used.
+ *                                           Performs classical zigzag
+ *                                           scan.
+ *                        OMX_VC_HORIZONTAL  Horizontal prediction.
+ *                                           Performs alternate-vertical
+ *                                           zigzag scan.
+ *                        OMX_VC_VERTICAL    Vertical prediction.
+ *                                           Performs alternate-horizontal
+ *                                           zigzag scan.
+ * [in] pattern           block pattern which is used to decide whether
+ *                        this block is encoded
+ * [in] shortVideoHeader  passed through to armVCM4P2_PutVLCBits
+ * [in] start             start indicates whether the encoding begins with
+ *                        0th element or 1st.
+ * [out] ppBitStream      *ppBitStream is updated after the block is encoded,
+ *                        so that it points to the current byte in the bit
+ *                        stream buffer.
+ * [out] pBitOffset       *pBitOffset is updated so that it points to the
+ *                        current bit position in the byte pointed by
+ *                        *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_EncodeVLCZigzag_Intra(
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    const OMX_S16 *pQDctBlkCoef,
+    OMX_U8 predDir,
+    OMX_U8 pattern,
+    OMX_INT shortVideoHeader,
+    OMX_U8 start
+)
+{
+    const OMX_U8 *pScanOrder = armVCM4P2_aClassicalZigzagScan;
+    OMXResult status;
+
+    /* Argument error checks */
+    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pQDctBlkCoef == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset >7), OMX_Sts_BadArgErr);
+    armRetArgErrIf(start > 1, OMX_Sts_BadArgErr);
+    armRetArgErrIf(predDir > 2, OMX_Sts_BadArgErr);
+
+    /* A block that is not coded produces no bits */
+    if (!pattern)
+    {
+        return (OMX_Sts_NoErr);
+    }
+
+    /* The scan runs across the prediction direction; anything other than
+       horizontal or vertical keeps the classical scan */
+    if (predDir == OMX_VC_HORIZONTAL)
+    {
+        pScanOrder = armVCM4P2_aVerticalZigzagScan;
+    }
+    else if (predDir == OMX_VC_VERTICAL)
+    {
+        pScanOrder = armVCM4P2_aHorizontalZigzagScan;
+    }
+    else
+    {
+        pScanOrder = armVCM4P2_aClassicalZigzagScan;
+    }
+
+    /* 14/20 and 9/6 are presumably the maximum stored run and the last run
+       with multiple levels for last == 0 / last == 1 -- TODO confirm
+       against the armVCM4P2_PutVLCBits prototype */
+    status = armVCM4P2_PutVLCBits (
+                 ppBitStream,
+                 pBitOffset,
+                 pQDctBlkCoef,
+                 shortVideoHeader,
+                 start,
+                 14,
+                 20,
+                 9,
+                 6,
+                 armVCM4P2_IntraL0RunIdx,
+                 armVCM4P2_IntraVlcL0,
+                 armVCM4P2_IntraL1RunIdx,
+                 armVCM4P2_IntraVlcL1,
+                 armVCM4P2_IntraL0LMAX,
+                 armVCM4P2_IntraL1LMAX,
+                 armVCM4P2_IntraL0RMAX,
+                 armVCM4P2_IntraL1RMAX,
+                 pScanOrder
+             );
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    return (OMX_Sts_NoErr);
+
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLCBuffer.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLCBuffer.c
new file mode 100644
index 0000000..93c9504
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLCBuffer.c
@@ -0,0 +1,153 @@
+/**
+ *
+ * File Name: armVCM4P2_FillVLCBuffer.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains module for putting VLC bits
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+
+/**
+ * Function: armVCM4P2_FillVLCBuffer
+ *
+ * Description:
+ * Writes one (run, level, last) event to the bitstream: the escape prefix
+ * selected by fMode, followed by either the table VLC and a sign bit or a
+ * fixed-length code.
+ *
+ * Remarks:
+ * fMode 0 emits the plain VLC; 1 and 2 emit the escape prefix and then the
+ * VLC for (run, levelPlus) or (runPlus, level); 3 emits the escape prefix
+ * and a fixed-length code with a 12-bit level; 4 emits the escape prefix
+ * and a fixed-length code with an 8-bit level.
+ *
+ * Parameters:
+ * [in] ppBitStream       pointer to the pointer to the current byte in
+ *                        the bit stream
+ * [in] pBitOffset        pointer to the bit position in the byte pointed
+ *                        by *ppBitStream. Valid within 0 to 7
+ * [in] run               Run value (count of zeros) to be encoded
+ * [in] level             Level value (non-zero value) to be encoded
+ * [in] runPlus           Calculated as runPlus = run - (RMAX + 1)
+ * [in] levelPlus         Calculated as
+ *                        levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in] fMode             Flag indicating the escape modes
+ * [in] last              status of the last flag
+ * [in] maxRunForMultipleEntries
+ *                        The run value after which level will be equal to 1:
+ *                        (considering last and inter/intra status)
+ * [in] pRunIndexTable    Run Index table defined in
+ *                        armVCM4P2_Huff_Tables_VLC.h
+ * [in] pVlcTable         VLC table defined in armVCM4P2_Huff_Tables_VLC.h
+ * [out] ppBitStream      *ppBitStream is updated after the block is encoded
+ *                        so that it points to the current byte in the bit
+ *                        stream buffer.
+ * [out] pBitOffset       *pBitOffset is updated so that it points to the
+ *                        current bit position in the byte pointed by
+ *                        *ppBitStream.
+ *
+ * Return Value:
+ * Always OMX_Sts_NoErr.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLCBuffer (
+    OMX_U8 **ppBitStream,
+    OMX_INT * pBitOffset,
+    OMX_U32 run,
+    OMX_S16 level,
+    OMX_U32 runPlus,
+    OMX_S16 levelPlus,
+    OMX_U8 fMode,
+    OMX_U8 last,
+    OMX_U8 maxRunForMultipleEntries,
+    const OMX_U8 *pRunIndexTable,
+    const ARM_VLC32 *pVlcTable
+)
+{
+    OMX_INT vlcIndex;
+    OMX_U32 codedRun = run, negative = 0;
+    OMX_S16 codedLevel = level;
+
+    /* Escape prefix; modes 1 and 2 also swap in the reduced level or run */
+    switch (fMode)
+    {
+        case 1:
+            armPackBits(ppBitStream, pBitOffset, 3, 7);
+            armPackBits(ppBitStream, pBitOffset, 0, 1);
+            codedLevel = levelPlus;
+            break;
+
+        case 2:
+            armPackBits(ppBitStream, pBitOffset, 3, 7);
+            armPackBits(ppBitStream, pBitOffset, 2, 2);
+            codedRun = runPlus;
+            break;
+
+        case 3:
+            armPackBits(ppBitStream, pBitOffset, 3, 7);
+            armPackBits(ppBitStream, pBitOffset, 3, 2);
+            break;
+
+        case 4:
+            /* 8-bit level FLC; 0 and -128 are not written */
+            armPackBits(ppBitStream, pBitOffset, 3, 7);
+            armPackBits(ppBitStream, pBitOffset, (OMX_U32)last, 1);
+            armPackBits(ppBitStream, pBitOffset, codedRun, 6);
+            if ((codedLevel != 0) && (codedLevel != -128))
+            {
+                armPackBits(ppBitStream, pBitOffset,
+                            (OMX_U32) codedLevel, 8);
+            }
+            return OMX_Sts_NoErr;
+
+        default:
+            break;
+    }
+
+    /* Split the level into magnitude and sign */
+    if (codedLevel < 0)
+    {
+        negative = 1;
+        codedLevel = armAbs(codedLevel);
+    }
+
+    if (fMode >= 3)
+    {
+        /* Fixed-length code: last, run, marker, signed 12-bit level, marker */
+        if (negative)
+        {
+            codedLevel = -codedLevel;
+        }
+        armPackBits(ppBitStream, pBitOffset, (OMX_U32)last, 1);
+        armPackBits(ppBitStream, pBitOffset, run, 6);
+        armPackBits(ppBitStream, pBitOffset, 1, 1);
+        armPackBits(ppBitStream, pBitOffset,
+                    (OMX_U32) codedLevel, 12);
+        armPackBits(ppBitStream, pBitOffset, 1, 1);
+        return OMX_Sts_NoErr;
+    }
+
+    /* Table code: runs beyond maxRunForMultipleEntries hold one entry each,
+       stored consecutively after the last multi-level run */
+    if (codedRun > maxRunForMultipleEntries)
+    {
+        vlcIndex = pRunIndexTable [maxRunForMultipleEntries + 1] +
+                   (codedRun - maxRunForMultipleEntries - 1);
+    }
+    else
+    {
+        vlcIndex = pRunIndexTable [codedRun] + (codedLevel -1);
+    }
+
+    armPackVLC32 (ppBitStream, pBitOffset,
+                  pVlcTable [vlcIndex]);
+    armPackBits(ppBitStream, pBitOffset, (OMX_U32)negative, 1);
+
+    return OMX_Sts_NoErr;
+}
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLDBuffer.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLDBuffer.c
new file mode 100644
index 0000000..1712c3a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_FillVLDBuffer.c
@@ -0,0 +1,84 @@
+/**
+ *
+ * File Name: armVCM4P2_FillVLDBuffer.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains module for VLC get bits from the stream
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVCM4P2_ZigZag_Tables.h"
+
+
+/**
+ * Function: armVCM4P2_FillVLDBuffer
+ *
+ * Description:
+ * Stores one decoded (run, level) event in the coefficient buffer: writes
+ * storeRun zeros, then the signed level, advancing *pIndex past each
+ * coefficient written. When last is 1 the rest of the 64-entry block is
+ * zero filled.
+ *
+ * Parameters:
+ * [in] storeRun       Stored Run value (count of zeros)
+ * [in] storeLevel     Stored Level value (non-zero value)
+ * [in] sign           Flag indicating the sign of level; 1 negates it
+ * [in] last           status of the last flag
+ * [in] pIndex         pointer to coefficient index in 8x8 matrix
+ * [out] pIndex        pointer to updated coefficient index in 8x8
+ *                     matrix
+ * [in] pZigzagTable   pointer to the zigzag tables
+ * [out] pDst          pointer to the coefficient buffer of current
+ *                     block. Should be 32-bit aligned
+ * Return Value:
+ * Always OMX_Sts_NoErr. No bounds checking is done here; the caller must
+ * make sure the run fits in the block.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLDBuffer(
+    OMX_U32 storeRun,
+    OMX_S16 * pDst,
+    OMX_S16 storeLevel,
+    OMX_U8 sign,
+    OMX_U8 last,
+    OMX_U8 * pIndex,
+    const OMX_U8 * pZigzagTable
+)
+{
+    /* Zero run preceding the coefficient */
+    while (storeRun > 0)
+    {
+        pDst[pZigzagTable[*pIndex]] = 0;
+        (*pIndex)++;
+        storeRun--;
+    }
+
+    /* The coefficient itself, negated when the sign flag is set */
+    pDst[pZigzagTable[*pIndex]] = (sign == 1) ? -storeLevel : storeLevel;
+    (*pIndex)++;
+
+    /* Last event of the block: everything after it is zero */
+    if (last == 1)
+    {
+        for (; *pIndex < 64; (*pIndex)++)
+        {
+            pDst[pZigzagTable[*pIndex]] = 0;
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_GetVLCBits.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_GetVLCBits.c
new file mode 100644
index 0000000..953f597
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_GetVLCBits.c
@@ -0,0 +1,278 @@
+/**
+ *
+ * File Name: armVCM4P2_GetVLCBits.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains module for VLC get bits from the stream
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+#include "armVCM4P2_ZigZag_Tables.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+
+
+/**
+ * Function: armVCM4P2_GetVLCBits
+ *
+ * Description:
+ * Decodes (run, level, last) events from the bitstream, starting at
+ * coefficient position 'start', and stores the coefficients in pDst through
+ * pZigzagTable by calling armVCM4P2_FillVLDBuffer. A 7-bit prefix equal to 3
+ * selects an escape mode. Table codes are looked up in the last == 0 table
+ * first and, when no code matches there, in the last == 1 table.
+ *
+ * Remarks:
+ * With shortVideoHeader non-zero, an escape is a fixed-length code with an
+ * 8-bit level, and the function returns right after storing that event.
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * by *ppBitStream. Valid within 0 to 7
+ * [in] shortVideoHeader non-zero selects the short video header escape
+ * format (last, 6-bit run, 8-bit signed level)
+ * [in] start start indicates whether the decoding begins with
+ * 0th element or 1st.
+ * [in/out] pLast pointer to last status flag; written for every event
+ * [in] runBeginSingleLevelEntriesL0 run offset added to
+ * (index - maxIndexForMultipleEntriesL0)
+ * for last == 0 indices above
+ * maxIndexForMultipleEntriesL0
+ * [in] maxIndexForMultipleEntriesL0 last == 0 VLC table index above which
+ * the level is taken as 1 and the run is
+ * computed instead of looked up
+ * [in] maxRunForMultipleEntriesL1 same as runBeginSingleLevelEntriesL0,
+ * for last == 1
+ * [in] maxIndexForMultipleEntriesL1 same as maxIndexForMultipleEntriesL0,
+ * for last == 1
+ * [in] pRunIndexTableL0 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pVlcTableL0 VLC table for last == 0
+ * [in] pRunIndexTableL1 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pVlcTableL1 VLC table for last == 1
+ * [in] pLMAXTableL0 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pLMAXTableL1 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pRMAXTableL0 Run MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pRMAXTableL1 Run MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pZigzagTable scan table mapping coefficient order to the
+ * position written in pDst
+ * [out]pDst pointer to the coefficient buffer of current
+ * block. Should be 32-bit aligned
+ *
+ * Return Value:
+ * OMX_Sts_NoErr on success; OMX_Sts_Err when a code matches neither table,
+ * a run would pass the end of the block, a fixed-length level is invalid,
+ * or the marker bit checked in escape type 3 is 0.
+ *
+ */
+
+OMXResult armVCM4P2_GetVLCBits (
+ const OMX_U8 **ppBitStream,
+ OMX_INT * pBitOffset,
+ OMX_S16 * pDst,
+ OMX_INT shortVideoHeader,
+ OMX_U8 start,
+ OMX_U8 * pLast,
+ OMX_U8 runBeginSingleLevelEntriesL0,
+ OMX_U8 maxIndexForMultipleEntriesL0,
+ OMX_U8 maxRunForMultipleEntriesL1,
+ OMX_U8 maxIndexForMultipleEntriesL1,
+ const OMX_U8 * pRunIndexTableL0,
+ const ARM_VLC32 *pVlcTableL0,
+ const OMX_U8 * pRunIndexTableL1,
+ const ARM_VLC32 *pVlcTableL1,
+ const OMX_U8 * pLMAXTableL0,
+ const OMX_U8 * pLMAXTableL1,
+ const OMX_U8 * pRMAXTableL0,
+ const OMX_U8 * pRMAXTableL1,
+ const OMX_U8 * pZigzagTable
+)
+{
+ OMX_U32 storeRun;
+ OMX_U8 tabIndex, markerBit;
+ OMX_S16 storeLevel;
+ OMX_U16 unpackRetIndex;
+ OMX_U8 i, fType, escape;
+ OMX_U8 sign = 0;
+
+ /* Unpacking the bitstream and RLD */
+ for (i = start; i < 64;) /* i is advanced by armVCM4P2_FillVLDBuffer through &i */
+ {
+ escape = armLookAheadBits(ppBitStream, pBitOffset, 7); /* peek only; the value 3 is the escape prefix */
+ if (escape != 3)
+ {
+ fType = 0; /* Not in escape mode */
+ }
+ else
+ {
+ armSkipBits (ppBitStream, pBitOffset, 7);
+ if(shortVideoHeader)
+ {
+ *pLast = armGetBits(ppBitStream, pBitOffset, 1);
+ storeRun = armGetBits(ppBitStream, pBitOffset, 6);
+ storeLevel = armGetBits(ppBitStream, pBitOffset, 8);
+
+ /* Ref to Table B-18 (c) in MPEG4 Standard- FLC code for */
+ /* LEVEL when short_video_header is 1, the storeLevel is */
+ /* a signed value and the sign and the unsigned value for */
+ /* storeLevel need to be extracted and passed to arm */
+ /* FillVLDBuffer function */
+
+ sign = (storeLevel & 0x80);
+ if(sign==0x80)
+ {
+ storeLevel=(storeLevel^0xff)+1; /* 8-bit two's complement magnitude */
+ sign=1;
+
+ }
+
+ armRetDataErrIf( storeLevel == 0 || sign*storeLevel == 128 , OMX_Sts_Err); /* Invalid FLC */
+ armRetDataErrIf((i + storeRun) >= 64, OMX_Sts_Err);
+ armVCM4P2_FillVLDBuffer(
+ storeRun,
+ pDst,
+ storeLevel,
+ sign,
+ *pLast,
+ &i,
+ pZigzagTable);
+ return OMX_Sts_NoErr; /* NOTE(review): returns after this one event even when *pLast == 0 - confirm this is intended */
+
+ }
+ if (armGetBits(ppBitStream, pBitOffset, 1)) /* escape type bits: '0' -> 1, '10' -> 2, '11' -> 3 */
+ {
+ if (armGetBits(ppBitStream, pBitOffset, 1))
+ {
+ fType = 3;
+ }
+ else
+ {
+ fType = 2;
+ }
+ }
+ else
+ {
+ fType = 1;
+ }
+ }
+
+ if (fType < 3)
+ {
+ unpackRetIndex = armUnPackVLC32(ppBitStream, pBitOffset,
+ pVlcTableL0);
+ if (unpackRetIndex != ARM_NO_CODEBOOK_INDEX)
+ {
+ /* Decode run and level from the index */
+ /* last = 0 */
+ *pLast = 0;
+ if (unpackRetIndex > maxIndexForMultipleEntriesL0)
+ {
+ storeLevel = 1;
+ storeRun = (unpackRetIndex - maxIndexForMultipleEntriesL0)
+ + runBeginSingleLevelEntriesL0;
+ }
+ else
+ {
+ tabIndex = 1;
+ while (pRunIndexTableL0[tabIndex] <= unpackRetIndex) /* NOTE(review): if unpackRetIndex equals the final table entry this reads one element past it - confirm table size against maxIndexForMultipleEntriesL0 */
+ {
+ tabIndex++;
+ }
+ storeRun = tabIndex - 1;
+ storeLevel = unpackRetIndex - pRunIndexTableL0[tabIndex - 1] + 1;
+ }
+ sign = (OMX_U8) armGetBits(ppBitStream, pBitOffset, 1);
+
+ if (fType == 1) /* escape type 1: decoded level is offset by LMAX for this run */
+ {
+ storeLevel = (armAbs(storeLevel) + pLMAXTableL0[storeRun]);
+ }
+ else if (fType == 2) /* escape type 2: decoded run is offset by RMAX for this level, plus 1 */
+ {
+ storeRun = storeRun + pRMAXTableL0[storeLevel-1] + 1;
+ }
+ }
+ else
+ {
+ unpackRetIndex = armUnPackVLC32(ppBitStream, pBitOffset,
+ pVlcTableL1);
+
+ armRetDataErrIf(unpackRetIndex == ARM_NO_CODEBOOK_INDEX, OMX_Sts_Err);
+
+ /* Decode run and level from the index */
+ /* last = 1 */
+ *pLast = 1;
+ if (unpackRetIndex > maxIndexForMultipleEntriesL1)
+ {
+ storeLevel = 1;
+ storeRun = (unpackRetIndex - maxIndexForMultipleEntriesL1)
+ + maxRunForMultipleEntriesL1;
+ }
+ else
+ {
+ tabIndex = 1;
+ while (pRunIndexTableL1[tabIndex] <= unpackRetIndex) /* NOTE(review): same one-past-the-end read as the last == 0 scan when unpackRetIndex equals the final table entry - confirm */
+ {
+ tabIndex++;
+ }
+ storeRun = tabIndex - 1;
+ storeLevel = unpackRetIndex - pRunIndexTableL1[tabIndex - 1] + 1;
+ }
+ sign = (OMX_U8) armGetBits(ppBitStream, pBitOffset, 1);
+
+ if (fType == 1)
+ {
+ storeLevel = (armAbs(storeLevel) + pLMAXTableL1[storeRun]);
+ }
+ else if (fType == 2)
+ {
+ storeRun = storeRun + pRMAXTableL1[storeLevel-1] + 1;
+ }
+ }
+ armRetDataErrIf((i + storeRun) >= 64, OMX_Sts_Err);
+ armVCM4P2_FillVLDBuffer(
+ storeRun,
+ pDst,
+ storeLevel,
+ sign,
+ *pLast,
+ &i,
+ pZigzagTable);
+ }
+ else
+ {
+ *pLast = armGetBits(ppBitStream, pBitOffset, 1); /* escape type 3: last(1), run(6), marker(1), level(12), marker(1) */
+ storeRun = armGetBits(ppBitStream, pBitOffset, 6);
+ armRetDataErrIf((i + storeRun) >= 64, OMX_Sts_Err);
+ markerBit = armGetBits(ppBitStream, pBitOffset, 1);
+ armRetDataErrIf( markerBit == 0, OMX_Sts_Err);
+ storeLevel = armGetBits(ppBitStream, pBitOffset, 12);
+ if (storeLevel & 0x800) /* sign-extend the 12-bit level */
+ {
+ storeLevel -= 4096;
+ }
+ armRetDataErrIf( storeLevel == 0 || storeLevel == -2048 , OMX_Sts_Err); /* Invalid FLC */
+ armGetBits(ppBitStream, pBitOffset, 1); /* trailing marker bit is read but not checked */
+ armVCM4P2_FillVLDBuffer(
+ storeRun,
+ pDst,
+ storeLevel,
+ 0, /* Sign is not used, preprocessing done */
+ *pLast,
+ &i,
+ pZigzagTable);
+
+ }
+ } /* End of forloop for i */
+ return OMX_Sts_NoErr;
+}
+
+/* End of File */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
new file mode 100644
index 0000000..cd7e9e4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
@@ -0,0 +1,495 @@
+ /**
+ *
+ * File Name: armVCM4P2_Huff_Tables_VLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVCM4P2_Huff_Tables_VLC.c
+ * Description: Contains all the Huffman tables used in MPEG4 codec
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armCOMM_Bitstream.h"
+
+/*
+* For Intra
+* last = 0
+*/
+const OMX_U8 armVCM4P2_IntraL0RunIdx[11] =
+{
+ 0, 27, 37, 42, 46, 49, 52,
+ 55, 58, 60, 62
+};
+
+/* Entry defined for all values
+* for run = 0 to 14
+* Note: the last entry is to terminate while decoding
+*/
+const ARM_VLC32 armVCM4P2_IntraVlcL0[68] =
+{
+ {2, 2},
+ {3, 6},
+ {4, 15},
+ {5, 13},
+ {5, 12},
+ {6, 21},
+ {6, 19},
+ {6, 18},
+ {7, 23},
+ {8, 31},
+ {8, 30},
+ {8, 29},
+ {9, 37},
+ {9, 36},
+ {9, 35},
+ {9, 33},
+ {10, 33},
+ {10, 32},
+ {10, 15},
+ {10, 14},
+ {11, 7},
+ {11, 6},
+ {11, 32},
+ {11, 33},
+ {12, 80},
+ {12, 81},
+ {12, 82},
+ {4, 14},
+ {6, 20},
+ {7, 22},
+ {8, 28},
+ {9, 32},
+ {9, 31},
+ {10, 13},
+ {11, 34},
+ {12, 83},
+ {12, 85},
+ {5, 11},
+ {7, 21},
+ {9, 30},
+ {10, 12},
+ {12, 86},
+ {6, 17},
+ {8, 27},
+ {9, 29},
+ {10, 11},
+ {6, 16},
+ {9, 34},
+ {10, 10},
+ {6, 13},
+ {9, 28},
+ {10, 8},
+ {7, 18},
+ {9, 27},
+ {12, 84},
+ {7, 20},
+ {9, 26},
+ {12, 87},
+ {8, 25},
+ {10, 9},
+ {8, 24},
+ {11, 35},
+ {8, 23},
+ {9, 25},
+ {9, 24},
+ {10, 7},
+ {12, 88},
+ {0, 0}
+};
+
+/*
+* For Intra
+* last = 1
+*/
+
+const OMX_U8 armVCM4P2_IntraL1RunIdx[8] =
+{
+ 0, 8, 11, 13, 15, 17, 19, 21
+};
+
+/* Entry defined for all values
+* for run = 0 to 20
+* Note: the last entry is to terminate while decoding
+*/
+const ARM_VLC32 armVCM4P2_IntraVlcL1[36] =
+{
+ {4, 7},
+ {6, 12},
+ {8, 22},
+ {9, 23},
+ {10, 6},
+ {11, 5},
+ {11, 4},
+ {12, 89},
+ {6, 15},
+ {9, 22},
+ {10, 5},
+ {6, 14},
+ {10, 4},
+ {7, 17},
+ {11, 36},
+ {7, 16},
+ {11, 37},
+ {7, 19},
+ {12, 90},
+ {8, 21},
+ {12, 91},
+ {8, 20},
+ {8, 19},
+ {8, 26},
+ {9, 21},
+ {9, 20},
+ {9, 19},
+ {9, 18},
+ {9, 17},
+ {11, 38},
+ {11, 39},
+ {12, 92},
+ {12, 93},
+ {12, 94},
+ {12, 95},
+ {0, 0}
+};
+
+/* LMAX table for Intra (Last == 0)*/
+const OMX_U8 armVCM4P2_IntraL0LMAX[15] =
+{
+ 27, 10, 5, 4, 3, 3, 3,
+ 3, 2, 2, 1, 1, 1, 1, 1
+};
+
+/* LMAX table for Intra (Last == 1)*/
+const OMX_U8 armVCM4P2_IntraL1LMAX[21] =
+{
+ 8, 3, 2, 2, 2, 2, 2, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1
+};
+
+/* RMAX table for Intra (Last == 0)
+ Level - 1 Indexed
+*/
+const OMX_U8 armVCM4P2_IntraL0RMAX[27] =
+{
+ 14, 9, 7, 3, 2, 1, 1,
+ 1, 1, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0
+};
+
+/* RMAX table for Intra (Last == 1)
+ Level - 1 Indexed
+*/
+const OMX_U8 armVCM4P2_IntraL1RMAX[8] =
+{
+ 20, 6, 1, 0, 0, 0, 0, 0
+};
+
+/*
+* For Inter
+* last = 0
+*/
+const OMX_U8 armVCM4P2_InterL0RunIdx[12] =
+{
+ 0, 12, 18, 22, 25, 28,
+ 31, 34, 36, 38, 40, 42
+};
+
+/* Entry defined for all values
+* for run = 0 to 26
+* Note: the last entry is to terminate while decoding
+*/
+const ARM_VLC32 armVCM4P2_InterVlcL0[59] =
+{
+ {2, 2},
+ {4, 15},
+ {6, 21},
+ {7, 23},
+ {8, 31},
+ {9, 37},
+ {9, 36},
+ {10, 33},
+ {10, 32},
+ {11, 7},
+ {11, 6},
+ {11, 32},
+ {3, 6},
+ {6, 20},
+ {8, 30},
+ {10, 15},
+ {11, 33},
+ {12, 80},
+ {4, 14},
+ {8, 29},
+ {10, 14},
+ {12, 81},
+ {5, 13},
+ {9, 35},
+ {10, 13},
+ {5, 12},
+ {9, 34},
+ {12, 82},
+ {5, 11},
+ {10, 12},
+ {12, 83},
+ {6, 19},
+ {10, 11},
+ {12, 84},
+ {6, 18},
+ {10, 10},
+ {6, 17},
+ {10, 9},
+ {6, 16},
+ {10, 8},
+ {7, 22},
+ {12, 85},
+ {7, 21},
+ {7, 20},
+ {8, 28},
+ {8, 27},
+ {9, 33},
+ {9, 32},
+ {9, 31},
+ {9, 30},
+ {9, 29},
+ {9, 28},
+ {9, 27},
+ {9, 26},
+ {11, 34},
+ {11, 35},
+ {12, 86},
+ {12, 87},
+ {0, 0}
+};
+
+
+/*
+* For Inter
+* last = 1
+*/
+
+const OMX_U8 armVCM4P2_InterL1RunIdx[3] =
+{
+ 0, 3, 5
+};
+
+/* Entry defined for all values
+* for run = 0 to 40
+* Note: the last entry is to terminate while decoding
+*/
+const ARM_VLC32 armVCM4P2_InterVlcL1[45] =
+{
+ {4, 7},
+ {9, 25},
+ {11, 5},
+ {6, 15},
+ {11, 4},
+ {6, 14},
+ {6, 13},
+ {6, 12},
+ {7, 19},
+ {7, 18},
+ {7, 17},
+ {7, 16},
+ {8, 26},
+ {8, 25},
+ {8, 24},
+ {8, 23},
+ {8, 22},
+ {8, 21},
+ {8, 20},
+ {8, 19},
+ {9, 24},
+ {9, 23},
+ {9, 22},
+ {9, 21},
+ {9, 20},
+ {9, 19},
+ {9, 18},
+ {9, 17},
+ {10, 7},
+ {10, 6},
+ {10, 5},
+ {10, 4},
+ {11, 36},
+ {11, 37},
+ {11, 38},
+ {11, 39},
+ {12, 88},
+ {12, 89},
+ {12, 90},
+ {12, 91},
+ {12, 92},
+ {12, 93},
+ {12, 94},
+ {12, 95},
+ { 0, 0}
+};
+
+/* LMAX table for Inter (Last == 0)*/
+const OMX_U8 armVCM4P2_InterL0LMAX[27] =
+{
+ 12, 6, 4, 3, 3, 3, 3, 2,
+ 2, 2, 2, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1,
+};
+
+/* LMAX table for Inter (Last == 1)*/
+const OMX_U8 armVCM4P2_InterL1LMAX[41] =
+{
+ 3, 2, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1,
+};
+
+/* RMAX table for Inter (Last == 0)
+ Level - 1 Indexed
+*/
+const OMX_U8 armVCM4P2_InterL0RMAX[12] =
+{
+ 26, 10, 6, 2, 1, 1,
+ 0, 0, 0, 0, 0, 0
+};
+
+/* RMAX table for Inter (Last == 1)
+ Level - 1 Indexed
+*/
+const OMX_U8 armVCM4P2_InterL1RMAX[3] =
+{
+ 40, 1, 0
+};
+
+/*
+* For Intra - Luminance
+*/
+
+const ARM_VLC32 armVCM4P2_aIntraDCLumaIndex[14] =
+{
+ {3, 3},
+ {2, 3},
+ {2, 2},
+ {3, 2},
+ {3, 1},
+ {4, 1},
+ {5, 1},
+ {6, 1},
+ {7, 1},
+ {8, 1},
+ {9, 1},
+ {10, 1},
+ {11, 1},
+ {0, 0}
+};
+
+/*
+* For Intra - Chrominance
+*/
+
+const ARM_VLC32 armVCM4P2_aIntraDCChromaIndex[14] =
+{
+ {2, 3},
+ {2, 2},
+ {2, 1},
+ {3, 1},
+ {4, 1},
+ {5, 1},
+ {6, 1},
+ {7, 1},
+ {8, 1},
+ {9, 1},
+ {10, 1},
+ {11, 1},
+ {12, 1},
+ {0, 0}
+};
+
+/*
+ * Motion vector decoding table
+ */
+
+const ARM_VLC32 armVCM4P2_aVlcMVD[66] =
+{
+ {13, 5},
+ {13, 7},
+ {12, 5},
+ {12, 7},
+ {12, 9},
+ {12, 11},
+ {12, 13},
+ {12, 15},
+ {11, 9},
+ {11, 11},
+ {11, 13},
+ {11, 15},
+ {11, 17},
+ {11, 19},
+ {11, 21},
+ {11, 23},
+ {11, 25},
+ {11, 27},
+ {11, 29},
+ {11, 31},
+ {11, 33},
+ {11, 35},
+ {10, 19},
+ {10, 21},
+ {10, 23},
+ {8, 7},
+ {8, 9},
+ {8, 11},
+ {7, 7},
+ {5, 3},
+ {4, 3},
+ {3, 3},
+ {1, 1},
+ {3, 2},
+ {4, 2},
+ {5, 2},
+ {7, 6},
+ {8, 10},
+ {8, 8},
+ {8, 6},
+ {10, 22},
+ {10, 20},
+ {10, 18},
+ {11, 34},
+ {11, 32},
+ {11, 30},
+ {11, 28},
+ {11, 26},
+ {11, 24},
+ {11, 22},
+ {11, 20},
+ {11, 18},
+ {11, 16},
+ {11, 14},
+ {11, 12},
+ {11, 10},
+ {11, 8},
+ {12, 14},
+ {12, 12},
+ {12, 10},
+ {12, 8},
+ {12, 6},
+ {12, 4},
+ {13, 6},
+ {13, 4},
+ { 0, 0}
+};
+
+/* End of file */
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_PutVLCBits.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_PutVLCBits.c
new file mode 100644
index 0000000..ca9efec
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_PutVLCBits.c
@@ -0,0 +1,200 @@
+/**
+ *
+ * File Name: armVCM4P2_PutVLCBits.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains module for VLC put bits to bitstream
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+#include "armVCM4P2_ZigZag_Tables.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+
+
+/**
+ * Function: armVCM4P2_PutVLCBits
+ *
+ * Description:
+ * Run-length encodes the quantized DCT coefficients of one block, visiting
+ * them in the order given by pZigzagTable. For every (last, run, level)
+ * event it asks armVCM4P2_CheckVLCEscapeMode for the escape mode and then
+ * writes the event with armVCM4P2_FillVLCBuffer.
+ *
+ * Remarks:
+ * Events are emitted one step late: a pending event is written with
+ * last = 0 when the next non-zero coefficient is found, and the final
+ * pending event is written with last = 1 after the scan completes.
+ * NOTE(review): if no non-zero coefficient exists at or after 'start',
+ * storeLevel stays 0 and the RMAX lookup below indexes element -1; callers
+ * are presumably expected not to call this function for such blocks -
+ * confirm against the callers.
+ *
+ * Parameters:
+ * [in,out] ppBitStream      pointer to the pointer to the current byte in
+ *                           the bit stream; passed on to
+ *                           armVCM4P2_FillVLCBuffer, which advances it
+ * [in,out] pBitOffset       pointer to the bit position in the byte pointed
+ *                           by *ppBitStream. Valid within 0 to 7; passed on
+ *                           to armVCM4P2_FillVLCBuffer, which advances it
+ * [in] pQDctBlkCoef         pointer to the quantized DCT coefficients of
+ *                           the block (read only)
+ * [in] shortVideoHeader     binary flag indicating presence of
+ *                           short_video_header; escape modes 0-3 are used
+ *                           if shortVideoHeader==0, and escape mode 4 is
+ *                           used when shortVideoHeader==1.
+ * [in] start                scan position at which encoding begins
+ *                           (0th element or 1st)
+ * [in] maxStoreRunL0        Max run possible (considering last and
+ *                           inter/intra) for last = 0
+ * [in] maxStoreRunL1        Max run possible (considering last and
+ *                           inter/intra) for last = 1
+ * [in] maxRunForMultipleEntriesL0
+ *                           The run value after which level
+ *                           will be equal to 1:
+ *                           (considering last and inter/intra status)
+ *                           for last = 0
+ * [in] maxRunForMultipleEntriesL1
+ *                           The run value after which level
+ *                           will be equal to 1:
+ *                           (considering last and inter/intra status)
+ *                           for last = 1
+ * [in] pRunIndexTableL0     Run Index table defined in
+ *                           armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pVlcTableL0          VLC table for last == 0
+ * [in] pRunIndexTableL1     Run Index table defined in
+ *                           armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pVlcTableL1          VLC table for last == 1
+ * [in] pLMAXTableL0         Level MAX table (indexed by run) defined in
+ *                           armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pLMAXTableL1         Level MAX table (indexed by run) defined in
+ *                           armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pRMAXTableL0         Run MAX table (indexed by |level| - 1) defined
+ *                           in armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pRMAXTableL1         Run MAX table (indexed by |level| - 1) defined
+ *                           in armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pZigzagTable         64-entry scan table mapping scan position to
+ *                           the index of the coefficient in pQDctBlkCoef
+ *
+ * Return Value:
+ * Always OMX_Sts_NoErr; the results of the helper calls are not inspected.
+ *
+ */
+
+
+OMXResult armVCM4P2_PutVLCBits (
+              OMX_U8 **ppBitStream,
+              OMX_INT * pBitOffset,
+              const OMX_S16 *pQDctBlkCoef,
+              OMX_INT shortVideoHeader,
+              OMX_U8 start,
+              OMX_U8 maxStoreRunL0,
+              OMX_U8 maxStoreRunL1,
+              OMX_U8 maxRunForMultipleEntriesL0,
+              OMX_U8 maxRunForMultipleEntriesL1,
+              const OMX_U8 * pRunIndexTableL0,
+              const ARM_VLC32 *pVlcTableL0,
+              const OMX_U8 * pRunIndexTableL1,
+              const ARM_VLC32 *pVlcTableL1,
+              const OMX_U8 * pLMAXTableL0,
+              const OMX_U8 * pLMAXTableL1,
+              const OMX_U8 * pRMAXTableL0,
+              const OMX_U8 * pRMAXTableL1,
+              const OMX_U8 * pZigzagTable
+)
+{
+
+    OMX_U32 storeRun = 0, run, storeRunPlus;
+    OMX_U8  last = 0, first = 1, fMode;
+    OMX_S16 level, storeLevel = 0, storeLevelPlus;
+    OMX_INT i;
+
+    /* RLE encoding and packing the bits into the streams */
+    for (i = start, run=0; i < 64; i++)
+    {
+        level = pQDctBlkCoef[pZigzagTable[i]];
+
+        /* Counting the run */
+        if (level == 0)
+        {
+            run++;
+        }
+
+        /* Found a non-zero coeff */
+        else
+        {
+            /* The very first non-zero coefficient has no pending event to
+             * flush; every later one flushes the previous event, which is
+             * now known not to be the last one in the block. */
+            if (first == 0)
+            {
+                last = 0;
+
+                /* Check for a valid entry in the VLC table:
+                 * candidate level for escape type 1 (|level| - LMAX[run])
+                 * and candidate run for escape type 2
+                 * (run - (RMAX[|level| - 1] + 1)) */
+                storeLevelPlus = armSignCheck(storeLevel) *
+                  (armAbs(storeLevel) - pLMAXTableL0[storeRun]);
+                storeRunPlus = storeRun -
+                              (pRMAXTableL0[armAbs(storeLevel) - 1] + 1);
+
+                fMode = armVCM4P2_CheckVLCEscapeMode(
+                                             storeRun,
+                                             storeRunPlus,
+                                             storeLevel,
+                                             storeLevelPlus,
+                                             maxStoreRunL0,
+                                             maxRunForMultipleEntriesL0,
+                                             shortVideoHeader,
+                                             pRunIndexTableL0);
+
+                armVCM4P2_FillVLCBuffer (
+                                      ppBitStream,
+                                      pBitOffset,
+                                      storeRun,
+                                      storeLevel,
+                                      storeRunPlus,
+                                      storeLevelPlus,
+                                      fMode,
+                                      last,
+                                      maxRunForMultipleEntriesL0,
+                                      pRunIndexTableL0,
+                                      pVlcTableL0);
+            }
+            /* Remember this coefficient as the new pending event */
+            storeLevel = level;
+            storeRun = run;
+            first = 0;
+            run = 0;
+        }
+
+    } /* end of for loop for 64 elements */
+
+    /* writing the last element */
+    last = 1;
+
+    /* Check for a valid entry in the VLC table.
+     * LMAX must be looked up with storeRun (the run of the pending event);
+     * 'run' at this point only counts the zeros that trail the final
+     * non-zero coefficient and does not belong to any event. */
+    storeLevelPlus = armSignCheck(storeLevel) *
+      (armAbs(storeLevel) - pLMAXTableL1[storeRun]);
+    storeRunPlus = storeRun -
+                  (pRMAXTableL1[armAbs(storeLevel) - 1] + 1);
+    fMode = armVCM4P2_CheckVLCEscapeMode(
+                                 storeRun,
+                                 storeRunPlus,
+                                 storeLevel,
+                                 storeLevelPlus,
+                                 maxStoreRunL1,
+                                 maxRunForMultipleEntriesL1,
+                                 shortVideoHeader,
+                                 pRunIndexTableL1);
+
+    armVCM4P2_FillVLCBuffer (
+                          ppBitStream,
+                          pBitOffset,
+                          storeRun,
+                          storeLevel,
+                          storeRunPlus,
+                          storeLevelPlus,
+                          fMode,
+                          last,
+                          maxRunForMultipleEntriesL1,
+                          pRunIndexTableL1,
+                          pVlcTableL1);
+    return OMX_Sts_NoErr;
+}
+
+/* End of File */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_SetPredDir.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_SetPredDir.c
new file mode 100644
index 0000000..a9cd008
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_SetPredDir.c
@@ -0,0 +1,89 @@
+/**
+ *
+ * File Name: armVCM4P2_SetPredDir.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains module for detecting the prediction direction
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P2_SetPredDir
+ *
+ * Description:
+ * Detects the intra prediction direction for a block by comparing the DC
+ * gradients of its left, top-left and top neighbours, and reports the
+ * quantization parameter of the neighbour chosen as predictor.
+ *
+ * Remarks:
+ * The neighbouring DC values are read as signed 16-bit quantities and
+ * compared at full precision; they are not narrowed to 8 bits.
+ *
+ * Parameters:
+ * [in]  blockIndex   block index indicating the component type and
+ *                    position as defined in subclause 6.1.3.8, of ISO/IEC
+ *                    14496-2. Furthermore, indexes 6 to 9 indicate the
+ *                    alpha blocks spatially corresponding to luminance
+ *                    blocks 0 to 3 in the same macroblock.
+ * [in]  pCoefBufRow  pointer to the coefficient row buffer; *pCoefBufRow
+ *                    is used as the top DC (except for blockIndex 3) and
+ *                    *(pCoefBufRow - 8) as the top-left DC
+ * [in]  pCoefBufCol  pointer to the coefficient column buffer;
+ *                    *pCoefBufCol is used as the left DC and, for
+ *                    blockIndex 3 only, *(pCoefBufCol - 8) as the top DC
+ * [out] predDir      indicates the prediction direction which takes one
+ *                    of the following values:
+ *                    OMX_VC_HORIZONTAL    predict horizontally
+ *                    OMX_VC_VERTICAL      predict vertically
+ * [out] predQP       quantization parameter of the predictor block:
+ *                    pQpBuf[1] for vertical, pQpBuf[0] for horizontal
+ * [in]  pQpBuf       pointer to the quantization parameter buffer; entries
+ *                    0 and 1 are read
+ *
+ * Return Value:
+ * Always OMX_Sts_NoErr.
+ *
+ */
+
+OMXResult armVCM4P2_SetPredDir(
+     OMX_INT blockIndex,
+     OMX_S16 *pCoefBufRow,
+     OMX_S16 *pCoefBufCol,
+     OMX_INT *predDir,
+     OMX_INT *predQP,
+     const OMX_U8 *pQpBuf
+)
+{
+    /* Held as OMX_INT so the OMX_S16 buffer values keep their sign and
+     * full magnitude; narrowing them to OMX_U8 would corrupt the gradient
+     * comparison for any DC value outside 0..255. */
+    OMX_INT blockDCLeft;
+    OMX_INT blockDCTop;
+    OMX_INT blockDCTopLeft;
+
+    /* Block 3 takes its top neighbour from the column buffer instead of
+     * the row buffer. */
+    if (blockIndex == 3)
+    {
+        blockDCTop = *(pCoefBufCol - 8);
+    }
+    else
+    {
+        blockDCTop = *pCoefBufRow;
+    }
+    blockDCLeft = *pCoefBufCol;
+    blockDCTopLeft = *(pCoefBufRow - 8);
+
+    /* |left - topLeft| < |topLeft - top|  =>  predict from the top block */
+    if (armAbs(blockDCLeft - blockDCTopLeft) <
+        armAbs(blockDCTopLeft - blockDCTop))
+    {
+        *predDir = OMX_VC_VERTICAL;
+        *predQP = pQpBuf[1];
+    }
+    else
+    {
+        *predDir = OMX_VC_HORIZONTAL;
+        *predQP = pQpBuf[0];
+    }
+    return OMX_Sts_NoErr;
+}
+
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
new file mode 100644
index 0000000..a247c69
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
@@ -0,0 +1,58 @@
+ /**
+ *
+ * File Name: armVCM4P2_Zigzag_Tables.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVCM4P2_Zigzag_Tables.c
+ * Description: Contains the zigzag tables
+ *
+ */
+
+#include "omxtypes.h"
+
+const OMX_U8 armVCM4P2_aClassicalZigzagScan [64] =
+{
+ 0, 1, 8, 16, 9, 2, 3, 10,
+ 17, 24, 32, 25, 18, 11, 4, 5,
+ 12, 19, 26, 33, 40, 48, 41, 34,
+ 27, 20, 13, 6, 7, 14, 21, 28,
+ 35, 42, 49, 56, 57, 50, 43, 36,
+ 29, 22, 15, 23, 30, 37, 44, 51,
+ 58, 59, 52, 45, 38, 31, 39, 46,
+ 53, 60, 61, 54, 47, 55, 62, 63
+};
+
+const OMX_U8 armVCM4P2_aHorizontalZigzagScan [64] =
+{
+ 0, 1, 2, 3, 8, 9, 16, 17,
+ 10, 11, 4, 5, 6, 7, 15, 14,
+ 13, 12, 19, 18, 24, 25, 32, 33,
+ 26, 27, 20, 21, 22, 23, 28, 29,
+ 30, 31, 34, 35, 40, 41, 48, 49,
+ 42, 43, 36, 37, 38, 39, 44, 45,
+ 46, 47, 50, 51, 56, 57, 58, 59,
+ 52, 53, 54, 55, 60, 61, 62, 63
+};
+
+const OMX_U8 armVCM4P2_aVerticalZigzagScan [64] =
+{
+ 0, 8, 16, 24, 1, 9, 2, 10,
+ 17, 25, 32, 40, 48, 56, 57, 49,
+ 41, 33, 26, 18, 3, 11, 4, 12,
+ 19, 27, 34, 42, 50, 58, 35, 43,
+ 51, 59, 20, 28, 5, 13, 6, 14,
+ 21, 29, 36, 44, 52, 60, 37, 45,
+ 53, 61, 22, 30, 7, 15, 23, 31,
+ 38, 46, 54, 62, 39, 47, 55, 63
+};
+
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_16x16.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_16x16.c
new file mode 100644
index 0000000..dcd3ce1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_16x16.c
@@ -0,0 +1,111 @@
+/**
+ *
+ * File Name: omxVCM4P2_BlockMatch_Half_16x16.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for Block matching, a full search algorithm
+ * is implemented
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Half_16x16 (6.2.4.2.3)
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution. Returns the
+ * estimated motion vector and associated minimum SAD. This function
+ * estimates the half-pixel motion vector by interpolating the integer
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e.,
+ * the initial integer MV is generated externally. The input parameters
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of
+ * 16x16 integer search prior to calling BlockMatch_Half_16x16. The function
+ * BlockMatch_Integer_16x16 may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * macroblock that corresponds to the location of the current
+ * macroblock in the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - reference plane valid region rectangle
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 256
+ * entries); must be aligned on a 16-byte boundary. The number of
+ * bytes between lines (step) is 16.
+ * pSearchPointRefPos - position of the starting point for half pixel
+ * search (specified in terms of integer pixel units) in the
+ * reference plane, i.e., the reference position pointed to by the
+ * predicted motion vector.
+ * rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ * pSrcDstMV - pointer to the initial MV estimate; typically generated
+ * during a prior 16X16 integer search; specified in terms of
+ * half-pixel units.
+ *
+ * Output Arguments:
+ *
+ * pSrcDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL: pSrcRefBuf,
+ * pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV, or
+ * - pSrcCurrBuf is not 16-byte aligned
+ *
+ */
+
+OMXResult omxVCM4P2_BlockMatch_Half_16x16(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT refWidth,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pSearchPointRefPos,
+     OMX_INT rndVal,
+     OMXVCMotionVector *pSrcDstMV,
+     OMX_INT *pDstSAD
+)
+{
+    /* Block dimension handed to the shared half-pixel search routine */
+    const OMX_U8 blockEdge = 16;
+    OMXResult status;
+
+    /* Reject a NULL in any of the pointers validated here (pDstSAD is
+     * forwarded without a check at this level) */
+    armRetArgErrIf((pSrcRefBuf == NULL) ||
+                   (pRefRect == NULL) ||
+                   (pSrcCurrBuf == NULL) ||
+                   (pSearchPointRefPos == NULL) ||
+                   (pSrcDstMV == NULL), OMX_Sts_BadArgErr);
+
+    /* The current-block buffer must sit on a 16-byte boundary */
+    armRetArgErrIf(!armIs16ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr);
+
+    /* All remaining work is delegated; its result is returned unchanged */
+    status = armVCM4P2_BlockMatch_Half(
+                 pSrcRefBuf,
+                 refWidth,
+                 pRefRect,
+                 pSrcCurrBuf,
+                 pSearchPointRefPos,
+                 rndVal,
+                 pSrcDstMV,
+                 pDstSAD,
+                 blockEdge);
+
+    return status;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_8x8.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_8x8.c
new file mode 100644
index 0000000..6996e6d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Half_8x8.c
@@ -0,0 +1,109 @@
+/**
+ *
+ * File Name: omxVCM4P2_BlockMatch_Half_8x8.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for Block matching, a full search algorithm
+ * is implemented
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Half_8x8 (6.2.4.2.4)
+ *
+ * Description:
+ * Performs an 8x8 block match with half-pixel resolution. Returns the
+ * estimated motion vector and associated minimum SAD. This function
+ * estimates the half-pixel motion vector by interpolating the integer
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e.,
+ * the initial integer MV is generated externally. The input parameters
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of
+ * 8x8 integer search prior to calling BlockMatch_Half_8x8. The function
+ * BlockMatch_Integer_8x8 may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * block that corresponds to the location of the current 8x8 block
+ * in the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - reference plane valid region rectangle
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 128
+ * entries); must be aligned on a 8-byte boundary. The number of
+ * bytes between lines (step) is 16.
+ * pSearchPointRefPos - position of the starting point for half pixel
+ * search (specified in terms of integer pixel units) in the
+ * reference plane.
+ * rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ * pSrcDstMV - pointer to the initial MV estimate; typically generated
+ * during a prior 8x8 integer search, specified in terms of
+ * half-pixel units.
+ *
+ * Output Arguments:
+ *
+ * pSrcDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcRefBuf, pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV
+ * - pSrcCurrBuf is not 8-byte aligned
+ *
+ */
+
+OMXResult omxVCM4P2_BlockMatch_Half_8x8(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT refWidth,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pSearchPointRefPos,
+     OMX_INT rndVal,
+     OMXVCMotionVector *pSrcDstMV,
+     OMX_INT *pDstSAD
+)
+{
+    /* Block dimension handed to the shared half-pixel search routine */
+    const OMX_U8 blockEdge = 8;
+    OMXResult status;
+
+    /* Reject a NULL in any of the pointers validated here (pDstSAD is
+     * forwarded without a check at this level) */
+    armRetArgErrIf((pSrcRefBuf == NULL) ||
+                   (pRefRect == NULL) ||
+                   (pSrcCurrBuf == NULL) ||
+                   (pSearchPointRefPos == NULL) ||
+                   (pSrcDstMV == NULL), OMX_Sts_BadArgErr);
+
+    /* The current-block buffer must sit on an 8-byte boundary */
+    armRetArgErrIf(!armIs8ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr);
+
+    /* All remaining work is delegated; its result is returned unchanged */
+    status = armVCM4P2_BlockMatch_Half(
+                 pSrcRefBuf,
+                 refWidth,
+                 pRefRect,
+                 pSrcCurrBuf,
+                 pSearchPointRefPos,
+                 rndVal,
+                 pSrcDstMV,
+                 pDstSAD,
+                 blockEdge);
+
+    return status;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_16x16.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_16x16.c
new file mode 100644
index 0000000..e714ef1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_16x16.c
@@ -0,0 +1,114 @@
+/**
+ *
+ * File Name: omxVCM4P2_BlockMatch_Integer_16x16.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for Block matching, a full search algorithm
+ * is implemented
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Integer_16x16 (6.2.4.2.1)
+ *
+ * Description:
+ * Performs a 16x16 block search; estimates motion vector and associated
+ * minimum SAD. Both the input and output motion vectors are represented using
+ * half-pixel units, and therefore a shift left or right by 1 bit may be
+ * required, respectively, to match the input or output MVs with other
+ * functions that either generate output MVs or expect input MVs represented
+ * using integer pixel units.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * MB that corresponds to the location of the current macroblock in
+ * the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - pointer to the valid reference plane rectangle; coordinates
+ * are specified relative to the image origin. Rectangle
+ * boundaries may extend beyond image boundaries if the image has
+ * been padded. For example, if padding extends 4 pixels beyond
+ * frame border, then the value for the left border could be set to
+ * -4.
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 256
+ * entries); must be aligned on a 16-byte boundary. The number of
+ * bytes between lines (step) is 16.
+ * pCurrPointPos - position of the current macroblock in the current plane
+ * pSrcPreMV - pointer to predicted motion vector; NULL indicates no
+ * predicted MV
+ * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced
+ * by pSrcPreMV); may be set to NULL if unavailable.
+ * pMESpec - vendor-specific motion estimation specification structure;
+ * must have been allocated and then initialized using
+ * omxVCM4P2_MEInit prior to calling the block matching function.
+ *
+ * Output Arguments:
+ *
+ * pDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL: pSrcRefBuf,
+ * pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or
+ * pMESpec, or
+ * - pSrcCurrBuf is not 16-byte aligned
+ *
+ */
+
+OMXResult omxVCM4P2_BlockMatch_Integer_16x16(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT refWidth,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pCurrPointPos,
+     const OMXVCMotionVector *pSrcPreMV,
+     const OMX_INT *pSrcPreSAD,
+     void *pMESpec,
+     OMXVCMotionVector *pDstMV,
+     OMX_INT *pDstSAD
+)
+{
+    /* Block dimension handed to the shared integer search routine */
+    const OMX_U8 blockEdge = 16;
+    OMXResult status;
+
+    /* Only the alignment of the current-block buffer is validated at this
+     * level; every argument is forwarded as received. */
+    armRetArgErrIf(!armIs16ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr);
+
+    status = armVCM4P2_BlockMatch_Integer(
+                 pSrcRefBuf,
+                 refWidth,
+                 pRefRect,
+                 pSrcCurrBuf,
+                 pCurrPointPos,
+                 pSrcPreMV,
+                 pSrcPreSAD,
+                 pMESpec,
+                 pDstMV,
+                 pDstSAD,
+                 blockEdge);
+
+    return status;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_8x8.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_8x8.c
new file mode 100644
index 0000000..607e64c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_BlockMatch_Integer_8x8.c
@@ -0,0 +1,110 @@
+/**
+ *
+ * File Name: omxVCM4P2_BlockMatch_Integer_8x8.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for Block matching, a full search algorithm
+ * is implemented
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Integer_8x8 (6.2.4.2.2)
+ *
+ * Description:
+ * Performs an 8x8 block search; estimates motion vector and associated
+ * minimum SAD. Both the input and output motion vectors are represented
+ * using half-pixel units, and therefore a shift left or right by 1 bit may be
+ * required, respectively, to match the input or output MVs with other
+ * functions that either generate output MVs or expect input MVs represented
+ * using integer pixel units.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * block that corresponds to the location of the current 8x8 block
+ * in the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - pointer to the valid reference plane rectangle; coordinates
+ * are specified relative to the image origin. Rectangle
+ * boundaries may extend beyond image boundaries if the image has
+ * been padded.
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 128
+ * entries); must be aligned on an 8-byte boundary. The number of
+ * bytes between lines (step) is 16 bytes.
+ * pCurrPointPos - position of the current block in the current plane
+ * pSrcPreMV - pointer to predicted motion vector; NULL indicates no
+ * predicted MV
+ * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced
+ * by pSrcPreMV); may be set to NULL if unavailable.
+ * pMESpec - vendor-specific motion estimation specification structure;
+ * must have been allocated and then initialized using
+ * omxVCM4P2_MEInit prior to calling the block matching function.
+ *
+ * Output Arguments:
+ *
+ * pDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL: pSrcRefBuf,
+ * pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or
+ * pMESpec, or
+ * - pSrcCurrBuf is not 8-byte aligned
+ *
+ */
+
+OMXResult omxVCM4P2_BlockMatch_Integer_8x8(
+     const OMX_U8 *pSrcRefBuf,
+     OMX_INT refWidth,
+     const OMXRect *pRefRect,
+     const OMX_U8 *pSrcCurrBuf,
+     const OMXVCM4P2Coordinate *pCurrPointPos,
+     const OMXVCMotionVector *pSrcPreMV,
+     const OMX_INT *pSrcPreSAD,
+     void *pMESpec,
+     OMXVCMotionVector *pDstMV,
+     OMX_INT *pDstSAD
+)
+{
+    /* Block dimension handed to the shared integer search routine */
+    const OMX_U8 blockEdge = 8;
+    OMXResult status;
+
+    /* Only the alignment of the current-block buffer is validated at this
+     * level; every argument is forwarded as received. */
+    armRetArgErrIf(!armIs8ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr);
+
+    status = armVCM4P2_BlockMatch_Integer(
+                 pSrcRefBuf,
+                 refWidth,
+                 pRefRect,
+                 pSrcCurrBuf,
+                 pCurrPointPos,
+                 pSrcPreMV,
+                 pSrcPreSAD,
+                 pMESpec,
+                 pDstMV,
+                 pDstSAD,
+                 blockEdge);
+
+    return status;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DCT8x8blk.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DCT8x8blk.c
new file mode 100644
index 0000000..a077ac8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DCT8x8blk.c
@@ -0,0 +1,87 @@
+/**
+ *
+ * File Name: omxVCM4P2_DCT8x8blk.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for 8x8 block DCT
+ *
+ */
+
+#include <math.h>
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armCOMM.h"
+#include "armVCM4P2_DCT_Table.h"
+
+/**
+ * Function: omxVCM4P2_DCT8x8blk (6.2.4.4.1)
+ *
+ * Description:
+ * Computes a 2D forward DCT for a single 8x8 block, as defined in
+ * [ISO14496-2].
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the start of the linearly arranged input buffer; must
+ * be aligned on a 16-byte boundary. Input values (pixel
+ * intensities) are valid in the range [-255,255].
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the start of the linearly arranged output buffer; must
+ * be aligned on a 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, returned if:
+ * - pSrc or pDst is NULL.
+ * - pSrc or pDst is not 16-byte aligned.
+ *
+ */
+
+OMXResult omxVCM4P2_DCT8x8blk (const OMX_S16 *pSrc, OMX_S16 *pDst)
+{
+    OMX_INT dstIdx, srcIdx;
+    OMX_INT freqRow, freqCol, pixRow, pixCol;
+    OMX_F64 acc;
+
+    /* Both buffers must be present and 16-byte aligned */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
+
+    /* One pass per output coefficient, visited in row-major order */
+    for (dstIdx = 0; dstIdx < 64; dstIdx++)
+    {
+        freqRow = dstIdx / 8;
+        freqCol = dstIdx % 8;
+        acc = 0.0;
+
+        /* Accumulate all 64 input samples, also in row-major order, each
+           weighted by the table entries for its row and its column */
+        for (srcIdx = 0; srcIdx < 64; srcIdx++)
+        {
+            pixRow = srcIdx / 8;
+            pixCol = srcIdx % 8;
+
+            acc += pSrc[srcIdx] *
+                   armVCM4P2_preCalcDCTCos[pixRow][freqRow] *
+                   armVCM4P2_preCalcDCTCos[pixCol][freqCol];
+        }
+
+        pDst[dstIdx] = armRoundFloatToS16 (acc);
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
new file mode 100644
index 0000000..51f7bab
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
@@ -0,0 +1,115 @@
+/**
+ *
+ * File Name: omxVCM4P2_DecodeBlockCoef_Inter.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for inter reconstruction
+ *
+ */
+
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Inter (6.2.5.4.2)
+ *
+ * Description:
+ * Decodes the INTER block coefficients. This function performs inverse
+ * quantization, inverse zigzag positioning, and IDCT (with appropriate
+ * clipping on each step) on the coefficients. The results (residuals) are
+ * placed in a contiguous array of 64 elements. For INTER block, the output
+ * buffer holds the residuals for further reconstruction.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream buffer. There is no boundary check for the bit stream
+ * buffer.
+ * pBitOffset - pointer to the bit position in the byte pointed to by
+ * *ppBitStream. *pBitOffset is valid within [0-7]
+ * QP - quantization parameter
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded, so
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream
+ * pDst - pointer to the decoded residual buffer (a contiguous array of 64
+ * elements of OMX_S16 data type); must be aligned on a 16-byte
+ * boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is Null:
+ * ppBitStream, *ppBitStream, pBitOffset , pDst
+ * - *pBitOffset exceeds [0,7]
+ * - QP <= 0.
+ * - pDst is not 16-byte aligned
+ * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Inter .
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Inter(
+    const OMX_U8 ** ppBitStream,
+    OMX_INT * pBitOffset,
+    OMX_S16 * pDst,
+    OMX_INT QP,
+    OMX_INT shortVideoHeader
+)
+{
+    /* Scratch storage for one 64-coefficient block. The 15 extra elements
+       leave room to move the start forward to a 16-byte boundary. */
+    OMX_S16 scratch[79];
+    OMX_S16 *pCoef;
+    OMXResult status;
+
+    /* Reject missing pointers, a misaligned destination and out-of-range
+       QP / bit offset before touching the bitstream */
+    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((*pBitOffset < 0) || (*pBitOffset > 7)), OMX_Sts_BadArgErr);
+
+    /* 16-byte aligned view of the scratch buffer */
+    pCoef = armAlignTo16Bytes(scratch);
+
+    /* Stage 1: VLC decode + inverse zigzag into the scratch block */
+    status = omxVCM4P2_DecodeVLCZigzag_Inter(
+                 ppBitStream,
+                 pBitOffset,
+                 pCoef,
+                 shortVideoHeader);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 2: in-place inverse quantisation */
+    status = omxVCM4P2_QuantInvInter_I(pCoef, QP);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 3: inverse DCT straight into the caller's residual buffer */
+    status = omxVCM4P2_IDCT8x8blk(pCoef, pDst);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
new file mode 100644
index 0000000..a0b2376
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
@@ -0,0 +1,225 @@
+/**
+ *
+ * File Name: omxVCM4P2_DecodeBlockCoef_Intra.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for intra reconstruction
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Intra (6.2.5.4.1)
+ *
+ * Description:
+ * Decodes the INTRA block coefficients. Inverse quantization, inversely
+ * zigzag positioning, and IDCT, with appropriate clipping on each step, are
+ * performed on the coefficients. The results are then placed in the output
+ * frame/plane on a pixel basis. Note: This function will be used only when
+ * at least one non-zero AC coefficient of current block exists in the bit
+ * stream. The DC only condition will be handled in another function.
+ *
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream buffer. There is no boundary check for the bit stream
+ * buffer.
+ * pBitOffset - pointer to the bit position in the byte pointed to by
+ * *ppBitStream. *pBitOffset is valid within [0-7].
+ * step - width of the destination plane
+ * pCoefBufRow - pointer to the coefficient row buffer; must be aligned on
+ * an 8-byte boundary.
+ * pCoefBufCol - pointer to the coefficient column buffer; must be aligned
+ * on an 8-byte boundary.
+ * curQP - quantization parameter of the macroblock which the current block
+ * belongs to
+ * pQPBuf - pointer to the quantization parameter buffer
+ * blockIndex - block index indicating the component type and position as
+ * defined in [ISO14496-2], subclause 6.1.3.8, Figure 6-5.
+ * intraDCVLC - a code determined by intra_dc_vlc_thr and QP. This allows a
+ * mechanism to switch between two VLC for coding of Intra DC
+ * coefficients as per [ISO14496-2], Table 6-21.
+ * ACPredFlag - a flag equal to ac_pred_flag (of luminance) indicating if
+ * the ac coefficients of the first row or first column are
+ * differentially coded for intra coded macroblock.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded, so
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream
+ * pDst - pointer to the block in the destination plane; must be aligned on
+ * an 8-byte boundary.
+ * pCoefBufRow - pointer to the updated coefficient row buffer.
+ * pCoefBufCol - pointer to the updated coefficient column buffer Note:
+ * The coefficient buffers must be updated in accordance with the
+ * update procedure defined in section 6.2.2.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pCoefBufRow, pCoefBufCol,
+ * pQPBuf, pDst.
+ * - *pBitOffset exceeds [0,7]
+ * - curQP exceeds [1, 31]
+ * - blockIndex exceeds [0,5]
+ * - step is not the multiple of 8
+ * - a pointer alignment requirement was violated.
+ * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Intra.
+ *
+ */
+
+OMXResult omxVCM4P2_DecodeBlockCoef_Intra(
+    const OMX_U8 ** ppBitStream,
+    OMX_INT *pBitOffset,
+    OMX_U8 *pDst,
+    OMX_INT step,
+    OMX_S16 *pCoefBufRow,
+    OMX_S16 *pCoefBufCol,
+    OMX_U8 curQP,
+    const OMX_U8 *pQPBuf,
+    OMX_INT blockIndex,
+    OMX_INT intraDCVLC,
+    OMX_INT ACPredFlag,
+    OMX_INT shortVideoHeader
+    )
+{
+    /* Two scratch blocks of 64 coefficients each, over-allocated so that a
+       16-byte aligned start can be picked inside them */
+    OMX_S16 coefStore[79], pixelStore[79];
+    OMX_S16 *pCoef, *pPixels;
+    OMX_INT dcPredDir, acPredDir;
+    OMX_INT neighbourQP;
+    OMX_INT row, col;
+    OMX_U8 *pRowOut;
+    OMXVCM4P2VideoComponent component;
+    OMXResult status;
+
+    /* Argument validation */
+    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pCoefBufRow == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pCoefBufCol == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pQPBuf == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((curQP <= 0) || (curQP >= 32)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset >7), OMX_Sts_BadArgErr);
+    armRetArgErrIf((blockIndex < 0) || (blockIndex > 5), OMX_Sts_BadArgErr);
+    armRetArgErrIf((step % 8) != 0, OMX_Sts_BadArgErr);
+
+    /* 16-byte aligned views of the two scratch blocks */
+    pCoef   = armAlignTo16Bytes(coefStore);
+    pPixels = armAlignTo16Bytes(pixelStore);
+
+    /* Derive the prediction direction and the predictor block's QP from
+       the neighbouring coefficient buffers */
+    armVCM4P2_SetPredDir(
+        blockIndex,
+        pCoefBufRow,
+        pCoefBufCol,
+        &dcPredDir,
+        &neighbourQP,
+        pQPBuf);
+
+    armRetArgErrIf(((neighbourQP <= 0) || (neighbourQP >= 32)), OMX_Sts_BadArgErr);
+
+    /* The scan pattern follows the prediction direction only when AC
+       prediction is enabled for this macroblock */
+    acPredDir = (ACPredFlag == 0) ? OMX_VC_NONE : dcPredDir;
+
+    /* Blocks 0..3 are luminance, blocks 4 and 5 are chrominance */
+    component = (blockIndex <= 3) ? OMX_VC_LUMINANCE : OMX_VC_CHROMINANCE;
+
+    /* Stage 1: VLC decode + inverse zigzag, using the DC-VLC or the
+       AC-VLC variant as selected by intraDCVLC */
+    if (intraDCVLC == 1)
+    {
+        status = omxVCM4P2_DecodeVLCZigzag_IntraDCVLC(
+                     ppBitStream,
+                     pBitOffset,
+                     pCoef,
+                     acPredDir,
+                     shortVideoHeader,
+                     component);
+        armRetDataErrIf((status != OMX_Sts_NoErr), status);
+    }
+    else
+    {
+        status = omxVCM4P2_DecodeVLCZigzag_IntraACVLC(
+                     ppBitStream,
+                     pBitOffset,
+                     pCoef,
+                     acPredDir,
+                     shortVideoHeader);
+        armRetDataErrIf((status != OMX_Sts_NoErr), status);
+    }
+
+    /* Stage 2: AC/DC prediction and coefficient reconstruction */
+    status = omxVCM4P2_PredictReconCoefIntra(
+                 pCoef,
+                 pCoefBufRow,
+                 pCoefBufCol,
+                 curQP,
+                 neighbourQP,
+                 dcPredDir,
+                 ACPredFlag,
+                 component);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 3: in-place inverse quantisation */
+    status = omxVCM4P2_QuantInvIntra_I(
+                 pCoef,
+                 curQP,
+                 component,
+                 shortVideoHeader);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 4: inverse DCT into the second scratch block */
+    status = omxVCM4P2_IDCT8x8blk (pCoef, pPixels);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 5: clip each sample to 0..255 and scatter the linear 8x8
+       result into the destination plane, one row every 'step' bytes */
+    for (row = 0; row < 8; row++)
+    {
+        pRowOut = pDst + (row * step);
+        for (col = 0; col < 8; col++)
+        {
+            pRowOut[col] = armClip (0, 255, pPixels[(row * 8) + col]);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP.c
new file mode 100644
index 0000000..7e159b7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP.c
@@ -0,0 +1,243 @@
+/**
+ *
+ * File Name: omxVCM4P2_DecodePadMV_PVOP.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains module for decoding MV and padding the same
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM_Bitstream.h"
+#include "armCOMM.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+
+
+
+/**
+ * Function: omxVCM4P2_DecodePadMV_PVOP (6.2.5.1.1)
+ *
+ * Description:
+ * Decodes and pads the four motion vectors associated with a non-intra P-VOP
+ * macroblock. For macroblocks of type OMX_VC_INTER4V, the output MV is
+ * padded as specified in [ISO14496-2], subclause 7.6.1.6. Otherwise, for
+ * macroblocks of types other than OMX_VC_INTER4V, the decoded MV is copied to
+ * all four output MV buffer entries.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream buffer
+ * pBitOffset - pointer to the bit position in the byte pointed to by
+ * *ppBitStream. *pBitOffset is valid within [0-7].
+ * pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB - pointers to the
+ * motion vector buffers of the macroblocks specially at the left,
+ * upper, and upper-right side of the current macroblock,
+ * respectively; a value of NULL indicates unavailability. Note:
+ * Any neighborhood macroblock outside the current VOP or video
+ * packet or outside the current GOB (when short_video_header is
+ * 1 ) for which gob_header_empty is 0 is treated as
+ * transparent, according to [ISO14496-2], subclause 7.6.5.
+ * fcodeForward - a code equal to vop_fcode_forward in MPEG-4 bit stream
+ * syntax
+ * MBType - the type of the current macroblock. If MBType is not equal to
+ * OMX_VC_INTER4V, the destination motion vector buffer is still
+ * filled with the same decoded vector.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded, so
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream
+ * pDstMVCurMB - pointer to the motion vector buffer for the current
+ * macroblock; contains four decoded motion vectors
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDstMVCurMB
+ * - *pBitOffset exceeds [0,7]
+ * - fcodeForward exceeds (0,7]
+ * - MBType less than zero
+ * - motion vector buffer is not 4-byte aligned.
+ * OMX_Sts_Err - status error
+ *
+ */
+
+OMXResult omxVCM4P2_DecodePadMV_PVOP(
+    const OMX_U8 ** ppBitStream,
+    OMX_INT * pBitOffset,
+    OMXVCMotionVector * pSrcMVLeftMB,
+    OMXVCMotionVector *pSrcMVUpperMB,
+    OMXVCMotionVector * pSrcMVUpperRightMB,
+    OMXVCMotionVector * pDstMVCurMB,
+    OMX_INT fcodeForward,
+    OMXVCM4P2MacroblockType MBType
+    )
+{
+    OMXVCMotionVector diffMV;
+    OMXVCMotionVector dstMVPredME[12];
+    OMX_INT iBlk, i, count = 1;
+    OMX_S32 mvHorResidual = 1, mvVerResidual = 1, mvHorData, mvVerData;
+    OMX_S8 scaleFactor, index;
+    OMX_S16 high, low, range;
+
+
+    /* Argument error checks */
+    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstMVCurMB == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(((*pBitOffset < 0) || (*pBitOffset > 7)), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((fcodeForward < 1) || (fcodeForward > 7)),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs4ByteAligned(pDstMVCurMB), OMX_Sts_BadArgErr);
+
+    /* Intra macroblocks carry no motion data: nothing is read from the
+       bitstream and all four output vectors are zeroed */
+    if ((MBType == OMX_VC_INTRA) ||
+        (MBType == OMX_VC_INTRA_Q)
+       )
+    {
+        for (i = 0; i < 4; i++)
+        {
+            pDstMVCurMB[i].dx = 0;
+            pDstMVCurMB[i].dy = 0;
+        }
+
+        return OMX_Sts_NoErr;
+    }
+
+    /* Four vectors are coded for the 4MV types, a single one otherwise */
+    if ((MBType == OMX_VC_INTER4V) || (MBType == OMX_VC_INTER4V_Q))
+    {
+        count = 4;
+    }
+    else if ((MBType == OMX_VC_INTER) || (MBType == OMX_VC_INTER_Q))
+    {
+        count = 1;
+    }
+
+    /* Scale factor f = 1 << (fcodeForward - 1) and the vector range
+       [low, high] derived from it */
+    scaleFactor = 1 << (fcodeForward -1);
+    high = ( 32 * scaleFactor) - 1;
+    low = ( (-32) * scaleFactor);
+    range = ( 64 * scaleFactor);
+
+    /* Huffman decoding and MV reconstruction */
+    for (iBlk = 0; iBlk < count; iBlk++)
+    {
+
+        /* Huffman decoding to get Horizontal data and residual */
+        index = armUnPackVLC32(ppBitStream, pBitOffset,
+                               armVCM4P2_aVlcMVD);
+        armRetDataErrIf(index == -1, OMX_Sts_Err);
+
+        /* Table index is the MV data value offset by +32 */
+        mvHorData = index - 32;
+
+        /* A residual of (fcodeForward - 1) bits follows only when the
+           scale factor is above 1 and the data value is non-zero */
+        if ((fcodeForward > 1) && (mvHorData != 0))
+        {
+            mvHorResidual = (OMX_S32) armGetBits(ppBitStream,
+                                        pBitOffset, (fcodeForward -1));
+        }
+
+        /* Huffman decoding to get Vertical data and residual */
+        index = armUnPackVLC32(ppBitStream, pBitOffset, armVCM4P2_aVlcMVD);
+        armRetDataErrIf(index == -1, OMX_Sts_Err);
+
+        mvVerData = index - 32;
+
+        if ((fcodeForward > 1) && (mvVerData != 0))
+        {
+            mvVerResidual = (OMX_S32) armGetBits(ppBitStream,
+                                        pBitOffset, (fcodeForward -1));
+        }
+
+        /* Calculating the differential MV.
+           The magnitude step is the scale factor f, not fcodeForward
+           itself; the two are only equal when fcodeForward == 2. */
+        if ( (scaleFactor == 1) || (mvHorData == 0) )
+        {
+            diffMV.dx = mvHorData;
+        }
+        else
+        {
+            diffMV.dx = ((armAbs(mvHorData) - 1) * scaleFactor)
+                        + mvHorResidual + 1;
+            if (mvHorData < 0)
+            {
+                diffMV.dx = -diffMV.dx;
+            }
+        }
+
+        if ( (scaleFactor == 1) || (mvVerData == 0) )
+        {
+            diffMV.dy = mvVerData;
+        }
+        else
+        {
+            diffMV.dy = ((armAbs(mvVerData) - 1) * scaleFactor)
+                        + mvVerResidual + 1;
+            if (mvVerData < 0)
+            {
+                diffMV.dy = -diffMV.dy;
+            }
+        }
+
+        /* Find the predicted vector; the prediction for block iBlk is
+           written directly into its output slot */
+        omxVCM4P2_FindMVpred (
+            pDstMVCurMB,
+            pSrcMVLeftMB,
+            pSrcMVUpperMB,
+            pSrcMVUpperRightMB,
+            &pDstMVCurMB[iBlk],
+            dstMVPredME,
+            iBlk);
+
+        /* Adding the difference to the predicted MV to reconstruct MV */
+        pDstMVCurMB[iBlk].dx += diffMV.dx;
+        pDstMVCurMB[iBlk].dy += diffMV.dy;
+
+        /* Wrap each component back into [low, high] */
+        if ( pDstMVCurMB[iBlk].dx < low )
+        {
+            pDstMVCurMB[iBlk].dx += range;
+        }
+        if (pDstMVCurMB[iBlk].dx > high)
+        {
+            pDstMVCurMB[iBlk].dx -= range;
+        }
+
+        if ( pDstMVCurMB[iBlk].dy < low )
+        {
+            pDstMVCurMB[iBlk].dy += range;
+        }
+        if (pDstMVCurMB[iBlk].dy > high)
+        {
+            pDstMVCurMB[iBlk].dy -= range;
+        }
+    }
+
+    /* Single-vector macroblocks: replicate the decoded vector into the
+       remaining three output entries */
+    if ((MBType == OMX_VC_INTER) || (MBType == OMX_VC_INTER_Q))
+    {
+        pDstMVCurMB[1] = pDstMVCurMB[0];
+        pDstMVCurMB[2] = pDstMVCurMB[0];
+        pDstMVCurMB[3] = pDstMVCurMB[0];
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter.c
new file mode 100644
index 0000000..88a8d04
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter.c
@@ -0,0 +1,120 @@
+/**
+ *
+ * File Name: omxVCM4P2_DecodeVLCZigzag_Inter.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for zigzag scanning and VLC decoding
+ * for inter block.
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM_Bitstream.h"
+#include "armCOMM.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+#include "armVCM4P2_ZigZag_Tables.h"
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeVLCZigzag_Inter (6.2.5.2.3)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one inter-coded block.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the stream buffer
+ * pBitOffset - pointer to the next available bit in the current stream
+ * byte referenced by *ppBitStream. The parameter *pBitOffset is
+ * valid within the range [0-7].
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded such
+ * that it points to the current byte in the stream buffer
+ * pBitOffset - *pBitOffset is updated after decoding such that it points
+ * to the next available bit in the stream byte referenced by
+ * *ppBitStream
+ * pDst - pointer to the coefficient buffer of current block; must be
+ * 4-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - pDst is not 4-byte aligned
+ * - *pBitOffset exceeds [0,7]
+ * OMX_Sts_Err - status error, if:
+ * - At least one mark bit is equal to zero
+ * - Encountered an illegal stream code that cannot be found in the VLC table
+ * - Encountered an illegal code in the VLC FLC table
+ * - The number of coefficients is greater than 64
+ *
+ */
+
+OMXResult omxVCM4P2_DecodeVLCZigzag_Inter(
+    const OMX_U8 ** ppBitStream,
+    OMX_INT * pBitOffset,
+    OMX_S16 * pDst,
+    OMX_INT shortVideoHeader
+)
+{
+    /* 'last' is written by the VLC helper; 'start' is the first
+       coefficient position to decode */
+    OMX_U8 last,start = 0;
+    /* Inter blocks are scanned with the classical zigzag table */
+    const OMX_U8 *pZigzagTable = armVCM4P2_aClassicalZigzagScan;
+    OMXResult errorCode;
+
+    /* Argument error checks */
+    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs4ByteAligned(pDst), OMX_Sts_BadArgErr);
+    /* The documented contract rejects a bit offset outside [0,7] */
+    armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset > 7), OMX_Sts_BadArgErr);
+
+    /* Decode the run/level events with the inter VLC tables; the four
+       numeric arguments are table parameters defined by the helper */
+    errorCode = armVCM4P2_GetVLCBits (
+                    ppBitStream,
+                    pBitOffset,
+                    pDst,
+                    shortVideoHeader,
+                    start,
+                    &last,
+                    11,
+                    42,
+                    2,
+                    5,
+                    armVCM4P2_InterL0RunIdx,
+                    armVCM4P2_InterVlcL0,
+                    armVCM4P2_InterL1RunIdx,
+                    armVCM4P2_InterVlcL1,
+                    armVCM4P2_InterL0LMAX,
+                    armVCM4P2_InterL1LMAX,
+                    armVCM4P2_InterL0RMAX,
+                    armVCM4P2_InterL1RMAX,
+                    pZigzagTable );
+    armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode);
+
+    /* A 'last' value of zero after a successful decode is reported as a
+       stream error */
+    if (last == 0)
+    {
+        return OMX_Sts_Err;
+    }
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c
new file mode 100644
index 0000000..96593d1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c
@@ -0,0 +1,103 @@
+/**
+ *
+ * File Name: omxVCM4P2_DecodeVLCZigzag_IntraACVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for zigzag scanning and VLC decoding
+ * for intra block.
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients
+ * for one intra block. Two versions of the function (DCVLC and ACVLC) are
+ * provided in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the
+ * bitstream buffer
+ * pBitOffset - pointer to the bit position in the current byte referenced
+ * by *ppBitStream. The parameter *pBitOffset is valid in the
+ * range [0-7]. Bit Position in one byte: |Most Least| *pBitOffset
+ * |0 1 2 3 4 5 6 7|
+ * predDir - AC prediction direction; used to select the zigzag scan
+ * pattern; takes one of the following values: OMX_VC_NONE - AC
+ * prediction not used; performs classical zigzag scan.
+ * OMX_VC_HORIZONTAL - Horizontal prediction; performs
+ * alternate-vertical zigzag scan; OMX_VC_VERTICAL - Vertical
+ * prediction; performs alternate-horizontal zigzag scan.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ * videoComp - video component type (luminance or chrominance) of the
+ * current block
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded such
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated such that it points to the current
+ * bit position in the byte pointed by *ppBitStream
+ * pDst - pointer to the coefficient buffer of current block; must be
+ * 4-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments At least one of the following
+ * pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst,
+ * or At least one of the following conditions is true:
+ * *pBitOffset exceeds [0,7], predDir exceeds [0,2], or pDst is
+ * not 4-byte aligned
+ * OMX_Sts_Err In DecodeVLCZigzag_IntraDCVLC, dc_size > 12 At least one of
+ * mark bits equals zero Illegal stream encountered; code cannot
+ * be located in VLC table Forbidden code encountered in the VLC
+ * FLC table The number of coefficients is greater than 64
+ *
+ */
+
+
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraACVLC(
+    const OMX_U8 ** ppBitStream,
+    OMX_INT * pBitOffset,
+    OMX_S16 * pDst,
+    OMX_U8 predDir,
+    OMX_INT shortVideoHeader
+)
+{
+    /* Decoding starts at coefficient 0; no check is done at this level and
+       all arguments are forwarded unchanged to the shared intra helper */
+    OMX_U8 firstCoef = 0;
+    OMXResult status;
+
+    status = armVCM4P2_DecodeVLCZigzag_Intra(
+                 ppBitStream,
+                 pBitOffset,
+                 pDst,
+                 predDir,
+                 shortVideoHeader,
+                 firstCoef);
+
+    return status;
+}
+
+/* End of file */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c
new file mode 100644
index 0000000..95e00d7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c
@@ -0,0 +1,170 @@
+/**
+ *
+ * File Name: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for zigzag scanning and VLC decoding
+ * for intra block.
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM_Bitstream.h"
+#include "armCOMM.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+#include "armVCM4P2_ZigZag_Tables.h"
+
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients
+ * for one intra block. Two versions of the function (DCVLC and ACVLC) are
+ * provided in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the
+ * bitstream buffer
+ * pBitOffset - pointer to the bit position in the current byte referenced
+ * by *ppBitStream. The parameter *pBitOffset is valid in the
+ * range [0-7].
+ * Bit Position in one byte: |Most Least|
+ * *pBitOffset |0 1 2 3 4 5 6 7|
+ * predDir - AC prediction direction; used to select the zigzag scan
+ * pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used;
+ * performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction;
+ * performs alternate-vertical zigzag scan;
+ * - OMX_VC_VERTICAL - Vertical prediction;
+ * performs alternate-horizontal zigzag scan.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ * videoComp - video component type (luminance or chrominance) of the
+ * current block
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded such
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated such that it points to the current
+ * bit position in the byte pointed by *ppBitStream
+ * pDst - pointer to the coefficient buffer of current block; must be
+ * 4-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - *pBitOffset exceeds [0,7]
+ * - predDir exceeds [0,2]
+ * - pDst is not 4-byte aligned
+ * OMX_Sts_Err - if:
+ * - In DecodeVLCZigzag_IntraDCVLC, dc_size > 12
+ * - At least one of mark bits equals zero
+ * - Illegal stream encountered; code cannot be located in VLC table
+ * - Forbidden code encountered in the VLC FLC table.
+ * - The number of coefficients is greater than 64
+ *
+ */
+
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraDCVLC(
+    const OMX_U8 ** ppBitStream,
+    OMX_INT * pBitOffset,
+    OMX_S16 * pDst,
+    OMX_U8 predDir,
+    OMX_INT shortVideoHeader,
+    OMXVCM4P2VideoComponent videoComp
+)
+{
+    /* Stays 0 when videoComp is neither luminance nor chrominance */
+    OMX_S8 dcSize = 0;
+    OMX_U16 dcBits, sizePow2;
+    OMX_S16 magnitude;
+    /* The DC term is decoded here, so the shared helper starts at 1 */
+    OMX_U8 firstCoef = 1;
+
+    /* Argument error checks */
+    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs4ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset > 7), OMX_Sts_BadArgErr);
+    armRetArgErrIf((predDir > 2), OMX_Sts_BadArgErr);
+
+    /* Read the DC size code using the table for this component */
+    switch (videoComp)
+    {
+        case OMX_VC_LUMINANCE:
+            dcSize = armUnPackVLC32(ppBitStream,
+                                    pBitOffset, armVCM4P2_aIntraDCLumaIndex);
+            break;
+
+        case OMX_VC_CHROMINANCE:
+            dcSize = armUnPackVLC32(ppBitStream,
+                                    pBitOffset, armVCM4P2_aIntraDCChromaIndex);
+            break;
+
+        default:
+            break;
+    }
+    armRetDataErrIf(dcSize == -1, OMX_Sts_Err);
+    armRetDataErrIf(dcSize > 12, OMX_Sts_Err);
+
+    if (dcSize != 0)
+    {
+        /* The differential DC value follows as dcSize raw bits */
+        dcBits = (OMX_U16) armGetBits(ppBitStream, pBitOffset, dcSize);
+
+        if ( (dcBits >> (dcSize - 1)) != 0)
+        {
+            /* Top bit set: the bits are the value itself */
+            pDst[0] = dcBits;
+        }
+        else
+        {
+            /* Top bit clear: invert the low dcSize bits to get the
+               magnitude, then negate it */
+            sizePow2 = (1 << dcSize);
+            magnitude = (OMX_S16) (dcBits ^ (sizePow2 - 1));
+            pDst[0] = -magnitude;
+        }
+
+        if (dcSize > 8)
+        {
+            /* Sizes above 8 are followed by a marker bit that must be 1 */
+            armRetDataErrIf (armGetBits(ppBitStream, pBitOffset, 1) == 0,
+                             OMX_Sts_Err);
+        }
+    }
+    else
+    {
+        /* Size 0 means the DC term is zero; no further bits are read */
+        pDst[0] = 0;
+    }
+
+    /* Remaining coefficients come from the shared intra VLC/zigzag helper */
+    return armVCM4P2_DecodeVLCZigzag_Intra(
+               ppBitStream,
+               pBitOffset,
+               pDst,
+               predDir,
+               shortVideoHeader,
+               firstCoef);
+}
+
+/* End of file */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeMV.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeMV.c
new file mode 100644
index 0000000..def2b6d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeMV.c
@@ -0,0 +1,212 @@
+/**
+ *
+ * File Name: omxVCM4P2_EncodeMV.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains module for predicting MV of MB
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+
+
+
+/**
+ * Function:  omxVCM4P2_EncodeMV   (6.2.4.5.4)
+ *
+ * Description:
+ * Predicts a motion vector for the current macroblock, encodes the
+ * difference, and writes the output to the stream buffer.  The input MVs
+ * pMVCurMB, pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB should lie
+ * within the ranges associated with the input parameter fcodeForward, as
+ * described in [ISO14496-2], subclause 7.6.3.  This function provides a
+ * superset of the functionality associated with the function
+ * omxVCM4P2_FindMVpred.
+ *
+ * Input Arguments:
+ *
+ *   ppBitStream - double pointer to the current byte in the bitstream buffer
+ *   pBitOffset - index of the first free (next available) bit in the stream
+ *            buffer referenced by *ppBitStream, valid in the range 0 to 7.
+ *   pMVCurMB - pointer to the current macroblock motion vector(s); must not
+ *            be NULL.  One entry is read for each encoded block, i.e.
+ *            entries 0..3 for the INTER4V macroblock types and entry 0
+ *            otherwise.
+ *   pSrcMVLeftMB - pointer to the source left macroblock motion vector; a
+ *            value of NULL indicates unavailability.
+ *   pSrcMVUpperMB - pointer to source upper macroblock motion vector; a
+ *            value of NULL indicates unavailability.
+ *   pSrcMVUpperRightMB - pointer to source upper right MB motion vector; a
+ *            value of NULL indicates unavailability.
+ *   fcodeForward - an integer with values from 1 to 7; used in encoding
+ *            motion vectors related to search range, as described in
+ *            [ISO14496-2], subclause 7.6.3.
+ *   MBType - macro block type, valid in the range 0 to 5.  Nothing is
+ *            written for OMX_VC_INTRA and OMX_VC_INTRA_Q.
+ *
+ * Output Arguments:
+ *
+ *   ppBitStream - updated pointer to the current byte in the bit stream
+ *            buffer
+ *   pBitOffset - updated index of the next available bit position in stream
+ *            buffer referenced by *ppBitStream
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments
+ *    -    At least one of the following pointers is NULL: ppBitStream,
+ *              *ppBitStream, pBitOffset, pMVCurMB
+ *    -    *pBitOffset < 0, or *pBitOffset >7.
+ *    -    fcodeForward <= 0, or fcodeForward > 7.
+ *    -    a differential motion vector component, once scaled by
+ *              fcodeForward, falls outside the -32..32 range covered by the
+ *              MVD VLC table.  For the INTER4V types, blocks preceding the
+ *              offending one have already been written to the stream.
+ *
+ *    Note: MBType is not range checked by this implementation.
+ *
+ */
+
+OMXResult omxVCM4P2_EncodeMV(
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    const OMXVCMotionVector * pMVCurMB,
+    const OMXVCMotionVector * pSrcMVLeftMB,
+    const OMXVCMotionVector * pSrcMVUpperMB,
+    const OMXVCMotionVector * pSrcMVUpperRightMB,
+    OMX_INT fcodeForward,
+    OMXVCM4P2MacroblockType MBType
+)
+{
+    /* armVCM4P2_aVlcMVD is indexed with (mvData + 32), so mvData must stay
+       within -32..32 */
+    const OMX_S32 mvDataLimit = 32;
+
+    OMXVCMotionVector dstMVPred;
+    OMX_S16 diffMV[2];                 /* [0] horizontal, [1] vertical */
+    OMX_S32 mvData[2], mvResidual[2];
+    OMX_INT iBlk, comp, count = 1;
+    OMX_U8  scaleFactor;
+
+    /* Argument error checks */
+    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMVCurMB == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(((*pBitOffset < 0) || (*pBitOffset > 7)), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((fcodeForward < 1) || (fcodeForward > 7)), \
+                    OMX_Sts_BadArgErr);
+
+    /* Intra macroblocks carry no motion vector: nothing to encode */
+    if ((MBType == OMX_VC_INTRA) || (MBType == OMX_VC_INTRA_Q))
+    {
+        return OMX_Sts_NoErr;
+    }
+
+    /* One vector per 8x8 block for the 4V types, a single vector otherwise */
+    if ((MBType == OMX_VC_INTER4V) || (MBType == OMX_VC_INTER4V_Q))
+    {
+        count = 4;
+    }
+
+    /* Calculating the scale factor */
+    scaleFactor = 1 << (fcodeForward - 1);
+
+    for (iBlk = 0; iBlk < count; iBlk++)
+    {
+        /* Find the predicted vector; the candidate list is not needed here,
+           so no candidate return buffer is supplied */
+        omxVCM4P2_FindMVpred (
+            pMVCurMB,
+            pSrcMVLeftMB,
+            pSrcMVUpperMB,
+            pSrcMVUpperRightMB,
+            &dstMVPred,
+            NULL,
+            iBlk );
+
+        /* Calculating the differential motion vector */
+        diffMV[0] = pMVCurMB[iBlk].dx - dstMVPred.dx;
+        diffMV[1] = pMVCurMB[iBlk].dy - dstMVPred.dy;
+
+        /* Calculating the mv_data and mv_residual of each component */
+        for (comp = 0; comp < 2; comp++)
+        {
+            if (diffMV[comp] == 0)
+            {
+                mvResidual[comp] = 0;
+                mvData[comp]     = 0;
+            }
+            else
+            {
+                mvResidual[comp] = (armAbs(diffMV[comp]) - 1) % scaleFactor;
+                mvData[comp]     = (armAbs(diffMV[comp]) - mvResidual[comp] +
+                                    (scaleFactor - 1)) / scaleFactor;
+                if (diffMV[comp] < 0)
+                {
+                    mvData[comp] = -mvData[comp];
+                }
+            }
+        }
+
+        /* Reject differentials the VLC table cannot represent before any
+           bit of this block is emitted */
+        armRetArgErrIf((mvData[0] < -mvDataLimit) || (mvData[0] > mvDataLimit) ||
+                       (mvData[1] < -mvDataLimit) || (mvData[1] > mvDataLimit),
+                       OMX_Sts_BadArgErr);
+
+        /* Huffman encoding, horizontal component first.
+
+           The index is actually calculated as
+           index = ((float) (mvData/2) + 16) * 2,
+           meaning the MV data is halved, then normalized to begin with
+           zero, and then doubled to take care of indexing the fractional
+           part included */
+        for (comp = 0; comp < 2; comp++)
+        {
+            armPackVLC32 (ppBitStream, pBitOffset,
+                          armVCM4P2_aVlcMVD[mvData[comp] + mvDataLimit]);
+
+            /* The residual is only present for a non-zero differential and
+               takes (fcodeForward - 1) bits */
+            if ((fcodeForward > 1) && (diffMV[comp] != 0))
+            {
+                armPackBits (ppBitStream, pBitOffset, mvResidual[comp],
+                             (fcodeForward - 1));
+            }
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_Inter.c
new file mode 100644
index 0000000..b6c73ea
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_Inter.c
@@ -0,0 +1,112 @@
+/**
+ *
+ * File Name: omxVCM4P2_EncodeVLCZigzag_Inter.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for zigzag scanning and VLC encoding
+ * for inter block.
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM_Bitstream.h"
+#include "armCOMM.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+#include "armVCM4P2_ZigZag_Tables.h"
+
+
+
+/**
+ * Function: omxVCM4P2_EncodeVLCZigzag_Inter (6.2.4.5.3)
+ *
+ * Description:
+ * Performs classical zigzag scanning and VLC encoding for one inter block.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream
+ * pBitOffset - pointer to the bit position in the byte pointed by
+ * *ppBitStream. Valid within 0 to 7
+ * pQDctBlkCoef - pointer to the quantized DCT coefficient
+ * pattern - block pattern which is used to decide whether this block is
+ * encoded
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is encoded so that
+ * it points to the current byte in the bit stream buffer.
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pQDctBlkCoef
+ * - *pBitOffset < 0, or *pBitOffset >7.
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_Inter(
+    OMX_U8 **ppBitStream,
+    OMX_INT * pBitOffset,
+    const OMX_S16 *pQDctBlkCoef,
+    OMX_U8 pattern,
+    OMX_INT shortVideoHeader
+)
+{
+    /* Value handed to the VLC writer as its "start" argument */
+    const OMX_U8 firstCoef = 0;
+
+    /* Reject NULL pointers, then an out-of-range bit offset */
+    armRetArgErrIf((ppBitStream == NULL) || (*ppBitStream == NULL) ||
+                   (pBitOffset == NULL) || (pQDctBlkCoef == NULL),
+                   OMX_Sts_BadArgErr);
+    armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset > 7), OMX_Sts_BadArgErr);
+
+    /* A zero pattern means the block is not coded: leave the stream alone */
+    if (!pattern)
+    {
+        return OMX_Sts_NoErr;
+    }
+
+    /* Inter blocks always use the classical zigzag scan together with the
+       inter VLC tables; the four numeric arguments are limits matching
+       those tables as expected by armVCM4P2_PutVLCBits */
+    armVCM4P2_PutVLCBits (
+        ppBitStream,
+        pBitOffset,
+        pQDctBlkCoef,
+        shortVideoHeader,
+        firstCoef,
+        26,
+        40,
+        10,
+        1,
+        armVCM4P2_InterL0RunIdx,
+        armVCM4P2_InterVlcL0,
+        armVCM4P2_InterL1RunIdx,
+        armVCM4P2_InterVlcL1,
+        armVCM4P2_InterL0LMAX,
+        armVCM4P2_InterL1LMAX,
+        armVCM4P2_InterL0RMAX,
+        armVCM4P2_InterL1RMAX,
+        armVCM4P2_aClassicalZigzagScan
+    );
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c
new file mode 100644
index 0000000..d047942
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c
@@ -0,0 +1,97 @@
+/**
+ *
+ * File Name: omxVCM4P2_EncodeVLCZigzag_IntraACVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for zigzag scanning and VLC encoding
+ * for intra block.
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+
+
+/**
+ * Function: omxVCM4P2_EncodeVLCZigzag_IntraACVLC (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one
+ * intra block. Two versions of the function (DCVLC and ACVLC) are provided
+ * in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the bitstream
+ * pBitOffset - pointer to the bit position in the byte pointed by
+ * *ppBitStream. Valid within 0 to 7.
+ * pQDctBlkCoef - pointer to the quantized DCT coefficient
+ * predDir - AC prediction direction, which is used to decide the zigzag
+ * scan pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used.
+ * Performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction.
+ * Performs alternate-vertical zigzag scan.
+ * - OMX_VC_VERTICAL - Vertical prediction.
+ * Performs alternate-horizontal zigzag scan.
+ * pattern - block pattern which is used to decide whether this block is
+ * encoded
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is encoded, so
+ * that it points to the current byte in the bit stream buffer.
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pQDctBlkCoef.
+ * - *pBitOffset < 0, or *pBitOffset >7.
+ * - PredDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or
+ * OMX_VC_VERTICAL.
+ * - VideoComp is not one component of enum OMXVCM4P2VideoComponent.
+ *
+ */
+
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraACVLC(
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    const OMX_S16 *pQDctBlkCoef,
+    OMX_U8 predDir,
+    OMX_U8 pattern,
+    OMX_INT shortVideoHeader
+)
+{
+    /* The AC-VLC variant does no work of its own: it forwards every
+       argument to the shared intra encoder with a "start" value of 0 and
+       returns that encoder's status unchanged */
+    const OMX_U8 firstCoef = 0;
+    OMXResult status;
+
+    status = armVCM4P2_EncodeVLCZigzag_Intra(
+                 ppBitStream,
+                 pBitOffset,
+                 pQDctBlkCoef,
+                 predDir,
+                 pattern,
+                 shortVideoHeader,
+                 firstCoef);
+
+    return status;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c
new file mode 100644
index 0000000..c57acd2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c
@@ -0,0 +1,160 @@
+/**
+ *
+ * File Name: omxVCM4P2_EncodeVLCZigzag_IntraDCVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for zigzag scanning and VLC encoding
+ * for intra block.
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM_Bitstream.h"
+#include "armCOMM.h"
+#include "armVCM4P2_Huff_Tables_VLC.h"
+#include "armVCM4P2_ZigZag_Tables.h"
+
+
+
+/**
+ * Function:  omxVCM4P2_EncodeVLCZigzag_IntraDCVLC   (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one
+ * intra block.  Two versions of the function (DCVLC and ACVLC) are provided
+ * in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding".
+ *
+ * This variant first writes the DC coefficient (size code, value bits and,
+ * for sizes above 8, a marker bit) and then hands the block to
+ * armVCM4P2_EncodeVLCZigzag_Intra with a "start" value of 1.
+ *
+ * Input Arguments:
+ *
+ *   ppBitStream - double pointer to the current byte in the bitstream
+ *   pBitOffset - pointer to the bit position in the byte pointed by
+ *            *ppBitStream. Valid within 0 to 7.
+ *   pQDctBlkCoef - pointer to the quantized DCT coefficient
+ *   predDir - AC prediction direction, which is used to decide the zigzag
+ *            scan pattern; takes one of the following values:
+ *            -  OMX_VC_NONE - AC prediction not used.
+ *                             Performs classical zigzag scan.
+ *            -  OMX_VC_HORIZONTAL - Horizontal prediction.
+ *                             Performs alternate-vertical zigzag scan.
+ *            -  OMX_VC_VERTICAL - Vertical prediction.
+ *                             Performs alternate-horizontal zigzag scan.
+ *   pattern - block pattern which is used to decide whether this block is
+ *            encoded
+ *   shortVideoHeader - binary flag indicating presence of
+ *            short_video_header; escape modes 0-3 are used if
+ *            shortVideoHeader==0, and escape mode 4 is used when
+ *            shortVideoHeader==1.
+ *   videoComp - video component type (luminance, chrominance) of the current
+ *            block
+ *
+ * Output Arguments:
+ *
+ *   ppBitStream - *ppBitStream is updated after the block is encoded, so
+ *            that it points to the current byte in the bit stream buffer.
+ *   pBitOffset - *pBitOffset is updated so that it points to the current bit
+ *            position in the byte pointed by *ppBitStream.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - Bad arguments:
+ *    -    At least one of the following pointers is NULL: ppBitStream,
+ *              *ppBitStream, pBitOffset, pQDctBlkCoef.
+ *    -   *pBitOffset < 0, or *pBitOffset >7.
+ *    -    PredDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or
+ *              OMX_VC_VERTICAL.
+ *    -    VideoComp is not one component of enum OMXVCM4P2VideoComponent.
+ *    -    pattern is non-zero and the magnitude of pQDctBlkCoef[0] needs
+ *              more than 12 bits, which no DC size code can represent.
+ *
+ */
+
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraDCVLC(
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    const OMX_S16 *pQDctBlkCoef,
+    OMX_U8 predDir,
+    OMX_U8 pattern,
+    OMX_INT shortVideoHeader,
+    OMXVCM4P2VideoComponent videoComp
+)
+{
+    OMX_S16 dcValue;
+    OMX_U16 absDCValue;
+    OMX_U8  DCValueSize, start = 1;
+
+    /* Argument error checks */
+    armRetArgErrIf(ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(*ppBitStream == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pBitOffset == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pQDctBlkCoef == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((*pBitOffset < 0) || (*pBitOffset >7), OMX_Sts_BadArgErr);
+    armRetArgErrIf((videoComp != OMX_VC_LUMINANCE) && (videoComp != OMX_VC_CHROMINANCE), OMX_Sts_BadArgErr);
+    armRetArgErrIf((predDir != OMX_VC_NONE) && (predDir != OMX_VC_HORIZONTAL) && (predDir != OMX_VC_VERTICAL) , OMX_Sts_BadArgErr);
+
+    if (pattern)
+    {
+        dcValue = pQDctBlkCoef[0];
+        absDCValue = armAbs(dcValue);
+
+        /* Find the size, i.e. the number of bits of the magnitude */
+        DCValueSize = armLogSize (absDCValue);
+
+        /* The DC size tables are indexed directly with the size; the
+           matching decoder treats any size above 12 as an error, so refuse
+           to index past that before anything is written */
+        armRetArgErrIf(DCValueSize > 12, OMX_Sts_BadArgErr);
+
+        /* Insert the size code into the bitstream */
+        if (videoComp == OMX_VC_LUMINANCE)
+        {
+            armPackVLC32 (ppBitStream, pBitOffset,
+                          armVCM4P2_aIntraDCLumaIndex[DCValueSize]);
+        }
+        else if (videoComp == OMX_VC_CHROMINANCE)
+        {
+            armPackVLC32 (ppBitStream, pBitOffset,
+                          armVCM4P2_aIntraDCChromaIndex[DCValueSize]);
+        }
+
+        /* A size of zero carries no value bits */
+        if (DCValueSize > 0)
+        {
+            if (dcValue < 0)
+            {
+                /* Negative values are sent as the magnitude with its
+                   DCValueSize low bits inverted */
+                absDCValue = absDCValue ^ ((1 << DCValueSize) - 1);
+            }
+            armPackBits(ppBitStream, pBitOffset, (OMX_U32)absDCValue, \
+                        DCValueSize);
+
+            if (DCValueSize > 8)
+            {
+                /* Marker bit following a DC value longer than 8 bits */
+                armPackBits(ppBitStream, pBitOffset, 1, 1);
+            }
+        }
+    }
+
+    return armVCM4P2_EncodeVLCZigzag_Intra(
+                 ppBitStream,
+                 pBitOffset,
+                 pQDctBlkCoef,
+                 predDir,
+                 pattern,
+                 shortVideoHeader,
+                 start);
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_FindMVpred.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_FindMVpred.c
new file mode 100644
index 0000000..a0cff48
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_FindMVpred.c
@@ -0,0 +1,188 @@
+/**
+ *
+ * File Name: omxVCM4P2_FindMVpred.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains module for predicting MV of MB
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P2_FindMVpred (6.2.3.1.1)
+ *
+ * Description:
+ * Predicts a motion vector for the current block using the procedure
+ * specified in [ISO14496-2], subclause 7.6.5. The resulting predicted MV is
+ * returned in pDstMVPred. If the parameter pDstMVPredME is not NULL then
+ * the set of three MV candidates used for prediction is also returned,
+ * otherwise pDstMVPredME is NULL upon return.
+ *
+ * Input Arguments:
+ *
+ * pSrcMVCurMB - pointer to the MV buffer associated with the current Y
+ * macroblock; a value of NULL indicates unavailability.
+ * pSrcCandMV1 - pointer to the MV buffer containing the 4 MVs associated
+ * with the MB located to the left of the current MB; set to NULL
+ * if there is no MB to the left.
+ * pSrcCandMV2 - pointer to the MV buffer containing the 4 MVs associated
+ * with the MB located above the current MB; set to NULL if there
+ * is no MB located above the current MB.
+ * pSrcCandMV3 - pointer to the MV buffer containing the 4 MVs associated
+ * with the MB located to the right and above the current MB; set
+ * to NULL if there is no MB located to the above-right.
+ * iBlk - the index of block in the current macroblock
+ * pDstMVPredME - MV candidate return buffer; if set to NULL then
+ * prediction candidate MVs are not returned and pDstMVPredME will
+ * be NULL upon function return; if pDstMVPredME is non-NULL then it
+ * must point to a buffer containing sufficient space for three
+ * return MVs.
+ *
+ * Output Arguments:
+ *
+ * pDstMVPred - pointer to the predicted motion vector
+ * pDstMVPredME - if non-NULL upon input then pDstMVPredME points upon
+ * return to a buffer containing the three motion vector candidates
+ * used for prediction as specified in [ISO14496-2], subclause
+ * 7.6.5, otherwise if NULL upon input then pDstMVPredME is NULL
+ * upon output.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - the pointer pDstMVPred is NULL
+ * - the parameter iBlk does not fall into the range 0 <= iBlk<=3
+ *
+ */
+
+OMXResult omxVCM4P2_FindMVpred(
+    const OMXVCMotionVector* pSrcMVCurMB,
+    const OMXVCMotionVector* pSrcCandMV1,
+    const OMXVCMotionVector* pSrcCandMV2,
+    const OMXVCMotionVector* pSrcCandMV3,
+    OMXVCMotionVector* pDstMVPred,
+    OMXVCMotionVector* pDstMVPredME,
+    OMX_INT iBlk
+ )
+{
+    /* Neighbour availability: left, upper and upper-right macroblocks */
+    const OMX_INT haveLeft    = (pSrcCandMV1 != NULL);
+    const OMX_INT haveUp      = (pSrcCandMV2 != NULL);
+    const OMX_INT haveUpRight = (pSrcCandMV3 != NULL);
+
+    OMXVCMotionVector zeroMV;
+    const OMXVCMotionVector *pCand[3];
+    OMX_INT k;
+
+    /* Argument error checks */
+    armRetArgErrIf((iBlk != 0) && (pSrcMVCurMB == NULL), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstMVPred == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((iBlk < 0) || (iBlk > 3), OMX_Sts_BadArgErr);
+
+    /* Every candidate starts out as the zero vector, which is what an
+       unavailable neighbour contributes unless replaced below */
+    zeroMV.dx = 0;
+    zeroMV.dy = 0;
+    pCand[0] = &zeroMV;
+    pCand[1] = &zeroMV;
+    pCand[2] = &zeroMV;
+
+    /* Pick the three candidates according to the block position */
+    if (iBlk == 0)
+    {
+        /* All three candidates come from neighbouring macroblocks */
+        if (haveLeft)
+        {
+            pCand[0] = pSrcCandMV1 + 1;
+        }
+        if (haveUp)
+        {
+            pCand[1] = pSrcCandMV2 + 2;
+        }
+        if (haveUpRight)
+        {
+            pCand[2] = pSrcCandMV3 + 2;
+        }
+
+        /* When two neighbours are missing, the remaining candidate is used
+           for all three slots */
+        if (!haveLeft && !haveUp)
+        {
+            pCand[0] = pCand[1] = pCand[2];
+        }
+        else if (!haveLeft && !haveUpRight)
+        {
+            pCand[0] = pCand[2] = pCand[1];
+        }
+        else if (!haveUp && !haveUpRight)
+        {
+            pCand[1] = pCand[2] = pCand[0];
+        }
+    }
+    else if (iBlk == 1)
+    {
+        /* First candidate is block 0 of the current macroblock */
+        pCand[0] = pSrcMVCurMB;
+        if (haveUp)
+        {
+            pCand[1] = pSrcCandMV2 + 3;
+        }
+        if (haveUpRight)
+        {
+            pCand[2] = pSrcCandMV3 + 2;
+        }
+        if (!haveUp && !haveUpRight)
+        {
+            pCand[1] = pCand[2] = pCand[0];
+        }
+    }
+    else if (iBlk == 2)
+    {
+        /* Only the left neighbour is external for this block */
+        if (haveLeft)
+        {
+            pCand[0] = pSrcCandMV1 + 3;
+        }
+        pCand[1] = pSrcMVCurMB;
+        pCand[2] = pSrcMVCurMB + 1;
+    }
+    else
+    {
+        /* iBlk == 3: all candidates lie inside the current macroblock */
+        pCand[0] = pSrcMVCurMB + 2;
+        pCand[1] = pSrcMVCurMB;
+        pCand[2] = pSrcMVCurMB + 1;
+    }
+
+    /* The prediction is the component-wise median of the candidates */
+    pDstMVPred->dx = armMedianOf3 (pCand[0]->dx, pCand[1]->dx, pCand[2]->dx);
+    pDstMVPred->dy = armMedianOf3 (pCand[0]->dy, pCand[1]->dy, pCand[2]->dy);
+
+    /* Optionally hand the candidates back to the caller as well */
+    if (pDstMVPredME != NULL)
+    {
+        for (k = 0; k < 3; k++)
+        {
+            pDstMVPredME[k].dx = pCand[k]->dx;
+            pDstMVPredME[k].dy = pCand[k]->dy;
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+
+/* End of file */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_IDCT8x8blk.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_IDCT8x8blk.c
new file mode 100644
index 0000000..1886d92
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_IDCT8x8blk.c
@@ -0,0 +1,92 @@
+/**
+ *
+ * File Name: omxVCM4P2_IDCT8x8blk.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for 8x8 block IDCT
+ *
+ */
+
+
+#include <math.h>
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVCM4P2_DCT_Table.h"
+
+/**
+ * Function:  omxVCM4P2_IDCT8x8blk   (6.2.3.2.1)
+ *
+ * Description:
+ * Computes a 2D inverse DCT for a single 8x8 block, as defined in
+ * [ISO14496-2].  Each output sample is rounded to the nearest integer and
+ * saturated to the range [-256, 255].
+ *
+ * Input Arguments:
+ *
+ *   pSrc - pointer to the start of the linearly arranged IDCT input buffer;
+ *            must be aligned on a 16-byte boundary.  According to
+ *            [ISO14496-2], the input coefficient values should lie within the
+ *            range [-2048, 2047].
+ *
+ * Output Arguments:
+ *
+ *   pDst - pointer to the start of the linearly arranged IDCT output buffer;
+ *            must be aligned on a 16-byte boundary.  This implementation
+ *            reads the whole input block for every output sample, so pDst
+ *            should not overlap pSrc.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    pSrc or pDst is NULL.
+ *    -    pSrc or pDst is not 16-byte aligned.
+ *
+ */
+OMXResult omxVCM4P2_IDCT8x8blk (const OMX_S16 *pSrc, OMX_S16 *pDst)
+{
+    OMX_INT x, y, u, v;
+
+    /* Argument error checks */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
+
+    for (x = 0; x < 8; x++)
+    {
+        for (y = 0; y < 8; y++)
+        {
+            OMX_F64 sum = 0.0;
+
+            /* Direct evaluation of the 2D transform for sample (x, y) */
+            for (u = 0; u < 8; u++)
+            {
+                for (v = 0; v < 8; v++)
+                {
+                    sum += pSrc[(u * 8) + v] *
+                        armVCM4P2_preCalcDCTCos[x][u] *
+                        armVCM4P2_preCalcDCTCos[y][v];
+                }
+            }
+
+            /* Round to nearest, then saturate to [-256, 255] while still in
+               floating point: narrowing an out-of-range double to OMX_S16
+               first would be undefined for oversized coefficients */
+            sum = floor(sum + 0.5);
+            if (sum < -256.0)
+            {
+                sum = -256.0;
+            }
+            else if (sum > 255.0)
+            {
+                sum = 255.0;
+            }
+
+            pDst[(x * 8) + y] = (OMX_S16) sum;
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MCReconBlock.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MCReconBlock.c
new file mode 100644
index 0000000..7b3faee
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MCReconBlock.c
@@ -0,0 +1,357 @@
+/**
+ *
+ * File Name: omxVCM4P2_MCReconBlock.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * MPEG4 motion compensation prediction for an 8x8 block using
+ * interpolation
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P2_HalfPelVer
+ *
+ * Description:
+ * Performs half pel motion compensation for an 8x8 block using vertical
+ * interpolation described in ISO/IEC 14496-2, subclause 7.6.2.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrc pointer to the block in the reference plane.
+ * [in] srcStep distance between the start of consecutive lines
+ * in the reference plane, in bytes; must be a multiple
+ * of 8.
+ * [in] rndVal rounding control parameter: 0 - disabled; 1 - enabled.
+ * [out] pDst pointer to the linear 8x8 destination buffer;
+ *
+ */
+static OMXVoid armVCM4P2_HalfPelVer(
+    const OMX_U8 *pSrc,
+    OMX_INT srcStep,
+    OMX_U8 *pDst,
+    OMX_INT rndVal)
+{
+    /* Rounding term added before halving: 1 when rndVal is 0, else 0 */
+    const OMX_INT bias = 1 - rndVal;
+    OMX_INT row, col;
+
+    for (row = 0; row < 8; row++)
+    {
+        /* Each output row averages a reference row with the one below it */
+        const OMX_U8 *pUpper = pSrc + (row * srcStep);
+        const OMX_U8 *pLower = pUpper + srcStep;
+
+        for (col = 0; col < 8; col++)
+        {
+            /* Destination is a packed 8x8 block (stride 8) */
+            pDst[(row * 8) + col] = (pUpper[col] + pLower[col] + bias) >> 1;
+        }
+    }
+}
+
+/**
+ * Function: armVCM4P2_HalfPelHor
+ *
+ * Description:
+ * Performs half pel motion compensation for an 8x8 block using horizontal
+ * interpolation described in ISO/IEC 14496-2, subclause 7.6.2.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrc pointer to the block in the reference plane.
+ * [in] srcStep distance between the start of consecutive lines
+ * in the reference plane, in bytes; must be a multiple
+ * of 8.
+ * [in] rndVal rounding control parameter: 0 - disabled; 1 - enabled.
+ * [out] pDst pointer to the linear 8x8 destination buffer;
+ *
+ */
+static OMXVoid armVCM4P2_HalfPelHor(
+    const OMX_U8 *pSrc,
+    OMX_INT srcStep,
+    OMX_U8 *pDst,
+    OMX_INT rndVal)
+{
+    /* Rounding term added before halving: 1 when rndVal is 0, else 0 */
+    const OMX_INT bias = 1 - rndVal;
+    OMX_INT row, col;
+
+    for (row = 0; row < 8; row++)
+    {
+        const OMX_U8 *pLine = pSrc + (row * srcStep);
+
+        for (col = 0; col < 8; col++)
+        {
+            /* Average each pixel with its right-hand neighbour, so nine
+               source pixels are read per row; destination stride is 8 */
+            pDst[(row * 8) + col] = (pLine[col] + pLine[col + 1] + bias) >> 1;
+        }
+    }
+}
+
+
+/**
+ * Function: armVCM4P2_HalfPelVerHor
+ *
+ * Description:
+ * Performs half pel motion compensation for an 8x8 block using both
+ * horizontal and vertical interpolation described in ISO/IEC 14496-2,
+ * subclause 7.6.2.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrc pointer to the block in the reference plane.
+ * [in] srcStep distance between the start of consecutive lines
+ * in the reference plane, in bytes; must be a multiple
+ * of 8.
+ * [in] rndVal rounding control parameter: 0 - disabled; 1 - enabled.
+ * [out] pDst pointer to the linear 8x8 destination buffer;
+ *
+ */
+static OMXVoid armVCM4P2_HalfPelVerHor(
+    const OMX_U8 *pSrc,
+    OMX_INT srcStep,
+    OMX_U8 *pDst,
+    OMX_INT rndVal)
+{
+    /* Rounding term added before the divide by four: 2 or 1 */
+    const OMX_INT bias = 2 - rndVal;
+    OMX_INT row, col;
+
+    for (row = 0; row < 8; row++)
+    {
+        /* Two consecutive reference rows feed one output row */
+        const OMX_U8 *pUpper = pSrc + (row * srcStep);
+        const OMX_U8 *pLower = pUpper + srcStep;
+
+        for (col = 0; col < 8; col++)
+        {
+            /* Mean of the 2x2 neighbourhood anchored at (row, col);
+               destination is a packed 8x8 block (stride 8) */
+            pDst[(row * 8) + col] = (pUpper[col] + pLower[col] +
+                                     pUpper[col + 1] + pLower[col + 1] +
+                                     bias) >> 2;
+        }
+    }
+}
+
+/**
+ * Function: armVCM4P2_MCReconBlock_NoRes
+ *
+ * Description:
+ * Copies an 8x8 block of pixels from a source plane to a destination plane,
+ * each with its own line stride.  No interpolation and no residual addition
+ * is performed.
+ *
+ * Remarks:
+ * Row offsets are tracked with pointer increments of OMX_INT strides, so any
+ * stride is handled; 8-bit offset counters would wrap around once the offset
+ * of a later row exceeds 255.
+ *
+ * Parameters:
+ * [in] pSrc        pointer to the top-left pixel of the source block.
+ * [in] srcStep     distance between the start of consecutive lines
+ *                  in the source plane, in bytes.
+ * [in] dstStep     distance between the start of consecutive lines in the
+ *                  destination plane, in bytes.
+ * [out] pDst       pointer to the top-left pixel of the destination block.
+ *
+ */
+static OMXVoid armVCM4P2_MCReconBlock_NoRes(
+    const OMX_U8 *pSrc,
+    OMX_INT srcStep,
+    OMX_U8 *pDst,
+    OMX_INT dstStep)
+{
+    OMX_INT x, y;
+
+    /* Copying the ref 8x8 blk to the curr blk, one row at a time */
+    for (y = 0; y < 8; y++)
+    {
+        for (x = 0; x < 8; x++)
+        {
+            pDst[x] = pSrc[x];
+        }
+
+        /* Advance both planes to the start of their next line */
+        pSrc += srcStep;
+        pDst += dstStep;
+    }
+}
+
+/**
+ * Function: armVCM4P2_MCReconBlock_Res
+ *
+ * Description:
+ * Reconstructs INTER block by summing the motion compensation results
+ * and the results of the inverse transformation (prediction residuals).
+ * Output intensities are clipped to the range [0,255].
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrc pointer to the block in the reference plane.
+ * [in] pSrcResidue pointer to a buffer containing the 16-bit prediction
+ * residuals. If the pointer is NULL,then no prediction
+ * is done, only motion compensation, i.e., the block is
+ * moved with interpolation.
+ * [in] dstStep distance between the start of consecutive lines in the
+ * destination plane, in bytes; must be a multiple of 8.
+ * [out] pDst pointer to the destination buffer; must be 8-byte aligned.
+ * If prediction residuals are added then output intensities
+ * are clipped to the range [0,255].
+ *
+ */
+static OMXVoid armVCM4P2_MCReconBlock_Res(
+    const OMX_U8 *pSrc,
+    const OMX_S16 *pSrcResidue,
+    OMX_U8 *pDst,
+    OMX_INT dstStep)
+{
+    OMX_INT row, col;
+
+    for (row = 0; row < 8; row++)
+    {
+        /* Prediction and residual are packed 8x8 blocks (stride 8); only
+           the destination uses the caller supplied stride */
+        const OMX_U8  *pPred = pSrc + (row * 8);
+        const OMX_S16 *pRes  = pSrcResidue + (row * 8);
+        OMX_U8        *pOut  = pDst + (row * dstStep);
+
+        for (col = 0; col < 8; col++)
+        {
+            /* Reconstructed pixel = prediction + residual, kept in 0..255 */
+            OMX_INT sum = pPred[col] + pRes[col];
+
+            pOut[col] = armClip(0, 255, sum);
+        }
+    }
+}
+
+/**
+ * Function: omxVCM4P2_MCReconBlock (6.2.5.5.1)
+ *
+ * Description:
+ * Performs motion compensation prediction for an 8x8 block using
+ * interpolation described in [ISO14496-2], subclause 7.6.2.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the block in the reference plane.
+ * srcStep - distance between the start of consecutive lines in the
+ * reference plane, in bytes; must be a multiple of 8.
+ * dstStep - distance between the start of consecutive lines in the
+ * destination plane, in bytes; must be a multiple of 8.
+ * pSrcResidue - pointer to a buffer containing the 16-bit prediction
+ * residuals; must be 16-byte aligned. If the pointer is NULL, then
+ * no prediction is done, only motion compensation, i.e., the block
+ * is moved with interpolation.
+ * predictType - bilinear interpolation type, as defined in section
+ * 6.2.1.2.
+ * rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination buffer; must be 8-byte aligned. If
+ * prediction residuals are added then output intensities are
+ * clipped to the range [0,255].
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - pDst is not 8-byte aligned.
+ * - pSrcResidue is not 16-byte aligned.
+ * - one or more of the following pointers is NULL: pSrc or pDst.
+ * - either srcStep or dstStep is not a multiple of 8.
+ * - invalid type specified for the parameter predictType.
+ * - the parameter rndVal is not equal either to 0 or 1.
+ *
+ */
+OMXResult omxVCM4P2_MCReconBlock(
+    const OMX_U8 *pSrc,
+    OMX_INT srcStep,
+    const OMX_S16 *pSrcResidue,
+    OMX_U8 *pDst,
+    OMX_INT dstStep,
+    OMX_INT predictType,
+    OMX_INT rndVal)
+{
+    /* Packed 8x8 scratch block holding the motion compensated prediction */
+    OMX_U8 predBlock[64];
+
+    /* Argument error checks */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pSrcResidue), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((dstStep % 8) || (srcStep % 8)), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((predictType != OMX_VC_INTEGER_PIXEL) &&
+                    (predictType != OMX_VC_HALF_PIXEL_X) &&
+                    (predictType != OMX_VC_HALF_PIXEL_Y) &&
+                    (predictType != OMX_VC_HALF_PIXEL_XY)
+                   ),OMX_Sts_BadArgErr);
+    armRetArgErrIf(((rndVal != 0) && (rndVal != 1)),OMX_Sts_BadArgErr);
+
+    /* Stage 1: build the prediction in the scratch block, interpolating as
+       requested by predictType */
+    if (predictType == OMX_VC_INTEGER_PIXEL)
+    {
+        armVCM4P2_MCReconBlock_NoRes(pSrc, srcStep, predBlock, 8);
+    }
+    else if (predictType == OMX_VC_HALF_PIXEL_X)
+    {
+        armVCM4P2_HalfPelHor(pSrc, srcStep, predBlock, rndVal);
+    }
+    else if (predictType == OMX_VC_HALF_PIXEL_Y)
+    {
+        armVCM4P2_HalfPelVer(pSrc, srcStep, predBlock, rndVal);
+    }
+    else if (predictType == OMX_VC_HALF_PIXEL_XY)
+    {
+        armVCM4P2_HalfPelVerHor(pSrc, srcStep, predBlock, rndVal);
+    }
+
+    /* Stage 2: write the destination, adding the residual when one is
+       supplied and copying the prediction unchanged otherwise */
+    if (pSrcResidue != NULL)
+    {
+        armVCM4P2_MCReconBlock_Res(predBlock, pSrcResidue, pDst, dstStep);
+    }
+    else
+    {
+        armVCM4P2_MCReconBlock_NoRes(predBlock, 8, pDst, dstStep);
+    }
+
+    return OMX_Sts_NoErr;
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEGetBufSize.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEGetBufSize.c
new file mode 100644
index 0000000..a8e51da
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEGetBufSize.c
@@ -0,0 +1,70 @@
+/**
+ *
+ * File Name: omxVCM4P2_MEGetBufSize.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Initialization modules for the vendor specific Motion Estimation structure.
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P2_MEGetBufSize   (6.2.4.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification
+ * structure for the following motion estimation functions:
+ * BlockMatch_Integer_8x8, BlockMatch_Integer_16x16, and MotionEstimationMB.
+ * This implementation always reports sizeof(ARMVCM4P2_MESpec); MEMode and
+ * pMEParams are only validated.
+ *
+ * Input Arguments:
+ *
+ *   MEMode - motion estimation mode; available modes are defined by the
+ *            enumerated type OMXVCM4P2MEMode
+ *   pMEParams - motion estimation parameters
+ *
+ * Output Arguments:
+ *
+ *   pSize - pointer to the number of bytes required for the specification
+ *            structure
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - one or more of the following is true:
+ *    -    pMEParams or pSize is NULL
+ *    -    an invalid value was specified for the parameter MEMode
+ *    -    a negative or zero value was specified for the
+ *         parameter pMEParams->searchRange
+ *
+ */
+
+OMXResult omxVCM4P2_MEGetBufSize(
+    OMXVCM4P2MEMode MEMode,
+    const OMXVCM4P2MEParams *pMEParams,
+    OMX_U32 *pSize
+   )
+{
+    armRetArgErrIf(!pMEParams, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!pSize, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMEParams->searchRange <= 0, OMX_Sts_BadArgErr);
+    /* MEMode is an OMXVCM4P2MEMode, so it is checked against the M4P2
+     * constants, exactly as omxVCM4P2_MEInit does (not the M4P10 ones). */
+    armRetArgErrIf((MEMode != OMX_VC_M4P2_FAST_SEARCH) &&
+                   (MEMode != OMX_VC_M4P2_FULL_SEARCH), OMX_Sts_BadArgErr);
+
+    /* Cast to the destination type rather than a signed intermediate */
+    *pSize = (OMX_U32) sizeof(ARMVCM4P2_MESpec);
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEInit.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEInit.c
new file mode 100644
index 0000000..419e71a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MEInit.c
@@ -0,0 +1,84 @@
+/**
+ *
+ * File Name: omxVCM4P2_MEInit.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Initialization modules for the vendor specific Motion Estimation structure.
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P2_MEInit   (6.2.4.1.2)
+ *
+ * Description:
+ * Fills in the vendor-specific ME specification structure used by
+ * BlockMatch_Integer_8x8, BlockMatch_Integer_16x16 and MotionEstimationMB.
+ * The caller allocates *pMESpec beforehand (the required size is reported
+ * by omxVCM4P2_MEGetBufSize) and should align it on a 4-byte boundary; the
+ * alignment is not verified here. On success the structure holds a copy of
+ * searchEnable8x8, halfPelSearchEnable, searchRange and rndVal taken from
+ * pMEParams, together with the requested MEMode.
+ *
+ * Input Arguments:
+ *
+ *   MEMode - motion estimation mode; available modes are defined by the
+ *            enumerated type OMXVCM4P2MEMode
+ *   pMEParams - motion estimation parameters
+ *   pMESpec - pointer to the uninitialized ME specification structure
+ *
+ * Output Arguments:
+ *
+ *   pMESpec - pointer to the initialized ME specification structure
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - one or more of the following is true:
+ *    -    pMEParams or pMESpec is NULL
+ *    -    an invalid value was specified for the parameter MEMode
+ *    -    a negative or zero value was specified for the
+ *         parameter pMEParams->searchRange
+ *
+ */
+
+OMXResult omxVCM4P2_MEInit(
+    OMXVCM4P2MEMode MEMode,
+    const OMXVCM4P2MEParams *pMEParams,
+    void *pMESpec
+   )
+{
+    ARMVCM4P2_MESpec *pSpec;
+
+    /* Validate everything before the structure is written */
+    armRetArgErrIf(pMEParams == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!((MEMode == OMX_VC_M4P2_FAST_SEARCH) ||
+                     (MEMode == OMX_VC_M4P2_FULL_SEARCH)), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMEParams->searchRange <= 0, OMX_Sts_BadArgErr);
+
+    /* View the caller's opaque buffer as the implementation's spec structure */
+    pSpec = (ARMVCM4P2_MESpec *) pMESpec;
+
+    pSpec->MEMode                       = MEMode;
+    pSpec->MEParams.rndVal              = pMEParams->rndVal;
+    pSpec->MEParams.searchRange         = pMEParams->searchRange;
+    pSpec->MEParams.halfPelSearchEnable = pMEParams->halfPelSearchEnable;
+    pSpec->MEParams.searchEnable8x8     = pMEParams->searchEnable8x8;
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MotionEstimationMB.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MotionEstimationMB.c
new file mode 100644
index 0000000..9549050
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_MotionEstimationMB.c
@@ -0,0 +1,630 @@
+/**
+ *
+ * File Name: omxVCM4P2_MotionEstimationMB.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains module for motion search 16x16 macroblock
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+/**
+ * Function: armVCM4P2_BlockMatch_16x16
+ *
+ * Description:
+ * 16x16 block match wrapper function, calls omxVCM4P2_BlockMatch_Integer_16x16.
+ * If half pel search is enabled it also calls omxVCM4P2_BlockMatch_Half_16x16
+ * to refine the result of the integer search.
+ *
+ * Remarks:
+ * The first failing status from either search is returned to the caller;
+ * the half pel search is skipped when the integer search fails.
+ *
+ * Parameters:
+ * [in]    pSrcRefBuf      pointer to the reference Y plane; points to the reference MB that
+ *                    corresponds to the location of the current macroblock in the current
+ *                    plane.
+ * [in]    srcRefStep      width of the reference plane
+ * [in]    pRefRect        pointer to the valid rectangular in reference plane. Relative to image origin.
+ *                    It's not limited to the image boundary, but depended on the padding. For example,
+ *                    if you pad 4 pixels outside the image border, then the value for left border
+ *                    can be -4
+ * [in]    pSrcCurrBuf     pointer to the current macroblock extracted from original plane (linear array,
+ *                    256 entries); must be aligned on an 16-byte boundary.
+ * [in]    pCurrPointPos   position of the current macroblock in the current plane
+ * [in]    pSrcPreMV       pointer to predicted motion vector; NULL indicates no predicted MV
+ * [in]    pSrcPreSAD      pointer to SAD associated with the predicted MV (referenced by pSrcPreMV);
+ *                    may be set to NULL if unavailable.
+ * [in]    pMESpec         vendor-specific motion estimation specification structure; must have been
+ *                    allocated and then initialized using omxVCM4P2_MEInit prior to calling the
+ *                    block matching function. Must not be NULL.
+ * [out]   pDstMV          pointer to estimated MV
+ * [out]   pDstSAD         pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - pMESpec is NULL
+ * otherwise - the status reported by the failing block match function
+ *
+ */
+static OMXResult armVCM4P2_BlockMatch_16x16(
+    const OMX_U8 *pSrcRefBuf,
+    const OMX_INT srcRefStep,
+    const OMXRect *pRefRect,
+    const OMX_U8 *pSrcCurrBuf,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    OMXVCMotionVector *pSrcPreMV,
+    OMX_INT *pSrcPreSAD,
+    void *pMESpec,
+    OMXVCMotionVector *pDstMV,
+    OMX_INT *pDstSAD
+)
+{
+    /* NOTE(review): the spec is read through an OMXVCM4P2MEParams pointer;
+     * presumably MEParams is the leading member of ARMVCM4P2_MESpec - confirm. */
+    OMXVCM4P2MEParams *pMEParams = (OMXVCM4P2MEParams *)pMESpec;
+    OMX_INT rndVal;
+    OMXResult status;
+
+    /* The spec is dereferenced below, so reject NULL up front */
+    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr);
+
+    rndVal = pMEParams->rndVal;
+
+    status = omxVCM4P2_BlockMatch_Integer_16x16(
+                pSrcRefBuf,
+                srcRefStep,
+                pRefRect,
+                pSrcCurrBuf,
+                pCurrPointPos,
+                pSrcPreMV,
+                pSrcPreSAD,
+                pMEParams,
+                pDstMV,
+                pDstSAD);
+    if (status != OMX_Sts_NoErr)
+    {
+        /* Do not run the half pel search after a failed integer search */
+        return status;
+    }
+
+    if (pMEParams->halfPelSearchEnable)
+    {
+        status = omxVCM4P2_BlockMatch_Half_16x16(
+                    pSrcRefBuf,
+                    srcRefStep,
+                    pRefRect,
+                    pSrcCurrBuf,
+                    pCurrPointPos,
+                    rndVal,
+                    pDstMV,
+                    pDstSAD);
+    }
+
+    return status;
+}
+
+/**
+ * Function: armVCM4P2_BlockMatch_8x8
+ *
+ * Description:
+ * 8x8 block match wrapper function, calls omxVCM4P2_BlockMatch_Integer_8x8.
+ * If half pel search is enabled it also calls omxVCM4P2_BlockMatch_Half_8x8
+ * to refine the result of the integer search.
+ *
+ * Remarks:
+ * The first failing status from either search is returned to the caller;
+ * the half pel search is skipped when the integer search fails.
+ *
+ * Parameters:
+ * [in]    pSrcRefBuf      pointer to the reference Y plane; points to the reference block that
+ *                    corresponds to the location of the current block in the current
+ *                    plane.
+ * [in]    srcRefStep      width of the reference plane
+ * [in]    pRefRect        pointer to the valid rectangular in reference plane. Relative to image origin.
+ *                    It's not limited to the image boundary, but depended on the padding. For example,
+ *                    if you pad 4 pixels outside the image border, then the value for left border
+ *                    can be -4
+ * [in]    pSrcCurrBuf     pointer to the current block data, passed unchanged to the 8x8 block
+ *                    match functions (layout and alignment as required by those functions).
+ * [in]    pCurrPointPos   position of the current block in the current plane
+ * [in]    pSrcPreMV       pointer to predicted motion vector; NULL indicates no predicted MV
+ * [in]    pSrcPreSAD      pointer to SAD associated with the predicted MV (referenced by pSrcPreMV);
+ *                    may be set to NULL if unavailable.
+ * [in]    pMESpec         vendor-specific motion estimation specification structure; must have been
+ *                    allocated and then initialized using omxVCM4P2_MEInit prior to calling the
+ *                    block matching function. Must not be NULL.
+ * [out]   pSrcDstMV       pointer to estimated MV
+ * [out]   pDstSAD         pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - pMESpec is NULL
+ * otherwise - the status reported by the failing block match function
+ *
+ */
+static OMXResult armVCM4P2_BlockMatch_8x8(
+    const OMX_U8 *pSrcRefBuf,
+    OMX_INT srcRefStep,
+    const OMXRect *pRefRect,
+    const OMX_U8 *pSrcCurrBuf,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    OMXVCMotionVector *pSrcPreMV,
+    OMX_INT *pSrcPreSAD,
+    void *pMESpec,
+    OMXVCMotionVector *pSrcDstMV,
+    OMX_INT *pDstSAD
+)
+{
+    /* NOTE(review): the spec is read through an OMXVCM4P2MEParams pointer;
+     * presumably MEParams is the leading member of ARMVCM4P2_MESpec - confirm. */
+    OMXVCM4P2MEParams *pMEParams = (OMXVCM4P2MEParams *)pMESpec;
+    OMX_INT rndVal;
+    OMXResult status;
+
+    /* The spec is dereferenced below, so reject NULL up front */
+    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr);
+
+    rndVal = pMEParams->rndVal;
+
+    status = omxVCM4P2_BlockMatch_Integer_8x8(
+                pSrcRefBuf,
+                srcRefStep,
+                pRefRect,
+                pSrcCurrBuf,
+                pCurrPointPos,
+                pSrcPreMV,
+                pSrcPreSAD,
+                pMEParams,
+                pSrcDstMV,
+                pDstSAD);
+    if (status != OMX_Sts_NoErr)
+    {
+        /* Do not run the half pel search after a failed integer search */
+        return status;
+    }
+
+    if (pMEParams->halfPelSearchEnable)
+    {
+        status = omxVCM4P2_BlockMatch_Half_8x8(
+                    pSrcRefBuf,
+                    srcRefStep,
+                    pRefRect,
+                    pSrcCurrBuf,
+                    pCurrPointPos,
+                    rndVal,
+                    pSrcDstMV,
+                    pDstSAD);
+    }
+
+    return status;
+}
+
+
+/**
+ * Function: omxVCM4P2_MotionEstimationMB   (6.2.4.3.1)
+ *
+ * Description:
+ * Performs motion search for a 16x16 macroblock.  Selects best motion search
+ * strategy from among inter-1MV, inter-4MV, and intra modes.  Supports
+ * integer and half pixel resolution.
+ *
+ * Input Arguments:
+ *
+ *   pSrcCurrBuf - pointer to the top-left corner of the current MB in the
+ *            original picture plane; must be aligned on a 16-byte boundary.
+ *            The function does not expect source data outside the region
+ *            bounded by the MB to be available; for example it is not
+ *            necessary for the caller to guarantee the availability of
+ *            pSrcCurrBuf[-SrcCurrStep], i.e., the row of pixels above the MB
+ *            to be processed.
+ *   srcCurrStep - width of the original picture plane, in terms of full
+ *            pixels; must be a multiple of 16.
+ *   pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ *            plane location corresponding to the location of the current
+ *            macroblock in the current plane; must be aligned on a 16-byte
+ *            boundary.
+ *   srcRefStep - width of the reference picture plane, in terms of full
+ *            pixels; must be a multiple of 16.
+ *   pRefRect - reference plane valid region rectangle, specified relative to
+ *            the image origin
+ *   pCurrPointPos - position of the current macroblock in the current plane
+ *   pMESpec - pointer to the vendor-specific motion estimation specification
+ *            structure; must be allocated and then initialized using
+ *            omxVCM4P2_MEInit prior to calling this function.
+ *   pMBInfo - array, of dimension four, containing pointers to information
+ *            associated with four nearby MBs:
+ *            -   pMBInfo[0] - pointer to left MB information
+ *            -   pMBInfo[1] - pointer to top MB information
+ *            -   pMBInfo[2] - pointer to top-left MB information
+ *            -   pMBInfo[3] - pointer to top-right MB information
+ *            Any pointer in the array may be set equal to NULL if the
+ *            corresponding MB doesn't exist.  For each MB, the following structure
+ *            members are used:
+ *            -   mbType - macroblock type, either OMX_VC_INTRA, OMX_VC_INTER, or
+ *                OMX_VC_INTER4V
+ *            -   pMV0[2][2] - estimated motion vectors; represented
+ *                in 1/2 pixel units
+ *            -   sliceID - number of the slice to which the MB belongs
+ *            This implementation reads only entries 0, 1 and 3, and of those
+ *            only mbType and pMV0.
+ *   pSrcDstMBCurr - pointer to information structure for the current MB.
+ *            The following entries should be set prior to calling the
+ *            function: sliceID - the number of the slice the to which the
+ *            current MB belongs.  The structure elements cbpy and cbpc are
+ *            ignored.
+ *
+ * Output Arguments:
+ *
+ *   pSrcDstMBCurr - pointer to updated information structure for the current
+ *            MB after MB-level motion estimation has been completed.  The
+ *            following structure members are updated by the ME function:
+ *              -  mbType - macroblock type: OMX_VC_INTRA, OMX_VC_INTER, or
+ *                 OMX_VC_INTER4V.
+ *              -  pMV0[2][2] - estimated motion vectors; represented in
+ *                 terms of 1/2 pel units.
+ *              -  pMVPred[2][2] - predicted motion vectors; represented
+ *                 in terms of 1/2 pel units.
+ *            The structure members cbpy and cbpc are not updated by the function.
+ *   pDstSAD - pointer to the minimum SAD for INTER1V, or sum of minimum SADs
+ *            for INTER4V
+ *   pDstBlockSAD - pointer to an array of SAD values for each of the four
+ *            8x8 luma blocks in the MB.  The block SADs are in scan order for
+ *            each MB. Set to 0xFFFF when intra mode is selected.
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments.  Returned if one or more of the
+ *              following conditions is true:
+ *    -    at least one of the following pointers is NULL: pSrcCurrBuf,
+ *              pSrcRefBuf, pRefRect, pCurrPointPos, pMESpec, pMBInfo,
+ *              pSrcDstMBCurr, pDstSAD, or pDstBlockSAD.
+ *    -    pSrcCurrBuf or pSrcRefBuf is not aligned on a 16-byte boundary.
+ *    -    srcCurrStep or srcRefStep is not a multiple of 16.
+ *
+ */
+
+OMXResult omxVCM4P2_MotionEstimationMB (
+    const OMX_U8 *pSrcCurrBuf,
+    OMX_S32 srcCurrStep,
+    const OMX_U8 *pSrcRefBuf,
+    OMX_S32 srcRefStep,
+    const OMXRect*pRefRect,
+    const OMXVCM4P2Coordinate *pCurrPointPos,
+    void *pMESpec,
+    const OMXVCM4P2MBInfoPtr *pMBInfo,
+    OMXVCM4P2MBInfo *pSrcDstMBCurr,
+    OMX_U16 *pDstSAD,
+    OMX_U16 *pDstBlockSAD
+)
+{
+
+    OMX_INT intraSAD, average, count, index, x, y;
+    OMXVCMotionVector dstMV16x16;
+    OMX_INT dstSAD16x16;
+    OMX_INT dstSAD8x8;
+    OMXVCM4P2MEParams *pMEParams;
+    OMXVCM4P2Coordinate TempCurrPointPos;
+    OMXVCM4P2Coordinate *pTempCurrPointPos;
+    OMX_U8 aTempSrcCurrBuf[271];       /* 256 pixels + slack for 16-byte alignment */
+    OMX_U8 *pTempSrcCurrBuf;
+    OMX_U8 *pDst;
+    OMX_U8 aDst[71];                   /* 64 pixels + slack for 8-byte alignment */
+    OMX_S32 dstStep = 8;
+    OMX_INT predictType;
+    OMX_S32 Sad;
+    const OMX_U8 *pTempSrcRefBuf;
+    const OMX_U8 *pRefBlock8x8;        /* reference cursor for the four 8x8 searches */
+    OMXVCMotionVector* pSrcCandMV1[4];
+    OMXVCMotionVector* pSrcCandMV2[4];
+    OMXVCMotionVector* pSrcCandMV3[4];
+
+    /* Argument error checks */
+    armRetArgErrIf(pSrcCurrBuf == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcRefBuf == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pRefRect == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pCurrPointPos == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSrcDstMBCurr == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstSAD == NULL, OMX_Sts_BadArgErr);
+    /* These three are dereferenced below and were previously unchecked */
+    armRetArgErrIf(pMESpec == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pMBInfo == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDstBlockSAD == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pSrcCurrBuf), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pSrcRefBuf), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((srcCurrStep % 16) || (srcRefStep % 16)), OMX_Sts_BadArgErr);
+
+
+    pTempCurrPointPos = &(TempCurrPointPos);
+    pTempSrcCurrBuf = armAlignTo16Bytes(aTempSrcCurrBuf);
+    pMEParams = (OMXVCM4P2MEParams *)pMESpec;
+    pTempCurrPointPos->x = pCurrPointPos->x;
+    pTempCurrPointPos->y = pCurrPointPos->y;
+    pSrcDstMBCurr->mbType = OMX_VC_INTER;
+
+    /* Preparing a linear buffer for block match: copy the 16x16 MB into a
+     * contiguous 256-entry array (row stride 16) */
+    for (y = 0, index = count = 0; y < 16; y++, index += srcCurrStep - 16)
+    {
+        for(x = 0; x < 16; x++, count++, index++)
+        {
+            pTempSrcCurrBuf[count] = pSrcCurrBuf[index];
+        }
+    }
+
+    /* Collect candidate MVs from the left (0), top (1) and top-right (3)
+     * neighbours; a missing or intra neighbour contributes no candidate */
+    for(y = 0, index = 0; y < 2; y++)
+    {
+        for(x = 0; x < 2; x++,index++)
+        {
+            if((pMBInfo[0] != NULL) && (pMBInfo[0]->mbType != OMX_VC_INTRA))
+            {
+                pSrcCandMV1[index] = &(pMBInfo[0]->pMV0[y][x]);
+            }
+            else
+            {
+                pSrcCandMV1[index] = NULL;
+            }
+            if((pMBInfo[1] != NULL) && (pMBInfo[1]->mbType != OMX_VC_INTRA))
+            {
+                pSrcCandMV2[index] = &(pMBInfo[1]->pMV0[y][x]);
+            }
+            else
+            {
+                pSrcCandMV2[index] = NULL;
+            }
+            if((pMBInfo[3] != NULL) && (pMBInfo[3]->mbType != OMX_VC_INTRA))
+            {
+                pSrcCandMV3[index] = &(pMBInfo[3]->pMV0[y][x]);
+            }
+            else
+            {
+                pSrcCandMV3[index] = NULL;
+            }
+        }
+    }
+
+    /* Calculating SAD at MV(0,0) */
+    armVCCOMM_SAD(pTempSrcCurrBuf,
+                  16,
+                  pSrcRefBuf,
+                  srcRefStep,
+                  &Sad,
+                  16,
+                  16);
+    *pDstSAD = Sad;
+
+    /* Mode decision for NOT_CODED MB: a perfect match at MV(0,0) ends the
+     * search. Only pMV0[0][0] and *pDstSAD are written on this path. */
+    if(*pDstSAD == 0)
+    {
+        pSrcDstMBCurr->pMV0[0][0].dx = 0;
+        pSrcDstMBCurr->pMV0[0][0].dy = 0;
+        return OMX_Sts_NoErr;
+    }
+
+    omxVCM4P2_FindMVpred(
+                    &(pSrcDstMBCurr->pMV0[0][0]),
+                    pSrcCandMV1[0],
+                    pSrcCandMV2[0],
+                    pSrcCandMV3[0],
+                    &(pSrcDstMBCurr->pMVPred[0][0]),
+                    NULL,
+                    0);
+
+    /* Inter 1 MV */
+    armVCM4P2_BlockMatch_16x16(
+        pSrcRefBuf,
+        srcRefStep,
+        pRefRect,
+        pTempSrcCurrBuf,
+        pCurrPointPos,
+        &(pSrcDstMBCurr->pMVPred[0][0]),
+        NULL,
+        pMEParams,
+        &dstMV16x16,
+        &dstSAD16x16);
+
+    /* Initialize all with 1 MV values */
+    pSrcDstMBCurr->pMV0[0][0].dx = dstMV16x16.dx;
+    pSrcDstMBCurr->pMV0[0][0].dy = dstMV16x16.dy;
+    pSrcDstMBCurr->pMV0[0][1].dx = dstMV16x16.dx;
+    pSrcDstMBCurr->pMV0[0][1].dy = dstMV16x16.dy;
+    pSrcDstMBCurr->pMV0[1][0].dx = dstMV16x16.dx;
+    pSrcDstMBCurr->pMV0[1][0].dy = dstMV16x16.dy;
+    pSrcDstMBCurr->pMV0[1][1].dx = dstMV16x16.dx;
+    pSrcDstMBCurr->pMV0[1][1].dy = dstMV16x16.dy;
+
+    *pDstSAD = dstSAD16x16;
+
+    if (pMEParams->searchEnable8x8)
+    {
+        /* Inter 4MV.
+         * The four 8x8 searches walk a private cursor over the reference
+         * plane so that pSrcRefBuf keeps pointing at the MB origin; the
+         * INTER block SAD pass further down relies on that.
+         * NOTE(review): every search is given pTempSrcCurrBuf (block 0 of
+         * the current MB); whether blocks 1-3 need a per-block pointer
+         * depends on the BlockMatch_*_8x8 contract - confirm. */
+        pRefBlock8x8 = pSrcRefBuf;
+
+        /* Block 0 (top-left) reuses the predictor computed for the 1MV search */
+        armVCM4P2_BlockMatch_8x8 (pRefBlock8x8,
+                                      srcRefStep, pRefRect,
+                                      pTempSrcCurrBuf, pTempCurrPointPos,
+                                      &(pSrcDstMBCurr->pMVPred[0][0]), NULL,
+                                      pMEParams, &(pSrcDstMBCurr->pMV0[0][0]),
+                                      &dstSAD8x8
+                                      );
+        pDstBlockSAD[0] = dstSAD8x8;
+        *pDstSAD = dstSAD8x8;
+
+        /* Block 1 (top-right) */
+        pTempCurrPointPos->x += 8;
+        pRefBlock8x8 += 8;
+        omxVCM4P2_FindMVpred(
+                    &(pSrcDstMBCurr->pMV0[0][1]),
+                    pSrcCandMV1[1],
+                    pSrcCandMV2[1],
+                    pSrcCandMV3[1],
+                    &(pSrcDstMBCurr->pMVPred[0][1]),
+                    NULL,
+                    1);
+
+        armVCM4P2_BlockMatch_8x8 (pRefBlock8x8,
+                                      srcRefStep, pRefRect,
+                                      pTempSrcCurrBuf, pTempCurrPointPos,
+                                      &(pSrcDstMBCurr->pMVPred[0][1]), NULL,
+                                      pMEParams, &(pSrcDstMBCurr->pMV0[0][1]),
+                                      &dstSAD8x8
+                                      );
+        pDstBlockSAD[1] = dstSAD8x8;
+        *pDstSAD += dstSAD8x8;
+
+        /* Block 2 (bottom-left) */
+        pTempCurrPointPos->x -= 8;
+        pTempCurrPointPos->y += 8;
+        pRefBlock8x8 += (srcRefStep * 8) - 8;
+
+        omxVCM4P2_FindMVpred(
+                    &(pSrcDstMBCurr->pMV0[1][0]),
+                    pSrcCandMV1[2],
+                    pSrcCandMV2[2],
+                    pSrcCandMV3[2],
+                    &(pSrcDstMBCurr->pMVPred[1][0]),
+                    NULL,
+                    2);
+        armVCM4P2_BlockMatch_8x8 (pRefBlock8x8,
+                                      srcRefStep, pRefRect,
+                                      pTempSrcCurrBuf, pTempCurrPointPos,
+                                      &(pSrcDstMBCurr->pMVPred[1][0]), NULL,
+                                      pMEParams, &(pSrcDstMBCurr->pMV0[1][0]),
+                                      &dstSAD8x8
+                                      );
+        pDstBlockSAD[2] = dstSAD8x8;
+        *pDstSAD += dstSAD8x8;
+
+        /* Block 3 (bottom-right) */
+        pTempCurrPointPos->x += 8;
+        pRefBlock8x8 += 8;
+        omxVCM4P2_FindMVpred(
+                    &(pSrcDstMBCurr->pMV0[1][1]),
+                    pSrcCandMV1[3],
+                    pSrcCandMV2[3],
+                    pSrcCandMV3[3],
+                    &(pSrcDstMBCurr->pMVPred[1][1]),
+                    NULL,
+                    3);
+        armVCM4P2_BlockMatch_8x8 (pRefBlock8x8,
+                                      srcRefStep, pRefRect,
+                                      pTempSrcCurrBuf, pTempCurrPointPos,
+                                      &(pSrcDstMBCurr->pMVPred[1][1]), NULL,
+                                      pMEParams, &(pSrcDstMBCurr->pMV0[1][1]),
+                                      &dstSAD8x8
+                                      );
+        pDstBlockSAD[3] = dstSAD8x8;
+        *pDstSAD += dstSAD8x8;
+
+
+        /* Checking if 4MV is equal to 1MV */
+        if (
+            (pSrcDstMBCurr->pMV0[0][0].dx != dstMV16x16.dx) ||
+            (pSrcDstMBCurr->pMV0[0][0].dy != dstMV16x16.dy) ||
+            (pSrcDstMBCurr->pMV0[0][1].dx != dstMV16x16.dx) ||
+            (pSrcDstMBCurr->pMV0[0][1].dy != dstMV16x16.dy) ||
+            (pSrcDstMBCurr->pMV0[1][0].dx != dstMV16x16.dx) ||
+            (pSrcDstMBCurr->pMV0[1][0].dy != dstMV16x16.dy) ||
+            (pSrcDstMBCurr->pMV0[1][1].dx != dstMV16x16.dx) ||
+            (pSrcDstMBCurr->pMV0[1][1].dy != dstMV16x16.dy)
+           )
+        {
+            /* select the 4 MV */
+            pSrcDstMBCurr->mbType = OMX_VC_INTER4V;
+        }
+    }
+
+    /* finding the error in intra mode: mean of the MB, then the sum of
+     * absolute deviations from that mean */
+    for (count = 0, average = 0; count < 256 ; count++)
+    {
+        average = average + pTempSrcCurrBuf[count];
+    }
+    average = average/256;
+
+    intraSAD = 0;
+
+    /* Intra SAD calculation */
+    for (count = 0; count < 256 ; count++)
+    {
+        intraSAD += armAbs ((pTempSrcCurrBuf[count]) - (average));
+    }
+
+    /* Using the MPEG4 VM formula for intra/inter mode decision
+       Var < (SAD - 2*NB) where NB = N^2 is the number of pixels
+       of the macroblock.*/
+
+    if (intraSAD <= (*pDstSAD - 512))
+    {
+        pSrcDstMBCurr->mbType = OMX_VC_INTRA;
+        pSrcDstMBCurr->pMV0[0][0].dx = 0;
+        pSrcDstMBCurr->pMV0[0][0].dy = 0;
+        *pDstSAD = intraSAD;
+        pDstBlockSAD[0] = 0xFFFF;
+        pDstBlockSAD[1] = 0xFFFF;
+        pDstBlockSAD[2] = 0xFFFF;
+        pDstBlockSAD[3] = 0xFFFF;
+    }
+
+    if(pSrcDstMBCurr->mbType == OMX_VC_INTER)
+    {
+        /* pSrcRefBuf is still the MB origin here (the 4MV search above no
+         * longer advances it), so the four block SADs are measured at the
+         * intended position.
+         * NOTE(review): the MV is applied as a whole-pixel offset while its
+         * low bit selects half pel interpolation; confirm the units the
+         * block match functions return before changing this. */
+        pTempSrcRefBuf = pSrcRefBuf + (srcRefStep * dstMV16x16.dy) + dstMV16x16.dx;
+
+        if((dstMV16x16.dx & 0x1) && (dstMV16x16.dy & 0x1))
+        {
+            predictType = OMX_VC_HALF_PIXEL_XY;
+        }
+        else if(dstMV16x16.dx & 0x1)
+        {
+            predictType = OMX_VC_HALF_PIXEL_X;
+        }
+        else if(dstMV16x16.dy & 0x1)
+        {
+            predictType = OMX_VC_HALF_PIXEL_Y;
+        }
+        else
+        {
+            predictType = OMX_VC_INTEGER_PIXEL;
+        }
+
+        pDst = armAlignTo8Bytes(&(aDst[0]));
+        /* Calculating Block SAD at MV(dstMV16x16.dx,dstMV16x16.dy) */
+        /* Block 0 */
+        omxVCM4P2_MCReconBlock(pTempSrcRefBuf,
+                                   srcRefStep,
+                                   NULL,
+                                   pDst,
+                                   dstStep,
+                                   predictType,
+                                   pMEParams->rndVal);
+
+        armVCCOMM_SAD(pTempSrcCurrBuf,
+                          16,
+                          pDst,
+                          dstStep,
+                          &Sad,
+                          8,
+                          8);
+        pDstBlockSAD[0] = Sad;
+
+        /* Block 1 */
+        omxVCM4P2_MCReconBlock(pTempSrcRefBuf + 8,
+                                   srcRefStep,
+                                   NULL,
+                                   pDst,
+                                   dstStep,
+                                   predictType,
+                                   pMEParams->rndVal);
+
+        armVCCOMM_SAD(pTempSrcCurrBuf + 8,
+                          16,
+                          pDst,
+                          dstStep,
+                          &Sad,
+                          8,
+                          8);
+        pDstBlockSAD[1] = Sad;
+
+        /* Block 2 */
+        omxVCM4P2_MCReconBlock(pTempSrcRefBuf + (srcRefStep*8),
+                                   srcRefStep,
+                                   NULL,
+                                   pDst,
+                                   dstStep,
+                                   predictType,
+                                   pMEParams->rndVal);
+
+        armVCCOMM_SAD(pTempSrcCurrBuf + (16*8),
+                          16,
+                          pDst,
+                          dstStep,
+                          &Sad,
+                          8,
+                          8);
+        pDstBlockSAD[2] = Sad;
+
+        /* Block 3 */
+        omxVCM4P2_MCReconBlock(pTempSrcRefBuf + (srcRefStep*8) + 8,
+                                   srcRefStep,
+                                   NULL,
+                                   pDst,
+                                   dstStep,
+                                   predictType,
+                                   pMEParams->rndVal);
+
+        armVCCOMM_SAD(pTempSrcCurrBuf + (16*8) + 8,
+                          16,
+                          pDst,
+                          dstStep,
+                          &Sad,
+                          8,
+                          8);
+        pDstBlockSAD[3] = Sad;
+    }
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra.c
new file mode 100644
index 0000000..1613f47
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra.c
@@ -0,0 +1,121 @@
+ /**
+ *
+ * File Name: omxVCM4P2_PredictReconCoefIntra.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: omxVCM4P2_PredictReconCoefIntra_S16.c
+ * Description: Contains modules for AC DC prediction
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P2_PredictReconCoefIntra   (6.2.5.4.3)
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block.  Prior
+ * to the function call, prediction direction (predDir) should be selected as
+ * specified in [ISO14496-2], subclause 7.4.3.1. After validating its
+ * arguments the function delegates the work to armVCM4P2_ACDCPredict.
+ *
+ * Input Arguments:
+ *
+ *   pSrcDst - pointer to the coefficient buffer which contains the quantized
+ *            coefficient residuals (PQF) of the current block; must be
+ *            aligned on a 4-byte boundary.  The output coefficients are
+ *            saturated to the range [-2048, 2047].
+ *   pPredBufRow - pointer to the coefficient row buffer; must be aligned on
+ *            a 4-byte boundary.
+ *   pPredBufCol - pointer to the coefficient column buffer; must be aligned
+ *            on a 4-byte boundary.
+ *   curQP - quantization parameter of the current block. curQP may equal to
+ *            predQP especially when the current block and the predictor block
+ *            are in the same macroblock.
+ *   predQP - quantization parameter of the predictor block
+ *   predDir - indicates the prediction direction which takes one of the
+ *            following values: OMX_VC_HORIZONTAL - predict horizontally
+ *            OMX_VC_VERTICAL - predict vertically
+ *   ACPredFlag - a flag indicating if AC prediction should be performed. It
+ *            is equal to ac_pred_flag in the bit stream syntax of MPEG-4
+ *   videoComp - video component type (luminance or chrominance) of the
+ *            current block
+ *
+ * Output Arguments:
+ *
+ *   pSrcDst - pointer to the coefficient buffer which contains the quantized
+ *            coefficients (QF) of the current block
+ *   pPredBufRow - pointer to the updated coefficient row buffer
+ *   pPredBufCol - pointer to the updated coefficient column buffer  Note:
+ *            Buffer update: Update the AC prediction buffer (both row and
+ *            column buffer).
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments, if:
+ *        -    At least one of the pointers is NULL:
+ *              pSrcDst, pPredBufRow, or pPredBufCol.
+ *        -    curQP <= 0,
+ *        -    predQP <= 0,
+ *        -    curQP >31,
+ *        -    predQP > 31,
+ *        -    preDir exceeds [1,2]
+ *        -    pSrcDst, pPredBufRow, or pPredBufCol is not 4-byte aligned.
+ *    Otherwise the value returned by armVCM4P2_ACDCPredict.
+ *
+ */
+
+OMXResult omxVCM4P2_PredictReconCoefIntra(
+     OMX_S16 * pSrcDst,
+     OMX_S16 * pPredBufRow,
+     OMX_S16 * pPredBufCol,
+     OMX_INT curQP,
+     OMX_INT predQP,
+     OMX_INT predDir,
+     OMX_INT ACPredFlag,
+     OMXVCM4P2VideoComponent videoComp
+ )
+{
+    /* Buffers: all three must be present and 4-byte aligned */
+    armRetArgErrIf((pSrcDst == NULL) || !armIs4ByteAligned(pSrcDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf((pPredBufRow == NULL) || !armIs4ByteAligned(pPredBufRow), OMX_Sts_BadArgErr);
+    armRetArgErrIf((pPredBufCol == NULL) || !armIs4ByteAligned(pPredBufCol), OMX_Sts_BadArgErr);
+
+    /* Quantizers: both must lie in [1, 31] */
+    armRetArgErrIf((curQP <= 0) || (curQP > 31), OMX_Sts_BadArgErr);
+    armRetArgErrIf((predQP <= 0) || (predQP > 31), OMX_Sts_BadArgErr);
+
+    /* Direction: only the values 1 and 2 are accepted */
+    armRetArgErrIf(!((predDir == 1) || (predDir == 2)), OMX_Sts_BadArgErr);
+
+    /* The second and last arguments are passed as NULL and the flag
+     * argument as 0 */
+    return armVCM4P2_ACDCPredict(
+        pSrcDst,
+        NULL,
+        pPredBufRow,
+        pPredBufCol,
+        curQP,
+        predQP,
+        predDir,
+        ACPredFlag,
+        videoComp,
+        (OMX_U8) 0,
+        NULL);
+
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInter_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInter_I.c
new file mode 100644
index 0000000..5964f73
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInter_I.c
@@ -0,0 +1,117 @@
+/**
+ *
+ * File Name: omxVCM4P2_QuantInter_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for inter Quantization
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P2_QuantInter_I   (6.2.4.4.3)
+ *
+ * Description:
+ * Performs quantization on an inter coefficient block; supports
+ * bits_per_pixel == 8. Each of the 64 coefficients is replaced in place by
+ * sign(c) * ((|c| - QP/2) / (2*QP)) using integer arithmetic, then clipped.
+ *
+ * Input Arguments:
+ *
+ *   pSrcDst - pointer to the input inter block coefficients; must be aligned
+ *            on a 16-byte boundary (the alignment is not checked here).
+ *   QP - quantization parameter (quantizer_scale)
+ *   shortVideoHeader - binary flag indicating presence of short_video_header;
+ *            shortVideoHeader==1 selects linear intra DC mode, and
+ *            shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ *   pSrcDst - pointer to the output (quantized) interblock coefficients.
+ *            When shortVideoHeader==1, AC coefficients are saturated on the
+ *            interval [-127, 127], and DC coefficients are saturated on the
+ *            interval [1, 254].  When shortVideoHeader==0, AC coefficients
+ *            are saturated on the interval [-2047, 2047] (this implementation
+ *            applies the same interval to the DC coefficient).
+ *
+ * Return Value:
+ *
+ *    OMX_Sts_NoErr - no error
+ *    OMX_Sts_BadArgErr - bad arguments:
+ *    -    pSrcDst is NULL.
+ *    -    QP <= 0 or QP >= 32.
+ *
+ */
+
+OMXResult omxVCM4P2_QuantInter_I(
+     OMX_S16 * pSrcDst,
+     OMX_U8 QP,
+     OMX_INT shortVideoHeader
+)
+{
+    OMX_INT idx;
+    OMX_INT sign;
+    OMX_INT level;
+    OMX_INT halfQP, twoQP;
+    OMX_INT dcLow, dcHigh;
+    OMX_INT acLow, acHigh;
+
+    /* Argument error checks */
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr);
+
+    /* Saturation limits depend on whether short video header mode is on */
+    if (shortVideoHeader != 1)
+    {
+        dcLow  = -2047;
+        dcHigh = 2047;
+        acLow  = -2047;
+        acHigh = 2047;
+    }
+    else
+    {
+        dcLow  = 1;
+        dcHigh = 254;
+        acLow  = -127;
+        acHigh = 127;
+    }
+
+    /* Loop-invariant pieces of the quantisation formula */
+    halfQP = QP / 2;
+    twoQP  = 2 * QP;
+
+    /* Quantise every coefficient of the 8x8 block */
+    for (idx = 0; idx < 64; idx++)
+    {
+        /* Quantise the magnitude, then put the original sign back */
+        sign  = armSignCheck (pSrcDst[idx]);
+        level = (armAbs(pSrcDst[idx]) - halfQP) / twoQP;
+        level *= sign;
+
+        /* Clip: index 0 uses the DC limits, every other index the AC limits */
+        pSrcDst[idx] = (idx == 0)
+            ? (OMX_S16) armClip (dcLow, dcHigh, level)
+            : (OMX_S16) armClip (acLow, acHigh, level);
+    }
+    return OMX_Sts_NoErr;
+
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantIntra_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantIntra_I.c
new file mode 100644
index 0000000..a10da68
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantIntra_I.c
@@ -0,0 +1,153 @@
+/**
+ *
+ * File Name: omxVCM4P2_QuantIntra_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for intra Quantization
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+/**
+ * Function: omxVCM4P2_QuantIntra_I (6.2.4.4.2)
+ *
+ * Description:
+ * Performs quantization on intra block coefficients. This function supports
+ * bits_per_pixel == 8.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input intra block coefficients; must be aligned
+ * on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale).
+ * blockIndex - block index indicating the component type and position,
+ * valid in the range 0 to 5, as defined in [ISO14496-2], subclause
+ * 6.1.3.8.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (quantized) intra block coefficients.
+ * When shortVideoHeader==1, AC coefficients are saturated on the
+ * interval [-127, 127], and DC coefficients are saturated on the
+ * interval [1, 254]. When shortVideoHeader==0, AC coefficients
+ * are saturated on the interval [-2047, 2047].
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - pSrcDst is NULL.
+ * - blockIndex < 0 or blockIndex >= 10
+ * - QP <= 0 or QP >= 32.
+ *
+ */
+
+OMXResult omxVCM4P2_QuantIntra_I(
+     OMX_S16 * pSrcDst,
+     OMX_U8 QP,
+     OMX_INT blockIndex,
+     OMX_INT shortVideoHeader
+ )
+{
+
+    /*
+     * Quantises an intra block in place. pSrcDst[0] is divided by a DC
+     * scaler with armIntDivAwayFromZero; pSrcDst[1..63] are divided by
+     * 2*QP with the sign preserved and are then saturated.
+     */
+    OMX_INT scaler = 0;     /* DC divisor; assigned on every path below */
+    OMX_INT acHigh, acLow;  /* AC saturation limits */
+    OMX_INT firstTable;
+    OMX_INT idx, sign;
+
+    /* Argument error checks */
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(((blockIndex < 0) || (blockIndex >= 10)), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr);
+
+    if (shortVideoHeader != 1)
+    {
+        /* DC scaler depends on QP and on which table blockIndex selects. */
+        acHigh = 2047;
+        acLow = -2047;
+
+        /* blockIndex 4 and 5 use the second table, all others the first. */
+        firstTable = (blockIndex < 4) || (blockIndex > 5);
+
+        /* QP 1..4 gives a scaler of 8 in both tables. */
+        if (QP <= 4)
+        {
+            scaler = 8;
+        }
+        else if (firstTable)
+        {
+            if (QP <= 8)
+            {
+                scaler = 2 * QP;
+            }
+            else if (QP <= 24)
+            {
+                scaler = QP + 8;
+            }
+            else
+            {
+                scaler = (2 * QP) - 16;
+            }
+        }
+        else
+        {
+            if (QP <= 24)
+            {
+                scaler = (QP + 13)/2;
+            }
+            else
+            {
+                scaler = QP - 6;
+            }
+        }
+
+        /* DC is scaled but not clipped on this path. */
+        pSrcDst[0] = armIntDivAwayFromZero (pSrcDst[0], scaler);
+    }
+    else
+    {
+        /* Fixed DC scaler of 8; DC is clipped to [1, 254]. */
+        acHigh = 127;
+        acLow = -127;
+        scaler = 8;
+        pSrcDst[0] = armIntDivAwayFromZero (pSrcDst[0], scaler);
+        pSrcDst[0] = (OMX_S16) armClip (1, 254, pSrcDst[0]);
+    }
+
+    /* AC coefficients */
+    for (idx = 1; idx <= 63; ++idx)
+    {
+        /* Quantise the magnitude and put the sign back. */
+        sign = armSignCheck (pSrcDst[idx]);
+        pSrcDst[idx] = armAbs(pSrcDst[idx])/(2 * QP);
+        pSrcDst[idx] *= sign;
+
+        /* Saturate */
+        pSrcDst[idx] = (OMX_S16) armClip (acLow, acHigh, pSrcDst[idx]);
+    }
+    return OMX_Sts_NoErr;
+
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvInter_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvInter_I.c
new file mode 100644
index 0000000..6e0de5c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvInter_I.c
@@ -0,0 +1,96 @@
+/**
+ *
+ * File Name: omxVCM4P2_QuantInvInter_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for inter inverse Quantization
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P2_QuantInvInter_I (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an intra/inter coded
+ * block. Supports bits_per_pixel = 8. The output coefficients are clipped to
+ * the range [-2048, 2047].
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input (quantized) intra/inter block; must be
+ * aligned on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale)
+ * videoComp - video component type of the current block. Takes one of the
+ * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra
+ * version only).
+ * shortVideoHeader - binary flag indicating presence of short_video_header
+ * (intra version only).
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (dequantized) intra/inter block
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; one or more of the following is
+ * true:
+ * - pSrcDst is NULL
+ * - QP <= 0 or QP >= 32
+ * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE.
+ *
+ */
+
+OMXResult omxVCM4P2_QuantInvInter_I(
+     OMX_S16 * pSrcDst,
+     OMX_INT QP
+ )
+{
+    /*
+     * Dequantises the 64 coefficients at pSrcDst in place:
+     *     odd QP :  Sign * ((2 * |level| + 1) * QP)
+     *     even QP:  Sign * ((2 * |level| + 1) * QP - 1)
+     * The result stays in an OMX_INT until it has been saturated to
+     * [-2048, 2047]. Storing it in the OMX_S16 buffer first would lose
+     * large products (level 2047 with QP 31 gives 126945).
+     */
+    OMX_INT coeffCount, Sign, value;
+    /* Subtracted from the scaled magnitude when QP is even */
+    const OMX_INT evenAdjust = (QP & 0x1) ? 0 : 1;
+
+    /* Argument error checks */
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr);
+
+    /* Second Inverse quantisation method */
+    for (coeffCount = 0; coeffCount < 64; coeffCount++)
+    {
+        Sign = armSignCheck (pSrcDst[coeffCount]);
+
+        /* Scale the magnitude at full width and restore the sign */
+        value = (2 * armAbs(pSrcDst[coeffCount]) + 1) * QP - evenAdjust;
+        value *= Sign;
+
+        /* Saturate, then narrow to the coefficient type */
+        pSrcDst[coeffCount] = (OMX_S16) armClip (-2048, 2047, value);
+    }
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I.c
new file mode 100644
index 0000000..a946d7b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I.c
@@ -0,0 +1,153 @@
+/**
+ *
+ * File Name: omxVCM4P2_QuantInvIntra_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for intra inverse Quantization
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+/**
+ * Function: omxVCM4P2_QuantInvIntra_I (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an intra/inter coded
+ * block. Supports bits_per_pixel = 8. The output coefficients are clipped to
+ * the range [-2048, 2047].
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input (quantized) intra/inter block; must be
+ * aligned on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale)
+ * videoComp - video component type of the current block. Takes one of the
+ * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra
+ * version only).
+ * shortVideoHeader - binary flag indicating presence of short_video_header
+ * (intra version only).
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (dequantized) intra/inter block
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; one or more of the following is
+ * true:
+ * - pSrcDst is NULL
+ * - QP <= 0 or QP >= 32
+ * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE.
+ *
+ */
+
+OMXResult omxVCM4P2_QuantInvIntra_I(
+     OMX_S16 * pSrcDst,
+     OMX_INT QP,
+     OMXVCM4P2VideoComponent videoComp,
+     OMX_INT shortVideoHeader
+)
+{
+    /* Dequantises an intra block in place: DC by dcScaler, AC by QP. */
+    /* Initialized to remove compilation error */
+    OMX_INT dcScaler = 0, coeffCount, Sign;
+    /* Full-width intermediate so products are saturated before narrowing */
+    OMX_INT value;
+
+    /* Argument error checks */
+    armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((videoComp != OMX_VC_LUMINANCE) && (videoComp != OMX_VC_CHROMINANCE)), OMX_Sts_BadArgErr);
+
+    /* Calculate the DC scaler value */
+
+    /* linear intra DC mode */
+    if(shortVideoHeader)
+    {
+        dcScaler = 8;
+    }
+    /* nonlinear intra DC mode */
+    else
+    {
+
+        if (videoComp == OMX_VC_LUMINANCE)
+        {
+            if (QP >= 1 && QP <= 4)
+            {
+                dcScaler = 8;
+            }
+            else if (QP >= 5 && QP <= 8)
+            {
+                dcScaler = 2 * QP;
+            }
+            else if (QP >= 9 && QP <= 24)
+            {
+                dcScaler = QP + 8;
+            }
+            else
+            {
+                dcScaler = (2 * QP) - 16;
+            }
+        }
+
+        else if (videoComp == OMX_VC_CHROMINANCE)
+        {
+            if (QP >= 1 && QP <= 4)
+            {
+                dcScaler = 8;
+            }
+            else if (QP >= 5 && QP <= 24)
+            {
+                dcScaler = (QP + 13)/2;
+            }
+            else
+            {
+                dcScaler = QP - 6;
+            }
+        }
+    }
+    /* Dequant the DC value, this applies to both the methods */
+    value = pSrcDst[0] * dcScaler;
+
+    /* Saturate the full-width product, then narrow */
+    pSrcDst[0] = (OMX_S16) armClip (-2048, 2047, value);
+
+    /* Second Inverse quantisation method */
+    for (coeffCount = 1; coeffCount < 64; coeffCount++)
+    {
+        /* check sign */
+        Sign = armSignCheck (pSrcDst[coeffCount]);
+
+        if (QP & 0x1)
+        {
+            value = (2 * armAbs(pSrcDst[coeffCount]) + 1) * QP;
+        }
+        else
+        {
+            value = (2 * armAbs(pSrcDst[coeffCount]) + 1) * QP - 1;
+        }
+        value *= Sign;
+
+        /* Saturate the full-width result, then narrow */
+        pSrcDst[coeffCount] = (OMX_S16) armClip (-2048, 2047, value);
+    }
+    return OMX_Sts_NoErr;
+
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_inter.c
new file mode 100644
index 0000000..6e0c59b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_inter.c
@@ -0,0 +1,108 @@
+/**
+ *
+ * File Name: omxVCM4P2_TransRecBlockCoef_inter.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules DCT->quant and reconstructing the inter texture data
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P2_TransRecBlockCoef_inter (6.2.4.4.5)
+ *
+ * Description:
+ * Implements DCT, and quantizes the DCT coefficients of the inter block
+ * while reconstructing the texture residual. There is no boundary check for
+ * the bit stream buffer.
+ *
+ * Input Arguments:
+ *
+ * pSrc -pointer to the residuals to be encoded; must be aligned on an
+ * 16-byte boundary.
+ * QP - quantization parameter.
+ * shortVideoHeader - binary flag indicating presence of short_video_header;
+ * shortVideoHeader==1 selects linear intra DC mode, and
+ * shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the quantized DCT coefficients buffer; must be aligned
+ * on a 16-byte boundary.
+ * pRec - pointer to the reconstructed texture residuals; must be aligned
+ * on a 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is either NULL or
+ * not 16-byte aligned:
+ * - pSrc
+ * - pDst
+ * - pRec
+ * - QP <= 0 or QP >= 32.
+ *
+ */
+
+OMXResult omxVCM4P2_TransRecBlockCoef_inter(
+     const OMX_S16 *pSrc,
+     OMX_S16 * pDst,
+     OMX_S16 * pRec,
+     OMX_U8 QP,
+     OMX_INT shortVideoHeader
+)
+{
+    /*
+     * pSrc -> DCT -> inter quantisation -> pDst, then a scratch copy of
+     * pDst -> inverse quantisation -> IDCT -> pRec.
+     */
+    OMX_S16 scratchStore[72];   /* 64 coefficients + room to align */
+    OMX_S16 *pScratch;
+    OMX_INT n;
+
+    /* Every pointer must be non-NULL and 16-byte aligned; QP must be 1..31. */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pRec == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pRec), OMX_Sts_BadArgErr);
+    armRetArgErrIf(((QP <= 0) || (QP >= 32)), OMX_Sts_BadArgErr);
+
+    /* 16-byte aligned view of the scratch storage */
+    pScratch = armAlignTo16Bytes(scratchStore);
+
+    /* Transform and quantise; the quantiser works in place on pDst. */
+    omxVCM4P2_DCT8x8blk (pSrc, pDst);
+    omxVCM4P2_QuantInter_I(pDst, QP, shortVideoHeader);
+
+    /* Duplicate the quantised block for reconstruction. */
+    for (n = 0; n != 64; ++n)
+    {
+        pScratch[n] = pDst[n];
+    }
+
+    /* Dequantise the copy and transform back into pRec. */
+    omxVCM4P2_QuantInvInter_I(pScratch, QP);
+    omxVCM4P2_IDCT8x8blk (pScratch, pRec);
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_intra.c
new file mode 100644
index 0000000..dd444f9
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/m4p2/src/omxVCM4P2_TransRecBlockCoef_intra.c
@@ -0,0 +1,260 @@
+/**
+ *
+ * File Name: omxVCM4P2_TransRecBlockCoef_intra.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules DCT->quant and reconstructing the intra texture data
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+
+/**
+ * Function: omxVCM4P2_TransRecBlockCoef_intra (6.2.4.4.4)
+ *
+ * Description:
+ * Quantizes the DCT coefficients, implements intra block AC/DC coefficient
+ * prediction, and reconstructs the current intra block texture for prediction
+ * on the next frame. Quantized row and column coefficients are returned in
+ * the updated coefficient buffers.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the pixels of current intra block; must be aligned on
+ * an 8-byte boundary.
+ * pPredBufRow - pointer to the coefficient row buffer containing
+ * ((num_mb_per_row * 2 + 1) * 8) elements of type OMX_S16.
+ * Coefficients are organized into blocks of eight as described
+ * below (Internal Prediction Coefficient Update Procedures). The
+ * DC coefficient is first, and the remaining buffer locations
+ * contain the quantized AC coefficients. Each group of eight row
+ * buffer elements combined with one element eight elements ahead
+ * contains the coefficient predictors of the neighboring block
+ * that is spatially above or to the left of the block currently to
+ * be decoded. A negative-valued DC coefficient indicates that this
+ * neighboring block is not INTRA-coded or out of bounds, and
+ * therefore the AC and DC coefficients are invalid. Pointer must
+ * be aligned on an 8-byte boundary.
+ * pPredBufCol - pointer to the prediction coefficient column buffer
+ * containing 16 elements of type OMX_S16. Coefficients are
+ * organized as described in section 6.2.2.5. Pointer must be
+ * aligned on an 8-byte boundary.
+ * pSumErr - pointer to a flag indicating whether or not AC prediction is
+ * required; AC prediction is enabled if *pSumErr >=0, but the
+ * value is not used for coefficient prediction, i.e., the sum of
+ * absolute differences starts from 0 for each call to this
+ * function. Otherwise AC prediction is disabled if *pSumErr < 0 .
+ * blockIndex - block index indicating the component type and position, as
+ * defined in [ISO14496-2], subclause 6.1.3.8.
+ * curQp - quantization parameter of the macroblock to which the current
+ * block belongs
+ * pQpBuf - pointer to a 2-element quantization parameter buffer; pQpBuf[0]
+ * contains the quantization parameter associated with the 8x8
+ * block left of the current block (QPa), and pQpBuf[1] contains
+ * the quantization parameter associated with the 8x8 block above
+ * the current block (QPc). In the event that the corresponding
+ * block is outside of the VOP bound, the Qp value will not affect
+ * the intra prediction process, as described in [ISO14496-2],
+ * sub-clause 7.4.3.3, Adaptive AC Coefficient Prediction.
+ * srcStep - width of the source buffer; must be a multiple of 8.
+ * dstStep - width of the reconstructed destination buffer; must be a
+ * multiple of 16.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the quantized DCT coefficient buffer; pDst[0] contains
+ * the predicted DC coefficient; the remaining entries contain the
+ * quantized AC coefficients (without prediction). The pointer
+ * pDst must be aligned on a 16-byte boundary.
+ * pRec - pointer to the reconstructed texture; must be aligned on an
+ * 8-byte boundary.
+ * pPredBufRow - pointer to the updated coefficient row buffer
+ * pPredBufCol - pointer to the updated coefficient column buffer
+ * pPreACPredict - if prediction is enabled, the parameter points to the
+ * start of the buffer containing the coefficient differences for
+ * VLC encoding. The entry pPreACPredict[0] indicates prediction
+ * direction for the current block and takes one of the following
+ * values: OMX_VC_NONE (prediction disabled), OMX_VC_HORIZONTAL, or
+ * OMX_VC_VERTICAL. The entries
+ * pPreACPredict[1]-pPreACPredict[7] contain predicted AC
+ * coefficients. If prediction is disabled (*pSumErr<0) then the
+ * contents of this buffer are undefined upon return from the
+ * function
+ * pSumErr - pointer to the value of the accumulated AC coefficient errors,
+ * i.e., sum of the absolute differences between predicted and
+ * unpredicted AC coefficients
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ * - At least one of the following pointers is NULL: pSrc, pDst, pRec,
+ * pPredBufRow, pPredBufCol, pQpBuf, pPreACPredict, pSumErr.
+ * - blockIndex < 0 or blockIndex >= 10;
+ * - curQP <= 0 or curQP >= 32.
+ * - srcStep, or dstStep <= 0 or not a multiple of 8.
+ * - pDst is not 16-byte aligned.
+ * - At least one of the following pointers is not 8-byte aligned:
+ * pSrc, pRec.
+ *
+ * Note: The coefficient buffers must be updated in accordance with the
+ * update procedures defined in section 6.2.2.
+ *
+ */
+
+OMXResult omxVCM4P2_TransRecBlockCoef_intra(
+     const OMX_U8 *pSrc,
+     OMX_S16 * pDst,
+     OMX_U8 * pRec,
+     OMX_S16 *pPredBufRow,
+     OMX_S16 *pPredBufCol,
+     OMX_S16 * pPreACPredict,
+     OMX_INT *pSumErr,
+     OMX_INT blockIndex,
+     OMX_U8 curQp,
+     const OMX_U8 *pQpBuf,
+     OMX_INT srcStep,
+     OMX_INT dstStep,
+     OMX_INT shortVideoHeader
+)
+{
+    /* 64 elements are needed; the 15 extra elements leave room to align
+       each buffer to 16 bytes */
+    OMX_S16 tempBuf1[79], tempBuf2[79];
+    OMX_S16 tempBuf3[79];
+    OMX_S16 *pTempBuf1, *pTempBuf2,*pTempBuf3;
+    OMXVCM4P2VideoComponent videoComp;
+    OMX_U8 flag;
+    OMX_INT x, y, count, predDir;
+    OMX_INT predQP, ACPredFlag;
+
+    /* Aligning the local buffers */
+    pTempBuf1 = armAlignTo16Bytes(tempBuf1);
+    pTempBuf2 = armAlignTo16Bytes(tempBuf2);
+    pTempBuf3 = armAlignTo16Bytes(tempBuf3);
+
+    /* Argument error checks */
+    armRetArgErrIf(pSrc == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pRec == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pDst == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pSrc), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs8ByteAligned(pRec), OMX_Sts_BadArgErr);
+    armRetArgErrIf(!armIs16ByteAligned(pDst), OMX_Sts_BadArgErr);
+    armRetArgErrIf(pPredBufRow == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pPredBufCol == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pPreACPredict == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pSumErr == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf(pQpBuf == NULL, OMX_Sts_BadArgErr);
+    armRetArgErrIf((srcStep <= 0) || (dstStep <= 0) ||
+                (dstStep & 7) || (srcStep & 7)
+                , OMX_Sts_BadArgErr);
+    armRetArgErrIf((blockIndex < 0) || (blockIndex > 9), OMX_Sts_BadArgErr);
+
+    armRetArgErrIf((curQp <= 0) || (curQp >=32), OMX_Sts_BadArgErr);
+
+    /* Setting the videoComp */
+    if (blockIndex <= 3)
+    {
+        videoComp = OMX_VC_LUMINANCE;
+    }
+    else
+    {
+        videoComp = OMX_VC_CHROMINANCE;
+    }
+    /* Converting from 2-d to 1-d buffer */
+    for (y = 0, count = 0; y < 8; y++)
+    {
+        for(x= 0; x < 8; x++, count++)
+        {
+            pTempBuf1[count] = pSrc[(y*srcStep) + x];
+        }
+    }
+
+    omxVCM4P2_DCT8x8blk (pTempBuf1, pTempBuf2);
+    omxVCM4P2_QuantIntra_I(
+        pTempBuf2,
+        curQp,
+        blockIndex,
+        shortVideoHeader);
+
+    /* pDst is a contiguous 64-coefficient buffer, so it is filled linearly;
+     * dstStep describes pRec and is applied when the texture is stored. */
+    for (count = 0; count < 64; count++)
+    {
+        /* storing tempbuf2 to tempbuf1 for the reconstruction path */
+        pTempBuf1[count] = pTempBuf2[count];
+        pDst[count] = pTempBuf2[count];
+    }
+
+    /* AC and DC prediction */
+    armVCM4P2_SetPredDir(
+        blockIndex,
+        pPredBufRow,
+        pPredBufCol,
+        &predDir,
+        &predQP,
+        pQpBuf);
+
+    armRetDataErrIf(((predQP <= 0) || (predQP >= 32)), OMX_Sts_BadArgErr);
+
+    flag = 1;
+    if (*pSumErr < 0)
+    {
+        ACPredFlag = 0;
+    }
+    else
+    {
+        ACPredFlag = 1;
+    }
+
+    armVCM4P2_ACDCPredict(
+        pTempBuf2,
+        pPreACPredict,
+        pPredBufRow,
+        pPredBufCol,
+        curQp,
+        predQP,
+        predDir,
+        ACPredFlag,
+        videoComp,
+        flag,
+        pSumErr);
+
+    /* Reconstructing the texture data */
+    omxVCM4P2_QuantInvIntra_I(
+        pTempBuf1,
+        curQp,
+        videoComp,
+        shortVideoHeader);
+    omxVCM4P2_IDCT8x8blk (pTempBuf1, pTempBuf3);
+    /* Store the 8x8 block into pRec, whose rows are dstStep bytes apart */
+    for (y = 0, count = 0; y < 8; y++)
+    {
+        for (x = 0; x < 8; x++, count++)
+        {
+            pRec[(y*dstStep) + x] = (OMX_U8) armClip (0, 255, pTempBuf3[count]);
+        }
+    }
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/src/armVC_Version.c b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/src/armVC_Version.c
new file mode 100644
index 0000000..5d93681
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/reference/vc/src/armVC_Version.c
@@ -0,0 +1,6 @@
+#include "omxtypes.h"
+#include "armCOMM_Version.h"
+
+#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS
+const char * const omxVC_VersionDescription = "ARM OpenMAX DL v" ARM_VERSION_STRING " Rel=" OMX_ARM_RELEASE_TAG " Arch=" OMX_ARM_BUILD_ARCHITECTURE " Tools=" OMX_ARM_BUILD_TOOLCHAIN ; /* NOTE(review): the macros are presumably string literals from armCOMM_Version.h, joined here by literal concatenation - confirm */
+#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */
diff --git a/media/libstagefright/codecs/on2/h264dec/source/DecTestBench.c b/media/libstagefright/codecs/on2/h264dec/source/DecTestBench.c
new file mode 100755
index 0000000..dcf2ef6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/DecTestBench.c
@@ -0,0 +1,761 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "H264SwDecApi.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*------------------------------------------------------------------------------
+ Module defines
+------------------------------------------------------------------------------*/
+
+/* CHECK_MEMORY_USAGE prints and sums the memory allocated in calls to
+ * H264SwDecMalloc() */
+/* #define CHECK_MEMORY_USAGE */
+
+/* _NO_OUT disables output file writing */
+/* #define _NO_OUT */
+
+/* Debug prints: the argument is a parenthesised printf argument list, e.g. DEBUG(("x=%d\n", x)) */
+#define DEBUG(argv) printf argv
+
+/* CVS tag name for identification; printed by main() when the first argument is -T */
+const char tagName[256] = "$Name: FIRST_ANDROID_COPYRIGHT $";
+
+void WriteOutput(char *filename, u8 *data, u32 picSize);
+u32 NextPacket(u8 **pStrm);
+u32 CropPicture(u8 *pOutImage, u8 *pInImage,
+ u32 picWidth, u32 picHeight, CropParams *pCropParams);
+
+/* Global variables for stream handling */
+u8 *streamStop = NULL; /* set by main() to one past the last byte of the loaded stream */
+u32 packetize = 0; /* set to 1 by the -P command line option */
+u32 nalUnitStream = 0; /* set to 1 by the -U command line option */
+FILE *foutput = NULL;
+
+#ifdef SOC_DESIGNER
+
+// Initialisation function defined in InitCache.s
+extern void cache_init(void);
+
+/*------------------------------------------------------------------------------
+
+    Function name:  $Sub$$main
+
+    Purpose:
+        This function is called at the end of the C library initialisation and
+        before main. Its purpose is to do any further initialisation before the
+        application start. argc is declared int so that it matches main().
+
+------------------------------------------------------------------------------*/
+int $Sub$$main(int argc, char * argv[])
+{
+    cache_init();                    // does some extra setup work setting up caches
+    return $Super$$main(argc, argv); // calls the original function
+}
+#endif
+
+/*------------------------------------------------------------------------------
+
+ Function name: main
+
+ Purpose:
+ main function of decoder testbench. Provides command line interface
+ with file I/O for H.264 decoder. Prints out the usage information
+ when executed without arguments.
+
+------------------------------------------------------------------------------*/
+
+int main(int argc, char **argv)
+{
+
+ u32 i, tmp;
+ u32 maxNumPics = 0;
+ u8 *byteStrmStart;
+ u8 *imageData;
+ u8 *tmpImage = NULL;
+ u32 strmLen;
+ u32 picSize;
+ H264SwDecInst decInst;
+ H264SwDecRet ret;
+ H264SwDecInput decInput;
+ H264SwDecOutput decOutput;
+ H264SwDecPicture decPicture;
+ H264SwDecInfo decInfo;
+ H264SwDecApiVersion decVer;
+ u32 picDecodeNumber;
+ u32 picDisplayNumber;
+ u32 numErrors = 0;
+ u32 cropDisplay = 0;
+ u32 disableOutputReordering = 0;
+
+ FILE *finput;
+
+ char outFileName[256] = "";
+
+ /* Print API version number */
+ decVer = H264SwDecGetAPIVersion();
+ DEBUG(("H.264 Decoder API v%d.%d\n", decVer.major, decVer.minor));
+
+ /* Print tag name if '-T' argument present */
+ if ( argc > 1 && strcmp(argv[1], "-T") == 0 )
+ {
+ DEBUG(("%s\n", tagName));
+ return 0;
+ }
+
+ /* Check that enough command line arguments given, if not -> print usage
+ * information out */
+ if (argc < 2)
+ {
+ DEBUG((
+ "Usage: %s [-Nn] [-Ooutfile] [-P] [-U] [-C] [-R] [-T] file.h264\n",
+ argv[0]));
+ DEBUG(("\t-Nn forces decoding to stop after n pictures\n"));
+#if defined(_NO_OUT)
+ DEBUG(("\t-Ooutfile output writing disabled at compile time\n"));
+#else
+ DEBUG(("\t-Ooutfile write output to \"outfile\" (default out_wxxxhyyy.yuv)\n"));
+ DEBUG(("\t-Onone does not write output\n"));
+#endif
+ DEBUG(("\t-P packet-by-packet mode\n"));
+ DEBUG(("\t-U NAL unit stream mode\n"));
+ DEBUG(("\t-C display cropped image (default decoded image)\n"));
+ DEBUG(("\t-R disable DPB output reordering\n"));
+ DEBUG(("\t-T to print tag name and exit\n"));
+ return 0;
+ }
+
+ /* read command line arguments */
+ for (i = 1; i < (u32)(argc-1); i++)
+ {
+ if ( strncmp(argv[i], "-N", 2) == 0 )
+ {
+ maxNumPics = (u32)atoi(argv[i]+2);
+ }
+ else if ( strncmp(argv[i], "-O", 2) == 0 )
+ {
+ strcpy(outFileName, argv[i]+2);
+ }
+ else if ( strcmp(argv[i], "-P") == 0 )
+ {
+ packetize = 1;
+ }
+ else if ( strcmp(argv[i], "-U") == 0 )
+ {
+ nalUnitStream = 1;
+ }
+ else if ( strcmp(argv[i], "-C") == 0 )
+ {
+ cropDisplay = 1;
+ }
+ else if ( strcmp(argv[i], "-R") == 0 )
+ {
+ disableOutputReordering = 1;
+ }
+ }
+
+ /* open input file for reading, file name given by user. If file open
+ * fails -> exit */
+ finput = fopen(argv[argc-1],"rb");
+ if (finput == NULL)
+ {
+ DEBUG(("UNABLE TO OPEN INPUT FILE\n"));
+ return -1;
+ }
+
+ /* check size of the input file -> length of the stream in bytes */
+ fseek(finput,0L,SEEK_END);
+ strmLen = (u32)ftell(finput);
+ rewind(finput);
+
+ /* allocate memory for stream buffer. if unsuccessful -> exit */
+ byteStrmStart = (u8 *)malloc(sizeof(u8)*strmLen);
+ if (byteStrmStart == NULL)
+ {
+ DEBUG(("UNABLE TO ALLOCATE MEMORY\n"));
+ return -1;
+ }
+
+ /* read input stream from file to buffer and close input file */
+ fread(byteStrmStart, sizeof(u8), strmLen, finput);
+ fclose(finput);
+
+ /* initialize decoder. If unsuccessful -> exit */
+ ret = H264SwDecInit(&decInst, disableOutputReordering);
+ if (ret != H264SWDEC_OK)
+ {
+ DEBUG(("DECODER INITIALIZATION FAILED\n"));
+ free(byteStrmStart);
+ return -1;
+ }
+
+ /* initialize H264SwDecDecode() input structure */
+ streamStop = byteStrmStart + strmLen;
+ decInput.pStream = byteStrmStart;
+ decInput.dataLen = strmLen;
+ decInput.intraConcealmentMethod = 0;
+
+ /* get pointer to next packet and the size of packet
+ * (for packetize or nalUnitStream modes) */
+ if ( (tmp = NextPacket(&decInput.pStream)) != 0 )
+ decInput.dataLen = tmp;
+
+ picDecodeNumber = picDisplayNumber = 1;
+ /* main decoding loop */
+ do
+ {
+ /* Picture ID is the picture number in decoding order */
+ decInput.picId = picDecodeNumber;
+
+ /* call API function to perform decoding */
+ ret = H264SwDecDecode(decInst, &decInput, &decOutput);
+
+ switch(ret)
+ {
+
+ case H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY:
+ /* Stream headers were successfully decoded
+ * -> stream information is available for query now */
+
+ ret = H264SwDecGetInfo(decInst, &decInfo);
+ if (ret != H264SWDEC_OK)
+ return -1;
+
+ DEBUG(("Profile %d\n", decInfo.profile));
+
+ DEBUG(("Width %d Height %d\n",
+ decInfo.picWidth, decInfo.picHeight));
+
+ if (cropDisplay && decInfo.croppingFlag)
+ {
+ DEBUG(("Cropping params: (%d, %d) %dx%d\n",
+ decInfo.cropParams.cropLeftOffset,
+ decInfo.cropParams.cropTopOffset,
+ decInfo.cropParams.cropOutWidth,
+ decInfo.cropParams.cropOutHeight));
+
+ /* Cropped frame size in planar YUV 4:2:0 */
+ picSize = decInfo.cropParams.cropOutWidth *
+ decInfo.cropParams.cropOutHeight;
+ picSize = (3 * picSize)/2;
+ tmpImage = malloc(picSize);
+ if (tmpImage == NULL)
+ return -1;
+ }
+ else
+ {
+ /* Decoder output frame size in planar YUV 4:2:0 */
+ picSize = decInfo.picWidth * decInfo.picHeight;
+ picSize = (3 * picSize)/2;
+ }
+
+ DEBUG(("videoRange %d, matrixCoefficients %d\n",
+ decInfo.videoRange, decInfo.matrixCoefficients));
+
+ /* update H264SwDecDecode() input structure, number of bytes
+ * "consumed" is computed as difference between the new stream
+ * pointer and old stream pointer */
+ decInput.dataLen -=
+ (u32)(decOutput.pStrmCurrPos - decInput.pStream);
+ decInput.pStream = decOutput.pStrmCurrPos;
+
+ /* If -O option not used, generate default file name */
+ if (outFileName[0] == 0)
+ sprintf(outFileName, "out_w%dh%d.yuv",
+ decInfo.picWidth, decInfo.picHeight);
+ break;
+
+ case H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY:
+ /* Picture is ready and more data remains in input buffer
+ * -> update H264SwDecDecode() input structure, number of bytes
+ * "consumed" is computed as difference between the new stream
+ * pointer and old stream pointer */
+ decInput.dataLen -=
+ (u32)(decOutput.pStrmCurrPos - decInput.pStream);
+ decInput.pStream = decOutput.pStrmCurrPos;
+ /* fall through */
+
+ case H264SWDEC_PIC_RDY:
+
+ /*lint -esym(644,tmpImage,picSize) variable initialized at
+ * H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY case */
+
+ if (ret == H264SWDEC_PIC_RDY)
+ decInput.dataLen = NextPacket(&decInput.pStream);
+
+ /* If enough pictures decoded -> force decoding to end
+ * by setting that no more stream is available */
+ if (maxNumPics && picDecodeNumber == maxNumPics)
+ decInput.dataLen = 0;
+
+ /* Increment decoding number for every decoded picture */
+ picDecodeNumber++;
+
+ /* use function H264SwDecNextPicture() to obtain next picture
+ * in display order. Function is called until no more images
+ * are ready for display */
+ while ( H264SwDecNextPicture(decInst, &decPicture, 0) ==
+ H264SWDEC_PIC_RDY )
+ {
+ DEBUG(("PIC %d, type %s", picDisplayNumber,
+ decPicture.isIdrPicture ? "IDR" : "NON-IDR"));
+ if (picDisplayNumber != decPicture.picId)
+ DEBUG((", decoded pic %d", decPicture.picId));
+ if (decPicture.nbrOfErrMBs)
+ {
+ DEBUG((", concealed %d\n", decPicture.nbrOfErrMBs));
+ }
+ else
+ DEBUG(("\n"));
+ fflush(stdout);
+
+ numErrors += decPicture.nbrOfErrMBs;
+
+ /* Increment display number for every displayed picture */
+ picDisplayNumber++;
+
+ /*lint -esym(644,decInfo) always initialized if pictures
+ * available for display */
+
+ /* Write output picture to file */
+ imageData = (u8*)decPicture.pOutputPicture;
+ if (cropDisplay && decInfo.croppingFlag)
+ {
+ tmp = CropPicture(tmpImage, imageData,
+ decInfo.picWidth, decInfo.picHeight,
+ &decInfo.cropParams);
+ if (tmp)
+ return -1;
+ WriteOutput(outFileName, tmpImage, picSize);
+ }
+ else
+ {
+ WriteOutput(outFileName, imageData, picSize);
+ }
+ }
+
+ break;
+
+ case H264SWDEC_STRM_PROCESSED:
+ case H264SWDEC_STRM_ERR:
+ /* Input stream was decoded but no picture is ready
+ * -> Get more data */
+ decInput.dataLen = NextPacket(&decInput.pStream);
+ break;
+
+ default:
+ DEBUG(("FATAL ERROR\n"));
+ return -1;
+
+ }
+ /* keep decoding until all data from input stream buffer consumed */
+ } while (decInput.dataLen > 0);
+
+ /* if output in display order is preferred, the decoder shall be forced
+ * to output pictures remaining in decoded picture buffer. Use function
+ * H264SwDecNextPicture() to obtain next picture in display order. Function
+ * is called until no more images are ready for display. Second parameter
+ * for the function is set to '1' to indicate that this is end of the
+ * stream and all pictures shall be output */
+ while (H264SwDecNextPicture(decInst, &decPicture, 1) == H264SWDEC_PIC_RDY)
+ {
+ DEBUG(("PIC %d, type %s", picDisplayNumber,
+ decPicture.isIdrPicture ? "IDR" : "NON-IDR"));
+ if (picDisplayNumber != decPicture.picId)
+ DEBUG((", decoded pic %d", decPicture.picId));
+ if (decPicture.nbrOfErrMBs)
+ {
+ DEBUG((", concealed %d\n", decPicture.nbrOfErrMBs));
+ }
+ else
+ DEBUG(("\n"));
+ fflush(stdout);
+
+ numErrors += decPicture.nbrOfErrMBs;
+
+ /* Increment display number for every displayed picture */
+ picDisplayNumber++;
+
+ /* Write output picture to file */
+ imageData = (u8*)decPicture.pOutputPicture;
+ if (cropDisplay && decInfo.croppingFlag)
+ {
+ tmp = CropPicture(tmpImage, imageData,
+ decInfo.picWidth, decInfo.picHeight,
+ &decInfo.cropParams);
+ if (tmp)
+ return -1;
+ WriteOutput(outFileName, tmpImage, picSize);
+ }
+ else
+ {
+ WriteOutput(outFileName, imageData, picSize);
+ }
+ }
+
+ /* release decoder instance */
+ H264SwDecRelease(decInst);
+
+ if (foutput)
+ fclose(foutput);
+
+ /* free allocated buffers */
+ free(byteStrmStart);
+ free(tmpImage);
+
+ DEBUG(("Output file: %s\n", outFileName));
+
+ DEBUG(("DECODING DONE\n"));
+ if (numErrors || picDecodeNumber == 1)
+ {
+ DEBUG(("ERRORS FOUND\n"));
+ return 1;
+ }
+
+ return 0;
+}
+
+/*------------------------------------------------------------------------------
+
+ Function name: WriteOutput
+
+ Purpose:
+ Write picture pointed by data to file. Size of the
+ picture in bytes is indicated by picSize.
+
+------------------------------------------------------------------------------*/
+void WriteOutput(char *filename, u8 *data, u32 picSize)
+{
+    /* Write picSize bytes starting at data to the global output stream
+     * foutput. The stream is opened lazily on the first call, unless the
+     * file name is "none" or the build defines _NO_OUT; in those cases
+     * foutput stays NULL and nothing is written. */
+
+    /* foutput is global file pointer */
+    if (foutput == NULL && strcmp(filename, "none") != 0)
+    {
+#if !defined(_NO_OUT)
+        /* open output file for writing. If file open fails -> exit */
+        foutput = fopen(filename, "wb");
+        if (foutput == NULL)
+        {
+            DEBUG(("UNABLE TO OPEN OUTPUT FILE\n"));
+            exit(100);
+        }
+#endif
+    }
+
+    if (foutput && data)
+    {
+        /* a short write (e.g. disk full) would otherwise silently truncate
+         * the output file, so report it instead of dropping the result */
+        if (fwrite(data, 1, picSize, foutput) != picSize)
+            DEBUG(("UNABLE TO WRITE OUTPUT PICTURE\n"));
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+ Function name: NextPacket
+
+ Purpose:
+ Get the pointer to start of next packet in input stream. Uses
+ global variables 'packetize' and 'nalUnitStream' to determine the
+ decoder input stream mode and 'streamStop' to determine the end
+ of stream. There are three possible stream modes:
+ default - the whole stream at once
+ packetize - a single NAL-unit with start code prefix
+ nalUnitStream - a single NAL-unit without start code prefix
+
+ pStrm stores pointer to the start of previous decoder input and is
+ replaced with pointer to the start of the next decoder input.
+
+ Returns the packet size in bytes
+
+------------------------------------------------------------------------------*/
+u32 NextPacket(u8 **pStrm)
+{
+ /* Advances *pStrm to the start of the next packet and returns that
+ * packet's length in bytes; returns 0 when no further packet is handed
+ * out. The search is bounded by the global streamStop and the mode is
+ * selected by the globals packetize and nalUnitStream. */
+
+ u32 index;
+ u32 maxIndex;
+ u32 zeroCount;
+ u8 *stream;
+ u8 byte;
+ /* offset from the packet start stored in *pStrm at which the next call
+ * resumes scanning; static, so the function keeps state between calls
+ * and is not reentrant */
+ static u32 prevIndex=0;
+
+ /* For default stream mode all the stream is in first packet */
+ if (!packetize && !nalUnitStream)
+ return 0;
+
+ index = 0;
+ stream = *pStrm + prevIndex;
+ /* number of bytes left between the resume position and streamStop */
+ maxIndex = (u32)(streamStop - stream);
+
+ if (maxIndex == 0)
+ return(0);
+
+ /* leading zeros of first NAL unit */
+ /* scan up to and including the first byte equal to 1; the skipped bytes
+ * themselves are not checked for being zero */
+ do
+ {
+ byte = stream[index++];
+ } while (byte != 1 && index < maxIndex);
+
+ /* invalid start code prefix */
+ /* index == maxIndex: the scan reached the last remaining byte;
+ * index < 3: the 1 byte was found within the first two bytes */
+ if (index == maxIndex || index < 3)
+ {
+ DEBUG(("INVALID BYTE STREAM\n"));
+ exit(100);
+ }
+
+ /* nalUnitStream is without start code prefix */
+ /* rebase stream/maxIndex so the returned packet starts right after the
+ * 1 byte found above */
+ if (nalUnitStream)
+ {
+ stream += index;
+ maxIndex -= index;
+ index = 0;
+ }
+
+ zeroCount = 0;
+
+ /* Search stream for next start code prefix */
+ /*lint -e(716) while(1) used consciously */
+ while (1)
+ {
+ byte = stream[index++];
+ /* zeroCount tracks the current run of consecutive zero bytes */
+ if (!byte)
+ zeroCount++;
+
+ if ( (byte == 0x01) && (zeroCount >= 2) )
+ {
+ /* Start code prefix has two zeros
+ * Third zero is assumed to be leading zero of next packet
+ * Fourth and more zeros are assumed to be trailing zeros of this
+ * packet */
+ if (zeroCount > 3)
+ {
+ /* step back over the 1 byte and three zeros; the remaining
+ * zeros stay counted as trailing zeros of this packet */
+ index -= 4;
+ zeroCount -= 3;
+ }
+ else
+ {
+ /* step back over the 1 byte and the whole zero run */
+ index -= zeroCount+1;
+ zeroCount = 0;
+ }
+ break;
+ }
+ else if (byte)
+ zeroCount = 0;
+
+ /* end of the buffered stream reached: the packet runs to streamStop */
+ if (index == maxIndex)
+ {
+ break;
+ }
+
+ }
+
+ /* Store pointer to the beginning of the packet */
+ *pStrm = stream;
+ /* prevIndex keeps the untrimmed length, including any trailing zeros */
+ prevIndex = index;
+
+ /* nalUnitStream is without trailing zeros */
+ if (nalUnitStream)
+ index -= zeroCount;
+
+ return(index);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function name: CropPicture
+
+ Purpose:
+ Perform cropping for picture. Input picture pInImage with dimensions
+ picWidth x picHeight is cropped with pCropParams and the resulting
+ picture is stored in pOutImage.
+
+------------------------------------------------------------------------------*/
+/* Copy `rows` rows of `rowBytes` bytes each from a plane with row stride
+ * `srcStride` into the densely packed destination; returns the destination
+ * position following the copied data. */
+static u8 *CopyPlaneRows(u8 *pDst, const u8 *pSrc, u32 srcStride,
+    u32 rowBytes, u32 rows)
+{
+    u32 row;
+
+    for (row = 0; row < rows; row++)
+    {
+        memcpy(pDst, pSrc, rowBytes);
+        pDst += rowBytes;
+        pSrc += srcStride;
+    }
+
+    return pDst;
+}
+
+u32 CropPicture(u8 *pOutImage, u8 *pInImage,
+    u32 picWidth, u32 picHeight, CropParams *pCropParams)
+{
+    /* Crop the planar YCbCr 4:2:0 picture pInImage (picWidth x picHeight
+     * luma samples, followed by the cb and cr planes at half resolution)
+     * to the window described by pCropParams and store the result, packed
+     * in the same plane order, in pOutImage.
+     *
+     * Returns 0 on success. Returns 1 if any pointer is NULL, a dimension
+     * is zero or the crop window does not fit inside the picture; in that
+     * case pOutImage has been freed and must not be used by the caller. */
+
+    u32 outWidth, outHeight;
+    u32 lumaSize;
+    u32 chromaStride;
+    u32 chromaOffset;
+    u8 *pOut;
+
+    if (pOutImage == NULL || pInImage == NULL || pCropParams == NULL ||
+        !picWidth || !picHeight)
+    {
+        /* just to prevent lint warning, returning non-zero will result in
+         * return without freeing the memory */
+        free(pOutImage);
+        return(1);
+    }
+
+    /* compare against the remaining room instead of computing
+     * offset + size, which could wrap around and pass the check */
+    if ( (pCropParams->cropLeftOffset > picWidth) ||
+         (pCropParams->cropOutWidth >
+            picWidth - pCropParams->cropLeftOffset) ||
+         (pCropParams->cropTopOffset > picHeight) ||
+         (pCropParams->cropOutHeight >
+            picHeight - pCropParams->cropTopOffset) )
+    {
+        /* just to prevent lint warning, returning non-zero will result in
+         * return without freeing the memory */
+        free(pOutImage);
+        return(1);
+    }
+
+    outWidth = pCropParams->cropOutWidth;
+    outHeight = pCropParams->cropOutHeight;
+    lumaSize = picWidth*picHeight;
+
+    /* Copy luma pixel values */
+    pOut = CopyPlaneRows(pOutImage,
+        pInImage + pCropParams->cropTopOffset*picWidth +
+            pCropParams->cropLeftOffset,
+        picWidth, outWidth, outHeight);
+
+    /* chroma planes are subsampled by two in both directions */
+    outWidth >>= 1;
+    outHeight >>= 1;
+    chromaStride = picWidth/2;
+
+    /* Offset of the first cropped chroma sample inside a chroma plane. The
+     * row index is halved before multiplying by the stride so the offset
+     * always lands on a row boundary; for even top offsets this equals the
+     * previous cropTopOffset*picWidth/4. */
+    chromaOffset = (pCropParams->cropTopOffset/2)*chromaStride +
+        pCropParams->cropLeftOffset/2;
+
+    /* Copy cb pixel values */
+    pOut = CopyPlaneRows(pOut, pInImage + lumaSize + chromaOffset,
+        chromaStride, outWidth, outHeight);
+
+    /* Copy cr pixel values, the cr plane starts at 5/4 of the luma size */
+    (void)CopyPlaneRows(pOut,
+        pInImage + lumaSize + lumaSize/4 + chromaOffset,
+        chromaStride, outWidth, outHeight);
+
+    return (0);
+}
+
+/*------------------------------------------------------------------------------
+
+ Function name: H264SwDecTrace
+
+ Purpose:
+ Example implementation of H264SwDecTrace function. Prototype of this
+ function is given in H264SwDecApi.h. This implementation appends
+ trace messages to file named 'dec_api.trc'.
+
+------------------------------------------------------------------------------*/
+void H264SwDecTrace(char *string)
+{
+    /* Append the trace message followed by a newline to 'dec_api.trc'.
+     * The file is opened and closed on every call; if it cannot be opened
+     * the message is dropped. */
+    FILE *traceFile = fopen("dec_api.trc", "at");
+
+    if (traceFile != NULL)
+    {
+        fputs(string, traceFile);
+        fputs("\n", traceFile);
+        fclose(traceFile);
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+ Function name: H264SwDecMalloc
+
+ Purpose:
+ Example implementation of H264SwDecMalloc function. Prototype of this
+ function is given in H264SwDecApi.h. This implementation uses
+ library function malloc for allocation of memory.
+
+------------------------------------------------------------------------------*/
+void* H264SwDecMalloc(u32 size)
+{
+    /* Allocate size bytes with malloc and return the result (NULL on
+     * failure). With CHECK_MEMORY_USAGE defined, a running total of the
+     * requested bytes is also printed. */
+
+#if defined(CHECK_MEMORY_USAGE)
+    /* Note that if the decoder has to free and reallocate some of the buffers
+     * the total value will be invalid */
+    static u32 numBytes = 0;
+    numBytes += size;
+    /* the counters are unsigned: print them with an unsigned conversion so
+     * large totals are not shown as negative numbers */
+    DEBUG(("Allocated %lu bytes, total %lu\n",
+        (unsigned long)size, (unsigned long)numBytes));
+#endif
+
+    return malloc(size);
+}
+
+/*------------------------------------------------------------------------------
+
+ Function name: H264SwDecFree
+
+ Purpose:
+ Example implementation of H264SwDecFree function. Prototype of this
+ function is given in H264SwDecApi.h. This implementation uses
+ library function free for freeing of memory.
+
+------------------------------------------------------------------------------*/
+void H264SwDecFree(void *ptr)
+{
+ /* forwards straight to the C library free(); ptr may be NULL */
+ free(ptr);
+}
+
+/*------------------------------------------------------------------------------
+
+ Function name: H264SwDecMemcpy
+
+ Purpose:
+ Example implementation of H264SwDecMemcpy function. Prototype of this
+ function is given in H264SwDecApi.h. This implementation uses
+ library function memcpy to copy src to dest.
+
+------------------------------------------------------------------------------*/
+void H264SwDecMemcpy(void *dest, void *src, u32 count)
+{
+ /* copies count bytes from src to dest; as with memcpy itself, the two
+ * regions must not overlap */
+ memcpy(dest, src, count);
+}
+
+/*------------------------------------------------------------------------------
+
+ Function name: H264SwDecMemset
+
+ Purpose:
+ Example implementation of H264SwDecMemset function. Prototype of this
+ function is given in H264SwDecApi.h. This implementation uses
+ library function memset to set content of memory area pointed by ptr.
+
+------------------------------------------------------------------------------*/
+void H264SwDecMemset(void *ptr, i32 value, u32 count)
+{
+ /* fills count bytes at ptr; memset stores value converted to
+ * unsigned char in every byte */
+ memset(ptr, value, count);
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/EvaluationTestBench.c b/media/libstagefright/codecs/on2/h264dec/source/EvaluationTestBench.c
new file mode 100755
index 0000000..aadc75f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/EvaluationTestBench.c
@@ -0,0 +1,350 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "H264SwDecApi.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+void WriteOutput(FILE *fid, u8 *data, u32 picSize);
+
+/*------------------------------------------------------------------------------
+
+ Function name: main
+
+ Purpose:
+ main function. Assuming that executable is named 'decoder' the usage
+ is as follows
+
+ decoder inputFileName
+
+ , where inputFileName shall be name of file containing h264 stream
+ data.
+
+------------------------------------------------------------------------------*/
+int main(int argc, char **argv)
+{
+    /* Decode the H.264 byte stream stored in the file named by the last
+     * command line argument and write every output picture, in display
+     * order, to "out.yuv".
+     *
+     * Returns 0 when decoding ran to the end of the stream (or to the
+     * evaluation limit) and -1 on any failure. All resources acquired so
+     * far are released on every exit path. */
+
+    u8 *byteStrmStart = NULL;
+    u32 strmLen;
+    u32 picSize = 0; /* stays 0 until headers have been decoded */
+    long fileLen;
+    int rc = -1;
+    H264SwDecInst decInst = NULL;
+    H264SwDecRet ret;
+    H264SwDecInput decInput;
+    H264SwDecOutput decOutput;
+    H264SwDecPicture decPicture;
+    H264SwDecInfo decInfo;
+    u32 picNumber;
+
+    FILE *finput = NULL;
+    FILE *foutput = NULL;
+
+    /* Check that enough command line arguments given, if not -> print usage
+     * information out */
+    if (argc < 2)
+    {
+        printf( "Usage: %s file.h264\n", argv[0]);
+        return -1;
+    }
+
+    /* open output file for writing, output file named out.yuv. If file open
+     * fails -> exit */
+    foutput = fopen("out.yuv", "wb");
+    if (foutput == NULL)
+    {
+        printf("UNABLE TO OPEN OUTPUT FILE\n");
+        return -1;
+    }
+
+    /* open input file for reading, file name given by user. If file open
+     * fails -> exit */
+    finput = fopen(argv[argc-1], "rb");
+    if (finput == NULL)
+    {
+        printf("UNABLE TO OPEN INPUT FILE\n");
+        goto cleanup;
+    }
+
+    /* check size of the input file -> length of the stream in bytes.
+     * ftell returns -1 on failure, and the decoder API carries lengths as
+     * u32, so reject empty, unreadable and oversized files here */
+    if (fseek(finput, 0L, SEEK_END) != 0)
+    {
+        printf("UNABLE TO READ INPUT FILE\n");
+        goto cleanup;
+    }
+    fileLen = ftell(finput);
+    if (fileLen <= 0 ||
+        (unsigned long)fileLen != (unsigned long)(u32)fileLen)
+    {
+        printf("UNABLE TO READ INPUT FILE\n");
+        goto cleanup;
+    }
+    strmLen = (u32)fileLen;
+    rewind(finput);
+
+    /* allocate memory for stream buffer, exit if unsuccessful */
+    byteStrmStart = (u8 *)H264SwDecMalloc(sizeof(u8)*strmLen);
+    if (byteStrmStart == NULL)
+    {
+        printf("UNABLE TO ALLOCATE MEMORY\n");
+        goto cleanup;
+    }
+
+    /* read input stream from file to buffer and close input file; a short
+     * read would leave part of the buffer uninitialized */
+    if (fread(byteStrmStart, sizeof(u8), strmLen, finput) != strmLen)
+    {
+        printf("UNABLE TO READ INPUT FILE\n");
+        goto cleanup;
+    }
+    fclose(finput);
+    finput = NULL;
+
+    /* initialize decoder. If unsuccessful -> exit */
+    ret = H264SwDecInit(&decInst, 0);
+    if (ret != H264SWDEC_OK)
+    {
+        printf("DECODER INITIALIZATION FAILED\n");
+        decInst = NULL; /* nothing to release */
+        goto cleanup;
+    }
+
+    /* initialize H264SwDecDecode() input structure */
+    decInput.pStream = byteStrmStart;
+    decInput.dataLen = strmLen;
+    decInput.intraConcealmentMethod = 0;
+
+    picNumber = 0;
+
+    /* For performance measurements, read the start time (in seconds) here.
+     * The decoding time should be measured over several frames and after
+     * that average fps (frames/second) can be calculated.
+     *
+     * startTime = GetTime();
+     *
+     * To prevent calculating file I/O latencies as a decoding time,
+     * comment out WriteOutput function call. Also prints to stdout might
+     * consume considerable amount of cycles during measurement */
+
+    /* main decoding loop */
+    do
+    {
+        /* call API function to perform decoding */
+        ret = H264SwDecDecode(decInst, &decInput, &decOutput);
+
+        switch(ret)
+        {
+
+            case H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY:
+
+                /* picture dimensions are available for query now */
+                ret = H264SwDecGetInfo(decInst, &decInfo);
+                if (ret != H264SWDEC_OK)
+                    goto cleanup;
+
+                /* picture size in pixels */
+                picSize = decInfo.picWidth * decInfo.picHeight;
+                /* memory needed for YCbCr 4:2:0 picture in bytes */
+                picSize = (3 * picSize)/2;
+                /* memory needed for 16-bit RGB picture in bytes
+                 * picSize = (decInfo.picWidth * decInfo.picHeight) * 2; */
+
+                printf("Width %d Height %d\n",
+                    decInfo.picWidth, decInfo.picHeight);
+
+                /* update H264SwDecDecode() input structure, number of bytes
+                 * "consumed" is computed as difference between the new stream
+                 * pointer and old stream pointer */
+                decInput.dataLen -=
+                    (u32)(decOutput.pStrmCurrPos - decInput.pStream);
+                decInput.pStream = decOutput.pStrmCurrPos;
+                break;
+
+            case H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY:
+            case H264SWDEC_PIC_RDY:
+
+                /* update H264SwDecDecode() input structure, number of bytes
+                 * "consumed" is computed as difference between the new stream
+                 * pointer and old stream pointer */
+                decInput.dataLen -=
+                    (u32)(decOutput.pStrmCurrPos - decInput.pStream);
+                decInput.pStream = decOutput.pStrmCurrPos;
+
+                /* use function H264SwDecNextPicture() to obtain next picture
+                 * in display order. Function is called until no more images
+                 * are ready for display */
+                while (H264SwDecNextPicture(decInst, &decPicture, 0) ==
+                    H264SWDEC_PIC_RDY)
+                {
+                    picNumber++;
+
+                    printf("PIC %d, type %s, concealed %d\n", picNumber,
+                        decPicture.isIdrPicture ? "IDR" : "NON-IDR",
+                        decPicture.nbrOfErrMBs);
+                    fflush(stdout);
+
+                    /* Do color conversion if needed to get display image
+                     * in RGB-format
+                     *
+                     * YuvToRgb( decPicture.pOutputPicture, pRgbPicture ); */
+
+                    /* write next display image to output file */
+                    WriteOutput(foutput, (u8*)decPicture.pOutputPicture,
+                        picSize);
+                }
+
+                break;
+
+            case H264SWDEC_EVALUATION_LIMIT_EXCEEDED:
+                /* evaluation version of the decoder has limited decoding
+                 * capabilities */
+                printf("EVALUATION LIMIT REACHED\n");
+                goto end;
+
+            default:
+                printf("UNRECOVERABLE ERROR\n");
+                goto cleanup;
+        }
+    /* keep decoding until all data from input stream buffer consumed */
+    } while (decInput.dataLen > 0);
+
+end:
+
+    /* if output in display order is preferred, the decoder shall be forced
+     * to output pictures remaining in decoded picture buffer. Use function
+     * H264SwDecNextPicture() to obtain next picture in display order. Function
+     * is called until no more images are ready for display. Second parameter
+     * for the function is set to '1' to indicate that this is end of the
+     * stream and all pictures shall be output */
+    while (H264SwDecNextPicture(decInst, &decPicture, 1) ==
+        H264SWDEC_PIC_RDY)
+    {
+        picNumber++;
+
+        printf("PIC %d, type %s, concealed %d\n", picNumber,
+            decPicture.isIdrPicture ? "IDR" : "NON-IDR",
+            decPicture.nbrOfErrMBs);
+        fflush(stdout);
+
+        /* Do color conversion if needed to get display image
+         * in RGB-format
+         *
+         * YuvToRgb( decPicture.pOutputPicture, pRgbPicture ); */
+
+        /* write next display image to output file */
+        WriteOutput(foutput, (u8*)decPicture.pOutputPicture, picSize);
+    }
+
+    /* For performance measurements, read the end time (in seconds) here.
+     *
+     * endTime = GetTime();
+     *
+     * Now the performance can be calculated as frames per second:
+     * fps = picNumber / (endTime - startTime); */
+
+    rc = 0;
+
+cleanup:
+
+    /* release decoder instance */
+    if (decInst != NULL)
+        H264SwDecRelease(decInst);
+
+    /* close input file if an error occurred before it was closed */
+    if (finput != NULL)
+        fclose(finput);
+
+    /* close output file */
+    if (foutput != NULL)
+        fclose(foutput);
+
+    /* free byte stream buffer */
+    free(byteStrmStart);
+
+    return rc;
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function name: WriteOutput
+
+ Purpose:
+ Write picture pointed by data to file pointed by fid. Size of the
+ picture in bytes is indicated by picSize.
+
+------------------------------------------------------------------------------*/
+void WriteOutput(FILE *fid, u8 *data, u32 picSize)
+{
+ /* writes picSize bytes from data to fid; the fwrite result is not
+ * checked, so a short write goes unnoticed */
+ fwrite(data, 1, picSize, fid);
+}
+
+/*------------------------------------------------------------------------------
+
+ Function name: H264SwDecTrace
+
+ Purpose:
+ Example implementation of H264SwDecTrace function. Prototype of this
+ function is given in H264SwDecApi.h. This implementation appends
+ trace messages to file named 'dec_api.trc'.
+
+------------------------------------------------------------------------------*/
+void H264SwDecTrace(char *string)
+{
+    /* Each call opens 'dec_api.trc' in append mode, writes the message and
+     * a terminating newline, and closes the file again. A failed open
+     * silently discards the message. */
+    FILE *logFile;
+
+    logFile = fopen("dec_api.trc", "at");
+    if (logFile == NULL)
+    {
+        return;
+    }
+
+    fputs(string, logFile);
+    fputs("\n", logFile);
+
+    fclose(logFile);
+}
+
+/*------------------------------------------------------------------------------
+
+ Function name: H264SwDecMalloc
+
+ Purpose:
+ Example implementation of H264SwDecMalloc function. Prototype of this
+ function is given in H264SwDecApi.h. This implementation uses
+ library function malloc for allocation of memory.
+
+------------------------------------------------------------------------------*/
+void* H264SwDecMalloc(u32 size)
+{
+ /* returns whatever malloc returns for size bytes, NULL included */
+ return malloc(size);
+}
+
+/*------------------------------------------------------------------------------
+
+ Function name: H264SwDecFree
+
+ Purpose:
+ Example implementation of H264SwDecFree function. Prototype of this
+ function is given in H264SwDecApi.h. This implementation uses
+ library function free for freeing of memory.
+
+------------------------------------------------------------------------------*/
+void H264SwDecFree(void *ptr)
+{
+ /* forwards straight to the C library free(); ptr may be NULL */
+ free(ptr);
+}
+
+/*------------------------------------------------------------------------------
+
+ Function name: H264SwDecMemcpy
+
+ Purpose:
+ Example implementation of H264SwDecMemcpy function. Prototype of this
+ function is given in H264SwDecApi.h. This implementation uses
+ library function memcpy to copy src to dest.
+
+------------------------------------------------------------------------------*/
+void H264SwDecMemcpy(void *dest, void *src, u32 count)
+{
+ /* copies count bytes from src to dest; as with memcpy itself, the two
+ * regions must not overlap */
+ memcpy(dest, src, count);
+}
+
+/*------------------------------------------------------------------------------
+
+ Function name: H264SwDecMemset
+
+ Purpose:
+ Example implementation of H264SwDecMemset function. Prototype of this
+ function is given in H264SwDecApi.h. This implementation uses
+ library function memset to set content of memory area pointed by ptr.
+
+------------------------------------------------------------------------------*/
+void H264SwDecMemset(void *ptr, i32 value, u32 count)
+{
+ /* fills count bytes at ptr; memset stores value converted to
+ * unsigned char in every byte */
+ memset(ptr, value, count);
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/H264SwDecApi.c b/media/libstagefright/codecs/on2/h264dec/source/H264SwDecApi.c
new file mode 100644
index 0000000..2bb4c4d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/H264SwDecApi.c
@@ -0,0 +1,567 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ H264SwDecInit
+ H264SwDecGetInfo
+ H264SwDecRelease
+ H264SwDecDecode
+ H264SwDecGetAPIVersion
+ H264SwDecNextPicture
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+#include <stdlib.h>
+#include <string.h>
+#include "basetype.h"
+#include "h264bsd_container.h"
+#include "H264SwDecApi.h"
+#include "h264bsd_decoder.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+ Version Information
+------------------------------------------------------------------------------*/
+
+#define H264SWDEC_MAJOR_VERSION 2
+#define H264SWDEC_MINOR_VERSION 3
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+H264DEC_TRACE Trace H264 Decoder API function calls.
+H264DEC_EVALUATION Compile evaluation version, restricts number of frames
+ that can be decoded
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+#ifdef H264DEC_TRACE
+#include <stdio.h>
+#define DEC_API_TRC(str) H264SwDecTrace(str)
+#else
+#define DEC_API_TRC(str)
+#endif
+
+#ifdef H264DEC_EVALUATION
+#define H264DEC_EVALUATION_LIMIT 500
+#endif
+
+/* Trace sink used by DEC_API_TRC when H264DEC_TRACE is defined. This
+ * implementation is intentionally empty: the message is discarded. */
+void H264SwDecTrace(char *string) {
+}
+
+/* Memory allocation hook of the decoder API: returns malloc(size), which is
+ * NULL when the allocation fails. */
+void* H264SwDecMalloc(u32 size) {
+ return malloc(size);
+}
+
+/* Memory release hook of the decoder API: passes ptr to free(); a NULL ptr
+ * is accepted by free(). */
+void H264SwDecFree(void *ptr) {
+ free(ptr);
+}
+
+/* Copy hook of the decoder API: copies count bytes from src to dest with
+ * memcpy, so the two regions must not overlap. */
+void H264SwDecMemcpy(void *dest, void *src, u32 count) {
+ memcpy(dest, src, count);
+}
+
+/* Fill hook of the decoder API: sets count bytes at ptr with memset, which
+ * stores value converted to unsigned char in every byte. */
+void H264SwDecMemset(void *ptr, i32 value, u32 count) {
+ memset(ptr, value, count);
+}
+
+
+/*------------------------------------------------------------------------------
+
+ Function: H264SwDecInit()
+
+ Functional description:
+ Initialize decoder software. Function reserves memory for the
+ decoder instance and calls h264bsdInit to initialize the
+ instance data.
+
+ Inputs:
+ noOutputReordering flag to indicate decoder that it doesn't have
+ to try to provide output pictures in display
+ order, saves memory
+
+ Outputs:
+ decInst pointer to initialized instance is stored here
+
+ Returns:
+ H264SWDEC_OK successfully initialized the instance
+ H264SWDEC_INITFAIL initialization failed
+ H264SWDEC_PARAM_ERR invalid parameters
+ H264SWDEC_MEMFAIL memory allocation failed
+
+------------------------------------------------------------------------------*/
+
+H264SwDecRet H264SwDecInit(H264SwDecInst *decInst, u32 noOutputReordering)
+{
+    /* Allocate a decoder container, initialize its storage through
+     * h264bsdInit and hand the container back via *decInst.
+     *
+     * Returns H264SWDEC_OK on success, H264SWDEC_INITFAIL when right shift
+     * of negative numbers is not signed on this platform,
+     * H264SWDEC_PARAM_ERR when decInst is NULL and H264SWDEC_MEMFAIL when
+     * the container cannot be allocated or h264bsdInit does not return
+     * HANTRO_OK. *decInst is written only on success. */
+    decContainer_t *container;
+
+    DEC_API_TRC("H264SwDecInit#");
+
+    /* check that right shift on negative numbers is performed signed */
+    /*lint -save -e* following check causes multiple lint messages */
+    if ( ((-1)>>1) != (-1) )
+    {
+        DEC_API_TRC("H264SwDecInit# ERROR: Right shift is not signed");
+        return(H264SWDEC_INITFAIL);
+    }
+    /*lint -restore */
+
+    if (!decInst)
+    {
+        DEC_API_TRC("H264SwDecInit# ERROR: decInst == NULL");
+        return(H264SWDEC_PARAM_ERR);
+    }
+
+    container = (decContainer_t *)H264SwDecMalloc(sizeof(decContainer_t));
+    if (!container)
+    {
+        DEC_API_TRC("H264SwDecInit# ERROR: Memory allocation failed");
+        return(H264SWDEC_MEMFAIL);
+    }
+
+#ifdef H264DEC_TRACE
+    sprintf(container->str, "H264SwDecInit# decInst %p noOutputReordering %d",
+        (void*)decInst, noOutputReordering);
+    DEC_API_TRC(container->str);
+#endif
+
+    /* a failed storage init tears the container down again */
+    if (h264bsdInit(&container->storage, noOutputReordering) != HANTRO_OK)
+    {
+        H264SwDecRelease(container);
+        return(H264SWDEC_MEMFAIL);
+    }
+
+    container->decStat = INITIALIZED;
+    container->picNumber = 0;
+
+#ifdef H264DEC_TRACE
+    sprintf(container->str, "H264SwDecInit# OK: return %p", (void*)container);
+    DEC_API_TRC(container->str);
+#endif
+
+    *decInst = (decContainer_t *)container;
+
+    return(H264SWDEC_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: H264SwDecGetInfo()
+
+ Functional description:
+ This function provides read access to decoder information. This
+ function should not be called before H264SwDecDecode function has
+ indicated that headers are ready.
+
+ Inputs:
+ decInst decoder instance
+
+ Outputs:
+ pDecInfo pointer to info struct where data is written
+
+ Returns:
+ H264SWDEC_OK success
+ H264SWDEC_PARAM_ERR invalid parameters
+ H264SWDEC_HDRS_NOT_RDY information not available yet
+
+------------------------------------------------------------------------------*/
+
+H264SwDecRet H264SwDecGetInfo(H264SwDecInst decInst, H264SwDecInfo *pDecInfo)
+{
+    /* Fill *pDecInfo with the picture dimensions, video range, matrix
+     * coefficients, cropping parameters, sample aspect ratio and profile
+     * read from the decoder storage.
+     *
+     * Returns H264SWDEC_PARAM_ERR when either argument is NULL,
+     * H264SWDEC_HDRS_NOT_RDY while no active SPS/PPS is set in the storage
+     * and H264SWDEC_OK otherwise. */
+    decContainer_t *container = (decContainer_t *)decInst;
+    storage_t *storage;
+
+    DEC_API_TRC("H264SwDecGetInfo#");
+
+    if (!decInst || !pDecInfo)
+    {
+        DEC_API_TRC("H264SwDecGetInfo# ERROR: decInst or pDecInfo is NULL");
+        return(H264SWDEC_PARAM_ERR);
+    }
+
+    storage = &container->storage;
+
+    if (!storage->activeSps || !storage->activePps)
+    {
+        DEC_API_TRC("H264SwDecGetInfo# ERROR: Headers not decoded yet");
+        return(H264SWDEC_HDRS_NOT_RDY);
+    }
+
+#ifdef H264DEC_TRACE
+    sprintf(container->str,
+        "H264SwDecGetInfo# decInst %p pDecInfo %p", decInst, (void*)pDecInfo);
+    DEC_API_TRC(container->str);
+#endif
+
+    /* the storage reports dimensions in macroblock units; shifting left by
+     * four converts them to pixels */
+    pDecInfo->picWidth = h264bsdPicWidth(storage) << 4;
+    pDecInfo->picHeight = h264bsdPicHeight(storage) << 4;
+    pDecInfo->videoRange = h264bsdVideoRange(storage);
+    pDecInfo->matrixCoefficients = h264bsdMatrixCoefficients(storage);
+
+    /* cropping window */
+    h264bsdCroppingParams(storage,
+        &pDecInfo->croppingFlag,
+        &pDecInfo->cropParams.cropLeftOffset,
+        &pDecInfo->cropParams.cropOutWidth,
+        &pDecInfo->cropParams.cropTopOffset,
+        &pDecInfo->cropParams.cropOutHeight);
+
+    /* sample aspect ratio */
+    h264bsdSampleAspectRatio(storage,
+        &pDecInfo->parWidth,
+        &pDecInfo->parHeight);
+
+    /* profile */
+    pDecInfo->profile = h264bsdProfile(storage);
+
+    DEC_API_TRC("H264SwDecGetInfo# OK");
+
+    return(H264SWDEC_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: H264SwDecRelease()
+
+ Functional description:
+ Release the decoder instance. Function calls h264bsdShutDown to
+ release instance data and frees the memory allocated for the
+ instance.
+
+ Inputs:
+ decInst Decoder instance
+
+ Outputs:
+ none
+
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+
+void H264SwDecRelease(H264SwDecInst decInst)
+{
+    /* Shut down the storage of the given decoder instance and free the
+     * instance itself. A NULL instance is ignored. */
+    decContainer_t *container = (decContainer_t*)decInst;
+
+    DEC_API_TRC("H264SwDecRelease#");
+
+    if (!decInst)
+    {
+        DEC_API_TRC("H264SwDecRelease# ERROR: decInst == NULL");
+        return;
+    }
+
+#ifdef H264DEC_TRACE
+    sprintf(container->str, "H264SwDecRelease# decInst %p",decInst);
+    DEC_API_TRC(container->str);
+#endif
+
+    h264bsdShutdown(&container->storage);
+    H264SwDecFree(container);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: H264SwDecDecode
+
+ Functional description:
+ Decode stream data. Calls h264bsdDecode to do the actual decoding.
+
+ Input:
+ decInst decoder instance
+ pInput pointer to input struct
+
+ Outputs:
+ pOutput pointer to output struct
+
+ Returns:
+ H264SWDEC_NOT_INITIALIZED decoder instance not initialized yet
+ H264SWDEC_PARAM_ERR invalid parameters
+
+ H264SWDEC_STRM_PROCESSED stream buffer decoded
+ H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY headers decoded,
+ stream buffer not finished
+ H264SWDEC_PIC_RDY decoding of a picture finished
+ H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY decoding of a picture finished,
+ stream buffer not finished
+ H264SWDEC_STRM_ERR serious error in decoding, no
+ valid parameter sets available
+ to decode picture data
+ H264SWDEC_EVALUATION_LIMIT_EXCEEDED this can only occur when
+ evaluation version is used,
+ max number of frames reached
+
+------------------------------------------------------------------------------*/
+
+H264SwDecRet H264SwDecDecode(H264SwDecInst decInst, H264SwDecInput *pInput,
+    H264SwDecOutput *pOutput)
+{
+    /* Feed the input buffer to h264bsdDecode until the buffer is consumed
+     * or an event (headers ready, picture ready, memory failure) has to be
+     * reported to the caller. pOutput->pStrmCurrPos is set to the first
+     * byte that has not been consumed and never points past the end of the
+     * input buffer.
+     *
+     * Returns H264SWDEC_PARAM_ERR for NULL/empty input,
+     * H264SWDEC_NOT_INITIALIZED for a NULL or uninitialized instance and
+     * otherwise one of the status codes assigned in the loop below. */
+
+    decContainer_t *pDecCont;
+    u32 strmLen;
+    u32 numReadBytes;
+    u8 *tmpStream;
+    u32 decResult = 0;
+    H264SwDecRet returnValue = H264SWDEC_STRM_PROCESSED;
+
+    DEC_API_TRC("H264SwDecDecode#");
+
+    /* Check that function input parameters are valid */
+    if (pInput == NULL || pOutput == NULL)
+    {
+        DEC_API_TRC("H264SwDecDecode# ERROR: pInput or pOutput is NULL");
+        return(H264SWDEC_PARAM_ERR);
+    }
+
+    if ((pInput->pStream == NULL) || (pInput->dataLen == 0))
+    {
+        DEC_API_TRC("H264SwDecDecode# ERROR: Invalid input parameters");
+        return(H264SWDEC_PARAM_ERR);
+    }
+
+    pDecCont = (decContainer_t *)decInst;
+
+    /* Check if decoder is in an incorrect mode */
+    if (decInst == NULL || pDecCont->decStat == UNINITIALIZED)
+    {
+        DEC_API_TRC("H264SwDecDecode# ERROR: Decoder not initialized");
+        return(H264SWDEC_NOT_INITIALIZED);
+    }
+
+#ifdef H264DEC_EVALUATION
+    if (pDecCont->picNumber >= H264DEC_EVALUATION_LIMIT)
+        return(H264SWDEC_EVALUATION_LIMIT_EXCEEDED);
+#endif
+
+#ifdef H264DEC_TRACE
+    sprintf(pDecCont->str, "H264SwDecDecode# decInst %p pInput %p pOutput %p",
+        decInst, (void*)pInput, (void*)pOutput);
+    DEC_API_TRC(pDecCont->str);
+#endif
+
+    pOutput->pStrmCurrPos = NULL;
+
+    numReadBytes = 0;
+    strmLen = pInput->dataLen;
+    tmpStream = pInput->pStream;
+    pDecCont->storage.intraConcealmentFlag = pInput->intraConcealmentMethod;
+
+    do
+    {
+        /* Return HDRS_RDY after DPB flush caused by new SPS */
+        if (pDecCont->decStat == NEW_HEADERS)
+        {
+            decResult = H264BSD_HDRS_RDY;
+            pDecCont->decStat = INITIALIZED;
+        }
+        else /* Continue decoding normally */
+        {
+            decResult = h264bsdDecode(&pDecCont->storage, tmpStream, strmLen,
+                pInput->picId, &numReadBytes);
+        }
+
+        /* check if too many bytes are read from stream: clamp the count
+         * before advancing so that the reported stream position cannot
+         * move past the end of the input buffer. Comparing the unsigned
+         * values directly also works for buffers of 2 GiB and more, where
+         * a signed cast of the difference would misfire. */
+        if (numReadBytes > strmLen)
+            numReadBytes = strmLen;
+        tmpStream += numReadBytes;
+        strmLen -= numReadBytes;
+
+        pOutput->pStrmCurrPos = tmpStream;
+
+        switch (decResult)
+        {
+            case H264BSD_HDRS_RDY:
+
+                if(pDecCont->storage.dpb->flushed &&
+                   pDecCont->storage.dpb->numOut !=
+                   pDecCont->storage.dpb->outIndex)
+                {
+                    /* output first all DPB stored pictures
+                     * DPB flush caused by new SPS */
+                    pDecCont->storage.dpb->flushed = 0;
+                    pDecCont->decStat = NEW_HEADERS;
+                    returnValue = H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY;
+                    strmLen = 0;
+                }
+                else
+                {
+                    returnValue = H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY;
+                    strmLen = 0;
+                }
+                break;
+
+            case H264BSD_PIC_RDY:
+                pDecCont->picNumber++;
+
+                if (strmLen == 0)
+                    returnValue = H264SWDEC_PIC_RDY;
+                else
+                    returnValue = H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY;
+
+                /* zeroing strmLen ends the loop so the picture is reported */
+                strmLen = 0;
+                break;
+
+            case H264BSD_PARAM_SET_ERROR:
+                /* only an error once the buffer is exhausted and no valid
+                 * parameter sets are available */
+                if ( !h264bsdCheckValidParamSets(&pDecCont->storage) &&
+                     strmLen == 0 )
+                {
+                    returnValue = H264SWDEC_STRM_ERR;
+                }
+                break;
+            case H264BSD_MEMALLOC_ERROR:
+                {
+                    returnValue = H264SWDEC_MEMFAIL;
+                    strmLen = 0;
+                }
+                break;
+            default:
+                break;
+        }
+
+    } while (strmLen);
+
+#ifdef H264DEC_TRACE
+    sprintf(pDecCont->str, "H264SwDecDecode# OK: DecResult %d",
+        returnValue);
+    DEC_API_TRC(pDecCont->str);
+#endif
+
+    return(returnValue);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: H264SwDecGetAPIVersion
+
+        Functional description:
+            Report the version of the decoder API this library was built
+            with.
+
+        Inputs:
+            none
+
+        Outputs:
+            none
+
+        Returns:
+            H264SwDecApiVersion whose major and minor fields are set from
+            H264SWDEC_MAJOR_VERSION and H264SWDEC_MINOR_VERSION
+
+------------------------------------------------------------------------------*/
+
+H264SwDecApiVersion H264SwDecGetAPIVersion()
+{
+    H264SwDecApiVersion apiVersion;
+
+    apiVersion.minor = H264SWDEC_MINOR_VERSION;
+    apiVersion.major = H264SWDEC_MAJOR_VERSION;
+
+    return apiVersion;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: H264SwDecNextPicture
+
+        Functional description:
+            Hand out the next picture in display order, if one is ready.
+            When flushBuffer is non-zero the picture buffer is flushed
+            first, so that pictures still held back become available.
+
+        Input:
+            decInst     decoder instance.
+            flushBuffer non-zero forces output of all buffered pictures
+
+        Output:
+            pOutput     filled with picture pointer, picture id, IDR flag
+                        and number of concealed macroblocks; written only
+                        when H264SWDEC_PIC_RDY is returned
+
+        Returns:
+            H264SWDEC_OK            no pictures available for display
+            H264SWDEC_PIC_RDY       picture available for display
+            H264SWDEC_PARAM_ERR     decInst or pOutput is NULL
+
+------------------------------------------------------------------------------*/
+
+H264SwDecRet H264SwDecNextPicture(H264SwDecInst decInst,
+    H264SwDecPicture *pOutput, u32 flushBuffer)
+{
+
+    decContainer_t *container;
+    u32 *picture;
+    u32 pictureId, idrFlag, concealedMbs;
+
+    DEC_API_TRC("H264SwDecNextPicture#");
+
+    if (!decInst || !pOutput)
+    {
+        DEC_API_TRC("H264SwDecNextPicture# ERROR: decInst or pOutput is NULL");
+        return(H264SWDEC_PARAM_ERR);
+    }
+
+    container = (decContainer_t*)decInst;
+
+#ifdef H264DEC_TRACE
+    sprintf(container->str, "H264SwDecNextPicture# decInst %p pOutput %p %s %d",
+        decInst, (void*)pOutput, "flushBuffer", flushBuffer);
+    DEC_API_TRC(container->str);
+#endif
+
+    if (flushBuffer != 0)
+    {
+        h264bsdFlushBuffer(&container->storage);
+    }
+
+    picture = (u32*)h264bsdNextOutputPicture(&container->storage, &pictureId,
+        &idrFlag, &concealedMbs);
+
+    /* nothing to display yet: leave *pOutput untouched */
+    if (picture == NULL)
+    {
+        DEC_API_TRC("H264SwDecNextPicture# OK: return H264SWDEC_OK");
+        return(H264SWDEC_OK);
+    }
+
+    pOutput->nbrOfErrMBs = concealedMbs;
+    pOutput->isIdrPicture = idrFlag;
+    pOutput->picId = pictureId;
+    pOutput->pOutputPicture = picture;
+    DEC_API_TRC("H264SwDecNextPicture# OK: return H264SWDEC_PIC_RDY");
+    return(H264SWDEC_PIC_RDY);
+
+}
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/TestBenchMultipleInstance.c b/media/libstagefright/codecs/on2/h264dec/source/TestBenchMultipleInstance.c
new file mode 100755
index 0000000..42170d3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/TestBenchMultipleInstance.c
@@ -0,0 +1,531 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* CVS tag name for identification */
+const char tagName[256] = "$Name: FIRST_ANDROID_COPYRIGHT $";
+
+#include "H264SwDecApi.h"
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#define DEBUG(argv) printf argv /* call as DEBUG((fmt, ...)): the inner parentheses are the printf argument list */
+
+/* _NO_OUT disables output file writing; it is defined automatically when
+ * the compiler defines __arm */
+#ifdef __arm
+#define _NO_OUT
+#endif
+
+/*------------------------------------------------------------------------------
+
+    Prototypes of the output helpers defined after main()
+
+------------------------------------------------------------------------------*/
+void WriteOutput(FILE *fid, u8 *data, u32 picSize); /* writes picSize bytes when fid is not NULL */
+
+u32 CropPicture(u8 *pOutImage, u8 *pInImage,
+ u32 picWidth, u32 picHeight, CropParams *pCropParams); /* returns 0 on success, 1 on invalid arguments */
+
+void CropWriteOutput(FILE *fid, u8 *imageData, u32 cropDisplay,
+ H264SwDecInfo *decInfo); /* writes one picture, cropped when requested and signalled */
+
+typedef struct /* per-stream state of the test bench: one Decoder per input file */
+{
+ H264SwDecInst decInst; /* decoder instance handle set up by H264SwDecInit */
+ H264SwDecInput decInput; /* pStream/dataLen describe the input not yet consumed */
+ H264SwDecOutput decOutput; /* pStrmCurrPos returned by the latest H264SwDecDecode call */
+ H264SwDecPicture decPicture; /* picture descriptor filled by H264SwDecNextPicture */
+ H264SwDecInfo decInfo; /* stream info from H264SwDecGetInfo (size, cropping, ...) */
+ FILE *foutput; /* output file, NULL when output writing is disabled */
+ char outFileName[256]; /* output base name with the instance index appended */
+ u8 *byteStrmStart; /* malloc'ed buffer holding the whole input file */
+ u32 picNumber; /* number of pictures output so far */
+} Decoder;
+
+
+/*------------------------------------------------------------------------------
+
+    Function name: main
+
+        Purpose:
+            Test bench that decodes several H.264 byte stream files, one
+            decoder instance per file. The instances are served round-robin:
+            every pass of the main loop calls H264SwDecDecode once for each
+            instance that still has input left.
+
+        Arguments:
+            Options (-Nn, -Ooutfile, -C, -R, -T) must precede the input
+            files; the trailing arguments are taken as input file names.
+
+        Returns:
+            0       all streams decoded without concealed macroblocks
+            1       at least one output picture contained concealed
+                    macroblocks
+            The process exits with 100 on usage or set-up errors, and with
+            1 or 10 when the decoder reports a failure.
+
+------------------------------------------------------------------------------*/
+int main(int argc, char **argv)
+{
+
+    i32 instCount, instRunning;
+    i32 i;
+    u32 maxNumPics;
+    u32 strmLen;
+    long fileSize;
+    H264SwDecRet ret;
+    u32 numErrors = 0;
+    u32 cropDisplay = 0;
+    u32 disableOutputReordering = 0;
+    FILE *finput;
+    Decoder **decoder;
+    char outFileName[256] = "out.yuv";
+
+
+    if ( argc > 1 && strcmp(argv[1], "-T") == 0 )
+    {
+        fprintf(stderr, "%s\n", tagName);
+        return 0;
+    }
+
+    if (argc < 2)
+    {
+        DEBUG((
+            "Usage: %s [-Nn] [-Ooutfile] [-P] [-U] [-C] [-R] [-T] file1.264 [file2.264] .. [fileN.264]\n",
+            argv[0]));
+        DEBUG(("\t-Nn forces decoding to stop after n pictures\n"));
+#if defined(_NO_OUT)
+        DEBUG(("\t-Ooutfile output writing disabled at compile time\n"));
+#else
+        DEBUG(("\t-Ooutfile write output to \"outfile\" (default out.yuv)\n"));
+        DEBUG(("\t-Onone does not write output\n"));
+#endif
+        DEBUG(("\t-C display cropped image (default decoded image)\n"));
+        DEBUG(("\t-R disable DPB output reordering\n"));
+        DEBUG(("\t-T to print tag name and exit\n"));
+        exit(100);
+    }
+
+    /* every argument is assumed to be an input file until it is recognised
+     * as an option below */
+    instCount = argc - 1;
+
+    /* read command line arguments */
+    maxNumPics = 0;
+    for (i = 1; i < (argc-1); i++)
+    {
+        if ( strncmp(argv[i], "-N", 2) == 0 )
+        {
+            maxNumPics = (u32)atoi(argv[i]+2);
+            instCount--;
+        }
+        else if ( strncmp(argv[i], "-O", 2) == 0 )
+        {
+            /* The name is later extended with the instance index (at most
+             * 10 decimal digits for a non-negative i32) and written to
+             * Decoder.outFileName, which has the same size as this buffer.
+             * Reject anything that would not fit together with the
+             * terminating NUL instead of overflowing either buffer. */
+            if (strlen(argv[i]+2) + 10 >= sizeof(outFileName))
+            {
+                DEBUG(("Output file name too long\n"));
+                exit(100);
+            }
+            strcpy(outFileName, argv[i]+2);
+            instCount--;
+        }
+        else if ( strcmp(argv[i], "-C") == 0 )
+        {
+            cropDisplay = 1;
+            instCount--;
+        }
+        else if ( strcmp(argv[i], "-R") == 0 )
+        {
+            disableOutputReordering = 1;
+            instCount--;
+        }
+    }
+
+    if (instCount < 1)
+    {
+        DEBUG(("No input files\n"));
+        exit(100);
+    }
+
+    /* allocate memory for multiple decoder instances
+     * one instance for every stream file */
+    decoder = (Decoder **)malloc(sizeof(Decoder*)*(u32)instCount);
+    if (decoder == NULL)
+    {
+        DEBUG(("Unable to allocate memory\n"));
+        exit(100);
+    }
+
+    /* prepare each decoder instance */
+    for (i = 0; i < instCount; i++)
+    {
+        decoder[i] = (Decoder *)calloc(1, sizeof(Decoder));
+        if (decoder[i] == NULL)
+        {
+            DEBUG(("Unable to allocate memory\n"));
+            exit(100);
+        }
+
+        /* open input file; the input files are the last instCount
+         * command line arguments */
+        finput = fopen(argv[argc-instCount+i],"rb");
+        if (finput == NULL)
+        {
+            DEBUG(("Unable to open input file <%s>\n", argv[argc-instCount+i]));
+            exit(100);
+        }
+
+        DEBUG(("Reading input file[%d] %s\n", i, argv[argc-instCount+i]));
+
+        /* read input stream to buffer */
+        if (fseek(finput,0L,SEEK_END) != 0)
+        {
+            DEBUG(("Unable to read input file <%s>\n", argv[argc-instCount+i]));
+            exit(100);
+        }
+        fileSize = ftell(finput);
+        if (fileSize < 0)
+        {
+            DEBUG(("Unable to read input file <%s>\n", argv[argc-instCount+i]));
+            exit(100);
+        }
+        strmLen = (u32)fileSize;
+        rewind(finput);
+        /* ask for at least one byte so that an empty input file is not
+         * reported as an allocation failure where malloc(0) returns NULL */
+        decoder[i]->byteStrmStart =
+            (u8 *)malloc(sizeof(u8)*(strmLen ? strmLen : 1));
+        if (decoder[i]->byteStrmStart == NULL)
+        {
+            DEBUG(("Unable to allocate memory\n"));
+            exit(100);
+        }
+        if (fread(decoder[i]->byteStrmStart, sizeof(u8), strmLen, finput) !=
+            strmLen)
+        {
+            DEBUG(("Unable to read input file <%s>\n", argv[argc-instCount+i]));
+            exit(100);
+        }
+        fclose(finput);
+
+        /* open output file */
+        if (strcmp(outFileName, "none") != 0)
+        {
+#if defined(_NO_OUT)
+            decoder[i]->foutput = NULL;
+#else
+            /* cannot overflow: the length of outFileName was limited when
+             * the -O option was parsed */
+            sprintf(decoder[i]->outFileName, "%s%i", outFileName, i);
+            decoder[i]->foutput = fopen(decoder[i]->outFileName, "wb");
+            if (decoder[i]->foutput == NULL)
+            {
+                DEBUG(("Unable to open output file\n"));
+                exit(100);
+            }
+#endif
+        }
+
+        ret = H264SwDecInit(&(decoder[i]->decInst), disableOutputReordering);
+
+        if (ret != H264SWDEC_OK)
+        {
+            DEBUG(("Init failed %d\n", ret));
+            exit(100);
+        }
+
+        decoder[i]->decInput.pStream = decoder[i]->byteStrmStart;
+        decoder[i]->decInput.dataLen = strmLen;
+        decoder[i]->decInput.intraConcealmentMethod = 0;
+
+    }
+
+    /* main decoding loop */
+    do
+    {
+        /* decode once using each instance */
+        for (i = 0; i < instCount; i++)
+        {
+            /* an instance without input left is finished; calling the
+             * decoder with dataLen == 0 would only return
+             * H264SWDEC_PARAM_ERR */
+            if (decoder[i]->decInput.dataLen == 0)
+                continue;
+
+            ret = H264SwDecDecode(decoder[i]->decInst,
+                                &(decoder[i]->decInput),
+                                &(decoder[i]->decOutput));
+
+            switch(ret)
+            {
+
+                case H264SWDEC_HDRS_RDY_BUFF_NOT_EMPTY:
+
+                    ret = H264SwDecGetInfo(decoder[i]->decInst,
+                            &(decoder[i]->decInfo));
+                    if (ret != H264SWDEC_OK)
+                        exit(1);
+
+                    if (cropDisplay && decoder[i]->decInfo.croppingFlag)
+                    {
+                        DEBUG(("Decoder[%d] Cropping params: (%d, %d) %dx%d\n",
+                            i,
+                            decoder[i]->decInfo.cropParams.cropLeftOffset,
+                            decoder[i]->decInfo.cropParams.cropTopOffset,
+                            decoder[i]->decInfo.cropParams.cropOutWidth,
+                            decoder[i]->decInfo.cropParams.cropOutHeight));
+                    }
+
+                    DEBUG(("Decoder[%d] Width %d Height %d\n", i,
+                        decoder[i]->decInfo.picWidth,
+                        decoder[i]->decInfo.picHeight));
+
+                    DEBUG(("Decoder[%d] videoRange %d, matricCoefficients %d\n",
+                        i, decoder[i]->decInfo.videoRange,
+                        decoder[i]->decInfo.matrixCoefficients));
+                    /* continue from where the decoder stopped reading */
+                    decoder[i]->decInput.dataLen -=
+                        (u32)(decoder[i]->decOutput.pStrmCurrPos -
+                              decoder[i]->decInput.pStream);
+                    decoder[i]->decInput.pStream =
+                        decoder[i]->decOutput.pStrmCurrPos;
+                    break;
+
+                case H264SWDEC_PIC_RDY_BUFF_NOT_EMPTY:
+                    /* continue from where the decoder stopped reading */
+                    decoder[i]->decInput.dataLen -=
+                        (u32)(decoder[i]->decOutput.pStrmCurrPos -
+                              decoder[i]->decInput.pStream);
+                    decoder[i]->decInput.pStream =
+                        decoder[i]->decOutput.pStrmCurrPos;
+                    /* fall through */
+                case H264SWDEC_PIC_RDY:
+                    if (ret == H264SWDEC_PIC_RDY)
+                        decoder[i]->decInput.dataLen = 0;
+
+                    ret = H264SwDecGetInfo(decoder[i]->decInst,
+                            &(decoder[i]->decInfo));
+                    if (ret != H264SWDEC_OK)
+                        exit(1);
+
+                    while (H264SwDecNextPicture(decoder[i]->decInst,
+                            &(decoder[i]->decPicture), 0) == H264SWDEC_PIC_RDY)
+                    {
+                        decoder[i]->picNumber++;
+
+                        numErrors += decoder[i]->decPicture.nbrOfErrMBs;
+
+                        DEBUG(("Decoder[%d] PIC %d, type %s, concealed %d\n",
+                            i, decoder[i]->picNumber,
+                            decoder[i]->decPicture.isIdrPicture
+                                ? "IDR" : "NON-IDR",
+                            decoder[i]->decPicture.nbrOfErrMBs));
+                        fflush(stdout);
+
+                        CropWriteOutput(decoder[i]->foutput,
+                                (u8*)decoder[i]->decPicture.pOutputPicture,
+                                cropDisplay, &(decoder[i]->decInfo));
+                    }
+
+                    /* several pictures may have been output above, so the
+                     * limit can be stepped over rather than hit exactly */
+                    if (maxNumPics && decoder[i]->picNumber >= maxNumPics)
+                        decoder[i]->decInput.dataLen = 0;
+                    break;
+
+                case H264SWDEC_STRM_PROCESSED:
+                case H264SWDEC_STRM_ERR:
+                case H264SWDEC_PARAM_ERR:
+                    decoder[i]->decInput.dataLen = 0;
+                    break;
+
+                default:
+                    DEBUG(("Decoder[%d] FATAL ERROR\n", i));
+                    exit(10);
+                    break;
+
+            }
+        }
+
+        /* check if any of the instances is still running (=has more data) */
+        instRunning = instCount;
+        for (i = 0; i < instCount; i++)
+        {
+            if (decoder[i]->decInput.dataLen == 0)
+                instRunning--;
+        }
+
+    } while (instRunning);
+
+
+    /* get last frames and close each instance */
+    for (i = 0; i < instCount; i++)
+    {
+        while (H264SwDecNextPicture(decoder[i]->decInst,
+                &(decoder[i]->decPicture), 1) == H264SWDEC_PIC_RDY)
+        {
+            decoder[i]->picNumber++;
+
+            /* flushed pictures count towards the exit status as well */
+            numErrors += decoder[i]->decPicture.nbrOfErrMBs;
+
+            DEBUG(("Decoder[%d] PIC %d, type %s, concealed %d\n",
+                i, decoder[i]->picNumber,
+                decoder[i]->decPicture.isIdrPicture
+                    ? "IDR" : "NON-IDR",
+                decoder[i]->decPicture.nbrOfErrMBs));
+            fflush(stdout);
+
+            CropWriteOutput(decoder[i]->foutput,
+                    (u8*)decoder[i]->decPicture.pOutputPicture,
+                    cropDisplay, &(decoder[i]->decInfo));
+        }
+
+        H264SwDecRelease(decoder[i]->decInst);
+
+        if (decoder[i]->foutput)
+            fclose(decoder[i]->foutput);
+
+        free(decoder[i]->byteStrmStart);
+
+        free(decoder[i]);
+    }
+
+    free(decoder);
+
+    if (numErrors)
+        return 1;
+    else
+        return 0;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name: CropWriteOutput
+
+        Purpose:
+            Write one decoded picture to foutput. When cropDisplay is set
+            and the stream info signals cropping, the picture is first
+            cropped into a temporary buffer; otherwise the full picture of
+            picWidth x picHeight is written. In both cases the byte count
+            is 3/2 times the number of luma pels.
+
+            The process exits with status 1 when the temporary buffer
+            cannot be allocated or CropPicture reports an error.
+
+------------------------------------------------------------------------------*/
+void CropWriteOutput(FILE *foutput, u8 *imageData, u32 cropDisplay,
+        H264SwDecInfo *decInfo)
+{
+    u8 *cropped = NULL;
+    u32 status, numBytes;
+
+    /* no cropping requested or none signalled: write the picture as is */
+    if (!cropDisplay || !decInfo->croppingFlag)
+    {
+        numBytes = decInfo->picWidth * decInfo->picHeight;
+        numBytes = (3 * numBytes)/2;
+        WriteOutput(foutput, imageData, numBytes);
+        return;
+    }
+
+    numBytes = decInfo->cropParams.cropOutWidth *
+               decInfo->cropParams.cropOutHeight;
+    numBytes = (3 * numBytes)/2;
+
+    cropped = malloc(numBytes);
+    if (cropped == NULL)
+    {
+        exit(1);
+    }
+
+    status = CropPicture(cropped, imageData,
+        decInfo->picWidth, decInfo->picHeight,
+        &(decInfo->cropParams));
+    if (status != 0)
+    {
+        exit(1);
+    }
+
+    WriteOutput(foutput, cropped, numBytes);
+    free(cropped);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name: WriteOutput
+
+        Purpose:
+            Write picSize bytes starting at data to fid. Does nothing when
+            fid is NULL (output writing disabled).
+
+------------------------------------------------------------------------------*/
+void WriteOutput(FILE *fid, u8 *data, u32 picSize)
+{
+    if (fid == NULL)
+        return;
+
+    fwrite(data, 1, picSize, fid);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name: H264SwDecTrace
+
+        Purpose:
+            Append one line of decoder API trace to the file "dec_api.trc".
+            The file is opened and closed on every call; the message is
+            silently dropped when the file cannot be opened.
+
+------------------------------------------------------------------------------*/
+void H264SwDecTrace(char *string)
+{
+    FILE *traceFile = fopen("dec_api.trc", "at");
+
+    if (traceFile != NULL)
+    {
+        fputs(string, traceFile);
+        fputc('\n', traceFile);
+
+        fclose(traceFile);
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name: H264SwDecMalloc
+
+        Purpose:
+            Memory allocation hook of the test bench; forwards to malloc
+            and returns its result unchanged (NULL on failure).
+
+------------------------------------------------------------------------------*/
+void* H264SwDecMalloc(u32 size)
+{
+    void *memory = malloc(size);
+
+    return memory;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name: H264SwDecFree
+
+        Purpose:
+            Memory release hook of the test bench; forwards ptr to free.
+
+------------------------------------------------------------------------------*/
+void H264SwDecFree(void *ptr)
+{
+    free(ptr);
+    return;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name: H264SwDecMemcpy
+
+        Purpose:
+            Memory copy hook of the test bench; copies count bytes from
+            src to dest with memcpy.
+
+------------------------------------------------------------------------------*/
+void H264SwDecMemcpy(void *dest, void *src, u32 count)
+{
+    (void)memcpy(dest, src, count);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name: H264SwDecMemset
+
+        Purpose:
+            Memory fill hook of the test bench; sets count bytes starting
+            at ptr to value with memset.
+
+------------------------------------------------------------------------------*/
+void H264SwDecMemset(void *ptr, i32 value, u32 count)
+{
+    (void)memset(ptr, value, count);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name: CropPicture
+
+        Purpose:
+            Copy the cropping rectangle described by pCropParams out of a
+            picture stored as a luma plane of picWidth x picHeight followed
+            by two chroma planes of half width and half height. The result
+            is written to pOutImage as three tightly packed planes.
+
+        Returns:
+            0   picture cropped
+            1   NULL pointer, zero picture dimension, or a rectangle that
+                does not fit inside the picture; in this case pOutImage
+                has been passed to free()
+
+------------------------------------------------------------------------------*/
+
+/* Copy numRows rows of rowBytes bytes each; consecutive source rows are
+ * srcStride bytes apart, destination rows are packed. Returns the position
+ * just past the last byte written. */
+static u8 *CropCopyRows(u8 *dst, u8 *src, u32 rowBytes, u32 numRows,
+    u32 srcStride)
+{
+    u32 row;
+
+    for (row = 0; row < numRows; row++)
+    {
+        memcpy(dst, src, rowBytes);
+        dst += rowBytes;
+        src += srcStride;
+    }
+
+    return dst;
+}
+
+u32 CropPicture(u8 *pOutImage, u8 *pInImage,
+    u32 picWidth, u32 picHeight, CropParams *pCropParams)
+{
+
+    u32 cropWidth, cropHeight;
+    u8 *dst, *src;
+
+    /* invalid arguments, or a rectangle reaching outside the picture */
+    if (pOutImage == NULL || pInImage == NULL || pCropParams == NULL ||
+        !picWidth || !picHeight ||
+        ((pCropParams->cropLeftOffset + pCropParams->cropOutWidth) >
+           picWidth ) ||
+        ((pCropParams->cropTopOffset + pCropParams->cropOutHeight) >
+           picHeight ) )
+    {
+        /* due to lint warning */
+        free(pOutImage);
+        return(1);
+    }
+
+    cropWidth = pCropParams->cropOutWidth;
+    cropHeight = pCropParams->cropOutHeight;
+    dst = pOutImage;
+
+    /* luma */
+    src = pInImage + pCropParams->cropTopOffset*picWidth +
+        pCropParams->cropLeftOffset;
+    dst = CropCopyRows(dst, src, cropWidth, cropHeight, picWidth);
+
+    /* both chroma planes are copied with halved dimensions */
+    cropWidth >>= 1;
+    cropHeight >>= 1;
+
+    /* cb */
+    src = pInImage + picWidth*picHeight +
+        pCropParams->cropTopOffset*picWidth/4 + pCropParams->cropLeftOffset/2;
+    dst = CropCopyRows(dst, src, cropWidth, cropHeight, picWidth/2);
+
+    /* cr */
+    src = pInImage + 5*picWidth*picHeight/4 +
+        pCropParams->cropTopOffset*picWidth/4 + pCropParams->cropLeftOffset/2;
+    (void)CropCopyRows(dst, src, cropWidth, cropHeight, picWidth/2);
+
+    return (0);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor.s
new file mode 100755
index 0000000..634a484
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor.s
@@ -0,0 +1,298 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateChromaHor function
+;--
+;-------------------------------------------------------------------------------
+
+
+ IF :DEF: H264DEC_WINASM
+ ;// We dont use REQUIRE8 and PRESERVE8 for winasm
+ ELSE
+ REQUIRE8
+ PRESERVE8
+ ENDIF
+
+ AREA |.text|, CODE
+
+
+;// h264bsdInterpolateChromaHor register allocation (several names share one register; each pair is used in different phases of the function)
+
+ref RN 0 ;// r0 on entry: ref argument
+ptrA RN 0 ;// r0 in the loops: running source pel pointer
+
+mb RN 1 ;// r1 in the loops: output pointer (predPartChroma)
+block RN 1 ;// r1 when calling h264bsdFillBlock: fill buffer on the stack
+
+x0 RN 2 ;// r2 on entry: x0 argument
+count RN 2 ;// r2 in the loops: packed loop counters
+
+y0 RN 3 ;// r3 on entry: y0 argument
+valX RN 3 ;// r3 in the loops: packed horizontal weights
+
+width RN 4
+
+height RN 5
+tmp7 RN 5
+
+chrPW RN 6 ;// chromaPartWidth
+tmp8 RN 6 ;// same register as chrPW
+
+tmp1 RN 7
+chrPH RN 7 ;// chromaPartHeight; same register as tmp1
+
+tmp2 RN 8
+
+tmp3 RN 9
+
+tmp4 RN 10
+
+tmp5 RN 11
+
+tmp6 RN 12
+
+c32 RN 14 ;// constant 32 during the loops (lr is saved on the stack)
+xFrac RN 14 ;// same register as c32; used only before c32 is loaded
+
+;// Function exports and imports
+
+ IMPORT h264bsdFillBlock
+
+ EXPORT h264bsdInterpolateChromaHor
+
+;// Function arguments
+;// (hex value = sp-relative offset of each argument once the prologue below has run)
+;// u8 *ref, : 0xc4
+;// u8 *predPartChroma, : 0xc8
+;// i32 x0, : 0xcc
+;// i32 y0, : 0xd0
+;// u32 width, : 0xf8
+;// u32 height, : 0xfc
+;// u32 xFrac, : 0x100
+;// u32 chromaPartWidth, : 0x104
+;// u32 chromaPartHeight : 0x108
+
+h264bsdInterpolateChromaHor
+ STMFD sp!, {r0-r11,lr} ;// save the register arguments r0-r3 together with r4-r11 and lr
+ SUB sp, sp, #0xc4 ;// locals: outgoing stack arguments at sp, fill buffer ('block') from sp+0x1c
+
+ LDR chrPW, [sp, #0x104] ;// chromaPartWidth
+ LDR width, [sp, #0xf8] ;// width
+ CMP x0, #0 ;// x0 < 0 -> fill needed
+ BLT do_fill
+
+ ADD tmp6, x0, chrPW ;// tmp6 = x0+ chromaPartWidth
+ ADD tmp6, tmp6, #1 ;// tmp6 = x0 + chromaPartWidth + 1
+ CMP tmp6, width ;// x0+chromaPartWidth+1 > width
+ BHI do_fill
+
+ CMP y0, #0 ;// y0 < 0 -> fill needed
+ BLT do_fill
+ LDR chrPH, [sp, #0x108] ;// chromaPartHeight
+ LDR height, [sp, #0xfc] ;// height
+ ADD tmp6, y0, chrPH ;// tmp6 = y0 + chromaPartHeight
+ CMP tmp6, height ;// y0+chromaPartHeight > height
+ BLS skip_fill ;// all checks passed: read directly from ref
+
+do_fill
+ LDR chrPH, [sp, #0x108] ;// chromaPartHeight
+ LDR height, [sp, #0xfc] ;// height
+ ADD tmp8, chrPW, #1 ;// tmp8 = chromaPartWidth+1
+ MOV tmp2, tmp8 ;// tmp2 = chromaPartWidth+1
+ STMIA sp,{width,height,tmp8,chrPH,tmp2} ;// width, height, chromaPartWidth+1, chromaPartHeight, chromaPartWidth+1 -> [sp]..[sp+0x10]; presumably the stack arguments of h264bsdFillBlock
+ ADD block, sp, #0x1c ;// block
+ BL h264bsdFillBlock ;// first plane: r0 (ref), r2 (x0), r3 (y0) are still the incoming values
+
+ LDR x0, [sp, #0xcc] ;// reload the arguments saved by the prologue
+ LDR y0, [sp, #0xd0]
+ LDR ref, [sp, #0xc4] ;// ref
+ STMIA sp,{width,height,tmp8,chrPH,tmp2} ;// write the stack arguments again for the second call
+ ADD block, sp, #0x1c ;// block
+ MLA ref, height, width, ref ;// ref += width * height;
+ MLA block, chrPH, tmp8, block;// block + (chromaPH)*(chromaPW+1)
+ BL h264bsdFillBlock ;// second plane, placed behind the first one in the fill buffer
+
+ MOV x0, #0 ;// x0 = 0
+ MOV y0, #0 ;// y0 = 0
+ STR x0, [sp, #0xcc] ;// from here on the saved arguments describe the fill buffer
+ STR y0, [sp, #0xd0]
+ ADD ref, sp, #0x1c ;// ref = block
+ STR ref, [sp, #0xc4] ;// ref
+
+ STR chrPH, [sp, #0xfc] ;// height
+ STR tmp8, [sp, #0xf8] ;// width
+ MOV width, tmp8 ;// width = chromaPartWidth+1
+ SUB chrPW, chrPW, #1 ;// tmp8 and chrPW are both r6: undo the +1 from above
+
+skip_fill
+ MLA tmp3, y0, width, x0 ;// tmp3 = y0*width+x0
+ LDR xFrac, [sp, #0x100] ;// xFrac
+ ADD ptrA, ref, tmp3 ;// ptrA = ref + y0*width+x0
+ RSB valX, xFrac, #8 ;// valX = 8-xFrac
+
+ LDR mb, [sp, #0xc8] ;// predPartChroma
+
+
+ ;// pack values to count register
+ ;// [31:28] loop_x (chromaPartWidth-1)
+ ;// [27:24] loop_y (chromaPartHeight-1)
+ ;// [23:20] chromaPartWidth-1
+ ;// [19:16] chromaPartHeight-1
+ ;// [15:00] nothing
+
+ SUB tmp2, chrPH, #1 ;// chromaPartHeight-1
+ SUB tmp1, chrPW, #1 ;// chromaPartWidth-1
+ ADD count, count, tmp2, LSL #16 ;// chromaPartHeight-1 (count is r2, which still holds x0)
+ ADD count, count, tmp2, LSL #24 ;// loop_y
+ ADD count, count, tmp1, LSL #20 ;// chromaPartWidth-1
+ AND tmp2, count, #0x00F00000 ;// loop_x
+ PKHBT valX, valX, xFrac, LSL #16 ;// |xFrac|valX |
+ MOV valX, valX, LSL #3 ;// multiply by 8 in advance
+ MOV c32, #32 ;// added before the final shift by 6; overwrites xFrac (same register)
+
+
+ ;///////////////////////////////////////////////////////////////////////////
+ ;// Cb
+ ;///////////////////////////////////////////////////////////////////////////
+
+ ;// 2x2 pels per iteration
+ ;// bilinear horizontal interpolation
+
+loop1_y
+ ADD count, count, tmp2, LSL #8 ;// loop_x [31:28] = chromaPartWidth-1
+ LDRB tmp1, [ptrA, width] ;// first pel of the next row
+ LDRB tmp2, [ptrA], #1 ;// first pel of the current row, advance ptrA
+
+loop1_x
+ LDRB tmp3, [ptrA, width] ;// next pel to the right, next row
+ LDRB tmp4, [ptrA], #1 ;// next pel to the right, current row
+
+ PKHBT tmp5, tmp1, tmp3, LSL #16 ;// |tmp3|tmp1| left/right pel pair, next row
+ PKHBT tmp6, tmp2, tmp4, LSL #16 ;// |tmp4|tmp2| left/right pel pair, current row
+
+ LDRB tmp1, [ptrA, width] ;// following pel, next row
+ LDRB tmp2, [ptrA], #1 ;// following pel, current row
+
+ SMLAD tmp5, tmp5, valX, c32 ;// multiply: 8*((8-xFrac)*left + xFrac*right) + 32
+ SMLAD tmp6, tmp6, valX, c32 ;// multiply
+
+ PKHBT tmp7, tmp3, tmp1, LSL #16 ;// |tmp1|tmp3| second pel pair, next row
+ PKHBT tmp8, tmp4, tmp2, LSL #16 ;// |tmp2|tmp4| second pel pair, current row
+
+ SMLAD tmp7, tmp7, valX, c32 ;// multiply
+ SMLAD tmp8, tmp8, valX, c32 ;// multiply
+
+ MOV tmp5, tmp5, LSR #6 ;// scale down
+ STRB tmp5, [mb,#8] ;// store row 2 col 1
+
+ MOV tmp6, tmp6, LSR #6 ;// scale down
+ STRB tmp6, [mb],#1 ;// store row 1 col 1
+
+ MOV tmp7, tmp7, LSR #6 ;// scale down
+ STRB tmp7, [mb,#8] ;// store row 2 col 2
+
+ MOV tmp8, tmp8, LSR #6 ;// scale down
+ STRB tmp8, [mb],#1 ;// store row 1 col 2
+
+ SUBS count, count, #2<<28 ;// loop_x -= 2
+ BCS loop1_x ;// repeat until the subtraction borrows
+
+ AND tmp2, count, #0x00F00000 ;// chromaPartWidth-1 from [23:20]
+
+ ADDS mb, mb, #16 ;// two output rows down (rows are 8 bytes apart); also sets the carry flag used by SBC
+ SBC mb, mb, tmp2, LSR #20 ;// with carry clear this subtracts (chromaPartWidth-1)+1
+ ADD ptrA, ptrA, width, LSL #1 ;// two source rows down
+ SBC ptrA, ptrA, tmp2, LSR #20 ;// same carry as above: subtract chromaPartWidth
+ SUB ptrA, ptrA, #1 ;// compensate the post-increment of the row preload at loop1_y
+
+ ADDS count, count, #0xE << 24 ;// effectively loop_y -= 2 (relies on loop_x having wrapped to 0xF)
+ BGE loop1_y
+
+ ;///////////////////////////////////////////////////////////////////////////
+ ;// Cr
+ ;///////////////////////////////////////////////////////////////////////////
+ LDR height, [sp,#0xfc] ;// height
+ LDR ref, [sp, #0xc4] ;// ref
+ LDR tmp1, [sp, #0xd0] ;// y0
+ LDR tmp2, [sp, #0xcc] ;// x0
+ LDR mb, [sp, #0xc8] ;// predPartChroma
+
+ ADD tmp1, height, tmp1 ;// height + y0: skip the rows of the first plane
+ MLA tmp3, tmp1, width, tmp2 ;// tmp3 = (height+y0)*width + x0
+ ADD ptrA, ref, tmp3 ;// ptrA = ref + tmp3
+ ADD mb, mb, #64 ;// Cr output starts 64 bytes after the Cb output
+
+ AND count, count, #0x00FFFFFF ;// clear loop_x and loop_y
+ AND tmp1, count, #0x000F0000 ;// chromaPartHeight-1 from [19:16]
+ ADD count, count, tmp1, LSL #8 ;// loop_y
+ AND tmp2, count, #0x00F00000 ;// chromaPartWidth-1, moved to loop_x at loop2_y
+
+ ;// 2x2 pels per iteration
+ ;// bilinear horizontal interpolation
+loop2_y
+ ADD count, count, tmp2, LSL #8 ;// loop_x [31:28] = chromaPartWidth-1
+ LDRB tmp1, [ptrA, width] ;// first pel of the next row
+ LDRB tmp2, [ptrA], #1 ;// first pel of the current row, advance ptrA
+
+loop2_x
+ LDRB tmp3, [ptrA, width]
+ LDRB tmp4, [ptrA], #1
+
+ PKHBT tmp5, tmp1, tmp3, LSL #16 ;// |tmp3|tmp1|
+ PKHBT tmp6, tmp2, tmp4, LSL #16 ;// |tmp4|tmp2|
+
+ LDRB tmp1, [ptrA, width]
+ LDRB tmp2, [ptrA], #1
+
+ SMLAD tmp5, tmp5, valX, c32 ;// multiply
+ SMLAD tmp6, tmp6, valX, c32 ;// multiply
+
+ PKHBT tmp7, tmp3, tmp1, LSL #16 ;// |tmp1|tmp3|
+ PKHBT tmp8, tmp4, tmp2, LSL #16 ;// |tmp2|tmp4|
+
+ SMLAD tmp7, tmp7, valX, c32 ;// multiply
+ SMLAD tmp8, tmp8, valX, c32 ;// multiply
+
+ MOV tmp5, tmp5, LSR #6 ;// scale down
+ STRB tmp5, [mb,#8] ;// store row 2 col 1
+
+ MOV tmp6, tmp6, LSR #6 ;// scale down
+ STRB tmp6, [mb],#1 ;// store row 1 col 1
+
+ MOV tmp7, tmp7, LSR #6 ;// scale down
+ STRB tmp7, [mb,#8] ;// store row 2 col 2
+
+ MOV tmp8, tmp8, LSR #6 ;// scale down
+ STRB tmp8, [mb],#1 ;// store row 1 col 2
+
+ SUBS count, count, #2<<28 ;// loop_x -= 2
+ BCS loop2_x
+
+ AND tmp2, count, #0x00F00000 ;// chromaPartWidth-1 from [23:20]
+
+ ADDS mb, mb, #16 ;// same pointer stepping as at the end of loop1_y
+ SBC mb, mb, tmp2, LSR #20
+ ADD ptrA, ptrA, width, LSL #1
+ SBC ptrA, ptrA, tmp2, LSR #20
+ SUB ptrA, ptrA, #1
+
+ ADDS count, count, #0xE << 24 ;// effectively loop_y -= 2
+ BGE loop2_y
+
+ ADD sp,sp,#0xd4 ;// release the locals (0xc4) and the saved r0-r3 (0x10)
+ LDMFD sp!, {r4-r11,pc} ;// restore r4-r11 and return through the saved lr
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor_ver.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor_ver.s
new file mode 100755
index 0000000..7420ad3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor_ver.s
@@ -0,0 +1,339 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateChromaHorVer
+;-- function
+;--
+;-------------------------------------------------------------------------------
+
+
+ IF :DEF: H264DEC_WINASM
+ ;// We dont use REQUIRE8 and PRESERVE8 for winasm
+ ELSE
+ REQUIRE8
+ PRESERVE8
+ ENDIF
+
+ AREA |.text|, CODE
+
+
+;// h264bsdInterpolateChromaHorVer register allocation (several names share one register; each pair is used in different phases of the function)
+
+ref RN 0 ;// r0 on entry: ref argument
+ptrA RN 0 ;// r0 in the loops: running source pel pointer
+
+mb RN 1 ;// r1 in the loops: output pointer (predPartChroma)
+block RN 1 ;// r1 when calling h264bsdFillBlock: fill buffer on the stack
+
+x0 RN 2 ;// r2 on entry: x0 argument
+count RN 2 ;// r2 in the loops: packed loop counters
+
+y0 RN 3 ;// r3 on entry: y0 argument
+valY RN 3 ;// r3 in the loops: packed vertical weights
+
+width RN 4
+
+tmp4 RN 5
+height RN 5 ;// same register as tmp4
+
+tmp1 RN 6
+
+tmp2 RN 7
+
+tmp3 RN 8
+
+valX RN 9 ;// 8-xFrac
+
+tmp5 RN 10
+chrPW RN 10 ;// chromaPartWidth; same register as tmp5
+
+tmp6 RN 11
+chrPH RN 11 ;// chromaPartHeight; same register as tmp6
+
+xFrac RN 12
+
+c32 RN 14 ;// constant 32 during the loops (lr is saved on the stack)
+yFrac RN 14 ;// same register as c32; used only before c32 is loaded
+
+;// function exports and imports
+
+ IMPORT h264bsdFillBlock
+
+ EXPORT h264bsdInterpolateChromaHorVer
+
+;// Function arguments
+;// (hex value = sp-relative offset of each argument once the prologue below has run)
+;// u8 *ref, : 0xc4
+;// u8 *predPartChroma, : 0xc8
+;// i32 x0, : 0xcc
+;// i32 y0, : 0xd0
+;// u32 width, : 0xf8
+;// u32 height, : 0xfc
+;// u32 xFrac, : 0x100
+;// u32 yFrac, : 0x104
+;// u32 chromaPartWidth, : 0x108
+;// u32 chromaPartHeight : 0x10c
+
+h264bsdInterpolateChromaHorVer
+ STMFD sp!, {r0-r11,lr} ;// save the register arguments r0-r3 together with r4-r11 and lr
+ SUB sp, sp, #0xc4 ;// locals: outgoing stack arguments at sp, fill buffer ('block') from sp+0x1c
+
+ LDR chrPW, [sp, #0x108] ;// chromaPartWidth
+ LDR xFrac, [sp, #0x100] ;// xFrac
+ LDR width, [sp, #0xf8] ;// width
+ CMP x0, #0 ;// x0 < 0 -> fill needed
+ BLT do_fill
+
+ ADD tmp1, x0, chrPW ;// tmp1 = x0+ chromaPartWidth
+ ADD tmp1, tmp1, #1 ;// tmp1 = x0+ chromaPartWidth+1
+ CMP tmp1, width ;// x0+chromaPartWidth+1 > width
+ BHI do_fill
+
+ CMP y0, #0 ;// y0 < 0 -> fill needed
+ BLT do_fill
+ LDR chrPH, [sp, #0x10c] ;// chromaPartHeight
+ LDR height, [sp, #0xfc] ;// height
+ ADD tmp1, y0, chrPH ;// tmp1 = y0 + chromaPartHeight
+ ADD tmp1, tmp1, #1 ;// tmp1 = y0 + chromaPartHeight + 1
+ CMP tmp1, height ;// y0+chromaPartHeight+1 > height
+ BLS skip_fill ;// all checks passed: read directly from ref
+
+do_fill
+ LDR chrPH, [sp, #0x10c] ;// chromaPartHeight
+ LDR height, [sp, #0xfc] ;// height
+ ADD tmp3, chrPW, #1 ;// tmp3 = chromaPartWidth+1
+ ADD tmp1, chrPW, #1 ;// tmp1 = chromaPartWidth+1
+ ADD tmp2, chrPH, #1 ;// tmp2 = chromaPartHeight+1
+ STMIA sp,{width,height,tmp1,tmp2,tmp3} ;// width, height, chromaPartWidth+1, chromaPartHeight+1, chromaPartWidth+1 -> [sp]..[sp+0x10]; presumably the stack arguments of h264bsdFillBlock
+ ADD block, sp, #0x1c ;// block
+ BL h264bsdFillBlock ;// first plane: r0 (ref), r2 (x0), r3 (y0) are still the incoming values
+
+ LDR x0, [sp, #0xcc] ;// reload the arguments saved by the prologue
+ LDR y0, [sp, #0xd0]
+ LDR ref, [sp, #0xc4] ;// ref
+ STMIA sp,{width,height,tmp1,tmp2,tmp3} ;// write the stack arguments again for the second call
+ ADD block, sp, #0x1c ;// block
+ MLA ref, height, width, ref ;// ref += width * height;
+ MLA block, tmp2, tmp1, block;// block + (chromaPW+1)*(chromaPH+1)
+ BL h264bsdFillBlock ;// second plane, placed behind the first one in the fill buffer
+
+ MOV x0, #0 ;// x0 = 0
+ MOV y0, #0 ;// y0 = 0
+ STR x0, [sp, #0xcc] ;// from here on the saved arguments describe the fill buffer
+ STR y0, [sp, #0xd0]
+ ADD ref, sp, #0x1c ;// ref = block
+ STR ref, [sp, #0xc4] ;// ref
+
+ STR tmp2, [sp, #0xfc] ;// height
+ STR tmp1, [sp, #0xf8] ;// width
+ MOV width, tmp1 ;// width = chromaPartWidth+1
+
+skip_fill
+ MLA tmp3, y0, width, x0 ;// tmp3 = y0*width+x0
+ LDR yFrac, [sp, #0x104] ;// yFrac
+ LDR xFrac, [sp, #0x100] ;// xFrac (loaded again after the fill path)
+ ADD ptrA, ref, tmp3 ;// ptrA = ref + y0*width+x0
+ RSB valX, xFrac, #8 ;// valX = 8-xFrac
+ RSB valY, yFrac, #8 ;// valY = 8-yFrac
+
+ LDR mb, [sp, #0xc8] ;// predPartChroma
+
+
+ ;// pack values to count register
+ ;// [31:28] loop_x (chromaPartWidth-1)
+ ;// [27:24] loop_y (chromaPartHeight-1)
+ ;// [23:20] chromaPartWidth-1
+ ;// [19:16] chromaPartHeight-1
+ ;// [15:00] nothing
+
+ SUB tmp2, chrPH, #1 ;// chromaPartHeight-1
+ SUB tmp1, chrPW, #1 ;// chromaPartWidth-1
+ ADD count, count, tmp2, LSL #16 ;// chromaPartHeight-1 (count is r2, which still holds x0)
+ ADD count, count, tmp2, LSL #24 ;// loop_y
+ ADD count, count, tmp1, LSL #20 ;// chromaPartWidth-1
+ AND tmp2, count, #0x00F00000 ;// loop_x
+ PKHBT valY, valY, yFrac, LSL #16 ;// |yFrac|valY |
+ MOV c32, #32 ;// added before the final shift by 6; overwrites yFrac (same register)
+
+
+ ;///////////////////////////////////////////////////////////////////////////
+ ;// Cb
+ ;///////////////////////////////////////////////////////////////////////////
+
+ ;// 2x2 pels per iteration
+ ;// bilinear vertical and horizontal interpolation
+
+loop1_y
+ LDRB tmp1, [ptrA] ;// first column: pels of three consecutive rows
+ LDRB tmp3, [ptrA, width]
+ LDRB tmp5, [ptrA, width, LSL #1]
+
+ PKHBT tmp1, tmp1, tmp3, LSL #16 ;// |t3|t1|
+ PKHBT tmp3, tmp3, tmp5, LSL #16 ;// |t5|t3|
+
+ SMUAD tmp1, tmp1, valY ;// t1=(t1*valY + t3*yFrac)
+ SMUAD tmp3, tmp3, valY ;// t3=(t3*valY + t5*yFrac)
+
+ ADD count, count, tmp2, LSL #8 ;// loop_x [31:28] = chromaPartWidth-1
+loop1_x
+ ;// first
+ LDRB tmp2, [ptrA, #1]! ;// step one column to the right (pre-increment)
+ LDRB tmp4, [ptrA, width]
+ LDRB tmp6, [ptrA, width, LSL #1]
+
+ PKHBT tmp2, tmp2, tmp4, LSL #16 ;// |t4|t2|
+ PKHBT tmp4, tmp4, tmp6, LSL #16 ;// |t6|t4|
+
+ SMUAD tmp2, tmp2, valY ;// t2=(t2*valY + t4*yFrac)
+ MLA tmp5, tmp1, valX, c32 ;// t5=t1*valX+32
+ MLA tmp5, tmp2, xFrac, tmp5 ;// t5=t2*xFrac+t5
+
+ SMUAD tmp4, tmp4, valY ;// t4=(t4*valY + t6*yFrac)
+ MLA tmp6, tmp3, valX, c32 ;// t6=t3*valX+32
+ MLA tmp6, tmp4, xFrac, tmp6 ;// t6=t4*xFrac+t6
+
+ MOV tmp6, tmp6, LSR #6 ;// scale down
+ STRB tmp6, [mb, #8] ;// store pixel (second output row, 8 bytes further)
+ MOV tmp5, tmp5, LSR #6 ;// scale down
+ STRB tmp5, [mb], #1 ;// store pixel (first output row), advance mb
+
+ ;// second
+ LDRB tmp1, [ptrA, #1]! ;// step one more column to the right
+ LDRB tmp3, [ptrA, width]
+ LDRB tmp5, [ptrA, width, LSL #1]
+
+ PKHBT tmp1, tmp1, tmp3, LSL #16 ;// |t3|t1|
+ PKHBT tmp3, tmp3, tmp5, LSL #16 ;// |t5|t3|
+
+ SMUAD tmp1, tmp1, valY ;// t1=(t1*valY + t3*yFrac)
+ MLA tmp5, tmp1, xFrac, c32 ;// t5=t1*xFrac+32 (t1 is now the right-hand column)
+ MLA tmp5, tmp2, valX, tmp5 ;// t5=t2*valX+t5
+
+ SMUAD tmp3, tmp3, valY ;// t3=(t3*valY + t5*yFrac)
+ MLA tmp6, tmp3, xFrac, c32 ;// t6=t3*xFrac+32
+ MLA tmp6, tmp4, valX, tmp6 ;// t6=t4*valX+t6
+
+ MOV tmp6, tmp6, LSR #6 ;// scale down
+ STRB tmp6, [mb, #8] ;// store pixel
+ MOV tmp5, tmp5, LSR #6 ;// scale down
+ STRB tmp5, [mb], #1 ;// store pixel
+
+ SUBS count, count, #2<<28 ;// loop_x -= 2
+ BCS loop1_x ;// repeat until the subtraction borrows
+
+ AND tmp2, count, #0x00F00000 ;// chromaPartWidth-1 from [23:20]
+
+ ADDS mb, mb, #16 ;// two output rows down; also sets the carry flag used by SBC
+ SBC mb, mb, tmp2, LSR #20 ;// with carry clear this subtracts (chromaPartWidth-1)+1
+ ADD ptrA, ptrA, width, LSL #1 ;// two source rows down
+ SBC ptrA, ptrA, tmp2, LSR #20 ;// same carry as above: subtract chromaPartWidth
+
+ ADDS count, count, #0xE << 24 ;// effectively loop_y -= 2 (relies on loop_x having wrapped to 0xF)
+ BGE loop1_y
+
+ ;///////////////////////////////////////////////////////////////////////////
+ ;// Cr
+ ;///////////////////////////////////////////////////////////////////////////
+ LDR height, [sp,#0xfc] ;// height
+ LDR ref, [sp, #0xc4] ;// ref
+ LDR tmp1, [sp, #0xd0] ;// y0
+ LDR tmp2, [sp, #0xcc] ;// x0
+ LDR mb, [sp, #0xc8] ;// predPartChroma
+
+ ADD tmp1, height, tmp1 ;// height + y0: skip the rows of the first plane
+ MLA tmp3, tmp1, width, tmp2 ;// tmp3 = (height+y0)*width + x0
+ ADD ptrA, ref, tmp3 ;// ptrA = ref + tmp3
+ ADD mb, mb, #64 ;// Cr output starts 64 bytes after the Cb output
+
+ AND count, count, #0x00FFFFFF ;// clear loop_x and loop_y
+ AND tmp1, count, #0x000F0000 ;// chromaPartHeight-1 from [19:16]
+ ADD count, count, tmp1, LSL #8 ;// loop_y
+ AND tmp2, count, #0x00F00000 ;// chromaPartWidth-1, moved to loop_x inside loop2_y
+
+ ;// 2x2 pels per iteration
+ ;// bilinear vertical and horizontal interpolation
+loop2_y
+ LDRB tmp1, [ptrA] ;// first column: pels of three consecutive rows
+ LDRB tmp3, [ptrA, width]
+ LDRB tmp5, [ptrA, width, LSL #1]
+
+ PKHBT tmp1, tmp1, tmp3, LSL #16 ;// |t3|t1|
+ PKHBT tmp3, tmp3, tmp5, LSL #16 ;// |t5|t3|
+
+ SMUAD tmp1, tmp1, valY ;// t1=(t1*valY + t3*yFrac)
+ SMUAD tmp3, tmp3, valY ;// t3=(t3*valY + t5*yFrac)
+
+ ADD count, count, tmp2, LSL #8 ;// loop_x [31:28] = chromaPartWidth-1
+loop2_x
+ ;// first
+ LDRB tmp2, [ptrA, #1]!
+ LDRB tmp4, [ptrA, width]
+ LDRB tmp6, [ptrA, width, LSL #1]
+
+ PKHBT tmp2, tmp2, tmp4, LSL #16 ;// |t4|t2|
+ PKHBT tmp4, tmp4, tmp6, LSL #16 ;// |t6|t4|
+
+ SMUAD tmp2, tmp2, valY ;// t2=(t2*valY + t4*yFrac)
+ MLA tmp5, tmp1, valX, c32 ;// t5=t1*valX+32
+ MLA tmp5, tmp2, xFrac, tmp5 ;// t5=t2*xFrac+t5
+
+ SMUAD tmp4, tmp4, valY ;// t4=(t4*valY + t6*yFrac)
+ MLA tmp6, tmp3, valX, c32 ;// t6=t3*valX+32
+ MLA tmp6, tmp4, xFrac, tmp6 ;// t6=t4*xFrac+t6
+
+ MOV tmp6, tmp6, LSR #6 ;// scale down
+ STRB tmp6, [mb, #8] ;// store pixel
+ MOV tmp5, tmp5, LSR #6 ;// scale down
+ STRB tmp5, [mb], #1 ;// store pixel
+
+ ;// second
+ LDRB tmp1, [ptrA, #1]!
+ LDRB tmp3, [ptrA, width]
+ LDRB tmp5, [ptrA, width, LSL #1]
+
+ PKHBT tmp1, tmp1, tmp3, LSL #16 ;// |t3|t1|
+ PKHBT tmp3, tmp3, tmp5, LSL #16 ;// |t5|t3|
+
+ SMUAD tmp1, tmp1, valY ;// t1=(t1*valY + t3*yFrac)
+ MLA tmp5, tmp1, xFrac, c32 ;// t5=t1*xFrac+32
+ MLA tmp5, tmp2, valX, tmp5 ;// t5=t2*valX+t5
+
+ SMUAD tmp3, tmp3, valY ;// t3=(t3*valY + t5*yFrac)
+ MLA tmp6, tmp3, xFrac, c32 ;// t6=t3*xFrac+32
+ MLA tmp6, tmp4, valX, tmp6 ;// t6=t4*valX+t6
+
+ MOV tmp6, tmp6, LSR #6 ;// scale down
+ STRB tmp6, [mb, #8] ;// store pixel
+ MOV tmp5, tmp5, LSR #6 ;// scale down
+ STRB tmp5, [mb], #1 ;// store pixel
+
+ SUBS count, count, #2<<28 ;// loop_x -= 2
+ BCS loop2_x
+
+ AND tmp2, count, #0x00F00000 ;// chromaPartWidth-1 from [23:20]
+
+ ADDS mb, mb, #16 ;// same pointer stepping as at the end of loop1_y
+ SBC mb, mb, tmp2, LSR #20
+ ADD ptrA, ptrA, width, LSL #1
+ SBC ptrA, ptrA, tmp2, LSR #20
+
+ ADDS count, count, #0xE << 24 ;// effectively loop_y -= 2
+ BGE loop2_y
+
+ ADD sp,sp,#0xd4 ;// release the locals (0xc4) and the saved r0-r3 (0x10)
+ LDMFD sp!,{r4-r11,pc} ;// restore r4-r11 and return through the saved lr
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_ver.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_ver.s
new file mode 100755
index 0000000..af9df1b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_ver.s
@@ -0,0 +1,288 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateChromaVer function
+;--
+;-------------------------------------------------------------------------------
+
+
+ IF :DEF: H264DEC_WINASM
+ ;// We don't use REQUIRE8 and PRESERVE8 for winasm
+ ELSE
+ REQUIRE8
+ PRESERVE8
+ ENDIF
+
+ AREA |.text|, CODE
+
+;// h264bsdInterpolateChromaVer register allocation (several names share one register)
+
+ref RN 0 ;// r0: 1st argument
+ptrA RN 0 ;// r0 reused as the running source pointer
+
+mb RN 1 ;// r1: output pointer (loaded from the saved 2nd argument, predPartChroma)
+block RN 1 ;// r1 as the buffer argument passed to h264bsdFillBlock
+
+x0 RN 2 ;// r2: 3rd argument
+count RN 2 ;// r2 reused as the packed loop counter
+
+y0 RN 3 ;// r3: 4th argument
+valY RN 3 ;// r3 reused for the packed vertical weights
+
+width RN 4
+
+height RN 5
+tmp7 RN 5 ;// r5 as scratch inside the loops
+
+chrPW RN 6
+tmp8 RN 6 ;// r6 as scratch inside the loops
+
+tmp1 RN 7
+
+tmp2 RN 8
+
+tmp3 RN 9
+
+tmp4 RN 10
+
+tmp5 RN 11
+chrPH RN 11 ;// r11 holds chromaPartHeight until the loops reuse it as tmp5
+
+tmp6 RN 12
+
+c32 RN 14 ;// r14 (lr, saved by the function prologue) as the constant 32
+yFrac RN 14 ;// r14 holds yFrac before c32 overwrites it
+
+;// Function exports and imports
+
+ IMPORT h264bsdFillBlock
+
+ EXPORT h264bsdInterpolateChromaVer
+
+;// h264bsdInterpolateChromaVer: vertical bilinear chroma interpolation, a Cb pass followed by a Cr pass.
+;// Function arguments (hex value = offset from sp once the prologue has run):
+;// u8 *ref, : 0xc4
+;// u8 *predPartChroma, : 0xc8
+;// i32 x0, : 0xcc
+;// i32 y0, : 0xd0
+;// u32 width, : 0xf8
+;// u32 height, : 0xfc
+;// u32 yFrac, : 0x100
+;// u32 chromaPartWidth, : 0x104
+;// u32 chromaPartHeight : 0x108
+
+h264bsdInterpolateChromaVer
+ STMFD sp!, {r0-r11,lr} ;// save the four register args, r4-r11 and lr
+ SUB sp, sp, #0xc4 ;// locals: h264bsdFillBlock stack args at sp, fill buffer at sp+0x1c
+
+ LDR chrPW, [sp, #0x104] ;// chromaPartWidth
+ LDR width, [sp, #0xf8] ;// width
+ CMP x0, #0
+ BLT do_fill ;// x0 < 0
+
+ ADD tmp1, x0, chrPW ;// tmp1 = x0+ chromaPartWidth
+ CMP tmp1, width ;// x0+chromaPartWidth > width
+ BHI do_fill ;// taken when x0+chromaPartWidth > width
+
+ CMP y0, #0
+ BLT do_fill ;// y0 < 0
+ LDR chrPH, [sp, #0x108] ;// chromaPartHeight
+ LDR height, [sp, #0xfc] ;// height
+ ADD tmp1, y0, chrPH ;// tmp1 = y0 + chromaPartHeight
+ ADD tmp1, tmp1, #1 ;// tmp1 = y0 + chromaPartHeight + 1
+ CMP tmp1, height
+ BLS skip_fill ;// y0+chromaPartHeight+1 <= height: read ref directly
+
+do_fill
+ LDR chrPH, [sp, #0x108] ;// chromaPartHeight
+ LDR height, [sp, #0xfc] ;// height
+ ADD tmp1, chrPH, #1 ;// tmp1 = chromaPartHeight+1
+ MOV tmp2, chrPW ;// tmp2 = chromaPartWidth
+ STMIA sp,{width,height,chrPW,tmp1,tmp2} ;// five stack arguments for h264bsdFillBlock
+ ADD block, sp, #0x1c ;// block
+ BL h264bsdFillBlock ;// first fill, into block (read by the Cb pass)
+
+ LDR x0, [sp, #0xcc] ;// reload saved x0
+ LDR y0, [sp, #0xd0] ;// reload saved y0
+ LDR ref, [sp, #0xc4] ;// ref
+ STMIA sp,{width,height,chrPW,tmp1,tmp2} ;// write the same five stack arguments again
+ ADD block, sp, #0x1c ;// block
+ MLA ref, height, width, ref ;// ref += width * height;
+ MLA block, chrPW, tmp1, block;// block + (chromaPW)*(chromaPH+1)
+ BL h264bsdFillBlock ;// second fill (read by the Cr pass)
+
+ MOV x0, #0 ;// x0 = 0
+ MOV y0, #0 ;// y0 = 0
+ STR x0, [sp, #0xcc] ;// saved x0 = 0
+ STR y0, [sp, #0xd0] ;// saved y0 = 0
+ ADD ref, sp, #0x1c ;// ref = block
+ STR ref, [sp, #0xc4] ;// ref
+
+ STR tmp1, [sp, #0xfc] ;// saved height = chromaPartHeight+1
+ STR chrPW, [sp, #0xf8] ;// saved width = chromaPartWidth
+ MOV width, chrPW ;// width = chromaPartWidth while reading from block
+
+skip_fill
+ MLA tmp3, y0, width, x0 ;// tmp3 = y0*width+x0
+ LDR yFrac, [sp, #0x100] ;// yFrac
+ ADD ptrA, ref, tmp3 ;// ptrA = ref + y0*width+x0
+ RSB valY, yFrac, #8 ;// valY = 8-yFrac
+
+ LDR mb, [sp, #0xc8] ;// predPartChroma
+
+
+ ;// pack values to count register
+ ;// [31:28] loop_x (chromaPartWidth-1)
+ ;// [27:24] loop_y (chromaPartHeight-1)
+ ;// [23:20] chromaPartWidth-1
+ ;// [19:16] chromaPartHeight-1
+ ;// [15:00] not used by the loops (count is r2, so x0 is still in these bits)
+
+ SUB tmp2, chrPH, #1 ;// chromaPartHeight-1
+ SUB tmp1, chrPW, #1 ;// chromaPartWidth-1
+ ADD count, count, tmp2, LSL #16 ;// chromaPartHeight-1 (added on top of x0, count is not cleared)
+ ADD count, count, tmp2, LSL #24 ;// loop_y
+ ADD count, count, tmp1, LSL #20 ;// chromaPartWidth-1
+ AND tmp2, count, #0x00F00000 ;// loop_x
+ PKHBT valY, valY, yFrac, LSL #16 ;// |yFrac|valY |
+ MOV valY, valY, LSL #3 ;// multiply by 8 in advance
+ MOV c32, #32 ;// rounding term; overwrites yFrac (same register, r14)
+
+
+ ;///////////////////////////////////////////////////////////////////////////
+ ;// Cb
+ ;///////////////////////////////////////////////////////////////////////////
+
+ ;// 2x2 pels per iteration
+ ;// bilinear vertical interpolation
+
+loop1_y
+ ADD count, count, tmp2, LSL #8 ;// loop_x = chromaPartWidth-1
+loop1_x
+ ;// Process 2x2 block
+ LDRB tmp2, [ptrA,width] ;// 2 row, 1 col
+ LDRB tmp3, [ptrA,width, LSL #1] ;// 3 row, 1 col
+ LDRB tmp1, [ptrA],#1 ;// 1 row, 1 col
+
+ LDRB tmp5, [ptrA,width] ;// 2 row, 2 col
+ LDRB tmp6, [ptrA,width, LSL #1] ;// 3 row, 2 col
+ LDRB tmp4, [ptrA],#1 ;// 1 row, 2 col
+
+ PKHBT tmp1, tmp1, tmp2, LSL #16 ;// |B|A|
+ PKHBT tmp2, tmp2, tmp3, LSL #16 ;// |C|B|
+ PKHBT tmp4, tmp4, tmp5, LSL #16 ;// |B|A|
+
+ SMLAD tmp7, tmp2, valY, c32 ;// out row 2 col 1: 8*((8-yFrac)*B + yFrac*C) + 32
+ PKHBT tmp5, tmp5, tmp6, LSL #16 ;// |C|B|
+ SMLAD tmp2, tmp1, valY, c32 ;// out row 1 col 1: 8*((8-yFrac)*A + yFrac*B) + 32
+ SMLAD tmp8, tmp5, valY, c32 ;// out row 2 col 2, same formula
+ SMLAD tmp5, tmp4, valY, c32 ;// out row 1 col 2, same formula
+
+ MOV tmp7, tmp7, LSR #6 ;// scale down
+ STRB tmp7, [mb,#8] ;// store row 2 col 1
+ MOV tmp2, tmp2, LSR #6 ;// scale down
+ STRB tmp2, [mb],#1 ;// store row 1 col 1
+
+ MOV tmp8, tmp8, LSR #6 ;// scale down
+ STRB tmp8, [mb,#8] ;// store row 2 col 2
+ MOV tmp5, tmp5, LSR #6 ;// scale down
+ STRB tmp5, [mb],#1 ;// store row 1 col 2
+
+
+ SUBS count, count, #2<<28 ;// loop_x -= 2 (two columns done)
+ BCS loop1_x ;// repeat until the subtraction borrows
+
+ AND tmp2, count, #0x00F00000 ;// chromaPartWidth-1
+
+ ADDS mb, mb, #16 ;// +16 (two output rows of 8 bytes), Carry=0 for the SBCs below
+ SBC mb, mb, tmp2, LSR #20 ;// -(chromaPartWidth-1)-1
+ ADD ptrA, ptrA, width, LSL #1 ;// +2*width (two source rows), flags untouched
+ SBC ptrA, ptrA, tmp2, LSR #20 ;// -(chromaPartWidth-1)-1
+
+ ADDS count, count, #0xE << 24 ;// loop_y -= 2 and wrap the borrowed loop_x nibble
+ BGE loop1_y ;// result goes negative once loop_y underflows
+
+ ;///////////////////////////////////////////////////////////////////////////
+ ;// Cr
+ ;///////////////////////////////////////////////////////////////////////////
+ LDR height, [sp,#0xfc] ;// height
+ LDR ref, [sp, #0xc4] ;// ref
+ LDR tmp1, [sp, #0xd0] ;// y0
+ LDR tmp2, [sp, #0xcc] ;// x0
+ LDR mb, [sp, #0xc8] ;// predPartChroma
+
+ ADD tmp1, height, tmp1 ;// height + y0
+ MLA tmp3, tmp1, width, tmp2 ;// (height+y0)*width + x0
+ ADD ptrA, ref, tmp3 ;// Cr source: height rows below the Cb source
+ ADD mb, mb, #64 ;// Cr output: 64 bytes after the Cb output
+
+ AND count, count, #0x00FFFFFF ;// clear loop_x and loop_y
+ AND tmp1, count, #0x000F0000 ;// chromaPartHeight-1
+ ADD count, count, tmp1, LSL #8 ;// loop_y = chromaPartHeight-1
+ AND tmp2, count, #0x00F00000 ;// chromaPartWidth-1
+
+ ;// 2x2 pels per iteration
+ ;// bilinear vertical interpolation
+loop2_y
+ ADD count, count, tmp2, LSL #8 ;// loop_x = chromaPartWidth-1
+loop2_x
+ ;// Process 2x2 block
+ LDRB tmp2, [ptrA,width] ;// 2 row, 1 col
+ LDRB tmp3, [ptrA,width, LSL #1] ;// 3 row, 1 col
+ LDRB tmp1, [ptrA],#1 ;// 1 row, 1 col
+
+ LDRB tmp5, [ptrA,width] ;// 2 row, 2 col
+ LDRB tmp6, [ptrA,width, LSL #1] ;// 3 row, 2 col
+ LDRB tmp4, [ptrA],#1 ;// 1 row, 2 col
+
+ PKHBT tmp1, tmp1, tmp2, LSL #16 ;// |B|A|
+ PKHBT tmp2, tmp2, tmp3, LSL #16 ;// |C|B|
+ PKHBT tmp4, tmp4, tmp5, LSL #16 ;// |B|A|
+
+ SMLAD tmp7, tmp2, valY, c32 ;// out row 2 col 1: 8*((8-yFrac)*B + yFrac*C) + 32
+ PKHBT tmp5, tmp5, tmp6, LSL #16 ;// |C|B|
+ SMLAD tmp2, tmp1, valY, c32 ;// out row 1 col 1: 8*((8-yFrac)*A + yFrac*B) + 32
+ SMLAD tmp8, tmp5, valY, c32 ;// out row 2 col 2, same formula
+ SMLAD tmp5, tmp4, valY, c32 ;// out row 1 col 2, same formula
+
+ MOV tmp7, tmp7, LSR #6 ;// scale down
+ STRB tmp7, [mb,#8] ;// store row 2 col 1
+ MOV tmp2, tmp2, LSR #6 ;// scale down
+ STRB tmp2, [mb],#1 ;// store row 1 col 1
+
+ MOV tmp8, tmp8, LSR #6 ;// scale down
+ STRB tmp8, [mb,#8] ;// store row 2 col 2
+ MOV tmp5, tmp5, LSR #6 ;// scale down
+ STRB tmp5, [mb],#1 ;// store row 1 col 2
+
+
+ SUBS count, count, #2<<28 ;// loop_x -= 2 (two columns done)
+ BCS loop2_x ;// repeat until the subtraction borrows
+
+ AND tmp2, count, #0x00F00000 ;// chromaPartWidth-1
+
+ ADDS mb, mb, #16 ;// +16 (two output rows of 8 bytes), Carry=0 for the SBCs below
+ SBC mb, mb, tmp2, LSR #20 ;// -(chromaPartWidth-1)-1
+ ADD ptrA, ptrA, width, LSL #1 ;// +2*width (two source rows), flags untouched
+ SBC ptrA, ptrA, tmp2, LSR #20 ;// -(chromaPartWidth-1)-1
+
+ ADDS count, count, #0xE << 24 ;// loop_y -= 2 and wrap the borrowed loop_x nibble
+ BGE loop2_y ;// result goes negative once loop_y underflows
+
+ ADD sp,sp,#0xd4 ;// drop 0xc4 of locals plus the saved r0-r3
+ LDMFD sp!, {r4-r11,pc} ;// restore r4-r11 and return
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_half.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_half.s
new file mode 100755
index 0000000..93968b6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_half.s
@@ -0,0 +1,251 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateHorHalf function
+;--
+;-------------------------------------------------------------------------------
+
+
+ IF :DEF: H264DEC_WINASM
+ ;// We don't use REQUIRE8 and PRESERVE8 for winasm
+ ELSE
+ REQUIRE8
+ PRESERVE8
+ ENDIF
+
+ AREA |.text|, CODE
+
+;// h264bsdInterpolateHorHalf register allocation (several names share one register)
+
+ref RN 0 ;// r0: 1st argument, then the running source pointer
+
+mb RN 1 ;// r1: output pointer (loaded from the saved 2nd argument)
+buff RN 1 ;// r1 as the buffer argument passed to h264bsdFillBlock
+
+count RN 2 ;// r2 as the packed loop counter
+x0 RN 2 ;// r2: 3rd argument
+
+y0 RN 3 ;// r3: 4th argument
+x_2_0 RN 3 ;// r3 as unpacked source pixels |2|0|
+
+width RN 4
+x_3_1 RN 4 ;// r4 as unpacked source pixels |3|1|
+
+height RN 5
+x_6_4 RN 5 ;// r5 as unpacked source pixels |6|4|
+
+partW RN 6
+x_7_5 RN 6 ;// r6 as unpacked source pixels |7|5|
+
+partH RN 7
+tmp1 RN 7 ;// r7 as a filter accumulator inside the loops
+
+tmp2 RN 8
+
+tmp3 RN 9
+
+tmp4 RN 10
+
+mult_20_01 RN 11 ;// r11: constant 0x00140001
+mult_20_m5 RN 12 ;// r12: constant 0x0014FFFB
+
+plus16 RN 14 ;// r14 (lr, saved by the function prologue) as the constant 16
+
+
+;// function exports and imports
+
+ IMPORT h264bsdFillBlock
+
+ EXPORT h264bsdInterpolateHorHalf
+
+;// h264bsdInterpolateHorHalf: 6-tap (1,-5,20,20,-5,1) horizontal half-pel filter, four output pixels per stored word.
+;// Horizontal filter approach:
+;// Basic idea in horizontal filtering is to adjust coefficients
+;// like below. Calculation is done with 16-bit maths.
+;//
+;// Reg x_2_0 x_3_1 x_6_4 x_7_5 x_2_0
+;// [ 2 0 ] [ 3 1 ] [ 6 4 ] [ 7 5 ] [ 10 8 ] ...
+;// y_0 = 20 1 20 -5 -5 1
+;// y_1 = -5 20 1 1 20 -5
+;// y_2 = 1 -5 -5 20 1 20
+;// y_3 = 1 20 -5 -5 20 1
+
+
+h264bsdInterpolateHorHalf
+ STMFD sp!, {r0-r11, lr} ;// save the four register args, r4-r11 and lr
+ SUB sp, sp, #0x1e4 ;// locals: h264bsdFillBlock stack args at sp, fill buffer p1 at sp+0x28
+
+ CMP x0, #0
+ BLT do_fill ;// (x0 < 0)
+ LDR partW, [sp,#0x220] ;// partWidth
+ ADD tmp4, x0, partW ;// (x0+partWidth)
+ ADD tmp4, tmp4, #5 ;// (x0+partW+5)
+ LDR width, [sp,#0x218] ;// width
+ CMP tmp4, width
+ BHI do_fill ;// (x0+partW+5)>width
+
+ CMP y0, #0
+ BLT do_fill ;// (y0 < 0)
+ LDR partH, [sp,#0x224] ;// partHeight
+ ADD tmp2, y0, partH ;// (y0+partHeight)
+ LDR height, [sp,#0x21c] ;// height
+ CMP tmp2, height
+ BLS skip_fill ;// no overfill needed
+
+
+do_fill
+ LDR partH, [sp,#0x224] ;// partHeight
+ LDR height, [sp,#0x21c] ;// height
+ LDR partW, [sp,#0x220] ;// partWidth
+ ADD tmp4, partW, #5 ;// tmp4 = partW + 5;
+ STMIB sp, {height, tmp4} ;// sp+4 = height, sp+8 = partWidth+5
+ STR partH, [sp,#0xc] ;// sp+c = partHeight
+ STR tmp4, [sp,#0x10] ;// sp+10 = partWidth+5
+ LDR width, [sp,#0x218] ;// width
+ STR width, [sp,#0] ;// sp+0 = width
+ ADD buff, sp, #0x28 ;// buff = p1[21*21/4+1]
+ BL h264bsdFillBlock ;// ref, x0, y0 are still in r0, r2, r3 from entry
+
+ MOV x0, #0
+ STR x0,[sp,#0x1ec] ;// x0 = 0
+ STR x0,[sp,#0x1f0] ;// y0 = 0
+ ADD ref,sp,#0x28 ;// ref = p1
+ STR tmp4, [sp,#0x218] ;// width = partWidth+5
+
+
+skip_fill
+ LDR x0 ,[sp,#0x1ec] ;// x0
+ LDR y0 ,[sp,#0x1f0] ;// y0
+ LDR width, [sp,#0x218] ;// width
+ MLA tmp2, width, y0, x0 ;// y0*width+x0
+ ADD ref, ref, tmp2 ;// ref += y0*width+x0
+ ADD ref, ref, #8 ;// ref = ref+8 (loop_y reads back at ref-8 and ref-4)
+ LDR mb, [sp, #0x1e8] ;// mb
+
+ ;// pack values to count register
+ ;// [31:28] loop_x (partWidth-1)
+ ;// [27:24] loop_y (partHeight-1)
+ ;// [23:20] partWidth-1
+ ;// [19:16] partHeight-1
+ ;// [15:00] width
+ MOV count, width ;// keep width in count: r4 is reused as x_3_1 below
+ SUB partW, partW, #1;
+ SUB partH, partH, #1;
+ ADD tmp2, partH, partW, LSL #4 ;// ((partWidth-1)<<4) + (partHeight-1)
+ ADD count, count, tmp2, LSL #16 ;// placed in bits [23:16]
+
+
+ LDR mult_20_01, = 0x00140001 ;// halfwords |20|1|
+ LDR mult_20_m5, = 0x0014FFFB ;// halfwords |20|-5|
+ MOV plus16, #16 ;// rounding term added before the >>5
+ AND tmp1, count, #0x000F0000 ;// partHeight-1
+ AND tmp3, count, #0x00F00000 ;// partWidth-1
+ ADD count, count, tmp1, LSL #8 ;// loop_y = partHeight-1
+loop_y
+ LDR x_3_1, [ref, #-8] ;// first four source bytes of the row
+ ADD count, count, tmp3, LSL #8 ;// loop_x = partWidth-1
+ LDR x_7_5, [ref, #-4] ;// next four source bytes
+ UXTB16 x_2_0, x_3_1 ;// bytes 0 and 2 -> |2|0|
+ UXTB16 x_3_1, x_3_1, ROR #8 ;// bytes 1 and 3 -> |3|1|
+ UXTB16 x_6_4, x_7_5 ;// |6|4|
+
+loop_x
+ UXTB16 x_7_5, x_7_5, ROR #8 ;// |7|5|
+
+ SMLAD tmp1, x_2_0, mult_20_01, plus16 ;// tmp1 (y_0) = 16 + 20*x2 + x0
+ SMLATB tmp3, x_2_0, mult_20_01, plus16 ;// tmp3 (y_2) = 16 + x2
+ SMLATB tmp2, x_2_0, mult_20_m5, plus16 ;// tmp2 (y_1) = 16 - 5*x2
+ SMLATB tmp4, x_3_1, mult_20_01, plus16 ;// tmp4 (y_3) = 16 + x3
+
+ SMLAD tmp1, x_3_1, mult_20_m5, tmp1 ;// y_0 += 20*x3 - 5*x1
+ SMLATB tmp3, x_3_1, mult_20_m5, tmp3 ;// y_2 += -5*x3
+ SMLAD tmp2, x_3_1, mult_20_01, tmp2 ;// y_1 += 20*x3 + x1
+ LDR x_3_1, [ref], #4 ;// load the next four source bytes; ref += 4
+ SMLAD tmp4, x_6_4, mult_20_m5, tmp4 ;// y_3 += 20*x6 - 5*x4
+
+ SMLABB tmp1, x_6_4, mult_20_m5, tmp1 ;// y_0 += -5*x4
+ SMLADX tmp3, x_6_4, mult_20_m5, tmp3 ;// y_2 += 20*x4 - 5*x6
+ SMLADX tmp2, x_6_4, mult_20_01, tmp2 ;// y_1 += 20*x4 + x6
+ SMLADX tmp4, x_7_5, mult_20_m5, tmp4 ;// y_3 += 20*x5 - 5*x7
+
+ SMLABB tmp1, x_7_5, mult_20_01, tmp1 ;// y_0 += x5
+ UXTB16 x_2_0, x_3_1 ;// |10|8| from the newly loaded word
+ SMLABB tmp2, x_7_5, mult_20_m5, tmp2 ;// y_1 += -5*x5
+ SMLADX tmp3, x_7_5, mult_20_01, tmp3 ;// y_2 += 20*x5 + x7
+ SMLABB tmp4, x_2_0, mult_20_01, tmp4 ;// y_3 += x8
+
+ MOV tmp2, tmp2, ASR #5 ;// y_1 >> 5
+ MOV tmp1, tmp1, ASR #5 ;// y_0 >> 5
+ PKHBT tmp2, tmp2, tmp4, LSL #(16-5) ;// |y_3>>5|y_1>>5|
+ PKHBT tmp1, tmp1, tmp3, LSL #(16-5) ;// |y_2>>5|y_0>>5|
+ USAT16 tmp2, #8, tmp2 ;// saturate both halfwords to 0..255
+ USAT16 tmp1, #8, tmp1 ;// saturate both halfwords to 0..255
+
+ SUBS count, count, #4<<28 ;// loop_x -= 4; carry is tested by the BCC below
+ ORR tmp1, tmp1, tmp2, LSL #8 ;// merge into bytes y_0,y_1,y_2,y_3
+ STR tmp1, [mb], #4 ;// store four output pixels
+ BCC next_y ;// borrow: row finished
+
+ ;// same filter, register roles rotated: x_6_4/x_7_5 lead, x_2_0/x_3_1 follow
+ UXTB16 x_3_1, x_3_1, ROR #8
+
+ SMLAD tmp1, x_6_4, mult_20_01, plus16
+ SMLATB tmp3, x_6_4, mult_20_01, plus16
+ SMLATB tmp2, x_6_4, mult_20_m5, plus16
+ SMLATB tmp4, x_7_5, mult_20_01, plus16
+
+ SMLAD tmp1, x_7_5, mult_20_m5, tmp1
+ SMLATB tmp3, x_7_5, mult_20_m5, tmp3
+ SMLAD tmp2, x_7_5, mult_20_01, tmp2
+ LDR x_7_5, [ref], #4 ;// load the next four source bytes; ref += 4
+ SMLAD tmp4, x_2_0, mult_20_m5, tmp4
+
+ SMLABB tmp1, x_2_0, mult_20_m5, tmp1
+ SMLADX tmp3, x_2_0, mult_20_m5, tmp3
+ SMLADX tmp2, x_2_0, mult_20_01, tmp2
+ SMLADX tmp4, x_3_1, mult_20_m5, tmp4
+
+ SMLABB tmp1, x_3_1, mult_20_01, tmp1
+ UXTB16 x_6_4, x_7_5 ;// even bytes of the newly loaded word
+ SMLABB tmp2, x_3_1, mult_20_m5, tmp2
+ SMLADX tmp3, x_3_1, mult_20_01, tmp3
+ SMLABB tmp4, x_6_4, mult_20_01, tmp4
+
+ MOV tmp2, tmp2, ASR #5 ;// scale down
+ MOV tmp1, tmp1, ASR #5 ;// scale down
+ PKHBT tmp2, tmp2, tmp4, LSL #(16-5) ;// pack second and fourth results
+ PKHBT tmp1, tmp1, tmp3, LSL #(16-5) ;// pack first and third results
+ USAT16 tmp2, #8, tmp2 ;// saturate both halfwords to 0..255
+ USAT16 tmp1, #8, tmp1 ;// saturate both halfwords to 0..255
+
+ SUBS count, count, #4<<28 ;// loop_x -= 4; carry is tested by the BCS below
+ ORR tmp1, tmp1, tmp2, LSL #8 ;// merge into four consecutive bytes
+ STR tmp1, [mb], #4 ;// store four output pixels
+ BCS loop_x ;// no borrow: more pixels left in this row
+
+next_y
+ AND tmp3, count, #0x00F00000 ;// partWidth-1
+ SMLABB ref, count, mult_20_01, ref ;// +width (count[15:0] * 1)
+ ADDS mb, mb, #16 ;// +16, Carry=0
+ SBC mb, mb, tmp3, LSR #20 ;// -(partWidth-1)-1
+ SBC ref, ref, tmp3, LSR #20 ;// -(partWidth-1)-1
+ ADDS count, count, #(1<<28)-(1<<24) ;// loop_y -= 1 and wrap the borrowed loop_x nibble
+ BGE loop_y ;// result goes negative once loop_y underflows
+
+ ADD sp,sp,#0x1f4 ;// drop 0x1e4 of locals plus the saved r0-r3
+ LDMFD sp!, {r4-r11, pc} ;// restore r4-r11 and return
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_quarter.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_quarter.s
new file mode 100755
index 0000000..de243d4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_quarter.s
@@ -0,0 +1,273 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateHorQuarter function
+;--
+;-------------------------------------------------------------------------------
+
+
+ IF :DEF: H264DEC_WINASM
+ ;// We don't use REQUIRE8 and PRESERVE8 for winasm
+ ELSE
+ REQUIRE8
+ PRESERVE8
+ ENDIF
+
+ AREA |.text|, CODE
+
+;// h264bsdInterpolateHorQuarter register allocation (several names share one register)
+
+ref RN 0 ;// r0: 1st argument, then the running source pointer
+
+mb RN 1 ;// r1: output pointer (loaded from the saved 2nd argument)
+buff RN 1 ;// r1 as the buffer argument passed to h264bsdFillBlock
+
+count RN 2 ;// r2 as the packed loop counter
+x0 RN 2 ;// r2: 3rd argument
+
+y0 RN 3 ;// r3: 4th argument
+x_2_0 RN 3 ;// r3 as unpacked source pixels |2|0|
+
+width RN 4
+x_3_1 RN 4 ;// r4 as unpacked source pixels |3|1|
+
+height RN 5
+x_6_4 RN 5 ;// r5 as unpacked source pixels |6|4|
+
+partW RN 6
+x_7_5 RN 6 ;// r6 as unpacked source pixels |7|5|
+
+partH RN 7
+tmp1 RN 7 ;// r7 as a filter accumulator inside the loops
+
+tmp2 RN 8
+
+tmp3 RN 9
+
+tmp4 RN 10
+
+mult_20_01 RN 11 ;// r11: constant 0x00140001
+
+mult_20_m5 RN 12 ;// r12: constant 0x0014FFFB
+
+plus16 RN 14 ;// r14 (lr, saved by the function prologue) as the constant 16
+
+
+;// function exports and imports
+
+ IMPORT h264bsdFillBlock
+
+ EXPORT h264bsdInterpolateHorQuarter
+
+
+;// h264bsdInterpolateHorQuarter: 6-tap horizontal half-pel filter whose result is averaged with source pixels.
+;// Horizontal filter approach:
+;// Basic idea in horizontal filtering is to adjust coefficients
+;// like below. Calculation is done with 16-bit maths.
+;//
+;// Reg x_2_0 x_3_1 x_6_4 x_7_5 x_2_0
+;// [ 2 0 ] [ 3 1 ] [ 6 4 ] [ 7 5 ] [ 10 8 ] ...
+;// y_0 = 20 1 20 -5 -5 1
+;// y_1 = -5 20 1 1 20 -5
+;// y_2 = 1 -5 -5 20 1 20
+;// y_3 = 1 20 -5 -5 20 1
+
+
+h264bsdInterpolateHorQuarter
+ STMFD sp!, {r0-r11, lr} ;// save the four register args, r4-r11 and lr
+ SUB sp, sp, #0x1e4 ;// locals: h264bsdFillBlock stack args at sp, fill buffer p1 at sp+0x28
+
+ CMP x0, #0
+ BLT do_fill ;// (x0 < 0)
+ LDR partW, [sp,#0x220] ;// partWidth
+ ADD tmp4, x0, partW ;// (x0+partWidth)
+ ADD tmp4, tmp4, #5 ;// (x0+partW+5)
+ LDR width, [sp,#0x218] ;// width
+ CMP tmp4, width
+ BHI do_fill ;// (x0+partW+5)>width
+
+ CMP y0, #0
+ BLT do_fill ;// (y0 < 0)
+ LDR partH, [sp,#0x224] ;// partHeight
+ ADD tmp2, y0, partH ;// (y0+partHeight)
+ LDR height, [sp,#0x21c] ;// height
+ CMP tmp2, height
+ BLS skip_fill ;// no overfill needed
+
+
+do_fill
+ LDR partH, [sp,#0x224] ;// partHeight
+ LDR height, [sp,#0x21c] ;// height
+ LDR partW, [sp,#0x220] ;// partWidth
+ ADD tmp4, partW, #5 ;// tmp4 = partW + 5;
+ STMIB sp, {height, tmp4} ;// sp+4 = height, sp+8 = partWidth+5
+ STR partH, [sp,#0xc] ;// sp+c = partHeight
+ STR tmp4, [sp,#0x10] ;// sp+10 = partWidth+5
+ LDR width, [sp,#0x218] ;// width
+ STR width, [sp,#0] ;// sp+0 = width
+ ADD buff, sp, #0x28 ;// buff = p1[21*21/4+1]
+ BL h264bsdFillBlock ;// ref, x0, y0 are still in r0, r2, r3 from entry
+
+ MOV x0, #0
+ STR x0,[sp,#0x1ec] ;// x0 = 0
+ STR x0,[sp,#0x1f0] ;// y0 = 0
+ ADD ref,sp,#0x28 ;// ref = p1
+ STR tmp4, [sp,#0x218] ;// width = partWidth+5
+
+
+skip_fill
+ LDR x0 ,[sp,#0x1ec] ;// x0
+ LDR y0 ,[sp,#0x1f0] ;// y0
+ LDR width, [sp,#0x218] ;// width
+ MLA tmp2, width, y0, x0 ;// y0*width+x0
+ ADD ref, ref, tmp2 ;// ref += y0*width+x0
+ ADD ref, ref, #8 ;// ref = ref+8 (loop_y reads back at ref-8 and ref-4)
+ LDR mb, [sp, #0x1e8] ;// mb
+
+ ;// pack values to count register
+ ;// [31:28] loop_x (partWidth-1)
+ ;// [27:24] loop_y (partHeight-1)
+ ;// [23:20] partWidth-1
+ ;// [19:16] partHeight-1
+ ;// [15:00] width
+ MOV count, width ;// keep width in count: r4 is reused as x_3_1 below
+ SUB partW, partW, #1;
+ SUB partH, partH, #1;
+ ADD tmp2, partH, partW, LSL #4 ;// ((partWidth-1)<<4) + (partHeight-1)
+ ADD count, count, tmp2, LSL #16 ;// placed in bits [23:16]
+
+
+ LDR mult_20_01, = 0x00140001 ;// halfwords |20|1|
+ LDR mult_20_m5, = 0x0014FFFB ;// halfwords |20|-5|
+ MOV plus16, #16 ;// rounding term added before the >>5
+ AND tmp1, count, #0x000F0000 ;// partHeight-1
+ AND tmp3, count, #0x00F00000 ;// partWidth-1
+ ADD count, count, tmp1, LSL #8 ;// loop_y = partHeight-1
+loop_y
+ LDR x_3_1, [ref, #-8] ;// first four source bytes of the row
+ ADD count, count, tmp3, LSL #8 ;// loop_x = partWidth-1
+ LDR x_7_5, [ref, #-4] ;// next four source bytes
+ UXTB16 x_2_0, x_3_1 ;// bytes 0 and 2 -> |2|0|
+ UXTB16 x_3_1, x_3_1, ROR #8 ;// bytes 1 and 3 -> |3|1|
+ UXTB16 x_6_4, x_7_5 ;// |6|4|
+
+loop_x
+ UXTB16 x_7_5, x_7_5, ROR #8 ;// |7|5|
+
+ SMLAD tmp1, x_2_0, mult_20_01, plus16 ;// tmp1 (y_0) = 16 + 20*x2 + x0
+ SMLATB tmp3, x_2_0, mult_20_01, plus16 ;// tmp3 (y_2) = 16 + x2
+ SMLATB tmp2, x_2_0, mult_20_m5, plus16 ;// tmp2 (y_1) = 16 - 5*x2
+ SMLATB tmp4, x_3_1, mult_20_01, plus16 ;// tmp4 (y_3) = 16 + x3
+
+ SMLAD tmp1, x_3_1, mult_20_m5, tmp1 ;// y_0 += 20*x3 - 5*x1
+ SMLATB tmp3, x_3_1, mult_20_m5, tmp3 ;// y_2 += -5*x3
+ SMLAD tmp2, x_3_1, mult_20_01, tmp2 ;// y_1 += 20*x3 + x1
+ LDR x_3_1, [ref], #4 ;// load the next four source bytes; ref += 4
+ SMLAD tmp4, x_6_4, mult_20_m5, tmp4 ;// y_3 += 20*x6 - 5*x4
+
+ SMLABB tmp1, x_6_4, mult_20_m5, tmp1 ;// y_0 += -5*x4
+ SMLADX tmp3, x_6_4, mult_20_m5, tmp3 ;// y_2 += 20*x4 - 5*x6
+ SMLADX tmp2, x_6_4, mult_20_01, tmp2 ;// y_1 += 20*x4 + x6
+ SMLADX tmp4, x_7_5, mult_20_m5, tmp4 ;// y_3 += 20*x5 - 5*x7
+
+ SMLABB tmp1, x_7_5, mult_20_01, tmp1 ;// y_0 += x5
+ UXTB16 x_2_0, x_3_1 ;// |10|8| from the newly loaded word
+ SMLABB tmp2, x_7_5, mult_20_m5, tmp2 ;// y_1 += -5*x5
+ SMLADX tmp3, x_7_5, mult_20_01, tmp3 ;// y_2 += 20*x5 + x7
+ SMLABB tmp4, x_2_0, mult_20_01, tmp4 ;// y_3 += x8
+
+ MOV tmp2, tmp2, ASR #5 ;// y_1 >> 5
+ MOV tmp1, tmp1, ASR #5 ;// y_0 >> 5
+ PKHBT tmp2, tmp2, tmp4, LSL #(16-5) ;// |y_3>>5|y_1>>5|
+ PKHBT tmp1, tmp1, tmp3, LSL #(16-5) ;// |y_2>>5|y_0>>5|
+ LDR tmp4, [sp, #0x228] ;// stack word after partHeight; NOTE(review): presumably the horizontal offset argument - confirm against the C prototype
+ USAT16 tmp2, #8, tmp2 ;// saturate both halfwords to 0..255
+ USAT16 tmp1, #8, tmp1 ;// saturate both halfwords to 0..255
+ SUB tmp4, tmp4, #10 ;// ref is 12 bytes past x0 of this group, so ref-10 addresses x2
+
+ SUBS count, count, #4<<28 ;// loop_x -= 4; carry is tested by the BCC below
+ LDR tmp3, [ref, tmp4] ;// four source pixels starting at x2 + that stack word
+ ORR tmp1, tmp1, tmp2, LSL #8 ;// merge into bytes y_0,y_1,y_2,y_3
+
+;// quarter pel position
+ LDR tmp2, = 0x80808080 ;// per-byte sign correction for the EOR below
+ MVN tmp3, tmp3 ;// one's complement of the source pixels
+ UHSUB8 tmp1, tmp1, tmp3 ;// per-byte halving subtract
+ EOR tmp1, tmp1, tmp2 ;// together: per-byte (a+b+1)/2 of filtered and source pixels
+ STR tmp1, [mb], #4 ;// store four output pixels
+
+ BCC next_y ;// borrow from the SUBS above: row finished
+
+ ;// same filter, register roles rotated: x_6_4/x_7_5 lead, x_2_0/x_3_1 follow
+ UXTB16 x_3_1, x_3_1, ROR #8
+
+ SMLAD tmp1, x_6_4, mult_20_01, plus16
+ SMLATB tmp3, x_6_4, mult_20_01, plus16
+ SMLATB tmp2, x_6_4, mult_20_m5, plus16
+ SMLATB tmp4, x_7_5, mult_20_01, plus16
+
+ SMLAD tmp1, x_7_5, mult_20_m5, tmp1
+ SMLATB tmp3, x_7_5, mult_20_m5, tmp3
+ SMLAD tmp2, x_7_5, mult_20_01, tmp2
+ LDR x_7_5, [ref], #4 ;// load the next four source bytes; ref += 4
+ SMLAD tmp4, x_2_0, mult_20_m5, tmp4
+
+ SMLABB tmp1, x_2_0, mult_20_m5, tmp1
+ SMLADX tmp3, x_2_0, mult_20_m5, tmp3
+ SMLADX tmp2, x_2_0, mult_20_01, tmp2
+ SMLADX tmp4, x_3_1, mult_20_m5, tmp4
+
+ SMLABB tmp1, x_3_1, mult_20_01, tmp1
+ UXTB16 x_6_4, x_7_5 ;// even bytes of the newly loaded word
+ SMLABB tmp2, x_3_1, mult_20_m5, tmp2
+ SMLADX tmp3, x_3_1, mult_20_01, tmp3
+ SMLABB tmp4, x_6_4, mult_20_01, tmp4
+
+ MOV tmp2, tmp2, ASR #5 ;// scale down
+ MOV tmp1, tmp1, ASR #5 ;// scale down
+ PKHBT tmp2, tmp2, tmp4, LSL #(16-5) ;// pack second and fourth results
+ PKHBT tmp1, tmp1, tmp3, LSL #(16-5) ;// pack first and third results
+ LDR tmp4, [sp, #0x228] ;// same stack word as in the first half
+ USAT16 tmp2, #8, tmp2 ;// saturate both halfwords to 0..255
+ USAT16 tmp1, #8, tmp1 ;// saturate both halfwords to 0..255
+ SUB tmp4, tmp4, #10 ;// same relative offset as in the first half
+
+ SUBS count, count, #4<<28 ;// loop_x -= 4; carry is tested by the BCS below
+ LDR tmp3, [ref, tmp4] ;// four source pixels to average with
+ ORR tmp1, tmp1, tmp2, LSL #8 ;// merge into four consecutive bytes
+
+;// quarter pel
+ LDR tmp2, = 0x80808080 ;// per-byte sign correction for the EOR below
+ MVN tmp3, tmp3 ;// one's complement of the source pixels
+ UHSUB8 tmp1, tmp1, tmp3 ;// per-byte halving subtract
+ EOR tmp1, tmp1, tmp2 ;// together: per-byte (a+b+1)/2 of filtered and source pixels
+
+ STR tmp1, [mb], #4 ;// store four output pixels
+ BCS loop_x ;// no borrow: more pixels left in this row
+
+next_y
+ AND tmp3, count, #0x00F00000 ;// partWidth-1
+ SMLABB ref, count, mult_20_01, ref ;// +width (count[15:0] * 1)
+ ADDS mb, mb, #16 ;// +16, Carry=0
+ SBC mb, mb, tmp3, LSR #20 ;// -(partWidth-1)-1
+ SBC ref, ref, tmp3, LSR #20 ;// -(partWidth-1)-1
+ ADDS count, count, #(1<<28)-(1<<24) ;// loop_y -= 1 and wrap the borrowed loop_x nibble
+ BGE loop_y ;// result goes negative once loop_y underflows
+
+ ADD sp,sp,#0x1f4 ;// drop 0x1e4 of locals plus the saved r0-r3
+ LDMFD sp!, {r4-r11, pc} ;// restore r4-r11 and return
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_ver_quarter.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_ver_quarter.s
new file mode 100755
index 0000000..1c79b39
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_hor_ver_quarter.s
@@ -0,0 +1,536 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateHorVerQuarter
+;-- function
+;--
+;-------------------------------------------------------------------------------
+
+
+ IF :DEF: H264DEC_WINASM
+ ;// We don't use REQUIRE8 and PRESERVE8 for winasm
+ ELSE
+ REQUIRE8
+ PRESERVE8
+ ENDIF
+
+ AREA |.text|, CODE
+
+;// h264bsdInterpolateHorVerQuarter register allocation (several names share one register)
+
+ref RN 0 ;// r0
+
+mb RN 1 ;// r1
+buff RN 1 ;// r1 as the buffer argument passed to h264bsdFillBlock
+
+count RN 2 ;// r2 as the packed loop counter
+x0 RN 2 ;// r2
+
+y0 RN 3 ;// r3
+x_2_0 RN 3 ;// r3 as unpacked source pixels |2|0|
+res RN 3 ;// r3 as the packed result word
+
+x_3_1 RN 4 ;// r4 as unpacked source pixels |3|1|
+tmp1 RN 4 ;// r4 as scratch
+
+height RN 5
+x_6_4 RN 5 ;// r5 as unpacked source pixels |6|4|
+tmp2 RN 5 ;// r5 as scratch
+
+partW RN 6
+x_7_5 RN 6 ;// r6 as unpacked source pixels |7|5|
+tmp3 RN 6 ;// r6 as scratch
+
+partH RN 7
+tmp4 RN 7 ;// r7 as scratch
+
+tmp5 RN 8
+
+tmp6 RN 9
+
+tmpa RN 10
+
+mult_20_01 RN 11 ;// r11: filter constant
+tmpb RN 11 ;// r11 as scratch
+
+mult_20_m5 RN 12 ;// r12: filter constant
+width RN 12 ;// r12 also holds width, so the two cannot be live together
+
+plus16 RN 14 ;// r14 (lr) as the rounding constant
+
+
+;// function exports and imports
+
+ IMPORT h264bsdFillBlock
+
+ EXPORT h264bsdInterpolateHorVerQuarter
+
+;// Horizontal filter approach
+;//
+;// Basic idea in horizontal filtering is to adjust coefficients
+;// like below. Calculation is done with 16-bit maths.
+;//
+;// Reg x_2_0 x_3_1 x_6_4 x_7_5 x_2_0
+;// [ 2 0 ] [ 3 1 ] [ 6 4 ] [ 7 5 ] [ 10 8 ] ...
+;// y_0 = 20 1 20 -5 -5 1
+;// y_1 = -5 20 1 1 20 -5
+;// y_2 = 1 -5 -5 20 1 20
+;// y_3 = 1 20 -5 -5 20 1
+
+
+h264bsdInterpolateHorVerQuarter
+ STMFD sp!, {r0-r11, lr}
+ SUB sp, sp, #0x1e4
+
+ CMP x0, #0
+ BLT do_fill ;// (x0 < 0)
+ LDR partW, [sp,#0x220] ;// partWidth
+ LDR width, [sp,#0x218] ;// width
+ ADD tmpa, x0, partW ;// (x0+partWidth)
+ ADD tmpa, tmpa, #5 ;// (x0+partW+5)
+ CMP tmpa, width
+ BHI do_fill ;// (x0+partW)>width
+
+ CMP y0, #0
+ BLT do_fill ;// (y0 < 0)
+ LDR partH, [sp,#0x224] ;// partHeight
+ LDR height, [sp,#0x21c] ;// height
+ ADD tmp5, y0, partH ;// (y0+partHeight)
+ ADD tmp5, tmp5, #5 ;// (y0+partH+5)
+ CMP tmp5, height
+ BLS skip_fill ;// no overfill needed
+
+
+do_fill
+ LDR partH, [sp,#0x224] ;// partHeight
+ LDR partW, [sp,#0x220] ;// partWidth
+ LDR height, [sp,#0x21c] ;// height
+ ADD tmp5, partH, #5 ;// tmp5 = partH + 5
+ ADD tmpa, partW, #5 ;// tmpa = partW + 5
+ STMIB sp, {height, tmpa} ;// sp+4 = height, sp+8 = partWidth+5
+ LDR width, [sp,#0x218] ;// width
+ STR tmp5, [sp,#0xc] ;// sp+c = partHeight+5
+ STR tmpa, [sp,#0x10] ;// sp+10 = partWidth+5
+ STR width, [sp,#0] ;// sp+0 = width
+ ADD buff, sp, #0x28 ;// buff = p1[21*21/4+1]
+ BL h264bsdFillBlock
+
+ MOV x0, #0
+ STR x0,[sp,#0x1ec] ;// x0 = 0
+ STR x0,[sp,#0x1f0] ;// y0 = 0
+ ADD ref,sp,#0x28 ;// ref = p1
+ STR tmpa, [sp,#0x218] ;// width = partWidth+5
+
+
+skip_fill
+ LDR x0 ,[sp,#0x1ec] ;// x0
+ LDR y0 ,[sp,#0x1f0] ;// y0
+ LDR width, [sp,#0x218] ;// width
+ LDR tmp6, [sp,#0x228] ;// horVerOffset
+ LDR mb, [sp, #0x1e8] ;// mb
+ MLA tmp5, width, y0, x0 ;// y0*width+x0
+ ADD ref, ref, tmp5 ;// ref += y0*width+x0
+ STR ref, [sp, #0x1e4] ;// store "ref" for vertical filtering
+ AND tmp6, tmp6, #2 ;// calculate ref for horizontal filter
+ MOV tmpa, #2
+ ADD tmp6, tmpa, tmp6, LSR #1
+ MLA ref, tmp6, width, ref
+ ADD ref, ref, #8 ;// ref = ref+8
+
+ ;// pack values to count register
+ ;// [31:28] loop_x (partWidth-1)
+ ;// [27:24] loop_y (partHeight-1)
+ ;// [23:20] partWidth-1
+ ;// [19:16] partHeight-1
+ ;// [15:00] width
+ MOV count, width
+ SUB partW, partW, #1;
+ SUB partH, partH, #1;
+ ADD tmp5, partH, partW, LSL #4
+ ADD count, count, tmp5, LSL #16
+
+
+ LDR mult_20_01, = 0x00140001 ;// constant multipliers
+ LDR mult_20_m5, = 0x0014FFFB ;// constant multipliers
+ MOV plus16, #16 ;// constant for add
+ AND tmp4, count, #0x000F0000 ;// partHeight-1
+ AND tmp6, count, #0x00F00000 ;// partWidth-1
+ ADD count, count, tmp4, LSL #8 ;// partH-1 to lower part of top byte
+
+;// HORIZONTAL PART
+
+loop_y_hor
+ LDR x_3_1, [ref, #-8]
+ ADD count, count, tmp6, LSL #8 ;// partW-1 to upper part of top byte
+ LDR x_7_5, [ref, #-4]
+ UXTB16 x_2_0, x_3_1
+ UXTB16 x_3_1, x_3_1, ROR #8
+ UXTB16 x_6_4, x_7_5
+
+loop_x_hor
+ UXTB16 x_7_5, x_7_5, ROR #8
+
+ SMLAD tmp4, x_2_0, mult_20_01, plus16
+ SMLATB tmp6, x_2_0, mult_20_01, plus16
+ SMLATB tmp5, x_2_0, mult_20_m5, plus16
+ SMLATB tmpa, x_3_1, mult_20_01, plus16
+
+ SMLAD tmp4, x_3_1, mult_20_m5, tmp4
+ SMLATB tmp6, x_3_1, mult_20_m5, tmp6
+ SMLAD tmp5, x_3_1, mult_20_01, tmp5
+ LDR x_3_1, [ref], #4
+ SMLAD tmpa, x_6_4, mult_20_m5, tmpa
+
+ SMLABB tmp4, x_6_4, mult_20_m5, tmp4
+ SMLADX tmp6, x_6_4, mult_20_m5, tmp6
+ SMLADX tmp5, x_6_4, mult_20_01, tmp5
+ SMLADX tmpa, x_7_5, mult_20_m5, tmpa
+
+ SMLABB tmp4, x_7_5, mult_20_01, tmp4
+ UXTB16 x_2_0, x_3_1
+ SMLABB tmp5, x_7_5, mult_20_m5, tmp5
+ SMLADX tmp6, x_7_5, mult_20_01, tmp6
+ SMLABB tmpa, x_2_0, mult_20_01, tmpa
+
+ MOV tmp5, tmp5, ASR #5
+ MOV tmp4, tmp4, ASR #5
+ PKHBT tmp5, tmp5, tmpa, LSL #(16-5)
+ PKHBT tmp4, tmp4, tmp6, LSL #(16-5)
+ USAT16 tmp5, #8, tmp5
+ USAT16 tmp4, #8, tmp4
+
+ SUBS count, count, #4<<28
+ ORR tmp4, tmp4, tmp5, LSL #8
+ STR tmp4, [mb], #4
+ BCC next_y_hor
+
+ UXTB16 x_3_1, x_3_1, ROR #8
+
+ SMLAD tmp4, x_6_4, mult_20_01, plus16
+ SMLATB tmp6, x_6_4, mult_20_01, plus16
+ SMLATB tmp5, x_6_4, mult_20_m5, plus16
+ SMLATB tmpa, x_7_5, mult_20_01, plus16
+
+ SMLAD tmp4, x_7_5, mult_20_m5, tmp4
+ SMLATB tmp6, x_7_5, mult_20_m5, tmp6
+ SMLAD tmp5, x_7_5, mult_20_01, tmp5
+ LDR x_7_5, [ref], #4
+ SMLAD tmpa, x_2_0, mult_20_m5, tmpa
+
+ SMLABB tmp4, x_2_0, mult_20_m5, tmp4
+ SMLADX tmp6, x_2_0, mult_20_m5, tmp6
+ SMLADX tmp5, x_2_0, mult_20_01, tmp5
+ SMLADX tmpa, x_3_1, mult_20_m5, tmpa
+
+ SMLABB tmp4, x_3_1, mult_20_01, tmp4
+ UXTB16 x_6_4, x_7_5
+ SMLABB tmp5, x_3_1, mult_20_m5, tmp5
+ SMLADX tmp6, x_3_1, mult_20_01, tmp6
+ SMLABB tmpa, x_6_4, mult_20_01, tmpa
+
+ MOV tmp5, tmp5, ASR #5
+ MOV tmp4, tmp4, ASR #5
+ PKHBT tmp5, tmp5, tmpa, LSL #(16-5)
+ PKHBT tmp4, tmp4, tmp6, LSL #(16-5)
+ USAT16 tmp5, #8, tmp5
+ USAT16 tmp4, #8, tmp4
+
+ SUBS count, count, #4<<28
+ ORR tmp4, tmp4, tmp5, LSL #8
+ STR tmp4, [mb], #4
+ BCS loop_x_hor
+
+next_y_hor
+ AND tmp6, count, #0x00F00000 ;// partWidth-1
+ SMLABB ref, count, mult_20_01, ref ;// +width
+ ADDS mb, mb, #16 ;// +16, Carry=0
+ SBC mb, mb, tmp6, LSR #20 ;// -(partWidth-1)-1
+ SBC ref, ref, tmp6, LSR #20 ;// -(partWidth-1)-1
+ ADDS count, count, #(1<<28)-(1<<24) ;// decrement counter (partW)
+ BGE loop_y_hor
+
+
+
+;// VERTICAL PART
+;//
+;// Approach to vertical interpolation
+;//
+;// Interpolation is done by using 32-bit loads and stores
+;// and by using 16 bit arithmetic. 4x4 block is processed
+;// in each round.
+;//
+;// |a_11|a_11|a_11|a_11|...|a_1n|a_1n|a_1n|a_1n|
+;// |b_11|b_11|b_11|b_11|...|b_1n|b_1n|b_1n|b_1n|
+;// |c_11|c_11|c_11|c_11|...|c_1n|c_1n|c_1n|c_1n|
+;// |d_11|d_11|d_11|d_11|...|d_1n|d_1n|d_1n|d_1n|
+;// ..
+;// ..
+;// |a_m1|a_m1|a_m1|a_m1|...
+;// |b_m1|b_m1|b_m1|b_m1|...
+;// |c_m1|c_m1|c_m1|c_m1|...
+;// |d_m1|d_m1|d_m1|d_m1|...
+
+;// Approach to bilinear interpolation to quarter pel position.
+;// 4 bytes are processed parallel
+;//
+;// algorithm (a+b+1)/2. Rouding upwards +1 can be achieved by
+;// negating second operand to get one's complement (instead of 2's)
+;// and using subtraction, EOR is used to correct sign.
+;//
+;// MVN b, b
+;// UHSUB8 a, a, b
+;// EOR a, a, 0x80808080
+
+
+ LDR ref, [sp, #0x1e4] ;// ref
+ LDR tmpa, [sp, #0x228] ;// horVerOffset
+ LDR mb, [sp, #0x1e8] ;// mb
+ LDR width, [sp, #0x218] ;// width
+ ADD ref, ref, #2 ;// calculate correct position
+ AND tmpa, tmpa, #1
+ ADD ref, ref, tmpa
+ LDR plus16, = 0x00100010 ;// +16 to lower and upperf halfwords
+ AND count, count, #0x00FFFFFF ;// partWidth-1
+
+ AND tmpa, count, #0x000F0000 ;// partHeight-1
+ ADD count, count, tmpa, LSL #8
+
+loop_y
+ ADD count, count, tmp6, LSL #8 ;// partWidth-1
+
+loop_x
+ LDR tmp1, [ref], width ;// |a4|a3|a2|a1|
+ LDR tmp2, [ref], width ;// |c4|c3|c2|c1|
+ LDR tmp3, [ref], width ;// |g4|g3|g2|g1|
+ LDR tmp4, [ref], width ;// |m4|m3|m2|m1|
+ LDR tmp5, [ref], width ;// |r4|r3|r2|r1|
+ LDR tmp6, [ref], width ;// |t4|t3|t2|t1|
+
+ ;// first four pixels
+ UXTB16 tmpa, tmp3 ;// |g3|g1|
+ UXTAB16 tmpa, tmpa, tmp4 ;// |g3+m3|g1+m1|
+ UXTB16 tmpb, tmp2 ;// |c3|c1|
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+
+ UXTAB16 tmpb, tmpb, tmp5 ;// |c3+r3|c1+r1|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpa, tmpa, tmp1 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp6 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR res, = 0x00FF00FF
+ UXTB16 tmpa, tmp3, ROR #8 ;// |g4|g2|
+ UXTAB16 tmpa, tmpa, tmp4, ROR #8 ;// |g4+m4|g2+m2|
+ AND res, res, tmpb, LSR #5 ;// mask and divide by 32
+
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTB16 tmpb, tmp2, ROR #8 ;// |c4|c2|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpb, tmpb, tmp5, ROR #8 ;// |c4+r4|c2+r2|
+ UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp6, ROR #8 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR tmp1, [mb]
+ LDR tmpa, = 0xFF00FF00
+ MVN tmp1, tmp1
+ AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divede by 32
+ ORR res, res, tmpa
+
+ LDR tmpa, = 0x80808080
+ UHSUB8 res, res, tmp1 ;// bilinear interpolation
+ LDR tmp1, [ref], width ;// load next row
+ EOR res, res, tmpa ;// correct sign
+
+ STR res, [mb], #16 ;// next row (mb)
+
+
+ ;// tmp2 = |a4|a3|a2|a1|
+ ;// tmp3 = |c4|c3|c2|c1|
+ ;// tmp4 = |g4|g3|g2|g1|
+ ;// tmp5 = |m4|m3|m2|m1|
+ ;// tmp6 = |r4|r3|r2|r1|
+ ;// tmp1 = |t4|t3|t2|t1|
+
+ ;// second four pixels
+ UXTB16 tmpa, tmp4 ;// |g3|g1|
+ UXTAB16 tmpa, tmpa, tmp5 ;// |g3+m3|g1+m1|
+ UXTB16 tmpb, tmp3 ;// |c3|c1|
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTAB16 tmpb, tmpb, tmp6 ;// |c3+r3|c1+r1|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpa, tmpa, tmp2 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp1 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR res, = 0x00FF00FF
+ UXTB16 tmpa, tmp4, ROR #8 ;// |g4|g2|
+ UXTAB16 tmpa, tmpa, tmp5, ROR #8 ;// |g4+m4|g2+m2|
+ AND res, res, tmpb, LSR #5 ;// mask and divide by 32
+
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTB16 tmpb, tmp3, ROR #8 ;// |c4|c2|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpb, tmpb, tmp6, ROR #8 ;// |c4+r4|c2+r2|
+ UXTAB16 tmpa, tmpa, tmp2, ROR #8 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR tmp2, [mb]
+ LDR tmpa, = 0xFF00FF00
+ MVN tmp2, tmp2
+
+ AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divide by 32
+ ORR res, res, tmpa
+ LDR tmpa, = 0x80808080
+ UHSUB8 res, res, tmp2 ;// bilinear interpolation
+ LDR tmp2, [ref], width ;// load next row
+ EOR res, res, tmpa ;// correct sign
+ STR res, [mb], #16 ;// next row
+
+ ;// tmp3 = |a4|a3|a2|a1|
+ ;// tmp4 = |c4|c3|c2|c1|
+ ;// tmp5 = |g4|g3|g2|g1|
+ ;// tmp6 = |m4|m3|m2|m1|
+ ;// tmp1 = |r4|r3|r2|r1|
+ ;// tmp2 = |t4|t3|t2|t1|
+
+ ;// third four pixels
+ UXTB16 tmpa, tmp5 ;// |g3|g1|
+ UXTAB16 tmpa, tmpa, tmp6 ;// |g3+m3|g1+m1|
+ UXTB16 tmpb, tmp4 ;// |c3|c1|
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTAB16 tmpb, tmpb, tmp1 ;// |c3+r3|c1+r1|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpa, tmpa, tmp3 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp2 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR res, = 0x00FF00FF
+ UXTB16 tmpa, tmp5, ROR #8 ;// |g4|g2|
+ UXTAB16 tmpa, tmpa, tmp6, ROR #8 ;// |g4+m4|g2+m2|
+ AND res, res, tmpb, LSR #5 ;// mask and divide by 32
+
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTB16 tmpb, tmp4, ROR #8 ;// |c4|c2|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpb, tmpb, tmp1, ROR #8 ;// |c4+r4|c2+r2|
+ UXTAB16 tmpa, tmpa, tmp3, ROR #8 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp2, ROR #8 ;// 16+20(G+M)+A+T
+
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR tmp3, [mb]
+ LDR tmpa, = 0xFF00FF00
+ MVN tmp3, tmp3
+
+ AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divide by 32
+ ORR res, res, tmpa
+ LDR tmpa, = 0x80808080
+ UHSUB8 res, res, tmp3 ;// bilinear interpolation
+ LDR tmp3, [ref] ;// load next row
+ EOR res, res, tmpa ;// correct sign
+ STR res, [mb], #16 ;// next row
+
+ ;// tmp4 = |a4|a3|a2|a1|
+ ;// tmp5 = |c4|c3|c2|c1|
+ ;// tmp6 = |g4|g3|g2|g1|
+ ;// tmp1 = |m4|m3|m2|m1|
+ ;// tmp2 = |r4|r3|r2|r1|
+ ;// tmp3 = |t4|t3|t2|t1|
+
+ ;// fourth four pixels
+ UXTB16 tmpa, tmp6 ;// |g3|g1|
+ UXTAB16 tmpa, tmpa, tmp1 ;// |g3+m3|g1+m1|
+ UXTB16 tmpb, tmp5 ;// |c3|c1|
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTAB16 tmpb, tmpb, tmp2 ;// |c3+r3|c1+r1|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpa, tmpa, tmp4 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp3 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR res, = 0x00FF00FF
+ UXTB16 tmpa, tmp6, ROR #8 ;// |g4|g2|
+ UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// |g4+m4|g2+m2|
+ AND res, res, tmpb, LSR #5 ;// mask and divide by 32
+
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTB16 tmpb, tmp5, ROR #8 ;// |c4|c2|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpb, tmpb, tmp2, ROR #8 ;// |c4+r4|c2+r2|
+ UXTAB16 tmpa, tmpa, tmp4, ROR #8 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp3, ROR #8 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR tmp5, [mb]
+ LDR tmp4, = 0xFF00FF00
+ MVN tmp5, tmp5
+
+ AND tmpa, tmp4, tmpb, LSL #3 ;// mask and divide by 32
+ ORR res, res, tmpa
+ LDR tmpa, = 0x80808080
+ UHSUB8 res, res, tmp5 ;// bilinear interpolation
+
+ ;// decrement loop_x counter
+ SUBS count, count, #4<<28 ;// decrement x loop counter
+
+ ;// calculate "ref" address for next round
+ SUB ref, ref, width, LSL #3 ;// ref -= 8*width;
+ ADD ref, ref, #4 ;// next column (4 pixels)
+
+ EOR res, res, tmpa ;// correct sign
+ STR res, [mb], #-44
+
+ BCS loop_x
+
+ ADDS mb, mb, #64 ;// set Carry=0
+ ADD ref, ref, width, LSL #2 ;// ref += 4*width
+ AND tmp6, count, #0x00F00000 ;// partWidth-1
+ SBC ref, ref, tmp6, LSR #20 ;// -(partWidth-1)-1
+ SBC mb, mb, tmp6, LSR #20 ;// -(partWidth-1)-1
+
+ ADDS count, count, #0xC << 24 ;// decrement y loop counter
+ BGE loop_y
+
+ ADD sp, sp, #0x1f4
+ LDMFD sp!, {r4-r11, pc}
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_mid_hor.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_mid_hor.s
new file mode 100755
index 0000000..a81aed7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_mid_hor.s
@@ -0,0 +1,163 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version horizontal part of
+;-- h264bsdInterpolateMid functions
+;--
+;-------------------------------------------------------------------------------
+
+
+ IF :DEF: H264DEC_WINASM
+ ;// We don't use REQUIRE8 and PRESERVE8 for winasm
+ ELSE
+ REQUIRE8
+ PRESERVE8
+ ENDIF
+
+ AREA |.text|, CODE
+
+
+;// Register allocation
+
+ref RN 0 ;// pointer to current position in reference image
+mb RN 1 ;// pointer to current position in interpolated mb
+count RN 2 ;// bit-packed width and count values
+
+x_2_0 RN 4 ;// window pixels 2 and 0 (later 10 and 8), one per halfword
+x_3_1 RN 5 ;// window pixels 3 and 1 (later 11 and 9), one per halfword
+x_6_4 RN 6 ;// window pixels 6 and 4, one per halfword
+x_7_5 RN 7 ;// window pixels 7 and 5, one per halfword
+
+tmp1 RN 8 ;// accumulator for the 1st output of each group of four
+tmp2 RN 9 ;// accumulator for the 2nd output
+tmp3 RN 10 ;// accumulator for the 3rd output (also scratch for partWidth-1)
+tmp4 RN 11 ;// accumulator for the 4th output
+
+mult_20_01 RN 12 ;// [20, 1]: 20 in the top halfword, 1 in the bottom
+mult_20_m5 RN 14 ;// [20, -5]: 20 in the top halfword, -5 in the bottom (r14/lr, saved on entry)
+
+
+ EXPORT h264bsdInterpolateMidHorPart
+
+;// Horizontal filter approach
+;//
+;// Basic idea in horizontal filtering is to adjust coefficients
+;// like below. Calculation is done with 16-bit maths.
+;//
+;// Reg      x_2_0     x_3_1     x_6_4     x_7_5     x_2_0
+;//        [  2  0 ] [  3  1 ] [  6  4 ] [  7  5 ] [ 10  8 ] ...
+;// y_0 =    20  1     20 -5        -5         1
+;// y_1 =    -5        20  1      1 20        -5
+;// y_2 =     1        -5        -5 20      1 20
+;// y_3 =               1        20 -5     -5 20         1
+
+
+h264bsdInterpolateMidHorPart
+ STMFD sp!, {r4-r11, lr} ;// save callee-saved regs; lr is reused as mult_20_m5
+
+ ;// count register layout on entry, as the code below uses it:
+ ;// [31:28] loop_x counter, reloaded from [19:16] for every row (expected 0 on entry)
+ ;// [27:20] row counter, decremented by one per row until it underflows
+ ;// [19:16] partWidth-1
+ ;// [15:00] width (added to ref once per row)
+ ;// NOTE(review): this comment used to list [27:24] loop_y, [23:20] partWidth-1, [19:16] partHeight-1 - confirm with caller
+
+
+ LDR mult_20_01, = 0x00140001 ;// taps 20 (top) and 1 (bottom) as signed halfwords
+ LDR mult_20_m5, = 0x0014FFFB ;// taps 20 (top) and -5 (bottom) as signed halfwords
+ AND tmp3, count, #0x000F0000 ;// partWidth-1
+loop_y
+ LDR x_3_1, [ref, #-8] ;// window pixels x0..x3
+ ADD count, count, tmp3, LSL #12 ;// loop_x counter (bits 31:28) = partWidth-1
+ LDR x_7_5, [ref, #-4] ;// window pixels x4..x7
+ UXTB16 x_2_0, x_3_1 ;// |x2|x0|
+ UXTB16 x_3_1, x_3_1, ROR #8 ;// |x3|x1|
+ UXTB16 x_6_4, x_7_5 ;// |x6|x4|
+
+loop_x
+ UXTB16 x_7_5, x_7_5, ROR #8 ;// |x7|x5| (pixel indices are relative to the window at this point)
+
+ SMUAD tmp1, x_2_0, mult_20_01 ;// y_0 = 20*x2 + x0
+ SMULTB tmp2, x_2_0, mult_20_m5 ;// y_1 = -5*x2
+ SMULTB tmp3, x_2_0, mult_20_01 ;// y_2 = x2
+ SMULTB tmp4, x_3_1, mult_20_01 ;// y_3 = x3
+
+ SMLAD tmp1, x_3_1, mult_20_m5, tmp1 ;// y_0 += 20*x3 - 5*x1
+ SMLAD tmp2, x_3_1, mult_20_01, tmp2 ;// y_1 += 20*x3 + x1
+ SMLATB tmp3, x_3_1, mult_20_m5, tmp3 ;// y_2 += -5*x3
+ LDR x_3_1, [ref], #4 ;// fetch x8..x11 (old x_3_1 is no longer needed)
+ SMLAD tmp4, x_6_4, mult_20_m5, tmp4 ;// y_3 += 20*x6 - 5*x4
+
+ SMLABB tmp1, x_6_4, mult_20_m5, tmp1 ;// y_0 += -5*x4
+ SMLADX tmp2, x_6_4, mult_20_01, tmp2 ;// y_1 += x6 + 20*x4
+ SMLADX tmp3, x_6_4, mult_20_m5, tmp3 ;// y_2 += -5*x6 + 20*x4
+ SMLADX tmp4, x_7_5, mult_20_m5, tmp4 ;// y_3 += -5*x7 + 20*x5
+
+ SMLABB tmp1, x_7_5, mult_20_01, tmp1 ;// y_0 += x5
+ SMLABB tmp2, x_7_5, mult_20_m5, tmp2 ;// y_1 += -5*x5
+ UXTB16 x_2_0, x_3_1 ;// |x10|x8|
+ SMLADX tmp3, x_7_5, mult_20_01, tmp3 ;// y_2 += x7 + 20*x5
+ SMLABB tmp4, x_2_0, mult_20_01, tmp4 ;// y_3 += x8
+
+ SUBS count, count, #4<<28 ;// four outputs done; C=0 once the x counter underflows
+ STR tmp1, [mb], #4 ;// outputs are stored as 32-bit sums, no rounding or shift here
+ STR tmp2, [mb], #4
+ STR tmp3, [mb], #4
+ STR tmp4, [mb], #4
+ BCC next_y ;// row complete
+
+ UXTB16 x_3_1, x_3_1, ROR #8 ;// |x11|x9|; second half: same taps, window moved on by four pixels
+
+ SMUAD tmp1, x_6_4, mult_20_01 ;// y_4 = 20*x6 + x4
+ SMULTB tmp2, x_6_4, mult_20_m5 ;// y_5 = -5*x6
+ SMULTB tmp3, x_6_4, mult_20_01 ;// y_6 = x6
+ SMULTB tmp4, x_7_5, mult_20_01 ;// y_7 = x7
+
+ SMLAD tmp1, x_7_5, mult_20_m5, tmp1 ;// y_4 += 20*x7 - 5*x5
+ SMLAD tmp2, x_7_5, mult_20_01, tmp2 ;// y_5 += 20*x7 + x5
+ SMLATB tmp3, x_7_5, mult_20_m5, tmp3 ;// y_6 += -5*x7
+ LDR x_7_5, [ref], #4 ;// fetch x12..x15 (unpacked at the top of loop_x)
+ SMLAD tmp4, x_2_0, mult_20_m5, tmp4 ;// y_7 += 20*x10 - 5*x8
+
+ SMLABB tmp1, x_2_0, mult_20_m5, tmp1 ;// y_4 += -5*x8
+ SMLADX tmp2, x_2_0, mult_20_01, tmp2 ;// y_5 += x10 + 20*x8
+ SMLADX tmp3, x_2_0, mult_20_m5, tmp3 ;// y_6 += -5*x10 + 20*x8
+ SMLADX tmp4, x_3_1, mult_20_m5, tmp4 ;// y_7 += -5*x11 + 20*x9
+
+ SMLABB tmp1, x_3_1, mult_20_01, tmp1 ;// y_4 += x9
+ SMLABB tmp2, x_3_1, mult_20_m5, tmp2 ;// y_5 += -5*x9
+ UXTB16 x_6_4, x_7_5 ;// |x14|x12|
+ SMLADX tmp3, x_3_1, mult_20_01, tmp3 ;// y_6 += x11 + 20*x9
+ SMLABB tmp4, x_6_4, mult_20_01, tmp4 ;// y_7 += x12
+
+ SUBS count, count, #4<<28 ;// four more outputs done; C=0 once the x counter underflows
+ STR tmp1, [mb], #4
+ STR tmp2, [mb], #4
+ STR tmp3, [mb], #4
+ STR tmp4, [mb], #4
+ BCS loop_x ;// more columns in this row
+
+next_y
+ AND tmp3, count, #0x000F0000 ;// partWidth-1
+ SMLABB ref, count, mult_20_01, ref ;// +width
+ SBC ref, ref, tmp3, LSR #16 ;// -(partWidth-1)-1 (C is clear on both paths into next_y)
+ ADDS count, count, #(1<<28)-(1<<20) ;// x nibble 0xF -> 0 and row counter -= 1
+ BGE loop_y ;// result goes negative once the row counter underflows
+
+ LDMFD sp!, {r4-r11, pc} ;// restore registers and return
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_half.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_half.s
new file mode 100755
index 0000000..244fc6f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_half.s
@@ -0,0 +1,347 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateVerHalf function
+;--
+;-------------------------------------------------------------------------------
+
+
+ IF :DEF: H264DEC_WINASM
+ ;// We don't use REQUIRE8 and PRESERVE8 for winasm
+ ELSE
+ REQUIRE8
+ PRESERVE8
+ ENDIF
+
+ AREA |.text|, CODE
+
+;// h264bsdInterpolateVerHalf register allocation
+
+ref RN 0 ;// source pointer
+
+mb RN 1 ;// output pointer (reloaded from the saved r1 before the loops)
+buff RN 1 ;// same register: buffer argument passed to h264bsdFillBlock
+
+count RN 2 ;// packed loop counters
+x0 RN 2 ;// same register: incoming x0 argument
+
+res RN 3 ;// packed result word (four output pixels)
+y0 RN 3 ;// same register: incoming y0 argument
+
+tmp1 RN 4 ;// tmp1..tmp6: rotating window of six source rows (a, c, g, m, r, t)
+
+tmp2 RN 5
+height RN 5 ;// same register as tmp2, used before the loops only
+
+tmp3 RN 6
+partW RN 6 ;// same register as tmp3, used before the loops only
+
+tmp4 RN 7
+partH RN 7 ;// same register as tmp4, used before the loops only
+
+tmp5 RN 8
+tmp6 RN 9
+
+tmpa RN 10 ;// scratch: two 16-bit filter sums / byte masks
+tmpb RN 11 ;// scratch: two 16-bit filter sums
+width RN 12 ;// distance in bytes between source rows
+
+plus16 RN 14 ;// rounding constant 0x00100010 (r14/lr, saved on entry)
+
+
+;// function exports and imports
+
+ IMPORT h264bsdFillBlock
+
+ EXPORT h264bsdInterpolateVerHalf
+
+;// Approach to vertical interpolation
+;//
+;// Interpolation is done by using 32-bit loads and stores
+;// and by using 16 bit arithmetic. 4x4 block is processed
+;// in each round.
+;//
+;// |a_11|a_11|a_11|a_11|...|a_1n|a_1n|a_1n|a_1n|
+;// |b_11|b_11|b_11|b_11|...|b_1n|b_1n|b_1n|b_1n|
+;// |c_11|c_11|c_11|c_11|...|c_1n|c_1n|c_1n|c_1n|
+;// |d_11|d_11|d_11|d_11|...|d_1n|d_1n|d_1n|d_1n|
+;// ..
+;// ..
+;// |a_m1|a_m1|a_m1|a_m1|...
+;// |b_m1|b_m1|b_m1|b_m1|...
+;// |c_m1|c_m1|c_m1|c_m1|...
+;// |d_m1|d_m1|d_m1|d_m1|...
+
+h264bsdInterpolateVerHalf
+ STMFD sp!, {r0-r11, lr} ;// save args r0-r3 (reloaded from the stack below) and callee-saved regs
+ SUB sp, sp, #0x1e4 ;// locals; caller's stack args now start at sp+0x218 (0x1e4 + 13*4)
+
+ CMP x0, #0
+ BLT do_fill ;// (x0 < 0)
+ LDR partW, [sp,#0x220] ;// partWidth
+ ADD tmp5, x0, partW ;// (x0+partWidth)
+ LDR width, [sp,#0x218] ;// width
+ CMP tmp5, width
+ BHI do_fill ;// (x0+partW)>width
+
+ CMP y0, #0
+ BLT do_fill ;// (y0 < 0)
+ LDR partH, [sp,#0x224] ;// partHeight
+ ADD tmp6, y0, partH ;// (y0+partHeight)
+ ADD tmp6, tmp6, #5 ;// (y0+partH+5)
+ LDR height, [sp,#0x21c] ;// height
+ CMP tmp6, height
+ BLS skip_fill ;// no overfill needed
+
+;// The partW x (partH+5) source area crosses a bound checked above: go through h264bsdFillBlock
+do_fill
+ LDR partH, [sp,#0x224] ;// partHeight
+ ADD tmp5, partH, #5 ;// tmp5 = partH + 5;
+ LDR height, [sp,#0x21c] ;// height
+ LDR partW, [sp,#0x220] ;// partWidth
+ STMIB sp, {height, partW} ;// sp+4 = height, sp+8 = partWidth
+ STR tmp5, [sp,#0xc] ;// sp+c partHeight+5
+ STR partW, [sp,#0x10] ;// sp+10 = partWidth
+ LDR width, [sp,#0x218] ;// width
+ STR width, [sp,#0] ;// sp+0 = width
+ ADD buff, sp, #0x28 ;// buff = p1[21*21/4+1]
+ BL h264bsdFillBlock ;// r0=ref, r1=buff, r2=x0, r3=y0 (still the incoming values) + 5 stack words
+
+ MOV x0, #0
+ STR x0,[sp,#0x1ec] ;// x0 = 0
+ STR x0,[sp,#0x1f0] ;// y0 = 0
+ ADD ref,sp,#0x28 ;// ref = p1
+ STR partW, [sp,#0x218] ;// saved width = partWidth; rows are read from p1 from here on
+
+
+skip_fill
+ LDR x0 ,[sp,#0x1ec] ;// x0
+ LDR y0 ,[sp,#0x1f0] ;// y0
+ LDR width, [sp,#0x218] ;// width
+ MLA tmp6, width, y0, x0 ;// y0*width+x0
+ ADD ref, ref, tmp6 ;// ref += y0*width+x0
+ LDR mb, [sp, #0x1e8] ;// mb
+
+ ADD count, partW, partH, LSL #16 ;// |partH|partW|
+ LDR tmp5, = 0x00010001
+ SSUB16 count, count, tmp5; ;// |partH-1|partW-1|
+ LDR plus16, = 0x00100010 ;// +16 rounding term in both halfwords
+
+ AND tmp1, count, #0x000000FF ;// partWidth-1
+
+
+loop_y
+ ADD count, count, tmp1, LSL #24 ;// partWidth-1 to top byte
+
+loop_x
+ LDR tmp1, [ref], width ;// |a4|a3|a2|a1|
+ LDR tmp2, [ref], width ;// |c4|c3|c2|c1|
+ LDR tmp3, [ref], width ;// |g4|g3|g2|g1|
+ LDR tmp4, [ref], width ;// |m4|m3|m2|m1|
+ LDR tmp5, [ref], width ;// |r4|r3|r2|r1|
+ LDR tmp6, [ref], width ;// |t4|t3|t2|t1|
+
+ ;// first four pixels
+ UXTB16 tmpa, tmp3 ;// |g3|g1|
+ UXTAB16 tmpa, tmpa, tmp4 ;// |g3+m3|g1+m1|
+ UXTB16 tmpb, tmp2 ;// |c3|c1|
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+
+ UXTAB16 tmpb, tmpb, tmp5 ;// |c3+r3|c1+r1|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpa, tmpa, tmp1 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp6 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate each halfword to 0..8191 (0..255 after >>5)
+ LDR res, = 0x00FF00FF ;// byte mask for pixels 1 and 3
+ UXTB16 tmpa, tmp3, ROR #8 ;// |g4|g2|
+ UXTAB16 tmpa, tmpa, tmp4, ROR #8 ;// |g4+m4|g2+m2|
+ AND res, res, tmpb, LSR #5 ;// mask and divide by 32
+
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTB16 tmpb, tmp2, ROR #8 ;// |c4|c2|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpb, tmpb, tmp5, ROR #8 ;// |c4+r4|c2+r2|
+ UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp6, ROR #8 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR tmp1, [ref], width ;// load next row (row a is no longer needed)
+ LDR tmpa, = 0xFF00FF00 ;// byte mask for pixels 2 and 4
+
+ AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divide by 32 (<<8 into the odd byte, >>5)
+ ORR res, res, tmpa ;// |p4|p3|p2|p1|
+ STR res, [mb], #16 ;// next row (mb)
+
+ ;// tmp2 = |a4|a3|a2|a1|
+ ;// tmp3 = |c4|c3|c2|c1|
+ ;// tmp4 = |g4|g3|g2|g1|
+ ;// tmp5 = |m4|m3|m2|m1|
+ ;// tmp6 = |r4|r3|r2|r1|
+ ;// tmp1 = |t4|t3|t2|t1|
+
+ ;// second four pixels
+ UXTB16 tmpa, tmp4 ;// |g3|g1|
+ UXTAB16 tmpa, tmpa, tmp5 ;// |g3+m3|g1+m1|
+ UXTB16 tmpb, tmp3 ;// |c3|c1|
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTAB16 tmpb, tmpb, tmp6 ;// |c3+r3|c1+r1|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpa, tmpa, tmp2 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp1 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR res, = 0x00FF00FF
+ UXTB16 tmpa, tmp4, ROR #8 ;// |g4|g2|
+ UXTAB16 tmpa, tmpa, tmp5, ROR #8 ;// |g4+m4|g2+m2|
+ AND res, res, tmpb, LSR #5 ;// mask and divide by 32
+
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTB16 tmpb, tmp3, ROR #8 ;// |c4|c2|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpb, tmpb, tmp6, ROR #8 ;// |c4+r4|c2+r2|
+ UXTAB16 tmpa, tmpa, tmp2, ROR #8 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR tmp2, [ref], width ;// load next row
+ LDR tmpa, = 0xFF00FF00
+
+ AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divide by 32
+ ORR res, res, tmpa
+ STR res, [mb], #16 ;// next row
+
+ ;// tmp3 = |a4|a3|a2|a1|
+ ;// tmp4 = |c4|c3|c2|c1|
+ ;// tmp5 = |g4|g3|g2|g1|
+ ;// tmp6 = |m4|m3|m2|m1|
+ ;// tmp1 = |r4|r3|r2|r1|
+ ;// tmp2 = |t4|t3|t2|t1|
+
+ ;// third four pixels
+ UXTB16 tmpa, tmp5 ;// |g3|g1|
+ UXTAB16 tmpa, tmpa, tmp6 ;// |g3+m3|g1+m1|
+ UXTB16 tmpb, tmp4 ;// |c3|c1|
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTAB16 tmpb, tmpb, tmp1 ;// |c3+r3|c1+r1|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpa, tmpa, tmp3 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp2 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR res, = 0x00FF00FF
+ UXTB16 tmpa, tmp5, ROR #8 ;// |g4|g2|
+ UXTAB16 tmpa, tmpa, tmp6, ROR #8 ;// |g4+m4|g2+m2|
+ AND res, res, tmpb, LSR #5 ;// mask and divide by 32
+
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTB16 tmpb, tmp4, ROR #8 ;// |c4|c2|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpb, tmpb, tmp1, ROR #8 ;// |c4+r4|c2+r2|
+ UXTAB16 tmpa, tmpa, tmp3, ROR #8 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp2, ROR #8 ;// 16+20(G+M)+A+T
+
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR tmp3, [ref] ;// last row, no post-increment: ref is now 8 rows below the column start
+ LDR tmpa, = 0xFF00FF00
+
+ ;// decrement loop_x counter
+ SUBS count, count, #4<<24 ;// (partWidth-1) -= 4; C is tested by BCS loop_x below
+
+ AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divide by 32
+ ORR res, res, tmpa
+ STR res, [mb], #16 ;// next row
+
+ ;// tmp4 = |a4|a3|a2|a1|
+ ;// tmp5 = |c4|c3|c2|c1|
+ ;// tmp6 = |g4|g3|g2|g1|
+ ;// tmp1 = |m4|m3|m2|m1|
+ ;// tmp2 = |r4|r3|r2|r1|
+ ;// tmp3 = |t4|t3|t2|t1|
+
+ ;// fourth four pixels
+ UXTB16 tmpa, tmp6 ;// |g3|g1|
+ UXTAB16 tmpa, tmpa, tmp1 ;// |g3+m3|g1+m1|
+ UXTB16 tmpb, tmp5 ;// |c3|c1|
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTAB16 tmpb, tmpb, tmp2 ;// |c3+r3|c1+r1|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpa, tmpa, tmp4 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp3 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR res, = 0x00FF00FF
+ UXTB16 tmpa, tmp6, ROR #8 ;// |g4|g2|
+ UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// |g4+m4|g2+m2|
+ AND res, res, tmpb, LSR #5 ;// mask and divide by 32
+
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTB16 tmpb, tmp5, ROR #8 ;// |c4|c2|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpb, tmpb, tmp2, ROR #8 ;// |c4+r4|c2+r2|
+ UXTAB16 tmpa, tmpa, tmp4, ROR #8 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp3, ROR #8 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR tmp4, = 0xFF00FF00 ;// row a (tmp4) is dead, reuse it for the byte mask
+
+ ;// calculate "ref" address for next round
+ SUB ref, ref, width, LSL #3 ;// ref -= 8*width;
+ ADD ref, ref, #4; ;// next column (4 pixels)
+ AND tmpa, tmp4, tmpb, LSL #3 ;// mask and divide by 32
+ ORR res, res, tmpa
+ STR res, [mb], #-44 ;// back up three 16-byte rows, forward 4 pixels (-48+4)
+
+ BCS loop_x ;// flags from the SUBS above: loop while the x counter did not borrow
+
+ ADDS count, count, #252<<16 ;// (partHeight-1) -= 4;
+ ADD ref, ref, width, LSL #2 ;// ref += 4*width
+ AND tmp1, count, #0x000000FF ;// partWidth-1
+ ADD tmp2, tmp1, #1 ;// partWidth
+ SUB ref, ref, tmp2 ;// ref -= partWidth
+ ADD mb, mb, #64;
+ SUB mb, mb, tmp2; ;// mb -= partWidth
+ BGE loop_y ;// flags from the ADDS above; assumes top byte is 0xFF after loop_x (partWidth multiple of 4) - TODO confirm
+
+ ADD sp,sp,#0x1f4 ;// drop locals (0x1e4) plus the saved r0-r3 (16 bytes)
+ LDMFD sp!, {r4-r11, pc} ;// restore registers and return
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_quarter.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_quarter.s
new file mode 100755
index 0000000..5266c85
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_ver_quarter.s
@@ -0,0 +1,374 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version of h264bsdInterpolateVerQuarter function
+;--
+;-------------------------------------------------------------------------------
+
+ IF :DEF: H264DEC_WINASM
+ ;// We don't use REQUIRE8 and PRESERVE8 for winasm
+ ELSE
+ REQUIRE8
+ PRESERVE8
+ ENDIF
+
+ AREA |.text|, CODE
+
+;// h264bsdInterpolateVerQuarter register allocation
+
+ref RN 0 ;// source pointer
+
+mb RN 1 ;// output pointer (reloaded from the saved r1 before the loops)
+buff RN 1 ;// same register: buffer argument passed to h264bsdFillBlock
+
+count RN 2 ;// packed loop counters and verOffset
+x0 RN 2 ;// same register: incoming x0 argument
+
+res RN 3 ;// packed result word (four output pixels)
+y0 RN 3 ;// same register: incoming y0 argument
+
+tmp1 RN 4 ;// tmp1..tmp6: rotating window of six source rows (a, c, g, m, r, t)
+
+tmp2 RN 5
+height RN 5 ;// same register as tmp2, used before the loops only
+
+tmp3 RN 6
+partW RN 6 ;// same register as tmp3, used before the loops only
+
+tmp4 RN 7
+partH RN 7 ;// same register as tmp4, used before the loops only
+
+tmp5 RN 8
+tmp6 RN 9
+
+tmpa RN 10 ;// scratch: two 16-bit filter sums / byte masks
+tmpb RN 11 ;// scratch: two 16-bit filter sums
+width RN 12 ;// distance in bytes between source rows
+
+plus16 RN 14 ;// rounding constant 0x00100010 (r14/lr, saved on entry)
+
+
+;// function exports and imports
+
+ IMPORT h264bsdFillBlock
+
+ EXPORT h264bsdInterpolateVerQuarter
+
+;// Approach to vertical interpolation
+;//
+;// Interpolation is done by using 32-bit loads and stores
+;// and by using 16 bit arithmetic. 4x4 block is processed
+;// in each round.
+;//
+;// |a_11|a_11|a_11|a_11|...|a_1n|a_1n|a_1n|a_1n|
+;// |b_11|b_11|b_11|b_11|...|b_1n|b_1n|b_1n|b_1n|
+;// |c_11|c_11|c_11|c_11|...|c_1n|c_1n|c_1n|c_1n|
+;// |d_11|d_11|d_11|d_11|...|d_1n|d_1n|d_1n|d_1n|
+;// ..
+;// ..
+;// |a_m1|a_m1|a_m1|a_m1|...
+;// |b_m1|b_m1|b_m1|b_m1|...
+;// |c_m1|c_m1|c_m1|c_m1|...
+;// |d_m1|d_m1|d_m1|d_m1|...
+
+h264bsdInterpolateVerQuarter
+ STMFD sp!, {r0-r11, lr} ;// save args r0-r3 (reloaded from the stack below) and callee-saved regs
+ SUB sp, sp, #0x1e4 ;// locals; caller's stack args now start at sp+0x218 (0x1e4 + 13*4)
+
+ CMP x0, #0
+ BLT do_fill ;// (x0 < 0)
+ LDR partW, [sp,#0x220] ;// partWidth
+ ADD tmp5, x0, partW ;// (x0+partWidth)
+ LDR width, [sp,#0x218] ;// width
+ CMP tmp5, width
+ BHI do_fill ;// (x0+partW)>width
+
+ CMP y0, #0
+ BLT do_fill ;// (y0 < 0)
+ LDR partH, [sp,#0x224] ;// partHeight
+ ADD tmp6, y0, partH ;// (y0+partHeight)
+ ADD tmp6, tmp6, #5 ;// (y0+partH+5)
+ LDR height, [sp,#0x21c] ;// height
+ CMP tmp6, height
+ BLS skip_fill ;// no overfill needed
+
+;// The partW x (partH+5) source area crosses a bound checked above: go through h264bsdFillBlock
+do_fill
+ LDR partH, [sp,#0x224] ;// partHeight
+ ADD tmp5, partH, #5 ;// tmp5 = partH + 5;
+ LDR height, [sp,#0x21c] ;// height
+ LDR partW, [sp,#0x220] ;// partWidth
+ STMIB sp, {height, partW} ;// sp+4 = height, sp+8 = partWidth
+ STR tmp5, [sp,#0xc] ;// sp+c partHeight+5
+ STR partW, [sp,#0x10] ;// sp+10 = partWidth
+ LDR width, [sp,#0x218] ;// width
+ STR width, [sp,#0] ;// sp+0 = width
+ ADD buff, sp, #0x28 ;// buff = p1[21*21/4+1]
+ BL h264bsdFillBlock ;// r0=ref, r1=buff, r2=x0, r3=y0 (still the incoming values) + 5 stack words
+
+ MOV x0, #0
+ STR x0,[sp,#0x1ec] ;// x0 = 0
+ STR x0,[sp,#0x1f0] ;// y0 = 0
+ ADD ref,sp,#0x28 ;// ref = p1
+ STR partW, [sp,#0x218] ;// saved width = partWidth; rows are read from p1 from here on
+
+
+skip_fill
+ LDR x0 ,[sp,#0x1ec] ;// x0
+ LDR y0 ,[sp,#0x1f0] ;// y0
+ LDR width, [sp,#0x218] ;// width
+ MLA tmp6, width, y0, x0 ;// y0*width+x0
+ ADD ref, ref, tmp6 ;// ref += y0*width+x0
+ LDR mb, [sp, #0x1e8] ;// mb
+
+ ADD count, partW, partH, LSL #8 ;// |xx|xx|partH|partW|
+ LDR tmp5, = 0x00010100
+ RSB count, tmp5, count, LSL #8 ;// |xx|partH-1|partW-1|xx|
+ LDR tmp2, [sp, #0x228] ;// verOffset
+ ADD count, count, tmp2 ;// |xx|partH-1|partW-1|verOffset|
+ LDR plus16, = 0x00100010 ;// +16 rounding term in both halfwords
+
+ AND tmp1, count, #0x0000FF00 ;// partWidth-1 (still in bits 15:8)
+
+
+loop_y
+ ADD count, count, tmp1, LSL #16 ;// partWidth-1 to top byte
+
+loop_x
+ LDR tmp1, [ref], width ;// |a4|a3|a2|a1|
+ LDR tmp2, [ref], width ;// |c4|c3|c2|c1|
+ LDR tmp3, [ref], width ;// |g4|g3|g2|g1|
+ LDR tmp4, [ref], width ;// |m4|m3|m2|m1|
+ LDR tmp5, [ref], width ;// |r4|r3|r2|r1|
+ LDR tmp6, [ref], width ;// |t4|t3|t2|t1|
+
+ ;// first four pixels
+ UXTB16 tmpa, tmp3 ;// |g3|g1|
+ UXTAB16 tmpa, tmpa, tmp4 ;// |g3+m3|g1+m1|
+ UXTB16 tmpb, tmp2 ;// |c3|c1|
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+
+ UXTAB16 tmpb, tmpb, tmp5 ;// |c3+r3|c1+r1|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpa, tmpa, tmp1 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp6 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate each halfword to 0..8191 (0..255 after >>5)
+ MOVS tmp1, count, LSL #31 ;// Z=1 when verOffset bit 0 is clear; these flags must survive until the SUBS at the end of loop_x
+ LDR tmpa, = 0xFF00FF00 ;// byte mask for pixels 2 and 4
+ MVNEQ tmp1, tmp3 ;// select verOffset=0
+ MVNNE tmp1, tmp4 ;// select verOffset=1
+ AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divide by 32 (<<8 into the odd byte, >>5)
+ ORR res, res, tmpa ;// |p4|p3|p2|p1| half-sample result
+
+ LDR tmpa, = 0x80808080
+ UHSUB8 res, res, tmp1 ;// bilinear interpolation: per byte (res - ~pix) >> 1
+ LDR tmp1, [ref], width ;// load next row
+ EOR res, res, tmpa ;// correct sign
+
+ STR res, [mb], #16 ;// next row (mb)
+
+
+ ;// tmp2 = |a4|a3|a2|a1|
+ ;// tmp3 = |c4|c3|c2|c1|
+ ;// tmp4 = |g4|g3|g2|g1|
+ ;// tmp5 = |m4|m3|m2|m1|
+ ;// tmp6 = |r4|r3|r2|r1|
+ ;// tmp1 = |t4|t3|t2|t1|
+
+ ;// second four pixels
+ UXTB16 tmpa, tmp4 ;// |g3|g1|
+ UXTAB16 tmpa, tmpa, tmp5 ;// |g3+m3|g1+m1|
+ UXTB16 tmpb, tmp3 ;// |c3|c1|
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTAB16 tmpb, tmpb, tmp6 ;// |c3+r3|c1+r1|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpa, tmpa, tmp2 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp1 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR res, = 0x00FF00FF
+ UXTB16 tmpa, tmp4, ROR #8 ;// |g4|g2|
+ UXTAB16 tmpa, tmpa, tmp5, ROR #8 ;// |g4+m4|g2+m2|
+ AND res, res, tmpb, LSR #5 ;// mask and divide by 32
+
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTB16 tmpb, tmp3, ROR #8 ;// |c4|c2|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpb, tmpb, tmp6, ROR #8 ;// |c4+r4|c2+r2|
+ UXTAB16 tmpa, tmpa, tmp2, ROR #8 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR tmpa, = 0xFF00FF00
+ MVNEQ tmp2, tmp4 ;// select verOffset=0
+ MVNNE tmp2, tmp5 ;// select verOffset=1
+
+ AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divide by 32
+ ORR res, res, tmpa
+ LDR tmpa, = 0x80808080
+ UHSUB8 res, res, tmp2 ;// bilinear interpolation
+ LDR tmp2, [ref], width ;// load next row
+ EOR res, res, tmpa ;// correct sign
+ STR res, [mb], #16 ;// next row
+
+ ;// tmp3 = |a4|a3|a2|a1|
+ ;// tmp4 = |c4|c3|c2|c1|
+ ;// tmp5 = |g4|g3|g2|g1|
+ ;// tmp6 = |m4|m3|m2|m1|
+ ;// tmp1 = |r4|r3|r2|r1|
+ ;// tmp2 = |t4|t3|t2|t1|
+
+ ;// third four pixels
+ UXTB16 tmpa, tmp5 ;// |g3|g1|
+ UXTAB16 tmpa, tmpa, tmp6 ;// |g3+m3|g1+m1|
+ UXTB16 tmpb, tmp4 ;// |c3|c1|
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTAB16 tmpb, tmpb, tmp1 ;// |c3+r3|c1+r1|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpa, tmpa, tmp3 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp2 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR res, = 0x00FF00FF
+ UXTB16 tmpa, tmp5, ROR #8 ;// |g4|g2|
+ UXTAB16 tmpa, tmpa, tmp6, ROR #8 ;// |g4+m4|g2+m2|
+ AND res, res, tmpb, LSR #5 ;// mask and divide by 32
+
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTB16 tmpb, tmp4, ROR #8 ;// |c4|c2|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpb, tmpb, tmp1, ROR #8 ;// |c4+r4|c2+r2|
+ UXTAB16 tmpa, tmpa, tmp3, ROR #8 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp2, ROR #8 ;// 16+20(G+M)+A+T
+
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR tmpa, = 0xFF00FF00
+ MVNEQ tmp3, tmp5 ;// select verOffset=0
+ MVNNE tmp3, tmp6 ;// select verOffset=1
+
+ AND tmpa, tmpa, tmpb, LSL #3 ;// mask and divide by 32
+ ORR res, res, tmpa
+ LDR tmpa, = 0x80808080
+ UHSUB8 res, res, tmp3 ;// bilinear interpolation
+ LDR tmp3, [ref] ;// load next row (no post-increment: ref is now 8 rows below the column start)
+ EOR res, res, tmpa ;// correct sign
+ STR res, [mb], #16 ;// next row
+
+ ;// tmp4 = |a4|a3|a2|a1|
+ ;// tmp5 = |c4|c3|c2|c1|
+ ;// tmp6 = |g4|g3|g2|g1|
+ ;// tmp1 = |m4|m3|m2|m1|
+ ;// tmp2 = |r4|r3|r2|r1|
+ ;// tmp3 = |t4|t3|t2|t1|
+
+ ;// fourth four pixels
+ UXTB16 tmpa, tmp6 ;// |g3|g1|
+ UXTAB16 tmpa, tmpa, tmp1 ;// |g3+m3|g1+m1|
+ UXTB16 tmpb, tmp5 ;// |c3|c1|
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTAB16 tmpb, tmpb, tmp2 ;// |c3+r3|c1+r1|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpa, tmpa, tmp4 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp3 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR res, = 0x00FF00FF
+ UXTB16 tmpa, tmp6, ROR #8 ;// |g4|g2|
+ UXTAB16 tmpa, tmpa, tmp1, ROR #8 ;// |g4+m4|g2+m2|
+ AND res, res, tmpb, LSR #5 ;// mask and divide by 32
+
+ ADD tmpa, tmpa, tmpa, LSL #2 ;// 5(G+M)
+ UXTB16 tmpb, tmp5, ROR #8 ;// |c4|c2|
+ ADD tmpa, plus16, tmpa, LSL #2 ;// 16+20(G+M)
+ UXTAB16 tmpb, tmpb, tmp2, ROR #8 ;// |c4+r4|c2+r2|
+ UXTAB16 tmpa, tmpa, tmp4, ROR #8 ;// 16+20(G+M)+A
+ UXTAB16 tmpa, tmpa, tmp3, ROR #8 ;// 16+20(G+M)+A+T
+
+ ADD tmpb, tmpb, tmpb, LSL #2 ;// 5(C+R)
+ SSUB16 tmpa, tmpa, tmpb ;// 16+20(G+M)+(A+T)-5(C+R)
+
+ USAT16 tmpb, #13, tmpa ;// saturate
+ LDR tmp4, = 0xFF00FF00 ;// row a (tmp4) is dead, reuse it for the byte mask
+ MVNEQ tmp5, tmp6 ;// select verOffset=0
+ MVNNE tmp5, tmp1 ;// select verOffset=1
+
+ AND tmpa, tmp4, tmpb, LSL #3 ;// mask and divide by 32
+ ORR res, res, tmpa
+ LDR tmpa, = 0x80808080
+ UHSUB8 res, res, tmp5 ;// bilinear interpolation
+
+ ;// decrement loop_x counter
+ SUBS count, count, #4<<24 ;// (partWidth-1) -= 4; C is tested by BCS loop_x below
+
+ ;// calculate "ref" address for next round
+ SUB ref, ref, width, LSL #3 ;// ref -= 8*width;
+ ADD ref, ref, #4; ;// next column (4 pixels)
+
+ EOR res, res, tmpa ;// correct sign
+ STR res, [mb], #-44 ;// back up three 16-byte rows, forward 4 pixels (-48+4)
+
+ BCS loop_x ;// flags from the SUBS above: loop while the x counter did not borrow
+
+ ADDS count, count, #252<<16 ;// (partHeight-1) -= 4;
+ ADD ref, ref, width, LSL #2 ;// ref += 4*width
+ AND tmp1, count, #0x0000FF00 ;// partWidth-1
+ MOV tmp2, #1
+ ADD tmp2, tmp2, tmp1, LSR #8 ;// partWidth
+ SUB ref, ref, tmp2 ;// ref -= partWidth
+ ADD mb, mb, #64;
+ SUB mb, mb, tmp2; ;// mb -= partWidth
+ BGE loop_y ;// flags from the ADDS above; assumes top byte is 0xFF after loop_x (partWidth multiple of 4) - TODO confirm
+
+ ADD sp,sp,#0x1f4 ;// drop locals (0x1e4) plus the saved r0-r3 (16 bytes)
+ LDMFD sp!, {r4-r11, pc} ;// restore registers and return
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/win_asm.bat b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/win_asm.bat
new file mode 100644
index 0000000..1b8d88c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/win_asm.bat
@@ -0,0 +1,15 @@
+echo off
+set ASMFLAGS= -checkreglist -CPU ARM1136 -PreDefine "H264DEC_WINASM SETL {TRUE}"
+set ASM="D:\Program Files\Microsoft Visual Studio 8\VC\ce\bin\x86_arm\armasm"
+echo on
+
+%ASM% %ASMFLAGS% h264bsd_interpolate_chroma_ver.s
+%ASM% %ASMFLAGS% h264bsd_interpolate_chroma_hor.s
+%ASM% %ASMFLAGS% h264bsd_interpolate_hor_half.s
+%ASM% %ASMFLAGS% h264bsd_interpolate_hor_quarter.s
+%ASM% %ASMFLAGS% h264bsd_interpolate_hor_ver_quarter.s
+%ASM% %ASMFLAGS% h264bsd_interpolate_ver_half.s
+%ASM% %ASMFLAGS% h264bsd_interpolate_ver_quarter.s
+rem The two commands below are disabled, so this script does not assemble those sources.
+rem %ASM% %ASMFLAGS% h264bsd_interpolate_chroma_hor_ver.s
+rem %ASM% %ASMFLAGS% h264bsd_interpolate_mid_hor.s
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdClearMbLayer.s b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdClearMbLayer.s
new file mode 100644
index 0000000..db11654
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdClearMbLayer.s
@@ -0,0 +1,66 @@
+;
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+
+ REQUIRE8
+ PRESERVE8
+
+ AREA |.text|, CODE
+
+ EXPORT h264bsdClearMbLayer
+
+; Input / output registers
+pMbLayer RN 0
+size RN 1
+pTmp RN 2
+step RN 3
+
+; -- NEON registers --
+
+qZero QN Q0.U8
+
+;/*------------------------------------------------------------------------------
+;
+; Function: h264bsdClearMbLayer
+;
+; Functional description:
+; Stores zero bytes upwards from pMbLayer, 64 bytes per loop pass, using two interleaved store pointers.
+; Inputs:
+; pMbLayer (r0) start of the area; size (r1) byte count. NOTE(review): presumably a multiple of 64 and at least 64 - confirm with callers.
+; Outputs:
+; the area at pMbLayer is zeroed in whole 64-byte chunks; r0-r3 and Q0 are overwritten
+; Returns:
+; no return value is set up
+;------------------------------------------------------------------------------*/
+
+h264bsdClearMbLayer
+
+ VMOV qZero, #0 ;// 16 zero bytes used by every store below
+ ADD pTmp, pMbLayer, #16 ;// second store pointer, 16 bytes ahead of the first
+ MOV step, #32 ;// each pointer skips the 16 bytes written through the other one
+ SUBS size, size, #64 ;// flags of this SUBS are never tested: a size below 64 wraps and the loop keeps running
+
+loop
+ VST1 qZero, [pMbLayer], step ;// bytes 0-15 of this chunk
+ SUBS size, size, #64 ;// carry is set when at least 64 more bytes remain after this chunk
+ VST1 qZero, [pTmp], step ;// bytes 16-31
+ VST1 qZero, [pMbLayer], step ;// bytes 32-47
+ VST1 qZero, [pTmp], step ;// bytes 48-63
+ BCS loop ;// uses the carry from the SUBS above; the stores in between leave flags alone
+
+ BX lr
+ END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdCountLeadingZeros.s b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdCountLeadingZeros.s
new file mode 100644
index 0000000..c7bd73e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdCountLeadingZeros.s
@@ -0,0 +1,49 @@
+;
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+
+ REQUIRE8
+ PRESERVE8
+
+ AREA |.text|, CODE
+
+ EXPORT h264bsdCountLeadingZeros
+
+; Input / output registers
+value RN 0
+
+; -- NEON registers --
+
+;/*------------------------------------------------------------------------------
+;
+; Function: h264bsdCountLeadingZeros
+;
+; Functional description:
+; Counts the zero bits above the most significant set bit of a 32-bit word with a single CLZ.
+; Inputs:
+; value (r0) word to examine
+; Outputs:
+; none besides the return register
+; Returns:
+; r0 = number of leading zero bits in value (CLZ gives 32 when value is 0)
+;------------------------------------------------------------------------------*/
+
+h264bsdCountLeadingZeros
+
+ CLZ value, value
+ BX lr
+ END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFillRow7.s b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFillRow7.s
new file mode 100644
index 0000000..5bfac92
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFillRow7.s
@@ -0,0 +1,180 @@
+;
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+
+ REQUIRE8
+ PRESERVE8
+
+ AREA |.text|, CODE
+
+ EXPORT h264bsdFillRow7
+
+; Input / output registers
+
+ref RN 0
+fill RN 1
+left RN 2
+tmp2 RN 2
+center RN 3
+right RN 4
+tmp1 RN 5
+
+; -- NEON registers --
+
+qTmp0 QN Q0.U8
+qTmp1 QN Q1.U8
+dTmp0 DN D0.U8
+dTmp1 DN D1.U8
+dTmp2 DN D2.U8
+dTmp3 DN D3.U8
+
+
+;/*------------------------------------------------------------------------------
+;
+; Function: h264bsdFillRow7
+;
+; Functional description:
+; Builds one row at fill: left copies of ref[0], then center bytes copied from ref, then right copies of the byte just before the advanced ref.
+; Inputs:
+; ref (r0) source, fill (r1) destination, left (r2), center (r3), right (fifth argument, read from the stack). Counts are not range-checked here.
+; Outputs:
+; left + center + right bytes written starting at fill; r0-r3 and Q0-Q1 are overwritten
+; Returns:
+; no return value is set up
+;------------------------------------------------------------------------------*/
+
+h264bsdFillRow7
+ PUSH {r4-r6,lr}
+ CMP left, #0
+ LDR right, [sp,#0x10] ;// fifth argument: 0x10 skips the four registers pushed above
+ BEQ switch_center ;// flags still come from CMP left, #0 (LDR leaves them alone)
+ LDRB tmp1, [ref,#0] ;// byte replicated into the left part
+
+loop_left
+ SUBS left, left, #1
+ STRB tmp1, [fill], #1
+ BNE loop_left
+
+switch_center
+ ASR tmp2,center,#2 ;// tmp2 shares r2 with left, which is finished; index = center >> 2
+ CMP tmp2,#9
+ ADDCC pc,pc,tmp2,LSL #2 ;// index 0-8 (unsigned): jump into the table, pc reads as this instruction + 8
+ B loop_center ;// reached when the ADDCC is not taken: the byte loop copies everything
+ B loop_center ;// index 0: fewer than 4 bytes
+ B case_1
+ B case_2
+ B case_3
+ B case_4
+ B case_5
+ B case_6
+ B case_7
+ B case_8
+;case_8
+; LDR tmp2, [ref], #4
+; SUB center, center, #4
+; STR tmp2, [fill], #4
+;case_7
+; LDR tmp2, [ref], #4
+; SUB center, center, #4
+; STR tmp2, [fill], #4
+;case_6
+; LDR tmp2, [ref], #4
+; SUB center, center, #4
+; STR tmp2, [fill],#4
+;case_5
+; LDR tmp2, [ref], #4
+; SUB center, center, #4
+; STR tmp2, [fill],#4
+;case_4
+; LDR tmp2, [ref],#4
+; SUB center, center, #4
+; STR tmp2, [fill], #4
+;case_3
+; LDR tmp2, [ref],#4
+; SUB center, center, #4
+; STR tmp2, [fill], #4
+;case_2
+; LDR tmp2, [ref],#4
+; SUB center, center, #4
+; STR tmp2, [fill], #4
+;case_1
+; LDR tmp2, [ref],#4
+; SUB center, center, #4
+; STR tmp2, [fill], #4
+
+case_8
+ VLD1 {qTmp0, qTmp1}, [ref]! ;// 32 bytes in one load
+ SUB center, center, #32
+ VST1 qTmp0, [fill]!
+ VST1 qTmp1, [fill]!
+ B loop_center
+case_7
+ VLD1 {dTmp0,dTmp1,dTmp2}, [ref]! ;// 24 bytes through D0-D2, plus one word below = 28
+ SUB center, center, #28
+ LDR tmp2, [ref], #4
+ VST1 {dTmp0,dTmp1,dTmp2}, [fill]!
+ STR tmp2, [fill],#4
+ B loop_center
+case_6
+ VLD1 {dTmp0,dTmp1,dTmp2}, [ref]! ;// 24 bytes
+ SUB center, center, #24
+ VST1 {dTmp0,dTmp1,dTmp2}, [fill]!
+ B loop_center
+case_5
+ VLD1 qTmp0, [ref]! ;// 16 bytes, plus one word below = 20
+ SUB center, center, #20
+ LDR tmp2, [ref], #4
+ VST1 qTmp0, [fill]!
+ STR tmp2, [fill],#4
+ B loop_center
+case_4
+ VLD1 qTmp0, [ref]! ;// 16 bytes
+ SUB center, center, #16
+ VST1 qTmp0, [fill]!
+ B loop_center
+case_3
+ VLD1 dTmp0, [ref]! ;// 8 bytes, plus one word below = 12
+ SUB center, center, #12
+ LDR tmp2, [ref], #4
+ VST1 dTmp0, [fill]!
+ STR tmp2, [fill],#4
+ B loop_center
+case_2
+ LDR tmp2, [ref],#4 ;// first of two words; execution falls through into case_1
+ SUB center, center, #4
+ STR tmp2, [fill], #4
+case_1
+ LDR tmp2, [ref],#4
+ SUB center, center, #4
+ STR tmp2, [fill], #4
+
+loop_center
+ CMP center, #0 ;// copy what is left of center one byte at a time
+ LDRBNE tmp2, [ref], #1
+ SUBNE center, center, #1 ;// no S suffix: flags stay those of the CMP above
+ STRBNE tmp2, [fill], #1
+ BNE loop_center
+ CMP right,#0
+ POPEQ {r4-r6,pc} ;// nothing to replicate on the right: return
+ LDRB tmp2, [ref,#-1] ;// byte just before the advanced ref is replicated into the right part
+
+loop_right
+ STRB tmp2, [fill], #1
+ SUBS right, right, #1
+ BNE loop_right
+
+ POP {r4-r6,pc}
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFlushBits.s b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFlushBits.s
new file mode 100644
index 0000000..21335b8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdFlushBits.s
@@ -0,0 +1,82 @@
+;
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+
+ REQUIRE8
+ PRESERVE8
+
+ AREA |.text|, CODE
+
+ EXPORT h264bsdFlushBits
+
+; Input / output registers
+pStrmData RN 0
+numBits RN 1
+readBits RN 2
+strmBuffSize RN 3
+pStrmBuffStart RN 1
+pStrmCurrPos RN 2
+bitPosInWord RN 1
+
+; -- NEON registers --
+
+
+
+;/*------------------------------------------------------------------------------
+;
+; Function: h264bsdFlushBits
+;
+; Functional description:
+; Advances the stream read position by numBits bits and rewrites the position fields held in *pStrmData.
+; Inputs:
+; pStrmData (r0) stream state, accessed at byte offsets 0x0-0x10; numBits (r1) number of bits to skip
+; Outputs:
+; [pStrmData,#0x10] and [pStrmData,#0x8] are always rewritten; [pStrmData,#0x4] only when the new position is inside the buffer
+; Returns:
+; r0 = 0 on success, 0xFFFFFFFF when the new bit count exceeds 8 * strmBuffSize
+;------------------------------------------------------------------------------*/
+
+h264bsdFlushBits
+;// PUSH {r4-r6,lr}
+
+ LDR readBits, [pStrmData, #0x10] ;// bits read so far. NOTE(review): offsets presumably follow the strmData_t layout - confirm
+ LDR strmBuffSize, [pStrmData, #0xC] ;// buffer size in bytes (scaled by 8 in the CMP below)
+
+ ADD readBits, readBits, numBits
+ AND bitPosInWord, readBits, #7 ;// bitPosInWord reuses r1, numBits is no longer needed
+
+ STR readBits, [pStrmData, #0x10] ;// both counters are stored before the end-of-stream check
+ STR bitPosInWord, [pStrmData, #0x8]
+
+ LDR pStrmBuffStart, [pStrmData, #0x0] ;// r1 is reused again, now for the buffer start
+
+ CMP readBits, strmBuffSize, LSL #3
+
+ BHI end_of_stream ;// unsigned: more bits consumed than the buffer holds
+
+ ADD pStrmCurrPos, pStrmBuffStart, readBits, LSR #3 ;// start + whole bytes consumed
+ STR pStrmCurrPos, [pStrmData, #0x4]
+ MOV r0, #0
+ BX lr
+;// POP {r4-r6,pc}
+
+end_of_stream
+ MVN r0, #0 ;// r0 = 0xFFFFFFFF
+ BX lr
+;// POP {r4-r6,pc}
+
+ END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdWriteMacroblock.s b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdWriteMacroblock.s
new file mode 100644
index 0000000..38a0781
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm/h264bsdWriteMacroblock.s
@@ -0,0 +1,152 @@
+;
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+;
+
+ REQUIRE8
+ PRESERVE8
+
+ AREA |.text|, CODE
+
+ EXPORT h264bsdWriteMacroblock
+
+; Input / output registers
+image RN 0
+data RN 1
+width RN 2
+luma RN 3
+cb RN 4
+cr RN 5
+cwidth RN 6
+
+; -- NEON registers --
+
+qRow0 QN Q0.U8
+qRow1 QN Q1.U8
+qRow2 QN Q2.U8
+qRow3 QN Q3.U8
+qRow4 QN Q4.U8
+qRow5 QN Q5.U8
+qRow6 QN Q6.U8
+qRow7 QN Q7.U8
+qRow8 QN Q8.U8
+qRow9 QN Q9.U8
+qRow10 QN Q10.U8
+qRow11 QN Q11.U8
+qRow12 QN Q12.U8
+qRow13 QN Q13.U8
+qRow14 QN Q14.U8
+qRow15 QN Q15.U8
+
+dRow0 DN D0.U8
+dRow1 DN D1.U8
+dRow2 DN D2.U8
+dRow3 DN D3.U8
+dRow4 DN D4.U8
+dRow5 DN D5.U8
+dRow6 DN D6.U8
+dRow7 DN D7.U8
+dRow8 DN D8.U8
+dRow9 DN D9.U8
+dRow10 DN D10.U8
+dRow11 DN D11.U8
+dRow12 DN D12.U8
+dRow13 DN D13.U8
+dRow14 DN D14.U8
+dRow15 DN D15.U8
+
+;/*------------------------------------------------------------------------------
+;
+; Function: h264bsdWriteMacroblock
+;
+; Functional description:
+; Write one macroblock into the image. Both luma and chroma
+; components will be written at the same time.
+;
+; Inputs:
+; data pointer to macroblock data to be written, 256 values for
+; luma followed by 64 values for both chroma components
+;
+; Outputs:
+; image pointer to the image where the macroblock will be written;
+; the fields at offsets 4, 0xC, 0x10 and 0x14 are read as width and luma/cb/cr destinations
+; Returns:
+; none
+;
+;------------------------------------------------------------------------------*/
+
+h264bsdWriteMacroblock
+ PUSH {r4-r6,lr}
+ VPUSH {q4-q7} ;q4-q7 are overwritten below and restored before returning
+
+ LDR width, [image, #4]
+ LDR luma, [image, #0xC]
+ LDR cb, [image, #0x10]
+ LDR cr, [image, #0x14]
+
+
+; Write luma
+ VLD1 {qRow0, qRow1}, [data]!
+ LSL width, width, #4 ;width * 16 is the step between luma rows
+ VLD1 {qRow2, qRow3}, [data]!
+ LSR cwidth, width, #1 ;half the luma step is used between chroma rows
+ VST1 {qRow0}, [luma@128], width ;@128: the store names a 16-byte aligned address
+ VLD1 {qRow4, qRow5}, [data]!
+ VST1 {qRow1}, [luma@128], width
+ VLD1 {qRow6, qRow7}, [data]!
+ VST1 {qRow2}, [luma@128], width
+ VLD1 {qRow8, qRow9}, [data]!
+ VST1 {qRow3}, [luma@128], width
+ VLD1 {qRow10, qRow11}, [data]!
+ VST1 {qRow4}, [luma@128], width
+ VLD1 {qRow12, qRow13}, [data]!
+ VST1 {qRow5}, [luma@128], width
+ VLD1 {qRow14, qRow15}, [data]!
+ VST1 {qRow6}, [luma@128], width
+
+ VLD1 {qRow0, qRow1}, [data]! ;cb rows 0,1,2,3
+ VST1 {qRow7}, [luma@128], width
+ VLD1 {qRow2, qRow3}, [data]! ;cb rows 4,5,6,7
+ VST1 {qRow8}, [luma@128], width
+ VLD1 {qRow4, qRow5}, [data]! ;cr rows 0,1,2,3
+ VST1 {qRow9}, [luma@128], width
+ VLD1 {qRow6, qRow7}, [data]! ;cr rows 4,5,6,7
+ VST1 {qRow10}, [luma@128], width
+ VST1 {dRow0}, [cb@64], cwidth ;dRow0-dRow7 now hold the cb rows, dRow8-dRow15 the cr rows
+ VST1 {dRow8}, [cr@64], cwidth
+ VST1 {qRow11}, [luma@128], width
+ VST1 {dRow1}, [cb@64], cwidth
+ VST1 {dRow9}, [cr@64], cwidth
+ VST1 {qRow12}, [luma@128], width
+ VST1 {dRow2}, [cb@64], cwidth
+ VST1 {dRow10}, [cr@64], cwidth
+ VST1 {qRow13}, [luma@128], width
+ VST1 {dRow3}, [cb@64], cwidth
+ VST1 {dRow11}, [cr@64], cwidth
+ VST1 {qRow14}, [luma@128], width
+ VST1 {dRow4}, [cb@64], cwidth
+ VST1 {dRow12}, [cr@64], cwidth
+ VST1 {qRow15}, [luma] ;last luma row: no alignment qualifier and no pointer update
+ VST1 {dRow5}, [cb@64], cwidth
+ VST1 {dRow13}, [cr@64], cwidth
+ VST1 {dRow6}, [cb@64], cwidth
+ VST1 {dRow14}, [cr@64], cwidth
+ VST1 {dRow7}, [cb@64]
+ VST1 {dRow15}, [cr@64]
+
+ VPOP {q4-q7}
+ POP {r4-r6,pc}
+ END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S
new file mode 100644
index 0000000..f39f5c4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S
@@ -0,0 +1,41 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+
+
+/* REQUIRE8: records build attribute 24 (Tag_ABI_align_needed) with value 1 in the object file. */
+ .macro REQUIRE8
+ .eabi_attribute 24, 1
+ .endm
+/* PRESERVE8: records build attribute 25 (Tag_ABI_align_preserved) with value 1 in the object file. */
+ .macro PRESERVE8
+ .eabi_attribute 25, 1
+ .endm
+
+/* function name, export=0: opens a function symbol and emits its label; a non-zero export also makes it global. */
+ .macro function name, export=0
+.if \export
+ .global \name
+.endif
+ .type \name, %function
+ .func \name
+\name:
+ .endm
+/* endfunction: closes the .func region opened by the function macro. */
+ .macro endfunction
+ .endfunc
+ .endm
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S
new file mode 100644
index 0000000..c8a940e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S
@@ -0,0 +1,68 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+#include "asm_common.S"
+
+ preserve8
+
+ .fpu neon
+ .text
+
+/* Input / output registers */
+#define pMbLayer r0
+#define size r1
+#define pTmp r2
+#define step r3
+
+/* -- NEON registers -- */
+
+#define qZero Q0.U8
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdClearMbLayer
+
+ Functional description:
+ Stores zero bytes upwards from pMbLayer, 64 bytes per loop pass, using two interleaved store pointers.
+ Inputs:
+ pMbLayer (r0) start of the area; size (r1) byte count. NOTE(review): presumably a multiple of 64 and at least 64 - confirm with callers.
+ Outputs:
+ the area at pMbLayer is zeroed in whole 64-byte chunks; r0-r3 and Q0 are overwritten
+ Returns:
+ no return value is set up
+------------------------------------------------------------------------------*/
+
+function h264bsdClearMbLayer, export=1
+
+ VMOV qZero, #0 /* 16 zero bytes used by every store below */
+ ADD pTmp, pMbLayer, #16 /* second store pointer, 16 bytes ahead of the first */
+ MOV step, #32 /* each pointer skips the 16 bytes written through the other one */
+ SUBS size, size, #64 /* flags of this SUBS are never tested: a size below 64 wraps and the loop keeps running */
+
+loop:
+ VST1 {qZero}, [pMbLayer], step /* bytes 0-15 of this chunk */
+ SUBS size, size, #64 /* carry is set when at least 64 more bytes remain after this chunk */
+ VST1 {qZero}, [pTmp], step /* bytes 16-31 */
+ VST1 {qZero}, [pMbLayer], step /* bytes 32-47 */
+ VST1 {qZero}, [pTmp], step /* bytes 48-63 */
+ BCS loop /* uses the carry from the SUBS above; the stores in between leave flags alone */
+
+ BX lr
+
+endfunction
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S
new file mode 100644
index 0000000..05253d0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S
@@ -0,0 +1,48 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+#include "asm_common.S"
+
+ preserve8
+ .arm
+ .text
+
+
+/* Input / output registers */
+#define value r0
+
+/* -- NEON registers -- */
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdCountLeadingZeros
+
+ Functional description:
+ Counts the zero bits above the most significant set bit of a 32-bit word with a single CLZ.
+ Inputs:
+ value (r0) word to examine
+ Outputs:
+ none besides the return register
+ Returns:
+ r0 = number of leading zero bits in value (CLZ gives 32 when value is 0)
+------------------------------------------------------------------------------*/
+
+function h264bsdCountLeadingZeros, export=1
+
+ CLZ value, value
+ BX lr
+
+endfunction
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S
new file mode 100644
index 0000000..6955b9a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S
@@ -0,0 +1,143 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+#include "asm_common.S"
+
+ preserve8
+
+ .fpu neon
+ .text
+
+/* Input / output registers */
+
+#define ref r0
+#define fill r1
+#define left r2
+#define tmp2 r2
+#define center r3
+#define right r4
+#define tmp1 r5
+
+/* -- NEON registers -- */
+
+#define qTmp0 Q0.U8
+#define qTmp1 Q1.U8
+#define dTmp0 D0.U8
+#define dTmp1 D1.U8
+#define dTmp2 D2.U8
+#define dTmp3 D3.U8
+
+/*
+void h264bsdFillRow7(const u8 * ref, u8 * fill, i32 left, i32 center,
+ i32 right);
+*/
+/* Writes left copies of ref[0], then center bytes copied from ref, then right copies of the byte just before the advanced ref. Counts are not range-checked. */
+function h264bsdFillRow7, export=1
+/* r0-r3 hold ref, fill, left and center on entry; right is the fifth argument and is read from the stack. */
+ PUSH {r4-r6,lr}
+ CMP left, #0
+ LDR right, [sp,#0x10] /* 0x10 skips the four registers pushed above */
+ BEQ switch_center /* flags still come from CMP left, #0 (LDR leaves them alone) */
+ LDRB tmp1, [ref,#0] /* byte replicated into the left part */
+
+loop_left:
+ SUBS left, left, #1
+ STRB tmp1, [fill], #1
+ BNE loop_left
+
+switch_center:
+ ASR tmp2,center,#2 /* tmp2 shares r2 with left, which is finished; index = center >> 2 */
+ CMP tmp2,#9
+ ADDCC pc,pc,tmp2,LSL #2 /* index 0-8 (unsigned): jump into the table, pc reads as this instruction + 8 in ARM state */
+ B loop_center /* reached when the ADDCC is not taken: the byte loop copies everything */
+ B loop_center /* index 0: fewer than 4 bytes */
+ B case_1
+ B case_2
+ B case_3
+ B case_4
+ B case_5
+ B case_6
+ B case_7
+ B case_8
+
+case_8:
+ VLD1 {qTmp0, qTmp1}, [ref]! /* 32 bytes in one load */
+ SUB center, center, #32
+ VST1 {qTmp0}, [fill]!
+ VST1 {qTmp1}, [fill]!
+ B loop_center
+case_7:
+ VLD1 {dTmp0,dTmp1,dTmp2}, [ref]! /* 24 bytes through D0-D2, plus one word below = 28 */
+ SUB center, center, #28
+ LDR tmp2, [ref], #4
+ VST1 {dTmp0,dTmp1,dTmp2}, [fill]!
+ STR tmp2, [fill],#4
+ B loop_center
+case_6:
+ VLD1 {dTmp0,dTmp1,dTmp2}, [ref]! /* 24 bytes */
+ SUB center, center, #24
+ VST1 {dTmp0,dTmp1,dTmp2}, [fill]!
+ B loop_center
+case_5:
+ VLD1 {qTmp0}, [ref]! /* 16 bytes, plus one word below = 20 */
+ SUB center, center, #20
+ LDR tmp2, [ref], #4
+ VST1 {qTmp0}, [fill]!
+ STR tmp2, [fill],#4
+ B loop_center
+case_4:
+ VLD1 {qTmp0}, [ref]! /* 16 bytes */
+ SUB center, center, #16
+ VST1 {qTmp0}, [fill]!
+ B loop_center
+case_3:
+ VLD1 {dTmp0}, [ref]! /* 8 bytes, plus one word below = 12 */
+ SUB center, center, #12
+ LDR tmp2, [ref], #4
+ VST1 {dTmp0}, [fill]! /* register list braced like every other VLD1/VST1 in this file */
+ STR tmp2, [fill],#4
+ B loop_center
+case_2:
+ LDR tmp2, [ref],#4 /* first of two words; execution falls through into case_1 */
+ SUB center, center, #4
+ STR tmp2, [fill], #4
+case_1:
+ LDR tmp2, [ref],#4
+ SUB center, center, #4
+ STR tmp2, [fill], #4
+
+loop_center:
+ CMP center, #0 /* copy what is left of center one byte at a time */
+ BEQ jump
+ LDRB tmp2, [ref], #1
+ SUB center, center, #1 /* no S suffix: flags are not updated here */
+ STRB tmp2, [fill], #1
+ B loop_center /* unconditional: the only exit is the BEQ above */
+jump:
+ CMP right,#0
+ POPEQ {r4-r6,pc} /* nothing to replicate on the right: return */
+ LDRB tmp2, [ref,#-1] /* byte just before the advanced ref is replicated into the right part */
+
+loop_right:
+ STRB tmp2, [fill], #1
+ SUBS right, right, #1
+ BNE loop_right
+
+ POP {r4-r6,pc}
+
+endfunction
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S
new file mode 100644
index 0000000..b3f3191
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S
@@ -0,0 +1,78 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+#include "asm_common.S"
+
+ preserve8
+
+ .arm
+ .text
+
+/* Input / output registers */
+#define pStrmData r0
+#define numBits r1
+#define readBits r2
+#define strmBuffSize r3
+#define pStrmBuffStart r1
+#define pStrmCurrPos r2
+#define bitPosInWord r1
+
+/* Input / output registers */
+
+
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdFlushBits
+
+ Functional description:
+ Advances the stream read position by numBits bits and rewrites the position fields held in *pStrmData.
+ Inputs:
+ pStrmData (r0) stream state, accessed at byte offsets 0x0-0x10; numBits (r1) number of bits to skip
+ Outputs:
+ [pStrmData,#0x10] and [pStrmData,#0x8] are always rewritten; [pStrmData,#0x4] only when the new position is inside the buffer
+ Returns:
+ r0 = 0 on success, 0xFFFFFFFF when the new bit count exceeds 8 * strmBuffSize
+------------------------------------------------------------------------------*/
+function h264bsdFlushBits, export=1
+
+ LDR readBits, [pStrmData, #0x10] /* bits read so far. NOTE(review): offsets presumably follow the strmData_t layout - confirm */
+ LDR strmBuffSize, [pStrmData, #0xC] /* buffer size in bytes (scaled by 8 in the CMP below) */
+
+ ADD readBits, readBits, numBits
+ AND bitPosInWord, readBits, #7 /* bitPosInWord reuses r1, numBits is no longer needed */
+
+ STR readBits, [pStrmData, #0x10] /* both counters are stored before the end-of-stream check */
+ STR bitPosInWord, [pStrmData, #0x8]
+
+ LDR pStrmBuffStart, [pStrmData, #0x0] /* r1 is reused again, now for the buffer start */
+
+ CMP readBits, strmBuffSize, LSL #3
+
+ BHI end_of_stream /* unsigned: more bits consumed than the buffer holds */
+
+ ADD pStrmCurrPos, pStrmBuffStart, readBits, LSR #3 /* start + whole bytes consumed */
+ STR pStrmCurrPos, [pStrmData, #0x4]
+ MOV r0, #0
+ BX lr
+
+end_of_stream:
+ MVN r0, #0 /* r0 = 0xFFFFFFFF */
+ BX lr
+
+endfunction
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S
new file mode 100644
index 0000000..495d560
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S
@@ -0,0 +1,157 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+#include "asm_common.S"
+
+ require8
+ preserve8
+
+ .arm
+ .fpu neon
+ .text
+
+/* Input / output registers */
+#define image r0
+#define data r1
+#define width r2
+#define luma r3
+#define cb r4
+#define cr r5
+#define cwidth r6
+
+/* -- NEON registers -- */
+
+#define qRow0 Q0.U8
+#define qRow1 Q1.U8
+#define qRow2 Q2.U8
+#define qRow3 Q3.U8
+#define qRow4 Q4.U8
+#define qRow5 Q5.U8
+#define qRow6 Q6.U8
+#define qRow7 Q7.U8
+#define qRow8 Q8.U8
+#define qRow9 Q9.U8
+#define qRow10 Q10.U8
+#define qRow11 Q11.U8
+#define qRow12 Q12.U8
+#define qRow13 Q13.U8
+#define qRow14 Q14.U8
+#define qRow15 Q15.U8
+
+#define dRow0 D0.U8
+#define dRow1 D1.U8
+#define dRow2 D2.U8
+#define dRow3 D3.U8
+#define dRow4 D4.U8
+#define dRow5 D5.U8
+#define dRow6 D6.U8
+#define dRow7 D7.U8
+#define dRow8 D8.U8
+#define dRow9 D9.U8
+#define dRow10 D10.U8
+#define dRow11 D11.U8
+#define dRow12 D12.U8
+#define dRow13 D13.U8
+#define dRow14 D14.U8
+#define dRow15 D15.U8
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdWriteMacroblock
+
+ Functional description:
+ Write one macroblock into the image. Both luma and chroma
+ components will be written at the same time.
+
+ Inputs:
+ data pointer to macroblock data to be written, 256 values for
+ luma followed by 64 values for both chroma components
+
+ Outputs:
+ image pointer to the image where the macroblock will be written;
+ the fields at offsets 4, 0xC, 0x10 and 0x14 are read as width and luma/cb/cr destinations
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+
+function h264bsdWriteMacroblock, export=1
+ PUSH {r4-r6,lr}
+ VPUSH {q4-q7} /* q4-q7 are overwritten below and restored before returning */
+
+ LDR width, [image, #4]
+ LDR luma, [image, #0xC]
+ LDR cb, [image, #0x10]
+ LDR cr, [image, #0x14]
+
+
+@ Write luma
+ VLD1 {qRow0, qRow1}, [data]!
+ LSL width, width, #4 /* width * 16 is the step between luma rows */
+ VLD1 {qRow2, qRow3}, [data]!
+ LSR cwidth, width, #1 /* half the luma step is used between chroma rows */
+ VST1 {qRow0}, [luma,:128], width /* :128 - the store names a 16-byte aligned address */
+ VLD1 {qRow4, qRow5}, [data]!
+ VST1 {qRow1}, [luma,:128], width
+ VLD1 {qRow6, qRow7}, [data]!
+ VST1 {qRow2}, [luma,:128], width
+ VLD1 {qRow8, qRow9}, [data]!
+ VST1 {qRow3}, [luma,:128], width
+ VLD1 {qRow10, qRow11}, [data]!
+ VST1 {qRow4}, [luma,:128], width
+ VLD1 {qRow12, qRow13}, [data]!
+ VST1 {qRow5}, [luma,:128], width
+ VLD1 {qRow14, qRow15}, [data]!
+ VST1 {qRow6}, [luma,:128], width
+
+ VLD1 {qRow0, qRow1}, [data]! /* cb rows 0,1,2,3 */
+ VST1 {qRow7}, [luma,:128], width
+ VLD1 {qRow2, qRow3}, [data]! /* cb rows 4,5,6,7 */
+ VST1 {qRow8}, [luma,:128], width
+ VLD1 {qRow4, qRow5}, [data]! /* cr rows 0,1,2,3 */
+ VST1 {qRow9}, [luma,:128], width
+ VLD1 {qRow6, qRow7}, [data]! /* cr rows 4,5,6,7 */
+ VST1 {qRow10}, [luma,:128], width
+ VST1 {dRow0}, [cb,:64], cwidth /* dRow0-dRow7 now hold the cb rows, dRow8-dRow15 the cr rows */
+ VST1 {dRow8}, [cr,:64], cwidth
+ VST1 {qRow11}, [luma,:128], width
+ VST1 {dRow1}, [cb,:64], cwidth
+ VST1 {dRow9}, [cr,:64], cwidth
+ VST1 {qRow12}, [luma,:128], width
+ VST1 {dRow2}, [cb,:64], cwidth
+ VST1 {dRow10}, [cr,:64], cwidth
+ VST1 {qRow13}, [luma,:128], width
+ VST1 {dRow3}, [cb,:64], cwidth
+ VST1 {dRow11}, [cr,:64], cwidth
+ VST1 {qRow14}, [luma,:128], width
+ VST1 {dRow4}, [cb,:64], cwidth
+ VST1 {dRow12}, [cr,:64], cwidth
+ VST1 {qRow15}, [luma] /* last luma row: no alignment qualifier and no pointer update */
+ VST1 {dRow5}, [cb,:64], cwidth
+ VST1 {dRow13}, [cr,:64], cwidth
+ VST1 {dRow6}, [cb,:64], cwidth
+ VST1 {dRow14}, [cr,:64], cwidth
+ VST1 {dRow7}, [cb,:64]
+ VST1 {dRow15}, [cr,:64]
+
+ VPOP {q4-q7}
+ POP {r4-r6,pc}
+/* The POP above loads pc and so performs the return; no separate BX lr is needed. */
+
+endfunction
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.c
new file mode 100755
index 0000000..db77f8c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.c
@@ -0,0 +1,237 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ ExtractNalUnit
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_byte_stream.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+#define BYTE_STREAM_ERROR 0xFFFFFFFF
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+ Function name: h264bsdExtractNalUnit
+
+ Functional description:
+ Extracts one NAL unit from the byte stream buffer. Removes
+ emulation prevention bytes if present. The original stream buffer
+ is used directly and is therefore modified if emulation prevention
+ bytes are present in the stream.
+
+ Stream buffer is assumed to contain either exactly one NAL unit
+ and nothing else, or one or more NAL units embedded in byte
+ stream format described in the Annex B of the standard. Function
+ detects which one is used based on the first bytes in the buffer.
+
+ Inputs:
+ pByteStream pointer to byte stream buffer
+ len length of the stream buffer (in bytes)
+
+ Outputs:
+ pStrmData stream information is stored here
+ readBytes number of bytes "consumed" from the stream buffer
+ (set to len when no start code prefix is found)
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK error in byte stream
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdExtractNalUnit(u8 *pByteStream, u32 len, strmData_t *pStrmData,
+ u32 *readBytes)
+{
+
+/* Variables */
+
+ u32 i, tmp;
+ u32 byteCount,initByteCount; /* bytes scanned so far / offset of the first NAL unit byte */
+ u32 zeroCount; /* length of the current run of 0x00 bytes */
+ u8 byte;
+ u32 hasEmulation = HANTRO_FALSE;
+ u32 invalidStream = HANTRO_FALSE;
+ u8 *readPtr, *writePtr;
+
+/* Code */
+
+ ASSERT(pByteStream);
+ ASSERT(len);
+ ASSERT(len < BYTE_STREAM_ERROR);
+ ASSERT(pStrmData);
+
+ /* byte stream format if starts with 0x000001 or 0x000000 */
+ if (len > 3 && pByteStream[0] == 0x00 && pByteStream[1] == 0x00 &&
+ (pByteStream[2]&0xFE) == 0x00)
+ {
+ /* search for NAL unit start point, i.e. point after first start code
+ * prefix in the stream */
+ zeroCount = byteCount = 2; /* the two zero bytes tested above are already counted */
+ readPtr = pByteStream + 2;
+ /*lint -e(716) while(1) used consciously */
+ while (1)
+ {
+ byte = *readPtr++;
+ byteCount++;
+
+ if (byteCount == len) /* tested before byte is examined: a prefix ending on the last buffer byte is an error too */
+ {
+ /* no start code prefix found -> error */
+ *readBytes = len;
+ return(HANTRO_NOK);
+ }
+
+ if (!byte)
+ zeroCount++;
+ else if ((byte == 0x01) && (zeroCount >= 2))
+ break;
+ else
+ zeroCount = 0;
+ }
+
+ initByteCount = byteCount;
+
+ /* determine size of the NAL unit. Search for next start code prefix
+ * or end of stream and ignore possible trailing zero bytes */
+ zeroCount = 0;
+ /*lint -e(716) while(1) used consciously */
+ while (1)
+ {
+ byte = *readPtr++;
+ byteCount++;
+ if (!byte)
+ zeroCount++;
+
+ if ( (byte == 0x03) && (zeroCount == 2) )
+ {
+ hasEmulation = HANTRO_TRUE; /* zeroCount is reset by the else-if (byte) branch below */
+ }
+
+ if ( (byte == 0x01) && (zeroCount >= 2 ) )
+ {
+ pStrmData->strmBuffSize =
+ byteCount - initByteCount - zeroCount - 1; /* drop the zero run and the 0x01 of the next prefix */
+ zeroCount -= MIN(zeroCount, 3); /* up to three zeros are left for the next prefix; the rest count as consumed */
+ break;
+ }
+ else if (byte)
+ {
+ if (zeroCount >= 3) /* three or more zeros followed by a byte other than 0x01 */
+ invalidStream = HANTRO_TRUE; /* scanning continues, so pStrmData and readBytes are still filled in */
+ zeroCount = 0;
+ }
+
+ if (byteCount == len)
+ {
+ pStrmData->strmBuffSize = byteCount - initByteCount - zeroCount; /* trailing zeros are not part of the NAL unit */
+ break;
+ }
+
+ }
+ }
+ /* separate NAL units as input -> just set stream params */
+ else
+ {
+ initByteCount = 0;
+ zeroCount = 0;
+ pStrmData->strmBuffSize = len;
+ hasEmulation = HANTRO_TRUE; /* buffer was not scanned, so the removal pass below always runs */
+ }
+
+ pStrmData->pStrmBuffStart = pByteStream + initByteCount;
+ pStrmData->pStrmCurrPos = pStrmData->pStrmBuffStart;
+ pStrmData->bitPosInWord = 0;
+ pStrmData->strmBuffReadBits = 0;
+
+ /* return number of bytes "consumed" */
+ *readBytes = pStrmData->strmBuffSize + initByteCount + zeroCount;
+
+ if (invalidStream)
+ {
+ return(HANTRO_NOK);
+ }
+
+ /* remove emulation prevention bytes before rbsp processing */
+ if (hasEmulation)
+ {
+ tmp = pStrmData->strmBuffSize;
+ readPtr = writePtr = pStrmData->pStrmBuffStart; /* compaction is done in place */
+ zeroCount = 0;
+ for (i = tmp; i--;) /* inside the body i is the number of bytes after the current one */
+ {
+ if ((zeroCount == 2) && (*readPtr == 0x03))
+ {
+ /* emulation prevention byte shall be followed by one of the
+ * following bytes: 0x00, 0x01, 0x02, 0x03. This implies that
+ * emulation prevention 0x03 byte shall not be the last byte
+ * of the stream. */
+ if ( (i == 0) || (*(readPtr+1) > 0x03) )
+ return(HANTRO_NOK); /* strmBuffSize is not adjusted on this path */
+
+ /* do not write emulation prevention byte */
+ readPtr++;
+ zeroCount = 0;
+ }
+ else
+ {
+ /* NAL unit shall not contain byte sequences 0x000000,
+ * 0x000001 or 0x000002 */
+ if ( (zeroCount == 2) && (*readPtr <= 0x02) )
+ return(HANTRO_NOK); /* strmBuffSize is not adjusted on this path */
+
+ if (*readPtr == 0)
+ zeroCount++;
+ else
+ zeroCount = 0;
+
+ *writePtr++ = *readPtr++;
+ }
+ }
+
+ /* (readPtr - writePtr) indicates number of "removed" emulation
+ * prevention bytes -> subtract from stream buffer size */
+ pStrmData->strmBuffSize -= (u32)(readPtr - writePtr);
+ }
+
+ return(HANTRO_OK);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.h
new file mode 100755
index 0000000..36aec76
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_byte_stream.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_BYTE_STREAM_H
+#define H264SWDEC_BYTE_STREAM_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+/* Extract one NAL unit from pByteStream (len bytes) and set up pStrmData to
+ * read its payload. Emulation prevention bytes are removed in place, so the
+ * input buffer is modified. *readBytes receives the number of bytes consumed
+ * from pByteStream. Returns HANTRO_OK on success and HANTRO_NOK when the
+ * stream is invalid. */
+u32 h264bsdExtractNalUnit(u8 *pByteStream, u32 len, strmData_t *pStrmData,
+ u32 *readBytes);
+
+#endif /* #ifdef H264SWDEC_BYTE_STREAM_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.c
new file mode 100755
index 0000000..91d78bd
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.c
@@ -0,0 +1,916 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ DecodeCoeffToken
+ DecodeLevelPrefix
+ DecodeTotalZeros
+ DecodeRunBefore
+ DecodeResidualBlockCavlc
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_cavlc.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/* Following descriptions use term "information field" to represent combination
+ * of certain decoded symbol value and the length of the corresponding variable
+ * length code word. For example, total_zeros information field consists of
+ * 4 bits symbol value (bits [4,7]) along with four bits to represent length
+ * of the VLC code word (bits [0,3]) */
+
+/* length of the coeff token information field, bits [0,4] */
+#define LENGTH_TC(vlc) ((vlc) & 0x1F)
+/* length of the other information fields, bits [0,3] */
+#define LENGTH(vlc) ((vlc) & 0xF)
+/* decoded symbol value of the other information fields, bits [4,7] */
+#define INFO(vlc) (((vlc) >> 4) & 0xF)
+/* trailing ones from the coeff token information word, bits [5,10]. The
+ * argument is parenthesized so that compound expressions (e.g. a | b) are
+ * shifted as a whole instead of only their last operand. */
+#define TRAILING_ONES(coeffToken) (((coeffToken) >> 5) & 0x3F)
+/* total coeff from the coeff token information word, bits [11,15] */
+#define TOTAL_COEFF(coeffToken) (((coeffToken) >> 11) & 0x1F)
+
+/* returned by DecodeLevelPrefix when no valid code word is found; the CAVLC
+ * decoder asserts that this differs from END_OF_STREAM */
+#define VLC_NOT_FOUND 0xFFFFFFFEU
+
+/* VLC tables for coeff_token. Because of long codes (max. 16 bits) some of the
+ * tables have been split into multiple separate tables. Each array/table
+ * element has the following structure:
+ * [5 bits for tot.coeff.] [6 bits for tr.ones] [5 bits for VLC length]
+ * If there is a 0x0000 value, it means that there is no corresponding VLC
+ * codeword for that index. */
+
+/* VLC lengths up to 6 bits, 0 <= nC < 2 */
+static const u16 coeffToken0_0[32] = {
+ 0x0000,0x0000,0x0000,0x2066,0x1026,0x0806,0x1865,0x1865,
+ 0x1043,0x1043,0x1043,0x1043,0x1043,0x1043,0x1043,0x1043,
+ 0x0822,0x0822,0x0822,0x0822,0x0822,0x0822,0x0822,0x0822,
+ 0x0822,0x0822,0x0822,0x0822,0x0822,0x0822,0x0822,0x0822};
+
+/* VLC lengths up to 10 bits, 0 <= nC < 2 */
+static const u16 coeffToken0_1[48] = {
+ 0x0000,0x0000,0x0000,0x0000,0x406a,0x304a,0x282a,0x200a,
+ 0x3869,0x3869,0x2849,0x2849,0x2029,0x2029,0x1809,0x1809,
+ 0x3068,0x3068,0x3068,0x3068,0x2048,0x2048,0x2048,0x2048,
+ 0x1828,0x1828,0x1828,0x1828,0x1008,0x1008,0x1008,0x1008,
+ 0x2867,0x2867,0x2867,0x2867,0x2867,0x2867,0x2867,0x2867,
+ 0x1847,0x1847,0x1847,0x1847,0x1847,0x1847,0x1847,0x1847};
+
+/* VLC lengths up to 14 bits, 0 <= nC < 2 */
+static const u16 coeffToken0_2[56] = {
+ 0x606e,0x584e,0x502e,0x500e,0x586e,0x504e,0x482e,0x480e,
+ 0x400d,0x400d,0x484d,0x484d,0x402d,0x402d,0x380d,0x380d,
+ 0x506d,0x506d,0x404d,0x404d,0x382d,0x382d,0x300d,0x300d,
+ 0x486b,0x486b,0x486b,0x486b,0x486b,0x486b,0x486b,0x486b,
+ 0x384b,0x384b,0x384b,0x384b,0x384b,0x384b,0x384b,0x384b,
+ 0x302b,0x302b,0x302b,0x302b,0x302b,0x302b,0x302b,0x302b,
+ 0x280b,0x280b,0x280b,0x280b,0x280b,0x280b,0x280b,0x280b};
+
+/* VLC lengths up to 16 bits, 0 <= nC < 2 */
+static const u16 coeffToken0_3[32] = {
+ 0x0000,0x0000,0x682f,0x682f,0x8010,0x8050,0x8030,0x7810,
+ 0x8070,0x7850,0x7830,0x7010,0x7870,0x7050,0x7030,0x6810,
+ 0x706f,0x706f,0x684f,0x684f,0x602f,0x602f,0x600f,0x600f,
+ 0x686f,0x686f,0x604f,0x604f,0x582f,0x582f,0x580f,0x580f};
+
+/* VLC lengths up to 6 bits, 2 <= nC < 4 */
+static const u16 coeffToken2_0[32] = {
+ 0x0000,0x0000,0x0000,0x0000,0x3866,0x2046,0x2026,0x1006,
+ 0x3066,0x1846,0x1826,0x0806,0x2865,0x2865,0x1025,0x1025,
+ 0x2064,0x2064,0x2064,0x2064,0x1864,0x1864,0x1864,0x1864,
+ 0x1043,0x1043,0x1043,0x1043,0x1043,0x1043,0x1043,0x1043};
+
+/* VLC lengths up to 9 bits, 2 <= nC < 4 */
+static const u16 coeffToken2_1[32] = {
+ 0x0000,0x0000,0x0000,0x0000,0x4869,0x3849,0x3829,0x3009,
+ 0x2808,0x2808,0x3048,0x3048,0x3028,0x3028,0x2008,0x2008,
+ 0x4067,0x4067,0x4067,0x4067,0x2847,0x2847,0x2847,0x2847,
+ 0x2827,0x2827,0x2827,0x2827,0x1807,0x1807,0x1807,0x1807};
+
+/* VLC lengths up to 14 bits, 2 <= nC < 4 */
+static const u16 coeffToken2_2[128] = {
+ 0x0000,0x0000,0x786d,0x786d,0x806e,0x804e,0x802e,0x800e,
+ 0x782e,0x780e,0x784e,0x702e,0x704d,0x704d,0x700d,0x700d,
+ 0x706d,0x706d,0x684d,0x684d,0x682d,0x682d,0x680d,0x680d,
+ 0x686d,0x686d,0x604d,0x604d,0x602d,0x602d,0x600d,0x600d,
+ 0x580c,0x580c,0x580c,0x580c,0x584c,0x584c,0x584c,0x584c,
+ 0x582c,0x582c,0x582c,0x582c,0x500c,0x500c,0x500c,0x500c,
+ 0x606c,0x606c,0x606c,0x606c,0x504c,0x504c,0x504c,0x504c,
+ 0x502c,0x502c,0x502c,0x502c,0x480c,0x480c,0x480c,0x480c,
+ 0x586b,0x586b,0x586b,0x586b,0x586b,0x586b,0x586b,0x586b,
+ 0x484b,0x484b,0x484b,0x484b,0x484b,0x484b,0x484b,0x484b,
+ 0x482b,0x482b,0x482b,0x482b,0x482b,0x482b,0x482b,0x482b,
+ 0x400b,0x400b,0x400b,0x400b,0x400b,0x400b,0x400b,0x400b,
+ 0x506b,0x506b,0x506b,0x506b,0x506b,0x506b,0x506b,0x506b,
+ 0x404b,0x404b,0x404b,0x404b,0x404b,0x404b,0x404b,0x404b,
+ 0x402b,0x402b,0x402b,0x402b,0x402b,0x402b,0x402b,0x402b,
+ 0x380b,0x380b,0x380b,0x380b,0x380b,0x380b,0x380b,0x380b};
+
+/* VLC lengths up to 6 bits, 4 <= nC < 8 */
+static const u16 coeffToken4_0[64] = {
+ 0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,0x0000,
+ 0x1806,0x3846,0x3826,0x1006,0x4866,0x3046,0x3026,0x0806,
+ 0x2825,0x2825,0x2845,0x2845,0x2025,0x2025,0x2045,0x2045,
+ 0x1825,0x1825,0x4065,0x4065,0x1845,0x1845,0x1025,0x1025,
+ 0x3864,0x3864,0x3864,0x3864,0x3064,0x3064,0x3064,0x3064,
+ 0x2864,0x2864,0x2864,0x2864,0x2064,0x2064,0x2064,0x2064,
+ 0x1864,0x1864,0x1864,0x1864,0x1044,0x1044,0x1044,0x1044,
+ 0x0824,0x0824,0x0824,0x0824,0x0004,0x0004,0x0004,0x0004};
+
+/* VLC lengths up to 10 bits, 4 <= nC < 8 */
+static const u16 coeffToken4_1[128] = {
+ 0x0000,0x800a,0x806a,0x804a,0x802a,0x780a,0x786a,0x784a,
+ 0x782a,0x700a,0x706a,0x704a,0x702a,0x680a,0x6829,0x6829,
+ 0x6009,0x6009,0x6849,0x6849,0x6029,0x6029,0x5809,0x5809,
+ 0x6869,0x6869,0x6049,0x6049,0x5829,0x5829,0x5009,0x5009,
+ 0x6068,0x6068,0x6068,0x6068,0x5848,0x5848,0x5848,0x5848,
+ 0x5028,0x5028,0x5028,0x5028,0x4808,0x4808,0x4808,0x4808,
+ 0x5868,0x5868,0x5868,0x5868,0x5048,0x5048,0x5048,0x5048,
+ 0x4828,0x4828,0x4828,0x4828,0x4008,0x4008,0x4008,0x4008,
+ 0x3807,0x3807,0x3807,0x3807,0x3807,0x3807,0x3807,0x3807,
+ 0x3007,0x3007,0x3007,0x3007,0x3007,0x3007,0x3007,0x3007,
+ 0x4847,0x4847,0x4847,0x4847,0x4847,0x4847,0x4847,0x4847,
+ 0x2807,0x2807,0x2807,0x2807,0x2807,0x2807,0x2807,0x2807,
+ 0x5067,0x5067,0x5067,0x5067,0x5067,0x5067,0x5067,0x5067,
+ 0x4047,0x4047,0x4047,0x4047,0x4047,0x4047,0x4047,0x4047,
+ 0x4027,0x4027,0x4027,0x4027,0x4027,0x4027,0x4027,0x4027,
+ 0x2007,0x2007,0x2007,0x2007,0x2007,0x2007,0x2007,0x2007};
+
+/* fixed 6 bit length VLC, 8 <= nC <= 16 */
+static const u16 coeffToken8[64] = {
+ 0x0806,0x0826,0x0000,0x0006,0x1006,0x1026,0x1046,0x0000,
+ 0x1806,0x1826,0x1846,0x1866,0x2006,0x2026,0x2046,0x2066,
+ 0x2806,0x2826,0x2846,0x2866,0x3006,0x3026,0x3046,0x3066,
+ 0x3806,0x3826,0x3846,0x3866,0x4006,0x4026,0x4046,0x4066,
+ 0x4806,0x4826,0x4846,0x4866,0x5006,0x5026,0x5046,0x5066,
+ 0x5806,0x5826,0x5846,0x5866,0x6006,0x6026,0x6046,0x6066,
+ 0x6806,0x6826,0x6846,0x6866,0x7006,0x7026,0x7046,0x7066,
+ 0x7806,0x7826,0x7846,0x7866,0x8006,0x8026,0x8046,0x8066};
+
+/* VLC lengths up to 3 bits, nC == -1 */
+static const u16 coeffTokenMinus1_0[8] = {
+ 0x0000,0x1043,0x0002,0x0002,0x0821,0x0821,0x0821,0x0821};
+
+/* VLC lengths up to 8 bits, nC == -1 */
+static const u16 coeffTokenMinus1_1[32] = {
+ 0x2067,0x2067,0x2048,0x2028,0x1847,0x1847,0x1827,0x1827,
+ 0x2006,0x2006,0x2006,0x2006,0x1806,0x1806,0x1806,0x1806,
+ 0x1006,0x1006,0x1006,0x1006,0x1866,0x1866,0x1866,0x1866,
+ 0x1026,0x1026,0x1026,0x1026,0x0806,0x0806,0x0806,0x0806};
+
+/* VLC tables for total_zeros. One table containing longer code, totalZeros_1,
+ * has been broken into two separate tables. Table elements have the
+ * following structure:
+ * [4 bits for info] [4 bits for VLC length] */
+
+/* VLC lengths up to 5 bits */
+static const u8 totalZeros_1_0[32] = {
+ 0x00,0x00,0x65,0x55,0x44,0x44,0x34,0x34,
+ 0x23,0x23,0x23,0x23,0x13,0x13,0x13,0x13,
+ 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01,
+ 0x01,0x01,0x01,0x01,0x01,0x01,0x01,0x01};
+
+/* VLC lengths up to 9 bits */
+static const u8 totalZeros_1_1[32] = {
+ 0x00,0xf9,0xe9,0xd9,0xc8,0xc8,0xb8,0xb8,
+ 0xa7,0xa7,0xa7,0xa7,0x97,0x97,0x97,0x97,
+ 0x86,0x86,0x86,0x86,0x86,0x86,0x86,0x86,
+ 0x76,0x76,0x76,0x76,0x76,0x76,0x76,0x76};
+
+/* total_zeros table for totalCoeff == 2; DecodeTotalZeros indexes it with
+ * bits >> 3, i.e. the 6 most significant of the 9 look-ahead bits */
+static const u8 totalZeros_2[64] = {
+ 0xe6,0xd6,0xc6,0xb6,0xa5,0xa5,0x95,0x95,
+ 0x84,0x84,0x84,0x84,0x74,0x74,0x74,0x74,
+ 0x64,0x64,0x64,0x64,0x54,0x54,0x54,0x54,
+ 0x43,0x43,0x43,0x43,0x43,0x43,0x43,0x43,
+ 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,
+ 0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
+ 0x13,0x13,0x13,0x13,0x13,0x13,0x13,0x13,
+ 0x03,0x03,0x03,0x03,0x03,0x03,0x03,0x03};
+
+/* total_zeros table for totalCoeff == 3, indexed with bits >> 3 (6 bits) */
+static const u8 totalZeros_3[64] = {
+ 0xd6,0xb6,0xc5,0xc5,0xa5,0xa5,0x95,0x95,
+ 0x84,0x84,0x84,0x84,0x54,0x54,0x54,0x54,
+ 0x44,0x44,0x44,0x44,0x04,0x04,0x04,0x04,
+ 0x73,0x73,0x73,0x73,0x73,0x73,0x73,0x73,
+ 0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,
+ 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,
+ 0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
+ 0x13,0x13,0x13,0x13,0x13,0x13,0x13,0x13};
+
+/* total_zeros table for totalCoeff == 4, indexed with bits >> 4 (5 bits) */
+static const u8 totalZeros_4[32] = {
+ 0xc5,0xb5,0xa5,0x05,0x94,0x94,0x74,0x74,
+ 0x34,0x34,0x24,0x24,0x83,0x83,0x83,0x83,
+ 0x63,0x63,0x63,0x63,0x53,0x53,0x53,0x53,
+ 0x43,0x43,0x43,0x43,0x13,0x13,0x13,0x13};
+
+/* total_zeros table for totalCoeff == 5, indexed with bits >> 4 (5 bits) */
+static const u8 totalZeros_5[32] = {
+ 0xb5,0x95,0xa4,0xa4,0x84,0x84,0x24,0x24,
+ 0x14,0x14,0x04,0x04,0x73,0x73,0x73,0x73,
+ 0x63,0x63,0x63,0x63,0x53,0x53,0x53,0x53,
+ 0x43,0x43,0x43,0x43,0x33,0x33,0x33,0x33};
+
+/* total_zeros table for totalCoeff == 6; DecodeTotalZeros indexes it with
+ * bits >> 3, i.e. the 6 most significant of the 9 look-ahead bits */
+static const u8 totalZeros_6[64] = {
+ 0xa6,0x06,0x15,0x15,0x84,0x84,0x84,0x84,
+ 0x93,0x93,0x93,0x93,0x93,0x93,0x93,0x93,
+ 0x73,0x73,0x73,0x73,0x73,0x73,0x73,0x73,
+ 0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,
+ 0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53,
+ 0x43,0x43,0x43,0x43,0x43,0x43,0x43,0x43,
+ 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,
+ 0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23};
+
+/* total_zeros table for totalCoeff == 7, indexed with bits >> 3 (6 bits) */
+static const u8 totalZeros_7[64] = {
+ 0x96,0x06,0x15,0x15,0x74,0x74,0x74,0x74,
+ 0x83,0x83,0x83,0x83,0x83,0x83,0x83,0x83,
+ 0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,
+ 0x43,0x43,0x43,0x43,0x43,0x43,0x43,0x43,
+ 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,
+ 0x23,0x23,0x23,0x23,0x23,0x23,0x23,0x23,
+ 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
+ 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52};
+
+/* total_zeros table for totalCoeff == 8, indexed with bits >> 3 (6 bits) */
+static const u8 totalZeros_8[64] = {
+ 0x86,0x06,0x25,0x25,0x14,0x14,0x14,0x14,
+ 0x73,0x73,0x73,0x73,0x73,0x73,0x73,0x73,
+ 0x63,0x63,0x63,0x63,0x63,0x63,0x63,0x63,
+ 0x33,0x33,0x33,0x33,0x33,0x33,0x33,0x33,
+ 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
+ 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
+ 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42,
+ 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42};
+
+/* total_zeros table for totalCoeff == 9, indexed with bits >> 3 (6 bits) */
+static const u8 totalZeros_9[64] = {
+ 0x16,0x06,0x75,0x75,0x24,0x24,0x24,0x24,
+ 0x53,0x53,0x53,0x53,0x53,0x53,0x53,0x53,
+ 0x62,0x62,0x62,0x62,0x62,0x62,0x62,0x62,
+ 0x62,0x62,0x62,0x62,0x62,0x62,0x62,0x62,
+ 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42,
+ 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42,
+ 0x32,0x32,0x32,0x32,0x32,0x32,0x32,0x32,
+ 0x32,0x32,0x32,0x32,0x32,0x32,0x32,0x32};
+
+/* total_zeros table for totalCoeff == 10; DecodeTotalZeros indexes it with
+ * bits >> 4, i.e. the 5 most significant of the 9 look-ahead bits */
+static const u8 totalZeros_10[32] = {
+ 0x15,0x05,0x64,0x64,0x23,0x23,0x23,0x23,
+ 0x52,0x52,0x52,0x52,0x52,0x52,0x52,0x52,
+ 0x42,0x42,0x42,0x42,0x42,0x42,0x42,0x42,
+ 0x32,0x32,0x32,0x32,0x32,0x32,0x32,0x32};
+
+/* total_zeros table for totalCoeff == 11, indexed with bits >> 5 (4 bits) */
+static const u8 totalZeros_11[16] = {
+ 0x04,0x14,0x23,0x23,0x33,0x33,0x53,0x53,
+ 0x41,0x41,0x41,0x41,0x41,0x41,0x41,0x41};
+
+/* total_zeros table for totalCoeff == 12, indexed with bits >> 5 (4 bits) */
+static const u8 totalZeros_12[16] = {
+ 0x04,0x14,0x43,0x43,0x22,0x22,0x22,0x22,
+ 0x31,0x31,0x31,0x31,0x31,0x31,0x31,0x31};
+
+/* total_zeros table for totalCoeff == 13, indexed with bits >> 6 (3 bits) */
+static const u8 totalZeros_13[8] = {0x03,0x13,0x32,0x32,0x21,0x21,0x21,0x21};
+
+/* total_zeros table for totalCoeff == 14, indexed with bits >> 7 (2 bits) */
+static const u8 totalZeros_14[4] = {0x02,0x12,0x21,0x21};
+
+/* VLC tables for run_before. Table elements have the following structure:
+ * [4 bits for info] [4bits for VLC length]
+ * Table runBefore_N is used by DecodeRunBefore when zerosLeft == N and is
+ * indexed with the most significant bits of the 11 look-ahead bits.
+ */
+
+/* zerosLeft == 6, indexed with bits >> 8 (3 bits) */
+static const u8 runBefore_6[8] = {0x13,0x23,0x43,0x33,0x63,0x53,0x02,0x02};
+
+/* zerosLeft == 5, indexed with bits >> 8 (3 bits) */
+static const u8 runBefore_5[8] = {0x53,0x43,0x33,0x23,0x12,0x12,0x02,0x02};
+
+/* zerosLeft == 4, indexed with bits >> 8 (3 bits) */
+static const u8 runBefore_4[8] = {0x43,0x33,0x22,0x22,0x12,0x12,0x02,0x02};
+
+/* zerosLeft == 3, indexed with bits >> 9 (2 bits) */
+static const u8 runBefore_3[4] = {0x32,0x22,0x12,0x02};
+
+/* zerosLeft == 2, indexed with bits >> 9 (2 bits) */
+static const u8 runBefore_2[4] = {0x22,0x12,0x01,0x01};
+
+/* zerosLeft == 1, indexed with bits >> 10 (1 bit) */
+static const u8 runBefore_1[2] = {0x11,0x01};
+
+/* following four macros are used to handle stream buffer "cache" in the CAVLC
+ * decoding function */
+
+/* macro to initialize stream buffer cache, fills the buffer (32 bits) */
+#define BUFFER_INIT(value, bits) \
+{ \
+ bits = 32; \
+ value = h264bsdShowBits32(pStrmData); \
+}
+
+/* macro to read numBits bits from the buffer, bits will be written to
+ * outVal. Refills the buffer if not enough bits left */
+#define BUFFER_SHOW(value, bits, outVal, numBits) \
+{ \
+ if (bits < (numBits)) \
+ { \
+ if(h264bsdFlushBits(pStrmData,32-bits) == END_OF_STREAM) \
+ return(HANTRO_NOK); \
+ value = h264bsdShowBits32(pStrmData); \
+ bits = 32; \
+ } \
+ (outVal) = value >> (32 - (numBits)); \
+}
+
+/* macro to flush numBits bits from the buffer */
+#define BUFFER_FLUSH(value, bits, numBits) \
+{ \
+ value <<= (numBits); \
+ bits -= (numBits); \
+}
+
+/* macro to read and flush numBits bits from the buffer, bits will be written
+ * to outVal. Refills the buffer if not enough bits left */
+#define BUFFER_GET(value, bits, outVal, numBits) \
+{ \
+ if (bits < (numBits)) \
+ { \
+ if(h264bsdFlushBits(pStrmData,32-bits) == END_OF_STREAM) \
+ return(HANTRO_NOK); \
+ value = h264bsdShowBits32(pStrmData); \
+ bits = 32; \
+ } \
+ (outVal) = value >> (32 - (numBits)); \
+ value <<= (numBits); \
+ bits -= (numBits); \
+}
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/* coeff_token information field for the next 16 stream bits; the VLC table
+ * set is selected by nc. 0 means that no code word matched. */
+static u32 DecodeCoeffToken(u32 bits, u32 nc);
+
+/* number of leading zero bits (0..15) in the next 16 stream bits, or
+ * VLC_NOT_FOUND if all of them are zero */
+static u32 DecodeLevelPrefix(u32 bits);
+
+/* total_zeros information field (value on bits [4,7], VLC length on bits
+ * [0,3]) for the next 9 stream bits; 0 means that no code word matched */
+static u32 DecodeTotalZeros(u32 bits, u32 totalCoeff, u32 isChromaDC);
+
+/* run_before information field (value on bits [4,7], VLC length on bits
+ * [0,3]) for the next 11 stream bits; 0 means that no valid code word matched */
+static u32 DecodeRunBefore(u32 bits,u32 zerosLeft);
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeCoeffToken
+
+        Functional description:
+            Look up the coeff_token information field for the given stream
+            bits. nC selects the set of VLC tables; because long codes are
+            stored in several sub-tables, the sub-table is chosen from the
+            magnitude of the 16-bit window.
+
+        Inputs:
+            u32 bits    next 16 stream bits
+            u32 nc      nC, see standard for details
+
+        Outputs:
+            u32         information field: total coeff on bits [11,15],
+                        trailing ones on bits [5,10] and VLC length on
+                        bits [0,4]; 0 if there is no matching code word
+
+------------------------------------------------------------------------------*/
+
+u32 DecodeCoeffToken(u32 bits, u32 nc)
+{
+
+/* Variables */
+
+    u32 token;
+
+/* Code */
+
+    /* the standard uses nC == -1 for chroma DC coefficients; as nc is
+     * unsigned here that value arrives as 2^32 - 1 */
+    ASSERT(nc <= 16 || nc == (u32)(-1));
+
+    if (nc < 2)
+    {
+        /* code "1": no coefficients, length 1 */
+        if (bits >= 0x8000)
+            return(0x0001);
+        if (bits >= 0x0C00)
+            return(coeffToken0_0[bits >> 10]);
+        if (bits >= 0x0100)
+            return(coeffToken0_1[bits >> 6]);
+        if (bits >= 0x0020)
+            return(coeffToken0_2[(bits >> 2) - 8]);
+        return(coeffToken0_3[bits]);
+    }
+
+    if (nc < 4)
+    {
+        /* two-bit codes "11" and "10" */
+        if (bits >= 0x8000)
+            return((bits & 0x4000) ? 0x0002 : 0x0822);
+        if (bits >= 0x1000)
+            return(coeffToken2_0[bits >> 10]);
+        if (bits >= 0x0200)
+            return(coeffToken2_1[bits >> 7]);
+        return(coeffToken2_2[bits >> 2]);
+    }
+
+    if (nc < 8)
+    {
+        /* short codes first, fall back to the long-code table */
+        token = coeffToken4_0[bits >> 10];
+        if (token == 0)
+            token = coeffToken4_1[bits >> 6];
+        return(token);
+    }
+
+    if (nc <= 16)
+        return(coeffToken8[bits >> 10]);
+
+    /* chroma DC: short codes first, fall back to the long-code table */
+    token = coeffTokenMinus1_0[bits >> 13];
+    if (token == 0)
+        token = coeffTokenMinus1_1[bits >> 8];
+
+    return(token);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeLevelPrefix
+
+        Functional description:
+            Decode the level_prefix information field, i.e. count the zero
+            bits that precede the first one bit in the 16-bit window.
+
+        Inputs:
+            u32 bits    next 16 stream bits
+
+        Outputs:
+            u32         level_prefix (0..15), or VLC_NOT_FOUND when the
+                        window contains no one bit
+
+------------------------------------------------------------------------------*/
+
+u32 DecodeLevelPrefix(u32 bits)
+{
+
+/* Variables */
+
+    u32 numZeros;
+    u32 threshold;
+
+/* Code */
+
+    /* start at the MSB of the 16-bit window and halve the threshold for each
+     * additional leading zero; the first threshold reached by bits gives the
+     * prefix length */
+    for (numZeros = 0, threshold = 0x8000; threshold != 0;
+         numZeros++, threshold >>= 1)
+    {
+        if (bits >= threshold)
+            return(numZeros);
+    }
+
+    /* more than 15 zeros encountered which is an error */
+    return(VLC_NOT_FOUND);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeTotalZeros
+
+        Functional description:
+            Decode the total_zeros information field. Chroma DC blocks are
+            decoded with explicit comparisons, all other blocks with the
+            totalZeros_N table that belongs to totalCoeff.
+
+        Inputs:
+            u32 bits        next 9 stream bits
+            u32 totalCoeff  total number of coefficients for the block
+                            being decoded
+            u32 isChromaDC  flag to indicate chroma DC block
+
+        Outputs:
+            u32             information field (4 bits value, 4 bits length)
+
+------------------------------------------------------------------------------*/
+
+u32 DecodeTotalZeros(u32 bits, u32 totalCoeff, u32 isChromaDC)
+{
+
+/* Variables */
+
+    u32 info = 0x0;
+
+/* Code */
+
+    ASSERT(totalCoeff);
+
+    if (isChromaDC)
+    {
+        ASSERT(totalCoeff < 4);
+
+        /* only the three leading bits are relevant for chroma DC */
+        bits >>= 6;
+
+        if (bits > 3)
+            return(0x01);
+        if (totalCoeff == 3)
+            return(0x11);
+        if (bits > 1)
+            return(0x12);
+        if (totalCoeff == 2)
+            return(0x22);
+
+        return(bits ? 0x23 : 0x33);
+    }
+
+    ASSERT(totalCoeff < 16);
+
+    switch (totalCoeff)
+    {
+        case 1:
+            /* short codes first, fall back to the long-code table */
+            info = totalZeros_1_0[bits >> 4];
+            if (info == 0)
+                info = totalZeros_1_1[bits];
+            break;
+
+        case 2:  info = totalZeros_2[bits >> 3];  break;
+        case 3:  info = totalZeros_3[bits >> 3];  break;
+        case 4:  info = totalZeros_4[bits >> 4];  break;
+        case 5:  info = totalZeros_5[bits >> 4];  break;
+        case 6:  info = totalZeros_6[bits >> 3];  break;
+        case 7:  info = totalZeros_7[bits >> 3];  break;
+        case 8:  info = totalZeros_8[bits >> 3];  break;
+        case 9:  info = totalZeros_9[bits >> 3];  break;
+        case 10: info = totalZeros_10[bits >> 4]; break;
+        case 11: info = totalZeros_11[bits >> 5]; break;
+        case 12: info = totalZeros_12[bits >> 5]; break;
+        case 13: info = totalZeros_13[bits >> 6]; break;
+        case 14: info = totalZeros_14[bits >> 7]; break;
+
+        default: /* case 15 */
+            info = (bits >> 8) ? 0x11 : 0x01;
+            break;
+    }
+
+    return(info);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeRunBefore
+
+        Functional description:
+            Decode the run_before information field. For one to six zeros
+            left a lookup table is used; otherwise the code word is decoded
+            arithmetically and rejected if the run exceeds zerosLeft.
+
+        Inputs:
+            u32 bits        next 11 stream bits
+            u32 zerosLeft   number of zeros left for the current block
+
+        Outputs:
+            u32             information field (4 bits value, 4 bits length),
+                            0 if no valid code word was found
+
+------------------------------------------------------------------------------*/
+
+u32 DecodeRunBefore(u32 bits, u32 zerosLeft)
+{
+
+/* Variables */
+
+    u32 info = 0x0;
+    u32 mask, runLength, codeLength;
+
+/* Code */
+
+    switch (zerosLeft)
+    {
+        case 1: info = runBefore_1[bits >> 10]; break;
+        case 2: info = runBefore_2[bits >> 9];  break;
+        case 3: info = runBefore_3[bits >> 9];  break;
+        case 4: info = runBefore_4[bits >> 8];  break;
+        case 5: info = runBefore_5[bits >> 8];  break;
+        case 6: info = runBefore_6[bits >> 8];  break;
+
+        default:
+            if (bits >= 0x100)
+            {
+                /* three-bit codes: run_before 0..6 */
+                info = ((7 - (bits >> 8)) << 4) + 0x3;
+            }
+            else
+            {
+                /* longer codes: every additional leading zero increases both
+                 * run_before (7..14) and the code length (4..11) by one */
+                runLength = 7;
+                codeLength = 4;
+                for (mask = 0x80; mask != 0; mask >>= 1)
+                {
+                    if (bits >= mask)
+                    {
+                        info = (runLength << 4) | codeLength;
+                        break;
+                    }
+                    runLength++;
+                    codeLength++;
+                }
+            }
+
+            /* a run longer than the number of zeros left is invalid */
+            if (INFO(info) > zerosLeft)
+                info = 0;
+            break;
+    }
+
+    return(info);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeResidualBlockCavlc
+
+        Functional description:
+            Function to decode one CAVLC coded block. This corresponds to
+            syntax elements residual_block_cavlc() in the standard. The
+            coefficient levels and zero runs are decoded into local arrays
+            and then combined into coeffLevel.
+
+        Inputs:
+            pStrmData       pointer to stream data structure
+            nc              nC value
+            maxNumCoeff     maximum number of residual coefficients
+                            (4, 15 or 16)
+
+        Outputs:
+            coeffLevel      stores decoded coefficient levels; only non-zero
+                            positions are written, the array is assumed to
+                            have been cleared by the caller
+
+        Returns:
+            numCoeffs       on bits [4,11] if successful
+            coeffMap        on bits [16,31] if successful, this is bit map
+                            where each bit indicates if the corresponding
+                            coefficient was zero (0) or non-zero (1)
+            HANTRO_NOK      end of stream or error in stream, including a
+                            total_zeros value that does not fit into a block
+                            of maxNumCoeff coefficients
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeResidualBlockCavlc(
+  strmData_t *pStrmData,
+  i32 *coeffLevel,
+  i32 nc,
+  u32 maxNumCoeff)
+{
+
+/* Variables */
+
+    u32 i, tmp, totalCoeff, trailingOnes, suffixLength, levelPrefix;
+    u32 levelSuffix, zerosLeft, bit;
+    i32 level[16];
+    u32 run[16];
+    /* stream "cache" */
+    u32 bufferValue;
+    u32 bufferBits;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(coeffLevel);
+    ASSERT(nc > -2);
+    ASSERT(maxNumCoeff == 4 || maxNumCoeff == 15 || maxNumCoeff == 16);
+    ASSERT(VLC_NOT_FOUND != END_OF_STREAM);
+
+    /* assume that coeffLevel array has been "cleaned" by caller */
+
+    BUFFER_INIT(bufferValue, bufferBits);
+
+    /*lint -e774 disable lint warning on always false comparison */
+    BUFFER_SHOW(bufferValue, bufferBits, bit, 16);
+    /*lint +e774 */
+    tmp = DecodeCoeffToken(bit, (u32)nc);
+    if (!tmp)
+        return(HANTRO_NOK);
+    BUFFER_FLUSH(bufferValue, bufferBits, LENGTH_TC(tmp));
+
+    totalCoeff = TOTAL_COEFF(tmp);
+    if (totalCoeff > maxNumCoeff)
+        return(HANTRO_NOK);
+    trailingOnes = TRAILING_ONES(tmp);
+
+    if (totalCoeff != 0)
+    {
+        i = 0;
+        /* nonzero coefficients: +/- 1, one sign bit per trailing one */
+        if (trailingOnes)
+        {
+            BUFFER_GET(bufferValue, bufferBits, bit, trailingOnes);
+            tmp = 1 << (trailingOnes - 1);
+            for (; tmp; i++)
+            {
+                level[i] = bit & tmp ? -1 : 1;
+                tmp >>= 1;
+            }
+        }
+
+        /* other levels */
+        if (totalCoeff > 10 && trailingOnes < 3)
+            suffixLength = 1;
+        else
+            suffixLength = 0;
+
+        for (; i < totalCoeff; i++)
+        {
+            BUFFER_SHOW(bufferValue, bufferBits, bit, 16);
+            levelPrefix = DecodeLevelPrefix(bit);
+            if (levelPrefix == VLC_NOT_FOUND)
+                return(HANTRO_NOK);
+            BUFFER_FLUSH(bufferValue, bufferBits, levelPrefix+1);
+
+            /* tmp holds the number of level_suffix bits to read */
+            if (levelPrefix < 14)
+                tmp = suffixLength;
+            else if (levelPrefix == 14)
+            {
+                tmp = suffixLength ? suffixLength : 4;
+            }
+            else
+            {
+                /* setting suffixLength to 1 here corresponds to adding 15
+                 * to levelCode value if levelPrefix == 15 and
+                 * suffixLength == 0 */
+                if (!suffixLength)
+                    suffixLength = 1;
+                tmp = 12;
+            }
+
+            if (suffixLength)
+                levelPrefix <<= suffixLength;
+
+            if (tmp)
+            {
+                BUFFER_GET(bufferValue, bufferBits, levelSuffix, tmp);
+                levelPrefix += levelSuffix;
+            }
+
+            /* from here on tmp holds levelCode */
+            tmp = levelPrefix;
+
+            if (i == trailingOnes && trailingOnes < 3)
+                tmp += 2;
+
+            level[i] = (tmp+2)>>1;
+
+            if (suffixLength == 0)
+                suffixLength = 1;
+
+            if ((level[i] > (3 << (suffixLength - 1))) && suffixLength < 6)
+                suffixLength++;
+
+            /* odd levelCode means negative level */
+            if (tmp & 0x1)
+                level[i] = -level[i];
+        }
+
+        /* zero runs */
+        if (totalCoeff < maxNumCoeff)
+        {
+            BUFFER_SHOW(bufferValue, bufferBits, bit,9);
+            zerosLeft = DecodeTotalZeros(bit, totalCoeff,
+                                        (u32)(maxNumCoeff == 4));
+            if (!zerosLeft)
+                return(HANTRO_NOK);
+            BUFFER_FLUSH(bufferValue, bufferBits, LENGTH(zerosLeft));
+            zerosLeft = INFO(zerosLeft);
+
+            /* total_zeros counts the zeros in front of the last non-zero
+             * coefficient, so together with totalCoeff it cannot exceed the
+             * block size. The tables shared with 16-coefficient blocks can
+             * return one zero too many for 15-coefficient blocks; reject
+             * such streams instead of writing coeffLevel[maxNumCoeff]. */
+            if (totalCoeff + zerosLeft > maxNumCoeff)
+                return(HANTRO_NOK);
+        }
+        else
+            zerosLeft = 0;
+
+        for (i = 0; i < totalCoeff - 1; i++)
+        {
+            if (zerosLeft > 0)
+            {
+                BUFFER_SHOW(bufferValue, bufferBits, bit,11);
+                tmp = DecodeRunBefore(bit, zerosLeft);
+                if (!tmp)
+                    return(HANTRO_NOK);
+                BUFFER_FLUSH(bufferValue, bufferBits, LENGTH(tmp));
+                run[i] = INFO(tmp);
+                /* consume the zeros, then store the run as a position
+                 * increment (run_before + 1) */
+                zerosLeft -= run[i]++;
+            }
+            else
+            {
+                run[i] = 1;
+            }
+        }
+
+        /* combining level and run, levelSuffix variable used to hold coeffMap,
+         * i.e. bit map indicating which coefficients had non-zero value. */
+
+        /*lint -esym(771,level,run) level and run are always initialized */
+        tmp = zerosLeft;
+        coeffLevel[tmp] = level[totalCoeff-1];
+        levelSuffix = 1 << tmp;
+        for (i = totalCoeff-1; i--;)
+        {
+            tmp += run[i];
+            levelSuffix |= 1 << tmp;
+            coeffLevel[tmp] = level[i];
+        }
+
+    }
+    else
+        levelSuffix = 0;
+
+    /* commit the bits consumed from the local cache to the stream */
+    if (h264bsdFlushBits(pStrmData, 32-bufferBits) != HANTRO_OK)
+        return(HANTRO_NOK);
+
+    return((totalCoeff << 4) | (levelSuffix << 16));
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.h
new file mode 100755
index 0000000..80353d3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cavlc.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_CAVLC_H
+#define H264SWDEC_CAVLC_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+/* Decode one CAVLC coded residual block from pStrmData into coeffLevel.
+ * nc is the nC value and maxNumCoeff the maximum number of coefficients in
+ * the block. On success the return value carries the number of coefficients
+ * on bits [4,11] and a bit map of non-zero coefficients on bits [16,31];
+ * HANTRO_NOK is returned on end of stream or error in stream. */
+u32 h264bsdDecodeResidualBlockCavlc(
+ strmData_t *pStrmData,
+ i32 *coeffLevel,
+ i32 nc,
+ u32 maxNumCoeff);
+
+#endif /* #ifdef H264SWDEC_CAVLC_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cfg.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cfg.h
new file mode 100755
index 0000000..2baba5a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_cfg.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_CFG_H
+#define H264SWDEC_CFG_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/* Capacity limits of the decoder. NOTE(review): the values presumably mirror
+ * the H.264 syntax limits (up to 16 reference pictures, up to 8 slice groups,
+ * seq_parameter_set_id 0..31, pic_parameter_set_id 0..255) -- confirm against
+ * the code that uses these macros. */
+#define MAX_NUM_REF_PICS 16
+#define MAX_NUM_SLICE_GROUPS 8
+#define MAX_NUM_SEQ_PARAM_SETS 32
+#define MAX_NUM_PIC_PARAM_SETS 256
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+#endif /* #ifdef H264SWDEC_CFG_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.c
new file mode 100755
index 0000000..493fb9e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.c
@@ -0,0 +1,626 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdConceal
+ ConcealMb
+ Transform
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_conceal.h"
+#include "h264bsd_util.h"
+#include "h264bsd_reconstruct.h"
+#include "h264bsd_dpb.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/*lint -e702 disable lint warning on right shift of signed quantity */
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 ConcealMb(mbStorage_t *pMb, image_t *currImage, u32 row, u32 col,
+ u32 sliceType, u8 *data);
+
+static void Transform(i32 *data);
+
+/*------------------------------------------------------------------------------
+
+ Function name: h264bsdConceal
+
+ Functional description:
+ Perform error concealment for a picture. Two types of concealment
+ are performed based on sliceType:
+ 1) copy from previous picture for P-slices.
+ 2) concealment from neighbour pixels for I-slices
+
+ I-type concealment is based on ideas presented by Jarno Tulkki.
+ The concealment algorithm determines frequency domain coefficients
+ from the neighbour pixels, applies integer transform (the same
+ transform used in the residual processing) and uses the results as
+ pixel values for concealed macroblocks. Transform produces 4x4
+ array and one pixel value has to be used for 4x4 luma blocks and
+ 2x2 chroma blocks.
+
+ Similar concealment is performed for whole picture (the choice
+ of the type is based on last successfully decoded slice header of
+ the picture but it is handled by the calling function). It is
+ acknowledged that this may result in wrong type of concealment
+ when a picture contains both types of slices. However,
+ determination of slice type macroblock-by-macroblock cannot
+ be done due to the fact that it is impossible to know to which
+ slice each corrupted (not successfully decoded) macroblock
+ belongs.
+
+ The error concealment is started by searching the first properly
+ decoded macroblock and concealing the row containing the macroblock
+ in question. After that all macroblocks above the row in question
+ are concealed. Finally concealment of rows below is performed.
+ The order of concealment for 4x4 picture where macroblock 9 is the
+ first properly decoded one is as follows (properly decoded
+ macroblocks marked with 'x', numbers indicating the order of
+ concealment):
+
+ 4 6 8 10
+ 3 5 7 9
+ 1 x x 2
+ 11 12 13 14
+
+ If all macroblocks of the picture are lost, the concealment is
+ copy of previous picture for P-type and setting the image to
+ constant gray (pixel value 128) for I-type.
+
+ Concealment sets quantization parameter of the concealed
+ macroblocks to value 40 and macroblock type to intra to enable
+ deblocking filter to smooth the edges of the concealed areas.
+
+ Inputs:
+ pStorage pointer to storage structure
+ currImage pointer to current image structure
+ sliceType type of the slice
+
+ Outputs:
+ currImage concealed macroblocks will be written here
+
+ Returns:
+ HANTRO_OK
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdConceal(storage_t *pStorage, image_t *currImage, u32 sliceType)
+{
+
+/* Variables */
+
+ u32 i, j;
+ u32 row, col; /* position of the first decoded macroblock found by the scan below */
+ u32 width, height; /* copied from currImage; compared against macroblock counters below, i.e. used in macroblock units */
+ u8 *refData; /* reference picture data, NULL when none is looked up or none is found */
+ mbStorage_t *mb;
+
+/* Code */
+
+ ASSERT(pStorage);
+ ASSERT(currImage);
+
+ DEBUG(("Concealing %s slice\n", IS_I_SLICE(sliceType) ?
+ "intra" : "inter"));
+
+ width = currImage->width;
+ height = currImage->height;
+ refData = NULL;
+ /* use reference picture with smallest available index */
+ if (IS_P_SLICE(sliceType) || (pStorage->intraConcealmentFlag != 0))
+ {
+ i = 0;
+ do
+ {
+ refData = h264bsdGetRefPicData(pStorage->dpb, i);
+ i++;
+ if (i >= 16) /* stop after probing indices 0..15, refData may still be NULL */
+ break;
+ } while (refData == NULL);
+ }
+
+ i = row = col = 0;
+ /* find first properly decoded macroblock -> start point for concealment */
+ while (i < pStorage->picSizeInMbs && !pStorage->mb[i].decoded)
+ {
+ i++;
+ col++;
+ if (col == width) /* wrapped past the end of a macroblock row */
+ {
+ row++;
+ col = 0;
+ }
+ }
+
+ /* whole picture lost -> copy previous or set grey */
+ if (i == pStorage->picSizeInMbs)
+ {
+ if ( (IS_I_SLICE(sliceType) && (pStorage->intraConcealmentFlag == 0)) ||
+ refData == NULL)
+ H264SwDecMemset(currImage->data, 128, width*height*384); /* 384 bytes per macroblock, same size as data[384] in ConcealMb */
+ else
+ H264SwDecMemcpy(currImage->data, refData, width*height*384);
+
+ pStorage->numConcealedMbs = pStorage->picSizeInMbs;
+
+ /* no filtering if whole picture concealed */
+ for (i = 0; i < pStorage->picSizeInMbs; i++)
+ pStorage->mb[i].disableDeblockingFilterIdc = 1;
+
+ return(HANTRO_OK);
+ }
+
+ /* start from the row containing the first correct macroblock, conceal the
+ * row in question, all rows above that row and then continue downwards */
+ mb = pStorage->mb + row * width;
+ for (j = col; j--;) /* j runs from col-1 down to 0; the scan above showed none of these is decoded */
+ {
+ ConcealMb(mb+j, currImage, row, j, sliceType, refData);
+ mb[j].decoded = 1; /* marked decoded so that it can serve as a neighbour for later concealment */
+ pStorage->numConcealedMbs++;
+ }
+ for (j = col + 1; j < width; j++) /* rest of the row: only macroblocks that are not decoded */
+ {
+ if (!mb[j].decoded)
+ {
+ ConcealMb(mb+j, currImage, row, j, sliceType, refData);
+ mb[j].decoded = 1;
+ pStorage->numConcealedMbs++;
+ }
+ }
+ /* if previous row(s) could not be concealed -> conceal them now */
+ if (row)
+ {
+ for (j = 0; j < width; j++) /* column by column, each column walked upwards from row-1 to row 0 */
+ {
+ i = row - 1;
+ mb = pStorage->mb + i*width + j;
+ do
+ {
+ ConcealMb(mb, currImage, i, j, sliceType, refData);
+ mb->decoded = 1;
+ pStorage->numConcealedMbs++;
+ mb -= width;
+ } while(i--); /* post-decrement: the body is executed for row 0 as well */
+ }
+ }
+
+ /* process rows below the one containing the first correct macroblock */
+ for (i = row + 1; i < height; i++)
+ {
+ mb = pStorage->mb + i * width;
+
+ for (j = 0; j < width; j++)
+ {
+ if (!mb[j].decoded)
+ {
+ ConcealMb(mb+j, currImage, i, j, sliceType, refData);
+ mb[j].decoded = 1;
+ pStorage->numConcealedMbs++;
+ }
+ }
+ }
+
+ return(HANTRO_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+ Function name: ConcealMb
+
+ Functional description:
+ Perform error concealment for one macroblock, location of the
+ macroblock in the picture indicated by row and col
+
+ The macroblock storage is first set up for deblocking (qpY 40,
+ type I_4x4, filtering enabled, filter offsets and chroma qp
+ index offset zero).
+
+ If sliceType is not an I slice and refData is not NULL, the
+ macroblock is predicted from refData with a zero motion vector,
+ written to currImage and the function returns.
+
+ Otherwise the border samples of the neighbouring macroblocks
+ (above, below, left, right) that are marked decoded are summed
+ into a dc value and one horizontal and one vertical component,
+ Transform() is applied and the result is expanded to 256 luma
+ and 2 x 64 chroma samples, which are written to currImage.
+
+ Inputs:
+ pMb storage of the macroblock to conceal, must not
+ be marked decoded
+ currImage current image, width and height are used in
+ macroblock units
+ row, col macroblock position in the picture
+ sliceType type of the slice
+ refData reference picture data, may be NULL
+
+ Outputs:
+ pMb deblocking related fields are overwritten
+ currImage concealed macroblock is written here
+
+ Returns:
+ HANTRO_OK
+
+------------------------------------------------------------------------------*/
+
+u32 ConcealMb(mbStorage_t *pMb, image_t *currImage, u32 row, u32 col,
+ u32 sliceType, u8 *refData)
+{
+
+/* Variables */
+
+ u32 i, j, comp;
+ u32 hor, ver; /* number of neighbours that contributed to firstPhase[1] / firstPhase[4] */
+ u32 mbNum;
+ u32 width, height;
+ u8 *mbPos; /* top-left sample of this macroblock in the plane being processed */
+ u8 data[384]; /* 256 luma samples followed by 64 + 64 chroma samples */
+ u8 *pData;
+ i32 tmp;
+ i32 firstPhase[16]; /* 4x4 array handed to Transform() */
+ i32 *pTmp;
+ /* neighbours above, below, left and right */
+ i32 a[4], b[4], l[4], r[4];
+ u32 A, B, L, R; /* HANTRO_TRUE when the corresponding neighbour is used */
+#ifdef H264DEC_OMXDL
+ u8 fillBuff[32*21 + 15 + 32];
+ u8 *pFill; /* fillBuff passed through ALIGN(.., 16), handed to h264bsdPredictSamples */
+#endif
+/* Code */
+
+ ASSERT(pMb);
+ ASSERT(!pMb->decoded);
+ ASSERT(currImage);
+ ASSERT(col < currImage->width);
+ ASSERT(row < currImage->height);
+
+#ifdef H264DEC_OMXDL
+ pFill = ALIGN(fillBuff, 16);
+#endif
+ width = currImage->width;
+ height = currImage->height;
+ mbNum = row * width + col;
+
+ h264bsdSetCurrImageMbPointers(currImage, mbNum);
+
+ mbPos = currImage->data + row * 16 * width * 16 + col * 16; /* luma plane, one sample row is width*16 bytes */
+ A = B = L = R = HANTRO_FALSE;
+
+ /* set qpY to 40 to enable some filtering in deblocking (stetson value) */
+ pMb->qpY = 40;
+ pMb->disableDeblockingFilterIdc = 0;
+ /* mbType set to intra to perform filtering despite the values of other
+ * boundary strength determination fields */
+ pMb->mbType = I_4x4;
+ pMb->filterOffsetA = 0;
+ pMb->filterOffsetB = 0;
+ pMb->chromaQpIndexOffset = 0;
+
+ if (IS_I_SLICE(sliceType))
+ H264SwDecMemset(data, 0, sizeof(data));
+ else
+ {
+ mv_t mv = {0,0}; /* zero motion vector: prediction from the same position in the reference */
+ image_t refImage;
+ refImage.width = width;
+ refImage.height = height;
+ refImage.data = refData;
+ if (refImage.data)
+ {
+#ifndef H264DEC_OMXDL
+ h264bsdPredictSamples(data, &mv, &refImage, col*16, row*16,
+ 0, 0, 16, 16);
+#else
+ h264bsdPredictSamples(data, &mv, &refImage,
+ ((row*16) + ((col*16)<<16)),
+ 0x00001010, pFill);
+#endif
+ h264bsdWriteMacroblock(currImage, data);
+
+ return(HANTRO_OK);
+ }
+ else
+ H264SwDecMemset(data, 0, sizeof(data)); /* no reference data: continue with the neighbour based concealment below */
+ }
+
+ H264SwDecMemset(firstPhase, 0, sizeof(firstPhase));
+
+ /* counter for number of neighbours used */
+ j = 0;
+ hor = ver = 0;
+ if (row && (pMb-width)->decoded)
+ {
+ A = HANTRO_TRUE;
+ pData = mbPos - width*16; /* sample row directly above this macroblock */
+ a[0] = *pData++; a[0] += *pData++; a[0] += *pData++; a[0] += *pData++;
+ a[1] = *pData++; a[1] += *pData++; a[1] += *pData++; a[1] += *pData++;
+ a[2] = *pData++; a[2] += *pData++; a[2] += *pData++; a[2] += *pData++;
+ a[3] = *pData++; a[3] += *pData++; a[3] += *pData++; a[3] += *pData++;
+ j++;
+ hor++;
+ firstPhase[0] += a[0] + a[1] + a[2] + a[3];
+ firstPhase[1] += a[0] + a[1] - a[2] - a[3];
+ }
+ if ((row != height - 1) && (pMb+width)->decoded)
+ {
+ B = HANTRO_TRUE;
+ pData = mbPos + 16*width*16; /* sample row directly below this macroblock */
+ b[0] = *pData++; b[0] += *pData++; b[0] += *pData++; b[0] += *pData++;
+ b[1] = *pData++; b[1] += *pData++; b[1] += *pData++; b[1] += *pData++;
+ b[2] = *pData++; b[2] += *pData++; b[2] += *pData++; b[2] += *pData++;
+ b[3] = *pData++; b[3] += *pData++; b[3] += *pData++; b[3] += *pData++;
+ j++;
+ hor++;
+ firstPhase[0] += b[0] + b[1] + b[2] + b[3];
+ firstPhase[1] += b[0] + b[1] - b[2] - b[3];
+ }
+ if (col && (pMb-1)->decoded)
+ {
+ L = HANTRO_TRUE;
+ pData = mbPos - 1; /* sample column directly left of this macroblock */
+ l[0] = pData[0]; l[0] += pData[16*width];
+ l[0] += pData[32*width]; l[0] += pData[48*width];
+ pData += 64*width;
+ l[1] = pData[0]; l[1] += pData[16*width];
+ l[1] += pData[32*width]; l[1] += pData[48*width];
+ pData += 64*width;
+ l[2] = pData[0]; l[2] += pData[16*width];
+ l[2] += pData[32*width]; l[2] += pData[48*width];
+ pData += 64*width;
+ l[3] = pData[0]; l[3] += pData[16*width];
+ l[3] += pData[32*width]; l[3] += pData[48*width];
+ j++;
+ ver++;
+ firstPhase[0] += l[0] + l[1] + l[2] + l[3];
+ firstPhase[4] += l[0] + l[1] - l[2] - l[3];
+ }
+ if ((col != width - 1) && (pMb+1)->decoded)
+ {
+ R = HANTRO_TRUE;
+ pData = mbPos + 16; /* sample column directly right of this macroblock */
+ r[0] = pData[0]; r[0] += pData[16*width];
+ r[0] += pData[32*width]; r[0] += pData[48*width];
+ pData += 64*width;
+ r[1] = pData[0]; r[1] += pData[16*width];
+ r[1] += pData[32*width]; r[1] += pData[48*width];
+ pData += 64*width;
+ r[2] = pData[0]; r[2] += pData[16*width];
+ r[2] += pData[32*width]; r[2] += pData[48*width];
+ pData += 64*width;
+ r[3] = pData[0]; r[3] += pData[16*width];
+ r[3] += pData[32*width]; r[3] += pData[48*width];
+ j++;
+ ver++;
+ firstPhase[0] += r[0] + r[1] + r[2] + r[3];
+ firstPhase[4] += r[0] + r[1] - r[2] - r[3];
+ }
+
+ /* at least one properly decoded neighbour available */
+ ASSERT(j);
+
+ /*lint -esym(644,l,r,a,b) variable initialized above */
+ if (!hor && L && R) /* no above/below neighbour: horizontal component from left minus right */
+ firstPhase[1] = (l[0]+l[1]+l[2]+l[3]-r[0]-r[1]-r[2]-r[3]) >> 5;
+ else if (hor)
+ firstPhase[1] >>= (3+hor);
+
+ if (!ver && A && B) /* no left/right neighbour: vertical component from above minus below */
+ firstPhase[4] = (a[0]+a[1]+a[2]+a[3]-b[0]-b[1]-b[2]-b[3]) >> 5;
+ else if (ver)
+ firstPhase[4] >>= (3+ver);
+
+ switch (j) /* scale the dc sum by the number of neighbours (16 samples each) */
+ {
+ case 1:
+ firstPhase[0] >>= 4;
+ break;
+
+ case 2:
+ firstPhase[0] >>= 5;
+ break;
+
+ case 3:
+ /* approximate (firstPhase[0]*4/3)>>6 */
+ firstPhase[0] = (21 * firstPhase[0]) >> 10;
+ break;
+
+ default: /* 4 */
+ firstPhase[0] >>= 6;
+ break;
+
+ }
+
+
+ Transform(firstPhase);
+
+ for (i = 0, pData = data, pTmp = firstPhase; i < 256;)
+ {
+ tmp = pTmp[(i & 0xF)>>2]; /* one transform value per 4 horizontally adjacent luma samples */
+ /*lint -e734 CLIP1 macro results in value that fits into 8 bits */
+ *pData++ = CLIP1(tmp);
+ /*lint +e734 */
+
+ i++;
+ if (!(i & 0x3F)) /* after 64 samples (4 rows of 16) move to the next transform row */
+ pTmp += 4;
+ }
+
+ /* chroma components */
+ mbPos = currImage->data + width * height * 256 +
+ row * 8 * width * 8 + col * 8; /* first chroma plane follows the width*height*256 luma bytes */
+ for (comp = 0; comp < 2; comp++)
+ {
+
+ H264SwDecMemset(firstPhase, 0, sizeof(firstPhase));
+
+ /* counter for number of neighbours used */
+ j = 0;
+ hor = ver = 0;
+ if (A) /* same neighbours as chosen for luma are used for chroma */
+ {
+ pData = mbPos - width*8;
+ a[0] = *pData++; a[0] += *pData++;
+ a[1] = *pData++; a[1] += *pData++;
+ a[2] = *pData++; a[2] += *pData++;
+ a[3] = *pData++; a[3] += *pData++;
+ j++;
+ hor++;
+ firstPhase[0] += a[0] + a[1] + a[2] + a[3];
+ firstPhase[1] += a[0] + a[1] - a[2] - a[3];
+ }
+ if (B)
+ {
+ pData = mbPos + 8*width*8;
+ b[0] = *pData++; b[0] += *pData++;
+ b[1] = *pData++; b[1] += *pData++;
+ b[2] = *pData++; b[2] += *pData++;
+ b[3] = *pData++; b[3] += *pData++;
+ j++;
+ hor++;
+ firstPhase[0] += b[0] + b[1] + b[2] + b[3];
+ firstPhase[1] += b[0] + b[1] - b[2] - b[3];
+ }
+ if (L)
+ {
+ pData = mbPos - 1;
+ l[0] = pData[0]; l[0] += pData[8*width];
+ pData += 16*width;
+ l[1] = pData[0]; l[1] += pData[8*width];
+ pData += 16*width;
+ l[2] = pData[0]; l[2] += pData[8*width];
+ pData += 16*width;
+ l[3] = pData[0]; l[3] += pData[8*width];
+ j++;
+ ver++;
+ firstPhase[0] += l[0] + l[1] + l[2] + l[3];
+ firstPhase[4] += l[0] + l[1] - l[2] - l[3];
+ }
+ if (R)
+ {
+ pData = mbPos + 8;
+ r[0] = pData[0]; r[0] += pData[8*width];
+ pData += 16*width;
+ r[1] = pData[0]; r[1] += pData[8*width];
+ pData += 16*width;
+ r[2] = pData[0]; r[2] += pData[8*width];
+ pData += 16*width;
+ r[3] = pData[0]; r[3] += pData[8*width];
+ j++;
+ ver++;
+ firstPhase[0] += r[0] + r[1] + r[2] + r[3];
+ firstPhase[4] += r[0] + r[1] - r[2] - r[3];
+ }
+ if (!hor && L && R)
+ firstPhase[1] = (l[0]+l[1]+l[2]+l[3]-r[0]-r[1]-r[2]-r[3]) >> 4;
+ else if (hor)
+ firstPhase[1] >>= (2+hor);
+
+ if (!ver && A && B)
+ firstPhase[4] = (a[0]+a[1]+a[2]+a[3]-b[0]-b[1]-b[2]-b[3]) >> 4;
+ else if (ver)
+ firstPhase[4] >>= (2+ver);
+
+ switch (j) /* as for luma, but each neighbour contributes 8 samples */
+ {
+ case 1:
+ firstPhase[0] >>= 3;
+ break;
+
+ case 2:
+ firstPhase[0] >>= 4;
+ break;
+
+ case 3:
+ /* approximate (firstPhase[0]*4/3)>>5 */
+ firstPhase[0] = (21 * firstPhase[0]) >> 9;
+ break;
+
+ default: /* 4 */
+ firstPhase[0] >>= 5;
+ break;
+
+ }
+
+ Transform(firstPhase);
+
+ pData = data + 256 + comp*64; /* 64 byte chroma block of this component inside data[] */
+ for (i = 0, pTmp = firstPhase; i < 64;)
+ {
+ tmp = pTmp[(i & 0x7)>>1]; /* one transform value per 2 horizontally adjacent chroma samples */
+ /*lint -e734 CLIP1 macro results in value that fits into 8 bits */
+ *pData++ = CLIP1(tmp);
+ /*lint +e734 */
+
+ i++;
+ if (!(i & 0xF)) /* after 16 samples (2 rows of 8) move to the next transform row */
+ pTmp += 4;
+ }
+
+ /* increment pointers for cr */
+ mbPos += width * height * 64;
+ }
+
+ h264bsdWriteMacroblock(currImage, data);
+
+ return(HANTRO_OK);
+
+}
+
+
+/*------------------------------------------------------------------------------
+
+    Function name: Transform
+
+    Functional description:
+        Reduced 4x4 transform operating in place on a 16 element array
+        stored row by row. Only three input values are read: data[0]
+        (dc), data[1] (lowest horizontal component) and data[4] (lowest
+        vertical component); every other element is treated as zero and
+        all 16 elements are overwritten with the result.
+
+        When both data[1] and data[4] are zero the result is flat, i.e.
+        every element gets the value of data[0].
+
+------------------------------------------------------------------------------*/
+
+void Transform(i32 *data)
+{
+
+    u32 idx;
+    i32 base, comp, halfComp;
+
+    /* flat block: replicate the dc value over the whole array */
+    if ((data[1] == 0) && (data[4] == 0))
+    {
+        base = data[0];
+        for (idx = 1; idx < 16; idx++)
+            data[idx] = base;
+        return;
+    }
+
+    /* horizontal pass, row 0: dc combined with the horizontal component */
+    base = data[0];
+    comp = data[1];
+    halfComp = comp >> 1;
+    data[0] = base + comp;
+    data[1] = base + halfComp;
+    data[2] = base - halfComp;
+    data[3] = base - comp;
+
+    /* horizontal pass, row 1: the vertical component is constant over the row */
+    comp = data[4];
+    data[5] = comp;
+    data[6] = comp;
+    data[7] = comp;
+
+    /* vertical pass, one column at a time */
+    for (idx = 0; idx < 4; idx++)
+    {
+        base = data[idx];
+        comp = data[idx + 4];
+        halfComp = comp >> 1;
+        data[idx] = base + comp;
+        data[idx + 4] = base + halfComp;
+        data[idx + 8] = base - halfComp;
+        data[idx + 12] = base - comp;
+    }
+
+}
+/*lint +e702 */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.h
new file mode 100755
index 0000000..3134670
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_conceal.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_CONCEAL_H
+#define H264SWDEC_CONCEAL_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_storage.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdConceal(storage_t *pStorage, image_t *currImage, u32 sliceType);
+
+#endif /* #ifndef H264SWDEC_CONCEAL_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_container.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_container.h
new file mode 100755
index 0000000..99b74a0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_container.h
@@ -0,0 +1,69 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_CONTAINER_H
+#define H264SWDEC_CONTAINER_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_storage.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/* String length for tracing */
+#define H264DEC_TRACE_STR_LEN 100
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+typedef struct
+{
+ enum {
+ UNINITIALIZED,
+ INITIALIZED,
+ NEW_HEADERS
+ } decStat; /* decoder state, one of the three enumerators above */
+
+ u32 picNumber; /* NOTE(review): presumably a running picture counter -- confirm at the use sites */
+ storage_t storage; /* embedded storage_t, type comes from h264bsd_storage.h */
+#ifdef H264DEC_TRACE
+ char str[H264DEC_TRACE_STR_LEN]; /* trace string buffer, present only when H264DEC_TRACE is defined */
+#endif
+} decContainer_t;
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+#endif /* #ifndef H264SWDEC_CONTAINER_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.c
new file mode 100755
index 0000000..f8c1f76
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.c
@@ -0,0 +1,2417 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdFilterPicture
+ FilterVerLumaEdge
+ FilterHorLumaEdge
+ FilterHorLuma
+ FilterVerChromaEdge
+ FilterHorChromaEdge
+ FilterHorChroma
+ InnerBoundaryStrength
+ EdgeBoundaryStrength
+ GetBoundaryStrengths
+ IsSliceBoundaryOnLeft
+ IsSliceBoundaryOnTop
+ GetMbFilteringFlags
+ GetLumaEdgeThresholds
+ GetChromaEdgeThresholds
+ FilterLuma
+ FilterChroma
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_util.h"
+#include "h264bsd_macroblock_layer.h"
+#include "h264bsd_deblocking.h"
+#include "h264bsd_dpb.h"
+
+#ifdef H264DEC_OMXDL
+#include "omxtypes.h"
+#include "omxVC.h"
+#include "armVC.h"
+#endif /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/* Switch off the following Lint messages for this file:
+ * Info 701: Shift left of signed quantity (int)
+ * Info 702: Shift right of signed quantity (int)
+ */
+/*lint -e701 -e702 */
+
+/* array of alpha values, from the standard; 52 entries.
+ * NOTE(review): presumably indexed by indexA of the deblocking filter, the
+ * lookups are not visible here -- confirm */
+static const u8 alphas[52] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,4,4,5,6,7,8,9,10,
+ 12,13,15,17,20,22,25,28,32,36,40,45,50,56,63,71,80,90,101,113,127,144,162,
+ 182,203,226,255,255};
+
+/* array of beta values, from the standard; 52 entries.
+ * NOTE(review): presumably indexed by indexB of the deblocking filter, the
+ * lookups are not visible here -- confirm */
+static const u8 betas[52] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,2,2,3,3,3,3,4,4,
+ 4,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15,16,16,17,17,18,18};
+
+
+
+#ifndef H264DEC_OMXDL
+/* array of tc0 values, from the standard, each triplet corresponds to a
+ * column in the table. Indexing goes as tc0[indexA][bS-1] */
+static const u8 tc0[52][3] = {
+ {0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
+ {0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{0,0,0},
+ {0,0,0},{0,0,1},{0,0,1},{0,0,1},{0,0,1},{0,1,1},{0,1,1},{1,1,1},
+ {1,1,1},{1,1,1},{1,1,1},{1,1,2},{1,1,2},{1,1,2},{1,1,2},{1,2,3},
+ {1,2,3},{2,2,3},{2,2,4},{2,3,4},{2,3,4},{3,3,5},{3,4,6},{3,4,6},
+ {4,5,7},{4,5,8},{4,6,9},{5,7,10},{6,8,11},{6,8,13},{7,10,14},{8,11,16},
+ {9,12,18},{10,13,20},{11,15,23},{13,17,25}
+};
+#else
+/* array of tc0 values, from the standard. Each row holds five entries so
+ * that the indexing goes directly as tc0[indexA][bS]: the entries for
+ * bS = 0 and bS = 4 are zero in every row and the three entries in between
+ * are the same triplets as in the non-OMXDL variant of the table */
+static const u8 tc0[52][5] = {
+ {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0}, {0, 0, 0, 0, 0},
+ {0, 0, 0, 0, 0}, {0, 0, 0, 1, 0}, {0, 0, 0, 1, 0}, {0, 0, 0, 1, 0},
+ {0, 0, 0, 1, 0}, {0, 0, 1, 1, 0}, {0, 0, 1, 1, 0}, {0, 1, 1, 1, 0},
+ {0, 1, 1, 1, 0}, {0, 1, 1, 1, 0}, {0, 1, 1, 1, 0}, {0, 1, 1, 2, 0},
+ {0, 1, 1, 2, 0}, {0, 1, 1, 2, 0}, {0, 1, 1, 2, 0}, {0, 1, 2, 3, 0},
+ {0, 1, 2, 3, 0}, {0, 2, 2, 3, 0}, {0, 2, 2, 4, 0}, {0, 2, 3, 4, 0},
+ {0, 2, 3, 4, 0}, {0, 3, 3, 5, 0}, {0, 3, 4, 6, 0}, {0, 3, 4, 6, 0},
+ {0, 4, 5, 7, 0}, {0, 4, 5, 8, 0}, {0, 4, 6, 9, 0}, {0, 5, 7, 10, 0},
+ {0, 6, 8, 11, 0}, {0, 6, 8, 13, 0}, {0, 7, 10, 14, 0},
+ {0, 8, 11, 16, 0}, {0, 9, 12, 18, 0}, {0, 10, 13, 20, 0},
+ {0, 11, 15, 23, 0}, {0, 13, 17, 25, 0}
+};
+#endif
+
+
+#ifndef H264DEC_OMXDL
+/* mapping of raster scan block index to 4x4 block index, e.g. raster
+ * index 2 maps to block 4 and raster index 4 maps to block 2 */
+static const u32 mb4x4Index[16] =
+ {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+/* filtering thresholds of one edge; NOTE(review): tc0 presumably points to
+ * one row of the tc0[] table -- confirm in GetLumaEdgeThresholds */
+typedef struct {
+ const u8 *tc0;
+ u32 alpha;
+ u32 beta;
+} edgeThreshold_t;
+
+/* pair of boundary strength values, one for a top and one for a left edge */
+typedef struct {
+ u32 top;
+ u32 left;
+} bS_t;
+
+enum { TOP = 0, LEFT = 1, INNER = 2 }; /* NOTE(review): presumably indices into an edgeThreshold_t array -- confirm at the use sites */
+#endif /* H264DEC_OMXDL */
+
+#define FILTER_LEFT_EDGE 0x04 /* bit flags, OR-ed together by GetMbFilteringFlags */
+#define FILTER_TOP_EDGE 0x02 /* set when the top macroblock edge shall be filtered */
+#define FILTER_INNER_EDGE 0x01 /* set whenever disableDeblockingFilterIdc is not 1 */
+
+
+/* clipping table defined in intra_prediction.c */
+extern const u8 h264bsdClip[];
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 InnerBoundaryStrength(mbStorage_t *mb1, u32 i1, u32 i2);
+
+#ifndef H264DEC_OMXDL
+static u32 EdgeBoundaryStrength(mbStorage_t *mb1, mbStorage_t *mb2,
+ u32 i1, u32 i2);
+#else
+static u32 InnerBoundaryStrength2(mbStorage_t *mb1, u32 i1, u32 i2);
+static u32 EdgeBoundaryStrengthLeft(mbStorage_t *mb1, mbStorage_t *mb2);
+static u32 EdgeBoundaryStrengthTop(mbStorage_t *mb1, mbStorage_t *mb2);
+#endif
+
+static u32 IsSliceBoundaryOnLeft(mbStorage_t *mb);
+
+static u32 IsSliceBoundaryOnTop(mbStorage_t *mb);
+
+static u32 GetMbFilteringFlags(mbStorage_t *mb);
+
+#ifndef H264DEC_OMXDL
+
+static u32 GetBoundaryStrengths(mbStorage_t *mb, bS_t *bs, u32 flags);
+
+static void FilterLuma(u8 *data, bS_t *bS, edgeThreshold_t *thresholds,
+ u32 imageWidth);
+
+static void FilterChroma(u8 *cb, u8 *cr, bS_t *bS, edgeThreshold_t *thresholds,
+ u32 imageWidth);
+
+static void FilterVerLumaEdge( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+ u32 imageWidth);
+static void FilterHorLumaEdge( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+ i32 imageWidth);
+static void FilterHorLuma( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+ i32 imageWidth);
+
+static void FilterVerChromaEdge( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+ u32 imageWidth);
+static void FilterHorChromaEdge( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+ i32 imageWidth);
+static void FilterHorChroma( u8 *data, u32 bS, edgeThreshold_t *thresholds,
+ i32 imageWidth);
+
+static void GetLumaEdgeThresholds(
+ edgeThreshold_t *thresholds,
+ mbStorage_t *mb,
+ u32 filteringFlags);
+
+static void GetChromaEdgeThresholds(
+ edgeThreshold_t *thresholds,
+ mbStorage_t *mb,
+ u32 filteringFlags,
+ i32 chromaQpIndexOffset);
+
+#else /* H264DEC_OMXDL */
+
+static u32 GetBoundaryStrengths(mbStorage_t *mb, u8 (*bs)[16], u32 flags);
+
+static void GetLumaEdgeThresholds(
+ mbStorage_t *mb,
+ u8 (*alpha)[2],
+ u8 (*beta)[2],
+ u8 (*threshold)[16],
+ u8 (*bs)[16],
+ u32 filteringFlags );
+
+static void GetChromaEdgeThresholds(
+ mbStorage_t *mb,
+ u8 (*alpha)[2],
+ u8 (*beta)[2],
+ u8 (*threshold)[8],
+ u8 (*bs)[16],
+ u32 filteringFlags,
+ i32 chromaQpIndexOffset);
+
+#endif /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+    Function: IsSliceBoundaryOnLeft
+
+    Functional description:
+        Tell whether the left edge of a macroblock is a slice boundary,
+        i.e. whether the left neighbour (mb->mbA) carries a different
+        sliceId than the macroblock itself. The left neighbour has to
+        exist.
+
+    Returns:
+        HANTRO_TRUE     the slice ids differ
+        HANTRO_FALSE    the slice ids are equal
+
+------------------------------------------------------------------------------*/
+u32 IsSliceBoundaryOnLeft(mbStorage_t *mb)
+{
+
+    ASSERT(mb && mb->mbA);
+
+    return((mb->sliceId == mb->mbA->sliceId) ? HANTRO_FALSE : HANTRO_TRUE);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: IsSliceBoundaryOnTop
+
+    Functional description:
+        Tell whether the top edge of a macroblock is a slice boundary,
+        i.e. whether the neighbour above (mb->mbB) carries a different
+        sliceId than the macroblock itself. The neighbour above has to
+        exist.
+
+    Returns:
+        HANTRO_TRUE     the slice ids differ
+        HANTRO_FALSE    the slice ids are equal
+
+------------------------------------------------------------------------------*/
+u32 IsSliceBoundaryOnTop(mbStorage_t *mb)
+{
+
+    ASSERT(mb && mb->mbB);
+
+    return((mb->sliceId == mb->mbB->sliceId) ? HANTRO_FALSE : HANTRO_TRUE);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: GetMbFilteringFlags
+
+    Functional description:
+        Determine which edges of a macroblock have to be filtered.
+
+    Returns:
+        bit-wise OR of FILTER_LEFT_EDGE, FILTER_TOP_EDGE and
+        FILTER_INNER_EDGE, or 0 when disableDeblockingFilterIdc of the
+        macroblock is 1
+
+------------------------------------------------------------------------------*/
+u32 GetMbFilteringFlags(mbStorage_t *mb)
+{
+
+    u32 edgeMask;
+
+    ASSERT(mb);
+
+    /* disableDeblockingFilterIdc == 1 -> nothing is filtered */
+    if (mb->disableDeblockingFilterIdc == 1)
+        return(0);
+
+    edgeMask = FILTER_INNER_EDGE;
+
+    /* left edge: needs the left neighbour (MB_A); with idc 2 the edge is
+     * skipped when it lies on a slice boundary */
+    if (mb->mbA)
+    {
+        if ((mb->disableDeblockingFilterIdc != 2) ||
+            !IsSliceBoundaryOnLeft(mb))
+        {
+            edgeMask |= FILTER_LEFT_EDGE;
+        }
+    }
+
+    /* top edge: same rule with the neighbour above (MB_B) */
+    if (mb->mbB)
+    {
+        if ((mb->disableDeblockingFilterIdc != 2) ||
+            !IsSliceBoundaryOnTop(mb))
+        {
+            edgeMask |= FILTER_TOP_EDGE;
+        }
+    }
+
+    return(edgeMask);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: InnerBoundaryStrength
+
+    Functional description:
+        Calculate boundary strength value bs for an inner edge of a
+        macroblock, i.e. an edge between two blocks ind1 and ind2 of the
+        same macroblock. Macroblock type is checked before this is
+        called -> no intra mb condition here.
+
+    Returns:
+        2   either block has non-zero totalCoeff
+        1   motion vectors differ by 4 or more in either direction, or
+            the reference addresses of the two blocks differ
+        0   otherwise
+
+------------------------------------------------------------------------------*/
+u32 InnerBoundaryStrength(mbStorage_t *mb1, u32 ind1, u32 ind2)
+{
+    i32 coeffA, coeffB;
+    i32 horA, horB, verA, verB;
+
+    coeffA = mb1->totalCoeff[ind1];
+    coeffB = mb1->totalCoeff[ind2];
+    if (coeffA || coeffB)
+        return 2;
+
+    horA = mb1->mv[ind1].hor;
+    horB = mb1->mv[ind2].hor;
+    if (ABS(horA - horB) >= 4)
+        return 1;
+
+    verA = mb1->mv[ind1].ver;
+    verB = mb1->mv[ind2].ver;
+    if (ABS(verA - verB) >= 4)
+        return 1;
+
+    /* refAddr is stored once per group of four block indices */
+    if (mb1->refAddr[ind1 >> 2] != mb1->refAddr[ind2 >> 2])
+        return 1;
+
+    return 0;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: InnerBoundaryStrength2
+
+    Functional description:
+        Calculate boundary strength value bs for an inner edge of a
+        macroblock. Same as InnerBoundaryStrength except that totalCoeff
+        is not examined, so the result is either 1 or 0.
+
+        NOTE(review): the static prototype of this function is declared
+        only when H264DEC_OMXDL is defined, whereas this definition is
+        compiled in every configuration -- confirm that this is intended.
+
+    Returns:
+        1   motion vectors differ by 4 or more in either direction, or
+            the reference addresses of the two blocks differ
+        0   otherwise
+
+------------------------------------------------------------------------------*/
+u32 InnerBoundaryStrength2(mbStorage_t *mb1, u32 ind1, u32 ind2)
+{
+    i32 horA, horB, verA, verB;
+
+    horA = mb1->mv[ind1].hor;
+    horB = mb1->mv[ind2].hor;
+    if (ABS(horA - horB) >= 4)
+        return 1;
+
+    verA = mb1->mv[ind1].ver;
+    verB = mb1->mv[ind2].ver;
+    if (ABS(verA - verB) >= 4)
+        return 1;
+
+    /* refAddr is stored once per group of four block indices */
+    if (mb1->refAddr[ind1 >> 2] != mb1->refAddr[ind2 >> 2])
+        return 1;
+
+    return 0;
+}
+#ifndef H264DEC_OMXDL
+/*------------------------------------------------------------------------------
+
+    Function: EdgeBoundaryStrength
+
+        Functional description:
+          Calculate the boundary strength value bs for the left- or
+          top-most edge of a macroblock: block ind1 of mb1 is compared
+          against block ind2 of mb2. Macroblock types are checked before
+          this is called -> no intra mb conditions here.
+
+        Returns:
+          2   either block has non-zero totalCoeff
+          1   refAddr entries differ, or motion vector components differ
+              by 4 or more
+          0   otherwise
+
+------------------------------------------------------------------------------*/
+u32 EdgeBoundaryStrength(mbStorage_t *mb1, mbStorage_t *mb2,
+    u32 ind1, u32 ind2)
+{
+
+    /* coefficients on either side of the edge */
+    if (mb1->totalCoeff[ind1] || mb2->totalCoeff[ind2])
+        return 2;
+
+    /* different reference */
+    if (mb1->refAddr[ind1 >> 2] != mb2->refAddr[ind2 >> 2])
+        return 1;
+
+    /* motion vectors too far apart */
+    if (ABS(mb1->mv[ind1].hor - mb2->mv[ind2].hor) >= 4)
+        return 1;
+    if (ABS(mb1->mv[ind1].ver - mb2->mv[ind2].ver) >= 4)
+        return 1;
+
+    return 0;
+}
+
+#else /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+    Function: EdgeBoundaryStrengthTop
+
+        Functional description:
+          Calculate boundary strength values bs for the top-most edge of
+          a macroblock. Blocks 0, 1, 4 and 5 of mb1 are compared against
+          blocks 10, 11, 14 and 15 of mb2. The four results (0, 1 or 2
+          each) are packed into one word, the first pair in bits 0..7,
+          the following pairs in bits 8..15, 16..23 and 24..31.
+          Macroblock types are checked before this is called -> no intra
+          mb conditions here.
+
+------------------------------------------------------------------------------*/
+u32 EdgeBoundaryStrengthTop(mbStorage_t *mb1, mbStorage_t *mb2)
+{
+    /* block index pairs compared along the edge, in packing order */
+    static const u32 blkInMb1[4] = { 0,  1,  4,  5};
+    static const u32 blkInMb2[4] = {10, 11, 14, 15};
+    u32 packedBs = 0;
+    u32 k;
+
+    for (k = 0; k < 4; k++)
+    {
+        const u32 a = blkInMb1[k];
+        const u32 b = blkInMb2[k];
+        u32 strength;
+
+        if (mb1->totalCoeff[a] || mb2->totalCoeff[b])
+        {
+            strength = 2;
+        }
+        else if ((ABS(mb1->mv[a].hor - mb2->mv[b].hor) >= 4) ||
+                 (ABS(mb1->mv[a].ver - mb2->mv[b].ver) >= 4) ||
+                 (mb1->refAddr[a >> 2] != mb2->refAddr[b >> 2]))
+        {
+            strength = 1;
+        }
+        else
+        {
+            strength = 0;
+        }
+
+        /* one byte per edge segment */
+        packedBs += strength << (8 * k);
+    }
+
+    return packedBs;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: EdgeBoundaryStrengthLeft
+
+        Functional description:
+          Calculate boundary strength values bs for the left edge of a
+          macroblock. Blocks 0, 2, 8 and 10 of mb1 are compared against
+          blocks 5, 7, 13 and 15 of mb2. The four results (0, 1 or 2
+          each) are packed into one word, the first pair in bits 0..7,
+          the following pairs in bits 8..15, 16..23 and 24..31.
+          Macroblock types are checked before this is called -> no intra
+          mb conditions here.
+
+------------------------------------------------------------------------------*/
+u32 EdgeBoundaryStrengthLeft(mbStorage_t *mb1, mbStorage_t *mb2)
+{
+    /* block index pairs compared along the edge, in packing order */
+    static const u32 blkInMb1[4] = {0, 2,  8, 10};
+    static const u32 blkInMb2[4] = {5, 7, 13, 15};
+    u32 packedBs = 0;
+    u32 k;
+
+    for (k = 0; k < 4; k++)
+    {
+        const u32 a = blkInMb1[k];
+        const u32 b = blkInMb2[k];
+        u32 strength;
+
+        if (mb1->totalCoeff[a] || mb2->totalCoeff[b])
+        {
+            strength = 2;
+        }
+        else if ((ABS(mb1->mv[a].hor - mb2->mv[b].hor) >= 4) ||
+                 (ABS(mb1->mv[a].ver - mb2->mv[b].ver) >= 4) ||
+                 (mb1->refAddr[a >> 2] != mb2->refAddr[b >> 2]))
+        {
+            strength = 1;
+        }
+        else
+        {
+            strength = 0;
+        }
+
+        /* one byte per edge segment */
+        packedBs += strength << (8 * k);
+    }
+
+    return packedBs;
+}
+#endif /* H264DEC_OMXDL */
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdFilterPicture
+
+ Functional description:
+ Perform deblocking filtering for a picture. Filter does not copy
+ the original picture anywhere but filtering is performed directly
+ on the original image. Parameters controlling the filtering process
+ are computed based on information in macroblock structures of the
+ filtered macroblock, macroblock above and macroblock on the left of
+ the filtered one.
+
+ Inputs:
+ image pointer to image to be filtered
+ mb pointer to macroblock data structure of the top-left
+ macroblock of the picture
+
+ Outputs:
+ image filtered image stored here
+
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_OMXDL
+void h264bsdFilterPicture(
+  image_t *image,
+  mbStorage_t *mb)
+{
+
+/* Variables */
+
+    u32 edgeFlags;
+    u32 totalMbs, row, col;
+    u32 mbsPerRow;
+    u8 *lumaPtr;
+    u8 *chromaPtr;
+    mbStorage_t *curMb;
+    bS_t bS[16];
+    edgeThreshold_t thresholds[3];
+
+/* Code */
+
+    ASSERT(image);
+    ASSERT(mb);
+    ASSERT(image->data);
+    ASSERT(image->width);
+    ASSERT(image->height);
+
+    /* image->width and image->height are used as macroblock counts here:
+     * one macroblock covers 16x16 luma bytes and 8x8 bytes per chroma
+     * plane in the offsets computed below */
+    mbsPerRow = image->width;
+    totalMbs = mbsPerRow * image->height;
+
+    curMb = mb;
+    row = 0;
+    col = 0;
+
+    /* walk the macroblock array row by row, left to right */
+    while (row < image->height)
+    {
+        edgeFlags = GetMbFilteringFlags(curMb);
+
+        /* GetBoundaryStrengths function returns non-zero value if any of
+         * the bS values for the macroblock being processed was non-zero;
+         * it is only called when at least one edge flag is set */
+        if (edgeFlags && GetBoundaryStrengths(curMb, bS, edgeFlags))
+        {
+            /* luma */
+            GetLumaEdgeThresholds(thresholds, curMb, edgeFlags);
+            lumaPtr = image->data + row * mbsPerRow * 256 + col * 16;
+
+            FilterLuma(lumaPtr, bS, thresholds, mbsPerRow*16);
+
+            /* chroma: the first chroma plane starts totalMbs*256 bytes into
+             * the buffer, the second one 64*totalMbs bytes after the first */
+            GetChromaEdgeThresholds(thresholds, curMb, edgeFlags,
+                curMb->chromaQpIndexOffset);
+            chromaPtr = image->data + totalMbs * 256 +
+                row * mbsPerRow * 64 + col * 8;
+
+            FilterChroma(chromaPtr, chromaPtr + 64*totalMbs, bS,
+                    thresholds, mbsPerRow*8);
+        }
+
+        /* advance to the next macroblock, wrapping at the end of a row */
+        col++;
+        if (col == mbsPerRow)
+        {
+            col = 0;
+            row++;
+        }
+        curMb++;
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: FilterVerLumaEdge
+
+ Functional description:
+ Filter one vertical 4-pixel luma edge.
+
+ data points at the first sample on the right-hand side of the edge
+ (q0 = data[0], p0 = data[-1]). Four successive rows are processed,
+ imageWidth bytes apart, and the samples are modified in place.
+ bS must be in the range 1..4: values below 4 select the filter that
+ is clipped with thresholds->tc0[bS-1], 4 selects the stronger filter
+ that may rewrite up to three samples on each side of the edge and
+ therefore also reads data[-4] and data[3].
+ A row is left untouched unless |p0-q0| < alpha, |p1-p0| < beta and
+ |q1-q0| < beta.
+
+------------------------------------------------------------------------------*/
+void FilterVerLumaEdge(
+ u8 *data,
+ u32 bS,
+ edgeThreshold_t *thresholds,
+ u32 imageWidth)
+{
+
+/* Variables */
+
+ i32 delta, tc, tmp;
+ u32 i;
+ u8 p0, q0, p1, q1, p2, q2;
+ u32 tmpFlag;
+ const u8 *clp = h264bsdClip + 512; /* NOTE(review): table is defined
+ * elsewhere; it is indexed with p0 + delta and q0 - delta, presumably to
+ * clip the result back into the u8 range - confirm against h264bsdClip */
+
+/* Code */
+
+ ASSERT(data);
+ ASSERT(bS && bS <= 4);
+ ASSERT(thresholds);
+
+ if (bS < 4)
+ {
+ tc = thresholds->tc0[bS-1];
+ tmp = tc; /* tmp is the clipping bound for delta: tc plus one for each
+ * side whose p2/q2 test passes; reset to tc after every row */
+ for (i = 4; i; i--, data += imageWidth)
+ {
+ p1 = data[-2]; p0 = data[-1];
+ q0 = data[0]; q1 = data[1];
+ if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+ ((unsigned)ABS(p1-p0) < thresholds->beta) &&
+ ((unsigned)ABS(q1-q0) < thresholds->beta) )
+ {
+ p2 = data[-3];
+ q2 = data[2];
+
+ if ((unsigned)ABS(p2-p0) < thresholds->beta) /* p side smooth: also
+ * adjust p1, clipped to +-tc */
+ {
+ data[-2] = (u8)(p1 + CLIP3(-tc,tc,
+ (p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1));
+ tmp++;
+ }
+
+ if ((unsigned)ABS(q2-q0) < thresholds->beta) /* q side smooth: also
+ * adjust q1, clipped to +-tc */
+ {
+ data[1] = (u8)(q1 + CLIP3(-tc,tc,
+ (q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1));
+ tmp++;
+ }
+
+ delta = CLIP3(-tmp, tmp, ((((q0 - p0) << 2) +
+ (p1 - q1) + 4) >> 3));
+
+ p0 = clp[p0 + delta];
+ q0 = clp[q0 - delta];
+ tmp = tc; /* restore the bound for the next row */
+ data[-1] = p0;
+ data[ 0] = q0;
+ }
+ }
+ }
+ else
+ {
+ for (i = 4; i; i--, data += imageWidth)
+ {
+ p1 = data[-2]; p0 = data[-1];
+ q0 = data[0]; q1 = data[1];
+ if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+ ((unsigned)ABS(p1-p0) < thresholds->beta) &&
+ ((unsigned)ABS(q1-q0) < thresholds->beta) )
+ {
+ tmpFlag = /* extra condition on |p0-q0| required by both three-sample
+ * branches below */
+ ((unsigned)ABS(p0-q0) < ((thresholds->alpha >> 2) +2)) ?
+ HANTRO_TRUE : HANTRO_FALSE;
+
+ p2 = data[-3];
+ q2 = data[2];
+
+ if (tmpFlag && (unsigned)ABS(p2-p0) < thresholds->beta)
+ {
+ tmp = p1 + p0 + q0; /* shared partial sum of the three p-side results */
+ data[-1] = (u8)((p2 + 2 * tmp + q1 + 4) >> 3);
+ data[-2] = (u8)((p2 + tmp + 2) >> 2);
+ data[-3] = (u8)((2 * data[-4] + 3 * p2 + tmp + 4) >> 3);
+ }
+ else
+ data[-1] = (2 * p1 + p0 + q1 + 2) >> 2; /* only p0 is rewritten */
+
+ if (tmpFlag && (unsigned)ABS(q2-q0) < thresholds->beta)
+ {
+ tmp = p0 + q0 + q1; /* uses the unfiltered p0/q0 held in locals */
+ data[0] = (u8)((p1 + 2 * tmp + q2 + 4) >> 3);
+ data[1] = (u8)((tmp + q2 + 2) >> 2);
+ data[2] = (u8)((2 * data[3] + 3 * q2 + tmp + 4) >> 3);
+ }
+ else
+ data[0] = (u8)((2 * q1 + q0 + p1 + 2) >> 2); /* only q0 is rewritten */
+ }
+ }
+ }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterHorLumaEdge
+
+        Functional description:
+          Filter one horizontal 4-pixel luma edge.
+
+          data points at the first sample below the edge (q0 = data[0],
+          p0 = data[-imageWidth]). Four successive columns are processed
+          and the samples are modified in place. Only the filter for
+          bS values 1..3 is implemented here; bS is used to select the
+          clipping value thresholds->tc0[bS-1].
+          A column is left untouched unless |p0-q0| < alpha,
+          |p1-p0| < beta and |q1-q0| < beta.
+
+------------------------------------------------------------------------------*/
+void FilterHorLumaEdge(
+  u8 *data,
+  u32 bS,
+  edgeThreshold_t *thresholds,
+  i32 imageWidth)
+{
+
+/* Variables */
+
+    i32 delta, tc, tmp;
+    u32 i;
+    u8 p0, q0, p1, q1, p2, q2;
+    /* NOTE(review): table defined elsewhere; indexed with p0 + delta and
+     * q0 - delta, presumably to clip back into the u8 range - confirm */
+    const u8 *clp = h264bsdClip + 512;
+
+/* Code */
+
+    ASSERT(data);
+    /* bS - 1 is used as an unsigned table index right below, so zero is
+     * just as invalid as 4 for this function */
+    ASSERT(bS && bS < 4);
+    ASSERT(thresholds);
+
+    tc = thresholds->tc0[bS-1];
+    /* tmp is the clipping bound for delta: tc plus one for each side whose
+     * p2/q2 test passes; it is reset to tc after every filtered column */
+    tmp = tc;
+    for (i = 4; i; i--, data++)
+    {
+        p1 = data[-imageWidth*2]; p0 = data[-imageWidth];
+        q0 = data[0]; q1 = data[imageWidth];
+        if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+             ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+             ((unsigned)ABS(q1-q0) < thresholds->beta) )
+        {
+            p2 = data[-imageWidth*3];
+
+            /* p side smooth: also adjust p1, clipped to +-tc */
+            if ((unsigned)ABS(p2-p0) < thresholds->beta)
+            {
+                data[-imageWidth*2] = (u8)(p1 + CLIP3(-tc,tc,
+                    (p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1));
+                tmp++;
+            }
+
+            q2 = data[imageWidth*2];
+
+            /* q side smooth: also adjust q1, clipped to +-tc */
+            if ((unsigned)ABS(q2-q0) < thresholds->beta)
+            {
+                data[imageWidth] = (u8)(q1 + CLIP3(-tc,tc,
+                    (q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1));
+                tmp++;
+            }
+
+            delta = CLIP3(-tmp, tmp, ((((q0 - p0) << 2) +
+                      (p1 - q1) + 4) >> 3));
+
+            p0 = clp[p0 + delta];
+            q0 = clp[q0 - delta];
+            tmp = tc;
+            data[-imageWidth] = p0;
+            data[  0] = q0;
+        }
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterHorLuma
+
+        Functional description:
+          Filter all four successive horizontal 4-pixel luma edges, i.e.
+          16 columns. This can be done when bS is the same for all four
+          edges.
+
+          data points at the first sample below the edge (q0 = data[0],
+          p0 = data[-imageWidth]); samples are modified in place.
+          bS must be in the range 1..4: values below 4 select the filter
+          that is clipped with thresholds->tc0[bS-1], 4 selects the
+          stronger filter that may rewrite up to three samples on each
+          side of the edge and therefore also reads the rows at
+          -4*imageWidth and +3*imageWidth.
+          A column is left untouched unless |p0-q0| < alpha,
+          |p1-p0| < beta and |q1-q0| < beta.
+
+------------------------------------------------------------------------------*/
+void FilterHorLuma(
+  u8 *data,
+  u32 bS,
+  edgeThreshold_t *thresholds,
+  i32 imageWidth)
+{
+
+/* Variables */
+
+    i32 delta, tc, tmp;
+    u32 i;
+    u8 p0, q0, p1, q1, p2, q2;
+    u32 tmpFlag;
+    /* NOTE(review): table defined elsewhere; indexed with p0 + delta and
+     * q0 - delta, presumably to clip back into the u8 range - confirm */
+    const u8 *clp = h264bsdClip + 512;
+
+/* Code */
+
+    ASSERT(data);
+    /* bS - 1 is used as an unsigned table index in the bS < 4 branch, so
+     * zero must be rejected as well */
+    ASSERT(bS && bS <= 4);
+    ASSERT(thresholds);
+
+    if (bS < 4)
+    {
+        tc = thresholds->tc0[bS-1];
+        /* tmp is the clipping bound for delta: tc plus one for each side
+         * whose p2/q2 test passes; reset to tc after every column */
+        tmp = tc;
+        for (i = 16; i; i--, data++)
+        {
+            p1 = data[-imageWidth*2]; p0 = data[-imageWidth];
+            q0 = data[0]; q1 = data[imageWidth];
+            if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+                 ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+                 ((unsigned)ABS(q1-q0) < thresholds->beta) )
+            {
+                p2 = data[-imageWidth*3];
+
+                /* p side smooth: also adjust p1, clipped to +-tc */
+                if ((unsigned)ABS(p2-p0) < thresholds->beta)
+                {
+                    data[-imageWidth*2] = (u8)(p1 + CLIP3(-tc,tc,
+                        (p2 + ((p0 + q0 + 1) >> 1) - (p1 << 1)) >> 1));
+                    tmp++;
+                }
+
+                q2 = data[imageWidth*2];
+
+                /* q side smooth: also adjust q1, clipped to +-tc */
+                if ((unsigned)ABS(q2-q0) < thresholds->beta)
+                {
+                    data[imageWidth] = (u8)(q1 + CLIP3(-tc,tc,
+                        (q2 + ((p0 + q0 + 1) >> 1) - (q1 << 1)) >> 1));
+                    tmp++;
+                }
+
+                delta = CLIP3(-tmp, tmp, ((((q0 - p0) << 2) +
+                          (p1 - q1) + 4) >> 3));
+
+                p0 = clp[p0 + delta];
+                q0 = clp[q0 - delta];
+                tmp = tc;
+                data[-imageWidth] = p0;
+                data[  0] = q0;
+            }
+        }
+    }
+    else
+    {
+        for (i = 16; i; i--, data++)
+        {
+            p1 = data[-imageWidth*2]; p0 = data[-imageWidth];
+            q0 = data[0]; q1 = data[imageWidth];
+            if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+                 ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+                 ((unsigned)ABS(q1-q0) < thresholds->beta) )
+            {
+                /* extra condition on |p0-q0| required by both three-sample
+                 * branches below */
+                tmpFlag = ((unsigned)ABS(p0-q0) < ((thresholds->alpha >> 2) +2))
+                            ? HANTRO_TRUE : HANTRO_FALSE;
+
+                p2 = data[-imageWidth*3];
+                q2 = data[imageWidth*2];
+
+                if (tmpFlag && (unsigned)ABS(p2-p0) < thresholds->beta)
+                {
+                    /* shared partial sum of the three p-side results */
+                    tmp = p1 + p0 + q0;
+                    data[-imageWidth] = (u8)((p2 + 2 * tmp + q1 + 4) >> 3);
+                    data[-imageWidth*2] = (u8)((p2 + tmp + 2) >> 2);
+                    data[-imageWidth*3] = (u8)((2 * data[-imageWidth*4] +
+                                           3 * p2 + tmp + 4) >> 3);
+                }
+                else
+                    data[-imageWidth] = (u8)((2 * p1 + p0 + q1 + 2) >> 2);
+
+                if (tmpFlag && (unsigned)ABS(q2-q0) < thresholds->beta)
+                {
+                    /* uses the unfiltered p0/q0 held in locals */
+                    tmp = p0 + q0 + q1;
+                    data[ 0] = (u8)((p1 + 2 * tmp + q2 + 4) >> 3);
+                    data[imageWidth] = (u8)((tmp + q2 + 2) >> 2);
+                    data[imageWidth*2] = (u8)((2 * data[imageWidth*3] +
+                                          3 * q2 + tmp + 4) >> 3);
+                }
+                else
+                    data[0] = (u8)((2 * q1 + q0 + p1 + 2) >> 2);
+            }
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterVerChromaEdge
+
+        Functional description:
+          Filter one vertical 2-pixel chroma edge.
+
+          data points at the first sample on the right-hand side of the
+          edge (q0 = data[0], p0 = data[-1]). Two rows, width bytes
+          apart, are processed and modified in place; only p0 and q0 are
+          ever rewritten.
+          bS must be in the range 1..4: values below 4 clip the change
+          with thresholds->tc0[bS-1] + 1, 4 selects the unclipped
+          averaging filter.
+          A row is left untouched unless |p0-q0| < alpha,
+          |p1-p0| < beta and |q1-q0| < beta.
+
+------------------------------------------------------------------------------*/
+void FilterVerChromaEdge(
+  u8 *data,
+  u32 bS,
+  edgeThreshold_t *thresholds,
+  u32 width)
+{
+
+/* Variables */
+
+    i32 delta, tc;
+    u32 row;
+    u8 *px;
+    u8 p0, q0, p1, q1;
+    /* NOTE(review): table defined elsewhere; indexed with p0 + delta and
+     * q0 - delta, presumably to clip back into the u8 range - confirm */
+    const u8 *clp = h264bsdClip + 512;
+
+/* Code */
+
+    ASSERT(data);
+    /* bS - 1 is used as an unsigned table index in the bS < 4 branch, so
+     * zero must be rejected as well */
+    ASSERT(bS && bS <= 4);
+    ASSERT(thresholds);
+
+    /* both rows of the edge get exactly the same treatment */
+    for (row = 0; row < 2; row++)
+    {
+        px = data + row * width;
+
+        p1 = px[-2]; p0 = px[-1];
+        q0 = px[0]; q1 = px[1];
+        if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+             ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+             ((unsigned)ABS(q1-q0) < thresholds->beta) )
+        {
+            if (bS < 4)
+            {
+                tc = thresholds->tc0[bS-1] + 1;
+                delta = CLIP3(-tc, tc, ((((q0 - p0) << 2) +
+                          (p1 - q1) + 4) >> 3));
+                p0 = clp[p0 + delta];
+                q0 = clp[q0 - delta];
+                px[-1] = p0;
+                px[ 0] = q0;
+            }
+            else
+            {
+                px[-1] = (u8)((2 * p1 + p0 + q1 + 2) >> 2);
+                px[ 0] = (u8)((2 * q1 + q0 + p1 + 2) >> 2);
+            }
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterHorChromaEdge
+
+        Functional description:
+          Filter one horizontal 2-pixel chroma edge.
+
+          data points at the first sample below the edge (q0 = data[0],
+          p0 = data[-width]). Two successive columns are processed and
+          modified in place; only p0 and q0 are ever rewritten.
+          Only the filter for bS values 1..3 is implemented here; the
+          change is clipped with thresholds->tc0[bS-1] + 1.
+          A column is left untouched unless |p0-q0| < alpha,
+          |p1-p0| < beta and |q1-q0| < beta.
+
+------------------------------------------------------------------------------*/
+void FilterHorChromaEdge(
+  u8 *data,
+  u32 bS,
+  edgeThreshold_t *thresholds,
+  i32 width)
+{
+
+/* Variables */
+
+    i32 delta, tc;
+    u32 i;
+    u8 p0, q0, p1, q1;
+    /* NOTE(review): table defined elsewhere; indexed with p0 + delta and
+     * q0 - delta, presumably to clip back into the u8 range - confirm */
+    const u8 *clp = h264bsdClip + 512;
+
+/* Code */
+
+    ASSERT(data);
+    /* bS - 1 is used as an unsigned table index right below, so zero is
+     * just as invalid as 4 for this function */
+    ASSERT(bS && bS < 4);
+    ASSERT(thresholds);
+
+    tc = thresholds->tc0[bS-1] + 1;
+    for (i = 2; i; i--, data++)
+    {
+        p1 = data[-width*2]; p0 = data[-width];
+        q0 = data[0]; q1 = data[width];
+        if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+             ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+             ((unsigned)ABS(q1-q0) < thresholds->beta) )
+        {
+            delta = CLIP3(-tc, tc, ((((q0 - p0) << 2) +
+                      (p1 - q1) + 4) >> 3));
+            p0 = clp[p0 + delta];
+            q0 = clp[q0 - delta];
+            data[-width] = p0;
+            data[  0] = q0;
+        }
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterHorChroma
+
+        Functional description:
+          Filter all four successive horizontal 2-pixel chroma edges,
+          i.e. 8 columns. This can be done if bS is equal for all four
+          edges.
+
+          data points at the first sample below the edge (q0 = data[0],
+          p0 = data[-width]); samples are modified in place and only p0
+          and q0 are ever rewritten.
+          bS must be in the range 1..4: values below 4 clip the change
+          with thresholds->tc0[bS-1] + 1, 4 selects the unclipped
+          averaging filter.
+          A column is left untouched unless |p0-q0| < alpha,
+          |p1-p0| < beta and |q1-q0| < beta.
+
+------------------------------------------------------------------------------*/
+void FilterHorChroma(
+  u8 *data,
+  u32 bS,
+  edgeThreshold_t *thresholds,
+  i32 width)
+{
+
+/* Variables */
+
+    i32 delta, tc;
+    u32 i;
+    u8 p0, q0, p1, q1;
+    /* NOTE(review): table defined elsewhere; indexed with p0 + delta and
+     * q0 - delta, presumably to clip back into the u8 range - confirm */
+    const u8 *clp = h264bsdClip + 512;
+
+/* Code */
+
+    ASSERT(data);
+    /* bS - 1 is used as an unsigned table index in the bS < 4 branch, so
+     * zero must be rejected as well */
+    ASSERT(bS && bS <= 4);
+    ASSERT(thresholds);
+
+    if (bS < 4)
+    {
+        tc = thresholds->tc0[bS-1] + 1;
+        for (i = 8; i; i--, data++)
+        {
+            p1 = data[-width*2]; p0 = data[-width];
+            q0 = data[0]; q1 = data[width];
+            if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+                 ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+                 ((unsigned)ABS(q1-q0) < thresholds->beta) )
+            {
+                delta = CLIP3(-tc, tc, ((((q0 - p0) << 2) +
+                          (p1 - q1) + 4) >> 3));
+                p0 = clp[p0 + delta];
+                q0 = clp[q0 - delta];
+                data[-width] = p0;
+                data[  0] = q0;
+            }
+        }
+    }
+    else
+    {
+        for (i = 8; i; i--, data++)
+        {
+            p1 = data[-width*2]; p0 = data[-width];
+            q0 = data[0]; q1 = data[width];
+            if ( ((unsigned)ABS(p0-q0) < thresholds->alpha) &&
+                 ((unsigned)ABS(p1-p0) < thresholds->beta)  &&
+                 ((unsigned)ABS(q1-q0) < thresholds->beta) )
+            {
+                data[-width] = (u8)((2 * p1 + p0 + q1 + 2) >> 2);
+                data[  0] = (u8)((2 * q1 + q0 + p1 + 2) >> 2);
+            }
+        }
+    }
+
+}
+
+
+/*------------------------------------------------------------------------------
+
+ Function: GetBoundaryStrengths
+
+ Functional description:
+ Function to calculate boundary strengths for all edges of a
+ macroblock. Function returns HANTRO_TRUE if any of the bS values for
+ the macroblock had non-zero value, HANTRO_FALSE otherwise.
+
+ bS is an array of 16 entries, each with a .top and a .left value.
+ Entries 0..3 hold the .top values of the macroblock's top edge and
+ entries 0, 4, 8 and 12 the .left values of its left edge; these are
+ computed only when the corresponding FILTER_TOP_EDGE /
+ FILTER_LEFT_EDGE bit is set in flags and are zeroed otherwise. All
+ remaining .top and .left values belong to inner edges and are always
+ computed.
+ Values written: 4 for a macroblock edge where either macroblock is
+ intra, 3 for inner edges of an intra macroblock, otherwise 2, 1 or 0
+ as produced by the coefficient test or by EdgeBoundaryStrength /
+ InnerBoundaryStrength.
+ Note that the position in bS and the index used for totalCoeff and
+ for the helper functions are numbered differently; the explicit
+ index pairs below encode that relation. NOTE(review): mb4x4Index is
+ defined elsewhere and presumably maps a bS position to that other
+ numbering - confirm against its definition.
+
+------------------------------------------------------------------------------*/
+u32 GetBoundaryStrengths(mbStorage_t *mb, bS_t *bS, u32 flags)
+{
+
+/* Variables */
+
+ /* this flag is set HANTRO_TRUE as soon as any boundary strength value is
+ * non-zero */
+ u32 nonZeroBs = HANTRO_FALSE;
+
+/* Code */
+
+ ASSERT(mb);
+ ASSERT(bS);
+ ASSERT(flags);
+
+ /* top edges: current macroblock against the one in mb->mbB */
+ if (flags & FILTER_TOP_EDGE)
+ {
+ if (IS_INTRA_MB(*mb) || IS_INTRA_MB(*mb->mbB))
+ {
+ bS[0].top = bS[1].top = bS[2].top = bS[3].top = 4;
+ nonZeroBs = HANTRO_TRUE;
+ }
+ else
+ {
+ bS[0].top = EdgeBoundaryStrength(mb, mb->mbB, 0, 10);
+ bS[1].top = EdgeBoundaryStrength(mb, mb->mbB, 1, 11);
+ bS[2].top = EdgeBoundaryStrength(mb, mb->mbB, 4, 14);
+ bS[3].top = EdgeBoundaryStrength(mb, mb->mbB, 5, 15);
+ if (bS[0].top || bS[1].top || bS[2].top || bS[3].top)
+ nonZeroBs = HANTRO_TRUE;
+ }
+ }
+ else
+ {
+ bS[0].top = bS[1].top = bS[2].top = bS[3].top = 0;
+ }
+
+ /* left edges: current macroblock against the one in mb->mbA */
+ if (flags & FILTER_LEFT_EDGE)
+ {
+ if (IS_INTRA_MB(*mb) || IS_INTRA_MB(*mb->mbA))
+ {
+ bS[0].left = bS[4].left = bS[8].left = bS[12].left = 4;
+ nonZeroBs = HANTRO_TRUE;
+ }
+ else
+ {
+ bS[0].left = EdgeBoundaryStrength(mb, mb->mbA, 0, 5);
+ bS[4].left = EdgeBoundaryStrength(mb, mb->mbA, 2, 7);
+ bS[8].left = EdgeBoundaryStrength(mb, mb->mbA, 8, 13);
+ bS[12].left = EdgeBoundaryStrength(mb, mb->mbA, 10, 15);
+ if (!nonZeroBs &&
+ (bS[0].left || bS[4].left || bS[8].left || bS[12].left))
+ nonZeroBs = HANTRO_TRUE;
+ }
+ }
+ else
+ {
+ bS[0].left = bS[4].left = bS[8].left = bS[12].left = 0;
+ }
+
+ /* inner edges: an intra macroblock gets bS 3 on every inner edge */
+ if (IS_INTRA_MB(*mb))
+ {
+ bS[4].top = bS[5].top = bS[6].top = bS[7].top =
+ bS[8].top = bS[9].top = bS[10].top = bS[11].top =
+ bS[12].top = bS[13].top = bS[14].top = bS[15].top = 3;
+
+ bS[1].left = bS[2].left = bS[3].left =
+ bS[5].left = bS[6].left = bS[7].left =
+ bS[9].left = bS[10].left = bS[11].left =
+ bS[13].left = bS[14].left = bS[15].left = 3;
+ nonZeroBs = HANTRO_TRUE;
+ }
+ else
+ {
+ /* 16x16 inter mb -> ref addresses or motion vectors cannot differ,
+ * only check if either of the blocks contain coefficients */
+ if (h264bsdNumMbPart(mb->mbType) == 1)
+ {
+ bS[4].top = mb->totalCoeff[2] || mb->totalCoeff[0] ? 2 : 0;
+ bS[5].top = mb->totalCoeff[3] || mb->totalCoeff[1] ? 2 : 0;
+ bS[6].top = mb->totalCoeff[6] || mb->totalCoeff[4] ? 2 : 0;
+ bS[7].top = mb->totalCoeff[7] || mb->totalCoeff[5] ? 2 : 0;
+ bS[8].top = mb->totalCoeff[8] || mb->totalCoeff[2] ? 2 : 0;
+ bS[9].top = mb->totalCoeff[9] || mb->totalCoeff[3] ? 2 : 0;
+ bS[10].top = mb->totalCoeff[12] || mb->totalCoeff[6] ? 2 : 0;
+ bS[11].top = mb->totalCoeff[13] || mb->totalCoeff[7] ? 2 : 0;
+ bS[12].top = mb->totalCoeff[10] || mb->totalCoeff[8] ? 2 : 0;
+ bS[13].top = mb->totalCoeff[11] || mb->totalCoeff[9] ? 2 : 0;
+ bS[14].top = mb->totalCoeff[14] || mb->totalCoeff[12] ? 2 : 0;
+ bS[15].top = mb->totalCoeff[15] || mb->totalCoeff[13] ? 2 : 0;
+
+ bS[1].left = mb->totalCoeff[1] || mb->totalCoeff[0] ? 2 : 0;
+ bS[2].left = mb->totalCoeff[4] || mb->totalCoeff[1] ? 2 : 0;
+ bS[3].left = mb->totalCoeff[5] || mb->totalCoeff[4] ? 2 : 0;
+ bS[5].left = mb->totalCoeff[3] || mb->totalCoeff[2] ? 2 : 0;
+ bS[6].left = mb->totalCoeff[6] || mb->totalCoeff[3] ? 2 : 0;
+ bS[7].left = mb->totalCoeff[7] || mb->totalCoeff[6] ? 2 : 0;
+ bS[9].left = mb->totalCoeff[9] || mb->totalCoeff[8] ? 2 : 0;
+ bS[10].left = mb->totalCoeff[12] || mb->totalCoeff[9] ? 2 : 0;
+ bS[11].left = mb->totalCoeff[13] || mb->totalCoeff[12] ? 2 : 0;
+ bS[13].left = mb->totalCoeff[11] || mb->totalCoeff[10] ? 2 : 0;
+ bS[14].left = mb->totalCoeff[14] || mb->totalCoeff[11] ? 2 : 0;
+ bS[15].left = mb->totalCoeff[15] || mb->totalCoeff[14] ? 2 : 0;
+ }
+ /* 16x8 inter mb -> ref addresses and motion vectors can be different
+ * only for the middle horizontal edge, for the other top edges it is
+ * enough to check whether the blocks contain coefficients or not. The
+ * same applies to all internal left edges. */
+ else if (mb->mbType == P_L0_L0_16x8)
+ {
+ bS[4].top = mb->totalCoeff[2] || mb->totalCoeff[0] ? 2 : 0;
+ bS[5].top = mb->totalCoeff[3] || mb->totalCoeff[1] ? 2 : 0;
+ bS[6].top = mb->totalCoeff[6] || mb->totalCoeff[4] ? 2 : 0;
+ bS[7].top = mb->totalCoeff[7] || mb->totalCoeff[5] ? 2 : 0;
+ bS[12].top = mb->totalCoeff[10] || mb->totalCoeff[8] ? 2 : 0;
+ bS[13].top = mb->totalCoeff[11] || mb->totalCoeff[9] ? 2 : 0;
+ bS[14].top = mb->totalCoeff[14] || mb->totalCoeff[12] ? 2 : 0;
+ bS[15].top = mb->totalCoeff[15] || mb->totalCoeff[13] ? 2 : 0;
+ bS[8].top = InnerBoundaryStrength(mb, 8, 2);
+ bS[9].top = InnerBoundaryStrength(mb, 9, 3);
+ bS[10].top = InnerBoundaryStrength(mb, 12, 6);
+ bS[11].top = InnerBoundaryStrength(mb, 13, 7);
+
+ bS[1].left = mb->totalCoeff[1] || mb->totalCoeff[0] ? 2 : 0;
+ bS[2].left = mb->totalCoeff[4] || mb->totalCoeff[1] ? 2 : 0;
+ bS[3].left = mb->totalCoeff[5] || mb->totalCoeff[4] ? 2 : 0;
+ bS[5].left = mb->totalCoeff[3] || mb->totalCoeff[2] ? 2 : 0;
+ bS[6].left = mb->totalCoeff[6] || mb->totalCoeff[3] ? 2 : 0;
+ bS[7].left = mb->totalCoeff[7] || mb->totalCoeff[6] ? 2 : 0;
+ bS[9].left = mb->totalCoeff[9] || mb->totalCoeff[8] ? 2 : 0;
+ bS[10].left = mb->totalCoeff[12] || mb->totalCoeff[9] ? 2 : 0;
+ bS[11].left = mb->totalCoeff[13] || mb->totalCoeff[12] ? 2 : 0;
+ bS[13].left = mb->totalCoeff[11] || mb->totalCoeff[10] ? 2 : 0;
+ bS[14].left = mb->totalCoeff[14] || mb->totalCoeff[11] ? 2 : 0;
+ bS[15].left = mb->totalCoeff[15] || mb->totalCoeff[14] ? 2 : 0;
+ }
+ /* 8x16 inter mb -> ref addresses and motion vectors can be different
+ * only for the middle vertical edge, for the other left edges it is
+ * enough to check whether the blocks contain coefficients or not. The
+ * same applies to all internal top edges. */
+ else if (mb->mbType == P_L0_L0_8x16)
+ {
+ bS[4].top = mb->totalCoeff[2] || mb->totalCoeff[0] ? 2 : 0;
+ bS[5].top = mb->totalCoeff[3] || mb->totalCoeff[1] ? 2 : 0;
+ bS[6].top = mb->totalCoeff[6] || mb->totalCoeff[4] ? 2 : 0;
+ bS[7].top = mb->totalCoeff[7] || mb->totalCoeff[5] ? 2 : 0;
+ bS[8].top = mb->totalCoeff[8] || mb->totalCoeff[2] ? 2 : 0;
+ bS[9].top = mb->totalCoeff[9] || mb->totalCoeff[3] ? 2 : 0;
+ bS[10].top = mb->totalCoeff[12] || mb->totalCoeff[6] ? 2 : 0;
+ bS[11].top = mb->totalCoeff[13] || mb->totalCoeff[7] ? 2 : 0;
+ bS[12].top = mb->totalCoeff[10] || mb->totalCoeff[8] ? 2 : 0;
+ bS[13].top = mb->totalCoeff[11] || mb->totalCoeff[9] ? 2 : 0;
+ bS[14].top = mb->totalCoeff[14] || mb->totalCoeff[12] ? 2 : 0;
+ bS[15].top = mb->totalCoeff[15] || mb->totalCoeff[13] ? 2 : 0;
+
+ bS[1].left = mb->totalCoeff[1] || mb->totalCoeff[0] ? 2 : 0;
+ bS[3].left = mb->totalCoeff[5] || mb->totalCoeff[4] ? 2 : 0;
+ bS[5].left = mb->totalCoeff[3] || mb->totalCoeff[2] ? 2 : 0;
+ bS[7].left = mb->totalCoeff[7] || mb->totalCoeff[6] ? 2 : 0;
+ bS[9].left = mb->totalCoeff[9] || mb->totalCoeff[8] ? 2 : 0;
+ bS[11].left = mb->totalCoeff[13] || mb->totalCoeff[12] ? 2 : 0;
+ bS[13].left = mb->totalCoeff[11] || mb->totalCoeff[10] ? 2 : 0;
+ bS[15].left = mb->totalCoeff[15] || mb->totalCoeff[14] ? 2 : 0;
+ bS[2].left = InnerBoundaryStrength(mb, 4, 1);
+ bS[6].left = InnerBoundaryStrength(mb, 6, 3);
+ bS[10].left = InnerBoundaryStrength(mb, 12, 9);
+ bS[14].left = InnerBoundaryStrength(mb, 14, 11);
+ }
+ else /* any other inter mb type: every inner edge gets the full
+ * coefficient / motion vector / ref address comparison */
+ {
+ bS[4].top =
+ InnerBoundaryStrength(mb, mb4x4Index[4], mb4x4Index[0]);
+ bS[5].top =
+ InnerBoundaryStrength(mb, mb4x4Index[5], mb4x4Index[1]);
+ bS[6].top =
+ InnerBoundaryStrength(mb, mb4x4Index[6], mb4x4Index[2]);
+ bS[7].top =
+ InnerBoundaryStrength(mb, mb4x4Index[7], mb4x4Index[3]);
+ bS[8].top =
+ InnerBoundaryStrength(mb, mb4x4Index[8], mb4x4Index[4]);
+ bS[9].top =
+ InnerBoundaryStrength(mb, mb4x4Index[9], mb4x4Index[5]);
+ bS[10].top =
+ InnerBoundaryStrength(mb, mb4x4Index[10], mb4x4Index[6]);
+ bS[11].top =
+ InnerBoundaryStrength(mb, mb4x4Index[11], mb4x4Index[7]);
+ bS[12].top =
+ InnerBoundaryStrength(mb, mb4x4Index[12], mb4x4Index[8]);
+ bS[13].top =
+ InnerBoundaryStrength(mb, mb4x4Index[13], mb4x4Index[9]);
+ bS[14].top =
+ InnerBoundaryStrength(mb, mb4x4Index[14], mb4x4Index[10]);
+ bS[15].top =
+ InnerBoundaryStrength(mb, mb4x4Index[15], mb4x4Index[11]);
+
+ bS[1].left =
+ InnerBoundaryStrength(mb, mb4x4Index[1], mb4x4Index[0]);
+ bS[2].left =
+ InnerBoundaryStrength(mb, mb4x4Index[2], mb4x4Index[1]);
+ bS[3].left =
+ InnerBoundaryStrength(mb, mb4x4Index[3], mb4x4Index[2]);
+ bS[5].left =
+ InnerBoundaryStrength(mb, mb4x4Index[5], mb4x4Index[4]);
+ bS[6].left =
+ InnerBoundaryStrength(mb, mb4x4Index[6], mb4x4Index[5]);
+ bS[7].left =
+ InnerBoundaryStrength(mb, mb4x4Index[7], mb4x4Index[6]);
+ bS[9].left =
+ InnerBoundaryStrength(mb, mb4x4Index[9], mb4x4Index[8]);
+ bS[10].left =
+ InnerBoundaryStrength(mb, mb4x4Index[10], mb4x4Index[9]);
+ bS[11].left =
+ InnerBoundaryStrength(mb, mb4x4Index[11], mb4x4Index[10]);
+ bS[13].left =
+ InnerBoundaryStrength(mb, mb4x4Index[13], mb4x4Index[12]);
+ bS[14].left =
+ InnerBoundaryStrength(mb, mb4x4Index[14], mb4x4Index[13]);
+ bS[15].left =
+ InnerBoundaryStrength(mb, mb4x4Index[15], mb4x4Index[14]);
+ }
+ if (!nonZeroBs && /* scan the inner edges only if nothing was non-zero
+ * so far */
+ (bS[4].top || bS[5].top || bS[6].top || bS[7].top ||
+ bS[8].top || bS[9].top || bS[10].top || bS[11].top ||
+ bS[12].top || bS[13].top || bS[14].top || bS[15].top ||
+ bS[1].left || bS[2].left || bS[3].left ||
+ bS[5].left || bS[6].left || bS[7].left ||
+ bS[9].left || bS[10].left || bS[11].left ||
+ bS[13].left || bS[14].left || bS[15].left))
+ nonZeroBs = HANTRO_TRUE;
+ }
+
+ return(nonZeroBs);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: GetLumaEdgeThresholds
+
+        Functional description:
+          Compute alpha, beta and tc0 thresholds for inner, left and top
+          luma edges of a macroblock. thresholds[INNER] is always
+          written; thresholds[TOP] and thresholds[LEFT] are written only
+          when the corresponding FILTER_TOP_EDGE / FILTER_LEFT_EDGE bit
+          is set in filteringFlags. For the macroblock edges the
+          rounded-up average of the two macroblocks' qpY is used.
+
+------------------------------------------------------------------------------*/
+void GetLumaEdgeThresholds(
+  edgeThreshold_t *thresholds,
+  mbStorage_t *mb,
+  u32 filteringFlags)
+{
+
+/* Variables */
+
+    u32 idxAlpha, idxBeta;
+    u32 meanQp, ownQp, neighbourQp;
+
+/* Code */
+
+    ASSERT(thresholds);
+    ASSERT(mb);
+
+    ownQp = mb->qpY;
+
+    /* table indices are qp plus the filter offset, limited to 0..51 */
+    idxAlpha = (u32)CLIP3(0, 51, (i32)ownQp + mb->filterOffsetA);
+    idxBeta = (u32)CLIP3(0, 51, (i32)ownQp + mb->filterOffsetB);
+
+    thresholds[INNER].alpha = alphas[idxAlpha];
+    thresholds[INNER].beta = betas[idxBeta];
+    thresholds[INNER].tc0 = tc0[idxAlpha];
+
+    if (filteringFlags & FILTER_TOP_EDGE)
+    {
+        neighbourQp = mb->mbB->qpY;
+        if (neighbourQp == ownQp)
+        {
+            /* same qp on both sides -> identical to the inner thresholds */
+            thresholds[TOP].alpha = thresholds[INNER].alpha;
+            thresholds[TOP].beta = thresholds[INNER].beta;
+            thresholds[TOP].tc0 = thresholds[INNER].tc0;
+        }
+        else
+        {
+            meanQp = (ownQp + neighbourQp + 1) >> 1;
+
+            idxAlpha = (u32)CLIP3(0, 51, (i32)meanQp + mb->filterOffsetA);
+            idxBeta = (u32)CLIP3(0, 51, (i32)meanQp + mb->filterOffsetB);
+
+            thresholds[TOP].alpha = alphas[idxAlpha];
+            thresholds[TOP].beta = betas[idxBeta];
+            thresholds[TOP].tc0 = tc0[idxAlpha];
+        }
+    }
+
+    if (filteringFlags & FILTER_LEFT_EDGE)
+    {
+        neighbourQp = mb->mbA->qpY;
+        if (neighbourQp == ownQp)
+        {
+            /* same qp on both sides -> identical to the inner thresholds */
+            thresholds[LEFT].alpha = thresholds[INNER].alpha;
+            thresholds[LEFT].beta = thresholds[INNER].beta;
+            thresholds[LEFT].tc0 = thresholds[INNER].tc0;
+        }
+        else
+        {
+            meanQp = (ownQp + neighbourQp + 1) >> 1;
+
+            idxAlpha = (u32)CLIP3(0, 51, (i32)meanQp + mb->filterOffsetA);
+            idxBeta = (u32)CLIP3(0, 51, (i32)meanQp + mb->filterOffsetB);
+
+            thresholds[LEFT].alpha = alphas[idxAlpha];
+            thresholds[LEFT].beta = betas[idxBeta];
+            thresholds[LEFT].tc0 = tc0[idxAlpha];
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: GetChromaEdgeThresholds
+
+        Functional description:
+          Compute alpha, beta and tc0 thresholds for inner, left and top
+          chroma edges of a macroblock. Each luma qp is first offset by
+          chromaQpIndexOffset, limited to 0..51 and mapped through
+          h264bsdQpC. thresholds[INNER] is always written;
+          thresholds[TOP] and thresholds[LEFT] are written only when the
+          corresponding FILTER_TOP_EDGE / FILTER_LEFT_EDGE bit is set in
+          filteringFlags. For the macroblock edges the rounded-up
+          average of the two mapped qp values is used.
+
+------------------------------------------------------------------------------*/
+void GetChromaEdgeThresholds(
+  edgeThreshold_t *thresholds,
+  mbStorage_t *mb,
+  u32 filteringFlags,
+  i32 chromaQpIndexOffset)
+{
+
+/* Variables */
+
+    u32 idxAlpha, idxBeta;
+    u32 meanQp, ownLumaQp, ownChromaQp, neighbourQp;
+
+/* Code */
+
+    ASSERT(thresholds);
+    ASSERT(mb);
+
+    ownLumaQp = mb->qpY;
+    ownChromaQp =
+        h264bsdQpC[CLIP3(0, 51, (i32)ownLumaQp + chromaQpIndexOffset)];
+
+    /* table indices are qp plus the filter offset, limited to 0..51 */
+    idxAlpha = (u32)CLIP3(0, 51, (i32)ownChromaQp + mb->filterOffsetA);
+    idxBeta = (u32)CLIP3(0, 51, (i32)ownChromaQp + mb->filterOffsetB);
+
+    thresholds[INNER].alpha = alphas[idxAlpha];
+    thresholds[INNER].beta = betas[idxBeta];
+    thresholds[INNER].tc0 = tc0[idxAlpha];
+
+    if (filteringFlags & FILTER_TOP_EDGE)
+    {
+        neighbourQp = mb->mbB->qpY;
+        if (neighbourQp == ownLumaQp)
+        {
+            /* equal luma qp -> reuse the inner thresholds */
+            thresholds[TOP].alpha = thresholds[INNER].alpha;
+            thresholds[TOP].beta = thresholds[INNER].beta;
+            thresholds[TOP].tc0 = thresholds[INNER].tc0;
+        }
+        else
+        {
+            neighbourQp =
+                h264bsdQpC[CLIP3(0, 51, (i32)neighbourQp + chromaQpIndexOffset)];
+            meanQp = (ownChromaQp + neighbourQp + 1) >> 1;
+
+            idxAlpha = (u32)CLIP3(0, 51, (i32)meanQp + mb->filterOffsetA);
+            idxBeta = (u32)CLIP3(0, 51, (i32)meanQp + mb->filterOffsetB);
+
+            thresholds[TOP].alpha = alphas[idxAlpha];
+            thresholds[TOP].beta = betas[idxBeta];
+            thresholds[TOP].tc0 = tc0[idxAlpha];
+        }
+    }
+
+    if (filteringFlags & FILTER_LEFT_EDGE)
+    {
+        neighbourQp = mb->mbA->qpY;
+        if (neighbourQp == ownLumaQp)
+        {
+            /* equal luma qp -> reuse the inner thresholds */
+            thresholds[LEFT].alpha = thresholds[INNER].alpha;
+            thresholds[LEFT].beta = thresholds[INNER].beta;
+            thresholds[LEFT].tc0 = thresholds[INNER].tc0;
+        }
+        else
+        {
+            neighbourQp =
+                h264bsdQpC[CLIP3(0, 51, (i32)neighbourQp + chromaQpIndexOffset)];
+            meanQp = (ownChromaQp + neighbourQp + 1) >> 1;
+
+            idxAlpha = (u32)CLIP3(0, 51, (i32)meanQp + mb->filterOffsetA);
+            idxBeta = (u32)CLIP3(0, 51, (i32)meanQp + mb->filterOffsetB);
+
+            thresholds[LEFT].alpha = alphas[idxAlpha];
+            thresholds[LEFT].beta = betas[idxBeta];
+            thresholds[LEFT].tc0 = tc0[idxAlpha];
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FilterLuma
+
+        Functional description:
+          Filter all luma edges of a macroblock. The macroblock is
+          handled as four rows of four 4x4 blocks; bS holds one entry
+          per block, row by row. Within a block row all vertical edges
+          are filtered before the horizontal ones.
+
+------------------------------------------------------------------------------*/
+void FilterLuma(
+  u8 *data,
+  bS_t *bS,
+  edgeThreshold_t *thresholds,
+  u32 width)
+{
+
+/* Variables */
+
+    u32 blkRow, blkCol;
+    bS_t *rowBs;
+    u8 *rowPtr;
+    u32 horSet;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(bS);
+    ASSERT(thresholds);
+
+    rowPtr = data;
+    rowBs = bS;
+
+    /* horizontal edges of the first block row lie on the top macroblock
+     * edge, those of the following rows are inner edges */
+    horSet = TOP;
+
+    for (blkRow = 0; blkRow < 4; blkRow++)
+    {
+        /* vertical edges, only where bS is non-zero: column 0 is the left
+         * edge of the macroblock, the others are inner edges */
+        for (blkCol = 0; blkCol < 4; blkCol++)
+        {
+            if (rowBs[blkCol].left)
+            {
+                FilterVerLumaEdge(rowPtr + 4*blkCol, rowBs[blkCol].left,
+                    thresholds + (blkCol ? INNER : LEFT), width);
+            }
+        }
+
+        /* horizontal edges: when bS differs between the four blocks each
+         * edge is filtered separately with FilterHorLumaEdge, otherwise
+         * the whole row is handled by one FilterHorLuma call */
+        if (rowBs[0].top != rowBs[1].top || rowBs[1].top != rowBs[2].top ||
+            rowBs[2].top != rowBs[3].top)
+        {
+            for (blkCol = 0; blkCol < 4; blkCol++)
+            {
+                if (rowBs[blkCol].top)
+                {
+                    FilterHorLumaEdge(rowPtr + 4*blkCol, rowBs[blkCol].top,
+                        thresholds + horSet, (i32)width);
+                }
+            }
+        }
+        else if (rowBs[0].top)
+        {
+            FilterHorLuma(rowPtr, rowBs[0].top, thresholds + horSet,
+                (i32)width);
+        }
+
+        /* four pixel rows ahead, i.e. next row of 4x4-blocks */
+        rowPtr += width*4;
+        rowBs += 4;
+        horSet = INNER;
+    }
+}
+
+/*------------------------------------------------------------------------------
+ Function: FilterChroma
+
+ Functional description:
+ Filter all chroma edges of one macroblock. The Cb and Cr planes are
+ filtered with the same bS values and thresholds; bS entries are consumed
+ eight per loop round and width is the distance between chroma pixel rows.
+------------------------------------------------------------------------------*/
+void FilterChroma(
+ u8 *dataCb,
+ u8 *dataCr,
+ bS_t *bS,
+ edgeThreshold_t *thresholds,
+ u32 width)
+{
+
+/* Variables */
+
+ u32 vblock;
+ bS_t *tmp;
+ u32 offset;
+
+/* Code */
+
+ ASSERT(dataCb);
+ ASSERT(dataCr);
+ ASSERT(bS);
+ ASSERT(thresholds);
+
+ tmp = bS;
+ offset = TOP; /* first round: horizontal edge is the macroblock top edge */
+
+ /* loop block rows, perform filtering for all vertical edges of the block
+ * row first, then filter each horizontal edge of the row */
+ for (vblock = 0; vblock < 2; vblock++)
+ {
+ /* only perform filtering if bS is non-zero, first two of the four
+ * FilterVerChromaEdge calls handle the left edge of the macroblock,
+ * others filter the inner edge. Note that as chroma uses bS values
+ * determined for luma edges, each bS is used only for 2 pixels of
+ * a 4-pixel edge */
+ if (tmp[0].left)
+ {
+ FilterVerChromaEdge(dataCb, tmp[0].left, thresholds + LEFT, width);
+ FilterVerChromaEdge(dataCr, tmp[0].left, thresholds + LEFT, width);
+ }
+ if (tmp[4].left)
+ {
+ FilterVerChromaEdge(dataCb+2*width, tmp[4].left, thresholds + LEFT,
+ width);
+ FilterVerChromaEdge(dataCr+2*width, tmp[4].left, thresholds + LEFT,
+ width);
+ }
+ if (tmp[2].left)
+ {
+ FilterVerChromaEdge(dataCb+4, tmp[2].left, thresholds + INNER,
+ width);
+ FilterVerChromaEdge(dataCr+4, tmp[2].left, thresholds + INNER,
+ width);
+ }
+ if (tmp[6].left)
+ {
+ FilterVerChromaEdge(dataCb+2*width+4, tmp[6].left,
+ thresholds + INNER, width);
+ FilterVerChromaEdge(dataCr+2*width+4, tmp[6].left,
+ thresholds + INNER, width);
+ }
+
+ /* if bS is equal for all horizontal edges of the row -> perform
+ * filtering with FilterHorChroma, otherwise use FilterHorChromaEdge
+ * for each edge separately. offset variable indicates top macroblock
+ * edge on the first loop round, inner edge for the second */
+ if (tmp[0].top == tmp[1].top && tmp[1].top == tmp[2].top &&
+ tmp[2].top == tmp[3].top)
+ {
+ if(tmp[0].top)
+ {
+ FilterHorChroma(dataCb, tmp[0].top, thresholds+offset,
+ (i32)width);
+ FilterHorChroma(dataCr, tmp[0].top, thresholds+offset,
+ (i32)width);
+ }
+ }
+ else
+ {
+ if (tmp[0].top)
+ {
+ FilterHorChromaEdge(dataCb, tmp[0].top, thresholds+offset,
+ (i32)width);
+ FilterHorChromaEdge(dataCr, tmp[0].top, thresholds+offset,
+ (i32)width);
+ }
+ if (tmp[1].top)
+ {
+ FilterHorChromaEdge(dataCb+2, tmp[1].top, thresholds+offset,
+ (i32)width);
+ FilterHorChromaEdge(dataCr+2, tmp[1].top, thresholds+offset,
+ (i32)width);
+ }
+ if (tmp[2].top)
+ {
+ FilterHorChromaEdge(dataCb+4, tmp[2].top, thresholds+offset,
+ (i32)width);
+ FilterHorChromaEdge(dataCr+4, tmp[2].top, thresholds+offset,
+ (i32)width);
+ }
+ if (tmp[3].top)
+ {
+ FilterHorChromaEdge(dataCb+6, tmp[3].top, thresholds+offset,
+ (i32)width);
+ FilterHorChromaEdge(dataCr+6, tmp[3].top, thresholds+offset,
+ (i32)width);
+ }
+ }
+
+ tmp += 8; /* skip eight bS entries per round */
+ dataCb += width*4; /* four chroma pixel rows down */
+ dataCr += width*4;
+ offset = INNER; /* second round filters the inner horizontal edge */
+ }
+}
+
+#else /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdFilterPicture
+
+ Functional description:
+ Perform deblocking filtering for a picture. Filter does not copy
+ the original picture anywhere but filtering is performed directly
+ on the original image. Parameters controlling the filtering process
+ are computed based on information in macroblock structures of the
+ filtered macroblock, macroblock above and macroblock on the left of
+ the filtered one.
+ Plane addressing used below: luma first, then Cb, then Cr, in image->data.
+ Inputs:
+ image pointer to image to be filtered; width/height are used as
+ macroblock counts
+ mb pointer to macroblock data structure of the top-left
+ macroblock of the picture (walked in raster order)
+ Outputs:
+ image filtered image stored here
+
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+
+/*lint --e{550} Symbol not accessed */
+void h264bsdFilterPicture(
+ image_t *image,
+ mbStorage_t *mb)
+{
+
+/* Variables */
+
+ u32 flags;
+ u32 picSizeInMbs, mbRow, mbCol;
+ u32 picWidthInMbs;
+ u8 *data;
+ mbStorage_t *pMb;
+ u8 bS[2][16]; /* [0] = left/vertical edges, [1] = top/horizontal edges */
+ u8 thresholdLuma[2][16]; /* tc0 values, same row meaning as bS */
+ u8 thresholdChroma[2][8]; /* tc0 values for chroma, same row meaning */
+ u8 alpha[2][2]; /* [row][0] = macroblock edge, [row][1] = internal edges */
+ u8 beta[2][2]; /* same layout as alpha */
+ OMXResult res; /* assigned from every OMX call but never checked */
+
+/* Code */
+
+ ASSERT(image);
+ ASSERT(mb);
+ ASSERT(image->data);
+ ASSERT(image->width);
+ ASSERT(image->height);
+
+ picWidthInMbs = image->width;
+ data = image->data;
+ picSizeInMbs = picWidthInMbs * image->height;
+
+ pMb = mb;
+
+ for (mbRow = 0, mbCol = 0; mbRow < image->height; pMb++)
+ {
+ flags = GetMbFilteringFlags(pMb);
+
+ if (flags)
+ {
+ /* GetBoundaryStrengths function returns non-zero value if any of
+ * the bS values for the macroblock being processed was non-zero */
+ if (GetBoundaryStrengths(pMb, bS, flags))
+ {
+
+ /* Luma */
+ GetLumaEdgeThresholds(pMb,alpha,beta,thresholdLuma,bS,flags);
+ data = image->data + mbRow * picWidthInMbs * 256 + mbCol * 16;
+
+ res = omxVCM4P10_FilterDeblockingLuma_VerEdge_I( data,
+ (OMX_S32)(picWidthInMbs*16),
+ (const OMX_U8*)alpha,
+ (const OMX_U8*)beta,
+ (const OMX_U8*)thresholdLuma,
+ (const OMX_U8*)bS );
+
+ res = omxVCM4P10_FilterDeblockingLuma_HorEdge_I( data,
+ (OMX_S32)(picWidthInMbs*16),
+ (const OMX_U8*)alpha+2,
+ (const OMX_U8*)beta+2,
+ (const OMX_U8*)thresholdLuma+16,
+ (const OMX_U8*)bS+16 );
+ /* Cb */
+ GetChromaEdgeThresholds(pMb, alpha, beta, thresholdChroma,
+ bS, flags, pMb->chromaQpIndexOffset);
+ data = image->data + picSizeInMbs * 256 +
+ mbRow * picWidthInMbs * 64 + mbCol * 8;
+
+ res = omxVCM4P10_FilterDeblockingChroma_VerEdge_I( data,
+ (OMX_S32)(picWidthInMbs*8),
+ (const OMX_U8*)alpha,
+ (const OMX_U8*)beta,
+ (const OMX_U8*)thresholdChroma,
+ (const OMX_U8*)bS );
+ res = omxVCM4P10_FilterDeblockingChroma_HorEdge_I( data,
+ (OMX_S32)(picWidthInMbs*8),
+ (const OMX_U8*)alpha+2,
+ (const OMX_U8*)beta+2,
+ (const OMX_U8*)thresholdChroma+8,
+ (const OMX_U8*)bS+16 );
+ /* Cr */
+ data += (picSizeInMbs * 64);
+ res = omxVCM4P10_FilterDeblockingChroma_VerEdge_I( data,
+ (OMX_S32)(picWidthInMbs*8),
+ (const OMX_U8*)alpha,
+ (const OMX_U8*)beta,
+ (const OMX_U8*)thresholdChroma,
+ (const OMX_U8*)bS );
+ res = omxVCM4P10_FilterDeblockingChroma_HorEdge_I( data,
+ (OMX_S32)(picWidthInMbs*8),
+ (const OMX_U8*)alpha+2,
+ (const OMX_U8*)beta+2,
+ (const OMX_U8*)thresholdChroma+8,
+ (const OMX_U8*)bS+16 );
+ }
+ }
+
+ mbCol++;
+ if (mbCol == picWidthInMbs)
+ {
+ mbCol = 0;
+ mbRow++;
+ }
+ }
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: GetBoundaryStrengths
+
+ Functional description:
+ Calculate boundary strengths for all edges of a macroblock. bS[0] gets
+ the left (vertical) edges and bS[1] the top (horizontal) edges; entries
+ 0..3 of each row are the macroblock edge. Returns HANTRO_TRUE if any
+ bS value for the macroblock is non-zero, HANTRO_FALSE otherwise.
+------------------------------------------------------------------------------*/
+u32 GetBoundaryStrengths(mbStorage_t *mb, u8 (*bS)[16], u32 flags)
+{
+
+/* Variables */
+
+ /* this flag is set HANTRO_TRUE as soon as any boundary strength value is
+ * non-zero */
+ u32 nonZeroBs = HANTRO_FALSE;
+ u32 *pTmp;
+ u32 tmp1, tmp2, isIntraMb;
+
+/* Code */
+
+ ASSERT(mb);
+ ASSERT(bS);
+ ASSERT(flags);
+
+ isIntraMb = IS_INTRA_MB(*mb);
+
+ /* top edges */
+ pTmp = (u32*)&bS[1][0]; /* NOTE(review): u8 row accessed as u32 - assumes 4-byte alignment, confirm */
+ if (flags & FILTER_TOP_EDGE)
+ {
+ if (isIntraMb || IS_INTRA_MB(*mb->mbB))
+ {
+ *pTmp = 0x04040404; /* four entries set to 4 in one store */
+ nonZeroBs = HANTRO_TRUE;
+ }
+ else
+ {
+ *pTmp = EdgeBoundaryStrengthTop(mb, mb->mbB);
+ if (*pTmp)
+ nonZeroBs = HANTRO_TRUE;
+ }
+ }
+ else
+ {
+ *pTmp = 0;
+ }
+
+ /* left edges */
+ pTmp = (u32*)&bS[0][0];
+ if (flags & FILTER_LEFT_EDGE)
+ {
+ if (isIntraMb || IS_INTRA_MB(*mb->mbA))
+ {
+ /*bS[0][0] = bS[0][1] = bS[0][2] = bS[0][3] = 4;*/
+ *pTmp = 0x04040404;
+ nonZeroBs = HANTRO_TRUE;
+ }
+ else
+ {
+ *pTmp = EdgeBoundaryStrengthLeft(mb, mb->mbA);
+ if (!nonZeroBs && *pTmp)
+ nonZeroBs = HANTRO_TRUE;
+ }
+ }
+ else
+ {
+ *pTmp = 0;
+ }
+
+ /* inner edges */
+ if (isIntraMb)
+ {
+ pTmp++; /* skip bS[0][0..3], the left macroblock edge set above */
+ *pTmp++ = 0x03030303;
+ *pTmp++ = 0x03030303;
+ *pTmp++ = 0x03030303;
+ pTmp++; /* skip bS[1][0..3], the top macroblock edge set above */
+ *pTmp++ = 0x03030303;
+ *pTmp++ = 0x03030303;
+ *pTmp = 0x03030303;
+
+ nonZeroBs = HANTRO_TRUE;
+ }
+ else
+ {
+ pTmp = (u32*)mb->totalCoeff; /* NOTE(review): masks below match the index comments only for 1-byte elements on a little-endian target - confirm */
+
+ /* 16x16 inter mb -> ref addresses or motion vectors cannot differ,
+ * only check if either of the blocks contain coefficients */
+ if (h264bsdNumMbPart(mb->mbType) == 1)
+ {
+ tmp1 = *pTmp++;
+ tmp2 = *pTmp++;
+ bS[1][4] = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [2] || [0] */
+ bS[1][5] = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [3] || [1] */
+ bS[0][4] = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [1] || [0] */
+ bS[0][5] = (tmp1 & 0xFFFF0000) ? 2 : 0; /* [3] || [2] */
+
+ tmp1 = *pTmp++;
+ bS[1][6] = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [6] || [4] */
+ bS[1][7] = (tmp2 & 0xFF00FF00) ? 2 : 0; /* [7] || [5] */
+ bS[0][12] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [5] || [4] */
+ bS[0][13] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [7] || [6] */
+ tmp2 = *pTmp;
+ bS[1][12] = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [10] || [8] */
+ bS[1][13] = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [11] || [9] */
+ bS[0][6] = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [9] || [8] */
+ bS[0][7] = (tmp1 & 0xFFFF0000) ? 2 : 0; /* [11] || [10] */
+
+ bS[1][14] = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [14] || [12] */
+ bS[1][15] = (tmp2 & 0xFF00FF00) ? 2 : 0; /* [15] || [13] */
+ bS[0][14] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [13] || [12] */
+ bS[0][15] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [15] || [14] */
+
+ {
+ u32 tmp3, tmp4;
+
+ tmp1 = mb->totalCoeff[8];
+ tmp2 = mb->totalCoeff[2];
+ tmp3 = mb->totalCoeff[9];
+ tmp4 = mb->totalCoeff[3];
+
+ bS[1][8] = tmp1 || tmp2 ? 2 : 0;
+ tmp1 = mb->totalCoeff[12];
+ tmp2 = mb->totalCoeff[6];
+ bS[1][9] = tmp3 || tmp4 ? 2 : 0;
+ tmp3 = mb->totalCoeff[13];
+ tmp4 = mb->totalCoeff[7];
+ bS[1][10] = tmp1 || tmp2 ? 2 : 0;
+ tmp1 = mb->totalCoeff[4];
+ tmp2 = mb->totalCoeff[1];
+ bS[1][11] = tmp3 || tmp4 ? 2 : 0;
+ tmp3 = mb->totalCoeff[6];
+ tmp4 = mb->totalCoeff[3];
+ bS[0][8] = tmp1 || tmp2 ? 2 : 0;
+ tmp1 = mb->totalCoeff[12];
+ tmp2 = mb->totalCoeff[9];
+ bS[0][9] = tmp3 || tmp4 ? 2 : 0;
+ tmp3 = mb->totalCoeff[14];
+ tmp4 = mb->totalCoeff[11];
+ bS[0][10] = tmp1 || tmp2 ? 2 : 0;
+ bS[0][11] = tmp3 || tmp4 ? 2 : 0;
+ }
+ }
+
+ /* 16x8 inter mb -> ref addresses and motion vectors can be different
+ * only for the middle horizontal edge, for the other top edges it is
+ * enough to check whether the blocks contain coefficients or not. The
+ * same applies to all internal left edges. */
+ else if (mb->mbType == P_L0_L0_16x8)
+ {
+ tmp1 = *pTmp++;
+ tmp2 = *pTmp++;
+ bS[1][4] = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [2] || [0] */
+ bS[1][5] = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [3] || [1] */
+ bS[0][4] = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [1] || [0] */
+ bS[0][5] = (tmp1 & 0xFFFF0000) ? 2 : 0; /* [3] || [2] */
+ tmp1 = *pTmp++;
+ bS[1][6] = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [6] || [4] */
+ bS[1][7] = (tmp2 & 0xFF00FF00) ? 2 : 0; /* [7] || [5] */
+ bS[0][12] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [5] || [4] */
+ bS[0][13] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [7] || [6] */
+ tmp2 = *pTmp;
+ bS[1][12] = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [10] || [8] */
+ bS[1][13] = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [11] || [9] */
+ bS[0][6] = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [9] || [8] */
+ bS[0][7] = (tmp1 & 0xFFFF0000) ? 2 : 0; /* [11] || [10] */
+
+ bS[1][14] = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [14] || [12] */
+ bS[1][15] = (tmp2 & 0xFF00FF00) ? 2 : 0; /* [15] || [13] */
+ bS[0][14] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [13] || [12] */
+ bS[0][15] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [15] || [14] */
+
+ bS[1][8] = (u8)InnerBoundaryStrength(mb, 8, 2);
+ bS[1][9] = (u8)InnerBoundaryStrength(mb, 9, 3);
+ bS[1][10] = (u8)InnerBoundaryStrength(mb, 12, 6);
+ bS[1][11] = (u8)InnerBoundaryStrength(mb, 13, 7);
+
+ {
+ u32 tmp3, tmp4;
+
+ tmp1 = mb->totalCoeff[4];
+ tmp2 = mb->totalCoeff[1];
+ tmp3 = mb->totalCoeff[6];
+ tmp4 = mb->totalCoeff[3];
+ bS[0][8] = tmp1 || tmp2 ? 2 : 0;
+ tmp1 = mb->totalCoeff[12];
+ tmp2 = mb->totalCoeff[9];
+ bS[0][9] = tmp3 || tmp4 ? 2 : 0;
+ tmp3 = mb->totalCoeff[14];
+ tmp4 = mb->totalCoeff[11];
+ bS[0][10] = tmp1 || tmp2 ? 2 : 0;
+ bS[0][11] = tmp3 || tmp4 ? 2 : 0;
+ }
+ }
+ /* 8x16 inter mb -> ref addresses and motion vectors can be different
+ * only for the middle vertical edge, for the other left edges it is
+ * enough to check whether the blocks contain coefficients or not. The
+ * same applies to all internal top edges. */
+ else if (mb->mbType == P_L0_L0_8x16)
+ {
+ tmp1 = *pTmp++;
+ tmp2 = *pTmp++;
+ bS[1][4] = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [2] || [0] */
+ bS[1][5] = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [3] || [1] */
+ bS[0][4] = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [1] || [0] */
+ bS[0][5] = (tmp1 & 0xFFFF0000) ? 2 : 0; /* [3] || [2] */
+ tmp1 = *pTmp++;
+ bS[1][6] = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [6] || [4] */
+ bS[1][7] = (tmp2 & 0xFF00FF00) ? 2 : 0; /* [7] || [5] */
+ bS[0][12] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [5] || [4] */
+ bS[0][13] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [7] || [6] */
+ tmp2 = *pTmp;
+ bS[1][12] = (tmp1 & 0x00FF00FF) ? 2 : 0; /* [10] || [8] */
+ bS[1][13] = (tmp1 & 0xFF00FF00) ? 2 : 0; /* [11] || [9] */
+ bS[0][6] = (tmp1 & 0x0000FFFF) ? 2 : 0; /* [9] || [8] */
+ bS[0][7] = (tmp1 & 0xFFFF0000) ? 2 : 0; /* [11] || [10] */
+
+ bS[1][14] = (tmp2 & 0x00FF00FF) ? 2 : 0; /* [14] || [12] */
+ bS[1][15] = (tmp2 & 0xFF00FF00) ? 2 : 0; /* [15] || [13] */
+ bS[0][14] = (tmp2 & 0x0000FFFF) ? 2 : 0; /* [13] || [12] */
+ bS[0][15] = (tmp2 & 0xFFFF0000) ? 2 : 0; /* [15] || [14] */
+
+ bS[0][8] = (u8)InnerBoundaryStrength(mb, 4, 1);
+ bS[0][9] = (u8)InnerBoundaryStrength(mb, 6, 3);
+ bS[0][10] = (u8)InnerBoundaryStrength(mb, 12, 9);
+ bS[0][11] = (u8)InnerBoundaryStrength(mb, 14, 11);
+
+ {
+ u32 tmp3, tmp4;
+
+ tmp1 = mb->totalCoeff[8];
+ tmp2 = mb->totalCoeff[2];
+ tmp3 = mb->totalCoeff[9];
+ tmp4 = mb->totalCoeff[3];
+ bS[1][8] = tmp1 || tmp2 ? 2 : 0;
+ tmp1 = mb->totalCoeff[12];
+ tmp2 = mb->totalCoeff[6];
+ bS[1][9] = tmp3 || tmp4 ? 2 : 0;
+ tmp3 = mb->totalCoeff[13];
+ tmp4 = mb->totalCoeff[7];
+ bS[1][10] = tmp1 || tmp2 ? 2 : 0;
+ bS[1][11] = tmp3 || tmp4 ? 2 : 0;
+ }
+ }
+ else
+ {
+ tmp1 = *pTmp++;
+ bS[1][4] = (tmp1 & 0x00FF00FF) ? 2 : (u8)InnerBoundaryStrength2(mb, 2, 0);
+ bS[1][5] = (tmp1 & 0xFF00FF00) ? 2 : (u8)InnerBoundaryStrength2(mb, 3, 1);
+ bS[0][4] = (tmp1 & 0x0000FFFF) ? 2 : (u8)InnerBoundaryStrength2(mb, 1, 0);
+ bS[0][5] = (tmp1 & 0xFFFF0000) ? 2 : (u8)InnerBoundaryStrength2(mb, 3, 2);
+ tmp1 = *pTmp++;
+ bS[1][6] = (tmp1 & 0x00FF00FF) ? 2 : (u8)InnerBoundaryStrength2(mb, 6, 4);
+ bS[1][7] = (tmp1 & 0xFF00FF00) ? 2 : (u8)InnerBoundaryStrength2(mb, 7, 5);
+ bS[0][12] = (tmp1 & 0x0000FFFF) ? 2 : (u8)InnerBoundaryStrength2(mb, 5, 4);
+ bS[0][13] = (tmp1 & 0xFFFF0000) ? 2 : (u8)InnerBoundaryStrength2(mb, 7, 6);
+ tmp1 = *pTmp++;
+ bS[1][12] = (tmp1 & 0x00FF00FF) ? 2 : (u8)InnerBoundaryStrength2(mb, 10, 8);
+ bS[1][13] = (tmp1 & 0xFF00FF00) ? 2 : (u8)InnerBoundaryStrength2(mb, 11, 9);
+ bS[0][6] = (tmp1 & 0x0000FFFF) ? 2 : (u8)InnerBoundaryStrength2(mb, 9, 8);
+ bS[0][7] = (tmp1 & 0xFFFF0000) ? 2 : (u8)InnerBoundaryStrength2(mb, 11, 10);
+ tmp1 = *pTmp;
+ bS[1][14] = (tmp1 & 0x00FF00FF) ? 2 : (u8)InnerBoundaryStrength2(mb, 14, 12);
+ bS[1][15] = (tmp1 & 0xFF00FF00) ? 2 : (u8)InnerBoundaryStrength2(mb, 15, 13);
+ bS[0][14] = (tmp1 & 0x0000FFFF) ? 2 : (u8)InnerBoundaryStrength2(mb, 13, 12);
+ bS[0][15] = (tmp1 & 0xFFFF0000) ? 2 : (u8)InnerBoundaryStrength2(mb, 15, 14);
+
+ bS[1][8] = (u8)InnerBoundaryStrength(mb, 8, 2);
+ bS[1][9] = (u8)InnerBoundaryStrength(mb, 9, 3);
+ bS[1][10] = (u8)InnerBoundaryStrength(mb, 12, 6);
+ bS[1][11] = (u8)InnerBoundaryStrength(mb, 13, 7);
+
+ bS[0][8] = (u8)InnerBoundaryStrength(mb, 4, 1);
+ bS[0][9] = (u8)InnerBoundaryStrength(mb, 6, 3);
+ bS[0][10] = (u8)InnerBoundaryStrength(mb, 12, 9);
+ bS[0][11] = (u8)InnerBoundaryStrength(mb, 14, 11);
+ }
+ pTmp = (u32*)&bS[0][0]; /* words 1..3 and 5..7 are the inner-edge entries */
+ if (!nonZeroBs && (pTmp[1] || pTmp[2] || pTmp[3] ||
+ pTmp[5] || pTmp[6] || pTmp[7]) )
+ {
+ nonZeroBs = HANTRO_TRUE;
+ }
+ }
+
+ return(nonZeroBs);
+
+}
+
+/*------------------------------------------------------------------------------
+ Function: GetLumaEdgeThresholds
+
+ Functional description:
+ Compute alpha, beta and tc0 thresholds for inner, left and top
+ luma edges of a macroblock. Row 0 of each array holds left (vertical)
+ edge values, row 1 top (horizontal) ones. For a flagged top/left edge
+ whose neighbour qpY differs, entries are recomputed from the average qp.
+------------------------------------------------------------------------------*/
+void GetLumaEdgeThresholds(
+ mbStorage_t *mb,
+ u8 (*alpha)[2],
+ u8 (*beta)[2],
+ u8 (*threshold)[16],
+ u8 (*bs)[16],
+ u32 filteringFlags )
+{
+
+/* Variables */
+
+ u32 indexA, indexB;
+ u32 qpAv, qp, qpTmp;
+ u32 i;
+
+/* Code */
+
+ ASSERT(threshold);
+ ASSERT(bs);
+ ASSERT(beta);
+ ASSERT(alpha);
+ ASSERT(mb);
+
+ qp = mb->qpY;
+
+ indexA = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetA);
+ indexB = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetB);
+
+ /* Internal edge values; the [x][0] entries are overridden below when a
+ * macroblock edge needs an averaged qp */
+ alpha[0][1] = alphas[indexA];
+ alpha[1][1] = alphas[indexA];
+ alpha[1][0] = alphas[indexA];
+ alpha[0][0] = alphas[indexA];
+ beta[0][1] = betas[indexB];
+ beta[1][1] = betas[indexB];
+ beta[1][0] = betas[indexB];
+ beta[0][0] = betas[indexB];
+
+ /* vertical scan order */
+ for (i = 0; i < 2; i++)
+ {
+ u32 t1, t2;
+
+ t1 = bs[i][0];
+ t2 = bs[i][1];
+ threshold[i][0] = (t1) ? tc0[indexA][t1] : 0;
+ t1 = bs[i][2];
+ threshold[i][1] = (t2) ? tc0[indexA][t2] : 0;
+ t2 = bs[i][3];
+ threshold[i][2] = (t1) ? tc0[indexA][t1] : 0;
+ t1 = bs[i][4];
+ threshold[i][3] = (t2) ? tc0[indexA][t2] : 0;
+ t2 = bs[i][5];
+ threshold[i][4] = (t1) ? tc0[indexA][t1] : 0;
+ t1 = bs[i][6];
+ threshold[i][5] = (t2) ? tc0[indexA][t2] : 0;
+ t2 = bs[i][7];
+ threshold[i][6] = (t1) ? tc0[indexA][t1] : 0;
+ t1 = bs[i][8];
+ threshold[i][7] = (t2) ? tc0[indexA][t2] : 0;
+ t2 = bs[i][9];
+ threshold[i][8] = (t1) ? tc0[indexA][t1] : 0;
+ t1 = bs[i][10];
+ threshold[i][9] = (t2) ? tc0[indexA][t2] : 0;
+ t2 = bs[i][11];
+ threshold[i][10] = (t1) ? tc0[indexA][t1] : 0;
+ t1 = bs[i][12];
+ threshold[i][11] = (t2) ? tc0[indexA][t2] : 0;
+ t2 = bs[i][13];
+ threshold[i][12] = (t1) ? tc0[indexA][t1] : 0;
+ t1 = bs[i][14];
+ threshold[i][13] = (t2) ? tc0[indexA][t2] : 0;
+ t2 = bs[i][15];
+ threshold[i][14] = (t1) ? tc0[indexA][t1] : 0;
+ threshold[i][15] = (t2) ? tc0[indexA][t2] : 0;
+ }
+
+ if (filteringFlags & FILTER_TOP_EDGE)
+ {
+ qpTmp = mb->mbB->qpY;
+ if (qpTmp != qp)
+ {
+ u32 t1, t2, t3, t4;
+ qpAv = (qp + qpTmp + 1) >> 1; /* average of the two qp values, rounded up */
+
+ indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA);
+ indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB);
+
+ alpha[1][0] = alphas[indexA];
+ beta[1][0] = betas[indexB];
+ t1 = bs[1][0];
+ t2 = bs[1][1];
+ t3 = bs[1][2];
+ t4 = bs[1][3];
+ threshold[1][0] = (t1 && (t1 < 4)) ? tc0[indexA][t1] : 0; /* bS 0 and bS >= 4 give threshold 0 */
+ threshold[1][1] = (t2 && (t2 < 4)) ? tc0[indexA][t2] : 0;
+ threshold[1][2] = (t3 && (t3 < 4)) ? tc0[indexA][t3] : 0;
+ threshold[1][3] = (t4 && (t4 < 4)) ? tc0[indexA][t4] : 0;
+ }
+ }
+ if (filteringFlags & FILTER_LEFT_EDGE)
+ {
+ qpTmp = mb->mbA->qpY;
+ if (qpTmp != qp)
+ {
+ qpAv = (qp + qpTmp + 1) >> 1; /* average of the two qp values, rounded up */
+
+ indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA);
+ indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB);
+
+ alpha[0][0] = alphas[indexA];
+ beta[0][0] = betas[indexB];
+ threshold[0][0] = (bs[0][0] && (bs[0][0] < 4)) ? tc0[indexA][bs[0][0]] : 0;
+ threshold[0][1] = (bs[0][1] && (bs[0][1] < 4)) ? tc0[indexA][bs[0][1]] : 0;
+ threshold[0][2] = (bs[0][2] && (bs[0][2] < 4)) ? tc0[indexA][bs[0][2]] : 0;
+ threshold[0][3] = (bs[0][3] && (bs[0][3] < 4)) ? tc0[indexA][bs[0][3]] : 0;
+ }
+ }
+
+}
+
+/*------------------------------------------------------------------------------
+ Function: GetChromaEdgeThresholds
+
+ Functional description:
+ Compute alpha, beta and tc0 thresholds for inner, left and top
+ chroma edges of a macroblock. qpY values are mapped through h264bsdQpC
+ (after adding chromaQpIndexOffset) before use; only bs entries 0..3
+ and 8..11 of each row feed the eight thresholds of that row.
+------------------------------------------------------------------------------*/
+void GetChromaEdgeThresholds(
+ mbStorage_t *mb,
+ u8 (*alpha)[2],
+ u8 (*beta)[2],
+ u8 (*threshold)[8],
+ u8 (*bs)[16],
+ u32 filteringFlags,
+ i32 chromaQpIndexOffset)
+{
+
+/* Variables */
+
+ u32 indexA, indexB;
+ u32 qpAv, qp, qpTmp;
+ u32 i;
+
+/* Code */
+
+ ASSERT(threshold);
+ ASSERT(bs);
+ ASSERT(beta);
+ ASSERT(alpha);
+ ASSERT(mb);
+ ASSERT(mb); /* duplicate of the assertion on the previous line */
+
+ qp = mb->qpY;
+ qp = h264bsdQpC[CLIP3(0, 51, (i32)qp + chromaQpIndexOffset)]; /* qp now holds the chroma value */
+
+ indexA = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetA);
+ indexB = (u32)CLIP3(0, 51, (i32)qp + mb->filterOffsetB);
+
+ alpha[0][1] = alphas[indexA];
+ alpha[1][1] = alphas[indexA];
+ alpha[1][0] = alphas[indexA];
+ alpha[0][0] = alphas[indexA];
+ beta[0][1] = betas[indexB];
+ beta[1][1] = betas[indexB];
+ beta[1][0] = betas[indexB];
+ beta[0][0] = betas[indexB];
+
+ for (i = 0; i < 2; i++)
+ {
+ u32 t1, t2;
+
+ t1 = bs[i][0];
+ t2 = bs[i][1];
+ threshold[i][0] = (t1) ? tc0[indexA][t1] : 0;
+ t1 = bs[i][2];
+ threshold[i][1] = (t2) ? tc0[indexA][t2] : 0;
+ t2 = bs[i][3];
+ threshold[i][2] = (t1) ? tc0[indexA][t1] : 0;
+ t1 = bs[i][8]; /* entries 4..7 of the bs row are not used for chroma */
+ threshold[i][3] = (t2) ? tc0[indexA][t2] : 0;
+ t2 = bs[i][9];
+ threshold[i][4] = (t1) ? tc0[indexA][t1] : 0;
+ t1 = bs[i][10];
+ threshold[i][5] = (t2) ? tc0[indexA][t2] : 0;
+ t2 = bs[i][11];
+ threshold[i][6] = (t1) ? tc0[indexA][t1] : 0;
+ threshold[i][7] = (t2) ? tc0[indexA][t2] : 0;
+ }
+
+ if (filteringFlags & FILTER_TOP_EDGE)
+ {
+ qpTmp = mb->mbB->qpY;
+ if (qpTmp != mb->qpY) /* compared on luma qp; qp itself was remapped above */
+ {
+ u32 t1, t2, t3, t4;
+ qpTmp = h264bsdQpC[CLIP3(0, 51, (i32)qpTmp + chromaQpIndexOffset)];
+ qpAv = (qp + qpTmp + 1) >> 1;
+
+ indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA);
+ indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB);
+
+ alpha[1][0] = alphas[indexA];
+ beta[1][0] = betas[indexB];
+
+ t1 = bs[1][0];
+ t2 = bs[1][1];
+ t3 = bs[1][2];
+ t4 = bs[1][3];
+ threshold[1][0] = (t1) ? tc0[indexA][t1] : 0;
+ threshold[1][1] = (t2) ? tc0[indexA][t2] : 0;
+ threshold[1][2] = (t3) ? tc0[indexA][t3] : 0;
+ threshold[1][3] = (t4) ? tc0[indexA][t4] : 0;
+ }
+ }
+ if (filteringFlags & FILTER_LEFT_EDGE)
+ {
+ qpTmp = mb->mbA->qpY;
+ if (qpTmp != mb->qpY) /* compared on luma qp; qp itself was remapped above */
+ {
+
+ qpTmp = h264bsdQpC[CLIP3(0, 51, (i32)qpTmp + chromaQpIndexOffset)];
+ qpAv = (qp + qpTmp + 1) >> 1;
+
+ indexA = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetA);
+ indexB = (u32)CLIP3(0, 51, (i32)qpAv + mb->filterOffsetB);
+
+ alpha[0][0] = alphas[indexA];
+ beta[0][0] = betas[indexB];
+ threshold[0][0] = (bs[0][0]) ? tc0[indexA][bs[0][0]] : 0;
+ threshold[0][1] = (bs[0][1]) ? tc0[indexA][bs[0][1]] : 0;
+ threshold[0][2] = (bs[0][2]) ? tc0[indexA][bs[0][2]] : 0;
+ threshold[0][3] = (bs[0][3]) ? tc0[indexA][bs[0][3]] : 0;
+ }
+ }
+
+}
+
+#endif /* H264DEC_OMXDL */
+
+/*lint +e701 +e702 */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.h
new file mode 100755
index 0000000..2571dda
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_deblocking.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_DEBLOCKING_H
+#define H264SWDEC_DEBLOCKING_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_image.h"
+#include "h264bsd_macroblock_layer.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+void h264bsdFilterPicture(
+ image_t *image, /* picture to deblock; filtered in place */
+ mbStorage_t *mb); /* macroblock storage, starting at the top-left macroblock */
+
+#endif /* #ifndef H264SWDEC_DEBLOCKING_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.c
new file mode 100644
index 0000000..a816871
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.c
@@ -0,0 +1,961 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdInit
+ h264bsdDecode
+ h264bsdShutdown
+ h264bsdNextOutputPicture
+ h264bsdPicWidth
+ h264bsdPicHeight
+ h264bsdFlushBuffer
+ h264bsdCheckValidParamSets
+ h264bsdVideoRange
+ h264bsdMatrixCoefficients
+ h264bsdCroppingParams
+ h264bsdSampleAspectRatio
+ h264bsdProfile
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_decoder.h"
+#include "h264bsd_nal_unit.h"
+#include "h264bsd_byte_stream.h"
+#include "h264bsd_seq_param_set.h"
+#include "h264bsd_pic_param_set.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_slice_data.h"
+#include "h264bsd_neighbour.h"
+#include "h264bsd_util.h"
+#include "h264bsd_dpb.h"
+#include "h264bsd_deblocking.h"
+#include "h264bsd_conceal.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+    Function name: h264bsdInit
+
+        Functional description:
+            Initialize the decoder storage and allocate the macroblock layer
+            work buffer.
+
+        Inputs:
+            noOutputReordering  non-zero indicates that the decoder does not
+                                have to perform reordering of display images
+
+        Outputs:
+            pStorage            pointer to initialized storage structure
+
+        Returns:
+            HANTRO_OK           initialization succeeded
+            HANTRO_NOK          allocation of the macroblock layer failed
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdInit(storage_t *pStorage, u32 noOutputReordering)
+{
+
+/* Variables */
+
+    u32 mbLayerSize;
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    h264bsdInitStorage(pStorage);
+
+    /* round the allocation up to the next multiple of 64 bytes so that the
+     * NEON optimized "memset" can be used for clearing the structure */
+    mbLayerSize = (sizeof(macroblockLayer_t) + 63) & ~0x3F;
+
+    pStorage->mbLayer = (macroblockLayer_t*)H264SwDecMalloc(mbLayerSize);
+    if (pStorage->mbLayer == NULL)
+    {
+        return HANTRO_NOK;
+    }
+
+    if (noOutputReordering != 0)
+    {
+        pStorage->noReordering = HANTRO_TRUE;
+    }
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdDecode
+
+ Functional description:
+ Decode a NAL unit. This function calls other modules to perform
+ tasks like
+ * extract and decode NAL unit from the byte stream
+ * decode parameter sets
+ * decode slice header and slice data
+ * conceal errors in the picture
+ * perform deblocking filtering
+
+ This function contains top level control logic of the decoder.
+
+ Inputs:
+ pStorage pointer to storage data structure
+ byteStrm pointer to stream buffer given by application
+ len length of the buffer in bytes
+ picId identifier for a picture, assigned by the
+ application
+
+ Outputs:
+ readBytes number of bytes read from the stream is stored
+ here; set to 0 when the current NAL unit has to
+ be decoded again on the next call (concealment
+ at an access unit boundary, or active SPS
+ changed)
+
+ Returns:
+ H264BSD_RDY decoding finished, nothing special
+ H264BSD_PIC_RDY decoding of a picture finished
+ H264BSD_HDRS_RDY param sets activated, information like
+ picture dimensions etc can be read
+ H264BSD_ERROR error in decoding
+ H264BSD_PARAM_SET_ERROR serious error in decoding, failed to
+ activate param sets
+ H264BSD_MEMALLOC_ERROR param set activation reported
+ MEMORY_ALLOCATION_ERROR
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecode(storage_t *pStorage, u8 *byteStrm, u32 len, u32 picId,
+ u32 *readBytes)
+{
+
+/* Variables */
+
+ u32 tmp, ppsId, spsId;
+ i32 picOrderCnt;
+ nalUnit_t nalUnit;
+ seqParamSet_t seqParamSet;
+ picParamSet_t picParamSet;
+ strmData_t strm;
+ u32 accessUnitBoundaryFlag = HANTRO_FALSE;
+ u32 picReady = HANTRO_FALSE;
+
+/* Code */
+
+ ASSERT(pStorage);
+ ASSERT(byteStrm);
+ ASSERT(len);
+ ASSERT(readBytes);
+
+ /* if previous buffer was not finished and same pointer given -> skip NAL
+ * unit extraction and rewind the stored stream to its start */
+ if (pStorage->prevBufNotFinished && byteStrm == pStorage->prevBufPointer)
+ {
+ strm = pStorage->strm[0];
+ strm.pStrmCurrPos = strm.pStrmBuffStart;
+ strm.strmBuffReadBits = strm.bitPosInWord = 0;
+ *readBytes = pStorage->prevBytesConsumed;
+ }
+ else
+ {
+ tmp = h264bsdExtractNalUnit(byteStrm, len, &strm, readBytes);
+ if (tmp != HANTRO_OK)
+ {
+ EPRINT("BYTE_STREAM");
+ return(H264BSD_ERROR);
+ }
+ /* store stream */
+ pStorage->strm[0] = strm;
+ pStorage->prevBytesConsumed = *readBytes;
+ pStorage->prevBufPointer = byteStrm;
+ }
+ pStorage->prevBufNotFinished = HANTRO_FALSE;
+
+ tmp = h264bsdDecodeNalUnit(&strm, &nalUnit);
+ if (tmp != HANTRO_OK)
+ {
+ EPRINT("NAL_UNIT");
+ return(H264BSD_ERROR);
+ }
+
+ /* Discard unspecified, reserved, SPS extension and auxiliary picture slices */
+ if(nalUnit.nalUnitType == 0 || nalUnit.nalUnitType >= 13)
+ {
+ DEBUG(("DISCARDED NAL (UNSPECIFIED, REGISTERED, SPS ext or AUX slice)\n"));
+ return(H264BSD_RDY);
+ }
+
+ tmp = h264bsdCheckAccessUnitBoundary(
+ &strm,
+ &nalUnit,
+ pStorage,
+ &accessUnitBoundaryFlag);
+ if (tmp != HANTRO_OK)
+ {
+ EPRINT("ACCESS UNIT BOUNDARY CHECK");
+ if (tmp == PARAM_SET_ERROR)
+ return(H264BSD_PARAM_SET_ERROR);
+ else
+ return(H264BSD_ERROR);
+ }
+
+ if ( accessUnitBoundaryFlag )
+ {
+ DEBUG(("Access unit boundary\n"));
+ /* conceal if picture started and param sets activated */
+ if (pStorage->picStarted && pStorage->activeSps != NULL)
+ {
+ DEBUG(("CONCEALING..."));
+
+ /* return error if second phase of
+ * initialization is not completed */
+ if (pStorage->pendingActivation)
+ {
+ EPRINT("Pending activation not completed");
+ return (H264BSD_ERROR);
+ }
+
+ if (!pStorage->validSliceInAccessUnit)
+ {
+ pStorage->currImage->data =
+ h264bsdAllocateDpbImage(pStorage->dpb);
+ h264bsdInitRefPicList(pStorage->dpb);
+ tmp = h264bsdConceal(pStorage, pStorage->currImage, P_SLICE);
+ }
+ else
+ tmp = h264bsdConceal(pStorage, pStorage->currImage,
+ pStorage->sliceHeader->sliceType); /* NOTE(review): the result
+ * of h264bsdConceal (both branches) is stored in tmp but not
+ * examined afterwards */
+
+ picReady = HANTRO_TRUE;
+
+ /* current NAL unit should be decoded on next activation -> set
+ * readBytes to 0 */
+ *readBytes = 0;
+ pStorage->prevBufNotFinished = HANTRO_TRUE;
+ DEBUG(("...DONE\n"));
+ }
+ else
+ {
+ pStorage->validSliceInAccessUnit = HANTRO_FALSE;
+ }
+ pStorage->skipRedundantSlices = HANTRO_FALSE;
+ }
+
+ if (!picReady)
+ {
+ switch (nalUnit.nalUnitType)
+ {
+ case NAL_SEQ_PARAM_SET:
+ DEBUG(("SEQ PARAM SET\n"));
+ tmp = h264bsdDecodeSeqParamSet(&strm, &seqParamSet);
+ if (tmp != HANTRO_OK)
+ {
+ EPRINT("SEQ_PARAM_SET");
+ FREE(seqParamSet.offsetForRefFrame);
+ FREE(seqParamSet.vuiParameters);
+ return(H264BSD_ERROR);
+ }
+ tmp = h264bsdStoreSeqParamSet(pStorage, &seqParamSet); /* NOTE(review):
+ * tmp is not checked before the break, so a failed store still
+ * ends in H264BSD_RDY -- confirm this is intended */
+ break;
+
+ case NAL_PIC_PARAM_SET:
+ DEBUG(("PIC PARAM SET\n"));
+ tmp = h264bsdDecodePicParamSet(&strm, &picParamSet);
+ if (tmp != HANTRO_OK)
+ {
+ EPRINT("PIC_PARAM_SET");
+ FREE(picParamSet.runLength);
+ FREE(picParamSet.topLeft);
+ FREE(picParamSet.bottomRight);
+ FREE(picParamSet.sliceGroupId);
+ return(H264BSD_ERROR);
+ }
+ tmp = h264bsdStorePicParamSet(pStorage, &picParamSet); /* NOTE(review):
+ * tmp is not checked before the break, same as for the SPS */
+ break;
+
+ case NAL_CODED_SLICE_IDR:
+ DEBUG(("IDR "));
+ /* fall through */
+ case NAL_CODED_SLICE:
+ DEBUG(("SLICE HEADER\n"));
+
+ /* picture successfully finished and still decoding same old
+ * access unit -> no need to decode redundant slices */
+ if (pStorage->skipRedundantSlices)
+ return(H264BSD_RDY);
+
+ pStorage->picStarted = HANTRO_TRUE;
+
+ if (h264bsdIsStartOfPicture(pStorage))
+ {
+ pStorage->numConcealedMbs = 0;
+ pStorage->currentPicId = picId;
+
+ tmp = h264bsdCheckPpsId(&strm, &ppsId);
+ ASSERT(tmp == HANTRO_OK); /* NOTE(review): failure is only caught by
+ * this ASSERT; presumably the PPS id was already validated by
+ * h264bsdCheckAccessUnitBoundary above -- confirm */
+ /* store old activeSpsId and return headers ready
+ * indication if activeSps changes */
+ spsId = pStorage->activeSpsId;
+ tmp = h264bsdActivateParamSets(pStorage, ppsId,
+ IS_IDR_NAL_UNIT(&nalUnit) ?
+ HANTRO_TRUE : HANTRO_FALSE);
+ if (tmp != HANTRO_OK)
+ {
+ EPRINT("Param set activation");
+ pStorage->activePpsId = MAX_NUM_PIC_PARAM_SETS;
+ pStorage->activePps = NULL;
+ pStorage->activeSpsId = MAX_NUM_SEQ_PARAM_SETS;
+ pStorage->activeSps = NULL;
+ pStorage->pendingActivation = HANTRO_FALSE;
+
+ if(tmp == MEMORY_ALLOCATION_ERROR)
+ {
+ return H264BSD_MEMALLOC_ERROR;
+ }
+ else
+ return(H264BSD_PARAM_SET_ERROR);
+ }
+
+ if (spsId != pStorage->activeSpsId)
+ {
+ seqParamSet_t *oldSPS = NULL;
+ seqParamSet_t *newSPS = pStorage->activeSps;
+ u32 noOutputOfPriorPicsFlag = 1;
+
+ if(pStorage->oldSpsId < MAX_NUM_SEQ_PARAM_SETS)
+ {
+ oldSPS = pStorage->sps[pStorage->oldSpsId];
+ }
+
+ *readBytes = 0; /* same NAL unit is decoded again on the next call */
+ pStorage->prevBufNotFinished = HANTRO_TRUE;
+
+
+ if(nalUnit.nalUnitType == NAL_CODED_SLICE_IDR)
+ {
+ tmp =
+ h264bsdCheckPriorPicsFlag(&noOutputOfPriorPicsFlag,
+ &strm, newSPS,
+ pStorage->activePps,
+ nalUnit.nalUnitType);
+ }
+ else
+ {
+ tmp = HANTRO_NOK;
+ }
+
+ if((tmp != HANTRO_OK) ||
+ (noOutputOfPriorPicsFlag != 0) ||
+ (pStorage->dpb->noReordering) ||
+ (oldSPS == NULL) ||
+ (oldSPS->picWidthInMbs != newSPS->picWidthInMbs) ||
+ (oldSPS->picHeightInMbs != newSPS->picHeightInMbs) ||
+ (oldSPS->maxDpbSize != newSPS->maxDpbSize))
+ {
+ pStorage->dpb->flushed = 0;
+ }
+ else
+ {
+ h264bsdFlushDpb(pStorage->dpb);
+ }
+
+ pStorage->oldSpsId = pStorage->activeSpsId;
+
+ return(H264BSD_HDRS_RDY);
+ }
+ }
+
+ /* return error if second phase of
+ * initialization is not completed */
+ if (pStorage->pendingActivation)
+ {
+ EPRINT("Pending activation not completed");
+ return (H264BSD_ERROR);
+ }
+ tmp = h264bsdDecodeSliceHeader(&strm, pStorage->sliceHeader + 1,
+ pStorage->activeSps, pStorage->activePps, &nalUnit);
+ if (tmp != HANTRO_OK)
+ {
+ EPRINT("SLICE_HEADER");
+ return(H264BSD_ERROR);
+ }
+ if (h264bsdIsStartOfPicture(pStorage))
+ {
+ if (!IS_IDR_NAL_UNIT(&nalUnit))
+ {
+ tmp = h264bsdCheckGapsInFrameNum(pStorage->dpb,
+ pStorage->sliceHeader[1].frameNum,
+ nalUnit.nalRefIdc != 0 ?
+ HANTRO_TRUE : HANTRO_FALSE,
+ pStorage->activeSps->
+ gapsInFrameNumValueAllowedFlag);
+ if (tmp != HANTRO_OK)
+ {
+ EPRINT("Gaps in frame num");
+ return(H264BSD_ERROR);
+ }
+ }
+ pStorage->currImage->data =
+ h264bsdAllocateDpbImage(pStorage->dpb);
+ }
+
+ /* store slice header to storage if successfully decoded */
+ pStorage->sliceHeader[0] = pStorage->sliceHeader[1];
+ pStorage->validSliceInAccessUnit = HANTRO_TRUE;
+ pStorage->prevNalUnit[0] = nalUnit;
+
+ h264bsdComputeSliceGroupMap(pStorage,
+ pStorage->sliceHeader->sliceGroupChangeCycle);
+
+ h264bsdInitRefPicList(pStorage->dpb);
+ tmp = h264bsdReorderRefPicList(pStorage->dpb,
+ &pStorage->sliceHeader->refPicListReordering,
+ pStorage->sliceHeader->frameNum,
+ pStorage->sliceHeader->numRefIdxL0Active);
+ if (tmp != HANTRO_OK)
+ {
+ EPRINT("Reordering");
+ return(H264BSD_ERROR);
+ }
+
+ DEBUG(("SLICE DATA, FIRST %d\n",
+ pStorage->sliceHeader->firstMbInSlice));
+ tmp = h264bsdDecodeSliceData(&strm, pStorage,
+ pStorage->currImage, pStorage->sliceHeader);
+ if (tmp != HANTRO_OK)
+ {
+ EPRINT("SLICE_DATA");
+ h264bsdMarkSliceCorrupted(pStorage,
+ pStorage->sliceHeader->firstMbInSlice);
+ return(H264BSD_ERROR);
+ }
+
+ if (h264bsdIsEndOfPicture(pStorage))
+ {
+ picReady = HANTRO_TRUE;
+ pStorage->skipRedundantSlices = HANTRO_TRUE;
+ }
+ break;
+
+ case NAL_SEI:
+ DEBUG(("SEI MESSAGE, NOT DECODED"));
+ break;
+
+ default:
+ DEBUG(("NOT IMPLEMENTED YET %d\n",nalUnit.nalUnitType));
+ }
+ }
+
+ if (picReady)
+ {
+ h264bsdFilterPicture(pStorage->currImage, pStorage->mb);
+
+ h264bsdResetStorage(pStorage);
+
+ picOrderCnt = h264bsdDecodePicOrderCnt(pStorage->poc,
+ pStorage->activeSps, pStorage->sliceHeader, pStorage->prevNalUnit);
+
+ if (pStorage->validSliceInAccessUnit)
+ {
+ if (pStorage->prevNalUnit->nalRefIdc)
+ {
+ tmp = h264bsdMarkDecRefPic(pStorage->dpb,
+ &pStorage->sliceHeader->decRefPicMarking,
+ pStorage->currImage, pStorage->sliceHeader->frameNum,
+ picOrderCnt,
+ IS_IDR_NAL_UNIT(pStorage->prevNalUnit) ?
+ HANTRO_TRUE : HANTRO_FALSE,
+ pStorage->currentPicId, pStorage->numConcealedMbs);
+ }
+ /* non-reference picture, just store for possible display
+ * reordering (NULL is passed instead of the marking data) */
+ else
+ {
+ tmp = h264bsdMarkDecRefPic(pStorage->dpb, NULL,
+ pStorage->currImage, pStorage->sliceHeader->frameNum,
+ picOrderCnt,
+ IS_IDR_NAL_UNIT(pStorage->prevNalUnit) ?
+ HANTRO_TRUE : HANTRO_FALSE,
+ pStorage->currentPicId, pStorage->numConcealedMbs);
+ }
+ }
+
+ pStorage->picStarted = HANTRO_FALSE;
+ pStorage->validSliceInAccessUnit = HANTRO_FALSE;
+
+ return(H264BSD_PIC_RDY);
+ }
+ else
+ return(H264BSD_RDY);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdShutdown
+
+        Functional description:
+            Shutdown a decoder instance. Releases every stored sequence and
+            picture parameter set together with the arrays they own, the
+            macroblock layer, the macroblock storage, the slice group map
+            and finally the decoded picture buffer.
+
+        Inputs:
+            pStorage    pointer to storage data structure
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdShutdown(storage_t *pStorage)
+{
+
+/* Variables */
+
+    u32 idx;
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    /* sequence parameter sets: owned arrays first, then the set itself */
+    for (idx = 0; idx < MAX_NUM_SEQ_PARAM_SETS; idx++)
+    {
+        if (pStorage->sps[idx] == NULL)
+            continue;
+
+        FREE(pStorage->sps[idx]->offsetForRefFrame);
+        FREE(pStorage->sps[idx]->vuiParameters);
+        FREE(pStorage->sps[idx]);
+    }
+
+    /* picture parameter sets: owned arrays first, then the set itself */
+    for (idx = 0; idx < MAX_NUM_PIC_PARAM_SETS; idx++)
+    {
+        if (pStorage->pps[idx] == NULL)
+            continue;
+
+        FREE(pStorage->pps[idx]->runLength);
+        FREE(pStorage->pps[idx]->topLeft);
+        FREE(pStorage->pps[idx]->bottomRight);
+        FREE(pStorage->pps[idx]->sliceGroupId);
+        FREE(pStorage->pps[idx]);
+    }
+
+    /* per-instance work buffers */
+    FREE(pStorage->mbLayer);
+    FREE(pStorage->mb);
+    FREE(pStorage->sliceGroupMap);
+
+    h264bsdFreeDpb(pStorage->dpb);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdNextOutputPicture
+
+        Functional description:
+            Fetch the next picture from the decoded picture buffer output
+            (display order) and report its attributes.
+
+        Inputs:
+            pStorage    pointer to storage data structure
+
+        Outputs:
+            picId       identifier of the picture will be stored here
+            isIdrPic    IDR flag of the picture will be stored here
+            numErrMbs   number of concealed macroblocks in the picture
+                        will be stored here
+
+            The outputs are written only when a picture is returned.
+
+        Returns:
+            pointer to the picture data
+            NULL if no pictures available for display
+
+------------------------------------------------------------------------------*/
+
+u8* h264bsdNextOutputPicture(storage_t *pStorage, u32 *picId, u32 *isIdrPic,
+    u32 *numErrMbs)
+{
+
+/* Variables */
+
+    dpbOutPicture_t *picture;
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    picture = h264bsdDpbOutputPicture(pStorage->dpb);
+
+    /* nothing queued for output: leave the caller's outputs untouched */
+    if (picture == NULL)
+    {
+        return(NULL);
+    }
+
+    *picId = picture->picId;
+    *isIdrPic = picture->isIdr;
+    *numErrMbs = picture->numErrMbs;
+
+    return (picture->data);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdPicWidth
+
+        Functional description:
+            Report the picture width, in macroblocks, of the active
+            sequence parameter set.
+
+        Inputs:
+            pStorage    pointer to storage data structure
+
+        Outputs:
+            none
+
+        Returns:
+            picture width in macroblocks
+            0 if parameter sets not yet activated
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdPicWidth(storage_t *pStorage)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    /* no active SPS -> dimensions not known yet */
+    if (pStorage->activeSps == NULL)
+    {
+        return(0);
+    }
+
+    return(pStorage->activeSps->picWidthInMbs);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdPicHeight
+
+        Functional description:
+            Report the picture height, in macroblocks, of the active
+            sequence parameter set.
+
+        Inputs:
+            pStorage    pointer to storage data structure
+
+        Outputs:
+            none
+
+        Returns:
+            picture height in macroblocks
+            0 if parameter sets not yet activated
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdPicHeight(storage_t *pStorage)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    /* no active SPS -> dimensions not known yet */
+    if (pStorage->activeSps == NULL)
+    {
+        return(0);
+    }
+
+    return(pStorage->activeSps->picHeightInMbs);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdFlushBuffer
+
+ Functional description:
+ Flush the decoded picture buffer by calling h264bsdFlushDpb on
+ pStorage->dpb, see h264bsd_dpb.c for details
+
+ Inputs:
+ pStorage pointer to storage data structure
+
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdFlushBuffer(storage_t *pStorage)
+{
+
+/* Variables */
+
+/* Code */
+
+ ASSERT(pStorage);
+
+ h264bsdFlushDpb(pStorage->dpb);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdCheckValidParamSets
+
+        Functional description:
+            Check if any valid parameter set combination (SPS/PPS) exists.
+            Maps the result of h264bsdValidParamSets to 1/0.
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Returns:
+            1       at least one valid SPS/PPS combination found
+            0       no valid param set combinations found
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckValidParamSets(storage_t *pStorage)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    if (h264bsdValidParamSets(pStorage) == HANTRO_OK)
+    {
+        return(1);
+    }
+
+    return(0);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdVideoRange
+
+        Functional description:
+            Get value of video_full_range_flag received in the VUI data of
+            the active sequence parameter set.
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Returns:
+            1   video_full_range_flag received and value is 1
+            0   otherwise
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdVideoRange(storage_t *pStorage)
+{
+
+/* Variables */
+
+    seqParamSet_t *sps;
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    sps = pStorage->activeSps;
+
+    /* default value of video_full_range_flag is 0 */
+    if (!sps || !sps->vuiParametersPresentFlag || !sps->vuiParameters)
+    {
+        return(0);
+    }
+
+    if (!sps->vuiParameters->videoSignalTypePresentFlag)
+    {
+        return(0);
+    }
+
+    return(sps->vuiParameters->videoFullRangeFlag ? 1 : 0);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdMatrixCoefficients
+
+        Functional description:
+            Get value of matrix_coefficients received in the VUI data of
+            the active sequence parameter set.
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Returns:
+            value of matrix_coefficients if received
+            2 otherwise (this is the default value)
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdMatrixCoefficients(storage_t *pStorage)
+{
+
+/* Variables */
+
+    seqParamSet_t *sps;
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    sps = pStorage->activeSps;
+
+    if (sps && sps->vuiParametersPresentFlag && sps->vuiParameters)
+    {
+        /* the value is only meaningful when both the video signal type
+         * and the colour description were signalled */
+        if (sps->vuiParameters->videoSignalTypePresentFlag &&
+            sps->vuiParameters->colourDescriptionPresentFlag)
+        {
+            return(sps->vuiParameters->matrixCoefficients);
+        }
+    }
+
+    /* default unspecified */
+    return(2);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdCroppingParams
+
+        Functional description:
+            Get cropping parameters of the active SPS
+
+        Inputs:
+            pStorage        pointer to storage structure
+
+        Outputs:
+            croppingFlag    1 if the active SPS carries cropping params,
+                            0 otherwise (all other outputs are then 0)
+            leftOffset      cropping left offset in pixels is stored here
+            width           width of the image after cropping is stored here
+            topOffset       cropping top offset in pixels is stored here
+            height          height of the image after cropping is stored here
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdCroppingParams(storage_t *pStorage, u32 *croppingFlag,
+    u32 *leftOffset, u32 *width, u32 *topOffset, u32 *height)
+{
+
+/* Variables */
+
+    seqParamSet_t *sps;
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    sps = pStorage->activeSps;
+
+    /* no active SPS or no cropping signalled -> report everything as 0 */
+    if (!sps || !sps->frameCroppingFlag)
+    {
+        *croppingFlag = 0;
+        *leftOffset = 0;
+        *width = 0;
+        *topOffset = 0;
+        *height = 0;
+        return;
+    }
+
+    /* crop offsets are scaled by 2 and picture dimensions by 16 */
+    *croppingFlag = 1;
+    *leftOffset = 2 * sps->frameCropLeftOffset;
+    *width = 16 * sps->picWidthInMbs -
+             2 * (sps->frameCropLeftOffset + sps->frameCropRightOffset);
+    *topOffset = 2 * sps->frameCropTopOffset;
+    *height = 16 * sps->picHeightInMbs -
+              2 * (sps->frameCropTopOffset + sps->frameCropBottomOffset);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdSampleAspectRatio
+
+        Functional description:
+            Get sample aspect ratio received in the VUI data of the active
+            SPS. Both outputs are 1 when no aspect ratio information is
+            available, and 0 when the ratio is unspecified, the aspect
+            ratio idc is not recognized, or an extended SAR has a zero
+            component.
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Outputs:
+            sarWidth    sample aspect ratio width
+            sarHeight   sample aspect ratio height
+
+------------------------------------------------------------------------------*/
+
+void h264bsdSampleAspectRatio(storage_t *pStorage, u32 *sarWidth, u32 *sarHeight)
+{
+
+/* Variables */
+
+    u32 ratioW = 1;
+    u32 ratioH = 1;
+    seqParamSet_t *sps;
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    sps = pStorage->activeSps;
+
+    if (sps && sps->vuiParametersPresentFlag && sps->vuiParameters &&
+        sps->vuiParameters->aspectRatioPresentFlag)
+    {
+        switch (sps->vuiParameters->aspectRatioIdc)
+        {
+            case ASPECT_RATIO_1_1:
+                ratioW = 1;   ratioH = 1;
+                break;
+            case ASPECT_RATIO_12_11:
+                ratioW = 12;  ratioH = 11;
+                break;
+            case ASPECT_RATIO_10_11:
+                ratioW = 10;  ratioH = 11;
+                break;
+            case ASPECT_RATIO_16_11:
+                ratioW = 16;  ratioH = 11;
+                break;
+            case ASPECT_RATIO_40_33:
+                ratioW = 40;  ratioH = 33;
+                break;
+            case ASPECT_RATIO_24_11:
+                ratioW = 24;  ratioH = 11;
+                break;
+            case ASPECT_RATIO_20_11:
+                ratioW = 20;  ratioH = 11;
+                break;
+            case ASPECT_RATIO_32_11:
+                ratioW = 32;  ratioH = 11;
+                break;
+            case ASPECT_RATIO_80_33:
+                ratioW = 80;  ratioH = 33;
+                break;
+            case ASPECT_RATIO_18_11:
+                ratioW = 18;  ratioH = 11;
+                break;
+            case ASPECT_RATIO_15_11:
+                ratioW = 15;  ratioH = 11;
+                break;
+            case ASPECT_RATIO_64_33:
+                ratioW = 64;  ratioH = 33;
+                break;
+            case ASPECT_RATIO_160_99:
+                ratioW = 160; ratioH = 99;
+                break;
+            case ASPECT_RATIO_EXTENDED_SAR:
+                ratioW = sps->vuiParameters->sarWidth;
+                ratioH = sps->vuiParameters->sarHeight;
+                /* a zero component makes the whole ratio unspecified */
+                if (ratioW == 0 || ratioH == 0)
+                {
+                    ratioW = 0;
+                    ratioH = 0;
+                }
+                break;
+            case ASPECT_RATIO_UNSPECIFIED:
+            default:
+                ratioW = 0;
+                ratioH = 0;
+                break;
+        }
+    }
+
+    /* set aspect ratio*/
+    *sarWidth = ratioW;
+    *sarHeight = ratioH;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdProfile
+
+        Functional description:
+            Get profile information from active SPS
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Returns:
+            profileIdc of the active SPS
+            0 if parameter sets not yet activated
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdProfile(storage_t *pStorage)
+{
+
+/* Variables */
+
+/* Code */
+
+    /* same precondition check as every other accessor in this file */
+    ASSERT(pStorage);
+
+    if (pStorage->activeSps)
+        return pStorage->activeSps->profileIdc;
+    else
+        return 0;
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.h
new file mode 100644
index 0000000..8336523
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_decoder.h
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_DECODER_H
+#define H264SWDEC_DECODER_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_storage.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/* enumerated return values of the functions (returned by h264bsdDecode) */
+enum {
+ H264BSD_RDY, /* decoding finished, nothing special */
+ H264BSD_PIC_RDY, /* decoding of a picture finished */
+ H264BSD_HDRS_RDY, /* param sets activated, information like picture
+ * dimensions etc can be read */
+ H264BSD_ERROR, /* error in decoding */
+ H264BSD_PARAM_SET_ERROR, /* serious error in decoding, failed to activate
+ * param sets */
+ H264BSD_MEMALLOC_ERROR /* param set activation reported
+ * MEMORY_ALLOCATION_ERROR */
+};
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdInit(storage_t *pStorage, u32 noOutputReordering); /* HANTRO_OK or HANTRO_NOK */
+u32 h264bsdDecode(storage_t *pStorage, u8 *byteStrm, u32 len, u32 picId,
+ u32 *readBytes); /* returns one of the H264BSD_* values */
+void h264bsdShutdown(storage_t *pStorage); /* frees the memory held by pStorage members */
+
+u8* h264bsdNextOutputPicture(storage_t *pStorage, u32 *picId, u32 *isIdrPic,
+ u32 *numErrMbs); /* NULL if no pictures available for display */
+
+u32 h264bsdPicWidth(storage_t *pStorage); /* in macroblocks, 0 if no active SPS */
+u32 h264bsdPicHeight(storage_t *pStorage); /* in macroblocks, 0 if no active SPS */
+u32 h264bsdVideoRange(storage_t *pStorage); /* 1 if video_full_range_flag received as 1, else 0 */
+u32 h264bsdMatrixCoefficients(storage_t *pStorage); /* 2 if not received */
+void h264bsdCroppingParams(storage_t *pStorage, u32 *croppingFlag,
+ u32 *left, u32 *width, u32 *top, u32 *height); /* all outputs 0 when no cropping */
+void h264bsdSampleAspectRatio(storage_t *pStorage,
+ u32 *sarWidth, u32 *sarHeight); /* both 1 when no aspect ratio info present */
+u32 h264bsdCheckValidParamSets(storage_t *pStorage); /* 1 if a valid SPS/PPS combination exists, else 0 */
+
+void h264bsdFlushBuffer(storage_t *pStorage); /* calls h264bsdFlushDpb on pStorage->dpb */
+
+u32 h264bsdProfile(storage_t *pStorage); /* profileIdc of active SPS, 0 if none */
+
+#endif /* #ifndef H264SWDEC_DECODER_H */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.c
new file mode 100755
index 0000000..9517d0a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.c
@@ -0,0 +1,1584 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ ComparePictures
+ h264bsdReorderRefPicList
+ Mmcop1
+ Mmcop2
+ Mmcop3
+ Mmcop4
+ Mmcop5
+ Mmcop6
+ h264bsdMarkDecRefPic
+ h264bsdGetRefPicData
+ h264bsdAllocateDpbImage
+ SlidingWindowRefPicMarking
+ h264bsdInitDpb
+ h264bsdResetDpb
+ h264bsdInitRefPicList
+ FindDpbPic
+ SetPicNums
+ h264bsdCheckGapsInFrameNum
+ FindSmallestPicOrderCnt
+ OutputPicture
+ h264bsdDpbOutputPicture
+ h264bsdFlushDpb
+ h264bsdFreeDpb
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_cfg.h"
+#include "h264bsd_dpb.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_image.h"
+#include "h264bsd_util.h"
+#include "basetype.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/* macros to determine picture status. Note that IS_SHORT_TERM macro returns
+ * true also for non-existing pictures because non-existing pictures are
+ * regarded short term pictures according to H.264 standard */
+/* IS_REFERENCE yields the raw status value, i.e. it is true for every
+ * non-zero status. NOTE(review): this assumes UNUSED is the zero-valued
+ * status -- confirm against the status enum in h264bsd_dpb.h. */
+#define IS_REFERENCE(a) ((a).status)
+/* true only for statuses that compare greater than NON_EXISTING */
+#define IS_EXISTING(a) ((a).status > NON_EXISTING)
+#define IS_SHORT_TERM(a) \
+ ((a).status == NON_EXISTING || (a).status == SHORT_TERM)
+#define IS_LONG_TERM(a) ((a).status == LONG_TERM)
+
+/* macro to set a picture unused for reference.
+ * Caution: the expansion already ends with ';', so the usual call form
+ * "SET_UNUSED(x);" produces an extra empty statement. Only use it inside a
+ * braced block, never as the unbraced body of an if that has an else. */
+#define SET_UNUSED(a) (a).status = UNUSED;
+
+/* Upper bound asserted for numRefIdxActive in h264bsdReorderRefPicList.
+ * h264bsdInitDpb allocates dpb->buffer and dpb->list with this value + 1
+ * entries. */
+#define MAX_NUM_REF_IDX_L0_ACTIVE 16
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static i32 ComparePictures(const void *ptr1, const void *ptr2);
+
+static u32 Mmcop1(dpbStorage_t *dpb, u32 currPicNum, u32 differenceOfPicNums);
+
+static u32 Mmcop2(dpbStorage_t *dpb, u32 longTermPicNum);
+
+static u32 Mmcop3(dpbStorage_t *dpb, u32 currPicNum, u32 differenceOfPicNums,
+ u32 longTermFrameIdx);
+
+static u32 Mmcop4(dpbStorage_t *dpb, u32 maxLongTermFrameIdx);
+
+static u32 Mmcop5(dpbStorage_t *dpb);
+
+static u32 Mmcop6(dpbStorage_t *dpb, u32 frameNum, i32 picOrderCnt,
+ u32 longTermFrameIdx);
+
+static u32 SlidingWindowRefPicMarking(dpbStorage_t *dpb);
+
+static i32 FindDpbPic(dpbStorage_t *dpb, i32 picNum, u32 isShortTerm);
+
+static void SetPicNums(dpbStorage_t *dpb, u32 currFrameNum);
+
+static dpbPicture_t* FindSmallestPicOrderCnt(dpbStorage_t *dpb);
+
+static u32 OutputPicture(dpbStorage_t *dpb);
+
+static void ShellSort(dpbPicture_t *pPic, u32 num);
+
+/*------------------------------------------------------------------------------
+
+ Function: ComparePictures
+
+ Functional description:
+ Function to compare dpb pictures, used by the ShellSort() function.
+ Order of the pictures after sorting shall be as follows:
+ 1) short term reference pictures starting with the largest
+ picNum
+ 2) long term reference pictures starting with the smallest
+ longTermPicNum
+ 3) pictures unused for reference but needed for display
+ 4) other pictures
+
+ Returns:
+ -1 pic 1 is greater than pic 2
+ 0 equal from comparison point of view
+ 1 pic 2 is greater than pic 1
+
+------------------------------------------------------------------------------*/
+
+static i32 ComparePictures(const void *ptr1, const void *ptr2)
+{
+
+/* Variables */
+
+    const dpbPicture_t *lhs;
+    const dpbPicture_t *rhs;
+    u32 lhsIsRef, rhsIsRef;
+    u32 lhsIsShort, rhsIsShort;
+    u32 lhsShown, rhsShown;
+
+/* Code */
+
+    ASSERT(ptr1);
+    ASSERT(ptr2);
+
+    lhs = (const dpbPicture_t*)ptr1;
+    rhs = (const dpbPicture_t*)ptr2;
+
+    /* normalise every property to 0/1 so they can be compared directly */
+    lhsIsRef = IS_REFERENCE(*lhs) ? 1 : 0;
+    rhsIsRef = IS_REFERENCE(*rhs) ? 1 : 0;
+
+    /* neither picture is a reference: pictures still waiting for display
+     * sort ahead of pictures that are not */
+    if (!lhsIsRef && !rhsIsRef)
+    {
+        lhsShown = lhs->toBeDisplayed ? 1 : 0;
+        rhsShown = rhs->toBeDisplayed ? 1 : 0;
+        if (lhsShown == rhsShown)
+            return(0);
+        return(lhsShown ? -1 : 1);
+    }
+
+    /* exactly one picture is a reference: the reference sorts first */
+    if (lhsIsRef != rhsIsRef)
+        return(lhsIsRef ? -1 : 1);
+
+    /* from here on both pictures are reference pictures */
+    lhsIsShort = IS_SHORT_TERM(*lhs) ? 1 : 0;
+    rhsIsShort = IS_SHORT_TERM(*rhs) ? 1 : 0;
+
+    /* short term sorts ahead of long term */
+    if (lhsIsShort != rhsIsShort)
+        return(lhsIsShort ? -1 : 1);
+
+    if (lhs->picNum == rhs->picNum)
+        return(0);
+
+    /* two short term pictures: descending picNum */
+    if (lhsIsShort)
+        return((lhs->picNum > rhs->picNum) ? -1 : 1);
+
+    /* two long term pictures: ascending picNum (which holds the
+     * longTermPicNum for long term pictures) */
+    return((lhs->picNum > rhs->picNum) ? 1 : -1);
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdReorderRefPicList
+
+ Functional description:
+ Function to perform reference picture list reordering based on
+ reordering commands received in the slice header. See details
+ of the process in the H.264 standard.
+
+ Inputs:
+ dpb pointer to dpb storage structure
+ order pointer to reordering commands
+ currFrameNum current frame number
+ numRefIdxActive number of active reference indices for current
+ picture
+
+ Outputs:
+ dpb 'list' field of the structure reordered
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK if non-existing pictures referred to in the
+ reordering commands
+
+------------------------------------------------------------------------------*/
+
+/* Applies the slice-header reordering commands in 'order' to dpb->list.
+ * Picture numbers are refreshed first (also when no reordering is
+ * requested). Each command selects one picture, which is inserted at the
+ * next reordered position; the rest of the list is shifted down and later
+ * duplicates of the same picture are dropped. Returns HANTRO_NOK as soon as
+ * a command refers to a picture that is missing or non-existing. */
+u32 h264bsdReorderRefPicList(
+ dpbStorage_t *dpb,
+ refPicListReordering_t *order,
+ u32 currFrameNum,
+ u32 numRefIdxActive)
+{
+
+/* Variables */
+
+ u32 i, j, k, picNumPred, refIdx;
+ i32 picNum, picNumNoWrap, index;
+ u32 isShortTerm;
+
+/* Code */
+
+ ASSERT(order);
+ ASSERT(currFrameNum <= dpb->maxFrameNum);
+ ASSERT(numRefIdxActive <= MAX_NUM_REF_IDX_L0_ACTIVE);
+
+ /* set dpb picture numbers for sorting */
+ SetPicNums(dpb, currFrameNum);
+
+ if (!order->refPicListReorderingFlagL0)
+ return(HANTRO_OK);
+
+ /* refIdx: next list position to fill; picNumPred: prediction base for the
+ * short term picture number, starts from the current frame number */
+ refIdx = 0;
+ picNumPred = currFrameNum;
+
+ /* The loop ends at the first command whose idc is >= 3. There is no bound
+ * on i here. NOTE(review): presumably the slice header parser guarantees
+ * a terminating command and limits the command count -- verify. */
+ i = 0;
+ while (order->command[i].reorderingOfPicNumsIdc < 3)
+ {
+ /* short term */
+ if (order->command[i].reorderingOfPicNumsIdc < 2)
+ {
+ /* idc 0: subtract the difference from the prediction, wrapping
+ * below zero back into [0, maxFrameNum) */
+ if (order->command[i].reorderingOfPicNumsIdc == 0)
+ {
+ picNumNoWrap =
+ (i32)picNumPred - (i32)order->command[i].absDiffPicNum;
+ if (picNumNoWrap < 0)
+ picNumNoWrap += (i32)dpb->maxFrameNum;
+ }
+ /* idc 1: add the difference, wrapping at maxFrameNum */
+ else
+ {
+ picNumNoWrap =
+ (i32)(picNumPred + order->command[i].absDiffPicNum);
+ if (picNumNoWrap >= (i32)dpb->maxFrameNum)
+ picNumNoWrap -= (i32)dpb->maxFrameNum;
+ }
+ picNumPred = (u32)picNumNoWrap;
+ picNum = picNumNoWrap;
+ /* values above the current frame number are mapped to negative
+ * picNums, matching the numbering produced by SetPicNums */
+ if ((u32)picNumNoWrap > currFrameNum)
+ picNum -= (i32)dpb->maxFrameNum;
+ isShortTerm = HANTRO_TRUE;
+ }
+ /* long term */
+ else
+ {
+ picNum = (i32)order->command[i].longTermPicNum;
+ isShortTerm = HANTRO_FALSE;
+
+ }
+ /* find corresponding picture from dpb */
+ index = FindDpbPic(dpb, picNum, isShortTerm);
+ if (index < 0 || !IS_EXISTING(dpb->buffer[index]))
+ return(HANTRO_NOK);
+
+ /* shift pictures */
+ /* writes up to dpb->list[numRefIdxActive]; the list is allocated with
+ * MAX_NUM_REF_IDX_L0_ACTIVE + 1 entries in h264bsdInitDpb */
+ for (j = numRefIdxActive; j > refIdx; j--)
+ dpb->list[j] = dpb->list[j-1];
+ /* put picture into the list */
+ dpb->list[refIdx++] = &dpb->buffer[index];
+ /* remove later references to the same picture */
+ /* in-place compaction: j reads, k writes, both start just after the
+ * entry inserted above */
+ for (j = k = refIdx; j <= numRefIdxActive; j++)
+ if(dpb->list[j] != &dpb->buffer[index])
+ dpb->list[k++] = dpb->list[j];
+
+ i++;
+ }
+
+ return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: Mmcop1
+
+ Functional description:
+ Function to mark a short-term reference picture unused for
+ reference, memory_management_control_operation equal to 1
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK failure, picture does not exist in the buffer
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop1(dpbStorage_t *dpb, u32 currPicNum, u32 differenceOfPicNums)
+{
+
+/* Variables */
+
+    i32 slot;
+    dpbPicture_t *target;
+
+/* Code */
+
+    ASSERT(currPicNum < dpb->maxFrameNum);
+
+    /* the picture to release is identified by its distance from the
+     * current picture number */
+    slot = FindDpbPic(dpb, (i32)currPicNum - (i32)differenceOfPicNums,
+        HANTRO_TRUE);
+
+    if (slot >= 0)
+    {
+        target = &dpb->buffer[slot];
+        target->status = UNUSED;
+        dpb->numRefFrames--;
+        /* the slot is released only when the picture is not still waiting
+         * for display */
+        if (!target->toBeDisplayed)
+            dpb->fullness--;
+        return(HANTRO_OK);
+    }
+
+    /* no such short term picture in the buffer */
+    return(HANTRO_NOK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: Mmcop2
+
+ Functional description:
+ Function to mark a long-term reference picture unused for
+ reference, memory_management_control_operation equal to 2
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK failure, picture does not exist in the buffer
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop2(dpbStorage_t *dpb, u32 longTermPicNum)
+{
+
+/* Variables */
+
+    i32 slot;
+    dpbPicture_t *target;
+
+/* Code */
+
+    /* look the picture up among the long term pictures */
+    slot = FindDpbPic(dpb, (i32)longTermPicNum, HANTRO_FALSE);
+
+    if (slot >= 0)
+    {
+        target = &dpb->buffer[slot];
+        target->status = UNUSED;
+        dpb->numRefFrames--;
+        /* the slot is released only when the picture is not still waiting
+         * for display */
+        if (!target->toBeDisplayed)
+            dpb->fullness--;
+        return(HANTRO_OK);
+    }
+
+    /* no such long term picture in the buffer */
+    return(HANTRO_NOK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: Mmcop3
+
+ Functional description:
+ Function to assign a longTermFrameIdx to a short-term reference
+ frame (i.e. to change it to a long-term reference picture),
+ memory_management_control_operation equal to 3
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK failure, short-term picture does not exist in the
+ buffer or is a non-existing picture, or invalid
+ longTermFrameIdx given
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop3(dpbStorage_t *dpb, u32 currPicNum, u32 differenceOfPicNums,
+    u32 longTermFrameIdx)
+{
+
+/* Variables */
+
+    dpbPicture_t *pic;
+    i32 slot;
+    u32 n;
+
+/* Code */
+
+    ASSERT(dpb);
+    ASSERT(currPicNum < dpb->maxFrameNum);
+
+    /* long term indices must be enabled and the requested one in range */
+    if (dpb->maxLongTermFrameIdx == NO_LONG_TERM_FRAME_INDICES)
+        return(HANTRO_NOK);
+    if (longTermFrameIdx > dpb->maxLongTermFrameIdx)
+        return(HANTRO_NOK);
+
+    /* a long term picture already holding this longTermFrameIdx is released
+     * first (at most one picture is released) */
+    for (n = 0, pic = dpb->buffer; n < dpb->maxRefFrames; n++, pic++)
+    {
+        if (!IS_LONG_TERM(*pic) || (u32)pic->picNum != longTermFrameIdx)
+            continue;
+
+        pic->status = UNUSED;
+        dpb->numRefFrames--;
+        if (!pic->toBeDisplayed)
+            dpb->fullness--;
+        break;
+    }
+
+    /* locate the short term picture that is to become long term */
+    slot = FindDpbPic(dpb, (i32)currPicNum - (i32)differenceOfPicNums,
+        HANTRO_TRUE);
+    if (slot < 0)
+        return(HANTRO_NOK);
+
+    pic = &dpb->buffer[slot];
+    /* non-existing pictures cannot be turned into long term pictures */
+    if (!IS_EXISTING(*pic))
+        return(HANTRO_NOK);
+
+    /* for long term pictures picNum stores the longTermFrameIdx */
+    pic->status = LONG_TERM;
+    pic->picNum = (i32)longTermFrameIdx;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: Mmcop4
+
+ Functional description:
+ Function to set maxLongTermFrameIdx,
+ memory_management_control_operation equal to 4
+
+ Returns:
+ HANTRO_OK success
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop4(dpbStorage_t *dpb, u32 maxLongTermFrameIdx)
+{
+
+/* Variables */
+
+    dpbPicture_t *pic;
+    u32 n;
+    u32 dropAll;
+
+/* Code */
+
+    dpb->maxLongTermFrameIdx = maxLongTermFrameIdx;
+
+    /* when the new limit disables long term indices altogether, every long
+     * term picture is released regardless of its index */
+    dropAll = (dpb->maxLongTermFrameIdx == NO_LONG_TERM_FRAME_INDICES);
+
+    for (n = 0, pic = dpb->buffer; n < dpb->maxRefFrames; n++, pic++)
+    {
+        if (!IS_LONG_TERM(*pic))
+            continue;
+
+        /* picNum holds the longTermFrameIdx of a long term picture */
+        if ((u32)pic->picNum > maxLongTermFrameIdx || dropAll)
+        {
+            pic->status = UNUSED;
+            dpb->numRefFrames--;
+            if (!pic->toBeDisplayed)
+                dpb->fullness--;
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: Mmcop5
+
+ Functional description:
+ Function to mark all reference pictures unused for reference and
+ set maxLongTermFrameIdx to NO_LONG_TERM_FRAME_INDICES,
+ memory_management_control_operation equal to 5. Function flushes
+ the buffer and places all pictures that are needed for display into
+ the output buffer.
+
+ Returns:
+ HANTRO_OK success
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop5(dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    u32 i;
+
+/* Code */
+
+    /* Release every reference picture. The bound is the named constant
+     * that h264bsdInitDpb uses (plus one) to size dpb->buffer, rather than
+     * a bare literal, so the scan and the allocation cannot silently drift
+     * apart. */
+    for (i = 0; i < MAX_NUM_REF_IDX_L0_ACTIVE; i++)
+    {
+        if (IS_REFERENCE(dpb->buffer[i]))
+        {
+            dpb->buffer[i].status = UNUSED;
+            /* a picture still waiting for display keeps occupying its
+             * slot until it has been output */
+            if (!dpb->buffer[i].toBeDisplayed)
+                dpb->fullness--;
+        }
+    }
+
+    /* output all pictures: keep going until OutputPicture reports that
+     * nothing is left */
+    while (OutputPicture(dpb) == HANTRO_OK)
+        ;
+
+    /* back to the "no references" state */
+    dpb->numRefFrames = 0;
+    dpb->maxLongTermFrameIdx = NO_LONG_TERM_FRAME_INDICES;
+    dpb->prevRefFrameNum = 0;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: Mmcop6
+
+ Functional description:
+ Function to assign longTermFrameIdx to the current picture,
+ memory_management_control_operation equal to 6
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK invalid longTermFrameIdx or no room for current
+ picture in the buffer
+
+------------------------------------------------------------------------------*/
+
+static u32 Mmcop6(dpbStorage_t *dpb, u32 frameNum, i32 picOrderCnt,
+    u32 longTermFrameIdx)
+{
+
+/* Variables */
+
+    dpbPicture_t *pic;
+    u32 n;
+
+/* Code */
+
+    ASSERT(frameNum < dpb->maxFrameNum);
+
+    /* long term indices must be enabled and the requested one in range */
+    if (dpb->maxLongTermFrameIdx == NO_LONG_TERM_FRAME_INDICES)
+        return(HANTRO_NOK);
+    if (longTermFrameIdx > dpb->maxLongTermFrameIdx)
+        return(HANTRO_NOK);
+
+    /* a long term picture already holding this longTermFrameIdx is released
+     * first (at most one picture is released) */
+    for (n = 0, pic = dpb->buffer; n < dpb->maxRefFrames; n++, pic++)
+    {
+        if (!IS_LONG_TERM(*pic) || (u32)pic->picNum != longTermFrameIdx)
+            continue;
+
+        pic->status = UNUSED;
+        dpb->numRefFrames--;
+        if (!pic->toBeDisplayed)
+            dpb->fullness--;
+        break;
+    }
+
+    /* if there is no room, return an error */
+    if (dpb->numRefFrames >= dpb->maxRefFrames)
+        return(HANTRO_NOK);
+
+    /* store the current picture as a long term reference; picNum carries
+     * the longTermFrameIdx */
+    pic = dpb->currentOut;
+    pic->frameNum = frameNum;
+    pic->picNum = (i32)longTermFrameIdx;
+    pic->picOrderCnt = picOrderCnt;
+    pic->status = LONG_TERM;
+    pic->toBeDisplayed = dpb->noReordering ? HANTRO_FALSE : HANTRO_TRUE;
+    dpb->numRefFrames++;
+    dpb->fullness++;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdMarkDecRefPic
+
+ Functional description:
+ Function to perform reference picture marking process. This
+ function should be called both for reference and non-reference
+ pictures. Non-reference pictures shall have mark pointer set to
+ NULL.
+
+ Inputs:
+ dpb pointer to the DPB data structure
+ mark pointer to reference picture marking commands
+ image pointer to current picture to be placed in the buffer
+ frameNum frame number of the current picture
+ picOrderCnt picture order count for the current picture
+ isIdr flag to indicate if the current picture is an
+ IDR picture
+ currentPicId identifier for the current picture, from the
+ application, stored along with the picture
+ numErrMbs number of concealed macroblocks in the current
+ picture, stored along with the picture
+
+ Outputs:
+ dpb 'buffer' modified, possible output frames placed into
+ 'outBuf'
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK failure
+
+------------------------------------------------------------------------------*/
+
+/* Performs reference picture marking for the picture that was just decoded
+ * into dpb->currentOut and stores its bookkeeping data in the DPB.
+ *
+ *  mark == NULL  -> non-reference picture, kept only for display reordering
+ *  isIdr         -> buffer is flushed and the picture becomes the only
+ *                   reference
+ *  otherwise     -> memory management control operations or sliding window
+ *                   marking are applied, then the picture is inserted
+ *
+ * Returns HANTRO_OK on success. Returns HANTRO_NOK when 'image' is not the
+ * buffer handed out for the current picture, when a marking operation
+ * fails, when there is no room for the picture, or when the buffer is over
+ * full and no picture can be output. */
+u32 h264bsdMarkDecRefPic(
+    dpbStorage_t *dpb,
+    decRefPicMarking_t *mark,
+    image_t *image,
+    u32 frameNum,
+    i32 picOrderCnt,
+    u32 isIdr,
+    u32 currentPicId,
+    u32 numErrMbs)
+{
+
+/* Variables */
+
+    u32 i, status;
+    u32 markedAsLongTerm;
+    u32 toBeDisplayed;
+
+/* Code */
+
+    ASSERT(dpb);
+    ASSERT(image);
+    ASSERT(mark || !isIdr);
+    ASSERT(!isIdr || (frameNum == 0 && picOrderCnt == 0));
+    ASSERT(frameNum < dpb->maxFrameNum);
+
+    /* the picture being marked must be the one decoded into currentOut */
+    if (image->data != dpb->currentOut->data)
+    {
+        EPRINT("TRYING TO MARK NON-ALLOCATED IMAGE");
+        return(HANTRO_NOK);
+    }
+
+    dpb->lastContainsMmco5 = HANTRO_FALSE;
+    status = HANTRO_OK;
+
+    /* without reordering the picture is output immediately below, so it
+     * never waits for display inside the buffer */
+    toBeDisplayed = dpb->noReordering ? HANTRO_FALSE : HANTRO_TRUE;
+
+    /* non-reference picture, stored for display reordering purposes */
+    if (mark == NULL)
+    {
+        dpb->currentOut->status = UNUSED;
+        dpb->currentOut->frameNum = frameNum;
+        dpb->currentOut->picNum = (i32)frameNum;
+        dpb->currentOut->picOrderCnt = picOrderCnt;
+        dpb->currentOut->toBeDisplayed = toBeDisplayed;
+        if (!dpb->noReordering)
+            dpb->fullness++;
+    }
+    /* IDR picture */
+    else if (isIdr)
+    {
+
+        /* h264bsdCheckGapsInFrameNum not called for IDR pictures -> have to
+         * reset numOut and outIndex here */
+        dpb->numOut = dpb->outIndex = 0;
+
+        /* flush the buffer */
+        Mmcop5(dpb);
+        /* if noOutputOfPriorPicsFlag was set -> the pictures preceding the
+         * IDR picture shall not be output -> set output buffer empty */
+        if (mark->noOutputOfPriorPicsFlag || dpb->noReordering)
+        {
+            dpb->numOut = 0;
+            dpb->outIndex = 0;
+        }
+
+        if (mark->longTermReferenceFlag)
+        {
+            dpb->currentOut->status = LONG_TERM;
+            dpb->maxLongTermFrameIdx = 0;
+        }
+        else
+        {
+            dpb->currentOut->status = SHORT_TERM;
+            dpb->maxLongTermFrameIdx = NO_LONG_TERM_FRAME_INDICES;
+        }
+        dpb->currentOut->frameNum = 0;
+        dpb->currentOut->picNum = 0;
+        dpb->currentOut->picOrderCnt = 0;
+        dpb->currentOut->toBeDisplayed = toBeDisplayed;
+        /* the IDR picture is the only picture left in the buffer */
+        dpb->fullness = 1;
+        dpb->numRefFrames = 1;
+    }
+    /* reference picture */
+    else
+    {
+        markedAsLongTerm = HANTRO_FALSE;
+        if (mark->adaptiveRefPicMarkingModeFlag)
+        {
+            /* run the operations in order; a zero operation ends the list
+             * and the first failing operation aborts it */
+            i = 0;
+            while (mark->operation[i].memoryManagementControlOperation)
+            {
+                switch (mark->operation[i].memoryManagementControlOperation)
+                {
+                    case 1:
+                        status = Mmcop1(
+                          dpb,
+                          frameNum,
+                          mark->operation[i].differenceOfPicNums);
+                        break;
+
+                    case 2:
+                        status = Mmcop2(dpb, mark->operation[i].longTermPicNum);
+                        break;
+
+                    case 3:
+                        status = Mmcop3(
+                          dpb,
+                          frameNum,
+                          mark->operation[i].differenceOfPicNums,
+                          mark->operation[i].longTermFrameIdx);
+                        break;
+
+                    case 4:
+                        status = Mmcop4(
+                          dpb,
+                          mark->operation[i].maxLongTermFrameIdx);
+                        break;
+
+                    case 5:
+                        status = Mmcop5(dpb);
+                        dpb->lastContainsMmco5 = HANTRO_TRUE;
+                        /* after operation 5 the current picture is stored
+                         * with frame number 0 */
+                        frameNum = 0;
+                        break;
+
+                    case 6:
+                        status = Mmcop6(
+                          dpb,
+                          frameNum,
+                          picOrderCnt,
+                          mark->operation[i].longTermFrameIdx);
+                        if (status == HANTRO_OK)
+                            markedAsLongTerm = HANTRO_TRUE;
+                        break;
+
+                    default: /* invalid memory management control operation */
+                        status = HANTRO_NOK;
+                        break;
+                }
+                if (status != HANTRO_OK)
+                {
+                    break;
+                }
+                i++;
+            }
+        }
+        else
+        {
+            status = SlidingWindowRefPicMarking(dpb);
+        }
+        /* if current picture was not marked as long-term reference by
+         * memory management control operation 6 -> mark current as short
+         * term and insert it into dpb (if there is room) */
+        if (!markedAsLongTerm)
+        {
+            if (dpb->numRefFrames < dpb->maxRefFrames)
+            {
+                dpb->currentOut->frameNum = frameNum;
+                dpb->currentOut->picNum = (i32)frameNum;
+                dpb->currentOut->picOrderCnt = picOrderCnt;
+                dpb->currentOut->status = SHORT_TERM;
+                dpb->currentOut->toBeDisplayed = toBeDisplayed;
+                dpb->fullness++;
+                dpb->numRefFrames++;
+            }
+            /* no room */
+            else
+            {
+                status = HANTRO_NOK;
+            }
+        }
+    }
+
+    dpb->currentOut->isIdr = isIdr;
+    dpb->currentOut->picId = currentPicId;
+    dpb->currentOut->numErrMbs = numErrMbs;
+
+    /* dpb was initialized to not to reorder the pictures -> output current
+     * picture immediately */
+    if (dpb->noReordering)
+    {
+        ASSERT(dpb->numOut == 0);
+        ASSERT(dpb->outIndex == 0);
+        dpb->outBuf[dpb->numOut].data = dpb->currentOut->data;
+        dpb->outBuf[dpb->numOut].isIdr = dpb->currentOut->isIdr;
+        dpb->outBuf[dpb->numOut].picId = dpb->currentOut->picId;
+        dpb->outBuf[dpb->numOut].numErrMbs = dpb->currentOut->numErrMbs;
+        dpb->numOut++;
+    }
+    else
+    {
+        /* output pictures if buffer full */
+        while (dpb->fullness > dpb->dpbSize)
+        {
+            i = OutputPicture(dpb);
+            ASSERT(i == HANTRO_OK);
+            /* When ASSERT is compiled out, a failing OutputPicture would
+             * otherwise be retried forever: give up and report the error
+             * instead of spinning. */
+            if (i != HANTRO_OK)
+            {
+                status = HANTRO_NOK;
+                break;
+            }
+        }
+    }
+
+    /* sort dpb: all dpbSize+1 positions, including the one just filled */
+    ShellSort(dpb->buffer, dpb->dpbSize+1);
+
+    return(status);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdGetRefPicData
+
+ Functional description:
+ Function to get reference picture data from the reference picture
+ list
+
+ Returns:
+ pointer to desired reference picture data
+ NULL if invalid index or non-existing picture referred
+
+------------------------------------------------------------------------------*/
+
+u8* h264bsdGetRefPicData(dpbStorage_t *dpb, u32 index)
+{
+
+/* Variables */
+
+    dpbPicture_t *pic;
+
+/* Code */
+
+    /* dpb->list is allocated with MAX_NUM_REF_IDX_L0_ACTIVE + 1 entries in
+     * h264bsdInitDpb, so MAX_NUM_REF_IDX_L0_ACTIVE is the last valid index.
+     * The named constant replaces a bare literal so this check follows the
+     * allocation size. */
+    if (index > MAX_NUM_REF_IDX_L0_ACTIVE)
+        return(NULL);
+
+    pic = dpb->list[index];
+
+    /* an empty list position or a non-existing picture has no data */
+    if (pic == NULL || !IS_EXISTING(*pic))
+        return(NULL);
+
+    return(pic->data);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdAllocateDpbImage
+
+ Functional description:
+ function to allocate memory for a image. This function does not
+ really allocate any memory but reserves one of the buffer
+ positions for decoding of current picture
+
+ Returns:
+ pointer to memory area for the image
+
+
+------------------------------------------------------------------------------*/
+
+u8* h264bsdAllocateDpbImage(dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    dpbPicture_t *slot;
+
+/* Code */
+
+    /* the position at index dpbSize is the one reserved for the picture
+     * being decoded */
+    slot = dpb->buffer + dpb->dpbSize;
+
+    /* it must be free: neither waiting for display nor a reference */
+    ASSERT( !slot->toBeDisplayed &&
+            !IS_REFERENCE(*slot) );
+    ASSERT(dpb->fullness <= dpb->dpbSize);
+
+    dpb->currentOut = slot;
+
+    return(slot->data);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: SlidingWindowRefPicMarking
+
+ Functional description:
+ Function to perform sliding window reference picture marking process.
+
+ Outputs:
+ HANTRO_OK success
+ HANTRO_NOK failure, no short-term reference frame found that
+ could be marked unused
+
+
+------------------------------------------------------------------------------*/
+
+static u32 SlidingWindowRefPicMarking(dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    dpbPicture_t *oldest;
+    dpbPicture_t *pic;
+    u32 n;
+
+/* Code */
+
+    /* still room for another reference picture -> nothing to release */
+    if (dpb->numRefFrames < dpb->maxRefFrames)
+        return(HANTRO_OK);
+
+    /* find the oldest short term picture, i.e. the one with the smallest
+     * picNum among the first numRefFrames buffer positions; on equal
+     * picNums the first one found is kept */
+    oldest = NULL;
+    for (n = 0; n < dpb->numRefFrames; n++)
+    {
+        pic = &dpb->buffer[n];
+        if (!IS_SHORT_TERM(*pic))
+            continue;
+        if (oldest == NULL || pic->picNum < oldest->picNum)
+            oldest = pic;
+    }
+
+    /* no short term picture that could be released */
+    if (oldest == NULL)
+        return(HANTRO_NOK);
+
+    oldest->status = UNUSED;
+    dpb->numRefFrames--;
+    /* the slot is released only when the picture is not still waiting for
+     * display */
+    if (!oldest->toBeDisplayed)
+        dpb->fullness--;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdInitDpb
+
+ Functional description:
+ Function to initialize DPB. Reserves memories for the buffer,
+ reference picture list and output buffer. dpbSize indicates
+ the maximum DPB size indicated by the levelIdc in the stream.
+ If noReordering flag is FALSE the DPB stores dpbSize pictures
+ for display reordering purposes. On the other hand, if the
+ flag is TRUE the DPB only stores maxRefFrames reference pictures
+ and outputs all the pictures immediately.
+
+ Inputs:
+ picSizeInMbs picture size in macroblocks
+ dpbSize size of the DPB (number of pictures)
+ maxRefFrames max number of reference frames
+ maxFrameNum max frame number
+ noReordering flag to indicate that DPB does not have to
+ prepare to reorder frames for display
+
+ Outputs:
+ dpb pointer to dpb data storage
+
+ Returns:
+ HANTRO_OK success
+ MEMORY_ALLOCATION_ERROR if memory allocation failed
+
+------------------------------------------------------------------------------*/
+
+/* Initializes the DPB bookkeeping fields and allocates the picture buffers
+ * (dpbSize + 1 images), the reference picture list and the output buffer.
+ * Returns HANTRO_OK on success. Returns MEMORY_ALLOCATION_ERROR when an
+ * allocation fails or when picSizeInMbs is so large that the image size
+ * cannot be represented in u32. */
+u32 h264bsdInitDpb(
+    dpbStorage_t *dpb,
+    u32 picSizeInMbs,
+    u32 dpbSize,
+    u32 maxRefFrames,
+    u32 maxFrameNum,
+    u32 noReordering)
+{
+
+/* Variables */
+
+    u32 i;
+
+/* Code */
+
+    ASSERT(picSizeInMbs);
+    ASSERT(maxRefFrames <= MAX_NUM_REF_PICS);
+    ASSERT(maxRefFrames <= dpbSize);
+    ASSERT(maxFrameNum);
+    ASSERT(dpbSize);
+
+    /* Each image is allocated as picSizeInMbs*384 + 32 + 15 bytes (see the
+     * loop below). Reject picture sizes for which that u32 expression would
+     * wrap around: a wrapped value would allocate far too little memory and
+     * the decoder would then write past the end of the image. */
+    if (picSizeInMbs > ((u32)~(u32)0 - (32 + 15)) / 384)
+        return(MEMORY_ALLOCATION_ERROR);
+
+    dpb->maxLongTermFrameIdx = NO_LONG_TERM_FRAME_INDICES;
+    /* at least one reference frame is always provided for */
+    dpb->maxRefFrames = MAX(maxRefFrames, 1);
+    /* without display reordering only the reference pictures are stored */
+    if (noReordering)
+        dpb->dpbSize = dpb->maxRefFrames;
+    else
+        dpb->dpbSize = dpbSize;
+    dpb->maxFrameNum = maxFrameNum;
+    dpb->noReordering = noReordering;
+    dpb->fullness = 0;
+    dpb->numRefFrames = 0;
+    dpb->prevRefFrameNum = 0;
+
+    /* the picture array always has the maximum number of positions and is
+     * zeroed; image memory is attached only to the first dpbSize+1 */
+    ALLOCATE(dpb->buffer, MAX_NUM_REF_IDX_L0_ACTIVE + 1, dpbPicture_t);
+    if (dpb->buffer == NULL)
+        return(MEMORY_ALLOCATION_ERROR);
+    H264SwDecMemset(dpb->buffer, 0,
+            (MAX_NUM_REF_IDX_L0_ACTIVE + 1)*sizeof(dpbPicture_t));
+    for (i = 0; i < dpb->dpbSize + 1; i++)
+    {
+        /* Allocate needed amount of memory, which is:
+         * image size + 32 + 15, where 32 comes from the fact that in ARM
+         * OpenMax DL implementation functions may read beyond the end of an
+         * array, by a maximum of 32 bytes. And +15 comes from the need to
+         * align memory to 16-byte boundary.
+         * NOTE(review): 384 bytes per macroblock is presumably 256 luma +
+         * 2*64 chroma samples -- confirm. */
+        ALLOCATE(dpb->buffer[i].pAllocatedData, (picSizeInMbs*384 + 32+15), u8);
+        if (dpb->buffer[i].pAllocatedData == NULL)
+            return(MEMORY_ALLOCATION_ERROR);
+
+        dpb->buffer[i].data = ALIGN(dpb->buffer[i].pAllocatedData, 16);
+    }
+
+    ALLOCATE(dpb->list, MAX_NUM_REF_IDX_L0_ACTIVE + 1, dpbPicture_t*);
+    ALLOCATE(dpb->outBuf, dpb->dpbSize+1, dpbOutPicture_t);
+
+    if (dpb->list == NULL || dpb->outBuf == NULL)
+        return(MEMORY_ALLOCATION_ERROR);
+
+    /* empty list positions are recognised by NULL in h264bsdGetRefPicData */
+    H264SwDecMemset(dpb->list, 0,
+            ((MAX_NUM_REF_IDX_L0_ACTIVE + 1) * sizeof(dpbPicture_t*)) );
+
+    dpb->numOut = dpb->outIndex = 0;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdResetDpb
+
+ Functional description:
+ Function to reset DPB. This function should be called when an IDR
+ slice (other than the first) activates new sequence parameter set.
+ Function calls h264bsdFreeDpb to free old allocated memories and
+ h264bsdInitDpb to re-initialize the DPB. Same inputs, outputs and
+ returns as for h264bsdInitDpb.
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdResetDpb(
+    dpbStorage_t *dpb,
+    u32 picSizeInMbs,
+    u32 dpbSize,
+    u32 maxRefFrames,
+    u32 maxFrameNum,
+    u32 noReordering)
+{
+
+/* Variables */
+
+    u32 result;
+
+/* Code */
+
+    ASSERT(picSizeInMbs);
+    ASSERT(maxRefFrames <= MAX_NUM_REF_PICS);
+    ASSERT(maxRefFrames <= dpbSize);
+    ASSERT(maxFrameNum);
+    ASSERT(dpbSize);
+
+    /* drop the old buffers first, then set the DPB up from scratch with
+     * the new parameters */
+    h264bsdFreeDpb(dpb);
+
+    result = h264bsdInitDpb(dpb, picSizeInMbs, dpbSize, maxRefFrames,
+        maxFrameNum, noReordering);
+
+    return result;
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdInitRefPicList
+
+ Functional description:
+ Function to initialize reference picture list. Function just
+ sets pointers in the list according to pictures in the buffer.
+ The buffer is assumed to contain pictures sorted according to
+ what the H.264 standard says about initial reference picture list.
+
+ Inputs:
+ dpb pointer to dpb data structure
+
+ Outputs:
+ dpb 'list' field initialized
+
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInitRefPicList(dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    u32 pos;
+    dpbPicture_t *pic;
+
+/* Code */
+
+    /* list position n simply refers to buffer position n for the first
+     * numRefFrames pictures; positions beyond that are left untouched */
+    pos = 0;
+    pic = dpb->buffer;
+    while (pos < dpb->numRefFrames)
+    {
+        dpb->list[pos] = pic;
+        pos++;
+        pic++;
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: FindDpbPic
+
+ Functional description:
+ Function to find a reference picture from the buffer. The picture
+ to be found is identified by picNum and isShortTerm flag.
+
+ Returns:
+ index of the picture in the buffer
+ -1 if the specified picture was not found in the buffer
+
+------------------------------------------------------------------------------*/
+
+static i32 FindDpbPic(dpbStorage_t *dpb, i32 picNum, u32 isShortTerm)
+{
+
+/* Variables */
+
+    u32 slot;
+
+/* Code */
+
+    /* only the first maxRefFrames buffer positions are searched; the first
+     * picture of the requested kind with a matching picNum wins */
+    if (isShortTerm)
+    {
+        for (slot = 0; slot < dpb->maxRefFrames; slot++)
+        {
+            if (IS_SHORT_TERM(dpb->buffer[slot]) &&
+                dpb->buffer[slot].picNum == picNum)
+                return((i32)slot);
+        }
+    }
+    else
+    {
+        ASSERT(picNum >= 0);
+        for (slot = 0; slot < dpb->maxRefFrames; slot++)
+        {
+            if (IS_LONG_TERM(dpb->buffer[slot]) &&
+                dpb->buffer[slot].picNum == picNum)
+                return((i32)slot);
+        }
+    }
+
+    /* not found */
+    return(-1);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: SetPicNums
+
+ Functional description:
+ Function to set picNum values for short-term pictures in the
+ buffer. Numbering of pictures is based on frame numbers and as
+ frame numbers are modulo maxFrameNum -> frame numbers of older
+ pictures in the buffer may be bigger than the currFrameNum.
+ picNums will be set so that current frame has the largest picNum
+ and all the short-term frames in the buffer will get smaller picNum
+ representing their "distance" from the current frame. This
+ function kind of maps the modulo arithmetic back to normal.
+
+------------------------------------------------------------------------------*/
+
+static void SetPicNums(dpbStorage_t *dpb, u32 currFrameNum)
+{
+
+/* Variables */
+
+    dpbPicture_t *pic;
+    u32 n;
+
+/* Code */
+
+    ASSERT(dpb);
+    ASSERT(currFrameNum < dpb->maxFrameNum);
+
+    for (n = 0, pic = dpb->buffer; n < dpb->numRefFrames; n++, pic++)
+    {
+        /* long term pictures keep their picNum */
+        if (!IS_SHORT_TERM(*pic))
+            continue;
+
+        /* a frame number above the current one is moved down by
+         * maxFrameNum, which makes its picNum negative */
+        pic->picNum = (pic->frameNum > currFrameNum) ?
+            (i32)pic->frameNum - (i32)dpb->maxFrameNum :
+            (i32)pic->frameNum;
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdCheckGapsInFrameNum
+
+        Functional description:
+            Function to check gaps in frame_num and generate non-existing
+            (short term) reference pictures if necessary. This function should
+            be called only for non-IDR pictures.
+
+            The output queue of the dpb (numOut, outIndex) is always reset
+            in the beginning of the function.
+
+        Inputs:
+            dpb         pointer to dpb data structure
+            frameNum    frame number of the current picture
+            isRefPic    flag to indicate if current picture is a reference or
+                        non-reference picture
+            gapsAllowed Flag which indicates active SPS stance on whether
+                        to allow gaps
+
+        Outputs:
+            dpb         'buffer' possibly modified by inserting non-existing
+                        pictures with sliding window marking process
+
+        Returns:
+            HANTRO_OK   success
+            HANTRO_NOK  error in sliding window reference picture marking,
+                        no picture could be output to make room in a full
+                        buffer, or frameNum equal to previous reference
+                        frame used for a reference picture
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckGapsInFrameNum(dpbStorage_t *dpb, u32 frameNum, u32 isRefPic,
+                               u32 gapsAllowed)
+{
+
+/* Variables */
+
+    u32 unUsedShortTermFrameNum;
+    u8 *tmp;
+
+/* Code */
+
+    ASSERT(dpb);
+    ASSERT(dpb->fullness <= dpb->dpbSize);
+    ASSERT(frameNum < dpb->maxFrameNum);
+
+    dpb->numOut = 0;
+    dpb->outIndex = 0;
+
+    if(!gapsAllowed)
+        return(HANTRO_OK);
+
+    if ( (frameNum != dpb->prevRefFrameNum) &&
+         (frameNum != ((dpb->prevRefFrameNum + 1) % dpb->maxFrameNum)))
+    {
+
+        unUsedShortTermFrameNum = (dpb->prevRefFrameNum + 1) % dpb->maxFrameNum;
+
+        /* store data pointer of last buffer position to be used as next
+         * "allocated" data pointer if last buffer position after this process
+         * contains data pointer located in outBuf (buffer placed in the output
+         * shall not be overwritten by the current picture) */
+        tmp = dpb->buffer[dpb->dpbSize].data;
+        do
+        {
+            SetPicNums(dpb, unUsedShortTermFrameNum);
+
+            if (SlidingWindowRefPicMarking(dpb) != HANTRO_OK)
+            {
+                return(HANTRO_NOK);
+            }
+
+            /* output pictures if buffer full */
+            while (dpb->fullness >= dpb->dpbSize)
+            {
+#ifdef _ASSERT_USED
+                ASSERT(!dpb->noReordering);
+                ASSERT(OutputPicture(dpb) == HANTRO_OK);
+#else
+                /* OutputPicture fails if re-ordering is disabled or if no
+                 * picture is waiting for display. Fullness cannot decrease
+                 * in that case -> give up instead of looping forever */
+                if (OutputPicture(dpb) != HANTRO_OK)
+                    return(HANTRO_NOK);
+#endif
+            }
+
+            /* add to end of list */
+            ASSERT( !dpb->buffer[dpb->dpbSize].toBeDisplayed &&
+                    !IS_REFERENCE(dpb->buffer[dpb->dpbSize]) );
+            dpb->buffer[dpb->dpbSize].status = NON_EXISTING;
+            dpb->buffer[dpb->dpbSize].frameNum = unUsedShortTermFrameNum;
+            dpb->buffer[dpb->dpbSize].picNum = (i32)unUsedShortTermFrameNum;
+            dpb->buffer[dpb->dpbSize].picOrderCnt = 0;
+            dpb->buffer[dpb->dpbSize].toBeDisplayed = HANTRO_FALSE;
+            dpb->fullness++;
+            dpb->numRefFrames++;
+
+            /* sort the buffer */
+            ShellSort(dpb->buffer, dpb->dpbSize+1);
+
+            unUsedShortTermFrameNum = (unUsedShortTermFrameNum + 1) %
+                dpb->maxFrameNum;
+
+        } while (unUsedShortTermFrameNum != frameNum);
+
+        /* pictures placed in output buffer -> check that 'data' in
+         * buffer position dpbSize is not in the output buffer (this will be
+         * "allocated" by h264bsdAllocateDpbImage). If it is -> exchange data
+         * pointer with the one stored in the beginning */
+        if (dpb->numOut)
+        {
+            u32 i;  /* index to outBuf */
+            u32 j;  /* index to buffer */
+
+            for (i = 0; i < dpb->numOut; i++)
+            {
+                if (dpb->outBuf[i].data == dpb->buffer[dpb->dpbSize].data)
+                {
+                    /* find buffer position containing data pointer stored in
+                     * tmp */
+                    for (j = 0; j < dpb->dpbSize; j++)
+                    {
+                        if (dpb->buffer[j].data == tmp)
+                        {
+                            dpb->buffer[j].data =
+                                dpb->buffer[dpb->dpbSize].data;
+                            dpb->buffer[dpb->dpbSize].data = tmp;
+                            break;
+                        }
+                    }
+                    ASSERT(j < dpb->dpbSize);
+                    break;
+                }
+            }
+        }
+    }
+    /* frameNum for reference pictures shall not be the same as for previous
+     * reference picture, otherwise accesses to pictures in the buffer cannot
+     * be solved unambiguously */
+    else if (isRefPic && frameNum == dpb->prevRefFrameNum)
+    {
+        return(HANTRO_NOK);
+    }
+
+    /* save current frame_num in prevRefFrameNum. For non-reference frame
+     * prevRefFrameNum is set to frame number of last non-existing frame
+     * above, i.e. to (frameNum - 1) modulo maxFrameNum */
+    if (isRefPic)
+        dpb->prevRefFrameNum = frameNum;
+    else if (frameNum != dpb->prevRefFrameNum)
+    {
+        dpb->prevRefFrameNum =
+            (frameNum + dpb->maxFrameNum - 1) % dpb->maxFrameNum;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: FindSmallestPicOrderCnt
+
+        Functional description:
+            Search buffer positions 0..dpbSize for the picture that is
+            waiting for display (toBeDisplayed set) and has the smallest
+            picture order count. That picture is the next one in display
+            order.
+
+        Returns:
+            pointer to the picture, NULL if no pictures to be displayed
+
+------------------------------------------------------------------------------*/
+
+dpbPicture_t* FindSmallestPicOrderCnt(dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    u32 pos;
+    i32 smallestPoc = 0x7FFFFFFF;
+    dpbPicture_t *pic;
+    dpbPicture_t *candidate = NULL;
+
+/* Code */
+
+    ASSERT(dpb);
+
+    for (pos = 0, pic = dpb->buffer; pos <= dpb->dpbSize; pos++, pic++)
+    {
+        /* not waiting for display */
+        if (!pic->toBeDisplayed)
+            continue;
+
+        /* only a strictly smaller picture order count replaces the
+         * candidate found so far */
+        if (pic->picOrderCnt >= smallestPoc)
+            continue;
+
+        candidate = pic;
+        smallestPoc = pic->picOrderCnt;
+    }
+
+    return(candidate);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: OutputPicture
+
+        Functional description:
+            Append the next picture in display order to the output buffer
+            (outBuf) and mark it as displayed. Fullness of the dpb is
+            decremented if the picture is not used for reference.
+
+        Returns:
+            HANTRO_OK   success
+            HANTRO_NOK  no pictures to display (always the case when
+                        noReordering is set)
+
+------------------------------------------------------------------------------*/
+
+u32 OutputPicture(dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    dpbPicture_t *pic;
+    dpbOutPicture_t *out;
+
+/* Code */
+
+    ASSERT(dpb);
+
+    /* nothing is queued through this function without re-ordering */
+    pic = dpb->noReordering ? NULL : FindSmallestPicOrderCnt(dpb);
+
+    /* no pictures to be displayed */
+    if (pic == NULL)
+        return(HANTRO_NOK);
+
+    /* copy display information to the next free output position */
+    out = dpb->outBuf + dpb->numOut;
+    out->data = pic->data;
+    out->isIdr = pic->isIdr;
+    out->picId = pic->picId;
+    out->numErrMbs = pic->numErrMbs;
+    dpb->numOut++;
+
+    pic->toBeDisplayed = HANTRO_FALSE;
+
+    /* buffer position is released only if the picture is not a reference */
+    if (!IS_REFERENCE(*pic))
+    {
+        dpb->fullness--;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdDpbOutputPicture
+
+        Functional description:
+            Return the next not yet returned picture of the output buffer
+            and advance the read position (outIndex).
+
+        Return:
+            pointer to output picture structure, NULL if no pictures to
+            display
+
+------------------------------------------------------------------------------*/
+
+dpbOutPicture_t* h264bsdDpbOutputPicture(dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    dpbOutPicture_t *next = NULL;
+
+/* Code */
+
+    ASSERT(dpb);
+
+    if (dpb->outIndex < dpb->numOut)
+    {
+        next = dpb->outBuf + dpb->outIndex;
+        dpb->outIndex++;
+    }
+
+    return(next);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdFlushDpb
+
+        Functional description:
+            Flush the DPB: set the 'flushed' flag and move every picture
+            still waiting for display into the output buffer. To be called
+            in the end of the stream to obtain pictures buffered for display
+            re-ordering purposes. Nothing is done if the buffer has not
+            been reserved.
+
+------------------------------------------------------------------------------*/
+
+void h264bsdFlushDpb(dpbStorage_t *dpb)
+{
+
+    /* buffer not reserved -> nothing to flush */
+    if (!dpb->buffer)
+        return;
+
+    dpb->flushed = 1;
+
+    /* keep outputting until OutputPicture has nothing left to give */
+    while (OutputPicture(dpb) == HANTRO_OK)
+    {
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdFreeDpb
+
+        Functional description:
+            Release the picture memories of all dpbSize+1 buffer positions
+            and the buffer, list and outBuf arrays of the DPB.
+
+------------------------------------------------------------------------------*/
+
+void h264bsdFreeDpb(dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    u32 pos;
+    u32 numPositions;
+
+/* Code */
+
+    ASSERT(dpb);
+
+    /* picture memories can be reached only through the buffer array */
+    if (dpb->buffer)
+    {
+        numPositions = dpb->dpbSize + 1;
+        pos = 0;
+        while (pos < numPositions)
+        {
+            FREE(dpb->buffer[pos].pAllocatedData);
+            pos++;
+        }
+    }
+
+    FREE(dpb->buffer);
+    FREE(dpb->list);
+    FREE(dpb->outBuf);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: ShellSort
+
+        Functional description:
+            Sort 'num' pictures of the buffer into the order defined by
+            ComparePictures. Implements Shell's method, i.e. diminishing
+            increment sort, with increments 7, 3 and 1. See e.g. "Numerical
+            Recipes in C" for more information.
+
+------------------------------------------------------------------------------*/
+
+static void ShellSort(dpbPicture_t *pPic, u32 num)
+{
+
+    u32 gap;
+    u32 pos, hole;
+    dpbPicture_t held;
+
+    for (gap = 7; gap != 0; gap >>= 1)
+    {
+        /* insertion sort among elements 'gap' positions apart */
+        for (pos = gap; pos < num; pos++)
+        {
+            held = pPic[pos];
+
+            /* shift greater elements up until the place of 'held' is found */
+            for (hole = pos;
+                 hole >= gap && ComparePictures(pPic + hole - gap, &held) > 0;
+                 hole -= gap)
+            {
+                pPic[hole] = pPic[hole - gap];
+            }
+
+            pPic[hole] = held;
+        }
+    }
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.h
new file mode 100755
index 0000000..0e25084
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_dpb.h
@@ -0,0 +1,149 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_DPB_H
+#define H264SWDEC_DPB_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_image.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/* enumeration to represent status of buffered image */
+typedef enum {
+ UNUSED = 0,
+ NON_EXISTING, /* given to frames inserted by h264bsdCheckGapsInFrameNum to fill gaps in frame_num */
+ SHORT_TERM, /* NOTE(review): presumably short term reference picture -- confirm */
+ LONG_TERM /* NOTE(review): presumably long term reference picture -- confirm */
+} dpbPictureStatus_e;
+
+/* structure to represent a buffered picture */
+typedef struct {
+ u8 *data; /* 16-byte aligned pointer of pAllocatedData */
+ u8 *pAllocatedData; /* allocated picture pointer; (size + 15) bytes */
+ i32 picNum; /* derived from frameNum, see SetPicNums in h264bsd_dpb.c */
+ u32 frameNum;
+ i32 picOrderCnt; /* smallest value among pictures to be displayed is output first */
+ dpbPictureStatus_e status;
+ u32 toBeDisplayed; /* non-zero while waiting for output; cleared by OutputPicture */
+ u32 picId; /* copied to dpbOutPicture_t when the picture is output */
+ u32 numErrMbs; /* copied to dpbOutPicture_t when the picture is output */
+ u32 isIdr; /* copied to dpbOutPicture_t when the picture is output */
+} dpbPicture_t;
+
+/* structure to represent display image output from the buffer; all fields
+ * are copied from the corresponding dpbPicture_t by OutputPicture */
+typedef struct {
+ u8 *data;
+ u32 picId;
+ u32 numErrMbs;
+ u32 isIdr;
+} dpbOutPicture_t;
+
+/* structure to represent DPB */
+typedef struct {
+ dpbPicture_t *buffer; /* picture positions 0..dpbSize, i.e. dpbSize+1 entries are accessed */
+ dpbPicture_t **list;
+ dpbPicture_t *currentOut;
+ dpbOutPicture_t *outBuf; /* pictures queued for display by OutputPicture */
+ u32 numOut; /* number of entries written to outBuf */
+ u32 outIndex; /* next outBuf entry returned by h264bsdDpbOutputPicture */
+ u32 maxRefFrames;
+ u32 dpbSize;
+ u32 maxFrameNum; /* frame numbers are handled modulo this value */
+ u32 maxLongTermFrameIdx;
+ u32 numRefFrames; /* incremented for each non-existing frame inserted by h264bsdCheckGapsInFrameNum */
+ u32 fullness; /* decremented when a non-reference picture is output */
+ u32 prevRefFrameNum; /* updated in the end of h264bsdCheckGapsInFrameNum */
+ u32 lastContainsMmco5;
+ u32 noReordering; /* when set, OutputPicture queues nothing and returns HANTRO_NOK */
+ u32 flushed; /* set to 1 by h264bsdFlushDpb */
+} dpbStorage_t;
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdInitDpb(
+ dpbStorage_t *dpb,
+ u32 picSizeInMbs,
+ u32 dpbSize,
+ u32 numRefFrames,
+ u32 maxFrameNum,
+ u32 noReordering);
+
+u32 h264bsdResetDpb(
+ dpbStorage_t *dpb,
+ u32 picSizeInMbs,
+ u32 dpbSize,
+ u32 numRefFrames,
+ u32 maxFrameNum,
+ u32 noReordering);
+
+void h264bsdInitRefPicList(dpbStorage_t *dpb);
+
+u8* h264bsdAllocateDpbImage(dpbStorage_t *dpb);
+
+u8* h264bsdGetRefPicData(dpbStorage_t *dpb, u32 index);
+
+u32 h264bsdReorderRefPicList(
+ dpbStorage_t *dpb,
+ refPicListReordering_t *order,
+ u32 currFrameNum,
+ u32 numRefIdxActive);
+
+u32 h264bsdMarkDecRefPic(
+ dpbStorage_t *dpb,
+ decRefPicMarking_t *mark,
+ image_t *image,
+ u32 frameNum,
+ i32 picOrderCnt,
+ u32 isIdr,
+ u32 picId,
+ u32 numErrMbs);
+
+u32 h264bsdCheckGapsInFrameNum(dpbStorage_t *dpb, u32 frameNum, u32 isRefPic,
+ u32 gapsAllowed);
+
+dpbOutPicture_t* h264bsdDpbOutputPicture(dpbStorage_t *dpb);
+
+void h264bsdFlushDpb(dpbStorage_t *dpb);
+
+void h264bsdFreeDpb(dpbStorage_t *dpb);
+
+#endif /* #ifndef H264SWDEC_DPB_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_image.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_image.c
new file mode 100755
index 0000000..7b92870
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_image.c
@@ -0,0 +1,345 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdWriteMacroblock
+ h264bsdWriteOutputBlocks
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_image.h"
+#include "h264bsd_util.h"
+#include "h264bsd_neighbour.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/* x- and y-coordinates for each block, defined in h264bsd_intra_prediction.c */
+extern const u32 h264bsdBlockX[];
+extern const u32 h264bsdBlockY[];
+
+/* clipping table, defined in h264bsd_intra_prediction.c */
+extern const u8 h264bsdClip[];
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdWriteMacroblock
+
+ Functional description:
+ Write one macroblock into the image. Both luma and chroma
+ components will be written at the same time.
+
+ Inputs:
+ data pointer to macroblock data to be written, 256 values for
+ luma followed by 64 values for both chroma components
+
+ Outputs:
+ image pointer to the image where the macroblock will be written
+
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_NEON
+void h264bsdWriteMacroblock(image_t *image, u8 *data)
+{
+
+/* Variables */
+
+    u32 row;
+    u32 stride;     /* distance between destination rows in 32-bit words */
+    u32 *src;
+    u32 *dstLuma, *dstCb, *dstCr;
+    u32 word0, word1;
+
+/* Code */
+
+    ASSERT(image);
+    ASSERT(data);
+    ASSERT(!((u32)data&0x3));
+
+    /*lint -save -e826 dstLuma, dstCb and dstCr used to copy 4 bytes at the
+     * time, disable "area too small" info message */
+    dstLuma = (u32*)image->luma;
+    dstCb = (u32*)image->cb;
+    dstCr = (u32*)image->cr;
+    ASSERT(!((u32)dstLuma&0x3));
+    ASSERT(!((u32)dstCb&0x3));
+    ASSERT(!((u32)dstCr&0x3));
+
+    src = (u32*)data;
+
+    /* luma: 16 rows, four words (16 bytes) on each row */
+    stride = image->width * 4;
+    for (row = 0; row < 16; row++)
+    {
+        word0 = src[0];
+        word1 = src[1];
+        dstLuma[0] = word0;
+        dstLuma[1] = word1;
+        word0 = src[2];
+        word1 = src[3];
+        dstLuma[2] = word0;
+        dstLuma[3] = word1;
+        src += 4;
+        dstLuma += stride;
+    }
+
+    /* chroma rows are half as long as luma rows */
+    stride >>= 1;
+
+    /* cb: 8 rows, two words (8 bytes) on each row */
+    for (row = 0; row < 8; row++)
+    {
+        word0 = src[0];
+        word1 = src[1];
+        dstCb[0] = word0;
+        dstCb[1] = word1;
+        src += 2;
+        dstCb += stride;
+    }
+
+    /* cr: 8 rows, two words (8 bytes) on each row */
+    for (row = 0; row < 8; row++)
+    {
+        word0 = src[0];
+        word1 = src[1];
+        dstCr[0] = word0;
+        dstCr[1] = word1;
+        src += 2;
+        dstCr += stride;
+    }
+
+}
+#endif
+#ifndef H264DEC_OMXDL
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdWriteOutputBlocks
+
+ Functional description:
+ Write one macroblock into the image. Prediction for the macroblock
+ and the residual are given separately and will be combined while
+ writing the data to the image
+
+ The macroblock is processed as 4x4 blocks: 16 luma blocks
+ (indices 0-15), 4 cb blocks (16-19) and 4 cr blocks (20-23).
+ A block whose residual is empty is copied from the prediction
+ as such, otherwise prediction + residual is run through the
+ clipping table for each pixel.
+
+ Inputs:
+ data pointer to macroblock prediction data, 256 values for
+ luma followed by 64 values for both chroma components
+ mbNum number of the macroblock
+ residual pointer to residual data, 16 16-element arrays for luma
+ followed by 4 16-element arrays for both chroma
+ components
+
+ Outputs:
+ image pointer to the image where the data will be written.
+ image->width and image->height are in macroblocks and
+ image->data holds the luma plane followed by the cb and
+ cr planes
+
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdWriteOutputBlocks(image_t *image, u32 mbNum, u8 *data,
+ i32 residual[][16])
+{
+
+/* Variables */
+
+ u32 i;
+ u32 picWidth, picSize;
+ u8 *lum, *cb, *cr;
+ u8 *imageBlock;
+ u8 *tmp;
+ u32 row, col;
+ u32 block;
+ u32 x, y;
+ i32 *pRes;
+ i32 tmp1, tmp2, tmp3, tmp4;
+ const u8 *clp = h264bsdClip + 512; /* biased by 512 so that negative (prediction + residual) sums can be used as index */
+
+/* Code */
+
+ ASSERT(image);
+ ASSERT(data);
+ ASSERT(mbNum < image->width * image->height);
+ ASSERT(!((u32)data&0x3));
+
+ /* Image size in macroblocks */
+ picWidth = image->width;
+ picSize = picWidth * image->height;
+ row = mbNum / picWidth;
+ col = mbNum % picWidth;
+
+ /* Output macroblock position in output picture. Luma plane takes 256
+ * bytes and both chroma planes 64 bytes for each macroblock */
+ lum = (image->data + row * picWidth * 256 + col * 16);
+ cb = (image->data + picSize * 256 + row * picWidth * 64 + col * 8);
+ cr = (cb + picSize * 64);
+
+ picWidth *= 16; /* width of a luma row in pixels (bytes) */
+
+ for (block = 0; block < 16; block++)
+ {
+ x = h264bsdBlockX[block];
+ y = h264bsdBlockY[block];
+
+ pRes = residual[block];
+
+ ASSERT(pRes);
+
+ tmp = data + y*16 + x; /* prediction luma rows are 16 bytes wide */
+ imageBlock = lum + y*picWidth + x;
+
+ ASSERT(!((u32)tmp&0x3));
+ ASSERT(!((u32)imageBlock&0x3));
+
+ if (IS_RESIDUAL_EMPTY(pRes))
+ {
+ /*lint -e826 */
+ i32 *in32 = (i32*)tmp;
+ i32 *out32 = (i32*)imageBlock;
+
+ /* Residual is zero => copy prediction block to output. Four rows,
+ * one 32-bit word (4 pixels) on each row. Prediction rows are
+ * 4 words and output rows picWidth/4 words apart */
+ tmp1 = *in32; in32 += 4;
+ tmp2 = *in32; in32 += 4;
+ *out32 = tmp1; out32 += picWidth/4;
+ *out32 = tmp2; out32 += picWidth/4;
+ tmp1 = *in32; in32 += 4;
+ tmp2 = *in32;
+ *out32 = tmp1; out32 += picWidth/4;
+ *out32 = tmp2;
+ }
+ else
+ {
+
+ RANGE_CHECK_ARRAY(pRes, -512, 511, 16);
+
+ /* Calculate image = prediction + residual
+ * Process four pixels in a loop, i.e. one row of the 4x4 block on
+ * each round. Loads, table look-ups and stores of adjacent pixels
+ * are interleaved */
+ for (i = 4; i; i--)
+ {
+ tmp1 = tmp[0];
+ tmp2 = *pRes++;
+ tmp3 = tmp[1];
+ tmp1 = clp[tmp1 + tmp2];
+ tmp4 = *pRes++;
+ imageBlock[0] = (u8)tmp1;
+ tmp3 = clp[tmp3 + tmp4];
+ tmp1 = tmp[2];
+ tmp2 = *pRes++;
+ imageBlock[1] = (u8)tmp3;
+ tmp1 = clp[tmp1 + tmp2];
+ tmp3 = tmp[3];
+ tmp4 = *pRes++;
+ imageBlock[2] = (u8)tmp1;
+ tmp3 = clp[tmp3 + tmp4];
+ tmp += 16;
+ imageBlock[3] = (u8)tmp3;
+ imageBlock += picWidth;
+ }
+ }
+
+ }
+
+ picWidth /= 2; /* width of a chroma row in pixels (bytes) */
+
+ for (block = 16; block <= 23; block++)
+ {
+ x = h264bsdBlockX[block & 0x3]; /* block & 0x3 picks one of the four 4x4 blocks of the chroma component */
+ y = h264bsdBlockY[block & 0x3];
+
+ pRes = residual[block];
+
+ ASSERT(pRes);
+
+ tmp = data + 256; /* cb prediction follows the 256 luma values */
+ imageBlock = cb;
+
+ if (block >= 20)
+ {
+ imageBlock = cr;
+ tmp += 64; /* cr prediction follows the 64 cb values */
+ }
+
+ tmp += y*8 + x; /* prediction chroma rows are 8 bytes wide */
+ imageBlock += y*picWidth + x;
+
+ ASSERT(!((u32)tmp&0x3));
+ ASSERT(!((u32)imageBlock&0x3));
+
+ if (IS_RESIDUAL_EMPTY(pRes))
+ {
+ /*lint -e826 */
+ i32 *in32 = (i32*)tmp;
+ i32 *out32 = (i32*)imageBlock;
+
+ /* Residual is zero => copy prediction block to output. Same as for
+ * luma but prediction rows are only 2 words apart */
+ tmp1 = *in32; in32 += 2;
+ tmp2 = *in32; in32 += 2;
+ *out32 = tmp1; out32 += picWidth/4;
+ *out32 = tmp2; out32 += picWidth/4;
+ tmp1 = *in32; in32 += 2;
+ tmp2 = *in32;
+ *out32 = tmp1; out32 += picWidth/4;
+ *out32 = tmp2;
+ }
+ else
+ {
+
+ RANGE_CHECK_ARRAY(pRes, -512, 511, 16);
+
+ /* image = prediction + residual, one row of the 4x4 block on each
+ * round as for luma */
+ for (i = 4; i; i--)
+ {
+ tmp1 = tmp[0];
+ tmp2 = *pRes++;
+ tmp3 = tmp[1];
+ tmp1 = clp[tmp1 + tmp2];
+ tmp4 = *pRes++;
+ imageBlock[0] = (u8)tmp1;
+ tmp3 = clp[tmp3 + tmp4];
+ tmp1 = tmp[2];
+ tmp2 = *pRes++;
+ imageBlock[1] = (u8)tmp3;
+ tmp1 = clp[tmp1 + tmp2];
+ tmp3 = tmp[3];
+ tmp4 = *pRes++;
+ imageBlock[2] = (u8)tmp1;
+ tmp3 = clp[tmp3 + tmp4];
+ tmp += 8;
+ imageBlock[3] = (u8)tmp3;
+ imageBlock += picWidth;
+ }
+ }
+ }
+
+}
+#endif /* H264DEC_OMXDL */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_image.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_image.h
new file mode 100755
index 0000000..ed7c18c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_image.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_IMAGE_H
+#define H264SWDEC_IMAGE_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+typedef struct
+{
+ u8 *data; /* start of the picture: luma plane followed by cb and cr planes (see h264bsdWriteOutputBlocks) */
+ u32 width; /* picture width in macroblocks */
+ u32 height; /* picture height in macroblocks */
+ /* current MB's components */
+ u8 *luma;
+ u8 *cb;
+ u8 *cr;
+} image_t;
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+void h264bsdWriteMacroblock(image_t *image, u8 *data);
+
+#ifndef H264DEC_OMXDL
+void h264bsdWriteOutputBlocks(image_t *image, u32 mbNum, u8 *data,
+ i32 residual[][16]);
+#endif
+
+#endif /* #ifndef H264SWDEC_IMAGE_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_inter_prediction.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_inter_prediction.c
new file mode 100755
index 0000000..2a81c4a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_inter_prediction.c
@@ -0,0 +1,1027 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdInterPrediction
+ MvPrediction16x16
+ MvPrediction16x8
+ MvPrediction8x16
+ MvPrediction8x8
+ MvPrediction
+ MedianFilter
+ GetInterNeighbour
+ GetPredictionMv
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_inter_prediction.h"
+#include "h264bsd_neighbour.h"
+#include "h264bsd_util.h"
+#include "h264bsd_reconstruct.h"
+#include "h264bsd_dpb.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+typedef struct /* NOTE(review): presumably motion data of one neighbouring
+ * block, filled by GetInterNeighbour and consumed by the motion vector
+ * prediction functions -- confirm against their definitions */
+{
+ u32 available;
+ u32 refIndex;
+ mv_t mv;
+} interNeighbour_t;
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 MvPrediction16x16(mbStorage_t *pMb, mbPred_t *mbPred,
+ dpbStorage_t *dpb);
+static u32 MvPrediction16x8(mbStorage_t *pMb, mbPred_t *mbPred,
+ dpbStorage_t *dpb);
+static u32 MvPrediction8x16(mbStorage_t *pMb, mbPred_t *mbPred,
+ dpbStorage_t *dpb);
+static u32 MvPrediction8x8(mbStorage_t *pMb, subMbPred_t *subMbPred,
+ dpbStorage_t *dpb);
+static u32 MvPrediction(mbStorage_t *pMb, subMbPred_t *subMbPred,
+ u32 mbPartIdx, u32 subMbPartIdx);
+static i32 MedianFilter(i32 a, i32 b, i32 c);
+
+static void GetInterNeighbour(u32 sliceId, mbStorage_t *nMb,
+ interNeighbour_t *n, u32 index);
+static void GetPredictionMv(mv_t *mv, interNeighbour_t *a, u32 refIndex);
+
+static const neighbour_t N_A_SUB_PART[4][4][4] = { /* neighbour "A" of each
+ * sub-macroblock partition. Each entry pairs a macroblock selector (MB_A,
+ * MB_CURR or MB_NA) with a block number inside that macroblock.
+ * NOTE(review): presumably indexed [mbPartIdx][subMbPartMode][subMbPartIdx]
+ * with A being the left neighbour -- confirm against MvPrediction */
+ { { {MB_A,5}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_A,5}, {MB_A,7}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_A,5}, {MB_CURR,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_A,5}, {MB_CURR,0}, {MB_A,7}, {MB_CURR,2} } },
+
+ { { {MB_CURR,1}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,1}, {MB_CURR,3}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,1}, {MB_CURR,4}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,1}, {MB_CURR,4}, {MB_CURR,3}, {MB_CURR,6} } },
+
+ { { {MB_A,13}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_A,13}, {MB_A,15}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_A,13}, {MB_CURR,8}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_A,13}, {MB_CURR,8}, {MB_A,15}, {MB_CURR,10} } },
+
+ { { {MB_CURR,9}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,9}, {MB_CURR,11}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,9}, {MB_CURR,12}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,9}, {MB_CURR,12}, {MB_CURR,11}, {MB_CURR,14} } } };
+
+static const neighbour_t N_B_SUB_PART[4][4][4] = { /* neighbour "B" of each
+ * sub-macroblock partition. Each entry pairs a macroblock selector (MB_B,
+ * MB_CURR or MB_NA) with a block number inside that macroblock.
+ * NOTE(review): presumably indexed [mbPartIdx][subMbPartMode][subMbPartIdx]
+ * with B being the neighbour above -- confirm against MvPrediction */
+ { { {MB_B,10}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_B,10}, {MB_CURR,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_B,10}, {MB_B,11}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_B,10}, {MB_B,11}, {MB_CURR,0}, {MB_CURR,1} } },
+
+ { { {MB_B,14}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_B,14}, {MB_CURR,4}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_B,14}, {MB_B,15}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_B,14}, {MB_B,15}, {MB_CURR,4}, {MB_CURR,5} } },
+
+ { { {MB_CURR,2}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,2}, {MB_CURR,8}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,2}, {MB_CURR,3}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,2}, {MB_CURR,3}, {MB_CURR,8}, {MB_CURR,9} } },
+
+ { { {MB_CURR,6}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,6}, {MB_CURR,12}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,6}, {MB_CURR,7}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,6}, {MB_CURR,7}, {MB_CURR,12}, {MB_CURR,13} } } };
+
+static const neighbour_t N_C_SUB_PART[4][4][4] = { /* neighbour "C" of each
+ * sub-macroblock partition. Each entry pairs a macroblock selector (MB_B,
+ * MB_C, MB_CURR or MB_NA) with a block number. Unlike in the other tables
+ * some MB_NA entries carry a non-zero block number here.
+ * NOTE(review): presumably indexed [mbPartIdx][subMbPartMode][subMbPartIdx]
+ * with C being the above-right neighbour and MB_NA meaning that the
+ * neighbour is never available -- confirm against MvPrediction */
+ { { {MB_B,14}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_B,14}, {MB_NA,4}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_B,11}, {MB_B,14}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_B,11}, {MB_B,14}, {MB_CURR,1}, {MB_NA,4} } },
+
+ { { {MB_C,10}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_C,10}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_B,15}, {MB_C,10}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_B,15}, {MB_C,10}, {MB_CURR,5}, {MB_NA,0} } },
+
+ { { {MB_CURR,6}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,6}, {MB_NA,12}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,3}, {MB_CURR,6}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,3}, {MB_CURR,6}, {MB_CURR,9}, {MB_NA,12} } },
+
+ { { {MB_NA,2}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_NA,2}, {MB_NA,8}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,7}, {MB_NA,2}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,7}, {MB_NA,2}, {MB_CURR,13}, {MB_NA,8} } } };
+
+static const neighbour_t N_D_SUB_PART[4][4][4] = { /* neighbour "D" of each
+ * sub-macroblock partition. Each entry pairs a macroblock selector (MB_A,
+ * MB_B, MB_D, MB_CURR or MB_NA) with a block number inside that macroblock.
+ * NOTE(review): presumably indexed [mbPartIdx][subMbPartMode][subMbPartIdx]
+ * with D being the above-left neighbour -- confirm against MvPrediction */
+ { { {MB_D,15}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_D,15}, {MB_A,5}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_D,15}, {MB_B,10}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_D,15}, {MB_B,10}, {MB_A,5}, {MB_CURR,0} } },
+
+ { { {MB_B,11}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_B,11}, {MB_CURR,1}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_B,11}, {MB_B,14}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_B,11}, {MB_B,14}, {MB_CURR,1}, {MB_CURR,4} } },
+
+ { { {MB_A,7}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_A,7}, {MB_A,13}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_A,7}, {MB_CURR,2}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_A,7}, {MB_CURR,2}, {MB_A,13}, {MB_CURR,8} } },
+
+ { { {MB_CURR,3}, {MB_NA,0}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,3}, {MB_CURR,9}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,3}, {MB_CURR,6}, {MB_NA,0}, {MB_NA,0} },
+ { {MB_CURR,3}, {MB_CURR,6}, {MB_CURR,9}, {MB_CURR,12} } } };
+
+
+#ifdef H264DEC_OMXDL
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInterPrediction
+
+        Functional description:
+            Processes one inter macroblock. Performs motion vector prediction
+            and reconstructs prediction macroblock into 'data'. This
+            (H264DEC_OMXDL) version of the function does not write anything
+            to the output image, currImage is used only to obtain the
+            dimensions of the picture.
+
+        Inputs:
+            pMb         pointer to macroblock specific information
+            pMbLayer    pointer to current macroblock data from stream
+            dpb         pointer to decoded picture buffer
+            mbNum       current macroblock number
+            currImage   pointer to output image, width and height (in
+                        macroblocks) are read
+            data        pointer where predicted macroblock will be stored
+
+        Outputs:
+            pMb         structure is updated with current macroblock
+            data        prediction is stored here
+
+        Returns:
+            HANTRO_OK   success
+            HANTRO_NOK  error in motion vector prediction
+
+------------------------------------------------------------------------------*/
+u32 h264bsdInterPrediction(mbStorage_t *pMb, macroblockLayer_t *pMbLayer,
+    dpbStorage_t *dpb, u32 mbNum, image_t *currImage, u8 *data)
+{
+
+/* Variables */
+
+    u32 i;
+    u32 x, y;
+    u32 colAndRow;
+    subMbPartMode_e subPartMode;
+    image_t refImage;
+    u8 fillBuff[32*21 + 15 + 32];
+    u8 *pFill;
+    u32 tmp;
+
+/* Code */
+
+    ASSERT(pMb);
+    ASSERT(h264bsdMbPartPredMode(pMb->mbType) == PRED_MODE_INTER);
+    ASSERT(pMbLayer);
+    ASSERT(currImage);
+    ASSERT(data);
+
+    /* 16-byte alignment */
+    pFill = ALIGN(fillBuff, 16);
+
+    /* set row bits 15:0 */
+    colAndRow = mbNum / currImage->width;
+    /*set col to bits 31:16 */
+    colAndRow += (mbNum - colAndRow * currImage->width) << 16;
+    /* macroblock units -> pixels */
+    colAndRow <<= 4;
+
+    refImage.width = currImage->width;
+    refImage.height = currImage->height;
+
+    /* 'tmp' describes the partition to be predicted in each call to
+     * h264bsdPredictSamples: x offset in bits 31:24, y offset in bits 23:16,
+     * width in bits 15:8 and height in bits 7:0 */
+    switch (pMb->mbType)
+    {
+        case P_Skip:
+        case P_L0_16x16:
+            if (MvPrediction16x16(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK)
+                return(HANTRO_NOK);
+            refImage.data = pMb->refAddr[0];
+            tmp = (0<<24) + (0<<16) + (16<<8) + 16;
+            h264bsdPredictSamples(data, pMb->mv, &refImage,
+                                  colAndRow, tmp, pFill);
+            break;
+
+        case P_L0_L0_16x8:
+            if ( MvPrediction16x8(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK)
+                return(HANTRO_NOK);
+            /* upper half */
+            refImage.data = pMb->refAddr[0];
+            tmp = (0<<24) + (0<<16) + (16<<8) + 8;
+            h264bsdPredictSamples(data, pMb->mv, &refImage,
+                                  colAndRow, tmp, pFill);
+
+            /* lower half */
+            refImage.data = pMb->refAddr[2];
+            tmp = (0<<24) + (8<<16) + (16<<8) + 8;
+            h264bsdPredictSamples(data, pMb->mv+8, &refImage,
+                                  colAndRow, tmp, pFill);
+            break;
+
+        case P_L0_L0_8x16:
+            if ( MvPrediction8x16(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK)
+                return(HANTRO_NOK);
+            /* left half */
+            refImage.data = pMb->refAddr[0];
+            tmp = (0<<24) + (0<<16) + (8<<8) + 16;
+            h264bsdPredictSamples(data, pMb->mv, &refImage,
+                                  colAndRow, tmp, pFill);
+            /* right half */
+            refImage.data = pMb->refAddr[1];
+            tmp = (8<<24) + (0<<16) + (8<<8) + 16;
+            h264bsdPredictSamples(data, pMb->mv+4, &refImage,
+                                  colAndRow, tmp, pFill);
+            break;
+
+        default: /* P_8x8 and P_8x8ref0 */
+            if ( MvPrediction8x8(pMb, &pMbLayer->subMbPred, dpb) != HANTRO_OK)
+                return(HANTRO_NOK);
+            for (i = 0; i < 4; i++)
+            {
+                refImage.data = pMb->refAddr[i];
+                subPartMode =
+                    h264bsdSubMbPartMode(pMbLayer->subMbPred.subMbType[i]);
+                /* top-left corner of the 8x8 sub-macroblock */
+                x = i & 0x1 ? 8 : 0;
+                y = i < 2 ? 0 : 8;
+                switch (subPartMode)
+                {
+                    case MB_SP_8x8:
+                        tmp = (x<<24) + (y<<16) + (8<<8) + 8;
+                        h264bsdPredictSamples(data, pMb->mv+4*i, &refImage,
+                                              colAndRow, tmp, pFill);
+                        break;
+
+                    case MB_SP_8x4:
+                        tmp = (x<<24) + (y<<16) + (8<<8) + 4;
+                        h264bsdPredictSamples(data, pMb->mv+4*i, &refImage,
+                                              colAndRow, tmp, pFill);
+                        tmp = (x<<24) + ((y+4)<<16) + (8<<8) + 4;
+                        h264bsdPredictSamples(data, pMb->mv+4*i+2, &refImage,
+                                              colAndRow, tmp, pFill);
+                        break;
+
+                    case MB_SP_4x8:
+                        tmp = (x<<24) + (y<<16) + (4<<8) + 8;
+                        h264bsdPredictSamples(data, pMb->mv+4*i, &refImage,
+                                              colAndRow, tmp, pFill);
+                        tmp = ((x+4)<<24) + (y<<16) + (4<<8) + 8;
+                        h264bsdPredictSamples(data, pMb->mv+4*i+1, &refImage,
+                                              colAndRow, tmp, pFill);
+                        break;
+
+                    default: /* four 4x4 sub-macroblock partitions */
+                        tmp = (x<<24) + (y<<16) + (4<<8) + 4;
+                        h264bsdPredictSamples(data, pMb->mv+4*i, &refImage,
+                                              colAndRow, tmp, pFill);
+                        tmp = ((x+4)<<24) + (y<<16) + (4<<8) + 4;
+                        h264bsdPredictSamples(data, pMb->mv+4*i+1, &refImage,
+                                              colAndRow, tmp, pFill);
+                        tmp = (x<<24) + ((y+4)<<16) + (4<<8) + 4;
+                        h264bsdPredictSamples(data, pMb->mv+4*i+2, &refImage,
+                                              colAndRow, tmp, pFill);
+                        tmp = ((x+4)<<24) + ((y+4)<<16) + (4<<8) + 4;
+                        h264bsdPredictSamples(data, pMb->mv+4*i+3, &refImage,
+                                              colAndRow, tmp, pFill);
+                        break;
+                }
+            }
+            break;
+    }
+
+    /* prediction is ready in 'data'. Nothing is written to the output image
+     * here, so the decoded flag of the macroblock does not need to be
+     * checked */
+    return(HANTRO_OK);
+}
+
+#else /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdInterPrediction
+
+ Functional description:
+ Processes one inter macroblock. Performs motion vector prediction
+ and reconstructs prediction macroblock. Writes the final macroblock
+ (prediction + residual) into the output image (currImage)
+
+ Inputs:
+ pMb pointer to macroblock specific information
+ pMbLayer pointer to current macroblock data from stream
+ dpb pointer to decoded picture buffer
+ mbNum current macroblock number
+ currImage pointer to output image
+ data pointer where predicted macroblock will be stored
+
+ Outputs:
+ pMb structure is updated with current macroblock
+ currImage current macroblock is written into image
+ data prediction is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK error in motion vector prediction
+
+------------------------------------------------------------------------------*/
+u32 h264bsdInterPrediction(mbStorage_t *pMb, macroblockLayer_t *pMbLayer,
+    dpbStorage_t *dpb, u32 mbNum, image_t *currImage, u8 *data)
+{
+
+/* Variables */
+
+    u32 part;               /* index of the 8x8 partition (P_8x8 only) */
+    u32 sub;                /* index of the 4x4 block inside a partition */
+    u32 offX, offY;         /* partition offset inside the macroblock */
+    u32 pixRow, pixCol;     /* macroblock origin in the image, in pixels */
+    image_t ref;            /* reference picture handed to the predictor */
+
+/* Code */
+
+    ASSERT(pMb);
+    ASSERT(h264bsdMbPartPredMode(pMb->mbType) == PRED_MODE_INTER);
+    ASSERT(pMbLayer);
+
+    /* currImage->width is used as the number of macroblocks per row, so
+     * mbNum splits into a row and a column; both are scaled by 16 to get
+     * pixel coordinates */
+    pixRow = (mbNum / currImage->width) * 16;
+    pixCol = (mbNum % currImage->width) * 16;
+
+    ref.width = currImage->width;
+    ref.height = currImage->height;
+
+    if (pMb->mbType == P_Skip || pMb->mbType == P_L0_16x16)
+    {
+        /* one 16x16 partition */
+        if (MvPrediction16x16(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK)
+            return(HANTRO_NOK);
+        ref.data = pMb->refAddr[0];
+        h264bsdPredictSamples(data, pMb->mv, &ref, pixCol, pixRow,
+            0, 0, 16, 16);
+    }
+    else if (pMb->mbType == P_L0_L0_16x8)
+    {
+        /* two 16x8 partitions, upper one first */
+        if (MvPrediction16x8(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK)
+            return(HANTRO_NOK);
+        ref.data = pMb->refAddr[0];
+        h264bsdPredictSamples(data, pMb->mv, &ref, pixCol, pixRow,
+            0, 0, 16, 8);
+        ref.data = pMb->refAddr[2];
+        h264bsdPredictSamples(data, pMb->mv+8, &ref, pixCol, pixRow,
+            0, 8, 16, 8);
+    }
+    else if (pMb->mbType == P_L0_L0_8x16)
+    {
+        /* two 8x16 partitions, left one first */
+        if (MvPrediction8x16(pMb, &pMbLayer->mbPred, dpb) != HANTRO_OK)
+            return(HANTRO_NOK);
+        ref.data = pMb->refAddr[0];
+        h264bsdPredictSamples(data, pMb->mv, &ref, pixCol, pixRow,
+            0, 0, 8, 16);
+        ref.data = pMb->refAddr[1];
+        h264bsdPredictSamples(data, pMb->mv+4, &ref, pixCol, pixRow,
+            8, 0, 8, 16);
+    }
+    else /* P_8x8 and P_8x8ref0 */
+    {
+        if (MvPrediction8x8(pMb, &pMbLayer->subMbPred, dpb) != HANTRO_OK)
+            return(HANTRO_NOK);
+
+        for (part = 0; part < 4; part++)
+        {
+            ref.data = pMb->refAddr[part];
+            /* partitions 1 and 3 are the right column, 2 and 3 the lower
+             * row of the macroblock */
+            offX = (part & 0x1) ? 8 : 0;
+            offY = (part < 2) ? 0 : 8;
+
+            switch (h264bsdSubMbPartMode(pMbLayer->subMbPred.subMbType[part]))
+            {
+                case MB_SP_8x8:
+                    h264bsdPredictSamples(data, pMb->mv+4*part, &ref,
+                        pixCol, pixRow, offX, offY, 8, 8);
+                    break;
+
+                case MB_SP_8x4:
+                    h264bsdPredictSamples(data, pMb->mv+4*part, &ref,
+                        pixCol, pixRow, offX, offY, 8, 4);
+                    h264bsdPredictSamples(data, pMb->mv+4*part+2, &ref,
+                        pixCol, pixRow, offX, offY+4, 8, 4);
+                    break;
+
+                case MB_SP_4x8:
+                    h264bsdPredictSamples(data, pMb->mv+4*part, &ref,
+                        pixCol, pixRow, offX, offY, 4, 8);
+                    h264bsdPredictSamples(data, pMb->mv+4*part+1, &ref,
+                        pixCol, pixRow, offX+4, offY, 4, 8);
+                    break;
+
+                default:
+                    /* four 4x4 blocks: left-to-right, then top-to-bottom */
+                    for (sub = 0; sub < 4; sub++)
+                    {
+                        h264bsdPredictSamples(data, pMb->mv+4*part+sub,
+                            &ref, pixCol, pixRow,
+                            offX + 4*(sub & 0x1), offY + 4*(sub >> 1),
+                            4, 4);
+                    }
+                    break;
+            }
+        }
+    }
+
+    /* if decoded flag > 1 -> mb has already been successfully decoded and
+     * written to output -> do not write again */
+    if (pMb->decoded > 1)
+        return HANTRO_OK;
+
+    if (pMb->mbType == P_Skip)
+    {
+        /* skipped macroblock: the prediction is written as such */
+        h264bsdWriteMacroblock(currImage, data);
+    }
+    else
+    {
+        h264bsdWriteOutputBlocks(currImage, mbNum, data,
+            pMbLayer->residual.level);
+    }
+
+    return(HANTRO_OK);
+}
+#endif /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+    Function: MvPrediction16x16
+
+        Functional description:
+            Motion vector prediction for 16x16 partition mode. Also handles
+            P_Skip macroblocks, for which the motion vector is forced to
+            zero when neighbour A or B is unavailable or when either of
+            them has reference index 0 together with a zero motion vector.
+            In every other case mvdL0[0] is added to the predictor computed
+            from neighbours A, B and C (D replaces C when C is unavailable).
+
+        Inputs:
+            pMb             pointer to macroblock specific information
+            mbPred          prediction data from stream (refIdxL0[0] and
+                            mvdL0[0] are read)
+            dpb             pointer to decoded picture buffer
+
+        Outputs:
+            pMb             mv[0..15], refPic[0..3] and refAddr[0..3] are
+                            all set to the single vector / reference
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      motion vector out of range, or no reference
+                            picture data for the reference index
+
+------------------------------------------------------------------------------*/
+
+u32 MvPrediction16x16(mbStorage_t *pMb, mbPred_t *mbPred, dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    mv_t mv;
+    mv_t mvPred;
+    interNeighbour_t a[3]; /* A, B, C */
+    u32 refIndex;
+    u32 i;
+    u8 *tmp;
+
+/* Code */
+
+    refIndex = mbPred->refIdxL0[0];
+
+    GetInterNeighbour(pMb->sliceId, pMb->mbA, a, 5);
+    GetInterNeighbour(pMb->sliceId, pMb->mbB, a+1, 10);
+
+    /* The zero-vector test compares the two components directly. Reading
+     * the mv_t through a u32 pointer, as was done before, breaks the C
+     * effective-type (strict aliasing) rule and may be a misaligned
+     * access. */
+    if (pMb->mbType == P_Skip &&
+        (!a[0].available || !a[1].available ||
+         ( a[0].refIndex == 0 && a[0].mv.hor == 0 && a[0].mv.ver == 0 ) ||
+         ( a[1].refIndex == 0 && a[1].mv.hor == 0 && a[1].mv.ver == 0 )))
+    {
+        mv.hor = mv.ver = 0;
+    }
+    else
+    {
+        mv = mbPred->mvdL0[0];
+        GetInterNeighbour(pMb->sliceId, pMb->mbC, a+2, 10);
+        if (!a[2].available)
+        {
+            /* C is not available -> use D instead */
+            GetInterNeighbour(pMb->sliceId, pMb->mbD, a+2, 15);
+        }
+
+        GetPredictionMv(&mvPred, a, refIndex);
+
+        mv.hor += mvPred.hor;
+        mv.ver += mvPred.ver;
+
+        /* horizontal motion vector range [-2048, 2047.75] */
+        if ((u32)(i32)(mv.hor+8192) >= (16384))
+            return(HANTRO_NOK);
+
+        /* vertical motion vector range [-512, 511.75]
+         * (smaller for low levels) */
+        if ((u32)(i32)(mv.ver+2048) >= (4096))
+            return(HANTRO_NOK);
+    }
+
+    tmp = h264bsdGetRefPicData(dpb, refIndex);
+    if (tmp == NULL)
+        return(HANTRO_NOK);
+
+    /* one vector and one reference cover the whole macroblock */
+    for (i = 0; i < 16; i++)
+    {
+        pMb->mv[i] = mv;
+    }
+    for (i = 0; i < 4; i++)
+    {
+        pMb->refPic[i] = refIndex;
+        pMb->refAddr[i] = tmp;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: MvPrediction16x8
+
+        Functional description:
+            Motion vector prediction for 16x8 partition mode. The first
+            partition (mv[0..7]) prefers neighbour B, the second one
+            (mv[8..15]) prefers neighbour A; when the preferred neighbour
+            has a different reference index the generic predictor is used.
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      motion vector out of range, or no reference
+                            picture data for a reference index
+
+------------------------------------------------------------------------------*/
+
+u32 MvPrediction16x8(mbStorage_t *pMb, mbPred_t *mbPred, dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    mv_t vec;
+    mv_t pred;
+    interNeighbour_t nb[3]; /* A, B, C */
+    u32 refIdx;
+    u32 k;
+    u8 *refData;
+
+/* Code */
+
+    /* ---- first partition: mv[0..7], refPic/refAddr[0..1] ---- */
+
+    vec = mbPred->mvdL0[0];
+    refIdx = mbPred->refIdxL0[0];
+
+    GetInterNeighbour(pMb->sliceId, pMb->mbB, &nb[1], 10);
+
+    if (nb[1].refIndex != refIdx)
+    {
+        GetInterNeighbour(pMb->sliceId, pMb->mbA, &nb[0], 5);
+        GetInterNeighbour(pMb->sliceId, pMb->mbC, &nb[2], 10);
+        if (!nb[2].available)
+        {
+            GetInterNeighbour(pMb->sliceId, pMb->mbD, &nb[2], 15);
+        }
+
+        GetPredictionMv(&pred, nb, refIdx);
+    }
+    else
+    {
+        pred = nb[1].mv;
+    }
+    vec.hor += pred.hor;
+    vec.ver += pred.ver;
+
+    /* horizontal motion vector range [-2048, 2047.75],
+     * vertical motion vector range [-512, 511.75] (smaller for low levels) */
+    if ((u32)(i32)(vec.hor+8192) >= (16384) ||
+        (u32)(i32)(vec.ver+2048) >= (4096))
+    {
+        return(HANTRO_NOK);
+    }
+
+    refData = h264bsdGetRefPicData(dpb, refIdx);
+    if (refData == NULL)
+    {
+        return(HANTRO_NOK);
+    }
+
+    for (k = 0; k < 8; k++)
+    {
+        pMb->mv[k] = vec;
+    }
+    for (k = 0; k < 2; k++)
+    {
+        pMb->refPic[k] = refIdx;
+        pMb->refAddr[k] = refData;
+    }
+
+    /* ---- second partition: mv[8..15], refPic/refAddr[2..3] ---- */
+
+    vec = mbPred->mvdL0[1];
+    refIdx = mbPred->refIdxL0[1];
+
+    GetInterNeighbour(pMb->sliceId, pMb->mbA, &nb[0], 13);
+    if (nb[0].refIndex != refIdx)
+    {
+        /* B is the first partition of this very macroblock */
+        nb[1].available = HANTRO_TRUE;
+        nb[1].refIndex = pMb->refPic[0];
+        nb[1].mv = pMb->mv[0];
+
+        /* c is not available */
+        GetInterNeighbour(pMb->sliceId, pMb->mbA, &nb[2], 7);
+
+        GetPredictionMv(&pred, nb, refIdx);
+    }
+    else
+    {
+        pred = nb[0].mv;
+    }
+    vec.hor += pred.hor;
+    vec.ver += pred.ver;
+
+    /* same range limits as for the first partition */
+    if ((u32)(i32)(vec.hor+8192) >= (16384) ||
+        (u32)(i32)(vec.ver+2048) >= (4096))
+    {
+        return(HANTRO_NOK);
+    }
+
+    refData = h264bsdGetRefPicData(dpb, refIdx);
+    if (refData == NULL)
+    {
+        return(HANTRO_NOK);
+    }
+
+    for (k = 8; k < 16; k++)
+    {
+        pMb->mv[k] = vec;
+    }
+    for (k = 2; k < 4; k++)
+    {
+        pMb->refPic[k] = refIdx;
+        pMb->refAddr[k] = refData;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: MvPrediction8x16
+
+        Functional description:
+            Motion vector prediction for 8x16 partition mode. The first
+            partition (mv[0..3] and mv[8..11]) prefers neighbour A, the
+            second one (mv[4..7] and mv[12..15]) prefers neighbour C; when
+            the preferred neighbour has a different reference index the
+            generic predictor is used.
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      motion vector out of range, or no reference
+                            picture data for a reference index
+
+------------------------------------------------------------------------------*/
+
+u32 MvPrediction8x16(mbStorage_t *pMb, mbPred_t *mbPred, dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    mv_t vec;
+    mv_t pred;
+    interNeighbour_t nb[3]; /* A, B, C */
+    u32 refIdx;
+    u32 k;
+    u8 *refData;
+
+/* Code */
+
+    /* ---- first partition: mv[0..3] and mv[8..11] ---- */
+
+    vec = mbPred->mvdL0[0];
+    refIdx = mbPred->refIdxL0[0];
+
+    GetInterNeighbour(pMb->sliceId, pMb->mbA, &nb[0], 5);
+
+    if (nb[0].refIndex != refIdx)
+    {
+        GetInterNeighbour(pMb->sliceId, pMb->mbB, &nb[1], 10);
+        GetInterNeighbour(pMb->sliceId, pMb->mbB, &nb[2], 14);
+        if (!nb[2].available)
+        {
+            GetInterNeighbour(pMb->sliceId, pMb->mbD, &nb[2], 15);
+        }
+
+        GetPredictionMv(&pred, nb, refIdx);
+    }
+    else
+    {
+        pred = nb[0].mv;
+    }
+    vec.hor += pred.hor;
+    vec.ver += pred.ver;
+
+    /* horizontal motion vector range [-2048, 2047.75],
+     * vertical motion vector range [-512, 511.75] (smaller for low levels) */
+    if ((u32)(i32)(vec.hor+8192) >= (16384) ||
+        (u32)(i32)(vec.ver+2048) >= (4096))
+    {
+        return(HANTRO_NOK);
+    }
+
+    refData = h264bsdGetRefPicData(dpb, refIdx);
+    if (refData == NULL)
+    {
+        return(HANTRO_NOK);
+    }
+
+    for (k = 0; k < 4; k++)
+    {
+        pMb->mv[k] = pMb->mv[k+8] = vec;
+    }
+    pMb->refPic[0] = pMb->refPic[2] = refIdx;
+    pMb->refAddr[0] = pMb->refAddr[2] = refData;
+
+    /* ---- second partition: mv[4..7] and mv[12..15] ---- */
+
+    vec = mbPred->mvdL0[1];
+    refIdx = mbPred->refIdxL0[1];
+
+    GetInterNeighbour(pMb->sliceId, pMb->mbC, &nb[2], 10);
+    if (!nb[2].available)
+    {
+        GetInterNeighbour(pMb->sliceId, pMb->mbB, &nb[2], 11);
+    }
+    if (nb[2].refIndex != refIdx)
+    {
+        /* A is the first partition of this very macroblock */
+        nb[0].available = HANTRO_TRUE;
+        nb[0].refIndex = pMb->refPic[0];
+        nb[0].mv = pMb->mv[0];
+
+        GetInterNeighbour(pMb->sliceId, pMb->mbB, &nb[1], 14);
+
+        GetPredictionMv(&pred, nb, refIdx);
+    }
+    else
+    {
+        pred = nb[2].mv;
+    }
+    vec.hor += pred.hor;
+    vec.ver += pred.ver;
+
+    /* same range limits as for the first partition */
+    if ((u32)(i32)(vec.hor+8192) >= (16384) ||
+        (u32)(i32)(vec.ver+2048) >= (4096))
+    {
+        return(HANTRO_NOK);
+    }
+
+    refData = h264bsdGetRefPicData(dpb, refIdx);
+    if (refData == NULL)
+    {
+        return(HANTRO_NOK);
+    }
+
+    for (k = 4; k < 8; k++)
+    {
+        pMb->mv[k] = pMb->mv[k+8] = vec;
+    }
+    pMb->refPic[1] = pMb->refPic[3] = refIdx;
+    pMb->refAddr[1] = pMb->refAddr[3] = refData;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: MvPrediction8x8
+
+        Functional description:
+            Motion vector prediction for 8x8 partition mode. For each of
+            the four partitions the reference index and reference picture
+            data are stored in pMb, then MvPrediction is run for every
+            sub-partition of that partition.
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      no reference picture data for a reference
+                            index, or MvPrediction failed
+
+------------------------------------------------------------------------------*/
+
+u32 MvPrediction8x8(mbStorage_t *pMb, subMbPred_t *subMbPred, dpbStorage_t *dpb)
+{
+
+/* Variables */
+
+    u32 part, sub;
+    u32 subCount;
+
+/* Code */
+
+    for (part = 0; part < 4; part++)
+    {
+        subCount = h264bsdNumSubMbPart(subMbPred->subMbType[part]);
+
+        /* refPic/refAddr are written before the NULL check, so a failing
+         * partition still leaves its entries updated */
+        pMb->refPic[part] = subMbPred->refIdxL0[part];
+        pMb->refAddr[part] =
+            h264bsdGetRefPicData(dpb, subMbPred->refIdxL0[part]);
+        if (pMb->refAddr[part] == NULL)
+        {
+            return(HANTRO_NOK);
+        }
+
+        sub = 0;
+        while (sub < subCount)
+        {
+            if (MvPrediction(pMb, subMbPred, part, sub) != HANTRO_OK)
+            {
+                return(HANTRO_NOK);
+            }
+            sub++;
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: MvPrediction
+
+        Functional description:
+            Perform motion vector prediction for one sub-partition of an
+            8x8 partition: look up neighbours A, B and C (D when C is not
+            available), add the predictor to the decoded difference and
+            store the result in every pMb->mv entry the sub-partition
+            covers.
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      motion vector out of range
+
+------------------------------------------------------------------------------*/
+
+u32 MvPrediction(mbStorage_t *pMb, subMbPred_t *subMbPred, u32 mbPartIdx,
+    u32 subMbPartIdx)
+{
+
+/* Variables */
+
+    mv_t vec, pred;
+    mv_t *dst;
+    u32 refIdx;
+    subMbPartMode_e mode;
+    const neighbour_t *pos;
+    mbStorage_t *nbMb;
+    interNeighbour_t nb[3]; /* A, B, C */
+
+/* Code */
+
+    vec = subMbPred->mvdL0[mbPartIdx][subMbPartIdx];
+    mode = h264bsdSubMbPartMode(subMbPred->subMbType[mbPartIdx]);
+    refIdx = subMbPred->refIdxL0[mbPartIdx];
+
+    /* neighbour A */
+    pos = &N_A_SUB_PART[mbPartIdx][mode][subMbPartIdx];
+    nbMb = h264bsdGetNeighbourMb(pMb, pos->mb);
+    GetInterNeighbour(pMb->sliceId, nbMb, &nb[0], pos->index);
+
+    /* neighbour B */
+    pos = &N_B_SUB_PART[mbPartIdx][mode][subMbPartIdx];
+    nbMb = h264bsdGetNeighbourMb(pMb, pos->mb);
+    GetInterNeighbour(pMb->sliceId, nbMb, &nb[1], pos->index);
+
+    /* neighbour C, replaced by D when C is not available */
+    pos = &N_C_SUB_PART[mbPartIdx][mode][subMbPartIdx];
+    nbMb = h264bsdGetNeighbourMb(pMb, pos->mb);
+    GetInterNeighbour(pMb->sliceId, nbMb, &nb[2], pos->index);
+
+    if (!nb[2].available)
+    {
+        pos = &N_D_SUB_PART[mbPartIdx][mode][subMbPartIdx];
+        nbMb = h264bsdGetNeighbourMb(pMb, pos->mb);
+        GetInterNeighbour(pMb->sliceId, nbMb, &nb[2], pos->index);
+    }
+
+    GetPredictionMv(&pred, nb, refIdx);
+
+    vec.hor += pred.hor;
+    vec.ver += pred.ver;
+
+    /* horizontal motion vector range [-2048, 2047.75],
+     * vertical motion vector range [-512, 511.75] (smaller for low levels) */
+    if (((u32)(i32)(vec.hor+8192) >= (16384)) ||
+        ((u32)(i32)(vec.ver+2048) >= (4096)))
+    {
+        return(HANTRO_NOK);
+    }
+
+    /* first of the four mv entries that belong to this 8x8 partition */
+    dst = &pMb->mv[4*mbPartIdx];
+
+    switch (mode)
+    {
+        case MB_SP_8x8:
+            dst[0] = vec;
+            dst[1] = vec;
+            dst[2] = vec;
+            dst[3] = vec;
+            break;
+
+        case MB_SP_8x4:
+            dst[2*subMbPartIdx] = vec;
+            dst[2*subMbPartIdx + 1] = vec;
+            break;
+
+        case MB_SP_4x8:
+            dst[subMbPartIdx] = vec;
+            dst[subMbPartIdx + 2] = vec;
+            break;
+
+        case MB_SP_4x4:
+            dst[subMbPartIdx] = vec;
+            break;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: MedianFilter
+
+        Functional description:
+            Median filtering for motion vector prediction. Returns the
+            middle one of the three values a, b and c.
+
+------------------------------------------------------------------------------*/
+
+i32 MedianFilter(i32 a, i32 b, i32 c)
+{
+
+/* Variables */
+
+    i32 lo, hi;
+
+/* Code */
+
+    /* order the first two values ... */
+    lo = (b < a) ? b : a;
+    hi = (b > a) ? b : a;
+
+    /* ... and clamp the third one between them */
+    if (c > hi)
+    {
+        return(hi);
+    }
+    if (c < lo)
+    {
+        return(lo);
+    }
+
+    return(c);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: GetInterNeighbour
+
+        Functional description:
+            Get availability, reference index and motion vector of a
+            neighbour. The neighbour counts as available when nMb is
+            non-NULL and belongs to slice sliceId. Reference index and
+            motion vector are copied only when the neighbour's mbType is
+            at most P_8x8ref0; otherwise they keep the defaults
+            0xFFFFFFFF and (0, 0).
+
+------------------------------------------------------------------------------*/
+
+void GetInterNeighbour(u32 sliceId, mbStorage_t *nMb,
+    interNeighbour_t *n, u32 index)
+{
+
+    /* defaults describe an unavailable neighbour */
+    n->available = HANTRO_FALSE;
+    n->refIndex = 0xFFFFFFFF;
+    n->mv.hor = n->mv.ver = 0;
+
+    if (!nMb || (sliceId != nMb->sliceId))
+    {
+        return;
+    }
+
+    n->available = HANTRO_TRUE;
+
+    /* MbPartPredMode "inlined" */
+    if ((u32)nMb->mbType <= P_8x8ref0)
+    {
+        /* refPic holds one entry per group of four mv entries */
+        n->refIndex = nMb->refPic[index>>2];
+        n->mv = nMb->mv[index];
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: GetPredictionMv
+
+        Functional description:
+            Compute motion vector predictor based on neighbours A, B and C
+            (a[0], a[1], a[2]). When A is the only available neighbour its
+            vector is used. Otherwise, when exactly one neighbour has the
+            reference index refIndex, that neighbour's vector is used; in
+            all remaining cases the predictor is the component-wise median
+            of the three vectors.
+
+------------------------------------------------------------------------------*/
+
+void GetPredictionMv(mv_t *mv, interNeighbour_t *a, u32 refIndex)
+{
+
+    u32 matchA, matchB, matchC;
+
+    /* only A available -> take A as such */
+    if (!a[1].available && !a[2].available && a[0].available)
+    {
+        *mv = a[0].mv;
+        return;
+    }
+
+    matchA = (a[0].refIndex == refIndex) ? HANTRO_TRUE : HANTRO_FALSE;
+    matchB = (a[1].refIndex == refIndex) ? HANTRO_TRUE : HANTRO_FALSE;
+    matchC = (a[2].refIndex == refIndex) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    if (((u32)matchA+(u32)matchB+(u32)matchC) == 1)
+    {
+        /* exactly one neighbour uses the same reference picture */
+        if (matchA)
+        {
+            *mv = a[0].mv;
+        }
+        else if (matchB)
+        {
+            *mv = a[1].mv;
+        }
+        else
+        {
+            *mv = a[2].mv;
+        }
+    }
+    else
+    {
+        mv->hor = (i16)MedianFilter(a[0].mv.hor, a[1].mv.hor, a[2].mv.hor);
+        mv->ver = (i16)MedianFilter(a[0].mv.ver, a[1].mv.ver, a[2].mv.ver);
+    }
+
+}
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_inter_prediction.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_inter_prediction.h
new file mode 100755
index 0000000..94dee25
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_inter_prediction.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_INTER_PREDICTION_H
+#define H264SWDEC_INTER_PREDICTION_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_image.h"
+#include "h264bsd_macroblock_layer.h"
+#include "h264bsd_dpb.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+/* Processes one inter macroblock: performs motion vector prediction and
+ * reconstructs the prediction macroblock into 'data'.
+ *   pMb       macroblock specific information (updated)
+ *   pMbLayer  current macroblock data from stream
+ *   dpb       decoded picture buffer
+ *   mbNum     current macroblock number
+ *   image     output image (called currImage in the definition)
+ *   data      where the predicted macroblock is stored
+ * Returns HANTRO_OK on success, HANTRO_NOK on error in motion vector
+ * prediction. */
+u32 h264bsdInterPrediction(mbStorage_t *pMb, macroblockLayer_t *pMbLayer,
+ dpbStorage_t *dpb, u32 mbNum, image_t *image, u8 *data);
+
+#endif /* #ifndef H264SWDEC_INTER_PREDICTION_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.c
new file mode 100755
index 0000000..15eabfb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.c
@@ -0,0 +1,1937 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdIntraPrediction
+ h264bsdGetNeighbourPels
+ h264bsdIntra16x16Prediction
+ h264bsdIntra4x4Prediction
+ h264bsdIntraChromaPrediction
+ h264bsdAddResidual
+ Intra16x16VerticalPrediction
+ Intra16x16HorizontalPrediction
+ Intra16x16DcPrediction
+ Intra16x16PlanePrediction
+ IntraChromaDcPrediction
+ IntraChromaHorizontalPrediction
+ IntraChromaVerticalPrediction
+ IntraChromaPlanePrediction
+ Get4x4NeighbourPels
+ Write4x4To16x16
+ Intra4x4VerticalPrediction
+ Intra4x4HorizontalPrediction
+ Intra4x4DcPrediction
+ Intra4x4DiagonalDownLeftPrediction
+ Intra4x4DiagonalDownRightPrediction
+ Intra4x4VerticalRightPrediction
+ Intra4x4HorizontalDownPrediction
+ Intra4x4VerticalLeftPrediction
+ Intra4x4HorizontalUpPrediction
+ DetermineIntra4x4PredMode
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_intra_prediction.h"
+#include "h264bsd_util.h"
+#include "h264bsd_macroblock_layer.h"
+#include "h264bsd_neighbour.h"
+#include "h264bsd_image.h"
+
+#ifdef H264DEC_OMXDL
+#include "omxtypes.h"
+#include "omxVC.h"
+#endif /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/* Switch off the following Lint messages for this file:
+ * Info 702: Shift right of signed quantity (int)
+ */
+/*lint -e702 */
+
+
+/* x- and y-coordinates for each block: one entry per 4x4 block number
+ * (0..15), in steps of 4 from 0 to 12.
+ * NOTE(review): presumably pixel offsets from the macroblock's top-left
+ * corner -- confirm against the callers. */
+const u32 h264bsdBlockX[16] =
+ { 0, 4, 0, 4, 8, 12, 8, 12, 0, 4, 0, 4, 8, 12, 8, 12 };
+const u32 h264bsdBlockY[16] =
+ { 0, 0, 4, 4, 0, 0, 4, 4, 8, 8, 12, 12, 8, 8, 12, 12 };
+
+/* Clipping lookup table with 1280 entries: 512 zeros, then the identity
+ * ramp 0..255, then 512 entries of 255. Entry (512 + v) therefore holds v
+ * clamped to [0, 255] for every v in [-512, 767].
+ * NOTE(review): the +512 bias is read off the table layout; confirm it
+ * against the code that indexes this table. */
+const u8 h264bsdClip[1280] =
+{
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
+ 0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,
+ 16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,
+ 32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,
+ 48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,
+ 64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,
+ 80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,
+ 96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,
+ 112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,
+ 128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,
+ 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,
+ 160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,
+ 176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,
+ 192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,
+ 208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,
+ 224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,
+ 240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,
+ 255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255
+};
+
+#ifndef H264DEC_OMXDL
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+static void Get4x4NeighbourPels(u8 *a, u8 *l, u8 *data, u8 *above, u8 *left,
+ u32 blockNum);
+static void Intra16x16VerticalPrediction(u8 *data, u8 *above);
+static void Intra16x16HorizontalPrediction(u8 *data, u8 *left);
+static void Intra16x16DcPrediction(u8 *data, u8 *above, u8 *left,
+ u32 A, u32 B);
+static void Intra16x16PlanePrediction(u8 *data, u8 *above, u8 *left);
+static void IntraChromaDcPrediction(u8 *data, u8 *above, u8 *left,
+ u32 A, u32 B);
+static void IntraChromaHorizontalPrediction(u8 *data, u8 *left);
+static void IntraChromaVerticalPrediction(u8 *data, u8 *above);
+static void IntraChromaPlanePrediction(u8 *data, u8 *above, u8 *left);
+
+static void Intra4x4VerticalPrediction(u8 *data, u8 *above);
+static void Intra4x4HorizontalPrediction(u8 *data, u8 *left);
+static void Intra4x4DcPrediction(u8 *data, u8 *above, u8 *left, u32 A, u32 B);
+static void Intra4x4DiagonalDownLeftPrediction(u8 *data, u8 *above);
+static void Intra4x4DiagonalDownRightPrediction(u8 *data, u8 *above, u8 *left);
+static void Intra4x4VerticalRightPrediction(u8 *data, u8 *above, u8 *left);
+static void Intra4x4HorizontalDownPrediction(u8 *data, u8 *above, u8 *left);
+static void Intra4x4VerticalLeftPrediction(u8 *data, u8 *above);
+static void Intra4x4HorizontalUpPrediction(u8 *data, u8 *left);
+void h264bsdAddResidual(u8 *data, i32 *residual, u32 blockNum);
+
+static void Write4x4To16x16(u8 *data, u8 *data4x4, u32 blockNum);
+#endif /* H264DEC_OMXDL */
+
+static u32 DetermineIntra4x4PredMode(macroblockLayer_t *pMbLayer,
+ u32 available, neighbour_t *nA, neighbour_t *nB, u32 index,
+ mbStorage_t *nMbA, mbStorage_t *nMbB);
+
+
+#ifdef H264DEC_OMXDL
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIntra16x16Prediction
+
+        Functional description:
+            Perform intra 16x16 prediction mode for luma pixels by calling
+            omxVCM4P10_PredictIntra_16x16 with the availability of
+            neighbours A, B and D. With constrained intra prediction an
+            available neighbour that is inter coded is treated as not
+            available. The result is stored in macroblock array 'data'.
+
+        Returns:
+            HANTRO_OK       the OMX call returned OMX_Sts_NoErr
+            HANTRO_NOK      otherwise
+
+------------------------------------------------------------------------------*/
+u32 h264bsdIntra16x16Prediction(mbStorage_t *pMb, u8 *data, u8 *ptr,
+                                u32 width, u32 constrainedIntraPred)
+{
+
+/* Variables */
+
+    u32 availA, availB, availD;
+    i32 availMask;
+    OMXVCM4P10Intra16x16PredMode predMode;
+    OMXResult status;
+
+/* Code */
+    ASSERT(pMb);
+    ASSERT(data);
+    ASSERT(ptr);
+    ASSERT(h264bsdPredModeIntra16x16(pMb->mbType) < 4);
+
+    availA = h264bsdIsNeighbourAvailable(pMb, pMb->mbA);
+    if (constrainedIntraPred && availA &&
+        (h264bsdMbPartPredMode(pMb->mbA->mbType) == PRED_MODE_INTER))
+    {
+        availA = HANTRO_FALSE;
+    }
+
+    availB = h264bsdIsNeighbourAvailable(pMb, pMb->mbB);
+    if (constrainedIntraPred && availB &&
+        (h264bsdMbPartPredMode(pMb->mbB->mbType) == PRED_MODE_INTER))
+    {
+        availB = HANTRO_FALSE;
+    }
+
+    availD = h264bsdIsNeighbourAvailable(pMb, pMb->mbD);
+    if (constrainedIntraPred && availD &&
+        (h264bsdMbPartPredMode(pMb->mbD->mbType) == PRED_MODE_INTER))
+    {
+        availD = HANTRO_FALSE;
+    }
+
+    /* B contributes bit 0, A bit 1 and D bit 5 of the availability value */
+    availMask = (i32)(availB + (availA<<1) + (availD<<5));
+    predMode = (OMXVCM4P10Intra16x16PredMode)
+        h264bsdPredModeIntra16x16(pMb->mbType);
+
+    /* source pointers: pixel to the left of ptr, pixel one row (width)
+     * above ptr, and the pixel above and to the left of ptr */
+    status = omxVCM4P10_PredictIntra_16x16( (ptr-1),
+                                            (ptr - width),
+                                            (ptr - width-1),
+                                            data,
+                                            (i32)width,
+                                            16,
+                                            predMode,
+                                            availMask );
+
+    return (status == OMX_Sts_NoErr) ? HANTRO_OK : HANTRO_NOK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIntra4x4Prediction
+
+        Functional description:
+            Determine the intra 4x4 prediction mode of luma block 'block',
+            store it into 'pMb' and run the OpenMAX DL 4x4 predictor for
+            that block. Neighbouring samples are taken from the picture
+            (through 'ptr' and 'width') for blocks on the top row / left
+            column of the macroblock and from the 16 bytes wide array
+            around 'data' otherwise. The prediction is written to 'data'.
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      OMX primitive reported an error
+
+------------------------------------------------------------------------------*/
+u32 h264bsdIntra4x4Prediction(mbStorage_t *pMb, u8 *data,
+                              macroblockLayer_t *mbLayer,
+                              u8 *ptr, u32 width,
+                              u32 constrainedIntraPred, u32 block)
+{
+
+    neighbour_t neighbourA, neighbourB;
+    mbStorage_t *mbOfA, *mbOfB, *mbOther;
+    u32 availableA, availableB, availableC, availableD;
+    u32 mode;
+    u32 x, y;
+    u8 *leftPels, *abovePels, *aboveLeftPel;
+    OMXResult omxRes;
+
+    ASSERT(pMb);
+    ASSERT(data);
+    ASSERT(mbLayer);
+    ASSERT(ptr);
+    ASSERT(pMb->intra4x4PredMode[block] < 9);
+
+    /* neighbour A */
+    neighbourA = *h264bsdNeighbour4x4BlockA(block);
+    mbOfA = h264bsdGetNeighbourMb(pMb, neighbourA.mb);
+    availableA = h264bsdIsNeighbourAvailable(pMb, mbOfA);
+    if (availableA && constrainedIntraPred)
+    {
+        if (h264bsdMbPartPredMode(mbOfA->mbType) == PRED_MODE_INTER)
+            availableA = HANTRO_FALSE;
+    }
+
+    /* neighbour B */
+    neighbourB = *h264bsdNeighbour4x4BlockB(block);
+    mbOfB = h264bsdGetNeighbourMb(pMb, neighbourB.mb);
+    availableB = h264bsdIsNeighbourAvailable(pMb, mbOfB);
+    if (availableB && constrainedIntraPred)
+    {
+        if (h264bsdMbPartPredMode(mbOfB->mbType) == PRED_MODE_INTER)
+            availableB = HANTRO_FALSE;
+    }
+
+    mode = DetermineIntra4x4PredMode(mbLayer,
+        (u32)(availableA && availableB),
+        &neighbourA, &neighbourB, block, mbOfA, mbOfB);
+    pMb->intra4x4PredMode[block] = (u8)mode;
+
+    /* neighbour C */
+    mbOther = h264bsdGetNeighbourMb(pMb,
+        h264bsdNeighbour4x4BlockC(block)->mb);
+    availableC = h264bsdIsNeighbourAvailable(pMb, mbOther);
+    if (availableC && constrainedIntraPred)
+    {
+        if (h264bsdMbPartPredMode(mbOther->mbType) == PRED_MODE_INTER)
+            availableC = HANTRO_FALSE;
+    }
+
+    /* neighbour D */
+    mbOther = h264bsdGetNeighbourMb(pMb,
+        h264bsdNeighbour4x4BlockD(block)->mb);
+    availableD = h264bsdIsNeighbourAvailable(pMb, mbOther);
+    if (availableD && constrainedIntraPred)
+    {
+        if (h264bsdMbPartPredMode(mbOther->mbType) == PRED_MODE_INTER)
+            availableD = HANTRO_FALSE;
+    }
+
+    x = h264bsdBlockX[block];
+    y = h264bsdBlockY[block];
+
+    /* row above the block: from the picture on the top block row, from the
+     * macroblock array otherwise ('width' is still the picture step here) */
+    abovePels = (y == 0) ? (ptr - width + x) : (data-16);
+
+    if (x == 0)
+    {
+        /* left column and above-left sample come from the picture */
+        leftPels = ptr + y * width -1;
+        aboveLeftPel = leftPels-width;
+    }
+    else
+    {
+        /* left column comes from the macroblock array, so the left step
+         * passed to OMX becomes 16 */
+        leftPels = data-1;
+        aboveLeftPel = abovePels-1;
+        width = 16;
+    }
+
+    omxRes = omxVCM4P10_PredictIntra_4x4(leftPels,
+                                         abovePels,
+                                         aboveLeftPel,
+                                         data,
+                                         (i32)width,
+                                         16,
+                                         (OMXVCM4P10Intra4x4PredMode)mode,
+                                         (i32)(availableB +
+                                               (availableA<<1) +
+                                               (availableD<<5) +
+                                               (availableC<<6)));
+
+    return (omxRes != OMX_Sts_NoErr) ? HANTRO_NOK : HANTRO_OK;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIntraChromaPrediction
+
+        Functional description:
+            Run the OpenMAX DL 8x8 chroma intra predictor for both chroma
+            components. Neighbouring samples are addressed through
+            image->cb and image->cr with a row step of image->width*8.
+            The Cb prediction is written to 'data', the Cr prediction to
+            'data + 64'.
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      OMX primitive reported an error
+
+------------------------------------------------------------------------------*/
+u32 h264bsdIntraChromaPrediction(mbStorage_t *pMb, u8 *data, image_t *image,
+                                        u32 predMode, u32 constrainedIntraPred)
+{
+
+    u32 availableA, availableB, availableD;
+    u32 comp;
+    u32 width;
+    i32 availability;
+    u8 *ptr;
+    OMXResult omxRes;
+
+    ASSERT(pMb);
+    ASSERT(data);
+    ASSERT(image);
+    ASSERT(predMode < 4);
+
+    /* inter coded neighbours are not used when constrained intra
+     * prediction is enabled */
+    availableA = h264bsdIsNeighbourAvailable(pMb, pMb->mbA);
+    if (availableA && constrainedIntraPred)
+    {
+        if (h264bsdMbPartPredMode(pMb->mbA->mbType) == PRED_MODE_INTER)
+            availableA = HANTRO_FALSE;
+    }
+
+    availableB = h264bsdIsNeighbourAvailable(pMb, pMb->mbB);
+    if (availableB && constrainedIntraPred)
+    {
+        if (h264bsdMbPartPredMode(pMb->mbB->mbType) == PRED_MODE_INTER)
+            availableB = HANTRO_FALSE;
+    }
+
+    availableD = h264bsdIsNeighbourAvailable(pMb, pMb->mbD);
+    if (availableD && constrainedIntraPred)
+    {
+        if (h264bsdMbPartPredMode(pMb->mbD->mbType) == PRED_MODE_INTER)
+            availableD = HANTRO_FALSE;
+    }
+
+    width = image->width*8;
+    availability = (i32)(availableB + (availableA<<1) + (availableD<<5));
+
+    /* comp 0 -> cb, comp 1 -> cr */
+    for (comp = 0; comp < 2; comp++)
+    {
+        ptr = (comp == 0) ? image->cb : image->cr;
+
+        omxRes = omxVCM4P10_PredictIntraChroma_8x8(ptr - 1,
+                                                   ptr - width,
+                                                   ptr - width - 1,
+                                                   data,
+                                                   (i32)width,
+                                                   8,
+                                                   (OMXVCM4P10IntraChromaPredMode)
+                                                   predMode,
+                                                   availability);
+        if (omxRes != OMX_Sts_NoErr)
+            return HANTRO_NOK;
+
+        /* next component is stored 64 bytes further */
+        data += 64;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+
+#else /* H264DEC_OMXDL */
+
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIntraPrediction
+
+        Functional description:
+            Processes one intra macroblock: fetches the neighbouring
+            samples from the image, runs luma (16x16 or 4x4) and chroma
+            intra prediction and, unless the macroblock has already been
+            written (pMb->decoded > 1), writes the result into the image.
+
+        Inputs:
+            pMb             pointer to macroblock specific information
+            mbLayer         pointer to current macroblock data from stream
+            image           pointer to output image
+            mbNum           current macroblock number
+            constrainedIntraPred  flag specifying if neighbouring inter
+                                  macroblocks are used in intra prediction
+            data            pointer where output macroblock will be stored
+
+        Outputs:
+            pMb             structure is updated with current macroblock
+            image           current macroblock is written into image
+            data            current macroblock is stored here
+
+        Returns:
+            HANTRO_OK       success
+            other           status returned by the failing luma or chroma
+                            prediction function
+
+------------------------------------------------------------------------------*/
+u32 h264bsdIntraPrediction(mbStorage_t *pMb, macroblockLayer_t *mbLayer,
+    image_t *image, u32 mbNum, u32 constrainedIntraPred, u8 *data)
+{
+
+    /* Samples above the macroblock, laid out as
+     * lumD + lumB + lumC + cbD + cbB + crD + crB */
+    u8 abovePels[1 + 16 + 4 + 1 + 8 + 1 + 8];
+    /* Samples left of the macroblock, laid out as lumA + cbA + crA */
+    u8 leftPels[16 + 8 + 8];
+    u32 status;
+
+    ASSERT(pMb);
+    ASSERT(image);
+    ASSERT(mbNum < image->width * image->height);
+    ASSERT(h264bsdMbPartPredMode(pMb->mbType) != PRED_MODE_INTER);
+
+    h264bsdGetNeighbourPels(image, abovePels, leftPels, mbNum);
+
+    /* luma */
+    if (h264bsdMbPartPredMode(pMb->mbType) == PRED_MODE_INTRA16x16)
+        status = h264bsdIntra16x16Prediction(pMb, data,
+            mbLayer->residual.level, abovePels, leftPels,
+            constrainedIntraPred);
+    else
+        status = h264bsdIntra4x4Prediction(pMb, data, mbLayer,
+            abovePels, leftPels, constrainedIntraPred);
+
+    if (status != HANTRO_OK)
+        return(status);
+
+    /* chroma: skip the 21 luma entries of the above array and the 16 luma
+     * entries of the left array */
+    status = h264bsdIntraChromaPrediction(pMb, data + 256,
+        mbLayer->residual.level+16, abovePels + 21, leftPels + 16,
+        mbLayer->mbPred.intraChromaPredMode, constrainedIntraPred);
+    if (status != HANTRO_OK)
+        return(status);
+
+    /* if decoded flag > 1 -> mb has already been successfully decoded and
+     * written to output -> do not write again */
+    if (!(pMb->decoded > 1))
+        h264bsdWriteMacroblock(image, data);
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdGetNeighbourPels
+
+        Functional description:
+            Copy the samples neighbouring macroblock 'mbNum' from the image
+            into the 'above' and 'left' arrays. For rows other than the
+            first, 21 luma and 2 * 9 chroma samples are written to 'above';
+            for columns other than the first, 16 luma and 2 * 8 chroma
+            samples are written to 'left'. Nothing is written for
+            macroblock number 0.
+
+------------------------------------------------------------------------------*/
+
+void h264bsdGetNeighbourPels(image_t *image, u8 *above, u8 *left, u32 mbNum)
+{
+
+    u32 n;
+    u32 stride, numMbs;
+    u32 mbRow, mbCol;
+    u8 *mbPtr, *src;
+
+    ASSERT(image);
+    ASSERT(above);
+    ASSERT(left);
+    ASSERT(mbNum < image->width * image->height);
+
+    if (mbNum == 0)
+        return;
+
+    stride = image->width;
+    numMbs = stride * image->height;
+    mbRow = mbNum / stride;
+    mbCol = mbNum - mbRow * stride;
+
+    /* luma plane: 16 samples per macroblock in both directions */
+    stride *= 16;
+    mbPtr = image->data + mbRow * 16 * stride + mbCol * 16;
+
+    /* The above-left sample (when mbCol is zero) and the above-right
+     * samples (when the macroblock is the right-most in its row) are not
+     * meaningful; whether they are used is decided by the neighbour
+     * availability information in the actual prediction process. */
+    if (mbRow)
+    {
+        src = mbPtr - (stride + 1);
+        for (n = 0; n < 21; n++)
+            *above++ = *src++;
+    }
+
+    if (mbCol)
+    {
+        src = mbPtr - 1;
+        for (n = 0; n < 16; n++, src += stride)
+            *left++ = *src;
+    }
+
+    /* chroma planes: stored after the luma plane, each numMbs * 64 bytes */
+    stride >>= 1;
+    mbPtr = image->data + numMbs * 256 + mbRow * 8 * stride + mbCol * 8;
+
+    if (mbRow)
+    {
+        src = mbPtr - (stride + 1);
+        for (n = 0; n < 9; n++)
+            *above++ = *src++;
+        /* same position in the second chroma plane */
+        src += (numMbs * 64) - 9;
+        for (n = 0; n < 9; n++)
+            *above++ = *src++;
+    }
+
+    if (mbCol)
+    {
+        src = mbPtr - 1;
+        for (n = 0; n < 8; n++, src += stride)
+            *left++ = *src;
+        /* same position in the second chroma plane */
+        src += (numMbs * 64) - 8 * stride;
+        for (n = 0; n < 8; n++, src += stride)
+            *left++ = *src;
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIntra16x16Prediction
+
+        Functional description:
+            Perform intra 16x16 prediction for luma pixels using the mode
+            derived from pMb->mbType and add the residual of the 16 luma
+            blocks into the prediction. The resulting luma pixels are
+            stored in macroblock array 'data'.
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      a neighbour required by the prediction mode is
+                            not available
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdIntra16x16Prediction(mbStorage_t *pMb, u8 *data, i32 residual[][16],
+                                u8 *above, u8 *left, u32 constrainedIntraPred)
+{
+
+    u32 blk;
+    u32 predMode;
+    u32 availableA, availableB, availableD;
+
+    ASSERT(data);
+    ASSERT(residual);
+    ASSERT(above);
+    ASSERT(left);
+    ASSERT(h264bsdPredModeIntra16x16(pMb->mbType) < 4);
+
+    /* inter coded neighbours are not used when constrained intra
+     * prediction is enabled */
+    availableA = h264bsdIsNeighbourAvailable(pMb, pMb->mbA);
+    if (availableA && constrainedIntraPred)
+    {
+        if (h264bsdMbPartPredMode(pMb->mbA->mbType) == PRED_MODE_INTER)
+            availableA = HANTRO_FALSE;
+    }
+
+    availableB = h264bsdIsNeighbourAvailable(pMb, pMb->mbB);
+    if (availableB && constrainedIntraPred)
+    {
+        if (h264bsdMbPartPredMode(pMb->mbB->mbType) == PRED_MODE_INTER)
+            availableB = HANTRO_FALSE;
+    }
+
+    availableD = h264bsdIsNeighbourAvailable(pMb, pMb->mbD);
+    if (availableD && constrainedIntraPred)
+    {
+        if (h264bsdMbPartPredMode(pMb->mbD->mbType) == PRED_MODE_INTER)
+            availableD = HANTRO_FALSE;
+    }
+
+    /* above[0] is the above-left sample, hence 'above+1' below */
+    predMode = h264bsdPredModeIntra16x16(pMb->mbType);
+    if (predMode == 0)          /* Intra_16x16_Vertical */
+    {
+        if (!availableB)
+            return(HANTRO_NOK);
+        Intra16x16VerticalPrediction(data, above+1);
+    }
+    else if (predMode == 1)     /* Intra_16x16_Horizontal */
+    {
+        if (!availableA)
+            return(HANTRO_NOK);
+        Intra16x16HorizontalPrediction(data, left);
+    }
+    else if (predMode == 2)     /* Intra_16x16_DC */
+    {
+        Intra16x16DcPrediction(data, above+1, left, availableA, availableB);
+    }
+    else                        /* 3: Intra_16x16_Plane */
+    {
+        if (!(availableA && availableB && availableD))
+            return(HANTRO_NOK);
+        Intra16x16PlanePrediction(data, above+1, left);
+    }
+
+    /* add residual of each 4x4 luma block */
+    for (blk = 0; blk < 16; blk++)
+        h264bsdAddResidual(data, residual[blk], blk);
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIntra4x4Prediction
+
+        Functional description:
+            Perform intra 4x4 prediction for the 16 luma blocks of a
+            macroblock and add residual into prediction. The resulting luma
+            pixels are stored in macroblock array 'data'. The intra 4x4
+            prediction mode determined for each block is stored in 'pMb'
+            structure.
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      a neighbour required by the prediction mode of
+                            some block is not available
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdIntra4x4Prediction(mbStorage_t *pMb, u8 *data,
+                              macroblockLayer_t *mbLayer, u8 *above,
+                              u8 *left, u32 constrainedIntraPred)
+{
+
+    u32 block, k;
+    u32 mode;
+    neighbour_t neighbourA, neighbourB;
+    mbStorage_t *mbOfA, *mbOfB, *mbOther;
+    /* a[0] and l[0] hold the above-left sample of the block */
+    u8 a[1 + 4 + 4], l[1 + 4];
+    u32 data4x4[4];
+    u8 *pred = (u8*)data4x4;
+    u32 availableA, availableB, availableC, availableD;
+
+    ASSERT(data);
+    ASSERT(mbLayer);
+    ASSERT(above);
+    ASSERT(left);
+
+    for (block = 0; block < 16; block++)
+    {
+
+        ASSERT(pMb->intra4x4PredMode[block] < 9);
+
+        /* neighbour A */
+        neighbourA = *h264bsdNeighbour4x4BlockA(block);
+        mbOfA = h264bsdGetNeighbourMb(pMb, neighbourA.mb);
+        availableA = h264bsdIsNeighbourAvailable(pMb, mbOfA);
+        if (availableA && constrainedIntraPred)
+        {
+            if (h264bsdMbPartPredMode(mbOfA->mbType) == PRED_MODE_INTER)
+                availableA = HANTRO_FALSE;
+        }
+
+        /* neighbour B */
+        neighbourB = *h264bsdNeighbour4x4BlockB(block);
+        mbOfB = h264bsdGetNeighbourMb(pMb, neighbourB.mb);
+        availableB = h264bsdIsNeighbourAvailable(pMb, mbOfB);
+        if (availableB && constrainedIntraPred)
+        {
+            if (h264bsdMbPartPredMode(mbOfB->mbType) == PRED_MODE_INTER)
+                availableB = HANTRO_FALSE;
+        }
+
+        mode = DetermineIntra4x4PredMode(mbLayer,
+            (u32)(availableA && availableB),
+            &neighbourA, &neighbourB, block, mbOfA, mbOfB);
+        pMb->intra4x4PredMode[block] = (u8)mode;
+
+        /* neighbour C */
+        mbOther = h264bsdGetNeighbourMb(pMb,
+            h264bsdNeighbour4x4BlockC(block)->mb);
+        availableC = h264bsdIsNeighbourAvailable(pMb, mbOther);
+        if (availableC && constrainedIntraPred)
+        {
+            if (h264bsdMbPartPredMode(mbOther->mbType) == PRED_MODE_INTER)
+                availableC = HANTRO_FALSE;
+        }
+
+        /* neighbour D */
+        mbOther = h264bsdGetNeighbourMb(pMb,
+            h264bsdNeighbour4x4BlockD(block)->mb);
+        availableD = h264bsdIsNeighbourAvailable(pMb, mbOther);
+        if (availableD && constrainedIntraPred)
+        {
+            if (h264bsdMbPartPredMode(mbOther->mbType) == PRED_MODE_INTER)
+                availableD = HANTRO_FALSE;
+        }
+
+        Get4x4NeighbourPels(a, l, data, above, left, block);
+
+        switch(mode)
+        {
+            case 0: /* Intra_4x4_Vertical */
+                if (!availableB)
+                    return(HANTRO_NOK);
+                Intra4x4VerticalPrediction(pred, a + 1);
+                break;
+
+            case 1: /* Intra_4x4_Horizontal */
+                if (!availableA)
+                    return(HANTRO_NOK);
+                Intra4x4HorizontalPrediction(pred, l + 1);
+                break;
+
+            case 2: /* Intra_4x4_DC */
+                Intra4x4DcPrediction(pred, a + 1, l + 1,
+                    availableA, availableB);
+                break;
+
+            case 3: /* Intra_4x4_Diagonal_Down_Left */
+            case 7: /* Intra_4x4_Vertical_Left */
+                if (!availableB)
+                    return(HANTRO_NOK);
+                /* above-right samples missing -> repeat the last sample
+                 * of the above row */
+                if (!availableC)
+                {
+                    for (k = 5; k <= 8; k++)
+                        a[k] = a[4];
+                }
+                if (mode == 3)
+                    Intra4x4DiagonalDownLeftPrediction(pred, a + 1);
+                else
+                    Intra4x4VerticalLeftPrediction(pred, a + 1);
+                break;
+
+            case 4: /* Intra_4x4_Diagonal_Down_Right */
+            case 5: /* Intra_4x4_Vertical_Right */
+            case 6: /* Intra_4x4_Horizontal_Down */
+                if (!(availableA && availableB && availableD))
+                    return(HANTRO_NOK);
+                if (mode == 4)
+                    Intra4x4DiagonalDownRightPrediction(pred, a + 1, l + 1);
+                else if (mode == 5)
+                    Intra4x4VerticalRightPrediction(pred, a + 1, l + 1);
+                else
+                    Intra4x4HorizontalDownPrediction(pred, a + 1, l + 1);
+                break;
+
+            default: /* case 8 Intra_4x4_Horizontal_Up */
+                if (!availableA)
+                    return(HANTRO_NOK);
+                Intra4x4HorizontalUpPrediction(pred, l + 1);
+                break;
+        }
+
+        /* later blocks read their neighbours from 'data', so the block is
+         * completed (prediction + residual) before moving on */
+        Write4x4To16x16(data, pred, block);
+        h264bsdAddResidual(data, mbLayer->residual.level[block], block);
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIntraChromaPrediction
+
+        Functional description:
+            Perform intra prediction for both chroma components and add
+            residual into prediction. The resulting chroma pixels are
+            stored in 'data' (64 bytes per component). Each component uses
+            9 entries of 'above', 8 entries of 'left' and 4 residual
+            blocks.
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      a neighbour required by the prediction mode is
+                            not available
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdIntraChromaPrediction(mbStorage_t *pMb, u8 *data, i32 residual[][16],
+                    u8 *above, u8 *left, u32 predMode, u32 constrainedIntraPred)
+{
+
+    u32 blk, comp;
+    u32 availableA, availableB, availableD;
+
+    ASSERT(data);
+    ASSERT(residual);
+    ASSERT(above);
+    ASSERT(left);
+    ASSERT(predMode < 4);
+
+    /* inter coded neighbours are not used when constrained intra
+     * prediction is enabled */
+    availableA = h264bsdIsNeighbourAvailable(pMb, pMb->mbA);
+    if (availableA && constrainedIntraPred)
+    {
+        if (h264bsdMbPartPredMode(pMb->mbA->mbType) == PRED_MODE_INTER)
+            availableA = HANTRO_FALSE;
+    }
+
+    availableB = h264bsdIsNeighbourAvailable(pMb, pMb->mbB);
+    if (availableB && constrainedIntraPred)
+    {
+        if (h264bsdMbPartPredMode(pMb->mbB->mbType) == PRED_MODE_INTER)
+            availableB = HANTRO_FALSE;
+    }
+
+    availableD = h264bsdIsNeighbourAvailable(pMb, pMb->mbD);
+    if (availableD && constrainedIntraPred)
+    {
+        if (h264bsdMbPartPredMode(pMb->mbD->mbType) == PRED_MODE_INTER)
+            availableD = HANTRO_FALSE;
+    }
+
+    for (comp = 0; comp < 2; comp++)
+    {
+        /* above[0] is the above-left sample, hence 'above+1' below */
+        if (predMode == 0)          /* Intra_Chroma_DC */
+        {
+            IntraChromaDcPrediction(data, above+1, left, availableA,
+                availableB);
+        }
+        else if (predMode == 1)     /* Intra_Chroma_Horizontal */
+        {
+            if (!availableA)
+                return(HANTRO_NOK);
+            IntraChromaHorizontalPrediction(data, left);
+        }
+        else if (predMode == 2)     /* Intra_Chroma_Vertical */
+        {
+            if (!availableB)
+                return(HANTRO_NOK);
+            IntraChromaVerticalPrediction(data, above+1);
+        }
+        else                        /* 3: Intra_Chroma_Plane */
+        {
+            if (!(availableA && availableB && availableD))
+                return(HANTRO_NOK);
+            IntraChromaPlanePrediction(data, above+1, left);
+        }
+
+        /* chroma blocks are numbered 16..19 and 20..23 */
+        for (blk = 0; blk < 4; blk++)
+            h264bsdAddResidual(data, residual[blk], 16 + 4 * comp + blk);
+
+        /* advance to the next component */
+        data += 64;
+        above += 9;
+        left += 8;
+        residual += 4;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdAddResidual
+
+        Functional description:
+            Add the 16 residual values of one 4x4 block into the prediction
+            in macroblock array 'data' and clip the sums through the
+            h264bsdClip table. Block numbers below 16 address a 16 samples
+            wide array, other block numbers an 8 samples wide array. The
+            result (residual + prediction) is stored in 'data'. Nothing is
+            done if IS_RESIDUAL_EMPTY is true for the residual.
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_OMXDL
+void h264bsdAddResidual(u8 *data, i32 *residual, u32 blockNum)
+{
+
+    u32 i, j;
+    u32 x, y;
+    u32 stride;
+    u8 *dst;
+    /* the clipping table is indexed from its 513th entry */
+    const u8 *clp = h264bsdClip + 512;
+
+    ASSERT(data);
+    ASSERT(residual);
+    ASSERT(blockNum < 16 + 4 + 4);
+
+    if (IS_RESIDUAL_EMPTY(residual))
+        return;
+
+    RANGE_CHECK_ARRAY(residual, -512, 511, 16);
+
+    if (blockNum < 16)
+    {
+        stride = 16;
+        x = h264bsdBlockX[blockNum];
+        y = h264bsdBlockY[blockNum];
+    }
+    else
+    {
+        /* only the two lowest bits select the block position */
+        stride = 8;
+        x = h264bsdBlockX[blockNum & 0x3];
+        y = h264bsdBlockY[blockNum & 0x3];
+    }
+
+    dst = data + y*stride + x;
+    for (i = 0; i < 4; i++, dst += stride)
+    {
+        for (j = 0; j < 4; j++)
+            dst[j] = clp[*residual++ + dst[j]];
+    }
+
+}
+#endif
+/*------------------------------------------------------------------------------
+
+    Function: Intra16x16VerticalPrediction
+
+        Functional description:
+            Perform intra 16x16 vertical prediction mode: every one of the
+            16 rows of 'data' is a copy of above[0..15].
+
+------------------------------------------------------------------------------*/
+
+void Intra16x16VerticalPrediction(u8 *data, u8 *above)
+{
+
+    u32 row, col;
+
+    ASSERT(data);
+    ASSERT(above);
+
+    for (row = 0; row < 16; row++, data += 16)
+    {
+        for (col = 0; col < 16; col++)
+            data[col] = above[col];
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra16x16HorizontalPrediction
+
+        Functional description:
+            Perform intra 16x16 horizontal prediction mode: row n of 'data'
+            is filled with left[n].
+
+------------------------------------------------------------------------------*/
+
+void Intra16x16HorizontalPrediction(u8 *data, u8 *left)
+{
+
+    u32 row, col;
+
+    ASSERT(data);
+    ASSERT(left);
+
+    for (row = 0; row < 16; row++, data += 16)
+    {
+        for (col = 0; col < 16; col++)
+            data[col] = left[row];
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra16x16DcPrediction
+
+        Functional description:
+            Perform intra 16x16 DC prediction mode: all 256 samples of
+            'data' are set to the rounded mean of the available
+            neighbouring samples (above[0..15] and/or left[0..15]), or to
+            128 if neither neighbour is available.
+
+------------------------------------------------------------------------------*/
+
+void Intra16x16DcPrediction(u8 *data, u8 *above, u8 *left, u32 availableA,
+    u32 availableB)
+{
+
+    u32 n;
+    u32 dc = 0;
+    u8 *end;
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    if (availableA && availableB)
+    {
+        /* mean of 32 samples */
+        for (n = 0; n < 16; n++)
+            dc += above[n] + left[n];
+        dc = (dc + 16) >> 5;
+    }
+    else if (availableA)
+    {
+        /* mean of the 16 left samples */
+        for (n = 0; n < 16; n++)
+            dc += left[n];
+        dc = (dc + 8) >> 4;
+    }
+    else if (availableB)
+    {
+        /* mean of the 16 above samples */
+        for (n = 0; n < 16; n++)
+            dc += above[n];
+        dc = (dc + 8) >> 4;
+    }
+    else
+    {
+        /* neither A nor B available */
+        dc = 128;
+    }
+
+    for (end = data + 256; data != end; data++)
+        *data = (u8)dc;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra16x16PlanePrediction
+
+        Functional description:
+            Perform intra 16x16 plane prediction mode. The horizontal
+            gradient is computed from above[-1..15], the vertical gradient
+            from left[0..15] and above[-1] (the above-left sample, which is
+            only reachable through the 'above' pointer). The 256 predicted
+            samples are clipped with CLIP1 and written to 'data'.
+
+------------------------------------------------------------------------------*/
+
+void Intra16x16PlanePrediction(u8 *data, u8 *above, u8 *left)
+{
+
+/* Variables */
+
+    /* Loop indices are signed on purpose: the expression above[6-i] has
+     * to address above[-1] when i == 7. With an unsigned index the
+     * subtraction wraps to a huge positive offset, which is out of bounds
+     * on targets where pointers are wider than 32 bits. */
+    i32 i, j;
+    i32 a, b, c;
+    i32 tmp;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    a = 16 * (above[15] + left[15]);
+
+    for (i = 0, b = 0; i < 8; i++)
+        b += (i + 1) * (above[8+i] - above[6-i]);
+    b = (5 * b + 32) >> 6;
+
+    for (i = 0, c = 0; i < 7; i++)
+        c += (i + 1) * (left[8+i] - left[6-i]);
+    /* p[-1,-1] has to be accessed through above pointer (here i == 7) */
+    c += (i + 1) * (left[8+i] - above[-1]);
+    c = (5 * c + 32) >> 6;
+
+    for (i = 0; i < 16; i++)
+    {
+        for (j = 0; j < 16; j++)
+        {
+            tmp = (a + b * (j - 7) + c * (i - 7) + 16) >> 5;
+            data[i*16+j] = (u8)CLIP1(tmp);
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: IntraChromaDcPrediction
+
+        Functional description:
+            Perform intra chroma DC prediction mode for one 8x8 chroma
+            block. A separate DC value is computed for each 4x4 quadrant
+            from the available neighbouring samples; 128 is used when
+            neither neighbour is available.
+
+------------------------------------------------------------------------------*/
+
+void IntraChromaDcPrediction(u8 *data, u8 *above, u8 *left, u32 availableA,
+    u32 availableB)
+{
+
+    u32 row, col;
+    u32 dcLeft, dcRight;    /* DC of columns 0..3 and columns 4..7 */
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    /* rows 0..3 */
+    if (availableA && availableB)
+    {
+        dcLeft = above[0] + above[1] + above[2] + above[3] +
+                 left[0] + left[1] + left[2] + left[3];
+        dcLeft = (dcLeft + 4) >> 3;
+        dcRight = (above[4] + above[5] + above[6] + above[7] + 2) >> 2;
+    }
+    else if (availableB)
+    {
+        dcLeft = (above[0] + above[1] + above[2] + above[3] + 2) >> 2;
+        dcRight = (above[4] + above[5] + above[6] + above[7] + 2) >> 2;
+    }
+    else if (availableA)
+    {
+        dcLeft = (left[0] + left[1] + left[2] + left[3] + 2) >> 2;
+        dcRight = dcLeft;
+    }
+    else
+    {
+        /* neither A nor B available */
+        dcLeft = dcRight = 128;
+    }
+
+    ASSERT(dcLeft < 256 && dcRight < 256);
+    for (row = 0; row < 4; row++)
+    {
+        for (col = 0; col < 8; col++)
+            *data++ = (u8)((col < 4) ? dcLeft : dcRight);
+    }
+
+    /* rows 4..7 */
+    if (availableA)
+    {
+        dcLeft = (left[4] + left[5] + left[6] + left[7] + 2) >> 2;
+        if (availableB)
+        {
+            dcRight = above[4] + above[5] + above[6] + above[7] +
+                      left[4] + left[5] + left[6] + left[7];
+            dcRight = (dcRight + 4) >> 3;
+        }
+        else
+        {
+            dcRight = dcLeft;
+        }
+    }
+    else if (availableB)
+    {
+        dcLeft = (above[0] + above[1] + above[2] + above[3] + 2) >> 2;
+        dcRight = (above[4] + above[5] + above[6] + above[7] + 2) >> 2;
+    }
+    else
+    {
+        dcLeft = dcRight = 128;
+    }
+
+    ASSERT(dcLeft < 256 && dcRight < 256);
+    for (row = 0; row < 4; row++)
+    {
+        for (col = 0; col < 8; col++)
+            *data++ = (u8)((col < 4) ? dcLeft : dcRight);
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: IntraChromaHorizontalPrediction
+
+        Functional description:
+            Perform intra chroma horizontal prediction mode: row n of the
+            8x8 block in 'data' is filled with left[n].
+
+------------------------------------------------------------------------------*/
+
+void IntraChromaHorizontalPrediction(u8 *data, u8 *left)
+{
+
+    u32 row, col;
+
+    ASSERT(data);
+    ASSERT(left);
+
+    for (row = 0; row < 8; row++)
+    {
+        for (col = 0; col < 8; col++)
+            *data++ = left[row];
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: IntraChromaVerticalPrediction
+
+        Functional description:
+            Perform intra chroma vertical prediction mode: every row of the
+            8x8 block in 'data' is a copy of above[0..7].
+
+------------------------------------------------------------------------------*/
+
+void IntraChromaVerticalPrediction(u8 *data, u8 *above)
+{
+
+    u32 row, col;
+
+    ASSERT(data);
+    ASSERT(above);
+
+    for (row = 0; row < 8; row++)
+    {
+        for (col = 0; col < 8; col++)
+            data[row * 8 + col] = above[col];
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: IntraChromaPlanePrediction
+
+        Functional description:
+            Perform intra chroma plane prediction mode for one 8x8 chroma
+            block. Gradients are computed from above[-1..7] and from
+            left[0..7] together with above[-1]; the predicted samples are
+            clipped through the h264bsdClip table and written to 'data'.
+
+------------------------------------------------------------------------------*/
+
+void IntraChromaPlanePrediction(u8 *data, u8 *above, u8 *left)
+{
+
+    u32 row, col;
+    i32 a, b, c;
+    i32 rowStart, val;
+    /* the clipping table is indexed from its 513th entry */
+    const u8 *clp = h264bsdClip + 512;
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    a = 16 * (above[7] + left[7]);
+
+    /* horizontal gradient */
+    b = (above[4] - above[2]) + 2 * (above[5] - above[1])
+        + 3 * (above[6] - above[0]) + 4 * (above[7] - above[-1]);
+    b = (17 * b + 16) >> 5;
+
+    /* vertical gradient; p[-1,-1] has to be accessed through above
+     * pointer */
+    c = (left[4] - left[2]) + 2 * (left[5] - left[1])
+        + 3 * (left[6] - left[0]) + 4 * (left[7] - above[-1]);
+    c = (17 * c + 16) >> 5;
+
+    /* value of the plane (including rounding term) on row 0 at the
+     * horizontal centre position; each further row adds c */
+    rowStart = a - 3 * c + 16;
+    for (row = 0; row < 8; row++, rowStart += c)
+    {
+        /* first column lies 3 steps of b before the centre position */
+        val = rowStart - 3 * b;
+        for (col = 0; col < 8; col++, val += b)
+            *data++ = clp[val>>5];
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Get4x4NeighbourPels
+
+        Functional description:
+            Get neighbouring pixels of 4x4 block 'blockNum' into 'a' and
+            'l'. a[1..8] receive the eight samples above the block, l[1..4]
+            the four samples left of it, a[0] and l[0] the above-left
+            sample. Samples come from 'above' / 'left' for blocks on the
+            top row / left column of the macroblock and from the 16 samples
+            wide array 'data' otherwise.
+
+------------------------------------------------------------------------------*/
+
+void Get4x4NeighbourPels(u8 *a, u8 *l, u8 *data, u8 *above, u8 *left,
+    u32 blockNum)
+{
+
+    u32 x, y;
+    u32 n;
+    u32 base;
+
+    ASSERT(a);
+    ASSERT(l);
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+    ASSERT(blockNum < 16);
+
+    x = h264bsdBlockX[blockNum];
+    y = h264bsdBlockY[blockNum];
+
+    /* left column */
+    if (x == 0)
+    {
+        for (n = 0; n < 4; n++)
+            l[1 + n] = left[y + n];
+    }
+    else
+    {
+        /* column x-1 of the macroblock array, rows y..y+3 */
+        base = y * 16 + x - 1;
+        for (n = 0; n < 4; n++)
+            l[1 + n] = data[base + n * 16];
+    }
+
+    /* above row and above-left sample */
+    if (y == 0)
+    {
+        /* 'above' starts with the above-left sample of the macroblock,
+         * so above[x] is the sample above-left of this block */
+        l[0] = above[x];
+        a[0] = above[x];
+        for (n = 0; n < 8; n++)
+            a[1 + n] = above[x + 1 + n];
+    }
+    else
+    {
+        /* row y-1 of the macroblock array, columns x..x+7 */
+        base = (y - 1) * 16 + x;
+        for (n = 0; n < 8; n++)
+            a[1 + n] = data[base + n];
+
+        if (x == 0)
+            l[0] = a[0] = left[y-1];
+        else
+            l[0] = a[0] = data[base - 1];
+    }
+}
+
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4VerticalPrediction
+
+        Functional description:
+            Perform intra 4x4 vertical prediction mode: every row of the
+            4x4 block in 'data' is a copy of above[0..3].
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4VerticalPrediction(u8 *data, u8 *above)
+{
+
+    u32 row, col;
+
+    ASSERT(data);
+    ASSERT(above);
+
+    for (row = 0; row < 4; row++)
+    {
+        for (col = 0; col < 4; col++)
+            data[row * 4 + col] = above[col];
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4HorizontalPrediction
+
+        Functional description:
+            Perform intra 4x4 horizontal prediction mode: row n of the 4x4
+            block in 'data' is filled with left[n].
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4HorizontalPrediction(u8 *data, u8 *left)
+{
+
+    u32 row, col;
+
+    ASSERT(data);
+    ASSERT(left);
+
+    for (row = 0; row < 4; row++)
+    {
+        for (col = 0; col < 4; col++)
+            data[row * 4 + col] = left[row];
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4DcPrediction
+
+        Functional description:
+            Perform intra 4x4 DC prediction mode: all 16 samples of 'data'
+            are set to the rounded mean of the available neighbouring
+            samples (above[0..3] and/or left[0..3]), or to 128 if neither
+            neighbour is available.
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4DcPrediction(u8 *data, u8 *above, u8 *left, u32 availableA,
+    u32 availableB)
+{
+
+    u32 dc;
+    u32 n;
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    if (availableA && availableB)
+    {
+        /* mean of 8 samples */
+        dc = above[0] + above[1] + above[2] + above[3];
+        dc += left[0] + left[1] + left[2] + left[3];
+        dc = (dc + 4) >> 3;
+    }
+    else if (availableA)
+    {
+        dc = (left[0] + left[1] + left[2] + left[3] + 2) >> 2;
+    }
+    else if (availableB)
+    {
+        dc = (above[0] + above[1] + above[2] + above[3] + 2) >> 2;
+    }
+    else
+    {
+        /* neither A nor B available */
+        dc = 128;
+    }
+
+    ASSERT(dc < 256);
+    for (n = 0; n < 16; n++)
+        data[n] = (u8)dc;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: Intra4x4DiagonalDownLeftPrediction
+
+        Functional description:
+            Perform intra 4x4 diagonal down-left prediction mode. Seven
+            filtered values are computed from above[0..7]; the sample at
+            column x, row y of the 4x4 block receives value number x + y.
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4DiagonalDownLeftPrediction(u8 *data, u8 *above)
+{
+
+    u32 k, x, y;
+    u8 diag[7];
+
+    ASSERT(data);
+    ASSERT(above);
+
+    /* 3-tap (1,2,1) filter over the above row */
+    for (k = 0; k < 6; k++)
+        diag[k] = (above[k] + 2 * above[k+1] + above[k+2] + 2) >> 2;
+    /* last value: no sample beyond above[7], so above[7] is weighted 3 */
+    diag[6] = (above[6] + 3 * above[7] + 2) >> 2;
+
+    for (y = 0; y < 4; y++)
+    {
+        for (x = 0; x < 4; x++)
+            data[y * 4 + x] = diag[x + y];
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: Intra4x4DiagonalDownRightPrediction
+
+ Functional description:
+ Perform intra 4x4 diagonal down-right prediction mode.
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4DiagonalDownRightPrediction(u8 *data, u8 *above, u8 *left)
+{
+
+/* Variables */
+
+    u8 mainDiag;            /* samples on the main diagonal */
+    u8 up1, up2, up3;       /* diagonals 1..3 to the right of the main one */
+    u8 down1, down2, down3; /* diagonals 1..3 below the main one */
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    /* Filter each of the seven down-right diagonals once. above[-1] and
+     * left[-1] are both read, exactly as in the per-sample formulation.
+     * NOTE(review): inputs are read before data[] is written; equivalent as
+     * long as data does not overlap above/left -- confirm against callers. */
+    mainDiag = (u8)((above[0] + 2 * above[-1] + left[0] + 2) >> 2);
+
+    up1 = (u8)((above[-1] + 2 * above[0] + above[1] + 2) >> 2);
+    up2 = (u8)((above[0] + 2 * above[1] + above[2] + 2) >> 2);
+    up3 = (u8)((above[1] + 2 * above[2] + above[3] + 2) >> 2);
+
+    down1 = (u8)((left[-1] + 2 * left[0] + left[1] + 2) >> 2);
+    down2 = (u8)((left[0] + 2 * left[1] + left[2] + 2) >> 2);
+    down3 = (u8)((left[1] + 2 * left[2] + left[3] + 2) >> 2);
+
+    /* store row by row (row-major 4x4 block) */
+    data[ 0] = mainDiag; data[ 1] = up1;      data[ 2] = up2;      data[ 3] = up3;
+    data[ 4] = down1;    data[ 5] = mainDiag; data[ 6] = up1;      data[ 7] = up2;
+    data[ 8] = down2;    data[ 9] = down1;    data[10] = mainDiag; data[11] = up1;
+    data[12] = down3;    data[13] = down2;    data[14] = down1;    data[15] = mainDiag;
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: Intra4x4VerticalRightPrediction
+
+ Functional description:
+ Perform intra 4x4 vertical right prediction mode.
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4VerticalRightPrediction(u8 *data, u8 *above, u8 *left)
+{
+
+/* Variables */
+
+    i32 k;
+    u8 avg[4];      /* 2-tap averages of above[k-1], above[k] */
+    u8 flt[4];      /* 3-tap filtered samples along the top edge */
+    u8 side1, side2;/* 3-tap filtered samples along the left edge */
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    /* NOTE(review): all neighbour samples are read before data[] is
+     * written; equivalent to the per-sample form as long as data does not
+     * overlap above/left -- confirm against callers. */
+    for (k = 0; k < 4; k++)
+    {
+        avg[k] = (u8)((above[k - 1] + above[k] + 1) >> 1);
+    }
+
+    /* flt[0] straddles the corner: above[0], above[-1] (double), left[0] */
+    flt[0] = (u8)((above[0] + 2 * above[-1] + left[0] + 2) >> 2);
+    for (k = 1; k < 4; k++)
+    {
+        flt[k] = (u8)((above[k - 2] + 2 * above[k - 1] + above[k] + 2) >> 2);
+    }
+
+    side1 = (u8)((left[1] + 2 * left[0] + left[-1] + 2) >> 2);
+    side2 = (u8)((left[2] + 2 * left[1] + left[0] + 2) >> 2);
+
+    /* rows 0 and 1 use the averaged/filtered top samples directly,
+     * rows 2 and 3 repeat them shifted right by one sample */
+    data[ 0] = avg[0]; data[ 1] = avg[1]; data[ 2] = avg[2]; data[ 3] = avg[3];
+    data[ 4] = flt[0]; data[ 5] = flt[1]; data[ 6] = flt[2]; data[ 7] = flt[3];
+    data[ 8] = side1;  data[ 9] = avg[0]; data[10] = avg[1]; data[11] = avg[2];
+    data[12] = side2;  data[13] = flt[0]; data[14] = flt[1]; data[15] = flt[2];
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: Intra4x4HorizontalDownPrediction
+
+ Functional description:
+ Perform intra 4x4 horizontal down prediction mode.
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4HorizontalDownPrediction(u8 *data, u8 *above, u8 *left)
+{
+
+/* Variables */
+
+    i32 k;
+    u8 avg[4];      /* 2-tap averages of left[k-1], left[k] */
+    u8 flt[4];      /* 3-tap filtered samples along the left edge */
+    u8 top1, top2;  /* 3-tap filtered samples along the top edge */
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+    ASSERT(left);
+
+    /* NOTE(review): all neighbour samples are read before data[] is
+     * written; equivalent to the per-sample form as long as data does not
+     * overlap above/left -- confirm against callers. */
+    for (k = 0; k < 4; k++)
+    {
+        avg[k] = (u8)((left[k - 1] + left[k] + 1) >> 1);
+    }
+
+    /* flt[0] straddles the corner: above[0], above[-1] (double), left[0] */
+    flt[0] = (u8)((above[0] + 2 * above[-1] + left[0] + 2) >> 2);
+    for (k = 1; k < 4; k++)
+    {
+        flt[k] = (u8)((left[k - 2] + 2 * left[k - 1] + left[k] + 2) >> 2);
+    }
+
+    top1 = (u8)((above[1] + 2 * above[0] + above[-1] + 2) >> 2);
+    top2 = (u8)((above[2] + 2 * above[1] + above[0] + 2) >> 2);
+
+    /* each row starts with its own (avg, flt) pair and then repeats the
+     * pair of the row above */
+    data[ 0] = avg[0]; data[ 1] = flt[0]; data[ 2] = top1;   data[ 3] = top2;
+    data[ 4] = avg[1]; data[ 5] = flt[1]; data[ 6] = avg[0]; data[ 7] = flt[0];
+    data[ 8] = avg[2]; data[ 9] = flt[2]; data[10] = avg[1]; data[11] = flt[1];
+    data[12] = avg[3]; data[13] = flt[3]; data[14] = avg[2]; data[15] = flt[2];
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: Intra4x4VerticalLeftPrediction
+
+ Functional description:
+ Perform intra 4x4 vertical left prediction mode.
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4VerticalLeftPrediction(u8 *data, u8 *above)
+{
+
+/* Variables */
+
+    u32 col;
+    u8 avg[5];  /* avg[k]: 2-tap average of above[k], above[k+1]   */
+    u8 flt[5];  /* flt[k]: 3-tap filter of above[k] .. above[k+2]  */
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(above);
+
+    /* Reads above[0..6].
+     * NOTE(review): inputs are read before data[] is written; equivalent to
+     * the per-sample form as long as data does not overlap above. */
+    for (col = 0; col < 5; col++)
+    {
+        avg[col] = (u8)((above[col] + above[col + 1] + 1) >> 1);
+        flt[col] = (u8)((above[col] + 2 * above[col + 1] + above[col + 2] + 2) >> 2);
+    }
+
+    /* rows 0/1 start at sample 0, rows 2/3 are shifted left by one sample */
+    for (col = 0; col < 4; col++)
+    {
+        data[col]      = avg[col];
+        data[4 + col]  = flt[col];
+        data[8 + col]  = avg[col + 1];
+        data[12 + col] = flt[col + 1];
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: Intra4x4HorizontalUpPrediction
+
+ Functional description:
+ Perform intra 4x4 horizontal up prediction mode.
+
+------------------------------------------------------------------------------*/
+
+void Intra4x4HorizontalUpPrediction(u8 *data, u8 *left)
+{
+
+/* Variables */
+
+    u32 row, col, pos;
+    u8 seq[7];
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(left);
+
+    /* The output sample at (col, row) is seq[col + 2 * row]; positions past
+     * the end of the left column all repeat left[3].
+     * NOTE(review): left[0..3] is read before data[] is written; equivalent
+     * to the per-sample form as long as data does not overlap left. */
+    seq[0] = (u8)((left[0] + left[1] + 1) >> 1);
+    seq[1] = (u8)((left[0] + 2 * left[1] + left[2] + 2) >> 2);
+    seq[2] = (u8)((left[1] + left[2] + 1) >> 1);
+    seq[3] = (u8)((left[1] + 2 * left[2] + left[3] + 2) >> 2);
+    seq[4] = (u8)((left[2] + left[3] + 1) >> 1);
+    seq[5] = (u8)((left[2] + 3 * left[3] + 2) >> 2);
+    seq[6] = left[3];
+
+    for (row = 0; row < 4; row++)
+    {
+        for (col = 0; col < 4; col++)
+        {
+            pos = col + 2 * row;
+            if (pos > 6)
+                pos = 6;
+            data[4 * row + col] = seq[pos];
+        }
+    }
+
+}
+
+#endif /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+ Function: Write4x4To16x16
+
+ Functional description:
+ Write a 4x4 block (data4x4) into correct position
+ in 16x16 macroblock (data).
+
+------------------------------------------------------------------------------*/
+
+void Write4x4To16x16(u8 *data, u8 *data4x4, u32 blockNum)
+{
+
+/* Variables */
+
+    u32 x, y;
+    u32 row;
+    u8 *out;
+    const u8 *in;
+
+/* Code */
+
+    ASSERT(data);
+    ASSERT(data4x4);
+    ASSERT(blockNum < 16);
+
+    /* top-left pixel of the 4x4 block inside the 16x16 macroblock */
+    x = h264bsdBlockX[blockNum];
+    y = h264bsdBlockY[blockNum];
+
+    out = data + y*16+x;
+    in = data4x4;
+
+    /* Copy four rows of four bytes: the source rows are contiguous, the
+     * destination rows are 16 bytes apart. The copy is done byte-wise so
+     * that neither buffer has to be 32-bit aligned, no u8 storage is
+     * accessed through a u32 lvalue, and no pointer is cast to a 32-bit
+     * integer (which would truncate it on 64-bit targets). */
+    for (row = 0; row < 4; row++)
+    {
+        out[0] = in[0];
+        out[1] = in[1];
+        out[2] = in[2];
+        out[3] = in[3];
+        out += 16;
+        in += 4;
+    }
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: DetermineIntra4x4PredMode
+
+ Functional description:
+ Returns the intra 4x4 prediction mode of a block based on the
+ neighbouring macroblocks and information parsed from stream.
+
+------------------------------------------------------------------------------*/
+
+u32 DetermineIntra4x4PredMode(macroblockLayer_t *pMbLayer,
+    u32 available, neighbour_t *nA, neighbour_t *nB, u32 index,
+    mbStorage_t *nMbA, mbStorage_t *nMbB)
+{
+
+/* Variables */
+
+    u32 predMode;       /* predicted mode, then the final mode */
+    u32 modeA, modeB;   /* modes contributed by neighbours A and B */
+    u32 remMode;
+
+/* Code */
+
+    ASSERT(pMbLayer);
+
+    /* Mode 2 (the "dc only" case below) is the fallback whenever a
+     * neighbour cannot contribute a 4x4 prediction mode. */
+    predMode = 2;
+
+    if (available)
+    {
+        modeA = 2;
+        if (h264bsdMbPartPredMode(nMbA->mbType) == PRED_MODE_INTRA4x4)
+            modeA = nMbA->intra4x4PredMode[nA->index];
+
+        modeB = 2;
+        if (h264bsdMbPartPredMode(nMbB->mbType) == PRED_MODE_INTRA4x4)
+            modeB = nMbB->intra4x4PredMode[nB->index];
+
+        /* predicted mode is the smaller of the two neighbour modes */
+        predMode = (modeA < modeB) ? modeA : modeB;
+    }
+
+    /* flag set -> the predicted mode is used as such */
+    if (pMbLayer->mbPred.prevIntra4x4PredModeFlag[index])
+        return(predMode);
+
+    /* otherwise the stream carries one of the remaining modes; values at
+     * or above the predicted mode are shifted up by one to skip it */
+    remMode = pMbLayer->mbPred.remIntra4x4PredMode[index];
+    if (remMode < predMode)
+        return(remMode);
+
+    return(remMode + 1);
+}
+
+
+/*lint +e702 */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.h
new file mode 100755
index 0000000..4652bd5
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_intra_prediction.h
@@ -0,0 +1,81 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_INTRA_PREDICTION_H
+#define H264SWDEC_INTRA_PREDICTION_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_image.h"
+#include "h264bsd_macroblock_layer.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+/* Two alternative interfaces are declared, selected at build time by
+ * H264DEC_OMXDL. All functions return u32; the meaning of the return value
+ * is not visible in this header (presumably HANTRO_OK / HANTRO_NOK --
+ * confirm against the implementation). */
+#ifndef H264DEC_OMXDL
+/* Variant used when H264DEC_OMXDL is not defined: the 4x4, 16x16 and chroma
+ * predictors receive the neighbouring samples through separate 'above' and
+ * 'left' pointers. */
+u32 h264bsdIntraPrediction(mbStorage_t *pMb, macroblockLayer_t *mbLayer,
+ image_t *image, u32 mbNum, u32 constrainedIntraPred, u8 *data);
+
+u32 h264bsdIntra4x4Prediction(mbStorage_t *pMb, u8 *data,
+ macroblockLayer_t *mbLayer,
+ u8 *above, u8 *left, u32 constrainedIntraPred);
+u32 h264bsdIntra16x16Prediction(mbStorage_t *pMb, u8 *data, i32 residual[][16],
+ u8 *above, u8 *left, u32 constrainedIntraPred);
+
+u32 h264bsdIntraChromaPrediction(mbStorage_t *pMb, u8 *data, i32 residual[][16],
+ u8 *above, u8 *left, u32 predMode, u32 constrainedIntraPred);
+
+void h264bsdGetNeighbourPels(image_t *image, u8 *above, u8 *left, u32 mbNum);
+
+#else
+
+/* Variant used when H264DEC_OMXDL is defined: no 'above'/'left' or residual
+ * parameters; the predictors take an image pointer (plus width) or an
+ * image_t instead. */
+u32 h264bsdIntra4x4Prediction(mbStorage_t *pMb, u8 *data,
+ macroblockLayer_t *mbLayer,
+ u8 *pImage, u32 width,
+ u32 constrainedIntraPred, u32 block);
+
+u32 h264bsdIntra16x16Prediction(mbStorage_t *pMb, u8 *data, u8 *pImage,
+ u32 width, u32 constrainedIntraPred);
+
+u32 h264bsdIntraChromaPrediction(mbStorage_t *pMb, u8 *data, image_t *image,
+ u32 predMode, u32 constrainedIntraPred);
+
+#endif
+
+#endif /* #ifndef H264SWDEC_INTRA_PREDICTION_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.c
new file mode 100755
index 0000000..2b3e7f0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.c
@@ -0,0 +1,1446 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdDecodeMacroblockLayer
+ h264bsdMbPartPredMode
+ h264bsdNumMbPart
+ h264bsdNumSubMbPart
+ DecodeMbPred
+ DecodeSubMbPred
+ DecodeResidual
+ DetermineNc
+ CbpIntra16x16
+ h264bsdPredModeIntra16x16
+ h264bsdDecodeMacroblock
+ ProcessResidual
+ h264bsdSubMbPartMode
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_macroblock_layer.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_util.h"
+#include "h264bsd_vlc.h"
+#include "h264bsd_cavlc.h"
+#include "h264bsd_nal_unit.h"
+#include "h264bsd_neighbour.h"
+#include "h264bsd_transform.h"
+#include "h264bsd_intra_prediction.h"
+#include "h264bsd_inter_prediction.h"
+
+#ifdef H264DEC_OMXDL
+#include "omxtypes.h"
+#include "omxVC.h"
+#include "armVC.h"
+#endif /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+#ifdef H264DEC_OMXDL
+/* Offset tables, only built for the OMXDL variant.
+ * lumaIndex[n] equals 16*y + x for 4x4 blocks laid out on a 16-wide grid
+ * (0, 4, 64, 68 = the four blocks of the top-left 8x8 quadrant, and so on).
+ * chromaIndex[n] equals 256 + 8*y + x for 4x4 blocks on an 8-wide grid;
+ * entries 4..7 are a further 64 on.
+ * NOTE(review): presumably byte offsets of each 4x4 block inside a
+ * macroblock buffer holding 256 luma bytes followed by two 64-byte chroma
+ * planes -- the users of these tables are not visible here, confirm. */
+static const u32 chromaIndex[8] = { 256, 260, 288, 292, 320, 324, 352, 356 };
+static const u32 lumaIndex[16] = { 0, 4, 64, 68,
+ 8, 12, 72, 76,
+ 128, 132, 192, 196,
+ 136, 140, 200, 204 };
+#endif
+/* mapping of dc coefficients array to luma blocks */
+static const u32 dcCoeffIndex[16] =
+ {0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15};
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/* Bit stream parsers for the prediction and residual parts of a macroblock */
+static u32 DecodeMbPred(strmData_t *pStrmData, mbPred_t *pMbPred,
+ mbType_e mbType, u32 numRefIdxActive);
+static u32 DecodeSubMbPred(strmData_t *pStrmData, subMbPred_t *pSubMbPred,
+ mbType_e mbType, u32 numRefIdxActive);
+static u32 DecodeResidual(strmData_t *pStrmData, residual_t *pResidual,
+ mbStorage_t *pMb, mbType_e mbType, u32 codedBlockPattern);
+
+/* The element type of the totalCoeff array passed to DetermineNc depends on
+ * the build variant: u8 with H264DEC_OMXDL, i16 otherwise. */
+#ifdef H264DEC_OMXDL
+static u32 DetermineNc(mbStorage_t *pMb, u32 blockIndex, u8 *pTotalCoeff);
+#else
+static u32 DetermineNc(mbStorage_t *pMb, u32 blockIndex, i16 *pTotalCoeff);
+#endif
+
+static u32 CbpIntra16x16(mbType_e mbType);
+/* Residual processing: three specialised helpers in the OMXDL variant,
+ * a single ProcessResidual otherwise. */
+#ifdef H264DEC_OMXDL
+static u32 ProcessIntra4x4Residual(mbStorage_t *pMb, u8 *data, u32 constrainedIntraPred,
+ macroblockLayer_t *mbLayer, const u8 **pSrc, image_t *image);
+static u32 ProcessChromaResidual(mbStorage_t *pMb, u8 *data, const u8 **pSrc );
+static u32 ProcessIntra16x16Residual(mbStorage_t *pMb, u8 *data, u32 constrainedIntraPred,
+ u32 intraChromaPredMode, const u8 **pSrc, image_t *image);
+
+
+#else
+static u32 ProcessResidual(mbStorage_t *pMb, i32 residualLevel[][16], u32 *);
+#endif
+
+/*------------------------------------------------------------------------------
+
+ Function name: h264bsdDecodeMacroblockLayer
+
+ Functional description:
+ Parse macroblock specific information from bit stream.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+ pMb pointer to macroblock storage structure
+ sliceType type of the current slice
+ numRefIdxActive maximum reference index
+
+ Outputs:
+ pMbLayer stores the macroblock data parsed from stream
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK end of stream or error in stream
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeMacroblockLayer(strmData_t *pStrmData,
+    macroblockLayer_t *pMbLayer, mbStorage_t *pMb, u32 sliceType,
+    u32 numRefIdxActive)
+{
+
+/* Variables */
+
+    u32 tmp, i, value;
+    i32 itmp;
+    mbPartPredMode_e partMode;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pMbLayer);
+
+    /* start from an all-zero macroblock layer structure */
+#ifdef H264DEC_NEON
+    h264bsdClearMbLayer(pMbLayer, ((sizeof(macroblockLayer_t) + 63) & ~0x3F));
+#else
+    H264SwDecMemset(pMbLayer, 0, sizeof(macroblockLayer_t));
+#endif
+
+    /* mb_type. The decode status is checked before 'value' is looked at,
+     * and the range checks compare 'value' directly instead of
+     * 'value + offset': the addition wraps around for very large code
+     * numbers, which would let a corrupt stream pass the check and yield
+     * a macroblock type that is not valid for the slice. */
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(HANTRO_NOK);
+
+    if (IS_I_SLICE(sliceType))
+    {
+        /* I slice types are offset by 6; largest valid mbType is 31 */
+        if (value > (31 - 6))
+            return(HANTRO_NOK);
+        pMbLayer->mbType = (mbType_e)(value + 6);
+    }
+    else
+    {
+        /* other slice types are offset by 1; largest valid mbType is 31 */
+        if (value > (31 - 1))
+            return(HANTRO_NOK);
+        pMbLayer->mbType = (mbType_e)(value + 1);
+    }
+
+    if (pMbLayer->mbType == I_PCM)
+    {
+        i32 *level;
+        while( !h264bsdIsByteAligned(pStrmData) )
+        {
+            /* pcm_alignment_zero_bit */
+            tmp = h264bsdGetBits(pStrmData, 1);
+            if (tmp)
+                return(HANTRO_NOK);
+        }
+
+        /* 384 raw 8-bit samples, stored consecutively starting at
+         * residual.level[0] */
+        level = pMbLayer->residual.level[0];
+        for (i = 0; i < 384; i++)
+        {
+            value = h264bsdGetBits(pStrmData, 8);
+            if (value == END_OF_STREAM)
+                return(HANTRO_NOK);
+            *level++ = (i32)value;
+        }
+    }
+    else
+    {
+        partMode = h264bsdMbPartPredMode(pMbLayer->mbType);
+        if ( (partMode == PRED_MODE_INTER) &&
+             (h264bsdNumMbPart(pMbLayer->mbType) == 4) )
+        {
+            tmp = DecodeSubMbPred(pStrmData, &pMbLayer->subMbPred,
+                pMbLayer->mbType, numRefIdxActive);
+        }
+        else
+        {
+            tmp = DecodeMbPred(pStrmData, &pMbLayer->mbPred,
+                pMbLayer->mbType, numRefIdxActive);
+        }
+        if (tmp != HANTRO_OK)
+            return(tmp);
+
+        /* coded block pattern: read from the stream, except for intra
+         * 16x16 where it is derived from the macroblock type */
+        if (partMode != PRED_MODE_INTRA16x16)
+        {
+            tmp = h264bsdDecodeExpGolombMapped(pStrmData, &value,
+                (u32)(partMode == PRED_MODE_INTRA4x4));
+            if (tmp != HANTRO_OK)
+                return(tmp);
+            pMbLayer->codedBlockPattern = value;
+        }
+        else
+        {
+            pMbLayer->codedBlockPattern = CbpIntra16x16(pMbLayer->mbType);
+        }
+
+        if ( pMbLayer->codedBlockPattern ||
+             (partMode == PRED_MODE_INTRA16x16) )
+        {
+            /* mb_qp_delta, accepted range [-26, 25] */
+            tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+            if (tmp != HANTRO_OK || (itmp < -26) || (itmp > 25) )
+                return(HANTRO_NOK);
+            pMbLayer->mbQpDelta = itmp;
+
+            tmp = DecodeResidual(pStrmData, &pMbLayer->residual, pMb,
+                pMbLayer->mbType, pMbLayer->codedBlockPattern);
+
+            /* re-derive the consumed bit count from the current read
+             * position (done even when DecodeResidual failed) */
+            pStrmData->strmBuffReadBits =
+                (u32)(pStrmData->pStrmCurrPos - pStrmData->pStrmBuffStart) * 8 +
+                pStrmData->bitPosInWord;
+
+            if (tmp != HANTRO_OK)
+                return(tmp);
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdMbPartPredMode
+
+ Functional description:
+ Returns the prediction mode of a macroblock type
+
+------------------------------------------------------------------------------*/
+
+mbPartPredMode_e h264bsdMbPartPredMode(mbType_e mbType)
+{
+
+/* Variables */
+
+    mbPartPredMode_e mode;
+
+/* Code */
+
+    ASSERT(mbType <= 31);
+
+    /* types up to and including P_8x8ref0 are inter types, I_4x4 is the
+     * only intra 4x4 type, everything else is reported as intra 16x16 */
+    if (mbType <= P_8x8ref0)
+    {
+        mode = PRED_MODE_INTER;
+    }
+    else if (mbType == I_4x4)
+    {
+        mode = PRED_MODE_INTRA4x4;
+    }
+    else
+    {
+        mode = PRED_MODE_INTRA16x16;
+    }
+
+    return(mode);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdNumMbPart
+
+ Functional description:
+ Returns the amount of macroblock partitions in a macroblock type
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdNumMbPart(mbType_e mbType)
+{
+
+/* Variables */
+
+    u32 numParts;
+
+/* Code */
+
+    ASSERT(h264bsdMbPartPredMode(mbType) == PRED_MODE_INTER);
+
+    if (mbType == P_L0_16x16 || mbType == P_Skip)
+    {
+        /* whole macroblock is one partition */
+        numParts = 1;
+    }
+    else if (mbType == P_L0_L0_16x8 || mbType == P_L0_L0_8x16)
+    {
+        numParts = 2;
+    }
+    else
+    {
+        /* P_8x8 or P_8x8ref0 */
+        numParts = 4;
+    }
+
+    return(numParts);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdNumSubMbPart
+
+ Functional description:
+ Returns the amount of sub-partitions in a sub-macroblock type
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdNumSubMbPart(subMbType_e subMbType)
+{
+
+/* Variables */
+
+    u32 numParts;
+
+/* Code */
+
+    ASSERT(subMbType <= P_L0_4x4);
+
+    if (subMbType == P_L0_8x8)
+    {
+        numParts = 1;
+    }
+    else if (subMbType == P_L0_8x4 || subMbType == P_L0_4x8)
+    {
+        numParts = 2;
+    }
+    else
+    {
+        /* P_L0_4x4 */
+        numParts = 4;
+    }
+
+    return(numParts);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: DecodeMbPred
+
+ Functional description:
+ Parse macroblock prediction information from bit stream and store
+ in 'pMbPred'.
+
+------------------------------------------------------------------------------*/
+
+u32 DecodeMbPred(strmData_t *pStrmData, mbPred_t *pMbPred, mbType_e mbType,
+    u32 numRefIdxActive)
+{
+
+/* Variables */
+
+    u32 status, value;
+    u32 part, numParts;
+    u32 half, blk, k, numRem;
+    i32 mvd;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pMbPred);
+
+    switch (h264bsdMbPartPredMode(mbType))
+    {
+        case PRED_MODE_INTER:
+            numParts = h264bsdNumMbPart(mbType);
+
+            /* reference indices are present only when more than one
+             * reference picture is active */
+            if (numRefIdxActive > 1)
+            {
+                for (part = 0; part < numParts; part++)
+                {
+                    status = h264bsdDecodeExpGolombTruncated(pStrmData,
+                        &value, (u32)(numRefIdxActive > 2));
+                    if (status != HANTRO_OK || value >= numRefIdxActive)
+                        return(HANTRO_NOK);
+
+                    pMbPred->refIdxL0[part] = value;
+                }
+            }
+
+            /* one motion vector difference (hor, ver) per partition */
+            for (part = 0; part < numParts; part++)
+            {
+                status = h264bsdDecodeExpGolombSigned(pStrmData, &mvd);
+                if (status != HANTRO_OK)
+                    return(status);
+                pMbPred->mvdL0[part].hor = (i16)mvd;
+
+                status = h264bsdDecodeExpGolombSigned(pStrmData, &mvd);
+                if (status != HANTRO_OK)
+                    return(status);
+                pMbPred->mvdL0[part].ver = (i16)mvd;
+            }
+            break;
+
+        case PRED_MODE_INTRA4x4:
+            /* The 16 blocks are parsed in two groups of 8. Each block
+             * takes 1 flag bit plus 3 mode bits when the flag is clear,
+             * so a group needs at most 32 bits and can be parsed from a
+             * single 32-bit peek, MSB first. */
+            blk = 0;
+            for (half = 0; half < 2; half++)
+            {
+                value = h264bsdShowBits32(pStrmData);
+                numRem = 0;
+                for (k = 0; k < 8; k++, blk++)
+                {
+                    pMbPred->prevIntra4x4PredModeFlag[blk] =
+                        value & 0x80000000 ? HANTRO_TRUE : HANTRO_FALSE;
+                    value <<= 1;
+                    if (!pMbPred->prevIntra4x4PredModeFlag[blk])
+                    {
+                        /* top three bits hold the remaining mode */
+                        pMbPred->remIntra4x4PredMode[blk] = value>>29;
+                        value <<= 3;
+                        numRem++;
+                    }
+                }
+                /* consume exactly the bits that were interpreted */
+                if (h264bsdFlushBits(pStrmData, 8 + 3*numRem) ==
+                    END_OF_STREAM)
+                    return(HANTRO_NOK);
+            }
+            /* fall-through */
+
+        case PRED_MODE_INTRA16x16:
+            /* chroma prediction mode, valid range 0..3 */
+            status = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+            if (status != HANTRO_OK || value > 3)
+                return(HANTRO_NOK);
+            pMbPred->intraChromaPredMode = value;
+            break;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: DecodeSubMbPred
+
+ Functional description:
+ Parse sub-macroblock prediction information from bit stream and
+ store in 'pSubMbPred'.
+
+------------------------------------------------------------------------------*/
+
+u32 DecodeSubMbPred(strmData_t *pStrmData, subMbPred_t *pSubMbPred,
+    mbType_e mbType, u32 numRefIdxActive)
+{
+
+/* Variables */
+
+    u32 status, value;
+    u32 sub, part, numParts;
+    i32 mvd;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSubMbPred);
+    ASSERT(h264bsdMbPartPredMode(mbType) == PRED_MODE_INTER);
+
+    /* sub-macroblock type of each of the four sub-macroblocks (0..3) */
+    for (sub = 0; sub < 4; sub++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+        if (status != HANTRO_OK || value > 3)
+            return(HANTRO_NOK);
+        pSubMbPred->subMbType[sub] = (subMbType_e)value;
+    }
+
+    /* reference indices are not parsed for P_8x8ref0 nor when only one
+     * reference picture is active */
+    if ( (numRefIdxActive > 1) && (mbType != P_8x8ref0) )
+    {
+        for (sub = 0; sub < 4; sub++)
+        {
+            status = h264bsdDecodeExpGolombTruncated(pStrmData, &value,
+                (u32)(numRefIdxActive > 2));
+            if (status != HANTRO_OK || value >= numRefIdxActive)
+                return(HANTRO_NOK);
+            pSubMbPred->refIdxL0[sub] = value;
+        }
+    }
+
+    /* one motion vector difference (hor, ver) per sub-partition */
+    for (sub = 0; sub < 4; sub++)
+    {
+        numParts = h264bsdNumSubMbPart(pSubMbPred->subMbType[sub]);
+        for (part = 0; part < numParts; part++)
+        {
+            status = h264bsdDecodeExpGolombSigned(pStrmData, &mvd);
+            if (status != HANTRO_OK)
+                return(status);
+            pSubMbPred->mvdL0[sub][part].hor = (i16)mvd;
+
+            status = h264bsdDecodeExpGolombSigned(pStrmData, &mvd);
+            if (status != HANTRO_OK)
+                return(status);
+            pSubMbPred->mvdL0[sub][part].ver = (i16)mvd;
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+#ifdef H264DEC_OMXDL
+/*------------------------------------------------------------------------------
+
+ Function: DecodeResidual
+
+ Functional description:
+ Parse residual information from bit stream and store in 'pResidual'.
+
+------------------------------------------------------------------------------*/
+
+/* Local wrappers that hide which coefficient decoder is called: the
+ * omxVCM4P10_* functions by default, armVCM4P10_DecodeCoeffsToPair when
+ * H264DEC_NEON is defined. Both advance the stream position (pStrmCurrPos,
+ * bitPosInWord) and the output pointer through the addresses passed in.
+ * Implemented as macros so the argument expressions and casts reach the
+ * decoders unchanged; undefined again right after DecodeResidual. */
+#ifndef H264DEC_NEON
+#define DECODE_COEFFS_TO_PAIR(pStrm, pNumCoeff, ppPosCoef, nC, maxNumCoeff) \
+    omxVCM4P10_DecodeCoeffsToPairCAVLC( \
+        (const OMX_U8 **) (&(pStrm)->pStrmCurrPos), \
+        (OMX_S32*) (&(pStrm)->bitPosInWord), \
+        (pNumCoeff), \
+        (ppPosCoef), \
+        (nC), \
+        (maxNumCoeff))
+#define DECODE_CHROMA_DC_TO_PAIR(pStrm, pNumCoeff, ppPosCoef) \
+    omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC( \
+        (const OMX_U8**) (&(pStrm)->pStrmCurrPos), \
+        (OMX_S32*) (&(pStrm)->bitPosInWord), \
+        (pNumCoeff), \
+        (ppPosCoef))
+#else
+#define DECODE_COEFFS_TO_PAIR(pStrm, pNumCoeff, ppPosCoef, nC, maxNumCoeff) \
+    armVCM4P10_DecodeCoeffsToPair( \
+        (const OMX_U8 **) (&(pStrm)->pStrmCurrPos), \
+        (OMX_S32*) (&(pStrm)->bitPosInWord), \
+        (pNumCoeff), \
+        (ppPosCoef), \
+        (nC), \
+        (maxNumCoeff))
+#define DECODE_CHROMA_DC_TO_PAIR(pStrm, pNumCoeff, ppPosCoef) \
+    armVCM4P10_DecodeCoeffsToPair( \
+        (const OMX_U8**) (&(pStrm)->pStrmCurrPos), \
+        (OMX_S32*) (&(pStrm)->bitPosInWord), \
+        (pNumCoeff), \
+        (ppPosCoef), \
+        17, \
+        4)
+#endif
+
+u32 DecodeResidual(strmData_t *pStrmData, residual_t *pResidual,
+    mbStorage_t *pMb, mbType_e mbType, u32 codedBlockPattern)
+{
+
+/* Variables */
+
+    u32 group, blk;
+    u32 blockCoded;
+    u32 blockIndex;
+    u32 is16x16;
+    OMX_INT nc;
+    OMXResult omxRes;
+    OMX_U8 *pPosCoefBuf;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pResidual);
+
+    /* all decoded blocks are appended to this buffer */
+    pPosCoefBuf = pResidual->posCoefBuf;
+
+    is16x16 = (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTRA16x16) ?
+        HANTRO_TRUE : HANTRO_FALSE;
+
+    /* luma DC is at index 24 */
+    if (is16x16)
+    {
+        nc = (OMX_INT)DetermineNc(pMb, 0, pResidual->totalCoeff);
+        omxRes = DECODE_COEFFS_TO_PAIR(pStrmData,
+            &pResidual->totalCoeff[24], &pPosCoefBuf, nc, 16);
+        if (omxRes != OMX_Sts_NoErr)
+            return(HANTRO_NOK);
+    }
+
+    /* luma: four groups of four 4x4 blocks, one cbp bit per group */
+    blockIndex = 0;
+    for (group = 0; group < 4; group++)
+    {
+        /* luma cbp in bits 0-3 */
+        blockCoded = codedBlockPattern & 0x1;
+        codedBlockPattern >>= 1;
+        if (!blockCoded)
+        {
+            blockIndex += 4;
+            continue;
+        }
+
+        for (blk = 0; blk < 4; blk++, blockIndex++)
+        {
+            nc = (OMX_INT)DetermineNc(pMb,blockIndex,pResidual->totalCoeff);
+            /* intra 16x16 blocks carry 15 coefficients here (their DC
+             * was decoded above), all other blocks carry 16 */
+            if (is16x16)
+            {
+                omxRes = DECODE_COEFFS_TO_PAIR(pStrmData,
+                    &pResidual->totalCoeff[blockIndex], &pPosCoefBuf,
+                    nc, 15);
+            }
+            else
+            {
+                omxRes = DECODE_COEFFS_TO_PAIR(pStrmData,
+                    &pResidual->totalCoeff[blockIndex], &pPosCoefBuf,
+                    nc, 16);
+            }
+            if (omxRes != OMX_Sts_NoErr)
+                return(HANTRO_NOK);
+        }
+    }
+
+    /* chroma DC block are at indices 25 and 26 */
+    blockCoded = codedBlockPattern & 0x3;
+    if (blockCoded)
+    {
+        omxRes = DECODE_CHROMA_DC_TO_PAIR(pStrmData,
+            &pResidual->totalCoeff[25], &pPosCoefBuf);
+        if (omxRes != OMX_Sts_NoErr)
+            return(HANTRO_NOK);
+
+        omxRes = DECODE_CHROMA_DC_TO_PAIR(pStrmData,
+            &pResidual->totalCoeff[26], &pPosCoefBuf);
+        if (omxRes != OMX_Sts_NoErr)
+            return(HANTRO_NOK);
+    }
+
+    /* chroma AC: blockIndex continues from the luma loop (16..23) */
+    blockCoded = codedBlockPattern & 0x2;
+    if (blockCoded)
+    {
+        for (blk = 0; blk < 8; blk++, blockIndex++)
+        {
+            nc = (OMX_INT)DetermineNc(pMb, blockIndex, pResidual->totalCoeff);
+            omxRes = DECODE_COEFFS_TO_PAIR(pStrmData,
+                &pResidual->totalCoeff[blockIndex], &pPosCoefBuf, nc, 15);
+            if (omxRes != OMX_Sts_NoErr)
+                return(HANTRO_NOK);
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+#undef DECODE_COEFFS_TO_PAIR
+#undef DECODE_CHROMA_DC_TO_PAIR
+
+#else
+/*------------------------------------------------------------------------------
+
+ Function: DecodeResidual
+
+ Functional description:
+ Parse residual information from bit stream and store in 'pResidual'.
+
+------------------------------------------------------------------------------*/
+
+u32 DecodeResidual(strmData_t *pStrmData, residual_t *pResidual,
+    mbStorage_t *pMb, mbType_e mbType, u32 codedBlockPattern)
+{
+
+/* Variables */
+
+    u32 group, blk, res;
+    i32 nc;
+    u32 blockCoded;
+    u32 blockIndex;
+    u32 is16x16;
+    u32 skipDc;     /* 1 when a block's DC coefficient is coded separately */
+    i32 (*level)[16];
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pResidual);
+
+    level = pResidual->level;
+
+    /* The block decoder's return value packs several fields: the low four
+     * bits are the status, bits 4..11 the number of coefficients, and the
+     * bits from 15/16 upwards are stored as the coefficient map. */
+
+    /* luma DC is at index 24 */
+    if (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTRA16x16)
+    {
+        nc = (i32)DetermineNc(pMb, 0, pResidual->totalCoeff);
+        res = h264bsdDecodeResidualBlockCavlc(pStrmData, level[24], nc, 16);
+        if ((res & 0xF) != HANTRO_OK)
+            return(res);
+        pResidual->totalCoeff[24] = (res >> 4) & 0xFF;
+        is16x16 = HANTRO_TRUE;
+    }
+    else
+    {
+        is16x16 = HANTRO_FALSE;
+    }
+
+    /* intra 16x16 luma blocks: 15 coefficients written from position 1,
+     * all other luma blocks: 16 coefficients written from position 0 */
+    skipDc = is16x16 ? 1 : 0;
+
+    /* luma: four groups of four 4x4 blocks, one cbp bit per group */
+    blockIndex = 0;
+    for (group = 0; group < 4; group++)
+    {
+        /* luma cbp in bits 0-3 */
+        blockCoded = codedBlockPattern & 0x1;
+        codedBlockPattern >>= 1;
+        if (!blockCoded)
+        {
+            blockIndex += 4;
+            continue;
+        }
+
+        for (blk = 0; blk < 4; blk++, blockIndex++)
+        {
+            nc = (i32)DetermineNc(pMb, blockIndex, pResidual->totalCoeff);
+            res = h264bsdDecodeResidualBlockCavlc(pStrmData,
+                level[blockIndex] + skipDc, nc, 16 - skipDc);
+            /* the coefficient map is stored before the status check */
+            pResidual->coeffMap[blockIndex] = res >> (16 - skipDc);
+            if ((res & 0xF) != HANTRO_OK)
+                return(res);
+            pResidual->totalCoeff[blockIndex] = (res >> 4) & 0xFF;
+        }
+    }
+
+    /* chroma DC block are at indices 25 and 26 */
+    blockCoded = codedBlockPattern & 0x3;
+    if (blockCoded)
+    {
+        /* both chroma DC blocks (4 coefficients each) share level[25] */
+        res = h264bsdDecodeResidualBlockCavlc(pStrmData, level[25], -1, 4);
+        if ((res & 0xF) != HANTRO_OK)
+            return(res);
+        pResidual->totalCoeff[25] = (res >> 4) & 0xFF;
+
+        res = h264bsdDecodeResidualBlockCavlc(pStrmData, level[25]+4, -1, 4);
+        if ((res & 0xF) != HANTRO_OK)
+            return(res);
+        pResidual->totalCoeff[26] = (res >> 4) & 0xFF;
+    }
+
+    /* chroma AC: blockIndex continues from the luma loop (16..23) */
+    blockCoded = codedBlockPattern & 0x2;
+    if (blockCoded)
+    {
+        for (blk = 0; blk < 8; blk++, blockIndex++)
+        {
+            nc = (i32)DetermineNc(pMb, blockIndex, pResidual->totalCoeff);
+            res = h264bsdDecodeResidualBlockCavlc(pStrmData,
+                level[blockIndex] + 1, nc, 15);
+            if ((res & 0xF) != HANTRO_OK)
+                return(res);
+            pResidual->totalCoeff[blockIndex] = (res >> 4) & 0xFF;
+            pResidual->coeffMap[blockIndex] = (res >> 15);
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+#endif
+
+/*------------------------------------------------------------------------------
+
+ Function: DetermineNc
+
+ Functional description:
+ Returns the nC of a block.
+
+------------------------------------------------------------------------------*/
+#ifdef H264DEC_OMXDL
+u32 DetermineNc(mbStorage_t *pMb, u32 blockIndex, u8 *pTotalCoeff)
+#else
+u32 DetermineNc(mbStorage_t *pMb, u32 blockIndex, i16 *pTotalCoeff)
+#endif
+{
+/*lint -e702 */
+/* Variables */
+
+    const neighbour_t *blockA, *blockB;
+    u8 indexA, indexB;
+    u32 haveA, haveB;   /* does the neighbour contribute a count? */
+    i32 coeffA, coeffB; /* coefficient counts of the neighbours */
+    i32 n;
+
+/* Code */
+
+    ASSERT(blockIndex < 24);
+
+    blockA = h264bsdNeighbour4x4BlockA(blockIndex);
+    blockB = h264bsdNeighbour4x4BlockB(blockIndex);
+    indexA = blockA->index;
+    indexB = blockB->index;
+
+    /* A neighbour inside the current macroblock always contributes and is
+     * read from pTotalCoeff, because the totalCoeff array in mbStorage has
+     * not been set/updated yet for this macroblock. A neighbour in another
+     * macroblock contributes only if that macroblock is available. */
+    haveA = haveB = 0;
+    coeffA = coeffB = 0;
+
+    if (blockA->mb == MB_CURR)
+    {
+        coeffA = pTotalCoeff[indexA];
+        haveA = 1;
+    }
+    else if (h264bsdIsNeighbourAvailable(pMb, pMb->mbA))
+    {
+        coeffA = pMb->mbA->totalCoeff[indexA];
+        haveA = 1;
+    }
+
+    if (blockB->mb == MB_CURR)
+    {
+        coeffB = pTotalCoeff[indexB];
+        haveB = 1;
+    }
+    else if (h264bsdIsNeighbourAvailable(pMb, pMb->mbB))
+    {
+        coeffB = pMb->mbB->totalCoeff[indexB];
+        haveB = 1;
+    }
+
+    /* both: rounded average, one: that value, none: zero */
+    if (haveA && haveB)
+        n = (coeffA + coeffB + 1) >> 1;
+    else if (haveA)
+        n = coeffA;
+    else if (haveB)
+        n = coeffB;
+    else
+        n = 0;
+
+    return((u32)n);
+/*lint +e702 */
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: CbpIntra16x16
+
+        Functional description:
+            Returns the coded block pattern for intra 16x16 macroblock.
+            The low four bits are all set (15) for types from
+            I_16x16_0_0_1 upwards and clear otherwise, the chroma part
+            (0..2) is placed at bits 4 and 5.
+
+        Inputs:
+            mbType          one of the I_16x16_x_x_x macroblock types
+
+------------------------------------------------------------------------------*/
+
+u32 CbpIntra16x16(mbType_e mbType)
+{
+
+/* Variables */
+
+    u32 lumaBits;
+    u32 chromaBits;
+
+/* Code */
+
+    ASSERT(mbType >= I_16x16_0_0_0 && mbType <= I_16x16_3_2_1);
+
+    lumaBits = (mbType >= I_16x16_0_0_1) ? 15 : 0;
+
+    /* the types come in groups of four per chroma pattern, and the three
+     * chroma patterns repeat once for the second half of the types */
+    /* ignore lint warning on arithmetic on enum's */
+    chromaBits = /*lint -e(656)*/(mbType - I_16x16_0_0_0) >> 2;
+    if (chromaBits > 2)
+        chromaBits -= 3;
+
+    return(lumaBits + (chromaBits << 4));
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdPredModeIntra16x16
+
+        Functional description:
+            Returns the prediction mode for intra 16x16 macroblock, i.e.
+            the two lowest bits of the type's offset from I_16x16_0_0_0.
+
+        Inputs:
+            mbType          one of the I_16x16_x_x_x macroblock types
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdPredModeIntra16x16(mbType_e mbType)
+{
+
+/* Variables */
+
+    u32 offset;
+
+/* Code */
+
+    ASSERT(mbType >= I_16x16_0_0_0 && mbType <= I_16x16_3_2_1);
+
+    /* offset is 0 for I_16x16_0_0_0 and the mode cycles 0..3 from there */
+    /* ignore lint warning on arithmetic on enum's */
+    offset = /*lint -e(656)*/(mbType - I_16x16_0_0_0);
+    offset &= 0x3;
+
+    return(offset);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdDecodeMacroblock
+
+ Functional description:
+ Decode one macroblock and write into output image. I_PCM
+ macroblocks are copied straight from the residual levels; for
+ all other types the coefficient counts and QP are stored in pMb
+ and the residual and prediction are processed (the order of the
+ two depends on H264DEC_OMXDL).
+
+ Inputs:
+ pMb pointer to macroblock specific information
+ mbLayer pointer to current macroblock data from stream
+ currImage pointer to output image
+ dpb pointer to decoded picture buffer
+ qpY pointer to slice QP
+ mbNum current macroblock number
+ constrainedIntraPred flag specifying if neighbouring inter
+ macroblocks are used in intra prediction
+ data work buffer for the samples of the macroblock;
+ the I_PCM path fills 24*16 bytes of it
+
+ Outputs:
+ pMb structure is updated with current macroblock
+ currImage decoded macroblock is written into output image
+ qpY updated with mbQpDelta (wrapped into 0..51)
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK error in macroblock decoding
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeMacroblock(mbStorage_t *pMb, macroblockLayer_t *pMbLayer,
+ image_t *currImage, dpbStorage_t *dpb, i32 *qpY, u32 mbNum,
+ u32 constrainedIntraPredFlag, u8* data)
+{
+
+/* Variables */
+
+ u32 i, tmp;
+ mbType_e mbType;
+#ifdef H264DEC_OMXDL
+ const u8 *pSrc;
+#endif
+/* Code */
+
+ ASSERT(pMb);
+ ASSERT(pMbLayer);
+ ASSERT(currImage);
+ ASSERT(qpY && *qpY < 52);
+ ASSERT(mbNum < currImage->width*currImage->height);
+
+ mbType = pMbLayer->mbType;
+ pMb->mbType = mbType;
+
+ /* counts the decoding attempts of this macroblock, a value above 1
+ * is used below to suppress a second write to the output image */
+ pMb->decoded++;
+
+ h264bsdSetCurrImageMbPointers(currImage, mbNum);
+
+ if (mbType == I_PCM)
+ {
+ u8 *pData = (u8*)data;
+#ifdef H264DEC_OMXDL
+ u8 *tot = pMb->totalCoeff;
+#else
+ i16 *tot = pMb->totalCoeff;
+#endif
+ i32 *lev = pMbLayer->residual.level[0];
+
+ pMb->qpY = 0;
+
+ /* if decoded flag > 1 -> mb has already been successfully decoded and
+ * written to output -> do not write again */
+ if (pMb->decoded > 1)
+ {
+ /* coefficient counts of the 24 blocks are still set to 16 */
+ for (i = 24; i--;)
+ *tot++ = 16;
+ return HANTRO_OK;
+ }
+
+ /* 24 blocks of 16 samples each: mark every block as having 16
+ * coefficients and narrow the stored levels to bytes in data */
+ for (i = 24; i--;)
+ {
+ *tot++ = 16;
+ for (tmp = 16; tmp--;)
+ *pData++ = (u8)(*lev++);
+ }
+ h264bsdWriteMacroblock(currImage, (u8*)data);
+
+ return(HANTRO_OK);
+ }
+ else
+ {
+#ifdef H264DEC_OMXDL
+ /* in the OMXDL build inter prediction runs before the residual is
+ * processed */
+ if (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTER)
+ {
+ tmp = h264bsdInterPrediction(pMb, pMbLayer, dpb, mbNum,
+ currImage, (u8*)data);
+ if (tmp != HANTRO_OK) return (tmp);
+ }
+#endif
+ if (mbType != P_Skip)
+ {
+ /* keep the coefficient counts of all 27 blocks in pMb */
+ H264SwDecMemcpy(pMb->totalCoeff,
+ pMbLayer->residual.totalCoeff,
+ 27*sizeof(*pMb->totalCoeff));
+
+ /* update qpY */
+ if (pMbLayer->mbQpDelta)
+ {
+ /* wrap the result back into the range 0..51 */
+ *qpY = *qpY + pMbLayer->mbQpDelta;
+ if (*qpY < 0) *qpY += 52;
+ else if (*qpY >= 52) *qpY -= 52;
+ }
+ pMb->qpY = (u32)*qpY;
+
+#ifdef H264DEC_OMXDL
+ /* pSrc walks through posCoefBuf, each OMX call below receives its
+ * address */
+ pSrc = pMbLayer->residual.posCoefBuf;
+
+ if (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTER)
+ {
+ OMXResult res;
+ u8 *p;
+ u8 *totalCoeff = pMb->totalCoeff;
+
+ /* 16 luma blocks, blocks without coefficients are skipped */
+ for (i = 0; i < 16; i++, totalCoeff++)
+ {
+ p = data + lumaIndex[i];
+ if (*totalCoeff)
+ {
+ /* NOTE(review): p is passed twice, presumably as the
+ * prediction input and as the destination so that the
+ * residual is added in place - confirm against the
+ * OpenMAX DL API */
+ res = omxVCM4P10_DequantTransformResidualFromPairAndAdd(
+ &pSrc, p, 0, p, 16, 16, *qpY, *totalCoeff);
+ if (res != OMX_Sts_NoErr)
+ return (HANTRO_NOK);
+ }
+ }
+
+ }
+ else if (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTRA4x4)
+ {
+ tmp = ProcessIntra4x4Residual(pMb,
+ data,
+ constrainedIntraPredFlag,
+ pMbLayer,
+ &pSrc,
+ currImage);
+ if (tmp != HANTRO_OK)
+ return (tmp);
+ }
+ else if (h264bsdMbPartPredMode(mbType) == PRED_MODE_INTRA16x16)
+ {
+ tmp = ProcessIntra16x16Residual(pMb,
+ data,
+ constrainedIntraPredFlag,
+ pMbLayer->mbPred.intraChromaPredMode,
+ &pSrc,
+ currImage);
+ if (tmp != HANTRO_OK)
+ return (tmp);
+ }
+
+ /* chroma residual is handled the same way for every prediction
+ * mode, its status is checked after the #endif below */
+ tmp = ProcessChromaResidual(pMb, data, &pSrc);
+
+#else
+ tmp = ProcessResidual(pMb, pMbLayer->residual.level,
+ pMbLayer->residual.coeffMap);
+#endif
+ if (tmp != HANTRO_OK)
+ return (tmp);
+ }
+ else
+ {
+ /* P_Skip: no residual, QP is left unchanged */
+ H264SwDecMemset(pMb->totalCoeff, 0, 27*sizeof(*pMb->totalCoeff));
+ pMb->qpY = (u32)*qpY;
+ }
+#ifdef H264DEC_OMXDL
+ /* if decoded flag > 1 -> mb has already been successfully decoded and
+ * written to output -> do not write again */
+ if (pMb->decoded > 1)
+ return HANTRO_OK;
+
+ h264bsdWriteMacroblock(currImage, data);
+#else
+ /* without OMXDL the prediction is run after the residual has been
+ * processed */
+ if (h264bsdMbPartPredMode(mbType) != PRED_MODE_INTER)
+ {
+ tmp = h264bsdIntraPrediction(pMb, pMbLayer, currImage, mbNum,
+ constrainedIntraPredFlag, (u8*)data);
+ if (tmp != HANTRO_OK) return (tmp);
+ }
+ else
+ {
+ tmp = h264bsdInterPrediction(pMb, pMbLayer, dpb, mbNum,
+ currImage, (u8*)data);
+ if (tmp != HANTRO_OK) return (tmp);
+ }
+#endif
+ }
+
+ return HANTRO_OK;
+}
+
+
+#ifdef H264DEC_OMXDL
+
+/*------------------------------------------------------------------------------
+
+    Function: ProcessChromaResidual
+
+        Functional description:
+            Process the residual data of chroma with
+            inverse quantization and inverse transform.
+
+        Inputs:
+            pMb             macroblock whose qpY, chromaQpIndexOffset and
+                            totalCoeff values are used
+            data            macroblock sample buffer, chroma blocks are
+                            located through chromaIndex
+            pSrc            position in the coefficient buffer, handed to
+                            every OMX call
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      an OMX function reported an error
+
+------------------------------------------------------------------------------*/
+u32 ProcessChromaResidual(mbStorage_t *pMb, u8 *data, const u8 **pSrc )
+{
+    i16 dcCoeff[4 + 4] = {0,0,0,0,0,0,0,0};
+    u32 chromaQp;
+    u32 blk;
+    u8 *dst;
+
+    chromaQp =
+        h264bsdQpC[CLIP3(0, 51, (i32)pMb->qpY + pMb->chromaQpIndexOffset)];
+
+    /* chroma DC processing. The two chroma dc blocks have indices 25 and
+     * 26, each of them fills four entries of dcCoeff */
+    for (blk = 0; blk < 2; blk++)
+    {
+        if (pMb->totalCoeff[25 + blk])
+        {
+            if (omxVCM4P10_TransformDequantChromaDCFromPair(
+                    pSrc,
+                    dcCoeff + 4*blk,
+                    (i32)chromaQp) != OMX_Sts_NoErr)
+                return (HANTRO_NOK);
+        }
+    }
+
+    /* the eight chroma blocks follow the 16 luma blocks in totalCoeff;
+     * a block is skipped only if it has neither coefficients nor a dc */
+    for (blk = 0; blk < 8; blk++)
+    {
+        if (!pMb->totalCoeff[16 + blk] && !dcCoeff[blk])
+            continue;
+
+        dst = data + chromaIndex[blk];
+        if (omxVCM4P10_DequantTransformResidualFromPairAndAdd(
+                pSrc,
+                dst,
+                &dcCoeff[blk],
+                dst,
+                8,
+                8,
+                (i32)chromaQp,
+                pMb->totalCoeff[16 + blk]) != OMX_Sts_NoErr)
+            return (HANTRO_NOK);
+    }
+
+    return(HANTRO_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: ProcessIntra16x16Residual
+
+        Functional description:
+            Runs intra 16x16 luma prediction and intra chroma prediction
+            for the macroblock and processes the residual data of luma
+            with inverse quantization and inverse transform.
+
+        Inputs:
+            pMb                     macroblock being decoded
+            data                    macroblock sample buffer, chroma
+                                    prediction is given data + 256
+            constrainedIntraPred    passed on to the prediction functions
+            intraChromaPredMode     passed on to chroma prediction
+            pSrc                    position in the coefficient buffer
+            image                   current output image
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      prediction or an OMX function failed
+
+------------------------------------------------------------------------------*/
+u32 ProcessIntra16x16Residual(mbStorage_t *pMb,
+                              u8 *data,
+                              u32 constrainedIntraPred,
+                              u32 intraChromaPredMode,
+                              const u8** pSrc,
+                              image_t *image)
+{
+    i16 lumaDc[16] = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0};
+    i16 *blkDc;
+    u8 *blk;
+    u32 n;
+
+    /* luma dc block has index 24 */
+    if (pMb->totalCoeff[24])
+    {
+        if (omxVCM4P10_TransformDequantLumaDCFromPair(
+                pSrc,
+                lumaDc,
+                (i32)pMb->qpY) != OMX_Sts_NoErr)
+            return (HANTRO_NOK);
+    }
+
+    /* Intra 16x16 pred */
+    if (h264bsdIntra16x16Prediction(pMb, data, image->luma,
+            image->width*16, constrainedIntraPred) != HANTRO_OK)
+        return(HANTRO_NOK);
+
+    /* a luma block is skipped only if it has neither coefficients nor a
+     * dc value, dcCoeffIndex selects the dc belonging to the block */
+    for (n = 0; n < 16; n++)
+    {
+        blkDc = &lumaDc[dcCoeffIndex[n]];
+        if (!pMb->totalCoeff[n] && !*blkDc)
+            continue;
+
+        blk = data + lumaIndex[n];
+        if (omxVCM4P10_DequantTransformResidualFromPairAndAdd(
+                pSrc,
+                blk,
+                blkDc,
+                blk,
+                16,
+                16,
+                (i32)pMb->qpY,
+                pMb->totalCoeff[n]) != OMX_Sts_NoErr)
+            return (HANTRO_NOK);
+    }
+
+    if (h264bsdIntraChromaPrediction(pMb, data + 256,
+            image,
+            intraChromaPredMode,
+            constrainedIntraPred) != HANTRO_OK)
+        return(HANTRO_NOK);
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: ProcessIntra4x4Residual
+
+        Functional description:
+            Runs intra 4x4 prediction block by block, adding the luma
+            residual (inverse quantization and inverse transform) to each
+            block right after it has been predicted, and finally runs
+            intra chroma prediction.
+
+        Inputs:
+            pMb                     macroblock being decoded
+            data                    macroblock sample buffer, chroma
+                                    prediction is given data + 256
+            constrainedIntraPred    passed on to the prediction functions
+            mbLayer                 macroblock layer data from stream
+            pSrc                    position in the coefficient buffer
+            image                   current output image
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      prediction or an OMX function failed
+
+------------------------------------------------------------------------------*/
+u32 ProcessIntra4x4Residual(mbStorage_t *pMb,
+                            u8 *data,
+                            u32 constrainedIntraPred,
+                            macroblockLayer_t *mbLayer,
+                            const u8 **pSrc,
+                            image_t *image)
+{
+    u32 n;
+    u8 *blk;
+
+    for (n = 0; n < 16; n++)
+    {
+        blk = data + lumaIndex[n];
+
+        if (h264bsdIntra4x4Prediction(pMb, blk, mbLayer, image->luma,
+                image->width*16, constrainedIntraPred, n) != HANTRO_OK)
+            return(HANTRO_NOK);
+
+        /* nothing to add for a block without coefficients */
+        if (!pMb->totalCoeff[n])
+            continue;
+
+        if (omxVCM4P10_DequantTransformResidualFromPairAndAdd(
+                pSrc,
+                blk,
+                NULL,
+                blk,
+                16,
+                16,
+                (i32)pMb->qpY,
+                pMb->totalCoeff[n]) != OMX_Sts_NoErr)
+            return (HANTRO_NOK);
+    }
+
+    if (h264bsdIntraChromaPrediction(pMb, data + 256,
+            image,
+            mbLayer->mbPred.intraChromaPredMode,
+            constrainedIntraPred) != HANTRO_OK)
+        return(HANTRO_NOK);
+
+    return HANTRO_OK;
+}
+
+#else /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+    Function: ProcessResidual
+
+        Functional description:
+            Process the residual data of one macroblock with
+            inverse quantization and inverse transform.
+
+        Inputs:
+            pMb             macroblock whose type, qpY and totalCoeff
+                            values are used
+            residualLevel   coefficient blocks: 0..15 luma, 16..23 chroma,
+                            24 luma dc, 25 chroma dc (eight values)
+            coeffMap        one value per luma/chroma block, passed on to
+                            h264bsdProcessBlock
+
+        Outputs:
+            residualLevel   blocks processed in place, blocks without any
+                            data are marked with MARK_RESIDUAL_EMPTY
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      h264bsdProcessBlock failed
+
+------------------------------------------------------------------------------*/
+
+u32 ProcessResidual(mbStorage_t *pMb, i32 residualLevel[][16], u32 *coeffMap)
+{
+
+/* Variables */
+
+    u32 blk;
+    u32 chromaQp;
+    u32 intra16x16;
+    i32 *lumaDc;
+    i32 *chromaDc;
+
+/* Code */
+
+    ASSERT(pMb);
+    ASSERT(residualLevel);
+
+    /* dc coefficient blocks */
+    lumaDc = residualLevel[24];
+    chromaDc = residualLevel[25];
+
+    intra16x16 =
+        (h264bsdMbPartPredMode(pMb->mbType) == PRED_MODE_INTRA16x16);
+
+    if (intra16x16 && pMb->totalCoeff[24])
+        h264bsdProcessLumaDc(residualLevel[24], pMb->qpY);
+
+    /* luma blocks. Only intra 16x16 macroblocks get their dc coefficient
+     * from the separate dc block, and for them a non-zero dc alone is
+     * enough to require processing */
+    for (blk = 0; blk < 16; blk++)
+    {
+        if (intra16x16)
+            residualLevel[blk][0] = lumaDc[dcCoeffIndex[blk]];
+
+        if ((intra16x16 && residualLevel[blk][0]) || pMb->totalCoeff[blk])
+        {
+            if (h264bsdProcessBlock(residualLevel[blk], pMb->qpY,
+                    intra16x16, coeffMap[blk]) != HANTRO_OK)
+                return(HANTRO_NOK);
+        }
+        else
+            MARK_RESIDUAL_EMPTY(residualLevel[blk]);
+    }
+
+    /* chroma DC processing. First chroma dc block is block with index 25 */
+    chromaQp =
+        h264bsdQpC[CLIP3(0, 51, (i32)pMb->qpY + pMb->chromaQpIndexOffset)];
+    if (pMb->totalCoeff[25] || pMb->totalCoeff[26])
+        h264bsdProcessChromaDc(residualLevel[25], chromaQp);
+
+    /* chroma blocks 16..23, dc coefficients taken in order from the
+     * chroma dc block */
+    for (blk = 0; blk < 8; blk++)
+    {
+        residualLevel[16 + blk][0] = chromaDc[blk];
+
+        if (residualLevel[16 + blk][0] || pMb->totalCoeff[16 + blk])
+        {
+            if (h264bsdProcessBlock(residualLevel[16 + blk], chromaQp, 1,
+                    coeffMap[16 + blk]) != HANTRO_OK)
+                return(HANTRO_NOK);
+        }
+        else
+            MARK_RESIDUAL_EMPTY(residualLevel[16 + blk]);
+    }
+
+    return(HANTRO_OK);
+}
+#endif /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdSubMbPartMode
+
+        Functional description:
+            Returns the macroblock's sub-partition mode. The numeric value
+            of the sub-macroblock type is used as the mode without any
+            mapping.
+
+        Inputs:
+            subMbType       sub-macroblock type, has to be below 4
+
+------------------------------------------------------------------------------*/
+
+subMbPartMode_e h264bsdSubMbPartMode(subMbType_e subMbType)
+{
+
+    subMbPartMode_e mode;
+
+    ASSERT(subMbType < 4);
+
+    mode = (subMbPartMode_e)subMbType;
+
+    return(mode);
+
+}
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.h
new file mode 100755
index 0000000..32bc340
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_macroblock_layer.h
@@ -0,0 +1,212 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_MACROBLOCK_LAYER_H
+#define H264SWDEC_MACROBLOCK_LAYER_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_image.h"
+#include "h264bsd_dpb.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/* Macro to determine if a mb is an intra mb: every mbType_e value above
+ * the last P type (P_8x8ref0) is an intra type */
+#define IS_INTRA_MB(a) ((a).mbType > P_8x8ref0)
+
+/* Macro to determine if a mb is an I_PCM mb */
+#define IS_I_PCM_MB(a) ((a).mbType == I_PCM)
+
+/* Macroblock types. Values 0..5 are the P types, everything above 5 is an
+ * intra type (see IS_INTRA_MB).
+ *
+ * The intra 16x16 types are named I_16x16_<p>_<c>_<l> and their order
+ * matters because other code derives values from the offset to
+ * I_16x16_0_0_0:
+ * <p> prediction mode, offset & 3 (h264bsdPredModeIntra16x16)
+ * <c> chroma part of the coded block pattern, (offset >> 2) reduced to
+ * 0..2 and stored at bits 4-5 (CbpIntra16x16)
+ * <l> 1 if the low four bits of the coded block pattern are set to 15,
+ * 0 if they are clear (CbpIntra16x16) */
+typedef enum {
+ P_Skip = 0,
+ P_L0_16x16 = 1,
+ P_L0_L0_16x8 = 2,
+ P_L0_L0_8x16 = 3,
+ P_8x8 = 4,
+ P_8x8ref0 = 5,
+ I_4x4 = 6,
+ I_16x16_0_0_0 = 7,
+ I_16x16_1_0_0 = 8,
+ I_16x16_2_0_0 = 9,
+ I_16x16_3_0_0 = 10,
+ I_16x16_0_1_0 = 11,
+ I_16x16_1_1_0 = 12,
+ I_16x16_2_1_0 = 13,
+ I_16x16_3_1_0 = 14,
+ I_16x16_0_2_0 = 15,
+ I_16x16_1_2_0 = 16,
+ I_16x16_2_2_0 = 17,
+ I_16x16_3_2_0 = 18,
+ I_16x16_0_0_1 = 19,
+ I_16x16_1_0_1 = 20,
+ I_16x16_2_0_1 = 21,
+ I_16x16_3_0_1 = 22,
+ I_16x16_0_1_1 = 23,
+ I_16x16_1_1_1 = 24,
+ I_16x16_2_1_1 = 25,
+ I_16x16_3_1_1 = 26,
+ I_16x16_0_2_1 = 27,
+ I_16x16_1_2_1 = 28,
+ I_16x16_2_2_1 = 29,
+ I_16x16_3_2_1 = 30,
+ I_PCM = 31
+} mbType_e;
+
+/* Sub-macroblock types. h264bsdSubMbPartMode casts these values directly
+ * to subMbPartMode_e, so the two enums have to stay in the same order. */
+typedef enum {
+ P_L0_8x8 = 0,
+ P_L0_8x4 = 1,
+ P_L0_4x8 = 2,
+ P_L0_4x4 = 3
+} subMbType_e;
+
+/* Macroblock partition modes (presumably the partitioning of a P
+ * macroblock, judging by the names - TODO confirm) */
+typedef enum {
+ MB_P_16x16 = 0,
+ MB_P_16x8,
+ MB_P_8x16,
+ MB_P_8x8
+} mbPartMode_e;
+
+/* Sub-macroblock partition modes, value for value the same as
+ * subMbType_e (see h264bsdSubMbPartMode) */
+typedef enum {
+ MB_SP_8x8 = 0,
+ MB_SP_8x4,
+ MB_SP_4x8,
+ MB_SP_4x4
+} subMbPartMode_e;
+
+/* Prediction mode of a macroblock as returned by h264bsdMbPartPredMode */
+typedef enum {
+ PRED_MODE_INTRA4x4 = 0,
+ PRED_MODE_INTRA16x16 ,
+ PRED_MODE_INTER
+} mbPartPredMode_e;
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/* Motion vector with a horizontal and a vertical component */
+typedef struct
+{
+ /* MvPrediction16x16 assumes that MVs are 16bits */
+ i16 hor;
+ i16 ver;
+} mv_t;
+
+/* Prediction data of a macroblock: 16 entries for the intra 4x4 fields,
+ * 4 entries for the reference index and motion vector difference fields
+ * (presumably one per 4x4 block and one per partition respectively -
+ * TODO confirm) */
+typedef struct
+{
+ u32 prevIntra4x4PredModeFlag[16];
+ u32 remIntra4x4PredMode[16];
+ /* handed to h264bsdIntraChromaPrediction for intra macroblocks */
+ u32 intraChromaPredMode;
+ u32 refIdxL0[4];
+ mv_t mvdL0[4];
+} mbPred_t;
+
+/* Prediction data of the four sub-macroblocks */
+typedef struct
+{
+ subMbType_e subMbType[4];
+ u32 refIdxL0[4];
+ /* presumably indexed [sub-macroblock][sub-partition] - TODO confirm */
+ mv_t mvdL0[4][4];
+} subMbPred_t;
+
+/* Residual data of one macroblock.
+ * Block numbering used for totalCoeff: 0..15 luma, 16..23 chroma,
+ * 24 luma dc, 25 and 26 chroma dc. */
+typedef struct
+{
+#ifdef H264DEC_OMXDL
+ /* coefficient data consumed by the OMX DL residual functions, which
+ * advance a pointer through this buffer */
+ u8 posCoefBuf[27*16*3];
+ u8 totalCoeff[27];
+#else
+ i16 totalCoeff[27];
+#endif
+ /* coefficient levels: 0..23 as above, 24 luma dc, 25 holds both chroma
+ * dc blocks (entries 0..3 and 4..7) */
+ i32 level[26][16];
+ /* one value per luma/chroma block, handed to h264bsdProcessBlock */
+ u32 coeffMap[24];
+} residual_t;
+
+/* Macroblock data as read from the stream, input of
+ * h264bsdDecodeMacroblock */
+typedef struct
+{
+ mbType_e mbType;
+ u32 codedBlockPattern;
+ /* added to the running QP by h264bsdDecodeMacroblock, the result is
+ * wrapped into 0..51 */
+ i32 mbQpDelta;
+ mbPred_t mbPred;
+ subMbPred_t subMbPred;
+ residual_t residual;
+} macroblockLayer_t;
+
+/* Per-macroblock state kept by the decoder, one element per macroblock
+ * of the picture */
+typedef struct mbStorage
+{
+ mbType_e mbType;
+ u32 sliceId;
+ u32 disableDeblockingFilterIdc;
+ i32 filterOffsetA;
+ i32 filterOffsetB;
+ /* luma QP of the macroblock, set to 0 for I_PCM */
+ u32 qpY;
+ /* added to qpY (result clipped to 0..51) to index h264bsdQpC */
+ i32 chromaQpIndexOffset;
+ /* coefficient count per block: 0..15 luma, 16..23 chroma, 24 luma dc,
+ * 25 and 26 chroma dc */
+#ifdef H264DEC_OMXDL
+ u8 totalCoeff[27];
+#else
+ i16 totalCoeff[27];
+#endif
+ u8 intra4x4PredMode[16];
+ u32 refPic[4];
+ u8* refAddr[4];
+ mv_t mv[16];
+ /* incremented on every h264bsdDecodeMacroblock call, a value above 1
+ * means the macroblock has already been written to the output */
+ u32 decoded;
+ /* neighbours set by h264bsdInitMbNeighbours: left, above, above-right
+ * and above-left, NULL if outside the picture */
+ struct mbStorage *mbA;
+ struct mbStorage *mbB;
+ struct mbStorage *mbC;
+ struct mbStorage *mbD;
+} mbStorage_t;
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeMacroblockLayer(strmData_t *pStrmData,
+ macroblockLayer_t *pMbLayer, mbStorage_t *pMb, u32 sliceType,
+ u32 numRefIdxActive);
+
+u32 h264bsdNumMbPart(mbType_e mbType);
+u32 h264bsdNumSubMbPart(subMbType_e subMbType);
+
+subMbPartMode_e h264bsdSubMbPartMode(subMbType_e subMbType);
+
+u32 h264bsdDecodeMacroblock(mbStorage_t *pMb, macroblockLayer_t *pMbLayer,
+ image_t *currImage, dpbStorage_t *dpb, i32 *qpY, u32 mbNum,
+ u32 constrainedIntraPredFlag, u8* data);
+
+u32 h264bsdPredModeIntra16x16(mbType_e mbType);
+
+mbPartPredMode_e h264bsdMbPartPredMode(mbType_e mbType);
+#ifdef H264DEC_NEON
+u32 h264bsdClearMbLayer(macroblockLayer_t *pMbLayer, u32 size);
+#endif
+
+#endif /* #ifndef H264SWDEC_MACROBLOCK_LAYER_H */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.c
new file mode 100755
index 0000000..e44c43a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.c
@@ -0,0 +1,117 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdDecodeNalUnit
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_nal_unit.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+    Function name: h264bsdDecodeNalUnit
+
+        Functional description:
+            Decode NAL unit header information: one forbidden_zero_bit,
+            two bits of nal_ref_idc and five bits of nal_unit_type. The
+            combination of the latter two is validated.
+
+        Inputs:
+            pStrmData       pointer to stream data structure, has to be
+                            positioned at a byte boundary
+
+        Outputs:
+            pNalUnit        NAL unit header information is stored here
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      invalid NAL unit header information
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeNalUnit(strmData_t *pStrmData, nalUnit_t *pNalUnit)
+{
+
+/* Variables */
+
+    u32 refIdc;
+    u32 type;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pNalUnit);
+    ASSERT(pStrmData->bitPosInWord == 0);
+
+    /* forbidden_zero_bit (not checked to be zero, errors ignored).
+     * Assuming that NAL unit starts from byte boundary -> don't have to
+     * check following 7 bits for END_OF_STREAM */
+    if (h264bsdGetBits(pStrmData, 1) == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    refIdc = h264bsdGetBits(pStrmData, 2);
+    pNalUnit->nalRefIdc = refIdc;
+
+    type = h264bsdGetBits(pStrmData, 5);
+    pNalUnit->nalUnitType = (nalUnitType_e)type;
+
+    switch (type)
+    {
+        /* data partitioning NAL units not supported */
+        case 2:
+        case 3:
+        case 4:
+            return(HANTRO_NOK);
+
+        /* nal_ref_idc shall not be zero for these nal_unit_types */
+        case NAL_SEQ_PARAM_SET:
+        case NAL_PIC_PARAM_SET:
+        case NAL_CODED_SLICE_IDR:
+            if (refIdc == 0)
+                return(HANTRO_NOK);
+            break;
+
+        /* nal_ref_idc shall be zero for these nal_unit_types */
+        case NAL_SEI:
+        case NAL_ACCESS_UNIT_DELIMITER:
+        case NAL_END_OF_SEQUENCE:
+        case NAL_END_OF_STREAM:
+        case NAL_FILLER_DATA:
+            if (refIdc != 0)
+                return(HANTRO_NOK);
+            break;
+
+        default:
+            break;
+    }
+
+    return(HANTRO_OK);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.h
new file mode 100755
index 0000000..38957bf
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_nal_unit.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_NAL_UNIT_H
+#define H264SWDEC_NAL_UNIT_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/* macro to determine if NAL unit pointed by pNalUnit contains an IDR slice */
+#define IS_IDR_NAL_UNIT(pNalUnit) \
+ ((pNalUnit)->nalUnitType == NAL_CODED_SLICE_IDR)
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/* NAL unit types known to the decoder. The type is read from a five bit
+ * field, hence the maximum of 31. Types 2, 3 and 4 have no enumerator:
+ * h264bsdDecodeNalUnit rejects them as unsupported data partitioning
+ * NAL units. */
+typedef enum {
+ NAL_CODED_SLICE = 1,
+ NAL_CODED_SLICE_IDR = 5,
+ NAL_SEI = 6,
+ NAL_SEQ_PARAM_SET = 7,
+ NAL_PIC_PARAM_SET = 8,
+ NAL_ACCESS_UNIT_DELIMITER = 9,
+ NAL_END_OF_SEQUENCE = 10,
+ NAL_END_OF_STREAM = 11,
+ NAL_FILLER_DATA = 12,
+ NAL_MAX_TYPE_VALUE = 31
+} nalUnitType_e;
+
+/* NAL unit header information filled in by h264bsdDecodeNalUnit */
+typedef struct
+{
+ nalUnitType_e nalUnitType;
+ /* two bit nal_ref_idc field of the header */
+ u32 nalRefIdc;
+} nalUnit_t;
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeNalUnit(strmData_t *pStrmData, nalUnit_t *pNalUnit);
+
+#endif /* #ifndef H264SWDEC_NAL_UNIT_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.c
new file mode 100755
index 0000000..ce5eeff
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.c
@@ -0,0 +1,382 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdInitMbNeighbours
+ h264bsdGetNeighbourMb
+ h264bsdNeighbour4x4BlockA
+ h264bsdNeighbour4x4BlockB
+ h264bsdNeighbour4x4BlockC
+ h264bsdNeighbour4x4BlockD
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_neighbour.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/* Following four tables indicate neighbours of each block of a macroblock.
+ * First 16 values are for luma blocks, next 4 values for Cb and last 4
+ * values for Cr. Elements of the table indicate to which macroblock the
+ * neighbour block belongs and the index of the neighbour block in question.
+ * Indexing of the blocks goes as follows
+ *
+ * Y Cb Cr
+ * 0 1 4 5 16 17 20 21
+ * 2 3 6 7 18 19 22 23
+ * 8 9 12 13
+ * 10 11 14 15
+ */
+
+/* left neighbour for each block: blocks in the leftmost column of a
+ * component refer to macroblock A, all others to the current macroblock */
+static const neighbour_t N_A_4x4B[24] = {
+ {MB_A,5}, {MB_CURR,0}, {MB_A,7}, {MB_CURR,2},
+ {MB_CURR,1}, {MB_CURR,4}, {MB_CURR,3}, {MB_CURR,6},
+ {MB_A,13}, {MB_CURR,8}, {MB_A,15}, {MB_CURR,10},
+ {MB_CURR,9}, {MB_CURR,12},{MB_CURR,11},{MB_CURR,14},
+ {MB_A,17}, {MB_CURR,16},{MB_A,19}, {MB_CURR,18},
+ {MB_A,21}, {MB_CURR,20},{MB_A,23}, {MB_CURR,22} };
+
+/* above neighbour for each block: blocks in the top row of a component
+ * refer to macroblock B, all others to the current macroblock */
+static const neighbour_t N_B_4x4B[24] = {
+ {MB_B,10}, {MB_B,11}, {MB_CURR,0}, {MB_CURR,1},
+ {MB_B,14}, {MB_B,15}, {MB_CURR,4}, {MB_CURR,5},
+ {MB_CURR,2}, {MB_CURR,3}, {MB_CURR,8}, {MB_CURR,9},
+ {MB_CURR,6}, {MB_CURR,7}, {MB_CURR,12},{MB_CURR,13},
+ {MB_B,18}, {MB_B,19}, {MB_CURR,16},{MB_CURR,17},
+ {MB_B,22}, {MB_B,23}, {MB_CURR,20},{MB_CURR,21} };
+
+/* above-right neighbour for each block. Entries tagged MB_NA have no
+ * usable neighbour: h264bsdGetNeighbourMb maps MB_NA to NULL. The index
+ * stored with such entries is presumably unused - TODO confirm */
+static const neighbour_t N_C_4x4B[24] = {
+ {MB_B,11}, {MB_B,14}, {MB_CURR,1}, {MB_NA,4},
+ {MB_B,15}, {MB_C,10}, {MB_CURR,5}, {MB_NA,0},
+ {MB_CURR,3}, {MB_CURR,6}, {MB_CURR,9}, {MB_NA,12},
+ {MB_CURR,7}, {MB_NA,2}, {MB_CURR,13},{MB_NA,8},
+ {MB_B,19}, {MB_C,18}, {MB_CURR,17},{MB_NA,16},
+ {MB_B,23}, {MB_C,22}, {MB_CURR,21},{MB_NA,20} };
+
+/* above-left neighbour for each block: depending on the position the
+ * neighbour lies in macroblock D, B, A or in the current macroblock */
+static const neighbour_t N_D_4x4B[24] = {
+ {MB_D,15}, {MB_B,10}, {MB_A,5}, {MB_CURR,0},
+ {MB_B,11}, {MB_B,14}, {MB_CURR,1}, {MB_CURR,4},
+ {MB_A,7}, {MB_CURR,2}, {MB_A,13}, {MB_CURR,8},
+ {MB_CURR,3}, {MB_CURR,6}, {MB_CURR,9}, {MB_CURR,12},
+ {MB_D,19}, {MB_B,18}, {MB_A,17}, {MB_CURR,16},
+ {MB_D,23}, {MB_B,22}, {MB_A,21}, {MB_CURR,20} };
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdInitMbNeighbours
+
+        Functional description:
+            Initialize macroblock neighbours. Function sets neighbour
+            macroblock pointers in macroblock structures to point to
+            macroblocks on the left (A), above (B), above-right (C) and
+            above-left (D). Pointers are set NULL if the neighbour does
+            not fit into the picture.
+
+        Inputs:
+            picWidth        width of the picture in macroblocks
+            picSizeInMbs    total number of macroblocks in the picture,
+                            a multiple of picWidth
+
+        Outputs:
+            pMbStorage      neighbour pointers of each mbStorage structure
+                            stored here
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInitMbNeighbours(mbStorage_t *pMbStorage, u32 picWidth,
+    u32 picSizeInMbs)
+{
+
+/* Variables */
+
+    mbStorage_t *mb;
+    u32 mbIdx;
+    u32 x, y;
+
+/* Code */
+
+    ASSERT(pMbStorage);
+    ASSERT(picWidth);
+    ASSERT(picWidth <= picSizeInMbs);
+    ASSERT(((picSizeInMbs / picWidth) * picWidth) == picSizeInMbs);
+
+    /* x and y track the column and row of macroblock mbIdx */
+    x = 0;
+    y = 0;
+
+    for (mbIdx = 0; mbIdx < picSizeInMbs; mbIdx++)
+    {
+        mb = pMbStorage + mbIdx;
+
+        /* no left neighbour in the first column, no neighbours above in
+         * the first row, no above-right neighbour in the last column */
+        mb->mbA = x ? mb - 1 : NULL;
+        mb->mbB = y ? mb - picWidth : NULL;
+        mb->mbC = (y && (x < picWidth - 1)) ? mb - (picWidth - 1) : NULL;
+        mb->mbD = (y && x) ? mb - (picWidth + 1) : NULL;
+
+        if (++x == picWidth)
+        {
+            x = 0;
+            y++;
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdGetNeighbourMb
+
+        Functional description:
+            Get pointer to neighbour macroblock.
+
+        Inputs:
+            pMb             pointer to macroblock structure of the
+                            macroblock whose neighbour is wanted
+            neighbour       indicates which neighbour is wanted, MB_CURR
+                            selects the macroblock itself
+
+        Outputs:
+            none
+
+        Returns:
+            pointer to neighbour macroblock
+            NULL if not available (also for MB_NA)
+
+------------------------------------------------------------------------------*/
+
+mbStorage_t* h264bsdGetNeighbourMb(mbStorage_t *pMb, neighbourMb_e neighbour)
+{
+
+/* Variables */
+
+    mbStorage_t *result;
+
+/* Code */
+
+    ASSERT((neighbour <= MB_CURR) || (neighbour == MB_NA));
+
+    switch (neighbour)
+    {
+        case MB_A:
+            result = pMb->mbA;
+            break;
+        case MB_B:
+            result = pMb->mbB;
+            break;
+        case MB_C:
+            result = pMb->mbC;
+            break;
+        case MB_D:
+            result = pMb->mbD;
+            break;
+        case MB_CURR:
+            result = pMb;
+            break;
+        default:
+            result = NULL;
+            break;
+    }
+
+    return(result);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdNeighbour4x4BlockA
+
+ Functional description:
+ Get left neighbour of the block. Function returns pointer to
+ the table defined in the beginning of the file.
+
+ Inputs:
+ blockIndex indicates the block whose neighbours are wanted
+
+ Outputs:
+
+ Returns:
+ pointer to neighbour structure
+
+------------------------------------------------------------------------------*/
+
+const neighbour_t* h264bsdNeighbour4x4BlockA(u32 blockIndex)
+{
+
+/* Variables */
+
+/* Code */
+
+    /* valid block indices are 0..23 */
+    ASSERT(blockIndex < 24);
+
+    return(&N_A_4x4B[blockIndex]);
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdNeighbour4x4BlockB
+
+ Functional description:
+ Get above neighbour of the block. Function returns pointer to
+ the table defined in the beginning of the file.
+
+ Inputs:
+ blockIndex indicates the block whose neighbours are wanted
+
+ Outputs:
+
+ Returns:
+ pointer to neighbour structure
+
+------------------------------------------------------------------------------*/
+
+const neighbour_t* h264bsdNeighbour4x4BlockB(u32 blockIndex)
+{
+
+/* Variables */
+
+/* Code */
+
+    /* valid block indices are 0..23 */
+    ASSERT(blockIndex < 24);
+
+    return(&N_B_4x4B[blockIndex]);
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdNeighbour4x4BlockC
+
+ Functional description:
+ Get above-right neighbour of the block. Function returns pointer
+ to the table defined in the beginning of the file.
+
+ Inputs:
+ blockIndex indicates the block whose neighbours are wanted
+
+ Outputs:
+
+ Returns:
+ pointer to neighbour structure
+
+------------------------------------------------------------------------------*/
+
+const neighbour_t* h264bsdNeighbour4x4BlockC(u32 blockIndex)
+{
+
+/* Variables */
+
+/* Code */
+
+    /* valid block indices are 0..23 */
+    ASSERT(blockIndex < 24);
+
+    return(&N_C_4x4B[blockIndex]);
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdNeighbour4x4BlockD
+
+ Functional description:
+ Get above-left neighbour of the block. Function returns pointer to
+ the table defined in the beginning of the file.
+
+ Inputs:
+ blockIndex indicates the block whose neighbours are wanted
+
+ Outputs:
+
+ Returns:
+ pointer to neighbour structure
+
+------------------------------------------------------------------------------*/
+
+const neighbour_t* h264bsdNeighbour4x4BlockD(u32 blockIndex)
+{
+
+/* Variables */
+
+/* Code */
+
+    /* valid block indices are 0..23 */
+    ASSERT(blockIndex < 24);
+
+    return(&N_D_4x4B[blockIndex]);
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdIsNeighbourAvailable
+
+ Functional description:
+ Check if neighbour macroblock is available. Neighbour macroblock
+ is considered available if it is within the picture and belongs
+ to the same slice as the current macroblock.
+
+ Inputs:
+ pMb pointer to the current macroblock
+ pNeighbour pointer to the neighbour macroblock
+
+ Outputs:
+ none
+
+ Returns:
+ TRUE neighbour is available
+ FALSE neighbour is not available
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdIsNeighbourAvailable(mbStorage_t *pMb, mbStorage_t *pNeighbour)
+{
+
+/* Variables */
+
+/* Code */
+
+    /* no neighbour pointer -> nothing to compare against */
+    if (pNeighbour == NULL)
+        return(HANTRO_FALSE);
+
+    return((pNeighbour->sliceId == pMb->sliceId) ? HANTRO_TRUE : HANTRO_FALSE);
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.h
new file mode 100755
index 0000000..fce0ad1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_neighbour.h
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_NEIGHBOUR_H
+#define H264SWDEC_NEIGHBOUR_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_macroblock_layer.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+typedef enum {
+ MB_A = 0, /* left neighbour (mbStorage_t.mbA) */
+ MB_B, /* above neighbour (mbB) */
+ MB_C, /* above-right neighbour (mbC) */
+ MB_D, /* above-left neighbour (mbD) */
+ MB_CURR, /* the current macroblock itself */
+ MB_NA = 0xFF /* no macroblock; h264bsdGetNeighbourMb() returns NULL for it */
+} neighbourMb_e;
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+typedef struct
+{
+ neighbourMb_e mb; /* selects which macroblock this entry refers to */
+ u8 index; /* presumably the block index inside that macroblock -- TODO confirm */
+} neighbour_t;
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+void h264bsdInitMbNeighbours(mbStorage_t *pMbStorage, u32 picWidth,
+ u32 picSizeInMbs);
+
+mbStorage_t* h264bsdGetNeighbourMb(mbStorage_t *pMb, neighbourMb_e neighbour);
+
+u32 h264bsdIsNeighbourAvailable(mbStorage_t *pMb, mbStorage_t *pNeighbour);
+
+const neighbour_t* h264bsdNeighbour4x4BlockA(u32 blockIndex);
+const neighbour_t* h264bsdNeighbour4x4BlockB(u32 blockIndex);
+const neighbour_t* h264bsdNeighbour4x4BlockC(u32 blockIndex);
+const neighbour_t* h264bsdNeighbour4x4BlockD(u32 blockIndex);
+
+#endif /* #ifndef H264SWDEC_NEIGHBOUR_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.c
new file mode 100755
index 0000000..fb23352
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.c
@@ -0,0 +1,347 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdDecodePicOrderCnt
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_util.h"
+#include "h264bsd_pic_order_cnt.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdDecodePicOrderCnt
+
+ Functional description:
+ Compute picture order count for a picture. Function implements
+ computation of all POC types (0, 1 and 2), type is obtained from
+ sps. See standard for description of the POC types and how POC is
+ computed for each type.
+
+ Function returns the minimum of top field and bottom field pic
+ order counts.
+
+ Inputs:
+ poc pointer to previous results
+ sps pointer to sequence parameter set
+ pSliceHeader pointer to current slice header, frame number and
+ other params needed for POC computation
+ pNalUnit pointer to current NAL unit structure, function needs
+ to know if this is an IDR picture and also if this is
+ a reference picture
+
+ Outputs:
+ poc results stored here for computation of next POC
+
+ Returns:
+ picture order count
+
+------------------------------------------------------------------------------*/
+
+i32 h264bsdDecodePicOrderCnt(pocStorage_t *poc, seqParamSet_t *sps,
+ sliceHeader_t *pSliceHeader, nalUnit_t *pNalUnit)
+{
+ /* Returns the POC of the current picture; *poc is updated for the next call */
+/* Variables */
+
+ u32 i;
+ i32 picOrderCnt;
+ u32 frameNumOffset, absFrameNum, picOrderCntCycleCnt;
+ u32 frameNumInPicOrderCntCycle;
+ i32 expectedDeltaPicOrderCntCycle;
+ u32 containsMmco5;
+
+/* Code */
+
+ ASSERT(poc);
+ ASSERT(sps);
+ ASSERT(pSliceHeader);
+ ASSERT(pNalUnit);
+ ASSERT(sps->picOrderCntType <= 2);
+ /* the block below is compiled out (#if 0); its own comment explains why */
+#if 0
+ /* JanSa: I don't think this is necessary, don't see any reason to
+ * increment prevFrameNum one by one instead of one big increment.
+ * However, standard specifies that this should be done -> if someone
+ * figures out any case when the outcome would be different for step by
+ * step increment, this part of the code should be enabled */
+
+ /* if there was a gap in frame numbering and picOrderCntType is 1 or 2 ->
+ * "compute" pic order counts for non-existing frames. These are not
+ * actually computed, but process needs to be done to update the
+ * prevFrameNum and prevFrameNumOffset */
+ if ( sps->picOrderCntType > 0 &&
+ pSliceHeader->frameNum != poc->prevFrameNum &&
+ pSliceHeader->frameNum != ((poc->prevFrameNum + 1) % sps->maxFrameNum))
+ {
+
+ /* use variable i for unUsedShortTermFrameNum */
+ i = (poc->prevFrameNum + 1) % sps->maxFrameNum;
+
+ do
+ {
+ if (poc->prevFrameNum > i)
+ frameNumOffset = poc->prevFrameNumOffset + sps->maxFrameNum;
+ else
+ frameNumOffset = poc->prevFrameNumOffset;
+
+ poc->prevFrameNumOffset = frameNumOffset;
+ poc->prevFrameNum = i;
+
+ i = (i + 1) % sps->maxFrameNum;
+
+ } while (i != pSliceHeader->frameNum);
+ }
+#endif
+ /* NOTE(review): the scan below stops only at a zero operation or at mmco 5; presumably operation[] is zero-terminated by the slice header decoder -- confirm */
+ /* check if current slice includes mmco equal to 5 */
+ containsMmco5 = HANTRO_FALSE;
+ if (pSliceHeader->decRefPicMarking.adaptiveRefPicMarkingModeFlag)
+ {
+ i = 0;
+ while (pSliceHeader->decRefPicMarking.operation[i].
+ memoryManagementControlOperation)
+ {
+ if (pSliceHeader->decRefPicMarking.operation[i].
+ memoryManagementControlOperation == 5)
+ {
+ containsMmco5 = HANTRO_TRUE;
+ break;
+ }
+ i++;
+ }
+ }
+ switch (sps->picOrderCntType)
+ {
+
+ case 0:
+ /* set prevPicOrderCnt values for IDR frame */
+ if (IS_IDR_NAL_UNIT(pNalUnit))
+ {
+ poc->prevPicOrderCntMsb = 0;
+ poc->prevPicOrderCntLsb = 0;
+ }
+
+ /* compute picOrderCntMsb (stored in picOrderCnt variable) */
+ if ( (pSliceHeader->picOrderCntLsb < poc->prevPicOrderCntLsb) &&
+ ((poc->prevPicOrderCntLsb - pSliceHeader->picOrderCntLsb) >=
+ sps->maxPicOrderCntLsb/2) )
+ {
+ picOrderCnt = poc->prevPicOrderCntMsb +
+ (i32)sps->maxPicOrderCntLsb;
+ }
+ else if ((pSliceHeader->picOrderCntLsb > poc->prevPicOrderCntLsb) &&
+ ((pSliceHeader->picOrderCntLsb - poc->prevPicOrderCntLsb) >
+ sps->maxPicOrderCntLsb/2) )
+ {
+ picOrderCnt = poc->prevPicOrderCntMsb -
+ (i32)sps->maxPicOrderCntLsb;
+ }
+ else
+ picOrderCnt = poc->prevPicOrderCntMsb;
+
+ /* standard specifies that prevPicOrderCntMsb is from previous
+ * reference frame -> replace old value only if current frame is
+ * reference frame */
+ if (pNalUnit->nalRefIdc)
+ poc->prevPicOrderCntMsb = picOrderCnt;
+
+ /* compute top field order cnt (stored in picOrderCnt) */
+ picOrderCnt += (i32)pSliceHeader->picOrderCntLsb;
+
+ /* if delta for bottom field is negative -> bottom will be the
+ * minimum pic order count */
+ if (pSliceHeader->deltaPicOrderCntBottom < 0)
+ picOrderCnt += pSliceHeader->deltaPicOrderCntBottom;
+
+ /* standard specifies that prevPicOrderCntLsb is from previous
+ * reference frame -> replace old value only if current frame is
+ * reference frame */
+ if (pNalUnit->nalRefIdc)
+ {
+ /* if current frame contains mmco5 -> modify values to be
+ * stored */
+ if (containsMmco5)
+ {
+ poc->prevPicOrderCntMsb = 0;
+ /* prevPicOrderCntLsb should be the top field picOrderCnt
+ * if previous frame included mmco5. Top field picOrderCnt
+ * for frames containing mmco5 is obtained by subtracting
+ * the picOrderCnt from original top field order count ->
+ * value is zero if top field was the minimum, i.e. delta
+ * for bottom was positive, otherwise value is
+ * -deltaPicOrderCntBottom */
+ if (pSliceHeader->deltaPicOrderCntBottom < 0)
+ poc->prevPicOrderCntLsb =
+ (u32)(-pSliceHeader->deltaPicOrderCntBottom);
+ else
+ poc->prevPicOrderCntLsb = 0;
+ picOrderCnt = 0;
+ }
+ else
+ {
+ poc->prevPicOrderCntLsb = pSliceHeader->picOrderCntLsb;
+ }
+ }
+
+ break;
+
+ case 1:
+ /* POC type 1: expected POC built from frameNum and sps->offsetForRefFrame[] */
+ /* step 1 (in the description in the standard) */
+ if (IS_IDR_NAL_UNIT(pNalUnit))
+ frameNumOffset = 0;
+ else if (poc->prevFrameNum > pSliceHeader->frameNum)
+ frameNumOffset = poc->prevFrameNumOffset + sps->maxFrameNum;
+ else
+ frameNumOffset = poc->prevFrameNumOffset;
+
+ /* step 2 */
+ if (sps->numRefFramesInPicOrderCntCycle)
+ absFrameNum = frameNumOffset + pSliceHeader->frameNum;
+ else
+ absFrameNum = 0;
+
+ if (pNalUnit->nalRefIdc == 0 && absFrameNum > 0)
+ absFrameNum -= 1;
+ /* absFrameNum > 0 only if numRefFramesInPicOrderCntCycle != 0 (step 2), so the divisions below never divide by zero */
+ /* step 3 */
+ if (absFrameNum > 0)
+ {
+ picOrderCntCycleCnt =
+ (absFrameNum - 1)/sps->numRefFramesInPicOrderCntCycle;
+ frameNumInPicOrderCntCycle =
+ (absFrameNum - 1)%sps->numRefFramesInPicOrderCntCycle;
+ }
+ /* (the two cycle variables stay unset otherwise; step 5 reads them only when absFrameNum > 0) */
+ /* step 4 */
+ expectedDeltaPicOrderCntCycle = 0;
+ for (i = 0; i < sps->numRefFramesInPicOrderCntCycle; i++)
+ expectedDeltaPicOrderCntCycle += sps->offsetForRefFrame[i];
+
+ /* step 5 (picOrderCnt used to store expectedPicOrderCnt) */
+ /*lint -esym(644,picOrderCntCycleCnt) always initialized */
+ /*lint -esym(644,frameNumInPicOrderCntCycle) always initialized */
+ if (absFrameNum > 0)
+ {
+ picOrderCnt =
+ (i32)picOrderCntCycleCnt * expectedDeltaPicOrderCntCycle;
+ for (i = 0; i <= frameNumInPicOrderCntCycle; i++)
+ picOrderCnt += sps->offsetForRefFrame[i];
+ }
+ else
+ picOrderCnt = 0;
+
+ if (pNalUnit->nalRefIdc == 0)
+ picOrderCnt += sps->offsetForNonRefPic;
+
+ /* step 6 (picOrderCnt is top field order cnt if delta for bottom
+ * is positive, otherwise it is bottom field order cnt) */
+ picOrderCnt += pSliceHeader->deltaPicOrderCnt[0];
+
+ if ( (sps->offsetForTopToBottomField +
+ pSliceHeader->deltaPicOrderCnt[1]) < 0 )
+ {
+ picOrderCnt += sps->offsetForTopToBottomField +
+ pSliceHeader->deltaPicOrderCnt[1];
+ }
+
+ /* if current picture contains mmco5 -> set prevFrameNumOffset and
+ * prevFrameNum to 0 for computation of picOrderCnt of next
+ * frame, otherwise store frameNum and frameNumOffset to poc
+ * structure */
+ if (!containsMmco5)
+ {
+ poc->prevFrameNumOffset = frameNumOffset;
+ poc->prevFrameNum = pSliceHeader->frameNum;
+ }
+ else
+ {
+ poc->prevFrameNumOffset = 0;
+ poc->prevFrameNum = 0;
+ picOrderCnt = 0;
+ }
+ break;
+
+ default: /* case 2 */
+ /* derive frameNumOffset */
+ if (IS_IDR_NAL_UNIT(pNalUnit))
+ frameNumOffset = 0;
+ else if (poc->prevFrameNum > pSliceHeader->frameNum)
+ frameNumOffset = poc->prevFrameNumOffset + sps->maxFrameNum;
+ else
+ frameNumOffset = poc->prevFrameNumOffset;
+
+ /* derive picOrderCnt (type 2 has same value for top and bottom
+ * field order cnts) */
+ if (IS_IDR_NAL_UNIT(pNalUnit))
+ picOrderCnt = 0;
+ else if (pNalUnit->nalRefIdc == 0)
+ picOrderCnt =
+ 2 * (i32)(frameNumOffset + pSliceHeader->frameNum) - 1;
+ else
+ picOrderCnt =
+ 2 * (i32)(frameNumOffset + pSliceHeader->frameNum);
+
+ /* if current picture contains mmco5 -> set prevFrameNumOffset and
+ * prevFrameNum to 0 for computation of picOrderCnt of next
+ * frame, otherwise store frameNum and frameNumOffset to poc
+ * structure */
+ if (!containsMmco5)
+ {
+ poc->prevFrameNumOffset = frameNumOffset;
+ poc->prevFrameNum = pSliceHeader->frameNum;
+ }
+ else
+ {
+ poc->prevFrameNumOffset = 0;
+ poc->prevFrameNum = 0;
+ picOrderCnt = 0;
+ }
+ break;
+
+ }
+
+ /*lint -esym(644,picOrderCnt) always initialized */
+ return(picOrderCnt);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.h
new file mode 100755
index 0000000..19741eb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_order_cnt.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_PIC_ORDER_CNT_H
+#define H264SWDEC_PIC_ORDER_CNT_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_seq_param_set.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_nal_unit.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/* structure to store information computed for previous picture, needed for
+ * POC computation of a picture. Two first fields for POC type 0, last two
+ * for types 1 and 2 */
+typedef struct
+{
+ u32 prevPicOrderCntLsb; /* POC type 0: LSB carried over from the previous reference picture */
+ i32 prevPicOrderCntMsb; /* POC type 0: MSB carried over from the previous reference picture */
+ u32 prevFrameNum; /* POC types 1 and 2: frameNum of the previous picture (0 after mmco 5) */
+ u32 prevFrameNumOffset; /* POC types 1 and 2: frameNumOffset of the previous picture (0 after mmco 5) */
+} pocStorage_t;
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+i32 h264bsdDecodePicOrderCnt(pocStorage_t *poc, seqParamSet_t *sps,
+ sliceHeader_t *sliceHeader, nalUnit_t *pNalUnit);
+
+#endif /* #ifndef H264SWDEC_PIC_ORDER_CNT_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.c
new file mode 100755
index 0000000..e04dea4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.c
@@ -0,0 +1,335 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdDecodePicParamSet
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_pic_param_set.h"
+#include "h264bsd_util.h"
+#include "h264bsd_vlc.h"
+#include "h264bsd_cfg.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/* lookup table for ceil(log2(numSliceGroups)), i.e. number of bits needed to
+ * represent range [0, numSliceGroups); indexed with numSliceGroups - 1
+ * (entry 0, a single slice group, holds 1 rather than ceil(log2(1)) = 0)
+ * NOTE: if MAX_NUM_SLICE_GROUPS is higher than 8 this table has to be resized
+ * accordingly */
+static const u32 CeilLog2NumSliceGroups[8] = {1, 1, 2, 2, 3, 3, 3, 3};
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+ Function name: h264bsdDecodePicParamSet
+
+ Functional description:
+ Decode picture parameter set information from the stream.
+
+ Function allocates memory for
+ - run lengths if slice group map type is 0
+ - top-left and bottom-right arrays if map type is 2
+ - for slice group ids if map type is 6
+
+ Validity of some of the slice group mapping information depends
+ on the image dimensions which are not known here. Therefore the
+ validity has to be checked afterwards, currently in the parameter
+ set activation phase.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+
+ Outputs:
+ pPicParamSet decoded information is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK failure, invalid information or end of stream
+ MEMORY_ALLOCATION_ERROR for memory allocation failure
+
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodePicParamSet(strmData_t *pStrmData, picParamSet_t *pPicParamSet)
+{
+ /* Parses one picture parameter set from pStrmData into *pPicParamSet */
+/* Variables */
+
+ u32 tmp, i, value;
+ i32 itmp;
+
+/* Code */
+
+ ASSERT(pStrmData);
+ ASSERT(pPicParamSet);
+
+ /* start from an all-zero structure */
+ H264SwDecMemset(pPicParamSet, 0, sizeof(picParamSet_t));
+ /* pic_parameter_set_id */
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+ &pPicParamSet->picParameterSetId);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ if (pPicParamSet->picParameterSetId >= MAX_NUM_PIC_PARAM_SETS)
+ {
+ EPRINT("pic_parameter_set_id");
+ return(HANTRO_NOK);
+ }
+ /* seq_parameter_set_id */
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+ &pPicParamSet->seqParameterSetId);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ if (pPicParamSet->seqParameterSetId >= MAX_NUM_SEQ_PARAM_SETS)
+ {
+ EPRINT("seq_param_set_id");
+ return(HANTRO_NOK);
+ }
+
+ /* entropy_coding_mode_flag, shall be 0 for baseline profile */
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp)
+ {
+ EPRINT("entropy_coding_mode_flag");
+ return(HANTRO_NOK);
+ }
+ /* pic_order_present_flag */
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pPicParamSet->picOrderPresentFlag = (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+ /* num_slice_groups_minus1 */
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ pPicParamSet->numSliceGroups = value + 1;
+ if (pPicParamSet->numSliceGroups > MAX_NUM_SLICE_GROUPS)
+ {
+ EPRINT("num_slice_groups_minus1");
+ return(HANTRO_NOK);
+ }
+
+ /* decode slice group mapping information if more than one slice groups */
+ if (pPicParamSet->numSliceGroups > 1)
+ {
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+ &pPicParamSet->sliceGroupMapType);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ if (pPicParamSet->sliceGroupMapType > 6)
+ {
+ EPRINT("slice_group_map_type");
+ return(HANTRO_NOK);
+ }
+ /* NOTE(review): error returns below leave already-allocated arrays in *pPicParamSet; presumably the caller frees them -- confirm */
+ if (pPicParamSet->sliceGroupMapType == 0)
+ {
+ ALLOCATE(pPicParamSet->runLength,
+ pPicParamSet->numSliceGroups, u32);
+ if (pPicParamSet->runLength == NULL)
+ return(MEMORY_ALLOCATION_ERROR);
+ for (i = 0; i < pPicParamSet->numSliceGroups; i++)
+ {
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ pPicParamSet->runLength[i] = value+1;
+ /* param values checked in CheckPps() */
+ }
+ }
+ else if (pPicParamSet->sliceGroupMapType == 2)
+ {
+ ALLOCATE(pPicParamSet->topLeft,
+ pPicParamSet->numSliceGroups - 1, u32);
+ ALLOCATE(pPicParamSet->bottomRight,
+ pPicParamSet->numSliceGroups - 1, u32);
+ if (pPicParamSet->topLeft == NULL ||
+ pPicParamSet->bottomRight == NULL)
+ return(MEMORY_ALLOCATION_ERROR);
+ for (i = 0; i < pPicParamSet->numSliceGroups - 1; i++)
+ {
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ pPicParamSet->topLeft[i] = value;
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ pPicParamSet->bottomRight[i] = value;
+ /* param values checked in CheckPps() */
+ }
+ }
+ else if ( (pPicParamSet->sliceGroupMapType == 3) ||
+ (pPicParamSet->sliceGroupMapType == 4) ||
+ (pPicParamSet->sliceGroupMapType == 5) )
+ {
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pPicParamSet->sliceGroupChangeDirectionFlag =
+ (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ pPicParamSet->sliceGroupChangeRate = value + 1;
+ /* param value checked in CheckPps() */
+ }
+ else if (pPicParamSet->sliceGroupMapType == 6)
+ {
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ pPicParamSet->picSizeInMapUnits = value + 1;
+ /* NOTE(review): picSizeInMapUnits comes straight from the stream and is not range-checked here before sizing the allocation -- confirm ALLOCATE guards against size overflow */
+ ALLOCATE(pPicParamSet->sliceGroupId,
+ pPicParamSet->picSizeInMapUnits, u32);
+ if (pPicParamSet->sliceGroupId == NULL)
+ return(MEMORY_ALLOCATION_ERROR);
+
+ /* determine number of bits needed to represent range
+ * [0, numSliceGroups) */
+ tmp = CeilLog2NumSliceGroups[pPicParamSet->numSliceGroups-1];
+
+ for (i = 0; i < pPicParamSet->picSizeInMapUnits; i++)
+ {
+ pPicParamSet->sliceGroupId[i] = h264bsdGetBits(pStrmData, tmp);
+ if ( pPicParamSet->sliceGroupId[i] >=
+ pPicParamSet->numSliceGroups )
+ {
+ EPRINT("slice_group_id");
+ return(HANTRO_NOK);
+ }
+ }
+ }
+ }
+
+ /* num_ref_idx_l0_active_minus1 */
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ if (value > 31)
+ {
+ EPRINT("num_ref_idx_l0_active_minus1");
+ return(HANTRO_NOK);
+ }
+ pPicParamSet->numRefIdxL0Active = value + 1;
+
+ /* num_ref_idx_l1_active_minus1 (range-checked only, not stored) */
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ if (value > 31)
+ {
+ EPRINT("num_ref_idx_l1_active_minus1");
+ return(HANTRO_NOK);
+ }
+
+ /* weighted_pred_flag, this shall be 0 for baseline profile */
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp)
+ {
+ EPRINT("weighted_pred_flag");
+ return(HANTRO_NOK);
+ }
+
+ /* weighted_bipred_idc (range-checked only, not stored) */
+ tmp = h264bsdGetBits(pStrmData, 2);
+ if (tmp > 2)
+ {
+ EPRINT("weighted_bipred_idc");
+ return(HANTRO_NOK);
+ }
+
+ /* pic_init_qp_minus26 */
+ tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ if ((itmp < -26) || (itmp > 25))
+ {
+ EPRINT("pic_init_qp_minus26");
+ return(HANTRO_NOK);
+ }
+ pPicParamSet->picInitQp = (u32)(itmp + 26);
+
+ /* pic_init_qs_minus26 (range-checked only, not stored) */
+ tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ if ((itmp < -26) || (itmp > 25))
+ {
+ EPRINT("pic_init_qs_minus26");
+ return(HANTRO_NOK);
+ }
+ /* chroma_qp_index_offset */
+ tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ if ((itmp < -12) || (itmp > 12))
+ {
+ EPRINT("chroma_qp_index_offset");
+ return(HANTRO_NOK);
+ }
+ pPicParamSet->chromaQpIndexOffset = itmp;
+ /* deblocking_filter_control_present_flag */
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pPicParamSet->deblockingFilterControlPresentFlag =
+ (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+ /* constrained_intra_pred_flag */
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pPicParamSet->constrainedIntraPredFlag = (tmp == 1) ?
+ HANTRO_TRUE : HANTRO_FALSE;
+ /* redundant_pic_cnt_present_flag */
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pPicParamSet->redundantPicCntPresentFlag = (tmp == 1) ?
+ HANTRO_TRUE : HANTRO_FALSE;
+
+ tmp = h264bsdRbspTrailingBits(pStrmData);
+
+ /* ignore possible errors in trailing bits of parameters sets */
+ return(HANTRO_OK);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.h
new file mode 100755
index 0000000..6328638
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_pic_param_set.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_PIC_PARAM_SET_H
+#define H264SWDEC_PIC_PARAM_SET_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/* data structure to store PPS information decoded from the stream */
+typedef struct
+{
+ u32 picParameterSetId; /* checked against MAX_NUM_PIC_PARAM_SETS by the parser */
+ u32 seqParameterSetId; /* checked against MAX_NUM_SEQ_PARAM_SETS by the parser */
+ u32 picOrderPresentFlag; /* HANTRO_TRUE / HANTRO_FALSE */
+ u32 numSliceGroups; /* num_slice_groups_minus1 + 1 */
+ u32 sliceGroupMapType; /* 0..6, parsed only when numSliceGroups > 1 */
+ u32 *runLength; /* map type 0: numSliceGroups entries, each stored as decoded value + 1 */
+ u32 *topLeft; /* map type 2: numSliceGroups - 1 entries */
+ u32 *bottomRight; /* map type 2: numSliceGroups - 1 entries */
+ u32 sliceGroupChangeDirectionFlag; /* map types 3..5 */
+ u32 sliceGroupChangeRate; /* map types 3..5: decoded value + 1 */
+ u32 picSizeInMapUnits; /* map type 6: decoded value + 1 */
+ u32 *sliceGroupId; /* map type 6: picSizeInMapUnits entries, each < numSliceGroups */
+ u32 numRefIdxL0Active; /* num_ref_idx_l0_active_minus1 + 1, i.e. 1..32 */
+ u32 picInitQp; /* pic_init_qp_minus26 + 26, i.e. 0..51 */
+ i32 chromaQpIndexOffset; /* -12..12 */
+ u32 deblockingFilterControlPresentFlag; /* HANTRO_TRUE / HANTRO_FALSE */
+ u32 constrainedIntraPredFlag; /* HANTRO_TRUE / HANTRO_FALSE */
+ u32 redundantPicCntPresentFlag; /* HANTRO_TRUE / HANTRO_FALSE */
+} picParamSet_t;
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodePicParamSet(strmData_t *pStrmData,
+ picParamSet_t *pPicParamSet);
+
+#endif /* #ifndef H264SWDEC_PIC_PARAM_SET_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.c
new file mode 100755
index 0000000..c948776
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.c
@@ -0,0 +1,2315 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_reconstruct.h"
+#include "h264bsd_macroblock_layer.h"
+#include "h264bsd_image.h"
+#include "h264bsd_util.h"
+
+#ifdef H264DEC_OMXDL
+#include "omxtypes.h"
+#include "omxVC.h"
+#include "armVC.h"
+#endif /* H264DEC_OMXDL */
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/* Switch off the following Lint messages for this file:
+ * Info 701: Shift left of signed quantity (int)
+ * Info 702: Shift right of signed quantity (int)
+ */
+/*lint -e701 -e702 */
+
+/* Luma fractional-sample positions
+ *
+ * G a b c H
+ * d e f g
+ * h i j k m
+ * n p q r
+ * M s N
+ *
+ * G, H, M and N are integer sample positions
+ * a-s are fractional samples that need to be interpolated.
+ *
+ * lumaFracPos below simply numbers the sixteen positions 0..15. Per the
+ * label comment inside the initializer, entry [c][r] names the sample in
+ * column c and row r of the diagram above (G,d,h,n is the first column).
+ * NOTE(review): the lookup site is outside this part of the file;
+ * presumably c and r are the quarter-sample x and y fractions of the
+ * motion vector -- confirm at the point of use.
+ */
+#ifndef H264DEC_OMXDL
+static const u32 lumaFracPos[4][4] = {
+ /* G d h n a e i p b f j q c g k r */
+ {0, 1, 2, 3}, {4, 5, 6, 7}, {8, 9, 10, 11}, {12, 13, 14, 15}};
+#endif /* H264DEC_OMXDL */
+
+/* clipping table, defined in h264bsd_intra_prediction.c.
+ * The luma interpolation functions in this file use it as
+ * (h264bsdClip + 512)[sum >> 5] or [sum >> 10], where sum is a signed
+ * filter accumulator; presumably the 512 bias makes negative indices land
+ * inside the table and the entries saturate to 0..255 -- confirm against
+ * the definition. */
+extern const u8 h264bsdClip[];
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+#ifndef H264DEC_OMXDL
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdInterpolateChromaHor
+
+ Functional description:
+ This function performs chroma interpolation in horizontal direction.
+ Overfilling is done only if needed. Reference image (pRef) is
+ read at correct position and the predicted part is written to
+ macroblock's chrominance (predPartChroma)
+ Inputs:
+ pRef pointer to reference frame Cb top-left corner
+ x0 integer x-coordinate for prediction
+ y0 integer y-coordinate for prediction
+ width width of the reference frame chrominance in pixels
+ height height of the reference frame chrominance in pixels
+ xFrac horizontal fraction for prediction in 1/8 pixels
+ chromaPartWidth width of the predicted part in pixels
+ chromaPartHeight height of the predicted part in pixels
+ Outputs:
+ predPartChroma pointer where predicted part is written
+ Notes:
+ Both chroma components are processed: the second plane is read at
+ pRef + width*height and written at predPartChroma + 8*8. Output rows
+ are 8 bytes apart.
+ Each output is (((8-xFrac)*A + xFrac*B) * 8 + 32) >> 6, where A is the
+ sample at the output position and B its right-hand neighbour.
+ The loops produce 2x2 outputs per iteration, so odd part sizes leave
+ the last row/column unwritten. The local overfill buffer holds
+ 9*8 bytes per component, i.e. it fits (chromaPartWidth+1) x
+ chromaPartHeight only for parts up to 8x8; neither is checked here.
+ The function is compiled only when H264DEC_ARM11 is not defined.
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_ARM11
+void h264bsdInterpolateChromaHor(
+ u8 *pRef,
+ u8 *predPartChroma,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 xFrac,
+ u32 chromaPartWidth,
+ u32 chromaPartHeight)
+{
+
+/* Variables */
+
+ u32 x, y, tmp1, tmp2, tmp3, tmp4, c, val;
+ u8 *ptrA, *cbr;
+ u32 comp;
+ u8 block[9*8*2]; /* overfill scratch: two components of at most 9 x 8 samples */
+
+/* Code */
+
+ ASSERT(predPartChroma);
+ ASSERT(chromaPartWidth);
+ ASSERT(chromaPartHeight);
+ ASSERT(xFrac < 8);
+ ASSERT(pRef);
+
+ /* one extra column is needed to the right of the part; if that window is
+ * not fully inside the plane, build it in the local buffer instead */
+ if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) ||
+ (y0 < 0) || ((u32)y0+chromaPartHeight > height))
+ {
+ h264bsdFillBlock(pRef, block, x0, y0, width, height,
+ chromaPartWidth + 1, chromaPartHeight, chromaPartWidth + 1);
+ pRef += width * height; /* step to the second chroma plane */
+ h264bsdFillBlock(pRef, block + (chromaPartWidth+1)*chromaPartHeight,
+ x0, y0, width, height, chromaPartWidth + 1,
+ chromaPartHeight, chromaPartWidth + 1);
+
+ /* from here on the scratch buffer is treated as a tiny two-plane picture */
+ pRef = block;
+ x0 = 0;
+ y0 = 0;
+ width = chromaPartWidth+1;
+ height = chromaPartHeight;
+ }
+
+ val = 8 - xFrac; /* weight of the left sample */
+
+ for (comp = 0; comp <= 1; comp++)
+ {
+
+ ptrA = pRef + (comp * height + (u32)y0) * width + x0; /* plane comp, row y0, column x0 */
+ cbr = predPartChroma + comp * 8 * 8; /* 64 output bytes per component */
+
+ /* 2x2 pels per iteration
+ * bilinear horizontal interpolation */
+ for (y = (chromaPartHeight >> 1); y; y--)
+ {
+ for (x = (chromaPartWidth >> 1); x; x--)
+ {
+ tmp1 = ptrA[width]; /* lower row, column 0 */
+ tmp2 = *ptrA++; /* upper row, column 0 */
+ tmp3 = ptrA[width]; /* lower row, column 1 */
+ tmp4 = *ptrA++; /* upper row, column 1 */
+ c = ((val * tmp1 + xFrac * tmp3) << 3) + 32;
+ c >>= 6;
+ cbr[8] = (u8)c; /* lower output row, first pel */
+ c = ((val * tmp2 + xFrac * tmp4) << 3) + 32;
+ c >>= 6;
+ *cbr++ = (u8)c; /* upper output row, first pel */
+ tmp1 = ptrA[width]; /* lower row, column 2 */
+ tmp2 = *ptrA; /* upper row, column 2; no increment, it is column 0 of the next iteration */
+ c = ((val * tmp3 + xFrac * tmp1) << 3) + 32;
+ c >>= 6;
+ cbr[8] = (u8)c; /* lower output row, second pel */
+ c = ((val * tmp4 + xFrac * tmp2) << 3) + 32;
+ c >>= 6;
+ *cbr++ = (u8)c; /* upper output row, second pel */
+ }
+ cbr += 2*8 - chromaPartWidth; /* two output rows down, back to the first column */
+ ptrA += 2*width - chromaPartWidth; /* two reference rows down, back to the first column */
+ }
+ }
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdInterpolateChromaVer
+
+ Functional description:
+ This function performs chroma interpolation in vertical direction.
+ Overfilling is done only if needed. Reference image (pRef) is
+ read at correct position and the predicted part is written to
+ macroblock's chrominance (predPartChroma)
+ Inputs:
+ pRef pointer to the first chroma plane of the reference
+ frame; the second plane is read at pRef + width*height
+ x0, y0 integer coordinates of the part inside the plane
+ width, height dimensions of one chroma plane in pixels
+ yFrac vertical fraction, asserted to be below 8
+ chromaPartWidth, chromaPartHeight
+ size of the predicted part in pixels
+ Outputs:
+ predPartChroma first component at offset 0, second at offset 8*8,
+ output rows 8 bytes apart
+ Notes:
+ Each output is (((8-yFrac)*A + yFrac*B) * 8 + 32) >> 6, where A is the
+ sample at the output position and B the sample one row below.
+ The loops produce 2x2 outputs per iteration, so odd part sizes leave
+ the last row/column unwritten. The local overfill buffer holds
+ 9*8 bytes per component, i.e. it fits chromaPartWidth x
+ (chromaPartHeight+1) only for parts up to 8x8; neither is checked here.
+ The function lies inside the #ifndef H264DEC_ARM11 region opened
+ before h264bsdInterpolateChromaHor.
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateChromaVer(
+ u8 *pRef,
+ u8 *predPartChroma,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 yFrac,
+ u32 chromaPartWidth,
+ u32 chromaPartHeight)
+{
+
+/* Variables */
+
+ u32 x, y, tmp1, tmp2, tmp3, c, val;
+ u8 *ptrA, *cbr;
+ u32 comp;
+ u8 block[9*8*2]; /* overfill scratch: two components of at most 8 x 9 samples */
+
+/* Code */
+
+ ASSERT(predPartChroma);
+ ASSERT(chromaPartWidth);
+ ASSERT(chromaPartHeight);
+ ASSERT(yFrac < 8);
+ ASSERT(pRef);
+
+ /* one extra row is needed below the part; if that window is not fully
+ * inside the plane, build it in the local buffer instead */
+ if ((x0 < 0) || ((u32)x0+chromaPartWidth > width) ||
+ (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height))
+ {
+ h264bsdFillBlock(pRef, block, x0, y0, width, height, chromaPartWidth,
+ chromaPartHeight + 1, chromaPartWidth);
+ pRef += width * height; /* step to the second chroma plane */
+ h264bsdFillBlock(pRef, block + chromaPartWidth*(chromaPartHeight+1),
+ x0, y0, width, height, chromaPartWidth,
+ chromaPartHeight + 1, chromaPartWidth);
+
+ /* from here on the scratch buffer is treated as a tiny two-plane picture */
+ pRef = block;
+ x0 = 0;
+ y0 = 0;
+ width = chromaPartWidth;
+ height = chromaPartHeight+1;
+ }
+
+ val = 8 - yFrac; /* weight of the upper sample */
+
+ for (comp = 0; comp <= 1; comp++)
+ {
+
+ ptrA = pRef + (comp * height + (u32)y0) * width + x0; /* plane comp, row y0, column x0 */
+ cbr = predPartChroma + comp * 8 * 8; /* 64 output bytes per component */
+
+ /* 2x2 pels per iteration
+ * bilinear vertical interpolation */
+ for (y = (chromaPartHeight >> 1); y; y--)
+ {
+ for (x = (chromaPartWidth >> 1); x; x--)
+ {
+ tmp3 = ptrA[width*2]; /* row 2 */
+ tmp2 = ptrA[width]; /* row 1 */
+ tmp1 = *ptrA++; /* row 0 */
+ c = ((val * tmp2 + yFrac * tmp3) << 3) + 32;
+ c >>= 6;
+ cbr[8] = (u8)c; /* lower output row: blend of rows 1 and 2 */
+ c = ((val * tmp1 + yFrac * tmp2) << 3) + 32;
+ c >>= 6;
+ *cbr++ = (u8)c; /* upper output row: blend of rows 0 and 1 */
+ tmp3 = ptrA[width*2]; /* same three rows, next column */
+ tmp2 = ptrA[width];
+ tmp1 = *ptrA++;
+ c = ((val * tmp2 + yFrac * tmp3) << 3) + 32;
+ c >>= 6;
+ cbr[8] = (u8)c;
+ c = ((val * tmp1 + yFrac * tmp2) << 3) + 32;
+ c >>= 6;
+ *cbr++ = (u8)c;
+ }
+ cbr += 2*8 - chromaPartWidth; /* two output rows down, back to the first column */
+ ptrA += 2*width - chromaPartWidth; /* two reference rows down, back to the first column */
+ }
+ }
+
+}
+#endif
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdInterpolateChromaHorVer
+
+ Functional description:
+ This function performs chroma interpolation in horizontal and
+ vertical direction. Overfilling is done only if needed. Reference
+ image (ref) is read at correct position and the predicted part
+ is written to macroblock's chrominance (predPartChroma)
+ Inputs:
+ ref pointer to the first chroma plane of the reference
+ frame; the second plane is read at ref + width*height
+ x0, y0 integer coordinates of the part inside the plane
+ width, height dimensions of one chroma plane in pixels
+ xFrac, yFrac horizontal / vertical fraction, each asserted below 8
+ chromaPartWidth, chromaPartHeight
+ size of the predicted part in pixels
+ Outputs:
+ predPartChroma first component at offset 0, second at offset 8*8,
+ output rows 8 bytes apart
+ Notes:
+ Every column is first blended vertically as
+ (8-yFrac)*upper + yFrac*lower (no rounding), then two neighbouring
+ column values are blended as
+ ((8-xFrac)*left + xFrac*right + 32) >> 6.
+ The loops produce 2x2 outputs per iteration, so odd part sizes leave
+ the last row/column unwritten. The local overfill buffer holds
+ 9*9 bytes per component, i.e. it fits (chromaPartWidth+1) x
+ (chromaPartHeight+1) only for parts up to 8x8; neither is checked here.
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateChromaHorVer(
+ u8 *ref,
+ u8 *predPartChroma,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 xFrac,
+ u32 yFrac,
+ u32 chromaPartWidth,
+ u32 chromaPartHeight)
+{
+ u8 block[9*9*2]; /* overfill scratch: two components of at most 9 x 9 samples */
+ u32 x, y, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, valX, valY, plus32 = 32;
+ u32 comp;
+ u8 *ptrA, *cbr;
+
+/* Code */
+
+ ASSERT(predPartChroma);
+ ASSERT(chromaPartWidth);
+ ASSERT(chromaPartHeight);
+ ASSERT(xFrac < 8);
+ ASSERT(yFrac < 8);
+ ASSERT(ref);
+
+ /* one extra column and one extra row are needed; if that window is not
+ * fully inside the plane, build it in the local buffer instead */
+ if ((x0 < 0) || ((u32)x0+chromaPartWidth+1 > width) ||
+ (y0 < 0) || ((u32)y0+chromaPartHeight+1 > height))
+ {
+ h264bsdFillBlock(ref, block, x0, y0, width, height,
+ chromaPartWidth + 1, chromaPartHeight + 1, chromaPartWidth + 1);
+ ref += width * height; /* step to the second chroma plane */
+ h264bsdFillBlock(ref, block + (chromaPartWidth+1)*(chromaPartHeight+1),
+ x0, y0, width, height, chromaPartWidth + 1,
+ chromaPartHeight + 1, chromaPartWidth + 1);
+
+ /* from here on the scratch buffer is treated as a tiny two-plane picture */
+ ref = block;
+ x0 = 0;
+ y0 = 0;
+ width = chromaPartWidth+1;
+ height = chromaPartHeight+1;
+ }
+
+ valX = 8 - xFrac; /* weight of the left column */
+ valY = 8 - yFrac; /* weight of the upper row */
+
+ for (comp = 0; comp <= 1; comp++)
+ {
+
+ ptrA = ref + (comp * height + (u32)y0) * width + x0; /* plane comp, row y0, column x0 */
+ cbr = predPartChroma + comp * 8 * 8; /* 64 output bytes per component */
+
+ /* 2x2 pels per iteration
+ * bilinear vertical and horizontal interpolation */
+ for (y = (chromaPartHeight >> 1); y; y--)
+ {
+ /* prime the left column: tmp1 = vertical blend of rows 0/1,
+ * tmp3 = vertical blend of rows 1/2 */
+ tmp1 = *ptrA;
+ tmp3 = ptrA[width];
+ tmp5 = ptrA[width*2];
+ tmp1 *= valY;
+ tmp1 += tmp3 * yFrac;
+ tmp3 *= valY;
+ tmp3 += tmp5 * yFrac;
+ for (x = (chromaPartWidth >> 1); x; x--)
+ {
+ /* next column into tmp2 (rows 0/1) and tmp4 (rows 1/2) */
+ tmp2 = *++ptrA;
+ tmp4 = ptrA[width];
+ tmp6 = ptrA[width*2];
+ tmp2 *= valY;
+ tmp2 += tmp4 * yFrac;
+ tmp4 *= valY;
+ tmp4 += tmp6 * yFrac;
+ tmp1 = tmp1 * valX + plus32; /* horizontal blend: left column and rounding term */
+ tmp3 = tmp3 * valX + plus32;
+ tmp1 += tmp2 * xFrac;
+ tmp1 >>= 6;
+ tmp3 += tmp4 * xFrac;
+ tmp3 >>= 6;
+ cbr[8] = (u8)tmp3; /* lower output row */
+ *cbr++ = (u8)tmp1; /* upper output row */
+
+ /* roles swap: tmp2/tmp4 are now the left column, the newly
+ * loaded column goes into tmp1/tmp3 and is reused next iteration */
+ tmp1 = *++ptrA;
+ tmp3 = ptrA[width];
+ tmp5 = ptrA[width*2];
+ tmp1 *= valY;
+ tmp1 += tmp3 * yFrac;
+ tmp3 *= valY;
+ tmp3 += tmp5 * yFrac;
+ tmp2 = tmp2 * valX + plus32;
+ tmp4 = tmp4 * valX + plus32;
+ tmp2 += tmp1 * xFrac;
+ tmp2 >>= 6;
+ tmp4 += tmp3 * xFrac;
+ tmp4 >>= 6;
+ cbr[8] = (u8)tmp4; /* lower output row */
+ *cbr++ = (u8)tmp2; /* upper output row */
+ }
+ cbr += 2*8 - chromaPartWidth; /* two output rows down, back to the first column */
+ ptrA += 2*width - chromaPartWidth; /* two reference rows down, back to the first column */
+ }
+ }
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: PredictChroma
+
+ Functional description:
+ Top level chroma prediction function that calls the appropriate
+ interpolation function. The output is written to macroblock array.
+ Inputs:
+ xAL, yAL position of the partition; halved here to obtain
+ the chroma position (presumably absolute luma
+ coordinates -- confirm against the caller)
+ partWidth, partHeight
+ partition size; halved here to obtain the chroma size
+ mv motion vector; the low three bits of each component
+ are used as the 1/8 fraction, the rest as the
+ integer displacement
+ refPic reference picture; width and height are multiplied
+ by 8 for the chroma plane size, and chroma data is
+ taken to start 256*width*height bytes into
+ refPic->data (presumably width/height are counted
+ in macroblocks -- confirm against image_t)
+ Outputs:
+ mbPartChroma destination for both chroma components; the second
+ component is written 8*8 bytes after the first
+ Notes:
+ Dispatch: both fractions non-zero -> HorVer, only x -> Hor,
+ only y -> Ver, neither -> plain h264bsdFillBlock copy of each plane.
+
+------------------------------------------------------------------------------*/
+
+static void PredictChroma(
+ u8 *mbPartChroma,
+ u32 xAL,
+ u32 yAL,
+ u32 partWidth,
+ u32 partHeight,
+ mv_t *mv,
+ image_t *refPic)
+{
+
+/* Variables */
+
+ u32 xFrac, yFrac, width, height, chromaPartWidth, chromaPartHeight;
+ i32 xInt, yInt;
+ u8 *ref;
+
+/* Code */
+
+ ASSERT(mv);
+ ASSERT(refPic);
+ ASSERT(refPic->data);
+ ASSERT(refPic->width);
+ ASSERT(refPic->height);
+
+ width = 8 * refPic->width; /* chroma plane width in pixels */
+ height = 8 * refPic->height; /* chroma plane height in pixels */
+
+ xInt = (xAL >> 1) + (mv->hor >> 3); /* may become negative; the callees handle that by overfilling */
+ yInt = (yAL >> 1) + (mv->ver >> 3);
+ xFrac = mv->hor & 0x7; /* 1/8-sample fraction */
+ yFrac = mv->ver & 0x7;
+
+ chromaPartWidth = partWidth >> 1;
+ chromaPartHeight = partHeight >> 1;
+ ref = refPic->data + 256 * refPic->width * refPic->height; /* skip 256 bytes per width*height unit to reach the first chroma plane */
+
+ if (xFrac && yFrac)
+ {
+ h264bsdInterpolateChromaHorVer(ref, mbPartChroma, xInt, yInt, width,
+ height, xFrac, yFrac, chromaPartWidth, chromaPartHeight);
+ }
+ else if (xFrac)
+ {
+ h264bsdInterpolateChromaHor(ref, mbPartChroma, xInt, yInt, width,
+ height, xFrac, chromaPartWidth, chromaPartHeight);
+ }
+ else if (yFrac)
+ {
+ h264bsdInterpolateChromaVer(ref, mbPartChroma, xInt, yInt, width,
+ height, yFrac, chromaPartWidth, chromaPartHeight);
+ }
+ else
+ {
+ /* integer position: copy each plane; the trailing 8 matches the 8-byte
+ * output row spacing used by the interpolators (presumably the
+ * destination stride parameter of h264bsdFillBlock -- confirm) */
+ h264bsdFillBlock(ref, mbPartChroma, xInt, yInt, width, height,
+ chromaPartWidth, chromaPartHeight, 8);
+ ref += width * height; /* second chroma plane */
+ h264bsdFillBlock(ref, mbPartChroma + 8*8, xInt, yInt, width, height,
+ chromaPartWidth, chromaPartHeight, 8);
+ }
+
+}
+
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdInterpolateVerHalf
+
+ Functional description:
+ Function to perform vertical interpolation of pixel position 'h'
+ for a block. Overfilling is done only if needed. Reference
+ image (ref) is read at correct position and the predicted part
+ is written to macroblock array (mb)
+ Inputs:
+ ref pointer to the top-left corner of the reference plane
+ x0, y0 top-left corner of the partWidth x (partHeight+5)
+ window that is read; output row r is filtered from
+ window rows r..r+5, so y0 must already include the
+ two-row lead-in of the 6-tap filter
+ width, height plane dimensions in pixels (height is used only for
+ the bounds test)
+ partWidth, partHeight
+ size of the predicted block in pixels
+ Outputs:
+ mb predicted block, output rows 16 bytes apart
+ Notes:
+ Filter taps are (1, -5, 20, 20, -5, 1), result is
+ clip((sum + 16) >> 5) through the h264bsdClip table.
+ Four vertically adjacent outputs are produced per inner iteration, so
+ partHeight is expected to be a multiple of 4. The overfill buffer is
+ 21*21/4+1 words (444 bytes), which covers windows up to 21 x 21.
+ The function is compiled only when H264DEC_ARM11 is not defined.
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_ARM11
+void h264bsdInterpolateVerHalf(
+ u8 *ref,
+ u8 *mb,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 partWidth,
+ u32 partHeight)
+{
+ u32 p1[21*21/4+1]; /* overfill scratch, accessed as bytes */
+ u32 i, j;
+ i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ u8 *ptrC, *ptrV;
+ const u8 *clp = h264bsdClip + 512; /* biased so that signed sums can index it */
+
+ /* Code */
+
+ ASSERT(ref);
+ ASSERT(mb);
+
+ /* if the window is not fully inside the plane, build it in p1 and
+ * continue with p1 as a partWidth-wide picture */
+ if ((x0 < 0) || ((u32)x0+partWidth > width) ||
+ (y0 < 0) || ((u32)y0+partHeight+5 > height))
+ {
+ h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
+ partWidth, partHeight+5, partWidth);
+
+ x0 = 0;
+ y0 = 0;
+ ref = (u8*)p1;
+ width = partWidth;
+ }
+
+ ref += (u32)y0 * width + (u32)x0;
+
+ ptrC = ref + width; /* window row 1 */
+ ptrV = ptrC + 5*width; /* window row 6 */
+
+ /* 4 pixels per iteration, interpolate using 5 vertical samples */
+ for (i = (partHeight >> 2); i; i--)
+ {
+ /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */
+ for (j = partWidth; j; j--)
+ {
+ tmp4 = ptrV[-(i32)width*2]; /* row 4 of the current 9-row window */
+ tmp5 = ptrV[-(i32)width]; /* row 5 */
+ tmp1 = ptrV[width]; /* row 7 */
+ tmp2 = ptrV[width*2]; /* row 8 */
+ tmp6 = *ptrV++; /* row 6 */
+
+ /* output row 3: rows 3..8 */
+ tmp7 = tmp4 + tmp1;
+ tmp2 -= (tmp7 << 2);
+ tmp2 -= tmp7;
+ tmp2 += 16;
+ tmp7 = tmp5 + tmp6;
+ tmp3 = ptrC[width*2]; /* row 3 */
+ tmp2 += (tmp7 << 4);
+ tmp2 += (tmp7 << 2);
+ tmp2 += tmp3;
+ tmp2 = clp[tmp2>>5];
+ tmp1 += 16;
+ mb[48] = (u8)tmp2;
+
+ /* output row 2: rows 2..7 */
+ tmp7 = tmp3 + tmp6;
+ tmp1 -= (tmp7 << 2);
+ tmp1 -= tmp7;
+ tmp7 = tmp4 + tmp5;
+ tmp2 = ptrC[width]; /* row 2 */
+ tmp1 += (tmp7 << 4);
+ tmp1 += (tmp7 << 2);
+ tmp1 += tmp2;
+ tmp1 = clp[tmp1>>5];
+ tmp6 += 16;
+ mb[32] = (u8)tmp1;
+
+ /* output row 1: rows 1..6 */
+ tmp7 = tmp2 + tmp5;
+ tmp6 -= (tmp7 << 2);
+ tmp6 -= tmp7;
+ tmp7 = tmp4 + tmp3;
+ tmp1 = *ptrC; /* row 1 */
+ tmp6 += (tmp7 << 4);
+ tmp6 += (tmp7 << 2);
+ tmp6 += tmp1;
+ tmp6 = clp[tmp6>>5];
+ tmp5 += 16;
+ mb[16] = (u8)tmp6;
+
+ /* output row 0: rows 0..5 */
+ tmp1 += tmp4;
+ tmp5 -= (tmp1 << 2);
+ tmp5 -= tmp1;
+ tmp3 += tmp2;
+ tmp6 = ptrC[-(i32)width]; /* row 0 */
+ tmp5 += (tmp3 << 4);
+ tmp5 += (tmp3 << 2);
+ tmp5 += tmp6;
+ tmp5 = clp[tmp5>>5];
+ *mb++ = (u8)tmp5;
+ ptrC++;
+ }
+ ptrC += 4*width - partWidth; /* four rows down, back to the first column */
+ ptrV += 4*width - partWidth;
+ mb += 4*16 - partWidth;
+ }
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdInterpolateVerQuarter
+
+ Functional description:
+ Function to perform vertical interpolation of pixel position 'd'
+ or 'n' for a block. Overfilling is done only if needed. Reference
+ image (ref) is read at correct position and the predicted part
+ is written to macroblock array (mb)
+ Inputs:
+ ref pointer to the top-left corner of the reference plane
+ x0, y0 top-left corner of the partWidth x (partHeight+5)
+ window that is read; output row r is filtered from
+ window rows r..r+5
+ width, height plane dimensions in pixels (height is used only for
+ the bounds test)
+ partWidth, partHeight
+ size of the predicted block in pixels
+ verOffset selects the integer row averaged with the half-sample
+ value: window row r+2 for 0, row r+3 for 1
+ Outputs:
+ mb predicted block, output rows 16 bytes apart
+ Notes:
+ Each output is (clip((sum + 16) >> 5) + integer sample + 1) >> 1, where
+ sum uses the taps (1, -5, 20, 20, -5, 1).
+ Four vertically adjacent outputs are produced per inner iteration, so
+ partHeight is expected to be a multiple of 4. The overfill buffer is
+ 21*21/4+1 words (444 bytes), which covers windows up to 21 x 21.
+ The function lies inside the #ifndef H264DEC_ARM11 region opened
+ before h264bsdInterpolateVerHalf.
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateVerQuarter(
+ u8 *ref,
+ u8 *mb,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 partWidth,
+ u32 partHeight,
+ u32 verOffset) /* 0 for pixel d, 1 for pixel n */
+{
+ u32 p1[21*21/4+1]; /* overfill scratch, accessed as bytes */
+ u32 i, j;
+ i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ u8 *ptrC, *ptrV, *ptrInt;
+ const u8 *clp = h264bsdClip + 512; /* biased so that signed sums can index it */
+
+ /* Code */
+
+ ASSERT(ref);
+ ASSERT(mb);
+
+ /* if the window is not fully inside the plane, build it in p1 and
+ * continue with p1 as a partWidth-wide picture */
+ if ((x0 < 0) || ((u32)x0+partWidth > width) ||
+ (y0 < 0) || ((u32)y0+partHeight+5 > height))
+ {
+ h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
+ partWidth, partHeight+5, partWidth);
+
+ x0 = 0;
+ y0 = 0;
+ ref = (u8*)p1;
+ width = partWidth;
+ }
+
+ ref += (u32)y0 * width + (u32)x0;
+
+ ptrC = ref + width; /* window row 1 */
+ ptrV = ptrC + 5*width; /* window row 6 */
+
+ /* Pointer to integer sample position, either M or R */
+ ptrInt = ptrC + (2+verOffset)*width; /* window row 3 or 4; used with offsets -1..+2 rows */
+
+ /* 4 pixels per iteration
+ * interpolate using 5 vertical samples and average between
+ * interpolated value and integer sample value */
+ for (i = (partHeight >> 2); i; i--)
+ {
+ /* h1 = (16 + A + 16(G+M) + 4(G+M) - 4(C+R) - (C+R) + T) >> 5 */
+ for (j = partWidth; j; j--)
+ {
+ tmp4 = ptrV[-(i32)width*2]; /* row 4 of the current 9-row window */
+ tmp5 = ptrV[-(i32)width]; /* row 5 */
+ tmp1 = ptrV[width]; /* row 7 */
+ tmp2 = ptrV[width*2]; /* row 8 */
+ tmp6 = *ptrV++; /* row 6 */
+
+ /* output row 3: rows 3..8, averaged with ptrInt two rows down */
+ tmp7 = tmp4 + tmp1;
+ tmp2 -= (tmp7 << 2);
+ tmp2 -= tmp7;
+ tmp2 += 16;
+ tmp7 = tmp5 + tmp6;
+ tmp3 = ptrC[width*2]; /* row 3 */
+ tmp2 += (tmp7 << 4);
+ tmp2 += (tmp7 << 2);
+ tmp2 += tmp3;
+ tmp2 = clp[tmp2>>5];
+ tmp7 = ptrInt[width*2];
+ tmp1 += 16;
+ tmp2++; /* rounding term of the average */
+ mb[48] = (u8)((tmp2 + tmp7) >> 1);
+
+ /* output row 2: rows 2..7 */
+ tmp7 = tmp3 + tmp6;
+ tmp1 -= (tmp7 << 2);
+ tmp1 -= tmp7;
+ tmp7 = tmp4 + tmp5;
+ tmp2 = ptrC[width]; /* row 2 */
+ tmp1 += (tmp7 << 4);
+ tmp1 += (tmp7 << 2);
+ tmp1 += tmp2;
+ tmp1 = clp[tmp1>>5];
+ tmp7 = ptrInt[width];
+ tmp6 += 16;
+ tmp1++;
+ mb[32] = (u8)((tmp1 + tmp7) >> 1);
+
+ /* output row 1: rows 1..6 */
+ tmp7 = tmp2 + tmp5;
+ tmp6 -= (tmp7 << 2);
+ tmp6 -= tmp7;
+ tmp7 = tmp4 + tmp3;
+ tmp1 = *ptrC; /* row 1 */
+ tmp6 += (tmp7 << 4);
+ tmp6 += (tmp7 << 2);
+ tmp6 += tmp1;
+ tmp6 = clp[tmp6>>5];
+ tmp7 = *ptrInt;
+ tmp5 += 16;
+ tmp6++;
+ mb[16] = (u8)((tmp6 + tmp7) >> 1);
+
+ /* output row 0: rows 0..5 */
+ tmp1 += tmp4;
+ tmp5 -= (tmp1 << 2);
+ tmp5 -= tmp1;
+ tmp3 += tmp2;
+ tmp6 = ptrC[-(i32)width]; /* row 0 */
+ tmp5 += (tmp3 << 4);
+ tmp5 += (tmp3 << 2);
+ tmp5 += tmp6;
+ tmp5 = clp[tmp5>>5];
+ tmp7 = ptrInt[-(i32)width];
+ tmp5++;
+ *mb++ = (u8)((tmp5 + tmp7) >> 1);
+ ptrC++;
+ ptrInt++;
+ }
+ ptrC += 4*width - partWidth; /* four rows down, back to the first column */
+ ptrV += 4*width - partWidth;
+ ptrInt += 4*width - partWidth;
+ mb += 4*16 - partWidth;
+ }
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdInterpolateHorHalf
+
+ Functional description:
+ Function to perform horizontal interpolation of pixel position 'b'
+ for a block. Overfilling is done only if needed. Reference
+ image (ref) is read at correct position and the predicted part
+ is written to macroblock array (mb)
+ Inputs:
+ ref pointer to the top-left corner of the reference plane
+ x0, y0 top-left corner of the (partWidth+5) x partHeight
+ window that is read; output column c is filtered
+ from window columns c..c+5, so x0 must already
+ include the two-column lead-in of the 6-tap filter
+ width, height plane dimensions in pixels (height is used only for
+ the bounds test)
+ partWidth, partHeight
+ size of the predicted block in pixels, both asserted
+ to be multiples of 4
+ Outputs:
+ mb predicted block, output rows 16 bytes apart
+ Notes:
+ Filter taps are (1, -5, 20, 20, -5, 1), result is
+ clip((sum + 16) >> 5) through the h264bsdClip table.
+ tmp1..tmp6 form a sliding six-sample window; four outputs are
+ produced per inner iteration and the registers are rotated at the end
+ so that only four new samples are loaded per iteration.
+ The function lies inside the #ifndef H264DEC_ARM11 region opened
+ before h264bsdInterpolateVerHalf.
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateHorHalf(
+ u8 *ref,
+ u8 *mb,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 partWidth,
+ u32 partHeight)
+{
+ u32 p1[21*21/4+1]; /* overfill scratch, accessed as bytes */
+ u8 *ptrJ;
+ u32 x, y;
+ i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ const u8 *clp = h264bsdClip + 512; /* biased so that signed sums can index it */
+
+ /* Code */
+
+ ASSERT(ref);
+ ASSERT(mb);
+ ASSERT((partWidth&0x3) == 0);
+ ASSERT((partHeight&0x3) == 0);
+
+ /* if the window is not fully inside the plane, build it in p1 and
+ * continue with p1 as a (partWidth+5)-wide picture */
+ if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
+ (y0 < 0) || ((u32)y0+partHeight > height))
+ {
+ h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
+ partWidth+5, partHeight, partWidth+5);
+
+ x0 = 0;
+ y0 = 0;
+ ref = (u8*)p1;
+ width = partWidth + 5;
+ }
+
+ ref += (u32)y0 * width + (u32)x0;
+
+ ptrJ = ref + 5; /* next sample to load; columns 0..4 are primed below */
+
+ for (y = partHeight; y; y--)
+ {
+ tmp6 = *(ptrJ - 5); /* column 0 */
+ tmp5 = *(ptrJ - 4); /* column 1 */
+ tmp4 = *(ptrJ - 3); /* column 2 */
+ tmp3 = *(ptrJ - 2); /* column 3 */
+ tmp2 = *(ptrJ - 1); /* column 4 */
+
+ /* calculate 4 pels per iteration */
+ for (x = (partWidth >> 2); x; x--)
+ {
+ /* First pixel */
+ tmp6 += 16;
+ tmp7 = tmp3 + tmp4;
+ tmp6 += (tmp7 << 4);
+ tmp6 += (tmp7 << 2);
+ tmp7 = tmp2 + tmp5;
+ tmp1 = *ptrJ++; /* column 5 */
+ tmp6 -= (tmp7 << 2);
+ tmp6 -= tmp7;
+ tmp6 += tmp1;
+ tmp6 = clp[tmp6>>5];
+ /* Second pixel */
+ tmp5 += 16;
+ tmp7 = tmp2 + tmp3;
+ *mb++ = (u8)tmp6; /* store of the first pixel, interleaved with the second */
+ tmp5 += (tmp7 << 4);
+ tmp5 += (tmp7 << 2);
+ tmp7 = tmp1 + tmp4;
+ tmp6 = *ptrJ++; /* column 6 */
+ tmp5 -= (tmp7 << 2);
+ tmp5 -= tmp7;
+ tmp5 += tmp6;
+ tmp5 = clp[tmp5>>5];
+ /* Third pixel */
+ tmp4 += 16;
+ tmp7 = tmp1 + tmp2;
+ *mb++ = (u8)tmp5;
+ tmp4 += (tmp7 << 4);
+ tmp4 += (tmp7 << 2);
+ tmp7 = tmp6 + tmp3;
+ tmp5 = *ptrJ++; /* column 7 */
+ tmp4 -= (tmp7 << 2);
+ tmp4 -= tmp7;
+ tmp4 += tmp5;
+ tmp4 = clp[tmp4>>5];
+ /* Fourth pixel */
+ tmp3 += 16;
+ tmp7 = tmp6 + tmp1;
+ *mb++ = (u8)tmp4;
+ tmp3 += (tmp7 << 4);
+ tmp3 += (tmp7 << 2);
+ tmp7 = tmp5 + tmp2;
+ tmp4 = *ptrJ++; /* column 8 */
+ tmp3 -= (tmp7 << 2);
+ tmp3 -= tmp7;
+ tmp3 += tmp4;
+ tmp3 = clp[tmp3>>5];
+ /* rotate: tmp6..tmp2 must hold columns 4..8 for the next iteration */
+ tmp7 = tmp4;
+ tmp4 = tmp6;
+ tmp6 = tmp2;
+ tmp2 = tmp7;
+ *mb++ = (u8)tmp3;
+ tmp3 = tmp5;
+ tmp5 = tmp1;
+ }
+ ptrJ += width - partWidth; /* next reference row, same starting column */
+ mb += 16 - partWidth; /* next output row */
+ }
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdInterpolateHorQuarter
+
+ Functional description:
+ Function to perform horizontal interpolation of pixel position 'a'
+ or 'c' for a block. Overfilling is done only if needed. Reference
+ image (ref) is read at correct position and the predicted part
+ is written to macroblock array (mb)
+ Inputs:
+ ref pointer to the top-left corner of the reference plane
+ x0, y0 top-left corner of the (partWidth+5) x partHeight
+ window that is read; output column c is filtered
+ from window columns c..c+5
+ width, height plane dimensions in pixels (height is used only for
+ the bounds test)
+ partWidth, partHeight
+ size of the predicted block in pixels; four outputs
+ are produced per inner iteration, so partWidth is
+ expected to be a multiple of 4 (not asserted here)
+ horOffset selects the integer column averaged with the
+ half-sample value: window column c+2 for 0,
+ column c+3 for 1
+ Outputs:
+ mb predicted block, output rows 16 bytes apart
+ Notes:
+ Each output is (clip((sum + 16) >> 5) + integer sample + 1) >> 1, where
+ sum uses the taps (1, -5, 20, 20, -5, 1).
+ The "+= 16" for the next pixel is applied only after the raw sample
+ has been used for averaging the current one; statement order matters.
+ The function lies inside the #ifndef H264DEC_ARM11 region opened
+ before h264bsdInterpolateVerHalf.
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateHorQuarter(
+ u8 *ref,
+ u8 *mb,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 partWidth,
+ u32 partHeight,
+ u32 horOffset) /* 0 for pixel a, 1 for pixel c */
+{
+ u32 p1[21*21/4+1]; /* overfill scratch, accessed as bytes */
+ u8 *ptrJ;
+ u32 x, y;
+ i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ const u8 *clp = h264bsdClip + 512; /* biased so that signed sums can index it */
+
+ /* Code */
+
+ ASSERT(ref);
+ ASSERT(mb);
+
+ /* if the window is not fully inside the plane, build it in p1 and
+ * continue with p1 as a (partWidth+5)-wide picture */
+ if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
+ (y0 < 0) || ((u32)y0+partHeight > height))
+ {
+ h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
+ partWidth+5, partHeight, partWidth+5);
+
+ x0 = 0;
+ y0 = 0;
+ ref = (u8*)p1;
+ width = partWidth + 5;
+ }
+
+ ref += (u32)y0 * width + (u32)x0;
+
+ ptrJ = ref + 5; /* next sample to load; columns 0..4 are primed below */
+
+ for (y = partHeight; y; y--)
+ {
+ tmp6 = *(ptrJ - 5); /* column 0 */
+ tmp5 = *(ptrJ - 4); /* column 1 */
+ tmp4 = *(ptrJ - 3); /* column 2 */
+ tmp3 = *(ptrJ - 2); /* column 3 */
+ tmp2 = *(ptrJ - 1); /* column 4 */
+
+ /* calculate 4 pels per iteration */
+ for (x = (partWidth >> 2); x; x--)
+ {
+ /* First pixel */
+ tmp6 += 16;
+ tmp7 = tmp3 + tmp4;
+ tmp6 += (tmp7 << 4);
+ tmp6 += (tmp7 << 2);
+ tmp7 = tmp2 + tmp5;
+ tmp1 = *ptrJ++; /* column 5 */
+ tmp6 -= (tmp7 << 2);
+ tmp6 -= tmp7;
+ tmp6 += tmp1;
+ tmp6 = clp[tmp6>>5];
+ tmp5 += 16;
+ if (!horOffset)
+ tmp6 += tmp4; /* integer sample in column 2 */
+ else
+ tmp6 += tmp3; /* integer sample in column 3 */
+ *mb++ = (u8)((tmp6 + 1) >> 1);
+ /* Second pixel */
+ tmp7 = tmp2 + tmp3;
+ tmp5 += (tmp7 << 4);
+ tmp5 += (tmp7 << 2);
+ tmp7 = tmp1 + tmp4;
+ tmp6 = *ptrJ++; /* column 6 */
+ tmp5 -= (tmp7 << 2);
+ tmp5 -= tmp7;
+ tmp5 += tmp6;
+ tmp5 = clp[tmp5>>5];
+ tmp4 += 16;
+ if (!horOffset)
+ tmp5 += tmp3; /* column 3 */
+ else
+ tmp5 += tmp2; /* column 4 */
+ *mb++ = (u8)((tmp5 + 1) >> 1);
+ /* Third pixel */
+ tmp7 = tmp1 + tmp2;
+ tmp4 += (tmp7 << 4);
+ tmp4 += (tmp7 << 2);
+ tmp7 = tmp6 + tmp3;
+ tmp5 = *ptrJ++; /* column 7 */
+ tmp4 -= (tmp7 << 2);
+ tmp4 -= tmp7;
+ tmp4 += tmp5;
+ tmp4 = clp[tmp4>>5];
+ tmp3 += 16;
+ if (!horOffset)
+ tmp4 += tmp2; /* column 4 */
+ else
+ tmp4 += tmp1; /* column 5 */
+ *mb++ = (u8)((tmp4 + 1) >> 1);
+ /* Fourth pixel */
+ tmp7 = tmp6 + tmp1;
+ tmp3 += (tmp7 << 4);
+ tmp3 += (tmp7 << 2);
+ tmp7 = tmp5 + tmp2;
+ tmp4 = *ptrJ++; /* column 8 */
+ tmp3 -= (tmp7 << 2);
+ tmp3 -= tmp7;
+ tmp3 += tmp4;
+ tmp3 = clp[tmp3>>5];
+ if (!horOffset)
+ tmp3 += tmp1; /* column 5 */
+ else
+ tmp3 += tmp6; /* column 6 */
+ *mb++ = (u8)((tmp3 + 1) >> 1);
+ /* rotate: tmp6..tmp2 must hold columns 4..8 for the next iteration */
+ tmp3 = tmp5;
+ tmp5 = tmp1;
+ tmp7 = tmp4;
+ tmp4 = tmp6;
+ tmp6 = tmp2;
+ tmp2 = tmp7;
+ }
+ ptrJ += width - partWidth; /* next reference row, same starting column */
+ mb += 16 - partWidth; /* next output row */
+ }
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdInterpolateHorVerQuarter
+
+ Functional description:
+ Function to perform horizontal and vertical interpolation of pixel
+ position 'e', 'g', 'p' or 'r' for a block. Overfilling is done only
+ if needed. Reference image (ref) is read at correct position and
+ the predicted part is written to macroblock array (mb)
+ Inputs:
+ ref pointer to the top-left corner of the reference plane
+ x0, y0 top-left corner of the (partWidth+5) x (partHeight+5)
+ window that is read
+ width, height plane dimensions in pixels (height is used only for
+ the bounds test)
+ partWidth, partHeight
+ size of the predicted block in pixels; both passes
+ work in groups of four, so multiples of 4 are
+ expected (not asserted here)
+ horVerOffset bit 1 selects the window row used for the horizontal
+ half-sample pass (row 2 or 3), bit 0 selects the
+ window column used for the vertical half-sample
+ pass (column 2 or 3)
+ Outputs:
+ mb predicted block, output rows 16 bytes apart
+ Notes:
+ Pass 1 writes the clipped horizontal half-sample values to mb.
+ Pass 2 rewinds mb, computes the clipped vertical half-sample values
+ and replaces each mb entry with (mb + vertical + 1) >> 1.
+ Both passes use the taps (1, -5, 20, 20, -5, 1) with (sum + 16) >> 5.
+ The function lies inside the #ifndef H264DEC_ARM11 region opened
+ before h264bsdInterpolateVerHalf.
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateHorVerQuarter(
+ u8 *ref,
+ u8 *mb,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 partWidth,
+ u32 partHeight,
+ u32 horVerOffset) /* 0 for pixel e, 1 for pixel g,
+ 2 for pixel p, 3 for pixel r */
+{
+ u32 p1[21*21/4+1]; /* overfill scratch, accessed as bytes */
+ u8 *ptrC, *ptrJ, *ptrV;
+ u32 x, y;
+ i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ const u8 *clp = h264bsdClip + 512; /* biased so that signed sums can index it */
+
+ /* Code */
+
+ ASSERT(ref);
+ ASSERT(mb);
+
+ /* if the window is not fully inside the plane, build it in p1 and
+ * continue with p1 as a (partWidth+5)-wide picture */
+ if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
+ (y0 < 0) || ((u32)y0+partHeight+5 > height))
+ {
+ h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
+ partWidth+5, partHeight+5, partWidth+5);
+
+ x0 = 0;
+ y0 = 0;
+ ref = (u8*)p1;
+ width = partWidth+5;
+ }
+
+ /* Ref points to G + (-2, -2) */
+ ref += (u32)y0 * width + (u32)x0;
+
+ /* ptrJ points to either J or Q, depending on vertical offset */
+ ptrJ = ref + (((horVerOffset & 0x2) >> 1) + 2) * width + 5; /* row 2 or 3, next sample to load is column 5 */
+
+ /* ptrC points to either C or D, depending on horizontal offset */
+ ptrC = ref + width + 2 + (horVerOffset & 0x1); /* row 1, column 2 or 3 */
+
+ for (y = partHeight; y; y--)
+ {
+ tmp6 = *(ptrJ - 5); /* column 0 */
+ tmp5 = *(ptrJ - 4); /* column 1 */
+ tmp4 = *(ptrJ - 3); /* column 2 */
+ tmp3 = *(ptrJ - 2); /* column 3 */
+ tmp2 = *(ptrJ - 1); /* column 4 */
+
+ /* Horizontal interpolation, calculate 4 pels per iteration */
+ for (x = (partWidth >> 2); x; x--)
+ {
+ /* First pixel */
+ tmp6 += 16;
+ tmp7 = tmp3 + tmp4;
+ tmp6 += (tmp7 << 4);
+ tmp6 += (tmp7 << 2);
+ tmp7 = tmp2 + tmp5;
+ tmp1 = *ptrJ++; /* column 5 */
+ tmp6 -= (tmp7 << 2);
+ tmp6 -= tmp7;
+ tmp6 += tmp1;
+ tmp6 = clp[tmp6>>5];
+ /* Second pixel */
+ tmp5 += 16;
+ tmp7 = tmp2 + tmp3;
+ *mb++ = (u8)tmp6; /* store of the first pixel, interleaved with the second */
+ tmp5 += (tmp7 << 4);
+ tmp5 += (tmp7 << 2);
+ tmp7 = tmp1 + tmp4;
+ tmp6 = *ptrJ++; /* column 6 */
+ tmp5 -= (tmp7 << 2);
+ tmp5 -= tmp7;
+ tmp5 += tmp6;
+ tmp5 = clp[tmp5>>5];
+ /* Third pixel */
+ tmp4 += 16;
+ tmp7 = tmp1 + tmp2;
+ *mb++ = (u8)tmp5;
+ tmp4 += (tmp7 << 4);
+ tmp4 += (tmp7 << 2);
+ tmp7 = tmp6 + tmp3;
+ tmp5 = *ptrJ++; /* column 7 */
+ tmp4 -= (tmp7 << 2);
+ tmp4 -= tmp7;
+ tmp4 += tmp5;
+ tmp4 = clp[tmp4>>5];
+ /* Fourth pixel */
+ tmp3 += 16;
+ tmp7 = tmp6 + tmp1;
+ *mb++ = (u8)tmp4;
+ tmp3 += (tmp7 << 4);
+ tmp3 += (tmp7 << 2);
+ tmp7 = tmp5 + tmp2;
+ tmp4 = *ptrJ++; /* column 8 */
+ tmp3 -= (tmp7 << 2);
+ tmp3 -= tmp7;
+ tmp3 += tmp4;
+ tmp3 = clp[tmp3>>5];
+ /* rotate: tmp6..tmp2 must hold columns 4..8 for the next iteration */
+ tmp7 = tmp4;
+ tmp4 = tmp6;
+ tmp6 = tmp2;
+ tmp2 = tmp7;
+ *mb++ = (u8)tmp3;
+ tmp3 = tmp5;
+ tmp5 = tmp1;
+ }
+ ptrJ += width - partWidth; /* next reference row, same starting column */
+ mb += 16 - partWidth; /* next output row */
+ }
+
+ mb -= 16*partHeight; /* rewind to the first output pixel for the second pass */
+ ptrV = ptrC + 5*width; /* window row 6 */
+
+ for (y = (partHeight >> 2); y; y--)
+ {
+ /* Vertical interpolation and averaging, 4 pels per iteration */
+ for (x = partWidth; x; x--)
+ {
+ tmp4 = ptrV[-(i32)width*2]; /* row 4 of the current 9-row window */
+ tmp5 = ptrV[-(i32)width]; /* row 5 */
+ tmp1 = ptrV[width]; /* row 7 */
+ tmp2 = ptrV[width*2]; /* row 8 */
+ tmp6 = *ptrV++; /* row 6 */
+
+ /* output row 3: rows 3..8, averaged with the pass-1 value in mb[48] */
+ tmp7 = tmp4 + tmp1;
+ tmp2 -= (tmp7 << 2);
+ tmp2 -= tmp7;
+ tmp2 += 16;
+ tmp7 = tmp5 + tmp6;
+ tmp3 = ptrC[width*2]; /* row 3 */
+ tmp2 += (tmp7 << 4);
+ tmp2 += (tmp7 << 2);
+ tmp2 += tmp3;
+ tmp7 = clp[tmp2>>5];
+ tmp2 = mb[48];
+ tmp1 += 16;
+ tmp7++; /* rounding term of the average */
+ mb[48] = (u8)((tmp2 + tmp7) >> 1);
+
+ /* output row 2: rows 2..7 */
+ tmp7 = tmp3 + tmp6;
+ tmp1 -= (tmp7 << 2);
+ tmp1 -= tmp7;
+ tmp7 = tmp4 + tmp5;
+ tmp2 = ptrC[width]; /* row 2 */
+ tmp1 += (tmp7 << 4);
+ tmp1 += (tmp7 << 2);
+ tmp1 += tmp2;
+ tmp7 = clp[tmp1>>5];
+ tmp1 = mb[32];
+ tmp6 += 16;
+ tmp7++;
+ mb[32] = (u8)((tmp1 + tmp7) >> 1);
+
+ /* output row 1: rows 1..6 */
+ tmp1 = *ptrC; /* row 1 */
+ tmp7 = tmp2 + tmp5;
+ tmp6 -= (tmp7 << 2);
+ tmp6 -= tmp7;
+ tmp7 = tmp4 + tmp3;
+ tmp6 += (tmp7 << 4);
+ tmp6 += (tmp7 << 2);
+ tmp6 += tmp1;
+ tmp7 = clp[tmp6>>5];
+ tmp6 = mb[16];
+ tmp5 += 16;
+ tmp7++;
+ mb[16] = (u8)((tmp6 + tmp7) >> 1);
+
+ /* output row 0: rows 0..5 */
+ tmp6 = ptrC[-(i32)width]; /* row 0 */
+ tmp1 += tmp4;
+ tmp5 -= (tmp1 << 2);
+ tmp5 -= tmp1;
+ tmp3 += tmp2;
+ tmp5 += (tmp3 << 4);
+ tmp5 += (tmp3 << 2);
+ tmp5 += tmp6;
+ tmp7 = clp[tmp5>>5];
+ tmp5 = *mb;
+ tmp7++;
+ *mb++ = (u8)((tmp5 + tmp7) >> 1);
+ ptrC++;
+
+ }
+ ptrC += 4*width - partWidth; /* four rows down, back to the first column */
+ ptrV += 4*width - partWidth;
+ mb += 4*16 - partWidth;
+ }
+
+}
+#endif
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdInterpolateMidHalf
+
+ Functional description:
+ Function to perform horizontal and vertical interpolation of pixel
+ position 'j' for a block. Overfilling is done only if needed.
+ Reference image (ref) is read at correct position and the predicted
+ part is written to macroblock array (mb)
+ Inputs:
+ ref pointer to the top-left corner of the reference plane
+ x0, y0 top-left corner of the (partWidth+5) x (partHeight+5)
+ window that is read
+ width, height plane dimensions in pixels (height is used only for
+ the bounds test)
+ partWidth, partHeight
+ size of the predicted block in pixels; both passes
+ work in groups of four, so multiples of 4 are
+ expected (not asserted here)
+ Outputs:
+ mb predicted block, output rows 16 bytes apart
+ Notes:
+ Pass 1 stores the unrounded, unclipped horizontal 6-tap sums for all
+ partHeight+5 window rows in table[], partWidth values per row.
+ Pass 2 runs the same 6-tap filter vertically over table[] and
+ produces clip((sum + 512) >> 10).
+ table[] has 21*16 entries, i.e. it covers blocks up to 16 x 16.
+ The function is outside the #ifndef H264DEC_ARM11 region, so it is
+ compiled regardless of that flag.
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateMidHalf(
+ u8 *ref,
+ u8 *mb,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 partWidth,
+ u32 partHeight)
+{
+ u32 p1[21*21/4+1]; /* overfill scratch, accessed as bytes */
+ u32 x, y;
+ i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ i32 *ptrC, *ptrV, *b1;
+ u8 *ptrJ;
+ i32 table[21*16]; /* intermediate horizontal sums, (partHeight+5) rows of partWidth */
+ const u8 *clp = h264bsdClip + 512; /* biased so that signed sums can index it */
+
+ /* Code */
+
+ ASSERT(ref);
+ ASSERT(mb);
+
+ /* if the window is not fully inside the plane, build it in p1 and
+ * continue with p1 as a (partWidth+5)-wide picture */
+ if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
+ (y0 < 0) || ((u32)y0+partHeight+5 > height))
+ {
+ h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
+ partWidth+5, partHeight+5, partWidth+5);
+
+ x0 = 0;
+ y0 = 0;
+ ref = (u8*)p1;
+ width = partWidth+5;
+ }
+
+ ref += (u32)y0 * width + (u32)x0;
+
+ b1 = table; /* write cursor into the intermediate table */
+ ptrJ = ref + 5; /* next sample to load; columns 0..4 are primed below */
+
+ /* First step: calculate intermediate values for
+ * horizontal interpolation */
+ for (y = partHeight + 5; y; y--)
+ {
+ tmp6 = *(ptrJ - 5); /* column 0 */
+ tmp5 = *(ptrJ - 4); /* column 1 */
+ tmp4 = *(ptrJ - 3); /* column 2 */
+ tmp3 = *(ptrJ - 2); /* column 3 */
+ tmp2 = *(ptrJ - 1); /* column 4 */
+
+ /* 4 pels per iteration */
+ for (x = (partWidth >> 2); x; x--)
+ {
+ /* First pixel */
+ tmp7 = tmp3 + tmp4;
+ tmp6 += (tmp7 << 4);
+ tmp6 += (tmp7 << 2);
+ tmp7 = tmp2 + tmp5;
+ tmp1 = *ptrJ++; /* column 5 */
+ tmp6 -= (tmp7 << 2);
+ tmp6 -= tmp7;
+ tmp6 += tmp1;
+ *b1++ = tmp6; /* raw sum: no rounding, shift or clip in this pass */
+ /* Second pixel */
+ tmp7 = tmp2 + tmp3;
+ tmp5 += (tmp7 << 4);
+ tmp5 += (tmp7 << 2);
+ tmp7 = tmp1 + tmp4;
+ tmp6 = *ptrJ++; /* column 6 */
+ tmp5 -= (tmp7 << 2);
+ tmp5 -= tmp7;
+ tmp5 += tmp6;
+ *b1++ = tmp5;
+ /* Third pixel */
+ tmp7 = tmp1 + tmp2;
+ tmp4 += (tmp7 << 4);
+ tmp4 += (tmp7 << 2);
+ tmp7 = tmp6 + tmp3;
+ tmp5 = *ptrJ++; /* column 7 */
+ tmp4 -= (tmp7 << 2);
+ tmp4 -= tmp7;
+ tmp4 += tmp5;
+ *b1++ = tmp4;
+ /* Fourth pixel */
+ tmp7 = tmp6 + tmp1;
+ tmp3 += (tmp7 << 4);
+ tmp3 += (tmp7 << 2);
+ tmp7 = tmp5 + tmp2;
+ tmp4 = *ptrJ++; /* column 8 */
+ tmp3 -= (tmp7 << 2);
+ tmp3 -= tmp7;
+ tmp3 += tmp4;
+ *b1++ = tmp3;
+ /* rotate: tmp6..tmp2 must hold columns 4..8 for the next iteration */
+ tmp7 = tmp4;
+ tmp4 = tmp6;
+ tmp6 = tmp2;
+ tmp2 = tmp7;
+ tmp3 = tmp5;
+ tmp5 = tmp1;
+ }
+ ptrJ += width - partWidth; /* next reference row, same starting column */
+ }
+
+ /* Second step: calculate vertical interpolation */
+ ptrC = table + partWidth; /* table row 1 */
+ ptrV = ptrC + 5*partWidth; /* table row 6 */
+ for (y = (partHeight >> 2); y; y--)
+ {
+ /* 4 pels per iteration */
+ for (x = partWidth; x; x--)
+ {
+ tmp4 = ptrV[-(i32)partWidth*2]; /* row 4 of the current 9-row window */
+ tmp5 = ptrV[-(i32)partWidth]; /* row 5 */
+ tmp1 = ptrV[partWidth]; /* row 7 */
+ tmp2 = ptrV[partWidth*2]; /* row 8 */
+ tmp6 = *ptrV++; /* row 6 */
+
+ /* output row 3: rows 3..8 */
+ tmp7 = tmp4 + tmp1;
+ tmp2 -= (tmp7 << 2);
+ tmp2 -= tmp7;
+ tmp2 += 512; /* rounding term for the combined >> 10 */
+ tmp7 = tmp5 + tmp6;
+ tmp3 = ptrC[partWidth*2]; /* row 3 */
+ tmp2 += (tmp7 << 4);
+ tmp2 += (tmp7 << 2);
+ tmp2 += tmp3;
+ tmp7 = clp[tmp2>>10];
+ tmp1 += 512;
+ mb[48] = (u8)tmp7;
+
+ /* output row 2: rows 2..7 */
+ tmp7 = tmp3 + tmp6;
+ tmp1 -= (tmp7 << 2);
+ tmp1 -= tmp7;
+ tmp7 = tmp4 + tmp5;
+ tmp2 = ptrC[partWidth]; /* row 2 */
+ tmp1 += (tmp7 << 4);
+ tmp1 += (tmp7 << 2);
+ tmp1 += tmp2;
+ tmp7 = clp[tmp1>>10];
+ tmp6 += 512;
+ mb[32] = (u8)tmp7;
+
+ /* output row 1: rows 1..6 */
+ tmp1 = *ptrC; /* row 1 */
+ tmp7 = tmp2 + tmp5;
+ tmp6 -= (tmp7 << 2);
+ tmp6 -= tmp7;
+ tmp7 = tmp4 + tmp3;
+ tmp6 += (tmp7 << 4);
+ tmp6 += (tmp7 << 2);
+ tmp6 += tmp1;
+ tmp7 = clp[tmp6>>10];
+ tmp5 += 512;
+ mb[16] = (u8)tmp7;
+
+ /* output row 0: rows 0..5 */
+ tmp6 = ptrC[-(i32)partWidth]; /* row 0 */
+ tmp1 += tmp4;
+ tmp5 -= (tmp1 << 2);
+ tmp5 -= tmp1;
+ tmp3 += tmp2;
+ tmp5 += (tmp3 << 4);
+ tmp5 += (tmp3 << 2);
+ tmp5 += tmp6;
+ tmp7 = clp[tmp5>>10];
+ *mb++ = (u8)tmp7;
+ ptrC++;
+ }
+ mb += 4*16 - partWidth; /* four output rows down, back to the first column */
+ ptrC += 3*partWidth; /* the inner loop already advanced one table row; add three more */
+ ptrV += 3*partWidth;
+ }
+
+}
+
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdInterpolateMidVerQuarter
+
+ Functional description:
+ Function to perform horizontal and vertical interpolation of pixel
+ position 'f' or 'q' for a block. Overfilling is done only if needed.
+ Reference image (ref) is read at correct position and the predicted
+ part is written to macroblock array (mb)
+
+ The work is done in two passes. The first pass runs the 6-tap filter
+ (1, -5, 20, 20, -5, 1) horizontally over partHeight+5 rows and stores
+ the unscaled sums in a local table, partWidth values per row. The
+ second pass runs the same filter vertically over that table, scales
+ the result with (sum + 512) >> 10 and averages it, with rounding,
+ against a first-pass value scaled with (sum + 16) >> 5. Both scaled
+ values go through the h264bsdClip lookup table (presumably a
+ saturation to the 8-bit range -- confirm against its definition).
+
+ Inputs:
+ ref pointer to reference samples
+ x0, y0 top-left corner of the (partWidth+5) x (partHeight+5)
+ area that is read from the reference
+ width width of the reference, also used as its row stride
+ height height of the reference
+ partWidth width of the predicted block; the first pass produces
+ four values per inner iteration (presumably always a
+ multiple of 4 -- confirm against the callers)
+ partHeight height of the predicted block; the second pass
+ produces four rows per outer iteration (presumably
+ always a multiple of 4 -- confirm against the callers)
+ verOffset 0 for pixel f, 1 for pixel q; selects which table row
+ supplies the value that is averaged in
+ Outputs:
+ mb predicted block, written with a row stride of 16
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateMidVerQuarter(
+ u8 *ref,
+ u8 *mb,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 partWidth,
+ u32 partHeight,
+ u32 verOffset) /* 0 for pixel f, 1 for pixel q */
+{
+ u32 p1[21*21/4+1]; /* overfill scratch, accessed as bytes via (u8*) */
+ u32 x, y;
+ i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ i32 *ptrC, *ptrV, *ptrInt, *b1;
+ u8 *ptrJ;
+ i32 table[21*16]; /* unscaled sums written by the first pass */
+ const u8 *clp = h264bsdClip + 512; /* lookup base offset by 512 entries */
+
+ /* Code */
+
+ ASSERT(ref);
+ ASSERT(mb);
+ /* If the source area leaves the reference, read from an edge-extended copy in p1 instead */
+ if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
+ (y0 < 0) || ((u32)y0+partHeight+5 > height))
+ {
+ h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
+ partWidth+5, partHeight+5, partWidth+5);
+
+ x0 = 0;
+ y0 = 0;
+ ref = (u8*)p1;
+ width = partWidth+5;
+ }
+
+ ref += (u32)y0 * width + (u32)x0;
+
+ b1 = table;
+ ptrJ = ref + 5;
+
+ /* First step: calculate intermediate values for
+ * horizontal interpolation. Each stored value is the unscaled sum
+ * p[-5] - 5*p[-4] + 20*p[-3] + 20*p[-2] - 5*p[-1] + p[0], taken
+ * relative to ptrJ. x*20 is built as (x << 4) + (x << 2) and x*5 as
+ * (x << 2) + x. tmp1..tmp6 hold the sliding window of samples; at the
+ * end of each iteration they are re-assigned so that the five newest
+ * samples are reused instead of being read from memory again. */
+ for (y = partHeight + 5; y; y--)
+ {
+ tmp6 = *(ptrJ - 5);
+ tmp5 = *(ptrJ - 4);
+ tmp4 = *(ptrJ - 3);
+ tmp3 = *(ptrJ - 2);
+ tmp2 = *(ptrJ - 1);
+ for (x = (partWidth >> 2); x; x--)
+ {
+ /* First pixel */
+ tmp7 = tmp3 + tmp4;
+ tmp6 += (tmp7 << 4);
+ tmp6 += (tmp7 << 2);
+ tmp7 = tmp2 + tmp5;
+ tmp1 = *ptrJ++;
+ tmp6 -= (tmp7 << 2);
+ tmp6 -= tmp7;
+ tmp6 += tmp1;
+ *b1++ = tmp6;
+ /* Second pixel */
+ tmp7 = tmp2 + tmp3;
+ tmp5 += (tmp7 << 4);
+ tmp5 += (tmp7 << 2);
+ tmp7 = tmp1 + tmp4;
+ tmp6 = *ptrJ++;
+ tmp5 -= (tmp7 << 2);
+ tmp5 -= tmp7;
+ tmp5 += tmp6;
+ *b1++ = tmp5;
+ /* Third pixel */
+ tmp7 = tmp1 + tmp2;
+ tmp4 += (tmp7 << 4);
+ tmp4 += (tmp7 << 2);
+ tmp7 = tmp6 + tmp3;
+ tmp5 = *ptrJ++;
+ tmp4 -= (tmp7 << 2);
+ tmp4 -= tmp7;
+ tmp4 += tmp5;
+ *b1++ = tmp4;
+ /* Fourth pixel */
+ tmp7 = tmp6 + tmp1;
+ tmp3 += (tmp7 << 4);
+ tmp3 += (tmp7 << 2);
+ tmp7 = tmp5 + tmp2;
+ tmp4 = *ptrJ++;
+ tmp3 -= (tmp7 << 2);
+ tmp3 -= tmp7;
+ tmp3 += tmp4;
+ *b1++ = tmp3;
+ tmp7 = tmp4; /* rotate the window for the next four pixels */
+ tmp4 = tmp6;
+ tmp6 = tmp2;
+ tmp2 = tmp7;
+ tmp3 = tmp5;
+ tmp5 = tmp1;
+ }
+ ptrJ += width - partWidth;
+ }
+
+ /* Second step: calculate vertical interpolation and average.
+ * ptrC starts at table row 1 and ptrV five rows below it; together
+ * they reach the six table rows needed for each of the four output
+ * rows produced per column in one pass of the inner loop. */
+ ptrC = table + partWidth;
+ ptrV = ptrC + 5*partWidth;
+ /* Pointer to integer sample position, either M or R.
+ * NOTE(review): ptrInt points into table, so the values read through
+ * it are first-pass (horizontally filtered) sums, which is why they
+ * are scaled with (v + 16) >> 5 below. */
+ ptrInt = ptrC + (2+verOffset)*partWidth;
+ for (y = (partHeight >> 2); y; y--)
+ {
+ for (x = partWidth; x; x--)
+ {
+ tmp4 = ptrV[-(i32)partWidth*2];
+ tmp5 = ptrV[-(i32)partWidth];
+ tmp1 = ptrV[partWidth];
+ tmp2 = ptrV[partWidth*2];
+ tmp6 = *ptrV++;
+ /* fourth row of this group of four, stored at mb[48] */
+ tmp7 = tmp4 + tmp1;
+ tmp2 -= (tmp7 << 2);
+ tmp2 -= tmp7;
+ tmp2 += 512;
+ tmp7 = tmp5 + tmp6;
+ tmp3 = ptrC[partWidth*2];
+ tmp2 += (tmp7 << 4);
+ tmp2 += (tmp7 << 2);
+ tmp7 = ptrInt[partWidth*2];
+ tmp2 += tmp3;
+ tmp2 = clp[tmp2>>10];
+ tmp7 += 16;
+ tmp7 = clp[tmp7>>5];
+ tmp1 += 512;
+ tmp2++;
+ mb[48] = (u8)((tmp7 + tmp2) >> 1);
+ /* third row of the group, stored at mb[32] */
+ tmp7 = tmp3 + tmp6;
+ tmp1 -= (tmp7 << 2);
+ tmp1 -= tmp7;
+ tmp7 = tmp4 + tmp5;
+ tmp2 = ptrC[partWidth];
+ tmp1 += (tmp7 << 4);
+ tmp1 += (tmp7 << 2);
+ tmp7 = ptrInt[partWidth];
+ tmp1 += tmp2;
+ tmp1 = clp[tmp1>>10];
+ tmp7 += 16;
+ tmp7 = clp[tmp7>>5];
+ tmp6 += 512;
+ tmp1++;
+ mb[32] = (u8)((tmp7 + tmp1) >> 1);
+ /* second row of the group, stored at mb[16] */
+ tmp1 = *ptrC;
+ tmp7 = tmp2 + tmp5;
+ tmp6 -= (tmp7 << 2);
+ tmp6 -= tmp7;
+ tmp7 = tmp4 + tmp3;
+ tmp6 += (tmp7 << 4);
+ tmp6 += (tmp7 << 2);
+ tmp7 = *ptrInt;
+ tmp6 += tmp1;
+ tmp6 = clp[tmp6>>10];
+ tmp7 += 16;
+ tmp7 = clp[tmp7>>5];
+ tmp5 += 512;
+ tmp6++;
+ mb[16] = (u8)((tmp7 + tmp6) >> 1);
+ /* first row of the group, stored at the current mb position */
+ tmp6 = ptrC[-(i32)partWidth];
+ tmp1 += tmp4;
+ tmp5 -= (tmp1 << 2);
+ tmp5 -= tmp1;
+ tmp3 += tmp2;
+ tmp5 += (tmp3 << 4);
+ tmp5 += (tmp3 << 2);
+ tmp7 = ptrInt[-(i32)partWidth];
+ tmp5 += tmp6;
+ tmp5 = clp[tmp5>>10];
+ tmp7 += 16;
+ tmp7 = clp[tmp7>>5];
+ tmp5++;
+ *mb++ = (u8)((tmp7 + tmp5) >> 1);
+ ptrC++;
+ ptrInt++;
+ }
+ mb += 4*16 - partWidth; /* four output rows of stride 16 were written */
+ ptrC += 3*partWidth; /* one row was walked above, skip the other three */
+ ptrV += 3*partWidth;
+ ptrInt += 3*partWidth;
+ }
+
+}
+
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdInterpolateMidHorQuarter
+
+ Functional description:
+ Function to perform horizontal and vertical interpolation of pixel
+ position 'i' or 'k' for a block. Overfilling is done only if needed.
+ Reference image (ref) is read at correct position and the predicted
+ part is written to macroblock array (mb)
+
+ The work is done in two passes. The first pass runs the 6-tap filter
+ (1, -5, 20, 20, -5, 1) vertically over partWidth+5 columns and stores
+ the unscaled sums in a local table, one table row of partWidth+5
+ values per output row. The second pass runs the same filter
+ horizontally over that table, scales the result with
+ (sum + 512) >> 10 and averages it, with rounding, against a
+ first-pass value scaled with (sum + 16) >> 5. Both scaled values go
+ through the h264bsdClip lookup table (presumably a saturation to the
+ 8-bit range -- confirm against its definition).
+
+ Inputs:
+ ref pointer to reference samples
+ x0, y0 top-left corner of the (partWidth+5) x (partHeight+5)
+ area that is read from the reference
+ width width of the reference, also used as its row stride
+ height height of the reference
+ partWidth width of the predicted block; the second pass
+ produces four pixels per inner iteration (presumably
+ always a multiple of 4 -- confirm against the callers)
+ partHeight height of the predicted block; the first pass
+ produces four rows per outer iteration (presumably
+ always a multiple of 4 -- confirm against the callers)
+ horOffset 0 for pixel i, 1 for pixel k; selects which table
+ column supplies the value that is averaged in
+ Outputs:
+ mb predicted block, written with a row stride of 16
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInterpolateMidHorQuarter(
+ u8 *ref,
+ u8 *mb,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 partWidth,
+ u32 partHeight,
+ u32 horOffset) /* 0 for pixel i, 1 for pixel k */
+{
+ u32 p1[21*21/4+1]; /* overfill scratch, accessed as bytes via (u8*) */
+ u32 x, y;
+ i32 tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
+ i32 *ptrJ, *ptrInt, *h1;
+ u8 *ptrC, *ptrV;
+ i32 table[21*16]; /* unscaled sums written by the first pass */
+ i32 tableWidth = (i32)partWidth+5; /* values per table row */
+ const u8 *clp = h264bsdClip + 512; /* lookup base offset by 512 entries */
+
+ /* Code */
+
+ ASSERT(ref);
+ ASSERT(mb);
+ /* If the source area leaves the reference, read from an edge-extended copy in p1 instead */
+ if ((x0 < 0) || ((u32)x0+partWidth+5 > width) ||
+ (y0 < 0) || ((u32)y0+partHeight+5 > height))
+ {
+ h264bsdFillBlock(ref, (u8*)p1, x0, y0, width, height,
+ partWidth+5, partHeight+5, partWidth+5);
+
+ x0 = 0;
+ y0 = 0;
+ ref = (u8*)p1;
+ width = partWidth+5;
+ }
+
+ ref += (u32)y0 * width + (u32)x0;
+
+ h1 = table + tableWidth;
+ ptrC = ref + width;
+ ptrV = ptrC + 5*width;
+
+ /* First step: calculate intermediate values for
+ * vertical interpolation. ptrC starts at source row 1 and ptrV five
+ * rows below it; together they reach the six source rows needed for
+ * each of the four table rows written per column in one pass of the
+ * inner loop. x*20 is built as (x << 4) + (x << 2) and x*5 as
+ * (x << 2) + x. */
+ for (y = (partHeight >> 2); y; y--)
+ {
+ for (x = (u32)tableWidth; x; x--)
+ {
+ tmp4 = ptrV[-(i32)width*2];
+ tmp5 = ptrV[-(i32)width];
+ tmp1 = ptrV[width];
+ tmp2 = ptrV[width*2];
+ tmp6 = *ptrV++;
+ /* fourth table row of this group of four */
+ tmp7 = tmp4 + tmp1;
+ tmp2 -= (tmp7 << 2);
+ tmp2 -= tmp7;
+ tmp7 = tmp5 + tmp6;
+ tmp3 = ptrC[width*2];
+ tmp2 += (tmp7 << 4);
+ tmp2 += (tmp7 << 2);
+ tmp2 += tmp3;
+ h1[tableWidth*2] = tmp2;
+ /* third table row of the group */
+ tmp7 = tmp3 + tmp6;
+ tmp1 -= (tmp7 << 2);
+ tmp1 -= tmp7;
+ tmp7 = tmp4 + tmp5;
+ tmp2 = ptrC[width];
+ tmp1 += (tmp7 << 4);
+ tmp1 += (tmp7 << 2);
+ tmp1 += tmp2;
+ h1[tableWidth] = tmp1;
+ /* second table row of the group */
+ tmp1 = *ptrC;
+ tmp7 = tmp2 + tmp5;
+ tmp6 -= (tmp7 << 2);
+ tmp6 -= tmp7;
+ tmp7 = tmp4 + tmp3;
+ tmp6 += (tmp7 << 4);
+ tmp6 += (tmp7 << 2);
+ tmp6 += tmp1;
+ *h1 = tmp6;
+ /* first table row of the group */
+ tmp6 = ptrC[-(i32)width];
+ tmp1 += tmp4;
+ tmp5 -= (tmp1 << 2);
+ tmp5 -= tmp1;
+ tmp3 += tmp2;
+ tmp5 += (tmp3 << 4);
+ tmp5 += (tmp3 << 2);
+ tmp5 += tmp6;
+ h1[-tableWidth] = tmp5;
+ h1++;
+ ptrC++;
+ }
+ ptrC += 4*width - partWidth - 5; /* back to column 0, four source rows down */
+ ptrV += 4*width - partWidth - 5;
+ h1 += 3*tableWidth; /* one table row was walked above, skip the other three */
+ }
+
+ /* Second step: calculate horizontal interpolation and average.
+ * tmp1..tmp6 hold a sliding window of table values relative to ptrJ;
+ * at the end of each iteration they are re-assigned so that the five
+ * newest values are reused instead of being read from the table
+ * again. */
+ ptrJ = table + 5;
+ /* Pointer to integer sample position, either G or H.
+ * NOTE(review): ptrInt points into table, so the values read through
+ * it are first-pass (vertically filtered) sums, which is why they are
+ * scaled with (v + 16) >> 5 below. */
+ ptrInt = table + 2 + horOffset;
+ for (y = partHeight; y; y--)
+ {
+ tmp6 = *(ptrJ - 5);
+ tmp5 = *(ptrJ - 4);
+ tmp4 = *(ptrJ - 3);
+ tmp3 = *(ptrJ - 2);
+ tmp2 = *(ptrJ - 1);
+ for (x = (partWidth>>2); x; x--)
+ {
+ /* First pixel */
+ tmp6 += 512;
+ tmp7 = tmp3 + tmp4;
+ tmp6 += (tmp7 << 4);
+ tmp6 += (tmp7 << 2);
+ tmp7 = tmp2 + tmp5;
+ tmp1 = *ptrJ++;
+ tmp6 -= (tmp7 << 2);
+ tmp6 -= tmp7;
+ tmp7 = *ptrInt++;
+ tmp6 += tmp1;
+ tmp6 = clp[tmp6 >> 10];
+ tmp7 += 16;
+ tmp7 = clp[tmp7 >> 5];
+ tmp5 += 512;
+ tmp6++;
+ *mb++ = (u8)((tmp6 + tmp7) >> 1);
+ /* Second pixel */
+ tmp7 = tmp2 + tmp3;
+ tmp5 += (tmp7 << 4);
+ tmp5 += (tmp7 << 2);
+ tmp7 = tmp1 + tmp4;
+ tmp6 = *ptrJ++;
+ tmp5 -= (tmp7 << 2);
+ tmp5 -= tmp7;
+ tmp7 = *ptrInt++;
+ tmp5 += tmp6;
+ tmp5 = clp[tmp5 >> 10];
+ tmp7 += 16;
+ tmp7 = clp[tmp7 >> 5];
+ tmp4 += 512;
+ tmp5++;
+ *mb++ = (u8)((tmp5 + tmp7) >> 1);
+ /* Third pixel */
+ tmp7 = tmp1 + tmp2;
+ tmp4 += (tmp7 << 4);
+ tmp4 += (tmp7 << 2);
+ tmp7 = tmp6 + tmp3;
+ tmp5 = *ptrJ++;
+ tmp4 -= (tmp7 << 2);
+ tmp4 -= tmp7;
+ tmp7 = *ptrInt++;
+ tmp4 += tmp5;
+ tmp4 = clp[tmp4 >> 10];
+ tmp7 += 16;
+ tmp7 = clp[tmp7 >> 5];
+ tmp3 += 512;
+ tmp4++;
+ *mb++ = (u8)((tmp4 + tmp7) >> 1);
+ /* Fourth pixel */
+ tmp7 = tmp6 + tmp1;
+ tmp3 += (tmp7 << 4);
+ tmp3 += (tmp7 << 2);
+ tmp7 = tmp5 + tmp2;
+ tmp4 = *ptrJ++;
+ tmp3 -= (tmp7 << 2);
+ tmp3 -= tmp7;
+ tmp7 = *ptrInt++;
+ tmp3 += tmp4;
+ tmp3 = clp[tmp3 >> 10];
+ tmp7 += 16;
+ tmp7 = clp[tmp7 >> 5];
+ tmp3++;
+ *mb++ = (u8)((tmp3 + tmp7) >> 1);
+ tmp3 = tmp5; /* rotate the window for the next four pixels */
+ tmp5 = tmp1;
+ tmp7 = tmp4;
+ tmp4 = tmp6;
+ tmp6 = tmp2;
+ tmp2 = tmp7;
+ }
+ ptrJ += 5; /* table rows are partWidth+5 wide: step to the next row */
+ ptrInt += 5;
+ mb += 16 - partWidth; /* output rows have a stride of 16 */
+ }
+
+}
+
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdPredictSamples
+
+ Functional description:
+ This function reconstructs a prediction for a macroblock partition.
+ The prediction is either copied or interpolated using the reference
+ frame and the motion vector. Both luminance and chrominance parts are
+ predicted. The prediction is stored in given macroblock array (data).
+ Inputs:
+ data pointer to macroblock array (384 bytes) for output
+ mv pointer to motion vector used for prediction
+ refPic pointer to reference picture structure
+ xA x-coordinate for current macroblock
+ yA y-coordinate for current macroblock
+ partX x-offset for partition in macroblock
+ partY y-offset for partition in macroblock
+ partWidth width of partition
+ partHeight height of partition
+ Outputs:
+ data macroblock array (16x16+8x8+8x8) where predicted
+ partition is stored at correct position
+
+------------------------------------------------------------------------------*/
+
+void h264bsdPredictSamples(
+    u8 *data,
+    mv_t *mv,
+    image_t *refPic,
+    u32 xA,
+    u32 yA,
+    u32 partX,
+    u32 partY,
+    u32 partWidth,
+    u32 partHeight)
+{
+    u8 *lumaDst;            /* where the luma partition goes inside data */
+    u8 *refLuma;            /* start of the reference picture data */
+    u32 fracX, fracY;       /* quarter-sample part of the motion vector */
+    u32 picW, picH;         /* reference size in luma samples */
+    i32 col, row;           /* full-sample position in the reference */
+
+    ASSERT(data);
+    ASSERT(mv);
+    ASSERT(partWidth);
+    ASSERT(partHeight);
+    ASSERT(refPic);
+    ASSERT(refPic->data);
+    ASSERT(refPic->width);
+    ASSERT(refPic->height);
+
+    /* Reference geometry: the picture size is stored in units of 16 */
+    refLuma = refPic->data;
+    picW = 16 * refPic->width;
+    picH = 16 * refPic->height;
+
+    /* Luma rows in the macroblock array are 16 bytes apart */
+    lumaDst = data + 16*partY + partX;
+
+    /* Split the motion vector into full-sample and quarter-sample parts */
+    fracX = mv->hor & 0x3;
+    fracY = mv->ver & 0x3;
+    col = (i32)xA + (i32)partX + (mv->hor >> 2);
+    row = (i32)yA + (i32)partY + (mv->ver >> 2);
+
+    ASSERT(lumaFracPos[fracX][fracY] < 16);
+
+    /* Every filtering routine is handed a position moved two samples up
+     * and/or left in each direction it filters in; the plain copy (G)
+     * uses the position as it is. */
+    switch (lumaFracPos[fracX][fracY])
+    {
+        case 0: /* G */
+            h264bsdFillBlock(refLuma, lumaDst,
+                col, row, picW, picH, partWidth, partHeight, 16);
+            break;
+
+        case 1: /* d */
+            h264bsdInterpolateVerQuarter(refLuma, lumaDst,
+                col, row-2, picW, picH, partWidth, partHeight, 0);
+            break;
+
+        case 2: /* h */
+            h264bsdInterpolateVerHalf(refLuma, lumaDst,
+                col, row-2, picW, picH, partWidth, partHeight);
+            break;
+
+        case 3: /* n */
+            h264bsdInterpolateVerQuarter(refLuma, lumaDst,
+                col, row-2, picW, picH, partWidth, partHeight, 1);
+            break;
+
+        case 4: /* a */
+            h264bsdInterpolateHorQuarter(refLuma, lumaDst,
+                col-2, row, picW, picH, partWidth, partHeight, 0);
+            break;
+
+        case 5: /* e */
+            h264bsdInterpolateHorVerQuarter(refLuma, lumaDst,
+                col-2, row-2, picW, picH, partWidth, partHeight, 0);
+            break;
+
+        case 6: /* i */
+            h264bsdInterpolateMidHorQuarter(refLuma, lumaDst,
+                col-2, row-2, picW, picH, partWidth, partHeight, 0);
+            break;
+
+        case 7: /* p */
+            h264bsdInterpolateHorVerQuarter(refLuma, lumaDst,
+                col-2, row-2, picW, picH, partWidth, partHeight, 2);
+            break;
+
+        case 8: /* b */
+            h264bsdInterpolateHorHalf(refLuma, lumaDst,
+                col-2, row, picW, picH, partWidth, partHeight);
+            break;
+
+        case 9: /* f */
+            h264bsdInterpolateMidVerQuarter(refLuma, lumaDst,
+                col-2, row-2, picW, picH, partWidth, partHeight, 0);
+            break;
+
+        case 10: /* j */
+            h264bsdInterpolateMidHalf(refLuma, lumaDst,
+                col-2, row-2, picW, picH, partWidth, partHeight);
+            break;
+
+        case 11: /* q */
+            h264bsdInterpolateMidVerQuarter(refLuma, lumaDst,
+                col-2, row-2, picW, picH, partWidth, partHeight, 1);
+            break;
+
+        case 12: /* c */
+            h264bsdInterpolateHorQuarter(refLuma, lumaDst,
+                col-2, row, picW, picH, partWidth, partHeight, 1);
+            break;
+
+        case 13: /* g */
+            h264bsdInterpolateHorVerQuarter(refLuma, lumaDst,
+                col-2, row-2, picW, picH, partWidth, partHeight, 1);
+            break;
+
+        case 14: /* k */
+            h264bsdInterpolateMidHorQuarter(refLuma, lumaDst,
+                col-2, row-2, picW, picH, partWidth, partHeight, 1);
+            break;
+
+        default: /* case 15, r */
+            h264bsdInterpolateHorVerQuarter(refLuma, lumaDst,
+                col-2, row-2, picW, picH, partWidth, partHeight, 3);
+            break;
+    }
+
+    /* Chroma: destination lies behind the 16x16 luma samples, in rows of
+     * 8 bytes and at half the luma offsets */
+    PredictChroma(
+        data + 16*16 + (partY>>1)*8 + (partX>>1),
+        xA + partX,
+        yA + partY,
+        partWidth,
+        partHeight,
+        mv,
+        refPic);
+
+}
+
+#else /* H264DEC_OMXDL */
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdPredictSamples
+
+ Functional description:
+ This function reconstructs a prediction for a macroblock partition.
+ The prediction is either copied or interpolated using the reference
+ frame and the motion vector. Both luminance and chrominance parts are
+ predicted. The prediction is stored in given macroblock array (data).
+ This variant hands the filtering to omxVCM4P10_InterpolateLuma and
+ armVCM4P10_Interpolate_Chroma.
+ Inputs:
+ data pointer to macroblock array (384 bytes) for output
+ mv pointer to motion vector used for prediction
+ refPic pointer to reference picture structure
+ colAndRow macroblock position: x-coordinate (xA) in the upper
+ 16 bits, y-coordinate (yA) in the lower 16 bits
+ part partition, one byte per field from the most
+ significant byte down: partX, partY, partWidth,
+ partHeight
+ pFill scratch buffer that receives an edge-extended copy
+ of the reference area whenever that area leaves the
+ picture (the required size is not visible here)
+ Outputs:
+ data macroblock array (16x16+8x8+8x8) where predicted
+ partition is stored at correct position
+
+------------------------------------------------------------------------------*/
+
+/*lint -e{550} Symbol 'res' not accessed */
+void h264bsdPredictSamples(
+ u8 *data,
+ mv_t *mv,
+ image_t *refPic,
+ u32 colAndRow,
+ u32 part,
+ u8 *pFill)
+
+{
+
+/* Variables */
+
+ u32 xFrac, yFrac;
+ u32 width, height;
+ i32 xInt, yInt, x0, y0;
+ u8 *partData, *ref;
+ OMXSize roi;
+ u32 fillWidth;
+ u32 fillHeight;
+ OMXResult res; /* only examined by ASSERT */
+ u32 xA, yA;
+ u32 partX, partY;
+ u32 partWidth, partHeight;
+
+/* Code */
+
+ ASSERT(data);
+ ASSERT(mv);
+ ASSERT(refPic);
+ ASSERT(refPic->data);
+ ASSERT(refPic->width);
+ ASSERT(refPic->height);
+ /* unpack the macroblock position */
+ xA = (colAndRow & 0xFFFF0000) >> 16;
+ yA = (colAndRow & 0x0000FFFF);
+ /* unpack the partition offset and size */
+ partX = (part & 0xFF000000) >> 24;
+ partY = (part & 0x00FF0000) >> 16;
+ partWidth = (part & 0x0000FF00) >> 8;
+ partHeight = (part & 0x000000FF);
+
+ ASSERT(partWidth);
+ ASSERT(partHeight);
+
+ /* luma */
+ partData = data + 16*partY + partX;
+
+ xFrac = mv->hor & 0x3;
+ yFrac = mv->ver & 0x3;
+
+ width = 16 * refPic->width;
+ height = 16 * refPic->height;
+
+ xInt = (i32)xA + (i32)partX + (mv->hor >> 2);
+ yInt = (i32)yA + (i32)partY + (mv->ver >> 2);
+ /* a fractional position moves the bounds-checked area two samples left / up */
+ x0 = (xFrac) ? xInt-2 : xInt;
+ y0 = (yFrac) ? yInt-2 : yInt;
+ /* NOTE(review): the fill width is larger than partWidth+5; presumably this matches how the luma interpolation routine reads the row -- confirm */
+ if (xFrac)
+ {
+ if (partWidth == 16)
+ fillWidth = 32;
+ else
+ fillWidth = 16;
+ }
+ else
+ fillWidth = (partWidth*2);
+ if (yFrac)
+ fillHeight = partHeight+5;
+ else
+ fillHeight = partHeight;
+
+ /* area not fully inside the picture: interpolate from an edge-extended copy in pFill */
+ if ((x0 < 0) || ((u32)x0+fillWidth > width) ||
+ (y0 < 0) || ((u32)y0+fillHeight > height))
+ {
+ h264bsdFillBlock(refPic->data, (u8*)pFill, x0, y0, width, height,
+ fillWidth, fillHeight, fillWidth);
+ /* the copy starts at (x0, y0); undo the -2 shifts so that ref points at the sample for (xInt, yInt) */
+ x0 = 0;
+ y0 = 0;
+ ref = pFill;
+ width = fillWidth;
+ if (yFrac)
+ ref += 2*width;
+ if (xFrac)
+ ref += 2;
+ }
+ else
+ {
+ /*lint --e(737) Loss of sign */
+ ref = refPic->data + yInt*width + xInt;
+ }
+ /* Luma interpolation */
+ roi.width = (i32)partWidth;
+ roi.height = (i32)partHeight;
+
+ res = omxVCM4P10_InterpolateLuma(ref, (i32)width, partData, 16,
+ (i32)xFrac, (i32)yFrac, roi);
+ ASSERT(res == 0);
+
+ /* Chroma: planes are half the luma size in each direction */
+ width = 8 * refPic->width;
+ height = 8 * refPic->height;
+
+ x0 = ((xA + partX) >> 1) + (mv->hor >> 3);
+ y0 = ((yA + partY) >> 1) + (mv->ver >> 3);
+ xFrac = mv->hor & 0x7;
+ yFrac = mv->ver & 0x7;
+ /* first chroma plane follows the luma samples (16*16 bytes per macroblock) */
+ ref = refPic->data + 256 * refPic->width * refPic->height;
+
+ roi.width = (i32)(partWidth >> 1);
+ fillWidth = ((partWidth >> 1) + 8) & ~0x7;
+ roi.height = (i32)(partHeight >> 1);
+ fillHeight = (partHeight >> 1) + 1;
+ /* area not fully inside the plane: copy both chroma planes into pFill, one behind the other */
+ if ((x0 < 0) || ((u32)x0+fillWidth > width) ||
+ (y0 < 0) || ((u32)y0+fillHeight > height))
+ {
+ h264bsdFillBlock(ref, pFill, x0, y0, width, height,
+ fillWidth, fillHeight, fillWidth);
+ ref += width * height;
+ h264bsdFillBlock(ref, pFill + fillWidth*fillHeight,
+ x0, y0, width, height, fillWidth,
+ fillHeight, fillWidth);
+ /* width and height now describe the copies, so the plane step below stays height * width */
+ ref = pFill;
+ x0 = 0;
+ y0 = 0;
+ width = fillWidth;
+ height = fillHeight;
+ }
+
+ partData = data + 16*16 + (partY>>1)*8 + (partX>>1);
+
+ /* Chroma interpolation: first plane, then the second plane, whose
+ * source lies height * width bytes further on and whose destination
+ * lies 8*8 bytes further on */
+ /*lint --e(737) Loss of sign */
+ ref += y0 * width + x0;
+ res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8,
+ (u32)roi.width, (u32)roi.height, xFrac, yFrac);
+ ASSERT(res == 0);
+ partData += 8 * 8;
+ ref += height * width;
+ res = armVCM4P10_Interpolate_Chroma(ref, width, partData, 8,
+ (u32)roi.width, (u32)roi.height, xFrac, yFrac);
+ ASSERT(res == 0);
+
+}
+
+#endif /* H264DEC_OMXDL */
+
+
+/*------------------------------------------------------------------------------
+
+ Function: FillRow1
+
+ Functional description:
+ This function gets a row of reference pels in a 'normal' case when no
+ overfilling is necessary.
+ Inputs:
+ ref pointer to reference samples
+ left not used
+ center amount of pixels to copy
+ right not used
+ Outputs:
+ fill pointer where samples are stored
+
+------------------------------------------------------------------------------*/
+
+static void FillRow1(
+ u8 *ref,
+ u8 *fill,
+ i32 left,
+ i32 center,
+ i32 right)
+{
+ /* left and right exist only so that the signature matches the row-fill function pointer used by h264bsdFillBlock */
+ ASSERT(ref);
+ ASSERT(fill);
+
+ H264SwDecMemcpy(fill, ref, (u32)center);
+
+ /*lint -e(715) */
+}
+
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdFillRow7
+
+ Functional description:
+ This function gets a row of reference pels when horizontal coordinate
+ is partly negative or partly greater than reference picture width
+ (overfilling some pels on left and/or right edge).
+ Inputs:
+ ref pointer to reference samples
+ left amount of pixels to overfill on left-edge
+ center amount of pixels to copy
+ right amount of pixels to overfill on right-edge
+ Outputs:
+ fill pointer where samples are stored
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_NEON
+void h264bsdFillRow7(
+    u8 *ref,
+    u8 *fill,
+    i32 left,
+    i32 center,
+    i32 right)
+{
+    u8 edge;
+
+    ASSERT(ref);
+    ASSERT(fill);
+
+    /* Left overfill: repeat the first reference pel 'left' times */
+    if (left)
+    {
+        edge = *ref;
+        do
+        {
+            *fill++ = edge;
+        } while (--left);
+    }
+
+    /* Pels that are taken from the reference row as they are */
+    while (center)
+    {
+        *fill++ = *ref++;
+        center--;
+    }
+
+    /* Right overfill: repeat the pel just before ref 'right' times */
+    if (right)
+    {
+        edge = ref[-1];
+        do
+        {
+            *fill++ = edge;
+        } while (--right);
+    }
+}
+#endif
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdFillBlock
+
+        Functional description:
+          This function gets a block of reference pels. It determines whether
+          overfilling is needed or not and repeatedly calls an appropriate
+          function (by using a function pointer) that fills one row of the
+          block. Rows above the picture repeat the first picture row, rows
+          below the picture repeat the last one, and the row function
+          repeats the edge pels to the left and right.
+        Inputs:
+          ref               pointer to reference frame
+          x0                x-coordinate for block
+          y0                y-coordinate for block
+          width             width of reference frame
+          height            height of reference frame
+          blockWidth        width of block
+          blockHeight       height of block
+          fillScanLength    length of a line in output array (pixels)
+        Outputs:
+          fill              pointer to array where output block is written
+
+------------------------------------------------------------------------------*/
+
+void h264bsdFillBlock(
+    u8 *ref,
+    u8 *fill,
+    i32 x0,
+    i32 y0,
+    u32 width,
+    u32 height,
+    u32 blockWidth,
+    u32 blockHeight,
+    u32 fillScanLength)
+
+{
+
+/* Variables */
+
+    i32 xstop, ystop;
+    void (*fp)(u8*, u8*, i32, i32, i32);
+    i32 left, x, right;
+    i32 top, y, bottom;
+
+/* Code */
+
+    ASSERT(ref);
+    ASSERT(fill);
+    ASSERT(width);
+    ASSERT(height);
+    ASSERT(blockWidth);
+    ASSERT(blockHeight);
+
+    xstop = x0 + (i32)blockWidth;
+    ystop = y0 + (i32)blockHeight;
+
+    /* Choose correct function whether overfilling on left-edge or right-edge
+     * is needed or not */
+    if (x0 >= 0 && xstop <= (i32)width)
+        fp = FillRow1;
+    else
+        fp = h264bsdFillRow7;
+
+    /* A block that lies completely outside the picture is moved so that it
+     * just touches the nearest edge; it is then built from edge pels only */
+    if (ystop < 0)
+        y0 = -(i32)blockHeight;
+
+    if (xstop < 0)
+        x0 = -(i32)blockWidth;
+
+    if (y0 > (i32)height)
+        y0 = (i32)height;
+
+    if (x0 > (i32)width)
+        x0 = (i32)width;
+
+    xstop = x0 + (i32)blockWidth;
+    ystop = y0 + (i32)blockHeight;
+
+    /* Move ref to the first pel of the block that exists in the picture */
+    if (x0 > 0)
+        ref += x0;
+
+    if (y0 > 0)
+        ref += y0 * (i32)width;
+
+    /* Split each dimension into overfill before, pels inside the picture
+     * and overfill after */
+    left = x0 < 0 ? -x0 : 0;
+    right = xstop > (i32)width ? xstop - (i32)width : 0;
+    x = (i32)blockWidth - left - right;
+
+    top = y0 < 0 ? -y0 : 0;
+    bottom = ystop > (i32)height ? ystop - (i32)height : 0;
+    y = (i32)blockHeight - top - bottom;
+
+    /* Top-overfilling */
+    for ( ; top; top-- )
+    {
+        (*fp)(ref, fill, left, x, right);
+        fill += fillScanLength;
+    }
+
+    /* Lines inside reference image */
+    for ( ; y; y-- )
+    {
+        (*fp)(ref, fill, left, x, right);
+        ref += width;
+        fill += fillScanLength;
+    }
+
+    /* Bottom-overfilling repeats the last picture row, which is one row
+     * before ref at this point. The step back is taken only when bottom
+     * rows are needed: for a block entirely above the picture ref was
+     * never advanced, and ref - width would point in front of the
+     * reference buffer. */
+    if (bottom)
+    {
+        ref -= width;
+
+        for ( ; bottom; bottom-- )
+        {
+            (*fp)(ref, fill, left, x, right);
+            fill += fillScanLength;
+        }
+    }
+}
+
+/*lint +e701 +e702 */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.h
new file mode 100755
index 0000000..5a1a140
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_reconstruct.h
@@ -0,0 +1,210 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_RECONSTRUCT_H
+#define H264SWDEC_RECONSTRUCT_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_macroblock_layer.h"
+#include "h264bsd_image.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_OMXDL /* without H264DEC_OMXDL: position and partition are separate arguments */
+void h264bsdPredictSamples(
+ u8 *data,
+ mv_t *mv,
+ image_t *refPic,
+ u32 xA,
+ u32 yA,
+ u32 partX,
+ u32 partY,
+ u32 partWidth,
+ u32 partHeight);
+#else /* with H264DEC_OMXDL: position and partition are packed, pFill is scratch for overfilling */
+void h264bsdPredictSamples(
+ u8 *data,
+ mv_t *mv,
+ image_t *refPic,
+ u32 colAndRow,/* packaged data | column | row |*/
+ u32 part, /* packaged data |partX|partY|partWidth|partHeight|*/
+ u8 *pFill);
+#endif /* H264DEC_OMXDL */
+/* Copies a blockWidth x blockHeight area at (x0, y0) into fill, repeating edge pels outside the reference */
+void h264bsdFillBlock(
+ u8 * ref,
+ u8 * fill,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 blockWidth,
+ u32 blockHeight,
+ u32 fillScanLength);
+/* Chroma interpolation with a horizontal fraction only (judging by name and parameters; definition not shown here) */
+void h264bsdInterpolateChromaHor(
+ u8 *pRef,
+ u8 *predPartChroma,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 xFrac,
+ u32 chromaPartWidth,
+ u32 chromaPartHeight);
+/* Chroma interpolation with a vertical fraction only (judging by name and parameters; definition not shown here) */
+void h264bsdInterpolateChromaVer(
+ u8 *pRef,
+ u8 *predPartChroma,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 yFrac,
+ u32 chromaPartWidth,
+ u32 chromaPartHeight);
+/* Chroma interpolation with both fractions (judging by name and parameters; definition not shown here) */
+void h264bsdInterpolateChromaHorVer(
+ u8 *ref,
+ u8 *predPartChroma,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 xFrac,
+ u32 yFrac,
+ u32 chromaPartWidth,
+ u32 chromaPartHeight);
+/* Luma position 'h'; h264bsdPredictSamples passes y0 moved two rows up */
+void h264bsdInterpolateVerHalf(
+ u8 *ref,
+ u8 *mb,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 partWidth,
+ u32 partHeight);
+/* Luma positions 'd' (verOffset 0) and 'n' (verOffset 1) */
+void h264bsdInterpolateVerQuarter(
+ u8 *ref,
+ u8 *mb,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 partWidth,
+ u32 partHeight,
+ u32 verOffset);
+/* Luma position 'b'; h264bsdPredictSamples passes x0 moved two columns left */
+void h264bsdInterpolateHorHalf(
+ u8 *ref,
+ u8 *mb,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 partWidth,
+ u32 partHeight);
+/* Luma positions 'a' (horOffset 0) and 'c' (horOffset 1) */
+void h264bsdInterpolateHorQuarter(
+ u8 *ref,
+ u8 *mb,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 partWidth,
+ u32 partHeight,
+ u32 horOffset);
+/* Luma positions 'e', 'g', 'p' and 'r' for horVerOffset 0, 1, 2 and 3 */
+void h264bsdInterpolateHorVerQuarter(
+ u8 *ref,
+ u8 *mb,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 partWidth,
+ u32 partHeight,
+ u32 horVerOffset);
+/* Luma position 'j' */
+void h264bsdInterpolateMidHalf(
+ u8 *ref,
+ u8 *mb,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 partWidth,
+ u32 partHeight);
+/* Luma positions 'f' (verOffset 0) and 'q' (verOffset 1) */
+void h264bsdInterpolateMidVerQuarter(
+ u8 *ref,
+ u8 *mb,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 partWidth,
+ u32 partHeight,
+ u32 verOffset);
+/* Luma positions 'i' (horOffset 0) and 'k' (horOffset 1) */
+void h264bsdInterpolateMidHorQuarter(
+ u8 *ref,
+ u8 *mb,
+ i32 x0,
+ i32 y0,
+ u32 width,
+ u32 height,
+ u32 partWidth,
+ u32 partHeight,
+ u32 horOffset);
+
+/* Writes one row: 'left' copies of the first pel, 'center' pels from ref, 'right' copies of the pel before the final ref */
+void h264bsdFillRow7(
+ u8 *ref,
+ u8 *fill,
+ i32 left,
+ i32 center,
+ i32 right);
+
+#endif /* #ifndef H264SWDEC_RECONSTRUCT_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.c
new file mode 100755
index 0000000..0756c47
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.c
@@ -0,0 +1,1692 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdDecodeSeiMessage
+ DecodeBufferingPeriod
+ DecodePictureTiming
+ DecodePanScanRectangle
+ DecodeFillerPayload
+ DecodeUserDataRegisteredITuTT35
+ DecodeUserDataUnregistered
+ DecodeRecoveryPoint
+ DecodeDecRefPicMarkingRepetition
+ DecodeSparePic
+ DecodeSceneInfo
+ DecodeSubSeqInfo
+ DecodeSubSeqLayerCharacteristics
+ DecodeSubSeqCharacteristics
+ DecodeFullFrameFreeze
+ DecodeFullFrameSnapshot
+ DecodeProgressiveRefinementSegmentStart
+ DecodeProgressiveRefinementSegmentEnd
+ DecodeMotionConstrainedSliceGroupSet
+ DecodeReservedSeiMessage
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_sei.h"
+#include "basetype.h"
+#include "h264bsd_util.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_vlc.h"
+#include "h264bsd_seq_param_set.h"
+#include "h264bsd_slice_header.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+static const u32 numClockTS[9] = {1,1,1,2,2,3,3,2,3}; /* NOTE(review): presumably NumClockTS per pic_struct value 0..8 (H.264 Table D-1); the use is outside this view -- confirm */
+static const u32 ceilLog2NumSliceGroups[9] = {0,1,1,2,2,3,3,3,3}; /* NOTE(review): presumably a bit count indexed by the number of slice groups; the use is outside this view -- confirm */
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 DecodeBufferingPeriod(
+ strmData_t *pStrmData,
+ seiBufferingPeriod_t *pBufferingPeriod,
+ u32 cpbCnt,
+ u32 initialCpbRemovalDelayLength,
+ u32 nalHrdBpPresentFlag,
+ u32 vclHrdBpPresentFlag);
+
+static u32 DecodePictureTiming(
+ strmData_t *pStrmData,
+ seiPicTiming_t *pPicTiming,
+ u32 cpbRemovalDelayLength,
+ u32 dpbOutputDelayLength,
+ u32 timeOffsetLength,
+ u32 cpbDpbDelaysPresentFlag,
+ u32 picStructPresentFlag);
+
+static u32 DecodePanScanRectangle(
+ strmData_t *pStrmData,
+ seiPanScanRect_t *pPanScanRectangle);
+
+static u32 DecodeFillerPayload(strmData_t *pStrmData, u32 payloadSize);
+
+static u32 DecodeUserDataRegisteredITuTT35(
+ strmData_t *pStrmData,
+ seiUserDataRegisteredItuTT35_t *pUserDataRegisteredItuTT35,
+ u32 payloadSize);
+
+static u32 DecodeUserDataUnregistered(
+ strmData_t *pStrmData,
+ seiUserDataUnregistered_t *pUserDataUnregistered,
+ u32 payloadSize);
+
+static u32 DecodeRecoveryPoint(
+ strmData_t *pStrmData,
+ seiRecoveryPoint_t *pRecoveryPoint);
+
+static u32 DecodeDecRefPicMarkingRepetition(
+ strmData_t *pStrmData,
+ seiDecRefPicMarkingRepetition_t *pDecRefPicMarkingRepetition,
+ u32 numRefFrames);
+
+static u32 DecodeSparePic(
+ strmData_t *pStrmData,
+ seiSparePic_t *pSparePic,
+ u32 picSizeInMapUnits);
+
+static u32 DecodeSceneInfo(
+ strmData_t *pStrmData,
+ seiSceneInfo_t *pSceneInfo);
+
+static u32 DecodeSubSeqInfo(
+ strmData_t *pStrmData,
+ seiSubSeqInfo_t *pSubSeqInfo);
+
+static u32 DecodeSubSeqLayerCharacteristics(
+ strmData_t *pStrmData,
+ seiSubSeqLayerCharacteristics_t *pSubSeqLayerCharacteristics);
+
+static u32 DecodeSubSeqCharacteristics(
+ strmData_t *pStrmData,
+ seiSubSeqCharacteristics_t *pSubSeqCharacteristics);
+
+static u32 DecodeFullFrameFreeze(
+ strmData_t *pStrmData,
+ seiFullFrameFreeze_t *pFullFrameFreeze);
+
+static u32 DecodeFullFrameSnapshot(
+ strmData_t *pStrmData,
+ seiFullFrameSnapshot_t *pFullFrameSnapshot);
+
+static u32 DecodeProgressiveRefinementSegmentStart(
+ strmData_t *pStrmData,
+ seiProgressiveRefinementSegmentStart_t *pProgressiveRefinementSegmentStart);
+
+static u32 DecodeProgressiveRefinementSegmentEnd(
+ strmData_t *pStrmData,
+ seiProgressiveRefinementSegmentEnd_t *pProgressiveRefinementSegmentEnd);
+
+static u32 DecodeMotionConstrainedSliceGroupSet(
+ strmData_t *pStrmData,
+ seiMotionConstrainedSliceGroupSet_t *pMotionConstrainedSliceGroupSet,
+ u32 numSliceGroups);
+
+static u32 DecodeReservedSeiMessage(
+ strmData_t *pStrmData,
+ seiReservedSeiMessage_t *pReservedSeiMessage,
+ u32 payloadSize);
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdDecodeSeiMessage
+
+        Functional description:
+            Decodes all SEI messages of one SEI NAL unit. For every message
+            the payload type and payload size are read (each is coded as a
+            run of 0xFF bytes, worth 255 apiece, terminated by one other
+            byte), the payload is handed to the decoder matching the payload
+            type and the stream is then advanced to the next byte boundary.
+            The loop continues while h264bsdMoreRbspData() reports more data.
+
+            pSeiMessage is cleared once on entry, not once per message, so
+            after several messages payloadType holds the type of the last
+            message decoded.
+
+        Inputs:
+            pStrmData       stream positioned at the start of the SEI RBSP
+            pSeqParamSet    active sequence parameter set; required for
+                            payload types 0 and 1 (which also need its
+                            vuiParameters), 7 and 8
+            numSliceGroups  number of slice groups, used by payload type 18
+
+        Outputs:
+            pSeiMessage     decoded message data
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      end of stream, invalid alignment bits, or a
+                            payload that needs parameter set data which is
+                            not available
+            other           status returned by the payload decoder (for
+                            example MEMORY_ALLOCATION_ERROR) or by
+                            h264bsdRbspTrailingBits()
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeSeiMessage(
+  strmData_t *pStrmData,
+  seqParamSet_t *pSeqParamSet,
+  seiMessage_t *pSeiMessage,
+  u32 numSliceGroups)
+{
+
+/* Variables */
+
+    u32 tmp, payloadType, payloadSize, status;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSeiMessage);
+
+
+    H264SwDecMemset(pSeiMessage, 0, sizeof(seiMessage_t));
+
+    do
+    {
+        /* payload type: every 0xFF byte adds 255, the first other byte
+         * terminates the value */
+        payloadType = 0;
+        while((tmp = h264bsdGetBits(pStrmData, 8)) == 0xFF)
+        {
+            payloadType += 255;
+        }
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        payloadType += tmp;
+
+        /* payload size in bytes, same coding as the payload type */
+        payloadSize = 0;
+        while((tmp = h264bsdGetBits(pStrmData, 8)) == 0xFF)
+        {
+            payloadSize += 255;
+        }
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        payloadSize += tmp;
+
+        pSeiMessage->payloadType = payloadType;
+
+        switch (payloadType)
+        {
+            case 0:
+                /* The payload type comes from the stream, so missing VUI
+                 * data must be a decoding error rather than a NULL
+                 * dereference in builds where ASSERT is compiled out. */
+                if (pSeqParamSet == NULL ||
+                    pSeqParamSet->vuiParameters == NULL)
+                    return(HANTRO_NOK);
+                status = DecodeBufferingPeriod(
+                  pStrmData,
+                  &pSeiMessage->bufferingPeriod,
+                  pSeqParamSet->vuiParameters->vclHrdParameters.cpbCnt,
+                  pSeqParamSet->vuiParameters->vclHrdParameters.
+                  initialCpbRemovalDelayLength,
+                  pSeqParamSet->vuiParameters->nalHrdParametersPresentFlag,
+                  pSeqParamSet->vuiParameters->vclHrdParametersPresentFlag);
+                break;
+
+            case 1:
+                /* same reasoning as for payload type 0 */
+                if (pSeqParamSet == NULL ||
+                    pSeqParamSet->vuiParameters == NULL)
+                    return(HANTRO_NOK);
+                ASSERT(pSeqParamSet->vuiParametersPresentFlag);
+                status = DecodePictureTiming(
+                  pStrmData,
+                  &pSeiMessage->picTiming,
+                  pSeqParamSet->vuiParameters->vclHrdParameters.
+                      cpbRemovalDelayLength,
+                  pSeqParamSet->vuiParameters->vclHrdParameters.
+                      dpbOutputDelayLength,
+                  pSeqParamSet->vuiParameters->vclHrdParameters.
+                    timeOffsetLength,
+                  pSeqParamSet->vuiParameters->nalHrdParametersPresentFlag ||
+                  pSeqParamSet->vuiParameters->vclHrdParametersPresentFlag ?
+                  HANTRO_TRUE : HANTRO_FALSE,
+                  pSeqParamSet->vuiParameters->picStructPresentFlag);
+                break;
+
+            case 2:
+                status = DecodePanScanRectangle(
+                  pStrmData,
+                  &pSeiMessage->panScanRect);
+                break;
+
+            case 3:
+                status = DecodeFillerPayload(pStrmData, payloadSize);
+                break;
+
+            case 4:
+                status = DecodeUserDataRegisteredITuTT35(
+                  pStrmData,
+                  &pSeiMessage->userDataRegisteredItuTT35,
+                  payloadSize);
+                break;
+
+            case 5:
+                status = DecodeUserDataUnregistered(
+                  pStrmData,
+                  &pSeiMessage->userDataUnregistered,
+                  payloadSize);
+                break;
+
+            case 6:
+                status = DecodeRecoveryPoint(
+                  pStrmData,
+                  &pSeiMessage->recoveryPoint);
+                break;
+
+            case 7:
+                /* numRefFrames is read from the parameter set */
+                if (pSeqParamSet == NULL)
+                    return(HANTRO_NOK);
+                status = DecodeDecRefPicMarkingRepetition(
+                  pStrmData,
+                  &pSeiMessage->decRefPicMarkingRepetition,
+                  pSeqParamSet->numRefFrames);
+                break;
+
+            case 8:
+                /* picture size is read from the parameter set */
+                if (pSeqParamSet == NULL)
+                    return(HANTRO_NOK);
+                status = DecodeSparePic(
+                  pStrmData,
+                  &pSeiMessage->sparePic,
+                  pSeqParamSet->picWidthInMbs * pSeqParamSet->picHeightInMbs);
+                break;
+
+            case 9:
+                status = DecodeSceneInfo(
+                  pStrmData,
+                  &pSeiMessage->sceneInfo);
+                break;
+
+            case 10:
+                status = DecodeSubSeqInfo(
+                  pStrmData,
+                  &pSeiMessage->subSeqInfo);
+                break;
+
+            case 11:
+                status = DecodeSubSeqLayerCharacteristics(
+                  pStrmData,
+                  &pSeiMessage->subSeqLayerCharacteristics);
+                break;
+
+            case 12:
+                status = DecodeSubSeqCharacteristics(
+                  pStrmData,
+                  &pSeiMessage->subSeqCharacteristics);
+                break;
+
+            case 13:
+                status = DecodeFullFrameFreeze(
+                  pStrmData,
+                  &pSeiMessage->fullFrameFreeze);
+                break;
+
+            case 14: /* This SEI does not contain data, what to do ??? */
+                status = HANTRO_OK;
+                break;
+
+            case 15:
+                status = DecodeFullFrameSnapshot(
+                  pStrmData,
+                  &pSeiMessage->fullFrameSnapshot);
+                break;
+
+            case 16:
+                status = DecodeProgressiveRefinementSegmentStart(
+                  pStrmData,
+                  &pSeiMessage->progressiveRefinementSegmentStart);
+                break;
+
+            case 17:
+                status = DecodeProgressiveRefinementSegmentEnd(
+                  pStrmData,
+                  &pSeiMessage->progressiveRefinementSegmentEnd);
+                break;
+
+            case 18:
+                ASSERT(numSliceGroups);
+                status = DecodeMotionConstrainedSliceGroupSet(
+                  pStrmData,
+                  &pSeiMessage->motionConstrainedSliceGroupSet,
+                  numSliceGroups);
+                break;
+
+            default:
+                status = DecodeReservedSeiMessage(
+                  pStrmData,
+                  &pSeiMessage->reservedSeiMessage,
+                  payloadSize);
+                break;
+        }
+
+        if (status != HANTRO_OK)
+            return(status);
+
+        /* A payload that does not end on a byte boundary must be followed
+         * by a single 1 bit and then 0 bits up to the boundary. */
+        if (!h264bsdIsByteAligned(pStrmData))
+        {
+            if (h264bsdGetBits(pStrmData, 1) != 1)
+                return(HANTRO_NOK);
+            while (!h264bsdIsByteAligned(pStrmData))
+            {
+                if (h264bsdGetBits(pStrmData, 1) != 0)
+                    return(HANTRO_NOK);
+            }
+        }
+    } while (h264bsdMoreRbspData(pStrmData));
+
+    return(h264bsdRbspTrailingBits(pStrmData));
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeBufferingPeriod
+
+        Functional description:
+            Decodes a buffering period SEI payload: the sequence parameter
+            set id followed by one (delay, delay offset) pair per CPB, first
+            for the NAL HRD and then for the VCL HRD when the respective
+            flag is set.
+
+            NOTE: both passes store into the same two arrays, so when both
+            flags are set the VCL values replace the NAL values.
+
+        Inputs:
+            pStrmData                       stream to read from
+            cpbCnt                          number of pairs read per pass
+            initialCpbRemovalDelayLength    bit width of every delay and
+                                            delay offset field
+            nalHrdBpPresentFlag             non-zero: read the NAL HRD pass
+            vclHrdBpPresentFlag             non-zero: read the VCL HRD pass
+
+        Outputs:
+            pBufferingPeriod                decoded values
+
+        Returns:
+            HANTRO_OK on success. HANTRO_NOK on end of stream, on a
+            parameter set id above 31 or on a zero delay in the NAL pass.
+            A failure status from h264bsdDecodeExpGolombUnsigned() is
+            returned unchanged.
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeBufferingPeriod(
+  strmData_t *pStrmData,
+  seiBufferingPeriod_t *pBufferingPeriod,
+  u32 cpbCnt,
+  u32 initialCpbRemovalDelayLength,
+  u32 nalHrdBpPresentFlag,
+  u32 vclHrdBpPresentFlag)
+{
+    u32 status;
+    u32 bits;
+    u32 cpbIdx;
+
+    ASSERT(pStrmData);
+    ASSERT(pBufferingPeriod);
+    ASSERT(cpbCnt);
+    ASSERT(initialCpbRemovalDelayLength);
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pBufferingPeriod->seqParameterSetId);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+    if (pBufferingPeriod->seqParameterSetId > 31)
+    {
+        return HANTRO_NOK;
+    }
+
+    if (nalHrdBpPresentFlag)
+    {
+        for (cpbIdx = 0; cpbIdx < cpbCnt; cpbIdx++)
+        {
+            /* a zero delay is rejected in the NAL pass only */
+            bits = h264bsdGetBits(pStrmData, initialCpbRemovalDelayLength);
+            if (bits == END_OF_STREAM || bits == 0)
+            {
+                return HANTRO_NOK;
+            }
+            pBufferingPeriod->initialCpbRemovalDelay[cpbIdx] = bits;
+
+            bits = h264bsdGetBits(pStrmData, initialCpbRemovalDelayLength);
+            if (bits == END_OF_STREAM)
+            {
+                return HANTRO_NOK;
+            }
+            pBufferingPeriod->initialCpbRemovalDelayOffset[cpbIdx] = bits;
+        }
+    }
+
+    if (vclHrdBpPresentFlag)
+    {
+        for (cpbIdx = 0; cpbIdx < cpbCnt; cpbIdx++)
+        {
+            bits = h264bsdGetBits(pStrmData, initialCpbRemovalDelayLength);
+            if (bits == END_OF_STREAM)
+            {
+                return HANTRO_NOK;
+            }
+            pBufferingPeriod->initialCpbRemovalDelay[cpbIdx] = bits;
+
+            bits = h264bsdGetBits(pStrmData, initialCpbRemovalDelayLength);
+            if (bits == END_OF_STREAM)
+            {
+                return HANTRO_NOK;
+            }
+            pBufferingPeriod->initialCpbRemovalDelayOffset[cpbIdx] = bits;
+        }
+    }
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodePictureTiming
+
+        Functional description:
+            Decodes a picture timing SEI payload: the optional CPB removal /
+            DPB output delays, the optional picture structure and, for each
+            of the numClockTS[picStruct] clock timestamps, the timestamp
+            fields including the signed time offset.
+
+        Inputs:
+            pStrmData                   stream to read from
+            cpbRemovalDelayLength       bit width of cpbRemovalDelay
+            dpbOutputDelayLength        bit width of dpbOutputDelay
+            timeOffsetLength            bit width of the signed time
+                                        offset, 0 means "not coded"
+            cpbDpbDelaysPresentFlag     non-zero: the two delays are coded
+            picStructPresentFlag        non-zero: picStruct and the clock
+                                        timestamps are coded
+
+        Outputs:
+            pPicTiming                  decoded values
+
+        Returns:
+            HANTRO_OK on success. HANTRO_NOK on end of stream or when a
+            field is out of range (picStruct > 8, countingType > 6,
+            seconds or minutes > 59, hours > 23).
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodePictureTiming(
+  strmData_t *pStrmData,
+  seiPicTiming_t *pPicTiming,
+  u32 cpbRemovalDelayLength,
+  u32 dpbOutputDelayLength,
+  u32 timeOffsetLength,
+  u32 cpbDpbDelaysPresentFlag,
+  u32 picStructPresentFlag)
+{
+
+/* Variables */
+
+    u32 tmp, i, signShift;
+    i32 itmp;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pPicTiming);
+
+
+    if (cpbDpbDelaysPresentFlag)
+    {
+        tmp = h264bsdGetBits(pStrmData, cpbRemovalDelayLength);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pPicTiming->cpbRemovalDelay = tmp;
+
+        tmp = h264bsdGetBits(pStrmData, dpbOutputDelayLength);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pPicTiming->dpbOutputDelay = tmp;
+    }
+
+    /* nothing else is coded without the picture structure */
+    if (!picStructPresentFlag)
+        return(HANTRO_OK);
+
+    tmp = h264bsdGetBits(pStrmData, 4);
+    if (tmp == END_OF_STREAM || tmp > 8)
+        return(HANTRO_NOK);
+    pPicTiming->picStruct = tmp;
+
+    for (i = 0; i < numClockTS[pPicTiming->picStruct]; i++)
+    {
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pPicTiming->clockTimeStampFlag[i] = tmp == 1 ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+        /* the remaining fields exist only for flagged timestamps */
+        if (!pPicTiming->clockTimeStampFlag[i])
+            continue;
+
+        tmp = h264bsdGetBits(pStrmData, 2);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pPicTiming->ctType[i] = tmp;
+
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pPicTiming->nuitFieldBasedFlag[i] = tmp == 1 ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+        tmp = h264bsdGetBits(pStrmData, 5);
+        if (tmp == END_OF_STREAM || tmp > 6)
+            return(HANTRO_NOK);
+        pPicTiming->countingType[i] = tmp;
+
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pPicTiming->fullTimeStampFlag[i] = tmp == 1 ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pPicTiming->discontinuityFlag[i] = tmp == 1 ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pPicTiming->cntDroppedFlag[i] = tmp == 1 ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+        tmp = h264bsdGetBits(pStrmData, 8);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pPicTiming->nFrames[i] = tmp;
+
+        if (pPicTiming->fullTimeStampFlag[i])
+        {
+            /* full timestamp: seconds, minutes and hours always coded */
+            tmp = h264bsdGetBits(pStrmData, 6);
+            if (tmp == END_OF_STREAM || tmp > 59)
+                return(HANTRO_NOK);
+            pPicTiming->secondsValue[i] = tmp;
+
+            tmp = h264bsdGetBits(pStrmData, 6);
+            if (tmp == END_OF_STREAM || tmp > 59)
+                return(HANTRO_NOK);
+            pPicTiming->minutesValue[i] = tmp;
+
+            tmp = h264bsdGetBits(pStrmData, 5);
+            if (tmp == END_OF_STREAM || tmp > 23)
+                return(HANTRO_NOK);
+            pPicTiming->hoursValue[i] = tmp;
+        }
+        else
+        {
+            /* partial timestamp: each unit is preceded by a presence
+             * flag and only coded when all smaller units are coded */
+            tmp = h264bsdGetBits(pStrmData, 1);
+            if (tmp == END_OF_STREAM)
+                return(HANTRO_NOK);
+            pPicTiming->secondsFlag[i] = tmp == 1 ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+            if (pPicTiming->secondsFlag[i])
+            {
+                tmp = h264bsdGetBits(pStrmData, 6);
+                if (tmp == END_OF_STREAM || tmp > 59)
+                    return(HANTRO_NOK);
+                pPicTiming->secondsValue[i] = tmp;
+
+                tmp = h264bsdGetBits(pStrmData, 1);
+                if (tmp == END_OF_STREAM)
+                    return(HANTRO_NOK);
+                pPicTiming->minutesFlag[i] = tmp == 1 ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+                if (pPicTiming->minutesFlag[i])
+                {
+                    tmp = h264bsdGetBits(pStrmData, 6);
+                    if (tmp == END_OF_STREAM || tmp > 59)
+                        return(HANTRO_NOK);
+                    pPicTiming->minutesValue[i] = tmp;
+
+                    tmp = h264bsdGetBits(pStrmData, 1);
+                    if (tmp == END_OF_STREAM)
+                        return(HANTRO_NOK);
+                    pPicTiming->hoursFlag[i] = tmp == 1 ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+                    if (pPicTiming->hoursFlag[i])
+                    {
+                        tmp = h264bsdGetBits(pStrmData, 5);
+                        if (tmp == END_OF_STREAM || tmp > 23)
+                            return(HANTRO_NOK);
+                        pPicTiming->hoursValue[i] = tmp;
+                    }
+                }
+            }
+        }
+
+        if (timeOffsetLength)
+        {
+            tmp = h264bsdGetBits(pStrmData, timeOffsetLength);
+            if (tmp == END_OF_STREAM)
+                return(HANTRO_NOK);
+            /* Convert the timeOffsetLength-bit two's complement value to
+             * i32. The field is moved to the top of the word while still
+             * unsigned, because left-shifting a signed value into the
+             * sign bit is undefined behavior; the signed right shift then
+             * replicates the sign bit as in the original code. */
+            signShift = 32 - timeOffsetLength;
+            itmp = (i32)(tmp << signShift);
+            /*lint -save -e701 -e702 */
+            itmp >>= signShift;
+            /*lint -restore */
+            pPicTiming->timeOffset[i] = itmp;
+        }
+        else
+            pPicTiming->timeOffset[i] = 0;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodePanScanRectangle
+
+        Functional description:
+            Decodes a pan-scan rectangle SEI payload: rectangle id, cancel
+            flag and, unless cancelled, the rectangle count, the four signed
+            offsets of every rectangle and the repetition period.
+
+        Inputs:
+            pStrmData           stream to read from
+
+        Outputs:
+            pPanScanRectangle   decoded values; panScanCnt holds the coded
+                                value plus one
+
+        Returns:
+            HANTRO_OK on success. HANTRO_NOK on end of stream, a coded
+            count above 2, a repetition period above 16384, or a repetition
+            period above 1 combined with more than one rectangle. A failure
+            status from the Exp-Golomb decoders is returned unchanged.
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodePanScanRectangle(
+  strmData_t *pStrmData,
+  seiPanScanRect_t *pPanScanRectangle)
+{
+    u32 status;
+    u32 flagBit;
+    u32 rect;
+
+    ASSERT(pStrmData);
+    ASSERT(pPanScanRectangle);
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pPanScanRectangle->panScanRectId);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+
+    flagBit = h264bsdGetBits(pStrmData, 1);
+    if (flagBit == END_OF_STREAM)
+    {
+        return HANTRO_NOK;
+    }
+    pPanScanRectangle->panScanRectCancelFlag =
+        (flagBit == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    /* a cancel message carries no rectangle data */
+    if (pPanScanRectangle->panScanRectCancelFlag)
+    {
+        return HANTRO_OK;
+    }
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pPanScanRectangle->panScanCnt);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+    if (pPanScanRectangle->panScanCnt > 2)
+    {
+        return HANTRO_NOK;
+    }
+    /* the coded value is count minus one */
+    pPanScanRectangle->panScanCnt++;
+
+    for (rect = 0; rect < pPanScanRectangle->panScanCnt; rect++)
+    {
+        status = h264bsdDecodeExpGolombSigned(pStrmData,
+            &pPanScanRectangle->panScanRectLeftOffset[rect]);
+        if (status != HANTRO_OK)
+        {
+            return status;
+        }
+
+        status = h264bsdDecodeExpGolombSigned(pStrmData,
+            &pPanScanRectangle->panScanRectRightOffset[rect]);
+        if (status != HANTRO_OK)
+        {
+            return status;
+        }
+
+        status = h264bsdDecodeExpGolombSigned(pStrmData,
+            &pPanScanRectangle->panScanRectTopOffset[rect]);
+        if (status != HANTRO_OK)
+        {
+            return status;
+        }
+
+        status = h264bsdDecodeExpGolombSigned(pStrmData,
+            &pPanScanRectangle->panScanRectBottomOffset[rect]);
+        if (status != HANTRO_OK)
+        {
+            return status;
+        }
+    }
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pPanScanRectangle->panScanRectRepetitionPeriod);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+    if (pPanScanRectangle->panScanRectRepetitionPeriod > 16384)
+    {
+        return HANTRO_NOK;
+    }
+    if (pPanScanRectangle->panScanCnt > 1 &&
+        pPanScanRectangle->panScanRectRepetitionPeriod > 1)
+    {
+        return HANTRO_NOK;
+    }
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeFillerPayload
+
+        Functional description:
+            Skips a filler payload SEI message. The payload bytes are
+            discarded without being inspected.
+
+        Inputs:
+            pStrmData       stream to read from
+            payloadSize     number of payload bytes to skip, may be 0
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_OK on success. HANTRO_NOK when the stream ends before
+            the payload does, or when the payload size is too large to be
+            expressed as a bit count.
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeFillerPayload(strmData_t *pStrmData, u32 payloadSize)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(pStrmData);
+
+
+    if (payloadSize)
+    {
+        /* 8 * payloadSize must not wrap: a wrapped product would flush
+         * fewer bits than the payload occupies and still report success */
+        if (payloadSize > ((u32)~(u32)0) / 8)
+            return(HANTRO_NOK);
+
+        if (h264bsdFlushBits(pStrmData, 8 * payloadSize) == END_OF_STREAM)
+            return(HANTRO_NOK);
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeUserDataRegisteredITuTT35
+
+        Functional description:
+            Decodes a "user data registered by ITU-T T.35" SEI payload: the
+            country code, an extension byte when the country code is 0xFF,
+            and the remaining payload bytes, which are copied into a newly
+            allocated buffer.
+
+        Inputs:
+            pStrmData       stream to read from
+            payloadSize     total payload size in bytes, including the
+                            country code byte(s)
+
+        Outputs:
+            pUserDataRegisteredItuTT35  decoded values. ituTT35PayloadByte
+                            points to a buffer of numPayloadBytes bytes
+                            obtained with ALLOCATE. This function never
+                            frees it, not even when it fails after the
+                            allocation, so releasing it is left to the
+                            owner of the structure.
+
+        Returns:
+            HANTRO_OK on success. HANTRO_NOK on end of stream or when
+            payloadSize leaves no room for at least one payload byte after
+            the country code. MEMORY_ALLOCATION_ERROR when the buffer
+            cannot be allocated.
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeUserDataRegisteredITuTT35(
+  strmData_t *pStrmData,
+  seiUserDataRegisteredItuTT35_t *pUserDataRegisteredItuTT35,
+  u32 payloadSize)
+{
+
+/* Variables */
+
+    u32 tmp, i, j, numBytes;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pUserDataRegisteredItuTT35);
+
+    /* payloadSize comes from the stream, so it is validated at run time */
+    if (payloadSize == 0)
+        return(HANTRO_NOK);
+
+    tmp = h264bsdGetBits(pStrmData, 8);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pUserDataRegisteredItuTT35->ituTT35CountryCode = tmp;
+
+    /* i counts the payload bytes consumed so far */
+    if (pUserDataRegisteredItuTT35->ituTT35CountryCode != 0xFF)
+        i = 1;
+    else
+    {
+        tmp = h264bsdGetBits(pStrmData, 8);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pUserDataRegisteredItuTT35->ituTT35CountryCodeExtensionByte = tmp;
+        i = 2;
+    }
+
+    /* At least one payload byte has to follow. Without this check
+     * payloadSize - i would wrap to a huge value, or be zero while the
+     * copy loop still stored one byte into the empty buffer. */
+    if (payloadSize <= i)
+        return(HANTRO_NOK);
+    numBytes = payloadSize - i;
+
+    /* where corresponding FREE() ??? */
+    pUserDataRegisteredItuTT35->numPayloadBytes = 0;
+    ALLOCATE(pUserDataRegisteredItuTT35->ituTT35PayloadByte,numBytes,u8);
+    if (pUserDataRegisteredItuTT35->ituTT35PayloadByte == NULL)
+        return(MEMORY_ALLOCATION_ERROR);
+    pUserDataRegisteredItuTT35->numPayloadBytes = numBytes;
+
+    for (j = 0; j < numBytes; j++)
+    {
+        tmp = h264bsdGetBits(pStrmData, 8);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pUserDataRegisteredItuTT35->ituTT35PayloadByte[j] = (u8)tmp;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeUserDataUnregistered
+
+        Functional description:
+            Decodes an unregistered user data SEI payload: a 16-byte UUID
+            read as four 32-bit words, followed by payloadSize - 16 bytes
+            of user data that are copied into a newly allocated buffer.
+
+        Inputs:
+            pStrmData       stream to read from
+            payloadSize     total payload size in bytes, including the
+                            16-byte UUID
+
+        Outputs:
+            pUserDataUnregistered   decoded values. userDataPayloadByte
+                            points to a buffer of numPayloadBytes bytes
+                            obtained with ALLOCATE, or is NULL when no
+                            user data follows the UUID. This function
+                            never frees the buffer, so releasing it is
+                            left to the owner of the structure.
+
+        Returns:
+            HANTRO_OK on success. HANTRO_NOK on end of stream or when
+            payloadSize is too small to hold the UUID.
+            MEMORY_ALLOCATION_ERROR when the buffer cannot be allocated.
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeUserDataUnregistered(
+  strmData_t *pStrmData,
+  seiUserDataUnregistered_t *pUserDataUnregistered,
+  u32 payloadSize)
+{
+
+/* Variables */
+
+    u32 i, tmp, numBytes;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pUserDataUnregistered);
+
+    /* The UUID alone takes 16 bytes. A smaller payloadSize would make
+     * payloadSize - 16 wrap to a huge allocation size and loop count. */
+    if (payloadSize < 16)
+        return(HANTRO_NOK);
+    numBytes = payloadSize - 16;
+
+    for (i = 0; i < 4; i++)
+    {
+        pUserDataUnregistered->uuidIsoIec11578[i] = h264bsdShowBits32(pStrmData);
+        if (h264bsdFlushBits(pStrmData,32) == END_OF_STREAM)
+            return(HANTRO_NOK);
+    }
+
+    /* UUID without user data: nothing to allocate, so the result does not
+     * depend on how the allocator treats a zero-byte request */
+    if (numBytes == 0)
+    {
+        pUserDataUnregistered->userDataPayloadByte = NULL;
+        pUserDataUnregistered->numPayloadBytes = 0;
+        return(HANTRO_OK);
+    }
+
+    /* where corresponding FREE() ??? */
+    ALLOCATE(pUserDataUnregistered->userDataPayloadByte, numBytes, u8);
+    if (pUserDataUnregistered->userDataPayloadByte == NULL)
+        return(MEMORY_ALLOCATION_ERROR);
+
+    pUserDataUnregistered->numPayloadBytes = numBytes;
+
+    for (i = 0; i < numBytes; i++)
+    {
+        tmp = h264bsdGetBits(pStrmData, 8);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pUserDataUnregistered->userDataPayloadByte[i] = (u8)tmp;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeRecoveryPoint
+
+        Functional description:
+            Decodes a recovery point SEI payload: recovery frame count,
+            exact match flag, broken link flag and the 2-bit changing slice
+            group indicator.
+
+        Inputs:
+            pStrmData       stream to read from
+
+        Outputs:
+            pRecoveryPoint  decoded values
+
+        Returns:
+            HANTRO_OK on success. HANTRO_NOK on end of stream or when the
+            changing slice group indicator is above 2. A failure status
+            from h264bsdDecodeExpGolombUnsigned() is returned unchanged.
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeRecoveryPoint(
+  strmData_t *pStrmData,
+  seiRecoveryPoint_t *pRecoveryPoint)
+{
+    u32 status;
+    u32 bits;
+
+    ASSERT(pStrmData);
+    ASSERT(pRecoveryPoint);
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pRecoveryPoint->recoveryFrameCnt);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+    {
+        return HANTRO_NOK;
+    }
+    pRecoveryPoint->exactMatchFlag =
+        (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+    {
+        return HANTRO_NOK;
+    }
+    pRecoveryPoint->brokenLinkFlag =
+        (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    bits = h264bsdGetBits(pStrmData, 2);
+    if (bits == END_OF_STREAM || bits > 2)
+    {
+        return HANTRO_NOK;
+    }
+    pRecoveryPoint->changingSliceGroupIdc = bits;
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeDecRefPicMarkingRepetition
+
+        Functional description:
+            Decodes a decoded reference picture marking repetition SEI
+            payload: the original IDR flag, the original frame number and
+            the repeated marking syntax, which is parsed by
+            h264bsdDecRefPicMarking() with NAL_SEI as the NAL unit type.
+            Field related syntax elements are not parsed because
+            frame_mbs_only_flag is assumed to be set.
+
+        Inputs:
+            pStrmData       stream to read from
+            numRefFrames    passed through to h264bsdDecRefPicMarking()
+
+        Outputs:
+            pDecRefPicMarkingRepetition     decoded values
+
+        Returns:
+            HANTRO_NOK on end of stream while reading the flag, otherwise
+            the status of h264bsdDecodeExpGolombUnsigned() if it fails, or
+            the status of h264bsdDecRefPicMarking().
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeDecRefPicMarkingRepetition(
+  strmData_t *pStrmData,
+  seiDecRefPicMarkingRepetition_t *pDecRefPicMarkingRepetition,
+  u32 numRefFrames)
+{
+    u32 status;
+    u32 idrBit;
+
+    ASSERT(pStrmData);
+    ASSERT(pDecRefPicMarkingRepetition);
+
+    idrBit = h264bsdGetBits(pStrmData, 1);
+    if (idrBit == END_OF_STREAM)
+    {
+        return HANTRO_NOK;
+    }
+    pDecRefPicMarkingRepetition->originalIdrFlag =
+        (idrBit == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pDecRefPicMarkingRepetition->originalFrameNum);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+
+    /* frame_mbs_only_flag assumed always true so some field related syntax
+     * elements are skipped, see H.264 standard */
+    return h264bsdDecRefPicMarking(pStrmData,
+        &pDecRefPicMarkingRepetition->decRefPicMarking, NAL_SEI,
+        numRefFrames);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeSparePic
+
+        Functional description:
+            Decodes a spare picture SEI payload: target frame number, spare
+            field flag (fields are rejected), the number of spare pictures
+            and, per spare picture, the frame number delta, the area
+            indicator and the spare area map. The map is stored either as
+            one flag per map unit (spareAreaIdc 1) or as a list of zero run
+            lengths (spareAreaIdc 2); spareAreaIdc 0 carries no map.
+
+        Inputs:
+            pStrmData           stream to read from
+            picSizeInMapUnits   number of map units in a picture; size of
+                                every buffer allocated here
+
+        Outputs:
+            pSparePic           decoded values; numSparePics holds the
+                                coded value plus one. Every entry of
+                                spareUnitFlag[] and zeroRunLength[] is
+                                either NULL or a buffer obtained with
+                                ALLOCATE. This function never frees those
+                                buffers, not even on failure, so releasing
+                                them is left to the owner of the structure.
+
+        Returns:
+            HANTRO_OK on success. HANTRO_NOK on end of stream, when the
+            spare field flag is set, when the number of spare pictures
+            exceeds MAX_NUM_SPARE_PICS, when spareAreaIdc is above 2 or
+            when a zero run is longer than the picture.
+            MEMORY_ALLOCATION_ERROR when a buffer cannot be allocated. A
+            failure status from h264bsdDecodeExpGolombUnsigned() is
+            returned unchanged.
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeSparePic(
+  strmData_t *pStrmData,
+  seiSparePic_t *pSparePic,
+  u32 picSizeInMapUnits)
+{
+
+/* Variables */
+
+    u32 tmp, i, j, mapUnitCnt;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSparePic);
+
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pSparePic->targetFrameNum);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pSparePic->spareFieldFlag = tmp == 1 ? HANTRO_TRUE : HANTRO_FALSE;
+    /* do not accept fields */
+    if (pSparePic->spareFieldFlag)
+        return(HANTRO_NOK);
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &pSparePic->numSparePics);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    /* The coded value is count minus one. It is checked before the
+     * increment so that the largest u32 value cannot wrap to 0 and slip
+     * past the limit. */
+    if (pSparePic->numSparePics >= MAX_NUM_SPARE_PICS)
+        return(HANTRO_NOK);
+    pSparePic->numSparePics++;
+
+    /* Start with every slot empty, so that pictures without a map
+     * (spareAreaIdc 0), unused slots and early error returns never leave
+     * stale pointers behind. */
+    for (i = 0; i < MAX_NUM_SPARE_PICS; i++)
+    {
+        pSparePic->spareUnitFlag[i] = NULL;
+        pSparePic->zeroRunLength[i] = NULL;
+    }
+
+    for (i = 0; i < pSparePic->numSparePics; i++)
+    {
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSparePic->deltaSpareFrameNum[i]);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSparePic->spareAreaIdc[i]);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pSparePic->spareAreaIdc[i] > 2)
+            return(HANTRO_NOK);
+
+        if (pSparePic->spareAreaIdc[i] == 1)
+        {
+            /* one flag per map unit */
+            /* where corresponding FREE() ??? */
+            ALLOCATE(pSparePic->spareUnitFlag[i], picSizeInMapUnits, u32);
+            if (pSparePic->spareUnitFlag[i] == NULL)
+                return(MEMORY_ALLOCATION_ERROR);
+
+            for (j = 0; j < picSizeInMapUnits; j++)
+            {
+                tmp = h264bsdGetBits(pStrmData, 1);
+                if (tmp == END_OF_STREAM)
+                    return(HANTRO_NOK);
+                pSparePic->spareUnitFlag[i][j] = tmp == 1 ?
+                                    HANTRO_TRUE : HANTRO_FALSE;
+            }
+        }
+        else if (pSparePic->spareAreaIdc[i] == 2)
+        {
+            /* run-length coded map: each entry covers run + 1 map units */
+            /* where corresponding FREE() ??? */
+            ALLOCATE(pSparePic->zeroRunLength[i], picSizeInMapUnits, u32);
+            if (pSparePic->zeroRunLength[i] == NULL)
+                return(MEMORY_ALLOCATION_ERROR);
+
+            /* j is bounded explicitly: the buffer holds picSizeInMapUnits
+             * entries and the run lengths come straight from the stream,
+             * so the accumulated count alone is not a safe loop bound. */
+            for (j = 0, mapUnitCnt = 0;
+                 mapUnitCnt < picSizeInMapUnits && j < picSizeInMapUnits;
+                 j++)
+            {
+                tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+                    &pSparePic->zeroRunLength[i][j]);
+                if (tmp != HANTRO_OK)
+                    return(tmp);
+                /* a run longer than the whole picture is invalid and
+                 * could wrap the accumulated count */
+                if (pSparePic->zeroRunLength[i][j] > picSizeInMapUnits)
+                    return(HANTRO_NOK);
+                mapUnitCnt += pSparePic->zeroRunLength[i][j] + 1;
+            }
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeSceneInfo
+
+        Functional description:
+            Decodes a scene information SEI payload: the presence flag and,
+            when it is set, the scene id, the scene transition type and,
+            for a non-zero transition type, the second scene id.
+
+        Inputs:
+            pStrmData       stream to read from
+
+        Outputs:
+            pSceneInfo      decoded values
+
+        Returns:
+            HANTRO_OK on success. HANTRO_NOK on end of stream or when the
+            scene transition type is above 6. A failure status from
+            h264bsdDecodeExpGolombUnsigned() is returned unchanged.
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeSceneInfo(
+  strmData_t *pStrmData,
+  seiSceneInfo_t *pSceneInfo)
+{
+    u32 status;
+    u32 presentBit;
+
+    ASSERT(pStrmData);
+    ASSERT(pSceneInfo);
+
+    presentBit = h264bsdGetBits(pStrmData, 1);
+    if (presentBit == END_OF_STREAM)
+    {
+        return HANTRO_NOK;
+    }
+    pSceneInfo->sceneInfoPresentFlag =
+        (presentBit == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    /* nothing more is coded when the flag is clear */
+    if (!pSceneInfo->sceneInfoPresentFlag)
+    {
+        return HANTRO_OK;
+    }
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData, &pSceneInfo->sceneId);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pSceneInfo->sceneTransitionType);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+    if (pSceneInfo->sceneTransitionType > 6)
+    {
+        return HANTRO_NOK;
+    }
+
+    if (pSceneInfo->sceneTransitionType)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSceneInfo->secondSceneId);
+        if (status != HANTRO_OK)
+        {
+            return status;
+        }
+    }
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeSubSeqInfo
+
+        Functional description:
+            Decodes a sub-sequence information SEI payload: layer number,
+            sub-sequence id, four one-bit flags and, when the frame number
+            flag is set, the sub-sequence frame number.
+
+        Inputs:
+            pStrmData       stream to read from
+
+        Outputs:
+            pSubSeqInfo     decoded values
+
+        Returns:
+            HANTRO_OK on success. HANTRO_NOK on end of stream, a layer
+            number above 255 or a sub-sequence id above 65535. A failure
+            status from h264bsdDecodeExpGolombUnsigned() is returned
+            unchanged.
+
+-----------------------------------------------------------------------------*/
+
+static u32 DecodeSubSeqInfo(
+  strmData_t *pStrmData,
+  seiSubSeqInfo_t *pSubSeqInfo)
+{
+    u32 status;
+    u32 bit;
+
+    ASSERT(pStrmData);
+    ASSERT(pSubSeqInfo);
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pSubSeqInfo->subSeqLayerNum);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+    if (pSubSeqInfo->subSeqLayerNum > 255)
+    {
+        return HANTRO_NOK;
+    }
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData, &pSubSeqInfo->subSeqId);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+    if (pSubSeqInfo->subSeqId > 65535)
+    {
+        return HANTRO_NOK;
+    }
+
+    bit = h264bsdGetBits(pStrmData, 1);
+    if (bit == END_OF_STREAM)
+    {
+        return HANTRO_NOK;
+    }
+    pSubSeqInfo->firstRefPicFlag = (bit == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    bit = h264bsdGetBits(pStrmData, 1);
+    if (bit == END_OF_STREAM)
+    {
+        return HANTRO_NOK;
+    }
+    pSubSeqInfo->leadingNonRefPicFlag =
+        (bit == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    bit = h264bsdGetBits(pStrmData, 1);
+    if (bit == END_OF_STREAM)
+    {
+        return HANTRO_NOK;
+    }
+    pSubSeqInfo->lastPicFlag = (bit == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    bit = h264bsdGetBits(pStrmData, 1);
+    if (bit == END_OF_STREAM)
+    {
+        return HANTRO_NOK;
+    }
+    pSubSeqInfo->subSeqFrameNumFlag =
+        (bit == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pSubSeqInfo->subSeqFrameNumFlag)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSubSeqInfo->subSeqFrameNum);
+        if (status != HANTRO_OK)
+        {
+            return status;
+        }
+    }
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeSubSeqLayerCharacteristics
+
+        Functional description:
+            Decodes a sub-sequence layer characteristics SEI payload: the
+            number of layers and, per layer, the accurate statistics flag
+            and the 16-bit average bit rate and average frame rate.
+
+        Inputs:
+            pStrmData       stream to read from
+
+        Outputs:
+            pSubSeqLayerCharacteristics     decoded values; numSubSeqLayers
+                            holds the coded value plus one
+
+        Returns:
+            HANTRO_OK on success. HANTRO_NOK on end of stream or when the
+            number of layers exceeds MAX_NUM_SUB_SEQ_LAYERS. A failure
+            status from h264bsdDecodeExpGolombUnsigned() is returned
+            unchanged.
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeSubSeqLayerCharacteristics(
+  strmData_t *pStrmData,
+  seiSubSeqLayerCharacteristics_t *pSubSeqLayerCharacteristics)
+{
+    seiSubSeqLayerCharacteristics_t *pLayers = pSubSeqLayerCharacteristics;
+    u32 status;
+    u32 bits;
+    u32 layer;
+
+    ASSERT(pStrmData);
+    ASSERT(pSubSeqLayerCharacteristics);
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pLayers->numSubSeqLayers);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+    /* the coded value is count minus one */
+    pLayers->numSubSeqLayers++;
+    if (pLayers->numSubSeqLayers > MAX_NUM_SUB_SEQ_LAYERS)
+    {
+        return HANTRO_NOK;
+    }
+
+    for (layer = 0; layer < pLayers->numSubSeqLayers; layer++)
+    {
+        bits = h264bsdGetBits(pStrmData, 1);
+        if (bits == END_OF_STREAM)
+        {
+            return HANTRO_NOK;
+        }
+        pLayers->accurateStatisticsFlag[layer] =
+            (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+        bits = h264bsdGetBits(pStrmData, 16);
+        if (bits == END_OF_STREAM)
+        {
+            return HANTRO_NOK;
+        }
+        pLayers->averageBitRate[layer] = bits;
+
+        bits = h264bsdGetBits(pStrmData, 16);
+        if (bits == END_OF_STREAM)
+        {
+            return HANTRO_NOK;
+        }
+        pLayers->averageFrameRate[layer] = bits;
+    }
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeSubSeqCharacteristics
+
+        Functional description:
+            Decodes a sub-sequence characteristics SEI payload: layer
+            number, sub-sequence id, the optional 32-bit duration, the
+            optional average rate data and the list of referenced
+            sub-sequences (layer number, id and direction bit for each).
+
+        Inputs:
+            pStrmData       stream to read from
+
+        Outputs:
+            pSubSeqCharacteristics  decoded values
+
+        Returns:
+            HANTRO_OK on success. HANTRO_NOK on end of stream, a layer
+            number or a number of referenced sub-sequences above
+            MAX_NUM_SUB_SEQ_LAYERS-1, or a sub-sequence id above 65535. A
+            failure status from h264bsdDecodeExpGolombUnsigned() is
+            returned unchanged.
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeSubSeqCharacteristics(
+  strmData_t *pStrmData,
+  seiSubSeqCharacteristics_t *pSubSeqCharacteristics)
+{
+    seiSubSeqCharacteristics_t *pChar = pSubSeqCharacteristics;
+    u32 status;
+    u32 bits;
+    u32 ref;
+
+    ASSERT(pStrmData);
+    ASSERT(pSubSeqCharacteristics);
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pChar->subSeqLayerNum);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+    if (pChar->subSeqLayerNum > MAX_NUM_SUB_SEQ_LAYERS-1)
+    {
+        return HANTRO_NOK;
+    }
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData, &pChar->subSeqId);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+    if (pChar->subSeqId > 65535)
+    {
+        return HANTRO_NOK;
+    }
+
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+    {
+        return HANTRO_NOK;
+    }
+    pChar->durationFlag = (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pChar->durationFlag)
+    {
+        /* the 32-bit duration is peeked first and then flushed */
+        pChar->subSeqDuration = h264bsdShowBits32(pStrmData);
+        if (h264bsdFlushBits(pStrmData,32) == END_OF_STREAM)
+        {
+            return HANTRO_NOK;
+        }
+    }
+
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+    {
+        return HANTRO_NOK;
+    }
+    pChar->averageRateFlag = (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pChar->averageRateFlag)
+    {
+        bits = h264bsdGetBits(pStrmData, 1);
+        if (bits == END_OF_STREAM)
+        {
+            return HANTRO_NOK;
+        }
+        pChar->accurateStatisticsFlag =
+            (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+        bits = h264bsdGetBits(pStrmData, 16);
+        if (bits == END_OF_STREAM)
+        {
+            return HANTRO_NOK;
+        }
+        pChar->averageBitRate = bits;
+
+        bits = h264bsdGetBits(pStrmData, 16);
+        if (bits == END_OF_STREAM)
+        {
+            return HANTRO_NOK;
+        }
+        pChar->averageFrameRate = bits;
+    }
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pChar->numReferencedSubseqs);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+    if (pChar->numReferencedSubseqs > MAX_NUM_SUB_SEQ_LAYERS-1)
+    {
+        return HANTRO_NOK;
+    }
+
+    for (ref = 0; ref < pChar->numReferencedSubseqs; ref++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pChar->refSubSeqLayerNum[ref]);
+        if (status != HANTRO_OK)
+        {
+            return status;
+        }
+
+        status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pChar->refSubSeqId[ref]);
+        if (status != HANTRO_OK)
+        {
+            return status;
+        }
+
+        bits = h264bsdGetBits(pStrmData, 1);
+        if (bits == END_OF_STREAM)
+        {
+            return HANTRO_NOK;
+        }
+        pChar->refSubSeqDirection[ref] = bits;
+    }
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeFullFrameFreeze
+
+        Functional description:
+            Decodes a full-frame freeze SEI payload, which consists of the
+            repetition period only.
+
+        Inputs:
+            pStrmData           stream to read from
+
+        Outputs:
+            pFullFrameFreeze    decoded repetition period
+
+        Returns:
+            HANTRO_OK on success. HANTRO_NOK when the repetition period is
+            above 16384. A failure status from
+            h264bsdDecodeExpGolombUnsigned() is returned unchanged.
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeFullFrameFreeze(
+  strmData_t *pStrmData,
+  seiFullFrameFreeze_t *pFullFrameFreeze)
+{
+    u32 status;
+
+    ASSERT(pStrmData);
+    ASSERT(pFullFrameFreeze);
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pFullFrameFreeze->fullFrameFreezeRepetitionPeriod);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+
+    return (pFullFrameFreeze->fullFrameFreezeRepetitionPeriod > 16384) ?
+        HANTRO_NOK : HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeFullFrameSnapshot
+
+        Functional description:
+            Decodes a full-frame snapshot SEI payload, which consists of
+            the snapshot id only.
+
+        Inputs:
+            pStrmData           stream to read from
+
+        Outputs:
+            pFullFrameSnapshot  decoded snapshot id
+
+        Returns:
+            HANTRO_OK on success, otherwise the failure status of
+            h264bsdDecodeExpGolombUnsigned().
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeFullFrameSnapshot(
+  strmData_t *pStrmData,
+  seiFullFrameSnapshot_t *pFullFrameSnapshot)
+{
+    u32 status;
+
+    ASSERT(pStrmData);
+    ASSERT(pFullFrameSnapshot);
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pFullFrameSnapshot->snapShotId);
+
+    /* a failure status is passed on unchanged */
+    return (status != HANTRO_OK) ? status : HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeProgressiveRefinementSegmentStart
+
+        Functional description:
+            Decodes a progressive refinement segment start SEI payload: the
+            refinement id and the number of refinement steps.
+
+        Inputs:
+            pStrmData       stream to read from
+
+        Outputs:
+            pProgressiveRefinementSegmentStart  decoded values;
+                            numRefinementSteps holds the coded value plus
+                            one
+
+        Returns:
+            HANTRO_OK on success, otherwise the failure status of
+            h264bsdDecodeExpGolombUnsigned().
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeProgressiveRefinementSegmentStart(
+  strmData_t *pStrmData,
+  seiProgressiveRefinementSegmentStart_t *pProgressiveRefinementSegmentStart)
+{
+    seiProgressiveRefinementSegmentStart_t *pStart =
+        pProgressiveRefinementSegmentStart;
+    u32 status;
+
+    ASSERT(pStrmData);
+    ASSERT(pProgressiveRefinementSegmentStart);
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pStart->progressiveRefinementId);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pStart->numRefinementSteps);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+    /* the coded value is step count minus one */
+    pStart->numRefinementSteps++;
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeProgressiveRefinementSegmentEnd
+
+        Functional description:
+            Decodes a progressive refinement segment end SEI payload, which
+            consists of the refinement id only.
+
+        Inputs:
+            pStrmData       stream to read from
+
+        Outputs:
+            pProgressiveRefinementSegmentEnd    decoded refinement id
+
+        Returns:
+            HANTRO_OK on success, otherwise the failure status of
+            h264bsdDecodeExpGolombUnsigned().
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeProgressiveRefinementSegmentEnd(
+  strmData_t *pStrmData,
+  seiProgressiveRefinementSegmentEnd_t *pProgressiveRefinementSegmentEnd)
+{
+    u32 status;
+
+    ASSERT(pStrmData);
+    ASSERT(pProgressiveRefinementSegmentEnd);
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pProgressiveRefinementSegmentEnd->progressiveRefinementId);
+
+    /* a failure status is passed on unchanged */
+    return (status != HANTRO_OK) ? status : HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeMotionConstrainedSliceGroupSet
+
+        Functional description:
+            Decodes a motion-constrained slice group set SEI payload: the
+            number of slice groups in the set, one slice group id per
+            group (each ceilLog2NumSliceGroups[numSliceGroups] bits wide),
+            the exact sample value match flag, the pan-scan rectangle flag
+            and, when that flag is set, the pan-scan rectangle id.
+
+        Inputs:
+            pStrmData       stream to read from
+            numSliceGroups  number of slice groups; upper bound for the
+                            set size and index into the bit width table
+
+        Outputs:
+            pMotionConstrainedSliceGroupSet     decoded values;
+                            numSliceGroupsInSet holds the coded value plus
+                            one
+
+        Returns:
+            HANTRO_OK on success. HANTRO_NOK on end of stream, when the set
+            holds more groups than numSliceGroups or when a slice group id
+            is above numSliceGroupsInSet-1. A failure status from
+            h264bsdDecodeExpGolombUnsigned() is returned unchanged.
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeMotionConstrainedSliceGroupSet(
+  strmData_t *pStrmData,
+  seiMotionConstrainedSliceGroupSet_t *pMotionConstrainedSliceGroupSet,
+  u32 numSliceGroups)
+{
+    seiMotionConstrainedSliceGroupSet_t *pSet =
+        pMotionConstrainedSliceGroupSet;
+    u32 status;
+    u32 bits;
+    u32 group;
+
+    ASSERT(pStrmData);
+    ASSERT(pMotionConstrainedSliceGroupSet);
+    ASSERT(numSliceGroups < MAX_NUM_SLICE_GROUPS);
+
+    status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pSet->numSliceGroupsInSet);
+    if (status != HANTRO_OK)
+    {
+        return status;
+    }
+    /* the coded value is count minus one */
+    pSet->numSliceGroupsInSet++;
+    if (pSet->numSliceGroupsInSet > numSliceGroups)
+    {
+        return HANTRO_NOK;
+    }
+
+    for (group = 0; group < pSet->numSliceGroupsInSet; group++)
+    {
+        bits = h264bsdGetBits(pStrmData,
+            ceilLog2NumSliceGroups[numSliceGroups]);
+        if (bits == END_OF_STREAM)
+        {
+            return HANTRO_NOK;
+        }
+        pSet->sliceGroupId[group] = bits;
+        if (pSet->sliceGroupId[group] > pSet->numSliceGroupsInSet-1)
+        {
+            return HANTRO_NOK;
+        }
+    }
+
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+    {
+        return HANTRO_NOK;
+    }
+    pSet->exactSampleValueMatchFlag =
+        (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    bits = h264bsdGetBits(pStrmData, 1);
+    if (bits == END_OF_STREAM)
+    {
+        return HANTRO_NOK;
+    }
+    pSet->panScanRectFlag = (bits == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pSet->panScanRectFlag)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSet->panScanRectId);
+        if (status != HANTRO_OK)
+        {
+            return status;
+        }
+    }
+
+    return HANTRO_OK;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: DecodeReservedSeiMessage
+
+        Functional description:
+            Stores the payload of an SEI message whose payload type has no
+            dedicated decoder: payloadSize bytes are copied from the stream
+            into a newly allocated buffer.
+
+        Inputs:
+            pStrmData       stream to read from
+            payloadSize     number of payload bytes
+
+        Outputs:
+            pReservedSeiMessage     reservedSeiMessagePayloadByte points to
+                            a buffer of numPayloadBytes bytes obtained with
+                            ALLOCATE. This function never frees it, so
+                            releasing it is left to the owner of the
+                            structure.
+
+        Returns:
+            HANTRO_OK on success. HANTRO_NOK on end of stream.
+            MEMORY_ALLOCATION_ERROR when the allocation yields NULL.
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeReservedSeiMessage(
+  strmData_t *pStrmData,
+  seiReservedSeiMessage_t *pReservedSeiMessage,
+  u32 payloadSize)
+{
+    u32 byteIdx;
+    u32 byteVal;
+
+    ASSERT(pStrmData);
+    ASSERT(pReservedSeiMessage);
+
+    /* where corresponding FREE() ??? */
+    ALLOCATE(pReservedSeiMessage->reservedSeiMessagePayloadByte,payloadSize,u8);
+    if (pReservedSeiMessage->reservedSeiMessagePayloadByte == NULL)
+    {
+        return MEMORY_ALLOCATION_ERROR;
+    }
+
+    pReservedSeiMessage->numPayloadBytes = payloadSize;
+
+    byteIdx = 0;
+    while (byteIdx < payloadSize)
+    {
+        byteVal = h264bsdGetBits(pStrmData,8);
+        if (byteVal == END_OF_STREAM)
+        {
+            return HANTRO_NOK;
+        }
+        pReservedSeiMessage->reservedSeiMessagePayloadByte[byteIdx] =
+            (u8)byteVal;
+        byteIdx++;
+    }
+
+    return HANTRO_OK;
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.h
new file mode 100755
index 0000000..efe543a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_sei.h
@@ -0,0 +1,252 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_SEI_H
+#define H264SWDEC_SEI_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_seq_param_set.h"
+#include "h264bsd_vui.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+#define MAX_PAN_SCAN_CNT 32 /* length of the four offset arrays in seiPanScanRect_t */
+#define MAX_NUM_SPARE_PICS 16 /* length of the per-spare-picture arrays in seiSparePic_t */
+#define MAX_NUM_CLOCK_TS 3 /* length of the per-clock-timestamp arrays in seiPicTiming_t */
+#define MAX_NUM_SUB_SEQ_LAYERS 256 /* length of the arrays in seiSubSeqLayerCharacteristics_t and seiSubSeqCharacteristics_t */
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+typedef struct /* storage for a buffering period SEI payload (inferred from the type name; the decoder is not shown here) */
+{
+ u32 seqParameterSetId;
+ u32 initialCpbRemovalDelay[MAX_CPB_CNT]; /* MAX_CPB_CNT is not defined in this header; it must come from an included one */
+ u32 initialCpbRemovalDelayOffset[MAX_CPB_CNT]; /* same length as initialCpbRemovalDelay */
+} seiBufferingPeriod_t;
+
+typedef struct /* storage for a picture timing SEI payload (inferred from the type name) */
+{
+ u32 cpbRemovalDelay;
+ u32 dpbOutputDelay;
+ u32 picStruct;
+ u32 clockTimeStampFlag[MAX_NUM_CLOCK_TS]; /* this and every array below holds MAX_NUM_CLOCK_TS entries */
+ u32 clockTimeStamp[MAX_NUM_CLOCK_TS];
+ u32 ctType[MAX_NUM_CLOCK_TS];
+ u32 nuitFieldBasedFlag[MAX_NUM_CLOCK_TS];
+ u32 countingType[MAX_NUM_CLOCK_TS];
+ u32 fullTimeStampFlag[MAX_NUM_CLOCK_TS];
+ u32 discontinuityFlag[MAX_NUM_CLOCK_TS];
+ u32 cntDroppedFlag[MAX_NUM_CLOCK_TS];
+ u32 nFrames[MAX_NUM_CLOCK_TS];
+ u32 secondsFlag[MAX_NUM_CLOCK_TS];
+ u32 secondsValue[MAX_NUM_CLOCK_TS];
+ u32 minutesFlag[MAX_NUM_CLOCK_TS];
+ u32 minutesValue[MAX_NUM_CLOCK_TS];
+ u32 hoursFlag[MAX_NUM_CLOCK_TS];
+ u32 hoursValue[MAX_NUM_CLOCK_TS];
+ i32 timeOffset[MAX_NUM_CLOCK_TS]; /* the only signed member of this structure */
+} seiPicTiming_t;
+
+typedef struct /* storage for a pan-scan rectangle SEI payload (inferred from the type name) */
+{
+ u32 panScanRectId;
+ u32 panScanRectCancelFlag;
+ u32 panScanCnt; /* presumably the number of valid entries in the arrays below -- confirm in the decoder */
+ i32 panScanRectLeftOffset[MAX_PAN_SCAN_CNT]; /* the four offset arrays are signed and hold MAX_PAN_SCAN_CNT entries each */
+ i32 panScanRectRightOffset[MAX_PAN_SCAN_CNT];
+ i32 panScanRectTopOffset[MAX_PAN_SCAN_CNT];
+ i32 panScanRectBottomOffset[MAX_PAN_SCAN_CNT];
+ u32 panScanRectRepetitionPeriod;
+} seiPanScanRect_t;
+
+typedef struct /* storage for a registered (ITU-T T.35) user data SEI payload (inferred from the type name) */
+{
+ u32 ituTT35CountryCode;
+ u32 ituTT35CountryCodeExtensionByte;
+ u8 *ituTT35PayloadByte; /* pointer to payload bytes; allocation and ownership are not visible in this header */
+ u32 numPayloadBytes; /* presumably the length of ituTT35PayloadByte -- confirm in the decoder */
+} seiUserDataRegisteredItuTT35_t;
+
+typedef struct /* storage for an unregistered user data SEI payload (inferred from the type name) */
+{
+ u32 uuidIsoIec11578[4]; /* identifier kept as four u32 words */
+ u8 *userDataPayloadByte; /* pointer to payload bytes; allocation and ownership are not visible in this header */
+ u32 numPayloadBytes; /* presumably the length of userDataPayloadByte -- confirm in the decoder */
+} seiUserDataUnregistered_t;
+
+typedef struct /* storage for a recovery point SEI payload (inferred from the type name) */
+{
+ u32 recoveryFrameCnt;
+ u32 exactMatchFlag;
+ u32 brokenLinkFlag;
+ u32 changingSliceGroupIdc;
+} seiRecoveryPoint_t;
+
+typedef struct /* storage for a decoded reference picture marking repetition SEI payload (inferred from the type name) */
+{
+ u32 originalIdrFlag;
+ u32 originalFrameNum;
+ decRefPicMarking_t decRefPicMarking; /* type is declared outside this header (presumably h264bsd_slice_header.h) */
+} seiDecRefPicMarkingRepetition_t;
+
+typedef struct /* storage for a spare picture SEI payload (inferred from the type name) */
+{
+ u32 targetFrameNum;
+ u32 spareFieldFlag;
+ u32 targetBottomFieldFlag;
+ u32 numSparePics; /* presumably the number of valid entries in the arrays below -- confirm in the decoder */
+ u32 deltaSpareFrameNum[MAX_NUM_SPARE_PICS];
+ u32 spareBottomFieldFlag[MAX_NUM_SPARE_PICS];
+ u32 spareAreaIdc[MAX_NUM_SPARE_PICS];
+ u32 *spareUnitFlag[MAX_NUM_SPARE_PICS]; /* one pointer per spare picture; who allocates the pointed-to data is not visible here */
+ u32 *zeroRunLength[MAX_NUM_SPARE_PICS]; /* one pointer per spare picture; who allocates the pointed-to data is not visible here */
+} seiSparePic_t;
+
+typedef struct /* storage for a scene information SEI payload (inferred from the type name) */
+{
+ u32 sceneInfoPresentFlag;
+ u32 sceneId;
+ u32 sceneTransitionType;
+ u32 secondSceneId;
+} seiSceneInfo_t;
+
+typedef struct /* storage for a sub-sequence information SEI payload (inferred from the type name) */
+{
+ u32 subSeqLayerNum;
+ u32 subSeqId;
+ u32 firstRefPicFlag;
+ u32 leadingNonRefPicFlag;
+ u32 lastPicFlag;
+ u32 subSeqFrameNumFlag;
+ u32 subSeqFrameNum;
+} seiSubSeqInfo_t;
+
+typedef struct /* storage for a sub-sequence layer characteristics SEI payload (inferred from the type name) */
+{
+ u32 numSubSeqLayers; /* presumably the number of valid entries in the arrays below -- confirm in the decoder */
+ u32 accurateStatisticsFlag[MAX_NUM_SUB_SEQ_LAYERS]; /* each array holds MAX_NUM_SUB_SEQ_LAYERS entries */
+ u32 averageBitRate[MAX_NUM_SUB_SEQ_LAYERS];
+ u32 averageFrameRate[MAX_NUM_SUB_SEQ_LAYERS];
+} seiSubSeqLayerCharacteristics_t;
+
+typedef struct /* storage for a sub-sequence characteristics SEI payload (inferred from the type name) */
+{
+ u32 subSeqLayerNum;
+ u32 subSeqId;
+ u32 durationFlag;
+ u32 subSeqDuration;
+ u32 averageRateFlag;
+ u32 accurateStatisticsFlag;
+ u32 averageBitRate;
+ u32 averageFrameRate;
+ u32 numReferencedSubseqs; /* presumably the number of valid entries in the arrays below -- confirm in the decoder */
+ u32 refSubSeqLayerNum[MAX_NUM_SUB_SEQ_LAYERS]; /* each array holds MAX_NUM_SUB_SEQ_LAYERS entries */
+ u32 refSubSeqId[MAX_NUM_SUB_SEQ_LAYERS];
+ u32 refSubSeqDirection[MAX_NUM_SUB_SEQ_LAYERS];
+} seiSubSeqCharacteristics_t;
+
+typedef struct /* storage for a full-frame freeze SEI payload (inferred from the type name) */
+{
+ u32 fullFrameFreezeRepetitionPeriod;
+} seiFullFrameFreeze_t;
+
+typedef struct /* storage for a full-frame snapshot SEI payload (inferred from the type name) */
+{
+ u32 snapShotId;
+} seiFullFrameSnapshot_t;
+
+typedef struct /* storage for a progressive refinement segment start SEI payload (inferred from the type name) */
+{
+ u32 progressiveRefinementId;
+ u32 numRefinementSteps;
+} seiProgressiveRefinementSegmentStart_t;
+
+typedef struct /* storage for a progressive refinement segment end SEI payload (inferred from the type name) */
+{
+ u32 progressiveRefinementId;
+} seiProgressiveRefinementSegmentEnd_t;
+
+typedef struct /* storage for a motion-constrained slice group set SEI payload */
+{
+ u32 numSliceGroupsInSet;
+ u32 sliceGroupId[MAX_NUM_SLICE_GROUPS]; /* the decoder rejects entries greater than numSliceGroupsInSet-1; MAX_NUM_SLICE_GROUPS is defined outside this header */
+ u32 exactSampleValueMatchFlag; /* stored as HANTRO_TRUE / HANTRO_FALSE by the decoder */
+ u32 panScanRectFlag; /* stored as HANTRO_TRUE / HANTRO_FALSE by the decoder */
+ u32 panScanRectId; /* only decoded when panScanRectFlag is set */
+} seiMotionConstrainedSliceGroupSet_t;
+
+typedef struct /* raw payload of a reserved SEI message, filled in by DecodeReservedSeiMessage */
+{
+ u8 *reservedSeiMessagePayloadByte; /* buffer allocated with ALLOCATE() by DecodeReservedSeiMessage, one u8 per payload byte */
+ u32 numPayloadBytes; /* set to the payloadSize passed to DecodeReservedSeiMessage */
+} seiReservedSeiMessage_t;
+
+typedef struct /* one SEI message: payloadType plus one member per payload structure declared in this header; a struct, not a union, so every member has its own storage */
+{
+ u32 payloadType; /* presumably tells which of the members below was filled in -- confirm in h264bsdDecodeSeiMessage */
+ seiBufferingPeriod_t bufferingPeriod;
+ seiPicTiming_t picTiming;
+ seiPanScanRect_t panScanRect;
+ seiUserDataRegisteredItuTT35_t userDataRegisteredItuTT35;
+ seiUserDataUnregistered_t userDataUnregistered;
+ seiRecoveryPoint_t recoveryPoint;
+ seiDecRefPicMarkingRepetition_t decRefPicMarkingRepetition;
+ seiSparePic_t sparePic;
+ seiSceneInfo_t sceneInfo;
+ seiSubSeqInfo_t subSeqInfo;
+ seiSubSeqLayerCharacteristics_t subSeqLayerCharacteristics;
+ seiSubSeqCharacteristics_t subSeqCharacteristics;
+ seiFullFrameFreeze_t fullFrameFreeze;
+ seiFullFrameSnapshot_t fullFrameSnapshot;
+ seiProgressiveRefinementSegmentStart_t progressiveRefinementSegmentStart;
+ seiProgressiveRefinementSegmentEnd_t progressiveRefinementSegmentEnd;
+ seiMotionConstrainedSliceGroupSet_t motionConstrainedSliceGroupSet;
+ seiReservedSeiMessage_t reservedSeiMessage;
+} seiMessage_t;
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeSeiMessage(
+ strmData_t *pStrmData,
+ seqParamSet_t *pSeqParamSet,
+ seiMessage_t *pSeiMessage,
+ u32 numSliceGroups);
+
+#endif /* #ifdef H264SWDEC_SEI_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.c
new file mode 100644
index 0000000..751051a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.c
@@ -0,0 +1,577 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdDecodeSeqParamSet
+ GetDpbSize
+ h264bsdCompareSeqParamSets
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_seq_param_set.h"
+#include "h264bsd_util.h"
+#include "h264bsd_vlc.h"
+#include "h264bsd_vui.h"
+#include "h264bsd_cfg.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/* enumeration to indicate invalid return value from the GetDpbSize function */
+enum {INVALID_DPB_SIZE = 0x7FFFFFFF};
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 GetDpbSize(u32 picSizeInMbs, u32 levelIdc);
+
+/*------------------------------------------------------------------------------
+
+    Function name: h264bsdDecodeSeqParamSet
+
+        Functional description:
+          Decode sequence parameter set information from the stream.
+
+          Function allocates memory for offsetForRefFrame array if
+          picture order count type is 1 and numRefFramesInPicOrderCntCycle
+          is greater than zero, and for vuiParameters if VUI parameters
+          are present. Neither allocation is released by this function,
+          not even when it returns an error after allocating, so the
+          caller has to free both pointers in every case.
+
+          Picture dimensions are rejected when a dimension in macroblocks
+          exceeds 0x0FFFFFFF or when the number of macroblocks in the
+          picture does not fit in 32 bits. Frame cropping offsets are
+          checked in unsigned arithmetic, so offsets of 2^31 or more are
+          rejected too.
+
+        Inputs:
+          pStrmData       pointer to stream data structure
+
+        Outputs:
+          pSeqParamSet    decoded information is stored here, the structure
+                          is cleared first
+
+        Returns:
+          HANTRO_OK                 success
+          HANTRO_NOK                failure, invalid information or end of
+                                    stream
+          MEMORY_ALLOCATION_ERROR   for memory allocation failure
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeSeqParamSet(strmData_t *pStrmData, seqParamSet_t *pSeqParamSet)
+{
+
+/* Variables */
+
+    u32 tmp, i, value;
+    u32 horLimit, verLimit;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSeqParamSet);
+
+    H264SwDecMemset(pSeqParamSet, 0, sizeof(seqParamSet_t));
+
+    /* profile_idc */
+    tmp = h264bsdGetBits(pStrmData, 8);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    if (tmp != 66)
+    {
+        DEBUG(("NOT BASELINE PROFILE %d\n", tmp));
+    }
+    pSeqParamSet->profileIdc = tmp;
+
+    /* constrained_set0_flag, value not used */
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    /* constrained_set1_flag, value not used */
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    /* constrained_set2_flag, value not used */
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    /* reserved_zero_5bits, values of these bits shall be ignored */
+    tmp = h264bsdGetBits(pStrmData, 5);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    /* level_idc */
+    tmp = h264bsdGetBits(pStrmData, 8);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pSeqParamSet->levelIdc = tmp;
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pSeqParamSet->seqParameterSetId);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if (pSeqParamSet->seqParameterSetId >= MAX_NUM_SEQ_PARAM_SETS)
+    {
+        EPRINT("seq_param_set_id");
+        return(HANTRO_NOK);
+    }
+
+    /* log2_max_frame_num_minus4 */
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if (value > 12)
+    {
+        EPRINT("log2_max_frame_num_minus4");
+        return(HANTRO_NOK);
+    }
+    /* maxFrameNum = 2^(log2_max_frame_num_minus4 + 4) */
+    pSeqParamSet->maxFrameNum = 1 << (value+4);
+
+    /* valid POC types are 0, 1 and 2 */
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if (value > 2)
+    {
+        EPRINT("pic_order_cnt_type");
+        return(HANTRO_NOK);
+    }
+    pSeqParamSet->picOrderCntType = value;
+
+    if (pSeqParamSet->picOrderCntType == 0)
+    {
+        /* log2_max_pic_order_cnt_lsb_minus4 */
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (value > 12)
+        {
+            EPRINT("log2_max_pic_order_cnt_lsb_minus4");
+            return(HANTRO_NOK);
+        }
+        /* maxPicOrderCntLsb = 2^(log2_max_pic_order_cnt_lsb_minus4 + 4) */
+        pSeqParamSet->maxPicOrderCntLsb = 1 << (value+4);
+    }
+    else if (pSeqParamSet->picOrderCntType == 1)
+    {
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pSeqParamSet->deltaPicOrderAlwaysZeroFlag = (tmp == 1) ?
+                                        HANTRO_TRUE : HANTRO_FALSE;
+
+        tmp = h264bsdDecodeExpGolombSigned(pStrmData,
+            &pSeqParamSet->offsetForNonRefPic);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+
+        tmp = h264bsdDecodeExpGolombSigned(pStrmData,
+            &pSeqParamSet->offsetForTopToBottomField);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSeqParamSet->numRefFramesInPicOrderCntCycle);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pSeqParamSet->numRefFramesInPicOrderCntCycle > 255)
+        {
+            EPRINT("num_ref_frames_in_pic_order_cnt_cycle");
+            return(HANTRO_NOK);
+        }
+
+        if (pSeqParamSet->numRefFramesInPicOrderCntCycle)
+        {
+            /* NOTE: This has to be freed somewhere! It is not released on
+             * the error returns below either. */
+            ALLOCATE(pSeqParamSet->offsetForRefFrame,
+                     pSeqParamSet->numRefFramesInPicOrderCntCycle, i32);
+            if (pSeqParamSet->offsetForRefFrame == NULL)
+                return(MEMORY_ALLOCATION_ERROR);
+
+            for (i = 0; i < pSeqParamSet->numRefFramesInPicOrderCntCycle; i++)
+            {
+                tmp =  h264bsdDecodeExpGolombSigned(pStrmData,
+                    pSeqParamSet->offsetForRefFrame + i);
+                if (tmp != HANTRO_OK)
+                    return(tmp);
+            }
+        }
+        else
+        {
+            pSeqParamSet->offsetForRefFrame = NULL;
+        }
+    }
+
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+        &pSeqParamSet->numRefFrames);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if (pSeqParamSet->numRefFrames > MAX_NUM_REF_PICS)
+    {
+        EPRINT("num_ref_frames");
+        return(HANTRO_NOK);
+    }
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pSeqParamSet->gapsInFrameNumValueAllowedFlag = (tmp == 1) ?
+                                        HANTRO_TRUE : HANTRO_FALSE;
+
+    /* picture width in macroblocks minus 1. The decoded value is limited so
+     * that value + 1 cannot wrap to zero and so that 8 * picWidthInMbs,
+     * used by the cropping check below, cannot overflow */
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if (value >= 0x0FFFFFFF)
+    {
+        EPRINT("pic_width_in_mbs_minus1");
+        return(HANTRO_NOK);
+    }
+    pSeqParamSet->picWidthInMbs = value + 1;
+
+    /* picture height in macroblocks minus 1, limited like the width */
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if (value >= 0x0FFFFFFF)
+    {
+        EPRINT("pic_height_in_map_units_minus1");
+        return(HANTRO_NOK);
+    }
+    pSeqParamSet->picHeightInMbs = value + 1;
+
+    /* the picture size in macroblocks is computed as width * height below,
+     * the product has to fit in 32 bits (width is non-zero here) */
+    if (pSeqParamSet->picHeightInMbs >
+        0xFFFFFFFF / pSeqParamSet->picWidthInMbs)
+    {
+        EPRINT("pic_size_in_mbs");
+        return(HANTRO_NOK);
+    }
+
+    /* frame_mbs_only_flag, shall be 1 for baseline profile */
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    if (!tmp)
+    {
+        EPRINT("frame_mbs_only_flag");
+        return(HANTRO_NOK);
+    }
+
+    /* direct_8x8_inference_flag, value not used */
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pSeqParamSet->frameCroppingFlag = (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+    if (pSeqParamSet->frameCroppingFlag)
+    {
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSeqParamSet->frameCropLeftOffset);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSeqParamSet->frameCropRightOffset);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSeqParamSet->frameCropTopOffset);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pSeqParamSet->frameCropBottomOffset);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+
+        /* check that frame cropping params are valid, parameters shall
+         * specify non-negative area within the original picture, i.e.
+         * left + right < 8 * width and top + bottom < 8 * height. The test
+         * is written without additions of the offsets and in unsigned
+         * arithmetic so that huge offsets cannot overflow or turn into
+         * negative numbers */
+        horLimit = 8 * pSeqParamSet->picWidthInMbs;
+        verLimit = 8 * pSeqParamSet->picHeightInMbs;
+        if ( pSeqParamSet->frameCropRightOffset >= horLimit ||
+             pSeqParamSet->frameCropLeftOffset >
+                 horLimit - pSeqParamSet->frameCropRightOffset - 1 ||
+             pSeqParamSet->frameCropBottomOffset >= verLimit ||
+             pSeqParamSet->frameCropTopOffset >
+                 verLimit - pSeqParamSet->frameCropBottomOffset - 1 )
+        {
+            EPRINT("frame_cropping");
+            return(HANTRO_NOK);
+        }
+    }
+
+    /* check that image dimensions and levelIdc match, a mismatch is not
+     * treated as an error but num_ref_frames is used as the DPB size */
+    tmp = pSeqParamSet->picWidthInMbs * pSeqParamSet->picHeightInMbs;
+    value = GetDpbSize(tmp, pSeqParamSet->levelIdc);
+    if (value == INVALID_DPB_SIZE || pSeqParamSet->numRefFrames > value)
+    {
+        DEBUG(("WARNING! Invalid DPB size based on SPS Level!\n"));
+        DEBUG(("WARNING! Using num_ref_frames =%d for DPB size!\n",
+                        pSeqParamSet->numRefFrames));
+        value = pSeqParamSet->numRefFrames;
+    }
+    pSeqParamSet->maxDpbSize = value;
+
+    tmp = h264bsdGetBits(pStrmData, 1);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pSeqParamSet->vuiParametersPresentFlag = (tmp == 1) ?
+                                HANTRO_TRUE : HANTRO_FALSE;
+
+    /* VUI */
+    if (pSeqParamSet->vuiParametersPresentFlag)
+    {
+        /* like offsetForRefFrame, this is not released here on errors */
+        ALLOCATE(pSeqParamSet->vuiParameters, 1, vuiParameters_t);
+        if (pSeqParamSet->vuiParameters == NULL)
+            return(MEMORY_ALLOCATION_ERROR);
+        tmp = h264bsdDecodeVuiParameters(pStrmData,
+            pSeqParamSet->vuiParameters);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        /* check numReorderFrames and maxDecFrameBuffering */
+        if (pSeqParamSet->vuiParameters->bitstreamRestrictionFlag)
+        {
+            if (pSeqParamSet->vuiParameters->numReorderFrames >
+                    pSeqParamSet->vuiParameters->maxDecFrameBuffering ||
+                pSeqParamSet->vuiParameters->maxDecFrameBuffering <
+                    pSeqParamSet->numRefFrames ||
+                pSeqParamSet->vuiParameters->maxDecFrameBuffering >
+                    pSeqParamSet->maxDpbSize)
+            {
+                return(HANTRO_NOK);
+            }
+
+            /* standard says that "the sequence shall not require a DPB with
+             * size of more than max(1, maxDecFrameBuffering) */
+            pSeqParamSet->maxDpbSize =
+                MAX(1, pSeqParamSet->vuiParameters->maxDecFrameBuffering);
+        }
+    }
+
+    /* ignore possible errors in trailing bits of parameters sets */
+    (void)h264bsdRbspTrailingBits(pStrmData);
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: GetDpbSize
+
+        Functional description:
+          Get size of the DPB in frames. Size is determined based on the
+          picture size and MaxDPB for the specified level. These determine
+          how many pictures may fit into to the buffer. However, the size
+          is also limited to a maximum of 16 frames and therefore function
+          returns the minimum of the determined size and 16.
+
+        Inputs:
+          picSizeInMbs    number of macroblocks in the picture
+          levelIdc        indicates the level
+
+        Outputs:
+          none
+
+        Returns:
+          size of the DPB in frames
+          INVALID_DPB_SIZE when invalid levelIdc specified, picSizeInMbs
+          is zero or picSizeInMbs is higher than supported by the level
+          in question
+
+------------------------------------------------------------------------------*/
+
+u32 GetDpbSize(u32 picSizeInMbs, u32 levelIdc)
+{
+
+/* Variables */
+
+    u32 tmp;
+    u32 maxPicSizeInMbs;
+
+/* Code */
+
+    /* picSizeInMbs is the divisor below. It is derived from stream data by
+     * the caller, so an empty picture is reported as invalid at run time
+     * instead of relying on an assertion that release builds may drop */
+    if (picSizeInMbs == 0)
+        return(INVALID_DPB_SIZE);
+
+    /* use tmp as the size of the DPB in bytes, computes as 1024 * MaxDPB
+     * (from table A-1 in Annex A) */
+    switch (levelIdc)
+    {
+        case 10:
+            tmp = 152064;
+            maxPicSizeInMbs = 99;
+            break;
+
+        case 11:
+            tmp = 345600;
+            maxPicSizeInMbs = 396;
+            break;
+
+        case 12:
+            tmp = 912384;
+            maxPicSizeInMbs = 396;
+            break;
+
+        case 13:
+            tmp = 912384;
+            maxPicSizeInMbs = 396;
+            break;
+
+        case 20:
+            tmp = 912384;
+            maxPicSizeInMbs = 396;
+            break;
+
+        case 21:
+            tmp = 1824768;
+            maxPicSizeInMbs = 792;
+            break;
+
+        case 22:
+            tmp = 3110400;
+            maxPicSizeInMbs = 1620;
+            break;
+
+        case 30:
+            tmp = 3110400;
+            maxPicSizeInMbs = 1620;
+            break;
+
+        case 31:
+            tmp = 6912000;
+            maxPicSizeInMbs = 3600;
+            break;
+
+        case 32:
+            tmp = 7864320;
+            maxPicSizeInMbs = 5120;
+            break;
+
+        case 40:
+            tmp = 12582912;
+            maxPicSizeInMbs = 8192;
+            break;
+
+        case 41:
+            tmp = 12582912;
+            maxPicSizeInMbs = 8192;
+            break;
+
+        case 42:
+            tmp = 34816*384;
+            maxPicSizeInMbs = 8704;
+            break;
+
+        case 50:
+            /* standard says 42301440 here, but corrigendum "corrects" this to
+             * 42393600 */
+            tmp = 42393600;
+            maxPicSizeInMbs = 22080;
+            break;
+
+        case 51:
+            tmp = 70778880;
+            maxPicSizeInMbs = 36864;
+            break;
+
+        default:
+            return(INVALID_DPB_SIZE);
+    }
+
+    /* this is not "correct" return value! However, it results in error in
+     * decoding and this was easiest place to check picture size */
+    if (picSizeInMbs > maxPicSizeInMbs)
+        return(INVALID_DPB_SIZE);
+
+    /* 384 bytes per macroblock; picSizeInMbs is at most 36864 at this point
+     * so the product cannot overflow */
+    tmp /= (picSizeInMbs*384);
+
+    return(MIN(tmp, 16));
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function name: h264bsdCompareSeqParamSets
+
+        Functional description:
+          Tell whether two sequence parameter sets differ. Parameters that
+          are always present are compared first; parameters that only
+          exist for a certain picture order count type or when frame
+          cropping is enabled are compared only after those matched.
+          seqParameterSetId, maxDpbSize and the contents of the VUI
+          parameters do not take part in the comparison.
+
+        Inputs:
+          pSps1   pointer to a sequence parameter set
+          pSps2   pointer to another sequence parameter set
+
+        Outputs:
+          0       sequence parameter sets are equal
+          1       otherwise
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCompareSeqParamSets(seqParamSet_t *pSps1, seqParamSet_t *pSps2)
+{
+
+/* Variables */
+
+    u32 idx;
+
+/* Code */
+
+    ASSERT(pSps1);
+    ASSERT(pSps2);
+
+    /* unconditional parameters, any mismatch settles the result */
+    if (pSps1->profileIdc != pSps2->profileIdc ||
+        pSps1->levelIdc != pSps2->levelIdc ||
+        pSps1->maxFrameNum != pSps2->maxFrameNum ||
+        pSps1->picOrderCntType != pSps2->picOrderCntType ||
+        pSps1->numRefFrames != pSps2->numRefFrames ||
+        pSps1->gapsInFrameNumValueAllowedFlag !=
+            pSps2->gapsInFrameNumValueAllowedFlag ||
+        pSps1->picWidthInMbs != pSps2->picWidthInMbs ||
+        pSps1->picHeightInMbs != pSps2->picHeightInMbs ||
+        pSps1->frameCroppingFlag != pSps2->frameCroppingFlag ||
+        pSps1->vuiParametersPresentFlag != pSps2->vuiParametersPresentFlag)
+    {
+        return 1;
+    }
+
+    /* parameters depending on the picture order count type, which is known
+     * to be the same in both sets at this point */
+    if (pSps1->picOrderCntType == 0)
+    {
+        if (pSps1->maxPicOrderCntLsb != pSps2->maxPicOrderCntLsb)
+            return 1;
+    }
+    else if (pSps1->picOrderCntType == 1)
+    {
+        if (pSps1->deltaPicOrderAlwaysZeroFlag !=
+                pSps2->deltaPicOrderAlwaysZeroFlag)
+            return 1;
+        if (pSps1->offsetForNonRefPic != pSps2->offsetForNonRefPic)
+            return 1;
+        if (pSps1->offsetForTopToBottomField !=
+                pSps2->offsetForTopToBottomField)
+            return 1;
+        if (pSps1->numRefFramesInPicOrderCntCycle !=
+                pSps2->numRefFramesInPicOrderCntCycle)
+            return 1;
+
+        /* both arrays have the same number of entries here */
+        for (idx = 0; idx < pSps1->numRefFramesInPicOrderCntCycle; idx++)
+        {
+            if (pSps1->offsetForRefFrame[idx] !=
+                    pSps2->offsetForRefFrame[idx])
+                return 1;
+        }
+    }
+
+    /* cropping offsets only exist when cropping is enabled */
+    if (pSps1->frameCroppingFlag &&
+        (pSps1->frameCropLeftOffset != pSps2->frameCropLeftOffset ||
+         pSps1->frameCropRightOffset != pSps2->frameCropRightOffset ||
+         pSps1->frameCropTopOffset != pSps2->frameCropTopOffset ||
+         pSps1->frameCropBottomOffset != pSps2->frameCropBottomOffset))
+    {
+        return 1;
+    }
+
+    return 0;
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.h
new file mode 100755
index 0000000..e18df94
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_seq_param_set.h
@@ -0,0 +1,86 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_SEQ_PARAM_SET_H
+#define H264SWDEC_SEQ_PARAM_SET_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_vui.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/* structure to store sequence parameter set information decoded from the
+ * stream */
+typedef struct
+{
+ u32 profileIdc; /* 8-bit profile_idc; values other than 66 are only reported with DEBUG by the decoder */
+ u32 levelIdc; /* 8-bit level_idc, input to the DPB size computation */
+ u32 seqParameterSetId; /* decoder accepts only values below MAX_NUM_SEQ_PARAM_SETS */
+ u32 maxFrameNum; /* 2^(log2_max_frame_num_minus4 + 4) */
+ u32 picOrderCntType; /* 0, 1 or 2 */
+ u32 maxPicOrderCntLsb; /* 2^(log2_max_pic_order_cnt_lsb_minus4 + 4), decoded only for POC type 0 */
+ u32 deltaPicOrderAlwaysZeroFlag; /* this and the four members below are decoded only for POC type 1 */
+ i32 offsetForNonRefPic;
+ i32 offsetForTopToBottomField;
+ u32 numRefFramesInPicOrderCntCycle; /* at most 255 */
+ i32 *offsetForRefFrame; /* array of numRefFramesInPicOrderCntCycle entries allocated by the decoder, NULL when the count is zero */
+ u32 numRefFrames; /* at most MAX_NUM_REF_PICS */
+ u32 gapsInFrameNumValueAllowedFlag;
+ u32 picWidthInMbs; /* decoded value plus one */
+ u32 picHeightInMbs; /* decoded value plus one */
+ u32 frameCroppingFlag;
+ u32 frameCropLeftOffset; /* the four crop offsets are decoded only when frameCroppingFlag is set */
+ u32 frameCropRightOffset;
+ u32 frameCropTopOffset;
+ u32 frameCropBottomOffset;
+ u32 vuiParametersPresentFlag;
+ vuiParameters_t *vuiParameters; /* allocated by the decoder only when vuiParametersPresentFlag is set */
+ u32 maxDpbSize; /* derived from level and picture size, num_ref_frames or the VUI maxDecFrameBuffering */
+} seqParamSet_t;
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeSeqParamSet(strmData_t *pStrmData,
+ seqParamSet_t *pSeqParamSet);
+
+u32 h264bsdCompareSeqParamSets(seqParamSet_t *pSps1, seqParamSet_t *pSps2);
+
+#endif /* #ifdef H264SWDEC_SEQ_PARAM_SET_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.c
new file mode 100755
index 0000000..c288d4b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.c
@@ -0,0 +1,354 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdDecodeSliceData
+ SetMbParams
+ h264bsdMarkSliceCorrupted
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_slice_data.h"
+#include "h264bsd_util.h"
+#include "h264bsd_vlc.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static void SetMbParams(mbStorage_t *pMb, sliceHeader_t *pSlice, u32 sliceId,
+ i32 chromaQpIndexOffset);
+
+/*------------------------------------------------------------------------------
+
+    5.1  Function name: h264bsdDecodeSliceData
+
+        Functional description:
+          Decode the macroblocks of one slice. For every macroblock the
+          function reads a possible skip run (non-intra slices only) and,
+          unless the macroblock is skipped, the macroblock layer from the
+          stream, and then calls h264bsdDecodeMacroblock for the rest of
+          the macroblock processing. The address of the next macroblock
+          is obtained with h264bsdNextMbAddress using the slice group map
+          in the storage.
+
+        Inputs:
+          pStrmData       pointer to stream data structure
+          pStorage        pointer to storage structure
+          currImage       pointer to current processed picture, needed for
+                          intra prediction of the macroblocks
+          pSliceHeader    pointer to slice header of the current slice
+
+        Outputs:
+          currImage       processed macroblocks are written to current image
+          pStorage        mbStorage structure of each processed macroblock
+                          is updated here, as well as the sliceId,
+                          lastMbAddr and numDecodedMbs of the slice storage
+
+        Returns:
+          HANTRO_OK       success
+          HANTRO_NOK      invalid stream data
+          other values returned by the called decoding functions are
+          passed on unchanged
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeSliceData(strmData_t *pStrmData, storage_t *pStorage,
+    image_t *currImage, sliceHeader_t *pSliceHeader)
+{
+
+/* Variables */
+
+    u8 scratch[384 + 15 + 32];
+    u8 *alignedScratch;
+    u32 status;
+    u32 skipRun = 0;
+    u32 prevSkipped = HANTRO_FALSE;
+    u32 mbAddr;
+    u32 moreMbs;
+    u32 numNewMbs = 0;
+    i32 qpY;
+    macroblockLayer_t *mbLayer;
+    mbStorage_t *pMb;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSliceHeader);
+    ASSERT(pStorage);
+    ASSERT(pSliceHeader->firstMbInSlice < pStorage->picSizeInMbs);
+
+    /* macroblock work buffer, aligned to 16 bytes */
+    alignedScratch = (u8*)ALIGN(scratch, 16);
+
+    mbLayer = pStorage->mbLayer;
+    mbAddr = pSliceHeader->firstMbInSlice;
+
+    /* slice index is incremented for every slice, it becomes one for the
+     * first slice of the picture */
+    pStorage->slice->sliceId++;
+
+    /* address of the last successfully decoded macroblock, used in error
+     * handling */
+    pStorage->slice->lastMbAddr = 0;
+
+    /* slice starts with the picture level initial QP adjusted by the delta
+     * given in the slice header */
+    qpY = (i32)pStorage->activePps->picInitQp + pSliceHeader->sliceQpDelta;
+
+    do
+    {
+        pMb = pStorage->mb + mbAddr;
+
+        /* a primary picture must not contain a macroblock twice */
+        if (!pSliceHeader->redundantPicCnt && pMb->decoded)
+        {
+            EPRINT("Primary and already decoded");
+            return(HANTRO_NOK);
+        }
+
+        SetMbParams(pMb, pSliceHeader,
+            pStorage->slice->sliceId, pStorage->activePps->chromaQpIndexOffset);
+
+        /* a new skip run is read in non-intra slices unless the previous
+         * macroblock was a skipped one */
+        if (!IS_I_SLICE(pSliceHeader->sliceType) && !prevSkipped)
+        {
+            status = h264bsdDecodeExpGolombUnsigned(pStrmData, &skipRun);
+            if (status != HANTRO_OK)
+                return(status);
+            /* skip run cannot exceed the number of macroblocks left */
+            if (skipRun > (pStorage->picSizeInMbs - mbAddr))
+            {
+                EPRINT("skip_run");
+                return(HANTRO_NOK);
+            }
+            if (skipRun)
+            {
+                prevSkipped = HANTRO_TRUE;
+                H264SwDecMemset(&mbLayer->mbPred, 0, sizeof(mbPred_t));
+                /* macroblock layer describes a skipped macroblock */
+                mbLayer->mbType = P_Skip;
+            }
+        }
+
+        if (skipRun)
+        {
+            DEBUG(("Skipping macroblock %d\n", mbAddr));
+            skipRun--;
+        }
+        else
+        {
+            prevSkipped = HANTRO_FALSE;
+            status = h264bsdDecodeMacroblockLayer(pStrmData, mbLayer,
+                pMb, pSliceHeader->sliceType,
+                pSliceHeader->numRefIdxL0Active);
+            if (status != HANTRO_OK)
+            {
+                EPRINT("macroblock_layer");
+                return(status);
+            }
+        }
+
+        status = h264bsdDecodeMacroblock(pMb, mbLayer,
+            currImage, pStorage->dpb, &qpY, mbAddr,
+            pStorage->activePps->constrainedIntraPredFlag, alignedScratch);
+        if (status != HANTRO_OK)
+        {
+            EPRINT("MACRO_BLOCK");
+            return(status);
+        }
+
+        /* macroblocks decoded again (redundant slices) are not counted */
+        if (pMb->decoded == 1)
+            numNewMbs++;
+
+        /* continue while the stream has data or the current skip run has
+         * macroblocks left */
+        if (h264bsdMoreRbspData(pStrmData) || skipRun)
+            moreMbs = HANTRO_TRUE;
+        else
+            moreMbs = HANTRO_FALSE;
+
+        /* lastMbAddr is maintained for intra slices only (all macroblocks
+         * of inter slices will be lost in case of an error) */
+        if (IS_I_SLICE(pSliceHeader->sliceType))
+            pStorage->slice->lastMbAddr = mbAddr;
+
+        mbAddr = h264bsdNextMbAddress(pStorage->sliceGroupMap,
+            pStorage->picSizeInMbs, mbAddr);
+        /* stream continues although the slice group has no macroblocks
+         * left -> error */
+        if (moreMbs && !mbAddr)
+        {
+            EPRINT("Next mb address");
+            return(HANTRO_NOK);
+        }
+
+    } while (moreMbs);
+
+    if ((pStorage->slice->numDecodedMbs + numNewMbs) > pStorage->picSizeInMbs)
+    {
+        EPRINT("Num decoded mbs");
+        return(HANTRO_NOK);
+    }
+
+    pStorage->slice->numDecodedMbs += numNewMbs;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    5.2  Function: SetMbParams
+
+        Functional description:
+          Copy the parameters that stay the same for every macroblock of
+          a slice into the macroblock structure: slice id, deblocking
+          filter control values from the slice header and the chroma QP
+          index offset.
+
+        Inputs:
+          pSlice                pointer to current slice header
+          sliceId               id of the current slice
+          chromaQpIndexOffset   chroma QP index offset to store
+
+        Outputs:
+          pMb                   pointer to macroblock structure which is
+                                updated
+
+        Returns:
+          none
+
+------------------------------------------------------------------------------*/
+
+void SetMbParams(mbStorage_t *pMb, sliceHeader_t *pSlice, u32 sliceId,
+    i32 chromaQpIndexOffset)
+{
+
+/* Code */
+
+    pMb->sliceId = sliceId;
+
+    /* deblocking filter controls of the slice */
+    pMb->disableDeblockingFilterIdc = pSlice->disableDeblockingFilterIdc;
+    pMb->filterOffsetA = pSlice->sliceAlphaC0Offset;
+    pMb->filterOffsetB = pSlice->sliceBetaOffset;
+
+    pMb->chromaQpIndexOffset = chromaQpIndexOffset;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    5.3  Function name: h264bsdMarkSliceCorrupted
+
+        Functional description:
+          Mark macroblocks of the current slice corrupted by decrementing
+          their decoded counter. If lastMbAddr in the slice storage is
+          set, marking starts MAX(picWidthInMbs, 10) macroblocks of the
+          slice before lastMbAddr (but not before the first macroblock of
+          the slice). If lastMbAddr is not set, marking starts from the
+          first macroblock of the slice. Marking stops at the first
+          macroblock that does not belong to the slice or is not decoded,
+          or when there is no next macroblock address.
+
+        Inputs:
+          pStorage        pointer to storage structure
+          firstMbInSlice  address of the first macroblock in the slice, this
+                          identifies the slice to be marked corrupted
+
+        Outputs:
+          pStorage        mbStorage for the corrupted macroblocks updated
+
+        Returns:
+          none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdMarkSliceCorrupted(storage_t *pStorage, u32 firstMbInSlice)
+{
+
+/* Variables */
+
+    u32 numBack, mbIdx;
+    u32 sliceId;
+    u32 mbAddr;
+
+/* Code */
+
+    ASSERT(pStorage);
+    ASSERT(firstMbInSlice < pStorage->picSizeInMbs);
+
+    mbAddr = firstMbInSlice;
+    sliceId = pStorage->slice->sliceId;
+
+    /* lastMbAddr is set by DecodeSliceData for I slices -> walk backwards
+     * from it over MAX(picWidthInMbs, 10) macroblocks of this slice and
+     * start marking from there */
+    if (pStorage->slice->lastMbAddr)
+    {
+        ASSERT(pStorage->mb[pStorage->slice->lastMbAddr].sliceId == sliceId);
+        numBack = 0;
+        for (mbIdx = pStorage->slice->lastMbAddr - 1; mbIdx > mbAddr; mbIdx--)
+        {
+            if (pStorage->mb[mbIdx].sliceId != sliceId)
+                continue;
+
+            numBack++;
+            if (numBack >= MAX(pStorage->activeSps->picWidthInMbs, 10))
+                break;
+        }
+        mbAddr = mbIdx;
+    }
+
+    do
+    {
+        /* stop at the first macroblock that is not a decoded macroblock of
+         * this slice */
+        if ( (pStorage->mb[mbAddr].sliceId != sliceId) ||
+             !(pStorage->mb[mbAddr].decoded) )
+        {
+            break;
+        }
+
+        pStorage->mb[mbAddr].decoded--;
+
+        mbAddr = h264bsdNextMbAddress(pStorage->sliceGroupMap,
+            pStorage->picSizeInMbs, mbAddr);
+
+    } while (mbAddr);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.h
new file mode 100755
index 0000000..f23d49e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_data.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_SLICE_DATA_H
+#define H264SWDEC_SLICE_DATA_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_cfg.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_storage.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+/* Decode the slice data of one slice. NOTE(review): the definition is not
+ * part of this header; see the implementation for the exact contract of the
+ * return value and of currImage. */
+u32 h264bsdDecodeSliceData(strmData_t *pStrmData, storage_t *pStorage,
+ image_t *currImage, sliceHeader_t *pSliceHeader);
+
+/* Mark macroblocks of the slice identified by firstMbInSlice as corrupted
+ * in the mbStorage of pStorage. */
+void h264bsdMarkSliceCorrupted(storage_t *pStorage, u32 firstMbInSlice);
+
+#endif /* #ifdef H264SWDEC_SLICE_DATA_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.c
new file mode 100755
index 0000000..7cbb534
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.c
@@ -0,0 +1,589 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ DecodeInterleavedMap
+ DecodeDispersedMap
+ DecodeForegroundLeftOverMap
+ DecodeBoxOutMap
+ DecodeRasterScanMap
+ DecodeWipeMap
+ h264bsdDecodeSliceGroupMap
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_slice_group_map.h"
+#include "h264bsd_cfg.h"
+#include "h264bsd_pic_param_set.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/* slice group map type 0: interleaved */
+static void DecodeInterleavedMap(
+ u32 *map,
+ u32 numSliceGroups,
+ u32 *runLength,
+ u32 picSize);
+
+/* slice group map type 1: dispersed */
+static void DecodeDispersedMap(
+ u32 *map,
+ u32 numSliceGroups,
+ u32 picWidth,
+ u32 picHeight);
+
+/* slice group map type 2: foreground with left-over */
+static void DecodeForegroundLeftOverMap(
+ u32 *map,
+ u32 numSliceGroups,
+ u32 *topLeft,
+ u32 *bottomRight,
+ u32 picWidth,
+ u32 picHeight);
+
+/* slice group map type 3: box-out */
+static void DecodeBoxOutMap(
+ u32 *map,
+ u32 sliceGroupChangeDirectionFlag,
+ u32 unitsInSliceGroup0,
+ u32 picWidth,
+ u32 picHeight);
+
+/* slice group map type 4: raster scan */
+static void DecodeRasterScanMap(
+ u32 *map,
+ u32 sliceGroupChangeDirectionFlag,
+ u32 sizeOfUpperLeftGroup,
+ u32 picSize);
+
+/* slice group map type 5: wipe */
+static void DecodeWipeMap(
+ u32 *map,
+ u32 sliceGroupChangeDirectionFlag,
+ u32 sizeOfUpperLeftGroup,
+ u32 picWidth,
+ u32 picHeight);
+
+/*------------------------------------------------------------------------------
+
+ Function: DecodeInterleavedMap
+
+ Functional description:
+ Function to decode interleaved slice group map type, i.e. slice
+ group map type 0.
+
+ Inputs:
+ map pointer to the map
+ numSliceGroups number of slice groups
+ runLength run_length[] values for each slice group
+ picSize picture size in macroblocks
+
+ Outputs:
+ map slice group map is stored here
+
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+
+void DecodeInterleavedMap(
+    u32 *map,
+    u32 numSliceGroups,
+    u32 *runLength,
+    u32 picSize)
+{
+
+/* Variables */
+
+    u32 pos, offset, group;
+
+/* Code */
+
+    ASSERT(map);
+    ASSERT(numSliceGroups >= 1 && numSliceGroups <= MAX_NUM_SLICE_GROUPS);
+    ASSERT(runLength);
+
+    pos = 0;
+
+    /* lay out the runs of group 0, 1, ... one after another and start over
+     * from group 0 until the whole picture has been covered */
+    do {
+        group = 0;
+        while (group < numSliceGroups && pos < picSize)
+        {
+            ASSERT(runLength[group] <= picSize);
+
+            /* fill one run, clipped at the end of the picture */
+            for (offset = 0;
+                 offset < runLength[group] && pos + offset < picSize;
+                 offset++)
+            {
+                map[pos + offset] = group;
+            }
+
+            pos += runLength[group];
+            group++;
+        }
+    } while (pos < picSize);
+
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: DecodeDispersedMap
+
+ Functional description:
+ Function to decode dispersed slice group map type, i.e. slice
+ group map type 1.
+
+ Inputs:
+ map pointer to the map
+ numSliceGroups number of slice groups
+ picWidth picture width in macroblocks
+ picHeight picture height in macroblocks
+
+ Outputs:
+ map slice group map is stored here
+
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+
+void DecodeDispersedMap(
+    u32 *map,
+    u32 numSliceGroups,
+    u32 picWidth,
+    u32 picHeight)
+{
+
+/* Variables */
+
+    u32 mbIdx, row, col, picSize;
+
+/* Code */
+
+    ASSERT(map);
+    ASSERT(numSliceGroups >= 1 && numSliceGroups <= MAX_NUM_SLICE_GROUPS);
+    ASSERT(picWidth);
+    ASSERT(picHeight);
+
+    picSize = picWidth * picHeight;
+
+    for (mbIdx = 0; mbIdx < picSize; mbIdx++)
+    {
+        row = mbIdx / picWidth;
+        col = mbIdx % picWidth;
+
+        /* group id advances by one per column and is shifted by
+         * (row * numSliceGroups) / 2 on every row */
+        map[mbIdx] = (col + ((row * numSliceGroups) >> 1)) % numSliceGroups;
+    }
+
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: DecodeForegroundLeftOverMap
+
+ Functional description:
+ Function to decode foreground with left-over slice group map type,
+ i.e. slice group map type 2.
+
+ Inputs:
+ map pointer to the map
+ numSliceGroups number of slice groups
+ topLeft top_left[] values
+ bottomRight bottom_right[] values
+ picWidth picture width in macroblocks
+ picHeight picture height in macroblocks
+
+ Outputs:
+ map slice group map is stored here
+
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+
+void DecodeForegroundLeftOverMap(
+    u32 *map,
+    u32 numSliceGroups,
+    u32 *topLeft,
+    u32 *bottomRight,
+    u32 picWidth,
+    u32 picHeight)
+{
+
+/* Variables */
+
+    u32 mbIdx, picSize;
+    u32 group;
+    u32 row, col;
+    u32 firstRow, lastRow, firstCol, lastCol;
+
+/* Code */
+
+    ASSERT(map);
+    ASSERT(numSliceGroups >= 1 && numSliceGroups <= MAX_NUM_SLICE_GROUPS);
+    ASSERT(topLeft);
+    ASSERT(bottomRight);
+    ASSERT(picWidth);
+    ASSERT(picHeight);
+
+    picSize = picWidth * picHeight;
+
+    /* everything belongs to the last ("left-over") group by default */
+    for (mbIdx = 0; mbIdx < picSize; mbIdx++)
+        map[mbIdx] = numSliceGroups - 1;
+
+    /* paint the foreground rectangles from the highest group id down to
+     * group 0 so that lower ids overwrite higher ones where they overlap */
+    group = numSliceGroups - 1;
+    while (group-- != 0)
+    {
+        ASSERT( topLeft[group] <= bottomRight[group] &&
+                bottomRight[group] < picSize );
+
+        firstRow = topLeft[group] / picWidth;
+        firstCol = topLeft[group] % picWidth;
+        lastRow = bottomRight[group] / picWidth;
+        lastCol = bottomRight[group] % picWidth;
+        ASSERT(firstCol <= lastCol);
+
+        for (row = firstRow; row <= lastRow; row++)
+        {
+            for (col = firstCol; col <= lastCol; col++)
+            {
+                map[ row * picWidth + col ] = group;
+            }
+        }
+    }
+
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: DecodeBoxOutMap
+
+ Functional description:
+ Function to decode box-out slice group map type, i.e. slice group
+ map type 3.
+
+ Inputs:
+ map pointer to the map
+ sliceGroupChangeDirectionFlag slice_group_change_direction_flag
+ unitsInSliceGroup0 mbs on slice group 0
+ picWidth picture width in macroblocks
+ picHeight picture height in macroblocks
+
+ Outputs:
+ map slice group map is stored here
+
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+
+void DecodeBoxOutMap(
+    u32 *map,
+    u32 sliceGroupChangeDirectionFlag,
+    u32 unitsInSliceGroup0,
+    u32 picWidth,
+    u32 picHeight)
+{
+
+/* Variables */
+
+    u32 mbIdx, numAssigned, picSize;
+    i32 x, y, xDir, yDir, leftBound, topBound, rightBound, bottomBound;
+    u32 mapUnitVacant;
+
+/* Code */
+
+    ASSERT(map);
+    ASSERT(picWidth);
+    ASSERT(picHeight);
+
+    picSize = picWidth * picHeight;
+    ASSERT(unitsInSliceGroup0 <= picSize);
+
+    /* every map unit starts in group 1; the spiral below claims
+     * unitsInSliceGroup0 of them for group 0 */
+    for (mbIdx = 0; mbIdx < picSize; mbIdx++)
+        map[mbIdx] = 1;
+
+    /* starting point of the spiral and the box walked so far */
+    x = (picWidth - (u32)sliceGroupChangeDirectionFlag) >> 1;
+    y = (picHeight - (u32)sliceGroupChangeDirectionFlag) >> 1;
+
+    leftBound = x;
+    rightBound = x;
+
+    topBound = y;
+    bottomBound = y;
+
+    xDir = (i32)sliceGroupChangeDirectionFlag - 1;
+    yDir = (i32)sliceGroupChangeDirectionFlag;
+
+    numAssigned = 0;
+    while (numAssigned < unitsInSliceGroup0)
+    {
+        /* claim the current unit unless it was already claimed */
+        mbIdx = (u32)y * picWidth + (u32)x;
+        mapUnitVacant = (map[mbIdx] == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+        if (mapUnitVacant)
+            map[mbIdx] = 0;
+
+        /* when an edge of the box is hit: grow the box on that side (clipped
+         * to the picture) and turn; otherwise take one step */
+        if (xDir == -1 && x == leftBound)
+        {
+            leftBound = MAX(leftBound - 1, 0);
+            x = leftBound;
+            xDir = 0;
+            yDir = 2 * (i32)sliceGroupChangeDirectionFlag - 1;
+        }
+        else if (xDir == 1 && x == rightBound)
+        {
+            rightBound = MIN(rightBound + 1, (i32)picWidth - 1);
+            x = rightBound;
+            xDir = 0;
+            yDir = 1 - 2 * (i32)sliceGroupChangeDirectionFlag;
+        }
+        else if (yDir == -1 && y == topBound)
+        {
+            topBound = MAX(topBound - 1, 0);
+            y = topBound;
+            xDir = 1 - 2 * (i32)sliceGroupChangeDirectionFlag;
+            yDir = 0;
+        }
+        else if (yDir == 1 && y == bottomBound)
+        {
+            bottomBound = MIN(bottomBound + 1, (i32)picHeight - 1);
+            y = bottomBound;
+            xDir = 2 * (i32)sliceGroupChangeDirectionFlag - 1;
+            yDir = 0;
+        }
+        else
+        {
+            x += xDir;
+            y += yDir;
+        }
+
+        /* only newly claimed units count towards unitsInSliceGroup0 */
+        if (mapUnitVacant)
+            numAssigned++;
+    }
+
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: DecodeRasterScanMap
+
+ Functional description:
+ Function to decode raster scan slice group map type, i.e. slice
+ group map type 4.
+
+ Inputs:
+ map pointer to the map
+ sliceGroupChangeDirectionFlag slice_group_change_direction_flag
+ sizeOfUpperLeftGroup mbs in upperLeftGroup
+ picSize picture size in macroblocks
+
+ Outputs:
+ map slice group map is stored here
+
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+
+void DecodeRasterScanMap(
+    u32 *map,
+    u32 sliceGroupChangeDirectionFlag,
+    u32 sizeOfUpperLeftGroup,
+    u32 picSize)
+{
+
+/* Variables */
+
+    u32 mbIdx;
+    u32 upperLeftGroup, otherGroup;
+
+/* Code */
+
+    ASSERT(map);
+    ASSERT(picSize);
+    ASSERT(sizeOfUpperLeftGroup <= picSize);
+
+    /* the first sizeOfUpperLeftGroup units in raster order get the group id
+     * given by the direction flag, the rest get the other id */
+    upperLeftGroup = (u32)sliceGroupChangeDirectionFlag;
+    otherGroup = 1 - (u32)sliceGroupChangeDirectionFlag;
+
+    for (mbIdx = 0; mbIdx < picSize; mbIdx++)
+    {
+        map[mbIdx] = (mbIdx < sizeOfUpperLeftGroup) ?
+            upperLeftGroup : otherGroup;
+    }
+
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: DecodeWipeMap
+
+ Functional description:
+ Function to decode wipe slice group map type, i.e. slice group map
+ type 5.
+
+ Inputs:
+ sliceGroupChangeDirectionFlag slice_group_change_direction_flag
+ sizeOfUpperLeftGroup mbs in upperLeftGroup
+ picWidth picture width in macroblocks
+ picHeight picture height in macroblocks
+
+ Outputs:
+ map slice group map is stored here
+
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+
+void DecodeWipeMap(
+    u32 *map,
+    u32 sliceGroupChangeDirectionFlag,
+    u32 sizeOfUpperLeftGroup,
+    u32 picWidth,
+    u32 picHeight)
+{
+
+/* Variables */
+
+    u32 row, col, numVisited;
+    u32 upperLeftGroup, otherGroup;
+
+/* Code */
+
+    ASSERT(map);
+    ASSERT(picWidth);
+    ASSERT(picHeight);
+    ASSERT(sizeOfUpperLeftGroup <= picWidth * picHeight);
+
+    upperLeftGroup = (u32)sliceGroupChangeDirectionFlag;
+    otherGroup = 1 - (u32)sliceGroupChangeDirectionFlag;
+
+    /* units are visited column by column (top to bottom within a column);
+     * the first sizeOfUpperLeftGroup visited units form the upper-left
+     * group */
+    numVisited = 0;
+    for (col = 0; col < picWidth; col++)
+    {
+        for (row = 0; row < picHeight; row++)
+        {
+            map[ row * picWidth + col ] =
+                (numVisited < sizeOfUpperLeftGroup) ?
+                    upperLeftGroup : otherGroup;
+            numVisited++;
+        }
+    }
+
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdDecodeSliceGroupMap
+
+ Functional description:
+ Function to decode macroblock to slice group map. Construction
+ of different slice group map types is handled by separate
+ functions defined above. See standard for details how slice group
+ maps are computed.
+
+ Inputs:
+ pps active picture parameter set
+ sliceGroupChangeCycle slice_group_change_cycle
+ picWidth picture width in macroblocks
+ picHeight picture height in macroblocks
+
+ Outputs:
+ map slice group map is stored here
+
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdDecodeSliceGroupMap(
+    u32 *map,
+    picParamSet_t *pps,
+    u32 sliceGroupChangeCycle,
+    u32 picWidth,
+    u32 picHeight)
+{
+
+/* Variables */
+
+    u32 mbIdx;
+    u32 picSize;
+    u32 unitsInSliceGroup0 = 0;
+    u32 sizeOfUpperLeftGroup = 0;
+
+/* Code */
+
+    ASSERT(map);
+    ASSERT(pps);
+    ASSERT(picWidth);
+    ASSERT(picHeight);
+    ASSERT(pps->sliceGroupMapType < 7);
+
+    picSize = picWidth * picHeight;
+
+    /* a single slice group needs no map computation: everything is group 0 */
+    if (pps->numSliceGroups == 1)
+    {
+        H264SwDecMemset(map, 0, picSize * sizeof(u32));
+        return;
+    }
+
+    /* the "changing" map types 3, 4 and 5 depend on the number of units
+     * currently in slice group 0, derived from the change cycle and rate and
+     * clipped to the picture size */
+    if (pps->sliceGroupMapType >= 3 && pps->sliceGroupMapType <= 5)
+    {
+        ASSERT(pps->sliceGroupChangeRate &&
+               pps->sliceGroupChangeRate <= picSize);
+
+        unitsInSliceGroup0 =
+            MIN(sliceGroupChangeCycle * pps->sliceGroupChangeRate, picSize);
+
+        /* types 4 and 5 additionally need the size of the upper-left group */
+        if (pps->sliceGroupMapType >= 4)
+        {
+            if (pps->sliceGroupChangeDirectionFlag)
+                sizeOfUpperLeftGroup = picSize - unitsInSliceGroup0;
+            else
+                sizeOfUpperLeftGroup = unitsInSliceGroup0;
+        }
+    }
+
+    /* dispatch to the map type specific decoder */
+    switch (pps->sliceGroupMapType)
+    {
+        case 0:
+            DecodeInterleavedMap(map, pps->numSliceGroups,
+                pps->runLength, picSize);
+            break;
+
+        case 1:
+            DecodeDispersedMap(map, pps->numSliceGroups, picWidth,
+                picHeight);
+            break;
+
+        case 2:
+            DecodeForegroundLeftOverMap(map, pps->numSliceGroups,
+                pps->topLeft, pps->bottomRight, picWidth, picHeight);
+            break;
+
+        case 3:
+            DecodeBoxOutMap(map, pps->sliceGroupChangeDirectionFlag,
+                unitsInSliceGroup0, picWidth, picHeight);
+            break;
+
+        case 4:
+            DecodeRasterScanMap(map,
+                pps->sliceGroupChangeDirectionFlag, sizeOfUpperLeftGroup,
+                picSize);
+            break;
+
+        case 5:
+            DecodeWipeMap(map, pps->sliceGroupChangeDirectionFlag,
+                sizeOfUpperLeftGroup, picWidth, picHeight);
+            break;
+
+        default:
+            /* remaining type: the group id of every map unit is taken
+             * directly from the picture parameter set */
+            ASSERT(pps->sliceGroupId);
+            mbIdx = 0;
+            while (mbIdx < picSize)
+            {
+                ASSERT(pps->sliceGroupId[mbIdx] < pps->numSliceGroups);
+                map[mbIdx] = pps->sliceGroupId[mbIdx];
+                mbIdx++;
+            }
+            break;
+    }
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.h
new file mode 100755
index 0000000..4bcb6f2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_group_map.h
@@ -0,0 +1,60 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_SLICE_GROUP_MAP_H
+#define H264SWDEC_SLICE_GROUP_MAP_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_pic_param_set.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+/* Compute the macroblock to slice group map for a picture of
+ * picWidth x picHeight macroblocks according to the slice group settings of
+ * pps and write it to map (one u32 entry per macroblock). */
+void h264bsdDecodeSliceGroupMap(
+ u32 *map,
+ picParamSet_t *pps,
+ u32 sliceGroupChangeCycle,
+ u32 picWidth,
+ u32 picHeight);
+
+#endif /* #ifdef H264SWDEC_SLICE_GROUP_MAP_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.c
new file mode 100755
index 0000000..a7c6f64
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.c
@@ -0,0 +1,1511 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdDecodeSliceHeader
+ NumSliceGroupChangeCycleBits
+ RefPicListReordering
+ DecRefPicMarking
+ CheckPpsId
+ CheckFrameNum
+ CheckIdrPicId
+ CheckPicOrderCntLsb
+ CheckDeltaPicOrderCntBottom
+ CheckDeltaPicOrderCnt
+ CheckRedundantPicCnt
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_slice_header.h"
+#include "h264bsd_util.h"
+#include "h264bsd_vlc.h"
+#include "h264bsd_nal_unit.h"
+#include "h264bsd_dpb.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/* decode reference picture list reordering syntax elements; the two u32
+ * arguments are numRefIdxActive and maxPicNum (see the definition) */
+static u32 RefPicListReordering(strmData_t *, refPicListReordering_t *,
+ u32, u32);
+
+/* number of bits used to represent slice_group_change_cycle in the stream */
+static u32 NumSliceGroupChangeCycleBits(u32 picSizeInMbs,
+ u32 sliceGroupChangeRate);
+
+/* decode decoded reference picture marking syntax elements */
+static u32 DecRefPicMarking(strmData_t *pStrmData,
+ decRefPicMarking_t *pDecRefPicMarking, nalUnitType_e nalUnitType,
+ u32 numRefFrames);
+
+
+/*------------------------------------------------------------------------------
+
+ Function name: h264bsdDecodeSliceHeader
+
+ Functional description:
+ Decode slice header data from the stream.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+ pSeqParamSet pointer to active sequence parameter set
+ pPicParamSet pointer to active picture parameter set
+ pNalUnit pointer to current NAL unit structure
+
+ Outputs:
+ pSliceHeader decoded data is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK invalid stream data or end of stream
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeSliceHeader(strmData_t *pStrmData, sliceHeader_t *pSliceHeader,
+ seqParamSet_t *pSeqParamSet, picParamSet_t *pPicParamSet,
+ nalUnit_t *pNalUnit)
+{
+
+/* Variables */
+
+ u32 tmp, i, value;
+ i32 itmp;
+ u32 picSizeInMbs;
+
+/* Code */
+
+ ASSERT(pStrmData);
+ ASSERT(pSliceHeader);
+ ASSERT(pSeqParamSet);
+ ASSERT(pPicParamSet);
+ /* NOTE(review): pNalUnit is dereferenced here without a separate
+ * ASSERT(pNalUnit) */
+ ASSERT( pNalUnit->nalUnitType == NAL_CODED_SLICE ||
+ pNalUnit->nalUnitType == NAL_CODED_SLICE_IDR );
+
+
+ /* start from an all-zero header: syntax elements that are not present in
+ * the stream keep the value 0 */
+ H264SwDecMemset(pSliceHeader, 0, sizeof(sliceHeader_t));
+
+ /* first_mb_in_slice: has to address a macroblock inside the picture */
+ picSizeInMbs = pSeqParamSet->picWidthInMbs * pSeqParamSet->picHeightInMbs;
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ pSliceHeader->firstMbInSlice = value;
+ if (value >= picSizeInMbs)
+ {
+ EPRINT("first_mb_in_slice");
+ return(HANTRO_NOK);
+ }
+
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ pSliceHeader->sliceType = value;
+ /* slice type has to be either I or P slice. P slice is not allowed when
+ * current NAL unit is an IDR NAL unit or num_ref_frames is 0 */
+ if ( !IS_I_SLICE(pSliceHeader->sliceType) &&
+ ( !IS_P_SLICE(pSliceHeader->sliceType) ||
+ IS_IDR_NAL_UNIT(pNalUnit) ||
+ !pSeqParamSet->numRefFrames ) )
+ {
+ EPRINT("slice_type");
+ return(HANTRO_NOK);
+ }
+
+ /* pic_parameter_set_id: has to match the id of the supplied picture
+ * parameter set */
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ pSliceHeader->picParameterSetId = value;
+ if (pSliceHeader->picParameterSetId != pPicParamSet->picParameterSetId)
+ {
+ EPRINT("pic_parameter_set_id");
+ return(HANTRO_NOK);
+ }
+
+ /* log2(maxFrameNum) -> num bits to represent frame_num */
+ i = 0;
+ while (pSeqParamSet->maxFrameNum >> i)
+ i++;
+ i--;
+
+ /* frame_num: has to be 0 for IDR NAL units */
+ tmp = h264bsdGetBits(pStrmData, i);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ if (IS_IDR_NAL_UNIT(pNalUnit) && tmp != 0)
+ {
+ EPRINT("frame_num");
+ return(HANTRO_NOK);
+ }
+ pSliceHeader->frameNum = tmp;
+
+ /* idr_pic_id: only present in IDR NAL units, accepted range [0, 65535] */
+ if (IS_IDR_NAL_UNIT(pNalUnit))
+ {
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ pSliceHeader->idrPicId = value;
+ if (value > 65535)
+ {
+ EPRINT("idr_pic_id");
+ return(HANTRO_NOK);
+ }
+ }
+
+ if (pSeqParamSet->picOrderCntType == 0)
+ {
+ /* log2(maxPicOrderCntLsb) -> num bits to represent pic_order_cnt_lsb */
+ i = 0;
+ while (pSeqParamSet->maxPicOrderCntLsb >> i)
+ i++;
+ i--;
+
+ tmp = h264bsdGetBits(pStrmData, i);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pSliceHeader->picOrderCntLsb = tmp;
+
+ if (pPicParamSet->picOrderPresentFlag)
+ {
+ tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ pSliceHeader->deltaPicOrderCntBottom = itmp;
+ }
+
+ /* check that picOrderCnt for IDR picture will be zero. See
+ * DecodePicOrderCnt function to understand the logic here */
+ /* NOTE(review): the signed sum below is not range checked; whether it
+ * can overflow depends on the value range produced by
+ * h264bsdDecodeExpGolombSigned -- confirm */
+ if ( IS_IDR_NAL_UNIT(pNalUnit) &&
+ ( (pSliceHeader->picOrderCntLsb >
+ pSeqParamSet->maxPicOrderCntLsb/2) ||
+ MIN((i32)pSliceHeader->picOrderCntLsb,
+ (i32)pSliceHeader->picOrderCntLsb +
+ pSliceHeader->deltaPicOrderCntBottom) != 0 ) )
+ {
+ return(HANTRO_NOK);
+ }
+ }
+
+ if ( (pSeqParamSet->picOrderCntType == 1) &&
+ !pSeqParamSet->deltaPicOrderAlwaysZeroFlag )
+ {
+ tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ pSliceHeader->deltaPicOrderCnt[0] = itmp;
+
+ if (pPicParamSet->picOrderPresentFlag)
+ {
+ tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ pSliceHeader->deltaPicOrderCnt[1] = itmp;
+ }
+
+ /* check that picOrderCnt for IDR picture will be zero. See
+ * DecodePicOrderCnt function to understand the logic here */
+ /* NOTE(review): same unchecked signed addition as in the
+ * picOrderCntType == 0 branch -- confirm it cannot overflow */
+ if ( IS_IDR_NAL_UNIT(pNalUnit) &&
+ MIN(pSliceHeader->deltaPicOrderCnt[0],
+ pSliceHeader->deltaPicOrderCnt[0] +
+ pSeqParamSet->offsetForTopToBottomField +
+ pSliceHeader->deltaPicOrderCnt[1]) != 0)
+ {
+ return(HANTRO_NOK);
+ }
+ }
+
+ /* redundant_pic_cnt: accepted range [0, 127] */
+ if (pPicParamSet->redundantPicCntPresentFlag)
+ {
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ pSliceHeader->redundantPicCnt = value;
+ if (value > 127)
+ {
+ EPRINT("redundant_pic_cnt");
+ return(HANTRO_NOK);
+ }
+ }
+
+ if (IS_P_SLICE(pSliceHeader->sliceType))
+ {
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pSliceHeader->numRefIdxActiveOverrideFlag = tmp;
+
+ if (pSliceHeader->numRefIdxActiveOverrideFlag)
+ {
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ if (value > 15)
+ {
+ EPRINT("num_ref_idx_l0_active_minus1");
+ return(HANTRO_NOK);
+ }
+ /* stream carries the "minus1" value, the header stores the count */
+ pSliceHeader->numRefIdxL0Active = value + 1;
+ }
+ /* set numRefIdxL0Active from pic param set */
+ else
+ {
+ /* if value (minus1) in picture parameter set exceeds 15 it should
+ * have been overridden here */
+ if (pPicParamSet->numRefIdxL0Active > 16)
+ {
+ EPRINT("num_ref_idx_active_override_flag");
+ return(HANTRO_NOK);
+ }
+ pSliceHeader->numRefIdxL0Active = pPicParamSet->numRefIdxL0Active;
+ }
+ }
+
+ /* reference picture list reordering is only present in P slices */
+ if (IS_P_SLICE(pSliceHeader->sliceType))
+ {
+ tmp = RefPicListReordering(pStrmData,
+ &pSliceHeader->refPicListReordering,
+ pSliceHeader->numRefIdxL0Active,
+ pSeqParamSet->maxFrameNum);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ }
+
+ /* decoded reference picture marking is only present when nal_ref_idc is
+ * non-zero */
+ if (pNalUnit->nalRefIdc != 0)
+ {
+ tmp = DecRefPicMarking(pStrmData, &pSliceHeader->decRefPicMarking,
+ pNalUnit->nalUnitType, pSeqParamSet->numRefFrames);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ }
+
+ /* decode sliceQpDelta and check that initial QP for the slice will be on
+ * the range [0, 51] */
+ tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ pSliceHeader->sliceQpDelta = itmp;
+ /* NOTE(review): the addition happens before the range check; if the
+ * decoded delta can be close to the i32 limits this is a signed overflow
+ * -- confirm against h264bsdDecodeExpGolombSigned */
+ itmp += (i32)pPicParamSet->picInitQp;
+ if ( (itmp < 0) || (itmp > 51) )
+ {
+ EPRINT("slice_qp_delta");
+ return(HANTRO_NOK);
+ }
+
+ if (pPicParamSet->deblockingFilterControlPresentFlag)
+ {
+ /* disable_deblocking_filter_idc: accepted range [0, 2] */
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ pSliceHeader->disableDeblockingFilterIdc = value;
+ if (pSliceHeader->disableDeblockingFilterIdc > 2)
+ {
+ EPRINT("disable_deblocking_filter_idc");
+ return(HANTRO_NOK);
+ }
+
+ /* filter offsets are only present when the idc is not 1; the stream
+ * carries them divided by two in range [-6, 6], the header stores the
+ * doubled value */
+ if (pSliceHeader->disableDeblockingFilterIdc != 1)
+ {
+ tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ if ( (itmp < -6) || (itmp > 6) )
+ {
+ EPRINT("slice_alpha_c0_offset_div2");
+ return(HANTRO_NOK);
+ }
+ pSliceHeader->sliceAlphaC0Offset = itmp * 2;
+
+ tmp = h264bsdDecodeExpGolombSigned(pStrmData, &itmp);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ if ( (itmp < -6) || (itmp > 6) )
+ {
+ EPRINT("slice_beta_offset_div2");
+ return(HANTRO_NOK);
+ }
+ pSliceHeader->sliceBetaOffset = itmp * 2;
+ }
+ }
+
+ /* slice_group_change_cycle is only present for slice group map types
+ * 3, 4 and 5 when more than one slice group is in use */
+ if ( (pPicParamSet->numSliceGroups > 1) &&
+ (pPicParamSet->sliceGroupMapType >= 3) &&
+ (pPicParamSet->sliceGroupMapType <= 5) )
+ {
+ /* set tmp to number of bits used to represent slice_group_change_cycle
+ * in the stream */
+ tmp = NumSliceGroupChangeCycleBits(picSizeInMbs,
+ pPicParamSet->sliceGroupChangeRate);
+ value = h264bsdGetBits(pStrmData, tmp);
+ if (value == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pSliceHeader->sliceGroupChangeCycle = value;
+
+ /* corresponds to tmp = Ceil(picSizeInMbs / sliceGroupChangeRate) */
+ tmp = (picSizeInMbs + pPicParamSet->sliceGroupChangeRate - 1) /
+ pPicParamSet->sliceGroupChangeRate;
+ if (pSliceHeader->sliceGroupChangeCycle > tmp)
+ {
+ EPRINT("slice_group_change_cycle");
+ return(HANTRO_NOK);
+ }
+ }
+
+ return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: NumSliceGroupChangeCycleBits
+
+ Functional description:
+ Determine number of bits needed to represent
+ slice_group_change_cycle in the stream. The standard states that
+ slice_group_change_cycle is represented by
+ Ceil( Log2( (picSizeInMbs / sliceGroupChangeRate) + 1) )
+
+ bits. Division "/" in the equation is non-truncating division.
+
+ Inputs:
+ picSizeInMbs picture size in macroblocks
+ sliceGroupChangeRate
+
+ Outputs:
+ none
+
+ Returns:
+ number of bits needed
+
+------------------------------------------------------------------------------*/
+
+u32 NumSliceGroupChangeCycleBits(u32 picSizeInMbs, u32 sliceGroupChangeRate)
+{
+
+/* Variables */
+
+    u32 tmp, numBits;
+
+/* Code */
+
+    ASSERT(picSizeInMbs);
+    ASSERT(sliceGroupChangeRate);
+    ASSERT(sliceGroupChangeRate <= picSizeInMbs);
+
+    /* tmp = Ceil(picSizeInMbs / sliceGroupChangeRate) + 1 */
+    tmp = picSizeInMbs/sliceGroupChangeRate + 1;
+    if (picSizeInMbs % sliceGroupChangeRate)
+        tmp++;
+
+    /* set numBits to the position of the most significant non-zero bit of
+     * tmp, i.e. Floor(Log2(tmp)) */
+    numBits = 0;
+    while (tmp >> (numBits + 1))
+        numBits++;
+
+    /* round up: one more bit is needed unless tmp is exactly 2^numBits */
+    if (tmp & ((1<<numBits)-1))
+        numBits++;
+
+    return(numBits);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: RefPicListReordering
+
+ Functional description:
+ Decode reference picture list reordering syntax elements from
+ the stream. Max number of reordering commands is numRefIdxActive.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+ numRefIdxActive number of active reference indices to be used for
+ current slice
+ maxPicNum maxFrameNum from the active SPS
+
+ Outputs:
+ pRefPicListReordering decoded data is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 RefPicListReordering(strmData_t *pStrmData,
+    refPicListReordering_t *pRefPicListReordering, u32 numRefIdxActive,
+    u32 maxPicNum)
+{
+
+/* Variables */
+
+    u32 status, value;
+    u32 numCommands;
+    u32 command;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pRefPicListReordering);
+    ASSERT(numRefIdxActive);
+    ASSERT(maxPicNum);
+
+
+    /* ref_pic_list_reordering_flag_l0 */
+    status = h264bsdGetBits(pStrmData, 1);
+    if (status == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    pRefPicListReordering->refPicListReorderingFlagL0 = status;
+
+    /* nothing more to decode if reordering is not in use */
+    if (!pRefPicListReordering->refPicListReorderingFlagL0)
+        return(HANTRO_OK);
+
+    /* read commands until the terminating command (idc 3) has been stored */
+    numCommands = 0;
+    for (;;)
+    {
+        if (numCommands > numRefIdxActive)
+        {
+            EPRINT("Too many reordering commands");
+            return(HANTRO_NOK);
+        }
+
+        status = h264bsdDecodeExpGolombUnsigned(pStrmData, &command);
+        if (status != HANTRO_OK)
+            return(status);
+        if (command > 3)
+        {
+            EPRINT("reordering_of_pic_nums_idc");
+            return(HANTRO_NOK);
+        }
+
+        pRefPicListReordering->command[numCommands].reorderingOfPicNumsIdc =
+            command;
+
+        switch (command)
+        {
+            case 0:
+            case 1:
+                /* abs_diff_pic_num_minus1, stored as the actual difference */
+                status = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+                if (status != HANTRO_OK)
+                    return(status);
+                if (value >= maxPicNum)
+                {
+                    EPRINT("abs_diff_pic_num_minus1");
+                    return(HANTRO_NOK);
+                }
+                pRefPicListReordering->command[numCommands].absDiffPicNum =
+                    value + 1;
+                break;
+
+            case 2:
+                /* long_term_pic_num */
+                status = h264bsdDecodeExpGolombUnsigned(pStrmData, &value);
+                if (status != HANTRO_OK)
+                    return(status);
+                pRefPicListReordering->command[numCommands].longTermPicNum =
+                    value;
+                break;
+
+            default:
+                /* 3: end of the command list, no operand */
+                break;
+        }
+
+        numCommands++;
+
+        if (command == 3)
+            break;
+    }
+
+    /* there shall be at least one reordering command if
+     * refPicListReorderingFlagL0 was set, i.e. the terminating command
+     * alone is not enough */
+    if (numCommands == 1)
+    {
+        EPRINT("ref_pic_list_reordering");
+        return(HANTRO_NOK);
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: DecRefPicMarking
+
+ Functional description:
+ Decode decoded reference picture marking syntax elements from
+ the stream.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+ nalUnitType type of the current NAL unit
+ numRefFrames max number of reference frames from the active SPS
+
+ Outputs:
+ pDecRefPicMarking decoded data is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 DecRefPicMarking(strmData_t *pStrmData,
+  decRefPicMarking_t *pDecRefPicMarking, nalUnitType_e nalUnitType,
+  u32 numRefFrames)
+{
+
+/* Variables */
+
+    u32 status, operand;
+    u32 mmco;
+    u32 idx = 0;
+    /* occurrence counters per operation type, used only for the sanity
+     * check after the whole list has been parsed */
+    u32 cnt1to3 = 0, cnt4 = 0, cnt5 = 0, cnt6 = 0;
+
+/* Code */
+
+    ASSERT( nalUnitType == NAL_CODED_SLICE_IDR ||
+            nalUnitType == NAL_CODED_SLICE ||
+            nalUnitType == NAL_SEI );
+
+    /* IDR slices carry just two one-bit flags */
+    if (nalUnitType == NAL_CODED_SLICE_IDR)
+    {
+        status = h264bsdGetBits(pStrmData, 1);
+        if (status == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pDecRefPicMarking->noOutputOfPriorPicsFlag = status;
+
+        status = h264bsdGetBits(pStrmData, 1);
+        if (status == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pDecRefPicMarking->longTermReferenceFlag = status;
+
+        /* long term reference is rejected when no reference frames are
+         * allowed */
+        if (pDecRefPicMarking->longTermReferenceFlag && !numRefFrames)
+        {
+            EPRINT("long_term_reference_flag");
+            return(HANTRO_NOK);
+        }
+
+        return(HANTRO_OK);
+    }
+
+    status = h264bsdGetBits(pStrmData, 1);
+    if (status == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pDecRefPicMarking->adaptiveRefPicMarkingModeFlag = status;
+
+    /* no operation list in the stream unless the adaptive flag is set */
+    if (!pDecRefPicMarking->adaptiveRefPicMarkingModeFlag)
+        return(HANTRO_OK);
+
+    for (;;)
+    {
+        /* see explanation of the MAX_NUM_MMC_OPERATIONS in
+         * slice_header.h */
+        if (idx > (2 * numRefFrames + 2))
+        {
+            EPRINT("Too many management operations");
+            return(HANTRO_NOK);
+        }
+
+        status = h264bsdDecodeExpGolombUnsigned(pStrmData, &mmco);
+        if (status != HANTRO_OK)
+            return(status);
+        if (mmco > 6)
+        {
+            EPRINT("memory_management_control_operation");
+            return(HANTRO_NOK);
+        }
+
+        pDecRefPicMarking->operation[idx].memoryManagementControlOperation =
+            mmco;
+
+        switch (mmco)
+        {
+            case 1:
+                status = h264bsdDecodeExpGolombUnsigned(pStrmData, &operand);
+                if (status != HANTRO_OK)
+                    return(status);
+                pDecRefPicMarking->operation[idx].differenceOfPicNums =
+                    operand + 1;
+                cnt1to3++;
+                break;
+
+            case 2:
+                status = h264bsdDecodeExpGolombUnsigned(pStrmData, &operand);
+                if (status != HANTRO_OK)
+                    return(status);
+                pDecRefPicMarking->operation[idx].longTermPicNum = operand;
+                cnt1to3++;
+                break;
+
+            case 3:
+                /* two operands: picture number difference first, then the
+                 * long term frame index */
+                status = h264bsdDecodeExpGolombUnsigned(pStrmData, &operand);
+                if (status != HANTRO_OK)
+                    return(status);
+                pDecRefPicMarking->operation[idx].differenceOfPicNums =
+                    operand + 1;
+
+                status = h264bsdDecodeExpGolombUnsigned(pStrmData, &operand);
+                if (status != HANTRO_OK)
+                    return(status);
+                pDecRefPicMarking->operation[idx].longTermFrameIdx = operand;
+                cnt1to3++;
+                break;
+
+            case 4:
+                status = h264bsdDecodeExpGolombUnsigned(pStrmData, &operand);
+                if (status != HANTRO_OK)
+                    return(status);
+                /* value shall be in range [0, numRefFrames] */
+                if (operand > numRefFrames)
+                {
+                    EPRINT("max_long_term_frame_idx_plus1");
+                    return(HANTRO_NOK);
+                }
+                /* coded as "plus1": zero means no long term indices */
+                pDecRefPicMarking->operation[idx].maxLongTermFrameIdx =
+                    (operand == 0) ? NO_LONG_TERM_FRAME_INDICES : operand - 1;
+                cnt4++;
+                break;
+
+            case 5:
+                cnt5++;
+                break;
+
+            case 6:
+                status = h264bsdDecodeExpGolombUnsigned(pStrmData, &operand);
+                if (status != HANTRO_OK)
+                    return(status);
+                pDecRefPicMarking->operation[idx].longTermFrameIdx = operand;
+                cnt6++;
+                break;
+
+            default: /* 0: end of operation list, no operand */
+                break;
+        }
+
+        idx++;
+
+        if (mmco == 0)
+            break;
+    }
+
+    /* operations 4, 5 and 6 may each appear at most once and operation 5
+     * may not be combined with operations 1-3 */
+    if (cnt4 > 1 || cnt5 > 1 || cnt6 > 1 || (cnt1to3 && cnt5))
+        return(HANTRO_NOK);
+
+    return(HANTRO_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+ Function name: h264bsdCheckPpsId
+
+ Functional description:
+ Peek value of pic_parameter_set_id from the slice header. Function
+ does not modify current stream positions but copies the stream
+ data structure to tmp structure which is used while accessing
+ stream data.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+
+ Outputs:
+ picParamSetId value is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckPpsId(strmData_t *pStrmData, u32 *picParamSetId)
+{
+
+/* Variables */
+
+    u32 tmp, value;
+    strmData_t tmpStrmData[1];
+
+/* Code */
+
+    ASSERT(pStrmData);
+    /* the output pointer is written below; assert it the same way the
+     * other h264bsdCheck* peek functions assert their output pointers */
+    ASSERT(picParamSetId);
+
+    /* don't touch original stream position params: all reads below go
+     * through this local copy of the stream structure */
+    *tmpStrmData = *pStrmData;
+
+    /* skip first_mb_in_slice */
+    tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+
+    /* skip slice_type */
+    tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+
+    /* pic_parameter_set_id */
+    tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    /* ids at or above MAX_NUM_PIC_PARAM_SETS are rejected and the output
+     * is left unwritten */
+    if (value >= MAX_NUM_PIC_PARAM_SETS)
+        return(HANTRO_NOK);
+
+    *picParamSetId = value;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdCheckFrameNum
+
+ Functional description:
+ Peek value of frame_num from the slice header. Function does not
+ modify current stream positions but copies the stream data
+ structure to tmp structure which is used while accessing stream
+ data.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+ maxFrameNum max frame number from active SPS
+
+ Outputs:
+ frameNum value is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckFrameNum(
+  strmData_t *pStrmData,
+  u32 maxFrameNum,
+  u32 *frameNum)
+{
+
+/* Variables */
+
+    u32 status, skipped, numBits, n;
+    strmData_t peekStrm[1];
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(maxFrameNum);
+    ASSERT(frameNum);
+
+    /* parse from a private copy so that the caller's stream position
+     * params stay untouched */
+    *peekStrm = *pStrmData;
+
+    /* skip first_mb_in_slice, slice_type and pic_parameter_set_id */
+    for (n = 0; n < 3; n++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(peekStrm, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* log2(maxFrameNum) -> num bits to represent frame_num */
+    for (numBits = 0; maxFrameNum >> numBits; numBits++)
+        ;
+    numBits--;
+
+    /* frame_num */
+    status = h264bsdGetBits(peekStrm, numBits);
+    if (status == END_OF_STREAM)
+        return(HANTRO_NOK);
+    *frameNum = status;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdCheckIdrPicId
+
+ Functional description:
+ Peek value of idr_pic_id from the slice header. Function does not
+ modify current stream positions but copies the stream data
+ structure to tmp structure which is used while accessing stream
+ data.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+ maxFrameNum max frame number from active SPS
+ nalUnitType type of the current NAL unit
+
+ Outputs:
+ idrPicId value is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckIdrPicId(
+  strmData_t *pStrmData,
+  u32 maxFrameNum,
+  nalUnitType_e nalUnitType,
+  u32 *idrPicId)
+{
+
+/* Variables */
+
+    u32 status, skipped, numBits, n;
+    strmData_t peekStrm[1];
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(maxFrameNum);
+    ASSERT(idrPicId);
+
+    /* idr_pic_id is present only in IDR slices (nalUnitType 5) */
+    if (nalUnitType != NAL_CODED_SLICE_IDR)
+        return(HANTRO_NOK);
+
+    /* parse from a private copy so that the caller's stream position
+     * params stay untouched */
+    *peekStrm = *pStrmData;
+
+    /* skip first_mb_in_slice, slice_type and pic_parameter_set_id */
+    for (n = 0; n < 3; n++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(peekStrm, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* log2(maxFrameNum) -> num bits to represent frame_num */
+    for (numBits = 0; maxFrameNum >> numBits; numBits++)
+        ;
+    numBits--;
+
+    /* skip frame_num */
+    status = h264bsdGetBits(peekStrm, numBits);
+    if (status == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    /* idr_pic_id: decoder status is the function result as such */
+    return(h264bsdDecodeExpGolombUnsigned(peekStrm, idrPicId));
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdCheckPicOrderCntLsb
+
+ Functional description:
+ Peek value of pic_order_cnt_lsb from the slice header. Function
+ does not modify current stream positions but copies the stream
+ data structure to tmp structure which is used while accessing
+ stream data.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+ pSeqParamSet pointer to active SPS
+ nalUnitType type of the current NAL unit
+
+ Outputs:
+ picOrderCntLsb value is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckPicOrderCntLsb(
+  strmData_t *pStrmData,
+  seqParamSet_t *pSeqParamSet,
+  nalUnitType_e nalUnitType,
+  u32 *picOrderCntLsb)
+{
+
+/* Variables */
+
+    u32 status, skipped, numBits, n;
+    strmData_t peekStrm[1];
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSeqParamSet);
+    ASSERT(picOrderCntLsb);
+
+    /* picOrderCntType must be equal to 0 */
+    ASSERT(pSeqParamSet->picOrderCntType == 0);
+    ASSERT(pSeqParamSet->maxFrameNum);
+    ASSERT(pSeqParamSet->maxPicOrderCntLsb);
+
+    /* parse from a private copy so that the caller's stream position
+     * params stay untouched */
+    *peekStrm = *pStrmData;
+
+    /* skip first_mb_in_slice, slice_type and pic_parameter_set_id */
+    for (n = 0; n < 3; n++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(peekStrm, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* log2(maxFrameNum) -> num bits to represent frame_num */
+    for (numBits = 0; pSeqParamSet->maxFrameNum >> numBits; numBits++)
+        ;
+    numBits--;
+
+    /* skip frame_num */
+    status = h264bsdGetBits(peekStrm, numBits);
+    if (status == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    /* skip idr_pic_id when necessary */
+    if (nalUnitType == NAL_CODED_SLICE_IDR)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(peekStrm, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* log2(maxPicOrderCntLsb) -> num bits to represent pic_order_cnt_lsb */
+    for (numBits = 0; pSeqParamSet->maxPicOrderCntLsb >> numBits; numBits++)
+        ;
+    numBits--;
+
+    /* pic_order_cnt_lsb */
+    status = h264bsdGetBits(peekStrm, numBits);
+    if (status == END_OF_STREAM)
+        return(HANTRO_NOK);
+    *picOrderCntLsb = status;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdCheckDeltaPicOrderCntBottom
+
+ Functional description:
+ Peek value of delta_pic_order_cnt_bottom from the slice header.
+ Function does not modify current stream positions but copies the
+ stream data structure to tmp structure which is used while
+ accessing stream data.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+ pSeqParamSet pointer to active SPS
+ nalUnitType type of the current NAL unit
+
+ Outputs:
+ deltaPicOrderCntBottom value is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckDeltaPicOrderCntBottom(
+  strmData_t *pStrmData,
+  seqParamSet_t *pSeqParamSet,
+  nalUnitType_e nalUnitType,
+  i32 *deltaPicOrderCntBottom)
+{
+
+/* Variables */
+
+    u32 status, skipped, numBits, n;
+    strmData_t peekStrm[1];
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSeqParamSet);
+    ASSERT(deltaPicOrderCntBottom);
+
+    /* picOrderCntType must be equal to 0 and picOrderPresentFlag must be
+     * TRUE */
+    ASSERT(pSeqParamSet->picOrderCntType == 0);
+    ASSERT(pSeqParamSet->maxFrameNum);
+    ASSERT(pSeqParamSet->maxPicOrderCntLsb);
+
+    /* parse from a private copy so that the caller's stream position
+     * params stay untouched */
+    *peekStrm = *pStrmData;
+
+    /* skip first_mb_in_slice, slice_type and pic_parameter_set_id */
+    for (n = 0; n < 3; n++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(peekStrm, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* log2(maxFrameNum) -> num bits to represent frame_num */
+    for (numBits = 0; pSeqParamSet->maxFrameNum >> numBits; numBits++)
+        ;
+    numBits--;
+
+    /* skip frame_num */
+    status = h264bsdGetBits(peekStrm, numBits);
+    if (status == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    /* skip idr_pic_id when necessary */
+    if (nalUnitType == NAL_CODED_SLICE_IDR)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(peekStrm, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* log2(maxPicOrderCntLsb) -> num bits to represent pic_order_cnt_lsb */
+    for (numBits = 0; pSeqParamSet->maxPicOrderCntLsb >> numBits; numBits++)
+        ;
+    numBits--;
+
+    /* skip pic_order_cnt_lsb */
+    status = h264bsdGetBits(peekStrm, numBits);
+    if (status == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    /* delta_pic_order_cnt_bottom: decoder status is the function result
+     * as such */
+    return(h264bsdDecodeExpGolombSigned(peekStrm, deltaPicOrderCntBottom));
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdCheckDeltaPicOrderCnt
+
+ Functional description:
+ Peek values delta_pic_order_cnt[0] and delta_pic_order_cnt[1]
+ from the slice header. Function does not modify current stream
+ positions but copies the stream data structure to tmp structure
+ which is used while accessing stream data.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+ pSeqParamSet pointer to active SPS
+ nalUnitType type of the current NAL unit
+ picOrderPresentFlag flag indicating if delta_pic_order_cnt[1]
+ is present in the stream
+
+ Outputs:
+ deltaPicOrderCnt values are stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckDeltaPicOrderCnt(
+  strmData_t *pStrmData,
+  seqParamSet_t *pSeqParamSet,
+  nalUnitType_e nalUnitType,
+  u32 picOrderPresentFlag,
+  i32 *deltaPicOrderCnt)
+{
+
+/* Variables */
+
+    u32 status, skipped, numBits, n;
+    strmData_t peekStrm[1];
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSeqParamSet);
+    ASSERT(deltaPicOrderCnt);
+
+    /* picOrderCntType must be equal to 1 and deltaPicOrderAlwaysZeroFlag
+     * must be FALSE */
+    ASSERT(pSeqParamSet->picOrderCntType == 1);
+    ASSERT(!pSeqParamSet->deltaPicOrderAlwaysZeroFlag);
+    ASSERT(pSeqParamSet->maxFrameNum);
+
+    /* parse from a private copy so that the caller's stream position
+     * params stay untouched */
+    *peekStrm = *pStrmData;
+
+    /* skip first_mb_in_slice, slice_type and pic_parameter_set_id */
+    for (n = 0; n < 3; n++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(peekStrm, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* log2(maxFrameNum) -> num bits to represent frame_num */
+    for (numBits = 0; pSeqParamSet->maxFrameNum >> numBits; numBits++)
+        ;
+    numBits--;
+
+    /* skip frame_num */
+    status = h264bsdGetBits(peekStrm, numBits);
+    if (status == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    /* skip idr_pic_id when necessary */
+    if (nalUnitType == NAL_CODED_SLICE_IDR)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(peekStrm, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* delta_pic_order_cnt[0] */
+    status = h264bsdDecodeExpGolombSigned(peekStrm, deltaPicOrderCnt);
+    if (status != HANTRO_OK)
+        return(status);
+
+    /* second element is in the stream only when the flag is set */
+    if (!picOrderPresentFlag)
+        return(HANTRO_OK);
+
+    /* delta_pic_order_cnt[1]: decoder status is the function result as
+     * such */
+    return(h264bsdDecodeExpGolombSigned(peekStrm, deltaPicOrderCnt + 1));
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdCheckRedundantPicCnt
+
+ Functional description:
+ Peek value of redundant_pic_cnt from the slice header. Function
+ does not modify current stream positions but copies the stream
+ data structure to tmp structure which is used while accessing
+ stream data.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+ pSeqParamSet pointer to active SPS
+ pPicParamSet pointer to active PPS
+ nalUnitType type of the current NAL unit
+
+ Outputs:
+ redundantPicCnt value is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK invalid stream data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckRedundantPicCnt(
+  strmData_t *pStrmData,
+  seqParamSet_t *pSeqParamSet,
+  picParamSet_t *pPicParamSet,
+  nalUnitType_e nalUnitType,
+  u32 *redundantPicCnt)
+{
+
+/* Variables */
+
+    u32 status, skipped, numBits, n;
+    i32 skippedSigned;
+    strmData_t peekStrm[1];
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSeqParamSet);
+    ASSERT(pPicParamSet);
+    ASSERT(redundantPicCnt);
+
+    /* redundant_pic_cnt_flag must be TRUE */
+    ASSERT(pPicParamSet->redundantPicCntPresentFlag);
+    ASSERT(pSeqParamSet->maxFrameNum);
+    ASSERT(pSeqParamSet->picOrderCntType > 0 ||
+           pSeqParamSet->maxPicOrderCntLsb);
+
+    /* parse from a private copy so that the caller's stream position
+     * params stay untouched */
+    *peekStrm = *pStrmData;
+
+    /* skip first_mb_in_slice, slice_type and pic_parameter_set_id */
+    for (n = 0; n < 3; n++)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(peekStrm, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* log2(maxFrameNum) -> num bits to represent frame_num */
+    for (numBits = 0; pSeqParamSet->maxFrameNum >> numBits; numBits++)
+        ;
+    numBits--;
+
+    /* skip frame_num */
+    status = h264bsdGetBits(peekStrm, numBits);
+    if (status == END_OF_STREAM)
+        return(HANTRO_NOK);
+
+    /* skip idr_pic_id when necessary */
+    if (nalUnitType == NAL_CODED_SLICE_IDR)
+    {
+        status = h264bsdDecodeExpGolombUnsigned(peekStrm, &skipped);
+        if (status != HANTRO_OK)
+            return(status);
+    }
+
+    /* skip the picture order count elements, their layout depends on
+     * picOrderCntType */
+    if (pSeqParamSet->picOrderCntType == 0)
+    {
+        /* log2(maxPicOrderCntLsb) -> num bits to represent
+         * pic_order_cnt_lsb */
+        for (numBits = 0; pSeqParamSet->maxPicOrderCntLsb >> numBits;
+             numBits++)
+            ;
+        numBits--;
+
+        /* pic_order_cnt_lsb */
+        status = h264bsdGetBits(peekStrm, numBits);
+        if (status == END_OF_STREAM)
+            return(HANTRO_NOK);
+
+        if (pPicParamSet->picOrderPresentFlag)
+        {
+            /* skip delta_pic_order_cnt_bottom */
+            status = h264bsdDecodeExpGolombSigned(peekStrm, &skippedSigned);
+            if (status != HANTRO_OK)
+                return(status);
+        }
+    }
+    else if (pSeqParamSet->picOrderCntType == 1 &&
+             !pSeqParamSet->deltaPicOrderAlwaysZeroFlag)
+    {
+        /* delta_pic_order_cnt[0] */
+        status = h264bsdDecodeExpGolombSigned(peekStrm, &skippedSigned);
+        if (status != HANTRO_OK)
+            return(status);
+
+        /* delta_pic_order_cnt[1] if present */
+        if (pPicParamSet->picOrderPresentFlag)
+        {
+            status = h264bsdDecodeExpGolombSigned(peekStrm, &skippedSigned);
+            if (status != HANTRO_OK)
+                return(status);
+        }
+    }
+
+    /* redundant_pic_cnt: decoder status is the function result as such */
+    return(h264bsdDecodeExpGolombUnsigned(peekStrm, redundantPicCnt));
+
+}
+
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdCheckPriorPicsFlag
+
+ Functional description:
+ Peek value of no_output_of_prior_pics_flag from the slice header.
+ Function does not modify current stream positions but copies
+ the stream data structure to tmp structure which is used while
+ accessing stream data.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+ pSeqParamSet pointer to active SPS
+ pPicParamSet pointer to active PPS
+ nalUnitType type of the current NAL unit
+
+ Outputs:
+ noOutputOfPriorPicsFlag value is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK invalid stream data
+
+------------------------------------------------------------------------------*/
+/*lint -e715 disable lint info nalUnitType not referenced */
+u32 h264bsdCheckPriorPicsFlag(u32 * noOutputOfPriorPicsFlag,
+                              const strmData_t * pStrmData,
+                              const seqParamSet_t * pSeqParamSet,
+                              const picParamSet_t * pPicParamSet,
+                              nalUnitType_e nalUnitType)
+{
+/* Variables */
+
+    u32 tmp, value, i;
+    i32 ivalue;
+    strmData_t tmpStrmData[1];
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pSeqParamSet);
+    ASSERT(pPicParamSet);
+    ASSERT(noOutputOfPriorPicsFlag);
+
+    /* must be IDR slice; nalUnitType is referenced only by this assert */
+    ASSERT(nalUnitType == NAL_CODED_SLICE_IDR);
+
+    /* don't touch original stream position params: all reads below go
+     * through this local copy of the stream structure */
+    *tmpStrmData = *pStrmData;
+
+    /* skip first_mb_in_slice */
+    tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value);
+    if(tmp != HANTRO_OK)
+        return (tmp);
+
+    /* skip slice_type */
+    tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value);
+    if(tmp != HANTRO_OK)
+        return (tmp);
+
+    /* skip pic_parameter_set_id */
+    tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value);
+    if(tmp != HANTRO_OK)
+        return (tmp);
+
+    /* log2(maxFrameNum) -> num bits to represent frame_num */
+    i = 0;
+    while(pSeqParamSet->maxFrameNum >> i)
+        i++;
+    i--;
+
+    /* skip frame_num */
+    tmp = h264bsdGetBits(tmpStrmData, i);
+    if(tmp == END_OF_STREAM)
+        return (HANTRO_NOK);
+
+    /* skip idr_pic_id (always read here, the slice is asserted to be IDR) */
+    tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value);
+    if(tmp != HANTRO_OK)
+        return (tmp);
+
+    if(pSeqParamSet->picOrderCntType == 0)
+    {
+        /* log2(maxPicOrderCntLsb) -> num bits to represent pic_order_cnt_lsb */
+        i = 0;
+        while(pSeqParamSet->maxPicOrderCntLsb >> i)
+            i++;
+        i--;
+
+        /* skip pic_order_cnt_lsb */
+        tmp = h264bsdGetBits(tmpStrmData, i);
+        if(tmp == END_OF_STREAM)
+            return (HANTRO_NOK);
+
+        if(pPicParamSet->picOrderPresentFlag)
+        {
+            /* skip delta_pic_order_cnt_bottom */
+            tmp = h264bsdDecodeExpGolombSigned(tmpStrmData, &ivalue);
+            if(tmp != HANTRO_OK)
+                return (tmp);
+        }
+    }
+
+    if(pSeqParamSet->picOrderCntType == 1 &&
+       !pSeqParamSet->deltaPicOrderAlwaysZeroFlag)
+    {
+        /* skip delta_pic_order_cnt[0] */
+        tmp = h264bsdDecodeExpGolombSigned(tmpStrmData, &ivalue);
+        if(tmp != HANTRO_OK)
+            return (tmp);
+
+        /* skip delta_pic_order_cnt[1] if present */
+        if(pPicParamSet->picOrderPresentFlag)
+        {
+            tmp = h264bsdDecodeExpGolombSigned(tmpStrmData, &ivalue);
+            if(tmp != HANTRO_OK)
+                return (tmp);
+        }
+    }
+
+    /* skip redundant_pic_cnt */
+    if(pPicParamSet->redundantPicCntPresentFlag)
+    {
+        tmp = h264bsdDecodeExpGolombUnsigned(tmpStrmData, &value);
+        if(tmp != HANTRO_OK)
+            return (tmp);
+    }
+
+    /* no_output_of_prior_pics_flag: read into tmp first so that the
+     * END_OF_STREAM sentinel never lands in the caller's output */
+    tmp = h264bsdGetBits(tmpStrmData, 1);
+    if(tmp == END_OF_STREAM)
+        return (HANTRO_NOK);
+    *noOutputOfPriorPicsFlag = tmp;
+
+    return (HANTRO_OK);
+
+}
+/*lint +e715 */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.h
new file mode 100755
index 0000000..198898a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_slice_header.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_SLICE_HEADER_H
+#define H264SWDEC_SLICE_HEADER_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_cfg.h"
+#include "h264bsd_seq_param_set.h"
+#include "h264bsd_pic_param_set.h"
+#include "h264bsd_nal_unit.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+enum {
+ P_SLICE = 0, /* base slice type value of inter slices, see IS_P_SLICE */
+ I_SLICE = 2 /* base slice type value of intra slices, see IS_I_SLICE */
+};
+
+enum {NO_LONG_TERM_FRAME_INDICES = 0xFFFF}; /* stored as maxLongTermFrameIdx when max_long_term_frame_idx_plus1 is decoded as 0 */
+
+/* macro to determine if slice is an inter slice, sliceTypes 0 and 5 */
+#define IS_P_SLICE(sliceType) (((sliceType) == P_SLICE) || \
+ ((sliceType) == P_SLICE + 5))
+
+/* macro to determine if slice is an intra slice, sliceTypes 2 and 7 */
+#define IS_I_SLICE(sliceType) (((sliceType) == I_SLICE) || \
+ ((sliceType) == I_SLICE + 5))
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/* structure to store data of one reference picture list reordering operation */
+typedef struct
+{
+ u32 reorderingOfPicNumsIdc; /* reordering_of_pic_nums_idc in range [0, 3], 3 ends the command list */
+ u32 absDiffPicNum; /* decoded abs_diff_pic_num_minus1 + 1, written for idc 0 and 1 */
+ u32 longTermPicNum; /* decoded long term picture number, written for idc 2 */
+} refPicListReorderingOperation_t;
+
+/* structure to store reference picture list reordering operations */
+typedef struct
+{
+ u32 refPicListReorderingFlagL0; /* one-bit flag from the stream, non-zero when commands follow */
+ refPicListReorderingOperation_t command[MAX_NUM_REF_PICS+1]; /* commands in stream order, last one stored is the idc 3 terminator */
+} refPicListReordering_t;
+
+/* structure to store data of one DPB memory management control operation */
+typedef struct
+{
+ u32 memoryManagementControlOperation; /* operation code in range [0, 6], 0 ends the operation list */
+ u32 differenceOfPicNums; /* decoded value + 1, written for operations 1 and 3 */
+ u32 longTermPicNum; /* written for operation 2 */
+ u32 longTermFrameIdx; /* written for operations 3 and 6 */
+ u32 maxLongTermFrameIdx; /* written for operation 4: decoded value - 1, or NO_LONG_TERM_FRAME_INDICES when decoded value is 0 */
+} memoryManagementOperation_t;
+
+/* worst case scenario: all MAX_NUM_REF_PICS pictures in the buffer are
+ * short term pictures, each one of them is first marked as long term
+ * reference picture which is then marked as unused for reference.
+ * Additionally, max long-term frame index is set and current picture is
+ * marked as long term reference picture. Last position reserved for
+ * end memory_management_control_operation command */
+#define MAX_NUM_MMC_OPERATIONS (2*MAX_NUM_REF_PICS+2+1)
+
+/* structure to store decoded reference picture marking data */
+typedef struct
+{
+ u32 noOutputOfPriorPicsFlag; /* written by DecRefPicMarking for IDR slices only */
+ u32 longTermReferenceFlag; /* written by DecRefPicMarking for IDR slices only */
+ u32 adaptiveRefPicMarkingModeFlag; /* written for non-IDR NAL units, non-zero when operations follow */
+ memoryManagementOperation_t operation[MAX_NUM_MMC_OPERATIONS]; /* operations in stream order, last one stored is the operation 0 terminator */
+} decRefPicMarking_t;
+
+/* structure to store slice header data decoded from the stream */
+typedef struct
+{
+ u32 firstMbInSlice; /* NOTE(review): fields appear to mirror slice header syntax elements by name - confirm in h264bsdDecodeSliceHeader */
+ u32 sliceType; /* presumably tested with IS_P_SLICE / IS_I_SLICE - confirm against callers */
+ u32 picParameterSetId;
+ u32 frameNum;
+ u32 idrPicId;
+ u32 picOrderCntLsb;
+ i32 deltaPicOrderCntBottom;
+ i32 deltaPicOrderCnt[2];
+ u32 redundantPicCnt;
+ u32 numRefIdxActiveOverrideFlag;
+ u32 numRefIdxL0Active;
+ i32 sliceQpDelta;
+ u32 disableDeblockingFilterIdc;
+ i32 sliceAlphaC0Offset;
+ i32 sliceBetaOffset;
+ u32 sliceGroupChangeCycle;
+ refPicListReordering_t refPicListReordering; /* reference picture list reordering commands */
+ decRefPicMarking_t decRefPicMarking; /* decoded reference picture marking data */
+} sliceHeader_t;
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeSliceHeader(strmData_t *pStrmData, /* NOTE(review): definition not shown here; presumably decodes the whole slice header into pSliceHeader */
+ sliceHeader_t *pSliceHeader,
+ seqParamSet_t *pSeqParamSet,
+ picParamSet_t *pPicParamSet,
+ nalUnit_t *pNalUnit);
+
+u32 h264bsdCheckPpsId(strmData_t *pStrmData, u32 *ppsId); /* peek pic_parameter_set_id, stream position is not modified */
+
+u32 h264bsdCheckFrameNum( /* peek frame_num, stream position is not modified */
+ strmData_t *pStrmData,
+ u32 maxFrameNum,
+ u32 *frameNum);
+
+u32 h264bsdCheckIdrPicId( /* peek idr_pic_id, returns HANTRO_NOK for non-IDR NAL units */
+ strmData_t *pStrmData,
+ u32 maxFrameNum,
+ nalUnitType_e nalUnitType,
+ u32 *idrPicId);
+
+u32 h264bsdCheckPicOrderCntLsb( /* peek pic_order_cnt_lsb, picOrderCntType must be 0 */
+ strmData_t *pStrmData,
+ seqParamSet_t *pSeqParamSet,
+ nalUnitType_e nalUnitType,
+ u32 *picOrderCntLsb);
+
+u32 h264bsdCheckDeltaPicOrderCntBottom( /* peek delta_pic_order_cnt_bottom, picOrderCntType must be 0 */
+ strmData_t *pStrmData,
+ seqParamSet_t *pSeqParamSet,
+ nalUnitType_e nalUnitType,
+ i32 *deltaPicOrderCntBottom);
+
+u32 h264bsdCheckDeltaPicOrderCnt( /* peek delta_pic_order_cnt[0] and, if picOrderPresentFlag, [1]; picOrderCntType must be 1 */
+ strmData_t *pStrmData,
+ seqParamSet_t *pSeqParamSet,
+ nalUnitType_e nalUnitType,
+ u32 picOrderPresentFlag,
+ i32 *deltaPicOrderCnt);
+
+u32 h264bsdCheckRedundantPicCnt( /* peek redundant_pic_cnt, redundantPicCntPresentFlag must be set in the PPS */
+ strmData_t *pStrmData,
+ seqParamSet_t *pSeqParamSet,
+ picParamSet_t *pPicParamSet,
+ nalUnitType_e nalUnitType,
+ u32 *redundantPicCnt);
+
+u32 h264bsdCheckPriorPicsFlag(u32 * noOutputOfPriorPicsFlag, /* peek no_output_of_prior_pics_flag of an IDR slice */
+ const strmData_t * pStrmData,
+ const seqParamSet_t * pSeqParamSet,
+ const picParamSet_t * pPicParamSet,
+ nalUnitType_e nalUnitType);
+
+#endif /* #ifndef H264SWDEC_SLICE_HEADER_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.c
new file mode 100755
index 0000000..3234754
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.c
@@ -0,0 +1,888 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdInitStorage
+ h264bsdStoreSeqParamSet
+ h264bsdStorePicParamSet
+ h264bsdActivateParamSets
+ h264bsdResetStorage
+ h264bsdIsStartOfPicture
+ h264bsdIsEndOfPicture
+ h264bsdComputeSliceGroupMap
+ h264bsdCheckAccessUnitBoundary
+ CheckPps
+ h264bsdValidParamSets
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_storage.h"
+#include "h264bsd_util.h"
+#include "h264bsd_neighbour.h"
+#include "h264bsd_slice_group_map.h"
+#include "h264bsd_dpb.h"
+#include "h264bsd_nal_unit.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_seq_param_set.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 CheckPps(picParamSet_t *pps, seqParamSet_t *sps);
+
+/*------------------------------------------------------------------------------
+
+    Function name: h264bsdInitStorage
+
+        Functional description:
+            Bring a storage structure to its initial state. The whole
+            structure is zero-filled, after which the active parameter set
+            ids are set to MAX_NUM_SEQ_PARAM_SETS / MAX_NUM_PIC_PARAM_SETS
+            (values that are not valid ids) and the access unit boundary
+            checker is flagged as not having been called yet.
+
+        Inputs:
+            none
+
+        Outputs:
+            pStorage    initialized data stored here
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdInitStorage(storage_t *pStorage)
+{
+
+/* Variables */
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    /* everything starts out as zero / NULL ... */
+    H264SwDecMemset(pStorage, 0, sizeof(storage_t));
+
+    /* ... except the fields whose initial value is not zero */
+    pStorage->aub[0].firstCallFlag = HANTRO_TRUE;
+    pStorage->activePpsId = MAX_NUM_PIC_PARAM_SETS;
+    pStorage->activeSpsId = MAX_NUM_SEQ_PARAM_SETS;
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdStoreSeqParamSet
+
+        Functional description:
+            Copy a decoded sequence parameter set into the slot of the
+            storage selected by its seqParameterSetId.
+
+            - Slot empty: memory for the slot is allocated first.
+            - Slot holds the active SPS and the new contents are equal
+              (h264bsdCompareSeqParamSets returns 0): the buffers owned by
+              the new SPS are freed and the stored SPS is left untouched.
+            - Slot holds the active SPS and the contents differ: the active
+              SPS/PPS ids and pointers are invalidated to force
+              re-activation, then the slot is overwritten.
+            - Slot holds some other SPS: the slot is overwritten.
+
+            Whenever a slot is overwritten the buffers owned by the old
+            SPS (offsetForRefFrame, vuiParameters) are freed first.
+
+        Inputs:
+            pStorage        pointer to storage structure
+            pSeqParamSet    pointer to param set to be stored
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_OK                success
+            MEMORY_ALLOCATION_ERROR  failure in memory allocation
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdStoreSeqParamSet(storage_t *pStorage, seqParamSet_t *pSeqParamSet)
+{
+
+/* Variables */
+
+    u32 spsId;
+
+/* Code */
+
+    ASSERT(pStorage);
+    ASSERT(pSeqParamSet);
+    ASSERT(pSeqParamSet->seqParameterSetId < MAX_NUM_SEQ_PARAM_SETS);
+
+    spsId = pSeqParamSet->seqParameterSetId;
+
+    if (pStorage->sps[spsId] == NULL)
+    {
+        /* id seen for the first time -> reserve the slot */
+        ALLOCATE(pStorage->sps[spsId], 1, seqParamSet_t);
+        if (pStorage->sps[spsId] == NULL)
+        {
+            return(MEMORY_ALLOCATION_ERROR);
+        }
+    }
+    else
+    {
+        if (spsId == pStorage->activeSpsId)
+        {
+            if (h264bsdCompareSeqParamSets(pSeqParamSet,
+                                           pStorage->activeSps) == 0)
+            {
+                /* same contents as the active sps: keep the stored copy
+                 * and release the buffers of the just decoded one */
+                FREE(pSeqParamSet->offsetForRefFrame);
+                FREE(pSeqParamSet->vuiParameters);
+                return(HANTRO_OK);
+            }
+
+            /* contents of the active sps change: set the ids of the
+             * active param sets to invalid values so that the param sets
+             * are re-activated when the next IDR picture is decoded */
+            pStorage->activeSps = NULL;
+            pStorage->activePps = NULL;
+            pStorage->activeSpsId = MAX_NUM_SEQ_PARAM_SETS + 1;
+            pStorage->activePpsId = MAX_NUM_PIC_PARAM_SETS + 1;
+        }
+
+        /* slot is about to be overwritten -> free memories owned by the
+         * old param set */
+        FREE(pStorage->sps[spsId]->offsetForRefFrame);
+        FREE(pStorage->sps[spsId]->vuiParameters);
+    }
+
+    *pStorage->sps[spsId] = *pSeqParamSet;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdStorePicParamSet
+
+        Functional description:
+            Copy a decoded picture parameter set into the slot of the
+            storage selected by its picParameterSetId. An empty slot is
+            allocated first. If the slot is already in use, the buffers
+            owned by the old PPS (runLength, topLeft, bottomRight,
+            sliceGroupId) are freed before it is overwritten.
+
+            If the overwritten PPS is the active one and the new PPS refers
+            to an SPS other than the active SPS, activePpsId is set to an
+            invalid value to force re-activation of the parameter sets.
+
+        Inputs:
+            pStorage        pointer to storage structure
+            pPicParamSet    pointer to param set to be stored
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_OK                success
+            MEMORY_ALLOCATION_ERROR  failure in memory allocation
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdStorePicParamSet(storage_t *pStorage, picParamSet_t *pPicParamSet)
+{
+
+/* Variables */
+
+    u32 ppsId;
+
+/* Code */
+
+    ASSERT(pStorage);
+    ASSERT(pPicParamSet);
+    ASSERT(pPicParamSet->picParameterSetId < MAX_NUM_PIC_PARAM_SETS);
+    ASSERT(pPicParamSet->seqParameterSetId < MAX_NUM_SEQ_PARAM_SETS);
+
+    ppsId = pPicParamSet->picParameterSetId;
+
+    if (pStorage->pps[ppsId] == NULL)
+    {
+        /* id seen for the first time -> reserve the slot */
+        ALLOCATE(pStorage->pps[ppsId], 1, picParamSet_t);
+        if (pStorage->pps[ppsId] == NULL)
+        {
+            return(MEMORY_ALLOCATION_ERROR);
+        }
+    }
+    else
+    {
+        /* active pps is replaced by one that refers to another sps
+         * -> invalidate activePpsId to force re-activation */
+        if (ppsId == pStorage->activePpsId &&
+            pPicParamSet->seqParameterSetId != pStorage->activeSpsId)
+        {
+            pStorage->activePpsId = MAX_NUM_PIC_PARAM_SETS + 1;
+        }
+
+        /* slot is about to be overwritten -> free memories owned by the
+         * old param set */
+        FREE(pStorage->pps[ppsId]->runLength);
+        FREE(pStorage->pps[ppsId]->topLeft);
+        FREE(pStorage->pps[ppsId]->bottomRight);
+        FREE(pStorage->pps[ppsId]->sliceGroupId);
+    }
+
+    *pStorage->pps[ppsId] = *pPicParamSet;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: SwitchActiveParamSets
+
+        Functional description:
+            Make pps[ppsId] and the SPS it refers to the active parameter
+            sets, update picSizeInMbs and the dimensions of currImage, and
+            raise pendingActivation. Both parameter sets must exist.
+
+            The picture size is validated before anything is written:
+            picWidthInMbs * picHeightInMbs must not wrap around 32 bits,
+            and neither must picSizeInMbs * sizeof(mbStorage_t), which is
+            the byte count later used for the macroblock storage. On
+            failure the storage is left unmodified.
+
+        Returns:
+            HANTRO_OK                success
+            MEMORY_ALLOCATION_ERROR  picture size cannot be represented
+
+------------------------------------------------------------------------------*/
+
+static u32 SwitchActiveParamSets(storage_t *pStorage, u32 ppsId)
+{
+
+/* Variables */
+
+    picParamSet_t *pps;
+    seqParamSet_t *sps;
+    u32 picSizeInMbs;
+
+/* Code */
+
+    pps = pStorage->pps[ppsId];
+    sps = pStorage->sps[pps->seqParameterSetId];
+
+    /* check before multiplying, unsigned overflow wraps silently */
+    if (sps->picWidthInMbs != 0 &&
+        sps->picHeightInMbs > 0xFFFFFFFFU / sps->picWidthInMbs)
+    {
+        return(MEMORY_ALLOCATION_ERROR);
+    }
+    picSizeInMbs = sps->picWidthInMbs * sps->picHeightInMbs;
+
+    if (picSizeInMbs > 0xFFFFFFFFU / sizeof(mbStorage_t))
+    {
+        return(MEMORY_ALLOCATION_ERROR);
+    }
+
+    pStorage->activePpsId = ppsId;
+    pStorage->activePps = pps;
+    pStorage->activeSpsId = pps->seqParameterSetId;
+    pStorage->activeSps = sps;
+    pStorage->picSizeInMbs = picSizeInMbs;
+
+    pStorage->currImage->width = sps->picWidthInMbs;
+    pStorage->currImage->height = sps->picHeightInMbs;
+
+    pStorage->pendingActivation = HANTRO_TRUE;
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdActivateParamSets
+
+        Functional description:
+            Activate certain SPS/PPS combination. This function shall be
+            called in the beginning of each picture. Picture parameter set
+            can be changed as wanted, but sequence parameter set may only be
+            changed when the starting picture is an IDR picture.
+
+            Activation of a new SPS is done in two calls. The first call
+            records the new active parameter sets and the picture size and
+            sets pendingActivation. The next call (pendingActivation set)
+            frees the old macroblock storage and slice group map, allocates
+            new ones for the new picture size and re-initializes the
+            decoded picture buffer through h264bsdResetDpb.
+
+            A picture size whose macroblock count, or whose macroblock
+            storage size in bytes, does not fit in 32 bits is rejected with
+            MEMORY_ALLOCATION_ERROR before the active parameter sets are
+            changed.
+
+        Inputs:
+            pStorage        pointer to storage data structure
+            ppsId           identifies the PPS to be activated, SPS id obtained
+                            from the PPS
+            isIdr           flag to indicate if the picture is an IDR picture
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_OK       success
+            HANTRO_NOK      non-existing or invalid param set combination,
+                            trying to change SPS with non-IDR picture
+            MEMORY_ALLOCATION_ERROR     failure in memory allocation or
+                                        picture size too large
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdActivateParamSets(storage_t *pStorage, u32 ppsId, u32 isIdr)
+{
+
+/* Variables */
+
+    u32 tmp;
+    u32 flag;
+
+/* Code */
+
+    ASSERT(pStorage);
+    ASSERT(ppsId < MAX_NUM_PIC_PARAM_SETS);
+
+    /* check that pps and corresponding sps exist */
+    if ( (pStorage->pps[ppsId] == NULL) ||
+         (pStorage->sps[pStorage->pps[ppsId]->seqParameterSetId] == NULL) )
+    {
+        return(HANTRO_NOK);
+    }
+
+    /* check that pps parameters do not violate picture size constraints */
+    tmp = CheckPps(pStorage->pps[ppsId],
+                   pStorage->sps[pStorage->pps[ppsId]->seqParameterSetId]);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+
+    /* first activation part1 */
+    if (pStorage->activePpsId == MAX_NUM_PIC_PARAM_SETS)
+    {
+        tmp = SwitchActiveParamSets(pStorage, ppsId);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+    }
+    /* first activation part2 */
+    else if (pStorage->pendingActivation)
+    {
+        pStorage->pendingActivation = HANTRO_FALSE;
+
+        FREE(pStorage->mb);
+        FREE(pStorage->sliceGroupMap);
+
+        ALLOCATE(pStorage->mb, pStorage->picSizeInMbs, mbStorage_t);
+        ALLOCATE(pStorage->sliceGroupMap, pStorage->picSizeInMbs, u32);
+        if (pStorage->mb == NULL || pStorage->sliceGroupMap == NULL)
+            return(MEMORY_ALLOCATION_ERROR);
+
+        /* byte count cannot wrap, checked in SwitchActiveParamSets */
+        H264SwDecMemset(pStorage->mb, 0,
+            pStorage->picSizeInMbs * sizeof(mbStorage_t));
+
+        h264bsdInitMbNeighbours(pStorage->mb,
+            pStorage->activeSps->picWidthInMbs,
+            pStorage->picSizeInMbs);
+
+        /* dpb output reordering disabled if
+         * 1) application set noReordering flag
+         * 2) POC type equal to 2
+         * 3) num_reorder_frames in vui equal to 0 */
+        if ( pStorage->noReordering ||
+             pStorage->activeSps->picOrderCntType == 2 ||
+             (pStorage->activeSps->vuiParametersPresentFlag &&
+              pStorage->activeSps->vuiParameters->bitstreamRestrictionFlag &&
+              !pStorage->activeSps->vuiParameters->numReorderFrames) )
+            flag = HANTRO_TRUE;
+        else
+            flag = HANTRO_FALSE;
+
+        tmp = h264bsdResetDpb(pStorage->dpb,
+            pStorage->activeSps->picWidthInMbs *
+            pStorage->activeSps->picHeightInMbs,
+            pStorage->activeSps->maxDpbSize,
+            pStorage->activeSps->numRefFrames,
+            pStorage->activeSps->maxFrameNum,
+            flag);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+    }
+    else if (ppsId != pStorage->activePpsId)
+    {
+        /* sequence parameter set shall not change but before an IDR picture */
+        if (pStorage->pps[ppsId]->seqParameterSetId != pStorage->activeSpsId)
+        {
+            DEBUG(("SEQ PARAM SET CHANGING...\n"));
+            if (!isIdr)
+            {
+                DEBUG(("TRYING TO CHANGE SPS IN NON-IDR SLICE\n"));
+                return(HANTRO_NOK);
+            }
+
+            tmp = SwitchActiveParamSets(pStorage, ppsId);
+            if (tmp != HANTRO_OK)
+                return(tmp);
+        }
+        else
+        {
+            /* same sps, only the pps changes */
+            pStorage->activePpsId = ppsId;
+            pStorage->activePps = pStorage->pps[ppsId];
+        }
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdResetStorage
+
+        Functional description:
+            Reset the per-picture bookkeeping of the storage: the slice
+            counters (sliceId, numDecodedMbs) and, for each of the
+            picSizeInMbs macroblocks, its sliceId and decoded fields are
+            set to zero. This should be called before processing of a new
+            image is started.
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Outputs:
+            none
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdResetStorage(storage_t *pStorage)
+{
+
+/* Variables */
+
+    u32 mbsLeft;
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    /* walk the macroblock storage from the last entry to the first */
+    for (mbsLeft = pStorage->picSizeInMbs; mbsLeft > 0; mbsLeft--)
+    {
+        pStorage->mb[mbsLeft - 1].decoded = 0;
+        pStorage->mb[mbsLeft - 1].sliceId = 0;
+    }
+
+    pStorage->slice->sliceId = 0;
+    pStorage->slice->numDecodedMbs = 0;
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIsStartOfPicture
+
+        Functional description:
+            Tell whether the decoder is at the start of a picture. This
+            information is needed to decide if h264bsdActivateParamSets and
+            h264bsdCheckGapsInFrameNum functions should be called. A new
+            picture is considered to be starting as long as no slice header
+            has been successfully decoded for the current access unit, i.e.
+            while validSliceInAccessUnit is HANTRO_FALSE.
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_TRUE     new picture is starting
+            HANTRO_FALSE    not starting
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdIsStartOfPicture(storage_t *pStorage)
+{
+
+/* Variables */
+
+
+/* Code */
+
+    return((pStorage->validSliceInAccessUnit == HANTRO_FALSE) ?
+        HANTRO_TRUE : HANTRO_FALSE);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdIsEndOfPicture
+
+        Functional description:
+            Tell whether the decoder is at the end of a picture. This
+            information is needed to determine when deblocking filtering
+            and reference picture marking processes should be performed.
+
+            For primary slices (redundantPicCnt of the last decoded slice
+            header is zero) the answer is obtained by comparing
+            numDecodedMbs with picSizeInMbs. For redundant slices
+            numDecodedMbs may not contain valid information, so the decoded
+            flag of each macroblock is checked separately.
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_TRUE     end of picture
+            HANTRO_FALSE    not at the end of a picture
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdIsEndOfPicture(storage_t *pStorage)
+{
+
+/* Variables */
+
+    u32 mbIdx;
+
+/* Code */
+
+    /* primary picture */
+    if (pStorage->sliceHeader->redundantPicCnt == 0)
+    {
+        return((pStorage->slice->numDecodedMbs == pStorage->picSizeInMbs) ?
+            HANTRO_TRUE : HANTRO_FALSE);
+    }
+
+    /* redundant picture: complete only if no macroblock is left undecoded */
+    for (mbIdx = 0; mbIdx < pStorage->picSizeInMbs; mbIdx++)
+    {
+        if (!pStorage->mb[mbIdx].decoded)
+        {
+            return(HANTRO_FALSE);
+        }
+    }
+
+    return(HANTRO_TRUE);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdComputeSliceGroupMap
+
+        Functional description:
+            Compute the slice group map. Thin wrapper that passes the slice
+            group map buffer, the active PPS and the picture dimensions of
+            the active SPS to h264bsdDecodeSliceGroupMap.
+
+        Inputs:
+            pStorage                pointer to storage structure
+            sliceGroupChangeCycle   forwarded to h264bsdDecodeSliceGroupMap
+
+        Outputs:
+            none
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+void h264bsdComputeSliceGroupMap(storage_t *pStorage, u32 sliceGroupChangeCycle)
+{
+
+/* Variables */
+
+    u32 widthInMbs;
+    u32 heightInMbs;
+
+/* Code */
+
+    widthInMbs = pStorage->activeSps->picWidthInMbs;
+    heightInMbs = pStorage->activeSps->picHeightInMbs;
+
+    h264bsdDecodeSliceGroupMap(pStorage->sliceGroupMap, pStorage->activePps,
+        sliceGroupChangeCycle, widthInMbs, heightInMbs);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdCheckAccessUnitBoundary
+
+ Functional description:
+ Check whether the next NAL unit starts a new access unit. The
+ following condition indicates the start of a new access unit
+ for any NAL unit:
+
+ -NAL unit types 6-11, 13-18 (e.g. SPS, PPS)
+
+ The following conditions are checked only for slice NAL units
+ (NAL_CODED_SLICE, NAL_CODED_SLICE_IDR); values are compared to
+ the ones stored in storage->aub for the previous slice:
+
+ -this is the very first slice checked (firstCallFlag)
+ -NAL unit type differs (slice / IDR slice)
+ -frame_num differs
+ -nal_ref_idc differs and one of the values is 0
+ -POC information differs
+ -both are IDR slices and idr_pic_id differs
+
+ Any other NAL unit type leaves the flag FALSE and returns
+ HANTRO_OK without reading the stream.
+
+ The prev* fields of storage->aub are updated while the values
+ are compared, but nuPrev is overwritten with *nuNext only when
+ the function runs to the end. An error return may therefore
+ leave the stored values partially updated.
+
+ Inputs:
+ strm pointer to stream data structure
+ nuNext pointer to NAL unit structure
+ storage pointer to storage structure
+
+ Outputs:
+ accessUnitBoundaryFlag the result is stored here, TRUE for
+ access unit boundary, FALSE otherwise
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK failure, invalid stream data
+ PARAM_SET_ERROR invalid param set usage
+
+ Status codes from h264bsdCheckPpsId,
+ h264bsdCheckDeltaPicOrderCntBottom and
+ h264bsdCheckDeltaPicOrderCnt are returned unchanged;
+ failures of the other h264bsdCheck* calls are
+ reported as HANTRO_NOK.
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdCheckAccessUnitBoundary(
+ strmData_t *strm,
+ nalUnit_t *nuNext,
+ storage_t *storage,
+ u32 *accessUnitBoundaryFlag)
+{
+
+/* Variables */
+
+ u32 tmp, ppsId, frameNum, idrPicId, picOrderCntLsb;
+ i32 deltaPicOrderCntBottom, deltaPicOrderCnt[2];
+ seqParamSet_t *sps;
+ picParamSet_t *pps;
+
+/* Code */
+
+ ASSERT(strm);
+ ASSERT(nuNext);
+ ASSERT(storage);
+ ASSERT(storage->sps);
+ ASSERT(storage->pps);
+
+ /* initialize default output to FALSE */
+ *accessUnitBoundaryFlag = HANTRO_FALSE;
+
+ if ( ( (nuNext->nalUnitType > 5) && (nuNext->nalUnitType < 12) ) ||
+ ( (nuNext->nalUnitType > 12) && (nuNext->nalUnitType <= 18) ) )
+ {
+ *accessUnitBoundaryFlag = HANTRO_TRUE;
+ return(HANTRO_OK);
+ }
+ else if ( nuNext->nalUnitType != NAL_CODED_SLICE &&
+ nuNext->nalUnitType != NAL_CODED_SLICE_IDR )
+ {
+ return(HANTRO_OK);
+ }
+
+ /* check if this is the very first call to this function for a slice
+ * NAL unit; the flag is cleared here even if a later check fails */
+ if (storage->aub->firstCallFlag)
+ {
+ *accessUnitBoundaryFlag = HANTRO_TRUE;
+ storage->aub->firstCallFlag = HANTRO_FALSE;
+ }
+
+ /* get picture parameter set id */
+ tmp = h264bsdCheckPpsId(strm, &ppsId);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+
+ /* store sps and pps in separate pointers just to make names shorter.
+ * A missing pps or sps is a param set error, and so is a non-IDR slice
+ * that refers to an sps other than the active one, unless activeSpsId
+ * still holds its initial value MAX_NUM_SEQ_PARAM_SETS */
+ pps = storage->pps[ppsId];
+ if ( pps == NULL || storage->sps[pps->seqParameterSetId] == NULL ||
+ (storage->activeSpsId != MAX_NUM_SEQ_PARAM_SETS &&
+ pps->seqParameterSetId != storage->activeSpsId &&
+ nuNext->nalUnitType != NAL_CODED_SLICE_IDR) )
+ return(PARAM_SET_ERROR);
+ sps = storage->sps[pps->seqParameterSetId];
+
+ if (storage->aub->nuPrev->nalRefIdc != nuNext->nalRefIdc &&
+ (storage->aub->nuPrev->nalRefIdc == 0 || nuNext->nalRefIdc == 0))
+ *accessUnitBoundaryFlag = HANTRO_TRUE;
+
+ if ((storage->aub->nuPrev->nalUnitType == NAL_CODED_SLICE_IDR &&
+ nuNext->nalUnitType != NAL_CODED_SLICE_IDR) ||
+ (storage->aub->nuPrev->nalUnitType != NAL_CODED_SLICE_IDR &&
+ nuNext->nalUnitType == NAL_CODED_SLICE_IDR))
+ *accessUnitBoundaryFlag = HANTRO_TRUE;
+
+ tmp = h264bsdCheckFrameNum(strm, sps->maxFrameNum, &frameNum);
+ if (tmp != HANTRO_OK)
+ return(HANTRO_NOK);
+
+ if (storage->aub->prevFrameNum != frameNum)
+ {
+ storage->aub->prevFrameNum = frameNum;
+ *accessUnitBoundaryFlag = HANTRO_TRUE;
+ }
+
+ if (nuNext->nalUnitType == NAL_CODED_SLICE_IDR)
+ {
+ tmp = h264bsdCheckIdrPicId(strm, sps->maxFrameNum, nuNext->nalUnitType,
+ &idrPicId);
+ if (tmp != HANTRO_OK)
+ return(HANTRO_NOK);
+
+ if (storage->aub->nuPrev->nalUnitType == NAL_CODED_SLICE_IDR &&
+ storage->aub->prevIdrPicId != idrPicId)
+ *accessUnitBoundaryFlag = HANTRO_TRUE;
+
+ storage->aub->prevIdrPicId = idrPicId;
+ }
+
+ if (sps->picOrderCntType == 0)
+ {
+ tmp = h264bsdCheckPicOrderCntLsb(strm, sps, nuNext->nalUnitType,
+ &picOrderCntLsb);
+ if (tmp != HANTRO_OK)
+ return(HANTRO_NOK);
+
+ if (storage->aub->prevPicOrderCntLsb != picOrderCntLsb)
+ {
+ storage->aub->prevPicOrderCntLsb = picOrderCntLsb;
+ *accessUnitBoundaryFlag = HANTRO_TRUE;
+ }
+
+ if (pps->picOrderPresentFlag)
+ {
+ tmp = h264bsdCheckDeltaPicOrderCntBottom(strm, sps,
+ nuNext->nalUnitType, &deltaPicOrderCntBottom);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+
+ if (storage->aub->prevDeltaPicOrderCntBottom !=
+ deltaPicOrderCntBottom)
+ {
+ storage->aub->prevDeltaPicOrderCntBottom =
+ deltaPicOrderCntBottom;
+ *accessUnitBoundaryFlag = HANTRO_TRUE;
+ }
+ }
+ }
+ else if (sps->picOrderCntType == 1 && !sps->deltaPicOrderAlwaysZeroFlag)
+ {
+ tmp = h264bsdCheckDeltaPicOrderCnt(strm, sps, nuNext->nalUnitType,
+ pps->picOrderPresentFlag, deltaPicOrderCnt);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+
+ if (storage->aub->prevDeltaPicOrderCnt[0] != deltaPicOrderCnt[0])
+ {
+ storage->aub->prevDeltaPicOrderCnt[0] = deltaPicOrderCnt[0];
+ *accessUnitBoundaryFlag = HANTRO_TRUE;
+ }
+
+ if (pps->picOrderPresentFlag)
+ if (storage->aub->prevDeltaPicOrderCnt[1] != deltaPicOrderCnt[1])
+ {
+ storage->aub->prevDeltaPicOrderCnt[1] = deltaPicOrderCnt[1];
+ *accessUnitBoundaryFlag = HANTRO_TRUE;
+ }
+ }
+
+ *storage->aub->nuPrev = *nuNext;
+
+ return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: CheckPps
+
+        Functional description:
+            Check picture parameter set. The slice group parameters of the
+            PPS that depend on the image dimensions are checked against
+            the picture size (picWidthInMbs * picHeightInMbs) of the SPS.
+            Nothing is checked when the PPS has at most one slice group.
+
+            slice group map type    condition for HANTRO_NOK
+            0                       some runLength[i] > picture size
+            2                       some rectangle with topLeft[i] >
+                                    bottomRight[i], bottomRight[i] >=
+                                    picture size, or whose top-left column
+                                    is right of its bottom-right column
+            3, 4, 5                 sliceGroupChangeRate > picture size
+            6                       picSizeInMapUnits < picture size
+
+        Inputs:
+            pps     pointer to picture parameter set
+            sps     pointer to sequence parameter set
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_OK      everything ok
+            HANTRO_NOK     invalid data in picture parameter set
+
+------------------------------------------------------------------------------*/
+
+/* 'static' repeated here so that the definition agrees with the local
+ * prototype at the top of the file */
+static u32 CheckPps(picParamSet_t *pps, seqParamSet_t *sps)
+{
+
+/* Variables */
+
+    u32 i;
+    u32 picSize;
+
+/* Code */
+
+    ASSERT(pps);
+    ASSERT(sps);
+
+    picSize = sps->picWidthInMbs * sps->picHeightInMbs;
+
+    /* check slice group params */
+    if (pps->numSliceGroups > 1)
+    {
+        if (pps->sliceGroupMapType == 0)
+        {
+            ASSERT(pps->runLength);
+            for (i = 0; i < pps->numSliceGroups; i++)
+            {
+                if (pps->runLength[i] > picSize)
+                    return(HANTRO_NOK);
+            }
+        }
+        else if (pps->sliceGroupMapType == 2)
+        {
+            ASSERT(pps->topLeft);
+            ASSERT(pps->bottomRight);
+            /* only numSliceGroups-1 rectangles are examined */
+            for (i = 0; i < pps->numSliceGroups-1; i++)
+            {
+                /* bottomRight[i] >= picSize also catches picSize == 0, so
+                 * the modulo below never divides by zero */
+                if (pps->topLeft[i] > pps->bottomRight[i] ||
+                    pps->bottomRight[i] >= picSize)
+                    return(HANTRO_NOK);
+
+                if ( (pps->topLeft[i] % sps->picWidthInMbs) >
+                     (pps->bottomRight[i] % sps->picWidthInMbs) )
+                    return(HANTRO_NOK);
+            }
+        }
+        else if (pps->sliceGroupMapType > 2 && pps->sliceGroupMapType < 6)
+        {
+            if (pps->sliceGroupChangeRate > picSize)
+                return(HANTRO_NOK);
+        }
+        else if (pps->sliceGroupMapType == 6 &&
+                 pps->picSizeInMapUnits < picSize)
+            return(HANTRO_NOK);
+    }
+
+    return(HANTRO_OK);
+}
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdValidParamSets
+
+        Functional description:
+            Check if any valid SPS/PPS combination exists in the storage.
+            Each stored PPS is tried in turn: the SPS it refers to has to
+            exist and CheckPps has to accept the PPS against the image
+            dimensions of that SPS. The search stops at the first
+            combination that passes.
+
+        Inputs:
+            pStorage    pointer to storage structure
+
+        Outputs:
+            none
+
+        Returns:
+            HANTRO_OK   there is at least one valid combination
+            HANTRO_NOK  no valid combinations found
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdValidParamSets(storage_t *pStorage)
+{
+
+/* Variables */
+
+    u32 ppsIdx;
+    picParamSet_t *candidatePps;
+    seqParamSet_t *candidateSps;
+
+/* Code */
+
+    ASSERT(pStorage);
+
+    for (ppsIdx = 0; ppsIdx < MAX_NUM_PIC_PARAM_SETS; ppsIdx++)
+    {
+        candidatePps = pStorage->pps[ppsIdx];
+        if (candidatePps == NULL)
+        {
+            continue;
+        }
+
+        candidateSps = pStorage->sps[candidatePps->seqParameterSetId];
+        if (candidateSps == NULL)
+        {
+            continue;
+        }
+
+        if (CheckPps(candidatePps, candidateSps) == HANTRO_OK)
+        {
+            return(HANTRO_OK);
+        }
+    }
+
+    return(HANTRO_NOK);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.h
new file mode 100755
index 0000000..ba3b2da
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_storage.h
@@ -0,0 +1,174 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_STORAGE_H
+#define H264SWDEC_STORAGE_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_cfg.h"
+#include "h264bsd_seq_param_set.h"
+#include "h264bsd_pic_param_set.h"
+#include "h264bsd_macroblock_layer.h"
+#include "h264bsd_nal_unit.h"
+#include "h264bsd_slice_header.h"
+#include "h264bsd_seq_param_set.h"
+#include "h264bsd_dpb.h"
+#include "h264bsd_pic_order_cnt.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+typedef struct /* slice decoding bookkeeping, member 'slice' of storage_t */
+{
+ u32 sliceId; /* set to 0 by h264bsdResetStorage */
+ u32 numDecodedMbs; /* set to 0 by h264bsdResetStorage; compared against
+ * picSizeInMbs by h264bsdIsEndOfPicture */
+ u32 lastMbAddr; /* presumably the address of the last macroblock of the
+ * slice -- TODO confirm against the slice decoding code */
+} sliceStorage_t;
+
+/* structure to store parameters needed for access unit boundary checking.
+ * h264bsdCheckAccessUnitBoundary compares the values read for the next slice
+ * NAL unit against the prev* values kept here and updates them */
+typedef struct
+{
+ nalUnit_t nuPrev[1]; /* copy of the NAL unit passed to the last check that
+ * ran to completion */
+ u32 prevFrameNum;
+ u32 prevIdrPicId; /* updated for every IDR slice that is checked */
+ u32 prevPicOrderCntLsb; /* used when picOrderCntType is 0 */
+ i32 prevDeltaPicOrderCntBottom; /* used when picOrderCntType is 0 and
+ * picOrderPresentFlag is set */
+ i32 prevDeltaPicOrderCnt[2]; /* used when picOrderCntType is 1 and
+ * deltaPicOrderAlwaysZeroFlag is not set */
+ u32 firstCallFlag; /* set by h264bsdInitStorage, cleared when the first
+ * slice NAL unit is checked */
+} aubCheck_t;
+
+/* storage data structure, holds all data of a decoder instance */
+typedef struct
+{
+ /* active parameter set ids and pointers. h264bsdInitStorage sets the ids
+ * to MAX_NUM_PIC_PARAM_SETS / MAX_NUM_SEQ_PARAM_SETS (nothing activated
+ * yet); h264bsdStoreSeqParamSet and h264bsdStorePicParamSet set them to
+ * MAX_NUM_xxx_PARAM_SETS + 1 to force re-activation. sps[] and pps[]
+ * entries are NULL until a parameter set with that id has been stored */
+ u32 oldSpsId;
+ u32 activePpsId;
+ u32 activeSpsId;
+ picParamSet_t *activePps;
+ seqParamSet_t *activeSps;
+ seqParamSet_t *sps[MAX_NUM_SEQ_PARAM_SETS];
+ picParamSet_t *pps[MAX_NUM_PIC_PARAM_SETS];
+
+ /* current slice group map, recomputed for each slice. Allocated with
+ * picSizeInMbs entries by h264bsdActivateParamSets */
+ u32 *sliceGroupMap;
+
+ u32 picSizeInMbs; /* picWidthInMbs * picHeightInMbs of the active SPS,
+ * set by h264bsdActivateParamSets */
+
+ /* this flag is set after all macroblocks of a picture successfully
+ * decoded -> redundant slices not decoded */
+ u32 skipRedundantSlices;
+ u32 picStarted;
+
+ /* flag to indicate if current access unit contains any valid slices */
+ u32 validSliceInAccessUnit;
+
+ /* store information needed for handling of slice decoding */
+ sliceStorage_t slice[1];
+
+ /* number of concealed macroblocks in the current image */
+ u32 numConcealedMbs;
+
+ /* picId given by application */
+ u32 currentPicId;
+
+ /* macroblock specific storages, size determined by image dimensions
+ * (picSizeInMbs entries, allocated by h264bsdActivateParamSets) */
+ mbStorage_t *mb;
+
+ /* flag to store noOutputReordering flag set by the application */
+ u32 noReordering;
+
+ /* DPB */
+ dpbStorage_t dpb[1];
+
+ /* structure to store picture order count related information */
+ pocStorage_t poc[1];
+
+ /* access unit boundary checking related data */
+ aubCheck_t aub[1];
+
+ /* current processed image; h264bsdActivateParamSets stores the picture
+ * width and height in macroblocks here */
+ image_t currImage[1];
+
+ /* last valid NAL unit header is stored here */
+ nalUnit_t prevNalUnit[1];
+
+ /* slice header, second structure used as a temporary storage while
+ * decoding slice header, first one stores last successfully decoded
+ * slice header */
+ sliceHeader_t sliceHeader[2];
+
+ /* fields to store old stream buffer pointers, needed when only part of
+ * a stream buffer is processed by h264bsdDecode function */
+ u32 prevBufNotFinished;
+ u8 *prevBufPointer;
+ u32 prevBytesConsumed;
+ strmData_t strm[1];
+
+ /* macroblock layer structure, there is no need to store this but it
+ * would have increased the stack size excessively and needed to be
+ * allocated from heap -> easiest to put it here */
+ macroblockLayer_t *mbLayer;
+
+ u32 pendingActivation; /* Activate parameter sets after returning
+ HEADERS_RDY to the user */
+ u32 intraConcealmentFlag; /* 0 gray picture for corrupted intra
+ 1 previous frame used if available */
+} storage_t;
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+void h264bsdInitStorage(storage_t *pStorage);
+void h264bsdResetStorage(storage_t *pStorage);
+u32 h264bsdIsStartOfPicture(storage_t *pStorage);
+u32 h264bsdIsEndOfPicture(storage_t *pStorage);
+u32 h264bsdStoreSeqParamSet(storage_t *pStorage, seqParamSet_t *pSeqParamSet);
+u32 h264bsdStorePicParamSet(storage_t *pStorage, picParamSet_t *pPicParamSet);
+u32 h264bsdActivateParamSets(storage_t *pStorage, u32 ppsId, u32 isIdr);
+void h264bsdComputeSliceGroupMap(storage_t *pStorage,
+ u32 sliceGroupChangeCycle);
+
+u32 h264bsdCheckAccessUnitBoundary(
+ strmData_t *strm,
+ nalUnit_t *nuNext,
+ storage_t *storage,
+ u32 *accessUnitBoundaryFlag);
+
+u32 h264bsdValidParamSets(storage_t *pStorage);
+
+#endif /* #ifndef H264SWDEC_STORAGE_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.c
new file mode 100755
index 0000000..20d1083
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.c
@@ -0,0 +1,242 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdGetBits
+ h264bsdShowBits32
+ h264bsdFlushBits
+ h264bsdIsByteAligned
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_util.h"
+#include "h264bsd_stream.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdGetBits
+
+ Functional description:
+ Read and remove bits from the stream buffer.
+
+ Input:
+ pStrmData pointer to stream data structure
+ numBits number of bits to read
+
+ Output:
+ none
+
+ Returns:
+ bits read from stream
+ END_OF_STREAM if not enough bits left
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdGetBits(strmData_t *pStrmData, u32 numBits)
+{
+
+    u32 out;
+
+    ASSERT(pStrmData);
+    /* numBits must stay below 32 so that a valid result can never be
+     * confused with the END_OF_STREAM sentinel (all 32 bits set) */
+    ASSERT(numBits < 32);
+
+    /* Peek at the next 32 bits and keep the numBits most significant ones,
+     * right-aligned. A zero-length read is allowed by the assertion above
+     * but would shift a 32-bit value by 32, which is undefined in C, so it
+     * is answered with 0 without evaluating the shift. */
+    if (numBits == 0)
+        out = 0;
+    else
+        out = h264bsdShowBits32(pStrmData) >> (32 - numBits);
+
+    /* Consume the bits; the flush fails when the read position would move
+     * past the end of the buffer, and that failure is reported instead of
+     * the peeked value. */
+    if (h264bsdFlushBits(pStrmData, numBits) == HANTRO_OK)
+    {
+        return(out);
+    }
+
+    return(END_OF_STREAM);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdShowBits32
+
+ Functional description:
+ Read 32 bits from the stream buffer. Buffer is left as it is, i.e.
+ no bits are removed. First bit read from the stream is the MSB of
+ the return value. If there are not enough bits in the buffer,
+ bits beyond the end of the stream are set to '0' in the return
+ value.
+
+ Input:
+ pStrmData pointer to stream data structure
+
+ Output:
+ none
+
+ Returns:
+ bits read from stream
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdShowBits32(strmData_t *pStrmData)
+{
+
+    const u8 *pByte;
+    u32 bitOffset;
+    u32 result;
+    i32 bitsLeft;
+    i32 leftShift;
+
+    ASSERT(pStrmData);
+    ASSERT(pStrmData->pStrmCurrPos);
+    ASSERT(pStrmData->bitPosInWord < 8);
+    ASSERT(pStrmData->bitPosInWord ==
+           (pStrmData->strmBuffReadBits & 0x7));
+
+    pByte = pStrmData->pStrmCurrPos;
+    bitOffset = pStrmData->bitPosInWord;
+
+    /* how many unread bits the buffer still holds */
+    bitsLeft = (i32)pStrmData->strmBuffSize*8 -
+               (i32)pStrmData->strmBuffReadBits;
+
+    /* nothing left to show */
+    if (bitsLeft <= 0)
+        return (0);
+
+    /* common case: a full 32 bits are available */
+    if (bitsLeft >= 32)
+    {
+        result = ((u32)pByte[0] << 24) | ((u32)pByte[1] << 16) |
+                 ((u32)pByte[2] <<  8) | ((u32)pByte[3]);
+
+        /* not byte aligned: drop the bits already consumed from the
+         * first byte and refill the low end from the fifth byte */
+        if (bitOffset)
+        {
+            result <<= bitOffset;
+            result |= (u32)pByte[4] >> (8 - bitOffset);
+        }
+        return (result);
+    }
+
+    /* tail of the buffer: place the remaining bytes one at a time, MSB
+     * first; positions past the end of the data stay zero */
+    leftShift = (i32)(24 + bitOffset);
+    result = (u32)pByte[0] << leftShift;
+    pByte++;
+    for (bitsLeft -= (i32)(8 - bitOffset); bitsLeft > 0; bitsLeft -= 8)
+    {
+        leftShift -= 8;
+        result |= (u32)(*pByte) << leftShift;
+        pByte++;
+    }
+    return (result);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdFlushBits
+
+ Functional description:
+ Remove bits from the stream buffer
+
+ Input:
+ pStrmData pointer to stream data structure
+ numBits number of bits to remove
+
+ Output:
+ none
+
+ Returns:
+ HANTRO_OK success
+ END_OF_STREAM not enough bits left
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_NEON
+u32 h264bsdFlushBits(strmData_t *pStrmData, u32 numBits)
+{
+
+    u32 bitsRead;
+
+    ASSERT(pStrmData);
+    ASSERT(pStrmData->pStrmBuffStart);
+    ASSERT(pStrmData->pStrmCurrPos);
+    ASSERT(pStrmData->bitPosInWord < 8);
+    ASSERT(pStrmData->bitPosInWord == (pStrmData->strmBuffReadBits & 0x7));
+
+    /* the bit counters are advanced unconditionally, even when the new
+     * position turns out to lie beyond the end of the buffer */
+    bitsRead = pStrmData->strmBuffReadBits + numBits;
+    pStrmData->strmBuffReadBits = bitsRead;
+    pStrmData->bitPosInWord = bitsRead & 0x7;
+
+    /* ran past the end: leave the byte pointer where it was */
+    if (bitsRead > (8*pStrmData->strmBuffSize))
+        return(END_OF_STREAM);
+
+    /* byte pointer follows the whole bytes consumed so far */
+    pStrmData->pStrmCurrPos = pStrmData->pStrmBuffStart + (bitsRead >> 3);
+    return(HANTRO_OK);
+
+}
+#endif
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdIsByteAligned
+
+ Functional description:
+ Check if current stream position is byte aligned.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+
+ Outputs:
+ none
+
+ Returns:
+ TRUE stream is byte aligned
+ FALSE stream is not byte aligned
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdIsByteAligned(strmData_t *pStrmData)
+{
+
+    /* aligned exactly when no bits of the current byte have been consumed */
+    return((pStrmData->bitPosInWord == 0) ? HANTRO_TRUE : HANTRO_FALSE);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.h
new file mode 100755
index 0000000..4404b66
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_stream.h
@@ -0,0 +1,67 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_STREAM_H
+#define H264SWDEC_STREAM_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/* Read state for one stream buffer: where the buffer starts, how large it
+ * is, and how far reading has progressed. The bit-level readers
+ * (h264bsdGetBits / h264bsdShowBits32 / h264bsdFlushBits) take a pointer to
+ * this structure. */
+typedef struct
+{
+ u8 *pStrmBuffStart; /* pointer to start of stream buffer */
+ u8 *pStrmCurrPos; /* current read address in stream buffer; the byte
+ * that contains the next unread bit */
+ u32 bitPosInWord; /* bit position in stream buffer byte, 0..7; kept
+ * equal to (strmBuffReadBits & 0x7) */
+ u32 strmBuffSize; /* size of stream buffer (bytes) */
+ u32 strmBuffReadBits; /* number of bits read from stream buffer */
+} strmData_t;
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+/* Read and consume numBits (< 32) bits. Returns the bits right-aligned, or
+ * END_OF_STREAM if the buffer does not hold that many bits. */
+u32 h264bsdGetBits(strmData_t *pStrmData, u32 numBits);
+
+/* Peek at the next 32 bits without consuming them; the first stream bit is
+ * the MSB of the result and bits beyond the end of the buffer read as 0. */
+u32 h264bsdShowBits32(strmData_t *pStrmData);
+
+/* Consume numBits bits. Returns HANTRO_OK, or END_OF_STREAM if the read
+ * position would pass the end of the buffer. The C definition is compiled
+ * only when H264DEC_NEON is not defined. */
+u32 h264bsdFlushBits(strmData_t *pStrmData, u32 numBits);
+
+/* Returns HANTRO_TRUE if the read position is on a byte boundary,
+ * HANTRO_FALSE otherwise. */
+u32 h264bsdIsByteAligned(strmData_t *pStrmData);
+
+#endif /* #ifdef H264SWDEC_STREAM_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.c
new file mode 100755
index 0000000..4eb6dd0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdProcessBlock
+ h264bsdProcessLumaDc
+ h264bsdProcessChromaDc
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_transform.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/* Switch off the following Lint messages for this file:
+ * Info 701: Shift left of signed quantity (int)
+ * Info 702: Shift right of signed quantity (int)
+ */
+/*lint -e701 -e702 */
+
+/* LevelScale function */
+/* LevelScale function: inverse scaling factors. The row is selected with
+ * qp % 6 (through qpMod6). h264bsdProcessBlock uses all three columns, each
+ * for a different set of coefficient positions; the luma and chroma DC
+ * routines use column 0 only. */
+static const i32 levelScale[6][3] = {
+ {10,13,16}, {11,14,18}, {13,16,20}, {14,18,23}, {16,20,25}, {18,23,29}};
+
+/* qp % 6 as a function of qp; 52 entries, i.e. valid for qp 0..51 */
+static const u8 qpMod6[52] = {0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,
+ 0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3,4,5,0,1,2,3};
+
+/* qp / 6 as a function of qp; 52 entries, i.e. valid for qp 0..51 */
+static const u8 qpDiv6[52] = {0,0,0,0,0,0,1,1,1,1,1,1,2,2,2,2,2,2,3,3,3,3,3,3,
+ 4,4,4,4,4,4,5,5,5,5,5,5,6,6,6,6,6,6,7,7,7,7,7,7,8,8,8,8};
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdProcessBlock
+
+ Functional description:
+ Function performs inverse zig-zag scan, inverse scaling and
+ inverse transform for a luma or a chroma residual block
+
+ Inputs:
+ data pointer to data to be processed
+ qp quantization parameter
+ skip skip processing of data[0], set to non-zero value
+ if dc coeff is handled separately
+ coeffMap 16 lsb's indicate which coeffs are non-zero,
+ bit 0 (lsb) for coeff 0, bit 1 for coeff 1 etc.
+
+ Outputs:
+ data processed data
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK processed data not in valid range [-512, 511]
+
+------------------------------------------------------------------------------*/
+/* Inverse zig-zag scan, inverse scaling and inverse 4x4 transform of one
+ * residual block, done in place on the 16 entries of data. Three paths are
+ * taken depending on coeffMap: the full transform, a first-row-only
+ * shortcut, and a DC-only shortcut. Returns HANTRO_NOK as soon as a result
+ * outside [-512,511] is seen, HANTRO_OK otherwise. */
+u32 h264bsdProcessBlock(i32 *data, u32 qp, u32 skip, u32 coeffMap)
+{
+
+/* Variables */
+
+ i32 tmp0, tmp1, tmp2, tmp3;
+ i32 d1, d2, d3;
+ u32 row,col;
+ u32 qpDiv;
+ i32 *ptr;
+
+/* Code */
+
+ /* NOTE(review): qp indexes the 52-entry qpDiv6/qpMod6 tables without a
+ * range check in this function; presumably callers guarantee qp <= 51 --
+ * confirm. */
+ qpDiv = qpDiv6[qp];
+ /* tmp1..tmp3 = the three levelScale columns for qp % 6, each already
+ * multiplied by 2^(qp / 6) */
+ tmp1 = levelScale[qpMod6[qp]][0] << qpDiv;
+ tmp2 = levelScale[qpMod6[qp]][1] << qpDiv;
+ tmp3 = levelScale[qpMod6[qp]][2] << qpDiv;
+
+ /* the DC coefficient is scaled here unless the caller asked to skip it */
+ if (!skip)
+ data[0] = (data[0] * tmp1);
+
+ /* at least one of the rows 1, 2 or 3 contain non-zero coeffs, mask takes
+ * the scanning order into account */
+ if (coeffMap & 0xFF9C)
+ {
+ /* do the zig-zag scan and inverse quantization */
+ /* The permutation is done in place: each group first loads the values
+ * it is about to overwrite into d1..d3 (and tmp0 for the one value
+ * that has to survive across two groups), then stores the scaled
+ * values at their destination positions. */
+ d1 = data[1];
+ d2 = data[14];
+ d3 = data[15];
+ data[1] = (d1 * tmp2);
+ data[14] = (d2 * tmp2);
+ data[15] = (d3 * tmp3);
+
+ d1 = data[2];
+ d2 = data[5];
+ d3 = data[4];
+ data[4] = (d1 * tmp2);
+ data[2] = (d2 * tmp1);
+ data[5] = (d3 * tmp3);
+
+ d1 = data[8];
+ d2 = data[3];
+ d3 = data[6];
+ tmp0 = (d1 * tmp2);
+ data[8] = (d2 * tmp1);
+ data[3] = (d3 * tmp2);
+ d1 = data[7];
+ d2 = data[12];
+ d3 = data[9];
+ data[6] = (d1 * tmp2);
+ data[7] = (d2 * tmp3);
+ data[12] = (d3 * tmp2);
+ data[9] = tmp0;
+
+ d1 = data[10];
+ d2 = data[11];
+ d3 = data[13];
+ data[13] = (d1 * tmp3);
+ data[10] = (d2 * tmp1);
+ data[11] = (d3 * tmp2);
+
+ /* horizontal transform */
+ /* tmp0..tmp3 are reused as butterfly temporaries from here on; the
+ * scale factors they held are no longer needed */
+ for (row = 4, ptr = data; row--; ptr += 4)
+ {
+ tmp0 = ptr[0] + ptr[2];
+ tmp1 = ptr[0] - ptr[2];
+ tmp2 = (ptr[1] >> 1) - ptr[3];
+ tmp3 = ptr[1] + (ptr[3] >> 1);
+ ptr[0] = tmp0 + tmp3;
+ ptr[1] = tmp1 + tmp2;
+ ptr[2] = tmp1 - tmp2;
+ ptr[3] = tmp0 - tmp3;
+ }
+
+ /*lint +e661 +e662*/
+ /* then vertical transform */
+ /* data itself is advanced one column per iteration; each result is
+ * rounded with +32 and scaled down by 64 */
+ for (col = 4; col--; data++)
+ {
+ tmp0 = data[0] + data[8];
+ tmp1 = data[0] - data[8];
+ tmp2 = (data[4] >> 1) - data[12];
+ tmp3 = data[4] + (data[12] >> 1);
+ data[0 ] = (tmp0 + tmp3 + 32)>>6;
+ data[4 ] = (tmp1 + tmp2 + 32)>>6;
+ data[8 ] = (tmp1 - tmp2 + 32)>>6;
+ data[12] = (tmp0 - tmp3 + 32)>>6;
+ /* check that each value is in the range [-512,511] */
+ /* (adding 512 and comparing as unsigned folds both bounds into a
+ * single test); on failure the remaining columns are left
+ * untransformed */
+ if (((u32)(data[0] + 512) > 1023) ||
+ ((u32)(data[4] + 512) > 1023) ||
+ ((u32)(data[8] + 512) > 1023) ||
+ ((u32)(data[12] + 512) > 1023) )
+ return(HANTRO_NOK);
+ }
+ }
+ else /* rows 1, 2 and 3 are zero */
+ {
+ /* only dc-coeff is non-zero, i.e. coeffs at original positions
+ * 1, 5 and 6 are zero */
+ if ((coeffMap & 0x62) == 0)
+ {
+ /* the rounded, scaled-down DC value is written to all 16 entries */
+ tmp0 = (data[0] + 32) >> 6;
+ /* check that value is in the range [-512,511] */
+ if ((u32)(tmp0 + 512) > 1023)
+ return(HANTRO_NOK);
+ data[0] = data[1] = data[2] = data[3] = data[4] = data[5] =
+ data[6] = data[7] = data[8] = data[9] = data[10] =
+ data[11] = data[12] = data[13] = data[14] = data[15] =
+ tmp0;
+ }
+ else /* at least one of the coeffs 1, 5 or 6 is non-zero */
+ {
+ /* scale the first-row coefficients, moving the ones received at
+ * positions 5 and 6 to indices 2 and 3 */
+ data[1] = (data[1] * tmp2);
+ data[2] = (data[5] * tmp1);
+ data[3] = (data[6] * tmp2);
+ /* one horizontal butterfly with the final rounding folded in */
+ tmp0 = data[0] + data[2];
+ tmp1 = data[0] - data[2];
+ tmp2 = (data[1] >> 1) - data[3];
+ tmp3 = data[1] + (data[3] >> 1);
+ data[0] = (tmp0 + tmp3 + 32)>>6;
+ data[1] = (tmp1 + tmp2 + 32)>>6;
+ data[2] = (tmp1 - tmp2 + 32)>>6;
+ data[3] = (tmp0 - tmp3 + 32)>>6;
+ /* rows 1..3 are copies of row 0 */
+ data[4] = data[8] = data[12] = data[0];
+ data[5] = data[9] = data[13] = data[1];
+ data[6] = data[10] = data[14] = data[2];
+ data[7] = data[11] = data[15] = data[3];
+ /* check that each value is in the range [-512,511] */
+ if (((u32)(data[0] + 512) > 1023) ||
+ ((u32)(data[1] + 512) > 1023) ||
+ ((u32)(data[2] + 512) > 1023) ||
+ ((u32)(data[3] + 512) > 1023) )
+ return(HANTRO_NOK);
+ }
+ }
+
+ return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdProcessLumaDc
+
+ Functional description:
+ Function performs inverse zig-zag scan, inverse transform and
+ inverse scaling for a luma DC coefficients block
+
+ Inputs:
+ data pointer to data to be processed
+ qp quantization parameter
+
+ Outputs:
+ data processed data
+
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+void h264bsdProcessLumaDc(i32 *data, u32 qp)
+{
+
+/* Variables */
+
+    /* scanSrc[r]: index, in the order the coefficients arrive in data, of
+     * the value that belongs at raster position r */
+    static const u8 scanSrc[16] =
+        {0, 1, 5, 6, 2, 4, 7, 12, 3, 8, 11, 13, 9, 10, 14, 15};
+
+    i32 blk[16];
+    i32 evenSum, evenDiff, oddDiff, oddSum;
+    i32 scale;
+    i32 *pRow;
+    u32 n;
+    u32 div6;
+
+/* Code */
+
+    div6 = qpDiv6[qp];
+    scale = levelScale[qpMod6[qp]][0];
+
+    /* inverse zig-zag scan into a scratch block */
+    for (n = 0; n < 16; n++)
+        blk[n] = data[scanSrc[n]];
+
+    /* transform along the rows */
+    for (n = 0; n < 4; n++)
+    {
+        pRow = blk + 4*n;
+        evenSum  = pRow[0] + pRow[2];
+        evenDiff = pRow[0] - pRow[2];
+        oddDiff  = pRow[1] - pRow[3];
+        oddSum   = pRow[1] + pRow[3];
+        pRow[0] = evenSum + oddSum;
+        pRow[1] = evenDiff + oddDiff;
+        pRow[2] = evenDiff - oddDiff;
+        pRow[3] = evenSum - oddSum;
+    }
+
+    /* transform along the columns, apply the scaling and store the result
+     * back into data */
+    if (qp >= 12)
+    {
+        /* qp / 6 >= 2: scaling is a pure multiplication */
+        scale <<= (div6-2);
+        for (n = 0; n < 4; n++)
+        {
+            evenSum  = blk[n] + blk[n + 8];
+            evenDiff = blk[n] - blk[n + 8];
+            oddDiff  = blk[n + 4] - blk[n + 12];
+            oddSum   = blk[n + 4] + blk[n + 12];
+            data[n]      = ((evenSum + oddSum)*scale);
+            data[n + 4]  = ((evenDiff + oddDiff)*scale);
+            data[n + 8]  = ((evenDiff - oddDiff)*scale);
+            data[n + 12] = ((evenSum - oddSum)*scale);
+        }
+    }
+    else
+    {
+        /* qp / 6 is 0 or 1: multiply, add half of the divisor for
+         * rounding, then shift right by 2 - qp / 6 */
+        i32 rounding;
+        u32 downShift;
+
+        rounding = (div6 == 1) ? 1 : 2;
+        downShift = 2-div6;
+        for (n = 0; n < 4; n++)
+        {
+            evenSum  = blk[n] + blk[n + 8];
+            evenDiff = blk[n] - blk[n + 8];
+            oddDiff  = blk[n + 4] - blk[n + 12];
+            oddSum   = blk[n + 4] + blk[n + 12];
+            data[n]      = ((evenSum + oddSum)*scale+rounding) >> downShift;
+            data[n + 4]  = ((evenDiff + oddDiff)*scale+rounding) >> downShift;
+            data[n + 8]  = ((evenDiff - oddDiff)*scale+rounding) >> downShift;
+            data[n + 12] = ((evenSum - oddSum)*scale+rounding) >> downShift;
+        }
+    }
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdProcessChromaDc
+
+ Functional description:
+ Function performs inverse transform and inverse scaling for a
+ chroma DC coefficients block
+
+ Inputs:
+ data pointer to data to be processed
+ qp quantization parameter
+
+ Outputs:
+ data processed data
+
+ Returns:
+ none
+
+------------------------------------------------------------------------------*/
+void h264bsdProcessChromaDc(i32 *data, u32 qp)
+{
+
+/* Variables */
+
+    i32 sum02, diff02, diff13, sum13;
+    i32 scale;
+    u32 downShift;
+    u32 blk;
+
+/* Code */
+
+    scale = levelScale[ qpMod6[qp] ][0];
+
+    /* below qp 6 the scaled value is halved; from qp 6 upwards the factor
+     * 2^(qp / 6 - 1) is folded into the scale instead */
+    if (qp < 6)
+    {
+        downShift = 1;
+    }
+    else
+    {
+        scale <<= (qpDiv6[qp]-1);
+        downShift = 0;
+    }
+
+    /* two independent groups of four values, data[0..3] and data[4..7],
+     * are transformed the same way */
+    for (blk = 0; blk < 2; blk++, data += 4)
+    {
+        sum02  = data[0] + data[2];
+        diff02 = data[0] - data[2];
+        diff13 = data[1] - data[3];
+        sum13  = data[1] + data[3];
+        data[0] = ((sum02 + sum13) * scale) >> downShift;
+        data[1] = ((sum02 - sum13) * scale) >> downShift;
+        data[2] = ((diff02 + diff13) * scale) >> downShift;
+        data[3] = ((diff02 - diff13) * scale) >> downShift;
+    }
+
+}
+
+/*lint +e701 +e702 */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.h
new file mode 100755
index 0000000..4f41a23
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_transform.h
@@ -0,0 +1,54 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_TRANSFORM_H
+#define H264SWDEC_TRANSFORM_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+/* In-place inverse scan, scaling and transform of one 16-entry residual
+ * block. skip != 0 leaves data[0] unscaled; coeffMap has bit n set when
+ * coefficient n is non-zero. Returns HANTRO_OK, or HANTRO_NOK when a result
+ * falls outside [-512,511]. */
+u32 h264bsdProcessBlock(i32 *data, u32 qp, u32 skip, u32 coeffMap);
+/* In-place inverse scan, transform and scaling of the 16 luma DC values. */
+void h264bsdProcessLumaDc(i32 *data, u32 qp);
+/* In-place inverse transform and scaling of the chroma DC values; data[0..3]
+ * and data[4..7] are processed as two separate groups. */
+void h264bsdProcessChromaDc(i32 *data, u32 qp);
+
+#endif /* #ifdef H264SWDEC_TRANSFORM_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.c
new file mode 100755
index 0000000..53b2fd8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.c
@@ -0,0 +1,286 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdCountLeadingZeros
+ h264bsdRbspTrailingBits
+ h264bsdMoreRbspData
+ h264bsdNextMbAddress
+ h264bsdSetCurrImageMbPointers
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/* look-up table for expected values of stuffing bits */
+/* look-up table for expected values of stuffing bits: entry n-1 is the
+ * value of an n-bit field consisting of a '1' followed by n-1 '0' bits,
+ * i.e. 1 << (n-1), for n = 1..8 */
+static const u32 stuffingTable[8] = {0x1,0x2,0x4,0x8,0x10,0x20,0x40,0x80};
+
+/* look-up table for chroma quantization parameter as a function of luma QP;
+ * 52 entries (index 0..51), identity up to index 29. Has external linkage
+ * (not static), so it is visible to other translation units. */
+const u32 h264bsdQpC[52] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,
+ 20,21,22,23,24,25,26,27,28,29,29,30,31,32,32,33,34,34,35,35,36,36,37,37,37,
+ 38,38,38,39,39,39,39};
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+ 5.1 Function: h264bsdCountLeadingZeros
+
+ Functional description:
+ Count leading zeros in a code word. Code word is assumed to be
+ right-aligned, last bit of the code word in the lsb of the value.
+
+ Inputs:
+ value code word
+ length number of bits in the code word
+
+ Outputs:
+ none
+
+ Returns:
+ number of leading zeros in the code word
+
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_NEON
+u32 h264bsdCountLeadingZeros(u32 value, u32 length)
+{
+
+/* Variables */
+
+    u32 zeros = 0;
+    u32 mask;
+
+/* Code */
+
+    /* checked before length is used in a shift */
+    ASSERT(length <= 32);
+
+    /* A zero-length code word has no bits and therefore no leading zeros;
+     * answering here also keeps (length - 1) from wrapping around into an
+     * out-of-range shift count. */
+    if (length == 0)
+        return(0);
+
+    /* Start at the most significant bit of the code word. The constant is
+     * unsigned so that length == 32 sets bit 31 without shifting into the
+     * sign bit of a signed int. */
+    mask = (u32)1 << (length - 1);
+
+    /* walk towards the lsb until a set bit is found or the code word is
+     * exhausted (all-zero code word -> result equals length) */
+    while (mask && !(value & mask))
+    {
+        zeros++;
+        mask >>= 1;
+    }
+    return(zeros);
+
+}
+#endif
+/*------------------------------------------------------------------------------
+
+ 5.2 Function: h264bsdRbspTrailingBits
+
+ Functional description:
+ Check Raw Byte Stream Payload (RBSP) trailing bits, i.e. stuffing.
+ Rest of the current byte (whole byte if already byte aligned)
+ in the stream buffer shall contain a '1' bit followed by zero or
+ more '0' bits.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+
+ Outputs:
+ none
+
+ Returns:
+ HANTRO_OK RBSP trailing bits found
+ HANTRO_NOK otherwise
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdRbspTrailingBits(strmData_t *pStrmData)
+{
+
+/* Variables */
+
+    u32 bitsToByteEnd;
+    u32 trailing;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pStrmData->bitPosInWord < 8);
+
+    /* 1..8 bits: the rest of the current byte, or a whole byte when the
+     * position is already aligned */
+    bitsToByteEnd = 8 - pStrmData->bitPosInWord;
+    trailing = h264bsdGetBits(pStrmData, bitsToByteEnd);
+
+    /* reject both a short stream and anything other than a single '1'
+     * followed by zeros */
+    if ((trailing == END_OF_STREAM) ||
+        (trailing != stuffingTable[bitsToByteEnd - 1]))
+        return(HANTRO_NOK);
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ 5.3 Function: h264bsdMoreRbspData
+
+ Functional description:
+ Check if there is more data in the current RBSP. The standard
+ defines this function so that there is more data if
+ -more than 8 bits left or
+ -last bits are not RBSP trailing bits
+
+ Inputs:
+ pStrmData pointer to stream data structure
+
+ Outputs:
+ none
+
+ Returns:
+ HANTRO_TRUE there is more data
+ HANTRO_FALSE no more data
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdMoreRbspData(strmData_t *pStrmData)
+{
+
+/* Variables */
+
+    u32 remaining;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pStrmData->strmBuffReadBits <= 8 * pStrmData->strmBuffSize);
+
+    remaining = pStrmData->strmBuffSize * 8 - pStrmData->strmBuffReadBits;
+
+    /* buffer exhausted */
+    if (remaining == 0)
+        return(HANTRO_FALSE);
+
+    /* more than one byte left -> always more data */
+    if (remaining > 8)
+        return(HANTRO_TRUE);
+
+    /* 1..8 bits left: no more data only if they are exactly a '1' followed
+     * by zeros, i.e. the trailing bits */
+    if ((h264bsdShowBits32(pStrmData)>>(32-remaining)) == (1 << (remaining-1)))
+        return(HANTRO_FALSE);
+
+    return(HANTRO_TRUE);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ 5.4 Function: h264bsdNextMbAddress
+
+ Functional description:
+ Get address of the next macroblock in the current slice group.
+
+ Inputs:
+ pSliceGroupMap slice group for each macroblock
+ picSizeInMbs size of the picture
+ currMbAddr where to start
+
+ Outputs:
+ none
+
+ Returns:
+ address of the next macroblock
+ 0 if none of the following macroblocks belong to same slice
+ group as currMbAddr
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdNextMbAddress(u32 *pSliceGroupMap, u32 picSizeInMbs, u32 currMbAddr)
+{
+
+/* Variables */
+
+    u32 i, sliceGroup;
+
+/* Code */
+
+    ASSERT(pSliceGroupMap);
+    ASSERT(picSizeInMbs);
+    ASSERT(currMbAddr < picSizeInMbs);
+
+    /* slice group the search has to stay in */
+    sliceGroup = pSliceGroupMap[currMbAddr];
+
+    /* Scan forward for the next macroblock of the same slice group. The
+     * index is compared against picSizeInMbs before the map is read, so the
+     * element one past the end of the map is never accessed -- not even
+     * when currMbAddr is the last macroblock or when no later macroblock
+     * belongs to the group. */
+    for (i = currMbAddr + 1; i < picSizeInMbs; i++)
+    {
+        if (pSliceGroupMap[i] == sliceGroup)
+            return(i);
+    }
+
+    /* no following macroblock in this slice group */
+    return(0);
+
+}
+
+
+/*------------------------------------------------------------------------------
+
+ 5.5 Function: h264bsdSetCurrImageMbPointers
+
+ Functional description:
+ Set luma and chroma pointers in image_t for current MB
+
+ Inputs:
+ image Current image
+ mbNum number of current MB
+
+ Outputs:
+ none
+
+ Returns:
+ none
+------------------------------------------------------------------------------*/
+void h264bsdSetCurrImageMbPointers(image_t *image, u32 mbNum)
+{
+    u32 mbsPerRow;
+    u32 totalMbs;
+    u32 mbCol;
+    u32 rowFirstMb;
+
+    /* image->width and image->height are used as macroblock counts here */
+    mbsPerRow = image->width;
+    totalMbs = mbsPerRow * image->height;
+
+    /* column of the macroblock and number of the first macroblock in its
+     * row */
+    mbCol = mbNum % mbsPerRow;
+    rowFirstMb = (mbNum / mbsPerRow) * mbsPerRow;
+
+    /* luma: 256 bytes per macroblock, 16 bytes across per macroblock */
+    image->luma = (u8*)(image->data + mbCol * 16 + rowFirstMb * 256);
+    /* cb follows the whole luma area: 64 bytes per macroblock, 8 across */
+    image->cb = (u8*)(image->data + totalMbs * 256 + rowFirstMb * 64 +
+                      mbCol * 8);
+    /* cr sits one full cb area after the cb pointer */
+    image->cr = (u8*)(image->cb + totalMbs * 64);
+}
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.h
new file mode 100755
index 0000000..cb3adda
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_util.h
@@ -0,0 +1,178 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_UTIL_H
+#define H264SWDEC_UTIL_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#ifdef _ASSERT_USED
+#include <assert.h>
+#endif
+
+#include "H264SwDecApi.h"
+
+#if defined(_RANGE_CHECK) || defined(_DEBUG_PRINT) || defined(_ERROR_PRINT)
+#include <stdio.h>
+#endif
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_image.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+#define HANTRO_OK 0
+#define HANTRO_NOK 1
+
+#define HANTRO_TRUE (1)
+#define HANTRO_FALSE (0)
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+#define MEMORY_ALLOCATION_ERROR 0xFFFF
+#define PARAM_SET_ERROR 0xFFF0
+
+/* value to be returned by GetBits if stream buffer is empty */
+#define END_OF_STREAM 0xFFFFFFFFU
+
+#define EMPTY_RESIDUAL_INDICATOR 0xFFFFFF
+
+/* macro to mark a residual block empty, i.e. contain zero coefficients */
+#define MARK_RESIDUAL_EMPTY(residual) ((residual)[0] = EMPTY_RESIDUAL_INDICATOR)
+/* macro to check if residual block is empty */
+#define IS_RESIDUAL_EMPTY(residual) ((residual)[0] == EMPTY_RESIDUAL_INDICATOR)
+
+/* macro for assertion, used only if compiler flag _ASSERT_USED is defined */
+#ifdef _ASSERT_USED
+#define ASSERT(expr) assert(expr)
+#else
+#define ASSERT(expr)
+#endif
+
+/* macro for range checking a value: prints a warning to stderr when value is
+ * outside [minBound, maxBound]; active only if _RANGE_CHECK is defined */
+#ifdef _RANGE_CHECK
+#define RANGE_CHECK(value, minBound, maxBound) \
+{ \
+ if ((value) < (minBound) || (value) > (maxBound)) \
+ fprintf(stderr, "Warning: Value exceeds given limit(s)!\n"); \
+}
+#else
+#define RANGE_CHECK(value, minBound, maxBound)
+#endif
+
+/* macro for range checking an array, used only if compiler flag _RANGE_CHECK
+ * is defined */
+#ifdef _RANGE_CHECK
+#define RANGE_CHECK_ARRAY(array, minBound, maxBound, length) \
+{ \
+ i32 i; \
+ for (i = 0; i < (length); i++) \
+ if ((array)[i] < (minBound) || (array)[i] > (maxBound)) \
+ fprintf(stderr,"Warning: Value [%d] exceeds given limit(s)!\n",i); \
+}
+#else
+#define RANGE_CHECK_ARRAY(array, minBound, maxBound, length)
+#endif
+
+/* macro for debug printing, used only if compiler flag _DEBUG_PRINT is
+ * defined */
+#ifdef _DEBUG_PRINT
+#define DEBUG(args) printf args
+#else
+#define DEBUG(args)
+#endif
+
+/* macro for error printing, used only if compiler flag _ERROR_PRINT is
+ * defined */
+#ifdef _ERROR_PRINT
+#define EPRINT(msg) fprintf(stderr,"ERROR: %s\n",msg)
+#else
+#define EPRINT(msg)
+#endif
+
+/* macro to get smaller of two values; an argument may be evaluated twice */
+#define MIN(a, b) (((a) < (b)) ? (a) : (b))
+
+/* macro to get greater of two values; an argument may be evaluated twice */
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+
+/* macro to get absolute value; the argument is evaluated twice */
+#define ABS(a) (((a) < 0) ? -(a) : (a))
+
+/* macro to clip a value z, so that x <= z <= y (z may be evaluated 3 times) */
+#define CLIP3(x,y,z) (((z) < (x)) ? (x) : (((z) > (y)) ? (y) : (z)))
+
+/* macro to clip a value z, so that 0 <= z <= 255 (z may be evaluated 3 times) */
+#define CLIP1(z) (((z) < 0) ? 0 : (((z) > 255) ? 255 : (z)))
+
+/* macro to allocate memory for count objects of type; the size product is not checked for overflow and the result is not tested here */
+#define ALLOCATE(ptr, count, type) \
+{ \
+ (ptr) = H264SwDecMalloc((count) * sizeof(type)); \
+}
+
+/* macro to free allocated memory; ptr is set to NULL afterwards */
+#define FREE(ptr) \
+{ \
+ H264SwDecFree((ptr)); (ptr) = NULL; \
+}
+/* step ptr forward to a bytePos boundary; the mask needs bytePos to be a power of two. Unsigned arithmetic, arguments parenthesised */
+#define ALIGN(ptr, bytePos) \
+    ((ptr) + ((((bytePos) - (unsigned long)(ptr)) & ((bytePos) - 1)) / sizeof(*(ptr))))
+
+extern const u32 h264bsdQpC[52];
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+#ifndef H264DEC_NEON
+u32 h264bsdCountLeadingZeros(u32 value, u32 length);
+#else
+u32 h264bsdCountLeadingZeros(u32 value);
+#endif
+u32 h264bsdRbspTrailingBits(strmData_t *strmData);
+
+u32 h264bsdMoreRbspData(strmData_t *strmData);
+
+u32 h264bsdNextMbAddress(u32 *pSliceGroupMap, u32 picSizeInMbs, u32 currMbAddr);
+
+void h264bsdSetCurrImageMbPointers(image_t *image, u32 mbNum);
+
+#endif /* #ifdef H264SWDEC_UTIL_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.c
new file mode 100755
index 0000000..060f35e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.c
@@ -0,0 +1,391 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdDecodeExpGolombUnsigned
+ h264bsdDecodeExpGolombSigned
+ h264bsdDecodeExpGolombMapped
+ h264bsdDecodeExpGolombTruncated
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_vlc.h"
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+/* definition of special code num, this along with the return value is used
+ * to handle code num in the range [0, 2^32] in the DecodeExpGolombUnsigned
+ * function */
+#define BIG_CODE_NUM 0xFFFFFFFFU
+
+/* Mapping tables for coded_block_pattern, used for decoding of mapped
+ * Exp-Golomb codes */
+static const u8 codedBlockPatternIntra4x4[48] = {
+ 47,31,15,0,23,27,29,30,7,11,13,14,39,43,45,46,16,3,5,10,12,19,21,26,28,35,
+ 37,42,44,1,2,4,8,17,18,20,24,6,9,22,25,32,33,34,36,40,38,41};
+
+static const u8 codedBlockPatternInter[48] = {
+ 0,16,1,2,4,8,32,3,5,10,12,15,47,7,11,13,14,6,9,31,35,37,42,44,33,34,36,40,
+ 39,43,45,46,17,18,20,24,19,21,26,28,23,27,29,30,22,25,38,41};
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+
+ 5.1 Function: h264bsdDecodeExpGolombUnsigned
+
+ Functional description:
+ Decode unsigned Exp-Golomb code. This is the same as codeNum used
+ in other Exp-Golomb code mappings. Code num (i.e. the decoded
+ symbol) is determined as
+
+ codeNum = 2^leadingZeros - 1 + GetBits(leadingZeros)
+
+ Normal decoded symbols are in the range [0, 2^32 - 2]. Symbol
+ 2^32-1 is indicated by BIG_CODE_NUM with return value HANTRO_OK
+ while symbol 2^32 is indicated by BIG_CODE_NUM with return value
+ HANTRO_NOK. These two symbols are special cases with code length
+ of 65, i.e. 32 '0' bits, a '1' bit, and either 0 or 1 represented
+ by 32 bits.
+
+ Symbol 2^32 is out of unsigned 32-bit range but is needed for
+ DecodeExpGolombSigned to express value -2^31.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+
+ Outputs:
+ codeNum decoded code word is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK failure, no valid code word found, note exception
+ with BIG_CODE_NUM
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeExpGolombUnsigned(strmData_t *pStrmData, u32 *codeNum)
+{
+
+/* Variables */
+
+    u32 bits, numZeros;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(codeNum);
+
+    bits = h264bsdShowBits32(pStrmData);
+
+    /* first bit is 1 -> code length 1 */
+    if (bits >= 0x80000000)
+    {
+        h264bsdFlushBits(pStrmData, 1);
+        *codeNum = 0;
+        return(HANTRO_OK);
+    }
+    /* second bit is 1 -> code length 3 */
+    else if (bits >= 0x40000000)
+    {
+        if (h264bsdFlushBits(pStrmData, 3) == END_OF_STREAM)
+            return(HANTRO_NOK);
+        *codeNum = 1 + ((bits >> 29) & 0x1);
+        return(HANTRO_OK);
+    }
+    /* third bit is 1 -> code length 5 */
+    else if (bits >= 0x20000000)
+    {
+        if (h264bsdFlushBits(pStrmData, 5) == END_OF_STREAM)
+            return(HANTRO_NOK);
+        *codeNum = 3 + ((bits >> 27) & 0x3);
+        return(HANTRO_OK);
+    }
+    /* fourth bit is 1 -> code length 7 */
+    else if (bits >= 0x10000000)
+    {
+        if (h264bsdFlushBits(pStrmData, 7) == END_OF_STREAM)
+            return(HANTRO_NOK);
+        *codeNum = 7 + ((bits >> 25) & 0x7);
+        return(HANTRO_OK);
+    }
+    /* other code lengths */
+    else
+    {
+#ifndef H264DEC_NEON
+        numZeros = 4 + h264bsdCountLeadingZeros(bits, 28);
+#else
+        numZeros = h264bsdCountLeadingZeros(bits);
+#endif
+        /* all 32 bits are zero */
+        if (numZeros == 32)
+        {
+            *codeNum = 0;
+            h264bsdFlushBits(pStrmData,32);
+            bits = h264bsdGetBits(pStrmData, 1);
+            /* check 33rd bit, must be 1 */
+            if (bits == 1)
+            {
+                /* cannot use h264bsdGetBits, limited to 31 bits */
+                bits = h264bsdShowBits32(pStrmData);
+                if (h264bsdFlushBits(pStrmData, 32) == END_OF_STREAM)
+                    return(HANTRO_NOK);
+                /* code num 2^32 - 1, needed for unsigned mapping */
+                if (bits == 0)
+                {
+                    *codeNum = BIG_CODE_NUM;
+                    return(HANTRO_OK);
+                }
+                /* code num 2^32, needed for signed mapping
+                 * (results in -2^31) */
+                else if (bits == 1)
+                {
+                    *codeNum = BIG_CODE_NUM;
+                    return(HANTRO_NOK);
+                }
+            }
+            /* if more zeros than 32, it is an error */
+            return(HANTRO_NOK);
+        }
+        else
+            h264bsdFlushBits(pStrmData,numZeros+1);
+
+        bits = h264bsdGetBits(pStrmData, numZeros);
+        if (bits == END_OF_STREAM)
+            return(HANTRO_NOK);
+        /* numZeros can be as large as 31 here; an unsigned 1 is shifted
+         * because 1 << 31 overflows a signed int */
+        *codeNum = ((u32)1 << numZeros) - 1 + bits;
+    }
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ 5.2 Function: h264bsdDecodeExpGolombSigned
+
+ Functional description:
+ Decode signed Exp-Golomb code. Code num is determined by
+ h264bsdDecodeExpGolombUnsigned and then mapped to signed
+ representation as
+
+ symbol = (-1)^(codeNum+1) * (codeNum+1)/2
+
+ Signed symbols shall be in the range [-2^31, 2^31 - 1]. Symbol
+ -2^31 is obtained when codeNum is 2^32, which cannot be expressed
+ by unsigned 32-bit value. This is signaled as a special case from
+ the h264bsdDecodeExpGolombUnsigned by setting codeNum to
+ BIG_CODE_NUM and returning HANTRO_NOK status.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+
+ Outputs:
+ value decoded code word is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK failure, no valid code word found
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeExpGolombSigned(strmData_t *pStrmData, i32 *value)
+{
+
+/* Variables */
+
+    u32 result;
+    u32 code = 0;
+    i32 half;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(value);
+
+    /* code starts at 0 so that a failed decode which never stores a code
+     * num cannot be mistaken for BIG_CODE_NUM below */
+    result = h264bsdDecodeExpGolombUnsigned(pStrmData, &code);
+
+    if (code != BIG_CODE_NUM)
+    {
+        /* ordinary code num: a decoding failure is passed on as
+         * HANTRO_NOK and nothing is written to *value */
+        if (result != HANTRO_OK)
+            return(HANTRO_NOK);
+
+        /* the magnitude is (code+1)/2; an odd code num maps to the
+         * positive value, an even one to the negative value */
+        /*lint -e702 */
+        half = (i32)((code + 1) >> 1);
+        /*lint +e702 */
+        *value = (code & 0x1) ? half : -half;
+        return(HANTRO_OK);
+    }
+
+    /* BIG_CODE_NUM together with HANTRO_OK stands for code num 2^32-1,
+     * i.e. signed value 2^31, which is outside the 32-bit signed range */
+    if (result == HANTRO_OK)
+        return(HANTRO_NOK);
+
+    /* BIG_CODE_NUM together with HANTRO_NOK stands for code num 2^32,
+     * i.e. signed value -2^31 */
+    *value = (i32)(2147483648U);
+    return (HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ 5.3 Function: h264bsdDecodeExpGolombMapped
+
+ Functional description:
+ Decode mapped Exp-Golomb code. Code num is determined by
+ h264bsdDecodeExpGolombUnsigned and then mapped to codedBlockPattern
+ either for intra or inter macroblock. The mapping is implemented by
+ look-up tables defined in the beginning of the file.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+ isIntra flag to indicate if intra or inter mapping is to
+ be used
+
+ Outputs:
+ value decoded code word is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK failure, no valid code word found
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeExpGolombMapped(strmData_t *pStrmData, u32 *value,
+    u32 isIntra)
+{
+
+/* Variables */
+
+    u32 idx;
+    const u8 *table;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(value);
+
+    /* any failure of the unsigned decoder is reported as HANTRO_NOK and
+     * *value is left untouched */
+    if (h264bsdDecodeExpGolombUnsigned(pStrmData, &idx) != HANTRO_OK)
+        return (HANTRO_NOK);
+
+    /* both mapping tables have 48 entries -> valid indices are [0,47] */
+    if (idx > 47)
+        return (HANTRO_NOK);
+
+    /* choose the intra or the inter mapping and look the pattern up */
+    table = isIntra ? codedBlockPatternIntra4x4 : codedBlockPatternInter;
+    *value = table[idx];
+
+    return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ 5.4 Function: h264bsdDecodeExpGolombTruncated
+
+ Functional description:
+ Decode truncated Exp-Golomb code. greaterThanOne flag indicates
+ the range of the symbol to be decoded as follows:
+ FALSE -> [0,1]
+ TRUE -> [0,2^32-1]
+
+ If flag is false the decoding is performed by reading one bit
+ from the stream with h264bsdGetBits and mapping this to decoded
+ symbol as
+ symbol = bit ? 0 : 1
+
+ Otherwise, i.e. when flag is TRUE, code num is determined by
+ h264bsdDecodeExpGolombUnsigned and this is used as the decoded
+ symbol.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+ greaterThanOne flag to indicate if range is wider than [0,1]
+
+ Outputs:
+ value decoded code word is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK failure, no valid code word found
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeExpGolombTruncated(
+    strmData_t *pStrmData,
+    u32 *value,
+    u32 greaterThanOne)
+{
+
+/* Variables */
+
+    u32 bit;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(value);
+
+    /* wide range: the symbol is the plain unsigned Exp-Golomb code num */
+    if (greaterThanOne)
+        return(h264bsdDecodeExpGolombUnsigned(pStrmData, value));
+
+    /* range [0,1]: one bit is read and stored; on END_OF_STREAM that
+     * marker is what stays in *value */
+    bit = h264bsdGetBits(pStrmData,1);
+    *value = bit;
+    if (bit == END_OF_STREAM)
+        return (HANTRO_NOK);
+    *value = bit ^ 0x1;
+    return (HANTRO_OK);
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.h
new file mode 100755
index 0000000..4c16773
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vlc.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_VLC_H
+#define H264SWDEC_VLC_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_transform.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeExpGolombUnsigned(strmData_t *pStrmData, u32 *value);
+
+u32 h264bsdDecodeExpGolombSigned(strmData_t *pStrmData, i32 *value);
+
+u32 h264bsdDecodeExpGolombMapped(strmData_t *pStrmData, u32 *value,
+ u32 isIntra);
+
+u32 h264bsdDecodeExpGolombTruncated(strmData_t *pStrmData, u32 *value,
+ u32 greaterThanOne);
+
+#endif /* #ifdef H264SWDEC_VLC_H */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.c b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.c
new file mode 100755
index 0000000..4a9335a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.c
@@ -0,0 +1,490 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. External compiler flags
+ 3. Module defines
+ 4. Local function prototypes
+ 5. Functions
+ h264bsdDecodeVuiParameters
+ DecodeHrdParameters
+
+------------------------------------------------------------------------------*/
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "h264bsd_vui.h"
+#include "basetype.h"
+#include "h264bsd_vlc.h"
+#include "h264bsd_stream.h"
+#include "h264bsd_util.h"
+
+/*------------------------------------------------------------------------------
+ 2. External compiler flags
+--------------------------------------------------------------------------------
+
+--------------------------------------------------------------------------------
+ 3. Module defines
+------------------------------------------------------------------------------*/
+
+#define MAX_DPB_SIZE 16
+#define MAX_BR 240000 /* for level 5.1 */
+#define MAX_CPB 240000 /* for level 5.1 */
+
+/*------------------------------------------------------------------------------
+ 4. Local function prototypes
+------------------------------------------------------------------------------*/
+
+static u32 DecodeHrdParameters(
+ strmData_t *pStrmData,
+ hrdParameters_t *pHrdParameters);
+
+/*------------------------------------------------------------------------------
+
+ Function: h264bsdDecodeVuiParameters
+
+ Functional description:
+ Decode VUI parameters from the stream. See standard for details.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+
+ Outputs:
+ pVuiParameters decoded information is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK invalid stream data or end of stream
+
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeVuiParameters(strmData_t *pStrmData,
+ vuiParameters_t *pVuiParameters)
+{
+
+/* Variables */
+
+ u32 tmp;
+
+/* Code */
+
+ ASSERT(pStrmData);
+ ASSERT(pVuiParameters);
+ /* start from an all-zero structure; fields not written below stay 0 */
+ H264SwDecMemset(pVuiParameters, 0, sizeof(vuiParameters_t));
+ /* aspect ratio: 1-bit presence flag, 8-bit idc when present */
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->aspectRatioPresentFlag = (tmp == 1) ?
+ HANTRO_TRUE : HANTRO_FALSE;
+
+ if (pVuiParameters->aspectRatioPresentFlag)
+ {
+ tmp = h264bsdGetBits(pStrmData, 8);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->aspectRatioIdc = tmp;
+ /* extended SAR: explicit 16-bit width and 16-bit height follow */
+ if (pVuiParameters->aspectRatioIdc == ASPECT_RATIO_EXTENDED_SAR)
+ {
+ tmp = h264bsdGetBits(pStrmData, 16);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->sarWidth = tmp;
+
+ tmp = h264bsdGetBits(pStrmData, 16);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->sarHeight = tmp;
+ }
+ }
+ /* overscan: presence flag, then one more flag when present */
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->overscanInfoPresentFlag = (tmp == 1) ?
+ HANTRO_TRUE : HANTRO_FALSE;
+
+ if (pVuiParameters->overscanInfoPresentFlag)
+ {
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->overscanAppropriateFlag = (tmp == 1) ?
+ HANTRO_TRUE : HANTRO_FALSE;
+ }
+ /* video signal type; the else branches set the values used when absent */
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->videoSignalTypePresentFlag = (tmp == 1) ?
+ HANTRO_TRUE : HANTRO_FALSE;
+
+ if (pVuiParameters->videoSignalTypePresentFlag)
+ {
+ tmp = h264bsdGetBits(pStrmData, 3);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->videoFormat = tmp;
+
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->videoFullRangeFlag = (tmp == 1) ?
+ HANTRO_TRUE : HANTRO_FALSE;
+
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->colourDescriptionPresentFlag =
+ (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+ if (pVuiParameters->colourDescriptionPresentFlag)
+ {
+ tmp = h264bsdGetBits(pStrmData, 8);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->colourPrimaries = tmp;
+
+ tmp = h264bsdGetBits(pStrmData, 8);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->transferCharacteristics = tmp;
+
+ tmp = h264bsdGetBits(pStrmData, 8);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->matrixCoefficients = tmp;
+ }
+ else
+ {
+ pVuiParameters->colourPrimaries = 2;
+ pVuiParameters->transferCharacteristics = 2;
+ pVuiParameters->matrixCoefficients = 2;
+ }
+ }
+ else
+ {
+ pVuiParameters->videoFormat = 5;
+ pVuiParameters->colourPrimaries = 2;
+ pVuiParameters->transferCharacteristics = 2;
+ pVuiParameters->matrixCoefficients = 2;
+ }
+ /* chroma sample location: each of the two values is rejected above 5 */
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->chromaLocInfoPresentFlag =
+ (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+ if (pVuiParameters->chromaLocInfoPresentFlag)
+ {
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+ &pVuiParameters->chromaSampleLocTypeTopField);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ if (pVuiParameters->chromaSampleLocTypeTopField > 5)
+ return(HANTRO_NOK);
+
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+ &pVuiParameters->chromaSampleLocTypeBottomField);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ if (pVuiParameters->chromaSampleLocTypeBottomField > 5)
+ return(HANTRO_NOK);
+ }
+ /* timing info: two raw 32-bit words, each rejected when it is 0 */
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->timingInfoPresentFlag =
+ (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+ if (pVuiParameters->timingInfoPresentFlag)
+ {
+ tmp = h264bsdShowBits32(pStrmData);
+ if (h264bsdFlushBits(pStrmData, 32) == END_OF_STREAM)
+ return(HANTRO_NOK);
+ if (tmp == 0)
+ return(HANTRO_NOK);
+ pVuiParameters->numUnitsInTick = tmp;
+
+ tmp = h264bsdShowBits32(pStrmData);
+ if (h264bsdFlushBits(pStrmData, 32) == END_OF_STREAM)
+ return(HANTRO_NOK);
+ if (tmp == 0)
+ return(HANTRO_NOK);
+ pVuiParameters->timeScale = tmp;
+
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->fixedFrameRateFlag =
+ (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+ }
+ /* NAL HRD: decoded when flagged, otherwise the fixed values below */
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->nalHrdParametersPresentFlag =
+ (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+ if (pVuiParameters->nalHrdParametersPresentFlag)
+ {
+ tmp = DecodeHrdParameters(pStrmData, &pVuiParameters->nalHrdParameters);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ }
+ else
+ {
+ pVuiParameters->nalHrdParameters.cpbCnt = 1;
+ /* MaxBR and MaxCPB should be the values corresponding to the levelIdc
+ * in the SPS containing these VUI parameters. However, these values
+ * are not used anywhere and maximum for any level will be used here */
+ pVuiParameters->nalHrdParameters.bitRateValue[0] = 1200 * MAX_BR + 1;
+ pVuiParameters->nalHrdParameters.cpbSizeValue[0] = 1200 * MAX_CPB + 1;
+ pVuiParameters->nalHrdParameters.initialCpbRemovalDelayLength = 24;
+ pVuiParameters->nalHrdParameters.cpbRemovalDelayLength = 24;
+ pVuiParameters->nalHrdParameters.dpbOutputDelayLength = 24;
+ pVuiParameters->nalHrdParameters.timeOffsetLength = 24;
+ }
+ /* VCL HRD: decoded when flagged, otherwise the fixed values below */
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->vclHrdParametersPresentFlag =
+ (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+ if (pVuiParameters->vclHrdParametersPresentFlag)
+ {
+ tmp = DecodeHrdParameters(pStrmData, &pVuiParameters->vclHrdParameters);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ }
+ else
+ {
+ pVuiParameters->vclHrdParameters.cpbCnt = 1;
+ /* MaxBR and MaxCPB should be the values corresponding to the levelIdc
+ * in the SPS containing these VUI parameters. However, these values
+ * are not used anywhere and maximum for any level will be used here */
+ pVuiParameters->vclHrdParameters.bitRateValue[0] = 1000 * MAX_BR + 1;
+ pVuiParameters->vclHrdParameters.cpbSizeValue[0] = 1000 * MAX_CPB + 1;
+ pVuiParameters->vclHrdParameters.initialCpbRemovalDelayLength = 24;
+ pVuiParameters->vclHrdParameters.cpbRemovalDelayLength = 24;
+ pVuiParameters->vclHrdParameters.dpbOutputDelayLength = 24;
+ pVuiParameters->vclHrdParameters.timeOffsetLength = 24;
+ }
+ /* lowDelayHrdFlag is read only when at least one HRD set was present */
+ if (pVuiParameters->nalHrdParametersPresentFlag ||
+ pVuiParameters->vclHrdParametersPresentFlag)
+ {
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->lowDelayHrdFlag =
+ (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+ }
+
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->picStructPresentFlag =
+ (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->bitstreamRestrictionFlag =
+ (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+ /* bitstream restriction: decoded when flagged, else the values below */
+ if (pVuiParameters->bitstreamRestrictionFlag)
+ {
+ tmp = h264bsdGetBits(pStrmData, 1);
+ if (tmp == END_OF_STREAM)
+ return(HANTRO_NOK);
+ pVuiParameters->motionVectorsOverPicBoundariesFlag =
+ (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+ &pVuiParameters->maxBytesPerPicDenom);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ if (pVuiParameters->maxBytesPerPicDenom > 16)
+ return(HANTRO_NOK);
+
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+ &pVuiParameters->maxBitsPerMbDenom);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ if (pVuiParameters->maxBitsPerMbDenom > 16)
+ return(HANTRO_NOK);
+
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+ &pVuiParameters->log2MaxMvLengthHorizontal);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ if (pVuiParameters->log2MaxMvLengthHorizontal > 16)
+ return(HANTRO_NOK);
+
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+ &pVuiParameters->log2MaxMvLengthVertical);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ if (pVuiParameters->log2MaxMvLengthVertical > 16)
+ return(HANTRO_NOK);
+
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+ &pVuiParameters->numReorderFrames);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ /* numReorderFrames and maxDecFrameBuffering get no range check here */
+ tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+ &pVuiParameters->maxDecFrameBuffering);
+ if (tmp != HANTRO_OK)
+ return(tmp);
+ }
+ else
+ {
+ pVuiParameters->motionVectorsOverPicBoundariesFlag = HANTRO_TRUE;
+ pVuiParameters->maxBytesPerPicDenom = 2;
+ pVuiParameters->maxBitsPerMbDenom = 1;
+ pVuiParameters->log2MaxMvLengthHorizontal = 16;
+ pVuiParameters->log2MaxMvLengthVertical = 16;
+ pVuiParameters->numReorderFrames = MAX_DPB_SIZE;
+ pVuiParameters->maxDecFrameBuffering = MAX_DPB_SIZE;
+ }
+
+ return(HANTRO_OK);
+
+}
+
+/*------------------------------------------------------------------------------
+
+ Function: DecodeHrdParameters
+
+ Functional description:
+ Decode HRD parameters from the stream. See standard for details.
+
+ Inputs:
+ pStrmData pointer to stream data structure
+
+ Outputs:
+ pHrdParameters decoded information is stored here
+
+ Returns:
+ HANTRO_OK success
+ HANTRO_NOK invalid stream data
+
+------------------------------------------------------------------------------*/
+
+static u32 DecodeHrdParameters(
+    strmData_t *pStrmData,
+    hrdParameters_t *pHrdParameters)
+{
+
+/* Variables */
+
+    u32 tmp, i;
+
+/* Code */
+
+    ASSERT(pStrmData);
+    ASSERT(pHrdParameters);
+    /* cpb_cnt_minus1 is range checked before 1 is added: a code num of
+     * 2^32-1 would otherwise wrap cpbCnt to 0 and pass the limit test */
+    tmp = h264bsdDecodeExpGolombUnsigned(pStrmData, &pHrdParameters->cpbCnt);
+    if (tmp != HANTRO_OK)
+        return(tmp);
+    if (pHrdParameters->cpbCnt >= MAX_CPB_CNT)
+        return(HANTRO_NOK);
+    /* cpbCount = cpb_cnt_minus1 + 1, at most MAX_CPB_CNT */
+    pHrdParameters->cpbCnt++;
+
+    tmp = h264bsdGetBits(pStrmData, 4);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pHrdParameters->bitRateScale = tmp;
+
+    tmp = h264bsdGetBits(pStrmData, 4);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pHrdParameters->cpbSizeScale = tmp;
+    /* one (bit rate, cpb size, cbr flag) triple is read per CPB */
+    for (i = 0; i < pHrdParameters->cpbCnt; i++)
+    {
+        /* bit_rate_value_minus1 in the range [0, 2^32 - 2] */
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pHrdParameters->bitRateValue[i]);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pHrdParameters->bitRateValue[i] > 4294967294U)
+            return(HANTRO_NOK);
+        pHrdParameters->bitRateValue[i]++;
+        /* this may result in overflow, but this value is not used for
+         * anything */
+        pHrdParameters->bitRateValue[i] *=
+            1 << (6 + pHrdParameters->bitRateScale);
+
+        /* cpb_size_value_minus1 in the range [0, 2^32 - 2] */
+        tmp = h264bsdDecodeExpGolombUnsigned(pStrmData,
+            &pHrdParameters->cpbSizeValue[i]);
+        if (tmp != HANTRO_OK)
+            return(tmp);
+        if (pHrdParameters->cpbSizeValue[i] > 4294967294U)
+            return(HANTRO_NOK);
+        pHrdParameters->cpbSizeValue[i]++;
+        /* this may result in overflow, but this value is not used for
+         * anything */
+        pHrdParameters->cpbSizeValue[i] *=
+            1 << (4 + pHrdParameters->cpbSizeScale);
+
+        tmp = h264bsdGetBits(pStrmData, 1);
+        if (tmp == END_OF_STREAM)
+            return(HANTRO_NOK);
+        pHrdParameters->cbrFlag[i] = (tmp == 1) ? HANTRO_TRUE : HANTRO_FALSE;
+    }
+    /* four 5-bit length fields; the first three are stored as value + 1 */
+    tmp = h264bsdGetBits(pStrmData, 5);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pHrdParameters->initialCpbRemovalDelayLength = tmp + 1;
+
+    tmp = h264bsdGetBits(pStrmData, 5);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pHrdParameters->cpbRemovalDelayLength = tmp + 1;
+
+    tmp = h264bsdGetBits(pStrmData, 5);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pHrdParameters->dpbOutputDelayLength = tmp + 1;
+
+    tmp = h264bsdGetBits(pStrmData, 5);
+    if (tmp == END_OF_STREAM)
+        return(HANTRO_NOK);
+    pHrdParameters->timeOffsetLength = tmp;
+
+    return(HANTRO_OK);
+
+}
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.h b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.h
new file mode 100755
index 0000000..05d52a4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/h264bsd_vui.h
@@ -0,0 +1,130 @@
+/*
+ * Copyright (C) 2009 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*------------------------------------------------------------------------------
+
+ Table of contents
+
+ 1. Include headers
+ 2. Module defines
+ 3. Data types
+ 4. Function prototypes
+
+------------------------------------------------------------------------------*/
+
+#ifndef H264SWDEC_VUI_H
+#define H264SWDEC_VUI_H
+
+/*------------------------------------------------------------------------------
+ 1. Include headers
+------------------------------------------------------------------------------*/
+
+#include "basetype.h"
+#include "h264bsd_stream.h"
+
+/*------------------------------------------------------------------------------
+ 2. Module defines
+------------------------------------------------------------------------------*/
+
+#define MAX_CPB_CNT 32 /* element count of the per-CPB arrays in hrdParameters_t */
+
+/*------------------------------------------------------------------------------
+ 3. Data types
+------------------------------------------------------------------------------*/
+
+/* Enumerated sample aspect ratio codes; ASPECT_RATIO_M_N means M:N. Implicit values are noted per enumerator. */
+enum
+{
+ ASPECT_RATIO_UNSPECIFIED = 0,
+ ASPECT_RATIO_1_1, /* = 1 */
+ ASPECT_RATIO_12_11, /* = 2 */
+ ASPECT_RATIO_10_11, /* = 3 */
+ ASPECT_RATIO_16_11, /* = 4 */
+ ASPECT_RATIO_40_33, /* = 5 */
+ ASPECT_RATIO_24_11, /* = 6 */
+ ASPECT_RATIO_20_11, /* = 7 */
+ ASPECT_RATIO_32_11, /* = 8 */
+ ASPECT_RATIO_80_33, /* = 9 */
+ ASPECT_RATIO_18_11, /* = 10 */
+ ASPECT_RATIO_15_11, /* = 11 */
+ ASPECT_RATIO_64_33, /* = 12 */
+ ASPECT_RATIO_160_99, /* = 13 */
+ ASPECT_RATIO_EXTENDED_SAR = 255 /* NOTE(review): presumably means the ratio is given explicitly via sarWidth/sarHeight -- confirm in the VUI parser */
+};
+
+/* Structure to store Hypothetical Reference Decoder (HRD) parameters; the per-CPB arrays hold MAX_CPB_CNT entries. */
+typedef struct
+{
+ u32 cpbCnt; /* number of entries the HRD parser fills in each per-CPB array below */
+ u32 bitRateScale; /* scale term: the parser multiplies bitRateValue[i] by 1 << (6 + bitRateScale) */
+ u32 cpbSizeScale; /* scale term: the parser multiplies cpbSizeValue[i] by 1 << (4 + cpbSizeScale) */
+ u32 bitRateValue[MAX_CPB_CNT]; /* (decoded value + 1) after scaling; may wrap, the parser notes the value is not used */
+ u32 cpbSizeValue[MAX_CPB_CNT]; /* (decoded value + 1) after scaling; may wrap, the parser notes the value is not used */
+ u32 cbrFlag[MAX_CPB_CNT]; /* HANTRO_TRUE or HANTRO_FALSE, taken from a single stream bit */
+ u32 initialCpbRemovalDelayLength; /* 5-bit stream field plus 1 */
+ u32 cpbRemovalDelayLength; /* 5-bit stream field plus 1 */
+ u32 dpbOutputDelayLength; /* 5-bit stream field plus 1 */
+ u32 timeOffsetLength; /* 5-bit stream field, stored as read */
+} hrdParameters_t;
+
+/* Storage for VUI parameters. NOTE(review): field names appear to mirror the VUI syntax elements of H.264 Annex E -- confirm against the parser. */
+typedef struct
+{
+ u32 aspectRatioPresentFlag;
+ u32 aspectRatioIdc; /* NOTE(review): presumably one of the ASPECT_RATIO_* codes -- confirm */
+ u32 sarWidth; /* NOTE(review): presumably only meaningful for ASPECT_RATIO_EXTENDED_SAR -- confirm */
+ u32 sarHeight; /* NOTE(review): presumably only meaningful for ASPECT_RATIO_EXTENDED_SAR -- confirm */
+ u32 overscanInfoPresentFlag;
+ u32 overscanAppropriateFlag;
+ u32 videoSignalTypePresentFlag;
+ u32 videoFormat;
+ u32 videoFullRangeFlag;
+ u32 colourDescriptionPresentFlag;
+ u32 colourPrimaries;
+ u32 transferCharacteristics;
+ u32 matrixCoefficients;
+ u32 chromaLocInfoPresentFlag;
+ u32 chromaSampleLocTypeTopField;
+ u32 chromaSampleLocTypeBottomField;
+ u32 timingInfoPresentFlag;
+ u32 numUnitsInTick;
+ u32 timeScale;
+ u32 fixedFrameRateFlag;
+ u32 nalHrdParametersPresentFlag; /* NOTE(review): presumably gates validity of nalHrdParameters -- confirm */
+ hrdParameters_t nalHrdParameters; /* embedded HRD parameter block (see hrdParameters_t) */
+ u32 vclHrdParametersPresentFlag; /* NOTE(review): presumably gates validity of vclHrdParameters -- confirm */
+ hrdParameters_t vclHrdParameters; /* embedded HRD parameter block (see hrdParameters_t) */
+ u32 lowDelayHrdFlag;
+ u32 picStructPresentFlag;
+ u32 bitstreamRestrictionFlag;
+ u32 motionVectorsOverPicBoundariesFlag;
+ u32 maxBytesPerPicDenom;
+ u32 maxBitsPerMbDenom;
+ u32 log2MaxMvLengthHorizontal;
+ u32 log2MaxMvLengthVertical;
+ u32 numReorderFrames;
+ u32 maxDecFrameBuffering;
+} vuiParameters_t;
+
+/*------------------------------------------------------------------------------
+ 4. Function prototypes
+------------------------------------------------------------------------------*/
+
+u32 h264bsdDecodeVuiParameters(strmData_t *pStrmData, /* stream state; NOTE(review): presumably read and advanced by the call -- confirm */
+ vuiParameters_t *pVuiParameters); /* destination for decoded fields; NOTE(review): return is presumably HANTRO_OK/HANTRO_NOK -- confirm */
+
+#endif /* #ifndef H264SWDEC_VUI_H */
+
diff --git a/media/libstagefright/omx/SoftOMXPlugin.cpp b/media/libstagefright/omx/SoftOMXPlugin.cpp
index 04ca39e..02b1c8e 100644
--- a/media/libstagefright/omx/SoftOMXPlugin.cpp
+++ b/media/libstagefright/omx/SoftOMXPlugin.cpp
@@ -37,6 +37,7 @@ static const struct {
{ "OMX.google.aac.decoder", "aacdec", "audio_decoder.aac" },
{ "OMX.google.amrnb.decoder", "amrdec", "audio_decoder.amrnb" },
{ "OMX.google.amrwb.decoder", "amrdec", "audio_decoder.amrwb" },
+ { "OMX.google.h264.decoder", "h264dec", "video_decoder.avc" },
{ "OMX.google.avc.decoder", "avcdec", "video_decoder.avc" },
{ "OMX.google.g711.alaw.decoder", "g711dec", "audio_decoder.g711alaw" },
{ "OMX.google.g711.mlaw.decoder", "g711dec", "audio_decoder.g711mlaw" },