summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm11
diff options
context:
space:
mode:
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm11')
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_DELIVERY.TXT63
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_MANIFEST.TXT91
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM.h785
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_BitDec_s.h670
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Bitstream.h212
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCTTable.h40
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCT_s.h1445
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_MaskTable.h27
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Version.h43
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_s.h1154
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armOMX.h274
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes.h252
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes_s.h77
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm11/build_vc.pl111
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/filelist_vc.txt74
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM.c936
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_Bitstream.c329
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_IDCTTable.c60
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_MaskTable.c45
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVC.h1153
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVCCOMM_s.h72
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC.h4381
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC_s.h129
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy16x16_s.s148
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy8x8_s.s72
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s189
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/api/armVCM4P10_CAVLCTables.h30
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s222
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_CAVLCTables.c327
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s20
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s366
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s325
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DequantTables_s.s123
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s236
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s149
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s178
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s296
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s276
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s239
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s185
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s273
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_QuantTables_s.s74
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s407
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s92
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c88
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c91
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c62
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c68
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s480
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s336
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s437
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s331
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s550
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c79
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s426
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s494
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s501
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s567
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s128
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s469
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h37
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h25
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s75
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s398
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c211
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c75
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s104
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c61
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c102
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c208
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s364
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s132
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s136
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s224
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s194
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s73
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s713
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s283
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s141
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s188
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/src/armVC_Version.c6
81 files changed, 25507 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_DELIVERY.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_DELIVERY.TXT
new file mode 100644
index 0000000..5ce70ca
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_DELIVERY.TXT
@@ -0,0 +1,63 @@
+The contents of this transaction was created by Hedley Francis
+of ARM on 19-Feb-2008.
+
+It contains the ARM data versions listed below.
+
+This data, unless otherwise stated, is ARM Proprietary and access to it
+is subject to the agreements indicated below.
+
+If you experience problems with this data, please contact ARM support
+quoting transaction reference <97413>.
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+- OX001-SW-98010-r0p0-00bet1
+ Video codecs - optimised code
+ V6 optimized code release for Hantro (Ver 1.0.2)
+ internal access
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+This transaction contains deliverables which are designated as being of
+beta release status (BET).
+
+Beta release status has a particular meaning to ARM of which the recipient
+must be aware. Beta is a pre-release status indicating that the deliverable
+so described is believed to robustly demonstrate specified behaviour, to be
+consistent across its included aspects and be ready for general deployment.
+But Beta also indicates that pre-release reliability trials are ongoing and
+that it is possible residual defects or errors in operation, consistency
+and documentation may still be encountered. The recipient should consider
+this position when using this Beta material supplied. ARM will normally
+attempt to provide fixes or a work-around for defects identified by the
+recipient, but the provision or timeliness of this support cannot be
+guaranteed. ARM shall not be responsible for direct or consequential
+damages as a result of encountering one or more of these residual defects.
+By accepting a Beta release, the recipient agrees to these constraints and
+to providing reasonable information to ARM to enable the replication of the
+defects identified by the recipient. The specific Beta version supplied
+will not be supported after release of a later or higher status version.
+It should be noted that Support for the Beta release of the deliverable
+will only be provided by ARM to a recipient who has a current support and
+maintenance contract for the deliverable.
+
+
+ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
+
+In addition to the data versions listed above, this transaction contains
+two additional files at the top level.
+
+The first is this file, ARM_DELIVERY_97413.TXT, which is the delivery
+note.
+
+The second is ARM_MANIFEST_97413.TXT which contains a manifest of all the
+files included in this transaction, together with their checksums.
+
+The checksums provided are calculated using the RSA Data Security, Inc.
+MD5 Message-Digest Algorithm.
+
+The checksums can be used to verify the integrity of this data using the
+"md5sum" tool (which is part of the GNU "textutils" package) by running:
+
+ % md5sum --check ARM_MANIFEST_97413.TXT
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_MANIFEST.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_MANIFEST.TXT
new file mode 100644
index 0000000..9b2238b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_MANIFEST.TXT
@@ -0,0 +1,91 @@
+ OX001-SW-98010-r0p0-00bet1/
+ OX001-SW-98010-r0p0-00bet1/api/
+e049791cfab6060a08cbac7b3ad767d6 OX001-SW-98010-r0p0-00bet1/api/armCOMM_s.h
+ed798face25497b2703ede736d6d52b6 OX001-SW-98010-r0p0-00bet1/api/omxtypes_s.h
+4eebd63af087376811d6749f0646b864 OX001-SW-98010-r0p0-00bet1/api/armCOMM_BitDec_s.h
+43cf46c2cf2fe1f93c615b57bcbe4809 OX001-SW-98010-r0p0-00bet1/api/armCOMM.h
+8f248ceaac8f602e277a521b679dcbbe OX001-SW-98010-r0p0-00bet1/api/armCOMM_IDCTTable.h
+53f2ae8a98495f05e26a4cf862a7f750 OX001-SW-98010-r0p0-00bet1/api/armCOMM_Version.h
+3a2f420ddf6a1b950470bd0f5ebd5c62 OX001-SW-98010-r0p0-00bet1/api/armCOMM_IDCT_s.h
+511c0bb534fe223599e2c84eff24c9ed OX001-SW-98010-r0p0-00bet1/api/armCOMM_MaskTable.h
+8971932d56eed6b1ad1ba507f0bff5f0 OX001-SW-98010-r0p0-00bet1/api/armCOMM_Bitstream.h
+f87fedd9ca432fefa757008176864ef8 OX001-SW-98010-r0p0-00bet1/api/armOMX.h
+8e49899a428822c36ef9dd94e0e05f18 OX001-SW-98010-r0p0-00bet1/api/omxtypes.h
+694281d11af52f88e6f9d4cb226ac8a7 OX001-SW-98010-r0p0-00bet1/build_vc.pl
+e72d96c0a415459748df9807f3dae72f OX001-SW-98010-r0p0-00bet1/filelist_vc.txt
+ OX001-SW-98010-r0p0-00bet1/src/
+5eeae659a29477f5c52296d24afffd3c OX001-SW-98010-r0p0-00bet1/src/armCOMM_IDCTTable.c
+d64cdcf38f7749dc7f77465e5b7d356d OX001-SW-98010-r0p0-00bet1/src/armCOMM_MaskTable.c
+ OX001-SW-98010-r0p0-00bet1/vc/
+ OX001-SW-98010-r0p0-00bet1/vc/m4p10/
+ OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/
+e7e0c320978564a7c9b2c723749a98d6 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_CAVLCTables.c
+4adcd0df081990bdfc4729041a2a9152 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
+852e0404142965dc1f3aa7f00ee5127b OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
+7054151c5bfea6b5e74feee86b2d7b01 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+38944c5e0bba01e32ff349c2c87c71b2 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DequantTables_s.s
+32ff4b8be62e2f0f3e764b83c1e5e2fd OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
+90b0e6a04e764902c0a0903640c10b32 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
+28a19ae4fe2258628080d6a89bb54b91 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
+98e196b9e1ffebaf91f62ea9d17fb97d OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
+01ba60eff66ea49a4f833ce6279f8e2f OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
+f301d5a95e07354f593ea5747c01cb0a OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
+44c9ef21e840a100301f7d7a4189957c OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
+a33b03bbd3352d24ed744769e12bb87d OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
+00c20bfda67bb86096b615fc17c94b35 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
+2ddcaf60a8ea1e6e6b77737f768bfb9d OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_QuantTables_s.s
+c3002aad5600f872b70a5d7fe3915846 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
+a2900f2c47f1c61d20bd6c1eda33d6d4 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
+e4fecd66bc47f07539bc308935e84a1f OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
+78815c9df50ba53131bb22d2b829e3c3 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
+1909ae312ac79a03a5fac1d1e8bc0291 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
+3d2c48580655928065de7839866d9bc4 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
+23aa2fdf155d4fa6ff745eab6e01f32b OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
+97f20a93c481d7f6173d919f41e415bd OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
+becd512da202436286811b6aec061f47 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
+dd24a99ae3cd842dcacaf31d47de88b3 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
+c2d995f787b6f44ef10c751c12d1935f OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
+3628fbdf0cd217c287b6ccc94135d06e OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
+4a52b3e9e268b8a8f07829bf500d03af OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
+11249f8a98c5d4b84cb5575b0e37ca9c OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
+3599b1074330965c8ca285d164efccff OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
+3339e026c7de655d9400949eb5e51451 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
+cc4a6f32db0b72a91d3f278f6855df69 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+ OX001-SW-98010-r0p0-00bet1/vc/m4p10/api/
+6e530ddaa7c2b57ffe88162c020cb662 OX001-SW-98010-r0p0-00bet1/vc/m4p10/api/armVCM4P10_CAVLCTables.h
+ OX001-SW-98010-r0p0-00bet1/vc/m4p2/
+ OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/
+cdf412920c2037a725d0420002b6752e OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Clip8_s.s
+dba9824e959b21d401cac925e68a11a6 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+b559b71d5c94627f10e616fb72c0cefc OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
+4fba4c431a783a78a2eb6497a94ac967 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
+1e4c3be8c5eddc00c9f05e83bcf315ef OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
+1b0b2990c2669dfb87cf6b810611c01b OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
+1c9b87abf3283e957816b3937c680701 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
+4fe1afca659a9055fc1172e58f78a506 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
+2ea067f0436f91ba1351edaf411cb4ea OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
+acb92be1dbcdb3ebe824cbe9e28d03bf OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
+a6b41f01b1df7dd656ebdba3084bfa2a OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
+293a48a648a3085456e6665bb7366fad OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
+ffe6b96c74d4881f4d3c8de8cc737797 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
+437dfa204508850d61d4b87091446e9f OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
+ff5915d181bfd2cd2f0bd588bd2300dc OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
+6775eb0c561dbab965c60f85b08c96fd OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
+a0d85f4f517c945a4c9317ac021f2d08 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
+386020dee8b725c7fe2526f1fc211d7d OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
+ OX001-SW-98010-r0p0-00bet1/vc/m4p2/api/
+4624e7c838e10a249abcc3d3f4f40748 OX001-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
+65e1057d04e2cb844559dc9f6e09795a OX001-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
+ OX001-SW-98010-r0p0-00bet1/vc/src/
+e627b3346b0dc9aff14446005ce0fa43 OX001-SW-98010-r0p0-00bet1/vc/src/armVC_Version.c
+ OX001-SW-98010-r0p0-00bet1/vc/api/
+7ca94b1c33ac0211e17d38baadd7d1dd OX001-SW-98010-r0p0-00bet1/vc/api/armVC.h
+12cf7596edbbf6048b626d15e8d0ed48 OX001-SW-98010-r0p0-00bet1/vc/api/omxVC.h
+11726e286a81257cb45f5547fb4d374c OX001-SW-98010-r0p0-00bet1/vc/api/omxVC_s.h
+a5b2af605c319cd2491319e430741377 OX001-SW-98010-r0p0-00bet1/vc/api/armVCCOMM_s.h
+ OX001-SW-98010-r0p0-00bet1/vc/comm/
+ OX001-SW-98010-r0p0-00bet1/vc/comm/src/
+50cca6954c447b012ab39ca7872e5e8f OX001-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy16x16_s.s
+d1c3bce77fc5774c899b447d13f02cd0 OX001-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy8x8_s.s
+fdac1d1bad3fd23c880beb39bc2e89aa OX001-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
+6d9adc2be5bd0311591030d0c6df771c ARM_DELIVERY_97413.TXT
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM.h
new file mode 100644
index 0000000..2ed86a4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM.h
@@ -0,0 +1,785 @@
+/**
+ *
+ * File Name: armCOMM.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armCOMM.h
+ * Brief: Declares Common APIs/Data Types used across OpenMAX APIs
+ *
+ */
+
+
+#ifndef _armCommon_H_
+#define _armCommon_H_
+
+#include "omxtypes.h"
+
+typedef struct
+{
+ OMX_F32 Re; /** Real part (OMX_F32) */
+ OMX_F32 Im; /** Imaginary part (OMX_F32) */
+ /* Members are declared real part first, then imaginary part. */
+} OMX_FC32; /** Single-precision floating-point complex number */
+
+typedef struct
+{
+ OMX_F64 Re; /** Real part (OMX_F64) */
+ OMX_F64 Im; /** Imaginary part (OMX_F64) */
+ /* Members are declared real part first, then imaginary part. */
+} OMX_FC64; /** Double-precision floating-point complex number */
+
+
+/* One 8x8 block held as an array of 64 OMX_S16 values; used by both the IP and IC domains for JPEG blocks. */
+typedef OMX_S16 ARM_BLOCK8x8[64];
+
+
+#include "armOMX.h"
+/* Pi as an OMX_F64 constant. The outer parentheses make the expansion a primary expression, so it is safe under sizeof and other unary operators. */
+#define armPI ((OMX_F64)(3.1415926535897932384626433832795))
+
+/***********************************************************************/
+
+/* Diagnostics helpers: active under ARM_DEBUG; otherwise each macro only evaluates its argument and discards it. */
+#ifdef ARM_DEBUG
+/* Debug version: str is printed through "%s" (never used as a format string) plus a newline; armError then calls exit(-1). */
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#define armError(str) do { printf("%s\n", (str)); exit(-1); } while (0)
+#define armWarn(str) do { printf("%s\n", (str)); } while (0)
+#define armIgnore(a) ((void) (a))
+#define armAssert(a) assert(a)
+#else
+/* Release version: plain void expressions, so call sites need the same trailing ';' as the do/while(0) debug forms. */
+#define armError(str) ((void) (str))
+#define armWarn(str) ((void) (str))
+#define armIgnore(a) ((void) (a))
+#define armAssert(a) ((void) (a))
+#endif /* ARM_DEBUG */
+
+/* Arithmetic operations: type-generic minimum, maximum and absolute value. */
+/* Arguments are expanded more than once, so pass expressions without side effects. */
+#define armMin(a,b) ( (b) < (a) ? (b):(a) )
+#define armMax(a,b) ( (b) < (a) ? (a):(b) )
+#define armAbs(a) ( 0 > (a) ? -(a):(a) )
+
+/* Alignment operation: armAlignToBytes(Ptr,N) yields Ptr advanced to the next N-byte boundary (Ptr itself if already aligned); N must be a power of two. */
+/* The byte gap is computed in unsigned long (no signed overflow, full pointer width on ILP32/LP64) and converted to elements via sizeof(*Ptr). Ptr is expanded more than once. */
+#define armAlignToBytes(Ptr,N) ((Ptr) + ( ((((unsigned long)(N)) - ((unsigned long)(Ptr))) & (((unsigned long)(N)) - 1)) / sizeof(*(Ptr)) ))
+#define armAlignTo2Bytes(Ptr) armAlignToBytes(Ptr,2)
+#define armAlignTo4Bytes(Ptr) armAlignToBytes(Ptr,4)
+#define armAlignTo8Bytes(Ptr) armAlignToBytes(Ptr,8)
+#define armAlignTo16Bytes(Ptr) armAlignToBytes(Ptr,16)
+
+/* Error and Alignment check */
+/* armRetArgErrIf / armRetDataErrIf: make the enclosing function return (code) when (condition) is true. Both expand to a bare if-block, so never follow a use with 'else'. */
+#define armRetArgErrIf(condition, code) if(condition) { return (code); }
+#define armRetDataErrIf(condition, code) if(condition) { return (code); }
+/* Alignment predicates: armIsByteAligned / armNotByteAligned test whether the address Ptr is / is not a multiple of N bytes. */
+#ifndef ALIGNMENT_DOESNT_MATTER
+#define armIsByteAligned(Ptr,N) ((((unsigned long)(Ptr)) % (N))==0)
+#define armNotByteAligned(Ptr,N) ((((unsigned long)(Ptr)) % (N))!=0)
+#else
+#define armIsByteAligned(Ptr,N) (1)
+#define armNotByteAligned(Ptr,N) (0)
+#endif
+/* With ALIGNMENT_DOESNT_MATTER defined, every pointer is reported as aligned and the arguments are not evaluated. */
+#define armIs2ByteAligned(Ptr) armIsByteAligned(Ptr,2)
+#define armIs4ByteAligned(Ptr) armIsByteAligned(Ptr,4)
+#define armIs8ByteAligned(Ptr) armIsByteAligned(Ptr,8)
+#define armIs16ByteAligned(Ptr) armIsByteAligned(Ptr,16)
+/* Negated forms of the fixed-width checks above. */
+#define armNot2ByteAligned(Ptr) armNotByteAligned(Ptr,2)
+#define armNot4ByteAligned(Ptr) armNotByteAligned(Ptr,4)
+#define armNot8ByteAligned(Ptr) armNotByteAligned(Ptr,8)
+#define armNot16ByteAligned(Ptr) armNotByteAligned(Ptr,16)
+#define armNot32ByteAligned(Ptr) armNotByteAligned(Ptr,32)
+
+/**
+ * Function: armRoundFloatToS16/armRoundFloatToS32/armRoundFloatToS64
+ *
+ * Description:
+ * Converts a double precision value into an OMX_S16/OMX_S32/OMX_S64 integer after rounding
+ *
+ * Parameters:
+ * [in] Value Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16/OMX_S32/OMX_S64 format
+ *
+ */
+
+OMX_S16 armRoundFloatToS16 (OMX_F64 Value);
+OMX_S32 armRoundFloatToS32 (OMX_F64 Value);
+OMX_S64 armRoundFloatToS64 (OMX_F64 Value);
+
+/**
+ * Function: armSatRoundFloatToS16/armSatRoundFloatToS32
+ *
+ * Description:
+ * Converts a double precision value into a short int/int after rounding and saturation
+ *
+ * Parameters:
+ * [in] Value Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_S16/OMX_S32 format
+ *
+ */
+
+OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value);
+OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value);
+
+/**
+ * Function: armSatRoundFloatToU16/armSatRoundFloatToU32
+ *
+ * Description:
+ * Converts a double precision value into an unsigned short int/int after rounding and saturation
+ *
+ * Parameters:
+ * [in] Value Float value to be converted
+ *
+ * Return Value:
+ * [out] converted value in OMX_U16/OMX_U32 format
+ *
+ */
+
+OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value);
+OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value);
+
+/**
+ * Function: armSignCheck
+ *
+ * Description:
+ * Checks the sign of a variable:
+ * returns 1 if it is Positive
+ * returns 0 if it is 0
+ * returns -1 if it is Negative
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] var Variable to be checked
+ *
+ * Return Value:
+ * OMX_INT -- returns 1 if it is Positive
+ * returns 0 if it is 0
+ * returns -1 if it is Negative
+ */
+
+OMX_INT armSignCheck (OMX_S16 var);
+
+/**
+ * Function: armClip
+ *
+ * Description: Clips the input between MAX and MIN value
+ *
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] min lower bound
+ * [in] max upper bound
+ * [in] src variable to be clipped
+ *
+ * Return Value:
+ * OMX_S32 -- returns clipped value
+ */
+
+OMX_S32 armClip (
+ OMX_INT min,
+ OMX_INT max,
+ OMX_S32 src
+ );
+
+/**
+ * Function: armClip_F32
+ *
+ * Description: Clips the input between MAX and MIN value
+ *
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] min lower bound
+ * [in] max upper bound
+ * [in] src variable to be clipped
+ *
+ * Return Value:
+ * OMX_F32 -- returns clipped value
+ */
+
+OMX_F32 armClip_F32 (
+ OMX_F32 min,
+ OMX_F32 max,
+ OMX_F32 src
+ );
+
+/**
+ * Function: armShiftSat_F32
+ *
+ * Description: Divides a float value by 2^shift and
+ * saturates it for unsigned value range for satBits.
+ * Second parameter is like "shifting" the corresponding
+ * integer value. Takes care of rounding while clipping the final
+ * value.
+ *
+ * Parameters:
+ * [in] v Number to be operated upon
+ * [in] shift Divides the input "v" by "2^shift"
+ * [in] satBits Final range is [0, 2^satBits)
+ *
+ * Return Value:
+ * OMX_U32 -- returns "shifted" saturated value
+ */
+
+OMX_U32 armShiftSat_F32(
+ OMX_F32 v,
+ OMX_INT shift,
+ OMX_INT satBits
+ );
+
+/**
+ * Functions: armSwapElem
+ *
+ * Description:
+ * This function swaps two elements at the specified pointer locations.
+ * The size of each element could be anything as specified by <elemSize>
+ *
+ * Return Value:
+ * OMXResult -- Error status from the function
+ */
+OMXResult armSwapElem(OMX_U8 *pBuf1, OMX_U8 *pBuf2, OMX_INT elemSize);
+
+
+/**
+ * Function: armMedianOf3
+ *
+ * Description: Finds the median of three numbers
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] fEntry First entry
+ * [in] sEntry second entry
+ * [in] tEntry Third entry
+ *
+ * Return Value:
+ * OMX_S32 -- returns the median value
+ */
+
+OMX_S32 armMedianOf3 (
+ OMX_S32 fEntry,
+ OMX_S32 sEntry,
+ OMX_S32 tEntry
+ );
+
+/**
+ * Function: armLogSize
+ *
+ * Description: Finds the size of a positive value and returns the same
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] value Positive value
+ *
+ * Return Value:
+ * OMX_U8 -- returns the size of the positive value
+ */
+
+OMX_U8 armLogSize (
+ OMX_U16 value
+ );
+
+/***********************************************************************/
+ /* Saturating Arithmetic operations */
+
+/**
+ * Function :armSatAdd_S32()
+ *
+ * Description :
+ * Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ *
+ **/
+
+OMX_S32 armSatAdd_S32(
+ OMX_S32 Value1,
+ OMX_S32 Value2
+ );
+
+/**
+ * Function :armSatAdd_S64()
+ *
+ * Description :
+ * Returns the result of saturated addition of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ *
+ **/
+
+OMX_S64 armSatAdd_S64(
+ OMX_S64 Value1,
+ OMX_S64 Value2
+ );
+
+/** Function :armSatSub_S32()
+ *
+ * Description :
+ * Returns the result of saturated subtraction of the two inputs Value1, Value2
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S32 armSatSub_S32(
+ OMX_S32 Value1,
+ OMX_S32 Value2
+ );
+
+/**
+ * Function :armSatMac_S32()
+ *
+ * Description :
+ * Returns the result of Multiplication of Value1 and Value2 and subsequent saturated
+ * accumulation with Mac
+ *
+ * Parameters:
+ * [in] Value1 First Operand
+ * [in] Value2 Second Operand
+ * [in] Mac Accumulator
+ *
+ * Return:
+ * [out] Result of operation
+ **/
+
+OMX_S32 armSatMac_S32(
+ OMX_S32 Mac,
+ OMX_S16 Value1,
+ OMX_S16 Value2
+ );
+
+/**
+ * Function :armSatMac_S16S32_S32
+ *
+ * Description :
+ * Returns the result of saturated MAC operation of the three inputs delayElem, filTap , mac
+ *
+ * mac = mac + Saturate_in_32Bits(delayElem * filTap)
+ *
+ * Parameters:
+ * [in] delayElem First 32 bit Operand
+ * [in] filTap Second 16 bit Operand
+ * [in] mac Result of MAC operation
+ *
+ * Return:
+ * [out] mac Result of operation
+ *
+ **/
+
+OMX_S32 armSatMac_S16S32_S32(
+ OMX_S32 mac,
+ OMX_S32 delayElem,
+ OMX_S16 filTap );
+
+/**
+ * Function :armSatRoundRightShift_S32_S16
+ *
+ * Description :
+ * Returns the result of rounded right shift operation of input by the scalefactor
+ *
+ * output = Saturate_in_16Bits( RightShift( Round(input), scaleFactor ) )
+ *
+ * Parameters:
+ * [in] input The input to be operated on
+ * [in] scaleFactor The shift number
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+
+OMX_S16 armSatRoundRightShift_S32_S16(
+ OMX_S32 input,
+ OMX_INT scaleFactor);
+
+/**
+ * Function :armSatRoundLeftShift_S32()
+ *
+ * Description :
+ * Returns the result of saturating left-shift operation on input
+ * Or rounded Right shift if the input Shift is negative.
+ *
+ * Parameters:
+ * [in] Value Operand
+ * [in] shift Operand for shift operation
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S32 armSatRoundLeftShift_S32(
+ OMX_S32 Value,
+ OMX_INT shift
+ );
+
+/**
+ * Function :armSatRoundLeftShift_S64()
+ *
+ * Description :
+ * Returns the result of saturating left-shift operation on input
+ * Or rounded Right shift if the input Shift is negative.
+ *
+ * Parameters:
+ * [in] Value Operand
+ * [in] shift Operand for shift operation
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S64 armSatRoundLeftShift_S64(
+ OMX_S64 Value,
+ OMX_INT shift
+ );
+
+/**
+ * Function :armSatMulS16S32_S32()
+ *
+ * Description :
+ * Returns the result of a S16 data type multiplied with an S32 data type
+ * in a S32 container
+ *
+ * Parameters:
+ * [in] input1 Operand 1
+ * [in] input2 Operand 2
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+
+OMX_S32 armSatMulS16S32_S32(
+ OMX_S16 input1,
+ OMX_S32 input2);
+
+/**
+ * Function :armSatMulS32S32_S32()
+ *
+ * Description :
+ * Returns the result of a S32 data type multiplied with an S32 data type
+ * in a S32 container
+ *
+ * Parameters:
+ * [in] input1 Operand 1
+ * [in] input2 Operand 2
+ *
+ * Return:
+ * [out] Result of operation
+ *
+ **/
+
+OMX_S32 armSatMulS32S32_S32(
+ OMX_S32 input1,
+ OMX_S32 input2);
+
+
+/**
+ * Function :armIntDivAwayFromZero()
+ *
+ * Description : Integer division with rounding to the nearest integer.
+ * Half-integer values are rounded away from zero
+ * unless otherwise specified. For example 3//2 is rounded
+ * to 2, and -3//2 is rounded to -2.
+ *
+ * Parameters:
+ * [in] Num Operand 1 (the value being divided)
+ * [in] Deno Operand 2 (the value divided by)
+ *
+ * Return:
+ * [out] Result of operation Num//Deno
+ *
+ **/
+
+OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno);
+
+
+/***********************************************************************/
+/*
+ * Debugging macros
+ *
+ */
+
+
+/*
+ * Definition of output stream - change to stderr if necessary
+ */
+#define DEBUG_STREAM stdout
+
+/*
+ * Debug printf macros, one for each argument count (add more if needed).
+ * They expand to nothing unless DEBUG_ON is defined.
+ */
+#ifdef DEBUG_ON
+#include <stdio.h>
+
+#define DEBUG_PRINTF_0(a) fprintf(DEBUG_STREAM, a)
+#define DEBUG_PRINTF_1(a, b) fprintf(DEBUG_STREAM, a, b)
+#define DEBUG_PRINTF_2(a, b, c) fprintf(DEBUG_STREAM, a, b, c)
+#define DEBUG_PRINTF_3(a, b, c, d) fprintf(DEBUG_STREAM, a, b, c, d)
+#define DEBUG_PRINTF_4(a, b, c, d, e) fprintf(DEBUG_STREAM, a, b, c, d, e)
+#define DEBUG_PRINTF_5(a, b, c, d, e, f) fprintf(DEBUG_STREAM, a, b, c, d, e, f)
+#define DEBUG_PRINTF_6(a, b, c, d, e, f, g) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g)
+#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h)
+#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i)
+#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j)
+#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k)
+#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l)
+#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m)
+#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n)
+#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
+#else /* DEBUG_ON */
+#define DEBUG_PRINTF_0(a)
+#define DEBUG_PRINTF_1(a, b)
+#define DEBUG_PRINTF_2(a, b, c)
+#define DEBUG_PRINTF_3(a, b, c, d)
+#define DEBUG_PRINTF_4(a, b, c, d, e)
+#define DEBUG_PRINTF_5(a, b, c, d, e, f)
+#define DEBUG_PRINTF_6(a, b, c, d, e, f, g)
+#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h)
+#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i)
+#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j)
+#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k)
+#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l)
+#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m)
+#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n)
+#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o)
+#endif /* DEBUG_ON */
+
+
+/*
+ * Domain and sub domain definitions
+ *
+ * In order to turn on debug for an entire domain or sub-domain
+ * at compile time, one of the DEBUG_DOMAIN_* below may be defined,
+ * which will activate debug in all of the defines it contains.
+ */
+
+#ifdef DEBUG_DOMAIN_AC
+#define DEBUG_OMXACAAC_DECODECHANPAIRELT_MPEG4
+#define DEBUG_OMXACAAC_DECODECHANPAIRELT
+#define DEBUG_OMXACAAC_DECODEDATSTRELT
+#define DEBUG_OMXACAAC_DECODEFILLELT
+#define DEBUG_OMXACAAC_DECODEISSTEREO_S32
+#define DEBUG_OMXACAAC_DECODEMSPNS_S32
+#define DEBUG_OMXACAAC_DECODEMSSTEREO_S32_I
+#define DEBUG_OMXACAAC_DECODEPRGCFGELT
+#define DEBUG_OMXACAAC_DECODETNS_S32_I
+#define DEBUG_OMXACAAC_DEINTERLEAVESPECTRUM_S32
+#define DEBUG_OMXACAAC_ENCODETNS_S32_I
+#define DEBUG_OMXACAAC_LONGTERMPREDICT_S32
+#define DEBUG_OMXACAAC_LONGTERMRECONSTRUCT_S32
+#define DEBUG_OMXACAAC_MDCTFWD_S32
+#define DEBUG_OMXACAAC_MDCTINV_S32_S16
+#define DEBUG_OMXACAAC_NOISELESSDECODE
+#define DEBUG_OMXACAAC_QUANTINV_S32_I
+#define DEBUG_OMXACAAC_UNPACKADIFHEADER
+#define DEBUG_OMXACAAC_UNPACKADTSFRAMEHEADER
+#define DEBUG_OMXACMP3_HUFFMANDECODESFBMBP_S32
+#define DEBUG_OMXACMP3_HUFFMANDECODESFB_S32
+#define DEBUG_OMXACMP3_HUFFMANDECODE_S32
+#define DEBUG_OMXACMP3_MDCTINV_S32
+#define DEBUG_OMXACMP3_REQUANTIZESFB_S32_I
+#define DEBUG_OMXACMP3_REQUANTIZE_S32_I
+#define DEBUG_OMXACMP3_SYNTHPQMF_S32_S16
+#define DEBUG_OMXACMP3_UNPACKFRAMEHEADER
+#define DEBUG_OMXACMP3_UNPACKSCALEFACTORS_S8
+#define DEBUG_OMXACMP3_UNPACKSIDEINFO
+#endif /* DEBUG_DOMAIN_AC */
+
+
+#ifdef DEBUG_DOMAIN_VC
+#define DEBUG_OMXVCM4P10_AVERAGE_16X
+#define DEBUG_OMXVCM4P10_AVERAGE_4X
+#define DEBUG_OMXVCM4P10_AVERAGE_8X
+#define DEBUG_OMXVCM4P10_DEBLOCKCHROMA_U8_C1IR
+#define DEBUG_OMXVCM4P10_DEBLOCKLUMA_U8_C1IR
+#define DEBUG_OMXVCM4P10_DECODECHROMADCCOEFFSTOPAIRCAVLC_U8
+#define DEBUG_OMXVCM4P10_DECODECOEFFSTOPAIRCAVLC_U8
+#define DEBUG_OMXVCM4P10_DEQUANTTRANSFORMACFROMPAIR_U8_S16_C1_DLX
+#define DEBUG_OMXVCM4P10_EXPANDFRAME
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_HOREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_VEREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_HOREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_VEREDGE_U8_C1IR
+#define DEBUG_OMXVCM4P10_PREDICTINTRACHROMA8X8_U8_C1R
+#define DEBUG_OMXVCM4P10_PREDICTINTRA_16X16_U8_C1R
+#define DEBUG_OMXVCM4P10_PREDICTINTRA_4X4_U8_C1R
+#define DEBUG_OMXVCM4P10_SADQUAR_16X
+#define DEBUG_OMXVCM4P10_SADQUAR_4X
+#define DEBUG_OMXVCM4P10_SADQUAR_8X
+#define DEBUG_OMXVCM4P10_SAD_16X
+#define DEBUG_OMXVCM4P10_SAD_4X
+#define DEBUG_OMXVCM4P10_SAD_8X
+#define DEBUG_OMXVCM4P10_SATD_4X4
+#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTCHROMADCFROMPAIR_U8_S16_C1
+#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTLUMADCFROMPAIR_U8_S16_C1
+#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_CHROMADC
+#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_LUMADC
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_16X16
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_8X8
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_16X16
+#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_8X8
+#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_SAD_U8_S16
+#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_U8_S16
+#define DEBUG_OMXVCM4P2_DCT8X8BLKDLX
+#define DEBUG_OMXVCM4P2_DECODEBLOCKCOEF_INTER_S16
+#define DEBUG_OMXVCM4P2_DECODEPADMV_PVOP
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTER_S16
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRAACVLC_S16
+#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRADCVLC_S16
+#define DEBUG_OMXVCM4P2_ENCODEMV_U8_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTER_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRAACVLC_S16
+#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRADCVLC_S16
+#define DEBUG_OMXVCM4P2_FINDMVPRED
+#define DEBUG_OMXVCM4P2_IDCT8X8BLKDLX
+#define DEBUG_OMXVCM4P2_LIMITMVTORECT
+#define DEBUG_OMXVCM4P2_MOTIONESTIMATIONMB
+#define DEBUG_OMXVCM4P2_PADMBGRAY_U8
+#define DEBUG_OMXVCM4P2_PADMBHORIZONTAL_U8
+#define DEBUG_OMXVCM4P2_PADMBVERTICAL_U8
+#define DEBUG_OMXVCM4P2_PADMV
+#define DEBUG_OMXVCM4P2_QUANTINTER_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINTRA_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINVINTER_S16_I
+#define DEBUG_OMXVCM4P2_QUANTINVINTRA_S16_I
+#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTER
+#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTRA
+#endif /* DEBUG_DOMAIN_VC */
+
+
+#ifdef DEBUG_DOMAIN_IC
+/* To be filled in */
+#endif /* DEBUG_DOMAIN_IC */
+
+
+#ifdef DEBUG_DOMAIN_SP
+#define DEBUG_OMXACSP_DOTPROD_S16
+#define DEBUG_OMXACSP_BLOCKEXP_S16
+#define DEBUG_OMXACSP_BLOCKEXP_S32
+#define DEBUG_OMXACSP_COPY_S16
+#define DEBUG_OMXACSP_DOTPROD_S16
+#define DEBUG_OMXACSP_DOTPROD_S16_SFS
+#define DEBUG_OMXACSP_FFTFWD_CTOC_SC16_SFS
+#define DEBUG_OMXACSP_FFTFWD_CTOC_SC32_SFS
+#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S16S32_SFS
+#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S32_SFS
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC16
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC32
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S16_S32
+#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S32
+#define DEBUG_OMXACSP_FFTINIT_C_SC16
+#define DEBUG_OMXACSP_FFTINIT_C_SC32
+#define DEBUG_OMXACSP_FFTINIT_R_S16_S32
+#define DEBUG_OMXACSP_FFTINIT_R_S32
+#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32S16_SFS
+#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32_SFS
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC16_SFS
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC32_SFS
+#define DEBUG_OMXACSP_FILTERMEDIAN_S32_I
+#define DEBUG_OMXACSP_FILTERMEDIAN_S32
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_ISFS
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_I
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16
+#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_SFS
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_ISFS
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_I
+#define DEBUG_OMXACSP_FIR_DIRECT_S16
+#define DEBUG_OMXACSP_FIR_DIRECT_S16_SFS
+#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16_I
+#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16
+#define DEBUG_OMXACSP_IIRONE_DIRECT_S16_I
+#define DEBUG_OMXACSP_IIRONE_DIRECT_S16
+#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16_I
+#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16
+#define DEBUG_OMXACSP_IIR_DIRECT_S16_I
+#define DEBUG_OMXACSP_IIR_DIRECT_S16
+#endif /* DEBUG_DOMAIN_SP */
+
+
+#ifdef DEBUG_DOMAIN_IP
+#define DEBUG_OMXIPBM_ADDC_U8_C1R_SFS
+#define DEBUG_OMXIPBM_COPY_U8_C1R
+#define DEBUG_OMXIPBM_COPY_U8_C3R
+#define DEBUG_OMXIPBM_MIRROR_U8_C1R
+#define DEBUG_OMXIPBM_MULC_U8_C1R_SFS
+#define DEBUG_OMXIPCS_COLORTWISTQ14_U8_C3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR420LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR422LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGB565TOYCBCR444LS_MCU_U16_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR420LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR422LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_RGBTOYCBCR444LS_MCU_U8_S16_C3P3R
+#define DEBUG_OMXIPCS_YCBCR420RSZROT_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR420TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR420TORGB565_U8_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR420TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422RSZCSCROTRGB_U8_C2R
+#define DEBUG_OMXIPCS_YCBCR422RSZROT_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB565_U8_U16_C2C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCR422TORGB_U8_C2C3R
+#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_C2P3R
+#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_P3R
+#define DEBUG_OMXIPCS_YCBCR444TORGB565LS_MCU_S16_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCR444TORGBLS_MCU_S16_U8_P3C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_P3C3R
+#define DEBUG_OMXIPCS_YCBCRTORGB_U8_C3R
+#define DEBUG_OMXIPPP_GETCENTRALMOMENT_S64
+#define DEBUG_OMXIPPP_GETSPATIALMOMENT_S64
+#define DEBUG_OMXIPPP_MOMENTGETSTATESIZE_S64
+#define DEBUG_OMXIPPP_MOMENTINIT_S64
+#define DEBUG_OMXIPPP_MOMENTS64S_U8_C1R
+#define DEBUG_OMXIPPP_MOMENTS64S_U8_C3R
+#endif /* DEBUG_DOMAIN_IP */
+
+
+#endif /* _armCommon_H_ */
+
+/*End of File*/
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_BitDec_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_BitDec_s.h
new file mode 100644
index 0000000..abb98fc
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_BitDec_s.h
@@ -0,0 +1,670 @@
+;//
+;//
+;// File Name: armCOMM_BitDec_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// OpenMAX optimized bitstream decode module
+;//
+;// You must include armCOMM_s.h before including this file
+;//
+;// This module provides macros to perform assembly optimized fixed and
+;// variable length decoding from a read-only bitstream. The variable
+;// length decode modules take as input a pointer to a table of 16-bit
+;// entries of the following format.
+;//
+;// VLD Table Entry format
+;//
+;// 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
+;// +------------------------------------------------+
+;// | Len | Symbol | 1 |
+;// +------------------------------------------------+
+;// | Offset | 0 |
+;// +------------------------------------------------+
+;//
+;// If the table entry is a leaf entry then bit 0 is set:
+;// Len = Number of bits overread (0 to 7)
+;// Symbol = Symbol payload (unsigned 12 bits)
+;//
+;// If the table entry is an internal node then bit 0 is clear:
+;// Offset = Number of (16-bit) half words from the table
+;// start to the next table node
+;//
+;// The table is accessed by successive lookups on the
+;// next Step bits of the input bitstream until a leaf node
+;// is obtained. The Step sizes are supplied to the VLD macro.
+;//
+;// USAGE:
+;//
+;// To use any of the macros in this package, first call in this order:
+;// M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+;// M_BD_INIT1 T1, T2, T3
+;// M_BD_INIT2 T1, T2, T3
+;//
+;// These cache the current bitstream position and next available bits in
+;// the registers chosen for pBitStream, BitBuffer and BitCount. Those registers
+;// are reserved for use by the bitstream decode package until you call M_BD_FINI.
+;//
+;// Next call the following macro(s) as many times as you need:
+;//
+;// M_BD_LOOK8 - Look ahead constant 1<=N<=8 bits into the bitstream
+;// M_BD_LOOK16 - Look ahead constant 1<=N<=16 bits into the bitstream
+;// M_BD_READ8 - Read constant 1<=N<=8 bits from the bitstream
+;// M_BD_READ16 - Read constant 1<=N<=16 bits from the bitstream
+;// M_BD_VREAD8 - Read variable 1<=N<=8 bits from the bitstream
+;// M_BD_VREAD16 - Read variable 1<=N<=16 bits from the bitstream
+;// M_BD_VLD - Perform variable length decode using lookup table
+;//
+;// Finally call the macro:
+;//
+;// M_BD_FINI ppBitStream, pBitOffset
+;//
+;// This writes the bitstream state back to memory.
+;//
+;// The three bitstream cache register names are assigned to the following global
+;// variables:
+;//
+
+ GBLS pBitStream ;// Register name for pBitStream
+ GBLS BitBuffer ;// Register name for BitBuffer
+ GBLS BitCount ;// Register name for BitCount
+
+;//
+;// These register variables must have a certain defined state on entry to every bitstream
+;// macro (except M_BD_INIT) and on exit from every bitstream macro (except M_BD_FINI).
+;// The state may depend on implementation.
+;//
+;// For the default (ARM11) implementation the following hold:
+;// pBitStream - points to the first byte not held in the BitBuffer
+;// BitBuffer - is a cache of (4 bytes) 32 bits, bit 31 the first bit
+;// BitCount - is offset (from the top bit) to the next unused bitstream bit
+;// 0<=BitCount<=15 (so BitBuffer holds at least 17 unused bits)
+;//
+;//
+
+ ;// Bitstream Decode initialise
+ ;//
+ ;// Initialises the bitstream decode global registers from the
+ ;// bitstream pointers. Split into 3 parts (M_BD_INIT0, M_BD_INIT1,
+ ;// M_BD_INIT2), to be invoked in that order, to enable scheduling.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $ppBitStream - pointer to pointer to the next bitstream byte
+ ;// $pBitOffset - pointer to the number of bits used in the current byte (0..7)
+ ;// $RBitStream - register to use for pBitStream (can be $ppBitStream)
+ ;// $RBitBuffer - register to use for BitBuffer
+ ;// $RBitCount - register to use for BitCount (can be $pBitOffset)
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $T1,$T2,$T3 - (M_BD_INIT1/M_BD_INIT2 arguments) registers that must be
+ ;// preserved between calls to M_BD_INIT1 and M_BD_INIT2
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_INIT0 $ppBitStream, $pBitOffset, $RBitStream, $RBitBuffer, $RBitCount
+
+pBitStream SETS "$RBitStream"
+BitBuffer SETS "$RBitBuffer"
+BitCount SETS "$RBitCount"
+
+ ;// load inputs (the register names were bound by the SETS lines above)
+ LDR $pBitStream, [$ppBitStream] ;// pBitStream = *ppBitStream
+ LDR $BitCount, [$pBitOffset] ;// BitCount = *pBitOffset
+ MEND
+
+ MACRO
+ M_BD_INIT1 $T1, $T2, $T3
+ LDRB $T2, [$pBitStream, #2] ;// T2 = third bitstream byte
+ LDRB $T1, [$pBitStream, #1] ;// T1 = second bitstream byte
+ LDRB $BitBuffer, [$pBitStream], #3 ;// first byte; pBitStream advances by 3
+ ADD $BitCount, $BitCount, #8 ;// only 24 bits get loaded: count the empty top 8 as used
+ MEND
+
+ MACRO
+ M_BD_INIT2 $T1, $T2, $T3
+ ORR $T2, $T2, $T1, LSL #8 ;// T2 = (second byte << 8) | third byte
+ ORR $BitBuffer, $T2, $BitBuffer, LSL #16 ;// BitBuffer = (first byte << 16) | T2
+ MEND
+
+ ;//
+ ;// Look ahead fixed 1<=N<=8 bits without consuming any bits
+ ;// The next bits will be placed at bit 31..24 of destination register
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to look (only checked by the ASSERT)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the next bits of the bitstream, left aligned (not masked to N bits)
+ ;// (this macro takes no scratch register and corrupts none)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_LOOK8 $Symbol, $N
+ ASSERT ($N>=1):LAND:($N<=8)
+ MOV $Symbol, $BitBuffer, LSL $BitCount ;// left align next bits
+ MEND
+
+ ;//
+ ;// Look ahead fixed 1<=N<=16 bits without consuming any bits
+ ;// The next bits will be placed at bit 31..16 of destination register
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to look (only checked by the ASSERT)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the next bits of the bitstream, left aligned (not masked to N bits)
+ ;// $T1 - scratch register argument (not used by this implementation)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_LOOK16 $Symbol, $N, $T1
+ ASSERT ($N >= 1):LAND:($N <= 16)
+ MOV $Symbol, $BitBuffer, LSL $BitCount ;// left align next bits
+ MEND
+
+ ;//
+ ;// Skips fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $T1 - corrupted temp/scratch register
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_SKIP8 $N, $T1
+ ASSERT ($N>=1):LAND:($N<=8)
+ SUBS $BitCount, $BitCount, #(8-$N) ;// BitCount+N-8; CS if buffer can be refilled
+ LDRCSB $T1, [$pBitStream], #1 ;// load refill byte
+ ADDCC $BitCount, $BitCount, #8 ;// refill not possible: BitCount is old BitCount+N
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in refill byte
+ MEND
+
+
+ ;//
+ ;// Read fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to read
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the next N bits of the bitstream, right aligned
+ ;// $T1 - corrupted temp/scratch register
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_READ8 $Symbol, $N, $T1
+ ASSERT ($N>=1):LAND:($N<=8)
+ MOVS $Symbol, $BitBuffer, LSL $BitCount ;// left align next bits
+ SUBS $BitCount, $BitCount, #(8-$N) ;// BitCount+N-8; CS if buffer can be refilled
+ LDRCSB $T1, [$pBitStream], #1 ;// load refill byte
+ ADDCC $BitCount, $BitCount, #8 ;// refill not possible
+ MOV $Symbol, $Symbol, LSR #(32-$N) ;// keep the top N bits, right aligned
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in refill byte
+ MEND
+
+ ;//
+ ;// Read fixed 1<=N<=16 bits from the bitstream, advancing the bitstream pointer
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to read
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the next N bits of the bitstream, right aligned
+ ;// $T1 - corrupted temp/scratch register (must differ from $Symbol)
+ ;// $T2 - scratch register argument (not used by this implementation)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_READ16 $Symbol, $N, $T1, $T2
+ ASSERT ($N>=1):LAND:($N<=16)
+ ASSERT $Symbol<>$T1
+ IF ($N<=8)
+ M_BD_READ8 $Symbol, $N, $T1
+ ELSE
+ ;// N>8 so we will be able to refill at least one byte
+ LDRB $T1, [$pBitStream], #1 ;// load first (unconditional) refill byte
+ MOVS $Symbol, $BitBuffer, LSL $BitCount ;// left align next bits (before the refill)
+ ORR $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in first refill byte
+ SUBS $BitCount, $BitCount, #(16-$N) ;// BitCount+N-16; CS if a second refill is possible
+ LDRCSB $T1, [$pBitStream], #1 ;// load second refill byte
+ MOV $Symbol, $Symbol, LSR #(32-$N) ;// keep the top N bits, right aligned
+ ADDCC $BitCount, $BitCount, #8 ;// second refill not possible
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in second refill byte
+ ENDIF
+ MEND
+
+ ;//
+ ;// Skip variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits. 1<=N<=8
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $T1 - corrupted temp/scratch register
+ ;// (this macro takes no second scratch register)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_VSKIP8 $N, $T1
+ ADD $BitCount, $BitCount, $N ;// account for the N skipped bits
+ SUBS $BitCount, $BitCount, #8 ;// CS if buffer can be refilled
+ LDRCSB $T1, [$pBitStream], #1 ;// load refill byte
+ ADDCC $BitCount, $BitCount, #8 ;// refill not possible
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in refill byte
+ MEND
+
+ ;//
+ ;// Skip variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits. 1<=N<=16
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $T1 - corrupted temp/scratch register
+ ;// $T2 - scratch register argument (not used by this implementation)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_VSKIP16 $N, $T1, $T2
+ ADD $BitCount, $BitCount, $N ;// account for the N skipped bits
+ SUBS $BitCount, $BitCount, #8 ;// CS if a first refill is possible
+ LDRCSB $T1, [$pBitStream], #1 ;// load first refill byte
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in first refill byte
+ SUBCSS $BitCount, $BitCount, #8 ;// after a refill: CS if a second one is possible
+ LDRCSB $T1, [$pBitStream], #1 ;// load second refill byte
+ ADDCC $BitCount, $BitCount, #8 ;// undo the last subtract that found no refill
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in second refill byte
+ MEND
+
+ ;//
+ ;// Read variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to read. 1<=N<=8
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the next N bits of the bitstream, right aligned
+ ;// $T1 - corrupted temp/scratch register
+ ;// $T2 - corrupted temp/scratch register
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_VREAD8 $Symbol, $N, $T1, $T2
+ MOV $Symbol, $BitBuffer, LSL $BitCount ;// left align next bits
+ ADD $BitCount, $BitCount, $N ;// account for the N bits read
+ SUBS $BitCount, $BitCount, #8 ;// CS if buffer can be refilled
+ LDRCSB $T1, [$pBitStream], #1 ;// load refill byte
+ RSB $T2, $N, #32 ;// T2 = 32-N
+ ADDCC $BitCount, $BitCount, #8 ;// refill not possible
+ MOV $Symbol, $Symbol, LSR $T2 ;// keep the top N bits, right aligned
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in refill byte
+ MEND
+
+
+ ;//
+ ;// Read variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $N - number of bits to read. 1<=N<=16
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the next N bits of the bitstream, right aligned
+ ;// $T1 - corrupted temp/scratch register
+ ;// $T2 - corrupted temp/scratch register
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_VREAD16 $Symbol, $N, $T1, $T2
+ MOV $Symbol, $BitBuffer, LSL $BitCount ;// left align next bits
+ ADD $BitCount, $BitCount, $N ;// account for the N bits read
+ SUBS $BitCount, $BitCount, #8 ;// CS if a first refill is possible
+ LDRCSB $T1, [$pBitStream], #1 ;// load first refill byte
+ RSB $T2, $N, #32 ;// T2 = 32-N
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in first refill byte
+ SUBCSS $BitCount, $BitCount, #8 ;// after a refill: CS if a second one is possible
+ LDRCSB $T1, [$pBitStream], #1 ;// load second refill byte
+ ADDCC $BitCount, $BitCount, #8 ;// undo the last subtract that found no refill
+ MOV $Symbol, $Symbol, LSR $T2 ;// keep the top N bits, right aligned
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in second refill byte
+ MEND
+
+
+ ;//
+ ;// Decode a code of the form 0000...001 where there
+ ;// are N zeros before the 1 and N<=15 (code length<=16)
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the number of zeros before the next 1
+ ;// >=16 is an illegal code
+ ;// $T1 - corrupted temp/scratch register
+ ;// $T2 - scratch register argument (not used by this implementation)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_CLZ16 $Symbol, $T1, $T2
+ MOVS $Symbol, $BitBuffer, LSL $BitCount ;// left align next bits
+ CLZ $Symbol, $Symbol ;// count the zeros before the first 1
+ ADD $BitCount, $BitCount, $Symbol
+ SUBS $BitCount, $BitCount, #7 ;// length is Symbol+1, so this is BitCount+length-8
+ LDRCSB $T1, [$pBitStream], #1 ;// load first refill byte
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in first refill byte
+ SUBCSS $BitCount, $BitCount, #8 ;// after a refill: CS if a second one is possible
+ LDRCSB $T1, [$pBitStream], #1 ;// load second refill byte
+ ADDCC $BitCount, $BitCount, #8 ;// undo the last subtract that found no refill
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in second refill byte
+ MEND
+
+ ;//
+ ;// Decode a code of the form 1111...110 where there
+ ;// are N ones before the 0 and N<=15 (code length<=16)
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - the number of ones before the next 0
+ ;// >=16 is an illegal code
+ ;// $T1 - corrupted temp/scratch register
+ ;// $T2 - scratch register argument (not used by this implementation)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_CLO16 $Symbol, $T1, $T2
+ MOV $Symbol, $BitBuffer, LSL $BitCount ;// left align next bits
+ MVN $Symbol, $Symbol ;// invert so that leading ones become leading zeros
+ CLZ $Symbol, $Symbol ;// count the ones before the first 0
+ ADD $BitCount, $BitCount, $Symbol
+ SUBS $BitCount, $BitCount, #7 ;// length is Symbol+1, so this is BitCount+length-8
+ LDRCSB $T1, [$pBitStream], #1 ;// load first refill byte
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in first refill byte
+ SUBCSS $BitCount, $BitCount, #8 ;// after a refill: CS if a second one is possible
+ LDRCSB $T1, [$pBitStream], #1 ;// load second refill byte
+ ADDCC $BitCount, $BitCount, #8 ;// undo the last subtract that found no refill
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in second refill byte
+ MEND
+
+
+ ;//
+ ;// Variable Length Decode module
+ ;//
+ ;// Decodes one VLD Symbol from a bitstream and refills the bitstream
+ ;// buffer.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $pVLDTable - pointer to VLD decode table of 16-bit entries.
+ ;// The format is described above at the start of
+ ;// this file.
+ ;// $S0 - The number of bits to look up for the first step
+ ;// 1<=$S0<=8
+ ;// $S1 - The number of bits to look up for each subsequent
+ ;// step 1<=$S1<=$S0.
+ ;//
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $Symbol - decoded VLD symbol value
+ ;// $T1 - corrupted temp/scratch register
+ ;// $T2 - scratch register argument (not used by this implementation)
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_VLD $Symbol, $T1, $T2, $pVLDTable, $S0, $S1
+ ASSERT (1<=$S0):LAND:($S0<=8)
+ ASSERT (1<=$S1):LAND:($S1<=$S0)
+
+ ;// Note 0<=BitCount<=15 on entry and exit
+
+ MOVS $T1, $BitBuffer, LSL $BitCount ;// left align next bits
+ MOVS $Symbol, #(2<<$S0)-2 ;// create mask
+ AND $Symbol, $Symbol, $T1, LSR #(31-$S0) ;// 2*(next $S0 bits)
+ SUBS $BitCount, $BitCount, #8 ;// CS if buffer can be filled
+01
+ LDRCSB $T1, [$pBitStream], #1 ;// load refill byte
+ LDRH $Symbol, [$pVLDTable, $Symbol] ;// load table entry
+ ADDCC $BitCount, $BitCount, #8 ;// refill not possible
+ ADD $BitCount, $BitCount, #$S0 ;// assume $S0 bits used
+ ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in refill byte
+ MOVS $T1, $Symbol, LSR #1 ;// CS=leaf entry
+ BCS %FT02
+
+ MOVS $T1, $BitBuffer, LSL $BitCount ;// left align next bit
+ IF (2*$S0-$S1<=8)
+ ;// Can combine refill check and -S0+S1 and keep $BitCount<=15
+ SUBS $BitCount, $BitCount, #8+($S0-$S1)
+ ELSE
+ ;// Separate refill check and -S0+S1 offset
+ SUBS $BitCount, $BitCount, #8
+ SUB $BitCount, $BitCount, #($S0-$S1)
+ ENDIF
+ ADD $Symbol, $Symbol, $T1, LSR #(31-$S1) ;// add 2*(next $S1 bits) to
+ BIC $Symbol, $Symbol, #1 ;// table offset
+ B %BT01 ;// load next table entry
+02
+ ;// BitCount range now depends on the route here
+ ;// if (first step) S0 <= BitCount <= 7+S0 <=15
+ ;// else if (2*S0-S1<=8) S0 <= BitCount <= 7+(2*S0-S1) <=15
+ ;// else S1 <= BitCount <= 7+S1 <=15
+
+ SUB $BitCount, $BitCount, $Symbol, LSR#13 ;// give back the Len overread bits (entry bits 15..13)
+ BIC $Symbol, $T1, #0xF000 ;// T1 = entry>>1: clear the Len field, keep the 12-bit symbol
+ MEND
+
+
+ ;// Add an offset number of bits
+ ;//
+ ;// Outputs destination byte and bit index values which correspond to an offset number of bits
+ ;// from the current location. This is used to compare bitstream positions using M_BD_CMP.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $Offset - Offset to be added in bits (corrupted: $BitCount is added to it).
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $ByteIndex - Destination pBitStream pointer after adding the Offset.
+ ;// This value is 4 bytes ahead; subtract 4 to get the exact pointer
+ ;// (as in M_BD_FINI). For use with M_BD_CMP the subtraction is not needed.
+ ;// $BitIndex - Destination BitCount after the addition of Offset number of bits
+ ;//
+ MACRO
+ M_BD_ADD $ByteIndex, $BitIndex, $Offset
+
+ ;// ($ByteIndex,$BitIndex) = Current position + $Offset bits
+ ADD $Offset, $Offset, $BitCount ;// Offset = BitCount + Offset (overwrites $Offset)
+ AND $BitIndex, $Offset, #7 ;// bit position within the byte
+ ADD $ByteIndex, $pBitStream, $Offset, ASR #3 ;// pBitStream + whole bytes
+ MEND
+
+ ;// Move bitstream pointers to the location given
+ ;//
+ ;// Sets the bitstream pointer and bit count to the byte and bit index
+ ;// values given (calculated using M_BD_ADD).
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;// $ByteIndex - Destination pBitStream pointer after move.
+ ;// This value is 4 bytes ahead; subtract 4 to get the exact pointer
+ ;// (as in M_BD_FINI).
+ ;// $BitIndex - Destination BitCount after the move
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $pBitStream \
+ ;// } See description above.
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_MOV $ByteIndex, $BitIndex
+
+ ;// ($pBitStream, $BitCount) = ($ByteIndex,$BitIndex); $BitBuffer is not reloaded here
+ MOV $BitCount, $BitIndex
+ MOV $pBitStream, $ByteIndex
+ MEND
+
+ ;// Bitstream Compare
+ ;//
+ ;// Compares bitstream position with that of a destination position. Destination position
+ ;// is held in two input registers which are calculated using M_BD_ADD macro
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $ByteIndex - Destination pBitStream pointer, (4 byte ahead as described in M_BD_ADD)
+ ;// $BitIndex - Destination BitCount
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// FLAGS - GE if destination is reached, LT if destination is still ahead
+ ;// $T1 - corrupted temp/scratch register
+ ;//
+ MACRO
+ M_BD_CMP $ByteIndex, $BitIndex, $T1
+
+ ;// Return flags set by (current position)-($ByteIndex,$BitIndex)
+ ;// so GE means that we have reached the indicated position
+
+ ADD $T1, $pBitStream, $BitCount, LSR #3 ;// current byte position (4 bytes ahead, like $ByteIndex)
+ CMP $T1, $ByteIndex ;// compare byte positions first
+ AND $T1, $BitCount, #7 ;// current bit position within the byte (flags untouched)
+ CMPEQ $T1, $BitIndex ;// same byte: compare bit positions
+ MEND
+
+
+ ;// Bitstream Decode finalise
+ ;//
+ ;// Writes back the bitstream state to the bitstream pointers
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $pBitStream \
+ ;// $BitBuffer } See description above.
+ ;// $BitCount /
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $ppBitStream - pointer to pointer to the next bitstream byte
+ ;// $pBitOffset - pointer to the number of bits used in the current byte (0..7)
+ ;// $pBitStream \
+ ;// $BitBuffer } these registers are corrupted
+ ;// $BitCount /
+ ;//
+ MACRO
+ M_BD_FINI $ppBitStream, $pBitOffset
+
+ ;// Advance pointer by the number of whole bytes already consumed from the buffer
+ ADD $pBitStream, $pBitStream, $BitCount, LSR#3
+ AND $BitCount, $BitCount, #7 ;// bits used in the current byte
+
+ ;// Now move back 32 bits (the BitBuffer size) to reach the byte holding the first unused bit
+ SUB $pBitStream, $pBitStream, #4
+
+ ;// Store out bitstream state
+ STR $BitCount, [$pBitOffset]
+ STR $pBitStream, [$ppBitStream]
+ MEND
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Bitstream.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Bitstream.h
new file mode 100644
index 0000000..4f9bc3b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Bitstream.h
@@ -0,0 +1,212 @@
+/**
+ *
+ * File Name: armCOMM_Bitstream.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armCOMM_Bitstream.h
+ * Brief: Declares common API's/Data types used across the OpenMax Encoders/Decoders.
+ *
+ */
+
+#ifndef _armCodec_H_
+#define _armCodec_H_
+
+#include "omxtypes.h"
+
+typedef struct {
+ OMX_U8 codeLen; /* presumably the length of codeWord in bits -- confirm against armCOMM_Bitstream.c */
+ OMX_U32 codeWord; /* code word value; symbols are at most 32 bits per the armUnPackVLC32 description */
+} ARM_VLC32;
+
+/* NOTE: the type above is already named "ARM_VLC32"; this rename reminder appears to be stale. */
+
+/**
+ * Function: armLookAheadBits()
+ *
+ * Description:
+ * Get the next N bits from the bitstream without advancing the bitstream pointer
+ *
+ * Parameters:
+ * [in] **ppBitStream
+ * [in] *pOffset
+ * [in] N=1...32
+ *
+ * Returns Value
+ */
+
+OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N);
+
+/**
+ * Function: armGetBits()
+ *
+ * Description:
+ * Read N bits from the bitstream
+ *
+ * Parameters:
+ * [in] *ppBitStream
+ * [in] *pOffset
+ * [in] N=1..32
+ *
+ * [out] *ppBitStream
+ * [out] *pOffset
+ * Returns Value
+ */
+
+OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N);
+
+/**
+ * Function: armByteAlign()
+ *
+ * Description:
+ * Align the pointer *ppBitStream to the next byte boundary
+ *
+ * Parameters:
+ * [in] *ppBitStream
+ * [in] *pOffset
+ *
+ * [out] *ppBitStream
+ * [out] *pOffset
+ *
+ **/
+
+OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset);
+
+/**
+ * Function: armSkipBits()
+ *
+ * Description:
+ * Skip N bits from the value at *ppBitStream
+ *
+ * Parameters:
+ * [in] *ppBitStream
+ * [in] *pOffset
+ * [in] N
+ *
+ * [out] *ppBitStream
+ * [out] *pOffset
+ *
+ **/
+
+OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N);
+
+/***************************************
+ * Variable bit length Decode
+ ***************************************/
+
+/**
+ * Function: armUnPackVLC32()
+ *
+ * Description:
+ * Variable length decode of variable length symbol (max size 32 bits) read from
+ * the bit stream pointed by *ppBitStream at *pOffset by using the table
+ * pointed by pCodeBook
+ *
+ * Parameters:
+ * [in] **ppBitStream
+ * [in] *pOffset
+ * [in] pCodeBook
+ *
+ * [out] **ppBitStream
+ * [out] *pOffset
+ *
+ * Returns : Code Book Index if successful.
+ * : "ARM_NO_CODEBOOK_INDEX = 0xFFFF" if search fails.
+ **/
+
+#define ARM_NO_CODEBOOK_INDEX ((OMX_U16)(0xFFFF)) /* armUnPackVLC32 failure sentinel; outer parentheses keep the cast atomic in any expression */
+
+OMX_U16 armUnPackVLC32(
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pOffset,
+ const ARM_VLC32 *pCodeBook
+);
+
+/***************************************
+ * Fixed bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackBits
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte
+ * in the bit stream.
+ * [in] pOffset pointer to the bit position in the byte
+ * pointed by *ppBitStream. Valid within 0
+ * to 7.
+ * [in] codeWord Code word that need to be inserted in to the
+ * bitstream
+ * [in] codeLength Length of the code word valid range 1...32
+ *
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded,
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pOffset *pOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armPackBits (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pOffset,
+ OMX_U32 codeWord,
+ OMX_INT codeLength
+);
+
+/***************************************
+ * Variable bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackVLC32
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte
+ * in the bit stream.
+ * [in] pBitOffset pointer to the bit position in the byte
+ * pointed by *ppBitStream. Valid within 0
+ * to 7.
+ * [in] code VLC code word that need to be inserted in to the
+ * bitstream
+ *
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded,
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMX_RESULT result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armPackVLC32 (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ ARM_VLC32 code
+);
+
+#endif /*_armCodec_H_*/
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCTTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCTTable.h
new file mode 100644
index 0000000..d5db32f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCTTable.h
@@ -0,0 +1,40 @@
+/**
+ *
+ *
+ * File Name: armCOMM_IDCTTable.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File : armCOMM_IDCTTable.h
+ * Description : Contains declarations of tables for IDCT calculation.
+ *
+ */
+
+#ifndef _armCOMM_IDCTTable_H_
+#define _armCOMM_IDCTTable_H_
+
+#include "omxtypes.h"
+
+ /* Table of s(u)*A(u)*A(v)/16 at Q15
+ * s(u)=1.0 0 <= u <= 5
+ * s(6)=2.0
+ * s(7)=4.0
+ * A(0) = 2*sqrt(2)
+ * A(u) = 4*cos(u*pi/16) for (u!=0)
+ */
+extern const OMX_U16 armCOMM_IDCTPreScale [64];
+extern const OMX_U16 armCOMM_IDCTCoef [4];
+
+#endif /* _armCOMM_IDCTTable_H_ */
+
+
+/* End of File */
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCT_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCT_s.h
new file mode 100644
index 0000000..03f7137
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCT_s.h
@@ -0,0 +1,1445 @@
+;//
+;// This confidential and proprietary software may be used only as
+;// authorised by a licensing agreement from ARM Limited
+;// (C) COPYRIGHT 2004 ARM Limited
+;// ALL RIGHTS RESERVED
+;// The entire notice above must be reproduced on all authorised
+;// copies and copies may only be made to the extent permitted
+;// by a licensing agreement from ARM Limited.
+;//
+;// IDCT_s.s
+;//
+;// Inverse DCT module
+;//
+;//
+;// ALGORITHM DESCRIPTION
+;//
+;// The 8x8 2D IDCT is performed by calculating a 1D IDCT for each
+;// column and then a 1D IDCT for each row.
+;//
+;// The 8-point 1D IDCT is defined by
+;// f(x) = (C(0)*T(0)*c(0,x) + ... + C(7)*T(7)*c(7,x))/2
+;//
+;// C(u) = 1/sqrt(2) if u=0 or 1 if u!=0
+;// c(u,x) = cos( (2x+1)*u*pi/16 )
+;//
+;// We compute the 8-point 1D IDCT using the reverse of
+;// the Arai-Agui-Nakajima flow graph which we split into
+;// 5 stages named in reverse order to identify with the
+;// forward DCT. Direct inversion of the forward formulae
+;// in file FDCT_s.s gives:
+;//
+;// IStage 5: j(u) = T(u)*A(u) [ A(u)=4*C(u)*c(u,0) ]
+;// [ A(0) = 2*sqrt(2)
+;// A(u) = 4*cos(u*pi/16) for (u!=0) ]
+;//
+;// IStage 4: i0 = j0 i1 = j4
+;// i3 = (j2+j6)/2 i2 = (j2-j6)/2
+;// i7 = (j5+j3)/2 i4 = (j5-j3)/2
+;// i5 = (j1+j7)/2 i6 = (j1-j7)/2
+;//
+;// IStage 3: h0 = (i0+i1)/2 h1 = (i0-i1)/2
+;// h2 = (i2*sqrt2)-i3 h3 = i3
+;// h4 = cos(pi/8)*i4 + sin(pi/8)*i6
+;// h6 = -sin(pi/8)*i4 + cos(pi/8)*i6
+;// [ The above two lines rotate by -(pi/8) ]
+;// h5 = (i5-i7)/sqrt2 h7 = (i5+i7)/2
+;//
+;// IStage 2: g0 = (h0+h3)/2 g3 = (h0-h3)/2
+;// g1 = (h1+h2)/2 g2 = (h1-h2)/2
+;// g7 = h7 g6 = h6 - h7
+;// g5 = h5 - g6 g4 = h4 - g5
+;//
+;// IStage 1: f0 = (g0+g7)/2 f7 = (g0-g7)/2
+;// f1 = (g1+g6)/2 f6 = (g1-g6)/2
+;// f2 = (g2+g5)/2 f5 = (g2-g5)/2
+;// f3 = (g3+g4)/2 f4 = (g3-g4)/2
+;//
+;// Note that most coefficients are halved 3 times during the
+;// above calculation. We can rescale the algorithm dividing
+;// the input by 8 to remove the halvings.
+;//
+;// IStage 5: j(u) = T(u)*A(u)/8
+;//
+;// IStage 4: i0 = j0 i1 = j4
+;// i3 = j2 + j6 i2 = j2 - j6
+;// i7 = j5 + j3 i4 = j5 - j3
+;// i5 = j1 + j7 i6 = j1 - j7
+;//
+;// IStage 3: h0 = i0 + i1 h1 = i0 - i1
+;// h2 = (i2*sqrt2)-i3 h3 = i3
+;// h4 = 2*( cos(pi/8)*i4 + sin(pi/8)*i6)
+;// h6 = 2*(-sin(pi/8)*i4 + cos(pi/8)*i6)
+;// h5 = (i5-i7)*sqrt2 h7 = i5 + i7
+;//
+;// IStage 2: g0 = h0 + h3 g3 = h0 - h3
+;// g1 = h1 + h2 g2 = h1 - h2
+;// g7 = h7 g6 = h6 - h7
+;// g5 = h5 - g6 g4 = h4 - g5
+;//
+;// IStage 1: f0 = g0 + g7 f7 = g0 - g7
+;// f1 = g1 + g6 f6 = g1 - g6
+;// f2 = g2 + g5 f5 = g2 - g5
+;// f3 = g3 + g4 f4 = g3 - g4
+;//
+;// Note:
+;// 1. The scaling by A(u)/8 can often be combined with inverse
+;// quantization. The column and row scalings can be combined.
+;// 2. The flowgraph in the AAN paper has h4,g6 negated compared
+;// to the above code but is otherwise identical.
+;// 3. The rotation by -pi/8 can be performed using three multiplies
+;// Eg c*i4+s*i6 = (i6-i4)*s + (c+s)*i4
+;// -s*i4+c*i6 = (i6-i4)*s + (c-s)*i6
+;// 4. If |T(u)|<=1 then from the IDCT definition,
+;// |f(x)| <= ((1/sqrt2) + |c(1,x)| + .. + |c(7,x)|)/2
+;// = ((1/sqrt2) + cos(pi/16) + ... + cos(7*pi/16))/2
+;// = ((1/sqrt2) + (cot(pi/32)-1)/2)/2
+;// = (1 + cos(pi/16) + cos(2pi/16) + cos(3pi/16))/sqrt(2)
+;// = (approx)2.64
+;// So the max gain of the 2D IDCT is ~x7.0 = 3 bits.
+;// The table below shows input patterns generating the maximum
+;// value of |f(x)| for input in the range |T(u)|<=1. M=-1, P=+1
+;// InputPattern Max |f(x)|
+;// PPPPPPPP |f0| = 2.64
+;// PPPMMMMM |f1| = 2.64
+;// PPMMMPPP |f2| = 2.64
+;// PPMMPPMM |f3| = 2.64
+;// PMMPPMMP |f4| = 2.64
+;// PMMPMMPM |f5| = 2.64
+;// PMPPMPMP |f6| = 2.64
+;// PMPMPMPM |f7| = 2.64
+;// Note that this input pattern is the transpose of the
+;// corresponding max input pattern for the FDCT.
+
+;// Arguments
+
+pSrc RN 0 ;// source data buffer
+Stride RN 1 ;// destination stride in bytes
+pDest RN 2 ;// destination data buffer
+pScale RN 3 ;// pointer to scaling table
+
+
+ ;// DCT Inverse Macro
+ ;// The DCT code should be parametrized according
+ ;// to the following inputs:
+ ;// $outsize = "u8" : 8-bit unsigned data saturated (0 to +255)
+ ;// "s9" : 16-bit signed data saturated to 9-bit (-256 to +255)
+ ;// "s16" : 16-bit signed data not saturated (max size ~+/-14273)
+ ;// $inscale = "s16" : signed 16-bit aan-scale table, Q15 format, with 4 byte alignment
+ ;// "s32" : signed 32-bit aan-scale table, Q23 format, with 4 byte alignment
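+ ;// $stride = "s" : output stride is read at run time from the Stride register; any other value is treated as an assembly-time constant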
+ ;// Inputs:
+ ;// pSrc = r0 = Pointer to input data
+ ;// Range is -256 to +255 (9-bit)
+ ;// Stride = r1 = Stride between output (destination) lines, in bytes
+ ;// pDest = r2 = Pointer to output data
+ ;// pScale = r3 = Pointer to aan-scale table in the format defined by $inscale
+
+
+
+ MACRO
+ M_IDCT $outsize, $inscale, $stride
+ LCLA SHIFT
+
+
+ IF ARM1136JS
+
+;// REGISTER ALLOCATION
+;// This is hard since we have 8 values, 9 free registers and each
+;// butterfly requires a temporary register. We also want to
+;// maintain register order so we can use LDM/STM. The table below
+;// summarises the register allocation that meets all these criteria.
+;// a=1stcol, b=2ndcol, f,g,h,i are dataflow points described above.
+;//
+;// r1 a01 g0 h0
+;// r4 b01 f0 g1 h1 i0
+;// r5 a23 f1 g2 i1
+;// r6 b23 f2 g3 h2 i2
+;// r7 a45 f3 h3 i3
+;// r8 b45 f4 g4 h4 i4
+;// r9 a67 f5 g5 h5 i5
+;// r10 b67 f6 g6 h6 i6
+;// r11 f7 g7 h7 i7
+;//
+ra01 RN 1
+rb01 RN 4
+ra23 RN 5
+rb23 RN 6
+ra45 RN 7
+rb45 RN 8
+ra67 RN 9
+rb67 RN 10
+rtmp RN 11
+csPiBy8 RN 12 ;// [ (Sin(pi/8)@Q15), (Cos(pi/8)@Q15) ]
+LoopRR2 RN 14 ;// [ LoopNumber<<13 , (1/Sqrt(2))@Q15 ]
+;// Transpose allocation
+xft RN ra01
+xf0 RN rb01
+xf1 RN ra23
+xf2 RN rb23
+xf3 RN ra45
+xf4 RN rb45
+xf5 RN ra67
+xf6 RN rb67
+xf7 RN rtmp
+;// IStage 1 allocation
+xg0 RN xft
+xg1 RN xf0
+xg2 RN xf1
+xg3 RN xf2
+xgt RN xf3
+xg4 RN xf4
+xg5 RN xf5
+xg6 RN xf6
+xg7 RN xf7
+;// IStage 2 allocation
+xh0 RN xg0
+xh1 RN xg1
+xht RN xg2
+xh2 RN xg3
+xh3 RN xgt
+xh4 RN xg4
+xh5 RN xg5
+xh6 RN xg6
+xh7 RN xg7
+;// IStage 3,4 allocation
+xit RN xh0
+xi0 RN xh1
+xi1 RN xht
+xi2 RN xh2
+xi3 RN xh3
+xi4 RN xh4
+xi5 RN xh5
+xi6 RN xh6
+xi7 RN xh7
+
+ M_STR pDest, ppDest
+ IF "$stride"="s"
+ M_STR Stride, pStride
+ ENDIF
+ M_ADR pDest, pBlk
+ LDR csPiBy8, =0x30fc7642
+ LDR LoopRR2, =0x00005a82
+
+v6_idct_col$_F
+ ;// Load even values
+ LDR xi4, [pSrc], #4 ;// j0
+ LDR xi5, [pSrc, #4*16-4] ;// j4
+ LDR xi6, [pSrc, #2*16-4] ;// j2
+ LDR xi7, [pSrc, #6*16-4] ;// j6
+
+ ;// Scale Even Values
+ IF "$inscale"="s16" ;// 16x16 mul
+SHIFT SETA 12
+ LDR xi0, [pScale], #4
+ LDR xi1, [pScale, #4*16-4]
+ LDR xi2, [pScale, #2*16-4]
+ MOV xit, #1<<(SHIFT-1)
+ SMLABB xi3, xi0, xi4, xit
+ SMLATT xi4, xi0, xi4, xit
+ SMLABB xi0, xi1, xi5, xit
+ SMLATT xi5, xi1, xi5, xit
+ MOV xi3, xi3, ASR #SHIFT
+ PKHBT xi4, xi3, xi4, LSL #(16-SHIFT)
+ LDR xi3, [pScale, #6*16-4]
+ SMLABB xi1, xi2, xi6, xit
+ SMLATT xi6, xi2, xi6, xit
+ MOV xi0, xi0, ASR #SHIFT
+ PKHBT xi5, xi0, xi5, LSL #(16-SHIFT)
+ SMLABB xi2, xi3, xi7, xit
+ SMLATT xi7, xi3, xi7, xit
+ MOV xi1, xi1, ASR #SHIFT
+ PKHBT xi6, xi1, xi6, LSL #(16-SHIFT)
+ MOV xi2, xi2, ASR #SHIFT
+ PKHBT xi7, xi2, xi7, LSL #(16-SHIFT)
+ ENDIF
+ IF "$inscale"="s32" ;// 32x16 mul
+SHIFT SETA (12+8-16)
+ MOV xit, #1<<(SHIFT-1)
+ LDR xi0, [pScale], #8
+ LDR xi1, [pScale, #0*32+4-8]
+ LDR xi2, [pScale, #4*32-8]
+ LDR xi3, [pScale, #4*32+4-8]
+ SMLAWB xi0, xi0, xi4, xit
+ SMLAWT xi1, xi1, xi4, xit
+ SMLAWB xi2, xi2, xi5, xit
+ SMLAWT xi3, xi3, xi5, xit
+ MOV xi0, xi0, ASR #SHIFT
+ PKHBT xi4, xi0, xi1, LSL #(16-SHIFT)
+ MOV xi2, xi2, ASR #SHIFT
+ PKHBT xi5, xi2, xi3, LSL #(16-SHIFT)
+ LDR xi0, [pScale, #2*32-8]
+ LDR xi1, [pScale, #2*32+4-8]
+ LDR xi2, [pScale, #6*32-8]
+ LDR xi3, [pScale, #6*32+4-8]
+ SMLAWB xi0, xi0, xi6, xit
+ SMLAWT xi1, xi1, xi6, xit
+ SMLAWB xi2, xi2, xi7, xit
+ SMLAWT xi3, xi3, xi7, xit
+ MOV xi0, xi0, ASR #SHIFT
+ PKHBT xi6, xi0, xi1, LSL #(16-SHIFT)
+ MOV xi2, xi2, ASR #SHIFT
+ PKHBT xi7, xi2, xi3, LSL #(16-SHIFT)
+ ENDIF
+
+ ;// Load odd values
+ LDR xi0, [pSrc, #1*16-4] ;// j1
+ LDR xi1, [pSrc, #7*16-4] ;// j7
+ LDR xi2, [pSrc, #5*16-4] ;// j5
+ LDR xi3, [pSrc, #3*16-4] ;// j3
+
+ IF {TRUE}
+ ;// shortcut if odd values 0
+ TEQ xi0, #0
+ TEQEQ xi1, #0
+ TEQEQ xi2, #0
+ TEQEQ xi3, #0
+ BEQ v6OddZero$_F
+ ENDIF
+
+ ;// Store scaled even values
+ STMIA pDest, {xi4, xi5, xi6, xi7}
+
+ ;// Scale odd values
+ IF "$inscale"="s16"
+ ;// Perform AAN Scale
+ LDR xi4, [pScale, #1*16-4]
+ LDR xi5, [pScale, #7*16-4]
+ LDR xi6, [pScale, #5*16-4]
+ SMLABB xi7, xi0, xi4, xit
+ SMLATT xi0, xi0, xi4, xit
+ SMLABB xi4, xi1, xi5, xit
+ SMLATT xi1, xi1, xi5, xit
+ MOV xi7, xi7, ASR #SHIFT
+ PKHBT xi0, xi7, xi0, LSL #(16-SHIFT)
+ LDR xi7, [pScale, #3*16-4]
+ SMLABB xi5, xi2, xi6, xit
+ SMLATT xi2, xi2, xi6, xit
+ MOV xi4, xi4, ASR #SHIFT
+ PKHBT xi1, xi4, xi1, LSL #(16-SHIFT)
+ SMLABB xi6, xi3, xi7, xit
+ SMLATT xi3, xi3, xi7, xit
+ MOV xi5, xi5, ASR #SHIFT
+ PKHBT xi2, xi5, xi2, LSL #(16-SHIFT)
+ MOV xi6, xi6, ASR #SHIFT
+ PKHBT xi3, xi6, xi3, LSL #(16-SHIFT)
+ ENDIF
+ IF "$inscale"="s32" ;// 32x16 mul
+ LDR xi4, [pScale, #1*32-8]
+ LDR xi5, [pScale, #1*32+4-8]
+ LDR xi6, [pScale, #7*32-8]
+ LDR xi7, [pScale, #7*32+4-8]
+ SMLAWB xi4, xi4, xi0, xit
+ SMLAWT xi5, xi5, xi0, xit
+ SMLAWB xi6, xi6, xi1, xit
+ SMLAWT xi7, xi7, xi1, xit
+ MOV xi4, xi4, ASR #SHIFT
+ PKHBT xi0, xi4, xi5, LSL #(16-SHIFT)
+ MOV xi6, xi6, ASR #SHIFT
+ PKHBT xi1, xi6, xi7, LSL #(16-SHIFT)
+ LDR xi4, [pScale, #5*32-8]
+ LDR xi5, [pScale, #5*32+4-8]
+ LDR xi6, [pScale, #3*32-8]
+ LDR xi7, [pScale, #3*32+4-8]
+ SMLAWB xi4, xi4, xi2, xit
+ SMLAWT xi5, xi5, xi2, xit
+ SMLAWB xi6, xi6, xi3, xit
+ SMLAWT xi7, xi7, xi3, xit
+ MOV xi4, xi4, ASR #SHIFT
+ PKHBT xi2, xi4, xi5, LSL #(16-SHIFT)
+ MOV xi6, xi6, ASR #SHIFT
+ PKHBT xi3, xi6, xi7, LSL #(16-SHIFT)
+ ENDIF
+
+ SHADD16 xi5, xi0, xi1 ;// (j1+j7)/2
+ SSUB16 xi6, xi0, xi1 ;// j1-j7
+ SHADD16 xi7, xi2, xi3 ;// (j5+j3)/2
+ SSUB16 xi4, xi2, xi3 ;// j5-j3
+
+ SSUB16 xi3, xi5, xi7 ;// (i5-i7)/2
+
+ PKHBT xi0, xi6, xi4, LSL#16 ;// [i4,i6] row a
+ PKHTB xi1, xi4, xi6, ASR#16 ;// [i4,i6] row b
+
+ SMUADX xi2, xi0, csPiBy8 ;// rowa by [c,s]
+ SMUADX xi4, xi1, csPiBy8 ;// rowb by [c,s]
+ SMUSD xi0, xi0, csPiBy8 ;// rowa by [-s,c]
+ SMUSD xi6, xi1, csPiBy8 ;// rowb by [-s,c]
+
+ SMULBB xi1, xi3, LoopRR2
+ SMULTB xi3, xi3, LoopRR2
+
+ PKHTB xh4, xi4, xi2, ASR#16 ;// h4/4
+ PKHTB xh6, xi6, xi0, ASR#16 ;// h6/4
+ SHADD16 xh7, xi5, xi7 ;// (i5+i7)/4
+
+ ;// xi0,xi1,xi2,xi3 now free
+ ;// IStage 4,3, rows 2to3 x1/2
+
+ MOV xi3, xi3, LSL #1
+ PKHTB xh5, xi3, xi1, ASR#15 ;// h5/4
+ LDRD xi0, [pDest, #8] ;// j2,j6 scaled
+
+ ;// IStage 2, rows4to7
+ SSUB16 xg6, xh6, xh7
+ SSUB16 xg5, xh5, xg6
+ SSUB16 xg4, xh4, xg5
+
+ SSUB16 xi2, xi0, xi1 ;// (j2-j6)
+ SHADD16 xi3, xi0, xi1 ;// (j2+j6)/2
+
+ SMULBB xi0, xi2, LoopRR2
+ SMULTB xi2, xi2, LoopRR2
+
+ MOV xi2, xi2, LSL #1
+ PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4
+
+ ;// xi0, xi1 now free
+ ;// IStage 4,3 rows 0to1 x 1/2
+ LDRD xi0, [pDest] ;// j0, j4 scaled
+ SSUB16 xh2, xh2, xi3
+ ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows
+
+ SHADD16 xh0, xi0, xi1
+ SHSUB16 xh1, xi0, xi1
+
+ ;// IStage 2 rows 0to3 x 1/2
+ SHSUB16 xg2, xh1, xh2
+ SHADD16 xg1, xh1, xh2
+ SHSUB16 xg3, xh0, xh3
+ SHADD16 xg0, xh0, xh3
+
+ ;// IStage 1 all rows
+ SADD16 xf3, xg3, xg4
+ SSUB16 xf4, xg3, xg4
+ SADD16 xf2, xg2, xg5
+ SSUB16 xf5, xg2, xg5
+ SADD16 xf1, xg1, xg6
+ SSUB16 xf6, xg1, xg6
+ SADD16 xf0, xg0, xg7
+ SSUB16 xf7, xg0, xg7
+
+ ;// Transpose, store and loop
+ PKHBT ra01, xf0, xf1, LSL #16
+ PKHTB rb01, xf1, xf0, ASR #16
+
+ PKHBT ra23, xf2, xf3, LSL #16
+ PKHTB rb23, xf3, xf2, ASR #16
+
+ PKHBT ra45, xf4, xf5, LSL #16
+ PKHTB rb45, xf5, xf4, ASR #16
+
+ PKHBT ra67, xf6, xf7, LSL #16
+ STMIA pDest!, {ra01, ra23, ra45, ra67}
+ PKHTB rb67, xf7, xf6, ASR #16
+ STMIA pDest!, {rb01, rb23, rb45, rb67}
+ BCC v6_idct_col$_F
+
+ SUB pSrc, pDest, #(64*2)
+ M_LDR pDest, ppDest
+ IF "$stride"="s"
+ M_LDR pScale, pStride
+ ENDIF
+ B v6_idct_row$_F
+
+v6OddZero$_F
+ SSUB16 xi2, xi6, xi7 ;// (j2-j6)
+ SHADD16 xi3, xi6, xi7 ;// (j2+j6)/2
+
+ SMULBB xi0, xi2, LoopRR2
+ SMULTB xi2, xi2, LoopRR2
+
+ MOV xi2, xi2, LSL #1
+ PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4
+ SSUB16 xh2, xh2, xi3
+
+ ;// xi0, xi1 now free
+ ;// IStage 4,3 rows 0to1 x 1/2
+
+ SHADD16 xh0, xi4, xi5
+ SHSUB16 xh1, xi4, xi5
+
+ ;// IStage 2 rows 0to3 x 1/2
+ SHSUB16 xg2, xh1, xh2
+ SHADD16 xg1, xh1, xh2
+ SHSUB16 xg3, xh0, xh3
+ SHADD16 xg0, xh0, xh3
+
+ ;// IStage 1 all rows
+ MOV xf3, xg3
+ MOV xf4, xg3
+ MOV xf2, xg2
+ MOV xf5, xg2
+ MOV xf1, xg1
+ MOV xf6, xg1
+ MOV xf0, xg0
+ MOV xf7, xg0
+
+ ;// Transpose
+ PKHBT ra01, xf0, xf1, LSL #16
+ PKHTB rb01, xf1, xf0, ASR #16
+
+ PKHBT ra23, xf2, xf3, LSL #16
+ PKHTB rb23, xf3, xf2, ASR #16
+
+ PKHBT ra45, xf4, xf5, LSL #16
+ PKHTB rb45, xf5, xf4, ASR #16
+
+ PKHBT ra67, xf6, xf7, LSL #16
+ PKHTB rb67, xf7, xf6, ASR #16
+
+ STMIA pDest!, {ra01, ra23, ra45, ra67}
+ ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows
+ STMIA pDest!, {rb01, rb23, rb45, rb67}
+
+ BCC v6_idct_col$_F
+ SUB pSrc, pDest, #(64*2)
+ M_LDR pDest, ppDest
+ IF "$stride"="s"
+ M_LDR pScale, pStride
+ ENDIF
+
+
+v6_idct_row$_F
+ ;// IStage 4,3, rows4to7 x1/4
+ LDR xit, =0x00010001 ;// rounding constant
+ LDR xi0, [pSrc, #1*16] ;// j1
+ LDR xi1, [pSrc, #7*16] ;// 4*j7
+ LDR xi2, [pSrc, #5*16] ;// j5
+ LDR xi3, [pSrc, #3*16] ;// j3
+
+ SHADD16 xi1, xi1, xit ;// 2*j7
+ SHADD16 xi1, xi1, xit ;// j7
+
+ SHADD16 xi5, xi0, xi1 ;// (j1+j7)/2
+ SSUB16 xi6, xi0, xi1 ;// j1-j7
+ SHADD16 xi7, xi2, xi3 ;// (j5+j3)/2
+ SSUB16 xi4, xi2, xi3 ;// j5-j3
+
+ SSUB16 xi3, xi5, xi7 ;// (i5-i7)/2
+
+ PKHBT xi0, xi6, xi4, LSL#16 ;// [i4,i6] row a
+ PKHTB xi1, xi4, xi6, ASR#16 ;// [i4,i6] row b
+
+ SMUADX xi2, xi0, csPiBy8 ;// rowa by [c,s]
+ SMUADX xi4, xi1, csPiBy8 ;// rowb by [c,s]
+ SMUSD xi0, xi0, csPiBy8 ;// rowa by [-s,c]
+ SMUSD xi6, xi1, csPiBy8 ;// rowb by [-s,c]
+
+ SMULBB xi1, xi3, LoopRR2
+ SMULTB xi3, xi3, LoopRR2
+
+ PKHTB xh4, xi4, xi2, ASR#16 ;// h4/4
+ PKHTB xh6, xi6, xi0, ASR#16 ;// h6/4
+ SHADD16 xh7, xi5, xi7 ;// (i5+i7)/4
+
+ MOV xi3, xi3, LSL #1
+ PKHTB xh5, xi3, xi1, ASR#15 ;// h5/4
+
+ ;// xi0,xi1,xi2,xi3 now free
+ ;// IStage 4,3, rows 2to3 x1/2
+
+ LDR xi0, [pSrc, #2*16] ;// j2
+ LDR xi1, [pSrc, #6*16] ;// 2*j6
+
+ ;// IStage 2, rows4to7
+ SSUB16 xg6, xh6, xh7
+ SSUB16 xg5, xh5, xg6
+ SSUB16 xg4, xh4, xg5
+
+ SHADD16 xi1, xi1, xit ;// j6
+ SSUB16 xi2, xi0, xi1 ;// (j2-j6)
+ SHADD16 xi3, xi0, xi1 ;// (j2+j6)/2
+
+ SMULBB xi0, xi2, LoopRR2
+ SMULTB xi2, xi2, LoopRR2
+
+ MOV xi2, xi2, LSL #1
+
+ PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4
+
+ ;// xi0, xi1 now free
+ ;// IStage 4,3 rows 0to1 x 1/2
+ LDR xi1, [pSrc, #4*16] ;// j4
+ LDR xi0, [pSrc], #4 ;// j0
+
+ SSUB16 xh2, xh2, xi3
+ ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows
+
+ ADD xi0, xi0, xit, LSL #2 ;// ensure correct round
+ SHADD16 xh0, xi0, xi1 ;// of DC result
+ SHSUB16 xh1, xi0, xi1
+
+ ;// IStage 2 rows 0to3 x 1/2
+ SHSUB16 xg2, xh1, xh2
+ SHADD16 xg1, xh1, xh2
+ SHSUB16 xg3, xh0, xh3
+ SHADD16 xg0, xh0, xh3
+
+ ;// IStage 1 all rows
+ SHADD16 xf3, xg3, xg4
+ SHSUB16 xf4, xg3, xg4
+ SHADD16 xf2, xg2, xg5
+ SHSUB16 xf5, xg2, xg5
+ SHADD16 xf1, xg1, xg6
+ SHSUB16 xf6, xg1, xg6
+ SHADD16 xf0, xg0, xg7
+ SHSUB16 xf7, xg0, xg7
+
+ ;// Saturate
+ IF ("$outsize"="u8")
+ USAT16 xf0, #8, xf0
+ USAT16 xf1, #8, xf1
+ USAT16 xf2, #8, xf2
+ USAT16 xf3, #8, xf3
+ USAT16 xf4, #8, xf4
+ USAT16 xf5, #8, xf5
+ USAT16 xf6, #8, xf6
+ USAT16 xf7, #8, xf7
+ ENDIF
+ IF ("$outsize"="s9")
+ SSAT16 xf0, #9, xf0
+ SSAT16 xf1, #9, xf1
+ SSAT16 xf2, #9, xf2
+ SSAT16 xf3, #9, xf3
+ SSAT16 xf4, #9, xf4
+ SSAT16 xf5, #9, xf5
+ SSAT16 xf6, #9, xf6
+ SSAT16 xf7, #9, xf7
+ ENDIF
+
+ ;// Transpose to Row, Pack and store
+ IF ("$outsize"="u8")
+ ORR xf0, xf0, xf1, LSL #8 ;// [ b1 b0 a1 a0 ]
+ ORR xf2, xf2, xf3, LSL #8 ;// [ b3 b2 a3 a2 ]
+ ORR xf4, xf4, xf5, LSL #8 ;// [ b5 b4 a5 a4 ]
+ ORR xf6, xf6, xf7, LSL #8 ;// [ b7 b6 a7 a6 ]
+ PKHBT ra01, xf0, xf2, LSL #16
+ PKHTB rb01, xf2, xf0, ASR #16
+ PKHBT ra23, xf4, xf6, LSL #16
+ PKHTB rb23, xf6, xf4, ASR #16
+ STMIA pDest, {ra01, ra23}
+ IF "$stride"="s"
+ ADD pDest, pDest, pScale
+ STMIA pDest, {rb01, rb23}
+ ADD pDest, pDest, pScale
+ ELSE
+ ADD pDest, pDest, #($stride)
+ STMIA pDest, {rb01, rb23}
+ ADD pDest, pDest, #($stride)
+ ENDIF
+ ENDIF
+ IF ("$outsize"="s9"):LOR:("$outsize"="s16")
+ PKHBT ra01, xf0, xf1, LSL #16
+ PKHTB rb01, xf1, xf0, ASR #16
+
+ PKHBT ra23, xf2, xf3, LSL #16
+ PKHTB rb23, xf3, xf2, ASR #16
+
+ PKHBT ra45, xf4, xf5, LSL #16
+ PKHTB rb45, xf5, xf4, ASR #16
+
+ PKHBT ra67, xf6, xf7, LSL #16
+ PKHTB rb67, xf7, xf6, ASR #16
+
+ STMIA pDest, {ra01, ra23, ra45, ra67}
+ IF "$stride"="s"
+ ADD pDest, pDest, pScale
+ STMIA pDest, {rb01, rb23, rb45, rb67}
+ ADD pDest, pDest, pScale
+ ELSE
+ ADD pDest, pDest, #($stride)
+ STMIA pDest, {rb01, rb23, rb45, rb67}
+ ADD pDest, pDest, #($stride)
+ ENDIF
+ ENDIF
+
+ BCC v6_idct_row$_F
+ ENDIF ;// ARM1136JS
+
+
+ IF CortexA8
+
+Src0 EQU 7
+Src1 EQU 8
+Src2 EQU 9
+Src3 EQU 10
+Src4 EQU 11
+Src5 EQU 12
+Src6 EQU 13
+Src7 EQU 14
+Tmp EQU 15
+
+qXj0 QN Src0.S16
+qXj1 QN Src1.S16
+qXj2 QN Src2.S16
+qXj3 QN Src3.S16
+qXj4 QN Src4.S16
+qXj5 QN Src5.S16
+qXj6 QN Src6.S16
+qXj7 QN Src7.S16
+qXjt QN Tmp.S16
+
+dXj0lo DN (Src0*2).S16
+dXj0hi DN (Src0*2+1).S16
+dXj1lo DN (Src1*2).S16
+dXj1hi DN (Src1*2+1).S16
+dXj2lo DN (Src2*2).S16
+dXj2hi DN (Src2*2+1).S16
+dXj3lo DN (Src3*2).S16
+dXj3hi DN (Src3*2+1).S16
+dXj4lo DN (Src4*2).S16
+dXj4hi DN (Src4*2+1).S16
+dXj5lo DN (Src5*2).S16
+dXj5hi DN (Src5*2+1).S16
+dXj6lo DN (Src6*2).S16
+dXj6hi DN (Src6*2+1).S16
+dXj7lo DN (Src7*2).S16
+dXj7hi DN (Src7*2+1).S16
+dXjtlo DN (Tmp*2).S16
+dXjthi DN (Tmp*2+1).S16
+
+qXi0 QN qXj0
+qXi1 QN qXj4
+qXi2 QN qXj2
+qXi3 QN qXj7
+qXi4 QN qXj5
+qXi5 QN qXjt
+qXi6 QN qXj1
+qXi7 QN qXj6
+qXit QN qXj3
+
+dXi0lo DN dXj0lo
+dXi0hi DN dXj0hi
+dXi1lo DN dXj4lo
+dXi1hi DN dXj4hi
+dXi2lo DN dXj2lo
+dXi2hi DN dXj2hi
+dXi3lo DN dXj7lo
+dXi3hi DN dXj7hi
+dXi4lo DN dXj5lo
+dXi4hi DN dXj5hi
+dXi5lo DN dXjtlo
+dXi5hi DN dXjthi
+dXi6lo DN dXj1lo
+dXi6hi DN dXj1hi
+dXi7lo DN dXj6lo
+dXi7hi DN dXj6hi
+dXitlo DN dXj3lo
+dXithi DN dXj3hi
+
+qXh0 QN qXit
+qXh1 QN qXi0
+qXh2 QN qXi2
+qXh3 QN qXi3
+qXh4 QN qXi7
+qXh5 QN qXi5
+qXh6 QN qXi4
+qXh7 QN qXi1
+qXht QN qXi6
+
+dXh0lo DN dXitlo
+dXh0hi DN dXithi
+dXh1lo DN dXi0lo
+dXh1hi DN dXi0hi
+dXh2lo DN dXi2lo
+dXh2hi DN dXi2hi
+dXh3lo DN dXi3lo
+dXh3hi DN dXi3hi
+dXh4lo DN dXi7lo
+dXh4hi DN dXi7hi
+dXh5lo DN dXi5lo
+dXh5hi DN dXi5hi
+dXh6lo DN dXi4lo
+dXh6hi DN dXi4hi
+dXh7lo DN dXi1lo
+dXh7hi DN dXi1hi
+dXhtlo DN dXi6lo
+dXhthi DN dXi6hi
+
+qXg0 QN qXh2
+qXg1 QN qXht
+qXg2 QN qXh1
+qXg3 QN qXh0
+qXg4 QN qXh4
+qXg5 QN qXh5
+qXg6 QN qXh6
+qXg7 QN qXh7
+qXgt QN qXh3
+
+qXf0 QN qXg6
+qXf1 QN qXg5
+qXf2 QN qXg4
+qXf3 QN qXgt
+qXf4 QN qXg3
+qXf5 QN qXg2
+qXf6 QN qXg1
+qXf7 QN qXg0
+qXft QN qXg7
+
+
+qXt0 QN 1.S32
+qXt1 QN 2.S32
+qT0lo QN 1.S32
+qT0hi QN 2.S32
+qT1lo QN 3.S32
+qT1hi QN 4.S32
+qScalelo QN 5.S32 ;// used to read post scale values
+qScalehi QN 6.S32
+qTemp0 QN 5.S32
+qTemp1 QN 6.S32
+
+
+Scale1 EQU 6
+Scale2 EQU 15
+qScale1 QN Scale1.S16
+qScale2 QN Scale2.S16
+dScale1lo DN (Scale1*2).S16
+dScale1hi DN (Scale1*2+1).S16
+dScale2lo DN (Scale2*2).S16
+dScale2hi DN (Scale2*2+1).S16
+
+dCoefs DN 0.S16 ;// Scale coefficients in format {[0] [C] [S] [InvSqrt2]}
+InvSqrt2 DN dCoefs[0] ;// 1/sqrt(2) in Q15
+S DN dCoefs[1] ;// Sin(PI/8) in Q15
+C DN dCoefs[2] ;// Cos(PI/8) in Q15
+
+pTemp RN 12
+
+
+ IMPORT armCOMM_IDCTCoef
+
+ VLD1 {qXj0,qXj1}, [pSrc @64]!
+ VLD1 {qXj2,qXj3}, [pSrc @64]!
+ VLD1 {qXj4,qXj5}, [pSrc @64]!
+ VLD1 {qXj6,qXj7}, [pSrc @64]!
+
+ ;// Load PreScale and multiply with Src
+ ;// IStage 4
+
+ IF "$inscale"="s16" ;// 16X16 Mul
+ M_IDCT_PRESCALE16
+ ENDIF
+
+ IF "$inscale"="s32" ;// 32X32 Mul
+ M_IDCT_PRESCALE32
+ ENDIF
+
+ ;// IStage 3
+ VQRDMULH qXi2, qXi2, InvSqrt2 ;// i2/sqrt(2)
+ VHADD qXh0, qXi0, qXi1 ;// (i0+i1)/2
+ VHSUB qXh1, qXi0, qXi1 ;// (i0-i1)/2
+ VHADD qXh7, qXi5, qXi7 ;// (i5+i7)/4
+ VSUB qXh5, qXi5, qXi7 ;// (i5-i7)/2
+ VQRDMULH qXh5, qXh5, InvSqrt2 ;// h5/sqrt(2)
+ VSUB qXh2, qXi2, qXi3 ;// h2, h3
+
+ VMULL qXt0, dXi4lo, C ;// c*i4
+ VMLAL qXt0, dXi6lo, S ;// c*i4+s*i6
+ VMULL qXt1, dXi4hi, C
+ VMLAL qXt1, dXi6hi, S
+ VSHRN dXh4lo, qXt0, #16 ;// h4
+ VSHRN dXh4hi, qXt1, #16
+
+ VMULL qXt0, dXi6lo, C ;// c*i6
+ VMLSL qXt0, dXi4lo, S ;// -s*i4 + c*i6
+ VMULL qXt1, dXi6hi, C
+ VMLSL qXt1, dXi4hi, S
+ VSHRN dXh6lo, qXt0, #16 ;// h6
+ VSHRN dXh6hi, qXt1, #16
+
+ ;// IStage 2
+ VSUB qXg6, qXh6, qXh7
+ VSUB qXg5, qXh5, qXg6
+ VSUB qXg4, qXh4, qXg5
+ VHADD qXg1, qXh1, qXh2 ;// (h1+h2)/2
+ VHSUB qXg2, qXh1, qXh2 ;// (h1-h2)/2
+ VHADD qXg0, qXh0, qXh3 ;// (h0+h3)/2
+ VHSUB qXg3, qXh0, qXh3 ;// (h0-h3)/2
+
+ ;// IStage 1 all rows
+ VADD qXf3, qXg3, qXg4
+ VSUB qXf4, qXg3, qXg4
+ VADD qXf2, qXg2, qXg5
+ VSUB qXf5, qXg2, qXg5
+ VADD qXf1, qXg1, qXg6
+ VSUB qXf6, qXg1, qXg6
+ VADD qXf0, qXg0, qXg7
+ VSUB qXf7, qXg0, qXg7
+
+ ;// Transpose, store and loop
+XTR0 EQU Src5
+XTR1 EQU Tmp
+XTR2 EQU Src6
+XTR3 EQU Src7
+XTR4 EQU Src3
+XTR5 EQU Src0
+XTR6 EQU Src1
+XTR7 EQU Src2
+XTRt EQU Src4
+
+qA0 QN XTR0.S32 ;// for XTRpose
+qA1 QN XTR1.S32
+qA2 QN XTR2.S32
+qA3 QN XTR3.S32
+qA4 QN XTR4.S32
+qA5 QN XTR5.S32
+qA6 QN XTR6.S32
+qA7 QN XTR7.S32
+
+dB0 DN XTR0*2+1 ;// for using VSWP
+dB1 DN XTR1*2+1
+dB2 DN XTR2*2+1
+dB3 DN XTR3*2+1
+dB4 DN XTR4*2
+dB5 DN XTR5*2
+dB6 DN XTR6*2
+dB7 DN XTR7*2
+
+
+ VTRN qXf0, qXf1
+ VTRN qXf2, qXf3
+ VTRN qXf4, qXf5
+ VTRN qXf6, qXf7
+ VTRN qA0, qA2
+ VTRN qA1, qA3
+ VTRN qA4, qA6
+ VTRN qA5, qA7
+ VSWP dB0, dB4
+ VSWP dB1, dB5
+ VSWP dB2, dB6
+ VSWP dB3, dB7
+
+
+qYj0 QN qXf0
+qYj1 QN qXf1
+qYj2 QN qXf2
+qYj3 QN qXf3
+qYj4 QN qXf4
+qYj5 QN qXf5
+qYj6 QN qXf6
+qYj7 QN qXf7
+qYjt QN qXft
+
+dYj0lo DN (XTR0*2).S16
+dYj0hi DN (XTR0*2+1).S16
+dYj1lo DN (XTR1*2).S16
+dYj1hi DN (XTR1*2+1).S16
+dYj2lo DN (XTR2*2).S16
+dYj2hi DN (XTR2*2+1).S16
+dYj3lo DN (XTR3*2).S16
+dYj3hi DN (XTR3*2+1).S16
+dYj4lo DN (XTR4*2).S16
+dYj4hi DN (XTR4*2+1).S16
+dYj5lo DN (XTR5*2).S16
+dYj5hi DN (XTR5*2+1).S16
+dYj6lo DN (XTR6*2).S16
+dYj6hi DN (XTR6*2+1).S16
+dYj7lo DN (XTR7*2).S16
+dYj7hi DN (XTR7*2+1).S16
+dYjtlo DN (XTRt*2).S16
+dYjthi DN (XTRt*2+1).S16
+
+qYi0 QN qYj0
+qYi1 QN qYj4
+qYi2 QN qYj2
+qYi3 QN qYj7
+qYi4 QN qYj5
+qYi5 QN qYjt
+qYi6 QN qYj1
+qYi7 QN qYj6
+qYit QN qYj3
+
+dYi0lo DN dYj0lo
+dYi0hi DN dYj0hi
+dYi1lo DN dYj4lo
+dYi1hi DN dYj4hi
+dYi2lo DN dYj2lo
+dYi2hi DN dYj2hi
+dYi3lo DN dYj7lo
+dYi3hi DN dYj7hi
+dYi4lo DN dYj5lo
+dYi4hi DN dYj5hi
+dYi5lo DN dYjtlo
+dYi5hi DN dYjthi
+dYi6lo DN dYj1lo
+dYi6hi DN dYj1hi
+dYi7lo DN dYj6lo
+dYi7hi DN dYj6hi
+dYitlo DN dYj3lo
+dYithi DN dYj3hi
+
+qYh0 QN qYit
+qYh1 QN qYi0
+qYh2 QN qYi2
+qYh3 QN qYi3
+qYh4 QN qYi7
+qYh5 QN qYi5
+qYh6 QN qYi4
+qYh7 QN qYi1
+qYht QN qYi6
+
+dYh0lo DN dYitlo
+dYh0hi DN dYithi
+dYh1lo DN dYi0lo
+dYh1hi DN dYi0hi
+dYh2lo DN dYi2lo
+dYh2hi DN dYi2hi
+dYh3lo DN dYi3lo
+dYh3hi DN dYi3hi
+dYh4lo DN dYi7lo
+dYh4hi DN dYi7hi
+dYh5lo DN dYi5lo
+dYh5hi DN dYi5hi
+dYh6lo DN dYi4lo
+dYh6hi DN dYi4hi
+dYh7lo DN dYi1lo
+dYh7hi DN dYi1hi
+dYhtlo DN dYi6lo
+dYhthi DN dYi6hi
+
+qYg0 QN qYh2
+qYg1 QN qYht
+qYg2 QN qYh1
+qYg3 QN qYh0
+qYg4 QN qYh4
+qYg5 QN qYh5
+qYg6 QN qYh6
+qYg7 QN qYh7
+qYgt QN qYh3
+
+qYf0 QN qYg6
+qYf1 QN qYg5
+qYf2 QN qYg4
+qYf3 QN qYgt
+qYf4 QN qYg3
+qYf5 QN qYg2
+qYf6 QN qYg1
+qYf7 QN qYg0
+qYft QN qYg7
+
+ VRSHR qYj7, qYj7, #2
+ VRSHR qYj6, qYj6, #1
+
+ VHADD qYi5, qYj1, qYj7 ;// i5 = (j1+j7)/2
+ VSUB qYi6, qYj1, qYj7 ;// i6 = j1-j7
+ VHADD qYi3, qYj2, qYj6 ;// i3 = (j2+j6)/2
+ VSUB qYi2, qYj2, qYj6 ;// i2 = j2-j6
+ VHADD qYi7, qYj5, qYj3 ;// i7 = (j5+j3)/2
+ VSUB qYi4, qYj5, qYj3 ;// i4 = j5-j3
+
+ VQRDMULH qYi2, qYi2, InvSqrt2 ;// i2/sqrt(2)
+ ;// IStage 4,3 rows 0to1 x 1/2
+
+ MOV pTemp, #0x4 ;// ensure correct round
+ VDUP qScale1, pTemp ;// of DC result
+ VADD qYi0, qYi0, qScale1
+
+ VHADD qYh0, qYi0, qYi1 ;// (i0+i1)/2
+ VHSUB qYh1, qYi0, qYi1 ;// (i0-i1)/2
+
+ VHADD qYh7, qYi5, qYi7 ;// (i5+i7)/4
+ VSUB qYh5, qYi5, qYi7 ;// (i5-i7)/2
+ VSUB qYh2, qYi2, qYi3 ;// h2, h3
+ VQRDMULH qYh5, qYh5, InvSqrt2 ;// h5/sqrt(2)
+
+ VMULL qXt0, dYi4lo, C ;// c*i4
+ VMLAL qXt0, dYi6lo, S ;// c*i4+s*i6
+ VMULL qXt1, dYi4hi, C
+ VMLAL qXt1, dYi6hi, S
+ VSHRN dYh4lo, qXt0, #16 ;// h4
+ VSHRN dYh4hi, qXt1, #16
+
+ VMULL qXt0, dYi6lo, C ;// c*i6
+ VMLSL qXt0, dYi4lo, S ;// -s*i4 + c*i6
+ VMULL qXt1, dYi6hi, C
+ VMLSL qXt1, dYi4hi, S
+ VSHRN dYh6lo, qXt0, #16 ;// h6
+ VSHRN dYh6hi, qXt1, #16
+
+ VSUB qYg6, qYh6, qYh7
+ VSUB qYg5, qYh5, qYg6
+ VSUB qYg4, qYh4, qYg5
+
+ ;// IStage 2 rows 0to3 x 1/2
+ VHADD qYg1, qYh1, qYh2 ;// (h1+h2)/2
+ VHSUB qYg2, qYh1, qYh2 ;// (h1-h2)/2
+ VHADD qYg0, qYh0, qYh3 ;// (h0+h3)/2
+ VHSUB qYg3, qYh0, qYh3 ;// (h0-h3)/2
+
+
+ ;// IStage 1 all rows
+ VHADD qYf3, qYg3, qYg4
+ VHSUB qYf4, qYg3, qYg4
+ VHADD qYf2, qYg2, qYg5
+ VHSUB qYf5, qYg2, qYg5
+ VHADD qYf1, qYg1, qYg6
+ VHSUB qYf6, qYg1, qYg6
+ VHADD qYf0, qYg0, qYg7
+ VHSUB qYf7, qYg0, qYg7
+
+YTR0 EQU Src0
+YTR1 EQU Src4
+YTR2 EQU Src1
+YTR3 EQU Src2
+YTR4 EQU Src7
+YTR5 EQU Src5
+YTR6 EQU Tmp
+YTR7 EQU Src6
+YTRt EQU Src3
+
+qC0 QN YTR0.S32 ;// for YTRpose
+qC1 QN YTR1.S32
+qC2 QN YTR2.S32
+qC3 QN YTR3.S32
+qC4 QN YTR4.S32
+qC5 QN YTR5.S32
+qC6 QN YTR6.S32
+qC7 QN YTR7.S32
+
+dD0 DN YTR0*2+1 ;// for using VSWP
+dD1 DN YTR1*2+1
+dD2 DN YTR2*2+1
+dD3 DN YTR3*2+1
+dD4 DN YTR4*2
+dD5 DN YTR5*2
+dD6 DN YTR6*2
+dD7 DN YTR7*2
+
+ VTRN qYf0, qYf1
+ VTRN qYf2, qYf3
+ VTRN qYf4, qYf5
+ VTRN qYf6, qYf7
+ VTRN qC0, qC2
+ VTRN qC1, qC3
+ VTRN qC4, qC6
+ VTRN qC5, qC7
+ VSWP dD0, dD4
+ VSWP dD1, dD5
+ VSWP dD2, dD6
+ VSWP dD3, dD7
+
+
+dYf0U8 DN YTR0*2.U8
+dYf1U8 DN YTR1*2.U8
+dYf2U8 DN YTR2*2.U8
+dYf3U8 DN YTR3*2.U8
+dYf4U8 DN YTR4*2.U8
+dYf5U8 DN YTR5*2.U8
+dYf6U8 DN YTR6*2.U8
+dYf7U8 DN YTR7*2.U8
+
+ ;//
+ ;// Do saturation if outsize is other than S16
+ ;//
+
+ IF ("$outsize"="u8")
+ ;// Output range [0-255]
+ VQMOVN dYf0U8, qYf0
+ VQMOVN dYf1U8, qYf1
+ VQMOVN dYf2U8, qYf2
+ VQMOVN dYf3U8, qYf3
+ VQMOVN dYf4U8, qYf4
+ VQMOVN dYf5U8, qYf5
+ VQMOVN dYf6U8, qYf6
+ VQMOVN dYf7U8, qYf7
+ ENDIF
+
+ IF ("$outsize"="s9")
+ ;// Output range [-256 to +255]
+ VQSHL qYf0, qYf0, #16-9
+ VQSHL qYf1, qYf1, #16-9
+ VQSHL qYf2, qYf2, #16-9
+ VQSHL qYf3, qYf3, #16-9
+ VQSHL qYf4, qYf4, #16-9
+ VQSHL qYf5, qYf5, #16-9
+ VQSHL qYf6, qYf6, #16-9
+ VQSHL qYf7, qYf7, #16-9
+
+ VSHR qYf0, qYf0, #16-9
+ VSHR qYf1, qYf1, #16-9
+ VSHR qYf2, qYf2, #16-9
+ VSHR qYf3, qYf3, #16-9
+ VSHR qYf4, qYf4, #16-9
+ VSHR qYf5, qYf5, #16-9
+ VSHR qYf6, qYf6, #16-9
+ VSHR qYf7, qYf7, #16-9
+ ENDIF
+
+ ;// Store output depending on the Stride size
+ IF "$stride"="s"
+ VST1 qYf0, [pDest @64], Stride
+ VST1 qYf1, [pDest @64], Stride
+ VST1 qYf2, [pDest @64], Stride
+ VST1 qYf3, [pDest @64], Stride
+ VST1 qYf4, [pDest @64], Stride
+ VST1 qYf5, [pDest @64], Stride
+ VST1 qYf6, [pDest @64], Stride
+ VST1 qYf7, [pDest @64]
+ ELSE
+ IF ("$outsize"="u8")
+ VST1 dYf0U8, [pDest @64], #8
+ VST1 dYf1U8, [pDest @64], #8
+ VST1 dYf2U8, [pDest @64], #8
+ VST1 dYf3U8, [pDest @64], #8
+ VST1 dYf4U8, [pDest @64], #8
+ VST1 dYf5U8, [pDest @64], #8
+ VST1 dYf6U8, [pDest @64], #8
+ VST1 dYf7U8, [pDest @64]
+ ELSE
+ ;// ("$outsize"="s9") or ("$outsize"="s16")
+ VST1 qYf0, [pDest @64], #16
+ VST1 qYf1, [pDest @64], #16
+ VST1 qYf2, [pDest @64], #16
+ VST1 qYf3, [pDest @64], #16
+ VST1 qYf4, [pDest @64], #16
+ VST1 qYf5, [pDest @64], #16
+ VST1 qYf6, [pDest @64], #16
+ VST1 qYf7, [pDest @64]
+ ENDIF
+
+ ENDIF
+
+
+
+ ENDIF ;// CortexA8
+
+
+
+ MEND
+
+ ;// Scale TWO input rows with TWO rows of 16 bit scale values
+ ;//
+ ;// Helper for M_IDCT_PRESCALE16: multiplies rows n and n+1 (eight S16
+ ;// values each) by their scale rows and narrows the products back to S16.
+ ;// Also loads the next two scale rows from pScale when $LastRow is "0".
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// $dAlo - Input D register with first four S16 values of row n
+ ;// $dAhi - Input D register with next four S16 values of row n
+ ;// $dBlo - Input D register with first four S16 values of row n+1
+ ;// $dBhi - Input D register with next four S16 values of row n+1
+ ;// pScale - Pointer to the scale row that follows row n+1
+ ;// qT0lo - Scratch register, receives the widened product of $dAlo
+ ;// qT0hi - Scratch register, receives the widened product of $dAhi
+ ;// qT1lo - Scratch register, receives the widened product of $dBlo
+ ;// qT1hi - Scratch register, receives the widened product of $dBhi
+ ;// dScale1lo - Scale values of row n (multiplied with $dAlo)
+ ;// dScale1hi - Scale values of row n (multiplied with $dAhi)
+ ;// dScale2lo - Scale values of row n+1 (multiplied with $dBlo)
+ ;// dScale2hi - Scale values of row n+1 (multiplied with $dBhi)
+ ;//
+ ;// Input Flag
+ ;//
+ ;// $LastRow - "0" loads two more scale rows; any other value skips the load
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// $dAlo - Scaled output values (first four S16 of row n)
+ ;// $dAhi - Scaled output values (next four S16 of row n)
+ ;// $dBlo - Scaled output values (first four S16 of row n+1)
+ ;// $dBhi - Scaled output values (next four S16 of row n+1)
+ ;// qScale1 - Reloaded from pScale (only when $LastRow is "0")
+ ;// qScale2 - Reloaded from pScale (only when $LastRow is "0")
+ ;// pScale - Advanced by 32 bytes (only when $LastRow is "0")
+ ;//
+ MACRO
+ M_IDCT_SCALE16 $dAlo, $dAhi, $dBlo, $dBhi, $LastRow
+ VMULL qT0lo, $dAlo, dScale1lo
+ VMULL qT0hi, $dAhi, dScale1hi
+ VMULL qT1lo, $dBlo, dScale2lo
+ VMULL qT1hi, $dBhi, dScale2hi
+ IF "$LastRow"="0"
+ VLD1 qScale1, [pScale], #16 ;// Load scale for row n+2 (presumably aliases dScale1lo/hi - confirm)
+ VLD1 qScale2, [pScale], #16 ;// Load scale for row n+3 (presumably aliases dScale2lo/hi - confirm)
+ ENDIF
+ VQRSHRN $dAlo, qT0lo, #12 ;// Saturating, rounding >>12 narrow back to S16
+ VQRSHRN $dAhi, qT0hi, #12
+ VQRSHRN $dBlo, qT1lo, #12
+ VQRSHRN $dBhi, qT1hi, #12
+ MEND
+
+ ;// Scale 8x8 block input values with 16 bit scale values
+ ;//
+ ;// Scales all eight input rows in place (two rows per M_IDCT_SCALE16 call)
+ ;// and then does the Ist stage of the IDCT on row pairs (1,7), (2,6), (5,3).
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// dXjnlo - n th input D register with first four S16 values
+ ;// dXjnhi - n th input D register with next four S16 values
+ ;// qXjn - n th input Q register with eight S16 values
+ ;// pScale - Pointer to scale values (eight rows of 16 bytes are read)
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// qXin - 1st stage outputs i2..i7; pSrc and dCoefs are overwritten
+ ;//
+ MACRO
+ M_IDCT_PRESCALE16
+ VLD1 qScale1, [pScale], #16 ;// Load Pre scale for row 0
+ VLD1 qScale2, [pScale], #16 ;// Load Pre scale for row 1
+ M_IDCT_SCALE16 dXj0lo, dXj0hi, dXj1lo, dXj1hi, 0 ;// Pre scale row 0 & 1
+ M_IDCT_SCALE16 dXj2lo, dXj2hi, dXj3lo, dXj3hi, 0 ;// Pre scale row 2 & 3
+ M_IDCT_SCALE16 dXj4lo, dXj4hi, dXj5lo, dXj5hi, 0 ;// Pre scale row 4 & 5
+ M_IDCT_SCALE16 dXj6lo, dXj6hi, dXj7lo, dXj7hi, 1 ;// Pre scale row 6 & 7, no further scale load
+ VHADD qXi5, qXj1, qXj7 ;// (j1+j7)/2
+ VSUB qXi6, qXj1, qXj7 ;// j1-j7
+ LDR pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants
+ VHADD qXi3, qXj2, qXj6 ;// (j2+j6)/2
+ VSUB qXi2, qXj2, qXj6 ;// j2-j6
+ VLDR dCoefs, [pSrc] ;// Load DCT inverse AAN constants
+ VHADD qXi7, qXj5, qXj3 ;// (j5+j3)/2
+ VSUB qXi4, qXj5, qXj3 ;// j5-j3
+ MEND
+
+
+ ;// Scale 8x8 block input values with 32 bit scale values
+ ;//
+ ;// Pre-scales an 8x8 block of S16 inputs with 32 bit scale values and
+ ;// does the Ist stage transformations of IDCT on the scaled rows.
+ ;//
+ ;// Input Registers:
+ ;//
+ ;// dXjnlo - n th input D register with first four S16 values
+ ;// dXjnhi - n th input D register with next four S16 values
+ ;// qXjn - n th input Q register with eight S16 values (qXj4 is loaded by this macro)
+ ;// pScale - Pointer to 32bit scale values in Q23 format
+ ;//
+ ;// Output Registers:
+ ;//
+ ;// dXinlo - n th output D register with first four S16 output values of 1st stage
+ ;// dXinhi - n th output D register with next four S16 output values of 1st stage
+ ;// Also modifies pScale, pTemp, pSrc and dCoefs.
+ MACRO
+ M_IDCT_PRESCALE32
+qScale0lo QN 0.S32 ;// Register pair 0/1: scale rows 0, 5, 6, 7
+qScale0hi QN 1.S32
+qScale1lo QN 2.S32 ;// Register pair 2/3: scale rows 1, 2, 3, 4
+qScale1hi QN 3.S32
+qScale2lo QN qScale1lo
+qScale2hi QN qScale1hi
+qScale3lo QN qScale1lo
+qScale3hi QN qScale1hi
+qScale4lo QN qScale1lo
+qScale4hi QN qScale1hi
+qScale5lo QN qScale0lo
+qScale5hi QN qScale0hi
+qScale6lo QN qScale0lo
+qScale6hi QN qScale0hi
+qScale7lo QN qScale0lo
+qScale7hi QN qScale0hi
+ ;// Widened S32 inputs: rows 0,2,3,4,7 share one register pair, rows 1,5,6 the other
+qSrc0lo QN 4.S32
+qSrc0hi QN 5.S32
+qSrc1lo QN 6.S32
+qSrc1hi QN Src4.S32
+qSrc2lo QN qSrc0lo
+qSrc2hi QN qSrc0hi
+qSrc3lo QN qSrc0lo
+qSrc3hi QN qSrc0hi
+qSrc4lo QN qSrc0lo
+qSrc4hi QN qSrc0hi
+qSrc5lo QN qSrc1lo
+qSrc5hi QN qSrc1hi
+qSrc6lo QN qSrc1lo
+qSrc6hi QN qSrc1hi
+qSrc7lo QN qSrc0lo
+qSrc7hi QN qSrc0hi
+ ;// Stage results reuse the qScale0 register pair
+qRes17lo QN qScale0lo
+qRes17hi QN qScale0hi
+qRes26lo QN qScale0lo
+qRes26hi QN qScale0hi
+qRes53lo QN qScale0lo
+qRes53hi QN qScale0hi
+
+ ADD pTemp, pScale, #4*8*7 ;// Address of scale row 7 (8 words of 4 bytes per row)
+
+ ;// Row 0 (scaling of rows 1 & 7 is started here as well)
+ VLD1 {qScale0lo, qScale0hi}, [pScale]!
+ VSHLL qSrc0lo, dXj0lo, #(12-1)
+ VSHLL qSrc0hi, dXj0hi, #(12-1)
+ VLD1 {qScale1lo, qScale1hi}, [pScale]!
+ VQRDMULH qSrc0lo, qScale0lo, qSrc0lo
+ VQRDMULH qSrc0hi, qScale0hi, qSrc0hi
+ VLD1 {qScale7lo, qScale7hi}, [pTemp]!
+ VSHLL qSrc1lo, dXj1lo, #(12-1)
+ VSHLL qSrc1hi, dXj1hi, #(12-1)
+ VMOVN dXi0lo, qSrc0lo ;// Output i0
+ VMOVN dXi0hi, qSrc0hi
+ VSHLL qSrc7lo, dXj7lo, #(12-1)
+ VSHLL qSrc7hi, dXj7hi, #(12-1)
+ SUB pTemp, pTemp, #((16*2)+(4*8*1)) ;// Undo the writeback and step back one row: scale row 6
+ VQRDMULH qSrc1lo, qScale1lo, qSrc1lo
+ VQRDMULH qSrc1hi, qScale1hi, qSrc1hi
+ VQRDMULH qSrc7lo, qScale7lo, qSrc7lo
+ VQRDMULH qSrc7hi, qScale7hi, qSrc7hi
+ VLD1 {qScale2lo, qScale2hi}, [pScale]!
+
+ ;// Row 1 & 7 (scaling of rows 2 & 6 is started here as well)
+ VHADD qRes17lo, qSrc1lo, qSrc7lo ;// (j1+j7)/2
+ VHADD qRes17hi, qSrc1hi, qSrc7hi ;// (j1+j7)/2
+ VMOVN dXi5lo, qRes17lo ;// Output i5
+ VMOVN dXi5hi, qRes17hi
+ VSUB qRes17lo, qSrc1lo, qSrc7lo ;// j1-j7
+ VSUB qRes17hi, qSrc1hi, qSrc7hi ;// j1-j7
+ VMOVN dXi6lo, qRes17lo ;// Output i6
+ VMOVN dXi6hi, qRes17hi
+ VSHLL qSrc2lo, dXj2lo, #(12-1)
+ VSHLL qSrc2hi, dXj2hi, #(12-1)
+ VLD1 {qScale6lo, qScale6hi}, [pTemp]!
+ VSHLL qSrc6lo, dXj6lo, #(12-1)
+ VSHLL qSrc6hi, dXj6hi, #(12-1)
+ SUB pTemp, pTemp, #((16*2)+(4*8*1)) ;// Undo the writeback and step back one row: scale row 5
+ VQRDMULH qSrc2lo, qScale2lo, qSrc2lo
+ VQRDMULH qSrc2hi, qScale2hi, qSrc2hi
+ VQRDMULH qSrc6lo, qScale6lo, qSrc6lo
+ VQRDMULH qSrc6hi, qScale6hi, qSrc6hi
+ VLD1 {qScale3lo, qScale3hi}, [pScale]!
+
+ ;// Row 2 & 6 (scaling of rows 3 & 5 is started here as well)
+ VHADD qRes26lo, qSrc2lo, qSrc6lo ;// (j2+j6)/2
+ VHADD qRes26hi, qSrc2hi, qSrc6hi ;// (j2+j6)/2
+ VMOVN dXi3lo, qRes26lo ;// Output i3
+ VMOVN dXi3hi, qRes26hi
+ VSUB qRes26lo, qSrc2lo, qSrc6lo ;// j2-j6
+ VSUB qRes26hi, qSrc2hi, qSrc6hi ;// j2-j6
+ VMOVN dXi2lo, qRes26lo ;// Output i2
+ VMOVN dXi2hi, qRes26hi
+ VSHLL qSrc3lo, dXj3lo, #(12-1)
+ VSHLL qSrc3hi, dXj3hi, #(12-1)
+ VLD1 {qScale5lo, qScale5hi}, [pTemp]!
+ VSHLL qSrc5lo, dXj5lo, #(12-1)
+ VSHLL qSrc5hi, dXj5hi, #(12-1)
+ VQRDMULH qSrc3lo, qScale3lo, qSrc3lo
+ VQRDMULH qSrc3hi, qScale3hi, qSrc3hi
+ VQRDMULH qSrc5lo, qScale5lo, qSrc5lo
+ VQRDMULH qSrc5hi, qScale5hi, qSrc5hi
+
+ ;// Row 3 & 5 (row 4 is fetched and scaled here as well)
+ VHADD qRes53lo, qSrc5lo, qSrc3lo ;// (j5+j3)/2
+ VHADD qRes53hi, qSrc5hi, qSrc3hi ;// (j5+j3)/2
+ SUB pSrc, pSrc, #16*2*2 ;// Step back 64 bytes; presumably to input row 4 - confirm against caller
+ VMOVN dXi7lo, qRes53lo ;// Output i7
+ VMOVN dXi7hi, qRes53hi
+ VSUB qRes53lo, qSrc5lo, qSrc3lo ;// j5-j3
+ VSUB qRes53hi, qSrc5hi, qSrc3hi ;// j5-j3
+ VLD1 qXj4, [pSrc @64]
+ VMOVN dXi4lo, qRes53lo ;// Output i4
+ VMOVN dXi4hi, qRes53hi
+ VSHLL qSrc4lo, dXj4lo, #(12-1)
+ VSHLL qSrc4hi, dXj4hi, #(12-1)
+ VLD1 {qScale4lo, qScale4hi}, [pScale]
+ LDR pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants
+ VQRDMULH qSrc4lo, qScale4lo, qSrc4lo
+ VQRDMULH qSrc4hi, qScale4hi, qSrc4hi
+ VLDR dCoefs, [pSrc] ;// Load DCT inverse AAN constants
+ ;// Row 4: the scaled row is passed through unchanged as i1
+ VMOVN dXi1lo, qSrc4lo ;// Output i1
+ VMOVN dXi1hi, qSrc4hi
+
+ MEND
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_MaskTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_MaskTable.h
new file mode 100644
index 0000000..b5da9dc
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_MaskTable.h
@@ -0,0 +1,27 @@
+/**
+ *
+ * File Name: armCOMM_MaskTable.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Mask Table to mask the end of array
+ */
+
+
+
+#ifndef _ARMCOMM_MASKTABLE_H_
+#define _ARMCOMM_MASKTABLE_H_
+/* NOTE(review): OMX_U16/OMX_U8 are not declared in this header; presumably omxtypes.h must be included first - confirm. */
+#define MaskTableSize 72
+/* MaskTableSize is the number of entries in each of the two tables declared below. */
+/* Mask tables (declarations only; the definitions live in another translation unit) */
+
+extern const OMX_U16 armCOMM_qMaskTable16[MaskTableSize];
+extern const OMX_U8 armCOMM_qMaskTable8[MaskTableSize];
+
+#endif /* _ARMCOMM_MASKTABLE_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Version.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Version.h
new file mode 100644
index 0000000..13e5b2b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Version.h
@@ -0,0 +1,43 @@
+/* Guard the header against multiple inclusion. */
+#ifndef __ARM_COMM_VERSION_H__
+#define __ARM_COMM_VERSION_H__
+
+
+/* The following line should be in omxtypes.h but hasn't been approved by OpenMAX yet */
+#define OMX_VERSION 102
+
+/* Two-level stringification: ARM_INDIRECT expands its argument first, then ARM_QUOTE turns the result into a string literal. */
+#define ARM_QUOTE(a) #a
+#define ARM_INDIRECT(A) ARM_QUOTE(A)
+
+/* OMX_VERSION as a string literal ("102" with the definition above) that can be used, for example, to print it out. */
+#define ARM_VERSION_STRING ARM_INDIRECT(OMX_VERSION)
+
+
+/* Define this in order to turn on ARM version/release/build strings in each domain (it is defined unconditionally here). */
+#define ARM_INCLUDE_VERSION_DESCRIPTIONS
+
+#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS
+ extern const char * const omxAC_VersionDescription;
+ extern const char * const omxIC_VersionDescription;
+ extern const char * const omxIP_VersionDescription;
+ extern const char * const omxSP_VersionDescription;
+ extern const char * const omxVC_VersionDescription;
+#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */
+
+
+/* The following entries should be automatically updated by the release script. */
+/* They are used in the ARM version strings defined for each domain. */
+
+/* The release tag associated with this release of the library - used for source and object releases. */
+#define OMX_ARM_RELEASE_TAG "r0p0-00bet1"
+
+/* The ARM architecture used to build any objects or executables in this release. */
+#define OMX_ARM_BUILD_ARCHITECTURE "ARM Architecture V6"
+
+/* The ARM Toolchain used to build any objects or executables in this release. */
+#define OMX_ARM_BUILD_TOOLCHAIN "ARM RVCT 3.1"
+
+
+#endif /* __ARM_COMM_VERSION_H__ */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_s.h
new file mode 100644
index 0000000..2df1fc8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_s.h
@@ -0,0 +1,1154 @@
+;//
+;//
+;// File Name: armCOMM_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// ARM optimized OpenMAX common header file
+;//
+
+;// Protect against multiple inclusion: the body is only assembled while ARMCOMM_S_H is undeclared
+ IF :LNOT::DEF:ARMCOMM_S_H
+ GBLL ARMCOMM_S_H
+
+ REQUIRE8 ;// Requires 8-byte stack alignment
+ PRESERVE8 ;// Preserves 8-byte stack alignment
+
+ GBLL ARM_ERRORCHECK ;// Global flag, initialised to FALSE on the next line
+ARM_ERRORCHECK SETL {FALSE}
+
+;// Globals shared by the macros in this file
+
+ GBLS _RRegList ;// R saved register list
+ GBLS _DRegList ;// D saved register list
+ GBLS _Variant ;// Selected processor variant
+ GBLS _CPU ;// CPU name
+ GBLS _Struct ;// Name of the structure currently being declared
+
+ GBLL _InFunc ;// TRUE between M_START and M_END
+ GBLL _SwLong ;// Long switch flag
+
+ GBLA _RBytes ;// Number of register bytes on stack
+ GBLA _SBytes ;// Number of scratch bytes on stack
+ GBLA _ABytes ;// Stack offset of next argument
+ GBLA _Workspace ;// Stack offset of scratch workspace
+ GBLA _F ;// Function number, appended to stack symbol names
+ GBLA _StOff ;// Struct offset
+ GBLA _SwNum ;// Switch number
+ GBLS _32 ;// Suffix for 32 byte alignment
+ GBLS _16 ;// Suffix for 16 byte alignment
+
+_InFunc SETL {FALSE}
+_SBytes SETA 0
+_F SETA 0
+_SwNum SETA 0
+_32 SETS "ALIGN32"
+_16 SETS "ALIGN16"
+
+;/////////////////////////////////////////////////////////
+;// Override the tools settings of the CPU if the symbol
+;// OVERRIDECPU is defined, otherwise use the CPU defined
+;// by the assembler settings ({CPU}).
+;/////////////////////////////////////////////////////////
+
+ IF :DEF: OVERRIDECPU
+_CPU SETS OVERRIDECPU
+ ELSE
+_CPU SETS {CPU}
+ ENDIF
+
+
+
+;/////////////////////////////////////////////////////////
+;// Work out which code to build
+;/////////////////////////////////////////////////////////
+;// Variant symbols predefined by the includer are rejected with a message; M_VARIANTS declares them instead
+ IF :DEF:ARM1136JS:LOR::DEF:CortexA8:LOR::DEF:ARM_GENERIC
+ INFO 1,"Please switch to using M_VARIANTS"
+ ENDIF
+
+ ;// Define and reset all officially recognised variants
+ MACRO
+ _M_DEF_VARIANTS
+ _M_DEF_VARIANT ARM926EJS
+ _M_DEF_VARIANT ARM1136JS
+ _M_DEF_VARIANT ARM1136JS_U
+ _M_DEF_VARIANT CortexA8
+ _M_DEF_VARIANT ARM7TDMI
+ MEND
+ ;// Declare one variant flag plus its "_ok" marker and reset the flag to FALSE
+ MACRO
+ _M_DEF_VARIANT $var
+ GBLL $var
+ GBLL _ok$var ;// Existence of this symbol is what _M_VARIANT checks
+$var SETL {FALSE}
+ MEND
+
+
+ ;// Variant declaration
+ ;//
+ ;// Takes the list of code variants supported by this source file
+ ;// (up to eight, unused slots may be left empty), then sets exactly
+ ;// one variant flag to TRUE: the first entry of the fall back list for
+ ;// the configured CPU (_CPU) that the source file declared as supported.
+ ;// A message is reported if no variant matches.
+ MACRO
+ M_VARIANTS $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7
+ ;// Set to TRUE variants that are supported
+ _M_DEF_VARIANTS
+ _M_VARIANT $v0
+ _M_VARIANT $v1
+ _M_VARIANT $v2
+ _M_VARIANT $v3
+ _M_VARIANT $v4
+ _M_VARIANT $v5
+ _M_VARIANT $v6
+ _M_VARIANT $v7
+
+ ;// Look for first available variant to match a CPU
+ ;// _M_TRY cpu, variant fall back list
+_Variant SETS ""
+ _M_TRY ARM926EJ-S, ARM926EJS
+ _M_TRY ARM1176JZ-S, ARM1136JS
+ _M_TRY ARM1176JZF-S, ARM1136JS
+ _M_TRY ARM1156T2-S, ARM1136JS
+ _M_TRY ARM1156T2F-S, ARM1136JS
+ _M_TRY ARM1136J-S, ARM1136JS
+ _M_TRY ARM1136JF-S, ARM1136JS
+ _M_TRY MPCore, ARM1136JS
+ _M_TRY Cortex-A8, CortexA8, ARM1136JS
+ _M_TRY Cortex-R4, ARM1136JS
+ _M_TRY ARM7TDMI, ARM7TDMI
+
+ ;// Select the correct variant: reset every flag, then raise only the chosen one
+ _M_DEF_VARIANTS
+ IF _Variant=""
+ INFO 1, "No match found for CPU '$_CPU'"
+ ELSE
+$_Variant SETL {TRUE}
+ ENDIF
+ MEND
+
+ ;// Register a variant as available (empty argument is ignored; unknown names are reported)
+ MACRO
+ _M_VARIANT $var
+ IF "$var"=""
+ MEXIT
+ ENDIF
+ IF :LNOT::DEF:_ok$var
+ INFO 1, "Unrecognized variant '$var'"
+ ENDIF
+$var SETL {TRUE}
+ MEND
+
+ ;// For a given CPU name, do nothing unless it equals _CPU. Otherwise
+ ;// walk the variant fall back list in order; the first variant whose
+ ;// flag is TRUE is stored in _Variant. Reports if none is available.
+ MACRO
+ _M_TRY $cpu, $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7
+ IF "$cpu"<>_CPU
+ MEXIT
+ ENDIF
+ _M_TRY1 $v0
+ _M_TRY1 $v1
+ _M_TRY1 $v2
+ _M_TRY1 $v3
+ _M_TRY1 $v4
+ _M_TRY1 $v5
+ _M_TRY1 $v6
+ _M_TRY1 $v7
+ ;// Check a match was found
+ IF _Variant=""
+ INFO 1, "No variant match found for CPU '$_CPU'"
+ ENDIF
+ MEND
+ ;// Record one candidate in _Variant if nothing was chosen yet and its flag is TRUE
+ MACRO
+ _M_TRY1 $var
+ IF "$var"=""
+ MEXIT
+ ENDIF
+ IF (_Variant=""):LAND:$var
+_Variant SETS "$var"
+ ENDIF
+ MEND
+
+;////////////////////////////////////////////////////////
+;// Structure definition
+;////////////////////////////////////////////////////////
+
+ ;// Begin declaring a structure of given name; the running field offset restarts at 0
+ MACRO
+ M_STRUCT $sname
+_Struct SETS "$sname"
+_StOff SETA 0
+ MEND
+
+ ;// Declare a structure field
+ ;// Defines the symbol <struct name>_$fname as the field offset, after rounding the offset up to a multiple of $size
+ ;// $size = the size of each entry, must be power of 2
+ ;// $number = (if provided) the number of entries for an array
+ MACRO
+ M_FIELD $fname, $size, $number
+ IF (_StOff:AND:($size-1))!=0
+_StOff SETA _StOff + ($size - (_StOff:AND:($size-1)))
+ ENDIF
+$_Struct._$fname EQU _StOff
+ IF "$number"<>""
+_StOff SETA _StOff + $size*$number
+ ELSE
+_StOff SETA _StOff + $size
+ ENDIF
+ MEND
+
+ ;// End the current structure: defines sizeof_<struct name> as the offset reached so far
+ MACRO
+ M_ENDSTRUCT
+sizeof_$_Struct EQU _StOff
+_Struct SETS ""
+ MEND
+
+;//////////////////////////////////////////////////////////
+;// Switch and table macros
+;//////////////////////////////////////////////////////////
+
+ ;// Start a relative switch table with register to switch on
+ ;// The M_CASE entries must follow immediately, in index order.
+ ;// $v = the register to switch on
+ ;// $s = if specified must be "L" to indicate long
+ ;// this allows a greater range to the case code
+ MACRO
+ M_SWITCH $v, $s
+ ASSERT "$s"="":LOR:"$s"="L"
+_SwLong SETL {FALSE}
+ IF "$s"="L"
+_SwLong SETL {TRUE}
+ ENDIF
+_SwNum SETA _SwNum+1
+ IF {CONFIG}=16
+ ;// Thumb: table branch, halfword entries when long, byte entries otherwise
+ IF _SwLong
+ TBH [pc, $v, LSL#1]
+ ELSE
+ TBB [pc, $v]
+ ENDIF
+_Switch$_SwNum
+ ELSE
+ ;// ARM: jump into the table of branches emitted by M_CASE (4 bytes per case)
+ ADD pc, pc, $v, LSL #2
+ NOP
+ ENDIF
+ MEND
+
+ ;// Add a case to the switch statement (one table entry that transfers to the given label)
+ MACRO
+ M_CASE $label
+ IF {CONFIG}=16
+ ;// Thumb: entry is the label offset from the table start, in halfwords
+ IF _SwLong
+ DCW ($label - _Switch$_SwNum)/2
+ ELSE
+ DCB ($label - _Switch$_SwNum)/2
+ ENDIF
+ ELSE
+ ;// ARM: entry is a branch instruction
+ B $label
+ ENDIF
+ MEND
+
+ ;// End of switch statement: re-align after the table
+ MACRO
+ M_ENDSWITCH
+ ALIGN 2
+ MEND
+
+
+;////////////////////////////////////////////////////////
+;// Data area allocation
+;////////////////////////////////////////////////////////
+
+ ;// Constant table allocator macro
+ ;//
+ ;// Switches to a read-only data area and places the label $name there.
+ ;// $name is symbol through which the table can be accessed.
+ ;// $align is the optional alignment of the table, log2 of
+ ;// the byte alignment - $align=4 is 16 byte aligned
+ MACRO
+ M_TABLE $name, $align
+ ASSERT :LNOT:_InFunc
+ IF "$align"=""
+ AREA |.constdata|, READONLY, DATA
+ ELSE
+ ;// AREAs inherit the alignment of the first declaration.
+ ;// Therefore for each alignment size we must have an area
+ ;// of a different name.
+ AREA constdata_a$align, READONLY, DATA, ALIGN=$align
+
+ ;// We also force alignment in case we are tagging onto
+ ;// an already started area.
+ ALIGN (1<<$align)
+ ENDIF
+$name
+ MEND
+
+;/////////////////////////////////////////////////////
+;// Macros to allocate space on the stack
+;//
+;// These all assume that the stack is 8-byte aligned
+;// at entry to the function, which means that the
+;// 32-byte alignment macro needs to work in a
+;// bit more of a special way...
+;/////////////////////////////////////////////////////
+
+
+
+
+ ;// Reserve $size bytes of stack scratch space with 1-byte alignment.
+ ;// Defines <name><function number> as its offset; must be used outside a function.
+ MACRO
+ M_ALLOC1 $name, $size
+ ASSERT :LNOT:_InFunc
+$name$_F EQU _SBytes
+_SBytes SETA _SBytes + ($size)
+ MEND
+
+ ;// Reserve $size bytes of stack scratch space with 2-byte alignment.
+ ;// Defines <name><function number> as its offset; must be used outside a function.
+ MACRO
+ M_ALLOC2 $name, $size
+ ASSERT :LNOT:_InFunc
+ IF (_SBytes:AND:1)!=0
+_SBytes SETA _SBytes + (2 - (_SBytes:AND:1))
+ ENDIF
+$name$_F EQU _SBytes
+_SBytes SETA _SBytes + ($size)
+ MEND
+
+ ;// Reserve $size bytes of stack scratch space with 4-byte alignment.
+ ;// Defines <name><function number> as its offset; must be used outside a function.
+ MACRO
+ M_ALLOC4 $name, $size
+ ASSERT :LNOT:_InFunc
+ IF (_SBytes:AND:3)!=0
+_SBytes SETA _SBytes + (4 - (_SBytes:AND:3))
+ ENDIF
+$name$_F EQU _SBytes
+_SBytes SETA _SBytes + ($size)
+ MEND
+
+ ;// Reserve $size bytes of stack scratch space with 8-byte alignment.
+ ;// Defines <name><function number> as its offset; must be used outside a function.
+ MACRO
+ M_ALLOC8 $name, $size
+ ASSERT :LNOT:_InFunc
+ IF (_SBytes:AND:7)!=0
+_SBytes SETA _SBytes + (8 - (_SBytes:AND:7))
+ ENDIF
+$name$_F EQU _SBytes
+_SBytes SETA _SBytes + ($size)
+ MEND
+
+
+ ;// Allocate an area of $size bytes that M_ADR16 can align to 16 bytes.
+ ;// Reserves ($size+8) bytes at an 8-byte aligned offset; the symbol points 8 bytes in,
+ ;// so with an 8-byte aligned sp, rounding the address down to 16 bytes stays inside the area.
+
+ MACRO
+ M_ALLOC16 $name, $size
+ ASSERT :LNOT:_InFunc
+ IF (_SBytes:AND:7)!=0
+_SBytes SETA _SBytes + (8 - (_SBytes:AND:7))
+ ENDIF
+$name$_F$_16 EQU (_SBytes + 8)
+_SBytes SETA _SBytes + ($size) + 8
+ MEND
+
+ ;// Allocate an area of $size bytes that M_ADR32 can align to 32 bytes.
+ ;// Reserves ($size+24) bytes at an 8-byte aligned offset; the symbol points 24 bytes in,
+ ;// so with an 8-byte aligned sp, rounding the address down to 32 bytes stays inside the area.
+
+ MACRO
+ M_ALLOC32 $name, $size
+ ASSERT :LNOT:_InFunc
+ IF (_SBytes:AND:7)!=0
+_SBytes SETA _SBytes + (8 - (_SBytes:AND:7))
+ ENDIF
+$name$_F$_32 EQU (_SBytes + 24)
+_SBytes SETA _SBytes + ($size) + 24
+ MEND
+
+
+
+
+ ;// Argument Declaration Macro
+ ;//
+ ;// Declare an argument $name of $size bytes at the next argument offset.
+ ;// Must be used inside a function (after M_START), in argument order.
+ MACRO
+ M_ARG $name, $size
+ ASSERT _InFunc
+$name$_F EQU _ABytes
+_ABytes SETA _ABytes + ($size)
+ MEND
+
+;///////////////////////////////////////////////
+;// Macros to access stacked variables
+;///////////////////////////////////////////////
+
+ ;// Macro to perform a data processing operation with a constant second
+ ;// operand, split into at most two immediates (an 8 bit field plus the remaining high bits)
+ MACRO
+ _M_OPC $op,$rd,$rn,$const
+ LCLA _sh
+ LCLA _cst
+_sh SETA 0
+_cst SETA $const
+ IF _cst=0
+ $op $rd, $rn, #_cst
+ MEXIT
+ ENDIF
+ WHILE (_cst:AND:3)=0 ;// Strip trailing zero bit pairs, counting them in _sh
+_cst SETA _cst>>2
+_sh SETA _sh+2
+ WEND
+ $op $rd, $rn, #(_cst:AND:0x000000FF)<<_sh
+ IF _cst>=256 ;// Remaining high bits need a second instruction
+ $op $rd, $rd, #(_cst:AND:0xFFFFFF00)<<_sh
+ ENDIF
+ MEND
+
+ ;// Macro to perform a data access operation
+ ;// Such as LDR or STR
+ ;// The addressing mode is modified such that
+ ;// 1. If no address is given then the name is taken
+ ;// as a stack offset (symbol <name><function number> plus _Workspace)
+ ;// 2. If the addressing mode is not available for the
+ ;// state being assembled for (eg Thumb) then a suitable
+ ;// addressing mode is substituted.
+ ;//
+ ;// On Entry:
+ ;// $i = Instruction to perform (eg "LDRB")
+ ;// $a = Required byte alignment (asserted for stack offsets only)
+ ;// $r = Register(s) to transfer (eg "r1")
+ ;// $a0,$a1,$a2,$a3. Addressing mode and condition. One of:
+ ;// label {,cc}
+ ;// [base] {,,,cc}
+ ;// [base, offset]{!} {,,cc}
+ ;// [base, offset, shift]{!} {,cc}
+ ;// [base], offset {,,cc}
+ ;// [base], offset, shift {,cc}
+ MACRO
+ _M_DATA $i,$a,$r,$a0,$a1,$a2,$a3
+ IF "$a0":LEFT:1="["
+ IF "$a1"=""
+ $i$a3 $r, $a0
+ ELSE
+ IF "$a0":RIGHT:1="]"
+ IF "$a2"=""
+ _M_POSTIND $i$a3, "$r", $a0, $a1
+ ELSE
+ _M_POSTIND $i$a3, "$r", $a0, "$a1,$a2"
+ ENDIF
+ ELSE
+ IF "$a2"=""
+ _M_PREIND $i$a3, "$r", $a0, $a1
+ ELSE
+ _M_PREIND $i$a3, "$r", $a0, "$a1,$a2"
+ ENDIF
+ ENDIF
+ ENDIF
+ ELSE
+ LCLA _Offset
+_Offset SETA _Workspace + $a0$_F
+ ASSERT (_Offset:AND:($a-1))=0
+ $i$a1 $r, [sp, #_Offset]
+ ENDIF
+ MEND
+
+ ;// Handle post indexed load/stores; when {CONFIG}=16 (Thumb) the access and the base update are split
+ ;// op reg, [base], offset
+ MACRO
+ _M_POSTIND $i,$r,$a0,$a1
+ LCLS _base
+ LCLS _offset
+ IF {CONFIG}=16 ;// Thumb
+_base SETS ("$a0":LEFT:(:LEN:"$a0"-1)):RIGHT:(:LEN:"$a0"-2) ;// remove []
+_offset SETS "$a1"
+ IF _offset:LEFT:1="+"
+_offset SETS _offset:RIGHT:(:LEN:_offset-1)
+ ENDIF
+ $i $r, $a0
+ IF _offset:LEFT:1="-"
+_offset SETS _offset:RIGHT:(:LEN:_offset-1)
+ SUB $_base, $_base, $_offset
+ ELSE
+ ADD $_base, $_base, $_offset
+ ENDIF
+ ELSE ;// ARM
+ $i $r, $a0, $a1
+ ENDIF
+ MEND
+
+ ;// Handle pre indexed load/store; a Thumb access with writeback is split into the access plus an ADD
+ ;// op reg, [base, offset]{!}
+ MACRO
+ _M_PREIND $i,$r,$a0,$a1
+ LCLS _base
+ LCLS _offset
+ IF ({CONFIG}=16):LAND:(("$a1":RIGHT:2)="]!")
+_base SETS "$a0":RIGHT:(:LEN:("$a0")-1)
+_offset SETS "$a1":LEFT:(:LEN:("$a1")-2)
+ $i $r, [$_base, $_offset]
+ ADD $_base, $_base, $_offset
+ ELSE
+ $i $r, $a0, $a1
+ ENDIF
+ MEND
+
+ ;// Load unsigned byte from a named stack location or an explicit address
+ MACRO
+ M_LDRB $r,$a0,$a1,$a2,$a3
+ _M_DATA "LDRB",1,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Load signed byte from a named stack location or an explicit address
+ MACRO
+ M_LDRSB $r,$a0,$a1,$a2,$a3
+ _M_DATA "LDRSB",1,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Store byte to a named stack location or an explicit address
+ MACRO
+ M_STRB $r,$a0,$a1,$a2,$a3
+ _M_DATA "STRB",1,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Load unsigned half word (stack offsets must be 2-byte aligned)
+ MACRO
+ M_LDRH $r,$a0,$a1,$a2,$a3
+ _M_DATA "LDRH",2,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Load signed half word (stack offsets must be 2-byte aligned)
+ MACRO
+ M_LDRSH $r,$a0,$a1,$a2,$a3
+ _M_DATA "LDRSH",2,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Store half word (stack offsets must be 2-byte aligned)
+ MACRO
+ M_STRH $r,$a0,$a1,$a2,$a3
+ _M_DATA "STRH",2,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Load word (stack offsets must be 4-byte aligned)
+ MACRO
+ M_LDR $r,$a0,$a1,$a2,$a3
+ _M_DATA "LDR",4,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Store word (stack offsets must be 4-byte aligned)
+ MACRO
+ M_STR $r,$a0,$a1,$a2,$a3
+ _M_DATA "STR",4,$r,$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Load double word into a register pair (stack offsets must be 8-byte aligned)
+ MACRO
+ M_LDRD $r0,$r1,$a0,$a1,$a2,$a3
+ _M_DATA "LDRD",8,"$r0,$r1",$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Store double word from a register pair (stack offsets must be 8-byte aligned)
+ MACRO
+ M_STRD $r0,$r1,$a0,$a1,$a2,$a3
+ _M_DATA "STRD",8,"$r0,$r1",$a0,$a1,$a2,$a3
+ MEND
+
+ ;// Get absolute address of stack allocated location $b into $a ($cc = optional condition)
+ MACRO
+ M_ADR $a, $b, $cc
+ _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F)
+ MEND
+
+ ;// Get absolute address of a location declared with M_ALLOC16 and align the address to 16 bytes
+ MACRO
+ M_ADR16 $a, $b, $cc
+ _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F$_16)
+
+ ;// Now align the result down to 16 bytes
+ BIC$cc $a,$a,#0x0F
+ MEND
+
+ ;// Get absolute address of a location declared with M_ALLOC32 and align the address to 32 bytes
+ MACRO
+ M_ADR32 $a, $b, $cc
+ _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F$_32)
+
+ ;// Now align the result down to 32 bytes
+ BIC$cc $a,$a,#0x1F
+ MEND
+
+;//////////////////////////////////////////////////////////
+;// Function header and footer macros
+;//////////////////////////////////////////////////////////
+
+ ;// Function Header Macro
+ ;// Generates the function prologue
+ ;// Note that functions should all be "stack-moves-once"
+ ;// The M_START and M_END macros should be the only places
+ ;// where the stack moves.
+ ;//
+ ;// $name = function name (exported, placed in the |.text| area)
+ ;// $rreg = "" don't stack any registers
+ ;// "lr" or "r4" stack registers "r4,lr"
+ ;// "rN" stack registers "r4-rN,lr" (odd N is rounded up to N+1)
+ ;// $dreg = "" don't stack any D registers
+ ;// "dN" stack registers "d8-dN"
+ ;//
+ ;// Note: ARM Architecture procedure call standard AAPCS
+ ;// states that r4-r11, sp, d8-d15 must be preserved by
+ ;// a compliant function.
+ MACRO
+ M_START $name, $rreg, $dreg
+ ASSERT :LNOT:_InFunc
+ ASSERT "$name"!=""
+_InFunc SETL {TRUE}
+_RBytes SETA 0
+_Workspace SETA 0
+
+ ;// Create an area for the function
+ AREA |.text|, CODE
+ EXPORT $name
+$name FUNCTION
+
+ ;// Save R registers
+ _M_GETRREGLIST $rreg
+ IF _RRegList<>""
+ STMFD sp!, {$_RRegList, lr}
+ ENDIF
+
+ ;// Save D registers
+ _M_GETDREGLIST $dreg
+ IF _DRegList<>""
+ VSTMFD sp!, {$_DRegList}
+ ENDIF
+
+
+ ;// Ensure size claimed on stack is 8-byte aligned
+ IF ((_SBytes:AND:7)!=0)
+_SBytes SETA _SBytes + (8 - (_SBytes:AND:7))
+ ENDIF
+
+ IF (_SBytes!=0)
+ _M_OPC SUB, sp, sp, _SBytes
+ ENDIF
+
+ ;// First M_ARG offset: skip the scratch area and the saved registers
+_ABytes SETA _SBytes + _RBytes - _Workspace
+
+
+ ;// Print function name if debug enabled
+ M_PRINTF "$name\n",
+ MEND
+
+ ;// Work out a list of R saved registers (lr is added by the caller) and add their size, lr included, to _RBytes
+ MACRO
+ _M_GETRREGLIST $rreg
+ IF "$rreg"=""
+_RRegList SETS ""
+ MEXIT
+ ENDIF
+ IF "$rreg"="lr":LOR:"$rreg"="r4"
+_RRegList SETS "r4"
+_RBytes SETA _RBytes+8
+ MEXIT
+ ENDIF
+ IF "$rreg"="r5":LOR:"$rreg"="r6"
+_RRegList SETS "r4-r6"
+_RBytes SETA _RBytes+16
+ MEXIT
+ ENDIF
+ IF "$rreg"="r7":LOR:"$rreg"="r8"
+_RRegList SETS "r4-r8"
+_RBytes SETA _RBytes+24
+ MEXIT
+ ENDIF
+ IF "$rreg"="r9":LOR:"$rreg"="r10"
+_RRegList SETS "r4-r10"
+_RBytes SETA _RBytes+32
+ MEXIT
+ ENDIF
+ IF "$rreg"="r11":LOR:"$rreg"="r12"
+_RRegList SETS "r4-r12"
+_RBytes SETA _RBytes+40
+ MEXIT
+ ENDIF
+ INFO 1, "Unrecognized saved r register limit '$rreg'"
+ MEND
+
+ ;// Work out a list of D saved registers (d8 up to the given limit) and add 8 bytes per register to _RBytes
+ MACRO
+ _M_GETDREGLIST $dreg
+ IF "$dreg"=""
+_DRegList SETS ""
+ MEXIT
+ ENDIF
+ IF "$dreg"="d8"
+_DRegList SETS "d8"
+_RBytes SETA _RBytes+8
+ MEXIT
+ ENDIF
+ IF "$dreg"="d9"
+_DRegList SETS "d8-d9"
+_RBytes SETA _RBytes+16
+ MEXIT
+ ENDIF
+ IF "$dreg"="d10"
+_DRegList SETS "d8-d10"
+_RBytes SETA _RBytes+24
+ MEXIT
+ ENDIF
+ IF "$dreg"="d11"
+_DRegList SETS "d8-d11"
+_RBytes SETA _RBytes+32
+ MEXIT
+ ENDIF
+ IF "$dreg"="d12"
+_DRegList SETS "d8-d12"
+_RBytes SETA _RBytes+40
+ MEXIT
+ ENDIF
+ IF "$dreg"="d13"
+_DRegList SETS "d8-d13"
+_RBytes SETA _RBytes+48
+ MEXIT
+ ENDIF
+ IF "$dreg"="d14"
+_DRegList SETS "d8-d14"
+_RBytes SETA _RBytes+56
+ MEXIT
+ ENDIF
+ IF "$dreg"="d15"
+_DRegList SETS "d8-d15"
+_RBytes SETA _RBytes+64
+ MEXIT
+ ENDIF
+ INFO 1, "Unrecognized saved d register limit '$dreg'"
+ MEND
+
+ ;// Produce function return instructions ($cc = optional condition): restore saved D registers, then return
+ MACRO
+ _M_RET $cc
+ IF _DRegList<>""
+ VPOP$cc {$_DRegList}
+ ENDIF
+ IF _RRegList=""
+ BX$cc lr
+ ELSE
+ LDM$cc.FD sp!, {$_RRegList, pc}
+ ENDIF
+ MEND
+
+ ;// Early Function Exit Macro (must be used between M_START and M_END)
+ ;// $cc = condition to exit with
+ ;// (Example: M_EXIT EQ)
+ MACRO
+ M_EXIT $cc
+ ASSERT _InFunc
+ IF _SBytes!=0
+ ;// Scratch space is allocated: branch to the epilogue label placed by M_END
+ B$cc _End$_F
+ ELSE
+ ;// Can return directly
+ _M_RET $cc
+ ENDIF
+ MEND
+
+ ;// Function Footer Macro
+ ;// Generates the function epilogue and the label that M_EXIT branches to
+ MACRO
+ M_END
+ ASSERT _InFunc
+_InFunc SETL {FALSE}
+_End$_F
+
+ ;// Restore the stack pointer to its value after the register saves in M_START
+ IF _SBytes!=0
+ _M_OPC ADD, sp, sp, _SBytes
+ ENDIF
+ _M_RET
+ ENDFUNC
+
+ ;// Reset the scratch byte count back to its initial value, and
+ ;// increment the function number for the next function
+_SBytes SETA 0
+_F SETA _F+1
+ MEND
+
+
+;//==========================================================================
+;// Debug Macros
+;//==========================================================================
+
+ GBLL DEBUG_ON ;// M_PRINTF emits code only while this is TRUE
+DEBUG_ON SETL {FALSE}
+ GBLL DEBUG_STALLS_ON ;// M_STALL emits code only while this is TRUE
+DEBUG_STALLS_ON SETL {FALSE}
+
+ ;//==========================================================================
+ ;// Debug call to printf
+ ;// M_PRINTF $format, $val0, $val1, $val2
+ ;//
+ ;// Examples:
+ ;// M_PRINTF "x=%08x\n", r0
+ ;//
+ ;// Up to three register values may be printed. This macro preserves
+ ;// the value of all registers including the flags.
+ ;//==========================================================================
+
+ MACRO
+ M_PRINTF $format, $val0, $val1, $val2
+ IF DEBUG_ON
+
+ IMPORT printf
+ LCLA nArgs
+nArgs SETA 0
+
+ ;// save registers so we don't corrupt them
+ STMFD sp!, {r0-r12, lr}
+
+ ;// Drop stack to give us some workspace
+ SUB sp, sp, #16
+
+ ;// Save registers we need to print to the stack
+ IF "$val2" <> ""
+ ASSERT "$val1" <> ""
+ STR $val2, [sp, #8]
+nArgs SETA nArgs+1
+ ENDIF
+ IF "$val1" <> ""
+ ASSERT "$val0" <> ""
+ STR $val1, [sp, #4]
+nArgs SETA nArgs+1
+ ENDIF
+ IF "$val0"<>""
+ STR $val0, [sp]
+nArgs SETA nArgs+1
+ ENDIF
+
+ ;// Now we are safe to corrupt registers: load the arguments for printf
+ ADR r0, %FT00
+ IF nArgs=1
+ LDR r1, [sp]
+ ENDIF
+ IF nArgs=2
+ LDMIA sp, {r1,r2}
+ ENDIF
+ IF nArgs=3
+ LDMIA sp, {r1,r2,r3}
+ ENDIF
+
+ ;// print the values
+ MRS r4, cpsr ;// preserve flags
+ BL printf
+ MSR cpsr_f, r4 ;// restore flags
+ B %FT01
+00 ;// string to print
+ DCB "$format", 0
+ ALIGN
+01 ;// Finished
+ ADD sp, sp, #16
+ ;// Restore registers
+ LDMFD sp!, {r0-r12,lr}
+
+ ENDIF ;// DEBUG_ON
+ MEND
+
+
+ ;// Stall Simulation Macro (emits code only while DEBUG_STALLS_ON is TRUE)
+ ;// Inserts a given number of NOPs for the currently
+ ;// defined platform; takes up to six platform stall arguments
+ MACRO
+ M_STALL $plat1stall, $plat2stall, $plat3stall, $plat4stall, $plat5stall, $plat6stall
+ IF DEBUG_STALLS_ON
+ _M_STALL_SUB $plat1stall
+ _M_STALL_SUB $plat2stall
+ _M_STALL_SUB $plat3stall
+ _M_STALL_SUB $plat4stall
+ _M_STALL_SUB $plat5stall
+ _M_STALL_SUB $plat6stall
+ ENDIF
+ MEND
+ ;// Handle one stall argument: platform name, one separator character, then a single digit count
+ MACRO
+ _M_STALL_SUB $platstall
+ IF "$platstall"!=""
+ LCLA _pllen
+ LCLS _pl
+ LCLL _pllog
+_pllen SETA :LEN:"$platstall"
+_pl SETS "$platstall":LEFT:(_pllen - 2)
+ IF :DEF:$_pl
+ IF $_pl
+ LCLS _st
+ LCLA _stnum
+_st SETS "$platstall":RIGHT:1
+_stnum SETA $_st
+ WHILE _stnum>0
+ MOV sp, sp
+_stnum SETA _stnum - 1
+ WEND
+ ENDIF
+ ENDIF
+ ENDIF
+ MEND
+
+
+
+;//==========================================================================
+;// Endian Invariance Macros
+;//
+;// The idea behind these macros is that if an array is
+;// loaded as words then the SMUL00 macro will multiply
+;// array elements 0 regardless of the endianness of the
+;// system. For little endian SMUL00=SMULBB, for big
+;// endian SMUL00=SMULTT and similarly for other packed operations.
+;//
+;//==========================================================================
+ ;// LIBI4: emit the little-endian mnemonic (1st arg) or the big-endian one (2nd arg) with four operands and an optional condition
+ MACRO
+ LIBI4 $insLE, $insBE, $p, $q, $r, $s, $cond
+ IF {ENDIAN}<>"big"
+ $insLE.$cond $p, $q, $r, $s
+ ELSE
+ $insBE.$cond $p, $q, $r, $s
+ ENDIF
+ MEND
+ ;// LIBI3: emit the little-endian mnemonic (1st arg) or the big-endian one (2nd arg) with three operands and an optional condition
+ MACRO
+ LIBI3 $insLE, $insBE, $p, $q, $r, $cond
+ IF {ENDIAN}<>"big"
+ $insLE.$cond $p, $q, $r
+ ELSE
+ $insBE.$cond $p, $q, $r
+ ENDIF
+ MEND
+
+ ;// SMLAxy macros (operands Rd, Rm, Rs, Rn, optional condition). x picks the half of Rm, y the half of Rs:
+ ;// 0/1 = array element 0/1 of a word-loaded pair (endian dependent), B/T = fixed bottom/top half.
+ MACRO
+ SMLA00 $rd, $rm, $rs, $rn, $cond
+ LIBI4 SMLABB, SMLATT, $rd, $rm, $rs, $rn, $cond
+ MEND
+
+ MACRO
+ SMLA01 $rd, $rm, $rs, $rn, $cond
+ LIBI4 SMLABT, SMLATB, $rd, $rm, $rs, $rn, $cond
+ MEND
+
+ MACRO
+ SMLA0B $rd, $rm, $rs, $rn, $cond
+ LIBI4 SMLABB, SMLATB, $rd, $rm, $rs, $rn, $cond
+ MEND
+
+ MACRO
+ SMLA0T $rd, $rm, $rs, $rn, $cond
+ LIBI4 SMLABT, SMLATT, $rd, $rm, $rs, $rn, $cond
+ MEND
+
+ MACRO
+ SMLA10 $rd, $rm, $rs, $rn, $cond
+ LIBI4 SMLATB, SMLABT, $rd, $rm, $rs, $rn, $cond
+ MEND
+
+ MACRO
+ SMLA11 $rd, $rm, $rs, $rn, $cond
+ LIBI4 SMLATT, SMLABB, $rd, $rm, $rs, $rn, $cond
+ MEND
+
+ MACRO
+ SMLA1B $rd, $rm, $rs, $rn, $cond
+ LIBI4 SMLATB, SMLABB, $rd, $rm, $rs, $rn, $cond
+ MEND
+
+ MACRO
+ SMLA1T $rd, $rm, $rs, $rn, $cond
+ LIBI4 SMLATT, SMLABT, $rd, $rm, $rs, $rn, $cond
+ MEND
+
+ MACRO
+ SMLAB0 $rd, $rm, $rs, $rn, $cond
+ LIBI4 SMLABB, SMLABT, $rd, $rm, $rs, $rn, $cond
+ MEND
+
+ MACRO
+ SMLAB1 $rd, $rm, $rs, $rn, $cond
+ LIBI4 SMLABT, SMLABB, $rd, $rm, $rs, $rn, $cond
+ MEND
+
+ MACRO
+ SMLAT0 $rd, $rm, $rs, $rn, $cond
+ LIBI4 SMLATB, SMLATT, $rd, $rm, $rs, $rn, $cond
+ MEND
+
+ MACRO
+ SMLAT1 $rd, $rm, $rs, $rn, $cond
+ LIBI4 SMLATT, SMLATB, $rd, $rm, $rs, $rn, $cond
+ MEND
+
+ ;// SMULxy macros (operands Rd, Rm, Rs, optional condition). x picks the half of Rm, y the half of Rs:
+ ;// 0/1 = array element 0/1 of a word-loaded pair (endian dependent), B/T = fixed bottom/top half.
+ MACRO
+ SMUL00 $rd, $rm, $rs, $cond
+ LIBI3 SMULBB, SMULTT, $rd, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMUL01 $rd, $rm, $rs, $cond
+ LIBI3 SMULBT, SMULTB, $rd, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMUL0B $rd, $rm, $rs, $cond
+ LIBI3 SMULBB, SMULTB, $rd, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMUL0T $rd, $rm, $rs, $cond
+ LIBI3 SMULBT, SMULTT, $rd, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMUL10 $rd, $rm, $rs, $cond
+ LIBI3 SMULTB, SMULBT, $rd, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMUL11 $rd, $rm, $rs, $cond
+ LIBI3 SMULTT, SMULBB, $rd, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMUL1B $rd, $rm, $rs, $cond
+ LIBI3 SMULTB, SMULBB, $rd, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMUL1T $rd, $rm, $rs, $cond
+ LIBI3 SMULTT, SMULBT, $rd, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMULB0 $rd, $rm, $rs, $cond
+ LIBI3 SMULBB, SMULBT, $rd, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMULB1 $rd, $rm, $rs, $cond
+ LIBI3 SMULBT, SMULBB, $rd, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMULT0 $rd, $rm, $rs, $cond
+ LIBI3 SMULTB, SMULTT, $rd, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMULT1 $rd, $rm, $rs, $cond
+ LIBI3 SMULTT, SMULTB, $rd, $rm, $rs, $cond
+ MEND
+
+ ;// SMLAWx, SMULWx macros: word-by-halfword forms; the trailing 0/1 picks array element 0/1
+ ;// of the word-loaded pair in Rs (bottom half on little endian for 0, top half on big endian).
+ MACRO
+ SMLAW0 $rd, $rm, $rs, $rn, $cond
+ LIBI4 SMLAWB, SMLAWT, $rd, $rm, $rs, $rn, $cond
+ MEND
+
+ MACRO
+ SMLAW1 $rd, $rm, $rs, $rn, $cond
+ LIBI4 SMLAWT, SMLAWB, $rd, $rm, $rs, $rn, $cond
+ MEND
+
+ MACRO
+ SMULW0 $rd, $rm, $rs, $cond
+ LIBI3 SMULWB, SMULWT, $rd, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMULW1 $rd, $rm, $rs, $cond
+ LIBI3 SMULWT, SMULWB, $rd, $rm, $rs, $cond
+ MEND
+
+ ;// SMLALxy macros (operands RdLo, RdHi, Rm, Rs, optional condition). x picks the half of Rm, y the half of Rs:
+ ;// 0/1 = array element 0/1 of a word-loaded pair (endian dependent), B/T = fixed bottom/top half.
+
+ MACRO
+ SMLAL00 $rdlo, $rdhi, $rm, $rs, $cond
+ LIBI4 SMLALBB, SMLALTT, $rdlo, $rdhi, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMLAL01 $rdlo, $rdhi, $rm, $rs, $cond
+ LIBI4 SMLALBT, SMLALTB, $rdlo, $rdhi, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMLAL0B $rdlo, $rdhi, $rm, $rs, $cond
+ LIBI4 SMLALBB, SMLALTB, $rdlo, $rdhi, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMLAL0T $rdlo, $rdhi, $rm, $rs, $cond
+ LIBI4 SMLALBT, SMLALTT, $rdlo, $rdhi, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMLAL10 $rdlo, $rdhi, $rm, $rs, $cond
+ LIBI4 SMLALTB, SMLALBT, $rdlo, $rdhi, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMLAL11 $rdlo, $rdhi, $rm, $rs, $cond
+ LIBI4 SMLALTT, SMLALBB, $rdlo, $rdhi, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMLAL1B $rdlo, $rdhi, $rm, $rs, $cond
+ LIBI4 SMLALTB, SMLALBB, $rdlo, $rdhi, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMLAL1T $rdlo, $rdhi, $rm, $rs, $cond
+ LIBI4 SMLALTT, SMLALBT, $rdlo, $rdhi, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMLALB0 $rdlo, $rdhi, $rm, $rs, $cond
+ LIBI4 SMLALBB, SMLALBT, $rdlo, $rdhi, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMLALB1 $rdlo, $rdhi, $rm, $rs, $cond
+ LIBI4 SMLALBT, SMLALBB, $rdlo, $rdhi, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMLALT0 $rdlo, $rdhi, $rm, $rs, $cond
+ LIBI4 SMLALTB, SMLALTT, $rdlo, $rdhi, $rm, $rs, $cond
+ MEND
+
+ MACRO
+ SMLALT1 $rdlo, $rdhi, $rm, $rs, $cond
+ LIBI4 SMLALTT, SMLALTB, $rdlo, $rdhi, $rm, $rs, $cond
+ MEND
+
+ ENDIF ;// ARMCOMM_S_H
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armOMX.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armOMX.h
new file mode 100644
index 0000000..f629f72
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armOMX.h
@@ -0,0 +1,274 @@
+/*
+ *
+ * File Name: armOMX.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * This file allows a version of the OMX DL libraries to be built where some or
+ * all of the function names can be given a user specified suffix.
+ *
+ * You might want to use it where:
+ *
+ * - you want to rename a function "out of the way" so that you could replace
+ * a function with a different version (the original version would still be
+ * in the library just with a different name - so you could debug the new
+ * version by comparing it to the output of the old)
+ *
+ * - you want to rename all the functions to versions with a suffix so that
+ * you can include two versions of the library and choose between functions
+ * at runtime.
+ *
+ * e.g. omxIPBM_Copy_U8_C1R could be renamed omxIPBM_Copy_U8_C1R_CortexA8
+ *
+ */
+
+
+#ifndef _armOMX_H_
+#define _armOMX_H_
+
+
+/* Two-level paste: OMXCATBAR macro-expands its arguments (e.g. a *_SUFFIX macro) before OMXCAT2BAR glues "omx", the name and the suffix together */
+#define OMXCAT2BAR(NAME, SUFFIX) omx ## NAME ## SUFFIX
+#define OMXCATBAR(NAME, SUFFIX) OMXCAT2BAR(NAME, SUFFIX)
+
+/* Define the suffix to add to all functions - the default (an empty definition) is no suffix */
+#define BARE_SUFFIX
+
+/* Per-sub-domain suffixes. Each defaults to BARE_SUFFIX; the rename table below
+ * passes it as the suffix argument of OMXCATBAR for every function in that
+ * sub-domain, so a single sub-domain can be renamed on its own. */
+#define OMXACAAC_SUFFIX BARE_SUFFIX
+#define OMXACMP3_SUFFIX BARE_SUFFIX
+#define OMXICJP_SUFFIX BARE_SUFFIX
+#define OMXIPBM_SUFFIX BARE_SUFFIX
+#define OMXIPCS_SUFFIX BARE_SUFFIX
+#define OMXIPPP_SUFFIX BARE_SUFFIX
+#define OMXSP_SUFFIX BARE_SUFFIX
+#define OMXVCCOMM_SUFFIX BARE_SUFFIX
+#define OMXVCM4P10_SUFFIX BARE_SUFFIX
+#define OMXVCM4P2_SUFFIX BARE_SUFFIX
+
+
+
+
+/* Map each bare, un-suffixed OpenMAX API function name to its renamed form; this group (omxACAAC_*) takes OMXACAAC_SUFFIX */
+#define omxACAAC_DecodeChanPairElt OMXCATBAR(ACAAC_DecodeChanPairElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeDatStrElt OMXCATBAR(ACAAC_DecodeDatStrElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeFillElt OMXCATBAR(ACAAC_DecodeFillElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeIsStereo_S32 OMXCATBAR(ACAAC_DecodeIsStereo_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeMsPNS_S32_I OMXCATBAR(ACAAC_DecodeMsPNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeMsStereo_S32_I OMXCATBAR(ACAAC_DecodeMsStereo_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodePrgCfgElt OMXCATBAR(ACAAC_DecodePrgCfgElt, OMXACAAC_SUFFIX)
+#define omxACAAC_DecodeTNS_S32_I OMXCATBAR(ACAAC_DecodeTNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_DeinterleaveSpectrum_S32 OMXCATBAR(ACAAC_DeinterleaveSpectrum_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_EncodeTNS_S32_I OMXCATBAR(ACAAC_EncodeTNS_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_LongTermPredict_S32 OMXCATBAR(ACAAC_LongTermPredict_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_LongTermReconstruct_S32_I OMXCATBAR(ACAAC_LongTermReconstruct_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_MDCTFwd_S32 OMXCATBAR(ACAAC_MDCTFwd_S32, OMXACAAC_SUFFIX)
+#define omxACAAC_MDCTInv_S32_S16 OMXCATBAR(ACAAC_MDCTInv_S32_S16, OMXACAAC_SUFFIX)
+#define omxACAAC_NoiselessDecode OMXCATBAR(ACAAC_NoiselessDecode, OMXACAAC_SUFFIX)
+#define omxACAAC_QuantInv_S32_I OMXCATBAR(ACAAC_QuantInv_S32_I, OMXACAAC_SUFFIX)
+#define omxACAAC_UnpackADIFHeader OMXCATBAR(ACAAC_UnpackADIFHeader, OMXACAAC_SUFFIX)
+#define omxACAAC_UnpackADTSFrameHeader OMXCATBAR(ACAAC_UnpackADTSFrameHeader, OMXACAAC_SUFFIX)
+
+/* omxACMP3_* functions: renamed with OMXACMP3_SUFFIX */
+#define omxACMP3_HuffmanDecode_S32 OMXCATBAR(ACMP3_HuffmanDecode_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_HuffmanDecodeSfb_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfb_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_HuffmanDecodeSfbMbp_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfbMbp_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_MDCTInv_S32 OMXCATBAR(ACMP3_MDCTInv_S32, OMXACMP3_SUFFIX)
+#define omxACMP3_ReQuantize_S32_I OMXCATBAR(ACMP3_ReQuantize_S32_I, OMXACMP3_SUFFIX)
+#define omxACMP3_ReQuantizeSfb_S32_I OMXCATBAR(ACMP3_ReQuantizeSfb_S32_I, OMXACMP3_SUFFIX)
+#define omxACMP3_SynthPQMF_S32_S16 OMXCATBAR(ACMP3_SynthPQMF_S32_S16, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackFrameHeader OMXCATBAR(ACMP3_UnpackFrameHeader, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackScaleFactors_S8 OMXCATBAR(ACMP3_UnpackScaleFactors_S8, OMXACMP3_SUFFIX)
+#define omxACMP3_UnpackSideInfo OMXCATBAR(ACMP3_UnpackSideInfo, OMXACMP3_SUFFIX)
+/* omxICJP_* functions: renamed with OMXICJP_SUFFIX */
+#define omxICJP_CopyExpand_U8_C3 OMXCATBAR(ICJP_CopyExpand_U8_C3, OMXICJP_SUFFIX)
+#define omxICJP_DCTFwd_S16 OMXCATBAR(ICJP_DCTFwd_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTFwd_S16_I OMXCATBAR(ICJP_DCTFwd_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTInv_S16 OMXCATBAR(ICJP_DCTInv_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTInv_S16_I OMXCATBAR(ICJP_DCTInv_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_Multiple_S16 OMXCATBAR(ICJP_DCTQuantFwd_Multiple_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_S16 OMXCATBAR(ICJP_DCTQuantFwd_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwd_S16_I OMXCATBAR(ICJP_DCTQuantFwd_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantFwdTableInit OMXCATBAR(ICJP_DCTQuantFwdTableInit, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_Multiple_S16 OMXCATBAR(ICJP_DCTQuantInv_Multiple_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_S16 OMXCATBAR(ICJP_DCTQuantInv_S16, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInv_S16_I OMXCATBAR(ICJP_DCTQuantInv_S16_I, OMXICJP_SUFFIX)
+#define omxICJP_DCTQuantInvTableInit OMXCATBAR(ICJP_DCTQuantInvTableInit, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffman8x8_Direct_S16_C1 OMXCATBAR(ICJP_DecodeHuffman8x8_Direct_S16_C1, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
+#define omxICJP_DecodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffman8x8_Direct_S16_U1_C1 OMXCATBAR(ICJP_EncodeHuffman8x8_Direct_S16_U1_C1, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX)
+#define omxICJP_EncodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecInit_U8, OMXICJP_SUFFIX)
+/* omxIPBM_* functions: renamed with OMXIPBM_SUFFIX */
+#define omxIPBM_AddC_U8_C1R_Sfs OMXCATBAR(IPBM_AddC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
+#define omxIPBM_Copy_U8_C1R OMXCATBAR(IPBM_Copy_U8_C1R, OMXIPBM_SUFFIX)
+#define omxIPBM_Copy_U8_C3R OMXCATBAR(IPBM_Copy_U8_C3R, OMXIPBM_SUFFIX)
+#define omxIPBM_Mirror_U8_C1R OMXCATBAR(IPBM_Mirror_U8_C1R, OMXIPBM_SUFFIX)
+#define omxIPBM_MulC_U8_C1R_Sfs OMXCATBAR(IPBM_MulC_U8_C1R_Sfs, OMXIPBM_SUFFIX)
+/* omxIPCS_* functions: renamed with OMXIPCS_SUFFIX. Each name is listed once; the 444-to-BGR888 LS_MCU entry completes the 420/422/444 x 565/888 set. */
+#define omxIPCS_ColorTwistQ14_U8_C3R OMXCATBAR(IPCS_ColorTwistQ14_U8_C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr420RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr420RszRot_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr422RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R OMXCATBAR(IPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr422RszRot_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbYCr422ToBGR565_U8_U16_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR565_U8_U16_C2C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbYCr422ToBGR888_U8_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR888_U8_C2C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R OMXCATBAR(IPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr422ToYCbCr420Rotate_U8_P3R OMXCATBAR(IPCS_YCbCr422ToYCbCr420Rotate_U8_P3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565_U8_U16_C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX)
+#define omxIPCS_YCbCr444ToBGR888_U8_C3R OMXCATBAR(IPCS_YCbCr444ToBGR888_U8_C3R, OMXIPCS_SUFFIX)
+/* omxIPPP_* functions: renamed with OMXIPPP_SUFFIX */
+#define omxIPPP_Deblock_HorEdge_U8_I OMXCATBAR(IPPP_Deblock_HorEdge_U8_I, OMXIPPP_SUFFIX)
+#define omxIPPP_Deblock_VerEdge_U8_I OMXCATBAR(IPPP_Deblock_VerEdge_U8_I, OMXIPPP_SUFFIX)
+#define omxIPPP_FilterFIR_U8_C1R OMXCATBAR(IPPP_FilterFIR_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_FilterMedian_U8_C1R OMXCATBAR(IPPP_FilterMedian_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_GetCentralMoment_S64 OMXCATBAR(IPPP_GetCentralMoment_S64, OMXIPPP_SUFFIX)
+#define omxIPPP_GetSpatialMoment_S64 OMXCATBAR(IPPP_GetSpatialMoment_S64, OMXIPPP_SUFFIX)
+#define omxIPPP_MomentGetStateSize OMXCATBAR(IPPP_MomentGetStateSize, OMXIPPP_SUFFIX)
+#define omxIPPP_MomentInit OMXCATBAR(IPPP_MomentInit, OMXIPPP_SUFFIX)
+#define omxIPPP_Moments_U8_C1R OMXCATBAR(IPPP_Moments_U8_C1R, OMXIPPP_SUFFIX)
+#define omxIPPP_Moments_U8_C3R OMXCATBAR(IPPP_Moments_U8_C3R, OMXIPPP_SUFFIX)
+/* omxSP_* functions: renamed with OMXSP_SUFFIX */
+#define omxSP_BlockExp_S16 OMXCATBAR(SP_BlockExp_S16, OMXSP_SUFFIX)
+#define omxSP_BlockExp_S32 OMXCATBAR(SP_BlockExp_S32, OMXSP_SUFFIX)
+#define omxSP_Copy_S16 OMXCATBAR(SP_Copy_S16, OMXSP_SUFFIX)
+#define omxSP_DotProd_S16 OMXCATBAR(SP_DotProd_S16, OMXSP_SUFFIX)
+#define omxSP_DotProd_S16_Sfs OMXCATBAR(SP_DotProd_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_CToC_SC16_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_CToC_SC32_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_RToCCS_S16S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S16S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTFwd_RToCCS_S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_C_SC16 OMXCATBAR(SP_FFTGetBufSize_C_SC16, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_C_SC32 OMXCATBAR(SP_FFTGetBufSize_C_SC32, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_R_S16S32 OMXCATBAR(SP_FFTGetBufSize_R_S16S32, OMXSP_SUFFIX)
+#define omxSP_FFTGetBufSize_R_S32 OMXCATBAR(SP_FFTGetBufSize_R_S32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_C_SC16 OMXCATBAR(SP_FFTInit_C_SC16, OMXSP_SUFFIX)
+#define omxSP_FFTInit_C_SC32 OMXCATBAR(SP_FFTInit_C_SC32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_R_S16S32 OMXCATBAR(SP_FFTInit_R_S16S32, OMXSP_SUFFIX)
+#define omxSP_FFTInit_R_S32 OMXCATBAR(SP_FFTInit_R_S32, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CCSToR_S32_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CCSToR_S32S16_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CToC_SC16_Sfs OMXCATBAR(SP_FFTInv_CToC_SC16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FFTInv_CToC_SC32_Sfs OMXCATBAR(SP_FFTInv_CToC_SC32_Sfs, OMXSP_SUFFIX)
+#define omxSP_FilterMedian_S32 OMXCATBAR(SP_FilterMedian_S32, OMXSP_SUFFIX)
+#define omxSP_FilterMedian_S32_I OMXCATBAR(SP_FilterMedian_S32_I, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16 OMXCATBAR(SP_FIR_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_I OMXCATBAR(SP_FIR_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_ISfs OMXCATBAR(SP_FIR_Direct_S16_ISfs, OMXSP_SUFFIX)
+#define omxSP_FIR_Direct_S16_Sfs OMXCATBAR(SP_FIR_Direct_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16 OMXCATBAR(SP_FIROne_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_I OMXCATBAR(SP_FIROne_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_ISfs OMXCATBAR(SP_FIROne_Direct_S16_ISfs, OMXSP_SUFFIX)
+#define omxSP_FIROne_Direct_S16_Sfs OMXCATBAR(SP_FIROne_Direct_S16_Sfs, OMXSP_SUFFIX)
+#define omxSP_IIR_BiQuadDirect_S16 OMXCATBAR(SP_IIR_BiQuadDirect_S16, OMXSP_SUFFIX)
+#define omxSP_IIR_BiQuadDirect_S16_I OMXCATBAR(SP_IIR_BiQuadDirect_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIR_Direct_S16 OMXCATBAR(SP_IIR_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_IIR_Direct_S16_I OMXCATBAR(SP_IIR_Direct_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIROne_BiQuadDirect_S16 OMXCATBAR(SP_IIROne_BiQuadDirect_S16, OMXSP_SUFFIX)
+#define omxSP_IIROne_BiQuadDirect_S16_I OMXCATBAR(SP_IIROne_BiQuadDirect_S16_I, OMXSP_SUFFIX)
+#define omxSP_IIROne_Direct_S16 OMXCATBAR(SP_IIROne_Direct_S16, OMXSP_SUFFIX)
+#define omxSP_IIROne_Direct_S16_I OMXCATBAR(SP_IIROne_Direct_S16_I, OMXSP_SUFFIX)
+/* omxVCCOMM_* functions: renamed with OMXVCCOMM_SUFFIX */
+#define omxVCCOMM_Average_16x OMXCATBAR(VCCOMM_Average_16x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Average_8x OMXCATBAR(VCCOMM_Average_8x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ComputeTextureErrorBlock OMXCATBAR(VCCOMM_ComputeTextureErrorBlock, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ComputeTextureErrorBlock_SAD OMXCATBAR(VCCOMM_ComputeTextureErrorBlock_SAD, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Copy16x16 OMXCATBAR(VCCOMM_Copy16x16, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_Copy8x8 OMXCATBAR(VCCOMM_Copy8x8, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_ExpandFrame_I OMXCATBAR(VCCOMM_ExpandFrame_I, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_LimitMVToRect OMXCATBAR(VCCOMM_LimitMVToRect, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_SAD_16x OMXCATBAR(VCCOMM_SAD_16x, OMXVCCOMM_SUFFIX)
+#define omxVCCOMM_SAD_8x OMXCATBAR(VCCOMM_SAD_8x, OMXVCCOMM_SUFFIX)
+/* omxVCM4P10_* functions: renamed with OMXVCM4P10_SUFFIX */
+#define omxVCM4P10_Average_4x OMXCATBAR(VCM4P10_Average_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Half OMXCATBAR(VCM4P10_BlockMatch_Half, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Integer OMXCATBAR(VCM4P10_BlockMatch_Integer, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_BlockMatch_Quarter OMXCATBAR(VCM4P10_BlockMatch_Quarter, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DeblockChroma_I OMXCATBAR(VCM4P10_DeblockChroma_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DeblockLuma_I OMXCATBAR(VCM4P10_DeblockLuma_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeChromaDcCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DecodeCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_DequantTransformResidualFromPairAndAdd OMXCATBAR(VCM4P10_DequantTransformResidualFromPairAndAdd, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingChroma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_HorEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingChroma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_VerEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingLuma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_HorEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_FilterDeblockingLuma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_VerEdge_I, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_GetVLCInfo OMXCATBAR(VCM4P10_GetVLCInfo, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateChroma OMXCATBAR(VCM4P10_InterpolateChroma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateHalfHor_Luma OMXCATBAR(VCM4P10_InterpolateHalfHor_Luma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateHalfVer_Luma OMXCATBAR(VCM4P10_InterpolateHalfVer_Luma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InterpolateLuma OMXCATBAR(VCM4P10_InterpolateLuma, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformDequant_ChromaDC OMXCATBAR(VCM4P10_InvTransformDequant_ChromaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformDequant_LumaDC OMXCATBAR(VCM4P10_InvTransformDequant_LumaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_InvTransformResidualAndAdd OMXCATBAR(VCM4P10_InvTransformResidualAndAdd, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MEGetBufSize OMXCATBAR(VCM4P10_MEGetBufSize, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MEInit OMXCATBAR(VCM4P10_MEInit, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_MotionEstimationMB OMXCATBAR(VCM4P10_MotionEstimationMB, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntra_16x16 OMXCATBAR(VCM4P10_PredictIntra_16x16, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntra_4x4 OMXCATBAR(VCM4P10_PredictIntra_4x4, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_PredictIntraChroma_8x8 OMXCATBAR(VCM4P10_PredictIntraChroma_8x8, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SAD_4x OMXCATBAR(VCM4P10_SAD_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_16x OMXCATBAR(VCM4P10_SADQuar_16x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_4x OMXCATBAR(VCM4P10_SADQuar_4x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SADQuar_8x OMXCATBAR(VCM4P10_SADQuar_8x, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SATD_4x4 OMXCATBAR(VCM4P10_SATD_4x4, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_SubAndTransformQDQResidual OMXCATBAR(VCM4P10_SubAndTransformQDQResidual, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformDequantChromaDCFromPair OMXCATBAR(VCM4P10_TransformDequantChromaDCFromPair, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformDequantLumaDCFromPair OMXCATBAR(VCM4P10_TransformDequantLumaDCFromPair, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformQuant_ChromaDC OMXCATBAR(VCM4P10_TransformQuant_ChromaDC, OMXVCM4P10_SUFFIX)
+#define omxVCM4P10_TransformQuant_LumaDC OMXCATBAR(VCM4P10_TransformQuant_LumaDC, OMXVCM4P10_SUFFIX)
+/* omxVCM4P2_* functions: renamed with OMXVCM4P2_SUFFIX */
+#define omxVCM4P2_BlockMatch_Half_16x16 OMXCATBAR(VCM4P2_BlockMatch_Half_16x16, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Half_8x8 OMXCATBAR(VCM4P2_BlockMatch_Half_8x8, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Integer_16x16 OMXCATBAR(VCM4P2_BlockMatch_Integer_16x16, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_BlockMatch_Integer_8x8 OMXCATBAR(VCM4P2_BlockMatch_Integer_8x8, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DCT8x8blk OMXCATBAR(VCM4P2_DCT8x8blk, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeBlockCoef_Inter OMXCATBAR(VCM4P2_DecodeBlockCoef_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeBlockCoef_Intra OMXCATBAR(VCM4P2_DecodeBlockCoef_Intra, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodePadMV_PVOP OMXCATBAR(VCM4P2_DecodePadMV_PVOP, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_Inter OMXCATBAR(VCM4P2_DecodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_DecodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeMV OMXCATBAR(VCM4P2_EncodeMV, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_Inter OMXCATBAR(VCM4P2_EncodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_EncodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_FindMVpred OMXCATBAR(VCM4P2_FindMVpred, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_IDCT8x8blk OMXCATBAR(VCM4P2_IDCT8x8blk, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MCReconBlock OMXCATBAR(VCM4P2_MCReconBlock, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MEGetBufSize OMXCATBAR(VCM4P2_MEGetBufSize, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MEInit OMXCATBAR(VCM4P2_MEInit, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_MotionEstimationMB OMXCATBAR(VCM4P2_MotionEstimationMB, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_PredictReconCoefIntra OMXCATBAR(VCM4P2_PredictReconCoefIntra, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInter_I OMXCATBAR(VCM4P2_QuantInter_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantIntra_I OMXCATBAR(VCM4P2_QuantIntra_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInvInter_I OMXCATBAR(VCM4P2_QuantInvInter_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_QuantInvIntra_I OMXCATBAR(VCM4P2_QuantInvIntra_I, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_TransRecBlockCoef_inter OMXCATBAR(VCM4P2_TransRecBlockCoef_inter, OMXVCM4P2_SUFFIX)
+#define omxVCM4P2_TransRecBlockCoef_intra OMXCATBAR(VCM4P2_TransRecBlockCoef_intra, OMXVCM4P2_SUFFIX)
+
+
+#endif /* _armOMX_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes.h
new file mode 100644
index 0000000..8b295a6
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes.h
@@ -0,0 +1,252 @@
+/**
+ * File: omxtypes.h
+ * Brief: Defines basic Data types used in OpenMAX v1.0.2 header files.
+ *
+ * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved.
+ *
+ * These materials are protected by copyright laws and contain material
+ * proprietary to the Khronos Group, Inc. You may use these materials
+ * for implementing Khronos specifications, without altering or removing
+ * any trademark, copyright or other notice from the specification.
+ *
+ * Khronos Group makes no, and expressly disclaims any, representations
+ * or warranties, express or implied, regarding these materials, including,
+ * without limitation, any implied warranties of merchantability or fitness
+ * for a particular purpose or non-infringement of any intellectual property.
+ * Khronos Group makes no, and expressly disclaims any, warranties, express
+ * or implied, regarding the correctness, accuracy, completeness, timeliness,
+ * and reliability of these materials.
+ *
+ * Under no circumstances will the Khronos Group, or any of its Promoters,
+ * Contributors or Members or their respective partners, officers, directors,
+ * employees, agents or representatives be liable for any damages, whether
+ * direct, indirect, special or consequential damages for lost revenues,
+ * lost profits, or otherwise, arising from or in connection with these
+ * materials.
+ *
+ * Khronos and OpenMAX are trademarks of the Khronos Group Inc.
+ *
+ */
+
+#ifndef _OMXTYPES_H_
+#define _OMXTYPES_H_
+
+#include <limits.h>
+
+#define OMX_IN /* expands to nothing; presumably tags an input parameter in prototypes - confirm against the API headers */
+#define OMX_OUT /* expands to nothing; presumably tags an output parameter - confirm against the API headers */
+#define OMX_INOUT /* expands to nothing; presumably tags a parameter that is both read and written - confirm */
+
+
+typedef enum {
+ /* Status codes: OMX_Sts_NoErr is zero and every other code listed here is negative, apart from the INT_MAX placeholder at the end. */
+ /* Mandatory return codes - use cases are explicitly described for each function */
+ OMX_Sts_NoErr = 0, /* No error, the function completed successfully */
+ OMX_Sts_Err = -2, /* Unknown/unspecified error */
+ OMX_Sts_InvalidBitstreamValErr = -182, /* Invalid value detected during bitstream processing */
+ OMX_Sts_MemAllocErr = -9, /* Not enough memory allocated for the operation */
+ OMX_StsACAAC_GainCtrErr = -159, /* AAC: Unsupported gain control data detected */
+ OMX_StsACAAC_PrgNumErr = -167, /* AAC: Invalid number of elements for one program */
+ OMX_StsACAAC_CoefValErr = -163, /* AAC: Invalid quantized coefficient value */
+ OMX_StsACAAC_MaxSfbErr = -162, /* AAC: Invalid maxSfb value in relation to numSwb */
+ OMX_StsACAAC_PlsDataErr = -160, /* AAC: pulse escape sequence data error */
+
+ /* Optional return codes - use cases are explicitly described for each function*/
+ OMX_Sts_BadArgErr = -5, /* Bad Arguments */
+
+ OMX_StsACAAC_TnsNumFiltErr = -157, /* AAC: Invalid number of TNS filters */
+ OMX_StsACAAC_TnsLenErr = -156, /* AAC: Invalid TNS region length */
+ OMX_StsACAAC_TnsOrderErr = -155, /* AAC: Invalid order of TNS filter */
+ OMX_StsACAAC_TnsCoefResErr = -154, /* AAC: Invalid bit-resolution for TNS filter coefficients */
+ OMX_StsACAAC_TnsCoefErr = -153, /* AAC: Invalid TNS filter coefficients */
+ OMX_StsACAAC_TnsDirectErr = -152, /* AAC: Invalid TNS filter direction */
+
+ OMX_StsICJP_JPEGMarkerErr = -183, /* JPEG marker encountered within an entropy-coded block; */
+ /* Huffman decoding operation terminated early. */
+ OMX_StsICJP_JPEGMarker = -181, /* JPEG marker encountered; Huffman decoding */
+ /* operation terminated early. */
+ OMX_StsIPPP_ContextMatchErr = -17, /* Context parameter doesn't match to the operation */
+
+ OMX_StsSP_EvenMedianMaskSizeErr = -180, /* Even size of the Median Filter mask was replaced by the odd one */
+
+ OMX_Sts_MaximumEnumeration = INT_MAX /*Placeholder, forces enum of size OMX_INT*/
+
+ } OMXResult; /** Return value or error value returned from a function. Identical to OMX_INT */
+
+
+/* OMX_U8: unsigned 8-bit integer, picked by testing the <limits.h> ranges; the build stops with #error if neither candidate fits */
+#if UCHAR_MAX == 0xff
+typedef unsigned char OMX_U8;
+#elif USHRT_MAX == 0xff
+typedef unsigned short int OMX_U8;
+#else
+#error OMX_U8 undefined
+#endif
+
+
+/* OMX_S8: signed 8-bit integer, picked the same way from signed char or short */
+#if SCHAR_MAX == 0x7f
+typedef signed char OMX_S8;
+#elif SHRT_MAX == 0x7f
+typedef signed short int OMX_S8;
+#else
+#error OMX_S8 undefined
+#endif
+
+
+/* OMX_U16: unsigned 16-bit integer, picked from unsigned short or unsigned int */
+#if USHRT_MAX == 0xffff
+typedef unsigned short int OMX_U16;
+#elif UINT_MAX == 0xffff
+typedef unsigned int OMX_U16;
+#else
+#error OMX_U16 undefined
+#endif
+
+
+/* OMX_S16: signed 16-bit integer, picked from short or int */
+#if SHRT_MAX == 0x7fff
+typedef signed short int OMX_S16;
+#elif INT_MAX == 0x7fff
+typedef signed int OMX_S16;
+#else
+#error OMX_S16 undefined
+#endif
+
+
+/* OMX_U32: unsigned 32-bit integer; unsigned long is the fallback, so its range is checked with ULONG_MAX (LONG_MAX can never equal 0xffffffff for a 32-bit long) */
+#if UINT_MAX == 0xffffffff
+typedef unsigned int OMX_U32;
+#elif ULONG_MAX == 0xffffffff
+typedef unsigned long int OMX_U32;
+#else
+#error OMX_U32 undefined
+#endif
+
+
+/* OMX_S32: signed 32-bit integer, picked from int or long by testing the <limits.h> ranges; #error if neither fits */
+#if INT_MAX == 0x7fffffff
+typedef signed int OMX_S32;
+#elif LONG_MAX == 0x7fffffff
+typedef long signed int OMX_S32;
+#else
+#error OMX_S32 undefined
+#endif
+
+
+/* OMX_U64 & OMX_S64 */
+#if defined( _WIN32 ) || defined ( _WIN64 )
+ typedef __int64 OMX_S64; /** Signed 64-bit integer */
+ typedef unsigned __int64 OMX_U64; /** Unsigned 64-bit integer */
+ #define OMX_MIN_S64 (0x8000000000000000i64)
+ #define OMX_MIN_U64 (0x0000000000000000i64)
+ #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFi64)
+ #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFi64)
+#else
+ typedef long long OMX_S64; /** Signed 64-bit integer */
+ typedef unsigned long long OMX_U64; /** Unsigned 64-bit integer */
+ #define OMX_MIN_S64 (0x8000000000000000LL)
+ #define OMX_MIN_U64 (0x0000000000000000LL)
+ #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFLL)
+ #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFLL)
+#endif
+
+
+/**
+ * OMX_SC8: signed 8-bit complex number
+ */
+typedef struct {
+ OMX_S8 Re; /** Real part */
+ OMX_S8 Im; /** Imaginary part */
+} OMX_SC8;
+
+
+/**
+ * OMX_SC16: signed 16-bit complex number
+ */
+typedef struct {
+ OMX_S16 Re; /** Real part */
+ OMX_S16 Im; /** Imaginary part */
+} OMX_SC16;
+
+
+/**
+ * OMX_SC32: signed 32-bit complex number
+ */
+typedef struct {
+ OMX_S32 Re; /** Real part */
+ OMX_S32 Im; /** Imaginary part */
+} OMX_SC32;
+
+
+/**
+ * OMX_SC64: signed 64-bit complex number
+ */
+typedef struct {
+ OMX_S64 Re; /** Real part */
+ OMX_S64 Im; /** Imaginary part */
+} OMX_SC64;
+
+
+/* OMX_F32: alias for the C float type */
+typedef float OMX_F32; /** Single precision floating point,IEEE 754 */
+
+
+/* OMX_F64: alias for the C double type */
+typedef double OMX_F64; /** Double precision floating point,IEEE 754 */
+
+
+/* OMX_INT: alias for the C int type */
+typedef int OMX_INT; /** signed integer corresponding to machine word length, has maximum signed value INT_MAX*/
+
+
+/* Range limits of the 8-, 16- and 32-bit OMX integer types, listed as MIN/MAX pairs per type */
+#define OMX_MIN_S8 (-128)
+#define OMX_MAX_S8 (127)
+#define OMX_MIN_U8 (0)
+#define OMX_MAX_U8 (255)
+#define OMX_MIN_S16 (-32768)
+#define OMX_MAX_S16 (32767)
+#define OMX_MIN_U16 (0)
+#define OMX_MAX_U16 (0xFFFF)
+#define OMX_MIN_S32 (-2147483647-1)
+#define OMX_MAX_S32 (2147483647)
+#define OMX_MIN_U32 (0)
+#define OMX_MAX_U32 (0xFFFFFFFF)
+
+typedef void OMXVoid; /* alias for void */
+
+#ifndef NULL /* only supply NULL when no earlier header has defined it */
+#define NULL ((void*)0)
+#endif
+
+/**
+ * OMXRect: geometric position and size of a rectangle. (x, y) is the top left
+ * corner; width extends in the x-direction and height in the y-direction.
+ */
+typedef struct {
+ OMX_INT x; /** x-coordinate of top left corner of rectangle */
+ OMX_INT y; /** y-coordinate of top left corner of rectangle */
+ OMX_INT width; /** Width in the x-direction. */
+ OMX_INT height; /** Height in the y-direction. */
+} OMXRect;
+
+
+/**
+ * OMXPoint: geometric position of a point
+ */
+typedef struct {
+ OMX_INT x; /** x-coordinate */
+ OMX_INT y; /** y-coordinate */
+} OMXPoint;
+
+
+/**
+ * OMXSize: dimensions of a rectangle, or region of interest in an image
+ */
+typedef struct {
+ OMX_INT width; /** Width of the rectangle, in the x-direction */
+ OMX_INT height; /** Height of the rectangle, in the y-direction */
+} OMXSize;
+
+#endif /* _OMXTYPES_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes_s.h
new file mode 100644
index 0000000..8d24b65
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes_s.h
@@ -0,0 +1,77 @@
+;//
+;//
+;// File Name: omxtypes_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Mandatory return codes - the use cases are described explicitly for each function
+OMX_Sts_NoErr EQU 0 ;// No error; the function completed successfully
+OMX_Sts_Err EQU -2 ;// Unknown/unspecified error
+OMX_Sts_InvalidBitstreamValErr EQU -182 ;// Invalid value detected during bitstream processing
+OMX_Sts_MemAllocErr EQU -9 ;// Not enough memory allocated for the operation
+OMX_StsACAAC_GainCtrErr EQU -159 ;// AAC: Unsupported gain control data detected
+OMX_StsACAAC_PrgNumErr EQU -167 ;// AAC: Invalid number of elements for one program
+OMX_StsACAAC_CoefValErr EQU -163 ;// AAC: Invalid quantized coefficient value
+OMX_StsACAAC_MaxSfbErr EQU -162 ;// AAC: Invalid maxSfb value in relation to numSwb
+OMX_StsACAAC_PlsDataErr EQU -160 ;// AAC: Pulse escape sequence data error
+
+;// Optional return codes - the use cases are described explicitly for each function
+OMX_Sts_BadArgErr EQU -5 ;// Bad arguments
+
+OMX_StsACAAC_TnsNumFiltErr EQU -157 ;// AAC: Invalid number of TNS filters
+OMX_StsACAAC_TnsLenErr EQU -156 ;// AAC: Invalid TNS region length
+OMX_StsACAAC_TnsOrderErr EQU -155 ;// AAC: Invalid order of TNS filter
+OMX_StsACAAC_TnsCoefResErr EQU -154 ;// AAC: Invalid bit-resolution for TNS filter coefficients
+OMX_StsACAAC_TnsCoefErr EQU -153 ;// AAC: Invalid TNS filter coefficients
+OMX_StsACAAC_TnsDirectErr EQU -152 ;// AAC: Invalid TNS filter direction
+
+OMX_StsICJP_JPEGMarkerErr EQU -183 ;// JPEG marker encountered within an entropy-coded block;
+ ;// Huffman decoding operation terminated early.
+OMX_StsICJP_JPEGMarker EQU -181 ;// JPEG marker encountered; Huffman decoding
+ ;// operation terminated early.
+OMX_StsIPPP_ContextMatchErr EQU -17 ;// Context parameter does not match the operation
+
+OMX_StsSP_EvenMedianMaskSizeErr EQU -180 ;// An even Median Filter mask size was replaced by an odd one
+
+OMX_Sts_MaximumEnumeration EQU 0x7FFFFFFF ;// Largest positive 32-bit value; closes the list of codes
+
+
+
+;// Smallest value of each fixed-width OMX integer type
+OMX_MIN_S8 EQU (-128)
+OMX_MIN_U8 EQU 0
+OMX_MIN_S16 EQU (-32768)
+OMX_MIN_U16 EQU 0
+
+
+OMX_MIN_S32 EQU (-2147483647-1)
+OMX_MIN_U32 EQU 0
+
+;// Largest value of each fixed-width OMX integer type
+OMX_MAX_S8 EQU (127)
+OMX_MAX_U8 EQU (255)
+OMX_MAX_S16 EQU (32767)
+OMX_MAX_U16 EQU (0xFFFF)
+OMX_MAX_S32 EQU (2147483647)
+OMX_MAX_U32 EQU (0xFFFFFFFF)
+
+;// Single-bit flag values (0x1, 0x2, 0x40)
+OMX_VC_UPPER EQU 0x1 ;// Used by the PredictIntra functions
+OMX_VC_LEFT EQU 0x2 ;// Used by the PredictIntra functions
+OMX_VC_UPPER_RIGHT EQU 0x40 ;// Used by the PredictIntra functions
+
+NULL EQU 0 ;// Null pointer value
+
+;// Structures
+
+ INCLUDE armCOMM_s.h
+
+;// OMXPoint declared with fields x and y, each given the value 4.
+;// NOTE(review): M_STRUCT/M_FIELD presumably come from armCOMM_s.h and the
+;// second M_FIELD argument is presumably the field size in bytes - confirm.
+ M_STRUCT OMXPoint
+ M_FIELD x, 4
+ M_FIELD y, 4
+ M_ENDSTRUCT
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/build_vc.pl b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/build_vc.pl
new file mode 100755
index 0000000..1ae7005
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/build_vc.pl
@@ -0,0 +1,111 @@
+#!/usr/bin/perl
+#
+#
+# File Name: build_vc.pl
+# OpenMAX DL: v1.0.2
+# Revision: 9641
+# Date: Thursday, February 7, 2008
+#
+# (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+#
+#
+#
+# This file builds the OpenMAX DL vc domain library omxVC.o.
+#
+
+use File::Spec;
+use strict;
+
+my ($CC, $CC_OPTS, $AS, $AS_OPTS, $LIB, $LIB_OPTS, $LIB_TYPE);
+
+# Tool configuration: C compiler, assembler and library tool, each with the
+# options passed on its command line.
+$CC = 'armcc';
+$CC_OPTS = '--no_unaligned_access --cpu ARM1136J-S -c';
+$AS = 'armasm';
+$AS_OPTS = '--no_unaligned_access --cpu ARM1136J-S';
+# Alternative (disabled): combine the objects into a single '.o' with armlink.
+# $LIB = 'armlink';
+# $LIB_OPTS = '--partial -o';
+# $LIB_TYPE = '.o';
+# Active setting: collect the objects into a '.a' library with armar.
+$LIB = 'armar';
+$LIB_OPTS = '--create -r';
+$LIB_TYPE = '.a';
+
+#------------------------
+
+my (@headerlist, @filelist, $hd, $file, $ofile, $command, $objlist, $libfile, $h);
+
+# Define the list of directories containing included header files.
+@headerlist = qw(api vc/api vc/m4p2/api vc/m4p10/api);
+
+# Read the list of source files to compile, one path per line.
+# Three-argument open with a lexical filehandle keeps the mode separate from
+# the file name, and $! tells the user why the open failed.
+open(my $list_fh, '<', 'filelist_vc.txt')
+    or die("Can't open source file list: $!\n");
+@filelist = <$list_fh>;
+close($list_fh);
+
+# Fix the file separators in the header paths
+@headerlist = map { File::Spec->canonpath($_) } @headerlist;
+
+# Create the include path to be passed to the compiler
+$hd = '-I' . join(' -I', @headerlist);
+
+# Create the build directories "obj" and "lib" (if they are not there already).
+# Stop immediately if one cannot be created: every later step writes into them.
+foreach my $dir ('obj', 'lib')
+{
+    next if (-d $dir);
+    mkdir($dir, 0777) or die("Can't create directory '$dir': $!\n");
+}
+
+$objlist = '';
+
+# Compile each file.
+# '.c' files go to the C compiler and '.s' files to the assembler; any other
+# extension (headers, text files) is reported and skipped.
+my $failures = 0;
+foreach $file (@filelist)
+{
+    my $f;
+    my $base;
+    my $ext;
+    my $objfile;
+
+    # Remove the line terminator and trailing blanks (this also copes with
+    # CRLF line endings) and skip empty lines.
+    $file =~ s/\s+\z//;
+    next if ($file eq '');
+    $file = File::Spec->canonpath($file);
+
+    (undef, undef, $f) = File::Spec->splitpath($file);
+    # \w+ accepts extensions of any length, so e.g. '.txt' is ignored below
+    # instead of being reported as "No file extension found".
+    if(($base, $ext) = $f =~ /(.+)\.(\w+)$/)
+    {
+        $objfile = File::Spec->catfile('obj', $base.'.o');
+
+        if($ext eq 'c')
+        {
+            $objlist .= "$objfile ";
+            $command = $CC.' '.$CC_OPTS.' '.$hd.' -o '.$objfile.' '.$file;
+            print "$command\n";
+            (system($command) == 0) or $failures++;
+        }
+        elsif($ext eq 's')
+        {
+            $objlist .= "$objfile ";
+            $command = $AS.' '.$AS_OPTS.' '.$hd.' -o '.$objfile.' '.$file;
+            print "$command\n";
+            (system($command) == 0) or $failures++;
+        }
+        else
+        {
+            print "Ignoring file: $f\n";
+        }
+    }
+    else
+    {
+        die "No file extension found: $f\n";
+    }
+}
+
+# Do not go on to build a library from missing or stale object files.
+die "Build failed: $failures file(s) did not build\n" if ($failures);
+
+# Do the final link stage to create the libraries.
+$libfile = File::Spec->catfile('lib', 'omxVC'.$LIB_TYPE);
+$command = $LIB.' '.$LIB_OPTS.' '.$libfile.' '.$objlist;
+print "$command\n";
+# A failing library tool must show up in the script's exit status, so that a
+# calling build system does not mistake a failed build for a successful one.
+(system($command) == 0) or die "Build failed: could not create $libfile\n";
+print "Build successful\n";
+
+
+
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/filelist_vc.txt b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/filelist_vc.txt
new file mode 100644
index 0000000..0f1623f
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/filelist_vc.txt
@@ -0,0 +1,74 @@
+./api/armCOMM.h
+./api/armCOMM_BitDec_s.h
+./api/armCOMM_Bitstream.h
+./api/armCOMM_IDCT_s.h
+./api/armCOMM_IDCTTable.h
+./api/armCOMM_MaskTable.h
+./api/armCOMM_s.h
+./api/armCOMM_Version.h
+./api/armOMX_ReleaseVersion.h
+./api/omxtypes.h
+./api/omxtypes_s.h
+./src/armCOMM_IDCTTable.c
+./src/armCOMM_MaskTable.c
+./vc/api/armVC.h
+./vc/api/armVCCOMM_s.h
+./vc/api/omxVC.h
+./vc/api/omxVC_s.h
+./vc/comm/src/omxVCCOMM_Copy16x16_s.s
+./vc/comm/src/omxVCCOMM_Copy8x8_s.s
+./vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
+./vc/m4p10/api/armVCM4P10_CAVLCTables.h
+./vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_CAVLCTables.c
+./vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
+./vc/m4p10/src/armVCM4P10_DequantTables_s.s
+./vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
+./vc/m4p10/src/armVCM4P10_QuantTables_s.s
+./vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
+./vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
+./vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
+./vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
+./vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+./vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
+./vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
+./vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
+./vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
+./vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
+./vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
+./vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
+./vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
+./vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
+./vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
+./vc/m4p2/src/armVCM4P2_Clip8_s.s
+./vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
+./vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
+./vc/m4p2/src/armVCM4P2_Lookup_Tables.c
+./vc/m4p2/src/armVCM4P2_SetPredDir_s.s
+./vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
+./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
+./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
+./vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
+./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
+./vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
+./vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
+./vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
+./vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
+./vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
+./vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
+./vc/src/armVC_Version.c \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM.c
new file mode 100644
index 0000000..e572a89
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM.c
@@ -0,0 +1,936 @@
+/**
+ *
+ * File Name: armCOMM.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Defines Common APIs used across OpenMAX API's
+ */
+
+#include "omxtypes.h"
+#include "armCOMM.h"
+
+/***********************************************************************/
+ /* Miscellaneous Arithmetic operations */
+
+/**
+ * Function: armRoundFloatToS16
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halves are rounded
+ * away from zero) and converts it to OMX_S16. No saturation is performed.
+ *
+ * Parameters:
+ * [in]  Value    Double precision value to be converted
+ *
+ * Return Value:
+ * Rounded value in OMX_S16 format
+ *
+ */
+
+OMX_S16 armRoundFloatToS16 (OMX_F64 Value)
+{
+    /* Bias by half a unit in the direction of the sign; the cast then truncates. */
+    const OMX_F64 bias = (Value > 0) ? .5 : -.5;
+
+    return (OMX_S16)(Value + bias);
+}
+
+/**
+ * Function: armRoundFloatToS32
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halves are rounded
+ * away from zero) and converts it to OMX_S32. No saturation is performed.
+ *
+ * Parameters:
+ * [in]  Value    Double precision value to be converted
+ *
+ * Return Value:
+ * Rounded value in OMX_S32 format
+ *
+ */
+
+OMX_S32 armRoundFloatToS32 (OMX_F64 Value)
+{
+    /* Bias by half a unit in the direction of the sign; the cast then truncates. */
+    const OMX_F64 bias = (Value > 0) ? .5 : -.5;
+
+    return (OMX_S32)(Value + bias);
+}
+/**
+ * Function: armSatRoundFloatToS16
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halves away from
+ * zero) and saturates the result to the OMX_S16 range.
+ *
+ * Parameters:
+ * [in]  Value    Double precision value to be converted
+ *
+ * Return Value:
+ * Rounded, saturated value in OMX_S16 format
+ *
+ */
+
+OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value)
+{
+    if (Value > 0)
+    {
+        /* Positive side: bias upwards, clamp at the top of the range. */
+        Value += 0.5;
+        return (Value > (OMX_S16)OMX_MAX_S16) ? (OMX_S16)OMX_MAX_S16
+                                              : (OMX_S16)Value;
+    }
+
+    /* Zero or negative side: bias downwards, clamp at the bottom of the range. */
+    Value -= 0.5;
+    return (Value < (OMX_S16)OMX_MIN_S16) ? (OMX_S16)OMX_MIN_S16
+                                          : (OMX_S16)Value;
+}
+
+/**
+ * Function: armSatRoundFloatToS32
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halves away from
+ * zero) and saturates the result to the OMX_S32 range.
+ *
+ * Parameters:
+ * [in]  Value    Double precision value to be converted
+ *
+ * Return Value:
+ * Rounded, saturated value in OMX_S32 format
+ *
+ */
+
+OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value)
+{
+    if (Value > 0)
+    {
+        /* Positive side: bias upwards, clamp at the top of the range. */
+        Value += 0.5;
+        return (Value > (OMX_S32)OMX_MAX_S32) ? (OMX_S32)OMX_MAX_S32
+                                              : (OMX_S32)Value;
+    }
+
+    /* Zero or negative side: bias downwards, clamp at the bottom of the range. */
+    Value -= 0.5;
+    return (Value < (OMX_S32)OMX_MIN_S32) ? (OMX_S32)OMX_MIN_S32
+                                          : (OMX_S32)Value;
+}
+
+/**
+ * Function: armSatRoundFloatToU16
+ *
+ * Description:
+ * Adds 0.5 to a double precision value, truncates it and saturates the
+ * result to the OMX_U16 range [0, OMX_MAX_U16].
+ *
+ * Parameters:
+ * [in]  Value    Double precision value to be converted
+ *
+ * Return Value:
+ * Rounded, saturated value in OMX_U16 format. Negative inputs and NaN
+ * yield 0.
+ *
+ */
+
+OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value)
+{
+    Value += 0.5;
+
+    if (Value > (OMX_U16)OMX_MAX_U16)
+    {
+        return (OMX_U16)OMX_MAX_U16;
+    }
+
+    /* Converting a negative (or NaN) floating value to an unsigned type is
+     * undefined, so saturate at the lower bound explicitly. Written as
+     * !(x >= 0) so that NaN is caught as well. */
+    if (!(Value >= 0))
+    {
+        return 0;
+    }
+
+    return (OMX_U16)Value;
+}
+
+/**
+ * Function: armSatRoundFloatToU32
+ *
+ * Description:
+ * Adds 0.5 to a double precision value, truncates it and saturates the
+ * result to the OMX_U32 range [0, OMX_MAX_U32].
+ *
+ * Parameters:
+ * [in]  Value    Double precision value to be converted
+ *
+ * Return Value:
+ * Rounded, saturated value in OMX_U32 format. Negative inputs and NaN
+ * yield 0.
+ *
+ */
+
+OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value)
+{
+    Value += 0.5;
+
+    if (Value > (OMX_U32)OMX_MAX_U32)
+    {
+        return (OMX_U32)OMX_MAX_U32;
+    }
+
+    /* Converting a negative (or NaN) floating value to an unsigned type is
+     * undefined, so saturate at the lower bound explicitly. Written as
+     * !(x >= 0) so that NaN is caught as well. */
+    if (!(Value >= 0))
+    {
+        return 0;
+    }
+
+    return (OMX_U32)Value;
+}
+
+/**
+ * Function: armRoundFloatToS64
+ *
+ * Description:
+ * Rounds a double precision value to the nearest integer (halves are rounded
+ * away from zero) and converts it to OMX_S64. No saturation is performed.
+ *
+ * Parameters:
+ * [in]  Value    Double precision value to be converted
+ *
+ * Return Value:
+ * Rounded value in OMX_S64 format
+ *
+ */
+
+OMX_S64 armRoundFloatToS64 (OMX_F64 Value)
+{
+    /* Bias by half a unit in the direction of the sign; the cast then truncates. */
+    const OMX_F64 bias = (Value > 0) ? .5 : -.5;
+
+    return (OMX_S64)(Value + bias);
+}
+
+/**
+ * Function: armSignCheck
+ *
+ * Description:
+ * Reports the sign of a 16-bit value.
+ *
+ * Parameters:
+ * [in]  var    Variable to be checked
+ *
+ * Return Value:
+ * OMX_INT --  1 if var is positive
+ *             0 if var is zero
+ *            -1 if var is negative
+ */
+
+OMX_INT armSignCheck (
+    OMX_S16 var
+)
+
+{
+    if (var > 0)
+    {
+        return 1;
+    }
+    if (var < 0)
+    {
+        return -1;
+    }
+    return 0;
+}
+
+/**
+ * Function: armClip
+ *
+ * Description: Limits src to the range [min, max]. The upper bound is
+ *              tested first.
+ *
+ * Parameters:
+ * [in]  min    lower bound
+ * [in]  max    upper bound
+ * [in]  src    value to be clipped
+ *
+ * Return Value:
+ * OMX_S32 -- the clipped value
+ */
+
+OMX_S32 armClip (
+    OMX_INT min,
+    OMX_INT max,
+    OMX_S32 src
+)
+
+{
+    if (src > max)
+    {
+        return max;
+    }
+    if (src < min)
+    {
+        return min;
+    }
+    return src;
+}
+
+/**
+ * Function: armClip_F32
+ *
+ * Description: Limits a single precision value to the range [min, max].
+ *              The upper bound is tested first.
+ *
+ * Parameters:
+ * [in]  min    lower bound
+ * [in]  max    upper bound
+ * [in]  src    value to be clipped
+ *
+ * Return Value:
+ * OMX_F32 -- the clipped value
+ */
+
+OMX_F32 armClip_F32 (
+    OMX_F32 min,
+    OMX_F32 max,
+    OMX_F32 src
+)
+
+{
+    if (src > max)
+    {
+        return max;
+    }
+    if (src < min)
+    {
+        return min;
+    }
+    return src;
+}
+
+/**
+ * Function: armShiftSat_F32
+ *
+ * Description: Divides a float value by 2^shift, rounds it by adding 0.5
+ *              and truncating, and saturates the result to the unsigned
+ *              range [0, 2^satBits).
+ *
+ * Parameters:
+ * [in]  v          Number to be operated upon
+ * [in]  shift      Divides the input "v" by "2^shift"; must be in 0..31
+ * [in]  satBits    Final range is [0, 2^satBits); values of 32 or more
+ *                  select the full OMX_U32 range, values of 0 or less
+ *                  leave 0 as the only possible result
+ *
+ * Return Value:
+ * OMX_U32 -- the "shifted", rounded and saturated value; 0 when v is
+ *            zero, negative or NaN
+ */
+
+OMX_U32 armShiftSat_F32(OMX_F32 v, OMX_INT shift, OMX_INT satBits)
+{
+    const OMX_U32 allOnes = (OMX_U32)(-1);
+    OMX_U32 maxV;
+    OMX_F32 vShifted, vRounded, shiftDiv;
+    OMX_U32 vInt;
+
+    /* Written as !(v > 0) so that NaN is rejected together with v <= 0. */
+    if (!(v > 0))
+        return 0;
+
+    /* Shifting a 32-bit value by 32 or more is undefined, so the two ends of
+     * the satBits range are handled without a shift. */
+    if (satBits <= 0)
+        return 0;
+    maxV = (satBits >= 32) ? allOnes : (allOnes >> (32 - satBits));
+
+    /* Unsigned one, so that shift == 31 does not overflow a signed int. */
+    shiftDiv = (OMX_F32)((OMX_U32)1 << shift);
+
+    vShifted = v / shiftDiv;
+    vRounded = (OMX_F32)(vShifted + 0.5);
+
+    /* Saturate before converting: a float of 2^32 or more cannot be
+     * converted to OMX_U32. */
+    if (vRounded >= 4294967296.0f)
+        return maxV;
+
+    vInt = (OMX_U32)vRounded;
+    return (vInt > maxV) ? maxV : vInt;
+}
+
+/**
+ * Function: armSwapElem
+ *
+ * Description:
+ * Exchanges the contents of two elements byte by byte. Each element is
+ * <elemSize> bytes long.
+ *
+ * Parameters:
+ * [in,out]  pBuf1       first element
+ * [in,out]  pBuf2       second element
+ * [in]      elemSize    size of each element in bytes
+ *
+ * Return Value:
+ * OMXResult -- OMX_Sts_NoErr once the elements have been exchanged; a NULL
+ *              pointer is rejected through armRetArgErrIf with
+ *              OMX_Sts_BadArgErr
+ */
+OMXResult armSwapElem(
+    OMX_U8 *pBuf1,
+    OMX_U8 *pBuf2,
+    OMX_INT elemSize
+    )
+{
+    OMX_INT remaining;
+    armRetArgErrIf(!pBuf1 || !pBuf2, OMX_Sts_BadArgErr);
+
+    /* Walk both elements front to back, exchanging one byte at a time. */
+    for (remaining = elemSize; remaining > 0; remaining--)
+    {
+        const OMX_U8 saved = *pBuf1;
+
+        *pBuf1++ = *pBuf2;
+        *pBuf2++ = saved;
+    }
+    return OMX_Sts_NoErr;
+}
+
+/**
+ * Function: armMedianOf3
+ *
+ * Description: Finds the median of three numbers
+ *
+ * Parameters:
+ * [in]  fEntry    First entry
+ * [in]  sEntry    Second entry
+ * [in]  tEntry    Third entry
+ *
+ * Return Value:
+ * OMX_S32 -- the median value
+ */
+
+OMX_S32 armMedianOf3 (
+    OMX_S32 fEntry,
+    OMX_S32 sEntry,
+    OMX_S32 tEntry
+)
+{
+    /* Order the first two entries, then place the third against them: the
+     * median is the larger of the pair's minimum and min(pair maximum, third). */
+    const OMX_S32 pairLow   = armMin (fEntry, sEntry);
+    const OMX_S32 pairHigh  = armMax (fEntry, sEntry);
+    const OMX_S32 cappedTop = armMin (pairHigh, tEntry);
+
+    return (armMax (pairLow, cappedTop));
+}
+
+/**
+ * Function: armLogSize
+ *
+ * Description: Counts the bits needed to represent an unsigned value
+ *
+ * Parameters:
+ * [in]  value    Value to be measured
+ *
+ * Return Value:
+ * OMX_U8 -- the minimum number of bits required to represent the value,
+ *           i.e. the smallest k>=0 such that value is less than (1<<k).
+ *           Zero needs 0 bits.
+ */
+
+OMX_U8 armLogSize (
+    OMX_U16 value
+)
+{
+    OMX_U8 bits = 0;
+
+    /* Drop one bit per pass until nothing is left. */
+    while (value != 0)
+    {
+        value = value >> 1;
+        bits++;
+    }
+    return bits;
+}
+
+/***********************************************************************/
+ /* Saturating Arithmetic operations */
+
+/**
+ * Function :armSatAdd_S32()
+ *
+ * Description :
+ *     Returns Value1 + Value2, saturated to the OMX_S32 range.
+ *
+ * Parameters:
+ * [in]  Value1    First operand
+ * [in]  Value2    Second operand
+ *
+ * Return:
+ * [out]  The sum, or OMX_MAX_S32 / OMX_MIN_S32 when the exact sum lies
+ *        above / below the OMX_S32 range
+ *
+ **/
+
+OMX_S32 armSatAdd_S32(OMX_S32 Value1,OMX_S32 Value2)
+{
+    /* Add in 64 bits: overflowing a signed 32-bit addition is undefined
+     * behaviour, so the overflow cannot be detected after the fact. */
+    const OMX_S64 Sum = (OMX_S64)Value1 + (OMX_S64)Value2;
+
+    if (Sum > OMX_MAX_S32)
+    {
+        /*Result has saturated in positive side*/
+        return OMX_MAX_S32;
+    }
+    if (Sum < OMX_MIN_S32)
+    {
+        /*Result has saturated in negative side*/
+        return OMX_MIN_S32;
+    }
+
+    return (OMX_S32)Sum;
+}
+
+/**
+ * Function :armSatAdd_S64()
+ *
+ * Description :
+ *     Returns Value1 + Value2, saturated to the OMX_S64 range.
+ *
+ * Parameters:
+ * [in]  Value1    First operand
+ * [in]  Value2    Second operand
+ *
+ * Return:
+ * [out]  The sum, or OMX_MAX_S64 / OMX_MIN_S64 when the exact sum lies
+ *        above / below the OMX_S64 range
+ *
+ **/
+
+OMX_S64 armSatAdd_S64(OMX_S64 Value1,OMX_S64 Value2)
+{
+    /* Typed copies of the limits, so the comparisons below are signed
+     * whatever the literal type of the macros is. */
+    const OMX_S64 MaxS64 = OMX_MAX_S64;
+    const OMX_S64 MinS64 = OMX_MIN_S64;
+
+    /* Test BEFORE adding: overflowing a signed addition is undefined
+     * behaviour, so it cannot be detected from the wrapped result. */
+    if ((Value2 > 0) && (Value1 > MaxS64 - Value2))
+    {
+        /*Result has saturated in positive side*/
+        return MaxS64;
+    }
+    if ((Value2 < 0) && (Value1 < MinS64 - Value2))
+    {
+        /*Result has saturated in negative side*/
+        return MinS64;
+    }
+
+    return Value1 + Value2;
+}
+
+/** Function :armSatSub_S32()
+ *
+ * Description :
+ *     Returns Value1 - Value2, saturated to the OMX_S32 range.
+ *
+ * Parameters:
+ * [in]  Value1    First operand
+ * [in]  Value2    Second operand
+ *
+ * Return:
+ * [out]  The difference, or OMX_MAX_S32 / OMX_MIN_S32 when the exact
+ *        difference lies above / below the OMX_S32 range
+ *
+ **/
+
+OMX_S32 armSatSub_S32(OMX_S32 Value1,OMX_S32 Value2)
+{
+    /* Subtract in 64 bits: overflowing a signed 32-bit subtraction is
+     * undefined behaviour, so it cannot be detected after the fact. */
+    const OMX_S64 Diff = (OMX_S64)Value1 - (OMX_S64)Value2;
+
+    if (Diff > OMX_MAX_S32)
+    {
+        /*Result has saturated in positive side*/
+        return OMX_MAX_S32;
+    }
+    if (Diff < OMX_MIN_S32)
+    {
+        /*Result has saturated in negative side*/
+        return OMX_MIN_S32;
+    }
+
+    return (OMX_S32)Diff;
+}
+
+/**
+ * Function :armSatMac_S32()
+ *
+ * Description :
+ *     Multiplies Value1 by Value2 and accumulates the product into Mac
+ *     with a saturating addition.
+ *
+ * Parameters:
+ * [in]  Mac       Accumulator
+ * [in]  Value1    First operand
+ * [in]  Value2    Second operand
+ *
+ * Return:
+ * [out]  Mac + Value1*Value2, as returned by armSatAdd_S32
+ **/
+
+OMX_S32 armSatMac_S32(OMX_S32 Mac,OMX_S16 Value1,OMX_S16 Value2)
+{
+    const OMX_S32 Product = (OMX_S32)(Value1*Value2);
+
+    return armSatAdd_S32( Mac , Product );
+}
+
+/**
+ * Function :armSatMac_S16S32_S32
+ *
+ * Description :
+ *     Saturated multiply-accumulate of delayElem and filTap into mac:
+ *
+ *     mac = mac + Saturate_in_32Bits(delayElem * filTap)
+ *
+ * Parameters:
+ * [in]  mac          Accumulator
+ * [in]  delayElem    32 bit operand
+ * [in]  filTap       16 bit operand
+ *
+ * Return:
+ * [out]  mac         Result of the operation
+ *
+ **/
+
+OMX_S32 armSatMac_S16S32_S32(OMX_S32 mac, OMX_S32 delayElem, OMX_S16 filTap )
+{
+    /* armSatMulS16S32_S32 is used as a range probe: when its result lies
+     * outside the OMX_S16 range the product term is replaced by the matching
+     * 32-bit limit, otherwise the product is formed directly. */
+    const OMX_S32 probe = armSatMulS16S32_S32(filTap,delayElem);
+    OMX_S32 term;
+
+    if ( probe > OMX_MAX_S16 )
+    {
+        term = OMX_MAX_S32;
+    }
+    else if ( probe < OMX_MIN_S16 )
+    {
+        term = OMX_MIN_S32;
+    }
+    else
+    {
+        term = delayElem * filTap;
+    }
+
+    return armSatAdd_S32(mac,term);
+}
+
+
+/**
+ * Function :armSatRoundRightShift_S32_S16
+ *
+ * Description :
+ *     Passes input and the negated shift to armSatRoundLeftShift_S32 and
+ *     saturates the result to the OMX_S16 range.
+ *
+ * Parameters:
+ * [in]  input    The input to be operated on
+ * [in]  shift    The shift number
+ *
+ * Return:
+ * [out]  Shifted value saturated to [OMX_MIN_S16, OMX_MAX_S16]
+ *
+ **/
+
+
+OMX_S16 armSatRoundRightShift_S32_S16(OMX_S32 input, OMX_INT shift)
+{
+    const OMX_S32 shifted = armSatRoundLeftShift_S32(input,-shift);
+
+    if ( shifted > OMX_MAX_S16 )
+    {
+        return (OMX_S16)OMX_MAX_S16;
+    }
+    if ( shifted < OMX_MIN_S16 )
+    {
+        return (OMX_S16)OMX_MIN_S16;
+    }
+    return (OMX_S16)shifted;
+}
+
+/**
+ * Function :armSatRoundLeftShift_S32()
+ *
+ * Description :
+ *     For Shift >= 0, doubles Value Shift times with saturation (a
+ *     saturating left shift). For Shift < 0, adds half of the divisor with
+ *     saturation and shifts right by -Shift (a rounded right shift).
+ *
+ * Parameters:
+ * [in]  Value    Operand
+ * [in]  Shift    Shift amount; negative values shift right
+ *
+ * Return:
+ * [out]  Result of operation
+ *
+ **/
+
+OMX_S32 armSatRoundLeftShift_S32(OMX_S32 Value, OMX_INT Shift)
+{
+    OMX_INT count;
+
+    if (Shift >= 0)
+    {
+        /* Each saturating self-addition is one left shift by a single bit. */
+        for (count = Shift; count > 0; count--)
+        {
+            Value = armSatAdd_S32(Value, Value);
+        }
+        return Value;
+    }
+
+    /* Rounded right shift: add 2^(count-1), then drop count bits. */
+    count = -Shift;
+    Value = armSatAdd_S32(Value, (1 << (count - 1)));
+    return Value >> count;
+}
+
+/**
+ * Function :armSatRoundLeftShift_S64()
+ *
+ * Description :
+ *     For Shift >= 0, doubles Value Shift times with saturation (a
+ *     saturating left shift). For Shift < 0, adds half of the divisor with
+ *     saturation and shifts right by -Shift (a rounded right shift).
+ *
+ * Parameters:
+ * [in]  Value    Operand
+ * [in]  Shift    Shift amount; negative values shift right
+ *
+ * Return:
+ * [out]  Result of operation
+ *
+ **/
+
+OMX_S64 armSatRoundLeftShift_S64(OMX_S64 Value, OMX_INT Shift)
+{
+    OMX_INT count;
+
+    if (Shift >= 0)
+    {
+        /* Each saturating self-addition is one left shift by a single bit. */
+        for (count = Shift; count > 0; count--)
+        {
+            Value = armSatAdd_S64(Value, Value);
+        }
+        return Value;
+    }
+
+    /* Rounded right shift: add 2^(count-1), then drop count bits. */
+    count = -Shift;
+    Value = armSatAdd_S64(Value, ((OMX_S64)1 << (count - 1)));
+    return Value >> count;
+}
+
+/**
+ * Function :armSatMulS16S32_S32()
+ *
+ * Description :
+ *     Multiplies an S16 value by an S32 value and returns the product
+ *     scaled down by 2^16 in an S32 container. It is computed as
+ *     hi(input2)*input1 + ((lo(input2)*input1) >> 16), where hi/lo are the
+ *     signed upper and unsigned lower 16 bits of input2.
+ *
+ * Parameters:
+ * [in]  input1    Operand 1
+ * [in]  input2    Operand 2
+ *
+ * Return:
+ * [out]  Result of operation
+ *
+ **/
+
+
+OMX_S32 armSatMulS16S32_S32(OMX_S16 input1,OMX_S32 input2)
+{
+    OMX_S16 hi2;
+    OMX_U16 lo2;
+
+    OMX_S32 temp1,temp2;
+
+    hi2 = (OMX_S16)( input2 >> 16 );
+    /* Take the low half by masking the unsigned value: left-shifting a
+     * negative (or large) signed value is undefined behaviour. */
+    lo2 = (OMX_U16)( (OMX_U32)input2 & 0xFFFF );
+
+    temp1 = hi2 * input1;
+    temp2 = ( lo2 * input1 ) >> 16;
+
+    return armSatAdd_S32(temp1,temp2);
+}
+
+/**
+ * Function :armSatMulS32S32_S32()
+ *
+ * Description :
+ *     Multiplies two S32 values and returns the upper part of the product
+ *     in an S32 container. It is computed as
+ *     hi1*hi2 + ((hi1*lo2) >> 16) + ((hi2*lo1) >> 16), where hi/lo are the
+ *     signed upper and unsigned lower 16 bits of each input; the lo1*lo2
+ *     term is not included.
+ *
+ * Parameters:
+ * [in]  input1    Operand 1
+ * [in]  input2    Operand 2
+ *
+ * Return:
+ * [out]  Result of operation
+ *
+ **/
+
+OMX_S32 armSatMulS32S32_S32(OMX_S32 input1,OMX_S32 input2)
+{
+    OMX_S16 hi1,hi2;
+    OMX_U16 lo1,lo2;
+
+    OMX_S32 temp1,temp2,temp3;
+    OMX_S32 result;
+
+    /* Take the low halves by masking the unsigned values: left-shifting a
+     * negative (or large) signed value is undefined behaviour. */
+    hi1 = (OMX_S16)( input1 >> 16 );
+    lo1 = (OMX_U16)( (OMX_U32)input1 & 0xFFFF );
+
+    hi2 = (OMX_S16)( input2 >> 16 );
+    lo2 = (OMX_U16)( (OMX_U32)input2 & 0xFFFF );
+
+    temp1 = hi1 * hi2;
+    temp2 = ( hi1 * lo2 ) >> 16;
+    temp3 = ( hi2 * lo1 ) >> 16;
+
+    result = armSatAdd_S32(temp1,temp2);
+    result = armSatAdd_S32(result,temp3);
+
+    return result;
+}
+
+/**
+ * Function :armIntDivAwayFromZero()
+ *
+ * Description : Integer division with rounding to the nearest integer.
+ *               Half-integer values are rounded away from zero. For
+ *               example 3//2 is rounded to 2, and -3//2 is rounded to -2.
+ *               The quotient is formed in double precision.
+ *
+ * Parameters:
+ * [in]  Num     Numerator
+ * [in]  Deno    Denominator
+ *
+ * Return:
+ * [out]  Result of operation Num//Deno
+ *
+ **/
+
+OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno)
+{
+    const OMX_F64 quotient = ((OMX_F64)Num)/((OMX_F64)Deno);
+    /* Push the quotient half a unit away from zero; the cast then truncates. */
+    const OMX_F64 bias = (quotient >= 0) ? 0.5 : -0.5;
+
+    return (OMX_S32)(quotient + bias);
+}
+
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_Bitstream.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_Bitstream.c
new file mode 100644
index 0000000..9ef9319
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_Bitstream.c
@@ -0,0 +1,329 @@
+/**
+ *
+ * File Name: armCOMM_Bitstream.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Defines bitstream encode and decode functions common to all codecs
+ */
+
+#include "omxtypes.h"
+#include "armCOMM.h"
+#include "armCOMM_Bitstream.h"
+
+/***************************************
+ * Fixed bit length Decode
+ ***************************************/
+
+/**
+ * Function: armLookAheadBits()
+ *
+ * Description:
+ * Get the next N bits from the bitstream without advancing the bitstream
+ * pointer. The bytes (*ppBitStream)[0..3] are always read; the byte at
+ * index 4 is read only when the requested bits extend into it.
+ *
+ * Parameters:
+ * [in]  **ppBitStream    pointer to the pointer to the current byte
+ * [in]  *pOffset         bit position within the current byte, 0...7
+ * [in]  N=1...32         number of bits to return
+ *
+ * Returns Value          the N bits, right-justified
+ */
+
+OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+
+    armAssert(Offset>=0 && Offset<=7);
+    armAssert(N>=1 && N<=32);
+
+    /* Read next 32 bits from stream. Each byte is widened to OMX_U32 first:
+     * an OMX_U8 promotes to int, and shifting a byte >= 0x80 left by 24 would
+     * shift into the sign bit, which is undefined behaviour. */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16) |
+            ((OMX_U32)pBitStream[2] << 8 ) |  (OMX_U32)pBitStream[3];
+
+    if (Offset + N > 32)
+    {
+        /* The requested bits reach into the fifth byte */
+        Value = (Value << Offset) | ((OMX_U32)pBitStream[4] >> (8-Offset));
+    }
+    else
+    {
+        /* The fifth byte is not needed, so do not read past the data */
+        Value = Value << Offset;
+    }
+
+    /* Return N bits */
+    return Value >> (32-N);
+}
+
+
+/**
+ * Function: armGetBits()
+ *
+ * Description:
+ * Read N bits from the bitstream and advance the bitstream position by N
+ * bits. The bytes (*ppBitStream)[0..3] are always read; the byte at index 4
+ * is read only when the requested bits extend into it. N == 0 returns 0
+ * and leaves the position untouched.
+ *
+ * Parameters:
+ * [in]  *ppBitStream     pointer to the current byte
+ * [in]  *pOffset         bit position within the current byte, 0..7
+ * [in]  N=1..32          number of bits to read
+ *
+ * [out] *ppBitStream     advanced to the byte holding the next unread bit
+ * [out] *pOffset         bit position of the next unread bit, 0..7
+ * Returns Value          the N bits, right-justified
+ */
+
+
+OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+
+    if(N == 0)
+    {
+        return 0;
+    }
+
+    armAssert(Offset>=0 && Offset<=7);
+    armAssert(N>=1 && N<=32);
+
+    /* Read next 32 bits from stream. Each byte is widened to OMX_U32 first:
+     * an OMX_U8 promotes to int, and shifting a byte >= 0x80 left by 24 would
+     * shift into the sign bit, which is undefined behaviour. */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16) |
+            ((OMX_U32)pBitStream[2] << 8 ) |  (OMX_U32)pBitStream[3];
+
+    if (Offset + N > 32)
+    {
+        /* The requested bits reach into the fifth byte */
+        Value = (Value << Offset) | ((OMX_U32)pBitStream[4] >> (8-Offset));
+    }
+    else
+    {
+        /* The fifth byte is not needed, so do not read past the data */
+        Value = Value << Offset;
+    }
+
+    /* Advance bitstream pointer by N bits */
+    Offset += N;
+    *ppBitStream = pBitStream + (Offset>>3);
+    *pOffset = Offset & 7;
+
+    /* Return N bits */
+    return Value >> (32-N);
+}
+
+/**
+ * Function: armByteAlign()
+ *
+ * Description:
+ * Align the pointer *ppBitStream to the next byte boundary. A position that
+ * is already byte aligned (*pOffset == 0) is left unchanged.
+ *
+ * Parameters:
+ * [in]  *ppBitStream    pointer to the current byte
+ * [in]  *pOffset        bit position within the current byte
+ *
+ * [out] *ppBitStream    advanced by one byte if *pOffset was greater than 0
+ * [out] *pOffset        0 if *pOffset was greater than 0
+ *
+ **/
+
+OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset)
+{
+    if (*pOffset <= 0)
+    {
+        /* Nothing to skip */
+        return;
+    }
+
+    /* Drop the rest of the partially consumed byte */
+    ++(*ppBitStream);
+    *pOffset = 0;
+}
+
+/**
+ * Function: armSkipBits()
+ *
+ * Description:
+ * Advance the bitstream position by N bits without reading any data.
+ *
+ * Parameters:
+ * [in]  *ppBitStream    pointer to the current byte
+ * [in]  *pOffset        bit position within the current byte
+ * [in]  N               number of bits to skip
+ *
+ * [out] *ppBitStream    advanced by the number of whole bytes skipped
+ * [out] *pOffset        new bit position within the byte, 0..7
+ *
+ **/
+
+
+OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N)
+{
+    /* Bit position of the target, counted from the start of the current byte */
+    const OMX_INT bitPos = *pOffset + N;
+
+    *ppBitStream = *ppBitStream + (bitPos>>3);
+    *pOffset = bitPos & 7;
+}
+
+/***************************************
+ * Variable bit length Decode
+ ***************************************/
+
+/**
+ * Function: armUnPackVLC32()
+ *
+ * Description:
+ * Variable length decode of variable length symbol (max size 32 bits) read from
+ * the bit stream pointed by *ppBitStream at *pOffset by using the table
+ * pointed by pCodeBook. The table is searched from its first entry and is
+ * terminated by an entry whose codeLen is 0. The bytes (*ppBitStream)[0..3]
+ * are always read; the byte at index 4 is read only when *pOffset is not 0.
+ *
+ * Parameters:
+ * [in]  *ppBitStream    pointer to the current byte
+ * [in]  *pOffset        bit position within the current byte, 0..7
+ * [in]  pCodeBook       code book to search
+ *
+ * [out] *ppBitStream    advanced past the matched code word (unchanged if
+ *                       no entry matches)
+ * [out] *pOffset        bit position after the matched code word (unchanged
+ *                       if no entry matches)
+ *
+ * Returns : Code Book Index if successful.
+ *         : ARM_NO_CODEBOOK_INDEX if search fails.
+ **/
+#ifndef C_OPTIMIZED_IMPLEMENTATION
+
+OMX_U16 armUnPackVLC32(
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pOffset,
+    const ARM_VLC32 *pCodeBook
+)
+{
+    const OMX_U8 *pBitStream = *ppBitStream;
+    OMX_INT Offset = *pOffset;
+    OMX_U32 Value;
+    OMX_INT Index;
+
+    armAssert(Offset>=0 && Offset<=7);
+
+    /* Read next 32 bits from stream. Each byte is widened to OMX_U32 first:
+     * an OMX_U8 promotes to int, and shifting a byte >= 0x80 left by 24 would
+     * shift into the sign bit, which is undefined behaviour. */
+    Value = ((OMX_U32)pBitStream[0] << 24) | ((OMX_U32)pBitStream[1] << 16) |
+            ((OMX_U32)pBitStream[2] << 8 ) |  (OMX_U32)pBitStream[3];
+
+    if (Offset != 0)
+    {
+        /* Only a non-aligned window reaches into the fifth byte */
+        Value = (Value << Offset) | ((OMX_U32)pBitStream[4] >> (8-Offset));
+    }
+
+    /* Search through the codebook */
+    for (Index=0; pCodeBook->codeLen != 0; Index++)
+    {
+        if (pCodeBook->codeWord == (Value >> (32 - pCodeBook->codeLen)))
+        {
+            /* Consume the matched code word */
+            Offset = Offset + pCodeBook->codeLen;
+            *ppBitStream = pBitStream + (Offset >> 3) ;
+            *pOffset = Offset & 7;
+
+            return (OMX_U16)Index;
+        }
+        pCodeBook++;
+    }
+
+    /* No code match found */
+    return ARM_NO_CODEBOOK_INDEX;
+}
+
+#endif
+
+/***************************************
+ * Fixed bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackBits
+ *
+ * Description:
+ * Append the low codeLength bits of codeWord to the bitstream, most
+ * significant bit first. The bits of the current byte that precede the bit
+ * position are preserved.
+ *
+ * Remarks:
+ * The byte at the returned position is always written, even when the code
+ * word ends exactly on a byte boundary (a zero byte is stored in that case).
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte
+ * in the bit stream.
+ * [in] pOffset pointer to the bit position in the byte
+ * pointed by *ppBitStream. Valid within 0
+ * to 7.
+ * [in] codeWord Code word that needs to be inserted into the
+ * bitstream
+ * [in] codeLength Length of the code word, valid range 1...32
+ *
+ * [out] ppBitStream *ppBitStream is updated after the code word is
+ * written, so that it points to the current byte in
+ * the bit stream buffer.
+ * [out] pOffset *pOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr once the code word has been written. An offset outside 0..7
+ * or a length outside 1..32 is rejected through armRetArgErrIf with
+ * OMX_Sts_BadArgErr.
+ *
+ */
+
+OMXResult armPackBits (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pOffset,
+ OMX_U32 codeWord,
+ OMX_INT codeLength
+)
+{
+ OMX_U8 *pBitStream = *ppBitStream;
+ OMX_INT Offset = *pOffset;
+ OMX_U32 Value;
+
+ /* checking argument validity */
+ armRetArgErrIf(Offset < 0, OMX_Sts_BadArgErr);
+ armRetArgErrIf(Offset > 7, OMX_Sts_BadArgErr);
+ armRetArgErrIf(codeLength < 1, OMX_Sts_BadArgErr);
+ armRetArgErrIf(codeLength > 32, OMX_Sts_BadArgErr);
+
+ /* Prepare the first byte: left-justify the code word in 32 bits, keep the
+ * top Offset bits already in the current byte, and place the leading bits
+ * of the code word directly after them */
+ codeWord = codeWord << (32-codeLength);
+ Value = (pBitStream[0] >> (8-Offset)) << (8-Offset);
+ Value = Value | (codeWord >> (24+Offset));
+
+ /* Write out whole bytes: each pass stores one completed byte, discards the
+ * (8-Offset) code bits it contained, and loads the next 8 bits */
+ while (8-Offset <= codeLength)
+ {
+ *pBitStream++ = (OMX_U8)Value;
+ codeWord = codeWord << (8-Offset);
+ codeLength = codeLength - (8-Offset);
+ Offset = 0;
+ Value = codeWord >> 24;
+ }
+
+ /* Write out final partial byte; its unused low bits are zero */
+ *pBitStream = (OMX_U8)Value;
+ *ppBitStream = pBitStream;
+ *pOffset = Offset + codeLength;
+
+ return OMX_Sts_NoErr;
+}
+
+/***************************************
+ * Variable bit length Encode
+ ***************************************/
+
+/**
+ * Function: armPackVLC32
+ *
+ * Description:
+ * Pack a VLC code word into the bitstream by passing its codeWord and
+ * codeLen fields to armPackBits.
+ *
+ * Parameters:
+ * [in]  ppBitStream    pointer to the pointer to the current byte
+ *                      in the bit stream.
+ * [in]  pBitOffset     pointer to the bit position in the byte
+ *                      pointed by *ppBitStream. Valid within 0
+ *                      to 7.
+ * [in]  code           VLC code word that needs to be inserted into the
+ *                      bitstream
+ *
+ * [out] ppBitStream    *ppBitStream is updated after the code word is
+ *                      written, so that it points to the current byte in
+ *                      the bit stream buffer.
+ * [out] pBitOffset     *pBitOffset is updated so that it points to the
+ *                      current bit position in the byte pointed by
+ *                      *ppBitStream.
+ *
+ * Return Value:
+ * The result returned by armPackBits.
+ *
+ */
+
+OMXResult armPackVLC32 (
+    OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    ARM_VLC32 code
+)
+{
+    const OMXResult status =
+        armPackBits(ppBitStream, pBitOffset, code.codeWord, code.codeLen);
+
+    return status;
+}
+
+/*End of File*/
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_IDCTTable.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_IDCTTable.c
new file mode 100644
index 0000000..9e4679c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_IDCTTable.c
@@ -0,0 +1,60 @@
+/**
+ *
+ * File Name: armCOMM_IDCTTable.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armCOMM_IDCTTable.c
+ * Brief: Defines Tables used in IDCT computation
+ *
+ */
+
+#include "armCOMM_IDCTTable.h"
+
+ /* Table of s(u)*A(u)*A(v)/16 at Q15
+ * s(u)=1.0 0 <= u <= 5
+ * s(6)=2.0
+ * s(7)=4.0
+ * A(0) = 2*sqrt(2)
+ * A(u) = 4*cos(u*pi/16) for (u!=0)
+ */
+
+__align(4) const OMX_U16 armCOMM_IDCTPreScale [64] = /* 8x8 stored row by row, Q15; __align(4) is the ARM-toolchain alignment qualifier */
+{
+ 0x4000, 0x58c5, 0x539f, 0x4b42, 0x4000, 0x3249, 0x4546, 0x46a1, /* row 0: 0x4000 = 0.5 = A(0)*A(0)/16 */
+ 0x58c5, 0x7b21, 0x73fc, 0x6862, 0x58c5, 0x45bf, 0x6016, 0x61f8, /* row 1 */
+ 0x539f, 0x73fc, 0x6d41, 0x6254, 0x539f, 0x41b3, 0x5a82, 0x5c48, /* row 2 */
+ 0x4b42, 0x6862, 0x6254, 0x587e, 0x4b42, 0x3b21, 0x5175, 0x530d, /* row 3 */
+ 0x4000, 0x58c5, 0x539f, 0x4b42, 0x4000, 0x3249, 0x4546, 0x46a1, /* row 4: identical to row 0 because A(4) == A(0) */
+ 0x3249, 0x45bf, 0x41b3, 0x3b21, 0x3249, 0x2782, 0x366d, 0x377e, /* row 5 */
+ 0x22a3, 0x300b, 0x2d41, 0x28ba, 0x22a3, 0x1b37, 0x257e, 0x263a, /* row 6 */
+ 0x11a8, 0x187e, 0x1712, 0x14c3, 0x11a8, 0x0de0, 0x131d, 0x137d /* row 7 */
+}; /* the s()=2.0 and s()=4.0 factors apply to the column index: they show up in columns 6 and 7, not in rows 6 and 7 */
+ /* Above array armCOMM_IDCTPreScale, in Q23 format */
+const OMX_U32 armCOMM_IDCTPreScaleU32 [64] = /* same 8x8 row-by-row layout as armCOMM_IDCTPreScale, 8 more fraction bits */
+{
+ 0x400000, 0x58c543, 0x539eba, 0x4b418c, 0x400000, 0x3248d4, 0x4545ea, 0x46a157, /* row 0: 0x400000 = 0.5 in Q23 */
+ 0x58c543, 0x7b20d8, 0x73fbfc, 0x686214, 0x58c543, 0x45bf1f, 0x6015a5, 0x61f78b, /* row 1 */
+ 0x539eba, 0x73fbfc, 0x6d413d, 0x6253a6, 0x539eba, 0x41b328, 0x5a827a, 0x5c4869, /* row 2 */
+ 0x4b418c, 0x686214, 0x6253a6, 0x587de3, 0x4b418c, 0x3b20d8, 0x5174e0, 0x530d69, /* row 3 */
+ 0x400000, 0x58c543, 0x539eba, 0x4b418c, 0x400000, 0x3248d4, 0x4545ea, 0x46a157, /* row 4: identical to row 0 */
+ 0x3248d4, 0x45bf1f, 0x41b328, 0x3b20d8, 0x3248d4, 0x27821d, 0x366d72, 0x377e6b, /* row 5 */
+ 0x22a2f5, 0x300ad3, 0x2d413d, 0x28ba70, 0x22a2f5, 0x1b36b9, 0x257d86, 0x26398d, /* row 6 */
+ 0x11a856, 0x187de3, 0x17121a, 0x14c35a, 0x11a856, 0x0ddf9b, 0x131cc7, 0x137ca2 /* row 7 */
+}; /* each Q15 entry of armCOMM_IDCTPreScale equals the entry here rounded to 16 bits: (x + 0x80) >> 8 */
+
+const OMX_U16 armCOMM_IDCTCoef [4] = /* trigonometric constants as Q15 fractions (value / 32768) */
+{
+ 0x5a82, /* InvSqrt2: 1/sqrt(2) = 0.7071 */
+ 0x30fc, /* SinPIBy8: sin(pi/8) = 0.3827 */
+ 0x7642, /* CosPIBy8: cos(pi/8) = 0.9239 */
+ 0x0000 /* zero fourth slot; presumably padding to four entries - confirm against the users of this table */
+};
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_MaskTable.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_MaskTable.c
new file mode 100644
index 0000000..3241db2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_MaskTable.c
@@ -0,0 +1,45 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armCOMM_MaskTable.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Mask Table to mask the end of array.
+ *
+ */
+
+#include "omxtypes.h"
+
+#define MaskTableSize 72 /* 9 rows of 8 entries each */
+
+const OMX_U16 armCOMM_qMaskTable16[MaskTableSize] = /* row k begins with k entries of 0xFFFF, the rest are zero */
+{
+ 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* row 0: no 0xFFFF entries */
+ 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* row 1 */
+ 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* row 2 */
+ 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, /* row 3 */
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, /* row 4 */
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, /* row 5 */
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, /* row 6 */
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, /* row 7 */
+ 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF /* row 8: all eight entries 0xFFFF */
+};
+
+const OMX_U8 armCOMM_qMaskTable8[MaskTableSize] = /* byte-wide twin of armCOMM_qMaskTable16: row k begins with k entries of 0xFF */
+{
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* row 0: no 0xFF entries */
+ 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* row 1 */
+ 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* row 2 */
+ 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, /* row 3 */
+ 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, /* row 4 */
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, /* row 5 */
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, /* row 6 */
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, /* row 7 */
+ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF /* row 8: all eight entries 0xFF */
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVC.h
new file mode 100644
index 0000000..7fa7716
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVC.h
@@ -0,0 +1,1153 @@
+/**
+ *
+ * File Name: armVC.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVC.h
+ * Brief: Declares API's/Basic Data types used across the OpenMAX Video domain
+ *
+ */
+
+
+#ifndef _armVideo_H_
+#define _armVideo_H_
+
+#include "omxVC.h"
+#include "armCOMM_Bitstream.h"
+
+/**
+ * ARM specific state structure to hold Motion Estimation information.
+ */
+
+/* MPEG-4 Part 2 ME state: the OMXVCM4P2MEParams settings paired with the
+ * OMXVCM4P2MEMode selection. */
+typedef struct m4p2_MESpec
+{
+    OMXVCM4P2MEParams MEParams;
+    OMXVCM4P2MEMode   MEMode;
+} ARMVCM4P2_MESpec;
+
+/* H.264 (MPEG-4 Part 10) counterpart, built from the M4P10 types. */
+typedef struct m4p10_MESpec
+{
+    OMXVCM4P10MEParams MEParams;
+    OMXVCM4P10MEMode   MEMode;
+} ARMVCM4P10_MESpec;
+
+/**
+ * Function: armVCM4P2_CompareMV
+ *
+ * Description:
+ * Performs comparison of motion vectors and SADs to decide the
+ * best MV and SAD
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] mvX x coordinate of the candidate motion vector
+ * [in] mvY y coordinate of the candidate motion vector
+ * [in] candSAD Candidate SAD
+ * [in] bestMVX x coordinate of the best motion vector
+ * [in] bestMVY y coordinate of the best motion vector
+ * [in] bestSAD best SAD
+ *
+ * Return Value:
+ * OMX_INT -- 1 to indicate that the current sad is the best
+ * 0 to indicate that it is NOT the best SAD
+ */
+
+OMX_INT armVCM4P2_CompareMV (
+ OMX_S16 mvX,
+ OMX_S16 mvY,
+ OMX_INT candSAD,
+ OMX_S16 bestMVX,
+ OMX_S16 bestMVY,
+ OMX_INT bestSAD);
+
+/**
+ * Function: armVCM4P2_ACDCPredict
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+ * to the function call, prediction direction (predDir) should be selected
+ * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDst pointer to the coefficient buffer which contains
+ * the quantized coefficient residuals (PQF) of the
+ * current block
+ * [in] pPredBufRow pointer to the coefficient row buffer
+ * [in] pPredBufCol pointer to the coefficient column buffer
+ * [in] curQP quantization parameter of the current block. curQP
+ * may equal to predQP especially when the current
+ * block and the predictor block are in the same
+ * macroblock.
+ * [in] predQP quantization parameter of the predictor block
+ * [in] predDir indicates the prediction direction which takes one
+ * of the following values:
+ * OMX_VIDEO_HORIZONTAL predict horizontally
+ * OMX_VIDEO_VERTICAL predict vertically
+ * [in] ACPredFlag a flag indicating if AC prediction should be
+ * performed. It is equal to ac_pred_flag in the bit
+ * stream syntax of MPEG-4
+ * [in] videoComp video component type (luminance, chrominance or
+ * alpha) of the current block
+ * [in] flag selects what this function calculates:
+ * PQF (set 1, prediction) or QF (set 0, reconstruction)
+ * [out] pPreACPredict pointer to the predicted coefficients buffer.
+ * Filled ONLY if it is not NULL
+ * [out] pSrcDst pointer to the coefficient buffer which contains
+ * the quantized coefficients (QF) of the current
+ * block
+ * [out] pPredBufRow pointer to the updated coefficient row buffer
+ * [out] pPredBufCol pointer to the updated coefficient column buffer
+ * [out] pSumErr pointer to the updated sum of the difference
+ * between predicted and unpredicted coefficients
+ * If this is NULL, do not update
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_ACDCPredict(
+ OMX_S16 * pSrcDst,
+ OMX_S16 * pPreACPredict,
+ OMX_S16 * pPredBufRow,
+ OMX_S16 * pPredBufCol,
+ OMX_INT curQP,
+ OMX_INT predQP,
+ OMX_INT predDir,
+ OMX_INT ACPredFlag,
+ OMXVCM4P2VideoComponent videoComp,
+ OMX_U8 flag,
+ OMX_INT *pSumErr
+);
+
+/**
+ * Function: armVCM4P2_SetPredDir
+ *
+ * Description:
+ * Performs detecting the prediction direction
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] blockIndex block index indicating the component type and
+ * position as defined in subclause 6.1.3.8, of ISO/IEC
+ * 14496-2. Furthermore, indexes 6 to 9 indicate the
+ * alpha blocks spatially corresponding to luminance
+ * blocks 0 to 3 in the same macroblock.
+ * [in] pCoefBufRow pointer to the coefficient row buffer
+ * [in] pCoefBufCol pointer to the coefficient column buffer
+ * [in] pQpBuf pointer to the quantization parameter buffer
+ * [out] predQP pointer to the quantization parameter of the predictor block
+ * [out] predDir pointer to the prediction direction, which takes one of:
+ * OMX_VIDEO_HORIZONTAL predict horizontally
+ * OMX_VIDEO_VERTICAL predict vertically
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_SetPredDir(
+ OMX_INT blockIndex,
+ OMX_S16 *pCoefBufRow,
+ OMX_S16 *pCoefBufCol,
+ OMX_INT *predDir,
+ OMX_INT *predQP,
+ const OMX_U8 *pQpBuf
+);
+
+/**
+ * Function: armVCM4P2_EncodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs zigzag scanning and VLC encoding for one intra block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * by *ppBitStream. Valid within 0 to 7.
+ * [in] pQDctBlkCoef pointer to the quantized DCT coefficient
+ * [in] predDir AC prediction direction, which is used to decide
+ * the zigzag scan pattern. This takes one of the
+ * following values:
+ * OMX_VIDEO_NONE AC prediction not used.
+ * Performs classical zigzag
+ * scan.
+ * OMX_VIDEO_HORIZONTAL Horizontal prediction.
+ * Performs alternate-vertical
+ * zigzag scan.
+ * OMX_VIDEO_VERTICAL Vertical prediction.
+ * Performs alternate-horizontal
+ * zigzag scan.
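+ * [in] pattern block pattern which is used to decide whether this block is encoded
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start indicates whether the encoding begins with the 0th element or the 1st.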
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded,
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_EncodeVLCZigzag_Intra(
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ const OMX_S16 *pQDctBlkCoef,
+ OMX_U8 predDir,
+ OMX_U8 pattern,
+ OMX_INT shortVideoHeader,
+ OMX_U8 start
+);
+
+/**
+ * Function: armVCM4P2_DecodeVLCZigzag_Intra
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bitstream buffer
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * to by *ppBitStream. *pBitOffset is valid within
+ * [0-7].
+ * [in] predDir AC prediction direction which is used to decide
+ * the zigzag scan pattern. It takes one of the
+ * following values:
+ * OMX_VIDEO_NONE AC prediction not used;
+ * perform classical zigzag scan;
+ * OMX_VIDEO_HORIZONTAL Horizontal prediction;
+ * perform alternate-vertical
+ * zigzag scan;
+ * OMX_VIDEO_VERTICAL Vertical prediction;
+ * thus perform
+ * alternate-horizontal
+ * zigzag scan.
+ * [in] videoComp video component type (luminance, chrominance or
+ * alpha) of the current block (NOTE: absent from the prototype below)
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ * and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start start indicates whether the decoding begins with 0th element
+ * or 1st.
+ * [out] ppBitStream *ppBitStream is updated after the block is
+ * decoded, so that it points to the current byte
+ * in the bit stream buffer
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream
+ * [out] pDst pointer to the coefficient buffer of current
+ * block. Should be 32-bit aligned
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_DecodeVLCZigzag_Intra(
+ const OMX_U8 ** ppBitStream,
+ OMX_INT * pBitOffset,
+ OMX_S16 * pDst,
+ OMX_U8 predDir,
+ OMX_INT shortVideoHeader,
+ OMX_U8 start
+);
+
+/**
+ * Function: armVCM4P2_FillVLDBuffer
+ *
+ * Description:
+ * Performs filling of the coefficient buffer according to the run, level
+ * and sign, also updates the index
+ *
+ * Parameters:
+ * [in] storeRun Stored Run value (count of zeros)
+ * [in] storeLevel Stored Level value (non-zero value)
+ * [in] sign Flag indicating the sign of level
+ * [in] last status of the last flag
+ * [in] index pointer to coefficient index in 8x8 matrix
+ * [out] index pointer to updated coefficient index in 8x8
+ * matrix
+ * [in] pZigzagTable pointer to the zigzag tables
+ * [out] pDst pointer to the coefficient buffer of current
+ * block. Should be 32-bit aligned
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLDBuffer(
+ OMX_U32 storeRun,
+ OMX_S16 * pDst,
+ OMX_S16 storeLevel,
+ OMX_U8 sign,
+ OMX_U8 last,
+ OMX_U8 * index,
+ const OMX_U8 * pZigzagTable
+);
+
+/**
+ * Function: armVCM4P2_GetVLCBits
+ *
+ * Description:
+ * Performs escape mode decision based on the run, run+, level, level+ and
+ * last combinations.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * by *ppBitStream. Valid within 0 to 7
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ * and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start start indicates whether the encoding begins with
+ * 0th element or 1st.
+ * [in/out] pLast pointer to last status flag
+ * [in] runBeginSingleLevelEntriesL0 The run value from which level
+ * will be equal to 1: last == 0
+ * [in] IndexBeginSingleLevelEntriesL0 Array index in the VLC table
+ * pointing to the
+ * runBeginSingleLevelEntriesL0
+ * [in] runBeginSingleLevelEntriesL1 The run value from which level
+ * will be equal to 1: last == 1
+ * [in] IndexBeginSingleLevelEntriesL1 Array index in the VLC table
+ * pointing to the
+ * runBeginSingleLevelEntriesL1
+ * [in] pRunIndexTableL0 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pVlcTableL0 VLC table for last == 0
+ * [in] pRunIndexTableL1 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pVlcTableL1 VLC table for last == 1
+ * [in] pLMAXTableL0 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pLMAXTableL1 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pRMAXTableL0 Run MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pRMAXTableL1 Run MAX table defined in armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pZigzagTable pointer to the zigzag tables
+ * [out] pDst pointer to the coefficient buffer of current
+ * block. Should be 32-bit aligned
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_GetVLCBits (
+ const OMX_U8 **ppBitStream,
+ OMX_INT * pBitOffset,
+ OMX_S16 * pDst,
+ OMX_INT shortVideoHeader,
+ OMX_U8 start,
+ OMX_U8 * pLast,
+ OMX_U8 runBeginSingleLevelEntriesL0,
+ OMX_U8 maxIndexForMultipleEntriesL0,
+ OMX_U8 maxRunForMultipleEntriesL1,
+ OMX_U8 maxIndexForMultipleEntriesL1,
+ const OMX_U8 * pRunIndexTableL0,
+ const ARM_VLC32 *pVlcTableL0,
+ const OMX_U8 * pRunIndexTableL1,
+ const ARM_VLC32 *pVlcTableL1,
+ const OMX_U8 * pLMAXTableL0,
+ const OMX_U8 * pLMAXTableL1,
+ const OMX_U8 * pRMAXTableL0,
+ const OMX_U8 * pRMAXTableL1,
+ const OMX_U8 * pZigzagTable
+);
+
+/**
+ * Function: armVCM4P2_PutVLCBits
+ *
+ * Description:
+ * Checks the type of Escape Mode and put encoded bits for
+ * quantized DCT coefficients.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * by *ppBitStream. Valid within 0 to 7
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ * and escape mode 4 is used when shortVideoHeader==1.
+ * [in] start start indicates whether the encoding begins with
+ * 0th element or 1st.
+ * [in] maxStoreRunL0 Max store possible (considering last and inter/intra)
+ * for last = 0
+ * [in] maxStoreRunL1 Max store possible (considering last and inter/intra)
+ * for last = 1
+ * [in] maxRunForMultipleEntriesL0
+ * The run value after which level
+ * will be equal to 1:
+ * (considering last and inter/intra status) for last = 0
+ * [in] maxRunForMultipleEntriesL1
+ * The run value after which level
+ * will be equal to 1:
+ * (considering last and inter/intra status) for last = 1
+ * [in] pRunIndexTableL0 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pVlcTableL0 VLC table for last == 0
+ * [in] pRunIndexTableL1 Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pVlcTableL1 VLC table for last == 1
+ * [in] pLMAXTableL0 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pLMAXTableL1 Level MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pRMAXTableL0 Run MAX table defined in
+ * armVCM4P2_Huff_Tables_VLC.c for last == 0
+ * [in] pRMAXTableL1 Run MAX table defined in armVCM4P2_Huff_Tables_VLC.c for last == 1
+ * [in] pZigzagTable pointer to the zigzag tables
+ * [in] pQDctBlkCoef pointer to the quantized DCT coefficient
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+
+OMXResult armVCM4P2_PutVLCBits (
+ OMX_U8 **ppBitStream,
+ OMX_INT * pBitOffset,
+ const OMX_S16 *pQDctBlkCoef,
+ OMX_INT shortVideoHeader,
+ OMX_U8 start,
+ OMX_U8 maxStoreRunL0,
+ OMX_U8 maxStoreRunL1,
+ OMX_U8 maxRunForMultipleEntriesL0,
+ OMX_U8 maxRunForMultipleEntriesL1,
+ const OMX_U8 * pRunIndexTableL0,
+ const ARM_VLC32 *pVlcTableL0,
+ const OMX_U8 * pRunIndexTableL1,
+ const ARM_VLC32 *pVlcTableL1,
+ const OMX_U8 * pLMAXTableL0,
+ const OMX_U8 * pLMAXTableL1,
+ const OMX_U8 * pRMAXTableL0,
+ const OMX_U8 * pRMAXTableL1,
+ const OMX_U8 * pZigzagTable
+);
+/**
+ * Function: armVCM4P2_FillVLCBuffer
+ *
+ * Description:
+ * Performs calculating the VLC bits depending on the escape type and insert
+ * the same in the bitstream
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * by *ppBitStream. Valid within 0 to 7
+ * [in] run Run value (count of zeros) to be encoded
+ * [in] level Level value (non-zero value) to be encoded
+ * [in] runPlus Calculated as runPlus = run - (RMAX + 1)
+ * [in] levelPlus Calculated as
+ * levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in] fMode Flag indicating the escape modes
+ * [in] last status of the last flag
+ * [in] maxRunForMultipleEntries
+ * The run value after which level will be equal to 1:
+ * (considering last and inter/intra status)
+ * [in] pRunIndexTable Run Index table defined in
+ * armVCM4P2_Huff_tables_VLC.h
+ * [in] pVlcTable VLC table defined in armVCM4P2_Huff_tables_VLC.h
+ * [out] ppBitStream *ppBitStream is updated after the block is encoded
+ * so that it points to the current byte in the bit
+ * stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream.
+ *
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_FillVLCBuffer (
+ OMX_U8 **ppBitStream,
+ OMX_INT * pBitOffset,
+ OMX_U32 run,
+ OMX_S16 level,
+ OMX_U32 runPlus,
+ OMX_S16 levelPlus,
+ OMX_U8 fMode,
+ OMX_U8 last,
+ OMX_U8 maxRunForMultipleEntries,
+ const OMX_U8 *pRunIndexTable,
+ const ARM_VLC32 *pVlcTable
+);
+
+/**
+ * Function: armVCM4P2_CheckVLCEscapeMode
+ *
+ * Description:
+ * Performs escape mode decision based on the run, run+, level, level+ and
+ * last combinations.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] run Run value (count of zeros) to be encoded
+ * [in] level Level value (non-zero value) to be encoded
+ * [in] runPlus Calculated as runPlus = run - (RMAX + 1)
+ * [in] levelPlus Calculated as
+ * levelPlus = sign(level)*[abs(level) - LMAX]
+ * [in] maxStoreRun Max store possible (considering last and inter/intra)
+ * [in] maxRunForMultipleEntries
+ * The run value after which level
+ * will be equal to 1:
+ * (considering last and inter/intra status)
+ * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0,
+ * and escape mode 4 is used when shortVideoHeader==1.
+ * [in] pRunIndexTable Run Index table defined in
+ * armVCM4P2_Huff_Tables_VLC.c
+ * (considering last and inter/intra status)
+ *
+ *
+ * Return Value:
+ * Returns an Escape mode which can take values from 0 to 3
+ * 0 --> no escape mode, 1 --> escape type 1,
+ * 2 --> escape type 2, 3 --> escape type 3, check section 7.4.1.3
+ * in the MPEG ISO standard.
+ *
+ */
+
+OMX_U8 armVCM4P2_CheckVLCEscapeMode(
+ OMX_U32 run,
+ OMX_U32 runPlus,
+ OMX_S16 level,
+ OMX_S16 levelPlus,
+ OMX_U8 maxStoreRun,
+ OMX_U8 maxRunForMultipleEntries,
+ OMX_INT shortVideoHeader,
+ const OMX_U8 *pRunIndexTable
+);
+
+
+/**
+ * Function: armVCM4P2_BlockMatch_Integer
+ *
+ * Description:
+ * Performs a 16x16 block search; estimates motion vector and associated minimum SAD.
+ * Both the input and output motion vectors are represented using half-pixel units, and
+ * therefore a shift left or right by 1 bit may be required, respectively, to match the
+ * input or output MVs with other functions that either generate output MVs or expect
+ * input MVs represented using integer pixel units.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB that
+ * corresponds to the location of the current macroblock in the current
+ * plane.
+ * [in] refWidth width of the reference plane
+ * [in] pRefRect pointer to the valid rectangle in the reference plane, relative to the image origin.
+ * It is not limited to the image boundary but depends on the padding. For example,
+ * if you pad 4 pixels outside the image border, then the value for the left border
+ * can be -4
+ * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane (linear array,
+ * 256 entries); must be aligned on an 8-byte boundary.
+ * [in] pCurrPointPos position of the current macroblock in the current plane
+ * [in] pSrcPreMV pointer to predicted motion vector; NULL indicates no predicted MV
+ * [in] pSrcPreSAD pointer to SAD associated with the predicted MV (referenced by pSrcPreMV)
+ * [in] searchRange search range for 16X16 integer block,the units of it is full pixel,the search range
+ * is the same in all directions.It is in inclusive of the boundary and specified in
+ * terms of integer pixel units.
+ * [in] pMESpec vendor-specific motion estimation specification structure; must have been allocated
+ * and then initialized using omxVCM4P2_MEInit prior to calling the block matching
+ * function.
+ * [in] BlockSize MacroBlock Size i.e either 16x16 or 8x8.
+ * [out] pDstMV pointer to estimated MV
+ * [out] pDstSAD pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error.
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Integer(
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ const OMXVCMotionVector *pSrcPreMV,
+ const OMX_INT *pSrcPreSAD,
+ void *pMESpec,
+ OMXVCMotionVector *pDstMV,
+ OMX_INT *pDstSAD,
+ OMX_U8 BlockSize
+);
+
+/**
+ * Function: armVCM4P2_BlockMatch_Half
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution. Returns the estimated
+ * motion vector and associated minimum SAD. This function estimates the half-pixel
+ * motion vector by interpolating the integer resolution motion vector referenced
+ * by the input parameter pSrcDstMV, i.e., the initial integer MV is generated
+ * externally. The input parameters pSrcRefBuf and pSearchPointRefPos should be
+ * shifted by the winning MV of 16x16 integer search prior to calling BlockMatch_Half_16x16.
+ * The function BlockMatch_Integer_16x16 may be used for integer motion estimation.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB
+ * that corresponds to the location of the current macroblock in
+ * the current plane.
+ * [in] refWidth width of the reference plane
+ * [in] pRefRect reference plane valid region rectangle
+ * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane
+ * (linear array, 256 entries); must be aligned on an 8-byte boundary.
+ * [in] pSearchPointRefPos position of the starting point for half pixel search (specified
+ * in terms of integer pixel units) in the reference plane.
+ * [in] rndVal rounding control bit for half pixel motion estimation;
+ * 0=rounding control disabled; 1=rounding control enabled
+ * [in] pSrcDstMV pointer to the initial MV estimate; typically generated during a prior
+ * 16X16 integer search and its unit is half pixel.
+ * [in] BlockSize MacroBlock Size i.e either 16x16 or 8x8.
+ * [out]pSrcDstMV pointer to estimated MV
+ * [out]pDstSAD pointer to minimum SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *
+ */
+
+OMXResult armVCM4P2_BlockMatch_Half(
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pSearchPointRefPos,
+ OMX_INT rndVal,
+ OMXVCMotionVector *pSrcDstMV,
+ OMX_INT *pDstSAD,
+ OMX_U8 BlockSize
+);
+/**
+ * Function: armVCM4P2_PadMV
+ *
+ * Description:
+ * Performs motion vector padding for a macroblock.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDstMV pointer to motion vector buffer of the current
+ * macroblock
+ * [in] pTransp pointer to transparent status buffer of the
+ * current macroblock
+ * [out] pSrcDstMV pointer to motion vector buffer in which the
+ * motion vectors have been padded
+ * Return Value:
+ * Standard OMXResult result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P2_PadMV(
+ OMXVCMotionVector * pSrcDstMV,
+ OMX_U8 * pTransp
+);
+
+/*
+ * H.264 Specific Declarations
+ */
+/* Defines */
+#define ARM_M4P10_Q_OFFSET (15)
+
+
+/* Dequant tables */
+
+extern const OMX_U8 armVCM4P10_PosToVCol4x4[16];
+extern const OMX_U8 armVCM4P10_PosToVCol2x2[4];
+extern const OMX_U8 armVCM4P10_VMatrix[6][3];
+extern const OMX_U32 armVCM4P10_MFMatrix[6][3];
+
+
+/*
+ * Description:
+ * This function perform the work required by the OpenMAX
+ * DecodeCoeffsToPair function and DecodeChromaDCCoeffsToPair.
+ * Since most of the code is common we share it here.
+ *
+ * Parameters:
+ * [in] ppBitStream Double pointer to current byte in bit stream buffer
+ * [in] pOffset Pointer to current bit position in the byte pointed
+ * to by *ppBitStream
+ * [in] sMaxNumCoeff Maximum number of non-zero coefficients in current
+ * block (4,15 or 16)
+ * [in] nTable Table number (0 to 4) according to the five columns
+ * of Table 9-5 in the H.264 spec
+ * [out] ppBitStream *ppBitStream is updated after each block is decoded
+ * [out] pOffset *pOffset is updated after each block is decoded
+ * [out] pNumCoeff Pointer to the number of nonzero coefficients in
+ * this block
+ * [out] ppPosCoefbuf Double pointer to destination residual
+ * coefficient-position pair buffer
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+
+ */
+
+OMXResult armVCM4P10_DecodeCoeffsToPair(
+ const OMX_U8** ppBitStream,
+ OMX_S32* pOffset,
+ OMX_U8* pNumCoeff,
+ OMX_U8**ppPosCoefbuf,
+ OMX_INT nTable,
+ OMX_INT sMaxNumCoeff
+ );
+
+/*
+ * Description:
+ * Perform DC style intra prediction, averaging upper and left block
+ *
+ * Parameters:
+ * [in] pSrcLeft Pointer to the buffer of 4 left coefficients:
+ * p[x, y] (x = -1, y = 0..3)
+ * [in] pSrcAbove Pointer to the buffer of 4 above coefficients:
+ * p[x,y] (x = 0..3, y = -1)
+ * [in] leftStep Step of left coefficient buffer
+ * [in] dstStep Step of the destination buffer
+ * [in] availability Neighboring 16x16 MB availability flag
+ * [out] pDst Pointer to the destination buffer
+ *
+ * Return Value:
+ * None
+ */
+
+void armVCM4P10_PredictIntraDC4x4(
+ const OMX_U8* pSrcLeft,
+ const OMX_U8 *pSrcAbove,
+ OMX_U8* pDst,
+ OMX_INT leftStep,
+ OMX_INT dstStep,
+ OMX_S32 availability
+);
+
+/*
+ * Description
+ * Unpack a 4x4 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in] ppSrc Double pointer to residual coefficient-position pair
+ * buffer output by CAVLC decoding
+ * [out] ppSrc *ppSrc is updated to the start of next non empty block
+ * [out] pDst Pointer to unpacked 4x4 block
+ */
+
+void armVCM4P10_UnpackBlock4x4(
+ const OMX_U8 **ppSrc,
+ OMX_S16* pDst
+);
+
+/*
+ * Description
+ * Unpack a 2x2 block of coefficient-residual pair values
+ *
+ * Parameters:
+ * [in] ppSrc Double pointer to residual coefficient-position pair
+ * buffer output by CAVLC decoding
+ * [out] ppSrc *ppSrc is updated to the start of next non empty block
+ * [out] pDst Pointer to unpacked 2x2 block
+ */
+
+void armVCM4P10_UnpackBlock2x2(
+ const OMX_U8 **ppSrc,
+ OMX_S16* pDst
+);
+
+/*
+ * Description
+ * Deblock one boundary pixel
+ *
+ * Parameters:
+ * [in] pQ0 Pointer to pixel q0
+ * [in] Step Step between pixels q0 and q1
+ * [in] tC0 Edge threshold value
+ * [in] alpha alpha threshold value
+ * [in] beta beta threshold value
+ * [in] bS deblocking strength
+ * [in] ChromaFlag True for chroma blocks
+ * [out] pQ0 Deblocked pixels
+ *
+ */
+
+void armVCM4P10_DeBlockPixel(
+ OMX_U8 *pQ0, /* pointer to the pixel q0 */
+ int Step, /* step between pixels q0 and q1 */
+ int tC0, /* edge threshold value */
+ int alpha, /* alpha */
+ int beta, /* beta */
+ int bS, /* deblocking strength */
+ int ChromaFlag
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfHor_Luma
+ *
+ * Description:
+ * This function performs interpolation for horizontal 1/2-pel positions
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to interpolate
+ * in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [out] pDst Pointer to the interpolation buffer of the 1/2-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfHor_Luma(
+ const OMX_U8* pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8* pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfVer_Luma
+ *
+ * Description:
+ * This function performs interpolation for vertical 1/2-pel positions
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to interpolate
+ * in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [out] pDst Pointer to the interpolation buffer of the 1/2-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfVer_Luma(
+ const OMX_U8* pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8* pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+/**
+ * Function: armVCM4P10_InterpolateHalfDiag_Luma
+ *
+ * Description:
+ * This function performs interpolation for (1/2, 1/2) positions
+ * around a full-pel position.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to interpolate
+ * in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [out] pDst Pointer to the interpolation buffer of the (1/2,1/2)-pel
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+OMXResult armVCM4P10_InterpolateHalfDiag_Luma(
+ const OMX_U8* pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8* pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+/*
+ * Description:
+ * Transform Residual 4x4 Coefficients
+ *
+ * Parameters:
+ * [in] pSrc Source 4x4 block
+ * [out] pDst Destination 4x4 block
+ *
+ */
+
+void armVCM4P10_TransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc);
+
+/*
+ * Description:
+ * Forward Transform Residual 4x4 Coefficients
+ *
+ * Parameters:
+ * [in] pSrc Source 4x4 block
+ * [out] pDst Destination 4x4 block
+ *
+ */
+
+void armVCM4P10_FwdTransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc);
+
+OMX_INT armVCM4P10_CompareMotionCostToMV (
+ OMX_S16 mvX,
+ OMX_S16 mvY,
+ OMXVCMotionVector diffMV,
+ OMX_INT candSAD,
+ OMXVCMotionVector *bestMV,
+ OMX_U32 nLamda,
+ OMX_S32 *pBestCost);
+
+/**
+ * Function: armVCCOMM_SAD
+ *
+ * Description:
+ * This function calculates the SAD for NxM blocks.
+ *
+ * Remarks:
+ *
+ * [in] pSrcOrg Pointer to the original block
+ * [in] iStepOrg Step of the original block buffer
+ * [in] pSrcRef Pointer to the reference block
+ * [in] iStepRef Step of the reference block buffer
+ * [in] iHeight Height of the block
+ * [in] iWidth Width of the block
+ * [out] pDstSAD Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCCOMM_SAD(
+ const OMX_U8* pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8* pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_S32* pDstSAD,
+ OMX_U32 iHeight,
+ OMX_U32 iWidth);
+
+/**
+ * Function: armVCCOMM_Average
+ *
+ * Description:
+ * This function calculates the average of two blocks and stores the result.
+ *
+ * Remarks:
+ *
+ * [in] pPred0 Pointer to the top-left corner of reference block 0
+ * [in] pPred1 Pointer to the top-left corner of reference block 1
+ * [in] iPredStep0 Step of reference block 0
+ * [in] iPredStep1 Step of reference block 1
+ * [in] iDstStep Step of the destination buffer
+ * [in] iWidth Width of the blocks
+ * [in] iHeight Height of the blocks
+ * [out] pDstPred Pointer to the destination buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCCOMM_Average (
+ const OMX_U8* pPred0,
+ const OMX_U8* pPred1,
+ OMX_U32 iPredStep0,
+ OMX_U32 iPredStep1,
+ OMX_U8* pDstPred,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+/**
+ * Function: armVCM4P10_SADQuar
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the
+ * average of the other two (pSrcRef0 and pSrcRef1)
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to the original block
+ * [in] pSrcRef0 Pointer to reference block 0
+ * [in] pSrcRef1 Pointer to reference block 1
+ * [in] iSrcStep Step of the original block buffer
+ * [in] iRefStep0 Step of reference block 0
+ * [in] iRefStep1 Step of reference block 1
+ * [in] iHeight Height of the block
+ * [in] iWidth Width of the block
+ * [out] pDstSAD Pointer of result SAD
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+OMXResult armVCM4P10_SADQuar(
+ const OMX_U8* pSrc,
+ const OMX_U8* pSrcRef0,
+ const OMX_U8* pSrcRef1,
+ OMX_U32 iSrcStep,
+ OMX_U32 iRefStep0,
+ OMX_U32 iRefStep1,
+ OMX_U32* pDstSAD,
+ OMX_U32 iHeight,
+ OMX_U32 iWidth
+);
+
+/**
+ * Function: armVCM4P10_Interpolate_Chroma
+ *
+ * Description:
+ * This function performs interpolation for chroma components.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to
+ * interpolate in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [in] dx Fractional part of horizontal motion vector
+ * component in 1/8 pixel unit (0~7)
+ * [in] dy Fractional part of vertical motion vector
+ * component in 1/8 pixel unit (0~7)
+ * [out] pDst Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+ OMXResult armVCM4P10_Interpolate_Chroma(
+ OMX_U8 *pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8 *pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight,
+ OMX_U32 dx,
+ OMX_U32 dy
+);
+
+/**
+ * Function: armVCM4P10_Interpolate_Luma
+ *
+ * Description:
+ * This function performs interpolation for luma components.
+ *
+ * Remarks:
+ *
+ * [in] pSrc Pointer to top-left corner of block used to
+ * interpolate in the reconstructed frame plane
+ * [in] iSrcStep Step of the source buffer.
+ * [in] iDstStep Step of the destination(interpolation) buffer.
+ * [in] iWidth Width of the current block
+ * [in] iHeight Height of the current block
+ * [in] dx Fractional part of horizontal motion vector
+ * component in 1/4 pixel unit (0~3)
+ * [in] dy Fractional part of vertical motion vector
+ * component in 1/4 pixel unit (0~3)
+ * [out] pDst Pointer to the interpolation buffer
+ *
+ * Return Value:
+ * Standard OMXResult value.
+ *
+ */
+
+ OMXResult armVCM4P10_Interpolate_Luma(
+ const OMX_U8 *pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8 *pDst,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight,
+ OMX_U32 dx,
+ OMX_U32 dy
+);
+
+/**
+ * Function: armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx
+ *
+ * Description:
+ * Reconstruct the 4x4 residual block from coefficient-position pair buffer,
+ * perform dequantisation and integer inverse transformation for 4x4 block of
+ * residuals and update the pair buffer pointer to next non-empty block.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppSrc Double pointer to residual coefficient-position
+ * pair buffer output by CAVLC decoding
+ * [in] pDC Pointer to the DC coefficient of this block, NULL
+ * if it doesn't exist
+ * [in] QP Quantization parameter
+ * [in] AC Flag indicating if at least one non-zero coefficient exists
+ * [out] pDst pointer to the reconstructed 4x4 block data
+ *
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx(
+ OMX_U8 **ppSrc,
+ OMX_S16 *pDst,
+ OMX_INT QP,
+ OMX_S16* pDC,
+ int AC
+);
+
+#endif /*_armVideo_H_*/
+
+/*End of File*/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVCCOMM_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVCCOMM_s.h
new file mode 100644
index 0000000..7f0a9b8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVCCOMM_s.h
@@ -0,0 +1,72 @@
+;//
+;//
+;// File Name: armVCCOMM_s.h
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// ARM optimized OpenMAX VC header file
+;//
+;// Formula used:
+;// MACRO for calculating median for three values.
+
+
+
+ IF :LNOT::DEF:ARMVCCOMM_S_H
+ INCLUDE armCOMM_s.h
+ M_VARIANTS CortexA8, ARM1136JS
+
+ IF ARM1136JS :LOR: CortexA8
+
+ ;///*
+ ;// * Macro: M_MEDIAN3
+ ;// *
+ ;// * Description: Finds the median of three numbers
+ ;// *
+ ;// * Remarks: Uses signed comparisons (LT/GT) and updates the condition flags.
+ ;// *
+ ;// * Parameters:
+ ;// * [in] x First entry for the list of three numbers. Not written by the macro.
+ ;// * [in] y Second entry for the list of three numbers.
+ ;// * Input value may be corrupted: on exit it holds
+ ;// * the larger of the original y and z.
+ ;// * [in] z Third entry of the list of three numbers.
+ ;// * Input value corrupted at the end of the
+ ;// * execution of this macro.
+ ;// * [in] t Temporary scratch register; receives y - z.
+ ;// * [out] z Median of the three numbers.
+ ;// */
+
+ MACRO
+
+ M_MEDIAN3 $x, $y, $z, $t
+
+ SUBS $t, $y, $z; // t = y - z; if (y < z)
+ ADDLT $z, $z, $t; // z = z + (y - z) = y (swap y and z)
+ SUBLT $y, $y, $t; // y = y - (y - z) = original z
+
+ ;// Now z' <= y', so there are three cases for the
+ ;// median value, depending on x.
+
+ ;// 1) x <= z' <= y' : median value is z'
+ ;// 2) z' <= x <= y' : median value is x
+ ;// 3) z' <= y' <= x : median value is y'
+
+ CMP $z, $x; // if ( x > min(y,z) )
+ MOVLT $z, $x; // ans = x
+
+ CMP $x, $y; // if ( x > max(y,z) )
+ MOVGT $z, $y; // ans = max(y,z)
+
+ MEND
+ ENDIF
+
+
+
+ ENDIF ;// ARMVCCOMM_S_H
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC.h
new file mode 100644
index 0000000..7b3cc72
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC.h
@@ -0,0 +1,4381 @@
+/**
+ * File: omxVC.h
+ * Brief: OpenMAX DL v1.0.2 - Video Coding library
+ *
+ * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved.
+ *
+ * These materials are protected by copyright laws and contain material
+ * proprietary to the Khronos Group, Inc. You may use these materials
+ * for implementing Khronos specifications, without altering or removing
+ * any trademark, copyright or other notice from the specification.
+ *
+ * Khronos Group makes no, and expressly disclaims any, representations
+ * or warranties, express or implied, regarding these materials, including,
+ * without limitation, any implied warranties of merchantability or fitness
+ * for a particular purpose or non-infringement of any intellectual property.
+ * Khronos Group makes no, and expressly disclaims any, warranties, express
+ * or implied, regarding the correctness, accuracy, completeness, timeliness,
+ * and reliability of these materials.
+ *
+ * Under no circumstances will the Khronos Group, or any of its Promoters,
+ * Contributors or Members or their respective partners, officers, directors,
+ * employees, agents or representatives be liable for any damages, whether
+ * direct, indirect, special or consequential damages for lost revenues,
+ * lost profits, or otherwise, arising from or in connection with these
+ * materials.
+ *
+ * Khronos and OpenMAX are trademarks of the Khronos Group Inc.
+ *
+ */
+
+/* *****************************************************************************************/
+
+#ifndef _OMXVC_H_
+#define _OMXVC_H_
+
+#include "omxtypes.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+
+/* 6.1.1.1 Motion Vectors */
+/* In omxVC, motion vectors are represented as follows: */
+
+typedef struct {
+ OMX_S16 dx;
+ OMX_S16 dy;
+} OMXVCMotionVector;
+
+
+
+/**
+ * Function: omxVCCOMM_Average_8x (6.1.3.1.1)
+ *
+ * Description:
+ * This function calculates the average of two 8x4, 8x8, or 8x16 blocks. The
+ * result is rounded according to (a+b+1)/2. The block average function can
+ * be used in conjunction with half-pixel interpolation to obtain quarter
+ * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1.
+ *
+ * Input Arguments:
+ *
+ * pPred0 - Pointer to the top-left corner of reference block 0
+ * pPred1 - Pointer to the top-left corner of reference block 1
+ * iPredStep0 - Step of reference block 0
+ * iPredStep1 - Step of reference block 1
+ * iDstStep - Step of the destination buffer.
+ * iHeight - Height of the blocks
+ *
+ * Output Arguments:
+ *
+ * pDstPred - Pointer to the destination buffer. 8-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pPred0, pPred1, or
+ * pDstPred.
+ * - pDstPred is not aligned on an 8-byte boundary.
+ * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 8.
+ * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 8.
+ * - iDstStep <= 0 or iDstStep is not a multiple of 8.
+ * - iHeight is not 4, 8, or 16.
+ *
+ */
+OMXResult omxVCCOMM_Average_8x (
+ const OMX_U8 *pPred0,
+ const OMX_U8 *pPred1,
+ OMX_U32 iPredStep0,
+ OMX_U32 iPredStep1,
+ OMX_U8 *pDstPred,
+ OMX_U32 iDstStep,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCCOMM_Average_16x (6.1.3.1.2)
+ *
+ * Description:
+ * This function calculates the average of two 16x16 or 16x8 blocks. The
+ * result is rounded according to (a+b+1)/2. The block average function can
+ * be used in conjunction with half-pixel interpolation to obtain quarter
+ * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1.
+ *
+ * Input Arguments:
+ *
+ * pPred0 - Pointer to the top-left corner of reference block 0
+ * pPred1 - Pointer to the top-left corner of reference block 1
+ * iPredStep0 - Step of reference block 0
+ * iPredStep1 - Step of reference block 1
+ * iDstStep - Step of the destination buffer
+ * iHeight - Height of the blocks
+ *
+ * Output Arguments:
+ *
+ * pDstPred - Pointer to the destination buffer. 16-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pPred0, pPred1, or
+ * pDstPred.
+ * - pDstPred is not aligned on a 16-byte boundary.
+ * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 16.
+ * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 16.
+ * - iDstStep <= 0 or iDstStep is not a multiple of 16.
+ * - iHeight is not 8 or 16.
+ *
+ */
+OMXResult omxVCCOMM_Average_16x (
+ const OMX_U8 *pPred0,
+ const OMX_U8 *pPred1,
+ OMX_U32 iPredStep0,
+ OMX_U32 iPredStep1,
+ OMX_U8 *pDstPred,
+ OMX_U32 iDstStep,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCCOMM_ExpandFrame_I (6.1.3.2.1)
+ *
+ * Description:
+ * This function expands a reconstructed frame in-place. The unexpanded
+ * source frame should be stored in a plane buffer with sufficient space
+ * pre-allocated for edge expansion, and the input frame should be located in
+ * the plane buffer center. This function executes the pixel expansion by
+ * replicating source frame edge pixel intensities in the empty pixel
+ * locations (expansion region) between the source frame edge and the plane
+ * buffer edge. The width/height of the expansion regions on the
+ * horizontal/vertical edges is controlled by the parameter iExpandPels.
+ *
+ * Input Arguments:
+ *
+ * pSrcDstPlane - pointer to the top-left corner of the frame to be
+ * expanded; must be aligned on an 8-byte boundary.
+ * iFrameWidth - frame width; must be a multiple of 8.
+ * iFrameHeight - frame height; must be a multiple of 8.
+ * iExpandPels - number of pixels to be expanded in the horizontal and
+ * vertical directions; must be a multiple of 8.
+ * iPlaneStep - distance, in bytes, between the start of consecutive lines
+ * in the plane buffer; must be larger than or equal to
+ * (iFrameWidth + 2 * iExpandPels).
+ *
+ * Output Arguments:
+ *
+ * pSrcDstPlane - Pointer to the top-left corner of the frame (NOT the
+ * top-left corner of the plane); must be aligned on an 8-byte
+ * boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - pSrcDstPlane is NULL.
+ * - pSrcDstPlane is not aligned on an 8-byte boundary.
+ * - one of the following parameters is either equal to zero or is a
+ * non-multiple of 8: iFrameHeight, iFrameWidth, iPlaneStep, or
+ * iExpandPels.
+ * - iPlaneStep < (iFrameWidth + 2 * iExpandPels).
+ *
+ */
+OMXResult omxVCCOMM_ExpandFrame_I (
+ OMX_U8 *pSrcDstPlane,
+ OMX_U32 iFrameWidth,
+ OMX_U32 iFrameHeight,
+ OMX_U32 iExpandPels,
+ OMX_U32 iPlaneStep
+);
+
+
+
+/**
+ * Function: omxVCCOMM_Copy8x8 (6.1.3.3.1)
+ *
+ * Description:
+ * Copies the reference 8x8 block to the current block.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the reference block in the source frame; must be
+ * aligned on an 8-byte boundary.
+ * step - distance between the starts of consecutive lines in the reference
+ * frame, in bytes; must be a multiple of 8 and must be larger than
+ * or equal to 8.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination block; must be aligned on an 8-byte
+ * boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pSrc, pDst
+ * - one or more of the following pointers is not aligned on an 8-byte
+ * boundary: pSrc, pDst
+ * - step <8 or step is not a multiple of 8.
+ *
+ */
+OMXResult omxVCCOMM_Copy8x8 (
+ const OMX_U8 *pSrc,
+ OMX_U8 *pDst,
+ OMX_INT step
+);
+
+
+
+/**
+ * Function: omxVCCOMM_Copy16x16 (6.1.3.3.2)
+ *
+ * Description:
+ * Copies the reference 16x16 macroblock to the current macroblock.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the reference macroblock in the source frame; must be
+ * aligned on a 16-byte boundary.
+ * step - distance between the starts of consecutive lines in the reference
+ * frame, in bytes; must be a multiple of 16 and must be larger
+ * than or equal to 16.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination macroblock; must be aligned on a
+ * 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - one or more of the following pointers is NULL: pSrc, pDst
+ * - one or more of the following pointers is not aligned on a 16-byte
+ * boundary: pSrc, pDst
+ * - step <16 or step is not a multiple of 16.
+ *
+ */
+OMXResult omxVCCOMM_Copy16x16 (
+ const OMX_U8 *pSrc,
+ OMX_U8 *pDst,
+ OMX_INT step
+);
+
+
+
+/**
+ * Function: omxVCCOMM_ComputeTextureErrorBlock_SAD (6.1.4.1.1)
+ *
+ * Description:
+ * Computes texture error of the block; also returns SAD.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the source plane; must be aligned on an 8-byte
+ * boundary.
+ * srcStep - step of the source plane
+ * pSrcRef - pointer to the reference buffer, an 8x8 block; must be aligned
+ * on an 8-byte boundary.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination buffer, an 8x8 block; must be aligned
+ * on an 8-byte boundary.
+ * pDstSAD - pointer to the Sum of Absolute Differences (SAD) value
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - At least one of the following
+ * pointers is NULL: pSrc, pSrcRef, pDst and pDstSAD.
+ * - pSrc is not 8-byte aligned.
+ * - srcStep <= 0 or srcStep is not a multiple of 8.
+ * - pSrcRef is not 8-byte aligned.
+ * - pDst is not 8-byte aligned.
+ *
+ */
+OMXResult omxVCCOMM_ComputeTextureErrorBlock_SAD (
+ const OMX_U8 *pSrc,
+ OMX_INT srcStep,
+ const OMX_U8 *pSrcRef,
+ OMX_S16 *pDst,
+ OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCCOMM_ComputeTextureErrorBlock (6.1.4.1.2)
+ *
+ * Description:
+ * Computes the texture error of the block.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the source plane. This should be aligned on an 8-byte
+ * boundary.
+ * srcStep - step of the source plane
+ * pSrcRef - pointer to the reference buffer, an 8x8 block. This should be
+ * aligned on an 8-byte boundary.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination buffer, an 8x8 block. This should be
+ * aligned on an 8-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is NULL:
+ * pSrc, pSrcRef, pDst.
+ * - pSrc is not 8-byte aligned.
+ * - srcStep <= 0 or srcStep is not a multiple of 8.
+ * - pSrcRef is not 8-byte aligned.
+ * - pDst is not 8-byte aligned
+ *
+ */
+OMXResult omxVCCOMM_ComputeTextureErrorBlock (
+ const OMX_U8 *pSrc,
+ OMX_INT srcStep,
+ const OMX_U8 *pSrcRef,
+ OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function: omxVCCOMM_LimitMVToRect (6.1.4.1.3)
+ *
+ * Description:
+ * Limits the motion vector associated with the current block/macroblock to
+ * prevent the motion compensated block/macroblock from moving outside a
+ * bounding rectangle as shown in Figure 6-1.
+ *
+ * Input Arguments:
+ *
+ * pSrcMV - pointer to the motion vector associated with the current block
+ * or macroblock
+ * pRectVOPRef - pointer to the bounding rectangle
+ * Xcoord, Ycoord - coordinates of the current block or macroblock
+ * size - size of the current block or macroblock; must be equal to 8 or
+ * 16.
+ *
+ * Output Arguments:
+ *
+ * pDstMV - pointer to the limited motion vector
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcMV, pDstMV, or pRectVOPRef.
+ * - size is not equal to either 8 or 16.
+ * - the width or height of the bounding rectangle is less than
+ * twice the block size.
+ */
+OMXResult omxVCCOMM_LimitMVToRect (
+ const OMXVCMotionVector *pSrcMV,
+ OMXVCMotionVector *pDstMV,
+ const OMXRect *pRectVOPRef,
+ OMX_INT Xcoord,
+ OMX_INT Ycoord,
+ OMX_INT size
+);
+
+
+
+/**
+ * Function: omxVCCOMM_SAD_16x (6.1.4.1.4)
+ *
+ * Description:
+ * This function calculates the SAD for 16x16 and 16x8 blocks.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to the original block; must be aligned on a 16-byte
+ * boundary.
+ * iStepOrg - Step of the original block buffer
+ * pSrcRef - Pointer to the reference block
+ * iStepRef - Step of the reference block buffer
+ * iHeight - Height of the block
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pDstSAD, or pSrcRef
+ * - pSrcOrg is not 16-byte aligned.
+ * - iStepOrg <= 0 or iStepOrg is not a multiple of 16
+ * - iStepRef <= 0 or iStepRef is not a multiple of 16
+ * - iHeight is not 8 or 16
+ *
+ */
+OMXResult omxVCCOMM_SAD_16x (
+ const OMX_U8 *pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8 *pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_S32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCCOMM_SAD_8x (6.1.4.1.5)
+ *
+ * Description:
+ * This function calculates the SAD for 8x16, 8x8, 8x4 blocks.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to the original block; must be aligned on an 8-byte
+ * boundary.
+ * iStepOrg - Step of the original block buffer
+ * pSrcRef - Pointer to the reference block
+ * iStepRef - Step of the reference block buffer
+ * iHeight - Height of the block
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pDstSAD, or pSrcRef
+ * - pSrcOrg is not 8-byte aligned.
+ * - iStepOrg <= 0 or iStepOrg is not a multiple of 8
+ * - iStepRef <= 0 or iStepRef is not a multiple of 8
+ * - iHeight is not 4, 8 or 16
+ *
+ */
+OMXResult omxVCCOMM_SAD_8x (
+ const OMX_U8 *pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8 *pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_S32*pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/* 6.2.1.1 Direction */
+/* The direction enumerator is used with functions that perform AC/DC prediction and zig-zag scan. */
+
+enum {
+ OMX_VC_NONE = 0,
+ OMX_VC_HORIZONTAL = 1,
+ OMX_VC_VERTICAL = 2
+};
+
+
+
+/* 6.2.1.2 Bilinear Interpolation */
+/* The bilinear interpolation enumerator is used with motion estimation, motion compensation, and reconstruction functions. */
+
+enum {
+ OMX_VC_INTEGER_PIXEL = 0, /* case a */
+ OMX_VC_HALF_PIXEL_X = 1, /* case b */
+ OMX_VC_HALF_PIXEL_Y = 2, /* case c */
+ OMX_VC_HALF_PIXEL_XY = 3 /* case d */
+};
+
+
+
+/* 6.2.1.3 Neighboring Macroblock Availability */
+/* Neighboring macroblock availability is indicated using the following flags: */
+
+enum {
+ OMX_VC_UPPER = 1, /** above macroblock is available */
+ OMX_VC_LEFT = 2, /** left macroblock is available */
+ OMX_VC_CENTER = 4,
+ OMX_VC_RIGHT = 8,
+ OMX_VC_LOWER = 16,
+ OMX_VC_UPPER_LEFT = 32, /** above-left macroblock is available */
+ OMX_VC_UPPER_RIGHT = 64, /** above-right macroblock is available */
+ OMX_VC_LOWER_LEFT = 128,
+ OMX_VC_LOWER_RIGHT = 256
+};
+
+
+
+/* 6.2.1.4 Video Components */
+/* A data type that enumerates video components is defined as follows: */
+
+typedef enum {
+ OMX_VC_LUMINANCE, /** Luminance component */
+ OMX_VC_CHROMINANCE /** chrominance component */
+} OMXVCM4P2VideoComponent;
+
+
+
+/* 6.2.1.5 MacroblockTypes */
+/* A data type that enumerates macroblock types is defined as follows: */
+
+typedef enum {
+ OMX_VC_INTER = 0, /** P picture or P-VOP */
+ OMX_VC_INTER_Q = 1, /** P picture or P-VOP */
+ OMX_VC_INTER4V = 2, /** P picture or P-VOP */
+ OMX_VC_INTRA = 3, /** I and P picture, I- and P-VOP */
+ OMX_VC_INTRA_Q = 4, /** I and P picture, I- and P-VOP */
+ OMX_VC_INTER4V_Q = 5 /** P picture or P-VOP (H.263)*/
+} OMXVCM4P2MacroblockType;
+
+
+
+/* 6.2.1.6 Coordinates */
+/* Coordinates are represented as follows: */
+
+typedef struct {
+ OMX_INT x;
+ OMX_INT y;
+} OMXVCM4P2Coordinate;
+
+
+
+/* 6.2.1.7 Motion Estimation Algorithms */
+/* A data type that enumerates motion estimation search methods is defined as follows: */
+
+typedef enum {
+ OMX_VC_M4P2_FAST_SEARCH = 0, /** Fast motion search */
+ OMX_VC_M4P2_FULL_SEARCH = 1 /** Full motion search */
+} OMXVCM4P2MEMode;
+
+
+
+/* 6.2.1.8 Motion Estimation Parameters */
+/* A data structure containing control parameters for
+ * motion estimation functions is defined as follows:
+ */
+
+typedef struct {
+ OMX_INT searchEnable8x8; /** enables 8x8 search */
+ OMX_INT halfPelSearchEnable; /** enables half-pel resolution */
+ OMX_INT searchRange; /** search range */
+ OMX_INT rndVal; /** rounding control; 0-disabled, 1-enabled*/
+} OMXVCM4P2MEParams;
+
+
+
+/* 6.2.1.9 Macroblock Information */
+/* A data structure containing macroblock parameters for
+ * motion estimation functions is defined as follows:
+ */
+
+typedef struct {
+ OMX_S32 sliceId; /* slice number */
+ OMXVCM4P2MacroblockType mbType; /* MB type: OMX_VC_INTRA, OMX_VC_INTER, or OMX_VC_INTER4V */
+ OMX_S32 qp; /* quantization parameter */
+ OMX_U32 cbpy; /* CBP Luma */
+ OMX_U32 cbpc; /* CBP Chroma */
+ OMXVCMotionVector pMV0[2][2]; /* motion vector, represented using 1/2-pel units,
+ * pMV0[blocky][blockx] (blocky = 0~1, blockx = 0~1)
+ */
+ OMXVCMotionVector pMVPred[2][2]; /* motion vector prediction, represented using 1/2-pel units,
+ * pMVPred[blocky][blockx] (blocky = 0~1, blockx = 0~1)
+ */
+ OMX_U8 pPredDir[2][2]; /* AC prediction direction:
+ * OMX_VC_NONE, OMX_VC_VERTICAL, OMX_VC_HORIZONTAL
+ */
+} OMXVCM4P2MBInfo, *OMXVCM4P2MBInfoPtr;
+
+
+
+/**
+ * Function: omxVCM4P2_FindMVpred (6.2.3.1.1)
+ *
+ * Description:
+ * Predicts a motion vector for the current block using the procedure
+ * specified in [ISO14496-2], subclause 7.6.5. The resulting predicted MV is
+ * returned in pDstMVPred. If the parameter pDstMVPredME is not NULL then
+ * the set of three MV candidates used for prediction is also returned,
+ * otherwise pDstMVPredME is NULL upon return.
+ *
+ * Input Arguments:
+ *
+ * pSrcMVCurMB - pointer to the MV buffer associated with the current Y
+ * macroblock; a value of NULL indicates unavailability.
+ * pSrcCandMV1 - pointer to the MV buffer containing the 4 MVs associated
+ * with the MB located to the left of the current MB; set to NULL
+ * if there is no MB to the left.
+ * pSrcCandMV2 - pointer to the MV buffer containing the 4 MVs associated
+ * with the MB located above the current MB; set to NULL if there
+ * is no MB located above the current MB.
+ * pSrcCandMV3 - pointer to the MV buffer containing the 4 MVs associated
+ * with the MB located to the right and above the current MB; set
+ * to NULL if there is no MB located to the above-right.
+ * iBlk - the index of block in the current macroblock
+ * pDstMVPredME - MV candidate return buffer; if set to NULL then
+ * prediction candidate MVs are not returned and pDstMVPredME will
+ * be NULL upon function return; if pDstMVPredME is non-NULL then it
+ * must point to a buffer containing sufficient space for three
+ * return MVs.
+ *
+ * Output Arguments:
+ *
+ * pDstMVPred - pointer to the predicted motion vector
+ * pDstMVPredME - if non-NULL upon input then pDstMVPredME points upon
+ * return to a buffer containing the three motion vector candidates
+ * used for prediction as specified in [ISO14496-2], subclause
+ * 7.6.5, otherwise if NULL upon input then pDstMVPredME is NULL
+ * upon output.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - the pointer pDstMVPred is NULL
+ * - the parameter iBlk does not fall into the range 0 <= iBlk<=3
+ *
+ */
+OMXResult omxVCM4P2_FindMVpred (
+ const OMXVCMotionVector *pSrcMVCurMB,
+ const OMXVCMotionVector *pSrcCandMV1,
+ const OMXVCMotionVector *pSrcCandMV2,
+ const OMXVCMotionVector *pSrcCandMV3,
+ OMXVCMotionVector *pDstMVPred,
+ OMXVCMotionVector *pDstMVPredME,
+ OMX_INT iBlk
+);
+
+
+
+/**
+ * Function: omxVCM4P2_IDCT8x8blk (6.2.3.2.1)
+ *
+ * Description:
+ * Computes a 2D inverse DCT for a single 8x8 block, as defined in
+ * [ISO14496-2].
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the start of the linearly arranged IDCT input buffer;
+ * must be aligned on a 16-byte boundary. According to
+ * [ISO14496-2], the input coefficient values should lie within the
+ * range [-2048, 2047].
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the start of the linearly arranged IDCT output buffer;
+ * must be aligned on a 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - pSrc or pDst is NULL.
+ * - pSrc or pDst is not 16-byte aligned.
+ *
+ */
+OMXResult omxVCM4P2_IDCT8x8blk (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function: omxVCM4P2_MEGetBufSize (6.2.4.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification
+ * structure for the following motion estimation functions:
+ * BlockMatch_Integer_8x8, BlockMatch_Integer_16x16, and MotionEstimationMB.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P2MEMode
+ * pMEParams - motion estimation parameters
+ *
+ * Output Arguments:
+ *
+ * pSize - pointer to the number of bytes required for the specification
+ * structure
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - one or more of the following is true:
+ * - an invalid value was specified for the parameter MEmode
+ * - a negative or zero value was specified for the
+ * parameter pMEParams->searchRange
+ *
+ */
+OMXResult omxVCM4P2_MEGetBufSize (
+ OMXVCM4P2MEMode MEmode,
+ const OMXVCM4P2MEParams *pMEParams,
+ OMX_U32 *pSize
+);
+
+
+
+/**
+ * Function: omxVCM4P2_MEInit (6.2.4.1.2)
+ *
+ * Description:
+ * Initializes the vendor-specific specification structure required for the
+ * following motion estimation functions: omxVCM4P2_BlockMatch_Integer_8x8,
+ * omxVCM4P2_BlockMatch_Integer_16x16, and omxVCM4P2_MotionEstimationMB.
+ * Memory for the specification structure *pMESpec must be allocated prior to
+ * calling the function, and should be aligned on a 4-byte boundary.
+ * Following initialization by this function, the vendor-specific structure
+ * *pMESpec should contain an implementation-specific representation of all
+ * motion estimation parameters received via the structure pMEParams, for
+ * example rndVal, searchRange, etc. The number of bytes required for the
+ * specification structure can be determined using the function
+ * omxVCM4P2_MEGetBufSize.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P2MEMode
+ * pMEParams - pointer to the motion estimation parameters
+ * pMESpec - pointer to the uninitialized ME specification structure
+ *
+ * Output Arguments:
+ *
+ * pMESpec - pointer to the initialized ME specification structure
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - one or more of the following is true:
+ * - an invalid value was specified for the parameter MEmode
+ * - a negative or zero value was specified for the
+ * parameter pMEParams->searchRange
+ *
+ * NOTE(review): a NULL pMEParams or pMESpec is not listed among the error
+ * conditions above - confirm how the implementation handles it.
+ *
+ */
+OMXResult omxVCM4P2_MEInit (
+ OMXVCM4P2MEMode MEmode,
+ const OMXVCM4P2MEParams*pMEParams,
+ void *pMESpec
+);
+
+
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Integer_16x16 (6.2.4.2.1)
+ *
+ * Description:
+ * Performs a 16x16 block search; estimates motion vector and associated
+ * minimum SAD. Both the input and output motion vectors are represented using
+ * half-pixel units, and therefore a shift left or right by 1 bit may be
+ * required, respectively, to match the input or output MVs with other
+ * functions that either generate output MVs or expect input MVs represented
+ * using integer pixel units.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * MB that corresponds to the location of the current macroblock in
+ * the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - pointer to the valid reference plane rectangle; coordinates
+ * are specified relative to the image origin. Rectangle
+ * boundaries may extend beyond image boundaries if the image has
+ * been padded. For example, if padding extends 4 pixels beyond
+ * frame border, then the value for the left border could be set to
+ * -4.
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 256
+ * entries); must be aligned on a 16-byte boundary. The number of
+ * bytes between lines (step) is 16.
+ * pCurrPointPos - position of the current macroblock in the current plane
+ * pSrcPreMV - pointer to predicted motion vector; NULL indicates no
+ * predicted MV
+ * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced
+ * by pSrcPreMV); may be set to NULL if unavailable.
+ * pMESpec - vendor-specific motion estimation specification structure;
+ * must have been allocated and then initialized using
+ * omxVCM4P2_MEInit prior to calling the block matching function.
+ *
+ * Output Arguments:
+ *
+ * pDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL: pSrcRefBuf,
+ * pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or
+ * pMESpec, or
+ * - pSrcCurrBuf is not 16-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Integer_16x16 (
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ const OMXVCMotionVector*pSrcPreMV,
+ const OMX_INT *pSrcPreSAD,
+ void *pMESpec,
+ OMXVCMotionVector*pDstMV,
+ OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Integer_8x8 (6.2.4.2.2)
+ *
+ * Description:
+ * Performs an 8x8 block search; estimates motion vector and associated
+ * minimum SAD. Both the input and output motion vectors are represented
+ * using half-pixel units, and therefore a shift left or right by 1 bit may be
+ * required, respectively, to match the input or output MVs with other
+ * functions that either generate output MVs or expect input MVs represented
+ * using integer pixel units.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * block that corresponds to the location of the current 8x8 block
+ * in the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - pointer to the valid reference plane rectangle; coordinates
+ * are specified relative to the image origin. Rectangle
+ * boundaries may extend beyond image boundaries if the image has
+ * been padded.
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 128
+ * entries); must be aligned on an 8-byte boundary. The number of
+ * bytes between lines (step) is 16 bytes.
+ * pCurrPointPos - position of the current block in the current plane
+ * pSrcPreMV - pointer to predicted motion vector; NULL indicates no
+ * predicted MV
+ * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced
+ * by pSrcPreMV); may be set to NULL if unavailable.
+ * pMESpec - vendor-specific motion estimation specification structure;
+ * must have been allocated and then initialized using
+ * omxVCM4P2_MEInit prior to calling the block matching function.
+ *
+ * Output Arguments:
+ *
+ * pDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL: pSrcRefBuf,
+ * pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or
+ * pMESpec, or
+ * - pSrcCurrBuf is not 8-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Integer_8x8 (
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ const OMXVCMotionVector *pSrcPreMV,
+ const OMX_INT *pSrcPreSAD,
+ void *pMESpec,
+ OMXVCMotionVector *pDstMV,
+ OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Half_16x16 (6.2.4.2.3)
+ *
+ * Description:
+ * Performs a 16x16 block match with half-pixel resolution. Returns the
+ * estimated motion vector and associated minimum SAD. This function
+ * estimates the half-pixel motion vector by interpolating the integer
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e.,
+ * the initial integer MV is generated externally. The input parameters
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of
+ * 16x16 integer search prior to calling BlockMatch_Half_16x16. The function
+ * BlockMatch_Integer_16x16 may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * macroblock that corresponds to the location of the current
+ * macroblock in the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - reference plane valid region rectangle
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 256
+ * entries); must be aligned on a 16-byte boundary. The number of
+ * bytes between lines (step) is 16.
+ * pSearchPointRefPos - position of the starting point for half pixel
+ * search (specified in terms of integer pixel units) in the
+ * reference plane, i.e., the reference position pointed to by the
+ * predicted motion vector.
+ * rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ * pSrcDstMV - pointer to the initial MV estimate; typically generated
+ * during a prior 16x16 integer search; specified in terms of
+ * half-pixel units.
+ *
+ * Output Arguments:
+ *
+ * pSrcDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL: pSrcRefBuf,
+ * pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV, or
+ * - pSrcCurrBuf is not 16-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Half_16x16 (
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pSearchPointRefPos,
+ OMX_INT rndVal,
+ OMXVCMotionVector *pSrcDstMV,
+ OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P2_BlockMatch_Half_8x8 (6.2.4.2.4)
+ *
+ * Description:
+ * Performs an 8x8 block match with half-pixel resolution. Returns the
+ * estimated motion vector and associated minimum SAD. This function
+ * estimates the half-pixel motion vector by interpolating the integer
+ * resolution motion vector referenced by the input parameter pSrcDstMV, i.e.,
+ * the initial integer MV is generated externally. The input parameters
+ * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of
+ * 8x8 integer search prior to calling BlockMatch_Half_8x8. The function
+ * BlockMatch_Integer_8x8 may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * block that corresponds to the location of the current 8x8 block
+ * in the current plane.
+ * refWidth - width of the reference plane
+ * pRefRect - reference plane valid region rectangle
+ * pSrcCurrBuf - pointer to the current block in the current macroblock
+ * buffer extracted from the original plane (linear array, 128
+ * entries); must be aligned on an 8-byte boundary. The number of
+ * bytes between lines (step) is 16.
+ * pSearchPointRefPos - position of the starting point for half pixel
+ * search (specified in terms of integer pixel units) in the
+ * reference plane.
+ * rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ * pSrcDstMV - pointer to the initial MV estimate; typically generated
+ * during a prior 8x8 integer search, specified in terms of
+ * half-pixel units.
+ *
+ * Output Arguments:
+ *
+ * pSrcDstMV - pointer to estimated MV
+ * pDstSAD - pointer to minimum SAD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following
+ * conditions is true:
+ * - at least one of the following pointers is NULL:
+ * pSrcRefBuf, pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV
+ * - pSrcCurrBuf is not 8-byte aligned
+ *
+ */
+OMXResult omxVCM4P2_BlockMatch_Half_8x8 (
+ const OMX_U8 *pSrcRefBuf,
+ OMX_INT refWidth,
+ const OMXRect *pRefRect,
+ const OMX_U8 *pSrcCurrBuf,
+ const OMXVCM4P2Coordinate *pSearchPointRefPos,
+ OMX_INT rndVal,
+ OMXVCMotionVector *pSrcDstMV,
+ OMX_INT *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P2_MotionEstimationMB (6.2.4.3.1)
+ *
+ * Description:
+ * Performs motion search for a 16x16 macroblock. Selects best motion search
+ * strategy from among inter-1MV, inter-4MV, and intra modes. Supports
+ * integer and half pixel resolution.
+ *
+ * Input Arguments:
+ *
+ * pSrcCurrBuf - pointer to the top-left corner of the current MB in the
+ * original picture plane; must be aligned on a 16-byte boundary.
+ * The function does not expect source data outside the region
+ * bounded by the MB to be available; for example it is not
+ * necessary for the caller to guarantee the availability of
+ * pSrcCurrBuf[-srcCurrStep], i.e., the row of pixels above the MB
+ * to be processed.
+ * srcCurrStep - width of the original picture plane, in terms of full
+ * pixels; must be a multiple of 16.
+ * pSrcRefBuf - pointer to the reference Y plane; points to the reference
+ * plane location corresponding to the location of the current
+ * macroblock in the current plane; must be aligned on a 16-byte
+ * boundary.
+ * srcRefStep - width of the reference picture plane, in terms of full
+ * pixels; must be a multiple of 16.
+ * pRefRect - reference plane valid region rectangle, specified relative to
+ * the image origin
+ * pCurrPointPos - position of the current macroblock in the current plane
+ * pMESpec - pointer to the vendor-specific motion estimation specification
+ * structure; must be allocated and then initialized using
+ * omxVCM4P2_MEInit prior to calling this function.
+ * pMBInfo - array, of dimension four, containing pointers to information
+ * associated with four nearby MBs:
+ * - pMBInfo[0] - pointer to left MB information
+ * - pMBInfo[1] - pointer to top MB information
+ * - pMBInfo[2] - pointer to top-left MB information
+ * - pMBInfo[3] - pointer to top-right MB information
+ * Any pointer in the array may be set equal to NULL if the
+ * corresponding MB doesn't exist. For each MB, the following structure
+ * members are used:
+ * - mbType - macroblock type, either OMX_VC_INTRA, OMX_VC_INTER, or
+ * OMX_VC_INTER4V
+ * - pMV0[2][2] - estimated motion vectors; represented
+ * in 1/2 pixel units
+ * - sliceID - number of the slice to which the MB belongs
+ * pSrcDstMBCurr - pointer to information structure for the current MB.
+ * The following entries should be set prior to calling the
+ * function: sliceID - the number of the slice to which the
+ * current MB belongs. The structure elements cbpy and cbpc are
+ * ignored.
+ *
+ * Output Arguments:
+ *
+ * pSrcDstMBCurr - pointer to updated information structure for the current
+ * MB after MB-level motion estimation has been completed. The
+ * following structure members are updated by the ME function:
+ * - mbType - macroblock type: OMX_VC_INTRA, OMX_VC_INTER, or
+ * OMX_VC_INTER4V.
+ * - pMV0[2][2] - estimated motion vectors; represented in
+ * terms of 1/2 pel units.
+ * - pMVPred[2][2] - predicted motion vectors; represented
+ * in terms of 1/2 pel units.
+ * The structure members cbpy and cbpc are not updated by the function.
+ * pDstSAD - pointer to the minimum SAD for INTER1V, or sum of minimum SADs
+ * for INTER4V
+ * pDstBlockSAD - pointer to an array of SAD values for each of the four
+ * 8x8 luma blocks in the MB. The block SADs are in scan order for
+ * each MB.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the
+ * following conditions is true:
+ * - at least one of the following pointers is NULL: pSrcCurrBuf,
+ * pSrcRefBuf, pRefRect, pCurrPointPos, pMBInter, pMBIntra,
+ * pSrcDstMBCurr, or pDstSAD.
+ *
+ * NOTE(review): pMBInter and pMBIntra are not parameters of this function;
+ * the list above presumably should name pMESpec and pMBInfo instead -
+ * confirm against the OpenMAX DL specification and the implementation.
+ *
+ */
+OMXResult omxVCM4P2_MotionEstimationMB (
+ const OMX_U8 *pSrcCurrBuf,
+ OMX_S32 srcCurrStep,
+ const OMX_U8 *pSrcRefBuf,
+ OMX_S32 srcRefStep,
+ const OMXRect*pRefRect,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ void *pMESpec,
+ const OMXVCM4P2MBInfoPtr *pMBInfo,
+ OMXVCM4P2MBInfo *pSrcDstMBCurr,
+ OMX_U16 *pDstSAD,
+ OMX_U16 *pDstBlockSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DCT8x8blk (6.2.4.4.1)
+ *
+ * Description:
+ * Computes a 2D forward DCT for a single 8x8 block, as defined in
+ * [ISO14496-2].
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the start of the linearly arranged input buffer
+ * (one 8x8 block, i.e. 64 OMX_S16 values); must be aligned on a
+ * 16-byte boundary. Input values (pixel intensities) are valid in
+ * the range [-255,255].
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the start of the linearly arranged output buffer
+ * (one 8x8 block, i.e. 64 OMX_S16 values); must be aligned on a
+ * 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, returned if:
+ * - pSrc or pDst is NULL.
+ * - pSrc or pDst is not 16-byte aligned.
+ *
+ */
+OMXResult omxVCM4P2_DCT8x8blk (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst
+);
+
+
+
+/**
+ * Function: omxVCM4P2_QuantIntra_I (6.2.4.4.2)
+ *
+ * Description:
+ * Performs quantization on intra block coefficients. This function supports
+ * bits_per_pixel == 8.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input intra block coefficients; must be aligned
+ * on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale).
+ * blockIndex - block index indicating the component type and position,
+ * valid in the range 0 to 5, as defined in [ISO14496-2], subclause
+ * 6.1.3.8.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (quantized) intra block coefficients.
+ * When shortVideoHeader==1, AC coefficients are saturated on the
+ * interval [-127, 127], and DC coefficients are saturated on the
+ * interval [1, 254]. When shortVideoHeader==0, AC coefficients
+ * are saturated on the interval [-2047, 2047].
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - pSrcDst is NULL.
+ * - blockIndex < 0 or blockIndex >= 10
+ * - QP <= 0 or QP >= 32.
+ *
+ * NOTE(review): blockIndex is documented above as valid in the range 0 to 5,
+ * yet only values >= 10 are listed as an error here - confirm the intended
+ * upper bound.
+ *
+ */
+OMXResult omxVCM4P2_QuantIntra_I (
+ OMX_S16 *pSrcDst,
+ OMX_U8 QP,
+ OMX_INT blockIndex,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_QuantInter_I (6.2.4.4.3)
+ *
+ * Description:
+ * Performs quantization on an inter coefficient block; supports
+ * bits_per_pixel == 8.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input inter block coefficients; must be aligned
+ * on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale)
+ * shortVideoHeader - binary flag indicating presence of short_video_header;
+ * shortVideoHeader==1 selects linear intra DC mode, and
+ * shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (quantized) inter block coefficients.
+ * When shortVideoHeader==1, AC coefficients are saturated on the
+ * interval [-127, 127], and DC coefficients are saturated on the
+ * interval [1, 254]. When shortVideoHeader==0, AC coefficients
+ * are saturated on the interval [-2047, 2047].
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - pSrcDst is NULL.
+ * - QP <= 0 or QP >= 32.
+ *
+ */
+OMXResult omxVCM4P2_QuantInter_I (
+ OMX_S16 *pSrcDst,
+ OMX_U8 QP,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_TransRecBlockCoef_intra (6.2.4.4.4)
+ *
+ * Description:
+ * Quantizes the DCT coefficients, implements intra block AC/DC coefficient
+ * prediction, and reconstructs the current intra block texture for prediction
+ * on the next frame. Quantized row and column coefficients are returned in
+ * the updated coefficient buffers.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the pixels of current intra block; must be aligned on
+ * an 8-byte boundary.
+ * pPredBufRow - pointer to the coefficient row buffer containing
+ * ((num_mb_per_row * 2 + 1) * 8) elements of type OMX_S16.
+ * Coefficients are organized into blocks of eight as described
+ * below (Internal Prediction Coefficient Update Procedures). The
+ * DC coefficient is first, and the remaining buffer locations
+ * contain the quantized AC coefficients. Each group of eight row
+ * buffer elements combined with one element eight elements ahead
+ * contains the coefficient predictors of the neighboring block
+ * that is spatially above or to the left of the block currently to
+ * be decoded. A negative-valued DC coefficient indicates that this
+ * neighboring block is not INTRA-coded or out of bounds, and
+ * therefore the AC and DC coefficients are invalid. Pointer must
+ * be aligned on an 8-byte boundary.
+ * pPredBufCol - pointer to the prediction coefficient column buffer
+ * containing 16 elements of type OMX_S16. Coefficients are
+ * organized as described in section 6.2.2.5. Pointer must be
+ * aligned on an 8-byte boundary.
+ * pSumErr - pointer to a flag indicating whether or not AC prediction is
+ * required; AC prediction is enabled if *pSumErr >= 0, but the
+ * value is not used for coefficient prediction, i.e., the sum of
+ * absolute differences starts from 0 for each call to this
+ * function. Otherwise AC prediction is disabled if *pSumErr < 0.
+ * blockIndex - block index indicating the component type and position, as
+ * defined in [ISO14496-2], subclause 6.1.3.8.
+ * curQp - quantization parameter of the macroblock to which the current
+ * block belongs
+ * pQpBuf - pointer to a 2-element quantization parameter buffer; pQpBuf[0]
+ * contains the quantization parameter associated with the 8x8
+ * block left of the current block (QPa), and pQpBuf[1] contains
+ * the quantization parameter associated with the 8x8 block above
+ * the current block (QPc). In the event that the corresponding
+ * block is outside of the VOP bound, the Qp value will not affect
+ * the intra prediction process, as described in [ISO14496-2],
+ * sub-clause 7.4.3.3, Adaptive AC Coefficient Prediction.
+ * srcStep - width of the source buffer; must be a multiple of 8.
+ * dstStep - width of the reconstructed destination buffer; must be a
+ * multiple of 16.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the quantized DCT coefficient buffer; pDst[0] contains
+ * the predicted DC coefficient; the remaining entries contain the
+ * quantized AC coefficients (without prediction). The pointer
+ * pDst must be aligned on a 16-byte boundary.
+ * pRec - pointer to the reconstructed texture; must be aligned on an
+ * 8-byte boundary.
+ * pPredBufRow - pointer to the updated coefficient row buffer
+ * pPredBufCol - pointer to the updated coefficient column buffer
+ * pPreACPredict - if prediction is enabled, the parameter points to the
+ * start of the buffer containing the coefficient differences for
+ * VLC encoding. The entry pPreACPredict[0] indicates prediction
+ * direction for the current block and takes one of the following
+ * values: OMX_VC_NONE (prediction disabled), OMX_VC_HORIZONTAL, or
+ * OMX_VC_VERTICAL. The entries
+ * pPreACPredict[1]-pPreACPredict[7] contain predicted AC
+ * coefficients. If prediction is disabled (*pSumErr<0) then the
+ * contents of this buffer are undefined upon return from the
+ * function
+ * pSumErr - pointer to the value of the accumulated AC coefficient errors,
+ * i.e., sum of the absolute differences between predicted and
+ * unpredicted AC coefficients
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ * - At least one of the following pointers is NULL: pSrc, pDst, pRec,
+ * pPredBufRow, pPredBufCol, pQpBuf, pPreACPredict, pSumErr.
+ * - blockIndex < 0 or blockIndex >= 10;
+ * - curQp <= 0 or curQp >= 32.
+ * - srcStep, or dstStep <= 0 or not a multiple of 8.
+ * - pDst is not 16-byte aligned.
+ * - At least one of the following pointers is not 8-byte aligned:
+ * pSrc, pRec.
+ *
+ * Note: The coefficient buffers must be updated in accordance with the
+ * update procedures defined in section 6.2.2.
+ *
+ */
+OMXResult omxVCM4P2_TransRecBlockCoef_intra (
+ const OMX_U8 *pSrc,
+ OMX_S16 *pDst,
+ OMX_U8 *pRec,
+ OMX_S16 *pPredBufRow,
+ OMX_S16 *pPredBufCol,
+ OMX_S16 *pPreACPredict,
+ OMX_INT *pSumErr,
+ OMX_INT blockIndex,
+ OMX_U8 curQp,
+ const OMX_U8 *pQpBuf,
+ OMX_INT srcStep,
+ OMX_INT dstStep,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_TransRecBlockCoef_inter (6.2.4.4.5)
+ *
+ * Description:
+ * Implements DCT, and quantizes the DCT coefficients of the inter block
+ * while reconstructing the texture residual. There is no boundary check for
+ * the bit stream buffer.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the residuals to be encoded; must be aligned on a
+ * 16-byte boundary.
+ * QP - quantization parameter.
+ * shortVideoHeader - binary flag indicating presence of short_video_header;
+ * shortVideoHeader==1 selects linear intra DC mode, and
+ * shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the quantized DCT coefficients buffer; must be aligned
+ * on a 16-byte boundary.
+ * pRec - pointer to the reconstructed texture residuals; must be aligned
+ * on a 16-byte boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is either NULL or
+ * not 16-byte aligned:
+ * - pSrc
+ * - pDst
+ * - pRec
+ * - QP <= 0 or QP >= 32.
+ *
+ */
+OMXResult omxVCM4P2_TransRecBlockCoef_inter (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst,
+ OMX_S16 *pRec,
+ OMX_U8 QP,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_EncodeVLCZigzag_IntraDCVLC (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one
+ * intra block. Two versions of the function (DCVLC and ACVLC) are provided
+ * in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding".
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the bitstream
+ * pBitOffset - pointer to the bit position in the byte pointed by
+ * *ppBitStream. Valid within 0 to 7.
+ * pQDctBlkCoef - pointer to the quantized DCT coefficient
+ * predDir - AC prediction direction, which is used to decide the zigzag
+ * scan pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used.
+ * Performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction.
+ * Performs alternate-vertical zigzag scan.
+ * - OMX_VC_VERTICAL - Vertical prediction.
+ * Performs alternate-horizontal zigzag scan.
+ * pattern - block pattern which is used to decide whether this block is
+ * encoded
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ * videoComp - video component type (luminance, chrominance) of the current
+ * block
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is encoded, so
+ * that it points to the current byte in the bit stream buffer.
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pQDctBlkCoef.
+ * - *pBitOffset < 0, or *pBitOffset > 7.
+ * - predDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or
+ * OMX_VC_VERTICAL.
+ * - videoComp is not one component of enum OMXVCM4P2VideoComponent.
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraDCVLC (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ const OMX_S16 *pQDctBlkCoef,
+ OMX_U8 predDir,
+ OMX_U8 pattern,
+ OMX_INT shortVideoHeader,
+ OMXVCM4P2VideoComponent videoComp
+);
+
+
+
+/**
+ * Function: omxVCM4P2_EncodeVLCZigzag_IntraACVLC (6.2.4.5.2)
+ *
+ * Description:
+ * Performs zigzag scan and VLC encoding of AC and DC coefficients for one
+ * intra block. Two versions of the function (DCVLC and ACVLC) are provided
+ * in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding".
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the bitstream
+ * pBitOffset - pointer to the bit position in the byte pointed by
+ * *ppBitStream. Valid within 0 to 7.
+ * pQDctBlkCoef - pointer to the quantized DCT coefficient
+ * predDir - AC prediction direction, which is used to decide the zigzag
+ * scan pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used.
+ * Performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction.
+ * Performs alternate-vertical zigzag scan.
+ * - OMX_VC_VERTICAL - Vertical prediction.
+ * Performs alternate-horizontal zigzag scan.
+ * pattern - block pattern which is used to decide whether this block is
+ * encoded
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is encoded, so
+ * that it points to the current byte in the bit stream buffer.
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments:
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pQDctBlkCoef.
+ * - *pBitOffset < 0, or *pBitOffset > 7.
+ * - predDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or
+ * OMX_VC_VERTICAL.
+ * - VideoComp is not one component of enum OMXVCM4P2VideoComponent.
+ *
+ * NOTE(review): this function has no videoComp parameter, so the last error
+ * condition above cannot apply as written; it appears to be carried over
+ * from the IntraDCVLC variant - confirm and remove if so.
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_IntraACVLC (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ const OMX_S16 *pQDctBlkCoef,
+ OMX_U8 predDir,
+ OMX_U8 pattern,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_EncodeVLCZigzag_Inter (6.2.4.5.3)
+ *
+ * Description:
+ * Performs classical zigzag scanning and VLC encoding for one inter block.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream
+ * pBitOffset - pointer to the bit position in the byte pointed by
+ * *ppBitStream. Valid within 0 to 7
+ * pQDctBlkCoef - pointer to the quantized DCT coefficient
+ * pattern - block pattern which is used to decide whether this block is
+ * encoded
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is encoded so that
+ * it points to the current byte in the bit stream buffer.
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - Bad arguments
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pQDctBlkCoef
+ * - *pBitOffset < 0, or *pBitOffset > 7.
+ *
+ */
+OMXResult omxVCM4P2_EncodeVLCZigzag_Inter (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ const OMX_S16 *pQDctBlkCoef,
+ OMX_U8 pattern,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_EncodeMV (6.2.4.5.4)
+ *
+ * Description:
+ * Predicts a motion vector for the current macroblock, encodes the
+ * difference, and writes the output to the stream buffer. The input MVs
+ * pMVCurMB, pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB should lie
+ * within the ranges associated with the input parameter fcodeForward, as
+ * described in [ISO14496-2], subclause 7.6.3. This function provides a
+ * superset of the functionality associated with the function
+ * omxVCM4P2_FindMVpred.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the bitstream buffer
+ * pBitOffset - index of the first free (next available) bit in the stream
+ * buffer referenced by *ppBitStream, valid in the range 0 to 7.
+ * pMVCurMB - pointer to the current macroblock motion vector; a value of
+ * NULL indicates unavailability.
+ * pSrcMVLeftMB - pointer to the source left macroblock motion vector; a
+ * value of NULL indicates unavailability.
+ * pSrcMVUpperMB - pointer to source upper macroblock motion vector; a
+ * value of NULL indicates unavailability.
+ * pSrcMVUpperRightMB - pointer to source upper right MB motion vector; a
+ * value of NULL indicates unavailability.
+ * fcodeForward - an integer with values from 1 to 7; used in encoding
+ * motion vectors related to search range, as described in
+ * [ISO14496-2], subclause 7.6.3.
+ * MBType - macro block type, valid in the range 0 to 5
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - updated pointer to the current byte in the bit stream
+ * buffer
+ * pBitOffset - updated index of the next available bit position in stream
+ * buffer referenced by *ppBitStream
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - At least one of the following pointers is NULL: ppBitStream,
+ * *ppBitStream, pBitOffset, pMVCurMB
+ * - *pBitOffset < 0, or *pBitOffset > 7.
+ * - fcodeForward <= 0, or fcodeForward > 7, or MBType < 0.
+ *
+ * NOTE(review): pMVCurMB is described above as allowing NULL to indicate
+ * unavailability, yet a NULL pMVCurMB is also listed as a bad argument -
+ * confirm which statement reflects the implementation.
+ *
+ */
+OMXResult omxVCM4P2_EncodeMV (
+ OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ const OMXVCMotionVector *pMVCurMB,
+ const OMXVCMotionVector*pSrcMVLeftMB,
+ const OMXVCMotionVector *pSrcMVUpperMB,
+ const OMXVCMotionVector *pSrcMVUpperRightMB,
+ OMX_INT fcodeForward,
+ OMXVCM4P2MacroblockType MBType
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodePadMV_PVOP (6.2.5.1.1)
+ *
+ * Description:
+ * Decodes and pads the four motion vectors associated with a non-intra P-VOP
+ * macroblock. For macroblocks of type OMX_VC_INTER4V, the output MV is
+ * padded as specified in [ISO14496-2], subclause 7.6.1.6. Otherwise, for
+ * macroblocks of types other than OMX_VC_INTER4V, the decoded MV is copied to
+ * all four output MV buffer entries.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream buffer
+ * pBitOffset - pointer to the bit position in the byte pointed to by
+ * *ppBitStream. *pBitOffset is valid within [0-7].
+ * pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB - pointers to the
+ * motion vector buffers of the macroblocks located at the left,
+ * upper, and upper-right side of the current macroblock,
+ * respectively; a value of NULL indicates unavailability. Note:
+ * Any neighborhood macroblock outside the current VOP or video
+ * packet or outside the current GOB (when short_video_header is
+ * 1) for which gob_header_empty is 0 is treated as
+ * transparent, according to [ISO14496-2], subclause 7.6.5.
+ * fcodeForward - a code equal to vop_fcode_forward in MPEG-4 bit stream
+ * syntax
+ * MBType - the type of the current macroblock. If MBType is not equal to
+ * OMX_VC_INTER4V, the destination motion vector buffer is still
+ * filled with the same decoded vector.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded, so
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream
+ * pDstMVCurMB - pointer to the motion vector buffer for the current
+ * macroblock; contains four decoded motion vectors
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDstMVCurMB
+ * - *pBitOffset is outside the range [0,7]
+ * - fcodeForward is outside the range (0,7]
+ * - MBType is less than zero
+ * - motion vector buffer is not 4-byte aligned.
+ * OMX_Sts_Err - status error
+ *
+ */
+OMXResult omxVCM4P2_DecodePadMV_PVOP (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMXVCMotionVector *pSrcMVLeftMB,
+ OMXVCMotionVector*pSrcMVUpperMB,
+ OMXVCMotionVector *pSrcMVUpperRightMB,
+ OMXVCMotionVector*pDstMVCurMB,
+ OMX_INT fcodeForward,
+ OMXVCM4P2MacroblockType MBType
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients
+ * for one intra block. Two versions of the function (DCVLC and ACVLC) are
+ * provided in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the
+ * bitstream buffer
+ * pBitOffset - pointer to the bit position in the current byte referenced
+ * by *ppBitStream. The parameter *pBitOffset is valid in the
+ * range [0-7].
+ * Bit Position in one byte: |Most Least|
+ * *pBitOffset |0 1 2 3 4 5 6 7|
+ * predDir - AC prediction direction; used to select the zigzag scan
+ * pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used;
+ * performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction;
+ * performs alternate-vertical zigzag scan;
+ * - OMX_VC_VERTICAL - Vertical prediction;
+ * performs alternate-horizontal zigzag scan.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ * videoComp - video component type (luminance or chrominance) of the
+ * current block
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded such
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated such that it points to the current
+ * bit position in the byte pointed by *ppBitStream
+ * pDst - pointer to the coefficient buffer of current block; must be
+ * 4-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - *pBitOffset exceeds [0,7]
+ * - predDir exceeds [0,2]
+ * - pDst is not 4-byte aligned
+ * OMX_Sts_Err - if:
+ * - In DecodeVLCZigzag_IntraDCVLC, dc_size > 12
+ * - At least one of mark bits equals zero
+ * - Illegal stream encountered; code cannot be located in VLC table
+ * - Forbidden code encountered in the VLC FLC table.
+ * - The number of coefficients is greater than 64
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraDCVLC (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_S16 *pDst,
+ OMX_U8 predDir,
+ OMX_INT shortVideoHeader,
+ OMXVCM4P2VideoComponent videoComp
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC (6.2.5.2.2)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients
+ * for one intra block. Two versions of the function (DCVLC and ACVLC) are
+ * provided in order to support the two different methods of processing DC
+ * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC
+ * Coefficient Decoding for the Case of Switched VLC Encoding.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the
+ * bitstream buffer
+ * pBitOffset - pointer to the bit position in the current byte referenced
+ * by *ppBitStream. The parameter *pBitOffset is valid in the
+ * range [0-7].
+ * Bit Position in one byte: |Most Least|
+ * *pBitOffset |0 1 2 3 4 5 6 7|
+ * predDir - AC prediction direction; used to select the zigzag scan
+ * pattern; takes one of the following values:
+ * - OMX_VC_NONE - AC prediction not used;
+ * performs classical zigzag scan.
+ * - OMX_VC_HORIZONTAL - Horizontal prediction;
+ * performs alternate-vertical zigzag scan;
+ * - OMX_VC_VERTICAL - Vertical prediction;
+ * performs alternate-horizontal zigzag scan.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ * videoComp - video component type (luminance or chrominance) of the
+ * current block. This description is shared with the DCVLC
+ * version; the ACVLC prototype below takes no videoComp
+ * argument.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded such
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated such that it points to the current
+ * bit position in the byte pointed by *ppBitStream
+ * pDst - pointer to the coefficient buffer of current block; must be
+ * 4-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - *pBitOffset exceeds [0,7]
+ * - predDir exceeds [0,2]
+ * - pDst is not 4-byte aligned
+ * OMX_Sts_Err - if:
+ * - In DecodeVLCZigzag_IntraDCVLC, dc_size > 12
+ * - At least one of mark bits equals zero
+ * - Illegal stream encountered; code cannot be located in VLC table
+ * - Forbidden code encountered in the VLC FLC table
+ * - The number of coefficients is greater than 64
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_IntraACVLC (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_S16 *pDst,
+ OMX_U8 predDir,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeVLCZigzag_Inter (6.2.5.2.3)
+ *
+ * Description:
+ * Performs VLC decoding and inverse zigzag scan for one inter-coded block.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - double pointer to the current byte in the stream buffer
+ * pBitOffset - pointer to the next available bit in the current stream
+ * byte referenced by *ppBitStream. The parameter *pBitOffset is
+ * valid within the range [0-7].
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; escape modes 0-3 are used if
+ * shortVideoHeader==0, and escape mode 4 is used when
+ * shortVideoHeader==1.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded such
+ * that it points to the current byte in the stream buffer
+ * pBitOffset - *pBitOffset is updated after decoding such that it points
+ * to the next available bit in the stream byte referenced by
+ * *ppBitStream
+ * pDst - pointer to the coefficient buffer of current block; must be
+ * 4-byte aligned.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - pDst is not 4-byte aligned
+ * - *pBitOffset exceeds [0,7]
+ * OMX_Sts_Err - status error, if:
+ * - At least one mark bit is equal to zero
+ * - Encountered an illegal stream code that cannot be found in the VLC table
+ * - Encountered an illegal code in the VLC FLC table
+ * - The number of coefficients is greater than 64
+ *
+ */
+OMXResult omxVCM4P2_DecodeVLCZigzag_Inter (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_S16 *pDst,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_QuantInvIntra_I (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an intra/inter coded
+ * block. Supports bits_per_pixel = 8. The output coefficients are clipped to
+ * the range [-2048, 2047]. This description is shared by the intra and inter
+ * versions; the prototype below is the intra version.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input (quantized) intra/inter block; must be
+ * aligned on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale)
+ * videoComp - video component type of the current block. Takes one of the
+ * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra
+ * version only).
+ * shortVideoHeader - binary flag indicating presence of short_video_header
+ * (intra version only).
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (dequantized) intra/inter block
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; one or more of the following is
+ * true:
+ * - pSrcDst is NULL
+ * - QP <= 0 or QP >= 31
+ * NOTE(review): omxVCM4P2_PredictReconCoefIntra in this header
+ * rejects only QP > 31; confirm whether QP == 31 is really
+ * invalid here.
+ * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE.
+ *
+ */
+OMXResult omxVCM4P2_QuantInvIntra_I (
+ OMX_S16 *pSrcDst,
+ OMX_INT QP,
+ OMXVCM4P2VideoComponent videoComp,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_QuantInvInter_I (6.2.5.3.2)
+ *
+ * Description:
+ * Performs the second inverse quantization mode on an intra/inter coded
+ * block. Supports bits_per_pixel = 8. The output coefficients are clipped to
+ * the range [-2048, 2047]. This description is shared by the intra and inter
+ * versions; the prototype below is the inter version, which takes only
+ * pSrcDst and QP.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input (quantized) intra/inter block; must be
+ * aligned on a 16-byte boundary.
+ * QP - quantization parameter (quantizer_scale)
+ * videoComp - video component type of the current block. Takes one of the
+ * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra
+ * version only; not a parameter of this function).
+ * shortVideoHeader - binary flag indicating presence of short_video_header
+ * (intra version only; not a parameter of this function).
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the output (dequantized) intra/inter block
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; one or more of the following is
+ * true:
+ * - pSrcDst is NULL
+ * - QP <= 0 or QP >= 31
+ * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE
+ * (intra version only).
+ *
+ */
+OMXResult omxVCM4P2_QuantInvInter_I (
+ OMX_S16 *pSrcDst,
+ OMX_INT QP
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Intra (6.2.5.4.1)
+ *
+ * Description:
+ * Decodes the INTRA block coefficients. Inverse quantization, inverse
+ * zigzag positioning, and IDCT, with appropriate clipping on each step, are
+ * performed on the coefficients. The results are then placed in the output
+ * frame/plane on a pixel basis. Note: This function will be used only when
+ * at least one non-zero AC coefficient of current block exists in the bit
+ * stream. The DC only condition will be handled in another function.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream buffer. There is no boundary check for the bit stream
+ * buffer.
+ * pBitOffset - pointer to the bit position in the byte pointed to by
+ * *ppBitStream. *pBitOffset is valid within [0-7].
+ * step - width of the destination plane
+ * pCoefBufRow - pointer to the coefficient row buffer; must be aligned on
+ * an 8-byte boundary.
+ * pCoefBufCol - pointer to the coefficient column buffer; must be aligned
+ * on an 8-byte boundary.
+ * curQP - quantization parameter of the macroblock which the current block
+ * belongs to
+ * pQPBuf - pointer to the quantization parameter buffer
+ * blockIndex - block index indicating the component type and position as
+ * defined in [ISO14496-2], subclause 6.1.3.8, Figure 6-5.
+ * intraDCVLC - a code determined by intra_dc_vlc_thr and QP. This allows a
+ * mechanism to switch between two VLC for coding of Intra DC
+ * coefficients as per [ISO14496-2], Table 6-21.
+ * ACPredFlag - a flag equal to ac_pred_flag (of luminance) indicating if
+ * the ac coefficients of the first row or first column are
+ * differentially coded for intra coded macroblock.
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded, so
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream
+ * pDst - pointer to the block in the destination plane; must be aligned on
+ * an 8-byte boundary.
+ * pCoefBufRow - pointer to the updated coefficient row buffer.
+ * pCoefBufCol - pointer to the updated coefficient column buffer. Note:
+ * The coefficient buffers must be updated in accordance with the
+ * update procedure defined in section 6.2.2.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pCoefBufRow, pCoefBufCol,
+ * pQPBuf, pDst.
+ * - *pBitOffset exceeds [0,7]
+ * - curQP exceeds (1, 31)
+ * - blockIndex exceeds [0,5]
+ * - step is not a multiple of 8
+ * - a pointer alignment requirement was violated.
+ * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Intra.
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Intra (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_U8 *pDst,
+ OMX_INT step,
+ OMX_S16 *pCoefBufRow,
+ OMX_S16 *pCoefBufCol,
+ OMX_U8 curQP,
+ const OMX_U8 *pQPBuf,
+ OMX_INT blockIndex,
+ OMX_INT intraDCVLC,
+ OMX_INT ACPredFlag,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Inter (6.2.5.4.2)
+ *
+ * Description:
+ * Decodes the INTER block coefficients. This function performs inverse
+ * quantization, inverse zigzag positioning, and IDCT (with appropriate
+ * clipping on each step) on the coefficients. The results (residuals) are
+ * placed in a contiguous array of 64 elements. For INTER block, the output
+ * buffer holds the residuals for further reconstruction.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - pointer to the pointer to the current byte in the bit
+ * stream buffer. There is no boundary check for the bit stream
+ * buffer.
+ * pBitOffset - pointer to the bit position in the byte pointed to by
+ * *ppBitStream. *pBitOffset is valid within [0-7]
+ * QP - quantization parameter
+ * shortVideoHeader - binary flag indicating presence of
+ * short_video_header; shortVideoHeader==1 selects linear intra DC
+ * mode, and shortVideoHeader==0 selects non linear intra DC mode.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after the block is decoded, so
+ * that it points to the current byte in the bit stream buffer
+ * pBitOffset - *pBitOffset is updated so that it points to the current bit
+ * position in the byte pointed by *ppBitStream
+ * pDst - pointer to the decoded residual buffer (a contiguous array of 64
+ * elements of OMX_S16 data type); must be aligned on a 16-byte
+ * boundary.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the following pointers is NULL:
+ * ppBitStream, *ppBitStream, pBitOffset, pDst
+ * - *pBitOffset exceeds [0,7]
+ * - QP <= 0.
+ * - pDst is not 16-byte aligned
+ * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Inter.
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Inter (
+ const OMX_U8 **ppBitStream,
+ OMX_INT *pBitOffset,
+ OMX_S16 *pDst,
+ OMX_INT QP,
+ OMX_INT shortVideoHeader
+);
+
+
+
+/**
+ * Function: omxVCM4P2_PredictReconCoefIntra (6.2.5.4.3)
+ *
+ * Description:
+ * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+ * to the function call, prediction direction (predDir) should be selected as
+ * specified in [ISO14496-2], subclause 7.4.3.1.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the coefficient buffer which contains the quantized
+ * coefficient residuals (PQF) of the current block; must be
+ * aligned on a 4-byte boundary. The output coefficients are
+ * saturated to the range [-2048, 2047].
+ * pPredBufRow - pointer to the coefficient row buffer; must be aligned on
+ * a 4-byte boundary.
+ * pPredBufCol - pointer to the coefficient column buffer; must be aligned
+ * on a 4-byte boundary.
+ * curQP - quantization parameter of the current block. curQP may be equal
+ * to predQP, especially when the current block and the predictor
+ * block are in the same macroblock.
+ * predQP - quantization parameter of the predictor block
+ * predDir - indicates the prediction direction, which takes one of the
+ * following values:
+ * - OMX_VC_HORIZONTAL - predict horizontally
+ * - OMX_VC_VERTICAL - predict vertically
+ * ACPredFlag - a flag indicating if AC prediction should be performed. It
+ * is equal to ac_pred_flag in the bit stream syntax of MPEG-4
+ * videoComp - video component type (luminance or chrominance) of the
+ * current block
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to the coefficient buffer which contains the quantized
+ * coefficients (QF) of the current block
+ * pPredBufRow - pointer to the updated coefficient row buffer
+ * pPredBufCol - pointer to the updated coefficient column buffer. Note:
+ * Buffer update: Update the AC prediction buffer (both row and
+ * column buffer).
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments, if:
+ * - At least one of the pointers is NULL:
+ * pSrcDst, pPredBufRow, or pPredBufCol.
+ * - curQP <= 0,
+ * - predQP <= 0,
+ * - curQP > 31,
+ * - predQP > 31,
+ * - predDir exceeds [1,2]
+ * - pSrcDst, pPredBufRow, or pPredBufCol is not 4-byte aligned.
+ *
+ */
+OMXResult omxVCM4P2_PredictReconCoefIntra (
+ OMX_S16 *pSrcDst,
+ OMX_S16 *pPredBufRow,
+ OMX_S16 *pPredBufCol,
+ OMX_INT curQP,
+ OMX_INT predQP,
+ OMX_INT predDir,
+ OMX_INT ACPredFlag,
+ OMXVCM4P2VideoComponent videoComp
+);
+
+
+
+/**
+ * Function: omxVCM4P2_MCReconBlock (6.2.5.5.1)
+ *
+ * Description:
+ * Performs motion compensation prediction for an 8x8 block using
+ * interpolation described in [ISO14496-2], subclause 7.6.2.
+ *
+ * Input Arguments:
+ *
+ * pSrc - pointer to the block in the reference plane.
+ * srcStep - distance between the start of consecutive lines in the
+ * reference plane, in bytes; must be a multiple of 8.
+ * dstStep - distance between the start of consecutive lines in the
+ * destination plane, in bytes; must be a multiple of 8.
+ * pSrcResidue - pointer to a buffer containing the 16-bit prediction
+ * residuals; must be 16-byte aligned. If the pointer is NULL, then
+ * no prediction is done, only motion compensation, i.e., the block
+ * is moved with interpolation.
+ * predictType - bilinear interpolation type, as defined in section
+ * 6.2.1.2.
+ * rndVal - rounding control parameter: 0 - disabled; 1 - enabled.
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the destination buffer; must be 8-byte aligned. If
+ * prediction residuals are added then output intensities are
+ * clipped to the range [0,255].
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned under any of the following
+ * conditions:
+ * - pDst is not 8-byte aligned.
+ * - pSrcResidue is not 16-byte aligned.
+ * - one or more of the following pointers is NULL: pSrc or pDst.
+ * - either srcStep or dstStep is not a multiple of 8.
+ * - invalid type specified for the parameter predictType.
+ * - the parameter rndVal is equal to neither 0 nor 1.
+ *
+ */
+OMXResult omxVCM4P2_MCReconBlock (
+ const OMX_U8 *pSrc,
+ OMX_INT srcStep,
+ const OMX_S16 *pSrcResidue,
+ OMX_U8 *pDst,
+ OMX_INT dstStep,
+ OMX_INT predictType,
+ OMX_INT rndVal
+);
+
+
+
+/* 6.3.1.1 Intra 16x16 Prediction Modes */
+/* Enumerates the intra_16x16 macroblock prediction modes. This is the type of
+ * the predMode argument of omxVCM4P10_PredictIntra_16x16 and of the
+ * Intra16x16PredMode field of OMXVCM4P10MBInfo. */
+
+typedef enum {
+ OMX_VC_16X16_VERT = 0, /** Intra_16x16_Vertical */
+ OMX_VC_16X16_HOR = 1, /** Intra_16x16_Horizontal */
+ OMX_VC_16X16_DC = 2, /** Intra_16x16_DC */
+ OMX_VC_16X16_PLANE = 3 /** Intra_16x16_Plane */
+} OMXVCM4P10Intra16x16PredMode;
+
+
+
+/* 6.3.1.2 Intra 4x4 Prediction Modes */
+/* Enumerates the intra_4x4 macroblock prediction modes. This is the type of
+ * the predMode argument of omxVCM4P10_PredictIntra_4x4 and of the
+ * pIntra4x4PredMode entries of OMXVCM4P10MBInfo. */
+
+typedef enum {
+ OMX_VC_4X4_VERT = 0, /** Intra_4x4_Vertical */
+ OMX_VC_4X4_HOR = 1, /** Intra_4x4_Horizontal */
+ OMX_VC_4X4_DC = 2, /** Intra_4x4_DC */
+ OMX_VC_4X4_DIAG_DL = 3, /** Intra_4x4_Diagonal_Down_Left */
+ OMX_VC_4X4_DIAG_DR = 4, /** Intra_4x4_Diagonal_Down_Right */
+ OMX_VC_4X4_VR = 5, /** Intra_4x4_Vertical_Right */
+ OMX_VC_4X4_HD = 6, /** Intra_4x4_Horizontal_Down */
+ OMX_VC_4X4_VL = 7, /** Intra_4x4_Vertical_Left */
+ OMX_VC_4X4_HU = 8 /** Intra_4x4_Horizontal_Up */
+} OMXVCM4P10Intra4x4PredMode;
+
+
+
+/* 6.3.1.3 Chroma Prediction Modes */
+/* Enumerates the intra chroma prediction modes; the predMode argument of
+ * omxVCM4P10_PredictIntraChroma_8x8 is validated against this enumeration.
+ * Note that the numbering differs from the luma enumerations: here DC is 0
+ * and VERT is 2, whereas the 16x16 and 4x4 enums use VERT = 0 and DC = 2. */
+
+typedef enum {
+ OMX_VC_CHROMA_DC = 0, /** Intra_Chroma_DC */
+ OMX_VC_CHROMA_HOR = 1, /** Intra_Chroma_Horizontal */
+ OMX_VC_CHROMA_VERT = 2, /** Intra_Chroma_Vertical */
+ OMX_VC_CHROMA_PLANE = 3 /** Intra_Chroma_Plane */
+} OMXVCM4P10IntraChromaPredMode;
+
+
+
+/* 6.3.1.4 Motion Estimation Modes */
+/* Enumerates the H.264 motion estimation modes (fast or full search). */
+
+typedef enum {
+ OMX_VC_M4P10_FAST_SEARCH = 0, /** Fast motion search */
+ OMX_VC_M4P10_FULL_SEARCH = 1 /** Full motion search */
+} OMXVCM4P10MEMode;
+
+
+
+/* 6.3.1.5 Macroblock Types */
+/* Enumerates the H.264 macroblock types; this is the type of the mbType field
+ * of OMXVCM4P10MBInfo. The values 6 and 7 are not assigned: the inter types
+ * occupy 0..5 and the intra types start at 8. */
+
+typedef enum {
+ OMX_VC_P_16x16 = 0, /* defined by [ISO14496-10] */
+ OMX_VC_P_16x8 = 1,
+ OMX_VC_P_8x16 = 2,
+ OMX_VC_P_8x8 = 3,
+ OMX_VC_PREF0_8x8 = 4,
+ OMX_VC_INTER_SKIP = 5,
+ OMX_VC_INTRA_4x4 = 8,
+ OMX_VC_INTRA_16x16 = 9,
+ OMX_VC_INTRA_PCM = 10
+} OMXVCM4P10MacroblockType;
+
+
+
+/* 6.3.1.6 Sub-Macroblock Types */
+/* Enumerates the H.264 sub-macroblock types; this is the element type of the
+ * subMBType[4] field of OMXVCM4P10MBInfo. */
+
+typedef enum {
+ OMX_VC_SUB_P_8x8 = 0, /* defined by [ISO14496-10] */
+ OMX_VC_SUB_P_8x4 = 1,
+ OMX_VC_SUB_P_4x8 = 2,
+ OMX_VC_SUB_P_4x4 = 3
+} OMXVCM4P10SubMacroblockType;
+
+
+
+/* 6.3.1.7 Variable Length Coding (VLC) Information */
+
+typedef struct {
+ OMX_U8 uTrailing_Ones; /* Number of trailing ones; 3 at most */
+ OMX_U8 uTrailing_One_Signs; /* Signs of the trailing ones */
+ OMX_U8 uNumCoeffs; /* Total number of non-zero coefs, including trailing ones */
+ OMX_U8 uTotalZeros; /* Total number of zero coefs */
+ OMX_S16 iLevels[16]; /* Levels of non-zero coefs, in reverse zig-zag order */
+ OMX_U8 uRuns[16]; /* Runs for levels and trailing ones, in reverse zig-zag order */
+} OMXVCM4P10VLCInfo;
+
+
+
+/* 6.3.1.8 Macroblock Information */
+
+typedef struct {
+ OMX_S32 sliceId; /* slice number */
+ OMXVCM4P10MacroblockType mbType; /* MB type */
+ OMXVCM4P10SubMacroblockType subMBType[4]; /* sub-block type */
+ OMX_S32 qpy; /* qp for luma */
+ OMX_S32 qpc; /* qp for chroma */
+ OMX_U32 cbpy; /* CBP Luma */
+ OMX_U32 cbpc; /* CBP Chroma */
+ OMXVCMotionVector pMV0[4][4]; /* motion vector, represented using 1/4-pel units, pMV0[blocky][blockx] (blocky = 0~3, blockx = 0~3) */
+ OMXVCMotionVector pMVPred[4][4]; /* motion vector prediction, represented using 1/4-pel units, pMVPred[blocky][blockx] (blocky = 0~3, blockx = 0~3) */
+ OMX_U8 pRefL0Idx[4]; /* reference picture indices */
+ OMXVCM4P10Intra16x16PredMode Intra16x16PredMode; /* best intra 16x16 prediction mode */
+ OMXVCM4P10Intra4x4PredMode pIntra4x4PredMode[16]; /* best intra 4x4 prediction mode for each block; presumably ordered by the same (blocky, blockx) block positions as pMV0 above - TODO confirm */
+} OMXVCM4P10MBInfo, *OMXVCM4P10MBInfoPtr;
+
+
+
+/* 6.3.1.9 Motion Estimation Parameters */
+
+typedef struct {
+ OMX_S32 blockSplitEnable8x8; /* enables 16x8, 8x16, 8x8 */
+ OMX_S32 blockSplitEnable4x4; /* enable splitting of 8x4, 4x8, 4x4 blocks */
+ OMX_S32 halfSearchEnable; /* presumably enables half-pel search (1=enable, 0=disable) - TODO confirm */
+ OMX_S32 quarterSearchEnable; /* presumably enables quarter-pel search (1=enable, 0=disable) - TODO confirm */
+ OMX_S32 intraEnable4x4; /* 1=enable, 0=disable */
+ OMX_S32 searchRange16x16; /* integer pixel units */
+ OMX_S32 searchRange8x8; /* presumably integer pixel units, like searchRange16x16 - TODO confirm */
+ OMX_S32 searchRange4x4; /* presumably integer pixel units, like searchRange16x16 - TODO confirm */
+} OMXVCM4P10MEParams;
+
+
+
+/**
+ * Function: omxVCM4P10_PredictIntra_4x4 (6.3.3.1.1)
+ *
+ * Description:
+ * Perform Intra_4x4 prediction for luma samples. If the upper-right block is
+ * not available, then duplication work should be handled inside the function.
+ * Users need not define them outside.
+ *
+ * Input Arguments:
+ *
+ * pSrcLeft - Pointer to the buffer of 4 left pixels:
+ * p[x, y] (x = -1, y = 0..3)
+ * pSrcAbove - Pointer to the buffer of 8 above pixels:
+ * p[x,y] (x = 0..7, y =-1);
+ * must be aligned on a 4-byte boundary.
+ * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ * leftStep - Step of left pixel buffer; must be a multiple of 4.
+ * dstStep - Step of the destination buffer; must be a multiple of 4.
+ * predMode - Intra_4x4 prediction mode (see OMXVCM4P10Intra4x4PredMode).
+ * availability - Neighboring 4x4 block availability flag, refer to
+ * "Neighboring Macroblock Availability" .
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to the destination buffer; must be aligned on a 4-byte
+ * boundary.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pDst is NULL.
+ * dstStep < 4, or dstStep is not a multiple of 4.
+ * leftStep is not a multiple of 4.
+ * predMode is not in the valid range of enumeration
+ * OMXVCM4P10Intra4x4PredMode.
+ * predMode is OMX_VC_4X4_VERT, but availability doesn't set OMX_VC_UPPER
+ * indicating p[x,-1] (x = 0..3) is not available.
+ * predMode is OMX_VC_4X4_HOR, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..3) is not available.
+ * predMode is OMX_VC_4X4_DIAG_DL, but availability doesn't set
+ * OMX_VC_UPPER indicating p[x, -1] (x = 0..3) is not available.
+ * predMode is OMX_VC_4X4_DIAG_DR, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ * available.
+ * predMode is OMX_VC_4X4_VR, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ * available.
+ * predMode is OMX_VC_4X4_HD, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not
+ * available.
+ * predMode is OMX_VC_4X4_VL, but availability doesn't set OMX_VC_UPPER
+ * indicating p[x,-1] (x = 0..3) is not available.
+ * predMode is OMX_VC_4X4_HU, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..3) is not available.
+ * availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ * availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ * either pSrcAbove or pDst is not aligned on a 4-byte boundary.
+ *
+ * Note:
+ * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ * they are not used by intra prediction as implied in predMode.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntra_4x4 (
+ const OMX_U8 *pSrcLeft,
+ const OMX_U8 *pSrcAbove,
+ const OMX_U8 *pSrcAboveLeft,
+ OMX_U8 *pDst,
+ OMX_INT leftStep,
+ OMX_INT dstStep,
+ OMXVCM4P10Intra4x4PredMode predMode,
+ OMX_S32 availability
+);
+
+
+
+/**
+ * Function: omxVCM4P10_PredictIntra_16x16 (6.3.3.1.2)
+ *
+ * Description:
+ * Perform Intra_16x16 prediction for luma samples. If the upper-right block
+ * is not available, then duplication work should be handled inside the
+ * function. Users need not define them outside.
+ *
+ * Input Arguments:
+ *
+ * pSrcLeft - Pointer to the buffer of 16 left pixels: p[x, y] (x = -1, y =
+ * 0..15)
+ * pSrcAbove - Pointer to the buffer of 16 above pixels: p[x,y] (x = 0..15,
+ * y= -1); must be aligned on a 16-byte boundary.
+ * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ * leftStep - Step of left pixel buffer; must be a multiple of 16.
+ * dstStep - Step of the destination buffer; must be a multiple of 16.
+ * predMode - Intra_16x16 prediction mode, please refer to section 3.4.1.
+ * availability - Neighboring 16x16 MB availability flag. Refer to
+ * section 3.4.4.
+ *
+ * Output Arguments:
+ *
+ * pDst -Pointer to the destination buffer; must be aligned on a 16-byte
+ * boundary.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pDst is NULL.
+ * dstStep < 16. or dstStep is not a multiple of 16.
+ * leftStep is not a multiple of 16.
+ * predMode is not in the valid range of enumeration
+ * OMXVCM4P10Intra16x16PredMode
+ * predMode is OMX_VC_16X16_VERT, but availability doesn't set
+ * OMX_VC_UPPER indicating p[x,-1] (x = 0..15) is not available.
+ * predMode is OMX_VC_16X16_HOR, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..15) is not available.
+ * predMode is OMX_VC_16X16_PLANE, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1](x = 0..15), or p[-1,y] (y = 0..15), or p[-1,-1] is not
+ * available.
+ * availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ * availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ * either pSrcAbove or pDst is not aligned on a 16-byte boundary.
+ *
+ * Note:
+ * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ * they are not used by intra prediction implied in predMode.
+ * Note:
+ * OMX_VC_UPPER_RIGHT is not used in intra_16x16 luma prediction.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntra_16x16 (
+ const OMX_U8 *pSrcLeft,
+ const OMX_U8 *pSrcAbove,
+ const OMX_U8 *pSrcAboveLeft,
+ OMX_U8 *pDst,
+ OMX_INT leftStep,
+ OMX_INT dstStep,
+ OMXVCM4P10Intra16x16PredMode predMode,
+ OMX_S32 availability
+);
+
+
+
+/**
+ * Function: omxVCM4P10_PredictIntraChroma_8x8 (6.3.3.1.3)
+ *
+ * Description:
+ * Performs intra prediction for chroma samples.
+ *
+ * Input Arguments:
+ *
+ * pSrcLeft - Pointer to the buffer of 8 left pixels: p[x, y] (x = -1, y=
+ * 0..7).
+ * pSrcAbove - Pointer to the buffer of 8 above pixels: p[x,y] (x = 0..7, y
+ * = -1); must be aligned on an 8-byte boundary.
+ * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1)
+ * leftStep - Step of left pixel buffer; must be a multiple of 8.
+ * dstStep - Step of the destination buffer; must be a multiple of 8.
+ * predMode - Intra chroma prediction mode, please refer to section 3.4.3.
+ * availability - Neighboring chroma block availability flag, please refer
+ * to "Neighboring Macroblock Availability".
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to the destination buffer; must be aligned on an 8-byte
+ * boundary.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If any of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pDst is NULL.
+ * dstStep < 8 or dstStep is not a multiple of 8.
+ * leftStep is not a multiple of 8.
+ * predMode is not in the valid range of enumeration
+ * OMXVCM4P10IntraChromaPredMode.
+ * predMode is OMX_VC_CHROMA_VERT, but availability doesn't set
+ * OMX_VC_UPPER indicating p[x,-1] (x = 0..7) is not available.
+ * predMode is OMX_VC_CHROMA_HOR, but availability doesn't set OMX_VC_LEFT
+ * indicating p[-1,y] (y = 0..7) is not available.
+ * predMode is OMX_VC_CHROMA_PLANE, but availability doesn't set
+ * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating
+ * p[x,-1](x = 0..7), or p[-1,y] (y = 0..7), or p[-1,-1] is not
+ * available.
+ * availability sets OMX_VC_UPPER, but pSrcAbove is NULL.
+ * availability sets OMX_VC_LEFT, but pSrcLeft is NULL.
+ * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL.
+ * either pSrcAbove or pDst is not aligned on a 8-byte boundary.
+ *
+ * Note: pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if
+ * they are not used by intra prediction implied in predMode.
+ *
+ * Note: OMX_VC_UPPER_RIGHT is not used in intra chroma prediction.
+ *
+ */
+OMXResult omxVCM4P10_PredictIntraChroma_8x8 (
+ const OMX_U8 *pSrcLeft,
+ const OMX_U8 *pSrcAbove,
+ const OMX_U8 *pSrcAboveLeft,
+ OMX_U8 *pDst,
+ OMX_INT leftStep,
+ OMX_INT dstStep,
+ OMXVCM4P10IntraChromaPredMode predMode,
+ OMX_S32 availability
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InterpolateLuma (6.3.3.2.1)
+ *
+ * Description:
+ * Performs quarter-pixel interpolation for inter luma MB. It is assumed that
+ * the frame is already padded when calling this function.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the source reference frame buffer
+ * srcStep - reference frame step, in bytes; must be a multiple of roi.width
+ * dstStep - destination frame step, in bytes; must be a multiple of
+ * roi.width
+ * dx - Fractional part of horizontal motion vector component in 1/4 pixel
+ * unit; valid in the range [0,3]
+ * dy - Fractional part of vertical motion vector y component in 1/4 pixel
+ * unit; valid in the range [0,3]
+ * roi - Dimension of the interpolation region; the parameters roi.width and
+ * roi.height must be equal to either 4, 8, or 16.
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to the destination frame buffer:
+ * if roi.width==4, 4-byte alignment required
+ * if roi.width==8, 8-byte alignment required
+ * if roi.width==16, 16-byte alignment required
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pSrc or pDst is NULL.
+ * srcStep or dstStep < roi.width.
+ * dx or dy is out of range [0,3].
+ * roi.width or roi.height is out of range {4, 8, 16}.
+ * roi.width is equal to 4, but pDst is not 4 byte aligned.
+ * roi.width is equal to 8 or 16, but pDst is not 8 byte aligned.
+ * srcStep or dstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_InterpolateLuma (
+ const OMX_U8 *pSrc,
+ OMX_S32 srcStep,
+ OMX_U8 *pDst,
+ OMX_S32 dstStep,
+ OMX_S32 dx,
+ OMX_S32 dy,
+ OMXSize roi
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InterpolateChroma (6.3.3.2.2)
+ *
+ * Description:
+ * Performs 1/8-pixel interpolation for inter chroma MB.
+ *
+ * Input Arguments:
+ *
+ * pSrc -Pointer to the source reference frame buffer
+ * srcStep -Reference frame step in bytes
+ * dstStep -Destination frame step in bytes; must be a multiple of
+ * roi.width.
+ * dx -Fractional part of horizontal motion vector component in 1/8 pixel
+ * unit; valid in the range [0,7]
+ * dy -Fractional part of vertical motion vector component in 1/8 pixel
+ * unit; valid in the range [0,7]
+ * roi -Dimension of the interpolation region; the parameters roi.width and
+ * roi.height must be equal to either 2, 4, or 8.
+ *
+ * Output Arguments:
+ *
+ * pDst -Pointer to the destination frame buffer:
+ * if roi.width==2, 2-byte alignment required
+ * if roi.width==4, 4-byte alignment required
+ * if roi.width==8, 8-byte alignment required
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * pSrc or pDst is NULL.
+ * srcStep or dstStep < 8.
+ * dx or dy is out of range [0-7].
+ * roi.width or roi.height is out of range {2,4,8}.
+ * roi.width is equal to 2, but pDst is not 2-byte aligned.
+ * roi.width is equal to 4, but pDst is not 4-byte aligned.
+ * roi.width is equal to 8, but pDst is not 8 byte aligned.
+ * srcStep or dstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_InterpolateChroma (
+ const OMX_U8 *pSrc,
+ OMX_S32 srcStep,
+ OMX_U8 *pDst,
+ OMX_S32 dstStep,
+ OMX_S32 dx,
+ OMX_S32 dy,
+ OMXSize roi
+);
+
+
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingLuma_VerEdge_I (6.3.3.3.1)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the luma
+ * macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the input macroblock; must be 16-byte aligned.
+ * srcdstStep -Step of the arrays; must be a multiple of 16.
+ * pAlpha -Array of size 2 of alpha thresholds (the first item is the alpha
+ * threshold for the external vertical edge, and the second item is
+ * for the internal vertical edge); per [ISO14496-10] alpha values
+ * must be in the range [0,255].
+ * pBeta -Array of size 2 of beta thresholds (the first item is the beta
+ * threshold for the external vertical edge, and the second item is
+ * for the internal vertical edge); per [ISO14496-10] beta values
+ * must be in the range [0,18].
+ * pThresholds -Array of size 16 of Thresholds (TC0) (values for the left
+ * edge of each 4x4 block, arranged in vertical block order); must
+ * be aligned on a 4-byte boundary. Per [ISO14496-10] values must
+ * be in the range [0,25].
+ * pBS -Array of size 16 of BS parameters (arranged in vertical block
+ * order); valid in the range [0,4] with the following
+ * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii)
+ * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst -Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns
+ * OMX_Sts_BadArgErr:
+ * Either of the pointers in pSrcDst, pAlpha, pBeta, pThresholds, or pBS
+ * is NULL.
+ * Either pThresholds or pBS is not aligned on a 4-byte boundary.
+ * pSrcDst is not 16-byte aligned.
+ * srcdstStep is not a multiple of 16.
+ * pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * One or more entries in the table pThresholds[0..15]is outside of the
+ * range [0,25].
+ * pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 &&
+ * pBS[i^3]!=4) for 0<=i<=3.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingLuma_VerEdge_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingLuma_HorEdge_I (6.3.3.3.2)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four horizontal edges of the luma
+ * macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 16-byte aligned.
+ * srcdstStep - step of the arrays; must be a multiple of 16.
+ * pAlpha - array of size 2 of alpha thresholds (the first item is the alpha
+ * threshold for the external horizontal edge, and the second item is
+ * for the internal horizontal edge); per [ISO14496-10] alpha
+ * values must be in the range [0,255].
+ * pBeta - array of size 2 of beta thresholds (the first item is the beta
+ * threshold for the external horizontal edge, and the second item
+ * is for the internal horizontal edge). Per [ISO14496-10] beta
+ * values must be in the range [0,18].
+ * pThresholds - array of size 16 containing thresholds, TC0, for the top
+ * horizontal edge of each 4x4 block, arranged in horizontal block
+ * order; must be aligned on a 4-byte boundary. Per [ISO14496-10]
+ * values must be in the range [0,25].
+ * pBS - array of size 16 of BS parameters (arranged in horizontal block
+ * order); valid in the range [0,4] with the following
+ * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii)
+ * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst -Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr, if one of the following cases occurs:
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds, or pBS.
+ * - either pThresholds or pBS is not aligned on a 4-byte boundary.
+ * - pSrcDst is not 16-byte aligned.
+ * - srcdstStep is not a multiple of 16.
+ * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * - pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * - One or more entries in the table pThresholds[0..15] is
+ * outside of the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingLuma_HorEdge_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingChroma_VerEdge_I (6.3.3.3.3)
+ *
+ * Description:
+ * Performs in-place deblock filtering on four vertical edges of the chroma
+ * macroblock (8x8).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the input macroblock; must be 8-byte aligned.
+ * srcdstStep - Step of the arrays; must be a multiple of 8.
+ * pAlpha - Array of size 2 of alpha thresholds (the first item is alpha
+ * threshold for external vertical edge, and the second item is for
+ * internal vertical edge); per [ISO14496-10] alpha values must be
+ * in the range [0,255].
+ * pBeta - Array of size 2 of beta thresholds (the first item is the beta
+ * threshold for the external vertical edge, and the second item is
+ * for the internal vertical edge); per [ISO14496-10] beta values
+ * must be in the range [0,18].
+ * pThresholds - Array of size 8 containing thresholds, TC0, for the left
+ * vertical edge of each 4x2 chroma block, arranged in vertical
+ * block order; must be aligned on a 4-byte boundary. Per
+ * [ISO14496-10] values must be in the range [0,25].
+ * pBS - Array of size 16 of BS parameters (values for each 2x2 chroma
+ * block, arranged in vertical block order). This parameter is the
+ * same as the pBS parameter passed into FilterDeblockingLuma_VerEdge_I;
+ * valid in the range [0,4] with the following restrictions: i)
+ * pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and
+ * only if pBS[i^3]== 4. Must be 4 byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst -Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds, or pBS.
+ * - pSrcDst is not 8-byte aligned.
+ * - srcdstStep is not a multiple of 8.
+ * - pThresholds is not 4-byte aligned.
+ * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * - pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * - One or more entries in the table pThresholds[0..7] is outside
+ * of the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - pBS is not 4-byte aligned.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingChroma_VerEdge_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_FilterDeblockingChroma_HorEdge_I (6.3.3.3.4)
+ *
+ * Description:
+ * Performs in-place deblock filtering on the horizontal edges of the chroma
+ * macroblock (8x8).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 8-byte aligned.
+ * srcdstStep - array step; must be a multiple of 8.
+ * pAlpha - array of size 2 containing alpha thresholds; the first element
+ * contains the threshold for the external horizontal edge, and the
+ * second element contains the threshold for internal horizontal
+ * edge. Per [ISO14496-10] alpha values must be in the range
+ * [0,255].
+ * pBeta - array of size 2 containing beta thresholds; the first element
+ * contains the threshold for the external horizontal edge, and the
+ * second element contains the threshold for the internal
+ * horizontal edge. Per [ISO14496-10] beta values must be in the
+ * range [0,18].
+ * pThresholds - array of size 8 containing thresholds, TC0, for the top
+ * horizontal edge of each 2x4 chroma block, arranged in horizontal
+ * block order; must be aligned on a 4-byte boundary. Per
+ * [ISO14496-10] values must be in the range [0,25].
+ * pBS - array of size 16 containing BS parameters for each 2x2 chroma
+ * block, arranged in horizontal block order; valid in the range
+ * [0,4] with the following restrictions: i) pBS[i]== 4 may occur
+ * only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 4.
+ * Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst -Pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr, if one of the following cases occurs:
+ * - any of the following pointers is NULL:
+ * pSrcDst, pAlpha, pBeta, pThresholds, or pBS.
+ * - pSrcDst is not 8-byte aligned.
+ * - srcdstStep is not a multiple of 8.
+ * - pThresholds is not 4-byte aligned.
+ * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255].
+ * - pBeta[0] and/or pBeta[1] is outside the range [0,18].
+ * - One or more entries in the table pThresholds[0..7] is outside
+ * of the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - pBS is not 4-byte aligned.
+ *
+ */
+OMXResult omxVCM4P10_FilterDeblockingChroma_HorEdge_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DeblockLuma_I (6.3.3.3.5)
+ *
+ * Description:
+ * This function performs in-place deblock filtering on the horizontal and
+ * vertical edges of a luma macroblock (16x16).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 16-byte aligned.
+ * srcdstStep - image width; must be a multiple of 16.
+ * pAlpha - pointer to a 2x2 table of alpha thresholds, organized as
+ * follows: {external vertical edge, internal vertical edge,
+ * external horizontal edge, internal horizontal edge }. Per
+ * [ISO14496-10] alpha values must be in the range [0,255].
+ * pBeta - pointer to a 2x2 table of beta thresholds, organized as follows:
+ * {external vertical edge, internal vertical edge, external
+ * horizontal edge, internal horizontal edge }. Per [ISO14496-10]
+ * beta values must be in the range [0,18].
+ * pThresholds - pointer to a 16x2 table of threshold (TC0), organized as
+ * follows: {values for the left or above edge of each 4x4 block,
+ * arranged in vertical block order and then in horizontal block
+ * order}; must be aligned on a 4-byte boundary. Per [ISO14496-10]
+ * values must be in the range [0,25].
+ * pBS - pointer to a 16x2 table of BS parameters arranged in scan block
+ * order for vertical edges and then horizontal edges; valid in the
+ * range [0,4] with the following restrictions: i) pBS[i]== 4 may
+ * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]==
+ * 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds or pBS.
+ * - pSrcDst is not 16-byte aligned.
+ * - either pThresholds or pBS is not aligned on a 4-byte boundary.
+ * - one or more entries in the table pAlpha[0..3] is outside the range
+ * [0,255].
+ * - one or more entries in the table pBeta[0..3] is outside the range
+ * [0,18].
+ * - one or more entries in the table pThresholds[0..31]is outside of
+ * the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - srcdstStep is not a multiple of 16.
+ *
+ */
+OMXResult omxVCM4P10_DeblockLuma_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DeblockChroma_I (6.3.3.3.6)
+ *
+ * Description:
+ * Performs in-place deblocking filtering on all edges of the chroma
+ * macroblock (8x8).
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - pointer to the input macroblock; must be 8-byte aligned.
+ * srcdstStep - step of the arrays; must be a multiple of 8.
+ * pAlpha - pointer to a 2x2 array of alpha thresholds, organized as
+ * follows: {external vertical edge, internal vertical edge,
+ * external horizontal edge, internal horizontal edge }. Per
+ * [ISO14496-10] alpha values must be in the range [0,255].
+ * pBeta - pointer to a 2x2 array of Beta Thresholds, organized as follows:
+ * { external vertical edge, internal vertical edge, external
+ * horizontal edge, internal horizontal edge }. Per [ISO14496-10]
+ * beta values must be in the range [0,18].
+ * pThresholds - array of size 8x2 of Thresholds (TC0) (values for the left
+ * or above edge of each 4x2 or 2x4 block, arranged in vertical
+ * block order and then in horizontal block order); must be aligned
+ * on a 4-byte boundary. Per [ISO14496-10] values must be in the
+ * range [0,25].
+ * pBS - array of size 16x2 of BS parameters (arranged in scan block order
+ * for vertical edges and then horizontal edges); valid in the
+ * range [0,4] with the following restrictions: i) pBS[i]== 4 may
+ * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]==
+ * 4. Must be 4-byte aligned.
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - pointer to filtered output macroblock.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - one or more of the following pointers is NULL: pSrcDst, pAlpha,
+ * pBeta, pThresholds, or pBS.
+ * - pSrcDst is not 8-byte aligned.
+ * - either pThresholds or pBS is not 4-byte aligned.
+ * - one or more entries in the table pAlpha[0..3] is outside the range
+ * [0,255].
+ * - one or more entries in the table pBeta[0..3] is outside the range
+ * [0,18].
+ * - one or more entries in the table pThresholds[0..15]is outside of
+ * the range [0,25].
+ * - pBS is out of range, i.e., one of the following conditions is true:
+ * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or
+ * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3.
+ * - srcdstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_DeblockChroma_I (
+ OMX_U8 *pSrcDst,
+ OMX_S32 srcdstStep,
+ const OMX_U8 *pAlpha,
+ const OMX_U8 *pBeta,
+ const OMX_U8 *pThresholds,
+ const OMX_U8 *pBS
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (6.3.4.1.1)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse raster scan for a 2x2 block of
+ * ChromaDCLevel. The decoded coefficients in the packed position-coefficient
+ * buffer are stored in reverse zig-zag order, i.e., the first buffer element
+ * contains the last non-zero position-coefficient pair of the block. Within
+ * each position-coefficient pair, the position entry indicates the
+ * raster-scan position of the coefficient, while the coefficient entry
+ * contains the coefficient value.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream - Double pointer to current byte in bit stream buffer
+ * pOffset - Pointer to current bit position in the byte pointed to by
+ * *ppBitStream; valid in the range [0,7].
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after each block is decoded
+ * pOffset - *pOffset is updated after each block is decoded
+ * pNumCoeff - Pointer to the number of nonzero coefficients in this block
+ * ppPosCoefBuf - Double pointer to destination residual
+ * coefficient-position pair buffer. Buffer position
+ * (*ppPosCoefBuf) is updated upon return, unless there are only
+ * zero coefficients in the currently decoded block. In this case
+ * the caller is expected to bypass the transform/dequantization of
+ * the empty blocks.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppBitStream or pOffset is NULL.
+ * - ppPosCoefBuf or pNumCoeff is NULL.
+ * OMX_Sts_Err - if one of the following is true:
+ * - an illegal code is encountered in the bitstream
+ *
+ */
+OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (
+ const OMX_U8 **ppBitStream,
+ OMX_S32*pOffset,
+ OMX_U8 *pNumCoeff,
+ OMX_U8 **ppPosCoefbuf
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC (6.3.4.1.2)
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of
+ * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel, and ChromaACLevel. Inverse
+ * field scan is not supported. The decoded coefficients in the packed
+ * position-coefficient buffer are stored in reverse zig-zag order, i.e., the
+ * first buffer element contains the last non-zero position-coefficient pair of
+ * the block. Within each position-coefficient pair, the position entry
+ * indicates the raster-scan position of the coefficient, while the
+ * coefficient entry contains the coefficient value.
+ *
+ * Input Arguments:
+ *
+ * ppBitStream -Double pointer to current byte in bit stream buffer
+ * pOffset - Pointer to current bit position in the byte pointed to by
+ * *ppBitStream; valid in the range [0,7].
+ * sMaxNumCoeff - Maximum number of non-zero coefficients in the current
+ * block
+ * sVLCSelect - VLC table selector, obtained from the number of non-zero
+ * coefficients contained in the above and left 4x4 blocks. It is
+ * equivalent to the variable nC described in H.264 standard table
+ * 9-5, except its value can't be less than zero.
+ *
+ * Output Arguments:
+ *
+ * ppBitStream - *ppBitStream is updated after each block is decoded.
+ * pOffset - *pOffset is updated after each block is decoded
+ * pNumCoeff - Pointer to the number of nonzero coefficients in this block
+ * ppPosCoefBuf - Double pointer to destination residual
+ * coefficient-position pair buffer. Buffer position
+ * (*ppPosCoefBuf) is updated upon return, unless there are only
+ * zero coefficients in the currently decoded block. In this case
+ * the caller is expected to bypass the transform/dequantization of
+ * the empty blocks.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ *
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppBitStream or pOffset is NULL.
+ * - ppPosCoefBuf or pNumCoeff is NULL.
+ * - sMaxNumCoeff is not equal to either 15 or 16.
+ * - sVLCSelect is less than 0.
+ *
+ * OMX_Sts_Err - if one of the following is true:
+ * - an illegal code is encountered in the bitstream
+ *
+ */
+OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC (
+ const OMX_U8 **ppBitStream,
+ OMX_S32 *pOffset,
+ OMX_U8 *pNumCoeff,
+ OMX_U8 **ppPosCoefbuf,
+ OMX_INT sVLCSelect,
+ OMX_INT sMaxNumCoeff
+);
+
+
+
+/**
+ * Function: omxVCM4P10_TransformDequantLumaDCFromPair (6.3.4.2.1)
+ *
+ * Description:
+ * Reconstructs the 4x4 LumaDC block from the coefficient-position pair
+ * buffer, performs integer inverse transformation and dequantization for
+ * 4x4 LumaDC coefficients, and updates the pair buffer pointer to the next
+ * non-empty block.
+ *
+ * Input Arguments:
+ *
+ * ppSrc - Double pointer to residual coefficient-position pair buffer
+ * output by CAVLC decoding
+ * QP - Quantization parameter QpY
+ *
+ * Output Arguments:
+ *
+ * ppSrc - *ppSrc is updated to the start of next non empty block
+ * pDst - Pointer to the reconstructed 4x4 LumaDC coefficients buffer; must
+ * be aligned on a 8-byte boundary.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppSrc or pDst is NULL.
+ * - pDst is not 8 byte aligned.
+ * - QP is not in the range of [0-51].
+ *
+ */
+OMXResult omxVCM4P10_TransformDequantLumaDCFromPair (
+ const OMX_U8 **ppSrc,
+ OMX_S16 *pDst,
+ OMX_INT QP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_TransformDequantChromaDCFromPair (6.3.4.2.2)
+ *
+ * Description:
+ * Reconstruct the 2x2 ChromaDC block from coefficient-position pair buffer,
+ * perform integer inverse transformation, and dequantization for 2x2 chroma
+ * DC coefficients, and update the pair buffer pointer to next non-empty
+ * block.
+ *
+ * Input Arguments:
+ *
+ * ppSrc - Double pointer to residual coefficient-position pair buffer
+ * output by CAVLC decoding
+ * QP - Quantization parameter QpC
+ *
+ * Output Arguments:
+ *
+ * ppSrc - *ppSrc is updated to the start of next non empty block
+ * pDst - Pointer to the reconstructed 2x2 ChromaDC coefficients buffer;
+ * must be aligned on a 4-byte boundary.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - ppSrc or pDst is NULL.
+ * - pDst is not 4-byte aligned.
+ * - QP is not in the range of [0-51].
+ *
+ */
+OMXResult omxVCM4P10_TransformDequantChromaDCFromPair (
+ const OMX_U8 **ppSrc,
+ OMX_S16 *pDst,
+ OMX_INT QP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd (6.3.4.2.3)
+ *
+ * Description:
+ * Reconstruct the 4x4 residual block from coefficient-position pair buffer,
+ * perform dequantization and integer inverse transformation for 4x4 block of
+ * residuals with previous intra prediction or motion compensation data, and
+ * update the pair buffer pointer to next non-empty block. If pDC == NULL,
+ * there are 16 non-zero AC coefficients at most in the packed buffer starting
+ * from 4x4 block position 0; If pDC != NULL, there are 15 non-zero AC
+ * coefficients at most in the packed buffer starting from 4x4 block position
+ * 1.
+ *
+ * Input Arguments:
+ *
+ * ppSrc - Double pointer to residual coefficient-position pair buffer
+ * output by CAVLC decoding
+ * pPred - Pointer to the predicted 4x4 block; must be aligned on a 4-byte
+ * boundary
+ * predStep - Predicted frame step size in bytes; must be a multiple of 4
+ * dstStep - Destination frame step in bytes; must be a multiple of 4
+ * pDC - Pointer to the DC coefficient of this block, NULL if it doesn't
+ * exist
+ * QP - Quantization parameter. It should be QpC in chroma 4x4 block
+ * decoding, otherwise it should be QpY.
+ * AC - Flag indicating if at least one non-zero AC coefficient exists
+ *
+ * Output Arguments:
+ *
+ * pDst - pointer to the reconstructed 4x4 block data; must be aligned on a
+ * 4-byte boundary
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - pPred or pDst is NULL.
+ * - pPred or pDst is not 4-byte aligned.
+ * - predStep or dstStep is not a multiple of 4.
+ * - AC !=0 and Qp is not in the range of [0-51] or ppSrc == NULL.
+ * - AC ==0 && pDC ==NULL.
+ *
+ */
+OMXResult omxVCM4P10_DequantTransformResidualFromPairAndAdd (
+ const OMX_U8 **ppSrc,
+ const OMX_U8 *pPred,
+ const OMX_S16 *pDC,
+ OMX_U8 *pDst,
+ OMX_INT predStep,
+ OMX_INT dstStep,
+ OMX_INT QP,
+ OMX_INT AC
+);
+
+
+
+/**
+ * Function: omxVCM4P10_MEGetBufSize (6.3.5.1.1)
+ *
+ * Description:
+ * Computes the size, in bytes, of the vendor-specific specification
+ * structure for the omxVCM4P10 motion estimation functions BlockMatch_Integer
+ * and MotionEstimationMB.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P10MEMode
+ * pMEParams -motion estimation parameters
+ *
+ * Output Arguments:
+ *
+ * pSize - pointer to the number of bytes required for the motion
+ * estimation specification structure
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - pMEParams or pSize is NULL.
+ * - an invalid MEMode is specified.
+ *
+ */
+OMXResult omxVCM4P10_MEGetBufSize (
+ OMXVCM4P10MEMode MEmode,
+ const OMXVCM4P10MEParams *pMEParams,
+ OMX_U32 *pSize
+);
+
+
+
+/**
+ * Function: omxVCM4P10_MEInit (6.3.5.1.2)
+ *
+ * Description:
+ * Initializes the vendor-specific specification structure required for the
+ * omxVCM4P10 motion estimation functions: BlockMatch_Integer and
+ * MotionEstimationMB. Memory for the specification structure *pMESpec must be
+ * allocated prior to calling the function, and should be aligned on a 4-byte
+ * boundary. The number of bytes required for the specification structure can
+ * be determined using the function omxVCM4P10_MEGetBufSize. Following
+ * initialization by this function, the vendor-specific structure *pMESpec
+ * should contain an implementation-specific representation of all motion
+ * estimation parameters received via the structure pMEParams, for example
+ * searchRange16x16, searchRange8x8, etc.
+ *
+ * Input Arguments:
+ *
+ * MEmode - motion estimation mode; available modes are defined by the
+ * enumerated type OMXVCM4P10MEMode
+ * pMEParams - motion estimation parameters
+ * pMESpec - pointer to the uninitialized ME specification structure
+ *
+ * Output Arguments:
+ *
+ * pMESpec - pointer to the initialized ME specification structure
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - pMEParams or pMESpec is NULL.
+ * - an invalid value was specified for the parameter MEmode
+ * - a negative or zero value was specified for one of the search ranges
+ * (e.g., pMEParams->searchRange8x8, pMEParams->searchRange16x16, etc.)
+ * - either in isolation or in combination, one or more of the enables or
+ * search ranges in the structure *pMEParams were configured such
+ * that the requested behavior fails to comply with [ISO14496-10].
+ *
+ */
+OMXResult omxVCM4P10_MEInit (
+ OMXVCM4P10MEMode MEmode,
+ const OMXVCM4P10MEParams *pMEParams,
+ void *pMESpec
+);
+
+
+
+/**
+ * Function: omxVCM4P10_BlockMatch_Integer (6.3.5.2.1)
+ *
+ * Description:
+ * Performs integer block match. Returns best MV and associated cost.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrgY - Pointer to the top-left corner of the current block:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the
+ * reference picture:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * nSrcOrgStep - Stride of the original picture plane, expressed in terms
+ * of integer pixels; must be a multiple of iBlockWidth.
+ * nSrcRefStep - Stride of the reference picture plane, expressed in terms
+ * of integer pixels
+ * pRefRect - pointer to the valid reference rectangle inside the reference
+ * picture plane
+ * pCurrPointPos - position of the current block in the current plane
+ * iBlockWidth - Width of the current block, expressed in terms of integer
+ * pixels; must be equal to either 4, 8, or 16.
+ * iBlockHeight - Height of the current block, expressed in terms of
+ * integer pixels; must be equal to either 4, 8, or 16.
+ * nLamda - Lambda factor; used to compute motion cost
+ * pMVPred - Predicted MV; used to compute motion cost, expressed in terms
+ * of 1/4-pel units
+ * pMVCandidate - Candidate MV; used to initialize the motion search,
+ * expressed in terms of integer pixels
+ * pMESpec - pointer to the ME specification structure
+ *
+ * Output Arguments:
+ *
+ * pBestMV - Best MV resulting from integer search, expressed in terms
+ * of 1/4-pel units
+ * pBestCost - Motion cost associated with the best MV; computed as
+ * SAD+Lambda*BitsUsedByMV
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - any of the following pointers are NULL:
+ * pSrcOrgY, pSrcRefY, pRefRect, pMVPred, pMVCandidate, or pMESpec.
+ * - Either iBlockWidth or iBlockHeight are values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Integer (
+ const OMX_U8 *pSrcOrgY,
+ OMX_S32 nSrcOrgStep,
+ const OMX_U8 *pSrcRefY,
+ OMX_S32 nSrcRefStep,
+ const OMXRect *pRefRect,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ OMX_U8 iBlockWidth,
+ OMX_U8 iBlockHeight,
+ OMX_U32 nLamda,
+ const OMXVCMotionVector *pMVPred,
+ const OMXVCMotionVector *pMVCandidate,
+ OMXVCMotionVector *pBestMV,
+ OMX_S32 *pBestCost,
+ void *pMESpec
+);
+
+
+
+/**
+ * Function: omxVCM4P10_BlockMatch_Half (6.3.5.2.2)
+ *
+ * Description:
+ * Performs a half-pel block match using results from a prior integer search.
+ * Returns the best MV and associated cost. This function estimates the
+ * half-pixel motion vector by interpolating the integer resolution motion
+ * vector referenced by the input parameter pSrcDstBestMV, i.e., the initial
+ * integer MV is generated externally. The function
+ * omxVCM4P10_BlockMatch_Integer may be used for integer motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrgY - Pointer to the current position in original picture plane:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the
+ * reference picture:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * nSrcOrgStep - Stride of the original picture plane in terms of full
+ * pixels; must be a multiple of iBlockWidth.
+ * nSrcRefStep - Stride of the reference picture plane in terms of full
+ * pixels
+ * iBlockWidth - Width of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * iBlockHeight - Height of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * nLamda - Lambda factor, used to compute motion cost
+ * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to
+ * compute motion cost
+ * pSrcDstBestMV - The best MV resulting from a prior integer search,
+ * represented in terms of 1/4-pel units
+ *
+ * Output Arguments:
+ *
+ * pSrcDstBestMV - Best MV resulting from the half-pel search, expressed in
+ * terms of 1/4-pel units
+ * pBestCost - Motion cost associated with the best MV; computed as
+ * SAD+Lambda*BitsUsedByMV
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - any of the following pointers is NULL: pSrcOrgY, pSrcRefY,
+ * pSrcDstBestMV, pMVPred, pBestCost
+ * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Half (
+ const OMX_U8 *pSrcOrgY,
+ OMX_S32 nSrcOrgStep,
+ const OMX_U8 *pSrcRefY,
+ OMX_S32 nSrcRefStep,
+ OMX_U8 iBlockWidth,
+ OMX_U8 iBlockHeight,
+ OMX_U32 nLamda,
+ const OMXVCMotionVector *pMVPred,
+ OMXVCMotionVector *pSrcDstBestMV,
+ OMX_S32 *pBestCost
+);
+
+
+
+/**
+ * Function: omxVCM4P10_BlockMatch_Quarter (6.3.5.2.3)
+ *
+ * Description:
+ * Performs a quarter-pel block match using results from a prior half-pel
+ * search. Returns the best MV and associated cost. This function estimates
+ * the quarter-pixel motion vector by interpolating the half-pel resolution
+ * motion vector referenced by the input parameter pSrcDstBestMV, i.e., the
+ * initial half-pel MV is generated externally. The function
+ * omxVCM4P10_BlockMatch_Half may be used for half-pel motion estimation.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrgY - Pointer to the current position in original picture plane:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the
+ * reference picture:
+ * If iBlockWidth==4, 4-byte alignment required.
+ * If iBlockWidth==8, 8-byte alignment required.
+ * If iBlockWidth==16, 16-byte alignment required.
+ * nSrcOrgStep - Stride of the original picture plane in terms of full
+ * pixels; must be a multiple of iBlockWidth.
+ * nSrcRefStep - Stride of the reference picture plane in terms of full
+ * pixels
+ * iBlockWidth - Width of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * iBlockHeight - Height of the current block in terms of full pixels; must
+ * be equal to either 4, 8, or 16.
+ * nLamda - Lambda factor, used to compute motion cost
+ * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to
+ * compute motion cost
+ * pSrcDstBestMV - The best MV resulting from a prior half-pel search,
+ * represented in terms of 1/4-pel units
+ *
+ * Output Arguments:
+ *
+ * pSrcDstBestMV - Best MV resulting from the quarter-pel search, expressed
+ * in terms of 1/4-pel units
+ * pBestCost - Motion cost associated with the best MV; computed as
+ * SAD+Lambda*BitsUsedByMV
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - One or more of the following pointers is NULL:
+ * pSrcOrgY, pSrcRefY, pSrcDstBestMV, pMVPred, pBestCost
+ * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_BlockMatch_Quarter (
+ const OMX_U8 *pSrcOrgY,
+ OMX_S32 nSrcOrgStep,
+ const OMX_U8 *pSrcRefY,
+ OMX_S32 nSrcRefStep,
+ OMX_U8 iBlockWidth,
+ OMX_U8 iBlockHeight,
+ OMX_U32 nLamda,
+ const OMXVCMotionVector *pMVPred,
+ OMXVCMotionVector *pSrcDstBestMV,
+ OMX_S32 *pBestCost
+);
+
+
+
+/**
+ * Function: omxVCM4P10_MotionEstimationMB (6.3.5.3.1)
+ *
+ * Description:
+ * Performs MB-level motion estimation and selects best motion estimation
+ * strategy from the set of modes supported in baseline profile [ISO14496-10].
+ *
+ * Input Arguments:
+ *
+ * pSrcCurrBuf - Pointer to the current position in original picture plane;
+ * 16-byte alignment required
+ * pSrcRefBufList - Pointer to an array with 16 entries. Each entry points
+ * to the top-left corner of the co-located MB in a reference
+ * picture. The array is filled from low-to-high with valid
+ * reference frame pointers; the unused high entries should be set
+ * to NULL. Ordering of the reference frames should follow
+ * [ISO14496-10] subclause 8.2.4 Decoding Process for Reference
+ * Picture Lists. The entries must be 16-byte aligned.
+ * pSrcRecBuf - Pointer to the top-left corner of the co-located MB in the
+ * reconstructed picture; must be 16-byte aligned.
+ * SrcCurrStep - Width of the original picture plane in terms of full
+ * pixels; must be a multiple of 16.
+ * SrcRefStep - Width of the reference picture plane in terms of full
+ * pixels; must be a multiple of 16.
+ * SrcRecStep - Width of the reconstructed picture plane in terms of full
+ * pixels; must be a multiple of 16.
+ * pRefRect - Pointer to the valid reference rectangle; relative to the
+ * image origin.
+ * pCurrPointPos - Position of the current macroblock in the current plane.
+ * Lambda - Lagrange factor for computing the cost function
+ * pMESpec - Pointer to the motion estimation specification structure; must
+ * have been allocated and initialized prior to calling this
+ * function.
+ * pMBInter - Array, of dimension four, containing pointers to information
+ * associated with four adjacent type INTER MBs (Left, Top,
+ * Top-Left, Top-Right). Any pointer in the array may be set equal
+ * to NULL if the corresponding MB doesn't exist or is not of type
+ * INTER.
+ * - pMBInter[0] - Pointer to left MB information
+ * - pMBInter[1] - Pointer to top MB information
+ * - pMBInter[2] - Pointer to top-left MB information
+ * - pMBInter[3] - Pointer to top-right MB information
+ * pMBIntra - Array, of dimension four, containing pointers to information
+ * associated with four adjacent type INTRA MBs (Left, Top,
+ * Top-Left, Top-Right). Any pointer in the array may be set equal
+ * to NULL if the corresponding MB doesn't exist or is not of type
+ * INTRA.
+ * - pMBIntra[0] - Pointer to left MB information
+ * - pMBIntra[1] - Pointer to top MB information
+ * - pMBIntra[2] - Pointer to top-left MB information
+ * - pMBIntra[3] - Pointer to top-right MB information
+ * pSrcDstMBCurr - Pointer to information structure for the current MB.
+ * The following entries should be set prior to calling the
+ * function: sliceID - the number of the slice to which the
+ * current MB belongs.
+ *
+ * Output Arguments:
+ *
+ * pDstCost - Pointer to the minimum motion cost for the current MB.
+ * pDstBlockSAD - Pointer to the array of SADs for each of the sixteen luma
+ * 4x4 blocks in each MB. The block SADs are in scan order for
+ * each MB. For implementations that cannot compute the SAD values
+ * individually, the maximum possible value (0xffff) is returned
+ * for each of the 16 block SAD entries.
+ * pSrcDstMBCurr - Pointer to updated information structure for the current
+ * MB after MB-level motion estimation has been completed. The
+ * following fields are updated by the ME function. The following
+ * parameter set quantifies the MB-level ME search results:
+ * - MbType
+ * - subMBType[4]
+ * - pMV0[4][4]
+ * - pMVPred[4][4]
+ * - pRefL0Idx[4]
+ * - Intra16x16PredMode
+ * - pIntra4x4PredMode[4][4]
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - One or more of the following pointers is NULL: pSrcCurrBuf,
+ * pSrcRefBufList, pSrcRecBuf, pRefRect, pCurrPointPos, pMESpec,
+ * pMBInter, pMBIntra,pSrcDstMBCurr, pDstCost, pSrcRefBufList[0]
+ * - SrcRefStep, SrcRecStep are not multiples of 16
+ * - iBlockWidth or iBlockHeight are values other than 4, 8, or 16.
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_MotionEstimationMB (
+ const OMX_U8 *pSrcCurrBuf,
+ OMX_S32 SrcCurrStep,
+ const OMX_U8 *pSrcRefBufList[15],
+ OMX_S32 SrcRefStep,
+ const OMX_U8 *pSrcRecBuf,
+ OMX_S32 SrcRecStep,
+ const OMXRect *pRefRect,
+ const OMXVCM4P2Coordinate *pCurrPointPos,
+ OMX_U32 Lambda,
+ void *pMESpec,
+ const OMXVCM4P10MBInfoPtr *pMBInter,
+ const OMXVCM4P10MBInfoPtr *pMBIntra,
+ OMXVCM4P10MBInfoPtr pSrcDstMBCurr,
+ OMX_INT *pDstCost,
+ OMX_U16 *pDstBlockSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SAD_4x (6.3.5.4.1)
+ *
+ * Description:
+ * This function calculates the SAD for 4x8 and 4x4 blocks.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to the original block; must be aligned on a 4-byte
+ * boundary.
+ * iStepOrg - Step of the original block buffer; must be a multiple of 4.
+ * pSrcRef - Pointer to the reference block
+ * iStepRef - Step of the reference block buffer
+ * iHeight - Height of the block; must be equal to either 4 or 8.
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - One or more of the following pointers is NULL:
+ * pSrcOrg, pSrcRef, or pDstSAD
+ * - iHeight is not equal to either 4 or 8.
+ * - iStepOrg is not a multiple of 4
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SAD_4x (
+ const OMX_U8 *pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8 *pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_S32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SADQuar_4x (6.3.5.4.2)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average
+ * of the other two (pSrcRef0 and pSrcRef1) for 4x8 or 4x4 blocks. Rounding
+ * is applied according to the convention (a+b+1)>>1.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the original block; must be aligned on a 4-byte
+ * boundary.
+ * pSrcRef0 - Pointer to reference block 0
+ * pSrcRef1 - Pointer to reference block 1
+ * iSrcStep - Step of the original block buffer; must be a multiple of 4.
+ * iRefStep0 - Step of reference block 0
+ * iRefStep1 - Step of reference block 1
+ * iHeight - Height of the block; must be equal to either 4 or 8.
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - iHeight is not equal to either 4 or 8.
+ * - One or more of the following pointers is NULL: pSrc, pSrcRef0,
+ * pSrcRef1, pDstSAD.
+ * - iSrcStep is not a multiple of 4
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_4x (
+ const OMX_U8 *pSrc,
+ const OMX_U8 *pSrcRef0,
+ const OMX_U8 *pSrcRef1,
+ OMX_U32 iSrcStep,
+ OMX_U32 iRefStep0,
+ OMX_U32 iRefStep1,
+ OMX_U32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SADQuar_8x (6.3.5.4.3)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average
+ * of the other two (pSrcRef0 and pSrcRef1) for 8x16, 8x8, or 8x4 blocks.
+ * Rounding is applied according to the convention (a+b+1)>>1.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the original block; must be aligned on an 8-byte
+ * boundary.
+ * pSrcRef0 - Pointer to reference block 0
+ * pSrcRef1 - Pointer to reference block 1
+ * iSrcStep - Step of the original block buffer; must be a multiple of 8.
+ * iRefStep0 - Step of reference block 0
+ * iRefStep1 - Step of reference block 1
+ * iHeight - Height of the block; must be equal to either 4, 8, or 16.
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - iHeight is not equal to either 4, 8, or 16.
+ * - One or more of the following pointers is NULL: pSrc, pSrcRef0,
+ * pSrcRef1, pDstSAD.
+ * - iSrcStep is not a multiple of 8
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_8x (
+ const OMX_U8 *pSrc,
+ const OMX_U8 *pSrcRef0,
+ const OMX_U8 *pSrcRef1,
+ OMX_U32 iSrcStep,
+ OMX_U32 iRefStep0,
+ OMX_U32 iRefStep1,
+ OMX_U32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SADQuar_16x (6.3.5.4.4)
+ *
+ * Description:
+ * This function calculates the SAD between one block (pSrc) and the average
+ * of the other two (pSrcRef0 and pSrcRef1) for 16x16 or 16x8 blocks.
+ * Rounding is applied according to the convention (a+b+1)>>1.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the original block; must be aligned on a 16-byte
+ * boundary.
+ * pSrcRef0 - Pointer to reference block 0
+ * pSrcRef1 - Pointer to reference block 1
+ * iSrcStep - Step of the original block buffer; must be a multiple of 16
+ * iRefStep0 - Step of reference block 0
+ * iRefStep1 - Step of reference block 1
+ * iHeight - Height of the block; must be equal to either 8 or 16
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - Pointer of result SAD
+ *
+ * Return Value:
+ * OMX_Sts_NoErr, if the function runs without error.
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs:
+ * - iHeight is not equal to either 8 or 16.
+ * - One or more of the following pointers is NULL: pSrc, pSrcRef0,
+ * pSrcRef1, pDstSAD.
+ * - iSrcStep is not a multiple of 16
+ * - Any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_SADQuar_16x (
+ const OMX_U8 *pSrc,
+ const OMX_U8 *pSrcRef0,
+ const OMX_U8 *pSrcRef1,
+ OMX_U32 iSrcStep,
+ OMX_U32 iRefStep0,
+ OMX_U32 iRefStep1,
+ OMX_U32 *pDstSAD,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SATD_4x4 (6.3.5.4.5)
+ *
+ * Description:
+ * This function calculates the sum of absolute transform differences (SATD)
+ * for a 4x4 block by applying a Hadamard transform to the difference block
+ * and then calculating the sum of absolute coefficient values.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to the original block; must be aligned on a 4-byte
+ * boundary
+ * iStepOrg - Step of the original block buffer; must be a multiple of 4
+ * pSrcRef - Pointer to the reference block; must be aligned on a 4-byte
+ * boundary
+ * iStepRef - Step of the reference block buffer; must be a multiple of 4
+ *
+ * Output Arguments:
+ *
+ * pDstSAD - pointer to the resulting SATD
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pSrcRef, or pDstSAD
+ * - either pSrcOrg or pSrcRef is not aligned on a 4-byte boundary
+ * - iStepOrg <= 0 or iStepOrg is not a multiple of 4
+ * - iStepRef <= 0 or iStepRef is not a multiple of 4
+ *
+ */
+OMXResult omxVCM4P10_SATD_4x4 (
+ const OMX_U8 *pSrcOrg,
+ OMX_U32 iStepOrg,
+ const OMX_U8 *pSrcRef,
+ OMX_U32 iStepRef,
+ OMX_U32 *pDstSAD
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InterpolateHalfHor_Luma (6.3.5.5.1)
+ *
+ * Description:
+ * This function performs interpolation for two horizontal 1/2-pel positions -
+ * (-1/2, 0) and (1/2, 0) - around a full-pel position.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the top-left corner of the block used to interpolate in
+ * the reconstruction frame plane.
+ * iSrcStep - Step of the source buffer.
+ * iDstStep - Step of the destination(interpolation) buffer; must be a
+ * multiple of iWidth.
+ * iWidth - Width of the current block; must be equal to either 4, 8, or 16
+ * iHeight - Height of the current block; must be equal to 4, 8, or 16
+ *
+ * Output Arguments:
+ *
+ * pDstLeft - Pointer to the interpolation buffer of the left 1/2-pel position
+ * (-1/2, 0)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ * pDstRight - Pointer to the interpolation buffer of the right 1/2-pel
+ * position (1/2, 0)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrc, pDstLeft, or pDstRight
+ * - iWidth or iHeight have values other than 4, 8, or 16
+ * - iWidth==4 but pDstLeft and/or pDstRight is/are not aligned on a 4-byte boundary
+ * - iWidth==8 but pDstLeft and/or pDstRight is/are not aligned on an 8-byte boundary
+ * - iWidth==16 but pDstLeft and/or pDstRight is/are not aligned on a 16-byte boundary
+ * - any alignment restrictions are violated
+ *
+ */
+OMXResult omxVCM4P10_InterpolateHalfHor_Luma (
+ const OMX_U8 *pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8 *pDstLeft,
+ OMX_U8 *pDstRight,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InterpolateHalfVer_Luma (6.3.5.5.2)
+ *
+ * Description:
+ * This function performs interpolation for two vertical 1/2-pel positions -
+ * (0, -1/2) and (0, 1/2) - around a full-pel position.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to top-left corner of block used to interpolate in the
+ * reconstructed frame plane
+ * iSrcStep - Step of the source buffer.
+ * iDstStep - Step of the destination (interpolation) buffer; must be a
+ * multiple of iWidth.
+ * iWidth - Width of the current block; must be equal to either 4, 8, or 16
+ * iHeight - Height of the current block; must be equal to either 4, 8, or 16
+ *
+ * Output Arguments:
+ *
+ * pDstUp - Pointer to the interpolation buffer of the 1/2-pel position above
+ * the current full-pel position (0, -1/2)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ * pDstDown - Pointer to the interpolation buffer of the 1/2-pel position below
+ * the current full-pel position (0, 1/2)
+ * If iWidth==4, 4-byte alignment required.
+ * If iWidth==8, 8-byte alignment required.
+ * If iWidth==16, 16-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrc, pDstUp, or pDstDown
+ * - iWidth or iHeight have values other than 4, 8, or 16
+ * - iWidth==4 but pDstUp and/or pDstDown is/are not aligned on a 4-byte boundary
+ * - iWidth==8 but pDstUp and/or pDstDown is/are not aligned on an 8-byte boundary
+ * - iWidth==16 but pDstUp and/or pDstDown is/are not aligned on a 16-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InterpolateHalfVer_Luma (
+ const OMX_U8 *pSrc,
+ OMX_U32 iSrcStep,
+ OMX_U8 *pDstUp,
+ OMX_U8 *pDstDown,
+ OMX_U32 iDstStep,
+ OMX_U32 iWidth,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_Average_4x (6.3.5.5.3)
+ *
+ * Description:
+ * This function calculates the average of two 4x4 or 4x8 blocks. The result
+ * is rounded according to (a+b+1)/2.
+ *
+ * Input Arguments:
+ *
+ * pPred0 - Pointer to the top-left corner of reference block 0
+ * pPred1 - Pointer to the top-left corner of reference block 1
+ * iPredStep0 - Step of reference block 0; must be a multiple of 4.
+ * iPredStep1 - Step of reference block 1; must be a multiple of 4.
+ * iDstStep - Step of the destination buffer; must be a multiple of 4.
+ * iHeight - Height of the blocks; must be either 4 or 8.
+ *
+ * Output Arguments:
+ *
+ * pDstPred - Pointer to the destination buffer. 4-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pPred0, pPred1, or pDstPred
+ * - pDstPred is not aligned on a 4-byte boundary
+ * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 4
+ * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 4
+ * - iDstStep <= 0 or iDstStep is not a multiple of 4
+ * - iHeight is not equal to either 4 or 8
+ *
+ */
+OMXResult omxVCM4P10_Average_4x (
+ const OMX_U8 *pPred0,
+ const OMX_U8 *pPred1,
+ OMX_U32 iPredStep0,
+ OMX_U32 iPredStep1,
+ OMX_U8 *pDstPred,
+ OMX_U32 iDstStep,
+ OMX_U32 iHeight
+);
+
+
+
+/**
+ * Function: omxVCM4P10_TransformQuant_ChromaDC (6.3.5.6.1)
+ *
+ * Description:
+ * This function performs 2x2 Hadamard transform of chroma DC coefficients
+ * and then quantizes the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the 2x2 array of chroma DC coefficients. 8-byte
+ * alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ * bIntra - Indicates whether this is an INTRA block: 1-INTRA, 0-INTER
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - Pointer to transformed and quantized coefficients. 8-byte
+ * alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcDst
+ * - pSrcDst is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_ChromaDC (
+ OMX_S16 *pSrcDst,
+ OMX_U32 iQP,
+ OMX_U8 bIntra
+);
+
+
+
+/**
+ * Function: omxVCM4P10_TransformQuant_LumaDC (6.3.5.6.2)
+ *
+ * Description:
+ * This function performs a 4x4 Hadamard transform of luma DC coefficients
+ * and then quantizes the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrcDst - Pointer to the 4x4 array of luma DC coefficients. 16-byte
+ * alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ *
+ * Output Arguments:
+ *
+ * pSrcDst - Pointer to transformed and quantized coefficients. 16-byte
+ * alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL: pSrcDst
+ * - pSrcDst is not aligned on a 16-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_TransformQuant_LumaDC (
+ OMX_S16 *pSrcDst,
+ OMX_U32 iQP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InvTransformDequant_LumaDC (6.3.5.6.3)
+ *
+ * Description:
+ * This function performs inverse 4x4 Hadamard transform and then dequantizes
+ * the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the 4x4 array of the 4x4 Hadamard-transformed and
+ * quantized coefficients. 16-byte alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to inverse-transformed and dequantized coefficients.
+ * 16-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL: pSrc
+ * - pSrc or pDst is not aligned on a 16-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InvTransformDequant_LumaDC (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst,
+ OMX_U32 iQP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InvTransformDequant_ChromaDC (6.3.5.6.4)
+ *
+ * Description:
+ * This function performs inverse 2x2 Hadamard transform and then dequantizes
+ * the coefficients.
+ *
+ * Input Arguments:
+ *
+ * pSrc - Pointer to the 2x2 array of the 2x2 Hadamard-transformed and
+ * quantized coefficients. 8-byte alignment required.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ *
+ * Output Arguments:
+ *
+ * pDst - Pointer to inverse-transformed and dequantized coefficients.
+ * 8-byte alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL: pSrc
+ * - pSrc or pDst is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InvTransformDequant_ChromaDC (
+ const OMX_S16 *pSrc,
+ OMX_S16 *pDst,
+ OMX_U32 iQP
+);
+
+
+
+/**
+ * Function: omxVCM4P10_InvTransformResidualAndAdd (6.3.5.7.1)
+ *
+ * Description:
+ * This function performs an inverse 4x4 integer transformation to produce
+ * the difference signal and then adds the difference to the prediction to get
+ * the reconstructed signal.
+ *
+ * Input Arguments:
+ *
+ * pSrcPred - Pointer to prediction signal. 4-byte alignment required.
+ * pDequantCoeff - Pointer to the transformed coefficients. 8-byte
+ * alignment required.
+ * iSrcPredStep - Step of the prediction buffer; must be a multiple of 4.
+ * iDstReconStep - Step of the destination reconstruction buffer; must be a
+ * multiple of 4.
+ * bAC - Indicates whether there are AC coefficients in the coefficients
+ * matrix.
+ *
+ * Output Arguments:
+ *
+ * pDstRecon - Pointer to the destination reconstruction buffer. 4-byte
+ * alignment required.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcPred, pDequantCoeff, pDstRecon
+ * - pSrcPred is not aligned on a 4-byte boundary
+ * - iSrcPredStep or iDstReconStep is not a multiple of 4.
+ * - pDequantCoeff is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_InvTransformResidualAndAdd (
+ const OMX_U8 *pSrcPred,
+ const OMX_S16 *pDequantCoeff,
+ OMX_U8 *pDstRecon,
+ OMX_U32 iSrcPredStep,
+ OMX_U32 iDstReconStep,
+ OMX_U8 bAC
+);
+
+
+
+/**
+ * Function: omxVCM4P10_SubAndTransformQDQResidual (6.3.5.8.1)
+ *
+ * Description:
+ * This function subtracts the prediction signal from the original signal to
+ * produce the difference signal and then performs a 4x4 integer transform and
+ * quantization. The quantized transformed coefficients are stored as
+ * pDstQuantCoeff. This function can also output dequantized coefficients or
+ * unquantized DC coefficients optionally by setting the pointers
+ * pDstDeQuantCoeff, pDCCoeff.
+ *
+ * Input Arguments:
+ *
+ * pSrcOrg - Pointer to original signal. 4-byte alignment required.
+ * pSrcPred - Pointer to prediction signal. 4-byte alignment required.
+ * iSrcOrgStep - Step of the original signal buffer; must be a multiple of
+ * 4.
+ * iSrcPredStep - Step of the prediction signal buffer; must be a multiple
+ * of 4.
+ * pNumCoeff - Number of non-zero coefficients after quantization. If this
+ * parameter is not required, it is set to NULL.
+ * nThreshSAD - Zero-block early detection threshold. If this parameter is
+ * not required, it is set to 0.
+ * iQP - Quantization parameter; must be in the range [0,51].
+ * bIntra - Indicates whether this is an INTRA block, either 1-INTRA or
+ * 0-INTER
+ *
+ * Output Arguments:
+ *
+ * pDstQuantCoeff - Pointer to the quantized transformed coefficients.
+ * 8-byte alignment required.
+ * pDstDeQuantCoeff - Pointer to the dequantized transformed coefficients
+ * if this parameter is not equal to NULL. 8-byte alignment
+ * required.
+ * pDCCoeff - Pointer to the unquantized DC coefficient if this parameter
+ * is not equal to NULL.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcOrg, pSrcPred, pNumCoeff, pDstQuantCoeff,
+ * pDstDeQuantCoeff, pDCCoeff
+ * - pSrcOrg is not aligned on a 4-byte boundary
+ * - pSrcPred is not aligned on a 4-byte boundary
+ * - iSrcOrgStep is not a multiple of 4
+ * - iSrcPredStep is not a multiple of 4
+ * - pDstQuantCoeff or pDstDeQuantCoeff is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_SubAndTransformQDQResidual (
+ const OMX_U8 *pSrcOrg,
+ const OMX_U8 *pSrcPred,
+ OMX_U32 iSrcOrgStep,
+ OMX_U32 iSrcPredStep,
+ OMX_S16 *pDstQuantCoeff,
+ OMX_S16 *pDstDeQuantCoeff,
+ OMX_S16 *pDCCoeff,
+ OMX_S8 *pNumCoeff,
+ OMX_U32 nThreshSAD,
+ OMX_U32 iQP,
+ OMX_U8 bIntra
+);
+
+
+
+/**
+ * Function: omxVCM4P10_GetVLCInfo (6.3.5.9.1)
+ *
+ * Description:
+ * This function extracts run-length encoding (RLE) information from the
+ * coefficient matrix. The results are returned in an OMXVCM4P10VLCInfo
+ * structure.
+ *
+ * Input Arguments:
+ *
+ * pSrcCoeff - pointer to the transform coefficient matrix. 8-byte
+ * alignment required.
+ * pScanMatrix - pointer to the scan order definition matrix. For a luma
+ * block the scan matrix should follow [ISO14496-10] section 8.5.4,
+ * and should contain the values 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13,
+ * 10, 7, 11, 14, 15. For a chroma block, the scan matrix should
+ * contain the values 0, 1, 2, 3.
+ * bAC - indicates presence of a DC coefficient; 0 = DC coefficient
+ * present, 1 = DC coefficient absent.
+ * MaxNumCoef - specifies the number of coefficients contained in the
+ * transform coefficient matrix, pSrcCoeff. The value should be 16
+ * for blocks of type LUMADC, LUMAAC, LUMALEVEL, and CHROMAAC. The
+ * value should be 4 for blocks of type CHROMADC.
+ *
+ * Output Arguments:
+ *
+ * pDstVLCInfo - pointer to structure that stores information for
+ * run-length coding.
+ *
+ * Return Value:
+ *
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments; returned if any of the following
+ * conditions are true:
+ * - at least one of the following pointers is NULL:
+ * pSrcCoeff, pScanMatrix, pDstVLCInfo
+ * - pSrcCoeff is not aligned on an 8-byte boundary
+ *
+ */
+OMXResult omxVCM4P10_GetVLCInfo (
+ const OMX_S16 *pSrcCoeff,
+ const OMX_U8 *pScanMatrix,
+ OMX_U8 bAC,
+ OMX_U32 MaxNumCoef,
+ OMXVCM4P10VLCInfo*pDstVLCInfo
+);
+
+
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /** end of #define _OMXVC_H_ */
+
+/** EOF */
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC_s.h
new file mode 100644
index 0000000..be974d5
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC_s.h
@@ -0,0 +1,129 @@
+;/******************************************************************************
+;// Copyright (c) 1999-2005 The Khronos Group Inc. All Rights Reserved
+;//
+;//
+;//
+;//
+;//
+;//
+;//
+;//
+;******************************************************************************/
+
+;/** =============== Structure Definition for Sample Generation ============== */
+;/** transparent status */
+
+;enum {
+OMX_VIDEO_TRANSPARENT EQU 0; /** Wholly transparent */
+OMX_VIDEO_PARTIAL EQU 1; /** Partially transparent */
+OMX_VIDEO_OPAQUE EQU 2; /** Opaque */
+;}
+
+;/** direction */
+;enum {
+OMX_VIDEO_NONE EQU 0;
+OMX_VIDEO_HORIZONTAL EQU 1;
+OMX_VIDEO_VERTICAL EQU 2;
+;}
+
+;/** bilinear interpolation type */
+;enum {
+OMX_VIDEO_INTEGER_PIXEL EQU 0; /** case "a" */
+OMX_VIDEO_HALF_PIXEL_X EQU 1; /** case "b" */
+OMX_VIDEO_HALF_PIXEL_Y EQU 2; /** case "c" */
+OMX_VIDEO_HALF_PIXEL_XY EQU 3; /** case "d" */
+;}
+;/** neighbouring macroblock flags; each value is a distinct bit so they can be combined */
+;enum {
+OMX_UPPER EQU 1; /** set if the above macroblock is available */
+OMX_LEFT EQU 2; /** set if the left macroblock is available */
+OMX_CENTER EQU 4;
+OMX_RIGHT EQU 8;
+OMX_LOWER EQU 16;
+OMX_UPPER_LEFT EQU 32; /** set if the above-left macroblock is available */
+OMX_UPPER_RIGHT EQU 64; /** set if the above-right macroblock is available */
+OMX_LOWER_LEFT EQU 128;
+OMX_LOWER_RIGHT EQU 256
+;}
+;/** colour component */
+;enum {
+OMX_VIDEO_LUMINANCE EQU 0; /** Luminance component */
+OMX_VIDEO_CHROMINANCE EQU 1; /** chrominance component */
+OMX_VIDEO_ALPHA EQU 2; /** Alpha component */
+;}
+;/** macroblock type; values 0-5 are repeated as OMX_VC_* in OMXVCM4P2MacroblockType below */
+;enum {
+OMX_VIDEO_INTER EQU 0; /** P picture or P-VOP */
+OMX_VIDEO_INTER_Q EQU 1; /** P picture or P-VOP */
+OMX_VIDEO_INTER4V EQU 2; /** P picture or P-VOP */
+OMX_VIDEO_INTRA EQU 3; /** I and P picture; I- and P-VOP */
+OMX_VIDEO_INTRA_Q EQU 4; /** I and P picture; I- and P-VOP */
+OMX_VIDEO_INTER4V_Q EQU 5; /** P picture or P-VOP (H.263)*/
+OMX_VIDEO_DIRECT EQU 6; /** B picture or B-VOP (MPEG-4 only) */
+OMX_VIDEO_INTERPOLATE EQU 7; /** B picture or B-VOP */
+OMX_VIDEO_BACKWARD EQU 8; /** B picture or B-VOP */
+OMX_VIDEO_FORWARD EQU 9; /** B picture or B-VOP */
+OMX_VIDEO_NOTCODED EQU 10; /** B picture or B-VOP */
+;}
+;/** Intra 16x16 prediction modes */
+;enum {
+OMX_16X16_VERT EQU 0; /** Intra_16x16_Vertical (prediction mode) */
+OMX_16X16_HOR EQU 1; /** Intra_16x16_Horizontal (prediction mode) */
+OMX_16X16_DC EQU 2; /** Intra_16x16_DC (prediction mode) */
+OMX_16X16_PLANE EQU 3; /** Intra_16x16_Plane (prediction mode) */
+;}
+;/** Intra 4x4 prediction modes */
+;enum {
+OMX_4x4_VERT EQU 0; /** Intra_4x4_Vertical (prediction mode) */
+OMX_4x4_HOR EQU 1; /** Intra_4x4_Horizontal (prediction mode) */
+OMX_4x4_DC EQU 2; /** Intra_4x4_DC (prediction mode) */
+OMX_4x4_DIAG_DL EQU 3; /** Intra_4x4_Diagonal_Down_Left (prediction mode) */
+OMX_4x4_DIAG_DR EQU 4; /** Intra_4x4_Diagonal_Down_Right (prediction mode) */
+OMX_4x4_VR EQU 5; /** Intra_4x4_Vertical_Right (prediction mode) */
+OMX_4x4_HD EQU 6; /** Intra_4x4_Horizontal_Down (prediction mode) */
+OMX_4x4_VL EQU 7; /** Intra_4x4_Vertical_Left (prediction mode) */
+OMX_4x4_HU EQU 8; /** Intra_4x4_Horizontal_Up (prediction mode) */
+;}
+;/** Intra chroma prediction modes */
+;enum {
+OMX_CHROMA_DC EQU 0; /** Intra_Chroma_DC (prediction mode) */
+OMX_CHROMA_HOR EQU 1; /** Intra_Chroma_Horizontal (prediction mode) */
+OMX_CHROMA_VERT EQU 2; /** Intra_Chroma_Vertical (prediction mode) */
+OMX_CHROMA_PLANE EQU 3; /** Intra_Chroma_Plane (prediction mode) */
+;}
+;/** field offsets of the C structures named below (presumably in bytes - confirm against the C headers) */
+;typedef struct {
+x EQU 0;
+y EQU 4;
+;}OMXCoordinate;
+
+;typedef struct {
+dx EQU 0;
+dy EQU 2;
+;}OMXMotionVector;
+
+;typedef struct {
+xx EQU 0; /** NOTE(review): presumably field "x"; doubled because x is already defined above */
+yy EQU 4; /** NOTE(review): presumably field "y"; doubled because y is already defined above */
+width EQU 8;
+height EQU 12;
+;}OMXiRect;
+;/** same values as OMX_VIDEO_INTER .. OMX_VIDEO_INTER4V_Q above */
+;typedef enum {
+OMX_VC_INTER EQU 0; /** P picture or P-VOP */
+OMX_VC_INTER_Q EQU 1; /** P picture or P-VOP */
+OMX_VC_INTER4V EQU 2; /** P picture or P-VOP */
+OMX_VC_INTRA EQU 3; /** I and P picture, I- and P-VOP */
+OMX_VC_INTRA_Q EQU 4; /** I and P picture, I- and P-VOP */
+OMX_VC_INTER4V_Q EQU 5; /** P picture or P-VOP (H.263)*/
+;} OMXVCM4P2MacroblockType;
+;/** direction; same values as OMX_VIDEO_NONE / _HORIZONTAL / _VERTICAL above */
+;enum {
+OMX_VC_NONE EQU 0
+OMX_VC_HORIZONTAL EQU 1
+OMX_VC_VERTICAL EQU 2
+;};
+
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy16x16_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy16x16_s.s
new file mode 100644
index 0000000..2663a70
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy16x16_s.s
@@ -0,0 +1,148 @@
+ ;/**
+ ; * Function: omxVCCOMM_Copy16x16
+ ; *
+ ; * Description:
+ ; * Copies the reference 16x16 block to the current block.
+ ; * Parameters:
+ ; * [in] pSrc - pointer to the reference block in the source frame; must be aligned on a 16-byte boundary.
+ ; * [in] step - distance between the starts of consecutive lines in the reference frame, in bytes;
+ ; * must be a multiple of 16 and must be larger than or equal to 16.
+ ; * [out] pDst - pointer to the destination block; must be aligned on an 8-byte boundary.
+ ; * Return Value:
+ ; * OMX_Sts_NoErr - no error
+ ; * OMX_Sts_BadArgErr - bad arguments; returned under any of the following conditions:
+ ; * - one or more of the following pointers is NULL: pSrc, pDst
+ ; * - one or more of the following pointers is not aligned on a 16-byte boundary: pSrc, pDst
+ ; * - step <16 or step is not a multiple of 16.
+ ; */
+ ;// NOTE: this implementation performs none of the checks above and always returns OMX_Sts_NoErr.
+ INCLUDE omxtypes_s.h
+ ;// NOTE(review): pDst is documented as 8-byte aligned but the error list says 16-byte - confirm against the spec.
+ ;// NOTE(review): M_VARIANTS/M_START/M_END are used below - confirm omxtypes_s.h provides them.
+ M_VARIANTS ARM1136JS
+
+
+
+
+ IF ARM1136JS
+
+;//Input Arguments
+pSrc RN 0 ;// source pointer, post-incremented by every load
+pDst RN 1 ;// destination pointer, advanced 8 bytes per store (destination lines are packed, 16 bytes each)
+step RN 2 ;// source line stride in bytes; r2 is reused as X0 once Count has been computed
+
+;//Local Variables
+Count RN 14 ;// step-8: moves pSrc from the second half of a line to the start of the next line
+X0 RN 2 ;// LDRD/STRD register pair r2:r3 - first 8 bytes of a line
+X1 RN 4 ;// LDRD/STRD register pair r4:r5 - last 8 bytes of a line
+
+Return RN 0
+
+ M_START omxVCCOMM_Copy16x16,r5
+ ;// NOTE(review): r4, r5 and r14 are written below; presumably M_START/M_END save and restore them.
+ ;// The copy is fully unrolled and software-pipelined: the loads of line k+1 are
+ ;// interleaved with the stores of line k.
+ SUB Count,step,#8 ;//Count=step-8
+ LDRD X0,[pSrc],#8 ;//pSrc after loading pSrc=pSrc+8
+ LDRD X1,[pSrc],Count ;//pSrc after loading pSrc=pSrc+step
+
+ ;// store line 0, load line 1
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store line 1, load line 2
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store line 2, load line 3
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store line 3, load line 4
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store line 4, load line 5
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store line 5, load line 6
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store line 6, load line 7
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store line 7, load line 8
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store line 8, load line 9
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store line 9, load line 10
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store line 10, load line 11
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store line 11, load line 12
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store line 12, load line 13
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store line 13, load line 14
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+
+ ;// store line 14, load line 15
+ STRD X0,[pDst],#8
+ LDRD X0,[pSrc],#8
+ STRD X1,[pDst],#8
+ LDRD X1,[pSrc],Count
+ ;// store line 15 (last line); the return value is set between the two stores
+ STRD X0,[pDst],#8
+ MOV Return,#OMX_Sts_NoErr
+ STRD X1,[pDst],#8
+
+
+ M_END
+ ENDIF
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy8x8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy8x8_s.s
new file mode 100644
index 0000000..993873c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy8x8_s.s
@@ -0,0 +1,72 @@
+ ;/**
+ ; * Function: omxVCCOMM_Copy8x8
+ ; *
+ ; * Description:
+ ; * Copies the reference 8x8 block to the current block.
+ ; * Parameters:
+ ; * [in] pSrc - pointer to the reference block in the source frame; must be aligned on an 8-byte boundary.
+ ; * [in] step - distance between the starts of consecutive lines in the reference frame, in bytes;
+ ; * must be a multiple of 8 and must be larger than or equal to 8.
+ ; * [out] pDst - pointer to the destination block; must be aligned on an 8-byte boundary.
+ ; * Return Value:
+ ; * OMX_Sts_NoErr - no error
+ ; * OMX_Sts_BadArgErr - bad arguments; returned under any of the following conditions:
+ ; * - one or more of the following pointers is NULL: pSrc, pDst
+ ; * - one or more of the following pointers is not aligned on an 8-byte boundary: pSrc, pDst
+ ; * - step <8 or step is not a multiple of 8.
+ ; */
+ ;// NOTE: this implementation performs none of the checks above and always returns OMX_Sts_NoErr.
+ INCLUDE omxtypes_s.h
+ ;// NOTE(review): M_VARIANTS/M_START/M_END are used below - confirm omxtypes_s.h provides them.
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+ IF ARM1136JS
+
+;//Input Arguments
+pSrc RN 0 ;// source pointer, advanced by one line per load
+pDst RN 1 ;// destination pointer, advanced 8 bytes per store (destination lines are packed)
+step RN 2 ;// source line stride in bytes; r2 is reused as X0, hence the copy into Count
+
+;//Local Variables
+Count RN 14 ;// copy of step that survives the first load into X0
+X0 RN 2 ;// LDRD/STRD register pair r2:r3 - even-numbered lines
+X1 RN 4 ;// LDRD/STRD register pair r4:r5 - odd-numbered lines
+Return RN 0
+ M_START omxVCCOMM_Copy8x8,r5
+ ;// NOTE(review): r4, r5 and r14 are written below; presumably M_START/M_END save and restore them.
+ ;// Fully unrolled: each LDRD/STRD moves one complete 8-byte line.
+
+ MOV Count,step ;//Count=step
+
+ LDRD X0,[pSrc],Count ;//pSrc after loading : pSrc=pSrc+step
+ LDRD X1,[pSrc],Count ;// line 1
+
+ STRD X0,[pDst],#8 ;// store line 0
+ LDRD X0,[pSrc],Count ;// load line 2
+ STRD X1,[pDst],#8 ;// store line 1
+ LDRD X1,[pSrc],Count ;// load line 3
+
+ STRD X0,[pDst],#8 ;// store line 2
+ LDRD X0,[pSrc],Count ;// load line 4
+ STRD X1,[pDst],#8 ;// store line 3
+ LDRD X1,[pSrc],Count ;// load line 5
+
+ STRD X0,[pDst],#8 ;// store line 4
+ LDRD X0,[pSrc],Count ;// load line 6
+ STRD X1,[pDst],#8 ;// store line 5
+ LDRD X1,[pSrc],Count ;// load line 7
+
+ STRD X0,[pDst],#8 ;// store line 6
+ MOV Return,#OMX_Sts_NoErr
+ STRD X1,[pDst],#8 ;// store line 7
+
+
+ M_END
+ ENDIF
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
new file mode 100644
index 0000000..02b4b08
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
@@ -0,0 +1,189 @@
+;//
+;//
+;// File Name: omxVCCOMM_ExpandFrame_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// Description:
+;// This function will Expand Frame boundary pixels into Plane
+;//
+;//
+
+;// Include standard headers
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+;// Import symbols required from other files
+;// (For example tables)
+
+
+;// Set debugging level
+DEBUG_ON SETL {FALSE}
+
+
+
+
+
+
+
+;// Guarding implementation by the processor name
+
+ IF ARM1136JS
+
+;//Input Registers
+
+pSrcDstPlane RN 0
+iFrameWidth RN 1
+iFrameHeight RN 2
+iExpandPels RN 3
+
+
+;//Output Registers
+
+result RN 0
+
+;//Local Scratch Registers
+;// Several names share one register; the top/bottom pass uses the first name, the left/right pass the second.
+iPlaneStep RN 4 ;// loaded from the stack argument declared with M_ARG below
+pTop RN 5
+pBottom RN 6
+pBottomIndex RN 7
+x RN 8
+y RN 9
+tempTop RN 10
+tempBot RN 11
+ColStep RN 12
+pLeft RN 5 ;// shares r5 with pTop
+pRight RN 6 ;// shares r6 with pBottom
+pRightIndex RN 7 ;// shares r7 with pBottomIndex
+tempLeft1 RN 10 ;// shares r10 with tempTop
+tempRight1 RN 11 ;// shares r11 with tempBot
+tempLeft2 RN 14 ;// shares r14 with indexY (indexY is last read before the left/right loop)
+tempRight2 RN 2 ;// shares r2 with iFrameHeight (last read when y is set up for the left/right loop)
+indexY RN 14 ;// iExpandPels * iPlaneStep
+RowStep RN 12 ;// shares r12 with ColStep
+expandTo4bytes RN 1 ;// copy a byte to 4 bytes of a word
+
+ ;// Allocate stack memory required by the function
+
+
+ ;// Write function header
+ M_START omxVCCOMM_ExpandFrame_I,r11
+
+ ;// Define stack arguments
+ M_ARG iPlaneStepOnStack, 4
+
+ ;// Load argument from the stack
+ M_LDR iPlaneStep, iPlaneStepOnStack
+
+ MUL pTop,iExpandPels,iPlaneStep ;// byte size of the border above the frame
+ MLA pBottom,iFrameHeight,iPlaneStep,pSrcDstPlane ;// pSrcDstPlane + iFrameHeight*iPlaneStep: first row below the frame
+ SUB x,iFrameWidth,#4 ;// offset of the last 4-byte column
+ MOV indexY,pTop ;// keep the product, it is reused after the top/bottom pass
+ ADD ColStep,indexY,#4 ;// rewinds the iExpandPels rows just written and moves 4 bytes left
+ SUB pBottomIndex,pBottom,iPlaneStep ;// last row of the frame
+ SUB pTop,pSrcDstPlane,pTop ;// first row of the top border
+
+
+ ADD pTop,pTop,x ;// both write pointers start at the right-most 4-byte column
+ ADD pBottom,pBottom,x
+
+ ;//------------------------------------------------------------------------
+ ;// The following improves upon the C implementation
+ ;// The x and y loops are interchanged: This ensures that the values of
+ ;// pSrcDstPlane [x] and pSrcDstPlane [(iFrameHeight - 1) * iPlaneStep + x]
+ ;// which depend only on loop variable 'x' are loaded once and used in
+ ;// multiple stores in the 'Y' loop
+ ;//------------------------------------------------------------------------
+ ;// NOTE(review): columns are handled 4 bytes at a time - assumes iFrameWidth is a multiple of 4; confirm.
+ ;// xloop
+ExpandFrameTopBotXloop
+
+ LDR tempTop,[pSrcDstPlane,x]
+ ;//------------------------------------------------------------------------
+ ;// pSrcDstPlane [(iFrameHeight - 1) * iPlaneStep + x] is simplified as:
+ ;// pSrcDstPlane + (iFrameHeight * iPlaneStep) - iPlaneStep + x ==
+ ;// pBottom - iPlaneStep + x == pBottomIndex [x]
+ ;// The value of pBottomIndex is calculated above this 'x' loop
+ ;//------------------------------------------------------------------------
+ LDR tempBot,[pBottomIndex,x]
+
+ ;// yloop
+ MOV y,iExpandPels
+
+ExpandFrameTopBotYloop
+ SUBS y,y,#1
+ M_STR tempTop,[pTop],iPlaneStep
+ M_STR tempBot,[pBottom],iPlaneStep
+ BGT ExpandFrameTopBotYloop
+
+ SUBS x,x,#4 ;// next column to the left; the loop ends once x goes negative
+ SUB pTop,pTop,ColStep
+ SUB pBottom,pBottom,ColStep
+ BGE ExpandFrameTopBotXloop
+
+
+ ;// y loop
+ ;// The product is already calculated above : Reuse
+ ;//MUL indexY,iExpandPels,iPlaneStep
+
+ SUB pSrcDstPlane,pSrcDstPlane,indexY ;// move up to the first row of the top border
+ SUB pLeft,pSrcDstPlane,iExpandPels ;// pLeft->points to the top left of the expanded block
+ ADD pRight,pSrcDstPlane,iFrameWidth ;// first byte to the right of the frame on that row
+ SUB pRightIndex,pRight,#1 ;// right-most pixel of that row
+
+ ADD y,iFrameHeight,iExpandPels,LSL #1 ;// rows to process: iFrameHeight + 2*iExpandPels
+ LDR expandTo4bytes,=0x01010101
+
+ RSB RowStep,iExpandPels,iPlaneStep,LSL #1 ;// 2*iPlaneStep - iExpandPels: from the end of one border row to the row two below
+
+ ;// The Y Loop is unrolled twice
+ExpandFrameLeftRightYloop
+ LDRB tempLeft2,[pSrcDstPlane,iPlaneStep] ;// PreLoad the values
+ LDRB tempRight2,[pRightIndex,iPlaneStep]
+ M_LDRB tempLeft1,[pSrcDstPlane],iPlaneStep,LSL #1 ;// PreLoad the values
+ M_LDRB tempRight1,[pRightIndex],iPlaneStep,LSL #1
+
+ SUB x,iExpandPels,#4
+ MUL tempLeft2,tempLeft2,expandTo4bytes ;// Copy the single byte to 4 bytes
+ MUL tempRight2,tempRight2,expandTo4bytes
+ MUL tempLeft1,tempLeft1,expandTo4bytes ;// Copy the single byte to 4 bytes
+ MUL tempRight1,tempRight1,expandTo4bytes
+
+ ;// NOTE(review): 4 bytes are written per pass - assumes iExpandPels is a multiple of 4; confirm.
+ ;// x loop
+ExpandFrameLeftRightXloop
+ SUBS x,x,#4
+ STR tempLeft2,[pLeft,iPlaneStep] ;// Store the 4 bytes at one go
+ STR tempRight2,[pRight,iPlaneStep]
+ STR tempLeft1,[pLeft],#4 ;// Store the 4 bytes at one go
+ STR tempRight1,[pRight],#4
+ BGE ExpandFrameLeftRightXloop
+
+ SUBS y,y,#2 ;// two rows per pass - NOTE(review): assumes the row count is even; confirm
+ ADD pLeft,pLeft,RowStep
+ ADD pRight,pRight,RowStep
+ BGT ExpandFrameLeftRightYloop
+
+
+ ;// Set return value
+
+ MOV result,#OMX_Sts_NoErr
+End
+
+ ;// Write function tail
+
+ M_END
+
+ ENDIF ;//ARM1136JS
+
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/api/armVCM4P10_CAVLCTables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/api/armVCM4P10_CAVLCTables.h
new file mode 100644
index 0000000..4340f2a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/api/armVCM4P10_CAVLCTables.h
@@ -0,0 +1,30 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_CAVLCTables.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Header file for optimized H.264 CAVLC tables
+ *
+ */
+
+#ifndef ARMVCM4P10_CAVLCTABLES_H
+#define ARMVCM4P10_CAVLCTABLES_H
+
+/* CAVLC tables */
+/* NOTE: this header uses OMX_U8/OMX_S8/OMX_U16 but includes nothing; include their definitions first. */
+extern const OMX_U16 *armVCM4P10_CAVLCCoeffTokenTables[18];   /* array of pointers to 16-bit tables */
+extern const OMX_U16 *armVCM4P10_CAVLCTotalZeroTables[15];    /* array of pointers to 16-bit tables */
+extern const OMX_U16 *armVCM4P10_CAVLCTotalZeros2x2Tables[3]; /* array of pointers to 16-bit tables */
+extern const OMX_U16 *armVCM4P10_CAVLCRunBeforeTables[15];    /* array of pointers to 16-bit tables */
+extern const OMX_U8 armVCM4P10_ZigZag_4x4[16];
+extern const OMX_U8 armVCM4P10_ZigZag_2x2[4];
+extern const OMX_S8 armVCM4P10_SuffixToLevel[7];
+
+#endif
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
new file mode 100644
index 0000000..b2cd9d1
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
@@ -0,0 +1,222 @@
+;//
+;//
+;// File Name: armVCM4P10_Average_4x_Align_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+;// Functions:
+;// armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe
+;//
+;// Implements Average of 4x4 with equation c = (a+b+1)>>1.
+;// First operand will be at offset ALIGNMENT from aligned address
+;// Second operand will be at aligned location and will be used as output.
+;// destination pointed by (pDst) for vertical interpolation.
+;// This function needs to copy 4 bytes in horizontal direction
+;//
+;// Registers used as input for this function
+;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 the step size
+;//
+;// Registers preserved for top level function
+;// r4,r5,r6,r8,r9,r14
+;//
+;// Registers modified by the function
+;// r7,r10,r11,r12
+;//
+;// Output registers
+;// r2 - pointer to the aligned location
+;// r3 - step size to this aligned location
+;// NOTE(review): r4, r5 and r14 are also written below; presumably M_START ...,r6 saves and restores them.
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_Average_4x4_Align0_unsafe
+ EXPORT armVCM4P10_Average_4x4_Align2_unsafe
+ EXPORT armVCM4P10_Average_4x4_Align3_unsafe
+;// NOTE(review): no Align1 variant is defined or exported in this file.
+DEBUG_ON SETL {FALSE}
+
+;// Declare input registers
+pPred0 RN 0
+iPredStep0 RN 1
+pPred1 RN 2
+iPredStep1 RN 3
+pDstPred RN 2 ;// same register as pPred1: the result overwrites the second operand
+iDstStep RN 3 ;// same register as iPredStep1
+
+;// Declare other intermediate registers
+iPredA0 RN 10
+iPredA1 RN 11
+iPredB0 RN 12
+iPredB1 RN 14
+Temp1 RN 4
+Temp2 RN 5
+ResultA RN 5 ;// shares r5 with Temp2
+ResultB RN 4 ;// shares r4 with Temp1
+r0x80808080 RN 7
+
+ IF ARM1136JS
+
+ ;// This function calculates average of 4x4 block
+ ;// pPred0 is at alignment offset 0 and pPred1 is alignment 4
+
+ ;// Function header
+ M_START armVCM4P10_Average_4x4_Align0_unsafe, r6
+
+ ;// Code start
+ LDR r0x80808080, =0x80808080
+
+ ;// 1st load
+ M_LDR iPredB0, [pPred1]
+ M_LDR iPredA0, [pPred0], iPredStep0
+ M_LDR iPredB1, [pPred1, iPredStep1]
+ M_LDR iPredA1, [pPred0], iPredStep0
+
+ ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
+ MVN iPredB0, iPredB0 ;// 255-b in every byte
+ MVN iPredB1, iPredB1
+ UHSUB8 ResultA, iPredA0, iPredB0 ;// (a-(255-b))/2 per byte
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080 ;// the "+ 128" term, applied to each byte
+ M_STR ResultA, [pDstPred], iDstStep
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep
+
+ ;// 2nd load
+ M_LDR iPredA0, [pPred0], iPredStep0
+ M_LDR iPredB0, [pPred1]
+ M_LDR iPredA1, [pPred0], iPredStep0
+ M_LDR iPredB1, [pPred1, iPredStep1]
+ ;// pPred1 is pDstPred, so the two stores above already advanced it to rows 2 and 3
+ MVN iPredB0, iPredB0
+ UHSUB8 ResultA, iPredA0, iPredB0
+ MVN iPredB1, iPredB1
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080
+ M_STR ResultA, [pDstPred], iDstStep
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep
+End0
+ M_END
+
+ ;// This function calculates average of 4x4 block
+ ;// pPred0 is at alignment offset 2 and pPred1 is alignment 4
+
+ ;// Function header
+ M_START armVCM4P10_Average_4x4_Align2_unsafe, r6
+
+ ;// Code start
+ LDR r0x80808080, =0x80808080
+
+ ;// 1st load
+ LDR Temp1, [pPred0, #4]
+ M_LDR iPredA0, [pPred0], iPredStep0
+ M_LDR iPredB0, [pPred1]
+ M_LDR iPredB1, [pPred1, iPredStep1]
+ M_LDR Temp2, [pPred0, #4]
+ M_LDR iPredA1, [pPred0], iPredStep0
+ MVN iPredB0, iPredB0
+ MVN iPredB1, iPredB1
+ MOV iPredA0, iPredA0, LSR #16 ;// drop the 2 low bytes of the first word ...
+ ORR iPredA0, iPredA0, Temp1, LSL #16 ;// ... and append the 2 low bytes of the next word (NOTE(review): assumes little-endian)
+ MOV iPredA1, iPredA1, LSR #16
+ ORR iPredA1, iPredA1, Temp2, LSL #16
+
+ ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
+ UHSUB8 ResultA, iPredA0, iPredB0
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080
+ M_STR ResultA, [pDstPred], iDstStep
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep
+
+ ;// 2nd load
+ LDR Temp1, [pPred0, #4]
+ M_LDR iPredA0, [pPred0], iPredStep0
+ LDR iPredB0, [pPred1]
+ LDR iPredB1, [pPred1, iPredStep1]
+ LDR Temp2, [pPred0, #4]
+ M_LDR iPredA1, [pPred0], iPredStep0
+ MVN iPredB0, iPredB0
+ MVN iPredB1, iPredB1
+ MOV iPredA0, iPredA0, LSR #16
+ ORR iPredA0, iPredA0, Temp1, LSL #16
+ MOV iPredA1, iPredA1, LSR #16
+ ORR iPredA1, iPredA1, Temp2, LSL #16
+
+ UHSUB8 ResultA, iPredA0, iPredB0
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080
+ M_STR ResultA, [pDstPred], iDstStep
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep
+End2
+ M_END
+
+
+ ;// This function calculates average of 4x4 block
+ ;// pPred0 is at alignment offset 3 and pPred1 is alignment 4
+
+ ;// Function header
+ M_START armVCM4P10_Average_4x4_Align3_unsafe, r6
+
+ ;// Code start
+ LDR r0x80808080, =0x80808080
+
+ ;// 1st load
+ LDR Temp1, [pPred0, #4]
+ M_LDR iPredA0, [pPred0], iPredStep0
+ LDR iPredB0, [pPred1]
+ LDR iPredB1, [pPred1, iPredStep1]
+ LDR Temp2, [pPred0, #4]
+ M_LDR iPredA1, [pPred0], iPredStep0
+
+ MVN iPredB0, iPredB0
+ MVN iPredB1, iPredB1
+ MOV iPredA0, iPredA0, LSR #24 ;// keep only the top byte of the first word ...
+ ORR iPredA0, iPredA0, Temp1, LSL #8 ;// ... and append the 3 low bytes of the next word (NOTE(review): assumes little-endian)
+ MOV iPredA1, iPredA1, LSR #24
+ ORR iPredA1, iPredA1, Temp2, LSL #8
+ UHSUB8 ResultA, iPredA0, iPredB0
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080
+ M_STR ResultA, [pDstPred], iDstStep
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep
+
+ ;// 2nd load
+ LDR Temp1, [pPred0, #4]
+ M_LDR iPredA0, [pPred0], iPredStep0
+ LDR iPredB0, [pPred1]
+ LDR iPredB1, [pPred1, iPredStep1]
+ LDR Temp2, [pPred0, #4]
+ M_LDR iPredA1, [pPred0], iPredStep0
+
+ MVN iPredB0, iPredB0
+ MVN iPredB1, iPredB1
+ MOV iPredA0, iPredA0, LSR #24
+ ORR iPredA0, iPredA0, Temp1, LSL #8
+ MOV iPredA1, iPredA1, LSR #24
+ ORR iPredA1, iPredA1, Temp2, LSL #8
+
+ UHSUB8 ResultA, iPredA0, iPredB0
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080
+ M_STR ResultA, [pDstPred], iDstStep
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep
+End3
+ M_END
+
+ ENDIF
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_CAVLCTables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_CAVLCTables.c
new file mode 100644
index 0000000..17fe518
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_CAVLCTables.c
@@ -0,0 +1,327 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: armVCM4P10_CAVLCTables.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Optimized CAVLC tables for H.264
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armVCM4P10_CAVLCTables.h"
+
+/* 4x4 DeZigZag table: entry i is the raster index (row*4 + column) of the i-th zig-zag position */
+
+const OMX_U8 armVCM4P10_ZigZag_4x4[16] =
+{
+ 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15
+};
+
+/* 2x2 DeZigZag table: the identity mapping, the four positions are visited in raster order */
+
+const OMX_U8 armVCM4P10_ZigZag_2x2[4] =
+{
+ 0, 1, 2, 3
+};
+
+
+/*
+ * Suffix To Level table
+ * We increment the suffix length if (each line below rewrites the same test):
+ * ((LevelCode>>1)+1)>(3<<(SuffixLength-1)) && SuffixLength<6
+ * (LevelCode>>1)>=(3<<(SuffixLength-1)) && SuffixLength<6
+ * LevelCode >= 3<<SuffixLength && SuffixLength<6
+ * (LevelCode+2) >= (3<<SuffixLength)+2 && SuffixLength<6
+ */
+const OMX_S8 armVCM4P10_SuffixToLevel[7] =
+{
+ (3<<1)+2, /* index 0 - same threshold as SuffixLength=1 (NOTE(review): was labelled SuffixLength=1; confirm) */
+ (3<<1)+2, /* SuffixLength=1 */
+ (3<<2)+2, /* SuffixLength=2 */
+ (3<<3)+2, /* SuffixLength=3 */
+ (3<<4)+2, /* SuffixLength=4 */
+ (3<<5)+2, /* SuffixLength=5 */
+ -1 /* SuffixLength=6 - never increment */
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_0[132] = { /* selected for nC = 0..1 */
+ 0x0020, 0x0100, 0x2015, 0x2015, 0x400b, 0x400b, 0x400b, 0x400b,
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001,
+ 0x0028, 0x00f0, 0x00f8, 0x0027, 0x0030, 0x00d8, 0x00e0, 0x00e8,
+ 0x0038, 0x00a0, 0x00c8, 0x00d0, 0x0040, 0x0068, 0x0090, 0x0098,
+ 0x0048, 0x0050, 0x0058, 0x0060, 0x27ff, 0x27ff, 0x206b, 0x206b,
+ 0x0081, 0x0085, 0x0083, 0x0079, 0x0087, 0x007d, 0x007b, 0x0071,
+ 0x007f, 0x0075, 0x0073, 0x0069, 0x0070, 0x0078, 0x0080, 0x0088,
+ 0x2077, 0x2077, 0x206d, 0x206d, 0x2063, 0x2063, 0x2061, 0x2061,
+ 0x206f, 0x206f, 0x2065, 0x2065, 0x205b, 0x205b, 0x2059, 0x2059,
+ 0x0067, 0x005d, 0x0053, 0x0051, 0x005f, 0x0055, 0x004b, 0x0049,
+ 0x00a8, 0x00b0, 0x00b8, 0x00c0, 0x2041, 0x2041, 0x204d, 0x204d,
+ 0x2043, 0x2043, 0x2039, 0x2039, 0x2057, 0x2057, 0x2045, 0x2045,
+ 0x203b, 0x203b, 0x2031, 0x2031, 0x204f, 0x204f, 0x203d, 0x203d,
+ 0x2033, 0x2033, 0x2029, 0x2029, 0x0047, 0x0035, 0x002b, 0x0021,
+ 0x203f, 0x203f, 0x202d, 0x202d, 0x2023, 0x2023, 0x2019, 0x2019,
+ 0x0037, 0x0025, 0x001b, 0x0011, 0x202f, 0x202f, 0x201d, 0x201d,
+ 0x0013, 0x0009, 0x201f, 0x201f
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_1[128] = { /* selected for nC = 2..3 */
+ 0x0020, 0x00e8, 0x00f0, 0x00f8, 0x0027, 0x001f, 0x2015, 0x2015,
+ 0x400b, 0x400b, 0x400b, 0x400b, 0x4001, 0x4001, 0x4001, 0x4001,
+ 0x0028, 0x00d0, 0x00d8, 0x00e0, 0x0030, 0x0098, 0x00c0, 0x00c8,
+ 0x0038, 0x0060, 0x0088, 0x0090, 0x0040, 0x0048, 0x0050, 0x0058,
+ 0x27ff, 0x27ff, 0x207f, 0x207f, 0x0087, 0x0085, 0x0083, 0x0081,
+ 0x007b, 0x0079, 0x007d, 0x0073, 0x2075, 0x2075, 0x2071, 0x2071,
+ 0x0068, 0x0070, 0x0078, 0x0080, 0x2077, 0x2077, 0x206d, 0x206d,
+ 0x206b, 0x206b, 0x2069, 0x2069, 0x206f, 0x206f, 0x2065, 0x2065,
+ 0x2063, 0x2063, 0x2061, 0x2061, 0x0059, 0x005d, 0x005b, 0x0051,
+ 0x0067, 0x0055, 0x0053, 0x0049, 0x00a0, 0x00a8, 0x00b0, 0x00b8,
+ 0x205f, 0x205f, 0x204d, 0x204d, 0x204b, 0x204b, 0x2041, 0x2041,
+ 0x2057, 0x2057, 0x2045, 0x2045, 0x2043, 0x2043, 0x2039, 0x2039,
+ 0x204f, 0x204f, 0x203d, 0x203d, 0x203b, 0x203b, 0x2031, 0x2031,
+ 0x0029, 0x0035, 0x0033, 0x0021, 0x2047, 0x2047, 0x202d, 0x202d,
+ 0x202b, 0x202b, 0x2019, 0x2019, 0x003f, 0x0025, 0x0023, 0x0011,
+ 0x0037, 0x001d, 0x001b, 0x0009, 0x202f, 0x202f, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_2[112] = { /* selected for nC = 4..7 */
+ 0x0020, 0x0088, 0x00b0, 0x00b8, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+ 0x003f, 0x0037, 0x002f, 0x0027, 0x001f, 0x0015, 0x000b, 0x0001,
+ 0x0028, 0x0050, 0x0078, 0x0080, 0x0030, 0x0038, 0x0040, 0x0048,
+ 0x07ff, 0x0081, 0x0087, 0x0085, 0x0083, 0x0079, 0x007f, 0x007d,
+ 0x007b, 0x0071, 0x0077, 0x0075, 0x0073, 0x0069, 0x206b, 0x206b,
+ 0x0058, 0x0060, 0x0068, 0x0070, 0x2061, 0x2061, 0x206d, 0x206d,
+ 0x2063, 0x2063, 0x2059, 0x2059, 0x206f, 0x206f, 0x2065, 0x2065,
+ 0x205b, 0x205b, 0x2051, 0x2051, 0x0067, 0x005d, 0x0053, 0x0049,
+ 0x005f, 0x0055, 0x004b, 0x0041, 0x0090, 0x0098, 0x00a0, 0x00a8,
+ 0x2039, 0x2039, 0x2031, 0x2031, 0x204d, 0x204d, 0x2029, 0x2029,
+ 0x2057, 0x2057, 0x2045, 0x2045, 0x2043, 0x2043, 0x2021, 0x2021,
+ 0x0019, 0x003d, 0x003b, 0x0011, 0x004f, 0x0035, 0x0033, 0x0009,
+ 0x202b, 0x202b, 0x202d, 0x202d, 0x2023, 0x2023, 0x2025, 0x2025,
+ 0x201b, 0x201b, 0x2047, 0x2047, 0x201d, 0x201d, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_3[80] = { /* selected for nC = 8..16 */
+ 0x0020, 0x0028, 0x0030, 0x0038, 0x0040, 0x0048, 0x0050, 0x0058,
+ 0x0060, 0x0068, 0x0070, 0x0078, 0x0080, 0x0088, 0x0090, 0x0098,
+ 0x0009, 0x000b, 0x07ff, 0x0001, 0x0011, 0x0013, 0x0015, 0x07ff,
+ 0x0019, 0x001b, 0x001d, 0x001f, 0x0021, 0x0023, 0x0025, 0x0027,
+ 0x0029, 0x002b, 0x002d, 0x002f, 0x0031, 0x0033, 0x0035, 0x0037,
+ 0x0039, 0x003b, 0x003d, 0x003f, 0x0041, 0x0043, 0x0045, 0x0047,
+ 0x0049, 0x004b, 0x004d, 0x004f, 0x0051, 0x0053, 0x0055, 0x0057,
+ 0x0059, 0x005b, 0x005d, 0x005f, 0x0061, 0x0063, 0x0065, 0x0067,
+ 0x0069, 0x006b, 0x006d, 0x006f, 0x0071, 0x0073, 0x0075, 0x0077,
+ 0x0079, 0x007b, 0x007d, 0x007f, 0x0081, 0x0083, 0x0085, 0x0087
+};
+
+static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_4[32] = { /* selected for nC = -1 */
+ 0x0020, 0x0038, 0x2015, 0x2015, 0x4001, 0x4001, 0x4001, 0x4001,
+ 0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b,
+ 0x0028, 0x0030, 0x0021, 0x0019, 0x2027, 0x2027, 0x0025, 0x0023,
+ 0x201d, 0x201d, 0x201b, 0x201b, 0x0011, 0x001f, 0x0013, 0x0009
+};
+
+const OMX_U16 * armVCM4P10_CAVLCCoeffTokenTables[18] = { /* entries 0..16 cover nC = 0..16, entry 17 covers nC = -1 */
+ armVCM4P10_CAVLCCoeffTokenTables_0, /* nC=0 */
+ armVCM4P10_CAVLCCoeffTokenTables_0, /* nC=1 */
+ armVCM4P10_CAVLCCoeffTokenTables_1, /* nC=2 */
+ armVCM4P10_CAVLCCoeffTokenTables_1, /* nC=3 */
+ armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=4 */
+ armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=5 */
+ armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=6 */
+ armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=7 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=8 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=9 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=10 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=11 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=12 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=13 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=14 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=15 */
+ armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=16 */
+ armVCM4P10_CAVLCCoeffTokenTables_4 /* nC=-1 */
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_0[40] = { /* entry 0 of armVCM4P10_CAVLCTotalZeroTables[] */
+ 0x0020, 0x0048, 0x0009, 0x0007, 0x2005, 0x2005, 0x2003, 0x2003,
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001,
+ 0x0028, 0x0040, 0x0011, 0x000f, 0x0030, 0x0038, 0x0019, 0x0017,
+ 0x27ff, 0x27ff, 0x201f, 0x201f, 0x201d, 0x201d, 0x201b, 0x201b,
+ 0x2015, 0x2015, 0x2013, 0x2013, 0x200d, 0x200d, 0x200b, 0x200b
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_1[24] = {
+ 0x0020, 0x0028, 0x0011, 0x000f, 0x000d, 0x000b, 0x2009, 0x2009,
+ 0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001,
+ 0x001d, 0x001b, 0x0019, 0x0017, 0x2015, 0x2015, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_2[24] = {
+ 0x0020, 0x0028, 0x0011, 0x000b, 0x0009, 0x0001, 0x200f, 0x200f,
+ 0x200d, 0x200d, 0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003,
+ 0x001b, 0x0017, 0x2019, 0x2019, 0x2015, 0x2015, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_3[24] = {
+ 0x0020, 0x0028, 0x0013, 0x000f, 0x0007, 0x0005, 0x2011, 0x2011,
+ 0x200d, 0x200d, 0x200b, 0x200b, 0x2009, 0x2009, 0x2003, 0x2003,
+ 0x2019, 0x2019, 0x2017, 0x2017, 0x2015, 0x2015, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_4[20] = {
+ 0x0020, 0x0015, 0x0011, 0x0005, 0x0003, 0x0001, 0x200f, 0x200f,
+ 0x200d, 0x200d, 0x200b, 0x200b, 0x2009, 0x2009, 0x2007, 0x2007,
+ 0x2017, 0x2017, 0x2013, 0x2013
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_5[20] = {
+ 0x0020, 0x0011, 0x2013, 0x2013, 0x200f, 0x200f, 0x200d, 0x200d,
+ 0x200b, 0x200b, 0x2009, 0x2009, 0x2007, 0x2007, 0x2005, 0x2005,
+ 0x0015, 0x0001, 0x2003, 0x2003
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_6[20] = {
+ 0x0020, 0x000f, 0x2011, 0x2011, 0x200d, 0x200d, 0x2009, 0x2009,
+ 0x2007, 0x2007, 0x2005, 0x2005, 0x400b, 0x400b, 0x400b, 0x400b,
+ 0x0013, 0x0001, 0x2003, 0x2003
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_7[20] = {
+ 0x0020, 0x0003, 0x200f, 0x200f, 0x200d, 0x200d, 0x2007, 0x2007,
+ 0x400b, 0x400b, 0x400b, 0x400b, 0x4009, 0x4009, 0x4009, 0x4009,
+ 0x0011, 0x0001, 0x2005, 0x2005
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_8[20] = {
+ 0x0020, 0x0005, 0x200b, 0x200b, 0x400d, 0x400d, 0x400d, 0x400d,
+ 0x4009, 0x4009, 0x4009, 0x4009, 0x4007, 0x4007, 0x4007, 0x4007,
+ 0x0003, 0x0001, 0x200f, 0x200f
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_9[20] = {
+ 0x0020, 0x000d, 0x2005, 0x2005, 0x400b, 0x400b, 0x400b, 0x400b,
+ 0x4009, 0x4009, 0x4009, 0x4009, 0x4007, 0x4007, 0x4007, 0x4007,
+ 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_10[16] = {
+ 0x0001, 0x0003, 0x2005, 0x2005, 0x2007, 0x2007, 0x200b, 0x200b,
+ 0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_11[16] = {
+ 0x0001, 0x0003, 0x2009, 0x2009, 0x4005, 0x4005, 0x4005, 0x4005,
+ 0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_12[16] = {
+ 0x2001, 0x2001, 0x2003, 0x2003, 0x4007, 0x4007, 0x4007, 0x4007,
+ 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_13[16] = {
+ 0x4001, 0x4001, 0x4001, 0x4001, 0x4003, 0x4003, 0x4003, 0x4003,
+ 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_14[16] = {
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001,
+ 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003
+};
+
+const OMX_U16 * armVCM4P10_CAVLCTotalZeroTables[15] = { /* NOTE(review): presumably indexed by TotalCoeff-1 - confirm against the decoder */
+ armVCM4P10_CAVLCTotalZeroTables_0,
+ armVCM4P10_CAVLCTotalZeroTables_1,
+ armVCM4P10_CAVLCTotalZeroTables_2,
+ armVCM4P10_CAVLCTotalZeroTables_3,
+ armVCM4P10_CAVLCTotalZeroTables_4,
+ armVCM4P10_CAVLCTotalZeroTables_5,
+ armVCM4P10_CAVLCTotalZeroTables_6,
+ armVCM4P10_CAVLCTotalZeroTables_7,
+ armVCM4P10_CAVLCTotalZeroTables_8,
+ armVCM4P10_CAVLCTotalZeroTables_9,
+ armVCM4P10_CAVLCTotalZeroTables_10,
+ armVCM4P10_CAVLCTotalZeroTables_11,
+ armVCM4P10_CAVLCTotalZeroTables_12,
+ armVCM4P10_CAVLCTotalZeroTables_13,
+ armVCM4P10_CAVLCTotalZeroTables_14
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_0[16] = { /* TotalCoeff == 1 */
+ 0x2007, 0x2007, 0x2005, 0x2005, 0x4003, 0x4003, 0x4003, 0x4003,
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_1[16] = { /* TotalCoeff == 2 */
+ 0x4005, 0x4005, 0x4005, 0x4005, 0x4003, 0x4003, 0x4003, 0x4003,
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_2[16] = { /* TotalCoeff == 3 */
+ 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003,
+ 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001
+};
+/* Number-of-zeros VLD tables for the 4-coefficient case: armVCM4P10_DecodeCoeffsToPair picks these when sMaxNumCoeff == 4. */
+const OMX_U16 * armVCM4P10_CAVLCTotalZeros2x2Tables[3] = { /* slot [TotalCoeff-1]; the decoder reads (this-4)[TotalCoeff] */
+ armVCM4P10_CAVLCTotalZeros2x2Tables_0,
+ armVCM4P10_CAVLCTotalZeros2x2Tables_1,
+ armVCM4P10_CAVLCTotalZeros2x2Tables_2
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_0[8] = { /* ZerosLeft == 1 */
+ 0x4003, 0x4003, 0x4003, 0x4003, 0x4001, 0x4001, 0x4001, 0x4001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_1[8] = { /* ZerosLeft == 2 */
+ 0x2005, 0x2005, 0x2003, 0x2003, 0x4001, 0x4001, 0x4001, 0x4001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_2[8] = { /* ZerosLeft == 3 */
+ 0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_3[8] = { /* ZerosLeft == 4 */
+ 0x0009, 0x0007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_4[8] = { /* ZerosLeft == 5 */
+ 0x000b, 0x0009, 0x0007, 0x0005, 0x2003, 0x2003, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_5[8] = { /* ZerosLeft == 6 */
+ 0x0003, 0x0005, 0x0009, 0x0007, 0x000d, 0x000b, 0x2001, 0x2001
+};
+
+static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_6[24] = { /* ZerosLeft >= 7: shared by slots 6..14 of the pointer array */
+ 0x0010, 0x000d, 0x000b, 0x0009, 0x0007, 0x0005, 0x0003, 0x0001,
+ 0x0018, 0x0011, 0x200f, 0x200f, 0x0020, 0x0015, 0x2013, 0x2013,
+ 0x0028, 0x0019, 0x2017, 0x2017, 0x07ff, 0x001d, 0x201b, 0x201b
+};
+
+/* Tables 7 to 14 are duplicates of table 6 */
+/* Run-length VLD tables, slot [ZerosLeft-1]: armVCM4P10_DecodeCoeffsToPair reads (this-4)[ZerosLeft] in its run loop. */
+const OMX_U16 * armVCM4P10_CAVLCRunBeforeTables[15] = {
+ armVCM4P10_CAVLCRunBeforeTables_0, /* ZerosLeft=1 */
+ armVCM4P10_CAVLCRunBeforeTables_1, /* ZerosLeft=2 */
+ armVCM4P10_CAVLCRunBeforeTables_2, /* ZerosLeft=3 */
+ armVCM4P10_CAVLCRunBeforeTables_3, /* ZerosLeft=4 */
+ armVCM4P10_CAVLCRunBeforeTables_4, /* ZerosLeft=5 */
+ armVCM4P10_CAVLCRunBeforeTables_5, /* ZerosLeft=6 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=7 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=8 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=9 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=10 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=11 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=12 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=13 */
+ armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=14 */
+ armVCM4P10_CAVLCRunBeforeTables_6 /* ZerosLeft=15 */
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
new file mode 100644
index 0000000..dcbcd00
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
@@ -0,0 +1,20 @@
+;//
+;//
+;// File Name: armVCM4P10_DeblockingChroma_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
new file mode 100644
index 0000000..14b37fe
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s
@@ -0,0 +1,366 @@
+;//
+;//
+;// File Name: armVCM4P10_DeblockingLuma_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+
+ IF ARM1136JS
+
+MASK_1 EQU 0x01010101
+
+;// Declare input registers
+
+pQ0 RN 0
+StepArg RN 1
+tC0Arg RN 2
+alpha RN 6
+
+beta RN 14
+bS RN 14
+tC0 RN 14
+ptC0 RN 1
+
+;// Declare Local/Temporary variables
+
+;// Pixels
+p_0 RN 3
+p_1 RN 5
+p_2 RN 4
+p_3 RN 2
+q_0 RN 8
+q_1 RN 9
+q_2 RN 10
+q_3 RN 12
+
+
+;// Filtering
+
+ap0q0 RN 1
+filt RN 2
+
+m00 RN 7
+m01 RN 11
+
+apflg RN 0
+aqflg RN 6
+
+tC RN 1
+
+
+;//Declarations for bSLT4 kernel
+
+pos RN 7
+neg RN 12
+
+P0a RN 1
+P1a RN 8
+Q0a RN 7
+Q1a RN 4
+
+u1 RN 3
+max RN 12
+min RN 2
+
+
+
+;//Declarations for bSGE4 kernel
+
+q_3b RN 9
+p_3b RN 0
+apqflg RN 12
+
+P0b RN 6
+P1b RN 7
+P2b RN 1
+
+Q0b RN 9
+Q1b RN 0
+Q2b RN 2
+
+;// Miscellaneous
+
+a RN 0
+t0 RN 3
+t1 RN 12
+t2 RN 7
+t3 RN 11
+t4 RN 4
+t5 RN 1
+t8 RN 6
+t9 RN 14
+t10 RN 5
+t11 RN 9
+
+;// Register usage for - armVCM4P10_DeblockingLumabSLT4_unsafe()
+;//
+;// Inputs - 3,4,5,8,9,10 - Input Pixels (r3=p0, r4=p2, r5=p1, r8=q0, r9=q1, r10=q2)
+;// - 2 - filt, 0 - apflg, 6 - aqflg
+;// - 11 - m01, 7 - tC0
+;//
+;// Outputs - 1,8,7,11 - Output Pixels(P0a,P1a,Q0a,Q1a)
+;//
+;// Registers Corrupted - 0-3,5-12,14
+;// NOTE(review): r4 is also written below (as t4 and Q1a) although it is not listed above - confirm.
+;// Idiom used below: USUB8 x,a,b sets the per-byte GE flags where a >= b; SEL d,m,n then takes m in those bytes and n elsewhere.
+ M_START armVCM4P10_DeblockingLumabSLT4_unsafe, lr
+
+ ;// Since beta <= 18 and alpha <= 255 we know
+ ;// -254 <= p0-q0 <= 254
+ ;// -17 <= q1-q0 <= 17
+ ;// -17 <= p1-p0 <= 17
+
+ ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
+ ;//
+ ;// Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
+ ;// = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
+ ;// = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
+
+ USUB8 t1, p_1, p_0 ;// t1 = p1-p0 per byte
+ MUL tC0, t2, m01 ;// tC0 (r14) = r7 * m01; NOTE(review): presumably replicates the tC0 byte into every byte lane - confirm
+
+ USUB8 t2, q_1, q_0 ;// t2 = q1-q0 per byte (r7 is free now)
+ SSUB8 t1, t1, t2 ;// t1 = (p1-p0) - (q1-q0)
+
+ USUB8 t2, p_0, q_0
+ AND t2, t2, m01
+ SHSUB8 t1, t1, t2
+ UHSUB8 t5, p_0, q_0
+ SSUB8 t1, t1, t2
+ SHSUB8 t1, t1, t5
+ MOV m00, #0
+ SADD8 t1, t1, m01
+ SHSUB8 t1, t1, t5
+
+ ;// tC = tC0
+ ;// if (ap < beta) tC++;
+ ;// if (aq < beta) tC++;
+ USUB8 t5, filt, m01 ;// GE set in bytes where filt >= 1
+ SEL tC0, tC0, m00 ;// tC0 forced to 0 in bytes where filt is 0
+ UQADD8 tC, tC0, apflg ;// tC = tC0 + apflg (unsigned saturating)
+ SSUB8 t1, t1, m00 ;// t1 - 0: value unchanged, GE set in bytes where t1 >= 0 (signed)
+ UQADD8 tC, tC, aqflg ;// tC += aqflg (unsigned saturating)
+
+ ;// Split into positive and negative part and clip
+ SEL pos, t1, m00 ;// pos = t1 where t1 >= 0, else 0
+ USUB8 neg, pos, t1 ;// neg = pos - t1: -t1 in the negative bytes, 0 elsewhere
+ USUB8 t3, pos, tC ;// GE where pos >= tC (t3 is r11, so m01 is overwritten here)
+ SEL pos, tC, pos ;// pos = min(pos, tC)
+ USUB8 t3, neg, tC ;// GE where neg >= tC
+ SEL neg, tC, neg ;// neg = min(neg, tC)
+
+ ;//Reload m01
+ LDR m01,=MASK_1
+
+ UQADD8 P0a, p_0, pos ;// P0a = p0 + pos - neg, Q0a = q0 - pos + neg (each step saturating)
+ UQSUB8 Q0a, q_0, pos
+ UQSUB8 P0a, P0a, neg
+ UQADD8 Q0a, Q0a, neg
+
+ ;// Choose to store the filtered
+ ;// value or the original pixel
+ USUB8 t1, filt, m01 ;// GE set in bytes where filt >= 1
+ SEL P0a, P0a, p_0
+ SEL Q0a, Q0a, q_0
+
+ ;// delta = (p2 + ((p0+q0+1)>>1) - (p1<<1))>>1;
+ ;// u1 = (p0 + q0 + 1)>>1
+ ;// u1 = ( (q_0 - p_0')>>1 ) ^ 0x80
+ MVN p_0, p_0 ;// p_0' = ~p_0 (r3 no longer holds the pixel)
+ UHSUB8 u1, q_0, p_0
+ UQADD8 max, p_1, tC0 ;// max = p1 + tC0, upper clip bound for P1
+ EOR u1, u1, m01 ,LSL #7
+
+ ;// Calculate A = (p2+u1)>>1
+ ;// Then delta = Clip3( -tC0, tC0, A - p1)
+
+ ;// Clip P1
+ UHADD8 P1a, p_2, u1
+ UQSUB8 min, p_1, tC0 ;// min = p1 - tC0, lower clip bound (r2: filt is overwritten)
+ USUB8 t4, P1a, max
+ SEL P1a, max, P1a ;// P1a = min(P1a, max)
+ USUB8 t4, P1a, min
+ SEL P1a, P1a, min ;// P1a = max(P1a, min)
+
+ ;// Clip Q1
+ UHADD8 Q1a, q_2, u1
+ UQADD8 max, q_1, tC0
+ UQSUB8 min, q_1, tC0
+ USUB8 t0, Q1a, max
+ SEL Q1a, max, Q1a ;// Q1a = min(Q1a, max)
+ USUB8 t0, Q1a, min
+ SEL Q1a, Q1a, min ;// Q1a = max(Q1a, min)
+
+ ;// Choose to store the filtered
+ ;// value or the original pixel
+ USUB8 t0, apflg, m01 ;// GE set in bytes where apflg >= 1
+ SEL P1a, P1a, p_1
+ USUB8 t0, aqflg, m01 ;// GE set in bytes where aqflg >= 1
+ SEL t3, Q1a, q_1 ;// the final Q1 result is returned in r11 (t3), not in Q1a's register
+
+ M_END
+
+;// Register usage for - armVCM4P10_DeblockingLumabSGE4_unsafe()
+;//
+;// Inputs - 3,4,5,8,9,10 - Input Pixels (r3=p0, r4=p2, r5=p1, r8=q0, r9=q1, r10=q2)
+;// - 2 - filt, 0 - apflg,aqflg
+;// - 1 - ap0q0, 6 - alpha
+;// - 7 - m00, 11 - m01
+;//
+;// Outputs - 6,7,1,9,0,2 - Output Pixels(P0b,P1b,P2b, Q0b,Q1b,Q2b)
+;//
+;// Registers Corrupted - 0-3,5-12,14
+;// NOTE(review): r4 is also written below (as t4) although it is not listed above - confirm.
+ M_START armVCM4P10_DeblockingLumabSGE4_unsafe, lr
+
+ ;// apflg = apflg && |p0-q0|<((alpha>>2)+2)
+ ;// aqflg = aqflg && |p0-q0|<((alpha>>2)+2)
+
+ M_ARG pDummy,4
+ M_ARG pQ_3,4 ;// stack slot loaded into q_3b below
+ M_ARG pP_3,4 ;// stack slot loaded into p_3b below
+
+ UHADD8 alpha, alpha, m00 ;// (alpha + m00)>>1 per byte; presumably m00 == 0 on entry, giving alpha>>1
+ USUB8 t9, p_2, p_0 ;//t9 = dp2p0
+ UHADD8 alpha, alpha, m00 ;// second halving: alpha>>2 under the same assumption
+ ADD alpha, alpha, m01, LSL #1 ;// adds (m01<<1); with m01 = 0x01010101 this is +2 in every byte
+ USUB8 ap0q0, ap0q0, alpha ;// GE set in bytes where ap0q0 >= the threshold just built
+ SEL apqflg, m00, apflg ;// apqflg = m00 in those bytes, apflg elsewhere
+
+ ;// P0 = (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3
+ ;// = ((p2-p0) + 2*(p1-p0) + (q1-q0) + 3*(q0-p0) + 8*p0 + 4)>>3
+ ;// = p0 + (((p2-p0) + 2*(p1-p0) + (q1-q0) - 3*(p0-q0) + 4)>>3)
+
+ ;// P1 = (p2 + p1 + q0 + p0 + 2)>>2
+ ;// = p0 + (((p2-p0) + (p1-p0) - (p0-q0) + 2)>>2)
+
+ ;// P2 = (2*p3 + 3*p2 + p1 + p0 + q0 + 4)>>3
+ ;// = (2*(p3-p0) + 3*(p2-p0) + (p1-p0) - (p0-q0) + 8*p0 + 4)>>3
+ ;// = p0 + (((p3-p0) + (p2-p0) + t2 + 2)>>2)
+
+ ;// Compute P0b
+ USUB8 t2, p_0, q_0 ;// r7: m00 is overwritten from here on
+ SSUB8 t5, t9, t2
+
+ USUB8 t8, q_1, q_0
+ SHADD8 t8, t5, t8
+
+ USUB8 t9, p_1, p_0
+ SADD8 t8, t8, t9
+ SHSUB8 t8, t8, t2
+ SHADD8 t5, t5, t9
+ SHADD8 t8, t8, m01
+ SHADD8 t9, t5, m01
+ SADD8 P0b, p_0, t8
+ ;// P0b ready
+
+ ;// Compute P1b
+ M_LDR p_3b, pP_3 ;// r0: apflg is overwritten (it was already folded into apqflg)
+ SADD8 P1b, p_0, t9
+ ;// P1b ready
+
+ ;// Compute P2b
+ USUB8 t9, p_2, p_0
+ SADD8 t5, t5, t9
+ UHSUB8 t9, p_3b, p_0
+ EOR a, p_3b, p_0
+ AND a, a, m01
+ SHADD8 t5, t5, a
+ UHADD8 a, p_0, q_1 ;// a = (p0 + q1)>>1
+ SADD8 t5, t5, m01
+ SHADD8 t5, t5, t9
+ MVN t9, p_1
+ SADD8 P2b, p_0, t5
+ ;// P2b ready
+
+ UHSUB8 a, a, t9 ;// with the EOR below: a = (a + p1 + 1)>>1 via the (x - ~y)>>1 ^ 0x80 trick
+ ORR t9, apqflg, m01
+ USUB8 t9, apqflg, t9 ;// GE set in bytes where apqflg >= (apqflg | m01), i.e. where bit 0 of apqflg is set
+
+ EOR a, a, m01, LSL #7
+ SEL P0b, P0b, a ;// keep the P0b/P1b/P2b values computed above where GE is set,
+ SEL P1b, P1b, p_1 ;// otherwise fall back to a / p_1 / p_2
+ SEL P2b, P2b, p_2
+
+ USUB8 t4, filt, m01 ;// GE set in bytes where filt >= 1
+ SEL P0b, P0b, p_0 ;// unfiltered bytes keep the original p0
+
+
+ ;// Q0 = (q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4)>>3
+ ;// = ((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 8*q0 + 4)>>3
+ ;// = q0 + (((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 4)>>3)
+
+ ;// Q1 = (q2 + q1 + p0 + q0 + 2)>>2
+ ;// = q0 + (((q2-q0) + (q1-q0) + (p0-q0) + 2)>>2)
+
+ ;// Q2 = (2*q3 + 3*q2 + q1 + q0 + p0 + 4)>>3
+ ;// = (2*(q3-q0) + 3*(q2-q0) + (q1-q0) + (p0-q0) + 8*q0 + 4)>>3
+ ;// = q0 + (((q3-q0) + (q2-q0) + t2 + 2)>>2)
+
+
+ ;// Compute Q0b Q1b
+ USUB8 t4, q_2, q_0
+ USUB8 a, p_0, q_0
+ USUB8 t9, p_1, p_0
+ SADD8 t0, t4, a
+ SHADD8 t9, t0, t9
+ UHADD8 t10, q_0, p_1 ;// t10 = (q0 + p1)>>1 (r5: p_1 is overwritten)
+ SADD8 t9, t9, a
+ USUB8 a, q_1, q_0
+ SHADD8 t9, t9, a
+ SHADD8 t0, t0, a
+ SHADD8 t9, t9, m01
+ SHADD8 a, t0, m01
+ SADD8 t9, q_0, t9
+ ;// Q0b ready - t9
+
+ MOV t4, #0
+ UHADD8 apqflg, apqflg, t4 ;// apqflg >>= 1 per byte, so the next flag bit becomes bit 0
+
+ SADD8 Q1b, q_0, a
+ ;// Q1b ready
+
+ USUB8 t4, apqflg, m01 ;// GE set in bytes where the shifted apqflg >= 1
+ SEL Q1b, Q1b, q_1
+ MVN t11, q_1 ;// r9 = ~q_1 (q_1 itself is not read again)
+ UHSUB8 t10, t10, t11 ;// with the EOR below: t10 = (t10 + q1 + 1)>>1
+ M_LDR q_3b, pQ_3 ;// r9 now holds the value from the pQ_3 stack slot
+ EOR t10, t10, m01, LSL #7
+ SEL t9, t9, t10 ;// uses the GE flags from the USUB8 above, assuming M_LDR expands to a plain load
+
+ ;// Compute Q2b
+ USUB8 t4, q_2, q_0
+ SADD8 t4, t0, t4
+ EOR t0, q_3b, q_0
+ AND t0, t0, m01
+ SHADD8 t4, t4, t0
+ UHSUB8 t10, q_3b, q_0
+ SADD8 t4, t4, m01
+ SHADD8 t4, t4, t10
+
+ USUB8 t10, filt, m01 ;// GE set in bytes where filt >= 1
+ SEL Q0b, t9, q_0 ;// unfiltered bytes keep the original q0
+
+ SADD8 t4, q_0, t4
+ ;// Q2b ready - t4
+
+ USUB8 t10, apqflg, m01 ;// GE set in bytes where the shifted apqflg >= 1
+ SEL Q2b, t4, q_2
+
+ M_END
+
+ ENDIF
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
new file mode 100644
index 0000000..ac448a0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s
@@ -0,0 +1,325 @@
+;//
+;//
+;// File Name: armVCM4P10_DecodeCoeffsToPair_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+ IMPORT armVCM4P10_CAVLCCoeffTokenTables
+ IMPORT armVCM4P10_CAVLCTotalZeroTables
+ IMPORT armVCM4P10_CAVLCTotalZeros2x2Tables
+ IMPORT armVCM4P10_CAVLCRunBeforeTables
+ IMPORT armVCM4P10_SuffixToLevel
+ IMPORT armVCM4P10_ZigZag_4x4
+ IMPORT armVCM4P10_ZigZag_2x2
+
+ M_VARIANTS ARM1136JS
+
+;//DEBUG_ON SETL {TRUE}
+
+LAST_COEFF EQU 0x20 ;// End of block flag
+TWO_BYTE_COEFF EQU 0x10
+
+;// Declare input registers
+
+ppBitStream RN 0
+pOffset RN 1
+pNumCoeff RN 2
+ppPosCoefbuf RN 3
+nC RN 4 ;// number of coeffs or 17 for chroma
+sMaxNumCoeff RN 5
+
+;// Declare inner loop registers
+
+;// Level loop
+Count RN 0
+TrailingOnes RN 1
+pLevel RN 2
+LevelSuffix RN 3
+SuffixLength RN 4
+TotalCoeff RN 5
+
+pVLDTable RN 6
+Symbol RN 7
+T1 RN 8
+T2 RN 9
+RBitStream RN 10
+RBitBuffer RN 11
+RBitCount RN 12
+lr RN 14
+
+;// Run loop
+Count RN 0
+ZerosLeft RN 1
+pLevel RN 2
+ppRunTable RN 3
+pRun RN 4
+TotalCoeff RN 5
+
+pVLDTable RN 6
+Symbol RN 7
+T1 RN 8
+T2 RN 9
+RBitStream RN 10
+RBitBuffer RN 11
+RBitCount RN 12
+lr RN 14
+
+;// Fill in coefficients loop
+pPosCoefbuf RN 0
+temp RN 1
+pLevel RN 2
+ppPosCoefbuf RN 3
+pRun RN 4
+TotalCoeff RN 5
+pZigZag RN 6
+
+T1 RN 8
+T2 RN 9
+RBitStream RN 10
+RBitBuffer RN 11
+RBitCount RN 12
+CoeffNum RN 14
+
+
+
+ IF ARM1136JS
+
+ ;// Allocate stack memory required by the function
+ M_ALLOC4 pppBitStream, 4
+ M_ALLOC4 ppOffset, 4
+ M_ALLOC4 pppPosCoefbuf, 4
+ M_ALLOC4 ppLevel, 16*2 ;// level scratch buffer, written as halfwords (STRH) below
+ M_ALLOC4 ppRun, 16 ;// run scratch buffer, written as bytes (STRB) below
+
+ ;// Write function header
+ M_START armVCM4P10_DecodeCoeffsToPair, r11
+
+ ;// Define stack arguments
+ M_ARG pNC, 4
+ M_ARG pSMaxNumCoeff,4
+
+ ;// Code start
+ M_BD_INIT0 ppBitStream, pOffset, RBitStream, RBitBuffer, RBitCount
+ LDR pVLDTable, =armVCM4P10_CAVLCCoeffTokenTables
+ M_LDR nC, pNC
+
+ M_BD_INIT1 T1, T2, lr
+ LDR pVLDTable, [pVLDTable, nC, LSL #2] ;// Find VLD table
+
+ M_BD_INIT2 T1, T2, lr
+
+ ;// Decode Symbol = TotalCoeff*4 + TrailingOnes
+ M_BD_VLD Symbol, T1, T2, pVLDTable, 4, 2
+
+ MOVS TotalCoeff, Symbol, LSR #2 ;// sets Z when TotalCoeff == 0 (consumed by the BEQ.W below)
+ STRB TotalCoeff, [pNumCoeff]
+ M_PRINTF "TotalCoeff=%d\n", TotalCoeff
+ BEQ.W EndNoError ;// Finished if no coefficients
+
+ CMP Symbol, #17*4 ;// Symbol >= 68 means TotalCoeff >= 17
+ BGE.W EndBadSymbol ;// Error if bad symbol
+
+ ;// Save bitstream pointers
+ M_STR ppBitStream, pppBitStream
+ M_STR pOffset, ppOffset
+ M_STR ppPosCoefbuf, pppPosCoefbuf
+
+ ;// Decode Trailing Ones
+ ANDS TrailingOnes, Symbol, #3
+ M_ADR pLevel, ppLevel
+ M_PRINTF "TrailingOnes=%d\n", TrailingOnes
+ BEQ TrailingOnesDone
+ MOV Count, TrailingOnes
+TrailingOnesLoop
+ M_BD_READ8 Symbol, 1, T1 ;// read one sign bit
+ SUBS Count, Count, #1
+ MOV T1, #1
+ SUB T1, T1, Symbol, LSL #1 ;// T1 = 1 - 2*bit: +1 or -1
+ M_PRINTF "Level=%d\n", T1
+ STRH T1, [pLevel], #2
+ BGT TrailingOnesLoop
+TrailingOnesDone
+
+ ;// Decode level values
+ SUBS Count, TotalCoeff, TrailingOnes ;// Number of levels to read
+ BEQ DecodeRuns ;// None left
+
+ MOV SuffixLength, #1
+ CMP TotalCoeff, #10
+ MOVLE SuffixLength, #0
+ CMP TrailingOnes, #3 ;// if (TrailingOnes<3)
+ MOVLT TrailingOnes, #4 ;// then TrailingOnes = +4
+ MOVGE TrailingOnes, #2 ;// else TrailingOnes = +2
+ MOVGE SuffixLength, #0 ;// SuffixLength = 0
+
+LevelLoop
+ M_BD_CLZ16 Symbol, T1, T2 ;// Symbol=LevelPrefix
+ CMP Symbol,#16
+ BGE EndBadSymbol
+
+ MOVS lr, SuffixLength ;// if LevelSuffixSize==0
+ TEQEQ Symbol, #14 ;// and LevelPrefix==14
+ MOVEQ lr, #4 ;// then LevelSuffixSize=4
+ TEQ Symbol, #15 ;// if LevelPrefix==15
+ MOVEQ lr, #12 ;// then LevelSuffixSize=12
+
+ TEQEQ SuffixLength,#0 ;// and (LevelPrefix==15 and) SuffixLength==0
+ ADDEQ Symbol,Symbol,#15 ;// then LevelPrefix += 15
+
+ TEQ lr, #0 ;// if LevelSuffixSize==0
+ BEQ LevelCodeRead ;// LevelCode = LevelPrefix
+
+ M_BD_VREAD16 LevelSuffix, lr, T1, T2 ;// Read Level Suffix
+
+ MOV Symbol, Symbol, LSL SuffixLength
+ ADD Symbol, LevelSuffix, Symbol ;// Symbol = (LevelPrefix << SuffixLength) + LevelSuffix
+
+LevelCodeRead
+ ;// Symbol = LevelCode
+ ADD Symbol, Symbol, TrailingOnes ;// +4 if level cannot be +/-1, +2 o/w
+ MOV TrailingOnes, #2
+ MOVS T1, Symbol, LSR #1 ;// T1 = Symbol>>1, carry = bit 0 of Symbol
+ RSBCS T1, T1, #0 ;// If Symbol odd then negate
+ M_PRINTF "Level=%d\n", T1
+ STRH T1, [pLevel], #2 ;// Store level.
+
+ LDR T2, =armVCM4P10_SuffixToLevel
+ LDRSB T1, [T2, SuffixLength] ;// Find increment level
+ TEQ SuffixLength, #0
+ MOVEQ SuffixLength, #1
+ CMP Symbol, T1
+ ADDCS SuffixLength, SuffixLength, #1 ;// SuffixLength++ when Symbol >= T1 (unsigned compare)
+ SUBS Count, Count, #1
+ BGT LevelLoop
+
+DecodeRuns
+ ;// Find number of zeros
+ M_LDR T1, pSMaxNumCoeff ;// sMaxNumCoeff
+ SUB Count, TotalCoeff, #1 ;// Number of runs excluding last
+ SUBS ZerosLeft, T1, TotalCoeff ;// Maximum number of zeros there could be
+ M_ADR pRun, ppRun
+ MOV CoeffNum,TotalCoeff
+ SUB CoeffNum,CoeffNum,#1 ;// CoeffNum = TotalCoeff-1
+ BEQ NoZerosLeft ;// tests the SUBS above: block is full, so every run is zero
+
+ ;// Unpack number of zeros from bitstream
+ TEQ T1, #4
+ LDREQ pVLDTable, =(armVCM4P10_CAVLCTotalZeros2x2Tables-4)
+ LDRNE pVLDTable, =(armVCM4P10_CAVLCTotalZeroTables-4)
+ LDR pVLDTable, [pVLDTable, TotalCoeff, LSL #2] ;// the -4 bias makes this table slot [TotalCoeff-1]
+
+ M_BD_VLD Symbol, T1, T2, pVLDTable, 4, 2 ;// Symbol = ZerosLeft
+ CMP Symbol,#16
+ BGE EndBadSymbol
+
+ LDR ppRunTable, =(armVCM4P10_CAVLCRunBeforeTables-4)
+ M_ADR pRun, ppRun
+ MOVS ZerosLeft, Symbol
+
+ ADD CoeffNum,CoeffNum,ZerosLeft ;// NOTE(review): ZerosLeft is only checked to be < 16 above - confirm CoeffNum cannot index past the zig-zag table on a corrupt stream
+
+ BEQ NoZerosLeft ;// tests the MOVS above (ADD does not touch the flags)
+
+ ;// Decode runs while zeros are left and more than one coefficient
+RunLoop
+ SUBS Count, Count, #1
+ LDR pVLDTable, [ppRunTable, ZerosLeft, LSL#2] ;// the -4 bias makes this table slot [ZerosLeft-1]
+ BLT LastRun
+ M_BD_VLD Symbol, T1, T2, pVLDTable, 3, 2 ;// Symbol = Run
+ CMP Symbol,#15
+ BGE EndBadSymbol
+
+ SUBS ZerosLeft, ZerosLeft, Symbol
+ M_PRINTF "Run=%d\n", Symbol
+ STRB Symbol, [pRun], #1
+ BGT RunLoop
+
+ ;// Decode runs while no zeros are left
+NoZerosLeft
+ SUBS Count, Count, #1
+ M_PRINTF "Run=%d\n", ZerosLeft
+ STRGEB ZerosLeft, [pRun], #1 ;// stored only while Count >= 0, i.e. for every run except the last
+ BGT NoZerosLeft
+
+LastRun
+ ;// Final run length is remaining zeros
+ M_PRINTF "LastRun=%d\n", ZerosLeft
+ STRB ZerosLeft, [pRun], #1
+
+ ;// Write coefficients to output array
+ M_LDR T1, pSMaxNumCoeff ;// sMaxNumCoeff
+ TEQ T1, #15
+ ADDEQ CoeffNum,CoeffNum,#1 ;// sMaxNumCoeff == 15: positions are shifted up by one
+
+
+ SUB pRun,pRun,TotalCoeff ;// rewind pRun to the first stored run (1 byte each)
+ SUB pLevel,pLevel,TotalCoeff ;// rewind pLevel to the first stored level
+ SUB pLevel,pLevel,TotalCoeff ;// (2 bytes each, hence subtracted twice)
+
+ M_LDR ppPosCoefbuf, pppPosCoefbuf
+ LDR pPosCoefbuf, [ppPosCoefbuf]
+ TEQ T1, #4
+ LDREQ pZigZag, =armVCM4P10_ZigZag_2x2
+ LDRNE pZigZag, =armVCM4P10_ZigZag_4x4
+
+
+
+OutputLoop
+
+ LDRB T2, [pRun],#1
+ LDRB T1, [pZigZag, CoeffNum] ;// T1 = zig-zag table entry for this coefficient
+ SUB CoeffNum, CoeffNum, #1 ;// Skip Non zero
+ SUB CoeffNum, CoeffNum, T2 ;// Skip Zero run
+
+ LDRSH T2, [pLevel],#2
+
+ SUBS TotalCoeff, TotalCoeff, #1
+ ORREQ T1, T1, #LAST_COEFF ;// flag the final coefficient of the block
+
+ ADD temp, T2, #128
+ CMP temp, #256 ;// carry set when the level is outside -128..127
+ ORRCS T1, T1, #TWO_BYTE_COEFF
+
+
+ TEQ TotalCoeff, #0 ;// Preserves carry
+
+ M_PRINTF "Output=%02x %04x\n", T1, T2
+ STRB T1, [pPosCoefbuf], #1 ;// position/flags byte
+ STRB T2, [pPosCoefbuf], #1 ;// low byte of the level
+ MOV T2, T2, LSR #8
+ STRCSB T2, [pPosCoefbuf], #1 ;// high byte, only for two-byte coefficients
+ BNE OutputLoop
+
+ ;// Finished
+ STR pPosCoefbuf, [ppPosCoefbuf]
+ M_LDR ppBitStream, pppBitStream
+ M_LDR pOffset, ppOffset
+ B EndNoError
+
+EndBadSymbol
+ MOV r0, #OMX_Sts_Err ;// error exit: M_BD_FINI is skipped on this path
+ B End
+
+EndNoError
+ ;// Finished reading from the bitstream
+ M_BD_FINI ppBitStream, pOffset
+
+ ;// Set return value
+ MOV r0, #OMX_Sts_NoErr
+End
+ M_END
+
+ ENDIF
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DequantTables_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DequantTables_s.s
new file mode 100644
index 0000000..b16f188
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DequantTables_s.s
@@ -0,0 +1,123 @@
+;//
+;//
+;// File Name: armVCM4P10_DequantTables_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ EXPORT armVCM4P10_QPDivTable
+ EXPORT armVCM4P10_VMatrixQPModTable
+ EXPORT armVCM4P10_PosToVCol4x4
+ EXPORT armVCM4P10_PosToVCol2x2
+ EXPORT armVCM4P10_VMatrix
+ EXPORT armVCM4P10_QPModuloTable
+ EXPORT armVCM4P10_VMatrixU16
+
+;// Define the processor variants supported by this file
+
+ M_VARIANTS ARM1136JS
+
+
+;// Guarding implementation by the processor name
+
+
+ IF ARM1136JS :LOR: CortexA8
+
+
+ M_TABLE armVCM4P10_PosToVCol4x4 ;// 16 bytes, values 0..2; NOTE(review): presumably the armVCM4P10_VMatrix column for each 4x4 position - confirm
+ DCB 0, 2, 0, 2
+ DCB 2, 1, 2, 1
+ DCB 0, 2, 0, 2
+ DCB 2, 1, 2, 1
+
+
+ M_TABLE armVCM4P10_PosToVCol2x2 ;// 4 bytes, same values as the top-left 2x2 corner of the 4x4 table above
+ DCB 0, 2
+ DCB 2, 1
+
+
+ M_TABLE armVCM4P10_VMatrix ;// 6 rows of 3 bytes; column 0 is the 10,11,13,14,16,18 sequence that armVCM4P10_VMatrixQPModTable repeats per QP%6
+ DCB 10, 16, 13
+ DCB 11, 18, 14
+ DCB 13, 20, 16
+ DCB 14, 23, 18
+ DCB 16, 25, 20
+ DCB 18, 29, 23
+
+;//-------------------------------------------------------
+;// This table evaluates the expression [(INT)(QP/6)],
+;// for values of QP from 0 to 51 (inclusive).
+;//-------------------------------------------------------
+
+ M_TABLE armVCM4P10_QPDivTable
+ DCB 0, 0, 0, 0, 0, 0
+ DCB 1, 1, 1, 1, 1, 1
+ DCB 2, 2, 2, 2, 2, 2
+ DCB 3, 3, 3, 3, 3, 3
+ DCB 4, 4, 4, 4, 4, 4
+ DCB 5, 5, 5, 5, 5, 5
+ DCB 6, 6, 6, 6, 6, 6
+ DCB 7, 7, 7, 7, 7, 7
+ DCB 8, 8, 8, 8, 8, 8
+
+;//----------------------------------------------------
+;// This table contains armVCM4P10_VMatrix[QP%6][0] entries,
+;// for values of QP from 0 to 51 (inclusive).
+;//----------------------------------------------------
+
+ M_TABLE armVCM4P10_VMatrixQPModTable
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+ DCB 10, 11, 13, 14, 16, 18
+
+;//-------------------------------------------------------
+;// This table evaluates the modulus expression [QP%6]*6,
+;// for values of QP from 0 to 51 (inclusive).
+;//-------------------------------------------------------
+
+ M_TABLE armVCM4P10_QPModuloTable
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+ DCB 0, 6, 12, 18, 24, 30
+
+;//-------------------------------------------------------
+;// This table contains the individual byte values stored as
+;// halfwords. This avoids unpacking inside the function
+;//-------------------------------------------------------
+
+ M_TABLE armVCM4P10_VMatrixU16
+ DCW 10, 16, 13
+ DCW 11, 18, 14
+ DCW 13, 20, 16
+ DCW 14, 23, 18
+ DCW 16, 25, 20
+ DCW 18, 29, 23
+
+ ENDIF ;//ARM1136JS
+
+
+
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
new file mode 100644
index 0000000..82b9542
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
@@ -0,0 +1,236 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_Align_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+
+DEBUG_ON SETL {FALSE}
+
+ IF ARM1136JS
+
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 8
+iHeight RN 9
+
+;// Declare inner loop registers
+x RN 7
+x0 RN 7
+x1 RN 10
+x2 RN 11
+Scratch RN 12
+
+;// Function:
+;// armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+;//
+;// Implements a copy from an arbitrarily aligned source memory location (pSrc) to a 4 byte aligned
+;// destination pointed to by (pDst) for horizontal interpolation.
+;// This function needs to copy 9 bytes in the horizontal direction (it stores 12 bytes per row).
+;//
+;// Registers used as input for this function
+;// r0,r1,r8,r9 where r0 = pSrc, r1 = srcStep, r8 contains the aligned destination pointer and r9 the number of rows to copy
+;//
+;// Registers preserved for top level function
+;// r2,r3,r4,r5,r6
+;//
+;// Registers modified by the function
+;// r7,r8,r9,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the new aligned location which will be used as pSrc
+;// r1 - step size to this aligned location
+
+ ;// Function header
+ M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+
+ ;// Copy pDst to scratch
+ MOV Scratch, pDst ;// kept so the start of the buffer can be returned in r0
+
+StartAlignedStackCopy
+ AND x, pSrc, #3 ;// x = byte offset of pSrc within its word (0..3)
+ BIC pSrc, pSrc, #3 ;// round pSrc down to a word boundary
+
+ M_SWITCH x
+ M_CASE Copy0toAligned
+ M_CASE Copy1toAligned
+ M_CASE Copy2toAligned
+ M_CASE Copy3toAligned
+ M_ENDSWITCH
+
+Copy0toAligned
+ LDM pSrc, {x0, x1, x2} ;// 3 words = 12 source bytes per row
+ SUBS iHeight, iHeight, #1
+ ADD pSrc, pSrc, srcStep
+
+ ;// One cycle stall
+
+ STM pDst!, {x0, x1, x2} ;// Store aligned output row
+ BGT Copy0toAligned
+ B CopyEnd
+
+Copy1toAligned
+ LDM pSrc, {x0, x1, x2}
+ SUBS iHeight, iHeight, #1
+ ADD pSrc, pSrc, srcStep
+
+ ;// One cycle stall
+
+ MOV x0, x0, LSR #8 ;// discard the low 8 bits of x0 (the one byte before pSrc, assuming little-endian)
+ ORR x0, x0, x1, LSL #24 ;// and refill the top of the word from the next word
+ MOV x1, x1, LSR #8
+ ORR x1, x1, x2, LSL #24
+ MOV x2, x2, LSR #8 ;// last word: its top 8 bits become zero
+ STM pDst!, {x0, x1, x2} ;// Store aligned output row
+ BGT Copy1toAligned
+ B CopyEnd
+
+Copy2toAligned
+ LDM pSrc, {x0, x1, x2}
+ SUBS iHeight, iHeight, #1
+ ADD pSrc, pSrc, srcStep
+
+ ;// One cycle stall
+
+ MOV x0, x0, LSR #16 ;// same realignment with a 16-bit shift
+ ORR x0, x0, x1, LSL #16
+ MOV x1, x1, LSR #16
+ ORR x1, x1, x2, LSL #16
+ MOV x2, x2, LSR #16
+ STM pDst!, {x0, x1, x2} ;// Store aligned output row
+ BGT Copy2toAligned
+ B CopyEnd
+
+Copy3toAligned
+ LDM pSrc, {x0, x1, x2}
+ SUBS iHeight, iHeight, #1
+ ADD pSrc, pSrc, srcStep
+
+ ;// One cycle stall
+
+ MOV x0, x0, LSR #24 ;// same realignment with a 24-bit shift
+ ORR x0, x0, x1, LSL #8
+ MOV x1, x1, LSR #24
+ ORR x1, x1, x2, LSL #8
+ MOV x2, x2, LSR #24 ;// only the low 8 bits of the last stored word come from the source
+ STM pDst!, {x0, x1, x2} ;// Store aligned output row
+ BGT Copy3toAligned
+
+CopyEnd
+
+ MOV pSrc, Scratch ;// r0 = start of the aligned copy
+ MOV srcStep, #12 ;// r1 = 12, the row pitch written by the STMs above
+
+ M_END
+
+
+;// Function:
+;// armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+;//
+;// Implements a copy from an arbitrarily aligned source memory location (pSrc) to an aligned
+;// destination pointed to by (pDst) for vertical interpolation.
+;// This function needs to copy 4 bytes in the horizontal direction
+;//
+;// Registers used as input for this function
+;// r0,r1,r8,r9 where r0 = pSrc, r1 = srcStep, r8 contains the aligned destination pointer and r9 the number of rows to copy
+;//
+;// Registers preserved for top level function
+;// r2,r3,r4,r5,r6
+;//
+;// Registers modified by the function
+;// r7,r8,r9,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the new aligned location which will be used as pSrc
+;// r1 - step size to this aligned location
+
+ ;// Function header
+ M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+
+ ;// Copy pSrc to stack
+StartVAlignedStackCopy
+ AND x, pSrc, #3 ;// x = byte offset of pSrc within its word (0..3)
+ BIC pSrc, pSrc, #3 ;// round pSrc down to a word boundary
+
+
+ M_SWITCH x
+ M_CASE Copy0toVAligned
+ M_CASE Copy1toVAligned
+ M_CASE Copy2toVAligned
+ M_CASE Copy3toVAligned
+ M_ENDSWITCH
+
+Copy0toVAligned
+ M_LDR x0, [pSrc], srcStep
+ SUBS iHeight, iHeight, #1
+
+ ;// One cycle stall
+
+ STR x0, [pDst], #4 ;// Store aligned output row
+ BGT Copy0toVAligned
+ B CopyVEnd
+
+Copy1toVAligned
+ LDR x1, [pSrc, #4] ;// second word of the row, read before pSrc is advanced
+ M_LDR x0, [pSrc], srcStep
+ SUBS iHeight, iHeight, #1
+
+ ;// One cycle stall
+
+ MOV x1, x1, LSL #24
+ ORR x0, x1, x0, LSR #8 ;// top 24 bits of x0 followed by the low 8 bits of x1
+ STR x0, [pDst], #4 ;// Store aligned output row
+ BGT Copy1toVAligned
+ B CopyVEnd
+
+Copy2toVAligned
+ LDR x1, [pSrc, #4]
+ M_LDR x0, [pSrc], srcStep
+ SUBS iHeight, iHeight, #1
+
+ ;// One cycle stall
+
+ MOV x1, x1, LSL #16
+ ORR x0, x1, x0, LSR #16 ;// top 16 bits of x0 followed by the low 16 bits of x1
+ STR x0, [pDst], #4 ;// Store aligned output row
+ BGT Copy2toVAligned
+ B CopyVEnd
+
+Copy3toVAligned
+ LDR x1, [pSrc, #4]
+ M_LDR x0, [pSrc], srcStep
+ SUBS iHeight, iHeight, #1
+
+ ;// One cycle stall
+
+ MOV x1, x1, LSL #8
+ ORR x0, x1, x0, LSR #24 ;// top 8 bits of x0 followed by the low 24 bits of x1
+ STR x0, [pDst], #4 ;// Store aligned output row
+ BGT Copy3toVAligned
+
+CopyVEnd
+
+ SUB pSrc, pDst, #28 ;// r0 = original pDst + 4*(rows copied - 7); NOTE(review): this is the third row only when 9 rows were copied - confirm callers
+ MOV srcStep, #4 ;// r1 = 4, the row pitch written by the STRs above
+
+ M_END
+
+
+ ENDIF
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
new file mode 100644
index 0000000..bc0b6ec
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
@@ -0,0 +1,149 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_Copy_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Function:
+;// armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+;//
+;// Implements copy from an arbitrary aligned source memory location (pSrc) to an aligned
+;// destination pointed by (pDst)
+;//
+;// Registers preserved for top level function
+;// r1,r3,r4,r5,r6,r7,r10,r11,r14
+;//
+;// Registers modified by the function
+;// r0,r2,r8,r9,r12
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2
+dstStep RN 3
+
+;// Declare other intermediate registers
+x0 RN 4
+x1 RN 5
+x2 RN 8
+x3 RN 9
+Temp RN 12
+
+ IF ARM1136JS
+
+ M_START armVCM4P10_InterpolateLuma_Copy4x4_unsafe, r6
+
+Copy4x4Start
+ ;// Do Copy and branch to EndOfInterpolation
+ AND Temp, pSrc, #3 ;// Temp = byte offset of pSrc within its word (0..3)
+ BIC pSrc, pSrc, #3 ;// round pSrc down to a word boundary
+
+ M_SWITCH Temp
+ M_CASE Copy4x4Align0
+ M_CASE Copy4x4Align1
+ M_CASE Copy4x4Align2
+ M_CASE Copy4x4Align3
+ M_ENDSWITCH
+
+Copy4x4Align0
+ M_LDR x0, [pSrc], srcStep ;// aligned source: four plain word copies, one per row
+ M_LDR x1, [pSrc], srcStep
+ M_STR x0, [pDst], dstStep
+ M_LDR x2, [pSrc], srcStep
+ M_STR x1, [pDst], dstStep
+ M_LDR x3, [pSrc], srcStep
+ M_STR x2, [pDst], dstStep
+ M_STR x3, [pDst], dstStep
+ B Copy4x4End
+
+Copy4x4Align1
+ LDR x1, [pSrc, #4] ;// rows 0 and 1: x1/x3 are the second word of each row, x0/x2 the first
+ M_LDR x0, [pSrc], srcStep
+ LDR x3, [pSrc, #4]
+ M_LDR x2, [pSrc], srcStep
+ MOV x0, x0, LSR #8
+ ORR x0, x0, x1, LSL #24 ;// top 24 bits of the first word followed by the low 8 bits of the second
+ M_STR x0, [pDst], dstStep
+ MOV x2, x2, LSR #8
+ ORR x2, x2, x3, LSL #24
+ LDR x1, [pSrc, #4] ;// rows 2 and 3, same pattern
+ M_LDR x0, [pSrc], srcStep
+ M_STR x2, [pDst], dstStep
+ LDR x3, [pSrc, #4]
+ M_LDR x2, [pSrc], srcStep
+ MOV x0, x0, LSR #8
+ ORR x0, x0, x1, LSL #24
+ M_STR x0, [pDst], dstStep
+ MOV x2, x2, LSR #8
+ ORR x2, x2, x3, LSL #24
+ M_STR x2, [pDst], dstStep
+ B Copy4x4End
+
+Copy4x4Align2
+ LDR x1, [pSrc, #4] ;// rows 0 and 1, 16-bit realignment
+ M_LDR x0, [pSrc], srcStep
+ LDR x3, [pSrc, #4]
+ M_LDR x2, [pSrc], srcStep
+ MOV x0, x0, LSR #16
+ ORR x0, x0, x1, LSL #16
+ M_STR x0, [pDst], dstStep
+ MOV x2, x2, LSR #16
+ ORR x2, x2, x3, LSL #16
+ M_STR x2, [pDst], dstStep
+
+ LDR x1, [pSrc, #4] ;// rows 2 and 3
+ M_LDR x0, [pSrc], srcStep
+ LDR x3, [pSrc, #4]
+ M_LDR x2, [pSrc], srcStep
+ MOV x0, x0, LSR #16
+ ORR x0, x0, x1, LSL #16
+ M_STR x0, [pDst], dstStep
+ MOV x2, x2, LSR #16
+ ORR x2, x2, x3, LSL #16
+ M_STR x2, [pDst], dstStep
+ B Copy4x4End
+
+Copy4x4Align3
+ LDR x1, [pSrc, #4] ;// rows 0 and 1, 24-bit realignment
+ M_LDR x0, [pSrc], srcStep
+ LDR x3, [pSrc, #4]
+ M_LDR x2, [pSrc], srcStep
+ MOV x0, x0, LSR #24
+ ORR x0, x0, x1, LSL #8
+ M_STR x0, [pDst], dstStep
+ MOV x2, x2, LSR #24
+ ORR x2, x2, x3, LSL #8
+ M_STR x2, [pDst], dstStep
+
+ LDR x1, [pSrc, #4] ;// rows 2 and 3
+ M_LDR x0, [pSrc], srcStep
+ LDR x3, [pSrc, #4]
+ M_LDR x2, [pSrc], srcStep
+ MOV x0, x0, LSR #24
+ ORR x0, x0, x1, LSL #8
+ M_STR x0, [pDst], dstStep
+ MOV x2, x2, LSR #24
+ ORR x2, x2, x3, LSL #8
+ M_STR x2, [pDst], dstStep
+ B Copy4x4End
+
+Copy4x4End
+ M_END
+
+ ENDIF
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
new file mode 100644
index 0000000..66cfe5e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
@@ -0,0 +1,178 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+ EXPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+
+;// Functions:
+;// armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe and
+;// armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+;//
+;// Implements re-arrangement of data from temporary buffer to a buffer pointed by pBuf.
+ ;// This converts the data from 16 bit to 8 bit; it also
+ ;// removes the offset and saturates the result.
+;//
+;// Registers used as input for this function
+ ;// r0,r1,r7 where r0 is input pointer and r1 its step size, r7 is output pointer
+;//
+;// Registers preserved for top level function
+;// r4,r5,r6,r8,r9,r14
+;//
+;// Registers modified by the function
+;// r7,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the destination location
+;// r1 - step size to this destination location
+
+
+DEBUG_ON SETL {FALSE}
+
+MASK EQU 0x80808080 ;// Mask is used to implement (a+b+1)/2
+
+;// Declare input registers
+
+pSrc0 RN 0
+srcStep0 RN 1
+
+;// Declare other intermediate registers
+Temp1 RN 4
+Temp2 RN 5
+Temp3 RN 10
+Temp4 RN 11
+pBuf RN 7
+r0x0fe00fe0 RN 6
+r0x00ff00ff RN 12
+Count RN 14
+ValueA0 RN 10
+ValueA1 RN 11
+
+ IF ARM1136JS
+
+
+ ;// Narrows four rows of eight 16-bit values to bytes and stores them at pBuf, 8 bytes per row
+ M_START armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe, r6
+
+ ;// Count = rows left; both constants stay resident for the whole loop
+ MOV Count, #4
+ LDR r0x0fe00fe0, =0x0fe00fe0 ;// Per-halfword offset removed by UQSUB16 below
+ LDR r0x00ff00ff, =0x00ff00ff ;// Keeps the low byte of each halfword
+LoopStart1
+ LDR Temp4, [pSrc0, #12] ;// Bytes 12-15 of the current row
+ LDR Temp3, [pSrc0, #8] ;// Bytes 8-11
+ LDR Temp2, [pSrc0, #4] ;// Bytes 4-7
+ M_LDR Temp1, [pSrc0], srcStep0 ;// Bytes 0-3, then pSrc0 += srcStep0
+ UQSUB16 Temp4, Temp4, r0x0fe00fe0 ;// Unsigned saturating subtract of the offset, per halfword
+ UQSUB16 Temp3, Temp3, r0x0fe00fe0
+ UQSUB16 Temp2, Temp2, r0x0fe00fe0
+ UQSUB16 Temp1, Temp1, r0x0fe00fe0
+ USAT16 Temp4, #13, Temp4 ;// Saturate each halfword to 13 bits
+ USAT16 Temp3, #13, Temp3
+ USAT16 Temp2, #13, Temp2
+ USAT16 Temp1, #13, Temp1
+ AND Temp4, r0x00ff00ff, Temp4, LSR #5 ;// Shift right by 5 and keep 8 bits per halfword
+ AND Temp3, r0x00ff00ff, Temp3, LSR #5
+ AND Temp2, r0x00ff00ff, Temp2, LSR #5
+ AND Temp1, r0x00ff00ff, Temp1, LSR #5
+ ORR ValA1, Temp3, Temp4, LSL #8 ;// ValA1 aliases Temp4 (r11): must be formed before ValA0 overwrites Temp3 (r10)
+ ORR ValA0, Temp1, Temp2, LSL #8 ;// Temp1 bytes in even byte lanes, Temp2 bytes in odd byte lanes
+ SUBS Count, Count, #1 ;// Sets the flags tested by BGT; STRD below leaves them alone
+ STRD ValA0, [pBuf], #8 ;// Store ValA0:ValA1 (8 bytes), pBuf += 8
+ BGT LoopStart1
+End1
+ SUB pSrc0, pBuf, #32 ;// Output r0 = start of the 4 x 8 bytes just written
+ MOV srcStep0, #8 ;// Output r1 = 8-byte step between stored rows
+
+ M_END
+
+
+ ;// Narrows four rows of eight 16-bit values to bytes, re-pairing halfwords so that two rows interleave at pBuf
+ M_START armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe, r6
+
+ ;// Two loop passes; each pass converts two source rows and writes 16 bytes
+ LDR r0x0fe00fe0, =0x0fe00fe0 ;// Per-halfword offset removed by UQSUB16 below
+ LDR r0x00ff00ff, =0x00ff00ff ;// Keeps the low byte of each halfword
+ MOV Count, #2
+
+LoopStart
+ LDR Temp4, [pSrc0, #12] ;// Bytes 12-15 of the first row of this pass
+ LDR Temp3, [pSrc0, #8]
+ LDR Temp2, [pSrc0, #4]
+ M_LDR Temp1, [pSrc0], srcStep0 ;// Bytes 0-3, then pSrc0 += srcStep0
+
+ UQSUB16 Temp4, Temp4, r0x0fe00fe0 ;// Unsigned saturating subtract of the offset, per halfword
+ UQSUB16 Temp3, Temp3, r0x0fe00fe0
+ UQSUB16 Temp2, Temp2, r0x0fe00fe0
+ UQSUB16 Temp1, Temp1, r0x0fe00fe0
+
+ USAT16 Temp4, #13, Temp4 ;// Saturate each halfword to 13 bits
+ USAT16 Temp3, #13, Temp3
+ USAT16 Temp2, #13, Temp2
+ USAT16 Temp1, #13, Temp1
+
+ AND Temp4, r0x00ff00ff, Temp4, LSR #5 ;// Shift right by 5 and keep 8 bits per halfword
+ AND Temp3, r0x00ff00ff, Temp3, LSR #5
+ AND Temp2, r0x00ff00ff, Temp2, LSR #5
+ AND Temp1, r0x00ff00ff, Temp1, LSR #5
+ ORR ValA1, Temp3, Temp4, LSL #8 ;// [d2 c2 d0 c0]
+ ORR ValA0, Temp1, Temp2, LSL #8 ;// [b2 a2 b0 a0]
+
+ PKHBT Temp1, ValA0, ValA1, LSL #16 ;// [d0 c0 b0 a0]
+
+ STR Temp1, [pBuf], #8 ;// Written at pass offset +0
+ PKHTB Temp2, ValA1, ValA0, ASR #16 ;// [d2 c2 b2 a2]
+ STR Temp2, [pBuf], #-4 ;// Written at pass offset +8; pBuf steps back to +4 for the second row
+
+ LDR Temp4, [pSrc0, #12] ;// Second row of this pass, same conversion as above
+ LDR Temp3, [pSrc0, #8]
+ LDR Temp2, [pSrc0, #4]
+ M_LDR Temp1, [pSrc0], srcStep0
+
+ UQSUB16 Temp4, Temp4, r0x0fe00fe0
+ UQSUB16 Temp3, Temp3, r0x0fe00fe0
+ UQSUB16 Temp2, Temp2, r0x0fe00fe0
+ UQSUB16 Temp1, Temp1, r0x0fe00fe0
+
+ USAT16 Temp4, #13, Temp4
+ USAT16 Temp3, #13, Temp3
+ USAT16 Temp2, #13, Temp2
+ USAT16 Temp1, #13, Temp1
+
+ AND Temp4, r0x00ff00ff, Temp4, LSR #5
+ AND Temp3, r0x00ff00ff, Temp3, LSR #5
+ AND Temp2, r0x00ff00ff, Temp2, LSR #5
+ AND Temp1, r0x00ff00ff, Temp1, LSR #5
+ ORR ValA1, Temp3, Temp4, LSL #8 ;// [d2 c2 d0 c0]
+ ORR ValA0, Temp1, Temp2, LSL #8 ;// [b2 a2 b0 a0]
+
+ PKHBT Temp1, ValA0, ValA1, LSL #16 ;// [d0 c0 b0 a0]
+ SUBS Count, Count, #1 ;// Sets the flags tested by BGT; the stores below leave them alone
+ STR Temp1, [pBuf], #8 ;// Written at pass offset +4
+ PKHTB Temp2, ValA1, ValA0, ASR #16 ;// [d2 c2 b2 a2]
+ STR Temp2, [pBuf], #4 ;// Written at pass offset +12; pBuf ends the pass at +16
+
+ BGT LoopStart
+End2
+ SUB pSrc0, pBuf, #32-8 ;// pBuf advanced 32 in total, so output r0 = 8 bytes past the first word written
+ MOV srcStep0, #4 ;// Output r1 = 4-byte step
+
+ M_END
+
+ ENDIF
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
new file mode 100644
index 0000000..851ff6a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
@@ -0,0 +1,296 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ EXPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+
+ M_VARIANTS ARM1136JS
+
+
+
+ IF ARM1136JS
+
+
+ M_ALLOC8 ppDstArgs, 8 ;// Caller's pDst/dstStep pair while r2/r3 are reused
+ M_ALLOC8 pTempResult1, 8 ;// Acc0/Acc1 held over from an even-Counter pass
+ M_ALLOC8 pTempResult2, 8 ;// Acc2/Acc3 held over from an even-Counter pass
+ M_ALLOC4 ppSrc, 4 ;// Not referenced in this routine
+ M_ALLOC4 ppDst, 4 ;// Not referenced in this routine
+ M_ALLOC4 pDstStep, 4 ;// Spill slot for dstStep (r3 doubles as Temp1)
+ M_ALLOC4 pSrcStep, 4 ;// Spill slot for srcStep (r1 doubles as Temp2/ValI)
+ M_ALLOC4 pCounter, 4 ;// Spill slot for the second-pass loop counter
+
+ ;// Function header
+ ;// Function:
+ ;// armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+ ;//
+ ;// Implements diagonal interpolation for a block of size 4x4. Input and output should
+ ;// be aligned.
+ ;//
+ ;// Registers used as input for this function
+ ;// r0,r1,r2,r3, r8 where r0,r2 input pointer and r1,r3 step size, r8 intermediate-buf pointer
+ ;//
+ ;// Registers preserved for top level function
+ ;// r0,r1,r2,r3,r4,r5,r6,r14
+ ;//
+ ;// Registers modified by the function
+ ;// r7,r8,r9,r10,r11,r12
+ ;//
+ ;// Output registers
+ ;// None. r2/r3 are restored. NOTE(review): r0/r1 exit as the intermediate-buffer pointer and step 16 - confirm callers
+
+ M_START armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe, r6
+
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2
+dstStep RN 3
+
+;// Declare inner loop registers
+Acc0 RN 4
+Acc1 RN 5
+Acc2 RN 6
+Acc3 RN 7
+
+ValA RN 4
+ValB RN 5
+ValC RN 6
+ValD RN 7
+ValE RN 8
+ValF RN 9
+ValG RN 12
+ValH RN 14
+ValI RN 1
+
+Temp1 RN 3
+Temp2 RN 1
+Temp3 RN 12
+Temp4 RN 7
+Temp5 RN 5
+r0x0fe00fe0 RN 3 ;// [0 (16*255 - 16) 0 (16*255 - 16)]
+r0x00ff00ff RN 10 ;// [0 255 0 255] where 255 is offset
+Counter RN 11
+pInterBuf RN 8
+
+ValCA RN 8
+ValDB RN 9
+ValGE RN 10
+ValHF RN 11
+r0x00140001 RN 12
+r0x0014fffb RN 14
+
+r0x0001fc00 RN 11
+
+Accx RN 8
+Accy RN 9
+Temp6 RN 14
+
+ M_STRD pDst, dstStep, ppDstArgs ;// Save the caller's destination pointer and step
+
+ MOV pDst, pInterBuf ;// First pass writes into the intermediate buffer
+ MOV dstStep, #16 ;// Intermediate rows are 16 bytes apart
+
+ ;// Counter runs 4 down to 0: five passes of two source rows each (loop ends when SUBS goes negative)
+ MOV Counter, #4
+ M_STR dstStep, pDstStep
+ M_STR srcStep, pSrcStep
+ LDR r0x00ff00ff, =0x00ff00ff ;// [0 255 0 255] 255 is offset to avoid negative results
+
+HeightLoop
+NextTwoRowsLoop
+ LDR ValD, [pSrc, srcStep] ;// Load row 1 [d1 c1 b1 a1]
+ LDR ValA, [pSrc], #4 ;// Load row 0 [d0 c0 b0 a0]
+ LDR ValH, [pSrc, srcStep] ;// Load [h1 g1 f1 e1]
+ LDR ValE, [pSrc], #4 ;// Load [h0 g0 f0 e0]
+ LDRB Temp2, [pSrc, srcStep] ;// Load byte i1 of row 1 (overwrites srcStep in r1; reloaded below)
+ LDRB Temp1, [pSrc], #-8 ;// Load byte i0 of row 0, then rewind pSrc by 8
+
+ PKHBT ValB, ValA, ValD, LSL #16 ;// [b1 a1 b0 a0]
+ PKHTB ValD, ValD, ValA, ASR #16 ;// [d1 c1 d0 c0]
+ UXTAB16 ValA, r0x00ff00ff, ValB ;// [00 a1 00 a0] + [0 255 0 255]
+ UXTAB16 ValC, r0x00ff00ff, ValD ;// [00 c1 00 c0] + [0 255 0 255]
+ PKHBT ValI, Temp1, Temp2, LSL #16 ;// [00 i1 00 i0]
+ PKHBT ValF, ValE, ValH, LSL #16 ;// [f1 e1 f0 e0]
+ PKHTB ValH, ValH, ValE, ASR #16 ;// [h1 g1 h0 g0]
+ UXTAB16 ValE, r0x00ff00ff, ValF ;// [00 e1 00 e0] + [0 255 0 255]
+
+ ;// Calculate Acc0
+ ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
+ UXTAB16 Temp1, ValC, ValD, ROR #8
+ UXTAB16 Temp3, ValE, ValB, ROR #8
+ RSB Temp1, Temp3, Temp1, LSL #2
+ UXTAB16 Acc0, ValA, ValF, ROR #8
+ ADD Temp1, Temp1, Temp1, LSL #2
+ ADD Acc0, Acc0, Temp1
+
+ ;// Calculate Acc1
+ ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
+ UXTAB16 Temp1, ValE, ValD, ROR #8
+ UXTAB16 Temp3, ValC, ValF, ROR #8
+ RSB Temp1, Temp3, Temp1, LSL #2
+ UXTAB16 ValG, r0x00ff00ff, ValH ;// [00 g1 00 g0] + [0 255 0 255]
+ ADD Temp1, Temp1, Temp1, LSL #2
+ UXTAB16 Acc1, ValG, ValB, ROR #8
+ ADD Acc1, Acc1, Temp1
+
+ UXTAB16 Acc2, ValC, ValH, ROR #8
+ ADD ValI, r0x00ff00ff, ValI ;// [00 i1 00 i0] + [0 255 0 255]
+
+ ;// Calculate Acc2
+ ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
+ UXTAB16 Temp1, ValG, ValD, ROR #8
+ UXTAB16 Acc3, ValI, ValD, ROR #8
+ UXTAB16 Temp2, ValE, ValF, ROR #8
+
+ RSB Temp1, Temp1, Temp2, LSL #2
+ UXTAB16 Temp2, ValG, ValF, ROR #8
+ ADD Temp1, Temp1, Temp1, LSL #2
+ ADD Acc2, Acc2, Temp1
+
+ ;// Calculate Acc3
+ ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
+ UXTAB16 Temp1, ValE, ValH, ROR #8
+ RSB Temp1, Temp1, Temp2, LSL #2
+ ADD Temp1, Temp1, Temp1, LSL #2
+ ADD Acc3, Acc3, Temp1
+
+ M_LDR dstStep, pDstStep ;// r3 was used as Temp1 above
+ M_LDR srcStep, pSrcStep ;// r1 was used as Temp2/ValI above
+
+ ;// If Counter is even store Acc0-Acc3 in a temporary buffer
+ ;// If Counter is odd store Acc0-Acc3 and previous Acc0-Acc3 in the intermediate buf
+ ANDS Temp3, Counter, #1
+ BEQ NoProcessing
+
+ ;// Packing previous and current Acc0-Acc3 values
+ M_LDRD Accx, Accy, pTempResult1
+ PKHBT Temp6, Accx, Acc0, LSL #16 ;//[0 a2 0 a0] = [0 a3 0 a2] [0 a1 0 a0]
+ PKHTB Acc0, Acc0, Accx, ASR #16 ;//[0 a3 0 a1] = [0 a1 0 a0] [0 a3 0 a2]
+ STR Acc0, [pDst, dstStep]
+ STR Temp6, [pDst], #4
+ PKHBT Temp6, Accy, Acc1, LSL #16 ;//[0 b2 0 b0] = [0 b3 0 b2] [0 b1 0 b0]
+ PKHTB Acc1, Acc1, Accy, ASR #16 ;//[0 b3 0 b1] = [0 b1 0 b0] [0 b3 0 b2]
+ M_LDRD Accx, Accy, pTempResult2
+ STR Acc1, [pDst, dstStep]
+ STR Temp6, [pDst], #4
+
+ PKHBT Temp6, Accx, Acc2, LSL #16 ;//[0 c2 0 c0] = [0 c3 0 c2] [0 c1 0 c0]
+ PKHTB Acc2, Acc2, Accx, ASR #16 ;//[0 c3 0 c1] = [0 c1 0 c0] [0 c3 0 c2]
+ STR Acc2, [pDst, dstStep]
+ STR Temp6, [pDst], #4
+ PKHBT Temp6, Accy, Acc3, LSL #16 ;//[0 d2 0 d0] = [0 d3 0 d2] [0 d1 0 d0]
+ PKHTB Acc3, Acc3, Accy, ASR #16 ;//[0 d3 0 d1] = [0 d1 0 d0] [0 d3 0 d2]
+ STR Acc3, [pDst, dstStep]
+ STR Temp6, [pDst], #-12 ;// Back to the start of this intermediate row
+ ADD pDst, pDst, dstStep, LSL #1 ;// Two intermediate rows were filled; skip past both
+ B AfterStore
+
+NoProcessing
+ M_STRD Acc0, Acc1, pTempResult1
+ M_STRD Acc2, Acc3, pTempResult2
+AfterStore
+ SUBS Counter, Counter, #1 ;// Five passes x two rows = ten source rows
+ ADD pSrc, pSrc, srcStep, LSL #1 ;// Advance two source rows (flags from SUBS untouched)
+ BPL HeightLoop
+
+ STR Acc0, [pDst], #4 ;//[0 a1 0 a0]
+ STR Acc1, [pDst], #4
+ STR Acc2, [pDst], #4
+ STR Acc3, [pDst], #-12 ;// Last (even-Counter) pass is stored unpacked; pDst back to row start
+
+ ;//
+ ;// Second pass: 6-tap filter (1,-5,20,20,-5,1) using dual 16-bit multiplies. Each HeightLoop1 pass
+ ;// reads one word from each intermediate row (step 16) and writes four bytes down one output column
+
+ SUB pSrc, pDst, dstStep, LSL #2 ;// Back four intermediate rows to the start of the buffer
+ MOV srcStep, #16
+ M_LDRD pDst, dstStep, ppDstArgs ;// Restore the caller's destination pointer and step
+
+ MOV Counter, #4
+ LDR r0x0014fffb, =0x0014fffb ;// Halfwords [20, -5]
+ LDR r0x00140001, =0x00140001 ;// Halfwords [20, 1]
+
+HeightLoop1
+ M_STR Counter, pCounter ;// r11 is reused as ValHF and r0x0001fc00 below
+
+ M_LDR ValCA, [pSrc], srcStep ;// Load [0 c 0 a]
+ M_LDR ValDB, [pSrc], srcStep ;// Load [0 d 0 b]
+ M_LDR ValGE, [pSrc], srcStep ;// Load [0 g 0 e]
+ M_LDR ValHF, [pSrc], srcStep ;// Load [0 h 0 f]
+
+
+ ;// Acc0 = smuad ([0 20 0 1], add([0 c 0 a] + [0 d 0 f])) - (5 * (b + e))
+ ;// Acc1 = smuad ([0 20 0 1], add([0 e 0 g] + [0 d 0 b])) - (5 * (c + f))
+ ;// Acc2 = smuad ([0 1 0 20], add([0 c 0 e] + [0 h 0 f])) - (5 * (d + g))
+ ;// Acc3 = smuad ([0 20 0 1], add([0 d 0 f] + [0 i 0 g])) - (5 * (e + h))
+
+ SMUAD Acc0, ValCA, r0x00140001 ;// Acc0 = a + 20*c
+ SMUAD Acc1, ValDB, r0x00140001 ;// Acc1 = b + 20*d
+ SMUADX Acc2, ValGE, r0x0014fffb ;// Acc2 = 20*e - 5*g (constant halves exchanged)
+ SMUAD Acc3, ValGE, r0x0014fffb ;// Acc3 = -5*e + 20*g
+
+ SMLAD Acc0, ValDB, r0x0014fffb, Acc0 ;// Acc0 += -5*b + 20*d
+ SMLADX Acc1, ValGE, r0x00140001, Acc1 ;// Acc1 += 20*e + g (exchanged)
+ SMLADX Acc2, ValHF, r0x00140001, Acc2 ;// Acc2 += 20*f + h (exchanged)
+ SMLADX Acc3, ValHF, r0x0014fffb, Acc3 ;// Acc3 += 20*f - 5*h (exchanged)
+
+ SMLABB Acc0, ValGE, r0x0014fffb, Acc0 ;// Acc0 += -5*e
+ SMLATB Acc1, ValCA, r0x0014fffb, Acc1 ;// Acc1 += -5*c (top half of [0 c 0 a])
+ SMLATB Acc2, ValCA, r0x00140001, Acc2 ;// Acc2 += c
+ SMLATB Acc3, ValDB, r0x00140001, Acc3 ;// Acc3 += d (top half of [0 d 0 b])
+
+ LDRH ValCA, [pSrc], #4 ;// Load i (low halfword of the fifth row); pSrc += 4
+ SMLABB Acc0, ValHF, r0x00140001, Acc0 ;// Acc0 += f
+ SMLABB Acc1, ValHF, r0x0014fffb, Acc1 ;// Acc1 += -5*f
+ SMLATB Acc2, ValDB, r0x0014fffb, Acc2 ;// Acc2 += -5*d
+ SMLABB Acc3, ValCA, r0x00140001, Acc3 ;// Acc3 += i (ValCA now holds i)
+
+ LDR r0x0001fc00, =0x0001fc00 ;// (0xff * 16 * 32) - 512
+ SUB Acc0, Acc0, r0x0001fc00
+ SUB Acc1, Acc1, r0x0001fc00
+ SUB Acc2, Acc2, r0x0001fc00
+ SUB Acc3, Acc3, r0x0001fc00
+
+ USAT Acc0, #18, Acc0 ;// Saturate to 18 bits before the shift down to 8
+ USAT Acc1, #18, Acc1
+ USAT Acc2, #18, Acc2
+ USAT Acc3, #18, Acc3
+
+ MOV Acc0, Acc0, LSR #10
+ M_STRB Acc0, [pDst], dstStep ;// One byte per output row, stepping down the column
+ MOV Acc1, Acc1, LSR #10
+ M_STRB Acc1, [pDst], dstStep
+ MOV Acc2, Acc2, LSR #10
+ M_STRB Acc2, [pDst], dstStep
+ MOV Acc3, Acc3, LSR #10
+ M_STRB Acc3, [pDst], dstStep
+
+
+ M_LDR Counter, pCounter
+ SUB pDst, pDst, dstStep, LSL #2 ;// Back to the top output row
+ SUB pSrc, pSrc, srcStep, LSL #2 ;// Back to the first intermediate row (net pSrc += 4 per pass)
+ ADD pDst, pDst, #1 ;// Next output column
+ SUBS Counter, Counter, #1
+ BGT HeightLoop1
+End
+ SUB pDst, pDst, #4 ;// Undo the four column steps: r2 is the caller's pDst again
+ SUB pSrc, pSrc, #16 ;// Undo the four 4-byte steps: r0 is the start of the intermediate buffer
+
+ M_END
+
+ ENDIF
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
new file mode 100644
index 0000000..2f48e13
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
@@ -0,0 +1,276 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ EXPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+
+ IF ARM1136JS
+
+ M_ALLOC8 ppDstArgs, 8 ;// Caller's pDst/dstStep pair while r2/r3 are reused
+ M_ALLOC4 ppSrc, 4 ;// pSrc at the start of the current 4-column strip
+ M_ALLOC4 ppDst, 4 ;// pDst at the start of the current 4-column strip
+ M_ALLOC4 pCounter, 4 ;// Spill slot for the second-pass loop counter
+
+ ;// Function header
+ ;// Function:
+ ;// armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+ ;//
+ ;// Implements diagonal interpolation for a block of size 4x4. Input and output should
+ ;// be aligned.
+ ;//
+ ;// Registers used as input for this function
+ ;// r0,r1,r2,r3, r8 where r0,r2 input pointer and r1,r3 step size, r8 intermediate-buf pointer
+ ;//
+ ;// Registers preserved for top level function
+ ;// r0,r1,r2,r3,r4,r5,r6,r14
+ ;//
+ ;// Registers modified by the function
+ ;// r7,r8,r9,r10,r11,r12
+ ;//
+ ;// Output registers
+ ;// None. r2/r3 are restored. NOTE(review): r0/r1 exit as the intermediate-buffer pointer and step 24 - confirm callers
+
+ M_START armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe, r6
+
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2
+dstStep RN 3
+
+;// Declare inner loop registers
+ValA RN 5
+ValA0 RN 4
+ValA1 RN 5
+ValAF0 RN 4
+ValAF1 RN 5
+
+ValB RN 11
+
+ValC RN 5
+ValC0 RN 4
+ValC1 RN 5
+ValCD0 RN 12
+ValCD1 RN 14
+ValCF0 RN 4
+ValCF1 RN 5
+
+ValD RN 10
+
+ValE RN 7
+ValE0 RN 6
+ValE1 RN 7
+ValEB0 RN 10
+ValEB1 RN 11
+ValED0 RN 6
+ValED1 RN 7
+
+ValF RN 10
+
+ValG RN 14
+ValG0 RN 12
+ValG1 RN 14
+ValGB0 RN 12
+ValGB1 RN 14
+
+Acc0 RN 4
+Acc1 RN 5
+Acc2 RN 6
+Acc3 RN 7
+
+Temp RN 7
+Step RN 6
+
+pInterBuf RN 8
+Counter RN 8
+r0x00ff00ff RN 9 ;// [0 255 0 255] where 255 is offset
+r0x0001fc00 RN 10 ;// (0xff * 16 * 32) - 512, loaded in the second pass
+
+
+;// Declare inner loop registers
+ValCA RN 8
+ValDB RN 9
+ValGE RN 10
+ValHF RN 11
+r0x00140001 RN 12
+r0x0014fffb RN 14
+
+r0x00000200 RN 12
+r0x000000ff RN 12
+
+ M_STRD pDst, dstStep, ppDstArgs ;// Save the caller's destination pointer and step
+ MOV pDst, pInterBuf ;// Must precede the Counter setup: Counter aliases pInterBuf (r8)
+ MOV dstStep, #24 ;// Intermediate rows are 24 bytes apart
+
+ ;// Set up counter of format, [0] [0] [1 (height)] [8 (width)]
+ MOV Counter, #1
+ MOV Temp, #8
+ ADD Counter, Temp, Counter, LSL #8 ;// [0 0 H W]
+
+ LDR r0x00ff00ff, =0x00ff00ff ;// [0 255 0 255] 255 is offset to avoid negative results
+WidthLoop
+ M_STR pSrc, ppSrc
+ M_STR pDst, ppDst
+HeightLoop
+TwoRowsLoop
+ M_LDR ValC, [pSrc], srcStep ;// Load [c3 c2 c1 c0]
+ M_LDR ValD, [pSrc], srcStep ;// Load [d3 d2 d1 d0]
+ M_LDR ValE, [pSrc], srcStep ;// Load [e3 e2 e1 e0]
+ SUB pSrc, pSrc, srcStep, LSL #2
+ UXTAB16 ValC0, r0x00ff00ff, ValC ;// [0 c2 0 c0] + [0 255 0 255]
+ UXTAB16 ValC1, r0x00ff00ff, ValC, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255]
+ LDR ValB, [pSrc] ;// Load [b3 b2 b1 b0]
+ UXTAB16 ValE0, r0x00ff00ff, ValE ;// [0 e2 0 e0] + [0 255 0 255]
+ UXTAB16 ValE1, r0x00ff00ff, ValE, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255]
+ UXTAB16 ValCD0, ValC0, ValD ;// [0 c2 0 c0] + [0 255 0 255] + [0 d2 0 d0]
+ UXTAB16 ValCD1, ValC1, ValD, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + [0 d3 0 d1]
+ UXTAB16 ValEB0, ValE0, ValB ;// [0 e2 0 e0] + [0 255 0 255] + [0 b2 0 b0]
+ RSB ValCD0, ValEB0, ValCD0, LSL #2 ;// 4*(Off+C+D) - (Off+B+E)
+
+ LDR ValD, [pSrc, srcStep, LSL #1] ;// Load [d3 d2 d1 d0]
+ UXTAB16 ValEB1, ValE1, ValB, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + [0 b3 0 b1]
+ RSB ValCD1, ValEB1, ValCD1, LSL #2
+
+ UXTAB16 ValED0, ValE0, ValD ;// [0 e2 0 e0] + [0 255 0 255] + [0 d2 0 d0]
+ UXTAB16 ValED1, ValE1, ValD, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + [0 d3 0 d1]
+ LDR ValF, [pSrc, srcStep, LSL #2] ;// Load [f3 f2 f1 f0]
+ M_LDR ValB, [pSrc], srcStep ;// Load [b3 b2 b1 b0]
+ ADD ValCD0, ValCD0, ValCD0, LSL #2 ;// 5 * [4*(Off+C+D) - (Off+B+E)]
+ ADD ValCD1, ValCD1, ValCD1, LSL #2
+ UXTAB16 ValCF1, ValC1, ValF, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + [0 f3 0 f1]
+ UXTAB16 ValCF0, ValC0, ValF ;// [0 c2 0 c0] + [0 255 0 255] + [0 f2 0 f0]
+ RSB ValED1, ValCF1, ValED1, LSL #2
+
+ SUB ValA, pSrc, srcStep, LSL #1
+ LDR ValA, [ValA] ;// Load [a3 a2 a1 a0]
+ RSB ValED0, ValCF0, ValED0, LSL #2 ;// 4*(Off+E+D) - (Off+C+F)
+ ADD ValED1, ValED1, ValED1, LSL #2
+ ADD ValED0, ValED0, ValED0, LSL #2 ;// 5 * [4*(Off+E+D) - (Off+C+F)]
+ UXTAB16 ValA0, r0x00ff00ff, ValA ;// [0 a2 0 a0] + [0 255 0 255]
+ UXTAB16 ValA1, r0x00ff00ff, ValA, ROR #8 ;// [0 a3 0 a1] + [0 255 0 255]
+ UXTAB16 ValAF0, ValA0, ValF ;// [0 a2 0 a0] + [0 255 0 255] + [0 f2 0 f0]
+ UXTAB16 ValAF1, ValA1, ValF, ROR #8 ;// [0 a3 0 a1] + [0 255 0 255] + [0 f3 0 f1]
+ ADD Acc1, ValCD1, ValAF1
+
+ LDR ValG, [pSrc, srcStep, LSL #2] ;// Load [g3 g2 g1 g0]
+ ADD Acc0, ValCD0, ValAF0 ;// Acc0 = 16*Off + (A+F) + 20*(C+D) - 5*(B+E)
+ STR Acc1, [pDst, #4] ;// Store result & adjust pointer
+ M_STR Acc0, [pDst], dstStep ;// Store result & adjust pointer
+ UXTAB16 ValG0, r0x00ff00ff, ValG ;// [0 g2 0 g0] + [0 255 0 255]
+ UXTAB16 ValG1, r0x00ff00ff, ValG, ROR #8 ;// [0 g3 0 g1] + [0 255 0 255]
+ UXTAB16 ValGB0, ValG0, ValB ;// [0 g2 0 g0] + [0 255 0 255] + [0 b2 0 b0]
+ UXTAB16 ValGB1, ValG1, ValB, ROR #8 ;// [0 g3 0 g1] + [0 255 0 255] + [0 b3 0 b1]
+ ADD Acc2, ValED0, ValGB0 ;// Acc2 = 16*Off + (B+G) + 20*(D+E) - 5*(C+F)
+ ADD Acc3, ValED1, ValGB1
+
+ STR Acc3, [pDst, #4] ;// Store result & adjust pointer
+ M_STR Acc2, [pDst], dstStep ;// Store result & adjust pointer
+
+ SUBS Counter, Counter, #1 << 8 ;// Height field (bits 8 and up): two passes of two rows per strip
+ ADD pSrc, pSrc, srcStep, LSL #1
+ BPL HeightLoop
+
+ M_LDR pSrc, ppSrc
+ M_LDR pDst, ppDst
+ ADDS Counter, Counter, #(1 << 8)-4 ;// Loop till width is 12
+ ADD pSrc, pSrc, #4 ;// Next strip: 4 source bytes to the right
+ ADD pDst, pDst, #8 ;// Next strip: 8 intermediate bytes to the right
+ ADD Counter, Counter, #1<<8 ;// Re-arm the height field; BPL below still tests the ADDS flags
+ BPL WidthLoop
+
+ ;//
+ ;// Horizontal interpolation using multiplication
+ ;//
+
+ SUB pSrc, pDst, #24 ;// pDst advanced 3 x 8 bytes across the strips; return to the first intermediate row
+ MOV srcStep, #24
+ M_LDRD pDst, dstStep, ppDstArgs ;// Restore the caller's destination pointer and step
+
+ MOV Counter, #4
+ LDR r0x0014fffb, =0x0014fffb ;// Halfwords [20, -5]
+ LDR r0x00140001, =0x00140001 ;// Halfwords [20, 1]
+
+HeightLoop1
+ M_STR Counter, pCounter ;// r8 is reused as ValCA below
+
+
+ LDR ValCA, [pSrc], #4 ;// Load [0 c 0 a]
+ LDR ValDB, [pSrc], #4 ;// Load [0 d 0 b]
+ LDR ValGE, [pSrc], #4 ;// Load [0 g 0 e]
+ LDR ValHF, [pSrc], #4 ;// Load [0 h 0 f]
+
+ ;// Acc0 = smuad ([0 20 0 1], add([0 c 0 a] + [0 d 0 f])) - (5 * (b + e))
+ ;// Acc1 = smuad ([0 20 0 1], add([0 e 0 g] + [0 d 0 b])) - (5 * (c + f))
+ ;// Acc2 = smuad ([0 1 0 20], add([0 c 0 e] + [0 h 0 f])) - (5 * (d + g))
+ ;// Acc3 = smuad ([0 20 0 1], add([0 d 0 f] + [0 i 0 g])) - (5 * (e + h))
+ SMUAD Acc0, ValCA, r0x00140001 ;// Acc0 = a + 20*c
+ SMUAD Acc1, ValDB, r0x00140001 ;// Acc1 = b + 20*d
+ SMUADX Acc2, ValGE, r0x0014fffb ;// Acc2 = 20*e - 5*g (constant halves exchanged)
+ SMUAD Acc3, ValGE, r0x0014fffb ;// Acc3 = -5*e + 20*g
+
+ SMLAD Acc0, ValDB, r0x0014fffb, Acc0 ;// Acc0 += -5*b + 20*d
+ SMLADX Acc1, ValGE, r0x00140001, Acc1 ;// Acc1 += 20*e + g (exchanged)
+ SMLADX Acc2, ValHF, r0x00140001, Acc2 ;// Acc2 += 20*f + h (exchanged)
+ SMLADX Acc3, ValHF, r0x0014fffb, Acc3 ;// Acc3 += 20*f - 5*h (exchanged)
+
+ SMLABB Acc0, ValGE, r0x0014fffb, Acc0 ;// Acc0 += -5*e
+ SMLATB Acc1, ValCA, r0x0014fffb, Acc1 ;// Acc1 += -5*c (top half of [0 c 0 a])
+ SMLATB Acc2, ValCA, r0x00140001, Acc2 ;// Acc2 += c
+ SMLATB Acc3, ValDB, r0x00140001, Acc3 ;// Acc3 += d (top half of [0 d 0 b])
+
+ LDRH ValCA, [pSrc], #8 ;// Load i; 8 = srcStep - 16, so pSrc lands on the next intermediate row
+ SMLABB Acc0, ValHF, r0x00140001, Acc0 ;// Acc0 += f
+ SMLABB Acc1, ValHF, r0x0014fffb, Acc1 ;// Acc1 += -5*f
+ SMLATB Acc2, ValDB, r0x0014fffb, Acc2 ;// Acc2 += -5*d
+ SMLABB Acc3, ValCA, r0x00140001, Acc3 ;// Acc3 += i (ValCA now holds i)
+
+ LDR r0x0001fc00, =0x0001fc00 ;// (0xff * 16 * 32) - 512
+ SUB Acc0, Acc0, r0x0001fc00
+ SUB Acc1, Acc1, r0x0001fc00
+ SUB Acc2, Acc2, r0x0001fc00
+ SUB Acc3, Acc3, r0x0001fc00
+
+ USAT Acc0, #18, Acc0 ;// Saturate to 18 bits before the shift down to 8
+ USAT Acc1, #18, Acc1
+ USAT Acc2, #18, Acc2
+ USAT Acc3, #18, Acc3
+
+ MOV Acc0, Acc0, LSR #10
+ MOV Acc1, Acc1, LSR #10
+ MOV Acc2, Acc2, LSR #10
+ MOV Acc3, Acc3, LSR #10
+
+ M_LDR Counter, pCounter
+ ORR Acc0, Acc0, Acc1, LSL #8 ;// Pack the four result bytes into one word, Acc0 in the lowest byte
+ ORR Acc2, Acc2, Acc3, LSL #8
+ SUBS Counter, Counter, #1
+ ORR Acc0, Acc0, Acc2, LSL #16
+ M_STR Acc0, [pDst], dstStep ;// One 4-byte output row per pass
+ BGT HeightLoop1
+End
+ SUB pDst, pDst, dstStep, LSL #2 ;// Undo the four row steps: r2 is the caller's pDst again
+ SUB pSrc, pSrc, srcStep, LSL #2 ;// srcStep is 24 here: r0 is the start of the intermediate buffer
+
+ M_END
+
+ ENDIF
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
new file mode 100644
index 0000000..6690ced
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
@@ -0,0 +1,239 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+
+DEBUG_ON SETL {FALSE}
+
+
+ IF ARM1136JS
+
+;// Function:
+;// armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+;//
+;// Implements horizontal interpolation for a block of size 4x4. Input and output should
+;// be aligned.
+;//
+;// Registers used as input for this function
+;// r0,r1,r2,r3 where r0,r2 input pointer and r1,r3 corresponding step size
+;//
+;// Registers preserved for top level function
+;// r0,r1,r2,r3,r4,r5,r6,r14
+;//
+;// Registers modified by the function
+;// r7,r8,r9,r10,r11,r12
+;//
+;// Output registers
+;// None. Function will preserve r0-r3
+
+
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2
+dstStep RN 3
+
+;// Declare inner loop registers
+Acc0 RN 4
+Acc1 RN 5
+Acc2 RN 6
+Acc3 RN 7
+
+ValA RN 4
+ValB RN 5
+ValC RN 6
+ValD RN 7
+ValE RN 8
+ValF RN 9
+ValG RN 12
+ValH RN 14
+ValI RN 1
+
+Temp1 RN 3
+Temp2 RN 1
+Temp3 RN 12
+Temp4 RN 7
+Temp5 RN 5
+r0x0fe00fe0 RN 3 ;// [0 (16*255 - 16) 0 (16*255 - 16)]
+r0x00ff00ff RN 10 ;// [0 255 0 255] where 255 is offset
+Counter RN 11
+
+Height RN 3
+
+ M_ALLOC4 pDstStep, 4 ;// Spill slot for dstStep (r3 doubles as Temp1 and r0x0fe00fe0)
+ M_ALLOC4 pSrcStep, 4 ;// Spill slot for srcStep (r1 doubles as Temp2/ValI)
+
+ ;// Horizontal 6-tap half-sample filter over a 4x4 block, two rows per loop pass
+ M_START armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe, r6
+
+ MOV Counter, #2 ;// Two passes x two rows
+ M_STR dstStep, pDstStep
+ M_STR srcStep, pSrcStep
+ LDR r0x00ff00ff, =0x00ff00ff ;// [0 255 0 255] 255 is offset to avoid negative results
+
+NextTwoRowsLoop
+ LDR ValD, [pSrc, srcStep] ;// Load row 1 [d1 c1 b1 a1]
+ LDR ValA, [pSrc], #4 ;// Load row 0 [d0 c0 b0 a0]
+ LDR ValH, [pSrc, srcStep] ;// Load [h1 g1 f1 e1]
+ LDR ValE, [pSrc], #4 ;// Load [h0 g0 f0 e0]
+ LDRB Temp2, [pSrc, srcStep] ;// Load byte i1 of row 1 (overwrites srcStep in r1; reloaded below)
+ LDRB Temp1, [pSrc], #-8 ;// Load byte i0 of row 0, then rewind pSrc by 8
+
+ PKHBT ValB, ValA, ValD, LSL #16 ;// [b1 a1 b0 a0]
+ PKHTB ValD, ValD, ValA, ASR #16 ;// [d1 c1 d0 c0]
+ UXTAB16 ValA, r0x00ff00ff, ValB ;// [00 a1 00 a0] + [0 255 0 255]
+ UXTAB16 ValC, r0x00ff00ff, ValD ;// [00 c1 00 c0] + [0 255 0 255]
+ PKHBT ValI, Temp1, Temp2, LSL #16 ;// [00 i1 00 i0]
+ PKHBT ValF, ValE, ValH, LSL #16 ;// [f1 e1 f0 e0]
+ PKHTB ValH, ValH, ValE, ASR #16 ;// [h1 g1 h0 g0]
+ UXTAB16 ValE, r0x00ff00ff, ValF ;// [00 e1 00 e0] + [0 255 0 255]
+
+ ;// Calculate Acc0
+ ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
+ UXTAB16 Temp1, ValC, ValD, ROR #8
+ UXTAB16 Temp3, ValE, ValB, ROR #8
+ RSB Temp1, Temp3, Temp1, LSL #2
+ UXTAB16 Acc0, ValA, ValF, ROR #8
+ ADD Temp1, Temp1, Temp1, LSL #2
+ ADD Acc0, Acc0, Temp1
+
+ ;// Calculate Acc1
+ ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
+ UXTAB16 Temp1, ValE, ValD, ROR #8
+ UXTAB16 Temp3, ValC, ValF, ROR #8
+ RSB Temp1, Temp3, Temp1, LSL #2
+ UXTAB16 ValG, r0x00ff00ff, ValH ;// [00 g1 00 g0] + [0 255 0 255]
+ ADD Temp1, Temp1, Temp1, LSL #2
+ UXTAB16 Acc1, ValG, ValB, ROR #8
+ ADD Acc1, Acc1, Temp1
+
+ LDR r0x0fe00fe0, =0x0fe00fe0 ;// 0x0fe00fe0 = (16 * Offset) - 16 where Offset is 255
+ UXTAB16 Acc2, ValC, ValH, ROR #8
+ ADD ValI, r0x00ff00ff, ValI ;// [00 i1 00 i0] + [0 255 0 255]
+ UQSUB16 Acc0, Acc0, r0x0fe00fe0 ;// Unsigned saturating subtract of the offset, per halfword
+ UQSUB16 Acc1, Acc1, r0x0fe00fe0
+ USAT16 Acc0, #13, Acc0 ;// Saturate each halfword to 13 bits
+ USAT16 Acc1, #13, Acc1
+
+ ;// Calculate Acc2
+ ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
+ UXTAB16 Temp1, ValG, ValD, ROR #8 ;// Overwrites r0x0fe00fe0 (both are r3); the constant is reloaded below
+ UXTAB16 Acc3, ValI, ValD, ROR #8
+ UXTAB16 Temp2, ValE, ValF, ROR #8
+ AND Acc1, r0x00ff00ff, Acc1, LSR #5 ;// Shift right by 5 and keep 8 bits per halfword
+ AND Acc0, r0x00ff00ff, Acc0, LSR #5
+ ORR Acc0, Acc0, Acc1, LSL #8 ;// Acc0 bytes in even byte lanes, Acc1 bytes in odd byte lanes
+ RSB Temp5, Temp1, Temp2, LSL #2
+ UXTAB16 Temp2, ValG, ValF, ROR #8
+ ADD Temp5, Temp5, Temp5, LSL #2
+ ADD Acc2, Acc2, Temp5
+
+ ;// Calculate Acc3
+ ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
+ UXTAB16 Temp5, ValE, ValH, ROR #8
+ RSB Temp5, Temp5, Temp2, LSL #2
+ LDR r0x0fe00fe0, =0x0fe00fe0 ;// Reload: r3 was used as Temp1 above
+ ADD Temp5, Temp5, Temp5, LSL #2
+ ADD Acc3, Acc3, Temp5
+
+ UQSUB16 Acc3, Acc3, r0x0fe00fe0
+ UQSUB16 Acc2, Acc2, r0x0fe00fe0
+ USAT16 Acc3, #13, Acc3
+ USAT16 Acc2, #13, Acc2
+
+ M_LDR dstStep, pDstStep ;// r3 was used as Temp1/r0x0fe00fe0 above
+ AND Acc3, r0x00ff00ff, Acc3, LSR #5
+ AND Acc2, r0x00ff00ff, Acc2, LSR #5
+ ORR Acc2, Acc2, Acc3, LSL #8
+
+ SUBS Counter, Counter, #1 ;// Sets the flags tested by BGT; nothing below changes them
+ M_LDR srcStep, pSrcStep ;// r1 was used as Temp2/ValI above
+ PKHBT Acc1, Acc0, Acc2, LSL #16 ;// Low halves of Acc0 and Acc2 form the first output row
+ M_STR Acc1, [pDst], dstStep ;// Store result1
+ PKHTB Acc2, Acc2, Acc0, ASR #16 ;// High halves of Acc0 and Acc2 form the second output row
+ M_STR Acc2, [pDst], dstStep ;// Store result2
+ ADD pSrc, pSrc, srcStep, LSL #1 ;// Advance two source rows
+
+ BGT NextTwoRowsLoop
+End
+ SUB pDst, pDst, dstStep, LSL #2 ;// Rewind four rows so r2 matches its value on entry
+ SUB pSrc, pSrc, srcStep, LSL #2 ;// Rewind four rows so r0 matches its value on entry
+
+ M_END
+
+ ENDIF
+
+ END
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
new file mode 100644
index 0000000..007cd0d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
@@ -0,0 +1,185 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+
+
+
+ IF ARM1136JS
+
+ ;// Function header
+
+ ;// Function:
+ ;// armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ ;//
+ ;// Implements vertical interpolation for a block of size 4x4. Input and output should
+ ;// be aligned.
+ ;//
+ ;// Registers used as input for this function
+ ;// r0,r1,r2,r3 where r0,r2 input pointer and r1,r3 corresponding step size
+ ;//
+ ;// Registers preserved for top level function
+ ;// r0,r1,r2,r3,r4,r5,r6,r14
+ ;//
+ ;// Registers modified by the function
+ ;// r7,r8,r9,r10,r11,r12
+ ;//
+ ;// Output registers
+ ;// None. Function will preserve r0-r3
+ M_START armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe, r6
+
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2
+dstStep RN 3
+
+;// Declare inner loop registers
+ValA RN 5
+ValA0 RN 4
+ValA1 RN 5
+ValAF0 RN 4
+ValAF1 RN 5
+
+ValB RN 11
+
+ValC RN 5
+ValC0 RN 4
+ValC1 RN 5
+ValCD0 RN 12
+ValCD1 RN 14
+ValCF0 RN 4
+ValCF1 RN 5
+
+ValD RN 10
+
+ValE RN 7
+ValE0 RN 6
+ValE1 RN 7
+ValEB0 RN 10
+ValEB1 RN 11
+ValED0 RN 6
+ValED1 RN 7
+
+ValF RN 10
+
+ValG RN 14
+ValG0 RN 12
+ValG1 RN 14
+ValGB0 RN 12
+ValGB1 RN 14
+
+Acc0 RN 4
+Acc1 RN 5
+Acc2 RN 6
+Acc3 RN 7
+
+Temp RN 7
+Height RN 3
+Step RN 6
+
+Counter RN 8
+r0x00ff00ff RN 9 ;// [0 255 0 255] where 255 is offset
+r0x0fe00fe0 RN 10 ;// [0 (16*255 - 16) 0 (16*255 - 16)]
+
+
+ LDR r0x00ff00ff, =0x00ff00ff ;// [0 255 0 255] 255 is offset to avoid negative results
+ MOV Counter, #2 ;// Two passes, each producing two output rows
+
+TwoRowsLoop
+ M_LDR ValC, [pSrc], srcStep ;// Load [c3 c2 c1 c0]
+ M_LDR ValD, [pSrc], srcStep ;// Load [d3 d2 d1 d0]
+ M_LDR ValE, [pSrc], srcStep ;// Load [e3 e2 e1 e0]
+ SUB pSrc, pSrc, srcStep, LSL #2
+ LDR ValB, [pSrc] ;// Load [b3 b2 b1 b0]
+ UXTAB16 ValC0, r0x00ff00ff, ValC ;// [0 c2 0 c0] + [0 255 0 255]
+ UXTAB16 ValC1, r0x00ff00ff, ValC, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255]
+
+ UXTAB16 ValE0, r0x00ff00ff, ValE ;// [0 e2 0 e0] + [0 255 0 255]
+ UXTAB16 ValE1, r0x00ff00ff, ValE, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255]
+ UXTAB16 ValCD0, ValC0, ValD ;// [0 c2 0 c0] + [0 255 0 255] + [0 d2 0 d0]
+ UXTAB16 ValCD1, ValC1, ValD, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + [0 d3 0 d1]
+ UXTAB16 ValEB0, ValE0, ValB ;// [0 e2 0 e0] + [0 255 0 255] + [0 b2 0 b0]
+ RSB ValCD0, ValEB0, ValCD0, LSL #2 ;// 4*(Off+C+D) - (Off+B+E)
+
+ LDR ValD, [pSrc, srcStep, LSL #1] ;// Load [d3 d2 d1 d0]
+ UXTAB16 ValEB1, ValE1, ValB, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + [0 b3 0 b1]
+ RSB ValCD1, ValEB1, ValCD1, LSL #2
+ ;// One cycle stall
+ UXTAB16 ValED0, ValE0, ValD ;// [0 e2 0 e0] + [0 255 0 255] + [0 d2 0 d0]
+ UXTAB16 ValED1, ValE1, ValD, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + [0 d3 0 d1]
+
+ LDR ValF, [pSrc, srcStep, LSL #2] ;// Load [f3 f2 f1 f0]
+ M_LDR ValB, [pSrc], srcStep ;// Load [b3 b2 b1 b0]
+ ADD ValCD0, ValCD0, ValCD0, LSL #2 ;// 5 * [4*(Off+C+D) - (Off+B+E)]
+ ADD ValCD1, ValCD1, ValCD1, LSL #2
+ UXTAB16 ValCF1, ValC1, ValF, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + [0 f3 0 f1]
+ UXTAB16 ValCF0, ValC0, ValF ;// [0 c2 0 c0] + [0 255 0 255] + [0 f2 0 f0]
+ RSB ValED1, ValCF1, ValED1, LSL #2
+
+ SUB ValA, pSrc, srcStep, LSL #1
+ LDR ValA, [ValA] ;// Load [a3 a2 a1 a0]
+ RSB ValED0, ValCF0, ValED0, LSL #2 ;// 4*(Off+E+D) - (Off+C+F)
+ ADD ValED1, ValED1, ValED1, LSL #2
+ ADD ValED0, ValED0, ValED0, LSL #2 ;// 5 * [4*(Off+E+D) - (Off+C+F)]
+ UXTAB16 ValA0, r0x00ff00ff, ValA ;// [0 a2 0 a0] + [0 255 0 255]
+ UXTAB16 ValA1, r0x00ff00ff, ValA, ROR #8 ;// [0 a3 0 a1] + [0 255 0 255]
+ UXTAB16 ValAF0, ValA0, ValF ;// [0 a2 0 a0] + [0 255 0 255] + [0 f2 0 f0]
+ UXTAB16 ValAF1, ValA1, ValF, ROR #8 ;// [0 a3 0 a1] + [0 255 0 255] + [0 f3 0 f1]
+
+ LDR r0x0fe00fe0, =0x0fe00fe0 ;// (16 * 255) - 16 in each halfword; r10 is free now that ValF has been consumed
+ ADD Acc1, ValCD1, ValAF1
+
+ LDR ValG, [pSrc, srcStep, LSL #2] ;// Load [g3 g2 g1 g0]
+ ADD Acc0, ValCD0, ValAF0 ;// Acc0 = 16*Off + (A+F) + 20*(C+D) - 5*(B+E)
+ UQSUB16 Acc1, Acc1, r0x0fe00fe0 ;// Acc1 -= (16*Off - 16)
+ UQSUB16 Acc0, Acc0, r0x0fe00fe0
+ UXTAB16 ValG0, r0x00ff00ff, ValG ;// [0 g2 0 g0] + [0 255 0 255]
+ UXTAB16 ValG1, r0x00ff00ff, ValG, ROR #8 ;// [0 g3 0 g1] + [0 255 0 255]
+ UXTAB16 ValGB0, ValG0, ValB ;// [0 g2 0 g0] + [0 255 0 255] + [0 b2 0 b0]
+ UXTAB16 ValGB1, ValG1, ValB, ROR #8 ;// [0 g3 0 g1] + [0 255 0 255] + [0 b3 0 b1]
+ ADD Acc2, ValED0, ValGB0 ;// Acc2 = 16*Off + (B+G) + 20*(D+E) - 5*(C+F)
+ ADD Acc3, ValED1, ValGB1
+ UQSUB16 Acc3, Acc3, r0x0fe00fe0 ;// Acc3 -= (16*Off - 16)
+ UQSUB16 Acc2, Acc2, r0x0fe00fe0
+ USAT16 Acc1, #13, Acc1 ;// Saturate to 8+5 = 13 bits
+ USAT16 Acc0, #13, Acc0
+ USAT16 Acc3, #13, Acc3
+ USAT16 Acc2, #13, Acc2
+ AND Acc1, r0x00ff00ff, Acc1, LSR #5 ;// [0 a3 0 a1]
+ AND Acc0, r0x00ff00ff, Acc0, LSR #5 ;// [0 a2 0 a0]
+ ORR Acc0, Acc0, Acc1, LSL #8 ;// [a3 a2 a1 a0]
+ AND Acc3, r0x00ff00ff, Acc3, LSR #5 ;// [0 b3 0 b1]
+ AND Acc2, r0x00ff00ff, Acc2, LSR #5 ;// [0 b2 0 b0]
+
+ M_STR Acc0, [pDst], dstStep ;// Store result & adjust pointer
+ ORR Acc2, Acc2, Acc3, LSL #8 ;// [b3 b2 b1 b0]
+ M_STR Acc2, [pDst], dstStep ;// Store result & adjust pointer
+ ADD pSrc, pSrc, srcStep, LSL #1 ;// Advance two source rows
+
+ SUBS Counter, Counter, #1
+ BGT TwoRowsLoop
+End
+ SUB pDst, pDst, dstStep, LSL #2 ;// Rewind the four rows written
+ SUB pSrc, pSrc, srcStep, LSL #2 ;// Rewind four source rows
+
+ M_END
+
+ ENDIF
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
new file mode 100644
index 0000000..b1ad17c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
@@ -0,0 +1,273 @@
+;//
+;//
+;// File Name: armVCM4P10_Interpolate_Chroma_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ IF ARM1136JS
+
+;// input registers
+;// pSrc, iSrcStep, pDst and iDstStep are r0-r3. iWidth, iHeight, dx and dy
+;// are loaded by the function itself from its stack arguments (M_LDR).
+
+pSrc RN 0
+iSrcStep RN 1
+pDst RN 2
+iDstStep RN 3
+iWidth RN 4 ;// later packed as [inner loop counter (width-1) | width]
+iHeight RN 5
+dx RN 6
+dy RN 7
+
+
+;// local variable registers
+;// NOTE: many of the names below are aliases of the same physical register
+;// (for example r8 is EightMinusdx, dxEightMinusdx, pSrc2, pDst1, pix00, x00 ...).
+;// Each alias is only valid during one phase of the routine, so the order of
+;// the instructions that use them must not be changed casually.
+temp RN 11
+r0x20 RN 12 ;// rounding constant 32 (shares r12 with x02 / OutRow00)
+tmp0x20 RN 14 ;// rounding constant 32 (shares r14 with x12 / x22 / OutRow01)
+return RN 0 ;// status code, shares r0 with pSrc
+dxPlusdy RN 10 ;// not referenced by the code in this file (CMN dx,dy is used instead)
+EightMinusdx RN 8
+EightMinusdy RN 9
+dxEightMinusdx RN 8 ;// packed [dx | 8-dx]
+BACoeff RN 6 ;// packed [BCoeff | ACoeff], replaces dx
+DCCoeff RN 7 ;// packed [DCoeff | CCoeff], replaces dy
+
+;// End-of-row pointer adjustments
+iDstStepx2MinusWidth RN 8
+iSrcStepx2MinusWidth RN 9
+iSrcStep1 RN 10
+
+;// pSrc1 replaces iSrcStep in r1 and addresses the row below pSrc
+pSrc1 RN 1
+pSrc2 RN 8
+pDst1 RN 8
+pDst2 RN 12
+
+;// Bytes copied by the "mv is (0, 0)" path
+pix00 RN 8
+pix01 RN 9
+pix10 RN 10
+pix11 RN 11
+
+Out0100 RN 8
+Out1110 RN 10
+
+;// Source bytes of the 3x3 neighbourhood used by the interpolating path
+x00 RN 8
+x01 RN 10
+x02 RN 12
+x10 RN 9
+x11 RN 11
+x12 RN 14
+x20 RN 10
+x21 RN 12
+x22 RN 14
+
+;// Horizontally adjacent bytes packed as two halfwords [right | left]
+x01x00 RN 8
+x02x01 RN 10
+x11x10 RN 9
+x12x11 RN 11
+x21x20 RN 10
+x22x21 RN 12
+
+;// Interpolated outputs: OutRowRC = row R, column C of the 2x2 output patch
+OutRow00 RN 12
+OutRow01 RN 14
+OutRow10 RN 10
+OutRow11 RN 12
+
+;// Two output bytes of one row packed into a halfword
+OutRow0100 RN 12
+OutRow1110 RN 12
+
+;//-----------------------------------------------------------------------------------------------
+;// armVCM4P10_Interpolate_Chroma_asm starts
+;//-----------------------------------------------------------------------------------------------
+
+ ;// Write function header
+ ;//
+ ;// armVCM4P10_Interpolate_Chroma
+ ;//
+ ;// Weighted 2x2 (bilinear) interpolation of a block of bytes. Every output
+ ;// byte is computed as
+ ;// (ACoeff*x00 + BCoeff*x01 + CCoeff*x10 + DCoeff*x11 + 32) >> 6
+ ;// with the coefficients defined below. When dx + dy == 0 a shortcut path
+ ;// simply copies the source bytes.
+ ;// Each inner-loop pass writes a 2x2 patch (two bytes on each of two rows);
+ ;// each outer-loop pass consumes two rows of iHeight.
+ ;//
+ ;// Register arguments: pSrc (r0), iSrcStep (r1), pDst (r2), iDstStep (r3)
+ ;// Stack arguments : Width, Height, Dx, Dy (4 bytes each)
+ ;// Result : OMX_Sts_NoErr is written to r0 on both paths
+ ;// NOTE(review): both loops count down in steps of 2, so width and height are
+ ;// presumably even, and dx, dy presumably lie in 0..7 - confirm with callers.
+ M_START armVCM4P10_Interpolate_Chroma, r11
+
+ ;// Define stack arguments
+ M_ARG Width, 4
+ M_ARG Height, 4
+ M_ARG Dx, 4
+ M_ARG Dy, 4
+
+ ;// Load argument from the stack
+ ;// M_STALL ARM1136JS=4
+
+ M_LDR iWidth, Width
+ M_LDR iHeight, Height
+ M_LDR dx, Dx
+ M_LDR dy, Dy
+
+ ;// EightMinusdx = 8 - dx
+ ;// EightMinusdy = 8 - dy
+
+ ;// ACoeff = EightMinusdx * EightMinusdy
+ ;// BCoeff = dx * EightMinusdy
+ ;// CCoeff = EightMinusdx * dy
+ ;// DCoeff = dx * dy
+
+ ADD pSrc1, pSrc, iSrcStep ;// pSrc1 = row below pSrc (overwrites iSrcStep in r1)
+ SUB temp, iWidth, #1 ;// temp = width - 1 (inner loop counter)
+ RSB EightMinusdx, dx, #8
+ RSB EightMinusdy, dy, #8
+ CMN dx,dy ;// Z flag set when dx + dy == 0; consumed by BEQ MVIsZero below
+ ADD dxEightMinusdx, EightMinusdx, dx, LSL #16 ;// [dx | 8-dx]
+ ORR iWidth, iWidth, temp, LSL #16 ;// iWidth = [width-1 | width]
+
+ ;// Packed Coeffs.
+ ;// One MUL scales both halfwords of [dx | 8-dx] at once:
+ ;// BACoeff = [BCoeff | ACoeff], DCCoeff = [DCoeff | CCoeff]
+ ;// BACoeff lives in dx's register, so it must be written before DCCoeff
+ ;// (which still reads dy) and after the CMN above.
+
+ MUL BACoeff, dxEightMinusdx, EightMinusdy
+ MUL DCCoeff, dxEightMinusdx, dy
+
+
+ ;// Checking either of dx and dy being non-zero
+ ;// (flags are still those of CMN dx,dy: nothing in between has the S suffix)
+
+ BEQ MVIsZero
+
+;// Pixel layout:
+;//
+;// x00 x01 x02
+;// x10 x11 x12
+;// x20 x21 x22
+
+;// If fractional mv is not (0, 0)
+
+OuterLoopMVIsNotZero
+
+InnerLoopMVIsNotZero
+
+ ;// Load three bytes from each of the first two rows; the last load of each
+ ;// row uses pre-indexed writeback, advancing pSrc and pSrc1 by 2.
+ LDRB x00, [pSrc, #+0]
+ LDRB x10, [pSrc1, #+0]
+ LDRB x01, [pSrc, #+1]
+ LDRB x11, [pSrc1, #+1]
+ LDRB x02, [pSrc, #+2]!
+ LDRB x12, [pSrc1, #+2]!
+
+ ORR x01x00, x00, x01, LSL #16
+ ;// M_STALL ARM1136JS=1
+ ORR x02x01, x01, x02, LSL #16
+ MOV r0x20, #32 ;// rounding term; reloaded every pass because r12 is reused (x02, OutRow00)
+ ORR x11x10, x10, x11, LSL #16
+ ORR x12x11, x11, x12, LSL #16
+
+ SMLAD x01x00, x01x00, BACoeff, r0x20 ;// ACoeff*x00 + BCoeff*x01 + 32
+ SMLAD x02x01, x02x01, BACoeff, r0x20 ;// ACoeff*x01 + BCoeff*x02 + 32
+
+ ;// iWidth packed with MSB (top 16 bits)
+ ;// as inner loop counter value i.e
+ ;// (iWidth -1) and LSB (lower 16 bits)
+ ;// as original width
+ ;// Subtracting 1<<17 takes 2 off the counter; the resulting flags are
+ ;// consumed by the BGT at the bottom of the loop.
+
+ SUBS iWidth, iWidth, #1<<17
+
+ SMLAD OutRow00, x11x10, DCCoeff, x01x00 ;// + CCoeff*x10 + DCoeff*x11
+ SMLAD OutRow01, x12x11, DCCoeff, x02x01 ;// + CCoeff*x11 + DCoeff*x12
+
+ RSB pSrc2, pSrc, pSrc1, LSL #1 ;// pSrc2 = 2*pSrc1 - pSrc = one row below pSrc1
+
+ MOV OutRow00, OutRow00, LSR #6
+ MOV OutRow01, OutRow01, LSR #6
+
+ LDRB x20,[pSrc2, #-2] ;// negative offsets: pSrc/pSrc1 were already advanced by 2
+
+ ORR OutRow0100, OutRow00, OutRow01, LSL #8
+ STRH OutRow0100, [pDst], #2 ;// store first output row, pDst += 2
+
+ LDRB x21,[pSrc2, #-1]
+ LDRB x22,[pSrc2, #+0]
+
+ ADD pDst1, pDst, iDstStep ;// next output row (pDst already advanced, hence #-2 below)
+
+ ;// M_STALL ARM1136JS=1
+
+ ORR x21x20, x20, x21, LSL #16
+ ORR x22x21, x21, x22, LSL #16
+
+ MOV tmp0x20, #32
+
+ ;// Reusing the packed data x11x10 and x12x11
+ ;// (the middle source row is the upper row for the second output row)
+
+ SMLAD x11x10, x11x10, BACoeff, tmp0x20
+ SMLAD x12x11, x12x11, BACoeff, tmp0x20
+ SMLAD OutRow10, x21x20, DCCoeff, x11x10
+ SMLAD OutRow11, x22x21, DCCoeff, x12x11
+
+ MOV OutRow10, OutRow10, LSR #6
+ MOV OutRow11, OutRow11, LSR #6
+
+ ;// M_STALL ARM1136JS=1
+
+ ORR OutRow1110, OutRow10, OutRow11, LSL #8
+
+ STRH OutRow1110, [pDst1, #-2] ;// store second output row
+
+ BGT InnerLoopMVIsNotZero
+
+ ;// End of a pair of rows: the flags set by SUBS iHeight are consumed by
+ ;// the final BGT (no instruction in between has the S suffix).
+ SUBS iHeight, iHeight, #2
+ ADD iWidth, iWidth, #1<<16 ;// counter half went to -1; this leaves just the width
+ RSB iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1 ;// 2*iDstStep - width
+ SUB iSrcStep1, pSrc1, pSrc ;// recover the source step (r1 was reused as pSrc1)
+ SUB temp, iWidth, #1
+ RSB iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1 ;// 2*srcStep - width
+ ADD pDst, pDst, iDstStepx2MinusWidth
+ ADD pSrc1, pSrc1, iSrcStepx2MinusWidth
+ ADD pSrc, pSrc, iSrcStepx2MinusWidth
+ ORR iWidth, iWidth, temp, LSL #16 ;// re-arm the inner loop counter
+ BGT OuterLoopMVIsNotZero
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+;// If fractional mv is (0, 0)
+;// Copy path: two bytes from each of two rows per inner-loop pass.
+
+MVIsZero
+ ;// M_STALL ARM1136JS=4
+OuterLoopMVIsZero
+
+InnerLoopMVIsZero
+
+ LDRB pix00, [pSrc], #+1
+ LDRB pix01, [pSrc], #+1
+ LDRB pix10, [pSrc1], #+1
+ LDRB pix11, [pSrc1], #+1
+
+ ADD pDst2, pDst, iDstStep ;// second destination row
+ SUBS iWidth, iWidth, #1<<17 ;// counter (upper half) -= 2; flags used by BGT below
+
+ ORR Out0100, pix00, pix01, LSL #8
+ ORR Out1110, pix10, pix11, LSL #8
+
+ STRH Out0100, [pDst], #2
+ STRH Out1110, [pDst2], #2
+
+ BGT InnerLoopMVIsZero
+
+ ;// Same end-of-row bookkeeping as in the interpolating path
+ SUBS iHeight, iHeight, #2
+ ADD iWidth, iWidth, #1<<16
+ RSB iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1
+ SUB iSrcStep1, pSrc1, pSrc
+ SUB temp, iWidth, #1
+ RSB iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1
+ ADD pDst, pDst, iDstStepx2MinusWidth
+ ADD pSrc1, pSrc1, iSrcStepx2MinusWidth
+ ADD pSrc, pSrc, iSrcStepx2MinusWidth
+ ORR iWidth, iWidth, temp, LSL #16
+ BGT OuterLoopMVIsZero
+ MOV return, #OMX_Sts_NoErr
+ M_END
+
+ ENDIF ;// ARM1136JS
+
+
+ END
+
+;//-----------------------------------------------------------------------------------------------
+;// armVCM4P10_Interpolate_Chroma_asm ends
+;//-----------------------------------------------------------------------------------------------
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_QuantTables_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_QuantTables_s.s
new file mode 100644
index 0000000..f962f70
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_QuantTables_s.s
@@ -0,0 +1,74 @@
+;//
+;//
+;// File Name: armVCM4P10_QuantTables_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;// Description:
+;// This file contains quantization tables
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+
+ EXPORT armVCM4P10_MFMatrixQPModTable
+ EXPORT armVCM4P10_QPDivIntraTable
+ EXPORT armVCM4P10_QPDivPlusOneTable
+
+;//--------------------------------------------------------------
+;// This table contains armVCM4P10_MFMatrix [iQP % 6][0] entries,
+;// for values of iQP from 0 to 51 (inclusive).
+;// Entries are 16-bit (DCW) and repeat with period 6. The table holds
+;// 9 rows x 6 = 54 entries, i.e. two more than the 52 QP values.
+;//--------------------------------------------------------------
+
+ M_TABLE armVCM4P10_MFMatrixQPModTable
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+ DCW 13107, 11916, 10082, 9362, 8192, 7282
+
+;//---------------------------------------------------------------
+;// This table contains ARM_M4P10_Q_OFFSET + 1 + (iQP / 6) values,
+;// for values of iQP from 0 to 51 (inclusive).
+;// Entries are bytes (DCB): row k holds the value 16 + k six times, one row
+;// per value of iQP / 6. The table holds 9 rows x 6 = 54 entries.
+;// NOTE(review): the values imply ARM_M4P10_Q_OFFSET == 15 - confirm in armVC.h.
+;//---------------------------------------------------------------
+
+ M_TABLE armVCM4P10_QPDivPlusOneTable
+ DCB 16, 16, 16, 16, 16, 16
+ DCB 17, 17, 17, 17, 17, 17
+ DCB 18, 18, 18, 18, 18, 18
+ DCB 19, 19, 19, 19, 19, 19
+ DCB 20, 20, 20, 20, 20, 20
+ DCB 21, 21, 21, 21, 21, 21
+ DCB 22, 22, 22, 22, 22, 22
+ DCB 23, 23, 23, 23, 23, 23
+ DCB 24, 24, 24, 24, 24, 24
+
+;//------------------------------------------------------------------
+;// This table contains (1 << QbitsPlusOne) / 3 Values (Intra case) ,
+;// for values of iQP from 0 to 51 (inclusive).
+;// Entries are 32-bit (DCD). Row k holds (1 << (16 + k)) / 3 rounded down,
+;// repeated six times, so QbitsPlusOne here equals the matching entry of
+;// armVCM4P10_QPDivPlusOneTable (16 .. 24).
+;// NOTE(review): the extra M_TABLE argument (2) is presumably an alignment
+;// request for the word-sized entries - confirm in armCOMM_s.h.
+;//------------------------------------------------------------------
+
+ M_TABLE armVCM4P10_QPDivIntraTable, 2
+ DCD 21845, 21845, 21845, 21845, 21845, 21845
+ DCD 43690, 43690, 43690, 43690, 43690, 43690
+ DCD 87381, 87381, 87381, 87381, 87381, 87381
+ DCD 174762, 174762, 174762, 174762, 174762, 174762
+ DCD 349525, 349525, 349525, 349525, 349525, 349525
+ DCD 699050, 699050, 699050, 699050, 699050, 699050
+ DCD 1398101, 1398101, 1398101, 1398101, 1398101, 1398101
+ DCD 2796202, 2796202, 2796202, 2796202, 2796202, 2796202
+ DCD 5592405, 5592405, 5592405, 5592405, 5592405, 5592405
+
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
new file mode 100644
index 0000000..241d188
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
@@ -0,0 +1,407 @@
+;//
+;//
+;// File Name: armVCM4P10_TransformResidual4x4_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// Description:
+;// Transform Residual 4x4 Coefficients
+;//
+;//
+
+
+;// Include standard headers
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+;// Import symbols required from other files
+;// (For example tables)
+
+
+
+
+;// Set debugging level
+;//DEBUG_ON SETL {TRUE}
+
+
+
+;// Guarding implementation by the processor name
+
+ IF ARM1136JS
+
+;//Input Registers
+pDst RN 0
+pSrc RN 1
+
+;//Output Registers
+
+
+;//Local Scratch Registers
+;// NOTE: the groups below reuse the same physical registers (r2-r12, r14) for
+;// successive stages of the transform; a name is only valid within its stage.
+
+;// Packed Input pixels (two 16-bit coefficients per register)
+in00 RN 2 ;// Src[0] & Src[1]
+in02 RN 3 ;// Src[2] & Src[3]
+in10 RN 4 ;// Src[4] & Src[5]
+in12 RN 5 ;// Src[6] & Src[7]
+in20 RN 6 ;// Src[8] & Src[9]
+in22 RN 7 ;// Src[10] & Src[11]
+in30 RN 8 ;// Src[12] & Src[13]
+in32 RN 9 ;// Src[14] & Src[15]
+
+;// Transpose for Row operations (Rows to cols)
+trRow00 RN 2
+trRow10 RN 10
+trRow02 RN 3
+trRow12 RN 5
+trRow20 RN 11
+trRow30 RN 12
+trRow32 RN 14
+trRow22 RN 7
+
+;// Intermediate calculations
+e0 RN 4
+e1 RN 6
+e2 RN 8
+e3 RN 9
+constZero RN 1 ;// shares r1 with pSrc: only valid after the input block is loaded
+
+;// Row operated pixels
+rowOp00 RN 2
+rowOp10 RN 10
+rowOp20 RN 11
+rowOp30 RN 12
+rowOp02 RN 3
+rowOp12 RN 5
+rowOp22 RN 7
+rowOp32 RN 14
+
+;// Transpose for column operations
+trCol00 RN 2
+trCol02 RN 3
+trCol10 RN 4
+trCol12 RN 5
+trCol20 RN 6
+trCol22 RN 7
+trCol30 RN 8
+trCol32 RN 9
+
+;// Intermediate calculations
+g0 RN 10
+g1 RN 11
+g2 RN 12
+g3 RN 14
+
+;// Column operated pixels
+colOp00 RN 2
+colOp02 RN 3
+colOp10 RN 4
+colOp12 RN 5
+colOp20 RN 6
+colOp22 RN 7
+colOp30 RN 8
+colOp32 RN 9
+
+
+temp1 RN 10 ;// Temporary scratch variables (temp1 is not referenced in this file)
+const1 RN 11
+const2 RN 12
+mask RN 14
+
+;// Output pixels
+out00 RN 2
+out02 RN 3
+out10 RN 4
+out12 RN 5
+out20 RN 6
+out22 RN 7
+out30 RN 8
+out32 RN 9
+
+
+
+ ;// Allocate stack memory required by the function
+
+
+ ;// Write function header
+ ;//
+ ;// armVCM4P10_TransformResidual4x4
+ ;//
+ ;// Reads a 4x4 block of 16-bit coefficients (8 words) from pSrc (r1), applies
+ ;// the butterfly below along rows and then along columns, scales every result
+ ;// by (x + 32) >> 6 and writes the 8 result words to pDst (r0).
+ ;// Butterfly, for inputs d0..d3:
+ ;// e0 = d0 + d2 e1 = d0 - d2
+ ;// e2 = (d1>>1) - d3 e3 = d1 + (d3>>1)
+ ;// f0 = e0 + e3 f1 = e1 + e2 f2 = e1 - e2 f3 = e0 - e3
+ ;// All arithmetic works on two packed halfwords per register (SIMD).
+ M_START armVCM4P10_TransformResidual4x4,r11
+
+ ;******************************************************************
+ ;// The strategy used in implementing the transform is as follows:*
+ ;// Load the 4x4 block into 8 registers *
+ ;// Transpose the 4x4 matrix *
+ ;// Perform the row operations (on columns) using SIMD *
+ ;// Transpose the 4x4 result matrix *
+ ;// Perform the column operations *
+ ;// Store the 4x4 block at one go *
+ ;******************************************************************
+
+ ;// Load all the 4x4 pixels
+
+ LDMIA pSrc,{in00,in02,in10,in12,in20,in22,in30,in32}
+
+ ;// pSrc is dead from here on, so its register is recycled as constZero.
+ MOV constZero,#0 ;// Used to right shift by 1
+ ;LDR constZero,=0x00000000
+
+ ;*****************************************************************
+ ;//
+ ;// Transpose the matrix in order to perform row ops as column ops
+ ;// Input: in[][] = original matrix
+ ;// Output: trRow[][]= transposed matrix
+ ;// Step1: Obtain the LL part of the transposed matrix
+ ;// Step2: Obtain the HL part
+ ;// step3: Obtain the LH part
+ ;// Step4: Obtain the HH part
+ ;//
+ ;// In the comments below [a:b] means "Src[a] in the upper halfword,
+ ;// Src[b] in the lower halfword".
+ ;//
+ ;*****************************************************************
+
+ ;// LL 2x2 transposed matrix
+ ;// d0 d1 - -
+ ;// d4 d5 - -
+ ;// - - - -
+ ;// - - - -
+
+ PKHTB trRow10,in10,in00,ASR #16 ;// transposed [5 4] = source [5:1]
+ PKHBT trRow00,in00,in10,LSL #16 ;// transposed [1 0] = source [4:0]
+
+ ;// HL 2x2 transposed matrix
+ ;// - - - -
+ ;// - - - -
+ ;// d8 d9 - -
+ ;// d12 d13 - -
+
+
+ PKHTB trRow30,in12,in02,ASR #16 ;// transposed [13 12] = source [7:3]
+ PKHBT trRow20,in02,in12,LSL #16 ;// transposed [9 8] = source [6:2]
+
+ ;// LH 2x2 transposed matrix
+ ;// - - d2 d3
+ ;// - - d6 d7
+ ;// - - - -
+ ;// - - - -
+
+ PKHBT trRow02,in20,in30,LSL #16 ;// transposed [3 2] = source [12:8]
+ PKHTB trRow12,in30,in20,ASR #16 ;// transposed [7 6] = source [13:9]
+
+
+
+
+ ;// HH 2x2 transposed matrix
+ ;// - - - -
+ ;// - - - -
+ ;// - - d10 d11
+ ;// - - d14 d15
+
+ PKHTB trRow32,in32,in22,ASR #16 ;// transposed [15 14] = source [15:11]
+ PKHBT trRow22,in22,in32,LSL #16 ;// transposed [11 10] = source [14:10]
+
+
+ ;****************************************
+ ;// Row Operations (Performed on columns)
+ ;****************************************
+
+
+ ;// SIMD operations on first two columns(two rows of the original matrix)
+ ;// trRowN0 holds element N of source rows 0 and 1, so each instruction
+ ;// below transforms two rows at once.
+
+
+ SADD16 e0, trRow00,trRow20 ;// e0 = d0 + d2
+ SSUB16 e1, trRow00,trRow20 ;// e1 = d0 - d2
+ SHADD16 e2, trRow10,constZero ;// (d1>>1) constZero is a register holding 0
+ SHADD16 e3, trRow30,constZero ;// (d3>>1) issued here to avoid pipeline stalls for e2 and e3
+ SSUB16 e2, e2, trRow30 ;// e2 = (d1>>1) - d3
+ SADD16 e3, e3, trRow10 ;// e3 = d1 + (d3>>1)
+ SADD16 rowOp00, e0, e3 ;// f0 = e0 + e3
+ SADD16 rowOp10, e1, e2 ;// f1 = e1 + e2
+ SSUB16 rowOp20, e1, e2 ;// f2 = e1 - e2
+ SSUB16 rowOp30, e0, e3 ;// f3 = e0 - e3
+
+ ;// SIMD operations on next two columns(next two rows of the original matrix)
+
+ SADD16 e0, trRow02,trRow22
+ SSUB16 e1, trRow02,trRow22
+ SHADD16 e2, trRow12,constZero ;// (d1>>1) constZero is a register holding 0
+ SHADD16 e3, trRow32,constZero
+ SSUB16 e2, e2, trRow32
+ SADD16 e3, e3, trRow12
+ SADD16 rowOp02, e0, e3
+ SADD16 rowOp12, e1, e2
+ SSUB16 rowOp22, e1, e2
+ SSUB16 rowOp32, e0, e3
+
+
+ ;*****************************************************************
+ ;// Transpose the resultant matrix
+ ;// Input: rowOp[][]
+ ;// Output: trCol[][]
+ ;// Same PKHTB/PKHBT pattern as the first transpose, applied to the
+ ;// rowOp registers (hi/lo = upper/lower halfword).
+ ;*****************************************************************
+
+ ;// LL 2x2 transposed matrix
+ ;// d0 d1 - -
+ ;// d4 d5 - -
+ ;// - - - -
+ ;// - - - -
+
+ PKHTB trCol10,rowOp10,rowOp00,ASR #16 ;// [rowOp10.hi : rowOp00.hi]
+ PKHBT trCol00,rowOp00,rowOp10,LSL #16 ;// [rowOp10.lo : rowOp00.lo]
+
+ ;// HL 2x2 transposed matrix
+ ;// - - - -
+ ;// - - - -
+ ;// d8 d9 - -
+ ;// d12 d13 - -
+
+
+ PKHTB trCol30,rowOp12,rowOp02,ASR #16 ;// [rowOp12.hi : rowOp02.hi]
+ PKHBT trCol20,rowOp02,rowOp12,LSL #16 ;// [rowOp12.lo : rowOp02.lo]
+
+ ;// LH 2x2 transposed matrix
+ ;// - - d2 d3
+ ;// - - d6 d7
+ ;// - - - -
+ ;// - - - -
+
+ PKHBT trCol02,rowOp20,rowOp30,LSL #16 ;// [rowOp30.lo : rowOp20.lo]
+ PKHTB trCol12,rowOp30,rowOp20,ASR #16 ;// [rowOp30.hi : rowOp20.hi]
+
+
+
+
+ ;// HH 2x2 transposed matrix
+ ;// - - - -
+ ;// - - - -
+ ;// - - d10 d11
+ ;// - - d14 d15
+
+ PKHTB trCol32,rowOp32,rowOp22,ASR #16 ;// [rowOp32.hi : rowOp22.hi]
+ PKHBT trCol22,rowOp22,rowOp32,LSL #16 ;// [rowOp32.lo : rowOp22.lo]
+
+
+ ;*******************************
+ ;// Column Operations
+ ;*******************************
+
+
+ ;// SIMD operations on first two columns
+ ;// Same butterfly as the row pass, with g0..g3 in the role of e0..e3.
+
+
+ SADD16 g0, trCol00,trCol20 ;// g0 = d0 + d2
+ SSUB16 g1, trCol00,trCol20 ;// g1 = d0 - d2
+ SHADD16 g2, trCol10,constZero ;// (d1>>1) constZero is a register holding 0
+ SHADD16 g3, trCol30,constZero ;// (d3>>1)
+ SSUB16 g2, g2, trCol30 ;// g2 = (d1>>1) - d3
+ SADD16 g3, g3, trCol10 ;// g3 = d1 + (d3>>1)
+ SADD16 colOp00, g0, g3
+ SADD16 colOp10, g1, g2
+ SSUB16 colOp20, g1, g2
+ SSUB16 colOp30, g0, g3
+
+ ;// SIMD operations on next two columns
+
+ SADD16 g0, trCol02,trCol22
+ SSUB16 g1, trCol02,trCol22
+ SHADD16 g2, trCol12,constZero ;// (d1>>1) constZero is a register holding 0
+ SHADD16 g3, trCol32,constZero
+ SSUB16 g2, g2, trCol32
+ SADD16 g3, g3, trCol12
+ SADD16 colOp02, g0, g3
+ SADD16 colOp12, g1, g2
+ SSUB16 colOp22, g1, g2
+ SSUB16 colOp32, g0, g3
+
+
+
+
+
+ ;************************************************
+ ;// Calculate final value (colOp[i][j] + 32)>>6
+ ;************************************************
+
+ ;// The shift is done on the whole 32-bit register (ASR #6), which is only
+ ;// directly correct for the upper halfword. The lower halfword is therefore
+ ;// biased to an unsigned value first, the 6 bits shifted in from the upper
+ ;// halfword are masked off, and the bias (32768 >> 6 = 512) is removed.
+
+ ;// const1: Serves dual purpose
+ ;// (1) Add #32 to both the lower and higher 16bits of the SIMD result
+ ;// (2) Convert the lower 16 bit value to an unsigned number (Add 32768)
+
+ LDR const1, =0x00208020
+
+ LDR mask, =0xffff03ff ;// Used to mask the down shifted 6 bits
+
+ ;// const2(#512): used to convert the lower 16bit number back to signed value
+
+ MOV const2,#0x200 ;// const2 = 2^9
+
+ ;// First Row
+
+ SADD16 colOp00, colOp00, const1
+ SADD16 colOp02, colOp02, const1
+ AND colOp00, mask, colOp00, ASR #6
+ AND colOp02, mask, colOp02, ASR #6
+ SSUB16 out00,colOp00,const2
+ SSUB16 out02,colOp02,const2
+
+
+ ;// Second Row
+
+ SADD16 colOp10, colOp10, const1
+ SADD16 colOp12, colOp12, const1
+ AND colOp10, mask, colOp10, ASR #6
+ AND colOp12, mask, colOp12, ASR #6
+ SSUB16 out10,colOp10,const2
+ SSUB16 out12,colOp12,const2
+
+
+ ;// Third Row
+
+ SADD16 colOp20, colOp20, const1
+ SADD16 colOp22, colOp22, const1
+ AND colOp20, mask, colOp20, ASR #6
+ AND colOp22, mask, colOp22, ASR #6
+ SSUB16 out20,colOp20,const2
+ SSUB16 out22,colOp22,const2
+
+
+ ;// Fourth Row
+
+ SADD16 colOp30, colOp30, const1
+ SADD16 colOp32, colOp32, const1
+ AND colOp30, mask, colOp30, ASR #6
+ AND colOp32, mask, colOp32, ASR #6
+ SSUB16 out30,colOp30,const2
+ SSUB16 out32,colOp32,const2
+
+
+
+
+ ;***************************
+ ;// Store all the 4x4 pixels
+ ;***************************
+
+ STMIA pDst,{out00,out02,out10,out12,out20,out22,out30,out32}
+
+
+
+ ;// Set return value
+ ;// NOTE(review): no instruction writes a return value here; r0 still holds
+ ;// pDst when M_END is reached.
+
+End
+
+
+ ;// Write function tail
+ M_END
+
+ ENDIF ;//ARM1136JS
+
+
+
+
+
+
+
+;// Guarding implementation by the processor name
+
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
new file mode 100644
index 0000000..ad16d9c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s
@@ -0,0 +1,92 @@
+;//
+;//
+;// File Name: armVCM4P10_UnpackBlock4x4_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+;// Define the processor variants supported by this file
+
+ M_VARIANTS ARM1136JS
+
+
+ IF ARM1136JS
+
+;//--------------------------------------
+;// Input Arguments and their scope/usage
+;//--------------------------------------
+ppSrc RN 0 ;// Persistent variable: address of the stream pointer (read at entry, updated at exit)
+pDst RN 1 ;// Persistent variable: destination coefficient buffer
+
+;//--------------------------------
+;// Variables and their scope/usage
+;//--------------------------------
+pSrc RN 2 ;// Persistent variables: current read position in the stream
+Flag RN 3 ;// Current flag byte of the packed stream
+Value RN 4 ;// Decoded coefficient; with Value2 forms the r4/r5 pair used by STRD
+Value2 RN 5 ;// High byte of a 16-bit coefficient
+strOffset RN 6 ;// Byte offset into pDst for the current coefficient
+cstOffset RN 7 ;// Constant 31, mask used to derive strOffset
+
+
+ ;//-----------------------------------------------------------------------
+ ;// armVCM4P10_UnpackBlock4x4
+ ;//
+ ;// Clears the 32-byte block at pDst (16 halfwords), then expands the packed
+ ;// position/coefficient stream at *ppSrc into it and writes the advanced
+ ;// stream pointer back to *ppSrc.
+ ;//
+ ;// Each stream entry is a flag byte followed by a value:
+ ;// Flag bits 0-3 : index of the halfword in pDst to write
+ ;// Flag bit 4 (0x10): set -> 16-bit value (2 bytes, low byte first)
+ ;// clear -> 8-bit value, sign extended
+ ;// Flag bit 5 (0x20): set -> this is the last entry
+ ;// The loop body runs before the end test, so at least one entry is
+ ;// always consumed.
+ ;//-----------------------------------------------------------------------
+ M_START armVCM4P10_UnpackBlock4x4, r7
+
+ LDR pSrc, [ppSrc] ;// Load pSrc
+ MOV cstOffset, #31 ;// To be used in the loop, to compute offset
+
+ ;//-----------------------------------------------------------------------
+ ;// Firstly, fill all the coefficient values on the <pDst> buffer by zero
+ ;//-----------------------------------------------------------------------
+
+ MOV Value, #0 ;// Initialize the zero value
+ MOV Value2, #0 ;// Initialize the zero value
+ LDRB Flag, [pSrc], #1 ;// Preload <Flag> before <unpackLoop>
+
+ ;// Each STRD stores the Value/Value2 register pair, i.e. 8 zero bytes.
+ STRD Value, [pDst, #0] ;// pDst[0] = pDst[1] = pDst[2] = pDst[3] = 0
+ STRD Value, [pDst, #8] ;// pDst[4] = pDst[5] = pDst[6] = pDst[7] = 0
+ STRD Value, [pDst, #16] ;// pDst[8] = pDst[9] = pDst[10] = pDst[11] = 0
+ STRD Value, [pDst, #24] ;// pDst[12] = pDst[13] = pDst[14] = pDst[15] = 0
+
+ ;//----------------------------------------------------------------------------
+ ;// The loop below parses and unpacks the input stream. The C-model has
+ ;// a somewhat complicated logic for sign extension. But in the v6 version,
+ ;// that can be easily taken care by loading the data from <pSrc> stream as
+ ;// SIGNED byte/halfword. So, based on the first TST instruction, 8-bits or
+ ;// 16-bits are read.
+ ;//
+ ;// Next, to compute the offset, where the unpacked value needs to be stored,
+ ;// we modify the computation to perform [(Flag & 15) << 1] as [(Flag << 1) & 31]
+ ;// This results in a saving of one cycle.
+ ;//----------------------------------------------------------------------------
+
+unpackLoop
+ TST Flag, #0x10 ;// Computing (Flag & 0x10)
+ ;// NE (bit set): 16-bit value. The signed high byte is read first, at
+ ;// offset #1, because the next load post-increments pSrc by 2.
+ LDRSBNE Value2,[pSrc,#1] ;// Load byte wise to avoid unaligned access
+ LDRBNE Value, [pSrc], #2 ;// Unsigned low byte; pSrc += 2
+ AND strOffset, cstOffset, Flag, LSL #1 ;// strOffset = (Flag & 15) << 1 (no S suffix: flags kept)
+ LDRSBEQ Value, [pSrc], #1 ;// EQ: Value = sign-extended 8-bit *pSrc++
+ ORRNE Value,Value,Value2, LSL #8 ;// NE: Value = low byte | (signed high byte << 8)
+
+ TST Flag, #0x20 ;// Computing (Flag & 0x20) to check, if we're done
+ LDRBEQ Flag, [pSrc], #1 ;// Flag = (OMX_U8) *pSrc++, for next iteration
+ STRH Value, [pDst, strOffset] ;// Store <Value> at offset <strOffset>
+ BEQ unpackLoop ;// Branch to the loop beginning
+
+ STR pSrc, [ppSrc] ;// Update the bitstream pointer
+ M_END
+
+ ENDIF
+
+
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
new file mode 100644
index 0000000..c2e6b60
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c
@@ -0,0 +1,88 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DeblockChroma_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 intra chroma deblock
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DeblockChroma_I
+ *
+ * Description:
+ * Performs deblocking filtering on all edges of the chroma macroblock (16x16).
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDst pointer to the input macroblock. Must be 8-byte aligned.
+ * [in] srcdstStep Step of the arrays
+ * [in] pAlpha pointer to a 2x2 array of alpha thresholds, organized as follows: { external
+ * vertical edge, internal vertical edge, external
+ * horizontal edge, internal horizontal edge }
+ * [in] pBeta pointer to a 2x2 array of beta thresholds, organized as follows: { external
+ * vertical edge, internal vertical edge, external horizontal edge,
+ * internal horizontal edge }
+ * [in] pThresholds Array of size 8x2 of Thresholds (TC0) (values for the left or
+ * above edge of each 4x2 or 2x4 block, arranged in vertical block order
+ * and then in horizontal block order)
+ * [in] pBS array of size 16x2 of BS parameters (arranged in scan block order for vertical edges and then horizontal edges);
+ * valid in the range [0,4] with the following restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^1]== 4. Must be 4-byte aligned.
+ * [out] pSrcDst pointer to filtered output macroblock
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - Any of the pointers pSrcDst, pAlpha, pBeta, pThresholds, or pBS is NULL.
+ * - pSrcDst is not 8-byte aligned.
+ * - either pThresholds or pBS is not 4-byte aligned.
+ * - pBS is out of range, i.e., one of the following conditions is true: pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && pBS[i^1]!=4) for 0<=i<=3.
+ * - srcdstStep is not a multiple of 8.
+ *
+ */
+OMXResult omxVCM4P10_DeblockChroma_I(
+    OMX_U8* pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8* pAlpha,
+    const OMX_U8* pBeta,
+    const OMX_U8* pThresholds,
+    const OMX_U8 *pBS
+)
+{
+    OMXResult status;
+
+    /* Argument validation: every failure below reports OMX_Sts_BadArgErr. */
+
+    /* Image pointer must be present and 8-byte aligned; step a multiple of 8. */
+    armRetArgErrIf((pSrcDst == NULL) || (armNot8ByteAligned(pSrcDst)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((srcdstStep & 7) != 0, OMX_Sts_BadArgErr);
+
+    /* Alpha/beta tables only need to be present. */
+    armRetArgErrIf((pAlpha == NULL) || (pBeta == NULL), OMX_Sts_BadArgErr);
+
+    /* Threshold and BS tables must be present and 4-byte aligned. */
+    armRetArgErrIf((pThresholds == NULL) || (armNot4ByteAligned(pThresholds)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((pBS == NULL) || (armNot4ByteAligned(pBS)), OMX_Sts_BadArgErr);
+
+    /* Pass 1: vertical edges use the first half of each parameter table. */
+    status = omxVCM4P10_FilterDeblockingChroma_VerEdge_I(
+        pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS);
+
+    /* Stop here if the vertical pass failed and hand its code to the caller. */
+    armRetArgErrIf(status != OMX_Sts_NoErr, status);
+
+    /* Pass 2: horizontal edges use the second half of each table
+     * (alpha/beta entries 2.., thresholds 8.., BS 16..). */
+    return omxVCM4P10_FilterDeblockingChroma_HorEdge_I(
+        pSrcDst, srcdstStep, &pAlpha[2], &pBeta[2], &pThresholds[8], &pBS[16]);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
new file mode 100644
index 0000000..6023862
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c
@@ -0,0 +1,91 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DeblockLuma_I.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 luma deblock
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+
+/**
+ * Function: omxVCM4P10_DeblockLuma_I
+ *
+ * Description:
+ * This function performs deblock filtering the horizontal and vertical edges of a luma macroblock
+ *(16x16).
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrcDst pointer to the input macroblock. Must be 8-byte aligned.
+ * [in] srcdstStep image width
+ * [in] pAlpha pointer to a 2x2 table of alpha thresholds, organized as follows: { external
+ * vertical edge, internal vertical edge, external horizontal
+ * edge, internal horizontal edge }
+ * [in] pBeta pointer to a 2x2 table of beta thresholds, organized as follows: { external
+ * vertical edge, internal vertical edge, external horizontal edge,
+ * internal horizontal edge }
+ * [in] pThresholds pointer to a 16x2 table of threshold (TC0), organized as follows: { values for
+ * the left or above edge of each 4x4 block, arranged in vertical block order
+ * and then in horizontal block order)
+ * [in] pBS pointer to a 16x2 table of BS parameters arranged in scan block order for vertical edges and then horizontal edges;
+ * valid in the range [0,4] with the following restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^1]== 4. Must be 4-byte aligned.
+ * [out] pSrcDst pointer to filtered output macroblock.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - Any of the pointers pSrcDst, pAlpha, pBeta, pThresholds or pBS is NULL.
+ * - pSrcDst is not 8-byte aligned.
+ * - srcdstStep is not a multiple of 8
+ * - pBS is out of range, i.e., one of the following conditions is true: pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && pBS[i^1]!=4) for 0<=i<=3.
+ *
+ */
+
+OMXResult omxVCM4P10_DeblockLuma_I(
+    OMX_U8* pSrcDst,
+    OMX_S32 srcdstStep,
+    const OMX_U8* pAlpha,
+    const OMX_U8* pBeta,
+    const OMX_U8* pThresholds,
+    const OMX_U8 *pBS
+)
+{
+    OMXResult status;
+
+    /* Argument validation: every failure below reports OMX_Sts_BadArgErr. */
+
+    /* Image pointer must be present and 8-byte aligned; step a multiple of 8. */
+    armRetArgErrIf((pSrcDst == NULL) || (armNot8ByteAligned(pSrcDst)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((srcdstStep & 7) != 0, OMX_Sts_BadArgErr);
+
+    /* Alpha/beta tables only need to be present. */
+    armRetArgErrIf((pAlpha == NULL) || (pBeta == NULL), OMX_Sts_BadArgErr);
+
+    /* Threshold and BS tables must be present and 4-byte aligned. */
+    armRetArgErrIf((pThresholds == NULL) || (armNot4ByteAligned(pThresholds)), OMX_Sts_BadArgErr);
+    armRetArgErrIf((pBS == NULL) || (armNot4ByteAligned(pBS)), OMX_Sts_BadArgErr);
+
+    /* Pass 1: vertical edges use the first half of each parameter table. */
+    status = omxVCM4P10_FilterDeblockingLuma_VerEdge_I(
+        pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS);
+
+    /* Stop here if the vertical pass failed and hand its code to the caller. */
+    armRetArgErrIf(status != OMX_Sts_NoErr, status);
+
+    /* Pass 2: horizontal edges use the second half of each table
+     * (alpha/beta entries 2.., thresholds 16.., BS 16..). */
+    return omxVCM4P10_FilterDeblockingLuma_HorEdge_I(
+        pSrcDst, srcdstStep, &pAlpha[2], &pBeta[2], &pThresholds[16], &pBS[16]);
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
new file mode 100644
index 0000000..a19f277
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
@@ -0,0 +1,62 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 decode coefficients module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse raster scan for 2x2 block of
+ * ChromaDCLevel. The decoded coefficients in packed position-coefficient
+ * buffer are stored in increasing raster scan order, namely position order.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream Double pointer to current byte in bit stream
+ * buffer
+ * [in] pOffset Pointer to current bit position in the byte
+ * pointed to by *ppBitStream
+ * [out] ppBitStream *ppBitStream is updated after each block is decoded
+ * [out] pOffset *pOffset is updated after each block is decoded
+ * [out] pNumCoeff Pointer to the number of nonzero coefficients
+ * in this block
+ * [out] ppPosCoefbuf Double pointer to destination residual
+ * coefficient-position pair buffer
+ *
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (
+    const OMX_U8** ppBitStream,
+    OMX_S32* pOffset,
+    OMX_U8* pNumCoeff,
+    OMX_U8** ppPosCoefbuf
+    )
+
+{
+    OMXResult result;
+
+    /*
+     * Thin wrapper over the shared coefficient decoder. The two trailing
+     * arguments sit in the positions the generic CAVLC entry point uses for
+     * sVLCSelect and sMaxNumCoeff:
+     *   17 - table selector (presumably the chroma DC table; confirm in
+     *        armVCM4P10_DecodeCoeffsToPair)
+     *    4 - maximum number of coefficients, matching a 2x2 block
+     */
+    result = armVCM4P10_DecodeCoeffsToPair(
+        ppBitStream, pOffset, pNumCoeff, ppPosCoefbuf, 17, 4);
+
+    return result;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
new file mode 100644
index 0000000..99bb4ce
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c
@@ -0,0 +1,68 @@
+/* ----------------------------------------------------------------
+ *
+ *
+ * File Name: omxVCM4P10_DecodeCoeffsToPairCAVLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * H.264 decode coefficients module
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC
+ *
+ * Description:
+ * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of
+ * Intra16x16DCLevel, Intra16x16ACLevel,LumaLevel, and ChromaACLevel.
+ * Inverse field scan is not supported. The decoded coefficients in packed
+ * position-coefficient buffer are stored in increasing zigzag order instead
+ * of position order.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream Double pointer to current byte in bit stream buffer
+ * [in] pOffset Pointer to current bit position in the byte pointed
+ * to by *ppBitStream
+ * [in] sMaxNumCoeff Maximum number of non-zero coefficients in current
+ * block
+ * [in] sVLCSelect VLC table selector, obtained from number of non-zero
+ * AC coefficients of above and left 4x4 blocks. It is
+ * equivalent to the variable nC described in H.264 standard
+ * table 9-5, except its value can't be less than zero.
+ * [out] ppBitStream *ppBitStream is updated after each block is decoded
+ * [out] pOffset *pOffset is updated after each block is decoded
+ * [out] pNumCoeff Pointer to the number of nonzero coefficients in
+ * this block
+ * [out] ppPosCoefbuf Double pointer to destination residual
+ * coefficient-position pair buffer
+ * Return Value:
+ * Standard omxError result. See enumeration for possible result codes.
+ *
+ */
+
+OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC(
+    const OMX_U8** ppBitStream,
+    OMX_S32* pOffset,
+    OMX_U8* pNumCoeff,
+    OMX_U8**ppPosCoefbuf,
+    OMX_INT sVLCSelect,
+    OMX_INT sMaxNumCoeff
+    )
+{
+    OMXResult result;
+
+    /* Pure pass-through: all arguments are forwarded unchanged, in order,
+     * to the shared coefficient decoder, whose status is returned as-is. */
+    result = armVCM4P10_DecodeCoeffsToPair(
+        ppBitStream, pOffset, pNumCoeff, ppPosCoefbuf,
+        sVLCSelect, sMaxNumCoeff);
+
+    return result;
+}
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
new file mode 100644
index 0000000..2b71486
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s
@@ -0,0 +1,480 @@
+;//
+;// (c) Copyright 2007 ARM Limited. All Rights Reserved.
+;//
+;// Description:
+;// H.264 inverse quantize and transform module
+;//
+;//
+
+
+
+;// Include standard headers
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+;// Import symbols required from other files
+;// (For example tables)
+
+ IMPORT armVCM4P10_UnpackBlock4x4
+ IMPORT armVCM4P10_TransformResidual4x4
+ IMPORT armVCM4P10_QPDivTable
+ IMPORT armVCM4P10_VMatrixU16
+ IMPORT armVCM4P10_QPModuloTable
+
+ M_VARIANTS ARM1136JS, ARM1136JS_U
+
+;// Set debugging level
+;//DEBUG_ON SETL {TRUE}
+
+
+;// Static Function: armVCM4P10_DequantLumaAC4x4
+
+;// Guarding implementation by the processor name
+
+ IF ARM1136JS
+
+;// armVCM4P10_DequantLumaAC4x4 (ARM1136JS variant)
+;//
+;// Scales the sixteen 16-bit values at pSrcDst in place. Three scale factors
+;// are read as halfwords from armVCM4P10_VMatrixU16 at the byte offset taken
+;// from armVCM4P10_QPModuloTable[QP], each is shifted left by
+;// armVCM4P10_QPDivTable[QP], and every input value is multiplied by the
+;// factor chosen for its position (see the per-row comments below).
+;// Only the low 16 bits of each product are kept when results are re-packed.
+;// No return value is set. This variant uses halfword loads for the table.
+;//
+;//Input Registers
+pSrcDst RN 0
+QP RN 1
+
+
+;//Output Registers
+
+
+;//Local Scratch Registers
+;// Several names below share a physical register; each alias is only written
+;// after the previous owner of that register has been read for the last time.
+pQPdiv RN 4
+pQPmod RN 5
+pVRow RN 2
+QPmod RN 6
+shift RN 3
+rowLuma01 RN 1 ;// shares r1 with QP
+rowLuma23 RN 4 ;// shares r4 with pQPdiv
+
+SrcDst00 RN 5 ;// shares r5 with pQPmod
+SrcDst02 RN 6 ;// shares r6 with QPmod
+SrcDst10 RN 7
+SrcDst12 RN 8
+SrcDst20 RN 9
+SrcDst22 RN 10
+SrcDst30 RN 11
+SrcDst32 RN 12
+
+temp1 RN 2 ;// shares r2 with pVRow
+temp2 RN 3 ;// shares r3 with shift
+temp3 RN 14 ;// r14 used as scratch; presumably saved/restored by M_START/M_END - confirm in armCOMM_s.h
+
+
+ ;// Allocate stack memory required by the function
+
+ ;// Write function header
+ M_START armVCM4P10_DequantLumaAC4x4,r11
+
+ LDR pQPmod,=armVCM4P10_QPModuloTable
+ LDR pQPdiv,=armVCM4P10_QPDivTable
+ LDR pVRow,=armVCM4P10_VMatrixU16
+
+ LDRSB QPmod,[pQPmod,QP] ;// (QP%6) * 6
+ LDRSB shift,[pQPdiv,QP] ;// Shift = QP / 6
+
+ ;// The writeback (!) advances pVRow by QPmod bytes so the next two loads
+ ;// can use small constant offsets from the selected table row.
+ LDRH rowLuma01,[pVRow,QPmod]! ;// rowLuma01 = [00|0a]
+ LDRH temp3,[pVRow,#2] ;// temp3 = [00|0b]
+ LDRH rowLuma23,[pVRow,#4] ;// rowLuma23 = [00|0c]
+ ORR rowLuma01,rowLuma01,temp3,LSL #16 ;// rowLuma01 = [0b|0a]
+
+ ;// Load all the 16 'src' values
+ LDMIA pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32}
+
+
+ ;//*********************************************************************************************
+ ;//
+ ;// 'Shift' ranges between [0,8]
+ ;// So we can shift the packed rowLuma values [0b|0a] with a single LSL operation
+ ;//
+ ;//*********************************************************************************************
+
+ LSL rowLuma01,rowLuma01,shift
+ LSL rowLuma23,rowLuma23,shift
+
+
+ ;//**********************************************************************************************
+ ;//
+ ;// The idea is to unroll the Loop completely
+ ;// All the 16 src values are loaded at once into 8 registers : SrcDst<y><x> (above)
+ ;// 0<= armVCM4P10_PosToVCol4x4[i] <=2 for any 'i<16'
+ ;// So the only values of pVRow[i] that need to be loaded are for i=0,1,2
+ ;// These 3 values are loaded into rowLuma01 and rowLuma23 (above)
+ ;// We first calculate pVRow[armVCM4P10_PosToVCol4x4[i]]) << Shift which fits into 16 bits (above)
+ ;// Then the product pSrcDst[i] * (pVRow[armVCM4P10_PosToVCol4x4[i]] << Shift) is calculated
+ ;// Here we interleave the PKHBT operations for various rows to avoid pipeline stalls
+ ;//
+ ;// We then pack the two 16 bit multiplication result into a word and store at one go
+ ;//
+ ;// In every pair the odd (top-halfword) product is computed first into a temp,
+ ;// because the even product overwrites the source register.
+ ;//
+ ;//**********************************************************************************************
+
+
+ ;// Row 1
+
+
+ SMULTB temp1,SrcDst00,rowLuma23 ;// pSrcDst[1] * (pVRow[2]<<Shift)
+ SMULBB SrcDst00,SrcDst00,rowLuma01 ;// pSrcDst[0] * (pVRow[0]<<Shift)
+
+ SMULTB temp2,SrcDst02,rowLuma23 ;// pSrcDst[3] * (pVRow[2]<<Shift)
+ SMULBB SrcDst02,SrcDst02,rowLuma01 ;// pSrcDst[2] * (pVRow[0]<<Shift)
+
+ PKHBT SrcDst00,SrcDst00,temp1,LSL #16 ;// Pack the first two product values
+
+
+ ;// Row 2
+ SMULTT temp1,SrcDst10,rowLuma01 ;// pSrcDst[5] * (pVRow[1]<<Shift)
+ SMULBB SrcDst10,SrcDst10,rowLuma23 ;// pSrcDst[4] * (pVRow[2]<<Shift)
+
+ PKHBT SrcDst02,SrcDst02,temp2,LSL #16 ;// Pack the next two product values
+ SMULTT temp2,SrcDst12,rowLuma01 ;// pSrcDst[7] * (pVRow[1]<<Shift)
+ SMULBB SrcDst12,SrcDst12,rowLuma23 ;// pSrcDst[6] * (pVRow[2]<<Shift)
+
+ PKHBT SrcDst10,SrcDst10,temp1,LSL #16 ;// Pack the next two product values
+
+
+ ;// Row 3
+
+ SMULTB temp1,SrcDst20,rowLuma23 ;// pSrcDst[9] * (pVRow[2]<<Shift)
+ SMULBB SrcDst20,SrcDst20,rowLuma01 ;// pSrcDst[8] * (pVRow[0]<<Shift)
+
+ PKHBT SrcDst12,SrcDst12,temp2,LSL #16 ;// Pack the next two product values
+ SMULTB temp2,SrcDst22,rowLuma23 ;// pSrcDst[11] * (pVRow[2]<<Shift)
+ SMULBB SrcDst22,SrcDst22,rowLuma01 ;// pSrcDst[10] * (pVRow[0]<<Shift)
+
+ PKHBT SrcDst20,SrcDst20,temp1,LSL #16 ;// Pack the next two product values
+
+
+
+ ;// Row 4
+
+ SMULTT temp1,SrcDst30,rowLuma01 ;// pSrcDst[13] * (pVRow[1]<<Shift)
+ SMULBB SrcDst30,SrcDst30,rowLuma23 ;// pSrcDst[12] * (pVRow[2]<<Shift)
+
+ SMULTT temp3,SrcDst32,rowLuma01 ;// pSrcDst[15] * (pVRow[1]<<Shift)
+ SMULBB SrcDst32,SrcDst32,rowLuma23 ;// pSrcDst[14] * (pVRow[2]<<Shift)
+
+ PKHBT SrcDst22,SrcDst22,temp2,LSL #16 ;// Pack the remaining product values
+ PKHBT SrcDst30,SrcDst30,temp1,LSL #16
+ PKHBT SrcDst32,SrcDst32,temp3,LSL #16
+
+
+ ;// Write all 16 scaled values back over the input (pSrcDst was not advanced)
+ STMIA pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32}
+
+
+ ;// Set return value
+ ;// (none: r0 still holds pSrcDst on exit)
+
+
+ ;// Write function tail
+ M_END
+
+ ENDIF ;//ARM1136JS
+
+
+;// Guarding implementation by the processor name
+
+ IF ARM1136JS_U
+
+;// armVCM4P10_DequantLumaAC4x4 (ARM1136JS_U variant)
+;//
+;// Same in-place scaling of the sixteen 16-bit values at pSrcDst as the
+;// ARM1136JS variant, but the scale factors are fetched with two word loads
+;// instead of three halfword loads. The table offset read from
+;// armVCM4P10_QPModuloTable is commented as (QP%6)*6, so the word loads are
+;// not guaranteed to be word aligned.
+;// NOTE(review): presumably the _U build is only selected when unaligned
+;// word access is enabled - confirm against the build configuration.
+;// No return value is set.
+;//
+;//Input Registers
+pSrcDst RN 0
+QP RN 1
+
+
+;//Output Registers
+
+
+;//Local Scratch Registers
+;// Several names below share a physical register; each alias is only written
+;// after the previous owner of that register has been read for the last time.
+pQPdiv RN 4
+pQPmod RN 5
+pVRow RN 2
+QPmod RN 6
+shift RN 3
+rowLuma01 RN 1 ;// shares r1 with QP
+rowLuma23 RN 4 ;// shares r4 with pQPdiv
+
+SrcDst00 RN 5 ;// shares r5 with pQPmod
+SrcDst02 RN 6 ;// shares r6 with QPmod
+SrcDst10 RN 7
+SrcDst12 RN 8
+SrcDst20 RN 9
+SrcDst22 RN 10
+SrcDst30 RN 11
+SrcDst32 RN 12
+
+temp1 RN 2 ;// shares r2 with pVRow
+temp2 RN 3 ;// shares r3 with shift
+temp3 RN 14 ;// r14 used as scratch; presumably saved/restored by M_START/M_END - confirm in armCOMM_s.h
+
+
+ ;// Allocate stack memory required by the function
+
+ ;// Write function header
+ M_START armVCM4P10_DequantLumaAC4x4,r11
+
+ LDR pQPmod,=armVCM4P10_QPModuloTable
+ LDR pQPdiv,=armVCM4P10_QPDivTable
+ LDR pVRow,=armVCM4P10_VMatrixU16
+
+ LDRSB QPmod,[pQPmod,QP] ;// (QP%6) * 6
+ LDRSB shift,[pQPdiv,QP] ;// Shift = QP / 6
+
+ ;// Word loads: the top halfword of rowLuma23 ('d') is loaded but only the
+ ;// bottom halfword of rowLuma23 is ever used by the multiplies below.
+ LDR rowLuma01,[pVRow,QPmod]! ;// rowLuma01 = [0b|0a]
+ LDR rowLuma23,[pVRow,#4] ;// rowLuma23 = [0d|0c]
+
+ ;// Load all the 16 'src' values
+ LDMIA pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32}
+
+
+ ;//*********************************************************************************************
+ ;//
+ ;// 'Shift' ranges between [0,8]
+ ;// So we can shift the packed rowLuma values [0b|0a] with a single LSL operation
+ ;//
+ ;//*********************************************************************************************
+
+ LSL rowLuma01,rowLuma01,shift
+ LSL rowLuma23,rowLuma23,shift
+
+
+ ;//**********************************************************************************************
+ ;//
+ ;// The idea is to unroll the Loop completely
+ ;// All the 16 src values are loaded at once into 8 registers : SrcDst<y><x> (above)
+ ;// 0<= armVCM4P10_PosToVCol4x4[i] <=2 for any 'i<16'
+ ;// So the only values of pVRow[i] that need to be loaded are for i=0,1,2
+ ;// These 3 values are loaded into rowLuma01 and rowLuma23 (above)
+ ;// We first calculate pVRow[armVCM4P10_PosToVCol4x4[i]]) << Shift which fits into 16 bits (above)
+ ;// Then the product pSrcDst[i] * (pVRow[armVCM4P10_PosToVCol4x4[i]] << Shift) is calculated
+ ;// Here we interleave the PKHBT operations for various rows to avoid pipeline stalls
+ ;//
+ ;// We then pack the two 16 bit multiplication result into a word and store at one go
+ ;//
+ ;// In every pair the odd (top-halfword) product is computed first into a temp,
+ ;// because the even product overwrites the source register.
+ ;//
+ ;//**********************************************************************************************
+
+
+ ;// Row 1
+
+
+ SMULTB temp1,SrcDst00,rowLuma23 ;// pSrcDst[1] * (pVRow[2]<<Shift)
+ SMULBB SrcDst00,SrcDst00,rowLuma01 ;// pSrcDst[0] * (pVRow[0]<<Shift)
+
+ SMULTB temp2,SrcDst02,rowLuma23 ;// pSrcDst[3] * (pVRow[2]<<Shift)
+ SMULBB SrcDst02,SrcDst02,rowLuma01 ;// pSrcDst[2] * (pVRow[0]<<Shift)
+
+ PKHBT SrcDst00,SrcDst00,temp1,LSL #16 ;// Pack the first two product values
+
+
+ ;// Row 2
+ SMULTT temp1,SrcDst10,rowLuma01 ;// pSrcDst[5] * (pVRow[1]<<Shift)
+ SMULBB SrcDst10,SrcDst10,rowLuma23 ;// pSrcDst[4] * (pVRow[2]<<Shift)
+
+ PKHBT SrcDst02,SrcDst02,temp2,LSL #16 ;// Pack the next two product values
+ SMULTT temp2,SrcDst12,rowLuma01 ;// pSrcDst[7] * (pVRow[1]<<Shift)
+ SMULBB SrcDst12,SrcDst12,rowLuma23 ;// pSrcDst[6] * (pVRow[2]<<Shift)
+
+ PKHBT SrcDst10,SrcDst10,temp1,LSL #16 ;// Pack the next two product values
+
+
+ ;// Row 3
+
+ SMULTB temp1,SrcDst20,rowLuma23 ;// pSrcDst[9] * (pVRow[2]<<Shift)
+ SMULBB SrcDst20,SrcDst20,rowLuma01 ;// pSrcDst[8] * (pVRow[0]<<Shift)
+
+ PKHBT SrcDst12,SrcDst12,temp2,LSL #16 ;// Pack the next two product values
+ SMULTB temp2,SrcDst22,rowLuma23 ;// pSrcDst[11] * (pVRow[2]<<Shift)
+ SMULBB SrcDst22,SrcDst22,rowLuma01 ;// pSrcDst[10] * (pVRow[0]<<Shift)
+
+ PKHBT SrcDst20,SrcDst20,temp1,LSL #16 ;// Pack the next two product values
+
+
+
+ ;// Row 4
+
+ SMULTT temp1,SrcDst30,rowLuma01 ;// pSrcDst[13] * (pVRow[1]<<Shift)
+ SMULBB SrcDst30,SrcDst30,rowLuma23 ;// pSrcDst[12] * (pVRow[2]<<Shift)
+
+ SMULTT temp3,SrcDst32,rowLuma01 ;// pSrcDst[15] * (pVRow[1]<<Shift)
+ SMULBB SrcDst32,SrcDst32,rowLuma23 ;// pSrcDst[14] * (pVRow[2]<<Shift)
+
+ PKHBT SrcDst22,SrcDst22,temp2,LSL #16 ;// Pack the remaining product values
+ PKHBT SrcDst30,SrcDst30,temp1,LSL #16
+ PKHBT SrcDst32,SrcDst32,temp3,LSL #16
+
+
+ ;// Write all 16 scaled values back over the input (pSrcDst was not advanced)
+ STMIA pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32}
+
+
+ ;// Set return value
+ ;// (none: r0 still holds pSrcDst on exit)
+
+
+ ;// Write function tail
+ M_END
+
+ ENDIF ;//ARM1136JS_U
+
+
+
+
+
+;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd
+
+;// Guarding implementation by the processor name
+
+ IF ARM1136JS
+
+;// omxVCM4P10_DequantTransformResidualFromPairAndAdd
+;//
+;// Builds a 4x4 block of 16-bit residuals in a 32-byte stack buffer, adds it
+;// to a 4x4 block of 8-bit prediction pixels and stores the result, clipped
+;// to [0,255], at pDst.
+;//
+;// Register arguments: r0 = ppSrc, r1 = pPred, r2 = pDC, r3 = pDst
+;// Stack arguments (in order): predStep, dstStep, QP, AC
+;//
+;// AC != 0 : armVCM4P10_UnpackBlock4x4(ppSrc, buffer), then
+;// armVCM4P10_DequantLumaAC4x4(buffer, QP); if pDC is non-NULL the
+;// first coefficient is overwritten with *pDC; finally
+;// armVCM4P10_TransformResidual4x4(buffer, buffer).
+;// AC == 0 : every residual is set to (*pDC + 32) >> 6.
+;// NOTE(review): this path reads *pDC without a NULL check -
+;// confirm callers never pass AC == 0 together with pDC == NULL.
+;//
+;// Each row is read from pPred and written to pDst as one 32-bit word.
+;// Always returns OMX_Sts_NoErr; no argument validation is performed here.
+;//
+;//Input Registers
+ppSrc RN 0
+pPred RN 1
+pDC RN 2
+pDst RN 3
+
+
+;//Output Registers
+result RN 0
+
+;//Local Scratch Registers
+;// Note the heavy aliasing: r1 (pPred/pDeltaArg1/QP/predstep), r10 (DCval/dstStep),
+;// r11 (DCvalCopy/DeltaVal2), r8 (pDCTemp/PredVal), r0 (ppSrc/pDeltaArg0/ycounter/result).
+pDelta RN 4
+pDeltaTmp RN 6
+AC RN 5 ;//Load from stack
+pPredTemp RN 7
+pDCTemp RN 8
+pDstTemp RN 9
+pDeltaArg1 RN 1
+pDeltaArg0 RN 0
+QP RN 1 ;//Load from stack
+DCval RN 10
+DCvalCopy RN 11
+predstep RN 1
+dstStep RN 10
+ycounter RN 0
+PredVal1 RN 3
+PredVal2 RN 5
+DeltaVal1 RN 2
+DeltaVal2 RN 11
+PredVal RN 8
+tmpDeltaVal RN 6
+sum1 RN 12
+sum2 RN 14
+
+
+
+ ;// Allocate stack memory required by the function
+ ;// (16 residuals x 2 bytes; 8-byte aligned so STRD can be used on it)
+ M_ALLOC8 pBuffer, 32
+
+
+ ;// Write function header
+ M_START omxVCM4P10_DequantTransformResidualFromPairAndAdd,r11
+
+ ;// Define stack arguments
+ M_ARG predStepOnStack, 4
+ M_ARG dstStepOnStack,4
+ M_ARG QPOnStack, 4
+ M_ARG ACOnStack,4
+
+
+ M_ADR pDelta,pBuffer
+ M_LDR AC,ACOnStack
+
+
+ ;// Save registers r1,r2,r3 before function call
+ MOV pPredTemp,pPred
+ MOV pDCTemp,pDC
+ MOV pDstTemp,pDst
+
+ CMP AC,#0
+ BEQ DCcase
+ ;// r0 still holds ppSrc here, so only r1 needs setting up
+ MOV pDeltaArg1,pDelta ;// Set up r1 for armVCM4P10_UnpackBlock4x4
+
+ BL armVCM4P10_UnpackBlock4x4
+
+ M_LDR QP,QPOnStack ;// Set up r1 for DequantLumaAC4x4
+ MOV pDeltaArg0,pDelta ;// Set up r0 for DequantLumaAC4x4
+
+ BL armVCM4P10_DequantLumaAC4x4
+
+
+ ;// If a DC pointer was supplied, replace coefficient 0 with *pDC.
+ ;// The MOVs between the conditional load and store do not alter the flags.
+ CMP pDCTemp,#0
+ LDRSHNE DCval,[pDCTemp]
+ MOV pDeltaArg0,pDelta ;// Set up r0 for armVCM4P10_TransformResidual4x4
+ MOV pDeltaArg1,pDelta ;// Set up r1 for armVCM4P10_TransformResidual4x4
+ STRHNE DCval,[pDelta]
+
+ BL armVCM4P10_TransformResidual4x4
+ B OutDCcase
+
+
+DCcase
+ ;// DC-only block: residual = (DC + 32) >> 6 for all 16 positions
+ LDRSH DCval,[pDCTemp]
+ ADD DCval,DCval,#32
+ ASR DCval,DCval,#6
+ PKHBT DCval,DCval,DCval,LSL #16 ;// Duplicating the Lower halfword
+ MOV DCvalCopy, DCval ;// Needed for STRD
+ STRD DCval, [pDelta, #0] ;// pDelta[0] = pDelta[1] = pDelta[2] = pDelta[3] = DCval
+ STRD DCval, [pDelta, #8] ;// pDelta[4] = pDelta[5] = pDelta[6] = pDelta[7] = DCval
+ STRD DCval, [pDelta, #16] ;// pDelta[8] = pDelta[9] = pDelta[10] = pDelta[11] = DCval
+ STRD DCval, [pDelta, #24] ;// pDelta[12] = pDelta[13] = pDelta[14] = pDelta[15] = DCval
+
+
+OutDCcase
+ M_LDR predstep,predStepOnStack
+ M_LDR dstStep,dstStepOnStack
+
+ LDMIA pDelta!,{tmpDeltaVal,DeltaVal2} ;// Pre load
+ MOV ycounter,#4 ;// Counter for the PredPlusDeltaLoop
+ LDR PredVal,[pPredTemp] ;// Pre load
+
+;// One row (4 pixels) per pass. With residuals [A B C D] and prediction bytes
+;// [a b c d], even and odd lanes are processed as two packed halfword pairs.
+;// The flags set by SUBS are still live at the conditional loads and the final
+;// BGT, so the pre-loads for the next row are skipped on the last pass.
+PredPlusDeltaLoop
+
+
+ SUBS ycounter,ycounter,#1
+ ADD pPredTemp,pPredTemp,predstep ;// Increment pPred ptr
+
+ PKHBT DeltaVal1,tmpDeltaVal,DeltaVal2,LSL #16 ;// Deltaval1 = [C A]
+ PKHTB DeltaVal2,DeltaVal2,tmpDeltaVal,ASR #16 ;// DeltaVal2 = [D B]
+
+ UXTB16 PredVal1,PredVal ;// PredVal1 = [0c0a]
+ UXTB16 PredVal2,PredVal,ROR #8 ;// PredVal2 = [0d0b]
+
+ LDRGT PredVal,[pPredTemp] ;// Pre load
+
+ QADD16 sum2,DeltaVal2,PredVal2 ;// Add and saturate to 16 bits
+ QADD16 sum1,DeltaVal1,PredVal1
+
+ USAT16 sum2,#8,sum2 ;// armClip(0,255,sum2)
+ USAT16 sum1,#8,sum1
+
+ LDMGTIA pDelta!,{tmpDeltaVal,DeltaVal2} ;// Pre load
+
+ ORR sum1,sum1,sum2,LSL #8 ;// sum1 = [dcba]
+ STR sum1,[pDstTemp]
+
+ ADD pDstTemp,pDstTemp,dstStep ;// Increment pDst ptr
+ BGT PredPlusDeltaLoop
+
+
+ ;// Set return value
+ MOV result,#OMX_Sts_NoErr
+
+End
+
+
+ ;// Write function tail
+
+ M_END
+
+ ENDIF ;//ARM1136JS
+
+
+;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd
+
+;// Guarding implementation by the processor name
+
+
+
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
new file mode 100644
index 0000000..6d960f0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
@@ -0,0 +1,336 @@
+;//
+;//
+;// File Name: omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+ IF ARM1136JS
+
+;// omxVCM4P10_FilterDeblockingChroma_HorEdge_I
+;//
+;// In-place deblocking across horizontal edges of a chroma block.
+;// Register arguments: r0 = pSrcDst, r1 = srcdstStep, r2 = pAlpha, r3 = pBeta
+;// Stack arguments (in order): pThresholds, pBS
+;//
+;// Each pass of the loop handles 4 horizontally adjacent pixels packed in one
+;// word per row (rows p1, p0, q0, q1) using the ARMv6 byte-lane SIMD
+;// instructions. There are 2 passes per edge and 2 edges, driven by LOOP_COUNT:
+;// XY is doubled with ADDS after each pass; carry clear means "next pass on the
+;// same edge", carry set means "edge finished", and the zero result after the
+;// fourth doubling ends the function. The first edge uses pAlpha[0]/pBeta[0],
+;// the second pAlpha[1]/pBeta[1]. Always returns OMX_Sts_NoErr.
+;//
+MASK_0 EQU 0x00000000
+MASK_1 EQU 0x01010101
+LOOP_COUNT EQU 0x50000000
+
+;// Declare input registers
+
+pSrcDst RN 0
+srcdstStep RN 1
+pAlphaArg RN 2
+pBetaArg RN 3
+
+pThresholds RN 6
+pBS RN 9
+pQ0 RN 0
+bS RN 10
+
+alpha RN 6
+alpha0 RN 6
+alpha1 RN 8
+
+beta RN 7
+beta0 RN 7
+beta1 RN 9
+
+;// Declare Local/Temporary variables
+;// Many names share a register (e.g. r6: pThresholds/alpha/P_0/t4,
+;// r9: pBS/beta1/q_1/neg), which is why pBS, XY, alpha/beta and pThresholds
+;// are kept in stack slots and reloaded on every pass.
+
+;// Pixels
+p_0 RN 3
+p_1 RN 5
+q_0 RN 8
+q_1 RN 9
+
+;// Filtering
+
+dp0q0 RN 12
+dp1p0 RN 12
+dq1q0 RN 12
+
+ap0q0 RN 4
+filt RN 2
+
+m00 RN 14
+m01 RN 11
+
+pQ0 RN 0
+Step RN 1
+
+;// Output
+
+P_0 RN 6
+Q_0 RN 7
+
+;//Declarations for bSLT4 kernel
+
+tC RN 12
+tC0 RN 5
+tC1 RN 12
+pos RN 5
+neg RN 9
+
+;//Declarations for bSGE4 kernel
+
+
+;// Miscellaneous
+XY RN 8
+
+a RN 10
+t1 RN 10
+t2 RN 12
+t3 RN 14
+t4 RN 6
+t5 RN 5
+
+
+ ;// Allocate stack memory
+ ;// NOTE(review): pXYBS is declared with size 4 but is accessed with
+ ;// M_STRD/M_LDRD (two words: XY and pBS). The second word appears to be
+ ;// the ppBS slot declared right after it, so a store to ppBS also updates
+ ;// what M_LDRD reads back - confirm the M_ALLOC layout in armCOMM_s.h.
+ M_ALLOC4 ppThresholds,4
+ M_ALLOC8 pAlphaBeta0,8
+ M_ALLOC8 pAlphaBeta1,8
+ M_ALLOC8 pXYBS,4
+ M_ALLOC4 ppBS,4
+
+ ;// Function header
+ M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r11
+
+ ;//Input arguments on the stack
+ M_ARG ppThresholdsArg, 4
+ M_ARG ppBSArg, 4
+
+ LDRB alpha1, [pAlphaArg,#1]
+ LDRB beta1, [pBetaArg,#1]
+ M_LDR pThresholds, ppThresholdsArg
+ LDR a,=MASK_1
+ LDRB beta0, [pBetaArg]
+ M_STR pThresholds, ppThresholds
+ LDRB alpha0, [pAlphaArg]
+
+ ;// Multiplying a byte by 0x01010101 replicates it into all four byte lanes
+ MUL alpha1, alpha1, a
+ MUL beta1, beta1, a
+ MUL alpha0, alpha0, a
+ MUL beta0, beta0, a
+
+ M_STRD alpha1, beta1, pAlphaBeta1
+ M_LDR pBS, ppBSArg
+ M_STRD alpha0, beta0, pAlphaBeta0
+
+ LDR XY,=LOOP_COUNT
+ M_STRD XY, pBS, pXYBS
+
+ ;// Step back two rows so pQ0 addresses the p1 row
+ SUB pQ0, pQ0, srcdstStep, LSL #1
+LoopY
+LoopX
+;//---------------Load Pixels-------------------
+ ;// One halfword = two bS bytes; each bS byte governs two of the four pixels
+ LDRH bS, [pBS], #2
+
+ M_STR pBS, ppBS
+ M_LDR p_1, [pQ0],srcdstStep
+
+ ;// Flags from this CMP are consumed by BEQ NoFilterBS0 below
+ CMP bS, #0
+
+ M_LDR p_0, [pQ0],srcdstStep
+ M_LDR q_0, [pQ0],srcdstStep
+ M_LDR q_1, [pQ0]
+ LDR m01, =MASK_1 ;// 01010101 mask
+ BEQ NoFilterBS0
+
+
+ ;// p_0 = [r3p0 r2p0 r1p0 r0p0]
+ ;// p_1 = [r3p1 r2p1 r1p1 r0p1]
+ ;// q_0 = [r3q0 r2q0 r1q0 r0q0]
+ ;// q_1 = [r3q1 r2q1 r1q1 r0q1]
+
+;//--------------Filtering Decision -------------------
+ MOV m00, #MASK_0 ;// 00000000 mask
+
+ ;// filt holds 0x01 in every lane that may be filtered: clear the upper
+ ;// two lanes if the high bS byte is 0, the lower two if the low byte is 0
+ MOV filt, m01
+ TST bS, #0xff00
+ MOVEQ filt, filt, LSR #16
+ TST bS, #0xff
+ MOVEQ filt, filt, LSL #16
+ ;// Z from this TST selects bSLT4 vs bSGE4 at "BEQ bSLT4"; only bit 2 of
+ ;// the low bS byte is examined. USUB8/SEL below update only the GE lanes.
+ TST bS, #4
+
+
+ ;// Check |p0-q0|<Alpha
+ USUB8 dp0q0, p_0, q_0
+ USUB8 a, q_0, p_0
+ SEL ap0q0, a, dp0q0
+ USUB8 a, ap0q0, alpha
+ SEL filt, m00, filt
+
+ ;// Check |p1-p0|<Beta
+ USUB8 dp1p0, p_1, p_0
+ USUB8 a, p_0, p_1
+ SEL a, a, dp1p0
+ USUB8 a, a, beta
+ SEL filt, m00, filt
+
+ ;// Check |q1-q0|<Beta
+ USUB8 dq1q0, q_1, q_0
+ USUB8 a, q_0, q_1
+ SEL a, a, dq1q0
+ USUB8 a, a, beta
+ SEL filt, m00, filt
+
+ BEQ bSLT4
+;//-------------------Filter--------------------
+bSGE4
+ ;//---------bSGE4 Execution---------------
+ CMP filt, #0
+
+ M_LDR pThresholds, ppThresholds
+
+ ;// Compute P0b
+ UHADD8 t1, p_0, q_1
+ BEQ NoFilterFilt0
+ MVN t2, p_1
+ UHSUB8 t1, t1, t2
+ ;// Sets GE in the lanes where filt is 1; result in t2 is discarded
+ USUB8 t2, filt, m01
+ EOR t1, t1, m01, LSL #7
+
+ ;// Thresholds are not used in this branch but the pointer still advances
+ ADD pThresholds,pThresholds, #2
+
+ ;// Compute Q0b
+ UHADD8 t2, q_0, p_1
+ MVN t3, q_1
+ UHSUB8 t2, t2, t3
+ M_STR pThresholds, ppThresholds
+ SEL P_0, t1, p_0
+ EOR t2, t2, m01, LSL #7
+ SEL Q_0, t2, q_0
+
+ ;// pQ0 is at the q1 row after the loads; move back to the p0 row
+ SUB pQ0, pQ0, srcdstStep, LSL #1
+ B StoreResultAndExit
+
+;//---------- Exit of LoopX --------------
+;//---- for the case of no filtering -----
+
+NoFilterFilt0
+NoFilterBS0
+ M_LDR pThresholds, ppThresholds
+ ;// Back three rows to the p1 row, then 4 pixels to the right
+ SUB pQ0, pQ0, srcdstStep, LSL #1
+ SUB pQ0, pQ0, srcdstStep
+ ADD pQ0, pQ0, #4
+ ADD pThresholds, pThresholds, #2
+
+ ;// Load counter for LoopX
+ M_LDRD XY, pBS, pXYBS
+ M_STR pThresholds, ppThresholds
+ M_LDRD alpha, beta, pAlphaBeta0
+
+ ;// Align the pointer
+ ADDS XY, XY, XY
+ M_STR XY, pXYBS
+ BCC LoopY
+ B ExitLoopY
+
+bSLT4
+ ;//---------bSLT4 Execution---------------
+ M_LDR pThresholds, ppThresholds
+ CMP filt, #0
+
+ ;// Since beta <= 18 and alpha <= 255 we know
+ ;// -254 <= p0-q0 <= 254
+ ;// -17 <= q1-q0 <= 17
+ ;// -17 <= p1-p0 <= 17
+
+ ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
+ ;//
+ ;// Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
+ ;// = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
+ ;// = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
+
+ USUB8 t1, p_1, p_0
+ USUB8 t2, q_1, q_0
+ BEQ NoFilterFilt0
+
+ ;// Two threshold bytes per pass; each is spread over two byte lanes
+ ;// (tC0 -> lanes 0,1 and tC1 -> lanes 2,3) by the two ORRs below
+ LDRB tC0, [pThresholds],#1
+ SSUB8 t1, t1, t2
+ LDRB tC1, [pThresholds],#1
+ M_STR pThresholds, ppThresholds
+ UHSUB8 t4, p_0, q_0
+ ORR tC, tC0, tC1, LSL #16
+ USUB8 t5, p_0, q_0
+ AND t5, t5, m01
+ SHSUB8 t1, t1, t5
+ ORR tC, tC, LSL #8
+ SSUB8 t1, t1, t5
+ SHSUB8 t1, t1, t4
+ ;// tC = tC0 + 1 per lane (saturating)
+ UQADD8 tC, tC, m01
+ SADD8 t1, t1, m01
+ ;// GE set in lanes where filt is 1; tC is zeroed in all other lanes
+ USUB8 t5, filt, m01
+ SHSUB8 t1, t1, t4
+ SEL tC, tC, m00
+
+ ;// Split into positive and negative part and clip
+
+ SSUB8 t1, t1, m00
+ SEL pos, t1, m00
+ USUB8 neg, pos, t1
+ USUB8 t3, pos, tC
+ SEL pos, tC, pos
+ USUB8 t3, neg, tC
+ SEL neg, tC, neg
+ UQADD8 P_0, p_0, pos
+ UQSUB8 Q_0, q_0, pos
+ UQSUB8 P_0, P_0, neg
+ UQADD8 Q_0, Q_0, neg
+
+ ;// pQ0 is at the q1 row after the loads; move back to the p0 row
+ SUB pQ0, pQ0, srcdstStep, LSL #1
+
+ ;// Choose to store the filtered
+ ;// value or the original pixel
+ USUB8 t1, filt, m01
+ SEL P_0, P_0, p_0
+ SEL Q_0, Q_0, q_0
+
+StoreResultAndExit
+
+ ;//---------Store result---------------
+
+ ;// P_0 = [r0p0 r1p0 r2p0 r3p0]
+ ;// Q_0 = [r0q0 r1q0 r2q0 r3q0]
+
+ ;// Write the p0 row, then the q0 row; the #4 post-increment moves to the
+ ;// next group of four pixels
+ M_STR P_0, [pQ0], srcdstStep
+ STR Q_0, [pQ0], #4
+
+ M_LDRD XY, pBS, pXYBS
+ M_LDRD alpha, beta, pAlphaBeta0
+
+ ;// Back to the p1 row for the next pass
+ SUB pQ0, pQ0, srcdstStep, LSL #1
+
+ ADDS XY, XY, XY
+ M_STR XY, pXYBS
+ BCC LoopX
+
+;//-------- Common Exit of LoopY -----------------
+ ;// Align the pointers
+ ;// Z from the last ADDS XY is still live at BNE LoopY: the instructions
+ ;// in between are expected to leave the flags untouched.
+
+ExitLoopY
+ ;// Skip 4 bS bytes (2+2 were consumed by the two passes of this edge)
+ ADD pBS, pBS, #4
+ ;// From now on use the second alpha/beta pair
+ M_LDRD alpha, beta, pAlphaBeta1
+ ;// Undo the 8-pixel horizontal advance and move down four rows
+ SUB pQ0, pQ0, #8
+ ADD pQ0, pQ0, srcdstStep, LSL #2
+ M_STRD alpha, beta, pAlphaBeta0
+
+ BNE LoopY
+ MOV r0, #OMX_Sts_NoErr
+
+;//-----------------End Filter--------------------
+ M_END
+
+ ENDIF
+
+ END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
new file mode 100644
index 0000000..00c8354
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
@@ -0,0 +1,437 @@
+;//
+;//
+;// File Name: omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+ IF ARM1136JS
+
+;// omxVCM4P10_FilterDeblockingChroma_VerEdge_I
+;//
+;// In-place deblocking across vertical edges of a chroma block.
+;// Register arguments: r0 = pSrcDst, r1 = srcdstStep, r2 = pAlpha, r3 = pBeta
+;// Stack arguments (in order): pThresholds, pBS
+;//
+;// Each pass gathers the two pixels on either side of the edge (p1 p0 | q0 q1)
+;// from 4 consecutive rows, transposes them into one word per column, filters
+;// with the ARMv6 byte-lane SIMD instructions and scatters p0/q0 back with
+;// byte stores. The inner loop steps 4 pixels to the right (second edge, which
+;// uses pAlpha[1]/pBeta[1]); the outer loop steps 4 rows down. LOOP_COUNT
+;// drives both loops: XY is doubled with ADDS after each pass, carry clear
+;// continues the inner loop, carry set ends it, and a zero result ends the
+;// function. Always returns OMX_Sts_NoErr.
+;//
+MASK_0 EQU 0x00000000
+MASK_1 EQU 0x01010101
+MASK_2 EQU 0x0000ff00
+LOOP_COUNT EQU 0x50000000
+
+;// Declare input registers
+
+pSrcDst RN 0
+srcdstStep RN 1
+pAlphaArg RN 2
+pBetaArg RN 3
+
+pThresholds RN 6
+pBS RN 9
+pQ0 RN 0
+bS RN 2
+bSTemp RN 10
+
+alpha RN 6
+alpha0 RN 6
+alpha1 RN 8
+
+beta RN 7
+beta0 RN 7
+beta1 RN 9
+
+;// Declare Local/Temporary variables
+;// Many names share a register (e.g. r2: bS/row0/tunpk0/tunpk7/filt,
+;// r9: pBS/beta1/row5/q_1/neg), so values needed later are kept in stack
+;// slots and reloaded.
+
+;// Pixels
+p_0 RN 3
+p_1 RN 5
+q_0 RN 8
+q_1 RN 9
+
+;// Unpacking
+mask RN 11
+
+row0 RN 2
+row1 RN 4
+row2 RN 5
+row3 RN 3
+
+row4 RN 8
+row5 RN 9
+row6 RN 10
+row7 RN 12
+
+tunpk0 RN 2
+tunpk2 RN 10
+tunpk3 RN 12
+
+tunpk4 RN 4
+tunpk5 RN 5
+tunpk6 RN 14
+tunpk7 RN 2
+
+;// Filtering
+
+dp0q0 RN 12
+dp1p0 RN 12
+dq1q0 RN 12
+
+ap0q0 RN 4
+filt RN 2
+
+m00 RN 14
+m01 RN 11
+
+pQ0 RN 0
+Step RN 1
+
+;// Output
+
+P_0 RN 6
+Q_0 RN 7
+
+;//Declarations for bSLT4 kernel
+
+tC RN 12
+tC0 RN 5
+tC1 RN 12
+pos RN 5
+neg RN 9
+
+;//Declarations for bSGE4 kernel
+
+
+;// Miscellaneous
+XY RN 8
+
+a RN 10
+t1 RN 10
+t2 RN 12
+t3 RN 14
+t4 RN 6
+t5 RN 5
+
+
+ ;// Allocate stack memory
+ ;// NOTE(review): pXYBS is declared with size 4 but is accessed with
+ ;// M_STRD/M_LDRD (two words: XY and pBS). The second word appears to be
+ ;// the ppBS slot declared right after it, so a store to ppBS also updates
+ ;// what M_LDRD reads back - confirm the M_ALLOC layout in armCOMM_s.h.
+ M_ALLOC4 ppThresholds,4
+ M_ALLOC8 pAlphaBeta0,8
+ M_ALLOC8 pAlphaBeta1,8
+ M_ALLOC8 pXYBS,4
+ M_ALLOC4 ppBS,4
+
+ ;// Function header
+ M_START omxVCM4P10_FilterDeblockingChroma_VerEdge_I, r11
+
+ ;//Input arguments on the stack
+ M_ARG ppThresholdsArg, 4
+ M_ARG ppBSArg, 4
+
+ LDRB alpha1, [pAlphaArg,#1]
+ LDRB beta1, [pBetaArg,#1]
+ M_LDR pThresholds, ppThresholdsArg
+ LDR a,=MASK_1
+ LDRB beta0, [pBetaArg]
+ M_STR pThresholds, ppThresholds
+ LDRB alpha0, [pAlphaArg]
+
+ ;// Multiplying a byte by 0x01010101 replicates it into all four byte lanes
+ MUL alpha1, alpha1, a
+ MUL beta1, beta1, a
+ MUL alpha0, alpha0, a
+ MUL beta0, beta0, a
+
+ M_STRD alpha1, beta1, pAlphaBeta1
+ M_LDR pBS, ppBSArg
+ M_STRD alpha0, beta0, pAlphaBeta0
+
+ LDR XY,=LOOP_COUNT
+ M_STRD XY, pBS, pXYBS
+
+
+LoopY
+LoopX
+;//---------------Load Pixels-------------------
+
+;//----------------Pack q0-q1-----------------------
+ ;// One halfword = two bS bytes for these 4 rows; the #8 post-increment
+ ;// moves to the bS entries used by the next inner pass
+ LDRH bS, [pBS], #8
+ LDR mask, =MASK_2
+
+ M_LDRH row4, [pQ0], srcdstStep
+ ;// Flags from this CMP are consumed by BEQ.W NoFilterBS0 below
+ CMP bS, #0
+ M_STR pBS, ppBS
+ M_LDRH row5, [pQ0], srcdstStep
+ BEQ.W NoFilterBS0
+ LDRH row6, [pQ0]
+ LDRH row7, [pQ0, srcdstStep]
+
+ ;// Words are written [byte3 byte2 byte1 byte0]. pQ0 addresses q0 and q1
+ ;// follows it in memory, so (assuming little-endian loads) q0 lands in
+ ;// byte0 and q1 in byte1:
+ ;// row4 = [0 0 r0q1 r0q0]
+ ;// row5 = [0 0 r1q1 r1q0]
+ ;// row6 = [0 0 r2q1 r2q0]
+ ;// row7 = [0 0 r3q1 r3q0]
+
+ AND tunpk4, mask, row4
+ AND tunpk5, mask, row4, LSL#8
+ UXTAB tunpk4, tunpk4, row5, ROR#8
+ UXTAB tunpk5, tunpk5, row5
+ AND tunpk6, mask, row6
+ AND tunpk7, mask, row6, LSL#8
+ UXTAB tunpk6, tunpk6, row7, ROR#8
+ UXTAB tunpk7, tunpk7, row7
+
+ ;// tunpk4 = [0 0 r0q1 r1q1]
+ ;// tunpk5 = [0 0 r0q0 r1q0]
+ ;// tunpk6 = [0 0 r2q1 r3q1]
+ ;// tunpk7 = [0 0 r2q0 r3q0]
+
+ ;// Return to row 0 and step two pixels left so pQ0 addresses p1
+ SUB pQ0, pQ0, srcdstStep, LSL #1
+ SUB pQ0, pQ0, #2
+
+ PKHBT q_1, tunpk6, tunpk4, LSL#16
+ PKHBT q_0, tunpk7, tunpk5, LSL#16
+
+ ;// q_0 = [r0q0 r1q0 r2q0 r3q0]
+ ;// q_1 = [r0q1 r1q1 r2q1 r3q1]
+
+
+;//----------------Pack p0-p1-----------------------
+
+ M_LDRH row0, [pQ0], srcdstStep
+ M_LDRH row1, [pQ0], srcdstStep
+ LDRH row2, [pQ0]
+ LDRH row3, [pQ0, srcdstStep]
+
+ ;// row0 = [0 0 r0p0 r0p1]
+ ;// row1 = [0 0 r1p0 r1p1]
+ ;// row2 = [0 0 r2p0 r2p1]
+ ;// row3 = [0 0 r3p0 r3p1]
+
+ AND tunpk2, mask, row0
+ AND tunpk6, mask, row0, LSL#8
+ UXTAB tunpk2, tunpk2, row1, ROR#8
+ UXTAB tunpk6, tunpk6, row1
+
+ AND tunpk0, mask, row2
+ AND tunpk3, mask, row2, LSL#8
+ UXTAB tunpk0, tunpk0, row3, ROR#8
+ UXTAB tunpk3, tunpk3, row3
+
+ ;// tunpk2 = [0 0 r0p0 r1p0]
+ ;// tunpk6 = [0 0 r0p1 r1p1]
+ ;// tunpk0 = [0 0 r2p0 r3p0]
+ ;// tunpk3 = [0 0 r2p1 r3p1]
+
+ PKHBT p_0, tunpk0, tunpk2, LSL#16
+ ;// The bS register (r2) was reused during unpacking, so the bS halfword
+ ;// is fetched again through the saved, already advanced, pBS pointer
+ M_LDR bSTemp, ppBS
+ PKHBT p_1, tunpk3, tunpk6, LSL#16
+
+ ;// p_0 = [r0p0 r1p0 r2p0 r3p0]
+ ;// p_1 = [r0p1 r1p1 r2p1 r3p1]
+
+;//--------------Filtering Decision -------------------
+ USUB8 dp0q0, p_0, q_0
+ LDR m01, =MASK_1
+ LDRH bSTemp, [bSTemp ,#-8]
+ MOV m00, #MASK_0 ;// 00000000 mask
+
+ ;// filt holds 0x01 in every lane that may be filtered. Rows 0,1 sit in
+ ;// the upper two lanes here, so the shift directions are the opposite of
+ ;// the horizontal-edge routine
+ MOV filt, m01
+ TST bSTemp, #0xff00
+ MOVEQ filt, filt, LSL #16
+ TST bSTemp, #0xff
+ MOVEQ filt, filt, LSR #16
+ ;// Z from this TST selects bSLT4 vs bSGE4 at "BEQ bSLT4"; only bit 2 of
+ ;// the low bS byte is examined. USUB8/SEL below update only the GE lanes.
+ TST bSTemp, #4
+
+ ;// Check |p0-q0|<Alpha
+ USUB8 a, q_0, p_0
+ SEL ap0q0, a, dp0q0
+ USUB8 a, ap0q0, alpha
+ SEL filt, m00, filt
+
+ ;// Check |p1-p0|<Beta
+ USUB8 dp1p0, p_1, p_0
+ USUB8 a, p_0, p_1
+ SEL a, a, dp1p0
+ USUB8 a, a, beta
+ SEL filt, m00, filt
+
+ ;// Check |q1-q0|<Beta
+ USUB8 dq1q0, q_1, q_0
+ USUB8 a, q_0, q_1
+ SEL a, a, dq1q0
+ USUB8 a, a, beta
+ SEL filt, m00, filt
+
+ BEQ bSLT4
+;//-------------------Filter--------------------
+bSGE4
+ ;//---------bSGE4 Execution---------------
+ CMP filt, #0
+
+ M_LDR pThresholds, ppThresholds
+
+ ;// Compute P0b
+ UHADD8 t1, p_0, q_1
+ BEQ NoFilterFilt0
+ MVN t2, p_1
+ UHSUB8 t1, t1, t2
+ ;// Sets GE in the lanes where filt is 1; result in t2 is discarded
+ USUB8 t2, filt, m01
+ EOR t1, t1, m01, LSL #7
+
+ ;// Thresholds are not used in this branch but the pointer still advances
+ ADD pThresholds,pThresholds, #4
+
+ ;// Compute Q0b
+ UHADD8 t2, q_0, p_1
+ MVN t3, q_1
+ UHSUB8 t2, t2, t3
+ M_STR pThresholds, ppThresholds
+ SEL P_0, t1, p_0
+ EOR t2, t2, m01, LSL #7
+ SEL Q_0, t2, q_0
+
+ B StoreResultAndExit
+
+;//---------- Exit of LoopX --------------
+;//---- for the case of no filtering -----
+
+NoFilterFilt0
+ ;// Reached after the p-side loads: undo the 2-pixel left step first
+ ADD pQ0, pQ0, #2
+NoFilterBS0
+ M_LDR pThresholds, ppThresholds
+ ;// Back to row 0, then 4 pixels to the right
+ SUB pQ0, pQ0, srcdstStep, LSL #1
+ ADD pQ0, pQ0, #4
+ ADD pThresholds, pThresholds, #4
+ ;// Load counter for LoopX
+ M_LDRD XY, pBS, pXYBS
+ M_STR pThresholds, ppThresholds
+ M_LDRD alpha, beta, pAlphaBeta1
+
+ ;// Align the pointer
+ ADDS XY, XY, XY
+ M_STR XY, pXYBS
+ BCC LoopY
+ B ExitLoopY
+
+bSLT4
+ ;//---------bSLT4 Execution---------------
+ M_LDR pThresholds, ppThresholds
+ CMP filt, #0
+
+
+ ;// Since beta <= 18 and alpha <= 255 we know
+ ;// -254 <= p0-q0 <= 254
+ ;// -17 <= q1-q0 <= 17
+ ;// -17 <= p1-p0 <= 17
+
+ ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
+ ;//
+ ;// Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
+ ;// = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
+ ;// = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
+
+ USUB8 t1, p_1, p_0
+ USUB8 t2, q_1, q_0
+ BEQ NoFilterFilt0
+
+ ;// Two threshold bytes per pass (pointer advances by 1 + 3 = 4 in total);
+ ;// tC0 goes to the upper two lanes (rows 0,1), tC1 to the lower two
+ LDRB tC0, [pThresholds], #1
+ SSUB8 t1, t1, t2
+ LDRB tC1, [pThresholds], #3
+ M_STR pThresholds, ppThresholds
+ UHSUB8 t4, p_0, q_0
+ ORR tC, tC1, tC0, LSL #16
+ USUB8 t5, p_0, q_0
+ AND t5, t5, m01
+ SHSUB8 t1, t1, t5
+ ORR tC, tC, LSL #8
+ SSUB8 t1, t1, t5
+ SHSUB8 t1, t1, t4
+ ;// tC = tC0 + 1 per lane (saturating)
+ UQADD8 tC, tC, m01
+ SADD8 t1, t1, m01
+ ;// GE set in lanes where filt is 1; tC is zeroed in all other lanes
+ USUB8 t5, filt, m01
+ SHSUB8 t1, t1, t4
+ SEL tC, tC, m00
+
+ ;// Split into positive and negative part and clip
+
+ SSUB8 t1, t1, m00
+ SEL pos, t1, m00
+ USUB8 neg, pos, t1
+ USUB8 t3, pos, tC
+ SEL pos, tC, pos
+ USUB8 t3, neg, tC
+ SEL neg, tC, neg
+ UQADD8 P_0, p_0, pos
+ UQSUB8 Q_0, q_0, pos
+ UQSUB8 P_0, P_0, neg
+ UQADD8 Q_0, Q_0, neg
+
+ ;// Choose to store the filtered
+ ;// value or the original pixel
+ USUB8 t1, filt, m01
+ SEL P_0, P_0, p_0
+ SEL Q_0, Q_0, q_0
+
+StoreResultAndExit
+
+ ;//---------Store result---------------
+
+ ;// P_0 = [r0p0 r1p0 r2p0 r3p0]
+ ;// Q_0 = [r0q0 r1q0 r2q0 r3q0]
+
+ ;// Back to row 0 and one pixel right, so pQ0 addresses p0 and
+ ;// [pQ0,#1] addresses q0
+ SUB pQ0, pQ0, srcdstStep, LSL #1
+ ADD pQ0, pQ0, #1
+
+ ;// Row 0
+ MOV t1, Q_0, LSR #24
+ STRB t1, [pQ0, #1]
+ MOV t1, P_0, LSR #24
+ M_STRB t1, [pQ0], srcdstStep
+
+ ;// Row 1
+ MOV t1, Q_0, LSR #16
+ STRB t1, [pQ0, #1]
+ MOV t1, P_0, LSR #16
+ M_STRB t1, [pQ0], srcdstStep
+
+ ;// Rows 2 and 3 (the writeback leaves pQ0 addressing q0 of row 2)
+ MOV t1, P_0, LSR #8
+ STRB t1, [pQ0]
+ STRB P_0, [pQ0, srcdstStep]
+ MOV t1, Q_0, LSR #8
+ STRB t1, [pQ0, #1]!
+ STRB Q_0, [pQ0, srcdstStep]
+
+ M_LDRD XY, pBS, pXYBS
+ ;// The next inner pass (if any) uses the second alpha/beta pair
+ M_LDRD alpha, beta, pAlphaBeta1
+
+ ;// Back to row 0, then 4 pixels to the right
+ SUB pQ0, pQ0, srcdstStep, LSL #1
+ ADD pQ0, pQ0, #4
+
+ ADDS XY, XY, XY
+ M_STR XY, pXYBS
+ BCC LoopX
+
+;//-------- Common Exit of LoopY -----------------
+ ;// Align the pointers
+ ;// Z from the last ADDS XY is still live at BNE LoopY: the instructions
+ ;// in between are expected to leave the flags untouched.
+
+ExitLoopY
+
+ M_LDR pThresholds, ppThresholds
+ ;// Undo the 8-pixel horizontal advance and move down four rows
+ SUB pQ0, pQ0, #8
+ ADD pQ0, pQ0, srcdstStep, LSL #2
+ ;// The two inner passes advanced pBS by 16 and pThresholds by 8; rewind so
+ ;// the net advance per outer pass is 2 entries in each array
+ SUB pBS, pBS, #14
+ SUB pThresholds, pThresholds, #6
+ M_STR pThresholds, ppThresholds
+
+ ;// Each new group of rows starts again with the first alpha/beta pair
+ M_LDRD alpha, beta, pAlphaBeta0
+
+ BNE LoopY
+ MOV r0, #OMX_Sts_NoErr
+;//-----------------End Filter--------------------
+
+ M_END
+
+ ENDIF
+
+ END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
new file mode 100644
index 0000000..1b84080
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
@@ -0,0 +1,331 @@
+;//
+;//
+;// File Name: omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ IMPORT armVCM4P10_DeblockingLumabSLT4_unsafe
+ IMPORT armVCM4P10_DeblockingLumabSGE4_unsafe
+
+
+
+ IF ARM1136JS
+
+;// omxVCM4P10_FilterDeblockingLuma_HorEdge_I
+;//
+;// In-place deblocking across horizontal edges of a luma block.
+;// Register arguments: r0 = pSrcDst, r1 = srcdstStep, r2 = pAlpha, r3 = pBeta
+;// Stack arguments (in order): pThresholds, pBS
+;//
+;// Each pass loads 8 rows (p3..p0, q0..q3) of 4 horizontally adjacent pixels,
+;// one word per row, builds the per-lane filter masks here and then calls one
+;// of two external kernels (armVCM4P10_DeblockingLumabSLT4_unsafe or
+;// armVCM4P10_DeblockingLumabSGE4_unsafe) to compute the filtered pixels.
+;// The register contract of those kernels is not visible in this file; the
+;// P0a/P1a/... and P0b/P1b/... aliases below name the registers this routine
+;// reads after each call.
+;// LOOP_COUNT gives 4 passes per edge and 4 edges: XY is doubled with ADDS
+;// after every pass, carry clear continues along the edge, carry set ends the
+;// edge, and a zero result ends the function. The first edge uses
+;// pAlpha[0]/pBeta[0], the remaining edges pAlpha[1]/pBeta[1].
+;// Always returns OMX_Sts_NoErr.
+;//
+MASK_0 EQU 0x00000000
+MASK_1 EQU 0x01010101
+MASK_2 EQU 0xff00ff00
+LOOP_COUNT EQU 0x11110000
+
+;// Declare input registers
+
+pSrcDst RN 0
+srcdstStep RN 1
+pAlphaArg RN 2
+pBetaArg RN 3
+
+pThresholds RN 14
+pBS RN 9
+pQ0 RN 0
+bS RN 2
+
+alpha RN 6
+alpha0 RN 6
+alpha1 RN 8
+
+beta RN 7
+beta0 RN 7
+beta1 RN 9
+
+;// Declare Local/Temporary variables
+;// r0 and r1 are reused as scratch (a/apflg and ap0q0) during the filtering
+;// decision, which is why pQ0 and srcdstStep are saved to the stack first.
+
+;// Pixels
+p_0 RN 3
+p_1 RN 5
+p_2 RN 4
+p_3 RN 2
+q_0 RN 8
+q_1 RN 9
+q_2 RN 10
+q_3 RN 12
+
+;// Filtering
+
+dp0q0 RN 12
+dp1p0 RN 12
+dq1q0 RN 12
+dp2p0 RN 12
+dq2q0 RN 12
+
+ap0q0 RN 1
+filt RN 2
+
+m00 RN 14
+m01 RN 11
+
+apflg RN 0
+aqflg RN 6
+apqflg RN 0
+
+
+;//Declarations for bSLT4 kernel
+
+tC0 RN 7
+ptC0 RN 1
+
+pQ0a RN 0
+Stepa RN 1
+maska RN 14
+
+P0a RN 1
+P1a RN 8
+Q0a RN 7
+Q1a RN 11
+
+;//Declarations for bSGE4 kernel
+
+pQ0b RN 0
+Stepb RN 1
+maskb RN 14
+
+P0b RN 6
+P1b RN 7
+P2b RN 1
+P3b RN 3
+
+Q0b RN 9
+Q1b RN 0
+Q2b RN 2
+Q3b RN 3
+
+;// Miscellaneous
+XY RN 8
+t0 RN 3
+t1 RN 12
+t2 RN 14
+t7 RN 7
+t4 RN 4
+t5 RN 1
+t8 RN 6
+a RN 0
+
+
+
+
+ ;// Allocate stack memory
+ ;// NOTE(review): pXYBS and ppQ0Step are declared with size 4 but are
+ ;// accessed with M_STRD/M_LDRD (two words). The second word appears to be
+ ;// the slot declared right after each of them (ppBS and pStep), so stores
+ ;// to ppBS / pStep also update what M_LDRD reads back - confirm the
+ ;// M_ALLOC layout in armCOMM_s.h.
+ ;// pQ_3 and pP_3 are written below but never read in this routine;
+ ;// presumably the _unsafe kernels read them - confirm.
+ M_ALLOC4 ppThresholds,4
+ M_ALLOC4 pQ_3,4
+ M_ALLOC4 pP_3,4
+ M_ALLOC8 pAlphaBeta0,8
+ M_ALLOC8 pAlphaBeta1,8
+ M_ALLOC8 pXYBS,4
+ M_ALLOC4 ppBS,4
+ M_ALLOC8 ppQ0Step,4
+ M_ALLOC4 pStep,4
+
+ ;// Function header
+ M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11
+
+ ;//Input arguments on the stack
+ M_ARG ppThresholdsArg, 4
+ M_ARG ppBSArg, 4
+
+ LDR t4,=MASK_1
+
+ LDRB alpha0, [pAlphaArg]
+ LDRB beta0, [pBetaArg]
+ LDRB alpha1, [pAlphaArg,#1]
+ LDRB beta1, [pBetaArg,#1]
+
+ ;// Multiplying a byte by 0x01010101 replicates it into all four byte lanes
+ MUL alpha0, alpha0, t4
+ MUL beta0, beta0, t4
+ MUL alpha1, alpha1, t4
+ MUL beta1, beta1, t4
+
+ M_STRD alpha0, beta0, pAlphaBeta0
+ M_STRD alpha1, beta1, pAlphaBeta1
+
+ LDR XY,=LOOP_COUNT
+ M_LDR pBS, ppBSArg
+ M_LDR pThresholds, ppThresholdsArg
+ M_STR srcdstStep, pStep
+ M_STRD XY, pBS, pXYBS
+ ;// Step back four rows so pQ0 addresses the p3 row
+ SUB pQ0, pQ0, srcdstStep, LSL #2
+ M_STR pThresholds, ppThresholds
+LoopY
+LoopX
+;//---------------Load Pixels-------------------
+ ;// Remember the p3-row address; r0 is reused as scratch further down
+ M_STR pQ0, ppQ0Step
+ M_LDR p_3, [pQ0], srcdstStep
+ M_LDR p_2, [pQ0], srcdstStep
+ M_STR p_3, pP_3
+ ;// One bS byte per pass (the same value applies to all 4 pixels)
+ LDRB bS, [pBS], #1
+ M_STR pBS, ppBS
+ M_LDR p_1, [pQ0], srcdstStep
+ ;// Flags from this CMP are consumed by BEQ NoFilterBS0 below
+ CMP bS, #0
+ M_LDR p_0, [pQ0], srcdstStep
+ M_LDR q_0, [pQ0], srcdstStep
+ M_LDR q_1, [pQ0], srcdstStep
+ M_LDR q_2, [pQ0], srcdstStep
+ M_LDR q_3, [pQ0], srcdstStep
+ BEQ NoFilterBS0
+ ;// Flags from this CMP stay live until "BLT bSLT4": the instructions in
+ ;// between (USUB8/SEL/MOV/LDR/stores) do not update N, Z, C or V
+ CMP bS, #4
+ M_STR q_3, pQ_3
+
+;//--------------Filtering Decision -------------------
+ LDR m01, =MASK_1 ;// 01010101 mask
+ MOV m00, #MASK_0 ;// 00000000 mask
+
+ ;// Check |p0-q0|<Alpha
+ USUB8 dp0q0, p_0, q_0
+ USUB8 a, q_0, p_0
+ SEL ap0q0, a, dp0q0
+ USUB8 a, ap0q0, alpha
+ SEL filt, m00, m01
+
+ ;// Check |p1-p0|<Beta
+ USUB8 dp1p0, p_1, p_0
+ USUB8 a, p_0, p_1
+ SEL a, a, dp1p0
+ USUB8 a, a, beta
+ SEL filt, m00, filt
+
+ ;// Check |q1-q0|<Beta
+ USUB8 dq1q0, q_1, q_0
+ USUB8 a, q_0, q_1
+ SEL a, a, dq1q0
+ USUB8 a, a, beta
+ SEL filt, m00, filt
+
+ ;// Check ap<Beta
+ USUB8 dp2p0, p_2, p_0
+ USUB8 a, p_0, p_2
+ SEL a, a, dp2p0
+ USUB8 a, a, beta
+ SEL apflg, m00, filt ;// apflg = filt && (ap<beta)
+
+ ;// Check aq<Beta
+ ;// (the GE lanes produced by the last USUB8 here are consumed by the first
+ ;// SEL of whichever branch is taken; t7 = 0 is that SEL's "false" value)
+ USUB8 dq2q0, q_2, q_0
+ USUB8 t2, q_0, q_2
+ SEL t2, t2, dq2q0
+ USUB8 t2, t2, beta
+ MOV t7,#0
+
+ BLT bSLT4
+;//-------------------Filter--------------------
+bSGE4
+ ;//---------bSGE4 Execution---------------
+ SEL t1, t7, filt ;// aqflg = filt && (aq<beta)
+ CMP filt, #0
+ ;// Combine both flags per lane: bit 0 = ap<beta, bit 1 = aq<beta
+ ORR apqflg, apflg, t1, LSL #1
+ ;// On the no-filter path restore pQ0/srcdstStep (r0/r1 were used as scratch)
+ M_LDRD pQ0, srcdstStep, ppQ0Step, EQ
+ BEQ NoFilterFilt0
+
+ BL armVCM4P10_DeblockingLumabSGE4_unsafe
+
+ ;//---------Store result---------------
+ M_LDR pThresholds, ppThresholds
+ ;// Q1b and P2b live in r0/r1, which are about to be reloaded with the
+ ;// pointer and step, so copy them out first
+ MOV p_2, Q1b
+ MOV p_1, P2b
+ M_LDRD pQ0b, Stepb, ppQ0Step
+ ;// Thresholds are not used in this branch but the pointer still advances
+ ADD pThresholds, #1
+ M_STR pThresholds, ppThresholds
+ ;// pQ0b starts at the p3 row; each pre-indexed store moves one row down:
+ ;// p2, p1, p0, q0, then q1 and q2 relative to the q0 row
+ M_STR p_1, [pQ0b, Stepb]!
+ M_STR P1b, [pQ0b, Stepb]!
+ M_STR P0b, [pQ0b, Stepb]!
+ M_STR Q0b, [pQ0b, Stepb]!
+ STR p_2, [pQ0b, Stepb]
+ STR Q2b, [pQ0b, Stepb, LSL #1]
+
+
+ M_LDRD XY, pBS, pXYBS
+ ;// Back to the p3 row, then 4 pixels to the right
+ SUB pQ0, pQ0b, Stepb, LSL #2
+ ADD pQ0, pQ0, #4
+ M_LDRD alpha, beta, pAlphaBeta0
+ ADDS XY, XY, XY
+ M_STR XY, pXYBS
+ BCC LoopX
+ B ExitLoopY
+
+;//---------- Exit of LoopX --------------
+;//---- for the case of no filtering -----
+
+NoFilterBS0
+ ;// Undo the eight post-incremented row loads
+ SUB pQ0, pQ0, srcdstStep, LSL #3
+NoFilterFilt0
+ ADD pQ0, pQ0, #4
+ ;// Load counter for LoopX
+ M_LDRD XY, pBS, pXYBS
+ M_LDR pThresholds, ppThresholds
+ M_LDRD alpha, beta, pAlphaBeta0
+
+ ;// Align the pointers
+ ADDS XY, XY, XY
+ ADD pThresholds, pThresholds, #1
+ M_STR pThresholds, ppThresholds
+ M_STR XY, pXYBS
+ BCC LoopX
+ B ExitLoopY
+
+bSLT4
+ ;//---------bSLT4 Execution---------------
+ SEL aqflg, t7, filt ;// aqflg = filt && (aq<beta)
+ M_LDR ptC0, ppThresholds
+ CMP filt, #0
+ ;// On the no-filter path restore pQ0/srcdstStep (r0/r1 were used as scratch)
+ M_LDRD pQ0, srcdstStep, ppQ0Step, EQ
+ BEQ NoFilterFilt0
+
+ ;// One threshold byte per pass
+ LDRB tC0, [ptC0], #1
+ M_STR ptC0, ppThresholds
+
+ BL armVCM4P10_DeblockingLumabSLT4_unsafe
+
+ ;//---------Store result---------------
+ ;// P0a lives in r1, which is about to be reloaded with the step
+ MOV p_2, P0a
+ M_LDRD pQ0a, Stepa, ppQ0Step
+ ;// pQ0a starts at the p3 row: +2 rows = p1, then p0, q0 and q1
+ M_STR P1a, [pQ0a, Stepa, LSL #1]!
+ M_STR p_2, [pQ0a, Stepa]!
+ M_STR Q0a, [pQ0a, Stepa]!
+ STR Q1a, [pQ0a, Stepa]
+
+ ;// Load counter
+ M_LDRD XY, pBS, pXYBS
+ M_LDRD alpha, beta, pAlphaBeta0
+
+ ;// Back to the p3 row, then 4 pixels to the right
+ SUB pQ0, pQ0a, Stepa, LSL #2
+ ADD pQ0, pQ0, #4
+
+ ADDS XY, XY, XY
+ M_STR XY, pXYBS
+ BCC LoopX
+
+;//-------- Common Exit of LoopY -----------------
+ ;// Align the pointers
+ ;// Z from the last ADDS XY is still live at BNE LoopY: the instructions
+ ;// in between are expected to leave the flags untouched.
+ExitLoopY
+ ;// All edges after the first use the second alpha/beta pair
+ M_LDRD alpha, beta, pAlphaBeta1
+ ;// Undo the 16-pixel horizontal advance and move down four rows
+ SUB pQ0, pQ0, #16
+ ADD pQ0, pQ0, srcdstStep, LSL #2
+ M_STRD alpha, beta, pAlphaBeta0
+
+ BNE LoopY
+ MOV r0, #OMX_Sts_NoErr
+;//-----------------End Filter--------------------
+ M_END
+
+ ENDIF
+
+
+ END
+
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
new file mode 100644
index 0000000..417ddc2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
@@ -0,0 +1,550 @@
+;//
+;//
+;// File Name: omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ IMPORT armVCM4P10_DeblockingLumabSLT4_unsafe
+ IMPORT armVCM4P10_DeblockingLumabSGE4_unsafe
+
+
+ IF ARM1136JS
+
+MASK_0 EQU 0x00000000 ;// all-zero word: SEL source for byte lanes that fail a test
+MASK_1 EQU 0x01010101 ;// 1 in every byte lane: initial per-lane filter flag, also the byte-replication multiplier
+MASK_2 EQU 0xff00ff00 ;// keeps bytes 3 and 1 of a word (used by the 4x4 byte transposes)
+LOOP_COUNT EQU 0x11110000 ;// doubled once per edge: carry out on every 4th doubling, zero after the 16th
+
+;// Declare input registers
+
+pSrcDst RN 0
+srcdstStep RN 1
+pAlphaArg RN 2
+pBetaArg RN 3
+
+pThresholds RN 14
+pBS RN 9
+pQ0 RN 0
+bS RN 2
+
+alpha RN 6
+alpha0 RN 6
+alpha1 RN 8
+
+beta RN 7
+beta0 RN 7
+beta1 RN 9
+
+;// Declare Local/Temporary variables
+
+;// Pixels
+p_0 RN 3
+p_1 RN 5
+p_2 RN 4
+p_3 RN 2
+q_0 RN 8
+q_1 RN 9
+q_2 RN 10
+q_3 RN 12
+
+;// Unpacking
+mask RN 11
+
+row0 RN 2
+row1 RN 4
+row2 RN 5
+row3 RN 3
+
+row4 RN 8
+row5 RN 9
+row6 RN 10
+row7 RN 12
+row8 RN 14
+row9 RN 7
+
+tunpk0 RN 8
+tunpk1 RN 9
+tunpk2 RN 10
+tunpk3 RN 12
+tunpk4 RN 0
+
+tunpk5 RN 1
+tunpk6 RN 14
+tunpk7 RN 2
+tunpk8 RN 5
+tunpk9 RN 6
+
+
+;// Filtering
+
+dp0q0 RN 12
+dp1p0 RN 12
+dq1q0 RN 12
+dp2p0 RN 12
+dq2q0 RN 12
+
+ap0q0 RN 1
+filt RN 2
+
+m00 RN 14
+m01 RN 11
+
+apflg RN 0
+aqflg RN 6
+apqflg RN 0
+
+
+;//Declarations for bSLT4 kernel
+
+tC0 RN 7
+ptC0 RN 1
+
+pQ0a RN 0
+Stepa RN 1
+maska RN 14
+
+P0a RN 1
+P1a RN 8
+Q0a RN 7
+Q1a RN 11
+
+;//Declarations for bSGE4 kernel
+
+pQ0b RN 0
+Stepb RN 1
+maskb RN 14
+
+P0b RN 6
+P1b RN 7
+P2b RN 1
+P3b RN 3
+
+Q0b RN 9
+Q1b RN 0
+Q2b RN 2
+Q3b RN 3
+
+;// Miscellaneous
+XY RN 8
+t0 RN 3
+t1 RN 12
+t2 RN 14
+t7 RN 7
+t4 RN 4
+t5 RN 1
+t8 RN 6
+a RN 0
+
+
+
+ ;// Allocate stack memory
+ M_ALLOC4 ppThresholds,4 ;// current pThresholds pointer
+ M_ALLOC4 pQ_3,4 ;// spill slot for the packed q3 column
+ M_ALLOC4 pP_3,4 ;// spill slot for the packed p3 column
+ M_ALLOC8 pAlphaBeta0,8 ;// byte-replicated {alpha[0], beta[0]}
+ M_ALLOC8 pAlphaBeta1,8 ;// byte-replicated {alpha[1], beta[1]}
+ M_ALLOC8 pXYBS,4 ;// XY loop counter; read/written as an {XY, pBS} pair with M_LDRD/M_STRD
+ M_ALLOC4 ppBS,4 ;// saved pBS. NOTE(review): the pair access presumably relies on this slot directly following pXYBS - confirm M_ALLOC layout in armCOMM_s.h
+ M_ALLOC8 ppQ0Step,4 ;// saved pQ0; read as a {pQ0, srcdstStep} pair with M_LDRD
+ M_ALLOC4 pStep,4 ;// saved srcdstStep (same adjacency assumption as pXYBS/ppBS - confirm)
+
+ ;// Function header
+ M_START omxVCM4P10_FilterDeblockingLuma_VerEdge_I, r11
+
+ ;//Input arguments on the stack
+ M_ARG ppThresholdsArg, 4
+ M_ARG ppBSArg, 4
+
+ LDR t4,=MASK_1 ;// 0x01010101: multiplying a byte by this copies it into all four lanes
+
+ LDRB alpha0, [pAlphaArg] ;// alpha[0]
+ LDRB beta0, [pBetaArg] ;// beta[0]
+ LDRB alpha1, [pAlphaArg,#1] ;// alpha[1]
+ LDRB beta1, [pBetaArg,#1] ;// beta[1]
+
+ MUL alpha0, alpha0, t4 ;// replicate each threshold across the word for the USUB8 compares
+ MUL beta0, beta0, t4
+ MUL alpha1, alpha1, t4
+ MUL beta1, beta1, t4
+
+ M_STRD alpha0, beta0, pAlphaBeta0 ;// keep both pairs on the stack; r6-r9 are reused below
+ M_STRD alpha1, beta1, pAlphaBeta1
+
+ LDR XY,=LOOP_COUNT ;// combined inner/outer counter, advanced with ADDS XY, XY, XY
+ M_LDR pBS, ppBSArg
+ M_LDR pThresholds, ppThresholdsArg
+ M_STR srcdstStep, pStep
+ M_STRD XY, pBS, pXYBS ;// stores XY at pXYBS and pBS in the following word
+ M_STR pThresholds, ppThresholds
+
+ SUB pQ0, pQ0, #4 ;// step back 4 bytes so the first loads fetch the p-side pixels
+LoopY ;// also re-entered from the no-filter exits to reload the p side from memory
+;//---------------Load Pixels-------------------
+
+;//----------------Pack p0-p3-----------------------
+ LDR mask, =MASK_2 ;// 0xff00ff00
+
+ M_LDR row0, [pQ0], srcdstStep
+ M_LDR row1, [pQ0], srcdstStep
+ LDR row2, [pQ0]
+ LDR row3, [pQ0, srcdstStep]
+ SUB pQ0, pQ0, srcdstStep, LSL #1 ;// undo the two post-increments: pQ0 is back on row 0
+
+ ;// row0 = [r0p0 r0p1 r0p2 r0p3] (bytes listed most-significant first)
+ ;// row1 = [r1p0 r1p1 r1p2 r1p3]
+ ;// row2 = [r2p0 r2p1 r2p2 r2p3]
+ ;// row3 = [r3p0 r3p1 r3p2 r3p3]
+
+ AND tunpk0, mask, row0 ;// bytes 3,1 of row0 in the upper byte of each halfword
+ AND tunpk6, mask, row0, LSL#8 ;// bytes 2,0 of row0 in the upper byte of each halfword
+ UXTAB16 tunpk0, tunpk0, row1, ROR#8 ;// add bytes 3,1 of row1 into the lower bytes
+ UXTAB16 tunpk6, tunpk6, row1 ;// add bytes 2,0 of row1 into the lower bytes
+ AND tunpk2, mask, row2
+ AND tunpk3, mask, row2, LSL#8
+ UXTAB16 tunpk2, tunpk2, row3, ROR#8
+ UXTAB16 tunpk3, tunpk3, row3
+
+ ;// tunpk0 = [r0p0 r1p0 r0p2 r1p2]
+ ;// tunpk6 = [r0p1 r1p1 r0p3 r1p3]
+ ;// tunpk2 = [r2p0 r3p0 r2p2 r3p2]
+ ;// tunpk3 = [r2p1 r3p1 r2p3 r3p3]
+
+ PKHTB p_0, tunpk0, tunpk2, ASR#16 ;// top halves -> column p0 of all four rows
+ PKHTB p_1, tunpk6, tunpk3, ASR#16
+ PKHBT p_2, tunpk2, tunpk0, LSL#16 ;// bottom halves -> column p2 of all four rows
+ PKHBT p_3, tunpk3, tunpk6, LSL#16
+
+
+ ;// p_0 = [r0p0 r1p0 r2p0 r3p0]
+ ;// p_1 = [r0p1 r1p1 r2p1 r3p1]
+ ;// p_2 = [r0p2 r1p2 r2p2 r3p2]
+ ;// p_3 = [r0p3 r1p3 r2p3 r3p3]
+
+ M_STR p_3, pP_3 ;// p_3 shares r2 with bS/filt, so p3 is kept on the stack
+
+;//----------------Pack q0-q3-----------------------
+LoopX
+ LDRB bS, [pBS], #4 ;// bS for this edge; pBS steps by 4 per edge
+ M_STR pQ0, ppQ0Step ;// save the p-side row-0 pointer: r0 is reused as tunpk4 below
+ LDR mask, =MASK_2
+ CMP bS, #0
+ M_STR pBS, ppBS
+
+ LDR row4, [pQ0, #4]! ;// q-side row 0; write-back leaves pQ0 on the q side
+ BEQ.W NoFilterBS0 ;// bS == 0: nothing to filter on this edge
+ M_LDR row5, [pQ0, srcdstStep]!
+ M_LDR row6, [pQ0, srcdstStep]!
+ M_LDR row7, [pQ0, srcdstStep]
+
+ ;// row4 = [r0q3 r0q2 r0q1 r0q0] (bytes listed most-significant first)
+ ;// row5 = [r1q3 r1q2 r1q1 r1q0]
+ ;// row6 = [r2q3 r2q2 r2q1 r2q0]
+ ;// row7 = [r3q3 r3q2 r3q1 r3q0]
+
+ AND tunpk4, mask, row4
+ CMP bS, #4 ;// flags are consumed by "BLT bSLT4" after the filtering decision
+ AND tunpk5, mask, row4, LSL#8
+ UXTAB16 tunpk4, tunpk4, row5, ROR#8
+ UXTAB16 tunpk5, tunpk5, row5
+ AND tunpk6, mask, row6
+ AND tunpk7, mask, row6, LSL#8
+ UXTAB16 tunpk6, tunpk6, row7, ROR#8
+ UXTAB16 tunpk7, tunpk7, row7
+
+ ;// tunpk4 = [r0q3 r1q3 r0q1 r1q1]
+ ;// tunpk5 = [r0q2 r1q2 r0q0 r1q0]
+ ;// tunpk6 = [r2q3 r3q3 r2q1 r3q1]
+ ;// tunpk7 = [r2q2 r3q2 r2q0 r3q0]
+
+ PKHTB q_3, tunpk4, tunpk6, ASR#16 ;// top halves -> column q3 of all four rows
+ PKHTB q_2, tunpk5, tunpk7, ASR#16
+ PKHBT q_1, tunpk6, tunpk4, LSL#16 ;// bottom halves -> column q1 of all four rows
+ M_STR q_3, pQ_3 ;// q_3 shares r12 with the dp*/dq* temporaries, so q3 is kept on the stack
+ PKHBT q_0, tunpk7, tunpk5, LSL#16
+
+
+ ;// q_0 = [r0q0 r1q0 r2q0 r3q0]
+ ;// q_1 = [r0q1 r1q1 r2q1 r3q1]
+ ;// q_2 = [r0q2 r1q2 r2q2 r3q2]
+ ;// q_3 = [r0q3 r1q3 r2q3 r3q3]
+
+
+;//--------------Filtering Decision -------------------
+ LDR m01, =MASK_1 ;// 01010101 mask
+ MOV m00, #MASK_0 ;// 00000000 mask
+
+ ;// Check |p0-q0|<Alpha
+ USUB8 dp0q0, p_0, q_0 ;// p0-q0 per byte
+ USUB8 a, q_0, p_0 ;// q0-p0 per byte; GE[i] set where q0 >= p0
+ SEL ap0q0, a, dp0q0 ;// per-byte |p0-q0|
+ USUB8 a, ap0q0, alpha ;// GE[i] set where |p0-q0| >= alpha
+ SEL filt, m00, m01 ;// filt lane = 01 where |p0-q0| < alpha, else 00
+
+ ;// Check |p1-p0|<Beta
+ USUB8 dp1p0, p_1, p_0
+ USUB8 a, p_0, p_1
+ SEL a, a, dp1p0 ;// per-byte |p1-p0|
+ USUB8 a, a, beta
+ SEL filt, m00, filt ;// clear lanes where |p1-p0| >= beta
+
+ ;// Check |q1-q0|<Beta
+ USUB8 dq1q0, q_1, q_0
+ USUB8 a, q_0, q_1
+ SEL a, a, dq1q0 ;// per-byte |q1-q0|
+ USUB8 a, a, beta
+ SEL filt, m00, filt ;// clear lanes where |q1-q0| >= beta
+
+ ;// Check ap<Beta
+ USUB8 dp2p0, p_2, p_0
+ USUB8 a, p_0, p_2
+ SEL a, a, dp2p0 ;// ap = per-byte |p2-p0|
+ USUB8 a, a, beta
+ SEL apflg, m00, filt ;// apflg = filt && (ap<beta)
+
+ ;// Check aq<Beta
+ USUB8 dq2q0, q_2, q_0
+ USUB8 t2, q_0, q_2
+ SEL t2, t2, dq2q0 ;// aq = per-byte |q2-q0|
+ USUB8 t2, t2, beta ;// GE bits are left set for the SEL at bSGE4/bSLT4
+ MOV t7,#0 ;// zero source for that SEL; t7 shares r7 with beta, which is reloaded later
+
+
+ BLT bSLT4 ;// NZCV still comes from "CMP bS, #4" in LoopX; USUB8 only writes the GE bits
+;//-------------------Filter--------------------
+bSGE4
+ ;//---------bSGE4 Execution---------------
+ SEL t1, t7, filt ;// t1 = aqflg = filt && (aq<beta)
+ CMP filt, #0
+ ORR apqflg, apflg, t1, LSL #1 ;// per lane: bit0 = ap flag, bit1 = aq flag
+ M_LDRD pQ0, srcdstStep, ppQ0Step, EQ ;// no lane to filter: restore pQ0/step before leaving
+ BEQ NoFilterFilt0
+
+ BL armVCM4P10_DeblockingLumabSGE4_unsafe ;// NOTE(review): results are read below from the P*b/Q*b registers - confirm against the helper
+
+ ;//---------Store result---------------
+
+ LDR maskb,=MASK_2
+
+ ;// P0b = [r0p0 r1p0 r2p0 r3p0]
+ ;// P1b = [r0p1 r1p1 r2p1 r3p1]
+ ;// P2b = [r0p2 r1p2 r2p2 r3p2]
+ ;// P3b = [r0p3 r1p3 r2p3 r3p3]
+
+ M_LDR P3b, pP_3 ;// p3 column saved earlier
+ M_STR Q0b, pP_3 ;// this edge's q0 becomes the next edge's p3
+
+ ;//------Pack p0-p3------
+ AND tunpk0, maskb, P0b
+ AND tunpk2, maskb, P0b, LSL#8
+ UXTAB16 tunpk0, tunpk0, P1b, ROR#8
+ UXTAB16 tunpk2, tunpk2, P1b
+
+ AND tunpk3, maskb, P2b
+ AND tunpk8, maskb, P2b, LSL#8
+ UXTAB16 tunpk3, tunpk3, P3b, ROR#8
+ UXTAB16 tunpk8, tunpk8, P3b
+
+ ;// tunpk0 = [r0p0 r0p1 r2p0 r2p1]
+ ;// tunpk2 = [r1p0 r1p1 r3p0 r3p1]
+ ;// tunpk3 = [r0p2 r0p3 r2p2 r2p3]
+ ;// tunpk8 = [r1p2 r1p3 r3p2 r3p3]
+
+ MOV p_2, Q1b ;// q1 becomes the next edge's p2; Q1b shares r0 with pQ0b, loaded next
+ M_LDRD pQ0b, Stepb, ppQ0Step ;// p-side row-0 pointer and step saved at LoopX
+
+ PKHTB row9, tunpk0, tunpk3, ASR#16
+ PKHBT row7, tunpk3, tunpk0, LSL#16
+ PKHTB row3, tunpk2, tunpk8, ASR#16
+ PKHBT row6, tunpk8, tunpk2, LSL#16
+
+ ;// row9 = [r0p0 r0p1 r0p2 r0p3]
+ ;// row3 = [r1p0 r1p1 r1p2 r1p3]
+ ;// row7 = [r2p0 r2p1 r2p2 r2p3]
+ ;// row6 = [r3p0 r3p1 r3p2 r3p3]
+
+ M_STR row9, [pQ0b], Stepb ;// row 0, then advance to row 1
+ STR row7, [pQ0b, Stepb] ;// row 2
+ STR row6, [pQ0b, Stepb, LSL #1] ;// row 3
+ STR row3, [pQ0b], #4 ;// row 1, then move to the q side of row 1
+
+ M_LDR Q3b, pQ_3 ;// q3 column saved at LoopX; r3 is also p_0 for the next edge
+
+ ;// Q0b = [r0q0 r1q0 r2q0 r3q0]
+ ;// Q1b = [r0q1 r1q1 r2q1 r3q1]
+ ;// Q2b = [r0q2 r1q2 r2q2 r3q2]
+ ;// Q3b = [r0q3 r1q3 r2q3 r3q3]
+
+ ;//------Pack q0-q3------
+ AND tunpk0, maskb, p_2 ;// p_2 holds the copy of Q1b made above
+ AND tunpk2, maskb, p_2, LSL#8
+ UXTAB16 tunpk0, tunpk0, Q0b, ROR#8
+ UXTAB16 tunpk2, tunpk2, Q0b
+
+ AND tunpk3, maskb, Q3b
+ AND tunpk8, maskb, Q3b, LSL#8
+ UXTAB16 tunpk3, tunpk3, Q2b, ROR#8
+ UXTAB16 tunpk8, tunpk8, Q2b
+
+ ;// tunpk0 = [r0q1 r0q0 r2q1 r2q0]
+ ;// tunpk2 = [r1q1 r1q0 r3q1 r3q0]
+ ;// tunpk3 = [r0q3 r0q2 r2q3 r2q2]
+ ;// tunpk8 = [r1q3 r1q2 r3q3 r3q2]
+
+ PKHTB row8, tunpk3, tunpk0, ASR#16
+ PKHBT row7, tunpk0, tunpk3, LSL#16
+ PKHTB row4, tunpk8, tunpk2, ASR#16
+ PKHBT row6, tunpk2, tunpk8, LSL#16
+
+ ;// row8 = [r0q3 r0q2 r0q1 r0q0] (most-significant byte first; q0 is stored at the lowest address)
+ ;// row4 = [r1q3 r1q2 r1q1 r1q0]
+ ;// row7 = [r2q3 r2q2 r2q1 r2q0]
+ ;// row6 = [r3q3 r3q2 r3q1 r3q0]
+
+ STR row4, [pQ0b] ;// row 1, q side
+ STR row7, [pQ0b, Stepb] ;// row 2, q side
+ STR row6, [pQ0b, Stepb, LSL #1] ;// row 3, q side
+
+ SUB pQ0, pQ0b, Stepb ;// back to row 0 on the q side = p side of the next edge
+ MOV p_1, Q2b ;// q2 becomes the next edge's p1
+
+ STR row8, [pQ0] ;// row 0, q side
+
+ M_LDRD XY, pBS, pXYBS
+ M_LDR pThresholds, ppThresholds
+ M_LDRD alpha, beta, pAlphaBeta1 ;// every edge after the first uses alpha[1]/beta[1]
+
+ ADDS XY, XY, XY ;// carry set after the 4th edge of this row group
+ ADD pThresholds, #4 ;// thresholds are not read on this path, but the pointer still advances
+ M_STR pThresholds, ppThresholds
+ M_STR XY, pXYBS
+ BCC LoopX
+ B ExitLoopY
+
+;//---------- Exit of LoopX --------------
+;//---- for the case of no filtering -----
+
+NoFilterFilt0 ;// entered with pQ0 just reloaded from ppQ0Step (p side, row 0)
+ ADD pQ0, pQ0, #4 ;// move to the q side, which is the p side of the next edge
+NoFilterBS0 ;// entered from LoopX with pQ0 already advanced by the write-back load
+ ;// Load counter for LoopX
+ M_LDRD XY, pBS, pXYBS
+ M_LDR pThresholds, ppThresholds
+ M_LDRD alpha, beta, pAlphaBeta1 ;// every edge after the first uses alpha[1]/beta[1]
+
+ ;// Align the pointer
+ ADDS XY, XY, XY ;// carry set after the 4th edge of this row group
+ ADD pThresholds, pThresholds, #4
+ M_STR pThresholds, ppThresholds
+ M_STR XY, pXYBS
+ BCC LoopY ;// the packed p registers were not refreshed on this path, so reload them from memory at LoopY
+ B ExitLoopY
+
+bSLT4
+ ;//---------bSLT4 Execution---------------
+ SEL aqflg, t7, filt ;// aqflg = filt && (aq<beta)
+ M_LDR ptC0, ppThresholds
+ CMP filt, #0
+ M_LDRD pQ0, srcdstStep, ppQ0Step, EQ ;// no lane to filter: restore pQ0/step before leaving
+ BEQ NoFilterFilt0
+
+ LDRB tC0, [ptC0], #4 ;// tC0 for this edge; the pointer steps by 4 like pBS
+ M_STR ptC0, ppThresholds
+
+ BL armVCM4P10_DeblockingLumabSLT4_unsafe ;// NOTE(review): results are read below from P0a/P1a/Q0a/Q1a - confirm against the helper
+
+ ;//---------Store result---------------
+ ;//--------Pack p1,p0,q1,q0------------
+
+ ;// Load MASK_2 and start handing this edge's q pixels over as the next edge's p pixels
+ LDR maska,=MASK_2
+ M_STR Q0a, pP_3 ;// q0 becomes the next edge's p3
+ MOV p_1, q_2 ;// q2 becomes the next edge's p1
+
+ ;// P1a = [r0p1 r1p1 r2p1 r3p1]
+ ;// P0a = [r0p0 r1p0 r2p0 r3p0]
+ ;// Q0a = [r0q0 r1q0 r2q0 r3q0]
+ ;// Q1a = [r0q1 r1q1 r2q1 r3q1]
+
+ AND tunpk1, maska, P0a
+ AND tunpk2, maska, P0a, LSL#8
+ UXTAB16 tunpk1, tunpk1, P1a, ROR#8
+ UXTAB16 tunpk2, tunpk2, P1a
+
+ M_LDRD pQ0a, Stepa, ppQ0Step ;// Stepa shares r1 with P0a, so this load comes after P0a is consumed
+
+ AND tunpk9, maska, Q1a
+ AND tunpk3, maska, Q1a, LSL#8
+ UXTAB16 tunpk9, tunpk9, Q0a, ROR#8
+ UXTAB16 tunpk3, tunpk3, Q0a
+
+ ;// tunpk1 = [r0p0 r0p1 r2p0 r2p1]
+ ;// tunpk2 = [r1p0 r1p1 r3p0 r3p1]
+ ;// tunpk9 = [r0q1 r0q0 r2q1 r2q0]
+ ;// tunpk3 = [r1q1 r1q0 r3q1 r3q0]
+
+ MOV t4, tunpk1, LSR #16 ;// row-0 p pair into the low halfword
+ MOV t0, tunpk9, LSR #16 ;// row-0 q pair into the low halfword
+
+ STRH t4,[pQ0a, #2]! ;//Stores [r0p0 r0p1]
+ STRH t0,[pQ0a, #2] ;//Stores [r0q0 r0q1]
+
+ MOV t4, tunpk2, LSR #16 ;// row-1 p pair
+ MOV t0, tunpk3, LSR #16 ;// row-1 q pair
+
+ M_STRH t4,[pQ0a, Stepa]! ;//Stores [r1p0 r1p1]
+ STRH t0,[pQ0a, #2] ;//Stores [r1q0 r1q1]
+
+ M_STRH tunpk1,[pQ0a, Stepa]! ;//Stores [r2p0 r2p1]
+ STRH tunpk2,[pQ0a, Stepa] ;//Stores [r3p0 r3p1]
+ STRH tunpk9,[pQ0a, #2]! ;//Stores [r2q0 r2q1]
+ STRH tunpk3,[pQ0a, Stepa] ;//Stores [r3q0 r3q1]
+
+ SUB pQ0, pQ0a, Stepa, LSL #1 ;// pQ0a is on row 2 at the q side; go back to row 0
+
+ ;// Load counter
+ M_LDRD XY, pBS, pXYBS
+
+ ;// Reload Pixels
+ M_LDR p_0, pQ_3 ;// q3 saved at LoopX becomes the next edge's p0
+ MOV p_2, Q1a ;// q1 becomes the next edge's p2
+
+ M_LDRD alpha, beta, pAlphaBeta1 ;// every edge after the first uses alpha[1]/beta[1]
+
+ ADDS XY, XY, XY ;// carry set after the 4th edge; Z set after the 16th overall
+ M_STR XY, pXYBS
+ BCC LoopX
+
+;//-------- Common Exit of LoopY -----------------
+ ;// Align the pointers
+ M_LDR pThresholds, ppThresholds ;// only the bSLT4 fall-through needs this; other paths arrive with it loaded
+ExitLoopY
+ SUB pQ0, pQ0, #16 ;// undo the four 4-byte edge steps
+ ADD pQ0, pQ0, srcdstStep, LSL #2 ;// move down 4 rows
+ SUB pBS, pBS, #15 ;// four +4 steps minus 15 = net +1; pBS stays live in r9, which the LoopY load does not touch
+ SUB pThresholds, pThresholds, #15 ;// same net +1 for the thresholds pointer
+ M_STR pThresholds, ppThresholds
+
+ M_LDRD alpha, beta, pAlphaBeta0 ;// the first edge of each row group uses alpha[0]/beta[0]
+
+ BNE LoopY ;// Z still comes from the last "ADDS XY, XY, XY": XY reaches zero after 16 edges
+ MOV r0, #OMX_Sts_NoErr
+
+ M_END
+;//-----------------End Filter--------------------
+
+ ENDIF
+
+ END
+
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
new file mode 100644
index 0000000..de835bd
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c
@@ -0,0 +1,79 @@
+/**
+ *
+ * File Name: omxVCM4P10_InterpolateChroma.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ * Description:
+ * This function will calculate 1/8 Pixel interpolation for Chroma Block
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armVC.h"
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P10_InterpolateChroma,
+ *
+ * Description:
+ * Performs 1/8-pixel interpolation for inter chroma MB.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] pSrc Pointer to the source reference frame buffer
+ * [in] srcStep Reference frame step in byte
+ * [in] dstStep Destination frame step in byte. Must be multiple of roi.width.
+ * [in] dx Fractional part of horizontal motion vector component
+ * in 1/8 pixel unit;valid in the range [0,7]
+ * [in] dy Fractional part of vertical motion vector component
+ * in 1/8 pixel unit;valid in the range [0,7]
+ * [in] roi Dimension of the interpolation region;the parameters roi.width and roi.height must
+ * be equal to either 2, 4, or 8.
+ * [out] pDst Pointer to the destination frame buffer.
+ * if roi.width==2, 2-byte alignment required
+ * if roi.width==4, 4-byte alignment required
+ * if roi.width==8, 8-byte alignment required
+ *
+ * Return Value:
+ * If the function runs without error, it returns OMX_Sts_NoErr.
+ * If one of the following cases occurs, the function returns OMX_Sts_BadArgErr:
+ * pSrc or pDst is NULL.
+ * srcStep or dstStep < 8.
+ * dx or dy is out of range [0-7].
+ * roi.width or roi.height is out of range {2,4,8}.
+ * roi.width is equal to 2, but pDst is not 2-byte aligned.
+ * roi.width is equal to 4, but pDst is not 4-byte aligned.
+ * roi.width is equal to 8, but pDst is not 8 byte aligned.
+ * srcStep or dstStep is not a multiple of 8.
+ *
+ */
+
+OMXResult omxVCM4P10_InterpolateChroma(
+ const OMX_U8 *pSrc,
+ OMX_S32 srcStep,
+ OMX_U8 *pDst,
+ OMX_S32 dstStep,
+ OMX_S32 dx,
+ OMX_S32 dy,
+ OMXSize roi)
+{
+ /* Forward to the ARM helper, which takes the ROI as separate width/height. */
+ OMX_U8 *pRef = (OMX_U8 *)pSrc; /* helper is called with a non-const pointer */
+ return armVCM4P10_Interpolate_Chroma(pRef, srcStep, pDst, dstStep, roi.width, roi.height, dx, dy);
+}
+
+
+/*****************************************************************************
+ * END OF FILE
+ *****************************************************************************/
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
new file mode 100644
index 0000000..cf611a3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
@@ -0,0 +1,426 @@
+;//
+;//
+;// File Name: omxVCM4P10_InterpolateLuma_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Function:
+;// omxVCM4P10_InterpolateLuma
+;//
+;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly.
+;// Performs quarter pel interpolation of inter luma MB.
+;// It's assumed that the frame is already padded when calling this function.
+;// Parameters:
+;// [in] pSrc Pointer to the source reference frame buffer
+;// [in] srcStep Reference frame step in byte
+;// [in] dstStep Destination frame step in byte. Must be multiple of roi.width
+;// [in] dx Fractional part of horizontal motion vector
+;// component in 1/4 pixel unit; valid in the range [0,3]
+;// [in] dy Fractional part of vertical motion vector
+;// component in 1/4 pixel unit; valid in the range [0,3]
+;// [in] roi Dimension of the interpolation region;the parameters roi.width and roi.height must
+;// be equal to either 4, 8, or 16.
+;// [out] pDst Pointer to the destination frame buffer.
+;// if roi.width==4, 4-byte alignment required
+;// if roi.width==8, 8-byte alignment required
+;// if roi.width==16, 16-byte alignment required
+;//
+;// Return Value:
+;// If the function runs without error, it returns OMX_Sts_NoErr.
+;// It is assumed that the following conditions are satisfied before calling this function:
+;// pSrc and pDst are not NULL.
+;// srcStep and dstStep are >= roi.width.
+;// dx and dy are in the range [0-3].
+;// roi.width and roi.height are each one of {4, 8, 16}.
+;// If roi.width is equal to 4, pDst is 4 byte aligned.
+;// If roi.width is equal to 8, pDst is 8 byte aligned.
+;// If roi.width is equal to 16, pDst is 16 byte aligned.
+;// srcStep and dstStep are multiples of 8.
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT omxVCM4P10_InterpolateLuma
+
+ IF ARM1136JS
+ IMPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+ IMPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ IMPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+ IMPORT armVCM4P10_Average_4x4_Align0_unsafe
+ IMPORT armVCM4P10_Average_4x4_Align2_unsafe
+ IMPORT armVCM4P10_Average_4x4_Align3_unsafe
+ IMPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+ IMPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+ ENDIF
+
+ IF ARM1136JS
+ IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+ IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+ ENDIF
+
+
+
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2
+dstStep RN 3
+iHeight RN 4
+iWidth RN 5
+
+;// Declare other intermediate registers
+idx RN 6
+idy RN 7
+index RN 6
+Temp RN 12
+pArgs RN 11
+
+
+ ;// End of CortexA8
+
+;//-------------------------------------------------------------------------------------------------------------------------
+;//-------------------------------------------------------------------------------------------------------------------------
+ IF ARM1136JS
+
+
+ M_ALLOC4 ppDst, 8 ;// not referenced anywhere in this function
+ M_ALLOC4 ppSrc, 8 ;// not referenced anywhere in this function
+ M_ALLOC4 ppArgs, 16 ;// saved {pSrc, srcStep, pDst, dstStep} for the current 4x4 block
+ M_ALLOC4 pBuffer, 120 ;// 120 = 12x10
+ M_ALLOC8 pInterBuf, 120 ;// 120 = 12*5*2
+ M_ALLOC8 pTempBuf, 32 ;// 32 = 8*4
+
+ ;// Function header
+ ;// Interpolation of luma is implemented by processing block of pixels, size 4x4 at a time.
+ ;// Depending on the values of motion vector fractional parts (dx,dy), one out of 16 cases will be processed.
+ ;// Registers r4, r5, r6 to be preserved by internal unsafe functions
+ ;// r4 - iHeight
+ ;// r5 - iWidth
+ ;// r6 - index
+ M_START omxVCM4P10_InterpolateLuma, r11
+
+;// Declare other intermediate registers
+idx RN 6
+idy RN 7
+index RN 6
+Temp RN 12
+pArgs RN 11
+
+pBuf RN 8
+Height RN 9
+bufStep RN 9
+
+ ;// Define stack arguments
+ M_ARG ptridx, 4 ;// dx
+ M_ARG ptridy, 4 ;// dy
+ M_ARG ptrWidth, 4 ;// roi.width
+ M_ARG ptrHeight, 4 ;// roi.height
+
+ ;// Load dx, dy and the structure elements of roi
+ M_LDR idx, ptridx
+ M_LDR idy, ptridy
+ M_LDR iWidth, ptrWidth
+ M_LDR iHeight, ptrHeight
+
+ M_PRINTF "roi.width %d\n", iWidth
+ M_PRINTF "roi.height %d\n", iHeight
+
+ ADD index, idx, idy, LSL #2 ;// index = dx + 4*dy; overwrites idx (same register r6)
+ M_ADR pArgs, ppArgs
+
+InterpolateLuma
+Block4x4WidthLoop
+Block4x4HeightLoop
+
+ STM pArgs, {pSrc,srcStep,pDst,dstStep} ;// save this block's arguments; several cases overwrite them
+ M_ADR pBuf, pBuffer
+
+ ;// switch table using motion vector as index (index = dx + 4*dy)
+ M_SWITCH index, L
+ M_CASE Case_0
+ M_CASE Case_1
+ M_CASE Case_2
+ M_CASE Case_3
+ M_CASE Case_4
+ M_CASE Case_5
+ M_CASE Case_6
+ M_CASE Case_7
+ M_CASE Case_8
+ M_CASE Case_9
+ M_CASE Case_a
+ M_CASE Case_b
+ M_CASE Case_c
+ M_CASE Case_d
+ M_CASE Case_e
+ M_CASE Case_f
+ M_ENDSWITCH
+
+Case_0
+ ;// Case G: dx=0, dy=0 (index = dx + 4*dy)
+ M_PRINTF "Case 0 \n"
+
+ BL armVCM4P10_InterpolateLuma_Copy4x4_unsafe
+ B Block4x4LoopEnd
+
+Case_1
+ ;// Case a: dx=1, dy=0
+ M_PRINTF "Case 1 \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes left of the block
+ MOV Height, #4
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ BL armVCM4P10_Average_4x4_Align2_unsafe
+ B Block4x4LoopEnd
+Case_2
+ ;// Case b: dx=2, dy=0
+ M_PRINTF "Case 2 \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes left of the block
+ MOV Height, #4
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ B Block4x4LoopEnd
+Case_3
+ ;// Case c: dx=3, dy=0
+ M_PRINTF "Case 3 \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes left of the block
+ MOV Height, #4
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ BL armVCM4P10_Average_4x4_Align3_unsafe
+ B Block4x4LoopEnd
+Case_4
+ ;// Case d: dx=0, dy=1 (index = dx + 4*dy)
+ M_PRINTF "Case 4 \n"
+
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// start 2 rows above the block
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ BL armVCM4P10_Average_4x4_Align0_unsafe
+
+ B Block4x4LoopEnd
+Case_5
+ ;// Case e: dx=1, dy=1
+ M_PRINTF "Case 5 \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes left of the block
+ MOV Height, #4
+ M_ADR pDst, pTempBuf ;// first pass is pointed at the stack scratch block
+ MOV dstStep, #4 ;// with a 4-byte row stride
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ M_ADR pArgs, ppArgs
+ LDM pArgs, {pSrc, srcStep, pDst, dstStep} ;// restore the arguments saved at the loop top
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// start 2 rows above the block
+ M_ADR pBuf, pBuffer
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ M_ADR pSrc, pTempBuf ;// pSrc/srcStep now describe the scratch block from the first pass
+ MOV srcStep, #4
+ BL armVCM4P10_Average_4x4_Align0_unsafe
+
+
+ B Block4x4LoopEnd
+Case_6
+ ;// Case f: dx=2, dy=1
+ M_PRINTF "Case 6 \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes left of
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// and 2 rows above the block
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ M_ADR pBuf, pInterBuf
+ BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+ M_ADR idy, pTempBuf ;// r7 (idy) is free after index was computed; here it carries the pTempBuf address
+ BL armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+ BL armVCM4P10_Average_4x4_Align0_unsafe
+ B Block4x4LoopEnd
+Case_7
+ ;// Case g: dx=3, dy=1
+ M_PRINTF "Case 7 \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes left of the block
+ MOV Height, #4
+ M_ADR pDst, pTempBuf ;// first pass is pointed at the stack scratch block
+ MOV dstStep, #4 ;// with a 4-byte row stride
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ M_ADR pArgs, ppArgs
+ LDM pArgs, {pSrc, srcStep, pDst, dstStep} ;// restore the arguments saved at the loop top
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// start 2 rows above the block
+ ADD pSrc, pSrc, #1 ;// and one byte to the right
+ M_ADR pBuf, pBuffer
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ M_ADR pSrc, pTempBuf ;// pSrc/srcStep now describe the scratch block from the first pass
+ MOV srcStep, #4
+ BL armVCM4P10_Average_4x4_Align0_unsafe
+
+ B Block4x4LoopEnd
+Case_8
+ ;// Case h: dx=0, dy=2 (index = dx + 4*dy)
+ M_PRINTF "Case 8 \n"
+
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// start 2 rows above the block
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ B Block4x4LoopEnd
+Case_9
+ ;// Case i: dx=1, dy=2
+ M_PRINTF "Case 9 \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes left of
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// and 2 rows above the block
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ ADD pSrc, pSrc, srcStep, LSL #1 ;// advance pSrc by 2 rows before the diagonal pass
+ M_ADR pBuf, pInterBuf
+ BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+ M_ADR idy, pTempBuf ;// r7 (idy) is free after index was computed; here it carries the pTempBuf address
+ BL armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+ BL armVCM4P10_Average_4x4_Align2_unsafe
+ B Block4x4LoopEnd
+Case_a
+ ;// Case j: dx=2, dy=2
+ M_PRINTF "Case a \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes left of
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// and 2 rows above the block
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ ADD pSrc, pSrc, srcStep, LSL #1 ;// advance pSrc by 2 rows before the diagonal pass
+ M_ADR pBuf, pInterBuf
+ BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+ B Block4x4LoopEnd
+Case_b
+ ;// Case k: dx=3, dy=2
+ M_PRINTF "Case b \n"
+ SUB pSrc, pSrc, #2 ;// start 2 bytes left of
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// and 2 rows above the block
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ ADD pSrc, pSrc, srcStep, LSL #1 ;// advance pSrc by 2 rows before the diagonal pass
+ M_ADR pBuf, pInterBuf
+ BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
+ M_ADR idy, pTempBuf ;// r7 (idy) is free after index was computed; here it carries the pTempBuf address
+ BL armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+ BL armVCM4P10_Average_4x4_Align3_unsafe
+ B Block4x4LoopEnd
+Case_c
+ ;// Case n: dx=0, dy=3 (index = dx + 4*dy)
+ M_PRINTF "Case c \n"
+
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// start 2 rows above the block
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ ADD pSrc, pSrc, srcStep ;// Update pSrc to one row down
+ BL armVCM4P10_Average_4x4_Align0_unsafe
+ B Block4x4LoopEnd
+Case_d
+ ;// Case p: dx=1, dy=3
+ M_PRINTF "Case d \n"
+ SUB pSrc, pSrc, #2 ;// start 2 bytes left of the block
+ ADD pSrc, pSrc, srcStep ;// and one row below it
+ MOV Height, #4
+ M_ADR pDst, pTempBuf ;// first pass is pointed at the stack scratch block
+ MOV dstStep, #4 ;// with a 4-byte row stride
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ M_ADR pArgs, ppArgs
+ LDM pArgs, {pSrc, srcStep, pDst, dstStep} ;// restore the arguments saved at the loop top
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// start 2 rows above the block
+ M_ADR pBuf, pBuffer
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ M_ADR pSrc, pTempBuf ;// pSrc/srcStep now describe the scratch block from the first pass
+ MOV srcStep, #4
+ BL armVCM4P10_Average_4x4_Align0_unsafe
+ B Block4x4LoopEnd
+Case_e
+ ;// Case q: dx=2, dy=3
+ M_PRINTF "Case e \n"
+
+ SUB pSrc, pSrc, #2 ;// start 2 bytes left of
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// and 2 rows above the block
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ M_ADR pBuf, pInterBuf
+ BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
+ M_ADR idy, pTempBuf ;// r7 (idy) is free after index was computed; here it carries the pTempBuf address
+ BL armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+ ADD pSrc, pSrc, #4 ;// NOTE(review): offset into whatever pSrc addresses after VerDiagCopy - confirm against the helper
+ BL armVCM4P10_Average_4x4_Align0_unsafe
+
+ B Block4x4LoopEnd
+Case_f
+ ;// Case r: dx=3, dy=3; falls through into Block4x4LoopEnd
+ M_PRINTF "Case f \n"
+ SUB pSrc, pSrc, #2 ;// start 2 bytes left of the block
+ ADD pSrc, pSrc, srcStep ;// and one row below it
+ MOV Height, #4
+ M_ADR pDst, pTempBuf ;// first pass is pointed at the stack scratch block
+ MOV dstStep, #4 ;// with a 4-byte row stride
+ BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+ M_ADR pArgs, ppArgs
+ LDM pArgs, {pSrc, srcStep, pDst, dstStep} ;// restore the arguments saved at the loop top
+ SUB pSrc, pSrc, srcStep, LSL #1 ;// start 2 rows above the block
+ ADD pSrc, pSrc, #1 ;// and one byte to the right
+ M_ADR pBuf, pBuffer
+ MOV Height, #9
+ BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+ BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
+ M_ADR pSrc, pTempBuf ;// pSrc/srcStep now describe the scratch block from the first pass
+ MOV srcStep, #4
+ BL armVCM4P10_Average_4x4_Align0_unsafe
+
+Block4x4LoopEnd
+
+ ;// Width Loop
+ SUBS iWidth, iWidth, #4 ;// one 4-pixel column of blocks done; flags are consumed by BGT below
+ M_ADR pArgs, ppArgs
+ LDM pArgs, {pSrc,srcStep,pDst,dstStep} ;// Load arguments
+ ADD pSrc, pSrc, #4 ;// next 4x4 block to the right
+ ADD pDst, pDst, #4
+ BGT Block4x4WidthLoop
+
+ ;// Height Loop
+ SUBS iHeight, iHeight, #4 ;// one row of 4x4 blocks done; flags are consumed by BGT below
+ M_LDR iWidth, ptrWidth ;// reset the width counter to roi.width
+ M_ADR pArgs, ppArgs
+ ADD pSrc, pSrc, srcStep, LSL #2 ;// down 4 rows
+ ADD pDst, pDst, dstStep, LSL #2
+ SUB pSrc, pSrc, iWidth ;// and back to the left edge (the width loop advanced by roi.width in total)
+ SUB pDst, pDst, iWidth
+ BGT Block4x4HeightLoop
+
+EndOfInterpolation
+ MOV r0, #0 ;// return value 0
+ M_END
+
+ ENDIF
+
+
+ END
+ \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
new file mode 100644
index 0000000..34fedd8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s
@@ -0,0 +1,494 @@
+;//
+;//
+;// File Name: omxVCM4P10_PredictIntraChroma_8x8_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ EXPORT armVCM4P10_pIndexTable8x8
+
+;// Define the processor variants supported by this file
+
+ M_VARIANTS ARM1136JS
+
+ AREA table, DATA
+;//-------------------------------------------------------
+;// Jump table that implements a C-style switch in assembly
+;// by the method of two levels of indexing: the entry point
+;// loads pc from armVCM4P10_pIndexTable8x8[predMode], so
+;// predMode 0..3 selects DC, HOR, VERT, PLANE respectively.
+;//-------------------------------------------------------
+
+ M_TABLE armVCM4P10_pIndexTable8x8
+ DCD OMX_VC_CHROMA_DC, OMX_VC_CHROMA_HOR
+ DCD OMX_VC_CHROMA_VERT, OMX_VC_CHROMA_PLANE
+
+;// NOTE(review): armVCM4P10_MultiplierTableChroma8x8 is not referenced by
+;// any instruction in this file - presumably used elsewhere; confirm.
+ M_TABLE armVCM4P10_MultiplierTableChroma8x8,1
+ DCW 3, 2, 1,4
+ DCW -3,-2,-1,0
+ DCW 1, 2, 3,4
+
+ IF ARM1136JS
+
+;//--------------------------------------------
+;// Constants
+;//--------------------------------------------
+
+BLK_SIZE EQU 0x8 ;// rows written per call / loop count
+MUL_CONST0 EQU 0x01010101 ;// multiplying a byte value by this replicates it in all 4 bytes
+MASK_CONST EQU 0x00FF00FF ;// keeps the low byte of each halfword (plane case)
+MUL_CONST1 EQU 0x80808080 ;// fill word (128 in every byte) used when no neighbour is available
+
+;//--------------------------------------------
+;// Scratch variable
+;//
+;// NOTE: many names below alias the SAME physical register
+;// (e.g. r14 = leftStepx2 = outerCount = tVal14 = b,
+;//       r12 = y = tVal12 = c, r10 = r0x01010101 = tVal10 = p3210 = p7654,
+;//       r11 = dstStepx2 = r0x00FF00FF = tVal11).
+;// Writing one name destroys every other name mapped to that register,
+;// so the instruction order inside each case matters.
+;//--------------------------------------------
+y RN 12
+pc RN 15
+return RN 0
+pSrcLeft2 RN 1
+pDst2 RN 2
+sum1 RN 6
+sum2 RN 7
+pTable RN 9
+dstStepx2 RN 11
+leftStepx2 RN 14
+outerCount RN 14
+r0x01010101 RN 10
+r0x00FF00FF RN 11
+
+;// Generic temporaries: tValN is simply rN
+tVal0 RN 0
+tVal1 RN 1
+tVal2 RN 2
+tVal3 RN 3
+tVal4 RN 4
+tVal5 RN 5
+tVal6 RN 6
+tVal7 RN 7
+tVal8 RN 8
+tVal9 RN 9
+tVal10 RN 10
+tVal11 RN 11
+tVal12 RN 12
+tVal14 RN 14
+
+;// Plane-prediction gradients
+b RN 14
+c RN 12
+
+;// Plane-prediction accumulators: two signed 16-bit lanes per register,
+;// named {high lane}{low lane}
+p2p0 RN 0
+p3p1 RN 1
+p6p4 RN 2
+p7p5 RN 4
+
+;// Plane-prediction clipped 8-bit results, one per halfword
+pp2pp0 RN 6
+pp3pp1 RN 7
+pp6pp4 RN 8
+pp7pp5 RN 9
+
+;// Packed output words (4 pixels each); both share r10
+p3210 RN 10
+p7654 RN 10
+
+;//--------------------------------------------
+;// Input Arguments
+;// (the first four arrive in r0-r3; the remaining four are
+;//  loaded from the stack by the function prologue)
+;//--------------------------------------------
+pSrcLeft RN 0 ;// input pointer
+pSrcAbove RN 1 ;// input pointer
+pSrcAboveLeft RN 2 ;// input pointer
+pDst RN 3 ;// output pointer
+leftStep RN 4 ;// input variable
+dstStep RN 5 ;// input variable
+predMode RN 6 ;// input variable
+availability RN 7 ;// input variable
+
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntraChroma_8x8 starts
+;//-----------------------------------------------------------------------------------------------
+
+ ;// Write function header
+ ;// Entry point: loads the four stack arguments and dispatches to the
+ ;// per-mode code through the jump table armVCM4P10_pIndexTable8x8.
+ M_START omxVCM4P10_PredictIntraChroma_8x8, r11
+
+ ;// Define stack arguments
+ M_ARG LeftStep, 4
+ M_ARG DstStep, 4
+ M_ARG PredMode, 4
+ M_ARG Availability, 4
+
+ ;// M_STALL ARM1136JS=4
+
+ LDR pTable,=armVCM4P10_pIndexTable8x8 ;// Load index table for switch case
+
+
+ ;// Load argument from the stack
+ M_LDR predMode, PredMode ;// Arg predMode loaded from stack to reg
+ M_LDR leftStep, LeftStep ;// Arg leftStep loaded from stack to reg
+ M_LDR dstStep, DstStep ;// Arg dstStep loaded from stack to reg
+ M_LDR availability, Availability ;// Arg availability loaded from stack to reg
+
+ ;// NOTE(review): none of the cases in this file reads y before r12 is
+ ;// reused under another alias, so this initialisation looks unused here.
+ MOV y, #BLK_SIZE ;// Outer Loop Count
+ ;// NOTE(review): predMode is used as a table index without a range check;
+ ;// the table has four entries, so callers must pass 0..3.
+ LDR pc, [pTable, predMode, LSL #2] ;// Branch to the case based on predMode
+
+OMX_VC_CHROMA_DC
+ ;// DC prediction. This first part handles the case where BOTH the upper
+ ;// and the left neighbours are available; otherwise control passes to
+ ;// TST_UPPER. The 8x8 block is filled as four 4x4 quadrants:
+ ;//   top-left     = (upsum1 + leftsum1 + 4) >> 3
+ ;//   top-right    = (upsum2 + 2) >> 2
+ ;//   bottom-left  = (leftsum2 + 2) >> 2
+ ;//   bottom-right = (upsum2 + leftsum2 + 4) >> 3
+ ;// where upsum1/upsum2 are the sums of pSrcAbove[0..3]/[4..7] and
+ ;// leftsum1/leftsum2 the sums of left rows 0..3/4..7.
+ AND availability, availability,#(OMX_VC_UPPER + OMX_VC_LEFT)
+ CMP availability, #(OMX_VC_UPPER + OMX_VC_LEFT) ;// are both OMX_VC_UPPER and OMX_VC_LEFT set?
+ LDR r0x01010101, =MUL_CONST0
+ BNE TST_UPPER ;// Jump to Upper if not both
+ LDM pSrcAbove,{tVal8,tVal9} ;// tVal 8 to 9 = pSrcAbove[0 to 7]
+
+ ADD leftStepx2, leftStep,leftStep ;// leftStepx2 = 2 * leftStep
+ ADD pSrcLeft2, pSrcLeft, leftStep ;// pSrcLeft2 = pSrcLeft + leftStep (overwrites pSrcAbove, already consumed)
+
+ ;// M_STALL ARM1136JS=1
+
+ UXTB16 tVal7, tVal8 ;// pSrcAbove[0, 2]
+ UXTB16 tVal8, tVal8, ROR #8 ;// pSrcAbove[1, 3]
+ UADD16 sum1, tVal7, tVal8 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
+
+ UXTB16 tVal7, tVal9 ;// pSrcAbove[4, 6]
+ UXTB16 tVal9, tVal9, ROR #8 ;// pSrcAbove[5, 7]
+ UADD16 sum2, tVal7, tVal9 ;// pSrcAbove[4, 6] + pSrcAbove[5, 7]
+ ADD sum1, sum1, sum1, LSR #16 ;// sum(pSrcAbove[0] to pSrcAbove[3])
+ ADD sum2, sum2, sum2, LSR #16 ;// sum(pSrcAbove[4] to pSrcAbove[7])
+ UXTH sum1, sum1 ;// upsum1 (Clear the top junk bits)
+ UXTH sum2, sum2 ;// upsum2 (Clear the top junk bits)
+
+ ;// Left column: even rows are read through pSrcLeft, odd rows through pSrcLeft2
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = left row 0
+ M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = left row 1
+ M_LDRB tVal4, [pSrcLeft], +leftStepx2 ;// tVal4 = left row 2 (overwrites leftStep; leftStepx2 is still valid)
+ M_LDRB tVal12,[pSrcLeft2], +leftStepx2 ;// tVal12= left row 3
+ ADD tVal2, tVal8, tVal9 ;// tVal2 = row 0 + row 1
+
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = left row 4
+ M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = left row 5
+ ADD tVal14, tVal4, tVal12 ;// tVal14 = row 2 + row 3 (overwrites leftStepx2, same register)
+
+ ;// leftStepx2 is gone, so the last two rows are read without post-increment
+ LDRB tVal4, [pSrcLeft] ;// tVal4 = left row 6
+ LDRB tVal12,[pSrcLeft2] ;// tVal12= left row 7
+ ADD tVal8, tVal8, tVal9 ;// tVal8 = row 4 + row 5
+ ADD tVal2, tVal2, tVal14 ;// leftsum1 = sum of left rows 0 to 3
+ ADD tVal4, tVal4, tVal12 ;// tVal4 = row 6 + row 7
+ ADD tVal14, tVal8, tVal4 ;// leftsum2 = sum of left rows 4 to 7
+ ADD tVal8, tVal14, #2 ;// tVal8 = leftsum2 + 2
+ ADD tVal9, sum2, #2 ;// tVal9 = upsum2 + 2
+ ADD sum1, sum1, tVal2 ;// sum1 = upsum1 + leftsum1
+ ADD sum2, sum2, tVal14 ;// sum2 = upsum2 + leftsum2
+ ADD sum1, sum1, #4 ;// (sum1 + 4)
+ ADD sum2, sum2, #4 ;// (sum2 + 4)
+ MOV sum1, sum1, LSR #3 ;// top-left DC = (sum1 + 4)>>3
+ MOV tVal9, tVal9, LSR #2 ;// top-right DC = (upsum2 + 2)>>2
+ MOV tVal8, tVal8, LSR #2 ;// bottom-left DC = (leftsum2 + 2)>>2
+ MOV sum2, sum2, LSR #3 ;// bottom-right DC = (sum2 + 4)>>3
+
+ MUL tVal0, sum1, r0x01010101 ;// replicate the val in all the bytes
+ MUL tVal1, tVal9,r0x01010101 ;// replicate the val in all the bytes
+ MUL tVal8, tVal8,r0x01010101 ;// replicate the val in all the bytes
+ MUL tVal9, sum2, r0x01010101 ;// replicate the val in all the bytes
+
+ ;// Rows 0-3: {top-left, top-right}
+ M_STRD tVal0, tVal1, [pDst], dstStep ;// row 0 = tVal 0 to 1
+ M_STRD tVal0, tVal1, [pDst], dstStep ;// row 1 = tVal 0 to 1
+ M_STRD tVal0, tVal1, [pDst], dstStep ;// row 2 = tVal 0 to 1
+ M_STRD tVal0, tVal1, [pDst], dstStep ;// row 3 = tVal 0 to 1
+
+ ;// Rows 4-7: {bottom-left, bottom-right}
+ M_STRD tVal8, tVal9, [pDst], dstStep ;// row 4 = tVal 8 to 9
+ M_STRD tVal8, tVal9, [pDst], dstStep ;// row 5 = tVal 8 to 9
+ M_STRD tVal8, tVal9, [pDst], dstStep ;// row 6 = tVal 8 to 9
+ M_STRD tVal8, tVal9, [pDst], dstStep ;// row 7 = tVal 8 to 9
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+TST_UPPER
+ ;// DC prediction when ONLY the upper neighbour is available
+ ;// (availability was already masked to the UPPER/LEFT bits by the caller path).
+ ;// Left half of every row  = (sum(pSrcAbove[0..3]) + 2) >> 2
+ ;// Right half of every row = (sum(pSrcAbove[4..7]) + 2) >> 2
+
+ ;// M_STALL ARM1136JS=3
+
+ CMP availability, #OMX_VC_UPPER ;// is OMX_VC_UPPER the only bit set?
+
+ BNE TST_LEFT ;// Jump to Left if not upper
+ LDM pSrcAbove,{tVal8,tVal9} ;// tVal 8 to 9 = pSrcAbove[0 to 7]
+
+ ;// M_STALL ARM1136JS=3
+
+ UXTB16 tVal7, tVal8 ;// pSrcAbove[0, 2]
+ UXTB16 tVal8, tVal8, ROR #8 ;// pSrcAbove[1, 3]
+ UADD16 sum1, tVal7, tVal8 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
+
+ UXTB16 tVal7, tVal9 ;// pSrcAbove[4, 6]
+ UXTB16 tVal9, tVal9, ROR #8 ;// pSrcAbove[5, 7]
+ UADD16 sum2, tVal7, tVal9 ;// pSrcAbove[4, 6] + pSrcAbove[5, 7]
+
+ ADD sum1, sum1, sum1, LSR #16 ;// sum(pSrcAbove[0] to pSrcAbove[3])
+ ADD sum2, sum2, sum2, LSR #16 ;// sum(pSrcAbove[4] to pSrcAbove[7])
+
+ UXTH sum1, sum1 ;// upsum1 (Clear the top junk bits)
+ UXTH sum2, sum2 ;// upsum2 (Clear the top junk bits)
+
+ ADD sum1, sum1, #2 ;// sum1 + 2
+ ADD sum2, sum2, #2 ;// sum2 + 2
+
+ MOV sum1, sum1, LSR #2 ;// (sum1 + 2)>>2
+ MOV sum2, sum2, LSR #2 ;// (sum2 + 2)>>2
+
+ MUL sum1, sum1,r0x01010101 ;// replicate the val in all the bytes
+ MUL sum2, sum2,r0x01010101 ;// replicate the val in all the bytes
+
+ ;// tVal6/tVal7 are the same registers as sum1/sum2
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 0 = {sum1, sum2}
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 1 = {sum1, sum2}
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 2 = {sum1, sum2}
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 3 = {sum1, sum2}
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 4 = {sum1, sum2}
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 5 = {sum1, sum2}
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 6 = {sum1, sum2}
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 7 = {sum1, sum2}
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+TST_LEFT
+ ;// DC prediction when ONLY the left neighbour is available.
+ ;// Rows 0-3 are filled with (sum of left rows 0..3 + 2) >> 2,
+ ;// rows 4-7 with (sum of left rows 4..7 + 2) >> 2.
+ ;// M_STALL ARM1136JS=3
+
+ CMP availability, #OMX_VC_LEFT ;// is OMX_VC_LEFT the only bit set?
+ BNE TST_COUNT0 ;// neither neighbour available
+ ADD leftStepx2, leftStep,leftStep ;// leftStepx2 = 2 * leftStep
+ ADD pSrcLeft2, pSrcLeft, leftStep ;// pSrcLeft2 = pSrcLeft + leftStep
+
+ ;// Even rows are read through pSrcLeft, odd rows through pSrcLeft2
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = left row 0
+ M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = left row 1
+ M_LDRB tVal4, [pSrcLeft], +leftStepx2 ;// tVal4 = left row 2 (overwrites leftStep; leftStepx2 is still valid)
+ M_LDRB tVal12,[pSrcLeft2], +leftStepx2 ;// tVal12= left row 3
+
+ ADD tVal6, tVal8, tVal9 ;// tVal6 = row 0 + row 1
+
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = left row 4
+ ADD tVal7, tVal4, tVal12 ;// tVal7 = row 2 + row 3
+ M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = left row 5
+ M_LDRB tVal4, [pSrcLeft], +leftStepx2 ;// tVal4 = left row 6
+ M_LDRB tVal12,[pSrcLeft2], +leftStepx2 ;// tVal12= left row 7
+
+ ADD tVal8, tVal8, tVal9 ;// tVal8 = row 4 + row 5
+ ADD sum1, tVal6, tVal7 ;// sum1 = sum of left rows 0 to 3
+ ADD tVal4, tVal4, tVal12 ;// tVal4 = row 6 + row 7
+ ADD sum2, tVal8, tVal4 ;// sum2 = sum of left rows 4 to 7
+
+ ADD sum1, sum1, #2 ;// sum1 + 2
+ ADD sum2, sum2, #2 ;// sum2 + 2
+
+ MOV sum1, sum1, LSR #2 ;// (sum1 + 2)>>2
+ MOV sum2, sum2, LSR #2 ;// (sum2 + 2)>>2
+
+ MUL tVal6, sum1,r0x01010101 ;// replicate the val in all the bytes
+ MUL tVal8, sum2,r0x01010101 ;// replicate the val in all the bytes
+
+ ;// M_STALL ARM1136JS=1
+ MOV tVal7,tVal6 ;// tVal7 = replicated upper-half DC
+ MOV tVal9,tVal8 ;// tVal9 = replicated lower-half DC
+
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 0 = tVal 6 to 7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 1 = tVal 6 to 7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 2 = tVal 6 to 7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 3 = tVal 6 to 7
+
+ M_STRD tVal8, tVal9, [pDst], dstStep ;// row 4 = tVal 8 to 9
+ M_STRD tVal8, tVal9, [pDst], dstStep ;// row 5 = tVal 8 to 9
+ M_STRD tVal8, tVal9, [pDst], dstStep ;// row 6 = tVal 8 to 9
+ M_STRD tVal8, tVal9, [pDst], dstStep ;// row 7 = tVal 8 to 9
+
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Macro to exit midway-break frm case
+
+TST_COUNT0
+ ;// DC prediction when neither neighbour is available:
+ ;// every pixel of the 8x8 block is set to 128 (0x80).
+ LDR sum1, =MUL_CONST1 ;// sum1 = 0x80808080 (sum1 is the same register as tVal6)
+
+ ;// M_STALL ARM1136JS=2
+
+ MOV tVal7, sum1 ;// tVal7 = sum1
+
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 0 = tVal 6 to 7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 1 = tVal 6 to 7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 2 = tVal 6 to 7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 3 = tVal 6 to 7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 4 = tVal 6 to 7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 5 = tVal 6 to 7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 6 = tVal 6 to 7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 7 = tVal 6 to 7
+
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Macro to exit midway-break frm case
+
+OMX_VC_CHROMA_HOR
+ ;// Horizontal prediction: every pixel of row r is a copy of the left
+ ;// neighbour of that row. Even rows are read/written through
+ ;// pSrcLeft/pDst, odd rows through pSrcLeft2/pDst2. Each 8-byte row is
+ ;// written as two word stores: the first post-increments by 4, the second
+ ;// by dstStepx2 (= 2*dstStep - 4), landing two rows further down.
+
+ ;// M_STALL ARM1136JS=2
+
+ ADD pSrcLeft2, pSrcLeft, leftStep ;// pSrcLeft2 = pSrcLeft + leftStep
+ ADD leftStepx2, leftStep, leftStep ;// leftStepx2 = leftStep * 2
+ ADD pDst2, pDst, dstStep ;// pDst2 = pDst + dstStep
+ ADD dstStepx2, dstStep, dstStep ;// double dstStep
+ SUB dstStepx2, dstStepx2, #4 ;// double dstStep minus 4
+ LDR r0x01010101, =MUL_CONST0 ;// Const to repeat the byte in reg 4 times
+ M_LDRB tVal6, [pSrcLeft], +leftStepx2 ;// tVal6 = left row 0
+ M_LDRB tVal7, [pSrcLeft2],+leftStepx2 ;// tVal7 = left row 1
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = left row 2
+ M_LDRB tVal9, [pSrcLeft2],+leftStepx2 ;// tVal9 = left row 3
+ MUL tVal6, tVal6, r0x01010101 ;// replicate the val in all the bytes
+ MUL tVal7, tVal7, r0x01010101 ;// replicate the val in all the bytes
+ MUL tVal8, tVal8, r0x01010101 ;// replicate the val in all the bytes
+ MUL tVal9, tVal9, r0x01010101 ;// replicate the val in all the bytes
+ STR tVal6, [pDst], #+4 ;// row 0, bytes 0 to 3
+ STR tVal7, [pDst2], #+4 ;// row 1, bytes 0 to 3
+ M_STR tVal6, [pDst], dstStepx2 ;// row 0, bytes 4 to 7
+ M_STR tVal7, [pDst2], dstStepx2 ;// row 1, bytes 4 to 7
+ STR tVal8, [pDst], #+4 ;// row 2, bytes 0 to 3
+ STR tVal9, [pDst2], #+4 ;// row 3, bytes 0 to 3
+ M_STR tVal8, [pDst], dstStepx2 ;// row 2, bytes 4 to 7
+ M_STR tVal9, [pDst2], dstStepx2 ;// row 3, bytes 4 to 7
+ M_LDRB tVal6, [pSrcLeft], +leftStepx2 ;// tVal6 = left row 4
+ M_LDRB tVal7, [pSrcLeft2],+leftStepx2 ;// tVal7 = left row 5
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = left row 6
+ M_LDRB tVal9, [pSrcLeft2],+leftStepx2 ;// tVal9 = left row 7
+ MUL tVal6, tVal6, r0x01010101 ;// replicate the val in all the bytes
+ MUL tVal7, tVal7, r0x01010101 ;// replicate the val in all the bytes
+ MUL tVal8, tVal8, r0x01010101 ;// replicate the val in all the bytes
+ MUL tVal9, tVal9, r0x01010101 ;// replicate the val in all the bytes
+ STR tVal6, [pDst], #+4 ;// row 4, bytes 0 to 3
+ STR tVal7, [pDst2], #+4 ;// row 5, bytes 0 to 3
+ M_STR tVal6, [pDst], dstStepx2 ;// row 4, bytes 4 to 7
+ M_STR tVal7, [pDst2], dstStepx2 ;// row 5, bytes 4 to 7
+ STR tVal8, [pDst], #+4 ;// row 6, bytes 0 to 3
+ STR tVal9, [pDst2], #+4 ;// row 7, bytes 0 to 3
+ M_STR tVal8, [pDst], dstStepx2 ;// row 6, bytes 4 to 7
+ M_STR tVal9, [pDst2], dstStepx2 ;// row 7, bytes 4 to 7
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+OMX_VC_CHROMA_VERT
+ ;// Vertical prediction: the 8 bytes at pSrcAbove are copied into each of
+ ;// the 8 destination rows.
+
+ ;// M_STALL ARM1136JS=4
+
+ LDMIA pSrcAbove, {tVal6,tVal7} ;// tVal 6 to 7 = pSrcAbove[0 to 7]
+ MOV return, #OMX_Sts_NoErr ;// r0 (pSrcLeft) is not needed in this case
+
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 0 = tVal 6 to 7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 1 = tVal 6 to 7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 2 = tVal 6 to 7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 3 = tVal 6 to 7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 4 = tVal 6 to 7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 5 = tVal 6 to 7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 6 = tVal 6 to 7
+ M_STRD tVal6, tVal7, [pDst], dstStep ;// row 7 = tVal 6 to 7
+
+ M_EXIT ;// Macro to exit midway-break frm case
+
+OMX_VC_CHROMA_PLANE
+ ;// Plane prediction:
+ ;//   pred[y][x] = clip8((a + b*(x-3) + c*(y-3) + 16) >> 5),  x,y = 0..7
+ ;//   a = 16 * (pSrcAbove[7] + pSrcLeft[7*leftStep])
+ ;//   H = 4*(A[7]-AL) + 3*(A[6]-A[0]) + 2*(A[5]-A[1]) + (A[4]-A[2])
+ ;//   V = 4*(L[7]-AL) + 3*(L[6]-L[0]) + 2*(L[5]-L[1]) + (L[4]-L[2])
+ ;//   b = (17*H + 16) >> 5,   c = (17*V + 16) >> 5
+ ;// (A = pSrcAbove, L = left column, AL = pSrcAboveLeft[0]).
+ ;// Row 0 is built as eight signed 16-bit accumulators packed two per
+ ;// register; every following row is the previous one plus c.
+
+ ;// M_STALL ARM1136JS=3
+
+ RSB tVal14, leftStep, leftStep, LSL #3 ;// 7*leftStep
+ LDRB tVal7, [pSrcAbove, #+7] ;// pSrcAbove[7]
+ LDRB tVal6, [pSrcLeft, +tVal14] ;// pSrcLeft[7*leftStep]
+ LDRB tVal8, [pSrcAboveLeft] ;// pSrcAboveLeft[0]
+ LDRB tVal9, [pSrcAbove, #+6 ] ;// pSrcAbove[6]
+ LDRB tVal10,[pSrcAbove] ;// pSrcAbove[0]
+ ADD tVal2, tVal7, tVal6 ;// pSrcAbove[7] + pSrcLeft[7*leftStep]
+ SUB tVal6, tVal6, tVal8 ;// V0 = pSrcLeft[7*leftStep] - pSrcAboveLeft[0]
+ SUB tVal7, tVal7, tVal8 ;// H0 = pSrcAbove[7] - pSrcAboveLeft[0]
+ LSL tVal2, tVal2, #4 ;// a = 16 * (pSrcAbove[7] + pSrcLeft[7*leftStep])
+ ADD tVal2, tVal2, #16 ;// a + 16
+ SUB tVal9, tVal9,tVal10 ;// pSrcAbove[6] - pSrcAbove[0]
+ LDRB tVal8, [pSrcAbove,#+5] ;// pSrcAbove[5]
+ LDRB tVal10,[pSrcAbove,#+1] ;// pSrcAbove[1]
+ ADD tVal9, tVal9, tVal9, LSL #1 ;// H1 = 3 * (pSrcAbove[6] - pSrcAbove[0])
+ ADD tVal7, tVal9, tVal7, LSL #2 ;// H = H1 + 4*H0
+ SUB tVal8, tVal8, tVal10 ;// pSrcAbove[5] - pSrcAbove[1]
+ LDRB tVal9, [pSrcAbove,#+4] ;// pSrcAbove[4]
+ LDRB tVal10,[pSrcAbove,#+2] ;// pSrcAbove[2]
+ ADD tVal7, tVal7, tVal8, LSL #1 ;// H = H + 2*(pSrcAbove[5] - pSrcAbove[1])
+ SUB tVal11, tVal14,leftStep ;// 6*leftStep
+ ADD tVal11, pSrcLeft, tVal11 ;// pSrcLeft + 6*leftStep (walks upwards)
+ MOV tVal12, pSrcLeft ;// pSrcLeft (walks downwards)
+ SUB tVal9, tVal9, tVal10 ;// pSrcAbove[4] - pSrcAbove[2]
+ ADD tVal7, tVal7, tVal9 ;// H complete
+ M_LDRB tVal8, [tVal11],-leftStep ;// pSrcLeft[6*leftStep]
+ M_LDRB tVal10,[tVal12],+leftStep ;// pSrcLeft[0]
+ ADD tVal7, tVal7, tVal7, LSL #4 ;// 17 * H
+ ADD tVal7, tVal7, #16 ;// 17 * H + 16
+ SUB tVal8, tVal8, tVal10 ;// pSrcLeft[6*leftStep] - pSrcLeft[0]
+ ASR b, tVal7, #5 ;// b = (17 * H + 16) >> 5 (r14: 7*leftStep no longer needed)
+ ADD tVal8, tVal8, tVal8, LSL #1 ;// V1 = 3 * (pSrcLeft[6*leftStep] - pSrcLeft[0])
+ ADD tVal6, tVal8, tVal6, LSL #2 ;// V = V1 + 4*V0
+ M_LDRB tVal8, [tVal11],-leftStep ;// pSrcLeft[5*leftStep]
+ M_LDRB tVal10,[tVal12],+leftStep ;// pSrcLeft[leftStep]
+ ADD tVal7, b, b, LSL #1 ;// 3*b
+ SUB tVal2, tVal2, tVal7 ;// a + 16 - 3*b
+ SUB tVal7, tVal8, tVal10 ;// pSrcLeft[5*leftStep] - pSrcLeft[leftStep]
+ M_LDRB tVal8, [tVal11],-leftStep ;// pSrcLeft[4*leftStep]
+ M_LDRB tVal10,[tVal12],+leftStep ;// pSrcLeft[2*leftStep]
+ ADD tVal6, tVal6, tVal7, LSL #1 ;// V = V + 2*(pSrcLeft[5*leftStep] - pSrcLeft[leftStep])
+ LDR r0x00FF00FF, =MASK_CONST ;// r0x00FF00FF = 0x00FF00FF (r11: upward pointer no longer needed)
+ SUB tVal7, tVal8, tVal10 ;// pSrcLeft[4*leftStep] - pSrcLeft[2*leftStep]
+ ADD tVal6, tVal6, tVal7 ;// V complete
+ SUB dstStep, dstStep, #4 ;// dstStep - 4 (first word store of a row advances by 4)
+ ADD tVal6, tVal6, tVal6, LSL #4 ;// 17*V
+ ADD tVal6, tVal6, #16 ;// 17*V + 16
+
+ ;// M_STALL ARM1136JS=1
+
+ ASR c, tVal6, #5 ;// c = (17*V + 16)>>5 (r12: downward pointer no longer needed)
+
+ ;// M_STALL ARM1136JS=1
+
+ ADD tVal6, c, c, LSL #1 ;// 3*c
+ UXTH c, c ;// only in half word
+ SUB tVal6, tVal2, tVal6 ;// p0 = a - 3*b - 3*c + 16 (may be negative)
+ ORR c, c, c, LSL #16 ;// {c, c}
+ ADD tVal7, b, b ;// 2b
+ ADD tVal2, tVal6, tVal7 ;// p2 = p0 + 2*b
+ ADD tVal7, tVal7, b ;// 3b
+ ;// PKHBT instead of ORR: p0 is a signed 32-bit value, and when it is
+ ;// negative its upper 16 sign bits would otherwise be OR-ed into (and
+ ;// destroy) the p2 lane. PKHBT takes only bits[15:0] of p0.
+ PKHBT p2p0, tVal6, tVal2, LSL #16 ;// p2p0 = pack {p2, p0}
+ UXTH b, b
+ UXTH tVal7, tVal7
+ ORR b, b, b, LSL #16 ;// {b,b}
+ ORR tVal7, tVal7, tVal7, LSL #16 ;// {3b,3b}
+ SADD16 p3p1, p2p0, b ;// p3p1 = p2p0 + {b,b}
+ SADD16 p6p4, p3p1, tVal7 ;// p6p4 = p3p1 + {3b,3b}
+ SADD16 p7p5, p6p4, b ;// p7p5 = p6p4 + {b,b}
+ MOV outerCount, #BLK_SIZE ;// Outer Loop Count (r14: b no longer needed)
+
+LOOP_PLANE
+
+ ;// Saturate into the pp* scratch registers and leave the p* accumulators
+ ;// untouched: the next row must be derived from the unclipped linear
+ ;// values, otherwise a value that was clipped in one row would restart
+ ;// from the clip boundary in the following rows.
+ USAT16 pp7pp5, #13, p7p5 ;// clip13(p7) clip13(p5)
+ USAT16 pp6pp4, #13, p6p4 ;// clip13(p6) clip13(p4)
+ USAT16 pp3pp1, #13, p3p1 ;// clip13(p3) clip13(p1)
+ USAT16 pp2pp0, #13, p2p0 ;// clip13(p2) clip13(p0)
+
+ AND pp7pp5, r0x00FF00FF, pp7pp5, ASR #5 ;// clip8(p7) clip8(p5)
+ AND pp6pp4, r0x00FF00FF, pp6pp4, ASR #5 ;// clip8(p6) clip8(p4)
+ AND pp3pp1, r0x00FF00FF, pp3pp1, ASR #5 ;// clip8(p3) clip8(p1)
+ AND pp2pp0, r0x00FF00FF, pp2pp0, ASR #5 ;// clip8(p2) clip8(p0)
+
+ SUBS outerCount, outerCount, #1 ;// outerCount-- (flags consumed by BNE below)
+
+ ORR p3210, pp2pp0, pp3pp1, LSL #8 ;// pack {p3,p2, p1, p0}
+ STR p3210, [pDst], #4 ;// store {pDst[0] to pDst[3]}
+
+ ORR p7654, pp6pp4, pp7pp5, LSL #8 ;// pack {p7,p6, p5, p4}
+ M_STR p7654, [pDst], dstStep ;// store {pDst[4] to pDst[7]}, advance to next row
+
+ SADD16 p7p5, p7p5, c ;// {p7 + c}, {p5 + c}
+ SADD16 p6p4, p6p4, c ;// {p6 + c}, {p4 + c}
+ SADD16 p3p1, p3p1, c ;// {p3 + c}, {p1 + c}
+ SADD16 p2p0, p2p0, c ;// {p2 + c}, {p0 + c}
+
+ BNE LOOP_PLANE ;// Loop for 8 times
+ MOV return, #OMX_Sts_NoErr
+ M_END
+
+ ENDIF ;// ARM1136JS
+
+
+
+ END
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntraChroma_8x8 ends
+;//-----------------------------------------------------------------------------------------------
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
new file mode 100644
index 0000000..1557208
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s
@@ -0,0 +1,501 @@
+;//
+;//
+;// File Name: omxVCM4P10_PredictIntra_16x16_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+;//-------------------------------------------------------
+;// Jump table that implements a C-style switch in assembly
+;// by the method of two levels of indexing: the entry point
+;// loads pc from armVCM4P10_pIndexTable16x16[predMode], so
+;// predMode 0..3 selects VERT, HOR, DC, PLANE respectively.
+;//-------------------------------------------------------
+
+ M_TABLE armVCM4P10_pIndexTable16x16
+ DCD OMX_VC_16X16_VERT, OMX_VC_16X16_HOR
+ DCD OMX_VC_16X16_DC, OMX_VC_16X16_PLANE
+
+ IF ARM1136JS
+
+;//--------------------------------------------
+;// Constants
+;//--------------------------------------------
+BLK_SIZE EQU 0x10 ;// rows written per call / loop count
+MUL_CONST0 EQU 0x01010101 ;// multiplying a byte value by this replicates it in all 4 bytes
+MUL_CONST1 EQU 0x00060004
+MUL_CONST2 EQU 0x00070005
+MUL_CONST3 EQU 0x00030001
+MASK_CONST EQU 0x00FF00FF
+
+;//--------------------------------------------
+;// Scratch variable
+;//
+;// NOTE: many names below alias the SAME physical register
+;// (e.g. r12 = y = temp2 = cMul2 = count = tVal12 = b,
+;//       r11 = cMul1 = dstStepx2 = r0x00FF00FF = tVal11,
+;//       r1  = outerCount = pSrcLeft2 = tVal1 = p3p1 = pSrcAbove).
+;// Writing one name destroys every other name mapped to that register,
+;// so the instruction order inside each case matters.
+;//--------------------------------------------
+y RN 12
+pc RN 15
+
+return RN 0
+innerCount RN 0
+outerCount RN 1
+pSrcLeft2 RN 1
+pDst2 RN 2
+sum RN 6
+pTable RN 9
+temp1 RN 10
+temp2 RN 12
+cMul1 RN 11
+cMul2 RN 12
+count RN 12
+dstStepx2 RN 11
+leftStepx2 RN 14
+r0x01010101 RN 10
+r0x00FF00FF RN 11
+
+;// Generic temporaries: tValN is simply rN
+tVal0 RN 0
+tVal1 RN 1
+tVal2 RN 2
+tVal3 RN 3
+tVal4 RN 4
+tVal5 RN 5
+tVal6 RN 6
+tVal7 RN 7
+tVal8 RN 8
+tVal9 RN 9
+tVal10 RN 10
+tVal11 RN 11
+tVal12 RN 12
+tVal14 RN 14
+
+;// Plane-prediction gradients
+b RN 12
+c RN 14
+
+;// Packed halfword pairs, named {high lane}{low lane}
+p2p0 RN 0
+p3p1 RN 1
+p6p4 RN 2
+p7p5 RN 4
+p10p8 RN 6
+p11p9 RN 7
+p14p12 RN 8
+p15p13 RN 9
+
+;// Packed output words (4 pixels each); all share r10
+p3210 RN 10
+p7654 RN 10
+p111098 RN 10
+p15141312 RN 10
+
+;//--------------------------------------------
+;// Declare input registers
+;// (the first four arrive in r0-r3; the remaining four are
+;//  loaded from the stack by the function prologue)
+;//--------------------------------------------
+pSrcLeft RN 0 ;// input pointer
+pSrcAbove RN 1 ;// input pointer
+pSrcAboveLeft RN 2 ;// input pointer
+pDst RN 3 ;// output pointer
+leftStep RN 4 ;// input variable
+dstStep RN 5 ;// input variable
+predMode RN 6 ;// input variable
+availability RN 7 ;// input variable
+
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntra_16x16 starts
+;//-----------------------------------------------------------------------------------------------
+
+ ;// Write function header
+ ;// Entry point: loads the four stack arguments and dispatches to the
+ ;// per-mode code through the jump table armVCM4P10_pIndexTable16x16.
+ M_START omxVCM4P10_PredictIntra_16x16, r11
+
+ ;// Define stack arguments
+ M_ARG LeftStep, 4
+ M_ARG DstStep, 4
+ M_ARG PredMode, 4
+ M_ARG Availability, 4
+
+ ;// M_STALL ARM1136JS=4
+
+ LDR pTable,=armVCM4P10_pIndexTable16x16 ;// Load index table for switch case
+
+ ;// Load argument from the stack
+ M_LDR predMode, PredMode ;// Arg predMode loaded from stack to reg
+ M_LDR leftStep, LeftStep ;// Arg leftStep loaded from stack to reg
+ M_LDR dstStep, DstStep ;// Arg dstStep loaded from stack to reg
+ M_LDR availability, Availability ;// Arg availability loaded from stack to reg
+
+ MOV y, #BLK_SIZE ;// Outer Loop Count (used as-is by the VERT case)
+ ;// NOTE(review): predMode is used as a table index without a range check;
+ ;// the table has four entries, so callers must pass 0..3.
+ LDR pc, [pTable, predMode, LSL #2] ;// Branch to the case based on predMode
+
+OMX_VC_16X16_VERT
+ ;// Vertical prediction: the 16 bytes at pSrcAbove are copied into each of
+ ;// the 16 destination rows. Two rows are written per iteration (even rows
+ ;// through pDst, odd rows through pDst2); y was set to BLK_SIZE by the
+ ;// prologue.
+ LDM pSrcAbove, {tVal6,tVal7,tVal8,tVal9};// tVal 6 to 9 = pSrcAbove[0 to 15]
+ ADD dstStepx2, dstStep, dstStep ;// double dstStep
+ ADD pDst2, pDst, dstStep ;// pDst2- pDst advanced by dstStep
+
+ ;// M_STALL ARM1136JS=2 ;// Stall outside the loop
+
+LOOP_VERT
+ STM pDst, {tVal6,tVal7,tVal8,tVal9} ;// even row = tVal 6 to 9
+ SUBS y, y, #2 ;// y -= 2 (two rows per iteration)
+ ADD pDst, pDst, dstStepx2 ;// pDst advanced by 2*dstStep
+ STM pDst2, {tVal6,tVal7,tVal8,tVal9} ;// odd row = tVal 6 to 9
+ ADD pDst2, pDst2, dstStepx2 ;// pDst2 advanced by 2*dstStep
+ BNE LOOP_VERT ;// Loop for 8 times
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+
+OMX_VC_16X16_HOR
+ ;// Horizontal prediction: every pixel of row r is a copy of the left
+ ;// neighbour of that row. Four rows are produced per loop iteration
+ ;// (rows 4k and 4k+2 through pDst, rows 4k+1 and 4k+3 through pDst2).
+ ;// Each 16-byte row is written as three word stores that post-increment
+ ;// by 4 and a fourth that post-increments by dstStepx2 (= 2*dstStep - 12),
+ ;// landing two rows further down.
+ ;//
+ ;// All four left-neighbour bytes are loaded at the top of the iteration,
+ ;// so exactly 16 bytes are read from the left column. (Pre-loading two
+ ;// rows for the next iteration would read pSrcLeft[16*leftStep] and
+ ;// pSrcLeft[17*leftStep] on the final pass, i.e. beyond the block.)
+
+ LDR r0x01010101, =MUL_CONST0 ;// Const to repeat the byte in reg 4 times
+ MOV y, #4 ;// Outer Loop Count (4 rows per iteration)
+ ADD pDst2, pDst, dstStep ;// pDst2 = pDst + dstStep
+ ADD dstStepx2, dstStep, dstStep ;// double dstStep
+ SUB dstStepx2, dstStepx2, #12 ;// double dstStep minus 12
+
+LOOP_HOR
+ M_LDRB tVal6, [pSrcLeft], +leftStep ;// tVal6 = left pixel of row 4k
+ M_LDRB tVal7, [pSrcLeft], +leftStep ;// tVal7 = left pixel of row 4k+1
+ M_LDRB tVal8, [pSrcLeft], +leftStep ;// tVal8 = left pixel of row 4k+2
+ M_LDRB tVal9, [pSrcLeft], +leftStep ;// tVal9 = left pixel of row 4k+3
+ MUL tVal6, tVal6, r0x01010101 ;// replicate the val in all the bytes
+ MUL tVal7, tVal7, r0x01010101 ;// replicate the val in all the bytes
+ SUBS y, y, #1 ;// y-- (flags consumed by BNE below; nothing in between sets flags)
+ STR tVal6, [pDst], #+4 ;// row 4k,   bytes 0 to 3
+ STR tVal7, [pDst2], #+4 ;// row 4k+1, bytes 0 to 3
+ STR tVal6, [pDst], #+4 ;// row 4k,   bytes 4 to 7
+ STR tVal7, [pDst2], #+4 ;// row 4k+1, bytes 4 to 7
+ MUL tVal8, tVal8, r0x01010101 ;// replicate the val in all the bytes
+ STR tVal6, [pDst], #+4 ;// row 4k,   bytes 8 to 11
+ STR tVal7, [pDst2], #+4 ;// row 4k+1, bytes 8 to 11
+ MUL tVal9, tVal9, r0x01010101 ;// replicate the val in all the bytes
+ M_STR tVal6, [pDst], dstStepx2 ;// row 4k,   bytes 12 to 15; pDst  -> row 4k+2
+ M_STR tVal7, [pDst2], dstStepx2 ;// row 4k+1, bytes 12 to 15; pDst2 -> row 4k+3
+ STR tVal8, [pDst], #+4 ;// row 4k+2, bytes 0 to 3
+ STR tVal9, [pDst2], #+4 ;// row 4k+3, bytes 0 to 3
+ STR tVal8, [pDst], #+4 ;// row 4k+2, bytes 4 to 7
+ STR tVal9, [pDst2], #+4 ;// row 4k+3, bytes 4 to 7
+ STR tVal8, [pDst], #+4 ;// row 4k+2, bytes 8 to 11
+ STR tVal9, [pDst2], #+4 ;// row 4k+3, bytes 8 to 11
+ M_STR tVal8, [pDst], dstStepx2 ;// row 4k+2, bytes 12 to 15; pDst  -> row 4k+4
+ M_STR tVal9, [pDst2], dstStepx2 ;// row 4k+3, bytes 12 to 15; pDst2 -> row 4k+5
+ BNE LOOP_HOR ;// Loop for 4 times
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+OMX_VC_16X16_DC
+ ;// DC prediction: the whole 16x16 block is filled with one value.
+ ;//   both neighbours : (sum(above[0..15]) + sum(left rows 0..15) + 16) >> 5
+ ;//   one neighbour   : (sum of the available 16 samples + 8) >> 4
+ ;//   no neighbour    : 128
+ ;// count holds the number of available neighbours (0, 1 or 2).
+ ;// The upper sum is kept in tVal2 (and copied to sum); the left sum, when
+ ;// computed, replaces sum.
+
+ ;// M_STALL ARM1136JS=2
+
+ MOV count, #0 ;// count = 0
+ TST availability, #OMX_VC_UPPER ;// if(availability & #OMX_VC_UPPER)
+ BEQ TST_LEFT ;// Jump to Left if not upper
+ LDM pSrcAbove,{tVal8,tVal9,tVal10,tVal11};// tVal 8 to 11 = pSrcAbove[0 to 15]
+ ADD count, count, #1 ;// if upper inc count by 1
+
+ ;// M_STALL ARM1136JS=2
+
+ UXTB16 tVal2, tVal8 ;// pSrcAbove[0, 2]
+ UXTB16 tVal6, tVal9 ;// pSrcAbove[4, 6]
+ UADD16 tVal2, tVal2, tVal6 ;// pSrcAbove[0, 2] + pSrcAbove[4, 6]
+ UXTB16 tVal8, tVal8, ROR #8 ;// pSrcAbove[1, 3]
+ UXTB16 tVal9, tVal9, ROR #8 ;// pSrcAbove[5, 7]
+ UADD16 tVal8, tVal8, tVal9 ;// pSrcAbove[1, 3] + pSrcAbove[5, 7]
+ UADD16 tVal2, tVal2, tVal8 ;// two halfword partial sums of pSrcAbove[0..7]
+
+ UXTB16 tVal8, tVal10 ;// pSrcAbove[8, 10]
+ UXTB16 tVal9, tVal11 ;// pSrcAbove[12, 14]
+ UADD16 tVal8, tVal8, tVal9 ;// pSrcAbove[8, 10] + pSrcAbove[12, 14]
+ UXTB16 tVal10, tVal10, ROR #8 ;// pSrcAbove[9, 11]
+ UXTB16 tVal11, tVal11, ROR #8 ;// pSrcAbove[13, 15]
+ UADD16 tVal10, tVal10, tVal11 ;// pSrcAbove[9, 11] + pSrcAbove[13, 15]
+ UADD16 tVal8, tVal8, tVal10 ;// two halfword partial sums of pSrcAbove[8..15]
+
+ UADD16 tVal2, tVal2, tVal8 ;// two halfword partial sums of pSrcAbove[0..15]
+
+ ;// M_STALL ARM1136JS=1
+
+ ADD tVal2, tVal2, tVal2, LSR #16 ;// low halfword = sum(pSrcAbove[0] to pSrcAbove[15]); high halfword is junk
+
+ ;// M_STALL ARM1136JS=1
+
+ UXTH sum, tVal2 ;// Extract the lower half for result
+
+TST_LEFT
+ TST availability, #OMX_VC_LEFT
+ BEQ TST_COUNT
+ ADD leftStepx2, leftStep,leftStep ;// leftStepx2 = 2 * leftStep
+ ADD pSrcLeft2, pSrcLeft, leftStep ;// pSrcLeft2 = pSrcLeft + leftStep (overwrites pSrcAbove, no longer needed)
+
+ ;// Even rows are read through pSrcLeft, odd rows through pSrcLeft2
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = left row 0
+ M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = left row 1
+ M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= left row 2
+ M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= left row 3
+ ADD tVal7, tVal8, tVal9 ;// tVal7 = row 0 + row 1 (overwrites availability, already tested)
+ ADD count, count, #1 ;// Inc Counter if Left is available
+ ADD tVal6, tVal10, tVal11 ;// tVal6 = row 2 + row 3 (same register as sum; upper sum survives in tVal2)
+
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = left row 4
+ M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = left row 5
+ M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= left row 6
+ M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= left row 7
+ ADD sum, tVal7, tVal6 ;// sum = left rows 0 to 3
+ ADD tVal8, tVal8, tVal9 ;// tVal8 = row 4 + row 5
+ ADD tVal10, tVal10, tVal11 ;// tVal10= row 6 + row 7
+ ADD tVal7, tVal8, tVal10 ;// tVal7 = left rows 4 to 7
+
+
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = left row 8
+ M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = left row 9
+ M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= left row 10
+ M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= left row 11
+ ADD sum, sum, tVal7 ;// sum = left rows 0 to 7
+ ADD tVal8, tVal8, tVal9 ;// tVal8 = row 8 + row 9
+ ADD tVal10, tVal10, tVal11 ;// tVal10= row 10 + row 11
+ ADD tVal7, tVal8, tVal10 ;// tVal7 = left rows 8 to 11
+
+
+ M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = left row 12
+ M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = left row 13
+ M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= left row 14
+ M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= left row 15
+ ADD sum, sum, tVal7 ;// sum = left rows 0 to 11
+ ADD tVal8, tVal8, tVal9 ;// tVal8 = row 12 + row 13
+ ADD tVal10, tVal10, tVal11 ;// tVal10= row 14 + row 15
+ ADD tVal7, tVal8, tVal10 ;// tVal7 = left rows 12 to 15
+ ADD sum, sum, tVal7 ;// sum = left rows 0 to 15
+
+TST_COUNT
+ CMP count, #0 ;// if(count == 0)
+ MOVEQ sum, #128 ;// sum = 128 if(count == 0)
+ BEQ TST_COUNT0 ;// if(count == 0)
+ CMP count, #1 ;// if(count == 1); these flags drive all conditional ops below
+ ADDEQ sum, sum, #8 ;// sum += 8 if(count == 1)
+ ADDNE sum, sum, tVal2 ;// sum = sumleft + sumupper if(count == 2)
+ ADDNE sum, sum, #16 ;// sum += 16 if(count == 2)
+
+ ;// M_STALL ARM1136JS=1
+
+ UXTH sum, sum ;// keep low halfword (drops the junk carried in from tVal2's top half)
+
+ ;// M_STALL ARM1136JS=1
+
+ LSREQ sum, sum, #4 ;// sum >> 4 if(count == 1)
+
+ ;// M_STALL ARM1136JS=1
+
+ LSRNE sum, sum, #5 ;// sum >> 5 if(count == 2)
+
+TST_COUNT0
+
+ ;// M_STALL ARM1136JS=1
+
+ ORR sum, sum, sum, LSL #8 ;// sum replicated in the two low bytes
+
+ ;// M_STALL ARM1136JS=1
+
+ ORR tVal6, sum, sum, LSL #16 ;// sum replicated in all bytes
+ CPY tVal7, tVal6 ;// tVal7 = tVal6
+ CPY tVal8, tVal6 ;// tVal8 = tVal6
+ CPY tVal9, tVal6 ;// tVal9 = tVal6
+ ADD dstStepx2, dstStep, dstStep ;// double dstStep
+ ADD pDst2, pDst, dstStep ;// pDst2- pDst advanced by dstStep
+ MOV y, #BLK_SIZE ;// Outer Loop Count (r12 was used as count above)
+
+LOOP_DC
+ STM pDst, {tVal6,tVal7,tVal8,tVal9} ;// even row = tVal 6 to 9
+ SUBS y, y, #2 ;// y -= 2 (two rows per iteration)
+ ADD pDst, pDst, dstStepx2 ;// pDst advanced by 2*dstStep
+ STM pDst2, {tVal6,tVal7,tVal8,tVal9} ;// odd row = tVal 6 to 9
+ ADD pDst2, pDst2, dstStepx2 ;// pDst2 advanced by 2*dstStep
+ BNE LOOP_DC ;// Loop for 8 times
+
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT
+
+OMX_VC_16X16_PLANE
+
+ ;// M_STALL ARM1136JS=3
+ RSB tVal14, leftStep, leftStep, LSL #4 ;// tVal14 = 15*leftStep
+
+ ;// M_STALL ARM1136JS=2
+ LDRB tVal10, [pSrcLeft, tVal14] ;// tVal10 = pSrcLeft[15*leftStep]
+ LDRB tVal11, [pSrcAboveLeft] ;// tVal11 = pSrcAboveLeft[0]
+ LDRB tVal12, [pSrcAbove, #15]
+
+ ADD tVal2, tVal12, tVal10 ;// tVal2 = pSrcAbove[15] + pSrcLeft[15*leftStep]
+ SUB tVal10, tVal10, tVal11 ;// tVal10 = V0 = pSrcLeft[15*leftStep] - pSrcAboveLeft[0]
+ SUB tVal11, tVal12, tVal11 ;// tVal11 = H0 = pSrcAbove[15] - pSrcAboveLeft[0]
+ MOV tVal2, tVal2, LSL #4 ;// tVal2 = a = 16 * (pSrcAbove[15] + pSrcLeft[15*leftStep])
+
+ MOV tVal11, tVal11, LSL #3 ;// 8*[15]-[-1]
+ LDRB tVal6, [pSrcAbove, #0]
+ LDRB tVal7, [pSrcAbove, #14]
+ SUB tVal8, tVal7, tVal6
+ RSB tVal8, tVal8, tVal8, LSL #3 ;// 7*[14]-[0]
+ ADD tVal11, tVal11, tVal8
+ LDRB tVal6, [pSrcAbove, #1]
+ LDRB tVal7, [pSrcAbove, #13]
+ SUB tVal8, tVal7, tVal6
+ ADD tVal8, tVal8, tVal8
+ ADD tVal8, tVal8, tVal8, LSL #1 ;// 6*[13]-[1]
+ ADD tVal11, tVal11, tVal8
+ LDRB tVal6, [pSrcAbove, #2]
+ LDRB tVal7, [pSrcAbove, #12]
+ SUB tVal8, tVal7, tVal6
+ ADD tVal8, tVal8, tVal8, LSL #2 ;// 5*[12]-[2]
+ ADD tVal11, tVal11, tVal8
+ LDRB tVal6, [pSrcAbove, #3]
+ LDRB tVal7, [pSrcAbove, #11]
+ SUB tVal8, tVal7, tVal6
+ ADD tVal11, tVal11, tVal8, LSL #2 ;// + 4*[11]-[3]
+ LDRB tVal6, [pSrcAbove, #4]
+ LDRB tVal7, [pSrcAbove, #10]
+ SUB tVal8, tVal7, tVal6
+ ADD tVal8, tVal8, tVal8, LSL #1 ;// 3*[10]-[4]
+ ADD tVal11, tVal11, tVal8
+ LDRB tVal6, [pSrcAbove, #5]
+ LDRB tVal7, [pSrcAbove, #9]
+ SUB tVal8, tVal7, tVal6
+ ADD tVal11, tVal11, tVal8, LSL #1 ;// + 2*[9]-[5]
+ LDRB tVal6, [pSrcAbove, #6]
+ LDRB tVal7, [pSrcAbove, #8]
+ SUB tVal8, tVal7, tVal6 ;// 1*[8]-[6]
+ ADD tVal7, tVal11, tVal8
+
+ ADD tVal2, tVal2, #16 ;// tVal2 = a + 16
+ MOV tVal1, pSrcLeft ;// tVal4 = pSrcLeft
+ SUB tVal9, tVal14, leftStep ;// tVal9 = 14*leftStep
+ ADD tVal9, pSrcLeft, tVal9 ;// tVal9 = pSrcLeft + 14*leftStep
+
+ M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[14*leftStep]
+ M_LDRB tVal11, [tVal1], +leftStep ;// tVal11 = pSrcLeft[0]
+ ADD tVal7, tVal7, tVal7, LSL #2 ;// tVal7 = 5 * H
+ ADD tVal7, tVal7, #32 ;// tVal7 = 5 * H + 32
+ SUB tVal8, tVal8, tVal11 ;// tVal8 = pSrcLeft[14*leftStep] - pSrcLeft[0]
+ ASR tVal12, tVal7, #6 ;// tVal12 = b = (5 * H + 32) >> 6
+
+ RSB tVal8, tVal8, tVal8, LSL #3 ;// tVal8 = V1 = 7* (pSrcLeft[14*leftStep]-pSrcLeft[0])
+ ADD tVal6, tVal8, tVal10, LSL #3 ;// tVal6 = V = V0 +V1
+ M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[13*leftStep]
+ M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[leftStep]
+ RSB tVal7, tVal12, tVal12, LSL #3 ;// tVal7 = 7*b
+ SUB tVal2, tVal2, tVal7 ;// tVal2 = a + 16 - 7*b
+ SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[13*leftStep] - pSrcLeft[leftStep]
+ M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[12*lS]
+ ADD tVal7, tVal7, tVal7 ;// tVal7 = 2 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
+ M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[2*leftStep]
+ ADD tVal7, tVal7, tVal7, LSL #1 ;// tVal7 = 6 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep])
+ ADD tVal6, tVal6, tVal7 ;// tVal6 = V = V + V2
+ SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep]
+ M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[11*leftStep]
+ M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[3*leftStep]
+ ADD tVal7, tVal7, tVal7, LSL #2 ;// tVal7 = 5 * (pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep])
+ ADD tVal6, tVal6, tVal7 ;// tVal6 = V = V + V3
+ SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[11*leftStep] - pSrcLeft[3*leftStep]
+ M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[10*leftStep]
+ M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[4*leftStep]
+ ADD tVal6, tVal6, tVal7, LSL #2 ;// tVal6 = V = V + V4
+ SUB dstStep, dstStep, #16 ;// tVal5 = dstStep - 16
+ SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep]
+ M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[9*leftStep]
+ M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[5*leftStep]
+ ADD tVal7, tVal7, tVal7, LSL #1 ;// tVal7 = 3 * (pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep])
+ ADD tVal6, tVal6, tVal7 ;// tVal6 = V = V + V5
+ SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[9*leftStep] - pSrcLeft[5*leftStep]
+ M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[8*leftStep]
+ M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[6*leftStep]
+ ADD tVal6, tVal6, tVal7, LSL #1 ;// tVal6 = V = V + V6
+
+ ;// M_STALL ARM1136JS=1
+ SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[8*leftStep] - pSrcLeft[6*leftStep]
+ ADD tVal6, tVal6, tVal7 ;// tVal6 = V = V + V7
+
+ ;// M_STALL ARM1136JS=1
+ ADD tVal6, tVal6, tVal6, LSL #2 ;// tVal6 = 5*V
+ ADD tVal6, tVal6, #32 ;// tVal6 = 5*V + 32
+
+ ;// M_STALL ARM1136JS=1
+ ASR tVal14, tVal6, #6 ;// tVal14 = c = (5*V + 32)>>6
+
+ ;// M_STALL ARM1136JS=1
+ RSB tVal6, tVal14, tVal14, LSL #3 ;// tVal6 = 7*c
+ UXTH tVal14, tVal14 ;// tVal14 = Cleared the upper half word
+ ADD tVal10, tVal12, tVal12 ;// tVal10 = 2*b
+ ORR tVal14, tVal14, tVal14, LSL #16 ;// tVal14 = {c , c}
+ SUB tVal6, tVal2, tVal6 ;// tVal6 = d = a - 7*b - 7*c + 16
+ ADD tVal1, tVal6, tVal10 ;// tVal1 = pp2 = d + 2*b
+ ADD tVal10, tVal10, tVal12 ;// tVal10 =3*b
+ ORR tVal0, tVal6, tVal1, LSL #16 ;// tval0 = p2p0 = pack {p2, p0}
+ UXTH tVal12, tVal12 ;// tVal12 = Cleared the upper half word
+ UXTH tVal10, tVal10 ;// tVal12 = Cleared the upper half word
+ ORR tVal12, tVal12, tVal12, LSL #16 ;// tVal12 = {b , b}
+ ORR tVal10, tVal10, tVal10, LSL #16 ;// tVal10 = {3b , 3b}
+ SADD16 tVal1, tVal0, tVal12 ;// tVal1 = p3p1 = p2p0 + {b,b}
+ SADD16 tVal2, tVal1, tVal10 ;// tVal2 = p6p4 = p3p1 + {3b,3b}
+ SADD16 tVal4, tVal2, tVal12 ;// tVal4 = p7p5 = p6p4 + {b,b}
+ SADD16 tVal6, tVal4, tVal10 ;// tVal6 = p10p8 = p7p5 + {3b,3b}
+ SADD16 tVal7, tVal6, tVal12 ;// tVal7 = p11p9 = p10p8 + {b,b}
+ SADD16 tVal8, tVal7, tVal10 ;// tVal8 = p14p12 = p11p9 + {3b,3b}
+ SADD16 tVal9, tVal8, tVal12 ;// tVal9 = p15p13 = p14p12 + {b,b}
+ LDR r0x00FF00FF, =MASK_CONST ;// r0x00FF00FF = 0x00FF00FF
+
+LOOP_PLANE
+
+ USAT16 temp2, #13, p3p1
+ USAT16 temp1, #13, p2p0
+ SADD16 p3p1, p3p1, c
+ SADD16 p2p0, p2p0, c
+ AND temp2, r0x00FF00FF, temp2, ASR #5
+ AND temp1, r0x00FF00FF, temp1, ASR #5
+ ORR temp1, temp1, temp2, LSL #8
+ STR temp1, [pDst], #4
+
+ USAT16 temp2, #13, p7p5
+ USAT16 temp1, #13, p6p4
+ SADD16 p7p5, p7p5, c
+ SADD16 p6p4, p6p4, c
+ AND temp2, r0x00FF00FF, temp2, ASR #5
+ AND temp1, r0x00FF00FF, temp1, ASR #5
+ ORR temp1, temp1, temp2, LSL #8
+ STR temp1, [pDst], #4
+
+ USAT16 temp2, #13, p11p9
+ USAT16 temp1, #13, p10p8
+ SADD16 p11p9, p11p9, c
+ SADD16 p10p8, p10p8, c
+ AND temp2, r0x00FF00FF, temp2, ASR #5
+ AND temp1, r0x00FF00FF, temp1, ASR #5
+ ORR temp1, temp1, temp2, LSL #8
+ STR temp1, [pDst], #4
+
+ USAT16 temp2, #13, p15p13
+ USAT16 temp1, #13, p14p12
+ SADD16 p15p13, p15p13, c
+ SADD16 p14p12, p14p12, c
+ AND temp2, r0x00FF00FF, temp2, ASR #5
+ AND temp1, r0x00FF00FF, temp1, ASR #5
+ ORR temp1, temp1, temp2, LSL #8
+ STR temp1, [pDst], #4
+
+ ADDS r0x00FF00FF, r0x00FF00FF, #1<<28 ;// Loop counter value in top 4 bits
+
+ ADD pDst, pDst, dstStep
+
+ BCC LOOP_PLANE ;// Loop for 16 times
+ MOV return, #OMX_Sts_NoErr
+ M_END
+
+ ENDIF ;// ARM1136JS
+
+
+ END
+;-----------------------------------------------------------------------------------------------
+; omxVCM4P10_PredictIntra_16x16 ends
+;-----------------------------------------------------------------------------------------------
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
new file mode 100644
index 0000000..a90f460
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s
@@ -0,0 +1,567 @@
+;//
+;//
+;// File Name: omxVCM4P10_PredictIntra_4x4_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+;// Define the processor variants supported by this file
+
+ M_VARIANTS ARM1136JS
+
+;//-------------------------------------------------------
+;// Jump table implementing a C-style switch in asm:
+;// entry i holds the address of the handler for predMode i.
+;//-------------------------------------------------------
+
+ M_TABLE armVCM4P10_pSwitchTable4x4
+ DCD OMX_VC_4x4_VERT, OMX_VC_4x4_HOR ;// entries 0, 1
+ DCD OMX_VC_4x4_DC, OMX_VC_4x4_DIAG_DL ;// entries 2, 3
+ DCD OMX_VC_4x4_DIAG_DR, OMX_VC_4x4_VR ;// entries 4, 5
+ DCD OMX_VC_4x4_HD, OMX_VC_4x4_VL ;// entries 6, 7
+ DCD OMX_VC_4x4_HU ;// entry 8
+
+ IF ARM1136JS
+
+;//--------------------------------------------
+;// Constants
+;//--------------------------------------------
+BLK_SIZE EQU 0x8 ;// not referenced anywhere else in this file
+MUL_CONST0 EQU 0x01010101 ;// (byte value) * MUL_CONST0 copies the byte into all four byte lanes
+ADD_CONST1 EQU 0x80808080 ;// 128 in every byte lane; added back after the UHSUB8 halving steps
+
+;//--------------------------------------------
+;// Scratch variable (NOTE: these names alias the input registers declared further below)
+;//--------------------------------------------
+return RN 0 ;// status value; same register as pSrcLeft
+pTable RN 9 ;// base address of armVCM4P10_pSwitchTable4x4
+pc RN 15
+r0x01010101 RN 1 ;// holds MUL_CONST0; same register as pSrcAbove
+r0x80808080 RN 0 ;// holds ADD_CONST1; same register as pSrcLeft and return
+
+tVal0 RN 0 ;// same register as pSrcLeft
+tVal1 RN 1 ;// same register as pSrcAbove
+tVal2 RN 2 ;// same register as pSrcAboveLeft
+tVal4 RN 4 ;// same register as leftStep
+tVal6 RN 6
+tVal7 RN 7
+tVal8 RN 8
+tVal9 RN 9
+tVal10 RN 10
+tVal11 RN 11
+tVal12 RN 12
+tVal14 RN 14
+
+Out0 RN 6 ;// Out0..Out3: the four packed 4-byte output rows
+Out1 RN 7
+Out2 RN 8
+Out3 RN 9
+
+Left0 RN 6 ;// Left0..Left3: bytes read through pSrcLeft; same registers as Out0..Out3
+Left1 RN 7
+Left2 RN 8
+Left3 RN 9
+
+Above0123 RN 12 ;// packed pSrcAbove[0 to 3]
+Above4567 RN 14 ;// packed pSrcAbove[4 to 7]
+
+AboveLeft RN 10 ;// byte read from pSrcAboveLeft[0]
+
+;//--------------------------------------------
+;// Declare input registers
+;//--------------------------------------------
+pSrcLeft RN 0 ;// input pointer
+pSrcAbove RN 1 ;// input pointer
+pSrcAboveLeft RN 2 ;// input pointer
+pDst RN 3 ;// output pointer
+leftStep RN 4 ;// input variable (loaded from the stack argument LeftStep)
+dstStep RN 5 ;// input variable (loaded from the stack argument DstStep)
+predMode RN 6 ;// input variable (loaded from the stack argument PredMode)
+availability RN 7 ;// input variable (loaded from the stack argument Availability)
+
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntra_4x4 starts
+;//-----------------------------------------------------------------------------------------------
+;// Writes four 4-byte rows to pDst (dstStep apart); predMode selects one of the nine handlers below.
+ ;// Write function header
+ M_START omxVCM4P10_PredictIntra_4x4, r11
+
+ ;// Define stack arguments
+ M_ARG LeftStep, 4
+ M_ARG DstStep, 4
+ M_ARG PredMode, 4
+ M_ARG Availability, 4
+
+ ;// M_STALL ARM1136JS=4
+
+ LDR pTable,=armVCM4P10_pSwitchTable4x4 ;// Load index table for switch case
+
+ ;// Load argument from the stack
+ M_LDR predMode, PredMode ;// Arg predMode loaded from stack to reg
+ M_LDR leftStep, LeftStep ;// Arg leftStep loaded from stack to reg
+ M_LDR dstStep, DstStep ;// Arg dstStep loaded from stack to reg
+ M_LDR availability, Availability ;// Arg availability loaded from stack to reg
+
+ LDR pc, [pTable, predMode, LSL #2] ;// Jump to table entry predMode (no range check is done here)
+
+OMX_VC_4x4_VERT ;// Vertical: every output row is a copy of pSrcAbove[0 to 3]
+
+ LDR Above0123, [pSrcAbove] ;// Above0123 = pSrcAbove[0 to 3]
+ M_STR Above0123, [pDst], dstStep ;// row 0 = Above0123, pDst post-incremented by dstStep
+ M_STR Above0123, [pDst], dstStep ;// row 1 = Above0123, pDst post-incremented by dstStep
+ M_STR Above0123, [pDst], dstStep ;// row 2 = Above0123, pDst post-incremented by dstStep
+ STR Above0123, [pDst] ;// row 3 = Above0123
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Exit the function here (end of this case)
+
+OMX_VC_4x4_HOR ;// Horizontal: output row i is the byte pSrcLeft[i*leftStep] repeated four times
+
+ ;// M_STALL ARM1136JS=6
+
+ LDR r0x01010101, =MUL_CONST0 ;// Const to repeat the byte in reg 4 times (overwrites pSrcAbove, unused here)
+ M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0]
+ M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1*leftStep]
+ M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2*leftStep]
+ LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3*leftStep]
+ MUL Out0, Left0, r0x01010101 ;// replicate the val in all the bytes
+ MUL Out1, Left1, r0x01010101 ;// replicate the val in all the bytes
+ MUL Out2, Left2, r0x01010101 ;// replicate the val in all the bytes
+ MUL Out3, Left3, r0x01010101 ;// replicate the val in all the bytes
+ M_STR Out0, [pDst], dstStep ;// store {Out0} as output row 0
+ M_STR Out1, [pDst], dstStep ;// store {Out1} as output row 1
+ M_STR Out2, [pDst], dstStep ;// store {Out2} as output row 2
+ STR Out3, [pDst] ;// store {Out3} as output row 3
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Exit the function here (end of this case)
+
+OMX_VC_4x4_DC ;// DC, both neighbours available: fill with (sum of 4 above + 4 left + 4) >> 3
+
+ ;// M_STALL ARM1136JS=6
+
+ AND availability, availability, #(OMX_VC_UPPER + OMX_VC_LEFT) ;// keep only the UPPER and LEFT flags
+ CMP availability, #(OMX_VC_UPPER + OMX_VC_LEFT)
+ BNE UpperOrLeftOrNoneAvailable ;// Taken unless both UPPER and LEFT are set
+ LDR Above0123, [pSrcAbove] ;// Above0123 = pSrcAbove[0 to 3]
+
+ ;// M_STALL ARM1136JS=1
+
+ UXTB16 tVal7, Above0123 ;// pSrcAbove[0, 2]
+ UXTB16 tVal6, Above0123, ROR #8 ;// pSrcAbove[1, 3]
+ UADD16 tVal11, tVal6, tVal7 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
+ M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0]
+ M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1*leftStep]
+ ADD tVal11, tVal11, LSR #16 ;// low halfword = sum(pSrcAbove[0] to pSrcAbove[3])
+ M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2*leftStep]
+ LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3*leftStep]
+ UXTH tVal11, tVal11 ;// upsum1 (Clear the top junk bits)
+ ADD tVal6, Left0, Left1 ;// tVal6 = Left0 + Left1
+ ADD tVal7, Left2, Left3 ;// tVal7 = Left2 + Left3
+ ADD tVal6, tVal6, tVal7 ;// tVal6 = tVal6 + tVal7
+ ADD Out0, tVal6, tVal11 ;// Out0 = tVal6 + tVal11
+ ADD Out0, Out0, #4 ;// Out0 = Out0 + 4
+ LDR r0x01010101, =MUL_CONST0 ;// 0x01010101
+ MOV Out0, Out0, LSR #3 ;// Out0 = (Out0 + 4)>>3
+
+ ;// M_STALL ARM1136JS=1
+
+ MUL Out0, Out0, r0x01010101 ;// replicate the val in all the bytes
+
+ ;// M_STALL ARM1136JS=1
+
+ MOV return, #OMX_Sts_NoErr ;// safe here: pSrcLeft (same register) is no longer needed
+ M_STR Out0, [pDst], dstStep ;// store {Out0} as output row 0
+ M_STR Out0, [pDst], dstStep ;// store {Out0} as output row 1
+ M_STR Out0, [pDst], dstStep ;// store {Out0} as output row 2
+ STR Out0, [pDst] ;// store {Out0} as output row 3
+ M_EXIT ;// Exit the function here (end of this case)
+
+UpperOrLeftOrNoneAvailable ;// DC, upper only: fill with (sum of 4 above + 2) >> 2
+ ;// M_STALL ARM1136JS=3
+
+ CMP availability, #OMX_VC_UPPER ;// availability was masked to UPPER|LEFT: equal means UPPER only
+ BNE LeftOrNoneAvailable ;// Jump to Left if not upper
+ LDR Above0123, [pSrcAbove] ;// Above0123 = pSrcAbove[0 to 3]
+
+ ;// M_STALL ARM1136JS=3
+
+ UXTB16 tVal7, Above0123 ;// pSrcAbove[0, 2]
+ UXTB16 tVal6, Above0123, ROR #8 ;// pSrcAbove[1, 3]
+ UADD16 Out0, tVal6, tVal7 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3]
+ LDR r0x01010101, =MUL_CONST0 ;// 0x01010101
+ ADD Out0, Out0, LSR #16 ;// low halfword = sum(pSrcAbove[0] to pSrcAbove[3])
+
+ ;// M_STALL ARM1136JS=1
+
+ UXTH Out0, Out0 ;// upsum1 (Clear the top junk bits)
+ ADD Out0, Out0, #2 ;// Out0 = Out0 + 2
+
+ ;// M_STALL ARM1136JS=1
+
+ MOV Out0, Out0, LSR #2 ;// Out0 = (Out0 + 2)>>2
+
+ ;// M_STALL ARM1136JS=1
+
+ MUL Out0, Out0, r0x01010101 ;// replicate the val in all the bytes
+
+ ;// M_STALL ARM1136JS=1
+
+ MOV return, #OMX_Sts_NoErr
+ M_STR Out0, [pDst], dstStep ;// store {Out0} as output row 0
+ M_STR Out0, [pDst], dstStep ;// store {Out0} as output row 1
+ M_STR Out0, [pDst], dstStep ;// store {Out0} as output row 2
+ STR Out0, [pDst] ;// store {Out0} as output row 3
+
+ M_EXIT ;// Exit the function here (end of this case)
+
+LeftOrNoneAvailable ;// DC, left only: fill with (sum of 4 left + 2) >> 2
+ ;// M_STALL ARM1136JS=3
+
+ LDR r0x01010101, =MUL_CONST0 ;// 0x01010101 (loaded before the branch: NoneAvailable uses it too)
+ CMP availability, #OMX_VC_LEFT ;// availability was masked to UPPER|LEFT: equal means LEFT only
+ BNE NoneAvailable
+ M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0]
+ M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1*leftStep]
+ M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2*leftStep]
+ LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3*leftStep]
+ ADD Out0, Left0, Left1 ;// Out0 = Left0 + Left1
+
+ ;// M_STALL ARM1136JS=1
+
+ ADD Out1, Left2, Left3 ;// Out1 = Left2 + Left3
+ ADD Out0, Out0, Out1 ;// Out0 = Out0 + Out1
+ ADD Out0, Out0, #2 ;// Out0 = Out0 + 2
+
+ ;// M_STALL ARM1136JS=1
+
+ MOV Out0, Out0, LSR #2 ;// Out0 = (Out0 + 2)>>2
+
+ ;// M_STALL ARM1136JS=1
+
+ MUL Out0, Out0, r0x01010101 ;// replicate the val in all the bytes
+
+ ;// M_STALL ARM1136JS=1
+
+ MOV return, #OMX_Sts_NoErr
+ M_STR Out0, [pDst], dstStep ;// store {Out0} as output row 0
+ M_STR Out0, [pDst], dstStep ;// store {Out0} as output row 1
+ M_STR Out0, [pDst], dstStep ;// store {Out0} as output row 2
+ STR Out0, [pDst] ;// store {Out0} as output row 3
+ M_EXIT ;// Exit the function here (end of this case)
+
+NoneAvailable ;// DC, neither UPPER nor LEFT set: fill the block with 128
+ MOV Out0, #128 ;// Out0 = 128 (no neighbour contributes)
+
+ ;// M_STALL ARM1136JS=5
+
+ MUL Out0, Out0, r0x01010101 ;// replicate in all bytes; r0x01010101 was loaded at LeftOrNoneAvailable
+
+ ;// M_STALL ARM1136JS=1
+
+ MOV return, #OMX_Sts_NoErr
+ M_STR Out0, [pDst], dstStep ;// store {Out0} as output row 0
+ M_STR Out0, [pDst], dstStep ;// store {Out0} as output row 1
+ M_STR Out0, [pDst], dstStep ;// store {Out0} as output row 2
+ STR Out0, [pDst] ;// store {Out0} as output row 3
+ M_EXIT ;// Exit the function here (end of this case)
+
+OMX_VC_4x4_DIAG_DL ;// Diagonal-down-left; this part handles OMX_VC_UPPER_RIGHT not set (U3 stands in for U4..U7)
+
+ ;//------------------------------------------------------------------
+ ;// f = (a+2*b+c+2)>>2
+ ;// Calculate as:
+ ;// d = (a + c )>>1 (UHADD8)
+ ;// e = (d - b')>>1 (UHSUB8, b' = bitwise NOT of b from MVN)
+ ;// f = e + 128 (UADD8 with 0x80808080)
+ ;//------------------------------------------------------------------
+
+ ;// M_STALL ARM1136JS=3
+
+ TST availability, #OMX_VC_UPPER_RIGHT ;// flags stay valid until the BNE: the loads below do not set them
+ LDMIA pSrcAbove, {Above0123, Above4567} ;// Above0123, Above4567 = pSrcAbove[0 to 7] (read in both branches)
+ LDR r0x80808080, =ADD_CONST1 ;// 0x80808080
+ BNE DLUpperRightAvailable
+ LDR r0x01010101, =MUL_CONST0 ;// 0x01010101
+ MOV tVal7, Above0123, LSR #24 ;// {00, 00, 00, U3 }
+ MOV tVal11, tVal7, LSL #24 ;// {U3, 00, 00, 00 }
+ MUL Out3, tVal7, r0x01010101 ;// {U3, U3, U3, U3 } (Out3 is the same register as tVal9)
+ MOV tVal8, Above0123, LSR #16 ;// {00, 00, U3, U2 }
+ MOV tVal10, Above0123, LSR #8 ;// {00, U3, U2, U1 }
+ MVN tVal10, tVal10 ;// {00', U3', U2', U1'}
+ UHADD8 tVal8, tVal8, Above0123 ;// {xx, xx, d1, d0 }
+ UHADD8 tVal6, Above0123, tVal9 ;// {xx, d2, xx, xx } (tVal9 = Out3 = {U3, U3, U3, U3})
+ UHSUB8 tVal8, tVal8, tVal10 ;// {xx, xx, e1, e0 }
+ UHSUB8 tVal6, tVal6, tVal10 ;// {xx, e2, xx, xx }
+ UADD8 tVal8, tVal8, r0x80808080 ;// {xx, xx, f1, f0 }
+ UADD8 tVal6, tVal6, r0x80808080 ;// {xx, f2, xx, xx }
+
+ ;// M_STALL ARM1136JS=1
+
+ PKHBT tVal6, tVal8, tVal6 ;// {xx, f2, f1, f0 }
+ BIC tVal6, tVal6, #0xFF000000 ;// {00, f2, f1, f0 }
+ ORR Out0, tVal6, tVal11 ;// {U3, f2, f1, f0 }
+
+ ;// M_STALL ARM1136JS=1
+
+ PKHTB Out1, Out3, Out0, ASR #8 ;// {U3, U3, f2, f1 }
+ MOV return, #OMX_Sts_NoErr
+ PKHTB Out2, Out3, Out1, ASR #8 ;// {U3, U3, U3, f2 }
+
+ M_STR Out0, [pDst], dstStep ;// store {U3, f2, f1, f0} as output row 0
+ M_STR Out1, [pDst], dstStep ;// store {U3, U3, f2, f1} as output row 1
+ M_STR Out2, [pDst], dstStep ;// store {U3, U3, U3, f2} as output row 2
+ STR Out3, [pDst] ;// store {U3, U3, U3, U3} as output row 3
+ M_EXIT ;// Exit the function here (end of this case)
+
+DLUpperRightAvailable ;// Diagonal-down-left with OMX_VC_UPPER_RIGHT set: uses all of pSrcAbove[0 to 7]
+
+ MOV tVal8, Above0123, LSR #24 ;// {00, 00, 00, U3 }
+ MOV tVal9, Above0123, LSR #16 ;// {00, 00, U3, U2 }
+ MOV tVal10, Above0123, LSR #8 ;// {00, U3, U2, U1 }
+ ORR tVal8, tVal8, Above4567, LSL #8 ;// {U6, U5, U4, U3 }
+ ORR tVal10, tVal10, Above4567, LSL #24 ;// {U4, U3, U2, U1 }
+ PKHBT tVal9, tVal9, Above4567, LSL #16 ;// {U5, U4, U3, U2 }
+ MVN tVal1, tVal8 ;// {U6', U5', U4', U3'}
+ MVN tVal10, tVal10 ;// {U4', U3', U2', U1'}
+ MVN tVal2, Above4567 ;// {U7', U6', U5', U4'}
+ UHADD8 tVal6, Above0123, tVal9 ;// {d3, d2, d1, d0 }
+ UHADD8 tVal9, tVal9, Above4567 ;// {d5, d4, d3, d2 }
+ UHADD8 tVal8, Above4567, tVal8 ;// {d6, xx, xx, xx } (d6 = (U7 + U6)>>1)
+ UHSUB8 tVal6, tVal6, tVal10 ;// {e3, e2, e1, e0 }
+ UHSUB8 tVal12, tVal9, tVal1 ;// {e5, e4, e3, e2 }
+ UHSUB8 tVal8, tVal8, tVal2 ;// {e6, xx, xx, xx } (b = U7 here, giving (U6 + 3*U7 + 2)>>2)
+ UADD8 Out0, tVal6, r0x80808080 ;// {f3, f2, f1, f0 }
+ UADD8 tVal9, tVal8, r0x80808080 ;// {f6, xx, xx, xx }
+ UADD8 Out2, tVal12, r0x80808080 ;// {f5, f4, f3, f2 }
+ MOV tVal7, Out0, LSR #8 ;// {00, f3, f2, f1 }
+ AND tVal9, tVal9, #0xFF000000 ;// {f6, 00, 00, 00 }
+ PKHBT Out1, tVal7, Out2, LSL #8 ;// {f4, f3, f2, f1 }
+ ORR Out3, tVal9, Out2, LSR #8 ;// {f6, f5, f4, f3 }
+ M_STR Out0, [pDst], dstStep ;// store {f3 to f0} as output row 0
+ M_STR Out1, [pDst], dstStep ;// store {f4 to f1} as output row 1
+ M_STR Out2, [pDst], dstStep ;// store {f5 to f2} as output row 2
+ STR Out3, [pDst] ;// store {f6 to f3} as output row 3
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Exit the function here (end of this case)
+
+
+OMX_VC_4x4_DIAG_DR ;// Diagonal-down-right: uses pSrcLeft[0..3 rows], pSrcAboveLeft[0], pSrcAbove[0 to 3]
+
+ ;// M_STALL ARM1136JS=4
+
+ M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0]
+ M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1*leftStep]
+ M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2*leftStep]
+ LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3*leftStep]
+ LDRB AboveLeft, [pSrcAboveLeft] ;// AboveLeft = pSrcAboveLeft[0]
+ ORR tVal7, Left1, Left0, LSL #8 ;// tVal7 = 00 00 L0 L1
+ LDR Above0123, [pSrcAbove] ;// Above0123 = U3 U2 U1 U0
+ LDR r0x80808080, =ADD_CONST1 ;// 0x80808080 (overwrites pSrcLeft, no longer needed)
+ ORR tVal8, Left3, Left2, LSL #8 ;// tVal8 = 00 00 L2 L3
+ PKHBT tVal7, tVal8, tVal7, LSL #16 ;// tVal7 = L0 L1 L2 L3
+ MOV tVal8, Above0123, LSL #8 ;// tVal8 = U2 U1 U0 00
+ MOV tVal9, tVal7, LSR #8 ;// tVal9 = 00 L0 L1 L2
+ ORR tVal8, tVal8, AboveLeft ;// tVal8 = U2 U1 U0 UL
+ ORR tVal9, tVal9, AboveLeft, LSL #24 ;// tVal9 = UL L0 L1 L2
+ MOV tVal10, Above0123, LSL #24 ;// tVal10= U0 00 00 00
+ UXTB tVal11, tVal7, ROR #24 ;// tVal11= 00 00 00 L0
+ ORR tVal10, tVal10, tVal9, LSR #8 ;// tVal10= U0 UL L0 L1
+ ORR tVal11, tVal11, tVal8, LSL #8 ;// tVal11= U1 U0 UL L0
+ UHADD8 tVal11, Above0123, tVal11 ;// tVal11= d1 d0 dL g0
+ UHADD8 tVal10, tVal7, tVal10 ;// tVal10= g0 g1 g2 g3
+ MVN tVal8, tVal8 ;// tVal8 = U2'U1'U0'UL'
+ MVN tVal9, tVal9 ;// tVal9 = UL'L0'L1'L2'
+ UHSUB8 tVal11, tVal11, tVal8 ;// tVal11= e1 e0 eL h0
+ UHSUB8 tVal10, tVal10, tVal9 ;// tVal10= h0 h1 h2 h3
+ UADD8 Out3, tVal10, r0x80808080 ;// Out3 = i0 i1 i2 i3
+ UADD8 Out0, tVal11, r0x80808080 ;// Out0 = f1 f0 fL i0
+ UXTH tVal11, Out3, ROR #8 ;// tVal11= 00 00 i1 i2
+ MOV tVal7, Out0, LSL #8 ;// tVal7 = f0 fL i0 00
+ ORR Out1, tVal7, tVal11, LSR #8 ;// Out1 = f0 fL i0 i1
+ PKHBT Out2, tVal11, Out0, LSL #16 ;// Out2 = fL i0 i1 i2
+ M_STR Out0, [pDst], dstStep ;// store {f1 to i0} as output row 0
+ M_STR Out1, [pDst], dstStep ;// store {f0 to i1} as output row 1
+ M_STR Out2, [pDst], dstStep ;// store {fL to i2} as output row 2
+ STR Out3, [pDst] ;// store {i0 to i3} as output row 3
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Exit the function here (end of this case)
+
+OMX_VC_4x4_VR ;// Vertical-right: uses pSrcAbove[0 to 3], pSrcAboveLeft[0] and three bytes via pSrcLeft
+
+ ;// M_STALL ARM1136JS=4
+
+ LDR Above0123, [pSrcAbove] ;// Above0123 = U3 U2 U1 U0
+ LDRB AboveLeft, [pSrcAboveLeft] ;// AboveLeft = 00 00 00 UL
+ M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = 00 00 00 L0
+ M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = 00 00 00 L1
+ LDRB Left2, [pSrcLeft] ;// Left2 = 00 00 00 L2
+ MOV tVal0, Above0123, LSL #8 ;// tVal0 = U2 U1 U0 00 (overwrites pSrcLeft, no longer needed)
+ MOV tVal9, Above0123 ;// tVal9 = U3 U2 U1 U0
+ ORR tVal14, tVal0, AboveLeft ;// tVal14 = U2 U1 U0 UL
+ MVN tVal11, tVal14 ;// tVal11 = U2'U1'U0'UL'
+ MOV tVal2, tVal14, LSL #8 ;// tVal2 = U1 U0 UL 00
+ UHSUB8 tVal1, Above0123, tVal11 ;// tVal1 = d2 d1 d0 dL
+ UHADD8 tVal10, AboveLeft, Left1 ;// tVal10 = 00 00 00 j1
+ MVN tVal4, Left0 ;// tVal4 = FF FF FF L0' (upper lanes return to 00 after the +0x80 below)
+ UHSUB8 tVal4, tVal10, tVal4 ;// tVal4 = 80 80 80 k1
+ ORR tVal12, tVal0, Left0 ;// tVal12 = U2 U1 U0 L0
+ ORR tVal14, tVal2, Left0 ;// tVal14 = U1 U0 UL L0
+ LDR r0x80808080, =ADD_CONST1 ;// 0x80808080 (same register as tVal0, whose last use is above)
+ UHADD8 tVal10, tVal9, tVal14 ;// tVal10 = g3 g2 g1 g0
+ UADD8 Out0, tVal1, r0x80808080 ;// Out0 = e2 e1 e0 eL
+ UHSUB8 tVal10, tVal10, tVal11 ;// tVal10 = h3 h2 h1 h0
+ M_STR Out0, [pDst], dstStep ;// store {e2 to eL} as output row 0
+ MOV tVal1, tVal14, LSL #8 ;// tVal1 = U0 UL L0 00
+ MOV tVal6, Out0, LSL #8 ;// tVal6 = e1 e0 eL 00
+ ORR tVal2, tVal2, Left1 ;// tVal2 = U1 U0 UL L1
+ UADD8 tVal4, tVal4, r0x80808080 ;// tVal4 = 00 00 00 l1
+ UADD8 Out1, tVal10, r0x80808080 ;// Out1 = i3 i2 i1 i0
+ MVN tVal2, tVal2 ;// tVal2 = U1'U0'UL'L1'
+ ORR tVal1, tVal1, Left2 ;// tVal1 = U0 UL L0 L2
+ ORR Out2, tVal6, tVal4 ;// Out2 = e1 e0 eL l1
+ UHADD8 tVal1, tVal1, tVal12 ;// tVal1 = g2 g1 g0 j2
+ M_STR Out1, [pDst], dstStep ;// store {i3 to i0} as output row 1
+ M_STR Out2, [pDst], dstStep ;// store {e1 to l1} as output row 2
+ UHSUB8 tVal9, tVal1, tVal2 ;// tVal9 = h2 h1 h0 k2
+ UADD8 Out3, tVal9, r0x80808080 ;// Out3 = i2 i1 i0 l2
+ STR Out3, [pDst] ;// store {i2 to l2} as output row 3
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Exit the function here (end of this case)
+
+OMX_VC_4x4_HD ;// Horizontal-down: uses pSrcAbove[0 to 3], pSrcAboveLeft[0] and four bytes via pSrcLeft
+
+ ;// M_STALL ARM1136JS=4
+
+ LDR Above0123, [pSrcAbove] ;// Above0123 = U3 U2 U1 U0
+ LDRB AboveLeft, [pSrcAboveLeft] ;// AboveLeft = 00 00 00 UL
+ M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = 00 00 00 L0
+ M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = 00 00 00 L1
+ M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = 00 00 00 L2
+ LDRB Left3, [pSrcLeft] ;// Left3 = 00 00 00 L3
+ LDR r0x80808080, =ADD_CONST1 ;// 0x80808080 (overwrites pSrcLeft, no longer needed)
+ ORR tVal2, AboveLeft, Above0123, LSL #8;// tVal2 = U2 U1 U0 UL
+ MVN tVal1, Left0 ;// tVal1 = FF FF FF L0' (upper lanes return to 00 after the +0x80 below)
+ ORR tVal4, Left0, tVal2, LSL #8 ;// tVal4 = U1 U0 UL L0
+ MVN tVal2, tVal2 ;// tVal2 = U2'U1'U0'UL'
+ UHADD8 tVal4, tVal4, Above0123 ;// tVal4 = g3 g2 g1 g0
+ UHSUB8 tVal1, AboveLeft, tVal1 ;// tVal1 = 80 80 80 dL
+ UHSUB8 tVal4, tVal4, tVal2 ;// tVal4 = h3 h2 h1 h0
+ UADD8 tVal1, tVal1, r0x80808080 ;// tVal1 = 00 00 00 eL
+ UADD8 tVal4, tVal4, r0x80808080 ;// tVal4 = i3 i2 i1 i0
+ ORR tVal2, Left0, AboveLeft, LSL #16 ;// tVal2 = 00 UL 00 L0
+ MOV tVal4, tVal4, LSL #8 ;// tVal4 = i2 i1 i0 00
+ ORR tVal11, Left1, Left0, LSL #16 ;// tVal11= 00 L0 00 L1
+ ORR tVal7, Left2, Left1, LSL #16 ;// tVal7 = 00 L1 00 L2
+ ORR tVal10, Left3, Left2, LSL #16 ;// tVal10= 00 L2 00 L3
+ ORR Out0, tVal4, tVal1 ;// Out0 = i2 i1 i0 eL
+ M_STR Out0, [pDst], dstStep ;// store {Out0} as output row 0
+ MOV tVal4, Out0, LSL #16 ;// tVal4 = i0 eL 00 00
+ UHADD8 tVal2, tVal2, tVal7 ;// tVal2 = 00 j1 00 j2
+ UHADD8 tVal6, tVal11, tVal10 ;// tVal6 = 00 j2 00 j3
+ MVN tVal12, tVal11 ;// tVal12= FF L0'FF L1'
+ MVN tVal14, tVal7 ;// tVal14= FF L1'FF L2'
+ UHSUB8 tVal2, tVal2, tVal12 ;// tVal2 = 80 k1 80 k2
+ UHSUB8 tVal8, tVal7, tVal12 ;// tVal8 = 80 d1 80 d2
+ UHSUB8 tVal11, tVal6, tVal14 ;// tVal11= 80 k2 80 k3
+ UHSUB8 tVal9, tVal10, tVal14 ;// tVal9 = 80 d2 80 d3
+ UADD8 tVal2, tVal2, r0x80808080 ;// tVal2 = 00 l1 00 l2
+ UADD8 tVal8, tVal8, r0x80808080 ;// tVal8 = 00 e1 00 e2
+ UADD8 tVal11, tVal11, r0x80808080 ;// tVal11= 00 l2 00 l3
+ UADD8 tVal9, tVal9, r0x80808080 ;// tVal9 = 00 e2 00 e3
+ ORR Out2, tVal8, tVal2, LSL #8 ;// Out2 = l1 e1 l2 e2
+ ORR Out3, tVal9, tVal11, LSL #8 ;// Out3 = l2 e2 l3 e3
+ PKHTB Out1, tVal4, Out2, ASR #16 ;// Out1 = i0 eL l1 e1
+ M_STR Out1, [pDst], dstStep ;// store {Out1} as output row 1
+ M_STR Out2, [pDst], dstStep ;// store {Out2} as output row 2
+ STR Out3, [pDst] ;// store {Out3} as output row 3
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Exit the function here (end of this case)
+
+OMX_VC_4x4_VL ;// Vertical-left: uses pSrcAbove[0 to 7]; U3 replaces U4..U7 when OMX_VC_UPPER_RIGHT is not set
+
+ ;// M_STALL ARM1136JS=3
+
+ LDMIA pSrcAbove, {Above0123, Above4567} ;// Above0123, Above4567 = pSrcAbove[0 to 7]
+ TST availability, #OMX_VC_UPPER_RIGHT ;// Z flag consumed by the MULEQ below
+ LDR r0x80808080, =ADD_CONST1 ;// 0x80808080
+ LDR r0x01010101, =MUL_CONST0 ;// 0x01010101
+ MOV tVal11, Above0123, LSR #24 ;// tVal11= 00 00 00 U3
+ MULEQ Above4567, tVal11, r0x01010101 ;// Above4567 = U3 U3 U3 U3 (only when upper-right is not available)
+ MOV tVal9, Above0123, LSR #8 ;// tVal9 = 00 U3 U2 U1
+ MVN tVal10, Above0123 ;// tVal10= U3'U2'U1'U0'
+ ORR tVal2, tVal9, Above4567, LSL #24 ;// tVal2 = U4 U3 U2 U1
+ UHSUB8 tVal8, tVal2, tVal10 ;// tVal8 = d4 d3 d2 d1
+ UADD8 Out0, tVal8, r0x80808080 ;// Out0 = e4 e3 e2 e1
+ M_STR Out0, [pDst], dstStep ;// store {Out0} as output row 0
+ MOV tVal9, tVal9, LSR #8 ;// tVal9 = 00 00 U3 U2
+ MOV tVal10, Above4567, LSL #8 ;// tVal10= U6 U5 U4 00
+ PKHBT tVal9, tVal9, Above4567, LSL #16 ;// tVal9 = U5 U4 U3 U2
+ ORR tVal10, tVal10, tVal11 ;// tVal10= U6 U5 U4 U3
+ UHADD8 tVal11, tVal9, Above0123 ;// tVal11= g5 g4 g3 g2
+ UHADD8 tVal14, tVal2, tVal10 ;// tVal14= g6 g5 g4 g3
+ MVN tVal8, tVal2 ;// tVal8 = U4'U3'U2'U1'
+ MVN tVal7, tVal9 ;// tVal7 = U5'U4'U3'U2'
+ UHSUB8 tVal12, tVal9, tVal8 ;// tVal12= d5 d4 d3 d2
+ UHSUB8 tVal11, tVal11, tVal8 ;// tVal11= h5 h4 h3 h2
+ UHSUB8 tVal2, tVal14, tVal7 ;// tVal2 = h6 h5 h4 h3
+ UADD8 Out1, tVal11, r0x80808080 ;// Out1 = i5 i4 i3 i2
+ UADD8 Out2, tVal12, r0x80808080 ;// Out2 = e5 e4 e3 e2
+ UADD8 Out3, tVal2, r0x80808080 ;// Out3 = i6 i5 i4 i3
+ M_STR Out1, [pDst], dstStep ;// store {Out1} as output row 1
+ M_STR Out2, [pDst], dstStep ;// store {Out2} as output row 2
+ M_STR Out3, [pDst], dstStep ;// store {Out3} as output row 3 (pDst is not used after this store)
+ MOV return, #OMX_Sts_NoErr
+ M_EXIT ;// Exit the function here (end of this case)
+
+OMX_VC_4x4_HU ;// Horizontal-up: uses only the four bytes read via pSrcLeft
+
+ ;// M_STALL ARM1136JS=2
+
+ LDR r0x01010101, =MUL_CONST0 ;// 0x01010101
+ M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0]
+ M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1*leftStep]
+ M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2*leftStep]
+ LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3*leftStep]
+ MOV r0x80808080, r0x01010101, LSL #7 ;// 0x80808080 derived from 0x01010101 (overwrites pSrcLeft)
+ ORR tVal6, Left0, Left1, LSL #16 ;// tVal6 = 00 L1 00 L0
+ ORR tVal7, Left1, Left2, LSL #16 ;// tVal7 = 00 L2 00 L1
+ ORR tVal11, Left2, Left3, LSL #16 ;// tVal11= 00 L3 00 L2
+ MUL Out3, Left3, r0x01010101 ;// Out3 = L3 L3 L3 L3
+ MVN tVal8, tVal7 ;// tVal8 = FF L2'FF L1' (the FF lanes return to 00 after the +0x80 below)
+ MVN tVal10, tVal11 ;// tVal10= FF L3'FF L2'
+ UHADD8 tVal4, tVal6, tVal11 ;// tVal4 = 00 g3 00 g2
+ UXTB16 tVal12, Out3 ;// tVal12= 00 L3 00 L3
+ UHSUB8 tVal4, tVal4, tVal8 ;// tVal4 = 80 h3 80 h2
+ UHSUB8 tVal6, tVal6, tVal8 ;// tVal6 = 80 d2 80 d1
+ UHSUB8 tVal11, tVal11, tVal8 ;// tVal11= 80 d3 80 d2
+ UHADD8 tVal12, tVal12, tVal7 ;// tVal12= 00 g4 00 g3
+ UADD8 tVal4, tVal4, r0x80808080 ;// tVal4 = 00 i3 00 i2
+ UHSUB8 tVal12, tVal12, tVal10 ;// tVal12= 80 h4 80 h3
+ UADD8 tVal8, tVal6, r0x80808080 ;// tVal8 = 00 e2 00 e1
+ UADD8 tVal11, tVal11, r0x80808080 ;// tVal11= 00 e3 00 e2
+ UADD8 tVal12, tVal12, r0x80808080 ;// tVal12= 00 i4 00 i3
+ ORR Out0, tVal8, tVal4, LSL #8 ;// Out0 = i3 e2 i2 e1
+ ORR Out1, tVal11, tVal12, LSL #8 ;// Out1 = i4 e3 i3 e2
+ M_STR Out0, [pDst], dstStep ;// store {Out0} as output row 0
+ PKHTB Out2, Out3, Out1, ASR #16 ;// Out2 = L3 L3 i4 e3
+ M_STR Out1, [pDst], dstStep ;// store {Out1} as output row 1
+ M_STR Out2, [pDst], dstStep ;// store {Out2} as output row 2
+ STR Out3, [pDst] ;// store {Out3} as output row 3
+ MOV return, #OMX_Sts_NoErr
+ M_END
+
+ ENDIF ;// ARM1136JS
+
+
+ END
+;//-----------------------------------------------------------------------------------------------
+;// omxVCM4P10_PredictIntra_4x4 ends
+;//-----------------------------------------------------------------------------------------------
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
new file mode 100644
index 0000000..53597a8
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s
@@ -0,0 +1,128 @@
+;//
+;//
+;// File Name: omxVCM4P10_TransformDequantChromaDCFromPair_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ IMPORT armVCM4P10_QPDivTable
+ IMPORT armVCM4P10_VMatrixQPModTable
+
+ M_VARIANTS ARM1136JS
+
+
+ IF ARM1136JS
+
+;//--------------------------------------
+;// Declare input registers
+;//--------------------------------------
+ppSrc RN 0 ;// address of the source pointer; the advanced pointer is written back through it
+pDst RN 1 ;// destination buffer for the four 16-bit coefficients
+QP RN 2 ;// index into both QP lookup tables
+
+;//--------------------------------
+;// Scratch variable for Unpack2x2
+;//--------------------------------
+pSrc RN 9 ;// running read pointer, loaded from [ppSrc]
+Value RN 4 ;// Value/Value2 are r4/r5, the register pair written by the STRD
+Value2 RN 5
+Flag RN 6 ;// header byte of the current pair
+strOffset RN 7 ;// byte offset of the coefficient inside pDst
+cstOffset RN 8 ;// constant mask 31 used to form strOffset
+
+;//--------------------------------
+;// Scratch variable (these reuse the unpack registers once unpacking is done)
+;//--------------------------------
+r0w0 RN 3
+r0w1 RN 4
+
+c0w0 RN 5 ;// same register as pQPDivTable
+c1w0 RN 6 ;// same register as pQPModTable
+
+return RN 0 ;// same register as ppSrc
+pQPDivTable RN 5
+pQPModTable RN 6
+Shift RN 9 ;// same register as pSrc
+Scale RN 2 ;// same register as QP
+
+Temp1 RN 3
+Temp2 RN 4
+Temp3 RN 7
+Temp4 RN 8
+
+ ;// Write function header
+ M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9
+ ;// Steps: unpack (Flag, value) pairs into pDst, 2x2 add/subtract butterfly,
+ ;// then scale each result by (pQPModTable[QP] << pQPDivTable[QP]) and halve it.
+ LDR pSrc, [ppSrc] ;// Load pSrc
+ MOV cstOffset, #31 ;// To be used in the loop, to compute offset
+
+ ;//-----------------------------------------------------------------------
+ ;// Firstly, fill all the coefficient values on the <pDst> buffer by zero
+ ;//-----------------------------------------------------------------------
+
+ MOV Value, #0 ;// Initialize the zero value
+ MOV Value2, #0 ;// Initialize the zero value
+ LDRB Flag, [pSrc], #1 ;// Preload <Flag> before <unpackLoop>
+ STRD Value, [pDst, #0] ;// pDst[0] = pDst[1] = pDst[2] = pDst[3] = 0
+
+ ;// Flag layout as used below: bits 0-3 = coefficient index, bit 4 = 16-bit value follows, bit 5 = last pair
+unpackLoop
+ TST Flag, #0x10 ;// Computing (Flag & 0x10)
+ LDRSBNE Value2,[pSrc,#1] ;// 16-bit case: Value2 = sign-extended high byte
+ LDRBNE Value, [pSrc], #2 ;// Load byte wise to avoid unaligned access
+ AND strOffset, cstOffset, Flag, LSL #1 ;// strOffset = (Flag & 15) << 1; NOTE(review): not bounded to the 8 bytes cleared above - confirm index <= 3 is guaranteed
+ LDRSBEQ Value, [pSrc], #1 ;// 8-bit case: Value = (OMX_S8) *pSrc++ (sign-extending load)
+ ORRNE Value,Value,Value2, LSL #8 ;// 16-bit case: Value = low byte | (signed high byte << 8)
+
+ TST Flag, #0x20 ;// Computing (Flag & 0x20) to check, if we're done
+ LDRBEQ Flag, [pSrc], #1 ;// Flag = (OMX_U8) *pSrc++, for next iteration
+ STRH Value, [pDst, strOffset] ;// Store <Value> at offset <strOffset>
+ BEQ unpackLoop ;// Branch to the loop beginning
+
+ LDMIA pDst, {r0w0, r0w1} ;// r0w0 = |c1|c0| & r0w1 = |c3|c2|
+
+
+ STR pSrc, [ppSrc] ;// Update the bitstream pointer
+
+ LDR pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer
+ LDR pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer
+
+ SADDSUBX r0w0, r0w0, r0w0 ;// [ c00+c01, c00-c01 ]
+ SADDSUBX r0w1, r0w1, r0w1 ;// [ c10+c11, c10-c11 ]
+
+ LDRSB Shift, [pQPDivTable, QP] ;// Shift = pQPDivTable[QP] (overwrites pSrc, already stored back)
+ LDRSB Scale, [pQPModTable, QP] ;// Scale = pQPModTable[QP] (overwrites QP, last use is this load)
+
+ SADD16 c0w0, r0w0, r0w1 ;// [ d00+d10, d01+d11 ]
+ SSUB16 c1w0, r0w0, r0w1 ;// [ d00-d10, d01-d11 ]
+
+ LSL Scale, Scale, Shift ;// Scale = Scale << Shift
+
+ SMULTB Temp2, c0w0, Scale ;// Temp2 = T(c0w0) * Scale
+ SMULTB Temp4, c1w0, Scale ;// Temp4 = T(c1w0) * Scale
+ SMULBB Temp1, c0w0, Scale ;// Temp1 = B(c0w0) * Scale
+ SMULBB Temp3, c1w0, Scale ;// Temp3 = B(c1w0) * Scale
+ MOV Temp2, Temp2, ASR #1 ;// Temp2 = Temp2 >> 1 (Temp1 is halved by the LSL #15 in the PKHBT below)
+ MOV Temp4, Temp4, ASR #1 ;// Temp4 = Temp4 >> 1 (Temp3 is halved by the LSL #15 in the PKHBT below)
+ PKHBT c0w0, Temp2, Temp1, LSL #15 ;// c0w0 = | Temp1 >> 1 | Temp2 | (top halfword | bottom halfword)
+ PKHBT c1w0, Temp4, Temp3, LSL #15 ;// c1w0 = | Temp3 >> 1 | Temp4 | (top halfword | bottom halfword)
+ STMIA pDst, {c0w0, c1w0} ;// Storing all the coefficients at once
+ MOV return, #OMX_Sts_NoErr
+ M_END
+
+ ENDIF ;// ARM1136JS
+
+
+
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
new file mode 100644
index 0000000..73caec2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s
@@ -0,0 +1,469 @@
+;//
+;//
+;// File Name: omxVCM4P10_TransformDequantLumaDCFromPair_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// Description:
+;// H.264 inverse quantize and transform module
+;//
+;//
+
+;// Include standard headers
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+;// Import/Export symbols required from/to other files
+;// (For example tables)
+
+ IMPORT armVCM4P10_UnpackBlock4x4
+ IMPORT armVCM4P10_QPDivTable
+ IMPORT armVCM4P10_VMatrixQPModTable
+
+ M_VARIANTS ARM1136JS
+
+;// Set debugging level
+;//DEBUG_ON SETL {TRUE}
+
+
+;// Static Function: armVCM4P10_InvTransformDequantLumaDC4x4
+
+
+;// Guarding implementation by the processor name
+
+ IF ARM1136JS
+
+
+;//Input Registers
+pData RN 0 ;// Pointer to the 4x4 block of 16-bit coefficients; read and written in place
+QP RN 1 ;// Quantization parameter, used as a byte index into the QP tables
+
+;//Output Registers
+;// (none - the result is stored back through pData)
+
+;//Local Scratch Registers
+;// NOTE: the alias groups below reuse the same physical registers at different stages
+;// Packed input coefficients (two 16-bit values per register)
+in00 RN 2 ;// Src[0] & Src[1]
+in02 RN 3 ;// Src[2] & Src[3]
+in10 RN 4 ;// Src[4] & Src[5]
+in12 RN 5 ;// Src[6] & Src[7]
+in20 RN 6 ;// Src[8] & Src[9]
+in22 RN 7 ;// Src[10] & Src[11]
+in30 RN 8 ;// Src[12] & Src[13]
+in32 RN 9 ;// Src[14] & Src[15]
+
+;// Transpose for Row operations (Rows to cols)
+trRow00 RN 2
+trRow10 RN 10
+trRow02 RN 3
+trRow12 RN 5
+trRow20 RN 11
+trRow30 RN 12
+trRow32 RN 14
+trRow22 RN 7
+
+;// Intermediate calculations (row pass)
+rowSum1 RN 4
+rowSum2 RN 6
+rowDiff1 RN 8
+rowDiff2 RN 9
+
+
+;// Row operated pixels
+rowOp00 RN 2
+rowOp10 RN 10
+rowOp20 RN 11
+rowOp30 RN 12
+rowOp02 RN 3
+rowOp12 RN 5
+rowOp22 RN 7
+rowOp32 RN 14
+
+;// Transpose for column operations
+trCol00 RN 2
+trCol02 RN 3
+trCol10 RN 4
+trCol12 RN 5
+trCol20 RN 6
+trCol22 RN 7
+trCol30 RN 8
+trCol32 RN 9
+
+;// Intermediate calculations (column pass)
+colSum1 RN 10
+colSum2 RN 11
+colDiff1 RN 12
+colDiff2 RN 14
+
+
+;// Column operated pixels
+colOp00 RN 2
+colOp02 RN 3
+colOp10 RN 4
+colOp12 RN 5
+colOp20 RN 6
+colOp22 RN 7
+colOp30 RN 8
+colOp32 RN 9
+
+;// Temporary scratch variables (r0 is free here because pData is spilled to the stack)
+pQPDivTable RN 0
+pQPModTable RN 11
+Shift RN 10
+Scale RN 14
+Round RN 0
+
+temp1 RN 10
+temp2 RN 11
+temp3 RN 12
+temp4 RN 1 ;// shares r1 with QP; only written after both QP table loads
+
+
+
+;// InvTransformed and Dequantized pixels
+out00 RN 2
+out02 RN 3
+out10 RN 4
+out12 RN 5
+out20 RN 6
+out22 RN 7
+out30 RN 8
+out32 RN 9
+
+
+
+ ;// Allocate 4 bytes of stack; used below to spill pData while r0 is reused as scratch
+ M_ALLOC4 pDataOnStack, 4
+
+ ;// Write function header
+ M_START armVCM4P10_InvTransformDequantLumaDC4x4,r11
+
+ ;******************************************************************
+ ;// The strategy used in implementing the transform is as follows:*
+ ;// Load the 4x4 block into 8 registers *
+ ;// Transpose the 4x4 matrix *
+ ;// Perform the row operations (on columns) using SIMD *
+ ;// Transpose the 4x4 result matrix *
+ ;// Perform the column operations, then dequantize *
+ ;// Store the 4x4 block at one go *
+ ;******************************************************************
+
+ ;// Load all 16 coefficients of the 4x4 block (8 words, two halfwords each)
+
+ LDMIA pData,{in00,in02,in10,in12,in20,in22,in30,in32}
+
+ ;//*****************************************************************
+ ;//
+ ;// Transpose the matrix in order to perform row ops as column ops
+ ;// Input: in[][] = original matrix
+ ;// Output: trRow[][]= transposed matrix
+ ;// Step1: Obtain the LL part of the transposed matrix
+ ;// Step2: Obtain the HL part
+ ;// step3: Obtain the LH part
+ ;// Step4: Obtain the HH part
+ ;//
+ ;//*****************************************************************
+
+ ;// LL 2x2 transposed matrix
+ ;// d0 d1 - -
+ ;// d4 d5 - -
+ ;// - - - -
+ ;// - - - -
+
+ PKHTB trRow10,in10,in00,ASR #16 ;// [5 4] = [f5:f1]
+ PKHBT trRow00,in00,in10,LSL #16 ;// [1 0] = [f4:f0]
+
+ ;// HL 2x2 transposed matrix
+ ;// - - - -
+ ;// - - - -
+ ;// d8 d9 - -
+ ;// d12 d13 - -
+
+
+ PKHTB trRow30,in12,in02,ASR #16 ;// [13 12] = [7 3]
+ PKHBT trRow20,in02,in12,LSL #16 ;// [9 8] = [6 2]
+
+ ;// LH 2x2 transposed matrix
+ ;// - - d2 d3
+ ;// - - d6 d7
+ ;// - - - -
+ ;// - - - -
+
+ PKHBT trRow02,in20,in30,LSL #16 ;// [3 2] = [f12:f8]
+ PKHTB trRow12,in30,in20,ASR #16 ;// [7 6] = [f13:f9]
+
+
+
+
+ ;// HH 2x2 transposed matrix
+ ;// - - - -
+ ;// - - - -
+ ;// - - d10 d11
+ ;// - - d14 d15
+
+ PKHTB trRow32,in32,in22,ASR #16 ;// [15 14] = [15 11]
+ PKHBT trRow22,in22,in32,LSL #16 ;// [11 10] = [14 10]
+
+
+ ;****************************************
+ ;// Row Operations (Performed on columns)
+ ;****************************************
+
+
+ ;// SIMD operations on first two columns(two rows of the original matrix)
+
+ SADD16 rowSum1,trRow00,trRow10 ;// (c0+c1)
+ SADD16 rowSum2,trRow20,trRow30 ;// (c2+c3)
+ SSUB16 rowDiff1,trRow00,trRow10 ;// (c0-c1)
+ SSUB16 rowDiff2,trRow20,trRow30 ;// (c2-c3)
+ SADD16 rowOp00,rowSum1,rowSum2 ;// (c0+c1+c2+c3)
+ SSUB16 rowOp10,rowSum1,rowSum2 ;// (c0+c1-c2-c3)
+ SSUB16 rowOp20,rowDiff1,rowDiff2 ;// (c0-c1-c2+c3)
+ SADD16 rowOp30,rowDiff1,rowDiff2 ;// (c0-c1+c2-c3)
+
+
+ ;// SIMD operations on next two columns(next two rows of the original matrix)
+
+ SADD16 rowSum1,trRow02,trRow12 ;// (c0+c1)
+ SADD16 rowSum2,trRow22,trRow32 ;// (c2+c3)
+ SSUB16 rowDiff1,trRow02,trRow12 ;// (c0-c1)
+ SSUB16 rowDiff2,trRow22,trRow32 ;// (c2-c3)
+ SADD16 rowOp02,rowSum1,rowSum2 ;// (c0+c1+c2+c3)
+ SSUB16 rowOp12,rowSum1,rowSum2 ;// (c0+c1-c2-c3)
+ SSUB16 rowOp22,rowDiff1,rowDiff2 ;// (c0-c1-c2+c3)
+ SADD16 rowOp32,rowDiff1,rowDiff2 ;// (c0-c1+c2-c3)
+
+
+
+ ;*****************************************************************
+ ;// Transpose the resultant matrix
+ ;// Input: rowOp[][]
+ ;// Output: trCol[][]
+ ;*****************************************************************
+
+ ;// LL 2x2 transposed matrix
+ ;// d0 d1 - -
+ ;// d4 d5 - -
+ ;// - - - -
+ ;// - - - -
+
+ PKHTB trCol10,rowOp10,rowOp00,ASR #16 ;// [5 4] = [f5:f1]
+ PKHBT trCol00,rowOp00,rowOp10,LSL #16 ;// [1 0] = [f4:f0]
+
+ ;// HL 2x2 transposed matrix
+ ;// - - - -
+ ;// - - - -
+ ;// d8 d9 - -
+ ;// d12 d13 - -
+
+
+ PKHTB trCol30,rowOp12,rowOp02,ASR #16 ;// [13 12] = [7 3]
+ PKHBT trCol20,rowOp02,rowOp12,LSL #16 ;// [9 8] = [6 2]
+
+ ;// LH 2x2 transposed matrix
+ ;// - - d2 d3
+ ;// - - d6 d7
+ ;// - - - -
+ ;// - - - -
+
+ PKHBT trCol02,rowOp20,rowOp30,LSL #16 ;// [3 2] = [f12:f8]
+ PKHTB trCol12,rowOp30,rowOp20,ASR #16 ;// [7 6] = [f13:f9]
+
+
+
+
+ ;// HH 2x2 transposed matrix
+ ;// - - - -
+ ;// - - - -
+ ;// - - d10 d11
+ ;// - - d14 d15
+
+ PKHTB trCol32,rowOp32,rowOp22,ASR #16 ;// [15 14] = [15 11]
+ PKHBT trCol22,rowOp22,rowOp32,LSL #16 ;// [11 10] = [14 10]
+
+
+ ;*******************************
+ ;// Column Operations
+ ;*******************************
+
+ ;//--------------------------------------------------------------------------------------
+ ;// Store pData(RN0) on stack and restore it only at the final store back
+ ;// This frees up a register (RN0) which is used to reduce number of intermediate stalls
+ ;//--------------------------------------------------------------------------------------
+ M_STR pData,pDataOnStack
+
+
+ ;// SIMD operations on first two columns(two rows of the original matrix)
+
+ SADD16 colSum1,trCol00,trCol10 ;// (c0+c1)
+ SADD16 colSum2,trCol20,trCol30 ;// (c2+c3)
+ SSUB16 colDiff1,trCol00,trCol10 ;// (c0-c1)
+ SSUB16 colDiff2,trCol20,trCol30 ;// (c2-c3)
+ SADD16 colOp00,colSum1,colSum2 ;// (c0+c1+c2+c3)
+ SSUB16 colOp10,colSum1,colSum2 ;// (c0+c1-c2-c3)
+ SSUB16 colOp20,colDiff1,colDiff2 ;// (c0-c1-c2+c3)
+ SADD16 colOp30,colDiff1,colDiff2 ;// (c0-c1+c2-c3)
+
+
+ ;// SIMD operations on next two columns(next two rows of the original matrix)
+ ;// The QP table loads are interleaved here; each lands in a register only after its last use as colSum/colDiff
+ LDR pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer
+ SADD16 colSum1,trCol02,trCol12 ;// (c0+c1)
+ SADD16 colSum2,trCol22,trCol32 ;// (c2+c3)
+ SSUB16 colDiff1,trCol02,trCol12 ;// (c0-c1)
+ SSUB16 colDiff2,trCol22,trCol32 ;// (c2-c3)
+ SADD16 colOp02,colSum1,colSum2 ;// (c0+c1+c2+c3)
+ SSUB16 colOp12,colSum1,colSum2 ;// (c0+c1-c2-c3)
+ LDR pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer
+ LDRSB Shift, [pQPDivTable, QP] ;// Shift = pQPDivTable[QP]
+ SSUB16 colOp22,colDiff1,colDiff2 ;// (c0-c1-c2+c3)
+ SADD16 colOp32,colDiff1,colDiff2 ;// (c0-c1+c2-c3)
+
+
+ LDRSB Scale, [pQPModTable, QP] ;// Scale = pQPModTable[QP]
+
+ ;//----------------------------------------------------------------------
+ ;//
+ ;// <Dequantize> improves on the c-reference code
+ ;// Both the cases i.e., Shift>=0 and Shift<0 cases are covered together
+ ;// We do not subtract 2 from Shift as in C reference, instead perform a
+ ;// Scale << Shift once in the beginning and do a right shift by a
+ ;// constant 2 after the Multiplication. The value of Round would be 2
+ ;//
+ ;// By doing this we avoid the Branches required and also
+ ;// reduce the code size substantially
+ ;//
+ ;//----------------------------------------------------------------------
+
+ MOV Round, #2 ;// Round = 2 (overwrites r0 = pQPDivTable, no longer needed)
+ LSL Scale, Scale, Shift ;// Scale = Scale << Shift
+
+ ;// In each row: bottom halves are shifted by ASR #2; top halves get >> 2 via PKHBT LSL #14 (bits [17:2] land in the top halfword)
+ ;// Row 1
+ SMLABB temp1, colOp00, Scale, Round ;// temp1 = B(colOp00) * Scale + Round
+ SMLABB temp3, colOp02, Scale, Round ;// temp3 = B(colOp02) * Scale + Round
+ SMLATB temp2, colOp00, Scale, Round ;// temp2 = T(colOp00) * Scale + Round
+ SMLATB temp4, colOp02, Scale, Round ;// temp4 = T(colOp02) * Scale + Round
+
+ ASR temp1, temp1, #2 ;// temp1 = temp1 >> 2
+ ASR temp3, temp3, #2 ;// temp3 = temp3 >> 2
+ PKHBT out00, temp1, temp2, LSL #14 ;// out00 = | temp2 >> 2 | temp1 |
+ PKHBT out02, temp3, temp4, LSL #14 ;// out02 = | temp4 >> 2 | temp3 |
+
+
+ ;// Row 2
+ SMLABB temp1, colOp10, Scale, Round ;// temp1 = B(colOp10) * Scale + Round
+ SMLABB temp3, colOp12, Scale, Round ;// temp3 = B(colOp12) * Scale + Round
+ SMLATB temp2, colOp10, Scale, Round ;// temp2 = T(colOp10) * Scale + Round
+ SMLATB temp4, colOp12, Scale, Round ;// temp4 = T(colOp12) * Scale + Round
+
+ ASR temp1, temp1, #2 ;// temp1 = temp1 >> 2
+ ASR temp3, temp3, #2 ;// temp3 = temp3 >> 2
+ PKHBT out10, temp1, temp2, LSL #14 ;// out10 = | temp2 >> 2 | temp1 |
+ PKHBT out12, temp3, temp4, LSL #14 ;// out12 = | temp4 >> 2 | temp3 |
+
+ ;// Row 3
+ SMLABB temp1, colOp20, Scale, Round ;// temp1 = B(colOp20) * Scale + Round
+ SMLABB temp3, colOp22, Scale, Round ;// temp3 = B(colOp22) * Scale + Round
+ SMLATB temp2, colOp20, Scale, Round ;// temp2 = T(colOp20) * Scale + Round
+ SMLATB temp4, colOp22, Scale, Round ;// temp4 = T(colOp22) * Scale + Round
+
+ ASR temp1, temp1, #2 ;// temp1 = temp1 >> 2
+ ASR temp3, temp3, #2 ;// temp3 = temp3 >> 2
+ PKHBT out20, temp1, temp2, LSL #14 ;// out20 = | temp2 >> 2 | temp1 |
+ PKHBT out22, temp3, temp4, LSL #14 ;// out22 = | temp4 >> 2 | temp3 |
+
+ ;// Row 4
+ SMLABB temp1, colOp30, Scale, Round ;// temp1 = B(colOp30) * Scale + Round
+ SMLABB temp3, colOp32, Scale, Round ;// temp3 = B(colOp32) * Scale + Round
+ SMLATB temp2, colOp30, Scale, Round ;// temp2 = T(colOp30) * Scale + Round
+ SMLATB temp4, colOp32, Scale, Round ;// temp4 = T(colOp32) * Scale + Round
+
+ M_LDR pData,pDataOnStack ;// Restore pData pointer from stack (Round in r0 is no longer needed)
+ ASR temp1, temp1, #2 ;// temp1 = temp1 >> 2
+ ASR temp3, temp3, #2 ;// temp3 = temp3 >> 2
+ PKHBT out30, temp1, temp2, LSL #14 ;// out30 = | temp2 >> 2 | temp1 |
+ PKHBT out32, temp3, temp4, LSL #14 ;// out32 = | temp4 >> 2 | temp3 |
+
+
+
+ ;***************************
+ ;// Store all the 4x4 pixels
+ ;***************************
+
+store_coeff
+
+ STMIA pData,{out00,out02,out10,out12,out20,out22,out30,out32}
+
+
+
+ ;// No return value is set by this routine
+
+
+ ;// Write function tail
+ M_END
+
+ ENDIF ;//ARM1136JS
+
+
+;// Static Function: armVCM4P10_InvTransformDequantLumaDC4x4
+
+;// Guarding implementation by the processor name
+
+
+
+
+;// Function: omxVCM4P10_TransformDequantLumaDCFromPair
+;// Calls armVCM4P10_UnpackBlock4x4, then armVCM4P10_InvTransformDequantLumaDC4x4 on pDst; always returns OMX_Sts_NoErr
+;//Input Registers
+ppSrc RN 0 ;// passed through unchanged in r0 to armVCM4P10_UnpackBlock4x4
+pDst RN 1 ;// passed through unchanged in r1 to armVCM4P10_UnpackBlock4x4
+QPR2 RN 2 ;// QP as received in r2
+
+;//Output Registers
+result RN 0
+
+;//Local Scratch Registers
+pDstR4 RN 4 ;// copy of pDst held in r4 across the first call
+pDstR0 RN 0 ;// pDst as first argument of the second call
+QPR1 RN 1 ;// QP as second argument of the second call
+QPR5 RN 5 ;// copy of QP held in r5 across the first call
+
+;// Guarding implementation by the processor name
+
+ IF ARM1136JS
+
+ ;// No stack memory is allocated by this function
+
+
+ ;// Write function header
+ M_START omxVCM4P10_TransformDequantLumaDCFromPair,r5
+
+ MOV pDstR4,pDst ;// Saving register r1
+ MOV QPR5,QPR2 ;// Saving register r2
+ BL armVCM4P10_UnpackBlock4x4
+
+ MOV pDstR0,pDstR4 ;// Setting up register r0
+ MOV QPR1,QPR5 ;// Setting up register r1
+ BL armVCM4P10_InvTransformDequantLumaDC4x4
+
+
+ ;// Set return value
+ MOV result,#OMX_Sts_NoErr
+
+ ;// Write function tail
+ M_END
+
+
+ ENDIF ;//ARM1136JS
+
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
new file mode 100644
index 0000000..22115d3
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h
@@ -0,0 +1,37 @@
+/**
+ *
+ * File Name: armVCM4P2_Huff_Tables_VLC.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ *
+ * File: armVCM4P2_Huff_Tables_VLC.h
+ * Description: Declares Tables used for Huffman coding and decoding
+ * in MP4P2 codec.
+ *
+ */
+
+#ifndef _OMXHUFFTAB_H_
+#define _OMXHUFFTAB_H_
+
+/* NOTE: OMX_U8 / OMX_U16 are not declared here; include "omxtypes.h" before this header. */
+extern const OMX_U16 armVCM4P2_IntraVlcL0L1[200];
+
+
+extern const OMX_U16 armVCM4P2_InterVlcL0L1[200];
+
+extern const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64];
+//extern const OMX_U16 armVCM4P2_aIntraDCChromaIndex[32];
+extern const OMX_U16 armVCM4P2_aVlcMVD[124];
+
+extern const OMX_U8 armVCM4P2_InterL0L1LMAX[73];
+extern const OMX_U8 armVCM4P2_InterL0L1RMAX[35];
+extern const OMX_U8 armVCM4P2_IntraL0L1LMAX[53];
+extern const OMX_U8 armVCM4P2_IntraL0L1RMAX[40];
+
+#endif /* _OMXHUFFTAB_H_ */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
new file mode 100644
index 0000000..d5f865c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h
@@ -0,0 +1,25 @@
+/**
+ *
+ * File Name: armVCM4P2_ZigZag_Tables.h
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ *
+ * File: armVCM4P2_ZigZag_Tables.h
+ * Description: Declares Tables used for Zigzag scan in MP4P2 codec.
+ *
+ */
+
+#ifndef _OMXZIGZAGTAB_H
+#define _OMXZIGZAGTAB_H
+/* NOTE(review): 192 = 3 x 64; presumably the classical, horizontal and vertical scans are merged into one table - confirm against the definition. */
+extern const OMX_U8 armVCM4P2_aClassicalZigzagScan [192];
+//extern const OMX_U8 armVCM4P2_aHorizontalZigzagScan [64];
+//extern const OMX_U8 armVCM4P2_aVerticalZigzagScan [64];
+
+#endif /* _OMXZIGZAGTAB_H */
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s
new file mode 100644
index 0000000..7801e57
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s
@@ -0,0 +1,75 @@
+; /**
+; *
+; * File Name: armVCM4P2_Clip8_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains module for Clipping 16 bit value to [0,255] Range
+; */
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+ IF ARM1136JS
+
+;//Input Arguments
+
+pSrc RN 0 ;// source of 16-bit samples; advanced by 16 bytes per loop iteration
+pDst RN 1 ;// destination of clipped bytes; 8 bytes written per loop iteration
+step RN 2 ;// byte increment added to pDst after each group of 8 output bytes
+
+;// Local variables
+
+x0 RN 3
+x1 RN 4
+x2 RN 5
+x3 RN 6
+
+Count RN 14
+
+;// armVCM4P2_Clip8: 8 iterations; each saturates eight 16-bit samples to [0,255] and stores them as bytes
+ M_START armVCM4P2_Clip8,r6
+
+
+ MOV Count,#8
+loop
+
+ LDMIA pSrc!,{x0, x1}
+ SUBS Count,Count, #1 ;// count down; these flags are consumed by the BGT at the loop end
+ LDMIA pSrc!,{x2, x3}
+ USAT16 x0, #8, x0 ;// clip two samples to [0,255]
+ USAT16 x1, #8, x1 ;// clip two samples to [0,255]
+ STRB x0, [pDst]
+ MOV x0, x0, LSR #16
+ STRB x0, [pDst,#1]
+ STRB x1, [pDst,#2]
+ MOV x1, x1, LSR #16
+ STRB x1, [pDst,#3]
+
+ USAT16 x2, #8, x2 ;// clip two samples to [0,255]
+ USAT16 x3, #8, x3 ;// clip two samples to [0,255]
+ STRB x2, [pDst,#4]
+ MOV x2, x2, LSR #16
+ STRB x2, [pDst,#5]
+ STRB x3, [pDst,#6]
+ MOV x3, x3, LSR #16
+ STRB x3, [pDst,#7]
+ ADD pDst,pDst,step ;// Increment pDst by step value
+
+ BGT loop ;// Continue loop until Count reaches 0 (8 iterations, 64 samples in total)
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
new file mode 100644
index 0000000..9e30900
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
@@ -0,0 +1,398 @@
+;/**
+; *
+; * File Name: armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for zigzag scanning and VLC decoding
+; * for inter, intra block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_AC_unsafe
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan
+; *
+; *
+; *
+; *
+; */
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+
+ IF ARM1136JS
+
+
+
+
+
+;//Input Arguments
+
+ppBitStream RN 0 ;// saved on the stack and handed to M_BD_FINI on exit
+pBitOffset RN 1 ;// saved on the stack and handed to M_BD_FINI on exit
+pDst RN 2 ;// output buffer; 128 bytes are cleared on entry, levels are stored as halfwords
+shortVideoHeader RN 3 ;// non-zero selects the short-header escape format (see "Escape Mode 4")
+
+
+;//Local Variables
+
+Return RN 0
+;// The four table pointers share r4 and are reloaded from the stack arguments when needed
+pVlcTableL0L1 RN 4
+pLMAXTableL0L1 RN 4
+pRMAXTableL0L1 RN 4
+pZigzagTable RN 4
+
+ftype RN 0 ;// escape type: 0 = none, 1/2/3 = escape variants selected in the code below
+temp3 RN 4
+temp RN 5
+Count RN 6 ;// running coefficient index; NOTE(review): not initialised in this routine - presumably set by the caller
+Escape RN 5
+
+;// armVCM4P2_FillVLDBuffer
+zigzag RN 0 ;// byte read from the zigzag table, used as a byte offset into pDst
+storeLevel RN 1
+temp2 RN 4
+temp1 RN 5
+sign RN 5
+Last RN 7
+storeRun RN 14
+
+
+packRetIndex RN 5 ;// packed symbol returned by M_BD_VLD
+
+
+markerbit RN 5
+
+;// Scratch Registers
+
+RBitStream RN 8
+RBitBuffer RN 9
+RBitCount RN 10
+
+T1 RN 11
+T2 RN 12
+LR RN 14 ;// shares r14 with storeRun; the real link register is spilled to pLinkRegister
+
+
+
+ M_ALLOC4 pppBitStream,4
+ M_ALLOC4 ppOffset,4
+ M_ALLOC4 pLinkRegister,4
+
+ M_START armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+ ;// get the table addresses from stack
+ M_ARG ppVlcTableL0L1,4
+ M_ARG ppLMAXTableL0L1,4
+ M_ARG ppRMAXTableL0L1,4
+ M_ARG ppZigzagTable,4
+
+ ;// Store ALL zeros at pDst
+ ;// NOTE(review): Count (r6) is not written before its first use below; presumably the caller supplies the start index - confirm
+ MOV temp1,#0 ;// temp1 = 0 (one of the four zero registers used to clear pDst)
+ MOV Last,#0
+ M_STR LR,pLinkRegister ;// Store Link Register on Stack
+ MOV temp2,#0
+ MOV LR,#0
+
+ ;// Initialize the Macro and Store all zeros to pDst
+ ;// (8 stores of 16 bytes = 128 bytes, interleaved with the bitstream-reader set-up)
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_BD_INIT1 T1, T2, T2
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_BD_INIT2 T1, T2, T2
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_STR ppBitStream,pppBitStream ;// Store ppBitstream on stack
+ STM pDst!,{temp2,temp1,Last,LR}
+ M_STR pBitOffset,ppOffset ;// Store pBitOffset on stack
+ STM pDst!,{temp2,temp1,Last,LR}
+
+ STM pDst!,{temp2,temp1,Last,LR}
+ STM pDst!,{temp2,temp1,Last,LR}
+
+
+ SUB pDst,pDst,#128 ;// Restore pDst
+
+ ;// The armVCM4P2_GetVLCBits begins
+
+getVLCbits
+
+ M_BD_LOOK8 Escape,7 ;// Load Escape Value
+ LSR Escape,Escape,#25
+ CMP Escape,#3 ;// check for the escape code (value 3 in the next 7 bits)
+ MOVNE ftype,#0
+ BNE notEscapemode ;// Branch if the next symbol is not an escape code
+
+ M_BD_VSKIP8 #7,T1
+ CMP shortVideoHeader,#0 ;// Check shortVideoHeader flag to know the type of Escape mode
+ BEQ endFillVLD
+
+ ;// Escape Mode 4
+
+ M_BD_READ8 Last,1,T1
+ M_BD_READ8 storeRun,6,T1
+ M_BD_READ8 storeLevel,8,T1
+
+
+ ;// Check whether the Reserved values for Level are used and Exit with an Error Message if it is so
+
+ TEQ storeLevel,#0
+ TEQNE storeLevel,#128
+ BEQ ExitError
+
+ ADD temp2,storeRun,Count
+ CMP temp2,#64
+ BGE ExitError ;// error if Count+storeRun >= 64
+
+
+ ;// Load address of zigzagTable
+
+ M_LDR pZigzagTable,ppZigzagTable ;// Loading the Address of Zigzag table
+
+
+ ;// armVCM4P2_FillVLDBuffer
+
+ SXTB storeLevel,storeLevel ;// Sign Extend storeLevel to 32 bits
+
+
+ ;// To Reflect Runlength
+
+ ADD Count,Count,storeRun
+ LDRB zigzag,[pZigzagTable,Count]
+ ADD Count,Count,#1
+ STRH storeLevel,[pDst,zigzag] ;// store Level
+ ;// NOTE(review): this path always leaves via ExitOk, which returns OMX_Sts_Err unless Last == 1 - confirm this is intended
+ B ExitOk
+
+
+
+endFillVLD
+
+
+ ;// Load Ftype( Escape Mode) value based on the two successive bits in the bitstream
+ ;// first bit 0 -> ftype 1; otherwise second bit 1 -> ftype 3, second bit 0 -> ftype 2
+ M_BD_READ8 temp1,1,T1
+ CMP temp1,#0
+ MOVEQ ftype,#1
+ BEQ notEscapemode
+ M_BD_READ8 temp1,1,T1
+ CMP temp1,#1
+ MOVEQ ftype,#3
+ MOVNE ftype,#2
+
+
+notEscapemode
+
+ ;// Load optimized packed VLC table with last=0 and Last=1
+
+ M_LDR pVlcTableL0L1,ppVlcTableL0L1 ;// Load Combined VLC Table
+
+
+ CMP ftype,#3 ;// If ftype >= 3 perform Fixed Length Decoding (Escape Mode 3)
+ BGE EscapeMode3 ;// Else continue normal VLC Decoding
+
+ ;// Variable length decoding, "armUnPackVLC32"
+
+
+ M_BD_VLD packRetIndex,T1,T2,pVlcTableL0L1,4,2
+
+
+ LDR temp3,=0xFFF
+
+ CMP packRetIndex,temp3 ;// Check for invalid symbol
+ BEQ ExitError ;// if invalid symbol occurs exit with an error message
+
+ AND Last,packRetIndex,#2 ;// Get Last from packed Index (Last is 0 or 2 at this point)
+
+
+
+
+ LSR storeRun,packRetIndex,#7 ;// Get Run Value from Packed index
+ AND storeLevel,packRetIndex,#0x7c ;// storeLevel=packRetIndex[2-6],storeLevel[0-1]=0
+
+
+ M_LDR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Load LMAX table
+
+
+ LSR storeLevel,storeLevel,#2 ;// Level value
+
+ CMP ftype,#1
+ BNE ftype2
+
+ ;// ftype==1; Escape mode =1
+
+
+ ADD temp1, pLMAXTableL0L1, Last, LSL#4 ;// Last is 0 or 2 here, so this adds 0 or 32 to the table address
+ LDRB temp1,[temp1,storeRun]
+
+
+ ADD storeLevel,temp1,storeLevel
+
+ftype2
+
+ ;// ftype =2; Escape mode =2
+
+ M_LDR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Load RMAX Table
+
+ CMP ftype,#2
+ BNE FillVLDL1
+
+ ADD temp1, pRMAXTableL0L1, Last, LSL#4 ;// Last is 0 or 2 here, so this adds 0 or 32 to the table address
+ SUB temp2,storeLevel,#1
+ LDRB temp1,[temp1,temp2]
+
+
+ ADD storeRun,storeRun,#1
+ ADD storeRun,temp1
+
+FillVLDL1
+
+
+ ;// armVCM4P2_FillVLDBuffer
+
+ M_LDR pZigzagTable,ppZigzagTable ;// Load address of zigzagTable
+
+ M_BD_READ8 sign,1,T1
+
+ CMP sign,#1
+ RSBEQ storeLevel,storeLevel,#0
+
+ ADD temp1,storeRun,Count ;// Exit with an error message if Run + Count exceeds 63
+ CMP temp1,#64
+ BGE ExitError
+
+
+
+
+
+
+ ;// To Reflect Runlength
+
+ ADD Count,Count,storeRun
+
+storeLevelL1
+
+ LDRB zigzag,[pZigzagTable,Count]
+ CMP Last,#2 ;// Check if the Level val is Last non zero val
+ ADD Count,Count,#1
+ LSR Last,Last,#1 ;// normalise Last from 0/2 to 0/1 (flags from the CMP above are preserved)
+ STRH storeLevel,[pDst,zigzag]
+
+ BNE end
+
+ B ExitOk
+
+
+
+ ;// Fixed Length Decoding Escape Mode 3
+
+EscapeMode3
+
+ M_BD_READ8 Last,1,T1
+ M_BD_READ8 storeRun,6,T1
+
+ ADD temp2,storeRun,Count ;// Exit with an error message if Run + Count exceeds 63
+ CMP temp2,#64
+ BGE ExitError
+
+ M_BD_READ8 markerbit,1,T1
+ TEQ markerbit,#0 ;// Exit with an error message if marker bit is zero
+ BEQ ExitError
+
+ M_BD_READ16 storeLevel,12,T1
+
+ TST storeLevel,#0x800 ;// test if the level is negative
+ SUBNE storeLevel,storeLevel,#4096
+ CMP storeLevel,#0
+ CMPNE storeLevel,#-2048
+ BEQ ExitError ;// Exit with an error message if Level==0 or -2048
+
+ M_LDR pZigzagTable,ppZigzagTable ;// Load address of zigzagTable
+
+ M_BD_READ8 markerbit,1,T1
+
+
+ ;// armVCM4P2_FillVLDBuffer ( Sign not used as storeLevel is preprocessed)
+
+
+
+ ;// To Reflect Run Length
+
+ ADD Count,Count,storeRun
+
+
+
+storeLevelLast
+
+ LDRB zigzag,[pZigzagTable,Count]
+ CMP Last,#1
+ ADD Count,Count,#1
+ STRH storeLevel,[pDst,zigzag]
+
+ BNE end
+
+ B ExitOk
+
+end
+
+ CMP Count,#64 ;// Run the loop until Count reaches 64
+
+ BLT getVLCbits
+
+
+ExitOk
+ ;// Exit When VLC Decoding is done Successfully
+
+ ;// Loading ppBitStream and pBitOffset from stack
+ ;// NOTE(review): the flags set by this CMP must survive M_LDR and M_BD_FINI for the MOVEQ/MOVNE below - confirm the macros preserve them
+ CMP Last,#1
+ M_LDR ppBitStream,pppBitStream
+ M_LDR pBitOffset,ppOffset
+
+ ;//Ending the macro
+
+ M_BD_FINI ppBitStream,pBitOffset
+
+ MOVEQ Return,#OMX_Sts_NoErr
+ MOVNE Return,#OMX_Sts_Err
+ M_LDR LR,pLinkRegister ;// Load the Link Register Back
+ B exit2
+
+ExitError
+ ;// Exit When an Error occurs
+
+ M_LDR ppBitStream,pppBitStream
+ M_LDR pBitOffset,ppOffset
+ ;//Ending the macro
+
+ M_BD_FINI ppBitStream,pBitOffset
+ M_LDR LR,pLinkRegister
+ MOV Return,#OMX_Sts_Err
+
+exit2
+
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
new file mode 100644
index 0000000..ba4d058
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
@@ -0,0 +1,211 @@
+ /**
+ *
+ * File Name: armVCM4P2_Huff_Tables_VLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVCM4P2_Huff_Tables_VLC.c
+ * Description: Contains all the Huffman tables used in MPEG4 codec
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armCOMM_Bitstream.h"
+
+
+
+
+// Contains optimized and Packed VLC tables with Last=0 and Last=1
+
+// optimized Packed VLC table Entry Format
+// ---------------------------------------
+//
+// 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
+// +------------------------------------------------+
+// | Len | Run | Level |L | 1 |
+// +------------------------------------------------+
+// | Offset | 0 |
+// +------------------------------------------------+
+// If the table entry is a leaf entry then bit 0 set:
+// Len = Number of bits overread (0 to 7) 3 bits
+// Run = RunLength of the Symbol (0 to 63) 6 bits
+// Level = Level of the Symbol (0 to 31) 5 bits
+// L = Last Value of the Symbol (0 or 1) 1 bit
+//
+// If the table entry is an internal node then bit 0 is clear:
+// Offset = Number of (16-bit) half words from the table
+// start to the next table node
+//
+// The table is accessed by successive lookup up on the
+// next Step bits of the input bitstream until a leaf node
+// is obtained. The Step sizes are supplied to the VLD macro.
+
+// The VLC tables used for Intra and non-Intra coefficients in non-Escape mode
+// contains symbols with both Last=0 and Last=1.
+// If a symbol is not found in the table it will be coded as 0xFFF
+
+// Inter-block coefficient VLC table (Last=0 and Last=1 combined), in the packed node format described above.
+const OMX_U16 armVCM4P2_InterVlcL0L1[200] = {
+ 0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x1b09,
+ 0x4009, 0x4009, 0x4009, 0x4009, 0x2109, 0x2109, 0x0209, 0x0011,
+ 0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058,
+ 0x3fff, 0x3fff, 0x0038, 0x0040, 0x2115, 0x2115, 0x201d, 0x201d,
+ 0x2059, 0x2059, 0x2051, 0x2051, 0x1c0d, 0x1b0d, 0x1a0d, 0x190d,
+ 0x0911, 0x0811, 0x0711, 0x0611, 0x0511, 0x0319, 0x0219, 0x0121,
+ 0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088,
+ 0x2061, 0x2061, 0x2129, 0x2129, 0x3709, 0x3709, 0x3809, 0x3809,
+ 0x3d0d, 0x3d0d, 0x3e0d, 0x3e0d, 0x3f0d, 0x3f0d, 0x200d, 0x200d,
+ 0x0098, 0x00a0, 0x00a8, 0x00b0, 0x0131, 0x0221, 0x0419, 0x0519,
+ 0x0619, 0x0a11, 0x1909, 0x1a09, 0x210d, 0x220d, 0x230d, 0x240d,
+ 0x250d, 0x260d, 0x270d, 0x280d, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+ 0x0049, 0x0041, 0x380d, 0x380d, 0x370d, 0x370d, 0x360d, 0x360d,
+ 0x350d, 0x350d, 0x340d, 0x340d, 0x330d, 0x330d, 0x320d, 0x320d,
+ 0x00e8, 0x00f0, 0x00f8, 0x0100, 0x310d, 0x310d, 0x2015, 0x2015,
+ 0x3609, 0x3609, 0x3509, 0x3509, 0x3409, 0x3409, 0x3309, 0x3309,
+ 0x3209, 0x3209, 0x3109, 0x3109, 0x0110, 0x0130, 0x0138, 0x0140,
+ 0x0118, 0x0120, 0x0128, 0x100d, 0x3009, 0x3009, 0x2f09, 0x2f09,
+ 0x2411, 0x2411, 0x2311, 0x2311, 0x2039, 0x2039, 0x2031, 0x2031,
+ 0x0f0d, 0x0e0d, 0x0d0d, 0x0c0d, 0x0b0d, 0x0a0d, 0x090d, 0x0e09,
+ 0x0d09, 0x0211, 0x0119, 0x0029, 0x0150, 0x0158, 0x0160, 0x0168,
+ 0x280d, 0x280d, 0x270d, 0x270d, 0x260d, 0x260d, 0x250d, 0x250d,
+ 0x2c09, 0x2c09, 0xb759, 0xb759, 0x2a09, 0x2a09, 0x2021, 0x2021,
+ 0x040d, 0x030d, 0x0b35, 0x010d, 0x0909, 0x0809, 0x0709, 0x0609,
+ 0x0111, 0x0019, 0x2509, 0x2509, 0x2409, 0x2409, 0x2309, 0x2309
+};
+
+// Intra-block coefficient VLC table (Last=0 and Last=1 combined), in the packed node format described above.
+const OMX_U16 armVCM4P2_IntraVlcL0L1[200] = {
+ 0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x0f09,
+ 0x4009, 0x4009, 0x4009, 0x4009, 0x2011, 0x2011, 0x0109, 0x0019,
+ 0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058,
+ 0x3fff, 0x3fff, 0x0038, 0x0040, 0x203d, 0x203d, 0x2035, 0x2035,
+ 0x20b1, 0x20b1, 0x20a9, 0x20a9, 0x0215, 0x011d, 0x002d, 0x0d09,
+ 0x0519, 0x0811, 0x0419, 0x0321, 0x0221, 0x0139, 0x00a1, 0x0099,
+ 0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088,
+ 0x20b9, 0x20b9, 0x20c1, 0x20c1, 0x2141, 0x2141, 0x2911, 0x2911,
+ 0x2315, 0x2315, 0x2415, 0x2415, 0x2f0d, 0x2f0d, 0x300d, 0x300d,
+ 0x0098, 0x00a0, 0x00a8, 0x00b0, 0x00c9, 0x00d1, 0x00d9, 0x0149,
+ 0x0619, 0x0151, 0x0229, 0x0719, 0x0e09, 0x0045, 0x0515, 0x0615,
+ 0x110d, 0x120d, 0x130d, 0x140d, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+ 0x0091, 0x0089, 0x2e0d, 0x2e0d, 0x2d0d, 0x2d0d, 0x2c0d, 0x2c0d,
+ 0x2b0d, 0x2b0d, 0x2a0d, 0x2a0d, 0x2115, 0x2115, 0x2025, 0x2025,
+ 0x00e8, 0x00f0, 0x00f8, 0x0100, 0x2c09, 0x2c09, 0x2b09, 0x2b09,
+ 0x2711, 0x2711, 0x2611, 0x2611, 0x2511, 0x2511, 0x2319, 0x2319,
+ 0x2219, 0x2219, 0x2131, 0x2131, 0x0110, 0x0130, 0x0138, 0x0140,
+ 0x0118, 0x0120, 0x0128, 0x080d, 0x2129, 0x2129, 0x2081, 0x2081,
+ 0x2411, 0x2411, 0x2079, 0x2079, 0x2071, 0x2071, 0x2069, 0x2069,
+ 0x1bb5, 0x060d, 0x001d, 0xd3f9, 0x0909, 0x0809, 0x090d, 0x0311,
+ 0x0121, 0x0061, 0x0059, 0x0051, 0x0150, 0x0158, 0x0160, 0x0168,
+ 0x240d, 0x240d, 0x230d, 0x230d, 0x2609, 0x2609, 0x250d, 0x250d,
+ 0x2709, 0x2709, 0x2211, 0x2211, 0x2119, 0x2119, 0x2049, 0x2049,
+ 0x0015, 0x0509, 0x020d, 0x010d, 0x0409, 0x0309, 0x0041, 0x0039,
+ 0x0111, 0x0031, 0x2209, 0x2209, 0x2029, 0x2029, 0x2021, 0x2021
+};
+// Intra DC size VLC table: two 32-entry halves (presumably luma first, then chroma, per the name - TODO confirm).
+const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64] = {
+ 0x0020, 0x000b, 0x2009, 0x2009, 0x2007, 0x2007, 0x2001, 0x2001,
+ 0x4005, 0x4005, 0x4005, 0x4005, 0x4003, 0x4003, 0x4003, 0x4003,
+ 0x0028, 0x000f, 0x200d, 0x200d, 0x0030, 0x0013, 0x2011, 0x2011,
+ 0x0038, 0x0017, 0x2015, 0x2015, 0x3fff, 0x3fff, 0x2019, 0x2019,
+
+ 0x0020, 0x0009, 0x2007, 0x2007, 0x4005, 0x4005, 0x4005, 0x4005,
+ 0x4003, 0x4003, 0x4003, 0x4003, 0x4001, 0x4001, 0x4001, 0x4001,
+ 0x0028, 0x000d, 0x200b, 0x200b, 0x0030, 0x0011, 0x200f, 0x200f,
+ 0x0038, 0x0015, 0x2013, 0x2013, 0x1fff, 0x0019, 0x2017, 0x2017
+};
+
+// VLC decode table for motion vector differences (per the name); 124 halfword nodes.
+const OMX_U16 armVCM4P2_aVlcMVD[124] = {
+ 0x0010, 0x00f0, 0x0043, 0x003f, 0x4041, 0x4041, 0x4041, 0x4041,
+ 0x0018, 0x00d8, 0x0047, 0x003b, 0x0020, 0x0080, 0x00a8, 0x00d0,
+ 0x0028, 0x0048, 0x0070, 0x0078, 0x1fff, 0x0030, 0x0038, 0x0040,
+ 0x0081, 0x0001, 0x007f, 0x0003, 0x207d, 0x207d, 0x2005, 0x2005,
+ 0x207b, 0x207b, 0x2007, 0x2007, 0x0050, 0x0058, 0x0060, 0x0068,
+ 0x2079, 0x2079, 0x2009, 0x2009, 0x2077, 0x2077, 0x200b, 0x200b,
+ 0x2075, 0x2075, 0x200d, 0x200d, 0x2073, 0x2073, 0x200f, 0x200f,
+ 0x0071, 0x0011, 0x006f, 0x0013, 0x006d, 0x0015, 0x006b, 0x0017,
+ 0x0088, 0x0090, 0x0098, 0x00a0, 0x0069, 0x0019, 0x0067, 0x001b,
+ 0x0065, 0x001d, 0x0063, 0x001f, 0x0061, 0x0021, 0x005f, 0x0023,
+ 0x005d, 0x0025, 0x005b, 0x0027, 0x00b0, 0x00b8, 0x00c0, 0x00c8,
+ 0x0059, 0x0029, 0x0057, 0x002b, 0x2055, 0x2055, 0x202d, 0x202d,
+ 0x2053, 0x2053, 0x202f, 0x202f, 0x2051, 0x2051, 0x2031, 0x2031,
+ 0x204f, 0x204f, 0x2033, 0x2033, 0x00e0, 0x00e8, 0x0049, 0x0039,
+ 0x204d, 0x204d, 0x2035, 0x2035, 0x204b, 0x204b, 0x2037, 0x2037,
+ 0x2045, 0x2045, 0x203d, 0x203d
+};
+
+/* LMAX table for Inter blocks (Last == 0 entries first, then Last == 1)
+ Run indexed: entry [run] for Last=0, entry [32 + run] for Last=1
+ padded armVCM4P2_InterL0L1LMAX[27-31] with zeros to access entries for Last=1 effectively
+
+*/
+const OMX_U8 armVCM4P2_InterL0L1LMAX[73] =
+{
+ 12, 6, 4, 3, 3, 3, 3, 2,
+ 2, 2, 2, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 0, 0, 0, 0, 0,
+ 3, 2, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1
+};
+
+/* RMAX table for Inter blocks (Last == 0 entries first, then Last == 1)
+ Level - 1 Indexed: entry [level - 1] for Last=0, entry [32 + level - 1] for Last=1
+ padded armVCM4P2_InterL0L1RMAX[12-31] with zeros to access entries for Last=1 table effectively */
+
+
+const OMX_U8 armVCM4P2_InterL0L1RMAX[35] =
+{
+ 26, 10, 6, 2, 1, 1,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 0, 0, 40, 1, 0
+};
+
+/* LMAX table for Intra blocks (Last == 0 entries first, then Last == 1)
+ Run indexed: entry [run] for Last=0, entry [32 + run] for Last=1
+ padded armVCM4P2_IntraL0L1LMAX[15-31] with zeros to access entries for Last=1 effectively
+
+*/
+const OMX_U8 armVCM4P2_IntraL0L1LMAX[53] =
+{
+ 27, 10, 5, 4, 3, 3, 3,
+ 3, 2, 2, 1, 1, 1, 1, 1, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0,
+
+ 8, 3, 2, 2, 2, 2, 2, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1
+};
+
+
+/* RMAX table for Intra blocks (Last == 0 entries first, then Last == 1)
+ Level - 1 Indexed: entry [level - 1] for Last=0, entry [32 + level - 1] for Last=1
+ padded armVCM4P2_IntraL0L1RMAX[27-31] with zeros to access entries for Last=1 table effectively */
+
+
+const OMX_U8 armVCM4P2_IntraL0L1RMAX[40] =
+{
+ 14, 9, 7, 3, 2, 1, 1,
+ 1, 1, 1, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0,
+
+ 20, 6, 1, 0, 0, 0, 0, 0
+
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
new file mode 100644
index 0000000..25cf8db
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
@@ -0,0 +1,75 @@
+ /**
+ *
+ * File Name: armVCM4P2_Lookup_Tables.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVCM4P2_Lookup_Tables.c
+ * Description: Contains all the Lookup tables used in MPEG4 codec
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+ /* DC scaler lookup: entries [1-31] and [33-63] follow the rules below,
+ * entries [0] and [32] hold 255.
+ * armVCM4P2_DCScaler[i]= 8 for i=1 to 4 and i=33 to 36
+ * = 2*i for i=5 to 8
+ * = i+8 for i=9 to 24
+ * = 2*i-16 for i=25 to 31
+ * = (i-32+13)/2 for i=37 to 56
+ * = i-6-32 for i=57 to 63
+ * = 255 for i=0 and i=32
+ */
+const OMX_U8 armVCM4P2_DCScaler[64] =
+{
+    255,  8,  8,  8,  8, 10, 12, 14,
+     16, 17, 18, 19, 20, 21, 22, 23,
+     24, 25, 26, 27, 28, 29, 30, 31,
+     32, 34, 36, 38, 40, 42, 44, 46,
+    255,  8,  8,  8,  8,  9,  9, 10,
+     10, 11, 11, 12, 12, 13, 13, 14,
+     14, 15, 15, 16, 16, 17, 17, 18,
+     18, 19, 20, 21, 22, 23, 24, 25
+};
+
+
+ /* Reciprocal lookup for the values 1 to 63.
+ * armVCM4P2_Reciprocal_QP_S16[i] = round(32767/i) for i=1 to 63
+ * armVCM4P2_Reciprocal_QP_S16[0] = 0
+ */
+
+const OMX_S16 armVCM4P2_Reciprocal_QP_S16[64] =
+{
+    0x0000, 0x7FFF, 0x4000, 0x2AAA, 0x2000, 0x1999, 0x1555, 0x1249,
+    0x1000, 0x0E39, 0x0CCD, 0x0BA3, 0x0AAB, 0x09D9, 0x0925, 0x0888,
+    0x0800, 0x0787, 0x071C, 0x06BD, 0x0666, 0x0618, 0x05D1, 0x0591,
+    0x0555, 0x051F, 0x04EC, 0x04BE, 0x0492, 0x046A, 0x0444, 0x0421,
+    0x0400, 0x03E1, 0x03C4, 0x03A8, 0x038E, 0x0376, 0x035E, 0x0348,
+    0x0333, 0x031F, 0x030C, 0x02FA, 0x02E9, 0x02D8, 0x02C8, 0x02B9,
+    0x02AB, 0x029D, 0x028F, 0x0282, 0x0276, 0x026A, 0x025F, 0x0254,
+    0x0249, 0x023F, 0x0235, 0x022B, 0x0222, 0x0219, 0x0211, 0x0208
+};
+
+ /* Reciprocal lookup for the values 1 to 63.
+ * armVCM4P2_Reciprocal_QP_S32[i] = round(131071/i) for i=1 to 63
+ * armVCM4P2_Reciprocal_QP_S32[0] = 0
+ */
+
+const OMX_S32 armVCM4P2_Reciprocal_QP_S32[64] =
+{
+    0x00000000, 0x0001FFFF, 0x00010000, 0x0000AAAA, 0x00008000, 0x00006666, 0x00005555, 0x00004924,
+    0x00004000, 0x000038E3, 0x00003333, 0x00002E8C, 0x00002AAB, 0x00002762, 0x00002492, 0x00002222,
+    0x00002000, 0x00001E1E, 0x00001C72, 0x00001AF2, 0x0000199A, 0x00001861, 0x00001746, 0x00001643,
+    0x00001555, 0x0000147B, 0x000013B1, 0x000012F6, 0x00001249, 0x000011A8, 0x00001111, 0x00001084,
+    0x00001000, 0x00000F84, 0x00000F0F, 0x00000EA1, 0x00000E39, 0x00000DD6, 0x00000D79, 0x00000D21,
+    0x00000CCD, 0x00000C7D, 0x00000C31, 0x00000BE8, 0x00000BA3, 0x00000B61, 0x00000B21, 0x00000AE5,
+    0x00000AAB, 0x00000A73, 0x00000A3D, 0x00000A0A, 0x000009D9, 0x000009A9, 0x0000097B, 0x0000094F,
+    0x00000925, 0x000008FB, 0x000008D4, 0x000008AE, 0x00000889, 0x00000865, 0x00000842, 0x00000820
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
new file mode 100644
index 0000000..3f92d85
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
@@ -0,0 +1,104 @@
+;//
+;//
+;// File Name: armVCM4P2_SetPredDir_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+; **
+; * Function: armVCM4P2_SetPredDir
+; *
+; * Description:
+; * Performs detecting the prediction direction
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] blockIndex block index indicating the component type and
+; * position as defined in subclause 6.1.3.8, of ISO/IEC
+; * 14496-2. Furthermore, indexes 6 to 9 indicate the
+; * alpha blocks spatially corresponding to luminance
+; * blocks 0 to 3 in the same macroblock.
+; * [in] pCoefBufRow pointer to the coefficient row buffer
+; * [in] pQpBuf pointer to the quantization parameter buffer
+; * [out]predQP quantization parameter of the predictor block
+; * [out]predDir indicates the prediction direction which takes one
+; * of the following values:
+; * OMX_VC_HORIZONTAL predict horizontally
+; * OMX_VC_VERTICAL predict vertically
+; *
+; * Return Value:
+; * Standard OMXResult result. See enumeration for possible result codes.
+; *
+; */
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE omxVC_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+ IF ARM1136JS
+
+;// Input Arguments
+BlockIndex RN 0 ;// block index, compared against 3 below
+pCoefBufRow RN 1 ;// coefficient row buffer pointer
+pCoefBufCol RN 2 ;// coefficient column buffer pointer
+predDir RN 3 ;// address that receives the prediction direction
+predQP RN 4 ;// address that receives the predictor QP (loaded from ppredQP)
+pQpBuf RN 5 ;// QP buffer pointer (loaded from ppQpBuf)
+
+;// Local Variables
+
+Return RN 0 ;// shares r0 with BlockIndex, written only after BlockIndex was tested
+blockDCLeft RN 6
+blockDCTop RN 7
+blockDCTopLeft RN 8
+temp1 RN 9
+temp2 RN 14
+;// Picks the prediction direction from three neighbouring DC values and stores it with the matching QP
+ M_START armVCM4P2_SetPredDir,r9
+
+ M_ARG ppredQP,4 ;// predQP and pQpBuf are fetched through these two argument slots
+ M_ARG ppQpBuf,4
+
+ LDRH blockDCTopLeft,[pCoefBufRow,#-16] ;// halfword 16 bytes before pCoefBufRow
+ LDRH blockDCLeft,[pCoefBufCol] ;// halfword at pCoefBufCol
+;// NOTE(review): LDRH zero-extends; confirm these DC values can never be negative
+ TEQ BlockIndex,#3
+ LDREQH blockDCTop,[pCoefBufCol,#-16] ;// block 3: top value is read relative to pCoefBufCol
+ LDRNEH blockDCTop,[pCoefBufRow] ;// other blocks: top value is read from pCoefBufRow
+
+ SUBS temp1,blockDCLeft,blockDCTopLeft
+ RSBLT temp1,temp1,#0 ;// temp1=abs(blockDCLeft-blockDCTopLeft)
+ SUBS temp2,blockDCTopLeft,blockDCTop
+ RSBLT temp2,temp2,#0 ;// temp2=abs(blockDCTopLeft-blockDCTop)
+
+ M_LDR pQpBuf,ppQpBuf
+ M_LDR predQP,ppredQP
+ CMP temp1,temp2 ;// the flags set here drive every LT/GE instruction below
+ MOV temp2,#OMX_VC_VERTICAL
+ LDRLTB temp1,[pQpBuf,#1] ;// temp1<temp2: take pQpBuf[1]
+ STRLT temp2,[predDir] ;// temp1<temp2: *predDir=OMX_VC_VERTICAL
+ STRLT temp1,[predQP] ;// temp1<temp2: *predQP=pQpBuf[1]
+ MOV temp2,#OMX_VC_HORIZONTAL
+ LDRGEB temp1,[pQpBuf] ;// otherwise: take pQpBuf[0]
+ STRGE temp2,[predDir] ;// otherwise: *predDir=OMX_VC_HORIZONTAL
+ MOV Return,#OMX_Sts_NoErr ;// the status is always OMX_Sts_NoErr
+ STRGE temp1,[predQP] ;// otherwise: *predQP=pQpBuf[0]
+
+
+
+ M_END
+
+ ENDIF
+
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
new file mode 100644
index 0000000..ed17f9b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
@@ -0,0 +1,61 @@
+/**
+ *
+ * File Name: armVCM4P2_Zigzag_Tables.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * File: armVCM4P2_ZigZag_Tables.c
+ * Description: Contains the zigzag tables
+ *
+ */
+
+#include "omxtypes.h"
+
+/* Zigzag scan tables; every entry is double the reference scan position.
+ * Three 64-entry tables are stored back to back: classical [0-63],
+ * vertical [64-127] and horizontal [128-191] zigzag scan. Each 64-entry
+ * table holds every even value from 0 to 126 exactly once.
+ */
+const OMX_U8 armVCM4P2_aClassicalZigzagScan [192] =
+{
+ 0, 2, 16, 32, 18, 4, 6, 20,
+ 34, 48, 64, 50, 36, 22, 8, 10,
+ 24, 38, 52, 66, 80, 96, 82, 68,
+ 54, 40, 26, 12, 14, 28, 42, 56,
+ 70, 84, 98, 112, 114, 100, 86, 72,
+ 58, 44, 30, 46, 60, 74, 88, 102,
+ 116, 118, 104, 90, 76, 62, 78, 92,
+ 106, 120, 122, 108, 94, 110, 124, 126,
+
+ 0, 16, 32, 48, 2, 18, 4, 20,
+ 34, 50, 64, 80, 96, 112, 114, 98,
+ 82, 66, 52, 36, 6, 22, 8, 24,
+ 38, 54, 68, 84, 100, 116, 70, 86,
+ 102, 118, 40, 56, 10, 26, 12, 28,
+ 42, 58, 72, 88, 104, 120, 74, 90,
+ 106, 122, 44, 60, 14, 30, 46, 62,
+ 76, 92, 108, 124, 78, 94, 110, 126,
+
+ 0, 2, 4, 6, 16, 18, 32, 34,
+ 20, 22, 8, 10, 12, 14, 30, 28,
+ 26, 24, 38, 36, 48, 50, 64, 66,
+ 52, 54, 40, 42, 44, 46, 56, 58,
+ 60, 62, 68, 70, 80, 82, 96, 98,
+ 84, 86, 72, 74, 76, 78, 88, 90,
+ 92, 94, 100, 102, 112, 114, 116, 118,
+ 104, 106, 108, 110, 120, 122, 124, 126
+
+
+};
+
+
+
+
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
new file mode 100644
index 0000000..b63d295
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
@@ -0,0 +1,102 @@
+/**
+ *
+ * File Name: omxVCM4P2_DecodeBlockCoef_Inter.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for inter reconstruction
+ *
+ */
+
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Inter
+ *
+ * Description:
+ * Decodes the INTER block coefficients. Inverse quantization, inversely zigzag
+ * positioning and IDCT, with appropriate clipping on each step, are performed
+ * on the coefficients. The results (residuals) are placed in a contiguous array
+ * of 64 elements. For INTER block, the output buffer holds the residuals for
+ * further reconstruction.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream buffer. There is no boundary
+ * check for the bit stream buffer.
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * to by *ppBitStream. *pBitOffset is valid within
+ * [0-7]
+ * [in] QP quantization parameter
+ * [in] shortVideoHeader a flag indicating presence of short_video_header;
+ * shortVideoHeader==1 indicates using quantization method defined in short
+ * video header mode, and shortVideoHeader==0 indicates normal quantization method.
+ * [out] ppBitStream *ppBitStream is updated after the block is decoded, so that it points to the
+ * current byte in the bit stream buffer.
+ * [out] pBitOffset *pBitOffset is updated so that it points to the current bit position in the
+ * byte pointed by *ppBitStream
+ * [out] pDst pointer to the decoded residual buffer (a contiguous array of 64 elements of
+ * OMX_S16 data type). Must be 16-byte aligned.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - At least one of the following pointers is Null: ppBitStream, *ppBitStream, pBitOffset , pDst
+ * - At least one of the below case:
+ * - *pBitOffset exceeds [0,7], QP <= 0;
+ * - pDst not 16-byte aligned
+ * OMX_Sts_Err - status error
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Inter(
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    OMX_S16 *pDst,
+    OMX_INT QP,
+    OMX_INT shortVideoHeader
+)
+{
+    /* Scratch storage: 64 coefficients plus 15 spare elements that leave
+       room for the 16-byte alignment done below. */
+    OMX_S16 coefStorage[79];
+
+    /* Aligned working pointer into coefStorage, shared by all stages. */
+    OMX_S16 *pCoef = armAlignTo16Bytes(coefStorage);
+
+    /* Result of the most recent stage. */
+    OMXResult status;
+
+    /* Stage 1: VLC decoding and inverse zigzag scan into pCoef. */
+    status = omxVCM4P2_DecodeVLCZigzag_Inter(
+        ppBitStream, pBitOffset, pCoef, shortVideoHeader);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 2: inverse quantization of the coefficients at pCoef. */
+    status = omxVCM4P2_QuantInvInter_I(pCoef, QP);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 3: inverse transform from pCoef into pDst. */
+    status = omxVCM4P2_IDCT8x8blk(pCoef, pDst);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* All three stages completed without error. */
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
new file mode 100644
index 0000000..c609a60
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
@@ -0,0 +1,208 @@
+/**
+ *
+ * File Name: omxVCM4P2_DecodeBlockCoef_Intra.c
+ * OpenMAX DL: v1.0.2
+ * Revision: 9641
+ * Date: Thursday, February 7, 2008
+ *
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ *
+ *
+ * Description:
+ * Contains modules for intra reconstruction
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Intra
+ *
+ * Description:
+ * Decodes the INTRA block coefficients. Inverse quantization, inversely zigzag
+ * positioning, and IDCT, with appropriate clipping on each step, are performed
+ * on the coefficients. The results are then placed in the output frame/plane on
+ * a pixel basis. For INTRA block, the output values are clipped to [0, 255] and
+ * written to corresponding block buffer within the destination plane.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in] ppBitStream pointer to the pointer to the current byte in
+ * the bit stream buffer. There is no boundary
+ * check for the bit stream buffer.
+ * [in] pBitOffset pointer to the bit position in the byte pointed
+ * to by *ppBitStream. *pBitOffset is valid within
+ * [0-7].
+ * [in] step width of the destination plane
+ * [in/out] pCoefBufRow [in] pointer to the coefficient row buffer
+ * [out] updated coefficient row buffer
+ * [in/out] pCoefBufCol [in] pointer to the coefficient column buffer
+ * [out] updated coefficient column buffer
+ * [in] curQP quantization parameter of the macroblock which
+ * the current block belongs to
+ * [in] pQpBuf Pointer to a 2-element QP array. pQpBuf[0] holds the QP of the 8x8 block left to
+ * the current block(QPa). pQpBuf[1] holds the QP of the 8x8 block just above the
+ * current block(QPc).
+ * Note, in case the corresponding block is out of VOP bound, the QP value will have
+ * no effect to the intra-prediction process. Refer to subclause "7.4.3.3 Adaptive
+ * ac coefficient prediction" of ISO/IEC 14496-2(MPEG4 Part2) for accurate description.
+ * [in] blockIndex block index indicating the component type and
+ * position as defined in subclause 6.1.3.8,
+ * Figure 6-5 of ISO/IEC 14496-2.
+ * [in] intraDCVLC a code determined by intra_dc_vlc_thr and QP.
+ * This allows a mechanism to switch between two VLC
+ * for coding of Intra DC coefficients as per Table
+ * 6-21 of ISO/IEC 14496-2.
+ * [in] ACPredFlag a flag equal to ac_pred_flag (of luminance) indicating
+ * if the ac coefficients of the first row or first
+ * column are differentially coded for intra coded
+ * macroblock.
+ * [in] shortVideoHeader a flag indicating presence of short_video_header;
+ * shortVideoHeader==1 selects linear intra DC mode,
+ * and shortVideoHeader==0 selects nonlinear intra DC mode.
+ * [out] ppBitStream *ppBitStream is updated after the block is
+ * decoded, so that it points to the current byte
+ * in the bit stream buffer
+ * [out] pBitOffset *pBitOffset is updated so that it points to the
+ * current bit position in the byte pointed by
+ * *ppBitStream
+ * [out] pDst pointer to the block in the destination plane.
+ * pDst should be 16-byte aligned.
+ * [out] pCoefBufRow pointer to the updated coefficient row buffer.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ * - At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset,
+ * pCoefBufRow, pCoefBufCol, pQPBuf, pDst.
+ * or
+ * - At least one of the below case: *pBitOffset exceeds [0,7], curQP exceeds (1, 31),
+ * blockIndex exceeds [0,9], step is not the multiple of 8, intraDCVLC is zero while
+ * blockIndex greater than 5.
+ * or
+ * - pDst is not 16-byte aligned
+ * OMX_Sts_Err - status error
+ *
+ */
+
+OMXResult omxVCM4P2_DecodeBlockCoef_Intra(
+    const OMX_U8 **ppBitStream,
+    OMX_INT *pBitOffset,
+    OMX_U8 *pDst,
+    OMX_INT step,
+    OMX_S16 *pCoefBufRow,
+    OMX_S16 *pCoefBufCol,
+    OMX_U8 curQP,
+    const OMX_U8 *pQPBuf,
+    OMX_INT blockIndex,
+    OMX_INT intraDCVLC,
+    OMX_INT ACPredFlag,
+    OMX_INT shortVideoHeader
+    )
+{
+    /* Scratch storage for two blocks of 64 coefficients. The 15 spare
+       elements in each array leave room for the 16-byte alignment done
+       below. */
+    OMX_S16 coefStorage[79];
+    OMX_S16 blockStorage[79];
+
+    /* Aligned working pointers into the scratch arrays. */
+    OMX_S16 *pCoef = armAlignTo16Bytes(coefStorage);
+    OMX_S16 *pBlock = armAlignTo16Bytes(blockStorage);
+
+    /* Outputs of armVCM4P2_SetPredDir. */
+    OMX_INT dcPredDir;
+    OMX_INT predictorQP;
+
+    /* Direction passed to the VLC decoders (OMX_VC_NONE when ACPredFlag
+       is 0). */
+    OMX_INT acPredDir;
+
+    OMXVCM4P2VideoComponent component;
+    OMXResult status;
+
+    /* Work out the prediction direction and the predictor QP. */
+    armVCM4P2_SetPredDir(
+        blockIndex,
+        pCoefBufRow,
+        pCoefBufCol,
+        &dcPredDir,
+        &predictorQP,
+        pQPBuf);
+
+    /* The AC direction equals the DC direction unless AC prediction is
+       turned off for this macroblock. */
+    acPredDir = (ACPredFlag == 0) ? OMX_VC_NONE : dcPredDir;
+
+    /* Block indices up to 3 select luminance, anything above selects
+       chrominance. */
+    component = (blockIndex <= 3) ? OMX_VC_LUMINANCE
+                                  : OMX_VC_CHROMINANCE;
+
+    /* VLC decoding and inverse zigzag scan: intraDCVLC == 1 selects the
+       IntraDCVLC variant, every other value the IntraACVLC variant. */
+    if (intraDCVLC == 1)
+    {
+        status = omxVCM4P2_DecodeVLCZigzag_IntraDCVLC(
+            ppBitStream,
+            pBitOffset,
+            pCoef,
+            acPredDir,
+            shortVideoHeader,
+            component);
+        armRetDataErrIf((status != OMX_Sts_NoErr), status);
+    }
+    else
+    {
+        status = omxVCM4P2_DecodeVLCZigzag_IntraACVLC(
+            ppBitStream,
+            pBitOffset,
+            pCoef,
+            acPredDir,
+            shortVideoHeader);
+        armRetDataErrIf((status != OMX_Sts_NoErr), status);
+    }
+
+    /* AC/DC prediction and coefficient reconstruction. */
+    status = omxVCM4P2_PredictReconCoefIntra(
+        pCoef,
+        pCoefBufRow,
+        pCoefBufCol,
+        curQP,
+        predictorQP,
+        dcPredDir,
+        ACPredFlag,
+        component);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Inverse quantization of the coefficients at pCoef. */
+    status = omxVCM4P2_QuantInvIntra_I(
+        pCoef,
+        curQP,
+        component,
+        shortVideoHeader);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Inverse transform from pCoef into the second scratch block. */
+    status = omxVCM4P2_IDCT8x8blk(pCoef, pBlock);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Clip the transformed block to 0..255 while copying it into the
+       destination plane, whose line width is step. */
+    armVCM4P2_Clip8(pBlock, pDst, step);
+
+    /* All stages completed without error; any failing stage has already
+       returned its own status above. */
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
new file mode 100644
index 0000000..a1861da
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
@@ -0,0 +1,364 @@
+; **********
+; *
+; * File Name: omxVCM4P2_DecodePadMV_PVOP_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; **
+; * Function: omxVCM4P2_DecodePadMV_PVOP
+; *
+; * Description:
+; * Decodes and pads four motion vectors of the non-intra macroblock in P-VOP.
+; * The motion vector padding process is specified in subclause 7.6.1.6 of
+; * ISO/IEC 14496-2.
+; *
+; * Remarks:
+; *
+; *
+; * Parameters:
+; * [in] ppBitStream pointer to the pointer to the current byte in
+; * the bit stream buffer
+; * [in] pBitOffset pointer to the bit position in the byte pointed
+; * to by *ppBitStream. *pBitOffset is valid within
+; * [0-7].
+; * [in] pSrcMVLeftMB pointers to the motion vector buffers of the
+; * macroblocks spatially at the left side of the current macroblock
+; * respectively.
+; * [in] pSrcMVUpperMB pointers to the motion vector buffers of the
+; * macroblocks spatially at the upper side of the current macroblock
+; * respectively.
+; * [in] pSrcMVUpperRightMB pointers to the motion vector buffers of the
+; * macroblocks spatially at the upper-right side of the current macroblock
+; * respectively.
+; * [in] fcodeForward a code equal to vop_fcode_forward in MPEG-4
+; * bit stream syntax
+; * [in] MBType the type of the current macroblock. If MBType
+; * is not equal to OMX_VC_INTER4V, the destination
+; * motion vector buffer is still filled with the
+; * same decoded vector.
+; * [out] ppBitStream *ppBitStream is updated after the block is decoded,
+; * so that it points to the current byte in the bit
+; * stream buffer
+; * [out] pBitOffset *pBitOffset is updated so that it points to the
+; * current bit position in the byte pointed by
+; * *ppBitStream
+; * [out] pDstMVCurMB pointer to the motion vector buffer of the current
+; * macroblock which contains four decoded motion vectors
+; *
+; * Return Value:
+; * OMX_Sts_NoErr -no error
+; *
+; *
+; * OMX_Sts_Err - status error
+; *
+; *
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+ INCLUDE omxVC_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+ IF ARM1136JS
+
+;//Input Arguments
+;// The last four arguments are fetched through the M_ARG slots declared after M_START
+ppBitStream RN 0
+pBitOffset RN 1
+pSrcMVLeftMB RN 2
+pSrcMVUpperMB RN 3
+pSrcMVUpperRightMB RN 4
+pDstMVCurMB RN 5
+fcodeForward RN 6
+MBType RN 7
+
+;//Local Variables
+;// Several names below alias one register (r0, r1, r4, r7, r14), so only one name per register can be live at a time
+zero RN 4
+one RN 4
+scaleFactor RN 1
+
+
+Return RN 0
+
+VlcMVD RN 0
+index RN 4
+Count RN 7 ;// same register as MBType
+
+mvHorData RN 4
+mvHorResidual RN 0
+
+mvVerData RN 4
+mvVerResidual RN 0
+
+temp RN 1 ;// same register as Range, scaleFactor, pBitOffset and diffMVdy
+
+temp1 RN 3
+High RN 4
+Low RN 2
+Range RN 1 ;// same register as temp: writing temp destroys Range
+
+BlkCount RN 14 ;// same register as LR
+
+diffMVdx RN 0
+diffMVdy RN 1
+
+;// Scratch Registers
+;// r8-r10 are handed to the M_BD_* bitstream macros; T1/T2 are passed to them as temporaries
+RBitStream RN 8
+RBitCount RN 9
+RBitBuffer RN 10
+
+T1 RN 11
+T2 RN 12
+LR RN 14
+
+ IMPORT armVCM4P2_aVlcMVD
+ IMPORT omxVCM4P2_FindMVpred
+
+ ;// Allocate stack memory
+;// The first three slots are filled just before the call to omxVCM4P2_FindMVpred
+ M_ALLOC4 ppDstMVCurMB,4
+ M_ALLOC4 pDstMVPredME,4
+ M_ALLOC4 pBlkCount,4
+;// Saved copies of the register arguments
+ M_ALLOC4 pppBitStream,4
+ M_ALLOC4 ppBitOffset,4
+ M_ALLOC4 ppSrcMVLeftMB,4
+ M_ALLOC4 ppSrcMVUpperMB,4
+;// Decoded differentials and the value 32*scaleFactor
+ M_ALLOC4 pdiffMVdx,4
+ M_ALLOC4 pdiffMVdy,4
+ M_ALLOC4 pHigh,4
+
+
+
+
+ M_START omxVCM4P2_DecodePadMV_PVOP,r11
+
+ M_ARG pSrcMVUpperRightMBonStack,4 ;// pointer to pSrcMVUpperRightMB on stack
+ M_ARG pDstMVCurMBonStack,4 ;// pointer to pDstMVCurMB on stack
+ M_ARG fcodeForwardonStack,4 ;// pointer to fcodeForward on stack
+ M_ARG MBTypeonStack,4 ;// pointer to MBType on stack
+ ;// Flow: an INTRA or INTRA_Q macroblock gets four zero motion vectors and
+ ;// returns at once. Otherwise Count differential vectors are decoded, each
+ ;// is added to the predictor written by omxVCM4P2_FindMVpred and wrapped
+ ;// into [-32*scaleFactor, 32*scaleFactor-1], scaleFactor=1<<(fcodeForward-1).
+ ;// T1 doubles as a short-lived temporary between the bitstream macros.
+ ;// Initializing the BitStream Macro
+
+ M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+ M_LDR MBType,MBTypeonStack ;// Load MBType from stack
+ M_LDR pDstMVCurMB,pDstMVCurMBonStack ;// Load pDstMVCurMB from stack
+ MOV zero,#0
+
+ TEQ MBType,#OMX_VC_INTRA ;// Check if MBType=OMX_VC_INTRA
+ TEQNE MBType,#OMX_VC_INTRA_Q ;// check if MBType=OMX_VC_INTRA_Q
+ STREQ zero,[pDstMVCurMB] ;// pDstMVCurMB[0]=0
+ M_BD_INIT1 T1, T2, T2
+ STREQ zero,[pDstMVCurMB,#4] ;// pDstMVCurMB[1]=0
+ M_BD_INIT2 T1, T2, T2
+ STREQ zero,[pDstMVCurMB,#8] ;// pDstMVCurMB[2]=0 (was a repeated store to #4)
+ MOVEQ Return,#OMX_Sts_NoErr
+ MOV BlkCount,#0
+ STREQ zero,[pDstMVCurMB,#12] ;// pDstMVCurMB[3]=0 (was a repeated store to #4)
+
+ BEQ ExitOK
+
+ TEQ MBType,#OMX_VC_INTER4V ;// Check if MBType=OMX_VC_INTER4V
+ TEQNE MBType,#OMX_VC_INTER4V_Q ;// Check if MBType=OMX_VC_INTER4V_Q
+ MOVEQ Count,#4
+
+ TEQ MBType,#OMX_VC_INTER ;// Check if MBType=OMX_VC_INTER
+ TEQNE MBType,#OMX_VC_INTER_Q ;// Check if MBType=OMX_VC_INTER_Q
+ MOVEQ Count,#1
+
+ M_LDR fcodeForward,fcodeForwardonStack ;// Load fcodeForward from stack
+
+ ;// Storing the values temporarily on stack
+
+ M_STR ppBitStream,pppBitStream
+ M_STR pBitOffset,ppBitOffset
+
+
+ SUB temp,fcodeForward,#1 ;// temp=fcodeForward-1
+ MOV one,#1
+ M_STR pSrcMVLeftMB,ppSrcMVLeftMB
+ LSL scaleFactor,one,temp ;// scaleFactor=1<<(fcodeForward-1)
+ M_STR pSrcMVUpperMB,ppSrcMVUpperMB
+ LSL scaleFactor,scaleFactor,#5
+ M_STR scaleFactor,pHigh ;// [pHigh]=32*scaleFactor
+
+ ;// VLD Decoding
+
+
+Loop
+
+ LDR VlcMVD, =armVCM4P2_aVlcMVD ;// Load the optimized MVD VLC table
+
+ ;// Horizontal Data and Residual calculation
+
+ LDR temp,=0xFFF
+ M_BD_VLD index,T1,T2,VlcMVD,3,2 ;// variable length decoding using the macro
+
+ TEQ index,temp
+ BEQ ExitError ;// Exit with an Error Message if the decoded symbol is an invalid symbol
+
+ SUB mvHorData,index,#32 ;// mvHorData=index-32
+ MOV mvHorResidual,#1 ;// mvHorResidual=1
+ CMP fcodeForward,#1
+ TEQNE mvHorData,#0
+ MOVEQ diffMVdx,mvHorData ;// if scaleFactor=1(fcodeForward=1) or mvHorData=0 diffMVdx=mvHorData
+ BEQ VerticalData
+
+ SUB temp,fcodeForward,#1
+ M_BD_VREAD8 mvHorResidual,temp,T1,T2 ;// get mvHorResidual from bitstream if fcodeForward>1 and mvHorData!=0
+
+ CMP mvHorData,#0 ;// the LT result is kept until the final RSBLT
+ RSBLT mvHorData,mvHorData,#0 ;// mvHorData=abs(mvHorData)
+ SUB T1,fcodeForward,#1 ;// T1=fcodeForward-1, the shift that yields scaleFactor
+ SUB mvHorData,mvHorData,#1 ;// mvHorData=abs(mvHorData)-1
+ ADD diffMVdx,mvHorResidual,mvHorData,LSL T1 ;// diffMVdx=(abs(mvHorData)-1)*scaleFactor+mvHorResidual
+ ADD diffMVdx,diffMVdx,#1
+ RSBLT diffMVdx,diffMVdx,#0 ;// negate when mvHorData was negative
+ ;// Vertical Data and Residual calculation
+
+VerticalData
+
+ M_STR diffMVdx,pdiffMVdx ;// Store the diffMVdx on stack
+ LDR VlcMVD, =armVCM4P2_aVlcMVD ;// Loading the address of optimized VLC tables
+
+ LDR temp,=0xFFF
+ M_BD_VLD index,T1,T2,VlcMVD,3,2 ;// VLC decoding using the macro
+
+ TEQ index,temp
+ BEQ ExitError ;// Exit with an Error Message if an Invalid Symbol occurs
+
+ SUB mvVerData,index,#32 ;// mvVerData=index-32
+ MOV mvVerResidual,#1
+ CMP fcodeForward,#1
+ TEQNE mvVerData,#0
+ MOVEQ diffMVdy,mvVerData ;// diffMVdy = mvVerData if scaleFactor=1(fcodeForward=1) or mvVerData=0
+ BEQ FindMVPred
+
+ SUB temp,fcodeForward,#1
+ M_BD_VREAD8 mvVerResidual,temp,T1,T2 ;// Get mvVerResidual from bit stream if fcodeForward>1 and mvVerData!=0
+
+
+ CMP mvVerData,#0 ;// the LT result is kept until the final RSBLT
+ RSBLT mvVerData,mvVerData,#0 ;// mvVerData=abs(mvVerData)
+ SUB T1,fcodeForward,#1 ;// T1=fcodeForward-1, the shift that yields scaleFactor
+ SUB mvVerData,mvVerData,#1 ;// mvVerData=abs(mvVerData)-1
+ ADD diffMVdy,mvVerResidual,mvVerData,LSL T1 ;// diffMVdy=(abs(mvVerData)-1)*scaleFactor+mvVerResidual
+ ADD diffMVdy,diffMVdy,#1
+ RSBLT diffMVdy,diffMVdy,#0 ;// negate when mvVerData was negative
+ ;//Calling the Function omxVCM4P2_FindMVpred
+
+FindMVPred
+
+ M_STR diffMVdy,pdiffMVdy
+ ADD temp,pDstMVCurMB,BlkCount,LSL #2 ;// temp=&pDstMVCurMB[BlkCount]
+ M_STR temp,ppDstMVCurMB ;// store temp on stack for passing as an argument to FindMVPred
+
+ MOV temp,#0
+ M_STR temp,pDstMVPredME ;// Pass pDstMVPredME=NULL as an argument
+ M_STR BlkCount,pBlkCount ;// Pass BlkCount as Argument through stack
+
+ MOV temp,pSrcMVLeftMB ;// temp (RN 1)=pSrcMVLeftMB
+ M_LDR pSrcMVUpperRightMB,pSrcMVUpperRightMBonStack
+ MOV pSrcMVLeftMB,pSrcMVUpperMB ;// pSrcMVLeftMB ( RN 2) = pSrcMVUpperMB
+ MOV ppBitStream,pDstMVCurMB ;// ppBitStream ( RN 0) = pDstMVCurMB
+ MOV pSrcMVUpperMB,pSrcMVUpperRightMB ;// pSrcMVUpperMB( RN 3) = pSrcMVUpperRightMB
+ BL omxVCM4P2_FindMVpred ;// Branch to subroutine omxVCM4P2_FindMVpred
+
+ ;// Store Horizontal Motion Vector
+
+ M_LDR BlkCount,pBlkCount ;// Load BlkCount from stack
+ M_LDR High,pHigh ;// High=32*scaleFactor
+ LSL temp1,BlkCount,#2 ;// temp1=BlkCount*4
+ M_LDR diffMVdx,pdiffMVdx ;// Load diffMVdx
+
+ LDRSH temp,[pDstMVCurMB,temp1] ;// temp=pDstMVCurMB[BlkCount]
+
+
+ RSB Low,High,#0 ;// Low = -32*scaleFactor
+ ADD diffMVdx,temp,diffMVdx ;// diffMVdx=pDstMVCurMB[BlkCount]+diffMVdx
+ ADD Range,High,High ;// Range=64*ScaleFactor (overwrites temp, same register)
+ SUB High,High,#1 ;// High= 32*scaleFactor-1
+
+ CMP diffMVdx,Low ;// If diffMVdx<Low
+ ADDLT diffMVdx,diffMVdx,Range ;// diffMVdx+=Range
+
+ CMP diffMVdx,High
+ SUBGT diffMVdx,diffMVdx,Range ;// If diffMVdx > High diffMVdx-=Range
+ STRH diffMVdx,[pDstMVCurMB,temp1]
+
+ ;// Store Vertical
+
+ ADD temp1,temp1,#2 ;// temp1=4*BlkCount+2
+ M_LDR diffMVdx,pdiffMVdy ;// Load diffMVdy
+ LDRSH T1,[pDstMVCurMB,temp1] ;// T1=pDstMVCurMB[BlkCount].dy; T1 is used so that Range (same register as temp) survives
+ ADD BlkCount,BlkCount,#1 ;// BlkCount=BlkCount+1
+ ADD diffMVdx,T1,diffMVdx
+ CMP diffMVdx,Low
+ ADDLT diffMVdx,diffMVdx,Range ;// If diffMVdy<Low diffMVdy+=Range
+ CMP diffMVdx,High
+ SUBGT diffMVdx,diffMVdx,Range ;// If diffMVdy > High diffMVdy-=Range
+ STRH diffMVdx,[pDstMVCurMB,temp1]
+
+ CMP BlkCount,Count
+ M_LDR pSrcMVLeftMB,ppSrcMVLeftMB
+ M_LDR pSrcMVUpperMB,ppSrcMVUpperMB
+
+ BLT Loop ;// If BlkCount<Count Continue the Loop
+
+
+ ;// If MBType=OMX_VC_INTER or MBtype=OMX_VC_INTER_Q copy pDstMVCurMB[0] to
+ ;// pDstMVCurMB[1], pDstMVCurMB[2], pDstMVCurMB[3]
+
+ M_LDR MBType,MBTypeonStack
+
+ TEQ MBType,#OMX_VC_INTER
+ TEQNE MBType,#OMX_VC_INTER_Q
+ LDREQ temp,[pDstMVCurMB]
+ M_LDR ppBitStream,pppBitStream
+ STREQ temp,[pDstMVCurMB,#4]
+
+ STREQ temp,[pDstMVCurMB,#8]
+ STREQ temp,[pDstMVCurMB,#12]
+
+
+ M_LDR pBitOffset,ppBitOffset
+ ;//Ending the macro
+ M_BD_FINI ppBitStream,pBitOffset ;// Finishing the Macro
+
+
+ MOV Return,#OMX_Sts_NoErr
+ B ExitOK
+
+ExitError
+
+ M_LDR ppBitStream,pppBitStream
+ M_LDR pBitOffset,ppBitOffset
+ ;//Ending the macro
+ M_BD_FINI ppBitStream,pBitOffset
+
+ MOV Return,#OMX_Sts_Err
+
+ExitOK
+
+ M_END
+ ENDIF
+ END
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
new file mode 100644
index 0000000..c43b253
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
@@ -0,0 +1,132 @@
+;/**
+; *
+; * File Name: omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for zigzag scanning and VLC decoding
+; * for inter block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_Inter
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one inter coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] ppBitStream pointer to the pointer to the current byte in
+; * the bitstream buffer
+; * [in] pBitOffset pointer to the bit position in the byte pointed
+; * to by *ppBitStream. *pBitOffset is valid within [0-7].
+; * [in] shortVideoHeader binary flag indicating presence of short_video_header;
+; * escape modes 0-3 are used if shortVideoHeader==0,
+; * and escape mode 4 is used when shortVideoHeader==1.
+; * [out] ppBitStream *ppBitStream is updated after the block is
+; * decoded, so that it points to the current byte
+; * in the bit stream buffer
+; * [out] pBitOffset *pBitOffset is updated so that it points to the
+; * current bit position in the byte pointed by
+; * *ppBitStream
+; * [out] pDst pointer to the coefficient buffer of current
+; * block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; * -pDst is not 16-byte aligned, or
+; * -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; * -At least one mark bit is equal to zero
+; * -Encountered an illegal stream code that cannot be found in the VLC table
+; * -Encountered and illegal code in the VLC FLC table
+; * -The number of coefficients is greater than 64
+; *
+; */
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+
+ IF ARM1136JS
+
+ ;// Import various tables needed for the function
+
+
+ IMPORT armVCM4P2_InterVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+ ;// Packed in Run:Level:Last format
+ IMPORT armVCM4P2_InterL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_InterL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_aClassicalZigzagScan ;// contains classical Zigzag table entries with double the original values
+ IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+
+
+;//Input Arguments
+;// r0-r3 are not modified by this function before the call to the unsafe routine
+ppBitStream RN 0
+pBitOffset RN 1
+pDst RN 2
+shortVideoHeader RN 3
+
+;//Local Variables
+
+Return RN 0
+;// The four table pointers share r4: each is loaded and then stored to its stack slot in turn
+pVlcTableL0L1 RN 4
+pLMAXTableL0L1 RN 4
+pRMAXTableL0L1 RN 4
+pZigzagTable RN 4
+Count RN 6
+
+
+
+ ;// Allocate stack memory to store the VLC,Zigzag,LMAX and RMAX tables
+
+
+ M_ALLOC4 ppVlcTableL0L1,4
+ M_ALLOC4 ppLMAXTableL0L1,4
+ M_ALLOC4 ppRMAXTableL0L1,4
+ M_ALLOC4 ppZigzagTable,4
+
+;// Wrapper: stores the Inter table addresses in the stack slots and calls the shared unsafe decoder
+ M_START omxVCM4P2_DecodeVLCZigzag_Inter,r12
+
+
+
+
+ LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load zigzag table
+ M_STR pZigzagTable,ppZigzagTable ;// Store zigzag table on stack to pass as argument to unsafe function
+ LDR pVlcTableL0L1, =armVCM4P2_InterVlcL0L1 ;// Load optimized VLC table with both L=0 and L=1 entries
+ M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store optimized VLC table address on stack
+ LDR pLMAXTableL0L1, =armVCM4P2_InterL0L1LMAX ;// Load Interleaved L=0 and L=1 LMAX Tables
+ M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX table address on stack
+ LDR pRMAXTableL0L1, =armVCM4P2_InterL0L1RMAX ;// Load Interleaved L=0 and L=1 RMAX Tables
+ MOV Count,#0 ;// set start=0
+ M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// store RMAX table address on stack
+
+
+ BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// call Unsafe Function for VLC Zigzag Decoding
+
+;// NOTE(review): r0 is not written after the call, so the status presumably comes from the unsafe routine - confirm
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
new file mode 100644
index 0000000..166729e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
@@ -0,0 +1,136 @@
+;/**
+; *
+; * File Name: omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for zigzag scanning and VLC decoding
+; * for intra block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] ppBitStream pointer to the pointer to the current byte in
+; * the bitstream buffer
+; * [in] pBitOffset pointer to the bit position in the byte pointed
+; * to by *ppBitStream. *pBitOffset is valid within [0-7].
+; * [in] shortVideoHeader binary flag indicating presence of short_video_header;
+; * escape modes 0-3 are used if shortVideoHeader==0,
+; * and escape mode 4 is used when shortVideoHeader==1.
+; * [out] ppBitStream *ppBitStream is updated after the block is
+; * decoded, so that it points to the current byte
+; * in the bit stream buffer
+; * [out] pBitOffset *pBitOffset is updated so that it points to the
+; * current bit position in the byte pointed by
+; * *ppBitStream
+; * [out] pDst pointer to the coefficient buffer of current
+; * block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; * -pDst is not 16-byte aligned, or
+; * -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; * -At least one mark bit is equal to zero
+; * -Encountered an illegal stream code that cannot be found in the VLC table
+; * -Encountered an illegal code in the VLC FLC table
+; * -The number of coefficients is greater than 64
+; *
+; */
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+
+ IF ARM1136JS
+
+ ;// Import various tables needed for the function
+
+
+ IMPORT armVCM4P2_IntraVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+ ;// Packed in Run:Level:Last format
+ IMPORT armVCM4P2_IntraL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_IntraL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_aClassicalZigzagScan ;// contains classical Zigzag table entries with double the original values
+ IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+;//Input Arguments
+
+ppBitStream RN 0
+pBitOffset RN 1
+pDst RN 2
+PredDir RN 3 ;// Used once, to select the zigzag table, before r3 is reloaded
+shortVideoHeader RN 3 ;// Shares r3 with PredDir; loaded from the stack just before the call
+
+;//Local Variables
+
+Return RN 0
+
+pVlcTableL0L1 RN 4
+pLMAXTableL0L1 RN 4
+pRMAXTableL0L1 RN 4
+pZigzagTable RN 4
+Count RN 6
+
+ ;// The four table-pointer names above all alias r4: each address is
+ ;// stored to its stack slot before the next one is loaded.
+
+ ;// Allocate stack memory to store optimized VLC,Zigzag, RMAX, LMAX Table Addresses
+
+ M_ALLOC4 ppVlcTableL0L1,4
+ M_ALLOC4 ppLMAXTableL0L1,4
+ M_ALLOC4 ppRMAXTableL0L1,4
+ M_ALLOC4 ppZigzagTable,4
+
+
+ M_START omxVCM4P2_DecodeVLCZigzag_IntraACVLC,r12
+
+ M_ARG shortVideoHeaderonStack,4 ;// Names the 4-byte stack argument that carries shortVideoHeader
+ ;// PredDir is still in r3 here; it is consumed by the ADD below
+ LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load Address of the Zigzag table
+ ADD pZigzagTable, pZigzagTable, PredDir, LSL #6 ;// Select the zigzag table for PredDir: advance by PredDir*64 bytes
+
+ M_STR pZigzagTable,ppZigzagTable ;// Store Zigzag table address on stack
+ LDR pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1 ;// Load optimized packed VLC Table with both L=0 and L=1 entries
+ M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store VLC Table address on stack
+ LDR pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX ;// Load LMAX Table
+ M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX Table address on Stack
+ LDR pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX ;// Load RMAX Table
+ MOV Count,#0 ;// Set Start=0
+
+ M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Store RMAX Table address on stack
+
+
+ ;// r3 is now free (PredDir already used), so it can receive shortVideoHeader
+ M_LDR shortVideoHeader,shortVideoHeaderonStack ;// get the Input Argument from stack
+
+ BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// Call Unsafe Function
+ ;// Nothing after the call writes r0 (Return), so the value the callee
+ ;// leaves there is what this function returns.
+
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
new file mode 100644
index 0000000..d19cb13
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
@@ -0,0 +1,224 @@
+;/**
+; *
+; * File Name: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for zigzag scanning and VLC decoding
+; * for intra block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] ppBitStream pointer to the pointer to the current byte in
+; * the bitstream buffer
+; * [in] pBitOffset pointer to the bit position in the byte pointed
+; * to by *ppBitStream. *pBitOffset is valid within [0-7].
+; * [in] shortVideoHeader binary flag indicating presence of short_video_header;
+; * escape modes 0-3 are used if shortVideoHeader==0,
+; * and escape mode 4 is used when shortVideoHeader==1.
+; * [out] ppBitStream *ppBitStream is updated after the block is
+; * decoded, so that it points to the current byte
+; * in the bit stream buffer
+; * [out] pBitOffset *pBitOffset is updated so that it points to the
+; * current bit position in the byte pointed by
+; * *ppBitStream
+; * [out] pDst pointer to the coefficient buffer of current
+; * block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; * -pDst is not 16-byte aligned, or
+; * -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; * -At least one mark bit is equal to zero
+; * -Encountered an illegal stream code that cannot be found in the VLC table
+; * -Encountered an illegal code in the VLC FLC table
+; * -The number of coefficients is greater than 64
+; *
+; */
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armCOMM_BitDec_s.h
+
+
+ M_VARIANTS ARM1136JS
+
+
+
+
+
+ IF ARM1136JS :LOR: CortexA8
+
+
+ ;// Import various tables needed for the function
+
+
+ IMPORT armVCM4P2_IntraVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+ ;// Packed in Run:Level:Last format
+ IMPORT armVCM4P2_IntraL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_IntraL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1
+ IMPORT armVCM4P2_aClassicalZigzagScan ;// contains CLassical, Horizontal, Vertical Zigzag table entries with double the original values
+ IMPORT armVCM4P2_aIntraDCLumaChromaIndex ;// Contains Optimized DCLuma and DCChroma Index table Entries
+
+
+ IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+;//Input Arguments
+
+ppBitStream RN 0
+pBitOffset RN 1
+pDst RN 2
+PredDir RN 3 ;// Used once, to select the zigzag table, before r3 is reloaded
+shortVideoHeader RN 3 ;// Shares r3 with PredDir; loaded from the stack just before the AC call
+videoComp RN 5 ;// Loaded from the stack; selects the DC index table section (videoComp*64 bytes)
+;//Local Variables
+
+Return RN 0
+
+pDCLumaChromaIndex RN 4
+pDCChromaIndex RN 7
+pVlcTableL0L1 RN 4
+pLMAXTableL0L1 RN 4
+pRMAXTableL0L1 RN 4
+pZigzagTable RN 4
+Count RN 6
+DCValueSize RN 6
+powOfSize RN 7
+temp1 RN 5 ;// Shares r5 with videoComp; receives the marker bit
+
+
+;// Scratch Registers
+
+RBitStream RN 8
+RBitBuffer RN 9
+RBitCount RN 10
+
+T1 RN 11
+T2 RN 12
+DCVal RN 14 ;// r14 is overwritten by BL, so DCVal is saved in pDCCoeff before the AC call
+
+
+ ;// Allocate stack memory to store optimized VLC,Zigzag, RMAX, LMAX Table Addresses
+
+ M_ALLOC4 ppVlcTableL0L1,4
+ M_ALLOC4 ppLMAXTableL0L1,4
+ M_ALLOC4 ppRMAXTableL0L1,4
+ M_ALLOC4 ppZigzagTable,4
+ M_ALLOC4 pDCCoeff,4
+
+
+
+ M_START omxVCM4P2_DecodeVLCZigzag_IntraDCVLC,r12
+
+ M_ARG shortVideoHeaderonStack,4 ;// Names the 4-byte stack argument that carries shortVideoHeader
+ M_ARG videoComponstack,4 ;// Names the 4-byte stack argument that carries videoComp
+
+
+ ;// Decode DC Coefficient
+
+
+ LDR pDCLumaChromaIndex, =armVCM4P2_aIntraDCLumaChromaIndex ;// Load Optimized VLC Table for Luminance and Chrominance
+
+ ;// Initializing the Bitstream Macro
+
+ M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+ M_LDR videoComp,videoComponstack
+ M_BD_INIT1 T1, T2, T2
+ ADD pDCLumaChromaIndex,pDCLumaChromaIndex,videoComp, LSL #6 ;// Advance the table pointer by videoComp*64 bytes
+ M_BD_INIT2 T1, T2, T2
+
+
+ M_BD_VLD DCValueSize,T1,T2,pDCLumaChromaIndex,4,2 ;// VLC Decode using optimized Luminance and Chrominance VLC Table
+
+
+
+
+DecodeDC
+
+ CMP DCValueSize,#12 ;// A decoded size above 12 is treated as an error
+ BGT ExitError
+
+ CMP DCValueSize,#0
+ MOVEQ DCVal,#0 ;// If DCValueSize is zero then DC coeff =0
+ BEQ ACDecode ;// Branch to perform AC Coeff Decoding
+
+ M_BD_VREAD16 DCVal,DCValueSize,T1,T2 ;// Get DC Value From Bit stream
+
+
+ MOV powOfSize,#1
+ LSL powOfSize,DCValueSize ;// powOfSize=pow(2,DCValueSize)
+ CMP DCVal,powOfSize,LSR #1 ;// Compare DCVal with powOfSize/2
+ ADDLT DCVal,DCVal,#1
+ SUBLT DCVal,DCVal,powOfSize ;// If less than powOfSize/2, DCVal = DCVal - powOfSize + 1
+ ;// Else DCVal is used as read from the bit stream
+
+CheckDCValueSize
+
+ CMP DCValueSize,#8 ;// If DCValueSize greater than 8 check marker bit
+
+ BLE ACDecode
+
+ M_BD_READ8 temp1,1,T1 ;// Read the marker bit into temp1
+ TEQ temp1,#0 ;// If Marker bit is zero Exit with an Error Message
+ BEQ ExitError
+
+
+
+ ;// Decode AC Coefficient
+
+ACDecode
+
+ M_STR DCVal,pDCCoeff ;// Store Decoded DC Coeff on Stack
+ M_BD_FINI ppBitStream,pBitOffset ;// Terminating the Bit stream Macro
+
+ LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load Zigzag table address
+ ADD pZigzagTable, pZigzagTable, PredDir, LSL #6 ;// Modify the Zigzag table address based on PredDir (PredDir*64 bytes)
+
+ M_STR pZigzagTable,ppZigzagTable ;// Store zigzag table on stack
+ LDR pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1 ;// Load Optimized VLC Table With both Last=0 and Last=1 Entries
+ M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store Optimized VLC Table on stack
+ LDR pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX ;// Load LMAX Table
+ M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX table on stack
+ LDR pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX ;// Load RMAX Table
+ MOV Count,#1 ;// Set Start =1; the first coefficient is written separately with the DC value below
+
+ M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Store RMAX Table on Stack
+
+ ;// r3 is now free (PredDir already used), so it can receive shortVideoHeader
+ M_LDR shortVideoHeader,shortVideoHeaderonStack ;// Load the Input Argument From Stack
+
+ BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// Call the Unsafe Function
+
+ M_LDR DCVal,pDCCoeff ;// Get the Decoded DC Value From Stack
+ STRH DCVal,[pDst] ;// Store the DC Value
+ B ExitOK ;// Skip the error path; r0 (Return) is not written here after the call
+
+
+
+ExitError
+
+ M_BD_FINI ppBitStream,pBitOffset ;// Terminating the Bit Stream Macro in case of an Error
+ MOV Return,#OMX_Sts_Err ;// Exit with an Error Message
+ExitOK
+
+ M_END
+ ENDIF
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
new file mode 100644
index 0000000..a4bfa71
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
@@ -0,0 +1,194 @@
+;//
+;//
+;// File Name: omxVCM4P2_FindMVpred_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Function:
+;// omxVCM4P2_FindMVpred
+;//
+ ;// Include headers
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+ INCLUDE armVCCOMM_s.h
+
+ ;// Define cpu variants
+ M_VARIANTS ARM1136JS
+
+
+ IF ARM1136JS
+ ;// NOTE(review): this table and pTable are not referenced again in this routine - confirm they are still needed
+ M_TABLE armVCM4P2_pBlkIndexTable
+ DCD OMXVCBlk0, OMXVCBlk1
+ DCD OMXVCBlk2, OMXVCBlk3
+
+;//--------------------------------------------
+;// Declare input registers
+;//--------------------------------------------
+;// r0-r3 are used on entry; pDstMVPred, pDstMVPredME and iBlk are loaded from the stack arguments below
+pSrcMVCurMB RN 0
+pSrcCandMV1 RN 1
+pSrcCandMV2 RN 2
+pSrcCandMV3 RN 3
+pDstMVPred RN 4
+pDstMVPredME RN 5
+iBlk RN 6
+
+pTable RN 4
+CandMV RN 12
+
+pCandMV1 RN 7
+pCandMV2 RN 8
+pCandMV3 RN 9
+;// The dx/dy registers below reuse r0-r3; they are written only after the candidate pointers are in r7-r9
+CandMV1dx RN 0
+CandMV1dy RN 1
+CandMV2dx RN 2
+CandMV2dy RN 3
+CandMV3dx RN 10
+CandMV3dy RN 11
+
+temp RN 14
+
+zero RN 14
+return RN 0
+
+; ----------------------------------------------
+; Main routine
+; ----------------------------------------------
+
+ M_ALLOC4 MV, 4
+
+ ;// Function header
+ M_START omxVCM4P2_FindMVpred, r11
+
+ ;// Define stack arguments
+ M_ARG ppDstMVPred, 4
+ M_ARG ppDstMVPredME, 4
+ M_ARG Blk, 4
+
+ M_ADR CandMV, MV
+ MOV zero, #0
+ M_LDR iBlk, Blk
+
+ ;// Set the default value for these
+ ;// to be used if pSrcCandMV[1|2|3] == NULL
+ MOV pCandMV1, CandMV
+ MOV pCandMV2, CandMV
+ MOV pCandMV3, CandMV
+ ;// The default candidate is the 4-byte stack slot MV, cleared here (dx = dy = 0)
+ STR zero, [CandMV]
+
+ ;// Branch to the case based on blk number
+ M_SWITCH iBlk
+ M_CASE OMXVCBlk0 ;// iBlk=0
+ M_CASE OMXVCBlk1 ;// iBlk=1
+ M_CASE OMXVCBlk2 ;// iBlk=2
+ M_CASE OMXVCBlk3 ;// iBlk=3
+ M_ENDSWITCH
+
+OMXVCBlk0
+ CMP pSrcCandMV1, #0
+ ADDNE pCandMV1, pSrcCandMV1, #4 ;// Candidate 1 = pSrcCandMV1[1] (4 bytes per MV: dx, dy halfwords)
+
+ CMP pSrcCandMV2, #0
+ ADDNE pCandMV2, pSrcCandMV2, #8 ;// Candidate 2 = pSrcCandMV2[2]
+
+ CMP pSrcCandMV3, #0
+ ADDNE pCandMV3, pSrcCandMV3, #8 ;// Candidate 3 = pSrcCandMV3[2]
+ CMPEQ pSrcCandMV1, #0
+ ;// EQ: pSrcCandMV3 and pSrcCandMV1 are both NULL, so candidate 2 is used for all three
+ MOVEQ pCandMV3, pCandMV2
+ MOVEQ pCandMV1, pCandMV2
+
+ CMP pSrcCandMV1, #0
+ CMPEQ pSrcCandMV2, #0
+ ;// EQ: pSrcCandMV1 and pSrcCandMV2 are both NULL, so candidate 3 is used for all three
+ MOVEQ pCandMV1, pCandMV3
+ MOVEQ pCandMV2, pCandMV3
+
+ CMP pSrcCandMV2, #0
+ CMPEQ pSrcCandMV3, #0
+ ;// EQ: pSrcCandMV2 and pSrcCandMV3 are both NULL, so candidate 1 is used for all three
+ MOVEQ pCandMV2, pCandMV1
+ MOVEQ pCandMV3, pCandMV1
+
+ B BlkEnd
+
+OMXVCBlk1
+ MOV pCandMV1, pSrcMVCurMB ;// Candidate 1 = pSrcMVCurMB[0]
+ CMP pSrcCandMV3, #0
+ ADDNE pCandMV3, pSrcCandMV3, #8 ;// Candidate 3 = pSrcCandMV3[2]
+
+ CMP pSrcCandMV2, #0
+ ADDNE pCandMV2, pSrcCandMV2, #12 ;// Candidate 2 = pSrcCandMV2[3]
+
+ CMPEQ pSrcCandMV3, #0
+ ;// EQ: pSrcCandMV2 and pSrcCandMV3 are both NULL, so candidate 1 is used for all three
+ MOVEQ pCandMV2, pCandMV1
+ MOVEQ pCandMV3, pCandMV1
+
+ B BlkEnd
+
+OMXVCBlk2
+ CMP pSrcCandMV1, #0
+ MOV pCandMV2, pSrcMVCurMB ;// Candidate 2 = pSrcMVCurMB[0]
+ ADD pCandMV3, pSrcMVCurMB, #4 ;// Candidate 3 = pSrcMVCurMB[1]
+ ADDNE pCandMV1, pSrcCandMV1, #12 ;// Candidate 1 = pSrcCandMV1[3], else the zero default (flags still from the CMP above)
+ B BlkEnd
+
+OMXVCBlk3
+ ADD pCandMV1, pSrcMVCurMB, #8 ;// Candidate 1 = pSrcMVCurMB[2]
+ MOV pCandMV2, pSrcMVCurMB ;// Candidate 2 = pSrcMVCurMB[0]
+ ADD pCandMV3, pSrcMVCurMB, #4 ;// Candidate 3 = pSrcMVCurMB[1]
+
+BlkEnd
+
+ ;// Using the transparency info, zero
+ ;// out the candidate MV if necessary
+ LDRSH CandMV1dx, [pCandMV1], #2
+ LDRSH CandMV2dx, [pCandMV2], #2
+ LDRSH CandMV3dx, [pCandMV3], #2
+
+ ;// Load argument from the stack
+ M_LDR pDstMVPredME, ppDstMVPredME
+
+ LDRSH CandMV1dy, [pCandMV1]
+ LDRSH CandMV2dy, [pCandMV2]
+ LDRSH CandMV3dy, [pCandMV3]
+
+ CMP pDstMVPredME, #0
+
+ ;// Store the candidate MV's into the pDstMVPredME,
+ ;// these can be used in the fast algorithm if implemented
+ ;// (skipped entirely when pDstMVPredME is NULL)
+ STRHNE CandMV1dx, [pDstMVPredME], #2
+ STRHNE CandMV1dy, [pDstMVPredME], #2
+ STRHNE CandMV2dx, [pDstMVPredME], #2
+ STRHNE CandMV2dy, [pDstMVPredME], #2
+ STRHNE CandMV3dx, [pDstMVPredME], #2
+ STRHNE CandMV3dy, [pDstMVPredME]
+
+ ; Find the median of the 3 candidate MV's
+ M_MEDIAN3 CandMV1dx, CandMV2dx, CandMV3dx, temp
+
+ ;// Load argument from the stack
+ M_LDR pDstMVPred, ppDstMVPred
+
+ M_MEDIAN3 CandMV1dy, CandMV2dy, CandMV3dy, temp
+ ;// Presumably M_MEDIAN3 leaves the median in its third operand - confirm in armCOMM_s.h
+ STRH CandMV3dx, [pDstMVPred], #2
+ STRH CandMV3dy, [pDstMVPred]
+
+ MOV return, #OMX_Sts_NoErr
+
+ M_END
+ ENDIF ;// ARM1136JS
+
+ END \ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
new file mode 100644
index 0000000..bfeb540
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
@@ -0,0 +1,73 @@
+;//
+;//
+;// File Name: omxVCM4P2_IDCT8x8blk_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+;// Function:
+;// omxVCM4P2_IDCT8x8blk
+;//
+ ;// Include headers
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ ;// Define cpu variants
+ M_VARIANTS ARM1136JS
+
+ INCLUDE armCOMM_IDCT_s.h
+
+ IMPORT armCOMM_IDCTPreScale
+ ;//
+ ;// Function prototype
+ ;//
+ ;// OMXResult
+ ;// omxVCM4P2_IDCT8x8blk(const OMX_S16* pSrc,
+ ;// OMX_S16* pDst)
+ ;//
+
+ IF ARM1136JS :LOR: CortexA8
+ M_ALLOC4 ppDest, 4
+ M_ALLOC4 pStride, 4
+ M_ALLOC8 pBlk, 2*8*8 ;// 128 bytes, 8-byte aligned
+ ENDIF
+
+ IF ARM1136JS
+ M_START omxVCM4P2_IDCT8x8blk, r11
+ ENDIF
+ ;// NOTE(review): M_START is guarded by ARM1136JS only, while the stack
+ ;// allocation and the body use ARM1136JS :LOR: CortexA8 - confirm this is intended
+ IF ARM1136JS :LOR: CortexA8
+
+;// Declare input registers
+pSrc RN 0
+pDst RN 1
+
+;// Declare other intermediate registers
+Result RN 0
+
+;// Prototype for macro M_IDCT
+;// pSrc RN 0 ;// source data buffer
+;// Stride RN 1 ;// destination stride in bytes
+;// pDest RN 2 ;// destination data buffer
+;// pScale RN 3 ;// pointer to scaling table
+;// pDst and Stride both name r1, so pDst is copied to pDest (r2) below
+pSrc RN 0
+Stride RN 1
+pDest RN 2
+pScale RN 3
+
+ MOV pDest, pDst ;// Copy the destination pointer from r1 to r2
+ LDR pScale, =armCOMM_IDCTPreScale ;// Address of the imported pre-scale table
+ M_IDCT s9, s16, 16 ;// Expand the IDCT body; argument meanings are defined in armCOMM_IDCT_s.h
+ MOV Result, #OMX_Sts_NoErr ;// Always returns OMX_Sts_NoErr
+ M_END
+ ENDIF
+ ;// ARM1136JS :LOR: CortexA8
+
+ END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
new file mode 100644
index 0000000..20965bf
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
@@ -0,0 +1,713 @@
+;//
+;//
+;// File Name: omxVCM4P2_MCReconBlock_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+;// Description:
+;//
+;//
+
+;// Include standard headers
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+;// Import symbols required from other files
+
+ M_VARIANTS ARM1136JS
+
+;// ***************************************************************************
+;// ARM1136JS implementation
+;// ***************************************************************************
+ IF ARM1136JS
+
+;// ***************************************************************************
+;// MACRO DEFINITIONS
+;// ***************************************************************************
+ ;// Description:
+ ;//
+ ;// dest[j] = (x[j] + y[j] + round) >> 1, j=0..3
+ ;//
+ ;// Similar to UHADD8 instruction, but with a rounding value of 1 added to
+ ;// each sum before dividing by two, if round is 1
+ ;//
+ ;// Syntax:
+ ;// M_UHADD8R $dest, $x, $y, $round, $mask
+ ;//
+ ;// Inputs:
+ ;// $x four packed bytes, x[3] : x[2] : x[1] : x[0]
+ ;// $y four packed bytes, y[3] : y[2] : y[1] : y[0]
+ ;// $round 0 if no rounding to be added, 1 if rounding to be done
+ ;// $mask some register set to 0x80808080
+ ;//
+ ;// Outputs:
+ ;// $dest four packed bytes, z[3] : z[2] : z[1] : z[0]
+
+ MACRO
+ M_UHADD8R $dest, $x, $y, $round, $mask
+ IF $round = 1 ;// Rounded average, built from a halving subtract of the complement
+ IF $dest /= $y ;// Choose which input to complement so that dest may alias either input
+ MVN $dest, $x ;// dest = ~x, i.e. 255 - x in every byte
+ UHSUB8 $dest, $y, $dest ;// dest = (y - ~x) >> 1 = (x + y - 255) >> 1 per byte
+ EOR $dest, $dest, $mask ;// Flip bit 7 of each byte (adds 128 mod 256): result is (x + y + 1) >> 1
+ ELSE ;// dest is y: complement y instead, so y is read before dest is overwritten
+ MVN $dest, $y ;// dest = ~y
+ UHSUB8 $dest, $x, $dest ;// dest = (x - ~y) >> 1 per byte
+ EOR $dest, $dest, $mask ;// Same bit-7 flip as above
+ ENDIF
+ ELSE ;// No rounding requested
+ UHADD8 $dest, $x, $y ;// dest = (x + y) >> 1 per byte; mask is not used
+ ENDIF
+ MEND
+;// ***************************************************************************
+ ;// Description:
+ ;// Load 8 bytes from $pSrc (aligned or unaligned locations)
+ ;//
+ ;// Syntax:
+ ;// M_LOAD_X $pSrc, $srcStep, $out0, $out1, $scratch, $offset
+ ;//
+ ;// Inputs:
+ ;// $pSrc 4 byte aligned source pointer to an address just less than
+ ;// or equal to the data location
+ ;// $srcStep The stride on source
+ ;// $scratch A scratch register, used internally for temp calculations
+ ;// $offset Difference of source data location to the source pointer
+ ;// Use when $offset != 0 (unaligned load)
+ ;//
+ ;// Outputs:
+ ;// $pSrc In case the macro accepts stride, it increments the pSrc by
+ ;// that value, else unchanged
+ ;// $out0 four packed bytes, z[3] : z[2] : z[1] : z[0]
+ ;// $out1 four packed bytes, z[7] : z[6] : z[5] : z[4]
+ ;//
+ ;// Note: {$out0, $out1, $scratch} should be registers with ascending
+ ;// register numbering. In case offset is 0, $scratch is not modified.
+
+ MACRO
+ M_LOAD_X $pSrc, $srcStep, $out0, $out1, $scratch, $offset
+ IF $offset = 0 ;// Aligned case: the two words are the 8 bytes wanted
+ LDM $pSrc, {$out0, $out1}
+ ADD $pSrc, $pSrc, $srcStep ;// Advance to the next row
+ ELSE ;// Unaligned case: the 8 bytes span three words
+ LDM $pSrc, {$out0, $out1, $scratch}
+ ADD $pSrc, $pSrc, $srcStep ;// Advance to the next row
+ ;// Drop the low offset bytes of each word and pull in the low bytes of the next word
+ MOV $out0, $out0, LSR #8 * $offset
+ ORR $out0, $out0, $out1, LSL #(32 - 8 * ($offset))
+ MOV $out1, $out1, LSR #8 * $offset
+ ORR $out1, $out1, $scratch, LSL #(32 - 8 * ($offset))
+ ENDIF
+ MEND
+
+;// ***************************************************************************
+ ;// Description:
+ ;// Loads three words for X interpolation, update pointer to next row. For
+ ;// X interpolation, given a truncated-4byteAligned source pointer,
+ ;// invariably three contiguous words are required from there to get the
+ ;// nine bytes from the source pointer for filtering.
+ ;//
+ ;// Syntax:
+ ;// M_LOAD_XINT $pSrc, $srcStep, $offset, $word0, $word1, $word2, $word3
+ ;//
+ ;// Inputs:
+ ;// $pSrc 4 byte aligned source pointer to an address just less than
+ ;// or equal to the data location
+ ;//
+ ;// $srcStep The stride on source
+ ;//
+ ;// $offset Difference of source data location to the source pointer
+ ;// Use when $offset != 0 (unaligned load)
+ ;//
+ ;// Outputs:
+ ;// $pSrc Incremented by $srcStep
+ ;//
+ ;// $word0, $word1, $word2, $word3
+ ;// Three of these are outputs based on the $offset parameter.
+ ;// The outputs are specifically generated to be processed by
+ ;// the M_EXT_XINT macro. Following is the illustration to show
+ ;// how the nine bytes are spanned for different offsets from
+ ;// notTruncatedForAlignmentSourcePointer.
+ ;//
+ ;// ------------------------------------------------------
+ ;// | Offset | Aligned Ptr | word0 | word1 | word2 | word3 |
+ ;// |------------------------------------------------------|
+ ;// | 0 | 0 | 0123 | 4567 | 8xxx | |
+ ;// | 1 | -1 | x012 | 3456 | 78xx | |
+ ;// | 2 | -2 | xx01 | 2345 | 678x | |
+ ;// | 3 | -3 | xxx0 | | 1234 | 5678 |
+ ;// ------------------------------------------------------
+ ;//
+ ;// where the numbering (0-8) is to designate the 9 bytes from
+ ;// start of a particular row. The illustration doesn't take in
+ ;// account the positioning of bytes within the word and the
+ ;// macro combination with M_EXT_XINT will work only in little
+ ;// endian environs
+ ;//
+ ;// Note: {$word0, $word1, $word2, $word3} should be registers with ascending
+ ;// register numbering
+
+ MACRO
+ M_LOAD_XINT $pSrc, $srcStep, $offset, $word0, $word1, $word2, $word3
+ IF $offset /= 3 ;// Offsets 0-2: the three loaded words go to word0, word1, word2
+ LDM $pSrc, {$word0, $word1, $word2}
+ ELSE ;// Offset 3: word1 is skipped, the three loaded words go to word0, word2, word3
+ LDM $pSrc, {$word0, $word2, $word3}
+ ENDIF
+ ADD $pSrc, $pSrc, $srcStep ;// Advance to the next row
+ MEND
+
+;// ***************************************************************************
+ ;// Description:
+ ;// Extract four registers of four pixels for X interpolation
+ ;//
+ ;// Syntax:
+ ;// M_EXT_XINT $offset, $word0, $word1, $word2, $word3
+ ;//
+ ;// Inputs:
+ ;// $offset Difference of source data location to the source pointer
+ ;// Use when $offset != 0 (unaligned load)
+ ;//
+ ;// $word0, $word1, $word2, $word3
+ ;// Three of these are inputs based on the $offset parameter.
+ ;// The inputs are expected to be the words loaded by
+ ;// the M_LOAD_XINT macro.
+ ;//
+ ;// ------------------------------------------------------
+ ;// | Offset | Aligned Ptr | word0 | word1 | word2 | word3 |
+ ;// |------------------------------------------------------|
+ ;// | 0 | 0 | 0123 | 4567 | 8xxx | yyyy |
+ ;// | 1 | -1 | x012 | 3456 | 78xx | yyyy |
+ ;// | 2 | -2 | xx01 | 2345 | 678x | yyyy |
+ ;// | 3 | -3 | xxx0 | yyyy | 1234 | 5678 |
+ ;// ------------------------------------------------------
+ ;//
+ ;// Outputs:
+ ;// $word0, $word1, $word2, $word3
+ ;// Bytes from the original source pointer (not truncated for
+ ;// 4 byte alignment) as shown in the table.
+ ;// -------------------------------
+ ;// | word0 | word1 | word2 | word3 |
+ ;// |-------------------------------|
+ ;// | 0123 | 4567 | 1234 | 5678 |
+ ;// -------------------------------
+ ;//
+ ;// Note: {$word0, $word1, $word2, $word3} should be registers with ascending
+ ;// register numbering
+
+ MACRO
+ M_EXT_XINT $offset, $word0, $word1, $word2, $word3
+ IF $offset = 0
+ ; $word0 and $word1 are ok
+ ; $word2, $word3 are just 8 shifted versions
+ MOV $word3, $word1, LSR #8 ;// word3 is built first because it still needs the loaded word2
+ ORR $word3, $word3, $word2, LSL #24
+ MOV $word2, $word0, LSR #8
+ ORR $word2, $word2, $word1, LSL #24
+ ELIF $offset = 3
+ ; $word2 and $word3 are ok (taken care while loading itself)
+ ; set $word0 & $word1
+ MOV $word0, $word0, LSR #24 ;// Keep only the top byte of the first loaded word
+ ORR $word0, $word0, $word2, LSL #8
+ MOV $word1, $word2, LSR #24
+ ORR $word1, $word1, $word3, LSL #8
+ ELSE
+ MOV $word0, $word0, LSR #8 * $offset ;// Offsets 1 and 2: first realign word0 and word1
+ ORR $word0, $word0, $word1, LSL #(32 - 8 * ($offset))
+ MOV $word1, $word1, LSR #8 * $offset
+ ORR $word1, $word1, $word2, LSL #(32 - 8 * ($offset))
+ ;// Then form the one-pixel-shifted pair from the realigned words and the loaded word2
+ MOV $word3, $word1, LSR #8
+ ORR $word3, $word3, $word2, LSL #(32 - 8 * (($offset)+1))
+ MOV $word2, $word0, LSR #8
+ ORR $word2, $word2, $word1, LSL #24
+ ENDIF
+ MEND
+
+;// ***************************************************************************
+ ;// Description:
+ ;// Computes half-sum and xor of two inputs and puts them in the input
+ ;// registers in that order
+ ;//
+ ;// Syntax:
+ ;// M_HSUM_XOR $v0, $v1, $tmp
+ ;//
+ ;// Inputs:
+ ;// $v0 a, first input
+ ;// $v1 b, second input
+ ;// $tmp scratch register
+ ;//
+ ;// Outputs:
+ ;// $v0 (a + b)/2
+ ;// $v1 a ^ b
+
+ MACRO
+ M_HSUM_XOR $v0, $v1, $tmp
+ UHADD8 $tmp, $v0, $v1 ;// s0 = (a + b) >> 1 per byte, kept in tmp while v0 is still needed
+ EOR $v1, $v0, $v1 ;// l0 = a ^ b
+ MOV $v0, $tmp ;// v0 = s0
+ MEND
+;// ***************************************************************************
+ ;// Description:
+ ;// Calculates average of 4 values (a,b,c,d) for HalfPixelXY predict type in
+ ;// mcReconBlock module. Very specific to the implementation of
+ ;// M_MCRECONBLOCK_HalfPixelXY done here. Uses "tmp" as scratch register and
+ ;// "yMask" for mask variable "0x1010101x" set in it. In yMask 4 lsbs are
+ ;// not significant and are used by the callee for row counter (y)
+ ;//
+ ;// Some points to note are:
+ ;// 1. Input is pair of pair-averages and Xors
+ ;// 2. $sum1 and $lsb1 are not modified and hence can be reused in another
+ ;// running average
+ ;// 3. Output is in the first argument
+ ;//
+ ;// Syntax:
+ ;// M_AVG4 $sum0, $lsb0, $sum1, $lsb1, $rndVal
+ ;//
+ ;// Inputs:
+ ;// $sum0 (a + b) >> 1, where a and b are 1st and 2nd inputs to be averaged
+ ;// $lsb0 (a ^ b)
+ ;// $sum1 (c + d) >> 1. Not modified
+ ;// $lsb1 (c ^ d) Not modified
+ ;// $rndVal Assembler Variable. 0 for rounding, 1 for no rounding
+ ;//
+ ;// Outputs:
+ ;// $sum0 (a + b + c + d + 1) / 4 : If no rounding
+ ;// (a + b + c + d + 2) / 4 : If rounding
+
+ MACRO
+ M_AVG4 $sum0, $lsb0, $sum1, $lsb1, $rndVal
+ LCLS OP1
+ LCLS OP2
+ IF $rndVal = 0 ;// rounding case
+OP1 SETS "AND"
+OP2 SETS "ORR"
+ ELSE ;// Not rounding case
+OP1 SETS "ORR"
+OP2 SETS "AND"
+ ENDIF
+ ;// String aliases for the working registers used below
+ LCLS lsb2
+ LCLS sum2
+ LCLS dest
+ ;// lsb2 is the register named tmp in the calling code; sum2 reuses the second macro argument; dest is the first
+lsb2 SETS "tmp"
+sum2 SETS "$lsb0"
+dest SETS "$sum0"
+ ;// Correction bit per byte: (s0 ^ s1) | (e0 & e1) when rounding, (s0 ^ s1) & (e0 | e1) otherwise
+ $OP1 $lsb0, $lsb0, $lsb1 ;// e0 = e0 & e1 (rounding) or e0 | e1 (no rounding)
+ EOR $lsb2, $sum0, $sum1 ;// e2 = s0 ^ s1
+ $OP2 $lsb2, $lsb2, $lsb0 ;// e2 = e2 | e0 (rounding) or e2 & e0 (no rounding)
+ AND $lsb2, $lsb2, yMask, LSR # 4 ;// e2 = e2 & mask; the shift drops the row-counter nibble of yMask
+ UHADD8 $sum2, $sum0, $sum1 ;// s2 = (s0 + s1)/2, written over e0 which is no longer needed
+ UADD8 $dest, $sum2, $lsb2 ;// dest = s2 + e2
+ MEND
+;// ***************************************************************************
+;// Motion compensation handler macros
+;// ***************************************************************************
+ ;// Description:
+ ;// Implement motion compensation routines using the named registers in
+ ;// callee function. Each of the following 4 macros implements one of the 4 predict types.
+ ;// Each handles 8 cases, i.e. all the combinations of 4 types of source
+ ;// alignment offsets and 2 types of rounding flag
+ ;//
+ ;// Syntax:
+ ;// M_MCRECONBLOCK_IntegerPixel $rndVal, $offset
+ ;// M_MCRECONBLOCK_HalfPixelX $rndVal, $offset
+ ;// M_MCRECONBLOCK_HalfPixelY $rndVal, $offset
+ ;// M_MCRECONBLOCK_HalfPixelXY $rndVal, $offset
+ ;//
+ ;// Inputs:
+ ;// $rndVal Assembler Variable. 0 for rounding, 1 for no rounding
+ ;// $offset $pSrc MOD 4 value. Offset from 4 byte aligned location.
+ ;//
+ ;// Outputs:
+ ;// Outputs come in the named registers of the callee functions
+ ;// The macro loads the data from the source pointer, processes it and
+ ;// stores in the destination pointer. Does the whole prediction cycle
+ ;// of Motion Compensation routine for a particular predictType
+ ;// After this only residue addition to the predicted values remain
+
+ MACRO
+ M_MCRECONBLOCK_IntegerPixel $rndVal, $offset
+ ;// Algorithmic Description:
+ ;// This handles motion compensation for IntegerPixel predictType. Both
+ ;// rounding cases are handled by the same code base. It is just a copy
+ ;// from source to destination. Two lines are done per loop to reduce
+ ;// stalls. Loop has been software pipelined as well for that purpose.
+ ;//
+ ;// M_LOAD_X loads a whole row in two registers and then they are stored
+ ;// Both rounding labels below mark the same code, since a plain copy does not depend on rounding
+CaseIntegerPixelRnd0Offset$offset
+CaseIntegerPixelRnd1Offset$offset
+ M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp3, $offset ;// Row in tmp1:tmp2; tmp3 is only scratch here
+ M_LOAD_X pSrc, srcStep, tmp3, tmp4, tmp5, $offset ;// Next row in tmp3:tmp4
+YloopIntegerPixelOffset$offset
+ SUBS y, y, #2 ;// Two rows per pass; the flags set here are still valid at the BGT
+ STRD tmp1, tmp2, [pDst], dstStep
+ STRD tmp3, tmp4, [pDst], dstStep
+ M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp3, $offset
+ M_LOAD_X pSrc, srcStep, tmp3, tmp4, tmp5, $offset
+ BGT YloopIntegerPixelOffset$offset
+ ;// The last pass has already loaded two further source rows that are never stored
+ B SwitchPredictTypeEnd
+ MEND
+;// ***************************************************************************
+ MACRO
+ M_MCRECONBLOCK_HalfPixelX $rndVal, $offset
+ ;// Algorithmic Description:
+ ;// This handles motion compensation for HalfPixelX predictType. The two
+ ;// rounding cases are handled by the different code base and spanned by
+ ;// different macro calls. Loop has been software pipelined to reduce
+ ;// stalls.
+ ;//
+ ;// Filtering involves averaging a pixel with the next horizontal pixel.
+ ;// M_LOAD_XINT and M_EXT_XINT combination generate 4 registers, 2 with
+ ;// all pixels in a row with 4 pixel in each register and another 2
+ ;// registers with pixels corresponding to one horizontally shifted pixel
+ ;// corresponding to the initial row pixels. These are set of packed
+ ;// registers appropriate to do 4 lane SIMD.
+ ;// After that M_UHADD8R macro does the averaging taking care of the
+ ;// rounding as required
+ ;// One label is generated per (rounding, offset) combination
+CaseHalfPixelXRnd$rndVal.Offset$offset
+ IF $rndVal = 0
+ LDR mask, =0x80808080 ;// Needed only by the rounding form of M_UHADD8R
+ ENDIF
+ ;// Pre-load the first row so each pass can overlap the next load with the store
+ M_LOAD_XINT pSrc, srcStep, $offset, tmp1, tmp2, tmp3, tmp4
+YloopHalfPixelXRnd$rndVal.Offset$offset
+ SUBS y, y, #1 ;// One row per pass; the flags set here are still valid at the BGT
+ M_EXT_XINT $offset, tmp1, tmp2, tmp3, tmp4
+ M_UHADD8R tmp5, tmp1, tmp3, (1-$rndVal), mask ;// Pixels 0-3 averaged with pixels 1-4
+ M_UHADD8R tmp6, tmp2, tmp4, (1-$rndVal), mask ;// Pixels 4-7 averaged with pixels 5-8
+ STRD tmp5, tmp6, [pDst], dstStep
+ M_LOAD_XINT pSrc, srcStep, $offset, tmp1, tmp2, tmp3, tmp4
+ BGT YloopHalfPixelXRnd$rndVal.Offset$offset
+ ;// The last pass has already loaded one further source row that is never used
+ B SwitchPredictTypeEnd
+ MEND
+;// ***************************************************************************
+ MACRO
+ M_MCRECONBLOCK_HalfPixelY $rndVal, $offset
+ ;// Algorithmic Description:
+ ;// This handles motion compensation for HalfPixelY predictType. The two
+ ;// rounding cases are handled by the different code base and spanned by
+ ;// different macro calls. PreLoading is used to avoid reload of same data.
+ ;//
+ ;// Filtering involves averaging a pixel with the next vertical pixel.
+ ;// M_LOAD_X generates 2 registers with all pixels in a row with 4 pixel in
+ ;// each register. These are set of packed registers appropriate to do
+ ;// 4 lane SIMD. After that M_UHADD8R macro does the averaging taking care
+ ;// of the rounding as required
+ ;// One label is generated per (rounding, offset) combination
+CaseHalfPixelYRnd$rndVal.Offset$offset
+ IF $rndVal = 0
+ LDR mask, =0x80808080 ;// Needed only by the rounding form of M_UHADD8R
+ ENDIF
+ ;// tmp5 is used as the M_LOAD_X scratch register throughout
+ M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp5, $offset ;// Pre-load
+YloopHalfPixelYRnd$rndVal.Offset$offset
+ SUBS y, y, #2 ;// Two rows per pass; the flags set here are still valid at the BGT
+ ;// Processing one line
+ M_LOAD_X pSrc, srcStep, tmp3, tmp4, tmp5, $offset
+ M_UHADD8R tmp1, tmp1, tmp3, (1-$rndVal), mask ;// Previous row averaged with the row just loaded
+ M_UHADD8R tmp2, tmp2, tmp4, (1-$rndVal), mask
+ STRD tmp1, tmp2, [pDst], dstStep
+ ;// Processing another line
+ M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp5, $offset
+ M_UHADD8R tmp3, tmp3, tmp1, (1-$rndVal), mask ;// tmp1:tmp2 stay intact as the previous row of the next pass
+ M_UHADD8R tmp4, tmp4, tmp2, (1-$rndVal), mask
+ STRD tmp3, tmp4, [pDst], dstStep
+
+ BGT YloopHalfPixelYRnd$rndVal.Offset$offset
+
+ B SwitchPredictTypeEnd
+ MEND
+;// ***************************************************************************
+ MACRO
+ M_MCRECONBLOCK_HalfPixelXY $rndVal, $offset
+ ;// Algorithmic Description:
+ ;// This handles motion compensation for HalfPixelXY predictType. The two
+ ;// rounding cases are handled by separate code sequences, generated by
+ ;// different macro calls. Pre-loading is used to avoid reloading the same data.
+ ;//
+ ;// Filtering involves averaging a pixel with the next vertical, horizontal
+ ;// and right-down diagonal pixels. Just as in HalfPixelX case, M_LOAD_XINT
+ ;// and M_EXT_XINT combination generates 4 registers with a row and its
+ ;// 1 pixel right shifted version, with 4 pixels in one register. Another
+ ;// call of that macro-combination gets another row. Then M_HSUM_XOR is
+ ;// called to get mutual half-sum and xor combinations of a row with its
+ ;// shifted version as they are inputs to the M_AVG4 macro which computes
+ ;// the 4 element average with rounding. Note that it is the half-sum/xor
+ ;// values that are preserved for next row as they can be re-used in the
+ ;// next call to the M_AVG4 and saves recomputation.
+ ;// Due to lack of registers, the row counter and a masking value required
+ ;// in M_AVG4 are packed into a single register yMask where the last nibble
+ ;// holds the row counter value and the rest holds the masking variable left
+ ;// shifted by 4
+
+CaseHalfPixelXYRnd$rndVal.Offset$offset
+ LDR yMask, =((0x01010101 << 4) + 8) ;// mask 0x01010101 shifted left by 4, row counter 8 in the low nibble
+
+ M_LOAD_XINT pSrc, srcStep, $offset, t00, t01, t10, t11 ;// Load a, a', b, b'
+ M_EXT_XINT $offset, t00, t01, t10, t11
+ M_HSUM_XOR t00, t10, tmp ;// s0, l0
+ M_HSUM_XOR t01, t11, tmp ;// s0', l0'
+
+YloopHalfPixelXYRnd$rndVal.Offset$offset
+ ;// Processing one line
+ ;// t00, t01, t10, t11 required from previous loop
+ M_LOAD_XINT pSrc, srcStep, $offset, t20, t21, t30, t31 ;// Load c, c', d, d'
+ SUB yMask, yMask, #2 ;// two rows per iteration: decrement the packed row counter
+ M_EXT_XINT $offset, t20, t21, t30, t31
+ M_HSUM_XOR t20, t30, tmp ;// s1, l1
+ M_HSUM_XOR t21, t31, tmp ;// s1', l1'
+ M_AVG4 t00, t10, t20, t30, $rndVal ;// s0, l0, s1, l1
+ M_AVG4 t01, t11, t21, t31, $rndVal ;// s0', l0', s1', l1'
+ STRD t00, t01, [pDst], dstStep ;// store the average
+
+ ;// Processing another line
+ ;// t20, t21, t30, t31 required from above
+ M_LOAD_XINT pSrc, srcStep, $offset, t00, t01, t10, t11 ;// Load a, a', b, b'
+ TST yMask, #7 ;// test the low 3 bits of the packed counter; result is consumed by the BGT below
+ M_EXT_XINT $offset, t00, t01, t10, t11
+ M_HSUM_XOR t00, t10, tmp ;// half-sum/xor of the new row, kept for the next iteration
+ M_HSUM_XOR t01, t11, tmp
+ M_AVG4 t20, t30, t00, t10, $rndVal
+ M_AVG4 t21, t31, t01, t11, $rndVal
+ STRD t20, t21, [pDst], dstStep ;// store the average
+
+ BGT YloopHalfPixelXYRnd$rndVal.Offset$offset ;// NOTE(review): assumes the macros after TST leave the flags alone - confirm
+
+ IF $offset/=3 :LOR: $rndVal/=1 ;// the (rndVal = 1, offset = 3) expansion omits the branch and falls through
+ B SwitchPredictTypeEnd
+ ENDIF
+ MEND
+;// ***************************************************************************
+;// Motion compensation handler macros end here
+;// ***************************************************************************
+ ;// Description:
+ ;// Populates all 4 kinds of offsets "cases" for each predictType and rndVal
+ ;// combination in the "switch" to prediction processing code segment
+ ;//
+ ;// Syntax:
+ ;// M_CASE_OFFSET $rnd, $predictType
+ ;//
+ ;// Inputs:
+ ;// $rnd 0 for rounding, 1 for no rounding
+ ;// $predictType The prediction mode
+ ;//
+ ;// Outputs:
+ ;// Populated list of "M_CASE"s for the "M_SWITCH" macro
+
+ MACRO
+ M_CASE_OFFSET $rnd, $predictType
+ M_CASE Case$predictType.Rnd$rnd.Offset0 ;// handler label for source offset 0
+ M_CASE Case$predictType.Rnd$rnd.Offset1 ;// handler label for source offset 1
+ M_CASE Case$predictType.Rnd$rnd.Offset2 ;// handler label for source offset 2
+ M_CASE Case$predictType.Rnd$rnd.Offset3 ;// handler label for source offset 3
+ MEND
+;// ***************************************************************************
+ ;// Description:
+ ;// Populates all 2 kinds of rounding "cases" for each predictType in the
+ ;// "switch" to prediction processing code segment
+ ;//
+ ;// Syntax:
+ ;// M_CASE_MCRECONBLOCK $predictType
+ ;//
+ ;// Inputs:
+ ;// $predictType The prediction mode
+ ;//
+ ;// Outputs:
+ ;// Populated list of "M_CASE_OFFSET" macros
+
+ MACRO
+ M_CASE_MCRECONBLOCK $predictType
+ M_CASE_OFFSET 0, $predictType ;// 4 offset cases for rnd = 0 (rounding)
+ M_CASE_OFFSET 1, $predictType ;// 4 offset cases for rnd = 1 (no rounding)
+ MEND
+;// ***************************************************************************
+ ;// Description:
+ ;// Populates all 8 kinds of rounding and offset combinations handling macros
+ ;// for the specified predictType. In case of "IntegerPixel" predictType,
+ ;// rounding is not required so same code segment handles both cases
+ ;//
+ ;// Syntax:
+ ;// M_MCRECONBLOCK $predictType
+ ;//
+ ;// Inputs:
+ ;// $predictType The prediction mode
+ ;//
+ ;// Outputs:
+ ;// Populated list of "M_MCRECONBLOCK_<predictType>" macros for specified
+ ;// predictType. Each
+ ;// M_MCRECONBLOCK_<predictType> $rnd, $offset
+ ;// is a code segment (starting with a label indicating the predictType,
+ ;// rounding and offset combination)
+ ;// Four calls of this macro with the 4 prediction modes populate all the 32
+ ;// handlers
+
+ MACRO
+ M_MCRECONBLOCK $predictType
+ M_MCRECONBLOCK_$predictType 0, 0 ;// rndVal = 0, offset = 0
+ M_MCRECONBLOCK_$predictType 0, 1 ;// rndVal = 0, offset = 1
+ M_MCRECONBLOCK_$predictType 0, 2 ;// rndVal = 0, offset = 2
+ M_MCRECONBLOCK_$predictType 0, 3 ;// rndVal = 0, offset = 3
+ IF "$predictType" /= "IntegerPixel" ;// If not IntegerPixel then rounding makes a difference
+ M_MCRECONBLOCK_$predictType 1, 0 ;// rndVal = 1, offset = 0
+ M_MCRECONBLOCK_$predictType 1, 1 ;// rndVal = 1, offset = 1
+ M_MCRECONBLOCK_$predictType 1, 2 ;// rndVal = 1, offset = 2
+ M_MCRECONBLOCK_$predictType 1, 3 ;// rndVal = 1, offset = 3
+ ENDIF
+ MEND
+;// ***************************************************************************
+;// Input/Output Registers (several names deliberately share one register)
+pSrc RN 0
+srcStep RN 1
+arg_pSrcResidue RN 2 ;// shares r2 with dstStep; spilled to the stack before dstStep is loaded
+pSrcResidue RN 12 ;// reloaded from the stack for the residue stage
+pDst RN 3
+dstStep RN 2
+predictType RN 10
+rndVal RN 11
+mask RN 11 ;// same register as rndVal
+
+;// Local Scratch Registers
+zero RN 12
+y RN 14 ;// row counter
+
+tmp1 RN 4
+tmp2 RN 5
+tmp3 RN 6
+tmp4 RN 7
+tmp5 RN 8
+tmp6 RN 9
+tmp7 RN 10 ;// same register as predictType
+tmp8 RN 11 ;// same register as rndVal / mask
+tmp9 RN 12
+
+t00 RN 4 ;// t00..t31 rename r4-r11 for the HalfPixelXY handler
+t01 RN 5
+t10 RN 6
+t11 RN 7
+t20 RN 8
+t21 RN 9
+t30 RN 10
+t31 RN 11
+tmp RN 12
+
+yMask RN 14 ;// same register as y: packed row counter and mask (HalfPixelXY)
+
+dst RN 1 ;// same register as srcStep; used in the residue stage
+return RN 0
+
+ ;// Allocate memory on stack (slots for pDst and pSrcResidue, whose registers get reused)
+ M_ALLOC4 Stk_pDst, 4
+ M_ALLOC4 Stk_pSrcResidue, 4
+ ;// Function header
+ M_START omxVCM4P2_MCReconBlock, r11
+ ;// Define stack arguments
+ M_ARG Arg_dstStep, 4
+ M_ARG Arg_predictType, 4
+ M_ARG Arg_rndVal, 4
+ ;// Save on stack: pDst is advanced by the prediction stage, r2 is reused for dstStep
+ M_STR pDst, Stk_pDst
+ M_STR arg_pSrcResidue, Stk_pSrcResidue
+ ;// Load argument from the stack
+ M_LDR dstStep, Arg_dstStep
+ M_LDR predictType, Arg_predictType
+ M_LDR rndVal, Arg_rndVal
+
+ MOV y, #8 ;// row counter for the prediction loops
+
+ AND tmp1, pSrc, #3 ;// byte offset of pSrc within its 4-byte word
+ ORR predictType, tmp1, predictType, LSL #3 ;// switch index = predictType*8 ...
+ ORR predictType, predictType, rndVal, LSL #2 ;// ... + rndVal*4 + offset
+ ;// Truncating source pointer to align to 4 byte location
+ BIC pSrc, pSrc, #3
+
+ ;// Implementation takes care of all combinations of different
+ ;// predictTypes, rounding cases and source pointer offsets to alignment
+ ;// of 4 bytes in different code bases unless one of these parameters wasn't
+ ;// making any difference to the implementation. Below M_CASE_MCRECONBLOCK
+ ;// macros branch into 8 M_CASE macros for all combinations of the 2
+ ;// rounding cases and 4 offsets of the pSrc pointer to the 4 byte
+ ;// alignment. The case order matches the index built above.
+ M_SWITCH predictType
+ M_CASE_MCRECONBLOCK IntegerPixel
+ M_CASE_MCRECONBLOCK HalfPixelX
+ M_CASE_MCRECONBLOCK HalfPixelY
+ M_CASE_MCRECONBLOCK HalfPixelXY
+ M_ENDSWITCH
+
+ ;// The M_MCRECONBLOCK macros populate the code bases by calling all 8
+ ;// particular macros (4 in case of IntegerPixel as rounding makes no
+ ;// difference there) to generate the code for all cases of rounding and
+ ;// offsets. LTORG is used to segment the code as code size bloated beyond
+ ;// 4KB.
+ M_MCRECONBLOCK IntegerPixel
+ M_MCRECONBLOCK HalfPixelX
+ LTORG
+ M_MCRECONBLOCK HalfPixelY
+ M_MCRECONBLOCK HalfPixelXY
+SwitchPredictTypeEnd
+
+ ;// Residue Addition (skipped entirely when pSrcResidue is NULL)
+ ;// This is done in 2 lane SIMD though loads are further optimized and
+ ;// 4 bytes are loaded in case of destination buffer. Algorithmic
+ ;// details are in inlined comments
+ M_LDR pSrcResidue, Stk_pSrcResidue
+ CMP pSrcResidue, #0
+ BEQ pSrcResidueConditionEnd
+pSrcResidueNotNull
+ M_LDR pDst, Stk_pDst ;// rewind to the original destination pointer
+ MOV y, #8 ;// 8 rows again
+ SUB dstStep, dstStep, #4 ;// first store of each row already advances pDst by 4
+Yloop_pSrcResidueNotNull
+ SUBS y, y, #1 ;// flags survive to the BGT: nothing below sets NZCV
+ LDR dst, [pDst] ;// dst = [dcba]
+ LDMIA pSrcResidue!, {tmp1, tmp2} ;// tmp1=[DC] tmp2=[BA]
+ PKHBT tmp3, tmp1, tmp2, LSL #16 ;// Deltaval1 = [C A]
+ PKHTB tmp4, tmp2, tmp1, ASR #16 ;// DeltaVal2 = [D B]
+ UXTB16 tmp1, dst ;// tmp1 = [0c0a]
+ UXTB16 tmp2, dst, ROR #8 ;// tmp2 = [0d0b]
+ QADD16 tmp1, tmp1, tmp3 ;// Add and saturate to 16 bits
+ QADD16 tmp2, tmp2, tmp4
+ USAT16 tmp1, #8, tmp1 ;// armClip(0, 255, tmp1)
+ USAT16 tmp2, #8, tmp2 ;// armClip(0, 255, tmp2)
+ ORR tmp1, tmp1, tmp2, LSL #8 ;// tmp1 = [dcba]
+ STR tmp1, [pDst], #4 ;// write 4 pixels, move to the second half of the row
+
+ LDR dst, [pDst] ;// same sequence for the remaining 4 pixels of the row
+ LDMIA pSrcResidue!, {tmp1, tmp2}
+ PKHBT tmp3, tmp1, tmp2, LSL #16
+ PKHTB tmp4, tmp2, tmp1, ASR #16
+ UXTB16 tmp1, dst
+ UXTB16 tmp2, dst, ROR #8
+ QADD16 tmp1, tmp1, tmp3
+ QADD16 tmp2, tmp2, tmp4
+ USAT16 tmp1, #8, tmp1
+ USAT16 tmp2, #8, tmp2
+ ORR tmp1, tmp1, tmp2, LSL #8
+ STR tmp1, [pDst], dstStep ;// dstStep was reduced by 4, so pDst lands on the next row
+
+ BGT Yloop_pSrcResidueNotNull
+pSrcResidueConditionEnd
+
+ MOV return, #OMX_Sts_NoErr
+
+ M_END
+ ENDIF ;// ARM1136JS
+
+;// ***************************************************************************
+;// CortexA8 implementation
+;// ***************************************************************************
+ END
+;// ***************************************************************************
+;// omxVCM4P2_MCReconBlock ends
+;// ***************************************************************************
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
new file mode 100644
index 0000000..213444a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
@@ -0,0 +1,283 @@
+; **********
+; *
+; * File Name: omxVCM4P2_PredictReconCoefIntra_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains module for DC/AC coefficient prediction
+; *
+; *
+; * Function: omxVCM4P2_PredictReconCoefIntra
+; *
+; * Description:
+; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+; * to the function call, prediction direction (predDir) should be selected
+; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst pointer to the coefficient buffer which contains the
+; * quantized coefficient residuals (PQF) of the current
+; * block; must be aligned on a 4-byte boundary. The
+; * output coefficients are saturated to the range
+; * [-2048, 2047].
+; * [in] pPredBufRow pointer to the coefficient row buffer; must be aligned
+; * on a 4-byte boundary.
+; * [in] pPredBufCol pointer to the coefficient column buffer; must be
+; * aligned on a 4-byte boundary.
+; * [in] curQP quantization parameter of the current block. curQP may
+; * be equal to predQP, especially when the current block and
+; * the predictor block are in the same macroblock.
+; * [in] predQP quantization parameter of the predictor block
+; * [in] predDir indicates the prediction direction which takes one
+; * of the following values:
+; * OMX_VIDEO_HORIZONTAL predict horizontally
+; * OMX_VIDEO_VERTICAL predict vertically
+; * [in] ACPredFlag a flag indicating if AC prediction should be
+; * performed. It is equal to ac_pred_flag in the bit
+; * stream syntax of MPEG-4
+; * [in] videoComp video component type (luminance, chrominance or
+; * alpha) of the current block
+; * [out] pSrcDst pointer to the coefficient buffer which contains
+; * the quantized coefficients (QF) of the current
+; * block
+; * [out] pPredBufRow pointer to the updated coefficient row buffer
+; * [out] pPredBufCol pointer to the updated coefficient column buffer
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - Bad arguments
+; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol.
+; * - At least one of the following cases: curQP <= 0, predQP <= 0, curQP > 31,
+; * predQP > 31, predDir exceeds [1,2].
+; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not
+; * 4-byte aligned.
+; *
+; *********
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+
+ IMPORT armVCM4P2_Reciprocal_QP_S32
+ IMPORT armVCM4P2_Reciprocal_QP_S16
+ IMPORT armVCM4P2_DCScaler
+
+
+
+ IF ARM1136JS
+
+
+;// Input Arguments (r0-r3 arrive in registers; the rest are loaded from the stack by the code)
+
+pSrcDst RN 0
+pPredBufRow RN 1
+pPredBufCol RN 2
+curQP RN 3
+QP RN 3 ;// same register as curQP
+predQP RN 4
+predDir RN 5
+ACPredFlag RN 6
+videoComp RN 7
+
+;// Local Variables (many names share a register with a value that is no longer live)
+
+temp2 RN 5 ;// shares r5 with predDir
+negCurQP RN 7
+negdcScaler RN 7
+tempPred RN 8
+
+dcScaler RN 4 ;// shares r4 with predQP
+CoeffTable RN 9
+absCoeffDC RN 9
+temp3 RN 6 ;// shares r6 with ACPredFlag
+absCoeffAC RN 6
+
+shortVideoHeader RN 9
+predCoeffTable RN 10
+Count RN 10 ;// shares r10 with predCoeffTable
+temp1 RN 12
+index RN 12
+Rem RN 14
+temp RN 11
+Return RN 0
+
+
+
+ M_START omxVCM4P2_PredictReconCoefIntra,r12
+
+ ;// Assigning pointers to Input arguments on Stack
+
+ M_ARG predQPonStack,4
+ M_ARG predDironStack,4
+ M_ARG ACPredFlagonStack,4
+ M_ARG videoComponStack,4
+
+ ;// DC Prediction
+
+ M_LDR videoComp,videoComponStack ;// Load videoComp From Stack
+
+ M_LDR predDir,predDironStack ;// Load Prediction direction
+
+ ;// dcScaler Calculation
+
+ LDR index, =armVCM4P2_DCScaler
+ ADD index,index,videoComp,LSL #5 ;// 32 table bytes per videoComp
+ LDRB dcScaler,[index,QP] ;// dcScaler = table entry for (videoComp, QP)
+
+
+calDCVal
+
+
+ LDR predCoeffTable, =armVCM4P2_Reciprocal_QP_S16 ;// Loading the table with entries 32767/(1 to 63)
+
+ CMP predDir,#2 ;// Check if the Prediction direction is vertical
+
+ ;// Calculate temp pred by performing Division
+
+ LDREQSH absCoeffDC,[pPredBufRow] ;// If vertical load the coeff from Row Prediction Buffer
+ LDRNESH absCoeffDC,[pPredBufCol] ;// If horizontal load the coeff from column Prediction Buffer
+
+ RSB negdcScaler,dcScaler,#0 ;// negdcScaler=-dcScaler
+
+ MOV temp1,absCoeffDC ;// temp1=prediction coeff (keeps the sign)
+ CMP temp1,#0
+ RSBLT absCoeffDC,temp1,#0 ;//absCoeffDC=abs(temp1)
+
+ ADD temp,dcScaler,dcScaler ;// byte offset of the 16-bit reciprocal entry
+ LDRH temp,[predCoeffTable,temp] ;// Load value from coeff table for performing division using multiplication
+
+ SMULBB tempPred,temp,absCoeffDC ;// tempPred=abs(pPredBufRow(Col)[0])*32767/dcScaler
+ ADD temp3,dcScaler,#1
+ LSR tempPred,tempPred,#15 ;// tempPred=abs(pPredBufRow(Col)[0])/dcScaler
+ LSR temp3,temp3,#1 ;// temp3=round(dcScaler/2)
+
+ MLA Rem,negdcScaler,tempPred,absCoeffDC ;// Rem = abs(pPredBufRow(Col)[0])-tempPred*dcScaler
+
+
+ LDRH temp,[pPredBufCol] ;// old pPredBufCol[0], copied to pPredBufRow[-8] below
+ CMP Rem,temp3
+ ADDGE tempPred,#1 ;// If Rem>=round(dcScaler/2);tempPred=tempPred+1
+ CMP temp1,#0
+ RSBLT tempPred,tempPred,#0 ;// if pPredBufRow(Col)[0]<0; tempPred=-tempPred
+
+
+ STRH temp,[pPredBufRow,#-16] ;// pPredBufRow[-8]=old pPredBufCol[0]
+
+ LDRH temp,[pSrcDst] ;// temp=pSrcDst[0]
+ M_LDR ACPredFlag,ACPredFlagonStack
+ ADD temp,temp,tempPred ;// temp=pSrcDst[0]+tempPred
+ SSAT16 temp,#12,temp ;// clip temp to [-2048,2047]
+
+ SMULBB temp1,temp,dcScaler ;// temp1=clipped(pSrcDst[0])*dcScaler
+ M_LDR predQP,predQPonStack ;// r4 is free now: dcScaler is not used past this point
+ STRH temp,[pSrcDst]
+ CMP ACPredFlag,#1 ;// Check if the AC prediction flag is set or not
+ STRH temp1,[pPredBufCol] ;// store temp1 to pPredBufCol
+
+ ;// AC Prediction
+
+
+ BNE Exit ;// If not set Exit
+
+ LDR predCoeffTable, =armVCM4P2_Reciprocal_QP_S32 ;// Loading the table with entries 0x1ffff/(1 to 63)
+ MOV temp1,#4
+ MUL temp1,curQP,temp1 ;// byte offset of the 32-bit reciprocal entry
+ CMP predDir,#2 ;// Check the Prediction direction
+ RSB negCurQP,curQP,#0
+ LDR CoeffTable,[predCoeffTable,temp1] ;// CoeffTable=0x1ffff/curQP
+ ADD curQP,curQP,#1 ;// curQP=curQP+1
+ LSR curQP,curQP,#1 ;// curQP=round(curQP/2)
+ MOV Count,#2 ;// Initializing the Loop Count
+ BNE Horizontal ;// If the Prediction direction is horizontal branch to Horizontal
+
+
+
+loop1
+ ;// Calculate tempPred
+
+ LDRSH absCoeffAC,[pPredBufRow,Count] ;// absCoeffAC=pPredBufRow[i], 1=<i<=7
+ MOV temp1,absCoeffAC
+ CMP temp1,#0 ;// compare pPredBufRow[i] with zero, 1=<i<=7
+ RSBLT absCoeffAC,temp1,#0 ;// absCoeffAC= abs(pPredBufRow[i])
+
+ SMULBB absCoeffAC,absCoeffAC,predQP ;// absCoeffAC=abs(pPredBufRow[i])*predQP
+ MUL tempPred,absCoeffAC,CoeffTable ;// tempPred=abs(pPredBufRow[i])*predQP*0x1ffff/curQP
+ LSR tempPred,tempPred,#17
+
+ MLA Rem,negCurQP,tempPred,absCoeffAC ;// Rem=abs(pPredBufRow[i])*predQP-tempPred*curQP
+ LDRH temp,[pSrcDst,Count] ;// temp=pSrcDst[i],1<=i<8
+
+ CMP Rem,curQP
+ ADDGE tempPred,#1 ;// if Rem>=round(curQP/2); tempPred=tempPred+1
+ CMP temp1,#0
+ RSBLT tempPred,tempPred,#0 ;// if pPredBufRow[i]<0 ; tempPred=-tempPred
+
+ ;// Update source and Row Prediction buffers
+
+ ADD temp,temp,tempPred ;// temp=tempPred+pSrcDst[i]
+ SSAT16 temp,#12,temp ;// Clip temp to [-2048,2047]
+ STRH temp,[pSrcDst,Count]
+ STRH temp,[pPredBufRow,Count] ;// pPredBufRow[i]=temp
+ ADD Count,Count,#2 ;// i=i+1
+ CMP Count,#16 ;// compare if i=8
+ BLT loop1
+ B Exit ;// Branch to exit
+
+Horizontal
+
+ MOV Count,#16 ;// Initializing i=8
+
+loop2
+
+ LSR temp2,Count,#3 ;// temp2=byte offset of pPredBufCol[i>>3]
+
+ ;// Calculate tempPred
+
+ LDRSH absCoeffAC,[pPredBufCol,temp2] ;// absCoeffAC=pPredBufCol[i>>3]; signed load (as in loop1) so negatives reach the abs/sign steps
+ MOV temp1,absCoeffAC
+ CMP temp1,#0 ;// compare pPredBufCol[i>>3] with zero
+ RSBLT absCoeffAC,temp1,#0 ;// absCoeffAC=abs(pPredBufCol[i>>3])
+
+ SMULBB absCoeffAC,absCoeffAC,predQP ;// absCoeffAC=abs(pPredBufCol[i>>3])*predQP
+ MUL tempPred,absCoeffAC,CoeffTable ;// tempPred=abs(pPredBufCol[i>>3])*predQP*0x1ffff/curQP
+ LSR tempPred,tempPred,#17 ;// tempPred=abs(pPredBufCol[i>>3])*predQP/curQP
+
+ MLA Rem,negCurQP,tempPred,absCoeffAC ;// Rem=abs(pPredBufCol[i>>3])*predQP-tempPred*curQP
+ LDRH temp,[pSrcDst,Count] ;// temp=pSrcDst[i]
+
+ CMP Rem,curQP ;// Compare Rem with round(curQP/2)
+ ADDGE tempPred,#1 ;// tempPred=tempPred+1 if Rem>=round(curQP/2)
+ CMP temp1,#0
+ RSBLT tempPred,tempPred,#0 ;// if pPredBufCol[i>>3]<0 tempPred=-tempPred
+
+ ;// Update source and Column Prediction buffers
+
+ ADD temp,temp,tempPred ;// temp=pSrcDst[i]+tempPred
+ SSAT16 temp,#12,temp ;// Clip temp to [-2048,2047]
+ STRH temp,[pSrcDst,Count] ;// pSrcDst[i]= clipped value
+ STRH temp,[pPredBufCol,temp2] ;// pPredBufCol[i>>3]=temp
+ ADD Count,Count,#16 ;// i=i+8
+ CMP Count,#128 ;// compare i with 64
+ BLT loop2
+
+
+Exit
+
+ MOV Return,#OMX_Sts_NoErr
+
+ M_END
+ ENDIF
+ END
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
new file mode 100644
index 0000000..c9591cb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
@@ -0,0 +1,141 @@
+;/**
+; *
+; * File Name: omxVCM4P2_QuantInvInter_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for inter reconstruction
+; *
+; *
+; *
+; *
+; *
+; * Function: omxVCM4P2_QuantInvInter_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
+; * [in] QP quantization parameter (quantiser_scale)
+; * [in] videoComp (Intra version only.) Video component type of the
+; * current block. Takes one of the following flags:
+; * OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; * OMX_VC_ALPHA.
+; * [in] shortVideoHeader a flag indicating presence of short_video_header;
+; * shortVideoHeader==1 selects linear intra DC mode,
+; * and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out] pSrcDst pointer to the output (dequantized) intra/inter block. Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; * - If pSrcDst is NULL or is not 16-byte aligned.
+; * or
+; * - If QP <= 0.
+; * or
+; * - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+; */
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+
+ IF ARM1136JS
+
+;//Input Arguments
+pSrcDst RN 0
+QP RN 1
+
+;//Local Variables
+Return RN 0
+Count RN 4
+tempVal21 RN 2
+tempVal43 RN 3
+QP1 RN 5
+X2 RN 6
+X3 RN 14
+Result1 RN 8
+Result2 RN 9
+two RN 7
+
+ M_START omxVCM4P2_QuantInvInter_I,r9
+
+ MOV Count,#64 ;// 64 coefficients, 4 handled per loop iteration
+ TST QP,#1
+ LDRD tempVal21,[pSrcDst] ;// Loads first two values of pSrcDst to tempVal21,
+ ;// next two values to tempVal43
+ SUBEQ QP1,QP,#1 ;// QP1=QP if QP is odd , QP1=QP-1 if QP is even
+ MOVNE QP1,QP
+ MOV two,#2
+
+
+
+Loop
+
+
+ SMULBB X2,tempVal21,two ;// X2= first val(lower 16 bits of tempVal21)*2
+ CMP X2,#0 ;// flags drive the abs, the skip-if-zero and the sign restore below
+
+ RSBLT X2,X2,#0 ;// X2=2*absoluteval(first val)
+ SMLABBNE X2,QP,X2,QP1 ;// X2=2*absval(first val)*QP+QP if QP is odd
+ ;// X2=2*absval(first val)*QP+QP-1 if QP is even
+ SMULTB X3,tempVal21,two ;// X3= second val(top 16 bits of tempVal21)*2
+ RSBLT X2,X2,#0 ;// restore the sign of the first val
+
+ CMP X3,#0
+
+ RSBLT X3,X3,#0
+ SMLABBNE X3,QP,X3,QP1
+
+ RSBLT X3,X3,#0
+ SSAT X2,#12,X2 ;// clip to [-2048,2047] while still 32 bits wide: packing first
+ SSAT X3,#12,X3 ;// would wrap magnitudes above 32767 to the wrong sign
+ PKHBT Result1,X2,X3,LSL #16 ;// Result1[0-15]=X2[0-15],Result1[16-31]=X3[0-15]
+ SMULBB X2,tempVal43,two ;// X2= third val(lower 16 bits of tempVal43)*2
+ CMP X2,#0
+
+
+ RSBLT X2,X2,#0 ;// X2=2*absoluteval(third val)
+ SMLABBNE X2,QP,X2,QP1
+ SMULTB X3,tempVal43,two ;// X3= fourth val(top 16 bits of tempVal43)*2
+ RSBLT X2,X2,#0
+ CMP X3,#0
+
+ LDRD tempVal21,[pSrcDst,#8] ;// Load next four Values to tempVal21,tempVal43 (NOTE(review): reads 8 bytes past the block on the last pass)
+
+ RSBLT X3,X3,#0
+ SMLABBNE X3,QP,X3,QP1
+ RSBLT X3,X3,#0
+ SSAT X2,#12,X2 ;// clip to range [-2048,2047] before packing
+ SSAT X3,#12,X3
+ PKHBT Result2,X2,X3,LSL #16 ;// Result2[0-15]=X2[0-15],Result2[16-31]=X3[0-15]
+
+ SUBS Count,Count,#4 ;// Decrement Count by 4 and continue if it has not reached 0
+ STRD Result1,[pSrcDst],#8 ;// Store Double words and increment the pointer to point the next store address
+
+
+ BGT Loop
+
+ MOV Return,#OMX_Sts_NoErr
+
+ M_END
+ ENDIF
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
new file mode 100644
index 0000000..6328e01
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
@@ -0,0 +1,188 @@
+;/**
+; *
+; * File Name: omxVCM4P2_QuantInvIntra_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision: 9641
+; * Date: Thursday, February 7, 2008
+; *
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; *
+; *
+; *
+; * Description:
+; * Contains modules for intra reconstruction
+; *
+; *
+; *
+; *
+; *
+; *
+; * Function: omxVCM4P2_QuantInvIntra_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
+; * [in] QP quantization parameter (quantiser_scale)
+; * [in] videoComp (Intra version only.) Video component type of the
+; * current block. Takes one of the following flags:
+; * OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; * OMX_VC_ALPHA.
+; * [in] shortVideoHeader a flag indicating presence of short_video_header;
+; * shortVideoHeader==1 selects linear intra DC mode,
+; * and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out] pSrcDst pointer to the output (dequantized) intra/inter block. Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; * - If pSrcDst is NULL or is not 16-byte aligned.
+; * or
+; * - If QP <= 0.
+; * or
+; * - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+
+ IMPORT armVCM4P2_DCScaler
+
+
+
+ IF ARM1136JS
+
+;//Input Arguments
+pSrcDst RN 0
+QP RN 1
+videoComp RN 2
+shortVideoHeader RN 3
+
+;//Local Variables
+Return RN 0
+dcScaler RN 4
+temp RN 12
+index RN 6
+
+tempVal21 RN 4
+tempVal43 RN 5
+QP1 RN 6
+X2 RN 7
+X3 RN 14
+Result1 RN 8
+Result2 RN 9
+two RN 10
+Count RN 11
+
+
+
+
+ M_START omxVCM4P2_QuantInvIntra_I,r11
+
+
+
+ ;// Perform Inverse Quantization for DC coefficient
+
+ TEQ shortVideoHeader,#0 ;// Test if short Video Header flag =0
+ MOVNE dcScaler,#8 ;// if shortVideoHeader is non zero dcScaler=8
+ BNE calDCVal
+ LDR index, =armVCM4P2_DCScaler
+ ADD index,index,videoComp,LSL #5 ;// 32 table bytes per videoComp
+ LDRB dcScaler,[index,QP] ;// dcScaler = table entry for (videoComp, QP)
+
+
+ ;//M_CalDCScalar shortVideoHeader,videoComp, QP
+
+calDCVal
+
+ LDRH temp,[pSrcDst]
+ SMULBB temp,temp,dcScaler ;// dcCoeff = dcScaler * Quantized DC coefficient(from memory)
+ SSAT temp,#12,temp ;// Saturating to 12 bits; held in temp until after the loop
+
+
+ MOV Count,#64 ;// the loop covers all 64 entries; entry 0 is overwritten with temp afterwards
+ TST QP,#1
+ LDRD tempVal21,[pSrcDst] ;// Loads first two values of pSrcDst to tempVal21,
+ ;// next two values to tempVal43
+ SUBEQ QP1,QP,#1 ;// QP1=QP if QP is odd , QP1=QP-1 if QP is even
+ MOVNE QP1,QP
+ MOV two,#2
+
+
+
+
+
+Loop
+
+
+ SMULBB X2,tempVal21,two ;// X2= first val(lower 16 bits of tempVal21)*2
+ CMP X2,#0 ;// flags drive the abs, the skip-if-zero and the sign restore below
+
+ RSBLT X2,X2,#0 ;// X2=2*absoluteval(first val)
+ SMLABBNE X2,QP,X2,QP1 ;// X2=2*absval(first val)*QP+QP if QP is odd
+ ;// X2=2*absval(first val)*QP+QP-1 if QP is even
+ SMULTB X3,tempVal21,two ;// X3= second val(top 16 bits of tempVal21)*2
+ RSBLT X2,X2,#0 ;// restore the sign of the first val
+
+ CMP X3,#0
+
+ RSBLT X3,X3,#0
+ SMLABBNE X3,QP,X3,QP1
+
+ RSBLT X3,X3,#0
+ SSAT X2,#12,X2 ;// clip to [-2048,2047] while still 32 bits wide: packing first
+ SSAT X3,#12,X3 ;// would wrap magnitudes above 32767 to the wrong sign
+ PKHBT Result1,X2,X3,LSL #16 ;// Result1[0-15]=X2[0-15],Result1[16-31]=X3[0-15]
+ SMULBB X2,tempVal43,two ;// X2= third val(lower 16 bits of tempVal43)*2
+ CMP X2,#0
+
+
+ RSBLT X2,X2,#0 ;// X2=2*absoluteval(third val)
+ SMLABBNE X2,QP,X2,QP1
+ SMULTB X3,tempVal43,two ;// X3= fourth val(top 16 bits of tempVal43)*2
+ RSBLT X2,X2,#0
+ CMP X3,#0
+
+ LDRD tempVal21,[pSrcDst,#8] ;// Load next four Values to tempVal21,tempVal43 (NOTE(review): reads 8 bytes past the block on the last pass)
+
+ RSBLT X3,X3,#0
+ SMLABBNE X3,QP,X3,QP1
+ RSBLT X3,X3,#0
+ SSAT X2,#12,X2 ;// clip to range [-2048,2047] before packing
+ SSAT X3,#12,X3
+ PKHBT Result2,X2,X3,LSL #16 ;// Result2[0-15]=X2[0-15],Result2[16-31]=X3[0-15]
+
+ SUBS Count,Count,#4 ;// Decrement Count by 4 and continue if it has not reached 0
+ STRD Result1,[pSrcDst],#8 ;// Store Double words and increment the pointer to point the next store address
+
+
+ BGT Loop
+
+ SUB pSrcDst,pSrcDst,#128 ;// rewind to the start of the 64-entry block
+
+ ;// Storing the Inverse Quantized DC coefficient
+
+ STRH temp,[pSrcDst],#2
+
+
+
+ MOV Return,#OMX_Sts_NoErr
+
+
+
+
+ M_END
+ ENDIF
+ END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/src/armVC_Version.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/src/armVC_Version.c
new file mode 100644
index 0000000..5d93681
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/src/armVC_Version.c
@@ -0,0 +1,6 @@
+#include "omxtypes.h"
+#include "armCOMM_Version.h"
+
+#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS /* version banner is compiled in only on request */
+char const * const omxVC_VersionDescription = "ARM OpenMAX DL v" ARM_VERSION_STRING " Rel=" OMX_ARM_RELEASE_TAG " Arch=" OMX_ARM_BUILD_ARCHITECTURE " Tools=" OMX_ARM_BUILD_TOOLCHAIN;
+#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */