diff options
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm11')
81 files changed, 25507 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_DELIVERY.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_DELIVERY.TXT new file mode 100644 index 0000000..5ce70ca --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_DELIVERY.TXT @@ -0,0 +1,63 @@ +The contents of this transaction was created by Hedley Francis +of ARM on 19-Feb-2008. + +It contains the ARM data versions listed below. + +This data, unless otherwise stated, is ARM Proprietary and access to it +is subject to the agreements indicated below. + +If you experience problems with this data, please contact ARM support +quoting transaction reference <97413>. + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +- OX001-SW-98010-r0p0-00bet1 + Video codecs - optimised code + V6 optimized code release for Hantro (Ver 1.0.2) + internal access + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +This transaction contains deliverables which are designated as being of +beta release status (BET). + +Beta release status has a particular meaning to ARM of which the recipient +must be aware. Beta is a pre-release status indicating that the deliverable +so described is believed to robustly demonstrate specified behaviour, to be +consistent across its included aspects and be ready for general deployment. +But Beta also indicates that pre-release reliability trials are ongoing and +that it is possible residual defects or errors in operation, consistency +and documentation may still be encountered. The recipient should consider +this position when using this Beta material supplied. ARM will normally +attempt to provide fixes or a work-around for defects identified by the +recipient, but the provision or timeliness of this support cannot be +guaranteed. ARM shall not be responsible for direct or consequential +damages as a result of encountering one or more of these residual defects. 
+By accepting a Beta release, the recipient agrees to these constraints and +to providing reasonable information to ARM to enable the replication of the +defects identified by the recipient. The specific Beta version supplied +will not be supported after release of a later or higher status version. +It should be noted that Support for the Beta release of the deliverable +will only be provided by ARM to a recipient who has a current support and +maintenance contract for the deliverable. + + + - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - + +In addition to the data versions listed above, this transaction contains +two additional files at the top level. + +The first is this file, ARM_DELIVERY_97413.TXT, which is the delivery +note. + +The second is ARM_MANIFEST_97413.TXT which contains a manifest of all the +files included in this transaction, together with their checksums. + +The checksums provided are calculated using the RSA Data Security, Inc. +MD5 Message-Digest Algorithm. 
+ +The checksums can be used to verify the integrity of this data using the +"md5sum" tool (which is part of the GNU "textutils" package) by running: + + % md5sum --check ARM_MANIFEST_97413.TXT + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_MANIFEST.TXT b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_MANIFEST.TXT new file mode 100644 index 0000000..9b2238b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/ARM_MANIFEST.TXT @@ -0,0 +1,91 @@ + OX001-SW-98010-r0p0-00bet1/ + OX001-SW-98010-r0p0-00bet1/api/ +e049791cfab6060a08cbac7b3ad767d6 OX001-SW-98010-r0p0-00bet1/api/armCOMM_s.h +ed798face25497b2703ede736d6d52b6 OX001-SW-98010-r0p0-00bet1/api/omxtypes_s.h +4eebd63af087376811d6749f0646b864 OX001-SW-98010-r0p0-00bet1/api/armCOMM_BitDec_s.h +43cf46c2cf2fe1f93c615b57bcbe4809 OX001-SW-98010-r0p0-00bet1/api/armCOMM.h +8f248ceaac8f602e277a521b679dcbbe OX001-SW-98010-r0p0-00bet1/api/armCOMM_IDCTTable.h +53f2ae8a98495f05e26a4cf862a7f750 OX001-SW-98010-r0p0-00bet1/api/armCOMM_Version.h +3a2f420ddf6a1b950470bd0f5ebd5c62 OX001-SW-98010-r0p0-00bet1/api/armCOMM_IDCT_s.h +511c0bb534fe223599e2c84eff24c9ed OX001-SW-98010-r0p0-00bet1/api/armCOMM_MaskTable.h +8971932d56eed6b1ad1ba507f0bff5f0 OX001-SW-98010-r0p0-00bet1/api/armCOMM_Bitstream.h +f87fedd9ca432fefa757008176864ef8 OX001-SW-98010-r0p0-00bet1/api/armOMX.h +8e49899a428822c36ef9dd94e0e05f18 OX001-SW-98010-r0p0-00bet1/api/omxtypes.h +694281d11af52f88e6f9d4cb226ac8a7 OX001-SW-98010-r0p0-00bet1/build_vc.pl +e72d96c0a415459748df9807f3dae72f OX001-SW-98010-r0p0-00bet1/filelist_vc.txt + OX001-SW-98010-r0p0-00bet1/src/ +5eeae659a29477f5c52296d24afffd3c OX001-SW-98010-r0p0-00bet1/src/armCOMM_IDCTTable.c +d64cdcf38f7749dc7f77465e5b7d356d OX001-SW-98010-r0p0-00bet1/src/armCOMM_MaskTable.c + OX001-SW-98010-r0p0-00bet1/vc/ + OX001-SW-98010-r0p0-00bet1/vc/m4p10/ + OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/ +e7e0c320978564a7c9b2c723749a98d6 
OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_CAVLCTables.c +4adcd0df081990bdfc4729041a2a9152 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c +852e0404142965dc1f3aa7f00ee5127b OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s +7054151c5bfea6b5e74feee86b2d7b01 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c +38944c5e0bba01e32ff349c2c87c71b2 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DequantTables_s.s +32ff4b8be62e2f0f3e764b83c1e5e2fd OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c +90b0e6a04e764902c0a0903640c10b32 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s +28a19ae4fe2258628080d6a89bb54b91 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s +98e196b9e1ffebaf91f62ea9d17fb97d OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s +01ba60eff66ea49a4f833ce6279f8e2f OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c +f301d5a95e07354f593ea5747c01cb0a OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s +44c9ef21e840a100301f7d7a4189957c OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s +a33b03bbd3352d24ed744769e12bb87d OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s +00c20bfda67bb86096b615fc17c94b35 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s +2ddcaf60a8ea1e6e6b77737f768bfb9d OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_QuantTables_s.s +c3002aad5600f872b70a5d7fe3915846 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s +a2900f2c47f1c61d20bd6c1eda33d6d4 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s +e4fecd66bc47f07539bc308935e84a1f OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s +78815c9df50ba53131bb22d2b829e3c3 
OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s +1909ae312ac79a03a5fac1d1e8bc0291 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s +3d2c48580655928065de7839866d9bc4 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s +23aa2fdf155d4fa6ff745eab6e01f32b OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s +97f20a93c481d7f6173d919f41e415bd OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s +becd512da202436286811b6aec061f47 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s +dd24a99ae3cd842dcacaf31d47de88b3 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s +c2d995f787b6f44ef10c751c12d1935f OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s +3628fbdf0cd217c287b6ccc94135d06e OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s +4a52b3e9e268b8a8f07829bf500d03af OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s +11249f8a98c5d4b84cb5575b0e37ca9c OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s +3599b1074330965c8ca285d164efccff OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s +3339e026c7de655d9400949eb5e51451 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s +cc4a6f32db0b72a91d3f278f6855df69 OX001-SW-98010-r0p0-00bet1/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c + OX001-SW-98010-r0p0-00bet1/vc/m4p10/api/ +6e530ddaa7c2b57ffe88162c020cb662 OX001-SW-98010-r0p0-00bet1/vc/m4p10/api/armVCM4P10_CAVLCTables.h + OX001-SW-98010-r0p0-00bet1/vc/m4p2/ + OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/ +cdf412920c2037a725d0420002b6752e OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Clip8_s.s +dba9824e959b21d401cac925e68a11a6 
OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s +b559b71d5c94627f10e616fb72c0cefc OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s +4fba4c431a783a78a2eb6497a94ac967 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c +1e4c3be8c5eddc00c9f05e83bcf315ef OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s +1b0b2990c2669dfb87cf6b810611c01b OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c +1c9b87abf3283e957816b3937c680701 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s +4fe1afca659a9055fc1172e58f78a506 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c +2ea067f0436f91ba1351edaf411cb4ea OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_Lookup_Tables.c +acb92be1dbcdb3ebe824cbe9e28d03bf OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s +a6b41f01b1df7dd656ebdba3084bfa2a OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s +293a48a648a3085456e6665bb7366fad OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_SetPredDir_s.s +ffe6b96c74d4881f4d3c8de8cc737797 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s +437dfa204508850d61d4b87091446e9f OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s +ff5915d181bfd2cd2f0bd588bd2300dc OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s +6775eb0c561dbab965c60f85b08c96fd OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s +a0d85f4f517c945a4c9317ac021f2d08 OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s +386020dee8b725c7fe2526f1fc211d7d OX001-SW-98010-r0p0-00bet1/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c + OX001-SW-98010-r0p0-00bet1/vc/m4p2/api/ +4624e7c838e10a249abcc3d3f4f40748 OX001-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h +65e1057d04e2cb844559dc9f6e09795a OX001-SW-98010-r0p0-00bet1/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h + 
OX001-SW-98010-r0p0-00bet1/vc/src/ +e627b3346b0dc9aff14446005ce0fa43 OX001-SW-98010-r0p0-00bet1/vc/src/armVC_Version.c + OX001-SW-98010-r0p0-00bet1/vc/api/ +7ca94b1c33ac0211e17d38baadd7d1dd OX001-SW-98010-r0p0-00bet1/vc/api/armVC.h +12cf7596edbbf6048b626d15e8d0ed48 OX001-SW-98010-r0p0-00bet1/vc/api/omxVC.h +11726e286a81257cb45f5547fb4d374c OX001-SW-98010-r0p0-00bet1/vc/api/omxVC_s.h +a5b2af605c319cd2491319e430741377 OX001-SW-98010-r0p0-00bet1/vc/api/armVCCOMM_s.h + OX001-SW-98010-r0p0-00bet1/vc/comm/ + OX001-SW-98010-r0p0-00bet1/vc/comm/src/ +50cca6954c447b012ab39ca7872e5e8f OX001-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy16x16_s.s +d1c3bce77fc5774c899b447d13f02cd0 OX001-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_Copy8x8_s.s +fdac1d1bad3fd23c880beb39bc2e89aa OX001-SW-98010-r0p0-00bet1/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s +6d9adc2be5bd0311591030d0c6df771c ARM_DELIVERY_97413.TXT diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM.h new file mode 100644 index 0000000..2ed86a4 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM.h @@ -0,0 +1,785 @@ +/** + * + * File Name: armCOMM.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File: armCOMM.h + * Brief: Declares Common APIs/Data Types used across OpenMAX API's + * + */ + + +#ifndef _armCommon_H_ +#define _armCommon_H_ + +#include "omxtypes.h" + +typedef struct +{ + OMX_F32 Re; /** Real part */ + OMX_F32 Im; /** Imaginary part */ + +} OMX_FC32; /** single precision floating point complex number */ + +typedef struct +{ + OMX_F64 Re; /** Real part */ + OMX_F64 Im; /** Imaginary part */ + +} OMX_FC64; /** double precision floating point complex number */ + + +/* Used by both IP and IC domains for 8x8 JPEG blocks. 
*/ +typedef OMX_S16 ARM_BLOCK8x8[64]; + + +#include "armOMX.h" + +#define armPI (OMX_F64)(3.1415926535897932384626433832795) + +/***********************************************************************/ + +/* Compiler extensions */ +#ifdef ARM_DEBUG +/* debug version */ +#include <stdlib.h> +#include <assert.h> +#include <stdio.h> +#define armError(str) {printf((str)); printf("\n"); exit(-1);} +#define armWarn(str) {printf((str)); printf("\n");} +#define armIgnore(a) ((void)a) +#define armAssert(a) assert(a) +#else +/* release version */ +#define armError(str) ((void) (str)) +#define armWarn(str) ((void) (str)) +#define armIgnore(a) ((void) (a)) +#define armAssert(a) ((void) (a)) +#endif /* ARM_DEBUG */ + +/* Arithmetic operations */ + +#define armMin(a,b) ( (a) > (b) ? (b):(a) ) +#define armMax(a,b) ( (a) > (b) ? (a):(b) ) +#define armAbs(a) ( (a) < 0 ? -(a):(a) ) + +/* Alignment operation */ + +#define armAlignToBytes(Ptr,N) (Ptr + ( ((N-(int)Ptr)&(N-1)) / sizeof(*Ptr) )) +#define armAlignTo2Bytes(Ptr) armAlignToBytes(Ptr,2) +#define armAlignTo4Bytes(Ptr) armAlignToBytes(Ptr,4) +#define armAlignTo8Bytes(Ptr) armAlignToBytes(Ptr,8) +#define armAlignTo16Bytes(Ptr) armAlignToBytes(Ptr,16) + +/* Error and Alignment check */ + +#define armRetArgErrIf(condition, code) if(condition) { return (code); } +#define armRetDataErrIf(condition, code) if(condition) { return (code); } + +#ifndef ALIGNMENT_DOESNT_MATTER +#define armIsByteAligned(Ptr,N) ((((int)(Ptr)) % N)==0) +#define armNotByteAligned(Ptr,N) ((((int)(Ptr)) % N)!=0) +#else +#define armIsByteAligned(Ptr,N) (1) +#define armNotByteAligned(Ptr,N) (0) +#endif + +#define armIs2ByteAligned(Ptr) armIsByteAligned(Ptr,2) +#define armIs4ByteAligned(Ptr) armIsByteAligned(Ptr,4) +#define armIs8ByteAligned(Ptr) armIsByteAligned(Ptr,8) +#define armIs16ByteAligned(Ptr) armIsByteAligned(Ptr,16) + +#define armNot2ByteAligned(Ptr) armNotByteAligned(Ptr,2) +#define armNot4ByteAligned(Ptr) armNotByteAligned(Ptr,4) +#define 
armNot8ByteAligned(Ptr) armNotByteAligned(Ptr,8) +#define armNot16ByteAligned(Ptr) armNotByteAligned(Ptr,16) +#define armNot32ByteAligned(Ptr) armNotByteAligned(Ptr,32) + +/** + * Function: armRoundFloatToS16/armRoundFloatToS32/armRoundFloatToS64 + * + * Description: + * Converts a double precision value into a 16-bit/32-bit/64-bit signed integer after rounding + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S16/OMX_S32/OMX_S64 format + * + */ + +OMX_S16 armRoundFloatToS16 (OMX_F64 Value); +OMX_S32 armRoundFloatToS32 (OMX_F64 Value); +OMX_S64 armRoundFloatToS64 (OMX_F64 Value); + +/** + * Function: armSatRoundFloatToS16/armSatRoundFloatToS32 + * + * Description: + * Converts a double precision value into a short int/int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S16/OMX_S32 format + * + */ + +OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value); +OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value); + +/** + * Function: armSatRoundFloatToU16/armSatRoundFloatToU32 + * + * Description: + * Converts a double precision value into an unsigned short int/int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_U16/OMX_U32 format + * + */ + +OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value); +OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value); + +/** + * Function: armSignCheck + * + * Description: + * Checks the sign of a variable: + * returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + * + * Remarks: + * + * Parameters: + * [in] var Variable to be checked + * + * Return Value: + * OMX_INT -- returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + */ + +OMX_INT armSignCheck (OMX_S16 var); + +/** + * Function: armClip + * + * Description: Clips the input between MAX and MIN value 
+ * + * + * Remarks: + * + * Parameters: + * [in] Min lower bound + * [in] Max upper bound + * [in] src variable to be clipped + * + * Return Value: + * OMX_S32 -- returns clipped value + */ + +OMX_S32 armClip ( + OMX_INT min, + OMX_INT max, + OMX_S32 src + ); + +/** + * Function: armClip_F32 + * + * Description: Clips the input between MAX and MIN value + * + * + * Remarks: + * + * Parameters: + * [in] Min lower bound + * [in] Max upper bound + * [in] src variable to be clipped + * + * Return Value: + * OMX_F32 -- returns clipped value + */ + +OMX_F32 armClip_F32 ( + OMX_F32 min, + OMX_F32 max, + OMX_F32 src + ); + +/** + * Function: armShiftSat_F32 + * + * Description: Divides a float value by 2^shift and + * saturates it for unsigned value range for satBits. + * Second parameter is like "shifting" the corresponding + * integer value. Takes care of rounding while clipping the final + * value. + * + * Parameters: + * [in] v Number to be operated upon + * [in] shift Divides the input "v" by "2^shift" + * [in] satBits Final range is [0, 2^satBits) + * + * Return Value: + * OMX_U32 -- returns "shifted" saturated value + */ + +OMX_U32 armShiftSat_F32( + OMX_F32 v, + OMX_INT shift, + OMX_INT satBits + ); + +/** + * Function: armSwapElem + * + * Description: + * This function swaps two elements at the specified pointer locations. 
+ * The size of each element could be anything as specified by <elemSize> + * + * Return Value: + * OMXResult -- Error status from the function + */ +OMXResult armSwapElem(OMX_U8 *pBuf1, OMX_U8 *pBuf2, OMX_INT elemSize); + + +/** + * Function: armMedianOf3 + * + * Description: Finds the median of three numbers + * + * Remarks: + * + * Parameters: + * [in] fEntry First entry + * [in] sEntry Second entry + * [in] tEntry Third entry + * + * Return Value: + * OMX_S32 -- returns the median value + */ + +OMX_S32 armMedianOf3 ( + OMX_S32 fEntry, + OMX_S32 sEntry, + OMX_S32 tEntry + ); + +/** + * Function: armLogSize + * + * Description: Finds the size of a positive value and returns the same + * + * Remarks: + * + * Parameters: + * [in] value Positive value + * + * Return Value: + * OMX_U8 -- returns the size of the positive value + */ + +OMX_U8 armLogSize ( + OMX_U16 value + ); + +/***********************************************************************/ + /* Saturating Arithmetic operations */ + +/** + * Function :armSatAdd_S32() + * + * Description : + * Returns the result of saturated addition of the two inputs Value1, Value2 + * + * Parameters: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + * + **/ + +OMX_S32 armSatAdd_S32( + OMX_S32 Value1, + OMX_S32 Value2 + ); + +/** + * Function :armSatAdd_S64() + * + * Description : + * Returns the result of saturated addition of the two inputs Value1, Value2 + * + * Parameters: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + * + **/ + +OMX_S64 armSatAdd_S64( + OMX_S64 Value1, + OMX_S64 Value2 + ); + +/** Function :armSatSub_S32() + * + * Description : + * Returns the result of saturated subtraction of the two inputs Value1, Value2 + * + * Parameters: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatSub_S32( + OMX_S32 
Value1, + OMX_S32 Value2 + ); + +/** + * Function :armSatMac_S32() + * + * Description : + * Returns the result of Multiplication of Value1 and Value2 and subsequent saturated + * accumulation with Mac + * + * Parameters: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * [in] Mac Accumulator + * + * Return: + * [out] Result of operation + **/ + +OMX_S32 armSatMac_S32( + OMX_S32 Mac, + OMX_S16 Value1, + OMX_S16 Value2 + ); + +/** + * Function :armSatMac_S16S32_S32 + * + * Description : + * Returns the result of saturated MAC operation of the three inputs delayElem, filTap , mac + * + * mac = mac + Saturate_in_32Bits(delayElem * filTap) + * + * Parameters: + * [in] delayElem First 32 bit Operand + * [in] filTap Second 16 bit Operand + * [in] mac Result of MAC operation + * + * Return: + * [out] mac Result of operation + * + **/ + +OMX_S32 armSatMac_S16S32_S32( + OMX_S32 mac, + OMX_S32 delayElem, + OMX_S16 filTap ); + +/** + * Function :armSatRoundRightShift_S32_S16 + * + * Description : + * Returns the result of rounded right shift operation of input by the scalefactor + * + * output = Saturate_in_16Bits( RightShift( Round(input), scaleFactor ) ) + * + * Parameters: + * [in] input The input to be operated on + * [in] scaleFactor The shift number + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S16 armSatRoundRightShift_S32_S16( + OMX_S32 input, + OMX_INT scaleFactor); + +/** + * Function :armSatRoundLeftShift_S32() + * + * Description : + * Returns the result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative. 
+ * + * Parameters: + * [in] Value Operand + * [in] shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatRoundLeftShift_S32( + OMX_S32 Value, + OMX_INT shift + ); + +/** + * Function :armSatRoundLeftShift_S64() + * + * Description : + * Returns the result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative. + * + * Parameters: + * [in] Value Operand + * [in] shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S64 armSatRoundLeftShift_S64( + OMX_S64 Value, + OMX_INT shift + ); + +/** + * Function :armSatMulS16S32_S32() + * + * Description : + * Returns the result of an S16 data type multiplied with an S32 data type + * in an S32 container + * + * Parameters: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S32 armSatMulS16S32_S32( + OMX_S16 input1, + OMX_S32 input2); + +/** + * Function :armSatMulS32S32_S32() + * + * Description : + * Returns the result of an S32 data type multiplied with an S32 data type + * in an S32 container + * + * Parameters: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatMulS32S32_S32( + OMX_S32 input1, + OMX_S32 input2); + + +/** + * Function :armIntDivAwayFromZero() + * + * Description : Integer division with rounding to the nearest integer. + * Half-integer values are rounded away from zero + * unless otherwise specified. For example 3//2 is rounded + * to 2, and -3//2 is rounded to -2. 
+ * + * Parameters: + * [in] Num Operand 1 + * [in] Deno Operand 2 + * + * Return: + * [out] Result of operation Num//Deno + * + **/ + +OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno); + + +/***********************************************************************/ +/* + * Debugging macros + * + */ + + +/* + * Definition of output stream - change to stderr if necessary + */ +#define DEBUG_STREAM stdout + +/* + * Debug printf macros, one for each argument count. + * Add more if needed. + */ +#ifdef DEBUG_ON +#include <stdio.h> + +#define DEBUG_PRINTF_0(a) fprintf(DEBUG_STREAM, a) +#define DEBUG_PRINTF_1(a, b) fprintf(DEBUG_STREAM, a, b) +#define DEBUG_PRINTF_2(a, b, c) fprintf(DEBUG_STREAM, a, b, c) +#define DEBUG_PRINTF_3(a, b, c, d) fprintf(DEBUG_STREAM, a, b, c, d) +#define DEBUG_PRINTF_4(a, b, c, d, e) fprintf(DEBUG_STREAM, a, b, c, d, e) +#define DEBUG_PRINTF_5(a, b, c, d, e, f) fprintf(DEBUG_STREAM, a, b, c, d, e, f) +#define DEBUG_PRINTF_6(a, b, c, d, e, f, g) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g) +#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h) +#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i) +#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j) +#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k) +#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l) +#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m) +#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n) +#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) +#else /* DEBUG_ON */ +#define 
DEBUG_PRINTF_0(a) +#define DEBUG_PRINTF_1(a, b) +#define DEBUG_PRINTF_2(a, b, c) +#define DEBUG_PRINTF_3(a, b, c, d) +#define DEBUG_PRINTF_4(a, b, c, d, e) +#define DEBUG_PRINTF_5(a, b, c, d, e, f) +#define DEBUG_PRINTF_6(a, b, c, d, e, f, g) +#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h) +#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i) +#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j) +#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k) +#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l) +#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m) +#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n) +#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) +#endif /* DEBUG_ON */ + + +/* + * Domain and sub domain definitions + * + * In order to turn on debug for an entire domain or sub-domain + * at compile time, one of the DEBUG_DOMAIN_* below may be defined, + * which will activate debug in all of the defines it contains. + */ + +#ifdef DEBUG_DOMAIN_AC +#define DEBUG_OMXACAAC_DECODECHANPAIRELT_MPEG4 +#define DEBUG_OMXACAAC_DECODECHANPAIRELT +#define DEBUG_OMXACAAC_DECODEDATSTRELT +#define DEBUG_OMXACAAC_DECODEFILLELT +#define DEBUG_OMXACAAC_DECODEISSTEREO_S32 +#define DEBUG_OMXACAAC_DECODEMSPNS_S32 +#define DEBUG_OMXACAAC_DECODEMSSTEREO_S32_I +#define DEBUG_OMXACAAC_DECODEPRGCFGELT +#define DEBUG_OMXACAAC_DECODETNS_S32_I +#define DEBUG_OMXACAAC_DEINTERLEAVESPECTRUM_S32 +#define DEBUG_OMXACAAC_ENCODETNS_S32_I +#define DEBUG_OMXACAAC_LONGTERMPREDICT_S32 +#define DEBUG_OMXACAAC_LONGTERMRECONSTRUCT_S32 +#define DEBUG_OMXACAAC_MDCTFWD_S32 +#define DEBUG_OMXACAAC_MDCTINV_S32_S16 +#define DEBUG_OMXACAAC_NOISELESSDECODE +#define DEBUG_OMXACAAC_QUANTINV_S32_I +#define DEBUG_OMXACAAC_UNPACKADIFHEADER +#define DEBUG_OMXACAAC_UNPACKADTSFRAMEHEADER +#define DEBUG_OMXACMP3_HUFFMANDECODESFBMBP_S32 +#define DEBUG_OMXACMP3_HUFFMANDECODESFB_S32 +#define DEBUG_OMXACMP3_HUFFMANDECODE_S32 +#define DEBUG_OMXACMP3_MDCTINV_S32 +#define 
DEBUG_OMXACMP3_REQUANTIZESFB_S32_I +#define DEBUG_OMXACMP3_REQUANTIZE_S32_I +#define DEBUG_OMXACMP3_SYNTHPQMF_S32_S16 +#define DEBUG_OMXACMP3_UNPACKFRAMEHEADER +#define DEBUG_OMXACMP3_UNPACKSCALEFACTORS_S8 +#define DEBUG_OMXACMP3_UNPACKSIDEINFO +#endif /* DEBUG_DOMAIN_AC */ + + +#ifdef DEBUG_DOMAIN_VC +#define DEBUG_OMXVCM4P10_AVERAGE_16X +#define DEBUG_OMXVCM4P10_AVERAGE_4X +#define DEBUG_OMXVCM4P10_AVERAGE_8X +#define DEBUG_OMXVCM4P10_DEBLOCKCHROMA_U8_C1IR +#define DEBUG_OMXVCM4P10_DEBLOCKLUMA_U8_C1IR +#define DEBUG_OMXVCM4P10_DECODECHROMADCCOEFFSTOPAIRCAVLC_U8 +#define DEBUG_OMXVCM4P10_DECODECOEFFSTOPAIRCAVLC_U8 +#define DEBUG_OMXVCM4P10_DEQUANTTRANSFORMACFROMPAIR_U8_S16_C1_DLX +#define DEBUG_OMXVCM4P10_EXPANDFRAME +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_HOREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_VEREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_HOREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_VEREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_PREDICTINTRACHROMA8X8_U8_C1R +#define DEBUG_OMXVCM4P10_PREDICTINTRA_16X16_U8_C1R +#define DEBUG_OMXVCM4P10_PREDICTINTRA_4X4_U8_C1R +#define DEBUG_OMXVCM4P10_SADQUAR_16X +#define DEBUG_OMXVCM4P10_SADQUAR_4X +#define DEBUG_OMXVCM4P10_SADQUAR_8X +#define DEBUG_OMXVCM4P10_SAD_16X +#define DEBUG_OMXVCM4P10_SAD_4X +#define DEBUG_OMXVCM4P10_SAD_8X +#define DEBUG_OMXVCM4P10_SATD_4X4 +#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTCHROMADCFROMPAIR_U8_S16_C1 +#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTLUMADCFROMPAIR_U8_S16_C1 +#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_CHROMADC +#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_LUMADC +#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_16X16 +#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_8X8 +#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_16X16 +#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_8X8 +#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_SAD_U8_S16 +#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_U8_S16 +#define DEBUG_OMXVCM4P2_DCT8X8BLKDLX +#define 
DEBUG_OMXVCM4P2_DECODEBLOCKCOEF_INTER_S16 +#define DEBUG_OMXVCM4P2_DECODEPADMV_PVOP +#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTER_S16 +#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRAACVLC_S16 +#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRADCVLC_S16 +#define DEBUG_OMXVCM4P2_ENCODEMV_U8_S16 +#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTER_S16 +#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRAACVLC_S16 +#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRADCVLC_S16 +#define DEBUG_OMXVCM4P2_FINDMVPRED +#define DEBUG_OMXVCM4P2_IDCT8X8BLKDLX +#define DEBUG_OMXVCM4P2_LIMITMVTORECT +#define DEBUG_OMXVCM4P2_MOTIONESTIMATIONMB +#define DEBUG_OMXVCM4P2_PADMBGRAY_U8 +#define DEBUG_OMXVCM4P2_PADMBHORIZONTAL_U8 +#define DEBUG_OMXVCM4P2_PADMBVERTICAL_U8 +#define DEBUG_OMXVCM4P2_PADMV +#define DEBUG_OMXVCM4P2_QUANTINTER_S16_I +#define DEBUG_OMXVCM4P2_QUANTINTRA_S16_I +#define DEBUG_OMXVCM4P2_QUANTINVINTER_S16_I +#define DEBUG_OMXVCM4P2_QUANTINVINTRA_S16_I +#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTER +#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTRA +#endif /* DEBUG_DOMAIN_VC */ + + +#ifdef DEBUG_DOMAIN_IC +/* To be filled in */ +#endif /* DEBUG_DOMAIN_IC */ + + +#ifdef DEBUG_DOMAIN_SP +#define DEBUG_OMXACSP_DOTPROD_S16 +#define DEBUG_OMXACSP_BLOCKEXP_S16 +#define DEBUG_OMXACSP_BLOCKEXP_S32 +#define DEBUG_OMXACSP_COPY_S16 +#define DEBUG_OMXACSP_DOTPROD_S16 +#define DEBUG_OMXACSP_DOTPROD_S16_SFS +#define DEBUG_OMXACSP_FFTFWD_CTOC_SC16_SFS +#define DEBUG_OMXACSP_FFTFWD_CTOC_SC32_SFS +#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S16S32_SFS +#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S32_SFS +#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC16 +#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC32 +#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S16_S32 +#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S32 +#define DEBUG_OMXACSP_FFTINIT_C_SC16 +#define DEBUG_OMXACSP_FFTINIT_C_SC32 +#define DEBUG_OMXACSP_FFTINIT_R_S16_S32 +#define DEBUG_OMXACSP_FFTINIT_R_S32 +#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32S16_SFS +#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32_SFS 
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC16_SFS +#define DEBUG_OMXACSP_FFTINV_CTOC_SC32_SFS +#define DEBUG_OMXACSP_FILTERMEDIAN_S32_I +#define DEBUG_OMXACSP_FILTERMEDIAN_S32 +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_ISFS +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_I +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16 +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_SFS +#define DEBUG_OMXACSP_FIR_DIRECT_S16_ISFS +#define DEBUG_OMXACSP_FIR_DIRECT_S16_I +#define DEBUG_OMXACSP_FIR_DIRECT_S16 +#define DEBUG_OMXACSP_FIR_DIRECT_S16_SFS +#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16_I +#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16 +#define DEBUG_OMXACSP_IIRONE_DIRECT_S16_I +#define DEBUG_OMXACSP_IIRONE_DIRECT_S16 +#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16_I +#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16 +#define DEBUG_OMXACSP_IIR_DIRECT_S16_I +#define DEBUG_OMXACSP_IIR_DIRECT_S16 +#endif /* DEBUG_DOMAIN_SP */ + + +#ifdef DEBUG_DOMAIN_IP +#define DEBUG_OMXIPBM_ADDC_U8_C1R_SFS +#define DEBUG_OMXIPBM_COPY_U8_C1R +#define DEBUG_OMXIPBM_COPY_U8_C3R +#define DEBUG_OMXIPBM_MIRROR_U8_C1R +#define DEBUG_OMXIPBM_MULC_U8_C1R_SFS +#define DEBUG_OMXIPCS_COLORTWISTQ14_U8_C3R +#define DEBUG_OMXIPCS_RGB565TOYCBCR420LS_MCU_U16_S16_C3P3R +#define DEBUG_OMXIPCS_RGB565TOYCBCR422LS_MCU_U16_S16_C3P3R +#define DEBUG_OMXIPCS_RGB565TOYCBCR444LS_MCU_U16_S16_C3P3R +#define DEBUG_OMXIPCS_RGBTOYCBCR420LS_MCU_U8_S16_C3P3R +#define DEBUG_OMXIPCS_RGBTOYCBCR422LS_MCU_U8_S16_C3P3R +#define DEBUG_OMXIPCS_RGBTOYCBCR444LS_MCU_U8_S16_C3P3R +#define DEBUG_OMXIPCS_YCBCR420RSZROT_U8_P3R +#define DEBUG_OMXIPCS_YCBCR420TORGB565LS_MCU_S16_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR420TORGB565_U8_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR420TORGBLS_MCU_S16_U8_P3C3R +#define DEBUG_OMXIPCS_YCBCR422RSZCSCROTRGB_U8_C2R +#define DEBUG_OMXIPCS_YCBCR422RSZROT_U8_P3R +#define DEBUG_OMXIPCS_YCBCR422TORGB565LS_MCU_S16_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR422TORGB565_U8_U16_C2C3R +#define DEBUG_OMXIPCS_YCBCR422TORGBLS_MCU_S16_U8_P3C3R +#define 
DEBUG_OMXIPCS_YCBCR422TORGB_U8_C2C3R +#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_C2P3R +#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_P3R +#define DEBUG_OMXIPCS_YCBCR444TORGB565LS_MCU_S16_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR444TORGBLS_MCU_S16_U8_P3C3R +#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_C3R +#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCRTORGB_U8_C3R +#define DEBUG_OMXIPPP_GETCENTRALMOMENT_S64 +#define DEBUG_OMXIPPP_GETSPATIALMOMENT_S64 +#define DEBUG_OMXIPPP_MOMENTGETSTATESIZE_S64 +#define DEBUG_OMXIPPP_MOMENTINIT_S64 +#define DEBUG_OMXIPPP_MOMENTS64S_U8_C1R +#define DEBUG_OMXIPPP_MOMENTS64S_U8_C3R +#endif /* DEBUG_DOMAIN_IP */ + + +#endif /* _armCommon_H_ */ + +/*End of File*/ + + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_BitDec_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_BitDec_s.h new file mode 100644 index 0000000..abb98fc --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_BitDec_s.h @@ -0,0 +1,670 @@ +;// +;// +;// File Name: armCOMM_BitDec_s.h +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// +;// OpenMAX optimized bitstream decode module +;// +;// You must include armCOMM_s.h before including this file +;// +;// This module provides macros to perform assembly optimized fixed and +;// variable length decoding from a read-only bitstream. The variable +;// length decode modules take as input a pointer to a table of 16-bit +;// entries of the following format. 
+;// +;// VLD Table Entry format +;// +;// 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +;// +------------------------------------------------+ +;// | Len | Symbol | 1 | +;// +------------------------------------------------+ +;// | Offset | 0 | +;// +------------------------------------------------+ +;// +;// If the table entry is a leaf entry then bit 0 is set: +;// Len = Number of bits overread (0 to 7), held in bits 15..13 +;// Symbol = Symbol payload (unsigned 12 bits), held in bits 12..1 +;// +;// If the table entry is an internal node then bit 0 is clear: +;// Offset = Number of (16-bit) half words from the table +;// start to the next table node, held in bits 15..1 +;// +;// The table is accessed by successive lookups on the +;// next Step bits of the input bitstream until a leaf node +;// is obtained. The Step sizes are supplied to the VLD macro. +;// +;// USAGE: +;// +;// To use any of the macros in this package, first call, in this order: +;// +;// M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount +;// M_BD_INIT1 T1, T2, T3 +;// M_BD_INIT2 T1, T2, T3 +;// +;// This caches the current bitstream position and next available +;// bits in registers RBitStream, RBitBuffer, RBitCount. These registers +;// are reserved for use by the bitstream decode package until you +;// call M_BD_FINI. +;// +;// Next call the following macro(s) as many times as you need: +;// +;// M_BD_LOOK8 - Look ahead constant 1<=N<=8 bits into the bitstream +;// M_BD_LOOK16 - Look ahead constant 1<=N<=16 bits into the bitstream +;// M_BD_SKIP8 - Skip constant 1<=N<=8 bits of the bitstream +;// M_BD_READ8 - Read constant 1<=N<=8 bits from the bitstream +;// M_BD_READ16 - Read constant 1<=N<=16 bits from the bitstream +;// M_BD_VSKIP8 - Skip variable 1<=N<=8 bits of the bitstream +;// M_BD_VSKIP16 - Skip variable 1<=N<=16 bits of the bitstream +;// M_BD_VREAD8 - Read variable 1<=N<=8 bits from the bitstream +;// M_BD_VREAD16 - Read variable 1<=N<=16 bits from the bitstream +;// M_BD_CLZ16 - Decode a code of the form 0000...001 (length<=16) +;// M_BD_CLO16 - Decode a code of the form 1111...110 (length<=16) +;// M_BD_VLD - Perform variable length decode using lookup table +;// +;// Finally call the macro: +;// +;// M_BD_FINI ppBitStream, pBitOffset +;// +;// This writes the bitstream state back to memory. 
+;// +;// The three bitstream cache register names are assigned to the following global +;// variables: +;// + + GBLS pBitStream ;// Register name for pBitStream + GBLS BitBuffer ;// Register name for BitBuffer + GBLS BitCount ;// Register name for BitCount + +;// +;// These register variables must have a certain defined state on entry to every bitstream +;// macro (except M_BD_INIT) and on exit from every bitstream macro (except M_BD_FINI). +;// The state may depend on implementation. +;// +;// For the default (ARM11) implementation the following hold: +;// pBitStream - points to the first byte not held in the BitBuffer +;// BitBuffer - is a cache of (4 bytes) 32 bits, bit 31 the first bit +;// BitCount - is offset (from the top bit) to the next unused bitstream bit +;// 0<=BitCount<=15 (so BitBuffer holds at least 17 unused bits) +;// +;// + + ;// Bitstream Decode initialise + ;// + ;// Initialises the bitstream decode global registers from + ;// bitstream pointers. This macro is split into 3 parts to enable + ;// scheduling. + ;// + ;// Input Registers: + ;// + ;// $ppBitStream - pointer to pointer to the next bitstream byte + ;// $pBitOffset - pointer to the number of bits used in the current byte (0..7) + ;// $RBitStream - register to use for pBitStream (can be $ppBitStream) + ;// $RBitBuffer - register to use for BitBuffer + ;// $RBitCount - register to use for BitCount (can be $pBitOffset) + ;// + ;// Output Registers: + ;// + ;// $T1,$T2,$T3 - registers that must be preserved between calls to + ;// M_BD_INIT1 and M_BD_INIT2 + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_INIT0 $ppBitStream, $pBitOffset, $RBitStream, $RBitBuffer, $RBitCount + +pBitStream SETS "$RBitStream" +BitBuffer SETS "$RBitBuffer" +BitCount SETS "$RBitCount" + + ;// load inputs + LDR $pBitStream, [$ppBitStream] + LDR $BitCount, [$pBitOffset] + MEND + + MACRO + M_BD_INIT1 $T1, $T2, $T3 + LDRB $T2, [$pBitStream, #2] + LDRB $T1, [$pBitStream, #1] + LDRB $BitBuffer, [$pBitStream], #3 + ADD $BitCount, $BitCount, #8 + MEND + + MACRO + M_BD_INIT2 $T1, $T2, $T3 + ORR $T2, $T2, $T1, LSL #8 + ORR $BitBuffer, $T2, $BitBuffer, LSL #16 + MEND + + ;// + ;// Look ahead fixed 1<=N<=8 bits without consuming any bits + ;// The next bits will be placed at bit 31..24 of destination register + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to look + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_LOOK8 $Symbol, $N + ASSERT ($N>=1):LAND:($N<=8) + MOV $Symbol, $BitBuffer, LSL $BitCount + MEND + + ;// + ;// Look ahead fixed 1<=N<=16 bits without consuming any bits + ;// The next bits will be placed at bit 31..16 of destination register + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to look + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_LOOK16 $Symbol, $N, $T1 + ASSERT ($N >= 1):LAND:($N <= 16) + MOV $Symbol, $BitBuffer, LSL $BitCount + MEND + + ;// + ;// Skips fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer + ;// + ;// Input Registers: + ;// + ;// $N - number of bits + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $T1 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_SKIP8 $N, $T1 + ASSERT ($N>=1):LAND:($N<=8) + SUBS $BitCount, $BitCount, #(8-$N) + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + + ;// + ;// Read fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to read + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_READ8 $Symbol, $N, $T1 + ASSERT ($N>=1):LAND:($N<=8) + MOVS $Symbol, $BitBuffer, LSL $BitCount + SUBS $BitCount, $BitCount, #(8-$N) + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + MOV $Symbol, $Symbol, LSR #(32-$N) + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + ;// + ;// Read fixed 1<=N<=16 bits from the bitstream, advancing the bitstream pointer + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to read + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_READ16 $Symbol, $N, $T1, $T2 + ASSERT ($N>=1):LAND:($N<=16) + ASSERT $Symbol<>$T1 + IF ($N<=8) + M_BD_READ8 $Symbol, $N, $T1 + ELSE + ;// N>8 so we will be able to refill at least one byte + LDRB $T1, [$pBitStream], #1 + MOVS $Symbol, $BitBuffer, LSL $BitCount + ORR $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBS $BitCount, $BitCount, #(16-$N) + LDRCSB $T1, [$pBitStream], #1 + MOV $Symbol, $Symbol, LSR #(32-$N) + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + ENDIF + MEND + + ;// + ;// Skip variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer. + ;// + ;// Input Registers: + ;// + ;// $N - number of bits. 1<=N<=8 + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_VSKIP8 $N, $T1 + ADD $BitCount, $BitCount, $N + SUBS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + ;// + ;// Skip variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer. + ;// + ;// Input Registers: + ;// + ;// $N - number of bits. 1<=N<=16 + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_VSKIP16 $N, $T1, $T2 + ADD $BitCount, $BitCount, $N + SUBS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBCSS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + ;// + ;// Read variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer. + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to read. 1<=N<=8 + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_VREAD8 $Symbol, $N, $T1, $T2 + MOV $Symbol, $BitBuffer, LSL $BitCount + ADD $BitCount, $BitCount, $N + SUBS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + RSB $T2, $N, #32 + ADDCC $BitCount, $BitCount, #8 + MOV $Symbol, $Symbol, LSR $T2 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + + ;// + ;// Read variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer. + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to read. 1<=N<=16 + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_VREAD16 $Symbol, $N, $T1, $T2 + MOV $Symbol, $BitBuffer, LSL $BitCount ;// left align next bits + ADD $BitCount, $BitCount, $N + SUBS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + RSB $T2, $N, #32 ;// T2 = 32-N, used below to right align the N bits + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBCSS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + MOV $Symbol, $Symbol, LSR $T2 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + + ;// + ;// Decode a code of the form 0000...001 where there + ;// are N zeros before the 1 and N<=15 (code length<=16) + ;// + ;// Input Registers: + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the number of zeros before the next 1 + ;// >=16 is an illegal code + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_CLZ16 $Symbol, $T1, $T2 + MOVS $Symbol, $BitBuffer, LSL $BitCount ;// left align next bits + CLZ $Symbol, $Symbol ;// count the leading zeros + ADD $BitCount, $BitCount, $Symbol + SUBS $BitCount, $BitCount, #7 ;// length is Symbol+1 + LDRCSB $T1, [$pBitStream], #1 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBCSS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + ;// + ;// Decode a code of the form 1111...110 where there + ;// are N ones before the 0 and N<=15 (code length<=16) + ;// + ;// Input Registers: + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the number of ones before the next 0 + ;// >=16 is an illegal code + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_CLO16 $Symbol, $T1, $T2 + MOV $Symbol, $BitBuffer, LSL $BitCount + MVN $Symbol, $Symbol + CLZ $Symbol, $Symbol + ADD $BitCount, $BitCount, $Symbol + SUBS $BitCount, $BitCount, #7 ;// length is Symbol+1 + LDRCSB $T1, [$pBitStream], #1 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBCSS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + + ;// + ;// Variable Length Decode module + ;// + ;// Decodes one VLD Symbol from a bitstream and refill the bitstream + ;// buffer. + ;// + ;// Input Registers: + ;// + ;// $pVLDTable - pointer to VLD decode table of 16-bit entries. + ;// The format is described above at the start of + ;// this file. + ;// $S0 - The number of bits to look up for the first step + ;// 1<=$S0<=8 + ;// $S1 - The number of bits to look up for each subsequent + ;// step 1<=$S1<=$S0. + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - decoded VLD symbol value + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_VLD $Symbol, $T1, $T2, $pVLDTable, $S0, $S1 + ASSERT (1<=$S0):LAND:($S0<=8) + ASSERT (1<=$S1):LAND:($S1<=$S0) + + ;// Note 0<=BitCount<=15 on entry and exit + + MOVS $T1, $BitBuffer, LSL $BitCount ;// left align next bits + MOVS $Symbol, #(2<<$S0)-2 ;// create mask + AND $Symbol, $Symbol, $T1, LSR #(31-$S0) ;// 2*(next $S0 bits) + SUBS $BitCount, $BitCount, #8 ;// CS if buffer can be filled +01 + LDRCSB $T1, [$pBitStream], #1 ;// load refill byte + LDRH $Symbol, [$pVLDTable, $Symbol] ;// load table entry + ADDCC $BitCount, $BitCount, #8 ;// refill not possible + ADD $BitCount, $BitCount, #$S0 ;// assume $S0 bits used + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in refill byte + MOVS $T1, $Symbol, LSR #1 ;// CS=leaf entry + BCS %FT02 + + MOVS $T1, $BitBuffer, LSL $BitCount ;// left align next bit + IF (2*$S0-$S1<=8) + ;// Can combine refill check and -S0+S1 and keep $BitCount<=15 + SUBS $BitCount, $BitCount, #8+($S0-$S1) + ELSE + ;// Separate refill check and -S0+S1 offset + SUBS $BitCount, $BitCount, #8 + SUB $BitCount, $BitCount, #($S0-$S1) + ENDIF + ADD $Symbol, $Symbol, $T1, LSR #(31-$S1) ;// add 2*(next $S1 bits) to + BIC $Symbol, $Symbol, #1 ;// table offset + B %BT01 ;// load next table entry +02 + ;// BitCount range now depend on the route here + ;// if (first step) S0 <= BitCount <= 7+S0 <=15 + ;// else if (2*S0-S1<=8) S0 <= BitCount <= 7+(2*S0-S1) <=15 + ;// else S1 <= BitCount <= 7+S1 <=15 + + SUB $BitCount, $BitCount, $Symbol, LSR#13 + BIC $Symbol, $T1, #0xF000 + MEND + + + ;// Add an offset number of bits + ;// + ;// Outputs destination byte and bit index values which corresponds to an offset number of bits + ;// from the current location. This is used to compare bitstream positions using. M_BD_CMP. + ;// + ;// Input Registers: + ;// + ;// $Offset - Offset to be added in bits. + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $ByteIndex - Destination pBitStream pointer after adding the Offset. + ;// This value is 4 bytes ahead; subtract 4 to get the exact + ;// pointer (as in M_BD_FINI). For use with M_BD_CMP the subtract is not needed. + ;// $BitIndex - Destination BitCount after the addition of Offset number of bits + ;// $Offset - corrupted (BitCount is added to it) + ;// + MACRO + M_BD_ADD $ByteIndex, $BitIndex, $Offset + + ;// ($ByteIndex,$BitIndex) = Current position + $Offset bits + ADD $Offset, $Offset, $BitCount + AND $BitIndex, $Offset, #7 + ADD $ByteIndex, $pBitStream, $Offset, ASR #3 + MEND + + ;// Move bitstream pointers to the location given + ;// + ;// Sets $pBitStream and $BitCount to the destination byte and bit index + ;// values given (calculated using M_BD_ADD). + ;// NOTE(review): $BitBuffer is not reloaded by this macro - confirm that + ;// callers refill it before decoding further bits. + ;// + ;// Input Registers: + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// $ByteIndex - Destination pBitStream pointer after move. + ;// This value is 4 bytes ahead; subtract 4 to get the exact + ;// pointer (as in M_BD_FINI). + ;// $BitIndex - Destination BitCount after the move + ;// + ;// Output Registers: + ;// + ;// $pBitStream \ + ;// } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_MOV $ByteIndex, $BitIndex + + ;// ($pBitStream, $BitCount) = ($ByteIndex,$BitIndex) + MOV $BitCount, $BitIndex + MOV $pBitStream, $ByteIndex + MEND + + ;// Bitstream Compare + ;// + ;// Compares the bitstream position with a destination position. The destination position + ;// is held in two input registers which are calculated using the M_BD_ADD macro + ;// + ;// Input Registers: + ;// + ;// $ByteIndex - Destination pBitStream pointer, (4 bytes ahead as described in M_BD_ADD) + ;// $BitIndex - Destination BitCount + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// FLAGS - GE if destination is reached, LT if destination is ahead + ;// $T1 - corrupted temp/scratch register + ;// + MACRO + M_BD_CMP $ByteIndex, $BitIndex, $T1 + + ;// Return flags set by (current position)-($ByteIndex,$BitIndex) + ;// so GE means that we have reached the indicated position + ;// The bit indexes are compared (CMPEQ) only when the byte positions are equal + + ADD $T1, $pBitStream, $BitCount, LSR #3 + CMP $T1, $ByteIndex + AND $T1, $BitCount, #7 + CMPEQ $T1, $BitIndex + MEND + + + ;// Bitstream Decode finalise + ;// + ;// Writes back the bitstream state to the bitstream pointers + ;// + ;// Input Registers: + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $ppBitStream - pointer to pointer to the next bitstream byte + ;// $pBitOffset - pointer to the number of bits used in the current byte (0..7) + ;// $pBitStream \ + ;// $BitBuffer } these registers are corrupted + ;// $BitCount / + ;// + MACRO + M_BD_FINI $ppBitStream, $pBitOffset + + ;// Advance pointer by the number of whole bytes in BitCount (BitCount>>3) + ;// and keep only the bit offset within the byte (BitCount & 7) + ADD $pBitStream, $pBitStream, $BitCount, LSR#3 + AND $BitCount, $BitCount, #7 + + ;// Now move back 32 bits to reach the first unused bit + SUB $pBitStream, $pBitStream, #4 + + ;// Store out bitstream state + STR $BitCount, [$pBitOffset] + STR $pBitStream, [$ppBitStream] + MEND + + END +
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Bitstream.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Bitstream.h new file mode 100644 index 0000000..4f9bc3b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Bitstream.h @@ -0,0 +1,212 @@ +/** + * + * File Name: armCOMM_Bitstream.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File: armCOMM_Bitstream.h + * Brief: Declares common APIs/data types used across the OpenMAX Encoders/Decoders. + * + */ + +#ifndef _armCodec_H_ +#define _armCodec_H_ +/* NOTE(review): the guard name _armCodec_H_ does not match the file name armCOMM_Bitstream.h */ + +#include "omxtypes.h" + +typedef struct { + OMX_U8 codeLen; + OMX_U32 codeWord; +} ARM_VLC32; + +/* ARM_VLC32 holds one variable length code: codeWord is the code and codeLen its length (presumably in bits - confirm against armPackVLC32/armUnPackVLC32). */ + +/** + * Function: armLookAheadBits() + * + * Description: + * Get the next N bits from the bitstream without advancing the bitstream pointer + * + * Parameters: + * [in] **ppBitStream + * [in] *pOffset + * [in] N=1...32 + * + * Returns: Value (the N bits looked at) + */ + +OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N); + +/** + * Function: armGetBits() + * + * Description: + * Read N bits from the bitstream + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * [in] N=1..32 + * + * [out] *ppBitStream + * [out] *pOffset + * Returns: Value (the N bits read) + */ + +OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N); + +/** + * Function: armByteAlign() + * + * Description: + * Align the pointer *ppBitStream to the next byte boundary + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + +OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset); + +/** + * Function: armSkipBits() + * + * Description: + * Skip N bits from the value at *ppBitStream + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * 
[in] N + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + +OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N); + +/*************************************** + * Variable bit length Decode + ***************************************/ + +/** + * Function: armUnPackVLC32() + * + * Description: + * Variable length decode of a variable length symbol (max size 32 bits) read from + * the bit stream pointed to by *ppBitStream at *pOffset, using the table + * pointed to by pCodeBook + * + * Parameters: + * [in] **ppBitStream + * [in] *pOffset + * [in] pCodeBook + * + * [out] **ppBitStream + * [out] *pOffset + * + * Returns : Code Book Index if successful. + * : "ARM_NO_CODEBOOK_INDEX = 0xFFFF" if search fails. + **/ + +#define ARM_NO_CODEBOOK_INDEX (OMX_U16)(0xFFFF) +/* NOTE(review): the expansion above is a cast expression without outer parentheses; confirm no use site depends on operator precedence around it. */ + +OMX_U16 armUnPackVLC32( + const OMX_U8 **ppBitStream, + OMX_INT *pOffset, + const ARM_VLC32 *pCodeBook +); + +/*************************************** + * Fixed bit length Encode + ***************************************/ + +/** + * Function: armPackBits + * + * Description: + * Pack a VLC code word into the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte + * in the bit stream. + * [in] pOffset pointer to the bit position in the byte + * pointed by *ppBitStream. Valid within 0 + * to 7. + * [in] codeWord Code word that need to be inserted in to the + * bitstream + * [in] codeLength Length of the code word valid range 1...32 + * + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMX_RESULT result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armPackBits ( + OMX_U8 **ppBitStream, + OMX_INT *pOffset, + OMX_U32 codeWord, + OMX_INT codeLength +); + +/*************************************** + * Variable bit length Encode + ***************************************/ + +/** + * Function: armPackVLC32 + * + * Description: + * Pack a VLC code word into the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte + * in the bit stream. + * [in] pBitOffset pointer to the bit position in the byte + * pointed by *ppBitStream. Valid within 0 + * to 7. + * [in] code VLC code word that need to be inserted in to the + * bitstream + * + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMX_RESULT result. See enumeration for possible result codes. + * + */ + +OMXResult armPackVLC32 ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + ARM_VLC32 code +); + +#endif /*_armCodec_H_*/ + +/*End of File*/ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCTTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCTTable.h new file mode 100644 index 0000000..d5db32f --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCTTable.h @@ -0,0 +1,40 @@ +/** + * + * + * File Name: armCOMM_IDCTTable.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File : armCOMM_IDCTTable.h + * Description : Contains declarations of tables for IDCT calculation. 
+ * + */ + +#ifndef _armCOMM_IDCTTable_H_ +#define _armCOMM_IDCTTable_H_ + +#include "omxtypes.h" + + /* Table of s(u)*A(u)*A(v)/16 at Q15 + * s(u)=1.0 0 <= u <= 5 + * s(6)=2.0 + * s(7)=4.0 + * A(0) = 2*sqrt(2) + * A(u) = 4*cos(u*pi/16) for (u!=0) + */ +extern const OMX_U16 armCOMM_IDCTPreScale [64]; +extern const OMX_U16 armCOMM_IDCTCoef [4]; + +#endif /* _armCOMM_IDCTTable_H_ */ + + +/* End of File */ + + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCT_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCT_s.h new file mode 100644 index 0000000..03f7137 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_IDCT_s.h @@ -0,0 +1,1445 @@ +;// +;// This confidential and proprietary software may be used only as +;// authorised by a licensing agreement from ARM Limited +;// (C) COPYRIGHT 2004 ARM Limited +;// ALL RIGHTS RESERVED +;// The entire notice above must be reproduced on all authorised +;// copies and copies may only be made to the extent permitted +;// by a licensing agreement from ARM Limited. +;// +;// IDCT_s.s +;// +;// Inverse DCT module +;// +;// +;// ALGORITHM DESCRIPTION +;// +;// The 8x8 2D IDCT is performed by calculating a 1D IDCT for each +;// column and then a 1D IDCT for each row. +;// +;// The 8-point 1D IDCT is defined by +;// f(x) = (C(0)*T(0)*c(0,x) + ... + C(7)*T(7)*c(7,x))/2 +;// +;// C(u) = 1/sqrt(2) if u=0 or 1 if u!=0 +;// c(u,x) = cos( (2x+1)*u*pi/16 ) +;// +;// We compute the 8-point 1D IDCT using the reverse of +;// the Arai-Agui-Nakajima flow graph which we split into +;// 5 stages named in reverse order to identify with the +;// forward DCT. 
Direct inversion of the forward formulae +;// in file FDCT_s.s gives: +;// +;// IStage 5: j(u) = T(u)*A(u) [ A(u)=4*C(u)*c(u,0) ] +;// [ A(0) = 2*sqrt(2) +;// A(u) = 4*cos(u*pi/16) for (u!=0) ] +;// +;// IStage 4: i0 = j0 i1 = j4 +;// i3 = (j2+j6)/2 i2 = (j2-j6)/2 +;// i7 = (j5+j3)/2 i4 = (j5-j3)/2 +;// i5 = (j1+j7)/2 i6 = (j1-j7)/2 +;// +;// IStage 3: h0 = (i0+i1)/2 h1 = (i0-i1)/2 +;// h2 = (i2*sqrt2)-i3 h3 = i3 +;// h4 = cos(pi/8)*i4 + sin(pi/8)*i6 +;// h6 = -sin(pi/8)*i4 + cos(pi/8)*i6 +;// [ The above two lines rotate by -(pi/8) ] +;// h5 = (i5-i7)/sqrt2 h7 = (i5+i7)/2 +;// +;// IStage 2: g0 = (h0+h3)/2 g3 = (h0-h3)/2 +;// g1 = (h1+h2)/2 g2 = (h1-h2)/2 +;// g7 = h7 g6 = h6 - h7 +;// g5 = h5 - g6 g4 = h4 - g5 +;// +;// IStage 1: f0 = (g0+g7)/2 f7 = (g0-g7)/2 +;// f1 = (g1+g6)/2 f6 = (g1-g6)/2 +;// f2 = (g2+g5)/2 f5 = (g2-g5)/2 +;// f3 = (g3+g4)/2 f4 = (g3-g4)/2 +;// +;// Note that most coefficients are halved 3 times during the +;// above calculation. We can rescale the algorithm dividing +;// the input by 8 to remove the halvings. +;// +;// IStage 5: j(u) = T(u)*A(u)/8 +;// +;// IStage 4: i0 = j0 i1 = j4 +;// i3 = j2 + j6 i2 = j2 - j6 +;// i7 = j5 + j3 i4 = j5 - j3 +;// i5 = j1 + j7 i6 = j1 - j7 +;// +;// IStage 3: h0 = i0 + i1 h1 = i0 - i1 +;// h2 = (i2*sqrt2)-i3 h3 = i3 +;// h4 = 2*( cos(pi/8)*i4 + sin(pi/8)*i6) +;// h6 = 2*(-sin(pi/8)*i4 + cos(pi/8)*i6) +;// h5 = (i5-i7)*sqrt2 h7 = i5 + i7 +;// +;// IStage 2: g0 = h0 + h3 g3 = h0 - h3 +;// g1 = h1 + h2 g2 = h1 - h2 +;// g7 = h7 g6 = h6 - h7 +;// g5 = h5 - g6 g4 = h4 - g5 +;// +;// IStage 1: f0 = g0 + g7 f7 = g0 - g7 +;// f1 = g1 + g6 f6 = g1 - g6 +;// f2 = g2 + g5 f5 = g2 - g5 +;// f3 = g3 + g4 f4 = g3 - g4 +;// +;// Note: +;// 1. The scaling by A(u)/8 can often be combined with inverse +;// quantization. The column and row scalings can be combined. +;// 2. The flowgraph in the AAN paper has h4,g6 negated compared +;// to the above code but is otherwise identical. +;// 3. 
The rotation by -pi/8 can be peformed using three multiplies +;// Eg c*i4+s*i6 = (i6-i4)*s + (c+s)*i4 +;// -s*i4+c*i6 = (i6-i4)*s + (c-s)*i6 +;// 4. If |T(u)|<=1 then from the IDCT definition, +;// |f(x)| <= ((1/sqrt2) + |c(1,x)| + .. + |c(7,x)|)/2 +;// = ((1/sqrt2) + cos(pi/16) + ... + cos(7*pi/16))/2 +;// = ((1/sqrt2) + (cot(pi/32)-1)/2)/2 +;// = (1 + cos(pi/16) + cos(2pi/16) + cos(3pi/16))/sqrt(2) +;// = (approx)2.64 +;// So the max gain of the 2D IDCT is ~x7.0 = 3 bits. +;// The table below shows input patterns generating the maximum +;// value of |f(x)| for input in the range |T(u)|<=1. M=-1, P=+1 +;// InputPattern Max |f(x)| +;// PPPPPPPP |f0| = 2.64 +;// PPPMMMMM |f1| = 2.64 +;// PPMMMPPP |f2| = 2.64 +;// PPMMPPMM |f3| = 2.64 +;// PMMPPMMP |f4| = 2.64 +;// PMMPMMPM |f5| = 2.64 +;// PMPPMPMP |f6| = 2.64 +;// PMPMPMPM |f7| = 2.64 +;// Note that this input pattern is the transpose of the +;// corresponding max input pattern for the FDCT. + +;// Arguments + +pSrc RN 0 ;// source data buffer +Stride RN 1 ;// destination stride in bytes +pDest RN 2 ;// destination data buffer +pScale RN 3 ;// pointer to scaling table + + + ;// DCT Inverse Macro + ;// The DCT code should be parametrized according + ;// to the following inputs: + ;// $outsize = "u8" : 8-bit unsigned data saturated (0 to +255) + ;// "s9" : 16-bit signed data saturated to 9-bit (-256 to +255) + ;// "s16" : 16-bit signed data not saturated (max size ~+/-14273) + ;// $inscale = "s16" : signed 16-bit aan-scale table, Q15 format, with 4 byte alignment + ;// "s32" : signed 32-bit aan-scale table, Q23 format, with 4 byte alignment + ;// $stride = "s" : the Stride register is saved (M_STR Stride, pStride) at macro entry; + ;// the effect of other values is not visible in this part of the file + ;// + ;// Inputs: + ;// pSrc = r0 = Pointer to input data + ;// Range is -256 to +255 (9-bit) + ;// Stride = r1 = Stride between input lines + ;// NOTE(review): the register definition above calls Stride the + ;// destination stride in bytes - confirm which description is correct + ;// pDest = r2 = Pointer to output data + ;// pScale = r3 = Pointer to aan-scale table in the format defined by $inscale + + + + MACRO + M_IDCT $outsize, $inscale, $stride + LCLA SHIFT + + + IF ARM1136JS + +;// REGISTER ALLOCATION +;// 
This is hard since we have 8 values, 9 free registers and each +;// butterfly requires a temporary register. We also want to +;// maintain register order so we can use LDM/STM. The table below +;// summarises the register allocation that meets all these criteria. +;// a=1stcol, b=2ndcol, f,g,h,i are dataflow points described above. +;// +;// r1 a01 g0 h0 +;// r4 b01 f0 g1 h1 i0 +;// r5 a23 f1 g2 i1 +;// r6 b23 f2 g3 h2 i2 +;// r7 a45 f3 h3 i3 +;// r8 b45 f4 g4 h4 i4 +;// r9 a67 f5 g5 h5 i5 +;// r10 b67 f6 g6 h6 i6 +;// r11 f7 g7 h7 i7 +;// +ra01 RN 1 +rb01 RN 4 +ra23 RN 5 +rb23 RN 6 +ra45 RN 7 +rb45 RN 8 +ra67 RN 9 +rb67 RN 10 +rtmp RN 11 +csPiBy8 RN 12 ;// [ (Sin(pi/8)@Q15), (Cos(pi/8)@Q15) ] +LoopRR2 RN 14 ;// [ LoopNumber<<13 , (1/Sqrt(2))@Q15 ] +;// Transpose allocation +xft RN ra01 +xf0 RN rb01 +xf1 RN ra23 +xf2 RN rb23 +xf3 RN ra45 +xf4 RN rb45 +xf5 RN ra67 +xf6 RN rb67 +xf7 RN rtmp +;// IStage 1 allocation +xg0 RN xft +xg1 RN xf0 +xg2 RN xf1 +xg3 RN xf2 +xgt RN xf3 +xg4 RN xf4 +xg5 RN xf5 +xg6 RN xf6 +xg7 RN xf7 +;// IStage 2 allocation +xh0 RN xg0 +xh1 RN xg1 +xht RN xg2 +xh2 RN xg3 +xh3 RN xgt +xh4 RN xg4 +xh5 RN xg5 +xh6 RN xg6 +xh7 RN xg7 +;// IStage 3,4 allocation +xit RN xh0 +xi0 RN xh1 +xi1 RN xht +xi2 RN xh2 +xi3 RN xh3 +xi4 RN xh4 +xi5 RN xh5 +xi6 RN xh6 +xi7 RN xh7 + + M_STR pDest, ppDest + IF "$stride"="s" + M_STR Stride, pStride + ENDIF + M_ADR pDest, pBlk + LDR csPiBy8, =0x30fc7642 + LDR LoopRR2, =0x00005a82 + +v6_idct_col$_F + ;// Load even values + LDR xi4, [pSrc], #4 ;// j0 + LDR xi5, [pSrc, #4*16-4] ;// j4 + LDR xi6, [pSrc, #2*16-4] ;// j2 + LDR xi7, [pSrc, #6*16-4] ;// j6 + + ;// Scale Even Values + IF "$inscale"="s16" ;// 16x16 mul +SHIFT SETA 12 + LDR xi0, [pScale], #4 + LDR xi1, [pScale, #4*16-4] + LDR xi2, [pScale, #2*16-4] + MOV xit, #1<<(SHIFT-1) + SMLABB xi3, xi0, xi4, xit + SMLATT xi4, xi0, xi4, xit + SMLABB xi0, xi1, xi5, xit + SMLATT xi5, xi1, xi5, xit + MOV xi3, xi3, ASR #SHIFT + PKHBT xi4, xi3, xi4, LSL #(16-SHIFT) + LDR xi3, 
[pScale, #6*16-4] + SMLABB xi1, xi2, xi6, xit + SMLATT xi6, xi2, xi6, xit + MOV xi0, xi0, ASR #SHIFT + PKHBT xi5, xi0, xi5, LSL #(16-SHIFT) + SMLABB xi2, xi3, xi7, xit + SMLATT xi7, xi3, xi7, xit + MOV xi1, xi1, ASR #SHIFT + PKHBT xi6, xi1, xi6, LSL #(16-SHIFT) + MOV xi2, xi2, ASR #SHIFT + PKHBT xi7, xi2, xi7, LSL #(16-SHIFT) + ENDIF + IF "$inscale"="s32" ;// 32x16 mul +SHIFT SETA (12+8-16) + MOV xit, #1<<(SHIFT-1) + LDR xi0, [pScale], #8 + LDR xi1, [pScale, #0*32+4-8] + LDR xi2, [pScale, #4*32-8] + LDR xi3, [pScale, #4*32+4-8] + SMLAWB xi0, xi0, xi4, xit + SMLAWT xi1, xi1, xi4, xit + SMLAWB xi2, xi2, xi5, xit + SMLAWT xi3, xi3, xi5, xit + MOV xi0, xi0, ASR #SHIFT + PKHBT xi4, xi0, xi1, LSL #(16-SHIFT) + MOV xi2, xi2, ASR #SHIFT + PKHBT xi5, xi2, xi3, LSL #(16-SHIFT) + LDR xi0, [pScale, #2*32-8] + LDR xi1, [pScale, #2*32+4-8] + LDR xi2, [pScale, #6*32-8] + LDR xi3, [pScale, #6*32+4-8] + SMLAWB xi0, xi0, xi6, xit + SMLAWT xi1, xi1, xi6, xit + SMLAWB xi2, xi2, xi7, xit + SMLAWT xi3, xi3, xi7, xit + MOV xi0, xi0, ASR #SHIFT + PKHBT xi6, xi0, xi1, LSL #(16-SHIFT) + MOV xi2, xi2, ASR #SHIFT + PKHBT xi7, xi2, xi3, LSL #(16-SHIFT) + ENDIF + + ;// Load odd values + LDR xi0, [pSrc, #1*16-4] ;// j1 + LDR xi1, [pSrc, #7*16-4] ;// j7 + LDR xi2, [pSrc, #5*16-4] ;// j5 + LDR xi3, [pSrc, #3*16-4] ;// j3 + + IF {TRUE} + ;// shortcut if odd values 0 + TEQ xi0, #0 + TEQEQ xi1, #0 + TEQEQ xi2, #0 + TEQEQ xi3, #0 + BEQ v6OddZero$_F + ENDIF + + ;// Store scaled even values + STMIA pDest, {xi4, xi5, xi6, xi7} + + ;// Scale odd values + IF "$inscale"="s16" + ;// Perform AAN Scale + LDR xi4, [pScale, #1*16-4] + LDR xi5, [pScale, #7*16-4] + LDR xi6, [pScale, #5*16-4] + SMLABB xi7, xi0, xi4, xit + SMLATT xi0, xi0, xi4, xit + SMLABB xi4, xi1, xi5, xit + SMLATT xi1, xi1, xi5, xit + MOV xi7, xi7, ASR #SHIFT + PKHBT xi0, xi7, xi0, LSL #(16-SHIFT) + LDR xi7, [pScale, #3*16-4] + SMLABB xi5, xi2, xi6, xit + SMLATT xi2, xi2, xi6, xit + MOV xi4, xi4, ASR #SHIFT + PKHBT xi1, xi4, xi1, LSL #(16-SHIFT) 
+ SMLABB xi6, xi3, xi7, xit + SMLATT xi3, xi3, xi7, xit + MOV xi5, xi5, ASR #SHIFT + PKHBT xi2, xi5, xi2, LSL #(16-SHIFT) + MOV xi6, xi6, ASR #SHIFT + PKHBT xi3, xi6, xi3, LSL #(16-SHIFT) + ENDIF + IF "$inscale"="s32" ;// 32x16 mul + LDR xi4, [pScale, #1*32-8] + LDR xi5, [pScale, #1*32+4-8] + LDR xi6, [pScale, #7*32-8] + LDR xi7, [pScale, #7*32+4-8] + SMLAWB xi4, xi4, xi0, xit + SMLAWT xi5, xi5, xi0, xit + SMLAWB xi6, xi6, xi1, xit + SMLAWT xi7, xi7, xi1, xit + MOV xi4, xi4, ASR #SHIFT + PKHBT xi0, xi4, xi5, LSL #(16-SHIFT) + MOV xi6, xi6, ASR #SHIFT + PKHBT xi1, xi6, xi7, LSL #(16-SHIFT) + LDR xi4, [pScale, #5*32-8] + LDR xi5, [pScale, #5*32+4-8] + LDR xi6, [pScale, #3*32-8] + LDR xi7, [pScale, #3*32+4-8] + SMLAWB xi4, xi4, xi2, xit + SMLAWT xi5, xi5, xi2, xit + SMLAWB xi6, xi6, xi3, xit + SMLAWT xi7, xi7, xi3, xit + MOV xi4, xi4, ASR #SHIFT + PKHBT xi2, xi4, xi5, LSL #(16-SHIFT) + MOV xi6, xi6, ASR #SHIFT + PKHBT xi3, xi6, xi7, LSL #(16-SHIFT) + ENDIF + + SHADD16 xi5, xi0, xi1 ;// (j1+j7)/2 + SSUB16 xi6, xi0, xi1 ;// j1-j7 + SHADD16 xi7, xi2, xi3 ;// (j5+j3)/2 + SSUB16 xi4, xi2, xi3 ;// j5-j3 + + SSUB16 xi3, xi5, xi7 ;// (i5-i7)/2 + + PKHBT xi0, xi6, xi4, LSL#16 ;// [i4,i6] row a + PKHTB xi1, xi4, xi6, ASR#16 ;// [i4,i6] row b + + SMUADX xi2, xi0, csPiBy8 ;// rowa by [c,s] + SMUADX xi4, xi1, csPiBy8 ;// rowb by [c,s] + SMUSD xi0, xi0, csPiBy8 ;// rowa by [-s,c] + SMUSD xi6, xi1, csPiBy8 ;// rowb by [-s,c] + + SMULBB xi1, xi3, LoopRR2 + SMULTB xi3, xi3, LoopRR2 + + PKHTB xh4, xi4, xi2, ASR#16 ;// h4/4 + PKHTB xh6, xi6, xi0, ASR#16 ;// h6/4 + SHADD16 xh7, xi5, xi7 ;// (i5+i7)/4 + + ;// xi0,xi1,xi2,xi3 now free + ;// IStage 4,3, rows 2to3 x1/2 + + MOV xi3, xi3, LSL #1 + PKHTB xh5, xi3, xi1, ASR#15 ;// h5/4 + LDRD xi0, [pDest, #8] ;// j2,j6 scaled + + ;// IStage 2, rows4to7 + SSUB16 xg6, xh6, xh7 + SSUB16 xg5, xh5, xg6 + SSUB16 xg4, xh4, xg5 + + SSUB16 xi2, xi0, xi1 ;// (j2-j6) + SHADD16 xi3, xi0, xi1 ;// (j2+j6)/2 + + SMULBB xi0, xi2, LoopRR2 + SMULTB xi2, xi2, 
LoopRR2 + + MOV xi2, xi2, LSL #1 + PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4 + + ;// xi0, xi1 now free + ;// IStage 4,3 rows 0to1 x 1/2 + LDRD xi0, [pDest] ;// j0, j4 scaled + SSUB16 xh2, xh2, xi3 + ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows + + SHADD16 xh0, xi0, xi1 + SHSUB16 xh1, xi0, xi1 + + ;// IStage 2 rows 0to3 x 1/2 + SHSUB16 xg2, xh1, xh2 + SHADD16 xg1, xh1, xh2 + SHSUB16 xg3, xh0, xh3 + SHADD16 xg0, xh0, xh3 + + ;// IStage 1 all rows + SADD16 xf3, xg3, xg4 + SSUB16 xf4, xg3, xg4 + SADD16 xf2, xg2, xg5 + SSUB16 xf5, xg2, xg5 + SADD16 xf1, xg1, xg6 + SSUB16 xf6, xg1, xg6 + SADD16 xf0, xg0, xg7 + SSUB16 xf7, xg0, xg7 + + ;// Transpose, store and loop + PKHBT ra01, xf0, xf1, LSL #16 + PKHTB rb01, xf1, xf0, ASR #16 + + PKHBT ra23, xf2, xf3, LSL #16 + PKHTB rb23, xf3, xf2, ASR #16 + + PKHBT ra45, xf4, xf5, LSL #16 + PKHTB rb45, xf5, xf4, ASR #16 + + PKHBT ra67, xf6, xf7, LSL #16 + STMIA pDest!, {ra01, ra23, ra45, ra67} + PKHTB rb67, xf7, xf6, ASR #16 + STMIA pDest!, {rb01, rb23, rb45, rb67} + BCC v6_idct_col$_F + + SUB pSrc, pDest, #(64*2) + M_LDR pDest, ppDest + IF "$stride"="s" + M_LDR pScale, pStride + ENDIF + B v6_idct_row$_F + +v6OddZero$_F + SSUB16 xi2, xi6, xi7 ;// (j2-j6) + SHADD16 xi3, xi6, xi7 ;// (j2+j6)/2 + + SMULBB xi0, xi2, LoopRR2 + SMULTB xi2, xi2, LoopRR2 + + MOV xi2, xi2, LSL #1 + PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4 + SSUB16 xh2, xh2, xi3 + + ;// xi0, xi1 now free + ;// IStage 4,3 rows 0to1 x 1/2 + + SHADD16 xh0, xi4, xi5 + SHSUB16 xh1, xi4, xi5 + + ;// IStage 2 rows 0to3 x 1/2 + SHSUB16 xg2, xh1, xh2 + SHADD16 xg1, xh1, xh2 + SHSUB16 xg3, xh0, xh3 + SHADD16 xg0, xh0, xh3 + + ;// IStage 1 all rows + MOV xf3, xg3 + MOV xf4, xg3 + MOV xf2, xg2 + MOV xf5, xg2 + MOV xf1, xg1 + MOV xf6, xg1 + MOV xf0, xg0 + MOV xf7, xg0 + + ;// Transpose + PKHBT ra01, xf0, xf1, LSL #16 + PKHTB rb01, xf1, xf0, ASR #16 + + PKHBT ra23, xf2, xf3, LSL #16 + PKHTB rb23, xf3, xf2, ASR #16 + + PKHBT ra45, xf4, xf5, LSL #16 + PKHTB rb45, xf5, xf4, ASR #16 + 
+ PKHBT ra67, xf6, xf7, LSL #16 + PKHTB rb67, xf7, xf6, ASR #16 + + STMIA pDest!, {ra01, ra23, ra45, ra67} + ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows + STMIA pDest!, {rb01, rb23, rb45, rb67} + + BCC v6_idct_col$_F + SUB pSrc, pDest, #(64*2) + M_LDR pDest, ppDest + IF "$stride"="s" + M_LDR pScale, pStride + ENDIF + + +v6_idct_row$_F + ;// IStage 4,3, rows4to7 x1/4 + LDR xit, =0x00010001 ;// rounding constant + LDR xi0, [pSrc, #1*16] ;// j1 + LDR xi1, [pSrc, #7*16] ;// 4*j7 + LDR xi2, [pSrc, #5*16] ;// j5 + LDR xi3, [pSrc, #3*16] ;// j3 + + SHADD16 xi1, xi1, xit ;// 2*j7 + SHADD16 xi1, xi1, xit ;// j7 + + SHADD16 xi5, xi0, xi1 ;// (j1+j7)/2 + SSUB16 xi6, xi0, xi1 ;// j1-j7 + SHADD16 xi7, xi2, xi3 ;// (j5+j3)/2 + SSUB16 xi4, xi2, xi3 ;// j5-j3 + + SSUB16 xi3, xi5, xi7 ;// (i5-i7)/2 + + PKHBT xi0, xi6, xi4, LSL#16 ;// [i4,i6] row a + PKHTB xi1, xi4, xi6, ASR#16 ;// [i4,i6] row b + + SMUADX xi2, xi0, csPiBy8 ;// rowa by [c,s] + SMUADX xi4, xi1, csPiBy8 ;// rowb by [c,s] + SMUSD xi0, xi0, csPiBy8 ;// rowa by [-s,c] + SMUSD xi6, xi1, csPiBy8 ;// rowb by [-s,c] + + SMULBB xi1, xi3, LoopRR2 + SMULTB xi3, xi3, LoopRR2 + + PKHTB xh4, xi4, xi2, ASR#16 ;// h4/4 + PKHTB xh6, xi6, xi0, ASR#16 ;// h6/4 + SHADD16 xh7, xi5, xi7 ;// (i5+i7)/4 + + MOV xi3, xi3, LSL #1 + PKHTB xh5, xi3, xi1, ASR#15 ;// h5/4 + + ;// xi0,xi1,xi2,xi3 now free + ;// IStage 4,3, rows 2to3 x1/2 + + LDR xi0, [pSrc, #2*16] ;// j2 + LDR xi1, [pSrc, #6*16] ;// 2*j6 + + ;// IStage 2, rows4to7 + SSUB16 xg6, xh6, xh7 + SSUB16 xg5, xh5, xg6 + SSUB16 xg4, xh4, xg5 + + SHADD16 xi1, xi1, xit ;// j6 + SSUB16 xi2, xi0, xi1 ;// (j2-j6) + SHADD16 xi3, xi0, xi1 ;// (j2+j6)/2 + + SMULBB xi0, xi2, LoopRR2 + SMULTB xi2, xi2, LoopRR2 + + MOV xi2, xi2, LSL #1 + + PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4 + + ;// xi0, xi1 now free + ;// IStage 4,3 rows 0to1 x 1/2 + LDR xi1, [pSrc, #4*16] ;// j4 + LDR xi0, [pSrc], #4 ;// j0 + + SSUB16 xh2, xh2, xi3 + ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows + + ADD xi0, xi0, xit, 
LSL #2 ;// ensure correct round + SHADD16 xh0, xi0, xi1 ;// of DC result + SHSUB16 xh1, xi0, xi1 + + ;// IStage 2 rows 0to3 x 1/2 + SHSUB16 xg2, xh1, xh2 + SHADD16 xg1, xh1, xh2 + SHSUB16 xg3, xh0, xh3 + SHADD16 xg0, xh0, xh3 + + ;// IStage 1 all rows + SHADD16 xf3, xg3, xg4 + SHSUB16 xf4, xg3, xg4 + SHADD16 xf2, xg2, xg5 + SHSUB16 xf5, xg2, xg5 + SHADD16 xf1, xg1, xg6 + SHSUB16 xf6, xg1, xg6 + SHADD16 xf0, xg0, xg7 + SHSUB16 xf7, xg0, xg7 + + ;// Saturate + IF ("$outsize"="u8") + USAT16 xf0, #8, xf0 + USAT16 xf1, #8, xf1 + USAT16 xf2, #8, xf2 + USAT16 xf3, #8, xf3 + USAT16 xf4, #8, xf4 + USAT16 xf5, #8, xf5 + USAT16 xf6, #8, xf6 + USAT16 xf7, #8, xf7 + ENDIF + IF ("$outsize"="s9") + SSAT16 xf0, #9, xf0 + SSAT16 xf1, #9, xf1 + SSAT16 xf2, #9, xf2 + SSAT16 xf3, #9, xf3 + SSAT16 xf4, #9, xf4 + SSAT16 xf5, #9, xf5 + SSAT16 xf6, #9, xf6 + SSAT16 xf7, #9, xf7 + ENDIF + + ;// Transpose to Row, Pack and store + IF ("$outsize"="u8") + ORR xf0, xf0, xf1, LSL #8 ;// [ b1 b0 a1 a0 ] + ORR xf2, xf2, xf3, LSL #8 ;// [ b3 b2 a3 a2 ] + ORR xf4, xf4, xf5, LSL #8 ;// [ b5 b4 a5 a4 ] + ORR xf6, xf6, xf7, LSL #8 ;// [ b7 b6 a7 a6 ] + PKHBT ra01, xf0, xf2, LSL #16 + PKHTB rb01, xf2, xf0, ASR #16 + PKHBT ra23, xf4, xf6, LSL #16 + PKHTB rb23, xf6, xf4, ASR #16 + STMIA pDest, {ra01, ra23} + IF "$stride"="s" + ADD pDest, pDest, pScale + STMIA pDest, {rb01, rb23} + ADD pDest, pDest, pScale + ELSE + ADD pDest, pDest, #($stride) + STMIA pDest, {rb01, rb23} + ADD pDest, pDest, #($stride) + ENDIF + ENDIF + IF ("$outsize"="s9"):LOR:("$outsize"="s16") + PKHBT ra01, xf0, xf1, LSL #16 + PKHTB rb01, xf1, xf0, ASR #16 + + PKHBT ra23, xf2, xf3, LSL #16 + PKHTB rb23, xf3, xf2, ASR #16 + + PKHBT ra45, xf4, xf5, LSL #16 + PKHTB rb45, xf5, xf4, ASR #16 + + PKHBT ra67, xf6, xf7, LSL #16 + PKHTB rb67, xf7, xf6, ASR #16 + + STMIA pDest, {ra01, ra23, ra45, ra67} + IF "$stride"="s" + ADD pDest, pDest, pScale + STMIA pDest, {rb01, rb23, rb45, rb67} + ADD pDest, pDest, pScale + ELSE + ADD pDest, pDest, 
#($stride) + STMIA pDest, {rb01, rb23, rb45, rb67} + ADD pDest, pDest, #($stride) + ENDIF + ENDIF + + BCC v6_idct_row$_F + ENDIF ;// ARM1136JS + + + IF CortexA8 + +Src0 EQU 7 +Src1 EQU 8 +Src2 EQU 9 +Src3 EQU 10 +Src4 EQU 11 +Src5 EQU 12 +Src6 EQU 13 +Src7 EQU 14 +Tmp EQU 15 + +qXj0 QN Src0.S16 +qXj1 QN Src1.S16 +qXj2 QN Src2.S16 +qXj3 QN Src3.S16 +qXj4 QN Src4.S16 +qXj5 QN Src5.S16 +qXj6 QN Src6.S16 +qXj7 QN Src7.S16 +qXjt QN Tmp.S16 + +dXj0lo DN (Src0*2).S16 +dXj0hi DN (Src0*2+1).S16 +dXj1lo DN (Src1*2).S16 +dXj1hi DN (Src1*2+1).S16 +dXj2lo DN (Src2*2).S16 +dXj2hi DN (Src2*2+1).S16 +dXj3lo DN (Src3*2).S16 +dXj3hi DN (Src3*2+1).S16 +dXj4lo DN (Src4*2).S16 +dXj4hi DN (Src4*2+1).S16 +dXj5lo DN (Src5*2).S16 +dXj5hi DN (Src5*2+1).S16 +dXj6lo DN (Src6*2).S16 +dXj6hi DN (Src6*2+1).S16 +dXj7lo DN (Src7*2).S16 +dXj7hi DN (Src7*2+1).S16 +dXjtlo DN (Tmp*2).S16 +dXjthi DN (Tmp*2+1).S16 + +qXi0 QN qXj0 +qXi1 QN qXj4 +qXi2 QN qXj2 +qXi3 QN qXj7 +qXi4 QN qXj5 +qXi5 QN qXjt +qXi6 QN qXj1 +qXi7 QN qXj6 +qXit QN qXj3 + +dXi0lo DN dXj0lo +dXi0hi DN dXj0hi +dXi1lo DN dXj4lo +dXi1hi DN dXj4hi +dXi2lo DN dXj2lo +dXi2hi DN dXj2hi +dXi3lo DN dXj7lo +dXi3hi DN dXj7hi +dXi4lo DN dXj5lo +dXi4hi DN dXj5hi +dXi5lo DN dXjtlo +dXi5hi DN dXjthi +dXi6lo DN dXj1lo +dXi6hi DN dXj1hi +dXi7lo DN dXj6lo +dXi7hi DN dXj6hi +dXitlo DN dXj3lo +dXithi DN dXj3hi + +qXh0 QN qXit +qXh1 QN qXi0 +qXh2 QN qXi2 +qXh3 QN qXi3 +qXh4 QN qXi7 +qXh5 QN qXi5 +qXh6 QN qXi4 +qXh7 QN qXi1 +qXht QN qXi6 + +dXh0lo DN dXitlo +dXh0hi DN dXithi +dXh1lo DN dXi0lo +dXh1hi DN dXi0hi +dXh2lo DN dXi2lo +dXh2hi DN dXi2hi +dXh3lo DN dXi3lo +dXh3hi DN dXi3hi +dXh4lo DN dXi7lo +dXh4hi DN dXi7hi +dXh5lo DN dXi5lo +dXh5hi DN dXi5hi +dXh6lo DN dXi4lo +dXh6hi DN dXi4hi +dXh7lo DN dXi1lo +dXh7hi DN dXi1hi +dXhtlo DN dXi6lo +dXhthi DN dXi6hi + +qXg0 QN qXh2 +qXg1 QN qXht +qXg2 QN qXh1 +qXg3 QN qXh0 +qXg4 QN qXh4 +qXg5 QN qXh5 +qXg6 QN qXh6 +qXg7 QN qXh7 +qXgt QN qXh3 + +qXf0 QN qXg6 +qXf1 QN qXg5 +qXf2 QN qXg4 +qXf3 QN qXgt +qXf4 QN qXg3 
+qXf5 QN qXg2 +qXf6 QN qXg1 +qXf7 QN qXg0 +qXft QN qXg7 + + +qXt0 QN 1.S32 +qXt1 QN 2.S32 +qT0lo QN 1.S32 +qT0hi QN 2.S32 +qT1lo QN 3.S32 +qT1hi QN 4.S32 +qScalelo QN 5.S32 ;// used to read post scale values +qScalehi QN 6.S32 +qTemp0 QN 5.S32 +qTemp1 QN 6.S32 + + +Scale1 EQU 6 +Scale2 EQU 15 +qScale1 QN Scale1.S16 +qScale2 QN Scale2.S16 +dScale1lo DN (Scale1*2).S16 +dScale1hi DN (Scale1*2+1).S16 +dScale2lo DN (Scale2*2).S16 +dScale2hi DN (Scale2*2+1).S16 + +dCoefs DN 0.S16 ;// Scale coefficients in format {[0] [C] [S] [InvSqrt2]} +InvSqrt2 DN dCoefs[0] ;// 1/sqrt(2) in Q15 +S DN dCoefs[1] ;// Sin(PI/8) in Q15 +C DN dCoefs[2] ;// Cos(PI/8) in Q15 + +pTemp RN 12 + + + IMPORT armCOMM_IDCTCoef + + VLD1 {qXj0,qXj1}, [pSrc @64]! + VLD1 {qXj2,qXj3}, [pSrc @64]! + VLD1 {qXj4,qXj5}, [pSrc @64]! + VLD1 {qXj6,qXj7}, [pSrc @64]! + + ;// Load PreScale and multiply with Src + ;// IStage 4 + + IF "$inscale"="s16" ;// 16X16 Mul + M_IDCT_PRESCALE16 + ENDIF + + IF "$inscale"="s32" ;// 32X32 Mul + M_IDCT_PRESCALE32 + ENDIF + + ;// IStage 3 + VQRDMULH qXi2, qXi2, InvSqrt2 ;// i2/sqrt(2) + VHADD qXh0, qXi0, qXi1 ;// (i0+i1)/2 + VHSUB qXh1, qXi0, qXi1 ;// (i0-i1)/2 + VHADD qXh7, qXi5, qXi7 ;// (i5+i7)/4 + VSUB qXh5, qXi5, qXi7 ;// (i5-i7)/2 + VQRDMULH qXh5, qXh5, InvSqrt2 ;// h5/sqrt(2) + VSUB qXh2, qXi2, qXi3 ;// h2, h3 + + VMULL qXt0, dXi4lo, C ;// c*i4 + VMLAL qXt0, dXi6lo, S ;// c*i4+s*i6 + VMULL qXt1, dXi4hi, C + VMLAL qXt1, dXi6hi, S + VSHRN dXh4lo, qXt0, #16 ;// h4 + VSHRN dXh4hi, qXt1, #16 + + VMULL qXt0, dXi6lo, C ;// c*i6 + VMLSL qXt0, dXi4lo, S ;// -s*i4 + c*i6 + VMULL qXt1, dXi6hi, C + VMLSL qXt1, dXi4hi, S + VSHRN dXh6lo, qXt0, #16 ;// h6 + VSHRN dXh6hi, qXt1, #16 + + ;// IStage 2 + VSUB qXg6, qXh6, qXh7 + VSUB qXg5, qXh5, qXg6 + VSUB qXg4, qXh4, qXg5 + VHADD qXg1, qXh1, qXh2 ;// (h1+h2)/2 + VHSUB qXg2, qXh1, qXh2 ;// (h1-h2)/2 + VHADD qXg0, qXh0, qXh3 ;// (h0+h3)/2 + VHSUB qXg3, qXh0, qXh3 ;// (h0-h3)/2 + + ;// IStage 1 all rows + VADD qXf3, qXg3, qXg4 + VSUB qXf4, qXg3, 
qXg4 + VADD qXf2, qXg2, qXg5 + VSUB qXf5, qXg2, qXg5 + VADD qXf1, qXg1, qXg6 + VSUB qXf6, qXg1, qXg6 + VADD qXf0, qXg0, qXg7 + VSUB qXf7, qXg0, qXg7 + + ;// Transpose, store and loop +XTR0 EQU Src5 +XTR1 EQU Tmp +XTR2 EQU Src6 +XTR3 EQU Src7 +XTR4 EQU Src3 +XTR5 EQU Src0 +XTR6 EQU Src1 +XTR7 EQU Src2 +XTRt EQU Src4 + +qA0 QN XTR0.S32 ;// for XTRpose +qA1 QN XTR1.S32 +qA2 QN XTR2.S32 +qA3 QN XTR3.S32 +qA4 QN XTR4.S32 +qA5 QN XTR5.S32 +qA6 QN XTR6.S32 +qA7 QN XTR7.S32 + +dB0 DN XTR0*2+1 ;// for using VSWP +dB1 DN XTR1*2+1 +dB2 DN XTR2*2+1 +dB3 DN XTR3*2+1 +dB4 DN XTR4*2 +dB5 DN XTR5*2 +dB6 DN XTR6*2 +dB7 DN XTR7*2 + + + VTRN qXf0, qXf1 + VTRN qXf2, qXf3 + VTRN qXf4, qXf5 + VTRN qXf6, qXf7 + VTRN qA0, qA2 + VTRN qA1, qA3 + VTRN qA4, qA6 + VTRN qA5, qA7 + VSWP dB0, dB4 + VSWP dB1, dB5 + VSWP dB2, dB6 + VSWP dB3, dB7 + + +qYj0 QN qXf0 +qYj1 QN qXf1 +qYj2 QN qXf2 +qYj3 QN qXf3 +qYj4 QN qXf4 +qYj5 QN qXf5 +qYj6 QN qXf6 +qYj7 QN qXf7 +qYjt QN qXft + +dYj0lo DN (XTR0*2).S16 +dYj0hi DN (XTR0*2+1).S16 +dYj1lo DN (XTR1*2).S16 +dYj1hi DN (XTR1*2+1).S16 +dYj2lo DN (XTR2*2).S16 +dYj2hi DN (XTR2*2+1).S16 +dYj3lo DN (XTR3*2).S16 +dYj3hi DN (XTR3*2+1).S16 +dYj4lo DN (XTR4*2).S16 +dYj4hi DN (XTR4*2+1).S16 +dYj5lo DN (XTR5*2).S16 +dYj5hi DN (XTR5*2+1).S16 +dYj6lo DN (XTR6*2).S16 +dYj6hi DN (XTR6*2+1).S16 +dYj7lo DN (XTR7*2).S16 +dYj7hi DN (XTR7*2+1).S16 +dYjtlo DN (XTRt*2).S16 +dYjthi DN (XTRt*2+1).S16 + +qYi0 QN qYj0 +qYi1 QN qYj4 +qYi2 QN qYj2 +qYi3 QN qYj7 +qYi4 QN qYj5 +qYi5 QN qYjt +qYi6 QN qYj1 +qYi7 QN qYj6 +qYit QN qYj3 + +dYi0lo DN dYj0lo +dYi0hi DN dYj0hi +dYi1lo DN dYj4lo +dYi1hi DN dYj4hi +dYi2lo DN dYj2lo +dYi2hi DN dYj2hi +dYi3lo DN dYj7lo +dYi3hi DN dYj7hi +dYi4lo DN dYj5lo +dYi4hi DN dYj5hi +dYi5lo DN dYjtlo +dYi5hi DN dYjthi +dYi6lo DN dYj1lo +dYi6hi DN dYj1hi +dYi7lo DN dYj6lo +dYi7hi DN dYj6hi +dYitlo DN dYj3lo +dYithi DN dYj3hi + +qYh0 QN qYit +qYh1 QN qYi0 +qYh2 QN qYi2 +qYh3 QN qYi3 +qYh4 QN qYi7 +qYh5 QN qYi5 +qYh6 QN qYi4 +qYh7 QN qYi1 +qYht QN qYi6 + +dYh0lo 
DN dYitlo +dYh0hi DN dYithi +dYh1lo DN dYi0lo +dYh1hi DN dYi0hi +dYh2lo DN dYi2lo +dYh2hi DN dYi2hi +dYh3lo DN dYi3lo +dYh3hi DN dYi3hi +dYh4lo DN dYi7lo +dYh4hi DN dYi7hi +dYh5lo DN dYi5lo +dYh5hi DN dYi5hi +dYh6lo DN dYi4lo +dYh6hi DN dYi4hi +dYh7lo DN dYi1lo +dYh7hi DN dYi1hi +dYhtlo DN dYi6lo +dYhthi DN dYi6hi + +qYg0 QN qYh2 +qYg1 QN qYht +qYg2 QN qYh1 +qYg3 QN qYh0 +qYg4 QN qYh4 +qYg5 QN qYh5 +qYg6 QN qYh6 +qYg7 QN qYh7 +qYgt QN qYh3 + +qYf0 QN qYg6 +qYf1 QN qYg5 +qYf2 QN qYg4 +qYf3 QN qYgt +qYf4 QN qYg3 +qYf5 QN qYg2 +qYf6 QN qYg1 +qYf7 QN qYg0 +qYft QN qYg7 + + VRSHR qYj7, qYj7, #2 + VRSHR qYj6, qYj6, #1 + + VHADD qYi5, qYj1, qYj7 ;// i5 = (j1+j7)/2 + VSUB qYi6, qYj1, qYj7 ;// i6 = j1-j7 + VHADD qYi3, qYj2, qYj6 ;// i3 = (j2+j6)/2 + VSUB qYi2, qYj2, qYj6 ;// i2 = j2-j6 + VHADD qYi7, qYj5, qYj3 ;// i7 = (j5+j3)/2 + VSUB qYi4, qYj5, qYj3 ;// i4 = j5-j3 + + VQRDMULH qYi2, qYi2, InvSqrt2 ;// i2/sqrt(2) + ;// IStage 4,3 rows 0to1 x 1/2 + + MOV pTemp, #0x4 ;// ensure correct round + VDUP qScale1, pTemp ;// of DC result + VADD qYi0, qYi0, qScale1 + + VHADD qYh0, qYi0, qYi1 ;// (i0+i1)/2 + VHSUB qYh1, qYi0, qYi1 ;// (i0-i1)/2 + + VHADD qYh7, qYi5, qYi7 ;// (i5+i7)/4 + VSUB qYh5, qYi5, qYi7 ;// (i5-i7)/2 + VSUB qYh2, qYi2, qYi3 ;// h2, h3 + VQRDMULH qYh5, qYh5, InvSqrt2 ;// h5/sqrt(2) + + VMULL qXt0, dYi4lo, C ;// c*i4 + VMLAL qXt0, dYi6lo, S ;// c*i4+s*i6 + VMULL qXt1, dYi4hi, C + VMLAL qXt1, dYi6hi, S + VSHRN dYh4lo, qXt0, #16 ;// h4 + VSHRN dYh4hi, qXt1, #16 + + VMULL qXt0, dYi6lo, C ;// c*i6 + VMLSL qXt0, dYi4lo, S ;// -s*i4 + c*i6 + VMULL qXt1, dYi6hi, C + VMLSL qXt1, dYi4hi, S + VSHRN dYh6lo, qXt0, #16 ;// h6 + VSHRN dYh6hi, qXt1, #16 + + VSUB qYg6, qYh6, qYh7 + VSUB qYg5, qYh5, qYg6 + VSUB qYg4, qYh4, qYg5 + + ;// IStage 2 rows 0to3 x 1/2 + VHADD qYg1, qYh1, qYh2 ;// (h1+h2)/2 + VHSUB qYg2, qYh1, qYh2 ;// (h1-h2)/2 + VHADD qYg0, qYh0, qYh3 ;// (h0+h3)/2 + VHSUB qYg3, qYh0, qYh3 ;// (h0-h3)/2 + + + ;// IStage 1 all rows + VHADD qYf3, qYg3, qYg4 + VHSUB qYf4, 
qYg3, qYg4 + VHADD qYf2, qYg2, qYg5 + VHSUB qYf5, qYg2, qYg5 + VHADD qYf1, qYg1, qYg6 + VHSUB qYf6, qYg1, qYg6 + VHADD qYf0, qYg0, qYg7 + VHSUB qYf7, qYg0, qYg7 + +YTR0 EQU Src0 +YTR1 EQU Src4 +YTR2 EQU Src1 +YTR3 EQU Src2 +YTR4 EQU Src7 +YTR5 EQU Src5 +YTR6 EQU Tmp +YTR7 EQU Src6 +YTRt EQU Src3 + +qC0 QN YTR0.S32 ;// for YTRpose +qC1 QN YTR1.S32 +qC2 QN YTR2.S32 +qC3 QN YTR3.S32 +qC4 QN YTR4.S32 +qC5 QN YTR5.S32 +qC6 QN YTR6.S32 +qC7 QN YTR7.S32 + +dD0 DN YTR0*2+1 ;// for using VSWP +dD1 DN YTR1*2+1 +dD2 DN YTR2*2+1 +dD3 DN YTR3*2+1 +dD4 DN YTR4*2 +dD5 DN YTR5*2 +dD6 DN YTR6*2 +dD7 DN YTR7*2 + + VTRN qYf0, qYf1 + VTRN qYf2, qYf3 + VTRN qYf4, qYf5 + VTRN qYf6, qYf7 + VTRN qC0, qC2 + VTRN qC1, qC3 + VTRN qC4, qC6 + VTRN qC5, qC7 + VSWP dD0, dD4 + VSWP dD1, dD5 + VSWP dD2, dD6 + VSWP dD3, dD7 + + +dYf0U8 DN YTR0*2.U8 +dYf1U8 DN YTR1*2.U8 +dYf2U8 DN YTR2*2.U8 +dYf3U8 DN YTR3*2.U8 +dYf4U8 DN YTR4*2.U8 +dYf5U8 DN YTR5*2.U8 +dYf6U8 DN YTR6*2.U8 +dYf7U8 DN YTR7*2.U8 + + ;// + ;// Do saturation if outsize is other than S16 + ;// + + IF ("$outsize"="u8") + ;// Output range [0-255] + VQMOVN dYf0U8, qYf0 + VQMOVN dYf1U8, qYf1 + VQMOVN dYf2U8, qYf2 + VQMOVN dYf3U8, qYf3 + VQMOVN dYf4U8, qYf4 + VQMOVN dYf5U8, qYf5 + VQMOVN dYf6U8, qYf6 + VQMOVN dYf7U8, qYf7 + ENDIF + + IF ("$outsize"="s9") + ;// Output range [-256 to +255] + VQSHL qYf0, qYf0, #16-9 + VQSHL qYf1, qYf1, #16-9 + VQSHL qYf2, qYf2, #16-9 + VQSHL qYf3, qYf3, #16-9 + VQSHL qYf4, qYf4, #16-9 + VQSHL qYf5, qYf5, #16-9 + VQSHL qYf6, qYf6, #16-9 + VQSHL qYf7, qYf7, #16-9 + + VSHR qYf0, qYf0, #16-9 + VSHR qYf1, qYf1, #16-9 + VSHR qYf2, qYf2, #16-9 + VSHR qYf3, qYf3, #16-9 + VSHR qYf4, qYf4, #16-9 + VSHR qYf5, qYf5, #16-9 + VSHR qYf6, qYf6, #16-9 + VSHR qYf7, qYf7, #16-9 + ENDIF + + ;// Store output depending on the Stride size + IF "$stride"="s" + VST1 qYf0, [pDest @64], Stride + VST1 qYf1, [pDest @64], Stride + VST1 qYf2, [pDest @64], Stride + VST1 qYf3, [pDest @64], Stride + VST1 qYf4, [pDest @64], Stride + VST1 qYf5, 
[pDest @64], Stride + VST1 qYf6, [pDest @64], Stride + VST1 qYf7, [pDest @64] + ELSE + IF ("$outsize"="u8") + VST1 dYf0U8, [pDest @64], #8 + VST1 dYf1U8, [pDest @64], #8 + VST1 dYf2U8, [pDest @64], #8 + VST1 dYf3U8, [pDest @64], #8 + VST1 dYf4U8, [pDest @64], #8 + VST1 dYf5U8, [pDest @64], #8 + VST1 dYf6U8, [pDest @64], #8 + VST1 dYf7U8, [pDest @64] + ELSE + ;// ("$outsize"="s9") or ("$outsize"="s16") + VST1 qYf0, [pDest @64], #16 + VST1 qYf1, [pDest @64], #16 + VST1 qYf2, [pDest @64], #16 + VST1 qYf3, [pDest @64], #16 + VST1 qYf4, [pDest @64], #16 + VST1 qYf5, [pDest @64], #16 + VST1 qYf6, [pDest @64], #16 + VST1 qYf7, [pDest @64] + ENDIF + + ENDIF + + + + ENDIF ;// CortexA8 + + + + MEND + + ;// Scale TWO input rows with TWO rows of 16 bit scale values + ;// + ;// This macro is used by M_IDCT_PRESCALE16 to pre-scale two rows + ;// of input (eight values each) with two rows of scale values. Also + ;// loads the next two rows of scale values from pScale if $LastRow is "0". + ;// + ;// Input Registers: + ;// + ;// $dAlo - Input D register with first four S16 values of row n + ;// $dAhi - Input D register with next four S16 values of row n + ;// $dBlo - Input D register with first four S16 values of row n+1 + ;// $dBhi - Input D register with next four S16 values of row n+1 + ;// pScale - Pointer to next row of scale values + ;// qT0lo - Temporary scratch register + ;// qT0hi - Temporary scratch register + ;// qT1lo - Temporary scratch register + ;// qT1hi - Temporary scratch register + ;// dScale1lo - Scale value of row n + ;// dScale1hi - Scale value of row n + ;// dScale2lo - Scale value of row n+1 + ;// dScale2hi - Scale value of row n+1 + ;// + ;// Input Flag + ;// + ;// $LastRow - "0" if more rows follow (the next scale rows are then loaded); + ;// any other value marks the last pair of rows + ;// + ;// Output Registers: + ;// + ;// $dAlo - Scaled output values (first four S16 of row n) + ;// $dAhi - Scaled output values (next four S16 of row n) + ;// $dBlo - Scaled output values (first four S16 of row n+1) + ;// $dBhi - Scaled output 
values (next four S16 of row n+1) + ;// qScale1 - Scale values for row n+2 (reloaded only if $LastRow is "0") + ;// qScale2 - Scale values for row n+3 (reloaded only if $LastRow is "0") + ;// pScale - Pointer to next row of scale values + ;// + MACRO + M_IDCT_SCALE16 $dAlo, $dAhi, $dBlo, $dBhi, $LastRow + VMULL qT0lo, $dAlo, dScale1lo + VMULL qT0hi, $dAhi, dScale1hi + VMULL qT1lo, $dBlo, dScale2lo + VMULL qT1hi, $dBhi, dScale2hi + IF "$LastRow"="0" + VLD1 qScale1, [pScale], #16 ;// Load scale for row n+2 + VLD1 qScale2, [pScale], #16 ;// Load scale for row n+3 + ENDIF + VQRSHRN $dAlo, qT0lo, #12 + VQRSHRN $dAhi, qT0hi, #12 + VQRSHRN $dBlo, qT1lo, #12 + VQRSHRN $dBhi, qT1hi, #12 + MEND + + ;// Scale 8x8 block input values with 16 bit scale values + ;// + ;// This macro is used to pre-scale a block of 8x8 input. + ;// It also performs the first stage transformations of the IDCT. + ;// + ;// Input Registers: + ;// + ;// dXjnlo - n th input D register with first four S16 values + ;// dXjnhi - n th input D register with next four S16 values + ;// qXjn - n th input Q register with eight S16 values + ;// pScale - Pointer to scale values + ;// + ;// Output Registers: + ;// + ;// qXin - n th output Q register with eight S16 output values of 1st stage + ;// + MACRO + M_IDCT_PRESCALE16 + VLD1 qScale1, [pScale], #16 ;// Load Pre scale for row 0 + VLD1 qScale2, [pScale], #16 ;// Load Pre scale for row 1 + M_IDCT_SCALE16 dXj0lo, dXj0hi, dXj1lo, dXj1hi, 0 ;// Pre scale row 0 & 1 + M_IDCT_SCALE16 dXj2lo, dXj2hi, dXj3lo, dXj3hi, 0 ;// Pre scale row 2 & 3 + M_IDCT_SCALE16 dXj4lo, dXj4hi, dXj5lo, dXj5hi, 0 ;// Pre scale row 4 & 5 + M_IDCT_SCALE16 dXj6lo, dXj6hi, dXj7lo, dXj7hi, 1 ;// Pre scale row 6 & 7 (last pair, no further scale load) + VHADD qXi5, qXj1, qXj7 ;// (j1+j7)/2 + VSUB qXi6, qXj1, qXj7 ;// j1-j7 + LDR pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants + VHADD qXi3, qXj2, qXj6 ;// (j2+j6)/2 + VSUB qXi2, qXj2, qXj6 ;// j2-j6 + VLDR dCoefs, [pSrc] ;// Load DCT inverse AAN constants + VHADD qXi7, qXj5, qXj3 ;// (j5+j3)/2 + VSUB qXi4, qXj5, qXj3 ;// j5-j3 + MEND + + + ;// Scale 8x8 block input values with 32 bit scale values + ;// + ;// This 
macro is used to pre-scale block of 8x8 input. + ;// This also do the Ist stage transformations of IDCT. + ;// + ;// Input Registers: + ;// + ;// dXjnlo - n th input D register with first four S16 values + ;// dXjnhi - n th input D register with next four S16 values + ;// qXjn - n th input Q register with eight S16 values + ;// pScale - Pointer to 32bit scale values in Q23 format + ;// + ;// Output Registers: + ;// + ;// dXinlo - n th output D register with first four S16 output values of 1st stage + ;// dXinhi - n th output D register with next four S16 output values of 1st stage + ;// + MACRO + M_IDCT_PRESCALE32 +qScale0lo QN 0.S32 +qScale0hi QN 1.S32 +qScale1lo QN 2.S32 +qScale1hi QN 3.S32 +qScale2lo QN qScale1lo +qScale2hi QN qScale1hi +qScale3lo QN qScale1lo +qScale3hi QN qScale1hi +qScale4lo QN qScale1lo +qScale4hi QN qScale1hi +qScale5lo QN qScale0lo +qScale5hi QN qScale0hi +qScale6lo QN qScale0lo +qScale6hi QN qScale0hi +qScale7lo QN qScale0lo +qScale7hi QN qScale0hi + +qSrc0lo QN 4.S32 +qSrc0hi QN 5.S32 +qSrc1lo QN 6.S32 +qSrc1hi QN Src4.S32 +qSrc2lo QN qSrc0lo +qSrc2hi QN qSrc0hi +qSrc3lo QN qSrc0lo +qSrc3hi QN qSrc0hi +qSrc4lo QN qSrc0lo +qSrc4hi QN qSrc0hi +qSrc5lo QN qSrc1lo +qSrc5hi QN qSrc1hi +qSrc6lo QN qSrc1lo +qSrc6hi QN qSrc1hi +qSrc7lo QN qSrc0lo +qSrc7hi QN qSrc0hi + +qRes17lo QN qScale0lo +qRes17hi QN qScale0hi +qRes26lo QN qScale0lo +qRes26hi QN qScale0hi +qRes53lo QN qScale0lo +qRes53hi QN qScale0hi + + ADD pTemp, pScale, #4*8*7 ;// Address of pScale[7] + + ;// Row 0 + VLD1 {qScale0lo, qScale0hi}, [pScale]! + VSHLL qSrc0lo, dXj0lo, #(12-1) + VSHLL qSrc0hi, dXj0hi, #(12-1) + VLD1 {qScale1lo, qScale1hi}, [pScale]! + VQRDMULH qSrc0lo, qScale0lo, qSrc0lo + VQRDMULH qSrc0hi, qScale0hi, qSrc0hi + VLD1 {qScale7lo, qScale7hi}, [pTemp]! 
+ VSHLL qSrc1lo, dXj1lo, #(12-1) + VSHLL qSrc1hi, dXj1hi, #(12-1) + VMOVN dXi0lo, qSrc0lo ;// Output i0 + VMOVN dXi0hi, qSrc0hi + VSHLL qSrc7lo, dXj7lo, #(12-1) + VSHLL qSrc7hi, dXj7hi, #(12-1) + SUB pTemp, pTemp, #((16*2)+(4*8*1)) + VQRDMULH qSrc1lo, qScale1lo, qSrc1lo + VQRDMULH qSrc1hi, qScale1hi, qSrc1hi + VQRDMULH qSrc7lo, qScale7lo, qSrc7lo + VQRDMULH qSrc7hi, qScale7hi, qSrc7hi + VLD1 {qScale2lo, qScale2hi}, [pScale]! + + ;// Row 1 & 7 + VHADD qRes17lo, qSrc1lo, qSrc7lo ;// (j1+j7)/2 + VHADD qRes17hi, qSrc1hi, qSrc7hi ;// (j1+j7)/2 + VMOVN dXi5lo, qRes17lo ;// Output i5 + VMOVN dXi5hi, qRes17hi + VSUB qRes17lo, qSrc1lo, qSrc7lo ;// j1-j7 + VSUB qRes17hi, qSrc1hi, qSrc7hi ;// j1-j7 + VMOVN dXi6lo, qRes17lo ;// Output i6 + VMOVN dXi6hi, qRes17hi + VSHLL qSrc2lo, dXj2lo, #(12-1) + VSHLL qSrc2hi, dXj2hi, #(12-1) + VLD1 {qScale6lo, qScale6hi}, [pTemp]! + VSHLL qSrc6lo, dXj6lo, #(12-1) + VSHLL qSrc6hi, dXj6hi, #(12-1) + SUB pTemp, pTemp, #((16*2)+(4*8*1)) + VQRDMULH qSrc2lo, qScale2lo, qSrc2lo + VQRDMULH qSrc2hi, qScale2hi, qSrc2hi + VQRDMULH qSrc6lo, qScale6lo, qSrc6lo + VQRDMULH qSrc6hi, qScale6hi, qSrc6hi + VLD1 {qScale3lo, qScale3hi}, [pScale]! + + ;// Row 2 & 6 + VHADD qRes26lo, qSrc2lo, qSrc6lo ;// (j2+j6)/2 + VHADD qRes26hi, qSrc2hi, qSrc6hi ;// (j2+j6)/2 + VMOVN dXi3lo, qRes26lo ;// Output i3 + VMOVN dXi3hi, qRes26hi + VSUB qRes26lo, qSrc2lo, qSrc6lo ;// j2-j6 + VSUB qRes26hi, qSrc2hi, qSrc6hi ;// j2-j6 + VMOVN dXi2lo, qRes26lo ;// Output i2 + VMOVN dXi2hi, qRes26hi + VSHLL qSrc3lo, dXj3lo, #(12-1) + VSHLL qSrc3hi, dXj3hi, #(12-1) + VLD1 {qScale5lo, qScale5hi}, [pTemp]! 
+ VSHLL qSrc5lo, dXj5lo, #(12-1) + VSHLL qSrc5hi, dXj5hi, #(12-1) + VQRDMULH qSrc3lo, qScale3lo, qSrc3lo + VQRDMULH qSrc3hi, qScale3hi, qSrc3hi + VQRDMULH qSrc5lo, qScale5lo, qSrc5lo + VQRDMULH qSrc5hi, qScale5hi, qSrc5hi + + ;// Row 3 & 5 + VHADD qRes53lo, qSrc5lo, qSrc3lo ;// (j5+j3)/2 + VHADD qRes53hi, qSrc5hi, qSrc3hi ;// (j5+j3)/2 + SUB pSrc, pSrc, #16*2*2 + VMOVN dXi7lo, qRes53lo ;// Output i7 + VMOVN dXi7hi, qRes53hi + VSUB qRes53lo, qSrc5lo, qSrc3lo ;// j5-j3 + VSUB qRes53hi, qSrc5hi, qSrc3hi ;// j5-j3 + VLD1 qXj4, [pSrc @64] + VMOVN dXi4lo, qRes53lo ;// Output i4 + VMOVN dXi4hi, qRes53hi + VSHLL qSrc4lo, dXj4lo, #(12-1) + VSHLL qSrc4hi, dXj4hi, #(12-1) + VLD1 {qScale4lo, qScale4hi}, [pScale] + LDR pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants + VQRDMULH qSrc4lo, qScale4lo, qSrc4lo + VQRDMULH qSrc4hi, qScale4hi, qSrc4hi + VLDR dCoefs, [pSrc] ;// Load DCT inverse AAN constants + ;// Row 4 + VMOVN dXi1lo, qSrc4lo ;// Output i1 + VMOVN dXi1hi, qSrc4hi + + MEND + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_MaskTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_MaskTable.h new file mode 100644 index 0000000..b5da9dc --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_MaskTable.h @@ -0,0 +1,27 @@ +/** + * + * File Name: armCOMM_MaskTable.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Mask Table to mask the end of array + */ + + + +#ifndef _ARMCOMM_MASKTABLE_H_ +#define _ARMCOMM_MASKTABLE_H_ + +#define MaskTableSize 72 + +/* Mask table */ + +extern const OMX_U16 armCOMM_qMaskTable16[MaskTableSize]; +extern const OMX_U8 armCOMM_qMaskTable8[MaskTableSize]; + +#endif diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Version.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Version.h new file mode 100644 index 0000000..13e5b2b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_Version.h @@ -0,0 +1,43 @@ +/* Guard the header against multiple inclusion. */ +#ifndef __ARM_COMM_VERSION_H__ +#define __ARM_COMM_VERSION_H__ + + +/* The following line should be in omxtypes.h but hasn't been approved by OpenMAX yet */ +#define OMX_VERSION 102 + +/* We need to define these macros in order to convert a #define number into a #define string. */ +#define ARM_QUOTE(a) #a +#define ARM_INDIRECT(A) ARM_QUOTE(A) + +/* Convert the OMX_VERSION number into a string that can be used, for example, to print it out. */ +#define ARM_VERSION_STRING ARM_INDIRECT(OMX_VERSION) + + +/* Define this in order to turn on ARM version/release/build strings in each domain */ +#define ARM_INCLUDE_VERSION_DESCRIPTIONS + +#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS + extern const char * const omxAC_VersionDescription; + extern const char * const omxIC_VersionDescription; + extern const char * const omxIP_VersionDescription; + extern const char * const omxSP_VersionDescription; + extern const char * const omxVC_VersionDescription; +#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */ + + +/* The following entries should be automatically updated by the release script */ +/* They are used in the ARM version strings defined for each domain. */ + +/* The release tag associated with this release of the library. 
- used for source and object releases */ +#define OMX_ARM_RELEASE_TAG "r0p0-00bet1" + +/* The ARM architecture used to build any objects or executables in this release. */ +#define OMX_ARM_BUILD_ARCHITECTURE "ARM Architecture V6" + +/* The ARM Toolchain used to build any objects or executables in this release. */ +#define OMX_ARM_BUILD_TOOLCHAIN "ARM RVCT 3.1" + + +#endif /* __ARM_COMM_VERSION_H__ */ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_s.h new file mode 100644 index 0000000..2df1fc8 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armCOMM_s.h @@ -0,0 +1,1154 @@ +;// +;// +;// File Name: armCOMM_s.h +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// +;// ARM optimized OpenMAX common header file +;// + +;// Protect against multiple inclusion + IF :LNOT::DEF:ARMCOMM_S_H + GBLL ARMCOMM_S_H + + REQUIRE8 ;// Requires 8-byte stack alignment + PRESERVE8 ;// Preserves 8-byte stack alignment + + GBLL ARM_ERRORCHECK +ARM_ERRORCHECK SETL {FALSE} + +;// Globals + + GBLS _RRegList ;// R saved register list + GBLS _DRegList ;// D saved register list + GBLS _Variant ;// Selected processor variant + GBLS _CPU ;// CPU name + GBLS _Struct ;// Structure name + + GBLL _InFunc ;// Inside function assembly flag + GBLL _SwLong ;// Long switch flag + + GBLA _RBytes ;// Number of register bytes on stack + GBLA _SBytes ;// Number of scratch bytes on stack + GBLA _ABytes ;// Stack offset of next argument + GBLA _Workspace ;// Stack offset of scratch workspace + GBLA _F ;// Function number + GBLA _StOff ;// Struct offset + GBLA _SwNum ;// Switch number + GBLS _32 ;// Suffix for 32 byte alignment + GBLS _16 ;// Suffix for 16 byte alignment + +_InFunc SETL {FALSE} +_SBytes SETA 0 +_F SETA 0 +_SwNum SETA 0 +_32 SETS "ALIGN32" +_16 SETS "ALIGN16" + 
+;///////////////////////////////////////////////////////// +;// Override the tools settings of the CPU if the #define +;// OVERRIDECPU is set, otherwise use the CPU defined by the +;// assembler settings. +;///////////////////////////////////////////////////////// + + IF :DEF: OVERRIDECPU +_CPU SETS OVERRIDECPU + ELSE +_CPU SETS {CPU} + ENDIF + + + +;///////////////////////////////////////////////////////// +;// Work out which code to build +;///////////////////////////////////////////////////////// + + IF :DEF:ARM1136JS:LOR::DEF:CortexA8:LOR::DEF:ARM_GENERIC + INFO 1,"Please switch to using M_VARIANTS" + ENDIF + + ;// Define and reset all officially recognised variants + MACRO + _M_DEF_VARIANTS + _M_DEF_VARIANT ARM926EJS + _M_DEF_VARIANT ARM1136JS + _M_DEF_VARIANT ARM1136JS_U + _M_DEF_VARIANT CortexA8 + _M_DEF_VARIANT ARM7TDMI + MEND + + ;// Declare a variant's logical flag plus its _ok marker, and reset the flag to FALSE + MACRO + _M_DEF_VARIANT $var + GBLL $var + GBLL _ok$var +$var SETL {FALSE} + MEND + + + ;// Variant declaration + ;// + ;// Define a list of code variants supported by this + ;// source file. This macro then chooses the most + ;// appropriate variant to build for the currently configured + ;// core. 
+ ;// + MACRO + M_VARIANTS $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7 + ;// Set to TRUE variants that are supported + _M_DEF_VARIANTS + _M_VARIANT $v0 + _M_VARIANT $v1 + _M_VARIANT $v2 + _M_VARIANT $v3 + _M_VARIANT $v4 + _M_VARIANT $v5 + _M_VARIANT $v6 + _M_VARIANT $v7 + + ;// Look for first available variant to match a CPU + ;// _M_TRY cpu, variant fall back list +_Variant SETS "" + _M_TRY ARM926EJ-S, ARM926EJS + _M_TRY ARM1176JZ-S, ARM1136JS + _M_TRY ARM1176JZF-S, ARM1136JS + _M_TRY ARM1156T2-S, ARM1136JS + _M_TRY ARM1156T2F-S, ARM1136JS + _M_TRY ARM1136J-S, ARM1136JS + _M_TRY ARM1136JF-S, ARM1136JS + _M_TRY MPCore, ARM1136JS + _M_TRY Cortex-A8, CortexA8, ARM1136JS + _M_TRY Cortex-R4, ARM1136JS + _M_TRY ARM7TDMI + + ;// Select the correct variant + _M_DEF_VARIANTS + IF _Variant="" + INFO 1, "No match found for CPU '$_CPU'" + ELSE +$_Variant SETL {TRUE} + ENDIF + MEND + + ;// Register a variant as available + MACRO + _M_VARIANT $var + IF "$var"="" + MEXIT + ENDIF + IF :LNOT::DEF:_ok$var + INFO 1, "Unrecognized variant '$var'" + ENDIF +$var SETL {TRUE} + MEND + + ;// For a given CPU, see if any of the variants supporting + ;// this CPU are available. 
The first available variant is + ;// chosen + MACRO + _M_TRY $cpu, $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7 + IF "$cpu"<>_CPU + MEXIT + ENDIF + _M_TRY1 $v0 + _M_TRY1 $v1 + _M_TRY1 $v2 + _M_TRY1 $v3 + _M_TRY1 $v4 + _M_TRY1 $v5 + _M_TRY1 $v6 + _M_TRY1 $v7 + ;// Check a match was found + IF _Variant="" + INFO 1, "No variant match found for CPU '$_CPU'" + ENDIF + MEND + + MACRO + _M_TRY1 $var + IF "$var"="" + MEXIT + ENDIF + IF (_Variant=""):LAND:$var +_Variant SETS "$var" + ENDIF + MEND + +;//////////////////////////////////////////////////////// +;// Structure definition +;//////////////////////////////////////////////////////// + + ;// Declare a structure of given name + MACRO + M_STRUCT $sname +_Struct SETS "$sname" +_StOff SETA 0 + MEND + + ;// Declare a structure field + ;// The field is called $sname_$fname + ;// $size = the size of each entry, must be power of 2 + ;// $number = (if provided) the number of entries for an array + MACRO + M_FIELD $fname, $size, $number + IF (_StOff:AND:($size-1))!=0 +_StOff SETA _StOff + ($size - (_StOff:AND:($size-1))) + ENDIF +$_Struct._$fname EQU _StOff + IF "$number"<>"" +_StOff SETA _StOff + $size*$number + ELSE +_StOff SETA _StOff + $size + ENDIF + MEND + + + MACRO + M_ENDSTRUCT +sizeof_$_Struct EQU _StOff +_Struct SETS "" + MEND + +;////////////////////////////////////////////////////////// +;// Switch and table macros +;////////////////////////////////////////////////////////// + + ;// Start a relative switch table with register to switch on + ;// + ;// $v = the register to switch on + ;// $s = if specified must be "L" to indicate long + ;// this allows a greater range to the case code + MACRO + M_SWITCH $v, $s + ASSERT "$s"="":LOR:"$s"="L" +_SwLong SETL {FALSE} + IF "$s"="L" +_SwLong SETL {TRUE} + ENDIF +_SwNum SETA _SwNum+1 + IF {CONFIG}=16 + ;// Thumb + IF _SwLong + TBH [pc, $v, LSL#1] + ELSE + TBB [pc, $v] + ENDIF +_Switch$_SwNum + ELSE + ;// ARM + ADD pc, pc, $v, LSL #2 + NOP + ENDIF + MEND + + ;// Add a case to the switch 
statement + MACRO + M_CASE $label + IF {CONFIG}=16 + ;// Thumb + IF _SwLong + DCW ($label - _Switch$_SwNum)/2 + ELSE + DCB ($label - _Switch$_SwNum)/2 + ENDIF + ELSE + ;// ARM + B $label + ENDIF + MEND + + ;// End of switch statement + MACRO + M_ENDSWITCH + ALIGN 2 + MEND + + +;//////////////////////////////////////////////////////// +;// Data area allocation +;//////////////////////////////////////////////////////// + + ;// Constant table allocator macro + ;// + ;// Creates a new section for each constant table + ;// $name is symbol through which the table can be accessed. + ;// $align is the optional alignment of the table, log2 of + ;// the byte alignment - $align=4 is 16 byte aligned + MACRO + M_TABLE $name, $align + ASSERT :LNOT:_InFunc + IF "$align"="" + AREA |.constdata|, READONLY, DATA + ELSE + ;// AREAs inherit the alignment of the first declaration. + ;// Therefore for each alignment size we must have an area + ;// of a different name. + AREA constdata_a$align, READONLY, DATA, ALIGN=$align + + ;// We also force alignment incase we are tagging onto + ;// an already started area. + ALIGN (1<<$align) + ENDIF +$name + MEND + +;///////////////////////////////////////////////////// +;// Macros to allocate space on the stack +;// +;// These all assume that the stack is 8-byte aligned +;// at entry to the function, which means that the +;// 32-byte alignment macro needs to work in a +;// bit more of a special way... +;///////////////////////////////////////////////////// + + + + + ;// Allocate 1-byte aligned area of name + ;// $name size $size bytes. + MACRO + M_ALLOC1 $name, $size + ASSERT :LNOT:_InFunc +$name$_F EQU _SBytes +_SBytes SETA _SBytes + ($size) + MEND + + ;// Allocate 2-byte aligned area of name + ;// $name size $size bytes. 
+ MACRO + M_ALLOC2 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:1)!=0 +_SBytes SETA _SBytes + (2 - (_SBytes:AND:1)) + ENDIF +$name$_F EQU _SBytes +_SBytes SETA _SBytes + ($size) + MEND + + ;// Allocate 4-byte aligned area of name + ;// $name size $size bytes. + MACRO + M_ALLOC4 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:3)!=0 +_SBytes SETA _SBytes + (4 - (_SBytes:AND:3)) + ENDIF +$name$_F EQU _SBytes +_SBytes SETA _SBytes + ($size) + MEND + + ;// Allocate 8-byte aligned area of name + ;// $name size $size bytes. + MACRO + M_ALLOC8 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:7)!=0 +_SBytes SETA _SBytes + (8 - (_SBytes:AND:7)) + ENDIF +$name$_F EQU _SBytes +_SBytes SETA _SBytes + ($size) + MEND + + + ;// Allocate 8-byte aligned area of name + ;// $name size ($size+8) bytes. + ;// The extra 8 bytes are later used to align the pointer to 16 bytes + ;// (the recorded offset is 8 bytes into the area) + + MACRO + M_ALLOC16 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:7)!=0 +_SBytes SETA _SBytes + (8 - (_SBytes:AND:7)) + ENDIF +$name$_F$_16 EQU (_SBytes + 8) +_SBytes SETA _SBytes + ($size) + 8 + MEND + + ;// Allocate 8-byte aligned area of name + ;// $name size ($size+32) bytes. 
+ ;// The extra 32 bytes are later used to align the pointer to 32 bytes + + MACRO + M_ALLOC32 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:7)!=0 +_SBytes SETA _SBytes + (8 - (_SBytes:AND:7)) + ENDIF +$name$_F$_32 EQU (_SBytes + 24) +_SBytes SETA _SBytes + ($size) + 24 + MEND + + + + + ;// Argument Declaration Macro + ;// + ;// Allocate an argument name $name + ;// size $size bytes + MACRO + M_ARG $name, $size + ASSERT _InFunc +$name$_F EQU _ABytes +_ABytes SETA _ABytes + ($size) + MEND + +;/////////////////////////////////////////////// +;// Macros to access stacked variables +;/////////////////////////////////////////////// + + ;// Macro to perform a data processing operation + ;// with a constant second operand + MACRO + _M_OPC $op,$rd,$rn,$const + LCLA _sh + LCLA _cst +_sh SETA 0 +_cst SETA $const + IF _cst=0 + $op $rd, $rn, #_cst + MEXIT + ENDIF + WHILE (_cst:AND:3)=0 +_cst SETA _cst>>2 +_sh SETA _sh+2 + WEND + $op $rd, $rn, #(_cst:AND:0x000000FF)<<_sh + IF _cst>=256 + $op $rd, $rd, #(_cst:AND:0xFFFFFF00)<<_sh + ENDIF + MEND + + ;// Macro to perform a data access operation + ;// Such as LDR or STR + ;// The addressing mode is modified such that + ;// 1. If no address is given then the name is taken + ;// as a stack offset + ;// 2. If the addressing mode is not available for the + ;// state being assembled for (eg Thumb) then a suitable + ;// addressing mode is substituted. + ;// + ;// On Entry: + ;// $i = Instruction to perform (eg "LDRB") + ;// $a = Required byte alignment + ;// $r = Register(s) to transfer (eg "r1") + ;// $a0,$a1,$a2. Addressing mode and condition. 
One of: + ;// label {,cc} + ;// [base] {,,,cc} + ;// [base, offset]{!} {,,cc} + ;// [base, offset, shift]{!} {,cc} + ;// [base], offset {,,cc} + ;// [base], offset, shift {,cc} + MACRO + _M_DATA $i,$a,$r,$a0,$a1,$a2,$a3 + IF "$a0":LEFT:1="[" + IF "$a1"="" + $i$a3 $r, $a0 + ELSE + IF "$a0":RIGHT:1="]" + IF "$a2"="" + _M_POSTIND $i$a3, "$r", $a0, $a1 + ELSE + _M_POSTIND $i$a3, "$r", $a0, "$a1,$a2" + ENDIF + ELSE + IF "$a2"="" + _M_PREIND $i$a3, "$r", $a0, $a1 + ELSE + _M_PREIND $i$a3, "$r", $a0, "$a1,$a2" + ENDIF + ENDIF + ENDIF + ELSE + LCLA _Offset +_Offset SETA _Workspace + $a0$_F + ASSERT (_Offset:AND:($a-1))=0 + $i$a1 $r, [sp, #_Offset] + ENDIF + MEND + + ;// Handle post indexed load/stores + ;// op reg, [base], offset + MACRO + _M_POSTIND $i,$r,$a0,$a1 + LCLS _base + LCLS _offset + IF {CONFIG}=16 ;// Thumb +_base SETS ("$a0":LEFT:(:LEN:"$a0"-1)):RIGHT:(:LEN:"$a0"-2) ;// remove [] +_offset SETS "$a1" + IF _offset:LEFT:1="+" +_offset SETS _offset:RIGHT:(:LEN:_offset-1) + ENDIF + $i $r, $a0 + IF _offset:LEFT:1="-" +_offset SETS _offset:RIGHT:(:LEN:_offset-1) + SUB $_base, $_base, $_offset + ELSE + ADD $_base, $_base, $_offset + ENDIF + ELSE ;// ARM + $i $r, $a0, $a1 + ENDIF + MEND + + ;// Handle pre indexed load/store + ;// op reg, [base, offset]{!} + MACRO + _M_PREIND $i,$r,$a0,$a1 + LCLS _base + LCLS _offset + IF ({CONFIG}=16):LAND:(("$a1":RIGHT:2)="]!") +_base SETS "$a0":RIGHT:(:LEN:("$a0")-1) +_offset SETS "$a1":LEFT:(:LEN:("$a1")-2) + $i $r, [$_base, $_offset] + ADD $_base, $_base, $_offset + ELSE + $i $r, $a0, $a1 + ENDIF + MEND + + ;// Load unsigned byte from stack + MACRO + M_LDRB $r,$a0,$a1,$a2,$a3 + _M_DATA "LDRB",1,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load signed byte from stack + MACRO + M_LDRSB $r,$a0,$a1,$a2,$a3 + _M_DATA "LDRSB",1,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Store byte to stack + MACRO + M_STRB $r,$a0,$a1,$a2,$a3 + _M_DATA "STRB",1,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load unsigned half word from stack + MACRO + M_LDRH $r,$a0,$a1,$a2,$a3 + _M_DATA 
"LDRH",2,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load signed half word from stack + MACRO + M_LDRSH $r,$a0,$a1,$a2,$a3 + _M_DATA "LDRSH",2,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Store half word to stack + MACRO + M_STRH $r,$a0,$a1,$a2,$a3 + _M_DATA "STRH",2,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load word from stack + MACRO + M_LDR $r,$a0,$a1,$a2,$a3 + _M_DATA "LDR",4,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Store word to stack + MACRO + M_STR $r,$a0,$a1,$a2,$a3 + _M_DATA "STR",4,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load double word from stack + MACRO + M_LDRD $r0,$r1,$a0,$a1,$a2,$a3 + _M_DATA "LDRD",8,"$r0,$r1",$a0,$a1,$a2,$a3 + MEND + + ;// Store double word to stack + MACRO + M_STRD $r0,$r1,$a0,$a1,$a2,$a3 + _M_DATA "STRD",8,"$r0,$r1",$a0,$a1,$a2,$a3 + MEND + + ;// Get absolute address of stack allocated location + MACRO + M_ADR $a, $b, $cc + _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F) + MEND + + ;// Get absolute address of stack allocated location and align the address to 16 bytes + MACRO + M_ADR16 $a, $b, $cc + _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F$_16) + + ;// Now align $a to 16 bytes + BIC$cc $a,$a,#0x0F + MEND + + ;// Get absolute address of stack allocated location and align the address to 32 bytes + MACRO + M_ADR32 $a, $b, $cc + _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F$_32) + + ;// Now align $a to 32 bytes + BIC$cc $a,$a,#0x1F + MEND + +;////////////////////////////////////////////////////////// +;// Function header and footer macros +;////////////////////////////////////////////////////////// + + ;// Function Header Macro + ;// Generates the function prologue + ;// Note that functions should all be "stack-moves-once" + ;// The FNSTART and FNEND macros should be the only places + ;// where the stack moves. 
+ ;// + ;// $name = function name + ;// $rreg = "" don't stack any registers + ;// "lr" stack registers "r4,lr" + ;// "rN" stack registers "r4-rN,lr"; r5, r7, r9 and r11 + ;// are rounded up to r6, r8, r10 and r12 + ;// $dreg = "" don't stack any D registers + ;// "dN" stack registers "d8-dN" + ;// + ;// Note: ARM Architecture procedure call standard AAPCS + ;// states that r4-r11, sp, d8-d15 must be preserved by + ;// a compliant function. + MACRO + M_START $name, $rreg, $dreg + ASSERT :LNOT:_InFunc + ASSERT "$name"!="" +_InFunc SETL {TRUE} +_RBytes SETA 0 +_Workspace SETA 0 + + ;// Create an area for the function + AREA |.text|, CODE + EXPORT $name +$name FUNCTION + + ;// Save R registers + _M_GETRREGLIST $rreg + IF _RRegList<>"" + STMFD sp!, {$_RRegList, lr} + ENDIF + + ;// Save D registers + _M_GETDREGLIST $dreg + IF _DRegList<>"" + VSTMFD sp!, {$_DRegList} + ENDIF + + + ;// Ensure size claimed on stack is 8-byte aligned + IF ((_SBytes:AND:7)!=0) +_SBytes SETA _SBytes + (8 - (_SBytes:AND:7)) + ENDIF + + IF (_SBytes!=0) + _M_OPC SUB, sp, sp, _SBytes + ENDIF + + +_ABytes SETA _SBytes + _RBytes - _Workspace + + + ;// Print function name if debug enabled + M_PRINTF "$name\n", + MEND + + ;// Work out a list of R saved registers + MACRO + _M_GETRREGLIST $rreg + IF "$rreg"="" +_RRegList SETS "" + MEXIT + ENDIF + IF "$rreg"="lr":LOR:"$rreg"="r4" +_RRegList SETS "r4" +_RBytes SETA _RBytes+8 + MEXIT + ENDIF + IF "$rreg"="r5":LOR:"$rreg"="r6" +_RRegList SETS "r4-r6" +_RBytes SETA _RBytes+16 + MEXIT + ENDIF + IF "$rreg"="r7":LOR:"$rreg"="r8" +_RRegList SETS "r4-r8" +_RBytes SETA _RBytes+24 + MEXIT + ENDIF + IF "$rreg"="r9":LOR:"$rreg"="r10" +_RRegList SETS "r4-r10" +_RBytes SETA _RBytes+32 + MEXIT + ENDIF + IF "$rreg"="r11":LOR:"$rreg"="r12" +_RRegList SETS "r4-r12" +_RBytes SETA _RBytes+40 + MEXIT + ENDIF + INFO 1, "Unrecognized saved r register limit '$rreg'" + MEND + + ;// Work out a list of D saved registers + MACRO + _M_GETDREGLIST $dreg + IF "$dreg"="" +_DRegList SETS "" + MEXIT + ENDIF + IF "$dreg"="d8" +_DRegList SETS "d8" +_RBytes SETA 
_RBytes+8 + MEXIT + ENDIF + IF "$dreg"="d9" +_DRegList SETS "d8-d9" +_RBytes SETA _RBytes+16 + MEXIT + ENDIF + IF "$dreg"="d10" +_DRegList SETS "d8-d10" +_RBytes SETA _RBytes+24 + MEXIT + ENDIF + IF "$dreg"="d11" +_DRegList SETS "d8-d11" +_RBytes SETA _RBytes+32 + MEXIT + ENDIF + IF "$dreg"="d12" +_DRegList SETS "d8-d12" +_RBytes SETA _RBytes+40 + MEXIT + ENDIF + IF "$dreg"="d13" +_DRegList SETS "d8-d13" +_RBytes SETA _RBytes+48 + MEXIT + ENDIF + IF "$dreg"="d14" +_DRegList SETS "d8-d14" +_RBytes SETA _RBytes+56 + MEXIT + ENDIF + IF "$dreg"="d15" +_DRegList SETS "d8-d15" +_RBytes SETA _RBytes+64 + MEXIT + ENDIF + INFO 1, "Unrecognized saved d register limit '$dreg'" + MEND + + ;// Produce function return instructions + MACRO + _M_RET $cc + IF _DRegList<>"" + VPOP$cc {$_DRegList} + ENDIF + IF _RRegList="" + BX$cc lr + ELSE + LDM$cc.FD sp!, {$_RRegList, pc} + ENDIF + MEND + + ;// Early Function Exit Macro + ;// $cc = condition to exit with + ;// (Example: M_EXIT EQ) + MACRO + M_EXIT $cc + ASSERT _InFunc + IF _SBytes!=0 + ;// Restore stack frame and exit + B$cc _End$_F + ELSE + ;// Can return directly + _M_RET $cc + ENDIF + MEND + + ;// Function Footer Macro + ;// Generates the function epilogue + MACRO + M_END + ASSERT _InFunc +_InFunc SETL {FALSE} +_End$_F + + ;// Restore the stack pointer to its original value on function entry + IF _SBytes!=0 + _M_OPC ADD, sp, sp, _SBytes + ENDIF + _M_RET + ENDFUNC + + ;// Reset the global stack tracking variables back to their + ;// initial values, and increment the function count +_SBytes SETA 0 +_F SETA _F+1 + MEND + + +;//========================================================================== +;// Debug Macros +;//========================================================================== + + GBLL DEBUG_ON +DEBUG_ON SETL {FALSE} + GBLL DEBUG_STALLS_ON +DEBUG_STALLS_ON SETL {FALSE} + + ;//========================================================================== + ;// Debug call to printf + ;// M_PRINTF $format, $val0, $val1, 
$val2 + ;// + ;// Examples: + ;// M_PRINTF "x=%08x\n", r0 + ;// + ;// This macro preserves the value of all registers including the + ;// flags. + ;//========================================================================== + + MACRO + M_PRINTF $format, $val0, $val1, $val2 + IF DEBUG_ON + + IMPORT printf + LCLA nArgs +nArgs SETA 0 + + ;// save registers so we don't corrupt them + STMFD sp!, {r0-r12, lr} + + ;// Drop stack to give us some workspace + SUB sp, sp, #16 + + ;// Save registers we need to print to the stack + IF "$val2" <> "" + ASSERT "$val1" <> "" + STR $val2, [sp, #8] +nArgs SETA nArgs+1 + ENDIF + IF "$val1" <> "" + ASSERT "$val0" <> "" + STR $val1, [sp, #4] +nArgs SETA nArgs+1 + ENDIF + IF "$val0"<>"" + STR $val0, [sp] +nArgs SETA nArgs+1 + ENDIF + + ;// Now we are safe to corrupt registers + ADR r0, %FT00 + IF nArgs=1 + LDR r1, [sp] + ENDIF + IF nArgs=2 + LDMIA sp, {r1,r2} + ENDIF + IF nArgs=3 + LDMIA sp, {r1,r2,r3} + ENDIF + + ;// print the values + MRS r4, cpsr ;// preserve flags + BL printf + MSR cpsr_f, r4 ;// restore flags + B %FT01 +00 ;// string to print + DCB "$format", 0 + ALIGN +01 ;// Finished + ADD sp, sp, #16 + ;// Restore registers + LDMFD sp!, {r0-r12,lr} + + ENDIF ;// DEBUG_ON + MEND + + + ;// Stall Simulation Macro + ;// Inserts a given number of NOPs for the currently + ;// defined platform + MACRO + M_STALL $plat1stall, $plat2stall, $plat3stall, $plat4stall, $plat5stall, $plat6stall + IF DEBUG_STALLS_ON + _M_STALL_SUB $plat1stall + _M_STALL_SUB $plat2stall + _M_STALL_SUB $plat3stall + _M_STALL_SUB $plat4stall + _M_STALL_SUB $plat5stall + _M_STALL_SUB $plat6stall + ENDIF + MEND + + MACRO + _M_STALL_SUB $platstall + IF "$platstall"!="" + LCLA _pllen + LCLS _pl + LCLL _pllog +_pllen SETA :LEN:"$platstall" +_pl SETS "$platstall":LEFT:(_pllen - 2) + IF :DEF:$_pl + IF $_pl + LCLS _st + LCLA _stnum +_st SETS "$platstall":RIGHT:1 +_stnum SETA $_st + WHILE _stnum>0 + MOV sp, sp +_stnum SETA _stnum - 1 + WEND + ENDIF + ENDIF + ENDIF + MEND + 
+ + +;//========================================================================== +;// Endian Invariance Macros +;// +;// The idea behind these macros is that if an array is +;// loaded as words then the SMUL00 macro will multiply +;// array elements 0 regardless of the endianness of the +;// system. For little endian SMUL00=SMULBB, for big +;// endian SMUL00=SMULTT and similarly for other packed operations. +;// +;//========================================================================== + + MACRO + LIBI4 $comli, $combi, $a, $b, $c, $d, $cc + IF {ENDIAN}="big" + $combi.$cc $a, $b, $c, $d + ELSE + $comli.$cc $a, $b, $c, $d + ENDIF + MEND + + MACRO + LIBI3 $comli, $combi, $a, $b, $c, $cc + IF {ENDIAN}="big" + $combi.$cc $a, $b, $c + ELSE + $comli.$cc $a, $b, $c + ENDIF + MEND + + ;// SMLAxy macros + + MACRO + SMLA00 $a, $b, $c, $d, $cc + LIBI4 SMLABB, SMLATT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA01 $a, $b, $c, $d, $cc + LIBI4 SMLABT, SMLATB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA0B $a, $b, $c, $d, $cc + LIBI4 SMLABB, SMLATB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA0T $a, $b, $c, $d, $cc + LIBI4 SMLABT, SMLATT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA10 $a, $b, $c, $d, $cc + LIBI4 SMLATB, SMLABT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA11 $a, $b, $c, $d, $cc + LIBI4 SMLATT, SMLABB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA1B $a, $b, $c, $d, $cc + LIBI4 SMLATB, SMLABB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA1T $a, $b, $c, $d, $cc + LIBI4 SMLATT, SMLABT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAB0 $a, $b, $c, $d, $cc + LIBI4 SMLABB, SMLABT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAB1 $a, $b, $c, $d, $cc + LIBI4 SMLABT, SMLABB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAT0 $a, $b, $c, $d, $cc + LIBI4 SMLATB, SMLATT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAT1 $a, $b, $c, $d, $cc + LIBI4 SMLATT, SMLATB, $a, $b, $c, $d, $cc + MEND + + ;// SMULxy macros + + MACRO + SMUL00 $a, $b, $c, $cc + LIBI3 SMULBB, SMULTT, $a, $b, $c, $cc + MEND + + 
MACRO + SMUL01 $a, $b, $c, $cc + LIBI3 SMULBT, SMULTB, $a, $b, $c, $cc + MEND + + MACRO + SMUL0B $a, $b, $c, $cc + LIBI3 SMULBB, SMULTB, $a, $b, $c, $cc + MEND + + MACRO + SMUL0T $a, $b, $c, $cc + LIBI3 SMULBT, SMULTT, $a, $b, $c, $cc + MEND + + MACRO + SMUL10 $a, $b, $c, $cc + LIBI3 SMULTB, SMULBT, $a, $b, $c, $cc + MEND + + MACRO + SMUL11 $a, $b, $c, $cc + LIBI3 SMULTT, SMULBB, $a, $b, $c, $cc + MEND + + MACRO + SMUL1B $a, $b, $c, $cc + LIBI3 SMULTB, SMULBB, $a, $b, $c, $cc + MEND + + MACRO + SMUL1T $a, $b, $c, $cc + LIBI3 SMULTT, SMULBT, $a, $b, $c, $cc + MEND + + MACRO + SMULB0 $a, $b, $c, $cc + LIBI3 SMULBB, SMULBT, $a, $b, $c, $cc + MEND + + MACRO + SMULB1 $a, $b, $c, $cc + LIBI3 SMULBT, SMULBB, $a, $b, $c, $cc + MEND + + MACRO + SMULT0 $a, $b, $c, $cc + LIBI3 SMULTB, SMULTT, $a, $b, $c, $cc + MEND + + MACRO + SMULT1 $a, $b, $c, $cc + LIBI3 SMULTT, SMULTB, $a, $b, $c, $cc + MEND + + ;// SMLAWx, SMULWx macros + + MACRO + SMLAW0 $a, $b, $c, $d, $cc + LIBI4 SMLAWB, SMLAWT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAW1 $a, $b, $c, $d, $cc + LIBI4 SMLAWT, SMLAWB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMULW0 $a, $b, $c, $cc + LIBI3 SMULWB, SMULWT, $a, $b, $c, $cc + MEND + + MACRO + SMULW1 $a, $b, $c, $cc + LIBI3 SMULWT, SMULWB, $a, $b, $c, $cc + MEND + + ;// SMLALxy macros + + + MACRO + SMLAL00 $a, $b, $c, $d, $cc + LIBI4 SMLALBB, SMLALTT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL01 $a, $b, $c, $d, $cc + LIBI4 SMLALBT, SMLALTB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL0B $a, $b, $c, $d, $cc + LIBI4 SMLALBB, SMLALTB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL0T $a, $b, $c, $d, $cc + LIBI4 SMLALBT, SMLALTT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL10 $a, $b, $c, $d, $cc + LIBI4 SMLALTB, SMLALBT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL11 $a, $b, $c, $d, $cc + LIBI4 SMLALTT, SMLALBB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL1B $a, $b, $c, $d, $cc + LIBI4 SMLALTB, SMLALBB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL1T $a, $b, $c, $d, $cc + LIBI4 
SMLALTT, SMLALBT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLALB0 $a, $b, $c, $d, $cc + LIBI4 SMLALBB, SMLALBT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLALB1 $a, $b, $c, $d, $cc + LIBI4 SMLALBT, SMLALBB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLALT0 $a, $b, $c, $d, $cc + LIBI4 SMLALTB, SMLALTT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLALT1 $a, $b, $c, $d, $cc + LIBI4 SMLALTT, SMLALTB, $a, $b, $c, $d, $cc + MEND + + ENDIF ;// ARMCOMM_S_H + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armOMX.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armOMX.h new file mode 100644 index 0000000..f629f72 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/armOMX.h @@ -0,0 +1,274 @@ +/* + * + * File Name: armOMX_ReleaseVersion.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * This file allows a version of the OMX DL libraries to be built where some or + * all of the function names can be given a user specified suffix. + * + * You might want to use it where: + * + * - you want to rename a function "out of the way" so that you could replace + * a function with a different version (the original version would still be + * in the library just with a different name - so you could debug the new + * version by comparing it to the output of the old) + * + * - you want to rename all the functions to versions with a suffix so that + * you can include two versions of the library and choose between functions + * at runtime. + * + * e.g. 
omxIPBM_Copy_U8_C1R could be renamed omxIPBM_Copy_U8_C1R_CortexA8 + * + */ + + +#ifndef _armOMX_H_ +#define _armOMX_H_ + + +/* We need to define these two macros in order to expand and concatenate the names */ +#define OMXCAT2BAR(A, B) omx ## A ## B +#define OMXCATBAR(A, B) OMXCAT2BAR(A, B) + +/* Define the suffix to add to all functions - the default is no suffix */ +#define BARE_SUFFIX + + + +/* Define what happens to the bare suffix-less functions, down to the sub-domain accuracy */ +#define OMXACAAC_SUFFIX BARE_SUFFIX +#define OMXACMP3_SUFFIX BARE_SUFFIX +#define OMXICJP_SUFFIX BARE_SUFFIX +#define OMXIPBM_SUFFIX BARE_SUFFIX +#define OMXIPCS_SUFFIX BARE_SUFFIX +#define OMXIPPP_SUFFIX BARE_SUFFIX +#define OMXSP_SUFFIX BARE_SUFFIX +#define OMXVCCOMM_SUFFIX BARE_SUFFIX +#define OMXVCM4P10_SUFFIX BARE_SUFFIX +#define OMXVCM4P2_SUFFIX BARE_SUFFIX + + + + +/* Define what the each bare, un-suffixed OpenMAX API function names is to be renamed */ +#define omxACAAC_DecodeChanPairElt OMXCATBAR(ACAAC_DecodeChanPairElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeDatStrElt OMXCATBAR(ACAAC_DecodeDatStrElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeFillElt OMXCATBAR(ACAAC_DecodeFillElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeIsStereo_S32 OMXCATBAR(ACAAC_DecodeIsStereo_S32, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeMsPNS_S32_I OMXCATBAR(ACAAC_DecodeMsPNS_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeMsStereo_S32_I OMXCATBAR(ACAAC_DecodeMsStereo_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_DecodePrgCfgElt OMXCATBAR(ACAAC_DecodePrgCfgElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeTNS_S32_I OMXCATBAR(ACAAC_DecodeTNS_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_DeinterleaveSpectrum_S32 OMXCATBAR(ACAAC_DeinterleaveSpectrum_S32, OMXACAAC_SUFFIX) +#define omxACAAC_EncodeTNS_S32_I OMXCATBAR(ACAAC_EncodeTNS_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_LongTermPredict_S32 OMXCATBAR(ACAAC_LongTermPredict_S32, OMXACAAC_SUFFIX) +#define omxACAAC_LongTermReconstruct_S32_I 
OMXCATBAR(ACAAC_LongTermReconstruct_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_MDCTFwd_S32 OMXCATBAR(ACAAC_MDCTFwd_S32, OMXACAAC_SUFFIX) +#define omxACAAC_MDCTInv_S32_S16 OMXCATBAR(ACAAC_MDCTInv_S32_S16, OMXACAAC_SUFFIX) +#define omxACAAC_NoiselessDecode OMXCATBAR(ACAAC_NoiselessDecode, OMXACAAC_SUFFIX) +#define omxACAAC_QuantInv_S32_I OMXCATBAR(ACAAC_QuantInv_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_UnpackADIFHeader OMXCATBAR(ACAAC_UnpackADIFHeader, OMXACAAC_SUFFIX) +#define omxACAAC_UnpackADTSFrameHeader OMXCATBAR(ACAAC_UnpackADTSFrameHeader, OMXACAAC_SUFFIX) + + +#define omxACMP3_HuffmanDecode_S32 OMXCATBAR(ACMP3_HuffmanDecode_S32, OMXACMP3_SUFFIX) +#define omxACMP3_HuffmanDecodeSfb_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfb_S32, OMXACMP3_SUFFIX) +#define omxACMP3_HuffmanDecodeSfbMbp_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfbMbp_S32, OMXACMP3_SUFFIX) +#define omxACMP3_MDCTInv_S32 OMXCATBAR(ACMP3_MDCTInv_S32, OMXACMP3_SUFFIX) +#define omxACMP3_ReQuantize_S32_I OMXCATBAR(ACMP3_ReQuantize_S32_I, OMXACMP3_SUFFIX) +#define omxACMP3_ReQuantizeSfb_S32_I OMXCATBAR(ACMP3_ReQuantizeSfb_S32_I, OMXACMP3_SUFFIX) +#define omxACMP3_SynthPQMF_S32_S16 OMXCATBAR(ACMP3_SynthPQMF_S32_S16, OMXACMP3_SUFFIX) +#define omxACMP3_UnpackFrameHeader OMXCATBAR(ACMP3_UnpackFrameHeader, OMXACMP3_SUFFIX) +#define omxACMP3_UnpackScaleFactors_S8 OMXCATBAR(ACMP3_UnpackScaleFactors_S8, OMXACMP3_SUFFIX) +#define omxACMP3_UnpackSideInfo OMXCATBAR(ACMP3_UnpackSideInfo, OMXACMP3_SUFFIX) + +#define omxICJP_CopyExpand_U8_C3 OMXCATBAR(ICJP_CopyExpand_U8_C3, OMXICJP_SUFFIX) +#define omxICJP_DCTFwd_S16 OMXCATBAR(ICJP_DCTFwd_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTFwd_S16_I OMXCATBAR(ICJP_DCTFwd_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTInv_S16 OMXCATBAR(ICJP_DCTInv_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTInv_S16_I OMXCATBAR(ICJP_DCTInv_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwd_Multiple_S16 OMXCATBAR(ICJP_DCTQuantFwd_Multiple_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwd_S16 
OMXCATBAR(ICJP_DCTQuantFwd_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwd_S16_I OMXCATBAR(ICJP_DCTQuantFwd_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwdTableInit OMXCATBAR(ICJP_DCTQuantFwdTableInit, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInv_Multiple_S16 OMXCATBAR(ICJP_DCTQuantInv_Multiple_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInv_S16 OMXCATBAR(ICJP_DCTQuantInv_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInv_S16_I OMXCATBAR(ICJP_DCTQuantInv_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInvTableInit OMXCATBAR(ICJP_DCTQuantInvTableInit, OMXICJP_SUFFIX) +#define omxICJP_DecodeHuffman8x8_Direct_S16_C1 OMXCATBAR(ICJP_DecodeHuffman8x8_Direct_S16_C1, OMXICJP_SUFFIX) +#define omxICJP_DecodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX) +#define omxICJP_DecodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecInit_U8, OMXICJP_SUFFIX) +#define omxICJP_EncodeHuffman8x8_Direct_S16_U1_C1 OMXCATBAR(ICJP_EncodeHuffman8x8_Direct_S16_U1_C1, OMXICJP_SUFFIX) +#define omxICJP_EncodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX) +#define omxICJP_EncodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecInit_U8, OMXICJP_SUFFIX) + +#define omxIPBM_AddC_U8_C1R_Sfs OMXCATBAR(IPBM_AddC_U8_C1R_Sfs, OMXIPBM_SUFFIX) +#define omxIPBM_Copy_U8_C1R OMXCATBAR(IPBM_Copy_U8_C1R, OMXIPBM_SUFFIX) +#define omxIPBM_Copy_U8_C3R OMXCATBAR(IPBM_Copy_U8_C3R, OMXIPBM_SUFFIX) +#define omxIPBM_Mirror_U8_C1R OMXCATBAR(IPBM_Mirror_U8_C1R, OMXIPBM_SUFFIX) +#define omxIPBM_MulC_U8_C1R_Sfs OMXCATBAR(IPBM_MulC_U8_C1R_Sfs, OMXIPBM_SUFFIX) + +#define omxIPCS_ColorTwistQ14_U8_C3R OMXCATBAR(IPCS_ColorTwistQ14_U8_C3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX) +#define 
omxIPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr420RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr420RszRot_U8_P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr422RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R OMXCATBAR(IPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr422RszRot_U8_P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbYCr422ToBGR565_U8_U16_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR565_U8_U16_C2C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbYCr422ToBGR888_U8_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR888_U8_C2C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R 
OMXCATBAR(IPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToYCbCr420Rotate_U8_P3R OMXCATBAR(IPCS_YCbCr422ToYCbCr420Rotate_U8_P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR565_U8_U16_C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR888_U8_C3R OMXCATBAR(IPCS_YCbCr444ToBGR888_U8_C3R, OMXIPCS_SUFFIX) + +#define omxIPPP_Deblock_HorEdge_U8_I OMXCATBAR(IPPP_Deblock_HorEdge_U8_I, OMXIPPP_SUFFIX) +#define omxIPPP_Deblock_VerEdge_U8_I OMXCATBAR(IPPP_Deblock_VerEdge_U8_I, OMXIPPP_SUFFIX) +#define omxIPPP_FilterFIR_U8_C1R OMXCATBAR(IPPP_FilterFIR_U8_C1R, OMXIPPP_SUFFIX) +#define omxIPPP_FilterMedian_U8_C1R OMXCATBAR(IPPP_FilterMedian_U8_C1R, OMXIPPP_SUFFIX) +#define omxIPPP_GetCentralMoment_S64 OMXCATBAR(IPPP_GetCentralMoment_S64, OMXIPPP_SUFFIX) +#define omxIPPP_GetSpatialMoment_S64 OMXCATBAR(IPPP_GetSpatialMoment_S64, OMXIPPP_SUFFIX) +#define omxIPPP_MomentGetStateSize OMXCATBAR(IPPP_MomentGetStateSize, OMXIPPP_SUFFIX) +#define omxIPPP_MomentInit OMXCATBAR(IPPP_MomentInit, OMXIPPP_SUFFIX) +#define omxIPPP_Moments_U8_C1R OMXCATBAR(IPPP_Moments_U8_C1R, OMXIPPP_SUFFIX) +#define omxIPPP_Moments_U8_C3R OMXCATBAR(IPPP_Moments_U8_C3R, OMXIPPP_SUFFIX) + +#define omxSP_BlockExp_S16 OMXCATBAR(SP_BlockExp_S16, OMXSP_SUFFIX) +#define omxSP_BlockExp_S32 OMXCATBAR(SP_BlockExp_S32, OMXSP_SUFFIX) +#define omxSP_Copy_S16 OMXCATBAR(SP_Copy_S16, OMXSP_SUFFIX) +#define omxSP_DotProd_S16 OMXCATBAR(SP_DotProd_S16, OMXSP_SUFFIX) +#define omxSP_DotProd_S16_Sfs OMXCATBAR(SP_DotProd_S16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTFwd_CToC_SC16_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTFwd_CToC_SC32_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC32_Sfs, OMXSP_SUFFIX) +#define 
omxSP_FFTFwd_RToCCS_S16S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S16S32_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTFwd_RToCCS_S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S32_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_C_SC16 OMXCATBAR(SP_FFTGetBufSize_C_SC16, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_C_SC32 OMXCATBAR(SP_FFTGetBufSize_C_SC32, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_R_S16S32 OMXCATBAR(SP_FFTGetBufSize_R_S16S32, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_R_S32 OMXCATBAR(SP_FFTGetBufSize_R_S32, OMXSP_SUFFIX) +#define omxSP_FFTInit_C_SC16 OMXCATBAR(SP_FFTInit_C_SC16, OMXSP_SUFFIX) +#define omxSP_FFTInit_C_SC32 OMXCATBAR(SP_FFTInit_C_SC32, OMXSP_SUFFIX) +#define omxSP_FFTInit_R_S16S32 OMXCATBAR(SP_FFTInit_R_S16S32, OMXSP_SUFFIX) +#define omxSP_FFTInit_R_S32 OMXCATBAR(SP_FFTInit_R_S32, OMXSP_SUFFIX) +#define omxSP_FFTInv_CCSToR_S32_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTInv_CCSToR_S32S16_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32S16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTInv_CToC_SC16_Sfs OMXCATBAR(SP_FFTInv_CToC_SC16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTInv_CToC_SC32_Sfs OMXCATBAR(SP_FFTInv_CToC_SC32_Sfs, OMXSP_SUFFIX) +#define omxSP_FilterMedian_S32 OMXCATBAR(SP_FilterMedian_S32, OMXSP_SUFFIX) +#define omxSP_FilterMedian_S32_I OMXCATBAR(SP_FilterMedian_S32_I, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16 OMXCATBAR(SP_FIR_Direct_S16, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16_I OMXCATBAR(SP_FIR_Direct_S16_I, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16_ISfs OMXCATBAR(SP_FIR_Direct_S16_ISfs, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16_Sfs OMXCATBAR(SP_FIR_Direct_S16_Sfs, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16 OMXCATBAR(SP_FIROne_Direct_S16, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16_I OMXCATBAR(SP_FIROne_Direct_S16_I, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16_ISfs OMXCATBAR(SP_FIROne_Direct_S16_ISfs, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16_Sfs OMXCATBAR(SP_FIROne_Direct_S16_Sfs, OMXSP_SUFFIX) +#define 
omxSP_IIR_BiQuadDirect_S16 OMXCATBAR(SP_IIR_BiQuadDirect_S16, OMXSP_SUFFIX) +#define omxSP_IIR_BiQuadDirect_S16_I OMXCATBAR(SP_IIR_BiQuadDirect_S16_I, OMXSP_SUFFIX) +#define omxSP_IIR_Direct_S16 OMXCATBAR(SP_IIR_Direct_S16, OMXSP_SUFFIX) +#define omxSP_IIR_Direct_S16_I OMXCATBAR(SP_IIR_Direct_S16_I, OMXSP_SUFFIX) +#define omxSP_IIROne_BiQuadDirect_S16 OMXCATBAR(SP_IIROne_BiQuadDirect_S16, OMXSP_SUFFIX) +#define omxSP_IIROne_BiQuadDirect_S16_I OMXCATBAR(SP_IIROne_BiQuadDirect_S16_I, OMXSP_SUFFIX) +#define omxSP_IIROne_Direct_S16 OMXCATBAR(SP_IIROne_Direct_S16, OMXSP_SUFFIX) +#define omxSP_IIROne_Direct_S16_I OMXCATBAR(SP_IIROne_Direct_S16_I, OMXSP_SUFFIX) + +#define omxVCCOMM_Average_16x OMXCATBAR(VCCOMM_Average_16x, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_Average_8x OMXCATBAR(VCCOMM_Average_8x, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_ComputeTextureErrorBlock OMXCATBAR(VCCOMM_ComputeTextureErrorBlock, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_ComputeTextureErrorBlock_SAD OMXCATBAR(VCCOMM_ComputeTextureErrorBlock_SAD, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_Copy16x16 OMXCATBAR(VCCOMM_Copy16x16, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_Copy8x8 OMXCATBAR(VCCOMM_Copy8x8, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_ExpandFrame_I OMXCATBAR(VCCOMM_ExpandFrame_I, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_LimitMVToRect OMXCATBAR(VCCOMM_LimitMVToRect, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_SAD_16x OMXCATBAR(VCCOMM_SAD_16x, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_SAD_8x OMXCATBAR(VCCOMM_SAD_8x, OMXVCCOMM_SUFFIX) + +#define omxVCM4P10_Average_4x OMXCATBAR(VCM4P10_Average_4x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_BlockMatch_Half OMXCATBAR(VCM4P10_BlockMatch_Half, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_BlockMatch_Integer OMXCATBAR(VCM4P10_BlockMatch_Integer, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_BlockMatch_Quarter OMXCATBAR(VCM4P10_BlockMatch_Quarter, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DeblockChroma_I OMXCATBAR(VCM4P10_DeblockChroma_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DeblockLuma_I 
OMXCATBAR(VCM4P10_DeblockLuma_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeChromaDcCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DecodeCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DequantTransformResidualFromPairAndAdd OMXCATBAR(VCM4P10_DequantTransformResidualFromPairAndAdd, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingChroma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_HorEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingChroma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_VerEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingLuma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_HorEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingLuma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_VerEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_GetVLCInfo OMXCATBAR(VCM4P10_GetVLCInfo, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateChroma OMXCATBAR(VCM4P10_InterpolateChroma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateHalfHor_Luma OMXCATBAR(VCM4P10_InterpolateHalfHor_Luma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateHalfVer_Luma OMXCATBAR(VCM4P10_InterpolateHalfVer_Luma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateLuma OMXCATBAR(VCM4P10_InterpolateLuma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InvTransformDequant_ChromaDC OMXCATBAR(VCM4P10_InvTransformDequant_ChromaDC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InvTransformDequant_LumaDC OMXCATBAR(VCM4P10_InvTransformDequant_LumaDC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InvTransformResidualAndAdd OMXCATBAR(VCM4P10_InvTransformResidualAndAdd, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_MEGetBufSize OMXCATBAR(VCM4P10_MEGetBufSize, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_MEInit OMXCATBAR(VCM4P10_MEInit, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_MotionEstimationMB OMXCATBAR(VCM4P10_MotionEstimationMB, OMXVCM4P10_SUFFIX) +#define 
omxVCM4P10_PredictIntra_16x16 OMXCATBAR(VCM4P10_PredictIntra_16x16, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_PredictIntra_4x4 OMXCATBAR(VCM4P10_PredictIntra_4x4, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_PredictIntraChroma_8x8 OMXCATBAR(VCM4P10_PredictIntraChroma_8x8, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SAD_4x OMXCATBAR(VCM4P10_SAD_4x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SADQuar_16x OMXCATBAR(VCM4P10_SADQuar_16x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SADQuar_4x OMXCATBAR(VCM4P10_SADQuar_4x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SADQuar_8x OMXCATBAR(VCM4P10_SADQuar_8x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SATD_4x4 OMXCATBAR(VCM4P10_SATD_4x4, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SubAndTransformQDQResidual OMXCATBAR(VCM4P10_SubAndTransformQDQResidual, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformDequantChromaDCFromPair OMXCATBAR(VCM4P10_TransformDequantChromaDCFromPair, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformDequantLumaDCFromPair OMXCATBAR(VCM4P10_TransformDequantLumaDCFromPair, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformQuant_ChromaDC OMXCATBAR(VCM4P10_TransformQuant_ChromaDC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformQuant_LumaDC OMXCATBAR(VCM4P10_TransformQuant_LumaDC, OMXVCM4P10_SUFFIX) + +#define omxVCM4P2_BlockMatch_Half_16x16 OMXCATBAR(VCM4P2_BlockMatch_Half_16x16, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_BlockMatch_Half_8x8 OMXCATBAR(VCM4P2_BlockMatch_Half_8x8, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_BlockMatch_Integer_16x16 OMXCATBAR(VCM4P2_BlockMatch_Integer_16x16, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_BlockMatch_Integer_8x8 OMXCATBAR(VCM4P2_BlockMatch_Integer_8x8, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DCT8x8blk OMXCATBAR(VCM4P2_DCT8x8blk, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeBlockCoef_Inter OMXCATBAR(VCM4P2_DecodeBlockCoef_Inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeBlockCoef_Intra OMXCATBAR(VCM4P2_DecodeBlockCoef_Intra, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodePadMV_PVOP OMXCATBAR(VCM4P2_DecodePadMV_PVOP, 
OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeVLCZigzag_Inter OMXCATBAR(VCM4P2_DecodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeMV OMXCATBAR(VCM4P2_EncodeMV, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeVLCZigzag_Inter OMXCATBAR(VCM4P2_EncodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_FindMVpred OMXCATBAR(VCM4P2_FindMVpred, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_IDCT8x8blk OMXCATBAR(VCM4P2_IDCT8x8blk, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MCReconBlock OMXCATBAR(VCM4P2_MCReconBlock, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MEGetBufSize OMXCATBAR(VCM4P2_MEGetBufSize, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MEInit OMXCATBAR(VCM4P2_MEInit, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MotionEstimationMB OMXCATBAR(VCM4P2_MotionEstimationMB, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_PredictReconCoefIntra OMXCATBAR(VCM4P2_PredictReconCoefIntra, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantInter_I OMXCATBAR(VCM4P2_QuantInter_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantIntra_I OMXCATBAR(VCM4P2_QuantIntra_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantInvInter_I OMXCATBAR(VCM4P2_QuantInvInter_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantInvIntra_I OMXCATBAR(VCM4P2_QuantInvIntra_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_TransRecBlockCoef_inter OMXCATBAR(VCM4P2_TransRecBlockCoef_inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_TransRecBlockCoef_intra OMXCATBAR(VCM4P2_TransRecBlockCoef_intra, OMXVCM4P2_SUFFIX) + + +#endif /* _armOMX_h_ */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes.h 
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes.h new file mode 100644 index 0000000..8b295a6 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes.h @@ -0,0 +1,252 @@ +/** + * File: omxtypes.h + * Brief: Defines basic Data types used in OpenMAX v1.0.2 header files. + * + * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved. + * + * These materials are protected by copyright laws and contain material + * proprietary to the Khronos Group, Inc. You may use these materials + * for implementing Khronos specifications, without altering or removing + * any trademark, copyright or other notice from the specification. + * + * Khronos Group makes no, and expressly disclaims any, representations + * or warranties, express or implied, regarding these materials, including, + * without limitation, any implied warranties of merchantability or fitness + * for a particular purpose or non-infringement of any intellectual property. + * Khronos Group makes no, and expressly disclaims any, warranties, express + * or implied, regarding the correctness, accuracy, completeness, timeliness, + * and reliability of these materials. + * + * Under no circumstances will the Khronos Group, or any of its Promoters, + * Contributors or Members or their respective partners, officers, directors, + * employees, agents or representatives be liable for any damages, whether + * direct, indirect, special or consequential damages for lost revenues, + * lost profits, or otherwise, arising from or in connection with these + * materials. + * + * Khronos and OpenMAX are trademarks of the Khronos Group Inc. 
+ * + */ + +#ifndef _OMXTYPES_H_ +#define _OMXTYPES_H_ + +#include <limits.h> + +#define OMX_IN +#define OMX_OUT +#define OMX_INOUT + + +typedef enum { + + /* Mandatory return codes - use cases are explicitly described for each function */ + OMX_Sts_NoErr = 0, /* No error, the function completed successfully */ + OMX_Sts_Err = -2, /* Unknown/unspecified error */ + OMX_Sts_InvalidBitstreamValErr = -182, /* Invalid value detected during bitstream processing */ + OMX_Sts_MemAllocErr = -9, /* Not enough memory allocated for the operation */ + OMX_StsACAAC_GainCtrErr = -159, /* AAC: Unsupported gain control data detected */ + OMX_StsACAAC_PrgNumErr = -167, /* AAC: Invalid number of elements for one program */ + OMX_StsACAAC_CoefValErr = -163, /* AAC: Invalid quantized coefficient value */ + OMX_StsACAAC_MaxSfbErr = -162, /* AAC: Invalid maxSfb value in relation to numSwb */ + OMX_StsACAAC_PlsDataErr = -160, /* AAC: pulse escape sequence data error */ + + /* Optional return codes - use cases are explicitly described for each function*/ + OMX_Sts_BadArgErr = -5, /* Bad Arguments */ + + OMX_StsACAAC_TnsNumFiltErr = -157, /* AAC: Invalid number of TNS filters */ + OMX_StsACAAC_TnsLenErr = -156, /* AAC: Invalid TNS region length */ + OMX_StsACAAC_TnsOrderErr = -155, /* AAC: Invalid order of TNS filter */ + OMX_StsACAAC_TnsCoefResErr = -154, /* AAC: Invalid bit-resolution for TNS filter coefficients */ + OMX_StsACAAC_TnsCoefErr = -153, /* AAC: Invalid TNS filter coefficients */ + OMX_StsACAAC_TnsDirectErr = -152, /* AAC: Invalid TNS filter direction */ + + OMX_StsICJP_JPEGMarkerErr = -183, /* JPEG marker encountered within an entropy-coded block; */ + /* Huffman decoding operation terminated early. */ + OMX_StsICJP_JPEGMarker = -181, /* JPEG marker encountered; Huffman decoding */ + /* operation terminated early. 
*/ + OMX_StsIPPP_ContextMatchErr = -17, /* Context parameter doesn't match to the operation */ + + OMX_StsSP_EvenMedianMaskSizeErr = -180, /* Even size of the Median Filter mask was replaced by the odd one */ + + OMX_Sts_MaximumEnumeration = INT_MAX /*Placeholder, forces enum of size OMX_INT*/ + + } OMXResult; /** Return value or error value returned from a function. Identical to OMX_INT */ + + +/* OMX_U8 */ +#if UCHAR_MAX == 0xff +typedef unsigned char OMX_U8; +#elif USHRT_MAX == 0xff +typedef unsigned short int OMX_U8; +#else +#error OMX_U8 undefined +#endif + + +/* OMX_S8 */ +#if SCHAR_MAX == 0x7f +typedef signed char OMX_S8; +#elif SHRT_MAX == 0x7f +typedef signed short int OMX_S8; +#else +#error OMX_S8 undefined +#endif + + +/* OMX_U16 */ +#if USHRT_MAX == 0xffff +typedef unsigned short int OMX_U16; +#elif UINT_MAX == 0xffff +typedef unsigned int OMX_U16; +#else +#error OMX_U16 undefined +#endif + + +/* OMX_S16 */ +#if SHRT_MAX == 0x7fff +typedef signed short int OMX_S16; +#elif INT_MAX == 0x7fff +typedef signed int OMX_S16; +#else +#error OMX_S16 undefined +#endif + + +/* OMX_U32 */ +#if UINT_MAX == 0xffffffff +typedef unsigned int OMX_U32; +#elif LONG_MAX == 0xffffffff +typedef unsigned long int OMX_U32; +#else +#error OMX_U32 undefined +#endif + + +/* OMX_S32 */ +#if INT_MAX == 0x7fffffff +typedef signed int OMX_S32; +#elif LONG_MAX == 0x7fffffff +typedef long signed int OMX_S32; +#else +#error OMX_S32 undefined +#endif + + +/* OMX_U64 & OMX_S64 */ +#if defined( _WIN32 ) || defined ( _WIN64 ) + typedef __int64 OMX_S64; /** Signed 64-bit integer */ + typedef unsigned __int64 OMX_U64; /** Unsigned 64-bit integer */ + #define OMX_MIN_S64 (0x8000000000000000i64) + #define OMX_MIN_U64 (0x0000000000000000i64) + #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFi64) + #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFi64) +#else + typedef long long OMX_S64; /** Signed 64-bit integer */ + typedef unsigned long long OMX_U64; /** Unsigned 64-bit integer */ + #define OMX_MIN_S64 
(0x8000000000000000LL) + #define OMX_MIN_U64 (0x0000000000000000LL) + #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFLL) + #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFLL) +#endif + + +/* OMX_SC8 */ +typedef struct +{ + OMX_S8 Re; /** Real part */ + OMX_S8 Im; /** Imaginary part */ + +} OMX_SC8; /** Signed 8-bit complex number */ + + +/* OMX_SC16 */ +typedef struct +{ + OMX_S16 Re; /** Real part */ + OMX_S16 Im; /** Imaginary part */ + +} OMX_SC16; /** Signed 16-bit complex number */ + + +/* OMX_SC32 */ +typedef struct +{ + OMX_S32 Re; /** Real part */ + OMX_S32 Im; /** Imaginary part */ + +} OMX_SC32; /** Signed 32-bit complex number */ + + +/* OMX_SC64 */ +typedef struct +{ + OMX_S64 Re; /** Real part */ + OMX_S64 Im; /** Imaginary part */ + +} OMX_SC64; /** Signed 64-bit complex number */ + + +/* OMX_F32 */ +typedef float OMX_F32; /** Single precision floating point,IEEE 754 */ + + +/* OMX_F64 */ +typedef double OMX_F64; /** Double precision floating point,IEEE 754 */ + + +/* OMX_INT */ +typedef int OMX_INT; /** signed integer corresponding to machine word length, has maximum signed value INT_MAX*/ + + +#define OMX_MIN_S8 (-128) +#define OMX_MIN_U8 0 +#define OMX_MIN_S16 (-32768) +#define OMX_MIN_U16 0 +#define OMX_MIN_S32 (-2147483647-1) +#define OMX_MIN_U32 0 + +#define OMX_MAX_S8 (127) +#define OMX_MAX_U8 (255) +#define OMX_MAX_S16 (32767) +#define OMX_MAX_U16 (0xFFFF) +#define OMX_MAX_S32 (2147483647) +#define OMX_MAX_U32 (0xFFFFFFFF) + +typedef void OMXVoid; + +#ifndef NULL +#define NULL ((void*)0) +#endif + +/** Defines the geometric position and size of a rectangle, + * where x,y defines the coordinates of the top left corner + * of the rectangle, with dimensions width in the x-direction + * and height in the y-direction */ +typedef struct { + OMX_INT x; /** x-coordinate of top left corner of rectangle */ + OMX_INT y; /** y-coordinate of top left corner of rectangle */ + OMX_INT width; /** Width in the x-direction. */ + OMX_INT height; /** Height in the y-direction. 
*/ +}OMXRect; + + +/** Defines the geometric position of a point, */ +typedef struct +{ + OMX_INT x; /** x-coordinate */ + OMX_INT y; /** y-coordinate */ + +} OMXPoint; + + +/** Defines the dimensions of a rectangle, or region of interest in an image */ +typedef struct +{ + OMX_INT width; /** Width of the rectangle, in the x-direction */ + OMX_INT height; /** Height of the rectangle, in the y-direction */ + +} OMXSize; + +#endif /* _OMXTYPES_H_ */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes_s.h new file mode 100644 index 0000000..8d24b65 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/api/omxtypes_s.h @@ -0,0 +1,77 @@ +;// +;// +;// File Name: omxtypes_s.h +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + +;// Mandatory return codes - use cases are explicitly described for each function +OMX_Sts_NoErr EQU 0 ;// No error the function completed successfully +OMX_Sts_Err EQU -2 ;// Unknown/unspecified error +OMX_Sts_InvalidBitstreamValErr EQU -182 ;// Invalid value detected during bitstream processing +OMX_Sts_MemAllocErr EQU -9 ;// Not enough memory allocated for the operation +OMX_StsACAAC_GainCtrErr EQU -159 ;// AAC: Unsupported gain control data detected +OMX_StsACAAC_PrgNumErr EQU -167 ;// AAC: Invalid number of elements for one program +OMX_StsACAAC_CoefValErr EQU -163 ;// AAC: Invalid quantized coefficient value +OMX_StsACAAC_MaxSfbErr EQU -162 ;// AAC: Invalid maxSfb value in relation to numSwb +OMX_StsACAAC_PlsDataErr EQU -160 ;// AAC: pulse escape sequence data error + +;// Optional return codes - use cases are explicitly described for each function +OMX_Sts_BadArgErr EQU -5 ;// Bad Arguments + +OMX_StsACAAC_TnsNumFiltErr EQU -157 ;// AAC: Invalid number of TNS filters +OMX_StsACAAC_TnsLenErr EQU -156 ;// AAC: 
Invalid TNS region length +OMX_StsACAAC_TnsOrderErr EQU -155 ;// AAC: Invalid order of TNS filter +OMX_StsACAAC_TnsCoefResErr EQU -154 ;// AAC: Invalid bit-resolution for TNS filter coefficients +OMX_StsACAAC_TnsCoefErr EQU -153 ;// AAC: Invalid TNS filter coefficients +OMX_StsACAAC_TnsDirectErr EQU -152 ;// AAC: Invalid TNS filter direction + +OMX_StsICJP_JPEGMarkerErr EQU -183 ;// JPEG marker encountered within an entropy-coded block; + ;// Huffman decoding operation terminated early. +OMX_StsICJP_JPEGMarker EQU -181 ;// JPEG marker encountered; Huffman decoding + ;// operation terminated early. +OMX_StsIPPP_ContextMatchErr EQU -17 ;// Context parameter doesn't match to the operation + +OMX_StsSP_EvenMedianMaskSizeErr EQU -180 ;// Even size of the Median Filter mask was replaced by the odd one + +OMX_Sts_MaximumEnumeration EQU 0x7FFFFFFF + + + +OMX_MIN_S8 EQU (-128) +OMX_MIN_U8 EQU 0 +OMX_MIN_S16 EQU (-32768) +OMX_MIN_U16 EQU 0 + + +OMX_MIN_S32 EQU (-2147483647-1) +OMX_MIN_U32 EQU 0 + +OMX_MAX_S8 EQU (127) +OMX_MAX_U8 EQU (255) +OMX_MAX_S16 EQU (32767) +OMX_MAX_U16 EQU (0xFFFF) +OMX_MAX_S32 EQU (2147483647) +OMX_MAX_U32 EQU (0xFFFFFFFF) + +OMX_VC_UPPER EQU 0x1 ;// Used by the PredictIntra functions +OMX_VC_LEFT EQU 0x2 ;// Used by the PredictIntra functions +OMX_VC_UPPER_RIGHT EQU 0x40 ;// Used by the PredictIntra functions + +NULL EQU 0 + +;// Structures + + INCLUDE armCOMM_s.h + + M_STRUCT OMXPoint + M_FIELD x, 4 + M_FIELD y, 4 + M_ENDSTRUCT + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/build_vc.pl b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/build_vc.pl new file mode 100755 index 0000000..1ae7005 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/build_vc.pl @@ -0,0 +1,111 @@ +#!/usr/bin/perl +# +# +# File Name: build_vc.pl +# OpenMAX DL: v1.0.2 +# Revision: 9641 +# Date: Thursday, February 7, 2008 +# +# (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+# +# +# +# This file builds the OpenMAX DL vc domain library omxVC.o. +# + +use File::Spec; +use strict; + +my ($CC, $CC_OPTS, $AS, $AS_OPTS, $LIB, $LIB_OPTS, $LIB_TYPE); + +$CC = 'armcc'; +$CC_OPTS = '--no_unaligned_access --cpu ARM1136J-S -c'; +$AS = 'armasm'; +$AS_OPTS = '--no_unaligned_access --cpu ARM1136J-S'; +# $LIB = 'armlink'; +# $LIB_OPTS = '--partial -o'; +# $LIB_TYPE = '.o'; +$LIB = 'armar'; +$LIB_OPTS = '--create -r'; +$LIB_TYPE = '.a'; + +#------------------------ + +my (@headerlist, @filelist, $hd, $file, $ofile, $command, $objlist, $libfile, $h); + +# Define the list of directories containing included header files. +@headerlist = qw(api vc/api vc/m4p2/api vc/m4p10/api); + +# Define the list of source files to compile. +open(FILES, '<filelist_vc.txt') or die("Can't open source file list\n"); +@filelist = <FILES>; +close(FILES); + +# Fix the file separators in the header paths +foreach $h (@headerlist) +{ + $h = File::Spec->canonpath($h); +} + +# Create the include path to be passed to the compiler +$hd = '-I' . join(' -I', @headerlist); + +# Create the build directories "/lib/" and "/obj/" (if they are not there already) +mkdir "obj", 0777 if (! -d "obj"); +mkdir "lib", 0777 if (! -d "lib"); + +$objlist = ''; + +# Compile each file +foreach $file (@filelist) +{ + my $f; + my $base; + my $ext; + my $objfile; + + chomp($file); + $file = File::Spec->canonpath($file); + + (undef, undef, $f) = File::Spec->splitpath($file); + if(($base, $ext) = $f =~ /(.+)\.(\w)$/) + { + $objfile = File::Spec->catfile('obj', $base.'.o'); + + if($ext eq 'c') + { + $objlist .= "$objfile "; + $command = $CC.' '.$CC_OPTS.' '.$hd.' -o '.$objfile.' '.$file; + print "$command\n"; + system($command); + } + elsif($ext eq 's') + { + $objlist .= "$objfile "; + $command = $AS.' '.$AS_OPTS.' '.$hd.' -o '.$objfile.' 
'.$file; + print "$command\n"; + system($command); + } + else + { + print "Ignoring file: $f\n"; + } + } + else + { + die "No file extension found: $f\n"; + } +} + +# Do the final link stage to create the libraries. +$libfile = File::Spec->catfile('lib', 'omxVC'.$LIB_TYPE); +$command = $LIB.' '.$LIB_OPTS.' '.$libfile.' '.$objlist; +print "$command\n"; +(system($command) == 0) and print "Build successful\n"; + + + + + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/filelist_vc.txt b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/filelist_vc.txt new file mode 100644 index 0000000..0f1623f --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/filelist_vc.txt @@ -0,0 +1,74 @@ +./api/armCOMM.h +./api/armCOMM_BitDec_s.h +./api/armCOMM_Bitstream.h +./api/armCOMM_IDCT_s.h +./api/armCOMM_IDCTTable.h +./api/armCOMM_MaskTable.h +./api/armCOMM_s.h +./api/armCOMM_Version.h +./api/armOMX_ReleaseVersion.h +./api/omxtypes.h +./api/omxtypes_s.h +./src/armCOMM_IDCTTable.c +./src/armCOMM_MaskTable.c +./vc/api/armVC.h +./vc/api/armVCCOMM_s.h +./vc/api/omxVC.h +./vc/api/omxVC_s.h +./vc/comm/src/omxVCCOMM_Copy16x16_s.s +./vc/comm/src/omxVCCOMM_Copy8x8_s.s +./vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s +./vc/m4p10/api/armVCM4P10_CAVLCTables.h +./vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s +./vc/m4p10/src/armVCM4P10_CAVLCTables.c +./vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s +./vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s +./vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s +./vc/m4p10/src/armVCM4P10_DequantTables_s.s +./vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s 
+./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s +./vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s +./vc/m4p10/src/armVCM4P10_QuantTables_s.s +./vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s +./vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s +./vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c +./vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c +./vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c +./vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c +./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s +./vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s +./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s +./vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s +./vc/m4p10/src/omxVCM4P10_InterpolateChroma.c +./vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s +./vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s +./vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s +./vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s +./vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s +./vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s +./vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h +./vc/m4p2/api/armVCM4P2_ZigZag_Tables.h +./vc/m4p2/src/armVCM4P2_Clip8_s.s +./vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s +./vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c +./vc/m4p2/src/armVCM4P2_Lookup_Tables.c +./vc/m4p2/src/armVCM4P2_SetPredDir_s.s +./vc/m4p2/src/armVCM4P2_Zigzag_Tables.c +./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c +./vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c +./vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s +./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s +./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s +./vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s +./vc/m4p2/src/omxVCM4P2_FindMVpred_s.s +./vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s +./vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s +./vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s +./vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s 
+./vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s +./vc/src/armVC_Version.c
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM.c new file mode 100644 index 0000000..e572a89 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM.c @@ -0,0 +1,936 @@ +/** + * + * File Name: armCOMM.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Defines Common APIs used across OpenMAX API's + */ + +#include "omxtypes.h" +#include "armCOMM.h" + +/***********************************************************************/ + /* Miscellaneous Arithmetic operations */ + +/** + * Function: armRoundFloatToS16 + * + * Description: + * Converts a double precision value into a short int after rounding + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S16 format + * + */ + +OMX_S16 armRoundFloatToS16 (OMX_F64 Value) +{ + if (Value > 0) + { + return (OMX_S16)(Value + .5); + } + else + { + return (OMX_S16)(Value - .5); + } +} + +/** + * Function: armRoundFloatToS32 + * + * Description: + * Converts a double precision value into a int after rounding + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S32 format + * + */ + +OMX_S32 armRoundFloatToS32 (OMX_F64 Value) +{ + if (Value > 0) + { + return (OMX_S32)(Value + .5); + } + else + { + return (OMX_S32)(Value - .5); + } +} +/** + * Function: armSatRoundFloatToS16 + * + * Description: + * Converts a double precision value into a short int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S16 format + * + */ + +OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value) +{ + if (Value > 0) + { + Value += 0.5; + + if(Value > (OMX_S16)OMX_MAX_S16 ) + { + 
return (OMX_S16)OMX_MAX_S16; + } + else + { + return (OMX_S16)Value; + } + } + else + { + Value -= 0.5; + + if(Value < (OMX_S16)OMX_MIN_S16 ) + { + return (OMX_S16)OMX_MIN_S16; + } + else + { + return (OMX_S16)Value; + } + } +} + +/** + * Function: armSatRoundFloatToS32 + * + * Description: + * Converts a double precision value into a int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S32 format + * + */ + +OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value) +{ + if (Value > 0) + { + Value += 0.5; + + if(Value > (OMX_S32)OMX_MAX_S32 ) + { + return (OMX_S32)OMX_MAX_S32; + } + else + { + return (OMX_S32)Value; + } + } + else + { + Value -= 0.5; + + if(Value < (OMX_S32)OMX_MIN_S32 ) + { + return (OMX_S32)OMX_MIN_S32; + } + else + { + return (OMX_S32)Value; + } + } +} + +/** + * Function: armSatRoundFloatToU16 + * + * Description: + * Converts a double precision value into a unsigned short int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_U16 format + * + */ + +OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value) +{ + Value += 0.5; + + if(Value > (OMX_U16)OMX_MAX_U16 ) + { + return (OMX_U16)OMX_MAX_U16; + } + else + { + return (OMX_U16)Value; + } +} + +/** + * Function: armSatRoundFloatToU32 + * + * Description: + * Converts a double precision value into a unsigned int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_U32 format + * + */ + +OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value) +{ + Value += 0.5; + + if(Value > (OMX_U32)OMX_MAX_U32 ) + { + return (OMX_U32)OMX_MAX_U32; + } + else + { + return (OMX_U32)Value; + } +} + +/** + * Function: armRoundFloatToS64 + * + * Description: + * Converts a double precision value into a 64 bit int after rounding + * + * Parameters: + * [in] Value 
Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S64 format + * + */ + +OMX_S64 armRoundFloatToS64 (OMX_F64 Value) +{ + if (Value > 0) + { + return (OMX_S64)(Value + .5); + } + else + { + return (OMX_S64)(Value - .5); + } +} + +/** + * Function: armSignCheck + * + * Description: + * Checks the sign of a variable: + * returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + * + * Remarks: + * + * Parameters: + * [in] var Variable to be checked + * + * Return Value: + * OMX_INT -- returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + */ + +OMX_INT armSignCheck ( + OMX_S16 var +) + +{ + OMX_INT Sign; + + if (var < 0) + { + Sign = -1; + } + else if ( var > 0) + { + Sign = 1; + } + else + { + Sign = 0; + } + + return Sign; +} + +/** + * Function: armClip + * + * Description: Clips the input between MAX and MIN value + * + * + * Remarks: + * + * Parameters: + * [in] Min lower bound + * [in] Max upper bound + * [in] src variable to the clipped + * + * Return Value: + * OMX_S32 -- returns clipped value + */ + +OMX_S32 armClip ( + OMX_INT min, + OMX_INT max, + OMX_S32 src +) + +{ + if (src > max) + { + src = max; + } + else if (src < min) + { + src = min; + } + + return src; +} + +/** + * Function: armClip_F32 + * + * Description: Clips the input between MAX and MIN value + * + * + * Remarks: + * + * Parameters: + * [in] Min lower bound + * [in] Max upper bound + * [in] src variable to the clipped + * + * Return Value: + * OMX_F32 -- returns clipped value + */ + +OMX_F32 armClip_F32 ( + OMX_F32 min, + OMX_F32 max, + OMX_F32 src +) + +{ + if (src > max) + { + src = max; + } + else if (src < min) + { + src = min; + } + + return src; +} + +/** + * Function: armShiftSat_F32 + * + * Description: Divides a float value by 2^shift and + * saturates it for unsigned value range for satBits. + * Second parameter is like "shifting" the corresponding + * integer value. 
Takes care of rounding while clipping the final + * value. + * + * Parameters: + * [in] v Number to be operated upon + * [in] shift Divides the input "v" by "2^shift" + * [in] satBits Final range is [0, 2^satBits) + * + * Return Value: + * OMX_S32 -- returns "shifted" saturated value + */ + +OMX_U32 armShiftSat_F32(OMX_F32 v, OMX_INT shift, OMX_INT satBits) +{ + OMX_U32 allOnes = (OMX_U32)(-1); + OMX_U32 maxV = allOnes >> (32-satBits); + OMX_F32 vShifted, vRounded, shiftDiv = (OMX_F32)(1 << shift); + OMX_U32 vInt; + OMX_U32 vIntSat; + + if(v <= 0) + return 0; + + vShifted = v / shiftDiv; + vRounded = (OMX_F32)(vShifted + 0.5); + vInt = (OMX_U32)vRounded; + vIntSat = vInt; + if(vIntSat > maxV) + vIntSat = maxV; + return vIntSat; +} + +/** + * Functions: armSwapElem + * + * Description: + * These function swaps two elements at the specified pointer locations. + * The size of each element could be anything as specified by <elemSize> + * + * Return Value: + * OMXResult -- Error status from the function + */ +OMXResult armSwapElem( + OMX_U8 *pBuf1, + OMX_U8 *pBuf2, + OMX_INT elemSize + ) +{ + OMX_INT i; + OMX_U8 temp; + armRetArgErrIf(!pBuf1 || !pBuf2, OMX_Sts_BadArgErr); + + for(i = 0; i < elemSize; i++) + { + temp = *(pBuf1 + i); + *(pBuf1 + i) = *(pBuf2 + i); + *(pBuf2 + i) = temp; + } + return OMX_Sts_NoErr; +} + +/** + * Function: armMedianOf3 + * + * Description: Finds the median of three numbers + * + * Remarks: + * + * Parameters: + * [in] fEntry First entry + * [in] sEntry second entry + * [in] tEntry Third entry + * + * Return Value: + * OMX_S32 -- returns the median value + */ + +OMX_S32 armMedianOf3 ( + OMX_S32 fEntry, + OMX_S32 sEntry, + OMX_S32 tEntry +) +{ + OMX_S32 a, b, c; + + a = armMin (fEntry, sEntry); + b = armMax (fEntry, sEntry); + c = armMin (b, tEntry); + return (armMax (a, c)); +} + +/** + * Function: armLogSize + * + * Description: Finds the size of a positive value and returns the same + * + * Remarks: + * + * Parameters: + * [in] value 
Positive value + * + * Return Value: + * OMX_U8 -- Returns the minimum number of bits required to represent the positive value. + This is the smallest k>=0 such that that value is less than (1<<k). + */ + +OMX_U8 armLogSize ( + OMX_U16 value +) +{ + OMX_U8 i; + for ( i = 0; value > 0; value = value >> 1) + { + i++; + } + return i; +} + +/***********************************************************************/ + /* Saturating Arithmetic operations */ + +/** + * Function :armSatAdd_S32() + * + * Description : + * Returns the result of saturated addition of the two inputs Value1, Value2 + * + * Parametrs: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + * + **/ + +OMX_S32 armSatAdd_S32(OMX_S32 Value1,OMX_S32 Value2) +{ + OMX_S32 Result; + + Result = Value1 + Value2; + + if( (Value1^Value2) >= 0) + { + /*Same sign*/ + if( (Result^Value1) >= 0) + { + /*Result has not saturated*/ + return Result; + } + else + { + if(Value1 >= 0) + { + /*Result has saturated in positive side*/ + return OMX_MAX_S32; + } + else + { + /*Result has saturated in negative side*/ + return OMX_MIN_S32; + } + + } + + } + else + { + return Result; + } + +} + +/** + * Function :armSatAdd_S64() + * + * Description : + * Returns the result of saturated addition of the two inputs Value1, Value2 + * + * Parametrs: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + * + **/ + +OMX_S64 armSatAdd_S64(OMX_S64 Value1,OMX_S64 Value2) +{ + OMX_S64 Result; + + Result = Value1 + Value2; + + if( (Value1^Value2) >= 0) + { + /*Same sign*/ + if( (Result^Value1) >= 0) + { + /*Result has not saturated*/ + return Result; + } + else + { + if(Value1 >= 0) + { + /*Result has saturated in positive side*/ + Result = OMX_MAX_S64; + return Result; + } + else + { + /*Result has saturated in negative side*/ + return OMX_MIN_S64; + } + + } + + } + else + { + return Result; + } + +} + +/** Function 
:armSatSub_S32() + * + * Description : + * Returns the result of saturated substraction of the two inputs Value1, Value2 + * + * Parametrs: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatSub_S32(OMX_S32 Value1,OMX_S32 Value2) +{ + OMX_S32 Result; + + Result = Value1 - Value2; + + if( (Value1^Value2) < 0) + { + /*Opposite sign*/ + if( (Result^Value1) >= 0) + { + /*Result has not saturated*/ + return Result; + } + else + { + if(Value1 >= 0) + { + /*Result has saturated in positive side*/ + return OMX_MAX_S32; + } + else + { + /*Result has saturated in negative side*/ + return OMX_MIN_S32; + } + + } + + } + else + { + return Result; + } + +} + +/** + * Function :armSatMac_S32() + * + * Description : + * Returns the result of Multiplication of Value1 and Value2 and subesquent saturated + * accumulation with Mac + * + * Parametrs: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * [in] Mac Accumulator + * + * Return: + * [out] Result of operation + **/ + +OMX_S32 armSatMac_S32(OMX_S32 Mac,OMX_S16 Value1,OMX_S16 Value2) +{ + OMX_S32 Result; + + Result = (OMX_S32)(Value1*Value2); + Result = armSatAdd_S32( Mac , Result ); + + return Result; +} + +/** + * Function :armSatMac_S16S32_S32 + * + * Description : + * Returns the result of saturated MAC operation of the three inputs delayElem, filTap , mac + * + * mac = mac + Saturate_in_32Bits(delayElem * filTap) + * + * Parametrs: + * [in] delayElem First 32 bit Operand + * [in] filTap Second 16 bit Operand + * [in] mac Result of MAC operation + * + * Return: + * [out] mac Result of operation + * + **/ + +OMX_S32 armSatMac_S16S32_S32(OMX_S32 mac, OMX_S32 delayElem, OMX_S16 filTap ) +{ + + OMX_S32 result; + + result = armSatMulS16S32_S32(filTap,delayElem); + + if ( result > OMX_MAX_S16 ) + { + result = OMX_MAX_S32; + } + else if( result < OMX_MIN_S16 ) + { + result = OMX_MIN_S32; + } + else + { + result = delayElem * filTap; + } + 
+ mac = armSatAdd_S32(mac,result); + + return mac; +} + + +/** + * Function :armSatRoundRightShift_S32_S16 + * + * Description : + * Returns the result of rounded right shift operation of input by the scalefactor + * + * output = Saturate_in_16Bits( ( Right/LeftShift( (Round(input) , shift ) ) + * + * Parametrs: + * [in] input The input to be operated on + * [in] shift The shift number + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S16 armSatRoundRightShift_S32_S16(OMX_S32 input, OMX_INT shift) +{ + input = armSatRoundLeftShift_S32(input,-shift); + + if ( input > OMX_MAX_S16 ) + { + return (OMX_S16)OMX_MAX_S16; + } + else if (input < OMX_MIN_S16) + { + return (OMX_S16)OMX_MIN_S16; + } + else + { + return (OMX_S16)input; + } + +} + +/** + * Function :armSatRoundLeftShift_S32() + * + * Description : + * Returns the result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative. + * + * Parametrs: + * [in] Value Operand + * [in] Shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatRoundLeftShift_S32(OMX_S32 Value, OMX_INT Shift) +{ + OMX_INT i; + + if (Shift < 0) + { + Shift = -Shift; + Value = armSatAdd_S32(Value, (1 << (Shift - 1))); + Value = Value >> Shift; + } + else + { + for (i = 0; i < Shift; i++) + { + Value = armSatAdd_S32(Value, Value); + } + } + return Value; +} + +/** + * Function :armSatRoundLeftShift_S64() + * + * Description : + * Returns the result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative. 
+ * + * Parametrs: + * [in] Value Operand + * [in] shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S64 armSatRoundLeftShift_S64(OMX_S64 Value, OMX_INT Shift) +{ + OMX_INT i; + + if (Shift < 0) + { + Shift = -Shift; + Value = armSatAdd_S64(Value, ((OMX_S64)1 << (Shift - 1))); + Value = Value >> Shift; + } + else + { + for (i = 0; i < Shift; i++) + { + Value = armSatAdd_S64(Value, Value); + } + } + return Value; +} + +/** + * Function :armSatMulS16S32_S32() + * + * Description : + * Returns the result of a S16 data type multiplied with an S32 data type + * in a S32 container + * + * Parametrs: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S32 armSatMulS16S32_S32(OMX_S16 input1,OMX_S32 input2) +{ + OMX_S16 hi2,lo1; + OMX_U16 lo2; + + OMX_S32 temp1,temp2; + OMX_S32 result; + + lo1 = input1; + + hi2 = ( input2 >> 16 ); + lo2 = ( (OMX_U32)( input2 << 16 ) >> 16 ); + + temp1 = hi2 * lo1; + temp2 = ( lo2* lo1 ) >> 16; + + result = armSatAdd_S32(temp1,temp2); + + return result; +} + +/** + * Function :armSatMulS32S32_S32() + * + * Description : + * Returns the result of a S32 data type multiplied with an S32 data type + * in a S32 container + * + * Parametrs: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatMulS32S32_S32(OMX_S32 input1,OMX_S32 input2) +{ + OMX_S16 hi1,hi2; + OMX_U16 lo1,lo2; + + OMX_S32 temp1,temp2,temp3; + OMX_S32 result; + + hi1 = ( input1 >> 16 ); + lo1 = ( (OMX_U32)( input1 << 16 ) >> 16 ); + + hi2 = ( input2 >> 16 ); + lo2 = ( (OMX_U32)( input2 << 16 ) >> 16 ); + + temp1 = hi1 * hi2; + temp2 = ( hi1* lo2 ) >> 16; + temp3 = ( hi2* lo1 ) >> 16; + + result = armSatAdd_S32(temp1,temp2); + result = armSatAdd_S32(result,temp3); + + return result; +} + +/** + * Function :armIntDivAwayFromZero() + * + * Description : Integer division with rounding to the nearest 
integer. + * Half-integer values are rounded away from zero + * unless otherwise specified. For example 3//2 is rounded + * to 2, and -3//2 is rounded to -2. + * + * Parametrs: + * [in] Num Operand 1 + * [in] Deno Operand 2 + * + * Return: + * [out] Result of operation input1//input2 + * + **/ + +OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno) +{ + OMX_F64 result; + + result = ((OMX_F64)Num)/((OMX_F64)Deno); + + if (result >= 0) + { + result += 0.5; + } + else + { + result -= 0.5; + } + + return (OMX_S32)(result); +} + + +/*End of File*/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_Bitstream.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_Bitstream.c new file mode 100644 index 0000000..9ef9319 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_Bitstream.c @@ -0,0 +1,329 @@ +/** + * + * File Name: armCOMM_Bitstream.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Defines bitstream encode and decode functions common to all codecs + */ + +#include "omxtypes.h" +#include "armCOMM.h" +#include "armCOMM_Bitstream.h" + +/*************************************** + * Fixed bit length Decode + ***************************************/ + +/** + * Function: armLookAheadBits() + * + * Description: + * Get the next N bits from the bitstream without advancing the bitstream pointer + * + * Parameters: + * [in] **ppBitStream + * [in] *pOffset + * [in] N=1...32 + * + * Returns Value + */ + +OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N) +{ + const OMX_U8 *pBitStream = *ppBitStream; + OMX_INT Offset = *pOffset; + OMX_U32 Value; + + armAssert(Offset>=0 && Offset<=7); + armAssert(N>=1 && N<=32); + + /* Read next 32 bits from stream */ + Value = (pBitStream[0] << 24 ) | ( pBitStream[1] << 16) | (pBitStream[2] << 8 ) | (pBitStream[3]) ; + Value = (Value << Offset ) | (pBitStream[4] >> (8-Offset)); + + /* Return N bits */ + return Value >> (32-N); +} + + +/** + * Function: armGetBits() + * + * Description: + * Read N bits from the bitstream + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * [in] N=1..32 + * + * [out] *ppBitStream + * [out] *pOffset + * Returns Value + */ + + +OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N) +{ + const OMX_U8 *pBitStream = *ppBitStream; + OMX_INT Offset = *pOffset; + OMX_U32 Value; + + if(N == 0) + { + return 0; + } + + armAssert(Offset>=0 && Offset<=7); + armAssert(N>=1 && N<=32); + + /* Read next 32 bits from stream */ + Value = (pBitStream[0] << 24 ) | ( pBitStream[1] << 16) | (pBitStream[2] << 8 ) | (pBitStream[3]) ; + Value = (Value << Offset ) | (pBitStream[4] >> (8-Offset)); + + /* Advance bitstream pointer by N bits */ + Offset += N; + *ppBitStream = pBitStream + (Offset>>3); + *pOffset = Offset & 7; + + /* Return N bits */ + return Value >> (32-N); +} + +/** + * Function: armByteAlign() + * + * Description: + * Align 
the pointer *ppBitStream to the next byte boundary + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + +OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset) +{ + if(*pOffset > 0) + { + *ppBitStream += 1; + *pOffset = 0; + } +} + +/** + * Function: armSkipBits() + * + * Description: + * Skip N bits from the value at *ppBitStream + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * [in] N + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + + +OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N) +{ + OMX_INT Offset = *pOffset; + const OMX_U8 *pBitStream = *ppBitStream; + + /* Advance bitstream pointer by N bits */ + Offset += N; + *ppBitStream = pBitStream + (Offset>>3); + *pOffset = Offset & 7; +} + +/*************************************** + * Variable bit length Decode + ***************************************/ + +/** + * Function: armUnPackVLC32() + * + * Description: + * Variable length decode of variable length symbol (max size 32 bits) read from + * the bit stream pointed by *ppBitStream at *pOffset by using the table + * pointed by pCodeBook + * + * Parameters: + * [in] *pBitStream + * [in] *pOffset + * [in] pCodeBook + * + * [out] *pBitStream + * [out] *pOffset + * + * Returns : Code Book Index if successfull. + * : ARM_NO_CODEBOOK_INDEX = -1 if search fails. 
+ **/ +#ifndef C_OPTIMIZED_IMPLEMENTATION + +OMX_U16 armUnPackVLC32( + const OMX_U8 **ppBitStream, + OMX_INT *pOffset, + const ARM_VLC32 *pCodeBook +) +{ + const OMX_U8 *pBitStream = *ppBitStream; + OMX_INT Offset = *pOffset; + OMX_U32 Value; + OMX_INT Index; + + armAssert(Offset>=0 && Offset<=7); + + /* Read next 32 bits from stream */ + Value = (pBitStream[0] << 24 ) | ( pBitStream[1] << 16) | (pBitStream[2] << 8 ) | (pBitStream[3]) ; + Value = (Value << Offset ) | (pBitStream[4] >> (8-Offset)); + + /* Search through the codebook */ + for (Index=0; pCodeBook->codeLen != 0; Index++) + { + if (pCodeBook->codeWord == (Value >> (32 - pCodeBook->codeLen))) + { + Offset = Offset + pCodeBook->codeLen; + *ppBitStream = pBitStream + (Offset >> 3) ; + *pOffset = Offset & 7; + + return Index; + } + pCodeBook++; + } + + /* No code match found */ + return ARM_NO_CODEBOOK_INDEX; +} + +#endif + +/*************************************** + * Fixed bit length Encode + ***************************************/ + +/** + * Function: armPackBits + * + * Description: + * Pack a VLC code word into the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte + * in the bit stream. + * [in] pOffset pointer to the bit position in the byte + * pointed by *ppBitStream. Valid within 0 + * to 7. + * [in] codeWord Code word that need to be inserted in to the + * bitstream + * [in] codeLength Length of the code word valid range 1...32 + * + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMX_RESULT result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armPackBits ( + OMX_U8 **ppBitStream, + OMX_INT *pOffset, + OMX_U32 codeWord, + OMX_INT codeLength +) +{ + OMX_U8 *pBitStream = *ppBitStream; + OMX_INT Offset = *pOffset; + OMX_U32 Value; + + /* checking argument validity */ + armRetArgErrIf(Offset < 0, OMX_Sts_BadArgErr); + armRetArgErrIf(Offset > 7, OMX_Sts_BadArgErr); + armRetArgErrIf(codeLength < 1, OMX_Sts_BadArgErr); + armRetArgErrIf(codeLength > 32, OMX_Sts_BadArgErr); + + /* Prepare the first byte */ + codeWord = codeWord << (32-codeLength); + Value = (pBitStream[0] >> (8-Offset)) << (8-Offset); + Value = Value | (codeWord >> (24+Offset)); + + /* Write out whole bytes */ + while (8-Offset <= codeLength) + { + *pBitStream++ = (OMX_U8)Value; + codeWord = codeWord << (8-Offset); + codeLength = codeLength - (8-Offset); + Offset = 0; + Value = codeWord >> 24; + } + + /* Write out final partial byte */ + *pBitStream = (OMX_U8)Value; + *ppBitStream = pBitStream; + *pOffset = Offset + codeLength; + + return OMX_Sts_NoErr; +} + +/*************************************** + * Variable bit length Encode + ***************************************/ + +/** + * Function: armPackVLC32 + * + * Description: + * Pack a VLC code word into the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte + * in the bit stream. + * [in] pBitOffset pointer to the bit position in the byte + * pointed by *ppBitStream. Valid within 0 + * to 7. + * [in] code VLC code word that need to be inserted in to the + * bitstream + * + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMX_RESULT result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armPackVLC32 ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + ARM_VLC32 code +) +{ + return (armPackBits(ppBitStream, pBitOffset, code.codeWord, code.codeLen)); +} + +/*End of File*/ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_IDCTTable.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_IDCTTable.c new file mode 100644 index 0000000..9e4679c --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_IDCTTable.c @@ -0,0 +1,60 @@ +/** + * + * File Name: armCOMM_IDCTTable.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File: armCOMM_IDCTTable.c + * Brief: Defines Tables used in IDCT computation + * + */ + +#include "armCOMM_IDCTTable.h" + + /* Table of s(u)*A(u)*A(v)/16 at Q15 + * s(u)=1.0 0 <= u <= 5 + * s(6)=2.0 + * s(7)=4.0 + * A(0) = 2*sqrt(2) + * A(u) = 4*cos(u*pi/16) for (u!=0) + */ + +__align(4) const OMX_U16 armCOMM_IDCTPreScale [64] = +{ + 0x4000, 0x58c5, 0x539f, 0x4b42, 0x4000, 0x3249, 0x4546, 0x46a1, + 0x58c5, 0x7b21, 0x73fc, 0x6862, 0x58c5, 0x45bf, 0x6016, 0x61f8, + 0x539f, 0x73fc, 0x6d41, 0x6254, 0x539f, 0x41b3, 0x5a82, 0x5c48, + 0x4b42, 0x6862, 0x6254, 0x587e, 0x4b42, 0x3b21, 0x5175, 0x530d, + 0x4000, 0x58c5, 0x539f, 0x4b42, 0x4000, 0x3249, 0x4546, 0x46a1, + 0x3249, 0x45bf, 0x41b3, 0x3b21, 0x3249, 0x2782, 0x366d, 0x377e, + 0x22a3, 0x300b, 0x2d41, 0x28ba, 0x22a3, 0x1b37, 0x257e, 0x263a, + 0x11a8, 0x187e, 0x1712, 0x14c3, 0x11a8, 0x0de0, 0x131d, 0x137d +}; + /* Above array armCOMM_IDCTPreScale, in Q23 format */ +const OMX_U32 armCOMM_IDCTPreScaleU32 [64] = +{ + 0x400000, 0x58c543, 0x539eba, 0x4b418c, 0x400000, 0x3248d4, 0x4545ea, 0x46a157, + 0x58c543, 0x7b20d8, 0x73fbfc, 0x686214, 0x58c543, 0x45bf1f, 0x6015a5, 0x61f78b, + 0x539eba, 0x73fbfc, 0x6d413d, 0x6253a6, 0x539eba, 0x41b328, 0x5a827a, 0x5c4869, + 0x4b418c, 0x686214, 0x6253a6, 0x587de3, 
0x4b418c, 0x3b20d8, 0x5174e0, 0x530d69, + 0x400000, 0x58c543, 0x539eba, 0x4b418c, 0x400000, 0x3248d4, 0x4545ea, 0x46a157, + 0x3248d4, 0x45bf1f, 0x41b328, 0x3b20d8, 0x3248d4, 0x27821d, 0x366d72, 0x377e6b, + 0x22a2f5, 0x300ad3, 0x2d413d, 0x28ba70, 0x22a2f5, 0x1b36b9, 0x257d86, 0x26398d, + 0x11a856, 0x187de3, 0x17121a, 0x14c35a, 0x11a856, 0x0ddf9b, 0x131cc7, 0x137ca2 +}; + +const OMX_U16 armCOMM_IDCTCoef [4] = +{ + 0x5a82, /* InvSqrt2 */ + 0x30fc, /* SinPIBy8 */ + 0x7642, /* CosPIBy8 */ + 0x0000 +}; + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_MaskTable.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_MaskTable.c new file mode 100644 index 0000000..3241db2 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/src/armCOMM_MaskTable.c @@ -0,0 +1,45 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armCOMM_MaskTable.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Mask Table to mask the end of array. 
+ * + */ + +#include "omxtypes.h" + +#define MaskTableSize 72 + +const OMX_U16 armCOMM_qMaskTable16[MaskTableSize] = +{ + 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0x0000, + 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF +}; + +const OMX_U8 armCOMM_qMaskTable8[MaskTableSize] = +{ + 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, + 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF +}; diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVC.h new file mode 100644 index 0000000..7fa7716 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVC.h @@ -0,0 +1,1153 @@ +/** + * + * File Name: armVC.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * File: armVideo.h + * Brief: Declares API's/Basic Data types used across the OpenMAX Video domain + * + */ + + +#ifndef _armVideo_H_ +#define _armVideo_H_ + +#include "omxVC.h" +#include "armCOMM_Bitstream.h" + +/** + * ARM specific state structure to hold Motion Estimation information. + */ + +struct m4p2_MESpec +{ + OMXVCM4P2MEParams MEParams; + OMXVCM4P2MEMode MEMode; +}; + +struct m4p10_MESpec +{ + OMXVCM4P10MEParams MEParams; + OMXVCM4P10MEMode MEMode; +}; + +typedef struct m4p2_MESpec ARMVCM4P2_MESpec; +typedef struct m4p10_MESpec ARMVCM4P10_MESpec; + +/** + * Function: armVCM4P2_CompareMV + * + * Description: + * Performs comparision of motion vectors and SAD's to decide the + * best MV and SAD + * + * Remarks: + * + * Parameters: + * [in] mvX x coordinate of the candidate motion vector + * [in] mvY y coordinate of the candidate motion vector + * [in] candSAD Candidate SAD + * [in] bestMVX x coordinate of the best motion vector + * [in] bestMVY y coordinate of the best motion vector + * [in] bestSAD best SAD + * + * Return Value: + * OMX_INT -- 1 to indicate that the current sad is the best + * 0 to indicate that it is NOT the best SAD + */ + +OMX_INT armVCM4P2_CompareMV ( + OMX_S16 mvX, + OMX_S16 mvY, + OMX_INT candSAD, + OMX_S16 bestMVX, + OMX_S16 bestMVY, + OMX_INT bestSAD); + +/** + * Function: armVCM4P2_ACDCPredict + * + * Description: + * Performs adaptive DC/AC coefficient prediction for an intra block. Prior + * to the function call, prediction direction (predDir) should be selected + * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2. + * + * Remarks: + * + * Parameters: + * [in] pSrcDst pointer to the coefficient buffer which contains + * the quantized coefficient residuals (PQF) of the + * current block + * [in] pPredBufRow pointer to the coefficient row buffer + * [in] pPredBufCol pointer to the coefficient column buffer + * [in] curQP quantization parameter of the current block. 
curQP + * may equal to predQP especially when the current + * block and the predictor block are in the same + * macroblock. + * [in] predQP quantization parameter of the predictor block + * [in] predDir indicates the prediction direction which takes one + * of the following values: + * OMX_VIDEO_HORIZONTAL predict horizontally + * OMX_VIDEO_VERTICAL predict vertically + * [in] ACPredFlag a flag indicating if AC prediction should be + * performed. It is equal to ac_pred_flag in the bit + * stream syntax of MPEG-4 + * [in] videoComp video component type (luminance, chrominance or + * alpha) of the current block + * [in] flag This flag defines the if one wants to use this functions to + * calculate PQF (set 1, prediction) or QF (set 0, reconstruction) + * [out] pPreACPredict pointer to the predicted coefficients buffer. + * Filled ONLY if it is not NULL + * [out] pSrcDst pointer to the coefficient buffer which contains + * the quantized coefficients (QF) of the current + * block + * [out] pPredBufRow pointer to the updated coefficient row buffer + * [out] pPredBufCol pointer to the updated coefficient column buffer + * [out] pSumErr pointer to the updated sum of the difference + * between predicted and unpredicted coefficients + * If this is NULL, do not update + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_ACDCPredict( + OMX_S16 * pSrcDst, + OMX_S16 * pPreACPredict, + OMX_S16 * pPredBufRow, + OMX_S16 * pPredBufCol, + OMX_INT curQP, + OMX_INT predQP, + OMX_INT predDir, + OMX_INT ACPredFlag, + OMXVCM4P2VideoComponent videoComp, + OMX_U8 flag, + OMX_INT *pSumErr +); + +/** + * Function: armVCM4P2_SetPredDir + * + * Description: + * Performs detecting the prediction direction + * + * Remarks: + * + * Parameters: + * [in] blockIndex block index indicating the component type and + * position as defined in subclause 6.1.3.8, of ISO/IEC + * 14496-2. 
Furthermore, indexes 6 to 9 indicate the + * alpha blocks spatially corresponding to luminance + * blocks 0 to 3 in the same macroblock. + * [in] pCoefBufRow pointer to the coefficient row buffer + * [in] pQpBuf pointer to the quantization parameter buffer + * [out] predQP quantization parameter of the predictor block + * [out] predDir indicates the prediction direction which takes one + * of the following values: + * OMX_VIDEO_HORIZONTAL predict horizontally + * OMX_VIDEO_VERTICAL predict vertically + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_SetPredDir( + OMX_INT blockIndex, + OMX_S16 *pCoefBufRow, + OMX_S16 *pCoefBufCol, + OMX_INT *predDir, + OMX_INT *predQP, + const OMX_U8 *pQpBuf +); + +/** + * Function: armVCM4P2_EncodeVLCZigzag_Intra + * + * Description: + * Performs zigzag scanning and VLC encoding for one intra block. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7. + * [in] pQDctBlkCoef pointer to the quantized DCT coefficient + * [in] predDir AC prediction direction, which is used to decide + * the zigzag scan pattern. This takes one of the + * following values: + * OMX_VIDEO_NONE AC prediction not used. + * Performs classical zigzag + * scan. + * OMX_VIDEO_HORIZONTAL Horizontal prediction. + * Performs alternate-vertical + * zigzag scan. + * OMX_VIDEO_VERTICAL Vertical prediction. + * Performs alternate-horizontal + * zigzag scan. + * [in] pattern block pattern which is used to decide whether + * this block is encoded + * [in] start start indicates whether the encoding begins with 0th element + * or 1st. + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. 
+ * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_EncodeVLCZigzag_Intra( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 predDir, + OMX_U8 pattern, + OMX_INT shortVideoHeader, + OMX_U8 start +); + +/** + * Function: armVCM4P2_DecodeVLCZigzag_Intra + * + * Description: + * Performs VLC decoding and inverse zigzag scan for one intra coded block. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bitstream buffer + * [in] pBitOffset pointer to the bit position in the byte pointed + * to by *ppBitStream. *pBitOffset is valid within + * [0-7]. + * [in] predDir AC prediction direction which is used to decide + * the zigzag scan pattern. It takes one of the + * following values: + * OMX_VIDEO_NONE AC prediction not used; + * perform classical zigzag scan; + * OMX_VIDEO_HORIZONTAL Horizontal prediction; + * perform alternate-vertical + * zigzag scan; + * OMX_VIDEO_VERTICAL Vertical prediction; + * thus perform + * alternate-horizontal + * zigzag scan. + * [in] videoComp video component type (luminance, chrominance or + * alpha) of the current block + * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, + * and escape mode 4 is used when shortVideoHeader==1. + * [in] start start indicates whether the encoding begins with 0th element + * or 1st. + * [out] ppBitStream *ppBitStream is updated after the block is + * decoded, so that it points to the current byte + * in the bit stream buffer + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream + * [out] pDst pointer to the coefficient buffer of current + * block. 
Should be 32-bit aligned + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_DecodeVLCZigzag_Intra( + const OMX_U8 ** ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_U8 predDir, + OMX_INT shortVideoHeader, + OMX_U8 start +); + +/** + * Function: armVCM4P2_FillVLDBuffer + * + * Description: + * Performs filling of the coefficient buffer according to the run, level + * and sign, also updates the index + * + * Parameters: + * [in] storeRun Stored Run value (count of zeros) + * [in] storeLevel Stored Level value (non-zero value) + * [in] sign Flag indicating the sign of level + * [in] last status of the last flag + * [in] pIndex pointer to coefficient index in 8x8 matrix + * [out] pIndex pointer to updated coefficient index in 8x8 + * matrix + * [in] pZigzagTable pointer to the zigzag tables + * [out] pDst pointer to the coefficient buffer of current + * block. Should be 32-bit aligned + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_FillVLDBuffer( + OMX_U32 storeRun, + OMX_S16 * pDst, + OMX_S16 storeLevel, + OMX_U8 sign, + OMX_U8 last, + OMX_U8 * index, + const OMX_U8 * pZigzagTable +); + +/** + * Function: armVCM4P2_GetVLCBits + * + * Description: + * Performs escape mode decision based on the run, run+, level, level+ and + * last combinations. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7 + * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, + * and escape mode 4 is used when shortVideoHeader==1. + * [in] start start indicates whether the encoding begins with + * 0th element or 1st. 
+ * [in/out] pLast pointer to last status flag + * [in] runBeginSingleLevelEntriesL0 The run value from which level + * will be equal to 1: last == 0 + * [in] IndexBeginSingleLevelEntriesL0 Array index in the VLC table + * pointing to the + * runBeginSingleLevelEntriesL0 + * [in] runBeginSingleLevelEntriesL1 The run value from which level + * will be equal to 1: last == 1 + * [in] IndexBeginSingleLevelEntriesL1 Array index in the VLC table + * pointing to the + * runBeginSingleLevelEntriesL1 + * [in] pRunIndexTableL0 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pVlcTableL0 VLC table for last == 0 + * [in] pRunIndexTableL1 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pVlcTableL1 VLC table for last == 1 + * [in] pLMAXTableL0 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pLMAXTableL1 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pRMAXTableL0 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pRMAXTableL1 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [out]pDst pointer to the coefficient buffer of current + * block. Should be 32-bit aligned + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes.
+ * + */ + +OMXResult armVCM4P2_GetVLCBits ( + const OMX_U8 **ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_INT shortVideoHeader, + OMX_U8 start, + OMX_U8 * pLast, + OMX_U8 runBeginSingleLevelEntriesL0, + OMX_U8 maxIndexForMultipleEntriesL0, + OMX_U8 maxRunForMultipleEntriesL1, + OMX_U8 maxIndexForMultipleEntriesL1, + const OMX_U8 * pRunIndexTableL0, + const ARM_VLC32 *pVlcTableL0, + const OMX_U8 * pRunIndexTableL1, + const ARM_VLC32 *pVlcTableL1, + const OMX_U8 * pLMAXTableL0, + const OMX_U8 * pLMAXTableL1, + const OMX_U8 * pRMAXTableL0, + const OMX_U8 * pRMAXTableL1, + const OMX_U8 * pZigzagTable +); + +/** + * Function: armVCM4P2_PutVLCBits + * + * Description: + * Checks the type of Escape Mode and put encoded bits for + * quantized DCT coefficients. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7 + * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, + * and escape mode 4 is used when shortVideoHeader==1. + * [in] start start indicates whether the encoding begins with + * 0th element or 1st. 
+ * [in] maxStoreRunL0 Max store possible (considering last and inter/intra) + * for last = 0 + * [in] maxStoreRunL1 Max store possible (considering last and inter/intra) + * for last = 1 + * [in] maxRunForMultipleEntriesL0 + * The run value after which level + * will be equal to 1: + * (considering last and inter/intra status) for last = 0 + * [in] maxRunForMultipleEntriesL1 + * The run value after which level + * will be equal to 1: + * (considering last and inter/intra status) for last = 1 + * [in] pRunIndexTableL0 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pVlcTableL0 VLC table for last == 0 + * [in] pRunIndexTableL1 Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pVlcTableL1 VLC table for last == 1 + * [in] pLMAXTableL0 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pLMAXTableL1 Level MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [in] pRMAXTableL0 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 0 + * [in] pRMAXTableL1 Run MAX table defined in + * armVCM4P2_Huff_Tables_VLC.c for last == 1 + * [out] pQDctBlkCoef pointer to the quantized DCT coefficient + * [out] ppBitStream *ppBitStream is updated after the block is encoded + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. 
+ * + */ + + +OMXResult armVCM4P2_PutVLCBits ( + OMX_U8 **ppBitStream, + OMX_INT * pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_INT shortVideoHeader, + OMX_U8 start, + OMX_U8 maxStoreRunL0, + OMX_U8 maxStoreRunL1, + OMX_U8 maxRunForMultipleEntriesL0, + OMX_U8 maxRunForMultipleEntriesL1, + const OMX_U8 * pRunIndexTableL0, + const ARM_VLC32 *pVlcTableL0, + const OMX_U8 * pRunIndexTableL1, + const ARM_VLC32 *pVlcTableL1, + const OMX_U8 * pLMAXTableL0, + const OMX_U8 * pLMAXTableL1, + const OMX_U8 * pRMAXTableL0, + const OMX_U8 * pRMAXTableL1, + const OMX_U8 * pZigzagTable +); +/** + * Function: armVCM4P2_FillVLCBuffer + * + * Description: + * Performs calculating the VLC bits depending on the escape type and insert + * the same in the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream + * [in] pBitOffset pointer to the bit position in the byte pointed + * by *ppBitStream. Valid within 0 to 7 + * [in] run Run value (count of zeros) to be encoded + * [in] level Level value (non-zero value) to be encoded + * [in] runPlus Calculated as runPlus = run - (RMAX + 1) + * [in] levelPlus Calculated as + * levelPlus = sign(level)*[abs(level) - LMAX] + * [in] fMode Flag indicating the escape modes + * [in] last status of the last flag + * [in] maxRunForMultipleEntries + * The run value after which level will be equal to 1: + * (considering last and inter/intra status) + * [in] pRunIndexTable Run Index table defined in + * armVCM4P2_Huff_tables_VLC.h + * [in] pVlcTable VLC table defined in armVCM4P2_Huff_tables_VLC.h + * [out] ppBitStream *ppBitStream is updated after the block is encoded + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. 
+ * + */ + +OMXResult armVCM4P2_FillVLCBuffer ( + OMX_U8 **ppBitStream, + OMX_INT * pBitOffset, + OMX_U32 run, + OMX_S16 level, + OMX_U32 runPlus, + OMX_S16 levelPlus, + OMX_U8 fMode, + OMX_U8 last, + OMX_U8 maxRunForMultipleEntries, + const OMX_U8 *pRunIndexTable, + const ARM_VLC32 *pVlcTable +); + +/** + * Function: armVCM4P2_CheckVLCEscapeMode + * + * Description: + * Performs escape mode decision based on the run, run+, level, level+ and + * last combinations. + * + * Remarks: + * + * Parameters: + * [in] run Run value (count of zeros) to be encoded + * [in] level Level value (non-zero value) to be encoded + * [in] runPlus Calculated as runPlus = run - (RMAX + 1) + * [in] levelPlus Calculated as + * levelPlus = sign(level)*[abs(level) - LMAX] + * [in] maxStoreRun Max store possible (considering last and inter/intra) + * [in] maxRunForMultipleEntries + * The run value after which level + * will be equal to 1: + * (considering last and inter/intra status) + * [in] shortVideoHeader binary flag indicating presence of short_video_header; escape modes 0-3 are used if shortVideoHeader==0, + * and escape mode 4 is used when shortVideoHeader==1. + * [in] pRunIndexTable Run Index table defined in + * armVCM4P2_Huff_Tables_VLC.c + * (considering last and inter/intra status) + * + * + * Return Value: + * Returns an Escape mode which can take values from 0 to 3 + * 0 --> no escape mode, 1 --> escape type 1, + * 2 --> escape type 2, 3 --> escape type 3, check section 7.4.1.3 + * in the MPEG ISO standard. + * + */ + +OMX_U8 armVCM4P2_CheckVLCEscapeMode( + OMX_U32 run, + OMX_U32 runPlus, + OMX_S16 level, + OMX_S16 levelPlus, + OMX_U8 maxStoreRun, + OMX_U8 maxRunForMultipleEntries, + OMX_INT shortVideoHeader, + const OMX_U8 *pRunIndexTable +); + + +/** + * Function: armVCM4P2_BlockMatch_Integer + * + * Description: + * Performs a 16x16 block search; estimates motion vector and associated minimum SAD.
+ * Both the input and output motion vectors are represented using half-pixel units, and + * therefore a shift left or right by 1 bit may be required, respectively, to match the + * input or output MVs with other functions that either generate output MVs or expect + * input MVs represented using integer pixel units. + * + * Remarks: + * + * Parameters: + * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB that + * corresponds to the location of the current macroblock in the current + * plane. + * [in] refWidth width of the reference plane + * [in] pRefRect pointer to the valid rectangle in the reference plane. Relative to image origin. + * It is not limited to the image boundary, but depends on the padding. For example, + * if you pad 4 pixels outside the image border, then the value for left border + * can be -4 + * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane (linear array, + * 256 entries); must be aligned on an 8-byte boundary. + * [in] pCurrPointPos position of the current macroblock in the current plane + * [in] pSrcPreMV pointer to predicted motion vector; NULL indicates no predicted MV + * [in] pSrcPreSAD pointer to SAD associated with the predicted MV (referenced by pSrcPreMV) + * [in] searchRange search range for 16X16 integer block; its unit is full pixel and the search range + * is the same in all directions. It is inclusive of the boundary and specified in + * terms of integer pixel units. + * [in] pMESpec vendor-specific motion estimation specification structure; must have been allocated + * and then initialized using omxVCM4P2_MEInit prior to calling the block matching + * function. + * [in] BlockSize MacroBlock Size i.e either 16x16 or 8x8. + * [out] pDstMV pointer to estimated MV + * [out] pDstSAD pointer to minimum SAD + * + * Return Value: + * OMX_Sts_NoErr - no error.
+ * OMX_Sts_BadArgErr - bad arguments + * + */ + +OMXResult armVCM4P2_BlockMatch_Integer( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pCurrPointPos, + const OMXVCMotionVector *pSrcPreMV, + const OMX_INT *pSrcPreSAD, + void *pMESpec, + OMXVCMotionVector *pDstMV, + OMX_INT *pDstSAD, + OMX_U8 BlockSize +); + +/** + * Function: armVCM4P2_BlockMatch_Half + * + * Description: + * Performs a 16x16 block match with half-pixel resolution. Returns the estimated + * motion vector and associated minimum SAD. This function estimates the half-pixel + * motion vector by interpolating the integer resolution motion vector referenced + * by the input parameter pSrcDstMV, i.e., the initial integer MV is generated + * externally. The input parameters pSrcRefBuf and pSearchPointRefPos should be + * shifted by the winning MV of 16x16 integer search prior to calling BlockMatch_Half_16x16. + * The function BlockMatch_Integer_16x16 may be used for integer motion estimation. + * + * Remarks: + * + * Parameters: + * [in] pSrcRefBuf pointer to the reference Y plane; points to the reference MB + * that corresponds to the location of the current macroblock in + * the current plane. + * [in] refWidth width of the reference plane + * [in] pRefRect reference plane valid region rectangle + * [in] pSrcCurrBuf pointer to the current macroblock extracted from original plane + * (linear array, 256 entries); must be aligned on an 8-byte boundary. + * [in] pSearchPointRefPos position of the starting point for half pixel search (specified + * in terms of integer pixel units) in the reference plane. + * [in] rndVal rounding control bit for half pixel motion estimation; + * 0=rounding control disabled; 1=rounding control enabled + * [in] pSrcDstMV pointer to the initial MV estimate; typically generated during a prior + * 16X16 integer search and its unit is half pixel.
+ * [in] BlockSize MacroBlock Size i.e either 16x16 or 8x8. + * [out]pSrcDstMV pointer to estimated MV + * [out]pDstSAD pointer to minimum SAD + * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * + */ + +OMXResult armVCM4P2_BlockMatch_Half( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pSearchPointRefPos, + OMX_INT rndVal, + OMXVCMotionVector *pSrcDstMV, + OMX_INT *pDstSAD, + OMX_U8 BlockSize +); +/** + * Function: armVCM4P2_PadMV + * + * Description: + * Performs motion vector padding for a macroblock. + * + * Remarks: + * + * Parameters: + * [in] pSrcDstMV pointer to motion vector buffer of the current + * macroblock + * [in] pTransp pointer to transparent status buffer of the + * current macroblock + * [out] pSrcDstMV pointer to motion vector buffer in which the + * motion vectors have been padded + * Return Value: + * Standard OMXResult result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P2_PadMV( + OMXVCMotionVector * pSrcDstMV, + OMX_U8 * pTransp +); + +/* + * H.264 Specific Declarations + */ +/* Defines */ +#define ARM_M4P10_Q_OFFSET (15) + + +/* Dequant tables */ + +extern const OMX_U8 armVCM4P10_PosToVCol4x4[16]; +extern const OMX_U8 armVCM4P10_PosToVCol2x2[4]; +extern const OMX_U8 armVCM4P10_VMatrix[6][3]; +extern const OMX_U32 armVCM4P10_MFMatrix[6][3]; + + +/* + * Description: + * This function performs the work required by the OpenMAX + * DecodeCoeffsToPair function and DecodeChromaDCCoeffsToPair. + * Since most of the code is common we share it here.
+ * + * Parameters: + * [in] ppBitStream Double pointer to current byte in bit stream buffer + * [in] pOffset Pointer to current bit position in the byte pointed + * to by *ppBitStream + * [in] sMaxNumCoeff Maximum number of non-zero coefficients in current + * block (4,15 or 16) + * [in] nTable Table number (0 to 4) according to the five columns + * of Table 9-5 in the H.264 spec + * [out] ppBitStream *ppBitStream is updated after each block is decoded + * [out] pOffset *pOffset is updated after each block is decoded + * [out] pNumCoeff Pointer to the number of nonzero coefficients in + * this block + * [out] ppPosCoefbuf Double pointer to destination residual + * coefficient-position pair buffer + * Return Value: + * Standard omxError result. See enumeration for possible result codes. + + */ + +OMXResult armVCM4P10_DecodeCoeffsToPair( + const OMX_U8** ppBitStream, + OMX_S32* pOffset, + OMX_U8* pNumCoeff, + OMX_U8**ppPosCoefbuf, + OMX_INT nTable, + OMX_INT sMaxNumCoeff + ); + +/* + * Description: + * Perform DC style intra prediction, averaging upper and left block + * + * Parameters: + * [in] pSrcLeft Pointer to the buffer of 4 left coefficients: + * p[x, y] (x = -1, y = 0..3) + * [in] pSrcAbove Pointer to the buffer of 4 above coefficients: + * p[x,y] (x = 0..3, y = -1) + * [in] leftStep Step of left coefficient buffer + * [in] dstStep Step of the destination buffer + * [in] availability Neighboring 16x16 MB availability flag + * [out] pDst Pointer to the destination buffer + * + * Return Value: + * None + */ + +void armVCM4P10_PredictIntraDC4x4( + const OMX_U8* pSrcLeft, + const OMX_U8 *pSrcAbove, + OMX_U8* pDst, + OMX_INT leftStep, + OMX_INT dstStep, + OMX_S32 availability +); + +/* + * Description + * Unpack a 4x4 block of coefficient-residual pair values + * + * Parameters: + * [in] ppSrc Double pointer to residual coefficient-position pair + * buffer output by CAVLC decoding + * [out] ppSrc *ppSrc is updated to the start of next non empty block + * [out]
pDst Pointer to unpacked 4x4 block + */ + +void armVCM4P10_UnpackBlock4x4( + const OMX_U8 **ppSrc, + OMX_S16* pDst +); + +/* + * Description + * Unpack a 2x2 block of coefficient-residual pair values + * + * Parameters: + * [in] ppSrc Double pointer to residual coefficient-position pair + * buffer output by CAVLC decoding + * [out] ppSrc *ppSrc is updated to the start of next non empty block + * [out] pDst Pointer to unpacked 2x2 block + */ + +void armVCM4P10_UnpackBlock2x2( + const OMX_U8 **ppSrc, + OMX_S16* pDst +); + +/* + * Description + * Deblock one boundary pixel + * + * Parameters: + * [in] pQ0 Pointer to pixel q0 + * [in] Step Step between pixels q0 and q1 + * [in] tC0 Edge threshold value + * [in] alpha alpha threshold value + * [in] beta beta threshold value + * [in] bS deblocking strength + * [in] ChromaFlag True for chroma blocks + * [out] pQ0 Deblocked pixels + * + */ + +void armVCM4P10_DeBlockPixel( + OMX_U8 *pQ0, /* pointer to the pixel q0 */ + int Step, /* step between pixels q0 and q1 */ + int tC0, /* edge threshold value */ + int alpha, /* alpha */ + int beta, /* beta */ + int bS, /* deblocking strength */ + int ChromaFlag +); + +/** + * Function: armVCM4P10_InterpolateHalfHor_Luma + * + * Description: + * This function performs interpolation for horizontal 1/2-pel positions + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to interpolate + in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [out] pDst Pointer to the interpolation buffer of the 1/2-pel + * + * Return Value: + * Standard OMXResult value.
+ * + */ + +OMXResult armVCM4P10_InterpolateHalfHor_Luma( + const OMX_U8* pSrc, + OMX_U32 iSrcStep, + OMX_U8* pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + +/** + * Function: armVCM4P10_InterpolateHalfVer_Luma + * + * Description: + * This function performs interpolation for vertical 1/2-pel positions + * around a full-pel position. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to interpolate + * in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [out] pDst Pointer to the interpolation buffer of the 1/2-pel + * + * Return Value: + * Standard OMXResult value. + * + */ + +OMXResult armVCM4P10_InterpolateHalfVer_Luma( + const OMX_U8* pSrc, + OMX_U32 iSrcStep, + OMX_U8* pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + +/** + * Function: armVCM4P10_InterpolateHalfDiag_Luma + * + * Description: + * This function performs interpolation for (1/2, 1/2) positions + * around a full-pel position. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to interpolate + * in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [out] pDst Pointer to the interpolation buffer of the (1/2,1/2)-pel + * + * Return Value: + * Standard OMXResult value. 
+ * + */ + +OMXResult armVCM4P10_InterpolateHalfDiag_Luma( + const OMX_U8* pSrc, + OMX_U32 iSrcStep, + OMX_U8* pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + +/* + * Description: + * Transform Residual 4x4 Coefficients + * + * Parameters: + * [in] pSrc Source 4x4 block + * [out] pDst Destination 4x4 block + * + */ + +void armVCM4P10_TransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc); + +/* + * Description: + * Forward Transform Residual 4x4 Coefficients + * + * Parameters: + * [in] pSrc Source 4x4 block + * [out] pDst Destination 4x4 block + * + */ + +void armVCM4P10_FwdTransformResidual4x4(OMX_S16* pDst, OMX_S16 *pSrc); + +OMX_INT armVCM4P10_CompareMotionCostToMV ( + OMX_S16 mvX, + OMX_S16 mvY, + OMXVCMotionVector diffMV, + OMX_INT candSAD, + OMXVCMotionVector *bestMV, + OMX_U32 nLamda, + OMX_S32 *pBestCost); + +/** + * Function: armVCCOMM_SAD + * + * Description: + * This function calculate the SAD for NxM blocks. + * + * Remarks: + * + * [in] pSrcOrg Pointer to the original block + * [in] iStepOrg Step of the original block buffer + * [in] pSrcRef Pointer to the reference block + * [in] iStepRef Step of the reference block buffer + * [in] iHeight Height of the block + * [in] iWidth Width of the block + * [out] pDstSAD Pointer of result SAD + * + * Return Value: + * Standard OMXResult value. + * + */ +OMXResult armVCCOMM_SAD( + const OMX_U8* pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8* pSrcRef, + OMX_U32 iStepRef, + OMX_S32* pDstSAD, + OMX_U32 iHeight, + OMX_U32 iWidth); + +/** + * Function: armVCCOMM_Average + * + * Description: + * This function calculates the average of two blocks and stores the result. 
+ * + * Remarks: + * + * [in] pPred0 Pointer to the top-left corner of reference block 0 + * [in] pPred1 Pointer to the top-left corner of reference block 1 + * [in] iPredStep0 Step of reference block 0 + * [in] iPredStep1 Step of reference block 1 + * [in] iDstStep Step of the destination buffer + * [in] iWidth Width of the blocks + * [in] iHeight Height of the blocks + * [out] pDstPred Pointer to the destination buffer + * + * Return Value: + * Standard OMXResult value. + * + */ + OMXResult armVCCOMM_Average ( + const OMX_U8* pPred0, + const OMX_U8* pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8* pDstPred, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + +/** + * Function: armVCM4P10_SADQuar + * + * Description: + * This function calculates the SAD between one block (pSrc) and the + * average of the other two (pSrcRef0 and pSrcRef1) + * + * Remarks: + * + * [in] pSrc Pointer to the original block + * [in] pSrcRef0 Pointer to reference block 0 + * [in] pSrcRef1 Pointer to reference block 1 + * [in] iSrcStep Step of the original block buffer + * [in] iRefStep0 Step of reference block 0 + * [in] iRefStep1 Step of reference block 1 + * [in] iHeight Height of the block + * [in] iWidth Width of the block + * [out] pDstSAD Pointer of result SAD + * + * Return Value: + * Standard OMXResult value. + * + */ +OMXResult armVCM4P10_SADQuar( + const OMX_U8* pSrc, + const OMX_U8* pSrcRef0, + const OMX_U8* pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32* pDstSAD, + OMX_U32 iHeight, + OMX_U32 iWidth +); + +/** + * Function: armVCM4P10_Interpolate_Chroma + * + * Description: + * This function performs interpolation for chroma components. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to + * interpolate in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. 
+ * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [in] dx Fractional part of horizontal motion vector + * component in 1/8 pixel unit (0~7) + * [in] dy Fractional part of vertical motion vector + * component in 1/8 pixel unit (0~7) + * [out] pDst Pointer to the interpolation buffer + * + * Return Value: + * Standard OMXResult value. + * + */ + OMXResult armVCM4P10_Interpolate_Chroma( + OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight, + OMX_U32 dx, + OMX_U32 dy +); + +/** + * Function: armVCM4P10_Interpolate_Luma + * + * Description: + * This function performs interpolation for luma components. + * + * Remarks: + * + * [in] pSrc Pointer to top-left corner of block used to + * interpolate in the reconstructed frame plane + * [in] iSrcStep Step of the source buffer. + * [in] iDstStep Step of the destination(interpolation) buffer. + * [in] iWidth Width of the current block + * [in] iHeight Height of the current block + * [in] dx Fractional part of horizontal motion vector + * component in 1/4 pixel unit (0~3) + * [in] dy Fractional part of vertical motion vector + * component in 1/4 pixel unit (0~3) + * [out] pDst Pointer to the interpolation buffer + * + * Return Value: + * Standard OMXResult value. + * + */ + + OMXResult armVCM4P10_Interpolate_Luma( + const OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDst, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight, + OMX_U32 dx, + OMX_U32 dy +); + +/** + * Function: omxVCH264_DequantTransformACFromPair_U8_S16_C1_DLx + * + * Description: + * Reconstruct the 4x4 residual block from coefficient-position pair buffer, + * perform dequantisation and integer inverse transformation for 4x4 block of + * residuals and update the pair buffer pointer to next non-empty block. 
+ * + * Remarks: + * + * Parameters: + * [in] ppSrc Double pointer to residual coefficient-position + * pair buffer output by CAVLC decoding + * [in] pDC Pointer to the DC coefficient of this block, NULL + * if it doesn't exist + * [in] QP Quantization parameter + * [in] AC Flag indicating if at least one non-zero coefficient exists + * [out] pDst pointer to the reconstructed 4x4 block data + * + * Return Value: + * Standard omxError result. See enumeration for possible result codes. + * + */ + +OMXResult armVCM4P10_DequantTransformACFromPair_U8_S16_C1_DLx( + OMX_U8 **ppSrc, + OMX_S16 *pDst, + OMX_INT QP, + OMX_S16* pDC, + int AC +); + +#endif /*_armVideo_H_*/ + +/*End of File*/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVCCOMM_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVCCOMM_s.h new file mode 100644 index 0000000..7f0a9b8 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/armVCCOMM_s.h @@ -0,0 +1,72 @@ +;// +;// +;// File Name: armVCCOMM_s.h +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// +;// ARM optimized OpenMAX VC header file +;// +;// Formula used: +;// MACRO for calculating median for three values. + + + + IF :LNOT::DEF:ARMVCCOMM_S_H + INCLUDE armCOMM_s.h + M_VARIANTS CortexA8, ARM1136JS + + IF ARM1136JS :LOR: CortexA8 + + ;///* + ;// * Macro: M_MEDIAN3 + ;// * + ;// * Description: Finds the median of three numbers + ;// * + ;// * Remarks: + ;// * + ;// * Parameters: + ;// * [in] x First entry for the list of three numbers. + ;// * [in] y Second entry for the list of three numbers. + ;// * Input value may be corrupted at the end of + ;// * the execution of this macro. + ;// * [in] z Third entry of the list of three numbers. + ;// * Input value corrupted at the end of the + ;// * execution of this macro. + ;// * [in] t Temporary scratch register.
+ ;// * [out]z Median of the three numbers. + ;// */ + + MACRO + + M_MEDIAN3 $x, $y, $z, $t + + SUBS $t, $y, $z; // if (y < z) + ADDLT $z, $z, $t; // swap y and z + SUBLT $y, $y, $t; + + ;// Now z' <= y', so there are three cases for the + ;// median value, depending on x. + + ;// 1) x <= z' <= y' : median value is z' + ;// 2) z' <= x <= y' : median value is x + ;// 3) z' <= y' <= x : median value is y' + + CMP $z, $x; // if ( x > min(y,z) ) + MOVLT $z, $x; // ans = x + + CMP $x, $y; // if ( x > max(y,z) ) + MOVGT $z, $y; // ans = max(y,z) + + MEND + ENDIF + + + + ENDIF ;// ARMVCCOMM_S_H + + END
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC.h new file mode 100644 index 0000000..7b3cc72 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC.h @@ -0,0 +1,4381 @@ +/** + * File: omxVC.h + * Brief: OpenMAX DL v1.0.2 - Video Coding library + * + * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved. + * + * These materials are protected by copyright laws and contain material + * proprietary to the Khronos Group, Inc. You may use these materials + * for implementing Khronos specifications, without altering or removing + * any trademark, copyright or other notice from the specification. + * + * Khronos Group makes no, and expressly disclaims any, representations + * or warranties, express or implied, regarding these materials, including, + * without limitation, any implied warranties of merchantability or fitness + * for a particular purpose or non-infringement of any intellectual property. + * Khronos Group makes no, and expressly disclaims any, warranties, express + * or implied, regarding the correctness, accuracy, completeness, timeliness, + * and reliability of these materials. + * + * Under no circumstances will the Khronos Group, or any of its Promoters, + * Contributors or Members or their respective partners, officers, directors, + * employees, agents or representatives be liable for any damages, whether + * direct, indirect, special or consequential damages for lost revenues, + * lost profits, or otherwise, arising from or in connection with these + * materials. + * + * Khronos and OpenMAX are trademarks of the Khronos Group Inc. 
+ * + */ + +/* *****************************************************************************************/ + +#ifndef _OMXVC_H_ +#define _OMXVC_H_ + +#include "omxtypes.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +/* 6.1.1.1 Motion Vectors */ +/* In omxVC, motion vectors are represented as follows: */ + +typedef struct { + OMX_S16 dx; + OMX_S16 dy; +} OMXVCMotionVector; + + + +/** + * Function: omxVCCOMM_Average_8x (6.1.3.1.1) + * + * Description: + * This function calculates the average of two 8x4, 8x8, or 8x16 blocks. The + * result is rounded according to (a+b+1)/2. The block average function can + * be used in conjunction with half-pixel interpolation to obtain quarter + * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1. + * + * Input Arguments: + * + * pPred0 - Pointer to the top-left corner of reference block 0 + * pPred1 - Pointer to the top-left corner of reference block 1 + * iPredStep0 - Step of reference block 0 + * iPredStep1 - Step of reference block 1 + * iDstStep - Step of the destination buffer. + * iHeight - Height of the blocks + * + * Output Arguments: + * + * pDstPred - Pointer to the destination buffer. 8-byte aligned. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pPred0, pPred1, or + * pDstPred. + * - pDstPred is not aligned on an 8-byte boundary. + * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 8. + * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 8. + * - iDstStep <= 0 or iDstStep is not a multiple of 8. + * - iHeight is not 4, 8, or 16. 
+ * + */ +OMXResult omxVCCOMM_Average_8x ( + const OMX_U8 *pPred0, + const OMX_U8 *pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8 *pDstPred, + OMX_U32 iDstStep, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCCOMM_Average_16x (6.1.3.1.2) + * + * Description: + * This function calculates the average of two 16x16 or 16x8 blocks. The + * result is rounded according to (a+b+1)/2. The block average function can + * be used in conjunction with half-pixel interpolation to obtain quarter + * pixel motion estimates, as described in [ISO14496-10], subclause 8.4.2.2.1. + * + * Input Arguments: + * + * pPred0 - Pointer to the top-left corner of reference block 0 + * pPred1 - Pointer to the top-left corner of reference block 1 + * iPredStep0 - Step of reference block 0 + * iPredStep1 - Step of reference block 1 + * iDstStep - Step of the destination buffer + * iHeight - Height of the blocks + * + * Output Arguments: + * + * pDstPred - Pointer to the destination buffer. 16-byte aligned. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pPred0, pPred1, or + * pDstPred. + * - pDstPred is not aligned on a 16-byte boundary. + * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 16. + * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 16. + * - iDstStep <= 0 or iDstStep is not a multiple of 16. + * - iHeight is not 8 or 16. + * + */ +OMXResult omxVCCOMM_Average_16x ( + const OMX_U8 *pPred0, + const OMX_U8 *pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8 *pDstPred, + OMX_U32 iDstStep, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCCOMM_ExpandFrame_I (6.1.3.2.1) + * + * Description: + * This function expands a reconstructed frame in-place. 
The unexpanded + * source frame should be stored in a plane buffer with sufficient space + * pre-allocated for edge expansion, and the input frame should be located in + * the plane buffer center. This function executes the pixel expansion by + * replicating source frame edge pixel intensities in the empty pixel + * locations (expansion region) between the source frame edge and the plane + * buffer edge. The width/height of the expansion regions on the + * horizontal/vertical edges is controlled by the parameter iExpandPels. + * + * Input Arguments: + * + * pSrcDstPlane - pointer to the top-left corner of the frame to be + * expanded; must be aligned on an 8-byte boundary. + * iFrameWidth - frame width; must be a multiple of 8. + * iFrameHeight -frame height; must be a multiple of 8. + * iExpandPels - number of pixels to be expanded in the horizontal and + * vertical directions; must be a multiple of 8. + * iPlaneStep - distance, in bytes, between the start of consecutive lines + * in the plane buffer; must be larger than or equal to + * (iFrameWidth + 2 * iExpandPels). + * + * Output Arguments: + * + * pSrcDstPlane -Pointer to the top-left corner of the frame (NOT the + * top-left corner of the plane); must be aligned on an 8-byte + * boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - pSrcDstPlane is NULL. + * - pSrcDstPlane is not aligned on an 8-byte boundary. + * - one of the following parameters is either equal to zero or is a + * non-multiple of 8: iFrameHeight, iFrameWidth, iPlaneStep, or + * iExpandPels. + * - iPlaneStep < (iFrameWidth + 2 * iExpandPels). + * + */ +OMXResult omxVCCOMM_ExpandFrame_I ( + OMX_U8 *pSrcDstPlane, + OMX_U32 iFrameWidth, + OMX_U32 iFrameHeight, + OMX_U32 iExpandPels, + OMX_U32 iPlaneStep +); + + + +/** + * Function: omxVCCOMM_Copy8x8 (6.1.3.3.1) + * + * Description: + * Copies the reference 8x8 block to the current block. 
+ * + * Input Arguments: + * + * pSrc - pointer to the reference block in the source frame; must be + * aligned on an 8-byte boundary. + * step - distance between the starts of consecutive lines in the reference + * frame, in bytes; must be a multiple of 8 and must be larger than + * or equal to 8. + * + * Output Arguments: + * + * pDst - pointer to the destination block; must be aligned on an 8-byte + * boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pSrc, pDst + * - one or more of the following pointers is not aligned on an 8-byte + * boundary: pSrc, pDst + * - step <8 or step is not a multiple of 8. + * + */ +OMXResult omxVCCOMM_Copy8x8 ( + const OMX_U8 *pSrc, + OMX_U8 *pDst, + OMX_INT step +); + + + +/** + * Function: omxVCCOMM_Copy16x16 (6.1.3.3.2) + * + * Description: + * Copies the reference 16x16 macroblock to the current macroblock. + * + * Input Arguments: + * + * pSrc - pointer to the reference macroblock in the source frame; must be + * aligned on a 16-byte boundary. + * step - distance between the starts of consecutive lines in the reference + * frame, in bytes; must be a multiple of 16 and must be larger + * than or equal to 16. + * + * Output Arguments: + * + * pDst - pointer to the destination macroblock; must be aligned on a + * 16-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - one or more of the following pointers is NULL: pSrc, pDst + * - one or more of the following pointers is not aligned on a 16-byte + * boundary: pSrc, pDst + * - step <16 or step is not a multiple of 16. 
+ * + */ +OMXResult omxVCCOMM_Copy16x16 ( + const OMX_U8 *pSrc, + OMX_U8 *pDst, + OMX_INT step +); + + + +/** + * Function: omxVCCOMM_ComputeTextureErrorBlock_SAD (6.1.4.1.1) + * + * Description: + * Computes texture error of the block; also returns SAD. + * + * Input Arguments: + * + * pSrc - pointer to the source plane; must be aligned on an 8-byte + * boundary. + * srcStep - step of the source plane + * pSrcRef - pointer to the reference buffer, an 8x8 block; must be aligned + * on an 8-byte boundary. + * + * Output Arguments: + * + * pDst - pointer to the destination buffer, an 8x8 block; must be aligned + * on an 8-byte boundary. + * pDstSAD - pointer to the Sum of Absolute Differences (SAD) value + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following + * pointers is NULL: pSrc, pSrcRef, pDst and pDstSAD. + * - pSrc is not 8-byte aligned. + * - SrcStep <= 0 or srcStep is not a multiple of 8. + * - pSrcRef is not 8-byte aligned. + * - pDst is not 8-byte aligned. + * + */ +OMXResult omxVCCOMM_ComputeTextureErrorBlock_SAD ( + const OMX_U8 *pSrc, + OMX_INT srcStep, + const OMX_U8 *pSrcRef, + OMX_S16 *pDst, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCCOMM_ComputeTextureErrorBlock (6.1.4.1.2) + * + * Description: + * Computes the texture error of the block. + * + * Input Arguments: + * + * pSrc - pointer to the source plane. This should be aligned on an 8-byte + * boundary. + * srcStep - step of the source plane + * pSrcRef - pointer to the reference buffer, an 8x8 block. This should be + * aligned on an 8-byte boundary. + * + * Output Arguments: + * + * pDst - pointer to the destination buffer, an 8x8 block. This should be + * aligned on an 8-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is NULL: + * pSrc, pSrcRef, pDst. + * - pSrc is not 8-byte aligned. 
+ * - SrcStep <= 0 or srcStep is not a multiple of 8. + * - pSrcRef is not 8-byte aligned. + * - pDst is not 8-byte aligned + * + */ +OMXResult omxVCCOMM_ComputeTextureErrorBlock ( + const OMX_U8 *pSrc, + OMX_INT srcStep, + const OMX_U8 *pSrcRef, + OMX_S16 *pDst +); + + + +/** + * Function: omxVCCOMM_LimitMVToRect (6.1.4.1.3) + * + * Description: + * Limits the motion vector associated with the current block/macroblock to + * prevent the motion compensated block/macroblock from moving outside a + * bounding rectangle as shown in Figure 6-1. + * + * Input Arguments: + * + * pSrcMV - pointer to the motion vector associated with the current block + * or macroblock + * pRectVOPRef - pointer to the bounding rectangle + * Xcoord, Ycoord - coordinates of the current block or macroblock + * size - size of the current block or macroblock; must be equal to 8 or + * 16. + * + * Output Arguments: + * + * pDstMV - pointer to the limited motion vector + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: + * pSrcMV, pDstMV, or pRectVOPRef. + * - size is not equal to either 8 or 16. + * - the width or height of the bounding rectangle is less than + * twice the block size. + */ +OMXResult omxVCCOMM_LimitMVToRect ( + const OMXVCMotionVector *pSrcMV, + OMXVCMotionVector *pDstMV, + const OMXRect *pRectVOPRef, + OMX_INT Xcoord, + OMX_INT Ycoord, + OMX_INT size +); + + + +/** + * Function: omxVCCOMM_SAD_16x (6.1.4.1.4) + * + * Description: + * This function calculates the SAD for 16x16 and 16x8 blocks. + * + * Input Arguments: + * + * pSrcOrg - Pointer to the original block; must be aligned on a 16-byte + * boundary. 
+ * iStepOrg - Step of the original block buffer + * pSrcRef - Pointer to the reference block + * iStepRef - Step of the reference block buffer + * iHeight - Height of the block + * + * Output Arguments: + * + * pDstSAD - Pointer of result SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pDstSAD, or pSrcRef + * - pSrcOrg is not 16-byte aligned. + * - iStepOrg <= 0 or iStepOrg is not a multiple of 16 + * - iStepRef <= 0 or iStepRef is not a multiple of 16 + * - iHeight is not 8 or 16 + * + */ +OMXResult omxVCCOMM_SAD_16x ( + const OMX_U8 *pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8 *pSrcRef, + OMX_U32 iStepRef, + OMX_S32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCCOMM_SAD_8x (6.1.4.1.5) + * + * Description: + * This function calculates the SAD for 8x16, 8x8, 8x4 blocks. + * + * Input Arguments: + * + * pSrcOrg - Pointer to the original block; must be aligned on a 8-byte + * boundary. + * iStepOrg - Step of the original block buffer + * pSrcRef - Pointer to the reference block + * iStepRef - Step of the reference block buffer + * iHeight - Height of the block + * + * Output Arguments: + * + * pDstSAD -Pointer of result SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pDstSAD, or pSrcRef + * - pSrcOrg is not 8-byte aligned. 
+ * - iStepOrg <= 0 or iStepOrg is not a multiple of 8 + * - iStepRef <= 0 or iStepRef is not a multiple of 8 + * - iHeight is not 4, 8 or 16 + * + */ +OMXResult omxVCCOMM_SAD_8x ( + const OMX_U8 *pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8 *pSrcRef, + OMX_U32 iStepRef, + OMX_S32*pDstSAD, + OMX_U32 iHeight +); + + + +/* 6.2.1.1 Direction */ +/* The direction enumerator is used with functions that perform AC/DC prediction and zig-zag scan. */ + +enum { + OMX_VC_NONE = 0, + OMX_VC_HORIZONTAL = 1, + OMX_VC_VERTICAL = 2 +}; + + + +/* 6.2.1.2 Bilinear Interpolation */ +/* The bilinear interpolation enumerator is used with motion estimation, motion compensation, and reconstruction functions. */ + +enum { + OMX_VC_INTEGER_PIXEL = 0, /* case a */ + OMX_VC_HALF_PIXEL_X = 1, /* case b */ + OMX_VC_HALF_PIXEL_Y = 2, /* case c */ + OMX_VC_HALF_PIXEL_XY = 3 /* case d */ +}; + + + +/* 6.2.1.3 Neighboring Macroblock Availability */ +/* Neighboring macroblock availability is indicated using the following flags: */ + +enum { + OMX_VC_UPPER = 1, /** above macroblock is available */ + OMX_VC_LEFT = 2, /** left macroblock is available */ + OMX_VC_CENTER = 4, + OMX_VC_RIGHT = 8, + OMX_VC_LOWER = 16, + OMX_VC_UPPER_LEFT = 32, /** above-left macroblock is available */ + OMX_VC_UPPER_RIGHT = 64, /** above-right macroblock is available */ + OMX_VC_LOWER_LEFT = 128, + OMX_VC_LOWER_RIGHT = 256 +}; + + + +/* 6.2.1.4 Video Components */ +/* A data type that enumerates video components is defined as follows: */ + +typedef enum { + OMX_VC_LUMINANCE, /** Luminance component */ + OMX_VC_CHROMINANCE /** chrominance component */ +} OMXVCM4P2VideoComponent; + + + +/* 6.2.1.5 MacroblockTypes */ +/* A data type that enumerates macroblock types is defined as follows: */ + +typedef enum { + OMX_VC_INTER = 0, /** P picture or P-VOP */ + OMX_VC_INTER_Q = 1, /** P picture or P-VOP */ + OMX_VC_INTER4V = 2, /** P picture or P-VOP */ + OMX_VC_INTRA = 3, /** I and P picture, I- and P-VOP */ + OMX_VC_INTRA_Q = 4, 
/** I and P picture, I- and P-VOP */ + OMX_VC_INTER4V_Q = 5 /** P picture or P-VOP (H.263)*/ +} OMXVCM4P2MacroblockType; + + + +/* 6.2.1.6 Coordinates */ +/* Coordinates are represented as follows: */ + +typedef struct { + OMX_INT x; + OMX_INT y; +} OMXVCM4P2Coordinate; + + + +/* 6.2.1.7 Motion Estimation Algorithms */ +/* A data type that enumerates motion estimation search methods is defined as follows: */ + +typedef enum { + OMX_VC_M4P2_FAST_SEARCH = 0, /** Fast motion search */ + OMX_VC_M4P2_FULL_SEARCH = 1 /** Full motion search */ +} OMXVCM4P2MEMode; + + + +/* 6.2.1.8 Motion Estimation Parameters */ +/* A data structure containing control parameters for + * motion estimation functions is defined as follows: + */ + +typedef struct { + OMX_INT searchEnable8x8; /** enables 8x8 search */ + OMX_INT halfPelSearchEnable; /** enables half-pel resolution */ + OMX_INT searchRange; /** search range */ + OMX_INT rndVal; /** rounding control; 0-disabled, 1-enabled*/ +} OMXVCM4P2MEParams; + + + +/* 6.2.1.9 Macroblock Information */ +/* A data structure containing macroblock parameters for + * motion estimation functions is defined as follows: + */ + +typedef struct { + OMX_S32 sliceId; /* slice number */ + OMXVCM4P2MacroblockType mbType; /* MB type: OMX_VC_INTRA, OMX_VC_INTER, or OMX_VC_INTER4V */ + OMX_S32 qp; /* quantization parameter*/ + OMX_U32 cbpy; /* CBP Luma */ + OMX_U32 cbpc; /* CBP Chroma */ + OMXVCMotionVector pMV0[2][2]; /* motion vector, represented using 1/2-pel units, + * pMV0[blocky][blockx] (blocky = 0~1, blockx =0~1) + */ + OMXVCMotionVector pMVPred[2][2]; /* motion vector prediction, represented using 1/2-pel units, + * pMVPred[blocky][blockx] (blocky = 0~1, blockx = 0~1) + */ + OMX_U8 pPredDir[2][2]; /* AC prediction direction: + * OMX_VC_NONE, OMX_VC_VERTICAL, OMX_VC_HORIZONTAL + */ +} OMXVCM4P2MBInfo, *OMXVCM4P2MBInfoPtr; + + + +/** + * Function: omxVCM4P2_FindMVpred (6.2.3.1.1) + * + * Description: + * Predicts a motion vector for the current block 
using the procedure + * specified in [ISO14496-2], subclause 7.6.5. The resulting predicted MV is + * returned in pDstMVPred. If the parameter pDstMVPredME is not NULL then + * the set of three MV candidates used for prediction is also returned, + * otherwise pDstMVPredME is NULL upon return. + * + * Input Arguments: + * + * pSrcMVCurMB - pointer to the MV buffer associated with the current Y + * macroblock; a value of NULL indicates unavailability. + * pSrcCandMV1 - pointer to the MV buffer containing the 4 MVs associated + * with the MB located to the left of the current MB; set to NULL + * if there is no MB to the left. + * pSrcCandMV2 - pointer to the MV buffer containing the 4 MVs associated + * with the MB located above the current MB; set to NULL if there + * is no MB located above the current MB. + * pSrcCandMV3 - pointer to the MV buffer containing the 4 MVs associated + * with the MB located to the right and above the current MB; set + * to NULL if there is no MB located to the above-right. + * iBlk - the index of block in the current macroblock + * pDstMVPredME - MV candidate return buffer; if set to NULL then + * prediction candidate MVs are not returned and pDstMVPredME will + * be NULL upon function return; if pDstMVPredME is non-NULL then it + * must point to a buffer containing sufficient space for three + * return MVs. + * + * Output Arguments: + * + * pDstMVPred - pointer to the predicted motion vector + * pDstMVPredME - if non-NULL upon input then pDstMVPredME points upon + * return to a buffer containing the three motion vector candidates + * used for prediction as specified in [ISO14496-2], subclause + * 7.6.5, otherwise if NULL upon input then pDstMVPredME is NULL + * upon output. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - the pointer pDstMVPred is NULL + * - the parameter iBlk does not fall into the range 0 <= iBlk<=3 + * + */ +OMXResult omxVCM4P2_FindMVpred ( + const OMXVCMotionVector *pSrcMVCurMB, + const OMXVCMotionVector *pSrcCandMV1, + const OMXVCMotionVector *pSrcCandMV2, + const OMXVCMotionVector *pSrcCandMV3, + OMXVCMotionVector *pDstMVPred, + OMXVCMotionVector *pDstMVPredME, + OMX_INT iBlk +); + + + +/** + * Function: omxVCM4P2_IDCT8x8blk (6.2.3.2.1) + * + * Description: + * Computes a 2D inverse DCT for a single 8x8 block, as defined in + * [ISO14496-2]. + * + * Input Arguments: + * + * pSrc - pointer to the start of the linearly arranged IDCT input buffer; + * must be aligned on a 16-byte boundary. According to + * [ISO14496-2], the input coefficient values should lie within the + * range [-2048, 2047]. + * + * Output Arguments: + * + * pDst - pointer to the start of the linearly arranged IDCT output buffer; + * must be aligned on a 16-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - pSrc or pDst is NULL. + * - pSrc or pDst is not 16-byte aligned. + * + */ +OMXResult omxVCM4P2_IDCT8x8blk ( + const OMX_S16 *pSrc, + OMX_S16 *pDst +); + + + +/** + * Function: omxVCM4P2_MEGetBufSize (6.2.4.1.1) + * + * Description: + * Computes the size, in bytes, of the vendor-specific specification + * structure for the following motion estimation functions: + * BlockMatch_Integer_8x8, BlockMatch_Integer_16x16, and MotionEstimationMB. 
+ * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P2MEMode + * pMEParams - motion estimation parameters + * + * Output Arguments: + * + * pSize - pointer to the number of bytes required for the specification + * structure + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - one or more of the following is true: + * - an invalid value was specified for the parameter MEmode + * - a negative or zero value was specified for the + * parameter pMEParams->searchRange + * + */ +OMXResult omxVCM4P2_MEGetBufSize ( + OMXVCM4P2MEMode MEmode, + const OMXVCM4P2MEParams *pMEParams, + OMX_U32 *pSize +); + + + +/** + * Function: omxVCM4P2_MEInit (6.2.4.1.2) + * + * Description: + * Initializes the vendor-specific specification structure required for the + * following motion estimation functions: BlockMatch_Integer_8x8, + * BlockMatch_Integer_16x16, and MotionEstimationMB. Memory for the + * specification structure *pMESpec must be allocated prior to calling the + * function, and should be aligned on a 4-byte boundary. Following + * initialization by this function, the vendor-specific structure *pMESpec + * should contain an implementation-specific representation of all motion + * estimation parameters received via the structure pMEParams, for example + * rndVal, searchRange, etc. The number of bytes required for the + * specification structure can be determined using the function + * omxVCM4P2_MEGetBufSize. 
+ * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P2MEMode + * pMEParams - motion estimation parameters + * pMESpec - pointer to the uninitialized ME specification structure + * + * Output Arguments: + * + * pMESpec - pointer to the initialized ME specification structure + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - one or more of the following is true: + * - an invalid value was specified for the parameter MEmode + * - a negative or zero value was specified for the + * parameter pMEParams->searchRange + * + */ +OMXResult omxVCM4P2_MEInit ( + OMXVCM4P2MEMode MEmode, + const OMXVCM4P2MEParams*pMEParams, + void *pMESpec +); + + + +/** + * Function: omxVCM4P2_BlockMatch_Integer_16x16 (6.2.4.2.1) + * + * Description: + * Performs a 16x16 block search; estimates motion vector and associated + * minimum SAD. Both the input and output motion vectors are represented using + * half-pixel units, and therefore a shift left or right by 1 bit may be + * required, respectively, to match the input or output MVs with other + * functions that either generate output MVs or expect input MVs represented + * using integer pixel units. + * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * MB that corresponds to the location of the current macroblock in + * the current plane. + * refWidth - width of the reference plane + * pRefRect - pointer to the valid reference plane rectangle; coordinates + * are specified relative to the image origin. Rectangle + * boundaries may extend beyond image boundaries if the image has + * been padded. For example, if padding extends 4 pixels beyond + * frame border, then the value for the left border could be set to + * -4. 
+ * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 256 + * entries); must be aligned on a 16-byte boundary. The number of + * bytes between lines (step) is 16. + * pCurrPointPos - position of the current macroblock in the current plane + * pSrcPreMV - pointer to predicted motion vector; NULL indicates no + * predicted MV + * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced + * by pSrcPreMV); may be set to NULL if unavailable. + * pMESpec - vendor-specific motion estimation specification structure; + * must have been allocated and then initialized using + * omxVCM4P2_MEInit prior to calling the block matching function. + * + * Output Arguments: + * + * pDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: pSrcRefBuf, + * pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or + * pMESpec, or + * - pSrcCurrBuf is not 16-byte aligned + * + */ +OMXResult omxVCM4P2_BlockMatch_Integer_16x16 ( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pCurrPointPos, + const OMXVCMotionVector*pSrcPreMV, + const OMX_INT *pSrcPreSAD, + void *pMESpec, + OMXVCMotionVector*pDstMV, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCM4P2_BlockMatch_Integer_8x8 (6.2.4.2.2) + * + * Description: + * Performs an 8x8 block search; estimates motion vector and associated + * minimum SAD. 
Both the input and output motion vectors are represented + * using half-pixel units, and therefore a shift left or right by 1 bit may be + * required, respectively, to match the input or output MVs with other + * functions that either generate output MVs or expect input MVs represented + * using integer pixel units. + * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * block that corresponds to the location of the current 8x8 block + * in the current plane. + * refWidth - width of the reference plane + * pRefRect - pointer to the valid reference plane rectangle; coordinates + * are specified relative to the image origin. Rectangle + * boundaries may extend beyond image boundaries if the image has + * been padded. + * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 128 + * entries); must be aligned on an 8-byte boundary. The number of + * bytes between lines (step) is 16 bytes. + * pCurrPointPos - position of the current block in the current plane + * pSrcPreMV - pointer to predicted motion vector; NULL indicates no + * predicted MV + * pSrcPreSAD - pointer to SAD associated with the predicted MV (referenced + * by pSrcPreMV); may be set to NULL if unavailable. + * pMESpec - vendor-specific motion estimation specification structure; + * must have been allocated and then initialized using + * omxVCM4P2_MEInit prior to calling the block matching function. + * + * Output Arguments: + * + * pDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. 
Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: pSrcRefBuf, + * pRefRect, pSrcCurrBuf, pCurrPointPos, pDstMV, pDstSAD or + * pMESpec, or + * - pSrcCurrBuf is not 8-byte aligned + * + */ +OMXResult omxVCM4P2_BlockMatch_Integer_8x8 ( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pCurrPointPos, + const OMXVCMotionVector *pSrcPreMV, + const OMX_INT *pSrcPreSAD, + void *pMESpec, + OMXVCMotionVector *pDstMV, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCM4P2_BlockMatch_Half_16x16 (6.2.4.2.3) + * + * Description: + * Performs a 16x16 block match with half-pixel resolution. Returns the + * estimated motion vector and associated minimum SAD. This function + * estimates the half-pixel motion vector by interpolating the integer + * resolution motion vector referenced by the input parameter pSrcDstMV, i.e., + * the initial integer MV is generated externally. The input parameters + * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of + * 16x16 integer search prior to calling BlockMatch_Half_16x16. The function + * BlockMatch_Integer_16x16 may be used for integer motion estimation. + * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * macroblock that corresponds to the location of the current + * macroblock in the current plane. + * refWidth - width of the reference plane + * pRefRect - reference plane valid region rectangle + * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 256 + * entries); must be aligned on a 16-byte boundary. The number of + * bytes between lines (step) is 16. 
+ * pSearchPointRefPos - position of the starting point for half pixel + * search (specified in terms of integer pixel units) in the + * reference plane, i.e., the reference position pointed to by the + * predicted motion vector. + * rndVal - rounding control parameter: 0 - disabled; 1 - enabled. + * pSrcDstMV - pointer to the initial MV estimate; typically generated + * during a prior 16X16 integer search; specified in terms of + * half-pixel units. + * + * Output Arguments: + * + * pSrcDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: pSrcRefBuf, + * pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV. + * - pSrcCurrBuf is not 16-byte aligned + * + */ +OMXResult omxVCM4P2_BlockMatch_Half_16x16 ( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pSearchPointRefPos, + OMX_INT rndVal, + OMXVCMotionVector *pSrcDstMV, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCM4P2_BlockMatch_Half_8x8 (6.2.4.2.4) + * + * Description: + * Performs an 8x8 block match with half-pixel resolution. Returns the + * estimated motion vector and associated minimum SAD. This function + * estimates the half-pixel motion vector by interpolating the integer + * resolution motion vector referenced by the input parameter pSrcDstMV, i.e., + * the initial integer MV is generated externally. The input parameters + * pSrcRefBuf and pSearchPointRefPos should be shifted by the winning MV of + * 8x8 integer search prior to calling BlockMatch_Half_8x8. The function + * BlockMatch_Integer_8x8 may be used for integer motion estimation. 
+ * + * Input Arguments: + * + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * block that corresponds to the location of the current 8x8 block + * in the current plane. + * refWidth - width of the reference plane + * pRefRect - reference plane valid region rectangle + * pSrcCurrBuf - pointer to the current block in the current macroblock + * buffer extracted from the original plane (linear array, 128 + * entries); must be aligned on an 8-byte boundary. The number of + * bytes between lines (step) is 16. + * pSearchPointRefPos - position of the starting point for half pixel + * search (specified in terms of integer pixel units) in the + * reference plane. + * rndVal - rounding control parameter: 0 - disabled; 1 - enabled. + * pSrcDstMV - pointer to the initial MV estimate; typically generated + * during a prior 8x8 integer search, specified in terms of + * half-pixel units. + * + * Output Arguments: + * + * pSrcDstMV - pointer to estimated MV + * pDstSAD - pointer to minimum SAD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one of the following + * conditions is true: + * - at least one of the following pointers is NULL: + * pSrcRefBuf, pRefRect, pSrcCurrBuf, pSearchPointRefPos, pSrcDstMV + * - pSrcCurrBuf is not 8-byte aligned + * + */ +OMXResult omxVCM4P2_BlockMatch_Half_8x8 ( + const OMX_U8 *pSrcRefBuf, + OMX_INT refWidth, + const OMXRect *pRefRect, + const OMX_U8 *pSrcCurrBuf, + const OMXVCM4P2Coordinate *pSearchPointRefPos, + OMX_INT rndVal, + OMXVCMotionVector *pSrcDstMV, + OMX_INT *pDstSAD +); + + + +/** + * Function: omxVCM4P2_MotionEstimationMB (6.2.4.3.1) + * + * Description: + * Performs motion search for a 16x16 macroblock. Selects best motion search + * strategy from among inter-1MV, inter-4MV, and intra modes. Supports + * integer and half pixel resolution. 
+ * + * Input Arguments: + * + * pSrcCurrBuf - pointer to the top-left corner of the current MB in the + * original picture plane; must be aligned on a 16-byte boundary. + * The function does not expect source data outside the region + * bounded by the MB to be available; for example it is not + * necessary for the caller to guarantee the availability of + * pSrcCurrBuf[-SrcCurrStep], i.e., the row of pixels above the MB + * to be processed. + * srcCurrStep - width of the original picture plane, in terms of full + * pixels; must be a multiple of 16. + * pSrcRefBuf - pointer to the reference Y plane; points to the reference + * plane location corresponding to the location of the current + * macroblock in the current plane; must be aligned on a 16-byte + * boundary. + * srcRefStep - width of the reference picture plane, in terms of full + * pixels; must be a multiple of 16. + * pRefRect - reference plane valid region rectangle, specified relative to + * the image origin + * pCurrPointPos - position of the current macroblock in the current plane + * pMESpec - pointer to the vendor-specific motion estimation specification + * structure; must be allocated and then initialized using + * omxVCM4P2_MEInit prior to calling this function. + * pMBInfo - array, of dimension four, containing pointers to information + * associated with four nearby MBs: + * - pMBInfo[0] - pointer to left MB information + * - pMBInfo[1] - pointer to top MB information + * - pMBInfo[2] - pointer to top-left MB information + * - pMBInfo[3] - pointer to top-right MB information + * Any pointer in the array may be set equal to NULL if the + * corresponding MB doesn't exist. 
For each MB, the following structure + * members are used: + * - mbType - macroblock type, either OMX_VC_INTRA, OMX_VC_INTER, or + * OMX_VC_INTER4V + * - pMV0[2][2] - estimated motion vectors; represented + * in 1/2 pixel units + * - sliceID - number of the slice to which the MB belongs + * pSrcDstMBCurr - pointer to information structure for the current MB. + * The following entries should be set prior to calling the + * function: sliceID - the number of the slice to which the + * current MB belongs. The structure elements cbpy and cbpc are + * ignored. + * + * Output Arguments: + * + * pSrcDstMBCurr - pointer to updated information structure for the current + * MB after MB-level motion estimation has been completed. The + * following structure members are updated by the ME function: + * - mbType - macroblock type: OMX_VC_INTRA, OMX_VC_INTER, or + * OMX_VC_INTER4V. + * - pMV0[2][2] - estimated motion vectors; represented in + * terms of 1/2 pel units. + * - pMVPred[2][2] - predicted motion vectors; represented + * in terms of 1/2 pel units. + * The structure members cbpy and cbpc are not updated by the function. + * pDstSAD - pointer to the minimum SAD for INTER1V, or sum of minimum SADs + * for INTER4V + * pDstBlockSAD - pointer to an array of SAD values for each of the four + * 8x8 luma blocks in the MB. The block SADs are in scan order for + * each MB. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments. Returned if one or more of the + * following conditions is true: + * - at least one of the following pointers is NULL: pSrcCurrBuf, + * pSrcRefBuf, pRefRect, pCurrPointPos, pMBInter, pMBIntra, + * pSrcDstMBCurr, or pDstSAD.
+ * + */ +OMXResult omxVCM4P2_MotionEstimationMB ( + const OMX_U8 *pSrcCurrBuf, + OMX_S32 srcCurrStep, + const OMX_U8 *pSrcRefBuf, + OMX_S32 srcRefStep, + const OMXRect*pRefRect, + const OMXVCM4P2Coordinate *pCurrPointPos, + void *pMESpec, + const OMXVCM4P2MBInfoPtr *pMBInfo, + OMXVCM4P2MBInfo *pSrcDstMBCurr, + OMX_U16 *pDstSAD, + OMX_U16 *pDstBlockSAD +); + + + +/** + * Function: omxVCM4P2_DCT8x8blk (6.2.4.4.1) + * + * Description: + * Computes a 2D forward DCT for a single 8x8 block, as defined in + * [ISO14496-2]. + * + * Input Arguments: + * + * pSrc - pointer to the start of the linearly arranged input buffer; must + * be aligned on a 16-byte boundary. Input values (pixel + * intensities) are valid in the range [-255,255]. + * + * Output Arguments: + * + * pDst - pointer to the start of the linearly arranged output buffer; must + * be aligned on a 16-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, returned if: + * - pSrc or pDst is NULL. + * - pSrc or pDst is not 16-byte aligned. + * + */ +OMXResult omxVCM4P2_DCT8x8blk ( + const OMX_S16 *pSrc, + OMX_S16 *pDst +); + + + +/** + * Function: omxVCM4P2_QuantIntra_I (6.2.4.4.2) + * + * Description: + * Performs quantization on intra block coefficients. This function supports + * bits_per_pixel == 8. + * + * Input Arguments: + * + * pSrcDst - pointer to the input intra block coefficients; must be aligned + * on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale). + * blockIndex - block index indicating the component type and position, + * valid in the range 0 to 5, as defined in [ISO14496-2], subclause + * 6.1.3.8. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pSrcDst - pointer to the output (quantized) intra block coefficients.
+ * When shortVideoHeader==1, AC coefficients are saturated on the + * interval [-127, 127], and DC coefficients are saturated on the + * interval [1, 254]. When shortVideoHeader==0, AC coefficients + * are saturated on the interval [-2047, 2047]. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - pSrcDst is NULL. + * - blockIndex < 0 or blockIndex >= 10 + * - QP <= 0 or QP >= 32. + * + */ +OMXResult omxVCM4P2_QuantIntra_I ( + OMX_S16 *pSrcDst, + OMX_U8 QP, + OMX_INT blockIndex, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_QuantInter_I (6.2.4.4.3) + * + * Description: + * Performs quantization on an inter coefficient block; supports + * bits_per_pixel == 8. + * + * Input Arguments: + * + * pSrcDst - pointer to the input inter block coefficients; must be aligned + * on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale) + * shortVideoHeader - binary flag indicating presence of short_video_header; + * shortVideoHeader==1 selects linear intra DC mode, and + * shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pSrcDst - pointer to the output (quantized) interblock coefficients. + * When shortVideoHeader==1, AC coefficients are saturated on the + * interval [-127, 127], and DC coefficients are saturated on the + * interval [1, 254]. When shortVideoHeader==0, AC coefficients + * are saturated on the interval [-2047, 2047]. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - pSrcDst is NULL. + * - QP <= 0 or QP >= 32. + * + */ +OMXResult omxVCM4P2_QuantInter_I ( + OMX_S16 *pSrcDst, + OMX_U8 QP, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_TransRecBlockCoef_intra (6.2.4.4.4) + * + * Description: + * Quantizes the DCT coefficients, implements intra block AC/DC coefficient + * prediction, and reconstructs the current intra block texture for prediction + * on the next frame. 
Quantized row and column coefficients are returned in + * the updated coefficient buffers. + * + * Input Arguments: + * + * pSrc - pointer to the pixels of current intra block; must be aligned on + * an 8-byte boundary. + * pPredBufRow - pointer to the coefficient row buffer containing + * ((num_mb_per_row * 2 + 1) * 8) elements of type OMX_S16. + * Coefficients are organized into blocks of eight as described + * below (Internal Prediction Coefficient Update Procedures). The + * DC coefficient is first, and the remaining buffer locations + * contain the quantized AC coefficients. Each group of eight row + * buffer elements combined with one element eight elements ahead + * contains the coefficient predictors of the neighboring block + * that is spatially above or to the left of the block currently to + * be decoded. A negative-valued DC coefficient indicates that this + * neighboring block is not INTRA-coded or out of bounds, and + * therefore the AC and DC coefficients are invalid. Pointer must + * be aligned on an 8-byte boundary. + * pPredBufCol - pointer to the prediction coefficient column buffer + * containing 16 elements of type OMX_S16. Coefficients are + * organized as described in section 6.2.2.5. Pointer must be + * aligned on an 8-byte boundary. + * pSumErr - pointer to a flag indicating whether or not AC prediction is + * required; AC prediction is enabled if *pSumErr >=0, but the + * value is not used for coefficient prediction, i.e., the sum of + * absolute differences starts from 0 for each call to this + * function. Otherwise AC prediction is disabled if *pSumErr < 0 . + * blockIndex - block index indicating the component type and position, as + * defined in [ISO14496-2], subclause 6.1.3.8. 
+ * curQp - quantization parameter of the macroblock to which the current + * block belongs + * pQpBuf - pointer to a 2-element quantization parameter buffer; pQpBuf[0] + * contains the quantization parameter associated with the 8x8 + * block left of the current block (QPa), and pQpBuf[1] contains + * the quantization parameter associated with the 8x8 block above + * the current block (QPc). In the event that the corresponding + * block is outside of the VOP bound, the Qp value will not affect + * the intra prediction process, as described in [ISO14496-2], + * sub-clause 7.4.3.3, Adaptive AC Coefficient Prediction. + * srcStep - width of the source buffer; must be a multiple of 8. + * dstStep - width of the reconstructed destination buffer; must be a + * multiple of 16. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pDst - pointer to the quantized DCT coefficient buffer; pDst[0] contains + * the predicted DC coefficient; the remaining entries contain the + * quantized AC coefficients (without prediction). The pointer + * pDst must be aligned on a 16-byte boundary. + * pRec - pointer to the reconstructed texture; must be aligned on an + * 8-byte boundary. + * pPredBufRow - pointer to the updated coefficient row buffer + * pPredBufCol - pointer to the updated coefficient column buffer + * pPreACPredict - if prediction is enabled, the parameter points to the + * start of the buffer containing the coefficient differences for + * VLC encoding. The entry pPreACPredict[0] indicates prediction + * direction for the current block and takes one of the following + * values: OMX_VC_NONE (prediction disabled), OMX_VC_HORIZONTAL, or + * OMX_VC_VERTICAL. The entries + * pPreACPredict[1]-pPreACPredict[7] contain predicted AC + * coefficients.
If prediction is disabled (*pSumErr<0) then the + * contents of this buffer are undefined upon return from the + * function + * pSumErr - pointer to the value of the accumulated AC coefficient errors, + * i.e., sum of the absolute differences between predicted and + * unpredicted AC coefficients + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments: + * - At least one of the following pointers is NULL: pSrc, pDst, pRec, + * pPredBufRow, pPredBufCol, pQpBuf, pPreACPredict, pSumErr. + * - blockIndex < 0 or blockIndex >= 10; + * - curQP <= 0 or curQP >= 32. + * - srcStep, or dstStep <= 0 or not a multiple of 8. + * - pDst is not 16-byte aligned. + * - At least one of the following pointers is not 8-byte aligned: + * pSrc, pRec. + * + * Note: The coefficient buffers must be updated in accordance with the + * update procedures defined in section 6.2.2. + * + */ +OMXResult omxVCM4P2_TransRecBlockCoef_intra ( + const OMX_U8 *pSrc, + OMX_S16 *pDst, + OMX_U8 *pRec, + OMX_S16 *pPredBufRow, + OMX_S16 *pPredBufCol, + OMX_S16 *pPreACPredict, + OMX_INT *pSumErr, + OMX_INT blockIndex, + OMX_U8 curQp, + const OMX_U8 *pQpBuf, + OMX_INT srcStep, + OMX_INT dstStep, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_TransRecBlockCoef_inter (6.2.4.4.5) + * + * Description: + * Implements DCT, and quantizes the DCT coefficients of the inter block + * while reconstructing the texture residual. There is no boundary check for + * the bit stream buffer. + * + * Input Arguments: + * + * pSrc - pointer to the residuals to be encoded; must be aligned on a + * 16-byte boundary. + * QP - quantization parameter. + * shortVideoHeader - binary flag indicating presence of short_video_header; + * shortVideoHeader==1 selects linear intra DC mode, and + * shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * pDst - pointer to the quantized DCT coefficients buffer; must be aligned + * on a 16-byte boundary.
+ * pRec - pointer to the reconstructed texture residuals; must be aligned + * on a 16-byte boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is either NULL or + * not 16-byte aligned: + * - pSrc + * - pDst + * - pRec + * - QP <= 0 or QP >= 32. + * + */ +OMXResult omxVCM4P2_TransRecBlockCoef_inter ( + const OMX_S16 *pSrc, + OMX_S16 *pDst, + OMX_S16 *pRec, + OMX_U8 QP, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_EncodeVLCZigzag_IntraDCVLC (6.2.4.5.2) + * + * Description: + * Performs zigzag scan and VLC encoding of AC and DC coefficients for one + * intra block. Two versions of the function (DCVLC and ACVLC) are provided + * in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, "Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding". + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the bitstream + * pBitOffset - pointer to the bit position in the byte pointed by + * *ppBitStream. Valid within 0 to 7. + * pQDctBlkCoef - pointer to the quantized DCT coefficient + * predDir - AC prediction direction, which is used to decide the zigzag + * scan pattern; takes one of the following values: + * - OMX_VC_NONE - AC prediction not used. + * Performs classical zigzag scan. + * - OMX_VC_HORIZONTAL - Horizontal prediction. + * Performs alternate-vertical zigzag scan. + * - OMX_VC_VERTICAL - Vertical prediction. + * Performs alternate-horizontal zigzag scan. + * pattern - block pattern which is used to decide whether this block is + * encoded + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. 
+ * videoComp - video component type (luminance, chrominance) of the current + * block + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is encoded, so + * that it points to the current byte in the bit stream buffer. + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments: + * - At least one of the following pointers is NULL: ppBitStream, + * *ppBitStream, pBitOffset, pQDctBlkCoef. + * - *pBitOffset < 0, or *pBitOffset >7. + * - PredDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or + * OMX_VC_VERTICAL. + * - VideoComp is not one component of enum OMXVCM4P2VideoComponent. + * + */ +OMXResult omxVCM4P2_EncodeVLCZigzag_IntraDCVLC ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 predDir, + OMX_U8 pattern, + OMX_INT shortVideoHeader, + OMXVCM4P2VideoComponent videoComp +); + + + +/** + * Function: omxVCM4P2_EncodeVLCZigzag_IntraACVLC (6.2.4.5.2) + * + * Description: + * Performs zigzag scan and VLC encoding of AC and DC coefficients for one + * intra block. Two versions of the function (DCVLC and ACVLC) are provided + * in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding. + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the bitstream + * pBitOffset - pointer to the bit position in the byte pointed by + * *ppBitStream. Valid within 0 to 7. + * pQDctBlkCoef - pointer to the quantized DCT coefficient + * predDir - AC prediction direction, which is used to decide the zigzag + * scan pattern; takes one of the following values: + * - OMX_VC_NONE - AC prediction not used. + * Performs classical zigzag scan. 
+ * - OMX_VC_HORIZONTAL - Horizontal prediction. + * Performs alternate-vertical zigzag scan. + * - OMX_VC_VERTICAL - Vertical prediction. + * Performs alternate-horizontal zigzag scan. + * pattern - block pattern which is used to decide whether this block is + * encoded + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is encoded, so + * that it points to the current byte in the bit stream buffer. + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments: + * - At least one of the following pointers is NULL: ppBitStream, + * *ppBitStream, pBitOffset, pQDctBlkCoef. + * - *pBitOffset < 0, or *pBitOffset >7. + * - PredDir is not one of: OMX_VC_NONE, OMX_VC_HORIZONTAL, or + * OMX_VC_VERTICAL. + * - VideoComp is not one component of enum OMXVCM4P2VideoComponent. + * + */ +OMXResult omxVCM4P2_EncodeVLCZigzag_IntraACVLC ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 predDir, + OMX_U8 pattern, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_EncodeVLCZigzag_Inter (6.2.4.5.3) + * + * Description: + * Performs classical zigzag scanning and VLC encoding for one inter block. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream + * pBitOffset - pointer to the bit position in the byte pointed by + * *ppBitStream. 
Valid within 0 to 7 + * pQDctBlkCoef - pointer to the quantized DCT coefficient + * pattern - block pattern which is used to decide whether this block is + * encoded + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is encoded so that + * it points to the current byte in the bit stream buffer. + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - Bad arguments + * - At least one of the following pointers is NULL: ppBitStream, *ppBitStream, + * pBitOffset, pQDctBlkCoef + * - *pBitOffset < 0, or *pBitOffset >7. + * + */ +OMXResult omxVCM4P2_EncodeVLCZigzag_Inter ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMX_S16 *pQDctBlkCoef, + OMX_U8 pattern, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_EncodeMV (6.2.4.5.4) + * + * Description: + * Predicts a motion vector for the current macroblock, encodes the + * difference, and writes the output to the stream buffer. The input MVs + * pMVCurMB, pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB should lie + * within the ranges associated with the input parameter fcodeForward, as + * described in [ISO14496-2], subclause 7.6.3. This function provides a + * superset of the functionality associated with the function + * omxVCM4P2_FindMVpred. + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the bitstream buffer + * pBitOffset - index of the first free (next available) bit in the stream + * buffer referenced by *ppBitStream, valid in the range 0 to 7. + * pMVCurMB - pointer to the current macroblock motion vector; a value of + * NULL indicates unavailability.
+ * pSrcMVLeftMB - pointer to the source left macroblock motion vector; a + * value of NULL indicates unavailability. + * pSrcMVUpperMB - pointer to source upper macroblock motion vector; a + * value of NULL indicates unavailability. + * pSrcMVUpperRightMB - pointer to source upper right MB motion vector; a + * value of NULL indicates unavailability. + * fcodeForward - an integer with values from 1 to 7; used in encoding + * motion vectors related to search range, as described in + * [ISO14496-2], subclause 7.6.3. + * MBType - macro block type, valid in the range 0 to 5 + * + * Output Arguments: + * + * ppBitStream - updated pointer to the current byte in the bit stream + * buffer + * pBitOffset - updated index of the next available bit position in stream + * buffer referenced by *ppBitStream + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following pointers is NULL: ppBitStream, + * *ppBitStream, pBitOffset, pMVCurMB + * - *pBitOffset < 0, or *pBitOffset >7. + * - fcodeForward <= 0, or fcodeForward > 7, or MBType < 0. + * + */ +OMXResult omxVCM4P2_EncodeMV ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + const OMXVCMotionVector *pMVCurMB, + const OMXVCMotionVector*pSrcMVLeftMB, + const OMXVCMotionVector *pSrcMVUpperMB, + const OMXVCMotionVector *pSrcMVUpperRightMB, + OMX_INT fcodeForward, + OMXVCM4P2MacroblockType MBType +); + + + +/** + * Function: omxVCM4P2_DecodePadMV_PVOP (6.2.5.1.1) + * + * Description: + * Decodes and pads the four motion vectors associated with a non-intra P-VOP + * macroblock. For macroblocks of type OMX_VC_INTER4V, the output MV is + * padded as specified in [ISO14496-2], subclause 7.6.1.6. Otherwise, for + * macroblocks of types other than OMX_VC_INTER4V, the decoded MV is copied to + * all four output MV buffer entries.
+ * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream buffer + * pBitOffset - pointer to the bit position in the byte pointed to by + * *ppBitStream. *pBitOffset is valid within [0-7]. + * pSrcMVLeftMB, pSrcMVUpperMB, and pSrcMVUpperRightMB - pointers to the + * motion vector buffers of the macroblocks spatially at the left, + * upper, and upper-right side of the current macroblock, + * respectively; a value of NULL indicates unavailability. Note: + * Any neighborhood macroblock outside the current VOP or video + * packet or outside the current GOB (when short_video_header is + * 1 ) for which gob_header_empty is 0 is treated as + * transparent, according to [ISO14496-2], subclause 7.6.5. + * fcodeForward - a code equal to vop_fcode_forward in MPEG-4 bit stream + * syntax + * MBType - the type of the current macroblock. If MBType is not equal to + * OMX_VC_INTER4V, the destination motion vector buffer is still + * filled with the same decoded vector. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded, so + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream + * pDstMVCurMB - pointer to the motion vector buffer for the current + * macroblock; contains four decoded motion vectors + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pDstMVCurMB + * - *pBitOffset exceeds [0,7] + * - fcodeForward exceeds (0,7] + * - MBType less than zero + * - motion vector buffer is not 4-byte aligned.
+ * OMX_Sts_Err - status error + * + */ +OMXResult omxVCM4P2_DecodePadMV_PVOP ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMXVCMotionVector *pSrcMVLeftMB, + OMXVCMotionVector*pSrcMVUpperMB, + OMXVCMotionVector *pSrcMVUpperRightMB, + OMXVCMotionVector*pDstMVCurMB, + OMX_INT fcodeForward, + OMXVCM4P2MacroblockType MBType +); + + + +/** + * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC (6.2.5.2.2) + * + * Description: + * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients + * for one intra block. Two versions of the function (DCVLC and ACVLC) are + * provided in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the + * bitstream buffer + * pBitOffset - pointer to the bit position in the current byte referenced + * by *ppBitStream. The parameter *pBitOffset is valid in the + * range [0-7]. + * Bit Position in one byte: |Most Least| + * *pBitOffset |0 1 2 3 4 5 6 7| + * predDir - AC prediction direction; used to select the zigzag scan + * pattern; takes one of the following values: + * - OMX_VC_NONE - AC prediction not used; + * performs classical zigzag scan. + * - OMX_VC_HORIZONTAL - Horizontal prediction; + * performs alternate-vertical zigzag scan; + * - OMX_VC_VERTICAL - Vertical prediction; + * performs alternate-horizontal zigzag scan. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. 
+ * videoComp - video component type (luminance or chrominance) of the + * current block + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded such + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated such that it points to the current + * bit position in the byte pointed by *ppBitStream + * pDst - pointer to the coefficient buffer of current block; must be + * 4-byte aligned. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pDst + * - *pBitOffset exceeds [0,7] + * - predDir exceeds [0,2] + * - pDst is not 4-byte aligned + * OMX_Sts_Err - if: + * - In DecodeVLCZigzag_IntraDCVLC, dc_size > 12 + * - At least one of mark bits equals zero + * - Illegal stream encountered; code cannot be located in VLC table + * - Forbidden code encountered in the VLC FLC table. + * - The number of coefficients is greater than 64 + * + */ +OMXResult omxVCM4P2_DecodeVLCZigzag_IntraDCVLC ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_S16 *pDst, + OMX_U8 predDir, + OMX_INT shortVideoHeader, + OMXVCM4P2VideoComponent videoComp +); + + + +/** + * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC (6.2.5.2.2) + * + * Description: + * Performs VLC decoding and inverse zigzag scan of AC and DC coefficients + * for one intra block. Two versions of the function (DCVLC and ACVLC) are + * provided in order to support the two different methods of processing DC + * coefficients, as described in [ISO14496-2], subclause 7.4.1.4, Intra DC + * Coefficient Decoding for the Case of Switched VLC Encoding. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the + * bitstream buffer + * pBitOffset - pointer to the bit position in the current byte referenced + * by *ppBitStream.
The parameter *pBitOffset is valid in the + * range [0-7]. Bit Position in one byte: |Most Least| *pBitOffset + * |0 1 2 3 4 5 6 7| + * predDir - AC prediction direction; used to select the zigzag scan + * pattern; takes one of the following values: OMX_VC_NONE - AC + * prediction not used; performs classical zigzag scan. + * OMX_VC_HORIZONTAL - Horizontal prediction; performs + * alternate-vertical zigzag scan; OMX_VC_VERTICAL - Vertical + * prediction; performs alternate-horizontal zigzag scan. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * videoComp - video component type (luminance or chrominance) of the + * current block + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded such + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated such that it points to the current + * bit position in the byte pointed by *ppBitStream + * pDst - pointer to the coefficient buffer of current block; must be + * 4-byte aligned. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pDst + * - *pBitOffset exceeds [0,7] + * - predDir exceeds [0,2] + * - pDst is not 4-byte aligned + * OMX_Sts_Err - if: + * - In DecodeVLCZigzag_IntraDCVLC, dc_size > 12 + * - At least one of mark bits equals zero + * - Illegal stream encountered; code cannot be located in VLC table + * - Forbidden code encountered in the VLC FLC table + * - The number of coefficients is greater than 64 + * + */ +OMXResult omxVCM4P2_DecodeVLCZigzag_IntraACVLC ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_S16 *pDst, + OMX_U8 predDir, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_DecodeVLCZigzag_Inter (6.2.5.2.3) + * + * Description: + * Performs VLC decoding and inverse zigzag scan for one inter-coded block. + * + * Input Arguments: + * + * ppBitStream - double pointer to the current byte in the stream buffer + * pBitOffset - pointer to the next available bit in the current stream + * byte referenced by *ppBitStream. The parameter *pBitOffset is + * valid within the range [0-7]. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; escape modes 0-3 are used if + * shortVideoHeader==0, and escape mode 4 is used when + * shortVideoHeader==1. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded such + * that it points to the current byte in the stream buffer + * pBitOffset - *pBitOffset is updated after decoding such that it points + * to the next available bit in the stream byte referenced by + * *ppBitStream + * pDst - pointer to the coefficient buffer of current block; must be + * 4-byte aligned.
+ * + * Return Value: + * + * OMX_Sts_BadArgErr - bad arguments: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pDst + * - pDst is not 4-byte aligned + * - *pBitOffset exceeds [0,7] + * OMX_Sts_Err - status error, if: + * - At least one mark bit is equal to zero + * - Encountered an illegal stream code that cannot be found in the VLC table + * - Encountered an illegal code in the VLC FLC table + * - The number of coefficients is greater than 64 + * + */ +OMXResult omxVCM4P2_DecodeVLCZigzag_Inter ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_S16 *pDst, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_QuantInvIntra_I (6.2.5.3.2) + * + * Description: + * Performs the second inverse quantization mode on an intra/inter coded + * block. Supports bits_per_pixel = 8. The output coefficients are clipped to + * the range [-2048, 2047]. + * + * Input Arguments: + * + * pSrcDst - pointer to the input (quantized) intra/inter block; must be + * aligned on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale) + * videoComp - video component type of the current block. Takes one of the + * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra + * version only). + * shortVideoHeader - binary flag indicating presence of short_video_header + * (intra version only). + * + * Output Arguments: + * + * pSrcDst - pointer to the output (dequantized) intra/inter block + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; one or more of the following is + * true: + * - pSrcDst is NULL + * - QP <= 0 or QP >=31 + * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE. 
+ * + */ +OMXResult omxVCM4P2_QuantInvIntra_I ( + OMX_S16 *pSrcDst, + OMX_INT QP, + OMXVCM4P2VideoComponent videoComp, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_QuantInvInter_I (6.2.5.3.2) + * + * Description: + * Performs the second inverse quantization mode on an intra/inter coded + * block. Supports bits_per_pixel = 8. The output coefficients are clipped to + * the range [-2048, 2047]. + * + * Input Arguments: + * + * pSrcDst - pointer to the input (quantized) intra/inter block; must be + * aligned on a 16-byte boundary. + * QP - quantization parameter (quantizer_scale) + * videoComp - video component type of the current block. Takes one of the + * following flags: OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE (intra + * version only). + * shortVideoHeader - binary flag indicating presence of short_video_header + * (intra version only). + * + * Output Arguments: + * + * pSrcDst - pointer to the output (dequantized) intra/inter block + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; one or more of the following is + * true: + * - pSrcDst is NULL + * - QP <= 0 or QP >=31 + * - videoComp is neither OMX_VC_LUMINANCE nor OMX_VC_CHROMINANCE. + * + */ +OMXResult omxVCM4P2_QuantInvInter_I ( + OMX_S16 *pSrcDst, + OMX_INT QP +); + + + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Intra (6.2.5.4.1) + * + * Description: + * Decodes the INTRA block coefficients. Inverse quantization, inversely + * zigzag positioning, and IDCT, with appropriate clipping on each step, are + * performed on the coefficients. The results are then placed in the output + * frame/plane on a pixel basis. Note: This function will be used only when + * at least one non-zero AC coefficient of current block exists in the bit + * stream. The DC only condition will be handled in another function. + * + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream buffer. 
There is no boundary check for the bit stream + * buffer. + * pBitOffset - pointer to the bit position in the byte pointed to by + * *ppBitStream. *pBitOffset is valid within [0-7]. + * step - width of the destination plane + * pCoefBufRow - pointer to the coefficient row buffer; must be aligned on + * an 8-byte boundary. + * pCoefBufCol - pointer to the coefficient column buffer; must be aligned + * on an 8-byte boundary. + * curQP - quantization parameter of the macroblock which the current block + * belongs to + * pQPBuf - pointer to the quantization parameter buffer + * blockIndex - block index indicating the component type and position as + * defined in [ISO14496-2], subclause 6.1.3.8, Figure 6-5. + * intraDCVLC - a code determined by intra_dc_vlc_thr and QP. This allows a + * mechanism to switch between two VLC for coding of Intra DC + * coefficients as per [ISO14496-2], Table 6-21. + * ACPredFlag - a flag equal to ac_pred_flag (of luminance) indicating if + * the ac coefficients of the first row or first column are + * differentially coded for intra coded macroblock. + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded, so + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream + * pDst - pointer to the block in the destination plane; must be aligned on + * an 8-byte boundary. + * pCoefBufRow - pointer to the updated coefficient row buffer. + * pCoefBufCol - pointer to the updated coefficient column buffer Note: + * The coefficient buffers must be updated in accordance with the + * update procedure defined in section 6.2.2. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the following pointers is NULL: + * ppBitStream, *ppBitStream, pBitOffset, pCoefBufRow, pCoefBufCol, + * pQPBuf, pDst. + * - *pBitOffset exceeds [0,7] + * - curQP exceeds (1, 31) + * - blockIndex exceeds [0,5] + * - step is not the multiple of 8 + * - a pointer alignment requirement was violated. + * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Intra. + * + */ +OMXResult omxVCM4P2_DecodeBlockCoef_Intra ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_U8 *pDst, + OMX_INT step, + OMX_S16 *pCoefBufRow, + OMX_S16 *pCoefBufCol, + OMX_U8 curQP, + const OMX_U8 *pQPBuf, + OMX_INT blockIndex, + OMX_INT intraDCVLC, + OMX_INT ACPredFlag, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Inter (6.2.5.4.2) + * + * Description: + * Decodes the INTER block coefficients. This function performs inverse + * quantization, inverse zigzag positioning, and IDCT (with appropriate + * clipping on each step) on the coefficients. The results (residuals) are + * placed in a contiguous array of 64 elements. For INTER block, the output + * buffer holds the residuals for further reconstruction. + * + * Input Arguments: + * + * ppBitStream - pointer to the pointer to the current byte in the bit + * stream buffer. There is no boundary check for the bit stream + * buffer. + * pBitOffset - pointer to the bit position in the byte pointed to by + * *ppBitStream. *pBitOffset is valid within [0-7] + * QP - quantization parameter + * shortVideoHeader - binary flag indicating presence of + * short_video_header; shortVideoHeader==1 selects linear intra DC + * mode, and shortVideoHeader==0 selects non linear intra DC mode. 
+ * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after the block is decoded, so + * that it points to the current byte in the bit stream buffer + * pBitOffset - *pBitOffset is updated so that it points to the current bit + * position in the byte pointed by *ppBitStream + * pDst - pointer to the decoded residual buffer (a contiguous array of 64 + * elements of OMX_S16 data type); must be aligned on a 16-byte + * boundary. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the following pointers is Null: + * ppBitStream, *ppBitStream, pBitOffset , pDst + * - *pBitOffset exceeds [0,7] + * - QP <= 0. + * - pDst is not 16-byte aligned + * OMX_Sts_Err - status error. Refer to OMX_Sts_Err of DecodeVLCZigzag_Inter . + * + */ +OMXResult omxVCM4P2_DecodeBlockCoef_Inter ( + const OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + OMX_S16 *pDst, + OMX_INT QP, + OMX_INT shortVideoHeader +); + + + +/** + * Function: omxVCM4P2_PredictReconCoefIntra (6.2.5.4.3) + * + * Description: + * Performs adaptive DC/AC coefficient prediction for an intra block. Prior + * to the function call, prediction direction (predDir) should be selected as + * specified in [ISO14496-2], subclause 7.4.3.1. + * + * Input Arguments: + * + * pSrcDst - pointer to the coefficient buffer which contains the quantized + * coefficient residuals (PQF) of the current block; must be + * aligned on a 4-byte boundary. The output coefficients are + * saturated to the range [-2048, 2047]. + * pPredBufRow - pointer to the coefficient row buffer; must be aligned on + * a 4-byte boundary. + * pPredBufCol - pointer to the coefficient column buffer; must be aligned + * on a 4-byte boundary. + * curQP - quantization parameter of the current block. curQP may equal to + * predQP especially when the current block and the predictor block + * are in the same macroblock. 
+ * predQP - quantization parameter of the predictor block + * predDir - indicates the prediction direction which takes one of the + * following values: OMX_VC_HORIZONTAL - predict horizontally + * OMX_VC_VERTICAL - predict vertically + * ACPredFlag - a flag indicating if AC prediction should be performed. It + * is equal to ac_pred_flag in the bit stream syntax of MPEG-4 + * videoComp - video component type (luminance or chrominance) of the + * current block + * + * Output Arguments: + * + * pSrcDst - pointer to the coefficient buffer which contains the quantized + * coefficients (QF) of the current block + * pPredBufRow - pointer to the updated coefficient row buffer + * pPredBufCol - pointer to the updated coefficient column buffer Note: + * Buffer update: Update the AC prediction buffer (both row and + * column buffer). + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments, if: + * - At least one of the pointers is NULL: + * pSrcDst, pPredBufRow, or pPredBufCol. + * - curQP <= 0, + * - predQP <= 0, + * - curQP > 31, + * - predQP > 31, + * - predDir exceeds [1,2] + * - pSrcDst, pPredBufRow, or pPredBufCol is not 4-byte aligned. + * + */ +OMXResult omxVCM4P2_PredictReconCoefIntra ( + OMX_S16 *pSrcDst, + OMX_S16 *pPredBufRow, + OMX_S16 *pPredBufCol, + OMX_INT curQP, + OMX_INT predQP, + OMX_INT predDir, + OMX_INT ACPredFlag, + OMXVCM4P2VideoComponent videoComp +); + + + +/** + * Function: omxVCM4P2_MCReconBlock (6.2.5.5.1) + * + * Description: + * Performs motion compensation prediction for an 8x8 block using + * interpolation described in [ISO14496-2], subclause 7.6.2. + * + * Input Arguments: + * + * pSrc - pointer to the block in the reference plane. + * srcStep - distance between the start of consecutive lines in the + * reference plane, in bytes; must be a multiple of 8. + * dstStep - distance between the start of consecutive lines in the + * destination plane, in bytes; must be a multiple of 8. 
+ * pSrcResidue - pointer to a buffer containing the 16-bit prediction + * residuals; must be 16-byte aligned. If the pointer is NULL, then + * no prediction is done, only motion compensation, i.e., the block + * is moved with interpolation. + * predictType - bilinear interpolation type, as defined in section + * 6.2.1.2. + * rndVal - rounding control parameter: 0 - disabled; 1 - enabled. + * + * Output Arguments: + * + * pDst - pointer to the destination buffer; must be 8-byte aligned. If + * prediction residuals are added then output intensities are + * clipped to the range [0,255]. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned under any of the following + * conditions: + * - pDst is not 8-byte aligned. + * - pSrcResidue is not 16-byte aligned. + * - one or more of the following pointers is NULL: pSrc or pDst. + * - either srcStep or dstStep is not a multiple of 8. + * - invalid type specified for the parameter predictType. + * - the parameter rndVal is not equal either to 0 or 1. 
+ * + */ +OMXResult omxVCM4P2_MCReconBlock ( + const OMX_U8 *pSrc, + OMX_INT srcStep, + const OMX_S16 *pSrcResidue, + OMX_U8 *pDst, + OMX_INT dstStep, + OMX_INT predictType, + OMX_INT rndVal +); + + + +/* 6.3.1.1 Intra 16x16 Prediction Modes */ +/* A data type that enumerates intra_16x16 macroblock prediction modes is defined as follows: */ + +typedef enum { + OMX_VC_16X16_VERT = 0, /** Intra_16x16_Vertical */ + OMX_VC_16X16_HOR = 1, /** Intra_16x16_Horizontal */ + OMX_VC_16X16_DC = 2, /** Intra_16x16_DC */ + OMX_VC_16X16_PLANE = 3 /** Intra_16x16_Plane */ +} OMXVCM4P10Intra16x16PredMode; + + + +/* 6.3.1.2 Intra 4x4 Prediction Modes */ +/* A data type that enumerates intra_4x4 macroblock prediction modes is defined as follows: */ + +typedef enum { + OMX_VC_4X4_VERT = 0, /** Intra_4x4_Vertical */ + OMX_VC_4X4_HOR = 1, /** Intra_4x4_Horizontal */ + OMX_VC_4X4_DC = 2, /** Intra_4x4_DC */ + OMX_VC_4X4_DIAG_DL = 3, /** Intra_4x4_Diagonal_Down_Left */ + OMX_VC_4X4_DIAG_DR = 4, /** Intra_4x4_Diagonal_Down_Right */ + OMX_VC_4X4_VR = 5, /** Intra_4x4_Vertical_Right */ + OMX_VC_4X4_HD = 6, /** Intra_4x4_Horizontal_Down */ + OMX_VC_4X4_VL = 7, /** Intra_4x4_Vertical_Left */ + OMX_VC_4X4_HU = 8 /** Intra_4x4_Horizontal_Up */ +} OMXVCM4P10Intra4x4PredMode; + + + +/* 6.3.1.3 Chroma Prediction Modes */ +/* A data type that enumerates intra chroma prediction modes is defined as follows: */ + +typedef enum { + OMX_VC_CHROMA_DC = 0, /** Intra_Chroma_DC */ + OMX_VC_CHROMA_HOR = 1, /** Intra_Chroma_Horizontal */ + OMX_VC_CHROMA_VERT = 2, /** Intra_Chroma_Vertical */ + OMX_VC_CHROMA_PLANE = 3 /** Intra_Chroma_Plane */ +} OMXVCM4P10IntraChromaPredMode; + + + +/* 6.3.1.4 Motion Estimation Modes */ +/* A data type that enumerates H.264 motion estimation modes is defined as follows: */ + +typedef enum { + OMX_VC_M4P10_FAST_SEARCH = 0, /** Fast motion search */ + OMX_VC_M4P10_FULL_SEARCH = 1 /** Full motion search */ +} OMXVCM4P10MEMode; + + + +/* 6.3.1.5 Macroblock Types */ +/* A data type 
that enumerates H.264 macroblock types is defined as follows: */ + +typedef enum { + OMX_VC_P_16x16 = 0, /* defined by [ISO14496-10] */ + OMX_VC_P_16x8 = 1, + OMX_VC_P_8x16 = 2, + OMX_VC_P_8x8 = 3, + OMX_VC_PREF0_8x8 = 4, + OMX_VC_INTER_SKIP = 5, + OMX_VC_INTRA_4x4 = 8, + OMX_VC_INTRA_16x16 = 9, + OMX_VC_INTRA_PCM = 10 +} OMXVCM4P10MacroblockType; + + + +/* 6.3.1.6 Sub-Macroblock Types */ +/* A data type that enumerates H.264 sub-macroblock types is defined as follows: */ + +typedef enum { + OMX_VC_SUB_P_8x8 = 0, /* defined by [ISO14496-10] */ + OMX_VC_SUB_P_8x4 = 1, + OMX_VC_SUB_P_4x8 = 2, + OMX_VC_SUB_P_4x4 = 3 +} OMXVCM4P10SubMacroblockType; + + + +/* 6.3.1.7 Variable Length Coding (VLC) Information */ + +typedef struct { + OMX_U8 uTrailing_Ones; /* Trailing ones; 3 at most */ + OMX_U8 uTrailing_One_Signs; /* Trailing ones signal */ + OMX_U8 uNumCoeffs; /* Total number of non-zero coefs, including trailing ones */ + OMX_U8 uTotalZeros; /* Total number of zero coefs */ + OMX_S16 iLevels[16]; /* Levels of non-zero coefs, in reverse zig-zag order */ + OMX_U8 uRuns[16]; /* Runs for levels and trailing ones, in reverse zig-zag order */ +} OMXVCM4P10VLCInfo; + + + +/* 6.3.1.8 Macroblock Information */ + +typedef struct { + OMX_S32 sliceId; /* slice number */ + OMXVCM4P10MacroblockType mbType; /* MB type */ + OMXVCM4P10SubMacroblockType subMBType[4]; /* sub-block type */ + OMX_S32 qpy; /* qp for luma */ + OMX_S32 qpc; /* qp for chroma */ + OMX_U32 cbpy; /* CBP Luma */ + OMX_U32 cbpc; /* CBP Chroma */ + OMXVCMotionVector pMV0[4][4]; /* motion vector, represented using 1/4-pel units, pMV0[blocky][blockx] (blocky = 0~3, blockx =0~3) */ + OMXVCMotionVector pMVPred[4][4]; /* motion vector prediction, Represented using 1/4-pel units, pMVPred[blocky][blockx] (blocky = 0~3, blockx = 0~3) */ + OMX_U8 pRefL0Idx[4]; /* reference picture indices */ + OMXVCM4P10Intra16x16PredMode Intra16x16PredMode; /* best intra 16x16 prediction mode */ + OMXVCM4P10Intra4x4PredMode 
pIntra4x4PredMode[16]; /* best intra 4x4 prediction mode for each block, pMV0 indexed as above */ +} OMXVCM4P10MBInfo, *OMXVCM4P10MBInfoPtr; + + + +/* 6.3.1.9 Motion Estimation Parameters */ + +typedef struct { + OMX_S32 blockSplitEnable8x8; /* enables 16x8, 8x16, 8x8 */ + OMX_S32 blockSplitEnable4x4; /* enable splitting of 8x4, 4x8, 4x4 blocks */ + OMX_S32 halfSearchEnable; + OMX_S32 quarterSearchEnable; + OMX_S32 intraEnable4x4; /* 1=enable, 0=disable */ + OMX_S32 searchRange16x16; /* integer pixel units */ + OMX_S32 searchRange8x8; + OMX_S32 searchRange4x4; +} OMXVCM4P10MEParams; + + + +/** + * Function: omxVCM4P10_PredictIntra_4x4 (6.3.3.1.1) + * + * Description: + * Perform Intra_4x4 prediction for luma samples. If the upper-right block is + * not available, then duplication work should be handled inside the function. + * Users need not define them outside. + * + * Input Arguments: + * + * pSrcLeft - Pointer to the buffer of 4 left pixels: + * p[x, y] (x = -1, y = 0..3) + * pSrcAbove - Pointer to the buffer of 8 above pixels: + * p[x,y] (x = 0..7, y =-1); + * must be aligned on a 4-byte boundary. + * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) + * leftStep - Step of left pixel buffer; must be a multiple of 4. + * dstStep - Step of the destination buffer; must be a multiple of 4. + * predMode - Intra_4x4 prediction mode. + * availability - Neighboring 4x4 block availability flag, refer to + * "Neighboring Macroblock Availability" . + * + * Output Arguments: + * + * pDst - Pointer to the destination buffer; must be aligned on a 4-byte + * boundary. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pDst is NULL. + * dstStep < 4, or dstStep is not a multiple of 4. + * leftStep is not a multiple of 4. + * predMode is not in the valid range of enumeration + * OMXVCM4P10Intra4x4PredMode. 
+ * predMode is OMX_VC_4X4_VERT, but availability doesn't set OMX_VC_UPPER + * indicating p[x,-1] (x = 0..3) is not available. + * predMode is OMX_VC_4X4_HOR, but availability doesn't set OMX_VC_LEFT + * indicating p[-1,y] (y = 0..3) is not available. + * predMode is OMX_VC_4X4_DIAG_DL, but availability doesn't set + * OMX_VC_UPPER indicating p[x, -1] (x = 0..3) is not available. + * predMode is OMX_VC_4X4_DIAG_DR, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not + * available. + * predMode is OMX_VC_4X4_VR, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not + * available. + * predMode is OMX_VC_4X4_HD, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1] (x = 0..3), or p[-1,y] (y = 0..3) or p[-1,-1] is not + * available. + * predMode is OMX_VC_4X4_VL, but availability doesn't set OMX_VC_UPPER + * indicating p[x,-1] (x = 0..3) is not available. + * predMode is OMX_VC_4X4_HU, but availability doesn't set OMX_VC_LEFT + * indicating p[-1,y] (y = 0..3) is not available. + * availability sets OMX_VC_UPPER, but pSrcAbove is NULL. + * availability sets OMX_VC_LEFT, but pSrcLeft is NULL. + * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. + * either pSrcAbove or pDst is not aligned on a 4-byte boundary. + * + * Note: + * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if + * they are not used by intra prediction as implied in predMode. 
+ * + */ +OMXResult omxVCM4P10_PredictIntra_4x4 ( + const OMX_U8 *pSrcLeft, + const OMX_U8 *pSrcAbove, + const OMX_U8 *pSrcAboveLeft, + OMX_U8 *pDst, + OMX_INT leftStep, + OMX_INT dstStep, + OMXVCM4P10Intra4x4PredMode predMode, + OMX_S32 availability +); + + + +/** + * Function: omxVCM4P10_PredictIntra_16x16 (6.3.3.1.2) + * + * Description: + * Perform Intra_16x16 prediction for luma samples. If the upper-right block + * is not available, then duplication work should be handled inside the + * function. Users need not define them outside. + * + * Input Arguments: + * + * pSrcLeft - Pointer to the buffer of 16 left pixels: p[x, y] (x = -1, y = + * 0..15) + * pSrcAbove - Pointer to the buffer of 16 above pixels: p[x,y] (x = 0..15, + * y= -1); must be aligned on a 16-byte boundary. + * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) + * leftStep - Step of left pixel buffer; must be a multiple of 16. + * dstStep - Step of the destination buffer; must be a multiple of 16. + * predMode - Intra_16x16 prediction mode, please refer to section 3.4.1. + * availability - Neighboring 16x16 MB availability flag. Refer to + * section 3.4.4. + * + * Output Arguments: + * + * pDst -Pointer to the destination buffer; must be aligned on a 16-byte + * boundary. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pDst is NULL. + * dstStep < 16. or dstStep is not a multiple of 16. + * leftStep is not a multiple of 16. + * predMode is not in the valid range of enumeration + * OMXVCM4P10Intra16x16PredMode + * predMode is OMX_VC_16X16_VERT, but availability doesn't set + * OMX_VC_UPPER indicating p[x,-1] (x = 0..15) is not available. + * predMode is OMX_VC_16X16_HOR, but availability doesn't set OMX_VC_LEFT + * indicating p[-1,y] (y = 0..15) is not available. 
+ * predMode is OMX_VC_16X16_PLANE, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1](x = 0..15), or p[-1,y] (y = 0..15), or p[-1,-1] is not + * available. + * availability sets OMX_VC_UPPER, but pSrcAbove is NULL. + * availability sets OMX_VC_LEFT, but pSrcLeft is NULL. + * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. + * either pSrcAbove or pDst is not aligned on a 16-byte boundary. + * + * Note: + * pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if + * they are not used by intra prediction as implied in predMode. + * Note: + * OMX_VC_UPPER_RIGHT is not used in intra_16x16 luma prediction. + * + */ +OMXResult omxVCM4P10_PredictIntra_16x16 ( + const OMX_U8 *pSrcLeft, + const OMX_U8 *pSrcAbove, + const OMX_U8 *pSrcAboveLeft, + OMX_U8 *pDst, + OMX_INT leftStep, + OMX_INT dstStep, + OMXVCM4P10Intra16x16PredMode predMode, + OMX_S32 availability +); + + + +/** + * Function: omxVCM4P10_PredictIntraChroma_8x8 (6.3.3.1.3) + * + * Description: + * Performs intra prediction for chroma samples. + * + * Input Arguments: + * + * pSrcLeft - Pointer to the buffer of 8 left pixels: p[x, y] (x = -1, y= + * 0..7). + * pSrcAbove - Pointer to the buffer of 8 above pixels: p[x,y] (x = 0..7, y + * = -1); must be aligned on an 8-byte boundary. + * pSrcAboveLeft - Pointer to the above left pixels: p[x,y] (x = -1, y = -1) + * leftStep - Step of left pixel buffer; must be a multiple of 8. + * dstStep - Step of the destination buffer; must be a multiple of 8. + * predMode - Intra chroma prediction mode, please refer to section 3.4.3. + * availability - Neighboring chroma block availability flag, please refer + * to "Neighboring Macroblock Availability". + * + * Output Arguments: + * + * pDst - Pointer to the destination buffer; must be aligned on an 8-byte + * boundary. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. 
+ * If any of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pDst is NULL. + * dstStep < 8 or dstStep is not a multiple of 8. + * leftStep is not a multiple of 8. + * predMode is not in the valid range of enumeration + * OMXVCM4P10IntraChromaPredMode. + * predMode is OMX_VC_CHROMA_VERT, but availability doesn't set + * OMX_VC_UPPER indicating p[x,-1] (x = 0..7) is not available. + * predMode is OMX_VC_CHROMA_HOR, but availability doesn't set OMX_VC_LEFT + * indicating p[-1,y] (y = 0..7) is not available. + * predMode is OMX_VC_CHROMA_PLANE, but availability doesn't set + * OMX_VC_UPPER_LEFT or OMX_VC_UPPER or OMX_VC_LEFT indicating + * p[x,-1](x = 0..7), or p[-1,y] (y = 0..7), or p[-1,-1] is not + * available. + * availability sets OMX_VC_UPPER, but pSrcAbove is NULL. + * availability sets OMX_VC_LEFT, but pSrcLeft is NULL. + * availability sets OMX_VC_UPPER_LEFT, but pSrcAboveLeft is NULL. + * either pSrcAbove or pDst is not aligned on an 8-byte boundary. + * + * Note: pSrcLeft, pSrcAbove, pSrcAboveLeft may be invalid pointers if + * they are not used by intra prediction as implied in predMode. + * + * Note: OMX_VC_UPPER_RIGHT is not used in intra chroma prediction. + * + */ +OMXResult omxVCM4P10_PredictIntraChroma_8x8 ( + const OMX_U8 *pSrcLeft, + const OMX_U8 *pSrcAbove, + const OMX_U8 *pSrcAboveLeft, + OMX_U8 *pDst, + OMX_INT leftStep, + OMX_INT dstStep, + OMXVCM4P10IntraChromaPredMode predMode, + OMX_S32 availability +); + + + +/** + * Function: omxVCM4P10_InterpolateLuma (6.3.3.2.1) + * + * Description: + * Performs quarter-pixel interpolation for inter luma MB. It is assumed that + * the frame is already padded when calling this function. 
+ * + * Input Arguments: + * + * pSrc - Pointer to the source reference frame buffer + * srcStep - reference frame step, in bytes; must be a multiple of roi.width + * dstStep - destination frame step, in bytes; must be a multiple of + * roi.width + * dx - Fractional part of horizontal motion vector component in 1/4 pixel + * unit; valid in the range [0,3] + * dy - Fractional part of vertical motion vector y component in 1/4 pixel + * unit; valid in the range [0,3] + * roi - Dimension of the interpolation region; the parameters roi.width and + * roi.height must be equal to either 4, 8, or 16. + * + * Output Arguments: + * + * pDst - Pointer to the destination frame buffer: + * if roi.width==4, 4-byte alignment required + * if roi.width==8, 8-byte alignment required + * if roi.width==16, 16-byte alignment required + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pSrc or pDst is NULL. + * srcStep or dstStep < roi.width. + * dx or dy is out of range [0,3]. + * roi.width or roi.height is out of range {4, 8, 16}. + * roi.width is equal to 4, but pDst is not 4 byte aligned. + * roi.width is equal to 8 or 16, but pDst is not 8 byte aligned. + * srcStep or dstStep is not a multiple of 8. + * + */ +OMXResult omxVCM4P10_InterpolateLuma ( + const OMX_U8 *pSrc, + OMX_S32 srcStep, + OMX_U8 *pDst, + OMX_S32 dstStep, + OMX_S32 dx, + OMX_S32 dy, + OMXSize roi +); + + + +/** + * Function: omxVCM4P10_InterpolateChroma (6.3.3.2.2) + * + * Description: + * Performs 1/8-pixel interpolation for inter chroma MB. + * + * Input Arguments: + * + * pSrc -Pointer to the source reference frame buffer + * srcStep -Reference frame step in bytes + * dstStep -Destination frame step in bytes; must be a multiple of + * roi.width. 
+ * dx -Fractional part of horizontal motion vector component in 1/8 pixel + * unit; valid in the range [0,7] + * dy -Fractional part of vertical motion vector component in 1/8 pixel + * unit; valid in the range [0,7] + * roi -Dimension of the interpolation region; the parameters roi.width and + * roi.height must be equal to either 2, 4, or 8. + * + * Output Arguments: + * + * pDst -Pointer to the destination frame buffer: + * if roi.width==2, 2-byte alignment required + * if roi.width==4, 4-byte alignment required + * if roi.width==8, 8-byte alignment required + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * pSrc or pDst is NULL. + * srcStep or dstStep < 8. + * dx or dy is out of range [0-7]. + * roi.width or roi.height is out of range {2,4,8}. + * roi.width is equal to 2, but pDst is not 2-byte aligned. + * roi.width is equal to 4, but pDst is not 4-byte aligned. + * roi.width is equal to 8, but pDst is not 8 byte aligned. + * srcStep or dstStep is not a multiple of 8. + * + */ +OMXResult omxVCM4P10_InterpolateChroma ( + const OMX_U8 *pSrc, + OMX_S32 srcStep, + OMX_U8 *pDst, + OMX_S32 dstStep, + OMX_S32 dx, + OMX_S32 dy, + OMXSize roi +); + + + +/** + * Function: omxVCM4P10_FilterDeblockingLuma_VerEdge_I (6.3.3.3.1) + * + * Description: + * Performs in-place deblock filtering on four vertical edges of the luma + * macroblock (16x16). + * + * Input Arguments: + * + * pSrcDst - Pointer to the input macroblock; must be 16-byte aligned. + * srcdstStep -Step of the arrays; must be a multiple of 16. + * pAlpha -Array of size 2 of alpha thresholds (the first item is the alpha + * threshold for the external vertical edge, and the second item is + * for the internal vertical edge); per [ISO14496-10] alpha values + * must be in the range [0,255]. 
+ * pBeta -Array of size 2 of beta thresholds (the first item is the beta + * threshold for the external vertical edge, and the second item is + * for the internal vertical edge); per [ISO14496-10] beta values + * must be in the range [0,18]. + * pThresholds -Array of size 16 of Thresholds (TC0) (values for the left + * edge of each 4x4 block, arranged in vertical block order); must + * be aligned on a 4-byte boundary. Per [ISO14496-10] values must + * be in the range [0,25]. + * pBS -Array of size 16 of BS parameters (arranged in vertical block + * order); valid in the range [0,4] with the following + * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) + * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns + * OMX_Sts_BadArgErr: + * Any of the pointers pSrcDst, pAlpha, pBeta, pThresholds, or pBS + * is NULL. + * Either pThresholds or pBS is not aligned on a 4-byte boundary. + * pSrcDst is not 16-byte aligned. + * srcdstStep is not a multiple of 16. + * pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * One or more entries in the table pThresholds[0..15] is outside of the + * range [0,25]. + * pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && + * pBS[i^3]!=4) for 0<=i<=3. 
+ * + */ +OMXResult omxVCM4P10_FilterDeblockingLuma_VerEdge_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_FilterDeblockingLuma_HorEdge_I (6.3.3.3.2) + * + * Description: + * Performs in-place deblock filtering on four horizontal edges of the luma + * macroblock (16x16). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 16-byte aligned. + * srcdstStep - step of the arrays; must be a multiple of 16. + * pAlpha - array of size 2 of alpha thresholds (the first item is the alpha + * threshold for the external horizontal edge, and the second item is + * for the internal horizontal edge); per [ISO14496-10] alpha + * values must be in the range [0,255]. + * pBeta - array of size 2 of beta thresholds (the first item is the beta + * threshold for the external horizontal edge, and the second item + * is for the internal horizontal edge). Per [ISO14496-10] beta + * values must be in the range [0,18]. + * pThresholds - array of size 16 containing thresholds, TC0, for the top + * horizontal edge of each 4x4 block, arranged in horizontal block + * order; must be aligned on a 4-byte boundary. Per [ISO14496-10] + * values must be in the range [0,25]. + * pBS - array of size 16 of BS parameters (arranged in horizontal block + * order); valid in the range [0,4] with the following + * restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) + * pBS[i]== 4 if and only if pBS[i^3]== 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr, if one of the following cases occurs: + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds, or pBS. + * - either pThresholds or pBS is not aligned on a 4-byte boundary. 
+ * - pSrcDst is not 16-byte aligned. + * - srcdstStep is not a multiple of 16. + * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * - pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * - One or more entries in the table pThresholds[0..15] is + * outside of the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * + */ +OMXResult omxVCM4P10_FilterDeblockingLuma_HorEdge_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_FilterDeblockingChroma_VerEdge_I (6.3.3.3.3) + * + * Description: + * Performs in-place deblock filtering on four vertical edges of the chroma + * macroblock (8x8). + * + * Input Arguments: + * + * pSrcDst - Pointer to the input macroblock; must be 8-byte aligned. + * srcdstStep - Step of the arrays; must be a multiple of 8. + * pAlpha - Array of size 2 of alpha thresholds (the first item is alpha + * threshold for external vertical edge, and the second item is for + * internal vertical edge); per [ISO14496-10] alpha values must be + * in the range [0,255]. + * pBeta - Array of size 2 of beta thresholds (the first item is the beta + * threshold for the external vertical edge, and the second item is + * for the internal vertical edge); per [ISO14496-10] beta values + * must be in the range [0,18]. + * pThresholds - Array of size 8 containing thresholds, TC0, for the left + * vertical edge of each 4x2 chroma block, arranged in vertical + * block order; must be aligned on a 4-byte boundary. Per + * [ISO14496-10] values must be in the range [0,25]. + * pBS - Array of size 16 of BS parameters (values for each 2x2 chroma + * block, arranged in vertical block order). 
This parameter is the + * same as the pBS parameter passed into FilterDeblockLuma_VerEdge; + * valid in the range [0,4] with the following restrictions: i) + * pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and + * only if pBS[i^3]== 4. Must be 4 byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds, or pBS. + * - pSrcDst is not 8-byte aligned. + * - srcdstStep is not a multiple of 8. + * - pThresholds is not 4-byte aligned. + * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * - pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * - One or more entries in the table pThresholds[0..7] is outside + * of the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - pBS is not 4-byte aligned. + * + */ +OMXResult omxVCM4P10_FilterDeblockingChroma_VerEdge_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_FilterDeblockingChroma_HorEdge_I (6.3.3.3.4) + * + * Description: + * Performs in-place deblock filtering on the horizontal edges of the chroma + * macroblock (8x8). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 8-byte aligned. + * srcdstStep - array step; must be a multiple of 8. + * pAlpha - array of size 2 containing alpha thresholds; the first element + * contains the threshold for the external horizontal edge, and the + * second element contains the threshold for internal horizontal + * edge. Per [ISO14496-10] alpha values must be in the range + * [0,255]. 
+ * pBeta - array of size 2 containing beta thresholds; the first element + * contains the threshold for the external horizontal edge, and the + * second element contains the threshold for the internal + * horizontal edge. Per [ISO14496-10] beta values must be in the + * range [0,18]. + * pThresholds - array of size 8 containing thresholds, TC0, for the top + * horizontal edge of each 2x4 chroma block, arranged in horizontal + * block order; must be aligned on a 4-byte boundary. Per + * [ISO14496-10] values must be in the range [0,25]. + * pBS - array of size 16 containing BS parameters for each 2x2 chroma + * block, arranged in horizontal block order; valid in the range + * [0,4] with the following restrictions: i) pBS[i]== 4 may occur + * only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== 4. + * Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst -Pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr, if one of the following cases occurs: + * - any of the following pointers is NULL: + * pSrcDst, pAlpha, pBeta, pThresholds, or pBS. + * - pSrcDst is not 8-byte aligned. + * - srcdstStep is not a multiple of 8. + * - pThresholds is not 4-byte aligned. + * - pAlpha[0] and/or pAlpha[1] is outside the range [0,255]. + * - pBeta[0] and/or pBeta[1] is outside the range [0,18]. + * - One or more entries in the table pThresholds[0..7] is outside + * of the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - pBS is not 4-byte aligned. 
+ * + */ +OMXResult omxVCM4P10_FilterDeblockingChroma_HorEdge_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_DeblockLuma_I (6.3.3.3.5) + * + * Description: + * This function performs in-place deblock filtering the horizontal and + * vertical edges of a luma macroblock (16x16). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 16-byte aligned. + * srcdstStep - image width; must be a multiple of 16. + * pAlpha - pointer to a 2x2 table of alpha thresholds, organized as + * follows: {external vertical edge, internal vertical edge, + * external horizontal edge, internal horizontal edge }. Per + * [ISO14496-10] alpha values must be in the range [0,255]. + * pBeta - pointer to a 2x2 table of beta thresholds, organized as follows: + * {external vertical edge, internal vertical edge, external + * horizontal edge, internal horizontal edge }. Per [ISO14496-10] + * beta values must be in the range [0,18]. + * pThresholds - pointer to a 16x2 table of threshold (TC0), organized as + * follows: {values for the left or above edge of each 4x4 block, + * arranged in vertical block order and then in horizontal block + * order}; must be aligned on a 4-byte boundary. Per [ISO14496-10] + * values must be in the range [0,25]. + * pBS - pointer to a 16x2 table of BS parameters arranged in scan block + * order for vertical edges and then horizontal edges; valid in the + * range [0,4] with the following restrictions: i) pBS[i]== 4 may + * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== + * 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst - pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds or pBS. 
+ * - pSrcDst is not 16-byte aligned. + * - either pThresholds or pBS is not aligned on a 4-byte boundary. + * - one or more entries in the table pAlpha[0..3] is outside the range + * [0,255]. + * - one or more entries in the table pBeta[0..3] is outside the range + * [0,18]. + * - one or more entries in the table pThresholds[0..31] is outside of + * the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - srcdstStep is not a multiple of 16. + * + */ +OMXResult omxVCM4P10_DeblockLuma_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_DeblockChroma_I (6.3.3.3.6) + * + * Description: + * Performs in-place deblocking filtering on all edges of the chroma + * macroblock (8x8). + * + * Input Arguments: + * + * pSrcDst - pointer to the input macroblock; must be 8-byte aligned. + * srcdstStep - step of the arrays; must be a multiple of 8. + * pAlpha - pointer to a 2x2 array of alpha thresholds, organized as + * follows: {external vertical edge, internal vertical edge, + * external horizontal edge, internal horizontal edge }. Per + * [ISO14496-10] alpha values must be in the range [0,255]. + * pBeta - pointer to a 2x2 array of Beta Thresholds, organized as follows: + * { external vertical edge, internal vertical edge, external + * horizontal edge, internal horizontal edge }. Per [ISO14496-10] + * beta values must be in the range [0,18]. + * pThresholds - array of size 8x2 of Thresholds (TC0) (values for the left + * or above edge of each 4x2 or 2x4 block, arranged in vertical + * block order and then in horizontal block order); must be aligned + * on a 4-byte boundary. Per [ISO14496-10] values must be in the + * range [0,25]. 
+ * pBS - array of size 16x2 of BS parameters (arranged in scan block order + * for vertical edges and then horizontal edges); valid in the + * range [0,4] with the following restrictions: i) pBS[i]== 4 may + * occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^3]== + * 4. Must be 4-byte aligned. + * + * Output Arguments: + * + * pSrcDst - pointer to filtered output macroblock. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - one or more of the following pointers is NULL: pSrcDst, pAlpha, + * pBeta, pThresholds, or pBS. + * - pSrcDst is not 8-byte aligned. + * - either pThresholds or pBS is not 4-byte aligned. + * - one or more entries in the table pAlpha[0..3] is outside the range + * [0,255]. + * - one or more entries in the table pBeta[0..3] is outside the range + * [0,18]. + * - one or more entries in the table pThresholds[0..15] is outside of + * the range [0,25]. + * - pBS is out of range, i.e., one of the following conditions is true: + * pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or + * (pBS[i]==4 && pBS[i^3]!=4) for 0<=i<=3. + * - srcdstStep is not a multiple of 8. + * + */ +OMXResult omxVCM4P10_DeblockChroma_I ( + OMX_U8 *pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8 *pAlpha, + const OMX_U8 *pBeta, + const OMX_U8 *pThresholds, + const OMX_U8 *pBS +); + + + +/** + * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC (6.3.4.1.1) + * + * Description: + * Performs CAVLC decoding and inverse raster scan for a 2x2 block of + * ChromaDCLevel. The decoded coefficients in the packed position-coefficient + * buffer are stored in reverse zig-zag order, i.e., the first buffer element + * contains the last non-zero position-coefficient pair of the block. Within + * each position-coefficient pair, the position entry indicates the + * raster-scan position of the coefficient, while the coefficient entry + * contains the coefficient value. 
+ * + * Input Arguments: + * + * ppBitStream - Double pointer to current byte in bit stream buffer + * pOffset - Pointer to current bit position in the byte pointed to by + * *ppBitStream; valid in the range [0,7]. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after each block is decoded + * pOffset - *pOffset is updated after each block is decoded + * pNumCoeff - Pointer to the number of nonzero coefficients in this block + * ppPosCoefBuf - Double pointer to destination residual + * coefficient-position pair buffer. Buffer position + * (*ppPosCoefBuf) is updated upon return, unless there are only + * zero coefficients in the currently decoded block. In this case + * the caller is expected to bypass the transform/dequantization of + * the empty blocks. + * + * Return Value: + * + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppBitStream or pOffset is NULL. + * - ppPosCoefBuf or pNumCoeff is NULL. + * OMX_Sts_Err - if one of the following is true: + * - an illegal code is encountered in the bitstream + * + */ +OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC ( + const OMX_U8 **ppBitStream, + OMX_S32*pOffset, + OMX_U8 *pNumCoeff, + OMX_U8 **ppPosCoefbuf +); + + + +/** + * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC (6.3.4.1.2) + * + * Description: + * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of + * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel, and ChromaACLevel. Inverse + * field scan is not supported. The decoded coefficients in the packed + * position-coefficient buffer are stored in reverse zig-zag order, i.e., the + * first buffer element contains the last non-zero position-coefficient pair of + * the block. Within each position-coefficient pair, the position entry + * indicates the raster-scan position of the coefficient, while the + * coefficient entry contains the coefficient value. 
+ * + * Input Arguments: + * + * ppBitStream - Double pointer to current byte in bit stream buffer + * pOffset - Pointer to current bit position in the byte pointed to by + * *ppBitStream; valid in the range [0,7]. + * sMaxNumCoeff - Maximum number of non-zero coefficients in current + * block + * sVLCSelect - VLC table selector, obtained from the number of non-zero + * coefficients contained in the above and left 4x4 blocks. It is + * equivalent to the variable nC described in H.264 standard table + * 9-5, except its value can't be less than zero. + * + * Output Arguments: + * + * ppBitStream - *ppBitStream is updated after each block is decoded. + * Buffer position (*ppPosCoefBuf) is updated upon return, unless + * there are only zero coefficients in the currently decoded block. + * In this case the caller is expected to bypass the + * transform/dequantization of the empty blocks. + * pOffset - *pOffset is updated after each block is decoded + * pNumCoeff - Pointer to the number of nonzero coefficients in this block + * ppPosCoefBuf - Double pointer to destination residual + * coefficient-position pair buffer + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppBitStream or pOffset is NULL. + * - ppPosCoefBuf or pNumCoeff is NULL. + * - sMaxNumCoeff is not equal to either 15 or 16. + * - sVLCSelect is less than 0. 
+ * + * OMX_Sts_Err - if one of the following is true: + * - an illegal code is encountered in the bitstream + * + */ +OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC ( + const OMX_U8 **ppBitStream, + OMX_S32 *pOffset, + OMX_U8 *pNumCoeff, + OMX_U8 **ppPosCoefbuf, + OMX_INT sVLCSelect, + OMX_INT sMaxNumCoeff +); + + + +/** + * Function: omxVCM4P10_TransformDequantLumaDCFromPair (6.3.4.2.1) + * + * Description: + * Reconstructs the 4x4 LumaDC block from the coefficient-position pair + * buffer, performs integer inverse transformation and dequantization for 4x4 LumaDC + * coefficients, and updates the pair buffer pointer to the next non-empty + * block. + * + * Input Arguments: + * + * ppSrc - Double pointer to residual coefficient-position pair buffer + * output by CAVLC decoding + * QP - Quantization parameter QpY + * + * Output Arguments: + * + * ppSrc - *ppSrc is updated to the start of next non-empty block + * pDst - Pointer to the reconstructed 4x4 LumaDC coefficients buffer; must + * be aligned on an 8-byte boundary. + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppSrc or pDst is NULL. + * - pDst is not 8-byte aligned. + * - QP is not in the range of [0-51]. + * + */ +OMXResult omxVCM4P10_TransformDequantLumaDCFromPair ( + const OMX_U8 **ppSrc, + OMX_S16 *pDst, + OMX_INT QP +); + + + +/** + * Function: omxVCM4P10_TransformDequantChromaDCFromPair (6.3.4.2.2) + * + * Description: + * Reconstruct the 2x2 ChromaDC block from coefficient-position pair buffer, + * perform integer inverse transformation, and dequantization for 2x2 chroma + * DC coefficients, and update the pair buffer pointer to next non-empty + * block. 
+ * + * Input Arguments: + * + * ppSrc - Double pointer to residual coefficient-position pair buffer + * output by CAVLC decoding + * QP - Quantization parameter QpC + * + * Output Arguments: + * + * ppSrc - *ppSrc is updated to the start of next non-empty block + * pDst - Pointer to the reconstructed 2x2 ChromaDC coefficients buffer; + * must be aligned on a 4-byte boundary. + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - ppSrc or pDst is NULL. + * - pDst is not 4-byte aligned. + * - QP is not in the range of [0-51]. + * + */ +OMXResult omxVCM4P10_TransformDequantChromaDCFromPair ( + const OMX_U8 **ppSrc, + OMX_S16 *pDst, + OMX_INT QP +); + + + +/** + * Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd (6.3.4.2.3) + * + * Description: + * Reconstruct the 4x4 residual block from coefficient-position pair buffer, + * perform dequantization and integer inverse transformation for 4x4 block of + * residuals with previous intra prediction or motion compensation data, and + * update the pair buffer pointer to next non-empty block. If pDC == NULL, + * there are 16 non-zero AC coefficients at most in the packed buffer starting + * from 4x4 block position 0; If pDC != NULL, there are 15 non-zero AC + * coefficients at most in the packed buffer starting from 4x4 block position + * 1. + * + * Input Arguments: + * + * ppSrc - Double pointer to residual coefficient-position pair buffer + * output by CAVLC decoding + * pPred - Pointer to the predicted 4x4 block; must be aligned on a 4-byte + * boundary + * predStep - Predicted frame step size in bytes; must be a multiple of 4 + * dstStep - Destination frame step in bytes; must be a multiple of 4 + * pDC - Pointer to the DC coefficient of this block, NULL if it doesn't + * exist + * QP - Quantization parameter. It should be QpC in chroma 4x4 block + * decoding, otherwise it should be QpY. 
+ * AC - Flag indicating if at least one non-zero AC coefficient exists + * + * Output Arguments: + * + * pDst - pointer to the reconstructed 4x4 block data; must be aligned on a + * 4-byte boundary + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - pPred or pDst is NULL. + * - pPred or pDst is not 4-byte aligned. + * - predStep or dstStep is not a multiple of 4. + * - AC !=0 and Qp is not in the range of [0-51] or ppSrc == NULL. + * - AC ==0 && pDC ==NULL. + * + */ +OMXResult omxVCM4P10_DequantTransformResidualFromPairAndAdd ( + const OMX_U8 **ppSrc, + const OMX_U8 *pPred, + const OMX_S16 *pDC, + OMX_U8 *pDst, + OMX_INT predStep, + OMX_INT dstStep, + OMX_INT QP, + OMX_INT AC +); + + + +/** + * Function: omxVCM4P10_MEGetBufSize (6.3.5.1.1) + * + * Description: + * Computes the size, in bytes, of the vendor-specific specification + * structure for the omxVCM4P10 motion estimation functions BlockMatch_Integer + * and MotionEstimationMB. + * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P10MEMode + * pMEParams -motion estimation parameters + * + * Output Arguments: + * + * pSize - pointer to the number of bytes required for the motion + * estimation specification structure + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - pMEParams or pSize is NULL. + * - an invalid MEMode is specified. + * + */ +OMXResult omxVCM4P10_MEGetBufSize ( + OMXVCM4P10MEMode MEmode, + const OMXVCM4P10MEParams *pMEParams, + OMX_U32 *pSize +); + + + +/** + * Function: omxVCM4P10_MEInit (6.3.5.1.2) + * + * Description: + * Initializes the vendor-specific specification structure required for the + * omxVCM4P10 motion estimation functions: BlockMatch_Integer and + * MotionEstimationMB. 
Memory for the specification structure *pMESpec must be + * allocated prior to calling the function, and should be aligned on a 4-byte + * boundary. The number of bytes required for the specification structure can + * be determined using the function omxVCM4P10_MEGetBufSize. Following + * initialization by this function, the vendor-specific structure *pMESpec + * should contain an implementation-specific representation of all motion + * estimation parameters received via the structure pMEParams, for example + * searchRange16x16, searchRange8x8, etc. + * + * Input Arguments: + * + * MEmode - motion estimation mode; available modes are defined by the + * enumerated type OMXVCM4P10MEMode + * pMEParams - motion estimation parameters + * pMESpec - pointer to the uninitialized ME specification structure + * + * Output Arguments: + * + * pMESpec - pointer to the initialized ME specification structure + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - pMEParams or pMESpec is NULL. + * - an invalid value was specified for the parameter MEmode + * - a negative or zero value was specified for one of the search ranges + * (e.g., pMEParams->searchRange8x8, pMEParams->searchRange16x16, etc.) + * - either in isolation or in combination, one or more of the enables or + * search ranges in the structure *pMEParams were configured such + * that the requested behavior fails to comply with [ISO14496-10]. + * + */ +OMXResult omxVCM4P10_MEInit ( + OMXVCM4P10MEMode MEmode, + const OMXVCM4P10MEParams *pMEParams, + void *pMESpec +); + + + +/** + * Function: omxVCM4P10_BlockMatch_Integer (6.3.5.2.1) + * + * Description: + * Performs integer block match. Returns best MV and associated cost. + * + * Input Arguments: + * + * pSrcOrgY - Pointer to the top-left corner of the current block: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. 
+ * If iBlockWidth==16, 16-byte alignment required. + * pSrcRefY - Pointer to the top-left corner of the co-located block in the + * reference picture: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. + * nSrcOrgStep - Stride of the original picture plane, expressed in terms + * of integer pixels; must be a multiple of iBlockWidth. + * nSrcRefStep - Stride of the reference picture plane, expressed in terms + * of integer pixels + * pRefRect - pointer to the valid reference rectangle inside the reference + * picture plane + * pCurrPointPos - position of the current block in the current plane + * iBlockWidth - Width of the current block, expressed in terms of integer + * pixels; must be equal to either 4, 8, or 16. + * iBlockHeight - Height of the current block, expressed in terms of + * integer pixels; must be equal to either 4, 8, or 16. + * nLamda - Lamda factor; used to compute motion cost + * pMVPred - Predicted MV; used to compute motion cost, expressed in terms + * of 1/4-pel units + * pMVCandidate - Candidate MV; used to initialize the motion search, + * expressed in terms of integer pixels + * pMESpec - pointer to the ME specification structure + * + * Output Arguments: + * + * pBestMV - Best MV resulting from integer search, expressed in terms + * of 1/4-pel units + * pBestCost - Motion cost associated with the best MV; computed as + * SAD+Lamda*BitsUsedByMV + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - any of the following pointers are NULL: + * pSrcOrgY, pSrcRefY, pRefRect, pMVPred, pMVCandidate, or pMESpec. + * - Either iBlockWidth or iBlockHeight are values other than 4, 8, or 16. 
+ * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_BlockMatch_Integer ( + const OMX_U8 *pSrcOrgY, + OMX_S32 nSrcOrgStep, + const OMX_U8 *pSrcRefY, + OMX_S32 nSrcRefStep, + const OMXRect *pRefRect, + const OMXVCM4P2Coordinate *pCurrPointPos, + OMX_U8 iBlockWidth, + OMX_U8 iBlockHeight, + OMX_U32 nLamda, + const OMXVCMotionVector *pMVPred, + const OMXVCMotionVector *pMVCandidate, + OMXVCMotionVector *pBestMV, + OMX_S32 *pBestCost, + void *pMESpec +); + + + +/** + * Function: omxVCM4P10_BlockMatch_Half (6.3.5.2.2) + * + * Description: + * Performs a half-pel block match using results from a prior integer search. + * Returns the best MV and associated cost. This function estimates the + * half-pixel motion vector by interpolating the integer resolution motion + * vector referenced by the input parameter pSrcDstBestMV, i.e., the initial + * integer MV is generated externally. The function + * omxVCM4P10_BlockMatch_Integer may be used for integer motion estimation. + * + * Input Arguments: + * + * pSrcOrgY - Pointer to the current position in original picture plane: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. + * pSrcRefY - Pointer to the top-left corner of the co-located block in the + * reference picture: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. + * nSrcOrgStep - Stride of the original picture plane in terms of full + * pixels; must be a multiple of iBlockWidth. + * nSrcRefStep - Stride of the reference picture plane in terms of full + * pixels + * iBlockWidth - Width of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. + * iBlockHeight - Height of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. 
+ * nLamda - Lamda factor, used to compute motion cost + * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to + * compute motion cost + * pSrcDstBestMV - The best MV resulting from a prior integer search, + * represented in terms of 1/4-pel units + * + * Output Arguments: + * + * pSrcDstBestMV - Best MV resulting from the half-pel search, expressed in + * terms of 1/4-pel units + * pBestCost - Motion cost associated with the best MV; computed as + * SAD+Lamda*BitsUsedByMV + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - any of the following pointers is NULL: pSrcOrgY, pSrcRefY, + * pSrcDstBestMV, pMVPred, pBestCost + * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_BlockMatch_Half ( + const OMX_U8 *pSrcOrgY, + OMX_S32 nSrcOrgStep, + const OMX_U8 *pSrcRefY, + OMX_S32 nSrcRefStep, + OMX_U8 iBlockWidth, + OMX_U8 iBlockHeight, + OMX_U32 nLamda, + const OMXVCMotionVector *pMVPred, + OMXVCMotionVector *pSrcDstBestMV, + OMX_S32 *pBestCost +); + + + +/** + * Function: omxVCM4P10_BlockMatch_Quarter (6.3.5.2.3) + * + * Description: + * Performs a quarter-pel block match using results from a prior half-pel + * search. Returns the best MV and associated cost. This function estimates + * the quarter-pixel motion vector by interpolating the half-pel resolution + * motion vector referenced by the input parameter pSrcDstBestMV, i.e., the + * initial half-pel MV is generated externally. The function + * omxVCM4P10_BlockMatch_Half may be used for half-pel motion estimation. + * + * Input Arguments: + * + * pSrcOrgY - Pointer to the current position in original picture plane: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. 
+ * pSrcRefY - Pointer to the top-left corner of the co-located block in the + * reference picture: + * If iBlockWidth==4, 4-byte alignment required. + * If iBlockWidth==8, 8-byte alignment required. + * If iBlockWidth==16, 16-byte alignment required. + * nSrcOrgStep - Stride of the original picture plane in terms of full + * pixels; must be a multiple of iBlockWidth. + * nSrcRefStep - Stride of the reference picture plane in terms of full + * pixels + * iBlockWidth - Width of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. + * iBlockHeight - Height of the current block in terms of full pixels; must + * be equal to either 4, 8, or 16. + * nLamda - Lamda factor, used to compute motion cost + * pMVPred - Predicted MV, represented in terms of 1/4-pel units; used to + * compute motion cost + * pSrcDstBestMV - The best MV resulting from a prior half-pel search, + * represented in terms of 1/4 pel units + * + * Output Arguments: + * + * pSrcDstBestMV - Best MV resulting from the quarter-pel search, expressed + * in terms of 1/4-pel units + * pBestCost - Motion cost associated with the best MV; computed as + * SAD+Lamda*BitsUsedByMV + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - One or more of the following pointers is NULL: + * pSrcOrgY, pSrcRefY, pSrcDstBestMV, pMVPred, pBestCost + * - iBlockWidth or iBlockHeight are equal to values other than 4, 8, or 16. 
+ * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_BlockMatch_Quarter ( + const OMX_U8 *pSrcOrgY, + OMX_S32 nSrcOrgStep, + const OMX_U8 *pSrcRefY, + OMX_S32 nSrcRefStep, + OMX_U8 iBlockWidth, + OMX_U8 iBlockHeight, + OMX_U32 nLamda, + const OMXVCMotionVector *pMVPred, + OMXVCMotionVector *pSrcDstBestMV, + OMX_S32 *pBestCost +); + + + +/** + * Function: omxVCM4P10_MotionEstimationMB (6.3.5.3.1) + * + * Description: + * Performs MB-level motion estimation and selects best motion estimation + * strategy from the set of modes supported in baseline profile [ISO14496-10]. + * + * Input Arguments: + * + * pSrcCurrBuf - Pointer to the current position in original picture plane; + * 16-byte alignment required + * pSrcRefBufList - Pointer to an array with 16 entries. Each entry points + * to the top-left corner of the co-located MB in a reference + * picture. The array is filled from low-to-high with valid + * reference frame pointers; the unused high entries should be set + * to NULL. Ordering of the reference frames should follow + * [ISO14496-10] subclause 8.2.4 Decoding Process for Reference + * Picture Lists. The entries must be 16-byte aligned. + * pSrcRecBuf - Pointer to the top-left corner of the co-located MB in the + * reconstructed picture; must be 16-byte aligned. + * SrcCurrStep - Width of the original picture plane in terms of full + * pixels; must be a multiple of 16. + * SrcRefStep - Width of the reference picture plane in terms of full + * pixels; must be a multiple of 16. + * SrcRecStep - Width of the reconstructed picture plane in terms of full + * pixels; must be a multiple of 16. + * pRefRect - Pointer to the valid reference rectangle; relative to the + * image origin. + * pCurrPointPos - Position of the current macroblock in the current plane. 
+ * Lambda - Lagrange factor for computing the cost function + * pMESpec - Pointer to the motion estimation specification structure; must + * have been allocated and initialized prior to calling this + * function. + * pMBInter - Array, of dimension four, containing pointers to information + * associated with four adjacent type INTER MBs (Left, Top, + * Top-Left, Top-Right). Any pointer in the array may be set equal + * to NULL if the corresponding MB doesn't exist or is not of type + * INTER. + * - pMBInter[0] - Pointer to left MB information + * - pMBInter[1] - Pointer to top MB information + * - pMBInter[2] - Pointer to top-left MB information + * - pMBInter[3] - Pointer to top-right MB information + * pMBIntra - Array, of dimension four, containing pointers to information + * associated with four adjacent type INTRA MBs (Left, Top, + * Top-Left, Top-Right). Any pointer in the array may be set equal + * to NULL if the corresponding MB doesn't exist or is not of type + * INTRA. + * - pMBIntra[0] - Pointer to left MB information + * - pMBIntra[1] - Pointer to top MB information + * - pMBIntra[2] - Pointer to top-left MB information + * - pMBIntra[3] - Pointer to top-right MB information + * pSrcDstMBCurr - Pointer to information structure for the current MB. + * The following entries should be set prior to calling the + * function: sliceID - the number of the slice to which the + * current MB belongs. + * + * Output Arguments: + * + * pDstCost - Pointer to the minimum motion cost for the current MB. + * pDstBlockSAD - Pointer to the array of SADs for each of the sixteen luma + * 4x4 blocks in each MB. The block SADs are in scan order for + * each MB. For implementations that cannot compute the SAD values + * individually, the maximum possible value (0xffff) is returned + * for each of the 16 block SAD entries. + * pSrcDstMBCurr - Pointer to updated information structure for the current + * MB after MB-level motion estimation has been completed. 
The + * following fields are updated by the ME function. The following + * parameter set quantifies the MB-level ME search results: + * - MbType + * - subMBType[4] + * - pMV0[4][4] + * - pMVPred[4][4] + * - pRefL0Idx[4] + * - Intra16x16PredMode + * - pIntra4x4PredMode[4][4] + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - One or more of the following pointers is NULL: pSrcCurrBuf, + * pSrcRefBufList, pSrcRecBuf, pRefRect, pCurrPointPos, pMESpec, + * pMBInter, pMBIntra,pSrcDstMBCurr, pDstCost, pSrcRefBufList[0] + * - SrcRefStep, SrcRecStep are not multiples of 16 + * - iBlockWidth or iBlockHeight are values other than 4, 8, or 16. + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_MotionEstimationMB ( + const OMX_U8 *pSrcCurrBuf, + OMX_S32 SrcCurrStep, + const OMX_U8 *pSrcRefBufList[15], + OMX_S32 SrcRefStep, + const OMX_U8 *pSrcRecBuf, + OMX_S32 SrcRecStep, + const OMXRect *pRefRect, + const OMXVCM4P2Coordinate *pCurrPointPos, + OMX_U32 Lambda, + void *pMESpec, + const OMXVCM4P10MBInfoPtr *pMBInter, + const OMXVCM4P10MBInfoPtr *pMBIntra, + OMXVCM4P10MBInfoPtr pSrcDstMBCurr, + OMX_INT *pDstCost, + OMX_U16 *pDstBlockSAD +); + + + +/** + * Function: omxVCM4P10_SAD_4x (6.3.5.4.1) + * + * Description: + * This function calculates the SAD for 4x8 and 4x4 blocks. + * + * Input Arguments: + * + * pSrcOrg -Pointer to the original block; must be aligned on a 4-byte + * boundary. + * iStepOrg -Step of the original block buffer; must be a multiple of 4. + * pSrcRef -Pointer to the reference block + * iStepRef -Step of the reference block buffer + * iHeight -Height of the block; must be equal to either 4 or 8. + * + * Output Arguments: + * + * pDstSAD -Pointer of result SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. 
+ * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - One or more of the following pointers is NULL: + * pSrcOrg, pSrcRef, or pDstSAD + * - iHeight is not equal to either 4 or 8. + * - iStepOrg is not a multiple of 4 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SAD_4x ( + const OMX_U8 *pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8 *pSrcRef, + OMX_U32 iStepRef, + OMX_S32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_SADQuar_4x (6.3.5.4.2) + * + * Description: + * This function calculates the SAD between one block (pSrc) and the average + * of the other two (pSrcRef0 and pSrcRef1) for 4x8 or 4x4 blocks. Rounding + * is applied according to the convention (a+b+1)>>1. + * + * Input Arguments: + * + * pSrc - Pointer to the original block; must be aligned on a 4-byte + * boundary. + * pSrcRef0 - Pointer to reference block 0 + * pSrcRef1 - Pointer to reference block 1 + * iSrcStep - Step of the original block buffer; must be a multiple of 4. + * iRefStep0 - Step of reference block 0 + * iRefStep1 - Step of reference block 1 + * iHeight - Height of the block; must be equal to either 4 or 8. + * + * Output Arguments: + * + * pDstSAD - Pointer of result SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - iHeight is not equal to either 4 or 8. + * - One or more of the following pointers is NULL: pSrc, pSrcRef0, + * pSrcRef1, pDstSAD. 
+ * - iSrcStep is not a multiple of 4 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SADQuar_4x ( + const OMX_U8 *pSrc, + const OMX_U8 *pSrcRef0, + const OMX_U8 *pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_SADQuar_8x (6.3.5.4.3) + * + * Description: + * This function calculates the SAD between one block (pSrc) and the average + * of the other two (pSrcRef0 and pSrcRef1) for 8x16, 8x8, or 8x4 blocks. + * Rounding is applied according to the convention (a+b+1)>>1. + * + * Input Arguments: + * + * pSrc - Pointer to the original block; must be aligned on an 8-byte + * boundary. + * pSrcRef0 - Pointer to reference block 0 + * pSrcRef1 - Pointer to reference block 1 + * iSrcStep - Step of the original block buffer; must be a multiple of 8. + * iRefStep0 - Step of reference block 0 + * iRefStep1 - Step of reference block 1 + * iHeight - Height of the block; must be equal either 4, 8, or 16. + * + * Output Arguments: + * + * pDstSAD - Pointer of result SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - iHeight is not equal to either 4, 8, or 16. + * - One or more of the following pointers is NULL: pSrc, pSrcRef0, + * pSrcRef1, pDstSAD. + * - iSrcStep is not a multiple of 8 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SADQuar_8x ( + const OMX_U8 *pSrc, + const OMX_U8 *pSrcRef0, + const OMX_U8 *pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_SADQuar_16x (6.3.5.4.4) + * + * Description: + * This function calculates the SAD between one block (pSrc) and the average + * of the other two (pSrcRef0 and pSrcRef1) for 16x16 or 16x8 blocks. + * Rounding is applied according to the convention (a+b+1)>>1. 
+ * + * Input Arguments: + * + * pSrc - Pointer to the original block; must be aligned on a 16-byte + * boundary. + * pSrcRef0 - Pointer to reference block 0 + * pSrcRef1 - Pointer to reference block 1 + * iSrcStep - Step of the original block buffer; must be a multiple of 16 + * iRefStep0 - Step of reference block 0 + * iRefStep1 - Step of reference block 1 + * iHeight - Height of the block; must be equal to either 8 or 16 + * + * Output Arguments: + * + * pDstSAD -Pointer of result SAD + * + * Return Value: + * OMX_Sts_NoErr, if the function runs without error. + * OMX_Sts_BadArgErr - bad arguments: if one of the following cases occurs: + * - iHeight is not equal to either 8 or 16. + * - One or more of the following pointers is NULL: pSrc, pSrcRef0, + * pSrcRef1, pDstSAD. + * - iSrcStep is not a multiple of 16 + * - Any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_SADQuar_16x ( + const OMX_U8 *pSrc, + const OMX_U8 *pSrcRef0, + const OMX_U8 *pSrcRef1, + OMX_U32 iSrcStep, + OMX_U32 iRefStep0, + OMX_U32 iRefStep1, + OMX_U32 *pDstSAD, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_SATD_4x4 (6.3.5.4.5) + * + * Description: + * This function calculates the sum of absolute transform differences (SATD) + * for a 4x4 block by applying a Hadamard transform to the difference block + * and then calculating the sum of absolute coefficient values. 
+ * + * Input Arguments: + * + * pSrcOrg - Pointer to the original block; must be aligned on a 4-byte + * boundary + * iStepOrg - Step of the original block buffer; must be a multiple of 4 + * pSrcRef - Pointer to the reference block; must be aligned on a 4-byte + * boundary + * iStepRef - Step of the reference block buffer; must be a multiple of 4 + * + * Output Arguments: + * + * pDstSAD - pointer to the resulting SATD + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pSrcRef, or pDstSAD + * - either pSrcOrg or pSrcRef is not aligned on a 4-byte boundary + * - iStepOrg <= 0 or iStepOrg is not a multiple of 4 + * - iStepRef <= 0 or iStepRef is not a multiple of 4 + * + */ +OMXResult omxVCM4P10_SATD_4x4 ( + const OMX_U8 *pSrcOrg, + OMX_U32 iStepOrg, + const OMX_U8 *pSrcRef, + OMX_U32 iStepRef, + OMX_U32 *pDstSAD +); + + + +/** + * Function: omxVCM4P10_InterpolateHalfHor_Luma (6.3.5.5.1) + * + * Description: + * This function performs interpolation for two horizontal 1/2-pel positions + * (-1/2,0) and (1/2, 0) - around a full-pel position. + * + * Input Arguments: + * + * pSrc - Pointer to the top-left corner of the block used to interpolate in + * the reconstruction frame plane. + * iSrcStep - Step of the source buffer. + * iDstStep - Step of the destination (interpolation) buffer; must be a + * multiple of iWidth. + * iWidth - Width of the current block; must be equal to either 4, 8, or 16 + * iHeight - Height of the current block; must be equal to 4, 8, or 16 + * + * Output Arguments: + * + * pDstLeft -Pointer to the interpolation buffer of the left 1/2-pel position + * (-1/2, 0) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required. 
+ * pDstRight -Pointer to the interpolation buffer of the right 1/2-pel + * position (1/2, 0) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrc, pDstLeft, or pDstRight + * - iWidth or iHeight have values other than 4, 8, or 16 + * - iWidth==4 but pDstLeft and/or pDstRight is/are not aligned on a 4-byte boundary + * - iWidth==8 but pDstLeft and/or pDstRight is/are not aligned on an 8-byte boundary + * - iWidth==16 but pDstLeft and/or pDstRight is/are not aligned on a 16-byte boundary + * - any alignment restrictions are violated + * + */ +OMXResult omxVCM4P10_InterpolateHalfHor_Luma ( + const OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDstLeft, + OMX_U8 *pDstRight, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_InterpolateHalfVer_Luma (6.3.5.5.2) + * + * Description: + * This function performs interpolation for two vertical 1/2-pel positions - + * (0, -1/2) and (0, 1/2) - around a full-pel position. + * + * Input Arguments: + * + * pSrc - Pointer to top-left corner of block used to interpolate in the + * reconstructed frame plane + * iSrcStep - Step of the source buffer. + * iDstStep - Step of the destination (interpolation) buffer; must be a + * multiple of iWidth. + * iWidth - Width of the current block; must be equal to either 4, 8, or 16 + * iHeight - Height of the current block; must be equal to either 4, 8, or 16 + * + * Output Arguments: + * + * pDstUp -Pointer to the interpolation buffer of the 1/2-pel position above + * the current full-pel position (0, -1/2) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required. 
+ * pDstDown -Pointer to the interpolation buffer of the 1/2-pel position below + * the current full-pel position (0, 1/2) + * If iWidth==4, 4-byte alignment required. + * If iWidth==8, 8-byte alignment required. + * If iWidth==16, 16-byte alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrc, pDstUp, or pDstDown + * - iWidth or iHeight have values other than 4, 8, or 16 + * - iWidth==4 but pDstUp and/or pDstDown is/are not aligned on a 4-byte boundary + * - iWidth==8 but pDstUp and/or pDstDown is/are not aligned on an 8-byte boundary + * - iWidth==16 but pDstUp and/or pDstDown is/are not aligned on a 16-byte boundary + * + */ +OMXResult omxVCM4P10_InterpolateHalfVer_Luma ( + const OMX_U8 *pSrc, + OMX_U32 iSrcStep, + OMX_U8 *pDstUp, + OMX_U8 *pDstDown, + OMX_U32 iDstStep, + OMX_U32 iWidth, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_Average_4x (6.3.5.5.3) + * + * Description: + * This function calculates the average of two 4x4 or 4x8 blocks. The result + * is rounded according to (a+b+1)/2. + * + * Input Arguments: + * + * pPred0 - Pointer to the top-left corner of reference block 0 + * pPred1 - Pointer to the top-left corner of reference block 1 + * iPredStep0 - Step of reference block 0; must be a multiple of 4. + * iPredStep1 - Step of reference block 1; must be a multiple of 4. + * iDstStep - Step of the destination buffer; must be a multiple of 4. + * iHeight - Height of the blocks; must be either 4 or 8. + * + * Output Arguments: + * + * pDstPred - Pointer to the destination buffer. 4-byte alignment required. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pPred0, pPred1, or pDstPred + * - pDstPred is not aligned on a 4-byte boundary + * - iPredStep0 <= 0 or iPredStep0 is not a multiple of 4 + * - iPredStep1 <= 0 or iPredStep1 is not a multiple of 4 + * - iDstStep <= 0 or iDstStep is not a multiple of 4 + * - iHeight is not equal to either 4 or 8 + * + */ +OMXResult omxVCM4P10_Average_4x ( + const OMX_U8 *pPred0, + const OMX_U8 *pPred1, + OMX_U32 iPredStep0, + OMX_U32 iPredStep1, + OMX_U8 *pDstPred, + OMX_U32 iDstStep, + OMX_U32 iHeight +); + + + +/** + * Function: omxVCM4P10_TransformQuant_ChromaDC (6.3.5.6.1) + * + * Description: + * This function performs 2x2 Hadamard transform of chroma DC coefficients + * and then quantizes the coefficients. + * + * Input Arguments: + * + * pSrcDst - Pointer to the 2x2 array of chroma DC coefficients. 8-byte + * alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. + * bIntra - Indicate whether this is an INTRA block. 1-INTRA, 0-INTER + * + * Output Arguments: + * + * pSrcDst - Pointer to transformed and quantized coefficients. 8-byte + * alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcDst + * - pSrcDst is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_TransformQuant_ChromaDC ( + OMX_S16 *pSrcDst, + OMX_U32 iQP, + OMX_U8 bIntra +); + + + +/** + * Function: omxVCM4P10_TransformQuant_LumaDC (6.3.5.6.2) + * + * Description: + * This function performs a 4x4 Hadamard transform of luma DC coefficients + * and then quantizes the coefficients. + * + * Input Arguments: + * + * pSrcDst - Pointer to the 4x4 array of luma DC coefficients. 
16-byte + * alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. + * + * Output Arguments: + * + * pSrcDst - Pointer to transformed and quantized coefficients. 16-byte + * alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: pSrcDst + * - pSrcDst is not aligned on an 16-byte boundary + * + */ +OMXResult omxVCM4P10_TransformQuant_LumaDC ( + OMX_S16 *pSrcDst, + OMX_U32 iQP +); + + + +/** + * Function: omxVCM4P10_InvTransformDequant_LumaDC (6.3.5.6.3) + * + * Description: + * This function performs inverse 4x4 Hadamard transform and then dequantizes + * the coefficients. + * + * Input Arguments: + * + * pSrc - Pointer to the 4x4 array of the 4x4 Hadamard-transformed and + * quantized coefficients. 16 byte alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. + * + * Output Arguments: + * + * pDst - Pointer to inverse-transformed and dequantized coefficients. + * 16-byte alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: pSrc + * - pSrc or pDst is not aligned on a 16-byte boundary + * + */ +OMXResult omxVCM4P10_InvTransformDequant_LumaDC ( + const OMX_S16 *pSrc, + OMX_S16 *pDst, + OMX_U32 iQP +); + + + +/** + * Function: omxVCM4P10_InvTransformDequant_ChromaDC (6.3.5.6.4) + * + * Description: + * This function performs inverse 2x2 Hadamard transform and then dequantizes + * the coefficients. + * + * Input Arguments: + * + * pSrc - Pointer to the 2x2 array of the 2x2 Hadamard-transformed and + * quantized coefficients. 8 byte alignment required. + * iQP - Quantization parameter; must be in the range [0,51]. 
+ * + * Output Arguments: + * + * pDst - Pointer to inverse-transformed and dequantized coefficients. + * 8-byte alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: pSrc + * - pSrc or pDst is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_InvTransformDequant_ChromaDC ( + const OMX_S16 *pSrc, + OMX_S16 *pDst, + OMX_U32 iQP +); + + + +/** + * Function: omxVCM4P10_InvTransformResidualAndAdd (6.3.5.7.1) + * + * Description: + * This function performs inverse an 4x4 integer transformation to produce + * the difference signal and then adds the difference to the prediction to get + * the reconstructed signal. + * + * Input Arguments: + * + * pSrcPred - Pointer to prediction signal. 4-byte alignment required. + * pDequantCoeff - Pointer to the transformed coefficients. 8-byte + * alignment required. + * iSrcPredStep - Step of the prediction buffer; must be a multiple of 4. + * iDstReconStep - Step of the destination reconstruction buffer; must be a + * multiple of 4. + * bAC - Indicate whether there is AC coefficients in the coefficients + * matrix. + * + * Output Arguments: + * + * pDstRecon -Pointer to the destination reconstruction buffer. 4-byte + * alignment required. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcPred, pDequantCoeff, pDstRecon + * - pSrcPred is not aligned on a 4-byte boundary + * - iSrcPredStep or iDstReconStep is not a multiple of 4. 
+ * - pDequantCoeff is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_InvTransformResidualAndAdd ( + const OMX_U8 *pSrcPred, + const OMX_S16 *pDequantCoeff, + OMX_U8 *pDstRecon, + OMX_U32 iSrcPredStep, + OMX_U32 iDstReconStep, + OMX_U8 bAC +); + + + +/** + * Function: omxVCM4P10_SubAndTransformQDQResidual (6.3.5.8.1) + * + * Description: + * This function subtracts the prediction signal from the original signal to + * produce the difference signal and then performs a 4x4 integer transform and + * quantization. The quantized transformed coefficients are stored as + * pDstQuantCoeff. This function can also output dequantized coefficients or + * unquantized DC coefficients optionally by setting the pointers + * pDstDeQuantCoeff, pDCCoeff. + * + * Input Arguments: + * + * pSrcOrg - Pointer to original signal. 4-byte alignment required. + * pSrcPred - Pointer to prediction signal. 4-byte alignment required. + * iSrcOrgStep - Step of the original signal buffer; must be a multiple of + * 4. + * iSrcPredStep - Step of the prediction signal buffer; must be a multiple + * of 4. + * pNumCoeff -Number of non-zero coefficients after quantization. If this + * parameter is not required, it is set to NULL. + * nThreshSAD - Zero-block early detection threshold. If this parameter is + * not required, it is set to 0. + * iQP - Quantization parameter; must be in the range [0,51]. + * bIntra - Indicates whether this is an INTRA block, either 1-INTRA or + * 0-INTER + * + * Output Arguments: + * + * pDstQuantCoeff - Pointer to the quantized transformed coefficients. + * 8-byte alignment required. + * pDstDeQuantCoeff - Pointer to the dequantized transformed coefficients + * if this parameter is not equal to NULL. 8-byte alignment + * required. + * pDCCoeff - Pointer to the unquantized DC coefficient if this parameter + * is not equal to NULL. 
+ * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcOrg, pSrcPred, pNumCoeff, pDstQuantCoeff, + * pDstDeQuantCoeff, pDCCoeff + * - pSrcOrg is not aligned on a 4-byte boundary + * - pSrcPred is not aligned on a 4-byte boundary + * - iSrcOrgStep is not a multiple of 4 + * - iSrcPredStep is not a multiple of 4 + * - pDstQuantCoeff or pDstDeQuantCoeff is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_SubAndTransformQDQResidual ( + const OMX_U8 *pSrcOrg, + const OMX_U8 *pSrcPred, + OMX_U32 iSrcOrgStep, + OMX_U32 iSrcPredStep, + OMX_S16 *pDstQuantCoeff, + OMX_S16 *pDstDeQuantCoeff, + OMX_S16 *pDCCoeff, + OMX_S8 *pNumCoeff, + OMX_U32 nThreshSAD, + OMX_U32 iQP, + OMX_U8 bIntra +); + + + +/** + * Function: omxVCM4P10_GetVLCInfo (6.3.5.9.1) + * + * Description: + * This function extracts run-length encoding (RLE) information from the + * coefficient matrix. The results are returned in an OMXVCM4P10VLCInfo + * structure. + * + * Input Arguments: + * + * pSrcCoeff - pointer to the transform coefficient matrix. 8-byte + * alignment required. + * pScanMatrix - pointer to the scan order definition matrix. For a luma + * block the scan matrix should follow [ISO14496-10] section 8.5.4, + * and should contain the values 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, + * 10, 7, 11, 14, 15. For a chroma block, the scan matrix should + * contain the values 0, 1, 2, 3. + * bAC - indicates presence of a DC coefficient; 0 = DC coefficient + * present, 1= DC coefficient absent. + * MaxNumCoef - specifies the number of coefficients contained in the + * transform coefficient matrix, pSrcCoeff. The value should be 16 + * for blocks of type LUMADC, LUMAAC, LUMALEVEL, and CHROMAAC. The + * value should be 4 for blocks of type CHROMADC. 
+ * + * Output Arguments: + * + * pDstVLCInfo - pointer to structure that stores information for + * run-length coding. + * + * Return Value: + * + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments; returned if any of the following + * conditions are true: + * - at least one of the following pointers is NULL: + * pSrcCoeff, pScanMatrix, pDstVLCInfo + * - pSrcCoeff is not aligned on an 8-byte boundary + * + */ +OMXResult omxVCM4P10_GetVLCInfo ( + const OMX_S16 *pSrcCoeff, + const OMX_U8 *pScanMatrix, + OMX_U8 bAC, + OMX_U32 MaxNumCoef, + OMXVCM4P10VLCInfo*pDstVLCInfo +); + + + +#ifdef __cplusplus +} +#endif + +#endif /** end of #define _OMXVC_H_ */ + +/** EOF */ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC_s.h new file mode 100644 index 0000000..be974d5 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/api/omxVC_s.h @@ -0,0 +1,129 @@ +;/****************************************************************************** +;// Copyright (c) 1999-2005 The Khronos Group Inc. 
All Rights Reserved +;// +;// +;// +;// +;// +;// +;// +;// +;******************************************************************************/ + +;/** =============== Structure Definition for Sample Generation ============== */ +;/** transparent status */ + +;enum { +OMX_VIDEO_TRANSPARENT EQU 0; /** Wholly transparent */ +OMX_VIDEO_PARTIAL EQU 1; /** Partially transparent */ +OMX_VIDEO_OPAQUE EQU 2; /** Opaque */ +;} + +;/** direction */ +;enum { +OMX_VIDEO_NONE EQU 0; +OMX_VIDEO_HORIZONTAL EQU 1; +OMX_VIDEO_VERTICAL EQU 2; +;} + +;/** bilinear interpolation type */ +;enum { +OMX_VIDEO_INTEGER_PIXEL EQU 0; /** case "a" */ +OMX_VIDEO_HALF_PIXEL_X EQU 1; /** case "b" */ +OMX_VIDEO_HALF_PIXEL_Y EQU 2; /** case "c" */ +OMX_VIDEO_HALF_PIXEL_XY EQU 3; /** case "d" */ +;} + +;enum { +OMX_UPPER EQU 1; /** set if the above macroblock is available */ +OMX_LEFT EQU 2; /** set if the left macroblock is available */ +OMX_CENTER EQU 4; +OMX_RIGHT EQU 8; +OMX_LOWER EQU 16; +OMX_UPPER_LEFT EQU 32; /** set if the above-left macroblock is available */ +OMX_UPPER_RIGHT EQU 64; /** set if the above-right macroblock is available */ +OMX_LOWER_LEFT EQU 128; +OMX_LOWER_RIGHT EQU 256 +;} + +;enum { +OMX_VIDEO_LUMINANCE EQU 0; /** Luminance component */ +OMX_VIDEO_CHROMINANCE EQU 1; /** chrominance component */ +OMX_VIDEO_ALPHA EQU 2; /** Alpha component */ +;} + +;enum { +OMX_VIDEO_INTER EQU 0; /** P picture or P-VOP */ +OMX_VIDEO_INTER_Q EQU 1; /** P picture or P-VOP */ +OMX_VIDEO_INTER4V EQU 2; /** P picture or P-VOP */ +OMX_VIDEO_INTRA EQU 3; /** I and P picture; I- and P-VOP */ +OMX_VIDEO_INTRA_Q EQU 4; /** I and P picture; I- and P-VOP */ +OMX_VIDEO_INTER4V_Q EQU 5; /** P picture or P-VOP (H.263)*/ +OMX_VIDEO_DIRECT EQU 6; /** B picture or B-VOP (MPEG-4 only) */ +OMX_VIDEO_INTERPOLATE EQU 7; /** B picture or B-VOP */ +OMX_VIDEO_BACKWARD EQU 8; /** B picture or B-VOP */ +OMX_VIDEO_FORWARD EQU 9; /** B picture or B-VOP */ +OMX_VIDEO_NOTCODED EQU 10; /** B picture or B-VOP */ +;} 
+ +;enum { +OMX_16X16_VERT EQU 0; /** Intra_16x16_Vertical (prediction mode) */ +OMX_16X16_HOR EQU 1; /** Intra_16x16_Horizontal (prediction mode) */ +OMX_16X16_DC EQU 2; /** Intra_16x16_DC (prediction mode) */ +OMX_16X16_PLANE EQU 3; /** Intra_16x16_Plane (prediction mode) */ +;} + +;enum { +OMX_4x4_VERT EQU 0; /** Intra_4x4_Vertical (prediction mode) */ +OMX_4x4_HOR EQU 1; /** Intra_4x4_Horizontal (prediction mode) */ +OMX_4x4_DC EQU 2; /** Intra_4x4_DC (prediction mode) */ +OMX_4x4_DIAG_DL EQU 3; /** Intra_4x4_Diagonal_Down_Left (prediction mode) */ +OMX_4x4_DIAG_DR EQU 4; /** Intra_4x4_Diagonal_Down_Right (prediction mode) */ +OMX_4x4_VR EQU 5; /** Intra_4x4_Vertical_Right (prediction mode) */ +OMX_4x4_HD EQU 6; /** Intra_4x4_Horizontal_Down (prediction mode) */ +OMX_4x4_VL EQU 7; /** Intra_4x4_Vertical_Left (prediction mode) */ +OMX_4x4_HU EQU 8; /** Intra_4x4_Horizontal_Up (prediction mode) */ +;} + +;enum { +OMX_CHROMA_DC EQU 0; /** Intra_Chroma_DC (prediction mode) */ +OMX_CHROMA_HOR EQU 1; /** Intra_Chroma_Horizontal (prediction mode) */ +OMX_CHROMA_VERT EQU 2; /** Intra_Chroma_Vertical (prediction mode) */ +OMX_CHROMA_PLANE EQU 3; /** Intra_Chroma_Plane (prediction mode) */ +;} + +;typedef struct { +x EQU 0; +y EQU 4; +;}OMXCoordinate; + +;typedef struct { +dx EQU 0; +dy EQU 2; +;}OMXMotionVector; + +;typedef struct { +xx EQU 0; +yy EQU 4; +width EQU 8; +height EQU 12; +;}OMXiRect; + +;typedef enum { +OMX_VC_INTER EQU 0; /** P picture or P-VOP */ +OMX_VC_INTER_Q EQU 1; /** P picture or P-VOP */ +OMX_VC_INTER4V EQU 2; /** P picture or P-VOP */ +OMX_VC_INTRA EQU 3; /** I and P picture, I- and P-VOP */ +OMX_VC_INTRA_Q EQU 4; /** I and P picture, I- and P-VOP */ +OMX_VC_INTER4V_Q EQU 5; /** P picture or P-VOP (H.263)*/ +;} OMXVCM4P2MacroblockType; + +;enum { +OMX_VC_NONE EQU 0 +OMX_VC_HORIZONTAL EQU 1 +OMX_VC_VERTICAL EQU 2 +;}; + + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy16x16_s.s 
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy16x16_s.s new file mode 100644 index 0000000..2663a70 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy16x16_s.s @@ -0,0 +1,148 @@ + ;/** + ; * Function: omxVCCOMM_Copy16x16 + ; * + ; * Description: + ; * Copies the reference 16x16 block to the current block. + ; * Parameters: + ; * [in] pSrc - pointer to the reference block in the source frame; must be aligned on an 16-byte boundary. + ; * [in] step - distance between the starts of consecutive lines in the reference frame, in bytes; + ; * must be a multiple of 16 and must be larger than or equal to 16. + ; * [out] pDst - pointer to the destination block; must be aligned on an 8-byte boundary. + ; * Return Value: + ; * OMX_Sts_NoErr - no error + ; * OMX_Sts_BadArgErr - bad arguments; returned under any of the following conditions: + ; * - one or more of the following pointers is NULL: pSrc, pDst + ; * - one or more of the following pointers is not aligned on an 16-byte boundary: pSrc, pDst + ; * - step <16 or step is not a multiple of 16. 
+ ; */ + + INCLUDE omxtypes_s.h + + + M_VARIANTS ARM1136JS + + + + + IF ARM1136JS + +;//Input Arguments +pSrc RN 0 +pDst RN 1 +step RN 2 + +;//Local Variables +Count RN 14 +X0 RN 2 +X1 RN 4 + +Return RN 0 + + M_START omxVCCOMM_Copy16x16,r5 + + + + SUB Count,step,#8 ;//Count=step-8 + LDRD X0,[pSrc],#8 ;//pSrc after loading pSrc=pSrc+8 + LDRD X1,[pSrc],Count ;//pSrc after loading pSrc=pSrc+step + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD 
X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + ;// loading 16 bytes and storing + STRD X0,[pDst],#8 + LDRD X0,[pSrc],#8 + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + STRD X0,[pDst],#8 + MOV Return,#OMX_Sts_NoErr + STRD X1,[pDst],#8 + + + M_END + ENDIF + + END +
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy8x8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy8x8_s.s new file mode 100644 index 0000000..993873c --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_Copy8x8_s.s @@ -0,0 +1,72 @@ + ;/** + ; * Function: omxVCCOMM_Copy8x8 + ; * + ; * Description: + ; * Copies the reference 8x8 block to the current block. + ; * Parameters: + ; * [in] pSrc - pointer to the reference block in the source frame; must be aligned on an 8-byte boundary. + ; * [in] step - distance between the starts of consecutive lines in the reference frame, in bytes; + ; * must be a multiple of 8 and must be larger than or equal to 8. + ; * [out] pDst - pointer to the destination block; must be aligned on an 8-byte boundary. + ; * Return Value: + ; * OMX_Sts_NoErr - no error + ; * OMX_Sts_BadArgErr - bad arguments; returned under any of the following conditions: + ; * - one or more of the following pointers is NULL: pSrc, pDst + ; * - one or more of the following pointers is not aligned on an 8-byte boundary: pSrc, pDst + ; * - step <8 or step is not a multiple of 8. + ; */ + + INCLUDE omxtypes_s.h + + + M_VARIANTS ARM1136JS + + + + + IF ARM1136JS + +;//Input Arguments +pSrc RN 0 +pDst RN 1 +step RN 2 + +;//Local Variables +Count RN 14 +X0 RN 2 +X1 RN 4 +Return RN 0 + M_START omxVCCOMM_Copy8x8,r5 + + + + MOV Count,step ;//Count=step; step must be copied out of r2 because X0 is also r2 and the first LDRD below overwrites it + + LDRD X0,[pSrc],Count ;//pSrc after loading : pSrc=pSrc+step + LDRD X1,[pSrc],Count + + STRD X0,[pDst],#8 + LDRD X0,[pSrc],Count + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + STRD X0,[pDst],#8 + LDRD X0,[pSrc],Count + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + STRD X0,[pDst],#8 + LDRD X0,[pSrc],Count + STRD X1,[pDst],#8 + LDRD X1,[pSrc],Count + + STRD X0,[pDst],#8 + MOV Return,#OMX_Sts_NoErr ;// NOTE(review): only value ever written to Return in this file; no argument checks are visible here + STRD X1,[pDst],#8 + + + M_END + ENDIF + + END + 
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s new file mode 100644 index 0000000..02b4b08 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s @@ -0,0 +1,189 @@ +;// +;// +;// File Name: omxVCCOMM_ExpandFrame_I_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// +;// Description: +;// This function will Expand Frame boundary pixels into Plane +;// +;// + +;// Include standard headers + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + +;// Import symbols required from other files +;// (For example tables) + + +;// Set debugging level +DEBUG_ON SETL {FALSE} + + + + + + + +;// Guarding implementation by the processor name + + IF ARM1136JS + +;//Input Registers + +pSrcDstPlane RN 0 +iFrameWidth RN 1 +iFrameHeight RN 2 +iExpandPels RN 3 + + +;//Output Registers + +result RN 0 + +;//Local Scratch Registers + +iPlaneStep RN 4 +pTop RN 5 +pBottom RN 6 +pBottomIndex RN 7 +x RN 8 +y RN 9 +tempTop RN 10 +tempBot RN 11 +ColStep RN 12 +pLeft RN 5 +pRight RN 6 +pRightIndex RN 7 +tempLeft1 RN 10 +tempRight1 RN 11 +tempLeft2 RN 14 +tempRight2 RN 2 +indexY RN 14 +RowStep RN 12 +expandTo4bytes RN 1 ;// copy a byte to 4 bytes of a word + + ;// Allocate stack memory required by the function + + + ;// Write function header + M_START omxVCCOMM_ExpandFrame_I,r11 + + ;// Define stack arguments + M_ARG iPlaneStepOnStack, 4 + + ;// Load argument from the stack + M_LDR iPlaneStep, iPlaneStepOnStack + + MUL pTop,iExpandPels,iPlaneStep + MLA pBottom,iFrameHeight,iPlaneStep,pSrcDstPlane + SUB x,iFrameWidth,#4 + MOV indexY,pTop + ADD ColStep,indexY,#4 + SUB pBottomIndex,pBottom,iPlaneStep + SUB pTop,pSrcDstPlane,pTop + + + 
ADD pTop,pTop,x + ADD pBottom,pBottom,x + + ;//------------------------------------------------------------------------ + ;// The following improves upon the C implementation + ;// The x and y loops are interchanged: This ensures that the values of + ;// pSrcDstPlane [x] and pSrcDstPlane [(iFrameHeight - 1) * iPlaneStep + x] + ;// which depend only on loop variable 'x' are loaded once and used in + ;// multiple stores in the 'Y' loop + ;//------------------------------------------------------------------------ + + ;// xloop +ExpandFrameTopBotXloop + + LDR tempTop,[pSrcDstPlane,x] + ;//------------------------------------------------------------------------ + ;// pSrcDstPlane [(iFrameHeight - 1) * iPlaneStep + x] is simplified as: + ;// pSrcDstPlane + (iFrameHeight * iPlaneStep) - iPlaneStep + x == + ;// pBottom - iPlaneStep + x == pBottomIndex [x] + ;// The value of pBottomIndex is calculated above this 'x' loop + ;//------------------------------------------------------------------------ + LDR tempBot,[pBottomIndex,x] + + ;// yloop + MOV y,iExpandPels + +ExpandFrameTopBotYloop + SUBS y,y,#1 + M_STR tempTop,[pTop],iPlaneStep + M_STR tempBot,[pBottom],iPlaneStep + BGT ExpandFrameTopBotYloop + + SUBS x,x,#4 + SUB pTop,pTop,ColStep + SUB pBottom,pBottom,ColStep + BGE ExpandFrameTopBotXloop + + + ;// y loop + ;// The product is already calculated above : Reuse + ;//MUL indexY,iExpandPels,iPlaneStep + + SUB pSrcDstPlane,pSrcDstPlane,indexY + SUB pLeft,pSrcDstPlane,iExpandPels ;// pLeft->points to the top left of the expanded block + ADD pRight,pSrcDstPlane,iFrameWidth + SUB pRightIndex,pRight,#1 + + ADD y,iFrameHeight,iExpandPels,LSL #1 ;// y = iFrameHeight + 2*iExpandPels; iFrameHeight (r2) is not read again, so r2 is free to become tempRight2 + LDR expandTo4bytes,=0x01010101 ;// r1 is reused here: iFrameWidth is not read after this point + + RSB RowStep,iExpandPels,iPlaneStep,LSL #1 ;// RowStep = 2*iPlaneStep - iExpandPels + + ;// The Y Loop is unrolled twice +ExpandFrameLeftRightYloop + LDRB tempLeft2,[pSrcDstPlane,iPlaneStep] ;// PreLoad the values + LDRB tempRight2,[pRightIndex,iPlaneStep] + M_LDRB tempLeft1,[pSrcDstPlane],iPlaneStep,LSL #1 ;// PreLoad the values + M_LDRB 
tempRight1,[pRightIndex],iPlaneStep,LSL #1 + + SUB x,iExpandPels,#4 + MUL tempLeft2,tempLeft2,expandTo4bytes ;// Copy the single byte to 4 bytes + MUL tempRight2,tempRight2,expandTo4bytes + MUL tempLeft1,tempLeft1,expandTo4bytes ;// Copy the single byte to 4 bytes + MUL tempRight1,tempRight1,expandTo4bytes + + + ;// x loop +ExpandFrameLeftRightXloop + SUBS x,x,#4 + STR tempLeft2,[pLeft,iPlaneStep] ;// Store the 4 bytes at one go + STR tempRight2,[pRight,iPlaneStep] + STR tempLeft1,[pLeft],#4 ;// Store the 4 bytes at one go + STR tempRight1,[pRight],#4 + BGE ExpandFrameLeftRightXloop + + SUBS y,y,#2 + ADD pLeft,pLeft,RowStep + ADD pRight,pRight,RowStep + BGT ExpandFrameLeftRightYloop + + + ;// Set return value + + MOV result,#OMX_Sts_NoErr +End + + ;// Write function tail + + M_END + + ENDIF ;//ARM1136JS + + + END
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/api/armVCM4P10_CAVLCTables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/api/armVCM4P10_CAVLCTables.h new file mode 100644 index 0000000..4340f2a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/api/armVCM4P10_CAVLCTables.h @@ -0,0 +1,30 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armVCM4P10_CAVLCTables.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Header file for optimized H.264 CAVLC tables + * + */ + +#ifndef ARMVCM4P10_CAVLCTABLES_H +#define ARMVCM4P10_CAVLCTABLES_H + +/* CAVLC tables */ + +extern const OMX_U16 *armVCM4P10_CAVLCCoeffTokenTables[18]; +extern const OMX_U16 *armVCM4P10_CAVLCTotalZeroTables[15]; +extern const OMX_U16 *armVCM4P10_CAVLCTotalZeros2x2Tables[3]; +extern const OMX_U16 *armVCM4P10_CAVLCRunBeforeTables[15]; +extern const OMX_U8 armVCM4P10_ZigZag_4x4[16]; +extern const OMX_U8 armVCM4P10_ZigZag_2x2[4]; +extern const OMX_S8 armVCM4P10_SuffixToLevel[7]; + +#endif diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s new file mode 100644 index 0000000..b2cd9d1 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s @@ -0,0 +1,222 @@ +;// +;// +;// File Name: armVCM4P10_Average_4x_Align_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + +;// Functions: +;// armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe +;// +;// Implements Average of 4x4 with equation c = (a+b+1)>>1. 
+;// First operand will be at offset ALIGNMENT from aligned address +;// Second operand will be at aligned location and will be used as output. +;// destination pointed by (pDst) for vertical interpolation. +;// This function needs to copy 4 bytes in horizontal direction +;// +;// Registers used as input for this function +;// r0,r1,r2,r3 where r2 contains aligned memory pointer and r3 step size +;// +;// Registers preserved for top level function +;// r4,r5,r6,r8,r9,r14 +;// +;// Registers modified by the function +;// r7,r10,r11,r12 +;// +;// Output registers +;// r2 - pointer to the aligned location +;// r3 - step size to this aligned location + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT armVCM4P10_Average_4x4_Align0_unsafe + EXPORT armVCM4P10_Average_4x4_Align2_unsafe + EXPORT armVCM4P10_Average_4x4_Align3_unsafe + +DEBUG_ON SETL {FALSE} + +;// Declare input registers +pPred0 RN 0 +iPredStep0 RN 1 +pPred1 RN 2 +iPredStep1 RN 3 +pDstPred RN 2 +iDstStep RN 3 + +;// Declare other intermediate registers +iPredA0 RN 10 +iPredA1 RN 11 +iPredB0 RN 12 +iPredB1 RN 14 +Temp1 RN 4 +Temp2 RN 5 +ResultA RN 5 +ResultB RN 4 +r0x80808080 RN 7 + + IF ARM1136JS + + ;// This function calculates average of 4x4 block + ;// pPred0 is at alignment offset 0 and pPred1 is alignment 4 + + ;// Function header + M_START armVCM4P10_Average_4x4_Align0_unsafe, r6 + + ;// Code start + LDR r0x80808080, =0x80808080 + + ;// 1st load + M_LDR iPredB0, [pPred1] + M_LDR iPredA0, [pPred0], iPredStep0 + M_LDR iPredB1, [pPred1, iPredStep1] + M_LDR iPredA1, [pPred0], iPredStep0 + + ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128 + MVN iPredB0, iPredB0 + MVN iPredB1, iPredB1 + UHSUB8 ResultA, iPredA0, iPredB0 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep + EOR ResultB, ResultB, r0x80808080 + M_STR ResultB, [pDstPred], iDstStep + + ;// 2nd load + M_LDR iPredA0, [pPred0], iPredStep0 + M_LDR
iPredB0, [pPred1] + M_LDR iPredA1, [pPred0], iPredStep0 + M_LDR iPredB1, [pPred1, iPredStep1] + + MVN iPredB0, iPredB0 + UHSUB8 ResultA, iPredA0, iPredB0 + MVN iPredB1, iPredB1 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep + EOR ResultB, ResultB, r0x80808080 + M_STR ResultB, [pDstPred], iDstStep +End0 + M_END + + ;// This function calculates average of 4x4 block + ;// pPred0 is at alignment offset 2 and pPred1 is alignment 4 + + ;// Function header + M_START armVCM4P10_Average_4x4_Align2_unsafe, r6 + + ;// Code start + LDR r0x80808080, =0x80808080 + + ;// 1st load + LDR Temp1, [pPred0, #4] + M_LDR iPredA0, [pPred0], iPredStep0 + M_LDR iPredB0, [pPred1] + M_LDR iPredB1, [pPred1, iPredStep1] + M_LDR Temp2, [pPred0, #4] + M_LDR iPredA1, [pPred0], iPredStep0 + MVN iPredB0, iPredB0 + MVN iPredB1, iPredB1 + MOV iPredA0, iPredA0, LSR #16 + ORR iPredA0, iPredA0, Temp1, LSL #16 + MOV iPredA1, iPredA1, LSR #16 + ORR iPredA1, iPredA1, Temp2, LSL #16 + + ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128 + UHSUB8 ResultA, iPredA0, iPredB0 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep + EOR ResultB, ResultB, r0x80808080 + M_STR ResultB, [pDstPred], iDstStep + + ;// 2nd load + LDR Temp1, [pPred0, #4] + M_LDR iPredA0, [pPred0], iPredStep0 + LDR iPredB0, [pPred1] + LDR iPredB1, [pPred1, iPredStep1] + LDR Temp2, [pPred0, #4] + M_LDR iPredA1, [pPred0], iPredStep0 + MVN iPredB0, iPredB0 + MVN iPredB1, iPredB1 + MOV iPredA0, iPredA0, LSR #16 + ORR iPredA0, iPredA0, Temp1, LSL #16 + MOV iPredA1, iPredA1, LSR #16 + ORR iPredA1, iPredA1, Temp2, LSL #16 + + UHSUB8 ResultA, iPredA0, iPredB0 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep + EOR ResultB, ResultB, r0x80808080 + M_STR ResultB, [pDstPred], iDstStep +End2 + M_END + + + ;// This function calculates average of 4x4 block + ;// pPred0 is at 
alignment offset 3 and pPred1 is alignment 4 + + ;// Function header + M_START armVCM4P10_Average_4x4_Align3_unsafe, r6 + + ;// Code start + LDR r0x80808080, =0x80808080 + + ;// 1st load + LDR Temp1, [pPred0, #4] + M_LDR iPredA0, [pPred0], iPredStep0 + LDR iPredB0, [pPred1] + LDR iPredB1, [pPred1, iPredStep1] + LDR Temp2, [pPred0, #4] + M_LDR iPredA1, [pPred0], iPredStep0 + + MVN iPredB0, iPredB0 + MVN iPredB1, iPredB1 + MOV iPredA0, iPredA0, LSR #24 + ORR iPredA0, iPredA0, Temp1, LSL #8 + MOV iPredA1, iPredA1, LSR #24 + ORR iPredA1, iPredA1, Temp2, LSL #8 + UHSUB8 ResultA, iPredA0, iPredB0 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep + EOR ResultB, ResultB, r0x80808080 + M_STR ResultB, [pDstPred], iDstStep + + ;// 2nd load + LDR Temp1, [pPred0, #4] + M_LDR iPredA0, [pPred0], iPredStep0 + LDR iPredB0, [pPred1] + LDR iPredB1, [pPred1, iPredStep1] + LDR Temp2, [pPred0, #4] + M_LDR iPredA1, [pPred0], iPredStep0 + + MVN iPredB0, iPredB0 + MVN iPredB1, iPredB1 + MOV iPredA0, iPredA0, LSR #24 + ORR iPredA0, iPredA0, Temp1, LSL #8 + MOV iPredA1, iPredA1, LSR #24 + ORR iPredA1, iPredA1, Temp2, LSL #8 + + UHSUB8 ResultA, iPredA0, iPredB0 + UHSUB8 ResultB, iPredA1, iPredB1 + EOR ResultA, ResultA, r0x80808080 + M_STR ResultA, [pDstPred], iDstStep + EOR ResultB, ResultB, r0x80808080 + M_STR ResultB, [pDstPred], iDstStep +End3 + M_END + + ENDIF + + END +
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_CAVLCTables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_CAVLCTables.c new file mode 100644 index 0000000..17fe518 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_CAVLCTables.c @@ -0,0 +1,327 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: armVCM4P10_CAVLCTables.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * Optimized CAVLC tables for H.264 + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armVCM4P10_CAVLCTables.h" + +/* 4x4 DeZigZag table */ + +const OMX_U8 armVCM4P10_ZigZag_4x4[16] = +{ + 0, 1, 4, 8, 5, 2, 3, 6, 9, 12, 13, 10, 7, 11, 14, 15 +}; + +/* 2x2 DeZigZag table */ + +const OMX_U8 armVCM4P10_ZigZag_2x2[4] = +{ + 0, 1, 2, 3 +}; + + +/* + * Suffix To Level table + * We increment the suffix length if + * ((LevelCode>>1)+1)>(3<<(SuffixLength-1)) && SuffixLength<6 + * (LevelCode>>1)>=(3<<(SuffixLength-1)) && SuffixLength<6 + * LevelCode >= 3<<SuffixLength && SuffixLength<6 + * (LevelCode+2) >= (3<<SuffixLength)+2 && SuffixLength<6 + */ +const OMX_S8 armVCM4P10_SuffixToLevel[7] = +{ + (3<<1)+2, /* SuffixLength=0 (same threshold as SuffixLength=1) */ + (3<<1)+2, /* SuffixLength=1 */ + (3<<2)+2, /* SuffixLength=2 */ + (3<<3)+2, /* SuffixLength=3 */ + (3<<4)+2, /* SuffixLength=4 */ + (3<<5)+2, /* SuffixLength=5 */ + -1 /* SuffixLength=6 - never increment */ +}; + +static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_0[132] = { + 0x0020, 0x0100, 0x2015, 0x2015, 0x400b, 0x400b, 0x400b, 0x400b, + 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, + 0x0028, 0x00f0, 0x00f8, 0x0027, 0x0030, 0x00d8, 0x00e0, 0x00e8, + 0x0038, 0x00a0, 0x00c8, 0x00d0, 0x0040, 0x0068, 0x0090, 0x0098, + 0x0048, 0x0050, 0x0058, 0x0060, 
0x27ff, 0x27ff, 0x206b, 0x206b, + 0x0081, 0x0085, 0x0083, 0x0079, 0x0087, 0x007d, 0x007b, 0x0071, + 0x007f, 0x0075, 0x0073, 0x0069, 0x0070, 0x0078, 0x0080, 0x0088, + 0x2077, 0x2077, 0x206d, 0x206d, 0x2063, 0x2063, 0x2061, 0x2061, + 0x206f, 0x206f, 0x2065, 0x2065, 0x205b, 0x205b, 0x2059, 0x2059, + 0x0067, 0x005d, 0x0053, 0x0051, 0x005f, 0x0055, 0x004b, 0x0049, + 0x00a8, 0x00b0, 0x00b8, 0x00c0, 0x2041, 0x2041, 0x204d, 0x204d, + 0x2043, 0x2043, 0x2039, 0x2039, 0x2057, 0x2057, 0x2045, 0x2045, + 0x203b, 0x203b, 0x2031, 0x2031, 0x204f, 0x204f, 0x203d, 0x203d, + 0x2033, 0x2033, 0x2029, 0x2029, 0x0047, 0x0035, 0x002b, 0x0021, + 0x203f, 0x203f, 0x202d, 0x202d, 0x2023, 0x2023, 0x2019, 0x2019, + 0x0037, 0x0025, 0x001b, 0x0011, 0x202f, 0x202f, 0x201d, 0x201d, + 0x0013, 0x0009, 0x201f, 0x201f +}; + +static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_1[128] = { + 0x0020, 0x00e8, 0x00f0, 0x00f8, 0x0027, 0x001f, 0x2015, 0x2015, + 0x400b, 0x400b, 0x400b, 0x400b, 0x4001, 0x4001, 0x4001, 0x4001, + 0x0028, 0x00d0, 0x00d8, 0x00e0, 0x0030, 0x0098, 0x00c0, 0x00c8, + 0x0038, 0x0060, 0x0088, 0x0090, 0x0040, 0x0048, 0x0050, 0x0058, + 0x27ff, 0x27ff, 0x207f, 0x207f, 0x0087, 0x0085, 0x0083, 0x0081, + 0x007b, 0x0079, 0x007d, 0x0073, 0x2075, 0x2075, 0x2071, 0x2071, + 0x0068, 0x0070, 0x0078, 0x0080, 0x2077, 0x2077, 0x206d, 0x206d, + 0x206b, 0x206b, 0x2069, 0x2069, 0x206f, 0x206f, 0x2065, 0x2065, + 0x2063, 0x2063, 0x2061, 0x2061, 0x0059, 0x005d, 0x005b, 0x0051, + 0x0067, 0x0055, 0x0053, 0x0049, 0x00a0, 0x00a8, 0x00b0, 0x00b8, + 0x205f, 0x205f, 0x204d, 0x204d, 0x204b, 0x204b, 0x2041, 0x2041, + 0x2057, 0x2057, 0x2045, 0x2045, 0x2043, 0x2043, 0x2039, 0x2039, + 0x204f, 0x204f, 0x203d, 0x203d, 0x203b, 0x203b, 0x2031, 0x2031, + 0x0029, 0x0035, 0x0033, 0x0021, 0x2047, 0x2047, 0x202d, 0x202d, + 0x202b, 0x202b, 0x2019, 0x2019, 0x003f, 0x0025, 0x0023, 0x0011, + 0x0037, 0x001d, 0x001b, 0x0009, 0x202f, 0x202f, 0x2013, 0x2013 +}; + +static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_2[112] = { + 0x0020, 
0x0088, 0x00b0, 0x00b8, 0x00c0, 0x00c8, 0x00d0, 0x00d8, + 0x003f, 0x0037, 0x002f, 0x0027, 0x001f, 0x0015, 0x000b, 0x0001, + 0x0028, 0x0050, 0x0078, 0x0080, 0x0030, 0x0038, 0x0040, 0x0048, + 0x07ff, 0x0081, 0x0087, 0x0085, 0x0083, 0x0079, 0x007f, 0x007d, + 0x007b, 0x0071, 0x0077, 0x0075, 0x0073, 0x0069, 0x206b, 0x206b, + 0x0058, 0x0060, 0x0068, 0x0070, 0x2061, 0x2061, 0x206d, 0x206d, + 0x2063, 0x2063, 0x2059, 0x2059, 0x206f, 0x206f, 0x2065, 0x2065, + 0x205b, 0x205b, 0x2051, 0x2051, 0x0067, 0x005d, 0x0053, 0x0049, + 0x005f, 0x0055, 0x004b, 0x0041, 0x0090, 0x0098, 0x00a0, 0x00a8, + 0x2039, 0x2039, 0x2031, 0x2031, 0x204d, 0x204d, 0x2029, 0x2029, + 0x2057, 0x2057, 0x2045, 0x2045, 0x2043, 0x2043, 0x2021, 0x2021, + 0x0019, 0x003d, 0x003b, 0x0011, 0x004f, 0x0035, 0x0033, 0x0009, + 0x202b, 0x202b, 0x202d, 0x202d, 0x2023, 0x2023, 0x2025, 0x2025, + 0x201b, 0x201b, 0x2047, 0x2047, 0x201d, 0x201d, 0x2013, 0x2013 +}; + +static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_3[80] = { + 0x0020, 0x0028, 0x0030, 0x0038, 0x0040, 0x0048, 0x0050, 0x0058, + 0x0060, 0x0068, 0x0070, 0x0078, 0x0080, 0x0088, 0x0090, 0x0098, + 0x0009, 0x000b, 0x07ff, 0x0001, 0x0011, 0x0013, 0x0015, 0x07ff, + 0x0019, 0x001b, 0x001d, 0x001f, 0x0021, 0x0023, 0x0025, 0x0027, + 0x0029, 0x002b, 0x002d, 0x002f, 0x0031, 0x0033, 0x0035, 0x0037, + 0x0039, 0x003b, 0x003d, 0x003f, 0x0041, 0x0043, 0x0045, 0x0047, + 0x0049, 0x004b, 0x004d, 0x004f, 0x0051, 0x0053, 0x0055, 0x0057, + 0x0059, 0x005b, 0x005d, 0x005f, 0x0061, 0x0063, 0x0065, 0x0067, + 0x0069, 0x006b, 0x006d, 0x006f, 0x0071, 0x0073, 0x0075, 0x0077, + 0x0079, 0x007b, 0x007d, 0x007f, 0x0081, 0x0083, 0x0085, 0x0087 +}; + +static const OMX_U16 armVCM4P10_CAVLCCoeffTokenTables_4[32] = { + 0x0020, 0x0038, 0x2015, 0x2015, 0x4001, 0x4001, 0x4001, 0x4001, + 0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b, 0x600b, + 0x0028, 0x0030, 0x0021, 0x0019, 0x2027, 0x2027, 0x0025, 0x0023, + 0x201d, 0x201d, 0x201b, 0x201b, 0x0011, 0x001f, 0x0013, 0x0009 +}; + +const OMX_U16 * 
armVCM4P10_CAVLCCoeffTokenTables[18] = { + armVCM4P10_CAVLCCoeffTokenTables_0, /* nC=0 */ + armVCM4P10_CAVLCCoeffTokenTables_0, /* nC=1 */ + armVCM4P10_CAVLCCoeffTokenTables_1, /* nC=2 */ + armVCM4P10_CAVLCCoeffTokenTables_1, /* nC=3 */ + armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=4 */ + armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=5 */ + armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=6 */ + armVCM4P10_CAVLCCoeffTokenTables_2, /* nC=7 */ + armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=8 */ + armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=9 */ + armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=10 */ + armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=11 */ + armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=12 */ + armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=13 */ + armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=14 */ + armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=15 */ + armVCM4P10_CAVLCCoeffTokenTables_3, /* nC=16 */ + armVCM4P10_CAVLCCoeffTokenTables_4 /* nC=-1 */ +}; + +static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_0[40] = { + 0x0020, 0x0048, 0x0009, 0x0007, 0x2005, 0x2005, 0x2003, 0x2003, + 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, + 0x0028, 0x0040, 0x0011, 0x000f, 0x0030, 0x0038, 0x0019, 0x0017, + 0x27ff, 0x27ff, 0x201f, 0x201f, 0x201d, 0x201d, 0x201b, 0x201b, + 0x2015, 0x2015, 0x2013, 0x2013, 0x200d, 0x200d, 0x200b, 0x200b +}; + +static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_1[24] = { + 0x0020, 0x0028, 0x0011, 0x000f, 0x000d, 0x000b, 0x2009, 0x2009, + 0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001, + 0x001d, 0x001b, 0x0019, 0x0017, 0x2015, 0x2015, 0x2013, 0x2013 +}; + +static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_2[24] = { + 0x0020, 0x0028, 0x0011, 0x000b, 0x0009, 0x0001, 0x200f, 0x200f, + 0x200d, 0x200d, 0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003, + 0x001b, 0x0017, 0x2019, 0x2019, 0x2015, 0x2015, 0x2013, 0x2013 +}; + +static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_3[24] = { + 0x0020, 0x0028, 0x0013, 0x000f, 0x0007, 0x0005, 0x2011, 0x2011, 
+ 0x200d, 0x200d, 0x200b, 0x200b, 0x2009, 0x2009, 0x2003, 0x2003, + 0x2019, 0x2019, 0x2017, 0x2017, 0x2015, 0x2015, 0x2001, 0x2001 +}; + +static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_4[20] = { + 0x0020, 0x0015, 0x0011, 0x0005, 0x0003, 0x0001, 0x200f, 0x200f, + 0x200d, 0x200d, 0x200b, 0x200b, 0x2009, 0x2009, 0x2007, 0x2007, + 0x2017, 0x2017, 0x2013, 0x2013 +}; + +static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_5[20] = { + 0x0020, 0x0011, 0x2013, 0x2013, 0x200f, 0x200f, 0x200d, 0x200d, + 0x200b, 0x200b, 0x2009, 0x2009, 0x2007, 0x2007, 0x2005, 0x2005, + 0x0015, 0x0001, 0x2003, 0x2003 +}; + +static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_6[20] = { + 0x0020, 0x000f, 0x2011, 0x2011, 0x200d, 0x200d, 0x2009, 0x2009, + 0x2007, 0x2007, 0x2005, 0x2005, 0x400b, 0x400b, 0x400b, 0x400b, + 0x0013, 0x0001, 0x2003, 0x2003 +}; + +static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_7[20] = { + 0x0020, 0x0003, 0x200f, 0x200f, 0x200d, 0x200d, 0x2007, 0x2007, + 0x400b, 0x400b, 0x400b, 0x400b, 0x4009, 0x4009, 0x4009, 0x4009, + 0x0011, 0x0001, 0x2005, 0x2005 +}; + +static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_8[20] = { + 0x0020, 0x0005, 0x200b, 0x200b, 0x400d, 0x400d, 0x400d, 0x400d, + 0x4009, 0x4009, 0x4009, 0x4009, 0x4007, 0x4007, 0x4007, 0x4007, + 0x0003, 0x0001, 0x200f, 0x200f +}; + +static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_9[20] = { + 0x0020, 0x000d, 0x2005, 0x2005, 0x400b, 0x400b, 0x400b, 0x400b, + 0x4009, 0x4009, 0x4009, 0x4009, 0x4007, 0x4007, 0x4007, 0x4007, + 0x2003, 0x2003, 0x2001, 0x2001 +}; + +static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_10[16] = { + 0x0001, 0x0003, 0x2005, 0x2005, 0x2007, 0x2007, 0x200b, 0x200b, + 0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009, 0x6009 +}; + +static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_11[16] = { + 0x0001, 0x0003, 0x2009, 0x2009, 0x4005, 0x4005, 0x4005, 0x4005, + 0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007, 0x6007 +}; + +static const OMX_U16 
armVCM4P10_CAVLCTotalZeroTables_12[16] = { + 0x2001, 0x2001, 0x2003, 0x2003, 0x4007, 0x4007, 0x4007, 0x4007, + 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005 +}; + +static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_13[16] = { + 0x4001, 0x4001, 0x4001, 0x4001, 0x4003, 0x4003, 0x4003, 0x4003, + 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005, 0x6005 +}; + +static const OMX_U16 armVCM4P10_CAVLCTotalZeroTables_14[16] = { + 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, + 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003 +}; + +const OMX_U16 * armVCM4P10_CAVLCTotalZeroTables[15] = { + armVCM4P10_CAVLCTotalZeroTables_0, + armVCM4P10_CAVLCTotalZeroTables_1, + armVCM4P10_CAVLCTotalZeroTables_2, + armVCM4P10_CAVLCTotalZeroTables_3, + armVCM4P10_CAVLCTotalZeroTables_4, + armVCM4P10_CAVLCTotalZeroTables_5, + armVCM4P10_CAVLCTotalZeroTables_6, + armVCM4P10_CAVLCTotalZeroTables_7, + armVCM4P10_CAVLCTotalZeroTables_8, + armVCM4P10_CAVLCTotalZeroTables_9, + armVCM4P10_CAVLCTotalZeroTables_10, + armVCM4P10_CAVLCTotalZeroTables_11, + armVCM4P10_CAVLCTotalZeroTables_12, + armVCM4P10_CAVLCTotalZeroTables_13, + armVCM4P10_CAVLCTotalZeroTables_14 +}; + +static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_0[16] = { + 0x2007, 0x2007, 0x2005, 0x2005, 0x4003, 0x4003, 0x4003, 0x4003, + 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001 +}; + +static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_1[16] = { + 0x4005, 0x4005, 0x4005, 0x4005, 0x4003, 0x4003, 0x4003, 0x4003, + 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001 +}; + +static const OMX_U16 armVCM4P10_CAVLCTotalZeros2x2Tables_2[16] = { + 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, 0x6003, + 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001, 0x6001 +}; + +const OMX_U16 * armVCM4P10_CAVLCTotalZeros2x2Tables[3] = { + armVCM4P10_CAVLCTotalZeros2x2Tables_0, + armVCM4P10_CAVLCTotalZeros2x2Tables_1, + 
armVCM4P10_CAVLCTotalZeros2x2Tables_2 +}; + +static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_0[8] = { + 0x4003, 0x4003, 0x4003, 0x4003, 0x4001, 0x4001, 0x4001, 0x4001 +}; + +static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_1[8] = { + 0x2005, 0x2005, 0x2003, 0x2003, 0x4001, 0x4001, 0x4001, 0x4001 +}; + +static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_2[8] = { + 0x2007, 0x2007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001 +}; + +static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_3[8] = { + 0x0009, 0x0007, 0x2005, 0x2005, 0x2003, 0x2003, 0x2001, 0x2001 +}; + +static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_4[8] = { + 0x000b, 0x0009, 0x0007, 0x0005, 0x2003, 0x2003, 0x2001, 0x2001 +}; + +static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_5[8] = { + 0x0003, 0x0005, 0x0009, 0x0007, 0x000d, 0x000b, 0x2001, 0x2001 +}; + +static const OMX_U16 armVCM4P10_CAVLCRunBeforeTables_6[24] = { + 0x0010, 0x000d, 0x000b, 0x0009, 0x0007, 0x0005, 0x0003, 0x0001, + 0x0018, 0x0011, 0x200f, 0x200f, 0x0020, 0x0015, 0x2013, 0x2013, + 0x0028, 0x0019, 0x2017, 0x2017, 0x07ff, 0x001d, 0x201b, 0x201b +}; + +/* Tables 7 to 14 are duplicates of table 6 */ + +const OMX_U16 * armVCM4P10_CAVLCRunBeforeTables[15] = { + armVCM4P10_CAVLCRunBeforeTables_0, /* ZerosLeft=1 */ + armVCM4P10_CAVLCRunBeforeTables_1, + armVCM4P10_CAVLCRunBeforeTables_2, + armVCM4P10_CAVLCRunBeforeTables_3, + armVCM4P10_CAVLCRunBeforeTables_4, + armVCM4P10_CAVLCRunBeforeTables_5, /* ZerosLeft=6 */ + armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=7 */ + armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=8 */ + armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=9 */ + armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=10 */ + armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=11 */ + armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=12 */ + armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=13 */ + armVCM4P10_CAVLCRunBeforeTables_6, /* ZerosLeft=14 */ + armVCM4P10_CAVLCRunBeforeTables_6 /* ZerosLeft=15 */ +}; diff --git 
a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s new file mode 100644 index 0000000..dcbcd00 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s @@ -0,0 +1,20 @@ +;// +;// +;// File Name: armVCM4P10_DeblockingChroma_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + + + END
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s new file mode 100644 index 0000000..14b37fe --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DeblockingLuma_unsafe_s.s @@ -0,0 +1,366 @@ +;// +;// +;// File Name: armVCM4P10_DeblockingLuma_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + + + IF ARM1136JS + +MASK_1 EQU 0x01010101 + +;// Declare input registers + +pQ0 RN 0 +StepArg RN 1 +tC0Arg RN 2 +alpha RN 6 + +beta RN 14 +bS RN 14 +tC0 RN 14 +ptC0 RN 1 + +;// Declare Local/Temporary variables + +;// Pixels +p_0 RN 3 +p_1 RN 5 +p_2 RN 4 +p_3 RN 2 +q_0 RN 8 +q_1 RN 9 +q_2 RN 10 +q_3 RN 12 + + +;// Filtering + +ap0q0 RN 1 +filt RN 2 + +m00 RN 7 +m01 RN 11 + +apflg RN 0 +aqflg RN 6 + +tC RN 1 + + +;//Declarations for bSLT4 kernel + +pos RN 7 +neg RN 12 + +P0a RN 1 +P1a RN 8 +Q0a RN 7 +Q1a RN 4 + +u1 RN 3 +max RN 12 +min RN 2 + + + +;//Declarations for bSGE4 kernel + +q_3b RN 9 +p_3b RN 0 +apqflg RN 12 + +P0b RN 6 +P1b RN 7 +P2b RN 1 + +Q0b RN 9 +Q1b RN 0 +Q2b RN 2 + +;// Miscellaneous + +a RN 0 +t0 RN 3 +t1 RN 12 +t2 RN 7 +t3 RN 11 +t4 RN 4 +t5 RN 1 +t8 RN 6 +t9 RN 14 +t10 RN 5 +t11 RN 9 + +;// Register usage for - armVCM4P10_DeblockingLumabSLT4_unsafe() +;// +;// Inputs - 3,4,5,8,9,10 - Input Pixels (p0-p2,q0-q2) +;// - 2 - filt, 0 - apflg, 6 - aqflg +;// - 11 - m01, 7 - tC0 +;// +;// Outputs - 1,8,7,11 - Output Pixels(P0a,P1a,Q0a,Q1a) +;// +;// Registers Corrupted - 0-3,5-12,14 + + + M_START armVCM4P10_DeblockingLumabSLT4_unsafe, lr + + ;// Since beta <= 18 and alpha <= 255 we know + ;// -254 <= p0-q0 <= 254 + ;// -17 <= q1-q0 
<= 17 + ;// -17 <= p1-p0 <= 17 + + ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3)) + ;// + ;// Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3 + ;// = (4*q0 - 4*p0 + p1 - q1 + 4)>>3 + ;// = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3 + + USUB8 t1, p_1, p_0 + MUL tC0, t2, m01 + + USUB8 t2, q_1, q_0 + SSUB8 t1, t1, t2 + + USUB8 t2, p_0, q_0 + AND t2, t2, m01 + SHSUB8 t1, t1, t2 + UHSUB8 t5, p_0, q_0 + SSUB8 t1, t1, t2 + SHSUB8 t1, t1, t5 + MOV m00, #0 + SADD8 t1, t1, m01 + SHSUB8 t1, t1, t5 + + ;// tC = tC0 + ;// if (ap < beta) tC++; + ;// if (aq < beta) tC++; + USUB8 t5, filt, m01 + SEL tC0, tC0, m00 + UQADD8 tC, tC0, apflg + SSUB8 t1, t1, m00 + UQADD8 tC, tC, aqflg + + ;// Split into positive and negative part and clip + SEL pos, t1, m00 + USUB8 neg, pos, t1 + USUB8 t3, pos, tC + SEL pos, tC, pos + USUB8 t3, neg, tC + SEL neg, tC, neg + + ;//Reload m01 + LDR m01,=MASK_1 + + UQADD8 P0a, p_0, pos + UQSUB8 Q0a, q_0, pos + UQSUB8 P0a, P0a, neg + UQADD8 Q0a, Q0a, neg + + ;// Choose to store the filtered + ;// value or the original pixel + USUB8 t1, filt, m01 + SEL P0a, P0a, p_0 + SEL Q0a, Q0a, q_0 + + ;// delta = (p2 + ((p0+q0+1)>>1) - (p1<<1))>>1; + ;// u1 = (p0 + q0 + 1)>>1 + ;// u1 = ( (q_0 - p_0')>>1 ) ^ 0x80 + MVN p_0, p_0 + UHSUB8 u1, q_0, p_0 + UQADD8 max, p_1, tC0 + EOR u1, u1, m01 ,LSL #7 + + ;// Calculate A = (p2+u1)>>1 + ;// Then delta = Clip3( -tC0, tC0, A - p1) + + ;// Clip P1 + UHADD8 P1a, p_2, u1 + UQSUB8 min, p_1, tC0 + USUB8 t4, P1a, max + SEL P1a, max, P1a + USUB8 t4, P1a, min + SEL P1a, P1a, min + + ;// Clip Q1 + UHADD8 Q1a, q_2, u1 + UQADD8 max, q_1, tC0 + UQSUB8 min, q_1, tC0 + USUB8 t0, Q1a, max + SEL Q1a, max, Q1a + USUB8 t0, Q1a, min + SEL Q1a, Q1a, min + + ;// Choose to store the filtered + ;// value or the original pixel + USUB8 t0, apflg, m01 + SEL P1a, P1a, p_1 + USUB8 t0, aqflg, m01 + SEL t3, Q1a, q_1 + + M_END + +;// Register usage for - armVCM4P10_DeblockingLumabSGE4_unsafe() +;// +;// Inputs - 3,4,5,8,9,10 - Input Pixels 
(p0-p2,q0-q2) +;// - 2 - filt, 0 - apflg,aqflg +;// - 1 - ap0q0, 6 - alpha +;// - 7 - m00, 11 - m01 +;// +;// Outputs - 6,7,1,9,0,2 - Output Pixels(P0b,P1b,P2b, Q0b,Q1b,Q2b) +;// +;// Registers Corrupted - 0-3,5-12,14 + + M_START armVCM4P10_DeblockingLumabSGE4_unsafe, lr + + ;// apflg = apflg && |p0-q0|<((alpha>>2)+2) + ;// aqflg = aqflg && |p0-q0|<((alpha>>2)+2) + + M_ARG pDummy,4 + M_ARG pQ_3,4 + M_ARG pP_3,4 + + UHADD8 alpha, alpha, m00 + USUB8 t9, p_2, p_0 ;//t9 = dp2p0 + UHADD8 alpha, alpha, m00 + ADD alpha, alpha, m01, LSL #1 + USUB8 ap0q0, ap0q0, alpha + SEL apqflg, m00, apflg + + ;// P0 = (p2 + 2*p1 + 2*p0 + 2*q0 + q1 + 4)>>3 + ;// = ((p2-p0) + 2*(p1-p0) + (q1-q0) + 3*(q0-p0) + 8*p0 + 4)>>3 + ;// = p0 + (((p2-p0) + 2*(p1-p0) + (q1-q0) - 3*(p0-q0) + 4)>>3) + + ;// P1 = (p2 + p1 + q0 + p0 + 2)>>2 + ;// = p0 + (((p2-p0) + (p1-p0) - (p0-q0) + 2)>>2) + + ;// P2 = (2*p3 + 3*p2 + p1 + p0 + q0 + 4)>>3 + ;// = (2*(p3-p0) + 3*(p2-p0) + (p1-p0) - (p0-q0) + 8*p0 + 4)>>3 + ;// = p0 + (((p3-p0) + (p2-p0) + t2 + 2)>>2) + + ;// Compute P0b + USUB8 t2, p_0, q_0 + SSUB8 t5, t9, t2 + + USUB8 t8, q_1, q_0 + SHADD8 t8, t5, t8 + + USUB8 t9, p_1, p_0 + SADD8 t8, t8, t9 + SHSUB8 t8, t8, t2 + SHADD8 t5, t5, t9 + SHADD8 t8, t8, m01 + SHADD8 t9, t5, m01 + SADD8 P0b, p_0, t8 + ;// P0b ready + + ;// Compute P1b + M_LDR p_3b, pP_3 + SADD8 P1b, p_0, t9 + ;// P1b ready + + ;// Compute P2b + USUB8 t9, p_2, p_0 + SADD8 t5, t5, t9 + UHSUB8 t9, p_3b, p_0 + EOR a, p_3b, p_0 + AND a, a, m01 + SHADD8 t5, t5, a + UHADD8 a, p_0, q_1 + SADD8 t5, t5, m01 + SHADD8 t5, t5, t9 + MVN t9, p_1 + SADD8 P2b, p_0, t5 + ;// P2b ready + + UHSUB8 a, a, t9 + ORR t9, apqflg, m01 + USUB8 t9, apqflg, t9 + + EOR a, a, m01, LSL #7 + SEL P0b, P0b, a + SEL P1b, P1b, p_1 + SEL P2b, P2b, p_2 + + USUB8 t4, filt, m01 + SEL P0b, P0b, p_0 + + + ;// Q0 = (q2 + 2*q1 + 2*q0 + 2*p0 + p1 + 4)>>3 + ;// = ((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 8*q0 + 4)>>3 + ;// = q0 + (((q2-q0) + 2*(q1-q0) + (p1-p0) + 3*(p0-q0) + 4)>>3) + + 
;// Q1 = (q2 + q1 + p0 + q0 + 2)>>2 + ;// = q0 + (((q2-q0) + (q1-q0) + (p0-q0) + 2)>>2) + + ;// Q2 = (2*q3 + 3*q2 + q1 + q0 + p0 + 4)>>3 + ;// = (2*(q3-q0) + 3*(q2-q0) + (q1-q0) + (p0-q0) + 8*q0 + 4)>>3 + ;// = q0 + (((q3-q0) + (q2-q0) + t2 + 2)>>2) + + + ;// Compute Q0b Q1b + USUB8 t4, q_2, q_0 + USUB8 a, p_0, q_0 + USUB8 t9, p_1, p_0 + SADD8 t0, t4, a + SHADD8 t9, t0, t9 + UHADD8 t10, q_0, p_1 + SADD8 t9, t9, a + USUB8 a, q_1, q_0 + SHADD8 t9, t9, a + SHADD8 t0, t0, a + SHADD8 t9, t9, m01 + SHADD8 a, t0, m01 + SADD8 t9, q_0, t9 + ;// Q0b ready - t9 + + MOV t4, #0 + UHADD8 apqflg, apqflg, t4 + + SADD8 Q1b, q_0, a + ;// Q1b ready + + USUB8 t4, apqflg, m01 + SEL Q1b, Q1b, q_1 + MVN t11, q_1 + UHSUB8 t10, t10, t11 + M_LDR q_3b, pQ_3 + EOR t10, t10, m01, LSL #7 + SEL t9, t9, t10 + + ;// Compute Q2b + USUB8 t4, q_2, q_0 + SADD8 t4, t0, t4 + EOR t0, q_3b, q_0 + AND t0, t0, m01 + SHADD8 t4, t4, t0 + UHSUB8 t10, q_3b, q_0 + SADD8 t4, t4, m01 + SHADD8 t4, t4, t10 + + USUB8 t10, filt, m01 + SEL Q0b, t9, q_0 + + SADD8 t4, q_0, t4 + ;// Q2b ready - t4 + + USUB8 t10, apqflg, m01 + SEL Q2b, t4, q_2 + + M_END + + ENDIF + + END
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s new file mode 100644 index 0000000..ac448a0 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DecodeCoeffsToPair_s.s @@ -0,0 +1,325 @@ +;// +;// +;// File Name: armVCM4P10_DecodeCoeffsToPair_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armCOMM_BitDec_s.h + + IMPORT armVCM4P10_CAVLCCoeffTokenTables + IMPORT armVCM4P10_CAVLCTotalZeroTables + IMPORT armVCM4P10_CAVLCTotalZeros2x2Tables + IMPORT armVCM4P10_CAVLCRunBeforeTables + IMPORT armVCM4P10_SuffixToLevel + IMPORT armVCM4P10_ZigZag_4x4 + IMPORT armVCM4P10_ZigZag_2x2 + + M_VARIANTS ARM1136JS + +;//DEBUG_ON SETL {TRUE} + +LAST_COEFF EQU 0x20 ;// End of block flag +TWO_BYTE_COEFF EQU 0x10 ;// Set when the level is written as two bytes (outside [-128,127]) + +;// Declare input registers + +ppBitStream RN 0 +pOffset RN 1 +pNumCoeff RN 2 +ppPosCoefbuf RN 3 +nC RN 4 ;// number of coeffs or 17 for chroma +sMaxNumCoeff RN 5 + +;// Declare inner loop registers + +;// Level loop +Count RN 0 +TrailingOnes RN 1 +pLevel RN 2 +LevelSuffix RN 3 +SuffixLength RN 4 +TotalCoeff RN 5 + +pVLDTable RN 6 +Symbol RN 7 +T1 RN 8 +T2 RN 9 +RBitStream RN 10 +RBitBuffer RN 11 +RBitCount RN 12 +lr RN 14 + +;// Run loop +Count RN 0 +ZerosLeft RN 1 +pLevel RN 2 +ppRunTable RN 3 +pRun RN 4 +TotalCoeff RN 5 + +pVLDTable RN 6 +Symbol RN 7 +T1 RN 8 +T2 RN 9 +RBitStream RN 10 +RBitBuffer RN 11 +RBitCount RN 12 +lr RN 14 + +;// Fill in coefficients loop +pPosCoefbuf RN 0 +temp RN 1 +pLevel RN 2 +ppPosCoefbuf RN 3 +pRun RN 4 +TotalCoeff RN 5 +pZigZag RN 6 + +T1 RN 8 +T2 RN 9 +RBitStream RN 10 +RBitBuffer RN 11 +RBitCount RN 12 +CoeffNum RN 14 + + + + IF
ARM1136JS + + ;// Allocate stack memory required by the function + M_ALLOC4 pppBitStream, 4 + M_ALLOC4 ppOffset, 4 + M_ALLOC4 pppPosCoefbuf, 4 + M_ALLOC4 ppLevel, 16*2 + M_ALLOC4 ppRun, 16 + + ;// Write function header + M_START armVCM4P10_DecodeCoeffsToPair, r11 + + ;// Define stack arguments + M_ARG pNC, 4 + M_ARG pSMaxNumCoeff,4 + + ;// Code start + M_BD_INIT0 ppBitStream, pOffset, RBitStream, RBitBuffer, RBitCount + LDR pVLDTable, =armVCM4P10_CAVLCCoeffTokenTables + M_LDR nC, pNC + + M_BD_INIT1 T1, T2, lr + LDR pVLDTable, [pVLDTable, nC, LSL #2] ;// Find VLD table + + M_BD_INIT2 T1, T2, lr + + ;// Decode Symbol = TotalCoeff*4 + TrailingOnes + M_BD_VLD Symbol, T1, T2, pVLDTable, 4, 2 + + MOVS TotalCoeff, Symbol, LSR #2 + STRB TotalCoeff, [pNumCoeff] + M_PRINTF "TotalCoeff=%d\n", TotalCoeff + BEQ.W EndNoError ;// Finished if no coefficients + + CMP Symbol, #17*4 + BGE.W EndBadSymbol ;// Error if bad symbol + + ;// Save bitstream pointers + M_STR ppBitStream, pppBitStream + M_STR pOffset, ppOffset + M_STR ppPosCoefbuf, pppPosCoefbuf + + ;// Decode Trailing Ones + ANDS TrailingOnes, Symbol, #3 + M_ADR pLevel, ppLevel + M_PRINTF "TrailingOnes=%d\n", TrailingOnes + BEQ TrailingOnesDone + MOV Count, TrailingOnes +TrailingOnesLoop + M_BD_READ8 Symbol, 1, T1 + SUBS Count, Count, #1 + MOV T1, #1 + SUB T1, T1, Symbol, LSL #1 ;// Level = 1 - 2*Symbol + M_PRINTF "Level=%d\n", T1 + STRH T1, [pLevel], #2 + BGT TrailingOnesLoop +TrailingOnesDone + + ;// Decode level values + SUBS Count, TotalCoeff, TrailingOnes ;// Number of levels to read + BEQ DecodeRuns ;// None left + + MOV SuffixLength, #1 + CMP TotalCoeff, #10 + MOVLE SuffixLength, #0 + CMP TrailingOnes, #3 ;// if (TrailingOnes<3) + MOVLT TrailingOnes, #4 ;// then TrailingOnes = +4 + MOVGE TrailingOnes, #2 ;// else TrailingOnes = +2 + MOVGE SuffixLength, #0 ;// SuffixLength = 0 + +LevelLoop + M_BD_CLZ16 Symbol, T1, T2 ;// Symbol=LevelPrefix + CMP Symbol,#16 + BGE EndBadSymbol ;// Error if LevelPrefix >= 16 + + MOVS lr, SuffixLength ;// if LevelSuffixSize==0 + TEQEQ Symbol,
#14 ;// and LevelPrefix==14 + MOVEQ lr, #4 ;// then LevelSuffixSize=4 + TEQ Symbol, #15 ;// if LevelPrefix==15 + MOVEQ lr, #12 ;// then LevelSuffixSize=12 + + TEQEQ SuffixLength,#0 ;// if LevelPrefix==15 and SuffixLength==0 + ADDEQ Symbol,Symbol,#15 ;// then add 15 to Symbol + + TEQ lr, #0 ;// if LevelSuffixSize==0 + BEQ LevelCodeRead ;// LevelCode = LevelPrefix + + M_BD_VREAD16 LevelSuffix, lr, T1, T2 ;// Read Level Suffix + + MOV Symbol, Symbol, LSL SuffixLength + ADD Symbol, LevelSuffix, Symbol + +LevelCodeRead + ;// Symbol = LevelCode + ADD Symbol, Symbol, TrailingOnes ;// +4 if level cannot be +/-1, +2 o/w + MOV TrailingOnes, #2 + MOVS T1, Symbol, LSR #1 + RSBCS T1, T1, #0 ;// If Symbol odd then negate + M_PRINTF "Level=%d\n", T1 + STRH T1, [pLevel], #2 ;// Store level. + + LDR T2, =armVCM4P10_SuffixToLevel + LDRSB T1, [T2, SuffixLength] ;// Find increment level + TEQ SuffixLength, #0 + MOVEQ SuffixLength, #1 + CMP Symbol, T1 + ADDCS SuffixLength, SuffixLength, #1 + SUBS Count, Count, #1 + BGT LevelLoop + +DecodeRuns + ;// Find number of zeros + M_LDR T1, pSMaxNumCoeff ;// sMaxNumCoeff + SUB Count, TotalCoeff, #1 ;// Number of runs excluding last + SUBS ZerosLeft, T1, TotalCoeff ;// Maximum number of zeros there could be + M_ADR pRun, ppRun + MOV CoeffNum,TotalCoeff + SUB CoeffNum,CoeffNum,#1 + BEQ NoZerosLeft + + ;// Unpack number of zeros from bitstream + TEQ T1, #4 + LDREQ pVLDTable, =(armVCM4P10_CAVLCTotalZeros2x2Tables-4) + LDRNE pVLDTable, =(armVCM4P10_CAVLCTotalZeroTables-4) + LDR pVLDTable, [pVLDTable, TotalCoeff, LSL #2] + + M_BD_VLD Symbol, T1, T2, pVLDTable, 4, 2 ;// Symbol = ZerosLeft + CMP Symbol,#16 + BGE EndBadSymbol + + LDR ppRunTable, =(armVCM4P10_CAVLCRunBeforeTables-4) + M_ADR pRun, ppRun + MOVS ZerosLeft, Symbol + + ADD CoeffNum,CoeffNum,ZerosLeft + + BEQ NoZerosLeft + + ;// Decode runs while zeros are left and more than one coefficient +RunLoop + SUBS Count, Count, #1 + LDR pVLDTable, [ppRunTable, ZerosLeft, LSL#2] + BLT LastRun + M_BD_VLD Symbol, T1, T2, pVLDTable, 3, 2 ;// Symbol = Run + CMP
Symbol,#15 + BGE EndBadSymbol + + SUBS ZerosLeft, ZerosLeft, Symbol + M_PRINTF "Run=%d\n", Symbol + STRB Symbol, [pRun], #1 + BGT RunLoop + + ;// Decode runs while no zeros are left +NoZerosLeft + SUBS Count, Count, #1 + M_PRINTF "Run=%d\n", ZerosLeft + STRGEB ZerosLeft, [pRun], #1 + BGT NoZerosLeft + +LastRun + ;// Final run length is remaining zeros + M_PRINTF "LastRun=%d\n", ZerosLeft + STRB ZerosLeft, [pRun], #1 + + ;// Write coefficients to output array + M_LDR T1, pSMaxNumCoeff ;// sMaxNumCoeff + TEQ T1, #15 + ADDEQ CoeffNum,CoeffNum,#1 + + + SUB pRun,pRun,TotalCoeff + SUB pLevel,pLevel,TotalCoeff + SUB pLevel,pLevel,TotalCoeff + + M_LDR ppPosCoefbuf, pppPosCoefbuf + LDR pPosCoefbuf, [ppPosCoefbuf] + TEQ T1, #4 + LDREQ pZigZag, =armVCM4P10_ZigZag_2x2 + LDRNE pZigZag, =armVCM4P10_ZigZag_4x4 + + + +OutputLoop + + LDRB T2, [pRun],#1 + LDRB T1, [pZigZag, CoeffNum] + SUB CoeffNum, CoeffNum, #1 ;// Skip Non zero + SUB CoeffNum, CoeffNum, T2 ;// Skip Zero run + + LDRSH T2, [pLevel],#2 + + SUBS TotalCoeff, TotalCoeff, #1 + ORREQ T1, T1, #LAST_COEFF + + ADD temp, T2, #128 + CMP temp, #256 + ORRCS T1, T1, #TWO_BYTE_COEFF ;// Level is outside [-128,127] + + + TEQ TotalCoeff, #0 ;// Preserves carry + + M_PRINTF "Output=%02x %04x\n", T1, T2 + STRB T1, [pPosCoefbuf], #1 + STRB T2, [pPosCoefbuf], #1 + MOV T2, T2, LSR #8 + STRCSB T2, [pPosCoefbuf], #1 ;// High byte written only for two-byte levels + BNE OutputLoop + + ;// Finished + STR pPosCoefbuf, [ppPosCoefbuf] + M_LDR ppBitStream, pppBitStream + M_LDR pOffset, ppOffset + B EndNoError + +EndBadSymbol + MOV r0, #OMX_Sts_Err + B End + +EndNoError + ;// Finished reading from the bitstream + M_BD_FINI ppBitStream, pOffset + + ;// Set return value + MOV r0, #OMX_Sts_NoErr +End + M_END + + ENDIF + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DequantTables_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DequantTables_s.s new file mode 100644 index 0000000..b16f188 --- /dev/null +++
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_DequantTables_s.s @@ -0,0 +1,123 @@ +;// +;// +;// File Name: armVCM4P10_DequantTables_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + EXPORT armVCM4P10_QPDivTable + EXPORT armVCM4P10_VMatrixQPModTable + EXPORT armVCM4P10_PosToVCol4x4 + EXPORT armVCM4P10_PosToVCol2x2 + EXPORT armVCM4P10_VMatrix + EXPORT armVCM4P10_QPModuloTable + EXPORT armVCM4P10_VMatrixU16 + +;// Define the processor variants supported by this file + + M_VARIANTS ARM1136JS + + +;// Guarding implementation by the processor name + + + IF ARM1136JS :LOR: CortexA8 + + + M_TABLE armVCM4P10_PosToVCol4x4 + DCB 0, 2, 0, 2 + DCB 2, 1, 2, 1 + DCB 0, 2, 0, 2 + DCB 2, 1, 2, 1 + + + M_TABLE armVCM4P10_PosToVCol2x2 + DCB 0, 2 + DCB 2, 1 + + + M_TABLE armVCM4P10_VMatrix + DCB 10, 16, 13 + DCB 11, 18, 14 + DCB 13, 20, 16 + DCB 14, 23, 18 + DCB 16, 25, 20 + DCB 18, 29, 23 + +;//------------------------------------------------------- +;// This table evaluates the expression [(INT)(QP/6)], +;// for values of QP from 0 to 51 (inclusive). +;//------------------------------------------------------- + + M_TABLE armVCM4P10_QPDivTable + DCB 0, 0, 0, 0, 0, 0 + DCB 1, 1, 1, 1, 1, 1 + DCB 2, 2, 2, 2, 2, 2 + DCB 3, 3, 3, 3, 3, 3 + DCB 4, 4, 4, 4, 4, 4 + DCB 5, 5, 5, 5, 5, 5 + DCB 6, 6, 6, 6, 6, 6 + DCB 7, 7, 7, 7, 7, 7 + DCB 8, 8, 8, 8, 8, 8 + +;//---------------------------------------------------- +;// This table contains armVCM4P10_VMatrix[QP%6][0] entries, +;// for values of QP from 0 to 51 (inclusive).
+;//---------------------------------------------------- + + M_TABLE armVCM4P10_VMatrixQPModTable + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + DCB 10, 11, 13, 14, 16, 18 + +;//------------------------------------------------------- +;// This table evaluates the modulus expression [QP%6]*6, +;// for values of QP from 0 to 51 (inclusive). +;//------------------------------------------------------- + + M_TABLE armVCM4P10_QPModuloTable + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + DCB 0, 6, 12, 18, 24, 30 + +;//------------------------------------------------------- +;// This table contains the individual byte values stored as +;// halfwords. This avoids unpacking inside the function +;//------------------------------------------------------- + + M_TABLE armVCM4P10_VMatrixU16 + DCW 10, 16, 13 + DCW 11, 18, 14 + DCW 13, 20, 16 + DCW 14, 23, 18 + DCW 16, 25, 20 + DCW 18, 29, 23 + + ENDIF ;//ARM1136JS + + + + + END
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s new file mode 100644 index 0000000..82b9542 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s @@ -0,0 +1,236 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_Align_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe + +DEBUG_ON SETL {FALSE} + + IF ARM1136JS + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 8 +iHeight RN 9 + +;// Declare inner loop registers +x RN 7 +x0 RN 7 +x1 RN 10 +x2 RN 11 +Scratch RN 12 + +;// Function: +;// armVCM4P10_InterpolateLuma_HorAlign9x_unsafe +;// +;// Implements copy from an arbitrarily aligned source memory location (pSrc) to a 4 byte aligned +;// destination pointed to by (pDst) for horizontal interpolation. +;// This function needs to copy 9 bytes in horizontal direction.
+;// +;// Registers used as input for this function +;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy +;// +;// Registers preserved for top level function +;// r2,r3,r4,r5,r6 +;// +;// Registers modified by the function +;// r7,r8,r9,r10,r11,r12 +;// +;// Output registers +;// r0 - pointer to the new aligned location which will be used as pSrc +;// r1 - step size to this aligned location + + ;// Function header + M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + + ;// Copy pDst to scratch + MOV Scratch, pDst + +StartAlignedStackCopy + AND x, pSrc, #3 + BIC pSrc, pSrc, #3 + + M_SWITCH x + M_CASE Copy0toAligned + M_CASE Copy1toAligned + M_CASE Copy2toAligned + M_CASE Copy3toAligned + M_ENDSWITCH + +Copy0toAligned + LDM pSrc, {x0, x1, x2} + SUBS iHeight, iHeight, #1 + ADD pSrc, pSrc, srcStep + + ;// One cycle stall + + STM pDst!, {x0, x1, x2} ;// Store aligned output row + BGT Copy0toAligned + B CopyEnd + +Copy1toAligned + LDM pSrc, {x0, x1, x2} + SUBS iHeight, iHeight, #1 + ADD pSrc, pSrc, srcStep + + ;// One cycle stall + + MOV x0, x0, LSR #8 + ORR x0, x0, x1, LSL #24 + MOV x1, x1, LSR #8 + ORR x1, x1, x2, LSL #24 + MOV x2, x2, LSR #8 + STM pDst!, {x0, x1, x2} ;// Store aligned output row + BGT Copy1toAligned + B CopyEnd + +Copy2toAligned + LDM pSrc, {x0, x1, x2} + SUBS iHeight, iHeight, #1 + ADD pSrc, pSrc, srcStep + + ;// One cycle stall + + MOV x0, x0, LSR #16 + ORR x0, x0, x1, LSL #16 + MOV x1, x1, LSR #16 + ORR x1, x1, x2, LSL #16 + MOV x2, x2, LSR #16 + STM pDst!, {x0, x1, x2} ;// Store aligned output row + BGT Copy2toAligned + B CopyEnd + +Copy3toAligned + LDM pSrc, {x0, x1, x2} + SUBS iHeight, iHeight, #1 + ADD pSrc, pSrc, srcStep + + ;// One cycle stall + + MOV x0, x0, LSR #24 + ORR x0, x0, x1, LSL #8 + MOV x1, x1, LSR #24 + ORR x1, x1, x2, LSL #8 + MOV x2, x2, LSR #24 + STM pDst!, {x0, x1, x2} ;// Store aligned output row + BGT Copy3toAligned + +CopyEnd + + MOV pSrc, Scratch ;// Return the original pDst as the new pSrc + MOV srcStep, #12 ;// Each stored row is 3 words = 12 bytes + + M_END + + +;//
Function: +;// armVCM4P10_InterpolateLuma_VerAlign4x_unsafe +;// +;// Implements copy from an arbitrarily aligned source memory location (pSrc) to an aligned +;// destination pointed to by (pDst) for vertical interpolation. +;// This function needs to copy 4 bytes in horizontal direction +;// +;// Registers used as input for this function +;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy +;// +;// Registers preserved for top level function +;// r2,r3,r4,r5,r6 +;// +;// Registers modified by the function +;// r7,r8,r9,r10,r11,r12 +;// +;// Output registers +;// r0 - pointer to the new aligned location which will be used as pSrc +;// r1 - step size to this aligned location + + ;// Function header + M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe + + ;// Copy pSrc to stack +StartVAlignedStackCopy + AND x, pSrc, #3 + BIC pSrc, pSrc, #3 + + + M_SWITCH x + M_CASE Copy0toVAligned + M_CASE Copy1toVAligned + M_CASE Copy2toVAligned + M_CASE Copy3toVAligned + M_ENDSWITCH + +Copy0toVAligned + M_LDR x0, [pSrc], srcStep + SUBS iHeight, iHeight, #1 + + ;// One cycle stall + + STR x0, [pDst], #4 ;// Store aligned output row + BGT Copy0toVAligned + B CopyVEnd + +Copy1toVAligned + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + SUBS iHeight, iHeight, #1 + + ;// One cycle stall + + MOV x1, x1, LSL #24 + ORR x0, x1, x0, LSR #8 + STR x0, [pDst], #4 ;// Store aligned output row + BGT Copy1toVAligned + B CopyVEnd + +Copy2toVAligned + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + SUBS iHeight, iHeight, #1 + + ;// One cycle stall + + MOV x1, x1, LSL #16 + ORR x0, x1, x0, LSR #16 + STR x0, [pDst], #4 ;// Store aligned output row + BGT Copy2toVAligned + B CopyVEnd + +Copy3toVAligned + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + SUBS iHeight, iHeight, #1 + + ;// One cycle stall + + MOV x1, x1, LSL #8 + ORR x0, x1, x0, LSR #24 + STR x0, [pDst], #4 ;// Store aligned output row + BGT Copy3toVAligned + +CopyVEnd + + SUB pSrc, pDst, #28 + MOV srcStep,
#4 + + M_END + + + ENDIF + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s new file mode 100644 index 0000000..bc0b6ec --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_Copy_unsafe_s.s @@ -0,0 +1,149 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_Copy_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + +;// Function: +;// armVCM4P10_InterpolateLuma_Copy4x4_unsafe +;// +;// Implements copy from an arbitrarily aligned source memory location (pSrc) to an aligned +;// destination pointed to by (pDst) +;// +;// Registers preserved for top level function +;// r1,r3,r4,r5,r6,r7,r10,r11,r14 +;// +;// Registers modified by the function +;// r0,r2,r8,r9,r12 + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 2 +dstStep RN 3 + +;// Declare other intermediate registers +x0 RN 4 +x1 RN 5 +x2 RN 8 +x3 RN 9 +Temp RN 12 + + IF ARM1136JS + + M_START armVCM4P10_InterpolateLuma_Copy4x4_unsafe, r6 + +Copy4x4Start + ;// Do Copy and branch to EndOfInterpolation + AND Temp, pSrc, #3 + BIC pSrc, pSrc, #3 + + M_SWITCH Temp + M_CASE Copy4x4Align0 + M_CASE Copy4x4Align1 + M_CASE Copy4x4Align2 + M_CASE Copy4x4Align3 + M_ENDSWITCH + +Copy4x4Align0 + M_LDR x0, [pSrc], srcStep + M_LDR x1, [pSrc], srcStep + M_STR x0, [pDst], dstStep + M_LDR x2, [pSrc], srcStep + M_STR x1, [pDst], dstStep + M_LDR x3, [pSrc], srcStep + M_STR x2, [pDst], dstStep + M_STR x3, [pDst], dstStep + B Copy4x4End + +Copy4x4Align1 + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + LDR x3, [pSrc, #4] + M_LDR x2,
[pSrc], srcStep + MOV x0, x0, LSR #8 + ORR x0, x0, x1, LSL #24 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #8 + ORR x2, x2, x3, LSL #24 + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + M_STR x2, [pDst], dstStep + LDR x3, [pSrc, #4] + M_LDR x2, [pSrc], srcStep + MOV x0, x0, LSR #8 + ORR x0, x0, x1, LSL #24 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #8 + ORR x2, x2, x3, LSL #24 + M_STR x2, [pDst], dstStep + B Copy4x4End + +Copy4x4Align2 + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + LDR x3, [pSrc, #4] + M_LDR x2, [pSrc], srcStep + MOV x0, x0, LSR #16 + ORR x0, x0, x1, LSL #16 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #16 + ORR x2, x2, x3, LSL #16 + M_STR x2, [pDst], dstStep + + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + LDR x3, [pSrc, #4] + M_LDR x2, [pSrc], srcStep + MOV x0, x0, LSR #16 + ORR x0, x0, x1, LSL #16 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #16 + ORR x2, x2, x3, LSL #16 + M_STR x2, [pDst], dstStep + B Copy4x4End + +Copy4x4Align3 + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + LDR x3, [pSrc, #4] + M_LDR x2, [pSrc], srcStep + MOV x0, x0, LSR #24 + ORR x0, x0, x1, LSL #8 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #24 + ORR x2, x2, x3, LSL #8 + M_STR x2, [pDst], dstStep + + LDR x1, [pSrc, #4] + M_LDR x0, [pSrc], srcStep + LDR x3, [pSrc, #4] + M_LDR x2, [pSrc], srcStep + MOV x0, x0, LSR #24 + ORR x0, x0, x1, LSL #8 + M_STR x0, [pDst], dstStep + MOV x2, x2, LSR #24 + ORR x2, x2, x3, LSL #8 + M_STR x2, [pDst], dstStep + B Copy4x4End + +Copy4x4End + M_END + + ENDIF + + END +
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s new file mode 100644 index 0000000..66cfe5e --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s @@ -0,0 +1,178 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe + EXPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe + +;// Functions: +;// armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe and +;// armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe +;// +;// Implements re-arrangement of data from temporary buffer to a buffer pointed to by pBuf. +;// This will do the conversion of data from 16 bit to 8 bit and it also +;// removes the offset and checks for saturation.
+;// +;// Registers used as input for this function +;// r0,r1,r7 where r0 is input pointer and r1 its step size, r7 is output pointer +;// +;// Registers preserved for top level function +;// r4,r5,r6,r8,r9,r14 +;// +;// Registers modified by the function +;// r7,r10,r11,r12 +;// +;// Output registers +;// r0 - pointer to the destination location +;// r1 - step size to this destination location + + +DEBUG_ON SETL {FALSE} + +MASK EQU 0x80808080 ;// Mask is used to implement (a+b+1)/2 + +;// Declare input registers + +pSrc0 RN 0 +srcStep0 RN 1 + +;// Declare other intermediate registers +Temp1 RN 4 +Temp2 RN 5 +Temp3 RN 10 +Temp4 RN 11 +pBuf RN 7 +r0x0fe00fe0 RN 6 +r0x00ff00ff RN 12 +Count RN 14 +ValueA0 RN 10 +ValueA1 RN 11 + + IF ARM1136JS + + + ;// Function header + M_START armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe, r6 + + ;// Code start + MOV Count, #4 + LDR r0x0fe00fe0, =0x0fe00fe0 + LDR r0x00ff00ff, =0x00ff00ff +LoopStart1 + LDR Temp4, [pSrc0, #12] + LDR Temp3, [pSrc0, #8] + LDR Temp2, [pSrc0, #4] + M_LDR Temp1, [pSrc0], srcStep0 + UQSUB16 Temp4, Temp4, r0x0fe00fe0 + UQSUB16 Temp3, Temp3, r0x0fe00fe0 + UQSUB16 Temp2, Temp2, r0x0fe00fe0 + UQSUB16 Temp1, Temp1, r0x0fe00fe0 + USAT16 Temp4, #13, Temp4 + USAT16 Temp3, #13, Temp3 + USAT16 Temp2, #13, Temp2 + USAT16 Temp1, #13, Temp1 + AND Temp4, r0x00ff00ff, Temp4, LSR #5 + AND Temp3, r0x00ff00ff, Temp3, LSR #5 + AND Temp2, r0x00ff00ff, Temp2, LSR #5 + AND Temp1, r0x00ff00ff, Temp1, LSR #5 + ORR ValueA1, Temp3, Temp4, LSL #8 + ORR ValueA0, Temp1, Temp2, LSL #8 + SUBS Count, Count, #1 + STRD ValueA0, [pBuf], #8 + BGT LoopStart1 +End1 + SUB pSrc0, pBuf, #32 ;// Rewind over the 4 x 8 bytes just written + MOV srcStep0, #8 ;// Each iteration stored 8 bytes + + M_END + + + ;// Function header + M_START armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe, r6 + + ;// Code start + LDR r0x0fe00fe0, =0x0fe00fe0 + LDR r0x00ff00ff, =0x00ff00ff + MOV Count, #2 + +LoopStart + LDR Temp4, [pSrc0, #12] + LDR Temp3, [pSrc0, #8] + LDR Temp2, [pSrc0, #4] + M_LDR Temp1, [pSrc0], srcStep0 + + UQSUB16 Temp4, Temp4,
r0x0fe00fe0 + UQSUB16 Temp3, Temp3, r0x0fe00fe0 + UQSUB16 Temp2, Temp2, r0x0fe00fe0 + UQSUB16 Temp1, Temp1, r0x0fe00fe0 + + USAT16 Temp4, #13, Temp4 + USAT16 Temp3, #13, Temp3 + USAT16 Temp2, #13, Temp2 + USAT16 Temp1, #13, Temp1 + + AND Temp4, r0x00ff00ff, Temp4, LSR #5 + AND Temp3, r0x00ff00ff, Temp3, LSR #5 + AND Temp2, r0x00ff00ff, Temp2, LSR #5 + AND Temp1, r0x00ff00ff, Temp1, LSR #5 + ORR ValueA1, Temp3, Temp4, LSL #8 ;// [d2 c2 d0 c0] + ORR ValueA0, Temp1, Temp2, LSL #8 ;// [b2 a2 b0 a0] + + PKHBT Temp1, ValueA0, ValueA1, LSL #16 ;// [d0 c0 b0 a0] + + STR Temp1, [pBuf], #8 + PKHTB Temp2, ValueA1, ValueA0, ASR #16 ;// [d2 c2 b2 a2] + STR Temp2, [pBuf], #-4 + + LDR Temp4, [pSrc0, #12] + LDR Temp3, [pSrc0, #8] + LDR Temp2, [pSrc0, #4] + M_LDR Temp1, [pSrc0], srcStep0 + + UQSUB16 Temp4, Temp4, r0x0fe00fe0 + UQSUB16 Temp3, Temp3, r0x0fe00fe0 + UQSUB16 Temp2, Temp2, r0x0fe00fe0 + UQSUB16 Temp1, Temp1, r0x0fe00fe0 + + USAT16 Temp4, #13, Temp4 + USAT16 Temp3, #13, Temp3 + USAT16 Temp2, #13, Temp2 + USAT16 Temp1, #13, Temp1 + + AND Temp4, r0x00ff00ff, Temp4, LSR #5 + AND Temp3, r0x00ff00ff, Temp3, LSR #5 + AND Temp2, r0x00ff00ff, Temp2, LSR #5 + AND Temp1, r0x00ff00ff, Temp1, LSR #5 + ORR ValueA1, Temp3, Temp4, LSL #8 ;// [d2 c2 d0 c0] + ORR ValueA0, Temp1, Temp2, LSL #8 ;// [b2 a2 b0 a0] + + PKHBT Temp1, ValueA0, ValueA1, LSL #16 ;// [d0 c0 b0 a0] + SUBS Count, Count, #1 + STR Temp1, [pBuf], #8 + PKHTB Temp2, ValueA1, ValueA0, ASR #16 ;// [d2 c2 b2 a2] + STR Temp2, [pBuf], #4 + + BGT LoopStart +End2 + SUB pSrc0, pBuf, #32-8 + MOV srcStep0, #4 + + M_END + + ENDIF + + END +
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s new file mode 100644 index 0000000..851ff6a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s @@ -0,0 +1,296 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + EXPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe + + M_VARIANTS ARM1136JS + + + + IF ARM1136JS + + + M_ALLOC8 ppDstArgs, 8 + M_ALLOC8 pTempResult1, 8 + M_ALLOC8 pTempResult2, 8 + M_ALLOC4 ppSrc, 4 + M_ALLOC4 ppDst, 4 + M_ALLOC4 pDstStep, 4 + M_ALLOC4 pSrcStep, 4 + M_ALLOC4 pCounter, 4 + + ;// Function header + ;// Function: + ;// armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe + ;// + ;// Implements diagonal interpolation for a block of size 4x4. Input and output should + ;// be aligned. + ;// + ;// Registers used as input for this function + ;// r0,r1,r2,r3, r8 where r0,r2 input pointer and r1,r3 step size, r8 intermediate-buf pointer + ;// + ;// Registers preserved for top level function + ;// r0,r1,r2,r3,r4,r5,r6,r14 + ;// + ;// Registers modified by the function + ;// r7,r8,r9,r10,r11,r12 + ;// + ;// Output registers + ;// None.
Function will preserve r0-r3 + + M_START armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe, r6 + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 2 +dstStep RN 3 + +;// Declare inner loop registers +Acc0 RN 4 +Acc1 RN 5 +Acc2 RN 6 +Acc3 RN 7 + +ValA RN 4 +ValB RN 5 +ValC RN 6 +ValD RN 7 +ValE RN 8 +ValF RN 9 +ValG RN 12 +ValH RN 14 +ValI RN 1 + +Temp1 RN 3 +Temp2 RN 1 +Temp3 RN 12 +Temp4 RN 7 +Temp5 RN 5 +r0x0fe00fe0 RN 3 ;// [0 (16*255 - 16) 0 (16*255 - 16)] +r0x00ff00ff RN 10 ;// [0 255 0 255] where 255 is offset +Counter RN 11 +pInterBuf RN 8 + +ValCA RN 8 +ValDB RN 9 +ValGE RN 10 +ValHF RN 11 +r0x00140001 RN 12 +r0x0014fffb RN 14 + +r0x0001fc00 RN 11 + +Accx RN 8 +Accy RN 9 +Temp6 RN 14 + + M_STRD pDst, dstStep, ppDstArgs + + MOV pDst, pInterBuf + MOV dstStep, #16 + + ;// Set up loop counter (HeightLoop repeats while Counter stays non-negative) + MOV Counter, #4 + M_STR dstStep, pDstStep + M_STR srcStep, pSrcStep + LDR r0x00ff00ff, =0x00ff00ff ;// [0 255 0 255] 255 is offset to avoid negative results + +HeightLoop +NextTwoRowsLoop + LDR ValD, [pSrc, srcStep] ;// Load row 1 [d1 c1 b1 a1] + LDR ValA, [pSrc], #4 ;// Load row 0 [d0 c0 b0 a0] + LDR ValH, [pSrc, srcStep] ;// Load [h1 g1 f1 e1] + LDR ValE, [pSrc], #4 ;// Load [h0 g0 f0 e0] + LDRB Temp2, [pSrc, srcStep] ;// Load row 1 [l1 k1 j1 i1] + LDRB Temp1, [pSrc], #-8 ;// Load row 0 [l0 k0 j0 i0] + + PKHBT ValB, ValA, ValD, LSL #16 ;// [b1 a1 b0 a0] + PKHTB ValD, ValD, ValA, ASR #16 ;// [d1 c1 d0 c0] + UXTAB16 ValA, r0x00ff00ff, ValB ;// [00 a1 00 a0] + [0 255 0 255] + UXTAB16 ValC, r0x00ff00ff, ValD ;// [00 c1 00 c0] + [0 255 0 255] + PKHBT ValI, Temp1, Temp2, LSL #16 ;// [00 i1 00 i0] + PKHBT ValF, ValE, ValH, LSL #16 ;// [f1 e1 f0 e0] + PKHTB ValH, ValH, ValE, ASR #16 ;// [h1 g1 h0 g0] + UXTAB16 ValE, r0x00ff00ff, ValF ;// [00 e1 00 e0] + [0 255 0 255] + + ;// Calculate Acc0 + ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f + UXTAB16 Temp1, ValC, ValD, ROR #8 + UXTAB16 Temp3, ValE, ValB, ROR #8 + RSB Temp1, Temp3,
Temp1, LSL #2 + UXTAB16 Acc0, ValA, ValF, ROR #8 + ADD Temp1, Temp1, Temp1, LSL #2 + ADD Acc0, Acc0, Temp1 + + ;// Calculate Acc1 + ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g + UXTAB16 Temp1, ValE, ValD, ROR #8 + UXTAB16 Temp3, ValC, ValF, ROR #8 + RSB Temp1, Temp3, Temp1, LSL #2 + UXTAB16 ValG, r0x00ff00ff, ValH ;// [00 g1 00 g0] + [0 255 0 255] + ADD Temp1, Temp1, Temp1, LSL #2 + UXTAB16 Acc1, ValG, ValB, ROR #8 + ADD Acc1, Acc1, Temp1 + + UXTAB16 Acc2, ValC, ValH, ROR #8 + ADD ValI, r0x00ff00ff, ValI ;// [00 i1 00 i0] + [0 255 0 255] + + ;// Calculate Acc2 + ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h + UXTAB16 Temp1, ValG, ValD, ROR #8 + UXTAB16 Acc3, ValI, ValD, ROR #8 + UXTAB16 Temp2, ValE, ValF, ROR #8 + + RSB Temp1, Temp1, Temp2, LSL #2 + UXTAB16 Temp2, ValG, ValF, ROR #8 + ADD Temp1, Temp1, Temp1, LSL #2 + ADD Acc2, Acc2, Temp1 + + ;// Calculate Acc3 + ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i + UXTAB16 Temp1, ValE, ValH, ROR #8 + RSB Temp1, Temp1, Temp2, LSL #2 + ADD Temp1, Temp1, Temp1, LSL #2 + ADD Acc3, Acc3, Temp1 + + M_LDR dstStep, pDstStep + M_LDR srcStep, pSrcStep + + ;// If Counter is even store Acc0-Acc3 in a temporary buffer + ;// If Counter is odd store Acc0-Acc3 and previous Acc0-Acc3 in a intermediate buf + ANDS Temp3, Counter, #1 + BEQ NoProcessing + + ;// Packing previous and current Acc0-Acc3 values + M_LDRD Accx, Accy, pTempResult1 + PKHBT Temp6, Accx, Acc0, LSL #16 ;//[0 a2 0 a0] = [0 a3 0 a2] [0 a1 0 a0] + PKHTB Acc0, Acc0, Accx, ASR #16 ;//[0 a3 0 a1] = [0 a1 0 a0] [0 a3 0 a2] + STR Acc0, [pDst, dstStep] + STR Temp6, [pDst], #4 + PKHBT Temp6, Accy, Acc1, LSL #16 ;//[0 b2 0 b0] = [0 b3 0 b2] [0 b1 0 b0] + PKHTB Acc1, Acc1, Accy, ASR #16 ;//[0 b3 0 b1] = [0 b1 0 b0] [0 b3 0 b2] + M_LDRD Accx, Accy, pTempResult2 + STR Acc1, [pDst, dstStep] + STR Temp6, [pDst], #4 + + PKHBT Temp6, Accx, Acc2, LSL #16 ;//[0 c2 0 c0] = [0 c3 0 c2] [0 c1 0 c0] + PKHTB Acc2, Acc2, Accx, ASR #16 ;//[0 c3 0 c1] = [0 c1 0 c0] [0 c3 0 c2] + STR Acc2, [pDst,
dstStep] + STR Temp6, [pDst], #4 + PKHBT Temp6, Accy, Acc3, LSL #16 ;//[0 d2 0 d0] = [0 d3 0 d2] [0 d1 0 d0] + PKHTB Acc3, Acc3, Accy, ASR #16 ;//[0 d3 0 d1] = [0 d1 0 d0] [0 d3 0 d2] + STR Acc3, [pDst, dstStep] + STR Temp6, [pDst], #-12 + ADD pDst, pDst, dstStep, LSL #1 + B AfterStore + +NoProcessing + M_STRD Acc0, Acc1, pTempResult1 + M_STRD Acc2, Acc3, pTempResult2 +AfterStore + SUBS Counter, Counter, #1 ;// Loop till height is 10 + ADD pSrc, pSrc, srcStep, LSL #1 + BPL HeightLoop + + STR Acc0, [pDst], #4 ;//[0 a1 0 a0] + STR Acc1, [pDst], #4 + STR Acc2, [pDst], #4 + STR Acc3, [pDst], #-12 + + ;// + ;// Horizontal interpolation using multiplication + ;// + + SUB pSrc, pDst, dstStep, LSL #2 + MOV srcStep, #16 + M_LDRD pDst, dstStep, ppDstArgs + + MOV Counter, #4 + LDR r0x0014fffb, =0x0014fffb + LDR r0x00140001, =0x00140001 + +HeightLoop1 + M_STR Counter, pCounter + + M_LDR ValCA, [pSrc], srcStep ;// Load [0 c 0 a] + M_LDR ValDB, [pSrc], srcStep ;// Load [0 d 0 b] + M_LDR ValGE, [pSrc], srcStep ;// Load [0 g 0 e] + M_LDR ValHF, [pSrc], srcStep ;// Load [0 h 0 f] + + + ;// Acc0 = smuad ([0 20 0 1], add([0 c 0 a] + [0 d 0 f])) - (5 * (b + e)) + ;// Acc1 = smuad ([0 20 0 1], add([0 e 0 g] + [0 d 0 b])) - (5 * (c + f)) + ;// Acc2 = smuad ([0 1 0 20], add([0 c 0 e] + [0 h 0 f])) - (5 * (d + g)) + ;// Acc3 = smuad ([0 20 0 1], add([0 d 0 f] + [0 i 0 g])) - (5 * (e + h)) + + SMUAD Acc0, ValCA, r0x00140001 ;// Acc0 = [0 c 0 a] * [0 20 0 1] + SMUAD Acc1, ValDB, r0x00140001 ;// Acc1 = [0 d 0 b] * [0 20 0 1] + SMUADX Acc2, ValGE, r0x0014fffb ;// Acc2 = [0 g 0 e] * [0 20 0 -5] + SMUAD Acc3, ValGE, r0x0014fffb ;// Acc3 = [0 g 0 e] * [0 20 0 -5] + + SMLAD Acc0, ValDB, r0x0014fffb, Acc0 ;// Acc0 += [0 d 0 b] * [0 20 0 -5] + SMLADX Acc1, ValGE, r0x00140001, Acc1 ;// Acc1 += [0 g 0 e] * [0 20 0 1] + SMLADX Acc2, ValHF, r0x00140001, Acc2 ;// Acc2 += [0 h 0 f] * [0 20 0 1] + SMLADX Acc3, ValHF, r0x0014fffb, Acc3 ;// Acc3 += [0 h 0 f] * [0 20 0 -5] + + SMLABB Acc0, ValGE,
r0x0014fffb, Acc0 ;// Acc0 += [0 g 0 e] * [0 0 0 -5] + SMLATB Acc1, ValCA, r0x0014fffb, Acc1 ;// Acc1 += [0 c 0 a] * [0 0 0 -5] (top halfword of ValCA) + SMLATB Acc2, ValCA, r0x00140001, Acc2 ;// Acc2 += [0 c 0 a] * [0 0 0 1] + SMLATB Acc3, ValDB, r0x00140001, Acc3 ;// Acc3 += [0 d 0 b] * [0 0 0 1] (top halfword of ValDB) + + LDRH ValCA, [pSrc], #4 ;// Load next halfword into ValCA, then pSrc += 4 + SMLABB Acc0, ValHF, r0x00140001, Acc0 ;// Acc0 += [0 h 0 f] * [0 0 0 1] + SMLABB Acc1, ValHF, r0x0014fffb, Acc1 ;// Acc1 += [0 h 0 f] * [0 0 0 -5] + SMLATB Acc2, ValDB, r0x0014fffb, Acc2 ;// Acc2 += [0 d 0 b] * [0 0 0 -5] + SMLABB Acc3, ValCA, r0x00140001, Acc3 ;// Acc3 += (halfword just loaded into ValCA) * 1 + + LDR r0x0001fc00, =0x0001fc00 ;// (0xff * 16 * 32) - 512 + SUB Acc0, Acc0, r0x0001fc00 + SUB Acc1, Acc1, r0x0001fc00 + SUB Acc2, Acc2, r0x0001fc00 + SUB Acc3, Acc3, r0x0001fc00 + + USAT Acc0, #18, Acc0 + USAT Acc1, #18, Acc1 + USAT Acc2, #18, Acc2 + USAT Acc3, #18, Acc3 + + MOV Acc0, Acc0, LSR #10 + M_STRB Acc0, [pDst], dstStep + MOV Acc1, Acc1, LSR #10 + M_STRB Acc1, [pDst], dstStep + MOV Acc2, Acc2, LSR #10 + M_STRB Acc2, [pDst], dstStep + MOV Acc3, Acc3, LSR #10 + M_STRB Acc3, [pDst], dstStep + + + M_LDR Counter, pCounter + SUB pDst, pDst, dstStep, LSL #2 + SUB pSrc, pSrc, srcStep, LSL #2 + ADD pDst, pDst, #1 + SUBS Counter, Counter, #1 + BGT HeightLoop1 +End + SUB pDst, pDst, #4 + SUB pSrc, pSrc, #16 + + M_END + + ENDIF + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s new file mode 100644 index 0000000..2f48e13 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s @@ -0,0 +1,276 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + EXPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe + + M_VARIANTS ARM1136JS + + + + + + IF ARM1136JS + + M_ALLOC8 ppDstArgs, 8 + M_ALLOC4 ppSrc, 4 + M_ALLOC4 ppDst, 4 + M_ALLOC4 pCounter, 4 + + ;// Function header + ;// Function: + ;// armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe + ;// + ;// Implements diagonal interpolation for a block of size 4x4. Input and output should + ;// be aligned. + ;// + ;// Registers used as input for this function + ;// r0,r1,r2,r3, r8 where r0,r2 input pointer and r1,r3 step size, r8 intermediate-buf pointer + ;// + ;// Registers preserved for top level function + ;// r0,r1,r2,r3,r4,r5,r6,r14 + ;// + ;// Registers modified by the function + ;// r7,r8,r9,r10,r11,r12 + ;// + ;// Output registers + ;// None. Function will preserve r0-r3 + + M_START armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe, r6 + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 2 +dstStep RN 3 + +;// Declare inner loop registers +ValA RN 5 +ValA0 RN 4 +ValA1 RN 5 +ValAF0 RN 4 +ValAF1 RN 5 + +ValB RN 11 + +ValC RN 5 +ValC0 RN 4 +ValC1 RN 5 +ValCD0 RN 12 +ValCD1 RN 14 +ValCF0 RN 4 +ValCF1 RN 5 + +ValD RN 10 + +ValE RN 7 +ValE0 RN 6 +ValE1 RN 7 +ValEB0 RN 10 +ValEB1 RN 11 +ValED0 RN 6 +ValED1 RN 7 + +ValF RN 10 + +ValG RN 14 +ValG0 RN 12 +ValG1 RN 14 +ValGB0 RN 12 +ValGB1 RN 14 + +Acc0 RN 4 +Acc1 RN 5 +Acc2 RN 6 +Acc3 RN 7 + +Temp RN 7 +Step RN 6 + +pInterBuf RN 8 +Counter RN 8 +r0x00ff00ff RN 9 ;// [0 255 0 255] where 255 is offset +r0x0001fc00 RN 10 ;// (0xff * 16 * 32) - 512, subtracted from each horizontal result + + +;// Declare inner loop registers +ValCA RN 8 +ValDB RN 9 +ValGE RN 10 +ValHF RN 11 +r0x00140001 RN 12 +r0x0014fffb RN 14 + +r0x00000200 RN 12 +r0x000000ff RN 12 + + M_STRD pDst, dstStep, ppDstArgs + MOV pDst, pInterBuf + MOV dstStep, #24 + + ;// Set up counter of format, [0] [0] [1 (height)] [8 (width)] + MOV Counter, #1 
+ MOV Temp, #8 + ADD Counter, Temp, Counter, LSL #8 ;// [0 0 H W] + + LDR r0x00ff00ff, =0x00ff00ff ;// [0 255 0 255] 255 is offset to avoid negative results +WidthLoop + M_STR pSrc, ppSrc + M_STR pDst, ppDst +HeightLoop +TwoRowsLoop + M_LDR ValC, [pSrc], srcStep ;// Load [c3 c2 c1 c0] + M_LDR ValD, [pSrc], srcStep ;// Load [d3 d2 d1 d0] + M_LDR ValE, [pSrc], srcStep ;// Load [e3 e2 e1 e0] + SUB pSrc, pSrc, srcStep, LSL #2 + UXTAB16 ValC0, r0x00ff00ff, ValC ;// [0 c2 0 c0] + [0 255 0 255] + UXTAB16 ValC1, r0x00ff00ff, ValC, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + LDR ValB, [pSrc] ;// Load [b3 b2 b1 b0] + UXTAB16 ValE0, r0x00ff00ff, ValE ;// [0 e2 0 e0] + [0 255 0 255] + UXTAB16 ValE1, r0x00ff00ff, ValE, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + UXTAB16 ValCD0, ValC0, ValD ;// [0 c2 0 c0] + [0 255 0 255] + [0 d2 0 d0] + UXTAB16 ValCD1, ValC1, ValD, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + [0 d3 0 d1] + UXTAB16 ValEB0, ValE0, ValB ;// [0 e2 0 e0] + [0 255 0 255] + [0 b2 0 b0] + RSB ValCD0, ValEB0, ValCD0, LSL #2 ;// 4*(Off+C+D) - (Off+B+E) + + LDR ValD, [pSrc, srcStep, LSL #1] ;// Load [d3 d2 d1 d0] + UXTAB16 ValEB1, ValE1, ValB, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + [0 b3 0 b1] + RSB ValCD1, ValEB1, ValCD1, LSL #2 + + UXTAB16 ValED0, ValE0, ValD ;// [0 e2 0 e0] + [0 255 0 255] + [0 d2 0 d0] + UXTAB16 ValED1, ValE1, ValD, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + [0 d3 0 d1] + LDR ValF, [pSrc, srcStep, LSL #2] ;// Load [f3 f2 f1 f0] + M_LDR ValB, [pSrc], srcStep ;// Load [b3 b2 b1 b0] + ADD ValCD0, ValCD0, ValCD0, LSL #2 ;// 5 * [4*(Off+C+D) - (Off+B+E)] + ADD ValCD1, ValCD1, ValCD1, LSL #2 + UXTAB16 ValCF1, ValC1, ValF, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + [0 f3 0 f1] + UXTAB16 ValCF0, ValC0, ValF ;// [0 c2 0 c0] + [0 255 0 255] + [0 f2 0 f0] + RSB ValED1, ValCF1, ValED1, LSL #2 + + SUB ValA, pSrc, srcStep, LSL #1 + LDR ValA, [ValA] ;// Load [a3 a2 a1 a0] + RSB ValED0, ValCF0, ValED0, LSL #2 ;// 4*(Off+E+D) - (Off+C+F) + ADD ValED1, ValED1, ValED1, LSL #2 + ADD 
ValED0, ValED0, ValED0, LSL #2 ;// 5 * [4*(Off+E+D) - (Off+C+F)] + UXTAB16 ValA0, r0x00ff00ff, ValA ;// [0 a2 0 a0] + [0 255 0 255] + UXTAB16 ValA1, r0x00ff00ff, ValA, ROR #8 ;// [0 a3 0 a1] + [0 255 0 255] + UXTAB16 ValAF0, ValA0, ValF ;// [0 a2 0 a0] + [0 255 0 255] + [0 f2 0 f0] + UXTAB16 ValAF1, ValA1, ValF, ROR #8 ;// [0 a3 0 a1] + [0 255 0 255] + [0 f3 0 f1] + ADD Acc1, ValCD1, ValAF1 + + LDR ValG, [pSrc, srcStep, LSL #2] ;// Load [g3 g2 g1 g0] + ADD Acc0, ValCD0, ValAF0 ;// Acc0 = 16*Off + (A+F) + 20*(C+D) - 5*(B+E) + STR Acc1, [pDst, #4] ;// Store result at [pDst, #4] (pDst unchanged) + M_STR Acc0, [pDst], dstStep ;// Store result & adjust pointer + UXTAB16 ValG0, r0x00ff00ff, ValG ;// [0 g2 0 g0] + [0 255 0 255] + UXTAB16 ValG1, r0x00ff00ff, ValG, ROR #8 ;// [0 g3 0 g1] + [0 255 0 255] + UXTAB16 ValGB0, ValG0, ValB ;// [0 g2 0 g0] + [0 255 0 255] + [0 b2 0 b0] + UXTAB16 ValGB1, ValG1, ValB, ROR #8 ;// [0 g3 0 g1] + [0 255 0 255] + [0 b3 0 b1] + ADD Acc2, ValED0, ValGB0 ;// Acc2 = 16*Off + (B+G) + 20*(D+E) - 5*(C+F) + ADD Acc3, ValED1, ValGB1 + + STR Acc3, [pDst, #4] ;// Store result at [pDst, #4] (pDst unchanged) + M_STR Acc2, [pDst], dstStep ;// Store result & adjust pointer + + SUBS Counter, Counter, #1 << 8 ;// Height field counts row pairs: body runs twice (4 rows) + ADD pSrc, pSrc, srcStep, LSL #1 + BPL HeightLoop + + M_LDR pSrc, ppSrc + M_LDR pDst, ppDst + ADDS Counter, Counter, #(1 << 8)-4 ;// Loop till width is 12 + ADD pSrc, pSrc, #4 + ADD pDst, pDst, #8 + ADD Counter, Counter, #1<<8 + BPL WidthLoop + + ;// + ;// Horizontal interpolation using multiplication + ;// + + SUB pSrc, pDst, #24 + MOV srcStep, #24 + M_LDRD pDst, dstStep, ppDstArgs + + MOV Counter, #4 + LDR r0x0014fffb, =0x0014fffb + LDR r0x00140001, =0x00140001 + +HeightLoop1 + M_STR Counter, pCounter + + + LDR ValCA, [pSrc], #4 ;// Load [0 c 0 a] + LDR ValDB, [pSrc], #4 ;// Load [0 d 0 b] + LDR ValGE, [pSrc], #4 ;// Load [0 g 0 e] + LDR ValHF, [pSrc], #4 ;// Load [0 h 0 f] + + ;// Acc0 = smuad ([0 20 0 1], add([0 c 0 a] + [0 d 0 f])) - (5 * (b + 
e)) + ;// Acc1 = smuad ([0 20 0 1], add([0 e 0 g] + [0 d 0 b])) - (5 * (c + f)) + ;// Acc2 = smuad ([0 1 0 20], add([0 c 0 e] + [0 h 0 f])) - (5 * (d + g)) + ;// Acc3 = smuad ([0 20 0 1], add([0 d 0 f] + [0 i 0 g])) - (5 * (e + h)) + SMUAD Acc0, ValCA, r0x00140001 ;// Acc0 = [0 c 0 a] * [0 20 0 1] + SMUAD Acc1, ValDB, r0x00140001 ;// Acc1 = [0 d 0 b] * [0 20 0 1] + SMUADX Acc2, ValGE, r0x0014fffb ;// Acc2 = [0 g 0 e] * [0 20 0 -5] + SMUAD Acc3, ValGE, r0x0014fffb ;// Acc3 = [0 g 0 e] * [0 20 0 -5] + + SMLAD Acc0, ValDB, r0x0014fffb, Acc0 ;// Acc0 += [0 d 0 b] * [0 20 0 -5] + SMLADX Acc1, ValGE, r0x00140001, Acc1 ;// Acc1 += [0 g 0 e] * [0 20 0 1] + SMLADX Acc2, ValHF, r0x00140001, Acc2 ;// Acc2 += [0 h 0 f] * [0 20 0 1] + SMLADX Acc3, ValHF, r0x0014fffb, Acc3 ;// Acc3 += [0 h 0 f] * [0 20 0 -5] + + SMLABB Acc0, ValGE, r0x0014fffb, Acc0 ;// Acc0 += [0 g 0 e] * [0 0 0 -5] + SMLATB Acc1, ValCA, r0x0014fffb, Acc1 ;// Acc1 += [0 c 0 a] * [0 0 0 -5] + SMLATB Acc2, ValCA, r0x00140001, Acc2 ;// Acc2 += [0 c 0 a] * [0 0 0 1] + SMLATB Acc3, ValDB, r0x00140001, Acc3 ;// Acc3 += [0 d 0 b] * [0 0 0 1] + + LDRH ValCA, [pSrc], #8 ;// Load [0 0 0 i]; 8 = srcStep - 16 + SMLABB Acc0, ValHF, r0x00140001, Acc0 ;// Acc0 += [0 h 0 f] * [0 0 0 1] + SMLABB Acc1, ValHF, r0x0014fffb, Acc1 ;// Acc1 += [0 h 0 f] * [0 0 0 -5] + SMLATB Acc2, ValDB, r0x0014fffb, Acc2 ;// Acc2 += [0 d 0 b] * [0 0 0 -5] + SMLABB Acc3, ValCA, r0x00140001, Acc3 ;// Acc3 += [0 0 0 i] * [0 0 0 1] + + LDR r0x0001fc00, =0x0001fc00 ;// (0xff * 16 * 32) - 512 + SUB Acc0, Acc0, r0x0001fc00 + SUB Acc1, Acc1, r0x0001fc00 + SUB Acc2, Acc2, r0x0001fc00 + SUB Acc3, Acc3, r0x0001fc00 + + USAT Acc0, #18, Acc0 + USAT Acc1, #18, Acc1 + USAT Acc2, #18, Acc2 + USAT Acc3, #18, Acc3 + + MOV Acc0, Acc0, LSR #10 + MOV Acc1, Acc1, LSR #10 + MOV Acc2, Acc2, LSR #10 + MOV Acc3, Acc3, LSR #10 + + M_LDR Counter, pCounter + ORR Acc0, Acc0, Acc1, LSL #8 + ORR Acc2, Acc2, Acc3, LSL #8 + SUBS Counter, Counter, #1 + ORR Acc0, Acc0, Acc2, LSL #16 + M_STR Acc0, 
[pDst], dstStep + BGT HeightLoop1 +End + SUB pDst, pDst, dstStep, LSL #2 + SUB pSrc, pSrc, srcStep, LSL #2 + + M_END + + ENDIF + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s new file mode 100644 index 0000000..6690ced --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s @@ -0,0 +1,239 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + +DEBUG_ON SETL {FALSE} + + + IF ARM1136JS + +;// Function: +;// armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe +;// +;// Implements horizontal interpolation for a block of size 4x4. Input and output should +;// be aligned. +;// +;// Registers used as input for this function +;// r0,r1,r2,r3 where r0,r2 input pointer and r1,r3 corresponding step size +;// +;// Registers preserved for top level function +;// r0,r1,r2,r3,r4,r5,r6,r14 +;// +;// Registers modified by the function +;// r7,r8,r9,r10,r11,r12 +;// +;// Output registers +;// None. 
Function will preserve r0-r3 + + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 2 +dstStep RN 3 + +;// Declare inner loop registers +Acc0 RN 4 +Acc1 RN 5 +Acc2 RN 6 +Acc3 RN 7 + +ValA RN 4 +ValB RN 5 +ValC RN 6 +ValD RN 7 +ValE RN 8 +ValF RN 9 +ValG RN 12 +ValH RN 14 +ValI RN 1 + +Temp1 RN 3 +Temp2 RN 1 +Temp3 RN 12 +Temp4 RN 7 +Temp5 RN 5 +r0x0fe00fe0 RN 3 ;// [0 (16*255 - 16) 0 (16*255 - 16)] +r0x00ff00ff RN 10 ;// [0 255 0 255] where 255 is offset +Counter RN 11 + +Height RN 3 + + M_ALLOC4 pDstStep, 4 + M_ALLOC4 pSrcStep, 4 + + ;// Function header + M_START armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe, r6 + + MOV Counter, #2 + M_STR dstStep, pDstStep + M_STR srcStep, pSrcStep + LDR r0x00ff00ff, =0x00ff00ff ;// [0 255 0 255] 255 is offset to avoid negative results + +NextTwoRowsLoop + LDR ValD, [pSrc, srcStep] ;// Load row 1 [d1 c1 b1 a1] + LDR ValA, [pSrc], #4 ;// Load row 0 [d0 c0 b0 a0] + LDR ValH, [pSrc, srcStep] ;// Load [h1 g1 f1 e1] + LDR ValE, [pSrc], #4 ;// Load [h0 g0 f0 e0] + LDRB Temp2, [pSrc, srcStep] ;// Load row 1 single byte [00 00 00 i1] + LDRB Temp1, [pSrc], #-8 ;// Load row 0 single byte [00 00 00 i0], rewind pSrc by 8 + + PKHBT ValB, ValA, ValD, LSL #16 ;// [b1 a1 b0 a0] + PKHTB ValD, ValD, ValA, ASR #16 ;// [d1 c1 d0 c0] + UXTAB16 ValA, r0x00ff00ff, ValB ;// [00 a1 00 a0] + [0 255 0 255] + UXTAB16 ValC, r0x00ff00ff, ValD ;// [00 c1 00 c0] + [0 255 0 255] + PKHBT ValI, Temp1, Temp2, LSL #16 ;// [00 i1 00 i0] + PKHBT ValF, ValE, ValH, LSL #16 ;// [f1 e1 f0 e0] + PKHTB ValH, ValH, ValE, ASR #16 ;// [h1 g1 h0 g0] + UXTAB16 ValE, r0x00ff00ff, ValF ;// [00 e1 00 e0] + [0 255 0 255] + + ;// Calculate Acc0 + ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f + UXTAB16 Temp1, ValC, ValD, ROR #8 + UXTAB16 Temp3, ValE, ValB, ROR #8 + RSB Temp1, Temp3, Temp1, LSL #2 + UXTAB16 Acc0, ValA, ValF, ROR #8 + ADD Temp1, Temp1, Temp1, LSL #2 + ADD Acc0, Acc0, Temp1 + + ;// Calculate Acc1 + ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g + UXTAB16 Temp1, ValE, ValD, ROR #8 + UXTAB16 Temp3, ValC, ValF, 
ROR #8 + RSB Temp1, Temp3, Temp1, LSL #2 + UXTAB16 ValG, r0x00ff00ff, ValH ;// [00 g1 00 g0] + [0 255 0 255] + ADD Temp1, Temp1, Temp1, LSL #2 + UXTAB16 Acc1, ValG, ValB, ROR #8 + ADD Acc1, Acc1, Temp1 + + LDR r0x0fe00fe0, =0x0fe00fe0 ;// 0x0fe00fe0 = (16 * Offset) - 16 where Offset is 255 + UXTAB16 Acc2, ValC, ValH, ROR #8 + ADD ValI, r0x00ff00ff, ValI ;// [00 i1 00 i0] + [0 255 0 255] + UQSUB16 Acc0, Acc0, r0x0fe00fe0 + UQSUB16 Acc1, Acc1, r0x0fe00fe0 + USAT16 Acc0, #13, Acc0 + USAT16 Acc1, #13, Acc1 + + ;// Calculate Acc2 + ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h + UXTAB16 Temp1, ValG, ValD, ROR #8 + UXTAB16 Acc3, ValI, ValD, ROR #8 + UXTAB16 Temp2, ValE, ValF, ROR #8 + AND Acc1, r0x00ff00ff, Acc1, LSR #5 + AND Acc0, r0x00ff00ff, Acc0, LSR #5 + ORR Acc0, Acc0, Acc1, LSL #8 + RSB Temp5, Temp1, Temp2, LSL #2 + UXTAB16 Temp2, ValG, ValF, ROR #8 + ADD Temp5, Temp5, Temp5, LSL #2 + ADD Acc2, Acc2, Temp5 + + ;// Calculate Acc3 + ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i + UXTAB16 Temp5, ValE, ValH, ROR #8 + RSB Temp5, Temp5, Temp2, LSL #2 + LDR r0x0fe00fe0, =0x0fe00fe0 + ADD Temp5, Temp5, Temp5, LSL #2 + ADD Acc3, Acc3, Temp5 + + UQSUB16 Acc3, Acc3, r0x0fe00fe0 + UQSUB16 Acc2, Acc2, r0x0fe00fe0 + USAT16 Acc3, #13, Acc3 + USAT16 Acc2, #13, Acc2 + + M_LDR dstStep, pDstStep + AND Acc3, r0x00ff00ff, Acc3, LSR #5 + AND Acc2, r0x00ff00ff, Acc2, LSR #5 + ORR Acc2, Acc2, Acc3, LSL #8 + + SUBS Counter, Counter, #1 + M_LDR srcStep, pSrcStep + PKHBT Acc1, Acc0, Acc2, LSL #16 + M_STR Acc1, [pDst], dstStep ;// Store result1 + PKHTB Acc2, Acc2, Acc0, ASR #16 + M_STR Acc2, [pDst], dstStep ;// Store result2 + ADD pSrc, pSrc, srcStep, LSL #1 + + BGT NextTwoRowsLoop +End + SUB pDst, pDst, dstStep, LSL #2 + SUB pSrc, pSrc, srcStep, LSL #2 + + M_END + + ENDIF + + END + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s new file mode 100644 index 0000000..007cd0d --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s @@ -0,0 +1,185 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + + + + IF ARM1136JS + + ;// Function header + + ;// Function: + ;// armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + ;// + ;// Implements vertical interpolation for a block of size 4x4. Input and output should + ;// be aligned. + ;// + ;// Registers used as input for this function + ;// r0,r1,r2,r3 where r0,r2 input pointer and r1,r3 corresponding step size + ;// + ;// Registers preserved for top level function + ;// r0,r1,r2,r3,r4,r5,r6,r14 + ;// + ;// Registers modified by the function + ;// r7,r8,r9,r10,r11,r12 + ;// + ;// Output registers + ;// None. 
Function will preserve r0-r3 + M_START armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe, r6 + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 2 +dstStep RN 3 + +;// Declare inner loop registers +ValA RN 5 +ValA0 RN 4 +ValA1 RN 5 +ValAF0 RN 4 +ValAF1 RN 5 + +ValB RN 11 + +ValC RN 5 +ValC0 RN 4 +ValC1 RN 5 +ValCD0 RN 12 +ValCD1 RN 14 +ValCF0 RN 4 +ValCF1 RN 5 + +ValD RN 10 + +ValE RN 7 +ValE0 RN 6 +ValE1 RN 7 +ValEB0 RN 10 +ValEB1 RN 11 +ValED0 RN 6 +ValED1 RN 7 + +ValF RN 10 + +ValG RN 14 +ValG0 RN 12 +ValG1 RN 14 +ValGB0 RN 12 +ValGB1 RN 14 + +Acc0 RN 4 +Acc1 RN 5 +Acc2 RN 6 +Acc3 RN 7 + +Temp RN 7 +Height RN 3 +Step RN 6 + +Counter RN 8 +r0x00ff00ff RN 9 ;// [0 255 0 255] where 255 is offset +r0x0fe00fe0 RN 10 ;// [0 (16*255 - 16) 0 (16*255 - 16)] + + + LDR r0x00ff00ff, =0x00ff00ff ;// [0 255 0 255] 255 is offset to avoid negative results + MOV Counter, #2 + +TwoRowsLoop + M_LDR ValC, [pSrc], srcStep ;// Load [c3 c2 c1 c0] + M_LDR ValD, [pSrc], srcStep ;// Load [d3 d2 d1 d0] + M_LDR ValE, [pSrc], srcStep ;// Load [e3 e2 e1 e0] + SUB pSrc, pSrc, srcStep, LSL #2 + LDR ValB, [pSrc] ;// Load [b3 b2 b1 b0] + UXTAB16 ValC0, r0x00ff00ff, ValC ;// [0 c2 0 c0] + [0 255 0 255] + UXTAB16 ValC1, r0x00ff00ff, ValC, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + + UXTAB16 ValE0, r0x00ff00ff, ValE ;// [0 e2 0 e0] + [0 255 0 255] + UXTAB16 ValE1, r0x00ff00ff, ValE, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + UXTAB16 ValCD0, ValC0, ValD ;// [0 c2 0 c0] + [0 255 0 255] + [0 d2 0 d0] + UXTAB16 ValCD1, ValC1, ValD, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + [0 d3 0 d1] + UXTAB16 ValEB0, ValE0, ValB ;// [0 e2 0 e0] + [0 255 0 255] + [0 b2 0 b0] + RSB ValCD0, ValEB0, ValCD0, LSL #2 ;// 4*(Off+C+D) - (Off+B+E) + + LDR ValD, [pSrc, srcStep, LSL #1] ;// Load [d3 d2 d1 d0] + UXTAB16 ValEB1, ValE1, ValB, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + [0 b3 0 b1] + RSB ValCD1, ValEB1, ValCD1, LSL #2 + ;// One cycle stall + UXTAB16 ValED0, ValE0, ValD ;// [0 e2 0 e0] + [0 255 0 255] + [0 d2 0 d0] + 
UXTAB16 ValED1, ValE1, ValD, ROR #8 ;// [0 e3 0 e1] + [0 255 0 255] + [0 d3 0 d1] + + LDR ValF, [pSrc, srcStep, LSL #2] ;// Load [f3 f2 f1 f0] + M_LDR ValB, [pSrc], srcStep ;// Load [b3 b2 b1 b0] + ADD ValCD0, ValCD0, ValCD0, LSL #2 ;// 5 * [4*(Off+C+D) - (Off+B+E)] + ADD ValCD1, ValCD1, ValCD1, LSL #2 + UXTAB16 ValCF1, ValC1, ValF, ROR #8 ;// [0 c3 0 c1] + [0 255 0 255] + [0 f3 0 f1] + UXTAB16 ValCF0, ValC0, ValF ;// [0 c2 0 c0] + [0 255 0 255] + [0 f2 0 f0] + RSB ValED1, ValCF1, ValED1, LSL #2 + + SUB ValA, pSrc, srcStep, LSL #1 + LDR ValA, [ValA] ;// Load [a3 a2 a1 a0] + RSB ValED0, ValCF0, ValED0, LSL #2 ;// 4*(Off+E+D) - (Off+C+F) + ADD ValED1, ValED1, ValED1, LSL #2 + ADD ValED0, ValED0, ValED0, LSL #2 ;// 5 * [4*(Off+E+D) - (Off+C+F)] + UXTAB16 ValA0, r0x00ff00ff, ValA ;// [0 a2 0 a0] + [0 255 0 255] + UXTAB16 ValA1, r0x00ff00ff, ValA, ROR #8 ;// [0 a3 0 a1] + [0 255 0 255] + UXTAB16 ValAF0, ValA0, ValF ;// [0 a2 0 a0] + [0 255 0 255] + [0 f2 0 f0] + UXTAB16 ValAF1, ValA1, ValF, ROR #8 ;// [0 a3 0 a1] + [0 255 0 255] + [0 f3 0 f1] + + LDR r0x0fe00fe0, =0x0fe00fe0 ;// 0x0fe00fe0 = (16 * Offset) - 16 where Offset is 255 + ADD Acc1, ValCD1, ValAF1 + + LDR ValG, [pSrc, srcStep, LSL #2] ;// Load [g3 g2 g1 g0] + ADD Acc0, ValCD0, ValAF0 ;// Acc0 = 16*Off + (A+F) + 20*(C+D) - 5*(B+E) + UQSUB16 Acc1, Acc1, r0x0fe00fe0 ;// Acc1 -= (16*Off - 16) + UQSUB16 Acc0, Acc0, r0x0fe00fe0 + UXTAB16 ValG0, r0x00ff00ff, ValG ;// [0 g2 0 g0] + [0 255 0 255] + UXTAB16 ValG1, r0x00ff00ff, ValG, ROR #8 ;// [0 g3 0 g1] + [0 255 0 255] + UXTAB16 ValGB0, ValG0, ValB ;// [0 g2 0 g0] + [0 255 0 255] + [0 b2 0 b0] + UXTAB16 ValGB1, ValG1, ValB, ROR #8 ;// [0 g3 0 g1] + [0 255 0 255] + [0 b3 0 b1] + ADD Acc2, ValED0, ValGB0 ;// Acc2 = 16*Off + (B+G) + 20*(D+E) - 5*(C+F) + ADD Acc3, ValED1, ValGB1 + UQSUB16 Acc3, Acc3, r0x0fe00fe0 ;// Acc3 -= (16*Off - 16) + UQSUB16 Acc2, Acc2, r0x0fe00fe0 + USAT16 Acc1, #13, Acc1 ;// Saturate to 8+5 = 13 bits + USAT16 Acc0, #13, Acc0 + USAT16 Acc3, #13, 
Acc3 + USAT16 Acc2, #13, Acc2 + AND Acc1, r0x00ff00ff, Acc1, LSR #5 ;// [0 a3 0 a1] + AND Acc0, r0x00ff00ff, Acc0, LSR #5 ;// [0 a2 0 a0] + ORR Acc0, Acc0, Acc1, LSL #8 ;// [a3 a2 a1 a0] + AND Acc3, r0x00ff00ff, Acc3, LSR #5 ;// [0 b3 0 b1] + AND Acc2, r0x00ff00ff, Acc2, LSR #5 ;// [0 b2 0 b0] + + M_STR Acc0, [pDst], dstStep ;// Store result & adjust pointer + ORR Acc2, Acc2, Acc3, LSL #8 ;// [b3 b2 b1 b0] + M_STR Acc2, [pDst], dstStep ;// Store result & adjust pointer + ADD pSrc, pSrc, srcStep, LSL #1 + + SUBS Counter, Counter, #1 + BGT TwoRowsLoop +End + SUB pDst, pDst, dstStep, LSL #2 + SUB pSrc, pSrc, srcStep, LSL #2 + + M_END + + ENDIF + + END +
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s new file mode 100644 index 0000000..b1ad17c --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s @@ -0,0 +1,273 @@ +;// +;// +;// File Name: armVCM4P10_Interpolate_Chroma_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + IF ARM1136JS + +;// input registers + +pSrc RN 0 +iSrcStep RN 1 +pDst RN 2 +iDstStep RN 3 +iWidth RN 4 +iHeight RN 5 +dx RN 6 +dy RN 7 + + +;// local variable registers +temp RN 11 +r0x20 RN 12 +tmp0x20 RN 14 +return RN 0 +dxPlusdy RN 10 +EightMinusdx RN 8 +EightMinusdy RN 9 +dxEightMinusdx RN 8 +BACoeff RN 6 +DCCoeff RN 7 + +iDstStepx2MinusWidth RN 8 +iSrcStepx2MinusWidth RN 9 +iSrcStep1 RN 10 + +pSrc1 RN 1 +pSrc2 RN 8 +pDst1 RN 8 +pDst2 RN 12 + +pix00 RN 8 +pix01 RN 9 +pix10 RN 10 +pix11 RN 11 + +Out0100 RN 8 +Out1110 RN 10 + +x00 RN 8 +x01 RN 10 +x02 RN 12 +x10 RN 9 +x11 RN 11 +x12 RN 14 +x20 RN 10 +x21 RN 12 +x22 RN 14 + +x01x00 RN 8 +x02x01 RN 10 +x11x10 RN 9 +x12x11 RN 11 +x21x20 RN 10 +x22x21 RN 12 + +OutRow00 RN 12 +OutRow01 RN 14 +OutRow10 RN 10 +OutRow11 RN 12 + +OutRow0100 RN 12 +OutRow1110 RN 12 + +;//----------------------------------------------------------------------------------------------- +;// armVCM4P10_Interpolate_Chroma_asm starts +;//----------------------------------------------------------------------------------------------- + + ;// Write function header + M_START armVCM4P10_Interpolate_Chroma, r11 + + ;// Define stack arguments + M_ARG Width, 4 + M_ARG Height, 4 + M_ARG Dx, 4 + M_ARG Dy, 4 + + ;// Load argument from the stack + 
;// M_STALL ARM1136JS=4 + + M_LDR iWidth, Width + M_LDR iHeight, Height + M_LDR dx, Dx + M_LDR dy, Dy + + ;// EightMinusdx = 8 - dx + ;// EightMinusdy = 8 - dy + + ;// ACoeff = EightMinusdx * EightMinusdy + ;// BCoeff = dx * EightMinusdy + ;// CCoeff = EightMinusdx * dy + ;// DCoeff = dx * dy + + ADD pSrc1, pSrc, iSrcStep + SUB temp, iWidth, #1 + RSB EightMinusdx, dx, #8 + RSB EightMinusdy, dy, #8 + CMN dx,dy + ADD dxEightMinusdx, EightMinusdx, dx, LSL #16 + ORR iWidth, iWidth, temp, LSL #16 + + ;// Packed Coeffs. + + MUL BACoeff, dxEightMinusdx, EightMinusdy + MUL DCCoeff, dxEightMinusdx, dy + + + ;// Checking either of dx and dy being non-zero + + BEQ MVIsZero + +;// Pixel layout: +;// +;// x00 x01 x02 +;// x10 x11 x12 +;// x20 x21 x22 + +;// If fractional mv is not (0, 0) + +OuterLoopMVIsNotZero + +InnerLoopMVIsNotZero + + LDRB x00, [pSrc, #+0] + LDRB x10, [pSrc1, #+0] + LDRB x01, [pSrc, #+1] + LDRB x11, [pSrc1, #+1] + LDRB x02, [pSrc, #+2]! + LDRB x12, [pSrc1, #+2]! + + ORR x01x00, x00, x01, LSL #16 + ;// M_STALL ARM1136JS=1 + ORR x02x01, x01, x02, LSL #16 + MOV r0x20, #32 + ORR x11x10, x10, x11, LSL #16 + ORR x12x11, x11, x12, LSL #16 + + SMLAD x01x00, x01x00, BACoeff, r0x20 + SMLAD x02x01, x02x01, BACoeff, r0x20 + + ;// iWidth packed with MSB (top 16 bits) + ;// as inner loop counter value i.e + ;// (iWidth -1) and LSB (lower 16 bits) + ;// as original width + + SUBS iWidth, iWidth, #1<<17 + + SMLAD OutRow00, x11x10, DCCoeff, x01x00 + SMLAD OutRow01, x12x11, DCCoeff, x02x01 + + RSB pSrc2, pSrc, pSrc1, LSL #1 + + MOV OutRow00, OutRow00, LSR #6 + MOV OutRow01, OutRow01, LSR #6 + + LDRB x20,[pSrc2, #-2] + + ORR OutRow0100, OutRow00, OutRow01, LSL #8 + STRH OutRow0100, [pDst], #2 + + LDRB x21,[pSrc2, #-1] + LDRB x22,[pSrc2, #+0] + + ADD pDst1, pDst, iDstStep + + ;// M_STALL ARM1136JS=1 + + ORR x21x20, x20, x21, LSL #16 + ORR x22x21, x21, x22, LSL #16 + + MOV tmp0x20, #32 + + ;// Reusing the packed data x11x10 and x12x11 + + SMLAD x11x10, x11x10, BACoeff, tmp0x20 + 
SMLAD x12x11, x12x11, BACoeff, tmp0x20 + SMLAD OutRow10, x21x20, DCCoeff, x11x10 + SMLAD OutRow11, x22x21, DCCoeff, x12x11 + + MOV OutRow10, OutRow10, LSR #6 + MOV OutRow11, OutRow11, LSR #6 + + ;// M_STALL ARM1136JS=1 + + ORR OutRow1110, OutRow10, OutRow11, LSL #8 + + STRH OutRow1110, [pDst1, #-2] + + BGT InnerLoopMVIsNotZero + + SUBS iHeight, iHeight, #2 + ADD iWidth, iWidth, #1<<16 + RSB iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1 + SUB iSrcStep1, pSrc1, pSrc + SUB temp, iWidth, #1 + RSB iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1 + ADD pDst, pDst, iDstStepx2MinusWidth + ADD pSrc1, pSrc1, iSrcStepx2MinusWidth + ADD pSrc, pSrc, iSrcStepx2MinusWidth + ORR iWidth, iWidth, temp, LSL #16 + BGT OuterLoopMVIsNotZero + MOV return, #OMX_Sts_NoErr + M_EXIT + +;// If fractional mv is (0, 0) + +MVIsZero + ;// M_STALL ARM1136JS=4 +OuterLoopMVIsZero + +InnerLoopMVIsZero + + LDRB pix00, [pSrc], #+1 + LDRB pix01, [pSrc], #+1 + LDRB pix10, [pSrc1], #+1 + LDRB pix11, [pSrc1], #+1 + + ADD pDst2, pDst, iDstStep + SUBS iWidth, iWidth, #1<<17 + + ORR Out0100, pix00, pix01, LSL #8 + ORR Out1110, pix10, pix11, LSL #8 + + STRH Out0100, [pDst], #2 + STRH Out1110, [pDst2], #2 + + BGT InnerLoopMVIsZero + + SUBS iHeight, iHeight, #2 + ADD iWidth, iWidth, #1<<16 + RSB iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1 + SUB iSrcStep1, pSrc1, pSrc + SUB temp, iWidth, #1 + RSB iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1 + ADD pDst, pDst, iDstStepx2MinusWidth + ADD pSrc1, pSrc1, iSrcStepx2MinusWidth + ADD pSrc, pSrc, iSrcStepx2MinusWidth + ORR iWidth, iWidth, temp, LSL #16 + BGT OuterLoopMVIsZero + MOV return, #OMX_Sts_NoErr + M_END + + ENDIF ;// ARM1136JS + + + END + +;//----------------------------------------------------------------------------------------------- +;// armVCM4P10_Interpolate_Chroma_asm ends +;//----------------------------------------------------------------------------------------------- + diff --git 
a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_QuantTables_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_QuantTables_s.s new file mode 100644 index 0000000..f962f70 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_QuantTables_s.s @@ -0,0 +1,74 @@ +;// +;// +;// File Name: armVCM4P10_QuantTables_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// Description: +;// This file contains quantization tables +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + + EXPORT armVCM4P10_MFMatrixQPModTable + EXPORT armVCM4P10_QPDivIntraTable + EXPORT armVCM4P10_QPDivPlusOneTable + +;//-------------------------------------------------------------- +;// This table contains armVCM4P10_MFMatrix [iQP % 6][0] entries, +;// for values of iQP from 0 to 51 (inclusive). +;//-------------------------------------------------------------- + + M_TABLE armVCM4P10_MFMatrixQPModTable + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + DCW 13107, 11916, 10082, 9362, 8192, 7282 + +;//--------------------------------------------------------------- +;// This table contains ARM_M4P10_Q_OFFSET + 1 + (iQP / 6) values, +;// for values of iQP from 0 to 51 (inclusive). 
+;//--------------------------------------------------------------- + + M_TABLE armVCM4P10_QPDivPlusOneTable + DCB 16, 16, 16, 16, 16, 16 + DCB 17, 17, 17, 17, 17, 17 + DCB 18, 18, 18, 18, 18, 18 + DCB 19, 19, 19, 19, 19, 19 + DCB 20, 20, 20, 20, 20, 20 + DCB 21, 21, 21, 21, 21, 21 + DCB 22, 22, 22, 22, 22, 22 + DCB 23, 23, 23, 23, 23, 23 + DCB 24, 24, 24, 24, 24, 24 + +;//------------------------------------------------------------------ +;// This table contains (1 << QbitsPlusOne) / 3 Values (Intra case) , +;// for values of iQP from 0 to 51 (inclusive). +;//------------------------------------------------------------------ + + M_TABLE armVCM4P10_QPDivIntraTable, 2 + DCD 21845, 21845, 21845, 21845, 21845, 21845 + DCD 43690, 43690, 43690, 43690, 43690, 43690 + DCD 87381, 87381, 87381, 87381, 87381, 87381 + DCD 174762, 174762, 174762, 174762, 174762, 174762 + DCD 349525, 349525, 349525, 349525, 349525, 349525 + DCD 699050, 699050, 699050, 699050, 699050, 699050 + DCD 1398101, 1398101, 1398101, 1398101, 1398101, 1398101 + DCD 2796202, 2796202, 2796202, 2796202, 2796202, 2796202 + DCD 5592405, 5592405, 5592405, 5592405, 5592405, 5592405 + + + END +
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s new file mode 100644 index 0000000..241d188 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s @@ -0,0 +1,407 @@ +;// +;// +;// File Name: armVCM4P10_TransformResidual4x4_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// +;// Description: +;// Transform Residual 4x4 Coefficients +;// +;// + + +;// Include standard headers + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + +;// Import symbols required from other files +;// (For example tables) + + + + +;// Set debugging level +;//DEBUG_ON SETL {TRUE} + + + +;// Guarding implementation by the processor name + + IF ARM1136JS + +;//Input Registers +pDst RN 0 +pSrc RN 1 + +;//Output Registers + + +;//Local Scratch Registers + +;// Packed Input pixels +in00 RN 2 ;// Src[0] & Src[1] +in02 RN 3 ;// Src[2] & Src[3] +in10 RN 4 ;// Src[4] & Src[5] +in12 RN 5 ;// Src[6] & Src[7] +in20 RN 6 ;// Src[8] & Src[9] +in22 RN 7 ;// Src[10] & Src[11] +in30 RN 8 ;// Src[12] & Src[13] +in32 RN 9 ;// Src[14] & Src[15] + +;// Transpose for Row operations (Rows to cols) +trRow00 RN 2 +trRow10 RN 10 +trRow02 RN 3 +trRow12 RN 5 +trRow20 RN 11 +trRow30 RN 12 +trRow32 RN 14 +trRow22 RN 7 + +;// Intermediate calculations +e0 RN 4 +e1 RN 6 +e2 RN 8 +e3 RN 9 +constZero RN 1 + +;// Row operated pixels +rowOp00 RN 2 +rowOp10 RN 10 +rowOp20 RN 11 +rowOp30 RN 12 +rowOp02 RN 3 +rowOp12 RN 5 +rowOp22 RN 7 +rowOp32 RN 14 + +;// Transpose for colulmn operations +trCol00 RN 2 +trCol02 RN 3 +trCol10 RN 4 +trCol12 RN 5 +trCol20 RN 6 +trCol22 RN 7 +trCol30 RN 8 +trCol32 RN 9 + +;// Intermediate calculations +g0 
RN 10 +g1 RN 11 +g2 RN 12 +g3 RN 14 + +;// Coloumn operated pixels +colOp00 RN 2 +colOp02 RN 3 +colOp10 RN 4 +colOp12 RN 5 +colOp20 RN 6 +colOp22 RN 7 +colOp30 RN 8 +colOp32 RN 9 + + +temp1 RN 10 ;// Temporary scratch varaibles +const1 RN 11 +const2 RN 12 +mask RN 14 + +;// Output pixels +out00 RN 2 +out02 RN 3 +out10 RN 4 +out12 RN 5 +out20 RN 6 +out22 RN 7 +out30 RN 8 +out32 RN 9 + + + + ;// Allocate stack memory required by the function + + + ;// Write function header + M_START armVCM4P10_TransformResidual4x4,r11 + + ;****************************************************************** + ;// The strategy used in implementing the transform is as follows:* + ;// Load the 4x4 block into 8 registers * + ;// Transpose the 4x4 matrix * + ;// Perform the row operations (on columns) using SIMD * + ;// Transpose the 4x4 result matrix * + ;// Perform the coloumn operations * + ;// Store the 4x4 block at one go * + ;****************************************************************** + + ;// Load all the 4x4 pixels + + LDMIA pSrc,{in00,in02,in10,in12,in20,in22,in30,in32} + + MOV constZero,#0 ;// Used to right shift by 1 + ;LDR constZero,=0x00000000 + + ;***************************************************************** + ;// + ;// Transpose the matrix inorder to perform row ops as coloumn ops + ;// Input: in[][] = original matrix + ;// Output: trRow[][]= transposed matrix + ;// Step1: Obtain the LL part of the transposed matrix + ;// Step2: Obtain the HL part + ;// step3: Obtain the LH part + ;// Step4: Obtain the HH part + ;// + ;***************************************************************** + + ;// LL 2x2 transposed matrix + ;// d0 d1 - - + ;// d4 d5 - - + ;// - - - - + ;// - - - - + + PKHTB trRow10,in10,in00,ASR #16 ;// [5 4] = [f5:f1] + PKHBT trRow00,in00,in10,LSL #16 ;// [1 0] = [f4:f0] + + ;// HL 2x2 transposed matrix + ;// - - - - + ;// - - - - + ;// d8 d9 - - + ;// d12 d13 - - + + + PKHTB trRow30,in12,in02,ASR #16 ;// [13 12] = [7 3] + PKHBT trRow20,in02,in12,LSL #16 
;// [9 8] = [6 2] + + ;// LH 2x2 transposed matrix + ;// - - d2 d3 + ;// - - d6 d7 + ;// - - - - + ;// - - - - + + PKHBT trRow02,in20,in30,LSL #16 ;// [3 2] = [f12:f8] + PKHTB trRow12,in30,in20,ASR #16 ;// [7 6] = [f13:f9] + + + + + ;// HH 2x2 transposed matrix + ;// - - - - + ;// - - - - + ;// - - d10 d11 + ;// - - d14 d15 + + PKHTB trRow32,in32,in22,ASR #16 ;// [15 14] = [15 11] + PKHBT trRow22,in22,in32,LSL #16 ;// [11 10] = [14 10] + + + ;**************************************** + ;// Row Operations (Performed on columns) + ;**************************************** + + + ;// SIMD operations on first two columns(two rows of the original matrix) + + + SADD16 e0, trRow00,trRow20 ;// e0 = d0 + d2 + SSUB16 e1, trRow00,trRow20 ;// e1 = d0 - d2 + SHADD16 e2, trRow10,constZero ;// (f1>>1) constZero is a register holding 0 + SHADD16 e3, trRow30,constZero ;// avoid pipeline stalls for e2 and e3 + SSUB16 e2, e2, trRow30 ;// e2 = (d1>>1) - d3 + SADD16 e3, e3, trRow10 ;// e3 = d1 + (d3>>1) + SADD16 rowOp00, e0, e3 ;// f0 = e0 + e3 + SADD16 rowOp10, e1, e2 ;// f1 = e1 + e2 + SSUB16 rowOp20, e1, e2 ;// f2 = e1 - e2 + SSUB16 rowOp30, e0, e3 ;// f3 = e0 - e3 + + ;// SIMD operations on next two columns(next two rows of the original matrix) + + SADD16 e0, trRow02,trRow22 + SSUB16 e1, trRow02,trRow22 + SHADD16 e2, trRow12,constZero ;//(f1>>1) constZero is a register holding 0 + SHADD16 e3, trRow32,constZero + SSUB16 e2, e2, trRow32 + SADD16 e3, e3, trRow12 + SADD16 rowOp02, e0, e3 + SADD16 rowOp12, e1, e2 + SSUB16 rowOp22, e1, e2 + SSUB16 rowOp32, e0, e3 + + + ;***************************************************************** + ;// Transpose the resultant matrix + ;// Input: rowOp[][] + ;// Output: trCol[][] + ;***************************************************************** + + ;// LL 2x2 transposed matrix + ;// d0 d1 - - + ;// d4 d5 - - + ;// - - - - + ;// - - - - + + PKHTB trCol10,rowOp10,rowOp00,ASR #16 ;// [5 4] = [f5:f1] + PKHBT trCol00,rowOp00,rowOp10,LSL #16 ;// [1 0] = 
[f4:f0] + + ;// HL 2x2 transposed matrix + ;// - - - - + ;// - - - - + ;// d8 d9 - - + ;// d12 d13 - - + + + PKHTB trCol30,rowOp12,rowOp02,ASR #16 ;// [13 12] = [7 3] + PKHBT trCol20,rowOp02,rowOp12,LSL #16 ;// [9 8] = [6 2] + + ;// LH 2x2 transposed matrix + ;// - - d2 d3 + ;// - - d6 d7 + ;// - - - - + ;// - - - - + + PKHBT trCol02,rowOp20,rowOp30,LSL #16 ;// [3 2] = [f12:f8] + PKHTB trCol12,rowOp30,rowOp20,ASR #16 ;// [7 6] = [f13:f9] + + + + + ;// HH 2x2 transposed matrix + ;// - - - - + ;// - - - - + ;// - - d10 d11 + ;// - - d14 d15 + + PKHTB trCol32,rowOp32,rowOp22,ASR #16 ;// [15 14] = [15 11] + PKHBT trCol22,rowOp22,rowOp32,LSL #16 ;// [11 10] = [14 10] + + + ;******************************* + ;// Coloumn Operations + ;******************************* + + + ;// SIMD operations on first two columns + + + SADD16 g0, trCol00,trCol20 + SSUB16 g1, trCol00,trCol20 + SHADD16 g2, trCol10,constZero ;// (f1>>1) constZero is a register holding 0 + SHADD16 g3, trCol30,constZero + SSUB16 g2, g2, trCol30 + SADD16 g3, g3, trCol10 + SADD16 colOp00, g0, g3 + SADD16 colOp10, g1, g2 + SSUB16 colOp20, g1, g2 + SSUB16 colOp30, g0, g3 + + ;// SIMD operations on next two columns + + SADD16 g0, trCol02,trCol22 + SSUB16 g1, trCol02,trCol22 + SHADD16 g2, trCol12,constZero ;// (f1>>1) constZero is a register holding 0 + SHADD16 g3, trCol32,constZero + SSUB16 g2, g2, trCol32 + SADD16 g3, g3, trCol12 + SADD16 colOp02, g0, g3 + SADD16 colOp12, g1, g2 + SSUB16 colOp22, g1, g2 + SSUB16 colOp32, g0, g3 + + + + + + ;************************************************ + ;// Calculate final value (colOp[i][j] + 32)>>6 + ;************************************************ + + ;// const1: Serves dual purpose + ;// (1) Add #32 to both the lower and higher 16bits of the SIMD result + ;// (2) Convert the lower 16 bit value to an unsigned number (Add 32768) + + LDR const1, =0x00208020 + + LDR mask, =0xffff03ff ;// Used to mask the down shifted 6 bits + + ;// const2(#512): used to convert the lower 16bit 
number back to signed value + + MOV const2,#0x200 ;// const2 = 2^9 + + ;// First Row + + SADD16 colOp00, colOp00, const1 + SADD16 colOp02, colOp02, const1 + AND colOp00, mask, colOp00, ASR #6 + AND colOp02, mask, colOp02, ASR #6 + SSUB16 out00,colOp00,const2 + SSUB16 out02,colOp02,const2 + + + ;// Second Row + + SADD16 colOp10, colOp10, const1 + SADD16 colOp12, colOp12, const1 + AND colOp10, mask, colOp10, ASR #6 + AND colOp12, mask, colOp12, ASR #6 + SSUB16 out10,colOp10,const2 + SSUB16 out12,colOp12,const2 + + + ;// Third Row + + SADD16 colOp20, colOp20, const1 + SADD16 colOp22, colOp22, const1 + AND colOp20, mask, colOp20, ASR #6 + AND colOp22, mask, colOp22, ASR #6 + SSUB16 out20,colOp20,const2 + SSUB16 out22,colOp22,const2 + + + ;// Fourth Row + + SADD16 colOp30, colOp30, const1 + SADD16 colOp32, colOp32, const1 + AND colOp30, mask, colOp30, ASR #6 + AND colOp32, mask, colOp32, ASR #6 + SSUB16 out30,colOp30,const2 + SSUB16 out32,colOp32,const2 + + + + + ;*************************** + ;// Store all the 4x4 pixels + ;*************************** + + STMIA pDst,{out00,out02,out10,out12,out20,out22,out30,out32} + + + + ;// Set return value + +End + + + ;// Write function tail + M_END + + ENDIF ;//ARM1136JS + + + + + + + +;// Guarding implementation by the processor name + + + END
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s new file mode 100644 index 0000000..ad16d9c --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_UnpackBlock4x4_s.s @@ -0,0 +1,92 @@ +;// +;// +;// File Name: armVCM4P10_UnpackBlock4x4_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// Unpacks the packed (Flag, value) coefficient stream at *ppSrc into the 4x4 halfword block at pDst. +;// + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + +;// Define the processor variants supported by this file + + M_VARIANTS ARM1136JS + + + IF ARM1136JS + +;//-------------------------------------- +;// Input Arguments and their scope/usage +;//-------------------------------------- +ppSrc RN 0 ;// Persistent variable +pDst RN 1 ;// Persistent variable + +;//-------------------------------- +;// Variables and their scope/usage +;//-------------------------------- +pSrc RN 2 ;// Persistent variables +Flag RN 3 +Value RN 4 +Value2 RN 5 +strOffset RN 6 +cstOffset RN 7 + + + M_START armVCM4P10_UnpackBlock4x4, r7 + + LDR pSrc, [ppSrc] ;// Load pSrc + MOV cstOffset, #31 ;// Mask used in the loop to compute the store offset + + ;//----------------------------------------------------------------------- + ;// Firstly, fill all the coefficient values in the <pDst> buffer with zero + ;//----------------------------------------------------------------------- + + MOV Value, #0 ;// Initialize the zero value + MOV Value2, #0 ;// Value2 is the second register of the Value/Value2 pair written by STRD + LDRB Flag, [pSrc], #1 ;// Preload <Flag> before <unpackLoop> + + STRD Value, [pDst, #0] ;// pDst[0] = pDst[1] = pDst[2] = pDst[3] = 0 + STRD Value, [pDst, #8] ;// pDst[4] = pDst[5] = pDst[6] = pDst[7] = 0 + STRD Value, [pDst, #16] ;// pDst[8] = pDst[9] = pDst[10] = pDst[11] = 0 + STRD Value, [pDst, #24] ;// pDst[12] =
pDst[13] = pDst[14] = pDst[15] = 0 + ;//---------------------------------------------------------------------------- + ;// The loop below parses and unpacks the input stream. The C-model has + ;// a somewhat complicated logic for sign extension. But in the v6 version, + ;// that can be easily taken care of by loading the data from the <pSrc> stream as + ;// SIGNED byte/halfword. So, based on the first TST instruction, 8-bits or + ;// 16-bits are read. + ;// Flag bits: 0x10 selects a 16-bit value, 0x20 marks the last entry, the low 4 bits give the position. + ;// Next, to compute the offset, where the unpacked value needs to be stored, + ;// we modify the computation to perform [(Flag & 15) << 1] as [(Flag << 1) & 31] + ;// This results in a saving of one cycle. + ;//---------------------------------------------------------------------------- + +unpackLoop + TST Flag, #0x10 ;// Computing (Flag & 0x10) + LDRSBNE Value2,[pSrc,#1] ;// High byte, sign-extended; loaded byte wise to avoid unaligned access + LDRBNE Value, [pSrc], #2 ;// Low byte, zero-extended; pSrc advances past both bytes + AND strOffset, cstOffset, Flag, LSL #1 ;// strOffset = (Flag & 15) << 1; + LDRSBEQ Value, [pSrc], #1 ;// Value = (OMX_S8) *pSrc++ (sign-extended byte) + ORRNE Value,Value,Value2, LSL #8 ;// Value = low byte | (sign-extended high byte << 8) + + TST Flag, #0x20 ;// Computing (Flag & 0x20) to check, if we're done + LDRBEQ Flag, [pSrc], #1 ;// Flag = (OMX_U8) *pSrc++, for next iteration + STRH Value, [pDst, strOffset] ;// Store <Value> at offset <strOffset> + BEQ unpackLoop ;// Branch to the loop beginning + + STR pSrc, [ppSrc] ;// Update the bitstream pointer + M_END + + ENDIF + + + + END +
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c new file mode 100644 index 0000000..c2e6b60 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockChroma_I.c @@ -0,0 +1,88 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_DeblockChroma_I.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * H.264 intra chroma deblock + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_DeblockChroma_I + * + * Description: + * Performs deblocking filtering on all edges of the chroma macroblock (16x16). + * + * Remarks: vertical edges are filtered first, then horizontal edges (called with pAlpha+2, pBeta+2, pThresholds+8 and pBS+16). + * + * Parameters: + * [in] pSrcDst pointer to the input macroblock. Must be 8-byte aligned. + * [in] srcdstStep Step of the arrays + * [in] pAlpha pointer to a 2x2 array of alpha thresholds, organized as follows: { external + * vertical edge, internal vertical edge, external + * horizontal edge, internal horizontal edge } + * [in] pBeta pointer to a 2x2 array of beta thresholds, organized as follows: { external + * vertical edge, internal vertical edge, external horizontal edge, + * internal horizontal edge } + * [in] pThresholds Array of size 8x2 of Thresholds (TC0) (values for the left or + * above edge of each 4x2 or 2x4 block, arranged in vertical block order + * and then in horizontal block order) + * [in] pBS array of size 16x2 of BS parameters (arranged in scan block order for vertical edges and then horizontal edges); + * valid in the range [0,4] with the following restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^1]== 4.
Must be 4-byte aligned. + * [out] pSrcDst pointer to filtered output macroblock + * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - Either of the pointers in pSrcDst, pAlpha, pBeta, pThresholds, or pBS is NULL. + * - pSrcDst is not 8-byte aligned. + * - either pThresholds or pBS is not 4-byte aligned. + * - pBS is out of range, i.e., one of the following conditions is true: pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && pBS[i^1]!=4) for 0<=i<=3. + * - srcdstStep is not a multiple of 8. + * + */ +OMXResult omxVCM4P10_DeblockChroma_I( + OMX_U8* pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8* pAlpha, + const OMX_U8* pBeta, + const OMX_U8* pThresholds, + const OMX_U8 *pBS +) +{ + OMXResult errorCode; + + armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot8ByteAligned(pSrcDst), OMX_Sts_BadArgErr); + armRetArgErrIf(srcdstStep & 7, OMX_Sts_BadArgErr); + armRetArgErrIf(pAlpha == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBeta == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pThresholds == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr); + armRetArgErrIf(pBS == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr); + + errorCode = omxVCM4P10_FilterDeblockingChroma_VerEdge_I( + pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS); + + armRetArgErrIf(errorCode != OMX_Sts_NoErr, errorCode) + + errorCode = omxVCM4P10_FilterDeblockingChroma_HorEdge_I( + pSrcDst, srcdstStep, pAlpha+2, pBeta+2, pThresholds+8, pBS+16); + + return errorCode; +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c new file mode 100644 index 0000000..6023862 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DeblockLuma_I.c @@ -0,0 +1,91 @@ +/*
---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_DeblockLuma_I.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * H.264 luma deblock + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + + +/** + * Function: omxVCM4P10_DeblockLuma_I + * + * Description: + * This function performs deblock filtering of the vertical and horizontal edges of a luma macroblock + * (16x16). + * + * Remarks: vertical edges are filtered first, then horizontal edges (called with pAlpha+2, pBeta+2, pThresholds+16 and pBS+16). + * + * Parameters: + * [in] pSrcDst pointer to the input macroblock. Must be 8-byte aligned. + * [in] srcdstStep image width + * [in] pAlpha pointer to a 2x2 table of alpha thresholds, organized as follows: { external + * vertical edge, internal vertical edge, external horizontal + * edge, internal horizontal edge } + * [in] pBeta pointer to a 2x2 table of beta thresholds, organized as follows: { external + * vertical edge, internal vertical edge, external horizontal edge, + * internal horizontal edge } + * [in] pThresholds pointer to a 16x2 table of thresholds (TC0), organized as follows: { values for + * the left or above edge of each 4x4 block, arranged in vertical block order + * and then in horizontal block order } + * [in] pBS pointer to a 16x2 table of BS parameters arranged in scan block order for vertical edges and then horizontal edges; + * valid in the range [0,4] with the following restrictions: i) pBS[i]== 4 may occur only for 0<=i<=3, ii) pBS[i]== 4 if and only if pBS[i^1]== 4. Must be 4-byte aligned. + * [out] pSrcDst pointer to filtered output macroblock. + * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - Either of the pointers in pSrcDst, pAlpha, pBeta, pThresholds or pBS is NULL. + * - pSrcDst is not 8-byte aligned.
+ * - srcdstStep is not a multiple of 8. + * - pBS is out of range, i.e., one of the following conditions is true: pBS[i]<0, pBS[i]>4, pBS[i]==4 for i>=4, or (pBS[i]==4 && pBS[i^1]!=4) for 0<=i<=3. + * - either pThresholds or pBS is not 4-byte aligned. + * + */ + +OMXResult omxVCM4P10_DeblockLuma_I( + OMX_U8* pSrcDst, + OMX_S32 srcdstStep, + const OMX_U8* pAlpha, + const OMX_U8* pBeta, + const OMX_U8* pThresholds, + const OMX_U8 *pBS +) +{ + OMXResult errorCode; + + armRetArgErrIf(pSrcDst == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot8ByteAligned(pSrcDst), OMX_Sts_BadArgErr); + armRetArgErrIf(srcdstStep & 7, OMX_Sts_BadArgErr); + armRetArgErrIf(pAlpha == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pBeta == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(pThresholds == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pThresholds), OMX_Sts_BadArgErr); + armRetArgErrIf(pBS == NULL, OMX_Sts_BadArgErr); + armRetArgErrIf(armNot4ByteAligned(pBS), OMX_Sts_BadArgErr); + + errorCode = omxVCM4P10_FilterDeblockingLuma_VerEdge_I( + pSrcDst, srcdstStep, pAlpha, pBeta, pThresholds, pBS); + + armRetArgErrIf(errorCode != OMX_Sts_NoErr, errorCode) + + errorCode = omxVCM4P10_FilterDeblockingLuma_HorEdge_I( + pSrcDst, srcdstStep, pAlpha+2, pBeta+2, pThresholds+16, pBS+16); + + return errorCode; +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c new file mode 100644 index 0000000..a19f277 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c @@ -0,0 +1,62 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + * + * + * H.264 decode coefficients module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC + * + * Description: + * Performs CAVLC decoding and inverse raster scan for 2x2 block of + * ChromaDCLevel. The decoded coefficients in packed position-coefficient + * buffer are stored in increasing raster scan order, namely position order. + * + * Remarks: thin wrapper around armVCM4P10_DecodeCoeffsToPair with its last two arguments fixed to 17 and 4 (presumably sVLCSelect and sMaxNumCoeff for the 2x2 chroma DC block - TODO confirm). + * + * Parameters: + * [in] ppBitStream Double pointer to current byte in bit stream + * buffer + * [in] pOffset Pointer to current bit position in the byte + * pointed to by *ppBitStream + * [out] ppBitStream *ppBitStream is updated after each block is decoded + * [out] pOffset *pOffset is updated after each block is decoded + * [out] pNumCoeff Pointer to the number of nonzero coefficients + * in this block + * [out] ppPosCoefbuf Double pointer to destination residual + * coefficient-position pair buffer + * + * Return Value: + * Standard omxError result. See enumeration for possible result codes.
+ * + */ + +OMXResult omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC ( + const OMX_U8** ppBitStream, + OMX_S32* pOffset, + OMX_U8* pNumCoeff, + OMX_U8** ppPosCoefbuf + ) + +{ + return armVCM4P10_DecodeCoeffsToPair(ppBitStream, pOffset, pNumCoeff, + ppPosCoefbuf, 17, 4); + +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c new file mode 100644 index 0000000..99bb4ce --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DecodeCoeffsToPairCAVLC.c @@ -0,0 +1,68 @@ +/* ---------------------------------------------------------------- + * + * + * File Name: omxVCM4P10_DecodeCoeffsToPairCAVLC.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * H.264 decode coefficients module + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P10_DecodeCoeffsToPairCAVLC + * + * Description: + * Performs CAVLC decoding and inverse zigzag scan for 4x4 block of + * Intra16x16DCLevel, Intra16x16ACLevel, LumaLevel, and ChromaACLevel. + * Inverse field scan is not supported. The decoded coefficients in packed + * position-coefficient buffer are stored in increasing zigzag order instead + * of position order. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream Double pointer to current byte in bit stream buffer + * [in] pOffset Pointer to current bit position in the byte pointed + * to by *ppBitStream + * [in] sMaxNumCoeff Maximum number of non-zero coefficients in current + * block + * [in] sVLCSelect VLC table selector, obtained from number of non-zero + * AC coefficients of above and left 4x4 blocks.
It is + * equivalent to the variable nC described in the H.264 standard + * table 9-5, except its value can't be less than zero. + * [out] ppBitStream *ppBitStream is updated after each block is decoded + * [out] pOffset *pOffset is updated after each block is decoded + * [out] pNumCoeff Pointer to the number of nonzero coefficients in + * this block + * [out] ppPosCoefbuf Double pointer to destination residual + * coefficient-position pair buffer + * Return Value: + * Standard omxError result. See enumeration for possible result codes. + * + */ + +OMXResult omxVCM4P10_DecodeCoeffsToPairCAVLC( + const OMX_U8** ppBitStream, + OMX_S32* pOffset, + OMX_U8* pNumCoeff, + OMX_U8**ppPosCoefbuf, + OMX_INT sVLCSelect, + OMX_INT sMaxNumCoeff + ) +{ + return armVCM4P10_DecodeCoeffsToPair(ppBitStream, pOffset, pNumCoeff, + ppPosCoefbuf, sVLCSelect, sMaxNumCoeff); +} diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s new file mode 100644 index 0000000..2b71486 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_DequantTransformResidualFromPairAndAdd_s.s @@ -0,0 +1,480 @@ +;// +;// (c) Copyright 2007 ARM Limited. All Rights Reserved.
+;// +;// Description: +;// H.264 inverse quantize and transform module +;// +;// + + + +;// Include standard headers + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + +;// Import symbols required from other files +;// (For example tables) + + IMPORT armVCM4P10_UnpackBlock4x4 + IMPORT armVCM4P10_TransformResidual4x4 + IMPORT armVCM4P10_QPDivTable + IMPORT armVCM4P10_VMatrixU16 + IMPORT armVCM4P10_QPModuloTable + + M_VARIANTS ARM1136JS, ARM1136JS_U + +;// Set debugging level +;//DEBUG_ON SETL {TRUE} + + +;// Static Function: armVCM4P10_DequantLumaAC4x4 + +;// Guarding implementation by the processor name + + IF ARM1136JS + +;//Input Registers +pSrcDst RN 0 +QP RN 1 + + +;//Output Registers + + +;//Local Scratch Registers +pQPdiv RN 4 +pQPmod RN 5 +pVRow RN 2 +QPmod RN 6 +shift RN 3 +rowLuma01 RN 1 +rowLuma23 RN 4 + +SrcDst00 RN 5 +SrcDst02 RN 6 +SrcDst10 RN 7 +SrcDst12 RN 8 +SrcDst20 RN 9 +SrcDst22 RN 10 +SrcDst30 RN 11 +SrcDst32 RN 12 + +temp1 RN 2 +temp2 RN 3 +temp3 RN 14 + + + ;// Allocate stack memory required by the function + + ;// Write function header + M_START armVCM4P10_DequantLumaAC4x4,r11 + + LDR pQPmod,=armVCM4P10_QPModuloTable + LDR pQPdiv,=armVCM4P10_QPDivTable + LDR pVRow,=armVCM4P10_VMatrixU16 + + LDRSB QPmod,[pQPmod,QP] ;// (QP%6) * 6 + LDRSB shift,[pQPdiv,QP] ;// Shift = QP / 6 + + LDRH rowLuma01,[pVRow,QPmod]!
;// rowLuma01 = [00|0a] + LDRH temp3,[pVRow,#2] ;// temp3 = [00|0b] + LDRH rowLuma23,[pVRow,#4] ;// rowLuma23 = [00|0c] + ORR rowLuma01,rowLuma01,temp3,LSL #16 ;// rowLuma01 = [0b|0a] + + ;// Load all the 16 'src' values + LDMIA pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32} + + + ;//********************************************************************************************* + ;// + ;// 'Shift' ranges between [0,8] + ;// So we can shift the packed rowLuma values [0b|0a] with a single LSL operation + ;// + ;//********************************************************************************************* + + LSL rowLuma01,rowLuma01,shift + LSL rowLuma23,rowLuma23,shift + + + ;//********************************************************************************************** + ;// + ;// The idea is to unroll the Loop completely + ;// All the 16 src values are loaded at once into 8 registers : SrcDst<y><x> (above) + ;// 0<= armVCM4P10_PosToVCol4x4[i] <=2 for any 'i<16' + ;// So the only values of pVRow[i] that need to be loaded are for i=0,1,2 + ;// These 3 values are loaded into rowLuma01 and rowLuma23 (above) + ;// We first calculate pVRow[armVCM4P10_PosToVCol4x4[i]] << Shift which fits into 16 bits (above) + ;// Then the product pSrcDst[i] * (pVRow[armVCM4P10_PosToVCol4x4[i]] << Shift) is calculated + ;// Here we interleave the PKHBT operations for various rows to avoid pipeline stalls + ;// + ;// We then pack the two 16 bit multiplication results into a word and store at one go + ;// + ;//********************************************************************************************** + + + ;// Row 1 + + + SMULTB temp1,SrcDst00,rowLuma23 ;// pSrcDst[1] * (pVRow[2]<<Shift) + SMULBB SrcDst00,SrcDst00,rowLuma01 ;// pSrcDst[0] * (pVRow[0]<<Shift) + + SMULTB temp2,SrcDst02,rowLuma23 ;// pSrcDst[3] * (pVRow[2]<<Shift) + SMULBB SrcDst02,SrcDst02,rowLuma01 ;// pSrcDst[2] * (pVRow[0]<<Shift) + + PKHBT SrcDst00,SrcDst00,temp1,LSL #16 ;// Pack the
first two product values + + + ;// Row 2 + SMULTT temp1,SrcDst10,rowLuma01 ;// pSrcDst[5] * (pVRow[1]<<Shift) + SMULBB SrcDst10,SrcDst10,rowLuma23 ;// pSrcDst[4] * (pVRow[2]<<Shift) + + PKHBT SrcDst02,SrcDst02,temp2,LSL #16 ;// Pack the next two product values + SMULTT temp2,SrcDst12,rowLuma01 ;// pSrcDst[7] * (pVRow[1]<<Shift) + SMULBB SrcDst12,SrcDst12,rowLuma23 ;// pSrcDst[6] * (pVRow[2]<<Shift) + + PKHBT SrcDst10,SrcDst10,temp1,LSL #16 ;// Pack the next two product values + + + ;// Row 3 + + SMULTB temp1,SrcDst20,rowLuma23 ;// pSrcDst[9] * (pVRow[2]<<Shift) + SMULBB SrcDst20,SrcDst20,rowLuma01 ;// pSrcDst[8] * (pVRow[0]<<Shift) + + PKHBT SrcDst12,SrcDst12,temp2,LSL #16 ;// Pack the next two product values + SMULTB temp2,SrcDst22,rowLuma23 ;// pSrcDst[11] * (pVRow[2]<<Shift) + SMULBB SrcDst22,SrcDst22,rowLuma01 ;// pSrcDst[10] * (pVRow[0]<<Shift) + + PKHBT SrcDst20,SrcDst20,temp1,LSL #16 ;// Pack the next two product values + + + + ;// Row 4 + + SMULTT temp1,SrcDst30,rowLuma01 ;// pSrcDst[13] * (pVRow[1]<<Shift) + SMULBB SrcDst30,SrcDst30,rowLuma23 ;// pSrcDst[12] * (pVRow[2]<<Shift) + + SMULTT temp3,SrcDst32,rowLuma01 ;// pSrcDst[15] * (pVRow[1]<<Shift) + SMULBB SrcDst32,SrcDst32,rowLuma23 ;// pSrcDst[14] * (pVRow[2]<<Shift) + + PKHBT SrcDst22,SrcDst22,temp2,LSL #16 ;// Pack the remaining product values + PKHBT SrcDst30,SrcDst30,temp1,LSL #16 + PKHBT SrcDst32,SrcDst32,temp3,LSL #16 + + + STMIA pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32} + + + ;// Set return value + + + + ;// Write function tail + M_END + + ENDIF ;//ARM1136JS + + +;// Guarding implementation by the processor name + + IF ARM1136JS_U + +;//Input Registers +pSrcDst RN 0 +QP RN 1 + + +;//Output Registers + + +;//Local Scratch Registers +pQPdiv RN 4 +pQPmod RN 5 +pVRow RN 2 +QPmod RN 6 +shift RN 3 +rowLuma01 RN 1 +rowLuma23 RN 4 + +SrcDst00 RN 5 +SrcDst02 RN 6 +SrcDst10 RN 7 +SrcDst12 RN 8 +SrcDst20 RN 9 +SrcDst22 RN 10 +SrcDst30 RN 11 +SrcDst32 RN 12 + +temp1 RN
2 +temp2 RN 3 +temp3 RN 14 + + + ;// Allocate stack memory required by the function + + ;// Write function header + M_START armVCM4P10_DequantLumaAC4x4,r11 + + LDR pQPmod,=armVCM4P10_QPModuloTable + LDR pQPdiv,=armVCM4P10_QPDivTable + LDR pVRow,=armVCM4P10_VMatrixU16 + + LDRSB QPmod,[pQPmod,QP] ;// (QP%6) * 6 + LDRSB shift,[pQPdiv,QP] ;// Shift = QP / 6 + + LDR rowLuma01,[pVRow,QPmod]! ;// rowLuma01 = [0b|0a] + LDR rowLuma23,[pVRow,#4] ;// rowLuma23 = [0d|0c] + + ;// Load all the 16 'src' values + LDMIA pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32} + + + ;//********************************************************************************************* + ;// + ;// 'Shift' ranges between [0,8] + ;// So we can shift the packed rowLuma values [0b|0a] with a single LSL operation + ;// + ;//********************************************************************************************* + + LSL rowLuma01,rowLuma01,shift + LSL rowLuma23,rowLuma23,shift + + + ;//********************************************************************************************** + ;// + ;// The idea is to unroll the Loop completely + ;// All the 16 src values are loaded at once into 8 registers : SrcDst<y><x> (above) + ;// 0<= armVCM4P10_PosToVCol4x4[i] <=2 for any 'i<16' + ;// So the only values of pVRow[i] that need to be loaded are for i=0,1,2 + ;// These 3 values are loaded into rowLuma01 and rowLuma23 (above) + ;// We first calculate pVRow[armVCM4P10_PosToVCol4x4[i]] << Shift which fits into 16 bits (above) + ;// Then the product pSrcDst[i] * (pVRow[armVCM4P10_PosToVCol4x4[i]] << Shift) is calculated + ;// Here we interleave the PKHBT operations for various rows to avoid pipeline stalls + ;// + ;// We then pack the two 16 bit multiplication results into a word and store at one go + ;// + ;//********************************************************************************************** + + + ;// Row 1 + + + SMULTB temp1,SrcDst00,rowLuma23 ;// pSrcDst[1] *
(pVRow[2]<<Shift) + SMULBB SrcDst00,SrcDst00,rowLuma01 ;// pSrcDst[0] * (pVRow[0]<<Shift) + + SMULTB temp2,SrcDst02,rowLuma23 ;// pSrcDst[3] * (pVRow[2]<<Shift) + SMULBB SrcDst02,SrcDst02,rowLuma01 ;// pSrcDst[2] * (pVRow[0]<<Shift) + + PKHBT SrcDst00,SrcDst00,temp1,LSL #16 ;// Pack the first two product values + + + ;// Row 2 + SMULTT temp1,SrcDst10,rowLuma01 ;// pSrcDst[5] * (pVRow[1]<<Shift) + SMULBB SrcDst10,SrcDst10,rowLuma23 ;// pSrcDst[4] * (pVRow[2]<<Shift) + + PKHBT SrcDst02,SrcDst02,temp2,LSL #16 ;// Pack the next two product values + SMULTT temp2,SrcDst12,rowLuma01 ;// pSrcDst[7] * (pVRow[1]<<Shift) + SMULBB SrcDst12,SrcDst12,rowLuma23 ;// pSrcDst[6] * (pVRow[2]<<Shift) + + PKHBT SrcDst10,SrcDst10,temp1,LSL #16 ;// Pack the next two product values + + + ;// Row 3 + + SMULTB temp1,SrcDst20,rowLuma23 ;// pSrcDst[9] * (pVRow[2]<<Shift) + SMULBB SrcDst20,SrcDst20,rowLuma01 ;// pSrcDst[8] * (pVRow[0]<<Shift) + + PKHBT SrcDst12,SrcDst12,temp2,LSL #16 ;// Pack the next two product values + SMULTB temp2,SrcDst22,rowLuma23 ;// pSrcDst[11] * (pVRow[2]<<Shift) + SMULBB SrcDst22,SrcDst22,rowLuma01 ;// pSrcDst[10] * (pVRow[0]<<Shift) + + PKHBT SrcDst20,SrcDst20,temp1,LSL #16 ;// Pack the next two product values + + + + ;// Row 4 + + SMULTT temp1,SrcDst30,rowLuma01 ;// pSrcDst[13] * (pVRow[1]<<Shift) + SMULBB SrcDst30,SrcDst30,rowLuma23 ;// pSrcDst[12] * (pVRow[2]<<Shift) + + SMULTT temp3,SrcDst32,rowLuma01 ;// pSrcDst[15] * (pVRow[1]<<Shift) + SMULBB SrcDst32,SrcDst32,rowLuma23 ;// pSrcDst[14] * (pVRow[2]<<Shift) + + PKHBT SrcDst22,SrcDst22,temp2,LSL #16 ;// Pack the remaining product values + PKHBT SrcDst30,SrcDst30,temp1,LSL #16 + PKHBT SrcDst32,SrcDst32,temp3,LSL #16 + + + STMIA pSrcDst,{SrcDst00,SrcDst02,SrcDst10,SrcDst12,SrcDst20,SrcDst22,SrcDst30,SrcDst32} + + + ;// Set return value + + + + ;// Write function tail + M_END + + ENDIF ;//ARM1136JS_U + + + + + +;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd + +;// Guarding implementation by the
processor name + + IF ARM1136JS + +;//Input Registers +ppSrc RN 0 +pPred RN 1 +pDC RN 2 +pDst RN 3 + + +;//Output Registers +result RN 0 + +;//Local Scratch Registers +pDelta RN 4 +pDeltaTmp RN 6 +AC RN 5 ;//Load from stack +pPredTemp RN 7 +pDCTemp RN 8 +pDstTemp RN 9 +pDeltaArg1 RN 1 +pDeltaArg0 RN 0 +QP RN 1 ;//Load from stack +DCval RN 10 +DCvalCopy RN 11 +predstep RN 1 +dstStep RN 10 +ycounter RN 0 +PredVal1 RN 3 +PredVal2 RN 5 +DeltaVal1 RN 2 +DeltaVal2 RN 11 +PredVal RN 8 +tmpDeltaVal RN 6 +sum1 RN 12 +sum2 RN 14 + + + + ;// Allocate stack memory required by the function + M_ALLOC8 pBuffer, 32 + + + ;// Write function header + M_START omxVCM4P10_DequantTransformResidualFromPairAndAdd,r11 + + ;// Define stack arguments + M_ARG predStepOnStack, 4 + M_ARG dstStepOnStack,4 + M_ARG QPOnStack, 4 + M_ARG ACOnStack,4 + + + M_ADR pDelta,pBuffer + M_LDR AC,ACOnStack + + + ;// Save registers r1,r2,r3 before function call + MOV pPredTemp,pPred + MOV pDCTemp,pDC + MOV pDstTemp,pDst + + CMP AC,#0 + BEQ DCcase + MOV pDeltaArg1,pDelta ;// Set up r1 for armVCM4P10_UnpackBlock4x4 + + BL armVCM4P10_UnpackBlock4x4 + + M_LDR QP,QPOnStack ;// Set up r1 for DequantLumaAC4x4 + MOV pDeltaArg0,pDelta ;// Set up r0 for DequantLumaAC4x4 + + BL armVCM4P10_DequantLumaAC4x4 + + + CMP pDCTemp,#0 + LDRSHNE DCval,[pDCTemp] + MOV pDeltaArg0,pDelta ;// Set up r0 for armVCM4P10_TransformResidual4x4 + MOV pDeltaArg1,pDelta ;// Set up r1 for armVCM4P10_TransformResidual4x4 + STRHNE DCval,[pDelta] + + BL armVCM4P10_TransformResidual4x4 + B OutDCcase + + +DCcase + LDRSH DCval,[pDCTemp] + ADD DCval,DCval,#32 + ASR DCval,DCval,#6 + PKHBT DCval,DCval,DCval,LSL #16 ;// Duplicating the Lower halfword + MOV DCvalCopy, DCval ;// Needed for STRD + STRD DCval, [pDelta, #0] ;// pDelta[0] = pDelta[1] = pDelta[2] = pDelta[3] = DCval + STRD DCval, [pDelta, #8] ;// pDelta[4] = pDelta[5] = pDelta[6] = pDelta[7] = DCval + STRD DCval, [pDelta, #16] ;// pDelta[8] = pDelta[9] = pDelta[10] = pDelta[11] = DCval + STRD
DCval, [pDelta, #24] + + +OutDCcase + M_LDR predstep,predStepOnStack + M_LDR dstStep,dstStepOnStack + + LDMIA pDelta!,{tmpDeltaVal,DeltaVal2} ;// Pre load + MOV ycounter,#4 ;// Counter for the PredPlusDeltaLoop + LDR PredVal,[pPredTemp] ;// Pre load + +PredPlusDeltaLoop + + + SUBS ycounter,ycounter,#1 + ADD pPredTemp,pPredTemp,predstep ;// Increment pPred ptr + + PKHBT DeltaVal1,tmpDeltaVal,DeltaVal2,LSL #16 ;// DeltaVal1 = [C A] + PKHTB DeltaVal2,DeltaVal2,tmpDeltaVal,ASR #16 ;// DeltaVal2 = [D B] + + UXTB16 PredVal1,PredVal ;// PredVal1 = [0c0a] + UXTB16 PredVal2,PredVal,ROR #8 ;// PredVal2 = [0d0b] + + LDRGT PredVal,[pPredTemp] ;// Pre load + + QADD16 sum2,DeltaVal2,PredVal2 ;// Add and saturate to 16 bits + QADD16 sum1,DeltaVal1,PredVal1 + + USAT16 sum2,#8,sum2 ;// armClip(0,255,sum2) + USAT16 sum1,#8,sum1 + + LDMGTIA pDelta!,{tmpDeltaVal,DeltaVal2} ;// Pre load + + ORR sum1,sum1,sum2,LSL #8 ;// sum1 = [dcba] + STR sum1,[pDstTemp] + + ADD pDstTemp,pDstTemp,dstStep ;// Increment pDst ptr + BGT PredPlusDeltaLoop + + + ;// Set return value + MOV result,#OMX_Sts_NoErr + +End + + + ;// Write function tail + + M_END + + ENDIF ;//ARM1136JS + + +;// Function: omxVCM4P10_DequantTransformResidualFromPairAndAdd + +;// Guarding implementation by the processor name + + + + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s new file mode 100644 index 0000000..6d960f0 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s @@ -0,0 +1,336 @@ +;// +;// +;// File Name: omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// In-place deblocking across horizontal chroma edges: for each group of 4 pixels the rows
+;// p1,p0,q0,q1 around pQ0 are loaded as packed words (one byte lane per pixel), the filter
+;// decision is made per lane, and only p0/q0 are written back.
+
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+
+        IF ARM1136JS
+
+MASK_0 EQU 0x00000000
+MASK_1 EQU 0x01010101 ;// 1 in every byte lane: MUL by it replicates a byte; also the per-lane "filter on" flag
+LOOP_COUNT EQU 0x50000000 ;// ADDS XY,XY,XY shifts this left each pass: carry-out ends LoopX, a zero result ends LoopY (2 x 2 passes)
+
+;// Declare input registers
+
+pSrcDst RN 0
+srcdstStep RN 1
+pAlphaArg RN 2
+pBetaArg RN 3
+
+pThresholds RN 6
+pBS RN 9
+pQ0 RN 0
+bS RN 10
+
+alpha RN 6
+alpha0 RN 6
+alpha1 RN 8
+
+beta RN 7
+beta0 RN 7
+beta1 RN 9
+
+;// Declare Local/Temporary variables
+
+;// Pixels
+p_0 RN 3
+p_1 RN 5
+q_0 RN 8
+q_1 RN 9
+
+;// Filtering
+
+dp0q0 RN 12
+dp1p0 RN 12
+dq1q0 RN 12
+
+ap0q0 RN 4
+filt RN 2
+
+m00 RN 14
+m01 RN 11
+
+pQ0 RN 0
+Step RN 1
+
+;// Output
+
+P_0 RN 6
+Q_0 RN 7
+
+;//Declarations for bSLT4 kernel
+
+tC RN 12
+tC0 RN 5
+tC1 RN 12
+pos RN 5
+neg RN 9
+
+;//Declarations for bSGE4 kernel
+
+
+;// Miscellaneous
+XY RN 8
+
+a RN 10
+t1 RN 10
+t2 RN 12
+t3 RN 14
+t4 RN 6
+t5 RN 5
+
+;// NOTE(review): pXYBS reserves 4 bytes but is accessed as a doubleword (M_STRD/M_LDRD XY, pBS); presumably ppBS is laid out directly after it so "M_STR pBS, ppBS" refreshes the second word - confirm against the M_ALLOC macros in armCOMM_s.h.
+        ;// Allocate stack memory
+        M_ALLOC4 ppThresholds,4
+        M_ALLOC8 pAlphaBeta0,8
+        M_ALLOC8 pAlphaBeta1,8
+        M_ALLOC8 pXYBS,4
+        M_ALLOC4 ppBS,4
+
+        ;// Function header
+        M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r11
+
+        ;//Input arguments on the stack
+        M_ARG ppThresholdsArg, 4
+        M_ARG ppBSArg, 4
+
+        LDRB alpha1, [pAlphaArg,#1]
+        LDRB beta1, [pBetaArg,#1]
+        M_LDR pThresholds, ppThresholdsArg
+        LDR a,=MASK_1
+        LDRB beta0, [pBetaArg]
+        M_STR pThresholds, ppThresholds
+        LDRB alpha0, [pAlphaArg]
+
+        MUL alpha1, alpha1, a ;// replicate each alpha/beta byte into all four lanes
+        MUL beta1, beta1, a
+        MUL alpha0, alpha0, a
+        MUL beta0, beta0, a
+
+        M_STRD alpha1, beta1, pAlphaBeta1
+        M_LDR pBS, ppBSArg
+        M_STRD alpha0, beta0, pAlphaBeta0
+
+        LDR XY,=LOOP_COUNT
+        M_STRD XY, pBS, pXYBS
+
+        SUB pQ0, pQ0, srcdstStep, LSL #1 ;// start two rows above the edge (row p1)
+LoopY
+LoopX
+;//---------------Load Pixels-------------------
+        LDRH bS, [pBS], #2 ;// two bS bytes; each one governs two of the four lanes (see the filt masking below)
+
+        M_STR pBS, ppBS
+        M_LDR p_1, [pQ0],srcdstStep
+
+        CMP bS, #0
+
+        M_LDR p_0, [pQ0],srcdstStep
+        M_LDR q_0, [pQ0],srcdstStep
+        M_LDR q_1, [pQ0]
+        LDR m01, =MASK_1 ;// 01010101 mask
+        BEQ NoFilterBS0
+
+
+        ;// p_0 = [r3p0 r2p0 r1p0 r0p0]
+        ;// p_1 = [r3p1 r2p1 r1p1 r0p1]
+        ;// q_0 = [r3q0 r2q0 r1q0 r0q0]
+        ;// q_1 = [r3q1 r2q1 r1q1 r0q1]
+
+;//--------------Filtering Decision -------------------
+        MOV m00, #MASK_0 ;// 00000000 mask
+
+        MOV filt, m01
+        TST bS, #0xff00
+        MOVEQ filt, filt, LSR #16 ;// second bS byte is 0: drop the flags of the upper two lanes
+        TST bS, #0xff
+        MOVEQ filt, filt, LSL #16 ;// first bS byte is 0: drop the flags of the lower two lanes
+        TST bS, #4 ;// Z from this test is consumed by "BEQ bSLT4" below; the USUB8/SEL in between only touch the GE flags
+
+;// |x-y| idiom: USUB8 sets GE per lane where no borrow occurred, SEL then picks the non-negative difference.
+        ;// Check |p0-q0|<Alpha
+        USUB8 dp0q0, p_0, q_0
+        USUB8 a, q_0, p_0
+        SEL ap0q0, a, dp0q0
+        USUB8 a, ap0q0, alpha
+        SEL filt, m00, filt ;// lanes with |p0-q0| >= alpha lose their flag
+
+        ;// Check |p1-p0|<Beta
+        USUB8 dp1p0, p_1, p_0
+        USUB8 a, p_0, p_1
+        SEL a, a, dp1p0
+        USUB8 a, a, beta
+        SEL filt, m00, filt
+
+        ;// Check |q1-q0|<Beta
+        USUB8 dq1q0, q_1, q_0
+        USUB8 a, q_0, q_1
+        SEL a, a, dq1q0
+        USUB8 a, a, beta
+        SEL filt, m00, filt
+
+        BEQ bSLT4
+;//-------------------Filter--------------------
+bSGE4
+        ;//---------bSGE4 Execution---------------
+        CMP filt, #0
+
+        M_LDR pThresholds, ppThresholds
+
+        ;// Compute P0b
+        UHADD8 t1, p_0, q_1
+        BEQ NoFilterFilt0
+        MVN t2, p_1
+        UHSUB8 t1, t1, t2
+        USUB8 t2, filt, m01 ;// GE is set exactly in the lanes whose filt flag is 1; used by both SELs below
+        EOR t1, t1, m01, LSL #7
+
+        ADD pThresholds,pThresholds, #2 ;// tC0 is not read on this path, but the pointer still advances
+
+        ;// Compute Q0b
+        UHADD8 t2, q_0, p_1
+        MVN t3, q_1
+        UHSUB8 t2, t2, t3
+        M_STR pThresholds, ppThresholds
+        SEL P_0, t1, p_0
+        EOR t2, t2, m01, LSL #7
+        SEL Q_0, t2, q_0
+
+        SUB pQ0, pQ0, srcdstStep, LSL #1 ;// back from row q1 to row p0
+        B StoreResultAndExit
+
+;//---------- Exit of LoopX --------------
+;//---- for the case of no filtering -----
+
+NoFilterFilt0
+NoFilterBS0
+        M_LDR pThresholds, ppThresholds
+        SUB pQ0, pQ0, srcdstStep, LSL #1 ;// undo the three post-indexed row loads...
+        SUB pQ0, pQ0, srcdstStep
+        ADD pQ0, pQ0, #4 ;// ...and step to the next 4-pixel group
+        ADD pThresholds, pThresholds, #2
+
+        ;// Load counter for LoopX
+        M_LDRD XY, pBS, pXYBS
+        M_STR pThresholds, ppThresholds
+        M_LDRD alpha, beta, pAlphaBeta0
+
+        ;// Align the pointer
+        ADDS XY, XY, XY
+        M_STR XY, pXYBS
+        BCC LoopY
+        B ExitLoopY
+
+bSLT4
+        ;//---------bSLT4 Execution---------------
+        M_LDR pThresholds, ppThresholds
+        CMP filt, #0
+
+        ;// Since beta <= 18 and alpha <= 255 we know
+        ;// -254 <= p0-q0 <= 254
+        ;// -17 <= q1-q0 <= 17
+        ;// -17 <= p1-p0 <= 17
+
+        ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
+        ;//
+        ;// Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
+        ;// = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
+        ;// = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
+
+        USUB8 t1, p_1, p_0
+        USUB8 t2, q_1, q_0
+        BEQ NoFilterFilt0
+
+        LDRB tC0, [pThresholds],#1
+        SSUB8 t1, t1, t2
+        LDRB tC1, [pThresholds],#1
+        M_STR pThresholds, ppThresholds
+        UHSUB8 t4, p_0, q_0
+        ORR tC, tC0, tC1, LSL #16 ;// tC0 -> lane 0, tC1 -> lane 2
+        USUB8 t5, p_0, q_0
+        AND t5, t5, m01
+        SHSUB8 t1, t1, t5
+        ORR tC, tC, LSL #8 ;// ...copied into lanes 1 and 3 as well
+        SSUB8 t1, t1, t5
+        SHSUB8 t1, t1, t4
+        UQADD8 tC, tC, m01 ;// per-lane tC = tC0 + 1
+        SADD8 t1, t1, m01
+        USUB8 t5, filt, m01 ;// GE set in the lanes whose filt flag is 1
+        SHSUB8 t1, t1, t4
+        SEL tC, tC, m00 ;// tC forced to 0 in lanes that must not be filtered
+
+        ;// Split into positive and negative part and clip
+
+        SSUB8 t1, t1, m00
+        SEL pos, t1, m00
+        USUB8 neg, pos, t1
+        USUB8 t3, pos, tC
+        SEL pos, tC, pos
+        USUB8 t3, neg, tC
+        SEL neg, tC, neg
+        UQADD8 P_0, p_0, pos
+        UQSUB8 Q_0, q_0, pos
+        UQSUB8 P_0, P_0, neg
+        UQADD8 Q_0, Q_0, neg
+
+        SUB pQ0, pQ0, srcdstStep, LSL #1 ;// back from row q1 to row p0
+
+        ;// Choose to store the filtered
+        ;// value or the original pixel
+        USUB8 t1, filt, m01
+        SEL P_0, P_0, p_0
+        SEL Q_0, Q_0, q_0
+
+StoreResultAndExit
+
+        ;//---------Store result---------------
+
+        ;// P_0 = [r0p0 r1p0 r2p0 r3p0]
+        ;// Q_0 = [r0q0 r1q0 r2q0 r3q0]
+
+        M_STR P_0, [pQ0], srcdstStep
+        STR Q_0, [pQ0], #4
+
+        M_LDRD XY, pBS, pXYBS
+        M_LDRD alpha, beta, pAlphaBeta0
+
+        SUB pQ0, pQ0, srcdstStep, LSL #1 ;// back up to row p1 of the next 4-pixel group
+
+        ADDS XY, XY, XY
+        M_STR XY, pXYBS
+        BCC LoopX
+
+;//-------- Common Exit of LoopY -----------------
+        ;// Align the pointers
+
+ExitLoopY
+        ADD pBS, pBS, #4
+        M_LDRD alpha, beta, pAlphaBeta1 ;// the second LoopY pass runs with alpha1/beta1
+        SUB pQ0, pQ0, #8 ;// back 8 pixels and down 4 rows
+        ADD pQ0, pQ0, srcdstStep, LSL #2
+        M_STRD alpha, beta, pAlphaBeta0
+
+        BNE LoopY ;// Z still comes from the last "ADDS XY, XY, XY"
+        MOV r0, #OMX_Sts_NoErr
+
+;//-----------------End Filter--------------------
+        M_END
+
+        ENDIF
+
+        END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
new file mode 100644
index 0000000..00c8354
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
@@ -0,0 +1,437 @@
+;//
+;//
+;// File Name: omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// In-place deblocking across vertical chroma edges: halfwords (two pixels) from four rows are
+;// transposed into packed words p_1,p_0,q_0,q_1 (one byte lane per row), filtered per lane, and
+;// p0/q0 are written back byte by byte.
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+
+        IF ARM1136JS
+
+
+MASK_0 EQU 0x00000000
+MASK_1 EQU 0x01010101 ;// 1 in every byte lane: MUL by it replicates a byte; also the per-lane "filter on" flag
+MASK_2 EQU 0x0000ff00 ;// keeps byte 1 of a word during the transpose
+LOOP_COUNT EQU 0x50000000 ;// ADDS XY,XY,XY shifts this left each pass: carry-out ends LoopX, a zero result ends LoopY (2 x 2 passes)
+
+;// Declare input registers
+
+pSrcDst RN 0
+srcdstStep RN 1
+pAlphaArg RN 2
+pBetaArg RN 3
+
+pThresholds RN 6
+pBS RN 9
+pQ0 RN 0
+bS RN 2
+bSTemp RN 10
+
+alpha RN 6
+alpha0 RN 6
+alpha1 RN 8
+
+beta RN 7
+beta0 RN 7
+beta1 RN 9
+
+;// Declare Local/Temporary variables
+
+;// Pixels
+p_0 RN 3
+p_1 RN 5
+q_0 RN 8
+q_1 RN 9
+
+;// Unpacking
+mask RN 11
+
+row0 RN 2
+row1 RN 4
+row2 RN 5
+row3 RN 3
+
+row4 RN 8
+row5 RN 9
+row6 RN 10
+row7 RN 12
+
+tunpk0 RN 2
+tunpk2 RN 10
+tunpk3 RN 12
+
+tunpk4 RN 4
+tunpk5 RN 5
+tunpk6 RN 14
+tunpk7 RN 2
+
+;// Filtering
+
+dp0q0 RN 12
+dp1p0 RN 12
+dq1q0 RN 12
+
+ap0q0 RN 4
+filt RN 2
+
+m00 RN 14
+m01 RN 11
+
+pQ0 RN 0
+Step RN 1
+
+;// Output
+
+P_0 RN 6
+Q_0 RN 7
+
+;//Declarations for bSLT4 kernel
+
+tC RN 12
+tC0 RN 5
+tC1 RN 12
+pos RN 5
+neg RN 9
+
+;//Declarations for bSGE4 kernel
+
+
+;// Miscellaneous
+XY RN 8
+
+a RN 10
+t1 RN 10
+t2 RN 12
+t3 RN 14
+t4 RN 6
+t5 RN 5
+
+;// NOTE(review): pXYBS reserves 4 bytes but is accessed as a doubleword (M_STRD/M_LDRD XY, pBS); presumably ppBS is laid out directly after it so "M_STR pBS, ppBS" refreshes the second word - confirm against the M_ALLOC macros in armCOMM_s.h.
+        ;// Allocate stack memory
+        M_ALLOC4 ppThresholds,4
+        M_ALLOC8 pAlphaBeta0,8
+        M_ALLOC8 pAlphaBeta1,8
+        M_ALLOC8 pXYBS,4
+        M_ALLOC4 ppBS,4
+
+        ;// Function header
+        M_START omxVCM4P10_FilterDeblockingChroma_VerEdge_I, r11
+
+        ;//Input arguments on the stack
+        M_ARG ppThresholdsArg, 4
+        M_ARG ppBSArg, 4
+
+        LDRB alpha1, [pAlphaArg,#1]
+        LDRB beta1, [pBetaArg,#1]
+        M_LDR pThresholds, ppThresholdsArg
+        LDR a,=MASK_1
+        LDRB beta0, [pBetaArg]
+        M_STR pThresholds, ppThresholds
+        LDRB alpha0, [pAlphaArg]
+
+        MUL alpha1, alpha1, a ;// replicate each alpha/beta byte into all four lanes
+        MUL beta1, beta1, a
+        MUL alpha0, alpha0, a
+        MUL beta0, beta0, a
+
+        M_STRD alpha1, beta1, pAlphaBeta1
+        M_LDR pBS, ppBSArg
+        M_STRD alpha0, beta0, pAlphaBeta0
+
+        LDR XY,=LOOP_COUNT
+        M_STRD XY, pBS, pXYBS
+
+
+LoopY
+LoopX
+;//---------------Load Pixels-------------------
+
+;//----------------Pack q0-q1-----------------------
+        LDRH bS, [pBS], #8 ;// bS entries for consecutive LoopX passes are 8 bytes apart here
+        LDR mask, =MASK_2
+
+        M_LDRH row4, [pQ0], srcdstStep
+        CMP bS, #0
+        M_STR pBS, ppBS
+        M_LDRH row5, [pQ0], srcdstStep
+        BEQ.W NoFilterBS0
+        LDRH row6, [pQ0]
+        LDRH row7, [pQ0, srcdstStep]
+
+        ;// row4 = [0 0 r0q0 r0q1]
+        ;// row5 = [0 0 r1q0 r1q1]
+        ;// row6 = [0 0 r2q0 r2q1]
+        ;// row7 = [0 0 r3q0 r3q1]
+
+        AND tunpk4, mask, row4
+        AND tunpk5, mask, row4, LSL#8
+        UXTAB tunpk4, tunpk4, row5, ROR#8
+        UXTAB tunpk5, tunpk5, row5
+        AND tunpk6, mask, row6
+        AND tunpk7, mask, row6, LSL#8
+        UXTAB tunpk6, tunpk6, row7, ROR#8
+        UXTAB tunpk7, tunpk7, row7
+
+        ;// tunpk4 = [0 0 r0q0 r1q0]
+        ;// tunpk5 = [0 0 r0q1 r1q1]
+        ;// tunpk6 = [0 0 r2q0 r3q0]
+        ;// tunpk7 = [0 0 r2q1 r3q1]
+
+        SUB pQ0, pQ0, srcdstStep, LSL #1 ;// back to row 0...
+        SUB pQ0, pQ0, #2 ;// ...and two pixels left, onto the p side of the edge
+
+        PKHBT q_1, tunpk6, tunpk4, LSL#16
+        PKHBT q_0, tunpk7, tunpk5, LSL#16
+
+        ;// q_0 = [r0q0 r1q0 r2q0 r3q0]
+        ;// q_1 = [r0q1 r1q1 r2q1 r3q1]
+
+
+;//----------------Pack p0-p1-----------------------
+
+        M_LDRH row0, [pQ0], srcdstStep
+        M_LDRH row1, [pQ0], srcdstStep
+        LDRH row2, [pQ0]
+        LDRH row3, [pQ0, srcdstStep]
+
+        ;// row0 = [0 0 r0p0 r0p1]
+        ;// row1 = [0 0 r1p0 r1p1]
+        ;// row2 = [0 0 r2p0 r2p1]
+        ;// row3 = [0 0 r3p0 r3p1]
+
+        AND tunpk2, mask, row0
+        AND tunpk6, mask, row0, LSL#8
+        UXTAB tunpk2, tunpk2, row1, ROR#8
+        UXTAB tunpk6, tunpk6, row1
+
+        AND tunpk0, mask, row2
+        AND tunpk3, mask, row2, LSL#8
+        UXTAB tunpk0, tunpk0, row3, ROR#8
+        UXTAB tunpk3, tunpk3, row3
+
+        ;// tunpk2 = [0 0 r0p0 r1p0]
+        ;// tunpk6 = [0 0 r0p1 r1p1]
+        ;// tunpk0 = [0 0 r2p0 r3p0]
+        ;// tunpk3 = [0 0 r2p1 r3p1]
+
+        PKHBT p_0, tunpk0, tunpk2, LSL#16
+        M_LDR bSTemp, ppBS ;// bS (r2) was reused as row0/tunpk0 above, so it is re-read through the saved pointer
+        PKHBT p_1, tunpk3, tunpk6, LSL#16
+
+        ;// p_0 = [r0p0 r1p0 r2p0 r3p0]
+        ;// p_1 = [r0p1 r1p1 r2p1 r3p1]
+
+;//--------------Filtering Decision -------------------
+        USUB8 dp0q0, p_0, q_0
+        LDR m01, =MASK_1
+        LDRH bSTemp, [bSTemp ,#-8] ;// -8 undoes the post-increment of the LDRH at the top of LoopX
+        MOV m00, #MASK_0 ;// 00000000 mask
+
+        MOV filt, m01
+        TST bSTemp, #0xff00
+        MOVEQ filt, filt, LSL #16 ;// second bS byte is 0: drop the flags of the lower two lanes (rows 2,3)
+        TST bSTemp, #0xff
+        MOVEQ filt, filt, LSR #16 ;// first bS byte is 0: drop the flags of the upper two lanes (rows 0,1)
+        TST bSTemp, #4 ;// Z from this test is consumed by "BEQ bSLT4" below; the USUB8/SEL in between only touch the GE flags
+
+        ;// Check |p0-q0|<Alpha
+        USUB8 a, q_0, p_0
+        SEL ap0q0, a, dp0q0
+        USUB8 a, ap0q0, alpha
+        SEL filt, m00, filt ;// lanes with |p0-q0| >= alpha lose their flag
+
+        ;// Check |p1-p0|<Beta
+        USUB8 dp1p0, p_1, p_0
+        USUB8 a, p_0, p_1
+        SEL a, a, dp1p0
+        USUB8 a, a, beta
+        SEL filt, m00, filt
+
+        ;// Check |q1-q0|<Beta
+        USUB8 dq1q0, q_1, q_0
+        USUB8 a, q_0, q_1
+        SEL a, a, dq1q0
+        USUB8 a, a, beta
+        SEL filt, m00, filt
+
+        BEQ bSLT4
+;//-------------------Filter--------------------
+bSGE4
+        ;//---------bSGE4 Execution---------------
+        CMP filt, #0
+
+        M_LDR pThresholds, ppThresholds
+
+        ;// Compute P0b
+        UHADD8 t1, p_0, q_1
+        BEQ NoFilterFilt0
+        MVN t2, p_1
+        UHSUB8 t1, t1, t2
+        USUB8 t2, filt, m01 ;// GE is set exactly in the lanes whose filt flag is 1; used by both SELs below
+        EOR t1, t1, m01, LSL #7
+
+        ADD pThresholds,pThresholds, #4 ;// tC0 is not read on this path, but the pointer still advances
+
+        ;// Compute Q0b
+        UHADD8 t2, q_0, p_1
+        MVN t3, q_1
+        UHSUB8 t2, t2, t3
+        M_STR pThresholds, ppThresholds
+        SEL P_0, t1, p_0
+        EOR t2, t2, m01, LSL #7
+        SEL Q_0, t2, q_0
+
+        B StoreResultAndExit
+
+;//---------- Exit of LoopX --------------
+;//---- for the case of no filtering -----
+
+NoFilterFilt0
+        ADD pQ0, pQ0, #2 ;// undo the "SUB pQ0, pQ0, #2" done before the p side was loaded
+NoFilterBS0
+        M_LDR pThresholds, ppThresholds
+        SUB pQ0, pQ0, srcdstStep, LSL #1 ;// back to row 0...
+        ADD pQ0, pQ0, #4 ;// ...and on to the next edge, 4 pixels to the right
+        ADD pThresholds, pThresholds, #4
+        ;// Load counter for LoopX
+        M_LDRD XY, pBS, pXYBS
+        M_STR pThresholds, ppThresholds
+        M_LDRD alpha, beta, pAlphaBeta1
+
+        ;// Align the pointer
+        ADDS XY, XY, XY
+        M_STR XY, pXYBS
+        BCC LoopY
+        B ExitLoopY
+
+bSLT4
+        ;//---------bSLT4 Execution---------------
+        M_LDR pThresholds, ppThresholds
+        CMP filt, #0
+
+
+        ;// Since beta <= 18 and alpha <= 255 we know
+        ;// -254 <= p0-q0 <= 254
+        ;// -17 <= q1-q0 <= 17
+        ;// -17 <= p1-p0 <= 17
+
+        ;// delta = Clip3( -tC, tC, ((((q0-p0)<<2) + (p1-q1) + 4)>>3))
+        ;//
+        ;// Calculate A = (((q0-p0)<<2) + (p1-q1) + 4)>>3
+        ;// = (4*q0 - 4*p0 + p1 - q1 + 4)>>3
+        ;// = ((p1-p0) - (q1-q0) - 3*(p0-q0) + 4)>>3
+
+        USUB8 t1, p_1, p_0
+        USUB8 t2, q_1, q_0
+        BEQ NoFilterFilt0
+
+        LDRB tC0, [pThresholds], #1
+        SSUB8 t1, t1, t2
+        LDRB tC1, [pThresholds], #3 ;// 1 + 3: the pointer moves 4 bytes per pass, as on the other paths
+        M_STR pThresholds, ppThresholds
+        UHSUB8 t4, p_0, q_0
+        ORR tC, tC1, tC0, LSL #16 ;// tC1 -> lane 0, tC0 -> lane 2 (row 0 sits in the top lane here)
+        USUB8 t5, p_0, q_0
+        AND t5, t5, m01
+        SHSUB8 t1, t1, t5
+        ORR tC, tC, LSL #8 ;// ...copied into lanes 1 and 3 as well
+        SSUB8 t1, t1, t5
+        SHSUB8 t1, t1, t4
+        UQADD8 tC, tC, m01 ;// per-lane tC = tC0 + 1
+        SADD8 t1, t1, m01
+        USUB8 t5, filt, m01 ;// GE set in the lanes whose filt flag is 1
+        SHSUB8 t1, t1, t4
+        SEL tC, tC, m00 ;// tC forced to 0 in lanes that must not be filtered
+
+        ;// Split into positive and negative part and clip
+
+        SSUB8 t1, t1, m00
+        SEL pos, t1, m00
+        USUB8 neg, pos, t1
+        USUB8 t3, pos, tC
+        SEL pos, tC, pos
+        USUB8 t3, neg, tC
+        SEL neg, tC, neg
+        UQADD8 P_0, p_0, pos
+        UQSUB8 Q_0, q_0, pos
+        UQSUB8 P_0, P_0, neg
+        UQADD8 Q_0, Q_0, neg
+
+        ;// Choose to store the filtered
+        ;// value or the original pixel
+        USUB8 t1, filt, m01
+        SEL P_0, P_0, p_0
+        SEL Q_0, Q_0, q_0
+
+StoreResultAndExit
+
+        ;//---------Store result---------------
+
+        ;// P_0 = [r0p0 r1p0 r2p0 r3p0]
+        ;// Q_0 = [r0q0 r1q0 r2q0 r3q0]
+
+        SUB pQ0, pQ0, srcdstStep, LSL #1 ;// back to row 0
+        ADD pQ0, pQ0, #1 ;// pQ0 -> the p0 byte; q0 is the byte at +1
+
+        MOV t1, Q_0, LSR #24
+        STRB t1, [pQ0, #1]
+        MOV t1, P_0, LSR #24
+        M_STRB t1, [pQ0], srcdstStep
+
+        MOV t1, Q_0, LSR #16
+        STRB t1, [pQ0, #1]
+        MOV t1, P_0, LSR #16
+        M_STRB t1, [pQ0], srcdstStep
+
+        MOV t1, P_0, LSR #8
+        STRB t1, [pQ0]
+        STRB P_0, [pQ0, srcdstStep]
+        MOV t1, Q_0, LSR #8
+        STRB t1, [pQ0, #1]!
+        STRB Q_0, [pQ0, srcdstStep]
+
+        M_LDRD XY, pBS, pXYBS
+        M_LDRD alpha, beta, pAlphaBeta1
+
+        SUB pQ0, pQ0, srcdstStep, LSL #1 ;// back to row 0...
+        ADD pQ0, pQ0, #4 ;// ...and on to the next edge, 4 pixels to the right
+
+        ADDS XY, XY, XY
+        M_STR XY, pXYBS
+        BCC LoopX
+
+;//-------- Common Exit of LoopY -----------------
+        ;// Align the pointers
+
+ExitLoopY
+
+        M_LDR pThresholds, ppThresholds
+        SUB pQ0, pQ0, #8 ;// back 8 pixels and down 4 rows
+        ADD pQ0, pQ0, srcdstStep, LSL #2
+        SUB pBS, pBS, #14 ;// two passes advanced pBS by 2*8; net step is 2
+        SUB pThresholds, pThresholds, #6 ;// two passes advanced pThresholds by 2*4; net step is 2
+        M_STR pThresholds, ppThresholds
+
+        M_LDRD alpha, beta, pAlphaBeta0
+
+        BNE LoopY ;// Z still comes from the last "ADDS XY, XY, XY"
+        MOV r0, #OMX_Sts_NoErr
+;//-----------------End Filter--------------------
+
+        M_END
+
+        ENDIF
+
+        END
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
new file mode 100644
index 0000000..1b84080
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
@@ -0,0 +1,331 @@
+;//
+;//
+;// File Name: omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// In-place deblocking across horizontal luma edges: for each group of 4 pixels the eight rows
+;// p3..p0,q0..q3 are loaded as packed words, the filter decision is made per byte lane, and the
+;// arithmetic is delegated to the imported *_unsafe kernels before the rows are stored back.
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+        IMPORT armVCM4P10_DeblockingLumabSLT4_unsafe
+        IMPORT armVCM4P10_DeblockingLumabSGE4_unsafe
+
+
+
+        IF ARM1136JS
+
+
+MASK_0 EQU 0x00000000
+MASK_1 EQU 0x01010101 ;// 1 in every byte lane: MUL by it replicates a byte; also the per-lane "filter on" flag
+MASK_2 EQU 0xff00ff00
+LOOP_COUNT EQU 0x11110000 ;// ADDS XY,XY,XY shifts this left each pass: carry-out ends LoopX, a zero result ends LoopY (4 x 4 passes)
+
+;// Declare input registers
+
+pSrcDst RN 0
+srcdstStep RN 1
+pAlphaArg RN 2
+pBetaArg RN 3
+
+pThresholds RN 14
+pBS RN 9
+pQ0 RN 0
+bS RN 2
+
+alpha RN 6
+alpha0 RN 6
+alpha1 RN 8
+
+beta RN 7
+beta0 RN 7
+beta1 RN 9
+
+;// Declare Local/Temporary variables
+
+;// Pixels
+p_0 RN 3
+p_1 RN 5
+p_2 RN 4
+p_3 RN 2
+q_0 RN 8
+q_1 RN 9
+q_2 RN 10
+q_3 RN 12
+
+;// Filtering
+
+dp0q0 RN 12
+dp1p0 RN 12
+dq1q0 RN 12
+dp2p0 RN 12
+dq2q0 RN 12
+
+ap0q0 RN 1
+filt RN 2
+
+m00 RN 14
+m01 RN 11
+
+apflg RN 0
+aqflg RN 6
+apqflg RN 0
+
+
+;//Declarations for bSLT4 kernel
+
+tC0 RN 7
+ptC0 RN 1
+
+pQ0a RN 0
+Stepa RN 1
+maska RN 14
+
+P0a RN 1
+P1a RN 8
+Q0a RN 7
+Q1a RN 11
+
+;//Declarations for bSGE4 kernel
+
+pQ0b RN 0
+Stepb RN 1
+maskb RN 14
+
+P0b RN 6
+P1b RN 7
+P2b RN 1
+P3b RN 3
+
+Q0b RN 9
+Q1b RN 0
+Q2b RN 2
+Q3b RN 3
+
+;// Miscellaneous
+XY RN 8
+t0 RN 3
+t1 RN 12
+t2 RN 14
+t7 RN 7
+t4 RN 4
+t5 RN 1
+t8 RN 6
+a RN 0
+
+;// NOTE(review): pXYBS and ppQ0Step reserve 4 bytes each but are accessed as doublewords
+;// (M_STRD/M_LDRD); presumably ppBS and pStep are laid out directly after them and supply the
+;// second word - confirm against the M_ALLOC macros in armCOMM_s.h.
+
+        ;// Allocate stack memory
+        M_ALLOC4 ppThresholds,4
+        M_ALLOC4 pQ_3,4
+        M_ALLOC4 pP_3,4
+        M_ALLOC8 pAlphaBeta0,8
+        M_ALLOC8 pAlphaBeta1,8
+        M_ALLOC8 pXYBS,4
+        M_ALLOC4 ppBS,4
+        M_ALLOC8 ppQ0Step,4
+        M_ALLOC4 pStep,4
+
+        ;// Function header
+        M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11
+
+        ;//Input arguments on the stack
+        M_ARG ppThresholdsArg, 4
+        M_ARG ppBSArg, 4
+
+        LDR t4,=MASK_1
+
+        LDRB alpha0, [pAlphaArg]
+        LDRB beta0, [pBetaArg]
+        LDRB alpha1, [pAlphaArg,#1]
+        LDRB beta1, [pBetaArg,#1]
+
+        MUL alpha0, alpha0, t4 ;// replicate each alpha/beta byte into all four lanes
+        MUL beta0, beta0, t4
+        MUL alpha1, alpha1, t4
+        MUL beta1, beta1, t4
+
+        M_STRD alpha0, beta0, pAlphaBeta0
+        M_STRD alpha1, beta1, pAlphaBeta1
+
+        LDR XY,=LOOP_COUNT
+        M_LDR pBS, ppBSArg
+        M_LDR pThresholds, ppThresholdsArg
+        M_STR srcdstStep, pStep
+        M_STRD XY, pBS, pXYBS
+        SUB pQ0, pQ0, srcdstStep, LSL #2 ;// start four rows above the edge (row p3)
+        M_STR pThresholds, ppThresholds
+LoopY
+LoopX
+;//---------------Load Pixels-------------------
+        M_STR pQ0, ppQ0Step
+        M_LDR p_3, [pQ0], srcdstStep
+        M_LDR p_2, [pQ0], srcdstStep
+        M_STR p_3, pP_3
+        LDRB bS, [pBS], #1 ;// one bS byte per 4-pixel group
+        M_STR pBS, ppBS
+        M_LDR p_1, [pQ0], srcdstStep
+        CMP bS, #0
+        M_LDR p_0, [pQ0], srcdstStep
+        M_LDR q_0, [pQ0], srcdstStep
+        M_LDR q_1, [pQ0], srcdstStep
+        M_LDR q_2, [pQ0], srcdstStep
+        M_LDR q_3, [pQ0], srcdstStep
+        BEQ NoFilterBS0
+        CMP bS, #4 ;// flags from this compare are consumed by "BLT bSLT4" below; nothing in between writes N/Z/C/V
+        M_STR q_3, pQ_3
+
+;//--------------Filtering Decision -------------------
+        LDR m01, =MASK_1 ;// 01010101 mask
+        MOV m00, #MASK_0 ;// 00000000 mask
+
+        ;// Check |p0-q0|<Alpha
+        USUB8 dp0q0, p_0, q_0
+        USUB8 a, q_0, p_0
+        SEL ap0q0, a, dp0q0
+        USUB8 a, ap0q0, alpha
+        SEL filt, m00, m01 ;// filt lane = 1 where |p0-q0| < alpha, else 0
+
+        ;// Check |p1-p0|<Beta
+        USUB8 dp1p0, p_1, p_0
+        USUB8 a, p_0, p_1
+        SEL a, a, dp1p0
+        USUB8 a, a, beta
+        SEL filt, m00, filt
+
+        ;// Check |q1-q0|<Beta
+        USUB8 dq1q0, q_1, q_0
+        USUB8 a, q_0, q_1
+        SEL a, a, dq1q0
+        USUB8 a, a, beta
+        SEL filt, m00, filt
+
+        ;// Check ap<Beta
+        USUB8 dp2p0, p_2, p_0
+        USUB8 a, p_0, p_2
+        SEL a, a, dp2p0
+        USUB8 a, a, beta
+        SEL apflg, m00, filt ;// apflg = filt && (ap<beta)
+
+        ;// Check aq<Beta
+        USUB8 dq2q0, q_2, q_0
+        USUB8 t2, q_0, q_2
+        SEL t2, t2, dq2q0
+        USUB8 t2, t2, beta ;// the GE flags of this compare are still live at the first SEL of bSGE4/bSLT4
+        MOV t7,#0 ;// r7 (beta until now) becomes the zero operand, because m00's register was reused as t2
+
+        BLT bSLT4
+;//-------------------Filter--------------------
+bSGE4
+        ;//---------bSGE4 Execution---------------
+        SEL t1, t7, filt ;// aqflg = filt && (aq<beta)
+        CMP filt, #0
+        ORR apqflg, apflg, t1, LSL #1
+        M_LDRD pQ0, srcdstStep, ppQ0Step, EQ ;// nothing to filter: restore pQ0/step before leaving
+        BEQ NoFilterFilt0
+
+        BL armVCM4P10_DeblockingLumabSGE4_unsafe
+
+        ;//---------Store result---------------
+        M_LDR pThresholds, ppThresholds
+        MOV p_2, Q1b ;// Q1b and P2b live in r0/r1, which the M_LDRD below reloads - park them first
+        MOV p_1, P2b
+        M_LDRD pQ0b, Stepb, ppQ0Step
+        ADD pThresholds, #1
+        M_STR pThresholds, ppThresholds
+        M_STR p_1, [pQ0b, Stepb]!
+        M_STR P1b, [pQ0b, Stepb]!
+        M_STR P0b, [pQ0b, Stepb]!
+        M_STR Q0b, [pQ0b, Stepb]!
+        STR p_2, [pQ0b, Stepb]
+        STR Q2b, [pQ0b, Stepb, LSL #1]
+
+
+        M_LDRD XY, pBS, pXYBS
+        SUB pQ0, pQ0b, Stepb, LSL #2 ;// back to row p3...
+        ADD pQ0, pQ0, #4 ;// ...of the next 4-pixel group
+        M_LDRD alpha, beta, pAlphaBeta0
+        ADDS XY, XY, XY
+        M_STR XY, pXYBS
+        BCC LoopX
+        B ExitLoopY
+
+;//---------- Exit of LoopX --------------
+;//---- for the case of no filtering -----
+
+NoFilterBS0
+        SUB pQ0, pQ0, srcdstStep, LSL #3 ;// undo the eight post-indexed row loads
+NoFilterFilt0
+        ADD pQ0, pQ0, #4
+        ;// Load counter for LoopX
+        M_LDRD XY, pBS, pXYBS
+        M_LDR pThresholds, ppThresholds
+        M_LDRD alpha, beta, pAlphaBeta0
+
+        ;// Align the pointers
+        ADDS XY, XY, XY
+        ADD pThresholds, pThresholds, #1
+        M_STR pThresholds, ppThresholds
+        M_STR XY, pXYBS
+        BCC LoopX
+        B ExitLoopY
+
+bSLT4
+        ;//---------bSLT4 Execution---------------
+        SEL aqflg, t7, filt ;// aqflg = filt && (aq<beta)
+        M_LDR ptC0, ppThresholds
+        CMP filt, #0
+        M_LDRD pQ0, srcdstStep, ppQ0Step, EQ ;// nothing to filter: restore pQ0/step before leaving
+        BEQ NoFilterFilt0
+
+        LDRB tC0, [ptC0], #1
+        M_STR ptC0, ppThresholds
+
+        BL armVCM4P10_DeblockingLumabSLT4_unsafe
+
+        ;//---------Store result---------------
+        MOV p_2, P0a ;// P0a lives in r1, which the M_LDRD below reloads as Stepa
+        M_LDRD pQ0a, Stepa, ppQ0Step
+        M_STR P1a, [pQ0a, Stepa, LSL #1]!
+        M_STR p_2, [pQ0a, Stepa]!
+        M_STR Q0a, [pQ0a, Stepa]!
+        STR Q1a, [pQ0a, Stepa]
+
+        ;// Load counter
+        M_LDRD XY, pBS, pXYBS
+        M_LDRD alpha, beta, pAlphaBeta0
+
+        SUB pQ0, pQ0a, Stepa, LSL #2 ;// back to row p3...
+        ADD pQ0, pQ0, #4 ;// ...of the next 4-pixel group
+
+        ADDS XY, XY, XY
+        M_STR XY, pXYBS
+        BCC LoopX
+
+;//-------- Common Exit of LoopY -----------------
+        ;// Align the pointers
+ExitLoopY
+        M_LDRD alpha, beta, pAlphaBeta1 ;// every LoopY pass after the first runs with alpha1/beta1
+        SUB pQ0, pQ0, #16 ;// back 16 pixels and down 4 rows
+        ADD pQ0, pQ0, srcdstStep, LSL #2
+        M_STRD alpha, beta, pAlphaBeta0
+
+        BNE LoopY ;// Z still comes from the last "ADDS XY, XY, XY"
+        MOV r0, #OMX_Sts_NoErr
+;//-----------------End Filter--------------------
+        M_END
+
+        ENDIF
+
+
+        END
+
+
\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
new file mode 100644
index 0000000..417ddc2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
@@ -0,0 +1,550 @@
+;//
+;//
+;// File Name: omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// In-place deblocking across vertical luma edges: 4x4 pixel tiles on each side of an edge are
+;// transposed into packed words p_3..p_0 / q_0..q_3 (one byte lane per row), the filter decision
+;// is made per lane, the imported *_unsafe kernels do the arithmetic, and the result is transposed back.
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+        IMPORT armVCM4P10_DeblockingLumabSLT4_unsafe
+        IMPORT armVCM4P10_DeblockingLumabSGE4_unsafe
+
+
+        IF ARM1136JS
+
+MASK_0 EQU 0x00000000
+MASK_1 EQU 0x01010101 ;// 1 in every byte lane: MUL by it replicates a byte; also the per-lane "filter on" flag
+MASK_2 EQU 0xff00ff00 ;// keeps bytes 1 and 3 of a word during the transposes
+LOOP_COUNT EQU 0x11110000 ;// ADDS XY,XY,XY shifts this left each pass: carry-out ends LoopX, a zero result ends LoopY (4 x 4 passes)
+
+;// Declare input registers
+
+pSrcDst RN 0
+srcdstStep RN 1
+pAlphaArg RN 2
+pBetaArg RN 3
+
+pThresholds RN 14
+pBS RN 9
+pQ0 RN 0
+bS RN 2
+
+alpha RN 6
+alpha0 RN 6
+alpha1 RN 8
+
+beta RN 7
+beta0 RN 7
+beta1 RN 9
+
+;// Declare Local/Temporary variables
+
+;// Pixels
+p_0 RN 3
+p_1 RN 5
+p_2 RN 4
+p_3 RN 2
+q_0 RN 8
+q_1 RN 9
+q_2 RN 10
+q_3 RN 12
+
+;// Unpacking
+mask RN 11
+
+row0 RN 2
+row1 RN 4
+row2 RN 5
+row3 RN 3
+
+row4 RN 8
+row5 RN 9
+row6 RN 10
+row7 RN 12
+row8 RN 14
+row9 RN 7
+
+tunpk0 RN 8
+tunpk1 RN 9
+tunpk2 RN 10
+tunpk3 RN 12
+tunpk4 RN 0
+
+tunpk5 RN 1
+tunpk6 RN 14
+tunpk7 RN 2
+tunpk8 RN 5
+tunpk9 RN 6
+
+
+;// Filtering
+
+dp0q0 RN 12
+dp1p0 RN 12
+dq1q0 RN 12
+dp2p0 RN 12
+dq2q0 RN 12
+
+ap0q0 RN 1
+filt RN 2
+
+m00 RN 14
+m01 RN 11
+
+apflg RN 0
+aqflg RN 6
+apqflg RN 0
+
+
+;//Declarations for bSLT4 kernel
+
+tC0 RN 7
+ptC0 RN 1
+
+pQ0a RN 0
+Stepa RN 1
+maska RN 14
+
+P0a RN 1
+P1a RN 8
+Q0a RN 7
+Q1a RN 11
+
+;//Declarations for bSGE4 kernel
+
+pQ0b RN 0
+Stepb RN 1
+maskb RN 14
+
+P0b RN 6
+P1b RN 7
+P2b RN 1
+P3b RN 3
+
+Q0b RN 9
+Q1b RN 0
+Q2b RN 2
+Q3b RN 3
+
+;// Miscellaneous
+XY RN 8
+t0 RN 3
+t1 RN 12
+t2 RN 14
+t7 RN 7
+t4 RN 4
+t5 RN 1
+t8 RN 6
+a RN 0
+
+;// NOTE(review): pXYBS and ppQ0Step reserve 4 bytes each but are accessed as doublewords
+;// (M_STRD/M_LDRD); presumably ppBS and pStep are laid out directly after them and supply the
+;// second word - confirm against the M_ALLOC macros in armCOMM_s.h.
+        ;// Allocate stack memory
+        M_ALLOC4 ppThresholds,4
+        M_ALLOC4 pQ_3,4
+        M_ALLOC4 pP_3,4
+        M_ALLOC8 pAlphaBeta0,8
+        M_ALLOC8 pAlphaBeta1,8
+        M_ALLOC8 pXYBS,4
+        M_ALLOC4 ppBS,4
+        M_ALLOC8 ppQ0Step,4
+        M_ALLOC4 pStep,4
+
+        ;// Function header
+        M_START omxVCM4P10_FilterDeblockingLuma_VerEdge_I, r11
+
+        ;//Input arguments on the stack
+        M_ARG ppThresholdsArg, 4
+        M_ARG ppBSArg, 4
+
+        LDR t4,=MASK_1
+
+        LDRB alpha0, [pAlphaArg]
+        LDRB beta0, [pBetaArg]
+        LDRB alpha1, [pAlphaArg,#1]
+        LDRB beta1, [pBetaArg,#1]
+
+        MUL alpha0, alpha0, t4 ;// replicate each alpha/beta byte into all four lanes
+        MUL beta0, beta0, t4
+        MUL alpha1, alpha1, t4
+        MUL beta1, beta1, t4
+
+        M_STRD alpha0, beta0, pAlphaBeta0
+        M_STRD alpha1, beta1, pAlphaBeta1
+
+        LDR XY,=LOOP_COUNT
+        M_LDR pBS, ppBSArg
+        M_LDR pThresholds, ppThresholdsArg
+        M_STR srcdstStep, pStep
+        M_STRD XY, pBS, pXYBS
+        M_STR pThresholds, ppThresholds
+
+        SUB pQ0, pQ0, #4 ;// start four pixels left of the edge (the p side)
+LoopY
+;//---------------Load Pixels-------------------
+
+;//----------------Pack p0-p3-----------------------
+        LDR mask, =MASK_2
+
+        M_LDR row0, [pQ0], srcdstStep
+        M_LDR row1, [pQ0], srcdstStep
+        LDR row2, [pQ0]
+        LDR row3, [pQ0, srcdstStep]
+        SUB pQ0, pQ0, srcdstStep, LSL #1
+
+        ;// row0 = [r0p0 r0p1 r0p2 r0p3]
+        ;// row1 = [r1p0 r1p1 r1p2 r1p3]
+        ;// row2 = [r2p0 r2p1 r2p2 r2p3]
+        ;// row3 = [r3p0 r3p1 r3p2 r3p3]
+
+        AND tunpk0, mask, row0
+        AND tunpk6, mask, row0, LSL#8
+        UXTAB16 tunpk0, tunpk0, row1, ROR#8
+        UXTAB16 tunpk6, tunpk6, row1
+        AND tunpk2, mask, row2
+        AND tunpk3, mask, row2, LSL#8
+        UXTAB16 tunpk2, tunpk2, row3, ROR#8
+        UXTAB16 tunpk3, tunpk3, row3
+
+        ;// tunpk0 = [r0p0 r1p0 r0p2 r1p2]
+        ;// tunpk6 = [r0p1 r1p1 r0p3 r1p3]
+        ;// tunpk2 = [r2p0 r3p0 r2p2 r3p2]
+        ;// tunpk3 = [r2p1 r3p1 r2p3 r3p3]
+
+        PKHTB p_0, tunpk0, tunpk2, ASR#16
+        PKHTB p_1, tunpk6, tunpk3, ASR#16
+        PKHBT p_2, tunpk2, tunpk0, LSL#16
+        PKHBT p_3, tunpk3, tunpk6, LSL#16
+
+
+        ;// p_0 = [r0p0 r1p0 r2p0 r3p0]
+        ;// p_1 = [r0p1 r1p1 r2p1 r3p1]
+        ;// p_2 = [r0p2 r1p2 r2p2 r3p2]
+        ;// p_3 = [r0p3 r1p3 r2p3 r3p3]
+
+        M_STR p_3, pP_3
+
+;//----------------Pack q0-q3-----------------------
+LoopX
+        LDRB bS, [pBS], #4 ;// bS entries for consecutive LoopX passes are 4 bytes apart here
+        M_STR pQ0, ppQ0Step ;// saved because r0/r1 are reused as tunpk4/tunpk5 below
+        LDR mask, =MASK_2
+        CMP bS, #0
+        M_STR pBS, ppBS
+
+        LDR row4, [pQ0, #4]!
+        BEQ.W NoFilterBS0
+        M_LDR row5, [pQ0, srcdstStep]!
+        M_LDR row6, [pQ0, srcdstStep]!
+        M_LDR row7, [pQ0, srcdstStep]
+
+        ;// row4 = [r0q3 r0q2 r0q1 r0q0]
+        ;// row5 = [r1q3 r1q2 r1q1 r1q0]
+        ;// row6 = [r2q3 r2q2 r2q1 r2q0]
+        ;// row7 = [r3q3 r3q2 r3q1 r3q0]
+
+        AND tunpk4, mask, row4
+        CMP bS, #4 ;// flags from this compare are consumed by "BLT bSLT4" below; nothing in between writes N/Z/C/V
+        AND tunpk5, mask, row4, LSL#8
+        UXTAB16 tunpk4, tunpk4, row5, ROR#8
+        UXTAB16 tunpk5, tunpk5, row5
+        AND tunpk6, mask, row6
+        AND tunpk7, mask, row6, LSL#8
+        UXTAB16 tunpk6, tunpk6, row7, ROR#8
+        UXTAB16 tunpk7, tunpk7, row7
+
+        ;// tunpk4 = [r0q0 r1q0 r0q2 r1q2]
+        ;// tunpk5 = [r0q1 r1q1 r0q3 r1q3]
+        ;// tunpk6 = [r2q0 r3q0 r2q2 r3q2]
+        ;// tunpk7 = [r2q1 r3q1 r2q3 r3q3]
+
+        PKHTB q_3, tunpk4, tunpk6, ASR#16
+        PKHTB q_2, tunpk5, tunpk7, ASR#16
+        PKHBT q_1, tunpk6, tunpk4, LSL#16
+        M_STR q_3, pQ_3
+        PKHBT q_0, tunpk7, tunpk5, LSL#16
+
+
+        ;// q_0 = [r0q0 r1q0 r2q0 r3q0]
+        ;// q_1 = [r0q1 r1q1 r2q1 r3q1]
+        ;// q_2 = [r0q2 r1q2 r2q2 r3q2]
+        ;// q_3 = [r0q3 r1q3 r2q3 r3q3]
+
+
+;//--------------Filtering Decision -------------------
+        LDR m01, =MASK_1 ;// 01010101 mask
+        MOV m00, #MASK_0 ;// 00000000 mask
+
+        ;// Check |p0-q0|<Alpha
+        USUB8 dp0q0, p_0, q_0
+        USUB8 a, q_0, p_0
+        SEL ap0q0, a, dp0q0
+        USUB8 a, ap0q0, alpha
+        SEL filt, m00, m01 ;// filt lane = 1 where |p0-q0| < alpha, else 0
+
+        ;// Check |p1-p0|<Beta
+        USUB8 dp1p0, p_1, p_0
+        USUB8 a, p_0, p_1
+        SEL a, a, dp1p0
+        USUB8 a, a, beta
+        SEL filt, m00, filt
+
+        ;// Check |q1-q0|<Beta
+        USUB8 dq1q0, q_1, q_0
+        USUB8 a, q_0, q_1
+        SEL a, a, dq1q0
+        USUB8 a, a, beta
+        SEL filt, m00, filt
+
+        ;// Check ap<Beta
+        USUB8 dp2p0, p_2, p_0
+        USUB8 a, p_0, p_2
+        SEL a, a, dp2p0
+        USUB8 a, a, beta
+        SEL apflg, m00, filt ;// apflg = filt && (ap<beta)
+
+        ;// Check aq<Beta
+        USUB8 dq2q0, q_2, q_0
+        USUB8 t2, q_0, q_2
+        SEL t2, t2, dq2q0
+        USUB8 t2, t2, beta ;// the GE flags of this compare are still live at the first SEL of bSGE4/bSLT4
+        MOV t7,#0 ;// r7 (beta until now) becomes the zero operand, because m00's register was reused as t2
+
+
+        BLT bSLT4
+;//-------------------Filter--------------------
+bSGE4
+        ;//---------bSGE4 Execution---------------
+        SEL t1, t7, filt ;// aqflg = filt && (aq<beta)
+        CMP filt, #0
+        ORR apqflg, apflg, t1, LSL #1
+        M_LDRD pQ0, srcdstStep, ppQ0Step, EQ ;// nothing to filter: restore pQ0/step before leaving
+        BEQ NoFilterFilt0
+
+        BL armVCM4P10_DeblockingLumabSGE4_unsafe
+
+        ;//---------Store result---------------
+
+        LDR maskb,=MASK_2
+
+        ;// P0b = [r0p0 r1p0 r2p0 r3p0]
+        ;// P1b = [r0p1 r1p1 r2p1 r3p1]
+        ;// P2b = [r0p2 r1p2 r2p2 r3p2]
+        ;// P3b = [r0p3 r1p3 r2p3 r3p3]
+
+        M_LDR P3b, pP_3
+        M_STR Q0b, pP_3 ;// filtered q0 becomes the next pass's p3 (the q tile is the next edge's p tile)
+
+        ;//------Pack p0-p3------
+        AND tunpk0, maskb, P0b
+        AND tunpk2, maskb, P0b, LSL#8
+        UXTAB16 tunpk0, tunpk0, P1b, ROR#8
+        UXTAB16 tunpk2, tunpk2, P1b
+
+        AND tunpk3, maskb, P2b
+        AND tunpk8, maskb, P2b, LSL#8
+        UXTAB16 tunpk3, tunpk3, P3b, ROR#8
+        UXTAB16 tunpk8, tunpk8, P3b
+
+        ;// tunpk0 = [r0p0 r0p1 r2p0 r2p1]
+        ;// tunpk2 = [r1p0 r1p1 r3p0 r3p1]
+        ;// tunpk3 = [r0p2 r0p3 r2p2 r2p3]
+        ;// tunpk8 = [r1p2 r1p3 r3p2 r3p3]
+
+        MOV p_2, Q1b ;// Q1b lives in r0, which the M_LDRD below reloads as pQ0b; it is also next pass's p_2
+        M_LDRD pQ0b, Stepb, ppQ0Step
+
+        PKHTB row9, tunpk0, tunpk3, ASR#16
+        PKHBT row7, tunpk3, tunpk0, LSL#16
+        PKHTB row3, tunpk2, tunpk8, ASR#16
+        PKHBT row6, tunpk8, tunpk2, LSL#16
+
+        ;// row9 = [r0p0 r0p1 r0p2 r0p3]
+        ;// row3 = [r1p0 r1p1 r1p2 r1p3]
+        ;// row7 = [r2p0 r2p1 r2p2 r2p3]
+        ;// row6 = [r3p0 r3p1 r3p2 r3p3]
+
+        M_STR row9, [pQ0b], Stepb
+        STR row7, [pQ0b, Stepb]
+        STR row6, [pQ0b, Stepb, LSL #1]
+        STR row3, [pQ0b], #4
+
+        M_LDR Q3b, pQ_3
+
+        ;// Q0b = [r0q0 r1q0 r2q0 r3q0]
+        ;// Q1b = [r0q1 r1q1 r2q1 r3q1]
+        ;// Q2b = [r0q2 r1q2 r2q2 r3q2]
+        ;// Q3b = [r0q3 r1q3 r2q3 r3q3]
+
+        ;//------Pack q0-q3------
+        AND tunpk0, maskb, p_2
+        AND tunpk2, maskb, p_2, LSL#8
+        UXTAB16 tunpk0, tunpk0, Q0b, ROR#8
+        UXTAB16 tunpk2, tunpk2, Q0b
+
+        AND tunpk3, maskb, Q3b
+        AND tunpk8, maskb, Q3b, LSL#8
+        UXTAB16 tunpk3, tunpk3, Q2b, ROR#8
+        UXTAB16 tunpk8, tunpk8, Q2b
+
+        ;// tunpk0 = [r0q1 r0q0 r2q1 r2q0]
+        ;// tunpk2 = [r1q1 r1q0 r3q1 r3q0]
+        ;// tunpk3 = [r0q3 r0q2 r2q3 r2q2]
+        ;// tunpk8 = [r1q3 r1q2 r3q3 r3q2]
+
+        PKHTB row8, tunpk3, tunpk0, ASR#16
+        PKHBT row7, tunpk0, tunpk3, LSL#16
+        PKHTB row4, tunpk8, tunpk2, ASR#16
+        PKHBT row6, tunpk2, tunpk8, LSL#16
+
+        ;// row8 = [r0q0 r0q1 r0q2 r0q3]
+        ;// row4 = [r1q0 r1q1 r1q2 r1q3]
+        ;// row7 = [r2q0 r2q1 r2q2 r2q3]
+        ;// row6 = [r3q0 r3q1 r3q2 r3q3]
+
+        STR row4, [pQ0b]
+        STR row7, [pQ0b, Stepb]
+        STR row6, [pQ0b, Stepb, LSL #1]
+
+        SUB pQ0, pQ0b, Stepb ;// back to row 0 (pQ0b was left on row 1)
+        MOV p_1, Q2b ;// filtered q2 becomes the next pass's p_1
+
+        STR row8, [pQ0]
+
+        M_LDRD XY, pBS, pXYBS
+        M_LDR pThresholds, ppThresholds
+        M_LDRD alpha, beta, pAlphaBeta1
+
+        ADDS XY, XY, XY
+        ADD pThresholds, #4
+        M_STR pThresholds, ppThresholds
+        M_STR XY, pXYBS
+        BCC LoopX
+        B ExitLoopY
+
+;//---------- Exit of LoopX --------------
+;//---- for the case of no filtering -----
+
+NoFilterFilt0
+        ADD pQ0, pQ0, #4 ;// pQ0 was restored to its pre-increment value; NoFilterBS0 arrives with the +4 already applied
+NoFilterBS0
+        ;// Load counter for LoopX
+        M_LDRD XY, pBS, pXYBS
+        M_LDR pThresholds, ppThresholds
+        M_LDRD alpha, beta, pAlphaBeta1
+
+        ;// Align the pointer
+        ADDS XY, XY, XY
+        ADD pThresholds, pThresholds, #4
+        M_STR pThresholds, ppThresholds
+        M_STR XY, pXYBS
+        BCC LoopY ;// LoopY, not LoopX: the p words were not refreshed on this path, so they are reloaded from memory
+        B ExitLoopY
+
+bSLT4
+        ;//---------bSLT4 Execution---------------
+        SEL aqflg, t7, filt ;// aqflg = filt && (aq<beta)
+        M_LDR ptC0, ppThresholds
+        CMP filt, #0
+        M_LDRD pQ0, srcdstStep, ppQ0Step, EQ ;// nothing to filter: restore pQ0/step before leaving
+        BEQ NoFilterFilt0
+
+        LDRB tC0, [ptC0], #4
+        M_STR ptC0, ppThresholds
+
+        BL armVCM4P10_DeblockingLumabSLT4_unsafe
+
+        ;//---------Store result---------------
+        ;//--------Pack p1,p0,q1,q0------------
+
+        ;//Load destination pointer
+        LDR maska,=MASK_2
+        M_STR Q0a, pP_3 ;// filtered q0 becomes the next pass's p3
+        MOV p_1, q_2 ;// q2 becomes the next pass's p_1
+
+        ;// P1a = [r0p1 r1p1 r2p1 r3p1]
+        ;// P0a = [r0p0 r1p0 r2p0 r3p0]
+        ;// Q0a = [r0q0 r1q0 r2q0 r3q0]
+        ;// Q1a = [r0q1 r1q1 r2q1 r3q1]
+
+        AND tunpk1, maska, P0a
+        AND tunpk2, maska, P0a, LSL#8
+        UXTAB16 tunpk1, tunpk1, P1a, ROR#8
+        UXTAB16 tunpk2, tunpk2, P1a
+
+        M_LDRD pQ0a, Stepa, ppQ0Step
+
+        AND tunpk9, maska, Q1a
+        AND tunpk3, maska, Q1a, LSL#8
+        UXTAB16 tunpk9, tunpk9, Q0a, ROR#8
+        UXTAB16 tunpk3, tunpk3, Q0a
+
+        ;// tunpk1 = [r0p0 r0p1 r2p0 r2p1]
+        ;// tunpk2 = [r1p0 r1p1 r3p0 r3p1]
+        ;// tunpk9 = [r0q1 r0q0 r2q1 r2q0]
+        ;// tunpk3 = [r1q1 r1q0 r3q1 r3q0]
+
+        MOV t4, tunpk1, LSR #16
+        MOV t0, tunpk9, LSR #16
+
+        STRH t4,[pQ0a, #2]! ;//Stores [r0p0 r0p1]
+        STRH t0,[pQ0a, #2] ;//Stores [r0q0 r0q1]
+
+        MOV t4, tunpk2, LSR #16
+        MOV t0, tunpk3, LSR #16
+
+        M_STRH t4,[pQ0a, Stepa]! ;//Stores [r1p0 r1p1]
+        STRH t0,[pQ0a, #2] ;//Stores [r1q0 r1q1]
+
+        M_STRH tunpk1,[pQ0a, Stepa]! ;//Stores [r2p0 r2p1]
+        STRH tunpk2,[pQ0a, Stepa] ;//Stores [r3p0 r3p1]
+        STRH tunpk9,[pQ0a, #2]! ;//Stores [r2q0 r2q1]
+        STRH tunpk3,[pQ0a, Stepa] ;//Stores [r3q0 r3q1]
+
+        SUB pQ0, pQ0a, Stepa, LSL #1 ;// back to row 0; the two #2 write-backs above already moved pQ0 4 pixels right
+
+        ;// Load counter
+        M_LDRD XY, pBS, pXYBS
+
+        ;// Reload Pixels
+        M_LDR p_0, pQ_3 ;// the saved q3 becomes the next pass's p_0
+        MOV p_2, Q1a ;// filtered q1 becomes the next pass's p_2
+
+        M_LDRD alpha, beta, pAlphaBeta1
+
+        ADDS XY, XY, XY
+        M_STR XY, pXYBS
+        BCC LoopX
+
+;//-------- Common Exit of LoopY -----------------
+        ;// Align the pointers
+        M_LDR pThresholds, ppThresholds ;// only the fall-through from bSLT4 needs this; the paths that branch to ExitLoopY arrive with pThresholds already in r14
+ExitLoopY
+        SUB pQ0, pQ0, #16 ;// back 16 pixels and down 4 rows
+        ADD pQ0, pQ0, srcdstStep, LSL #2
+        SUB pBS, pBS, #15 ;// four passes advanced pBS by 4*4; net step is 1
+        SUB pThresholds, pThresholds, #15 ;// four passes advanced pThresholds by 4*4; net step is 1
+        M_STR pThresholds, ppThresholds
+
+        M_LDRD alpha, beta, pAlphaBeta0
+
+        BNE LoopY ;// Z still comes from the last "ADDS XY, XY, XY"
+        MOV r0, #OMX_Sts_NoErr
+
+        M_END
+;//-----------------End Filter--------------------
+
+        ENDIF
+
+        END
+
+
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c new file mode 100644 index 0000000..de835bd --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateChroma.c @@ -0,0 +1,79 @@ +/** + * + * File Name: omxVCM4P10_InterpolateChroma.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * Description: + * This function will calculate 1/8 Pixel interpolation for Chroma Block + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armVC.h" +#include "armCOMM.h" + + +/** + * Function: omxVCM4P10_InterpolateChroma, + * + * Description: + * Performs 1/8-pixel interpolation for inter chroma MB. + * + * Remarks: + * + * Parameters: + * [in] pSrc Pointer to the source reference frame buffer + * [in] srcStep Reference frame step in byte + * [in] dstStep Destination frame step in byte. Must be multiple of roi.width. + * [in] dx Fractional part of horizontal motion vector component + * in 1/8 pixel unit;valid in the range [0,7] + * [in] dy Fractional part of vertical motion vector component + * in 1/8 pixel unit;valid in the range [0,7] + * [in] roi Dimension of the interpolation region;the parameters roi.width and roi.height must + * be equal to either 2, 4, or 8. + * [out] pDst Pointer to the destination frame buffer. + * if roi.width==2, 2-byte alignment required + * if roi.width==4, 4-byte alignment required + * if roi.width==8, 8-byte alignment required + * + * Return Value: + * If the function runs without error, it returns OMX_Sts_NoErr. + * If one of the following cases occurs, the function returns OMX_Sts_BadArgErr: + * pSrc or pDst is NULL. + * srcStep or dstStep < 8. + * dx or dy is out of range [0-7]. 
+ * roi.width or roi.height is out of range {2,4,8}. + * roi.width is equal to 2, but pDst is not 2-byte aligned. + * roi.width is equal to 4, but pDst is not 4-byte aligned. + * roi.width is equal to 8, but pDst is not 8-byte aligned. + * srcStep or dstStep is not a multiple of 8. + * NOTE(review): this wrapper checks nothing itself; it only forwards its arguments to armVCM4P10_Interpolate_Chroma, so the error cases above depend on that routine - confirm. + */ + +OMXResult omxVCM4P10_InterpolateChroma ( + const OMX_U8* pSrc, + OMX_S32 srcStep, + OMX_U8* pDst, + OMX_S32 dstStep, + OMX_S32 dx, + OMX_S32 dy, + OMXSize roi + ) +{ /* Thin wrapper: unpack roi and delegate; roi.width and roi.height are passed before dx and dy. */ + return armVCM4P10_Interpolate_Chroma + ((OMX_U8*)pSrc, srcStep, pDst, dstStep, roi.width, roi.height, dx, dy); +} + + +/***************************************************************************** + * END OF FILE + *****************************************************************************/ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s new file mode 100644 index 0000000..cf611a3 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s @@ -0,0 +1,426 @@ +;// +;// +;// File Name: omxVCM4P10_InterpolateLuma_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + +;// Function: +;// omxVCM4P10_InterpolateLuma +;// +;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly. +;// Performs quarter pel interpolation of inter luma MB. +;// It's assumed that the frame is already padded when calling this function. +;// Parameters: +;// [in] pSrc Pointer to the source reference frame buffer +;// [in] srcStep Reference frame step in byte +;// [in] dstStep Destination frame step in byte. 
Must be multiple of roi.width +;// [in] dx Fractional part of horizontal motion vector +;// component in 1/4 pixel unit; valid in the range [0,3] +;// [in] dy Fractional part of vertical motion vector +;// component in 1/4 pixel unit; valid in the range [0,3] +;// [in] roi Dimension of the interpolation region; the parameters roi.width and roi.height must +;// be equal to either 4, 8, or 16. +;// [out] pDst Pointer to the destination frame buffer. +;// if roi.width==4, 4-byte alignment required +;// if roi.width==8, 8-byte alignment required +;// if roi.width==16, 16-byte alignment required +;// +;// Return Value: +;// If the function runs without error, it returns OMX_Sts_NoErr. +;// It is assumed that the following conditions are satisfied before calling this function: +;// pSrc and pDst are not NULL. +;// srcStep and dstStep are >= roi.width. +;// dx and dy are in the range [0-3]. +;// roi.width and roi.height are each one of {4, 8, 16}. +;// If roi.width is equal to 4, pDst is 4 byte aligned. +;// If roi.width is equal to 8, pDst is 8 byte aligned. +;// If roi.width is equal to 16, pDst is 16 byte aligned. +;// srcStep and dstStep are multiples of 8. 
+;// +;// + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT omxVCM4P10_InterpolateLuma + + IF ARM1136JS + IMPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe + IMPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + IMPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe + IMPORT armVCM4P10_Average_4x4_Align0_unsafe + IMPORT armVCM4P10_Average_4x4_Align2_unsafe + IMPORT armVCM4P10_Average_4x4_Align3_unsafe + IMPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe + IMPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe + ENDIF + + IF ARM1136JS + IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe + IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe + ENDIF + + + +;// Declare input registers +pSrc RN 0 +srcStep RN 1 +pDst RN 2 +dstStep RN 3 +iHeight RN 4 +iWidth RN 5 + +;// Declare other intermediate registers (idx and index both name r6) +idx RN 6 +idy RN 7 +index RN 6 +Temp RN 12 +pArgs RN 11 + + + ;// End of CortexA8 + +;//------------------------------------------------------------------------------------------------------------------------- +;//------------------------------------------------------------------------------------------------------------------------- + IF ARM1136JS + + + M_ALLOC4 ppDst, 8 + M_ALLOC4 ppSrc, 8 + M_ALLOC4 ppArgs, 16 ;// 16 = 4 words: pSrc, srcStep, pDst, dstStep + M_ALLOC4 pBuffer, 120 ;// 120 = 12x10 + M_ALLOC8 pInterBuf, 120 ;// 120 = 12*5*2 + M_ALLOC8 pTempBuf, 32 ;// 32 = 8*4 + + ;// Function header + ;// Interpolation of luma is implemented by processing one 4x4 block of pixels at a time. + ;// Depending on the values of motion vector fractional parts (dx,dy), one out of 16 cases will be processed. 
+ ;// Registers r4, r5, r6 to be preserved by internal unsafe functions + ;// r4 - iHeight + ;// r5 - iWidth + ;// r6 - index + M_START omxVCM4P10_InterpolateLuma, r11 + +;// Declare other intermediate registers (same aliases as above; Height and bufStep both name r9) +idx RN 6 +idy RN 7 +index RN 6 +Temp RN 12 +pArgs RN 11 + +pBuf RN 8 +Height RN 9 +bufStep RN 9 + + ;// Define stack arguments + M_ARG ptridx, 4 + M_ARG ptridy, 4 + M_ARG ptrWidth, 4 + M_ARG ptrHeight, 4 + + ;// Load the stack arguments: idx, idy and the two roi elements (width, height) + M_LDR idx, ptridx + M_LDR idy, ptridy + M_LDR iWidth, ptrWidth + M_LDR iHeight, ptrHeight + + M_PRINTF "roi.width %d\n", iWidth + M_PRINTF "roi.height %d\n", iHeight + + ADD index, idx, idy, LSL #2 ;// index = idx + 4*idy, i.e. [index] = [idy][idx] + M_ADR pArgs, ppArgs + +InterpolateLuma +Block4x4WidthLoop +Block4x4HeightLoop + + STM pArgs, {pSrc,srcStep,pDst,dstStep} + M_ADR pBuf, pBuffer + + ;// switch table using the combined fractional index (idx + 4*idy) to pick one of 16 cases + M_SWITCH index, L + M_CASE Case_0 + M_CASE Case_1 + M_CASE Case_2 + M_CASE Case_3 + M_CASE Case_4 + M_CASE Case_5 + M_CASE Case_6 + M_CASE Case_7 + M_CASE Case_8 + M_CASE Case_9 + M_CASE Case_a + M_CASE Case_b + M_CASE Case_c + M_CASE Case_d + M_CASE Case_e + M_CASE Case_f + M_ENDSWITCH + +Case_0 + ;// Case G (idx=0, idy=0) + M_PRINTF "Case 0 \n" + + BL armVCM4P10_InterpolateLuma_Copy4x4_unsafe + B Block4x4LoopEnd + +Case_1 + ;// Case a (idx=1, idy=0) + M_PRINTF "Case 1 \n" + + SUB pSrc, pSrc, #2 + MOV Height, #4 + BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + BL armVCM4P10_Average_4x4_Align2_unsafe + B Block4x4LoopEnd +Case_2 + ;// Case b (idx=2, idy=0) + M_PRINTF "Case 2 \n" + + SUB pSrc, pSrc, #2 + MOV Height, #4 + BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + B Block4x4LoopEnd +Case_3 + ;// Case c (idx=3, idy=0) + M_PRINTF "Case 3 \n" + + SUB pSrc, pSrc, #2 + MOV Height, #4 + BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + BL armVCM4P10_Average_4x4_Align3_unsafe + B Block4x4LoopEnd +Case_4 + ;// Case d (idx=0, idy=1) 
+ M_PRINTF "Case 4 \n" + + SUB pSrc, pSrc, srcStep, LSL #1 + MOV Height, #9 + BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + BL armVCM4P10_Average_4x4_Align0_unsafe + + B Block4x4LoopEnd +Case_5 + ;// Case e (idx=1, idy=1) + M_PRINTF "Case 5 \n" + + SUB pSrc, pSrc, #2 + MOV Height, #4 + M_ADR pDst, pTempBuf ;// first pass is written to pTempBuf + MOV dstStep, #4 + BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + M_ADR pArgs, ppArgs + LDM pArgs, {pSrc, srcStep, pDst, dstStep} ;// reload the arguments saved at the loop top + SUB pSrc, pSrc, srcStep, LSL #1 + M_ADR pBuf, pBuffer + MOV Height, #9 + BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + M_ADR pSrc, pTempBuf ;// pTempBuf (step 4) becomes the source for the averaging call + MOV srcStep, #4 + BL armVCM4P10_Average_4x4_Align0_unsafe + + + B Block4x4LoopEnd +Case_6 + ;// Case f (idx=2, idy=1) + M_PRINTF "Case 6 \n" + + SUB pSrc, pSrc, #2 + SUB pSrc, pSrc, srcStep, LSL #1 + MOV Height, #9 + BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + M_ADR pBuf, pInterBuf + BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe + M_ADR idy, pTempBuf ;// r7 (idy) is reused here to hold the pTempBuf address + BL armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe + BL armVCM4P10_Average_4x4_Align0_unsafe + B Block4x4LoopEnd +Case_7 + ;// Case g (idx=3, idy=1) + M_PRINTF "Case 7 \n" + + SUB pSrc, pSrc, #2 + MOV Height, #4 + M_ADR pDst, pTempBuf ;// first pass is written to pTempBuf + MOV dstStep, #4 + BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + M_ADR pArgs, ppArgs + LDM pArgs, {pSrc, srcStep, pDst, dstStep} ;// reload the arguments saved at the loop top + SUB pSrc, pSrc, srcStep, LSL #1 + ADD pSrc, pSrc, #1 + M_ADR pBuf, pBuffer + MOV Height, #9 + BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + M_ADR pSrc, pTempBuf ;// pTempBuf (step 4) becomes the source for the averaging call + MOV srcStep, #4 + BL armVCM4P10_Average_4x4_Align0_unsafe + + B Block4x4LoopEnd +Case_8 + ;// Case h (idx=0, idy=2) + M_PRINTF "Case 8 \n" + + SUB pSrc, pSrc, srcStep, LSL #1 + MOV Height, #9 + BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + B 
Block4x4LoopEnd +Case_9 + ;// Case i (idx=1, idy=2) + M_PRINTF "Case 9 \n" + + SUB pSrc, pSrc, #2 + SUB pSrc, pSrc, srcStep, LSL #1 + MOV Height, #9 + BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + ADD pSrc, pSrc, srcStep, LSL #1 + M_ADR pBuf, pInterBuf + BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe + M_ADR idy, pTempBuf ;// r7 (idy) is reused here to hold the pTempBuf address + BL armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe + BL armVCM4P10_Average_4x4_Align2_unsafe + B Block4x4LoopEnd +Case_a + ;// Case j (idx=2, idy=2) + M_PRINTF "Case a \n" + + SUB pSrc, pSrc, #2 + SUB pSrc, pSrc, srcStep, LSL #1 + MOV Height, #9 + BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + ADD pSrc, pSrc, srcStep, LSL #1 + M_ADR pBuf, pInterBuf + BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe + B Block4x4LoopEnd +Case_b + ;// Case k (idx=3, idy=2) + M_PRINTF "Case b \n" + SUB pSrc, pSrc, #2 + SUB pSrc, pSrc, srcStep, LSL #1 + MOV Height, #9 + BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + ADD pSrc, pSrc, srcStep, LSL #1 + M_ADR pBuf, pInterBuf + BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe + M_ADR idy, pTempBuf ;// r7 (idy) is reused here to hold the pTempBuf address + BL armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe + BL armVCM4P10_Average_4x4_Align3_unsafe + B Block4x4LoopEnd +Case_c + ;// Case n (idx=0, idy=3) + M_PRINTF "Case c \n" + + SUB pSrc, pSrc, srcStep, LSL #1 + MOV Height, #9 + BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + ADD pSrc, pSrc, srcStep ;// Update pSrc to one row down + BL armVCM4P10_Average_4x4_Align0_unsafe + B Block4x4LoopEnd +Case_d + ;// Case p (idx=1, idy=3) + M_PRINTF "Case d \n" + SUB pSrc, pSrc, #2 + ADD pSrc, pSrc, srcStep + MOV Height, #4 + M_ADR pDst, pTempBuf ;// first pass is written to pTempBuf + MOV dstStep, #4 + BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + M_ADR pArgs, ppArgs + LDM pArgs, {pSrc, srcStep, pDst, dstStep} ;// reload the arguments saved at the loop top + SUB pSrc, pSrc, srcStep, LSL #1 + M_ADR pBuf, pBuffer + MOV Height, #9 + BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + M_ADR pSrc, pTempBuf ;// pTempBuf (step 4) becomes the source for the averaging call + MOV srcStep, #4 
+ BL armVCM4P10_Average_4x4_Align0_unsafe + B Block4x4LoopEnd +Case_e + ;// Case q (idx=2, idy=3) + M_PRINTF "Case e \n" + + SUB pSrc, pSrc, #2 + SUB pSrc, pSrc, srcStep, LSL #1 + MOV Height, #9 + BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + M_ADR pBuf, pInterBuf + BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe + M_ADR idy, pTempBuf ;// r7 (idy) is reused here to hold the pTempBuf address + BL armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe + ADD pSrc, pSrc, #4 + BL armVCM4P10_Average_4x4_Align0_unsafe + + B Block4x4LoopEnd +Case_f + ;// Case r (idx=3, idy=3) + M_PRINTF "Case f \n" + SUB pSrc, pSrc, #2 + ADD pSrc, pSrc, srcStep + MOV Height, #4 + M_ADR pDst, pTempBuf ;// first pass is written to pTempBuf + MOV dstStep, #4 + BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe + BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe + M_ADR pArgs, ppArgs + LDM pArgs, {pSrc, srcStep, pDst, dstStep} ;// reload the arguments saved at the loop top + SUB pSrc, pSrc, srcStep, LSL #1 + ADD pSrc, pSrc, #1 + M_ADR pBuf, pBuffer + MOV Height, #9 + BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe + BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe + M_ADR pSrc, pTempBuf ;// pTempBuf (step 4) becomes the source for the averaging call + MOV srcStep, #4 + BL armVCM4P10_Average_4x4_Align0_unsafe + +Block4x4LoopEnd + + ;// Width Loop: advance 4 pixels to the right until iWidth is used up + SUBS iWidth, iWidth, #4 + M_ADR pArgs, ppArgs + LDM pArgs, {pSrc,srcStep,pDst,dstStep} ;// Reload the arguments saved at the loop top + ADD pSrc, pSrc, #4 + ADD pDst, pDst, #4 + BGT Block4x4WidthLoop + + ;// Height Loop: reload the full width, move 4 rows down and back by the width already advanced + SUBS iHeight, iHeight, #4 + M_LDR iWidth, ptrWidth + M_ADR pArgs, ppArgs + ADD pSrc, pSrc, srcStep, LSL #2 + ADD pDst, pDst, dstStep, LSL #2 + SUB pSrc, pSrc, iWidth + SUB pDst, pDst, iWidth + BGT Block4x4HeightLoop + +EndOfInterpolation + MOV r0, #0 + M_END + + ENDIF + + + END +
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s new file mode 100644 index 0000000..34fedd8 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntraChroma_8x8_s.s @@ -0,0 +1,494 @@ +;// +;// +;// File Name: omxVCM4P10_PredictIntraChroma_8x8_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + EXPORT armVCM4P10_pIndexTable8x8 + +;// Define the processor variants supported by this file + + M_VARIANTS ARM1136JS + + AREA table, DATA +;//------------------------------------------------------- +;// This table implements a C-style switch-case in asm by +;// the method of two levels of indexing. 
+;//------------------------------------------------------- + + M_TABLE armVCM4P10_pIndexTable8x8 + DCD OMX_VC_CHROMA_DC, OMX_VC_CHROMA_HOR + DCD OMX_VC_CHROMA_VERT, OMX_VC_CHROMA_PLANE + + M_TABLE armVCM4P10_MultiplierTableChroma8x8,1 + DCW 3, 2, 1,4 + DCW -3,-2,-1,0 + DCW 1, 2, 3,4 + + IF ARM1136JS + +;//-------------------------------------------- +;// Constants +;//-------------------------------------------- + +BLK_SIZE EQU 0x8 +MUL_CONST0 EQU 0x01010101 +MASK_CONST EQU 0x00FF00FF +MUL_CONST1 EQU 0x80808080 + +;//-------------------------------------------- +;// Scratch variables (many names below alias the same physical register) +;//-------------------------------------------- +y RN 12 +pc RN 15 +return RN 0 +pSrcLeft2 RN 1 +pDst2 RN 2 +sum1 RN 6 +sum2 RN 7 +pTable RN 9 +dstStepx2 RN 11 +leftStepx2 RN 14 +outerCount RN 14 +r0x01010101 RN 10 +r0x00FF00FF RN 11 + +tVal0 RN 0 +tVal1 RN 1 +tVal2 RN 2 +tVal3 RN 3 +tVal4 RN 4 +tVal5 RN 5 +tVal6 RN 6 +tVal7 RN 7 +tVal8 RN 8 +tVal9 RN 9 +tVal10 RN 10 +tVal11 RN 11 +tVal12 RN 12 +tVal14 RN 14 + +b RN 14 +c RN 12 + +p2p0 RN 0 +p3p1 RN 1 +p6p4 RN 2 +p7p5 RN 4 + +pp2pp0 RN 6 +pp3pp1 RN 7 +pp6pp4 RN 8 +pp7pp5 RN 9 + +p3210 RN 10 +p7654 RN 10 + +;//-------------------------------------------- +;// Input Arguments +;//-------------------------------------------- +pSrcLeft RN 0 ;// input pointer +pSrcAbove RN 1 ;// input pointer +pSrcAboveLeft RN 2 ;// input pointer +pDst RN 3 ;// output pointer +leftStep RN 4 ;// input variable +dstStep RN 5 ;// input variable +predMode RN 6 ;// input variable +availability RN 7 ;// input variable + +;//----------------------------------------------------------------------------------------------- +;// omxVCM4P10_PredictIntraChroma_8x8 starts +;//----------------------------------------------------------------------------------------------- + + ;// Write function header + M_START omxVCM4P10_PredictIntraChroma_8x8, r11 + + ;// Define stack arguments + M_ARG LeftStep, 4 + M_ARG DstStep, 4 + M_ARG PredMode, 4 + M_ARG Availability, 4 + + ;// 
M_STALL ARM1136JS=4 + + LDR pTable,=armVCM4P10_pIndexTable8x8 ;// Load index table for switch case + + + ;// Load argument from the stack + M_LDR predMode, PredMode ;// Arg predMode loaded from stack to reg + M_LDR leftStep, LeftStep ;// Arg leftStep loaded from stack to reg + M_LDR dstStep, DstStep ;// Arg dstStep loaded from stack to reg + M_LDR availability, Availability ;// Arg availability loaded from stack to reg + + MOV y, #BLK_SIZE ;// Outer Loop Count + LDR pc, [pTable, predMode, LSL #2] ;// Branch to the case based on predMode + +OMX_VC_CHROMA_DC + AND availability, availability,#(OMX_VC_UPPER + OMX_VC_LEFT) + CMP availability, #(OMX_VC_UPPER + OMX_VC_LEFT) ;// are both OMX_VC_UPPER and OMX_VC_LEFT set? + LDR r0x01010101, =MUL_CONST0 + BNE TST_UPPER ;// Jump to Upper if not both + LDM pSrcAbove,{tVal8,tVal9} ;// tVal 8 to 9 = pSrcAbove[0 to 7] + + ADD leftStepx2, leftStep,leftStep ;// leftStepx2 = 2 * leftStep + ADD pSrcLeft2, pSrcLeft, leftStep ;// pSrcLeft2 = pSrcLeft + leftStep + + ;// M_STALL ARM1136JS=1 + + UXTB16 tVal7, tVal8 ;// pSrcAbove[0, 2] + UXTB16 tVal8, tVal8, ROR #8 ;// pSrcAbove[1, 3] + UADD16 sum1, tVal7, tVal8 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3] + + UXTB16 tVal7, tVal9 ;// pSrcAbove[4, 6] + UXTB16 tVal9, tVal9, ROR #8 ;// pSrcAbove[5, 7] + UADD16 sum2, tVal7, tVal9 ;// pSrcAbove[4, 6] + pSrcAbove[5, 7] + ADD sum1, sum1, sum1, LSR #16 ;// sum(pSrcAbove[0] to pSrcAbove[3]) + ADD sum2, sum2, sum2, LSR #16 ;// sum(pSrcAbove[4] to pSrcAbove[7]) + UXTH sum1, sum1 ;// upsum1 (Clear the top junk bits) + UXTH sum2, sum2 ;// upsum2 (Clear the top junk bits) + + M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0] + M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1] + M_LDRB tVal4, [pSrcLeft], +leftStepx2 ;// tVal4 = pSrcLeft[2] + M_LDRB tVal12,[pSrcLeft2], +leftStepx2 ;// tVal12= pSrcLeft[3] + ADD tVal2, tVal8, tVal9 ;// tVal2 = tVal8 + tVal9 + + M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[4] + M_LDRB 
tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[5] + ADD tVal14, tVal4, tVal12 ;// tVal14 = tVal4 + tVal12 (r14 is reused; the two loads below no longer need leftStepx2) + + LDRB tVal4, [pSrcLeft] ;// tVal4 = pSrcLeft[6] + LDRB tVal12,[pSrcLeft2] ;// tVal12= pSrcLeft[7] + ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9 + ADD tVal2, tVal2, tVal14 ;// leftsum1 = sum(pSrcLeft[0] to pSrcLeft[3]) + ADD tVal4, tVal4, tVal12 ;// tVal4 = tVal4 + tVal12 + ADD tVal14, tVal8, tVal4 ;// leftsum2 = sum(pSrcLeft[4] to pSrcLeft[7]) + ADD tVal8, tVal14, #2 ;// tVal8 = leftsum2 + 2 + ADD tVal9, sum2, #2 ;// tVal9 = upsum2 + 2 + ADD sum1, sum1, tVal2 ;// sum1 = upsum1 + leftsum1 + ADD sum2, sum2, tVal14 ;// sum2 = upsum2 + leftsum2 + ADD sum1, sum1, #4 ;// (sum1 + 4) + ADD sum2, sum2, #4 ;// (sum2 + 4) + MOV sum1, sum1, LSR #3 ;// (sum1 + 4)>>3 + MOV tVal9, tVal9, LSR #2 ;// (upsum2 + 2)>>2 + MOV tVal8, tVal8, LSR #2 ;// (leftsum2 + 2)>>2 + MOV sum2, sum2, LSR #3 ;// (sum2 + 4)>>3 + + MUL tVal0, sum1, r0x01010101 ;// replicate the val in all the bytes + MUL tVal1, tVal9,r0x01010101 ;// replicate the val in all the bytes + MUL tVal8, tVal8,r0x01010101 ;// replicate the val in all the bytes + MUL tVal9, sum2, r0x01010101 ;// replicate the val in all the bytes + + M_STRD tVal0, tVal1, [pDst], dstStep ;// pDst[0 to 7] = tVal 0 to 1 + M_STRD tVal0, tVal1, [pDst], dstStep ;// pDst[8 to 15] = tVal 0 to 1 + M_STRD tVal0, tVal1, [pDst], dstStep ;// pDst[16 to 23] = tVal 0 to 1 + M_STRD tVal0, tVal1, [pDst], dstStep ;// pDst[24 to 31] = tVal 0 to 1 + + M_STRD tVal8, tVal9, [pDst], dstStep ;// pDst[32 to 39] = tVal 8 to 9 + M_STRD tVal8, tVal9, [pDst], dstStep ;// pDst[40 to 47] = tVal 8 to 9 + M_STRD tVal8, tVal9, [pDst], dstStep ;// pDst[48 to 55] = tVal 8 to 9 + M_STRD tVal8, tVal9, [pDst], dstStep ;// pDst[56 to 63] = tVal 8 to 9 + MOV return, #OMX_Sts_NoErr + M_EXIT + +TST_UPPER + + ;// M_STALL ARM1136JS=3 + + CMP availability, #OMX_VC_UPPER ;// if (availability == OMX_VC_UPPER) + + BNE TST_LEFT ;// Jump to Left if not upper + LDM 
pSrcAbove,{tVal8,tVal9} ;// tVal 8 to 9 = pSrcAbove[0 to 7] + + ;// M_STALL ARM1136JS=3 + + UXTB16 tVal7, tVal8 ;// pSrcAbove[0, 2] + UXTB16 tVal8, tVal8, ROR #8 ;// pSrcAbove[1, 3] + UADD16 sum1, tVal7, tVal8 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3] + + UXTB16 tVal7, tVal9 ;// pSrcAbove[4, 6] + UXTB16 tVal9, tVal9, ROR #8 ;// pSrcAbove[5, 7] + UADD16 sum2, tVal7, tVal9 ;// pSrcAbove[4, 6] + pSrcAbove[5, 7] + + ADD sum1, sum1, sum1, LSR #16 ;// sum(pSrcAbove[0] to pSrcAbove[3]) + ADD sum2, sum2, sum2, LSR #16 ;// sum(pSrcAbove[4] to pSrcAbove[7]) + + UXTH sum1, sum1 ;// upsum1 (Clear the top junk bits) + UXTH sum2, sum2 ;// upsum2 (Clear the top junk bits) + + ADD sum1, sum1, #2 ;// sum1 + 2 + ADD sum2, sum2, #2 ;// sum2 + 2 + + MOV sum1, sum1, LSR #2 ;// (sum1 + 2)>>2 + MOV sum2, sum2, LSR #2 ;// (sum2 + 2)>>2 + + MUL sum1, sum1,r0x01010101 ;// replicate the val in all the bytes (sum1 is r6, i.e. tVal6) + MUL sum2, sum2,r0x01010101 ;// replicate the val in all the bytes (sum2 is r7, i.e. tVal7) + + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[0 to 7] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[8 to 15] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[16 to 23] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[24 to 31] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[32 to 39] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[40 to 47] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[48 to 55] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[56 to 63] = tVal 6 to 7 + MOV return, #OMX_Sts_NoErr + M_EXIT + +TST_LEFT + ;// M_STALL ARM1136JS=3 + + CMP availability, #OMX_VC_LEFT ;// if (availability == OMX_VC_LEFT) + BNE TST_COUNT0 ;// Jump to the constant fill otherwise + ADD leftStepx2, leftStep,leftStep ;// leftStepx2 = 2 * leftStep + ADD pSrcLeft2, pSrcLeft, leftStep ;// pSrcLeft2 = pSrcLeft + leftStep + + M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0] + M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1] + M_LDRB tVal4, [pSrcLeft], +leftStepx2 ;// tVal4 = pSrcLeft[2] + 
M_LDRB tVal12,[pSrcLeft2], +leftStepx2 ;// tVal12= pSrcLeft[3] + + ADD tVal6, tVal8, tVal9 ;// tVal6 = tVal8 + tVal9 + + M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[4] + ADD tVal7, tVal4, tVal12 ;// tVal7 = tVal4 + tVal12 + M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[5] + M_LDRB tVal4, [pSrcLeft], +leftStepx2 ;// tVal4 = pSrcLeft[6] + M_LDRB tVal12,[pSrcLeft2], +leftStepx2 ;// tVal12= pSrcLeft[7] + + ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9 + ADD sum1, tVal6, tVal7 ;// sum1 = sum(pSrcLeft[0] to pSrcLeft[3]) + ADD tVal4, tVal4, tVal12 ;// tVal4 = tVal4 + tVal12 + ADD sum2, tVal8, tVal4 ;// sum2 = sum(pSrcLeft[4] to pSrcLeft[7]) + + ADD sum1, sum1, #2 ;// sum1 + 2 + ADD sum2, sum2, #2 ;// sum2 + 2 + + MOV sum1, sum1, LSR #2 ;// (sum1 + 2)>>2 + MOV sum2, sum2, LSR #2 ;// (sum2 + 2)>>2 + + MUL tVal6, sum1,r0x01010101 ;// replicate the val in all the bytes + MUL tVal8, sum2,r0x01010101 ;// replicate the val in all the bytes + + ;// M_STALL ARM1136JS=1 + MOV tVal7,tVal6 ;// tVal7 = tVal6 (replicated sum1) + MOV tVal9,tVal8 ;// tVal9 = tVal8 (replicated sum2) + + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[0 to 7] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[8 to 15] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[16 to 23] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[24 to 31] = tVal 6 to 7 + + M_STRD tVal8, tVal9, [pDst], dstStep ;// pDst[32 to 39] = tVal 8 to 9 + M_STRD tVal8, tVal9, [pDst], dstStep ;// pDst[40 to 47] = tVal 8 to 9 + M_STRD tVal8, tVal9, [pDst], dstStep ;// pDst[48 to 55] = tVal 8 to 9 + M_STRD tVal8, tVal9, [pDst], dstStep ;// pDst[56 to 63] = tVal 8 to 9 + + MOV return, #OMX_Sts_NoErr + M_EXIT ;// Macro to exit midway-break from case + +TST_COUNT0 + LDR sum1, =MUL_CONST1 ;// sum1 (r6, also tVal6) = 0x80808080, the fill used when no DC case above matched + + ;// M_STALL ARM1136JS=2 + + MOV tVal7, sum1 ;// tVal7 = sum1 + + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[0 to 7] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[8 to 15] = 
tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[16 to 23] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[24 to 31] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[32 to 39] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[40 to 47] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[48 to 55] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[56 to 63] = tVal 6 to 7 + + MOV return, #OMX_Sts_NoErr + M_EXIT ;// Macro to exit midway-break from case + +OMX_VC_CHROMA_HOR + + ;// M_STALL ARM1136JS=2 + + ADD pSrcLeft2, pSrcLeft, leftStep ;// pSrcLeft2 = pSrcLeft + leftStep + ADD leftStepx2, leftStep, leftStep ;// leftStepx2 = leftStep * 2 + ADD pDst2, pDst, dstStep ;// pDst2 = pDst + dstStep + ADD dstStepx2, dstStep, dstStep ;// double dstStep + SUB dstStepx2, dstStepx2, #4 ;// double dstStep minus 4 (the first STR of each row already advances by 4) + LDR r0x01010101, =MUL_CONST0 ;// Const to repeat the byte in reg 4 times + M_LDRB tVal6, [pSrcLeft], +leftStepx2 ;// tVal6 = pSrcLeft[0] + M_LDRB tVal7, [pSrcLeft2],+leftStepx2 ;// tVal7 = pSrcLeft[1] + M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[2] + M_LDRB tVal9, [pSrcLeft2],+leftStepx2 ;// tVal9 = pSrcLeft[3] + MUL tVal6, tVal6, r0x01010101 ;// replicate the val in all the bytes + MUL tVal7, tVal7, r0x01010101 ;// replicate the val in all the bytes + MUL tVal8, tVal8, r0x01010101 ;// replicate the val in all the bytes + MUL tVal9, tVal9, r0x01010101 ;// replicate the val in all the bytes + STR tVal6, [pDst], #+4 ;// store {tVal6} at pDst [0 to 3] + STR tVal7, [pDst2], #+4 ;// store {tVal7} at pDst2[0 to 3] + M_STR tVal6, [pDst], dstStepx2 ;// store {tVal6} at pDst [4 to 7] + M_STR tVal7, [pDst2], dstStepx2 ;// store {tVal7} at pDst2[4 to 7] + STR tVal8, [pDst], #+4 ;// store {tVal8} at pDst [0 to 3] + STR tVal9, [pDst2], #+4 ;// store {tVal9} at pDst2[0 to 3] + M_STR tVal8, [pDst], dstStepx2 ;// store {tVal8} at pDst [4 to 7] + M_STR tVal9, [pDst2], dstStepx2 ;// store {tVal9} at 
pDst2[4 to 7] + M_LDRB tVal6, [pSrcLeft], +leftStepx2 ;// tVal6 = pSrcLeft[4] + M_LDRB tVal7, [pSrcLeft2],+leftStepx2 ;// tVal7 = pSrcLeft[5] + M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[6] + M_LDRB tVal9, [pSrcLeft2],+leftStepx2 ;// tVal9 = pSrcLeft[7] + MUL tVal6, tVal6, r0x01010101 ;// replicate the val in all the bytes + MUL tVal7, tVal7, r0x01010101 ;// replicate the val in all the bytes + MUL tVal8, tVal8, r0x01010101 ;// replicate the val in all the bytes + MUL tVal9, tVal9, r0x01010101 ;// replicate the val in all the bytes + STR tVal6, [pDst], #+4 ;// store {tVal6} at pDst [0 to 3] + STR tVal7, [pDst2], #+4 ;// store {tVal7} at pDst2[0 to 3] + M_STR tVal6, [pDst], dstStepx2 ;// store {tVal6} at pDst [4 to 7] + M_STR tVal7, [pDst2], dstStepx2 ;// store {tVal7} at pDst2[4 to 7] + STR tVal8, [pDst], #+4 ;// store {tVal8} at pDst [0 to 3] + STR tVal9, [pDst2], #+4 ;// store {tVal9} at pDst2[0 to 3] + M_STR tVal8, [pDst], dstStepx2 ;// store {tVal8} at pDst [4 to 7] + M_STR tVal9, [pDst2], dstStepx2 ;// store {tVal9} at pDst2[4 to 7] + MOV return, #OMX_Sts_NoErr + M_EXIT + +OMX_VC_CHROMA_VERT + + ;// M_STALL ARM1136JS=4 + + LDMIA pSrcAbove, {tVal6,tVal7} ;// tVal 6 to 7 = pSrcAbove[0 to 7] + MOV return, #OMX_Sts_NoErr + + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[0 to 7] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[8 to 15] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[16 to 23] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[24 to 31] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[32 to 39] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[40 to 47] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[48 to 55] = tVal 6 to 7 + M_STRD tVal6, tVal7, [pDst], dstStep ;// pDst[56 to 63] = tVal 6 to 7 + + M_EXIT ;// Macro to exit midway-break from case + +OMX_VC_CHROMA_PLANE + + ;// M_STALL ARM1136JS=3 + + RSB tVal14, leftStep, leftStep, LSL #3 ;// 7*leftStep + LDRB 
tVal7, [pSrcAbove, #+7] ;// pSrcAbove[7] + LDRB tVal6, [pSrcLeft, +tVal14] ;// pSrcLeft[7*leftStep] + LDRB tVal8, [pSrcAboveLeft] ;// pSrcAboveLeft[0] + LDRB tVal9, [pSrcAbove, #+6 ] ;// pSrcAbove[6] + LDRB tVal10,[pSrcAbove] ;// pSrcAbove[0] + ADD tVal2, tVal7, tVal6 ;// pSrcAbove[7] + pSrcLeft[7*leftStep] + SUB tVal6, tVal6, tVal8 ;// V0 = pSrcLeft[7*leftStep] - pSrcAboveLeft[0] + SUB tVal7, tVal7, tVal8 ;// H0 = pSrcAbove[7] - pSrcAboveLeft[0] + LSL tVal2, tVal2, #4 ;// a = 16 * (pSrcAbove[7] + pSrcLeft[7*leftStep]) + ADD tVal2, tVal2, #16 ;// a + 16 + SUB tVal9, tVal9,tVal10 ;// pSrcAbove[6] - pSrcAbove[0] + LDRB tVal8, [pSrcAbove,#+5] ;// pSrcAbove[5] + LDRB tVal10,[pSrcAbove,#+1] ;// pSrcAbove[1] + ADD tVal9, tVal9, tVal9, LSL #1 ;// H1 = 3 * (pSrcAbove[6] - pSrcAbove[0]) + ADD tVal7, tVal9, tVal7, LSL #2 ;// H = H1 + 4*H0 + SUB tVal8, tVal8, tVal10 ;// pSrcAbove[5] - pSrcAbove[1] + LDRB tVal9, [pSrcAbove,#+4] ;// pSrcAbove[4] + LDRB tVal10,[pSrcAbove,#+2] ;// pSrcAbove[2] + ADD tVal7, tVal7, tVal8, LSL #1 ;// H = H + 2*(pSrcAbove[5] - pSrcAbove[1]) + SUB tVal11, tVal14,leftStep ;// 6*leftStep + ADD tVal11, pSrcLeft, tVal11 ;// pSrcLeft + 6*leftStep + MOV tVal12, pSrcLeft ;// pSrcLeft + SUB tVal9, tVal9, tVal10 ;// pSrcAbove[4] - pSrcAbove[2] + ADD tVal7, tVal7, tVal9 ;// H = H + (pSrcAbove[4] - pSrcAbove[2]) + M_LDRB tVal8, [tVal11],-leftStep ;// pSrcLeft[6*leftStep] + M_LDRB tVal10,[tVal12],+leftStep ;// pSrcLeft[0] + ADD tVal7, tVal7, tVal7, LSL #4 ;// 17 * H + ADD tVal7, tVal7, #16 ;// 17 * H + 16 + SUB tVal8, tVal8, tVal10 ;// pSrcLeft[6*leftStep] - pSrcLeft[0] + ASR b, tVal7, #5 ;// b = (17 * H + 16) >> 5 (b is r14; 7*leftStep is no longer needed) + ADD tVal8, tVal8, tVal8, LSL #1 ;// V1 = 3 * (pSrcLeft[6*leftStep] - pSrcLeft[0]) + ADD tVal6, tVal8, tVal6, LSL #2 ;// V = V1 + 4*V0 + M_LDRB tVal8, [tVal11],-leftStep ;// pSrcLeft[5*leftStep] + M_LDRB tVal10,[tVal12],+leftStep ;// pSrcLeft[leftStep] + ADD tVal7, b, b, LSL #1 ;// 3*b + SUB tVal2, tVal2, tVal7 ;// a + 16 - 3*b + SUB tVal7, tVal8, tVal10 ;// pSrcLeft[5*leftStep] - pSrcLeft[leftStep] + M_LDRB tVal8, 
[tVal11],-leftStep ;// pSrcLeft[4*leftStep] + M_LDRB tVal10,[tVal12],+leftStep ;// pSrcLeft[2*leftStep] + ADD tVal6, tVal6, tVal7, LSL #1 ;// V = V + 2*(pSrcLeft[5*leftStep] - pSrcLeft[leftStep]) + LDR r0x00FF00FF, =MASK_CONST ;// r0x00FF00FF = 0x00FF00FF + SUB tVal7, tVal8, tVal10 ;// pSrcLeft[4*leftStep] - pSrcLeft[2*leftStep] + ADD tVal6, tVal6, tVal7 ;// V = V + (pSrcLeft[4*leftStep] - pSrcLeft[2*leftStep]) + SUB dstStep, dstStep, #4 ;// dstStep - 4 (the first STR of each row already advances pDst by 4) + ADD tVal6, tVal6, tVal6, LSL #4 ;// 17*V + ADD tVal6, tVal6, #16 ;// 17*V + 16 + + ;// M_STALL ARM1136JS=1 + + ASR c, tVal6, #5 ;// c = (17*V + 16)>>5 + + ;// M_STALL ARM1136JS=1 + + ADD tVal6, c, c, LSL #1 ;// 3*c + UXTH c, c ;// only in half word + SUB tVal6, tVal2, tVal6 ;// p0 = a - 3*b - 3*c + 16 (may be negative) + ORR c, c, c, LSL #16 ;// c c + ADD tVal7, b, b ;// 2b + ADD tVal2, tVal6, tVal7 ;// p2 = p0 + 2*b + ADD tVal7, tVal7, b ;// 3b + PKHBT p2p0, tVal6, tVal2, LSL #16 ;// p2p0 = pack {p2, p0}; only the low halfword of p0 is taken, so a negative p0 cannot overwrite p2 + UXTH b, b + UXTH tVal7, tVal7 + ORR b, b, b, LSL #16 ;// {b,b} + ORR tVal7, tVal7, tVal7, LSL #16 ;// {3b,3b} + SADD16 p3p1, p2p0, b ;// p3p1 = p2p0 + {b,b} + SADD16 p6p4, p3p1, tVal7 ;// p6p4 = p3p1 + {3b,3b} + SADD16 p7p5, p6p4, b ;// p7p5 = p6p4 + {b,b} + MOV outerCount, #BLK_SIZE ;// Outer Loop Count + +LOOP_PLANE + + USAT16 pp7pp5, #13, p7p5 ;// clip13(p7) clip13(p5) into scratch; the running sum stays unclipped + USAT16 pp6pp4, #13, p6p4 ;// clip13(p6) clip13(p4) into scratch; the running sum stays unclipped + USAT16 pp3pp1, #13, p3p1 ;// clip13(p3) clip13(p1) into scratch; the running sum stays unclipped + USAT16 pp2pp0, #13, p2p0 ;// clip13(p2) clip13(p0) into scratch; the running sum stays unclipped + + AND pp7pp5, r0x00FF00FF, pp7pp5, ASR #5 ;// clip8(p7) clip8(p5) + AND pp6pp4, r0x00FF00FF, pp6pp4, ASR #5 ;// clip8(p6) clip8(p4) + AND pp3pp1, r0x00FF00FF, pp3pp1, ASR #5 ;// clip8(p3) clip8(p1) + AND pp2pp0, r0x00FF00FF, pp2pp0, ASR #5 ;// clip8(p2) clip8(p0) + + SUBS outerCount, outerCount, #1 ;// outerCount-- + + ORR p3210, pp2pp0, pp3pp1, LSL #8 ;// pack {p3,p2, p1, p0} + STR p3210, [pDst], #4 ;// store {pDst[0] to pDst[3]} + + ORR p7654, pp6pp4, pp7pp5, LSL #8 ;// pack {p7,p6, p5, p4} + M_STR p7654, [pDst], dstStep ;// store {pDst[4] to pDst[7]} + + SADD16 p7p5, p7p5, c ;// {p7 + c}, {p5 + c} + SADD16 
p6p4, p6p4, c ;// {p6 + c}, {p4 + c} + SADD16 p3p1, p3p1, c ;// {p3 + c}, {p1 + c} + SADD16 p2p0, p2p0, c ;// {p2 + c}, {p0 + c} + + BNE LOOP_PLANE ;// Loop for 8 times + MOV return, #OMX_Sts_NoErr + M_END + + ENDIF ;// ARM1136JS + + + + END +;//----------------------------------------------------------------------------------------------- +;// omxVCM4P10_PredictIntraChroma_8x8 ends +;//----------------------------------------------------------------------------------------------- diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s new file mode 100644 index 0000000..1557208 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_16x16_s.s @@ -0,0 +1,501 @@ +;// +;// +;// File Name: omxVCM4P10_PredictIntra_16x16_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + +;//------------------------------------------------------- +;// This table is for implementing switch case of C in asm by +;// the method of two levels of indexing. 
+;//------------------------------------------------------- + + M_TABLE armVCM4P10_pIndexTable16x16 + DCD OMX_VC_16X16_VERT, OMX_VC_16X16_HOR + DCD OMX_VC_16X16_DC, OMX_VC_16X16_PLANE + + IF ARM1136JS + +;//-------------------------------------------- +;// Constants +;//-------------------------------------------- +BLK_SIZE EQU 0x10 +MUL_CONST0 EQU 0x01010101 +MUL_CONST1 EQU 0x00060004 +MUL_CONST2 EQU 0x00070005 +MUL_CONST3 EQU 0x00030001 +MASK_CONST EQU 0x00FF00FF + +;//-------------------------------------------- +;// Scratch registers (several aliases share one physical register) +;//-------------------------------------------- +y RN 12 +pc RN 15 + +return RN 0 +innerCount RN 0 +outerCount RN 1 +pSrcLeft2 RN 1 +pDst2 RN 2 +sum RN 6 +pTable RN 9 +temp1 RN 10 +temp2 RN 12 +cMul1 RN 11 +cMul2 RN 12 +count RN 12 +dstStepx2 RN 11 +leftStepx2 RN 14 +r0x01010101 RN 10 +r0x00FF00FF RN 11 + +tVal0 RN 0 +tVal1 RN 1 +tVal2 RN 2 +tVal3 RN 3 +tVal4 RN 4 +tVal5 RN 5 +tVal6 RN 6 +tVal7 RN 7 +tVal8 RN 8 +tVal9 RN 9 +tVal10 RN 10 +tVal11 RN 11 +tVal12 RN 12 +tVal14 RN 14 + +b RN 12 +c RN 14 + +p2p0 RN 0 +p3p1 RN 1 +p6p4 RN 2 +p7p5 RN 4 +p10p8 RN 6 +p11p9 RN 7 +p14p12 RN 8 +p15p13 RN 9 + +p3210 RN 10 +p7654 RN 10 +p111098 RN 10 +p15141312 RN 10 + +;//-------------------------------------------- +;// Declare input registers +;//-------------------------------------------- +pSrcLeft RN 0 ;// input pointer +pSrcAbove RN 1 ;// input pointer +pSrcAboveLeft RN 2 ;// input pointer +pDst RN 3 ;// output pointer +leftStep RN 4 ;// input variable (loaded from the stack) +dstStep RN 5 ;// input variable (loaded from the stack) +predMode RN 6 ;// input variable (loaded from the stack) +availability RN 7 ;// input variable (loaded from the stack) + +;//----------------------------------------------------------------------------------------------- +;// omxVCM4P10_PredictIntra_16x16 starts: fills 16 rows of 16 bytes at pDst, case selected by predMode 
+;//----------------------------------------------------------------------------------------------- + + ;// Write function header + M_START omxVCM4P10_PredictIntra_16x16, r11 + + ;// Define stack arguments + M_ARG LeftStep, 4 + M_ARG DstStep, 4 + 
M_ARG PredMode, 4 + M_ARG Availability, 4 + + ;// M_STALL ARM1136JS=4 + + LDR pTable,=armVCM4P10_pIndexTable16x16 ;// Load index table for switch case + + ;// Load argument from the stack + M_LDR predMode, PredMode ;// Arg predMode loaded from stack to reg + M_LDR leftStep, LeftStep ;// Arg leftStep loaded from stack to reg + M_LDR dstStep, DstStep ;// Arg dstStep loaded from stack to reg + M_LDR availability, Availability ;// Arg availability loaded from stack to reg + + MOV y, #BLK_SIZE ;// Outer Loop Count + LDR pc, [pTable, predMode, LSL #2] ;// Branch to the case based on predMode + +OMX_VC_16X16_VERT + LDM pSrcAbove, {tVal6,tVal7,tVal8,tVal9};// tVal 6 to 9 = pSrcAbove[0 to 15] + ADD dstStepx2, dstStep, dstStep ;// double dstStep + ADD pDst2, pDst, dstStep ;// pDst2 = pDst advanced by dstStep (next row) + + ;// M_STALL ARM1136JS=2 ;// Stall outside the loop + +LOOP_VERT + STM pDst, {tVal6,tVal7,tVal8,tVal9} ;// pDst[0 to 15] = tVal 6 to 9 + SUBS y, y, #2 ;// y -= 2 (two rows stored per pass) + ADD pDst, pDst, dstStepx2 ;// pDst advanced by 2*dstStep + STM pDst2, {tVal6,tVal7,tVal8,tVal9} ;// pDst2[0 to 15] = tVal 6 to 9 + ADD pDst2, pDst2, dstStepx2 ;// pDst2 advanced by 2*dstStep + BNE LOOP_VERT ;// Loop for 8 times + MOV return, #OMX_Sts_NoErr + M_EXIT + + +OMX_VC_16X16_HOR + + ;// M_STALL ARM1136JS=6 + + LDR r0x01010101, =MUL_CONST0 ;// Const to repeat the byte in reg 4 times + MOV y, #4 ;// Outer Loop Count (4 rows per pass) + M_LDRB tVal6, [pSrcLeft], +leftStep ;// tVal6 = left pixel for 1st row of the group + ADD pDst2, pDst, dstStep ;// pDst2 = pDst advanced by dstStep (next row) + M_LDRB tVal7, [pSrcLeft], +leftStep ;// tVal7 = left pixel for 2nd row of the group + ADD dstStepx2, dstStep, dstStep ;// double dstStep + SUB dstStepx2, dstStepx2, #12 ;// double dstStep minus 12 (the three #+4 stores already advanced by 12) + +LOOP_HOR + M_LDRB tVal8, [pSrcLeft], +leftStep ;// tVal8 = left pixel for 3rd row of the group + MUL tVal6, tVal6, r0x01010101 ;// replicate the val in all the bytes + M_LDRB tVal9, [pSrcLeft], +leftStep ;// tVal9 = left pixel for 4th row of the group + MUL tVal7, tVal7, r0x01010101 ;// replicate the val in all the bytes + SUBS 
y, y, #1 ;// y-- + STR tVal6, [pDst], #+4 ;// store {tVal6} at pDst[0 to 3] + STR tVal7, [pDst2], #+4 ;// store {tVal7} at pDst2[0 to 3] + STR tVal6, [pDst], #+4 ;// store {tVal6} at pDst[4 to 7] + STR tVal7, [pDst2], #+4 ;// store {tVal7} at pDst2[4 to 7] + MUL tVal8, tVal8, r0x01010101 ;// replicate the val in all the bytes + STR tVal6, [pDst], #+4 ;// store {tVal6} at pDst[8 to 11] + STR tVal7, [pDst2], #+4 ;// store {tVal7} at pDst2[8 to 11] + MUL tVal9, tVal9, r0x01010101 ;// replicate the val in all the bytes + M_STR tVal6, [pDst], dstStepx2 ;// store {tVal6} at pDst[12 to 15] + M_STR tVal7, [pDst2], dstStepx2 ;// store {tVal7} at pDst2[12 to 15] + STR tVal8, [pDst], #+4 ;// store {tVal8} at pDst[0 to 3] + STR tVal9, [pDst2], #+4 ;// store {tVal9} at pDst2[0 to 3] + STR tVal8, [pDst], #+4 ;// store {tVal8} at pDst[4 to 7] + STR tVal9, [pDst2], #+4 ;// store {tVal9} at pDst2[4 to 7] + STR tVal8, [pDst], #+4 ;// store {tVal8} at pDst[8 to 11] + STR tVal9, [pDst2], #+4 ;// store {tVal9} at pDst2[8 to 11] + M_STR tVal8, [pDst], dstStepx2 ;// store {tVal8} at pDst[12 to 15] + M_LDRB tVal6, [pSrcLeft], +leftStep ;// preload tVal6 for the next group; NOTE(review): also runs on the last pass, so it appears to read past the 16th left pixel - confirm + M_STR tVal9, [pDst2], dstStepx2 ;// store {tVal9} at pDst2[12 to 15] + M_LDRB tVal7, [pSrcLeft], +leftStep ;// preload tVal7 for the next group (same NOTE(review) as above) + BNE LOOP_HOR ;// Loop for 4 times + MOV return, #OMX_Sts_NoErr + M_EXIT + +OMX_VC_16X16_DC + + ;// M_STALL ARM1136JS=2 + + MOV count, #0 ;// count = 0 + TST availability, #OMX_VC_UPPER ;// if(availability & #OMX_VC_UPPER) + BEQ TST_LEFT ;// Jump to Left if not upper + LDM pSrcAbove,{tVal8,tVal9,tVal10,tVal11};// tVal 8 to 11 = pSrcAbove[0 to 15] + ADD count, count, #1 ;// if upper inc count by 1 + + ;// M_STALL ARM1136JS=2 + + UXTB16 tVal2, tVal8 ;// pSrcAbove[0, 2] + UXTB16 tVal6, tVal9 ;// pSrcAbove[4, 6] + UADD16 tVal2, tVal2, tVal6 ;// pSrcAbove[0, 2] + pSrcAbove[4, 6] + UXTB16 tVal8, tVal8, ROR #8 ;// pSrcAbove[1, 3] + UXTB16 tVal9, tVal9, ROR #8 ;// pSrcAbove[5, 7] + UADD16 tVal8, 
tVal8, tVal9 ;// pSrcAbove[1, 3] + pSrcAbove[5, 7] + UADD16 tVal2, tVal2, tVal8 ;// sum(pSrcAbove[0] to pSrcAbove[7]) + + UXTB16 tVal8, tVal10 ;// pSrcAbove[8, 10] + UXTB16 tVal9, tVal11 ;// pSrcAbove[12, 14] + UADD16 tVal8, tVal8, tVal9 ;// pSrcAbove[8, 10] + pSrcAbove[12, 14] + UXTB16 tVal10, tVal10, ROR #8 ;// pSrcAbove[9, 11] + UXTB16 tVal11, tVal11, ROR #8 ;// pSrcAbove[13, 15] + UADD16 tVal10, tVal10, tVal11 ;// pSrcAbove[9, 11] + pSrcAbove[13, 15] + UADD16 tVal8, tVal8, tVal10 ;// sum(pSrcAbove[8] to pSrcAbove[15]) + + UADD16 tVal2, tVal2, tVal8 ;// two halfword partial sums of pSrcAbove[0] to pSrcAbove[15] + + ;// M_STALL ARM1136JS=1 + + ADD tVal2, tVal2, tVal2, LSR #16 ;// low halfword = sum(pSrcAbove[0] to pSrcAbove[15]); high halfword keeps a partial sum + + ;// M_STALL ARM1136JS=1 + + UXTH sum, tVal2 ;// sum = low halfword (upper total); tVal2 still holds it for the both-available case + +TST_LEFT + TST availability, #OMX_VC_LEFT + BEQ TST_COUNT + ADD leftStepx2, leftStep,leftStep ;// leftStepx2 = 2 * leftStep + ADD pSrcLeft2, pSrcLeft, leftStep ;// pSrcLeft2 = pSrcLeft + leftStep + + M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[0] + M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[1*leftStep] + M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= pSrcLeft[2*leftStep] + M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= pSrcLeft[3*leftStep] + ADD tVal7, tVal8, tVal9 ;// tVal7 = tVal8 + tVal9 + ADD count, count, #1 ;// Inc Counter if Left is available + ADD tVal6, tVal10, tVal11 ;// tVal6 = tVal10 + tVal11 (tVal6 aliases sum, so the upper total now lives only in tVal2) + + M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[4*leftStep] + M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[5*leftStep] + M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= pSrcLeft[6*leftStep] + M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= pSrcLeft[7*leftStep] + ADD sum, tVal7, tVal6 ;// sum = tVal7 + tVal6 (left rows 0 to 3) + ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9 + ADD tVal10, tVal10, tVal11 ;// tVal10= tVal10 + tVal11 + ADD tVal7, tVal8, tVal10 ;// tVal7 = tVal8 + tVal10 (left rows 4 to 7) + + + M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[8*leftStep] + M_LDRB tVal9, [pSrcLeft2], +leftStepx2 
;// tVal9 = pSrcLeft[9*leftStep] + M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= pSrcLeft[10*leftStep] + M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= pSrcLeft[11*leftStep] + ADD sum, sum, tVal7 ;// sum = sum + tVal7 + ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9 + ADD tVal10, tVal10, tVal11 ;// tVal10= tVal10 + tVal11 + ADD tVal7, tVal8, tVal10 ;// tVal7 = tVal8 + tVal10 (left rows 8 to 11) + + + M_LDRB tVal8, [pSrcLeft], +leftStepx2 ;// tVal8 = pSrcLeft[12*leftStep] + M_LDRB tVal9, [pSrcLeft2], +leftStepx2 ;// tVal9 = pSrcLeft[13*leftStep] + M_LDRB tVal10, [pSrcLeft], +leftStepx2 ;// tVal10= pSrcLeft[14*leftStep] + M_LDRB tVal11, [pSrcLeft2],+leftStepx2 ;// tVal11= pSrcLeft[15*leftStep] + ADD sum, sum, tVal7 ;// sum = sum + tVal7 + ADD tVal8, tVal8, tVal9 ;// tVal8 = tVal8 + tVal9 + ADD tVal10, tVal10, tVal11 ;// tVal10= tVal10 + tVal11 + ADD tVal7, tVal8, tVal10 ;// tVal7 = tVal8 + tVal10 (left rows 12 to 15) + ADD sum, sum, tVal7 ;// sum = sum + tVal7 + +TST_COUNT + CMP count, #0 ;// if(count == 0) + MOVEQ sum, #128 ;// sum = 128 if(count == 0) + BEQ TST_COUNT0 ;// if(count == 0) + CMP count, #1 ;// if(count == 1) + ADDEQ sum, sum, #8 ;// sum += 8 if(count == 1) + ADDNE sum, sum, tVal2 ;// sum = sumleft + sumupper + ADDNE sum, sum, #16 ;// sum += 16 if(count == 2) + + ;// M_STALL ARM1136JS=1 + + UXTH sum, sum ;// keep the low halfword of sum (drops the partial sum carried in tVal2's high half) + + ;// M_STALL ARM1136JS=1 + + LSREQ sum, sum, #4 ;// sum >> 4 if(count == 1) + + ;// M_STALL ARM1136JS=1 + + LSRNE sum, sum, #5 ;// sum >> 5 if(count == 2) + +TST_COUNT0 + + ;// M_STALL ARM1136JS=1 + + ORR sum, sum, sum, LSL #8 ;// sum replicated in the two low bytes + + ;// M_STALL ARM1136JS=1 + + ORR tVal6, sum, sum, LSL #16 ;// sum replicated in all bytes + CPY tVal7, tVal6 ;// tVal7 = tVal6 + CPY tVal8, tVal6 ;// tVal8 = tVal6 + CPY tVal9, tVal6 ;// tVal9 = tVal6 + ADD dstStepx2, dstStep, dstStep ;// double dstStep + ADD pDst2, pDst, dstStep ;// pDst2 = pDst advanced by dstStep (next row) + MOV y, #BLK_SIZE ;// Outer Loop Count (y shares its register with count) + +LOOP_DC + STM pDst, {tVal6,tVal7,tVal8,tVal9} ;// pDst[0 to 15] = tVal 6 to 9 + SUBS y, y, #2 ;// 
y -= 2 (two rows stored per pass) + ADD pDst, pDst, dstStepx2 ;// pDst advanced by 2*dstStep + STM pDst2, {tVal6,tVal7,tVal8,tVal9} ;// pDst2[0 to 15] = tVal 6 to 9 + ADD pDst2, pDst2, dstStepx2 ;// pDst2 advanced by 2*dstStep + BNE LOOP_DC ;// Loop for 8 times + + MOV return, #OMX_Sts_NoErr + M_EXIT + +OMX_VC_16X16_PLANE + + ;// M_STALL ARM1136JS=3 + RSB tVal14, leftStep, leftStep, LSL #4 ;// tVal14 = 15*leftStep + + ;// M_STALL ARM1136JS=2 + LDRB tVal10, [pSrcLeft, tVal14] ;// tVal10 = pSrcLeft[15*leftStep] + LDRB tVal11, [pSrcAboveLeft] ;// tVal11 = pSrcAboveLeft[0] + LDRB tVal12, [pSrcAbove, #15] ;// tVal12 = pSrcAbove[15] + + ADD tVal2, tVal12, tVal10 ;// tVal2 = pSrcAbove[15] + pSrcLeft[15*leftStep] + SUB tVal10, tVal10, tVal11 ;// tVal10 = V0 = pSrcLeft[15*leftStep] - pSrcAboveLeft[0] + SUB tVal11, tVal12, tVal11 ;// tVal11 = H0 = pSrcAbove[15] - pSrcAboveLeft[0] + MOV tVal2, tVal2, LSL #4 ;// tVal2 = a = 16 * (pSrcAbove[15] + pSrcLeft[15*leftStep]) + + MOV tVal11, tVal11, LSL #3 ;// 8*([15]-[-1]) + LDRB tVal6, [pSrcAbove, #0] + LDRB tVal7, [pSrcAbove, #14] + SUB tVal8, tVal7, tVal6 + RSB tVal8, tVal8, tVal8, LSL #3 ;// 7*([14]-[0]) + ADD tVal11, tVal11, tVal8 + LDRB tVal6, [pSrcAbove, #1] + LDRB tVal7, [pSrcAbove, #13] + SUB tVal8, tVal7, tVal6 + ADD tVal8, tVal8, tVal8 + ADD tVal8, tVal8, tVal8, LSL #1 ;// 6*([13]-[1]) + ADD tVal11, tVal11, tVal8 + LDRB tVal6, [pSrcAbove, #2] + LDRB tVal7, [pSrcAbove, #12] + SUB tVal8, tVal7, tVal6 + ADD tVal8, tVal8, tVal8, LSL #2 ;// 5*([12]-[2]) + ADD tVal11, tVal11, tVal8 + LDRB tVal6, [pSrcAbove, #3] + LDRB tVal7, [pSrcAbove, #11] + SUB tVal8, tVal7, tVal6 + ADD tVal11, tVal11, tVal8, LSL #2 ;// + 4*([11]-[3]) + LDRB tVal6, [pSrcAbove, #4] + LDRB tVal7, [pSrcAbove, #10] + SUB tVal8, tVal7, tVal6 + ADD tVal8, tVal8, tVal8, LSL #1 ;// 3*([10]-[4]) + ADD tVal11, tVal11, tVal8 + LDRB tVal6, [pSrcAbove, #5] + LDRB tVal7, [pSrcAbove, #9] + SUB tVal8, tVal7, tVal6 + ADD tVal11, tVal11, tVal8, LSL #1 ;// + 2*([9]-[5]) + LDRB tVal6, [pSrcAbove, #6] + LDRB tVal7, [pSrcAbove, #8] + SUB tVal8, tVal7, 
tVal6 ;// 1*([8]-[6]) + ADD tVal7, tVal11, tVal8 ;// tVal7 = H + + ADD tVal2, tVal2, #16 ;// tVal2 = a + 16 + MOV tVal1, pSrcLeft ;// tVal1 = pSrcLeft + SUB tVal9, tVal14, leftStep ;// tVal9 = 14*leftStep + ADD tVal9, pSrcLeft, tVal9 ;// tVal9 = pSrcLeft + 14*leftStep + + M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[14*leftStep] + M_LDRB tVal11, [tVal1], +leftStep ;// tVal11 = pSrcLeft[0] + ADD tVal7, tVal7, tVal7, LSL #2 ;// tVal7 = 5 * H + ADD tVal7, tVal7, #32 ;// tVal7 = 5 * H + 32 + SUB tVal8, tVal8, tVal11 ;// tVal8 = pSrcLeft[14*leftStep] - pSrcLeft[0] + ASR tVal12, tVal7, #6 ;// tVal12 = b = (5 * H + 32) >> 6 + + RSB tVal8, tVal8, tVal8, LSL #3 ;// tVal8 = V1 = 7* (pSrcLeft[14*leftStep]-pSrcLeft[0]) + ADD tVal6, tVal8, tVal10, LSL #3 ;// tVal6 = V = 8*V0 + V1 + M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[13*leftStep] + M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[leftStep] + RSB tVal7, tVal12, tVal12, LSL #3 ;// tVal7 = 7*b + SUB tVal2, tVal2, tVal7 ;// tVal2 = a + 16 - 7*b + SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[13*leftStep] - pSrcLeft[leftStep] + M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[12*leftStep] + ADD tVal7, tVal7, tVal7 ;// tVal7 = 2 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep]) + M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[2*leftStep] + ADD tVal7, tVal7, tVal7, LSL #1 ;// tVal7 = 6 * (pSrcLeft[13*leftStep] - pSrcLeft[leftStep]) + ADD tVal6, tVal6, tVal7 ;// tVal6 = V = V + V2 + SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep] + M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[11*leftStep] + M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[3*leftStep] + ADD tVal7, tVal7, tVal7, LSL #2 ;// tVal7 = 5 * (pSrcLeft[12*leftStep] - pSrcLeft[2*leftStep]) + ADD tVal6, tVal6, tVal7 ;// tVal6 = V = V + V3 + SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[11*leftStep] - pSrcLeft[3*leftStep] + M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[10*leftStep] + M_LDRB tVal10, 
[tVal1], +leftStep ;// tVal10 = pSrcLeft[4*leftStep] + ADD tVal6, tVal6, tVal7, LSL #2 ;// tVal6 = V = V + V4 + SUB dstStep, dstStep, #16 ;// dstStep = dstStep - 16 (each row's four #4 stores already advance pDst by 16) + SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep] + M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[9*leftStep] + M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[5*leftStep] + ADD tVal7, tVal7, tVal7, LSL #1 ;// tVal7 = 3 * (pSrcLeft[10*leftStep] - pSrcLeft[4*leftStep]) + ADD tVal6, tVal6, tVal7 ;// tVal6 = V = V + V5 + SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[9*leftStep] - pSrcLeft[5*leftStep] + M_LDRB tVal8, [tVal9], -leftStep ;// tVal8 = pSrcLeft[8*leftStep] + M_LDRB tVal10, [tVal1], +leftStep ;// tVal10 = pSrcLeft[6*leftStep] + ADD tVal6, tVal6, tVal7, LSL #1 ;// tVal6 = V = V + V6 + + ;// M_STALL ARM1136JS=1 + SUB tVal7, tVal8, tVal10 ;// tVal7 = pSrcLeft[8*leftStep] - pSrcLeft[6*leftStep] + ADD tVal6, tVal6, tVal7 ;// tVal6 = V = V + V7 + + ;// M_STALL ARM1136JS=1 + ADD tVal6, tVal6, tVal6, LSL #2 ;// tVal6 = 5*V + ADD tVal6, tVal6, #32 ;// tVal6 = 5*V + 32 + + ;// M_STALL ARM1136JS=1 + ASR tVal14, tVal6, #6 ;// tVal14 = c = (5*V + 32)>>6 + + ;// M_STALL ARM1136JS=1 + RSB tVal6, tVal14, tVal14, LSL #3 ;// tVal6 = 7*c + UXTH tVal14, tVal14 ;// tVal14 = Cleared the upper half word + ADD tVal10, tVal12, tVal12 ;// tVal10 = 2*b + ORR tVal14, tVal14, tVal14, LSL #16 ;// tVal14 = {c , c} + SUB tVal6, tVal2, tVal6 ;// tVal6 = d = a - 7*b - 7*c + 16 (signed, may be negative) + ADD tVal1, tVal6, tVal10 ;// tVal1 = pp2 = d + 2*b + ADD tVal10, tVal10, tVal12 ;// tVal10 =3*b + PKHBT tVal0, tVal6, tVal1, LSL #16 ;// tVal0 = p2p0 = pack {p2, p0}; PKHBT takes only d[15:0], so a negative d cannot set p2's bits + UXTH tVal12, tVal12 ;// tVal12 = Cleared the upper half word + UXTH tVal10, tVal10 ;// tVal10 = Cleared the upper half word + ORR tVal12, tVal12, tVal12, LSL #16 ;// tVal12 = {b , b} + ORR tVal10, tVal10, tVal10, LSL #16 ;// tVal10 = {3b , 3b} + SADD16 tVal1, tVal0, tVal12 ;// tVal1 = p3p1 = p2p0 + {b,b} + SADD16 tVal2, tVal1, tVal10 ;// tVal2 = 
p6p4 = p3p1 + {3b,3b} + SADD16 tVal4, tVal2, tVal12 ;// tVal4 = p7p5 = p6p4 + {b,b} + SADD16 tVal6, tVal4, tVal10 ;// tVal6 = p10p8 = p7p5 + {3b,3b} + SADD16 tVal7, tVal6, tVal12 ;// tVal7 = p11p9 = p10p8 + {b,b} + SADD16 tVal8, tVal7, tVal10 ;// tVal8 = p14p12 = p11p9 + {3b,3b} + SADD16 tVal9, tVal8, tVal12 ;// tVal9 = p15p13 = p14p12 + {b,b} + LDR r0x00FF00FF, =MASK_CONST ;// r0x00FF00FF = 0x00FF00FF + +LOOP_PLANE + + USAT16 temp2, #13, p3p1 ;// clip13(p3) clip13(p1) + USAT16 temp1, #13, p2p0 ;// clip13(p2) clip13(p0) + SADD16 p3p1, p3p1, c ;// next row: {p3 + c}, {p1 + c} + SADD16 p2p0, p2p0, c ;// next row: {p2 + c}, {p0 + c} + AND temp2, r0x00FF00FF, temp2, ASR #5 ;// clip8(p3) clip8(p1) + AND temp1, r0x00FF00FF, temp1, ASR #5 ;// clip8(p2) clip8(p0) + ORR temp1, temp1, temp2, LSL #8 ;// pack {p3, p2, p1, p0} + STR temp1, [pDst], #4 ;// store pDst[0 to 3] + + USAT16 temp2, #13, p7p5 + USAT16 temp1, #13, p6p4 + SADD16 p7p5, p7p5, c + SADD16 p6p4, p6p4, c + AND temp2, r0x00FF00FF, temp2, ASR #5 + AND temp1, r0x00FF00FF, temp1, ASR #5 + ORR temp1, temp1, temp2, LSL #8 ;// pack {p7, p6, p5, p4} + STR temp1, [pDst], #4 ;// store pDst[4 to 7] + + USAT16 temp2, #13, p11p9 + USAT16 temp1, #13, p10p8 + SADD16 p11p9, p11p9, c + SADD16 p10p8, p10p8, c + AND temp2, r0x00FF00FF, temp2, ASR #5 + AND temp1, r0x00FF00FF, temp1, ASR #5 + ORR temp1, temp1, temp2, LSL #8 ;// pack {p11, p10, p9, p8} + STR temp1, [pDst], #4 ;// store pDst[8 to 11] + + USAT16 temp2, #13, p15p13 + USAT16 temp1, #13, p14p12 + SADD16 p15p13, p15p13, c + SADD16 p14p12, p14p12, c + AND temp2, r0x00FF00FF, temp2, ASR #5 + AND temp1, r0x00FF00FF, temp1, ASR #5 + ORR temp1, temp1, temp2, LSL #8 ;// pack {p15, p14, p13, p12} + STR temp1, [pDst], #4 ;// store pDst[12 to 15] + + ADDS r0x00FF00FF, r0x00FF00FF, #1<<28 ;// Loop counter value in top 4 bits + + ADD pDst, pDst, dstStep ;// move to the next row (dstStep was reduced by 16 above) + + BCC LOOP_PLANE ;// Loop for 16 times (carry is set when the 4-bit counter wraps) + MOV return, #OMX_Sts_NoErr + M_END + + ENDIF ;// ARM1136JS + + + END +;----------------------------------------------------------------------------------------------- +; omxVCM4P10_PredictIntra_16x16 ends +;----------------------------------------------------------------------------------------------- diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s 
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s new file mode 100644 index 0000000..a90f460 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_PredictIntra_4x4_s.s @@ -0,0 +1,567 @@ +;// +;// +;// File Name: omxVCM4P10_PredictIntra_4x4_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + +;// Define the processor variants supported by this file + + M_VARIANTS ARM1136JS + +;//------------------------------------------------------- +;// This table is for implementing switch case of C in asm by +;// the method of two levels of indexing. +;//------------------------------------------------------- + + M_TABLE armVCM4P10_pSwitchTable4x4 + DCD OMX_VC_4x4_VERT, OMX_VC_4x4_HOR + DCD OMX_VC_4x4_DC, OMX_VC_4x4_DIAG_DL + DCD OMX_VC_4x4_DIAG_DR, OMX_VC_4x4_VR + DCD OMX_VC_4x4_HD, OMX_VC_4x4_VL + DCD OMX_VC_4x4_HU + + IF ARM1136JS + +;//-------------------------------------------- +;// Constants +;//-------------------------------------------- +BLK_SIZE EQU 0x8 +MUL_CONST0 EQU 0x01010101 +ADD_CONST1 EQU 0x80808080 + +;//-------------------------------------------- +;// Scratch registers (several aliases share one physical register) +;//-------------------------------------------- +return RN 0 +pTable RN 9 +pc RN 15 +r0x01010101 RN 1 +r0x80808080 RN 0 + +tVal0 RN 0 +tVal1 RN 1 +tVal2 RN 2 +tVal4 RN 4 +tVal6 RN 6 +tVal7 RN 7 +tVal8 RN 8 +tVal9 RN 9 +tVal10 RN 10 +tVal11 RN 11 +tVal12 RN 12 +tVal14 RN 14 + +Out0 RN 6 +Out1 RN 7 +Out2 RN 8 +Out3 RN 9 + +Left0 RN 6 +Left1 RN 7 +Left2 RN 8 +Left3 RN 9 + +Above0123 RN 12 +Above4567 RN 14 + +AboveLeft RN 10 + +;//-------------------------------------------- +;// Declare input registers +;//-------------------------------------------- +pSrcLeft RN 0 ;// input pointer +pSrcAbove RN 1 ;// input pointer +pSrcAboveLeft RN 2 ;// 
input pointer +pDst RN 3 ;// output pointer +leftStep RN 4 ;// input variable +dstStep RN 5 ;// input variable +predMode RN 6 ;// input variable +availability RN 7 ;// input variable + +;//----------------------------------------------------------------------------------------------- +;// omxVCM4P10_PredictIntra_4x4 starts +;//----------------------------------------------------------------------------------------------- + + ;// Write function header + M_START omxVCM4P10_PredictIntra_4x4, r11 + + ;// Define stack arguments + M_ARG LeftStep, 4 + M_ARG DstStep, 4 + M_ARG PredMode, 4 + M_ARG Availability, 4 + + ;// M_STALL ARM1136JS=4 + + LDR pTable,=armVCM4P10_pSwitchTable4x4 ;// Load index table for switch case + + ;// Load argument from the stack + M_LDR predMode, PredMode ;// Arg predMode loaded from stack to reg + M_LDR leftStep, LeftStep ;// Arg leftStep loaded from stack to reg + M_LDR dstStep, DstStep ;// Arg dstStep loaded from stack to reg + M_LDR availability, Availability ;// Arg availability loaded from stack to reg + + LDR pc, [pTable, predMode, LSL #2] ;// Branch to the case based on preMode + +OMX_VC_4x4_VERT + + LDR Above0123, [pSrcAbove] ;// Above0123 = pSrcAbove[0 to 3] + M_STR Above0123, [pDst], dstStep ;// pDst[0 to 3] = Above0123 + M_STR Above0123, [pDst], dstStep ;// pDst[4 to 7] = Above0123 + M_STR Above0123, [pDst], dstStep ;// pDst[8 to 11] = Above0123 + STR Above0123, [pDst] ;// pDst[12 to 15] = Above0123 + MOV return, #OMX_Sts_NoErr + M_EXIT ;// Macro to exit midway-break frm case + +OMX_VC_4x4_HOR + + ;// M_STALL ARM1136JS=6 + + LDR r0x01010101, =MUL_CONST0 ;// Const to repeat the byte in reg 4 times + M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0] + M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1] + M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2] + LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3] + MUL Out0, Left0, r0x01010101 ;// replicate the val in all the bytes + MUL Out1, Left1, r0x01010101 ;// 
replicate the val in all the bytes + MUL Out2, Left2, r0x01010101 ;// replicate the val in all the bytes + MUL Out3, Left3, r0x01010101 ;// replicate the val in all the bytes + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [0 to 3 ] + M_STR Out1, [pDst], dstStep ;// store {Out1} at pDst [4 to 7 ] + M_STR Out2, [pDst], dstStep ;// store {Out2} at pDst [8 to 11] + STR Out3, [pDst] ;// store {Out3} at pDst [12 to 15] + MOV return, #OMX_Sts_NoErr + M_EXIT ;// Macro to exit midway-break frm case + +OMX_VC_4x4_DC + + ;// M_STALL ARM1136JS=6 + + AND availability, availability, #(OMX_VC_UPPER + OMX_VC_LEFT) + CMP availability, #(OMX_VC_UPPER + OMX_VC_LEFT) + BNE UpperOrLeftOrNoneAvailable ;// Jump to Upper if not both + LDR Above0123, [pSrcAbove] ;// Above0123 = pSrcAbove[0 to 3] + + ;// M_STALL ARM1136JS=1 + + UXTB16 tVal7, Above0123 ;// pSrcAbove[0, 2] + UXTB16 tVal6, Above0123, ROR #8 ;// pSrcAbove[1, 3] + UADD16 tVal11, tVal6, tVal7 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3] + M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0] + M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1] + ADD tVal11, tVal11, LSR #16 ;// sum(pSrcAbove[0] to pSrcAbove[3]) + M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2] + LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3] + UXTH tVal11, tVal11 ;// upsum1 (Clear the top junk bits) + ADD tVal6, Left0, Left1 ;// tVal6 = Left0 + Left1 + ADD tVal7, Left2, Left3 ;// tVal7 = Left2 + Left3 + ADD tVal6, tVal6, tVal7 ;// tVal6 = tVal6 + tVal7 + ADD Out0, tVal6, tVal11 ;// Out0 = tVal6 + tVal11 + ADD Out0, Out0, #4 ;// Out0 = Out0 + 4 + LDR r0x01010101, =MUL_CONST0 ;// 0x01010101 + MOV Out0, Out0, LSR #3 ;// Out0 = (Out0 + 4)>>3 + + ;// M_STALL ARM1136JS=1 + + MUL Out0, Out0, r0x01010101 ;// replicate the val in all the bytes + + ;// M_STALL ARM1136JS=1 + + MOV return, #OMX_Sts_NoErr + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [0 to 3 ] + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [4 to 7 ] + M_STR Out0, 
[pDst], dstStep ;// store {Out0} at pDst [8 to 11] + STR Out0, [pDst] ;// store {Out0} at pDst [12 to 15] + M_EXIT ;// Macro to exit midway-break frm case + +UpperOrLeftOrNoneAvailable + ;// M_STALL ARM1136JS=3 + + CMP availability, #OMX_VC_UPPER ;// if(availability & #OMX_VC_UPPER) + BNE LeftOrNoneAvailable ;// Jump to Left if not upper + LDR Above0123, [pSrcAbove] ;// Above0123 = pSrcAbove[0 to 3] + + ;// M_STALL ARM1136JS=3 + + UXTB16 tVal7, Above0123 ;// pSrcAbove[0, 2] + UXTB16 tVal6, Above0123, ROR #8 ;// pSrcAbove[1, 3] + UADD16 Out0, tVal6, tVal7 ;// pSrcAbove[0, 2] + pSrcAbove[1, 3] + LDR r0x01010101, =MUL_CONST0 ;// 0x01010101 + ADD Out0, Out0, LSR #16 ;// sum(pSrcAbove[0] to pSrcAbove[3]) + + ;// M_STALL ARM1136JS=1 + + UXTH Out0, Out0 ;// upsum1 (Clear the top junk bits) + ADD Out0, Out0, #2 ;// Out0 = Out0 + 2 + + ;// M_STALL ARM1136JS=1 + + MOV Out0, Out0, LSR #2 ;// Out0 = (Out0 + 2)>>2 + + ;// M_STALL ARM1136JS=1 + + MUL Out0, Out0, r0x01010101 ;// replicate the val in all the bytes + + ;// M_STALL ARM1136JS=1 + + MOV return, #OMX_Sts_NoErr + M_STR Out0, [pDst], dstStep ;// store {tVal6} at pDst [0 to 3 ] + M_STR Out0, [pDst], dstStep ;// store {tVal6} at pDst [4 to 7 ] + M_STR Out0, [pDst], dstStep ;// store {tVal6} at pDst [8 to 11] + STR Out0, [pDst] ;// store {tVal6} at pDst [12 to 15] + + M_EXIT ;// Macro to exit midway-break frm case + +LeftOrNoneAvailable + ;// M_STALL ARM1136JS=3 + + LDR r0x01010101, =MUL_CONST0 ;// 0x01010101 + CMP availability, #OMX_VC_LEFT + BNE NoneAvailable + M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0] + M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1] + M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2] + LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3] + ADD Out0, Left0, Left1 ;// Out0 = Left0 + Left1 + + ;// M_STALL ARM1136JS=1 + + ADD Out1, Left2, Left3 ;// Out1 = Left2 + Left3 + ADD Out0, Out0, Out1 ;// Out0 = Out0 + Out1 + ADD Out0, Out0, #2 ;// Out0 = Out0 + 2 + + ;// M_STALL 
ARM1136JS=1 + + MOV Out0, Out0, LSR #2 ;// Out0 = (Out0 + 2)>>2 + + ;// M_STALL ARM1136JS=1 + + MUL Out0, Out0, r0x01010101 ;// replicate the val in all the bytes + + ;// M_STALL ARM1136JS=1 + + MOV return, #OMX_Sts_NoErr + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [0 to 3 ] + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [4 to 7 ] + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [8 to 11] + STR Out0, [pDst] ;// store {Out0} at pDst [12 to 15] + M_EXIT ;// Macro to exit midway-break frm case + +NoneAvailable + MOV Out0, #128 ;// Out0 = 128 if(count == 0) + + ;// M_STALL ARM1136JS=5 + + MUL Out0, Out0, r0x01010101 ;// replicate the val in all the bytes + + ;// M_STALL ARM1136JS=1 + + MOV return, #OMX_Sts_NoErr + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [0 to 3 ] + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [4 to 7 ] + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [8 to 11] + STR Out0, [pDst] ;// store {Out0} at pDst [12 to 15] + M_EXIT ;// Macro to exit midway-break frm case + +OMX_VC_4x4_DIAG_DL + + ;//------------------------------------------------------------------ + ;// f = (a+2*b+c+2)>>2 + ;// Calculate as: + ;// d = (a + c )>>1 + ;// e = (d - b')>>1 + ;// f = e + 128 + ;//------------------------------------------------------------------ + + ;// M_STALL ARM1136JS=3 + + TST availability, #OMX_VC_UPPER_RIGHT + LDMIA pSrcAbove, {Above0123, Above4567} ;// Above0123, Above4567 = pSrcAbove[0 to 7] + LDR r0x80808080, =ADD_CONST1 ;// 0x80808080 + BNE DLUpperRightAvailable + LDR r0x01010101, =MUL_CONST0 ;// 0x01010101 + MOV tVal7, Above0123, LSR #24 ;// {00, 00, 00, U3 } + MOV tVal11, tVal7, LSL #24 ;// {U3, 00, 00, 00 } + MUL Out3, tVal7, r0x01010101 ;// {U3, U3, U3, U3 } + MOV tVal8, Above0123, LSR #16 ;// {00, 00, U3, U2 } + MOV tVal10, Above0123, LSR #8 ;// {00, U3, U2, U1 } + MVN tVal10, tVal10 ;// {00', U3', U2', U1'} + UHADD8 tVal8, tVal8, Above0123 ;// {xx, xx, d1, d0 } + UHADD8 tVal6, Above0123, tVal9 ;// 
{xx, d2, xx, xx } + UHSUB8 tVal8, tVal8, tVal10 ;// {xx, xx, e1, e0 } + UHSUB8 tVal6, tVal6, tVal10 ;// {xx, e2, xx, xx } + UADD8 tVal8, tVal8, r0x80808080 ;// {xx, xx, f1, f0 } + UADD8 tVal6, tVal6, r0x80808080 ;// {xx, f2, xx, xx } + + ;// M_STALL ARM1136JS=1 + + PKHBT tVal6, tVal8, tVal6 ;// {xx, f2, f1, f0 } + BIC tVal6, tVal6, #0xFF000000 ;// {00, f2, f1, f0 } + ORR Out0, tVal6, tVal11 ;// {U3, f2, f1, f0 } + + ;// M_STALL ARM1136JS=1 + + PKHTB Out1, Out3, Out0, ASR #8 ;// {U3, U3, f2, f1 } + MOV return, #OMX_Sts_NoErr + PKHTB Out2, Out3, Out1, ASR #8 ;// {U3, U3, U3, f2 } + + M_STR Out0, [pDst], dstStep ;// store {f3 to f0} at pDst[3 to 0 ] + M_STR Out1, [pDst], dstStep ;// store {f4 to f1} at pDst[7 to 4 ] + M_STR Out2, [pDst], dstStep ;// store {f5 to f2} at pDst[11 to 8 ] + STR Out3, [pDst] ;// store {f6 to f3} at pDSt[15 to 12] + M_EXIT ;// Macro to exit midway-break frm case + +DLUpperRightAvailable + + MOV tVal8, Above0123, LSR #24 ;// {00, 00, 00, U3 } + MOV tVal9, Above0123, LSR #16 ;// {00, 00, U3, U2 } + MOV tVal10, Above0123, LSR #8 ;// {00, U3, U2, U1 } + ORR tVal8, tVal8, Above4567, LSL #8 ;// {U6, U5, U4, U3 } + ORR tVal10, tVal10, Above4567, LSL #24 ;// {U4, U3, U2, U1 } + PKHBT tVal9, tVal9, Above4567, LSL #16 ;// {U5, U4, U3, U2 } + MVN tVal1, tVal8 ;// {U6', U5', U4', U3'} + MVN tVal10, tVal10 ;// {U4', U3', U2', U1'} + MVN tVal2, Above4567 ;// {U7', U6', U5', U4'} + UHADD8 tVal6, Above0123, tVal9 ;// {d3, d2, d1, d0 } + UHADD8 tVal9, tVal9, Above4567 ;// {d5, d4, d3, d2 } + UHADD8 tVal8, Above4567, tVal8 ;// {d6, xx, xx, xx } + UHSUB8 tVal6, tVal6, tVal10 ;// {e3, e2, e1, e0 } + UHSUB8 tVal12, tVal9, tVal1 ;// {e5, e4, e3, e2 } + UHSUB8 tVal8, tVal8, tVal2 ;// {e6, xx, xx, xx } + UADD8 Out0, tVal6, r0x80808080 ;// {f3, f2, f1, f0 } + UADD8 tVal9, tVal8, r0x80808080 ;// {f6, xx, xx, xx } + UADD8 Out2, tVal12, r0x80808080 ;// {f5, f4, f3, f2 } + MOV tVal7, Out0, LSR #8 ;// {00, f3, f2, f1 } + AND tVal9, tVal9, #0xFF000000 ;// {f6, 00, 00, 00 
} + PKHBT Out1, tVal7, Out2, LSL #8 ;// {f4, f3, f2, f1 } + ORR Out3, tVal9, Out2, LSR #8 ;// {f6, f5, f4, f3 } + M_STR Out0, [pDst], dstStep ;// store {f3 to f0} at pDst[3 to 0 ] + M_STR Out1, [pDst], dstStep ;// store {f4 to f1} at pDst[7 to 4 ] + M_STR Out2, [pDst], dstStep ;// store {f5 to f2} at pDst[11 to 8 ] + STR Out3, [pDst] ;// store {f6 to f3} at pDSt[15 to 12] + MOV return, #OMX_Sts_NoErr + M_EXIT ;// Macro to exit midway-break frm case + + +OMX_VC_4x4_DIAG_DR + + ;// M_STALL ARM1136JS=4 + + M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0] + M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1] + M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2] + LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3] + LDRB AboveLeft, [pSrcAboveLeft] ;// AboveLeft = pSrcAboveLeft[0] + ORR tVal7, Left1, Left0, LSL #8 ;// tVal7 = 00 00 L0 L1 + LDR Above0123, [pSrcAbove] ;// Above0123 = U3 U2 U1 U0 + LDR r0x80808080, =ADD_CONST1 ;// 0x80808080 + ORR tVal8, Left3, Left2, LSL #8 ;// tVal8 = 00 00 L2 L3 + PKHBT tVal7, tVal8, tVal7, LSL #16 ;// tVal7 = L0 L1 L2 L3 + MOV tVal8, Above0123, LSL #8 ;// tVal8 = U2 U1 U0 00 + MOV tVal9, tVal7, LSR #8 ;// tVal9 = 00 L0 L1 L2 + ORR tVal8, tVal8, AboveLeft ;// tVal8 = U2 U1 U0 UL + ORR tVal9, tVal9, AboveLeft, LSL #24 ;// tVal9 = UL L0 L1 L2 + MOV tVal10, Above0123, LSL #24 ;// tVal10= U0 00 00 00 + UXTB tVal11, tVal7, ROR #24 ;// tVal11= 00 00 00 L0 + ORR tVal10, tVal10, tVal9, LSR #8 ;// tVal10= U0 UL L0 L1 + ORR tVal11, tVal11, tVal8, LSL #8 ;// tVal11= U1 U0 UL L0 + UHADD8 tVal11, Above0123, tVal11 ;// tVal11= d1 d0 dL g0 + UHADD8 tVal10, tVal7, tVal10 ;// tVal10= g0 g1 g2 g3 + MVN tVal8, tVal8 ;// tVal8 = U2'U1'U0'UL' + MVN tVal9, tVal9 ;// tVal9 = UL'L0'L1'L2' + UHSUB8 tVal11, tVal11, tVal8 ;// tVal11= e1 e0 eL h0 + UHSUB8 tVal10, tVal10, tVal9 ;// tVal10= h0 h1 h2 h3 + UADD8 Out3, tVal10, r0x80808080 ;// Out3 = i0 i1 i2 i3 + UADD8 Out0, tVal11, r0x80808080 ;// Out0 = f1 f0 fL i0 + UXTH tVal11, Out3, ROR #8 ;// 
tVal11= 00 00 i1 i2 + MOV tVal7, Out0, LSL #8 ;// tVal7 = f0 fL i0 00 + ORR Out1, tVal7, tVal11, LSR #8 ;// Out1 = f0 fL i0 i1 + PKHBT Out2, tVal11, Out0, LSL #16 ;// Out2 = fL i0 i1 i2 + M_STR Out0, [pDst], dstStep ;// store {f1 to i0} at pDst[3 to 0 ] + M_STR Out1, [pDst], dstStep ;// store {f0 to i1} at pDst[7 to 4 ] + M_STR Out2, [pDst], dstStep ;// store {fL to i2} at pDst[11 to 8 ] + STR Out3, [pDst] ;// store {i0 to i3} at pDst[15 to 12] + MOV return, #OMX_Sts_NoErr + M_EXIT ;// Macro to exit midway-break frm case + +OMX_VC_4x4_VR + + ;// M_STALL ARM1136JS=4 + + LDR Above0123, [pSrcAbove] ;// Above0123 = U3 U2 U1 U0 + LDRB AboveLeft, [pSrcAboveLeft] ;// AboveLeft = 00 00 00 UL + M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = 00 00 00 L0 + M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = 00 00 00 L1 + LDRB Left2, [pSrcLeft] ;// Left2 = 00 00 00 L2 + MOV tVal0, Above0123, LSL #8 ;// tVal0 = U2 U1 U0 00 + MOV tVal9, Above0123 ;// tVal9 = U3 U2 U1 U0 + ORR tVal14, tVal0, AboveLeft ;// tVal14 = U2 U1 U0 UL + MVN tVal11, tVal14 ;// tVal11 = U2'U1'U0'UL' + MOV tVal2, tVal14, LSL #8 ;// tVal2 = U1 U0 UL 00 + UHSUB8 tVal1, Above0123, tVal11 ;// tVal1 = d2 d1 d0 dL + UHADD8 tVal10, AboveLeft, Left1 ;// tVal10 = 00 00 00 j1 + MVN tVal4, Left0 ;// tVal4 = 00 00 00 L0' + UHSUB8 tVal4, tVal10, tVal4 ;// tVal4 = 00 00 00 k1 + ORR tVal12, tVal0, Left0 ;// tVal12 = U2 U1 U0 L0 + ORR tVal14, tVal2, Left0 ;// tVal14 = U1 U0 UL L0 + LDR r0x80808080, =ADD_CONST1 ;// 0x80808080 + UHADD8 tVal10, tVal9, tVal14 ;// tVal10 = g3 g2 g1 g0 + UADD8 Out0, tVal1, r0x80808080 ;// Out0 = e2 e1 e0 eL + UHSUB8 tVal10, tVal10, tVal11 ;// tVal10 = h3 h2 h1 h0 + M_STR Out0, [pDst], dstStep ;// store {e2 to eL} at pDst[3 to 0 ] + MOV tVal1, tVal14, LSL #8 ;// tVal1 = U0 UL L0 00 + MOV tVal6, Out0, LSL #8 ;// tVal6 = e1 e0 eL 00 + ORR tVal2, tVal2, Left1 ;// tVal2 = U1 U0 UL L1 + UADD8 tVal4, tVal4, r0x80808080 ;// tVal4 = 00 00 00 l1 + UADD8 Out1, tVal10, r0x80808080 ;// Out1 = i3 i2 i1 i0 + MVN 
tVal2, tVal2 ;// tVal14 = U1'U0'UL'L1' + ORR tVal1, tVal1, Left2 ;// tVal1 = U0 UL L0 L2 + ORR Out2, tVal6, tVal4 ;// Out2 = e1 e0 eL l1 + UHADD8 tVal1, tVal1, tVal12 ;// tVal1 = g2 g1 g0 j2 + M_STR Out1, [pDst], dstStep ;// store {i3 to i0} at pDst[7 to 4 ] + M_STR Out2, [pDst], dstStep ;// store {e1 to l1} at pDst[11 to 8 ] + UHSUB8 tVal9, tVal1, tVal2 ;// tVal9 = h2 h1 h0 k2 + UADD8 Out3, tVal9, r0x80808080 ;// Out3 = i2 i1 i0 l2 + STR Out3, [pDst] ;// store {i2 to l2} at pDst[15 to 12] + MOV return, #OMX_Sts_NoErr + M_EXIT ;// Macro to exit midway-break frm case + +OMX_VC_4x4_HD + + ;// M_STALL ARM1136JS=4 + + LDR Above0123, [pSrcAbove] ;// Above0123 = U3 U2 U1 U0 + LDRB AboveLeft, [pSrcAboveLeft] ;// AboveLeft = 00 00 00 UL + M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = 00 00 00 L0 + M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = 00 00 00 L1 + M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = 00 00 00 L2 + LDRB Left3, [pSrcLeft] ;// Left3 = 00 00 00 L3 + LDR r0x80808080, =ADD_CONST1 ;// 0x80808080 + ORR tVal2, AboveLeft, Above0123, LSL #8;// tVal2 = U2 U1 U0 UL + MVN tVal1, Left0 ;// tVal1 = 00 00 00 L0' + ORR tVal4, Left0, tVal2, LSL #8 ;// tVal4 = U1 U0 UL L0 + MVN tVal2, tVal2 ;// tVal2 = U2'U1'U0'UL' + UHADD8 tVal4, tVal4, Above0123 ;// tVal4 = g3 g2 g1 g0 + UHSUB8 tVal1, AboveLeft, tVal1 ;// tVal1 = 00 00 00 dL + UHSUB8 tVal4, tVal4, tVal2 ;// tVal4 = h3 h2 h1 h0 + UADD8 tVal1, tVal1, r0x80808080 ;// tVal1 = 00 00 00 eL + UADD8 tVal4, tVal4, r0x80808080 ;// tVal4 = i3 i2 i1 i0 + ORR tVal2, Left0, AboveLeft, LSL #16 ;// tVal2 = 00 UL 00 L0 + MOV tVal4, tVal4, LSL #8 ;// tVal4 = i2 i1 i0 00 + ORR tVal11, Left1, Left0, LSL #16 ;// tVal11= 00 L0 00 L1 + ORR tVal7, Left2, Left1, LSL #16 ;// tVal7 = 00 L1 00 L2 + ORR tVal10, Left3, Left2, LSL #16 ;// tVal10= 00 L2 00 L3 + ORR Out0, tVal4, tVal1 ;// Out0 = i2 i1 i0 eL + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [0 to 3 ] + MOV tVal4, Out0, LSL #16 ;// tVal4 = i1 i0 00 00 + UHADD8 tVal2, tVal2, tVal7 
;// tVal2 = 00 j1 00 j2 + UHADD8 tVal6, tVal11, tVal10 ;// tVal11= 00 j2 00 j3 + MVN tVal12, tVal11 ;// tVal12= 00 L0'00 L1' + MVN tVal14, tVal7 ;// tVal14= 00 L1'00 L2' + UHSUB8 tVal2, tVal2, tVal12 ;// tVal2 = 00 k1 00 k2 + UHSUB8 tVal8, tVal7, tVal12 ;// tVal8 = 00 d1 00 d2 + UHSUB8 tVal11, tVal6, tVal14 ;// tVal11= 00 k2 00 k3 + UHSUB8 tVal9, tVal10, tVal14 ;// tVal9 = 00 d2 00 d3 + UADD8 tVal2, tVal2, r0x80808080 ;// tVal2 = 00 l1 00 l2 + UADD8 tVal8, tVal8, r0x80808080 ;// tVal8 = 00 e1 00 e2 + UADD8 tVal11, tVal11, r0x80808080 ;// tVal11= 00 l2 00 l3 + UADD8 tVal9, tVal9, r0x80808080 ;// tVal9 = 00 e2 00 e3 + ORR Out2, tVal8, tVal2, LSL #8 ;// Out2 = l1 e1 l2 e2 + ORR Out3, tVal9, tVal11, LSL #8 ;// Out3 = l2 e2 l3 e3 + PKHTB Out1, tVal4, Out2, ASR #16 ;// Out1 = i1 i0 l1 e1 + M_STR Out1, [pDst], dstStep ;// store {Out1} at pDst [4 to 7 ] + M_STR Out2, [pDst], dstStep ;// store {Out2} at pDst [8 to 11] + STR Out3, [pDst] ;// store {Out3} at pDst [12 to 15] + MOV return, #OMX_Sts_NoErr + M_EXIT ;// Macro to exit midway-break frm case + +OMX_VC_4x4_VL + + ;// M_STALL ARM1136JS=3 + + LDMIA pSrcAbove, {Above0123, Above4567} ;// Above0123, Above4567 = pSrcAbove[0 to 7] + TST availability, #OMX_VC_UPPER_RIGHT + LDR r0x80808080, =ADD_CONST1 ;// 0x80808080 + LDR r0x01010101, =MUL_CONST0 ;// 0x01010101 + MOV tVal11, Above0123, LSR #24 ;// tVal11= 00 00 00 U3 + MULEQ Above4567, tVal11, r0x01010101 ;// Above4567 = U3 U3 U3 U3 + MOV tVal9, Above0123, LSR #8 ;// tVal9 = 00 U3 U2 U1 + MVN tVal10, Above0123 ;// tVal10= U3'U2'U1'U0' + ORR tVal2, tVal9, Above4567, LSL #24 ;// tVal2 = U4 U3 U2 U1 + UHSUB8 tVal8, tVal2, tVal10 ;// tVal8 = d4 d3 d2 d1 + UADD8 Out0, tVal8, r0x80808080 ;// Out0 = e4 e3 e2 e1 + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [0 to 3 ] + MOV tVal9, tVal9, LSR #8 ;// tVal9 = 00 00 U3 U2 + MOV tVal10, Above4567, LSL #8 ;// tVal10= U6 U5 U4 00 + PKHBT tVal9, tVal9, Above4567, LSL #16 ;// tVal9 = U5 U4 U3 U2 + ORR tVal10, tVal10, tVal11 ;// 
tVal10= U6 U5 U4 U3 + UHADD8 tVal11, tVal9, Above0123 ;// tVal11= g5 g4 g3 g2 + UHADD8 tVal14, tVal2, tVal10 ;// tVal14= g6 g5 g4 g3 + MVN tVal8, tVal2 ;// tVal8 = U4'U3'U2'U1' + MVN tVal7, tVal9 ;// tVal7 = U5'U4'U3'U2' + UHSUB8 tVal12, tVal9, tVal8 ;// tVal12= d5 d4 d3 d2 + UHSUB8 tVal11, tVal11, tVal8 ;// tVal11= h5 h4 h3 h2 + UHSUB8 tVal2, tVal14, tVal7 ;// tVal2 = h6 h5 h4 h3 + UADD8 Out1, tVal11, r0x80808080 ;// Out1 = i5 i4 i3 i2 + UADD8 Out2, tVal12, r0x80808080 ;// Out2 = e5 e4 e3 e2 + UADD8 Out3, tVal2, r0x80808080 ;// Out3 = i6 i5 i4 i3 + M_STR Out1, [pDst], dstStep ;// store {Out1} at pDst [4 to 7 ] + M_STR Out2, [pDst], dstStep ;// store {Out2} at pDst [8 to 11] + M_STR Out3, [pDst], dstStep ;// store {Out3} at pDst [12 to 15] + MOV return, #OMX_Sts_NoErr + M_EXIT ;// Macro to exit midway-break frm case + +OMX_VC_4x4_HU + + ;// M_STALL ARM1136JS=2 + + LDR r0x01010101, =MUL_CONST0 ;// 0x01010101 + M_LDRB Left0, [pSrcLeft], leftStep ;// Left0 = pSrcLeft[0] + M_LDRB Left1, [pSrcLeft], leftStep ;// Left1 = pSrcLeft[1] + M_LDRB Left2, [pSrcLeft], leftStep ;// Left2 = pSrcLeft[2] + LDRB Left3, [pSrcLeft] ;// Left3 = pSrcLeft[3] + MOV r0x80808080, r0x01010101, LSL #7 ;// 0x80808080 + ORR tVal6, Left0, Left1, LSL #16 ;// tVal6 = 00 L1 00 L0 + ORR tVal7, Left1, Left2, LSL #16 ;// tVal7 = 00 L2 00 L1 + ORR tVal11, Left2, Left3, LSL #16 ;// tVal11= 00 L3 00 L2 + MUL Out3, Left3, r0x01010101 ;// Out3 = L3 L3 L3 L3 + MVN tVal8, tVal7 ;// tVal8 = 00 L2'00 L1' + MVN tVal10, tVal11 ;// tVal10= 00 L3'00 L2' + UHADD8 tVal4, tVal6, tVal11 ;// tVal4 = 00 g3 00 g2 + UXTB16 tVal12, Out3 ;// tVal12= 00 L3 00 L3 + UHSUB8 tVal4, tVal4, tVal8 ;// tVal4 = 00 h3 00 h2 + UHSUB8 tVal6, tVal6, tVal8 ;// tVal6 = 00 d2 00 d1 + UHSUB8 tVal11, tVal11, tVal8 ;// tVal11= 00 d3 00 d2 + UHADD8 tVal12, tVal12, tVal7 ;// tVal12= 00 g4 00 g3 + UADD8 tVal4, tVal4, r0x80808080 ;// tVal4 = 00 i3 00 i2 + UHSUB8 tVal12, tVal12, tVal10 ;// tVal12= 00 h4 00 h3 + UADD8 tVal8, tVal6, r0x80808080 ;// 
tVal8 = 00 e2 00 e1 + UADD8 tVal11, tVal11, r0x80808080 ;// tVal11= 00 e3 00 e2 + UADD8 tVal12, tVal12, r0x80808080 ;// tVal12= 00 i4 00 i3 + ORR Out0, tVal8, tVal4, LSL #8 ;// Out0 = i3 e2 i2 e1 + ORR Out1, tVal11, tVal12, LSL #8 ;// Out1 = i4 e3 i3 e2 + M_STR Out0, [pDst], dstStep ;// store {Out0} at pDst [0 to 3 ] + PKHTB Out2, Out3, Out1, ASR #16 ;// Out2 = L3 L3 i4 e3 + M_STR Out1, [pDst], dstStep ;// store {Out1} at pDst [4 to 7 ] + M_STR Out2, [pDst], dstStep ;// store {Out2} at pDst [8 to 11] + STR Out3, [pDst] ;// store {Out3} at pDst [12 to 15] + MOV return, #OMX_Sts_NoErr + M_END + + ENDIF ;// ARM1136JS + + + END +;//----------------------------------------------------------------------------------------------- +;// omxVCM4P10_PredictIntra_4x4 ends +;//----------------------------------------------------------------------------------------------- diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s new file mode 100644 index 0000000..53597a8 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s @@ -0,0 +1,128 @@ +;// +;// +;// File Name: omxVCM4P10_TransformDequantChromaDCFromPair_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+;// +;// +;// + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + IMPORT armVCM4P10_QPDivTable + IMPORT armVCM4P10_VMatrixQPModTable + + M_VARIANTS ARM1136JS + + + IF ARM1136JS + +;//-------------------------------------- +;// Declare input registers +;//-------------------------------------- +ppSrc RN 0 +pDst RN 1 +QP RN 2 + +;//-------------------------------- +;// Scratch variable for Unpack2x2 +;//-------------------------------- +pSrc RN 9 +Value RN 4 +Value2 RN 5 +Flag RN 6 +strOffset RN 7 +cstOffset RN 8 + +;//-------------------------------- +;// Scratch variable (registers are reused once the unpack loop is done) +;//-------------------------------- +r0w0 RN 3 +r0w1 RN 4 + +c0w0 RN 5 +c1w0 RN 6 + +return RN 0 +pQPDivTable RN 5 +pQPModTable RN 6 +Shift RN 9 +Scale RN 2 + +Temp1 RN 3 +Temp2 RN 4 +Temp3 RN 7 +Temp4 RN 8 + + ;// Write function header + M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9 + + + LDR pSrc, [ppSrc] ;// Load pSrc + MOV cstOffset, #31 ;// Mask used in the loop to compute the store offset + + ;//----------------------------------------------------------------------- + ;// Firstly, fill all the coefficient values on the <pDst> buffer by zero + ;//----------------------------------------------------------------------- + + MOV Value, #0 ;// Initialize the zero value + MOV Value2, #0 ;// Initialize the zero value + LDRB Flag, [pSrc], #1 ;// Preload <Flag> before <unpackLoop> + STRD Value, [pDst, #0] ;// pDst[0] = pDst[1] = pDst[2] = pDst[3] = 0 + + +unpackLoop + TST Flag, #0x10 ;// Computing (Flag & 0x10): set => 16-bit value follows + LDRSBNE Value2,[pSrc,#1] ;// Value2 = sign-extended high byte of the 16-bit value + LDRBNE Value, [pSrc], #2 ;// Load byte wise to avoid unaligned access + AND strOffset, cstOffset, Flag, LSL #1 ;// strOffset = (Flag & 15) << 1; + LDRSBEQ Value, [pSrc], #1 ;// Value = (OMX_S8) *pSrc++ (sign-extended byte) + ORRNE Value,Value,Value2, LSL #8 ;// Value = low byte | (Value2 << 8) + + TST Flag, #0x20 ;// Computing (Flag & 0x20) to check, if we're done + LDRBEQ Flag, [pSrc], #1 ;// Flag = (OMX_U8) *pSrc++, for next iteration + STRH Value, [pDst, strOffset] ;// Store <Value> at
offset <strOffset> + BEQ unpackLoop ;// Loop again while (Flag & 0x20) was clear + + LDMIA pDst, {r0w0, r0w1} ;// r0w0 = |c1|c0| & r0w1 = |c3|c2| + + + STR pSrc, [ppSrc] ;// Update the bitstream pointer + + LDR pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer + LDR pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer + + SADDSUBX r0w0, r0w0, r0w0 ;// [ c00+c01, c00-c01 ] + SADDSUBX r0w1, r0w1, r0w1 ;// [ c10+c11, c10-c11 ] + + LDRSB Shift, [pQPDivTable, QP] ;// Shift = pQPDivTable[QP] + LDRSB Scale, [pQPModTable, QP] ;// Scale = pQPModTable[QP] + + SADD16 c0w0, r0w0, r0w1 ;// [ d00+d10, d01+d11 ] + SSUB16 c1w0, r0w0, r0w1 ;// [ d00-d10, d01-d11 ] + + LSL Scale, Scale, Shift ;// Scale = Scale << Shift + + SMULTB Temp2, c0w0, Scale ;// Temp2 = T(c0w0) * Scale + SMULTB Temp4, c1w0, Scale ;// Temp4 = T(c1w0) * Scale + SMULBB Temp1, c0w0, Scale ;// Temp1 = B(c0w0) * Scale + SMULBB Temp3, c1w0, Scale ;// Temp3 = B(c1w0) * Scale + MOV Temp2, Temp2, ASR #1 ;// Temp2 = Temp2 >> 1 (Temp1 gets its >> 1 from the LSL #15 in PKHBT below) + MOV Temp4, Temp4, ASR #1 ;// Temp4 = Temp4 >> 1 (Temp3 gets its >> 1 from the LSL #15 in PKHBT below) + PKHBT c0w0, Temp2, Temp1, LSL #15 ;// c0w0 = | Temp1 >> 1 | Temp2 | + PKHBT c1w0, Temp4, Temp3, LSL #15 ;// c1w0 = | Temp3 >> 1 | Temp4 | + STMIA pDst, {c0w0, c1w0} ;// Storing all the coefficients at once + MOV return, #OMX_Sts_NoErr ;// Return value = OMX_Sts_NoErr + M_END + + ENDIF ;// ARM1136JS + + + + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s new file mode 100644 index 0000000..73caec2 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_TransformDequantLumaDCFromPair_s.s @@ -0,0 +1,469 @@ +;// +;// +;// File Name: omxVCM4P10_TransformDequantLumaDCFromPair_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008
+;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// +;// Description: +;// H.264 inverse quantize and transform module +;// +;// + +;// Include standard headers + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + +;// Import/Export symbols required from/to other files +;// (For example tables) + + IMPORT armVCM4P10_UnpackBlock4x4 + IMPORT armVCM4P10_QPDivTable + IMPORT armVCM4P10_VMatrixQPModTable + + M_VARIANTS ARM1136JS + +;// Set debugging level +;//DEBUG_ON SETL {TRUE} + + +;// Static Function: armVCM4P10_InvTransformDequantLumaDC4x4 + + +;// Guarding implementation by the processor name + + IF ARM1136JS + + +;//Input Registers +pData RN 0 +QP RN 1 + +;//Output Registers + + +;//Local Scratch Registers + +;// Packed Input pixels +in00 RN 2 ;// Src[0] & Src[1] +in02 RN 3 ;// Src[2] & Src[3] +in10 RN 4 ;// Src[4] & Src[5] +in12 RN 5 ;// Src[6] & Src[7] +in20 RN 6 ;// Src[8] & Src[9] +in22 RN 7 ;// Src[10] & Src[11] +in30 RN 8 ;// Src[12] & Src[13] +in32 RN 9 ;// Src[14] & Src[15] + +;// Transpose for Row operations (Rows to cols) +trRow00 RN 2 +trRow10 RN 10 +trRow02 RN 3 +trRow12 RN 5 +trRow20 RN 11 +trRow30 RN 12 +trRow32 RN 14 +trRow22 RN 7 + +;// Intermediate calculations +rowSum1 RN 4 +rowSum2 RN 6 +rowDiff1 RN 8 +rowDiff2 RN 9 + + +;// Row operated pixels +rowOp00 RN 2 +rowOp10 RN 10 +rowOp20 RN 11 +rowOp30 RN 12 +rowOp02 RN 3 +rowOp12 RN 5 +rowOp22 RN 7 +rowOp32 RN 14 + +;// Transpose for column operations +trCol00 RN 2 +trCol02 RN 3 +trCol10 RN 4 +trCol12 RN 5 +trCol20 RN 6 +trCol22 RN 7 +trCol30 RN 8 +trCol32 RN 9 + +;// Intermediate calculations +colSum1 RN 10 +colSum2 RN 11 +colDiff1 RN 12 +colDiff2 RN 14 + + +;// Column operated pixels +colOp00 RN 2 +colOp02 RN 3 +colOp10 RN 4 +colOp12 RN 5 +colOp20 RN 6 +colOp22 RN 7 +colOp30 RN 8 +colOp32 RN 9 + +;// Temporary scratch variables (alias registers whose earlier values are dead) +pQPDivTable RN 0 +pQPModTable RN 11 +Shift RN 10 +Scale RN 14 +Round RN 0 + +temp1 RN 10 +temp2 RN 11 +temp3 RN 12 +temp4 RN 1 + + + +;//
InvTransformed and Dequantized pixels +out00 RN 2 +out02 RN 3 +out10 RN 4 +out12 RN 5 +out20 RN 6 +out22 RN 7 +out30 RN 8 +out32 RN 9 + + + + + ;// Allocate stack memory required by the function + M_ALLOC4 pDataOnStack, 4 + + ;// Write function header + M_START armVCM4P10_InvTransformDequantLumaDC4x4,r11 + + ;****************************************************************** + ;// The strategy used in implementing the transform is as follows:* + ;// Load the 4x4 block into 8 registers * + ;// Transpose the 4x4 matrix * + ;// Perform the row operations (on columns) using SIMD * + ;// Transpose the 4x4 result matrix * + ;// Perform the column operations * + ;// Store the 4x4 block at one go * + ;****************************************************************** + + ;// Load all the 4x4 pixels + + LDMIA pData,{in00,in02,in10,in12,in20,in22,in30,in32} + + ;//***************************************************************** + ;// + ;// Transpose the matrix in order to perform row ops as column ops + ;// Input: in[][] = original matrix + ;// Output: trRow[][]= transposed matrix + ;// Step1: Obtain the LL part of the transposed matrix + ;// Step2: Obtain the HL part + ;// step3: Obtain the LH part + ;// Step4: Obtain the HH part + ;// + ;//***************************************************************** + + ;// LL 2x2 transposed matrix + ;// d0 d1 - - + ;// d4 d5 - - + ;// - - - - + ;// - - - - + + PKHTB trRow10,in10,in00,ASR #16 ;// [5 4] = [f5:f1] + PKHBT trRow00,in00,in10,LSL #16 ;// [1 0] = [f4:f0] + + ;// HL 2x2 transposed matrix + ;// - - - - + ;// - - - - + ;// d8 d9 - - + ;// d12 d13 - - + + + PKHTB trRow30,in12,in02,ASR #16 ;// [13 12] = [7 3] + PKHBT trRow20,in02,in12,LSL #16 ;// [9 8] = [6 2] + + ;// LH 2x2 transposed matrix + ;// - - d2 d3 + ;// - - d6 d7 + ;// - - - - + ;// - - - - + + PKHBT trRow02,in20,in30,LSL #16 ;// [3 2] = [f12:f8] + PKHTB trRow12,in30,in20,ASR #16 ;// [7 6] = [f13:f9] + + + + + ;// HH 2x2 transposed matrix + ;// - - - - + ;// - - - -
+ ;// - - d10 d11 + ;// - - d14 d15 + + PKHTB trRow32,in32,in22,ASR #16 ;// [15 14] = [15 11] + PKHBT trRow22,in22,in32,LSL #16 ;// [11 10] = [14 10] + + + ;**************************************** + ;// Row Operations (Performed on columns) + ;**************************************** + + + ;// SIMD operations on first two columns(two rows of the original matrix) + + SADD16 rowSum1,trRow00,trRow10 ;// (c0+c1) + SADD16 rowSum2,trRow20,trRow30 ;// (c2+c3) + SSUB16 rowDiff1,trRow00,trRow10 ;// (c0-c1) + SSUB16 rowDiff2,trRow20,trRow30 ;// (c2-c3) + SADD16 rowOp00,rowSum1,rowSum2 ;// (c0+c1+c2+c3) + SSUB16 rowOp10,rowSum1,rowSum2 ;// (c0+c1-c2-c3) + SSUB16 rowOp20,rowDiff1,rowDiff2 ;// (c0-c1-c2+c3) + SADD16 rowOp30,rowDiff1,rowDiff2 ;// (c0-c1+c2-c3) + + + ;// SIMD operations on next two columns(next two rows of the original matrix) + + SADD16 rowSum1,trRow02,trRow12 ;// (c0+c1) + SADD16 rowSum2,trRow22,trRow32 ;// (c2+c3) + SSUB16 rowDiff1,trRow02,trRow12 ;// (c0-c1) + SSUB16 rowDiff2,trRow22,trRow32 ;// (c2-c3) + SADD16 rowOp02,rowSum1,rowSum2 ;// (c0+c1+c2+c3) + SSUB16 rowOp12,rowSum1,rowSum2 ;// (c0+c1-c2-c3) + SSUB16 rowOp22,rowDiff1,rowDiff2 ;// (c0-c1-c2+c3) + SADD16 rowOp32,rowDiff1,rowDiff2 ;// (c0-c1+c2-c3) + + + + ;***************************************************************** + ;// Transpose the resultant matrix + ;// Input: rowOp[][] + ;// Output: trCol[][] + ;***************************************************************** + + ;// LL 2x2 transposed matrix + ;// d0 d1 - - + ;// d4 d5 - - + ;// - - - - + ;// - - - - + + PKHTB trCol10,rowOp10,rowOp00,ASR #16 ;// [5 4] = [f5:f1] + PKHBT trCol00,rowOp00,rowOp10,LSL #16 ;// [1 0] = [f4:f0] + + ;// HL 2x2 transposed matrix + ;// - - - - + ;// - - - - + ;// d8 d9 - - + ;// d12 d13 - - + + + PKHTB trCol30,rowOp12,rowOp02,ASR #16 ;// [13 12] = [7 3] + PKHBT trCol20,rowOp02,rowOp12,LSL #16 ;// [9 8] = [6 2] + + ;// LH 2x2 transposed matrix + ;// - - d2 d3 + ;// - - d6 d7 + ;// - - - - + ;// - - - - + + PKHBT
trCol02,rowOp20,rowOp30,LSL #16 ;// [3 2] = [f12:f8] + PKHTB trCol12,rowOp30,rowOp20,ASR #16 ;// [7 6] = [f13:f9] + + + + + ;// HH 2x2 transposed matrix + ;// - - - - + ;// - - - - + ;// - - d10 d11 + ;// - - d14 d15 + + PKHTB trCol32,rowOp32,rowOp22,ASR #16 ;// [15 14] = [15 11] + PKHBT trCol22,rowOp22,rowOp32,LSL #16 ;// [11 10] = [14 10] + + + ;******************************* + ;// Column Operations + ;******************************* + + ;//-------------------------------------------------------------------------------------- + ;// Store pData(RN0) on stack and restore it only at the final store back + ;// This frees up a register (RN0) which is used to reduce number of intermediate stalls + ;//-------------------------------------------------------------------------------------- + M_STR pData,pDataOnStack + + + ;// SIMD operations on first two columns(two rows of the original matrix) + + SADD16 colSum1,trCol00,trCol10 ;// (c0+c1) + SADD16 colSum2,trCol20,trCol30 ;// (c2+c3) + SSUB16 colDiff1,trCol00,trCol10 ;// (c0-c1) + SSUB16 colDiff2,trCol20,trCol30 ;// (c2-c3) + SADD16 colOp00,colSum1,colSum2 ;// (c0+c1+c2+c3) + SSUB16 colOp10,colSum1,colSum2 ;// (c0+c1-c2-c3) + SSUB16 colOp20,colDiff1,colDiff2 ;// (c0-c1-c2+c3) + SADD16 colOp30,colDiff1,colDiff2 ;// (c0-c1+c2-c3) + + + ;// SIMD operations on next two columns(next two rows of the original matrix) + + LDR pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer + SADD16 colSum1,trCol02,trCol12 ;// (c0+c1) + SADD16 colSum2,trCol22,trCol32 ;// (c2+c3) + SSUB16 colDiff1,trCol02,trCol12 ;// (c0-c1) + SSUB16 colDiff2,trCol22,trCol32 ;// (c2-c3) + SADD16 colOp02,colSum1,colSum2 ;// (c0+c1+c2+c3) + SSUB16 colOp12,colSum1,colSum2 ;// (c0+c1-c2-c3) + LDR pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer + LDRSB Shift, [pQPDivTable, QP] ;// Shift = pQPDivTable[QP] + SSUB16 colOp22,colDiff1,colDiff2 ;// (c0-c1-c2+c3) + SADD16 colOp32,colDiff1,colDiff2 ;//
(c0-c1+c2-c3) + + + LDRSB Scale, [pQPModTable, QP] ;// Scale = pQPModTable[QP] + + ;//---------------------------------------------------------------------- + ;// + ;// <Dequantize> improves on the c-reference code + ;// Both the cases i.e., Shift>=0 and Shift<0 cases are covered together + ;// We do not subtract 2 from Shift as in C reference, instead perform a + ;// Scale << Shift once in the beginning and do a right shift by a + ;// constant 2 after the Multiplication. The value of Round would be 2 + ;// + ;// By doing this we avoid the Branches required and also + ;// reduce the code size substantially + ;// + ;//---------------------------------------------------------------------- + + MOV Round, #2 ;// Round = 2 + LSL Scale, Scale, Shift ;// Scale = Scale << Shift + + + ;// Row 1 + SMLABB temp1, colOp00, Scale, Round ;// Temp1 = B(colOp00) * Scale + Round + SMLABB temp3, colOp02, Scale, Round ;// Temp3 = B(colOp02) * Scale + Round + SMLATB temp2, colOp00, Scale, Round ;// Temp2 = T(colOp00) * Scale + Round + SMLATB temp4, colOp02, Scale, Round ;// Temp4 = T(colOp02) * Scale + Round + + ASR temp1, temp1, #2 ;// Temp1 = Temp1 >> 2 + ASR temp3, temp3, #2 ;// Temp3 = Temp3 >> 2 + PKHBT out00, temp1, temp2, LSL #14 ;// out00 = | Temp2 >> 2 | Temp1 | + PKHBT out02, temp3, temp4, LSL #14 ;// out02 = | Temp4 >> 2 | Temp3 | + + + ;// Row 2 + SMLABB temp1, colOp10, Scale, Round ;// Temp1 = B(colOp10) * Scale + Round + SMLABB temp3, colOp12, Scale, Round ;// Temp3 = B(colOp12) * Scale + Round + SMLATB temp2, colOp10, Scale, Round ;// Temp2 = T(colOp10) * Scale + Round + SMLATB temp4, colOp12, Scale, Round ;// Temp4 = T(colOp12) * Scale + Round + + ASR temp1, temp1, #2 ;// Temp1 = Temp1 >> 2 + ASR temp3, temp3, #2 ;// Temp3 = Temp3 >> 2 + PKHBT out10, temp1, temp2, LSL #14 ;// out10 = | Temp2 >> 2 | Temp1 | + PKHBT out12, temp3, temp4, LSL #14 ;// out12 = | Temp4 >> 2 | Temp3 | + + ;// Row 3 + SMLABB temp1, colOp20, Scale, Round ;// Temp1 = B(colOp20) * Scale + Round + SMLABB temp3, colOp22, Scale, Round ;// Temp3 =
B(colOp22) * Scale + Round + SMLATB temp2, colOp20, Scale, Round ;// Temp2 = T(colOp20) * Scale + Round + SMLATB temp4, colOp22, Scale, Round ;// Temp4 = T(colOp22) * Scale + Round + + ASR temp1, temp1, #2 ;// Temp1 = Temp1 >> 2 + ASR temp3, temp3, #2 ;// Temp3 = Temp3 >> 2 + PKHBT out20, temp1, temp2, LSL #14 ;// out20 = | Temp2 >> 2 | Temp1 | + PKHBT out22, temp3, temp4, LSL #14 ;// out22 = | Temp4 >> 2 | Temp3 | + + ;// Row 4 + SMLABB temp1, colOp30, Scale, Round ;// Temp1 = B(colOp30) * Scale + Round + SMLABB temp3, colOp32, Scale, Round ;// Temp3 = B(colOp32) * Scale + Round + SMLATB temp2, colOp30, Scale, Round ;// Temp2 = T(colOp30) * Scale + Round + SMLATB temp4, colOp32, Scale, Round ;// Temp4 = T(colOp32) * Scale + Round + + M_LDR pData,pDataOnStack ;// Restore pData pointer from stack (Round, same register, is no longer needed) + ASR temp1, temp1, #2 ;// Temp1 = Temp1 >> 2 + ASR temp3, temp3, #2 ;// Temp3 = Temp3 >> 2 + PKHBT out30, temp1, temp2, LSL #14 ;// out30 = | Temp2 >> 2 | Temp1 | + PKHBT out32, temp3, temp4, LSL #14 ;// out32 = | Temp4 >> 2 | Temp3 | + + + + ;*************************** + ;// Store all the 4x4 pixels + ;*************************** + +store_coeff + + STMIA pData,{out00,out02,out10,out12,out20,out22,out30,out32} + + + + ;// Set return value + + + ;// Write function tail + M_END + + ENDIF ;//ARM1136JS + + +;// Static Function: armVCM4P10_InvTransformDequantLumaDC4x4 + +;// Guarding implementation by the processor name + + + + +;// Function: omxVCM4P10_TransformDequantLumaDCFromPair + +;//Input Registers +ppSrc RN 0 +pDst RN 1 +QPR2 RN 2 + +;//Output Registers +result RN 0 + +;//Local Scratch Registers +pDstR4 RN 4 +pDstR0 RN 0 +QPR1 RN 1 +QPR5 RN 5 + +;// Guarding implementation by the processor name + + IF ARM1136JS + + ;// Allocate stack memory required by the function + + + ;// Write function header + M_START omxVCM4P10_TransformDequantLumaDCFromPair,r5 + + MOV pDstR4,pDst ;// Saving register r1 + MOV QPR5,QPR2 ;// Saving register r2 + BL armVCM4P10_UnpackBlock4x4 ;// r0 (ppSrc) and r1 (pDst) are still the caller's arguments here + + MOV pDstR0,pDstR4 ;// Setting up register r0 +
MOV QPR1,QPR5 ;// Setting up register r1 + BL armVCM4P10_InvTransformDequantLumaDC4x4 ;// r0 = pData (saved pDst), r1 = QP + + + ;// Set return value + MOV result,#OMX_Sts_NoErr + + ;// Write function tail + M_END + + + ENDIF ;//ARM1136JS + + + END
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h new file mode 100644 index 0000000..22115d3 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_Huff_Tables_VLC.h @@ -0,0 +1,37 @@ +/** + * + * File Name: armVCM4P2_Huff_Tables_VLC.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * + * File: armVCM4P2_Huff_Tables_VLC.h + * Description: Declares Tables used for Huffman coding and decoding + * in MP4P2 codec. + * + */ + +#ifndef _OMXHUFFTAB_H_ +#define _OMXHUFFTAB_H_ + + +extern const OMX_U16 armVCM4P2_IntraVlcL0L1[200]; + + +extern const OMX_U16 armVCM4P2_InterVlcL0L1[200]; + +extern const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64]; +//extern const OMX_U16 armVCM4P2_aIntraDCChromaIndex[32]; +extern const OMX_U16 armVCM4P2_aVlcMVD[124]; + +extern const OMX_U8 armVCM4P2_InterL0L1LMAX[73]; +extern const OMX_U8 armVCM4P2_InterL0L1RMAX[35]; +extern const OMX_U8 armVCM4P2_IntraL0L1LMAX[53]; +extern const OMX_U8 armVCM4P2_IntraL0L1RMAX[40]; + +#endif /* _OMXHUFFTAB_H_ */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h new file mode 100644 index 0000000..d5f865c --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/api/armVCM4P2_ZigZag_Tables.h @@ -0,0 +1,25 @@ +/** + * + * File Name: armVCM4P2_ZigZag_Tables.h + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * + * File: armVCM4P2_Zigzag_Tables.h + * Description: Declares Tables used for Zigzag scan in MP4P2 codec.
+ * + */ + +#ifndef _OMXZIGZAGTAB_H +#define _OMXZIGZAGTAB_H + +extern const OMX_U8 armVCM4P2_aClassicalZigzagScan [192]; +//extern const OMX_U8 armVCM4P2_aHorizontalZigzagScan [64]; +//extern const OMX_U8 armVCM4P2_aVerticalZigzagScan [64]; + +#endif /* _OMXZIGZAGTAB_H */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s new file mode 100644 index 0000000..7801e57 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s @@ -0,0 +1,75 @@ +; /** +; * +; * File Name: armVCM4P2_Clip8_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains module for Clipping 16 bit value to [0,255] Range +; */ + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + + M_VARIANTS ARM1136JS + + + + IF ARM1136JS + +;//Input Arguments + +pSrc RN 0 +pDst RN 1 +step RN 2 + +;// Local variables + +x0 RN 3 +x1 RN 4 +x2 RN 5 +x3 RN 6 + +Count RN 14 + + + M_START armVCM4P2_Clip8,r6 + + + MOV Count,#8 ;// 8 loop iterations, 8 samples each +loop + + LDMIA pSrc!,{x0, x1} ;// load 4 packed 16-bit samples, advance pSrc + SUBS Count,Count, #1 ;// count down (sets the flags tested by BGT below) + LDMIA pSrc!,{x2, x3} ;// load 4 more packed 16-bit samples + USAT16 x0, #8, x0 ;// clip two samples to [0,255] + USAT16 x1, #8, x1 ;// clip two samples to [0,255] + STRB x0, [pDst] ;// store clipped low-halfword sample + MOV x0, x0, LSR #16 ;// move clipped high-halfword sample down + STRB x0, [pDst,#1] + STRB x1, [pDst,#2] + MOV x1, x1, LSR #16 + STRB x1, [pDst,#3] + + USAT16 x2, #8, x2 ;// clip two samples to [0,255] + USAT16 x3, #8, x3 ;// clip two samples to [0,255] + STRB x2, [pDst,#4] + MOV x2, x2, LSR #16 + STRB x2, [pDst,#5] + STRB x3, [pDst,#6] + MOV x3, x3, LSR #16 + STRB x3, [pDst,#7] + ADD pDst,pDst,step ;// Increment pDst by step value + + BGT loop ;// Continue loop until Count reaches 0 (8 iterations) + + M_END + ENDIF + + END diff --git
a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s new file mode 100644 index 0000000..9e30900 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s @@ -0,0 +1,398 @@ +;/** +; * +; * File Name: armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains modules for zigzag scanning and VLC decoding +; * for inter, intra block. +; * +; * +; * +; * Function: omxVCM4P2_DecodeVLCZigzag_AC_unsafe +; * +; * Description: +; * Performs VLC decoding and inverse zigzag scan +; * +; * +; * +; * +; */ + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armCOMM_BitDec_s.h + + + M_VARIANTS ARM1136JS + + + + + + IF ARM1136JS + + + + + +;//Input Arguments + +ppBitStream RN 0 +pBitOffset RN 1 +pDst RN 2 +shortVideoHeader RN 3 + + +;//Local Variables + +Return RN 0 + +pVlcTableL0L1 RN 4 +pLMAXTableL0L1 RN 4 +pRMAXTableL0L1 RN 4 +pZigzagTable RN 4 + +ftype RN 0 +temp3 RN 4 +temp RN 5 +Count RN 6 +Escape RN 5 + +;// armVCM4P2_FillVLDBuffer +zigzag RN 0 +storeLevel RN 1 +temp2 RN 4 +temp1 RN 5 +sign RN 5 +Last RN 7 +storeRun RN 14 + + +packRetIndex RN 5 + + +markerbit RN 5 + +;// Scratch Registers + +RBitStream RN 8 +RBitBuffer RN 9 +RBitCount RN 10 + +T1 RN 11 +T2 RN 12 +LR RN 14 + + + + M_ALLOC4 pppBitStream,4 + M_ALLOC4 ppOffset,4 + M_ALLOC4 pLinkRegister,4 + + M_START armVCM4P2_DecodeVLCZigzag_AC_unsafe + + ;// get the table addresses from stack + M_ARG ppVlcTableL0L1,4 + M_ARG ppLMAXTableL0L1,4 + M_ARG ppRMAXTableL0L1,4 + M_ARG ppZigzagTable,4 + + ;// Store ALL zeros at pDst + + MOV temp1,#0 ;// Initialize Count to zero + MOV Last,#0 + M_STR LR,pLinkRegister ;// Store Link 
Register on Stack + MOV temp2,#0 + MOV LR,#0 + + ;// Initialize the Macro and Store all zeros to pDst + + STM pDst!,{temp2,temp1,Last,LR} + M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount + STM pDst!,{temp2,temp1,Last,LR} + M_BD_INIT1 T1, T2, T2 + STM pDst!,{temp2,temp1,Last,LR} + M_BD_INIT2 T1, T2, T2 + STM pDst!,{temp2,temp1,Last,LR} + M_STR ppBitStream,pppBitStream ;// Store ppBitstream on stack + STM pDst!,{temp2,temp1,Last,LR} + M_STR pBitOffset,ppOffset ;// Store pBitOffset on stack + STM pDst!,{temp2,temp1,Last,LR} + + STM pDst!,{temp2,temp1,Last,LR} + STM pDst!,{temp2,temp1,Last,LR} + + + SUB pDst,pDst,#128 ;// Restore pDst + + ;// The armVCM4P2_GetVLCBits begins + +getVLCbits + + M_BD_LOOK8 Escape,7 ;// Load Escape Value + LSR Escape,Escape,#25 + CMP Escape,#3 ;// check for escape mode + MOVNE ftype,#0 + BNE notEscapemode ;// Branch if not in Escape mode 3 + + M_BD_VSKIP8 #7,T1 + CMP shortVideoHeader,#0 ;// Check shortVideoHeader flag to know the type of Escape mode + BEQ endFillVLD + + ;// Escape Mode 4 + + M_BD_READ8 Last,1,T1 + M_BD_READ8 storeRun,6,T1 + M_BD_READ8 storeLevel,8,T1 + + + ;// Check whether the Reserved values for Level are used and Exit with an Error Message if it is so + + TEQ storeLevel,#0 + TEQNE storeLevel,#128 + BEQ ExitError + + ADD temp2,storeRun,Count + CMP temp2,#64 + BGE ExitError ;// error if Count+storeRun >= 64 + + + ;// Load address of zigzagTable + + M_LDR pZigzagTable,ppZigzagTable ;// Loading the Address of Zigzag table + + + ;// armVCM4P2_FillVLDBuffer + + SXTB storeLevel,storeLevel ;// Sign Extend storeLevel to 32 bits + + + ;// To Reflect Runlength + + ADD Count,Count,storeRun + LDRB zigzag,[pZigzagTable,Count] + ADD Count,Count,#1 + STRH storeLevel,[pDst,zigzag] ;// store Level + + B ExitOk + + + +endFillVLD + + + ;// Load Ftype( Escape Mode) value based on the two successive bits in the bitstream + + M_BD_READ8 temp1,1,T1 + CMP temp1,#0 + MOVEQ ftype,#1 + BEQ notEscapemode + M_BD_READ8 
temp1,1,T1 + CMP temp1,#1 + MOVEQ ftype,#3 + MOVNE ftype,#2 + + +notEscapemode + + ;// Load optimized packed VLC table with last=0 and Last=1 + + M_LDR pVlcTableL0L1,ppVlcTableL0L1 ;// Load Combined VLC Table + + + CMP ftype,#3 ;// If ftype >=3 get perform Fixed Length Decoding (Escape Mode 3) + BGE EscapeMode3 ;// Else continue normal VLC Decoding + + ;// Variable lengh decoding, "armUnPackVLC32" + + + M_BD_VLD packRetIndex,T1,T2,pVlcTableL0L1,4,2 + + + LDR temp3,=0xFFF + + CMP packRetIndex,temp3 ;// Check for invalid symbol + BEQ ExitError ;// if invalid symbol occurs exit with an error message + + AND Last,packRetIndex,#2 ;// Get Last from packed Index + + + + + LSR storeRun,packRetIndex,#7 ;// Get Run Value from Packed index + AND storeLevel,packRetIndex,#0x7c ;// storeLevel=packRetIndex[2-6],storeLevel[0-1]=0 + + + M_LDR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Load LMAX table + + + LSR storeLevel,storeLevel,#2 ;// Level value + + CMP ftype,#1 + BNE ftype2 + + ;// ftype==1; Escape mode =1 + + + ADD temp1, pLMAXTableL0L1, Last, LSL#4 ;// If the Last=1 add 32 to table address + LDRB temp1,[temp1,storeRun] + + + ADD storeLevel,temp1,storeLevel + +ftype2 + + ;// ftype =2; Escape mode =2 + + M_LDR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Load RMAX Table + + CMP ftype,#2 + BNE FillVLDL1 + + ADD temp1, pRMAXTableL0L1, Last, LSL#4 ;// If Last=1 add 32 to table address + SUB temp2,storeLevel,#1 + LDRB temp1,[temp1,temp2] + + + ADD storeRun,storeRun,#1 + ADD storeRun,temp1 + +FillVLDL1 + + + ;// armVCM4P2_FillVLDBuffer + + M_LDR pZigzagTable,ppZigzagTable ;// Load address of zigzagTable + + M_BD_READ8 sign,1,T1 + + CMP sign,#1 + RSBEQ storeLevel,storeLevel,#0 + + ADD temp1,storeRun,Count ;// Exit with an error message if Run + Count exceeds 63 + CMP temp1,#64 + BGE ExitError + + + + + + + ;// To Reflect Runlenght + + ADD Count,Count,storeRun + +storeLevelL1 + + LDRB zigzag,[pZigzagTable,Count] + CMP Last,#2 ;// Check if the Level val is Last non zero val + ADD Count,Count,#1 + LSR 
Last,Last,#1 + STRH storeLevel,[pDst,zigzag] + + BNE end + + B ExitOk + + + + ;// Fixed Lengh Decoding Escape Mode 3 + +EscapeMode3 + + M_BD_READ8 Last,1,T1 + M_BD_READ8 storeRun,6,T1 + + ADD temp2,storeRun,Count ;// Exit with an error message if Run + Count exceeds 63 + CMP temp2,#64 + BGE ExitError + + M_BD_READ8 markerbit,1,T1 + TEQ markerbit,#0 ;// Exit with an error message if marker bit is zero + BEQ ExitError + + M_BD_READ16 storeLevel,12,T1 + + TST storeLevel,#0x800 ;// test if the level is negative + SUBNE storeLevel,storeLevel,#4096 + CMP storeLevel,#0 + CMPNE storeLevel,#-2048 + BEQ ExitError ;// Exit with an error message if Level==0 or -2048 + + M_LDR pZigzagTable,ppZigzagTable ;// Load address of zigzagTable + + M_BD_READ8 markerbit,1,T1 + + + ;// armVCM4P2_FillVLDBuffer ( Sign not used as storeLevel is preprocessed) + + + + ;// To Reflect Run Length + + ADD Count,Count,storeRun + + + +storeLevelLast + + LDRB zigzag,[pZigzagTable,Count] + CMP Last,#1 + ADD Count,Count,#1 + STRH storeLevel,[pDst,zigzag] + + BNE end + + B ExitOk + +end + + CMP Count,#64 ;//Run the Loop untill Count reaches 64 + + BLT getVLCbits + + +ExitOk + ;// Exit When VLC Decoding is done Successfully + + ;// Loading ppBitStream and pBitOffset from stack + + CMP Last,#1 + M_LDR ppBitStream,pppBitStream + M_LDR pBitOffset,ppOffset + + ;//Ending the macro + + M_BD_FINI ppBitStream,pBitOffset + + MOVEQ Return,#OMX_Sts_NoErr + MOVNE Return,#OMX_Sts_Err + M_LDR LR,pLinkRegister ;// Load the Link Register Back + B exit2 + +ExitError + ;// Exit When an Error occurs + + M_LDR ppBitStream,pppBitStream + M_LDR pBitOffset,ppOffset + ;//Ending the macro + + M_BD_FINI ppBitStream,pBitOffset + M_LDR LR,pLinkRegister + MOV Return,#OMX_Sts_Err + +exit2 + + + M_END + ENDIF + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c new file mode 100644 
index 0000000..ba4d058 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c @@ -0,0 +1,211 @@ + /** + * + * File Name: armVCM4P2_Huff_Tables_VLC.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File: armVCM4P2_Huff_Tables_VLC.c + * Description: Contains all the Huffman tables used in MPEG4 codec + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + +#include "armCOMM_Bitstream.h" + + + + +// Contains optimized and Packed VLC tables with Last=0 and Last=1 + +// optimized Packed VLC table Entry Format +// --------------------------------------- +// +// 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +// +------------------------------------------------+ +// | Len | Run | Level |L | 1 | +// +------------------------------------------------+ +// | Offset | 0 | +// +------------------------------------------------+ +// If the table entry is a leaf entry then bit 0 set: +// Len = Number of bits overread (0 to 7) 3 bits +// Run = RunLength of the Symbol (0 to 63) 6 bits +// Level = Level of the Symbol (0 to 31) 5 bits +// L = Last Value of the Symbol (0 or 1) 1 bit +// +// If the table entry is an internal node then bit 0 is clear: +// Offset = Number of (16-bit) half words from the table +// start to the next table node +// +// The table is accessed by successive lookup up on the +// next Step bits of the input bitstream until a leaf node +// is obtained. The Step sizes are supplied to the VLD macro. + +// The VLC tables used for Intra and non inta coefficients in non Escape mode +// contains symbols with both Last=0 and Last=1. 
+// If a symbol is not found in the table it will be coded as 0xFFF + + +const OMX_U16 armVCM4P2_InterVlcL0L1[200] = { + 0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x1b09, + 0x4009, 0x4009, 0x4009, 0x4009, 0x2109, 0x2109, 0x0209, 0x0011, + 0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058, + 0x3fff, 0x3fff, 0x0038, 0x0040, 0x2115, 0x2115, 0x201d, 0x201d, + 0x2059, 0x2059, 0x2051, 0x2051, 0x1c0d, 0x1b0d, 0x1a0d, 0x190d, + 0x0911, 0x0811, 0x0711, 0x0611, 0x0511, 0x0319, 0x0219, 0x0121, + 0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088, + 0x2061, 0x2061, 0x2129, 0x2129, 0x3709, 0x3709, 0x3809, 0x3809, + 0x3d0d, 0x3d0d, 0x3e0d, 0x3e0d, 0x3f0d, 0x3f0d, 0x200d, 0x200d, + 0x0098, 0x00a0, 0x00a8, 0x00b0, 0x0131, 0x0221, 0x0419, 0x0519, + 0x0619, 0x0a11, 0x1909, 0x1a09, 0x210d, 0x220d, 0x230d, 0x240d, + 0x250d, 0x260d, 0x270d, 0x280d, 0x00c0, 0x00c8, 0x00d0, 0x00d8, + 0x0049, 0x0041, 0x380d, 0x380d, 0x370d, 0x370d, 0x360d, 0x360d, + 0x350d, 0x350d, 0x340d, 0x340d, 0x330d, 0x330d, 0x320d, 0x320d, + 0x00e8, 0x00f0, 0x00f8, 0x0100, 0x310d, 0x310d, 0x2015, 0x2015, + 0x3609, 0x3609, 0x3509, 0x3509, 0x3409, 0x3409, 0x3309, 0x3309, + 0x3209, 0x3209, 0x3109, 0x3109, 0x0110, 0x0130, 0x0138, 0x0140, + 0x0118, 0x0120, 0x0128, 0x100d, 0x3009, 0x3009, 0x2f09, 0x2f09, + 0x2411, 0x2411, 0x2311, 0x2311, 0x2039, 0x2039, 0x2031, 0x2031, + 0x0f0d, 0x0e0d, 0x0d0d, 0x0c0d, 0x0b0d, 0x0a0d, 0x090d, 0x0e09, + 0x0d09, 0x0211, 0x0119, 0x0029, 0x0150, 0x0158, 0x0160, 0x0168, + 0x280d, 0x280d, 0x270d, 0x270d, 0x260d, 0x260d, 0x250d, 0x250d, + 0x2c09, 0x2c09, 0xb759, 0xb759, 0x2a09, 0x2a09, 0x2021, 0x2021, + 0x040d, 0x030d, 0x0b35, 0x010d, 0x0909, 0x0809, 0x0709, 0x0609, + 0x0111, 0x0019, 0x2509, 0x2509, 0x2409, 0x2409, 0x2309, 0x2309 +}; + + +const OMX_U16 armVCM4P2_IntraVlcL0L1[200] = { + 0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x0f09, + 0x4009, 0x4009, 0x4009, 0x4009, 0x2011, 0x2011, 0x0109, 0x0019, + 0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 
0x0048, 0x0050, 0x0058, + 0x3fff, 0x3fff, 0x0038, 0x0040, 0x203d, 0x203d, 0x2035, 0x2035, + 0x20b1, 0x20b1, 0x20a9, 0x20a9, 0x0215, 0x011d, 0x002d, 0x0d09, + 0x0519, 0x0811, 0x0419, 0x0321, 0x0221, 0x0139, 0x00a1, 0x0099, + 0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088, + 0x20b9, 0x20b9, 0x20c1, 0x20c1, 0x2141, 0x2141, 0x2911, 0x2911, + 0x2315, 0x2315, 0x2415, 0x2415, 0x2f0d, 0x2f0d, 0x300d, 0x300d, + 0x0098, 0x00a0, 0x00a8, 0x00b0, 0x00c9, 0x00d1, 0x00d9, 0x0149, + 0x0619, 0x0151, 0x0229, 0x0719, 0x0e09, 0x0045, 0x0515, 0x0615, + 0x110d, 0x120d, 0x130d, 0x140d, 0x00c0, 0x00c8, 0x00d0, 0x00d8, + 0x0091, 0x0089, 0x2e0d, 0x2e0d, 0x2d0d, 0x2d0d, 0x2c0d, 0x2c0d, + 0x2b0d, 0x2b0d, 0x2a0d, 0x2a0d, 0x2115, 0x2115, 0x2025, 0x2025, + 0x00e8, 0x00f0, 0x00f8, 0x0100, 0x2c09, 0x2c09, 0x2b09, 0x2b09, + 0x2711, 0x2711, 0x2611, 0x2611, 0x2511, 0x2511, 0x2319, 0x2319, + 0x2219, 0x2219, 0x2131, 0x2131, 0x0110, 0x0130, 0x0138, 0x0140, + 0x0118, 0x0120, 0x0128, 0x080d, 0x2129, 0x2129, 0x2081, 0x2081, + 0x2411, 0x2411, 0x2079, 0x2079, 0x2071, 0x2071, 0x2069, 0x2069, + 0x1bb5, 0x060d, 0x001d, 0xd3f9, 0x0909, 0x0809, 0x090d, 0x0311, + 0x0121, 0x0061, 0x0059, 0x0051, 0x0150, 0x0158, 0x0160, 0x0168, + 0x240d, 0x240d, 0x230d, 0x230d, 0x2609, 0x2609, 0x250d, 0x250d, + 0x2709, 0x2709, 0x2211, 0x2211, 0x2119, 0x2119, 0x2049, 0x2049, + 0x0015, 0x0509, 0x020d, 0x010d, 0x0409, 0x0309, 0x0041, 0x0039, + 0x0111, 0x0031, 0x2209, 0x2209, 0x2029, 0x2029, 0x2021, 0x2021 +}; + +const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64] = { + 0x0020, 0x000b, 0x2009, 0x2009, 0x2007, 0x2007, 0x2001, 0x2001, + 0x4005, 0x4005, 0x4005, 0x4005, 0x4003, 0x4003, 0x4003, 0x4003, + 0x0028, 0x000f, 0x200d, 0x200d, 0x0030, 0x0013, 0x2011, 0x2011, + 0x0038, 0x0017, 0x2015, 0x2015, 0x3fff, 0x3fff, 0x2019, 0x2019, + + 0x0020, 0x0009, 0x2007, 0x2007, 0x4005, 0x4005, 0x4005, 0x4005, + 0x4003, 0x4003, 0x4003, 0x4003, 0x4001, 0x4001, 0x4001, 0x4001, + 0x0028, 0x000d, 0x200b, 0x200b, 0x0030, 0x0011, 0x200f, 
0x200f, + 0x0038, 0x0015, 0x2013, 0x2013, 0x1fff, 0x0019, 0x2017, 0x2017 +}; + + +const OMX_U16 armVCM4P2_aVlcMVD[124] = { + 0x0010, 0x00f0, 0x0043, 0x003f, 0x4041, 0x4041, 0x4041, 0x4041, + 0x0018, 0x00d8, 0x0047, 0x003b, 0x0020, 0x0080, 0x00a8, 0x00d0, + 0x0028, 0x0048, 0x0070, 0x0078, 0x1fff, 0x0030, 0x0038, 0x0040, + 0x0081, 0x0001, 0x007f, 0x0003, 0x207d, 0x207d, 0x2005, 0x2005, + 0x207b, 0x207b, 0x2007, 0x2007, 0x0050, 0x0058, 0x0060, 0x0068, + 0x2079, 0x2079, 0x2009, 0x2009, 0x2077, 0x2077, 0x200b, 0x200b, + 0x2075, 0x2075, 0x200d, 0x200d, 0x2073, 0x2073, 0x200f, 0x200f, + 0x0071, 0x0011, 0x006f, 0x0013, 0x006d, 0x0015, 0x006b, 0x0017, + 0x0088, 0x0090, 0x0098, 0x00a0, 0x0069, 0x0019, 0x0067, 0x001b, + 0x0065, 0x001d, 0x0063, 0x001f, 0x0061, 0x0021, 0x005f, 0x0023, + 0x005d, 0x0025, 0x005b, 0x0027, 0x00b0, 0x00b8, 0x00c0, 0x00c8, + 0x0059, 0x0029, 0x0057, 0x002b, 0x2055, 0x2055, 0x202d, 0x202d, + 0x2053, 0x2053, 0x202f, 0x202f, 0x2051, 0x2051, 0x2031, 0x2031, + 0x204f, 0x204f, 0x2033, 0x2033, 0x00e0, 0x00e8, 0x0049, 0x0039, + 0x204d, 0x204d, 0x2035, 0x2035, 0x204b, 0x204b, 0x2037, 0x2037, + 0x2045, 0x2045, 0x203d, 0x203d +}; + +/* LMAX table for non Inter (Last == 0 and Last=1) + Level - 1 Indexed + padded armVCM4P2_InterL0L1LMAX[27-31] with zeros to acess entries for Last=1 effectively + +*/ +const OMX_U8 armVCM4P2_InterL0L1LMAX[73] = +{ + 12, 6, 4, 3, 3, 3, 3, 2, + 2, 2, 2, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 0, 0, 0, + 3, 2, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1 +}; + +/* RMAX table for non Inter (Last == 0 and Last=1) + Level - 1 Indexed + padded armVCM4P2_InterL0L1RMAX[12-31] with zeros to access entries for Last=1 table effectively */ + + +const OMX_U8 armVCM4P2_InterL0L1RMAX[35] = +{ + 26, 10, 6, 2, 1, 1, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + 0, 0, 0, 0, 40, 1, 0 +}; + +/* LMAX table for non Intra (Last == 
0 and Last=1) + Level - 1 Indexed + padded armVCM4P2_IntraL0L1LMAX[15-31] with zeros to acess entries for Last=1 effectively + +*/ +const OMX_U8 armVCM4P2_IntraL0L1LMAX[53] = +{ + 27, 10, 5, 4, 3, 3, 3, + 3, 2, 2, 1, 1, 1, 1, 1, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + + 8, 3, 2, 2, 2, 2, 2, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 +}; + + +/* RMAX table for non Inter (Last == 0 and Last=1) + Level - 1 Indexed + padded armVCM4P2_IntraL0L1RMAX[27-31] with zeros to access entries for Last=1 table effectively */ + + +const OMX_U8 armVCM4P2_IntraL0L1RMAX[40] = +{ + 14, 9, 7, 3, 2, 1, 1, + 1, 1, 1, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, + + 20, 6, 1, 0, 0, 0, 0, 0 + +}; diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c new file mode 100644 index 0000000..25cf8db --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c @@ -0,0 +1,75 @@ + /** + * + * File Name: armVCM4P2_Lookup_Tables.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * File: armVCM4P2_Lookup_Tables.c + * Description: Contains all the Lookup tables used in MPEG4 codec + * + */ + +#include "omxtypes.h" +#include "armOMX.h" + + /* * Table Entries contain Dc Scaler values + * armVCM4P2_DCScaler[i]= 8 for i=1 to 4 and i=33 to 36 + * = 2*i for i=5 to 8 + * = i+8 for i=9 to 24 + * = 2*i-16 for i=25 to 31 + * = (i-32+13)/2 for i=37 to 56 + * = i-6-32 for i=57 to 63 + * = 255 for i=0 and i=32 + */ + +const OMX_U8 armVCM4P2_DCScaler[64]={ + 0xff, 0x8, 0x8, 0x8, 0x8, 0xa, 0xc, 0xe, + 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, + 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, + 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, + 0xff, 0x8, 0x8, 0x8, 0x8, 0x9, 0x9, 0xa, + 0xa, 0xb, 0xb, 0xc, 0xc, 0xd, 0xd, 0xe, + 0xe, 0xf, 0xf, 0x10, 0x10, 0x11, 0x11, 0x12, + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, + +}; + + + /* Table Entries Contain reciprocal of 1 to 63 + * armVCM4P2_Reciprocal_QP_S16[i]=round(32767/i) + * armVCM4P2_Reciprocal_QP_S16[0]= 0 + */ + +const OMX_S16 armVCM4P2_Reciprocal_QP_S16[64]={ + 0x0000,0x7fff,0x4000,0x2aaa,0x2000,0x1999,0x1555,0x1249, + 0x1000,0x0e39,0x0ccd,0x0ba3,0x0aab,0x09d9,0x0925,0x0888, + 0x0800,0x0787,0x071c,0x06bd,0x0666,0x0618,0x05d1,0x0591, + 0x0555,0x051f,0x04ec,0x04be,0x0492,0x046a,0x0444,0x0421, + 0x0400,0x03e1,0x03c4,0x03a8,0x038e,0x0376,0x035e,0x0348, + 0x0333,0x031f,0x030c,0x02fa,0x02e9,0x02d8,0x02c8,0x02b9, + 0x02ab,0x029d,0x028f,0x0282,0x0276,0x026a,0x025f,0x0254, + 0x0249,0x023f,0x0235,0x022b,0x0222,0x0219,0x0211,0x0208 + +}; + + /* Table Entries Contain reciprocal of 1 to 63 + * armVCM4P2_Reciprocal_QP_S32[i]=round(131071/i) + * armVCM4P2_Reciprocal_QP_S32[0]= 0 + */ + +const OMX_S32 armVCM4P2_Reciprocal_QP_S32[64]={ + 0x00000000,0x0001ffff,0x00010000,0x0000aaaa, 0x00008000, 0x00006666, 0x00005555, 0x00004924, + 0x00004000,0x000038e3,0x00003333,0x00002e8c, 0x00002aab, 0x00002762, 0x00002492, 0x00002222, + 0x00002000,0x00001e1e,0x00001c72,0x00001af2, 0x0000199a, 0x00001861,
0x00001746, 0x00001643, + 0x00001555,0x0000147b,0x000013b1,0x000012f6, 0x00001249, 0x000011a8, 0x00001111, 0x00001084, + 0x00001000,0x00000f84,0x00000f0f,0x00000ea1, 0x00000e39, 0x00000dd6, 0x00000d79, 0x00000d21, + 0x00000ccd,0x00000c7d,0x00000c31,0x00000be8, 0x00000ba3, 0x00000b61, 0x00000b21, 0x00000ae5, + 0x00000aab,0x00000a73,0x00000a3d,0x00000a0a, 0x000009d9, 0x000009a9, 0x0000097b, 0x0000094f, + 0x00000925,0x000008fb,0x000008d4,0x000008ae, 0x00000889, 0x00000865, 0x00000842, 0x00000820 + +}; diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s new file mode 100644 index 0000000..3f92d85 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s @@ -0,0 +1,104 @@ +;// +;// +;// File Name: armVCM4P2_SetPredDir_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + +; ** +; * Function: armVCM4P2_SetPredDir +; * +; * Description: +; * Performs detecting the prediction direction +; * +; * Remarks: +; * +; * Parameters: +; * [in] blockIndex block index indicating the component type and +; * position as defined in subclause 6.1.3.8, of ISO/IEC +; * 14496-2. Furthermore, indexes 6 to 9 indicate the +; * alpha blocks spatially corresponding to luminance +; * blocks 0 to 3 in the same macroblock. +; * [in] pCoefBufRow pointer to the coefficient row buffer +; * [in] pQpBuf pointer to the quantization parameter buffer +; * [out]predQP quantization parameter of the predictor block +; * [out]predDir indicates the prediction direction which takes one +; * of the following values: +; * OMX_VC_HORIZONTAL predict horizontally +; * OMX_VC_VERTICAL predict vertically +; * +; * Return Value: +; * Standard OMXResult result. See enumeration for possible result codes. 
+; * +; */ + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE omxVC_s.h + + + M_VARIANTS ARM1136JS + + + IF ARM1136JS + +;// Input Arguments +BlockIndex RN 0 +pCoefBufRow RN 1 +pCoefBufCol RN 2 +predDir RN 3 +predQP RN 4 +pQpBuf RN 5 + +;// Local Variables + +Return RN 0 +blockDCLeft RN 6 +blockDCTop RN 7 +blockDCTopLeft RN 8 +temp1 RN 9 +temp2 RN 14 + + M_START armVCM4P2_SetPredDir,r9 + + M_ARG ppredQP,4 + M_ARG ppQpBuf,4 + + LDRH blockDCTopLeft,[pCoefBufRow,#-16] + LDRH blockDCLeft,[pCoefBufCol] + + TEQ BlockIndex,#3 + LDREQH blockDCTop,[pCoefBufCol,#-16] + LDRNEH blockDCTop,[pCoefBufRow] + + SUBS temp1,blockDCLeft,blockDCTopLeft + RSBLT temp1,temp1,#0 + SUBS temp2,blockDCTopLeft,blockDCTop + RSBLT temp2,temp2,#0 + + M_LDR pQpBuf,ppQpBuf + M_LDR predQP,ppredQP + CMP temp1,temp2 + MOV temp2,#OMX_VC_VERTICAL + LDRLTB temp1,[pQpBuf,#1] + STRLT temp2,[predDir] + STRLT temp1,[predQP] + MOV temp2,#OMX_VC_HORIZONTAL + LDRGEB temp1,[pQpBuf] + STRGE temp2,[predDir] + MOV Return,#OMX_Sts_NoErr + STRGE temp1,[predQP] + + + + M_END + + ENDIF + + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c new file mode 100644 index 0000000..ed17f9b --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c @@ -0,0 +1,61 @@ +/** + * + * File Name: armVCM4P2_Zigzag_Tables.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * File: armVCM4P2_ZigZag_Tables.c + * Description: Contains the zigzag tables + * + */ + +#include "omxtypes.h" + +/* Each entry is double the raster index given by the reference zigzag table (so every value is even, 0..126) + * Contains the Classical, Vertical and Horizontal zigzag scan tables (64 entries each, in that order) in one array + */ + +const OMX_U8 armVCM4P2_aClassicalZigzagScan [192] = +{ + 0, 2, 16, 32, 18, 4, 6, 20, + 34, 48, 64, 50, 36, 22, 8, 10, + 24, 38, 52, 66, 80, 96, 82, 68, + 54, 40, 26, 12, 14, 28, 42, 56, + 70, 84, 98, 112, 114, 100, 86, 72, + 58, 44, 30, 46, 60, 74, 88, 102, + 116, 118, 104, 90, 76, 62, 78, 92, + 106, 120, 122, 108, 94, 110, 124, 126, + + 0, 16, 32, 48, 2, 18, 4, 20, + 34, 50, 64, 80, 96, 112, 114, 98, + 82, 66, 52, 36, 6, 22, 8, 24, + 38, 54, 68, 84, 100, 116, 70, 86, + 102, 118, 40, 56, 10, 26, 12, 28, + 42, 58, 72, 88, 104, 120, 74, 90, + 106, 122, 44, 60, 14, 30, 46, 62, + 76, 92, 108, 124, 78, 94, 110, 126, + + 0, 2, 4, 6, 16, 18, 32, 34, + 20, 22, 8, 10, 12, 14, 30, 28, + 26, 24, 38, 36, 48, 50, 64, 66, + 52, 54, 40, 42, 44, 46, 56, 58, + 60, 62, 68, 70, 80, 82, 96, 98, + 84, 86, 72, 74, 76, 78, 88, 90, + 92, 94, 100, 102, 112, 114, 116, 118, + 104, 106, 108, 110, 120, 122, 124, 126 + + +}; + + + + + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c new file mode 100644 index 0000000..b63d295 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c @@ -0,0 +1,102 @@ +/** + * + * File Name: omxVCM4P2_DecodeBlockCoef_Inter.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + * + * + * Description: + * Contains modules for inter reconstruction + * + */ + + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" + + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Inter + * + * Description: + * Decodes the INTER block coefficients. Inverse quantization, inversely zigzag + * positioning and IDCT, with appropriate clipping on each step, are performed + * on the coefficients. The results (residuals) are placed in a contiguous array + * of 64 elements. For INTER block, the output buffer holds the residuals for + * further reconstruction. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream buffer. There is no boundary + * check for the bit stream buffer. + * [in] pBitOffset pointer to the bit position in the byte pointed + * to by *ppBitStream. *pBitOffset is valid within + * [0-7] + * [in] QP quantization parameter + * [in] shortVideoHeader a flag indicating presence of short_video_header; + * shortVideoHeader==1 indicates using quantization method defined in short + * video header mode, and shortVideoHeader==0 indicates normail quantization method. + * [out] ppBitStream *ppBitStream is updated after the block is decoded, so that it points to the + * current byte in the bit stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the current bit position in the + * byte pointed by *ppBitStream + * [out] pDst pointer to the decoded residual buffer (a contiguous array of 64 elements of + * OMX_S16 data type). Must be 16-byte aligned. 
+ * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following pointers is Null: ppBitStream, *ppBitStream, pBitOffset , pDst + * - At least one of the below case: + * - *pBitOffset exceeds [0,7], QP <= 0; + * - pDst not 16-byte aligned + * OMX_Sts_Err - status error + * + */ +OMXResult omxVCM4P2_DecodeBlockCoef_Inter( + const OMX_U8 ** ppBitStream, + OMX_INT * pBitOffset, + OMX_S16 * pDst, + OMX_INT QP, + OMX_INT shortVideoHeader +) +{ + /* 64 coefficients are needed; the 15 extra elements are slack so the working pointer can be moved up to a 16-byte boundary */ + OMX_S16 tempBuf[79]; + OMX_S16 *pTempBuf1; + OMXResult errorCode; + /* Align the working buffer (armAlignTo16Bytes is defined elsewhere; presumably it rounds the address up to a multiple of 16 - confirm) */ + pTempBuf1 = armAlignTo16Bytes(tempBuf); + + + /* VLC decode and zigzag into the aligned working buffer; bail out with the callee's status on failure */ + errorCode = omxVCM4P2_DecodeVLCZigzag_Inter(ppBitStream, pBitOffset, + pTempBuf1,shortVideoHeader); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Dequantization of the working buffer using QP */ + errorCode = omxVCM4P2_QuantInvInter_I( + pTempBuf1, + QP); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Inverse transform from the working buffer into the caller's pDst */ + errorCode = omxVCM4P2_IDCT8x8blk(pTempBuf1, pDst); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + return OMX_Sts_NoErr; +} + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c new file mode 100644 index 0000000..c609a60 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c @@ -0,0 +1,208 @@ +/** + * + * File Name: omxVCM4P2_DecodeBlockCoef_Intra.c + * OpenMAX DL: v1.0.2 + * Revision: 9641 + * Date: Thursday, February 7, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * + * + * + * Description: + * Contains modules for intra reconstruction + * + */ + +#include "omxtypes.h" +#include "armOMX.h" +#include "omxVC.h" + +#include "armCOMM.h" +#include "armVC.h" + +/** + * Function: omxVCM4P2_DecodeBlockCoef_Intra + * + * Description: + * Decodes the INTRA block coefficients. Inverse quantization, inversely zigzag + * positioning, and IDCT, with appropriate clipping on each step, are performed + * on the coefficients. The results are then placed in the output frame/plane on + * a pixel basis. For INTRA block, the output values are clipped to [0, 255] and + * written to corresponding block buffer within the destination plane. + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte in + * the bit stream buffer. There is no boundary + * check for the bit stream buffer. + * [in] pBitOffset pointer to the bit position in the byte pointed + * to by *ppBitStream. *pBitOffset is valid within + * [0-7]. + * [in] step width of the destination plane + * [in/out] pCoefBufRow [in] pointer to the coefficient row buffer + * [out] updated coefficient rwo buffer + * [in/out] pCoefBufCol [in] pointer to the coefficient column buffer + * [out] updated coefficient column buffer + * [in] curQP quantization parameter of the macroblock which + * the current block belongs to + * [in] pQpBuf Pointer to a 2-element QP array. pQpBuf[0] holds the QP of the 8x8 block left to + * the current block(QPa). pQpBuf[1] holds the QP of the 8x8 block just above the + * current block(QPc). + * Note, in case the corresponding block is out of VOP bound, the QP value will have + * no effect to the intra-prediction process. Refer to subclause "7.4.3.3 Adaptive + * ac coefficient prediction" of ISO/IEC 14496-2(MPEG4 Part2) for accurate description. + * [in] blockIndex block index indicating the component type and + * position as defined in subclause 6.1.3.8, + * Figure 6-5 of ISO/IEC 14496-2. 
+ * [in] intraDCVLC a code determined by intra_dc_vlc_thr and QP. + * This allows a mechanism to switch between two VLC + * for coding of Intra DC coefficients as per Table + * 6-21 of ISO/IEC 14496-2. + * [in] ACPredFlag a flag equal to ac_pred_flag (of luminance) indicating + * if the ac coefficients of the first row or first + * column are differentially coded for intra coded + * macroblock. + * [in] shortVideoHeader a flag indicating presence of short_video_header; + * shortVideoHeader==1 selects linear intra DC mode, + * and shortVideoHeader==0 selects nonlinear intra DC mode. + * [out] ppBitStream *ppBitStream is updated after the block is + * decoded, so that it points to the current byte + * in the bit stream buffer + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream + * [out] pDst pointer to the block in the destination plane. + * pDst should be 16-byte aligned. + * [out] pCoefBufRow pointer to the updated coefficient row buffer. + * + * Return Value: + * OMX_Sts_NoErr - no error + * OMX_Sts_BadArgErr - bad arguments + * - At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, + * pCoefBufRow, pCoefBufCol, pQPBuf, pDst. + * or + * - At least one of the below case: *pBitOffset exceeds [0,7], curQP exceeds (1, 31), + * blockIndex exceeds [0,9], step is not the multiple of 8, intraDCVLC is zero while + * blockIndex greater than 5. 
+ * or + * - pDst is not 16-byte aligned + * OMX_Sts_Err - status error + * + */ + +OMXResult omxVCM4P2_DecodeBlockCoef_Intra( + const OMX_U8 ** ppBitStream, + OMX_INT *pBitOffset, + OMX_U8 *pDst, + OMX_INT step, + OMX_S16 *pCoefBufRow, + OMX_S16 *pCoefBufCol, + OMX_U8 curQP, + const OMX_U8 *pQPBuf, + OMX_INT blockIndex, + OMX_INT intraDCVLC, + OMX_INT ACPredFlag, + OMX_INT shortVideoHeader + ) +{ + OMX_S16 tempBuf1[79], tempBuf2[79]; + OMX_S16 *pTempBuf1, *pTempBuf2; + OMX_INT predDir, predACDir; + OMX_INT predQP; + OMXVCM4P2VideoComponent videoComp; + OMXResult errorCode; + + + /* Aligning the local buffers */ + pTempBuf1 = armAlignTo16Bytes(tempBuf1); + pTempBuf2 = armAlignTo16Bytes(tempBuf2); + + /* Setting the AC prediction direction and prediction direction */ + armVCM4P2_SetPredDir( + blockIndex, + pCoefBufRow, + pCoefBufCol, + &predDir, + &predQP, + pQPBuf); + + predACDir = predDir; + + + if (ACPredFlag == 0) + { + predACDir = OMX_VC_NONE; + } + + /* Setting the videoComp */ + if (blockIndex <= 3) + { + videoComp = OMX_VC_LUMINANCE; + } + else + { + videoComp = OMX_VC_CHROMINANCE; + } + + + /* VLD and zigzag */ + if (intraDCVLC == 1) + { + errorCode = omxVCM4P2_DecodeVLCZigzag_IntraDCVLC( + ppBitStream, + pBitOffset, + pTempBuf1, + predACDir, + shortVideoHeader, + videoComp); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + } + else + { + errorCode = omxVCM4P2_DecodeVLCZigzag_IntraACVLC( + ppBitStream, + pBitOffset, + pTempBuf1, + predACDir, + shortVideoHeader); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + } + + /* AC DC prediction */ + errorCode = omxVCM4P2_PredictReconCoefIntra( + pTempBuf1, + pCoefBufRow, + pCoefBufCol, + curQP, + predQP, + predDir, + ACPredFlag, + videoComp); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Dequantization */ + errorCode = omxVCM4P2_QuantInvIntra_I( + pTempBuf1, + curQP, + videoComp, + shortVideoHeader); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* 
Inverse transform */ + errorCode = omxVCM4P2_IDCT8x8blk (pTempBuf1, pTempBuf2); + armRetDataErrIf((errorCode != OMX_Sts_NoErr), errorCode); + + /* Placing the linear array into the destination plane and clipping + it to 0 to 255 */ + + armVCM4P2_Clip8(pTempBuf2,pDst,step); + + + return OMX_Sts_NoErr; +} + +/* End of file */ + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s new file mode 100644 index 0000000..a1861da --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s @@ -0,0 +1,364 @@ +; ********** +; * +; * File Name: omxVCM4P2_DecodePadMV_PVOP_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; ** +; * Function: omxVCM4P2_DecodePadMV_PVOP +; * +; * Description: +; * Decodes and pads four motion vectors of the non-intra macroblock in P-VOP. +; * The motion vector padding process is specified in subclause 7.6.1.6 of +; * ISO/IEC 14496-2. +; * +; * Remarks: +; * +; * +; * Parameters: +; * [in] ppBitStream pointer to the pointer to the current byte in +; * the bit stream buffer +; * [in] pBitOffset pointer to the bit position in the byte pointed +; * to by *ppBitStream. *pBitOffset is valid within +; * [0-7]. +; * [in] pSrcMVLeftMB pointers to the motion vector buffers of the +; * macroblocks specially at the left side of the current macroblock +; * respectively. +; * [in] pSrcMVUpperMB pointers to the motion vector buffers of the +; * macroblocks specially at the upper side of the current macroblock +; * respectively. +; * [in] pSrcMVUpperRightMB pointers to the motion vector buffers of the +; * macroblocks specially at the upper-right side of the current macroblock +; * respectively. 
+; * [in] fcodeForward a code equal to vop_fcode_forward in MPEG-4 +; * bit stream syntax +; * [in] MBType the type of the current macroblock. If MBType +; * is not equal to OMX_VC_INTER4V, the destination +; * motion vector buffer is still filled with the +; * same decoded vector. +; * [out] ppBitStream *ppBitStream is updated after the block is decoded, +; * so that it points to the current byte in the bit +; * stream buffer +; * [out] pBitOffset *pBitOffset is updated so that it points to the +; * current bit position in the byte pointed by +; * *ppBitStream +; * [out] pDstMVCurMB pointer to the motion vector buffer of the current +; * macroblock which contains four decoded motion vectors +; * +; * Return Value: +; * OMX_Sts_NoErr -no error +; * +; * +; * OMX_Sts_Err - status error +; * +; * + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armCOMM_BitDec_s.h + INCLUDE omxVC_s.h + + M_VARIANTS ARM1136JS + + + + + IF ARM1136JS + +;//Input Arguments + +ppBitStream RN 0 +pBitOffset RN 1 +pSrcMVLeftMB RN 2 +pSrcMVUpperMB RN 3 +pSrcMVUpperRightMB RN 4 +pDstMVCurMB RN 5 +fcodeForward RN 6 +MBType RN 7 + +;//Local Variables + +zero RN 4 +one RN 4 +scaleFactor RN 1 + + +Return RN 0 + +VlcMVD RN 0 +index RN 4 +Count RN 7 + +mvHorData RN 4 +mvHorResidual RN 0 + +mvVerData RN 4 +mvVerResidual RN 0 + +temp RN 1 + +temp1 RN 3 +High RN 4 +Low RN 2 +Range RN 1 + +BlkCount RN 14 + +diffMVdx RN 0 +diffMVdy RN 1 + +;// Scratch Registers + +RBitStream RN 8 +RBitCount RN 9 +RBitBuffer RN 10 + +T1 RN 11 +T2 RN 12 +LR RN 14 + + IMPORT armVCM4P2_aVlcMVD + IMPORT omxVCM4P2_FindMVpred + + ;// Allocate stack memory + + M_ALLOC4 ppDstMVCurMB,4 + M_ALLOC4 pDstMVPredME,4 + M_ALLOC4 pBlkCount,4 + + M_ALLOC4 pppBitStream,4 + M_ALLOC4 ppBitOffset,4 + M_ALLOC4 ppSrcMVLeftMB,4 + M_ALLOC4 ppSrcMVUpperMB,4 + + M_ALLOC4 pdiffMVdx,4 + M_ALLOC4 pdiffMVdy,4 + M_ALLOC4 pHigh,4 + + + + + M_START omxVCM4P2_DecodePadMV_PVOP,r11 + + M_ARG pSrcMVUpperRightMBonStack,4 ;// pointer to pSrcMVUpperRightMB 
on stack + M_ARG pDstMVCurMBonStack,4 ;// pointer to pDstMVCurMB on stack + M_ARG fcodeForwardonStack,4 ;// pointer to fcodeForward on stack + M_ARG MBTypeonStack,4 ;// pointer to MBType on stack + + + + + + ;// Initializing the BitStream Macro + + M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount + M_LDR MBType,MBTypeonStack ;// Load MBType from stack + M_LDR pDstMVCurMB,pDstMVCurMBonStack ;// Load pDstMVCurMB from stack + MOV zero,#0 + + TEQ MBType,#OMX_VC_INTRA ;// Check if MBType=OMX_VC_INTRA + TEQNE MBType,#OMX_VC_INTRA_Q ;// check if MBType=OMX_VC_INTRA_Q + STREQ zero,[pDstMVCurMB] + M_BD_INIT1 T1, T2, T2 + STREQ zero,[pDstMVCurMB,#4] + M_BD_INIT2 T1, T2, T2 + STREQ zero,[pDstMVCurMB,#4] + MOVEQ Return,#OMX_Sts_NoErr + MOV BlkCount,#0 + STREQ zero,[pDstMVCurMB,#4] + + BEQ ExitOK + + TEQ MBType,#OMX_VC_INTER4V ;// Check if MBType=OMX_VC_INTER4V + TEQNE MBType,#OMX_VC_INTER4V_Q ;// Check if MBType=OMX_VC_INTER4V_Q + MOVEQ Count,#4 + + TEQ MBType,#OMX_VC_INTER ;// Check if MBType=OMX_VC_INTER + TEQNE MBType,#OMX_VC_INTER_Q ;// Check if MBType=OMX_VC_INTER_Q + MOVEQ Count,#1 + + M_LDR fcodeForward,fcodeForwardonStack ;// Load fcodeForward from stack + + ;// Storing the values temporarily on stack + + M_STR ppBitStream,pppBitStream + M_STR pBitOffset,ppBitOffset + + + SUB temp,fcodeForward,#1 ;// temp=fcodeForward-1 + MOV one,#1 + M_STR pSrcMVLeftMB,ppSrcMVLeftMB + LSL scaleFactor,one,temp ;// scaleFactor=1<<(fcodeForward-1) + M_STR pSrcMVUpperMB,ppSrcMVUpperMB + LSL scaleFactor,scaleFactor,#5 + M_STR scaleFactor,pHigh ;// [pHigh]=32*scaleFactor + + ;// VLD Decoding + + +Loop + + LDR VlcMVD, =armVCM4P2_aVlcMVD ;// Load the optimized MVD VLC table + + ;// Horizontal Data and Residual calculation + + LDR temp,=0xFFF + M_BD_VLD index,T1,T2,VlcMVD,3,2 ;// variable lenght decoding using the macro + + TEQ index,temp + BEQ ExitError ;// Exit with an Error Message if the decoded symbol is an invalied symbol + + SUB mvHorData,index,#32 ;// 
mvHorData=index-32 + MOV mvHorResidual,#1 ;// mvHorResidual=1 + CMP fcodeForward,#1 + TEQNE mvHorData,#0 + MOVEQ diffMVdx,mvHorData ;// if scaleFactor=1(fcodeForward=1) or mvHorData=0 diffMVdx=mvHorData + BEQ VerticalData + + SUB temp,fcodeForward,#1 + M_BD_VREAD8 mvHorResidual,temp,T1,T2 ;// get mvHorResidual from bitstream if fcodeForward>1 and mvHorData!=0 + + CMP mvHorData,#0 + RSBLT mvHorData,mvHorData,#0 ;// mvHorData=abs(mvHorData) + SUB mvHorResidual,mvHorResidual,fcodeForward + SMLABB diffMVdx,mvHorData,fcodeForward,mvHorResidual ;// diffMVdx=abs(mvHorData)*fcodeForward+mvHorResidual-fcodeForward + ADD diffMVdx,diffMVdx,#1 + RSBLT diffMVdx,diffMVdx,#0 + + ;// Vertical Data and Residual calculation + +VerticalData + + M_STR diffMVdx,pdiffMVdx ;// Store the diffMVdx on stack + LDR VlcMVD, =armVCM4P2_aVlcMVD ;// Loading the address of optimized VLC tables + + LDR temp,=0xFFF + M_BD_VLD index,T1,T2,VlcMVD,3,2 ;// VLC decoding using the macro + + TEQ index,temp + BEQ ExitError ;// Exit with an Error Message if an Invalied Symbol occurs + + SUB mvVerData,index,#32 ;// mvVerData=index-32 + MOV mvVerResidual,#1 + CMP fcodeForward,#1 + TEQNE mvVerData,#0 + MOVEQ diffMVdy,mvVerData ;// diffMVdy = mvVerData if scaleFactor=1(fcodeForward=1) or mvVerData=0 + BEQ FindMVPred + + SUB temp,fcodeForward,#1 + M_BD_VREAD8 mvVerResidual,temp,T1,T2 ;// Get mvVerResidual from bit stream if fcodeForward>1 and mnVerData!=0 + + + CMP mvVerData,#0 + RSBLT mvVerData,mvVerData,#0 + SUB mvVerResidual,mvVerResidual,fcodeForward + SMLABB diffMVdy,mvVerData,fcodeForward,mvVerResidual ;// diffMVdy=abs(mvVerData)*fcodeForward+mvVerResidual-fcodeForward + ADD diffMVdy,diffMVdy,#1 + RSBLT diffMVdy,diffMVdy,#0 + + ;//Calling the Function omxVCM4P2_FindMVpred + +FindMVPred + + M_STR diffMVdy,pdiffMVdy + ADD temp,pDstMVCurMB,BlkCount,LSL #2 ;// temp=pDstMVCurMB[BlkCount] + M_STR temp,ppDstMVCurMB ;// store temp on stack for passing as an argument to FindMVPred + + MOV temp,#0 + M_STR 
temp,pDstMVPredME ;// Pass pDstMVPredME=NULL as an argument + M_STR BlkCount,pBlkCount ;// Passs BlkCount as Argument through stack + + MOV temp,pSrcMVLeftMB ;// temp (RN 1)=pSrcMVLeftMB + M_LDR pSrcMVUpperRightMB,pSrcMVUpperRightMBonStack + MOV pSrcMVLeftMB,pSrcMVUpperMB ;// pSrcMVLeftMB ( RN 2) = pSrcMVUpperMB + MOV ppBitStream,pDstMVCurMB ;// ppBitStream ( RN 0) = pDstMVCurMB + MOV pSrcMVUpperMB,pSrcMVUpperRightMB ;// pSrcMVUpperMB( RN 3) = pSrcMVUpperRightMB + BL omxVCM4P2_FindMVpred ;// Branch to subroutine omxVCM4P2_FindMVpred + + ;// Store Horizontal Motion Vector + + M_LDR BlkCount,pBlkCount ;// Load BlkCount from stack + M_LDR High,pHigh ;// High=32*scaleFactor + LSL temp1,BlkCount,#2 ;// temp=BlkCount*4 + M_LDR diffMVdx,pdiffMVdx ;// Laad diffMVdx + + LDRSH temp,[pDstMVCurMB,temp1] ;// temp=pDstMVCurMB[BlkCount] + + + RSB Low,High,#0 ;// Low = -32*scaleFactor + ADD diffMVdx,temp,diffMVdx ;// diffMVdx=pDstMVCurMB[BlkCount]+diffMVdx + ADD Range,High,High ;// Range=64*ScaleFactor + SUB High,High,#1 ;// High= 32*scaleFactor-1 + + CMP diffMVdx,Low ;// If diffMVdx<Low + ADDLT diffMVdx,diffMVdx,Range ;// diffMVdx+=Range + + CMP diffMVdx,High + SUBGT diffMVdx,diffMVdx,Range ;// If diffMVdx > High diffMVdx-=Range + STRH diffMVdx,[pDstMVCurMB,temp1] + + ;// Store Vertical + + ADD temp1,temp1,#2 ;// temp1=4*BlkCount+2 + M_LDR diffMVdx,pdiffMVdy ;// Laad diffMVdy + LDRSH temp,[pDstMVCurMB,temp1] ;// temp=pDstMVCurMB[BlkCount].diffMVdy + ADD BlkCount,BlkCount,#1 ;// BlkCount=BlkCount+1 + ADD diffMVdx,temp,diffMVdx + CMP diffMVdx,Low + ADDLT diffMVdx,diffMVdx,Range ;// If diffMVdy<Low diffMVdy+=Range + CMP diffMVdx,High + SUBGT diffMVdx,diffMVdx,Range ;// If diffMVdy > High diffMVdy-=Range + STRH diffMVdx,[pDstMVCurMB,temp1] + + CMP BlkCount,Count + M_LDR pSrcMVLeftMB,ppSrcMVLeftMB + M_LDR pSrcMVUpperMB,ppSrcMVUpperMB + + BLT Loop ;// If BlkCount<Count Continue the Loop + + + ;// If MBType=OMX_VC_INTER or MBtype=OMX_VC_INTER_Q copy pDstMVCurMB[0] to + ;// 
pDstMVCurMB[1], pDstMVCurMB[2], pDstMVCurMB[3] + + M_LDR MBType,MBTypeonStack + + TEQ MBType,#OMX_VC_INTER + TEQNE MBType,#OMX_VC_INTER_Q + LDREQ temp,[pDstMVCurMB] + M_LDR ppBitStream,pppBitStream + STREQ temp,[pDstMVCurMB,#4] + + STREQ temp,[pDstMVCurMB,#8] + STREQ temp,[pDstMVCurMB,#12] + + + M_LDR pBitOffset,ppBitOffset + ;//Ending the macro + M_BD_FINI ppBitStream,pBitOffset ;// Finishing the Macro + + + MOV Return,#OMX_Sts_NoErr + B ExitOK + +ExitError + + M_LDR ppBitStream,pppBitStream + M_LDR pBitOffset,ppBitOffset + ;//Ending the macro + M_BD_FINI ppBitStream,pBitOffset + + MOV Return,#OMX_Sts_Err + +ExitOK + + M_END + ENDIF + END + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s new file mode 100644 index 0000000..c43b253 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s @@ -0,0 +1,132 @@ +;/** +; * +; * File Name: omxVCM4P2_DecodeVLCZigzag_Inter_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains modules for zigzag scanning and VLC decoding +; * for inter block. +; * +; * +; * +; * Function: omxVCM4P2_DecodeVLCZigzag_Inter +; * +; * Description: +; * Performs VLC decoding and inverse zigzag scan for one inter coded block. +; * +; * Remarks: +; * +; * Parameters: +; * [in] ppBitStream pointer to the pointer to the current byte in +; * the bitstream buffer +; * [in] pBitOffset pointer to the bit position in the byte pointed +; * to by *ppBitStream. *pBitOffset is valid within [0-7]. +; * [in] shortVideoHeader binary flag indicating presence of short_video_header; +; * escape modes 0-3 are used if shortVideoHeader==0, +; * and escape mode 4 is used when shortVideoHeader==1. 
+; * [out] ppBitStream *ppBitStream is updated after the block is +; * decoded, so that it points to the current byte +; * in the bit stream buffer +; * [out] pBitOffset *pBitOffset is updated so that it points to the +; * current bit position in the byte pointed by +; * *ppBitStream +; * [out] pDst pointer to the coefficient buffer of current +; * block. Must be 16-byte aligned +; * +; * Return Value: +; * OMX_Sts_BadArgErr - bad arguments +; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or +; * -pDst is not 16-byte aligned, or +; * -*pBitOffset exceeds [0,7]. +; * OMX_Sts_Err - status error +; * -At least one mark bit is equal to zero +; * -Encountered an illegal stream code that cannot be found in the VLC table +; * -Encountered an illegal code in the VLC FLC table +; * -The number of coefficients is greater than 64 +; * +; */ + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armCOMM_BitDec_s.h + + + M_VARIANTS ARM1136JS + + + + + + IF ARM1136JS + + ;// Import various tables needed for the function + + + IMPORT armVCM4P2_InterVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0 + ;// Packed in Run:Level:Last format + IMPORT armVCM4P2_InterL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1 + IMPORT armVCM4P2_InterL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1 + IMPORT armVCM4P2_aClassicalZigzagScan ;// contains classical Zigzag table entries with double the original values + IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe + + + +;//Input Arguments + +ppBitStream RN 0 +pBitOffset RN 1 +pDst RN 2 +shortVideoHeader RN 3 + +;//Local Variables + +Return RN 0 + +pVlcTableL0L1 RN 4 +pLMAXTableL0L1 RN 4 +pRMAXTableL0L1 RN 4 +pZigzagTable RN 4 +Count RN 6 + + + + ;// Allocate stack memory to store the addresses of the VLC, Zigzag, LMAX and RMAX tables + + + M_ALLOC4 ppVlcTableL0L1,4 + M_ALLOC4 ppLMAXTableL0L1,4 + M_ALLOC4 ppRMAXTableL0L1,4 + M_ALLOC4 ppZigzagTable,4 + +
+ M_START omxVCM4P2_DecodeVLCZigzag_Inter,r12 + + + + + LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load address of the classical zigzag table + M_STR pZigzagTable,ppZigzagTable ;// Store zigzag table address on stack to pass as argument to unsafe function + LDR pVlcTableL0L1, =armVCM4P2_InterVlcL0L1 ;// Load address of optimized VLC table with both L=0 and L=1 entries + M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store optimized VLC table address on stack + LDR pLMAXTableL0L1, =armVCM4P2_InterL0L1LMAX ;// Load address of interleaved L=0 and L=1 LMAX tables + M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX table address on stack + LDR pRMAXTableL0L1, =armVCM4P2_InterL0L1RMAX ;// Load address of interleaved L=0 and L=1 RMAX tables + MOV Count,#0 ;// set start=0 + M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// store RMAX table address on stack + + + BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// call Unsafe Function for VLC Zigzag Decoding + + + + M_END + ENDIF + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s new file mode 100644 index 0000000..166729e --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s @@ -0,0 +1,136 @@ +;/** +; * +; * File Name: omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains modules for zigzag scanning and VLC decoding +; * for intra block (AC coefficients). +; * +; * +; * +; * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC +; * +; * Description: +; * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+; * +; * Remarks: +; * PredDir is passed in r3 and selects the zigzag scan table; shortVideoHeader is read from the stack. +; * Parameters: +; * [in] ppBitStream pointer to the pointer to the current byte in +; * the bitstream buffer +; * [in] pBitOffset pointer to the bit position in the byte pointed +; * to by *ppBitStream. *pBitOffset is valid within [0-7]. +; * [in] shortVideoHeader binary flag indicating presence of short_video_header; +; * escape modes 0-3 are used if shortVideoHeader==0, +; * and escape mode 4 is used when shortVideoHeader==1. +; * [out] ppBitStream *ppBitStream is updated after the block is +; * decoded, so that it points to the current byte +; * in the bit stream buffer +; * [out] pBitOffset *pBitOffset is updated so that it points to the +; * current bit position in the byte pointed by +; * *ppBitStream +; * [out] pDst pointer to the coefficient buffer of current +; * block. Must be 16-byte aligned +; * +; * Return Value: +; * OMX_Sts_BadArgErr - bad arguments +; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or +; * -pDst is not 16-byte aligned, or +; * -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error +; * -At least one mark bit is equal to zero +; * -Encountered an illegal stream code that cannot be found in the VLC table +; * -Encountered an illegal code in the VLC FLC table +; * -The number of coefficients is greater than 64 +; * +; */ + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armCOMM_BitDec_s.h + + + M_VARIANTS ARM1136JS + + + + + + IF ARM1136JS + + ;// Import various tables needed for the function + + + IMPORT armVCM4P2_IntraVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0 + ;// Packed in Run:Level:Last format + IMPORT armVCM4P2_IntraL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1 + IMPORT armVCM4P2_IntraL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1 + IMPORT armVCM4P2_aClassicalZigzagScan ;// zigzag table entries with double the original values; indexed below by PredDir*64 + IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe + +;//Input Arguments (PredDir and shortVideoHeader share r3) + +ppBitStream RN 0 +pBitOffset RN 1 +pDst RN 2 +PredDir RN 3 +shortVideoHeader RN 3 + +;//Local Variables + +Return RN 0 + +pVlcTableL0L1 RN 4 +pLMAXTableL0L1 RN 4 +pRMAXTableL0L1 RN 4 +pZigzagTable RN 4 +Count RN 6 + + + + ;// Allocate stack memory to store optimized VLC,Zigzag, RMAX, LMAX Table Addresses + + M_ALLOC4 ppVlcTableL0L1,4 + M_ALLOC4 ppLMAXTableL0L1,4 + M_ALLOC4 ppRMAXTableL0L1,4 + M_ALLOC4 ppZigzagTable,4 + + + M_START omxVCM4P2_DecodeVLCZigzag_IntraACVLC,r12 + + M_ARG shortVideoHeaderonStack,4 ;// shortVideoHeader input argument, passed on the stack + + LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load Address of the Zigzag table + ADD pZigzagTable, pZigzagTable, PredDir, LSL #6 ;// Select the zigzag table for PredDir (address += PredDir*64) + + M_STR pZigzagTable,ppZigzagTable ;// Store Zigzag table address on stack + LDR pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1 ;// Load optimized packed VLC Table with both L=0 and L=1 entries + M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store VLC Table address on stack + LDR
pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX ;// Load LMAX Table + M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX Table address on Stack + LDR pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX ;// Load RMAX Table + MOV Count,#0 ;// Set Start=0 + + M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Store RMAX Table address on stack + + + + M_LDR shortVideoHeader,shortVideoHeaderonStack ;// Load shortVideoHeader from the stack into r3 (PredDir is no longer needed) + + BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// Call Unsafe Function + + + + + M_END + ENDIF + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s new file mode 100644 index 0000000..d19cb13 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s @@ -0,0 +1,224 @@ +;/** +; * +; * File Name: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains modules for zigzag scanning and VLC decoding +; * for intra block (DC and AC coefficients). +; * +; * +; * +; * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC +; * +; * Description: +; * Performs VLC decoding and inverse zigzag scan for one intra coded block. +; * +; * Remarks: +; * PredDir is passed in r3; shortVideoHeader and videoComp are read from the stack. +; * Parameters: +; * [in] ppBitStream pointer to the pointer to the current byte in +; * the bitstream buffer +; * [in] pBitOffset pointer to the bit position in the byte pointed +; * to by *ppBitStream. *pBitOffset is valid within [0-7]. +; * [in] shortVideoHeader binary flag indicating presence of short_video_header; +; * escape modes 0-3 are used if shortVideoHeader==0, +; * and escape mode 4 is used when shortVideoHeader==1.
+; * [out] ppBitStream *ppBitStream is updated after the block is +; * decoded, so that it points to the current byte +; * in the bit stream buffer +; * [out] pBitOffset *pBitOffset is updated so that it points to the +; * current bit position in the byte pointed by +; * *ppBitStream +; * [out] pDst pointer to the coefficient buffer of current +; * block. Must be 16-byte aligned +; * +; * Return Value: +; * OMX_Sts_BadArgErr - bad arguments +; * -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or +; * -pDst is not 16-byte aligned, or +; * -*pBitOffset exceeds [0,7]. +; * OMX_Sts_Err - status error +; * -At least one mark bit is equal to zero +; * -Encountered an illegal stream code that cannot be found in the VLC table +; * -Encountered an illegal code in the VLC FLC table +; * -The number of coefficients is greater than 64 +; * +; */ + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armCOMM_BitDec_s.h + + + M_VARIANTS ARM1136JS + + + + + + IF ARM1136JS :LOR: CortexA8 + + + ;// Import various tables needed for the function + + + IMPORT armVCM4P2_IntraVlcL0L1 ;// Contains optimized and packed VLC Tables for both Last =1 and last=0 + ;// Packed in Run:Level:Last format + IMPORT armVCM4P2_IntraL0L1LMAX ;// Contains LMAX table entries with both Last=0 and Last=1 + IMPORT armVCM4P2_IntraL0L1RMAX ;// Contains RMAX table entries with both Last=0 and Last=1 + IMPORT armVCM4P2_aClassicalZigzagScan ;// contains Classical, Horizontal, Vertical Zigzag table entries with double the original values + IMPORT armVCM4P2_aIntraDCLumaChromaIndex ;// Contains Optimized DCLuma and DCChroma Index table Entries + + + IMPORT armVCM4P2_DecodeVLCZigzag_AC_unsafe + +;//Input Arguments (PredDir and shortVideoHeader share r3) + +ppBitStream RN 0 +pBitOffset RN 1 +pDst RN 2 +PredDir RN 3 +shortVideoHeader RN 3 +videoComp RN 5 +;//Local Variables + +Return RN 0 + +pDCLumaChromaIndex RN 4 +pDCChromaIndex RN 7 +pVlcTableL0L1 RN 4 +pLMAXTableL0L1 RN 4 +pRMAXTableL0L1 RN 4
+pZigzagTable RN 4 +Count RN 6 +DCValueSize RN 6 +powOfSize RN 7 +temp1 RN 5 + + +;// Scratch Registers + +RBitStream RN 8 +RBitBuffer RN 9 +RBitCount RN 10 + +T1 RN 11 +T2 RN 12 +DCVal RN 14 + + + ;// Allocate stack memory to store optimized VLC,Zigzag, RMAX, LMAX Table Addresses and the decoded DC coefficient + + M_ALLOC4 ppVlcTableL0L1,4 + M_ALLOC4 ppLMAXTableL0L1,4 + M_ALLOC4 ppRMAXTableL0L1,4 + M_ALLOC4 ppZigzagTable,4 + M_ALLOC4 pDCCoeff,4 + + + + M_START omxVCM4P2_DecodeVLCZigzag_IntraDCVLC,r12 + + M_ARG shortVideoHeaderonStack,4 ;// shortVideoHeader argument on stack + M_ARG videoComponstack,4 ;// videoComp argument on stack + + + ;// Decode DC Coefficient + + + LDR pDCLumaChromaIndex, =armVCM4P2_aIntraDCLumaChromaIndex ;// Load Optimized VLC Table for Luminance and Chrominance + + ;// Initializing the Bitstream Macro + + M_BD_INIT0 ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount + M_LDR videoComp,videoComponstack + M_BD_INIT1 T1, T2, T2 + ADD pDCLumaChromaIndex,pDCLumaChromaIndex,videoComp, LSL #6 + M_BD_INIT2 T1, T2, T2 + + + M_BD_VLD DCValueSize,T1,T2,pDCLumaChromaIndex,4,2 ;// VLC Decode using optimized Luminance and Chrominance VLC Table + + + + +DecodeDC + + CMP DCValueSize,#12 + BGT ExitError + + CMP DCValueSize,#0 + MOVEQ DCVal,#0 ;// If DCValueSize is zero then DC coeff =0 + BEQ ACDecode ;// Branch to perform AC Coeff Decoding + + M_BD_VREAD16 DCVal,DCValueSize,T1,T2 ;// Get DC Value From Bit stream + + + MOV powOfSize,#1 + LSL powOfSize,DCValueSize ;// powOfSize=pow(2,DCValueSize) + CMP DCVal,powOfSize,LSR #1 ;// Compare DCVal with powOfSize/2 + ADDLT DCVal,DCVal,#1 + SUBLT DCVal,DCVal,powOfSize ;// If less than powOfSize/2 DCVal=DCVal-powOfSize+1 + ;// Else DCVal is kept as read from the bit stream + +CheckDCValueSize + + CMP DCValueSize,#8 ;// If DCValueSize greater than 8 check marker bit + + BLE ACDecode + + M_BD_READ8 temp1,1,T1 + TEQ temp1,#0 ;// If Marker bit is zero exit with OMX_Sts_Err + BEQ ExitError + + + + ;// Decode AC Coefficient + +ACDecode + + M_STR
DCVal,pDCCoeff ;// Store Decoded DC Coeff on Stack + M_BD_FINI ppBitStream,pBitOffset ;// Terminating the Bit stream Macro + + LDR pZigzagTable, =armVCM4P2_aClassicalZigzagScan ;// Load Zigzag table address + ADD pZigzagTable, pZigzagTable, PredDir, LSL #6 ;// Modify the Zigzag table address based on PredDir (address += PredDir*64) + + M_STR pZigzagTable,ppZigzagTable ;// Store zigzag table address on stack + LDR pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1 ;// Load Optimized VLC Table With both Last=0 and Last=1 Entries + M_STR pVlcTableL0L1,ppVlcTableL0L1 ;// Store Optimized VLC Table address on stack + LDR pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX ;// Load LMAX Table + M_STR pLMAXTableL0L1,ppLMAXTableL0L1 ;// Store LMAX table address on stack + LDR pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX ;// Load RMAX Table + MOV Count,#1 ;// Set Start =1 + + M_STR pRMAXTableL0L1,ppRMAXTableL0L1 ;// Store RMAX Table address on Stack + + + M_LDR shortVideoHeader,shortVideoHeaderonStack ;// Load shortVideoHeader from the stack into r3 (PredDir is no longer needed) + + BL armVCM4P2_DecodeVLCZigzag_AC_unsafe ;// Call the Unsafe Function + + M_LDR DCVal,pDCCoeff ;// Get the Decoded DC Value From Stack + STRH DCVal,[pDst] ;// Store the DC value as the first halfword at pDst + B ExitOK + + + +ExitError + + M_BD_FINI ppBitStream,pBitOffset ;// Terminating the Bit Stream Macro in case of an Error + MOV Return,#OMX_Sts_Err ;// Return OMX_Sts_Err +ExitOK + + M_END + ENDIF + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s new file mode 100644 index 0000000..a4bfa71 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s @@ -0,0 +1,194 @@ +;// +;// +;// File Name: omxVCM4P2_FindMVpred_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// +;// +;// + +;// Function: +;// omxVCM4P2_FindMVpred +;// + ;// Include headers + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + INCLUDE armVCCOMM_s.h + + ;// Define cpu variants + M_VARIANTS ARM1136JS + + + IF ARM1136JS + + M_TABLE armVCM4P2_pBlkIndexTable + DCD OMXVCBlk0, OMXVCBlk1 + DCD OMXVCBlk2, OMXVCBlk3 + +;//-------------------------------------------- +;// Declare input registers +;//-------------------------------------------- + +pSrcMVCurMB RN 0 +pSrcCandMV1 RN 1 +pSrcCandMV2 RN 2 +pSrcCandMV3 RN 3 +pDstMVPred RN 4 +pDstMVPredME RN 5 +iBlk RN 6 + +pTable RN 4 +CandMV RN 12 + +pCandMV1 RN 7 +pCandMV2 RN 8 +pCandMV3 RN 9 + +CandMV1dx RN 0 +CandMV1dy RN 1 +CandMV2dx RN 2 +CandMV2dy RN 3 +CandMV3dx RN 10 +CandMV3dy RN 11 + +temp RN 14 + +zero RN 14 +return RN 0 + +; ---------------------------------------------- +; Main routine +; ---------------------------------------------- + + M_ALLOC4 MV, 4 + + ;// Function header + M_START omxVCM4P2_FindMVpred, r11 + + ;// Define stack arguments + M_ARG ppDstMVPred, 4 + M_ARG ppDstMVPredME, 4 + M_ARG Blk, 4 + + M_ADR CandMV, MV + MOV zero, #0 + M_LDR iBlk, Blk + + ;// Set the default value for these (CandMV, a stack word zeroed below) + ;// to be used if pSrcCandMV[1|2|3] == NULL + MOV pCandMV1, CandMV + MOV pCandMV2, CandMV + MOV pCandMV3, CandMV + + STR zero, [CandMV] + + ;// Branch to the case based on blk number + M_SWITCH iBlk + M_CASE OMXVCBlk0 ;// iBlk=0 + M_CASE OMXVCBlk1 ;// iBlk=1 + M_CASE OMXVCBlk2 ;// iBlk=2 + M_CASE OMXVCBlk3 ;// iBlk=3 + M_ENDSWITCH + +OMXVCBlk0 + CMP pSrcCandMV1, #0 + ADDNE pCandMV1, pSrcCandMV1, #4 + + CMP pSrcCandMV2, #0 + ADDNE pCandMV2, pSrcCandMV2, #8 + + CMP pSrcCandMV3, #0 + ADDNE pCandMV3, pSrcCandMV3, #8 + CMPEQ pSrcCandMV1, #0 + + MOVEQ pCandMV3, pCandMV2 + MOVEQ pCandMV1, pCandMV2 + + CMP pSrcCandMV1, #0 + CMPEQ pSrcCandMV2, #0 + + MOVEQ pCandMV1, pCandMV3 + MOVEQ pCandMV2, pCandMV3 + + CMP pSrcCandMV2, #0 + CMPEQ pSrcCandMV3, #0 + + MOVEQ pCandMV2, pCandMV1 + MOVEQ pCandMV3, pCandMV1 + + B BlkEnd +
+OMXVCBlk1 + MOV pCandMV1, pSrcMVCurMB + CMP pSrcCandMV3, #0 + ADDNE pCandMV3, pSrcCandMV3, #8 + + CMP pSrcCandMV2, #0 + ADDNE pCandMV2, pSrcCandMV2, #12 + + CMPEQ pSrcCandMV3, #0 + + MOVEQ pCandMV2, pCandMV1 + MOVEQ pCandMV3, pCandMV1 + + B BlkEnd + +OMXVCBlk2 + CMP pSrcCandMV1, #0 + MOV pCandMV2, pSrcMVCurMB + ADD pCandMV3, pSrcMVCurMB, #4 + ADDNE pCandMV1, pSrcCandMV1, #12 + B BlkEnd + +OMXVCBlk3 + ADD pCandMV1, pSrcMVCurMB, #8 + MOV pCandMV2, pSrcMVCurMB + ADD pCandMV3, pSrcMVCurMB, #4 + +BlkEnd + + ;// Load the dx components of the three candidate MVs + ;// (each pointer is post-incremented by 2 to address dy) + LDRSH CandMV1dx, [pCandMV1], #2 + LDRSH CandMV2dx, [pCandMV2], #2 + LDRSH CandMV3dx, [pCandMV3], #2 + + ;// Load pDstMVPredME argument from the stack + M_LDR pDstMVPredME, ppDstMVPredME + + LDRSH CandMV1dy, [pCandMV1] + LDRSH CandMV2dy, [pCandMV2] + LDRSH CandMV3dy, [pCandMV3] + + CMP pDstMVPredME, #0 + + ;// If pDstMVPredME is not NULL, store the candidate MVs into it; + ;// these can be used in the fast algorithm if implemented + + STRHNE CandMV1dx, [pDstMVPredME], #2 + STRHNE CandMV1dy, [pDstMVPredME], #2 + STRHNE CandMV2dx, [pDstMVPredME], #2 + STRHNE CandMV2dy, [pDstMVPredME], #2 + STRHNE CandMV3dx, [pDstMVPredME], #2 + STRHNE CandMV3dy, [pDstMVPredME] + + ; Find the median of the 3 candidate MVs (dx first, then dy) + M_MEDIAN3 CandMV1dx, CandMV2dx, CandMV3dx, temp + + ;// Load pDstMVPred argument from the stack + M_LDR pDstMVPred, ppDstMVPred + + M_MEDIAN3 CandMV1dy, CandMV2dy, CandMV3dy, temp + + STRH CandMV3dx, [pDstMVPred], #2 + STRH CandMV3dy, [pDstMVPred] + + MOV return, #OMX_Sts_NoErr + + M_END + ENDIF ;// ARM1136JS + + END
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s new file mode 100644 index 0000000..bfeb540 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s @@ -0,0 +1,73 @@ +;// +;// +;// File Name: omxVCM4P2_IDCT8x8blk_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + +;// Function: +;// omxVCM4P2_IDCT8x8blk +;// + ;// Include headers + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + ;// Define cpu variants + M_VARIANTS ARM1136JS + + INCLUDE armCOMM_IDCT_s.h + + IMPORT armCOMM_IDCTPreScale + ;// + ;// Function prototype + ;// + ;// OMXResult + ;// omxVCM4P2_IDCT8x8blk(const OMX_S16* pSrc, + ;// OMX_S16* pDst) + ;// + + IF ARM1136JS :LOR: CortexA8 + M_ALLOC4 ppDest, 4 + M_ALLOC4 pStride, 4 + M_ALLOC8 pBlk, 2*8*8 + ENDIF + + IF ARM1136JS + M_START omxVCM4P2_IDCT8x8blk, r11 + ENDIF + + + IF ARM1136JS :LOR: CortexA8 + +;// Declare input registers (pSrc in r0, pDst in r1) +pSrc RN 0 +pDst RN 1 + +;// Return value register (shares r0 with pSrc) +Result RN 0 + +;// Register assignments from the M_IDCT macro prototype (declared below) +;// pSrc RN 0 ;// source data buffer +;// Stride RN 1 ;// destination stride in bytes +;// pDest RN 2 ;// destination data buffer +;// pScale RN 3 ;// pointer to scaling table + +pSrc RN 0 +Stride RN 1 +pDest RN 2 +pScale RN 3 + + MOV pDest, pDst + LDR pScale, =armCOMM_IDCTPreScale + M_IDCT s9, s16, 16 + MOV Result, #OMX_Sts_NoErr + M_END + ENDIF + ;// ARM1136JS :LOR: CortexA8 + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s new file mode 100644 index 0000000..20965bf --- /dev/null +++
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s @@ -0,0 +1,713 @@ +;// +;// +;// File Name: omxVCM4P2_MCReconBlock_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 9641 +;// Date: Thursday, February 7, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// +;// Description: +;// +;// + +;// Include standard headers + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + +;// Import symbols required from other files + + M_VARIANTS ARM1136JS + +;// *************************************************************************** +;// ARM1136JS implementation +;// *************************************************************************** + IF ARM1136JS + +;// *************************************************************************** +;// MACRO DEFINITIONS +;// *************************************************************************** + ;// Description: + ;// + ;// dest[j] = (x[j] + y[j] + round) >> 1, j=0..3 + ;// + ;// Similar to UHADD8 instruction, but with a rounding value of 1 added to + ;// each sum before dividing by two, if round is 1 + ;// + ;// Syntax: + ;// M_UHADD8R $dest, $x, $y, $round, $mask + ;// + ;// Inputs: + ;// $x four packed bytes, x[3] : x[2] : x[1] : x[0] + ;// $y four packed bytes, y[3] : y[2] : y[1] : y[0] + ;// $round 0 if no rounding to be added, 1 if rounding to be done + ;// $mask register set to 0x80808080; only used (EORed into the result) when $round is 1 + ;// + ;// Outputs: + ;// $dest four packed bytes, z[3] : z[2] : z[1] : z[0] + + MACRO + M_UHADD8R $dest, $x, $y, $round, $mask + IF $round = 1 + IF $dest /= $y + MVN $dest, $x + UHSUB8 $dest, $y, $dest + EOR $dest, $dest, $mask + ELSE + MVN $dest, $y + UHSUB8 $dest, $x, $dest + EOR $dest, $dest, $mask + ENDIF + ELSE + UHADD8 $dest, $x, $y + ENDIF + MEND +;// *************************************************************************** + ;// Description: + ;// Load 8 bytes from $pSrc (aligned or unaligned locations) + ;// + ;// Syntax: + ;// M_LOAD_X $pSrc, $srcStep, $out0,
$out1, $scratch, $offset + ;// + ;// Inputs: + ;// $pSrc 4 byte aligned source pointer to an address just less than + ;// or equal to the data location + ;// $srcStep The stride on source + ;// $scratch A scratch register, used internally for temp calculations + ;// $offset Difference of source data location to the source pointer, + ;// in bytes; 0 selects the aligned two-word load + ;// + ;// Outputs: + ;// $pSrc Incremented by $srcStep after the load, in both the + ;// aligned and the unaligned case + ;// $out0 four packed bytes, z[3] : z[2] : z[1] : z[0] + ;// $out1 four packed bytes, z[7] : z[6] : z[5] : z[4] + ;// + ;// Note: {$out0, $out1, $scratch} should be registers with ascending + ;// register numbering. In case offset is 0, $scratch is not modified. + + MACRO + M_LOAD_X $pSrc, $srcStep, $out0, $out1, $scratch, $offset + IF $offset = 0 + LDM $pSrc, {$out0, $out1} + ADD $pSrc, $pSrc, $srcStep + ELSE + LDM $pSrc, {$out0, $out1, $scratch} + ADD $pSrc, $pSrc, $srcStep + + MOV $out0, $out0, LSR #8 * $offset + ORR $out0, $out0, $out1, LSL #(32 - 8 * ($offset)) + MOV $out1, $out1, LSR #8 * $offset + ORR $out1, $out1, $scratch, LSL #(32 - 8 * ($offset)) + ENDIF + MEND + +;// *************************************************************************** + ;// Description: + ;// Loads three words for X interpolation, update pointer to next row. For + ;// X interpolation, given a truncated-4byteAligned source pointer, + ;// invariably three contiguous words are required from there to get the + ;// nine bytes from the source pointer for filtering.
+ ;// + ;// Syntax: + ;// M_LOAD_XINT $pSrc, $srcStep, $offset, $word0, $word1, $word2, $word3 + ;// + ;// Inputs: + ;// $pSrc 4 byte aligned source pointer to an address just less than + ;// or equal to the data location + ;// + ;// $srcStep The stride on source + ;// + ;// $offset Difference of source data location to the source pointer + ;// Use when $offset != 0 (unaligned load) + ;// + ;// Outputs: + ;// $pSrc Incremented by $srcStep + ;// + ;// $word0, $word1, $word2, $word3 + ;// Three of these are outputs based on the $offset parameter. + ;// The outputs are specifically generated to be processed by + ;// the M_EXT_XINT macro. Following is the illustration to show + ;// how the nine bytes are spanned for different offsets from + ;// notTruncatedForAlignmentSourcePointer. + ;// + ;// ------------------------------------------------------ + ;// | Offset | Aligned Ptr | word0 | word1 | word2 | word3 | + ;// |------------------------------------------------------| + ;// | 0 | 0 | 0123 | 4567 | 8xxx | | + ;// | 1 | -1 | x012 | 3456 | 78xx | | + ;// | 2 | -2 | xx01 | 2345 | 678x | | + ;// | 3 | -3 | xxx0 | | 1234 | 5678 | + ;// ------------------------------------------------------ + ;// + ;// where the numbering (0-8) is to designate the 9 bytes from + ;// start of a particular row. 
The illustration doesn't take into + ;// account the positioning of bytes within the word and the + ;// macro combination with M_EXT_XINT will work only in little + ;// endian environments + ;// + ;// Note: {$word0, $word1, $word2, $word3} should be registers with ascending + ;// register numbering + + MACRO + M_LOAD_XINT $pSrc, $srcStep, $offset, $word0, $word1, $word2, $word3 + IF $offset /= 3 + ;// offsets 0-2: the three consecutive words land in $word0, $word1, $word2 + LDM $pSrc, {$word0, $word1, $word2} + ELSE + ;// offset 3: the three consecutive words land in $word0, $word2, $word3 + ;// ($word1 is not written here) + LDM $pSrc, {$word0, $word2, $word3} + ENDIF + ADD $pSrc, $pSrc, $srcStep + MEND + +;// *************************************************************************** + ;// Description: + ;// Extract four registers of four pixels for X interpolation + ;// + ;// Syntax: + ;// M_EXT_XINT $offset, $word0, $word1, $word2, $word3 + ;// + ;// Inputs: + ;// $offset Difference of source data location to the source pointer + ;// (0 to 3; the macro has a code path for every value) + ;// + ;// $word0, $word1, $word2, $word3 + ;// Three of these are inputs based on the $offset parameter. + ;// The inputs are specifically selected to be processed by + ;// the M_EXT_XINT macro. + ;// + ;// ------------------------------------------------------ + ;// | Offset | Aligned Ptr | word0 | word1 | word2 | word3 | + ;// |------------------------------------------------------| + ;// | 0 | 0 | 0123 | 4567 | 8xxx | yyyy | + ;// | 1 | -1 | x012 | 3456 | 78xx | yyyy | + ;// | 2 | -2 | xx01 | 2345 | 678x | yyyy | + ;// | 3 | -3 | xxx0 | yyyy | 1234 | 5678 | + ;// ------------------------------------------------------ + ;// + ;// Outputs: + ;// $word0, $word1, $word2, $word3 + ;// Bytes from the original source pointer (not truncated for + ;// 4 byte alignment) as shown in the table. 
+ ;// ------------------------------- + ;// | word0 | word1 | word2 | word3 | + ;// |-------------------------------| + ;// | 0123 | 4567 | 1234 | 5678 | + ;// ------------------------------- + ;// + ;// Note: {$word0, $word1, $word2, $word3} should be registers with ascending + ;// register numbering + + MACRO + M_EXT_XINT $offset, $word0, $word1, $word2, $word3 + IF $offset = 0 + ; $word0 and $word1 are ok + ; $word2, $word3 are just 8 shifted versions + ; ($word3 is built first because it still needs the loaded $word2) + MOV $word3, $word1, LSR #8 + ORR $word3, $word3, $word2, LSL #24 + MOV $word2, $word0, LSR #8 + ORR $word2, $word2, $word1, LSL #24 + ELIF $offset = 3 + ; $word2 and $word3 are ok (taken care while loading itself) + ; set $word0 & $word1 + MOV $word0, $word0, LSR #24 + ORR $word0, $word0, $word2, LSL #8 + MOV $word1, $word2, LSR #24 + ORR $word1, $word1, $word3, LSL #8 + ELSE + ;// offsets 1 and 2: first realign $word0 / $word1 to bytes 0-3 / 4-7 + MOV $word0, $word0, LSR #8 * $offset + ORR $word0, $word0, $word1, LSL #(32 - 8 * ($offset)) + MOV $word1, $word1, LSR #8 * $offset + ORR $word1, $word1, $word2, LSL #(32 - 8 * ($offset)) + + ;// then derive the one-byte-shifted copies; $word3 is built before + ;// $word2 is overwritten because it still needs the loaded $word2 + MOV $word3, $word1, LSR #8 + ORR $word3, $word3, $word2, LSL #(32 - 8 * (($offset)+1)) + MOV $word2, $word0, LSR #8 + ORR $word2, $word2, $word1, LSL #24 + ENDIF + MEND + +;// *************************************************************************** + ;// Description: + ;// Computes half-sum and xor of two inputs and puts them in the input + ;// registers in that order + ;// + ;// Syntax: + ;// M_HSUM_XOR $v0, $v1, $tmp + ;// + ;// Inputs: + ;// $v0 a, first input + ;// $v1 b, second input + ;// $tmp scratch register + ;// + ;// Outputs: + ;// $v0 (a + b)/2 + ;// $v1 a ^ b + + MACRO + M_HSUM_XOR $v0, $v1, $tmp + UHADD8 $tmp, $v0, $v1 ;// s0 = (a + b)/2 in each byte lane + EOR $v1, $v0, $v1 ;// l0 = a ^ b + MOV $v0, $tmp ;// s0 + MEND +;// *************************************************************************** + ;// Description: + ;// Calculates average of 4 values (a,b,c,d) for HalfPixelXY predict type in + ;// mcReconBlock module. 
Very specific to the implementation of + ;// M_MCRECONBLOCK_HalfPixelXY done here. Uses "tmp" as scratch register and + ;// "yMask" for mask variable "0x1010101x" set in it. In yMask 4 lsbs are + ;// not significant and are used by the callee for row counter (y) + ;// + ;// Some points to note are: + ;// 1. Input is pair of pair-averages and Xors + ;// 2. $sum1 and $lsb1 are not modified and hence can be reused in another + ;// running average + ;// 3. Output is in the first argument + ;// + ;// Syntax: + ;// M_AVG4 $sum0, $lsb0, $sum1, $lsb1, $rndVal + ;// + ;// Inputs: + ;// $sum0 (a + b) >> 1, where a and b are 1st and 2nd inputs to be averaged + ;// $lsb0 (a ^ b). Overwritten: the macro reuses it as scratch + ;// $sum1 (c + d) >> 1. Not modified + ;// $lsb1 (c ^ d) Not modified + ;// $rndVal Assembler Variable. 0 for rounding, 1 for no rounding + ;// + ;// Outputs: + ;// $sum0 (a + b + c + d + 1) / 4 : If no rounding + ;// (a + b + c + d + 2) / 4 : If rounding + + MACRO + M_AVG4 $sum0, $lsb0, $sum1, $lsb1, $rndVal + LCLS OP1 + LCLS OP2 + IF $rndVal = 0 ;// rounding case +OP1 SETS "AND" +OP2 SETS "ORR" + ELSE ;// Not rounding case +OP1 SETS "ORR" +OP2 SETS "AND" + ENDIF + + LCLS lsb2 + LCLS sum2 + LCLS dest + +lsb2 SETS "tmp" +sum2 SETS "$lsb0" +dest SETS "$sum0" + + $OP1 $lsb0, $lsb0, $lsb1 ;// e0 = e0 & e1 when rounding, e0 | e1 otherwise + EOR $lsb2, $sum0, $sum1 ;// e2 = s0 ^ s1 + $OP2 $lsb2, $lsb2, $lsb0 ;// e2 = e2 | e0 when rounding, e2 & e0 otherwise + AND $lsb2, $lsb2, yMask, LSR # 4 ;// e2 = e2 & mask (yMask >> 4 drops the row counter nibble) + UHADD8 $sum2, $sum0, $sum1 ;// s2 = (s0 + s1)/2, written over $lsb0 + UADD8 $dest, $sum2, $lsb2 ;// dest = s2 + e2 + MEND +;// *************************************************************************** +;// Motion compensation handler macros +;// *************************************************************************** + ;// Description: + ;// Implement motion compensation routines using the named registers in + ;// callee function. 
Each of the following 4 macros implements one of the 4 predict types. + ;// Each handles 8 cases, i.e. all the combinations of 4 types of source + ;// alignment offsets and 2 types of rounding flag + ;// + ;// Syntax: + ;// M_MCRECONBLOCK_IntegerPixel $rndVal, $offset + ;// M_MCRECONBLOCK_HalfPixelX $rndVal, $offset + ;// M_MCRECONBLOCK_HalfPixelY $rndVal, $offset + ;// M_MCRECONBLOCK_HalfPixelXY $rndVal, $offset + ;// + ;// Inputs: + ;// $rndVal Assembler Variable. 0 for rounding, 1 for no rounding + ;// $offset $pSrc MOD 4 value. Offset from 4 byte aligned location. + ;// + ;// Outputs: + ;// Outputs come in the named registers of the callee functions + ;// The macro loads the data from the source pointer, processes it and + ;// stores in the destination pointer. Does the whole prediction cycle + ;// of Motion Compensation routine for a particular predictType + ;// After this only residue addition to the predicted values remains + + MACRO + M_MCRECONBLOCK_IntegerPixel $rndVal, $offset + ;// Algorithmic Description: + ;// This handles motion compensation for IntegerPixel predictType. Both + ;// rounding cases are handled by the same code base. It is just a copy + ;// from source to destination. Two lines are done per loop to reduce + ;// stalls. Loop has been software pipelined as well for that purpose. 
+ ;// + ;// M_LOAD_X loads a whole row in two registers and then they are stored + ;// + ;// Both rounding labels are emitted on the same code because the copy + ;// does not depend on $rndVal. + ;// NOTE(review): two rows are loaded before the loop and two more at the + ;// bottom of every iteration, so one pair of rows is loaded that is never + ;// stored. + +CaseIntegerPixelRnd0Offset$offset +CaseIntegerPixelRnd1Offset$offset + M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp3, $offset + M_LOAD_X pSrc, srcStep, tmp3, tmp4, tmp5, $offset +YloopIntegerPixelOffset$offset + SUBS y, y, #2 ;// two rows per iteration; flags are consumed by the BGT below + STRD tmp1, tmp2, [pDst], dstStep + STRD tmp3, tmp4, [pDst], dstStep + M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp3, $offset + M_LOAD_X pSrc, srcStep, tmp3, tmp4, tmp5, $offset + BGT YloopIntegerPixelOffset$offset + + B SwitchPredictTypeEnd + MEND +;// *************************************************************************** + MACRO + M_MCRECONBLOCK_HalfPixelX $rndVal, $offset + ;// Algorithmic Description: + ;// This handles motion compensation for HalfPixelX predictType. The two + ;// rounding cases are handled by the different code base and spanned by + ;// different macro calls. Loop has been software pipelined to reduce + ;// stalls. + ;// + ;// Filtering involves averaging a pixel with the next horizontal pixel. + ;// M_LOAD_XINT and M_EXT_XINT combination generate 4 registers, 2 with + ;// all pixels in a row with 4 pixel in each register and another 2 + ;// registers with pixels corresponding to one horizontally shifted pixel + ;// corresponding to the initial row pixels. These are set of packed + ;// registers appropriate to do 4 lane SIMD. 
+ ;// After that M_UHADD8R macro does the averaging taking care of the + ;// rounding as required + ;// (M_UHADD8R is defined outside this part of the file; it is passed + ;// 1-$rndVal as its rounding selector and "mask" as its last argument.) + +CaseHalfPixelXRnd$rndVal.Offset$offset + IF $rndVal = 0 + ;// constant is only loaded for the rounding variant + LDR mask, =0x80808080 + ENDIF + + M_LOAD_XINT pSrc, srcStep, $offset, tmp1, tmp2, tmp3, tmp4 +YloopHalfPixelXRnd$rndVal.Offset$offset + SUBS y, y, #1 ;// one row per iteration; flags are consumed by the BGT below + M_EXT_XINT $offset, tmp1, tmp2, tmp3, tmp4 + M_UHADD8R tmp5, tmp1, tmp3, (1-$rndVal), mask + M_UHADD8R tmp6, tmp2, tmp4, (1-$rndVal), mask + STRD tmp5, tmp6, [pDst], dstStep + M_LOAD_XINT pSrc, srcStep, $offset, tmp1, tmp2, tmp3, tmp4 ;// pre-load the next row + BGT YloopHalfPixelXRnd$rndVal.Offset$offset + + B SwitchPredictTypeEnd + MEND +;// *************************************************************************** + MACRO + M_MCRECONBLOCK_HalfPixelY $rndVal, $offset + ;// Algorithmic Description: + ;// This handles motion compensation for HalfPixelY predictType. The two + ;// rounding cases are handled by the different code base and spanned by + ;// different macro calls. PreLoading is used to avoid reload of same data. + ;// + ;// Filtering involves averaging a pixel with the next vertical pixel. + ;// M_LOAD_X generates 2 registers with all pixels in a row with 4 pixel in + ;// each register. These are set of packed registers appropriate to do + ;// 4 lane SIMD. 
After that M_UHADD8R macro does the averaging taking care + ;// of the rounding as required + ;// The loop alternates between the register pairs tmp1/tmp2 and + ;// tmp3/tmp4 so that the row loaded last is reused as the upper row of + ;// the next average. + +CaseHalfPixelYRnd$rndVal.Offset$offset + IF $rndVal = 0 + ;// constant is only loaded for the rounding variant + LDR mask, =0x80808080 + ENDIF + + M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp5, $offset ;// Pre-load +YloopHalfPixelYRnd$rndVal.Offset$offset + SUBS y, y, #2 ;// two rows per iteration; flags are consumed by the BGT below + ;// Processing one line + M_LOAD_X pSrc, srcStep, tmp3, tmp4, tmp5, $offset + M_UHADD8R tmp1, tmp1, tmp3, (1-$rndVal), mask + M_UHADD8R tmp2, tmp2, tmp4, (1-$rndVal), mask + STRD tmp1, tmp2, [pDst], dstStep + ;// Processing another line + M_LOAD_X pSrc, srcStep, tmp1, tmp2, tmp5, $offset + M_UHADD8R tmp3, tmp3, tmp1, (1-$rndVal), mask + M_UHADD8R tmp4, tmp4, tmp2, (1-$rndVal), mask + STRD tmp3, tmp4, [pDst], dstStep + + BGT YloopHalfPixelYRnd$rndVal.Offset$offset + + B SwitchPredictTypeEnd + MEND +;// *************************************************************************** + MACRO + M_MCRECONBLOCK_HalfPixelXY $rndVal, $offset + ;// Algorithmic Description: + ;// This handles motion compensation for HalfPixelXY predictType. The two + ;// rounding cases are handled by the different code base and spanned by + ;// different macro calls. PreLoading is used to avoid reload of same data. + ;// + ;// Filtering involves averaging a pixel with the next vertical, horizontal + ;// and right-down diagonal pixels. Just as in HalfPixelX case, M_LOAD_XINT + ;// and M_EXT_XINT combination generates 4 registers with a row and its + ;// 1 pixel right shifted version, with 4 pixels in one register. Another + ;// call of that macro-combination gets another row. Then M_HSUM_XOR is + ;// called to get mutual half-sum and xor combinations of a row with its + ;// shifted version as they are inputs to the M_AVG4 macro which computes + ;// the 4 element average with rounding. Note that it is the half-sum/xor + ;// values that are preserved for next row as they can be re-used in the + ;// next call to the M_AVG4 and saves recomputation. 
+ ;// Due to lack of register, the row counter and a masking value required + ;// in M_AVG4 are packed into a single register yMask where the last nibble + ;// holds the row counter values and rest holds the masking variable left + ;// shifted by 4 + ;// + ;// The counter starts at 8 and drops by 2 per iteration (two rows each), + ;// so it reaches exactly 0 after four iterations. + +CaseHalfPixelXYRnd$rndVal.Offset$offset + LDR yMask, =((0x01010101 << 4) + 8) + + M_LOAD_XINT pSrc, srcStep, $offset, t00, t01, t10, t11 ;// Load a, a', b, b' + M_EXT_XINT $offset, t00, t01, t10, t11 + M_HSUM_XOR t00, t10, tmp ;// s0, l0 + M_HSUM_XOR t01, t11, tmp ;// s0', l0' + +YloopHalfPixelXYRnd$rndVal.Offset$offset + ;// Processing one line + ;// t00, t01, t10, t11 required from previous loop + M_LOAD_XINT pSrc, srcStep, $offset, t20, t21, t30, t31 ;// Load c, c', d, d' + SUB yMask, yMask, #2 + M_EXT_XINT $offset, t20, t21, t30, t31 + M_HSUM_XOR t20, t30, tmp ;// s1, l1 + M_HSUM_XOR t21, t31, tmp ;// s1', l1' + M_AVG4 t00, t10, t20, t30, $rndVal ;// s0, l0, s1, l1 + M_AVG4 t01, t11, t21, t31, $rndVal ;// s0', l0', s1', l1' + STRD t00, t01, [pDst], dstStep ;// store the average + + ;// Processing another line + ;// t20, t21, t30, t31 required from above + M_LOAD_XINT pSrc, srcStep, $offset, t00, t01, t10, t11 ;// Load a, a', b, b' + TST yMask, #7 ;// Z is set once the row counter bits are all zero + M_EXT_XINT $offset, t00, t01, t10, t11 + M_HSUM_XOR t00, t10, tmp + M_HSUM_XOR t01, t11, tmp + M_AVG4 t20, t30, t00, t10, $rndVal + M_AVG4 t21, t31, t01, t11, $rndVal + STRD t20, t21, [pDst], dstStep + + ;// Loop on Z only: TST does not write V, and no flag-setting + ;// instruction is visible on the path from function entry to here, + ;// so a GT test (Z clear and N == V) would depend on a stale V flag. + BNE YloopHalfPixelXYRnd$rndVal.Offset$offset + + ;// The last handler emitted (offset 3, no rounding) falls through + IF $offset/=3 :LOR: $rndVal/=1 + B SwitchPredictTypeEnd + ENDIF + MEND +;// *************************************************************************** +;// Motion compensation handler macros end here +;// *************************************************************************** + ;// Description: + ;// Populates all 4 kinds of offsets "cases" for each predictType and rndVal + ;// combination in the "switch" to prediction processing code segment + ;// + ;// Syntax: + ;// M_CASE_OFFSET $rnd, $predictType + ;// 
Inputs: + ;// $rnd 0 for rounding, 1 for no rounding + ;// $predictType The prediction mode + ;// + ;// Outputs: + ;// Populated list of "M_CASE"s for the "M_SWITCH" macro + + MACRO + M_CASE_OFFSET $rnd, $predictType + M_CASE Case$predictType.Rnd$rnd.Offset0 + M_CASE Case$predictType.Rnd$rnd.Offset1 + M_CASE Case$predictType.Rnd$rnd.Offset2 + M_CASE Case$predictType.Rnd$rnd.Offset3 + MEND +;// *************************************************************************** + ;// Description: + ;// Populates all 2 kinds of rounding "cases" for each predictType in the + ;// "switch" to prediction processing code segment + ;// + ;// Syntax: + ;// M_CASE_MCRECONBLOCK $predictType + ;// + ;// Inputs: + ;// $predictType The prediction mode + ;// + ;// Outputs: + ;// Populated list of "M_CASE_OFFSET" macros + ;// + ;// The resulting case order (rounding 0 offsets 0-3, then rounding 1 + ;// offsets 0-3) matches the switch index built in the function body as + ;// (predictType << 3) | (rndVal << 2) | (pSrc & 3). + + MACRO + M_CASE_MCRECONBLOCK $predictType + M_CASE_OFFSET 0, $predictType ;// 0 for rounding + M_CASE_OFFSET 1, $predictType ;// 1 for no rounding + MEND +;// *************************************************************************** + ;// Description: + ;// Populates all 8 kinds of rounding and offset combinations handling macros + ;// for the specified predictType. In case of "IntegerPixel" predictType, + ;// rounding is not required so same code segment handles both cases + ;// + ;// Syntax: + ;// M_MCRECONBLOCK $predictType + ;// + ;// Inputs: + ;// $predictType The prediction mode + ;// + ;// Outputs: + ;// Populated list of "M_MCRECONBLOCK_<predictType>" macros for specified + ;// predictType. 
Each + ;// M_MCRECONBLOCK_<predictType> $rnd, $offset + ;// is a code segment (starting with a label indicating the predictType, + ;// rounding and offset combination) + ;// Four calls of this macro with the 4 prediction modes populate the + ;// handlers for all 32 switch cases (for IntegerPixel each segment + ;// carries both rounding labels) + + MACRO + M_MCRECONBLOCK $predictType + M_MCRECONBLOCK_$predictType 0, 0 + M_MCRECONBLOCK_$predictType 0, 1 + M_MCRECONBLOCK_$predictType 0, 2 + M_MCRECONBLOCK_$predictType 0, 3 + IF "$predictType" /= "IntegerPixel" ;// If not IntegerPixel then rounding makes a difference + M_MCRECONBLOCK_$predictType 1, 0 + M_MCRECONBLOCK_$predictType 1, 1 + M_MCRECONBLOCK_$predictType 1, 2 + M_MCRECONBLOCK_$predictType 1, 3 + ENDIF + MEND +;// *************************************************************************** +;// Input/Output Registers +;// (several names below alias the same register; they are live at different times) +pSrc RN 0 +srcStep RN 1 +arg_pSrcResidue RN 2 +pSrcResidue RN 12 +pDst RN 3 +dstStep RN 2 +predictType RN 10 +rndVal RN 11 +mask RN 11 + +;// Local Scratch Registers +zero RN 12 +y RN 14 + +tmp1 RN 4 +tmp2 RN 5 +tmp3 RN 6 +tmp4 RN 7 +tmp5 RN 8 +tmp6 RN 9 +tmp7 RN 10 +tmp8 RN 11 +tmp9 RN 12 + +t00 RN 4 +t01 RN 5 +t10 RN 6 +t11 RN 7 +t20 RN 8 +t21 RN 9 +t30 RN 10 +t31 RN 11 +tmp RN 12 + +yMask RN 14 + +dst RN 1 +return RN 0 + + ;// Allocate memory on stack + M_ALLOC4 Stk_pDst, 4 + M_ALLOC4 Stk_pSrcResidue, 4 + ;// Function header + M_START omxVCM4P2_MCReconBlock, r11 + ;// Define stack arguments + M_ARG Arg_dstStep, 4 + M_ARG Arg_predictType, 4 + M_ARG Arg_rndVal, 4 + ;// Save on stack (pDst and the residue pointer are needed again after prediction) + M_STR pDst, Stk_pDst + M_STR arg_pSrcResidue, Stk_pSrcResidue + ;// Load argument from the stack + M_LDR dstStep, Arg_dstStep + M_LDR predictType, Arg_predictType + M_LDR rndVal, Arg_rndVal + + MOV y, #8 ;// row counter for the prediction loops + + ;// Build the switch index: (predictType << 3) | (rndVal << 2) | (pSrc & 3) + AND tmp1, pSrc, #3 + ORR predictType, tmp1, predictType, LSL #3 + ORR predictType, predictType, rndVal, LSL #2 + ;// Truncating source pointer to align to 4 byte location + BIC pSrc, pSrc, #3 + + ;// Implementation takes care of all combinations of different + ;// predictTypes, rounding cases 
and source pointer offsets to alignment + ;// of 4 bytes in different code bases unless one of these parameters + ;// makes no difference to the implementation. The M_CASE_MCRECONBLOCK + ;// macros below each expand into 8 M_CASE macros for all combinations of the 2 + ;// rounding cases and 4 offsets of the pSrc pointer to the 4 byte + ;// alignment. + ;// The order of the four lines below fixes which predictType value + ;// selects which handler family (first line = value 0). + M_SWITCH predictType + M_CASE_MCRECONBLOCK IntegerPixel + M_CASE_MCRECONBLOCK HalfPixelX + M_CASE_MCRECONBLOCK HalfPixelY + M_CASE_MCRECONBLOCK HalfPixelXY + M_ENDSWITCH + + ;// The M_MCRECONBLOCK macros populate the code bases by calling all 8 + ;// particular macros (4 in case of IntegerPixel as rounding makes no + ;// difference there) to generate the code for all cases of rounding and + ;// offsets. LTORG is used to segment the code as code size bloated beyond + ;// 4KB. + M_MCRECONBLOCK IntegerPixel + M_MCRECONBLOCK HalfPixelX + LTORG + M_MCRECONBLOCK HalfPixelY + M_MCRECONBLOCK HalfPixelXY + ;// Every handler branches here, except the last one emitted, which falls through +SwitchPredictTypeEnd + + ;// Residue Addition + ;// This is done in 2 lane SIMD though loads are further optimized and + ;// 4 bytes are loaded in case of destination buffer. 
Algorithmic + ;// details are in inlined comments + M_LDR pSrcResidue, Stk_pSrcResidue + CMP pSrcResidue, #0 + BEQ pSrcResidueConditionEnd ;// NULL residue pointer: prediction only +pSrcResidueNotNull + M_LDR pDst, Stk_pDst + MOV y, #8 + SUB dstStep, dstStep, #4 ;// each row is written as 4 bytes (+4) and then 4 bytes (+dstStep-4) +Yloop_pSrcResidueNotNull + SUBS y, y, #1 ;// flags are consumed by the BGT at the bottom of the loop + LDR dst, [pDst] ;// dst = [dcba] + LDMIA pSrcResidue!, {tmp1, tmp2} ;// tmp1=[B A] tmp2=[D C] (16-bit residues, first one in the low half) + PKHBT tmp3, tmp1, tmp2, LSL #16 ;// Deltaval1 = [C A] + PKHTB tmp4, tmp2, tmp1, ASR #16 ;// DeltaVal2 = [D B] + UXTB16 tmp1, dst ;// tmp1 = [0c0a] + UXTB16 tmp2, dst, ROR #8 ;// tmp2 = [0d0b] + QADD16 tmp1, tmp1, tmp3 ;// Add and saturate to 16 bits + QADD16 tmp2, tmp2, tmp4 + USAT16 tmp1, #8, tmp1 ;// armClip(0, 255, tmp1) + USAT16 tmp2, #8, tmp2 ;// armClip(0, 255, tmp2) + ORR tmp1, tmp1, tmp2, LSL #8 ;// tmp1 = [dcba] + STR tmp1, [pDst], #4 + + ;// Same sequence for the second group of four pixels in the row + LDR dst, [pDst] + LDMIA pSrcResidue!, {tmp1, tmp2} + PKHBT tmp3, tmp1, tmp2, LSL #16 + PKHTB tmp4, tmp2, tmp1, ASR #16 + UXTB16 tmp1, dst + UXTB16 tmp2, dst, ROR #8 + QADD16 tmp1, tmp1, tmp3 + QADD16 tmp2, tmp2, tmp4 + USAT16 tmp1, #8, tmp1 + USAT16 tmp2, #8, tmp2 + ORR tmp1, tmp1, tmp2, LSL #8 + STR tmp1, [pDst], dstStep + + BGT Yloop_pSrcResidueNotNull +pSrcResidueConditionEnd + + MOV return, #OMX_Sts_NoErr + + M_END + ENDIF ;// ARM1136JS + +;// *************************************************************************** +;// CortexA8 implementation +;// *************************************************************************** + END +;// *************************************************************************** +;// omxVCM4P2_MCReconBlock ends +;// *************************************************************************** diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s new file mode 100644 index 0000000..213444a --- /dev/null +++ 
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s @@ -0,0 +1,283 @@ +; ********** +; * +; * File Name: omxVCM4P2_PredictReconCoefIntra_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains module for DC/AC coefficient prediction +; * +; * +; * Function: omxVCM4P2_PredictReconCoefIntra +; * +; * Description: +; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior +; * to the function call, prediction direction (predDir) should be selected +; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2. +; * +; * Remarks: +; * +; * Parameters: +; * [in] pSrcDst pointer to the coefficient buffer which contains the +; * quantized coefficient residuals (PQF) of the current +; * block; must be aligned on a 4-byte boundary. The +; * output coefficients are saturated to the range +; * [-2048, 2047]. +; * [in] pPredBufRow pointer to the coefficient row buffer; must be aligned +; * on a 4-byte boundary. +; * [in] pPredBufCol pointer to the coefficient column buffer; must be +; * aligned on a 4-byte boundary. +; * [in] curQP quantization parameter of the current block. curQP may +; * equal to predQP especially when the current block and +; * the predictor block are in the same macroblock. +; * [in] predQP quantization parameter of the predictor block +; * [in] predDir indicates the prediction direction which takes one +; * of the following values: +; * OMX_VIDEO_HORIZONTAL predict horizontally +; * OMX_VIDEO_VERTICAL predict vertically +; * [in] ACPredFlag a flag indicating if AC prediction should be +; * performed. 
It is equal to ac_pred_flag in the bit +; * stream syntax of MPEG-4 +; * [in] videoComp video component type (luminance, chrominance or +; * alpha) of the current block +; * [out] pSrcDst pointer to the coefficient buffer which contains +; * the quantized coefficients (QF) of the current +; * block +; * [out] pPredBufRow pointer to the updated coefficient row buffer +; * [out] pPredBufCol pointer to the updated coefficient column buffer +; * Return Value: +; * OMX_Sts_NoErr - no error +; * OMX_Sts_BadArgErr - Bad arguments +; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol. +; * - At least one of the following cases: curQP <= 0, predQP <= 0, curQP >31, +; * predQP > 31, predDir exceeds [1,2]. +; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not +; * 4-byte aligned. +; * +; * NOTE(review): the code below contains no argument checks; its only exit +; * path sets the return value to OMX_Sts_NoErr. +; * +; ********* + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + + + IMPORT armVCM4P2_Reciprocal_QP_S32 + IMPORT armVCM4P2_Reciprocal_QP_S16 + IMPORT armVCM4P2_DCScaler + + + + IF ARM1136JS + + +;// Input Arguments + +pSrcDst RN 0 +pPredBufRow RN 1 +pPredBufCol RN 2 +curQP RN 3 +QP RN 3 +predQP RN 4 +predDir RN 5 +ACPredFlag RN 6 +videoComp RN 7 + +;// Local Variables +;// (several names alias the same register; they are live at different times) + +temp2 RN 5 +negCurQP RN 7 +negdcScaler RN 7 +tempPred RN 8 + +dcScaler RN 4 +CoeffTable RN 9 +absCoeffDC RN 9 +temp3 RN 6 +absCoeffAC RN 6 + +shortVideoHeader RN 9 +predCoeffTable RN 10 +Count RN 10 +temp1 RN 12 +index RN 12 +Rem RN 14 +temp RN 11 +Return RN 0 + + + + M_START omxVCM4P2_PredictReconCoefIntra,r12 + + ;// Assigning pointers to Input arguments on Stack + + M_ARG predQPonStack,4 + M_ARG predDironStack,4 + M_ARG ACPredFlagonStack,4 + M_ARG videoComponStack,4 + + ;// DC Prediction + + M_LDR videoComp,videoComponStack ;// Load videoComp From Stack + + M_LDR predDir,predDironStack ;// Load Prediction direction + + ;// dcScaler Calculation + ;// byte table lookup at armVCM4P2_DCScaler + videoComp*32 + QP + + LDR index, =armVCM4P2_DCScaler + ADD index,index,videoComp,LSL #5 + LDRB dcScaler,[index,QP] + + +calDCVal 
+ + + LDR predCoeffTable, =armVCM4P2_Reciprocal_QP_S16 ;// Loading the table with entries 32767/(1 to 63) + + CMP predDir,#2 ;// Check if the Prediction direction is vertical + + ;// Calculate temp pred by performing Division + + LDREQSH absCoeffDC,[pPredBufRow] ;// If vertical load the coeff from Row Prediction Buffer + LDRNESH absCoeffDC,[pPredBufCol] ;// If horizontal load the coeff from column Prediction Buffer + + RSB negdcScaler,dcScaler,#0 ;// negdcScaler=-dcScaler + + MOV temp1,absCoeffDC ;// temp1=prediction coeff (signed copy, kept for the sign test below) + CMP temp1,#0 + RSBLT absCoeffDC,temp1,#0 ;// absCoeffDC=abs(temp1) + + ADD temp,dcScaler,dcScaler ;// byte offset of the 16-bit table entry for dcScaler + LDRH temp,[predCoeffTable,temp] ;// Load value from coeff table for performing division using multiplication + + SMULBB tempPred,temp,absCoeffDC ;// tempPred=abs(pPredBufRow(Col)[0])*32767/dcScaler + ADD temp3,dcScaler,#1 + LSR tempPred,tempPred,#15 ;// tempPred=abs(pPredBufRow(Col)[0])/dcScaler + LSR temp3,temp3,#1 ;// temp3=round(dcScaler/2) + + MLA Rem,negdcScaler,tempPred,absCoeffDC ;// Rem = abs(pPredBufRow(Col)[0])-tempPred*dcScaler + + + LDRH temp,[pPredBufCol] ;// old pPredBufCol[0], stored 16 bytes below pPredBufRow further down + CMP Rem,temp3 + ADDGE tempPred,#1 ;// If Rem>=round(dcScaler/2);tempPred=tempPred+1 + CMP temp1,#0 + RSBLT tempPred,tempPred,#0 ;// if pPredBufRow(Col)[0]<0; tempPred=-tempPred + + + STRH temp,[pPredBufRow,#-16] + + LDRH temp,[pSrcDst] ;// temp=pSrcDst[0] + M_LDR ACPredFlag,ACPredFlagonStack + ADD temp,temp,tempPred ;// temp=pSrcDst[0]+tempPred + SSAT16 temp,#12,temp ;// clip temp to [-2048,2047] + + SMULBB temp1,temp,dcScaler ;// temp1=clipped(pSrcDst[0])*dcScaler + M_LDR predQP,predQPonStack ;// predQP shares its register with dcScaler, which is dead from here on + STRH temp,[pSrcDst] + CMP ACPredFlag,#1 ;// Check if the AC prediction flag is set or not + STRH temp1,[pPredBufCol] ;// store temp1 to pPredBufCol + + ;// AC Prediction + + + BNE Exit ;// If not set Exit + + LDR predCoeffTable, =armVCM4P2_Reciprocal_QP_S32 ;// Loading the table with entries 0x1ffff/(1 to 63) + MOV temp1,#4 + MUL temp1,curQP,temp1 + CMP predDir,#2 ;// Check the Prediction direction + RSB 
negCurQP,curQP,#0 + LDR CoeffTable,[predCoeffTable,temp1] ;// CoeffTable=0x1ffff/curQP + ADD curQP,curQP,#1 ;// curQP=curQP+1 + LSR curQP,curQP,#1 ;// curQP=round(curQP/2) + MOV Count,#2 ;// Initializing the Loop Count + BNE Horizontal ;// If the Prediction direction is horizontal branch to Horizontal + + + +loop1 + ;// Calculate tempPred + + LDRSH absCoeffAC,[pPredBufRow,Count] ;// absCoeffAC=pPredBufRow[i], 1=<i<=7 + MOV temp1,absCoeffAC + CMP temp1,#0 ;// compare pPredBufRow[i] with zero, 1=<i<=7 + RSBLT absCoeffAC,temp1,#0 ;// absCoeffAC= abs(pPredBufRow[i]) + + SMULBB absCoeffAC,absCoeffAC,predQP ;// absCoeffAC=abs(pPredBufRow[i])*predQP + MUL tempPred,absCoeffAC,CoeffTable ;// tempPred=abs(pPredBufRow[i])*predQP*0x1ffff/curQP + LSR tempPred,tempPred,#17 + + MLA Rem,negCurQP,tempPred,absCoeffAC ;// Rem=abs(pPredBufRow[i])*predQP-tempPred*curQP + LDRH temp,[pSrcDst,Count] ;// temp=pSrcDst[i],1<=i<8 + + CMP Rem,curQP + ADDGE tempPred,#1 ;// if Rem>=round(curQP/2); tempPred=tempPred+1 + CMP temp1,#0 + RSBLT tempPred,tempPred,#0 ;// if pPredBufRow[i]<0 ; tempPred=-tempPred + + ;// Update source and Row Prediction buffers + + ADD temp,temp,tempPred ;// temp=tempPred+pSrcDst[i] + SSAT16 temp,#12,temp ;// Clip temp to [-2048,2047] + STRH temp,[pSrcDst,Count] + STRH temp,[pPredBufRow,Count] ;// pPredBufRow[i]=temp + ADD Count,Count,#2 ;// i=i+1 + CMP Count,#16 ;// compare if i=8 + BLT loop1 + B Exit ;// Branch to exit + +Horizontal + + MOV Count,#16 ;// Initializing i=8 + +loop2 + + LSR temp2,Count,#3 ;// temp2=i>>3 + + ;// Calculate tempPred + + ;// Signed load: the predictor can be negative, and the sign test and + ;// abs() below only work on a sign-extended value (as in loop1). + LDRSH absCoeffAC,[pPredBufCol,temp2] ;// absCoefAC=pPredBufCol[i>>3] + MOV temp1,absCoeffAC + CMP temp1,#0 ;// compare pPredBufCol[i>>3] with zero + RSBLT absCoeffAC,temp1,#0 ;// absCoeffAC=abs(pPredBufCol[i>>3]) + + SMULBB absCoeffAC,absCoeffAC,predQP ;// absCoeffAC=abs(pPredBufCol[i>>3])*predQP + MUL tempPred,absCoeffAC,CoeffTable ;// tempPred=abs(pPredBufCol[i>>3])*predQP*0x1ffff/curQP + LSR tempPred,tempPred,#17 ;// 
tempPred=pPredBufCol[i>>3]*predQP/curQP + + MLA Rem,negCurQP,tempPred,absCoeffAC ;// Rem=abs(pPredBufCol[i>>3])*predQP-tempPred*curQP + LDRH temp,[pSrcDst,Count] ;// temp=pSrcDst[i] + + CMP Rem,curQP ;// Compare Rem with round(curQP/2) + ADDGE tempPred,#1 ;// tempPred=tempPred+1 if Rem>=round(curQP/2) + CMP temp1,#0 + RSBLT tempPred,tempPred,#0 ;// if pPredBufCol[i>>3]<0 tempPred=-tempPred + + ;// Update source and Column Prediction buffers + + ADD temp,temp,tempPred ;// temp=pSrcDst[i]+tempPred + SSAT16 temp,#12,temp ;// Clip temp to [-2048,2047] + STRH temp,[pSrcDst,Count] ;// pSrcDst[i]= clipped value + STRH temp,[pPredBufCol,temp2] ;// pPredBufCol[i>>3]=temp + ADD Count,Count,#16 ;// i=i+8 + CMP Count,#128 ;// compare i with 64 + BLT loop2 + + +Exit + + MOV Return,#OMX_Sts_NoErr + + M_END + ENDIF + END + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s new file mode 100644 index 0000000..c9591cb --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s @@ -0,0 +1,141 @@ +;/** +; * +; * File Name: omxVCM4P2_QuantInvInter_I_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains modules for inter reconstruction +; * +; * +; * +; * +; * +; * Function: omxVCM4P2_QuantInvInter_I +; * +; * Description: +; * Performs inverse quantization on intra/inter coded block. +; * This function supports bits_per_pixel = 8. Mismatch control +; * is performed for the first MPEG-4 mode inverse quantization method. +; * The output coefficients are clipped to the range: [-2048, 2047]. +; * Mismatch control is performed for the first inverse quantization method. +; * +; * Remarks: +; * +; * Parameters: +; * [in] pSrcDst pointer to the input (quantized) intra/inter block. 
Must be 16-byte aligned. +; * [in] QP quantization parameter (quantiser_scale) +; * [in] videoComp (Intra version only.) Video component type of the +; * current block. Takes one of the following flags: +; * OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE, +; * OMX_VC_ALPHA. +; * [in] shortVideoHeader a flag indicating presence of short_video_header; +; * shortVideoHeader==1 selects linear intra DC mode, +; * and shortVideoHeader==0 selects nonlinear intra DC mode. +; * [out] pSrcDst pointer to the output (dequantized) intra/inter block. Must be 16-byte aligned. +; * +; * Return Value: +; * OMX_Sts_NoErr - no error +; * OMX_Sts_BadArgErr - bad arguments +; * - If pSrcDst is NULL or is not 16-byte aligned. +; * or +; * - If QP <= 0. +; * or +; * - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA. +; * +; */ + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + + + IF ARM1136JS + +;//Input Arguments +pSrcDst RN 0 +QP RN 1 + +;//Local Variables +Return RN 0 +Count RN 4 +tempVal21 RN 2 +tempVal43 RN 3 +QP1 RN 5 +X2 RN 6 +X3 RN 14 +Result1 RN 8 +Result2 RN 9 +two RN 7 + + M_START omxVCM4P2_QuantInvInter_I,r9 + + MOV Count,#64 + TST QP,#1 + LDRD tempVal21,[pSrcDst] ;// Loads first two values of pSrcDst to tempVal21, + ;// next two values to tempVal43 + SUBEQ QP1,QP,#1 ;// QP1=QP if QP is odd , QP1=QP-1 if QP is even + MOVNE QP1,QP + MOV two,#2 + + + +Loop + ;// Four coefficients per iteration. For each one: double it, take the + ;// absolute value, apply QP*(2*abs)+QP1 when it is non-zero, restore the + ;// sign, then pack and saturate two results at a time. + ;// NOTE(review): PKHBT keeps only the low 16 bits of each 32-bit result, + ;// so SSAT16 clips the already truncated value. + + + SMULBB X2,tempVal21,two ;// X2= first val(lower 16 bits of tempVal21)*2 + CMP X2,#0 + + RSBLT X2,X2,#0 ;// X2=2*absoluteval(first val) + SMLABBNE X2,QP,X2,QP1 ;// X2=2*absval(first val)*QP+QP if QP is odd + ;// X2=2*absval(first val)*QP+QP-1 if QP is even + SMULTB X3,tempVal21,two ;// X3= second val(top 16 bits of tempVal21)*2 + RSBLT X2,X2,#0 ;// restore the sign (flags still from CMP X2,#0) + + CMP X3,#0 + + RSBLT X3,X3,#0 + SMLABBNE X3,QP,X3,QP1 + + RSBLT X3,X3,#0 + PKHBT Result1,X2,X3,LSL #16 ;// Result1[0-15]=X2[0-15],Result1[16-31]=X3[0-15] + SMULBB X2,tempVal43,two ;// X2= first val(lower 16 bits of tempVal43)*2 + SSAT16 
Result1,#12,Result1 ;// clip to range [-2048,2047] + CMP X2,#0 + + + + RSBLE X2,X2,#0 ;// LE instead of LT only adds the zero case, where the negation changes nothing + SMLABBNE X2,QP,X2,QP1 + SMULTB X3,tempVal43,two ;// X3= second val(top 16 bits of tempVal43)*2 + RSBLT X2,X2,#0 + CMP X3,#0 + + ;// NOTE(review): on the final iteration this reads the 8 bytes that + ;// follow the 64 coefficients; the loaded values are not used. + LDRD tempVal21,[pSrcDst,#8] ;// Load next four Values to tempVal21,tempVal43 + + RSBLT X3,X3,#0 + SMLABBNE X3,QP,X3,QP1 + RSBLT X3,X3,#0 + PKHBT Result2,X2,X3,LSL #16 ;// Result2[0-15]=X2[0-15],Result2[16-31]=X3[0-15] + SSAT16 Result2,#12,Result2 ;// clip to range [-2048,2047] + + SUBS Count,Count,#4 ;// Decrement Count by 4 and continue if it has not reached 0 + STRD Result1,[pSrcDst],#8 ;// Store Double words and increment the pointer to point the next store address + + + + BGT Loop + + MOV Return,#OMX_Sts_NoErr + + M_END + ENDIF + END + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s new file mode 100644 index 0000000..6328e01 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s @@ -0,0 +1,188 @@ +;/** +; * +; * File Name: omxVCM4P2_QuantInvIntra_I_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 9641 +; * Date: Thursday, February 7, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains modules for inter reconstruction +; * +; * +; * +; * +; * +; * +; * Function: omxVCM4P2_QuantInvIntra_I +; * +; * Description: +; * Performs inverse quantization on intra/inter coded block. +; * This function supports bits_per_pixel = 8. Mismatch control +; * is performed for the first MPEG-4 mode inverse quantization method. +; * The output coefficients are clipped to the range: [-2048, 2047]. +; * Mismatch control is performed for the first inverse quantization method. +; * +; * Remarks: +; * +; * Parameters: +; * [in] pSrcDst pointer to the input (quantized) intra/inter block. 
Must be 16-byte aligned.
+; *    [in]    QP                quantization parameter (quantiser_scale)
+; *    [in]    videoComp         (Intra version only.) Video component type of the
+; *                              current block. Takes one of the following flags:
+; *                              OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; *                              OMX_VC_ALPHA.
+; *    [in]    shortVideoHeader  a flag indicating presence of short_video_header;
+; *                              shortVideoHeader==1 selects linear intra DC mode,
+; *                              and shortVideoHeader==0 selects nonlinear intra DC mode.
+; *    [out]   pSrcDst           pointer to the output (dequantized) intra/inter block. Must be 16-byte aligned.
+; *
+; * Return Value:
+; *    OMX_Sts_NoErr - no error
+; *    OMX_Sts_BadArgErr - bad arguments
+; *        - If pSrcDst is NULL or is not 16-byte aligned.
+; *      or
+; *        - If QP <= 0.
+; *      or
+; *        - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+
+        IMPORT armVCM4P2_DCScaler
+
+
+
+        IF ARM1136JS
+
+;//Input Arguments
+pSrcDst          RN 0
+QP               RN 1
+videoComp        RN 2
+shortVideoHeader RN 3
+
+;//Local Variables
+Return           RN 0                        ;// shares r0 with pSrcDst; only written at the very end
+dcScaler         RN 4
+temp             RN 12                       ;// holds the dequantized DC value across the loop
+index            RN 6
+
+tempVal21        RN 4                        ;// reuses r4: dcScaler is last read before the first LDRD
+tempVal43        RN 5
+QP1              RN 6                        ;// reuses r6: index is last read before QP1 is written
+X2               RN 7
+X3               RN 14
+Result1          RN 8                        ;// r8/r9 form the STRD source pair
+Result2          RN 9
+two              RN 10
+Count            RN 11
+;// Implementation summary:
+;//  1. DC: temp = clip12(first coefficient * dcScaler); dcScaler is 8 when shortVideoHeader != 0,
+;//     otherwise the byte at armVCM4P2_DCScaler + videoComp*32 + QP.
+;//  2. All 64 slots get c' = clip12(sign(c)*(2*|c|*QP + QP1)); slot 0 is then overwritten with temp.
+        M_START omxVCM4P2_QuantInvIntra_I,r11
+;// This routine performs no argument validation and always returns OMX_Sts_NoErr;
+;// videoComp and QP are used unchecked to index armVCM4P2_DCScaler.
+
+        ;// Perform Inverse Quantization for DC coefficient
+
+        TEQ      shortVideoHeader,#0         ;// Test if short Video Header flag =0
+        MOVNE    dcScaler,#8                 ;// if shortVideoHeader is non zero dcScaler=8
+        BNE      calDCVal
+        LDR      index, =armVCM4P2_DCScaler
+        ADD      index,index,videoComp,LSL #5 ;// index = table base + videoComp*32
+        LDRB     dcScaler,[index,QP]         ;// dcScaler = byte at index + QP
+
+
+        ;//M_CalDCScalar shortVideoHeader,videoComp, QP
+
+calDCVal
+
+        LDRH     temp,[pSrcDst]              ;// first halfword of the block (the SMULBB below reads it as signed)
+        SMULBB   temp,temp,dcScaler          ;// dcCoeff = dcScaler * Quantized DC coefficient(from memory)
+        SSAT     temp,#12,temp               ;// Saturating to 12 bits
+
+
+        MOV      Count,#64                   ;// 64 coefficients, four consumed per iteration
+        TST      QP,#1                       ;// Z set when QP is even (used by SUBEQ/MOVNE below)
+        LDRD     tempVal21,[pSrcDst]         ;// Loads first two values of pSrcDst to tempVal21,
+                                             ;// next two values to tempVal43
+        SUBEQ    QP1,QP,#1                   ;// QP1=QP if QP is odd , QP1=QP-1 if QP is even
+        MOVNE    QP1,QP
+        MOV      two,#2                      ;// multiplier used to double each coefficient
+
+;// The loop is software pipelined: the next four inputs are loaded before the current four
+;// results are stored. NOTE(review): on the last iteration that preload reads (but never uses)
+;// the 8 bytes that follow the 128-byte block.
+
+Loop
+;// The flags set by each CMP are consumed by the conditional RSB/SMLA that follow it; the
+;// RSB, SMLABB, SMULxy, SSAT and LDRD in between leave the N/Z/C/V flags untouched.
+        SMULBB   X2,tempVal21,two            ;// X2 = 2 * first value (lower 16 bits of tempVal21)
+        CMP      X2,#0
+
+        RSBLT    X2,X2,#0                    ;// X2 = 2*abs(first value)
+        SMLABBNE X2,QP,X2,QP1                ;// nonzero only: X2=2*absval(first val)*QP+QP if QP is odd
+                                             ;//               X2=2*absval(first val)*QP+QP-1 if QP is even
+        SMULTB   X3,tempVal21,two            ;// X3 = 2 * second value (top 16 bits of tempVal21)
+        RSBLT    X2,X2,#0                    ;// restore the sign of the first value
+        SSAT     X2,#12,X2                   ;// clip the full 32-bit result to [-2048,2047] before packing
+        CMP      X3,#0
+
+        RSBLT    X3,X3,#0                    ;// X3 = 2*abs(second value)
+        SMLABBNE X3,QP,X3,QP1
+
+        RSBLT    X3,X3,#0                    ;// restore the sign of the second value
+        SSAT     X3,#12,X3                   ;// clip first: packing keeps only bits 0-15 and would wrap large results
+        PKHBT    Result1,X2,X3,LSL #16       ;// Result1[0-15]=X2[0-15],Result1[16-31]=X3[0-15]
+        SMULBB   X2,tempVal43,two            ;// X2 = 2 * third value (lower 16 bits of tempVal43)
+        CMP      X2,#0
+
+
+
+        RSBLT    X2,X2,#0                    ;// X2 = 2*abs(third value)
+        SMLABBNE X2,QP,X2,QP1
+        SMULTB   X3,tempVal43,two            ;// X3 = 2 * fourth value (top 16 bits of tempVal43)
+        RSBLT    X2,X2,#0                    ;// restore the sign of the third value
+        CMP      X3,#0
+        SSAT     X2,#12,X2                   ;// clip to range [-2048,2047]
+        LDRD     tempVal21,[pSrcDst,#8]      ;// Load next four Values to tempVal21,tempVal43
+
+        RSBLT    X3,X3,#0                    ;// X3 = 2*abs(fourth value)
+        SMLABBNE X3,QP,X3,QP1
+        RSBLT    X3,X3,#0                    ;// restore the sign of the fourth value
+        SSAT     X3,#12,X3                   ;// clip to range [-2048,2047]
+        PKHBT    Result2,X2,X3,LSL #16       ;// Result2[0-15]=X2[0-15],Result2[16-31]=X3[0-15]
+
+        SUBS     Count,Count,#4              ;// Decrement Count by 4 and continue if it has not reached 0
+        STRD     Result1,[pSrcDst],#8        ;// Store Double words and increment the pointer to point the next store address
+
+
+
+        BGT      Loop                        ;// tests the flags produced by SUBS above
+
+        SUB      pSrcDst,pSrcDst,#128        ;// rewind to the start of the block (16 iterations * 8 bytes)
+
+        ;// Storing the Inverse Quantized DC coefficient
+
+        STRH     temp,[pSrcDst],#2           ;// overwrites slot 0, which the loop filled using the AC formula
+
+
+
+        MOV      Return,#OMX_Sts_NoErr
+
+
+
+
+        M_END
+        ENDIF
+        END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/src/armVC_Version.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/src/armVC_Version.c
new file mode 100644
index 0000000..5d93681
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/src/armVC_Version.c
@@ -0,0 +1,6 @@
+#include "omxtypes.h"
+#include "armCOMM_Version.h"
+
+#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS
+const char * const omxVC_VersionDescription = "ARM OpenMAX DL v" ARM_VERSION_STRING " Rel=" OMX_ARM_RELEASE_TAG " Arch=" OMX_ARM_BUILD_ARCHITECTURE " Tools=" OMX_ARM_BUILD_TOOLCHAIN ;
+#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */ |