diff options
author | James Dong <jdong@google.com> | 2011-05-31 18:53:46 -0700 |
---|---|---|
committer | James Dong <jdong@google.com> | 2011-06-02 12:32:46 -0700 |
commit | 0c1bc742181ded4930842b46e9507372f0b1b963 (patch) | |
tree | c952bfcb03ff7cce5e0f91ad7d25c67a2fdd39cb /media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api | |
parent | 92a746c3b18d035189f596ce32847bf26247aaca (diff) | |
download | frameworks_av-0c1bc742181ded4930842b46e9507372f0b1b963.zip frameworks_av-0c1bc742181ded4930842b46e9507372f0b1b963.tar.gz frameworks_av-0c1bc742181ded4930842b46e9507372f0b1b963.tar.bz2 |
Initial-checkin for ON2 Software AVC/H264 decoder
o when neon is present, the performance gain of On2 AVC software decoder
over PV software decoder is more than 30%.
o In addition, it fixes some known PV software decoder issues like missing
output frames
o allow both pv and on2 software avc to be available for easy comparison
o change output frames from 8 to 16
Change-Id: I567ad1842025ead7092f0c47e3513d6d9ca232dd
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api')
11 files changed, 4988 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM.h new file mode 100755 index 0000000..64c1958 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM.h @@ -0,0 +1,785 @@ +/** + * + * File Name: armCOMM.h + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File: armCOMM.h + * Brief: Declares Common APIs/Data Types used across OpenMAX API's + * + */ + + +#ifndef _armCommon_H_ +#define _armCommon_H_ + +#include "omxtypes.h" + +typedef struct +{ + OMX_F32 Re; /** Real part */ + OMX_F32 Im; /** Imaginary part */ + +} OMX_FC32; /** single precision floating point complex number */ + +typedef struct +{ + OMX_F64 Re; /** Real part */ + OMX_F64 Im; /** Imaginary part */ + +} OMX_FC64; /** double precision floating point complex number */ + + +/* Used by both IP and IC domains for 8x8 JPEG blocks. */ +typedef OMX_S16 ARM_BLOCK8x8[64]; + + +#include "armOMX.h" + +#define armPI (OMX_F64)(3.1415926535897932384626433832795) + +/***********************************************************************/ + +/* Compiler extensions */ +#ifdef ARM_DEBUG +/* debug version */ +#include <stdlib.h> +#include <assert.h> +#include <stdio.h> +#define armError(str) {printf((str)); printf("\n"); exit(-1);} +#define armWarn(str) {printf((str)); printf("\n");} +#define armIgnore(a) ((void)a) +#define armAssert(a) assert(a) +#else +/* release version */ +#define armError(str) ((void) (str)) +#define armWarn(str) ((void) (str)) +#define armIgnore(a) ((void) (a)) +#define armAssert(a) ((void) (a)) +#endif /* ARM_DEBUG */ + +/* Arithmetic operations */ + +#define armMin(a,b) ( (a) > (b) ? (b):(a) ) +#define armMax(a,b) ( (a) > (b) ? (a):(b) ) +#define armAbs(a) ( (a) < 0 ? 
-(a):(a) ) + +/* Alignment operation */ + +#define armAlignToBytes(Ptr,N) (Ptr + ( ((N-(int)Ptr)&(N-1)) / sizeof(*Ptr) )) +#define armAlignTo2Bytes(Ptr) armAlignToBytes(Ptr,2) +#define armAlignTo4Bytes(Ptr) armAlignToBytes(Ptr,4) +#define armAlignTo8Bytes(Ptr) armAlignToBytes(Ptr,8) +#define armAlignTo16Bytes(Ptr) armAlignToBytes(Ptr,16) + +/* Error and Alignment check */ + +#define armRetArgErrIf(condition, code) if(condition) { return (code); } +#define armRetDataErrIf(condition, code) if(condition) { return (code); } + +#ifndef ALIGNMENT_DOESNT_MATTER +#define armIsByteAligned(Ptr,N) ((((int)(Ptr)) % N)==0) +#define armNotByteAligned(Ptr,N) ((((int)(Ptr)) % N)!=0) +#else +#define armIsByteAligned(Ptr,N) (1) +#define armNotByteAligned(Ptr,N) (0) +#endif + +#define armIs2ByteAligned(Ptr) armIsByteAligned(Ptr,2) +#define armIs4ByteAligned(Ptr) armIsByteAligned(Ptr,4) +#define armIs8ByteAligned(Ptr) armIsByteAligned(Ptr,8) +#define armIs16ByteAligned(Ptr) armIsByteAligned(Ptr,16) + +#define armNot2ByteAligned(Ptr) armNotByteAligned(Ptr,2) +#define armNot4ByteAligned(Ptr) armNotByteAligned(Ptr,4) +#define armNot8ByteAligned(Ptr) armNotByteAligned(Ptr,8) +#define armNot16ByteAligned(Ptr) armNotByteAligned(Ptr,16) +#define armNot32ByteAligned(Ptr) armNotByteAligned(Ptr,32) + +/** + * Function: armRoundFloatToS16_ref/armRoundFloatToS32_ref/armRoundFloatToS64 + * + * Description: + * Converts a double precision value into a short int/int after rounding + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_S16/OMX_S32 format + * + */ + +OMX_S16 armRoundFloatToS16 (OMX_F64 Value); +OMX_S32 armRoundFloatToS32 (OMX_F64 Value); +OMX_S64 armRoundFloatToS64 (OMX_F64 Value); + +/** + * Function: armSatRoundFloatToS16_ref/armSatRoundFloatToS32 + * + * Description: + * Converts a double precision value into a short int/int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * 
+ * Return Value: + * [out] converted value in OMX_S16/OMX_S32 format + * + */ + +OMX_S16 armSatRoundFloatToS16 (OMX_F64 Value); +OMX_S32 armSatRoundFloatToS32 (OMX_F64 Value); + +/** + * Function: armSatRoundFloatToU16_ref/armSatRoundFloatToU32 + * + * Description: + * Converts a double precision value into a unsigned short int/int after rounding and saturation + * + * Parameters: + * [in] Value Float value to be converted + * + * Return Value: + * [out] converted value in OMX_U16/OMX_U32 format + * + */ + +OMX_U16 armSatRoundFloatToU16 (OMX_F64 Value); +OMX_U32 armSatRoundFloatToU32 (OMX_F64 Value); + +/** + * Function: armSignCheck + * + * Description: + * Checks the sign of a variable: + * returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + * + * Remarks: + * + * Parameters: + * [in] var Variable to be checked + * + * Return Value: + * OMX_INT -- returns 1 if it is Positive + * returns 0 if it is 0 + * returns -1 if it is Negative + */ + +OMX_INT armSignCheck (OMX_S16 var); + +/** + * Function: armClip + * + * Description: Clips the input between MAX and MIN value + * + * + * Remarks: + * + * Parameters: + * [in] Min lower bound + * [in] Max upper bound + * [in] src variable to the clipped + * + * Return Value: + * OMX_S32 -- returns clipped value + */ + +OMX_S32 armClip ( + OMX_INT min, + OMX_INT max, + OMX_S32 src + ); + +/** + * Function: armClip_F32 + * + * Description: Clips the input between MAX and MIN value + * + * + * Remarks: + * + * Parameters: + * [in] Min lower bound + * [in] Max upper bound + * [in] src variable to the clipped + * + * Return Value: + * OMX_F32 -- returns clipped value + */ + +OMX_F32 armClip_F32 ( + OMX_F32 min, + OMX_F32 max, + OMX_F32 src + ); + +/** + * Function: armShiftSat_F32 + * + * Description: Divides a float value by 2^shift and + * saturates it for unsigned value range for satBits. + * Second parameter is like "shifting" the corresponding + * integer value. 
Takes care of rounding while clipping the final + * value. + * + * Parameters: + * [in] v Number to be operated upon + * [in] shift Divides the input "v" by "2^shift" + * [in] satBits Final range is [0, 2^satBits) + * + * Return Value: + * OMX_U32 -- returns "shifted" saturated value + */ + +OMX_U32 armShiftSat_F32( + OMX_F32 v, + OMX_INT shift, + OMX_INT satBits + ); + +/** + * Functions: armSwapElem + * + * Description: + * This function swaps two elements at the specified pointer locations. + * The size of each element could be anything as specified by <elemSize> + * + * Return Value: + * OMXResult -- Error status from the function + */ +OMXResult armSwapElem(OMX_U8 *pBuf1, OMX_U8 *pBuf2, OMX_INT elemSize); + + +/** + * Function: armMedianOf3 + * + * Description: Finds the median of three numbers + * + * Remarks: + * + * Parameters: + * [in] fEntry First entry + * [in] sEntry second entry + * [in] tEntry Third entry + * + * Return Value: + * OMX_S32 -- returns the median value + */ + +OMX_S32 armMedianOf3 ( + OMX_S32 fEntry, + OMX_S32 sEntry, + OMX_S32 tEntry + ); + +/** + * Function: armLogSize + * + * Description: Finds the size of a positive value and returns the same + * + * Remarks: + * + * Parameters: + * [in] value Positive value + * + * Return Value: + * OMX_U8 -- returns the size of the positive value + */ + +OMX_U8 armLogSize ( + OMX_U16 value + ); + +/***********************************************************************/ + /* Saturating Arithmetic operations */ + +/** + * Function :armSatAdd_S32() + * + * Description : + * Returns the result of saturated addition of the two inputs Value1, Value2 + * + * Parameters: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + * + **/ + +OMX_S32 armSatAdd_S32( + OMX_S32 Value1, + OMX_S32 Value2 + ); + +/** + * Function :armSatAdd_S64() + * + * Description : + * Returns the result of saturated addition of the two inputs Value1, Value2 + * + * 
Parameters: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + * + **/ + +OMX_S64 armSatAdd_S64( + OMX_S64 Value1, + OMX_S64 Value2 + ); + +/** Function :armSatSub_S32() + * + * Description : + * Returns the result of saturated subtraction of the two inputs Value1, Value2 + * + * Parameters: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatSub_S32( + OMX_S32 Value1, + OMX_S32 Value2 + ); + +/** + * Function :armSatMac_S32() + * + * Description : + * Returns the result of Multiplication of Value1 and Value2 and subsequent saturated + * accumulation with Mac + * + * Parameters: + * [in] Value1 First Operand + * [in] Value2 Second Operand + * [in] Mac Accumulator + * + * Return: + * [out] Result of operation + **/ + +OMX_S32 armSatMac_S32( + OMX_S32 Mac, + OMX_S16 Value1, + OMX_S16 Value2 + ); + +/** + * Function :armSatMac_S16S32_S32 + * + * Description : + * Returns the result of saturated MAC operation of the three inputs delayElem, filTap , mac + * + * mac = mac + Saturate_in_32Bits(delayElem * filTap) + * + * Parameters: + * [in] delayElem First 32 bit Operand + * [in] filTap Second 16 bit Operand + * [in] mac Result of MAC operation + * + * Return: + * [out] mac Result of operation + * + **/ + +OMX_S32 armSatMac_S16S32_S32( + OMX_S32 mac, + OMX_S32 delayElem, + OMX_S16 filTap ); + +/** + * Function :armSatRoundRightShift_S32_S16 + * + * Description : + * Returns the result of rounded right shift operation of input by the scalefactor + * + * output = Saturate_in_16Bits( ( RightShift( (Round(input) , scaleFactor ) ) + * + * Parameters: + * [in] input The input to be operated on + * [in] scaleFactor The shift number + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S16 armSatRoundRightShift_S32_S16( + OMX_S32 input, + OMX_INT scaleFactor); + +/** + * Function :armSatRoundLeftShift_S32() + * + * Description : + 
* Returns the result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative. + * + * Parameters: + * [in] Value Operand + * [in] shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatRoundLeftShift_S32( + OMX_S32 Value, + OMX_INT shift + ); + +/** + * Function :armSatRoundLeftShift_S64() + * + * Description : + * Returns the result of saturating left-shift operation on input + * Or rounded Right shift if the input Shift is negative. + * + * Parameters: + * [in] Value Operand + * [in] shift Operand for shift operation + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S64 armSatRoundLeftShift_S64( + OMX_S64 Value, + OMX_INT shift + ); + +/** + * Function :armSatMulS16S32_S32() + * + * Description : + * Returns the result of a S16 data type multiplied with an S32 data type + * in a S32 container + * + * Parameters: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + + +OMX_S32 armSatMulS16S32_S32( + OMX_S16 input1, + OMX_S32 input2); + +/** + * Function :armSatMulS32S32_S32() + * + * Description : + * Returns the result of a S32 data type multiplied with an S32 data type + * in a S32 container + * + * Parameters: + * [in] input1 Operand 1 + * [in] input2 Operand 2 + * + * Return: + * [out] Result of operation + * + **/ + +OMX_S32 armSatMulS32S32_S32( + OMX_S32 input1, + OMX_S32 input2); + + +/** + * Function :armIntDivAwayFromZero() + * + * Description : Integer division with rounding to the nearest integer. + * Half-integer values are rounded away from zero + * unless otherwise specified. For example 3//2 is rounded + * to 2, and -3//2 is rounded to -2. 
+ * + * Parametrs: + * [in] Num Operand 1 + * [in] Deno Operand 2 + * + * Return: + * [out] Result of operation input1//input2 + * + **/ + +OMX_S32 armIntDivAwayFromZero (OMX_S32 Num, OMX_S32 Deno); + + +/***********************************************************************/ +/* + * Debugging macros + * + */ + + +/* + * Definition of output stream - change to stderr if necessary + */ +#define DEBUG_STREAM stdout + +/* + * Debug printf macros, one for each argument count. + * Add more if needed. + */ +#ifdef DEBUG_ON +#include <stdio.h> + +#define DEBUG_PRINTF_0(a) fprintf(DEBUG_STREAM, a) +#define DEBUG_PRINTF_1(a, b) fprintf(DEBUG_STREAM, a, b) +#define DEBUG_PRINTF_2(a, b, c) fprintf(DEBUG_STREAM, a, b, c) +#define DEBUG_PRINTF_3(a, b, c, d) fprintf(DEBUG_STREAM, a, b, c, d) +#define DEBUG_PRINTF_4(a, b, c, d, e) fprintf(DEBUG_STREAM, a, b, c, d, e) +#define DEBUG_PRINTF_5(a, b, c, d, e, f) fprintf(DEBUG_STREAM, a, b, c, d, e, f) +#define DEBUG_PRINTF_6(a, b, c, d, e, f, g) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g) +#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h) +#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i) +#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j) +#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k) +#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l) +#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m) +#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n) +#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) fprintf(DEBUG_STREAM, a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) +#else /* DEBUG_ON */ +#define 
DEBUG_PRINTF_0(a) +#define DEBUG_PRINTF_1(a, b) +#define DEBUG_PRINTF_2(a, b, c) +#define DEBUG_PRINTF_3(a, b, c, d) +#define DEBUG_PRINTF_4(a, b, c, d, e) +#define DEBUG_PRINTF_5(a, b, c, d, e, f) +#define DEBUG_PRINTF_6(a, b, c, d, e, f, g) +#define DEBUG_PRINTF_7(a, b, c, d, e, f, g, h) +#define DEBUG_PRINTF_8(a, b, c, d, e, f, g, h, i) +#define DEBUG_PRINTF_9(a, b, c, d, e, f, g, h, i, j) +#define DEBUG_PRINTF_10(a, b, c, d, e, f, g, h, i, j, k) +#define DEBUG_PRINTF_11(a, b, c, d, e, f, g, h, i, j, k, l) +#define DEBUG_PRINTF_12(a, b, c, d, e, f, g, h, i, j, k, l, m) +#define DEBUG_PRINTF_13(a, b, c, d, e, f, g, h, i, j, k, l, m, n) +#define DEBUG_PRINTF_14(a, b, c, d, e, f, g, h, i, j, k, l, m, n, o) +#endif /* DEBUG_ON */ + + +/* + * Domain and sub domain definitions + * + * In order to turn on debug for an entire domain or sub-domain + * at compile time, one of the DEBUG_DOMAIN_* below may be defined, + * which will activate debug in all of the defines it contains. + */ + +#ifdef DEBUG_DOMAIN_AC +#define DEBUG_OMXACAAC_DECODECHANPAIRELT_MPEG4 +#define DEBUG_OMXACAAC_DECODECHANPAIRELT +#define DEBUG_OMXACAAC_DECODEDATSTRELT +#define DEBUG_OMXACAAC_DECODEFILLELT +#define DEBUG_OMXACAAC_DECODEISSTEREO_S32 +#define DEBUG_OMXACAAC_DECODEMSPNS_S32 +#define DEBUG_OMXACAAC_DECODEMSSTEREO_S32_I +#define DEBUG_OMXACAAC_DECODEPRGCFGELT +#define DEBUG_OMXACAAC_DECODETNS_S32_I +#define DEBUG_OMXACAAC_DEINTERLEAVESPECTRUM_S32 +#define DEBUG_OMXACAAC_ENCODETNS_S32_I +#define DEBUG_OMXACAAC_LONGTERMPREDICT_S32 +#define DEBUG_OMXACAAC_LONGTERMRECONSTRUCT_S32 +#define DEBUG_OMXACAAC_MDCTFWD_S32 +#define DEBUG_OMXACAAC_MDCTINV_S32_S16 +#define DEBUG_OMXACAAC_NOISELESSDECODE +#define DEBUG_OMXACAAC_QUANTINV_S32_I +#define DEBUG_OMXACAAC_UNPACKADIFHEADER +#define DEBUG_OMXACAAC_UNPACKADTSFRAMEHEADER +#define DEBUG_OMXACMP3_HUFFMANDECODESFBMBP_S32 +#define DEBUG_OMXACMP3_HUFFMANDECODESFB_S32 +#define DEBUG_OMXACMP3_HUFFMANDECODE_S32 +#define DEBUG_OMXACMP3_MDCTINV_S32 +#define 
DEBUG_OMXACMP3_REQUANTIZESFB_S32_I +#define DEBUG_OMXACMP3_REQUANTIZE_S32_I +#define DEBUG_OMXACMP3_SYNTHPQMF_S32_S16 +#define DEBUG_OMXACMP3_UNPACKFRAMEHEADER +#define DEBUG_OMXACMP3_UNPACKSCALEFACTORS_S8 +#define DEBUG_OMXACMP3_UNPACKSIDEINFO +#endif /* DEBUG_DOMAIN_AC */ + + +#ifdef DEBUG_DOMAIN_VC +#define DEBUG_OMXVCM4P10_AVERAGE_16X +#define DEBUG_OMXVCM4P10_AVERAGE_4X +#define DEBUG_OMXVCM4P10_AVERAGE_8X +#define DEBUG_OMXVCM4P10_DEBLOCKCHROMA_U8_C1IR +#define DEBUG_OMXVCM4P10_DEBLOCKLUMA_U8_C1IR +#define DEBUG_OMXVCM4P10_DECODECHROMADCCOEFFSTOPAIRCAVLC_U8 +#define DEBUG_OMXVCM4P10_DECODECOEFFSTOPAIRCAVLC_U8 +#define DEBUG_OMXVCM4P10_DEQUANTTRANSFORMACFROMPAIR_U8_S16_C1_DLX +#define DEBUG_OMXVCM4P10_EXPANDFRAME +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_HOREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGCHROMA_VEREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_HOREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_FILTERDEBLOCKINGLUMA_VEREDGE_U8_C1IR +#define DEBUG_OMXVCM4P10_PREDICTINTRACHROMA8X8_U8_C1R +#define DEBUG_OMXVCM4P10_PREDICTINTRA_16X16_U8_C1R +#define DEBUG_OMXVCM4P10_PREDICTINTRA_4X4_U8_C1R +#define DEBUG_OMXVCM4P10_SADQUAR_16X +#define DEBUG_OMXVCM4P10_SADQUAR_4X +#define DEBUG_OMXVCM4P10_SADQUAR_8X +#define DEBUG_OMXVCM4P10_SAD_16X +#define DEBUG_OMXVCM4P10_SAD_4X +#define DEBUG_OMXVCM4P10_SAD_8X +#define DEBUG_OMXVCM4P10_SATD_4X4 +#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTCHROMADCFROMPAIR_U8_S16_C1 +#define DEBUG_OMXVCM4P10_TRANSFORMDEQUANTLUMADCFROMPAIR_U8_S16_C1 +#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_CHROMADC +#define DEBUG_OMXVCM4P10_TRANSFORMQUANT_LUMADC +#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_16X16 +#define DEBUG_OMXVCM4P2_BLOCKMATCH_HALF_8X8 +#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_16X16 +#define DEBUG_OMXVCM4P2_BLOCKMATCH_INTEGER_8X8 +#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_SAD_U8_S16 +#define DEBUG_OMXVCM4P2_COMPUTETEXTUREERRORBLOCK_U8_S16 +#define DEBUG_OMXVCM4P2_DCT8X8BLKDLX +#define 
DEBUG_OMXVCM4P2_DECODEBLOCKCOEF_INTER_S16 +#define DEBUG_OMXVCM4P2_DECODEPADMV_PVOP +#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTER_S16 +#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRAACVLC_S16 +#define DEBUG_OMXVCM4P2_DECODEVLCZIGZAG_INTRADCVLC_S16 +#define DEBUG_OMXVCM4P2_ENCODEMV_U8_S16 +#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTER_S16 +#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRAACVLC_S16 +#define DEBUG_OMXVCM4P2_ENCODEVLCZIGZAG_INTRADCVLC_S16 +#define DEBUG_OMXVCM4P2_FINDMVPRED +#define DEBUG_OMXVCM4P2_IDCT8X8BLKDLX +#define DEBUG_OMXVCM4P2_LIMITMVTORECT +#define DEBUG_OMXVCM4P2_MOTIONESTIMATIONMB +#define DEBUG_OMXVCM4P2_PADMBGRAY_U8 +#define DEBUG_OMXVCM4P2_PADMBHORIZONTAL_U8 +#define DEBUG_OMXVCM4P2_PADMBVERTICAL_U8 +#define DEBUG_OMXVCM4P2_PADMV +#define DEBUG_OMXVCM4P2_QUANTINTER_S16_I +#define DEBUG_OMXVCM4P2_QUANTINTRA_S16_I +#define DEBUG_OMXVCM4P2_QUANTINVINTER_S16_I +#define DEBUG_OMXVCM4P2_QUANTINVINTRA_S16_I +#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTER +#define DEBUG_OMXVCM4P2_TRANSRECBLOCKCEOF_INTRA +#endif /* DEBUG_DOMAIN_VC */ + + +#ifdef DEBUG_DOMAIN_IC +/* To be filled in */ +#endif /* DEBUG_DOMAIN_IC */ + + +#ifdef DEBUG_DOMAIN_SP +#define DEBUG_OMXACSP_DOTPROD_S16 +#define DEBUG_OMXACSP_BLOCKEXP_S16 +#define DEBUG_OMXACSP_BLOCKEXP_S32 +#define DEBUG_OMXACSP_COPY_S16 +#define DEBUG_OMXACSP_DOTPROD_S16 +#define DEBUG_OMXACSP_DOTPROD_S16_SFS +#define DEBUG_OMXACSP_FFTFWD_CTOC_SC16_SFS +#define DEBUG_OMXACSP_FFTFWD_CTOC_SC32_SFS +#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S16S32_SFS +#define DEBUG_OMXACSP_FFTFWD_RTOCCS_S32_SFS +#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC16 +#define DEBUG_OMXACSP_FFTGETBUFSIZE_C_SC32 +#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S16_S32 +#define DEBUG_OMXACSP_FFTGETBUFSIZE_R_S32 +#define DEBUG_OMXACSP_FFTINIT_C_SC16 +#define DEBUG_OMXACSP_FFTINIT_C_SC32 +#define DEBUG_OMXACSP_FFTINIT_R_S16_S32 +#define DEBUG_OMXACSP_FFTINIT_R_S32 +#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32S16_SFS +#define DEBUG_OMXACSP_FFTINV_CCSTOR_S32_SFS 
+#define DEBUG_OMXACSP_FFTINV_CTOC_SC16_SFS +#define DEBUG_OMXACSP_FFTINV_CTOC_SC32_SFS +#define DEBUG_OMXACSP_FILTERMEDIAN_S32_I +#define DEBUG_OMXACSP_FILTERMEDIAN_S32 +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_ISFS +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_I +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16 +#define DEBUG_OMXACSP_FIRONE_DIRECT_S16_SFS +#define DEBUG_OMXACSP_FIR_DIRECT_S16_ISFS +#define DEBUG_OMXACSP_FIR_DIRECT_S16_I +#define DEBUG_OMXACSP_FIR_DIRECT_S16 +#define DEBUG_OMXACSP_FIR_DIRECT_S16_SFS +#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16_I +#define DEBUG_OMXACSP_IIRONE_BIQUADDIRECT_S16 +#define DEBUG_OMXACSP_IIRONE_DIRECT_S16_I +#define DEBUG_OMXACSP_IIRONE_DIRECT_S16 +#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16_I +#define DEBUG_OMXACSP_IIR_BIQUADDIRECT_S16 +#define DEBUG_OMXACSP_IIR_DIRECT_S16_I +#define DEBUG_OMXACSP_IIR_DIRECT_S16 +#endif /* DEBUG_DOMAIN_SP */ + + +#ifdef DEBUG_DOMAIN_IP +#define DEBUG_OMXIPBM_ADDC_U8_C1R_SFS +#define DEBUG_OMXIPBM_COPY_U8_C1R +#define DEBUG_OMXIPBM_COPY_U8_C3R +#define DEBUG_OMXIPBM_MIRROR_U8_C1R +#define DEBUG_OMXIPBM_MULC_U8_C1R_SFS +#define DEBUG_OMXIPCS_COLORTWISTQ14_U8_C3R +#define DEBUG_OMXIPCS_RGB565TOYCBCR420LS_MCU_U16_S16_C3P3R +#define DEBUG_OMXIPCS_RGB565TOYCBCR422LS_MCU_U16_S16_C3P3R +#define DEBUG_OMXIPCS_RGB565TOYCBCR444LS_MCU_U16_S16_C3P3R +#define DEBUG_OMXIPCS_RGBTOYCBCR420LS_MCU_U8_S16_C3P3R +#define DEBUG_OMXIPCS_RGBTOYCBCR422LS_MCU_U8_S16_C3P3R +#define DEBUG_OMXIPCS_RGBTOYCBCR444LS_MCU_U8_S16_C3P3R +#define DEBUG_OMXIPCS_YCBCR420RSZROT_U8_P3R +#define DEBUG_OMXIPCS_YCBCR420TORGB565LS_MCU_S16_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR420TORGB565_U8_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR420TORGBLS_MCU_S16_U8_P3C3R +#define DEBUG_OMXIPCS_YCBCR422RSZCSCROTRGB_U8_C2R +#define DEBUG_OMXIPCS_YCBCR422RSZROT_U8_P3R +#define DEBUG_OMXIPCS_YCBCR422TORGB565LS_MCU_S16_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR422TORGB565_U8_U16_C2C3R +#define DEBUG_OMXIPCS_YCBCR422TORGBLS_MCU_S16_U8_P3C3R +#define 
DEBUG_OMXIPCS_YCBCR422TORGB_U8_C2C3R +#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_C2P3R +#define DEBUG_OMXIPCS_YCBCR422TOYCBCR420ROTATE_U8_P3R +#define DEBUG_OMXIPCS_YCBCR444TORGB565LS_MCU_S16_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCR444TORGBLS_MCU_S16_U8_P3C3R +#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_C3R +#define DEBUG_OMXIPCS_YCBCRTORGB565_U8_U16_P3C3R +#define DEBUG_OMXIPCS_YCBCRTORGB_U8_C3R +#define DEBUG_OMXIPPP_GETCENTRALMOMENT_S64 +#define DEBUG_OMXIPPP_GETSPATIALMOMENT_S64 +#define DEBUG_OMXIPPP_MOMENTGETSTATESIZE_S64 +#define DEBUG_OMXIPPP_MOMENTINIT_S64 +#define DEBUG_OMXIPPP_MOMENTS64S_U8_C1R +#define DEBUG_OMXIPPP_MOMENTS64S_U8_C3R +#endif /* DEBUG_DOMAIN_IP */ + + +#endif /* _armCommon_H_ */ + +/*End of File*/ + + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_BitDec_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_BitDec_s.h new file mode 100755 index 0000000..c738f72 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_BitDec_s.h @@ -0,0 +1,670 @@ +;// +;// +;// File Name: armCOMM_BitDec_s.h +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// +;// OpenMAX optimized bitstream decode module +;// +;// You must include armCOMM_s.h before including this file +;// +;// This module provides macros to perform assembly optimized fixed and +;// variable length decoding from a read-only bitstream. The variable +;// length decode modules take as input a pointer to a table of 16-bit +;// entries of the following format. 
+;// +;// VLD Table Entry format +;// +;// 15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00 +;// +------------------------------------------------+ +;// | Len | Symbol | 1 | +;// +------------------------------------------------+ +;// | Offset | 0 | +;// +------------------------------------------------+ +;// +;// If the table entry is a leaf entry then bit 0 set: +;// Len = Number of bits overread (0 to 7) +;// Symbol = Symbol payload (unsigned 12 bits) +;// +;// If the table entry is an internal node then bit 0 is clear: +;// Offset = Number of (16-bit) half words from the table +;// start to the next table node +;// +;// The table is accessed by successive lookup up on the +;// next Step bits of the input bitstream until a leaf node +;// is obtained. The Step sizes are supplied to the VLD macro. +;// +;// USAGE: +;// +;// To use any of the macros in this package, first call: +;// +;// M_BD_INIT ppBitStream, pBitOffset, pBitStream, RBitBuffer, RBitCount, Tmp +;// +;// This caches the current bitstream position and next available +;// bits in registers pBitStream, RBitBuffer, RBitCount. These registers +;// are reserved for use by the bitstream decode package until you +;// call M_BD_FINI. +;// +;// Next call the following macro(s) as many times as you need: +;// +;// M_BD_LOOK8 - Look ahead constant 1<=N<=8 bits into the bitstream +;// M_BD_LOOK16 - Look ahead constant 1<=N<=16 bits into the bitstream +;// M_BD_READ8 - Read constant 1<=N<=8 bits from the bitstream +;// M_BD_READ16 - Read constant 1<=N<=16 bits from the bitstream +;// M_BD_VREAD8 - Read variable 1<=N<=8 bits from the bitstream +;// M_BD_VREAD16 - Read variable 1<=N<=16 bits from the bitstream +;// M_BD_VLD - Perform variable length decode using lookup table +;// +;// Finally call the macro: +;// +;// M_BD_FINI ppBitStream, pBitOffset +;// +;// This writes the bitstream state back to memory. 
+;// +;// The three bitstream cache register names are assigned to the following global +;// variables: +;// + + GBLS pBitStream ;// Register name for pBitStream + GBLS BitBuffer ;// Register name for BitBuffer + GBLS BitCount ;// Register name for BitCount + +;// +;// These register variables must have a certain defined state on entry to every bitstream +;// macro (except M_BD_INIT) and on exit from every bitstream macro (except M_BD_FINI). +;// The state may depend on implementation. +;// +;// For the default (ARM11) implementation the following hold: +;// pBitStream - points to the first byte not held in the BitBuffer +;// BitBuffer - is a cache of (4 bytes) 32 bits, bit 31 the first bit +;// BitCount - is offset (from the top bit) to the next unused bitstream bit +;// 0<=BitCount<=15 (so BitBuffer holds at least 17 unused bits) +;// +;// + + ;// Bitstream Decode initialise + ;// + ;// Initialises the bitstream decode global registers from + ;// bitstream pointers. This macro is split into 3 parts to enable + ;// scheduling. + ;// + ;// Input Registers: + ;// + ;// $ppBitStream - pointer to pointer to the next bitstream byte + ;// $pBitOffset - pointer to the number of bits used in the current byte (0..7) + ;// $RBitStream - register to use for pBitStream (can be $ppBitStream) + ;// $RBitBuffer - register to use for BitBuffer + ;// $RBitCount - register to use for BitCount (can be $pBitOffset) + ;// + ;// Output Registers: + ;// + ;// $T1,$T2,$T3 - registers that must be preserved between calls to + ;// M_BD_INIT1 and M_BD_INIT2 + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_INIT0 $ppBitStream, $pBitOffset, $RBitStream, $RBitBuffer, $RBitCount + +pBitStream SETS "$RBitStream" +BitBuffer SETS "$RBitBuffer" +BitCount SETS "$RBitCount" + + ;// load inputs + LDR $pBitStream, [$ppBitStream] + LDR $BitCount, [$pBitOffset] + MEND + + MACRO + M_BD_INIT1 $T1, $T2, $T3 + LDRB $T2, [$pBitStream, #2] + LDRB $T1, [$pBitStream, #1] + LDRB $BitBuffer, [$pBitStream], #3 + ADD $BitCount, $BitCount, #8 + MEND + + MACRO + M_BD_INIT2 $T1, $T2, $T3 + ORR $T2, $T2, $T1, LSL #8 + ORR $BitBuffer, $T2, $BitBuffer, LSL #16 + MEND + + ;// + ;// Look ahead fixed 1<=N<=8 bits without consuming any bits + ;// The next bits will be placed at bit 31..24 of destination register + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to look + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_LOOK8 $Symbol, $N + ASSERT ($N>=1):LAND:($N<=8) + MOV $Symbol, $BitBuffer, LSL $BitCount + MEND + + ;// + ;// Look ahead fixed 1<=N<=16 bits without consuming any bits + ;// The next bits will be placed at bit 31..16 of destination register + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to look + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_LOOK16 $Symbol, $N, $T1 + ASSERT ($N >= 1):LAND:($N <= 16) + MOV $Symbol, $BitBuffer, LSL $BitCount + MEND + + ;// + ;// Skips fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer + ;// + ;// Input Registers: + ;// + ;// $N - number of bits + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $T1 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_SKIP8 $N, $T1 + ASSERT ($N>=1):LAND:($N<=8) + SUBS $BitCount, $BitCount, #(8-$N) + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + + ;// + ;// Read fixed 1<=N<=8 bits from the bitstream, advancing the bitstream pointer + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to read + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_READ8 $Symbol, $N, $T1 + ASSERT ($N>=1):LAND:($N<=8) + MOVS $Symbol, $BitBuffer, LSL $BitCount + SUBS $BitCount, $BitCount, #(8-$N) + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + MOV $Symbol, $Symbol, LSR #(32-$N) + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + ;// + ;// Read fixed 1<=N<=16 bits from the bitstream, advancing the bitstream pointer + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to read + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_READ16 $Symbol, $N, $T1, $T2 + ASSERT ($N>=1):LAND:($N<=16) + ASSERT $Symbol<>$T1 + IF ($N<=8) + M_BD_READ8 $Symbol, $N, $T1 + ELSE + ;// N>8 so we will be able to refill at least one byte + LDRB $T1, [$pBitStream], #1 + MOVS $Symbol, $BitBuffer, LSL $BitCount + ORR $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBS $BitCount, $BitCount, #(16-$N) + LDRCSB $T1, [$pBitStream], #1 + MOV $Symbol, $Symbol, LSR #(32-$N) + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + ENDIF + MEND + + ;// + ;// Skip variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer. + ;// + ;// Input Registers: + ;// + ;// $N - number of bits. 1<=N<=8 + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_VSKIP8 $N, $T1 + ADD $BitCount, $BitCount, $N + SUBS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + ;// + ;// Skip variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer. + ;// + ;// Input Registers: + ;// + ;// $N - number of bits. 1<=N<=16 + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. 
+ ;// $BitCount / + ;// + MACRO + M_BD_VSKIP16 $N, $T1, $T2 + ADD $BitCount, $BitCount, $N ;// consume N bits + SUBS $BitCount, $BitCount, #8 ;// CS if a first byte can be refilled + LDRCSB $T1, [$pBitStream], #1 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBCSS $BitCount, $BitCount, #8 ;// only after a first refill: CS if a second byte can be refilled + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 ;// undo the last -8 that was not matched by a refill + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + ;// + ;// Read variable 1<=N<=8 bits from the bitstream, advancing the bitstream pointer. + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to read. 1<=N<=8 + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_VREAD8 $Symbol, $N, $T1, $T2 + MOV $Symbol, $BitBuffer, LSL $BitCount ;// left align next bits + ADD $BitCount, $BitCount, $N ;// consume N bits + SUBS $BitCount, $BitCount, #8 ;// CS if a byte can be refilled + LDRCSB $T1, [$pBitStream], #1 + RSB $T2, $N, #32 ;// T2 = 32-N, the right-shift amount + ADDCC $BitCount, $BitCount, #8 + MOV $Symbol, $Symbol, LSR $T2 ;// keep the top N bits, right-aligned + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + + ;// + ;// Read variable 1<=N<=16 bits from the bitstream, advancing the bitstream pointer. + ;// + ;// Input Registers: + ;// + ;// $N - number of bits to read. 1<=N<=16 + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the next N bits of the bitstream + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above.
+ ;// $BitCount / + ;// + MACRO + M_BD_VREAD16 $Symbol, $N, $T1, $T2 + MOV $Symbol, $BitBuffer, LSL $BitCount ;// left align next bits + ADD $BitCount, $BitCount, $N ;// consume N bits + SUBS $BitCount, $BitCount, #8 ;// CS if a first byte can be refilled + LDRCSB $T1, [$pBitStream], #1 + RSB $T2, $N, #32 ;// T2 = 32-N, the right-shift amount + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBCSS $BitCount, $BitCount, #8 ;// only after a first refill: CS if a second byte can be refilled + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + MOV $Symbol, $Symbol, LSR $T2 ;// keep the top N bits, right-aligned + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + + ;// + ;// Decode a code of the form 0000...001 where there + ;// are N zeros before the 1 and N<=15 (code length<=16) + ;// + ;// Input Registers: + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the number of zeros before the next 1 + ;// >=16 is an illegal code + ;// $T1 - corrupted temp/scratch register + ;// $T2 - scratch register argument (not referenced by the current macro body) + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_CLZ16 $Symbol, $T1, $T2 + MOVS $Symbol, $BitBuffer, LSL $BitCount ;// left align next bits + CLZ $Symbol, $Symbol ;// count the leading zeros + ADD $BitCount, $BitCount, $Symbol + SUBS $BitCount, $BitCount, #7 ;// length is Symbol+1 + LDRCSB $T1, [$pBitStream], #1 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBCSS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + ;// + ;// Decode a code of the form 1111...110 where there + ;// are N ones before the 0 and N<=15 (code length<=16) + ;// + ;// Input Registers: + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - the number of ones before the next 0 + ;// >=16 is an illegal code + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above.
+ ;// $BitCount / + ;// + MACRO + M_BD_CLO16 $Symbol, $T1, $T2 + MOV $Symbol, $BitBuffer, LSL $BitCount ;// left align next bits + MVN $Symbol, $Symbol ;// invert so leading ones become leading zeros + CLZ $Symbol, $Symbol ;// count them + ADD $BitCount, $BitCount, $Symbol + SUBS $BitCount, $BitCount, #7 ;// length is Symbol+1 + LDRCSB $T1, [$pBitStream], #1 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + SUBCSS $BitCount, $BitCount, #8 + LDRCSB $T1, [$pBitStream], #1 + ADDCC $BitCount, $BitCount, #8 + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 + MEND + + + ;// + ;// Variable Length Decode module + ;// + ;// Decodes one VLD Symbol from a bitstream and refills the bitstream + ;// buffer. + ;// + ;// Input Registers: + ;// + ;// $pVLDTable - pointer to VLD decode table of 16-bit entries. + ;// The format is described above at the start of + ;// this file. + ;// $S0 - The number of bits to look up for the first step + ;// 1<=$S0<=8 + ;// $S1 - The number of bits to look up for each subsequent + ;// step 1<=$S1<=$S0. + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $Symbol - decoded VLD symbol value + ;// $T1 - corrupted temp/scratch register + ;// $T2 - corrupted temp/scratch register + ;// $pBitStream \ + ;// $BitBuffer } See description above.
+ ;// $BitCount / + ;// + MACRO + M_BD_VLD $Symbol, $T1, $T2, $pVLDTable, $S0, $S1 + ASSERT (1<=$S0):LAND:($S0<=8) + ASSERT (1<=$S1):LAND:($S1<=$S0) + + ;// Note 0<=BitCount<=15 on entry and exit + + MOVS $T1, $BitBuffer, LSL $BitCount ;// left align next bits + MOVS $Symbol, #(2<<$S0)-2 ;// create mask + AND $Symbol, $Symbol, $T1, LSR #(31-$S0) ;// 2*(next $S0 bits) + SUBS $BitCount, $BitCount, #8 ;// CS if buffer can be filled +01 + LDRCSB $T1, [$pBitStream], #1 ;// load refill byte + LDRH $Symbol, [$pVLDTable, $Symbol] ;// load table entry + ADDCC $BitCount, $BitCount, #8 ;// refill not possible + ADD $BitCount, $BitCount, #$S0 ;// assume $S0 bits used + ORRCS $BitBuffer, $T1, $BitBuffer, LSL #8 ;// merge in refill byte + MOVS $T1, $Symbol, LSR #1 ;// CS=leaf entry + BCS %FT02 + + MOVS $T1, $BitBuffer, LSL $BitCount ;// left align next bit + IF (2*$S0-$S1<=8) + ;// Can combine refill check and -S0+S1 and keep $BitCount<=15 + SUBS $BitCount, $BitCount, #8+($S0-$S1) + ELSE + ;// Separate refill check and -S0+S1 offset + SUBS $BitCount, $BitCount, #8 + SUB $BitCount, $BitCount, #($S0-$S1) + ENDIF + ADD $Symbol, $Symbol, $T1, LSR #(31-$S1) ;// add 2*(next $S1 bits) to + BIC $Symbol, $Symbol, #1 ;// table offset + B %BT01 ;// load next table entry +02 + ;// BitCount range now depends on the route here + ;// if (first step) S0 <= BitCount <= 7+S0 <=15 + ;// else if (2*S0-S1<=8) S0 <= BitCount <= 7+(2*S0-S1) <=15 + ;// else S1 <= BitCount <= 7+S1 <=15 + + SUB $BitCount, $BitCount, $Symbol, LSR#13 ;// give back (leaf entry >> 13) bits + BIC $Symbol, $T1, #0xF000 ;// $T1 = leaf entry >> 1 (from the MOVS above); clear bits 15..12 + MEND + + + ;// Add an offset number of bits + ;// + ;// Outputs destination byte and bit index values which correspond to an offset number of bits + ;// from the current location. This is used to compare bitstream positions using M_BD_CMP. + ;// + ;// Input Registers: + ;// + ;// $Offset - Offset to be added in bits. + ;// $pBitStream \ + ;// $BitBuffer } See description above.
+ ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $ByteIndex - Destination pBitStream pointer after adding the Offset. + ;// This value will be 4 bytes ahead; subtract 4 to get the exact + ;// pointer (as in M_BD_FINI). For use with M_BD_CMP the subtraction is not needed. + ;// $BitIndex - Destination BitCount after the addition of Offset number of bits + ;// $Offset - corrupted: overwritten with $Offset + $BitCount + ;// + MACRO + M_BD_ADD $ByteIndex, $BitIndex, $Offset + + ;// ($ByteIndex,$BitIndex) = Current position + $Offset bits + ADD $Offset, $Offset, $BitCount ;// total bit offset relative to $pBitStream + AND $BitIndex, $Offset, #7 ;// bit part (0..7) + ADD $ByteIndex, $pBitStream, $Offset, ASR #3 ;// byte part + MEND + + ;// Move bitstream pointers to the location given + ;// + ;// Sets the current byte and bit position to the destination values given + ;// (calculated using M_BD_ADD). $BitBuffer is not reloaded by this macro. + ;// + ;// Input Registers: + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// $ByteIndex - Destination pBitStream pointer after move. + ;// This value will be 4 bytes ahead; subtract 4 to get the exact + ;// pointer (as in M_BD_FINI). + ;// $BitIndex - Destination BitCount after the move + ;// + ;// Output Registers: + ;// + ;// $pBitStream \ + ;// } See description above. + ;// $BitCount / + ;// + MACRO + M_BD_MOV $ByteIndex, $BitIndex + + ;// ($pBitStream, $BitCount) = ($ByteIndex,$BitIndex) + MOV $BitCount, $BitIndex + MOV $pBitStream, $ByteIndex + MEND + + ;// Bitstream Compare + ;// + ;// Compares bitstream position with that of a destination position. Destination position + ;// is held in two input registers which are calculated using M_BD_ADD macro + ;// + ;// Input Registers: + ;// + ;// $ByteIndex - Destination pBitStream pointer, (4 bytes ahead as described in M_BD_ADD) + ;// $BitIndex - Destination BitCount + ;// $pBitStream \ + ;// $BitBuffer } See description above.
+ ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// FLAGS - GE if destination is reached, LT if destination is ahead + ;// $T1 - corrupted temp/scratch register + ;// + MACRO + M_BD_CMP $ByteIndex, $BitIndex, $T1 + + ;// Return flags set by (current position)-($ByteIndex,$BitIndex) + ;// so GE means that we have reached the indicated position + + ADD $T1, $pBitStream, $BitCount, LSR #3 ;// current byte position + CMP $T1, $ByteIndex + AND $T1, $BitCount, #7 ;// current bit position (0..7) + CMPEQ $T1, $BitIndex ;// byte positions equal: compare bit positions + MEND + + + ;// Bitstream Decode finalise + ;// + ;// Writes back the bitstream state to the bitstream pointers + ;// + ;// Input Registers: + ;// + ;// $pBitStream \ + ;// $BitBuffer } See description above. + ;// $BitCount / + ;// + ;// Output Registers: + ;// + ;// $ppBitStream - pointer to pointer to the next bitstream byte + ;// $pBitOffset - pointer to the number of bits used in the current byte (0..7) + ;// $pBitStream \ + ;// $BitBuffer } these registers are corrupted + ;// $BitCount / + ;// + MACRO + M_BD_FINI $ppBitStream, $pBitOffset + + ;// Advance pointer by the number of free bits in the buffer + ADD $pBitStream, $pBitStream, $BitCount, LSR#3 + AND $BitCount, $BitCount, #7 + + ;// Now move back 32 bits to reach the first unused bit + SUB $pBitStream, $pBitStream, #4 + + ;// Store out bitstream state + STR $BitCount, [$pBitOffset] + STR $pBitStream, [$ppBitStream] + MEND + + END +
\ No newline at end of file diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Bitstream.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Bitstream.h new file mode 100755 index 0000000..b699034 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Bitstream.h @@ -0,0 +1,212 @@ +/** + * + * File Name: armCOMM_Bitstream.h + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File: armCOMM_Bitstream.h + * Brief: Declares common API's/Data types used across the OpenMax Encoders/Decoders. + * + */ + +#ifndef _armCodec_H_ +#define _armCodec_H_ + +#include "omxtypes.h" + +typedef struct { + OMX_U8 codeLen; + OMX_U32 codeWord; +} ARM_VLC32; + +/* The above should be renamed as "ARM_VLC32" */ + +/** + * Function: armLookAheadBits() + * + * Description: + * Get the next N bits from the bitstream without advancing the bitstream pointer + * + * Parameters: + * [in] **ppBitStream + * [in] *pOffset + * [in] N=1...32 + * + * Returns Value + */ + +OMX_U32 armLookAheadBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N); + +/** + * Function: armGetBits() + * + * Description: + * Read N bits from the bitstream + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * [in] N=1..32 + * + * [out] *ppBitStream + * [out] *pOffset + * Returns Value + */ + +OMX_U32 armGetBits(const OMX_U8 **ppBitStream, OMX_INT *pOffset, OMX_INT N); + +/** + * Function: armByteAlign() + * + * Description: + * Align the pointer *ppBitStream to the next byte boundary + * + * Parameters: + * [in] *ppBitStream + * [in] *pOffset + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + +OMXVoid armByteAlign(const OMX_U8 **ppBitStream,OMX_INT *pOffset); + +/** + * Function: armSkipBits() + * + * Description: + * Skip N bits from the value at *ppBitStream + * + * Parameters: + * [in] *ppBitStream + * [in] 
*pOffset + * [in] N + * + * [out] *ppBitStream + * [out] *pOffset + * + **/ + +OMXVoid armSkipBits(const OMX_U8 **ppBitStream,OMX_INT *pOffset,OMX_INT N); + +/*************************************** + * Variable bit length Decode + ***************************************/ + +/** + * Function: armUnPackVLC32() + * + * Description: + * Variable length decode of variable length symbol (max size 32 bits) read from + * the bit stream pointed by *ppBitStream at *pOffset by using the table + * pointed by pCodeBook + * + * Parameters: + * [in] **ppBitStream + * [in] *pOffset + * [in] pCodeBook + * + * [out] **ppBitStream + * [out] *pOffset + * + * Returns : Code Book Index if successful. + * : "ARM_NO_CODEBOOK_INDEX = 0xFFFF" if search fails. + **/ + +#define ARM_NO_CODEBOOK_INDEX (OMX_U16)(0xFFFF) + +OMX_U16 armUnPackVLC32( + const OMX_U8 **ppBitStream, + OMX_INT *pOffset, + const ARM_VLC32 *pCodeBook +); + +/*************************************** + * Fixed bit length Encode + ***************************************/ + +/** + * Function: armPackBits + * + * Description: + * Pack a code word of codeLength bits into the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte + * in the bit stream. + * [in] pOffset pointer to the bit position in the byte + * pointed by *ppBitStream. Valid within 0 + * to 7. + * [in] codeWord Code word that needs to be inserted into the + * bitstream + * [in] codeLength Length of the code word; valid range 1...32 + * + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pOffset *pOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMX_RESULT result. See enumeration for possible result codes.
+ * + */ + +OMXResult armPackBits ( + OMX_U8 **ppBitStream, + OMX_INT *pOffset, + OMX_U32 codeWord, + OMX_INT codeLength +); + +/*************************************** + * Variable bit length Encode + ***************************************/ + +/** + * Function: armPackVLC32 + * + * Description: + * Pack a VLC code word into the bitstream + * + * Remarks: + * + * Parameters: + * [in] ppBitStream pointer to the pointer to the current byte + * in the bit stream. + * [in] pBitOffset pointer to the bit position in the byte + * pointed by *ppBitStream. Valid within 0 + * to 7. + * [in] code VLC code word (ARM_VLC32: codeLen and codeWord) that + * needs to be inserted into the bitstream + * + * [out] ppBitStream *ppBitStream is updated after the block is encoded, + * so that it points to the current byte in the bit + * stream buffer. + * [out] pBitOffset *pBitOffset is updated so that it points to the + * current bit position in the byte pointed by + * *ppBitStream. + * + * Return Value: + * Standard OMX_RESULT result. See enumeration for possible result codes. + * + */ + +OMXResult armPackVLC32 ( + OMX_U8 **ppBitStream, + OMX_INT *pBitOffset, + ARM_VLC32 code +); + +#endif /*_armCodec_H_*/ + +/*End of File*/ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCTTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCTTable.h new file mode 100755 index 0000000..e0cfdaa --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCTTable.h @@ -0,0 +1,40 @@ +/** + * + * + * File Name: armCOMM_IDCTTable.h + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * File : armCOMM_IDCTTable.h + * Description : Contains declarations of tables for IDCT calculation.
+ * + */ + +#ifndef _armCOMM_IDCTTable_H_ +#define _armCOMM_IDCTTable_H_ + +#include "omxtypes.h" + + /* Table of s(u)*A(u)*A(v)/16 at Q15 + * s(u)=1.0 0 <= u <= 5 + * s(6)=2.0 + * s(7)=4.0 + * A(0) = 2*sqrt(2) + * A(u) = 4*cos(u*pi/16) for (u!=0) + */ +extern const OMX_U16 armCOMM_IDCTPreScale [64]; +extern const OMX_U16 armCOMM_IDCTCoef [4]; + +#endif /* _armCOMM_IDCTTable_H_ */ + + +/* End of File */ + + + + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCT_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCT_s.h new file mode 100755 index 0000000..0baa087 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_IDCT_s.h @@ -0,0 +1,1451 @@ +;// +;// This confidential and proprietary software may be used only as +;// authorised by a licensing agreement from ARM Limited +;// (C) COPYRIGHT 2004 ARM Limited +;// ALL RIGHTS RESERVED +;// The entire notice above must be reproduced on all authorised +;// copies and copies may only be made to the extent permitted +;// by a licensing agreement from ARM Limited. +;// +;// IDCT_s.s +;// +;// Inverse DCT module +;// +;// +;// ALGORITHM DESCRIPTION +;// +;// The 8x8 2D IDCT is performed by calculating a 1D IDCT for each +;// column and then a 1D IDCT for each row. +;// +;// The 8-point 1D IDCT is defined by +;// f(x) = (C(0)*T(0)*c(0,x) + ... + C(7)*T(7)*c(7,x))/2 +;// +;// C(u) = 1/sqrt(2) if u=0 or 1 if u!=0 +;// c(u,x) = cos( (2x+1)*u*pi/16 ) +;// +;// We compute the 8-point 1D IDCT using the reverse of +;// the Arai-Agui-Nakajima flow graph which we split into +;// 5 stages named in reverse order to identify with the +;// forward DCT. 
Direct inversion of the forward formulae +;// in file FDCT_s.s gives: +;// +;// IStage 5: j(u) = T(u)*A(u) [ A(u)=4*C(u)*c(u,0) ] +;// [ A(0) = 2*sqrt(2) +;// A(u) = 4*cos(u*pi/16) for (u!=0) ] +;// +;// IStage 4: i0 = j0 i1 = j4 +;// i3 = (j2+j6)/2 i2 = (j2-j6)/2 +;// i7 = (j5+j3)/2 i4 = (j5-j3)/2 +;// i5 = (j1+j7)/2 i6 = (j1-j7)/2 +;// +;// IStage 3: h0 = (i0+i1)/2 h1 = (i0-i1)/2 +;// h2 = (i2*sqrt2)-i3 h3 = i3 +;// h4 = cos(pi/8)*i4 + sin(pi/8)*i6 +;// h6 = -sin(pi/8)*i4 + cos(pi/8)*i6 +;// [ The above two lines rotate by -(pi/8) ] +;// h5 = (i5-i7)/sqrt2 h7 = (i5+i7)/2 +;// +;// IStage 2: g0 = (h0+h3)/2 g3 = (h0-h3)/2 +;// g1 = (h1+h2)/2 g2 = (h1-h2)/2 +;// g7 = h7 g6 = h6 - h7 +;// g5 = h5 - g6 g4 = h4 - g5 +;// +;// IStage 1: f0 = (g0+g7)/2 f7 = (g0-g7)/2 +;// f1 = (g1+g6)/2 f6 = (g1-g6)/2 +;// f2 = (g2+g5)/2 f5 = (g2-g5)/2 +;// f3 = (g3+g4)/2 f4 = (g3-g4)/2 +;// +;// Note that most coefficients are halved 3 times during the +;// above calculation. We can rescale the algorithm dividing +;// the input by 8 to remove the halvings. +;// +;// IStage 5: j(u) = T(u)*A(u)/8 +;// +;// IStage 4: i0 = j0 i1 = j4 +;// i3 = j2 + j6 i2 = j2 - j6 +;// i7 = j5 + j3 i4 = j5 - j3 +;// i5 = j1 + j7 i6 = j1 - j7 +;// +;// IStage 3: h0 = i0 + i1 h1 = i0 - i1 +;// h2 = (i2*sqrt2)-i3 h3 = i3 +;// h4 = 2*( cos(pi/8)*i4 + sin(pi/8)*i6) +;// h6 = 2*(-sin(pi/8)*i4 + cos(pi/8)*i6) +;// h5 = (i5-i7)*sqrt2 h7 = i5 + i7 +;// +;// IStage 2: g0 = h0 + h3 g3 = h0 - h3 +;// g1 = h1 + h2 g2 = h1 - h2 +;// g7 = h7 g6 = h6 - h7 +;// g5 = h5 - g6 g4 = h4 - g5 +;// +;// IStage 1: f0 = g0 + g7 f7 = g0 - g7 +;// f1 = g1 + g6 f6 = g1 - g6 +;// f2 = g2 + g5 f5 = g2 - g5 +;// f3 = g3 + g4 f4 = g3 - g4 +;// +;// Note: +;// 1. The scaling by A(u)/8 can often be combined with inverse +;// quantization. The column and row scalings can be combined. +;// 2. The flowgraph in the AAN paper has h4,g6 negated compared +;// to the above code but is otherwise identical. +;// 3. 
The rotation by -pi/8 can be performed using three multiplies +;// Eg c*i4+s*i6 = (i6-i4)*s + (c+s)*i4 +;// -s*i4+c*i6 = (i6-i4)*s + (c-s)*i6 +;// 4. If |T(u)|<=1 then from the IDCT definition, +;// |f(x)| <= ((1/sqrt2) + |c(1,x)| + .. + |c(7,x)|)/2 +;// = ((1/sqrt2) + cos(pi/16) + ... + cos(7*pi/16))/2 +;// = ((1/sqrt2) + (cot(pi/32)-1)/2)/2 +;// = (1 + cos(pi/16) + cos(2pi/16) + cos(3pi/16))/sqrt(2) +;// = (approx)2.64 +;// So the max gain of the 2D IDCT is ~x7.0 = 3 bits. +;// The table below shows input patterns generating the maximum +;// value of |f(x)| for input in the range |T(u)|<=1. M=-1, P=+1 +;// InputPattern Max |f(x)| +;// PPPPPPPP |f0| = 2.64 +;// PPPMMMMM |f1| = 2.64 +;// PPMMMPPP |f2| = 2.64 +;// PPMMPPMM |f3| = 2.64 +;// PMMPPMMP |f4| = 2.64 +;// PMMPMMPM |f5| = 2.64 +;// PMPPMPMP |f6| = 2.64 +;// PMPMPMPM |f7| = 2.64 +;// Note that this input pattern is the transpose of the +;// corresponding max input pattern for the FDCT. + +;// Arguments + +pSrc RN 0 ;// source data buffer +Stride RN 1 ;// destination stride in bytes +pDest RN 2 ;// destination data buffer +pScale RN 3 ;// pointer to scaling table + + + ;// DCT Inverse Macro + ;// The DCT code should be parametrized according + ;// to the following inputs: + ;// $outsize = "u8" : 8-bit unsigned data saturated (0 to +255) + ;// "s9" : 16-bit signed data saturated to 9-bit (-256 to +255) + ;// "s16" : 16-bit signed data not saturated (max size ~+/-14273) + ;// $inscale = "s16" : signed 16-bit aan-scale table, Q15 format, with 4 byte alignment + ;// "s32" : signed 32-bit aan-scale table, Q23 format, with 4 byte alignment + ;// + ;// Inputs: + ;// pSrc = r0 = Pointer to input data + ;// Range is -256 to +255 (9-bit) + ;// Stride = r1 = Stride between output (destination) lines, in bytes + ;// pDest = r2 = Pointer to output data + ;// pScale = r3 = Pointer to aan-scale table in the format defined by $inscale + + + + MACRO + M_IDCT $outsize, $inscale, $stride + LCLA SHIFT + + + IF ARM1136JS + +;// REGISTER ALLOCATION +;// 
This is hard since we have 8 values, 9 free registers and each +;// butterfly requires a temporary register. We also want to +;// maintain register order so we can use LDM/STM. The table below +;// summarises the register allocation that meets all these criteria. +;// a=1stcol, b=2ndcol, f,g,h,i are dataflow points described above. +;// +;// r1 a01 g0 h0 +;// r4 b01 f0 g1 h1 i0 +;// r5 a23 f1 g2 i1 +;// r6 b23 f2 g3 h2 i2 +;// r7 a45 f3 h3 i3 +;// r8 b45 f4 g4 h4 i4 +;// r9 a67 f5 g5 h5 i5 +;// r10 b67 f6 g6 h6 i6 +;// r11 f7 g7 h7 i7 +;// +ra01 RN 1 +rb01 RN 4 +ra23 RN 5 +rb23 RN 6 +ra45 RN 7 +rb45 RN 8 +ra67 RN 9 +rb67 RN 10 +rtmp RN 11 +csPiBy8 RN 12 ;// [ (Sin(pi/8)@Q15), (Cos(pi/8)@Q15) ] +LoopRR2 RN 14 ;// [ LoopNumber<<13 , (1/Sqrt(2))@Q15 ] +;// Transpose allocation +xft RN ra01 +xf0 RN rb01 +xf1 RN ra23 +xf2 RN rb23 +xf3 RN ra45 +xf4 RN rb45 +xf5 RN ra67 +xf6 RN rb67 +xf7 RN rtmp +;// IStage 1 allocation +xg0 RN xft +xg1 RN xf0 +xg2 RN xf1 +xg3 RN xf2 +xgt RN xf3 +xg4 RN xf4 +xg5 RN xf5 +xg6 RN xf6 +xg7 RN xf7 +;// IStage 2 allocation +xh0 RN xg0 +xh1 RN xg1 +xht RN xg2 +xh2 RN xg3 +xh3 RN xgt +xh4 RN xg4 +xh5 RN xg5 +xh6 RN xg6 +xh7 RN xg7 +;// IStage 3,4 allocation +xit RN xh0 +xi0 RN xh1 +xi1 RN xht +xi2 RN xh2 +xi3 RN xh3 +xi4 RN xh4 +xi5 RN xh5 +xi6 RN xh6 +xi7 RN xh7 + + M_STR pDest, ppDest + IF "$stride"="s" + M_STR Stride, pStride + ENDIF + M_ADR pDest, pBlk + LDR csPiBy8, =0x30fc7642 + LDR LoopRR2, =0x00005a82 + +v6_idct_col$_F + ;// Load even values + LDR xi4, [pSrc], #4 ;// j0 + LDR xi5, [pSrc, #4*16-4] ;// j4 + LDR xi6, [pSrc, #2*16-4] ;// j2 + LDR xi7, [pSrc, #6*16-4] ;// j6 + + ;// Scale Even Values + IF "$inscale"="s16" ;// 16x16 mul +SHIFT SETA 12 + LDR xi0, [pScale], #4 + LDR xi1, [pScale, #4*16-4] + LDR xi2, [pScale, #2*16-4] + MOV xit, #1<<(SHIFT-1) + SMLABB xi3, xi0, xi4, xit + SMLATT xi4, xi0, xi4, xit + SMLABB xi0, xi1, xi5, xit + SMLATT xi5, xi1, xi5, xit + MOV xi3, xi3, ASR #SHIFT + PKHBT xi4, xi3, xi4, LSL #(16-SHIFT) + LDR xi3, 
[pScale, #6*16-4] + SMLABB xi1, xi2, xi6, xit + SMLATT xi6, xi2, xi6, xit + MOV xi0, xi0, ASR #SHIFT + PKHBT xi5, xi0, xi5, LSL #(16-SHIFT) + SMLABB xi2, xi3, xi7, xit + SMLATT xi7, xi3, xi7, xit + MOV xi1, xi1, ASR #SHIFT + PKHBT xi6, xi1, xi6, LSL #(16-SHIFT) + MOV xi2, xi2, ASR #SHIFT + PKHBT xi7, xi2, xi7, LSL #(16-SHIFT) + ENDIF + IF "$inscale"="s32" ;// 32x16 mul +SHIFT SETA (12+8-16) + MOV xit, #1<<(SHIFT-1) + LDR xi0, [pScale], #8 + LDR xi1, [pScale, #0*32+4-8] + LDR xi2, [pScale, #4*32-8] + LDR xi3, [pScale, #4*32+4-8] + SMLAWB xi0, xi0, xi4, xit + SMLAWT xi1, xi1, xi4, xit + SMLAWB xi2, xi2, xi5, xit + SMLAWT xi3, xi3, xi5, xit + MOV xi0, xi0, ASR #SHIFT + PKHBT xi4, xi0, xi1, LSL #(16-SHIFT) + MOV xi2, xi2, ASR #SHIFT + PKHBT xi5, xi2, xi3, LSL #(16-SHIFT) + LDR xi0, [pScale, #2*32-8] + LDR xi1, [pScale, #2*32+4-8] + LDR xi2, [pScale, #6*32-8] + LDR xi3, [pScale, #6*32+4-8] + SMLAWB xi0, xi0, xi6, xit + SMLAWT xi1, xi1, xi6, xit + SMLAWB xi2, xi2, xi7, xit + SMLAWT xi3, xi3, xi7, xit + MOV xi0, xi0, ASR #SHIFT + PKHBT xi6, xi0, xi1, LSL #(16-SHIFT) + MOV xi2, xi2, ASR #SHIFT + PKHBT xi7, xi2, xi3, LSL #(16-SHIFT) + ENDIF + + ;// Load odd values + LDR xi0, [pSrc, #1*16-4] ;// j1 + LDR xi1, [pSrc, #7*16-4] ;// j7 + LDR xi2, [pSrc, #5*16-4] ;// j5 + LDR xi3, [pSrc, #3*16-4] ;// j3 + + IF {TRUE} + ;// shortcut if odd values 0 + TEQ xi0, #0 + TEQEQ xi1, #0 + TEQEQ xi2, #0 + TEQEQ xi3, #0 + BEQ v6OddZero$_F + ENDIF + + ;// Store scaled even values + STMIA pDest, {xi4, xi5, xi6, xi7} + + ;// Scale odd values + IF "$inscale"="s16" + ;// Perform AAN Scale + LDR xi4, [pScale, #1*16-4] + LDR xi5, [pScale, #7*16-4] + LDR xi6, [pScale, #5*16-4] + SMLABB xi7, xi0, xi4, xit + SMLATT xi0, xi0, xi4, xit + SMLABB xi4, xi1, xi5, xit + SMLATT xi1, xi1, xi5, xit + MOV xi7, xi7, ASR #SHIFT + PKHBT xi0, xi7, xi0, LSL #(16-SHIFT) + LDR xi7, [pScale, #3*16-4] + SMLABB xi5, xi2, xi6, xit + SMLATT xi2, xi2, xi6, xit + MOV xi4, xi4, ASR #SHIFT + PKHBT xi1, xi4, xi1, LSL #(16-SHIFT) 
+ SMLABB xi6, xi3, xi7, xit + SMLATT xi3, xi3, xi7, xit + MOV xi5, xi5, ASR #SHIFT + PKHBT xi2, xi5, xi2, LSL #(16-SHIFT) + MOV xi6, xi6, ASR #SHIFT + PKHBT xi3, xi6, xi3, LSL #(16-SHIFT) + ENDIF + IF "$inscale"="s32" ;// 32x16 mul + LDR xi4, [pScale, #1*32-8] + LDR xi5, [pScale, #1*32+4-8] + LDR xi6, [pScale, #7*32-8] + LDR xi7, [pScale, #7*32+4-8] + SMLAWB xi4, xi4, xi0, xit + SMLAWT xi5, xi5, xi0, xit + SMLAWB xi6, xi6, xi1, xit + SMLAWT xi7, xi7, xi1, xit + MOV xi4, xi4, ASR #SHIFT + PKHBT xi0, xi4, xi5, LSL #(16-SHIFT) + MOV xi6, xi6, ASR #SHIFT + PKHBT xi1, xi6, xi7, LSL #(16-SHIFT) + LDR xi4, [pScale, #5*32-8] + LDR xi5, [pScale, #5*32+4-8] + LDR xi6, [pScale, #3*32-8] + LDR xi7, [pScale, #3*32+4-8] + SMLAWB xi4, xi4, xi2, xit + SMLAWT xi5, xi5, xi2, xit + SMLAWB xi6, xi6, xi3, xit + SMLAWT xi7, xi7, xi3, xit + MOV xi4, xi4, ASR #SHIFT + PKHBT xi2, xi4, xi5, LSL #(16-SHIFT) + MOV xi6, xi6, ASR #SHIFT + PKHBT xi3, xi6, xi7, LSL #(16-SHIFT) + ENDIF + + LDR xit, =0x00010001 ;// rounding constant + SADD16 xi5, xi0, xi1 ;// (j1+j7)/2 + SHADD16 xi5, xi5, xit + + SSUB16 xi6, xi0, xi1 ;// j1-j7 + SADD16 xi7, xi2, xi3 ;// (j5+j3)/2 + SHADD16 xi7, xi7, xit + + SSUB16 xi4, xi2, xi3 ;// j5-j3 + + SSUB16 xi3, xi5, xi7 ;// (i5-i7)/2 + + PKHBT xi0, xi6, xi4, LSL#16 ;// [i4,i6] row a + PKHTB xi1, xi4, xi6, ASR#16 ;// [i4,i6] row b + + SMUADX xi2, xi0, csPiBy8 ;// rowa by [c,s] + SMUADX xi4, xi1, csPiBy8 ;// rowb by [c,s] + SMUSD xi0, xi0, csPiBy8 ;// rowa by [-s,c] + SMUSD xi6, xi1, csPiBy8 ;// rowb by [-s,c] + + SMULBB xi1, xi3, LoopRR2 + SMULTB xi3, xi3, LoopRR2 + + PKHTB xh4, xi4, xi2, ASR#16 ;// h4/4 + PKHTB xh6, xi6, xi0, ASR#16 ;// h6/4 + SHADD16 xh7, xi5, xi7 ;// (i5+i7)/4 + + ;// xi0,xi1,xi2,xi3 now free + ;// IStage 4,3, rows 2to3 x1/2 + + MOV xi3, xi3, LSL #1 + PKHTB xh5, xi3, xi1, ASR#15 ;// h5/4 + LDRD xi0, [pDest, #8] ;// j2,j6 scaled + + ;// IStage 2, rows4to7 + SSUB16 xg6, xh6, xh7 + SSUB16 xg5, xh5, xg6 + SSUB16 xg4, xh4, xg5 + + SSUB16 xi2, xi0, xi1 ;// 
(j2-j6) + + SHADD16 xi3, xi0, xi1 ;// (j2+j6)/2 + + SMULBB xi0, xi2, LoopRR2 + SMULTB xi2, xi2, LoopRR2 + + MOV xi2, xi2, LSL #1 + PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4 + + ;// xi0, xi1 now free + ;// IStage 4,3 rows 0to1 x 1/2 + LDRD xi0, [pDest] ;// j0, j4 scaled + SSUB16 xh2, xh2, xi3 + ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows + + SHADD16 xh0, xi0, xi1 + SHSUB16 xh1, xi0, xi1 + + ;// IStage 2 rows 0to3 x 1/2 + SHSUB16 xg2, xh1, xh2 + SHADD16 xg1, xh1, xh2 + SHSUB16 xg3, xh0, xh3 + SHADD16 xg0, xh0, xh3 + + ;// IStage 1 all rows + SADD16 xf3, xg3, xg4 + SSUB16 xf4, xg3, xg4 + SADD16 xf2, xg2, xg5 + SSUB16 xf5, xg2, xg5 + SADD16 xf1, xg1, xg6 + SSUB16 xf6, xg1, xg6 + SADD16 xf0, xg0, xg7 + SSUB16 xf7, xg0, xg7 + + ;// Transpose, store and loop + PKHBT ra01, xf0, xf1, LSL #16 + PKHTB rb01, xf1, xf0, ASR #16 + + PKHBT ra23, xf2, xf3, LSL #16 + PKHTB rb23, xf3, xf2, ASR #16 + + PKHBT ra45, xf4, xf5, LSL #16 + PKHTB rb45, xf5, xf4, ASR #16 + + PKHBT ra67, xf6, xf7, LSL #16 + STMIA pDest!, {ra01, ra23, ra45, ra67} + PKHTB rb67, xf7, xf6, ASR #16 + STMIA pDest!, {rb01, rb23, rb45, rb67} + BCC v6_idct_col$_F + + SUB pSrc, pDest, #(64*2) + M_LDR pDest, ppDest + IF "$stride"="s" + M_LDR pScale, pStride + ENDIF + B v6_idct_row$_F + +v6OddZero$_F + SSUB16 xi2, xi6, xi7 ;// (j2-j6) + SHADD16 xi3, xi6, xi7 ;// (j2+j6)/2 + + SMULBB xi0, xi2, LoopRR2 + SMULTB xi2, xi2, LoopRR2 + + MOV xi2, xi2, LSL #1 + PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4 + SSUB16 xh2, xh2, xi3 + + ;// xi0, xi1 now free + ;// IStage 4,3 rows 0to1 x 1/2 + + SHADD16 xh0, xi4, xi5 + SHSUB16 xh1, xi4, xi5 + + ;// IStage 2 rows 0to3 x 1/2 + SHSUB16 xg2, xh1, xh2 + SHADD16 xg1, xh1, xh2 + SHSUB16 xg3, xh0, xh3 + SHADD16 xg0, xh0, xh3 + + ;// IStage 1 all rows + MOV xf3, xg3 + MOV xf4, xg3 + MOV xf2, xg2 + MOV xf5, xg2 + MOV xf1, xg1 + MOV xf6, xg1 + MOV xf0, xg0 + MOV xf7, xg0 + + ;// Transpose + PKHBT ra01, xf0, xf1, LSL #16 + PKHTB rb01, xf1, xf0, ASR #16 + + PKHBT ra23, xf2, xf3, LSL #16 + 
PKHTB rb23, xf3, xf2, ASR #16 + + PKHBT ra45, xf4, xf5, LSL #16 + PKHTB rb45, xf5, xf4, ASR #16 + + PKHBT ra67, xf6, xf7, LSL #16 + PKHTB rb67, xf7, xf6, ASR #16 + + STMIA pDest!, {ra01, ra23, ra45, ra67} + ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows + STMIA pDest!, {rb01, rb23, rb45, rb67} + + BCC v6_idct_col$_F + SUB pSrc, pDest, #(64*2) + M_LDR pDest, ppDest + IF "$stride"="s" + M_LDR pScale, pStride + ENDIF + + +v6_idct_row$_F + ;// IStage 4,3, rows4to7 x1/4 + LDR xit, =0x00010001 ;// rounding constant + LDR xi0, [pSrc, #1*16] ;// j1 + LDR xi1, [pSrc, #7*16] ;// 4*j7 + LDR xi2, [pSrc, #5*16] ;// j5 + LDR xi3, [pSrc, #3*16] ;// j3 + + SHADD16 xi1, xi1, xit ;// 2*j7 + SHADD16 xi1, xi1, xit ;// j7 + + SHADD16 xi5, xi0, xi1 ;// (j1+j7)/2 + SSUB16 xi6, xi0, xi1 ;// j1-j7 + SHADD16 xi7, xi2, xi3 ;// (j5+j3)/2 + SSUB16 xi4, xi2, xi3 ;// j5-j3 + + SSUB16 xi3, xi5, xi7 ;// (i5-i7)/2 + + PKHBT xi0, xi6, xi4, LSL#16 ;// [i4,i6] row a + PKHTB xi1, xi4, xi6, ASR#16 ;// [i4,i6] row b + + SMUADX xi2, xi0, csPiBy8 ;// rowa by [c,s] + SMUADX xi4, xi1, csPiBy8 ;// rowb by [c,s] + SMUSD xi0, xi0, csPiBy8 ;// rowa by [-s,c] + SMUSD xi6, xi1, csPiBy8 ;// rowb by [-s,c] + + SMULBB xi1, xi3, LoopRR2 + SMULTB xi3, xi3, LoopRR2 + + PKHTB xh4, xi4, xi2, ASR#16 ;// h4/4 + PKHTB xh6, xi6, xi0, ASR#16 ;// h6/4 + SHADD16 xh7, xi5, xi7 ;// (i5+i7)/4 + + MOV xi3, xi3, LSL #1 + PKHTB xh5, xi3, xi1, ASR#15 ;// h5/4 + + ;// xi0,xi1,xi2,xi3 now free + ;// IStage 4,3, rows 2to3 x1/2 + + LDR xi0, [pSrc, #2*16] ;// j2 + LDR xi1, [pSrc, #6*16] ;// 2*j6 + + ;// IStage 2, rows4to7 + SSUB16 xg6, xh6, xh7 + SSUB16 xg5, xh5, xg6 + SSUB16 xg4, xh4, xg5 + + SHADD16 xi1, xi1, xit ;// j6 + SSUB16 xi2, xi0, xi1 ;// (j2-j6) + SHADD16 xi3, xi0, xi1 ;// (j2+j6)/2 + + SMULBB xi0, xi2, LoopRR2 + SMULTB xi2, xi2, LoopRR2 + + MOV xi2, xi2, LSL #1 + + PKHTB xh2, xi2, xi0, ASR#15 ;// i2*sqrt(2)/4 + + ;// xi0, xi1 now free + ;// IStage 4,3 rows 0to1 x 1/2 + LDR xi1, [pSrc, #4*16] ;// j4 + LDR xi0, [pSrc], #4 ;// j0 
+ + SSUB16 xh2, xh2, xi3 + ADDS LoopRR2, LoopRR2, #2<<29 ;// done two rows + + ADD xi0, xi0, xit, LSL #2 ;// ensure correct round + SHADD16 xh0, xi0, xi1 ;// of DC result + SHSUB16 xh1, xi0, xi1 + + ;// IStage 2 rows 0to3 x 1/2 + SHSUB16 xg2, xh1, xh2 + SHADD16 xg1, xh1, xh2 + SHSUB16 xg3, xh0, xh3 + SHADD16 xg0, xh0, xh3 + + ;// IStage 1 all rows + SHADD16 xf3, xg3, xg4 + SHSUB16 xf4, xg3, xg4 + SHADD16 xf2, xg2, xg5 + SHSUB16 xf5, xg2, xg5 + SHADD16 xf1, xg1, xg6 + SHSUB16 xf6, xg1, xg6 + SHADD16 xf0, xg0, xg7 + SHSUB16 xf7, xg0, xg7 + + ;// Saturate + IF ("$outsize"="u8") + USAT16 xf0, #8, xf0 + USAT16 xf1, #8, xf1 + USAT16 xf2, #8, xf2 + USAT16 xf3, #8, xf3 + USAT16 xf4, #8, xf4 + USAT16 xf5, #8, xf5 + USAT16 xf6, #8, xf6 + USAT16 xf7, #8, xf7 + ENDIF + IF ("$outsize"="s9") + SSAT16 xf0, #9, xf0 + SSAT16 xf1, #9, xf1 + SSAT16 xf2, #9, xf2 + SSAT16 xf3, #9, xf3 + SSAT16 xf4, #9, xf4 + SSAT16 xf5, #9, xf5 + SSAT16 xf6, #9, xf6 + SSAT16 xf7, #9, xf7 + ENDIF + + ;// Transpose to Row, Pack and store + IF ("$outsize"="u8") + ORR xf0, xf0, xf1, LSL #8 ;// [ b1 b0 a1 a0 ] + ORR xf2, xf2, xf3, LSL #8 ;// [ b3 b2 a3 a2 ] + ORR xf4, xf4, xf5, LSL #8 ;// [ b5 b4 a5 a4 ] + ORR xf6, xf6, xf7, LSL #8 ;// [ b7 b6 a7 a6 ] + PKHBT ra01, xf0, xf2, LSL #16 + PKHTB rb01, xf2, xf0, ASR #16 + PKHBT ra23, xf4, xf6, LSL #16 + PKHTB rb23, xf6, xf4, ASR #16 + STMIA pDest, {ra01, ra23} + IF "$stride"="s" + ADD pDest, pDest, pScale + STMIA pDest, {rb01, rb23} + ADD pDest, pDest, pScale + ELSE + ADD pDest, pDest, #($stride) + STMIA pDest, {rb01, rb23} + ADD pDest, pDest, #($stride) + ENDIF + ENDIF + IF ("$outsize"="s9"):LOR:("$outsize"="s16") + PKHBT ra01, xf0, xf1, LSL #16 + PKHTB rb01, xf1, xf0, ASR #16 + + PKHBT ra23, xf2, xf3, LSL #16 + PKHTB rb23, xf3, xf2, ASR #16 + + PKHBT ra45, xf4, xf5, LSL #16 + PKHTB rb45, xf5, xf4, ASR #16 + + PKHBT ra67, xf6, xf7, LSL #16 + PKHTB rb67, xf7, xf6, ASR #16 + + STMIA pDest, {ra01, ra23, ra45, ra67} + IF "$stride"="s" + ADD pDest, pDest, pScale + 
STMIA pDest, {rb01, rb23, rb45, rb67} + ADD pDest, pDest, pScale + ELSE + ADD pDest, pDest, #($stride) + STMIA pDest, {rb01, rb23, rb45, rb67} + ADD pDest, pDest, #($stride) + ENDIF + ENDIF + + BCC v6_idct_row$_F + ENDIF ;// ARM1136JS + + + IF CortexA8 + +Src0 EQU 7 +Src1 EQU 8 +Src2 EQU 9 +Src3 EQU 10 +Src4 EQU 11 +Src5 EQU 12 +Src6 EQU 13 +Src7 EQU 14 +Tmp EQU 15 + +qXj0 QN Src0.S16 +qXj1 QN Src1.S16 +qXj2 QN Src2.S16 +qXj3 QN Src3.S16 +qXj4 QN Src4.S16 +qXj5 QN Src5.S16 +qXj6 QN Src6.S16 +qXj7 QN Src7.S16 +qXjt QN Tmp.S16 + +dXj0lo DN (Src0*2).S16 +dXj0hi DN (Src0*2+1).S16 +dXj1lo DN (Src1*2).S16 +dXj1hi DN (Src1*2+1).S16 +dXj2lo DN (Src2*2).S16 +dXj2hi DN (Src2*2+1).S16 +dXj3lo DN (Src3*2).S16 +dXj3hi DN (Src3*2+1).S16 +dXj4lo DN (Src4*2).S16 +dXj4hi DN (Src4*2+1).S16 +dXj5lo DN (Src5*2).S16 +dXj5hi DN (Src5*2+1).S16 +dXj6lo DN (Src6*2).S16 +dXj6hi DN (Src6*2+1).S16 +dXj7lo DN (Src7*2).S16 +dXj7hi DN (Src7*2+1).S16 +dXjtlo DN (Tmp*2).S16 +dXjthi DN (Tmp*2+1).S16 + +qXi0 QN qXj0 +qXi1 QN qXj4 +qXi2 QN qXj2 +qXi3 QN qXj7 +qXi4 QN qXj5 +qXi5 QN qXjt +qXi6 QN qXj1 +qXi7 QN qXj6 +qXit QN qXj3 + +dXi0lo DN dXj0lo +dXi0hi DN dXj0hi +dXi1lo DN dXj4lo +dXi1hi DN dXj4hi +dXi2lo DN dXj2lo +dXi2hi DN dXj2hi +dXi3lo DN dXj7lo +dXi3hi DN dXj7hi +dXi4lo DN dXj5lo +dXi4hi DN dXj5hi +dXi5lo DN dXjtlo +dXi5hi DN dXjthi +dXi6lo DN dXj1lo +dXi6hi DN dXj1hi +dXi7lo DN dXj6lo +dXi7hi DN dXj6hi +dXitlo DN dXj3lo +dXithi DN dXj3hi + +qXh0 QN qXit +qXh1 QN qXi0 +qXh2 QN qXi2 +qXh3 QN qXi3 +qXh4 QN qXi7 +qXh5 QN qXi5 +qXh6 QN qXi4 +qXh7 QN qXi1 +qXht QN qXi6 + +dXh0lo DN dXitlo +dXh0hi DN dXithi +dXh1lo DN dXi0lo +dXh1hi DN dXi0hi +dXh2lo DN dXi2lo +dXh2hi DN dXi2hi +dXh3lo DN dXi3lo +dXh3hi DN dXi3hi +dXh4lo DN dXi7lo +dXh4hi DN dXi7hi +dXh5lo DN dXi5lo +dXh5hi DN dXi5hi +dXh6lo DN dXi4lo +dXh6hi DN dXi4hi +dXh7lo DN dXi1lo +dXh7hi DN dXi1hi +dXhtlo DN dXi6lo +dXhthi DN dXi6hi + +qXg0 QN qXh2 +qXg1 QN qXht +qXg2 QN qXh1 +qXg3 QN qXh0 +qXg4 QN qXh4 +qXg5 QN qXh5 +qXg6 QN qXh6 +qXg7 QN 
qXh7 +qXgt QN qXh3 + +qXf0 QN qXg6 +qXf1 QN qXg5 +qXf2 QN qXg4 +qXf3 QN qXgt +qXf4 QN qXg3 +qXf5 QN qXg2 +qXf6 QN qXg1 +qXf7 QN qXg0 +qXft QN qXg7 + + +qXt0 QN 1.S32 +qXt1 QN 2.S32 +qT0lo QN 1.S32 +qT0hi QN 2.S32 +qT1lo QN 3.S32 +qT1hi QN 4.S32 +qScalelo QN 5.S32 ;// used to read post scale values +qScalehi QN 6.S32 +qTemp0 QN 5.S32 +qTemp1 QN 6.S32 + + +Scale1 EQU 6 +Scale2 EQU 15 +qScale1 QN Scale1.S16 +qScale2 QN Scale2.S16 +dScale1lo DN (Scale1*2).S16 +dScale1hi DN (Scale1*2+1).S16 +dScale2lo DN (Scale2*2).S16 +dScale2hi DN (Scale2*2+1).S16 + +dCoefs DN 0.S16 ;// Scale coefficients in format {[0] [C] [S] [InvSqrt2]} +InvSqrt2 DN dCoefs[0] ;// 1/sqrt(2) in Q15 +S DN dCoefs[1] ;// Sin(PI/8) in Q15 +C DN dCoefs[2] ;// Cos(PI/8) in Q15 + +pTemp RN 12 + + + IMPORT armCOMM_IDCTCoef + + VLD1 {qXj0,qXj1}, [pSrc @64]! + VLD1 {qXj2,qXj3}, [pSrc @64]! + VLD1 {qXj4,qXj5}, [pSrc @64]! + VLD1 {qXj6,qXj7}, [pSrc @64]! + + ;// Load PreScale and multiply with Src + ;// IStage 4 + + IF "$inscale"="s16" ;// 16X16 Mul + M_IDCT_PRESCALE16 + ENDIF + + IF "$inscale"="s32" ;// 32X32 ,ul + M_IDCT_PRESCALE32 + ENDIF + + ;// IStage 3 + VQDMULH qXi2, qXi2, InvSqrt2 ;// i2/sqrt(2) + VHADD qXh0, qXi0, qXi1 ;// (i0+i1)/2 + VHSUB qXh1, qXi0, qXi1 ;// (i0-i1)/2 + VHADD qXh7, qXi5, qXi7 ;// (i5+i7)/4 + VSUB qXh5, qXi5, qXi7 ;// (i5-i7)/2 + VQDMULH qXh5, qXh5, InvSqrt2 ;// h5/sqrt(2) + VSUB qXh2, qXi2, qXi3 ;// h2, h3 + + VMULL qXt0, dXi4lo, C ;// c*i4 + VMLAL qXt0, dXi6lo, S ;// c*i4+s*i6 + VMULL qXt1, dXi4hi, C + VMLAL qXt1, dXi6hi, S + VSHRN dXh4lo, qXt0, #16 ;// h4 + VSHRN dXh4hi, qXt1, #16 + + VMULL qXt0, dXi6lo, C ;// c*i6 + VMLSL qXt0, dXi4lo, S ;// -s*i4 + c*h6 + VMULL qXt1, dXi6hi, C + VMLSL qXt1, dXi4hi, S + VSHRN dXh6lo, qXt0, #16 ;// h6 + VSHRN dXh6hi, qXt1, #16 + + ;// IStage 2 + VSUB qXg6, qXh6, qXh7 + VSUB qXg5, qXh5, qXg6 + VSUB qXg4, qXh4, qXg5 + VHADD qXg1, qXh1, qXh2 ;// (h1+h2)/2 + VHSUB qXg2, qXh1, qXh2 ;// (h1-h2)/2 + VHADD qXg0, qXh0, qXh3 ;// (h0+h3)/2 + VHSUB qXg3, qXh0, 
qXh3 ;// (h0-h3)/2 + + ;// IStage 1 all rows + VADD qXf3, qXg3, qXg4 + VSUB qXf4, qXg3, qXg4 + VADD qXf2, qXg2, qXg5 + VSUB qXf5, qXg2, qXg5 + VADD qXf1, qXg1, qXg6 + VSUB qXf6, qXg1, qXg6 + VADD qXf0, qXg0, qXg7 + VSUB qXf7, qXg0, qXg7 + + ;// Transpose, store and loop +XTR0 EQU Src5 +XTR1 EQU Tmp +XTR2 EQU Src6 +XTR3 EQU Src7 +XTR4 EQU Src3 +XTR5 EQU Src0 +XTR6 EQU Src1 +XTR7 EQU Src2 +XTRt EQU Src4 + +qA0 QN XTR0.S32 ;// for XTRpose +qA1 QN XTR1.S32 +qA2 QN XTR2.S32 +qA3 QN XTR3.S32 +qA4 QN XTR4.S32 +qA5 QN XTR5.S32 +qA6 QN XTR6.S32 +qA7 QN XTR7.S32 + +dB0 DN XTR0*2+1 ;// for using VSWP +dB1 DN XTR1*2+1 +dB2 DN XTR2*2+1 +dB3 DN XTR3*2+1 +dB4 DN XTR4*2 +dB5 DN XTR5*2 +dB6 DN XTR6*2 +dB7 DN XTR7*2 + + + VTRN qXf0, qXf1 + VTRN qXf2, qXf3 + VTRN qXf4, qXf5 + VTRN qXf6, qXf7 + VTRN qA0, qA2 + VTRN qA1, qA3 + VTRN qA4, qA6 + VTRN qA5, qA7 + VSWP dB0, dB4 + VSWP dB1, dB5 + VSWP dB2, dB6 + VSWP dB3, dB7 + + +qYj0 QN qXf0 +qYj1 QN qXf1 +qYj2 QN qXf2 +qYj3 QN qXf3 +qYj4 QN qXf4 +qYj5 QN qXf5 +qYj6 QN qXf6 +qYj7 QN qXf7 +qYjt QN qXft + +dYj0lo DN (XTR0*2).S16 +dYj0hi DN (XTR0*2+1).S16 +dYj1lo DN (XTR1*2).S16 +dYj1hi DN (XTR1*2+1).S16 +dYj2lo DN (XTR2*2).S16 +dYj2hi DN (XTR2*2+1).S16 +dYj3lo DN (XTR3*2).S16 +dYj3hi DN (XTR3*2+1).S16 +dYj4lo DN (XTR4*2).S16 +dYj4hi DN (XTR4*2+1).S16 +dYj5lo DN (XTR5*2).S16 +dYj5hi DN (XTR5*2+1).S16 +dYj6lo DN (XTR6*2).S16 +dYj6hi DN (XTR6*2+1).S16 +dYj7lo DN (XTR7*2).S16 +dYj7hi DN (XTR7*2+1).S16 +dYjtlo DN (XTRt*2).S16 +dYjthi DN (XTRt*2+1).S16 + +qYi0 QN qYj0 +qYi1 QN qYj4 +qYi2 QN qYj2 +qYi3 QN qYj7 +qYi4 QN qYj5 +qYi5 QN qYjt +qYi6 QN qYj1 +qYi7 QN qYj6 +qYit QN qYj3 + +dYi0lo DN dYj0lo +dYi0hi DN dYj0hi +dYi1lo DN dYj4lo +dYi1hi DN dYj4hi +dYi2lo DN dYj2lo +dYi2hi DN dYj2hi +dYi3lo DN dYj7lo +dYi3hi DN dYj7hi +dYi4lo DN dYj5lo +dYi4hi DN dYj5hi +dYi5lo DN dYjtlo +dYi5hi DN dYjthi +dYi6lo DN dYj1lo +dYi6hi DN dYj1hi +dYi7lo DN dYj6lo +dYi7hi DN dYj6hi +dYitlo DN dYj3lo +dYithi DN dYj3hi + +qYh0 QN qYit +qYh1 QN qYi0 +qYh2 QN qYi2 +qYh3 
QN qYi3 +qYh4 QN qYi7 +qYh5 QN qYi5 +qYh6 QN qYi4 +qYh7 QN qYi1 +qYht QN qYi6 + +dYh0lo DN dYitlo +dYh0hi DN dYithi +dYh1lo DN dYi0lo +dYh1hi DN dYi0hi +dYh2lo DN dYi2lo +dYh2hi DN dYi2hi +dYh3lo DN dYi3lo +dYh3hi DN dYi3hi +dYh4lo DN dYi7lo +dYh4hi DN dYi7hi +dYh5lo DN dYi5lo +dYh5hi DN dYi5hi +dYh6lo DN dYi4lo +dYh6hi DN dYi4hi +dYh7lo DN dYi1lo +dYh7hi DN dYi1hi +dYhtlo DN dYi6lo +dYhthi DN dYi6hi + +qYg0 QN qYh2 +qYg1 QN qYht +qYg2 QN qYh1 +qYg3 QN qYh0 +qYg4 QN qYh4 +qYg5 QN qYh5 +qYg6 QN qYh6 +qYg7 QN qYh7 +qYgt QN qYh3 + +qYf0 QN qYg6 +qYf1 QN qYg5 +qYf2 QN qYg4 +qYf3 QN qYgt +qYf4 QN qYg3 +qYf5 QN qYg2 +qYf6 QN qYg1 +qYf7 QN qYg0 +qYft QN qYg7 + + VRSHR qYj7, qYj7, #2 + VRSHR qYj6, qYj6, #1 + + VHADD qYi5, qYj1, qYj7 ;// i5 = (j1+j7)/2 + VSUB qYi6, qYj1, qYj7 ;// i6 = j1-j7 + VHADD qYi3, qYj2, qYj6 ;// i3 = (j2+j6)/2 + VSUB qYi2, qYj2, qYj6 ;// i2 = j2-j6 + VHADD qYi7, qYj5, qYj3 ;// i7 = (j5+j3)/2 + VSUB qYi4, qYj5, qYj3 ;// i4 = j5-j3 + + VQDMULH qYi2, qYi2, InvSqrt2 ;// i2/sqrt(2) + ;// IStage 4,3 rows 0to1 x 1/2 + + MOV pTemp, #0x4 ;// ensure correct round + VDUP qScale1, pTemp ;// of DC result + VADD qYi0, qYi0, qScale1 + + VHADD qYh0, qYi0, qYi1 ;// (i0+i1)/2 + VHSUB qYh1, qYi0, qYi1 ;// (i0-i1)/2 + + VHADD qYh7, qYi5, qYi7 ;// (i5+i7)/4 + VSUB qYh5, qYi5, qYi7 ;// (i5-i7)/2 + VSUB qYh2, qYi2, qYi3 ;// h2, h3 + VQDMULH qYh5, qYh5, InvSqrt2 ;// h5/sqrt(2) + + VMULL qXt0, dYi4lo, C ;// c*i4 + VMLAL qXt0, dYi6lo, S ;// c*i4+s*i6 + VMULL qXt1, dYi4hi, C + VMLAL qXt1, dYi6hi, S + VSHRN dYh4lo, qXt0, #16 ;// h4 + VSHRN dYh4hi, qXt1, #16 + + VMULL qXt0, dYi6lo, C ;// c*i6 + VMLSL qXt0, dYi4lo, S ;// -s*i4 + c*h6 + VMULL qXt1, dYi6hi, C + VMLSL qXt1, dYi4hi, S + VSHRN dYh6lo, qXt0, #16 ;// h6 + VSHRN dYh6hi, qXt1, #16 + + VSUB qYg6, qYh6, qYh7 + VSUB qYg5, qYh5, qYg6 + VSUB qYg4, qYh4, qYg5 + + ;// IStage 2 rows 0to3 x 1/2 + VHADD qYg1, qYh1, qYh2 ;// (h1+h2)/2 + VHSUB qYg2, qYh1, qYh2 ;// (h1-h2)/2 + VHADD qYg0, qYh0, qYh3 ;// (h0+h3)/2 + VHSUB qYg3, qYh0, 
qYh3 ;// (h0-h3)/2 + + + ;// IStage 1 all rows + VHADD qYf3, qYg3, qYg4 + VHSUB qYf4, qYg3, qYg4 + VHADD qYf2, qYg2, qYg5 + VHSUB qYf5, qYg2, qYg5 + VHADD qYf1, qYg1, qYg6 + VHSUB qYf6, qYg1, qYg6 + VHADD qYf0, qYg0, qYg7 + VHSUB qYf7, qYg0, qYg7 + +YTR0 EQU Src0 +YTR1 EQU Src4 +YTR2 EQU Src1 +YTR3 EQU Src2 +YTR4 EQU Src7 +YTR5 EQU Src5 +YTR6 EQU Tmp +YTR7 EQU Src6 +YTRt EQU Src3 + +qC0 QN YTR0.S32 ;// for YTRpose +qC1 QN YTR1.S32 +qC2 QN YTR2.S32 +qC3 QN YTR3.S32 +qC4 QN YTR4.S32 +qC5 QN YTR5.S32 +qC6 QN YTR6.S32 +qC7 QN YTR7.S32 + +dD0 DN YTR0*2+1 ;// for using VSWP +dD1 DN YTR1*2+1 +dD2 DN YTR2*2+1 +dD3 DN YTR3*2+1 +dD4 DN YTR4*2 +dD5 DN YTR5*2 +dD6 DN YTR6*2 +dD7 DN YTR7*2 + + VTRN qYf0, qYf1 + VTRN qYf2, qYf3 + VTRN qYf4, qYf5 + VTRN qYf6, qYf7 + VTRN qC0, qC2 + VTRN qC1, qC3 + VTRN qC4, qC6 + VTRN qC5, qC7 + VSWP dD0, dD4 + VSWP dD1, dD5 + VSWP dD2, dD6 + VSWP dD3, dD7 + + +dYf0U8 DN YTR0*2.U8 +dYf1U8 DN YTR1*2.U8 +dYf2U8 DN YTR2*2.U8 +dYf3U8 DN YTR3*2.U8 +dYf4U8 DN YTR4*2.U8 +dYf5U8 DN YTR5*2.U8 +dYf6U8 DN YTR6*2.U8 +dYf7U8 DN YTR7*2.U8 + + ;// + ;// Do saturation if outsize is other than S16 + ;// + + IF ("$outsize"="u8") + ;// Output range [0-255] + VQMOVN dYf0U8, qYf0 + VQMOVN dYf1U8, qYf1 + VQMOVN dYf2U8, qYf2 + VQMOVN dYf3U8, qYf3 + VQMOVN dYf4U8, qYf4 + VQMOVN dYf5U8, qYf5 + VQMOVN dYf6U8, qYf6 + VQMOVN dYf7U8, qYf7 + ENDIF + + IF ("$outsize"="s9") + ;// Output range [-256 to +255] + VQSHL qYf0, qYf0, #16-9 + VQSHL qYf1, qYf1, #16-9 + VQSHL qYf2, qYf2, #16-9 + VQSHL qYf3, qYf3, #16-9 + VQSHL qYf4, qYf4, #16-9 + VQSHL qYf5, qYf5, #16-9 + VQSHL qYf6, qYf6, #16-9 + VQSHL qYf7, qYf7, #16-9 + + VSHR qYf0, qYf0, #16-9 + VSHR qYf1, qYf1, #16-9 + VSHR qYf2, qYf2, #16-9 + VSHR qYf3, qYf3, #16-9 + VSHR qYf4, qYf4, #16-9 + VSHR qYf5, qYf5, #16-9 + VSHR qYf6, qYf6, #16-9 + VSHR qYf7, qYf7, #16-9 + ENDIF + + ;// Store output depending on the Stride size + IF "$stride"="s" + VST1 qYf0, [pDest @64], Stride + VST1 qYf1, [pDest @64], Stride + VST1 qYf2, [pDest @64], 
Stride + VST1 qYf3, [pDest @64], Stride + VST1 qYf4, [pDest @64], Stride + VST1 qYf5, [pDest @64], Stride + VST1 qYf6, [pDest @64], Stride + VST1 qYf7, [pDest @64] + ELSE + IF ("$outsize"="u8") + VST1 dYf0U8, [pDest @64], #8 + VST1 dYf1U8, [pDest @64], #8 + VST1 dYf2U8, [pDest @64], #8 + VST1 dYf3U8, [pDest @64], #8 + VST1 dYf4U8, [pDest @64], #8 + VST1 dYf5U8, [pDest @64], #8 + VST1 dYf6U8, [pDest @64], #8 + VST1 dYf7U8, [pDest @64] + ELSE + ;// ("$outsize"="s9") or ("$outsize"="s16") + VST1 qYf0, [pDest @64], #16 + VST1 qYf1, [pDest @64], #16 + VST1 qYf2, [pDest @64], #16 + VST1 qYf3, [pDest @64], #16 + VST1 qYf4, [pDest @64], #16 + VST1 qYf5, [pDest @64], #16 + VST1 qYf6, [pDest @64], #16 + VST1 qYf7, [pDest @64] + ENDIF + + ENDIF + + + + ENDIF ;// CortexA8 + + + + MEND + + ;// Scale TWO input rows with TWO rows of 16 bit scale values + ;// + ;// This macro is used by M_IDCT_PRESCALE16 to pre-scale one row + ;// input (Eight input values) with one row of scale values. Also + ;// Loads next scale values from pScale, if $LastRow flag is not set. 
+ ;// + ;// Input Registers: + ;// + ;// $dAlo - Input D register with first four S16 values of row n + ;// $dAhi - Input D register with next four S16 values of row n + ;// $dBlo - Input D register with first four S16 values of row n+1 + ;// $dBhi - Input D register with next four S16 values of row n+1 + ;// pScale - Pointer to next row of scale values + ;// qT0lo - Temporary scratch register + ;// qT0hi - Temporary scratch register + ;// qT1lo - Temporary scratch register + ;// qT1hi - Temporary scratch register + ;// dScale1lo - Scale value of row n + ;// dScale1hi - Scale value of row n + ;// dScale2lo - Scale value of row n+1 + ;// dScale2hi - Scale value of row n+1 + ;// + ;// Input Flag + ;// + ;// $LastRow - Pass 0 to load the next two rows of scale values from pScale, + ;// any other value (last pair of rows) skips that load + ;// + ;// Output Registers: + ;// + ;// $dAlo - Scaled output values (first four S16 of row n) + ;// $dAhi - Scaled output values (next four S16 of row n) + ;// $dBlo - Scaled output values (first four S16 of row n+1) + ;// $dBhi - Scaled output values (next four S16 of row n+1) + ;// qScale1 - Scale values for row n+2 (reloaded only when $LastRow is 0) + ;// qScale2 - Scale values for row n+3 (reloaded only when $LastRow is 0) + ;// pScale - Pointer to next row of scale values (advanced by 32 bytes when $LastRow is 0) + ;// + MACRO + M_IDCT_SCALE16 $dAlo, $dAhi, $dBlo, $dBhi, $LastRow + VMULL qT0lo, $dAlo, dScale1lo ;// Widening multiply: row n input by row n scale + VMULL qT0hi, $dAhi, dScale1hi + VMULL qT1lo, $dBlo, dScale2lo ;// Widening multiply: row n+1 input by row n+1 scale + VMULL qT1hi, $dBhi, dScale2hi + IF "$LastRow"="0" + VLD1 qScale1, [pScale], #16 ;// Load scale for row n+2 + VLD1 qScale2, [pScale], #16 ;// Load scale for row n+3 + ENDIF + VQRSHRN $dAlo, qT0lo, #12 ;// Rounding, saturating narrow of (product >> 12) back to S16 + VQRSHRN $dAhi, qT0hi, #12 + VQRSHRN $dBlo, qT1lo, #12 + VQRSHRN $dBhi, qT1hi, #12 + MEND + + ;// Scale 8x8 block input values with 16 bit scale values + ;// + ;// This macro is used to pre-scale block of 8x8 input. + ;// This also does the 1st stage transformations of the IDCT.
+ ;// + ;// Input Registers: + ;// + ;// dXjnlo - n th input D register with first four S16 values + ;// dXjnhi - n th input D register with next four S16 values + ;// qXjn - n th input Q register with eight S16 values + ;// pScale - Pointer to scale values + ;// + ;// Output Registers: + ;// + ;// qXin - n th output Q register with eight S16 output values of 1st stage + ;// pSrc - Overwritten with the address of armCOMM_IDCTCoef + ;// dCoefs - DCT inverse AAN constants loaded from armCOMM_IDCTCoef + ;// + MACRO + M_IDCT_PRESCALE16 + VLD1 qScale1, [pScale], #16 ;// Load Pre scale for row 0 + VLD1 qScale2, [pScale], #16 ;// Load Pre scale for row 1 + M_IDCT_SCALE16 dXj0lo, dXj0hi, dXj1lo, dXj1hi, 0 ;// Pre scale row 0 & 1 + M_IDCT_SCALE16 dXj2lo, dXj2hi, dXj3lo, dXj3hi, 0 ;// Pre scale row 2 & 3 + M_IDCT_SCALE16 dXj4lo, dXj4hi, dXj5lo, dXj5hi, 0 ;// Pre scale row 4 & 5 + M_IDCT_SCALE16 dXj6lo, dXj6hi, dXj7lo, dXj7hi, 1 ;// Pre scale row 6 & 7, last pair so no further scale load + VHADD qXi5, qXj1, qXj7 ;// (j1+j7)/2 + VSUB qXi6, qXj1, qXj7 ;// j1-j7 + LDR pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants + VHADD qXi3, qXj2, qXj6 ;// (j2+j6)/2 + VSUB qXi2, qXj2, qXj6 ;// j2-j6 + VLDR dCoefs, [pSrc] ;// Load DCT inverse AAN constants + VHADD qXi7, qXj5, qXj3 ;// (j5+j3)/2 + VSUB qXi4, qXj5, qXj3 ;// j5-j3 + MEND + + + ;// Scale 8x8 block input values with 32 bit scale values + ;// + ;// This macro is used to pre-scale block of 8x8 input. + ;// This also does the 1st stage transformations of the IDCT.
+ ;// + ;// Input Registers: + ;// + ;// dXjnlo - n th input D register with first four S16 values + ;// dXjnhi - n th input D register with next four S16 values + ;// qXjn - n th input Q register with eight S16 values + ;// pScale - Pointer to 32bit scale values in Q23 format + ;// + ;// Output Registers: + ;// + ;// dXinlo - n th output D register with first four S16 output values of 1st stage + ;// dXinhi - n th output D register with next four S16 output values of 1st stage + ;// + MACRO + M_IDCT_PRESCALE32 +qScale0lo QN 0.S32 +qScale0hi QN 1.S32 +qScale1lo QN 2.S32 +qScale1hi QN 3.S32 +qScale2lo QN qScale1lo +qScale2hi QN qScale1hi +qScale3lo QN qScale1lo +qScale3hi QN qScale1hi +qScale4lo QN qScale1lo +qScale4hi QN qScale1hi +qScale5lo QN qScale0lo +qScale5hi QN qScale0hi +qScale6lo QN qScale0lo +qScale6hi QN qScale0hi +qScale7lo QN qScale0lo +qScale7hi QN qScale0hi + +qSrc0lo QN 4.S32 +qSrc0hi QN 5.S32 +qSrc1lo QN 6.S32 +qSrc1hi QN Src4.S32 +qSrc2lo QN qSrc0lo +qSrc2hi QN qSrc0hi +qSrc3lo QN qSrc0lo +qSrc3hi QN qSrc0hi +qSrc4lo QN qSrc0lo +qSrc4hi QN qSrc0hi +qSrc5lo QN qSrc1lo +qSrc5hi QN qSrc1hi +qSrc6lo QN qSrc1lo +qSrc6hi QN qSrc1hi +qSrc7lo QN qSrc0lo +qSrc7hi QN qSrc0hi + +qRes17lo QN qScale0lo +qRes17hi QN qScale0hi +qRes26lo QN qScale0lo +qRes26hi QN qScale0hi +qRes53lo QN qScale0lo +qRes53hi QN qScale0hi + + ADD pTemp, pScale, #4*8*7 ;// Address of pScale[7] + + ;// Row 0 + VLD1 {qScale0lo, qScale0hi}, [pScale]! + VSHLL qSrc0lo, dXj0lo, #(12-1) + VSHLL qSrc0hi, dXj0hi, #(12-1) + VLD1 {qScale1lo, qScale1hi}, [pScale]! + VQRDMULH qSrc0lo, qScale0lo, qSrc0lo + VQRDMULH qSrc0hi, qScale0hi, qSrc0hi + VLD1 {qScale7lo, qScale7hi}, [pTemp]! 
+ VSHLL qSrc1lo, dXj1lo, #(12-1) + VSHLL qSrc1hi, dXj1hi, #(12-1) + VMOVN dXi0lo, qSrc0lo ;// Output i0 + VMOVN dXi0hi, qSrc0hi + VSHLL qSrc7lo, dXj7lo, #(12-1) + VSHLL qSrc7hi, dXj7hi, #(12-1) + SUB pTemp, pTemp, #((16*2)+(4*8*1)) + VQRDMULH qSrc1lo, qScale1lo, qSrc1lo + VQRDMULH qSrc1hi, qScale1hi, qSrc1hi + VQRDMULH qSrc7lo, qScale7lo, qSrc7lo + VQRDMULH qSrc7hi, qScale7hi, qSrc7hi + VLD1 {qScale2lo, qScale2hi}, [pScale]! + + ;// Row 1 & 7 + VHADD qRes17lo, qSrc1lo, qSrc7lo ;// (j1+j7)/2 + VHADD qRes17hi, qSrc1hi, qSrc7hi ;// (j1+j7)/2 + VMOVN dXi5lo, qRes17lo ;// Output i5 + VMOVN dXi5hi, qRes17hi + VSUB qRes17lo, qSrc1lo, qSrc7lo ;// j1-j7 + VSUB qRes17hi, qSrc1hi, qSrc7hi ;// j1-j7 + VMOVN dXi6lo, qRes17lo ;// Output i6 + VMOVN dXi6hi, qRes17hi + VSHLL qSrc2lo, dXj2lo, #(12-1) + VSHLL qSrc2hi, dXj2hi, #(12-1) + VLD1 {qScale6lo, qScale6hi}, [pTemp]! + VSHLL qSrc6lo, dXj6lo, #(12-1) + VSHLL qSrc6hi, dXj6hi, #(12-1) + SUB pTemp, pTemp, #((16*2)+(4*8*1)) + VQRDMULH qSrc2lo, qScale2lo, qSrc2lo + VQRDMULH qSrc2hi, qScale2hi, qSrc2hi + VQRDMULH qSrc6lo, qScale6lo, qSrc6lo + VQRDMULH qSrc6hi, qScale6hi, qSrc6hi + VLD1 {qScale3lo, qScale3hi}, [pScale]! + + ;// Row 2 & 6 + VHADD qRes26lo, qSrc2lo, qSrc6lo ;// (j2+j6)/2 + VHADD qRes26hi, qSrc2hi, qSrc6hi ;// (j2+j6)/2 + VMOVN dXi3lo, qRes26lo ;// Output i3 + VMOVN dXi3hi, qRes26hi + VSUB qRes26lo, qSrc2lo, qSrc6lo ;// j2-j6 + VSUB qRes26hi, qSrc2hi, qSrc6hi ;// j2-j6 + VMOVN dXi2lo, qRes26lo ;// Output i2 + VMOVN dXi2hi, qRes26hi + VSHLL qSrc3lo, dXj3lo, #(12-1) + VSHLL qSrc3hi, dXj3hi, #(12-1) + VLD1 {qScale5lo, qScale5hi}, [pTemp]! 
+ VSHLL qSrc5lo, dXj5lo, #(12-1) + VSHLL qSrc5hi, dXj5hi, #(12-1) + VQRDMULH qSrc3lo, qScale3lo, qSrc3lo + VQRDMULH qSrc3hi, qScale3hi, qSrc3hi + VQRDMULH qSrc5lo, qScale5lo, qSrc5lo + VQRDMULH qSrc5hi, qScale5hi, qSrc5hi + + ;// Row 3 & 5 + VHADD qRes53lo, qSrc5lo, qSrc3lo ;// (j5+j3)/2 + VHADD qRes53hi, qSrc5hi, qSrc3hi ;// (j5+j3)/2 + SUB pSrc, pSrc, #16*2*2 + VMOVN dXi7lo, qRes53lo ;// Output i7 + VMOVN dXi7hi, qRes53hi + VSUB qRes53lo, qSrc5lo, qSrc3lo ;// j5-j3 + VSUB qRes53hi, qSrc5hi, qSrc3hi ;// j5-j3 + VLD1 qXj4, [pSrc @64] + VMOVN dXi4lo, qRes53lo ;// Output i4 + VMOVN dXi4hi, qRes53hi + VSHLL qSrc4lo, dXj4lo, #(12-1) + VSHLL qSrc4hi, dXj4hi, #(12-1) + VLD1 {qScale4lo, qScale4hi}, [pScale] + LDR pSrc, =armCOMM_IDCTCoef ;// Address of DCT inverse AAN constants + VQRDMULH qSrc4lo, qScale4lo, qSrc4lo + VQRDMULH qSrc4hi, qScale4hi, qSrc4hi + VLDR dCoefs, [pSrc] ;// Load DCT inverse AAN constants + ;// Row 4 + VMOVN dXi1lo, qSrc4lo ;// Output i1 + VMOVN dXi1hi, qSrc4hi + + MEND + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_MaskTable.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_MaskTable.h new file mode 100755 index 0000000..51118fd --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_MaskTable.h @@ -0,0 +1,27 @@ +/** + * + * File Name: armCOMM_MaskTable.h + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. 
+ * + * + * + * Mask Table to mask the end of array + */ + + + +#ifndef _ARMCOMM_MASKTABLE_H_ +#define _ARMCOMM_MASKTABLE_H_ + +#define MaskTableSize 72 + +/* Mask table */ + +extern const OMX_U16 armCOMM_qMaskTable16[MaskTableSize]; +extern const OMX_U8 armCOMM_qMaskTable8[MaskTableSize]; + +#endif diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Version.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Version.h new file mode 100755 index 0000000..41b3e1e --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_Version.h @@ -0,0 +1,43 @@ +/* Guard the header against multiple inclusion. */ +#ifndef __ARM_COMM_VERSION_H__ +#define __ARM_COMM_VERSION_H__ + + +/* The following line should be in omxtypes.h but hasn't been approved by OpenMAX yet */ +#define OMX_VERSION 102 + +/* We need to define these macros in order to convert a #define number into a #define string. */ +#define ARM_QUOTE(a) #a +#define ARM_INDIRECT(A) ARM_QUOTE(A) + +/* Convert the OMX_VERSION number into a string that can be used, for example, to print it out. */ +#define ARM_VERSION_STRING ARM_INDIRECT(OMX_VERSION) + + +/* Define this in order to turn on ARM version/release/build strings in each domain */ +#define ARM_INCLUDE_VERSION_DESCRIPTIONS + +#ifdef ARM_INCLUDE_VERSION_DESCRIPTIONS + extern const char * const omxAC_VersionDescription; + extern const char * const omxIC_VersionDescription; + extern const char * const omxIP_VersionDescription; + extern const char * const omxSP_VersionDescription; + extern const char * const omxVC_VersionDescription; +#endif /* ARM_INCLUDE_VERSION_DESCRIPTIONS */ + + +/* The following entries should be automatically updated by the release script */ +/* They are used in the ARM version strings defined for each domain. */ + +/* The release tag associated with this release of the library. 
- used for source and object releases */ +#define OMX_ARM_RELEASE_TAG "r1p0-00bet0" + +/* The ARM architecture used to build any objects or executables in this release. */ +#define OMX_ARM_BUILD_ARCHITECTURE "ARM Architecture V7 with NEON" + +/* The ARM Toolchain used to build any objects or executables in this release. */ +#define OMX_ARM_BUILD_TOOLCHAIN "ARM RVCT 3.1" + + +#endif /* __ARM_COMM_VERSION_H__ */ + diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_s.h new file mode 100755 index 0000000..0956bd1 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armCOMM_s.h @@ -0,0 +1,1157 @@ +;// +;// +;// File Name: armCOMM_s.h +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// +;// ARM optimized OpenMAX common header file +;// + +;// Protect against multiple inclusion + IF :LNOT::DEF:ARMCOMM_S_H + GBLL ARMCOMM_S_H + + REQUIRE8 ;// Requires 8-byte stack alignment + PRESERVE8 ;// Preserves 8-byte stack alignment + + GBLL ARM_ERRORCHECK +ARM_ERRORCHECK SETL {FALSE} + +;// Globals + + GBLS _RRegList ;// R saved register list + GBLS _DRegList ;// D saved register list + GBLS _Variant ;// Selected processor variant + GBLS _CPU ;// CPU name + GBLS _Struct ;// Structure name + + GBLL _InFunc ;// Inside function assembly flag + GBLL _SwLong ;// Long switch flag + + GBLA _RBytes ;// Number of register bytes on stack + GBLA _SBytes ;// Number of scratch bytes on stack + GBLA _ABytes ;// Stack offset of next argument + GBLA _Workspace ;// Stack offset of scratch workspace + GBLA _F ;// Function number + GBLA _StOff ;// Struct offset + GBLA _SwNum ;// Switch number + GBLS _32 ;// Suffix for 32 byte alignmnet + GBLS _16 ;// Suffix for 16 byte alignmnet + +_InFunc SETL {FALSE} +_SBytes SETA 0 +_F SETA 0 +_SwNum SETA 0 +_32 SETS "ALIGN32" 
+_16 SETS "ALIGN16" + +;///////////////////////////////////////////////////////// +;// Override the tools settings of the CPU if the symbol +;// OVERRIDECPU is defined, otherwise use the CPU defined by the +;// assembler settings. +;///////////////////////////////////////////////////////// + + IF :DEF: OVERRIDECPU +_CPU SETS OVERRIDECPU + ELSE +_CPU SETS {CPU} + ENDIF + + + +;///////////////////////////////////////////////////////// +;// Work out which code to build +;///////////////////////////////////////////////////////// + + IF :DEF:ARM1136JS:LOR::DEF:CortexA8:LOR::DEF:ARM_GENERIC + INFO 1,"Please switch to using M_VARIANTS" + ENDIF + + ;// Define and reset all officially recognised variants + MACRO + _M_DEF_VARIANTS + _M_DEF_VARIANT ARM926EJS + _M_DEF_VARIANT ARM1136JS + _M_DEF_VARIANT ARM1136JS_U + _M_DEF_VARIANT CortexA8 + _M_DEF_VARIANT ARM7TDMI + MEND + + ;// Declare the logical flag for one variant together with its + ;// _ok marker (tested by _M_VARIANT), and reset the flag to FALSE + MACRO + _M_DEF_VARIANT $var + GBLL $var + GBLL _ok$var +$var SETL {FALSE} + MEND + + + ;// Variant declaration + ;// + ;// Define a list of code variants supported by this + ;// source file. This macro then chooses the most + ;// appropriate variant to build for the currently configured + ;// core.
+ ;// + MACRO + M_VARIANTS $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7 + ;// Set to TRUE variants that are supported + _M_DEF_VARIANTS + _M_VARIANT $v0 + _M_VARIANT $v1 + _M_VARIANT $v2 + _M_VARIANT $v3 + _M_VARIANT $v4 + _M_VARIANT $v5 + _M_VARIANT $v6 + _M_VARIANT $v7 + + ;// Look for first available variant to match a CPU + ;// _M_TRY cpu, variant fall back list +_Variant SETS "" + _M_TRY ARM926EJ-S, ARM926EJS + _M_TRY ARM1176JZ-S, ARM1136JS + _M_TRY ARM1176JZF-S, ARM1136JS + _M_TRY ARM1156T2-S, ARM1136JS + _M_TRY ARM1156T2F-S, ARM1136JS + _M_TRY ARM1136J-S, ARM1136JS + _M_TRY ARM1136JF-S, ARM1136JS + _M_TRY MPCore, ARM1136JS + _M_TRY falcon-vfp, ARM1136JS + _M_TRY falcon-full-neon, CortexA8 + _M_TRY Cortex-A8NoNeon, ARM1136JS + _M_TRY Cortex-A8, CortexA8, ARM1136JS + _M_TRY Cortex-R4, ARM1136JS + _M_TRY ARM7TDMI + + ;// Select the correct variant + _M_DEF_VARIANTS + IF _Variant="" + INFO 1, "No match found for CPU '$_CPU'" + ELSE +$_Variant SETL {TRUE} + ENDIF + MEND + + ;// Register a variant as available + MACRO + _M_VARIANT $var + IF "$var"="" + MEXIT + ENDIF + IF :LNOT::DEF:_ok$var + INFO 1, "Unrecognized variant '$var'" + ENDIF +$var SETL {TRUE} + MEND + + ;// For a given CPU, see if any of the variants supporting + ;// this CPU are available. 
The first available variant is + ;// chosen + MACRO + _M_TRY $cpu, $v0,$v1,$v2,$v3,$v4,$v5,$v6,$v7 + IF "$cpu"<>_CPU + MEXIT + ENDIF + _M_TRY1 $v0 + _M_TRY1 $v1 + _M_TRY1 $v2 + _M_TRY1 $v3 + _M_TRY1 $v4 + _M_TRY1 $v5 + _M_TRY1 $v6 + _M_TRY1 $v7 + ;// Check a match was found + IF _Variant="" + INFO 1, "No variant match found for CPU '$_CPU'" + ENDIF + MEND + + MACRO + _M_TRY1 $var + IF "$var"="" + MEXIT + ENDIF + IF (_Variant=""):LAND:$var +_Variant SETS "$var" + ENDIF + MEND + +;//////////////////////////////////////////////////////// +;// Structure definition +;//////////////////////////////////////////////////////// + + ;// Declare a structure of given name + MACRO + M_STRUCT $sname +_Struct SETS "$sname" +_StOff SETA 0 + MEND + + ;// Declare a structure field + ;// The field is called $sname_$fname + ;// $size = the size of each entry, must be power of 2 + ;// $number = (if provided) the number of entries for an array + MACRO + M_FIELD $fname, $size, $number + IF (_StOff:AND:($size-1))!=0 +_StOff SETA _StOff + ($size - (_StOff:AND:($size-1))) + ENDIF +$_Struct._$fname EQU _StOff + IF "$number"<>"" +_StOff SETA _StOff + $size*$number + ELSE +_StOff SETA _StOff + $size + ENDIF + MEND + + + MACRO + M_ENDSTRUCT +sizeof_$_Struct EQU _StOff +_Struct SETS "" + MEND + +;////////////////////////////////////////////////////////// +;// Switch and table macros +;////////////////////////////////////////////////////////// + + ;// Start a relative switch table with register to switch on + ;// + ;// $v = the register to switch on + ;// $s = if specified must be "L" to indicate long + ;// this allows a greater range to the case code + MACRO + M_SWITCH $v, $s + ASSERT "$s"="":LOR:"$s"="L" +_SwLong SETL {FALSE} + IF "$s"="L" +_SwLong SETL {TRUE} + ENDIF +_SwNum SETA _SwNum+1 + IF {CONFIG}=16 + ;// Thumb + IF _SwLong + TBH [pc, $v, LSL#1] + ELSE + TBB [pc, $v] + ENDIF +_Switch$_SwNum + ELSE + ;// ARM + ADD pc, pc, $v, LSL #2 + NOP + ENDIF + MEND + + ;// Add a case to the switch 
statement + MACRO + M_CASE $label + IF {CONFIG}=16 + ;// Thumb + IF _SwLong + DCW ($label - _Switch$_SwNum)/2 ;// Halfword table entry, pairs with the TBH emitted by M_SWITCH + ELSE + DCB ($label - _Switch$_SwNum)/2 ;// Byte table entry, pairs with the TBB emitted by M_SWITCH + ENDIF + ELSE + ;// ARM + B $label + ENDIF + MEND + + ;// End of switch statement + MACRO + M_ENDSWITCH + ALIGN 2 + MEND + + +;//////////////////////////////////////////////////////// +;// Data area allocation +;//////////////////////////////////////////////////////// + + ;// Constant table allocator macro + ;// + ;// Creates a new section for each constant table + ;// $name is symbol through which the table can be accessed. + ;// $align is the optional alignment of the table, log2 of + ;// the byte alignment - $align=4 is 16 byte aligned + MACRO + M_TABLE $name, $align + ASSERT :LNOT:_InFunc + IF "$align"="" + AREA |.constdata|, READONLY, DATA + ELSE + ;// AREAs inherit the alignment of the first declaration. + ;// Therefore for each alignment size we must have an area + ;// of a different name. + AREA constdata_a$align, READONLY, DATA, ALIGN=$align + + ;// We also force alignment in case we are tagging onto + ;// an already started area. + ALIGN (1<<$align) + ENDIF +$name + MEND + +;///////////////////////////////////////////////////// +;// Macros to allocate space on the stack +;// +;// These all assume that the stack is 8-byte aligned +;// at entry to the function, which means that the +;// 32-byte alignment macro needs to work in a +;// bit more of a special way... +;///////////////////////////////////////////////////// + + + + + ;// Allocate 1-byte aligned area of name + ;// $name size $size bytes. + ;// Defines $name$_F as the current scratch byte count (_SBytes) + ;// and then grows _SBytes by $size. + MACRO + M_ALLOC1 $name, $size + ASSERT :LNOT:_InFunc +$name$_F EQU _SBytes +_SBytes SETA _SBytes + ($size) + MEND + + ;// Allocate 2-byte aligned area of name + ;// $name size $size bytes.
+ MACRO + M_ALLOC2 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:1)!=0 +_SBytes SETA _SBytes + (2 - (_SBytes:AND:1)) ;// Round _SBytes up to a multiple of 2 + ENDIF +$name$_F EQU _SBytes +_SBytes SETA _SBytes + ($size) + MEND + + ;// Allocate 4-byte aligned area of name + ;// $name size $size bytes. + MACRO + M_ALLOC4 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:3)!=0 +_SBytes SETA _SBytes + (4 - (_SBytes:AND:3)) ;// Round _SBytes up to a multiple of 4 + ENDIF +$name$_F EQU _SBytes +_SBytes SETA _SBytes + ($size) + MEND + + ;// Allocate 8-byte aligned area of name + ;// $name size $size bytes. + MACRO + M_ALLOC8 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:7)!=0 +_SBytes SETA _SBytes + (8 - (_SBytes:AND:7)) ;// Round _SBytes up to a multiple of 8 + ENDIF +$name$_F EQU _SBytes +_SBytes SETA _SBytes + ($size) + MEND + + + ;// Allocate 8-byte aligned area of name + ;// $name size ($size+8) bytes. + ;// The extra 8 bytes are later used (by M_ADR16) to align the pointer to 16 bytes: + ;// the recorded offset is 8 bytes into the area and M_ADR16 clears the low 4 address bits + + MACRO + M_ALLOC16 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:7)!=0 +_SBytes SETA _SBytes + (8 - (_SBytes:AND:7)) ;// Round _SBytes up to a multiple of 8 + ENDIF +$name$_F$_16 EQU (_SBytes + 8) +_SBytes SETA _SBytes + ($size) + 8 + MEND + + ;// Allocate 8-byte aligned area of name + ;// $name size ($size+32) bytes.
+ ;// The extra 24 bytes reserved below are later used (by M_ADR32) to align the pointer to 32 bytes + + MACRO + M_ALLOC32 $name, $size + ASSERT :LNOT:_InFunc + IF (_SBytes:AND:7)!=0 +_SBytes SETA _SBytes + (8 - (_SBytes:AND:7)) + ENDIF +$name$_F$_32 EQU (_SBytes + 24) +_SBytes SETA _SBytes + ($size) + 24 + MEND + + + + + ;// Argument Declaration Macro + ;// + ;// Allocate an argument name $name + ;// size $size bytes + MACRO + M_ARG $name, $size + ASSERT _InFunc +$name$_F EQU _ABytes +_ABytes SETA _ABytes + ($size) + MEND + +;/////////////////////////////////////////////// +;// Macros to access stacked variables +;/////////////////////////////////////////////// + + ;// Macro to perform a data processing operation + ;// with a constant second operand + ;// Trailing zero bit-pairs of $const are shifted out first; the low 8 bits + ;// are then applied by one instruction and, if the shifted value is 256 or + ;// more, the remaining bits by a second instruction operating on $rd + MACRO + _M_OPC $op,$rd,$rn,$const + LCLA _sh + LCLA _cst +_sh SETA 0 +_cst SETA $const + IF _cst=0 + $op $rd, $rn, #_cst + MEXIT + ENDIF + WHILE (_cst:AND:3)=0 +_cst SETA _cst>>2 +_sh SETA _sh+2 + WEND + $op $rd, $rn, #(_cst:AND:0x000000FF)<<_sh + IF _cst>=256 + $op $rd, $rd, #(_cst:AND:0xFFFFFF00)<<_sh + ENDIF + MEND + + ;// Macro to perform a data access operation + ;// Such as LDR or STR + ;// The addressing mode is modified such that + ;// 1. If no address is given then the name is taken + ;// as a stack offset + ;// 2. If the addressing mode is not available for the + ;// state being assembled for (eg Thumb) then a suitable + ;// addressing mode is substituted. + ;// + ;// On Entry: + ;// $i = Instruction to perform (eg "LDRB") + ;// $a = Required byte alignment + ;// $r = Register(s) to transfer (eg "r1") + ;// $a0,$a1,$a2. Addressing mode and condition. 
One of: + ;// label {,cc} + ;// [base] {,,,cc} + ;// [base, offset]{!} {,,cc} + ;// [base, offset, shift]{!} {,cc} + ;// [base], offset {,,cc} + ;// [base], offset, shift {,cc} + MACRO + _M_DATA $i,$a,$r,$a0,$a1,$a2,$a3 + IF "$a0":LEFT:1="[" + IF "$a1"="" + $i$a3 $r, $a0 + ELSE + IF "$a0":RIGHT:1="]" + IF "$a2"="" + _M_POSTIND $i$a3, "$r", $a0, $a1 + ELSE + _M_POSTIND $i$a3, "$r", $a0, "$a1,$a2" + ENDIF + ELSE + IF "$a2"="" + _M_PREIND $i$a3, "$r", $a0, $a1 + ELSE + _M_PREIND $i$a3, "$r", $a0, "$a1,$a2" + ENDIF + ENDIF + ENDIF + ELSE + LCLA _Offset +_Offset SETA _Workspace + $a0$_F + ASSERT (_Offset:AND:($a-1))=0 + $i$a1 $r, [sp, #_Offset] + ENDIF + MEND + + ;// Handle post indexed load/stores + ;// op reg, [base], offset + ;// When {CONFIG}=16 (Thumb) the access is emitted as "op reg, [base]" + ;// followed by a separate ADD (or SUB for a "-" offset) on the base register + MACRO + _M_POSTIND $i,$r,$a0,$a1 + LCLS _base + LCLS _offset + IF {CONFIG}=16 ;// Thumb +_base SETS ("$a0":LEFT:(:LEN:"$a0"-1)):RIGHT:(:LEN:"$a0"-2) ;// remove [] +_offset SETS "$a1" + IF _offset:LEFT:1="+" +_offset SETS _offset:RIGHT:(:LEN:_offset-1) + ENDIF + $i $r, $a0 + IF _offset:LEFT:1="-" +_offset SETS _offset:RIGHT:(:LEN:_offset-1) + SUB $_base, $_base, $_offset + ELSE + ADD $_base, $_base, $_offset + ENDIF + ELSE ;// ARM + $i $r, $a0, $a1 + ENDIF + MEND + + ;// Handle pre indexed load/store + ;// op reg, [base, offset]{!} + ;// When {CONFIG}=16 and writeback ("]!") is requested, the access is emitted + ;// without writeback and followed by a separate ADD on the base register + MACRO + _M_PREIND $i,$r,$a0,$a1 + LCLS _base + LCLS _offset + IF ({CONFIG}=16):LAND:(("$a1":RIGHT:2)="]!") +_base SETS "$a0":RIGHT:(:LEN:("$a0")-1) +_offset SETS "$a1":LEFT:(:LEN:("$a1")-2) + $i $r, [$_base, $_offset] + ADD $_base, $_base, $_offset + ELSE + $i $r, $a0, $a1 + ENDIF + MEND + + ;// Load unsigned byte from stack + MACRO + M_LDRB $r,$a0,$a1,$a2,$a3 + _M_DATA "LDRB",1,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load signed byte from stack + MACRO + M_LDRSB $r,$a0,$a1,$a2,$a3 + _M_DATA "LDRSB",1,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Store byte to stack + MACRO + M_STRB $r,$a0,$a1,$a2,$a3 + _M_DATA "STRB",1,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load unsigned half word from stack + MACRO + M_LDRH $r,$a0,$a1,$a2,$a3 + _M_DATA 
"LDRH",2,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load signed half word from stack + MACRO + M_LDRSH $r,$a0,$a1,$a2,$a3 + _M_DATA "LDRSH",2,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Store half word to stack + MACRO + M_STRH $r,$a0,$a1,$a2,$a3 + _M_DATA "STRH",2,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load word from stack + MACRO + M_LDR $r,$a0,$a1,$a2,$a3 + _M_DATA "LDR",4,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Store word to stack + MACRO + M_STR $r,$a0,$a1,$a2,$a3 + _M_DATA "STR",4,$r,$a0,$a1,$a2,$a3 + MEND + + ;// Load double word from stack + MACRO + M_LDRD $r0,$r1,$a0,$a1,$a2,$a3 + _M_DATA "LDRD",8,"$r0,$r1",$a0,$a1,$a2,$a3 + MEND + + ;// Store double word to stack + MACRO + M_STRD $r0,$r1,$a0,$a1,$a2,$a3 + _M_DATA "STRD",8,"$r0,$r1",$a0,$a1,$a2,$a3 + MEND + + ;// Get absolute address of stack allocated location + MACRO + M_ADR $a, $b, $cc + _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F) + MEND + + ;// Get absolute address of stack allocated location and align the address to 16 bytes + MACRO + M_ADR16 $a, $b, $cc + _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F$_16) + + ;// Now align $a to 16 bytes + BIC$cc $a,$a,#0x0F + MEND + + ;// Get absolute address of stack allocated location and align the address to 32 bytes + MACRO + M_ADR32 $a, $b, $cc + _M_OPC ADD$cc, $a, sp, (_Workspace + $b$_F$_32) + + ;// Now align $a to 32 bytes + BIC$cc $a,$a,#0x1F + MEND + +;////////////////////////////////////////////////////////// +;// Function header and footer macros +;////////////////////////////////////////////////////////// + + ;// Function Header Macro + ;// Generates the function prologue + ;// Note that functions should all be "stack-moves-once" + ;// The M_START and M_END macros should be the only places + ;// where the stack moves. 
+ ;// + ;// $name = function name + ;// $rreg = "" don't stack any registers + ;// "lr" stack "r4,lr" (same as "r4") + ;// "rN" stack registers "r4-rN,lr", with odd N rounded up + ;// to the next even register (e.g. "r5" stacks "r4-r6,lr") + ;// $dreg = "" don't stack any D registers + ;// "dN" stack registers "d8-dN" + ;// + ;// Note: ARM Architecture procedure call standard AAPCS + ;// states that r4-r11, sp, d8-d15 must be preserved by + ;// a compliant function. + MACRO + M_START $name, $rreg, $dreg + ASSERT :LNOT:_InFunc + ASSERT "$name"!="" +_InFunc SETL {TRUE} +_RBytes SETA 0 +_Workspace SETA 0 + + ;// Create an area for the function + AREA |.text|, CODE + EXPORT $name +$name FUNCTION + + ;// Save R registers + _M_GETRREGLIST $rreg + IF _RRegList<>"" + STMFD sp!, {$_RRegList, lr} + ENDIF + + ;// Save D registers + _M_GETDREGLIST $dreg + IF _DRegList<>"" + VSTMFD sp!, {$_DRegList} + ENDIF + + + ;// Ensure size claimed on stack is 8-byte aligned + IF ((_SBytes:AND:7)!=0) +_SBytes SETA _SBytes + (8 - (_SBytes:AND:7)) + ENDIF + + IF (_SBytes!=0) + _M_OPC SUB, sp, sp, _SBytes + ENDIF + + +_ABytes SETA _SBytes + _RBytes - _Workspace + + + ;// Print function name if debug enabled + M_PRINTF "$name\n", + MEND + + ;// Work out a list of R saved registers + ;// Sets _RRegList and adds the number of bytes pushed (including lr) to _RBytes + MACRO + _M_GETRREGLIST $rreg + IF "$rreg"="" +_RRegList SETS "" + MEXIT + ENDIF + IF "$rreg"="lr":LOR:"$rreg"="r4" +_RRegList SETS "r4" +_RBytes SETA _RBytes+8 + MEXIT + ENDIF + IF "$rreg"="r5":LOR:"$rreg"="r6" +_RRegList SETS "r4-r6" +_RBytes SETA _RBytes+16 + MEXIT + ENDIF + IF "$rreg"="r7":LOR:"$rreg"="r8" +_RRegList SETS "r4-r8" +_RBytes SETA _RBytes+24 + MEXIT + ENDIF + IF "$rreg"="r9":LOR:"$rreg"="r10" +_RRegList SETS "r4-r10" +_RBytes SETA _RBytes+32 + MEXIT + ENDIF + IF "$rreg"="r11":LOR:"$rreg"="r12" +_RRegList SETS "r4-r12" +_RBytes SETA _RBytes+40 + MEXIT + ENDIF + INFO 1, "Unrecognized saved r register limit '$rreg'" + MEND + + ;// Work out a list of D saved registers + ;// Sets _DRegList and adds 8 bytes per stacked D register to _RBytes + MACRO + _M_GETDREGLIST $dreg + IF "$dreg"="" +_DRegList SETS "" + MEXIT + ENDIF + IF "$dreg"="d8" +_DRegList SETS "d8" +_RBytes SETA 
_RBytes+8 + MEXIT + ENDIF + IF "$dreg"="d9" +_DRegList SETS "d8-d9" +_RBytes SETA _RBytes+16 + MEXIT + ENDIF + IF "$dreg"="d10" +_DRegList SETS "d8-d10" +_RBytes SETA _RBytes+24 + MEXIT + ENDIF + IF "$dreg"="d11" +_DRegList SETS "d8-d11" +_RBytes SETA _RBytes+32 + MEXIT + ENDIF + IF "$dreg"="d12" +_DRegList SETS "d8-d12" +_RBytes SETA _RBytes+40 + MEXIT + ENDIF + IF "$dreg"="d13" +_DRegList SETS "d8-d13" +_RBytes SETA _RBytes+48 + MEXIT + ENDIF + IF "$dreg"="d14" +_DRegList SETS "d8-d14" +_RBytes SETA _RBytes+56 + MEXIT + ENDIF + IF "$dreg"="d15" +_DRegList SETS "d8-d15" +_RBytes SETA _RBytes+64 + MEXIT + ENDIF + INFO 1, "Unrecognized saved d register limit '$dreg'" + MEND + + ;// Produce function return instructions + ;// Pops the saved D registers (if any), then returns with BX lr when no + ;// R registers were stacked, otherwise with an LDM that reloads pc + MACRO + _M_RET $cc + IF _DRegList<>"" + VPOP$cc {$_DRegList} + ENDIF + IF _RRegList="" + BX$cc lr + ELSE + LDM$cc.FD sp!, {$_RRegList, pc} + ENDIF + MEND + + ;// Early Function Exit Macro + ;// $cc = condition to exit with + ;// (Example: M_EXIT EQ) + MACRO + M_EXIT $cc + ASSERT _InFunc + IF _SBytes!=0 + ;// Branch to the _End label emitted by M_END, which restores the stack frame and returns + B$cc _End$_F + ELSE + ;// Can return directly + _M_RET $cc + ENDIF + MEND + + ;// Function Footer Macro + ;// Generates the function epilogue + MACRO + M_END + ASSERT _InFunc +_InFunc SETL {FALSE} +_End$_F + + ;// Restore the stack pointer to its original value on function entry + IF _SBytes!=0 + _M_OPC ADD, sp, sp, _SBytes + ENDIF + _M_RET + ENDFUNC + + ;// Reset the global stack tracking variables back to their + ;// initial values, and increment the function count +_SBytes SETA 0 +_F SETA _F+1 + MEND + + +;//========================================================================== +;// Debug Macros +;//========================================================================== + + GBLL DEBUG_ON +DEBUG_ON SETL {FALSE} + GBLL DEBUG_STALLS_ON +DEBUG_STALLS_ON SETL {FALSE} + + ;//========================================================================== + ;// Debug call to printf + ;// M_PRINTF $format, $val0, $val1, 
$val2 + ;// + ;// Examples: + ;// M_PRINTF "x=%08x\n", r0 + ;// + ;// This macro preserves the value of all registers including the + ;// flags. + ;//========================================================================== + + MACRO + M_PRINTF $format, $val0, $val1, $val2 + IF DEBUG_ON + + IMPORT printf + LCLA nArgs +nArgs SETA 0 + + ;// save registers so we don't corrupt them + STMFD sp!, {r0-r12, lr} + + ;// Drop stack to give us some workspace + SUB sp, sp, #16 + + ;// Save registers we need to print to the stack + IF "$val2" <> "" + ASSERT "$val1" <> "" + STR $val2, [sp, #8] +nArgs SETA nArgs+1 + ENDIF + IF "$val1" <> "" + ASSERT "$val0" <> "" + STR $val1, [sp, #4] +nArgs SETA nArgs+1 + ENDIF + IF "$val0"<>"" + STR $val0, [sp] +nArgs SETA nArgs+1 + ENDIF + + ;// Now we are safe to corrupt registers + ADR r0, %FT00 + IF nArgs=1 + LDR r1, [sp] + ENDIF + IF nArgs=2 + LDMIA sp, {r1,r2} + ENDIF + IF nArgs=3 + LDMIA sp, {r1,r2,r3} + ENDIF + + ;// print the values + MRS r4, cpsr ;// preserve flags + BL printf + MSR cpsr_f, r4 ;// restore flags + B %FT01 +00 ;// string to print + DCB "$format", 0 + ALIGN +01 ;// Finished + ADD sp, sp, #16 + ;// Restore registers + LDMFD sp!, {r0-r12,lr} + + ENDIF ;// DEBUG_ON + MEND + + + ;// Stall Simulation Macro + ;// Inserts a given number of NOPs for the currently + ;// defined platform + MACRO + M_STALL $plat1stall, $plat2stall, $plat3stall, $plat4stall, $plat5stall, $plat6stall + IF DEBUG_STALLS_ON + _M_STALL_SUB $plat1stall + _M_STALL_SUB $plat2stall + _M_STALL_SUB $plat3stall + _M_STALL_SUB $plat4stall + _M_STALL_SUB $plat5stall + _M_STALL_SUB $plat6stall + ENDIF + MEND + + ;// Helper for M_STALL: the last character of $platstall is the stall count and + ;// all but its last two characters name a platform symbol; when that symbol is + ;// defined and true, that many "MOV sp, sp" instructions are emitted + MACRO + _M_STALL_SUB $platstall + IF "$platstall"!="" + LCLA _pllen + LCLS _pl + LCLL _pllog +_pllen SETA :LEN:"$platstall" +_pl SETS "$platstall":LEFT:(_pllen - 2) + IF :DEF:$_pl + IF $_pl + LCLS _st + LCLA _stnum +_st SETS "$platstall":RIGHT:1 +_stnum SETA $_st + WHILE _stnum>0 + MOV sp, sp +_stnum SETA _stnum - 1 + WEND + ENDIF + ENDIF + ENDIF + MEND + 
+ + +;//========================================================================== +;// Endian Invariance Macros +;// +;// The idea behind these macros is that if an array is +;// loaded as words then the SMUL00 macro will multiply +;// array elements 0 regardless of the endianness of the +;// system. For little endian SMUL00=SMULBB, for big +;// endian SMUL00=SMULTT and similarly for other packed operations. +;// +;// In the x/y suffix positions, 0 and 1 follow the endianness (0 maps to B on +;// little endian and T on big endian, 1 the reverse), while B and T are kept +;// unchanged on both. +;// +;//========================================================================== + + ;// LIBI4 emits $combi when {ENDIAN}="big" and $comli otherwise, with four operands + MACRO + LIBI4 $comli, $combi, $a, $b, $c, $d, $cc + IF {ENDIAN}="big" + $combi.$cc $a, $b, $c, $d + ELSE + $comli.$cc $a, $b, $c, $d + ENDIF + MEND + + ;// LIBI3 is the three-operand form of LIBI4 + MACRO + LIBI3 $comli, $combi, $a, $b, $c, $cc + IF {ENDIAN}="big" + $combi.$cc $a, $b, $c + ELSE + $comli.$cc $a, $b, $c + ENDIF + MEND + + ;// SMLAxy macros + + MACRO + SMLA00 $a, $b, $c, $d, $cc + LIBI4 SMLABB, SMLATT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA01 $a, $b, $c, $d, $cc + LIBI4 SMLABT, SMLATB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA0B $a, $b, $c, $d, $cc + LIBI4 SMLABB, SMLATB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA0T $a, $b, $c, $d, $cc + LIBI4 SMLABT, SMLATT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA10 $a, $b, $c, $d, $cc + LIBI4 SMLATB, SMLABT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA11 $a, $b, $c, $d, $cc + LIBI4 SMLATT, SMLABB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA1B $a, $b, $c, $d, $cc + LIBI4 SMLATB, SMLABB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLA1T $a, $b, $c, $d, $cc + LIBI4 SMLATT, SMLABT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAB0 $a, $b, $c, $d, $cc + LIBI4 SMLABB, SMLABT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAB1 $a, $b, $c, $d, $cc + LIBI4 SMLABT, SMLABB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAT0 $a, $b, $c, $d, $cc + LIBI4 SMLATB, SMLATT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAT1 $a, $b, $c, $d, $cc + LIBI4 SMLATT, SMLATB, $a, $b, $c, $d, $cc + MEND + + ;// SMULxy macros + + MACRO + SMUL00 $a, $b, $c, $cc + LIBI3 SMULBB, SMULTT, $a, $b, $c, $cc + MEND + + 
MACRO + SMUL01 $a, $b, $c, $cc + LIBI3 SMULBT, SMULTB, $a, $b, $c, $cc + MEND + + MACRO + SMUL0B $a, $b, $c, $cc + LIBI3 SMULBB, SMULTB, $a, $b, $c, $cc + MEND + + MACRO + SMUL0T $a, $b, $c, $cc + LIBI3 SMULBT, SMULTT, $a, $b, $c, $cc + MEND + + MACRO + SMUL10 $a, $b, $c, $cc + LIBI3 SMULTB, SMULBT, $a, $b, $c, $cc + MEND + + MACRO + SMUL11 $a, $b, $c, $cc + LIBI3 SMULTT, SMULBB, $a, $b, $c, $cc + MEND + + MACRO + SMUL1B $a, $b, $c, $cc + LIBI3 SMULTB, SMULBB, $a, $b, $c, $cc + MEND + + MACRO + SMUL1T $a, $b, $c, $cc + LIBI3 SMULTT, SMULBT, $a, $b, $c, $cc + MEND + + MACRO + SMULB0 $a, $b, $c, $cc + LIBI3 SMULBB, SMULBT, $a, $b, $c, $cc + MEND + + MACRO + SMULB1 $a, $b, $c, $cc + LIBI3 SMULBT, SMULBB, $a, $b, $c, $cc + MEND + + MACRO + SMULT0 $a, $b, $c, $cc + LIBI3 SMULTB, SMULTT, $a, $b, $c, $cc + MEND + + MACRO + SMULT1 $a, $b, $c, $cc + LIBI3 SMULTT, SMULTB, $a, $b, $c, $cc + MEND + + ;// SMLAWx, SMULWx macros + ;// x=0 selects the ...WB form on little endian and ...WT on big endian; x=1 the reverse + + MACRO + SMLAW0 $a, $b, $c, $d, $cc + LIBI4 SMLAWB, SMLAWT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAW1 $a, $b, $c, $d, $cc + LIBI4 SMLAWT, SMLAWB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMULW0 $a, $b, $c, $cc + LIBI3 SMULWB, SMULWT, $a, $b, $c, $cc + MEND + + MACRO + SMULW1 $a, $b, $c, $cc + LIBI3 SMULWT, SMULWB, $a, $b, $c, $cc + MEND + + ;// SMLALxy macros + ;// 0/1 in the x/y positions follow the endianness (0 is B on little endian, T on + ;// big endian; 1 the reverse); B/T are kept unchanged on both + + + MACRO + SMLAL00 $a, $b, $c, $d, $cc + LIBI4 SMLALBB, SMLALTT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL01 $a, $b, $c, $d, $cc + LIBI4 SMLALBT, SMLALTB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL0B $a, $b, $c, $d, $cc + LIBI4 SMLALBB, SMLALTB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL0T $a, $b, $c, $d, $cc + LIBI4 SMLALBT, SMLALTT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL10 $a, $b, $c, $d, $cc + LIBI4 SMLALTB, SMLALBT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL11 $a, $b, $c, $d, $cc + LIBI4 SMLALTT, SMLALBB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL1B $a, $b, $c, $d, $cc + LIBI4 SMLALTB, SMLALBB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLAL1T $a, $b, $c, $d, $cc + LIBI4 
SMLALTT, SMLALBT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLALB0 $a, $b, $c, $d, $cc + LIBI4 SMLALBB, SMLALBT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLALB1 $a, $b, $c, $d, $cc + LIBI4 SMLALBT, SMLALBB, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLALT0 $a, $b, $c, $d, $cc + LIBI4 SMLALTB, SMLALTT, $a, $b, $c, $d, $cc + MEND + + MACRO + SMLALT1 $a, $b, $c, $d, $cc + LIBI4 SMLALTT, SMLALTB, $a, $b, $c, $d, $cc + MEND + + ENDIF ;// ARMCOMM_S_H + + END diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armOMX.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armOMX.h new file mode 100755 index 0000000..7a68d14 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/armOMX.h @@ -0,0 +1,274 @@ +/* + * + * File Name: armOMX_ReleaseVersion.h + * OpenMAX DL: v1.0.2 + * Revision: 12290 + * Date: Wednesday, April 9, 2008 + * + * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. + * + * + * + * This file allows a version of the OMX DL libraries to be built where some or + * all of the function names can be given a user specified suffix. + * + * You might want to use it where: + * + * - you want to rename a function "out of the way" so that you could replace + * a function with a different version (the original version would still be + * in the library just with a different name - so you could debug the new + * version by comparing it to the output of the old) + * + * - you want to rename all the functions to versions with a suffix so that + * you can include two versions of the library and choose between functions + * at runtime. + * + * e.g. 
omxIPBM_Copy_U8_C1R could be renamed omxIPBM_Copy_U8_C1R_CortexA8 + * + */ + + +#ifndef _armOMX_H_ +#define _armOMX_H_ + + +/* We need to define these two macros in order to expand and concatenate the names: OMXCATBAR lets its arguments (e.g. a *_SUFFIX macro) be macro-expanded before OMXCAT2BAR pastes them after the "omx" prefix */ +#define OMXCAT2BAR(A, B) omx ## A ## B +#define OMXCATBAR(A, B) OMXCAT2BAR(A, B) + +/* Define the suffix to add to all functions - the default is no suffix */ +#define BARE_SUFFIX + + + +/* Define what happens to the bare suffix-less functions, down to the sub-domain accuracy */ +#define OMXACAAC_SUFFIX BARE_SUFFIX +#define OMXACMP3_SUFFIX BARE_SUFFIX +#define OMXICJP_SUFFIX BARE_SUFFIX +#define OMXIPBM_SUFFIX BARE_SUFFIX +#define OMXIPCS_SUFFIX BARE_SUFFIX +#define OMXIPPP_SUFFIX BARE_SUFFIX +#define OMXSP_SUFFIX BARE_SUFFIX +#define OMXVCCOMM_SUFFIX BARE_SUFFIX +#define OMXVCM4P10_SUFFIX BARE_SUFFIX +#define OMXVCM4P2_SUFFIX BARE_SUFFIX + + + + +/* Define what each bare, un-suffixed OpenMAX API function name is to be renamed to */ +#define omxACAAC_DecodeChanPairElt OMXCATBAR(ACAAC_DecodeChanPairElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeDatStrElt OMXCATBAR(ACAAC_DecodeDatStrElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeFillElt OMXCATBAR(ACAAC_DecodeFillElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeIsStereo_S32 OMXCATBAR(ACAAC_DecodeIsStereo_S32, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeMsPNS_S32_I OMXCATBAR(ACAAC_DecodeMsPNS_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeMsStereo_S32_I OMXCATBAR(ACAAC_DecodeMsStereo_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_DecodePrgCfgElt OMXCATBAR(ACAAC_DecodePrgCfgElt, OMXACAAC_SUFFIX) +#define omxACAAC_DecodeTNS_S32_I OMXCATBAR(ACAAC_DecodeTNS_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_DeinterleaveSpectrum_S32 OMXCATBAR(ACAAC_DeinterleaveSpectrum_S32, OMXACAAC_SUFFIX) +#define omxACAAC_EncodeTNS_S32_I OMXCATBAR(ACAAC_EncodeTNS_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_LongTermPredict_S32 OMXCATBAR(ACAAC_LongTermPredict_S32, OMXACAAC_SUFFIX) +#define omxACAAC_LongTermReconstruct_S32_I 
OMXCATBAR(ACAAC_LongTermReconstruct_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_MDCTFwd_S32 OMXCATBAR(ACAAC_MDCTFwd_S32, OMXACAAC_SUFFIX) +#define omxACAAC_MDCTInv_S32_S16 OMXCATBAR(ACAAC_MDCTInv_S32_S16, OMXACAAC_SUFFIX) +#define omxACAAC_NoiselessDecode OMXCATBAR(ACAAC_NoiselessDecode, OMXACAAC_SUFFIX) +#define omxACAAC_QuantInv_S32_I OMXCATBAR(ACAAC_QuantInv_S32_I, OMXACAAC_SUFFIX) +#define omxACAAC_UnpackADIFHeader OMXCATBAR(ACAAC_UnpackADIFHeader, OMXACAAC_SUFFIX) +#define omxACAAC_UnpackADTSFrameHeader OMXCATBAR(ACAAC_UnpackADTSFrameHeader, OMXACAAC_SUFFIX) + + +#define omxACMP3_HuffmanDecode_S32 OMXCATBAR(ACMP3_HuffmanDecode_S32, OMXACMP3_SUFFIX) +#define omxACMP3_HuffmanDecodeSfb_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfb_S32, OMXACMP3_SUFFIX) +#define omxACMP3_HuffmanDecodeSfbMbp_S32 OMXCATBAR(ACMP3_HuffmanDecodeSfbMbp_S32, OMXACMP3_SUFFIX) +#define omxACMP3_MDCTInv_S32 OMXCATBAR(ACMP3_MDCTInv_S32, OMXACMP3_SUFFIX) +#define omxACMP3_ReQuantize_S32_I OMXCATBAR(ACMP3_ReQuantize_S32_I, OMXACMP3_SUFFIX) +#define omxACMP3_ReQuantizeSfb_S32_I OMXCATBAR(ACMP3_ReQuantizeSfb_S32_I, OMXACMP3_SUFFIX) +#define omxACMP3_SynthPQMF_S32_S16 OMXCATBAR(ACMP3_SynthPQMF_S32_S16, OMXACMP3_SUFFIX) +#define omxACMP3_UnpackFrameHeader OMXCATBAR(ACMP3_UnpackFrameHeader, OMXACMP3_SUFFIX) +#define omxACMP3_UnpackScaleFactors_S8 OMXCATBAR(ACMP3_UnpackScaleFactors_S8, OMXACMP3_SUFFIX) +#define omxACMP3_UnpackSideInfo OMXCATBAR(ACMP3_UnpackSideInfo, OMXACMP3_SUFFIX) + +#define omxICJP_CopyExpand_U8_C3 OMXCATBAR(ICJP_CopyExpand_U8_C3, OMXICJP_SUFFIX) +#define omxICJP_DCTFwd_S16 OMXCATBAR(ICJP_DCTFwd_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTFwd_S16_I OMXCATBAR(ICJP_DCTFwd_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTInv_S16 OMXCATBAR(ICJP_DCTInv_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTInv_S16_I OMXCATBAR(ICJP_DCTInv_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwd_Multiple_S16 OMXCATBAR(ICJP_DCTQuantFwd_Multiple_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwd_S16 
OMXCATBAR(ICJP_DCTQuantFwd_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwd_S16_I OMXCATBAR(ICJP_DCTQuantFwd_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantFwdTableInit OMXCATBAR(ICJP_DCTQuantFwdTableInit, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInv_Multiple_S16 OMXCATBAR(ICJP_DCTQuantInv_Multiple_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInv_S16 OMXCATBAR(ICJP_DCTQuantInv_S16, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInv_S16_I OMXCATBAR(ICJP_DCTQuantInv_S16_I, OMXICJP_SUFFIX) +#define omxICJP_DCTQuantInvTableInit OMXCATBAR(ICJP_DCTQuantInvTableInit, OMXICJP_SUFFIX) +#define omxICJP_DecodeHuffman8x8_Direct_S16_C1 OMXCATBAR(ICJP_DecodeHuffman8x8_Direct_S16_C1, OMXICJP_SUFFIX) +#define omxICJP_DecodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX) +#define omxICJP_DecodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_DecodeHuffmanSpecInit_U8, OMXICJP_SUFFIX) +#define omxICJP_EncodeHuffman8x8_Direct_S16_U1_C1 OMXCATBAR(ICJP_EncodeHuffman8x8_Direct_S16_U1_C1, OMXICJP_SUFFIX) +#define omxICJP_EncodeHuffmanSpecGetBufSize_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecGetBufSize_U8, OMXICJP_SUFFIX) +#define omxICJP_EncodeHuffmanSpecInit_U8 OMXCATBAR(ICJP_EncodeHuffmanSpecInit_U8, OMXICJP_SUFFIX) + +#define omxIPBM_AddC_U8_C1R_Sfs OMXCATBAR(IPBM_AddC_U8_C1R_Sfs, OMXIPBM_SUFFIX) +#define omxIPBM_Copy_U8_C1R OMXCATBAR(IPBM_Copy_U8_C1R, OMXIPBM_SUFFIX) +#define omxIPBM_Copy_U8_C3R OMXCATBAR(IPBM_Copy_U8_C3R, OMXIPBM_SUFFIX) +#define omxIPBM_Mirror_U8_C1R OMXCATBAR(IPBM_Mirror_U8_C1R, OMXIPBM_SUFFIX) +#define omxIPBM_MulC_U8_C1R_Sfs OMXCATBAR(IPBM_MulC_U8_C1R_Sfs, OMXIPBM_SUFFIX) + +#define omxIPCS_ColorTwistQ14_U8_C3R OMXCATBAR(IPCS_ColorTwistQ14_U8_C3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr420LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr422LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX) +#define 
omxIPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R OMXCATBAR(IPCS_BGR565ToYCbCr444LS_MCU_U16_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr420LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr422LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R OMXCATBAR(IPCS_BGR888ToYCbCr444LS_MCU_U8_S16_C3P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr420RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr420RszRot_U8_P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr420ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422RszCscRotBGR_U8_P3C3R OMXCATBAR(IPCS_YCbCr422RszCscRotBGR_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R OMXCATBAR(IPCS_CbYCrY422RszCscRotBGR_U8_U16_C2R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422RszRot_U8_P3R OMXCATBAR(IPCS_YCbCr422RszRot_U8_P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbYCr422ToBGR565_U8_U16_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR565_U8_U16_C2C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbYCr422ToBGR888_U8_C2C3R OMXCATBAR(IPCS_YCbYCr422ToBGR888_U8_C2C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX) +/* NOTE(review): the next #define repeats the previous one verbatim (an identical redefinition, so harmless) - confirm no other name was intended */ +#define omxIPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R OMXCATBAR(IPCS_YCbCr422ToBGR888LS_MCU_S16_U8_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R 
OMXCATBAR(IPCS_CbYCrY422ToYCbCr420Rotate_U8_C2P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr422ToYCbCr420Rotate_U8_P3R OMXCATBAR(IPCS_YCbCr422ToYCbCr420Rotate_U8_P3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR565_U8_U16_C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR565_U8_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565_U8_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R OMXCATBAR(IPCS_YCbCr444ToBGR565LS_MCU_S16_U16_P3C3R, OMXIPCS_SUFFIX) +#define omxIPCS_YCbCr444ToBGR888_U8_C3R OMXCATBAR(IPCS_YCbCr444ToBGR888_U8_C3R, OMXIPCS_SUFFIX) + +#define omxIPPP_Deblock_HorEdge_U8_I OMXCATBAR(IPPP_Deblock_HorEdge_U8_I, OMXIPPP_SUFFIX) +#define omxIPPP_Deblock_VerEdge_U8_I OMXCATBAR(IPPP_Deblock_VerEdge_U8_I, OMXIPPP_SUFFIX) +#define omxIPPP_FilterFIR_U8_C1R OMXCATBAR(IPPP_FilterFIR_U8_C1R, OMXIPPP_SUFFIX) +#define omxIPPP_FilterMedian_U8_C1R OMXCATBAR(IPPP_FilterMedian_U8_C1R, OMXIPPP_SUFFIX) +#define omxIPPP_GetCentralMoment_S64 OMXCATBAR(IPPP_GetCentralMoment_S64, OMXIPPP_SUFFIX) +#define omxIPPP_GetSpatialMoment_S64 OMXCATBAR(IPPP_GetSpatialMoment_S64, OMXIPPP_SUFFIX) +#define omxIPPP_MomentGetStateSize OMXCATBAR(IPPP_MomentGetStateSize, OMXIPPP_SUFFIX) +#define omxIPPP_MomentInit OMXCATBAR(IPPP_MomentInit, OMXIPPP_SUFFIX) +#define omxIPPP_Moments_U8_C1R OMXCATBAR(IPPP_Moments_U8_C1R, OMXIPPP_SUFFIX) +#define omxIPPP_Moments_U8_C3R OMXCATBAR(IPPP_Moments_U8_C3R, OMXIPPP_SUFFIX) + +#define omxSP_BlockExp_S16 OMXCATBAR(SP_BlockExp_S16, OMXSP_SUFFIX) +#define omxSP_BlockExp_S32 OMXCATBAR(SP_BlockExp_S32, OMXSP_SUFFIX) +#define omxSP_Copy_S16 OMXCATBAR(SP_Copy_S16, OMXSP_SUFFIX) +#define omxSP_DotProd_S16 OMXCATBAR(SP_DotProd_S16, OMXSP_SUFFIX) +#define omxSP_DotProd_S16_Sfs OMXCATBAR(SP_DotProd_S16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTFwd_CToC_SC16_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTFwd_CToC_SC32_Sfs OMXCATBAR(SP_FFTFwd_CToC_SC32_Sfs, OMXSP_SUFFIX) +#define 
omxSP_FFTFwd_RToCCS_S16S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S16S32_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTFwd_RToCCS_S32_Sfs OMXCATBAR(SP_FFTFwd_RToCCS_S32_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_C_SC16 OMXCATBAR(SP_FFTGetBufSize_C_SC16, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_C_SC32 OMXCATBAR(SP_FFTGetBufSize_C_SC32, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_R_S16S32 OMXCATBAR(SP_FFTGetBufSize_R_S16S32, OMXSP_SUFFIX) +#define omxSP_FFTGetBufSize_R_S32 OMXCATBAR(SP_FFTGetBufSize_R_S32, OMXSP_SUFFIX) +#define omxSP_FFTInit_C_SC16 OMXCATBAR(SP_FFTInit_C_SC16, OMXSP_SUFFIX) +#define omxSP_FFTInit_C_SC32 OMXCATBAR(SP_FFTInit_C_SC32, OMXSP_SUFFIX) +#define omxSP_FFTInit_R_S16S32 OMXCATBAR(SP_FFTInit_R_S16S32, OMXSP_SUFFIX) +#define omxSP_FFTInit_R_S32 OMXCATBAR(SP_FFTInit_R_S32, OMXSP_SUFFIX) +#define omxSP_FFTInv_CCSToR_S32_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTInv_CCSToR_S32S16_Sfs OMXCATBAR(SP_FFTInv_CCSToR_S32S16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTInv_CToC_SC16_Sfs OMXCATBAR(SP_FFTInv_CToC_SC16_Sfs, OMXSP_SUFFIX) +#define omxSP_FFTInv_CToC_SC32_Sfs OMXCATBAR(SP_FFTInv_CToC_SC32_Sfs, OMXSP_SUFFIX) +#define omxSP_FilterMedian_S32 OMXCATBAR(SP_FilterMedian_S32, OMXSP_SUFFIX) +#define omxSP_FilterMedian_S32_I OMXCATBAR(SP_FilterMedian_S32_I, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16 OMXCATBAR(SP_FIR_Direct_S16, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16_I OMXCATBAR(SP_FIR_Direct_S16_I, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16_ISfs OMXCATBAR(SP_FIR_Direct_S16_ISfs, OMXSP_SUFFIX) +#define omxSP_FIR_Direct_S16_Sfs OMXCATBAR(SP_FIR_Direct_S16_Sfs, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16 OMXCATBAR(SP_FIROne_Direct_S16, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16_I OMXCATBAR(SP_FIROne_Direct_S16_I, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16_ISfs OMXCATBAR(SP_FIROne_Direct_S16_ISfs, OMXSP_SUFFIX) +#define omxSP_FIROne_Direct_S16_Sfs OMXCATBAR(SP_FIROne_Direct_S16_Sfs, OMXSP_SUFFIX) +#define 
omxSP_IIR_BiQuadDirect_S16 OMXCATBAR(SP_IIR_BiQuadDirect_S16, OMXSP_SUFFIX) +#define omxSP_IIR_BiQuadDirect_S16_I OMXCATBAR(SP_IIR_BiQuadDirect_S16_I, OMXSP_SUFFIX) +#define omxSP_IIR_Direct_S16 OMXCATBAR(SP_IIR_Direct_S16, OMXSP_SUFFIX) +#define omxSP_IIR_Direct_S16_I OMXCATBAR(SP_IIR_Direct_S16_I, OMXSP_SUFFIX) +#define omxSP_IIROne_BiQuadDirect_S16 OMXCATBAR(SP_IIROne_BiQuadDirect_S16, OMXSP_SUFFIX) +#define omxSP_IIROne_BiQuadDirect_S16_I OMXCATBAR(SP_IIROne_BiQuadDirect_S16_I, OMXSP_SUFFIX) +#define omxSP_IIROne_Direct_S16 OMXCATBAR(SP_IIROne_Direct_S16, OMXSP_SUFFIX) +#define omxSP_IIROne_Direct_S16_I OMXCATBAR(SP_IIROne_Direct_S16_I, OMXSP_SUFFIX) + +#define omxVCCOMM_Average_16x OMXCATBAR(VCCOMM_Average_16x, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_Average_8x OMXCATBAR(VCCOMM_Average_8x, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_ComputeTextureErrorBlock OMXCATBAR(VCCOMM_ComputeTextureErrorBlock, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_ComputeTextureErrorBlock_SAD OMXCATBAR(VCCOMM_ComputeTextureErrorBlock_SAD, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_Copy16x16 OMXCATBAR(VCCOMM_Copy16x16, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_Copy8x8 OMXCATBAR(VCCOMM_Copy8x8, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_ExpandFrame_I OMXCATBAR(VCCOMM_ExpandFrame_I, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_LimitMVToRect OMXCATBAR(VCCOMM_LimitMVToRect, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_SAD_16x OMXCATBAR(VCCOMM_SAD_16x, OMXVCCOMM_SUFFIX) +#define omxVCCOMM_SAD_8x OMXCATBAR(VCCOMM_SAD_8x, OMXVCCOMM_SUFFIX) + +#define omxVCM4P10_Average_4x OMXCATBAR(VCM4P10_Average_4x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_BlockMatch_Half OMXCATBAR(VCM4P10_BlockMatch_Half, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_BlockMatch_Integer OMXCATBAR(VCM4P10_BlockMatch_Integer, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_BlockMatch_Quarter OMXCATBAR(VCM4P10_BlockMatch_Quarter, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DeblockChroma_I OMXCATBAR(VCM4P10_DeblockChroma_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DeblockLuma_I 
OMXCATBAR(VCM4P10_DeblockLuma_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DecodeChromaDcCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeChromaDcCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DecodeCoeffsToPairCAVLC OMXCATBAR(VCM4P10_DecodeCoeffsToPairCAVLC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_DequantTransformResidualFromPairAndAdd OMXCATBAR(VCM4P10_DequantTransformResidualFromPairAndAdd, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingChroma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_HorEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingChroma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingChroma_VerEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingLuma_HorEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_HorEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_FilterDeblockingLuma_VerEdge_I OMXCATBAR(VCM4P10_FilterDeblockingLuma_VerEdge_I, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_GetVLCInfo OMXCATBAR(VCM4P10_GetVLCInfo, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateChroma OMXCATBAR(VCM4P10_InterpolateChroma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateHalfHor_Luma OMXCATBAR(VCM4P10_InterpolateHalfHor_Luma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateHalfVer_Luma OMXCATBAR(VCM4P10_InterpolateHalfVer_Luma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InterpolateLuma OMXCATBAR(VCM4P10_InterpolateLuma, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InvTransformDequant_ChromaDC OMXCATBAR(VCM4P10_InvTransformDequant_ChromaDC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InvTransformDequant_LumaDC OMXCATBAR(VCM4P10_InvTransformDequant_LumaDC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_InvTransformResidualAndAdd OMXCATBAR(VCM4P10_InvTransformResidualAndAdd, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_MEGetBufSize OMXCATBAR(VCM4P10_MEGetBufSize, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_MEInit OMXCATBAR(VCM4P10_MEInit, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_MotionEstimationMB OMXCATBAR(VCM4P10_MotionEstimationMB, OMXVCM4P10_SUFFIX) +#define 
omxVCM4P10_PredictIntra_16x16 OMXCATBAR(VCM4P10_PredictIntra_16x16, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_PredictIntra_4x4 OMXCATBAR(VCM4P10_PredictIntra_4x4, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_PredictIntraChroma_8x8 OMXCATBAR(VCM4P10_PredictIntraChroma_8x8, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SAD_4x OMXCATBAR(VCM4P10_SAD_4x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SADQuar_16x OMXCATBAR(VCM4P10_SADQuar_16x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SADQuar_4x OMXCATBAR(VCM4P10_SADQuar_4x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SADQuar_8x OMXCATBAR(VCM4P10_SADQuar_8x, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SATD_4x4 OMXCATBAR(VCM4P10_SATD_4x4, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_SubAndTransformQDQResidual OMXCATBAR(VCM4P10_SubAndTransformQDQResidual, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformDequantChromaDCFromPair OMXCATBAR(VCM4P10_TransformDequantChromaDCFromPair, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformDequantLumaDCFromPair OMXCATBAR(VCM4P10_TransformDequantLumaDCFromPair, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformQuant_ChromaDC OMXCATBAR(VCM4P10_TransformQuant_ChromaDC, OMXVCM4P10_SUFFIX) +#define omxVCM4P10_TransformQuant_LumaDC OMXCATBAR(VCM4P10_TransformQuant_LumaDC, OMXVCM4P10_SUFFIX) + +#define omxVCM4P2_BlockMatch_Half_16x16 OMXCATBAR(VCM4P2_BlockMatch_Half_16x16, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_BlockMatch_Half_8x8 OMXCATBAR(VCM4P2_BlockMatch_Half_8x8, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_BlockMatch_Integer_16x16 OMXCATBAR(VCM4P2_BlockMatch_Integer_16x16, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_BlockMatch_Integer_8x8 OMXCATBAR(VCM4P2_BlockMatch_Integer_8x8, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DCT8x8blk OMXCATBAR(VCM4P2_DCT8x8blk, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeBlockCoef_Inter OMXCATBAR(VCM4P2_DecodeBlockCoef_Inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeBlockCoef_Intra OMXCATBAR(VCM4P2_DecodeBlockCoef_Intra, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodePadMV_PVOP OMXCATBAR(VCM4P2_DecodePadMV_PVOP, 
OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeVLCZigzag_Inter OMXCATBAR(VCM4P2_DecodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_DecodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_DecodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeMV OMXCATBAR(VCM4P2_EncodeMV, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeVLCZigzag_Inter OMXCATBAR(VCM4P2_EncodeVLCZigzag_Inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeVLCZigzag_IntraACVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraACVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_EncodeVLCZigzag_IntraDCVLC OMXCATBAR(VCM4P2_EncodeVLCZigzag_IntraDCVLC, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_FindMVpred OMXCATBAR(VCM4P2_FindMVpred, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_IDCT8x8blk OMXCATBAR(VCM4P2_IDCT8x8blk, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MCReconBlock OMXCATBAR(VCM4P2_MCReconBlock, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MEGetBufSize OMXCATBAR(VCM4P2_MEGetBufSize, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MEInit OMXCATBAR(VCM4P2_MEInit, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_MotionEstimationMB OMXCATBAR(VCM4P2_MotionEstimationMB, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_PredictReconCoefIntra OMXCATBAR(VCM4P2_PredictReconCoefIntra, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantInter_I OMXCATBAR(VCM4P2_QuantInter_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantIntra_I OMXCATBAR(VCM4P2_QuantIntra_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantInvInter_I OMXCATBAR(VCM4P2_QuantInvInter_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_QuantInvIntra_I OMXCATBAR(VCM4P2_QuantInvIntra_I, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_TransRecBlockCoef_inter OMXCATBAR(VCM4P2_TransRecBlockCoef_inter, OMXVCM4P2_SUFFIX) +#define omxVCM4P2_TransRecBlockCoef_intra OMXCATBAR(VCM4P2_TransRecBlockCoef_intra, OMXVCM4P2_SUFFIX) + + +#endif /* _armOMX_H_ */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes.h 
b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes.h new file mode 100755 index 0000000..8b295a6 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes.h @@ -0,0 +1,252 @@ +/** + * File: omxtypes.h + * Brief: Defines basic Data types used in OpenMAX v1.0.2 header files. + * + * Copyright © 2005-2008 The Khronos Group Inc. All Rights Reserved. + * + * These materials are protected by copyright laws and contain material + * proprietary to the Khronos Group, Inc. You may use these materials + * for implementing Khronos specifications, without altering or removing + * any trademark, copyright or other notice from the specification. + * + * Khronos Group makes no, and expressly disclaims any, representations + * or warranties, express or implied, regarding these materials, including, + * without limitation, any implied warranties of merchantability or fitness + * for a particular purpose or non-infringement of any intellectual property. + * Khronos Group makes no, and expressly disclaims any, warranties, express + * or implied, regarding the correctness, accuracy, completeness, timeliness, + * and reliability of these materials. + * + * Under no circumstances will the Khronos Group, or any of its Promoters, + * Contributors or Members or their respective partners, officers, directors, + * employees, agents or representatives be liable for any damages, whether + * direct, indirect, special or consequential damages for lost revenues, + * lost profits, or otherwise, arising from or in connection with these + * materials. + * + * Khronos and OpenMAX are trademarks of the Khronos Group Inc. 
+ * + */ + +#ifndef _OMXTYPES_H_ +#define _OMXTYPES_H_ + +#include <limits.h> /* supplies INT_MAX and the integer-limit macros tested further down this header */ + +#define OMX_IN /* OMX_IN, OMX_OUT and OMX_INOUT expand to nothing; NOTE(review): presumably parameter-direction markers for prototypes - confirm */ +#define OMX_OUT +#define OMX_INOUT + + +typedef enum { + + /* Mandatory return codes - use cases are explicitly described for each function */ + OMX_Sts_NoErr = 0, /* No error, the function completed successfully */ + OMX_Sts_Err = -2, /* Unknown/unspecified error */ + OMX_Sts_InvalidBitstreamValErr = -182, /* Invalid value detected during bitstream processing */ + OMX_Sts_MemAllocErr = -9, /* Not enough memory allocated for the operation */ + OMX_StsACAAC_GainCtrErr = -159, /* AAC: Unsupported gain control data detected */ + OMX_StsACAAC_PrgNumErr = -167, /* AAC: Invalid number of elements for one program */ + OMX_StsACAAC_CoefValErr = -163, /* AAC: Invalid quantized coefficient value */ + OMX_StsACAAC_MaxSfbErr = -162, /* AAC: Invalid maxSfb value in relation to numSwb */ + OMX_StsACAAC_PlsDataErr = -160, /* AAC: Pulse escape sequence data error */ + + /* Optional return codes - use cases are explicitly described for each function */ + OMX_Sts_BadArgErr = -5, /* Bad arguments */ + + OMX_StsACAAC_TnsNumFiltErr = -157, /* AAC: Invalid number of TNS filters */ + OMX_StsACAAC_TnsLenErr = -156, /* AAC: Invalid TNS region length */ + OMX_StsACAAC_TnsOrderErr = -155, /* AAC: Invalid order of TNS filter */ + OMX_StsACAAC_TnsCoefResErr = -154, /* AAC: Invalid bit-resolution for TNS filter coefficients */ + OMX_StsACAAC_TnsCoefErr = -153, /* AAC: Invalid TNS filter coefficients */ + OMX_StsACAAC_TnsDirectErr = -152, /* AAC: Invalid TNS filter direction */ + + OMX_StsICJP_JPEGMarkerErr = -183, /* JPEG marker encountered within an entropy-coded block; */ + /* Huffman decoding operation terminated early. */ + OMX_StsICJP_JPEGMarker = -181, /* JPEG marker encountered; Huffman decoding */ + /* operation terminated early.
*/ + OMX_StsIPPP_ContextMatchErr = -17, /* Context parameter doesn't match to the operation */ + + OMX_StsSP_EvenMedianMaskSizeErr = -180, /* Even size of the Median Filter mask was replaced by the odd one */ + + OMX_Sts_MaximumEnumeration = INT_MAX /*Placeholder, forces enum of size OMX_INT*/ + + } OMXResult; /** Return value or error value returned from a function. Identical to OMX_INT */ + + +/* OMX_U8 */ +#if UCHAR_MAX == 0xff +typedef unsigned char OMX_U8; +#elif USHRT_MAX == 0xff +typedef unsigned short int OMX_U8; +#else +#error OMX_U8 undefined +#endif + + +/* OMX_S8 */ +#if SCHAR_MAX == 0x7f +typedef signed char OMX_S8; +#elif SHRT_MAX == 0x7f +typedef signed short int OMX_S8; +#else +#error OMX_S8 undefined +#endif + + +/* OMX_U16 */ +#if USHRT_MAX == 0xffff +typedef unsigned short int OMX_U16; +#elif UINT_MAX == 0xffff +typedef unsigned int OMX_U16; +#else +#error OMX_U16 undefined +#endif + + +/* OMX_S16 */ +#if SHRT_MAX == 0x7fff +typedef signed short int OMX_S16; +#elif INT_MAX == 0x7fff +typedef signed int OMX_S16; +#else +#error OMX_S16 undefined +#endif + + +/* OMX_U32 */ +#if UINT_MAX == 0xffffffff +typedef unsigned int OMX_U32; +#elif LONG_MAX == 0xffffffff +typedef unsigned long int OMX_U32; +#else +#error OMX_U32 undefined +#endif + + +/* OMX_S32 */ +#if INT_MAX == 0x7fffffff +typedef signed int OMX_S32; +#elif LONG_MAX == 0x7fffffff +typedef long signed int OMX_S32; +#else +#error OMX_S32 undefined +#endif + + +/* OMX_U64 & OMX_S64 */ +#if defined( _WIN32 ) || defined ( _WIN64 ) + typedef __int64 OMX_S64; /** Signed 64-bit integer */ + typedef unsigned __int64 OMX_U64; /** Unsigned 64-bit integer */ + #define OMX_MIN_S64 (0x8000000000000000i64) + #define OMX_MIN_U64 (0x0000000000000000i64) + #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFi64) + #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFi64) +#else + typedef long long OMX_S64; /** Signed 64-bit integer */ + typedef unsigned long long OMX_U64; /** Unsigned 64-bit integer */ + #define OMX_MIN_S64 
(0x8000000000000000LL) + #define OMX_MIN_U64 (0x0000000000000000LL) + #define OMX_MAX_S64 (0x7FFFFFFFFFFFFFFFLL) + #define OMX_MAX_U64 (0xFFFFFFFFFFFFFFFFLL) +#endif + + +/* OMX_SC8 */ +typedef struct +{ + OMX_S8 Re; /** Real part */ + OMX_S8 Im; /** Imaginary part */ + +} OMX_SC8; /** Signed 8-bit complex number */ + + +/* OMX_SC16 */ +typedef struct +{ + OMX_S16 Re; /** Real part */ + OMX_S16 Im; /** Imaginary part */ + +} OMX_SC16; /** Signed 16-bit complex number */ + + +/* OMX_SC32 */ +typedef struct +{ + OMX_S32 Re; /** Real part */ + OMX_S32 Im; /** Imaginary part */ + +} OMX_SC32; /** Signed 32-bit complex number */ + + +/* OMX_SC64 */ +typedef struct +{ + OMX_S64 Re; /** Real part */ + OMX_S64 Im; /** Imaginary part */ + +} OMX_SC64; /** Signed 64-bit complex number */ + + +/* OMX_F32 */ +typedef float OMX_F32; /** Single precision floating point,IEEE 754 */ + + +/* OMX_F64 */ +typedef double OMX_F64; /** Double precision floating point,IEEE 754 */ + + +/* OMX_INT */ +typedef int OMX_INT; /** signed integer corresponding to machine word length, has maximum signed value INT_MAX*/ + + +#define OMX_MIN_S8 (-128) +#define OMX_MIN_U8 0 +#define OMX_MIN_S16 (-32768) +#define OMX_MIN_U16 0 +#define OMX_MIN_S32 (-2147483647-1) +#define OMX_MIN_U32 0 + +#define OMX_MAX_S8 (127) +#define OMX_MAX_U8 (255) +#define OMX_MAX_S16 (32767) +#define OMX_MAX_U16 (0xFFFF) +#define OMX_MAX_S32 (2147483647) +#define OMX_MAX_U32 (0xFFFFFFFF) + +typedef void OMXVoid; + +#ifndef NULL +#define NULL ((void*)0) +#endif + +/** Defines the geometric position and size of a rectangle, + * where x,y defines the coordinates of the top left corner + * of the rectangle, with dimensions width in the x-direction + * and height in the y-direction */ +typedef struct { + OMX_INT x; /** x-coordinate of top left corner of rectangle */ + OMX_INT y; /** y-coordinate of top left corner of rectangle */ + OMX_INT width; /** Width in the x-direction. */ + OMX_INT height; /** Height in the y-direction. 
*/ +}OMXRect; + + +/** Defines the geometric position of a point (x and y coordinates). */ +typedef struct +{ + OMX_INT x; /** x-coordinate */ + OMX_INT y; /** y-coordinate */ + +} OMXPoint; + + +/** Defines the dimensions of a rectangle, or region of interest in an image */ +typedef struct +{ + OMX_INT width; /** Width of the rectangle, in the x-direction */ + OMX_INT height; /** Height of the rectangle, in the y-direction */ + +} OMXSize; + +#endif /* _OMXTYPES_H_ */ diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes_s.h b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes_s.h new file mode 100755 index 0000000..48703d1 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/api/omxtypes_s.h @@ -0,0 +1,77 @@ +;// +;// +;// File Name: omxtypes_s.h +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + +;// Mandatory return codes - use cases are explicitly described for each function +OMX_Sts_NoErr EQU 0 ;// No error, the function completed successfully +OMX_Sts_Err EQU -2 ;// Unknown/unspecified error +OMX_Sts_InvalidBitstreamValErr EQU -182 ;// Invalid value detected during bitstream processing +OMX_Sts_MemAllocErr EQU -9 ;// Not enough memory allocated for the operation +OMX_StsACAAC_GainCtrErr EQU -159 ;// AAC: Unsupported gain control data detected +OMX_StsACAAC_PrgNumErr EQU -167 ;// AAC: Invalid number of elements for one program +OMX_StsACAAC_CoefValErr EQU -163 ;// AAC: Invalid quantized coefficient value +OMX_StsACAAC_MaxSfbErr EQU -162 ;// AAC: Invalid maxSfb value in relation to numSwb +OMX_StsACAAC_PlsDataErr EQU -160 ;// AAC: Pulse escape sequence data error + +;// Optional return codes - use cases are explicitly described for each function +OMX_Sts_BadArgErr EQU -5 ;// Bad arguments + +OMX_StsACAAC_TnsNumFiltErr EQU -157 ;// AAC: Invalid number of TNS filters +OMX_StsACAAC_TnsLenErr EQU -156 ;//
AAC: Invalid TNS region length +OMX_StsACAAC_TnsOrderErr EQU -155 ;// AAC: Invalid order of TNS filter +OMX_StsACAAC_TnsCoefResErr EQU -154 ;// AAC: Invalid bit-resolution for TNS filter coefficients +OMX_StsACAAC_TnsCoefErr EQU -153 ;// AAC: Invalid TNS filter coefficients +OMX_StsACAAC_TnsDirectErr EQU -152 ;// AAC: Invalid TNS filter direction + +OMX_StsICJP_JPEGMarkerErr EQU -183 ;// JPEG marker encountered within an entropy-coded block; + ;// Huffman decoding operation terminated early. +OMX_StsICJP_JPEGMarker EQU -181 ;// JPEG marker encountered; Huffman decoding + ;// operation terminated early. +OMX_StsIPPP_ContextMatchErr EQU -17 ;// Context parameter does not match the operation + +OMX_StsSP_EvenMedianMaskSizeErr EQU -180 ;// Even size of the Median Filter mask was replaced by the odd one + +OMX_Sts_MaximumEnumeration EQU 0x7FFFFFFF ;// Placeholder for the largest status value + + + +OMX_MIN_S8 EQU (-128) +OMX_MIN_U8 EQU 0 +OMX_MIN_S16 EQU (-32768) +OMX_MIN_U16 EQU 0 + + +OMX_MIN_S32 EQU (-2147483647-1) +OMX_MIN_U32 EQU 0 + +OMX_MAX_S8 EQU (127) +OMX_MAX_U8 EQU (255) +OMX_MAX_S16 EQU (32767) +OMX_MAX_U16 EQU (0xFFFF) +OMX_MAX_S32 EQU (2147483647) +OMX_MAX_U32 EQU (0xFFFFFFFF) + +OMX_VC_UPPER EQU 0x1 ;// Used by the PredictIntra functions +OMX_VC_LEFT EQU 0x2 ;// Used by the PredictIntra functions +OMX_VC_UPPER_RIGHT EQU 0x40 ;// Used by the PredictIntra functions + +NULL EQU 0 + +;// Structures +;// NOTE(review): M_STRUCT/M_FIELD/M_ENDSTRUCT are presumably macros supplied by armCOMM_s.h, +;// and the second M_FIELD operand looks like the field size in bytes - confirm against that file. + + INCLUDE armCOMM_s.h + + M_STRUCT OMXPoint + M_FIELD x, 4 + M_FIELD y, 4 + M_ENDSTRUCT + + END |