diff options
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s')
-rwxr-xr-x | media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s | 140 |
1 files changed, 140 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s new file mode 100755 index 0000000..e394339 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_TransformDequantChromaDCFromPair_s.s @@ -0,0 +1,140 @@ +;// +;// +;// File Name: omxVCM4P10_TransformDequantChromaDCFromPair_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + IMPORT armVCM4P10_QPDivTable + IMPORT armVCM4P10_VMatrixQPModTable + + M_VARIANTS CortexA8 + + + + + IF CortexA8 + +;// ARM Registers +;//-------------------------------------- +;// Declare input registers +;//-------------------------------------- +ppSrc RN 0 +pDst RN 1 +QP RN 2 + +;//-------------------------------- +;// Scratch variable for Unpack2x2 +;//-------------------------------- +pSrc RN 9 +Value RN 4 +Value2 RN 5 +Flag RN 6 +strOffset RN 7 +cstOffset RN 8 + +;//-------------------------------- +;// Scratch variable +;//-------------------------------- +r0w0 RN 3 +r0w1 RN 4 + +c0w0 RN 5 +c1w0 RN 6 + +return RN 0 +pQPDivTable RN 5 +pQPModTable RN 6 +Shift RN 9 +Scale RN 2 + + + +;// Neon Registers + +dZero DN D0.U16 +dInvTrCoeff DN D0.S16 +dScale DN D1.S16 +qDqntCoeff QN Q1.S32 +dDqntCoeff DN D2.S16 + + + ;// Write function header + M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9 + + LDR pSrc, [ppSrc] ;// Load pSrc + VMOV dZero, #0 + MOV cstOffset, #31 ;// To be used in the loop, to compute offset + + ;//----------------------------------------------------------------------- + ;// Firstly, fill all the coefficient values on the <pDst> buffer by zero + ;//----------------------------------------------------------------------- + + VST1 dZero,[pDst] ;// pDst[0] = pDst[1] = pDst[2] = pDst[3] = 0 + LDRB Flag, [pSrc], #1 ;// Preload <Flag> before <unpackLoop> + + +unpackLoop + TST Flag, #0x10 ;// Computing (Flag & 0x10) + LDRSBNE Value2,[pSrc,#1] + LDRBNE Value, [pSrc], #2 ;// Load byte wise to avoid unaligned access + AND strOffset, cstOffset, Flag, LSL #1 ;// strOffset = (Flag & 15) < 1; + LDRSBEQ Value, [pSrc], #1 ;// Value = (OMX_U8) *pSrc++ + ORRNE Value,Value,Value2, LSL #8 ;// Value = (OMX_U16) *pSrc++ + + TST Flag, #0x20 ;// Computing (Flag & 0x20) to check, if we're done + LDRBEQ Flag, [pSrc], #1 ;// Flag = (OMX_U8) *pSrc++, for next iteration + STRH Value, [pDst, strOffset] ;// Store <Value> at offset <strOffset> + BEQ unpackLoop ;// Branch to the loop beginning + + ;//-------------------------------------------------- + ;//InvTransformDC2x2: Inlined (Implemented in ARM V6) + ;//-------------------------------------------------- + + LDMIA pDst, {r0w0, r0w1} ;// r0w0 = |c1|c0| & r0w1 = |c3|c2| + + STR pSrc, [ppSrc] ;// Update the bitstream pointer + + LDR pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer + LDR pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer + + SADDSUBX r0w0, r0w0, r0w0 ;// [ c00+c01, c00-c01 ] + SADDSUBX r0w1, r0w1, r0w1 ;// [ c10+c11, c10-c11 ] + + LDRSB Shift, [pQPDivTable, QP] ;// Shift = pQPDivTable[QP] + LDRSB Scale, [pQPModTable, QP] ;// Scale = pQPModTable[QP] + + SADD16 c0w0, r0w0, r0w1 ;// [ d00+d10, d01+d11 ] + SSUB16 c1w0, r0w0, r0w1 ;// [ d00-d10, d01-d11 ] + + ;//------------------------------------------------- + ;//DequantChromaDC2x2: Inlined (Neon Implementation) + ;//------------------------------------------------- + + LSL Scale, Scale, Shift ;// Scale = Scale << Shift + VMOV dInvTrCoeff, c0w0, c1w0 + VREV32 dInvTrCoeff,dInvTrCoeff + VDUP dScale,Scale + + VMULL qDqntCoeff,dInvTrCoeff,dScale + VSHRN dDqntCoeff,qDqntCoeff,#1 + + + VST1 dDqntCoeff,[pDst] ;// Storing all the coefficients at once + + MOV return, #OMX_Sts_NoErr + M_END + + ENDIF ;// CortexA8 + + + END |