diff options
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s')
-rwxr-xr-x | media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s | 236 |
1 files changed, 236 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s new file mode 100755 index 0000000..6e912d7 --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s @@ -0,0 +1,236 @@
;//
;//
;// File Name:  armVCM4P10_InterpolateLuma_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

        ;// Project headers. NOTE(review): the M_* macros used in this file
        ;// are presumably defined in armCOMM_s.h - confirm.
        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        ;// The only variant this file is built for
        M_VARIANTS ARM1136JS

        ;// Routines made visible to other objects
        EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe

DEBUG_ON        SETL {FALSE}

        IF ARM1136JS

;// ---------------------------------------------------------------------------
;// Register aliases shared by both alignment routines, listed by register
;// number. x and x0 are two names for the same register (r7): x holds the
;// byte offset used for dispatch, x0 the first data word inside the loops.
;// ---------------------------------------------------------------------------
pSrc            RN 0            ;// in : source pointer      out: aligned copy
srcStep         RN 1            ;// in : source step         out: step of the copy
x               RN 7            ;// pSrc AND 3, selects the copy loop
x0              RN 7            ;// data word 0
pDst            RN 8            ;// in : destination pointer, advanced per row
iHeight         RN 9            ;// in : rows to copy, counted down
x1              RN 10           ;// data word 1
x2              RN 11           ;// data word 2
Scratch         RN 12           ;// spare register

;// Function:
;//     armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
;//
;// Copies rows from an arbitrarily aligned source location (pSrc) to a
;// 4 byte aligned destination (pDst) ahead of horizontal interpolation.
;// 9 bytes are needed from every row in the horizontal direction.
;//
;// Register interface
;//
;//   Inputs
;//     r0  pSrc    - source pointer, any byte alignment
;//     r1  srcStep - amount added to pSrc after every row
;//     r8  pDst    - aligned memory pointer that receives the rows
;//     r9  iHeight - number of rows to copy
;//
;//   Preserved for the top level function
;//     r2, r3, r4, r5, r6
;//
;//   Modified by this function
;//     r7, r8, r9, r10, r11, r12
;//
;//   Outputs
;//     r0 - pointer to the new aligned location, to be used as pSrc
;//     r1 - step size of the aligned location (12)
;//
;// Every row is written as three words (12 bytes); the 9 wanted bytes start
;// at byte 0 of the first word. All four loops store a row before the row
;// counter is tested, so one row is copied even if iHeight is not positive
;// on entry.
;// NOTE(review): the shift directions below move the lowest-addressed source
;// byte into bits 7:0 only for little-endian word loads - confirm the build
;// configuration.

        M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe

HorAlign9x_Entry
        AND     x, pSrc, #3                     ;// x = byte offset of pSrc inside its word
        BIC     pSrc, pSrc, #3                  ;// pSrc rounded down to a word boundary
        MOV     Scratch, pDst                   ;// keep the start of the copy for the return value

        ;// One copy loop per possible byte offset
        M_SWITCH x
        M_CASE  HorAlign9x_Offset0
        M_CASE  HorAlign9x_Offset1
        M_CASE  HorAlign9x_Offset2
        M_CASE  HorAlign9x_Offset3
        M_ENDSWITCH

;// --- offset 0: source already word aligned, plain 3-word copy --------------
HorAlign9x_Offset0
        LDMIA   pSrc, {x0, x1, x2}
        ADD     pSrc, pSrc, srcStep             ;// next source row
        SUBS    iHeight, iHeight, #1            ;// flags are consumed by BGT below

        ;// One cycle stall

        STMIA   pDst!, {x0, x1, x2}             ;// store aligned output row
        BGT     HorAlign9x_Offset0
        B       HorAlign9x_Done

;// --- offset 1: drop 8 bits, pull 8 bits in from the following word ---------
HorAlign9x_Offset1
        LDMIA   pSrc, {x0, x1, x2}
        ADD     pSrc, pSrc, srcStep             ;// next source row
        SUBS    iHeight, iHeight, #1            ;// the MOV/ORR below leave the flags alone

        ;// One cycle stall

        MOV     x0, x0, LSR #8
        ORR     x0, x0, x1, LSL #24             ;// x0 = word0[31:8]  : word1[7:0]
        MOV     x1, x1, LSR #8
        ORR     x1, x1, x2, LSL #24             ;// x1 = word1[31:8]  : word2[7:0]
        MOV     x2, x2, LSR #8                  ;// x2 = word2[31:8]
        STMIA   pDst!, {x0, x1, x2}             ;// store aligned output row
        BGT     HorAlign9x_Offset1
        B       HorAlign9x_Done

;// --- offset 2: drop 16 bits, pull 16 bits in from the following word -------
HorAlign9x_Offset2
        LDMIA   pSrc, {x0, x1, x2}
        ADD     pSrc, pSrc, srcStep             ;// next source row
        SUBS    iHeight, iHeight, #1

        ;// One cycle stall

        MOV     x0, x0, LSR #16
        ORR     x0, x0, x1, LSL #16             ;// x0 = word0[31:16] : word1[15:0]
        MOV     x1, x1, LSR #16
        ORR     x1, x1, x2, LSL #16             ;// x1 = word1[31:16] : word2[15:0]
        MOV     x2, x2, LSR #16                 ;// x2 = word2[31:16]
        STMIA   pDst!, {x0, x1, x2}             ;// store aligned output row
        BGT     HorAlign9x_Offset2
        B       HorAlign9x_Done

;// --- offset 3: drop 24 bits, pull 24 bits in from the following word -------
HorAlign9x_Offset3
        LDMIA   pSrc, {x0, x1, x2}
        ADD     pSrc, pSrc, srcStep             ;// next source row
        SUBS    iHeight, iHeight, #1

        ;// One cycle stall

        MOV     x0, x0, LSR #24
        ORR     x0, x0, x1, LSL #8              ;// x0 = word0[31:24] : word1[23:0]
        MOV     x1, x1, LSR #24
        ORR     x1, x1, x2, LSL #8              ;// x1 = word1[31:24] : word2[23:0]
        MOV     x2, x2, LSR #24                 ;// x2 = word2[31:24], the 9th byte of the row
        STMIA   pDst!, {x0, x1, x2}             ;// store aligned output row
        BGT     HorAlign9x_Offset3
        ;// last case falls through to the common exit

HorAlign9x_Done
        MOV     srcStep, #12                    ;// r1: rows of the copy are 12 bytes apart
        MOV     pSrc, Scratch                   ;// r0: start of the aligned copy

        M_END


;//
;// Function:
;//     armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
;//
;// Copies rows from an arbitrarily aligned source location (pSrc) to an
;// aligned destination (pDst) ahead of vertical interpolation.
;// 4 bytes are needed from every row, so each row becomes one stored word.
;//
;// Register interface
;//
;//   Inputs
;//     r0  pSrc    - source pointer, any byte alignment
;//     r1  srcStep - amount added to pSrc after every row
;//     r8  pDst    - aligned memory pointer that receives the rows
;//     r9  iHeight - number of rows to copy
;//
;//   Preserved for the top level function
;//     r2, r3, r4, r5, r6
;//
;//   Modified by this function
;//     r7, r8, r9, r10, r11, r12
;//
;//   Outputs
;//     r0 - pointer to the new aligned location, to be used as pSrc
;//     r1 - step size of the aligned location (4)
;//
;// All four loops store a row before the row counter is tested, so one row
;// is copied even if iHeight is not positive on entry.
;// NOTE(review): the shift directions below move the lowest-addressed source
;// byte into bits 7:0 only for little-endian word loads - confirm the build
;// configuration.

        M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe

VerAlign4x_Entry
        AND     x, pSrc, #3                     ;// x = byte offset of pSrc inside its word
        BIC     pSrc, pSrc, #3                  ;// pSrc rounded down to a word boundary

        ;// One copy loop per possible byte offset
        M_SWITCH x
        M_CASE  VerAlign4x_Offset0
        M_CASE  VerAlign4x_Offset1
        M_CASE  VerAlign4x_Offset2
        M_CASE  VerAlign4x_Offset3
        M_ENDSWITCH

;// --- offset 0: source already word aligned, one word per row ---------------
VerAlign4x_Offset0
        M_LDR   x0, [pSrc], srcStep             ;// load the row, then step to the next one
        SUBS    iHeight, iHeight, #1            ;// flags are consumed by BGT below

        ;// One cycle stall

        STR     x0, [pDst], #4                  ;// store aligned output row
        BGT     VerAlign4x_Offset0
        B       VerAlign4x_Done

;// --- offset 1: word0[31:8] joined with word1[7:0] --------------------------
VerAlign4x_Offset1
        LDR     x1, [pSrc, #4]                  ;// following word, read before pSrc moves
        M_LDR   x0, [pSrc], srcStep             ;// load the row, then step to the next one
        SUBS    iHeight, iHeight, #1            ;// the MOV/ORR below leave the flags alone

        ;// One cycle stall

        MOV     x1, x1, LSL #24
        ORR     x0, x1, x0, LSR #8
        STR     x0, [pDst], #4                  ;// store aligned output row
        BGT     VerAlign4x_Offset1
        B       VerAlign4x_Done

;// --- offset 2: word0[31:16] joined with word1[15:0] ------------------------
VerAlign4x_Offset2
        LDR     x1, [pSrc, #4]                  ;// following word, read before pSrc moves
        M_LDR   x0, [pSrc], srcStep             ;// load the row, then step to the next one
        SUBS    iHeight, iHeight, #1

        ;// One cycle stall

        MOV     x1, x1, LSL #16
        ORR     x0, x1, x0, LSR #16
        STR     x0, [pDst], #4                  ;// store aligned output row
        BGT     VerAlign4x_Offset2
        B       VerAlign4x_Done

;// --- offset 3: word0[31:24] joined with word1[23:0] ------------------------
VerAlign4x_Offset3
        LDR     x1, [pSrc, #4]                  ;// following word, read before pSrc moves
        M_LDR   x0, [pSrc], srcStep             ;// load the row, then step to the next one
        SUBS    iHeight, iHeight, #1

        ;// One cycle stall

        MOV     x1, x1, LSL #8
        ORR     x0, x1, x0, LSR #24
        STR     x0, [pDst], #4                  ;// store aligned output row
        BGT     VerAlign4x_Offset3
        ;// last case falls through to the common exit

VerAlign4x_Done
        MOV     srcStep, #4                     ;// r1: rows of the copy are 4 bytes apart

        ;// pDst now points 4 bytes past the last stored row. Stepping back 28
        ;// bytes (7 rows) lands on the third stored row only when exactly
        ;// 9 rows were copied.
        ;// NOTE(review): this looks like a fixed contract with the callers
        ;// (iHeight == 9, result pointing two rows into the copy) - confirm.
        SUB     pSrc, pDst, #28                 ;// r0: pointer into the aligned copy

        M_END


        ENDIF

        END