summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
diff options
context:
space:
mode:
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s')
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s236
1 files changed, 236 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
new file mode 100755
index 0000000..6e912d7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_Align_unsafe_s.s
@@ -0,0 +1,236 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_Align_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+ EXPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+
+DEBUG_ON SETL {FALSE}
+
+ IF ARM1136JS
+
+;// Declare input registers
+pSrc RN 0 ;// r0: source pointer on entry; both functions overwrite it with a pointer into the aligned copy
+srcStep RN 1 ;// r1: byte step between source rows on entry; overwritten with the step of the aligned copy
+pDst RN 8 ;// r8: destination pointer, advanced as each row is stored
+iHeight RN 9 ;// r9: row counter, decremented once per copied row
+
+;// Declare inner loop registers
+x RN 7 ;// r7: byte offset of pSrc within its word (pSrc AND 3), used only as the M_SWITCH selector
+x0 RN 7 ;// r7 again: shares the register with x, which is no longer needed once the copy loop is entered
+x1 RN 10 ;// r10: second data word of a row
+x2 RN 11 ;// r11: third data word of a row (HorAlign9x only)
+Scratch RN 12 ;// r12: holds the initial pDst in HorAlign9x
+
+;// Function:
+;// armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+;//
+;// Copies rows from an arbitrarily aligned source location (pSrc) to the 4-byte aligned
+;// destination buffer pointed to by pDst, for horizontal interpolation.
+;// Horizontal interpolation needs 9 bytes per row; each row is stored as three words (12 bytes).
+;//
+;// Registers used as input for this function
+;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy
+;//
+;// Registers preserved for top level function
+;// r2,r3,r4,r5,r6
+;//
+;// Registers modified by the function
+;// r7,r8,r9,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the new aligned location which will be used as pSrc
+;// r1 - step size to this aligned location
+
+ ;// Function header
+ M_START armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
+
+ ;// Remember the start of the destination buffer; it is returned in r0 at CopyEnd
+ MOV Scratch, pDst
+
+StartAlignedStackCopy
+ AND x, pSrc, #3 ;// x = byte offset of pSrc within its word (0..3)
+ BIC pSrc, pSrc, #3 ;// round pSrc down to a word boundary
+ ;// NOTE(review): x is computed once, so every row is assumed to share it, i.e. srcStep is a multiple of 4 - confirm
+ M_SWITCH x ;// M_SWITCH/M_CASE are defined in armCOMM_s.h (not visible here); presumably branch to case number x
+ M_CASE Copy0toAligned
+ M_CASE Copy1toAligned
+ M_CASE Copy2toAligned
+ M_CASE Copy3toAligned
+ M_ENDSWITCH
+ ;// Each loop below tests the counter after storing, so one row is copied even if iHeight <= 0 on entry
+Copy0toAligned
+ LDM pSrc, {x0, x1, x2} ;// source already word aligned: load 12 bytes
+ SUBS iHeight, iHeight, #1 ;// count this row; the flags are consumed by BGT below
+ ADD pSrc, pSrc, srcStep ;// advance to the next source row
+
+ ;// One cycle stall
+
+ STM pDst!, {x0, x1, x2} ;// Store aligned output row
+ BGT Copy0toAligned ;// loop while rows remain
+ B CopyEnd
+
+Copy1toAligned
+ LDM pSrc, {x0, x1, x2} ;// load the three words that contain the row
+ SUBS iHeight, iHeight, #1 ;// count this row; the shifts below do not alter the flags
+ ADD pSrc, pSrc, srcStep ;// advance to the next source row
+
+ ;// One cycle stall
+ ;// Shift the 12 loaded bytes down by one byte (assumes little-endian word loads)
+ MOV x0, x0, LSR #8 ;// discard the 1 byte that precedes the row
+ ORR x0, x0, x1, LSL #24 ;// refill the top byte from the next word
+ MOV x1, x1, LSR #8
+ ORR x1, x1, x2, LSL #24
+ MOV x2, x2, LSR #8 ;// last word keeps 3 source bytes, top byte is zero
+ STM pDst!, {x0, x1, x2} ;// Store aligned output row
+ BGT Copy1toAligned ;// loop while rows remain
+ B CopyEnd
+
+Copy2toAligned
+ LDM pSrc, {x0, x1, x2} ;// load the three words that contain the row
+ SUBS iHeight, iHeight, #1 ;// count this row; the shifts below do not alter the flags
+ ADD pSrc, pSrc, srcStep ;// advance to the next source row
+
+ ;// One cycle stall
+ ;// Shift the 12 loaded bytes down by two bytes (assumes little-endian word loads)
+ MOV x0, x0, LSR #16 ;// discard the 2 bytes that precede the row
+ ORR x0, x0, x1, LSL #16 ;// refill the top halfword from the next word
+ MOV x1, x1, LSR #16
+ ORR x1, x1, x2, LSL #16
+ MOV x2, x2, LSR #16 ;// last word keeps 2 source bytes, upper half is zero
+ STM pDst!, {x0, x1, x2} ;// Store aligned output row
+ BGT Copy2toAligned ;// loop while rows remain
+ B CopyEnd
+
+Copy3toAligned
+ LDM pSrc, {x0, x1, x2} ;// load the three words that contain the row
+ SUBS iHeight, iHeight, #1 ;// count this row; the shifts below do not alter the flags
+ ADD pSrc, pSrc, srcStep ;// advance to the next source row
+
+ ;// One cycle stall
+ ;// Shift the 12 loaded bytes down by three bytes (assumes little-endian word loads)
+ MOV x0, x0, LSR #24 ;// discard the 3 bytes that precede the row
+ ORR x0, x0, x1, LSL #8 ;// refill the upper three bytes from the next word
+ MOV x1, x1, LSR #24
+ ORR x1, x1, x2, LSL #8
+ MOV x2, x2, LSR #24 ;// last word keeps 1 source byte: 4 + 4 + 1 = the 9 bytes needed
+ STM pDst!, {x0, x1, x2} ;// Store aligned output row
+ BGT Copy3toAligned ;// loop while rows remain
+ ;// Loop exit falls through to CopyEnd
+CopyEnd
+
+ MOV pSrc, Scratch ;// r0 = start of the aligned copy (the pDst value saved on entry)
+ MOV srcStep, #12 ;// r1 = 12: each copied row occupies three words
+
+ M_END
+
+
+;// Function:
+;// armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+;//
+;// Copies rows from an arbitrarily aligned source location (pSrc) to the aligned
+;// destination buffer pointed to by pDst, for vertical interpolation.
+;// Vertical interpolation needs 4 bytes per row; each row is stored as one word.
+;//
+;// Registers used as input for this function
+;// r0,r1,r8,r9 where r8 contains the aligned memory pointer and r9 the number of rows to copy
+;//
+;// Registers preserved for top level function
+;// r2,r3,r4,r5,r6
+;//
+;// Registers modified by the function
+;// r7,r8,r9,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the new aligned location which will be used as pSrc
+;// r1 - step size to this aligned location
+
+ ;// Function header
+ M_START armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
+
+ ;// Copy rows from pSrc into the aligned buffer at pDst (the label names suggest a stack buffer - confirm against the caller)
+StartVAlignedStackCopy
+ AND x, pSrc, #3 ;// x = byte offset of pSrc within its word (0..3)
+ BIC pSrc, pSrc, #3 ;// round pSrc down to a word boundary
+ ;// NOTE(review): x is computed once, so every row is assumed to share it, i.e. srcStep is a multiple of 4 - confirm
+ ;// Each loop below tests the counter after storing, so one row is copied even if iHeight <= 0 on entry
+ M_SWITCH x ;// M_SWITCH/M_CASE are defined in armCOMM_s.h (not visible here); presumably branch to case number x
+ M_CASE Copy0toVAligned
+ M_CASE Copy1toVAligned
+ M_CASE Copy2toVAligned
+ M_CASE Copy3toVAligned
+ M_ENDSWITCH
+
+Copy0toVAligned
+ M_LDR x0, [pSrc], srcStep ;// load the aligned word; M_LDR (armCOMM_s.h) presumably post-increments pSrc by srcStep
+ SUBS iHeight, iHeight, #1 ;// count this row; the flags are consumed by BGT below
+
+ ;// One cycle stall
+
+ STR x0, [pDst], #4 ;// Store aligned output row
+ BGT Copy0toVAligned ;// loop while rows remain
+ B CopyVEnd
+
+Copy1toVAligned
+ LDR x1, [pSrc, #4] ;// word that follows the aligned word (read before pSrc is advanced)
+ M_LDR x0, [pSrc], srcStep ;// aligned word; pSrc presumably post-incremented by srcStep
+ SUBS iHeight, iHeight, #1 ;// count this row; the shifts below do not alter the flags
+
+ ;// One cycle stall
+ ;// Merge 3 bytes of x0 with 1 byte of x1 (assumes little-endian word loads)
+ MOV x1, x1, LSL #24 ;// low byte of the next word becomes the top byte
+ ORR x0, x1, x0, LSR #8 ;// upper 3 bytes of the aligned word become the low 3 bytes
+ STR x0, [pDst], #4 ;// Store aligned output row
+ BGT Copy1toVAligned ;// loop while rows remain
+ B CopyVEnd
+
+Copy2toVAligned
+ LDR x1, [pSrc, #4] ;// word that follows the aligned word (read before pSrc is advanced)
+ M_LDR x0, [pSrc], srcStep ;// aligned word; pSrc presumably post-incremented by srcStep
+ SUBS iHeight, iHeight, #1 ;// count this row; the shifts below do not alter the flags
+
+ ;// One cycle stall
+ ;// Merge 2 bytes of x0 with 2 bytes of x1 (assumes little-endian word loads)
+ MOV x1, x1, LSL #16 ;// low halfword of the next word becomes the top halfword
+ ORR x0, x1, x0, LSR #16 ;// upper halfword of the aligned word becomes the low halfword
+ STR x0, [pDst], #4 ;// Store aligned output row
+ BGT Copy2toVAligned ;// loop while rows remain
+ B CopyVEnd
+
+Copy3toVAligned
+ LDR x1, [pSrc, #4] ;// word that follows the aligned word (read before pSrc is advanced)
+ M_LDR x0, [pSrc], srcStep ;// aligned word; pSrc presumably post-incremented by srcStep
+ SUBS iHeight, iHeight, #1 ;// count this row; the shifts below do not alter the flags
+
+ ;// One cycle stall
+ ;// Merge 1 byte of x0 with 3 bytes of x1 (assumes little-endian word loads)
+ MOV x1, x1, LSL #8 ;// low 3 bytes of the next word become the upper 3 bytes
+ ORR x0, x1, x0, LSR #24 ;// top byte of the aligned word becomes the low byte
+ STR x0, [pDst], #4 ;// Store aligned output row
+ BGT Copy3toVAligned ;// loop while rows remain
+ ;// Loop exit falls through to CopyVEnd
+CopyVEnd
+ ;// pDst has advanced 4 bytes per stored row; NOTE(review): pDst - 28 equals buffer start + 8 (third row) only when 9 rows were copied - confirm callers always pass r9 = 9
+ SUB pSrc, pDst, #28 ;// r0 = end of the aligned copy minus 28 bytes
+ MOV srcStep, #4 ;// r1 = 4: each copied row occupies one word
+
+ M_END
+
+
+ ENDIF
+
+ END
+