diff options
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s')
-rwxr-xr-x | media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s | 178 |
1 files changed, 178 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s new file mode 100755 index 0000000..4e5a39d --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s @@ -0,0 +1,178 @@ +;// +;// +;// File Name: armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS ARM1136JS + + EXPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe + EXPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe + +;// Functions: +;// armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe and +;// armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe +;// +;// Implements re-arrangement of data from temporary buffer to a buffer pointed by pBuf. +;// This will do the convertion of data from 16 bit to 8 bit and it also +;// remove offset and check for saturation. +;// +;// Registers used as input for this function +;// r0,r1,r7 where r0 is input pointer and r2 its step size, r7 is output pointer +;// +;// Registers preserved for top level function +;// r4,r5,r6,r8,r9,r14 +;// +;// Registers modified by the function +;// r7,r10,r11,r12 +;// +;// Output registers +;// r0 - pointer to the destination location +;// r1 - step size to this destination location + + +DEBUG_ON SETL {FALSE} + +MASK EQU 0x80808080 ;// Mask is used to implement (a+b+1)/2 + +;// Declare input registers + +pSrc0 RN 0 +srcStep0 RN 1 + +;// Declare other intermediate registers +Temp1 RN 4 +Temp2 RN 5 +Temp3 RN 10 +Temp4 RN 11 +pBuf RN 7 +r0x0fe00fe0 RN 6 +r0x00ff00ff RN 12 +Count RN 14 +ValueA0 RN 10 +ValueA1 RN 11 + + IF ARM1136JS + + + ;// Function header + M_START armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe, r6 + + ;// Code start + MOV Count, #4 + LDR r0x0fe00fe0, =0x0fe00fe0 + LDR r0x00ff00ff, =0x00ff00ff +LoopStart1 + LDR Temp4, [pSrc0, #12] + LDR Temp3, [pSrc0, #8] + LDR Temp2, [pSrc0, #4] + M_LDR Temp1, [pSrc0], srcStep0 + UQSUB16 Temp4, Temp4, r0x0fe00fe0 + UQSUB16 Temp3, Temp3, r0x0fe00fe0 + UQSUB16 Temp2, Temp2, r0x0fe00fe0 + UQSUB16 Temp1, Temp1, r0x0fe00fe0 + USAT16 Temp4, #13, Temp4 + USAT16 Temp3, #13, Temp3 + USAT16 Temp2, #13, Temp2 + USAT16 Temp1, #13, Temp1 + AND Temp4, r0x00ff00ff, Temp4, LSR #5 + AND Temp3, r0x00ff00ff, Temp3, LSR #5 + AND Temp2, r0x00ff00ff, Temp2, LSR #5 + AND Temp1, r0x00ff00ff, Temp1, LSR #5 + ORR ValueA1, Temp3, Temp4, LSL #8 + ORR ValueA0, Temp1, Temp2, LSL #8 + SUBS Count, Count, #1 + STRD ValueA0, [pBuf], #8 + BGT LoopStart1 +End1 + SUB pSrc0, pBuf, #32 + MOV srcStep0, #8 + + M_END + + + ;// Function header + M_START armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe, r6 + + ;// Code start + LDR r0x0fe00fe0, =0x0fe00fe0 + LDR r0x00ff00ff, =0x00ff00ff + MOV Count, #2 + +LoopStart + LDR Temp4, [pSrc0, #12] + LDR Temp3, [pSrc0, #8] + LDR Temp2, [pSrc0, #4] + M_LDR Temp1, [pSrc0], srcStep0 + + UQSUB16 Temp4, Temp4, r0x0fe00fe0 + UQSUB16 Temp3, Temp3, r0x0fe00fe0 + UQSUB16 Temp2, Temp2, r0x0fe00fe0 + UQSUB16 Temp1, Temp1, r0x0fe00fe0 + + USAT16 Temp4, #13, Temp4 + USAT16 Temp3, #13, Temp3 + USAT16 Temp2, #13, Temp2 + USAT16 Temp1, #13, Temp1 + + AND Temp4, r0x00ff00ff, Temp4, LSR #5 + AND Temp3, r0x00ff00ff, Temp3, LSR #5 + AND Temp2, r0x00ff00ff, Temp2, LSR #5 + AND Temp1, r0x00ff00ff, Temp1, LSR #5 + ORR ValueA1, Temp3, Temp4, LSL #8 ;// [d2 c2 d0 c0] + ORR ValueA0, Temp1, Temp2, LSL #8 ;// [b2 a2 b0 a0] + + PKHBT Temp1, ValueA0, ValueA1, LSL #16 ;// [d0 c0 b0 a0] + + STR Temp1, [pBuf], #8 + PKHTB Temp2, ValueA1, ValueA0, ASR #16 ;// [d2 c2 b2 a2] + STR Temp2, [pBuf], #-4 + + LDR Temp4, [pSrc0, #12] + LDR Temp3, [pSrc0, #8] + LDR Temp2, [pSrc0, #4] + M_LDR Temp1, [pSrc0], srcStep0 + + UQSUB16 Temp4, Temp4, r0x0fe00fe0 + UQSUB16 Temp3, Temp3, r0x0fe00fe0 + UQSUB16 Temp2, Temp2, r0x0fe00fe0 + UQSUB16 Temp1, Temp1, r0x0fe00fe0 + + USAT16 Temp4, #13, Temp4 + USAT16 Temp3, #13, Temp3 + USAT16 Temp2, #13, Temp2 + USAT16 Temp1, #13, Temp1 + + AND Temp4, r0x00ff00ff, Temp4, LSR #5 + AND Temp3, r0x00ff00ff, Temp3, LSR #5 + AND Temp2, r0x00ff00ff, Temp2, LSR #5 + AND Temp1, r0x00ff00ff, Temp1, LSR #5 + ORR ValueA1, Temp3, Temp4, LSL #8 ;// [d2 c2 d0 c0] + ORR ValueA0, Temp1, Temp2, LSL #8 ;// [b2 a2 b0 a0] + + PKHBT Temp1, ValueA0, ValueA1, LSL #16 ;// [d0 c0 b0 a0] + SUBS Count, Count, #1 + STR Temp1, [pBuf], #8 + PKHTB Temp2, ValueA1, ValueA0, ASR #16 ;// [d2 c2 b2 a2] + STR Temp2, [pBuf], #4 + + BGT LoopStart +End2 + SUB pSrc0, pBuf, #32-8 + MOV srcStep0, #4 + + M_END + + ENDIF + + END +
\ No newline at end of file |