summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
diff options
context:
space:
mode:
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s')
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s178
1 files changed, 178 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
new file mode 100755
index 0000000..4e5a39d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
@@ -0,0 +1,178 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
+ EXPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+
+;// Functions:
+;// armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe and
+;// armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
+;//
+;// Implements re-arrangement of data from temporary buffer to a buffer pointed to by pBuf.
+;// This converts the data from 16 bit to 8 bit, and it also
+;// removes the offset and checks for saturation.
+;//
+;// Registers used as input for this function
+;// r0,r1,r7 where r0 is input pointer and r1 its step size, r7 is output pointer
+;//
+;// Registers preserved for top level function
+;// r4,r5,r6,r8,r9,r14
+;//
+;// Registers modified by the function
+;// r7,r10,r11,r12
+;//
+;// Output registers
+;// r0 - pointer to the destination location
+;// r1 - step size to this destination location
+
+
+DEBUG_ON SETL {FALSE}
+
+MASK EQU 0x80808080 ;// Mask is used to implement (a+b+1)/2; not referenced by the code visible in this file
+
+;// Declare input registers
+
+pSrc0 RN 0 ;// source pointer on entry; both routines overwrite it with a destination address before M_END
+srcStep0 RN 1 ;// source step on entry (offset operand of the M_LDR row load); overwritten with the destination step before M_END
+
+;// Declare other intermediate registers
+Temp1 RN 4 ;// row words 0..3 are loaded into Temp1..Temp4; Temp1/Temp2 are reused for packed output in the Ver routine
+Temp2 RN 5
+Temp3 RN 10 ;// same physical register (r10) as ValueA0
+Temp4 RN 11 ;// same physical register (r11) as ValueA1
+pBuf RN 7 ;// output pointer; advanced by the post-indexed stores
+r0x0fe00fe0 RN 6 ;// loaded with 0x0fe00fe0: the value UQSUB16 subtracts from each halfword
+r0x00ff00ff RN 12 ;// loaded with 0x00ff00ff: keeps the low byte of each halfword after the shift
+Count RN 14 ;// loop counter; r14 is the link register - NOTE(review): presumably saved/restored by M_START/M_END, confirm in armCOMM_s.h
+ValueA0 RN 10 ;// alias of Temp3 (r10): any write to ValueA0 destroys Temp3
+ValueA1 RN 11 ;// alias of Temp4 (r11): any write to ValueA1 destroys Temp4
+
+ IF ARM1136JS
+
+
+ ;// HorDiagCopy: converts 4 source rows of 8 halfwords (16 bytes read per row at pSrc0) into 32 bytes stored at pBuf
+ M_START armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe, r6 ;// NOTE(review): the r6 argument presumably selects the registers the macro saves - confirm in armCOMM_s.h
+
+ ;// Setup: four loop iterations (one source row each) and the two packed constants
+ MOV Count, #4
+ LDR r0x0fe00fe0, =0x0fe00fe0 ;// 0x0fe0 in both halfwords
+ LDR r0x00ff00ff, =0x00ff00ff ;// 0x00ff in both halfwords
+LoopStart1
+ LDR Temp4, [pSrc0, #12] ;// row bytes 12..15 (halfwords 6,7)
+ LDR Temp3, [pSrc0, #8] ;// row bytes 8..11 (halfwords 4,5)
+ LDR Temp2, [pSrc0, #4] ;// row bytes 4..7 (halfwords 2,3)
+ M_LDR Temp1, [pSrc0], srcStep0 ;// row bytes 0..3; NOTE(review): M_LDR presumably post-increments pSrc0 by srcStep0 - confirm in armCOMM_s.h
+ UQSUB16 Temp4, Temp4, r0x0fe00fe0 ;// per halfword: subtract 0x0fe0 with unsigned saturation (underflow gives 0)
+ UQSUB16 Temp3, Temp3, r0x0fe00fe0
+ UQSUB16 Temp2, Temp2, r0x0fe00fe0
+ UQSUB16 Temp1, Temp1, r0x0fe00fe0
+ USAT16 Temp4, #13, Temp4 ;// per halfword: saturate to 13 unsigned bits, 0..0x1fff (input is read as signed)
+ USAT16 Temp3, #13, Temp3
+ USAT16 Temp2, #13, Temp2
+ USAT16 Temp1, #13, Temp1
+ AND Temp4, r0x00ff00ff, Temp4, LSR #5 ;// shift right by 5 (13 bits -> 8 bits); the mask clears bits that crossed from the upper halfword
+ AND Temp3, r0x00ff00ff, Temp3, LSR #5
+ AND Temp2, r0x00ff00ff, Temp2, LSR #5
+ AND Temp1, r0x00ff00ff, Temp1, LSR #5
+ ORR ValueA1, Temp3, Temp4, LSL #8 ;// must come first: it reads Temp3 (r10), which the next ORR overwrites as ValueA0
+ ORR ValueA0, Temp1, Temp2, LSL #8 ;// bits [7:0]=Temp1 low, [15:8]=Temp2 low, [23:16]=Temp1 high, [31:24]=Temp2 high
+ SUBS Count, Count, #1 ;// sets the flags for BGT; the STRD in between does not change them
+ STRD ValueA0, [pBuf], #8 ;// store the r10/r11 pair (8 bytes) at pBuf, then pBuf += 8
+ BGT LoopStart1 ;// repeat while Count > 0
+End1
+ SUB pSrc0, pBuf, #32 ;// r0 = pBuf - 32: start of the 4 x 8 bytes just written
+ MOV srcStep0, #8 ;// r1 = 8: distance between the consecutive 8-byte stores
+
+ M_END
+
+
+ ;// VerDiagCopy: converts 4 source rows of 8 halfwords (two per loop pass) to bytes and regroups them into 32-bit words at pBuf
+ M_START armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe, r6 ;// NOTE(review): the r6 argument presumably selects the registers the macro saves - confirm in armCOMM_s.h
+
+ ;// Setup: packed constants and two loop passes; each pass consumes two source rows and writes 16 bytes
+ LDR r0x0fe00fe0, =0x0fe00fe0 ;// 0x0fe0 in both halfwords
+ LDR r0x00ff00ff, =0x00ff00ff ;// 0x00ff in both halfwords
+ MOV Count, #2
+
+LoopStart
+ LDR Temp4, [pSrc0, #12] ;// row bytes 12..15 (halfwords 6,7)
+ LDR Temp3, [pSrc0, #8] ;// row bytes 8..11 (halfwords 4,5)
+ LDR Temp2, [pSrc0, #4] ;// row bytes 4..7 (halfwords 2,3)
+ M_LDR Temp1, [pSrc0], srcStep0 ;// row bytes 0..3; NOTE(review): M_LDR presumably post-increments pSrc0 by srcStep0 - confirm in armCOMM_s.h
+
+ UQSUB16 Temp4, Temp4, r0x0fe00fe0 ;// per halfword: subtract 0x0fe0 with unsigned saturation (underflow gives 0)
+ UQSUB16 Temp3, Temp3, r0x0fe00fe0
+ UQSUB16 Temp2, Temp2, r0x0fe00fe0
+ UQSUB16 Temp1, Temp1, r0x0fe00fe0
+
+ USAT16 Temp4, #13, Temp4 ;// per halfword: saturate to 13 unsigned bits, 0..0x1fff (input is read as signed)
+ USAT16 Temp3, #13, Temp3
+ USAT16 Temp2, #13, Temp2
+ USAT16 Temp1, #13, Temp1
+
+ AND Temp4, r0x00ff00ff, Temp4, LSR #5 ;// shift right by 5 (13 bits -> 8 bits); the mask clears bits that crossed from the upper halfword
+ AND Temp3, r0x00ff00ff, Temp3, LSR #5
+ AND Temp2, r0x00ff00ff, Temp2, LSR #5
+ AND Temp1, r0x00ff00ff, Temp1, LSR #5
+ ORR ValueA1, Temp3, Temp4, LSL #8 ;// [d2 c2 d0 c0]
+ ORR ValueA0, Temp1, Temp2, LSL #8 ;// [b2 a2 b0 a0]
+
+ PKHBT Temp1, ValueA0, ValueA1, LSL #16 ;// [d0 c0 b0 a0]
+
+ STR Temp1, [pBuf], #8 ;// written at pass offset +0; pBuf moves to +8
+ PKHTB Temp2, ValueA1, ValueA0, ASR #16 ;// [d2 c2 b2 a2]
+ STR Temp2, [pBuf], #-4 ;// written at pass offset +8; pBuf steps back to +4 for the second row
+ ;// Second source row of this pass: same conversion, stored at pass offsets +4 and +12
+ LDR Temp4, [pSrc0, #12]
+ LDR Temp3, [pSrc0, #8]
+ LDR Temp2, [pSrc0, #4]
+ M_LDR Temp1, [pSrc0], srcStep0
+
+ UQSUB16 Temp4, Temp4, r0x0fe00fe0
+ UQSUB16 Temp3, Temp3, r0x0fe00fe0
+ UQSUB16 Temp2, Temp2, r0x0fe00fe0
+ UQSUB16 Temp1, Temp1, r0x0fe00fe0
+
+ USAT16 Temp4, #13, Temp4
+ USAT16 Temp3, #13, Temp3
+ USAT16 Temp2, #13, Temp2
+ USAT16 Temp1, #13, Temp1
+
+ AND Temp4, r0x00ff00ff, Temp4, LSR #5
+ AND Temp3, r0x00ff00ff, Temp3, LSR #5
+ AND Temp2, r0x00ff00ff, Temp2, LSR #5
+ AND Temp1, r0x00ff00ff, Temp1, LSR #5
+ ORR ValueA1, Temp3, Temp4, LSL #8 ;// [d2 c2 d0 c0]
+ ORR ValueA0, Temp1, Temp2, LSL #8 ;// [b2 a2 b0 a0]
+
+ PKHBT Temp1, ValueA0, ValueA1, LSL #16 ;// [d0 c0 b0 a0]
+ SUBS Count, Count, #1 ;// sets the flags for BGT; the STR/PKHTB instructions in between do not change them
+ STR Temp1, [pBuf], #8 ;// written at pass offset +4; pBuf moves to +12
+ PKHTB Temp2, ValueA1, ValueA0, ASR #16 ;// [d2 c2 b2 a2]
+ STR Temp2, [pBuf], #4 ;// written at pass offset +12; pBuf ends the pass at +16
+
+ BGT LoopStart ;// repeat while Count > 0
+End2
+ SUB pSrc0, pBuf, #32-8 ;// pBuf advanced 32 in total, so r0 = initial pBuf + 8; NOTE(review): confirm the caller expects this 8-byte offset
+ MOV srcStep0, #4 ;// r1 = 4: one packed 32-bit word per output step
+
+ M_END
+
+ ENDIF
+
+ END
+ \ No newline at end of file