summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
diff options
context:
space:
mode:
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s')
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s239
1 files changed, 239 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
new file mode 100644
index 0000000..6690ced
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
@@ -0,0 +1,239 @@
+;//
+;//
+;// File Name: armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS
+
+ EXPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+
+DEBUG_ON SETL {FALSE}
+
+
+ IF ARM1136JS
+
+;// Function:
+;// armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
+;//
+;// Implements horizontal interpolation for a block of size 4x4. Input and output should
+;// be aligned.
+;//
+;// Registers used as input for this function
+;// r0,r1,r2,r3 where r0,r2 input pointer and r1,r3 corresponding step size
+;//
+;// Registers preserved for top level function
+;// r0,r1,r2,r3,r4,r5,r6,r14
+;//
+;// Registers modified by the function
+;// r7,r8,r9,r10,r11,r12
+;//
+;// Output registers
+;// None. Function will preserve r0-r3
+
+
+;// Declare input registers
+pSrc RN 0
+srcStep RN 1
+pDst RN 2
+dstStep RN 3
+
+;// Declare inner loop registers
+Acc0 RN 4
+Acc1 RN 5
+Acc2 RN 6
+Acc3 RN 7
+
+ValA RN 4
+ValB RN 5
+ValC RN 6
+ValD RN 7
+ValE RN 8
+ValF RN 9
+ValG RN 12
+ValH RN 14
+ValI RN 1
+
+Temp1 RN 3
+Temp2 RN 1
+Temp3 RN 12
+Temp4 RN 7
+Temp5 RN 5
+r0x0fe00fe0 RN 3 ;// [0 (16*255 - 16) 0 (16*255 - 16)]
+r0x00ff00ff RN 10 ;// [0 255 0 255] where 255 is offset
+Counter RN 11
+
+Height RN 3
+
+ M_ALLOC4 pDstStep, 4
+ M_ALLOC4 pSrcStep, 4
+
+ ;// Function header
+ M_START armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe, r6
+
+ MOV Counter, #2
+ M_STR dstStep, pDstStep
+ M_STR srcStep, pSrcStep
+ LDR r0x00ff00ff, =0x00ff00ff ;// [0 255 0 255] 255 is offset to avoid negative results
+
+NextTwoRowsLoop
+ LDR ValD, [pSrc, srcStep] ;// Load row 1 [d1 c1 b1 a1]
+ LDR ValA, [pSrc], #4 ;// Load row 0 [d0 c0 b0 a0]
+ LDR ValH, [pSrc, srcStep] ;// Load [h1 g1 f1 e1]
+ LDR ValE, [pSrc], #4 ;// Load [h0 g0 f0 e0]
+ LDRB Temp2, [pSrc, srcStep] ;// Load row 1 [l1 k1 j1 i1]
+ LDRB Temp1, [pSrc], #-8 ;// Load row 0 [l0 k0 j0 i0]
+
+ PKHBT ValB, ValA, ValD, LSL #16 ;// [b1 a1 b0 a0]
+ PKHTB ValD, ValD, ValA, ASR #16 ;// [d1 c1 d0 c0]
+ UXTAB16 ValA, r0x00ff00ff, ValB ;// [00 a1 00 a0] + [0 255 0 255]
+ UXTAB16 ValC, r0x00ff00ff, ValD ;// [00 c1 00 c0] + [0 255 0 255]
+ PKHBT ValI, Temp1, Temp2, LSL #16 ;// [00 i1 00 i0]
+ PKHBT ValF, ValE, ValH, LSL #16 ;// [f1 e1 f0 e0]
+ PKHTB ValH, ValH, ValE, ASR #16 ;// [h1 g1 h0 g0]
+ UXTAB16 ValE, r0x00ff00ff, ValF ;// [00 e1 00 e0] + [0 255 0 255]
+
+ ;// Calculate Acc0
+ ;// Acc0 = a - 5*b + 20*c + 20*d - 5*e + f
+ UXTAB16 Temp1, ValC, ValD, ROR #8
+ UXTAB16 Temp3, ValE, ValB, ROR #8
+ RSB Temp1, Temp3, Temp1, LSL #2
+ UXTAB16 Acc0, ValA, ValF, ROR #8
+ ADD Temp1, Temp1, Temp1, LSL #2
+ ADD Acc0, Acc0, Temp1
+
+ ;// Calculate Acc1
+ ;// Acc1 = b - 5*c + 20*d + 20*e - 5*f + g
+ UXTAB16 Temp1, ValE, ValD, ROR #8
+ UXTAB16 Temp3, ValC, ValF, ROR #8
+ RSB Temp1, Temp3, Temp1, LSL #2
+ UXTAB16 ValG, r0x00ff00ff, ValH ;// [00 g1 00 g0] + [0 255 0 255]
+ ADD Temp1, Temp1, Temp1, LSL #2
+ UXTAB16 Acc1, ValG, ValB, ROR #8
+ ADD Acc1, Acc1, Temp1
+
+ LDR r0x0fe00fe0, =0x0fe00fe0 ;// 0x0fe00fe0 = (16 * Offset) - 16 where Offset is 255
+ UXTAB16 Acc2, ValC, ValH, ROR #8
+ ADD ValI, r0x00ff00ff, ValI ;// [00 i1 00 i0] + [0 255 0 255]
+ UQSUB16 Acc0, Acc0, r0x0fe00fe0
+ UQSUB16 Acc1, Acc1, r0x0fe00fe0
+ USAT16 Acc0, #13, Acc0
+ USAT16 Acc1, #13, Acc1
+
+ ;// Calculate Acc2
+ ;// Acc2 = c - 5*d + 20*e + 20*f - 5*g + h
+ UXTAB16 Temp1, ValG, ValD, ROR #8
+ UXTAB16 Acc3, ValI, ValD, ROR #8
+ UXTAB16 Temp2, ValE, ValF, ROR #8
+ AND Acc1, r0x00ff00ff, Acc1, LSR #5
+ AND Acc0, r0x00ff00ff, Acc0, LSR #5
+ ORR Acc0, Acc0, Acc1, LSL #8
+ RSB Temp5, Temp1, Temp2, LSL #2
+ UXTAB16 Temp2, ValG, ValF, ROR #8
+ ADD Temp5, Temp5, Temp5, LSL #2
+ ADD Acc2, Acc2, Temp5
+
+ ;// Calculate Acc3
+ ;// Acc3 = d - 5*e + 20*f + 20*g - 5*h + i
+ UXTAB16 Temp5, ValE, ValH, ROR #8
+ RSB Temp5, Temp5, Temp2, LSL #2
+ LDR r0x0fe00fe0, =0x0fe00fe0
+ ADD Temp5, Temp5, Temp5, LSL #2
+ ADD Acc3, Acc3, Temp5
+
+ UQSUB16 Acc3, Acc3, r0x0fe00fe0
+ UQSUB16 Acc2, Acc2, r0x0fe00fe0
+ USAT16 Acc3, #13, Acc3
+ USAT16 Acc2, #13, Acc2
+
+ M_LDR dstStep, pDstStep
+ AND Acc3, r0x00ff00ff, Acc3, LSR #5
+ AND Acc2, r0x00ff00ff, Acc2, LSR #5
+ ORR Acc2, Acc2, Acc3, LSL #8
+
+ SUBS Counter, Counter, #1
+ M_LDR srcStep, pSrcStep
+ PKHBT Acc1, Acc0, Acc2, LSL #16
+ M_STR Acc1, [pDst], dstStep ;// Store result1
+ PKHTB Acc2, Acc2, Acc0, ASR #16
+ M_STR Acc2, [pDst], dstStep ;// Store result2
+ ADD pSrc, pSrc, srcStep, LSL #1
+
+ BGT NextTwoRowsLoop
+End
+ SUB pDst, pDst, dstStep, LSL #2
+ SUB pSrc, pSrc, srcStep, LSL #2
+
+ M_END
+
+ ENDIF
+
+ END
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+