summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
diff options
context:
space:
mode:
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s')
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s222
1 files changed, 222 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
new file mode 100755
index 0000000..4f0892d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_Average_4x_Align_unsafe_s.s
@@ -0,0 +1,222 @@
+;//
+;//
+;// File Name: armVCM4P10_Average_4x_Align_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+
+;// Functions:
+;// armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe
+;//
+;// Implements the average of two 4x4 blocks with the equation c = (a+b+1)>>1.
+;// The first operand is at offset ALIGNMENT from an aligned address.
+;// The second operand is at an aligned location, which is also used as the
+;// output destination (pDst) for vertical interpolation.
+;// This function processes 4 bytes per row in the horizontal direction.
+;//
+;// Registers used as input for this function
+;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 its step size
+;//
+;// Registers preserved for top level function
+;// r4,r5,r6,r8,r9,r14
+;//
+;// Registers modified by the function
+;// r7,r10,r11,r12
+;//
+;// Output registers
+;// r2 - pointer to the aligned location
+;// r3 - step size to this aligned location
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS ARM1136JS ;// NOTE(review): macro from an included header; presumably declares the CPU variant tested by the IF ARM1136JS guard below
+
+ EXPORT armVCM4P10_Average_4x4_Align0_unsafe ;// first operand at byte offset 0
+ EXPORT armVCM4P10_Average_4x4_Align2_unsafe ;// first operand at byte offset 2
+ EXPORT armVCM4P10_Average_4x4_Align3_unsafe ;// first operand at byte offset 3
+
+DEBUG_ON SETL {FALSE} ;// not referenced by any code shown in this file
+
+;// Declare input registers (pPred1/pDstPred and iPredStep1/iDstStep are two names for the same register)
+pPred0 RN 0 ;// first operand pointer; advanced by the post-indexed loads
+iPredStep0 RN 1 ;// row step for pPred0
+pPred1 RN 2 ;// second operand pointer
+iPredStep1 RN 3 ;// row step for pPred1
+pDstPred RN 2 ;// output pointer, same register as pPred1 (result overwrites the second operand)
+iDstStep RN 3 ;// output row step, same register as iPredStep1
+
+;// Declare other intermediate registers (Temp1/ResultB share r4, Temp2/ResultA share r5)
+iPredA0 RN 10 ;// first-operand word, even row of the current pair
+iPredA1 RN 11 ;// first-operand word, odd row of the current pair
+iPredB0 RN 12 ;// second-operand word, even row of the current pair
+iPredB1 RN 14 ;// second-operand word, odd row; r14 is the link register
+Temp1 RN 4 ;// extra word loaded from pPred0+4 for the even row (Align2/Align3 only)
+Temp2 RN 5 ;// extra word loaded from pPred0+4 for the odd row (Align2/Align3 only)
+ResultA RN 5 ;// even-row result; overwrites Temp2, so Temp2 must be consumed first
+ResultB RN 4 ;// odd-row result; overwrites Temp1, so Temp1 must be consumed first
+r0x80808080 RN 7 ;// holds the constant 0x80808080 (0x80 in every byte)
+
+ IF ARM1136JS
+
+ ;// This function calculates the rounded average c = (a+b+1)>>1 of two 4x4 byte blocks
+ ;// pPred0 is at alignment offset 0 and pPred1 is alignment 4, so each row of a is taken from one word load
+ ;// Reads a via pPred0/iPredStep0 and b via pPred1/iPredStep1; writes c through pDstPred (same register as pPred1)
+ ;// Function header (NOTE(review): the r6 argument presumably makes M_START save r4-r6 and lr; confirm in armCOMM_s.h)
+ M_START armVCM4P10_Average_4x4_Align0_unsafe, r6
+
+ ;// Code start: 0x80 in every byte, used below to supply the "+ 128" term of the identity
+ LDR r0x80808080, =0x80808080
+
+ ;// 1st load: rows 0 and 1 (NOTE(review): M_LDR/M_STR are external macros, assumed to act like LDR/STR with the same addressing)
+ M_LDR iPredB0, [pPred1] ;// b row 0; pPred1 is not advanced by the loads
+ M_LDR iPredA0, [pPred0], iPredStep0 ;// a row 0, then pPred0 += iPredStep0
+ M_LDR iPredB1, [pPred1, iPredStep1] ;// b row 1
+ M_LDR iPredA1, [pPred0], iPredStep0 ;// a row 1, then pPred0 += iPredStep0
+
+ ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
+ MVN iPredB0, iPredB0 ;// 255-b in every byte
+ MVN iPredB1, iPredB1
+ UHSUB8 ResultA, iPredA0, iPredB0 ;// per-byte unsigned halving subtract: (a-(255-b))/2
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080 ;// flipping bit 7 of each byte adds 128 modulo 256
+ M_STR ResultA, [pDstPred], iDstStep ;// store row 0, then pDstPred += iDstStep
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep ;// store row 1, then pDstPred += iDstStep
+
+ ;// 2nd load: rows 2 and 3 (pPred1 shares r2 with pDstPred, which the two stores above advanced by two rows)
+ M_LDR iPredA0, [pPred0], iPredStep0 ;// a row 2
+ M_LDR iPredB0, [pPred1] ;// b row 2
+ M_LDR iPredA1, [pPred0], iPredStep0 ;// a row 3
+ M_LDR iPredB1, [pPred1, iPredStep1] ;// b row 3
+
+ MVN iPredB0, iPredB0 ;// same averaging sequence as for rows 0 and 1
+ UHSUB8 ResultA, iPredA0, iPredB0
+ MVN iPredB1, iPredB1
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080
+ M_STR ResultA, [pDstPred], iDstStep ;// store row 2
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep ;// store row 3; pPred0 and pDstPred have each advanced four rows
+End0
+ M_END
+
+ ;// This function calculates the rounded average c = (a+b+1)>>1 of two 4x4 byte blocks
+ ;// pPred0 is at alignment offset 2 and pPred1 is alignment 4
+ ;// Each row of a is rebuilt from the words at pPred0 and pPred0+4; c is written through pDstPred (same register as pPred1)
+ ;// Function header (NOTE(review): the r6 argument presumably makes M_START save r4-r6 and lr; confirm in armCOMM_s.h)
+ M_START armVCM4P10_Average_4x4_Align2_unsafe, r6
+
+ ;// Code start: 0x80 in every byte, used below to supply the "+ 128" term of the identity
+ LDR r0x80808080, =0x80808080
+
+ ;// 1st load: rows 0 and 1 (NOTE(review): M_LDR/M_STR are external macros, assumed to act like LDR/STR with the same addressing)
+ LDR Temp1, [pPred0, #4] ;// word after the row-0 word of a; read before pPred0 is advanced
+ M_LDR iPredA0, [pPred0], iPredStep0 ;// row-0 word of a, then pPred0 += iPredStep0
+ M_LDR iPredB0, [pPred1] ;// b row 0
+ M_LDR iPredB1, [pPred1, iPredStep1] ;// b row 1
+ M_LDR Temp2, [pPred0, #4] ;// word after the row-1 word of a
+ M_LDR iPredA1, [pPred0], iPredStep0 ;// row-1 word of a, then pPred0 += iPredStep0
+ MVN iPredB0, iPredB0 ;// 255-b in every byte
+ MVN iPredB1, iPredB1
+ MOV iPredA0, iPredA0, LSR #16 ;// upper halfword of the first word moves to the low half
+ ORR iPredA0, iPredA0, Temp1, LSL #16 ;// lower halfword of the next word fills the high half
+ MOV iPredA1, iPredA1, LSR #16 ;// NOTE(review): this picks the 4 bytes at offset 2 only for little-endian loads; confirm
+ ORR iPredA1, iPredA1, Temp2, LSL #16 ;// last use of Temp1/Temp2 before r4/r5 are reused as ResultB/ResultA
+
+ ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
+ UHSUB8 ResultA, iPredA0, iPredB0 ;// per-byte unsigned halving subtract: (a-(255-b))/2
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080 ;// flipping bit 7 of each byte adds 128 modulo 256
+ M_STR ResultA, [pDstPred], iDstStep ;// store row 0, then pDstPred += iDstStep
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep ;// store row 1, then pDstPred += iDstStep
+
+ ;// 2nd load: rows 2 and 3 (pPred1 shares r2 with pDstPred, which the two stores above advanced by two rows)
+ LDR Temp1, [pPred0, #4] ;// word after the row-2 word of a
+ M_LDR iPredA0, [pPred0], iPredStep0 ;// row-2 word of a
+ LDR iPredB0, [pPred1] ;// b row 2
+ LDR iPredB1, [pPred1, iPredStep1] ;// b row 3
+ LDR Temp2, [pPred0, #4] ;// word after the row-3 word of a
+ M_LDR iPredA1, [pPred0], iPredStep0 ;// row-3 word of a
+ MVN iPredB0, iPredB0
+ MVN iPredB1, iPredB1
+ MOV iPredA0, iPredA0, LSR #16 ;// same halfword merge as for rows 0 and 1
+ ORR iPredA0, iPredA0, Temp1, LSL #16
+ MOV iPredA1, iPredA1, LSR #16
+ ORR iPredA1, iPredA1, Temp2, LSL #16
+
+ UHSUB8 ResultA, iPredA0, iPredB0 ;// same averaging sequence as for rows 0 and 1
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080
+ M_STR ResultA, [pDstPred], iDstStep ;// store row 2
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep ;// store row 3; pPred0 and pDstPred have each advanced four rows
+End2
+ M_END
+
+
+ ;// This function calculates the rounded average c = (a+b+1)>>1 of two 4x4 byte blocks
+ ;// pPred0 is at alignment offset 3 and pPred1 is alignment 4
+ ;// Each row of a is rebuilt from the words at pPred0 and pPred0+4; c is written through pDstPred (same register as pPred1)
+ ;// Function header (NOTE(review): the r6 argument presumably makes M_START save r4-r6 and lr; confirm in armCOMM_s.h)
+ M_START armVCM4P10_Average_4x4_Align3_unsafe, r6
+
+ ;// Code start: 0x80 in every byte, used below to add 128 to each byte of the halved difference
+ LDR r0x80808080, =0x80808080
+
+ ;// 1st load: rows 0 and 1 (NOTE(review): M_LDR/M_STR are external macros, assumed to act like LDR/STR with the same addressing)
+ LDR Temp1, [pPred0, #4] ;// word after the row-0 word of a; read before pPred0 is advanced
+ M_LDR iPredA0, [pPred0], iPredStep0 ;// row-0 word of a, then pPred0 += iPredStep0
+ LDR iPredB0, [pPred1] ;// b row 0
+ LDR iPredB1, [pPred1, iPredStep1] ;// b row 1
+ LDR Temp2, [pPred0, #4] ;// word after the row-1 word of a
+ M_LDR iPredA1, [pPred0], iPredStep0 ;// row-1 word of a, then pPred0 += iPredStep0
+
+ MVN iPredB0, iPredB0 ;// 255-b in every byte
+ MVN iPredB1, iPredB1
+ MOV iPredA0, iPredA0, LSR #24 ;// top byte of the first word moves to the low byte
+ ORR iPredA0, iPredA0, Temp1, LSL #8 ;// low three bytes of the next word fill the upper three bytes
+ MOV iPredA1, iPredA1, LSR #24 ;// NOTE(review): this picks the 4 bytes at offset 3 only for little-endian loads; confirm
+ ORR iPredA1, iPredA1, Temp2, LSL #8 ;// last use of Temp1/Temp2 before r4/r5 are reused as ResultB/ResultA
+ UHSUB8 ResultA, iPredA0, iPredB0 ;// per-byte (a-(255-b))/2; with the +128 below this equals (a+b+1)/2
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080 ;// flipping bit 7 of each byte adds 128 modulo 256
+ M_STR ResultA, [pDstPred], iDstStep ;// store row 0, then pDstPred += iDstStep
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep ;// store row 1, then pDstPred += iDstStep
+
+ ;// 2nd load: rows 2 and 3 (pPred1 shares r2 with pDstPred, which the two stores above advanced by two rows)
+ LDR Temp1, [pPred0, #4] ;// word after the row-2 word of a
+ M_LDR iPredA0, [pPred0], iPredStep0 ;// row-2 word of a
+ LDR iPredB0, [pPred1] ;// b row 2
+ LDR iPredB1, [pPred1, iPredStep1] ;// b row 3
+ LDR Temp2, [pPred0, #4] ;// word after the row-3 word of a
+ M_LDR iPredA1, [pPred0], iPredStep0 ;// row-3 word of a
+
+ MVN iPredB0, iPredB0
+ MVN iPredB1, iPredB1
+ MOV iPredA0, iPredA0, LSR #24 ;// same byte merge as for rows 0 and 1
+ ORR iPredA0, iPredA0, Temp1, LSL #8
+ MOV iPredA1, iPredA1, LSR #24
+ ORR iPredA1, iPredA1, Temp2, LSL #8
+
+ UHSUB8 ResultA, iPredA0, iPredB0 ;// same averaging sequence as for rows 0 and 1
+ UHSUB8 ResultB, iPredA1, iPredB1
+ EOR ResultA, ResultA, r0x80808080
+ M_STR ResultA, [pDstPred], iDstStep ;// store row 2
+ EOR ResultB, ResultB, r0x80808080
+ M_STR ResultB, [pDstPred], iDstStep ;// store row 3; pPred0 and pDstPred have each advanced four rows
+End3
+ M_END
+
+ ENDIF
+
+ END
+ \ No newline at end of file