;//
;//
;// File Name:  armVCM4P10_Average_4x_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//

;// Functions:
;//     armVCM4P10_Average_4x4_Align0_unsafe
;//     armVCM4P10_Average_4x4_Align2_unsafe
;//     armVCM4P10_Average_4x4_Align3_unsafe
;//
;// Each function implements the average of a 4x4 block with the equation
;//     c = (a+b+1)>>1
;// The first operand is at offset ALIGNMENT (0, 2 or 3) from an aligned
;// address. The second operand is at an aligned location, which is also
;// used as the output: the destination pointed to by (pDst) for vertical
;// interpolation. Each row processed is 4 bytes in the horizontal direction.
;//
;// Registers used as input for this function
;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 its
;// step size
;//
;// Registers preserved for top level function
;// r4,r5,r6,r8,r9,r14
;//
;// Registers modified by the function
;// r7,r10,r11,r12
;//
;// Output registers
;// r2 - pointer to the aligned location
;// r3 - step size to this aligned location
;//
;// NOTE(review): r4, r5 and r14 are written by the code below, so the
;// "preserved" list relies on M_START/M_END (armCOMM_s.h, not visible here)
;// saving and restoring r4-r6 and r14 - confirm against the macro.

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_Average_4x4_Align0_unsafe
        EXPORT armVCM4P10_Average_4x4_Align2_unsafe
        EXPORT armVCM4P10_Average_4x4_Align3_unsafe

DEBUG_ON    SETL {FALSE}

;// Declare input registers
pPred0          RN 0
iPredStep0      RN 1
pPred1          RN 2
iPredStep1      RN 3
;// The destination shares its registers with the second operand:
;// pDstPred is the same register as pPred1, iDstStep the same as iPredStep1.
pDstPred        RN 2
iDstStep        RN 3

;// Declare other intermediate registers
iPredA0         RN 10
iPredA1         RN 11
iPredB0         RN 12
iPredB1         RN 14
;// Temp1/ResultB share r4 and Temp2/ResultA share r5, so both Temp values
;// must be consumed before either Result register is written.
Temp1           RN 4
Temp2           RN 5
ResultA         RN 5
ResultB         RN 4
r0x80808080     RN 7

    IF ARM1136JS

    ;// This function calculates average of 4x4 block
    ;// pPred0 is at alignment offset 0 and pPred1 is alignment 4
    ;//
    ;// Four rows are processed, two per pass. Every store post-indexes
    ;// pDstPred (= pPred1) by iDstStep, so the second pass reads its
    ;// pPred1 rows from the already advanced pointer.

;// Function header
        M_START armVCM4P10_Average_4x4_Align0_unsafe, r6

;// Code start
        LDR         r0x80808080, =0x80808080

;// 1st load
        M_LDR       iPredB0, [pPred1]
        M_LDR       iPredA0, [pPred0], iPredStep0
        M_LDR       iPredB1, [pPred1, iPredStep1]
        M_LDR       iPredA1, [pPred0], iPredStep0

        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
        ;// MVN forms 255-b in every byte lane, UHSUB8 does the per-byte
        ;// halving subtract, and the EOR with 0x80 per byte adds the 128.
        MVN         iPredB0, iPredB0
        MVN         iPredB1, iPredB1
        UHSUB8      ResultA, iPredA0, iPredB0
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep

;// 2nd load
        M_LDR       iPredA0, [pPred0], iPredStep0
        M_LDR       iPredB0, [pPred1]
        M_LDR       iPredA1, [pPred0], iPredStep0
        M_LDR       iPredB1, [pPred1, iPredStep1]

        MVN         iPredB0, iPredB0
        UHSUB8      ResultA, iPredA0, iPredB0
        MVN         iPredB1, iPredB1
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep
End0
        M_END

    ;// This function calculates average of 4x4 block
    ;// pPred0 is at alignment offset 2 and pPred1 is alignment 4
    ;//
    ;// Each pPred0 row is rebuilt from two word loads: the upper 16 bits of
    ;// the word at [pPred0] (LSR #16) are merged with the lower 16 bits of
    ;// the word at [pPred0, #4] (LSL #16), i.e. bytes 2..5 of the row in
    ;// little-endian byte order.
    ;// NOTE(review): the plain word loads suggest the caller passes pPred0
    ;// already rounded down to a word boundary - confirm against the caller.
    ;// NOTE(review): this function mixes LDR and M_LDR for equivalent loads;
    ;// left exactly as found because M_LDR is defined outside this file.

;// Function header
        M_START armVCM4P10_Average_4x4_Align2_unsafe, r6

;// Code start
        LDR         r0x80808080, =0x80808080

;// 1st load
        LDR         Temp1, [pPred0, #4]
        M_LDR       iPredA0, [pPred0], iPredStep0
        M_LDR       iPredB0, [pPred1]
        M_LDR       iPredB1, [pPred1, iPredStep1]
        M_LDR       Temp2, [pPred0, #4]
        M_LDR       iPredA1, [pPred0], iPredStep0
        MVN         iPredB0, iPredB0
        MVN         iPredB1, iPredB1
        ;// Stitch the two words of each row into one 4-byte operand
        MOV         iPredA0, iPredA0, LSR #16
        ORR         iPredA0, iPredA0, Temp1, LSL #16
        MOV         iPredA1, iPredA1, LSR #16
        ORR         iPredA1, iPredA1, Temp2, LSL #16

        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
        UHSUB8      ResultA, iPredA0, iPredB0
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep

;// 2nd load
        LDR         Temp1, [pPred0, #4]
        M_LDR       iPredA0, [pPred0], iPredStep0
        LDR         iPredB0, [pPred1]
        LDR         iPredB1, [pPred1, iPredStep1]
        LDR         Temp2, [pPred0, #4]
        M_LDR       iPredA1, [pPred0], iPredStep0
        MVN         iPredB0, iPredB0
        MVN         iPredB1, iPredB1
        MOV         iPredA0, iPredA0, LSR #16
        ORR         iPredA0, iPredA0, Temp1, LSL #16
        MOV         iPredA1, iPredA1, LSR #16
        ORR         iPredA1, iPredA1, Temp2, LSL #16

        UHSUB8      ResultA, iPredA0, iPredB0
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep
End2
        M_END

    ;// This function calculates average of 4x4 block
    ;// pPred0 is at alignment offset 3 and pPred1 is alignment 4
    ;//
    ;// Each pPred0 row is rebuilt from two word loads: the top byte of the
    ;// word at [pPred0] (LSR #24) is merged with the lower 24 bits of the
    ;// word at [pPred0, #4] (LSL #8), i.e. bytes 3..6 of the row in
    ;// little-endian byte order.
    ;// NOTE(review): the plain word loads suggest the caller passes pPred0
    ;// already rounded down to a word boundary - confirm against the caller.

;// Function header
        M_START armVCM4P10_Average_4x4_Align3_unsafe, r6

;// Code start
        LDR         r0x80808080, =0x80808080

;// 1st load
        LDR         Temp1, [pPred0, #4]
        M_LDR       iPredA0, [pPred0], iPredStep0
        LDR         iPredB0, [pPred1]
        LDR         iPredB1, [pPred1, iPredStep1]
        LDR         Temp2, [pPred0, #4]
        M_LDR       iPredA1, [pPred0], iPredStep0

        MVN         iPredB0, iPredB0
        MVN         iPredB1, iPredB1
        ;// Stitch the two words of each row into one 4-byte operand
        MOV         iPredA0, iPredA0, LSR #24
        ORR         iPredA0, iPredA0, Temp1, LSL #8
        MOV         iPredA1, iPredA1, LSR #24
        ORR         iPredA1, iPredA1, Temp2, LSL #8

        ;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
        UHSUB8      ResultA, iPredA0, iPredB0
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep

;// 2nd load
        LDR         Temp1, [pPred0, #4]
        M_LDR       iPredA0, [pPred0], iPredStep0
        LDR         iPredB0, [pPred1]
        LDR         iPredB1, [pPred1, iPredStep1]
        LDR         Temp2, [pPred0, #4]
        M_LDR       iPredA1, [pPred0], iPredStep0

        MVN         iPredB0, iPredB0
        MVN         iPredB1, iPredB1
        MOV         iPredA0, iPredA0, LSR #24
        ORR         iPredA0, iPredA0, Temp1, LSL #8
        MOV         iPredA1, iPredA1, LSR #24
        ORR         iPredA1, iPredA1, Temp2, LSL #8

        UHSUB8      ResultA, iPredA0, iPredB0
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        M_STR       ResultA, [pDstPred], iDstStep
        EOR         ResultB, ResultB, r0x80808080
        M_STR       ResultB, [pDstPred], iDstStep
End3
        M_END

    ENDIF

    END