diff options
Diffstat (limited to 'media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s')
-rw-r--r-- | media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s | 70 |
1 files changed, 35 insertions, 35 deletions
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s index 189e33b..8efa9fb 100644 --- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s +++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s @@ -20,22 +20,22 @@ @* Word16 y[], /* (o) : output vector */ @* Word16 L /* (i) : vector size */ @*) -@ +@ @ r0 --- x[] @ r1 --- h[] @ r2 --- y[] @ r3 --- L - .section .text - .global Convolve_asm + .section .text + .global Convolve_asm Convolve_asm: - STMFD r13!, {r4 - r12, r14} - MOV r3, #0 + STMFD r13!, {r4 - r12, r14} + MOV r3, #0 MOV r11, #0x8000 - -LOOP: + +LOOP: @MOV r8, #0 @ s = 0 ADD r4, r1, r3, LSL #1 @ tmpH address ADD r5, r3, #1 @ i = n + 1 @@ -43,21 +43,21 @@ LOOP: LDRSH r9, [r6], #2 @ *tmpX++ LDRSH r10, [r4] @ *tmpH-- SUB r5, r5, #1 - VMOV.S32 Q10, #0 - MUL r8, r9, r10 + VMOV.S32 Q10, #0 + MUL r8, r9, r10 -LOOP1: +LOOP1: CMP r5, #0 BLE L1 SUB r4, r4, #8 MOV r9, r4 - VLD1.S16 D0, [r6]! + VLD1.S16 D0, [r6]! VLD1.S16 D1, [r9]! VREV64.16 D1, D1 - SUBS r5, r5, #4 - VMLAL.S16 Q10, D0, D1 - B LOOP1 -L1: + SUBS r5, r5, #4 + VMLAL.S16 Q10, D0, D1 + B LOOP1 +L1: VADD.S32 D20, D20, D21 VPADD.S32 D20, D20, D20 VMOV.S32 r5, D20[0] @@ -73,25 +73,25 @@ L1: ADD r5, r3, #1 MOV r6, r0 LDRSH r9, [r6], #2 @ *tmpX++ - LDRSH r10, [r4], #-2 + LDRSH r10, [r4], #-2 LDRSH r12, [r6], #2 LDRSH r14, [r4] MUL r8, r9, r10 SUB r5, r5, #2 MLA r8, r12, r14, r8 - + VMOV.S32 Q10, #0 LOOP2: CMP r5, #0 BLE L2 SUB r4, r4, #8 MOV r9, r4 - VLD1.S16 D0, [r6]! + VLD1.S16 D0, [r6]! VLD1.S16 D1, [r9]! SUBS r5, r5, #4 VREV64.16 D1, D1 - VMLAL.S16 Q10, D0, D1 + VMLAL.S16 Q10, D0, D1 B LOOP2 L2: VADD.S32 D20, D20, D21 @@ -100,7 +100,7 @@ L2: ADD r8, r8, r5 ADD r8, r11, r8, LSL #1 MOV r8, r8, LSR #16 @extract_h(s) - ADD r3, r3, #1 + ADD r3, r3, #1 STRH r8, [r2], #2 @y[n] @@ -115,7 +115,7 @@ L2: MUL r8, r9, r10 LDRSH r9, [r6], #2 LDRSH r10, [r4] - MLA r8, r12, r14, r8 + MLA r8, r12, r14, r8 SUB r5, r5, #3 MLA r8, r9, r10, r8 @@ -125,12 +125,12 @@ LOOP3: BLE L3 SUB r4, r4, #8 MOV r9, r4 - VLD1.S16 D0, [r6]! + VLD1.S16 D0, [r6]! VLD1.S16 D1, [r9]! VREV64.16 D1, D1 SUBS r5, r5, #4 - VMLAL.S16 Q10, D0, D1 - B LOOP3 + VMLAL.S16 Q10, D0, D1 + B LOOP3 L3: VADD.S32 D20, D20, D21 @@ -146,18 +146,18 @@ L3: ADD r4, r1, r5, LSL #1 @ tmpH address MOV r6, r0 VMOV.S32 Q10, #0 -LOOP4: +LOOP4: CMP r5, #0 BLE L4 SUB r4, r4, #8 MOV r9, r4 - VLD1.S16 D0, [r6]! + VLD1.S16 D0, [r6]! VLD1.S16 D1, [r9]! VREV64.16 D1, D1 - SUBS r5, r5, #4 - VMLAL.S16 Q10, D0, D1 - B LOOP4 -L4: + SUBS r5, r5, #4 + VMLAL.S16 Q10, D0, D1 + B LOOP4 +L4: VADD.S32 D20, D20, D21 VPADD.S32 D20, D20, D20 VMOV.S32 r5, D20[0] @@ -165,14 +165,14 @@ L4: MOV r5, r5, LSR #16 @extract_h(s) ADD r3, r3, #1 STRH r5, [r2], #2 @y[n] - + CMP r3, #64 BLT LOOP - -Convolve_asm_end: - + +Convolve_asm_end: + LDMFD r13!, {r4 - r12, r15} - + @ENDFUNC .END |