diff options
Diffstat (limited to 'media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s')
-rw-r--r-- | media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s | 56 |
1 files changed, 28 insertions, 28 deletions
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s index 60e9ade..4263cd4 100644 --- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s +++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s @@ -33,7 +33,7 @@ .section .text - .global Norm_corr_asm + .global Norm_corr_asm .extern Convolve_asm .extern Isqrt_n @****************************** @@ -47,17 +47,17 @@ .equ T_MIN , 212 .equ T_MAX , 216 .equ CORR_NORM , 220 - + Norm_corr_asm: - STMFD r13!, {r4 - r12, r14} + STMFD r13!, {r4 - r12, r14} SUB r13, r13, #voSTACK - + ADD r8, r13, #20 @get the excf[L_SUBFR] LDR r4, [r13, #T_MIN] @get t_min RSB r11, r4, #0 @k = -t_min - ADD r5, r0, r11, LSL #1 @get the &exc[k] - + ADD r5, r0, r11, LSL #1 @get the &exc[k] + @transfer Convolve function STMFD sp!, {r0 - r3} MOV r0, r5 @@ -68,7 +68,7 @@ Norm_corr_asm: @ r8 --- excf[] - MOV r14, r1 @copy xn[] address + MOV r14, r1 @copy xn[] address MOV r7, #1 VLD1.S16 {Q0, Q1}, [r14]! VLD1.S16 {Q2, Q3}, [r14]! @@ -95,34 +95,34 @@ Norm_corr_asm: VQADD.S32 D20, D20, D21 VMOV.S32 r9, D20[0] VMOV.S32 r10, D20[1] - QADD r6, r9, r10 + QADD r6, r9, r10 QADD r6, r6, r6 QADD r9, r6, r7 @L_tmp = (L_tmp << 1) + 1; CLZ r7, r9 SUB r6, r7, #1 @exp = norm_l(L_tmp) RSB r7, r6, #32 @exp = 32 - exp - MOV r6, r7, ASR #1 + MOV r6, r7, ASR #1 RSB r7, r6, #0 @scale = -(exp >> 1) - + @loop for every possible period @for(t = t_min@ t <= t_max@ t++) @r7 --- scale r4 --- t_min r8 --- excf[] -LOOPFOR: +LOOPFOR: ADD r14, r13, #20 @copy of excf[] MOV r12, r1 @copy of xn[] MOV r8, #0x8000 VLD1.S16 {Q0, Q1}, [r14]! @ load 16 excf[] - VLD1.S16 {Q2, Q3}, [r14]! @ load 16 excf[] + VLD1.S16 {Q2, Q3}, [r14]! @ load 16 excf[] VLD1.S16 {Q4, Q5}, [r12]! @ load 16 x[] VLD1.S16 {Q6, Q7}, [r12]! @ load 16 x[] VMULL.S16 Q10, D0, D0 @L_tmp1 += excf[] * excf[] - VMULL.S16 Q11, D0, D8 @L_tmp += x[] * excf[] + VMULL.S16 Q11, D0, D8 @L_tmp += x[] * excf[] VMLAL.S16 Q10, D1, D1 VMLAL.S16 Q11, D1, D9 VMLAL.S16 Q10, D2, D2 - VMLAL.S16 Q11, D2, D10 + VMLAL.S16 Q11, D2, D10 VMLAL.S16 Q10, D3, D3 VMLAL.S16 Q11, D3, D11 VMLAL.S16 Q10, D4, D4 @@ -143,7 +143,7 @@ LOOPFOR: VMLAL.S16 Q10, D1, D1 VMLAL.S16 Q11, D1, D9 VMLAL.S16 Q10, D2, D2 - VMLAL.S16 Q11, D2, D10 + VMLAL.S16 Q11, D2, D10 VMLAL.S16 Q10, D3, D3 VMLAL.S16 Q11, D3, D11 VMLAL.S16 Q10, D4, D4 @@ -162,19 +162,19 @@ LOOPFOR: VPADD.S32 D22, D22, D22 @D22[0] --- L_tmp << 1 VMOV.S32 r6, D20[0] - VMOV.S32 r5, D22[0] + VMOV.S32 r5, D22[0] @r5 --- L_tmp, r6 --- L_tmp1 MOV r10, #1 ADD r5, r10, r5, LSL #1 @L_tmp = (L_tmp << 1) + 1 ADD r6, r10, r6, LSL #1 @L_tmp1 = (L_tmp1 << 1) + 1 - - CLZ r10, r5 + + CLZ r10, r5 CMP r5, #0 RSBLT r11, r5, #0 CLZLT r10, r11 SUB r10, r10, #1 @exp = norm_l(L_tmp) - + MOV r5, r5, LSL r10 @L_tmp = (L_tmp << exp) RSB r10, r10, #30 @exp_corr = 30 - exp MOV r11, r5, ASR #16 @corr = extract_h(L_tmp) @@ -190,7 +190,7 @@ LOOPFOR: @Isqrt_n(&L_tmp, &exp_norm) MOV r14, r0 - MOV r12, r1 + MOV r12, r1 STMFD sp!, {r0 - r4, r7 - r12, r14} ADD r1, sp, #4 @@ -208,7 +208,7 @@ LOOPFOR: MOV r6, r6, ASR #16 @norm = extract_h(L_tmp) MUL r12, r6, r11 ADD r12, r12, r12 @L_tmp = vo_L_mult(corr, norm) - + ADD r6, r10, r5 ADD r6, r6, r7 @exp_corr + exp_norm + scale @@ -227,8 +227,8 @@ LOOPFOR: CMP r4, r6 BEQ Norm_corr_asm_end - - ADD r4, r4, #1 @ t_min ++ + + ADD r4, r4, #1 @ t_min ++ RSB r5, r4, #0 @ k MOV r6, #63 @ i = 63 @@ -255,16 +255,16 @@ LOOPK: MUL r14, r11, r8 LDR r6, [r13, #T_MAX] @ get t_max MOV r8, r14, ASR #15 - STRH r8, [r10] + STRH r8, [r10] CMP r4, r6 BLE LOOPFOR -Norm_corr_asm_end: - - ADD r13, r13, #voSTACK +Norm_corr_asm_end: + + ADD r13, r13, #voSTACK LDMFD r13!, {r4 - r12, r15} - + .END |