Diffstat (limited to 'media/libstagefright/codecs/amrwbenc/src/asm')
22 files changed, 445 insertions, 445 deletions
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Deemph_32_opt.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Deemph_32_opt.s
index c1c74e6..282db92 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Deemph_32_opt.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Deemph_32_opt.s
@@ -30,10 +30,10 @@
        .section .text
        .global Deemph_32_asm
-
+
 Deemph_32_asm:
-       STMFD   r13!, {r4 - r12, r14}
+       STMFD   r13!, {r4 - r12, r14}
        MOV     r4, #2                  @i=0
        LDRSH   r6, [r0], #2            @load x_hi[0]
        LDRSH   r7, [r1], #2            @load x_lo[0]
@@ -47,9 +47,9 @@ Deemph_32_asm:
        ADD     r12, r10, r7, LSL #4    @L_tmp += x_lo[0] << 4
        MOV     r10, r12, LSL #3        @L_tmp <<= 3
        MUL     r9, r5, r8
-       LDRSH   r6, [r0], #2            @load x_hi[1]
+       LDRSH   r6, [r0], #2            @load x_hi[1]
        QDADD   r10, r10, r9
-       LDRSH   r7, [r1], #2            @load x_lo[1]
+       LDRSH   r7, [r1], #2            @load x_lo[1]
        MOV     r12, r10, LSL #1        @L_tmp = L_mac(L_tmp, *mem, fac)
        QADD    r10, r12, r11
        MOV     r14, r10, ASR #16       @y[0] = round(L_tmp)
@@ -94,9 +94,9 @@ LOOP:
        BLT     LOOP

        STR     r14, [r3]
-       STRH    r14, [r2]
+       STRH    r14, [r2]

-       LDMFD   r13!, {r4 - r12, r15}
+       LDMFD   r13!, {r4 - r12, r15}
 @ENDP
        .END
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Dot_p_opt.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Dot_p_opt.s
index 02bdcab..4aa317e 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Dot_p_opt.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Dot_p_opt.s
@@ -31,7 +31,7 @@ Dot_product12_asm:
-       STMFD   r13!, {r4 - r12, r14}
+       STMFD   r13!, {r4 - r12, r14}
        MOV     r4, #0                  @ L_sum = 0
        MOV     r5, #0                  @ i = 0
@@ -41,13 +41,13 @@ LOOP:
        LDR     r8, [r0], #4
        SMLABB  r4, r6, r7, r4
        LDR     r9, [r1], #4
-       SMLATT  r4, r6, r7, r4
+       SMLATT  r4, r6, r7, r4
        LDR     r6, [r0], #4
        SMLABB  r4, r8, r9, r4
        LDR     r7, [r1], #4
-       SMLATT  r4, r8, r9, r4
+       SMLATT  r4, r8, r9, r4
        LDR     r8, [r0], #4
        SMLABB  r4, r6, r7, r4
@@ -58,7 +58,7 @@ LOOP:
        CMP     r5, r2
        SMLATT  r4, r8, r9, r4
        BLT     LOOP
-
+
        MOV     r12, r4, LSL #1
        ADD     r12, r12, #1            @ L_sum = (L_sum << 1) + 1
        MOV     r4, r12
@@ -69,12 +69,12 @@ LOOP:
        SUB     r10, r10, #1            @ sft = norm_l(L_sum)
        MOV     r0, r12, LSL r10        @ L_sum = L_sum << sft
        RSB     r11, r10, #30           @ *exp = 30 - sft
-       STRH    r11, [r3]
+       STRH    r11, [r3]
 Dot_product12_end:
-
-       LDMFD   r13!, {r4 - r12, r15}
+
+       LDMFD   r13!, {r4 - r12, r15}
 @ENDFUNC
        .END
-
+
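For reference, the comments in Dot_p_opt.s above spell out the whole fixed-point recipe: accumulate x[i]*y[i] with SMLABB/SMLATT, form (L_sum << 1) + 1, normalize with norm_l, and return the exponent as 30 - sft. A minimal C sketch of that reading follows; the function and variable names are illustrative, not taken from the codec sources.

    typedef short Word16;
    typedef int   Word32;

    /* Sketch of what Dot_product12_asm computes, per its own comments. */
    static Word32 dot_product12_sketch(const Word16 *x, const Word16 *y,
                                       Word16 lg, Word16 *exp)
    {
        Word32 L_sum = 0;
        int i, sft;
        for (i = 0; i < lg; i++)         /* SMLABB/SMLATT accumulation */
            L_sum += (Word32)x[i] * y[i];
        L_sum = (L_sum << 1) + 1;        /* MOV r12, r4, LSL #1 ; ADD r12, r12, #1 */
        /* sft = norm_l(L_sum): the CLZ/SUB #1 pair; assumes L_sum > 0 here,
         * which the +1 guarantees absent overflow. */
        sft = __builtin_clz((unsigned)L_sum) - 1;
        L_sum <<= sft;                   /* MOV r0, r12, LSL r10 */
        *exp = (Word16)(30 - sft);       /* RSB r11, r10, #30 ; STRH r11, [r3] */
        return L_sum;
    }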
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Filt_6k_7k_opt.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Filt_6k_7k_opt.s
index 1ce2a85..856ada8 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Filt_6k_7k_opt.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Filt_6k_7k_opt.s
@@ -23,7 +23,7 @@
 @******************************************************************
 @ r0    ---  signal[]
 @ r1    ---  lg
-@ r2    ---  mem[]
+@ r2    ---  mem[]
        .section .text
        .global Filt_6k_7k_asm
@@ -32,7 +32,7 @@ Filt_6k_7k_asm:
-       STMFD   r13!, {r4 - r12, r14}
+       STMFD   r13!, {r4 - r12, r14}
        SUB     r13, r13, #240          @ x[L_SUBFR16k + (L_FIR - 1)]
        MOV     r8, r0                  @ copy signal[] address
        MOV     r4, r1                  @ copy lg address
@@ -43,9 +43,9 @@ Filt_6k_7k_asm:
        MOV     r2, #30                 @ L_FIR - 1
        BL      voAWB_Copy              @ memcpy(x, mem, (L_FIR - 1)<<1)
-       LDR     r10, Lable1             @ get fir_7k address
+       LDR     r10, Lable1             @ get fir_7k address
-       MOV     r14, #0
+       MOV     r14, #0
        MOV     r3, r8                  @ change myMemCopy to Copy, due to Copy will change r3 content
        ADD     r6, r13, #60            @ get x[L_FIR - 1] address
        MOV     r7, r3                  @ get signal[i]
@@ -76,14 +76,14 @@ LOOP1:
        STRH    r12, [r6], #2
        ADD     r14, r14, #8
        CMP     r14, #80
-       BLT     LOOP1
+       BLT     LOOP1

        STR     r5, [sp, #-4]           @ PUSH r5 to stack

        @ not use registers: r4, r10, r12, r14, r5
-       MOV     r4, r13
-       MOV     r5, #0                  @ i = 0
+       MOV     r4, r13
+       MOV     r5, #0                  @ i = 0
 LOOP2:
        LDR     r0, [r10]
@@ -111,13 +111,13 @@ LOOP2:
        LDRSH   r8, [r4, #10]           @ load x[i+5]
        LDRSH   r9, [r4, #50]           @ load x[i+25]
        SMLABT  r14, r1, r0, r14        @ (x[i+3] + x[i+27]) * fir_7k[3]
-       ADD     r8, r8, r9              @ x[i+5] + x[i+25]
-
+       ADD     r8, r8, r9              @ x[i+5] + x[i+25]
+
        LDR     r0, [r10, #8]
        LDRSH   r1, [r4, #12]           @ x[i+6]
        LDRSH   r2, [r4, #48]           @ x[i+24]
        SMLABB  r14, r6, r0, r14        @ (x[i+4] + x[i+26]) * fir_7k[4]
-       LDRSH   r6, [r4, #14]           @ x[i+7]
+       LDRSH   r6, [r4, #14]           @ x[i+7]
        LDRSH   r7, [r4, #46]           @ x[i+23]
        SMLABT  r14, r8, r0, r14        @ (x[i+5] + x[i+25]) * fir_7k[5]
        LDR     r0, [r10, #12]
@@ -125,8 +125,8 @@ LOOP2:
        ADD     r6, r6, r7              @ (x[i+7] + x[i+23])
        SMLABB  r14, r1, r0, r14        @ (x[i+6] + x[i+24]) * fir_7k[6]
        LDRSH   r8, [r4, #16]           @ x[i+8]
-       LDRSH   r9, [r4, #44]           @ x[i+22]
-       SMLABT  r14, r6, r0, r14        @ (x[i+7] + x[i+23]) * fir_7k[7]
+       LDRSH   r9, [r4, #44]           @ x[i+22]
+       SMLABT  r14, r6, r0, r14        @ (x[i+7] + x[i+23]) * fir_7k[7]
        LDR     r0, [r10, #16]
        LDRSH   r1, [r4, #18]           @ x[i+9]
        LDRSH   r2, [r4, #42]           @ x[i+21]
@@ -144,7 +144,7 @@ LOOP2:
        LDRSH   r2, [r4, #36]           @ x[i+18]
        SMLABB  r14, r6, r0, r14        @ (x[i+10] + x[i+20]) * fir_7k[10]
        LDRSH   r6, [r4, #26]           @ x[i+13]
-       ADD     r8, r8, r9              @ (x[i+11] + x[i+19])
+       ADD     r8, r8, r9              @ (x[i+11] + x[i+19])
        LDRSH   r7, [r4, #34]           @ x[i+17]
        SMLABT  r14, r8, r0, r14        @ (x[i+11] + x[i+19]) * fir_7k[11]
        LDR     r0, [r10, #24]
@@ -152,31 +152,31 @@ LOOP2:
        LDRSH   r8, [r4, #28]           @ x[i+14]
        SMLABB  r14, r1, r0, r14        @ (x[i+12] + x[i+18]) * fir_7k[12]
        ADD     r6, r6, r7              @ (x[i+13] + x[i+17])
-       LDRSH   r9, [r4, #32]           @ x[i+16]
+       LDRSH   r9, [r4, #32]           @ x[i+16]
        SMLABT  r14, r6, r0, r14        @ (x[i+13] + x[i+17]) * fir_7k[13]
-       LDR     r0, [r10, #28]
+       LDR     r0, [r10, #28]
        ADD     r8, r8, r9              @ (x[i+14] + x[i+16])
        LDRSH   r1, [r4, #30]           @ x[i+15]
        SMLABB  r14, r8, r0, r14        @ (x[i+14] + x[i+16]) * fir_7k[14]
-       SMLABT  r14, r1, r0, r14        @ x[i+15] * fir_7k[15]
+       SMLABT  r14, r1, r0, r14        @ x[i+15] * fir_7k[15]
        ADD     r5, r5, #1
        ADD     r14, r14, #0x4000
-       ADD     r4, r4, #2
+       ADD     r4, r4, #2
        MOV     r1, r14, ASR #15
        CMP     r5, #80
        STRH    r1, [r3], #2            @signal[i] = (L_tmp + 0x4000) >> 15
-       BLT     LOOP2
-
+       BLT     LOOP2
+
        LDR     r1, [sp, #-4]           @mem address
        ADD     r0, r13, #160           @x + lg
        MOV     r2, #30
        BL      voAWB_Copy
-
+
 Filt_6k_7k_end:
-       ADD     r13, r13, #240
-       LDMFD   r13!, {r4 - r12, r15}
-
+       ADD     r13, r13, #240
+       LDMFD   r13!, {r4 - r12, r15}
+
 Lable1:
        .word   fir_6k_7k
 @ENDFUNC
        .END
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Norm_Corr_opt.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Norm_Corr_opt.s
index b440a31..49bdc2b 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Norm_Corr_opt.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Norm_Corr_opt.s
@@ -32,8 +32,8 @@
 @ r6 --- corr_norm[]
-       .section .text
-       .global Norm_corr_asm
+       .section .text
+       .global Norm_corr_asm
        .extern Convolve_asm
        .extern Isqrt_n
 @******************************
@@ -47,17 +47,17 @@
        .equ    T_MIN,      212
        .equ    T_MAX,      216
        .equ    CORR_NORM,  220
-
+
 Norm_corr_asm:
-       STMFD   r13!, {r4 - r12, r14}
+       STMFD   r13!, {r4 - r12, r14}
        SUB     r13, r13, #voSTACK
-
+
        ADD     r8, r13, #20            @get the excf[L_SUBFR]
        LDR     r4, [r13, #T_MIN]       @get t_min
        RSB     r11, r4, #0             @k = -t_min
-       ADD     r5, r0, r11, LSL #1     @get the &exc[k]
-
+       ADD     r5, r0, r11, LSL #1     @get the &exc[k]
+
        @transfer Convolve function
        STMFD   sp!, {r0 - r3}
        MOV     r0, r5
@@ -68,7 +68,7 @@ Norm_corr_asm:
 @ r8 --- excf[]
-       MOV     r14, r1                 @copy xn[] address
+       MOV     r14, r1                 @copy xn[] address
        MOV     r5, #64
        MOV     r6, #0                  @L_tmp = 0
        MOV     r7, #1
@@ -93,21 +93,21 @@ LOOP1:
        CLZ     r7, r9
        SUB     r6, r7, #1              @exp = norm_l(L_tmp)
        RSB     r7, r6, #32             @exp = 32 - exp
-       MOV     r6, r7, ASR #1
+       MOV     r6, r7, ASR #1
        RSB     r7, r6, #0              @scale = -(exp >> 1)
-
+
        @loop for every possible period
        @for(t = t_min@ t <= t_max@ t++)
        @r7 --- scale  r4 --- t_min  r8 --- excf[]
-LOOPFOR:
+LOOPFOR:
        MOV     r5, #0                  @L_tmp = 0
        MOV     r6, #0                  @L_tmp1 = 0
-       MOV     r9, #64
+       MOV     r9, #64
        MOV     r12, r1                 @copy of xn[]
        ADD     r14, r13, #20           @copy of excf[]
        MOV     r8, #0x8000
-
+
 LOOPi:
        LDR     r11, [r14], #4          @load excf[i], excf[i+1]
        LDR     r10, [r12], #4          @load xn[i], xn[i+1]
@@ -128,13 +128,13 @@ LOOPi:
        MOV     r10, #1
        ADD     r5, r10, r5, LSL #1     @L_tmp = (L_tmp << 1) + 1
        ADD     r6, r10, r6, LSL #1     @L_tmp1 = (L_tmp1 << 1) + 1
-
-       CLZ     r10, r5
+
+       CLZ     r10, r5
        CMP     r5, #0
        RSBLT   r11, r5, #0
        CLZLT   r10, r11
        SUB     r10, r10, #1            @exp = norm_l(L_tmp)
-
+
        MOV     r5, r5, LSL r10         @L_tmp = (L_tmp << exp)
        RSB     r10, r10, #30           @exp_corr = 30 - exp
        MOV     r11, r5, ASR #16        @corr = extract_h(L_tmp)
@@ -150,7 +150,7 @@ LOOPi:
        @Isqrt_n(&L_tmp, &exp_norm)
        MOV     r14, r0
-       MOV     r12, r1
+       MOV     r12, r1
        STMFD   sp!, {r0 - r4, r7 - r12, r14}
        ADD     r1, sp, #4
@@ -168,7 +168,7 @@ LOOPi:
        MOV     r6, r6, ASR #16         @norm = extract_h(L_tmp)
        MUL     r12, r6, r11
        ADD     r12, r12, r12           @L_tmp = vo_L_mult(corr, norm)
-
+
        ADD     r6, r10, r5
        ADD     r6, r6, r7              @exp_corr + exp_norm + scale
@@ -187,9 +187,9 @@ LOOPi:
        CMP     r4, r6
        BEQ     Norm_corr_asm_end
-
+
        ADD     r4, r4, #1              @ t_min ++
-
+
        RSB     r5, r4, #0              @ k
        MOV     r6, #63                 @ i = 63
@@ -216,16 +216,16 @@ LOOPK:
        MUL     r14, r11, r8
        LDR     r6, [r13, #T_MAX]       @ get t_max
        MOV     r8, r14, ASR #15
-       STRH    r8, [r10]
+       STRH    r8, [r10]
        CMP     r4, r6
        BLE     LOOPFOR
-Norm_corr_asm_end:
-
-       ADD     r13, r13, #voSTACK
+Norm_corr_asm_end:
+
+       ADD     r13, r13, #voSTACK
        LDMFD   r13!, {r4 - r12, r15}
-
+
        .END
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Syn_filt_32_opt.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Syn_filt_32_opt.s
index 70464e4..3f4930c 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Syn_filt_32_opt.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/Syn_filt_32_opt.s
@@ -38,7 +38,7 @@ Syn_filt_32_asm:
-       STMFD   r13!, {r4 - r12, r14}
+       STMFD   r13!, {r4 - r12, r14}
        LDR     r4, [r13, #40]          @ get sig_hi[] address
        LDR     r5, [r13, #44]          @ get sig_lo[] address
@@ -55,7 +55,7 @@ Syn_filt_32_asm:
        AND     r8, r8, r14
        ORR     r10, r6, r7, LSL #16    @ Aq[2] -- Aq[1]
        ORR     r11, r8, r9, LSL #16    @ Aq[4] -- Aq[3]
-       STR     r10, [r13, #-4]
+       STR     r10, [r13, #-4]
        STR     r11, [r13, #-8]
        LDRSH   r6, [r0, #10]           @ load Aq[5]
@@ -73,12 +73,12 @@ Syn_filt_32_asm:
        LDRSH   r7, [r0, #20]           @ load Aq[10]
        LDRSH   r8, [r0, #22]           @ load Aq[11]
        LDRSH   r9, [r0, #24]           @ load Aq[12]
-       AND     r6, r6, r14
+       AND     r6, r6, r14
        AND     r8, r8, r14
        ORR     r10, r6, r7, LSL #16    @ Aq[10] -- Aq[9]
        ORR     r11, r8, r9, LSL #16    @ Aq[12] -- Aq[11]
        STR     r10, [r13, #-20]
-       STR     r11, [r13, #-24]
+       STR     r11, [r13, #-24]
        LDRSH   r6, [r0, #26]           @ load Aq[13]
        LDRSH   r7, [r0, #28]           @ load Aq[14]
@@ -90,14 +90,14 @@ Syn_filt_32_asm:
        ORR     r11, r8, r9, LSL #16    @ Aq[16] -- Aq[15]
        STR     r10, [r13, #-28]
        STR     r11, [r13, #-32]
-
+
        MOV     r8, #0                  @ i = 0
-
-LOOP:
+
+LOOP:
        LDRSH   r6, [r5, #-2]           @ load sig_lo[i-1]
        LDRSH   r7, [r5, #-4]           @ load sig_lo[i-2]
-       LDR     r11, [r13, #-4]         @ Aq[2] -- Aq[1]
+       LDR     r11, [r13, #-4]         @ Aq[2] -- Aq[1]
        LDRSH   r9, [r5, #-6]           @ load sig_lo[i-3]
        LDRSH   r10, [r5, #-8]          @ load sig_lo[i-4]
@@ -135,12 +135,12 @@ LOOP:
        LDRSH   r10, [r5, #-32]         @ load sig_lo[i-16]
        SMLABB  r12, r6, r11, r12       @ sig_lo[i-13] * Aq[13]
        SMLABT  r12, r7, r11, r12       @ sig_lo[i-14] * Aq[14]
-
+
        LDR     r11, [r13, #-32]        @ Aq[16] -- Aq[15]
-       LDRSH   r6, [r2], #2            @ load exc[i]
+       LDRSH   r6, [r2], #2            @ load exc[i]
        SMLABB  r12, r9, r11, r12       @ sig_lo[i-15] * Aq[15]
        SMLABT  r12, r10, r11, r12      @ sig_lo[i-16] * Aq[16]
-       MUL     r7, r6, r3              @ exc[i] * a0
+       MUL     r7, r6, r3              @ exc[i] * a0
        RSB     r14, r12, #0            @ L_tmp
        MOV     r14, r14, ASR #11       @ L_tmp >>= 11
        ADD     r14, r14, r7, LSL #1    @ L_tmp += (exc[i] * a0) << 1
@@ -149,7 +149,7 @@ LOOP:
        LDRSH   r6, [r4, #-2]           @ load sig_hi[i-1]
        LDRSH   r7, [r4, #-4]           @ load sig_hi[i-2]
-       LDR     r11, [r13, #-4]         @ Aq[2] -- Aq[1]
+       LDR     r11, [r13, #-4]         @ Aq[2] -- Aq[1]
        LDRSH   r9, [r4, #-6]           @ load sig_hi[i-3]
        LDRSH   r10, [r4, #-8]          @ load sig_hi[i-4]
        SMULBB  r12, r6, r11            @ sig_hi[i-1] * Aq[1]
@@ -198,14 +198,14 @@ LOOP:
        LDRSH   r10, [r4, #-32]         @ load sig_hi[i-16]
        SMLABB  r12, r6, r11, r12       @ sig_hi[i-13] * Aq[13]
        SMLABT  r12, r7, r11, r12       @ sig_hi[i-14] * Aq[14]
-
+
        LDR     r11, [r13, #-32]        @ Aq[16] -- Aq[15]
        SMLABB  r12, r9, r11, r12       @ sig_hi[i-15] * Aq[15]
-       SMLABT  r12, r10, r11, r12      @ sig_hi[i-16] * Aq[16]
+       SMLABT  r12, r10, r11, r12      @ sig_hi[i-16] * Aq[16]
        ADD     r6, r12, r12            @ r12 << 1
-       SUB     r14, r14, r6
+       SUB     r14, r14, r6
        MOV     r14, r14, LSL #3        @ L_tmp <<=3
-
+
        MOV     r7, r14, ASR #16        @ L_tmp >> 16
        MOV     r14, r14, ASR #4        @ L_tmp >>=4
@@ -213,14 +213,14 @@ LOOP:
        SUB     r9, r14, r7, LSL #12    @ sig_lo[i] = L_tmp - (sig_hi[i] << 12)
        ADD     r8, r8, #1
-       STRH    r9, [r5], #2
+       STRH    r9, [r5], #2
        CMP     r8, #64
-       BLT     LOOP
-
+       BLT     LOOP
+
 Syn_filt_32_end:
-
-       LDMFD   r13!, {r4 - r12, r15}
+
+       LDMFD   r13!, {r4 - r12, r15}
 @ENDFUNC
        .END
-
+
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/convolve_opt.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/convolve_opt.s
index 0228bda..71bb532 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/convolve_opt.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/convolve_opt.s
@@ -27,24 +27,24 @@
 @ r3 --- L
        .section .text
-       .global Convolve_asm
+       .global Convolve_asm
 Convolve_asm:
-       STMFD   r13!, {r4 - r12, r14}
+       STMFD   r13!, {r4 - r12, r14}
        MOV     r3, #0                  @ n
        MOV     r11, #0x8000
-
-LOOP:
+
+LOOP:
        ADD     r4, r1, r3, LSL #1      @ tmpH address
        ADD     r5, r3, #1              @ i = n + 1
        MOV     r6, r0                  @ tmpX = x
        LDRSH   r9, [r6], #2            @ *tmpX++
        LDRSH   r10, [r4], #-2          @ *tmpH--
        SUB     r5, r5, #1
-       MUL     r8, r9, r10
+       MUL     r8, r9, r10
-LOOP1:
+LOOP1:
        CMP     r5, #0
        BLE     L1
        LDRSH   r9, [r6], #2            @ *tmpX++
@@ -58,12 +58,12 @@ LOOP1:
        LDRSH   r12, [r6], #2           @ *tmpX++
        LDRSH   r14, [r4], #-2          @ *tmpH--
        MLA     r8, r9, r10, r8
-       SUBS    r5, r5, #4
+       SUBS    r5, r5, #4
        MLA     r8, r12, r14, r8
-
-       B       LOOP1
-L1:
+       B       LOOP1
+
+L1:
        ADD     r5, r11, r8, LSL #1
        MOV     r5, r5, LSR #16         @extract_h(s)
@@ -75,14 +75,14 @@ L1:
        ADD     r5, r3, #1
        MOV     r6, r0
        LDRSH   r9, [r6], #2            @ *tmpX++
-       LDRSH   r10, [r4], #-2
+       LDRSH   r10, [r4], #-2
        LDRSH   r12, [r6], #2
        LDRSH   r14, [r4], #-2
        MUL     r8, r9, r10
        SUB     r5, r5, #2
        MLA     r8, r12, r14, r8
-
+
 LOOP2:
        CMP     r5, #0
        BLE     L2
@@ -97,14 +97,14 @@ LOOP2:
        LDRSH   r12, [r6], #2           @ *tmpX++
        LDRSH   r14, [r4], #-2          @ *tmpH--
        MLA     r8, r9, r10, r8
-       SUBS    r5, r5, #4
+       SUBS    r5, r5, #4
        MLA     r8, r12, r14, r8
        B       LOOP2
 L2:
        ADD     r8, r11, r8, LSL #1
        MOV     r8, r8, LSR #16         @extract_h(s)
-       ADD     r3, r3, #1
+       ADD     r3, r3, #1
        STRH    r8, [r2], #2            @y[n]
        ADD     r4, r1, r3, LSL #1
@@ -117,7 +117,7 @@ L2:
        MUL     r8, r9, r10
        LDRSH   r9, [r6], #2
        LDRSH   r10, [r4], #-2
-       MLA     r8, r12, r14, r8
+       MLA     r8, r12, r14, r8
        SUB     r5, r5, #3
        MLA     r8, r9, r10, r8
@@ -135,9 +135,9 @@ LOOP3:
        LDRSH   r12, [r6], #2           @ *tmpX++
        LDRSH   r14, [r4], #-2          @ *tmpH--
        MLA     r8, r9, r10, r8
-       SUBS    r5, r5, #4
-       MLA     r8, r12, r14, r8
-       B       LOOP3
+       SUBS    r5, r5, #4
+       MLA     r8, r12, r14, r8
+       B       LOOP3
 L3:
        ADD     r8, r11, r8, LSL #1
@@ -150,7 +150,7 @@ L3:
        MOV     r6, r0
        MOV     r8, #0
-LOOP4:
+LOOP4:
        CMP     r5, #0
        BLE     L4
        LDRSH   r9, [r6], #2            @ *tmpX++
@@ -164,22 +164,22 @@ LOOP4:
        LDRSH   r12, [r6], #2           @ *tmpX++
        LDRSH   r14, [r4], #-2          @ *tmpH--
        MLA     r8, r9, r10, r8
-       SUBS    r5, r5, #4
-       MLA     r8, r12, r14, r8
-       B       LOOP4
-L4:
+       SUBS    r5, r5, #4
+       MLA     r8, r12, r14, r8
+       B       LOOP4
+L4:
        ADD     r5, r11, r8, LSL #1
        MOV     r5, r5, LSR #16         @extract_h(s)
        ADD     r3, r3, #1
        STRH    r5, [r2], #2            @y[n]
-
+
        CMP     r3, #64
        BLT     LOOP
-
-Convolve_asm_end:
-
+
+Convolve_asm_end:
+
        LDMFD   r13!, {r4 - r12, r15}
-
+
 @ENDFUNC
        .END
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/cor_h_vec_opt.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/cor_h_vec_opt.s
index 8f32733..2d4c7cc 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/cor_h_vec_opt.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/cor_h_vec_opt.s
@@ -51,12 +51,12 @@ LOOPi:
        RSB     r11, r2, #62            @j=62-pos
 LOOPj1:
-       LDRSH   r12, [r10], #2
+       LDRSH   r12, [r10], #2
        LDRSH   r8, [r9], #2
        LDRSH   r14, [r9]
        SUBS    r11, r11, #1
        MLA     r5, r12, r8, r5
-       MLA     r6, r12, r14, r6
+       MLA     r6, r12, r14, r6
        BGE     LOOPj1
        LDRSH   r12, [r10], #2          @*p1++
@@ -64,7 +64,7 @@ LOOPj1:
        MLA     r5, r12, r14, r5
        MOV     r14, #0x8000
        MOV     r5, r5, LSL #2          @L_sum1 = (L_sum1 << 2)
-       ADD     r10, r6, r14
+       ADD     r10, r6, r14
        ADD     r9, r5, r14
        MOV     r5, r9, ASR #16
        MOV     r6, r10, ASR #16
@@ -76,7 +76,7 @@ LOOPj1:
        MUL     r14, r6, r11
        MOV     r5, r12, ASR #15
        MOV     r6, r14, ASR #15
-       LDR     r9, [r13, #44]
+       LDR     r9, [r13, #44]
        LDR     r12, [r13, #48]
        LDRSH   r10, [r7], #2           @*p0++
        LDRSH   r11, [r8]               @*p3++
@@ -88,7 +88,7 @@ LOOPj1:
        STRH    r6, [r12]
        ADD     r2, r2, #4
-
+
        MOV     r5, #0                  @L_sum1 = 0
        MOV     r6, #0                  @L_sum2 = 0
        ADD     r9, r1, r2, LSL #1      @p2 = &vec[pos]
@@ -97,12 +97,12 @@ LOOPj1:
        ADD     r4, r4, #1              @i++
 LOOPj2:
-       LDRSH   r12, [r10], #2
+       LDRSH   r12, [r10], #2
        LDRSH   r8, [r9], #2
        LDRSH   r14, [r9]
        SUBS    r11, r11, #1
        MLA     r5, r12, r8, r5
-       MLA     r6, r12, r14, r6
+       MLA     r6, r12, r14, r6
        BGE     LOOPj2
        LDRSH   r12, [r10], #2          @*p1++
@@ -110,7 +110,7 @@ LOOPj2:
        MLA     r5, r12, r14, r5
        MOV     r14, #0x8000
        MOV     r5, r5, LSL #2          @L_sum1 = (L_sum1 << 2)
-       ADD     r10, r6, r14
+       ADD     r10, r6, r14
        ADD     r9, r5, r14
        MOV     r5, r9, ASR #16
@@ -123,7 +123,7 @@ LOOPj2:
        MUL     r14, r6, r11
        MOV     r5, r12, ASR #15
        MOV     r6, r14, ASR #15
-       LDR     r9, [r13, #44]
+       LDR     r9, [r13, #44]
        LDR     r12, [r13, #48]
        LDRSH   r10, [r7], #2           @*p0++
        LDRSH   r11, [r8]               @*p3++
@@ -136,16 +136,16 @@ LOOPj2:
        ADD     r4, r4, #1              @i+1
        ADD     r2, r2, #4              @pos += STEP
        CMP     r4, #16
-
+
        BLT     LOOPi
-
+
 the_end:
        LDMFD   r13!, {r4 - r12, r15}
-
+
 @ENDFUNC
-       .END
-
-
-
+       .END
+
+
+
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/pred_lt4_1_opt.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/pred_lt4_1_opt.s
index d7b4509..e0b338d 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/pred_lt4_1_opt.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/pred_lt4_1_opt.s
@@ -35,7 +35,7 @@ pred_lt4_asm:
-       STMFD   r13!, {r4 - r12, r14}
+       STMFD   r13!, {r4 - r12, r14}
        RSB     r4, r1, #0              @-T0
        RSB     r2, r2, #0              @frac = -frac
        ADD     r5, r0, r4, LSL #1      @x = exc - T0
@@ -45,7 +45,7 @@ pred_lt4_asm:
        SUB     r5, r5, #30             @x -= 15
        RSB     r4, r2, #3              @k = 3 - frac
        LDR     r6, Table
-       MOV     r8, r4, LSL #6
+       MOV     r8, r4, LSL #6
        @MOV    r7, #0                  @j = 0
        ADD     r8, r6, r8              @ptr2 = &(inter4_2[k][0])
@@ -63,7 +63,7 @@ THREE_LOOP:
        LDRSH   r6, [r1], #2            @x[1]
        LDRSH   r9, [r1], #2            @x[2]
-       SMULBB  r10, r4, r3             @x[0] * h[0]
+       SMULBB  r10, r4, r3             @x[0] * h[0]
        SMULBB  r11, r6, r3             @x[1] * h[0]
        SMULBB  r12, r9, r3             @x[2] * h[0]
@@ -285,7 +285,7 @@ Last2LOOP:
        SMLABB  r10, r9, r3, r10        @x[2] * h[2]
        SMLABB  r11, r4, r3, r11        @x[3] * h[2]
-
+
        SMLABT  r10, r4, r3, r10        @x[3] * h[3]
        SMLABT  r11, r6, r3, r11        @x[4] * h[3]
@@ -435,7 +435,7 @@ Last2LOOP:
        MOV     r11, r11, LSL #1
        QADD    r10, r10, r10
-       QADD    r11, r11, r11
+       QADD    r11, r11, r11
        QADD    r10, r10, r5
        QADD    r11, r11, r5
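The Convolve_asm routine patched above (convolve_opt.s) is a running correlation: the four LOOP1..LOOP4 bodies are the same inner sum, unrolled by four and peeled for the four possible n mod 4 phases. Its comments ("extract_h(s)", the 0x8000 rounding constant, the CMP r3, #64 bound) suggest the following plain C shape; names here are illustrative, not from the codebase.

    typedef short Word16;
    typedef int   Word32;

    /* Sketch of the convolution Convolve_asm computes, per its comments. */
    static void convolve_sketch(const Word16 *x, const Word16 *h, Word16 *y)
    {
        int n, i;
        for (n = 0; n < 64; n++) {           /* CMP r3, #64 ; BLT LOOP */
            Word32 s = 0;
            for (i = 0; i <= n; i++)         /* *tmpX++ walks x, *tmpH-- walks h */
                s += (Word32)x[i] * h[n - i];
            /* (0x8000 + (s << 1)) >> 16: round the high half of s << 1 */
            y[n] = (Word16)((0x8000u + ((unsigned)s << 1)) >> 16);
        }
    }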
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/residu_asm_opt.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/residu_asm_opt.s
index 86b3bd6..5ff0964 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/residu_asm_opt.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/residu_asm_opt.s
@@ -34,12 +34,12 @@ Residu_opt:
        LDRH    r5, [r0], #2
        LDRH    r6, [r0], #2
-       ORR     r5, r6, r5, LSL #16     @r5 --- a0, a1
+       ORR     r5, r6, r5, LSL #16     @r5 --- a0, a1
        LDRH    r6, [r0], #2
        LDRH    r7, [r0], #2
        ORR     r6, r7, r6, LSL #16     @r6 --- a2, a3
-
+
        LDRH    r7, [r0], #2
        LDRH    r8, [r0], #2
        ORR     r7, r8, r7, LSL #16     @r7 --- a4, a5
@@ -59,13 +59,13 @@ Residu_opt:
        LDRH    r11, [r0], #2
        LDRH    r12, [r0], #2
        ORR     r11, r12, r11, LSL #16  @r11 --- a12, a13
-
+
        LDRH    r12, [r0], #2
        LDRH    r4, [r0], #2
        ORR     r12, r4, r12, LSL #16   @r12 --- a14, a15
-
-       STMFD   r13!, {r8 - r12}        @store r8-r12
+
+       STMFD   r13!, {r8 - r12}        @store r8-r12
        LDRH    r4, [r0], #2            @load a16
        MOV     r14, r3, ASR #2         @one loop get 4 outputs
        ADD     r1, r1, #4
@@ -78,7 +78,7 @@ residu_loop:
        LDR     r2, [r1], #-4           @r2 --- x[1], x[0]
        SMULTB  r3, r5, r2              @i1(0) --- r3 = x[0] * a0
-       SMULTT  r4, r5, r2              @i2(0) --- r4 = x[1] * a0
+       SMULTT  r4, r5, r2              @i2(0) --- r4 = x[1] * a0
        SMULTB  r11, r5, r10            @i3(0) --- r11 = x[2] * a0
        SMULTT  r12, r5, r10            @i4(0) --- r12 = x[3] * a0
@@ -88,20 +88,20 @@ residu_loop:
        SMLATB  r11, r6, r2, r11        @i3(2) --- r11 += x[0] * a2
        SMLATT  r12, r6, r2, r12        @i4(2) --- r12 += x[1] * a2
-       SMLABB  r12, r6, r2, r12        @i4(3) --- r12 += x[0] * a3
-
+       SMLABB  r12, r6, r2, r12        @i4(3) --- r12 += x[0] * a3
+
        LDR     r2, [r1], #-4           @r2 ---- x[-1], x[-2]
        SMLABT  r3, r5, r2, r3          @i1(1) --- r3 += x[-1] * a1
        SMLATT  r4, r6, r2, r4          @i2(2) --- r4 += x[-1] * a2
        SMLABT  r11, r6, r2, r11        @i3(3) --- r11 += x[-1] * a3
        SMLATT  r12, r7, r2, r12        @i4(4) --- r12 += x[-1] * a4
-       SMLATB  r3, r6, r2, r3          @i1(2) --- r3 += x[-2] * a2
+       SMLATB  r3, r6, r2, r3          @i1(2) --- r3 += x[-2] * a2
        SMLABB  r4, r6, r2, r4          @ i2 (3)
        SMLATB  r11, r7, r2, r11        @ i3 (4)
        SMLABB  r12, r7, r2, r12        @ i4 (5)
-
+
        LDR     r2, [r1], #-4
        SMLABT  r3, r6, r2, r3          @ i1 (3)
        SMLATT  r4, r7, r2, r4          @ i2 (4)
@@ -111,7 +111,7 @@ residu_loop:
        SMLABB  r4, r7, r2, r4          @ i2 (5)
        SMLATB  r11, r8, r2, r11        @ i3 (6)
        SMLABB  r12, r8, r2, r12        @ i4 (7)
-
+
        LDR     r2, [r1], #-4
        SMLABT  r3, r7, r2, r3          @ i1 (5)
        SMLATT  r4, r8, r2, r4          @ i2 (6)
@@ -122,7 +122,7 @@ residu_loop:
        SMLATB  r11, r9, r2, r11        @ i3 (8)
        SMLABB  r12, r9, r2, r12        @ i4 (9)
        LDR     r10, [r13, #8]          @ [ a10 | a11]
-
+
        LDR     r2, [r1], #-4
        SMLABT  r3, r8, r2, r3          @ i1 (7)
        SMLATT  r4, r9, r2, r4          @ i2 (8)
@@ -133,7 +133,7 @@ residu_loop:
        SMLATB  r11, r10, r2, r11       @ i3 (10)
        SMLABB  r12, r10, r2, r12       @ i4 (11)
        LDR     r8, [r13, #12]          @ [ a12 | a13 ]
-
+
        LDR     r2, [r1], #-4
        SMLABT  r3, r9, r2, r3          @ i1 (9)
        SMLATT  r4, r10, r2, r4         @ i2 (10)
@@ -144,7 +144,7 @@ residu_loop:
        SMLATB  r11, r8, r2, r11        @ i3 (12)
        SMLABB  r12, r8, r2, r12        @ i4 (13)
        LDR     r9, [r13, #16]          @ [ a14 | a15 ]
-
+
        LDR     r2, [r1], #-4
        SMLABT  r3, r10, r2, r3         @ i1 (11)
        SMLATT  r4, r8, r2, r4          @ i2 (12)
@@ -154,7 +154,7 @@ residu_loop:
        SMLABB  r4, r8, r2, r4          @ i2 (13)
        SMLATB  r11, r9, r2, r11        @ i3 (14)
        SMLABB  r12, r9, r2, r12        @ i4 (15)
-
+
        LDR     r2, [r1], #-4
        SMLABT  r3, r8, r2, r3          @ i1 (13)
@@ -165,64 +165,64 @@ residu_loop:
        SMLABB  r4, r9, r2, r4          @ i2 (15)
        SMLABB  r11, r14, r2, r11       @ i3 (16)
        LDR     r8, [r13]               @ [ a6 | a7 ]
-
+
        LDR     r2, [r1], #44           @ Change
        SMLABT  r3, r9, r2, r3
        SMLABB  r3, r14, r2, r3
        SMLABT  r4, r14, r2, r4
        LDR     r9, [r13, #4]           @ [ a8 | a9 ]
-
-
-       QADD    r3, r3, r3
-       QADD    r4, r4, r4
-       QADD    r11, r11, r11
-       QADD    r12, r12, r12
-
-       QADD    r3, r3, r3
-       QADD    r4, r4, r4
-       QADD    r11, r11, r11
-       QADD    r12, r12, r12
-
-       QADD    r3, r3, r3
-       QADD    r4, r4, r4
-       QADD    r11, r11, r11
-       QADD    r12, r12, r12
-
-       QADD    r3, r3, r3
-       QADD    r4, r4, r4
-       QADD    r11, r11, r11
-       QADD    r12, r12, r12
-
-       MOV     r2, #32768
-
-       QDADD   r3, r2, r3
-       QDADD   r4, r2, r4
-       QDADD   r11, r2, r11
-       QDADD   r12, r2, r12
-
-
+
+
+       QADD    r3, r3, r3
+       QADD    r4, r4, r4
+       QADD    r11, r11, r11
+       QADD    r12, r12, r12
+
+       QADD    r3, r3, r3
+       QADD    r4, r4, r4
+       QADD    r11, r11, r11
+       QADD    r12, r12, r12
+
+       QADD    r3, r3, r3
+       QADD    r4, r4, r4
+       QADD    r11, r11, r11
+       QADD    r12, r12, r12
+
+       QADD    r3, r3, r3
+       QADD    r4, r4, r4
+       QADD    r11, r11, r11
+       QADD    r12, r12, r12
+
+       MOV     r2, #32768
+
+       QDADD   r3, r2, r3
+       QDADD   r4, r2, r4
+       QDADD   r11, r2, r11
+       QDADD   r12, r2, r12
+
+
        MOV     r3, r3, asr #16
        MOV     r4, r4, asr #16
        MOV     r11, r11, asr #16
        MOV     r12, r12, asr #16
-
+
        STRH    r3, [r0], #2
        STRH    r4, [r0], #2
        STRH    r11, [r0], #2
        STRH    r12, [r0], #2
-
+
        MOV     r2, r14, asr #16
        SUB     r14, r14, #0x10000
        SUBS    r2, r2, #1
-       BNE     residu_loop
+       BNE     residu_loop
 end:
-       LDMFD   r13!, {r8 - r12}
+       LDMFD   r13!, {r8 - r12}
        LDMFD   r13!, {r4 - r12, pc}
 @ENDFUNC
-       .END
-
-
-
+       .END
+
+
+
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/scale_sig_opt.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/scale_sig_opt.s
index f83e688..b300224 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/scale_sig_opt.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/scale_sig_opt.s
@@ -38,7 +38,7 @@ Scale_sig_opt:
        MOV     r8, #0x7fffffff
        MOV     r9, #0x8000
        BLE     LOOP2
-
+
 LOOP1:
        LDRSH   r5, [r4]                @load x[i]
@@ -65,11 +65,11 @@ LOOP2:
 The_end:
        LDMFD   r13!, {r4 - r12, r15}
-
+
 @ENDFUNC
-       .END
-
-
-
+       .END
+
+
+
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/syn_filt_opt.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/syn_filt_opt.s
index f4700cd..0c287a4 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/syn_filt_opt.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/syn_filt_opt.s
@@ -33,18 +33,18 @@ Syn_filt_asm:
-       STMFD   r13!, {r4 - r12, r14}
+       STMFD   r13!, {r4 - r12, r14}
        SUB     r13, r13, #700          @ y_buf[L_FRAME16k + M16k]
-
+
        MOV     r4, r3                  @ copy mem[] address
        MOV     r5, r13                 @ copy yy = y_buf address

        @ for(i = 0@ i < m@ i++)
        @{
        @     *yy++ = mem[i]@
-       @}
+       @}

-       LDRH    r6, [r4], #2
+       LDRH    r6, [r4], #2
        LDRH    r7, [r4], #2
        LDRH    r8, [r4], #2
        LDRH    r9, [r4], #2
@@ -62,7 +62,7 @@ Syn_filt_asm:
        STRH    r12, [r5], #2
        STRH    r14, [r5], #2
-       LDRH    r6, [r4], #2
+       LDRH    r6, [r4], #2
        LDRH    r7, [r4], #2
        LDRH    r8, [r4], #2
        LDRH    r9, [r4], #2
@@ -92,45 +92,45 @@ Syn_filt_asm:
        LDRSH   r9, [r0, #6]            @ load a[3]
        LDRSH   r11, [r0, #8]           @ load a[4]
        AND     r6, r6, r14
-       AND     r9, r9, r14
+       AND     r9, r9, r14
        ORR     r10, r6, r7, LSL #16    @ -a[2] -- -a[1]
        ORR     r12, r9, r11, LSL #16   @ -a[4] -- -a[3]
        STR     r10, [r13, #-4]
        STR     r12, [r13, #-8]
-
+
        LDRSH   r6, [r0, #10]           @ load a[5]
        LDRSH   r7, [r0, #12]           @ load a[6]
        LDRSH   r9, [r0, #14]           @ load a[7]
        LDRSH   r11, [r0, #16]          @ load a[8]
        AND     r6, r6, r14
-       AND     r9, r9, r14
+       AND     r9, r9, r14
        ORR     r10, r6, r7, LSL #16    @ -a[6] -- -a[5]
        ORR     r12, r9, r11, LSL #16   @ -a[8] -- -a[7]
        STR     r10, [r13, #-12]
-       STR     r12, [r13, #-16]
-
+       STR     r12, [r13, #-16]
+
        LDRSH   r6, [r0, #18]           @ load a[9]
        LDRSH   r7, [r0, #20]           @ load a[10]
        LDRSH   r9, [r0, #22]           @ load a[11]
        LDRSH   r11, [r0, #24]          @ load a[12]
        AND     r6, r6, r14
-       AND     r9, r9, r14
+       AND     r9, r9, r14
        ORR     r10, r6, r7, LSL #16    @ -a[10] -- -a[9]
        ORR     r12, r9, r11, LSL #16   @ -a[12] -- -a[11]
        STR     r10, [r13, #-20]
-       STR     r12, [r13, #-24]
+       STR     r12, [r13, #-24]

        LDRSH   r6, [r0, #26]           @ load a[13]
        LDRSH   r7, [r0, #28]           @ load a[14]
        LDRSH   r9, [r0, #30]           @ load a[15]
        LDRSH   r11, [r0, #32]          @ load a[16]
        AND     r6, r6, r14
-       AND     r9, r9, r14
+       AND     r9, r9, r14
        ORR     r10, r6, r7, LSL #16    @ -a[14] -- -a[13]
        ORR     r12, r9, r11, LSL #16   @ -a[16] -- -a[15]
        STR     r10, [r13, #-28]
-       STR     r12, [r13, #-32]
-
+       STR     r12, [r13, #-32]
+
        ADD     r4, r13, #32
 LOOP:
        LDRSH   r6, [r1], #2            @ load x[i]
@@ -155,8 +155,8 @@ LOOP:
        SMLABB  r14, r6, r7, r14        @ -a[3] * (*(temp_p -3))
        LDRSH   r9, [r10, #-10]         @ *(temp_p - 5)
-
-       SMLABT  r14, r11, r7, r14       @ -a[4] * (*(temp_p -4))
+
+       SMLABT  r14, r11, r7, r14       @ -a[4] * (*(temp_p -4))
        LDR     r7, [r13, #-12]         @ -a[6] -a[5]
        LDRSH   r12, [r10, #-12]        @ *(temp_p - 6)
@@ -169,13 +169,13 @@ LOOP:
        LDR     r7, [r13, #-16]         @ -a[8] -a[7]
        LDRSH   r11, [r10, #-16]        @ *(temp_p - 8)
-
+
        SMLABB  r14, r6, r7, r14        @ -a[7] * (*(temp_p -7))
        LDRSH   r9, [r10, #-18]         @ *(temp_p - 9)
-       SMLABT  r14, r11, r7, r14       @ -a[8] * (*(temp_p -8))
-
+       SMLABT  r14, r11, r7, r14       @ -a[8] * (*(temp_p -8))
+
        LDR     r7, [r13, #-20]         @ -a[10] -a[9]
        LDRSH   r12, [r10, #-20]        @ *(temp_p - 10)
@@ -192,11 +192,11 @@ LOOP:
        LDRSH   r9, [r10, #-26]         @ *(temp_p - 13)
-       SMLABT  r14, r11, r7, r14       @ -a[12] * (*(temp_p -12))
+       SMLABT  r14, r11, r7, r14       @ -a[12] * (*(temp_p -12))
        LDR     r7, [r13, #-28]         @ -a[14] -a[13]
        LDRSH   r12, [r10, #-28]        @ *(temp_p - 14)
-
+
        SMLABB  r14, r9, r7, r14        @ -a[13] * (*(temp_p -13))
        LDRSH   r6, [r10, #-30]         @ *(temp_p - 15)
@@ -211,28 +211,28 @@ LOOP:
        SMLABT  r14, r11, r7, r14       @ -a[16] * (*(temp_p -16))
        RSB     r14, r14, r0
-
+
        MOV     r7, r14, LSL #4         @ L_tmp <<=4
        ADD     r8, r8, #1
-       ADD     r14, r7, #0x8000
+       ADD     r14, r7, #0x8000
        MOV     r7, r14, ASR #16        @ (L_tmp + 0x8000) >> 16
        CMP     r8, #80
        STRH    r7, [r10]               @ yy[i]
        STRH    r7, [r2], #2            @ y[i]
        BLT     LOOP
-
+
        @ update mem[]
        ADD     r5, r13, #160           @ yy[64] address
        MOV     r1, r3
        MOV     r0, r5
        MOV     r2, #16
-       BL      voAWB_Copy
+       BL      voAWB_Copy
 Syn_filt_asm_end:
-
-       ADD     r13, r13, #700
-       LDMFD   r13!, {r4 - r12, r15}
+
+       ADD     r13, r13, #700
+       LDMFD   r13!, {r4 - r12, r15}
 @ENDFUNC
        .END
-
+
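The syn_filt_opt.s comments above describe a 16th-order synthesis filter 1/A(z) with Q12 coefficients: each output is x[i] minus the scaled weighted history, rounded via the "(L_tmp + 0x8000) >> 16" step after "L_tmp <<=4". The code between the shown hunks is not visible here, so the following C is only a plausible sketch of that shape under the assumption a[0] = 4096 (Q12 unity); names are illustrative.

    typedef short Word16;
    typedef int   Word32;

    /* Sketch of the Syn_filt recursion suggested by the asm comments. */
    static void syn_filt_sketch(const Word16 a[17],  /* Q12, a[0] assumed 4096 */
                                const Word16 *x,     /* input signal           */
                                Word16 *y)           /* y[-16..-1] hold mem[]  */
    {
        int i, k;
        for (i = 0; i < 80; i++) {                   /* CMP r8, #80 */
            Word32 L_tmp = (Word32)a[0] * x[i];      /* x[i] << 12 when a0=4096 */
            for (k = 1; k <= 16; k++)                /* the SMLABB/SMLABT pairs */
                L_tmp -= (Word32)a[k] * y[i - k];    /* -a[k] * *(temp_p - k)   */
            /* ((L_tmp << 4) + 0x8000) >> 16, ignoring the saturation the
             * fixed-point basic ops would apply */
            y[i] = (Word16)(((L_tmp << 4) + 0x8000) >> 16);
        }
    }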
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Deemph_32_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Deemph_32_neon.s
index 2afc146..1d5893f 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Deemph_32_neon.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Deemph_32_neon.s
@@ -30,10 +30,10 @@
        .section .text
        .global Deemph_32_asm
-
+
 Deemph_32_asm:
-       STMFD   r13!, {r4 - r12, r14}
+       STMFD   r13!, {r4 - r12, r14}
        MOV     r4, #2                  @i=0
        LDRSH   r6, [r0], #2            @load x_hi[0]
        LDRSH   r7, [r1], #2            @load x_lo[0]
@@ -47,9 +47,9 @@ Deemph_32_asm:
        ADD     r12, r10, r7, LSL #4    @L_tmp += x_lo[0] << 4
        MOV     r10, r12, LSL #3        @L_tmp <<= 3
        MUL     r9, r5, r8
-       LDRSH   r6, [r0], #2            @load x_hi[1]
+       LDRSH   r6, [r0], #2            @load x_hi[1]
        QDADD   r10, r10, r9
-       LDRSH   r7, [r1], #2            @load x_lo[1]
+       LDRSH   r7, [r1], #2            @load x_lo[1]
        MOV     r12, r10, LSL #1        @L_tmp = L_mac(L_tmp, *mem, fac)
        QADD    r10, r12, r11
        MOV     r14, r10, ASR #16       @y[0] = round(L_tmp)
@@ -94,9 +94,9 @@ LOOP:
        BLT     LOOP

        STR     r14, [r3]
-       STRH    r14, [r2]
+       STRH    r14, [r2]

-       LDMFD   r13!, {r4 - r12, r15}
+       LDMFD   r13!, {r4 - r12, r15}
        .END
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s
index 678f1d0..8230944 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s
@@ -45,14 +45,14 @@ Dot_product12_asm:
        VLD1.S16 {Q12, Q13}, [r1]!      @load 16 Word16 y[]

        VMULL.S16 Q15, D16, D0
-       VMLAL.S16 Q15, D17, D1
+       VMLAL.S16 Q15, D17, D1
        VMLAL.S16 Q15, D18, D2
        VMLAL.S16 Q15, D19, D3
-       VLD1.S16 {Q0, Q1}, [r1]!        @load 16 Word16 y[]
-       VMLAL.S16 Q15, D20, D4
+       VLD1.S16 {Q0, Q1}, [r1]!        @load 16 Word16 y[]
+       VMLAL.S16 Q15, D20, D4
        VMLAL.S16 Q15, D21, D5
        VMLAL.S16 Q15, D22, D6
-       VMLAL.S16 Q15, D23, D7
+       VMLAL.S16 Q15, D23, D7
        VMLAL.S16 Q15, D24, D8
        VMLAL.S16 Q15, D25, D9
        VMLAL.S16 Q15, D26, D10
@@ -64,9 +64,9 @@ Dot_product12_asm:
        CMP     r2, #64
        BEQ     Lable1
-       VLD1.S16 {Q0, Q1}, [r0]!        @load 16 Word16 x[]
-       VLD1.S16 {Q2, Q3}, [r1]!
-       VMLAL.S16 Q15, D4, D0
+       VLD1.S16 {Q0, Q1}, [r0]!        @load 16 Word16 x[]
+       VLD1.S16 {Q2, Q3}, [r1]!
+       VMLAL.S16 Q15, D4, D0
        VMLAL.S16 Q15, D5, D1
        VMLAL.S16 Q15, D6, D2
        VMLAL.S16 Q15, D7, D3
@@ -102,11 +102,11 @@ LOOP_EQ:
        VMLAL.S16 Q15, D2, D2
        VMLAL.S16 Q15, D3, D3
-Lable1:
+Lable1:
        VQADD.S32 D30, D30, D31
        VPADD.S32 D30, D30, D30
-       VMOV.S32 r12, D30[0]
+       VMOV.S32 r12, D30[0]
        ADD     r12, r12, r12
        ADD     r12, r12, #1            @ L_sum = (L_sum << 1) + 1
@@ -117,11 +117,11 @@ Lable1:
        SUB     r10, r10, #1            @ sft = norm_l(L_sum)
        MOV     r0, r12, LSL r10        @ L_sum = L_sum << sft
        RSB     r11, r10, #30           @ *exp = 30 - sft
-       STRH    r11, [r3]
+       STRH    r11, [r3]
 Dot_product12_end:
-
-       LDMFD   r13!, {r4 - r12, r15}
+
+       LDMFD   r13!, {r4 - r12, r15}
        .END
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Filt_6k_7k_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Filt_6k_7k_neon.s
index 5389a1c..14ba828 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Filt_6k_7k_neon.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Filt_6k_7k_neon.s
@@ -13,7 +13,7 @@
 @ ** See the License for the specific language governing permissions and
 @ ** limitations under the License.
 @ */
-@
+@
 @**********************************************************************/
 @void Filt_6k_7k(
 @     Word16 signal[],                      /* input:  signal        */
@@ -23,7 +23,7 @@
 @***********************************************************************
 @ r0    ---  signal[]
 @ r1    ---  lg
-@ r2    ---  mem[]
+@ r2    ---  mem[]
        .section .text
        .global Filt_6k_7k_asm
@@ -31,7 +31,7 @@ Filt_6k_7k_asm:
-       STMFD   r13!, {r0 - r12, r14}
+       STMFD   r13!, {r0 - r12, r14}
        SUB     r13, r13, #240          @ x[L_SUBFR16k + (L_FIR - 1)]
        MOV     r8, r0                  @ copy signal[] address
        MOV     r5, r2                  @ copy mem[] address
@@ -49,7 +49,7 @@ Filt_6k_7k_asm:
-       LDR     r10, Lable1             @ get fir_7k address
+       LDR     r10, Lable1             @ get fir_7k address
        MOV     r3, r8                  @ change myMemCopy to Copy, due to Copy will change r3 content
        ADD     r6, r13, #60            @ get x[L_FIR - 1] address
        MOV     r7, r3                  @ get signal[i]
@@ -81,9 +81,9 @@ Filt_6k_7k_asm:
        MOV     r12, r5                 @STR r5, [sp, #-4] @ PUSH r5 to stack

        @ not use registers: r4, r10, r12, r14, r5
-       MOV     r4, r13
-       MOV     r5, #0                  @ i = 0
-
+       MOV     r4, r13
+       MOV     r5, #0                  @ i = 0
+
        @ r4 --- x[i], r10 ---- fir_6k_7k
        VLD1.S16 {Q0, Q1}, [r10]!       @fir_6k_7k[0] ~ fir_6k_7k[15]
        VLD1.S16 {Q2, Q3}, [r10]!       @fir_6k_7k[16] ~ fir_6k_7k[31]
@@ -91,20 +91,20 @@ Filt_6k_7k_asm:
        VLD1.S16 {Q4, Q5}, [r4]!        @x[0] ~ x[15]
        VLD1.S16 {Q6, Q7}, [r4]!        @x[16] ~ X[31]
-       VLD1.S16 {Q8}, [r4]!
-       VMOV.S16 Q15, #0
-
+       VLD1.S16 {Q8}, [r4]!
+       VMOV.S16 Q15, #0
+
 LOOP_6K7K:
-       VMULL.S16 Q9, D8, D0[0]
-       VMULL.S16 Q10, D9, D1[0]
-       VMULL.S16 Q11, D9, D0[0]
+       VMULL.S16 Q9, D8, D0[0]
+       VMULL.S16 Q10, D9, D1[0]
+       VMULL.S16 Q11, D9, D0[0]
        VMULL.S16 Q12, D10, D1[0]
        VEXT.8  Q4, Q4, Q5, #2
        VMLAL.S16 Q9, D10, D2[0]
        VMLAL.S16 Q10, D11, D3[0]
        VMLAL.S16 Q11, D11, D2[0]
-       VMLAL.S16 Q12, D12, D3[0]
+       VMLAL.S16 Q12, D12, D3[0]
        VEXT.8  Q5, Q5, Q6, #2
        VMLAL.S16 Q9, D12, D4[0]
        VMLAL.S16 Q10, D13, D5[0]
@@ -115,18 +115,18 @@ LOOP_6K7K:
        VMLAL.S16 Q10, D15, D7[0]
        VMLAL.S16 Q11, D15, D6[0]
        VMLAL.S16 Q12, D16, D7[0]
-       VEXT.8  Q7, Q7, Q8, #2
+       VEXT.8  Q7, Q7, Q8, #2

-       VMLAL.S16 Q9, D8, D0[1]
+       VMLAL.S16 Q9, D8, D0[1]
        VMLAL.S16 Q10, D9, D1[1]
-       VEXT.8  Q8, Q8, Q15, #2
-       VMLAL.S16 Q11, D9, D0[1]
+       VEXT.8  Q8, Q8, Q15, #2
+       VMLAL.S16 Q11, D9, D0[1]
        VMLAL.S16 Q12, D10, D1[1]
        VEXT.8  Q4, Q4, Q5, #2
        VMLAL.S16 Q9, D10, D2[1]
        VMLAL.S16 Q10, D11, D3[1]
        VMLAL.S16 Q11, D11, D2[1]
-       VMLAL.S16 Q12, D12, D3[1]
+       VMLAL.S16 Q12, D12, D3[1]
        VEXT.8  Q5, Q5, Q6, #2
        VMLAL.S16 Q9, D12, D4[1]
        VMLAL.S16 Q10, D13, D5[1]
@@ -137,18 +137,18 @@ LOOP_6K7K:
        VMLAL.S16 Q10, D15, D7[1]
        VMLAL.S16 Q11, D15, D6[1]
        VMLAL.S16 Q12, D16, D7[1]
-       VEXT.8  Q7, Q7, Q8, #2
+       VEXT.8  Q7, Q7, Q8, #2

-       VMLAL.S16 Q9, D8, D0[2]
+       VMLAL.S16 Q9, D8, D0[2]
        VMLAL.S16 Q10, D9, D1[2]
-       VEXT.8  Q8, Q8, Q15, #2
-       VMLAL.S16 Q11, D9, D0[2]
+       VEXT.8  Q8, Q8, Q15, #2
+       VMLAL.S16 Q11, D9, D0[2]
        VMLAL.S16 Q12, D10, D1[2]
        VEXT.8  Q4, Q4, Q5, #2
        VMLAL.S16 Q9, D10, D2[2]
        VMLAL.S16 Q10, D11, D3[2]
        VMLAL.S16 Q11, D11, D2[2]
-       VMLAL.S16 Q12, D12, D3[2]
+       VMLAL.S16 Q12, D12, D3[2]
        VEXT.8  Q5, Q5, Q6, #2
        VMLAL.S16 Q9, D12, D4[2]
        VMLAL.S16 Q10, D13, D5[2]
@@ -159,18 +159,18 @@ LOOP_6K7K:
        VMLAL.S16 Q10, D15, D7[2]
        VMLAL.S16 Q11, D15, D6[2]
        VMLAL.S16 Q12, D16, D7[2]
-       VEXT.8  Q7, Q7, Q8, #2
+       VEXT.8  Q7, Q7, Q8, #2

-       VMLAL.S16 Q9, D8, D0[3]
+       VMLAL.S16 Q9, D8, D0[3]
        VMLAL.S16 Q10, D9, D1[3]
-       VEXT.8  Q8, Q8, Q15, #2
-       VMLAL.S16 Q11, D9, D0[3]
+       VEXT.8  Q8, Q8, Q15, #2
+       VMLAL.S16 Q11, D9, D0[3]
        VMLAL.S16 Q12, D10, D1[3]
        VEXT.8  Q4, Q4, Q5, #2
        VMLAL.S16 Q9, D10, D2[3]
        VMLAL.S16 Q10, D11, D3[3]
        VMLAL.S16 Q11, D11, D2[3]
-       VMLAL.S16 Q12, D12, D3[3]
+       VMLAL.S16 Q12, D12, D3[3]
        VEXT.8  Q5, Q5, Q6, #2
        VMLAL.S16 Q9, D12, D4[3]
        VMLAL.S16 Q10, D13, D5[3]
@@ -181,10 +181,10 @@ LOOP_6K7K:
        VMLAL.S16 Q10, D15, D7[3]
        VMLAL.S16 Q11, D15, D6[3]
        VMLAL.S16 Q12, D16, D7[3]
-       VEXT.8  Q7, Q7, Q8, #2
+       VEXT.8  Q7, Q7, Q8, #2

        VMOV.S16 D8, D9
-       VEXT.8  Q8, Q8, Q15, #2
+       VEXT.8  Q8, Q8, Q15, #2
        VMOV.S16 D9, D10
        VADD.S32 Q9, Q9, Q10
        VMOV.S16 D10, D11
@@ -214,12 +214,12 @@ LOOP_6K7K:
        VST1.S16 {D4, D5, D6}, [r1]!
        VST1.S16 D7[0], [r1]!
        VST1.S16 D7[1], [r1]!
-
+
 Filt_6k_7k_end:
-       ADD     r13, r13, #240
-       LDMFD   r13!, {r0 - r12, r15}
-
+       ADD     r13, r13, #240
+       LDMFD   r13!, {r0 - r12, r15}
+
 Lable1:
        .word   fir_6k_7k
 @ENDFUNC
        .END
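Both Filt_6k_7k implementations compute the same 31-tap linear-phase FIR band-pass; the ARMV5E comments ("(x[i+5] + x[i+25]) * fir_7k[5]", "x[i+15] * fir_7k[15]", "(L_tmp + 0x4000) >> 15") show the tap symmetry being exploited so two samples share each multiply. A short C rendering of that structure, with illustrative names (fir_6k_7k is the real coefficient table):

    typedef short Word16;
    typedef int   Word32;

    /* Sketch of the symmetric 31-tap FIR both Filt_6k_7k files implement. */
    static void filt_6k_7k_sketch(Word16 *signal,          /* 80 outputs      */
                                  const Word16 fir[31],    /* symmetric taps  */
                                  const Word16 x[80 + 30]) /* mem ++ signal   */
    {
        int i, j;
        for (i = 0; i < 80; i++) {
            Word32 L_tmp = 0;
            for (j = 0; j < 15; j++)      /* fir[j] == fir[30-j], so pair up */
                L_tmp += (Word32)(x[i + j] + x[i + 30 - j]) * fir[j];
            L_tmp += (Word32)x[i + 15] * fir[15];          /* centre tap */
            signal[i] = (Word16)((L_tmp + 0x4000) >> 15);  /* round, Q15 */
        }
    }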
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s
index 60e9ade..4263cd4 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s
@@ -33,7 +33,7 @@
        .section .text
-       .global Norm_corr_asm
+       .global Norm_corr_asm
        .extern Convolve_asm
        .extern Isqrt_n
 @******************************
@@ -47,17 +47,17 @@
        .equ    T_MIN,      212
        .equ    T_MAX,      216
        .equ    CORR_NORM,  220
-
+
 Norm_corr_asm:
-       STMFD   r13!, {r4 - r12, r14}
+       STMFD   r13!, {r4 - r12, r14}
        SUB     r13, r13, #voSTACK
-
+
        ADD     r8, r13, #20            @get the excf[L_SUBFR]
        LDR     r4, [r13, #T_MIN]       @get t_min
        RSB     r11, r4, #0             @k = -t_min
-       ADD     r5, r0, r11, LSL #1     @get the &exc[k]
-
+       ADD     r5, r0, r11, LSL #1     @get the &exc[k]
+
        @transfer Convolve function
        STMFD   sp!, {r0 - r3}
        MOV     r0, r5
@@ -68,7 +68,7 @@ Norm_corr_asm:
 @ r8 --- excf[]
-       MOV     r14, r1                 @copy xn[] address
+       MOV     r14, r1                 @copy xn[] address
        MOV     r7, #1
        VLD1.S16 {Q0, Q1}, [r14]!
        VLD1.S16 {Q2, Q3}, [r14]!
@@ -95,34 +95,34 @@ Norm_corr_asm:
        VQADD.S32 D20, D20, D21
        VMOV.S32 r9, D20[0]
        VMOV.S32 r10, D20[1]
-       QADD    r6, r9, r10
+       QADD    r6, r9, r10
        QADD    r6, r6, r6
        QADD    r9, r6, r7              @L_tmp = (L_tmp << 1) + 1;
        CLZ     r7, r9
        SUB     r6, r7, #1              @exp = norm_l(L_tmp)
        RSB     r7, r6, #32             @exp = 32 - exp
-       MOV     r6, r7, ASR #1
+       MOV     r6, r7, ASR #1
        RSB     r7, r6, #0              @scale = -(exp >> 1)
-
+
        @loop for every possible period
        @for(t = t_min@ t <= t_max@ t++)
        @r7 --- scale  r4 --- t_min  r8 --- excf[]
-LOOPFOR:
+LOOPFOR:
        ADD     r14, r13, #20           @copy of excf[]
        MOV     r12, r1                 @copy of xn[]
        MOV     r8, #0x8000
        VLD1.S16 {Q0, Q1}, [r14]!       @ load 16 excf[]
-       VLD1.S16 {Q2, Q3}, [r14]!       @ load 16 excf[]
+       VLD1.S16 {Q2, Q3}, [r14]!       @ load 16 excf[]
        VLD1.S16 {Q4, Q5}, [r12]!       @ load 16 x[]
        VLD1.S16 {Q6, Q7}, [r12]!       @ load 16 x[]
        VMULL.S16 Q10, D0, D0           @L_tmp1 += excf[] * excf[]
-       VMULL.S16 Q11, D0, D8           @L_tmp += x[] * excf[]
+       VMULL.S16 Q11, D0, D8           @L_tmp += x[] * excf[]
        VMLAL.S16 Q10, D1, D1
        VMLAL.S16 Q11, D1, D9
        VMLAL.S16 Q10, D2, D2
-       VMLAL.S16 Q11, D2, D10
+       VMLAL.S16 Q11, D2, D10
        VMLAL.S16 Q10, D3, D3
        VMLAL.S16 Q11, D3, D11
        VMLAL.S16 Q10, D4, D4
@@ -143,7 +143,7 @@ LOOPFOR:
        VMLAL.S16 Q10, D1, D1
        VMLAL.S16 Q11, D1, D9
        VMLAL.S16 Q10, D2, D2
-       VMLAL.S16 Q11, D2, D10
+       VMLAL.S16 Q11, D2, D10
        VMLAL.S16 Q10, D3, D3
        VMLAL.S16 Q11, D3, D11
        VMLAL.S16 Q10, D4, D4
@@ -162,19 +162,19 @@ LOOPFOR:
        VPADD.S32 D22, D22, D22         @D22[0] --- L_tmp << 1
        VMOV.S32 r6, D20[0]
-       VMOV.S32 r5, D22[0]
+       VMOV.S32 r5, D22[0]

        @r5 --- L_tmp, r6 --- L_tmp1
        MOV     r10, #1
        ADD     r5, r10, r5, LSL #1     @L_tmp = (L_tmp << 1) + 1
        ADD     r6, r10, r6, LSL #1     @L_tmp1 = (L_tmp1 << 1) + 1
-
-       CLZ     r10, r5
+
+       CLZ     r10, r5
        CMP     r5, #0
        RSBLT   r11, r5, #0
        CLZLT   r10, r11
        SUB     r10, r10, #1            @exp = norm_l(L_tmp)
-
+
        MOV     r5, r5, LSL r10         @L_tmp = (L_tmp << exp)
        RSB     r10, r10, #30           @exp_corr = 30 - exp
        MOV     r11, r5, ASR #16        @corr = extract_h(L_tmp)
@@ -190,7 +190,7 @@ LOOPFOR:
        @Isqrt_n(&L_tmp, &exp_norm)
        MOV     r14, r0
-       MOV     r12, r1
+       MOV     r12, r1
        STMFD   sp!, {r0 - r4, r7 - r12, r14}
        ADD     r1, sp, #4
@@ -208,7 +208,7 @@ LOOPFOR:
        MOV     r6, r6, ASR #16         @norm = extract_h(L_tmp)
        MUL     r12, r6, r11
        ADD     r12, r12, r12           @L_tmp = vo_L_mult(corr, norm)
-
+
        ADD     r6, r10, r5
        ADD     r6, r6, r7              @exp_corr + exp_norm + scale
@@ -227,8 +227,8 @@ LOOPFOR:
        CMP     r4, r6
        BEQ     Norm_corr_asm_end
-
-       ADD     r4, r4, #1              @ t_min ++
+
+       ADD     r4, r4, #1              @ t_min ++
        RSB     r5, r4, #0              @ k
        MOV     r6, #63                 @ i = 63
@@ -255,16 +255,16 @@ LOOPK:
        MUL     r14, r11, r8
        LDR     r6, [r13, #T_MAX]       @ get t_max
        MOV     r8, r14, ASR #15
-       STRH    r8, [r10]
+       STRH    r8, [r10]
        CMP     r4, r6
        BLE     LOOPFOR
-Norm_corr_asm_end:
-
-       ADD     r13, r13, #voSTACK
+Norm_corr_asm_end:
+
+       ADD     r13, r13, #voSTACK
        LDMFD   r13!, {r4 - r12, r15}
-
+
        .END
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s
index 1e65efa..e786dde 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s
@@ -33,12 +33,12 @@
 @ sig_lo[]  --- r5
 @ lg        --- r6
-       .section .text
+       .section .text
        .global Syn_filt_32_asm
 Syn_filt_32_asm:
-       STMFD   r13!, {r4 - r12, r14}
+       STMFD   r13!, {r4 - r12, r14}
        LDR     r4, [r13, #40]          @ get sig_hi[] address
        LDR     r5, [r13, #44]          @ get sig_lo[] address
@@ -49,8 +49,8 @@ Syn_filt_32_asm:
        SUB     r10, r4, #32            @ sig_hi[-16] address
        SUB     r11, r5, #32            @ sig_lo[-16] address
-       VLD1.S16 {D0, D1, D2, D3}, [r0]! @a[1] ~ a[16]
-
+       VLD1.S16 {D0, D1, D2, D3}, [r0]! @a[1] ~ a[16]
+
        MOV     r8, #0                  @ i = 0
        VLD1.S16 {D4, D5, D6, D7}, [r10]! @ sig_hi[-16] ~ sig_hi[-1]
@@ -58,9 +58,9 @@ Syn_filt_32_asm:
        VREV64.16 D1, D1
        VLD1.S16 {D8, D9, D10, D11}, [r11]! @ sig_lo[-16] ~ sig_lo[-1]
        VREV64.16 D2, D2
-       VREV64.16 D3, D3
+       VREV64.16 D3, D3
        VDUP.S32 Q15, r8
-
+
 SYN_LOOP:
        LDRSH   r6, [r2], #2            @exc[i]
@@ -73,12 +73,12 @@ SYN_LOOP:
        VEXT.8  D9, D9, D10, #2
        VEXT.8  D10, D10, D11, #2
-
+
        VPADD.S32 D28, D20, D21
        MUL     r12, r6, r3             @exc[i] * a0
        VPADD.S32 D29, D28, D28
        VDUP.S32 Q10, D29[0]            @result1
-
+
        VMULL.S16 Q11, D4, D3
        VMLAL.S16 Q11, D5, D2
        VSUB.S32 Q10, Q15, Q10
@@ -101,7 +101,7 @@ SYN_LOOP:
        VSHR.S32 Q10, Q10, #11          @result1 >>= 11
        VSHL.S32 Q11, Q11, #1           @result2 <<= 1
-       VDUP.S32 Q12, r14
+       VDUP.S32 Q12, r14
        VADD.S32 Q12, Q12, Q10          @L_tmp = L_tmp - (result1 >>= 11) - (result2 <<= 1)
        VSUB.S32 Q12, Q12, Q11
@@ -122,12 +122,12 @@ SYN_LOOP:
        STRH    r12, [r5], #2           @stroe sig_lo[i]

        CMP     r8, #64
-       BLT     SYN_LOOP
-
+       BLT     SYN_LOOP
+
 Syn_filt_32_end:
-
-       LDMFD   r13!, {r4 - r12, r15}
+
+       LDMFD   r13!, {r4 - r12, r15}
 @ENDFUNC
        .END
-
+
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s
index 189e33b..8efa9fb 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s
@@ -20,22 +20,22 @@
 @*      Word16 y[],        /* (o)     : output vector       */
 @*      Word16 L           /* (i)     : vector size         */
 @*)
-@
+@
 @ r0 --- x[]
 @ r1 --- h[]
 @ r2 --- y[]
 @ r3 --- L
        .section .text
-       .global Convolve_asm
+       .global Convolve_asm
 Convolve_asm:
-       STMFD   r13!, {r4 - r12, r14}
-       MOV     r3, #0
+       STMFD   r13!, {r4 - r12, r14}
+       MOV     r3, #0
        MOV     r11, #0x8000
-
-LOOP:
+
+LOOP:
        @MOV    r8, #0                  @ s = 0
        ADD     r4, r1, r3, LSL #1      @ tmpH address
        ADD     r5, r3, #1              @ i = n + 1
@@ -43,21 +43,21 @@ LOOP:
        LDRSH   r9, [r6], #2            @ *tmpX++
        LDRSH   r10, [r4]               @ *tmpH--
        SUB     r5, r5, #1
-       VMOV.S32 Q10, #0
-       MUL     r8, r9, r10
+       VMOV.S32 Q10, #0
+       MUL     r8, r9, r10
-LOOP1:
+LOOP1:
        CMP     r5, #0
        BLE     L1
        SUB     r4, r4, #8
        MOV     r9, r4
-       VLD1.S16 D0, [r6]!
+       VLD1.S16 D0, [r6]!
        VLD1.S16 D1, [r9]!
        VREV64.16 D1, D1
-       SUBS    r5, r5, #4
-       VMLAL.S16 Q10, D0, D1
-       B       LOOP1
-L1:
+       SUBS    r5, r5, #4
+       VMLAL.S16 Q10, D0, D1
+       B       LOOP1
+L1:
        VADD.S32 D20, D20, D21
        VPADD.S32 D20, D20, D20
        VMOV.S32 r5, D20[0]
@@ -73,25 +73,25 @@ L1:
        ADD     r5, r3, #1
        MOV     r6, r0
        LDRSH   r9, [r6], #2            @ *tmpX++
-       LDRSH   r10, [r4], #-2
+       LDRSH   r10, [r4], #-2
        LDRSH   r12, [r6], #2
        LDRSH   r14, [r4]
        MUL     r8, r9, r10
        SUB     r5, r5, #2
        MLA     r8, r12, r14, r8
-
+
        VMOV.S32 Q10, #0
 LOOP2:
        CMP     r5, #0
        BLE     L2
        SUB     r4, r4, #8
        MOV     r9, r4
-       VLD1.S16 D0, [r6]!
+       VLD1.S16 D0, [r6]!
        VLD1.S16 D1, [r9]!
        SUBS    r5, r5, #4
        VREV64.16 D1, D1
-       VMLAL.S16 Q10, D0, D1
+       VMLAL.S16 Q10, D0, D1
        B       LOOP2
 L2:
        VADD.S32 D20, D20, D21
@@ -100,7 +100,7 @@ L2:
        ADD     r8, r8, r5
        ADD     r8, r11, r8, LSL #1
        MOV     r8, r8, LSR #16         @extract_h(s)
-       ADD     r3, r3, #1
+       ADD     r3, r3, #1
        STRH    r8, [r2], #2            @y[n]
@@ -115,7 +115,7 @@ L2:
        MUL     r8, r9, r10
        LDRSH   r9, [r6], #2
        LDRSH   r10, [r4]
-       MLA     r8, r12, r14, r8
+       MLA     r8, r12, r14, r8
        SUB     r5, r5, #3
        MLA     r8, r9, r10, r8
@@ -125,12 +125,12 @@ LOOP3:
        BLE     L3
        SUB     r4, r4, #8
        MOV     r9, r4
-       VLD1.S16 D0, [r6]!
+       VLD1.S16 D0, [r6]!
        VLD1.S16 D1, [r9]!
        VREV64.16 D1, D1
        SUBS    r5, r5, #4
-       VMLAL.S16 Q10, D0, D1
-       B       LOOP3
+       VMLAL.S16 Q10, D0, D1
+       B       LOOP3
 L3:
        VADD.S32 D20, D20, D21
@@ -146,18 +146,18 @@ L3:
        ADD     r4, r1, r5, LSL #1      @ tmpH address
        MOV     r6, r0
        VMOV.S32 Q10, #0
-LOOP4:
+LOOP4:
        CMP     r5, #0
        BLE     L4
        SUB     r4, r4, #8
        MOV     r9, r4
-       VLD1.S16 D0, [r6]!
+       VLD1.S16 D0, [r6]!
        VLD1.S16 D1, [r9]!
        VREV64.16 D1, D1
-       SUBS    r5, r5, #4
-       VMLAL.S16 Q10, D0, D1
-       B       LOOP4
-L4:
+       SUBS    r5, r5, #4
+       VMLAL.S16 Q10, D0, D1
+       B       LOOP4
+L4:
        VADD.S32 D20, D20, D21
        VPADD.S32 D20, D20, D20
        VMOV.S32 r5, D20[0]
@@ -165,14 +165,14 @@ L4:
        MOV     r5, r5, LSR #16         @extract_h(s)
        ADD     r3, r3, #1
        STRH    r5, [r2], #2            @y[n]
-
+
        CMP     r3, #64
        BLT     LOOP
-
-Convolve_asm_end:
-
+
+Convolve_asm_end:
+
        LDMFD   r13!, {r4 - r12, r15}
-
+
 @ENDFUNC
        .END
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/cor_h_vec_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/cor_h_vec_neon.s
index c314a88..8904289 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/cor_h_vec_neon.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/cor_h_vec_neon.s
@@ -31,7 +31,7 @@
 @r5 ---- cor_1[]
 @r6 ---- cor_2[]
-       .section .text
+       .section .text
        .global cor_h_vec_012_asm
 cor_h_vec_012_asm:
@@ -52,12 +52,12 @@ LOOPi:
        RSB     r11, r2, #62            @j=62-pos
 LOOPj1:
-       LDRSH   r12, [r10], #2
+       LDRSH   r12, [r10], #2
        LDRSH   r8, [r9], #2
        LDRSH   r14, [r9]
        SUBS    r11, r11, #1
        MLA     r5, r12, r8, r5
-       MLA     r6, r12, r14, r6
+       MLA     r6, r12, r14, r6
        BGE     LOOPj1
        LDRSH   r12, [r10], #2          @*p1++
@@ -65,7 +65,7 @@ LOOPj1:
        MLA     r5, r12, r14, r5
        MOV     r14, #0x8000
        MOV     r5, r5, LSL #2          @L_sum1 = (L_sum1 << 2)
-       ADD     r10, r6, r14
+       ADD     r10, r6, r14
        ADD     r9, r5, r14
        MOV     r5, r9, ASR #16
        MOV     r6, r10, ASR #16
@@ -77,7 +77,7 @@ LOOPj1:
        MUL     r14, r6, r11
        MOV     r5, r12, ASR #15
        MOV     r6, r14, ASR #15
-       LDR     r9, [r13, #44]
+       LDR     r9, [r13, #44]
        LDR     r12, [r13, #48]
        LDRSH   r10, [r7], #2           @*p0++
        LDRSH   r11, [r8]               @*p3++
@@ -89,7 +89,7 @@ LOOPj1:
        STRH    r6, [r12]
        ADD     r2, r2, #4
-
+
        MOV     r5, #0                  @L_sum1 = 0
        MOV     r6, #0                  @L_sum2 = 0
        ADD     r9, r1, r2, LSL #1      @p2 = &vec[pos]
@@ -98,12 +98,12 @@ LOOPj1:
        ADD     r4, r4, #1              @i++
 LOOPj2:
-       LDRSH   r12, [r10], #2
+       LDRSH   r12, [r10], #2
        LDRSH   r8, [r9], #2
        LDRSH   r14, [r9]
        SUBS    r11, r11, #1
        MLA     r5, r12, r8, r5
-       MLA     r6, r12, r14, r6
+       MLA     r6, r12, r14, r6
        BGE     LOOPj2
        LDRSH   r12, [r10], #2          @*p1++
@@ -111,7 +111,7 @@ LOOPj2:
        MLA     r5, r12, r14, r5
        MOV     r14, #0x8000
        MOV     r5, r5, LSL #2          @L_sum1 = (L_sum1 << 2)
-       ADD     r10, r6, r14
+       ADD     r10, r6, r14
        ADD     r9, r5, r14
        MOV     r5, r9, ASR #16
@@ -124,7 +124,7 @@ LOOPj2:
        MUL     r14, r6, r11
        MOV     r5, r12, ASR #15
        MOV     r6, r14, ASR #15
-       LDR     r9, [r13, #44]
+       LDR     r9, [r13, #44]
        LDR     r12, [r13, #48]
        LDRSH   r10, [r7], #2           @*p0++
        LDRSH   r11, [r8]               @*p3++
@@ -137,15 +137,15 @@ LOOPj2:
        ADD     r4, r4, #1              @i+1
        ADD     r2, r2, #4              @pos += STEP
        CMP     r4, #16
-
+
        BLT     LOOPi
-
+
 the_end:
        LDMFD   r13!, {r4 - r12, r15}
-
-       .END
-
-
-
+
+       .END
+
+
+
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/pred_lt4_1_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/pred_lt4_1_neon.s
index dffb750..6b782cb 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/pred_lt4_1_neon.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/pred_lt4_1_neon.s
@@ -25,14 +25,14 @@
 @ r1 --- T0
 @ r2 --- frac
 @ r3 --- L_subfr
-
-       .section .text
+
+       .section .text
        .global pred_lt4_asm
        .extern inter4_2
 pred_lt4_asm:
-       STMFD   r13!, {r4 - r12, r14}
+       STMFD   r13!, {r4 - r12, r14}
        SUB     r4, r0, r1, LSL #1      @ x = exc - T0
        RSB     r2, r2, #0              @ frac = - frac
        SUB     r4, r4, #30             @ x -= L_INTERPOL2 - 1
@@ -47,8 +47,8 @@ pred_lt4_asm:
        VLD1.S16 {Q0, Q1}, [r11]!
        VLD1.S16 {Q2, Q3}, [r11]!
-
-       MOV     r6, #0x8000
+
+       MOV     r6, #0x8000
        VLD1.S16 {Q4, Q5}, [r4]!        @load 16 x[]
        VLD1.S16 {Q6, Q7}, [r4]!        @load 16 x[]
@@ -58,14 +58,14 @@ LOOP:
        VQDMLAL.S16 Q15, D9, D1
        VQDMLAL.S16 Q15, D10, D2
        VQDMLAL.S16 Q15, D11, D3
-
+
        VQDMLAL.S16 Q15, D12, D4
        VQDMLAL.S16 Q15, D13, D5
        VQDMLAL.S16 Q15, D14, D6
        VQDMLAL.S16 Q15, D15, D7
-       LDRSH   r12, [r4], #2
-
+       LDRSH   r12, [r4], #2
+
        VEXT.S16 D8, D8, D9, #1
        VEXT.S16 D9, D9, D10, #1
        VEXT.S16 D10, D10, D11, #1
@@ -73,26 +73,26 @@ LOOP:
        VDUP.S16 D24, r12
        VEXT.S16 D12, D12, D13, #1
        VEXT.S16 D13, D13, D14, #1
-
+
        VQADD.S32 D30, D30, D31
-       MOV     r11, #0x8000
+       MOV     r11, #0x8000
        VPADD.S32 D30, D30, D30
        ADD     r8, r8, #1
        VMOV.S32 r12, D30[0]
-       VEXT.S16 D14, D14, D15, #1
+       VEXT.S16 D14, D14, D15, #1
        QADD    r1, r12, r12            @ L_sum = (L_sum << 2)
        VEXT.S16 D15, D15, D24, #1
-       QADD    r5, r1, r6
+       QADD    r5, r1, r6
        MOV     r1, r5, ASR #16
        CMP     r8, r3
        STRH    r1, [r0], #2            @ exc[j] = (L_sum + 0x8000) >> 16
        BLT     LOOP
-
+
 pred_lt4_end:
-
-       LDMFD   r13!, {r4 - r12, r15}
-
+
+       LDMFD   r13!, {r4 - r12, r15}
+
 Lable1:
        .word   inter4_2
 @ENDFUNC
        .END
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/residu_asm_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/residu_asm_neon.s
index b9e6b23..394fa83 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/residu_asm_neon.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/residu_asm_neon.s
@@ -26,17 +26,17 @@
 @lg     RN      r3
        .section .text
-       .global Residu_opt
+       .global Residu_opt
 Residu_opt:
-       STMFD   r13!, {r4 - r12, r14}
+       STMFD   r13!, {r4 - r12, r14}
        SUB     r7, r3, #4              @i = lg - 4
-
-       VLD1.S16 {D0, D1, D2, D3}, [r0]! @get all a[]
+
+       VLD1.S16 {D0, D1, D2, D3}, [r0]! @get all a[]
        VLD1.S16 {D4}, [r0]!
        VMOV.S32 Q8, #0x8000
-
+
 LOOP1:
        ADD     r9, r1, r7, LSL #1      @copy the address
        ADD     r10, r2, r7, LSL #1
@@ -45,7 +45,7 @@ LOOP1:
        VQDMULL.S16 Q10, D5, D0[0]      @finish the first L_mult

        SUB     r8, r9, #2              @get the x[i-1] address
-       VLD1.S16 D5, [r8]!
+       VLD1.S16 D5, [r8]!
        VQDMLAL.S16 Q10, D5, D0[1]

        SUB     r8, r9, #4              @load the x[i-2] address
@@ -53,36 +53,36 @@ LOOP1:
        VQDMLAL.S16 Q10, D5, D0[2]

        SUB     r8, r9, #6              @load the x[i-3] address
-       VLD1.S16 D5, [r8]!
-       VQDMLAL.S16 Q10, D5, D0[3]
+       VLD1.S16 D5, [r8]!
+       VQDMLAL.S16 Q10, D5, D0[3]

        SUB     r8, r9, #8              @load the x[i-4] address
-       VLD1.S16 D5, [r8]!
-       VQDMLAL.S16 Q10, D5, D1[0]
+       VLD1.S16 D5, [r8]!
+       VQDMLAL.S16 Q10, D5, D1[0]

        SUB     r8, r9, #10             @load the x[i-5] address
-       VLD1.S16 D5, [r8]!
-       VQDMLAL.S16 Q10, D5, D1[1]
+       VLD1.S16 D5, [r8]!
+       VQDMLAL.S16 Q10, D5, D1[1]

        SUB     r8, r9, #12             @load the x[i-6] address
-       VLD1.S16 D5, [r8]!
-       VQDMLAL.S16 Q10, D5, D1[2]
+       VLD1.S16 D5, [r8]!
+       VQDMLAL.S16 Q10, D5, D1[2]

        SUB     r8, r9, #14             @load the x[i-7] address
-       VLD1.S16 D5, [r8]!
-       VQDMLAL.S16 Q10, D5, D1[3]
+       VLD1.S16 D5, [r8]!
+       VQDMLAL.S16 Q10, D5, D1[3]

        SUB     r8, r9, #16             @load the x[i-8] address
-       VLD1.S16 D5, [r8]!
-       VQDMLAL.S16 Q10, D5, D2[0]
+       VLD1.S16 D5, [r8]!
+       VQDMLAL.S16 Q10, D5, D2[0]

        SUB     r8, r9, #18             @load the x[i-9] address
-       VLD1.S16 D5, [r8]!
-       VQDMLAL.S16 Q10, D5, D2[1]
-
+       VLD1.S16 D5, [r8]!
+       VQDMLAL.S16 Q10, D5, D2[1]
+
        SUB     r8, r9, #20             @load the x[i-10] address
-       VLD1.S16 D5, [r8]!
-       VQDMLAL.S16 Q10, D5, D2[2]
+       VLD1.S16 D5, [r8]!
+       VQDMLAL.S16 Q10, D5, D2[2]

        SUB     r8, r9, #22             @load the x[i-11] address
        VLD1.S16 D5, [r8]!
@@ -117,10 +117,10 @@ LOOP1:
        BGE     LOOP1
-Residu_asm_end:
-
+Residu_asm_end:
+
        LDMFD   r13!, {r4 - r12, r15}
-
+
 @ENDFUNC
        .END
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/scale_sig_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/scale_sig_neon.s
index bbd354d..e45daac 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/scale_sig_neon.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/scale_sig_neon.s
@@ -13,7 +13,7 @@
 @ ** See the License for the specific language governing permissions and
 @ ** limitations under the License.
 @ */
-@
+@
 @**********************************************************************/
 @void Scale_sig(
 @     Word16 x[],          /* (i/o) : signal to scale  */
@@ -25,16 +25,16 @@
 @ lg  --- r1
 @ exp --- r2
        .section .text
+       .section .text
        .global Scale_sig_opt
 Scale_sig_opt:
-       STMFD   r13!, {r4 - r12, r14}
+       STMFD   r13!, {r4 - r12, r14}
        MOV     r4, #4
-       VMOV.S32 Q15, #0x8000
-       VDUP.S32 Q14, r2
-       MOV     r5, r0                  @ copy x[] address
+       VMOV.S32 Q15, #0x8000
+       VDUP.S32 Q14, r2
+       MOV     r5, r0                  @ copy x[] address
        CMP     r1, #64
        MOVEQ   r4, #1
        BEQ     LOOP
@@ -48,7 +48,7 @@ Scale_sig_opt:
        BEQ     LOOP1
 LOOP1:
-       VLD1.S16 {Q0, Q1}, [r5]!        @load 16 Word16 x[]
+       VLD1.S16 {Q0, Q1}, [r5]!        @load 16 Word16 x[]
        VSHLL.S16 Q10, D0, #16
        VSHLL.S16 Q11, D1, #16
        VSHLL.S16 Q12, D2, #16
@@ -63,7 +63,7 @@ LOOP1:
        VADDHN.S32 D19, Q13, Q15
        VST1.S16 {Q8, Q9}, [r0]!        @store 16 Word16 x[]
-LOOP:
+LOOP:
        VLD1.S16 {Q0, Q1}, [r5]!        @load 16 Word16 x[]
        VLD1.S16 {Q2, Q3}, [r5]!        @load 16 Word16 x[]
        VLD1.S16 {Q4, Q5}, [r5]!        @load 16 Word16 x[]
@@ -72,7 +72,7 @@ LOOP:
        VSHLL.S16 Q8, D0, #16
        VSHLL.S16 Q9, D1, #16
        VSHLL.S16 Q10, D2, #16
-       VSHLL.S16 Q11, D3, #16
+       VSHLL.S16 Q11, D3, #16
        VSHL.S32 Q8, Q8, Q14
        VSHL.S32 Q9, Q9, Q14
        VSHL.S32 Q10, Q10, Q14
@@ -83,7 +83,7 @@ LOOP:
        VADDHN.S32 D19, Q11, Q15
        VST1.S16 {Q8, Q9}, [r0]!        @store 16 Word16 x[]
-
+
        VSHLL.S16 Q12, D4, #16
        VSHLL.S16 Q13, D5, #16
        VSHLL.S16 Q10, D6, #16
@@ -112,7 +112,7 @@ LOOP:
        VADDHN.S32 D19, Q13, Q15
        VST1.S16 {Q8, Q9}, [r0]!        @store 16 Word16 x[]
-       VSHLL.S16 Q10, D12, #16
+       VSHLL.S16 Q10, D12, #16
        VSHLL.S16 Q11, D13, #16
        VSHLL.S16 Q12, D14, #16
        VSHLL.S16 Q13, D15, #16
@@ -123,16 +123,16 @@ LOOP:
        VADDHN.S32 D16, Q10, Q15
        VADDHN.S32 D17, Q11, Q15
        VADDHN.S32 D18, Q12, Q15
-       VADDHN.S32 D19, Q13, Q15
-       VST1.S16 {Q8, Q9}, [r0]!        @store 16 Word16 x[]
+       VADDHN.S32 D19, Q13, Q15
+       VST1.S16 {Q8, Q9}, [r0]!        @store 16 Word16 x[]

        SUBS    r4, r4, #1
-       BGT     LOOP
-
-
+       BGT     LOOP
+
+
 Scale_sig_asm_end:
-       LDMFD   r13!, {r4 - r12, r15}
+       LDMFD   r13!, {r4 - r12, r15}
 @ENDFUNC
        .END
-
+
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s
index db4559c..5731bdb 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s
@@ -27,21 +27,21 @@
 @ mem[]  --- r3
 @ m --- 16  lg --- 80  update --- 1
        .section .text
+       .section .text
        .global Syn_filt_asm
 Syn_filt_asm:
-       STMFD   r13!, {r4 - r12, r14}
+       STMFD   r13!, {r4 - r12, r14}
        SUB     r13, r13, #700          @ y_buf[L_FRAME16k + M16k]
-
+
        MOV     r4, r3                  @ copy mem[] address
        MOV     r5, r13                 @ copy yy = y_buf address

        @ for(i = 0@ i < m@ i++)
        @{
        @     *yy++ = mem[i]@
-       @}
+       @}

        VLD1.S16 {D0, D1, D2, D3}, [r4]! @load 16 mems
        VST1.S16 {D0, D1, D2, D3}, [r5]! @store 16 mem[] to *yy
@@ -54,7 +54,7 @@ Syn_filt_asm:
        VREV64.16 D0, D0
        VREV64.16 D1, D1
        VREV64.16 D2, D2
-       VREV64.16 D3, D3
+       VREV64.16 D3, D3
        MOV     r8, #0                  @ loop times
        MOV     r10, r13                @ temp = y_buf
        ADD     r4, r13, #32            @ yy[i] address
@@ -68,7 +68,7 @@ SYN_LOOP:
        ADD     r10, r4, r8, LSL #1     @ y[i], yy[i] address

        VDUP.S32 Q10, r12
-       VMULL.S16 Q5, D3, D4
+       VMULL.S16 Q5, D3, D4
        VMLAL.S16 Q5, D2, D5
        VMLAL.S16 Q5, D1, D6
        VMLAL.S16 Q5, D0, D7
@@ -82,25 +82,25 @@ SYN_LOOP:
        VDUP.S32 Q7, D10[0]

        VSUB.S32 Q9, Q10, Q7
-       VQRSHRN.S32 D20, Q9, #12
+       VQRSHRN.S32 D20, Q9, #12
        VMOV.S16 r9, D20[0]
        VEXT.8  D7, D7, D20, #2
        CMP     r8, #80
        STRH    r9, [r10]               @ yy[i]
-       STRH    r9, [r2], #2            @ y[i]
-
+       STRH    r9, [r2], #2            @ y[i]
+
        BLT     SYN_LOOP
-
+
        @ update mem[]
        ADD     r5, r13, #160           @ yy[64] address
        VLD1.S16 {D0, D1, D2, D3}, [r5]!
-       VST1.S16 {D0, D1, D2, D3}, [r3]!
+       VST1.S16 {D0, D1, D2, D3}, [r3]!
 Syn_filt_asm_end:
-
-       ADD     r13, r13, #700
-       LDMFD   r13!, {r4 - r12, r15}
+
+       ADD     r13, r13, #700
+       LDMFD   r13!, {r4 - r12, r15}
 @ENDFUNC
        .END
-
+
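A handful of fixed-point idioms recur through all of the files above: the CLZ/SUB #1 pairs compute norm_l, the ASR #16 moves compute extract_h, and the ADD #0x8000 / ASR #16 pairs implement round. Written out in C for reference; these are sketches that follow the naming the comments use, not the codec's own basic-op sources, and they ignore the saturation the QADD/QDADD forms provide.

    typedef short Word16;
    typedef int   Word32;

    /* norm_l: leading sign bits minus one (the CLZ r10 ; SUB r10, r10, #1
     * pattern, with RSBLT/CLZLT handling negative inputs). */
    static int norm_l_sketch(Word32 L_var)
    {
        unsigned u;
        if (L_var == 0)
            return 0;
        u = (unsigned)(L_var < 0 ? ~L_var : L_var);
        if (u == 0)
            return 31;                      /* L_var == -1 */
        return __builtin_clz(u) - 1;
    }

    /* extract_h: the MOV rX, rY, ASR #16 pattern. */
    static Word16 extract_h_sketch(Word32 L_var)
    {
        return (Word16)(L_var >> 16);
    }

    /* round: the ADD #0x8000 / ASR #16 pairs ("y[0] = round(L_tmp)"),
     * without the saturating add a real basic-op would use. */
    static Word16 round_sketch(Word32 L_var)
    {
        return (Word16)((L_var + 0x8000) >> 16);
    }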