Diffstat (limited to 'media/libstagefright/codecs/amrwbenc/src/asm/ARMV7')
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Deemph_32_neon.s   | 102
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s       | 127
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Filt_6k_7k_neon.s  | 228
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s   | 270
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s | 133
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s    | 178
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/cor_h_vec_neon.s   | 151
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/pred_lt4_1_neon.s  | 100
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/residu_asm_neon.s  | 127
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/scale_sig_neon.s   | 138
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s    | 106
11 files changed, 1660 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Deemph_32_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Deemph_32_neon.s
new file mode 100644
index 0000000..acb60c3
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Deemph_32_neon.s
@@ -0,0 +1,102 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@void Deemph_32(
+@ Word16 x_hi[], /* (i) : input signal (bit31..16) */
+@ Word16 x_lo[], /* (i) : input signal (bit15..4) */
+@ Word16 y[], /* (o) : output signal (x16) */
+@ Word16 mu, /* (i) Q15 : deemphasis factor */
+@ Word16 L, /* (i) : vector size */
+@ Word16 * mem /* (i/o) : memory (y[-1]) */
+@ )
+
+@x_hi RN R0
+@x_lo RN R1
+@y[] RN R2
+@*mem RN R3
+
+ .section .text
+ .global Deemph_32_asm
+
+Deemph_32_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ MOV r4, #2 @ i = 2 (y[0] and y[1] are computed before the loop)
+ LDRSH r6, [r0], #2 @load x_hi[0]
+ LDRSH r7, [r1], #2 @load x_lo[0]
+ LDR r5, =22282 @r5---mu
+ MOV r11, #0x8000
+
+ @y[0]
+ MOV r10, r6, LSL #16 @L_tmp = x_hi[0]<<16
+ MOV r8, r5, ASR #1 @fac = mu >> 1
+ LDR r5, [r3]
+ ADD r12, r10, r7, LSL #4 @L_tmp += x_lo[0] << 4
+ MOV r10, r12, LSL #3 @L_tmp <<= 3
+ MUL r9, r5, r8
+ LDRSH r6, [r0], #2 @load x_hi[1]
+ QDADD r10, r10, r9
+ LDRSH r7, [r1], #2 @load x_lo[1]
+ MOV r12, r10, LSL #1 @L_tmp = L_mac(L_tmp, *mem, fac)
+ QADD r10, r12, r11
+ MOV r14, r10, ASR #16 @y[0] = round(L_tmp)
+
+
+ MOV r10, r6, LSL #16
+ ADD r12, r10, r7, LSL #4
+ STRH r14, [r2], #2 @update y[0]
+ MOV r10, r12, LSL #3
+ MUL r9, r14, r8
+ QDADD r10, r10, r9
+ MOV r12, r10, LSL #1
+ QADD r10, r12, r11
+ MOV r14, r10, ASR #16 @y[1] = round(L_tmp)
+
+LOOP:
+ LDRSH r6, [r0], #2 @load x_hi[]
+ LDRSH r7, [r1], #2
+ STRH r14, [r2], #2
+ MOV r10, r6, LSL #16
+ ADD r12, r10, r7, LSL #4
+ MUL r9, r14, r8
+ MOV r10, r12, LSL #3
+ QDADD r10, r10, r9
+ LDRSH r6, [r0], #2 @load x_hi[]
+ MOV r12, r10, LSL #1
+ QADD r10, r12, r11
+ LDRSH r7, [r1], #2
+ MOV r14, r10, ASR #16
+
+ MOV r10, r6, LSL #16
+ ADD r12, r10, r7, LSL #4
+ STRH r14, [r2], #2
+ MUL r9, r14, r8
+ MOV r10, r12, LSL #3
+ QDADD r10, r10, r9
+ ADD r4, r4, #2
+ MOV r12, r10, LSL #1
+ QADD r10, r12, r11
+ CMP r4, #64
+ MOV r14, r10, ASR #16
+
+ BLT LOOP
+ STR r14, [r3]
+ STRH r14, [r2]
+
+ LDMFD r13!, {r4 - r12, r15}
+
+ .END
+
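
For reference, a minimal C sketch of the de-emphasis recursion this file implements (fixed-point basic ops inlined, saturation omitted; the assembly hard-codes mu = 22282, i.e. 0.68 in Q15, and L = 64, and the _ref name is mine):

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;

    void Deemph_32_ref(Word16 x_hi[], Word16 x_lo[], Word16 y[],
                       Word16 mu, Word16 L, Word16 *mem)
    {
        Word16 i, fac = mu >> 1;                  /* Q15 -> Q14 */
        Word32 L_tmp;

        for (i = 0; i < L; i++) {
            L_tmp  = ((Word32)x_hi[i] << 16) + ((Word32)x_lo[i] << 4);
            L_tmp <<= 3;
            /* feedback: y[-1] comes from *mem, then the previous output */
            L_tmp += 2 * (Word32)(i == 0 ? *mem : y[i - 1]) * fac;
            L_tmp <<= 1;
            y[i] = (Word16)((L_tmp + 0x8000) >> 16);   /* round() */
        }
        *mem = y[L - 1];
    }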
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s
new file mode 100644
index 0000000..07ca344
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s
@@ -0,0 +1,127 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@**********************************************************************/
+@Word32 Dot_product12( /* (o) Q31: normalized result (-1 < val <= 1) */
+@ Word16 x[], /* (i) 12bits: x vector */
+@ Word16 y[], /* (i) 12bits: y vector */
+@ Word16 lg, /* (i) : vector length */
+@ Word16 * exp /* (o) : exponent of result (0..+30) */
+@)
+@************************************************************************
+@ x[] --- r0
+@ y[] --- r1
+@ lg --- r2
+@ *exp --- r3
+
+ .section .text
+ .global Dot_product12_asm
+
+Dot_product12_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ CMP r0, r1
+ BEQ LOOP_EQ
+
+ VLD1.S16 {Q0, Q1}, [r0]! @load 16 Word16 x[]
+ VLD1.S16 {Q2, Q3}, [r0]! @load 16 Word16 x[]
+ VLD1.S16 {Q4, Q5}, [r0]! @load 16 Word16 x[]
+ VLD1.S16 {Q6, Q7}, [r0]! @load 16 Word16 x[]
+ VLD1.S16 {Q8, Q9}, [r1]! @load 16 Word16 y[]
+ VLD1.S16 {Q10, Q11}, [r1]! @load 16 Word16 y[]
+ VLD1.S16 {Q12, Q13}, [r1]! @load 16 Word16 y[]
+
+ VMULL.S16 Q15, D16, D0
+ VMLAL.S16 Q15, D17, D1
+ VMLAL.S16 Q15, D18, D2
+ VMLAL.S16 Q15, D19, D3
+ VLD1.S16 {Q0, Q1}, [r1]! @load 16 Word16 y[]
+ VMLAL.S16 Q15, D20, D4
+ VMLAL.S16 Q15, D21, D5
+ VMLAL.S16 Q15, D22, D6
+ VMLAL.S16 Q15, D23, D7
+ VMLAL.S16 Q15, D24, D8
+ VMLAL.S16 Q15, D25, D9
+ VMLAL.S16 Q15, D26, D10
+ VMLAL.S16 Q15, D27, D11
+ VMLAL.S16 Q15, D0, D12
+ VMLAL.S16 Q15, D1, D13
+ VMLAL.S16 Q15, D2, D14
+ VMLAL.S16 Q15, D3, D15
+
+ CMP r2, #64
+ BEQ Lable1
+ VLD1.S16 {Q0, Q1}, [r0]! @load 16 Word16 x[]
+ VLD1.S16 {Q2, Q3}, [r1]!
+ VMLAL.S16 Q15, D4, D0
+ VMLAL.S16 Q15, D5, D1
+ VMLAL.S16 Q15, D6, D2
+ VMLAL.S16 Q15, D7, D3
+ B Lable1 @ unconditional branch (no link needed)
+
+LOOP_EQ:
+ VLD1.S16 {Q0, Q1}, [r0]!
+ VLD1.S16 {Q2, Q3}, [r0]!
+ VLD1.S16 {Q4, Q5}, [r0]!
+ VLD1.S16 {Q6, Q7}, [r0]!
+ VMULL.S16 Q15, D0, D0
+ VMLAL.S16 Q15, D1, D1
+ VMLAL.S16 Q15, D2, D2
+ VMLAL.S16 Q15, D3, D3
+ VMLAL.S16 Q15, D4, D4
+ VMLAL.S16 Q15, D5, D5
+ VMLAL.S16 Q15, D6, D6
+ VMLAL.S16 Q15, D7, D7
+ VMLAL.S16 Q15, D8, D8
+ VMLAL.S16 Q15, D9, D9
+ VMLAL.S16 Q15, D10, D10
+ VMLAL.S16 Q15, D11, D11
+ VMLAL.S16 Q15, D12, D12
+ VMLAL.S16 Q15, D13, D13
+ VMLAL.S16 Q15, D14, D14
+ VMLAL.S16 Q15, D15, D15
+
+ CMP r2, #64
+ BEQ Lable1
+ VLD1.S16 {Q0, Q1}, [r0]!
+ VMLAL.S16 Q15, D0, D0
+ VMLAL.S16 Q15, D1, D1
+ VMLAL.S16 Q15, D2, D2
+ VMLAL.S16 Q15, D3, D3
+
+Lable1:
+
+ VQADD.S32 D30, D30, D31
+ VPADD.S32 D30, D30, D30
+ VMOV.S32 r12, D30[0]
+
+ ADD r12, r12, r12
+ ADD r12, r12, #1 @ L_sum = (L_sum << 1) + 1
+ MOV r4, r12
+ CMP r12, #0
+ RSBLT r4, r12, #0
+ CLZ r10, r4
+ SUB r10, r10, #1 @ sft = norm_l(L_sum)
+ MOV r0, r12, LSL r10 @ L_sum = L_sum << sft
+ RSB r11, r10, #30 @ *exp = 30 - sft
+ STRH r11, [r3]
+
+Dot_product12_end:
+
+ LDMFD r13!, {r4 - r12, r15}
+
+ .END
+
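
The scalar routine being vectorized is, in outline (a sketch only; the assembly handles lg of 64 or 80, special-cases x == y as a pure energy sum, and inlines norm_l as the CLZ sequence above):

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;

    Word32 Dot_product12_ref(Word16 x[], Word16 y[], Word16 lg, Word16 *exp)
    {
        Word16 i, sft;
        Word32 L_sum = 0, v;

        for (i = 0; i < lg; i++)
            L_sum += (Word32)x[i] * y[i];
        L_sum = (L_sum << 1) + 1;             /* odd, so never zero */

        v = (L_sum < 0) ? -L_sum : L_sum;     /* norm_l: CLZ - 1 */
        for (sft = 0; v < 0x40000000; sft++)
            v <<= 1;

        *exp = 30 - sft;                      /* 0..30 */
        return L_sum << sft;                  /* normalized Q31 */
    }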
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Filt_6k_7k_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Filt_6k_7k_neon.s
new file mode 100644
index 0000000..1880024
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Filt_6k_7k_neon.s
@@ -0,0 +1,228 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@**********************************************************************/
+@void Filt_6k_7k(
+@ Word16 signal[], /* input: signal */
+@ Word16 lg, /* input: length of input */
+@ Word16 mem[] /* in/out: memory (size=30) */
+@)
+@***********************************************************************
+@ r0 --- signal[]
+@ r1 --- lg
+@ r2 --- mem[]
+
+ .section .text
+ .global Filt_6k_7k_asm
+ .extern fir_6k_7k
+
+Filt_6k_7k_asm:
+
+ STMFD r13!, {r0 - r12, r14}
+ SUB r13, r13, #240 @ x[L_SUBFR16k + (L_FIR - 1)]
+ MOV r8, r0 @ copy signal[] address
+ MOV r5, r2 @ copy mem[] address
+
+ MOV r0, r2
+ MOV r1, r13
+
+ VLD1.S16 {D0, D1, D2, D3}, [r0]!
+ VLD1.S16 {D4, D5, D6, D7}, [r0]!
+
+ VST1.S16 {D0, D1, D2, D3}, [r1]!
+ VST1.S16 {D4, D5, D6}, [r1]!
+ VST1.S16 D7[0], [r1]!
+ VST1.S16 D7[1], [r1]!
+
+
+
+ LDR r10, Lable1 @ get fir_7k address
+ MOV r3, r8 @ restore signal[] address to r3 (the Copy routine clobbers r3)
+ ADD r6, r13, #60 @ get x[L_FIR - 1] address
+ MOV r7, r3 @ get signal[i]
+ @for (i = lg - 1@ i >= 0@ i--)
+ @{
+ @ x[i + L_FIR - 1] = signal[i] >> 2@
+ @}
+ VLD1.S16 {Q0, Q1}, [r7]! @ signal[0] ~ signal[15]
+ VLD1.S16 {Q2, Q3}, [r7]! @ signal[16] ~ signal[31]
+ VLD1.S16 {Q4, Q5}, [r7]! @ signal[32] ~ signal[47]
+ VLD1.S16 {Q6, Q7}, [r7]! @ signal[48] ~ signal[63]
+ VLD1.S16 {Q8, Q9}, [r7]! @ signal[64] ~ signal[79]
+ VSHR.S16 Q10, Q0, #2
+ VSHR.S16 Q11, Q1, #2
+ VSHR.S16 Q12, Q2, #2
+ VSHR.S16 Q13, Q3, #2
+ VST1.S16 {Q10, Q11}, [r6]!
+ VSHR.S16 Q0, Q4, #2
+ VSHR.S16 Q1, Q5, #2
+ VSHR.S16 Q10, Q6, #2
+ VSHR.S16 Q11, Q7, #2
+ VSHR.S16 Q2, Q8, #2
+ VSHR.S16 Q3, Q9, #2
+ VST1.S16 {Q12, Q13}, [r6]!
+ VST1.S16 {Q0, Q1}, [r6]!
+ VST1.S16 {Q10, Q11}, [r6]!
+ VST1.S16 {Q2, Q3}, [r6]!
+
+ MOV r12, r5
+ @STR r5, [sp, #-4] @ PUSH r5 to stack
+ @ not use registers: r4, r10, r12, r14, r5
+ MOV r4, r13
+ MOV r5, #0 @ i = 0
+
+ @ r4 --- x[i], r10 ---- fir_6k_7k
+ VLD1.S16 {Q0, Q1}, [r10]! @fir_6k_7k[0] ~ fir_6k_7k[15]
+ VLD1.S16 {Q2, Q3}, [r10]! @fir_6k_7k[16] ~ fir_6k_7k[31]
+ VMOV.S16 D7[3], r5 @set fir_6k_7k[31] = 0 (pad the 31-tap filter to 32)
+
+ VLD1.S16 {Q4, Q5}, [r4]! @x[0] ~ x[15]
+ VLD1.S16 {Q6, Q7}, [r4]! @x[16] ~ X[31]
+ VLD1.S16 {Q8}, [r4]!
+ VMOV.S16 Q15, #0
+
+LOOP_6K7K:
+
+ VMULL.S16 Q9,D8,D0[0]
+ VMULL.S16 Q10,D9,D1[0]
+ VMULL.S16 Q11,D9,D0[0]
+ VMULL.S16 Q12,D10,D1[0]
+ VEXT.8 Q4,Q4,Q5,#2
+ VMLAL.S16 Q9,D10,D2[0]
+ VMLAL.S16 Q10,D11,D3[0]
+ VMLAL.S16 Q11,D11,D2[0]
+ VMLAL.S16 Q12,D12,D3[0]
+ VEXT.8 Q5,Q5,Q6,#2
+ VMLAL.S16 Q9,D12,D4[0]
+ VMLAL.S16 Q10,D13,D5[0]
+ VMLAL.S16 Q11,D13,D4[0]
+ VMLAL.S16 Q12,D14,D5[0]
+ VEXT.8 Q6,Q6,Q7,#2
+ VMLAL.S16 Q9,D14,D6[0]
+ VMLAL.S16 Q10,D15,D7[0]
+ VMLAL.S16 Q11,D15,D6[0]
+ VMLAL.S16 Q12,D16,D7[0]
+ VEXT.8 Q7,Q7,Q8,#2
+
+ VMLAL.S16 Q9,D8,D0[1]
+ VMLAL.S16 Q10,D9,D1[1]
+ VEXT.8 Q8,Q8,Q15,#2
+ VMLAL.S16 Q11,D9,D0[1]
+ VMLAL.S16 Q12,D10,D1[1]
+ VEXT.8 Q4,Q4,Q5,#2
+ VMLAL.S16 Q9,D10,D2[1]
+ VMLAL.S16 Q10,D11,D3[1]
+ VMLAL.S16 Q11,D11,D2[1]
+ VMLAL.S16 Q12,D12,D3[1]
+ VEXT.8 Q5,Q5,Q6,#2
+ VMLAL.S16 Q9,D12,D4[1]
+ VMLAL.S16 Q10,D13,D5[1]
+ VMLAL.S16 Q11,D13,D4[1]
+ VMLAL.S16 Q12,D14,D5[1]
+ VEXT.8 Q6,Q6,Q7,#2
+ VMLAL.S16 Q9,D14,D6[1]
+ VMLAL.S16 Q10,D15,D7[1]
+ VMLAL.S16 Q11,D15,D6[1]
+ VMLAL.S16 Q12,D16,D7[1]
+ VEXT.8 Q7,Q7,Q8,#2
+
+ VMLAL.S16 Q9,D8,D0[2]
+ VMLAL.S16 Q10,D9,D1[2]
+ VEXT.8 Q8,Q8,Q15,#2
+ VMLAL.S16 Q11,D9,D0[2]
+ VMLAL.S16 Q12,D10,D1[2]
+ VEXT.8 Q4,Q4,Q5,#2
+ VMLAL.S16 Q9,D10,D2[2]
+ VMLAL.S16 Q10,D11,D3[2]
+ VMLAL.S16 Q11,D11,D2[2]
+ VMLAL.S16 Q12,D12,D3[2]
+ VEXT.8 Q5,Q5,Q6,#2
+ VMLAL.S16 Q9,D12,D4[2]
+ VMLAL.S16 Q10,D13,D5[2]
+ VMLAL.S16 Q11,D13,D4[2]
+ VMLAL.S16 Q12,D14,D5[2]
+ VEXT.8 Q6,Q6,Q7,#2
+ VMLAL.S16 Q9,D14,D6[2]
+ VMLAL.S16 Q10,D15,D7[2]
+ VMLAL.S16 Q11,D15,D6[2]
+ VMLAL.S16 Q12,D16,D7[2]
+ VEXT.8 Q7,Q7,Q8,#2
+
+ VMLAL.S16 Q9,D8,D0[3]
+ VMLAL.S16 Q10,D9,D1[3]
+ VEXT.8 Q8,Q8,Q15,#2
+ VMLAL.S16 Q11,D9,D0[3]
+ VMLAL.S16 Q12,D10,D1[3]
+ VEXT.8 Q4,Q4,Q5,#2
+ VMLAL.S16 Q9,D10,D2[3]
+ VMLAL.S16 Q10,D11,D3[3]
+ VMLAL.S16 Q11,D11,D2[3]
+ VMLAL.S16 Q12,D12,D3[3]
+ VEXT.8 Q5,Q5,Q6,#2
+ VMLAL.S16 Q9,D12,D4[3]
+ VMLAL.S16 Q10,D13,D5[3]
+ VMLAL.S16 Q11,D13,D4[3]
+ VMLAL.S16 Q12,D14,D5[3]
+ VEXT.8 Q6,Q6,Q7,#2
+ VMLAL.S16 Q9,D14,D6[3]
+ VMLAL.S16 Q10,D15,D7[3]
+ VMLAL.S16 Q11,D15,D6[3]
+ VMLAL.S16 Q12,D16,D7[3]
+ VEXT.8 Q7,Q7,Q8,#2
+
+ VMOV.S16 D8,D9
+ VEXT.8 Q8,Q8,Q15,#2
+ VMOV.S16 D9,D10
+ VADD.S32 Q9,Q9,Q10
+ VMOV.S16 D10,D11
+ VMOV.S16 D11,D12
+ VADD.S32 Q11,Q11,Q12
+ VMOV.S16 D12,D13
+ VQRSHRN.S32 D28,Q9,#15
+ VMOV.S16 D13,D14
+ VMOV.S16 D14,D15
+ VQRSHRN.S32 D29,Q11,#15
+ VMOV.S16 D15,D16
+
+ VLD1.S16 {Q8},[r4]!
+ ADD r5, r5, #8
+ CMP r5, #80
+ VST1.S16 {D28,D29},[r3]!
+ BLT LOOP_6K7K
+
+ ADD r0, r13, #160 @x + lg
+ MOV r1, r12
+ @LDR r1, [sp, #-4] @mem address
+
+ VLD1.S16 {D0, D1, D2, D3}, [r0]!
+ VLD1.S16 {D4, D5, D6, D7}, [r0]!
+
+ VST1.S16 {D0, D1, D2, D3}, [r1]!
+ VST1.S16 {D4, D5, D6}, [r1]!
+ VST1.S16 D7[0], [r1]!
+ VST1.S16 D7[1], [r1]!
+
+Filt_6k_7k_end:
+
+ ADD r13, r13, #240
+ LDMFD r13!, {r0 - r12, r15}
+
+Lable1:
+ .word fir_6k_7k
+ @ENDFUNC
+ .END
+
+
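
A C sketch of the 31-tap band-pass FIR being applied (L_FIR = 31, L_SUBFR16k = 80; the assembly pads fir_6k_7k with one zero tap so it can process 8 lanes at a time; the _ref name is mine):

    #include <string.h>
    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;
    #define L_FIR      31
    #define L_SUBFR16k 80
    extern const Word16 fir_6k_7k[L_FIR];     /* Q15 table from the codec */

    void Filt_6k_7k_ref(Word16 signal[], Word16 lg, Word16 mem[])
    {
        Word16 x[L_SUBFR16k + (L_FIR - 1)];
        Word32 L_tmp;
        int i, j;

        memcpy(x, mem, (L_FIR - 1) * sizeof(Word16));      /* history */
        for (i = 0; i < lg; i++)
            x[i + L_FIR - 1] = signal[i] >> 2;             /* MAC headroom */

        for (i = 0; i < lg; i++) {
            L_tmp = 0;
            for (j = 0; j < L_FIR; j++)
                L_tmp += (Word32)x[i + j] * fir_6k_7k[j];
            signal[i] = (Word16)((L_tmp + 0x4000) >> 15);  /* VQRSHRN #15 */
        }
        memcpy(mem, x + lg, (L_FIR - 1) * sizeof(Word16)); /* new history */
    }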
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s
new file mode 100644
index 0000000..60e9ade
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s
@@ -0,0 +1,270 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@static void Norm_Corr (Word16 exc[], /* (i) : excitation buffer */
+@ Word16 xn[], /* (i) : target vector */
+@ Word16 h[], /* (i) Q15 : impulse response of synth/wgt filters */
+@ Word16 L_subfr, /* (i) : sub-frame length */
+@ Word16 t_min, /* (i) : minimum value of pitch lag. */
+@ Word16 t_max, /* (i) : maximum value of pitch lag. */
+@ Word16 corr_norm[]) /* (o) Q15 : normalized correlation */
+@
+
+@ r0 --- exc[]
+@ r1 --- xn[]
+@ r2 --- h[]
+@ r3 --- L_subfr
+@ r4 --- t_min
+@ r5 --- t_max
+@ r6 --- corr_norm[]
+
+
+ .section .text
+ .global Norm_corr_asm
+ .extern Convolve_asm
+ .extern Isqrt_n
+@******************************
+@ constant
+@******************************
+.equ EXC , 0
+.equ XN , 4
+.equ H , 8
+.equ L_SUBFR , 12
+.equ voSTACK , 172
+.equ T_MIN , 212
+.equ T_MAX , 216
+.equ CORR_NORM , 220
+
+Norm_corr_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ SUB r13, r13, #voSTACK
+
+ ADD r8, r13, #20 @get the excf[L_SUBFR]
+ LDR r4, [r13, #T_MIN] @get t_min
+ RSB r11, r4, #0 @k = -t_min
+ ADD r5, r0, r11, LSL #1 @get the &exc[k]
+
+ @transfer Convolve function
+ STMFD sp!, {r0 - r3}
+ MOV r0, r5
+ MOV r1, r2
+ MOV r2, r8 @r2 --- excf[]
+ BL Convolve_asm
+ LDMFD sp!, {r0 - r3}
+
+ @ r8 --- excf[]
+
+ MOV r14, r1 @copy xn[] address
+ MOV r7, #1
+ VLD1.S16 {Q0, Q1}, [r14]!
+ VLD1.S16 {Q2, Q3}, [r14]!
+ VLD1.S16 {Q4, Q5}, [r14]!
+ VLD1.S16 {Q6, Q7}, [r14]!
+
+ VMULL.S16 Q10, D0, D0
+ VMLAL.S16 Q10, D1, D1
+ VMLAL.S16 Q10, D2, D2
+ VMLAL.S16 Q10, D3, D3
+ VMLAL.S16 Q10, D4, D4
+ VMLAL.S16 Q10, D5, D5
+ VMLAL.S16 Q10, D6, D6
+ VMLAL.S16 Q10, D7, D7
+ VMLAL.S16 Q10, D8, D8
+ VMLAL.S16 Q10, D9, D9
+ VMLAL.S16 Q10, D10, D10
+ VMLAL.S16 Q10, D11, D11
+ VMLAL.S16 Q10, D12, D12
+ VMLAL.S16 Q10, D13, D13
+ VMLAL.S16 Q10, D14, D14
+ VMLAL.S16 Q10, D15, D15
+
+ VQADD.S32 D20, D20, D21
+ VMOV.S32 r9, D20[0]
+ VMOV.S32 r10, D20[1]
+ QADD r6, r9, r10
+ QADD r6, r6, r6
+ QADD r9, r6, r7 @L_tmp = (L_tmp << 1) + 1;
+ CLZ r7, r9
+ SUB r6, r7, #1 @exp = norm_l(L_tmp)
+ RSB r7, r6, #32 @exp = 32 - exp
+ MOV r6, r7, ASR #1
+ RSB r7, r6, #0 @scale = -(exp >> 1)
+
+ @loop for every possible period
+ @for(t = t_min@ t <= t_max@ t++)
+ @r7 --- scale r4 --- t_min r8 --- excf[]
+
+LOOPFOR:
+ ADD r14, r13, #20 @copy of excf[]
+ MOV r12, r1 @copy of xn[]
+ MOV r8, #0x8000
+
+ VLD1.S16 {Q0, Q1}, [r14]! @ load 16 excf[]
+ VLD1.S16 {Q2, Q3}, [r14]! @ load 16 excf[]
+ VLD1.S16 {Q4, Q5}, [r12]! @ load 16 x[]
+ VLD1.S16 {Q6, Q7}, [r12]! @ load 16 x[]
+ VMULL.S16 Q10, D0, D0 @L_tmp1 += excf[] * excf[]
+ VMULL.S16 Q11, D0, D8 @L_tmp += x[] * excf[]
+ VMLAL.S16 Q10, D1, D1
+ VMLAL.S16 Q11, D1, D9
+ VMLAL.S16 Q10, D2, D2
+ VMLAL.S16 Q11, D2, D10
+ VMLAL.S16 Q10, D3, D3
+ VMLAL.S16 Q11, D3, D11
+ VMLAL.S16 Q10, D4, D4
+ VMLAL.S16 Q11, D4, D12
+ VMLAL.S16 Q10, D5, D5
+ VMLAL.S16 Q11, D5, D13
+ VMLAL.S16 Q10, D6, D6
+ VMLAL.S16 Q11, D6, D14
+ VMLAL.S16 Q10, D7, D7
+ VMLAL.S16 Q11, D7, D15
+
+ VLD1.S16 {Q0, Q1}, [r14]! @ load 16 excf[]
+ VLD1.S16 {Q2, Q3}, [r14]! @ load 16 excf[]
+ VLD1.S16 {Q4, Q5}, [r12]! @ load 16 x[]
+ VLD1.S16 {Q6, Q7}, [r12]! @ load 16 x[]
+ VMLAL.S16 Q10, D0, D0
+ VMLAL.S16 Q11, D0, D8
+ VMLAL.S16 Q10, D1, D1
+ VMLAL.S16 Q11, D1, D9
+ VMLAL.S16 Q10, D2, D2
+ VMLAL.S16 Q11, D2, D10
+ VMLAL.S16 Q10, D3, D3
+ VMLAL.S16 Q11, D3, D11
+ VMLAL.S16 Q10, D4, D4
+ VMLAL.S16 Q11, D4, D12
+ VMLAL.S16 Q10, D5, D5
+ VMLAL.S16 Q11, D5, D13
+ VMLAL.S16 Q10, D6, D6
+ VMLAL.S16 Q11, D6, D14
+ VMLAL.S16 Q10, D7, D7
+ VMLAL.S16 Q11, D7, D15
+
+ VQADD.S32 D20, D20, D21
+ VQADD.S32 D22, D22, D23
+
+ VPADD.S32 D20, D20, D20 @D20[0] --- L_tmp1 << 1
+ VPADD.S32 D22, D22, D22 @D22[0] --- L_tmp << 1
+
+ VMOV.S32 r6, D20[0]
+ VMOV.S32 r5, D22[0]
+
+ @r5 --- L_tmp, r6 --- L_tmp1
+ MOV r10, #1
+ ADD r5, r10, r5, LSL #1 @L_tmp = (L_tmp << 1) + 1
+ ADD r6, r10, r6, LSL #1 @L_tmp1 = (L_tmp1 << 1) + 1
+
+ CLZ r10, r5
+ CMP r5, #0
+ RSBLT r11, r5, #0
+ CLZLT r10, r11
+ SUB r10, r10, #1 @exp = norm_l(L_tmp)
+
+ MOV r5, r5, LSL r10 @L_tmp = (L_tmp << exp)
+ RSB r10, r10, #30 @exp_corr = 30 - exp
+ MOV r11, r5, ASR #16 @corr = extract_h(L_tmp)
+
+ CLZ r5, r6
+ SUB r5, r5, #1
+ MOV r6, r6, LSL r5 @L_tmp = (L_tmp1 << exp)
+ RSB r5, r5, #30 @exp_norm = 30 - exp
+
+ @r10 --- exp_corr, r11 --- corr
+ @r6 --- L_tmp, r5 --- exp_norm
+
+ @Isqrt_n(&L_tmp, &exp_norm)
+
+ MOV r14, r0
+ MOV r12, r1
+
+ STMFD sp!, {r0 - r4, r7 - r12, r14}
+ ADD r1, sp, #4
+ ADD r0, sp, #0
+ STR r6, [sp]
+ STRH r5, [sp, #4]
+ BL Isqrt_n
+ LDR r6, [sp]
+ LDRSH r5, [sp, #4]
+ LDMFD sp!, {r0 - r4, r7 - r12, r14}
+ MOV r0, r14
+ MOV r1, r12
+
+
+ MOV r6, r6, ASR #16 @norm = extract_h(L_tmp)
+ MUL r12, r6, r11
+ ADD r12, r12, r12 @L_tmp = vo_L_mult(corr, norm)
+
+ ADD r6, r10, r5
+ ADD r6, r6, r7 @exp_corr + exp_norm + scale
+
+ CMP r6, #0
+ RSBLT r6, r6, #0
+ MOVLT r12, r12, ASR r6
+ MOVGT r12, r12, LSL r6 @L_tmp = L_shl(L_tmp, exp_corr + exp_norm + scale)
+
+ ADD r12, r12, r8
+ MOV r12, r12, ASR #16 @vo_round(L_tmp)
+
+ LDR r5, [r13, #CORR_NORM] @ get corr_norm address
+ LDR r6, [r13, #T_MAX] @ get t_max
+ ADD r10, r5, r4, LSL #1 @ get corr_norm[t] address
+ STRH r12, [r10] @ corr_norm[t] = vo_round(L_tmp)
+
+ CMP r4, r6
+ BEQ Norm_corr_asm_end
+
+ ADD r4, r4, #1 @ t_min ++
+ RSB r5, r4, #0 @ k
+
+ MOV r6, #63 @ i = 63
+ MOV r8, r0 @ exc[]
+ MOV r9, r2 @ h[]
+ ADD r10, r13, #20 @ excf[]
+
+ ADD r8, r8, r5, LSL #1 @ exc[k] address
+ ADD r9, r9, r6, LSL #1 @ h[i] address
+ ADD r10, r10, r6, LSL #1 @ excf[i] address
+ LDRSH r11, [r8] @ tmp = exc[k]
+
+LOOPK:
+ LDRSH r8, [r9], #-2 @ load h[i]
+ LDRSH r12, [r10, #-2] @ load excf[i - 1]
+ MUL r14, r11, r8
+ MOV r8, r14, ASR #15
+ ADD r14, r8, r12
+ STRH r14, [r10], #-2
+ SUBS r6, r6, #1
+ BGT LOOPK
+
+ LDRSH r8, [r9] @ load h[0]
+ MUL r14, r11, r8
+ LDR r6, [r13, #T_MAX] @ get t_max
+ MOV r8, r14, ASR #15
+ STRH r8, [r10]
+
+ CMP r4, r6
+ BLE LOOPFOR
+
+Norm_corr_asm_end:
+
+ ADD r13, r13, #voSTACK
+ LDMFD r13!, {r4 - r12, r15}
+
+ .END
+
+
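
The LOOPK block above is the key trick: instead of re-running the convolution for every candidate lag, excf[] is slid by one sample per lag. A C sketch of that update (L_SUBFR = 64, saturation omitted; the helper name is mine):

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;

    /* Slide the filtered excitation from lag t to lag t+1:
     * excf[i] = (exc[-(t+1)] * h[i] >> 15) + old excf[i-1]          */
    static void norm_corr_update_excf(const Word16 *exc_k,  /* &exc[-(t+1)] */
                                      const Word16 h[64], Word16 excf[64])
    {
        Word16 tmp = *exc_k;
        int i;
        for (i = 63; i > 0; i--)
            excf[i] = (Word16)((((Word32)tmp * h[i]) >> 15) + excf[i - 1]);
        excf[0] = (Word16)(((Word32)tmp * h[0]) >> 15);
    }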
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s
new file mode 100644
index 0000000..cb1764f
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s
@@ -0,0 +1,133 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@**********************************************************************/
+@void Syn_filt_32(
+@ Word16 a[], /* (i) Q12 : a[m+1] prediction coefficients */
+@ Word16 m, /* (i) : order of LP filter */
+@ Word16 exc[], /* (i) Qnew: excitation (exc[i] >> Qnew) */
+@ Word16 Qnew, /* (i) : exc scaling = 0(min) to 8(max) */
+@ Word16 sig_hi[], /* (o) /16 : synthesis high */
+@ Word16 sig_lo[], /* (o) /16 : synthesis low */
+@ Word16 lg /* (i) : size of filtering */
+@)
+@***********************************************************************
+@ a[] --- r0
+@ m --- r1
+@ exc[] --- r2
+@ Qnew --- r3
+@ sig_hi[] --- r4
+@ sig_lo[] --- r5
+@ lg --- r6
+
+ .section .text
+ .global Syn_filt_32_asm
+
+Syn_filt_32_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ LDR r4, [r13, #40] @ get sig_hi[] address
+ LDR r5, [r13, #44] @ get sig_lo[] address
+
+ LDRSH r6, [r0], #2 @ load Aq[0]
+ ADD r7, r3, #4 @ 4 + Q_new
+ MOV r3, r6, ASR r7 @ a0 = Aq[0] >> (4 + Q_new)
+
+ SUB r10, r4, #32 @ sig_hi[-16] address
+ SUB r11, r5, #32 @ sig_lo[-16] address
+
+ VLD1.S16 {D0, D1, D2, D3}, [r0]! @a[1] ~ a[16]
+
+ MOV r8, #0 @ i = 0
+
+ VLD1.S16 {D4, D5, D6, D7}, [r10]! @ sig_hi[-16] ~ sig_hi[-1]
+ VREV64.16 D0, D0
+ VREV64.16 D1, D1
+ VLD1.S16 {D8, D9, D10, D11}, [r11]! @ sig_lo[-16] ~ sig_lo[-1]
+ VREV64.16 D2, D2
+ VREV64.16 D3, D3
+ VDUP.S32 Q15, r8
+
+SYN_LOOP:
+
+ LDRSH r6, [r2], #2 @exc[i]
+ @L_tmp = L_msu(L_tmp, sig_lo[i - j], a[j])@
+ VMULL.S16 Q10, D8, D3
+ VEXT.8 D8, D8, D9, #2
+ VMLAL.S16 Q10, D9, D2
+ VMLAL.S16 Q10, D10, D1
+ VMLAL.S16 Q10, D11, D0
+
+ VEXT.8 D9, D9, D10, #2
+ VEXT.8 D10, D10, D11, #2
+
+ VPADD.S32 D28, D20, D21
+ MUL r12, r6, r3 @exc[i] * a0
+ VPADD.S32 D29, D28, D28
+ VDUP.S32 Q10, D29[0] @result1
+
+ VMULL.S16 Q11, D4, D3
+ VMLAL.S16 Q11, D5, D2
+ VSUB.S32 Q10, Q15, Q10
+ @L_tmp = L_msu(L_tmp, sig_hi[i - j], a[j])@
+
+ VMLAL.S16 Q11, D6, D1
+ VEXT.8 D4, D4, D5, #2
+ VMLAL.S16 Q11, D7, D0
+
+
+ VEXT.8 D5, D5, D6, #2
+ VEXT.8 D6, D6, D7, #2
+
+ VPADD.S32 D28, D22, D23
+ VPADD.S32 D29, D28, D28
+ MOV r14, r12, LSL #1 @exc[i] * a0 << 1
+ VDUP.S32 Q11, D29[0] @result2
+
+
+
+ VSHR.S32 Q10, Q10, #11 @result1 >>= 11
+ VSHL.S32 Q11, Q11, #1 @result2 <<= 1
+ VDUP.S32 Q12, r14
+ VADD.S32 Q12, Q12, Q10 @L_tmp += (-result1) >> 11
+ VSUB.S32 Q12, Q12, Q11 @L_tmp -= result2 << 1
+
+ VSHL.S32 Q12, Q12, #3 @L_tmp <<= 3
+
+
+ VSHRN.S32 D20, Q12, #16 @sig_hi[i] = L_tmp >> 16@
+ VMOV.S16 r10, D20[0]
+ VSHR.S32 Q12, Q12, #4 @L_tmp >>= 4
+ VEXT.8 D7, D7, D20, #2
+ STRH r10, [r4], #2 @store sig_hi[i]
+ VMOV.S32 r11, D24[0] @r11 --- L_tmp >>= 4
+ ADD r8, r8, #1
+ SUB r12, r11, r10, LSL #12
+ @MOV r11, r12, ASR #16 @sig_lo[i]
+ VDUP.S16 D21, r12
+ VEXT.8 D11, D11, D21, #2
+ STRH r12, [r5], #2 @store sig_lo[i]
+
+ CMP r8, #64
+ BLT SYN_LOOP
+
+Syn_filt_32_end:
+
+ LDMFD r13!, {r4 - r12, r15}
+ @ENDFUNC
+ .END
+
+
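
A C sketch of the double-precision (hi/lo) synthesis filter above; the assembly fixes m = 16 and lg = 64, and the QADD/VQRSHRN saturation is omitted here:

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;

    void Syn_filt_32_ref(Word16 a[], Word16 m, Word16 exc[], Word16 Qnew,
                         Word16 sig_hi[], Word16 sig_lo[], Word16 lg)
    {
        Word16 a0 = a[0] >> (4 + Qnew);   /* input / 16 and >> Qnew */
        int i, j;

        for (i = 0; i < lg; i++) {        /* sig_hi/sig_lo[-16..-1] are history */
            Word32 lo = 0, hi = 0, L_tmp;
            for (j = 1; j <= m; j++) {
                lo += (Word32)sig_lo[i - j] * a[j];
                hi += (Word32)sig_hi[i - j] * a[j];
            }
            L_tmp = 2 * (Word32)exc[i] * a0 + ((-lo) >> 11) - (hi << 1);
            L_tmp <<= 3;                  /* a[] in Q12, sig_hi in Q-4 */
            sig_hi[i] = (Word16)(L_tmp >> 16);
            sig_lo[i] = (Word16)((L_tmp >> 4) - ((Word32)sig_hi[i] << 12));
        }
    }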
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s
new file mode 100644
index 0000000..189e33b
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s
@@ -0,0 +1,178 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@*void Convolve (
+@* Word16 x[], /* (i) : input vector */
+@* Word16 h[], /* (i) : impulse response */
+@* Word16 y[], /* (o) : output vector */
+@* Word16 L /* (i) : vector size */
+@*)
+@
+@ r0 --- x[]
+@ r1 --- h[]
+@ r2 --- y[]
+@ r3 --- L
+
+ .section .text
+ .global Convolve_asm
+
+Convolve_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ MOV r3, #0 @ n = 0 (the vector size L is fixed at 64 below)
+ MOV r11, #0x8000
+
+LOOP:
+ @MOV r8, #0 @ s = 0
+ ADD r4, r1, r3, LSL #1 @ tmpH address
+ ADD r5, r3, #1 @ i = n + 1
+ MOV r6, r0
+ LDRSH r9, [r6], #2 @ *tmpX++
+ LDRSH r10, [r4] @ *tmpH--
+ SUB r5, r5, #1
+ VMOV.S32 Q10, #0
+ MUL r8, r9, r10
+
+LOOP1:
+ CMP r5, #0
+ BLE L1
+ SUB r4, r4, #8
+ MOV r9, r4
+ VLD1.S16 D0, [r6]!
+ VLD1.S16 D1, [r9]!
+ VREV64.16 D1, D1
+ SUBS r5, r5, #4
+ VMLAL.S16 Q10, D0, D1
+ B LOOP1
+L1:
+ VADD.S32 D20, D20, D21
+ VPADD.S32 D20, D20, D20
+ VMOV.S32 r5, D20[0]
+ ADD r5, r5, r8
+ ADD r5, r11, r5, LSL #1
+ MOV r5, r5, LSR #16 @extract_h(s)
+ ADD r3, r3, #1
+ STRH r5, [r2], #2 @y[n]
+
+
+ @MOV r8, #0
+ ADD r4, r1, r3, LSL #1 @tmpH address
+ ADD r5, r3, #1
+ MOV r6, r0
+ LDRSH r9, [r6], #2 @ *tmpX++
+ LDRSH r10, [r4], #-2
+ LDRSH r12, [r6], #2
+ LDRSH r14, [r4]
+
+ MUL r8, r9, r10
+ SUB r5, r5, #2
+ MLA r8, r12, r14, r8
+
+ VMOV.S32 Q10, #0
+LOOP2:
+ CMP r5, #0
+ BLE L2
+ SUB r4, r4, #8
+ MOV r9, r4
+ VLD1.S16 D0, [r6]!
+ VLD1.S16 D1, [r9]!
+ SUBS r5, r5, #4
+ VREV64.16 D1, D1
+ VMLAL.S16 Q10, D0, D1
+ B LOOP2
+L2:
+ VADD.S32 D20, D20, D21
+ VPADD.S32 D20, D20, D20
+ VMOV.S32 r5, D20[0]
+ ADD r8, r8, r5
+ ADD r8, r11, r8, LSL #1
+ MOV r8, r8, LSR #16 @extract_h(s)
+ ADD r3, r3, #1
+ STRH r8, [r2], #2 @y[n]
+
+
+ @MOV r8, #0
+ ADD r4, r1, r3, LSL #1
+ ADD r5, r3, #1
+ MOV r6, r0
+ LDRSH r9, [r6], #2
+ LDRSH r10, [r4], #-2
+ LDRSH r12, [r6], #2
+ LDRSH r14, [r4], #-2
+ MUL r8, r9, r10
+ LDRSH r9, [r6], #2
+ LDRSH r10, [r4]
+ MLA r8, r12, r14, r8
+ SUB r5, r5, #3
+ MLA r8, r9, r10, r8
+
+ VMOV.S32 Q10, #0
+LOOP3:
+ CMP r5, #0
+ BLE L3
+ SUB r4, r4, #8
+ MOV r9, r4
+ VLD1.S16 D0, [r6]!
+ VLD1.S16 D1, [r9]!
+ VREV64.16 D1, D1
+ SUBS r5, r5, #4
+ VMLAL.S16 Q10, D0, D1
+ B LOOP3
+
+L3:
+ VADD.S32 D20, D20, D21
+ VPADD.S32 D20, D20, D20
+ VMOV.S32 r5, D20[0]
+ ADD r8, r8, r5
+ ADD r8, r11, r8, LSL #1
+ MOV r8, r8, LSR #16 @extract_h(s)
+ ADD r3, r3, #1
+ STRH r8, [r2], #2 @y[n]
+
+ ADD r5, r3, #1 @ i = n + 1
+ ADD r4, r1, r5, LSL #1 @ tmpH address
+ MOV r6, r0
+ VMOV.S32 Q10, #0
+LOOP4:
+ CMP r5, #0
+ BLE L4
+ SUB r4, r4, #8
+ MOV r9, r4
+ VLD1.S16 D0, [r6]!
+ VLD1.S16 D1, [r9]!
+ VREV64.16 D1, D1
+ SUBS r5, r5, #4
+ VMLAL.S16 Q10, D0, D1
+ B LOOP4
+L4:
+ VADD.S32 D20, D20, D21
+ VPADD.S32 D20, D20, D20
+ VMOV.S32 r5, D20[0]
+ ADD r5, r11, r5, LSL #1
+ MOV r5, r5, LSR #16 @extract_h(s)
+ ADD r3, r3, #1
+ STRH r5, [r2], #2 @y[n]
+
+ CMP r3, #64
+ BLT LOOP
+
+Convolve_asm_end:
+
+ LDMFD r13!, {r4 - r12, r15}
+
+ @ENDFUNC
+ .END
+
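
The four unrolled blocks above compute y[n] for n = 0..3 modulo 4, so each NEON pass sees a multiple-of-4 tail. The scalar convolution, as a C sketch (L is fixed at 64 in the assembly; the _ref name is mine):

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;

    void Convolve_ref(Word16 x[], Word16 h[], Word16 y[], Word16 L)
    {
        int i, n;
        for (n = 0; n < L; n++) {
            Word32 s = 0;
            for (i = 0; i <= n; i++)
                s += (Word32)x[i] * h[n - i];
            y[n] = (Word16)(((s << 1) + 0x8000) >> 16);  /* extract_h with rounding */
        }
    }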
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/cor_h_vec_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/cor_h_vec_neon.s
new file mode 100644
index 0000000..2e339db
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/cor_h_vec_neon.s
@@ -0,0 +1,151 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@static void cor_h_vec_012(
+@ Word16 h[], /* (i) scaled impulse response */
+@ Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */
+@ Word16 track, /* (i) track to use */
+@ Word16 sign[], /* (i) sign vector */
+@ Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */
+@ Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */
+@ Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */
+@)
+@r0 ---- h[]
+@r1 ---- vec[]
+@r2 ---- track
+@r3 ---- sign[]
+@r4 ---- rrixix[][NB_POS]
+@r5 ---- cor_1[]
+@r6 ---- cor_2[]
+
+ .section .text
+ .global cor_h_vec_012_asm
+
+cor_h_vec_012_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ LDR r4, [r13, #40] @load rrixix[][NB_POS]
+ ADD r7, r4, r2, LSL #5 @r7 --- p0 = rrixix[track]
+ MOV r4, #0 @i=0
+
+ @r0 --- h[], r1 --- vec[], r2 --- pos
+ @r3 --- sign[], r4 --- i, r7 --- p0
+
+LOOPi:
+ MOV r5, #0 @L_sum1 = 0
+ MOV r6, #0 @L_sum2 = 0
+ ADD r9, r1, r2, LSL #1 @p2 = &vec[pos]
+ MOV r10, r0 @p1 = h
+ RSB r11, r2, #62 @j=62-pos
+
+LOOPj1:
+ LDRSH r12, [r10], #2
+ LDRSH r8, [r9], #2
+ LDRSH r14, [r9]
+ SUBS r11, r11, #1
+ MLA r5, r12, r8, r5
+ MLA r6, r12, r14, r6
+ BGE LOOPj1
+
+ LDRSH r12, [r10], #2 @*p1++
+ MOV r6, r6, LSL #2 @L_sum2 = (L_sum2 << 2)
+ MLA r5, r12, r14, r5
+ MOV r14, #0x8000
+ MOV r5, r5, LSL #2 @L_sum1 = (L_sum1 << 2)
+ ADD r10, r6, r14
+ ADD r9, r5, r14
+ MOV r5, r9, ASR #16
+ MOV r6, r10, ASR #16
+ ADD r9, r3, r2, LSL #1 @address of sign[pos]
+ ADD r8, r7, #32
+ LDRSH r10, [r9], #2 @sign[pos]
+ LDRSH r11, [r9] @sign[pos + 1]
+ MUL r12, r5, r10
+ MUL r14, r6, r11
+ MOV r5, r12, ASR #15
+ MOV r6, r14, ASR #15
+ LDR r9, [r13, #44]
+ LDR r12, [r13, #48]
+ LDRSH r10, [r7], #2 @*p0++
+ LDRSH r11, [r8] @*p3++
+ ADD r9, r9, r4, LSL #1
+ ADD r12, r12, r4, LSL #1
+ ADD r5, r5, r10
+ ADD r6, r6, r11
+ STRH r5, [r9]
+ STRH r6, [r12]
+
+ ADD r2, r2, #4
+
+ MOV r5, #0 @L_sum1 = 0
+ MOV r6, #0 @L_sum2 = 0
+ ADD r9, r1, r2, LSL #1 @p2 = &vec[pos]
+ MOV r10, r0 @p1 = h
+ RSB r11, r2, #62 @j=62-pos
+ ADD r4, r4, #1 @i++
+
+LOOPj2:
+ LDRSH r12, [r10], #2
+ LDRSH r8, [r9], #2
+ LDRSH r14, [r9]
+ SUBS r11, r11, #1
+ MLA r5, r12, r8, r5
+ MLA r6, r12, r14, r6
+ BGE LOOPj2
+
+ LDRSH r12, [r10], #2 @*p1++
+ MOV r6, r6, LSL #2 @L_sum2 = (L_sum2 << 2)
+ MLA r5, r12, r14, r5
+ MOV r14, #0x8000
+ MOV r5, r5, LSL #2 @L_sum1 = (L_sum1 << 2)
+ ADD r10, r6, r14
+ ADD r9, r5, r14
+
+ MOV r5, r9, ASR #16
+ MOV r6, r10, ASR #16
+ ADD r9, r3, r2, LSL #1 @address of sign[pos]
+ ADD r8, r7, #32
+ LDRSH r10, [r9], #2 @sign[pos]
+ LDRSH r11, [r9] @sign[pos + 1]
+ MUL r12, r5, r10
+ MUL r14, r6, r11
+ MOV r5, r12, ASR #15
+ MOV r6, r14, ASR #15
+ LDR r9, [r13, #44]
+ LDR r12, [r13, #48]
+ LDRSH r10, [r7], #2 @*p0++
+ LDRSH r11, [r8] @*p3++
+ ADD r9, r9, r4, LSL #1
+ ADD r12, r12, r4, LSL #1
+ ADD r5, r5, r10
+ ADD r6, r6, r11
+ STRH r5, [r9]
+ STRH r6, [r12]
+ ADD r4, r4, #1 @i+1
+ ADD r2, r2, #4 @pos += STEP
+ CMP r4, #16
+
+ BLT LOOPi
+
+the_end:
+ LDMFD r13!, {r4 - r12, r15}
+
+ .END
+
+
+
+
+
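
A C sketch of the per-position correlation pair (NB_POS = 16, STEP = 4, L_SUBFR = 64; cor_1[]/cor_2[] arrive on the stack at #44/#48, and p3 = p0 + NB_POS matches ADD r8, r7, #32 above; the _ref name is mine):

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;
    #define NB_POS  16
    #define STEP    4
    #define L_SUBFR 64

    static void cor_h_vec_012_ref(Word16 h[], Word16 vec[], Word16 track,
                                  Word16 sign[], Word16 rrixix[][NB_POS],
                                  Word16 cor_1[], Word16 cor_2[])
    {
        Word16 *p0 = rrixix[track], *p3 = rrixix[track + 1];
        int i, j, pos = track;

        for (i = 0; i < NB_POS; i++, pos += STEP) {
            Word32 L_sum1 = 0, L_sum2 = 0;
            Word16 corr1, corr2;
            for (j = pos; j < L_SUBFR - 1; j++) {
                L_sum1 += (Word32)h[j - pos] * vec[j];
                L_sum2 += (Word32)h[j - pos] * vec[j + 1];
            }
            L_sum1 += (Word32)h[L_SUBFR - 1 - pos] * vec[L_SUBFR - 1];
            L_sum1 <<= 2;
            L_sum2 <<= 2;
            corr1 = (Word16)((L_sum1 + 0x8000) >> 16);
            corr2 = (Word16)((L_sum2 + 0x8000) >> 16);
            cor_1[i] = (Word16)((((Word32)corr1 * sign[pos])     >> 15) + p0[i]);
            cor_2[i] = (Word16)((((Word32)corr2 * sign[pos + 1]) >> 15) + p3[i]);
        }
    }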
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/pred_lt4_1_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/pred_lt4_1_neon.s
new file mode 100644
index 0000000..3b8853f
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/pred_lt4_1_neon.s
@@ -0,0 +1,100 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@void Pred_lt4(
+@ Word16 exc[], /* in/out: excitation buffer */
+@ Word16 T0, /* input : integer pitch lag */
+@ Word16 frac, /* input : fraction of lag */
+@ Word16 L_subfr /* input : subframe size */
+@)
+@***********************************************************************
+@ r0 --- exc[]
+@ r1 --- T0
+@ r2 --- frac
+@ r3 --- L_subfr
+
+ .section .text
+ .global pred_lt4_asm
+ .extern inter4_2
+
+pred_lt4_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ SUB r4, r0, r1, LSL #1 @ x = exc - T0
+ RSB r2, r2, #0 @ frac = - frac
+ SUB r4, r4, #30 @ x -= L_INTERPOL2 - 1
+ CMP r2, #0
+ ADDLT r2, r2, #4 @ frac += UP_SAMP
+ SUBLT r4, r4, #2 @ x--
+
+ LDR r11, Lable1
+ RSB r2, r2, #3 @ k = UP_SAMP - 1 - frac
+ MOV r8, #0 @ j = 0
+ ADD r11, r11, r2, LSL #6 @ get inter4_2[k][]
+
+ VLD1.S16 {Q0, Q1}, [r11]!
+ VLD1.S16 {Q2, Q3}, [r11]!
+
+ MOV r6, #0x8000
+
+ VLD1.S16 {Q4, Q5}, [r4]! @load 16 x[]
+ VLD1.S16 {Q6, Q7}, [r4]! @load 16 x[]
+
+LOOP:
+ VQDMULL.S16 Q15, D8, D0
+ VQDMLAL.S16 Q15, D9, D1
+ VQDMLAL.S16 Q15, D10, D2
+ VQDMLAL.S16 Q15, D11, D3
+
+ VQDMLAL.S16 Q15, D12, D4
+ VQDMLAL.S16 Q15, D13, D5
+ VQDMLAL.S16 Q15, D14, D6
+ VQDMLAL.S16 Q15, D15, D7
+
+ LDRSH r12, [r4], #2
+
+ VEXT.S16 D8, D8, D9, #1
+ VEXT.S16 D9, D9, D10, #1
+ VEXT.S16 D10, D10, D11, #1
+ VEXT.S16 D11, D11, D12, #1
+ VDUP.S16 D24, r12
+ VEXT.S16 D12, D12, D13, #1
+ VEXT.S16 D13, D13, D14, #1
+
+ VQADD.S32 D30, D30, D31
+ MOV r11, #0x8000
+ VPADD.S32 D30, D30, D30
+ ADD r8, r8, #1
+ VMOV.S32 r12, D30[0]
+ VEXT.S16 D14, D14, D15, #1
+
+ QADD r1, r12, r12 @ L_sum = (L_sum << 2)
+ VEXT.S16 D15, D15, D24, #1
+ QADD r5, r1, r6
+ MOV r1, r5, ASR #16
+ CMP r8, r3
+ STRH r1, [r0], #2 @ exc[j] = (L_sum + 0x8000) >> 16
+ BLT LOOP
+
+pred_lt4_end:
+
+ LDMFD r13!, {r4 - r12, r15}
+
+Lable1:
+ .word inter4_2
+ @ENDFUNC
+ .END
+
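
A C sketch of the 1/4-sample interpolation above (UP_SAMP = 4, L_INTERPOL2 = 16; inter4_2[k][] is the 32-tap filter row selected via Lable1; saturation omitted):

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;
    #define UP_SAMP     4
    #define L_INTERPOL2 16
    extern const Word16 inter4_2[UP_SAMP][2 * L_INTERPOL2];  /* codec table */

    void Pred_lt4_ref(Word16 exc[], Word16 T0, Word16 frac, Word16 L_subfr)
    {
        Word16 *x = exc - T0 - (L_INTERPOL2 - 1);
        int i, j, k;

        frac = -frac;
        if (frac < 0) { frac += UP_SAMP; x--; }
        k = UP_SAMP - 1 - frac;

        for (j = 0; j < L_subfr; j++, x++) {
            Word32 L_sum = 0;
            for (i = 0; i < 2 * L_INTERPOL2; i++)
                L_sum += (Word32)x[i] * inter4_2[k][i];
            exc[j] = (Word16)(((L_sum << 2) + 0x8000) >> 16);  /* round */
        }
    }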
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/residu_asm_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/residu_asm_neon.s
new file mode 100644
index 0000000..b9e6b23
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/residu_asm_neon.s
@@ -0,0 +1,127 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@void Residu (
+@ Word16 a[], /* (i) : prediction coefficients */
+@ Word16 x[], /* (i) : speech signal */
+@ Word16 y[], /* (o) : residual signal */
+@ Word16 lg /* (i) : size of filtering */
+@)
+@a[] RN r0
+@x[] RN r1
+@y[] RN r2
+@lg RN r3
+
+ .section .text
+ .global Residu_opt
+
+Residu_opt:
+
+ STMFD r13!, {r4 - r12, r14}
+ SUB r7, r3, #4 @i = lg - 4
+
+ VLD1.S16 {D0, D1, D2, D3}, [r0]! @get all a[]
+ VLD1.S16 {D4}, [r0]!
+ VMOV.S32 Q8, #0x8000
+
+LOOP1:
+ ADD r9, r1, r7, LSL #1 @copy the address
+ ADD r10, r2, r7, LSL #1
+ MOV r8, r9
+ VLD1.S16 D5, [r8]! @get x[i], x[i+1], x[i+2], x[i+3]
+ VQDMULL.S16 Q10, D5, D0[0] @finish the first L_mult
+
+ SUB r8, r9, #2 @get the x[i-1] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D0[1]
+
+ SUB r8, r9, #4 @load the x[i-2] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D0[2]
+
+ SUB r8, r9, #6 @load the x[i-3] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D0[3]
+
+ SUB r8, r9, #8 @load the x[i-4] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D1[0]
+
+ SUB r8, r9, #10 @load the x[i-5] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D1[1]
+
+ SUB r8, r9, #12 @load the x[i-6] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D1[2]
+
+ SUB r8, r9, #14 @load the x[i-7] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D1[3]
+
+ SUB r8, r9, #16 @load the x[i-8] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D2[0]
+
+ SUB r8, r9, #18 @load the x[i-9] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D2[1]
+
+ SUB r8, r9, #20 @load the x[i-10] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D2[2]
+
+ SUB r8, r9, #22 @load the x[i-11] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D2[3]
+
+ SUB r8, r9, #24 @load the x[i-12] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D3[0]
+
+ SUB r8, r9, #26 @load the x[i-13] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D3[1]
+
+ SUB r8, r9, #28 @load the x[i-14] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D3[2]
+
+ SUB r8, r9, #30 @load the x[i-15] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D3[3]
+
+ SUB r8, r9, #32 @load the x[i-16] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D4[0]
+
+ SUB r7, r7, #4 @i-=4
+ VQSHL.S32 Q10, Q10, #4
+ VQADD.S32 Q10, Q10, Q8
+ VSHRN.S32 D5, Q10, #16
+ VST1.S16 D5, [r10]!
+ CMP r7, #0
+
+ BGE LOOP1
+
+Residu_asm_end:
+
+ LDMFD r13!, {r4 - r12, r15}
+
+ @ENDFUNC
+ .END
+
+
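
A C sketch of the residual (LPC inverse) filter above; M = 16 with a[] in Q12, and the assembly produces four outputs per pass from i = lg-4 downward, saturating via VQDMULL/VQSHL/VQADD (omitted here):

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;
    #define M 16

    void Residu_ref(Word16 a[], Word16 x[], Word16 y[], Word16 lg)
    {
        int i, j;
        for (i = 0; i < lg; i++) {
            Word32 s = 0;
            for (j = 0; j <= M; j++)      /* x[-16..-1] is filter history */
                s += (Word32)a[j] * x[i - j];
            s <<= 5;                      /* VQDMULL doubling plus VQSHL #4 */
            y[i] = (Word16)((s + 0x8000) >> 16);
        }
    }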
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/scale_sig_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/scale_sig_neon.s
new file mode 100644
index 0000000..14957d8
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/scale_sig_neon.s
@@ -0,0 +1,138 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@**********************************************************************/
+@void Scale_sig(
+@ Word16 x[], /* (i/o) : signal to scale */
+@ Word16 lg, /* (i) : size of x[] */
+@ Word16 exp /* (i) : exponent: x = round(x << exp) */
+@)
+@***********************************************************************
+@ x[] --- r0
+@ lg --- r1
+@ exp --- r2
+
+ .section .text
+ .global Scale_sig_opt
+
+Scale_sig_opt:
+
+ STMFD r13!, {r4 - r12, r14}
+ MOV r4, #4
+ VMOV.S32 Q15, #0x8000
+ VDUP.S32 Q14, r2
+ MOV r5, r0 @ copy x[] address
+ CMP r1, #64
+ MOVEQ r4, #1
+ BEQ LOOP
+ CMP r1, #128
+ MOVEQ r4, #2
+ BEQ LOOP
+ CMP r1, #256
+ BEQ LOOP
+ CMP r1, #80
+ MOVEQ r4, #1
+ BEQ LOOP1
+
+LOOP1:
+ VLD1.S16 {Q0, Q1}, [r5]! @load 16 Word16 x[]
+ VSHLL.S16 Q10, D0, #16
+ VSHLL.S16 Q11, D1, #16
+ VSHLL.S16 Q12, D2, #16
+ VSHLL.S16 Q13, D3, #16
+ VSHL.S32 Q10, Q10, Q14
+ VSHL.S32 Q11, Q11, Q14
+ VSHL.S32 Q12, Q12, Q14
+ VSHL.S32 Q13, Q13, Q14
+ VADDHN.S32 D16, Q10, Q15
+ VADDHN.S32 D17, Q11, Q15
+ VADDHN.S32 D18, Q12, Q15
+ VADDHN.S32 D19, Q13, Q15
+ VST1.S16 {Q8, Q9}, [r0]! @store 16 Word16 x[]
+
+LOOP:
+ VLD1.S16 {Q0, Q1}, [r5]! @load 16 Word16 x[]
+ VLD1.S16 {Q2, Q3}, [r5]! @load 16 Word16 x[]
+ VLD1.S16 {Q4, Q5}, [r5]! @load 16 Word16 x[]
+ VLD1.S16 {Q6, Q7}, [r5]! @load 16 Word16 x[]
+
+ VSHLL.S16 Q8, D0, #16
+ VSHLL.S16 Q9, D1, #16
+ VSHLL.S16 Q10, D2, #16
+ VSHLL.S16 Q11, D3, #16
+ VSHL.S32 Q8, Q8, Q14
+ VSHL.S32 Q9, Q9, Q14
+ VSHL.S32 Q10, Q10, Q14
+ VSHL.S32 Q11, Q11, Q14
+ VADDHN.S32 D16, Q8, Q15
+ VADDHN.S32 D17, Q9, Q15
+ VADDHN.S32 D18, Q10, Q15
+ VADDHN.S32 D19, Q11, Q15
+ VST1.S16 {Q8, Q9}, [r0]! @store 16 Word16 x[]
+
+
+ VSHLL.S16 Q12, D4, #16
+ VSHLL.S16 Q13, D5, #16
+ VSHLL.S16 Q10, D6, #16
+ VSHLL.S16 Q11, D7, #16
+ VSHL.S32 Q12, Q12, Q14
+ VSHL.S32 Q13, Q13, Q14
+ VSHL.S32 Q10, Q10, Q14
+ VSHL.S32 Q11, Q11, Q14
+ VADDHN.S32 D16, Q12, Q15
+ VADDHN.S32 D17, Q13, Q15
+ VADDHN.S32 D18, Q10, Q15
+ VADDHN.S32 D19, Q11, Q15
+ VST1.S16 {Q8, Q9}, [r0]! @store 16 Word16 x[]
+
+ VSHLL.S16 Q10, D8, #16
+ VSHLL.S16 Q11, D9, #16
+ VSHLL.S16 Q12, D10, #16
+ VSHLL.S16 Q13, D11, #16
+ VSHL.S32 Q10, Q10, Q14
+ VSHL.S32 Q11, Q11, Q14
+ VSHL.S32 Q12, Q12, Q14
+ VSHL.S32 Q13, Q13, Q14
+ VADDHN.S32 D16, Q10, Q15
+ VADDHN.S32 D17, Q11, Q15
+ VADDHN.S32 D18, Q12, Q15
+ VADDHN.S32 D19, Q13, Q15
+ VST1.S16 {Q8, Q9}, [r0]! @store 16 Word16 x[]
+
+ VSHLL.S16 Q10, D12, #16
+ VSHLL.S16 Q11, D13, #16
+ VSHLL.S16 Q12, D14, #16
+ VSHLL.S16 Q13, D15, #16
+ VSHL.S32 Q10, Q10, Q14
+ VSHL.S32 Q11, Q11, Q14
+ VSHL.S32 Q12, Q12, Q14
+ VSHL.S32 Q13, Q13, Q14
+ VADDHN.S32 D16, Q10, Q15
+ VADDHN.S32 D17, Q11, Q15
+ VADDHN.S32 D18, Q12, Q15
+ VADDHN.S32 D19, Q13, Q15
+ VST1.S16 {Q8, Q9}, [r0]! @store 16 Word16 x[]
+ SUBS r4, r4, #1
+ BGT LOOP
+
+
+Scale_sig_asm_end:
+
+ LDMFD r13!, {r4 - r12, r15}
+ @ENDFUNC
+ .END
+
+
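
A C sketch of the scaling above. The assembly only dispatches on lg = 64, 80, 128 or 256; VSHL by the register in Q14 shifts right when exp is negative, which the ternary below mirrors (saturation omitted; the _ref name is mine):

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;

    void Scale_sig_ref(Word16 x[], Word16 lg, Word16 exp)
    {
        int i;
        for (i = 0; i < lg; i++) {
            Word32 L_tmp = (Word32)x[i] << 16;
            L_tmp = (exp >= 0) ? (L_tmp << exp) : (L_tmp >> -exp);
            x[i] = (Word16)((L_tmp + 0x8000) >> 16);   /* round() */
        }
    }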
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s
new file mode 100644
index 0000000..dc3d4a8
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s
@@ -0,0 +1,106 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@void Syn_filt(
+@ Word16 a[], /* (i) Q12 : a[m+1] prediction coefficients */
+@ Word16 x[], /* (i) : input signal */
+@ Word16 y[], /* (o) : output signal */
+@ Word16 mem[] /* (i/o) : memory associated with this filtering */
+@)
+@***********************************************************************
+@ a[] --- r0
+@ x[] --- r1
+@ y[] --- r2
+@ mem[] --- r3
+@ m --- 16 lg --- 80 update --- 1
+
+ .section .text
+ .global Syn_filt_asm
+
+Syn_filt_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ SUB r13, r13, #700 @ y_buf[L_FRAME16k + M16k]
+
+ MOV r4, r3 @ copy mem[] address
+ MOV r5, r13 @ copy yy = y_buf address
+
+ @ for(i = 0@ i < m@ i++)
+ @{
+ @ *yy++ = mem[i]@
+ @}
+ VLD1.S16 {D0, D1, D2, D3}, [r4]! @load 16 mems
+ VST1.S16 {D0, D1, D2, D3}, [r5]! @store 16 mem[] to *yy
+
+ LDRSH r5, [r0], #2 @ load a[0]
+ MOV r8, #0 @ i = 0
+ MOV r5, r5, ASR #1 @ a0 = a[0] >> 1
+ VMOV.S16 D8[0], r5
+ @ load all a[]
+ VLD1.S16 {D0, D1, D2, D3}, [r0]! @ load a[1] ~ a[16]
+ VREV64.16 D0, D0
+ VREV64.16 D1, D1
+ VREV64.16 D2, D2
+ VREV64.16 D3, D3
+ MOV r8, #0 @ loop times
+ MOV r10, r13 @ temp = y_buf
+ ADD r4, r13, #32 @ yy[i] address
+
+ VLD1.S16 {D4, D5, D6, D7}, [r10]! @ first 16 temp_p
+
+SYN_LOOP:
+
+ LDRSH r6, [r1], #2 @ load x[i]
+ MUL r12, r6, r5 @ L_tmp = x[i] * a0
+ ADD r10, r4, r8, LSL #1 @ y[i], yy[i] address
+
+ VDUP.S32 Q10, r12
+ VMULL.S16 Q5, D3, D4
+ VMLAL.S16 Q5, D2, D5
+ VMLAL.S16 Q5, D1, D6
+ VMLAL.S16 Q5, D0, D7
+ VEXT.8 D4, D4, D5, #2
+ VEXT.8 D5, D5, D6, #2
+ VEXT.8 D6, D6, D7, #2
+ VPADD.S32 D12, D10, D11
+ ADD r8, r8, #1
+ VPADD.S32 D10, D12, D12
+
+ VDUP.S32 Q7, D10[0]
+
+ VSUB.S32 Q9, Q10, Q7
+ VQRSHRN.S32 D20, Q9, #12
+ VMOV.S16 r9, D20[0]
+ VEXT.8 D7, D7, D20, #2
+ CMP r8, #80
+ STRH r9, [r10] @ yy[i]
+ STRH r9, [r2], #2 @ y[i]
+
+ BLT SYN_LOOP
+
+ @ update mem[]
+ ADD r5, r13, #160 @ yy[64] address
+ VLD1.S16 {D0, D1, D2, D3}, [r5]!
+ VST1.S16 {D0, D1, D2, D3}, [r3]!
+
+Syn_filt_asm_end:
+
+ ADD r13, r13, #700
+ LDMFD r13!, {r4 - r12, r15}
+ @ENDFUNC
+ .END
+
+
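
A C sketch of the synthesis filter above with the parameters this build fixes (m = 16, lg = 80, update = 1; VQRSHRN #12 rounding is kept, its saturation is not; the _ref name is mine):

    #include <string.h>
    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;

    void Syn_filt_ref(Word16 a[], Word16 x[], Word16 y[], Word16 mem[])
    {
        Word16 y_buf[80 + 16];
        Word16 *yy = y_buf + 16;              /* yy[-16..-1] = mem[] */
        Word16 a0 = a[0] >> 1;                /* headroom for the MACs */
        int i, j;

        memcpy(y_buf, mem, 16 * sizeof(Word16));
        for (i = 0; i < 80; i++) {
            Word32 L_tmp = (Word32)x[i] * a0;
            for (j = 1; j <= 16; j++)
                L_tmp -= (Word32)a[j] * yy[i - j];
            y[i] = yy[i] = (Word16)((L_tmp + 0x800) >> 12);
        }
        memcpy(mem, &yy[64], 16 * sizeof(Word16));  /* keep last 16 outputs */
    }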