Diffstat (limited to 'media/libstagefright/codecs/amrwbenc/src/asm/ARMV7')
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Deemph_32_neon.s   | 102
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s       | 127
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Filt_6k_7k_neon.s  | 228
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s   | 270
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s | 133
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s    | 178
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/cor_h_vec_neon.s   | 151
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/pred_lt4_1_neon.s  | 100
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/residu_asm_neon.s  | 127
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/scale_sig_neon.s   | 138
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s    | 106
11 files changed, 1660 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Deemph_32_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Deemph_32_neon.s
new file mode 100644
index 0000000..acb60c3
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Deemph_32_neon.s
@@ -0,0 +1,102 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@void Deemph_32(
+@ Word16 x_hi[], /* (i) : input signal (bit31..16) */
+@ Word16 x_lo[], /* (i) : input signal (bit15..4) */
+@ Word16 y[], /* (o) : output signal (x16) */
+@ Word16 mu, /* (i) Q15 : deemphasis factor */
+@ Word16 L, /* (i) : vector size */
+@ Word16 * mem /* (i/o) : memory (y[-1]) */
+@ )
+
+@x_hi RN R0
+@x_lo RN R1
+@y[] RN R2
+@*mem RN R3
+
+ .section .text
+ .global Deemph_32_asm
+
+Deemph_32_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ MOV r4, #2 @ i = 2 (y[0] and y[1] are computed before the loop)
+ LDRSH r6, [r0], #2 @load x_hi[0]
+ LDRSH r7, [r1], #2 @load x_lo[0]
+ LDR r5, =22282 @r5---mu
+ MOV r11, #0x8000
+
+ @y[0]
+ MOV r10, r6, LSL #16 @L_tmp = x_hi[0]<<16
+ MOV r8, r5, ASR #1 @fac = mu >> 1
+ LDR r5, [r3]
+ ADD r12, r10, r7, LSL #4 @L_tmp += x_lo[0] << 4
+ MOV r10, r12, LSL #3 @L_tmp <<= 3
+ MUL r9, r5, r8
+ LDRSH r6, [r0], #2 @load x_hi[1]
+ QDADD r10, r10, r9
+ LDRSH r7, [r1], #2 @load x_lo[1]
+ MOV r12, r10, LSL #1 @L_tmp = L_mac(L_tmp, *mem, fac)
+ QADD r10, r12, r11
+ MOV r14, r10, ASR #16 @y[0] = round(L_tmp)
+
+
+ MOV r10, r6, LSL #16
+ ADD r12, r10, r7, LSL #4
+ STRH r14, [r2], #2 @update y[0]
+ MOV r10, r12, LSL #3
+ MUL r9, r14, r8
+ QDADD r10, r10, r9
+ MOV r12, r10, LSL #1
+ QADD r10, r12, r11
+ MOV r14, r10, ASR #16 @y[1] = round(L_tmp)
+
+LOOP:
+ LDRSH r6, [r0], #2 @load x_hi[]
+ LDRSH r7, [r1], #2
+ STRH r14, [r2], #2
+ MOV r10, r6, LSL #16
+ ADD r12, r10, r7, LSL #4
+ MUL r9, r14, r8
+ MOV r10, r12, LSL #3
+ QDADD r10, r10, r9
+ LDRSH r6, [r0], #2 @load x_hi[]
+ MOV r12, r10, LSL #1
+ QADD r10, r12, r11
+ LDRSH r7, [r1], #2
+ MOV r14, r10, ASR #16
+
+ MOV r10, r6, LSL #16
+ ADD r12, r10, r7, LSL #4
+ STRH r14, [r2], #2
+ MUL r9, r14, r8
+ MOV r10, r12, LSL #3
+ QDADD r10, r10, r9
+ ADD r4, r4, #2
+ MOV r12, r10, LSL #1
+ QADD r10, r12, r11
+ CMP r4, #64
+ MOV r14, r10, ASR #16
+
+ BLT LOOP
+ STR r14, [r3]
+ STRH r14, [r2]
+
+ LDMFD r13!, {r4 - r12, r15}
+
+ .END
+
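
For reference, a minimal C sketch of the de-emphasis recursion this file implements (fixed-point basic ops inlined, saturation omitted; the assembly hard-codes mu = 22282, i.e. 0.68 in Q15, and L = 64, and the _ref name is mine):

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;

    void Deemph_32_ref(Word16 x_hi[], Word16 x_lo[], Word16 y[],
                       Word16 mu, Word16 L, Word16 *mem)
    {
        Word16 i, fac = mu >> 1;                  /* Q15 -> Q14 */
        Word32 L_tmp;

        for (i = 0; i < L; i++) {
            L_tmp  = ((Word32)x_hi[i] << 16) + ((Word32)x_lo[i] << 4);
            L_tmp <<= 3;
            /* feedback: y[-1] comes from *mem, then the previous output */
            L_tmp += 2 * (Word32)(i == 0 ? *mem : y[i - 1]) * fac;
            L_tmp <<= 1;
            y[i] = (Word16)((L_tmp + 0x8000) >> 16);   /* round() */
        }
        *mem = y[L - 1];
    }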
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s
new file mode 100644
index 0000000..07ca344
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s
@@ -0,0 +1,127 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@**********************************************************************/
+@Word32 Dot_product12( /* (o) Q31: normalized result (-1 < val <= 1) */
+@ Word16 x[], /* (i) 12bits: x vector */
+@ Word16 y[], /* (i) 12bits: y vector */
+@ Word16 lg, /* (i) : vector length */
+@ Word16 * exp /* (o) : exponent of result (0..+30) */
+@)
+@************************************************************************
+@ x[] --- r0
+@ y[] --- r1
+@ lg --- r2
+@ *exp --- r3
+
+ .section .text
+ .global Dot_product12_asm
+
+Dot_product12_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ CMP r0, r1
+ BEQ LOOP_EQ
+
+ VLD1.S16 {Q0, Q1}, [r0]! @load 16 Word16 x[]
+ VLD1.S16 {Q2, Q3}, [r0]! @load 16 Word16 x[]
+ VLD1.S16 {Q4, Q5}, [r0]! @load 16 Word16 x[]
+ VLD1.S16 {Q6, Q7}, [r0]! @load 16 Word16 x[]
+ VLD1.S16 {Q8, Q9}, [r1]! @load 16 Word16 y[]
+ VLD1.S16 {Q10, Q11}, [r1]! @load 16 Word16 y[]
+ VLD1.S16 {Q12, Q13}, [r1]! @load 16 Word16 y[]
+
+ VMULL.S16 Q15, D16, D0
+ VMLAL.S16 Q15, D17, D1
+ VMLAL.S16 Q15, D18, D2
+ VMLAL.S16 Q15, D19, D3
+ VLD1.S16 {Q0, Q1}, [r1]! @load 16 Word16 y[]
+ VMLAL.S16 Q15, D20, D4
+ VMLAL.S16 Q15, D21, D5
+ VMLAL.S16 Q15, D22, D6
+ VMLAL.S16 Q15, D23, D7
+ VMLAL.S16 Q15, D24, D8
+ VMLAL.S16 Q15, D25, D9
+ VMLAL.S16 Q15, D26, D10
+ VMLAL.S16 Q15, D27, D11
+ VMLAL.S16 Q15, D0, D12
+ VMLAL.S16 Q15, D1, D13
+ VMLAL.S16 Q15, D2, D14
+ VMLAL.S16 Q15, D3, D15
+
+ CMP r2, #64
+ BEQ Lable1
+ VLD1.S16 {Q0, Q1}, [r0]! @load 16 Word16 x[]
+ VLD1.S16 {Q2, Q3}, [r1]!
+ VMLAL.S16 Q15, D4, D0
+ VMLAL.S16 Q15, D5, D1
+ VMLAL.S16 Q15, D6, D2
+ VMLAL.S16 Q15, D7, D3
+ B Lable1 @ unconditional branch (no link needed)
+
+LOOP_EQ:
+ VLD1.S16 {Q0, Q1}, [r0]!
+ VLD1.S16 {Q2, Q3}, [r0]!
+ VLD1.S16 {Q4, Q5}, [r0]!
+ VLD1.S16 {Q6, Q7}, [r0]!
+ VMULL.S16 Q15, D0, D0
+ VMLAL.S16 Q15, D1, D1
+ VMLAL.S16 Q15, D2, D2
+ VMLAL.S16 Q15, D3, D3
+ VMLAL.S16 Q15, D4, D4
+ VMLAL.S16 Q15, D5, D5
+ VMLAL.S16 Q15, D6, D6
+ VMLAL.S16 Q15, D7, D7
+ VMLAL.S16 Q15, D8, D8
+ VMLAL.S16 Q15, D9, D9
+ VMLAL.S16 Q15, D10, D10
+ VMLAL.S16 Q15, D11, D11
+ VMLAL.S16 Q15, D12, D12
+ VMLAL.S16 Q15, D13, D13
+ VMLAL.S16 Q15, D14, D14
+ VMLAL.S16 Q15, D15, D15
+
+ CMP r2, #64
+ BEQ Lable1
+ VLD1.S16 {Q0, Q1}, [r0]!
+ VMLAL.S16 Q15, D0, D0
+ VMLAL.S16 Q15, D1, D1
+ VMLAL.S16 Q15, D2, D2
+ VMLAL.S16 Q15, D3, D3
+
+Lable1:
+
+ VQADD.S32 D30, D30, D31
+ VPADD.S32 D30, D30, D30
+ VMOV.S32 r12, D30[0]
+
+ ADD r12, r12, r12
+ ADD r12, r12, #1 @ L_sum = (L_sum << 1) + 1
+ MOV r4, r12
+ CMP r12, #0
+ RSBLT r4, r12, #0
+ CLZ r10, r4
+ SUB r10, r10, #1 @ sft = norm_l(L_sum)
+ MOV r0, r12, LSL r10 @ L_sum = L_sum << sft
+ RSB r11, r10, #30 @ *exp = 30 - sft
+ STRH r11, [r3]
+
+Dot_product12_end:
+
+ LDMFD r13!, {r4 - r12, r15}
+
+ .END
+
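
The scalar routine being vectorized is, in outline (a sketch only; the assembly handles lg of 64 or 80, special-cases x == y as a pure energy sum, and inlines norm_l as the CLZ sequence above):

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;

    Word32 Dot_product12_ref(Word16 x[], Word16 y[], Word16 lg, Word16 *exp)
    {
        Word16 i, sft;
        Word32 L_sum = 0, v;

        for (i = 0; i < lg; i++)
            L_sum += (Word32)x[i] * y[i];
        L_sum = (L_sum << 1) + 1;             /* odd, so never zero */

        v = (L_sum < 0) ? -L_sum : L_sum;     /* norm_l: CLZ - 1 */
        for (sft = 0; v < 0x40000000; sft++)
            v <<= 1;

        *exp = 30 - sft;                      /* 0..30 */
        return L_sum << sft;                  /* normalized Q31 */
    }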
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Filt_6k_7k_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Filt_6k_7k_neon.s
new file mode 100644
index 0000000..1880024
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Filt_6k_7k_neon.s
@@ -0,0 +1,228 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@**********************************************************************/
+@void Filt_6k_7k(
+@ Word16 signal[], /* input: signal */
+@ Word16 lg, /* input: length of input */
+@ Word16 mem[] /* in/out: memory (size=30) */
+@)
+@***********************************************************************
+@ r0 --- signal[]
+@ r1 --- lg
+@ r2 --- mem[]
+
+ .section .text
+ .global Filt_6k_7k_asm
+ .extern fir_6k_7k
+
+Filt_6k_7k_asm:
+
+ STMFD r13!, {r0 - r12, r14}
+ SUB r13, r13, #240 @ x[L_SUBFR16k + (L_FIR - 1)]
+ MOV r8, r0 @ copy signal[] address
+ MOV r5, r2 @ copy mem[] address
+
+ MOV r0, r2
+ MOV r1, r13
+
+ VLD1.S16 {D0, D1, D2, D3}, [r0]!
+ VLD1.S16 {D4, D5, D6, D7}, [r0]!
+
+ VST1.S16 {D0, D1, D2, D3}, [r1]!
+ VST1.S16 {D4, D5, D6}, [r1]!
+ VST1.S16 D7[0], [r1]!
+ VST1.S16 D7[1], [r1]!
+
+
+
+ LDR r10, Lable1 @ get fir_7k address
+ MOV r3, r8 @ restore signal[] address to r3 (the Copy routine clobbers r3)
+ ADD r6, r13, #60 @ get x[L_FIR - 1] address
+ MOV r7, r3 @ get signal[i]
+ @for (i = lg - 1@ i >= 0@ i--)
+ @{
+ @ x[i + L_FIR - 1] = signal[i] >> 2@
+ @}
+ VLD1.S16 {Q0, Q1}, [r7]! @ signal[0] ~ signal[15]
+ VLD1.S16 {Q2, Q3}, [r7]! @ signal[16] ~ signal[31]
+ VLD1.S16 {Q4, Q5}, [r7]! @ signal[32] ~ signal[47]
+ VLD1.S16 {Q6, Q7}, [r7]! @ signal[48] ~ signal[63]
+ VLD1.S16 {Q8, Q9}, [r7]! @ signal[64] ~ signal[79]
+ VSHR.S16 Q10, Q0, #2
+ VSHR.S16 Q11, Q1, #2
+ VSHR.S16 Q12, Q2, #2
+ VSHR.S16 Q13, Q3, #2
+ VST1.S16 {Q10, Q11}, [r6]!
+ VSHR.S16 Q0, Q4, #2
+ VSHR.S16 Q1, Q5, #2
+ VSHR.S16 Q10, Q6, #2
+ VSHR.S16 Q11, Q7, #2
+ VSHR.S16 Q2, Q8, #2
+ VSHR.S16 Q3, Q9, #2
+ VST1.S16 {Q12, Q13}, [r6]!
+ VST1.S16 {Q0, Q1}, [r6]!
+ VST1.S16 {Q10, Q11}, [r6]!
+ VST1.S16 {Q2, Q3}, [r6]!
+
+ MOV r12, r5
+ @STR r5, [sp, #-4] @ PUSH r5 to stack
+ @ not use registers: r4, r10, r12, r14, r5
+ MOV r4, r13
+ MOV r5, #0 @ i = 0
+
+ @ r4 --- x[i], r10 ---- fir_6k_7k
+ VLD1.S16 {Q0, Q1}, [r10]! @fir_6k_7k[0] ~ fir_6k_7k[15]
+ VLD1.S16 {Q2, Q3}, [r10]! @fir_6k_7k[16] ~ fir_6k_7k[31]
+ VMOV.S16 D7[3], r5 @set fir_6k_7k[31] = 0 (pad the 31-tap filter to 32)
+
+ VLD1.S16 {Q4, Q5}, [r4]! @x[0] ~ x[15]
+ VLD1.S16 {Q6, Q7}, [r4]! @x[16] ~ X[31]
+ VLD1.S16 {Q8}, [r4]!
+ VMOV.S16 Q15, #0
+
+LOOP_6K7K:
+
+ VMULL.S16 Q9,D8,D0[0]
+ VMULL.S16 Q10,D9,D1[0]
+ VMULL.S16 Q11,D9,D0[0]
+ VMULL.S16 Q12,D10,D1[0]
+ VEXT.8 Q4,Q4,Q5,#2
+ VMLAL.S16 Q9,D10,D2[0]
+ VMLAL.S16 Q10,D11,D3[0]
+ VMLAL.S16 Q11,D11,D2[0]
+ VMLAL.S16 Q12,D12,D3[0]
+ VEXT.8 Q5,Q5,Q6,#2
+ VMLAL.S16 Q9,D12,D4[0]
+ VMLAL.S16 Q10,D13,D5[0]
+ VMLAL.S16 Q11,D13,D4[0]
+ VMLAL.S16 Q12,D14,D5[0]
+ VEXT.8 Q6,Q6,Q7,#2
+ VMLAL.S16 Q9,D14,D6[0]
+ VMLAL.S16 Q10,D15,D7[0]
+ VMLAL.S16 Q11,D15,D6[0]
+ VMLAL.S16 Q12,D16,D7[0]
+ VEXT.8 Q7,Q7,Q8,#2
+
+ VMLAL.S16 Q9,D8,D0[1]
+ VMLAL.S16 Q10,D9,D1[1]
+ VEXT.8 Q8,Q8,Q15,#2
+ VMLAL.S16 Q11,D9,D0[1]
+ VMLAL.S16 Q12,D10,D1[1]
+ VEXT.8 Q4,Q4,Q5,#2
+ VMLAL.S16 Q9,D10,D2[1]
+ VMLAL.S16 Q10,D11,D3[1]
+ VMLAL.S16 Q11,D11,D2[1]
+ VMLAL.S16 Q12,D12,D3[1]
+ VEXT.8 Q5,Q5,Q6,#2
+ VMLAL.S16 Q9,D12,D4[1]
+ VMLAL.S16 Q10,D13,D5[1]
+ VMLAL.S16 Q11,D13,D4[1]
+ VMLAL.S16 Q12,D14,D5[1]
+ VEXT.8 Q6,Q6,Q7,#2
+ VMLAL.S16 Q9,D14,D6[1]
+ VMLAL.S16 Q10,D15,D7[1]
+ VMLAL.S16 Q11,D15,D6[1]
+ VMLAL.S16 Q12,D16,D7[1]
+ VEXT.8 Q7,Q7,Q8,#2
+
+ VMLAL.S16 Q9,D8,D0[2]
+ VMLAL.S16 Q10,D9,D1[2]
+ VEXT.8 Q8,Q8,Q15,#2
+ VMLAL.S16 Q11,D9,D0[2]
+ VMLAL.S16 Q12,D10,D1[2]
+ VEXT.8 Q4,Q4,Q5,#2
+ VMLAL.S16 Q9,D10,D2[2]
+ VMLAL.S16 Q10,D11,D3[2]
+ VMLAL.S16 Q11,D11,D2[2]
+ VMLAL.S16 Q12,D12,D3[2]
+ VEXT.8 Q5,Q5,Q6,#2
+ VMLAL.S16 Q9,D12,D4[2]
+ VMLAL.S16 Q10,D13,D5[2]
+ VMLAL.S16 Q11,D13,D4[2]
+ VMLAL.S16 Q12,D14,D5[2]
+ VEXT.8 Q6,Q6,Q7,#2
+ VMLAL.S16 Q9,D14,D6[2]
+ VMLAL.S16 Q10,D15,D7[2]
+ VMLAL.S16 Q11,D15,D6[2]
+ VMLAL.S16 Q12,D16,D7[2]
+ VEXT.8 Q7,Q7,Q8,#2
+
+ VMLAL.S16 Q9,D8,D0[3]
+ VMLAL.S16 Q10,D9,D1[3]
+ VEXT.8 Q8,Q8,Q15,#2
+ VMLAL.S16 Q11,D9,D0[3]
+ VMLAL.S16 Q12,D10,D1[3]
+ VEXT.8 Q4,Q4,Q5,#2
+ VMLAL.S16 Q9,D10,D2[3]
+ VMLAL.S16 Q10,D11,D3[3]
+ VMLAL.S16 Q11,D11,D2[3]
+ VMLAL.S16 Q12,D12,D3[3]
+ VEXT.8 Q5,Q5,Q6,#2
+ VMLAL.S16 Q9,D12,D4[3]
+ VMLAL.S16 Q10,D13,D5[3]
+ VMLAL.S16 Q11,D13,D4[3]
+ VMLAL.S16 Q12,D14,D5[3]
+ VEXT.8 Q6,Q6,Q7,#2
+ VMLAL.S16 Q9,D14,D6[3]
+ VMLAL.S16 Q10,D15,D7[3]
+ VMLAL.S16 Q11,D15,D6[3]
+ VMLAL.S16 Q12,D16,D7[3]
+ VEXT.8 Q7,Q7,Q8,#2
+
+ VMOV.S16 D8,D9
+ VEXT.8 Q8,Q8,Q15,#2
+ VMOV.S16 D9,D10
+ VADD.S32 Q9,Q9,Q10
+ VMOV.S16 D10,D11
+ VMOV.S16 D11,D12
+ VADD.S32 Q11,Q11,Q12
+ VMOV.S16 D12,D13
+ VQRSHRN.S32 D28,Q9,#15
+ VMOV.S16 D13,D14
+ VMOV.S16 D14,D15
+ VQRSHRN.S32 D29,Q11,#15
+ VMOV.S16 D15,D16
+
+ VLD1.S16 {Q8},[r4]!
+ ADD r5, r5, #8
+ CMP r5, #80
+ VST1.S16 {D28,D29},[r3]!
+ BLT LOOP_6K7K
+
+ ADD r0, r13, #160 @x + lg
+ MOV r1, r12
+ @LDR r1, [sp, #-4] @mem address
+
+ VLD1.S16 {D0, D1, D2, D3}, [r0]!
+ VLD1.S16 {D4, D5, D6, D7}, [r0]!
+
+ VST1.S16 {D0, D1, D2, D3}, [r1]!
+ VST1.S16 {D4, D5, D6}, [r1]!
+ VST1.S16 D7[0], [r1]!
+ VST1.S16 D7[1], [r1]!
+
+Filt_6k_7k_end:
+
+ ADD r13, r13, #240
+ LDMFD r13!, {r0 - r12, r15}
+
+Lable1:
+ .word fir_6k_7k
+ @ENDFUNC
+ .END
+
+
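
A C sketch of the 31-tap band-pass FIR being applied (L_FIR = 31, L_SUBFR16k = 80; the assembly pads fir_6k_7k with one zero tap so it can process 8 lanes at a time; the _ref name is mine):

    #include <string.h>
    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;
    #define L_FIR      31
    #define L_SUBFR16k 80
    extern const Word16 fir_6k_7k[L_FIR];     /* Q15 table from the codec */

    void Filt_6k_7k_ref(Word16 signal[], Word16 lg, Word16 mem[])
    {
        Word16 x[L_SUBFR16k + (L_FIR - 1)];
        Word32 L_tmp;
        int i, j;

        memcpy(x, mem, (L_FIR - 1) * sizeof(Word16));      /* history */
        for (i = 0; i < lg; i++)
            x[i + L_FIR - 1] = signal[i] >> 2;             /* MAC headroom */

        for (i = 0; i < lg; i++) {
            L_tmp = 0;
            for (j = 0; j < L_FIR; j++)
                L_tmp += (Word32)x[i + j] * fir_6k_7k[j];
            signal[i] = (Word16)((L_tmp + 0x4000) >> 15);  /* VQRSHRN #15 */
        }
        memcpy(mem, x + lg, (L_FIR - 1) * sizeof(Word16)); /* new history */
    }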
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s
new file mode 100644
index 0000000..60e9ade
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Norm_Corr_neon.s
@@ -0,0 +1,270 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@static void Norm_Corr (Word16 exc[], /* (i) : excitation buffer */
+@ Word16 xn[], /* (i) : target vector */
+@ Word16 h[], /* (i) Q15 : impulse response of synth/wgt filters */
+@ Word16 L_subfr, /* (i) : sub-frame length */
+@ Word16 t_min, /* (i) : minimum value of pitch lag. */
+@ Word16 t_max, /* (i) : maximum value of pitch lag. */
+@ Word16 corr_norm[]) /* (o) Q15 : normalized correlation */
+@
+
+@ r0 --- exc[]
+@ r1 --- xn[]
+@ r2 --- h[]
+@ r3 --- L_subfr
+@ r4 --- t_min
+@ r5 --- t_max
+@ r6 --- corr_norm[]
+
+
+ .section .text
+ .global Norm_corr_asm
+ .extern Convolve_asm
+ .extern Isqrt_n
+@******************************
+@ constant
+@******************************
+.equ EXC , 0
+.equ XN , 4
+.equ H , 8
+.equ L_SUBFR , 12
+.equ voSTACK , 172
+.equ T_MIN , 212
+.equ T_MAX , 216
+.equ CORR_NORM , 220
+
+Norm_corr_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ SUB r13, r13, #voSTACK
+
+ ADD r8, r13, #20 @get the excf[L_SUBFR]
+ LDR r4, [r13, #T_MIN] @get t_min
+ RSB r11, r4, #0 @k = -t_min
+ ADD r5, r0, r11, LSL #1 @get the &exc[k]
+
+ @transfer Convolve function
+ STMFD sp!, {r0 - r3}
+ MOV r0, r5
+ MOV r1, r2
+ MOV r2, r8 @r2 --- excf[]
+ BL Convolve_asm
+ LDMFD sp!, {r0 - r3}
+
+ @ r8 --- excf[]
+
+ MOV r14, r1 @copy xn[] address
+ MOV r7, #1
+ VLD1.S16 {Q0, Q1}, [r14]!
+ VLD1.S16 {Q2, Q3}, [r14]!
+ VLD1.S16 {Q4, Q5}, [r14]!
+ VLD1.S16 {Q6, Q7}, [r14]!
+
+ VMULL.S16 Q10, D0, D0
+ VMLAL.S16 Q10, D1, D1
+ VMLAL.S16 Q10, D2, D2
+ VMLAL.S16 Q10, D3, D3
+ VMLAL.S16 Q10, D4, D4
+ VMLAL.S16 Q10, D5, D5
+ VMLAL.S16 Q10, D6, D6
+ VMLAL.S16 Q10, D7, D7
+ VMLAL.S16 Q10, D8, D8
+ VMLAL.S16 Q10, D9, D9
+ VMLAL.S16 Q10, D10, D10
+ VMLAL.S16 Q10, D11, D11
+ VMLAL.S16 Q10, D12, D12
+ VMLAL.S16 Q10, D13, D13
+ VMLAL.S16 Q10, D14, D14
+ VMLAL.S16 Q10, D15, D15
+
+ VQADD.S32 D20, D20, D21
+ VMOV.S32 r9, D20[0]
+ VMOV.S32 r10, D20[1]
+ QADD r6, r9, r10
+ QADD r6, r6, r6
+ QADD r9, r6, r7 @L_tmp = (L_tmp << 1) + 1;
+ CLZ r7, r9
+ SUB r6, r7, #1 @exp = norm_l(L_tmp)
+ RSB r7, r6, #32 @exp = 32 - exp
+ MOV r6, r7, ASR #1
+ RSB r7, r6, #0 @scale = -(exp >> 1)
+
+ @loop for every possible period
+ @for(t = t_min@ t <= t_max@ t++)
+ @r7 --- scale r4 --- t_min r8 --- excf[]
+
+LOOPFOR:
+ ADD r14, r13, #20 @copy of excf[]
+ MOV r12, r1 @copy of xn[]
+ MOV r8, #0x8000
+
+ VLD1.S16 {Q0, Q1}, [r14]! @ load 16 excf[]
+ VLD1.S16 {Q2, Q3}, [r14]! @ load 16 excf[]
+ VLD1.S16 {Q4, Q5}, [r12]! @ load 16 x[]
+ VLD1.S16 {Q6, Q7}, [r12]! @ load 16 x[]
+ VMULL.S16 Q10, D0, D0 @L_tmp1 += excf[] * excf[]
+ VMULL.S16 Q11, D0, D8 @L_tmp += x[] * excf[]
+ VMLAL.S16 Q10, D1, D1
+ VMLAL.S16 Q11, D1, D9
+ VMLAL.S16 Q10, D2, D2
+ VMLAL.S16 Q11, D2, D10
+ VMLAL.S16 Q10, D3, D3
+ VMLAL.S16 Q11, D3, D11
+ VMLAL.S16 Q10, D4, D4
+ VMLAL.S16 Q11, D4, D12
+ VMLAL.S16 Q10, D5, D5
+ VMLAL.S16 Q11, D5, D13
+ VMLAL.S16 Q10, D6, D6
+ VMLAL.S16 Q11, D6, D14
+ VMLAL.S16 Q10, D7, D7
+ VMLAL.S16 Q11, D7, D15
+
+ VLD1.S16 {Q0, Q1}, [r14]! @ load 16 excf[]
+ VLD1.S16 {Q2, Q3}, [r14]! @ load 16 excf[]
+ VLD1.S16 {Q4, Q5}, [r12]! @ load 16 x[]
+ VLD1.S16 {Q6, Q7}, [r12]! @ load 16 x[]
+ VMLAL.S16 Q10, D0, D0
+ VMLAL.S16 Q11, D0, D8
+ VMLAL.S16 Q10, D1, D1
+ VMLAL.S16 Q11, D1, D9
+ VMLAL.S16 Q10, D2, D2
+ VMLAL.S16 Q11, D2, D10
+ VMLAL.S16 Q10, D3, D3
+ VMLAL.S16 Q11, D3, D11
+ VMLAL.S16 Q10, D4, D4
+ VMLAL.S16 Q11, D4, D12
+ VMLAL.S16 Q10, D5, D5
+ VMLAL.S16 Q11, D5, D13
+ VMLAL.S16 Q10, D6, D6
+ VMLAL.S16 Q11, D6, D14
+ VMLAL.S16 Q10, D7, D7
+ VMLAL.S16 Q11, D7, D15
+
+ VQADD.S32 D20, D20, D21
+ VQADD.S32 D22, D22, D23
+
+ VPADD.S32 D20, D20, D20 @D20[0] --- L_tmp1 << 1
+ VPADD.S32 D22, D22, D22 @D22[0] --- L_tmp << 1
+
+ VMOV.S32 r6, D20[0]
+ VMOV.S32 r5, D22[0]
+
+ @r5 --- L_tmp, r6 --- L_tmp1
+ MOV r10, #1
+ ADD r5, r10, r5, LSL #1 @L_tmp = (L_tmp << 1) + 1
+ ADD r6, r10, r6, LSL #1 @L_tmp1 = (L_tmp1 << 1) + 1
+
+ CLZ r10, r5
+ CMP r5, #0
+ RSBLT r11, r5, #0
+ CLZLT r10, r11
+ SUB r10, r10, #1 @exp = norm_l(L_tmp)
+
+ MOV r5, r5, LSL r10 @L_tmp = (L_tmp << exp)
+ RSB r10, r10, #30 @exp_corr = 30 - exp
+ MOV r11, r5, ASR #16 @corr = extract_h(L_tmp)
+
+ CLZ r5, r6
+ SUB r5, r5, #1
+ MOV r6, r6, LSL r5 @L_tmp = (L_tmp1 << exp)
+ RSB r5, r5, #30 @exp_norm = 30 - exp
+
+ @r10 --- exp_corr, r11 --- corr
+ @r6 --- L_tmp, r5 --- exp_norm
+
+ @Isqrt_n(&L_tmp, &exp_norm)
+
+ MOV r14, r0
+ MOV r12, r1
+
+ STMFD sp!, {r0 - r4, r7 - r12, r14}
+ ADD r1, sp, #4
+ ADD r0, sp, #0
+ STR r6, [sp]
+ STRH r5, [sp, #4]
+ BL Isqrt_n
+ LDR r6, [sp]
+ LDRSH r5, [sp, #4]
+ LDMFD sp!, {r0 - r4, r7 - r12, r14}
+ MOV r0, r14
+ MOV r1, r12
+
+
+ MOV r6, r6, ASR #16 @norm = extract_h(L_tmp)
+ MUL r12, r6, r11
+ ADD r12, r12, r12 @L_tmp = vo_L_mult(corr, norm)
+
+ ADD r6, r10, r5
+ ADD r6, r6, r7 @exp_corr + exp_norm + scale
+
+ CMP r6, #0
+ RSBLT r6, r6, #0
+ MOVLT r12, r12, ASR r6
+ MOVGT r12, r12, LSL r6 @L_tmp = L_shl(L_tmp, exp_corr + exp_norm + scale)
+
+ ADD r12, r12, r8
+ MOV r12, r12, ASR #16 @vo_round(L_tmp)
+
+ LDR r5, [r13, #CORR_NORM] @ get corr_norm address
+ LDR r6, [r13, #T_MAX] @ get t_max
+ ADD r10, r5, r4, LSL #1 @ get corr_norm[t] address
+ STRH r12, [r10] @ corr_norm[t] = vo_round(L_tmp)
+
+ CMP r4, r6
+ BEQ Norm_corr_asm_end
+
+ ADD r4, r4, #1 @ t_min ++
+ RSB r5, r4, #0 @ k
+
+ MOV r6, #63 @ i = 63
+ MOV r8, r0 @ exc[]
+ MOV r9, r2 @ h[]
+ ADD r10, r13, #20 @ excf[]
+
+ ADD r8, r8, r5, LSL #1 @ exc[k] address
+ ADD r9, r9, r6, LSL #1 @ h[i] address
+ ADD r10, r10, r6, LSL #1 @ excf[i] address
+ LDRSH r11, [r8] @ tmp = exc[k]
+
+LOOPK:
+ LDRSH r8, [r9], #-2 @ load h[i]
+ LDRSH r12, [r10, #-2] @ load excf[i - 1]
+ MUL r14, r11, r8
+ MOV r8, r14, ASR #15
+ ADD r14, r8, r12
+ STRH r14, [r10], #-2
+ SUBS r6, r6, #1
+ BGT LOOPK
+
+ LDRSH r8, [r9] @ load h[0]
+ MUL r14, r11, r8
+ LDR r6, [r13, #T_MAX] @ get t_max
+ MOV r8, r14, ASR #15
+ STRH r8, [r10]
+
+ CMP r4, r6
+ BLE LOOPFOR
+
+Norm_corr_asm_end:
+
+ ADD r13, r13, #voSTACK
+ LDMFD r13!, {r4 - r12, r15}
+
+ .END
+
+
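
The LOOPK block above is the key trick: instead of re-running the convolution for every candidate lag, excf[] is slid by one sample per lag. A C sketch of that update (L_SUBFR = 64, saturation omitted; the helper name is mine):

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;

    /* Slide the filtered excitation from lag t to lag t+1:
     * excf[i] = (exc[-(t+1)] * h[i] >> 15) + old excf[i-1]          */
    static void norm_corr_update_excf(const Word16 *exc_k,  /* &exc[-(t+1)] */
                                      const Word16 h[64], Word16 excf[64])
    {
        Word16 tmp = *exc_k;
        int i;
        for (i = 63; i > 0; i--)
            excf[i] = (Word16)((((Word32)tmp * h[i]) >> 15) + excf[i - 1]);
        excf[0] = (Word16)(((Word32)tmp * h[0]) >> 15);
    }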
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s
new file mode 100644
index 0000000..cb1764f
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s
@@ -0,0 +1,133 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@**********************************************************************/
+@void Syn_filt_32(
+@ Word16 a[], /* (i) Q12 : a[m+1] prediction coefficients */
+@ Word16 m, /* (i) : order of LP filter */
+@ Word16 exc[], /* (i) Qnew: excitation (exc[i] >> Qnew) */
+@ Word16 Qnew, /* (i) : exc scaling = 0(min) to 8(max) */
+@ Word16 sig_hi[], /* (o) /16 : synthesis high */
+@ Word16 sig_lo[], /* (o) /16 : synthesis low */
+@ Word16 lg /* (i) : size of filtering */
+@)
+@***********************************************************************
+@ a[] --- r0
+@ m --- r1
+@ exc[] --- r2
+@ Qnew --- r3
+@ sig_hi[] --- r4
+@ sig_lo[] --- r5
+@ lg --- r6
+
+ .section .text
+ .global Syn_filt_32_asm
+
+Syn_filt_32_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ LDR r4, [r13, #40] @ get sig_hi[] address
+ LDR r5, [r13, #44] @ get sig_lo[] address
+
+ LDRSH r6, [r0], #2 @ load Aq[0]
+ ADD r7, r3, #4 @ 4 + Q_new
+ MOV r3, r6, ASR r7 @ a0 = Aq[0] >> (4 + Q_new)
+
+ SUB r10, r4, #32 @ sig_hi[-16] address
+ SUB r11, r5, #32 @ sig_lo[-16] address
+
+ VLD1.S16 {D0, D1, D2, D3}, [r0]! @a[1] ~ a[16]
+
+ MOV r8, #0 @ i = 0
+
+ VLD1.S16 {D4, D5, D6, D7}, [r10]! @ sig_hi[-16] ~ sig_hi[-1]
+ VREV64.16 D0, D0
+ VREV64.16 D1, D1
+ VLD1.S16 {D8, D9, D10, D11}, [r11]! @ sig_lo[-16] ~ sig_lo[-1]
+ VREV64.16 D2, D2
+ VREV64.16 D3, D3
+ VDUP.S32 Q15, r8
+
+SYN_LOOP:
+
+ LDRSH r6, [r2], #2 @exc[i]
+ @L_tmp = L_msu(L_tmp, sig_lo[i - j], a[j])@
+ VMULL.S16 Q10, D8, D3
+ VEXT.8 D8, D8, D9, #2
+ VMLAL.S16 Q10, D9, D2
+ VMLAL.S16 Q10, D10, D1
+ VMLAL.S16 Q10, D11, D0
+
+ VEXT.8 D9, D9, D10, #2
+ VEXT.8 D10, D10, D11, #2
+
+ VPADD.S32 D28, D20, D21
+ MUL r12, r6, r3 @exc[i] * a0
+ VPADD.S32 D29, D28, D28
+ VDUP.S32 Q10, D29[0] @result1
+
+ VMULL.S16 Q11, D4, D3
+ VMLAL.S16 Q11, D5, D2
+ VSUB.S32 Q10, Q15, Q10
+ @L_tmp = L_msu(L_tmp, sig_hi[i - j], a[j])@
+
+ VMLAL.S16 Q11, D6, D1
+ VEXT.8 D4, D4, D5, #2
+ VMLAL.S16 Q11, D7, D0
+
+
+ VEXT.8 D5, D5, D6, #2
+ VEXT.8 D6, D6, D7, #2
+
+ VPADD.S32 D28, D22, D23
+ VPADD.S32 D29, D28, D28
+ MOV r14, r12, LSL #1 @exc[i] * a0 << 1
+ VDUP.S32 Q11, D29[0] @result2
+
+
+
+ VSHR.S32 Q10, Q10, #11 @result1 >>= 11
+ VSHL.S32 Q11, Q11, #1 @result2 <<= 1
+ VDUP.S32 Q12, r14
+ VADD.S32 Q12, Q12, Q10 @L_tmp += (-result1) >> 11
+ VSUB.S32 Q12, Q12, Q11 @L_tmp -= result2 << 1
+
+ VSHL.S32 Q12, Q12, #3 @L_tmp <<= 3
+
+
+ VSHRN.S32 D20, Q12, #16 @sig_hi[i] = L_tmp >> 16@
+ VMOV.S16 r10, D20[0]
+ VSHR.S32 Q12, Q12, #4 @L_tmp >>= 4
+ VEXT.8 D7, D7, D20, #2
+ STRH r10, [r4], #2 @store sig_hi[i]
+ VMOV.S32 r11, D24[0] @r11 --- L_tmp >>= 4
+ ADD r8, r8, #1
+ SUB r12, r11, r10, LSL #12
+ @MOV r11, r12, ASR #16 @sig_lo[i]
+ VDUP.S16 D21, r12
+ VEXT.8 D11, D11, D21, #2
+ STRH r12, [r5], #2 @store sig_lo[i]
+
+ CMP r8, #64
+ BLT SYN_LOOP
+
+Syn_filt_32_end:
+
+ LDMFD r13!, {r4 - r12, r15}
+ @ENDFUNC
+ .END
+
+
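
A C sketch of the double-precision (hi/lo) synthesis filter above; the assembly fixes m = 16 and lg = 64, and the QADD/VQRSHRN saturation is omitted here:

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;

    void Syn_filt_32_ref(Word16 a[], Word16 m, Word16 exc[], Word16 Qnew,
                         Word16 sig_hi[], Word16 sig_lo[], Word16 lg)
    {
        Word16 a0 = a[0] >> (4 + Qnew);   /* input / 16 and >> Qnew */
        int i, j;

        for (i = 0; i < lg; i++) {        /* sig_hi/sig_lo[-16..-1] are history */
            Word32 lo = 0, hi = 0, L_tmp;
            for (j = 1; j <= m; j++) {
                lo += (Word32)sig_lo[i - j] * a[j];
                hi += (Word32)sig_hi[i - j] * a[j];
            }
            L_tmp = 2 * (Word32)exc[i] * a0 + ((-lo) >> 11) - (hi << 1);
            L_tmp <<= 3;                  /* a[] in Q12, sig_hi in Q-4 */
            sig_hi[i] = (Word16)(L_tmp >> 16);
            sig_lo[i] = (Word16)((L_tmp >> 4) - ((Word32)sig_hi[i] << 12));
        }
    }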
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s
new file mode 100644
index 0000000..189e33b
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s
@@ -0,0 +1,178 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@*void Convolve (
+@* Word16 x[], /* (i) : input vector */
+@* Word16 h[], /* (i) : impulse response */
+@* Word16 y[], /* (o) : output vector */
+@* Word16 L /* (i) : vector size */
+@*)
+@
+@ r0 --- x[]
+@ r1 --- h[]
+@ r2 --- y[]
+@ r3 --- L
+
+ .section .text
+ .global Convolve_asm
+
+Convolve_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ MOV r3, #0 @ n = 0 (the vector size L is fixed at 64 below)
+ MOV r11, #0x8000
+
+LOOP:
+ @MOV r8, #0 @ s = 0
+ ADD r4, r1, r3, LSL #1 @ tmpH address
+ ADD r5, r3, #1 @ i = n + 1
+ MOV r6, r0
+ LDRSH r9, [r6], #2 @ *tmpX++
+ LDRSH r10, [r4] @ *tmpH--
+ SUB r5, r5, #1
+ VMOV.S32 Q10, #0
+ MUL r8, r9, r10
+
+LOOP1:
+ CMP r5, #0
+ BLE L1
+ SUB r4, r4, #8
+ MOV r9, r4
+ VLD1.S16 D0, [r6]!
+ VLD1.S16 D1, [r9]!
+ VREV64.16 D1, D1
+ SUBS r5, r5, #4
+ VMLAL.S16 Q10, D0, D1
+ B LOOP1
+L1:
+ VADD.S32 D20, D20, D21
+ VPADD.S32 D20, D20, D20
+ VMOV.S32 r5, D20[0]
+ ADD r5, r5, r8
+ ADD r5, r11, r5, LSL #1
+ MOV r5, r5, LSR #16 @extract_h(s)
+ ADD r3, r3, #1
+ STRH r5, [r2], #2 @y[n]
+
+
+ @MOV r8, #0
+ ADD r4, r1, r3, LSL #1 @tmpH address
+ ADD r5, r3, #1
+ MOV r6, r0
+ LDRSH r9, [r6], #2 @ *tmpX++
+ LDRSH r10, [r4], #-2
+ LDRSH r12, [r6], #2
+ LDRSH r14, [r4]
+
+ MUL r8, r9, r10
+ SUB r5, r5, #2
+ MLA r8, r12, r14, r8
+
+ VMOV.S32 Q10, #0
+LOOP2:
+ CMP r5, #0
+ BLE L2
+ SUB r4, r4, #8
+ MOV r9, r4
+ VLD1.S16 D0, [r6]!
+ VLD1.S16 D1, [r9]!
+ SUBS r5, r5, #4
+ VREV64.16 D1, D1
+ VMLAL.S16 Q10, D0, D1
+ B LOOP2
+L2:
+ VADD.S32 D20, D20, D21
+ VPADD.S32 D20, D20, D20
+ VMOV.S32 r5, D20[0]
+ ADD r8, r8, r5
+ ADD r8, r11, r8, LSL #1
+ MOV r8, r8, LSR #16 @extract_h(s)
+ ADD r3, r3, #1
+ STRH r8, [r2], #2 @y[n]
+
+
+ @MOV r8, #0
+ ADD r4, r1, r3, LSL #1
+ ADD r5, r3, #1
+ MOV r6, r0
+ LDRSH r9, [r6], #2
+ LDRSH r10, [r4], #-2
+ LDRSH r12, [r6], #2
+ LDRSH r14, [r4], #-2
+ MUL r8, r9, r10
+ LDRSH r9, [r6], #2
+ LDRSH r10, [r4]
+ MLA r8, r12, r14, r8
+ SUB r5, r5, #3
+ MLA r8, r9, r10, r8
+
+ VMOV.S32 Q10, #0
+LOOP3:
+ CMP r5, #0
+ BLE L3
+ SUB r4, r4, #8
+ MOV r9, r4
+ VLD1.S16 D0, [r6]!
+ VLD1.S16 D1, [r9]!
+ VREV64.16 D1, D1
+ SUBS r5, r5, #4
+ VMLAL.S16 Q10, D0, D1
+ B LOOP3
+
+L3:
+ VADD.S32 D20, D20, D21
+ VPADD.S32 D20, D20, D20
+ VMOV.S32 r5, D20[0]
+ ADD r8, r8, r5
+ ADD r8, r11, r8, LSL #1
+ MOV r8, r8, LSR #16 @extract_h(s)
+ ADD r3, r3, #1
+ STRH r8, [r2], #2 @y[n]
+
+ ADD r5, r3, #1 @ i = n + 1
+ ADD r4, r1, r5, LSL #1 @ tmpH address
+ MOV r6, r0
+ VMOV.S32 Q10, #0
+LOOP4:
+ CMP r5, #0
+ BLE L4
+ SUB r4, r4, #8
+ MOV r9, r4
+ VLD1.S16 D0, [r6]!
+ VLD1.S16 D1, [r9]!
+ VREV64.16 D1, D1
+ SUBS r5, r5, #4
+ VMLAL.S16 Q10, D0, D1
+ B LOOP4
+L4:
+ VADD.S32 D20, D20, D21
+ VPADD.S32 D20, D20, D20
+ VMOV.S32 r5, D20[0]
+ ADD r5, r11, r5, LSL #1
+ MOV r5, r5, LSR #16 @extract_h(s)
+ ADD r3, r3, #1
+ STRH r5, [r2], #2 @y[n]
+
+ CMP r3, #64
+ BLT LOOP
+
+Convolve_asm_end:
+
+ LDMFD r13!, {r4 - r12, r15}
+
+ @ENDFUNC
+ .END
+
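
The four unrolled blocks above compute y[n] for n = 0..3 modulo 4, so each NEON pass sees a multiple-of-4 tail. The scalar convolution, as a C sketch (L is fixed at 64 in the assembly; the _ref name is mine):

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;

    void Convolve_ref(Word16 x[], Word16 h[], Word16 y[], Word16 L)
    {
        int i, n;
        for (n = 0; n < L; n++) {
            Word32 s = 0;
            for (i = 0; i <= n; i++)
                s += (Word32)x[i] * h[n - i];
            y[n] = (Word16)(((s << 1) + 0x8000) >> 16);  /* extract_h with rounding */
        }
    }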
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/cor_h_vec_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/cor_h_vec_neon.s
new file mode 100644
index 0000000..2e339db
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/cor_h_vec_neon.s
@@ -0,0 +1,151 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@static void cor_h_vec_012(
+@ Word16 h[], /* (i) scaled impulse response */
+@ Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */
+@ Word16 track, /* (i) track to use */
+@ Word16 sign[], /* (i) sign vector */
+@ Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */
+@ Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */
+@ Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */
+@)
+@r0 ---- h[]
+@r1 ---- vec[]
+@r2 ---- track
+@r3 ---- sign[]
+@r4 ---- rrixix[][NB_POS]
+@r5 ---- cor_1[]
+@r6 ---- cor_2[]
+
+ .section .text
+ .global cor_h_vec_012_asm
+
+cor_h_vec_012_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ LDR r4, [r13, #40] @load rrixix[][NB_POS]
+ ADD r7, r4, r2, LSL #5 @r7 --- p0 = rrixix[track]
+ MOV r4, #0 @i=0
+
+ @r0 --- h[], r1 --- vec[], r2 --- pos
+ @r3 --- sign[], r4 --- i, r7 --- p0
+
+LOOPi:
+ MOV r5, #0 @L_sum1 = 0
+ MOV r6, #0 @L_sum2 = 0
+ ADD r9, r1, r2, LSL #1 @p2 = &vec[pos]
+ MOV r10, r0 @p1 = h
+ RSB r11, r2, #62 @j=62-pos
+
+LOOPj1:
+ LDRSH r12, [r10], #2
+ LDRSH r8, [r9], #2
+ LDRSH r14, [r9]
+ SUBS r11, r11, #1
+ MLA r5, r12, r8, r5
+ MLA r6, r12, r14, r6
+ BGE LOOPj1
+
+ LDRSH r12, [r10], #2 @*p1++
+ MOV r6, r6, LSL #2 @L_sum2 = (L_sum2 << 2)
+ MLA r5, r12, r14, r5
+ MOV r14, #0x8000
+ MOV r5, r5, LSL #2 @L_sum1 = (L_sum1 << 2)
+ ADD r10, r6, r14
+ ADD r9, r5, r14
+ MOV r5, r9, ASR #16
+ MOV r6, r10, ASR #16
+ ADD r9, r3, r2, LSL #1 @address of sign[pos]
+ ADD r8, r7, #32
+ LDRSH r10, [r9], #2 @sign[pos]
+ LDRSH r11, [r9] @sign[pos + 1]
+ MUL r12, r5, r10
+ MUL r14, r6, r11
+ MOV r5, r12, ASR #15
+ MOV r6, r14, ASR #15
+ LDR r9, [r13, #44]
+ LDR r12, [r13, #48]
+ LDRSH r10, [r7], #2 @*p0++
+ LDRSH r11, [r8] @*p3++
+ ADD r9, r9, r4, LSL #1
+ ADD r12, r12, r4, LSL #1
+ ADD r5, r5, r10
+ ADD r6, r6, r11
+ STRH r5, [r9]
+ STRH r6, [r12]
+
+ ADD r2, r2, #4
+
+ MOV r5, #0 @L_sum1 = 0
+ MOV r6, #0 @L_sum2 = 0
+ ADD r9, r1, r2, LSL #1 @p2 = &vec[pos]
+ MOV r10, r0 @p1 = h
+ RSB r11, r2, #62 @j=62-pos
+ ADD r4, r4, #1 @i++
+
+LOOPj2:
+ LDRSH r12, [r10], #2
+ LDRSH r8, [r9], #2
+ LDRSH r14, [r9]
+ SUBS r11, r11, #1
+ MLA r5, r12, r8, r5
+ MLA r6, r12, r14, r6
+ BGE LOOPj2
+
+ LDRSH r12, [r10], #2 @*p1++
+ MOV r6, r6, LSL #2 @L_sum2 = (L_sum2 << 2)
+ MLA r5, r12, r14, r5
+ MOV r14, #0x8000
+ MOV r5, r5, LSL #2 @L_sum1 = (L_sum1 << 2)
+ ADD r10, r6, r14
+ ADD r9, r5, r14
+
+ MOV r5, r9, ASR #16
+ MOV r6, r10, ASR #16
+ ADD r9, r3, r2, LSL #1 @address of sign[pos]
+ ADD r8, r7, #32
+ LDRSH r10, [r9], #2 @sign[pos]
+ LDRSH r11, [r9] @sign[pos + 1]
+ MUL r12, r5, r10
+ MUL r14, r6, r11
+ MOV r5, r12, ASR #15
+ MOV r6, r14, ASR #15
+ LDR r9, [r13, #44]
+ LDR r12, [r13, #48]
+ LDRSH r10, [r7], #2 @*p0++
+ LDRSH r11, [r8] @*p3++
+ ADD r9, r9, r4, LSL #1
+ ADD r12, r12, r4, LSL #1
+ ADD r5, r5, r10
+ ADD r6, r6, r11
+ STRH r5, [r9]
+ STRH r6, [r12]
+ ADD r4, r4, #1 @i+1
+ ADD r2, r2, #4 @pos += STEP
+ CMP r4, #16
+
+ BLT LOOPi
+
+the_end:
+ LDMFD r13!, {r4 - r12, r15}
+
+ .END
+
+
+
+
+
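
A C sketch of the per-position correlation pair (NB_POS = 16, STEP = 4, L_SUBFR = 64; cor_1[]/cor_2[] arrive on the stack at #44/#48, and p3 = p0 + NB_POS matches ADD r8, r7, #32 above; the _ref name is mine):

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;
    #define NB_POS  16
    #define STEP    4
    #define L_SUBFR 64

    static void cor_h_vec_012_ref(Word16 h[], Word16 vec[], Word16 track,
                                  Word16 sign[], Word16 rrixix[][NB_POS],
                                  Word16 cor_1[], Word16 cor_2[])
    {
        Word16 *p0 = rrixix[track], *p3 = rrixix[track + 1];
        int i, j, pos = track;

        for (i = 0; i < NB_POS; i++, pos += STEP) {
            Word32 L_sum1 = 0, L_sum2 = 0;
            Word16 corr1, corr2;
            for (j = pos; j < L_SUBFR - 1; j++) {
                L_sum1 += (Word32)h[j - pos] * vec[j];
                L_sum2 += (Word32)h[j - pos] * vec[j + 1];
            }
            L_sum1 += (Word32)h[L_SUBFR - 1 - pos] * vec[L_SUBFR - 1];
            L_sum1 <<= 2;
            L_sum2 <<= 2;
            corr1 = (Word16)((L_sum1 + 0x8000) >> 16);
            corr2 = (Word16)((L_sum2 + 0x8000) >> 16);
            cor_1[i] = (Word16)((((Word32)corr1 * sign[pos])     >> 15) + p0[i]);
            cor_2[i] = (Word16)((((Word32)corr2 * sign[pos + 1]) >> 15) + p3[i]);
        }
    }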
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/pred_lt4_1_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/pred_lt4_1_neon.s
new file mode 100644
index 0000000..3b8853f
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/pred_lt4_1_neon.s
@@ -0,0 +1,100 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@void Pred_lt4(
+@ Word16 exc[], /* in/out: excitation buffer */
+@ Word16 T0, /* input : integer pitch lag */
+@ Word16 frac, /* input : fraction of lag */
+@ Word16 L_subfr /* input : subframe size */
+@)
+@***********************************************************************
+@ r0 --- exc[]
+@ r1 --- T0
+@ r2 --- frac
+@ r3 --- L_subfr
+
+ .section .text
+ .global pred_lt4_asm
+ .extern inter4_2
+
+pred_lt4_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ SUB r4, r0, r1, LSL #1 @ x = exc - T0
+ RSB r2, r2, #0 @ frac = - frac
+ SUB r4, r4, #30 @ x -= L_INTERPOL2 - 1
+ CMP r2, #0
+ ADDLT r2, r2, #4 @ frac += UP_SAMP
+ SUBLT r4, r4, #2 @ x--
+
+ LDR r11, Lable1
+ RSB r2, r2, #3 @ k = UP_SAMP - 1 - frac
+ MOV r8, #0 @ j = 0
+ ADD r11, r11, r2, LSL #6 @ get inter4_2[k][]
+
+ VLD1.S16 {Q0, Q1}, [r11]!
+ VLD1.S16 {Q2, Q3}, [r11]!
+
+ MOV r6, #0x8000
+
+ VLD1.S16 {Q4, Q5}, [r4]! @load 16 x[]
+ VLD1.S16 {Q6, Q7}, [r4]! @load 16 x[]
+
+LOOP:
+ VQDMULL.S16 Q15, D8, D0
+ VQDMLAL.S16 Q15, D9, D1
+ VQDMLAL.S16 Q15, D10, D2
+ VQDMLAL.S16 Q15, D11, D3
+
+ VQDMLAL.S16 Q15, D12, D4
+ VQDMLAL.S16 Q15, D13, D5
+ VQDMLAL.S16 Q15, D14, D6
+ VQDMLAL.S16 Q15, D15, D7
+
+ LDRSH r12, [r4], #2
+
+ VEXT.S16 D8, D8, D9, #1
+ VEXT.S16 D9, D9, D10, #1
+ VEXT.S16 D10, D10, D11, #1
+ VEXT.S16 D11, D11, D12, #1
+ VDUP.S16 D24, r12
+ VEXT.S16 D12, D12, D13, #1
+ VEXT.S16 D13, D13, D14, #1
+
+ VQADD.S32 D30, D30, D31
+ MOV r11, #0x8000
+ VPADD.S32 D30, D30, D30
+ ADD r8, r8, #1
+ VMOV.S32 r12, D30[0]
+ VEXT.S16 D14, D14, D15, #1
+
+ QADD r1, r12, r12 @ L_sum = (L_sum << 2)
+ VEXT.S16 D15, D15, D24, #1
+ QADD r5, r1, r6
+ MOV r1, r5, ASR #16
+ CMP r8, r3
+ STRH r1, [r0], #2 @ exc[j] = (L_sum + 0x8000) >> 16
+ BLT LOOP
+
+pred_lt4_end:
+
+ LDMFD r13!, {r4 - r12, r15}
+
+Lable1:
+ .word inter4_2
+ @ENDFUNC
+ .END
+
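
A C sketch of the 1/4-sample interpolation above (UP_SAMP = 4, L_INTERPOL2 = 16; inter4_2[k][] is the 32-tap filter row selected via Lable1; saturation omitted):

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;
    #define UP_SAMP     4
    #define L_INTERPOL2 16
    extern const Word16 inter4_2[UP_SAMP][2 * L_INTERPOL2];  /* codec table */

    void Pred_lt4_ref(Word16 exc[], Word16 T0, Word16 frac, Word16 L_subfr)
    {
        Word16 *x = exc - T0 - (L_INTERPOL2 - 1);
        int i, j, k;

        frac = -frac;
        if (frac < 0) { frac += UP_SAMP; x--; }
        k = UP_SAMP - 1 - frac;

        for (j = 0; j < L_subfr; j++, x++) {
            Word32 L_sum = 0;
            for (i = 0; i < 2 * L_INTERPOL2; i++)
                L_sum += (Word32)x[i] * inter4_2[k][i];
            exc[j] = (Word16)(((L_sum << 2) + 0x8000) >> 16);  /* round */
        }
    }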
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/residu_asm_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/residu_asm_neon.s
new file mode 100644
index 0000000..b9e6b23
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/residu_asm_neon.s
@@ -0,0 +1,127 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@void Residu (
+@ Word16 a[], /* (i) : prediction coefficients */
+@ Word16 x[], /* (i) : speech signal */
+@ Word16 y[], /* (o) : residual signal */
+@ Word16 lg /* (i) : size of filtering */
+@)
+@a[] RN r0
+@x[] RN r1
+@y[] RN r2
+@lg RN r3
+
+ .section .text
+ .global Residu_opt
+
+Residu_opt:
+
+ STMFD r13!, {r4 - r12, r14}
+ SUB r7, r3, #4 @i = lg - 4
+
+ VLD1.S16 {D0, D1, D2, D3}, [r0]! @get all a[]
+ VLD1.S16 {D4}, [r0]!
+ VMOV.S32 Q8, #0x8000
+
+LOOP1:
+ ADD r9, r1, r7, LSL #1 @copy the address
+ ADD r10, r2, r7, LSL #1
+ MOV r8, r9
+ VLD1.S16 D5, [r8]! @get x[i], x[i+1], x[i+2], x[i+3]
+ VQDMULL.S16 Q10, D5, D0[0] @finish the first L_mult
+
+ SUB r8, r9, #2 @get the x[i-1] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D0[1]
+
+ SUB r8, r9, #4 @load the x[i-2] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D0[2]
+
+ SUB r8, r9, #6 @load the x[i-3] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D0[3]
+
+ SUB r8, r9, #8 @load the x[i-4] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D1[0]
+
+ SUB r8, r9, #10 @load the x[i-5] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D1[1]
+
+ SUB r8, r9, #12 @load the x[i-6] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D1[2]
+
+ SUB r8, r9, #14 @load the x[i-7] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D1[3]
+
+ SUB r8, r9, #16 @load the x[i-8] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D2[0]
+
+ SUB r8, r9, #18 @load the x[i-9] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D2[1]
+
+ SUB r8, r9, #20 @load the x[i-10] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D2[2]
+
+ SUB r8, r9, #22 @load the x[i-11] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D2[3]
+
+ SUB r8, r9, #24 @load the x[i-12] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D3[0]
+
+ SUB r8, r9, #26 @load the x[i-13] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D3[1]
+
+ SUB r8, r9, #28 @load the x[i-14] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D3[2]
+
+ SUB r8, r9, #30 @load the x[i-15] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D3[3]
+
+ SUB r8, r9, #32 @load the x[i-16] address
+ VLD1.S16 D5, [r8]!
+ VQDMLAL.S16 Q10, D5, D4[0]
+
+ SUB r7, r7, #4 @i-=4
+ VQSHL.S32 Q10, Q10, #4
+ VQADD.S32 Q10, Q10, Q8
+ VSHRN.S32 D5, Q10, #16
+ VST1.S16 D5, [r10]!
+ CMP r7, #0
+
+ BGE LOOP1
+
+Residu_asm_end:
+
+ LDMFD r13!, {r4 - r12, r15}
+
+ @ENDFUNC
+ .END
+
+
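
A C sketch of the residual (LPC inverse) filter above; M = 16 with a[] in Q12, and the assembly produces four outputs per pass from i = lg-4 downward, saturating via VQDMULL/VQSHL/VQADD (omitted here):

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;
    #define M 16

    void Residu_ref(Word16 a[], Word16 x[], Word16 y[], Word16 lg)
    {
        int i, j;
        for (i = 0; i < lg; i++) {
            Word32 s = 0;
            for (j = 0; j <= M; j++)      /* x[-16..-1] is filter history */
                s += (Word32)a[j] * x[i - j];
            s <<= 5;                      /* VQDMULL doubling plus VQSHL #4 */
            y[i] = (Word16)((s + 0x8000) >> 16);
        }
    }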
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/scale_sig_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/scale_sig_neon.s
new file mode 100644
index 0000000..14957d8
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/scale_sig_neon.s
@@ -0,0 +1,138 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@**********************************************************************/
+@void Scale_sig(
+@ Word16 x[], /* (i/o) : signal to scale */
+@ Word16 lg, /* (i) : size of x[] */
+@ Word16 exp /* (i) : exponent: x = round(x << exp) */
+@)
+@***********************************************************************
+@ x[] --- r0
+@ lg --- r1
+@ exp --- r2
+
+ .section .text
+ .global Scale_sig_opt
+
+Scale_sig_opt:
+
+ STMFD r13!, {r4 - r12, r14}
+ MOV r4, #4
+ VMOV.S32 Q15, #0x8000
+ VDUP.S32 Q14, r2
+ MOV r5, r0 @ copy x[] address
+ CMP r1, #64
+ MOVEQ r4, #1
+ BEQ LOOP
+ CMP r1, #128
+ MOVEQ r4, #2
+ BEQ LOOP
+ CMP r1, #256
+ BEQ LOOP
+ CMP r1, #80
+ MOVEQ r4, #1
+ BEQ LOOP1
+
+LOOP1:
+ VLD1.S16 {Q0, Q1}, [r5]! @load 16 Word16 x[]
+ VSHLL.S16 Q10, D0, #16
+ VSHLL.S16 Q11, D1, #16
+ VSHLL.S16 Q12, D2, #16
+ VSHLL.S16 Q13, D3, #16
+ VSHL.S32 Q10, Q10, Q14
+ VSHL.S32 Q11, Q11, Q14
+ VSHL.S32 Q12, Q12, Q14
+ VSHL.S32 Q13, Q13, Q14
+ VADDHN.S32 D16, Q10, Q15
+ VADDHN.S32 D17, Q11, Q15
+ VADDHN.S32 D18, Q12, Q15
+ VADDHN.S32 D19, Q13, Q15
+ VST1.S16 {Q8, Q9}, [r0]! @store 16 Word16 x[]
+
+LOOP:
+ VLD1.S16 {Q0, Q1}, [r5]! @load 16 Word16 x[]
+ VLD1.S16 {Q2, Q3}, [r5]! @load 16 Word16 x[]
+ VLD1.S16 {Q4, Q5}, [r5]! @load 16 Word16 x[]
+ VLD1.S16 {Q6, Q7}, [r5]! @load 16 Word16 x[]
+
+ VSHLL.S16 Q8, D0, #16
+ VSHLL.S16 Q9, D1, #16
+ VSHLL.S16 Q10, D2, #16
+ VSHLL.S16 Q11, D3, #16
+ VSHL.S32 Q8, Q8, Q14
+ VSHL.S32 Q9, Q9, Q14
+ VSHL.S32 Q10, Q10, Q14
+ VSHL.S32 Q11, Q11, Q14
+ VADDHN.S32 D16, Q8, Q15
+ VADDHN.S32 D17, Q9, Q15
+ VADDHN.S32 D18, Q10, Q15
+ VADDHN.S32 D19, Q11, Q15
+ VST1.S16 {Q8, Q9}, [r0]! @store 16 Word16 x[]
+
+
+ VSHLL.S16 Q12, D4, #16
+ VSHLL.S16 Q13, D5, #16
+ VSHLL.S16 Q10, D6, #16
+ VSHLL.S16 Q11, D7, #16
+ VSHL.S32 Q12, Q12, Q14
+ VSHL.S32 Q13, Q13, Q14
+ VSHL.S32 Q10, Q10, Q14
+ VSHL.S32 Q11, Q11, Q14
+ VADDHN.S32 D16, Q12, Q15
+ VADDHN.S32 D17, Q13, Q15
+ VADDHN.S32 D18, Q10, Q15
+ VADDHN.S32 D19, Q11, Q15
+ VST1.S16 {Q8, Q9}, [r0]! @store 16 Word16 x[]
+
+ VSHLL.S16 Q10, D8, #16
+ VSHLL.S16 Q11, D9, #16
+ VSHLL.S16 Q12, D10, #16
+ VSHLL.S16 Q13, D11, #16
+ VSHL.S32 Q10, Q10, Q14
+ VSHL.S32 Q11, Q11, Q14
+ VSHL.S32 Q12, Q12, Q14
+ VSHL.S32 Q13, Q13, Q14
+ VADDHN.S32 D16, Q10, Q15
+ VADDHN.S32 D17, Q11, Q15
+ VADDHN.S32 D18, Q12, Q15
+ VADDHN.S32 D19, Q13, Q15
+ VST1.S16 {Q8, Q9}, [r0]! @store 16 Word16 x[]
+
+ VSHLL.S16 Q10, D12, #16
+ VSHLL.S16 Q11, D13, #16
+ VSHLL.S16 Q12, D14, #16
+ VSHLL.S16 Q13, D15, #16
+ VSHL.S32 Q10, Q10, Q14
+ VSHL.S32 Q11, Q11, Q14
+ VSHL.S32 Q12, Q12, Q14
+ VSHL.S32 Q13, Q13, Q14
+ VADDHN.S32 D16, Q10, Q15
+ VADDHN.S32 D17, Q11, Q15
+ VADDHN.S32 D18, Q12, Q15
+ VADDHN.S32 D19, Q13, Q15
+ VST1.S16 {Q8, Q9}, [r0]! @store 16 Word16 x[]
+ SUBS r4, r4, #1
+ BGT LOOP
+
+
+Scale_sig_asm_end:
+
+ LDMFD r13!, {r4 - r12, r15}
+ @ENDFUNC
+ .END
+
+
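
A C sketch of the scaling above. The assembly only dispatches on lg = 64, 80, 128 or 256; VSHL by the register in Q14 shifts right when exp is negative, which the ternary below mirrors (saturation omitted; the _ref name is mine):

    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;

    void Scale_sig_ref(Word16 x[], Word16 lg, Word16 exp)
    {
        int i;
        for (i = 0; i < lg; i++) {
            Word32 L_tmp = (Word32)x[i] << 16;
            L_tmp = (exp >= 0) ? (L_tmp << exp) : (L_tmp >> -exp);
            x[i] = (Word16)((L_tmp + 0x8000) >> 16);   /* round() */
        }
    }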
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s
new file mode 100644
index 0000000..dc3d4a8
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s
@@ -0,0 +1,106 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@void Syn_filt(
+@ Word16 a[], /* (i) Q12 : a[m+1] prediction coefficients */
+@ Word16 x[], /* (i) : input signal */
+@ Word16 y[], /* (o) : output signal */
+@ Word16 mem[] /* (i/o) : memory associated with this filtering */
+@)
+@***********************************************************************
+@ a[] --- r0
+@ x[] --- r1
+@ y[] --- r2
+@ mem[] --- r3
+@ m --- 16 lg --- 80 update --- 1
+
+ .section .text
+ .global Syn_filt_asm
+
+Syn_filt_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ SUB r13, r13, #700 @ y_buf[L_FRAME16k + M16k]
+
+ MOV r4, r3 @ copy mem[] address
+ MOV r5, r13 @ copy yy = y_buf address
+
+ @ for(i = 0@ i < m@ i++)
+ @{
+ @ *yy++ = mem[i]@
+ @}
+ VLD1.S16 {D0, D1, D2, D3}, [r4]! @load 16 mems
+ VST1.S16 {D0, D1, D2, D3}, [r5]! @store 16 mem[] to *yy
+
+ LDRSH r5, [r0], #2 @ load a[0]
+ MOV r8, #0 @ i = 0
+ MOV r5, r5, ASR #1 @ a0 = a[0] >> 1
+ VMOV.S16 D8[0], r5
+ @ load all a[]
+ VLD1.S16 {D0, D1, D2, D3}, [r0]! @ load a[1] ~ a[16]
+ VREV64.16 D0, D0
+ VREV64.16 D1, D1
+ VREV64.16 D2, D2
+ VREV64.16 D3, D3
+ MOV r8, #0 @ loop times
+ MOV r10, r13 @ temp = y_buf
+ ADD r4, r13, #32 @ yy[i] address
+
+ VLD1.S16 {D4, D5, D6, D7}, [r10]! @ first 16 temp_p
+
+SYN_LOOP:
+
+ LDRSH r6, [r1], #2 @ load x[i]
+ MUL r12, r6, r5 @ L_tmp = x[i] * a0
+ ADD r10, r4, r8, LSL #1 @ y[i], yy[i] address
+
+ VDUP.S32 Q10, r12
+ VMULL.S16 Q5, D3, D4
+ VMLAL.S16 Q5, D2, D5
+ VMLAL.S16 Q5, D1, D6
+ VMLAL.S16 Q5, D0, D7
+ VEXT.8 D4, D4, D5, #2
+ VEXT.8 D5, D5, D6, #2
+ VEXT.8 D6, D6, D7, #2
+ VPADD.S32 D12, D10, D11
+ ADD r8, r8, #1
+ VPADD.S32 D10, D12, D12
+
+ VDUP.S32 Q7, D10[0]
+
+ VSUB.S32 Q9, Q10, Q7
+ VQRSHRN.S32 D20, Q9, #12
+ VMOV.S16 r9, D20[0]
+ VEXT.8 D7, D7, D20, #2
+ CMP r8, #80
+ STRH r9, [r10] @ yy[i]
+ STRH r9, [r2], #2 @ y[i]
+
+ BLT SYN_LOOP
+
+ @ update mem[]
+ ADD r5, r13, #160 @ yy[64] address
+ VLD1.S16 {D0, D1, D2, D3}, [r5]!
+ VST1.S16 {D0, D1, D2, D3}, [r3]!
+
+Syn_filt_asm_end:
+
+ ADD r13, r13, #700
+ LDMFD r13!, {r4 - r12, r15}
+ @ENDFUNC
+ .END
+
+
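
A C sketch of the synthesis filter above with the parameters this build fixes (m = 16, lg = 80, update = 1; VQRSHRN #12 rounding is kept, its saturation is not; the _ref name is mine):

    #include <string.h>
    #include <stdint.h>
    typedef int16_t Word16; typedef int32_t Word32;

    void Syn_filt_ref(Word16 a[], Word16 x[], Word16 y[], Word16 mem[])
    {
        Word16 y_buf[80 + 16];
        Word16 *yy = y_buf + 16;              /* yy[-16..-1] = mem[] */
        Word16 a0 = a[0] >> 1;                /* headroom for the MACs */
        int i, j;

        memcpy(y_buf, mem, 16 * sizeof(Word16));
        for (i = 0; i < 80; i++) {
            Word32 L_tmp = (Word32)x[i] * a0;
            for (j = 1; j <= 16; j++)
                L_tmp -= (Word32)a[j] * yy[i - j];
            y[i] = yy[i] = (Word16)((L_tmp + 0x800) >> 12);
        }
        memcpy(mem, &yy[64], 16 * sizeof(Word16));  /* keep last 16 outputs */
    }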