@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
@
@void Syn_filt(
@     Word16 a[],                           /* (i) Q12 : a[m+1] prediction coefficients           */
@     Word16 x[],                           /* (i)     : input signal                             */
@     Word16 y[],                           /* (o)     : output signal                            */
@     Word16 mem[],                         /* (i/o)   : memory associated with this filtering.   */
@)
@***********************************************************************
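@ Argument registers on entry:
@ a[]    ---   r0
@ x[]    ---   r1
@ y[]    ---   r2
@ mem[]  ---   r3
@ This assembly version is specialised for fixed values that the code
@ hard-wires: m = 16 (filter order), lg = 80 (samples per call) and
@ update = 1 (mem[] is always written back on exit).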

          .section  .text
          .global   Syn_filt_asm

@-----------------------------------------------------------------------
@ Syn_filt_asm -- order-16 synthesis filter over 80 samples (ARM NEON)
@
@ In:    r0 = a[]    17 signed 16-bit coefficients a[0..16]
@        r1 = x[]    80 signed 16-bit input samples
@        r2 = y[]    80 signed 16-bit output samples (written)
@        r3 = mem[]  16 signed 16-bit history samples (read, then rewritten)
@ Out:   nothing in registers
@
@ Per sample, with yy[-16..-1] = mem[0..15] and a0 = a[0] >> 1:
@        L_tmp = x[i] * a0 - sum(j = 1..16) a[j] * yy[i - j]
@        y[i]  = yy[i] = saturate16(round(L_tmp >> 12))
@ On exit mem[0..15] = yy[64..79].
@
@ Stack: 40 bytes of core registers + 64 bytes for d8-d15 + 700 bytes of
@        scratch, of which only the first (16 + 80) * 2 = 192 bytes are
@        accessed as y_buf.
@ Clobb: r0-r3 (all advanced by post-increment), r12, flags,
@        d0-d7, d18-d21. r4-r11 and d8-d15 are restored before return.
@-----------------------------------------------------------------------
Syn_filt_asm:

          STMFD         r13!, {r4 - r12, r14}
          @ d8-d15 are callee-saved under the AAPCS and the loop below
          @ overwrites Q5, D12 and Q7, so they must be preserved here.
          VPUSH         {D8-D15}
          SUB           r13, r13, #700                   @ scratch area holding y_buf

          MOV           r4, r3                           @ walk a copy so r3 still points at mem[] for the final store
          MOV           r5, r13                          @ yy = y_buf

          @ for i in 0 .. m-1:  *yy++ = mem[i]
          VLD1.S16      {D0, D1, D2, D3}, [r4]!          @ load mem[0..15]
          VST1.S16      {D0, D1, D2, D3}, [r5]!          @ y_buf[0..15] = mem[0..15]

          LDRSH         r5, [r0], #2                     @ r5 = a[0], r0 now points at a[1]
          MOV           r8, #0                           @ i = 0
          MOV           r5, r5, ASR #1                   @ a0 = a[0] >> 1

          @ Load a[1..16] and reverse the four lanes of each D register so
          @ that lane k of the coefficient vectors lines up with the
          @ oldest-to-newest history held in D4..D7.
          VLD1.S16      {D0, D1, D2, D3}, [r0]!          @ D0..D3 = a[1..16]
          VREV64.16     D0, D0                           @ D0 = a[4], a[3], a[2], a[1]
          VREV64.16     D1, D1                           @ D1 = a[8] .. a[5]
          VREV64.16     D2, D2                           @ D2 = a[12] .. a[9]
          VREV64.16     D3, D3                           @ D3 = a[16] .. a[13]
          MOV           r10, r13                         @ temp = y_buf
          ADD           r4, r13, #32                     @ r4 = &y_buf[16], i.e. &yy[0]

          VLD1.S16      {D4, D5, D6, D7}, [r10]!         @ D4..D7 = yy[-16..-1], the sliding history window

SYN_LOOP:

          LDRSH         r6, [r1], #2                     @ r6 = x[i]
          MUL           r12, r6, r5                      @ L_tmp = x[i] * a0
          ADD           r10, r4, r8, LSL #1              @ r10 = &yy[i]

          VDUP.S32      Q10, r12                         @ broadcast L_tmp to all lanes
          VMULL.S16     Q5, D3, D4                       @ a[16..13] * yy[i-16..i-13]
          VMLAL.S16     Q5, D2, D5                       @ += a[12..9] * yy[i-12..i-9]
          VMLAL.S16     Q5, D1, D6                       @ += a[8..5]  * yy[i-8..i-5]
          VMLAL.S16     Q5, D0, D7                       @ += a[4..1]  * yy[i-4..i-1]
          VEXT.8        D4, D4, D5, #2                   @ slide the history window by one
          VEXT.8        D5, D5, D6, #2                   @   sample; D7 is completed below
          VEXT.8        D6, D6, D7, #2                   @   once the new output is known
          VPADD.S32     D12, D10, D11                    @ fold 4 partial sums into 2
          ADD           r8, r8, #1                       @ i++
          VPADD.S32     D10, D12, D12                    @ fold 2 partial sums into 1

          VDUP.S32      Q7, D10[0]                       @ broadcast the dot product

          VSUB.S32      Q9, Q10, Q7                      @ L_tmp - sum(a[j] * yy[i-j])
          VQRSHRN.S32   D20, Q9, #12                     @ round, shift right by 12, saturate to 16 bits
          VMOV.S16      r9, D20[0]                       @ r9 = new output sample
          VEXT.8        D7, D7, D20, #2                  @ append the new sample to the history window
          CMP           r8, #80                          @ flags consumed by BLT; STRH does not alter them
          STRH          r9, [r10]                        @ yy[i]
          STRH          r9, [r2], #2                     @ y[i]

          BLT           SYN_LOOP

          @ update mem[]: mem[0..15] = yy[64..79]
          ADD           r5, r13, #160                    @ &y_buf[80] == &yy[64]
          VLD1.S16      {D0, D1, D2, D3}, [r5]!
          VST1.S16      {D0, D1, D2, D3}, [r3]!

Syn_filt_asm_end:

          ADD           r13, r13, #700                   @ release the scratch area
          VPOP          {D8-D15}                         @ restore callee-saved NEON registers
          LDMFD         r13!, {r4 - r12, r15}            @ restore core registers and return (pc <- saved lr)
          @ENDFUNC
          .end