1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
|
@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ ** http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
@
@void Syn_filt(
@ Word16 a[], /* (i) Q12 : a[m+1] prediction coefficients */
@ Word16 x[], /* (i) : input signal */
@ Word16 y[], /* (o) : output signal */
@ Word16 mem[], /* (i/o) : memory associated with this filtering. */
@)
@***********************************************************************
@ a[] --- r0
@ x[] --- r1
@ y[] --- r2
@ mem[] --- r3
@ Fixed in this routine (not passed in registers): m = 16, lg = 80, update = 1
.section .text
.global Syn_filt_asm

@ Sizes fixed by this routine.
.equ SYN_FILT_MEM_BYTES,   32           @ 16 halfwords of filter memory (m = 16)
.equ SYN_FILT_LG,          80           @ samples produced per call (lg = 80)
.equ SYN_FILT_TAIL_OFFSET, 160          @ byte offset of y_buf[80], i.e. yy[64]
.equ SYN_FILT_FRAME_BYTES, 700          @ stack scratch reserved for y_buf

@-----------------------------------------------------------------------
@ void Syn_filt_asm(Word16 a[], Word16 x[], Word16 y[], Word16 mem[])
@
@ Order-16 synthesis filter over 80 samples:
@   y[i] = sat16(round((x[i] * (a[0] >> 1)
@                       - sum(a[j] * y[i - j], j = 1..16)) >> 12))
@ where y[-16..-1] is taken from mem[]. On return mem[] holds y[64..79].
@
@ In:    r0 = a    (a[0..16] are read)
@        r1 = x    (80 halfwords are read)
@        r2 = y    (80 halfwords are written)
@        r3 = mem  (16 halfwords are read, then overwritten)
@ Clobb: r0-r2, r12, flags, d0-d7, d16-d22, d24-d25
@ Stack: 40 bytes of saved core registers + 700 bytes of scratch.
@        700 is not a multiple of 8, so r13 is only 4-byte aligned while
@        this leaf routine runs; no NEON access here uses an alignment
@        qualifier.
@ NEON:  only caller-saved registers are used. d8-d15 have to be
@        preserved for the caller under the AAPCS, so the accumulator and
@        the reduction temporaries live in q8/d22/q12 and d8-d15 are
@        never written.
@-----------------------------------------------------------------------
Syn_filt_asm:
        STMFD       r13!, {r4 - r12, r14}
        SUB         r13, r13, #SYN_FILT_FRAME_BYTES     @ y_buf = [r13]

        @ History window d4-d7 = mem[0..15]; the same 16 samples are also
        @ copied to y_buf[0..15].
        VLD1.S16    {D4, D5, D6, D7}, [r3]              @ r3 stays = mem for the final update
        MOV         r5, r13
        VST1.S16    {D4, D5, D6, D7}, [r5]

        LDRSH       r5, [r0], #2                        @ r5 = a[0], r0 -> a[1]
        MOV         r5, r5, ASR #1                      @ a0 = a[0] >> 1

        @ d0-d3 = a[1..16]. Each 64-bit half is lane-reversed so that lane k
        @ of {d3,d2,d1,d0} lines up with the oldest-first history in
        @ {d4,d5,d6,d7}: a[16]*yy[i-16] ... a[1]*yy[i-1].
        VLD1.S16    {D0, D1, D2, D3}, [r0]
        VREV64.16   D0, D0
        VREV64.16   D1, D1
        VREV64.16   D2, D2
        VREV64.16   D3, D3

        MOV         r8, #0                              @ i = 0
        ADD         r4, r13, #SYN_FILT_MEM_BYTES        @ r4 = yy = &y_buf[16]

.LSyn_filt_loop:
        LDRSH       r6, [r1], #2                        @ x[i]
        MUL         r12, r6, r5                         @ L_tmp = x[i] * a0
        ADD         r10, r4, r8, LSL #1                 @ &yy[i]
        VDUP.S32    Q10, r12

        @ q8 = four partial sums of a[j] * yy[i - j]
        VMULL.S16   Q8, D3, D4
        VMLAL.S16   Q8, D2, D5
        VMLAL.S16   Q8, D1, D6
        VMLAL.S16   Q8, D0, D7

        @ Slide the history window by one sample (2 bytes); d7 is completed
        @ below once the new output is known.
        VEXT.8      D4, D4, D5, #2
        VEXT.8      D5, D5, D6, #2
        VEXT.8      D6, D6, D7, #2

        @ Horizontal add of the four partial sums -> d16[0]
        VPADD.S32   D22, D16, D17
        ADD         r8, r8, #1                          @ i++
        VPADD.S32   D16, D22, D22
        VDUP.S32    Q12, D16[0]

        VSUB.S32    Q9, Q10, Q12                        @ L_tmp - sum
        VQRSHRN.S32 D20, Q9, #12                        @ round, >> 12, saturate to 16 bits
        VMOV.S16    r9, D20[0]
        VEXT.8      D7, D7, D20, #2                     @ append new sample to the history

        CMP         r8, #SYN_FILT_LG
        STRH        r9, [r10]                           @ yy[i]
        STRH        r9, [r2], #2                        @ y[i]
        BLT         .LSyn_filt_loop

        @ mem[0..15] = yy[64..79], the last 16 outputs
        ADD         r5, r13, #SYN_FILT_TAIL_OFFSET
        VLD1.S16    {D0, D1, D2, D3}, [r5]
        VST1.S16    {D0, D1, D2, D3}, [r3]

Syn_filt_asm_end:
        ADD         r13, r13, #SYN_FILT_FRAME_BYTES
        LDMFD       r13!, {r4 - r12, r15}               @ restore and return (pc <- saved lr)
        @ENDFUNC
        .END
|