summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s
diff options
context:
space:
mode:
Diffstat (limited to 'media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s')
-rw-r--r--media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s106
1 files changed, 106 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s
new file mode 100644
index 0000000..dc3d4a8
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/syn_filt_neon.s
@@ -0,0 +1,106 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@void Syn_filt(
+@ Word16 a[], /* (i) Q12 : a[m+1] prediction coefficients */
+@ Word16 x[], /* (i) : input signal */
+@ Word16 y[], /* (o) : output signal */
+@ Word16 mem[], /* (i/o) : memory associated with this filtering. */
+@)
+@***********************************************************************
+@ a[] --- r0
+@ x[] --- r1
+@ y[] --- r2
+@ mem[] --- r3
+@ m --- 16 lg --- 80 update --- 1
+
+ .section .text
+ .global Syn_filt_asm
+
+Syn_filt_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ SUB r13, r13, #700 @ y_buf[L_FRAME16k + M16k]
+
+ MOV r4, r3 @ copy mem[] address
+ MOV r5, r13 @ copy yy = y_buf address
+
+ @ for(i = 0@ i < m@ i++)
+ @{
+ @ *yy++ = mem[i]@
+ @}
+ VLD1.S16 {D0, D1, D2, D3}, [r4]! @load 16 mems
+ VST1.S16 {D0, D1, D2, D3}, [r5]! @store 16 mem[] to *yy
+
+ LDRSH r5, [r0], #2 @ load a[0]
+ MOV r8, #0 @ i = 0
+ MOV r5, r5, ASR #1 @ a0 = a[0] >> 1
+ VMOV.S16 D8[0], r5
+ @ load all a[]
+ VLD1.S16 {D0, D1, D2, D3}, [r0]! @ load a[1] ~ a[16]
+ VREV64.16 D0, D0
+ VREV64.16 D1, D1
+ VREV64.16 D2, D2
+ VREV64.16 D3, D3
+ MOV r8, #0 @ loop times
+ MOV r10, r13 @ temp = y_buf
+ ADD r4, r13, #32 @ yy[i] address
+
+ VLD1.S16 {D4, D5, D6, D7}, [r10]! @ first 16 temp_p
+
+SYN_LOOP:
+
+ LDRSH r6, [r1], #2 @ load x[i]
+ MUL r12, r6, r5 @ L_tmp = x[i] * a0
+ ADD r10, r4, r8, LSL #1 @ y[i], yy[i] address
+
+ VDUP.S32 Q10, r12
+ VMULL.S16 Q5, D3, D4
+ VMLAL.S16 Q5, D2, D5
+ VMLAL.S16 Q5, D1, D6
+ VMLAL.S16 Q5, D0, D7
+ VEXT.8 D4, D4, D5, #2
+ VEXT.8 D5, D5, D6, #2
+ VEXT.8 D6, D6, D7, #2
+ VPADD.S32 D12, D10, D11
+ ADD r8, r8, #1
+ VPADD.S32 D10, D12, D12
+
+ VDUP.S32 Q7, D10[0]
+
+ VSUB.S32 Q9, Q10, Q7
+ VQRSHRN.S32 D20, Q9, #12
+ VMOV.S16 r9, D20[0]
+ VEXT.8 D7, D7, D20, #2
+ CMP r8, #80
+ STRH r9, [r10] @ yy[i]
+ STRH r9, [r2], #2 @ y[i]
+
+ BLT SYN_LOOP
+
+ @ update mem[]
+ ADD r5, r13, #160 @ yy[64] address
+ VLD1.S16 {D0, D1, D2, D3}, [r5]!
+ VST1.S16 {D0, D1, D2, D3}, [r3]!
+
+Syn_filt_asm_end:
+
+ ADD r13, r13, #700
+ LDMFD r13!, {r4 - r12, r15}
+ @ENDFUNC
+ .END
+
+