diff options
Diffstat (limited to 'media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s')
-rw-r--r-- | media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s | 266 |
1 files changed, 133 insertions, 133 deletions
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s index cb1764f..1e65efa 100644 --- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s +++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Syn_filt_32_neon.s @@ -1,133 +1,133 @@ -@/*
-@ ** Copyright 2003-2010, VisualOn, Inc.
-@ **
-@ ** Licensed under the Apache License, Version 2.0 (the "License");
-@ ** you may not use this file except in compliance with the License.
-@ ** You may obtain a copy of the License at
-@ **
-@ ** http://www.apache.org/licenses/LICENSE-2.0
-@ **
-@ ** Unless required by applicable law or agreed to in writing, software
-@ ** distributed under the License is distributed on an "AS IS" BASIS,
-@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-@ ** See the License for the specific language governing permissions and
-@ ** limitations under the License.
-@ */
-@
-@**********************************************************************/
-@void Syn_filt_32(
-@ Word16 a[], /* (i) Q12 : a[m+1] prediction coefficients */
-@ Word16 m, /* (i) : order of LP filter */
-@ Word16 exc[], /* (i) Qnew: excitation (exc[i] >> Qnew) */
-@ Word16 Qnew, /* (i) : exc scaling = 0(min) to 8(max) */
-@ Word16 sig_hi[], /* (o) /16 : synthesis high */
-@ Word16 sig_lo[], /* (o) /16 : synthesis low */
-@ Word16 lg /* (i) : size of filtering */
-@)
-@***********************************************************************
-@ a[] --- r0
-@ m --- r1
-@ exc[] --- r2
-@ Qnew --- r3
-@ sig_hi[] --- r4
-@ sig_lo[] --- r5
-@ lg --- r6
-
- .section .text
- .global Syn_filt_32_asm
-
-Syn_filt_32_asm:
-
- STMFD r13!, {r4 - r12, r14}
- LDR r4, [r13, #40] @ get sig_hi[] address
- LDR r5, [r13, #44] @ get sig_lo[] address
-
- LDRSH r6, [r0], #2 @ load Aq[0]
- ADD r7, r3, #4 @ 4 + Q_new
- MOV r3, r6, ASR r7 @ a0 = Aq[0] >> (4 + Q_new)
-
- SUB r10, r4, #32 @ sig_hi[-16] address
- SUB r11, r5, #32 @ sig_lo[-16] address
-
- VLD1.S16 {D0, D1, D2, D3}, [r0]! @a[1] ~ a[16]
-
- MOV r8, #0 @ i = 0
-
- VLD1.S16 {D4, D5, D6, D7}, [r10]! @ sig_hi[-16] ~ sig_hi[-1]
- VREV64.16 D0, D0
- VREV64.16 D1, D1
- VLD1.S16 {D8, D9, D10, D11}, [r11]! @ sig_lo[-16] ~ sig_lo[-1]
- VREV64.16 D2, D2
- VREV64.16 D3, D3
- VDUP.S32 Q15, r8
-
-SYN_LOOP:
-
- LDRSH r6, [r2], #2 @exc[i]
- @L_tmp = L_msu(L_tmp, sig_lo[i - j], a[j])@
- VMULL.S16 Q10, D8, D3
- VEXT.8 D8, D8, D9, #2
- VMLAL.S16 Q10, D9, D2
- VMLAL.S16 Q10, D10, D1
- VMLAL.S16 Q10, D11, D0
-
- VEXT.8 D9, D9, D10, #2
- VEXT.8 D10, D10, D11, #2
-
- VPADD.S32 D28, D20, D21
- MUL r12, r6, r3 @exc[i] * a0
- VPADD.S32 D29, D28, D28
- VDUP.S32 Q10, D29[0] @result1
-
- VMULL.S16 Q11, D4, D3
- VMLAL.S16 Q11, D5, D2
- VSUB.S32 Q10, Q15, Q10
- @L_tmp = L_msu(L_tmp, sig_hi[i - j], a[j])@
-
- VMLAL.S16 Q11, D6, D1
- VEXT.8 D4, D4, D5, #2
- VMLAL.S16 Q11, D7, D0
-
-
- VEXT.8 D5, D5, D6, #2
- VEXT.8 D6, D6, D7, #2
-
- VPADD.S32 D28, D22, D23
- VPADD.S32 D29, D28, D28
- MOV r14, r12, LSL #1 @exc[i] * a0 << 1
- VDUP.S32 Q11, D29[0] @result2
-
-
-
- VSHR.S32 Q10, Q10, #11 @result1 >>= 11
- VSHL.S32 Q11, Q11, #1 @result2 <<= 1
- VDUP.S32 Q12, r14
- VADD.S32 Q12, Q12, Q10 @L_tmp = L_tmp - (result1 >>= 11) - (result2 <<= 1)
- VSUB.S32 Q12, Q12, Q11
-
- VSHL.S32 Q12, Q12, #3 @L_tmp <<= 3
-
-
- VSHRN.S32 D20, Q12, #16 @sig_hi[i] = L_tmp >> 16@
- VMOV.S16 r10, D20[0]
- VSHR.S32 Q12, Q12, #4 @L_tmp >>= 4
- VEXT.8 D7, D7, D20, #2
- STRH r10, [r4], #2 @store sig_hi[i]
- VMOV.S32 r11, D24[0] @r11 --- L_tmp >>= 4
- ADD r8, r8, #1
- SUB r12, r11, r10, LSL #12
- @MOV r11, r12, ASR #16 @sig_lo[i]
- VDUP.S16 D21, r12
- VEXT.8 D11, D11, D21, #2
- STRH r12, [r5], #2 @stroe sig_lo[i]
-
- CMP r8, #64
- BLT SYN_LOOP
-
-Syn_filt_32_end:
-
- LDMFD r13!, {r4 - r12, r15}
- @ENDFUNC
- .END
-
-
+@/* +@ ** Copyright 2003-2010, VisualOn, Inc. +@ ** +@ ** Licensed under the Apache License, Version 2.0 (the "License"); +@ ** you may not use this file except in compliance with the License. +@ ** You may obtain a copy of the License at +@ ** +@ ** http://www.apache.org/licenses/LICENSE-2.0 +@ ** +@ ** Unless required by applicable law or agreed to in writing, software +@ ** distributed under the License is distributed on an "AS IS" BASIS, +@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ ** See the License for the specific language governing permissions and +@ ** limitations under the License. +@ */ +@ +@**********************************************************************/ +@void Syn_filt_32( +@ Word16 a[], /* (i) Q12 : a[m+1] prediction coefficients */ +@ Word16 m, /* (i) : order of LP filter */ +@ Word16 exc[], /* (i) Qnew: excitation (exc[i] >> Qnew) */ +@ Word16 Qnew, /* (i) : exc scaling = 0(min) to 8(max) */ +@ Word16 sig_hi[], /* (o) /16 : synthesis high */ +@ Word16 sig_lo[], /* (o) /16 : synthesis low */ +@ Word16 lg /* (i) : size of filtering */ +@) +@*********************************************************************** +@ a[] --- r0 +@ m --- r1 +@ exc[] --- r2 +@ Qnew --- r3 +@ sig_hi[] --- r4 +@ sig_lo[] --- r5 +@ lg --- r6 + + .section .text + .global Syn_filt_32_asm + +Syn_filt_32_asm: + + STMFD r13!, {r4 - r12, r14} + LDR r4, [r13, #40] @ get sig_hi[] address + LDR r5, [r13, #44] @ get sig_lo[] address + + LDRSH r6, [r0], #2 @ load Aq[0] + ADD r7, r3, #4 @ 4 + Q_new + MOV r3, r6, ASR r7 @ a0 = Aq[0] >> (4 + Q_new) + + SUB r10, r4, #32 @ sig_hi[-16] address + SUB r11, r5, #32 @ sig_lo[-16] address + + VLD1.S16 {D0, D1, D2, D3}, [r0]! @a[1] ~ a[16] + + MOV r8, #0 @ i = 0 + + VLD1.S16 {D4, D5, D6, D7}, [r10]! @ sig_hi[-16] ~ sig_hi[-1] + VREV64.16 D0, D0 + VREV64.16 D1, D1 + VLD1.S16 {D8, D9, D10, D11}, [r11]! @ sig_lo[-16] ~ sig_lo[-1] + VREV64.16 D2, D2 + VREV64.16 D3, D3 + VDUP.S32 Q15, r8 + +SYN_LOOP: + + LDRSH r6, [r2], #2 @exc[i] + @L_tmp = L_msu(L_tmp, sig_lo[i - j], a[j])@ + VMULL.S16 Q10, D8, D3 + VEXT.8 D8, D8, D9, #2 + VMLAL.S16 Q10, D9, D2 + VMLAL.S16 Q10, D10, D1 + VMLAL.S16 Q10, D11, D0 + + VEXT.8 D9, D9, D10, #2 + VEXT.8 D10, D10, D11, #2 + + VPADD.S32 D28, D20, D21 + MUL r12, r6, r3 @exc[i] * a0 + VPADD.S32 D29, D28, D28 + VDUP.S32 Q10, D29[0] @result1 + + VMULL.S16 Q11, D4, D3 + VMLAL.S16 Q11, D5, D2 + VSUB.S32 Q10, Q15, Q10 + @L_tmp = L_msu(L_tmp, sig_hi[i - j], a[j])@ + + VMLAL.S16 Q11, D6, D1 + VEXT.8 D4, D4, D5, #2 + VMLAL.S16 Q11, D7, D0 + + + VEXT.8 D5, D5, D6, #2 + VEXT.8 D6, D6, D7, #2 + + VPADD.S32 D28, D22, D23 + VPADD.S32 D29, D28, D28 + MOV r14, r12, LSL #1 @exc[i] * a0 << 1 + VDUP.S32 Q11, D29[0] @result2 + + + + VSHR.S32 Q10, Q10, #11 @result1 >>= 11 + VSHL.S32 Q11, Q11, #1 @result2 <<= 1 + VDUP.S32 Q12, r14 + VADD.S32 Q12, Q12, Q10 @L_tmp = L_tmp - (result1 >>= 11) - (result2 <<= 1) + VSUB.S32 Q12, Q12, Q11 + + VSHL.S32 Q12, Q12, #3 @L_tmp <<= 3 + + + VSHRN.S32 D20, Q12, #16 @sig_hi[i] = L_tmp >> 16@ + VMOV.S16 r10, D20[0] + VSHR.S32 Q12, Q12, #4 @L_tmp >>= 4 + VEXT.8 D7, D7, D20, #2 + STRH r10, [r4], #2 @store sig_hi[i] + VMOV.S32 r11, D24[0] @r11 --- L_tmp >>= 4 + ADD r8, r8, #1 + SUB r12, r11, r10, LSL #12 + @MOV r11, r12, ASR #16 @sig_lo[i] + VDUP.S16 D21, r12 + VEXT.8 D11, D11, D21, #2 + STRH r12, [r5], #2 @stroe sig_lo[i] + + CMP r8, #64 + BLT SYN_LOOP + +Syn_filt_32_end: + + LDMFD r13!, {r4 - r12, r15} + @ENDFUNC + .END + + |