@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
@
@*void Convolve (
@*    Word16 x[],        /* (i) : input vector      */
@*    Word16 h[],        /* (i) : impulse response  */
@*    Word16 y[],        /* (o) : output vector     */
@*    Word16 L           /* (i) : vector size       */
@*)
@
@ r0 --- x[]
@ r1 --- h[]
@ r2 --- y[]
@ r3 --- L

        .section  .text
        .global   Convolve_asm

Convolve_asm:

        STMFD          r13!, {r4 - r12, r14}
        MOV            r3, #0                            @ n = 0
        MOV            r11, #0x8000                      @ rounding constant

@ The outer loop is unrolled four ways: each block below produces one
@ output sample y[n], handling the leftover (n + 1) % 4 taps with scalar
@ MUL/MLA so the NEON inner loop always consumes four taps per iteration.

LOOP:
        @MOV           r8, #0                            @ s = 0
        ADD            r4, r1, r3, LSL #1                @ tmpH address
        ADD            r5, r3, #1                        @ i = n + 1
        MOV            r6, r0
        LDRSH          r9, [r6], #2                      @ *tmpX++
        LDRSH          r10, [r4]                         @ *tmpH--
        SUB            r5, r5, #1
        VMOV.S32       Q10, #0
        MUL            r8, r9, r10

LOOP1:
        CMP            r5, #0
        BLE            L1
        SUB            r4, r4, #8
        MOV            r9, r4
        VLD1.S16       D0, [r6]!                         @ x[i..i+3]
        VLD1.S16       D1, [r9]!                         @ h[n-i-3..n-i]
        VREV64.16      D1, D1                            @ reverse h for convolution
        SUBS           r5, r5, #4
        VMLAL.S16      Q10, D0, D1
        B              LOOP1
L1:
        VADD.S32       D20, D20, D21
        VPADD.S32      D20, D20, D20                     @ horizontal sum of Q10
        VMOV.S32       r5, D20[0]
        ADD            r5, r5, r8
        ADD            r5, r11, r5, LSL #1
        MOV            r5, r5, LSR #16                   @ extract_h(s)
        ADD            r3, r3, #1
        STRH           r5, [r2], #2                      @ y[n]

        @MOV           r8, #0
        ADD            r4, r1, r3, LSL #1                @ tmpH address
        ADD            r5, r3, #1
        MOV            r6, r0
        LDRSH          r9, [r6], #2                      @ *tmpX++
        LDRSH          r10, [r4], #-2
        LDRSH          r12, [r6], #2
        LDRSH          r14, [r4]
        MUL            r8, r9, r10
        SUB            r5, r5, #2
        MLA            r8, r12, r14, r8

        VMOV.S32       Q10, #0
LOOP2:
        CMP            r5, #0
        BLE            L2
        SUB            r4, r4, #8
        MOV            r9, r4
        VLD1.S16       D0, [r6]!
        VLD1.S16       D1, [r9]!
        SUBS           r5, r5, #4
        VREV64.16      D1, D1
        VMLAL.S16      Q10, D0, D1
        B              LOOP2
L2:
        VADD.S32       D20, D20, D21
        VPADD.S32      D20, D20, D20
        VMOV.S32       r5, D20[0]
        ADD            r8, r8, r5
        ADD            r8, r11, r8, LSL #1
        MOV            r8, r8, LSR #16                   @ extract_h(s)
        ADD            r3, r3, #1
        STRH           r8, [r2], #2                      @ y[n]

        @MOV           r8, #0
        ADD            r4, r1, r3, LSL #1
        ADD            r5, r3, #1
        MOV            r6, r0
        LDRSH          r9, [r6], #2
        LDRSH          r10, [r4], #-2
        LDRSH          r12, [r6], #2
        LDRSH          r14, [r4], #-2
        MUL            r8, r9, r10
        LDRSH          r9, [r6], #2
        LDRSH          r10, [r4]
        MLA            r8, r12, r14, r8
        SUB            r5, r5, #3
        MLA            r8, r9, r10, r8

        VMOV.S32       Q10, #0
LOOP3:
        CMP            r5, #0
        BLE            L3
        SUB            r4, r4, #8
        MOV            r9, r4
        VLD1.S16       D0, [r6]!
        VLD1.S16       D1, [r9]!
        VREV64.16      D1, D1
        SUBS           r5, r5, #4
        VMLAL.S16      Q10, D0, D1
        B              LOOP3

L3:
        VADD.S32       D20, D20, D21
        VPADD.S32      D20, D20, D20
        VMOV.S32       r5, D20[0]
        ADD            r8, r8, r5
        ADD            r8, r11, r8, LSL #1
        MOV            r8, r8, LSR #16                   @ extract_h(s)
        ADD            r3, r3, #1
        STRH           r8, [r2], #2                      @ y[n]

        ADD            r5, r3, #1                        @ i = n + 1
        ADD            r4, r1, r5, LSL #1                @ tmpH address
        MOV            r6, r0
        VMOV.S32       Q10, #0
LOOP4:
        CMP            r5, #0
        BLE            L4
        SUB            r4, r4, #8
        MOV            r9, r4
        VLD1.S16       D0, [r6]!
        VLD1.S16       D1, [r9]!
        VREV64.16      D1, D1
        SUBS           r5, r5, #4
        VMLAL.S16      Q10, D0, D1
        B              LOOP4
L4:
        VADD.S32       D20, D20, D21
        VPADD.S32      D20, D20, D20
        VMOV.S32       r5, D20[0]
        ADD            r5, r11, r5, LSL #1
        MOV            r5, r5, LSR #16                   @ extract_h(s)
        ADD            r3, r3, #1
        STRH           r5, [r2], #2                      @ y[n]

        CMP            r3, #64                           @ L is hard-coded to 64
        BLT            LOOP

Convolve_asm_end:

        LDMFD          r13!, {r4 - r12, r15}

        @ENDFUNC
        .END
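
@ For reference, a minimal C sketch of what the routine above computes,
@ following the Word16/Word32 convention of the header comment. The
@ int16_t/int32_t typedefs and the fixed L == 64 (taken from the
@ CMP r3, #64 loop bound) are assumptions, not part of this file; the
@ shift/round step mirrors the ADD r11, LSL #1 / LSR #16 sequence above.
@
@*  #include <stdint.h>
@*  typedef int16_t Word16;                  /* assumed 16-bit sample type  */
@*  typedef int32_t Word32;                  /* assumed 32-bit accumulator  */
@*
@*  void Convolve(Word16 x[], Word16 h[], Word16 y[], Word16 L)
@*  {
@*      Word32 n, i, s;
@*      for (n = 0; n < L; n++) {            /* this file assumes L == 64   */
@*          s = 0;
@*          for (i = 0; i <= n; i++)         /* y[n] = sum x[i] * h[n-i]    */
@*              s += (Word32)x[i] * h[n - i];
@*          s = (s << 1) + 0x8000;           /* round; cf. r11 = 0x8000     */
@*          y[n] = (Word16)(s >> 16);        /* extract_h(s)                */
@*      }
@*  }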