diff options
Diffstat (limited to 'media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/convolve_opt.s')
-rw-r--r-- | media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/convolve_opt.s | 186 |
1 files changed, 186 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/convolve_opt.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/convolve_opt.s
new file mode 100644
index 0000000..71bb532
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV5E/convolve_opt.s
@@ -0,0 +1,186 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@ Convolve_asm: for n = 0..63, s = sum over i = 0..n of x[i] * h[n - i] (32-bit MUL/MLA),
+@ then y[n] = bits 31..16 of ((s << 1) + 0x8000). Four outputs are produced per outer pass.
+@*void Convolve (
+@* Word16 x[], /* (i) : input vector */
+@* Word16 h[], /* (i) : impulse response */
+@* Word16 y[], /* (o) : output vector */
+@* Word16 L /* (i) : vector size */
+@*)
+@ r0 --- x[]
+@ r1 --- h[]
+@ r2 --- y[]
+@ r3 --- L (never read: r3 is cleared on entry and reused as n; the loop bound is the constant 64)
+@ Working registers: r3 = n, r4 = tmpH, r5 = i, r6 = tmpX, r8 = s, r11 = 0x8000, r9/r10/r12/r14 = operands.
+        .section .text
+        .global Convolve_asm
+
+Convolve_asm:
+
+        STMFD   r13!, {r4 - r12, r14}       @ save r4-r12 and lr; restored by the LDMFD at the end
+        MOV     r3, #0                      @ n = 0 (overwrites the incoming L)
+        MOV     r11, #0x8000                @ rounding constant added before the high half is extracted
+@ n % 4 == 0: one product is peeled, leaving n (a multiple of 4) for the unrolled loop.
+LOOP:
+        ADD     r4, r1, r3, LSL #1          @ tmpH = &h[n]
+        ADD     r5, r3, #1                  @ i = n + 1
+        MOV     r6, r0                      @ tmpX = x
+        LDRSH   r9, [r6], #2                @ *tmpX++
+        LDRSH   r10, [r4], #-2              @ *tmpH--
+        SUB     r5, r5, #1                  @ i-- for the product peeled below
+        MUL     r8, r9, r10                 @ s = x[0] * h[n]
+
+LOOP1:
+        CMP     r5, #0                      @ while (i > 0)
+        BLE     L1
+        LDRSH   r9, [r6], #2                @ *tmpX++
+        LDRSH   r10, [r4], #-2              @ *tmpH--
+        LDRSH   r12, [r6], #2               @ *tmpX++
+        LDRSH   r14, [r4], #-2              @ *tmpH--
+        MLA     r8, r9, r10, r8             @ s += x * h
+        MLA     r8, r12, r14, r8            @ s += x * h
+        LDRSH   r9, [r6], #2                @ *tmpX++
+        LDRSH   r10, [r4], #-2              @ *tmpH--
+        LDRSH   r12, [r6], #2               @ *tmpX++
+        LDRSH   r14, [r4], #-2              @ *tmpH--
+        MLA     r8, r9, r10, r8             @ s += x * h
+        SUBS    r5, r5, #4                  @ i -= 4 (the loop test is redone by the CMP at LOOP1)
+        MLA     r8, r12, r14, r8            @ s += x * h
+
+        B       LOOP1
+
+L1:
+
+        ADD     r5, r11, r8, LSL #1         @ (s << 1) + 0x8000
+        MOV     r5, r5, LSR #16             @ extract_h(s); only the low halfword is stored below
+        ADD     r3, r3, #1                  @ n++
+        STRH    r5, [r2], #2                @ store y[] for the n just finished; r2 post-increments
+
+@ n % 4 == 1: two products are peeled, leaving n - 1 for the unrolled loop.
+        ADD     r4, r1, r3, LSL #1          @ tmpH = &h[n]
+        ADD     r5, r3, #1                  @ i = n + 1
+        MOV     r6, r0                      @ tmpX = x
+        LDRSH   r9, [r6], #2                @ *tmpX++
+        LDRSH   r10, [r4], #-2              @ *tmpH--
+        LDRSH   r12, [r6], #2               @ *tmpX++
+        LDRSH   r14, [r4], #-2              @ *tmpH--
+
+        MUL     r8, r9, r10                 @ s = x[0] * h[n]
+        SUB     r5, r5, #2                  @ i -= 2 for the two peeled products
+        MLA     r8, r12, r14, r8            @ s += x[1] * h[n - 1]
+
+LOOP2:
+        CMP     r5, #0                      @ while (i > 0)
+        BLE     L2
+        LDRSH   r9, [r6], #2                @ *tmpX++
+        LDRSH   r10, [r4], #-2              @ *tmpH--
+        LDRSH   r12, [r6], #2               @ *tmpX++
+        LDRSH   r14, [r4], #-2              @ *tmpH--
+        MLA     r8, r9, r10, r8             @ s += x * h
+        MLA     r8, r12, r14, r8            @ s += x * h
+        LDRSH   r9, [r6], #2                @ *tmpX++
+        LDRSH   r10, [r4], #-2              @ *tmpH--
+        LDRSH   r12, [r6], #2               @ *tmpX++
+        LDRSH   r14, [r4], #-2              @ *tmpH--
+        MLA     r8, r9, r10, r8             @ s += x * h
+        SUBS    r5, r5, #4                  @ i -= 4 (the loop test is redone by the CMP at LOOP2)
+        MLA     r8, r12, r14, r8            @ s += x * h
+        B       LOOP2
+
+L2:
+        ADD     r8, r11, r8, LSL #1         @ (s << 1) + 0x8000
+        MOV     r8, r8, LSR #16             @ extract_h(s); only the low halfword is stored below
+        ADD     r3, r3, #1                  @ n++
+        STRH    r8, [r2], #2                @ store y[] for the n just finished; r2 post-increments
+@ n % 4 == 2: three products are peeled, leaving n - 2 for the unrolled loop.
+        ADD     r4, r1, r3, LSL #1          @ tmpH = &h[n]
+        ADD     r5, r3, #1                  @ i = n + 1
+        MOV     r6, r0                      @ tmpX = x
+        LDRSH   r9, [r6], #2                @ *tmpX++
+        LDRSH   r10, [r4], #-2              @ *tmpH--
+        LDRSH   r12, [r6], #2               @ *tmpX++
+        LDRSH   r14, [r4], #-2              @ *tmpH--
+        MUL     r8, r9, r10                 @ s = x[0] * h[n]
+        LDRSH   r9, [r6], #2                @ *tmpX++
+        LDRSH   r10, [r4], #-2              @ *tmpH--
+        MLA     r8, r12, r14, r8            @ s += x[1] * h[n - 1]
+        SUB     r5, r5, #3                  @ i -= 3 for the three peeled products
+        MLA     r8, r9, r10, r8             @ s += x[2] * h[n - 2]
+
+LOOP3:
+        CMP     r5, #0                      @ while (i > 0)
+        BLE     L3
+        LDRSH   r9, [r6], #2                @ *tmpX++
+        LDRSH   r10, [r4], #-2              @ *tmpH--
+        LDRSH   r12, [r6], #2               @ *tmpX++
+        LDRSH   r14, [r4], #-2              @ *tmpH--
+        MLA     r8, r9, r10, r8             @ s += x * h
+        MLA     r8, r12, r14, r8            @ s += x * h
+        LDRSH   r9, [r6], #2                @ *tmpX++
+        LDRSH   r10, [r4], #-2              @ *tmpH--
+        LDRSH   r12, [r6], #2               @ *tmpX++
+        LDRSH   r14, [r4], #-2              @ *tmpH--
+        MLA     r8, r9, r10, r8             @ s += x * h
+        SUBS    r5, r5, #4                  @ i -= 4 (the loop test is redone by the CMP at LOOP3)
+        MLA     r8, r12, r14, r8            @ s += x * h
+        B       LOOP3
+
+L3:
+        ADD     r8, r11, r8, LSL #1         @ (s << 1) + 0x8000
+        MOV     r8, r8, LSR #16             @ extract_h(s); only the low halfword is stored below
+        ADD     r3, r3, #1                  @ n++
+        STRH    r8, [r2], #2                @ store y[] for the n just finished; r2 post-increments
+@ n % 4 == 3: nothing is peeled; all n + 1 products go through the unrolled loop.
+        ADD     r5, r3, #1                  @ i = n + 1
+        ADD     r4, r1, r3, LSL #1          @ tmpH = &h[n]
+        MOV     r6, r0                      @ tmpX = x
+        MOV     r8, #0                      @ s = 0
+
+LOOP4:
+        CMP     r5, #0                      @ while (i > 0)
+        BLE     L4
+        LDRSH   r9, [r6], #2                @ *tmpX++
+        LDRSH   r10, [r4], #-2              @ *tmpH--
+        LDRSH   r12, [r6], #2               @ *tmpX++
+        LDRSH   r14, [r4], #-2              @ *tmpH--
+        MLA     r8, r9, r10, r8             @ s += x * h
+        MLA     r8, r12, r14, r8            @ s += x * h
+        LDRSH   r9, [r6], #2                @ *tmpX++
+        LDRSH   r10, [r4], #-2              @ *tmpH--
+        LDRSH   r12, [r6], #2               @ *tmpX++
+        LDRSH   r14, [r4], #-2              @ *tmpH--
+        MLA     r8, r9, r10, r8             @ s += x * h
+        SUBS    r5, r5, #4                  @ i -= 4 (the loop test is redone by the CMP at LOOP4)
+        MLA     r8, r12, r14, r8            @ s += x * h
+        B       LOOP4
+L4:
+        ADD     r5, r11, r8, LSL #1         @ (s << 1) + 0x8000
+        MOV     r5, r5, LSR #16             @ extract_h(s); only the low halfword is stored below
+        ADD     r3, r3, #1                  @ n++
+        STRH    r5, [r2], #2                @ store y[] for the n just finished; r2 post-increments
+
+        CMP     r3, #64                     @ fixed output count of 64; the L argument is not consulted
+        BLT     LOOP                        @ next group of four outputs
+
+Convolve_asm_end:
+
+        LDMFD   r13!, {r4 - r12, r15}       @ restore r4-r12 and return by loading the saved lr into pc
+
+        @ENDFUNC
+        .END
+
+
|