diff options
author | James Dong <jdong@google.com> | 2010-05-14 15:45:22 -0700 |
---|---|---|
committer | James Dong <jdong@google.com> | 2010-05-19 07:21:25 -0700 |
commit | 956c553ab0ce72f8074ad0fda2ffd66a0305700c (patch) | |
tree | fe111ecd29e01c270246f5f338c56806d6d5b136 /media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s | |
parent | e1f61055b4abb96a86d1ff24b5a7777dfe40fe5f (diff) | |
download | frameworks_av-956c553ab0ce72f8074ad0fda2ffd66a0305700c.zip frameworks_av-956c553ab0ce72f8074ad0fda2ffd66a0305700c.tar.gz frameworks_av-956c553ab0ce72f8074ad0fda2ffd66a0305700c.tar.bz2 |
Initial software encoder checkins
Change-Id: I27f387db23594e46384c4eb3a0093ce220bb6b60
Diffstat (limited to 'media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s')
-rw-r--r-- | media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s | 178 |
1 file changed, 178 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s
new file mode 100644
index 0000000..189e33b
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s
@@ -0,0 +1,178 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ **     http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@*void Convolve (
+@*    Word16 x[],        /* (i)  : input vector                           */
+@*    Word16 h[],        /* (i)  : impulse response                       */
+@*    Word16 y[],        /* (o)  : output vector                          */
+@*    Word16 L           /* (i)  : vector size                            */
+@*)
+@
+@ r0 --- x[]
+@ r1 --- h[]
+@ r2 --- y[]
+@ r3 --- L
+@
+@ Fixed-point convolution: y[n] = extract_h(0x8000 + 2*sum_{i=0..n} x[i]*h[n-i]).
+@ The outer LOOP produces four consecutive outputs per pass (n mod 4 = 0..3).
+@ Each of the four sections first multiplies the 1, 2, 3 (or 0) "head" taps
+@ with scalar MUL/MLA so that the remaining tap count is a multiple of 4,
+@ then accumulates 4 products per iteration with VMLAL.S16, loading h[]
+@ backwards and reversing it with VREV64.16 to line up with ascending x[].
+@ NOTE(review): the L argument in r3 is immediately overwritten with 0 and
+@ the terminating compare is against the literal 64, so this routine only
+@ handles L == 64 -- confirm against the C caller before reusing elsewhere.
+
+          .section  .text
+          .global   Convolve_asm
+
+Convolve_asm:
+
+          STMFD          r13!, {r4 - r12, r14}             @ save callee-saved regs + lr
+          MOV            r3, #0                            @ n = 0 (clobbers the L argument)
+          MOV            r11, #0x8000                      @ rounding term for (s << 1) >> 16
+
+LOOP:                                                      @ --- section 1: n mod 4 == 0, one head tap ---
+          @MOV            r8, #0                           @ s = 0
+          ADD            r4, r1, r3, LSL #1                @ tmpH address (h + n)
+          ADD            r5, r3, #1                        @ i = n + 1
+          MOV            r6, r0                            @ tmpX = x
+          LDRSH          r9,  [r6], #2                     @ *tmpX++
+          LDRSH          r10, [r4]                         @ *tmpH--
+          SUB            r5, r5, #1                        @ n taps remain (multiple of 4 here)
+          VMOV.S32       Q10, #0                           @ clear NEON accumulator
+          MUL            r8, r9, r10                       @ s = x[0] * h[n]
+
+LOOP1:                                                     @ 4 products per iteration
+          CMP            r5, #0
+          BLE            L1
+          SUB            r4, r4, #8                        @ step h pointer back 4 halfwords
+          MOV            r9, r4
+          VLD1.S16       D0, [r6]!                         @ 4 x[] samples, ascending
+          VLD1.S16       D1, [r9]!                         @ 4 h[] taps
+          VREV64.16      D1, D1                            @ reverse taps to match x order
+          SUBS           r5, r5, #4
+          VMLAL.S16      Q10, D0, D1                       @ Q10 += x*h, widening S16->S32
+          B              LOOP1
+L1:
+          VADD.S32       D20, D20, D21                     @ reduce the 4 lane sums...
+          VPADD.S32      D20, D20, D20                     @ ...to one S32
+          VMOV.S32       r5, D20[0]
+          ADD            r5, r5, r8                        @ add scalar head product
+          ADD            r5, r11, r5, LSL #1               @ 0x8000 + (s << 1): round
+          MOV            r5, r5, LSR #16                   @extract_h(s)
+          ADD            r3, r3, #1                        @ n++
+          STRH           r5, [r2], #2                      @y[n]
+
+                                                           @ --- section 2: n mod 4 == 1, two head taps ---
+          @MOV            r8, #0
+          ADD            r4, r1, r3, LSL #1                @tmpH address
+          ADD            r5, r3, #1
+          MOV            r6, r0
+          LDRSH          r9,  [r6], #2                     @ *tmpX++
+          LDRSH          r10, [r4], #-2                    @ h[n], stepping backwards
+          LDRSH          r12, [r6], #2
+          LDRSH          r14, [r4]                         @ h[n-1]
+
+          MUL            r8, r9, r10                       @ s  = x[0]*h[n]
+          SUB            r5, r5, #2                        @ n-1 taps remain (multiple of 4)
+          MLA            r8, r12, r14, r8                  @ s += x[1]*h[n-1]
+
+          VMOV.S32       Q10, #0
+LOOP2:                                                     @ same 4-wide pattern as LOOP1
+          CMP            r5, #0
+          BLE            L2
+          SUB            r4, r4, #8
+          MOV            r9, r4
+          VLD1.S16       D0, [r6]!
+          VLD1.S16       D1, [r9]!
+          SUBS           r5, r5, #4
+          VREV64.16      D1, D1
+          VMLAL.S16      Q10, D0, D1
+          B              LOOP2
+L2:
+          VADD.S32       D20, D20, D21                     @ horizontal reduction
+          VPADD.S32      D20, D20, D20
+          VMOV.S32       r5, D20[0]
+          ADD            r8, r8, r5                        @ head products + NEON sum
+          ADD            r8, r11, r8, LSL #1               @ round
+          MOV            r8, r8, LSR #16                   @extract_h(s)
+          ADD            r3, r3, #1
+          STRH           r8, [r2], #2                      @y[n]
+
+                                                           @ --- section 3: n mod 4 == 2, three head taps ---
+          @MOV            r8, #0
+          ADD            r4, r1, r3, LSL #1                @ tmpH = h + n
+          ADD            r5, r3, #1
+          MOV            r6, r0
+          LDRSH          r9,  [r6], #2
+          LDRSH          r10, [r4], #-2
+          LDRSH          r12, [r6], #2
+          LDRSH          r14, [r4], #-2
+          MUL            r8, r9, r10                       @ s  = x[0]*h[n]
+          LDRSH          r9,  [r6], #2
+          LDRSH          r10, [r4]
+          MLA            r8, r12, r14, r8                  @ s += x[1]*h[n-1]
+          SUB            r5, r5, #3                        @ n-2 taps remain (multiple of 4)
+          MLA            r8, r9, r10, r8                   @ s += x[2]*h[n-2]
+
+          VMOV.S32       Q10, #0
+LOOP3:                                                     @ same 4-wide pattern as LOOP1
+          CMP            r5, #0
+          BLE            L3
+          SUB            r4, r4, #8
+          MOV            r9, r4
+          VLD1.S16       D0, [r6]!
+          VLD1.S16       D1, [r9]!
+          VREV64.16      D1, D1
+          SUBS           r5, r5, #4
+          VMLAL.S16      Q10, D0, D1
+          B              LOOP3
+
+L3:
+          VADD.S32       D20, D20, D21                     @ horizontal reduction
+          VPADD.S32      D20, D20, D20
+          VMOV.S32       r5, D20[0]
+          ADD            r8, r8, r5
+          ADD            r8, r11, r8, LSL #1               @ round
+          MOV            r8, r8, LSR #16                   @extract_h(s)
+          ADD            r3, r3, #1
+          STRH           r8, [r2], #2                      @y[n]
+
+                                                           @ --- section 4: n mod 4 == 3, no head taps ---
+          ADD            r5, r3, #1                        @ i = n + 1 (already a multiple of 4)
+          ADD            r4, r1, r5, LSL #1                @ tmpH address (one past h[n])
+          MOV            r6, r0
+          VMOV.S32       Q10, #0
+LOOP4:                                                     @ same 4-wide pattern as LOOP1
+          CMP            r5, #0
+          BLE            L4
+          SUB            r4, r4, #8
+          MOV            r9, r4
+          VLD1.S16       D0, [r6]!
+          VLD1.S16       D1, [r9]!
+          VREV64.16      D1, D1
+          SUBS           r5, r5, #4
+          VMLAL.S16      Q10, D0, D1
+          B              LOOP4
+L4:
+          VADD.S32       D20, D20, D21                     @ horizontal reduction
+          VPADD.S32      D20, D20, D20
+          VMOV.S32       r5, D20[0]
+          ADD            r5, r11, r5, LSL #1               @ round (no scalar head term here)
+          MOV            r5, r5, LSR #16                   @extract_h(s)
+          ADD            r3, r3, #1
+          STRH           r5, [r2], #2                      @y[n]
+
+          CMP            r3, #64                           @ hard-coded vector size: loop until n == 64
+          BLT            LOOP
+
+Convolve_asm_end:
+
+          LDMFD          r13!, {r4 - r12, r15}             @ restore and return (pop lr into pc)
+
+          @ENDFUNC
+          .END