Diffstat (limited to 'media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s')
-rw-r--r--  media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s  70
1 file changed, 35 insertions(+), 35 deletions(-)
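
The change below is whitespace-only: every removed/added line pair differs only in trailing spaces and tabs, and no instruction is touched. For context, here is a minimal C sketch of the operation the routine implements, reconstructed from the assembly itself (the 0x8000 rounding constant kept in r11, the <<1 / >>16 extraction, and the CMP r3, #64 loop bound). Convolve_ref is a hypothetical name, not from the source tree:

#include <stdint.h>

typedef int16_t Word16;
typedef int32_t Word32;

/* Reference sketch: y[n] = extract_h(round(2 * sum_{i=0..n} x[i]*h[n-i])).
 * The assembly fixes the vector size at 64 via CMP r3, #64 / BLT LOOP. */
void Convolve_ref(const Word16 x[], const Word16 h[], Word16 y[], Word16 L)
{
    for (Word32 n = 0; n < L; n++) {
        Word32 s = 0;
        for (Word32 i = 0; i <= n; i++)
            s += (Word32)x[i] * h[n - i];  /* MUL/MLA prologue + VMLAL.S16 body */
        s = (s << 1) + 0x8000;             /* ADD r8, r11, r8, LSL #1 */
        y[n] = (Word16)(s >> 16);          /* MOV r8, r8, LSR #16 = extract_h(s) */
    }
}
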
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s
index 189e33b..8efa9fb 100644
--- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/convolve_neon.s
@@ -20,22 +20,22 @@
@* Word16 y[], /* (o) : output vector */
@* Word16 L /* (i) : vector size */
@*)
-@
+@
@ r0 --- x[]
@ r1 --- h[]
@ r2 --- y[]
@ r3 --- L
- .section .text
- .global Convolve_asm
+ .section .text
+ .global Convolve_asm
Convolve_asm:
- STMFD r13!, {r4 - r12, r14}
- MOV r3, #0
+ STMFD r13!, {r4 - r12, r14}
+ MOV r3, #0
MOV r11, #0x8000
-
-LOOP:
+
+LOOP:
@MOV r8, #0 @ s = 0
ADD r4, r1, r3, LSL #1 @ tmpH address
ADD r5, r3, #1 @ i = n + 1
@@ -43,21 +43,21 @@ LOOP:
LDRSH r9, [r6], #2 @ *tmpX++
LDRSH r10, [r4] @ *tmpH--
SUB r5, r5, #1
- VMOV.S32 Q10, #0
- MUL r8, r9, r10
+ VMOV.S32 Q10, #0
+ MUL r8, r9, r10
-LOOP1:
+LOOP1:
CMP r5, #0
BLE L1
SUB r4, r4, #8
MOV r9, r4
- VLD1.S16 D0, [r6]!
+ VLD1.S16 D0, [r6]!
VLD1.S16 D1, [r9]!
VREV64.16 D1, D1
- SUBS r5, r5, #4
- VMLAL.S16 Q10, D0, D1
- B LOOP1
-L1:
+ SUBS r5, r5, #4
+ VMLAL.S16 Q10, D0, D1
+ B LOOP1
+L1:
VADD.S32 D20, D20, D21
VPADD.S32 D20, D20, D20
VMOV.S32 r5, D20[0]
@@ -73,25 +73,25 @@ L1:
ADD r5, r3, #1
MOV r6, r0
LDRSH r9, [r6], #2 @ *tmpX++
- LDRSH r10, [r4], #-2
+ LDRSH r10, [r4], #-2
LDRSH r12, [r6], #2
LDRSH r14, [r4]
MUL r8, r9, r10
SUB r5, r5, #2
MLA r8, r12, r14, r8
-
+
VMOV.S32 Q10, #0
LOOP2:
CMP r5, #0
BLE L2
SUB r4, r4, #8
MOV r9, r4
- VLD1.S16 D0, [r6]!
+ VLD1.S16 D0, [r6]!
VLD1.S16 D1, [r9]!
SUBS r5, r5, #4
VREV64.16 D1, D1
- VMLAL.S16 Q10, D0, D1
+ VMLAL.S16 Q10, D0, D1
B LOOP2
L2:
VADD.S32 D20, D20, D21
@@ -100,7 +100,7 @@ L2:
ADD r8, r8, r5
ADD r8, r11, r8, LSL #1
MOV r8, r8, LSR #16 @extract_h(s)
- ADD r3, r3, #1
+ ADD r3, r3, #1
STRH r8, [r2], #2 @y[n]
@@ -115,7 +115,7 @@ L2:
MUL r8, r9, r10
LDRSH r9, [r6], #2
LDRSH r10, [r4]
- MLA r8, r12, r14, r8
+ MLA r8, r12, r14, r8
SUB r5, r5, #3
MLA r8, r9, r10, r8
@@ -125,12 +125,12 @@ LOOP3:
BLE L3
SUB r4, r4, #8
MOV r9, r4
- VLD1.S16 D0, [r6]!
+ VLD1.S16 D0, [r6]!
VLD1.S16 D1, [r9]!
VREV64.16 D1, D1
SUBS r5, r5, #4
- VMLAL.S16 Q10, D0, D1
- B LOOP3
+ VMLAL.S16 Q10, D0, D1
+ B LOOP3
L3:
VADD.S32 D20, D20, D21
@@ -146,18 +146,18 @@ L3:
ADD r4, r1, r5, LSL #1 @ tmpH address
MOV r6, r0
VMOV.S32 Q10, #0
-LOOP4:
+LOOP4:
CMP r5, #0
BLE L4
SUB r4, r4, #8
MOV r9, r4
- VLD1.S16 D0, [r6]!
+ VLD1.S16 D0, [r6]!
VLD1.S16 D1, [r9]!
VREV64.16 D1, D1
- SUBS r5, r5, #4
- VMLAL.S16 Q10, D0, D1
- B LOOP4
-L4:
+ SUBS r5, r5, #4
+ VMLAL.S16 Q10, D0, D1
+ B LOOP4
+L4:
VADD.S32 D20, D20, D21
VPADD.S32 D20, D20, D20
VMOV.S32 r5, D20[0]
@@ -165,14 +165,14 @@ L4:
MOV r5, r5, LSR #16 @extract_h(s)
ADD r3, r3, #1
STRH r5, [r2], #2 @y[n]
-
+
CMP r3, #64
BLT LOOP
-
-Convolve_asm_end:
-
+
+Convolve_asm_end:
+
LDMFD r13!, {r4 - r12, r15}
-
+
@ENDFUNC
.END
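
For comparison, here is the four-at-a-time inner loop shared by LOOP1 through LOOP4 above, expressed with NEON intrinsics: x is read forward while h is read backward, so each group of four taps is loaded ascending and reversed with VREV64.16 before the widening multiply-accumulate into Q10. This is an illustrative sketch, not code from the tree (dot4_rev is an invented name, and count is assumed to be a multiple of four, as the SUBS/BLE structure of the assembly effectively requires):

#include <arm_neon.h>
#include <stdint.h>

static int32_t dot4_rev(const int16_t *tmpX, const int16_t *tmpH_end, int count)
{
    int32x4_t acc = vdupq_n_s32(0);                    /* VMOV.S32 Q10, #0 */
    while (count > 0) {
        tmpH_end -= 4;                                 /* SUB r4, r4, #8 */
        int16x4_t vx = vld1_s16(tmpX);                 /* VLD1.S16 D0, [r6]! */
        int16x4_t vh = vrev64_s16(vld1_s16(tmpH_end)); /* VLD1.S16 D1 + VREV64.16 */
        acc = vmlal_s16(acc, vx, vh);                  /* VMLAL.S16 Q10, D0, D1 */
        tmpX += 4;
        count -= 4;                                    /* SUBS r5, r5, #4 */
    }
    /* Horizontal reduction: VADD.S32 D20, D20, D21 ; VPADD.S32 D20, D20, D20 */
    int32x2_t sum2 = vadd_s32(vget_low_s32(acc), vget_high_s32(acc));
    sum2 = vpadd_s32(sum2, sum2);
    return vget_lane_s32(sum2, 0);                     /* VMOV.S32 r5, D20[0] */
}
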