summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s
diff options
context:
space:
mode:
Diffstat (limited to 'media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s')
-rw-r--r--media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s127
1 files changed, 127 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s
new file mode 100644
index 0000000..8230944
--- /dev/null
+++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s
@@ -0,0 +1,127 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+@
+@**********************************************************************/
+@Word32 Dot_product12( /* (o) Q31: normalized result (1 < val <= -1) */
+@ Word16 x[], /* (i) 12bits: x vector */
+@ Word16 y[], /* (i) 12bits: y vector */
+@ Word16 lg, /* (i) : vector length */
+@ Word16 * exp /* (o) : exponent of result (0..+30) */
+@)
+@************************************************************************
+@ x[] --- r0
+@ y[] --- r1
+@ lg --- r2
+@ *exp --- r3
+
+ .section .text
+ .global Dot_product12_asm
+
+Dot_product12_asm:
+
+ STMFD r13!, {r4 - r12, r14}
+ CMP r0, r1
+ BEQ LOOP_EQ
+
+ VLD1.S16 {Q0, Q1}, [r0]! @load 16 Word16 x[]
+ VLD1.S16 {Q2, Q3}, [r0]! @load 16 Word16 x[]
+ VLD1.S16 {Q4, Q5}, [r0]! @load 16 Word16 x[]
+ VLD1.S16 {Q6, Q7}, [r0]! @load 16 Word16 x[]
+ VLD1.S16 {Q8, Q9}, [r1]! @load 16 Word16 y[]
+ VLD1.S16 {Q10, Q11}, [r1]! @load 16 Word16 y[]
+ VLD1.S16 {Q12, Q13}, [r1]! @load 16 Word16 y[]
+
+ VMULL.S16 Q15, D16, D0
+ VMLAL.S16 Q15, D17, D1
+ VMLAL.S16 Q15, D18, D2
+ VMLAL.S16 Q15, D19, D3
+ VLD1.S16 {Q0, Q1}, [r1]! @load 16 Word16 y[]
+ VMLAL.S16 Q15, D20, D4
+ VMLAL.S16 Q15, D21, D5
+ VMLAL.S16 Q15, D22, D6
+ VMLAL.S16 Q15, D23, D7
+ VMLAL.S16 Q15, D24, D8
+ VMLAL.S16 Q15, D25, D9
+ VMLAL.S16 Q15, D26, D10
+ VMLAL.S16 Q15, D27, D11
+ VMLAL.S16 Q15, D0, D12
+ VMLAL.S16 Q15, D1, D13
+ VMLAL.S16 Q15, D2, D14
+ VMLAL.S16 Q15, D3, D15
+
+ CMP r2, #64
+ BEQ Lable1
+ VLD1.S16 {Q0, Q1}, [r0]! @load 16 Word16 x[]
+ VLD1.S16 {Q2, Q3}, [r1]!
+ VMLAL.S16 Q15, D4, D0
+ VMLAL.S16 Q15, D5, D1
+ VMLAL.S16 Q15, D6, D2
+ VMLAL.S16 Q15, D7, D3
+ BL Lable1
+
+LOOP_EQ:
+ VLD1.S16 {Q0, Q1}, [r0]!
+ VLD1.S16 {Q2, Q3}, [r0]!
+ VLD1.S16 {Q4, Q5}, [r0]!
+ VLD1.S16 {Q6, Q7}, [r0]!
+ VMULL.S16 Q15, D0, D0
+ VMLAL.S16 Q15, D1, D1
+ VMLAL.S16 Q15, D2, D2
+ VMLAL.S16 Q15, D3, D3
+ VMLAL.S16 Q15, D4, D4
+ VMLAL.S16 Q15, D5, D5
+ VMLAL.S16 Q15, D6, D6
+ VMLAL.S16 Q15, D7, D7
+ VMLAL.S16 Q15, D8, D8
+ VMLAL.S16 Q15, D9, D9
+ VMLAL.S16 Q15, D10, D10
+ VMLAL.S16 Q15, D11, D11
+ VMLAL.S16 Q15, D12, D12
+ VMLAL.S16 Q15, D13, D13
+ VMLAL.S16 Q15, D14, D14
+ VMLAL.S16 Q15, D15, D15
+
+ CMP r2, #64
+ BEQ Lable1
+ VLD1.S16 {Q0, Q1}, [r0]!
+ VMLAL.S16 Q15, D0, D0
+ VMLAL.S16 Q15, D1, D1
+ VMLAL.S16 Q15, D2, D2
+ VMLAL.S16 Q15, D3, D3
+
+Lable1:
+
+ VQADD.S32 D30, D30, D31
+ VPADD.S32 D30, D30, D30
+ VMOV.S32 r12, D30[0]
+
+ ADD r12, r12, r12
+ ADD r12, r12, #1 @ L_sum = (L_sum << 1) + 1
+ MOV r4, r12
+ CMP r12, #0
+ RSBLT r4, r12, #0
+ CLZ r10, r4
+ SUB r10, r10, #1 @ sft = norm_l(L_sum)
+ MOV r0, r12, LSL r10 @ L_sum = L_sum << sft
+ RSB r11, r10, #30 @ *exp = 30 - sft
+ STRH r11, [r3]
+
+Dot_product12_end:
+
+ LDMFD r13!, {r4 - r12, r15}
+
+ .END
+