diff options
Diffstat (limited to 'media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s')
-rw-r--r-- | media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s new file mode 100644 index 0000000..8230944 --- /dev/null +++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s @@ -0,0 +1,127 @@ +@/* +@ ** Copyright 2003-2010, VisualOn, Inc. +@ ** +@ ** Licensed under the Apache License, Version 2.0 (the "License"); +@ ** you may not use this file except in compliance with the License. +@ ** You may obtain a copy of the License at +@ ** +@ ** http://www.apache.org/licenses/LICENSE-2.0 +@ ** +@ ** Unless required by applicable law or agreed to in writing, software +@ ** distributed under the License is distributed on an "AS IS" BASIS, +@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ ** See the License for the specific language governing permissions and +@ ** limitations under the License. +@ */ +@ +@**********************************************************************/ +@Word32 Dot_product12( /* (o) Q31: normalized result (1 < val <= -1) */ +@ Word16 x[], /* (i) 12bits: x vector */ +@ Word16 y[], /* (i) 12bits: y vector */ +@ Word16 lg, /* (i) : vector length */ +@ Word16 * exp /* (o) : exponent of result (0..+30) */ +@) +@************************************************************************ +@ x[] --- r0 +@ y[] --- r1 +@ lg --- r2 +@ *exp --- r3 + + .section .text + .global Dot_product12_asm + +Dot_product12_asm: + + STMFD r13!, {r4 - r12, r14} + CMP r0, r1 + BEQ LOOP_EQ + + VLD1.S16 {Q0, Q1}, [r0]! @load 16 Word16 x[] + VLD1.S16 {Q2, Q3}, [r0]! @load 16 Word16 x[] + VLD1.S16 {Q4, Q5}, [r0]! @load 16 Word16 x[] + VLD1.S16 {Q6, Q7}, [r0]! @load 16 Word16 x[] + VLD1.S16 {Q8, Q9}, [r1]! @load 16 Word16 y[] + VLD1.S16 {Q10, Q11}, [r1]! @load 16 Word16 y[] + VLD1.S16 {Q12, Q13}, [r1]! @load 16 Word16 y[] + + VMULL.S16 Q15, D16, D0 + VMLAL.S16 Q15, D17, D1 + VMLAL.S16 Q15, D18, D2 + VMLAL.S16 Q15, D19, D3 + VLD1.S16 {Q0, Q1}, [r1]! @load 16 Word16 y[] + VMLAL.S16 Q15, D20, D4 + VMLAL.S16 Q15, D21, D5 + VMLAL.S16 Q15, D22, D6 + VMLAL.S16 Q15, D23, D7 + VMLAL.S16 Q15, D24, D8 + VMLAL.S16 Q15, D25, D9 + VMLAL.S16 Q15, D26, D10 + VMLAL.S16 Q15, D27, D11 + VMLAL.S16 Q15, D0, D12 + VMLAL.S16 Q15, D1, D13 + VMLAL.S16 Q15, D2, D14 + VMLAL.S16 Q15, D3, D15 + + CMP r2, #64 + BEQ Lable1 + VLD1.S16 {Q0, Q1}, [r0]! @load 16 Word16 x[] + VLD1.S16 {Q2, Q3}, [r1]! + VMLAL.S16 Q15, D4, D0 + VMLAL.S16 Q15, D5, D1 + VMLAL.S16 Q15, D6, D2 + VMLAL.S16 Q15, D7, D3 + BL Lable1 + +LOOP_EQ: + VLD1.S16 {Q0, Q1}, [r0]! + VLD1.S16 {Q2, Q3}, [r0]! + VLD1.S16 {Q4, Q5}, [r0]! + VLD1.S16 {Q6, Q7}, [r0]! + VMULL.S16 Q15, D0, D0 + VMLAL.S16 Q15, D1, D1 + VMLAL.S16 Q15, D2, D2 + VMLAL.S16 Q15, D3, D3 + VMLAL.S16 Q15, D4, D4 + VMLAL.S16 Q15, D5, D5 + VMLAL.S16 Q15, D6, D6 + VMLAL.S16 Q15, D7, D7 + VMLAL.S16 Q15, D8, D8 + VMLAL.S16 Q15, D9, D9 + VMLAL.S16 Q15, D10, D10 + VMLAL.S16 Q15, D11, D11 + VMLAL.S16 Q15, D12, D12 + VMLAL.S16 Q15, D13, D13 + VMLAL.S16 Q15, D14, D14 + VMLAL.S16 Q15, D15, D15 + + CMP r2, #64 + BEQ Lable1 + VLD1.S16 {Q0, Q1}, [r0]! + VMLAL.S16 Q15, D0, D0 + VMLAL.S16 Q15, D1, D1 + VMLAL.S16 Q15, D2, D2 + VMLAL.S16 Q15, D3, D3 + +Lable1: + + VQADD.S32 D30, D30, D31 + VPADD.S32 D30, D30, D30 + VMOV.S32 r12, D30[0] + + ADD r12, r12, r12 + ADD r12, r12, #1 @ L_sum = (L_sum << 1) + 1 + MOV r4, r12 + CMP r12, #0 + RSBLT r4, r12, #0 + CLZ r10, r4 + SUB r10, r10, #1 @ sft = norm_l(L_sum) + MOV r0, r12, LSL r10 @ L_sum = L_sum << sft + RSB r11, r10, #30 @ *exp = 30 - sft + STRH r11, [r3] + +Dot_product12_end: + + LDMFD r13!, {r4 - r12, r15} + + .END + |