diff options
Diffstat (limited to 'media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s')
-rw-r--r-- | media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s | 254 |
1 files changed, 127 insertions, 127 deletions
diff --git a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s index 07ca344..678f1d0 100644 --- a/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s +++ b/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s @@ -1,127 +1,127 @@ -@/*
-@ ** Copyright 2003-2010, VisualOn, Inc.
-@ **
-@ ** Licensed under the Apache License, Version 2.0 (the "License");
-@ ** you may not use this file except in compliance with the License.
-@ ** You may obtain a copy of the License at
-@ **
-@ ** http://www.apache.org/licenses/LICENSE-2.0
-@ **
-@ ** Unless required by applicable law or agreed to in writing, software
-@ ** distributed under the License is distributed on an "AS IS" BASIS,
-@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-@ ** See the License for the specific language governing permissions and
-@ ** limitations under the License.
-@ */
-@
-@**********************************************************************/
-@Word32 Dot_product12( /* (o) Q31: normalized result (1 < val <= -1) */
-@ Word16 x[], /* (i) 12bits: x vector */
-@ Word16 y[], /* (i) 12bits: y vector */
-@ Word16 lg, /* (i) : vector length */
-@ Word16 * exp /* (o) : exponent of result (0..+30) */
-@)
-@************************************************************************
-@ x[] --- r0
-@ y[] --- r1
-@ lg --- r2
-@ *exp --- r3
-
- .section .text
- .global Dot_product12_asm
-
-Dot_product12_asm:
-
- STMFD r13!, {r4 - r12, r14}
- CMP r0, r1
- BEQ LOOP_EQ
-
- VLD1.S16 {Q0, Q1}, [r0]! @load 16 Word16 x[]
- VLD1.S16 {Q2, Q3}, [r0]! @load 16 Word16 x[]
- VLD1.S16 {Q4, Q5}, [r0]! @load 16 Word16 x[]
- VLD1.S16 {Q6, Q7}, [r0]! @load 16 Word16 x[]
- VLD1.S16 {Q8, Q9}, [r1]! @load 16 Word16 y[]
- VLD1.S16 {Q10, Q11}, [r1]! @load 16 Word16 y[]
- VLD1.S16 {Q12, Q13}, [r1]! @load 16 Word16 y[]
-
- VMULL.S16 Q15, D16, D0
- VMLAL.S16 Q15, D17, D1
- VMLAL.S16 Q15, D18, D2
- VMLAL.S16 Q15, D19, D3
- VLD1.S16 {Q0, Q1}, [r1]! @load 16 Word16 y[]
- VMLAL.S16 Q15, D20, D4
- VMLAL.S16 Q15, D21, D5
- VMLAL.S16 Q15, D22, D6
- VMLAL.S16 Q15, D23, D7
- VMLAL.S16 Q15, D24, D8
- VMLAL.S16 Q15, D25, D9
- VMLAL.S16 Q15, D26, D10
- VMLAL.S16 Q15, D27, D11
- VMLAL.S16 Q15, D0, D12
- VMLAL.S16 Q15, D1, D13
- VMLAL.S16 Q15, D2, D14
- VMLAL.S16 Q15, D3, D15
-
- CMP r2, #64
- BEQ Lable1
- VLD1.S16 {Q0, Q1}, [r0]! @load 16 Word16 x[]
- VLD1.S16 {Q2, Q3}, [r1]!
- VMLAL.S16 Q15, D4, D0
- VMLAL.S16 Q15, D5, D1
- VMLAL.S16 Q15, D6, D2
- VMLAL.S16 Q15, D7, D3
- BL Lable1
-
-LOOP_EQ:
- VLD1.S16 {Q0, Q1}, [r0]!
- VLD1.S16 {Q2, Q3}, [r0]!
- VLD1.S16 {Q4, Q5}, [r0]!
- VLD1.S16 {Q6, Q7}, [r0]!
- VMULL.S16 Q15, D0, D0
- VMLAL.S16 Q15, D1, D1
- VMLAL.S16 Q15, D2, D2
- VMLAL.S16 Q15, D3, D3
- VMLAL.S16 Q15, D4, D4
- VMLAL.S16 Q15, D5, D5
- VMLAL.S16 Q15, D6, D6
- VMLAL.S16 Q15, D7, D7
- VMLAL.S16 Q15, D8, D8
- VMLAL.S16 Q15, D9, D9
- VMLAL.S16 Q15, D10, D10
- VMLAL.S16 Q15, D11, D11
- VMLAL.S16 Q15, D12, D12
- VMLAL.S16 Q15, D13, D13
- VMLAL.S16 Q15, D14, D14
- VMLAL.S16 Q15, D15, D15
-
- CMP r2, #64
- BEQ Lable1
- VLD1.S16 {Q0, Q1}, [r0]!
- VMLAL.S16 Q15, D0, D0
- VMLAL.S16 Q15, D1, D1
- VMLAL.S16 Q15, D2, D2
- VMLAL.S16 Q15, D3, D3
-
-Lable1:
-
- VQADD.S32 D30, D30, D31
- VPADD.S32 D30, D30, D30
- VMOV.S32 r12, D30[0]
-
- ADD r12, r12, r12
- ADD r12, r12, #1 @ L_sum = (L_sum << 1) + 1
- MOV r4, r12
- CMP r12, #0
- RSBLT r4, r12, #0
- CLZ r10, r4
- SUB r10, r10, #1 @ sft = norm_l(L_sum)
- MOV r0, r12, LSL r10 @ L_sum = L_sum << sft
- RSB r11, r10, #30 @ *exp = 30 - sft
- STRH r11, [r3]
-
-Dot_product12_end:
-
- LDMFD r13!, {r4 - r12, r15}
-
- .END
-
+@/* +@ ** Copyright 2003-2010, VisualOn, Inc. +@ ** +@ ** Licensed under the Apache License, Version 2.0 (the "License"); +@ ** you may not use this file except in compliance with the License. +@ ** You may obtain a copy of the License at +@ ** +@ ** http://www.apache.org/licenses/LICENSE-2.0 +@ ** +@ ** Unless required by applicable law or agreed to in writing, software +@ ** distributed under the License is distributed on an "AS IS" BASIS, +@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +@ ** See the License for the specific language governing permissions and +@ ** limitations under the License. +@ */ +@ +@**********************************************************************/ +@Word32 Dot_product12( /* (o) Q31: normalized result (1 < val <= -1) */ +@ Word16 x[], /* (i) 12bits: x vector */ +@ Word16 y[], /* (i) 12bits: y vector */ +@ Word16 lg, /* (i) : vector length */ +@ Word16 * exp /* (o) : exponent of result (0..+30) */ +@) +@************************************************************************ +@ x[] --- r0 +@ y[] --- r1 +@ lg --- r2 +@ *exp --- r3 + + .section .text + .global Dot_product12_asm + +Dot_product12_asm: + + STMFD r13!, {r4 - r12, r14} + CMP r0, r1 + BEQ LOOP_EQ + + VLD1.S16 {Q0, Q1}, [r0]! @load 16 Word16 x[] + VLD1.S16 {Q2, Q3}, [r0]! @load 16 Word16 x[] + VLD1.S16 {Q4, Q5}, [r0]! @load 16 Word16 x[] + VLD1.S16 {Q6, Q7}, [r0]! @load 16 Word16 x[] + VLD1.S16 {Q8, Q9}, [r1]! @load 16 Word16 y[] + VLD1.S16 {Q10, Q11}, [r1]! @load 16 Word16 y[] + VLD1.S16 {Q12, Q13}, [r1]! @load 16 Word16 y[] + + VMULL.S16 Q15, D16, D0 + VMLAL.S16 Q15, D17, D1 + VMLAL.S16 Q15, D18, D2 + VMLAL.S16 Q15, D19, D3 + VLD1.S16 {Q0, Q1}, [r1]! @load 16 Word16 y[] + VMLAL.S16 Q15, D20, D4 + VMLAL.S16 Q15, D21, D5 + VMLAL.S16 Q15, D22, D6 + VMLAL.S16 Q15, D23, D7 + VMLAL.S16 Q15, D24, D8 + VMLAL.S16 Q15, D25, D9 + VMLAL.S16 Q15, D26, D10 + VMLAL.S16 Q15, D27, D11 + VMLAL.S16 Q15, D0, D12 + VMLAL.S16 Q15, D1, D13 + VMLAL.S16 Q15, D2, D14 + VMLAL.S16 Q15, D3, D15 + + CMP r2, #64 + BEQ Lable1 + VLD1.S16 {Q0, Q1}, [r0]! @load 16 Word16 x[] + VLD1.S16 {Q2, Q3}, [r1]! + VMLAL.S16 Q15, D4, D0 + VMLAL.S16 Q15, D5, D1 + VMLAL.S16 Q15, D6, D2 + VMLAL.S16 Q15, D7, D3 + BL Lable1 + +LOOP_EQ: + VLD1.S16 {Q0, Q1}, [r0]! + VLD1.S16 {Q2, Q3}, [r0]! + VLD1.S16 {Q4, Q5}, [r0]! + VLD1.S16 {Q6, Q7}, [r0]! + VMULL.S16 Q15, D0, D0 + VMLAL.S16 Q15, D1, D1 + VMLAL.S16 Q15, D2, D2 + VMLAL.S16 Q15, D3, D3 + VMLAL.S16 Q15, D4, D4 + VMLAL.S16 Q15, D5, D5 + VMLAL.S16 Q15, D6, D6 + VMLAL.S16 Q15, D7, D7 + VMLAL.S16 Q15, D8, D8 + VMLAL.S16 Q15, D9, D9 + VMLAL.S16 Q15, D10, D10 + VMLAL.S16 Q15, D11, D11 + VMLAL.S16 Q15, D12, D12 + VMLAL.S16 Q15, D13, D13 + VMLAL.S16 Q15, D14, D14 + VMLAL.S16 Q15, D15, D15 + + CMP r2, #64 + BEQ Lable1 + VLD1.S16 {Q0, Q1}, [r0]! + VMLAL.S16 Q15, D0, D0 + VMLAL.S16 Q15, D1, D1 + VMLAL.S16 Q15, D2, D2 + VMLAL.S16 Q15, D3, D3 + +Lable1: + + VQADD.S32 D30, D30, D31 + VPADD.S32 D30, D30, D30 + VMOV.S32 r12, D30[0] + + ADD r12, r12, r12 + ADD r12, r12, #1 @ L_sum = (L_sum << 1) + 1 + MOV r4, r12 + CMP r12, #0 + RSBLT r4, r12, #0 + CLZ r10, r4 + SUB r10, r10, #1 @ sft = norm_l(L_sum) + MOV r0, r12, LSL r10 @ L_sum = L_sum << sft + RSB r11, r10, #30 @ *exp = 30 - sft + STRH r11, [r3] + +Dot_product12_end: + + LDMFD r13!, {r4 - r12, r15} + + .END + |