summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/amrwbenc/src/asm/ARMV7/Dot_p_neon.s
blob: 7149a496846cb95530327717b02eec8f5bc8d3ad (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
@
@**********************************************************************/
@Word32 Dot_product12(                      /* (o) Q31: normalized result (-1 <= val < 1) */
@       Word16 x[],                           /* (i) 12bits: x vector                       */
@       Word16 y[],                           /* (i) 12bits: y vector                       */
@       Word16 lg,                            /* (i)    : vector length                     */
@       Word16 * exp                          /* (o)    : exponent of result (0..+30)       */
@)
@************************************************************************
@  x[]   ---  r0
@  y[]   ---  r1
@  lg    ---  r2
@  *exp  ---  r3

          .section   .text
          .global    Dot_product12_asm

Dot_product12_asm:

          STMFD   	    r13!, {r4 - r12, r14}
	  CMP               r0, r1
	  BEQ               LOOP_EQ

          VLD1.S16          {Q0, Q1}, [r0]!               @load 16 Word16 x[]
          VLD1.S16          {Q2, Q3}, [r0]!               @load 16 Word16 x[]
          VLD1.S16          {Q4, Q5}, [r0]!               @load 16 Word16 x[]
          VLD1.S16          {Q6, Q7}, [r0]!               @load 16 Word16 x[]
	  VLD1.S16          {Q8, Q9}, [r1]!               @load 16 Word16 y[]
	  VLD1.S16          {Q10, Q11}, [r1]!             @load 16 Word16 y[]
	  VLD1.S16          {Q12, Q13}, [r1]!             @load 16 Word16 y[]

          VMULL.S16         Q15, D16, D0
          VMLAL.S16         Q15, D17, D1
          VMLAL.S16         Q15, D18, D2
          VMLAL.S16         Q15, D19, D3
	  VLD1.S16          {Q0, Q1}, [r1]!               @load 16 Word16 y[]
          VMLAL.S16         Q15, D20, D4
          VMLAL.S16         Q15, D21, D5
          VMLAL.S16         Q15, D22, D6
          VMLAL.S16         Q15, D23, D7
          VMLAL.S16         Q15, D24, D8
          VMLAL.S16         Q15, D25, D9
          VMLAL.S16         Q15, D26, D10
          VMLAL.S16         Q15, D27, D11
          VMLAL.S16         Q15, D0, D12
          VMLAL.S16         Q15, D1, D13
          VMLAL.S16         Q15, D2, D14
          VMLAL.S16         Q15, D3, D15

          CMP               r2, #64
          BEQ               Lable1
          VLD1.S16          {Q0, Q1}, [r0]!               @load 16 Word16 x[]
	  VLD1.S16          {Q2, Q3}, [r1]!
          VMLAL.S16         Q15, D4, D0
          VMLAL.S16         Q15, D5, D1
          VMLAL.S16         Q15, D6, D2
          VMLAL.S16         Q15, D7, D3
	  BL                Lable1

LOOP_EQ:
          VLD1.S16          {Q0, Q1}, [r0]!
	  VLD1.S16          {Q2, Q3}, [r0]!
	  VLD1.S16          {Q4, Q5}, [r0]!
	  VLD1.S16          {Q6, Q7}, [r0]!
	  VMULL.S16         Q15, D0, D0
	  VMLAL.S16         Q15, D1, D1
	  VMLAL.S16         Q15, D2, D2
	  VMLAL.S16         Q15, D3, D3
	  VMLAL.S16         Q15, D4, D4
	  VMLAL.S16         Q15, D5, D5
	  VMLAL.S16         Q15, D6, D6
	  VMLAL.S16         Q15, D7, D7
	  VMLAL.S16         Q15, D8, D8
	  VMLAL.S16         Q15, D9, D9
	  VMLAL.S16         Q15, D10, D10
	  VMLAL.S16         Q15, D11, D11
	  VMLAL.S16         Q15, D12, D12
	  VMLAL.S16         Q15, D13, D13
	  VMLAL.S16         Q15, D14, D14
	  VMLAL.S16         Q15, D15, D15

	  CMP               r2, #64
	  BEQ               Lable1
	  VLD1.S16          {Q0, Q1}, [r0]!
	  VMLAL.S16         Q15, D0, D0
	  VMLAL.S16         Q15, D1, D1
	  VMLAL.S16         Q15, D2, D2
	  VMLAL.S16         Q15, D3, D3

Lable1:

          VQADD.S32         D30, D30, D31
          VPADD.S32         D30, D30, D30
          VMOV.S32          r12, D30[0]

	  ADD               r12, r12, r12
          ADD               r12, r12, #1                         @ L_sum = (L_sum << 1)  + 1
	  MOV               r4, r12
	  CMP               r12, #0
	  RSBLT             r4, r12, #0
          CLZ               r10, r4
          SUB               r10, r10, #1                         @ sft = norm_l(L_sum)
          MOV               r0, r12, LSL r10                     @ L_sum = L_sum << sft
          RSB               r11, r10, #30                        @ *exp = 30 - sft
          STRH              r11, [r3]

Dot_product12_end:

          LDMFD   	    r13!, {r4 - r12, r15}

          .end