/*
 * Listing captured from a repository web viewer.
 * path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_Interpolate_Chroma_s.S
 * blob: 2f4293fee263baad8e987cf5e80a6f11b9e1fc2d (plain)
 * The viewer navigation bar and its line-number gutter (1-189) were page
 * furniture rather than file content and are omitted here.
 */
/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
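/*
 * armVCM4P10_Interpolate_Chroma: NEON routine that writes a block of 8-bit
 * samples, each a weighted blend of a 2x2 neighbourhood of source bytes
 * (the four weights sum to 64), for block widths 2, 4 and 8.
 *
 * Syntax: GNU as, ARM-state instructions, NEON enabled via ".fpu neon".
 * Arguments: four in r0-r3 plus four 32-bit words read from the caller's stack.
 */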

    /* Build attributes 24 and 25 (Tag_ABI_align_needed and
     * Tag_ABI_align_preserved in the ARM EABI build-attribute numbering),
     * both set to 1: the object relies on, and preserves, 8-byte alignment
     * of 8-byte data items. */
    .eabi_attribute 24, 1
    .eabi_attribute 25, 1

    /* Assemble ARM-state (not Thumb) instructions and accept NEON mnemonics. */
    .arm
    .fpu neon

    /* The branch tables and the code both live in .text. On ARM targets
     * GNU as treats the .align operand as a power of two, so this requests
     * 16-byte alignment. */
    .text
    .align 4

/*
 * Handler offsets used when at least one fractional offset is non-zero.
 *
 * Each word is the distance from P0+8 to a handler. P0 is the "ADD pc,r11"
 * dispatch instruction, and in ARM state pc reads as that instruction's
 * address plus 8, so adding the word to pc lands exactly on the handler.
 * The dispatcher indexes the table with a byte offset of width*2
 * (LDR r11,[r11, r4, lsl #1]), which is why widths 2, 4 and 8 select the
 * words at byte offsets 4, 8 and 16.
 */
armVCM4P10_WidthBranchTableMVIsNotZero:
    .word   WidthIs2MVIsNotZero-(P0+8)      /* +0  : not selected by widths 2/4/8 */
    .word   WidthIs2MVIsNotZero-(P0+8)      /* +4  : width 2 */
    .word   WidthIs4MVIsNotZero-(P0+8)      /* +8  : width 4 */
    .word   WidthIs4MVIsNotZero-(P0+8)      /* +12 : not selected by widths 2/4/8 */
    .word   WidthIs8MVIsNotZero-(P0+8)      /* +16 : width 8 */

/*
 * Handler offsets used when both fractional offsets are zero (copy paths).
 *
 * Each word is the distance from P0+8 to a handler; P0 is the "ADD pc,r11"
 * dispatch instruction and pc reads as P0+8 there. The dispatcher indexes
 * the table with a byte offset of width*2, so widths 2, 4 and 8 select the
 * words at byte offsets 4, 8 and 16.
 */
armVCM4P10_WidthBranchTableMVIsZero:
    .word   WidthIs2MVIsZero-(P0+8)         /* +0  : not selected by widths 2/4/8 */
    .word   WidthIs2MVIsZero-(P0+8)         /* +4  : width 2 */
    .word   WidthIs4MVIsZero-(P0+8)         /* +8  : width 4 */
    .word   WidthIs4MVIsZero-(P0+8)         /* +12 : not selected by widths 2/4/8 */
    .word   WidthIs8MVIsZero-(P0+8)         /* +16 : width 8 */

    /*
     * armVCM4P10_Interpolate_Chroma
     *
     * Writes a block of 8-bit samples, each a weighted blend of a 2x2
     * neighbourhood of source bytes. With fx, fy the two fractional offsets:
     *
     *   out[y][x] = sat8((  (8-fx)*(8-fy)*src[y][x]   + fx*(8-fy)*src[y][x+1]
     *                     + (8-fx)*fy    *src[y+1][x] + fx*fy    *src[y+1][x+1]
     *                     + 32) >> 6)
     *
     * Inputs as the code consumes them:
     *   r0              source address
     *   r1              source row step in bytes
     *   r2              destination address
     *   r3              destination row step in bytes
     *   stack word 0    block width -> r4 (table slots exist for 2, 4 and 8)
     *   stack word 1    row count   -> r5 (loop counter of the handlers)
     *   stack word 2    fx          -> r6 (weights the x+1 neighbour)
     *   stack word 3    fy          -> r7 (weights the next-row neighbour)
     * Output: every handler returns with r0 = 0.
     *
     * r4-r12, lr and d8-d15 are saved here and restored by each handler.
     * NOTE(review): fx and fy are presumably in 0..7 so that every weight
     * fits the single byte VDUP.8 replicates - confirm against callers.
     * NOTE(review): no width validation is done; widths other than 2, 4, 8
     * index outside the intended table slots - confirm callers guarantee it.
     *
     * The .type directive marks the symbol as a function in the ELF symbol
     * table; linkers use that type when deciding on ARM/Thumb interworking
     * for calls that target this ARM-state entry point.
     */
    .global armVCM4P10_Interpolate_Chroma
    .type   armVCM4P10_Interpolate_Chroma, %function
    .func   armVCM4P10_Interpolate_Chroma
armVCM4P10_Interpolate_Chroma:
    PUSH     {r4-r12,lr}                /* 10 words  = 0x28 bytes */
    VPUSH    {d8-d15}                   /* 8 dwords  = 0x40 bytes; stack args now at sp+0x68 */
    LDRD     r6,r7,[sp,#0x70]           /* r6 = fx, r7 = fy */
    LDRD     r4,r5,[sp,#0x68]           /* r4 = width, r5 = row count */
    RSB      r8,r6,#8                   /* r8 = 8 - fx */
    RSB      r9,r7,#8                   /* r9 = 8 - fy */
    CMN      r6,r7                      /* Z set when fx + fy == 0 */
    MOV      r10,#1                     /* post-increment: same row, next byte */
    ADREQ    r11, armVCM4P10_WidthBranchTableMVIsZero
    SUB      lr,r1,r10                  /* lr = row step - 1: from x+1 to x of the next row */
    ADRNE    r11, armVCM4P10_WidthBranchTableMVIsNotZero
    VLD1.8   {d0},[r0],r10              /* d0 = row 0, bytes x..x+7 */
    SMULBB   r12,r8,r9                  /* r12 = (8-fx)*(8-fy) */
    SMULBB   r9,r6,r9                   /* r9  = fx*(8-fy) */
    VLD1.8   {d1},[r0],lr               /* d1 = row 0, bytes x+1..x+8; r0 -> row 1 */
    SMULBB   r8,r8,r7                   /* r8  = (8-fx)*fy */
    SMULBB   r6,r6,r7                   /* r6  = fx*fy */
    VDUP.8   d12,r12                    /* weight for src[y][x]     in every byte lane */
    VDUP.8   d13,r9                     /* weight for src[y][x+1] */
    VDUP.8   d14,r8                     /* weight for src[y+1][x] */
    VDUP.8   d15,r6                     /* weight for src[y+1][x+1] */
    LDR      r11,[r11, r4, lsl #1]      /* table byte offset = width*2 */
P0: ADD      pc,r11                     /* pc reads as P0+8; entries are handler-(P0+8) */

/*
 * Width 8, weighted blend, four output rows per pass.
 *
 * On entry and at every loop-back: d0/d1 hold the current top row n at x
 * and x+1, r0 points at row n+1, r10 = 1 and lr = row step - 1.
 * d12..d15 hold the byte weights w00=(8-fx)*(8-fy), w01=fx*(8-fy),
 * w10=(8-fx)*fy, w11=fx*fy set up at function entry (fx, fy being the
 * values loaded into r6 and r7 there).
 * Accumulators: q2 = out row 0, q3 = out row 1, q11 = out row 2,
 * q12 = out row 3. Loads and multiplies are interleaved; the statement
 * order is part of the hand scheduling.
 */
WidthIs8MVIsNotZero:
    VLD1.8   {d2},[r0],r10              /* d2  = row n+1 at x */
    VMULL.U8 q2,d0,d12                  /* out0  = row n  [x]   * w00 */
    VLD1.8   {d3},[r0],lr               /* d3  = row n+1 at x+1; r0 -> row n+2 */
    VMULL.U8 q3,d2,d12                  /* out1  = row n+1[x]   * w00 */
    VLD1.8   {d16},[r0],r10             /* d16 = row n+2 at x */
    VMLAL.U8 q2,d1,d13                  /* out0 += row n  [x+1] * w01 */
    VLD1.8   {d17},[r0],lr              /* d17 = row n+2 at x+1; r0 -> row n+3 */
    VMULL.U8 q11,d16,d12                /* out2  = row n+2[x]   * w00 */
    VMLAL.U8 q3,d3,d13                  /* out1 += row n+1[x+1] * w01 */
    VLD1.8   {d18},[r0],r10             /* d18 = row n+3 at x */
    VMLAL.U8 q2,d2,d14                  /* out0 += row n+1[x]   * w10 */
    VMLAL.U8 q11,d17,d13                /* out2 += row n+2[x+1] * w01 */
    VMULL.U8 q12,d18,d12                /* out3  = row n+3[x]   * w00 */
    VLD1.8   {d19},[r0],lr              /* d19 = row n+3 at x+1; r0 -> row n+4 */
    VMLAL.U8 q3,d16,d14                 /* out1 += row n+2[x]   * w10 */
    VLD1.8   {d0},[r0],r10              /* d0  = row n+4 at x (top row of the next pass) */
    VMLAL.U8 q12,d19,d13                /* out3 += row n+3[x+1] * w01 */
    VMLAL.U8 q11,d18,d14                /* out2 += row n+3[x]   * w10 */
    VMLAL.U8 q2,d3,d15                  /* out0 += row n+1[x+1] * w11 */
    VLD1.8   {d1},[r0],lr               /* d1  = row n+4 at x+1; r0 -> row n+5 */
    VMLAL.U8 q12,d0,d14                 /* out3 += row n+4[x]   * w10 */
    VMLAL.U8 q3,d17,d15                 /* out1 += row n+2[x+1] * w11 */
    VMLAL.U8 q11,d19,d15                /* out2 += row n+3[x+1] * w11 */
    SUBS     r5,r5,#4                   /* four rows consumed; flags are tested by BGT below */
    VMLAL.U8 q12,d1,d15                 /* out3 += row n+4[x+1] * w11 */
    VQRSHRN.U16 d8,q2,#6                /* (sum + 32) >> 6, saturated and narrowed to 8 bits */
    VQRSHRN.U16 d9,q3,#6
    VQRSHRN.U16 d20,q11,#6
    VST1.64  {d8},[r2],r3               /* store 8 bytes, then advance by the destination step */
    VQRSHRN.U16 d21,q12,#6
    VST1.64  {d9},[r2],r3
    VST1.64  {d20},[r2],r3
    VST1.64  {d21},[r2],r3
    BGT      WidthIs8MVIsNotZero        /* loop while rows remain (r5 > 0) */
    MOV      r0,#0                      /* return value 0 */
    VPOP     {d8-d15}
    POP      {r4-r12,pc}                /* restore and return */

/*
 * Width 4, weighted blend, two output rows per pass.
 *
 * On entry and at every loop-back: d0/d1 hold the current top row n at x
 * and x+1, r0 points at row n+1, r10 = 1 and lr = row step - 1.
 * d12..d15 hold the byte weights w00=(8-fx)*(8-fy), w01=fx*(8-fy),
 * w10=(8-fx)*fy, w11=fx*fy set up at function entry.
 * All eight lanes are loaded and computed; only the first four result
 * bytes of each row are stored.
 */
WidthIs4MVIsNotZero:
    VLD1.8   {d2},[r0],r10              /* d2 = row n+1 at x */
    VMULL.U8 q2,d0,d12                  /* out0  = row n  [x]   * w00 */
    VMULL.U8 q3,d2,d12                  /* out1  = row n+1[x]   * w00 */
    VLD1.8   {d3},[r0],lr               /* d3 = row n+1 at x+1; r0 -> row n+2 */
    VMLAL.U8 q2,d1,d13                  /* out0 += row n  [x+1] * w01 */
    VMLAL.U8 q3,d3,d13                  /* out1 += row n+1[x+1] * w01 */
    VLD1.8   {d0},[r0],r10              /* d0 = row n+2 at x (top row of the next pass) */
    VMLAL.U8 q2,d2,d14                  /* out0 += row n+1[x]   * w10 */
    VMLAL.U8 q3,d0,d14                  /* out1 += row n+2[x]   * w10 */
    VLD1.8   {d1},[r0],lr               /* d1 = row n+2 at x+1; r0 -> row n+3 */
    SUBS     r5,r5,#2                   /* two rows consumed; flags are tested by BGT below */
    VMLAL.U8 q3,d1,d15                  /* out1 += row n+2[x+1] * w11 */
    VMLAL.U8 q2,d3,d15                  /* out0 += row n+1[x+1] * w11 */
    VQRSHRN.U16 d9,q3,#6                /* (sum + 32) >> 6, saturated and narrowed to 8 bits */
    VQRSHRN.U16 d8,q2,#6
    VST1.32  {d8[0]},[r2],r3            /* store the low 4 bytes of out0, advance destination */
    VST1.32  {d9[0]},[r2],r3            /* store the low 4 bytes of out1, advance destination */
    BGT      WidthIs4MVIsNotZero        /* loop while rows remain (r5 > 0) */
    MOV      r0,#0                      /* return value 0 */
    VPOP     {d8-d15}
    POP      {r4-r12,pc}                /* restore and return */

/*
 * Width 2, weighted blend, two output rows per pass.
 *
 * Same computation as the width-4 handler; the only difference is that
 * the stores write the first two result bytes of each row (one 16-bit lane).
 * On entry and at every loop-back: d0/d1 hold the current top row n at x
 * and x+1, r0 points at row n+1, r10 = 1 and lr = row step - 1.
 * d12..d15 hold the byte weights w00=(8-fx)*(8-fy), w01=fx*(8-fy),
 * w10=(8-fx)*fy, w11=fx*fy set up at function entry.
 */
WidthIs2MVIsNotZero:
    VLD1.8   {d2},[r0],r10              /* d2 = row n+1 at x */
    VMULL.U8 q2,d0,d12                  /* out0  = row n  [x]   * w00 */
    VMULL.U8 q3,d2,d12                  /* out1  = row n+1[x]   * w00 */
    VLD1.8   {d3},[r0],lr               /* d3 = row n+1 at x+1; r0 -> row n+2 */
    VMLAL.U8 q2,d1,d13                  /* out0 += row n  [x+1] * w01 */
    VMLAL.U8 q3,d3,d13                  /* out1 += row n+1[x+1] * w01 */
    VLD1.8   {d0},[r0],r10              /* d0 = row n+2 at x (top row of the next pass) */
    VMLAL.U8 q2,d2,d14                  /* out0 += row n+1[x]   * w10 */
    VMLAL.U8 q3,d0,d14                  /* out1 += row n+2[x]   * w10 */
    VLD1.8   {d1},[r0],lr               /* d1 = row n+2 at x+1; r0 -> row n+3 */
    SUBS     r5,r5,#2                   /* two rows consumed; flags are tested by BGT below */
    VMLAL.U8 q3,d1,d15                  /* out1 += row n+2[x+1] * w11 */
    VMLAL.U8 q2,d3,d15                  /* out0 += row n+1[x+1] * w11 */
    VQRSHRN.U16 d9,q3,#6                /* (sum + 32) >> 6, saturated and narrowed to 8 bits */
    VQRSHRN.U16 d8,q2,#6
    VST1.16  {d8[0]},[r2],r3            /* store the low 2 bytes of out0, advance destination */
    VST1.16  {d9[0]},[r2],r3            /* store the low 2 bytes of out1, advance destination */
    BGT      WidthIs2MVIsNotZero        /* loop while rows remain (r5 > 0) */
    MOV      r0,#0                      /* return value 0 */
    VPOP     {d8-d15}
    POP      {r4-r12,pc}                /* restore and return */

/*
 * Width 8, both fractional offsets zero: straight copy, two rows per pass.
 *
 * The two loads at function entry left r0 exactly one row step past the
 * block start, so it is rewound once and every row is then reloaded here.
 * Only rows that get stored are loaded.
 */
WidthIs8MVIsZero:
    SUB      r0,r0,r1                   /* back to row 0 */
WidthIs8LoopMVIsZero:
    VLD1.8   {d0},[r0],r1               /* load 8 bytes of row n, advance one row */
    SUBS     r5,r5,#2                   /* two rows per pass; flags are tested by BGT below */
    VLD1.8   {d1},[r0],r1               /* load 8 bytes of row n+1, advance one row */
    VST1.64  {d0},[r2],r3               /* store row n, advance destination */
    VST1.64  {d1},[r2],r3               /* store row n+1, advance destination */
    BGT      WidthIs8LoopMVIsZero       /* loop while rows remain (r5 > 0) */
    MOV      r0,#0                      /* return value 0 */
    VPOP     {d8-d15}
    POP      {r4-r12,pc}                /* restore and return */

/*
 * Width 4, both fractional offsets zero: straight copy, two rows per pass.
 *
 * The two loads at function entry left r0 exactly one row step past the
 * block start, so it is rewound once and every row is loaded inside the
 * loop, the same shape as the width-8 copy path. Rows are loaded only as
 * they are stored, so no source row beyond the last output row is read.
 * Each load still fetches 8 bytes; only the low 4 bytes are stored.
 */
WidthIs4MVIsZero:
    SUB      r0,r0,r1                   /* back to row 0 */
WidthIs4LoopMVIsZero:
    VLD1.8   {d0},[r0],r1               /* load row n, advance one row */
    SUBS     r5,r5,#2                   /* two rows per pass; flags are tested by BGT below */
    VLD1.8   {d1},[r0],r1               /* load row n+1, advance one row */
    VST1.32  {d0[0]},[r2],r3            /* store the low 4 bytes of row n, advance destination */
    VST1.32  {d1[0]},[r2],r3            /* store the low 4 bytes of row n+1, advance destination */
    BGT      WidthIs4LoopMVIsZero       /* loop while rows remain (r5 > 0) */
    MOV      r0,#0                      /* return value 0 */
    VPOP     {d8-d15}
    POP      {r4-r12,pc}                /* restore and return */

/*
 * Width 2, both fractional offsets zero: straight copy, two rows per pass.
 *
 * The two loads at function entry left r0 exactly one row step past the
 * block start, so it is rewound once and every row is loaded inside the
 * loop, the same shape as the width-8 copy path. Rows are loaded only as
 * they are stored, so no source row beyond the last output row is read.
 * Each load still fetches 8 bytes; only the low 2 bytes are stored.
 */
WidthIs2MVIsZero:
    SUB      r0,r0,r1                   /* back to row 0 */
WidthIs2LoopMVIsZero:
    VLD1.8   {d0},[r0],r1               /* load row n, advance one row */
    SUBS     r5,r5,#2                   /* two rows per pass; flags are tested by BGT below */
    VLD1.8   {d1},[r0],r1               /* load row n+1, advance one row */
    VST1.16  {d0[0]},[r2],r3            /* store the low 2 bytes of row n, advance destination */
    VST1.16  {d1[0]},[r2],r3            /* store the low 2 bytes of row n+1, advance destination */
    BGT      WidthIs2LoopMVIsZero       /* loop while rows remain (r5 > 0) */
    MOV      r0,#0                      /* return value 0 */
    VPOP     {d8-d15}
    POP      {r4-r12,pc}                /* restore and return */
    .endfunc

    .end