/*
 * Provenance (recovered from the repository web view this file was taken from):
 *   path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_FilterDeblockingLuma_VerEdge_I_s.S
 *   blob: f2a3682ac1022f8f4e1714430de966e131fed178
 */
/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
/*
 *
 */

    /*
     * Build attributes (ARM EABI): tag 24 = Tag_ABI_align_needed,
     * tag 25 = Tag_ABI_align_preserved.  Value 1 declares that this object
     * needs, and preserves, 8-byte stack alignment.
     */
    .eabi_attribute 24, 1
    .eabi_attribute 25, 1

    /* ARM (A32) instruction set, NEON enabled, code goes to .text. */
    .arm
    .fpu neon
    .text

/*
 * omxVCM4P10_FilterDeblockingLuma_VerEdge_I
 *
 * In-place deblocking of the four vertical luma edges (x = 0, 4, 8, 12) of a
 * 16-row region.  The region is walked as two bands of 8 rows; within a band
 * the four edges are processed left to right.  For each edge an 8x8 byte tile
 * straddling the edge is loaded, transposed so that every D register holds one
 * pixel column, filtered by an external helper, transposed back and stored.
 *
 * ABI: AAPCS, ARM state.  Argument names below follow the OpenMAX DL
 * prototype for this function (the names are not visible in this file):
 *   r0        pSrcDst      pointer to the top-left pixel of the region
 *   r1        srcdstStep   row stride in bytes
 *   r2        pAlpha       2 bytes: [0] for edge 0, [1] for edges 1..3
 *   r3        pBeta        2 bytes: [0] for edge 0, [1] for edges 1..3
 *   [sp,#0]   pThresholds  read only by the bS<4 helper through r5
 *   [sp,#4]   pBS          boundary strengths, 4 bytes per edge
 * Returns r0 = 0 unconditionally.  No argument validation is performed.
 *
 * Register roles inside the routine:
 *   r0   current tile pointer (4 bytes left of the edge being filtered)
 *   r4   cursor into pBS           r5   cursor into pThresholds
 *   r6   constant 0                r7   pAlpha + 1      r8   pBeta + 1
 *   r9   loop counter bit pattern  r10  odd-row tile pointer
 *   r11  2 * srcdstStep            r12  two bS bytes for the current tile
 *   d0   alpha (all lanes)         d2   beta (all lanes)
 *   d14  0x00 bytes, d15 0x01 bytes (not used here; presumably constants
 *        expected by the helpers -- confirm against their source)
 *
 * Preserves r4-r11 and d8-d15 (saved/restored below).  Uses d0-d19 directly
 * and stores d24/d25/d28-d31 after the helper calls.
 *
 * External helpers (defined elsewhere; their register contract is not
 * visible here):
 *   armVCM4P10_DeblockingLumabSLT4_unsafe
 *   armVCM4P10_DeblockingLumabSGE4_unsafe
 */
    .global omxVCM4P10_FilterDeblockingLuma_VerEdge_I
    /*
     * Mark the symbol as a function (STT_FUNC) so the linker can apply
     * ARM/Thumb interworking to calls into this ARM-state routine, and so
     * that tools see a sized function symbol.
     */
    .type    omxVCM4P10_FilterDeblockingLuma_VerEdge_I, %function
omxVCM4P10_FilterDeblockingLuma_VerEdge_I:
    /* 10 core registers (40 bytes) + 8 D registers (64 bytes) = 0x68 bytes,
     * so the first stack argument now lives at [sp,#0x68]. */
    PUSH     {r4-r12,lr}
    VPUSH    {d8-d15}
    ADD      r7,r2,#1
    ADD      r8,r3,#1
    VLD1.8   {d0[]},[r2]
    /* Step back 4 pixels so each 8-byte row load covers 4 pixels on either
     * side of the edge. */
    SUB      r0,r0,#4
    VLD1.8   {d2[]},[r3]
    LDR      r4,[sp,#0x6c]
    LDR      r5,[sp,#0x68]
    MOV      r6,#0
    VMOV.I8  d14,#0
    VMOV.I8  d15,#0x1
    /* r9 is doubled once per edge with ADDS: the carry flag is first set on
     * the 4th doubling (end of a band, result still non-zero) and the result
     * becomes zero on the 8th doubling (end of the second band). */
    MOV      r9,#0x11000000
    ADD      r11,r1,r1
L0x38:
    /* Fetch the two bS bytes covering this 8-row tile and advance r4 by 4,
     * i.e. to the same rows of the next edge. */
    LDRH     r12,[r4],#4
    CMP      r12,#0
    /* Both strengths zero: nothing to filter for this tile. */
    BEQ      L0x160
    /* Load 8 rows of 8 bytes: r0 walks even rows, r10 odd rows, each with a
     * stride of 2*step.  The VZIP/VTRN network interleaved with the loads is
     * an 8x8 byte transpose. */
    ADD      r10,r0,r1
    VLD1.8   {d7},[r0],r11
    VLD1.8   {d8},[r10],r11
    VLD1.8   {d5},[r0],r11
    VZIP.8   d7,d8
    VLD1.8   {d10},[r10],r11
    VLD1.8   {d6},[r0],r11
    VZIP.8   d5,d10
    VLD1.8   {d9},[r10],r11
    VLD1.8   {d4},[r0],r11
    VLD1.8   {d11},[r10],r11
    VZIP.8   d6,d9
    VZIP.16  d8,d10
    VZIP.8   d4,d11
    /* r0 advanced by 4 * (2*step) during the loads; rewind to the tile top. */
    SUB      r0,r0,r1,LSL #3
    VZIP.16  d7,d5
    VZIP.16  d9,d11
    VZIP.16  d6,d4
    VTRN.32  d8,d9
    VTRN.32  d5,d4
    VTRN.32  d10,d11
    VTRN.32  d7,d6
    /* After the transpose each register is one pixel column, lane n = row n:
     *   d7,d6,d5,d4   = columns 0..3 (left of the edge:  p3,p2,p1,p0)
     *   d8,d9,d10,d11 = columns 4..7 (right of the edge: q0,q1,q2,q3)
     * The p/q names follow the usual H.264 deblocking notation. */
    VABD.U8  d13,d4,d8              /* |p0 - q0| */
    VABD.U8  d12,d5,d4              /* |p1 - p0| */
    VABD.U8  d18,d9,d8              /* |q1 - q0| */
    VABD.U8  d19,d6,d4              /* |p2 - p0| */
    TST      r12,#0xff
    VCGT.U8  d16,d0,d13             /* alpha > |p0 - q0| */
    VMAX.U8  d12,d18,d12
    VABD.U8  d17,d10,d8             /* |q2 - q0| */
    /* First bS byte is zero: clear the mask for rows 0..3. */
    VMOVEQ.32 d16[0],r6
    TST      r12,#0xff00
    VCGT.U8  d19,d2,d19             /* beta > |p2 - p0| */
    VCGT.U8  d12,d2,d12             /* beta > max(|p1 - p0|, |q1 - q0|) */
    /* Second bS byte is zero: clear the mask for rows 4..7. */
    VMOVEQ.32 d16[1],r6
    VCGT.U8  d17,d2,d17             /* beta > |q2 - q0| */
    VAND     d16,d16,d12            /* d16 = per-row filter enable mask */
    /* Bit 2 of the first bS byte selects the helper (set when bS == 4).
     * Only the first of the two bS bytes is examined. */
    TST      r12,#4
    VAND     d12,d16,d17            /* enable & (|q2 - q0| < beta) */
    VAND     d17,d16,d19            /* enable & (|p2 - p0| < beta) */
    BNE      L0x17c
    /* bS < 4 path. */
    BL       armVCM4P10_DeblockingLumabSLT4_unsafe
    /* Transpose back and store.  Columns written, left to right:
     *   d7, d6, d30, d29 | d24, d25, d10, d11
     * d7/d6/d10/d11 are the columns loaded above; d29/d30/d24/d25 are
     * presumably the filtered p0/p1/q0/q1 produced by the helper -- confirm
     * against the helper source. */
    VZIP.8   d7,d6
    VZIP.8   d30,d29
    VZIP.8   d24,d25
    VZIP.8   d10,d11
    VZIP.16  d7,d30
    ADD      r10,r0,r1
    VZIP.16  d24,d10
    VZIP.16  d25,d11
    VZIP.16  d6,d29
    VTRN.32  d7,d24
    VTRN.32  d30,d10
    VTRN.32  d6,d25
    VTRN.32  d29,d11
    VST1.8   {d7},[r0],r11
    VST1.8   {d24},[r10],r11
    VST1.8   {d30},[r0],r11
    VST1.8   {d10},[r10],r11
    VST1.8   {d6},[r0],r11
    VST1.8   {d25},[r10],r11
    /* Advance the edge counter; the flags set here are consumed by the BCC
     * below and, at the end of a band, by the BNE at L0x1f0. */
    ADDS     r9,r9,r9
    VST1.8   {d29},[r0],r11
    /* r5 moves by 2 here but by 4 on the other two paths; presumably the
     * SLT4 helper itself advances r5 by 2 while reading thresholds, giving
     * 4 bytes per edge overall -- confirm against the helper source. */
    ADD      r5,r5,#2
    /* The post-increment of r10 (r1 here, r11 in the bS == 4 path) is not
     * used: r10 is recomputed from r0 before its next use. */
    VST1.8   {d11},[r10],r1
    SUB      r0,r0,r1,LSL #3
    /* Edges 1..3 use the second alpha/beta entry. */
    VLD1.8   {d0[]},[r7]
    ADD      r0,r0,#4               /* next edge is 4 pixels to the right */
    VLD1.8   {d2[]},[r8]
    BCC      L0x38
    B        L0x1f0
L0x160:
    /* Tile skipped (both bS bytes zero): do the same bookkeeping as the
     * filtered paths without touching pixel memory. */
    ADD      r0,r0,#4
    ADDS     r9,r9,r9
    VLD1.8   {d0[]},[r7]
    ADD      r5,r5,#4
    VLD1.8   {d2[]},[r8]
    BCC      L0x38
    B        L0x1f0
L0x17c:
    /* bS == 4 path. */
    BL       armVCM4P10_DeblockingLumabSGE4_unsafe
    /* Transpose back and store.  Columns written, left to right:
     *   d7, d31, d30, d29 | d24, d25, d28, d11
     * d7/d11 are the outermost columns loaded above; the other six are
     * presumably the filtered p2..p0 / q0..q2 produced by the helper --
     * confirm against the helper source. */
    VZIP.8   d7,d31
    VZIP.8   d30,d29
    VZIP.8   d24,d25
    VZIP.8   d28,d11
    VZIP.16  d7,d30
    ADD      r10,r0,r1
    VZIP.16  d24,d28
    VZIP.16  d25,d11
    VZIP.16  d31,d29
    VTRN.32  d7,d24
    VTRN.32  d30,d28
    VTRN.32  d31,d25
    VTRN.32  d29,d11
    VST1.8   {d7},[r0],r11
    VST1.8   {d24},[r10],r11
    VST1.8   {d30},[r0],r11
    VST1.8   {d28},[r10],r11
    VST1.8   {d31},[r0],r11
    VST1.8   {d25},[r10],r11
    ADDS     r9,r9,r9
    VST1.8   {d29},[r0],r11
    ADD      r5,r5,#4
    VST1.8   {d11},[r10],r11
    SUB      r0,r0,r1,LSL #3
    VLD1.8   {d0[]},[r7]
    ADD      r0,r0,#4
    VLD1.8   {d2[]},[r8]
    BCC      L0x38
L0x1f0:
    /* End of a band of four edges.  None of the instructions between the
     * last ADDS and the BNE below modify the flags, so Z still reflects
     * whether r9 reached zero (both bands done).
     * r4 and r5 each advanced 16 bytes over the band; stepping back 14
     * leaves them 2 bytes past the band start, i.e. at the entries for
     * rows 8..15 of edge 0. */
    SUB      r4,r4,#0xe
    SUB      r5,r5,#0xe
    /* Back to column -4 of edge 0, then down 8 rows. */
    SUB      r0,r0,#0x10
    VLD1.8   {d0[]},[r2]            /* edge 0 uses the first alpha entry */
    ADD      r0,r0,r1,LSL #3
    VLD1.8   {d2[]},[r3]            /* edge 0 uses the first beta entry */
    BNE      L0x38
    /* Return value 0 (presumably the OMX "no error" status -- confirm). */
    MOV      r0,#0
    VPOP     {d8-d15}
    POP      {r4-r12,pc}
    .size    omxVCM4P10_FilterDeblockingLuma_VerEdge_I, .-omxVCM4P10_FilterDeblockingLuma_VerEdge_I

    .end