summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
blob: a099dcb44829886d199c10f1daa5c5b7b9bef20d (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;//      http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;// 
;// File Name:  omxVCM4P10_FilterDeblockingChroma_HorEdge_I_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;// 
;// 
;// 
;//


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS CortexA8

        IF CortexA8

        IMPORT  armVCM4P10_DeblockingChromabSGE4_unsafe
        IMPORT  armVCM4P10_DeblockingChromabSLT4_unsafe
        
;// Loop-control and bS-test constants
;//
;// LOOP_COUNT: XY is initialised to this value and doubled with
;//             ADDS XY, XY, XY once per pass of LoopY.
;//             0x40000000 -> 0x80000000 -> 0 (32-bit wrap), so the Z flag
;//             is first set after the second pass: the loop runs twice.
;// MASK_3:     TSTed against the packed 32-bit bS word; the result is
;//             non-zero when any of the four bS bytes has bit 0 or bit 1 set.
;// MASK_4:     TSTed against the packed 32-bit bS word; the result is
;//             non-zero when any of the four bS bytes has bit 2 set.
LOOP_COUNT  EQU 0x40000000
MASK_3      EQU 0x03030303
MASK_4      EQU 0x04040404

;// Function arguments
;// pSrcDst, srcdstStep, pAlpha and pBeta arrive in r0-r3.
;// pThresholds and pBS are loaded from the stack arguments
;// (ppThresholds / ppBS) inside the function body.

pSrcDst     RN 0
srcdstStep  RN 1
pAlpha      RN 2
pBeta       RN 3

pThresholds RN 5
pBS         RN 4
;// bS3210 holds one 32-bit word read from pBS (four packed bS bytes).
bS3210      RN 6

;// Loop 
;// XY is the pass counter: seeded with LOOP_COUNT and doubled each pass.

XY          RN 7

;// Pixels
;// One D register (8 x U8) per image row. Rows are loaded top to bottom in
;// the order P_2, P_1, P_0, Q_0, Q_1, Q_2.
dP_0        DN D4.U8
dP_1        DN D5.U8  
dP_2        DN D6.U8  
dQ_0        DN D8.U8  
dQ_1        DN D9.U8  
dQ_2        DN D10.U8 

;// Filtering Decision
;// dAlpha / dBeta hold a single threshold byte replicated across all lanes.
dAlpha      DN D0.U8
dBeta       DN D2.U8

;// NOTE: several aliases below intentionally share a physical register:
;//   D12 = dAqflg and dAp1p0
;//   D17 = dApflg and dAq2q0
;//   D13 = dAp0q0 and dP_0t
;//   D19 = dAp2p0 and dTemp
;// Writing one name of a pair destroys the other.
;// dAqflg / dApflg are written but never read in this file.
;// NOTE(review): presumably they are inputs to the imported *_unsafe
;// helpers -- confirm against the helper source.
dFilt       DN D16.U8
dAqflg      DN D12.U8
dApflg      DN D17.U8 

;// Absolute differences, e.g. dAp0q0 = |P_0 - Q_0|.
dAp0q0      DN D13.U8
dAp1p0      DN D12.U8
dAq1q0      DN D18.U8
dAp2p0      DN D19.U8
dAq2q0      DN D17.U8

;// Q13 is the register pair D26 (low half) : D27 (high half), so qBS3210
;// overlaps dBS3210 / dFilt_bs (D26) and dMask_bs (D27).
qBS3210     QN Q13.U16
dBS3210     DN D26
dMask_bs    DN D27
dFilt_bs    DN D26.U16

;// bSLT4
;// dMask_0 / dMask_1 are initialised in the function prologue but not read
;// in this file.
;// NOTE(review): presumably consumed by the bSLT4 helper -- confirm.
dMask_0     DN D14.U8
dMask_1     DN D15.U8    
dMask_4     DN D1.U16

;// Core registers holding the MASK_4 / MASK_3 literals for TST.
Mask_4      RN 8
Mask_3      RN 9

;// dTemp is declared here but not referenced by any instruction in this file.
dTemp       DN D19.U8

;// Result
;// dP_0t / dQ_0t are only read in this file (as VBIT sources) and
;// dP_0n / dQ_0n are merged and stored.
;// NOTE(review): presumably the *_unsafe helpers write these -- confirm.
dP_0t       DN D13.U8   
dQ_0t       DN D31.U8   

dP_0n       DN D29.U8
dQ_0n       DN D24.U8

        
        ;// Function header
        ;//
        ;// omxVCM4P10_FilterDeblockingChroma_HorEdge_I
        ;//
        ;// In-place deblocking of two horizontal edges, 8 pixels wide
        ;// (one D register per row). LoopY runs twice; the second edge is
        ;// four rows below the first.
        ;//
        ;// In:   r0 = pSrcDst     row directly below the first edge (Q_0 row);
        ;//                        the code backs up three rows to reach P_2
        ;//       r1 = srcdstStep  byte stride between rows
        ;//       r2 = pAlpha      alpha bytes: [0] for the first edge, [1]
        ;//                        reloaded on the NoFilterBS0 path
        ;//       r3 = pBeta       beta bytes, same layout as pAlpha
        ;//       stack: ppThresholds, ppBS (4 bytes each, loaded below)
        ;// Out:  r0 = OMX_Sts_NoErr on both exit paths
        ;//
        ;// Per pass, one 32-bit word (four bS bytes) is read from pBS and
        ;// pBS is advanced by 8 bytes.
        ;//
        ;// NOTE(review): M_START/M_ARG/M_LDR/M_EXIT/M_END are macros from
        ;// armCOMM_s.h. The r9/d15 operands presumably name the highest
        ;// callee-saved core/NEON registers to preserve -- confirm.
        ;// NOTE(review): the two *_unsafe helpers use an implicit register
        ;// interface that is not visible in this file.
        M_START omxVCM4P10_FilterDeblockingChroma_HorEdge_I, r9, d15
        
        ;//Arguments on the stack
        M_ARG   ppThresholds, 4
        M_ARG   ppBS, 4
        
        ;// d0-dAlpha_0
        ;// d2-dBeta_0

        ;load alpha1,beta1 somewhere to avoid more loads
        ;// Load alpha[0] / beta[0], replicated to every lane. The '!'
        ;// writeback leaves pAlpha / pBeta pointing at element [1].
        ;// The two SUBs move pSrcDst back 2 + 1 = 3 rows, to the P_2 row.
        VLD1        {dAlpha[]}, [pAlpha]!
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1 ;?
        SUB         pSrcDst, pSrcDst, srcdstStep
        VLD1        {dBeta[]}, [pBeta]! 
        
        M_LDR       pBS, ppBS
        M_LDR       pThresholds, ppThresholds 

        LDR         Mask_3, =MASK_3
        LDR         Mask_4, =MASK_4

        ;// Constant vectors. dMask_4 is used by the VTST below; dMask_0 and
        ;// dMask_1 are not read again in this file.
        VMOV        dMask_0, #0     
        VMOV        dMask_1, #1     
        VMOV        dMask_4, #4     
        
        ;// Pass counter: doubled each pass, reaches zero after two passes.
        LDR         XY, =LOOP_COUNT

        ;// p0-p3 - d4-d7
        ;// q0-q3 - d8-d11
LoopY        
        ;// Fetch four packed bS bytes for this edge; skip 8 bytes ahead.
        LDR         bS3210, [pBS], #8
        
        ;// Load six rows (P_2..Q_2), advancing pSrcDst one row per load.
        ;// The CMP result is consumed by the BEQ after the loads; the
        ;// NEON instructions in between do not modify the ARM flags.
        VLD1        dP_2, [pSrcDst], srcdstStep
        ;1
        VLD1        dP_1, [pSrcDst], srcdstStep
        CMP         bS3210, #0
        VLD1        dP_0, [pSrcDst], srcdstStep
        ;1
        VLD1        dQ_0, [pSrcDst], srcdstStep
        VABD        dAp2p0, dP_2, dP_0
        VLD1        dQ_1, [pSrcDst], srcdstStep
        VABD        dAp0q0, dP_0, dQ_0
        VLD1        dQ_2, [pSrcDst], srcdstStep
        ;// All four bS bytes zero: nothing to filter on this edge.
        BEQ         NoFilterBS0

        VABD        dAp1p0, dP_1, dP_0
        VABD        dAq1q0, dQ_1, dQ_0

        ;// Build the per-pixel filter mask:
        ;//   dFilt = (alpha > |p0-q0|) AND (bS > 0)
        ;//           AND (beta > max(|p1-p0|, |q1-q0|))
        ;// The bS bytes are widened to 16-bit lanes, so each bS value
        ;// covers two adjacent 8-bit pixels of the row.
        VCGT        dFilt, dAlpha, dAp0q0
        VMOV.U32    dBS3210[0], bS3210
        VMAX        dAp1p0, dAq1q0, dAp1p0
        VMOVL       qBS3210, dBS3210.U8
        VABD        dAq2q0, dQ_2, dQ_0
        VCGT        dMask_bs.S16, dBS3210.S16, #0

        VCGT        dAp1p0, dBeta, dAp1p0 
        VCGT        dAp2p0, dBeta, dAp2p0
        
        VAND        dFilt, dMask_bs.U8

        ;// Flags from this TST select the BLNE below (any bS byte with
        ;// bit 0 or bit 1 set); the NEON ops in between leave them intact.
        TST         bS3210, Mask_3

        VCGT        dAq2q0, dBeta, dAq2q0
        VAND        dFilt, dFilt, dAp1p0

        ;// dAqflg overwrites dAp1p0 (both D12) and dApflg overwrites
        ;// dAq2q0 (both D17); both sources have already been consumed.
        VAND        dAqflg, dFilt, dAq2q0
        VAND        dApflg, dFilt, dAp2p0
        
        ;// bS < 4 Filtering
        BLNE        armVCM4P10_DeblockingChromabSLT4_unsafe

        ;// Any bS byte with bit 2 set -> run the second helper.
        TST         bS3210, Mask_4        

        ;// Six rows were loaded, so stepping back four rows puts pSrcDst
        ;// on the P_0 row. VTST turns dFilt_bs into a per-16-bit-lane
        ;// mask of "bS has bit 2 set".
        ;// NOTE(review): this assumes D26 still holds the widened bS values
        ;// after the bSLT4 helper returns -- confirm against the helper.
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #2
        VTST        dFilt_bs, dFilt_bs, dMask_4

        ;// bS == 4 Filtering
        BLNE        armVCM4P10_DeblockingChromabSGE4_unsafe
                    
        ;// Merge results: take dP_0t/dQ_0t where dFilt_bs is set and keep
        ;// dP_0n/dQ_0n elsewhere, then restore the unfiltered pixels
        ;// wherever dFilt is clear.
        VBIT        dP_0n, dP_0t, dFilt_bs
        VBIT        dQ_0n, dQ_0t, dFilt_bs
        
        VBIF        dP_0n, dP_0, dFilt      
        VBIF        dQ_0n, dQ_0, dFilt  

        ;// Result Storage
        ;// Write back the P_0 and Q_0 rows. Afterwards pSrcDst is one row
        ;// below Q_0, which is the P_2 row of the next edge.
        ;// ADDS doubles XY; Z is set only after the second pass.
        ;// NOTE(review): this path does not reload dAlpha/dBeta from
        ;// element [1] or advance pThresholds, unlike NoFilterBS0.
        ;// Presumably the helpers do so -- confirm against their source.
        VST1        dP_0n, [pSrcDst], srcdstStep
        ADDS        XY, XY, XY
        VST1        dQ_0n, [pSrcDst], srcdstStep

        BNE         LoopY        
        
        MOV         r0, #OMX_Sts_NoErr

        M_EXIT
        
NoFilterBS0

        ;// Edge skipped: load alpha[1] / beta[1] for the next pass, step
        ;// pSrcDst back two rows (to the next edge's P_2 row), skip four
        ;// bytes of pThresholds and advance the pass counter.
        VLD1        {dAlpha[]}, [pAlpha]
        SUB         pSrcDst, pSrcDst, srcdstStep, LSL #1
        ADDS        XY, XY, XY
        VLD1        {dBeta[]}, [pBeta]
        ADD         pThresholds, pThresholds, #4
        BNE         LoopY        

        MOV         r0, #OMX_Sts_NoErr
        M_END
        
        ENDIF
        

        END