summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
blob: b1ad17c6fd3f170390ccfbf62675eab5c50a29d7 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
;//
;// 
;// File Name:  armVCM4P10_Interpolate_Chroma_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;// 
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;// 
;// 
;//


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h
        
        M_VARIANTS ARM1136JS
        
    IF ARM1136JS

;// input registers
;//
;// r0-r3 arrive in registers; iWidth, iHeight, dx and dy are loaded from the
;// stack by the function body (M_ARG / M_LDR) into r4-r7.

pSrc                 RN 0
iSrcStep             RN 1
pDst                 RN 2
iDstStep             RN 3
iWidth               RN 4
iHeight              RN 5
dx                   RN 6
dy                   RN 7


;// local variable registers
;//
;// NOTE: many of the names below are aliases of the SAME physical register
;// (for example r8 is EightMinusdx, dxEightMinusdx, iDstStepx2MinusWidth,
;// pSrc2, pDst1, pix00, Out0100, x00 and x01x00). Each alias is only valid
;// during its own phase of the routine; writing one alias destroys every
;// other value held in that register.
temp                 RN 11
r0x20                RN 12
tmp0x20              RN 14
return               RN 0   ;// shares r0 with pSrc; only written at function exit
dxPlusdy             RN 10
EightMinusdx         RN 8 
EightMinusdy         RN 9
dxEightMinusdx       RN 8   ;// overwrites EightMinusdx in place
BACoeff              RN 6   ;// overwrites dx
DCCoeff              RN 7   ;// overwrites dy
                     
;// End-of-row pointer bookkeeping
iDstStepx2MinusWidth RN 8
iSrcStepx2MinusWidth RN 9
iSrcStep1            RN 10

pSrc1                RN 1   ;// overwrites iSrcStep
pSrc2                RN 8
pDst1                RN 8
pDst2                RN 12
                     
;// Pixels and packed outputs used by the dx == dy == 0 copy path
pix00                RN 8
pix01                RN 9
pix10                RN 10
pix11                RN 11

Out0100              RN 8  
Out1110              RN 10 

;// 3x3 source neighbourhood used by the interpolating path
;// (xRC = row R, column C). Row 2 reuses the registers of row 0.
x00                  RN 8
x01                  RN 10
x02                  RN 12
x10                  RN 9
x11                  RN 11
x12                  RN 14
x20                  RN 10
x21                  RN 12
x22                  RN 14
                     
;// Pairs of horizontally adjacent pixels packed as two halfwords
;// (xRBxRA = pixel xRB in the top halfword, pixel xRA in the bottom halfword)
x01x00               RN 8  
x02x01               RN 10 
x11x10               RN 9  
x12x11               RN 11 
x21x20               RN 10 
x22x21               RN 12 
                     
;// Filtered outputs: OutRowRC = output row R, column C of the 2x2 block
OutRow00             RN 12
OutRow01             RN 14
OutRow10             RN 10
OutRow11             RN 12
                     
;// Two output bytes of one row packed for a single halfword store
OutRow0100           RN 12
OutRow1110           RN 12
                     
;//-----------------------------------------------------------------------------------------------
;// armVCM4P10_Interpolate_Chroma starts
;//
;// Chroma sample interpolation with 1/8-sample weights. Each inner-loop
;// iteration produces a 2x2 block of output bytes (two columns of two rows).
;//
;// Inputs:
;//   r0 = pSrc      source pointer
;//   r1 = iSrcStep  source row stride in bytes
;//   r2 = pDst      destination pointer
;//   r3 = iDstStep  destination row stride in bytes
;//   stack: Width, Height, Dx, Dy (4 bytes each)
;//
;// Output:
;//   r0 = OMX_Sts_NoErr (the only value this routine returns)
;//
;// When dx + dy != 0 every output byte is
;//   (A*p[y][x] + B*p[y][x+1] + C*p[y+1][x] + D*p[y+1][x+1] + 32) >> 6
;// with A = (8-dx)*(8-dy), B = dx*(8-dy), C = (8-dx)*dy, D = dx*dy.
;// This path reads one source column and one source row beyond the 2x2 block
;// being produced. When dx + dy == 0 the source bytes are copied unchanged.
;//
;// The loop counters step by 2 and the end-of-row fix-up of iWidth only
;// restores the width correctly when Width is even.
;// NOTE(review): Height is presumably even as well - confirm against callers.
;// NOTE(review): output is written with STRH; presumably pDst and iDstStep
;// keep every store halfword aligned - confirm.
;//-----------------------------------------------------------------------------------------------
        
        ;// Write function header
        ;// NOTE(review): M_START/M_ARG/M_LDR/M_EXIT/M_END come from armCOMM_s.h;
        ;// the ", r11" argument presumably requests saving r4-r11 - confirm there.
        M_START armVCM4P10_Interpolate_Chroma, r11
        
        ;// Define stack arguments
        M_ARG   Width,      4
        M_ARG   Height,     4
        M_ARG   Dx,         4
        M_ARG   Dy,         4
        
        ;// Load argument from the stack
        ;// M_STALL ARM1136JS=4
        
        M_LDR   iWidth,  Width  
        M_LDR   iHeight, Height  
        M_LDR   dx,      Dx 
        M_LDR   dy,      Dy
        
        ;// EightMinusdx = 8 - dx
        ;// EightMinusdy = 8 - dy
        
        ;// ACoeff = EightMinusdx * EightMinusdy
        ;// BCoeff = dx * EightMinusdy
        ;// CCoeff = EightMinusdx * dy
        ;// DCoeff = dx * dy
        
        ADD     pSrc1, pSrc, iSrcStep                       ;// second source row; overwrites iSrcStep (same register)
        SUB     temp, iWidth, #1                            ;// inner loop counter seed = Width - 1
        RSB     EightMinusdx, dx, #8 
        RSB     EightMinusdy, dy, #8
        CMN     dx,dy                                       ;// Z = (dx + dy == 0); consumed by the BEQ below
        ADD     dxEightMinusdx, EightMinusdx, dx, LSL #16   ;// top halfword = dx, bottom halfword = 8 - dx
        ORR     iWidth, iWidth, temp, LSL #16               ;// top halfword = Width - 1, bottom halfword = Width
        
        ;// Packed Coeffs.
        ;// BACoeff: top halfword = B, bottom halfword = A
        ;// DCCoeff: top halfword = D, bottom halfword = C
        ;// These MULs overwrite dx and dy, and (having no S suffix) leave the
        ;// flags from CMN intact.
        
        MUL     BACoeff, dxEightMinusdx, EightMinusdy
        MUL     DCCoeff, dxEightMinusdx, dy        
        
        
        ;// Checking either of dx and dy being non-zero
        
        BEQ     MVIsZero
        
;// Pixel layout:
;//
;//   x00 x01 x02
;//   x10 x11 x12
;//   x20 x21 x22

;// If fractional mv is not (0, 0)
        
OuterLoopMVIsNotZero

InnerLoopMVIsNotZero
            
                ;// Load rows 0 and 1 of the 3x3 neighbourhood; the last two
                ;// loads also advance pSrc and pSrc1 by 2 (pre-index writeback).
                LDRB    x00, [pSrc,  #+0]                   
                LDRB    x10, [pSrc1, #+0]                   
                LDRB    x01, [pSrc,  #+1]                  
                LDRB    x11, [pSrc1, #+1]                  
                LDRB    x02, [pSrc,  #+2]!                   
                LDRB    x12, [pSrc1, #+2]!                   
                
                ;// Pack horizontally adjacent pixels as halfword pairs so one
                ;// SMLAD applies two coefficients at once.
                ORR     x01x00, x00, x01, LSL #16        
                ;// M_STALL ARM1136JS=1
                ORR     x02x01, x01, x02, LSL #16        
                MOV     r0x20,  #32                         ;// rounding constant added before the >> 6
                ORR     x11x10, x10, x11, LSL #16    
                ORR     x12x11, x11, x12, LSL #16        
                
                ;// Row 0 contribution: A*left + B*right + 32
                SMLAD   x01x00, x01x00, BACoeff, r0x20
                SMLAD   x02x01, x02x01, BACoeff, r0x20                
                
                ;// iWidth packed with MSB (top 16 bits) 
                ;// as inner loop counter value i.e 
                ;// (iWidth -1) and LSB (lower 16 bits)
                ;// as original width
                ;// Subtracting 1<<17 takes 2 off the counter (two columns are
                ;// produced per iteration). The flags set here are consumed by
                ;// the BGT at the bottom of the loop; nothing in between
                ;// updates the condition flags.
                
                SUBS    iWidth, iWidth, #1<<17
                
                ;// Add row 1 contribution: C*left + D*right
                SMLAD   OutRow00, x11x10, DCCoeff, x01x00            
                SMLAD   OutRow01, x12x11, DCCoeff, x02x01            
                
                ;// pSrc2 = 2*pSrc1 - pSrc = pSrc1 + (pSrc1 - pSrc): third source
                ;// row. Both pointers have already advanced by 2, hence the
                ;// -2/-1/+0 offsets on the row 2 loads below.
                RSB     pSrc2, pSrc, pSrc1, LSL #1
                
                MOV     OutRow00, OutRow00, LSR #6
                MOV     OutRow01, OutRow01, LSR #6
                
                LDRB    x20,[pSrc2, #-2]
                
                ;// Store two output bytes of row 0 with one halfword store
                ORR     OutRow0100, OutRow00, OutRow01, LSL #8
                STRH    OutRow0100, [pDst], #2
                
                LDRB    x21,[pSrc2, #-1]
                LDRB    x22,[pSrc2, #+0]
                
                ;// pDst1 shares its register with pSrc2, so it is only formed
                ;// after the last row 2 load. pDst has already advanced by 2.
                ADD     pDst1, pDst, iDstStep
                
                ;// M_STALL ARM1136JS=1
                                
                ORR     x21x20, x20, x21, LSL #16
                ORR     x22x21, x21, x22, LSL #16     
                
                MOV     tmp0x20, #32                        ;// rounding constant for output row 1
                
                ;// Reusing the packed data x11x10 and x12x11
                ;// (source row 1 is the upper row for output row 1, so it now
                ;// takes the A/B coefficients and row 2 takes C/D)
                
                SMLAD   x11x10,  x11x10,  BACoeff, tmp0x20
                SMLAD   x12x11,  x12x11,  BACoeff, tmp0x20
                SMLAD   OutRow10, x21x20, DCCoeff, x11x10            
                SMLAD   OutRow11, x22x21, DCCoeff, x12x11
                
                MOV     OutRow10, OutRow10, LSR #6
                MOV     OutRow11, OutRow11, LSR #6
                
                ;// M_STALL ARM1136JS=1
               
                ORR     OutRow1110, OutRow10, OutRow11, LSL #8
                
                ;// -2 undoes the post-increment already applied to pDst
                STRH    OutRow1110, [pDst1, #-2]
                
                BGT     InnerLoopMVIsNotZero
                
                ;// End of a row pair. The flags from SUBS iHeight are consumed
                ;// by the BGT below; the instructions in between do not set
                ;// flags.
                SUBS    iHeight, iHeight, #2
                ADD     iWidth, iWidth, #1<<16                          ;// counter ended at -1 (even Width): top half back to 0, leaving plain Width
                RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1  ;// 2*iDstStep - Width
                SUB     iSrcStep1, pSrc1, pSrc                          ;// recover the source stride (iSrcStep was overwritten by pSrc1)
                SUB     temp, iWidth, #1
                RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1 ;// 2*source stride - Width
                ADD     pDst, pDst, iDstStepx2MinusWidth                ;// start of the next destination row pair
                ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth
                ADD     pSrc, pSrc, iSrcStepx2MinusWidth
                ORR     iWidth, iWidth, temp, LSL #16                   ;// re-pack {Width-1 : Width} for the next row pair
                BGT     OuterLoopMVIsNotZero
                MOV     return,  #OMX_Sts_NoErr
                M_EXIT

;// If fractional mv is (0, 0)
;// Plain copy: two bytes from each of two rows per inner iteration.

MVIsZero
                ;// M_STALL ARM1136JS=4
OuterLoopMVIsZero

InnerLoopMVIsZero
                                      
                LDRB    pix00, [pSrc],  #+1
                LDRB    pix01, [pSrc],  #+1
                LDRB    pix10, [pSrc1], #+1
                LDRB    pix11, [pSrc1], #+1
                
                ADD     pDst2,  pDst, iDstStep              ;// second destination row
                SUBS    iWidth, iWidth, #1<<17              ;// counter -= 2; flags consumed by the BGT below
                
                ORR     Out0100, pix00, pix01, LSL #8 
                ORR     Out1110, pix10, pix11, LSL #8
                
                STRH    Out0100, [pDst],  #2
                STRH    Out1110, [pDst2], #2
                
                BGT     InnerLoopMVIsZero
                
                ;// End of a row pair: same bookkeeping as in the interpolating
                ;// path above.
                SUBS    iHeight, iHeight, #2
                ADD     iWidth, iWidth, #1<<16                          ;// counter ended at -1 (even Width): top half back to 0
                RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1  ;// 2*iDstStep - Width
                SUB     iSrcStep1, pSrc1, pSrc                          ;// recover the source stride
                SUB     temp, iWidth, #1
                RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1 ;// 2*source stride - Width
                ADD     pDst, pDst, iDstStepx2MinusWidth
                ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth
                ADD     pSrc, pSrc, iSrcStepx2MinusWidth
                ORR     iWidth, iWidth, temp, LSL #16                   ;// re-pack {Width-1 : Width}
                BGT     OuterLoopMVIsZero
                MOV     return,  #OMX_Sts_NoErr
                M_END

        ENDIF ;// ARM1136JS

        
        END

;//-----------------------------------------------------------------------------------------------
;// armVCM4P10_Interpolate_Chroma ends
;//-----------------------------------------------------------------------------------------------