summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/omxVCM4P10_InterpolateLuma_s.s
blob: cf611a3dd9695a31ed0d6e609193f388c58b4776 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
;//
;// 
;// File Name:  omxVCM4P10_InterpolateLuma_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;// 
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;// 
;// 
;//

;// Function:
;//     omxVCM4P10_InterpolateLuma
;//
;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly.
;// Performs quarter pel interpolation of inter luma MB.
;// It's assumed that the frame is already padded when calling this function.
;// Parameters:
;// [in]    pSrc        Pointer to the source reference frame buffer
;// [in]    srcStep     Reference frame step in bytes
;// [in]    dstStep     Destination frame step in bytes. Must be a multiple of roi.width
;// [in]    dx          Fractional part of horizontal motion vector
;//                         component in 1/4 pixel unit; valid in the range [0,3]
;// [in]    dy          Fractional part of vertical motion vector
;//                         component in 1/4 pixel unit; valid in the range [0,3]
;// [in]    roi         Dimension of the interpolation region; the parameters roi.width and roi.height must
;//                         each be equal to 4, 8, or 16.
;// [out]   pDst        Pointer to the destination frame buffer.
;//                   if roi.width==4,  4-byte alignment required
;//                   if roi.width==8,  8-byte alignment required
;//                   if roi.width==16, 16-byte alignment required
;//
;// Return Value:
;// If the function runs without error, it returns OMX_Sts_NoErr.
;// It is assumed that the following conditions are satisfied before calling this function:
;//     pSrc and pDst are not NULL.
;//     srcStep and dstStep are >= roi.width.
;//     dx and dy are in the range [0,3].
;//     roi.width and roi.height are each one of {4, 8, 16}.
;//     If roi.width is equal to 4, pDst is 4-byte aligned.
;//     If roi.width is equal to 8, pDst is 8-byte aligned.
;//     If roi.width is equal to 16, pDst is 16-byte aligned.
;//     srcStep and dstStep are multiples of 8.
;//
;//


        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

        M_VARIANTS ARM1136JS

        EXPORT omxVCM4P10_InterpolateLuma
        
    IF ARM1136JS
        IMPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        IMPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        IMPORT armVCM4P10_Average_4x4_Align0_unsafe
        IMPORT armVCM4P10_Average_4x4_Align2_unsafe
        IMPORT armVCM4P10_Average_4x4_Align3_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
        IMPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
    ENDIF

    IF ARM1136JS
        IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
    ENDIF
    
    

;// Declare input registers
;// pSrc, srcStep, pDst and dstStep are used by the function body without
;// being loaded, i.e. they are expected in r0-r3 on entry. iHeight and
;// iWidth are NOT register arguments: the function body loads them from
;// its stack arguments (ptrHeight / ptrWidth).
pSrc            RN 0
srcStep         RN 1
pDst            RN 2
dstStep         RN 3
iHeight         RN 4
iWidth          RN 5

;// Declare other intermediate registers
;// NOTE: idx and index are two names for the same register (r6); idx is
;// consumed when index is computed and must not be read afterwards.
;// Temp (r12) is not referenced by any code visible in this file.
idx             RN 6
idy             RN 7
index           RN 6
Temp            RN 12
pArgs           RN 11


        ;// End of CortexA8
                    
;//-------------------------------------------------------------------------------------------------------------------------    
;//-------------------------------------------------------------------------------------------------------------------------    
    IF ARM1136JS


        ;// Local storage used by omxVCM4P10_InterpolateLuma.
        ;// M_ALLOC4 / M_ALLOC8 are macros from armCOMM_s.h (not visible here);
        ;// presumably they reserve <size> bytes of stack-local storage with
        ;// 4-/8-byte alignment -- TODO confirm against armCOMM_s.h.
        ;//   ppDst, ppSrc - not referenced anywhere in this function body
        ;//   ppArgs       - 16 bytes: save slot for {pSrc, srcStep, pDst, dstStep}
        ;//   pBuffer      - working buffer whose address is placed in pBuf (r8)
        ;//                  at the top of every 4x4 block
        ;//   pInterBuf    - second working buffer; its address replaces pBuffer
        ;//                  in pBuf before the HalfDiag* helpers (cases 6, 9, a, b, e)
        ;//   pTempBuf     - scratch area: used as a stride-4 destination/source
        ;//                  in cases 5, 7, d, f and passed by address in r7 to the
        ;//                  *DiagCopy helpers in cases 6, 9, b, e
        M_ALLOC4 ppDst, 8
        M_ALLOC4 ppSrc, 8
        M_ALLOC4 ppArgs, 16
        M_ALLOC4 pBuffer, 120                           ;// 120 = 12x10
        M_ALLOC8 pInterBuf, 120                         ;// 120 = 12*5*2
        M_ALLOC8 pTempBuf, 32                           ;// 32 =  8*4
        
        ;// Function header
        ;// Interpolation of luma is implemented by processing block of pixels, size 4x4 at a time.
        ;// Depending on the values of motion vector fractional parts (dx,dy), one out of 16 cases will be processed.
        ;// Registers r4, r5, r6 to be preserved by internal unsafe functions
        ;// r4 - iHeight
        ;// r5 - iWidth
        ;// r6 - index
        ;//
        ;// The imported *_unsafe helpers are not visible in this file. Their
        ;// register contracts (inputs, outputs, clobbers) are therefore NOT
        ;// documented here beyond what the call sites set up; this routine
        ;// re-derives pArgs and reloads {pSrc, srcStep, pDst, dstStep} from
        ;// ppArgs after calling them rather than relying on their values.
        ;// M_START / M_END are macros from armCOMM_s.h; presumably the "r11"
        ;// operand asks for r4-r11 (and lr) to be saved/restored -- TODO confirm.
        M_START omxVCM4P10_InterpolateLuma, r11

;// Declare other intermediate registers
;// (idx, idy, index, Temp and pArgs repeat the file-level declarations with
;// the same register numbers; idx and index both name r6.)
idx             RN 6
idy             RN 7
index           RN 6
Temp            RN 12
pArgs           RN 11

;// pBuf (r8) holds the working-buffer address handed to the helpers.
;// Height and bufStep are two names for r9; only Height is used below.
pBuf            RN 8
Height          RN 9 
bufStep         RN 9
        
        ;// Define stack arguments
        ;// Four 4-byte stack slots, read below as dx, dy, roi.width, roi.height.
        M_ARG   ptridx, 4
        M_ARG   ptridy, 4        
        M_ARG   ptrWidth, 4
        M_ARG   ptrHeight, 4        

        ;// Load dx, dy and the structure elements of roi (width, height)
        M_LDR   idx, ptridx
        M_LDR   idy, ptridy
        M_LDR   iWidth, ptrWidth
        M_LDR   iHeight, ptrHeight
        
        M_PRINTF "roi.width %d\n", iWidth
        M_PRINTF "roi.height %d\n", iHeight

        ;// index = dx + 4*dy, i.e. 0..15 for dx, dy in [0,3]. This overwrites
        ;// idx (same register); idy (r7) is free for reuse from here on.
        ADD     index, idx, idy, LSL #2                 ;//  [index] = [idy][idx]
        M_ADR   pArgs, ppArgs

        ;// The three labels below mark the same address: both the width loop
        ;// and the height loop at Block4x4LoopEnd branch back here to process
        ;// the next 4x4 block. pArgs must hold the address of ppArgs on entry.
InterpolateLuma
Block4x4WidthLoop
Block4x4HeightLoop

        ;// Save the arguments of the current 4x4 block; they are reloaded from
        ;// ppArgs by the two-pass cases and at Block4x4LoopEnd.
        STM     pArgs, {pSrc,srcStep,pDst,dstStep} 
        M_ADR   pBuf, pBuffer                           

        ;// switch table using motion vector as index
        ;// (M_SWITCH / M_CASE / M_ENDSWITCH are armCOMM_s.h macros; entry N
        ;// corresponds to index == N, so Case_N handles dx = N & 3, dy = N >> 2.)
        M_SWITCH index, L
        M_CASE  Case_0
        M_CASE  Case_1
        M_CASE  Case_2
        M_CASE  Case_3
        M_CASE  Case_4
        M_CASE  Case_5
        M_CASE  Case_6
        M_CASE  Case_7
        M_CASE  Case_8
        M_CASE  Case_9
        M_CASE  Case_a
        M_CASE  Case_b
        M_CASE  Case_c
        M_CASE  Case_d
        M_CASE  Case_e
        M_CASE  Case_f
        M_ENDSWITCH

        ;// In the "Case <letter>" comments below, the letter is presumably the
        ;// H.264 fractional-sample position label (G, a, b, ... r) -- confirm
        ;// against the H.264 luma sample interpolation figure.

Case_0
        ;// Case G
        ;// index 0: dx = 0, dy = 0. Only the 4x4 copy helper is called.
        M_PRINTF "Case 0 \n"

        BL      armVCM4P10_InterpolateLuma_Copy4x4_unsafe
        B       Block4x4LoopEnd

Case_1
        ;// Case a
        ;// index 1: dx = 1, dy = 0. pSrc is moved 2 bytes to the left and
        ;// r9 (Height) is set to 4 before the horizontal helpers run; the
        ;// sequence ends with the Average_4x4_Align2 helper.
        M_PRINTF "Case 1 \n"

        SUB     pSrc, pSrc, #2
        MOV     Height, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        BL      armVCM4P10_Average_4x4_Align2_unsafe
        B       Block4x4LoopEnd
Case_2
        ;// Case b
        ;// index 2: dx = 2, dy = 0. Same sequence as Case_1 without the
        ;// final averaging call.
        M_PRINTF "Case 2 \n"
        
        SUB     pSrc, pSrc, #2
        MOV     Height, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe        
        B       Block4x4LoopEnd
Case_3
        ;// Case c
        ;// index 3: dx = 3, dy = 0. Same sequence as Case_1 but ending with
        ;// the Average_4x4_Align3 helper instead of Align2.
        M_PRINTF "Case 3 \n"

        SUB     pSrc, pSrc, #2
        MOV     Height, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        BL      armVCM4P10_Average_4x4_Align3_unsafe
        B       Block4x4LoopEnd
Case_4
        ;// Case d
        ;// index 4: dx = 0, dy = 1. pSrc is moved up by 2 rows (2*srcStep) and
        ;// r9 (Height) is set to 9 before the vertical helpers run; the
        ;// sequence ends with the Average_4x4_Align0 helper.
        M_PRINTF "Case 4 \n"

        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        BL      armVCM4P10_Average_4x4_Align0_unsafe

        B       Block4x4LoopEnd
Case_5
        ;// Case e
        ;// index 5: dx = 1, dy = 1. Two passes:
        ;//  pass 1 - horizontal helpers, with pDst redirected to pTempBuf
        ;//           (dstStep = 4);
        ;//  pass 2 - original arguments reloaded from ppArgs, vertical
        ;//           helpers started 2 rows above pSrc with pBuf = pBuffer;
        ;//  then Average_4x4_Align0 is called with pSrc = pTempBuf, srcStep = 4.
        M_PRINTF "Case 5 \n"

        SUB     pSrc, pSrc, #2
        MOV     Height, #4
        M_ADR   pDst, pTempBuf
        MOV     dstStep, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        M_ADR   pArgs, ppArgs
        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}
        SUB     pSrc, pSrc, srcStep, LSL #1
        M_ADR   pBuf, pBuffer                           
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        M_ADR   pSrc, pTempBuf
        MOV     srcStep, #4
        BL      armVCM4P10_Average_4x4_Align0_unsafe
        

        B       Block4x4LoopEnd
Case_6
        ;// Case f
        ;// index 6: dx = 2, dy = 1. pSrc is moved 2 bytes left and 2 rows up,
        ;// r9 (Height) = 9. HorAlign9x runs with pBuf = pBuffer (set at the
        ;// top of the block); pBuf is then switched to pInterBuf for
        ;// HalfDiagHorVer4x4. r7 (idy, no longer needed as dy) carries the
        ;// address of pTempBuf into VerDiagCopy, followed by Average_4x4_Align0.
        M_PRINTF "Case 6 \n"

        SUB     pSrc, pSrc, #2
        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        M_ADR   pBuf, pInterBuf
        BL      armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
        M_ADR   idy, pTempBuf
        BL      armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe    
        BL      armVCM4P10_Average_4x4_Align0_unsafe
        B       Block4x4LoopEnd
Case_7
        ;// Case g
        ;// index 7: dx = 3, dy = 1. Same two-pass sequence as Case_5, except
        ;// that the vertical pass starts one byte to the right (pSrc + 1).
        M_PRINTF "Case 7 \n"
        
        SUB     pSrc, pSrc, #2
        MOV     Height, #4
        M_ADR   pDst, pTempBuf
        MOV     dstStep, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        M_ADR   pArgs, ppArgs
        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}
        SUB     pSrc, pSrc, srcStep, LSL #1
        ADD     pSrc, pSrc, #1
        M_ADR   pBuf, pBuffer                           
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        M_ADR   pSrc, pTempBuf
        MOV     srcStep, #4
        BL      armVCM4P10_Average_4x4_Align0_unsafe

        B       Block4x4LoopEnd
Case_8
        ;// Case h
        ;// index 8: dx = 0, dy = 2. Same sequence as Case_4 without the
        ;// final averaging call.
        M_PRINTF "Case 8 \n"

        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        B       Block4x4LoopEnd
Case_9
        ;// Case i
        ;// index 9: dx = 1, dy = 2. pSrc is moved 2 bytes left and 2 rows up,
        ;// r9 (Height) = 9, then HorAlign9x runs with pBuf = pBuffer. After
        ;// that pSrc is advanced by 2*srcStep and pBuf is switched to
        ;// pInterBuf for HalfDiagVerHor4x4. r7 carries the address of
        ;// pTempBuf into HorDiagCopy, followed by Average_4x4_Align2.
        M_PRINTF "Case 9 \n"

        SUB     pSrc, pSrc, #2
        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        ADD     pSrc, pSrc, srcStep, LSL #1
        M_ADR   pBuf, pInterBuf
        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        M_ADR   idy, pTempBuf
        BL      armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe    
        BL      armVCM4P10_Average_4x4_Align2_unsafe
        B       Block4x4LoopEnd
Case_a
        ;// Case j
        ;// index 10: dx = 2, dy = 2. Same sequence as Case_9 up to and
        ;// including HalfDiagVerHor4x4; no DiagCopy or averaging call follows.
        M_PRINTF "Case a \n"

        SUB     pSrc, pSrc, #2
        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        ADD     pSrc, pSrc, srcStep, LSL #1
        M_ADR   pBuf, pInterBuf
        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        B       Block4x4LoopEnd
Case_b
        ;// Case k
        ;// index 11: dx = 3, dy = 2. Same sequence as Case_9 but ending with
        ;// the Average_4x4_Align3 helper instead of Align2.
        M_PRINTF "Case b \n"
        SUB     pSrc, pSrc, #2
        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        ADD     pSrc, pSrc, srcStep, LSL #1
        M_ADR   pBuf, pInterBuf
        BL      armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
        M_ADR   idy, pTempBuf
        BL      armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe    
        BL      armVCM4P10_Average_4x4_Align3_unsafe
        B       Block4x4LoopEnd
Case_c
        ;// Case n
        ;// index 12: dx = 0, dy = 3. Same sequence as Case_4, except that pSrc
        ;// is advanced by one srcStep before the averaging call.
        M_PRINTF "Case c \n"

        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        ADD     pSrc, pSrc, srcStep                     ;// Update pSrc to one row down
        BL      armVCM4P10_Average_4x4_Align0_unsafe
        B       Block4x4LoopEnd
Case_d
        ;// Case p
        ;// index 13: dx = 1, dy = 3. Same two-pass sequence as Case_5, except
        ;// that the horizontal pass starts one row lower (pSrc - 2 + srcStep).
        M_PRINTF "Case d \n"
        SUB     pSrc, pSrc, #2
        ADD     pSrc, pSrc, srcStep
        MOV     Height, #4
        M_ADR   pDst, pTempBuf
        MOV     dstStep, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        M_ADR   pArgs, ppArgs
        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}
        SUB     pSrc, pSrc, srcStep, LSL #1
        M_ADR   pBuf, pBuffer                           
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        M_ADR   pSrc, pTempBuf
        MOV     srcStep, #4
        BL      armVCM4P10_Average_4x4_Align0_unsafe
        B       Block4x4LoopEnd
Case_e
        ;// Case q
        ;// index 14: dx = 2, dy = 3. Same sequence as Case_6, except that pSrc
        ;// is advanced by 4 bytes before the averaging call.
        M_PRINTF "Case e \n"
        
        SUB     pSrc, pSrc, #2
        SUB     pSrc, pSrc, srcStep, LSL #1
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        M_ADR   pBuf, pInterBuf
        BL      armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
        M_ADR   idy, pTempBuf
        BL      armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
        ADD     pSrc, pSrc, #4    
        BL      armVCM4P10_Average_4x4_Align0_unsafe

        B       Block4x4LoopEnd
Case_f
        ;// Case r
        ;// index 15: dx = 3, dy = 3. Same two-pass sequence as Case_d, except
        ;// that the vertical pass starts one byte to the right (pSrc + 1).
        ;// This is the last case and falls through into Block4x4LoopEnd.
        M_PRINTF "Case f \n"
        SUB     pSrc, pSrc, #2
        ADD     pSrc, pSrc, srcStep
        MOV     Height, #4
        M_ADR   pDst, pTempBuf
        MOV     dstStep, #4
        BL      armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
        M_ADR   pArgs, ppArgs
        LDM     pArgs, {pSrc, srcStep, pDst, dstStep}
        SUB     pSrc, pSrc, srcStep, LSL #1
        ADD     pSrc, pSrc, #1
        M_ADR   pBuf, pBuffer                           
        MOV     Height, #9
        BL      armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
        BL      armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
        M_ADR   pSrc, pTempBuf
        MOV     srcStep, #4
        BL      armVCM4P10_Average_4x4_Align0_unsafe

Block4x4LoopEnd

        ;// Width Loop
        ;// iWidth counts the bytes still to process in the current row of
        ;// blocks. The flags set by SUBS are consumed by BGT five lines
        ;// below; this relies on M_ADR, LDM and the non-flag-setting ADDs in
        ;// between leaving the flags untouched. The block arguments saved at
        ;// the top of the block are reloaded and both pointers are stepped
        ;// 4 bytes to the right.
        SUBS    iWidth, iWidth, #4
        M_ADR   pArgs, ppArgs
        LDM     pArgs, {pSrc,srcStep,pDst,dstStep}  ;// Load arguments
        ADD     pSrc, pSrc, #4      
        ADD     pDst, pDst, #4
        BGT     Block4x4WidthLoop

        ;// Height Loop
        ;// Reached after the last block of a row: pSrc/pDst are now iWidth
        ;// bytes past the start of the row (roi.width is a multiple of 4).
        ;// iWidth is restored from the stack argument, the pointers are moved
        ;// down 4 rows and back by iWidth to the first block of the next row.
        ;// As above, BGT consumes the flags from SUBS, so the instructions
        ;// in between (including the M_LDR / M_ADR macros) must not set flags.
        SUBS    iHeight, iHeight, #4
        M_LDR   iWidth, ptrWidth
        M_ADR   pArgs, ppArgs
        ADD     pSrc, pSrc, srcStep, LSL #2      
        ADD     pDst, pDst, dstStep, LSL #2
        SUB     pSrc, pSrc, iWidth
        SUB     pDst, pDst, iWidth
        BGT     Block4x4HeightLoop

EndOfInterpolation
        ;// Return 0 in r0 (presumably OMX_Sts_NoErr, as promised by the file
        ;// header -- the constant itself is defined outside this file).
        MOV     r0, #0
        M_END

    ENDIF
                    

    END