1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
|
;//
;//
;// File Name: omxVCM4P10_InterpolateLuma_s.s
;// OpenMAX DL: v1.0.2
;// Revision: 9641
;// Date: Thursday, February 7, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//
;// Function:
;// omxVCM4P10_InterpolateLuma
;//
;// This function implements omxVCM4P10_InterpolateLuma in v6 assembly.
;// Performs quarter pel interpolation of inter luma MB.
;// It's assumed that the frame is already padded when calling this function.
;// Parameters:
;// [in] pSrc Pointer to the source reference frame buffer
;// [in] srcStep Reference frame step in byte
;// [in] dstStep Destination frame step in byte. Must be multiple of roi.width
;// [in] dx Fractional part of horizontal motion vector
;// component in 1/4 pixel unit; valid in the range [0,3]
;// [in] dy Fractional part of vertical motion vector
;// component in 1/4 pixel unit; valid in the range [0,3]
;// [in] roi Dimension of the interpolation region;the parameters roi.width and roi.height must
;// be equal to either 4, 8, or 16.
;// [out] pDst Pointer to the destination frame buffer.
;// if roi.width==4, 4-byte alignment required
;// if roi.width==8, 8-byte alignment required
;// if roi.width==16, 16-byte alignment required
;//
;// Return Value:
;// If the function runs without error, it returns OMX_Sts_NoErr.
;// It is assumed that the following conditions are satisfied before calling this function:
;// pSrc and pDst are not NULL.
;// srcStep and dstStep are >= roi.width.
;// dx and dy are in the range [0-3].
;// roi.width and roi.height are each one of {4, 8, 16}.
;// If roi.width is equal to 4, pDst is 4 byte aligned.
;// If roi.width is equal to 8, pDst is 8 byte aligned.
;// If roi.width is equal to 16, pDst is 16 byte aligned.
;// srcStep and dstStep are multiples of 8.
;//
;//
INCLUDE omxtypes_s.h
INCLUDE armCOMM_s.h
M_VARIANTS ARM1136JS
EXPORT omxVCM4P10_InterpolateLuma
IF ARM1136JS
IMPORT armVCM4P10_InterpolateLuma_Copy4x4_unsafe
IMPORT armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
IMPORT armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
IMPORT armVCM4P10_Average_4x4_Align0_unsafe
IMPORT armVCM4P10_Average_4x4_Align2_unsafe
IMPORT armVCM4P10_Average_4x4_Align3_unsafe
IMPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
IMPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
ENDIF
IF ARM1136JS
IMPORT armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
IMPORT armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
IMPORT armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
IMPORT armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
ENDIF
;// Declare input registers
;// Symbolic names (RN aliases) for the core registers used by omxVCM4P10_InterpolateLuma.
;// r0-r3 are used directly on entry as pSrc, srcStep, pDst and dstStep.
pSrc RN 0
srcStep RN 1
pDst RN 2
dstStep RN 3
;// iHeight / iWidth are not passed in registers: the function loads them from its
;// stack arguments (ptrHeight / ptrWidth) and then uses them as the loop counters.
iHeight RN 4
iWidth RN 5
;// Declare other intermediate registers
;// idx and index are two names for the same register (r6): the function overwrites
;// idx in place with the combined switch index. idy (r7) is only needed to build it.
idx RN 6
idy RN 7
index RN 6
;// NOTE(review): Temp (r12) is declared but not referenced in the visible code.
Temp RN 12
;// pArgs (r11) is pointed at the ppArgs save area for {pSrc, srcStep, pDst, dstStep}.
pArgs RN 11
;// End of CortexA8
;// NOTE(review): no CortexA8 section is visible in this file (M_VARIANTS lists only
;// ARM1136JS); the marker above looks like a leftover -- confirm before relying on it.
;//-------------------------------------------------------------------------------------------------------------------------
;//-------------------------------------------------------------------------------------------------------------------------
IF ARM1136JS
;// Local stack storage for omxVCM4P10_InterpolateLuma.
;// M_ALLOC4 / M_ALLOC8 are macros from armCOMM_s.h (not visible here); presumably the
;// digit is the required alignment in bytes and the second operand the size in
;// bytes -- TODO confirm against armCOMM_s.h.
;// NOTE(review): ppDst and ppSrc are not referenced anywhere in the visible code.
M_ALLOC4 ppDst, 8
M_ALLOC4 ppSrc, 8
;// ppArgs: 16 bytes = save area for the four words {pSrc, srcStep, pDst, dstStep}.
M_ALLOC4 ppArgs, 16
;// pBuffer is the default work buffer handed to the helpers through pBuf.
M_ALLOC4 pBuffer, 120 ;// 120 = 12x10
;// pInterBuf replaces pBuffer as pBuf in the diagonal cases (6, 9, a, b, e).
M_ALLOC8 pInterBuf, 120 ;// 120 = 12*5*2
;// pTempBuf is used as a temporary 4x4 destination with a step of 4 (cases 5, 7, d, f)
;// and its address is passed in r7 to the DiagCopy helpers (cases 6, 9, b, e).
M_ALLOC8 pTempBuf, 32 ;// 32 = 8*4
;// Function header
;// Interpolation of luma is implemented by processing block of pixels, size 4x4 at a time.
;// Depending on the values of motion vector fractional parts (dx,dy), one out of 16 cases will be processed.
;// Registers r4, r5, r6 to be preserved by internal unsafe functions
;// r4 - iHeight
;// r5 - iWidth
;// r6 - index
;//
;// Overall structure of the code below:
;//   index = idx + 4*idy
;//   for each band of 4 lines (iHeight counts down in steps of 4)
;//     for each 4x4 block in the band (iWidth counts down in steps of 4)
;//       save {pSrc, srcStep, pDst, dstStep} in ppArgs
;//       switch (index) -> Case_0 .. Case_f, each a fixed sequence of helper calls
;//       restore the saved arguments, advance pSrc and pDst by 4 bytes
;//     advance pSrc and pDst to the start of the next band
;//   return 0
;//
;// NOTE(review): the *_unsafe helpers are imported and their register contracts are
;// not visible here. From the call sites they appear to consume pSrc/srcStep/pDst/
;// dstStep (r0-r3), pBuf (r8) and Height (r9) -- confirm against the helper sources.
;// pArgs (r11) is re-derived with M_ADR before every LDM instead of being assumed to
;// survive the helper calls.
M_START omxVCM4P10_InterpolateLuma, r11
;// Declare other intermediate registers
;// idx, idy, index, Temp and pArgs repeat the file-level aliases with the same
;// register numbers. Height and bufStep both name r9; bufStep and Temp are not
;// referenced in the visible code.
idx RN 6
idy RN 7
index RN 6
Temp RN 12
pArgs RN 11
pBuf RN 8
Height RN 9
bufStep RN 9
;// Define stack arguments
;// Four 4-byte stack arguments. Going by the parameter list in the file header these
;// are dx, dy, roi.width and roi.height, in that order.
M_ARG ptridx, 4
M_ARG ptridy, 4
M_ARG ptrWidth, 4
M_ARG ptrHeight, 4
;// Load structure elements of roi
M_LDR idx, ptridx
M_LDR idy, ptridy
M_LDR iWidth, ptrWidth
M_LDR iHeight, ptrHeight
M_PRINTF "roi.width %d\n", iWidth
M_PRINTF "roi.height %d\n", iHeight
;// index = idx + 4*idy. index and idx are the same register (r6), so idx is
;// overwritten here; with dx, dy in [0,3] the result selects one of the 16 cases.
ADD index, idx, idy, LSL #2 ;// [index] = [idy][idx]
M_ADR pArgs, ppArgs
;// The three labels below mark the same address: both loops re-enter at the STM.
InterpolateLuma
Block4x4WidthLoop
Block4x4HeightLoop
;// Save the arguments of the current 4x4 block. The cases are free to modify r0-r3;
;// the saved copy is reloaded at Block4x4LoopEnd and inside the two-pass cases.
STM pArgs, {pSrc,srcStep,pDst,dstStep}
;// Default work buffer; the diagonal cases replace it with pInterBuf.
M_ADR pBuf, pBuffer
;// switch table using motion vector as index
;// Case_n handles dx = n AND 3, dy = n >> 2 (follows from index = idx + 4*idy).
;// The letter in each case comment is presumably the sample-position label from the
;// H.264 luma fractional sample interpolation figure -- confirm.
M_SWITCH index, L
M_CASE Case_0
M_CASE Case_1
M_CASE Case_2
M_CASE Case_3
M_CASE Case_4
M_CASE Case_5
M_CASE Case_6
M_CASE Case_7
M_CASE Case_8
M_CASE Case_9
M_CASE Case_a
M_CASE Case_b
M_CASE Case_c
M_CASE Case_d
M_CASE Case_e
M_CASE Case_f
M_ENDSWITCH
Case_0
;// Case G
;// dx = 0, dy = 0: a single call to the Copy4x4 helper with the unmodified arguments.
M_PRINTF "Case 0 \n"
BL armVCM4P10_InterpolateLuma_Copy4x4_unsafe
B Block4x4LoopEnd
Case_1
;// Case a
;// dx = 1, dy = 0: source moved 2 bytes left, Height = 4, then HorAlign9x, HalfHor4x4
;// and Average_4x4_Align2.
M_PRINTF "Case 1 \n"
SUB pSrc, pSrc, #2
MOV Height, #4
BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
BL armVCM4P10_Average_4x4_Align2_unsafe
B Block4x4LoopEnd
Case_2
;// Case b
;// dx = 2, dy = 0: same set-up as Case_1 but without the averaging call.
M_PRINTF "Case 2 \n"
SUB pSrc, pSrc, #2
MOV Height, #4
BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
B Block4x4LoopEnd
Case_3
;// Case c
;// dx = 3, dy = 0: same as Case_1 except that the Align3 averaging helper is called.
M_PRINTF "Case 3 \n"
SUB pSrc, pSrc, #2
MOV Height, #4
BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
BL armVCM4P10_Average_4x4_Align3_unsafe
B Block4x4LoopEnd
Case_4
;// Case d
;// dx = 0, dy = 1: source moved 2 lines up, Height = 9, then VerAlign4x, HalfVer4x4
;// and Average_4x4_Align0.
M_PRINTF "Case 4 \n"
SUB pSrc, pSrc, srcStep, LSL #1
MOV Height, #9
BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
BL armVCM4P10_Average_4x4_Align0_unsafe
B Block4x4LoopEnd
Case_5
;// Case e
;// dx = 1, dy = 1: two passes.
;// Pass 1: horizontal sequence (as in Case_2) with pDst redirected to pTempBuf, step 4.
M_PRINTF "Case 5 \n"
SUB pSrc, pSrc, #2
MOV Height, #4
M_ADR pDst, pTempBuf
MOV dstStep, #4
BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
;// Pass 2: reload the saved arguments and run the vertical sequence (as in Case_8),
;// this time with the real pDst/dstStep.
M_ADR pArgs, ppArgs
LDM pArgs, {pSrc, srcStep, pDst, dstStep}
SUB pSrc, pSrc, srcStep, LSL #1
M_ADR pBuf, pBuffer
MOV Height, #9
BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
;// The averaging helper is called with pTempBuf (step 4) as its source.
M_ADR pSrc, pTempBuf
MOV srcStep, #4
BL armVCM4P10_Average_4x4_Align0_unsafe
B Block4x4LoopEnd
Case_6
;// Case f
;// dx = 2, dy = 1: source moved 2 bytes left and 2 lines up, Height = 9, HorAlign9x,
;// then HalfDiagHorVer4x4 with pBuf switched to pInterBuf.
M_PRINTF "Case 6 \n"
SUB pSrc, pSrc, #2
SUB pSrc, pSrc, srcStep, LSL #1
MOV Height, #9
BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
M_ADR pBuf, pInterBuf
BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
;// idy (r7) is no longer needed once index has been formed; it is reused here to hold
;// the address of pTempBuf (presumably an argument of the DiagCopy helper -- confirm).
M_ADR idy, pTempBuf
BL armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
BL armVCM4P10_Average_4x4_Align0_unsafe
B Block4x4LoopEnd
Case_7
;// Case g
;// dx = 3, dy = 1: two passes like Case_5; the only difference is that the source of
;// the vertical pass is moved one byte to the right.
M_PRINTF "Case 7 \n"
SUB pSrc, pSrc, #2
MOV Height, #4
M_ADR pDst, pTempBuf
MOV dstStep, #4
BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
M_ADR pArgs, ppArgs
LDM pArgs, {pSrc, srcStep, pDst, dstStep}
SUB pSrc, pSrc, srcStep, LSL #1
ADD pSrc, pSrc, #1
M_ADR pBuf, pBuffer
MOV Height, #9
BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
M_ADR pSrc, pTempBuf
MOV srcStep, #4
BL armVCM4P10_Average_4x4_Align0_unsafe
B Block4x4LoopEnd
Case_8
;// Case h
;// dx = 0, dy = 2: same set-up as Case_4 but without the averaging call.
M_PRINTF "Case 8 \n"
SUB pSrc, pSrc, srcStep, LSL #1
MOV Height, #9
BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
B Block4x4LoopEnd
Case_9
;// Case i
;// dx = 1, dy = 2: source moved 2 bytes left and 2 lines up, Height = 9, HorAlign9x;
;// pSrc is then moved 2 lines down again before HalfDiagVerHor4x4 (pBuf = pInterBuf),
;// followed by HorDiagCopy (r7 = pTempBuf) and Average_4x4_Align2.
M_PRINTF "Case 9 \n"
SUB pSrc, pSrc, #2
SUB pSrc, pSrc, srcStep, LSL #1
MOV Height, #9
BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
ADD pSrc, pSrc, srcStep, LSL #1
M_ADR pBuf, pInterBuf
BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
M_ADR idy, pTempBuf
BL armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
BL armVCM4P10_Average_4x4_Align2_unsafe
B Block4x4LoopEnd
Case_a
;// Case j
;// dx = 2, dy = 2: same as Case_9 up to HalfDiagVerHor4x4; no copy or averaging call.
M_PRINTF "Case a \n"
SUB pSrc, pSrc, #2
SUB pSrc, pSrc, srcStep, LSL #1
MOV Height, #9
BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
ADD pSrc, pSrc, srcStep, LSL #1
M_ADR pBuf, pInterBuf
BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
B Block4x4LoopEnd
Case_b
;// Case k
;// dx = 3, dy = 2: same as Case_9 except that the Align3 averaging helper is called.
M_PRINTF "Case b \n"
SUB pSrc, pSrc, #2
SUB pSrc, pSrc, srcStep, LSL #1
MOV Height, #9
BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
ADD pSrc, pSrc, srcStep, LSL #1
M_ADR pBuf, pInterBuf
BL armVCM4P10_InterpolateLuma_HalfDiagVerHor4x4_unsafe
M_ADR idy, pTempBuf
BL armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
BL armVCM4P10_Average_4x4_Align3_unsafe
B Block4x4LoopEnd
Case_c
;// Case n
;// dx = 0, dy = 3: same as Case_4 except that pSrc is advanced by one line before the
;// averaging call.
M_PRINTF "Case c \n"
SUB pSrc, pSrc, srcStep, LSL #1
MOV Height, #9
BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
ADD pSrc, pSrc, srcStep ;// Update pSrc to one row down
BL armVCM4P10_Average_4x4_Align0_unsafe
B Block4x4LoopEnd
Case_d
;// Case p
;// dx = 1, dy = 3: two passes like Case_5; the only difference is that the source of
;// the horizontal pass is moved one line down.
M_PRINTF "Case d \n"
SUB pSrc, pSrc, #2
ADD pSrc, pSrc, srcStep
MOV Height, #4
M_ADR pDst, pTempBuf
MOV dstStep, #4
BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
M_ADR pArgs, ppArgs
LDM pArgs, {pSrc, srcStep, pDst, dstStep}
SUB pSrc, pSrc, srcStep, LSL #1
M_ADR pBuf, pBuffer
MOV Height, #9
BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
M_ADR pSrc, pTempBuf
MOV srcStep, #4
BL armVCM4P10_Average_4x4_Align0_unsafe
B Block4x4LoopEnd
Case_e
;// Case q
;// dx = 2, dy = 3: same as Case_6 except that pSrc is advanced by 4 bytes between the
;// VerDiagCopy call and the averaging call.
M_PRINTF "Case e \n"
SUB pSrc, pSrc, #2
SUB pSrc, pSrc, srcStep, LSL #1
MOV Height, #9
BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
M_ADR pBuf, pInterBuf
BL armVCM4P10_InterpolateLuma_HalfDiagHorVer4x4_unsafe
M_ADR idy, pTempBuf
BL armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
ADD pSrc, pSrc, #4
BL armVCM4P10_Average_4x4_Align0_unsafe
B Block4x4LoopEnd
Case_f
;// Case r
;// dx = 3, dy = 3: two passes combining the offsets of Case_d (horizontal pass one line
;// down) and Case_7 (vertical pass one byte right).
;// This last case has no branch: it falls through into Block4x4LoopEnd.
M_PRINTF "Case f \n"
SUB pSrc, pSrc, #2
ADD pSrc, pSrc, srcStep
MOV Height, #4
M_ADR pDst, pTempBuf
MOV dstStep, #4
BL armVCM4P10_InterpolateLuma_HorAlign9x_unsafe
BL armVCM4P10_InterpolateLuma_HalfHor4x4_unsafe
M_ADR pArgs, ppArgs
LDM pArgs, {pSrc, srcStep, pDst, dstStep}
SUB pSrc, pSrc, srcStep, LSL #1
ADD pSrc, pSrc, #1
M_ADR pBuf, pBuffer
MOV Height, #9
BL armVCM4P10_InterpolateLuma_VerAlign4x_unsafe
BL armVCM4P10_InterpolateLuma_HalfVer4x4_unsafe
M_ADR pSrc, pTempBuf
MOV srcStep, #4
BL armVCM4P10_Average_4x4_Align0_unsafe
Block4x4LoopEnd
;// Width Loop
;// One 4x4 block is finished: count it off iWidth, restore the arguments saved at the
;// top of the loop (the cases modify r0-r3) and step both pointers 4 bytes right.
;// BGT tests the flags set by SUBS; no other flag-setting instruction is written in
;// between, which relies on the M_ADR expansion leaving the flags untouched --
;// TODO confirm in armCOMM_s.h.
SUBS iWidth, iWidth, #4
M_ADR pArgs, ppArgs
LDM pArgs, {pSrc,srcStep,pDst,dstStep} ;// Load arguments
ADD pSrc, pSrc, #4
ADD pDst, pDst, #4
BGT Block4x4WidthLoop
;// Height Loop
;// A full band of 4 lines is finished. iWidth is reloaded from its stack argument;
;// pSrc/pDst currently point iWidth bytes past the start of the band, so they are
;// moved 4 lines down and iWidth bytes back to reach the start of the next band.
;// As above, BGT tests the flags from SUBS (assumes M_LDR/M_ADR preserve the flags).
SUBS iHeight, iHeight, #4
M_LDR iWidth, ptrWidth
M_ADR pArgs, ppArgs
ADD pSrc, pSrc, srcStep, LSL #2
ADD pDst, pDst, dstStep, LSL #2
SUB pSrc, pSrc, iWidth
SUB pDst, pDst, iWidth
BGT Block4x4HeightLoop
EndOfInterpolation
;// Return 0 in r0. The file header documents OMX_Sts_NoErr as the success return;
;// presumably that constant is 0 -- confirm in omxtypes_s.h.
MOV r0, #0
M_END
ENDIF
END
|