1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
|
;/**
; * Function: omxVCCOMM_Copy16x16
; *
; * Description:
; * Copies the reference 16x16 block to the current block.
; * Parameters:
; * [in] pSrc - pointer to the reference block in the source frame; must be aligned on a 16-byte boundary.
; * [in] step - distance between the starts of consecutive lines in the reference frame, in bytes;
; * must be a multiple of 16 and must be larger than or equal to 16.
; * [out] pDst - pointer to the destination block; must be aligned on a 16-byte boundary.
; * Return Value:
; * OMX_Sts_NoErr - no error
; * OMX_Sts_BadArgErr - bad arguments; returned under any of the following conditions:
; * - one or more of the following pointers is NULL: pSrc, pDst
; * - one or more of the following pointers is not aligned on a 16-byte boundary: pSrc, pDst
; * - step <16 or step is not a multiple of 16.
; */
INCLUDE omxtypes_s.h
M_VARIANTS CortexA8
IF CortexA8
;//Input Arguments
;// The three arguments are bound to core registers 0-2 in declaration order.
pSrc RN 0 ;// r0: address of the source row to load next (post-incremented by step)
pDst RN 1 ;// r1: address of the next destination store (advanced by writeback)
step RN 2 ;// r2: byte distance between the starts of consecutive source rows
;//Local Variables
;// Return shares register 0 with pSrc; the status code is written there
;// only after the last load through pSrc has been issued.
Return RN 0
;// Neon Registers
;// Eight 64-bit D registers (D0-D7), each typed as signed 8-bit lanes.
;// A pair (e.g. X0,X1) holds one 16-byte row; all eight hold four rows.
X0 DN D0.S8
X1 DN D1.S8
X2 DN D2.S8
X3 DN D3.S8
X4 DN D4.S8
X5 DN D5.S8
X6 DN D6.S8
X7 DN D7.S8
;// omxVCCOMM_Copy16x16 (Cortex-A8 / NEON)
;//
;// Copies 16 rows of 16 bytes each. Source rows start 'step' bytes apart;
;// destination rows are written back to back, so pDst advances by 256 bytes
;// over the whole routine. Every access uses the @128 alignment qualifier,
;// so both pSrc and pDst have to be 16-byte aligned.
;//
;// No argument checks are visible in this body: the only value written to
;// Return is OMX_Sts_NoErr.

        ;// One quarter of the block: fetch four source rows into X0-X7,
        ;// then write them out as two 32-byte stores. Expanded four times
        ;// below, which yields the fully unrolled instruction sequence.
        MACRO
        CopyFourRows
        VLD1    {X0,X1},[pSrc@128],step         ;// row n   -> X0:X1, pSrc += step
        VLD1    {X2,X3},[pSrc@128],step         ;// row n+1 -> X2:X3
        VLD1    {X4,X5},[pSrc@128],step         ;// row n+2 -> X4:X5
        VLD1    {X6,X7},[pSrc@128],step         ;// row n+3 -> X6:X7
        VST1    {X0,X1,X2,X3},[pDst@128]!       ;// rows n, n+1   (32 bytes), pDst += 32
        VST1    {X4,X5,X6,X7},[pDst@128]!       ;// rows n+2, n+3 (32 bytes), pDst += 32
        MEND

        M_START omxVCCOMM_Copy16x16

        CopyFourRows                            ;// rows  0..3
        CopyFourRows                            ;// rows  4..7
        CopyFourRows                            ;// rows  8..11
        CopyFourRows                            ;// rows 12..15

        ;// Return aliases the same register as pSrc; safe to overwrite now
        ;// that every load has been issued.
        MOV     Return,#OMX_Sts_NoErr

        M_END
ENDIF
END
|