summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/comm/src/omxVCCOMM_ExpandFrame_I_s.s
blob: 5c5b7d8166b957b2306c6fa03f4676f7ba4294db (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
;//
;// 
;// File Name:  omxVCCOMM_ExpandFrame_I_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   12290
;// Date:       Wednesday, April 9, 2008
;// 
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;// 
;// 
;//
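;// Description:
;// Expands a frame in place by replicating its boundary pixels into the
;// surrounding plane: the first and last rows are copied above and below
;// the frame, the first and last pixel of every row are copied to its left
;// and right, and the four corner regions are filled with the corner pixels.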
;// 
;// 

;// Include standard headers

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h
        
        ;// Declare the CPU variant this file provides code for. The CortexA8
        ;// symbol named here is the one tested by the IF that guards the
        ;// function body further down.
        ;// NOTE(review): M_VARIANTS is not defined in this file; presumably
        ;// it comes from armCOMM_s.h -- confirm.
        M_VARIANTS CortexA8

;// Import symbols required from other files
;// (For example tables)
;// No IMPORT directives follow: this file references no external tables.
    
  
;// Set debugging level        
;// DEBUG_ON is set to false here and is not referenced again in this file;
;// presumably it is consumed by macros from the included headers -- confirm.
DEBUG_ON    SETL {FALSE}


    
    ;//--------------------------------------------------------------------
    ;// omxVCCOMM_ExpandFrame_I  (NEON implementation, built only when the
    ;// CortexA8 variant is selected)
    ;//
    ;// Replicates the border pixels of a frame, in place, into the plane
    ;// area that surrounds it:
    ;//   - top / bottom : first / last frame row copied to the rows above /
    ;//                    below the frame
    ;//   - left / right : first / last pixel of each row copied sideways
    ;//   - corners      : filled with the corresponding frame corner pixel
    ;//
    ;// Inputs (register aliases are defined below):
    ;//   r0       pSrcDstPlane  pointer to the top-left pixel of the frame
    ;//   r1       iFrameWidth   frame width in bytes (consumed 16 at a time)
    ;//   r2       iFrameHeight  frame height in rows
    ;//   r3       iExpandPels   border size in pixels (consumed 8 at a time)
    ;//   [stack]  iPlaneStep    byte distance between consecutive rows
    ;// Output:
    ;//   r0       OMX_Sts_NoErr (always; no argument checking is done here)
    ;//
    ;// NOTE(review): every loop below is a do-while that steps by 8 or 16,
    ;// so the code presumably relies on the caller passing a width, height
    ;// and border size that are positive multiples of 8 -- confirm against
    ;// the OpenMAX DL specification.
    ;//--------------------------------------------------------------------
    IF CortexA8
    
        ;// Function entry. NOTE(review): M_START/M_END come from armCOMM_s.h;
        ;// the ",r11" argument presumably asks for r4-r11 (and lr, which is
        ;// used as scratch below) to be preserved -- confirm.
        M_START omxVCCOMM_ExpandFrame_I,r11

;//Input registers
;// r0-r3 carry the first four arguments; iPlaneStep (r4) is loaded from
;// the stack. Several aliases share a register because they are live in
;// different phases: r5 = pTop/pLeft, r6 = pBot/pRight, r7 = pDstTop/Tmp.

pSrcDstPlane    RN  0
iFrameWidth     RN  1
iFrameHeight    RN  2    
iExpandPels     RN  3
iPlaneStep      RN  4
pTop            RN  5
pBot            RN  6
pDstTop         RN  7
pDstBot         RN  8
pLeft           RN  5
pRight          RN  6
pDstLeft        RN  9
pDstRight       RN  10
Offset          RN  11
Temp            RN  14
Counter         RN  12
Tmp             RN  7
;// Tmp is not referenced anywhere in the code below.
;//Output registers
;// The result alias is not used below; the return value is written to r0
;// directly.

result          RN  0
;// Neon registers
;// qData0/qData1 hold 16 bytes of the top/bottom row; dData0/dData1 hold a
;// single border pixel replicated into 8 lanes. d0/d1 overlap q0, but the
;// q and d aliases are used in separate phases. dData2/dData3 are unused.
qData0          QN  0.U8
qData1          QN  1.U8
dData0          DN  0.U8
dData1          DN  1.U8
dData2          DN  2.U8
dData3          DN  3.U8

        ;// Define stack arguments
        M_ARG       pPlaneStep, 4
        
        ;// Load argument from the stack
        M_LDR       iPlaneStep, pPlaneStep
        
        SUB         pTop, pSrcDstPlane, #0              ;// Top row pointer of the frame
        MUL         Offset, iExpandPels, iPlaneStep     ;// E*Step        
        SUB         Temp, iFrameHeight, #1              ;// H-1
        MUL         Temp, iPlaneStep, Temp              ;// (H-1)*Step
        ADD         pBot, Temp, pSrcDstPlane            ;// BPtr = TPtr + (H-1)*Step
        MOV         Temp, iFrameWidth                   ;// Outer loop counter
        
        ;// Check if pSrcDstPlane and iPlaneStep are 16 byte aligned
        ;// TSTEQ only executes when the pointer test passed, so BNE is taken
        ;// when either the pointer or the step has any of its low 4 bits set.
        TST         pSrcDstPlane, #0xf
        TSTEQ       iPlaneStep, #0xf        
        BNE         Hor8Loop00
        
        ;//
        ;// Copy top and bottom region of the plane as follows
        ;// top region = top row elements from the frame
        ;// bottom region = last row elements from the frame
        ;//
        ;// Outer loop (Hor*Loop00): one 16-byte-wide column strip per pass.
        ;// Inner loop (Ver*Loop0) : 8 rows above and 8 rows below per pass.
        ;// NOTE(review): if the width is not a multiple of 16, the final
        ;// strip also reads and writes the 8 bytes to the right of the
        ;// frame; those top/bottom bytes are rewritten by the corner fills
        ;// further down.
        ;//

        ;// Case for 16 byte alignment
Hor16Loop00
        SUB         pDstTop, pTop, Offset               ;// E rows above the current strip
        VLD1        qData0, [pTop @128]!                ;// 16 top-row bytes; pTop moves to next strip
        MOV         Counter, iExpandPels                ;// Inner loop counter
        ADD         pDstBot, pBot, iPlaneStep           ;// First row below the frame
        VLD1        qData1, [pBot @128]!                ;// 16 bottom-row bytes; pBot moves to next strip
Ver16Loop0
        ;// Each store post-increments its pointer by one row (iPlaneStep).
        VST1        qData0, [pDstTop @128], iPlaneStep
        VST1        qData0, [pDstTop @128], iPlaneStep
        VST1        qData0, [pDstTop @128], iPlaneStep
        VST1        qData0, [pDstTop @128], iPlaneStep
        VST1        qData0, [pDstTop @128], iPlaneStep
        VST1        qData0, [pDstTop @128], iPlaneStep
        VST1        qData0, [pDstTop @128], iPlaneStep
        VST1        qData0, [pDstTop @128], iPlaneStep
        ;// Flags set here are consumed by the BGT below; the intervening
        ;// NEON stores do not alter them.
        SUBS        Counter, Counter, #8
        VST1        qData1, [pDstBot @128], iPlaneStep
        VST1        qData1, [pDstBot @128], iPlaneStep
        VST1        qData1, [pDstBot @128], iPlaneStep
        VST1        qData1, [pDstBot @128], iPlaneStep
        VST1        qData1, [pDstBot @128], iPlaneStep
        VST1        qData1, [pDstBot @128], iPlaneStep
        VST1        qData1, [pDstBot @128], iPlaneStep
        VST1        qData1, [pDstBot @128], iPlaneStep        
        BGT         Ver16Loop0

        SUBS        Temp, Temp, #16                     ;// 16 columns done
        BGT         Hor16Loop00
        B           EndAlignedLoop
        
        ;// Case for 8 byte alignment
        ;// Same algorithm as above, but with a 64-bit alignment qualifier on
        ;// every access. Taken when the pointer or the step is not a
        ;// multiple of 16.
        ;// NOTE(review): the @64 qualifier presumably relies on the API
        ;// contract guaranteeing 8-byte alignment -- confirm.
Hor8Loop00
        SUB         pDstTop, pTop, Offset               ;// E rows above the current strip
        VLD1        qData0, [pTop @64]!                 ;// 16 top-row bytes; pTop moves to next strip
        MOV         Counter, iExpandPels                ;// Inner loop counter
        ADD         pDstBot, pBot, iPlaneStep           ;// First row below the frame
        VLD1        qData1, [pBot @64]!                 ;// 16 bottom-row bytes; pBot moves to next strip
Ver8Loop0
        VST1        qData0, [pDstTop @64], iPlaneStep
        VST1        qData0, [pDstTop @64], iPlaneStep
        VST1        qData0, [pDstTop @64], iPlaneStep
        VST1        qData0, [pDstTop @64], iPlaneStep
        VST1        qData0, [pDstTop @64], iPlaneStep
        VST1        qData0, [pDstTop @64], iPlaneStep
        VST1        qData0, [pDstTop @64], iPlaneStep
        VST1        qData0, [pDstTop @64], iPlaneStep
        ;// Flags set here are consumed by the BGT below.
        SUBS        Counter, Counter, #8
        VST1        qData1, [pDstBot @64], iPlaneStep
        VST1        qData1, [pDstBot @64], iPlaneStep
        VST1        qData1, [pDstBot @64], iPlaneStep
        VST1        qData1, [pDstBot @64], iPlaneStep
        VST1        qData1, [pDstBot @64], iPlaneStep
        VST1        qData1, [pDstBot @64], iPlaneStep
        VST1        qData1, [pDstBot @64], iPlaneStep
        VST1        qData1, [pDstBot @64], iPlaneStep        
        BGT         Ver8Loop0

        SUBS        Temp, Temp, #16                     ;// 16 columns done
        BGT         Hor8Loop00

        ;// Both alignment cases join here. From this point on the r5/r6
        ;// aliases pLeft/pRight are used, Offset still holds E*Step, and
        ;// the rest of the code fills the left and right borders.
        ;// Note on label names below: the "HorLoopNN" labels iterate over
        ;// rows and the "VerLoopN" labels iterate across a row, 8 bytes at
        ;// a time.
EndAlignedLoop
        ADD         Temp, pSrcDstPlane, iFrameWidth     ;// One past the last pixel of row 0
        SUB         pDstRight, Temp, Offset             ;// Right border, E rows above the frame
        SUB         pRight, Temp, #1                    ;// Last pixel of row 0
        SUB         pDstLeft, pSrcDstPlane, Offset    
        SUB         pDstLeft, pDstLeft, iExpandPels     ;// Left border, E rows above and E columns left
        ADD         pLeft, pSrcDstPlane, #0             ;// First pixel of row 0
        
        ;// Each load reads one byte, replicates it into all 8 lanes, and then
        ;// advances its source pointer to the next row.
        VLD1        {dData0 []}, [pLeft], iPlaneStep        ;// Top-Left corner pixel from frame duplicated in dData0
        ;// Offset is redefined: Step - E is the distance from the end of one
        ;// E-byte border run to the start of the run on the next row.
        SUB         Offset, iPlaneStep, iExpandPels
        VLD1        {dData1 []}, [pRight], iPlaneStep       ;// Top-Right corner pixel from frame duplicated in dData1
        MOV         Temp, iExpandPels                       ;// Row counter for the corner rows

        ;//
        ;// Copy top-left and top-right region of the plane as follows
        ;// top-left region = top-left corner pixel from the frame
        ;// top-right region = top-right corner pixel from the frame
        ;//
        ;// The row loop ends with BPL rather than BGT, so it runs E+1 times:
        ;// the E rows above the frame plus the left/right border of frame
        ;// row 0, whose edge pixels are the ones already held in dData0/dData1.
        ;//
HorLoop11
        MOV         Counter, iExpandPels
VerLoop1
        VST1        dData0, [pDstLeft], #8
        SUBS        Counter, Counter, #8                ;// Flags consumed by BGT; VST1 leaves them intact
        VST1        dData1, [pDstRight], #8        
        BGT         VerLoop1

        SUBS        Temp, Temp, #1
        ;// ADD (without S) keeps the flags from SUBS for the BPL below.
        ADD         pDstLeft, pDstLeft, Offset          ;// Start of the left border on the next row
        ADD         pDstRight, pDstRight, Offset        ;// Start of the right border on the next row
        BPL         HorLoop11

        ;// Row 0 has been handled above; H-1 frame rows remain.
        SUB         iFrameHeight, iFrameHeight, #1
        ;//
        ;// Copy left and right region of the plane as follows
        ;// Left region = copy the row with left start pixel from the frame
        ;// Right region = copy the row with right end pixel from the frame
        ;//
HorLoop22
        VLD1        {dData0 []}, [pLeft], iPlaneStep    ;// First pixel of this row, replicated
        MOV         Counter, iExpandPels
        VLD1        {dData1 []}, [pRight], iPlaneStep   ;// Last pixel of this row, replicated
VerLoop2
        VST1        dData0, [pDstLeft], #8
        SUBS        Counter, Counter, #8
        VST1        dData1, [pDstRight], #8        
        BGT         VerLoop2

        SUBS        iFrameHeight, iFrameHeight, #1
        ADD         pDstLeft, pDstLeft, Offset
        ADD         pDstRight, pDstRight, Offset
        BGT         HorLoop22
                
        ;// dData0/dData1 still hold the edge pixels of the last frame row,
        ;// which are the bottom-left and bottom-right corner pixels.
        MOV         Temp, iExpandPels
        ;//
        ;// Copy bottom-left and bottom-right region of the plane as follows
        ;// bottom-left region = bottom-left corner pixel from the frame
        ;// bottom-right region = bottom-right corner pixel from the frame
        ;//
        ;// Unlike the top corner loop, this one ends with BGT and therefore
        ;// runs exactly E times.
        ;//
HorLoop33
        MOV         Counter, iExpandPels
VerLoop3
        VST1        dData0, [pDstLeft], #8
        SUBS        Counter, Counter, #8
        VST1        dData1, [pDstRight], #8        
        BGT         VerLoop3

        SUBS        Temp, Temp, #1
        ADD         pDstLeft, pDstLeft, Offset
        ADD         pDstRight, pDstRight, Offset
        BGT         HorLoop33
End
        ;// Return status: always success.
        MOV         r0, #OMX_Sts_NoErr
        
        M_END    
    
    ENDIF



        
;// Guarding implementation by the processor name
    
 
            
    END