1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
|
;//
;//
;// File Name: armVCM4P10_Average_4x_Align_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision: 12290
;// Date: Wednesday, April 9, 2008
;//
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;//
;//
;//
;// Functions:
;// armVCM4P10_Average_4x4_Align<ALIGNMENT>_unsafe
;//
;// Implements the rounding average of two 4x4 blocks: c = (a+b+1)>>1.
;// The first operand is at offset ALIGNMENT from an aligned address.
;// The second operand is at an aligned location and is also used as the
;// output, i.e. the destination pointed to by (pDst) for vertical interpolation.
;// This function needs to copy 4 bytes in the horizontal direction.
;//
;// Registers used as input for this function
;// r0,r1,r2,r3 where r2 contains the aligned memory pointer and r3 the step size
;//
;// Registers preserved for top level function
;// r4,r5,r6,r8,r9,r14
;//
;// Registers modified by the function
;// r7,r10,r11,r12
;//
;// Output registers
;// r2 - pointer to the aligned location
;// r3 - step size to this aligned location
;// Project headers. The macros used in this file (M_VARIANTS, M_START,
;// M_END, M_LDR, M_STR) are not defined here; presumably they come from
;// armCOMM_s.h.
        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h

;// Variant handled by this file. The function bodies further down are
;// wrapped in IF ARM1136JS.
        M_VARIANTS ARM1136JS

;// Entry points, one each for alignment offsets 0, 2 and 3 of the
;// first operand.
        EXPORT armVCM4P10_Average_4x4_Align0_unsafe
        EXPORT armVCM4P10_Average_4x4_Align2_unsafe
        EXPORT armVCM4P10_Average_4x4_Align3_unsafe

;// Not referenced directly by the code in this file; presumably consumed
;// by the macros pulled in above -- TODO confirm.
DEBUG_ON        SETL {FALSE}

;// Register aliases, grouped by register number. Some registers carry two
;// names on purpose:
;//   r2, r3 - the second operand and the destination are the same buffer
;//   r4, r5 - a temporary and a result share one register; the temporary
;//            is consumed before the result is produced

;// Arguments
pPred0          RN 0        ;// first operand pointer
iPredStep0      RN 1        ;// first operand row step
pPred1          RN 2        ;// second operand pointer ...
pDstPred        RN 2        ;// ... which is also the destination pointer
iPredStep1      RN 3        ;// second operand row step ...
iDstStep        RN 3        ;// ... which is also the destination row step

;// Scratch and results
Temp1           RN 4        ;// extra word of operand-A row (Align2/Align3)
ResultB         RN 4        ;// averaged second row of a pair
Temp2           RN 5        ;// extra word of operand-A row (Align2/Align3)
ResultA         RN 5        ;// averaged first row of a pair
r0x80808080     RN 7        ;// per-byte +128 bias constant

;// Operand rows currently being processed
iPredA0         RN 10
iPredA1         RN 11
iPredB0         RN 12
iPredB1         RN 14
;// Everything from here to the closing ENDIF is assembled only when
;// ARM1136JS is set.
    IF ARM1136JS

    ;// armVCM4P10_Average_4x4_Align0_unsafe
    ;//
    ;// Rounding average of two blocks, four rows of one word (4 bytes) each:
    ;//     dst = (a + b + 1) >> 1          in every byte lane
    ;//
    ;// a : read from pPred0 (alignment offset 0, so the word is used as
    ;//     loaded); pPred0 steps by iPredStep0 after each row.
    ;// b : read from pPred1 with row step iPredStep1. pDstPred/iDstStep are
    ;//     the same registers as pPred1/iPredStep1, so each result row is
    ;//     stored over the b row it was computed from.
    ;//
    ;// The average is formed without widening the bytes:
    ;//     (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
    ;// MVN produces 255-b per byte, UHSUB8 is the per-byte halving subtract
    ;// and EOR with 0x80808080 supplies the +128 (mod 256).
    ;//
    ;// Rows are handled as two pairs. All loads of a pair are issued before
    ;// its stores.
    ;//
    ;// On exit pPred0 has advanced by 4*iPredStep0 and pDstPred by
    ;// 4*iDstStep (assuming M_LDR/M_STR expand to LDR/STR with the
    ;// addressing mode as written -- confirm against armCOMM_s.h).
    ;//
    ;// NOTE(review): r4, r5 and r14 are written here; presumably
    ;// "M_START ..., r6" / M_END save and restore them -- confirm.
    M_START armVCM4P10_Average_4x4_Align0_unsafe, r6

        ;// Per-byte bias used for the final +128
        LDR         r0x80808080, =0x80808080

        ;// ---- rows 0 and 1: fetch ----
        M_LDR       iPredB0, [pPred1]
        M_LDR       iPredA0, [pPred0], iPredStep0
        M_LDR       iPredB1, [pPred1, iPredStep1]
        M_LDR       iPredA1, [pPred0], iPredStep0

        ;// ---- rows 0 and 1: average ----
        MVN         iPredB0, iPredB0                    ;// 255 - b
        MVN         iPredB1, iPredB1
        UHSUB8      ResultA, iPredA0, iPredB0           ;// (a - (255-b)) / 2
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080       ;// + 128
        EOR         ResultB, ResultB, r0x80808080

        ;// ---- rows 0 and 1: write back over operand b ----
        M_STR       ResultA, [pDstPred], iDstStep
        M_STR       ResultB, [pDstPred], iDstStep

        ;// ---- rows 2 and 3: fetch (pPred1 now points at row 2) ----
        M_LDR       iPredA0, [pPred0], iPredStep0
        M_LDR       iPredB0, [pPred1]
        M_LDR       iPredA1, [pPred0], iPredStep0
        M_LDR       iPredB1, [pPred1, iPredStep1]

        ;// ---- rows 2 and 3: average ----
        MVN         iPredB0, iPredB0
        MVN         iPredB1, iPredB1
        UHSUB8      ResultA, iPredA0, iPredB0
        UHSUB8      ResultB, iPredA1, iPredB1
        EOR         ResultA, ResultA, r0x80808080
        EOR         ResultB, ResultB, r0x80808080

        ;// ---- rows 2 and 3: write back ----
        M_STR       ResultA, [pDstPred], iDstStep
        M_STR       ResultB, [pDstPred], iDstStep

End0
    M_END
;// This function calculates average of 4x4 block
;// pPred0 is at alignment offset 2 and pPred1 is alignment 4
;// Function header
M_START armVCM4P10_Average_4x4_Align2_unsafe, r6
;// Code start
LDR r0x80808080, =0x80808080
;// 1st load
LDR Temp1, [pPred0, #4]
M_LDR iPredA0, [pPred0], iPredStep0
M_LDR iPredB0, [pPred1]
M_LDR iPredB1, [pPred1, iPredStep1]
M_LDR Temp2, [pPred0, #4]
M_LDR iPredA1, [pPred0], iPredStep0
MVN iPredB0, iPredB0
MVN iPredB1, iPredB1
MOV iPredA0, iPredA0, LSR #16
ORR iPredA0, iPredA0, Temp1, LSL #16
MOV iPredA1, iPredA1, LSR #16
ORR iPredA1, iPredA1, Temp2, LSL #16
;// (a+b+1)/2 = (a+256-(255-b))/2 = (a-(255-b))/2 + 128
UHSUB8 ResultA, iPredA0, iPredB0
UHSUB8 ResultB, iPredA1, iPredB1
EOR ResultA, ResultA, r0x80808080
M_STR ResultA, [pDstPred], iDstStep
EOR ResultB, ResultB, r0x80808080
M_STR ResultB, [pDstPred], iDstStep
;// 2nd load
LDR Temp1, [pPred0, #4]
M_LDR iPredA0, [pPred0], iPredStep0
LDR iPredB0, [pPred1]
LDR iPredB1, [pPred1, iPredStep1]
LDR Temp2, [pPred0, #4]
M_LDR iPredA1, [pPred0], iPredStep0
MVN iPredB0, iPredB0
MVN iPredB1, iPredB1
MOV iPredA0, iPredA0, LSR #16
ORR iPredA0, iPredA0, Temp1, LSL #16
MOV iPredA1, iPredA1, LSR #16
ORR iPredA1, iPredA1, Temp2, LSL #16
UHSUB8 ResultA, iPredA0, iPredB0
UHSUB8 ResultB, iPredA1, iPredB1
EOR ResultA, ResultA, r0x80808080
M_STR ResultA, [pDstPred], iDstStep
EOR ResultB, ResultB, r0x80808080
M_STR ResultB, [pDstPred], iDstStep
End2
M_END
;// This function calculates average of 4x4 block
;// pPred0 is at alignment offset 3 and pPred1 is alignment 4
;// Function header
M_START armVCM4P10_Average_4x4_Align3_unsafe, r6
;// Code start
LDR r0x80808080, =0x80808080
;// 1st load
LDR Temp1, [pPred0, #4]
M_LDR iPredA0, [pPred0], iPredStep0
LDR iPredB0, [pPred1]
LDR iPredB1, [pPred1, iPredStep1]
LDR Temp2, [pPred0, #4]
M_LDR iPredA1, [pPred0], iPredStep0
MVN iPredB0, iPredB0
MVN iPredB1, iPredB1
MOV iPredA0, iPredA0, LSR #24
ORR iPredA0, iPredA0, Temp1, LSL #8
MOV iPredA1, iPredA1, LSR #24
ORR iPredA1, iPredA1, Temp2, LSL #8
UHSUB8 ResultA, iPredA0, iPredB0
UHSUB8 ResultB, iPredA1, iPredB1
EOR ResultA, ResultA, r0x80808080
M_STR ResultA, [pDstPred], iDstStep
EOR ResultB, ResultB, r0x80808080
M_STR ResultB, [pDstPred], iDstStep
;// 2nd load
LDR Temp1, [pPred0, #4]
M_LDR iPredA0, [pPred0], iPredStep0
LDR iPredB0, [pPred1]
LDR iPredB1, [pPred1, iPredStep1]
LDR Temp2, [pPred0, #4]
M_LDR iPredA1, [pPred0], iPredStep0
MVN iPredB0, iPredB0
MVN iPredB1, iPredB1
MOV iPredA0, iPredA0, LSR #24
ORR iPredA0, iPredA0, Temp1, LSL #8
MOV iPredA1, iPredA1, LSR #24
ORR iPredA1, iPredA1, Temp2, LSL #8
UHSUB8 ResultA, iPredA0, iPredB0
UHSUB8 ResultB, iPredA1, iPredB1
EOR ResultA, ResultA, r0x80808080
M_STR ResultA, [pDstPred], iDstStep
EOR ResultB, ResultB, r0x80808080
M_STR ResultB, [pDstPred], iDstStep
End3
M_END
ENDIF
END
|