@
@ Copyright (C) 2009 The Android Open Source Project
@
@ Licensed under the Apache License, Version 2.0 (the "License");
@ you may not use this file except in compliance with the License.
@ You may obtain a copy of the License at
@
@ http://www.apache.org/licenses/LICENSE-2.0
@
@ Unless required by applicable law or agreed to in writing, software
@ distributed under the License is distributed on an "AS IS" BASIS,
@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ See the License for the specific language governing permissions and
@ limitations under the License.
@
#include "asm_common.S"
preserve8
.fpu neon
.text
/*
 * Input / output registers.
 * These are C-preprocessor aliases: every later use of one of these words
 * in the file is replaced by the register name on the right.
 */
/* r0: source pointer, first argument of h264bsdFillRow7 */
#define ref r0
/* r1: destination pointer, second argument */
#define fill r1
/* r2: count for the leading fill loop, third argument */
#define left r2
/* r2 again: scratch; shares the register with left and is only written
   after the leading fill loop has finished with it */
#define tmp2 r2
/* r3: number of bytes to copy from ref, fourth argument */
#define center r3
/* r4: count for the trailing fill loop; the function loads it from the
   stack (fifth argument) */
#define right r4
/* r5: holds the byte read from ref[0] that the leading fill loop stores */
#define tmp1 r5
/*
 * -- NEON registers --
 * The .U8 suffix attaches an unsigned 8-bit element type to each operand.
 * Q0 is the same storage as D0/D1 and Q1 the same as D2/D3, so the q and d
 * aliases below overlap each other.
 */
#define qTmp0 Q0.U8
#define qTmp1 Q1.U8
#define dTmp0 D0.U8
#define dTmp1 D1.U8
#define dTmp2 D2.U8
#define dTmp3 D3.U8
/*
 * void h264bsdFillRow7(const u8 * ref, u8 * fill, i32 left, i32 center,
 *                      i32 right);
 *
 * Writes left + center + right bytes to fill, in three phases:
 *   1. left   copies of the byte at ref[0];
 *   2. center bytes copied from ref (ref advances by center);
 *   3. right  copies of the byte just before the advanced ref, i.e. the
 *             last byte copied in phase 2 (ref[-1] when center is 0).
 * A phase whose count is 0 is skipped without touching memory.
 *
 * In:      ref = r0, fill = r1, left = r2, center = r3,
 *          right = fifth argument, loaded from the stack into r4.
 * Returns: nothing (void prototype); r0 is left holding the advanced ref.
 * Saved:   r4-r6 and lr are pushed on entry and popped on exit (r6 is not
 *          used by the body).
 * Writes:  r0-r5, the condition flags and NEON D0-D3 (Q0-Q1).
 *
 * The counts are not range-checked: each loop ends only when its counter
 * reaches exactly zero.
 */
function h264bsdFillRow7, export=1
        PUSH    {r4-r6,lr}
        LDR     right, [sp,#16]             /* 5th argument sits above the 4 pushed words */

        /* ---- phase 1: replicate ref[0] left times ---- */
        CMP     left, #0
        BEQ     .Lfr7_dispatch
        LDRB    tmp1, [ref,#0]
.Lfr7_left:
        STRB    tmp1, [fill], #1
        SUBS    left, left, #1
        BNE     .Lfr7_left

        /*
         * ---- phase 2: copy center bytes ----
         * When center / 4 is in 0..8 the first 4 * (center / 4) bytes are
         * copied by straight-line code selected through the branch table
         * below; the byte loop then handles whatever is left.
         * An ARM-state read of pc yields the address of the ADD plus 8, so
         * index 0 selects the second table entry.  The first entry is only
         * reached when ADDCC is not executed (index >= 9 in the unsigned
         * compare, which also covers a negative center).
         * The table must stay directly after the ADDCC, one instruction
         * per entry, in this order.
         * NOTE(review): relies on 4-byte ARM (A32) encodings - confirm the
         * file is never assembled as Thumb.
         */
.Lfr7_dispatch:
        ASR     tmp2, center, #2            /* tmp2 reuses r2; left is finished */
        CMP     tmp2, #9
        ADDCC   pc, pc, tmp2, LSL #2
        B       .Lfr7_bytes                 /* index >= 9: byte loop only */
        B       .Lfr7_bytes                 /* index 0 */
        B       .Lfr7_words1                /* index 1 */
        B       .Lfr7_words2                /* index 2 */
        B       .Lfr7_words3                /* index 3 */
        B       .Lfr7_words4                /* index 4 */
        B       .Lfr7_words5                /* index 5 */
        B       .Lfr7_words6                /* index 6 */
        B       .Lfr7_words7                /* index 7 */
        B       .Lfr7_words8                /* index 8 */

.Lfr7_words8:                               /* 32 bytes */
        VLD1    {qTmp0, qTmp1}, [ref]!
        VST1    {qTmp0}, [fill]!
        VST1    {qTmp1}, [fill]!
        SUB     center, center, #32
        B       .Lfr7_bytes

.Lfr7_words7:                               /* 24 + 4 bytes */
        VLD1    {dTmp0,dTmp1,dTmp2}, [ref]!
        LDR     tmp2, [ref], #4
        VST1    {dTmp0,dTmp1,dTmp2}, [fill]!
        STR     tmp2, [fill], #4
        SUB     center, center, #28
        B       .Lfr7_bytes

.Lfr7_words6:                               /* 24 bytes */
        VLD1    {dTmp0,dTmp1,dTmp2}, [ref]!
        VST1    {dTmp0,dTmp1,dTmp2}, [fill]!
        SUB     center, center, #24
        B       .Lfr7_bytes

.Lfr7_words5:                               /* 16 + 4 bytes */
        VLD1    {qTmp0}, [ref]!
        LDR     tmp2, [ref], #4
        VST1    {qTmp0}, [fill]!
        STR     tmp2, [fill], #4
        SUB     center, center, #20
        B       .Lfr7_bytes

.Lfr7_words4:                               /* 16 bytes */
        VLD1    {qTmp0}, [ref]!
        VST1    {qTmp0}, [fill]!
        SUB     center, center, #16
        B       .Lfr7_bytes

.Lfr7_words3:                               /* 8 + 4 bytes */
        VLD1    {dTmp0}, [ref]!
        LDR     tmp2, [ref], #4
        VST1    {dTmp0}, [fill]!
        STR     tmp2, [fill], #4
        SUB     center, center, #12
        B       .Lfr7_bytes

.Lfr7_words2:                               /* 4 bytes, then 4 more below */
        LDR     tmp2, [ref], #4
        STR     tmp2, [fill], #4
        SUB     center, center, #4
        /* fall through */
.Lfr7_words1:                               /* 4 bytes */
        LDR     tmp2, [ref], #4
        STR     tmp2, [fill], #4
        SUB     center, center, #4
        /* fall through */

        /* Copy the bytes still outstanding in center, one at a time. */
.Lfr7_bytes:
        CMP     center, #0
        BEQ     .Lfr7_right
.Lfr7_byte_loop:
        LDRB    tmp2, [ref], #1
        STRB    tmp2, [fill], #1
        SUBS    center, center, #1
        BNE     .Lfr7_byte_loop

        /* ---- phase 3: replicate the byte before ref right times ---- */
.Lfr7_right:
        CMP     right, #0
        BEQ     .Lfr7_done                  /* nothing to pad: do not read ref[-1] */
        LDRB    tmp2, [ref,#-1]
.Lfr7_right_loop:
        SUBS    right, right, #1
        STRB    tmp2, [fill], #1            /* STRB leaves the flags from SUBS intact */
        BNE     .Lfr7_right_loop
.Lfr7_done:
        POP     {r4-r6,pc}
endfunction
|