summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S
blob: 6ed6227931e5c58919b029b7341dc1a031a7e259 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
@
@ Copyright (C) 2009 The Android Open Source Project
@
@ Licensed under the Apache License, Version 2.0 (the "License");
@ you may not use this file except in compliance with the License.
@ You may obtain a copy of the License at
@
@      http://www.apache.org/licenses/LICENSE-2.0
@
@ Unless required by applicable law or agreed to in writing, software
@ distributed under the License is distributed on an "AS IS" BASIS,
@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ See the License for the specific language governing permissions and
@ limitations under the License.
@

#include "asm_common.S"

    /* Stack-alignment marker. Not a GNU as directive by itself; presumably
       provided as a macro by asm_common.S (TODO confirm there). */
    PRESERVE8

    /* Enable NEON instruction encodings and emit into the code section. */
    .fpu neon
    .text

/*
 * Core register aliases (expanded by the C preprocessor).
 *
 * ref, fill, left and center arrive in r0-r3; right is the fifth argument
 * and is loaded from the stack into r4 by the function prologue.
 *
 * NOTE: left and tmp2 are deliberately the SAME register (r2). tmp2 may only
 * be used once the left-fill loop has finished and left is no longer needed.
 */

#define ref     r0      /* source pointer, advanced while copying          */
#define fill    r1      /* destination pointer, advanced on every store    */
#define left    r2      /* number of leading replicated bytes              */
#define tmp2    r2      /* scratch; aliases left (see note above)          */
#define center  r3      /* number of bytes copied straight from ref        */
#define right   r4      /* number of trailing replicated bytes             */
#define tmp1    r5      /* scratch; holds the byte replicated on the left  */

/*
 * NEON register aliases.
 * dTmp0/dTmp1 are the two 64-bit halves of qTmp0 (D0, D1 overlay Q0) and
 * dTmp2/dTmp3 are the two halves of qTmp1 (D2, D3 overlay Q1).
 */

#define qTmp0     Q0
#define qTmp1     Q1
#define dTmp0     D0
#define dTmp1     D1
#define dTmp2     D2
#define dTmp3     D3

/*
void h264bsdFillRow7(const u8 * ref, u8 * fill, i32 left, i32 center,
                     i32 right);

Builds one row of (left + center + right) bytes at fill:
  1. left   copies of the byte at ref[0],
  2. center bytes copied from ref (ref is advanced past them),
  3. right  copies of the byte just before the final ref position,
            i.e. the last copied byte when center > 0.

In:    r0 = ref, r1 = fill, r2 = left, r3 = center, [sp] = right
Out:   nothing is returned in a register; the result is the data written
       through fill.
Saved: r4-r6 and lr are pushed on entry and restored on exit.
Uses:  r0-r3 are modified, Q0/Q1 (D0-D3) are used as copy buffers, and the
       condition flags are changed.

The counts are only tested for equality with zero, so they are assumed to be
non-negative (NOTE(review): a negative count would keep looping until the
counter wraps around to zero - confirm callers never pass one).

The dispatch in switch_center adds to pc directly and relies on pc reading
as "address of the current instruction + 8". That holds for ARM (A32)
encoding; presumably asm_common.S arranges for this file to be assembled as
ARM code - TODO confirm.
*/

function h264bsdFillRow7, export=1

        /* Four registers = 16 bytes pushed, so the fifth argument (right)
           is found at [sp, #0x10]. r6 is not referenced anywhere below;
           presumably it is saved only to keep the push size a multiple of
           8 bytes - TODO confirm. */
        PUSH     {r4-r6,lr}
        CMP      left, #0
        /* LDR leaves the flags alone, so BEQ still tests left == 0. */
        LDR      right, [sp,#0x10]
        BEQ      switch_center
        /* tmp1 = ref[0], the byte replicated on the left side. */
        LDRB     tmp1, [ref,#0]

/* Store tmp1 to fill, left times. */
loop_left:
        SUBS     left, left, #1
        STRB     tmp1, [fill], #1
        BNE      loop_left

/*
 * Fast path for the center part. left is finished with, so r2 is reused as
 * tmp2 from here on.
 *
 * tmp2 = center >> 2 = number of whole 4-byte words. When tmp2 is 0..8
 * (unsigned compare, condition CC), ADDCC jumps into the branch table:
 * pc reads as the ADDCC address + 8, which is the SECOND branch below, so
 * index N selects table entry N. When the condition fails (tmp2 >= 9, or
 * negative when viewed as unsigned) execution falls into the FIRST branch
 * and the whole center part is copied byte by byte.
 *
 * The table must stay exactly one 4-byte branch per entry, in this order.
 */
switch_center:
        ASR      tmp2,center,#2
        CMP      tmp2,#9
        ADDCC    pc,pc,tmp2,LSL #2
        B        loop_center            /* not dispatched: byte loop only */
        B        loop_center            /* index 0: fewer than 4 bytes    */
        B        case_1                 /* index 1:  4 bytes              */
        B        case_2                 /* index 2:  8 bytes              */
        B        case_3                 /* index 3: 12 bytes              */
        B        case_4                 /* index 4: 16 bytes              */
        B        case_5                 /* index 5: 20 bytes              */
        B        case_6                 /* index 6: 24 bytes              */
        B        case_7                 /* index 7: 28 bytes              */
        B        case_8                 /* index 8: 32 bytes              */

/*
 * Each case copies 4 * N bytes from ref to fill with post-increment on both
 * pointers and subtracts the same amount from center. The 0..3 bytes that
 * remain are handled by loop_center.
 */

/* 32 bytes: two Q registers. */
case_8:
        VLD1.8  {qTmp0, qTmp1}, [ref]!
        SUB     center, center, #32
        VST1.8  {qTmp0}, [fill]!
        VST1.8  {qTmp1}, [fill]!
        B       loop_center
/* 28 bytes: three D registers (24) plus one word (4). */
case_7:
        VLD1.8  {dTmp0,dTmp1,dTmp2}, [ref]!
        SUB     center, center, #28
        LDR     tmp2, [ref], #4
        VST1.8  {dTmp0,dTmp1,dTmp2}, [fill]!
        STR     tmp2, [fill],#4
        B       loop_center
/* 24 bytes: three D registers. */
case_6:
        VLD1.8  {dTmp0,dTmp1,dTmp2}, [ref]!
        SUB     center, center, #24
        VST1.8  {dTmp0,dTmp1,dTmp2}, [fill]!
        B       loop_center
/* 20 bytes: one Q register (16) plus one word (4). */
case_5:
        VLD1.8  {qTmp0}, [ref]!
        SUB     center, center, #20
        LDR     tmp2, [ref], #4
        VST1.8  {qTmp0}, [fill]!
        STR     tmp2, [fill],#4
        B       loop_center
/* 16 bytes: one Q register. */
case_4:
        VLD1.8  {qTmp0}, [ref]!
        SUB     center, center, #16
        VST1.8  {qTmp0}, [fill]!
        B       loop_center
/* 12 bytes: one D register (8) plus one word (4). */
case_3:
        VLD1.8  {dTmp0}, [ref]!
        SUB     center, center, #12
        LDR     tmp2, [ref], #4
        VST1.8  dTmp0, [fill]!
        STR     tmp2, [fill],#4
        B       loop_center
/* 8 bytes: one word here, then FALLS THROUGH into case_1 for the second. */
case_2:
        LDR      tmp2, [ref],#4
        SUB      center, center, #4
        STR      tmp2, [fill], #4
/* 4 bytes: one word, then falls through into the byte loop. */
case_1:
        LDR      tmp2, [ref],#4
        SUB      center, center, #4
        STR      tmp2, [fill], #4

/*
 * Copy the remaining center bytes one at a time.
 * The loop exit is the BEQ at the top. SUB (without S), LDRB and STRB do
 * not update the flags, so the BNE at the bottom still sees the "not equal"
 * result of the CMP and is always taken.
 */
loop_center:
        CMP      center, #0
        BEQ      jump
        LDRB     tmp2, [ref], #1
        SUB      center, center, #1
        STRB     tmp2, [fill], #1
        BNE      loop_center
/* Center part done. Return right away when there is no right part. */
jump:
        CMP      right,#0
        POPEQ    {r4-r6,pc}
        /* ref now points just past the copied bytes; tmp2 = ref[-1] is the
           byte replicated on the right side. */
        LDRB     tmp2, [ref,#-1]

/* Store tmp2 to fill, right times. */
loop_right:
        STRB     tmp2, [fill], #1
        SUBS     right, right, #1
        BNE      loop_right

        /* Restore the saved registers and return by popping lr into pc. */
        POP      {r4-r6,pc}

endfunction