/*
 * Origin (repository browser metadata):
 *   path: root/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_interleave_memcpy.s
 *   blob: 63fb88d2acda0c04b36dd8838180cb30656c5789
 */
/*
 *
 * Copyright 2011 Samsung Electronics S.LSI Co. LTD
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * @file    csc_interleave_memcpy.s
 * @brief   SEC_OMX NEON routine that interleaves the bytes of two source buffers into one destination buffer
 * @author  ShinWon Lee (shinwon.lee@samsung.com)
 * @version 1.0
 * @history
 *   2011.7.01 : Create
 */
    .arch armv7-a
    .fpu neon
    .text
    .global csc_interleave_memcpy_neon
    .type   csc_interleave_memcpy_neon, %function

@-----------------------------------------------------------------------
@ csc_interleave_memcpy_neon
@
@ Byte-interleaves two source buffers into one destination buffer:
@     dest[2*i]     = src1[i]
@     dest[2*i + 1] = src2[i]        for i in [0, src_size)
@
@ Presumed C prototype (verify against the caller's header):
@     void csc_interleave_memcpy_neon(unsigned char *dest,
@                                     unsigned char *src1,
@                                     unsigned char *src2,
@                                     unsigned int   src_size);
@
@ ABI:  AAPCS (32-bit ARM), NEON required.
@ In:   r0 = dest      (receives 2 * src_size bytes)
@       r1 = src1      (src_size bytes are read)
@       r2 = src2      (src_size bytes are read)
@       r3 = src_size  (bytes per source; values <= 0 when read as
@                       signed copy nothing)
@ Out:  none
@
@ Register roles inside the function:
@       r4      i      - bytes already consumed from each source
@       r5      bound  - end index of the current stage / bytes left
@       r6, r8  bytes loaded from src1 in the scalar stages
@       r7, r9  bytes loaded from src2 in the scalar stages
@       q0-q15  NEON staging (even q = src1 data, odd q = src2 data)
@
@ The copy runs in four stages: 128-byte blocks, 64-byte blocks,
@ 2-byte pairs and finally one trailing byte.  Each stage is bounded
@ by src_size rounded DOWN to its block size, so no stage ever reads
@ or writes past the requested length.
@-----------------------------------------------------------------------
csc_interleave_memcpy_neon:
    .fnstart

    stmfd       sp!, {r4-r12,r14}       @ backup core registers
    .save       {r4-r12, lr}
    vpush       {d8-d15}                @ q4-q7 are callee-saved (AAPCS)
    .vsave      {d8-d15}

    mov         r4, #0                  @ i = 0
    cmp         r3, #128
    blt         .Lcsc_ilv_block64

    bic         r5, r3, #0x7F           @ bound = src_size & ~127
.Lcsc_ilv_block128_loop:
    pld         [r1, #64]
    vld1.8      {q0}, [r1]!
    vld1.8      {q2}, [r1]!
    vld1.8      {q4}, [r1]!
    vld1.8      {q6}, [r1]!
    pld         [r2]
    vld1.8      {q8}, [r1]!
    vld1.8      {q10}, [r1]!
    vld1.8      {q12}, [r1]!
    vld1.8      {q14}, [r1]!
    pld         [r2, #64]
    vld1.8      {q1}, [r2]!
    vld1.8      {q3}, [r2]!
    vld1.8      {q5}, [r2]!
    vld1.8      {q7}, [r2]!
    vld1.8      {q9}, [r2]!
    vld1.8      {q11}, [r2]!
    vld1.8      {q13}, [r2]!
    vld1.8      {q15}, [r2]!

    @ vst2.8 on a q-register pair writes the two registers interleaved
    @ byte by byte, i.e. src1[k], src2[k], src1[k+1], src2[k+1], ...
    vst2.8      {q0, q1}, [r0]!
    vst2.8      {q2, q3}, [r0]!
    vst2.8      {q4, q5}, [r0]!
    vst2.8      {q6, q7}, [r0]!
    vst2.8      {q8, q9}, [r0]!
    vst2.8      {q10, q11}, [r0]!
    pld         [r1]
    vst2.8      {q12, q13}, [r0]!
    vst2.8      {q14, q15}, [r0]!

    add         r4, r4, #128
    cmp         r4, r5
    blt         .Lcsc_ilv_block128_loop

.Lcsc_ilv_block64:
    sub         r5, r3, r4              @ bytes left per source
    cmp         r5, #64
    blt         .Lcsc_ilv_pairs

    @ r4 is a multiple of 64 here, so stepping by 64 lands exactly on
    @ the rounded-down bound.
    bic         r5, r3, #0x3F           @ bound = src_size & ~63
.Lcsc_ilv_block64_loop:
    pld         [r2]
    vld1.8      {q0}, [r1]!
    vld1.8      {q2}, [r1]!
    vld1.8      {q4}, [r1]!
    vld1.8      {q6}, [r1]!
    vld1.8      {q1}, [r2]!
    vld1.8      {q3}, [r2]!
    vld1.8      {q5}, [r2]!
    vld1.8      {q7}, [r2]!

    vst2.8      {q0, q1}, [r0]!
    vst2.8      {q2, q3}, [r0]!
    pld         [r1]
    vst2.8      {q4, q5}, [r0]!
    vst2.8      {q6, q7}, [r0]!

    add         r4, r4, #64
    cmp         r4, r5
    blt         .Lcsc_ilv_block64_loop

.Lcsc_ilv_pairs:
    sub         r5, r3, r4              @ bytes left per source
    cmp         r5, #2
    blt         .Lcsc_ilv_tail

    @ r4 is even here, so stepping by 2 lands exactly on the bound.
    bic         r5, r3, #1              @ bound = src_size & ~1
.Lcsc_ilv_pairs_loop:
    ldrb        r6, [r1], #1
    ldrb        r7, [r2], #1
    ldrb        r8, [r1], #1
    ldrb        r9, [r2], #1

    strb        r6, [r0], #1
    strb        r7, [r0], #1
    strb        r8, [r0], #1
    strb        r9, [r0], #1

    add         r4, r4, #2
    cmp         r4, r5
    blt         .Lcsc_ilv_pairs_loop

.Lcsc_ilv_tail:
    sub         r5, r3, r4              @ 1 if src_size is odd, else 0
    cmp         r5, #1                  @ (negative src_size never matches)
    bne         .Lcsc_ilv_exit
    ldrb        r6, [r1]                @ last odd element of each source
    ldrb        r7, [r2]
    strb        r6, [r0], #1
    strb        r7, [r0]

.Lcsc_ilv_exit:
    vpop        {d8-d15}                @ restore callee-saved NEON regs
    ldmfd       sp!, {r4-r12,r15}       @ restore core registers and return
    .fnend
    .size       csc_interleave_memcpy_neon, .-csc_interleave_memcpy_neon