/*
 * Source path: root/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_deinterleave_memcpy.s
 * Blob: dc95bfa4333c4ef05f550d0bcc9f3efd67570d12
 */
/*
 *
 * Copyright 2011 Samsung Electronics S.LSI Co. LTD
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * @file    csc_deinterleave_memcpy.s
 * @brief   NEON routine that de-interleaves a byte stream into two destination buffers
 * @author  ShinWon Lee (shinwon.lee@samsung.com)
 * @version 1.0
 * @history
 *   2011.7.01 : Create
 */
    .arch armv7-a
    .fpu neon
    .text
    .global csc_deinterleave_memcpy_neon
    .type   csc_deinterleave_memcpy_neon, %function

@-----------------------------------------------------------------------
@ csc_deinterleave_memcpy_neon
@
@ Splits an interleaved byte stream (A0 B0 A1 B1 ...) into two planes:
@ even-indexed source bytes go to dest1, odd-indexed bytes go to dest2.
@
@ Presumed C prototype (confirm against the caller's header):
@   void csc_deinterleave_memcpy_neon(unsigned char *dest1,
@                                     unsigned char *dest2,
@                                     unsigned char *src,
@                                     int src_size);
@
@ In:   r0 = dest1     receives src[0], src[2], src[4], ...
@       r1 = dest2     receives src[1], src[3], src[5], ...
@       r2 = src       interleaved input
@       r3 = src_size  number of SOURCE bytes; compared as a signed value,
@                      so a size <= 0 copies nothing
@ Out:  none. r0, r1 and r2 are left advanced past the processed data.
@
@ Internal registers:
@       r4      source bytes consumed so far
@       r5      256-byte-aligned limit, then bytes remaining
@       r6-r9   byte temporaries for the scalar tail
@
@ Clobbers: r0-r3, flags, q0-q3, q8-q15.
@ Preserved: r4-r12 and d8-d15 (q4-q7) are saved and restored here.
@ Stack:    40 bytes of core registers + 64 bytes of VFP registers.
@
@ Exactly (src_size & ~1) source bytes are read; src_size / 2 bytes are
@ written to each destination. A trailing odd byte has no partner and is
@ ignored.
@-----------------------------------------------------------------------
csc_deinterleave_memcpy_neon:
    .fnstart

    stmfd       sp!, {r4-r12,r14}       @ backup core registers
    .save       {r4-r12,r14}
    vpush       {d8-d15}                @ q4-q7 are callee-saved under AAPCS
    .vsave      {d8-d15}

    mov         r4, #0                  @ r4 = bytes of src consumed
    cmp         r3, #256
    blt         .Lcsc_deint_block_128

    bic         r5, r3, #0xFF           @ r5 = src_size rounded down to 256

    @ Main loop: 256 source bytes -> 128 bytes to each destination.
    @ vld2.8 de-interleaves while loading: even bytes land in the first
    @ register of each pair, odd bytes in the second.
.Lcsc_deint_loop_256:
    pld         [r2, #64]
    vld2.8      {q0, q1}, [r2]!
    pld         [r2, #64]
    vld2.8      {q2, q3}, [r2]!
    pld         [r2, #64]
    vld2.8      {q4, q5}, [r2]!
    pld         [r2, #64]
    vld2.8      {q6, q7}, [r2]!
    pld         [r2, #64]
    vld2.8      {q8, q9}, [r2]!
    pld         [r2, #64]
    vld2.8      {q10, q11}, [r2]!
    vld2.8      {q12, q13}, [r2]!
    vld2.8      {q14, q15}, [r2]!

    vst1.8      {q0}, [r0]!
    vst1.8      {q2}, [r0]!
    vst1.8      {q4}, [r0]!
    vst1.8      {q6}, [r0]!
    vst1.8      {q8}, [r0]!
    vst1.8      {q10}, [r0]!
    vst1.8      {q12}, [r0]!
    vst1.8      {q14}, [r0]!

    vst1.8      {q1}, [r1]!
    vst1.8      {q3}, [r1]!
    vst1.8      {q5}, [r1]!
    vst1.8      {q7}, [r1]!
    vst1.8      {q9}, [r1]!
    vst1.8      {q11}, [r1]!
    vst1.8      {q13}, [r1]!
    vst1.8      {q15}, [r1]!

    add         r4, r4, #256
    cmp         r4, r5
    blt         .Lcsc_deint_loop_256

    @ At most one 128-byte chunk can remain after the 256-byte loop.
    @ The block consumes 128 source bytes, so it requires 128 remaining.
.Lcsc_deint_block_128:
    sub         r5, r3, r4              @ r5 = bytes remaining
    cmp         r5, #128
    blt         .Lcsc_deint_tail
    pld         [r2, #64]
    vld2.8      {q0, q1}, [r2]!
    pld         [r2, #64]
    vld2.8      {q2, q3}, [r2]!
    vld2.8      {q4, q5}, [r2]!
    vld2.8      {q6, q7}, [r2]!

    vst1.8      {q0}, [r0]!             @ even bytes, in load order
    vst1.8      {q2}, [r0]!
    vst1.8      {q4}, [r0]!
    vst1.8      {q6}, [r0]!

    vst1.8      {q1}, [r1]!             @ odd bytes, in load order
    vst1.8      {q3}, [r1]!
    vst1.8      {q5}, [r1]!
    vst1.8      {q7}, [r1]!

    add         r4, r4, #128

    @ Scalar tail: fewer than 128 bytes remain. Checked BEFORE the first
    @ iteration so that an exact multiple of the vector sizes copies no
    @ extra bytes.
.Lcsc_deint_tail:
    sub         r5, r3, r4              @ r5 = bytes remaining
    cmp         r5, #4
    blt         .Lcsc_deint_tail_pair

.Lcsc_deint_loop_4:
    ldrb        r6, [r2], #1            @ even
    ldrb        r7, [r2], #1            @ odd
    ldrb        r8, [r2], #1            @ even
    ldrb        r9, [r2], #1            @ odd

    strb        r6, [r0], #1
    strb        r8, [r0], #1
    strb        r7, [r1], #1
    strb        r9, [r1], #1

    sub         r5, r5, #4
    cmp         r5, #4
    bge         .Lcsc_deint_loop_4

    @ One last even/odd pair if 2 or 3 bytes remain.
.Lcsc_deint_tail_pair:
    cmp         r5, #2
    blt         .Lcsc_deint_done
    ldrb        r6, [r2], #1
    ldrb        r7, [r2], #1
    strb        r6, [r0], #1
    strb        r7, [r1], #1

.Lcsc_deint_done:
    vpop        {d8-d15}                @ restore callee-saved NEON registers
    ldmfd       sp!, {r4-r12,r15}       @ restore registers and return
    .fnend
    .size   csc_deinterleave_memcpy_neon, .-csc_deinterleave_memcpy_neon