/*
 * File: media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.S
 * Blob: 8b2c67801ac6fec34a51951630f08dc8d6f73588
 */
/*
 * Copyright (C) 2007-2008 ARM Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */
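/*
 * Defines two routines, armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe and
 * armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe, in ARM-mode GNU assembler
 * syntax.
 */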

    .eabi_attribute 24, 1
    .eabi_attribute 25, 1

    .arm
    .fpu neon
    .text

    .global armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
/*
 * armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
 *
 * Narrows four rows of eight 16-bit lanes to four rows of eight bytes.
 * Each lane is processed as
 *     byte = clamp(lane -sat 0x0fe0, 0, 8191) >> 5
 * where "-sat" is the unsigned saturating subtract done by uqsub16 and the
 * clamp is the 13-bit unsigned saturation done by usat16.
 *
 * Registers read on entry:
 *   r0  - address of the first source row (16 bytes are loaded per row)
 *   r1  - byte offset added to r0 after each row is loaded
 *   r7  - address at which the 32 result bytes are stored
 *
 * Registers on return:
 *   r0  - the value r7 had on entry (r7 advances by 32, then 32 is removed)
 *   r1  - 8
 *   r7  - entry value + 32
 *   r4-r6 are restored from the stack; r10, r11, r12 and the N/Z/C/V flags
 *   are overwritten.
 *
 * NOTE(review): r7, r10 and r11 are changed without being saved, so this
 * routine presumably follows a private register contract with its callers
 * rather than the standard procedure-call rules - confirm against callers.
 */
armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe:
    push    {r4-r6, lr}
    mov     lr, #4                      /* lr counts the rows still to do */
    ldr     r6, =0x0fe00fe0             /* 0x0fe0 in both halfword lanes */
    ldr     r12, =0x00ff00ff            /* low byte of both halfword lanes */
.LHorDiagCopy_row:
    /* Load one 16-byte row, highest word first; the last load steps r0 by r1. */
    ldr     r11, [r0, #12]
    ldr     r10, [r0, #8]
    ldr     r5, [r0, #4]
    ldr     r4, [r0], r1
    /* Per lane: unsigned saturating subtract of 0x0fe0. */
    uqsub16 r11, r11, r6
    uqsub16 r10, r10, r6
    uqsub16 r5, r5, r6
    uqsub16 r4, r4, r6
    /* Per lane: saturate the (signed) lane value to the range 0..8191. */
    usat16  r11, #13, r11
    usat16  r10, #13, r10
    usat16  r5, #13, r5
    usat16  r4, #13, r4
    /*
     * Per lane: (lane >> 5) & 0xff.  The shift is applied to the whole word,
     * so the mask also removes upper-lane bits that moved into the lower lane.
     */
    and     r11, r12, r11, lsr #5
    and     r10, r12, r10, lsr #5
    and     r5, r12, r5, lsr #5
    and     r4, r12, r4, lsr #5
    /*
     * Merge pairs of words: the second operand is moved up by 8 bits so its
     * two bytes occupy bits 15:8 and 31:24 of the result.
     */
    orr     r11, r10, r11, lsl #8       /* r11 = bytes of words +8 and +12 */
    orr     r10, r4, r5, lsl #8         /* r10 = bytes of words +0 and +4 */
    subs    lr, lr, #1
    strd    r10, r11, [r7], #8          /* write r10 then r11, r7 += 8 */
    bgt     .LHorDiagCopy_row           /* repeat while rows remain */
    sub     r0, r7, #32                 /* r0 = start of the stored data */
    mov     r1, #8
    pop     {r4-r6, pc}

    .global armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
/*
 * armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
 *
 * Narrows four rows of eight 16-bit lanes to bytes and stores them in a
 * re-ordered layout.  Each lane is processed as
 *     byte = clamp(lane -sat 0x0fe0, 0, 8191) >> 5
 * where "-sat" is the unsigned saturating subtract done by uqsub16 and the
 * clamp is the 13-bit unsigned saturation done by usat16.
 *
 * The loop runs twice and handles two source rows (A, then B) per pass.
 * With r7 taken at the start of a pass, the four words are stored at:
 *   r7 + 0   row A: low halfword of r10 (bottom), low halfword of r11 (top)
 *   r7 + 4   row B: same combination
 *   r7 + 8   row A: high halfword of r10 (bottom), high halfword of r11 (top)
 *   r7 + 12  row B: same combination
 * r10/r11 are the packed row halves left by VERDIAG_NARROW_ROW below.
 *
 * Registers read on entry:
 *   r0  - address of the first source row (16 bytes are loaded per row)
 *   r1  - byte offset added to r0 after each row is loaded
 *   r7  - address at which the 32 result bytes are stored
 *
 * Registers on return:
 *   r0  - final r7 - 24, which equals the entry value of r7 + 8
 *   r1  - 4
 *   r7  - entry value + 32
 *   r4-r6 are restored from the stack; r10, r11, r12 and the N/Z/C/V flags
 *   are overwritten.
 *
 * NOTE(review): r7, r10 and r11 are changed without being saved, and r0 is
 * returned 8 bytes past the first stored word; both presumably follow a
 * private contract with the callers - confirm against callers.
 */

/*
 * VERDIAG_NARROW_ROW: load one 16-byte row from r0 (then r0 += r1) and narrow
 * its eight halfword lanes to bytes.
 *   Needs:   r6 = 0x0fe00fe0, r12 = 0x00ff00ff
 *   Leaves:  r10 = bytes from the words at +0 and +4
 *            r11 = bytes from the words at +8 and +12
 *            (in each result the second word of the pair occupies
 *             bits 15:8 and 31:24)
 *   Scratch: r4, r5
 */
    .macro  VERDIAG_NARROW_ROW
    ldr     r11, [r0, #12]
    ldr     r10, [r0, #8]
    ldr     r5, [r0, #4]
    ldr     r4, [r0], r1
    /* Per lane: unsigned saturating subtract of 0x0fe0. */
    uqsub16 r11, r11, r6
    uqsub16 r10, r10, r6
    uqsub16 r5, r5, r6
    uqsub16 r4, r4, r6
    /* Per lane: saturate the (signed) lane value to the range 0..8191. */
    usat16  r11, #13, r11
    usat16  r10, #13, r10
    usat16  r5, #13, r5
    usat16  r4, #13, r4
    /* Per lane: (lane >> 5) & 0xff; the mask also drops cross-lane bits. */
    and     r11, r12, r11, lsr #5
    and     r10, r12, r10, lsr #5
    and     r5, r12, r5, lsr #5
    and     r4, r12, r4, lsr #5
    /* Merge word pairs, second word moved up by one byte. */
    orr     r11, r10, r11, lsl #8
    orr     r10, r4, r5, lsl #8
    .endm

armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe:
    push    {r4-r6, lr}
    ldr     r6, =0x0fe00fe0             /* 0x0fe0 in both halfword lanes */
    ldr     r12, =0x00ff00ff            /* low byte of both halfword lanes */
    mov     lr, #2                      /* two passes of two rows each */
.LVerDiagCopy_pair:
    /* Row A of this pass. */
    VERDIAG_NARROW_ROW
    pkhbt   r4, r10, r11, lsl #16       /* r4 = r10 low half | r11 low half << 16 */
    str     r4, [r7], #8                /* store at +0, r7 now at +8 */
    pkhtb   r5, r11, r10, asr #16       /* r5 = r11 high half | r10 high half >> 16 */
    str     r5, [r7], #-4               /* store at +8, r7 back to +4 */

    /* Row B of this pass. */
    VERDIAG_NARROW_ROW
    pkhbt   r4, r10, r11, lsl #16
    subs    lr, lr, #1
    str     r4, [r7], #8                /* store at +4, r7 now at +12 */
    pkhtb   r5, r11, r10, asr #16
    str     r5, [r7], #4                /* store at +12, r7 now at +16 */
    bgt     .LVerDiagCopy_pair          /* repeat while passes remain */
    sub     r0, r7, #24
    mov     r1, #4
    pop     {r4-r6, pc}

    .end