summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
blob: 66cfe5ef9266ad6d8d41885bf5c783233859033f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
;//
;// 
;// File Name:  armVCM4P10_InterpolateLuma_DiagCopy_unsafe_s.s
;// OpenMAX DL: v1.0.2
;// Revision:   9641
;// Date:       Thursday, February 7, 2008
;// 
;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
;// 
;// 
;//

        INCLUDE omxtypes_s.h
        INCLUDE armCOMM_s.h
        
        M_VARIANTS ARM1136JS

        EXPORT armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
        EXPORT armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe

;// Functions: 
;//     armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe and
;//     armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe 
;//
;// Implements re-arrangement of data from the temporary buffer to the buffer pointed to by pBuf.
;// This converts the data from 16 bit to 8 bit, and it also
;// removes the offset and saturates the result.
;//
;// Registers used as input for this function
;// r0,r1,r7 where r0 is input pointer and r1 its step size, r7 is output pointer
;//
;// Registers preserved for top level function
;// r4,r5,r6,r8,r9,r14
;//
;// Registers modified by the function
;// r7,r10,r11,r12
;//
;// Output registers
;// r0 - pointer to the destination location
;// r1 - step size to this destination location


;// Debug switch, set to false here; nothing in this file tests it.
DEBUG_ON    SETL {FALSE}
        
MASK            EQU 0x80808080  ;// Original note: mask used to implement (a+b+1)/2. Not referenced by any instruction in this file.

;// Declare input registers

pSrc0           RN 0            ;// In: current source row. Out: both functions return a destination pointer here.
srcStep0        RN 1            ;// In: post-index step applied to pSrc0 after each row. Out: step of the written data.

;// Declare other intermediate registers
;// Temp1..Temp4 receive words 0..3 of the current source row.
;// ValueA0/ValueA1 are second names for r10/r11 (Temp3/Temp4): writing
;// ValueA0 destroys Temp3 and writing ValueA1 destroys Temp4.
Temp1           RN 4
Temp2           RN 5
Temp3           RN 10
Temp4           RN 11
pBuf            RN 7            ;// Output pointer, advanced by the post-indexed stores
r0x0fe00fe0     RN 6            ;// Loaded with 0x0fe00fe0: per-halfword offset removed by UQSUB16
r0x00ff00ff     RN 12           ;// Loaded with 0x00ff00ff: keeps the low byte of each halfword
Count           RN 14           ;// Loop counter. r14 is also the link register.
                                ;// NOTE(review): relies on M_START/M_END saving and restoring r14 - confirm in armCOMM_s.h
ValueA0         RN 10           ;// Even register of the r10/r11 pair stored by STRD
ValueA1         RN 11

    IF ARM1136JS


        ;// ------------------------------------------------------------------
        ;// armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe
        ;//
        ;// Narrows four rows of the 16-bit temporary buffer to bytes and
        ;// writes them as four consecutive 8-byte rows starting at pBuf.
        ;//
        ;// In:   pSrc0    (r0) - first source row; four words are read per row
        ;//       srcStep0 (r1) - step added to pSrc0 after each row
        ;//       pBuf     (r7) - destination; 32 bytes are written
        ;// Out:  pSrc0    (r0) - start of the 32 bytes just written
        ;//       srcStep0 (r1) - 8, the row step of the written data
        ;// Also changes r7 (left 32 bytes past its entry value), r10, r11, r12.
        ;// r4, r5, r6 and r14 are used as scratch; the file header lists them
        ;// as preserved, presumably through M_START/M_END - see armCOMM_s.h.
        ;// ------------------------------------------------------------------
        M_START armVCM4P10_InterpolateLuma_HorDiagCopy_unsafe, r6

        LDR         r0x00ff00ff, =0x00ff00ff            ;// low-byte-of-halfword mask
        LDR         r0x0fe00fe0, =0x0fe00fe0            ;// offset carried by every halfword
        MOV         Count, #4                           ;// four rows to convert

HorDiagCopyRow
        ;// Fetch the four words of this row. Word 0 is loaded last because
        ;// its post-indexed load also moves pSrc0 on to the next row.
        LDR         Temp2, [pSrc0, #4]
        LDR         Temp3, [pSrc0, #8]
        LDR         Temp4, [pSrc0, #12]
        M_LDR       Temp1, [pSrc0], srcStep0

        ;// Remove the offset from both halfwords of each word (clamps at 0)
        UQSUB16     Temp2, Temp2, r0x0fe00fe0
        UQSUB16     Temp3, Temp3, r0x0fe00fe0
        UQSUB16     Temp4, Temp4, r0x0fe00fe0
        UQSUB16     Temp1, Temp1, r0x0fe00fe0

        ;// Clamp each halfword (taken as signed) to the range 0..0x1fff
        USAT16      Temp2, #13, Temp2
        USAT16      Temp3, #13, Temp3
        USAT16      Temp4, #13, Temp4
        USAT16      Temp1, #13, Temp1

        ;// Drop the five low bits; the mask leaves one byte per halfword
        AND         Temp2, r0x00ff00ff, Temp2, LSR #5
        AND         Temp3, r0x00ff00ff, Temp3, LSR #5
        AND         Temp4, r0x00ff00ff, Temp4, LSR #5
        AND         Temp1, r0x00ff00ff, Temp1, LSR #5

        ;// Interleave the bytes of words 2/3, then of words 0/1.
        ;// ValueA0 is the same register as Temp3, so ValueA1 has to be
        ;// formed first while Temp3 is still intact.
        ORR         ValueA1, Temp3, Temp4, LSL #8
        ORR         ValueA0, Temp1, Temp2, LSL #8

        SUBS        Count, Count, #1                    ;// flags survive the store below
        STRD        ValueA0, [pBuf], #8                 ;// write the 8-byte row, advance pBuf
        BGT         HorDiagCopyRow

HorDiagCopyDone
        ;// Hand the written block back: r0 = pBuf - 32 (four rows of 8 bytes), r1 = 8
        SUB        pSrc0, pBuf, #32
        MOV        srcStep0, #8

        M_END


        ;// ------------------------------------------------------------------
        ;// M_VERDIAGCOPY_NARROW
        ;//
        ;// Shared by both halves of the VerDiagCopy loop below. Reads the four
        ;// words at pSrc0, advances pSrc0 by srcStep0, removes the 0x0fe0
        ;// offset from every halfword (UQSUB16, clamps at 0), clamps each
        ;// halfword to 0..0x1fff (USAT16 #13), drops the five low bits and
        ;// interleaves the resulting bytes:
        ;//     ValueA1 = [d2 c2 d0 c0]     ValueA0 = [b2 a2 b0 a0]
        ;// Expects r0x0fe00fe0 and r0x00ff00ff to hold their constants.
        ;// Changes Temp1, Temp2, Temp3/ValueA0, Temp4/ValueA1 and pSrc0.
        ;// Does not change the flags.
        ;// ------------------------------------------------------------------
        MACRO
        M_VERDIAGCOPY_NARROW
        LDR         Temp4, [pSrc0, #12]
        LDR         Temp3, [pSrc0, #8]
        LDR         Temp2, [pSrc0, #4]
        M_LDR       Temp1, [pSrc0], srcStep0             ;// last: also steps pSrc0

        UQSUB16     Temp4, Temp4, r0x0fe00fe0
        UQSUB16     Temp3, Temp3, r0x0fe00fe0
        UQSUB16     Temp2, Temp2, r0x0fe00fe0
        UQSUB16     Temp1, Temp1, r0x0fe00fe0

        USAT16      Temp4, #13, Temp4
        USAT16      Temp3, #13, Temp3
        USAT16      Temp2, #13, Temp2
        USAT16      Temp1, #13, Temp1

        AND         Temp4, r0x00ff00ff, Temp4, LSR #5
        AND         Temp3, r0x00ff00ff, Temp3, LSR #5
        AND         Temp2, r0x00ff00ff, Temp2, LSR #5
        AND         Temp1, r0x00ff00ff, Temp1, LSR #5
        ;// ValueA1 first: ValueA0 shares its register with Temp3
        ORR         ValueA1, Temp3, Temp4, LSL #8        ;// [d2 c2 d0 c0]
        ORR         ValueA0, Temp1, Temp2, LSL #8        ;// [b2 a2 b0 a0]
        MEND

        ;// ------------------------------------------------------------------
        ;// armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe
        ;//
        ;// Narrows four source rows of the 16-bit temporary buffer to bytes.
        ;// Each source row yields two 4-byte output words, which are stored
        ;// 8 bytes apart; 32 bytes are written at pBuf in total.
        ;//
        ;// In:   pSrc0    (r0) - first source row; four words are read per row
        ;//       srcStep0 (r1) - step added to pSrc0 after each row
        ;//       pBuf     (r7) - destination; 32 bytes are written
        ;// Out:  pSrc0    (r0) - pBuf(entry) + 8, i.e. final pBuf - (32-8)
        ;//       srcStep0 (r1) - 4
        ;// Also changes r7 (left 32 bytes past its entry value), r10, r11, r12.
        ;// r4, r5, r6 and r14 are used as scratch; the file header lists them
        ;// as preserved, presumably through M_START/M_END - see armCOMM_s.h.
        ;// NOTE(review): the returned pointer skips the first two 4-byte words
        ;// of the written area; the reason is not visible here - confirm
        ;// against the caller.
        ;// ------------------------------------------------------------------
        M_START armVCM4P10_InterpolateLuma_VerDiagCopy_unsafe, r6

        ;// Code start
        LDR         r0x0fe00fe0, =0x0fe00fe0
        LDR         r0x00ff00ff, =0x00ff00ff
        MOV         Count, #2                            ;// two source rows per pass, two passes

LoopStart
        ;// With P = pBuf on entry to this pass, the first source row writes
        ;// P+0 and P+8 and leaves pBuf = P+4.
        M_VERDIAGCOPY_NARROW

        PKHBT       Temp1, ValueA0, ValueA1, LSL #16     ;// [d0 c0 b0 a0]

        STR         Temp1, [pBuf], #8
        PKHTB       Temp2, ValueA1, ValueA0, ASR #16     ;// [d2 c2 b2 a2]
        STR         Temp2, [pBuf], #-4

        ;// The second source row fills the gaps, P+4 and P+12, and leaves
        ;// pBuf = P+16 for the next pass.
        M_VERDIAGCOPY_NARROW

        PKHBT       Temp1, ValueA0, ValueA1, LSL #16     ;// [d0 c0 b0 a0]
        SUBS        Count, Count, #1                     ;// flags survive the stores below
        STR         Temp1, [pBuf], #8
        PKHTB       Temp2, ValueA1, ValueA0, ASR #16     ;// [d2 c2 b2 a2]
        STR         Temp2, [pBuf], #4

        BGT         LoopStart
End2
        SUB         pSrc0, pBuf, #32-8
        MOV         srcStep0, #4

        M_END

    ENDIF
    
    END