1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
|
;//
;// Copyright (C) 2007-2008 ARM Limited
;//
;// Licensed under the Apache License, Version 2.0 (the "License");
;// you may not use this file except in compliance with the License.
;// You may obtain a copy of the License at
;//
;// http://www.apache.org/licenses/LICENSE-2.0
;//
;// Unless required by applicable law or agreed to in writing, software
;// distributed under the License is distributed on an "AS IS" BASIS,
;// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
;// See the License for the specific language governing permissions and
;// limitations under the License.
;//
;//
;//
;// File Name: omxVCM4P10_TransformDequantChromaDCFromPair_s.s
;// OpenMAX DL: v1.0.2
;// Revision: 12290
;// Date: Wednesday, April 9, 2008
;//
;//
;//
;//
INCLUDE omxtypes_s.h
INCLUDE armCOMM_s.h
IMPORT armVCM4P10_QPDivTable
IMPORT armVCM4P10_VMatrixQPModTable
M_VARIANTS CortexA8
IF CortexA8
;// ARM Registers
;// NOTE: several names below are aliases of the same physical register
;// (r0, r2, r4, r5, r6 and r9 each carry more than one name). The aliases
;// are only valid while their uses do not overlap, so check the register
;// numbers here before reordering any instruction in the function body.
;//--------------------------------------
;// Declare input registers
;//--------------------------------------
ppSrc RN 0 ;// Address of the source stream pointer (same register as <return>)
pDst RN 1 ;// Base address of the destination coefficient buffer
QP RN 2 ;// Index into both look-up tables (same register as <Scale>)
;//--------------------------------
;// Scratch variable for Unpack2x2
;//--------------------------------
pSrc RN 9 ;// Running stream pointer (same register as <Shift>)
Value RN 4 ;// Unpacked value / low byte of a two-byte value (same register as <r0w1>)
Value2 RN 5 ;// High byte of a two-byte value (same register as <c0w0>, <pQPDivTable>)
Flag RN 6 ;// Current flag byte (same register as <c1w0>, <pQPModTable>)
strOffset RN 7 ;// Byte offset into <pDst> for the store
cstOffset RN 8 ;// Constant mask applied to (Flag << 1)
;//--------------------------------
;// Scratch variable
;//--------------------------------
r0w0 RN 3 ;// First word loaded from <pDst>
r0w1 RN 4 ;// Second word loaded from <pDst>
c0w0 RN 5 ;// Packed halfword sums (SADD16 result)
c1w0 RN 6 ;// Packed halfword differences (SSUB16 result)
return RN 0 ;// Status code returned in r0
pQPDivTable RN 5 ;// Base of armVCM4P10_QPDivTable
pQPModTable RN 6 ;// Base of armVCM4P10_VMatrixQPModTable
Shift RN 9 ;// Table entry used as the left-shift amount
Scale RN 2 ;// Table entry, then the shifted scale factor
;// Neon Registers
;// dZero and dInvTrCoeff are two typed views of D0; dDqntCoeff (D2) is the
;// low half of qDqntCoeff (Q1 = D3:D2).
dZero DN D0.U16
dInvTrCoeff DN D0.S16
dScale DN D1.S16
qDqntCoeff QN Q1.S32
dDqntCoeff DN D2.S16
;//-----------------------------------------------------------------------
;// omxVCM4P10_TransformDequantChromaDCFromPair
;//
;// Unpacks a (Flag, Value) pair stream into the four 16-bit entries at
;// <pDst>, runs an inlined 2x2 add/subtract butterfly over them, multiplies
;// each result by (armVCM4P10_VMatrixQPModTable[QP] << armVCM4P10_QPDivTable[QP]),
;// shifts the products right by one and stores the four 16-bit results back
;// to <pDst>.
;//
;// In:  ppSrc (r0) - address of the byte-stream pointer; the pointer is read
;//                   on entry and written back advanced past the bytes used
;//      pDst  (r1) - destination buffer; its first 8 bytes are zeroed and
;//                   finally overwritten with the four output values
;//      QP    (r2) - byte index into both look-up tables
;// Out: r0 = OMX_Sts_NoErr (the only value this routine returns)
;//
;// NOTE(review): M_START/M_END are macros from armCOMM_s.h; the <r9>
;// argument is presumably the highest register the prologue/epilogue must
;// preserve (r4-r9 are all written below) - confirm against armCOMM_s.h.
;//-----------------------------------------------------------------------
M_START omxVCM4P10_TransformDequantChromaDCFromPair, r9
LDR pSrc, [ppSrc] ;// Load pSrc
VMOV dZero, #0 ;// 64 bits of zero, stored to <pDst> below
MOV cstOffset, #31 ;// To be used in the loop, to compute offset
;//-----------------------------------------------------------------------
;// Firstly, fill all the coefficient values on the <pDst> buffer by zero
;//-----------------------------------------------------------------------
VST1 dZero,[pDst] ;// pDst[0] = pDst[1] = pDst[2] = pDst[3] = 0
LDRB Flag, [pSrc], #1 ;// Preload <Flag> before <unpackLoop>
;//-----------------------------------------------------------------------
;// Unpack loop. Every pair starts with a <Flag> byte:
;//   bits 0-3 : index of the 16-bit entry in <pDst> to write
;//   bit  4   : set   -> the value occupies two bytes, low byte first
;//              clear -> the value is a single signed byte
;//   bit  5   : set   -> this is the last pair in the stream
;// NOTE(review): the 4-bit index is not range-checked, so the store offset
;// can reach 30 bytes although only 8 bytes were zeroed above; presumably
;// the stream is guaranteed to use indices 0-3 only - confirm with callers.
;//-----------------------------------------------------------------------
unpackLoop
TST Flag, #0x10 ;// Computing (Flag & 0x10): NE = two-byte value, EQ = one-byte value
LDRSBNE Value2,[pSrc,#1] ;// Two-byte case: high byte, sign-extended
LDRBNE Value, [pSrc], #2 ;// Two-byte case: low byte, zero-extended. Load byte wise to avoid unaligned access
AND strOffset, cstOffset, Flag, LSL #1 ;// strOffset = (Flag & 15) << 1; (flags are left untouched)
LDRSBEQ Value, [pSrc], #1 ;// One-byte case: Value = sign-extended *pSrc++
ORRNE Value,Value,Value2, LSL #8 ;// Two-byte case: Value = low byte | (high byte << 8)
TST Flag, #0x20 ;// Computing (Flag & 0x20) to check, if we're done
LDRBEQ Flag, [pSrc], #1 ;// Not done: Flag = (OMX_U8) *pSrc++, for next iteration
STRH Value, [pDst, strOffset] ;// Store the low 16 bits of <Value> at offset <strOffset>
BEQ unpackLoop ;// Branch to the loop beginning (Z still comes from the TST #0x20 above)
;//--------------------------------------------------
;//InvTransformDC2x2: Inlined (Implemented in ARM V6)
;//--------------------------------------------------
LDMIA pDst, {r0w0, r0w1} ;// r0w0 = |c1|c0| & r0w1 = |c3|c2|
STR pSrc, [ppSrc] ;// Update the bitstream pointer; r9 is free for <Shift> from here on
LDR pQPDivTable, =armVCM4P10_QPDivTable ;// QP Division look-up-table base pointer
LDR pQPModTable, =armVCM4P10_VMatrixQPModTable ;// QP Modulo look-up-table base pointer
;// SADDSUBX with identical operands leaves the sum of the two halfwords in
;// the top half of the register and their difference in the bottom half.
SADDSUBX r0w0, r0w0, r0w0 ;// [ c00+c01, c00-c01 ]
SADDSUBX r0w1, r0w1, r0w1 ;// [ c10+c11, c10-c11 ]
LDRSB Shift, [pQPDivTable, QP] ;// Shift = pQPDivTable[QP]
LDRSB Scale, [pQPModTable, QP] ;// Scale = pQPModTable[QP]; overwrites QP (r2) on its last use
;// Both table bases (r5, r6) are dead now, so c0w0/c1w0 may reuse them.
SADD16 c0w0, r0w0, r0w1 ;// [ d00+d10, d01+d11 ]
SSUB16 c1w0, r0w0, r0w1 ;// [ d00-d10, d01-d11 ]
;//-------------------------------------------------
;//DequantChromaDC2x2: Inlined (Neon Implementation)
;//-------------------------------------------------
LSL Scale, Scale, Shift ;// Scale = Scale << Shift
VMOV dInvTrCoeff, c0w0, c1w0 ;// Low word of D0 = c0w0, high word = c1w0
VREV32 dInvTrCoeff,dInvTrCoeff ;// Swap the halfwords inside each word so the value from the top half comes first
VDUP dScale,Scale ;// Broadcast the low 16 bits of <Scale> to all four lanes
VMULL qDqntCoeff,dInvTrCoeff,dScale ;// Four signed 16x16 -> 32-bit products
VSHRN dDqntCoeff,qDqntCoeff,#1 ;// Shift each product right by 1 and narrow it to 16 bits
VST1 dDqntCoeff,[pDst] ;// Storing all the coefficients at once
MOV return, #OMX_Sts_NoErr ;// Unconditional success status
M_END
ENDIF ;// CortexA8
END
|