summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/aacenc/src/asm/ARMV7/R4R8First_v7.s
blob: 99ee68b88ed71793c2316ba3d6569f0823a7d2aa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@	File:		R4R8First_v7.s
@
@	Content:	Radix8First and Radix4First function armv7 assembly
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

	.section .text
	.global	Radix8First

@------------------------------------------------------------------------------
@ void Radix8First(int *buf, int num)
@
@ First-stage radix-8 FFT butterflies, computed in place with NEON.
@ Each iteration consumes 16 int32 words from buf (presumably 8 interleaved
@ re/im complex pairs, matching the buf[0..15] comments below — confirm
@ against the C reference Radix8First).
@
@ In:     r0 = buf (int32 *), r1 = num (butterfly group count; 0 => no-op)
@ Out:    buf rewritten in place; r0, r1 are consumed (scratch per AAPCS)
@ Clobb:  r3, q0-q9, q15, flags
@ Note:   d8-d15 are callee-saved in AAPCS, so they are preserved with
@         vpush/vpop (the original code clobbered them without saving).
@------------------------------------------------------------------------------
Radix8First:
	stmdb     		sp!, {r4 - r11, lr}
	vpush			{d8 - d15}		@ AAPCS: d8-d15 must survive this call

	ldr       		r3, SQRT1_2		@ 0x2d413ccd ~= sqrt(2)/4 in Q31
	cmp       		r1, #0
	
	VDUP.I32  		Q15, r3			@ broadcast twiddle constant for VQDMULH
	beq       		Radix8First_END
	
Radix8First_LOOP:
	VLD1.I32			{d0, d1, d2, d3},	[r0]!
	VLD1.I32			{d8, d9, d10, d11},	[r0]!
		
	VADD.S32			d4, d0, d1		@ r0 = buf[0] + buf[2]@i0 = buf[1] + buf[3]@
	VSUB.S32			d5, d0, d1		@ r1 = buf[0] - buf[2]@i1 = buf[1] - buf[3]@	
	VSUB.S32			d7, d2, d3		@ r2 = buf[4] - buf[6]@i2 = buf[5] - buf[7]@	
	VADD.S32			d6, d2, d3		@ r3 = buf[4] + buf[6]@i3 = buf[5] + buf[7]@
	VREV64.I32			d7, d7	
	
	VADD.S32			Q0, Q2, Q3		@ r4 = (r0 + r2)@i4 = (i0 + i2)@i6 = (i1 + r3)@r7 = (r1 + i3)
	VSUB.S32			Q1, Q2, Q3		@ r5 = (r0 - r2)@i5 = (i0 - i2)@r6 = (r1 - i3)@i7 = (i1 - r3)@

	VREV64.I32			d3, d3	

	VADD.S32			d4, d8, d9		@ r0 = buf[ 8] + buf[10]@i0 = buf[ 9] + buf[11]@
	VSUB.S32			d7, d10, d11	@ r1 = buf[12] - buf[14]@i1 = buf[13] - buf[15]@	
	VADD.S32			d6, d10, d11	@ r2 = buf[12] + buf[14]@i2 = buf[13] + buf[15]@
	VREV64.I32			d7, d7	
	VSUB.S32			d5, d8, d9		@ r3 = buf[ 8] - buf[10]@i3 = buf[ 9] - buf[11]@
	
	VTRN.32				d1, d3	
	
	VADD.S32			Q4, Q2, Q3		@ t0 = (r0 + r2) >> 1@t1 = (i0 + i2) >> 1@i0 = i1 + r3@r2 = r1 + i3@
	VSUB.S32			Q5, Q2, Q3		@ t2 = (r0 - r2) >> 1@t3 = (i0 - i2) >> 1@r0 = r1 - i3@i2 = i1 - r3@
	
	VREV64.I32			d3, d3
	
	VSHR.S32			d8, d8, #1		 
	VSHR.S32			Q0, Q0, #1
	VREV64.I32			d10, d10
	VTRN.32				d11, d9
	VSHR.S32			Q1, Q1, #1
	VSHR.S32			d10, d10, #1
	VREV64.I32			d9, d9
	
	sub       			r0, r0, #0x40	@ rewind to start of this 16-word group
	
	VADD.S32			d12, d0, d8
	VSUB.S32			d16, d0, d8	
	VADD.S32			d14, d2, d10
	VSUB.S32			d18, d2, d10
	
	VSUB.S32			d4, d11, d9
	VADD.S32			d5, d11, d9
	
	VREV64.I32			d18, d18
	
	VQDMULH.S32			Q3, Q2, Q15		@ scale cross terms by sqrt(1/2) twiddle
	VTRN.32				d14, d18
	VTRN.32				d6, d7
	VREV64.I32			d18, d18	
	
	VSUB.S32			d15, d3, d6
	VREV64.I32			d7, d7
	VADD.S32			d19, d3, d6
	VADD.S32			d13, d1, d7
	VSUB.S32			d17, d1, d7
	
	VREV64.I32			d17, d17
	VTRN.32				d13, d17
	VREV64.I32			d17, d17
	
	subs       			r1, r1, #1	
	
	VST1.I32			{d12, d13, d14, d15}, [r0]!
	VST1.I32			{d16, d17, d18, d19}, [r0]!	
	bne       			Radix8First_LOOP
	
Radix8First_END:
	vpop			{d8 - d15}		@ restore callee-saved NEON regs
	ldmia     sp!, {r4 - r11, pc}	
SQRT1_2:
	.word      0x2d413ccd
	
	@ENDP  @ |Radix8First|
	
	.section .text
	.global	Radix4First

@------------------------------------------------------------------------------
@ void Radix4First(int *buf, int num)
@
@ First-stage radix-4 FFT butterflies, computed in place with NEON.
@ Each iteration reads 8 int32 words from buf (presumably 4 interleaved
@ re/im complex pairs, matching the buf[0..7] comments below — confirm
@ against the C reference Radix4First).
@
@ In:     r0 = buf (int32 *), r1 = num (butterfly group count; 0 => no-op)
@ Out:    buf rewritten in place
@ Clobb:  q0-q5, flags
@ Note:   d8-d11 (q4/q5) are callee-saved in AAPCS, so they are preserved
@         with vpush/vpop (the original code clobbered them without saving).
@------------------------------------------------------------------------------
Radix4First:
	stmdb     	sp!, {r4 - r11, lr}
	vpush		{d8 - d11}					@ AAPCS: d8-d11 must survive this call

	cmp       	r1, #0
	beq       	Radix4First_END
	
Radix4First_LOOP:
	VLD1.I32			{d0, d1, d2, d3}, [r0]					
	
	VADD.S32			d4, d0, d1							@ r0 = buf[0] + buf[2]@ r1 = buf[1] + buf[3]@		
	VSUB.S32			d5, d0, d1							@ r2 = buf[0] - buf[2]@ r3 = buf[1] - buf[3]@
	VSUB.S32			d7, d2, d3							@ r4 = buf[4] + buf[6]@ r5 = buf[5] + buf[7]@
	VADD.S32			d6, d2, d3							@ r6 = buf[4] - buf[6]@ r7 = buf[5] - buf[7]@
	
	VREV64.I32		d7, d7									@ swap re/im of the difference term
	
	VADD.S32			Q4, Q2, Q3
	VSUB.S32			Q5, Q2, Q3
	
	VREV64.I32		d11, d11
	VTRN.32				d9, d11
	subs       		r1, r1, #1	
	VREV64.I32		d11, d11
	VST1.I32			{d8, d9, d10, d11}, [r0]!

	bne       		Radix4First_LOOP
	
Radix4First_END:
	vpop			{d8 - d11}				@ restore callee-saved NEON regs
	ldmia    		sp!, {r4 - r11, pc}

	@ENDP  @ |Radix4First|
	.end