Diffstat (limited to 'media/libstagefright/codecs/aacenc/src/asm/ARMV7')
-rw-r--r--  media/libstagefright/codecs/aacenc/src/asm/ARMV7/PrePostMDCT_v7.s  135
-rw-r--r--  media/libstagefright/codecs/aacenc/src/asm/ARMV7/R4R8First_v7.s    146
-rw-r--r--  media/libstagefright/codecs/aacenc/src/asm/ARMV7/Radix4FFT_v7.s    143
3 files changed, 424 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/aacenc/src/asm/ARMV7/PrePostMDCT_v7.s b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/PrePostMDCT_v7.s
new file mode 100644
index 0000000..bf7dcba
--- /dev/null
+++ b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/PrePostMDCT_v7.s
@@ -0,0 +1,135 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ File: PrePostMDCT_v7.s
+@
+@ Content: PreMDCT and PostMDCT functions, ARMv7 (NEON) assembly
+@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+ .section .text
+ .global PreMDCT
+
+PreMDCT:
+ stmdb sp!, {r4 - r11, lr}
+
+ add r9, r0, r1, lsl #2
+ sub r3, r9, #32
+
+ movs r1, r1, asr #2
+ beq PreMDCT_END
+
+PreMDCT_LOOP:
+ VLD4.I32 {d0, d2, d4, d6}, [r2]! @ cosa = *csptr++@ sina = *csptr++@
+ VLD4.I32 {d1, d3, d5, d7}, [r2]! @ cosb = *csptr++@ sinb = *csptr++@
+ VLD2.I32 {d8, d9, d10, d11}, [r0] @ tr1 = *(buf0 + 0)@ ti2 = *(buf0 + 1)@
+ VLD2.I32 {d13, d15}, [r3]! @ tr2 = *(buf1 - 1)@ ti1 = *(buf1 + 0)@
+ VLD2.I32 {d12, d14}, [r3]! @ tr2 = *(buf1 - 1)@ ti1 = *(buf1 + 0)@
+
+ VREV64.32 Q8, Q7
+ VREV64.32 Q9, Q6
+
+
+ VQDMULH.S32 Q10, Q0, Q4 @ MULHIGH(cosa, tr1)
+ VQDMULH.S32 Q11, Q1, Q8 @ MULHIGH(sina, ti1)
+ VQDMULH.S32 Q12, Q0, Q8 @ MULHIGH(cosa, ti1)
+ VQDMULH.S32 Q13, Q1, Q4 @ MULHIGH(sina, tr1)
+
+ VADD.S32 Q0, Q10, Q11 @ *buf0++ = MULHIGH(cosa, tr1) + MULHIGH(sina, ti1)@
+ VSUB.S32 Q1, Q12, Q13 @ *buf0++ = MULHIGH(cosa, ti1) - MULHIGH(sina, tr1)@
+
+ VST2.I32 {d0, d1, d2, d3}, [r0]!
+ sub r3, r3, #32
+
+ VQDMULH.S32 Q10, Q2, Q9 @ MULHIGH(cosb, tr2)
+ VQDMULH.S32 Q11, Q3, Q5 @ MULHIGH(sinb, ti2)
+ VQDMULH.S32 Q12, Q2, Q5 @ MULHIGH(cosb, ti2)
+ VQDMULH.S32 Q13, Q3, Q9 @ MULHIGH(sinb, tr2)
+
+ VADD.S32 Q0, Q10, Q11 @ MULHIGH(cosb, tr2) + MULHIGH(sinb, ti2)@
+ VSUB.S32 Q1, Q12, Q13 @ MULHIGH(cosb, ti2) - MULHIGH(sinb, tr2)@
+
+ VREV64.32 Q3, Q1
+ VREV64.32 Q2, Q0
+
+ VST2.I32 {d5, d7}, [r3]!
+ VST2.I32 {d4, d6}, [r3]!
+
+ subs r1, r1, #4
+ sub r3, r3, #64
+ bne PreMDCT_LOOP
+
+PreMDCT_END:
+ ldmia sp!, {r4 - r11, pc}
+ @ENDP @ |PreMDCT|
+
+ .section .text
+ .global PostMDCT
+
+PostMDCT:
+ stmdb sp!, {r4 - r11, lr}
+
+ add r9, r0, r1, lsl #2
+ sub r3, r9, #32
+
+ movs r1, r1, asr #2
+ beq PostMDCT_END
+
+PostMDCT_LOOP:
+ VLD4.I32 {d0, d2, d4, d6}, [r2]! @ cosa = *csptr++@ sina = *csptr++@
+ VLD4.I32 {d1, d3, d5, d7}, [r2]! @ cosb = *csptr++@ sinb = *csptr++@
+ VLD2.I32 {d8, d9, d10, d11}, [r0] @ tr1 = *(zbuf1 + 0)@ ti1 = *(zbuf1 + 1)@
+ VLD2.I32 {d13, d15}, [r3]! @ tr2 = *(zbuf2 - 1)@ ti2 = *(zbuf2 + 0)@
+ VLD2.I32 {d12, d14}, [r3]! @ tr2 = *(zbuf2 - 1)@ ti2 = *(zbuf2 + 0)@
+
+ VREV64.32 Q8, Q6
+ VREV64.32 Q9, Q7
+
+ VQDMULH.S32 Q10, Q0, Q4 @ MULHIGH(cosa, tr1)
+ VQDMULH.S32 Q11, Q1, Q5 @ MULHIGH(sina, ti1)
+ VQDMULH.S32 Q12, Q0, Q5 @ MULHIGH(cosa, ti1)
+ VQDMULH.S32 Q13, Q1, Q4 @ MULHIGH(sina, tr1)
+
+ VADD.S32 Q0, Q10, Q11 @ *buf0++ = MULHIGH(cosa, tr1) + MULHIGH(sina, ti1)@
+ VSUB.S32 Q5, Q13, Q12 @ *buf1-- = MULHIGH(sina, tr1) - MULHIGH(cosa, ti1)@
+
+ VQDMULH.S32 Q10, Q2, Q8 @ MULHIGH(cosb, tr2)
+ VQDMULH.S32 Q11, Q3, Q9 @ MULHIGH(sinb, ti2)
+ VQDMULH.S32 Q12, Q2, Q9 @ MULHIGH(cosb, ti2)
+ VQDMULH.S32 Q13, Q3, Q8 @ MULHIGH(sinb, tr2)
+
+ VADD.S32 Q4, Q10, Q11 @ *buf1-- = MULHIGH(cosb, tr2) + MULHIGH(sinb, ti2)@
+ VSUB.S32 Q1, Q13, Q12 @ *buf0++ = MULHIGH(sinb, tr2) - MULHIGH(cosb, ti2)@
+
+ VREV64.32 Q2, Q4
+ VREV64.32 Q3, Q5
+
+ sub r3, r3, #32
+ VST2.I32 {d0, d1, d2, d3}, [r0]!
+
+ VST2.I32 {d5, d7}, [r3]!
+ VST2.I32 {d4, d6}, [r3]!
+
+ subs r1, r1, #4
+ sub r3, r3, #64
+ bne PostMDCT_LOOP
+
+PostMDCT_END:
+ ldmia sp!, {r4 - r11, pc}
+
+ @ENDP @ |PostMDCT|
+ .end
\ No newline at end of file
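
The NEON loop above vectorizes the MDCT pre-/post-twiddle, handling four complex samples from the front of the buffer and four from the back per iteration. As a reading aid, here is a scalar C sketch of the pre-twiddle reconstructed from the inline comments; the function name, loop bounds and the MULHIGH helper are illustrative assumptions, not the codec's exact C source.

/*
 * Scalar sketch of the pre-twiddle that PreMDCT vectorizes (illustrative;
 * reconstructed from the inline comments above). The NEON loop performs
 * four front and four back butterflies per iteration.
 */
#include <stdint.h>

/* Assumed helper: high half of the 32x32-bit product. The NEON path uses
 * VQDMULH.S32, which returns the saturated (2*a*b) >> 32, so the scaling
 * of the cos/sin table has to match whichever definition is used. */
static inline int32_t MULHIGH(int32_t a, int32_t b)
{
    return (int32_t)(((int64_t)a * b) >> 32);
}

static void PreMDCT_ref(int32_t *buf0, int mdctLength, const int32_t *csptr)
{
    int32_t *buf1 = buf0 + mdctLength - 1;   /* walks backwards from the end */
    int i;

    for (i = mdctLength >> 2; i != 0; i--) {
        int32_t cosa = *csptr++, sina = *csptr++;
        int32_t cosb = *csptr++, sinb = *csptr++;

        int32_t tr1 = *(buf0 + 0), ti2 = *(buf0 + 1);   /* front pair */
        int32_t tr2 = *(buf1 - 1), ti1 = *(buf1 + 0);   /* back pair  */

        *buf0++ = MULHIGH(cosa, tr1) + MULHIGH(sina, ti1);
        *buf0++ = MULHIGH(cosa, ti1) - MULHIGH(sina, tr1);

        *buf1-- = MULHIGH(cosb, ti2) - MULHIGH(sinb, tr2);
        *buf1-- = MULHIGH(cosb, tr2) + MULHIGH(sinb, ti2);
    }
}

PostMDCT follows the same structure with the sign/ordering changes visible in its comments (sin*tr - cos*ti on the decrementing pointer).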
diff --git a/media/libstagefright/codecs/aacenc/src/asm/ARMV7/R4R8First_v7.s b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/R4R8First_v7.s
new file mode 100644
index 0000000..99ee68b
--- /dev/null
+++ b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/R4R8First_v7.s
@@ -0,0 +1,146 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ File: R4R8First_v7.s
+@
+@ Content: Radix8First and Radix4First functions, ARMv7 (NEON) assembly
+@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+ .section .text
+ .global Radix8First
+
+Radix8First:
+ stmdb sp!, {r4 - r11, lr}
+
+ ldr r3, SQRT1_2
+ cmp r1, #0
+
+ VDUP.I32 Q15, r3
+ beq Radix8First_END
+
+Radix8First_LOOP:
+ VLD1.I32 {d0, d1, d2, d3}, [r0]!
+ VLD1.I32 {d8, d9, d10, d11}, [r0]!
+
+ VADD.S32 d4, d0, d1 @ r0 = buf[0] + buf[2]@i0 = buf[1] + buf[3]@
+ VSUB.S32 d5, d0, d1 @ r1 = buf[0] - buf[2]@i1 = buf[1] - buf[3]@
+ VSUB.S32 d7, d2, d3 @ r2 = buf[4] - buf[6]@i2 = buf[5] - buf[7]@
+ VADD.S32 d6, d2, d3 @ r3 = buf[4] + buf[6]@i3 = buf[5] + buf[7]@
+ VREV64.I32 d7, d7
+
+ VADD.S32 Q0, Q2, Q3 @ r4 = (r0 + r2)@i4 = (i0 + i2)@i6 = (i1 + r3)@r7 = (r1 + i3)
+ VSUB.S32 Q1, Q2, Q3 @ r5 = (r0 - r2)@i5 = (i0 - i2)@r6 = (r1 - i3)@i7 = (i1 - r3)@
+
+ VREV64.I32 d3, d3
+
+ VADD.S32 d4, d8, d9 @ r0 = buf[ 8] + buf[10]@i0 = buf[ 9] + buf[11]@
+ VSUB.S32 d7, d10, d11 @ r1 = buf[12] - buf[14]@i1 = buf[13] - buf[15]@
+ VADD.S32 d6, d10, d11 @ r2 = buf[12] + buf[14]@i2 = buf[13] + buf[15]@
+ VREV64.I32 d7, d7
+ VSUB.S32 d5, d8, d9 @ r3 = buf[ 8] - buf[10]@i3 = buf[ 9] - buf[11]@
+
+ VTRN.32 d1, d3
+
+ VADD.S32 Q4, Q2, Q3 @ t0 = (r0 + r2) >> 1@t1 = (i0 + i2) >> 1@i0 = i1 + r3@r2 = r1 + i3@
+ VSUB.S32 Q5, Q2, Q3 @ t2 = (r0 - r2) >> 1@t3 = (i0 - i2) >> 1@r0 = r1 - i3@i2 = i1 - r3@
+
+ VREV64.I32 d3, d3
+
+ VSHR.S32 d8, d8, #1
+ VSHR.S32 Q0, Q0, #1
+ VREV64.I32 d10, d10
+ VTRN.32 d11, d9
+ VSHR.S32 Q1, Q1, #1
+ VSHR.S32 d10, d10, #1
+ VREV64.I32 d9, d9
+
+ sub r0, r0, #0x40
+
+ VADD.S32 d12, d0, d8
+ VSUB.S32 d16, d0, d8
+ VADD.S32 d14, d2, d10
+ VSUB.S32 d18, d2, d10
+
+ VSUB.S32 d4, d11, d9
+ VADD.S32 d5, d11, d9
+
+ VREV64.I32 d18, d18
+
+ VQDMULH.S32 Q3, Q2, Q15
+ VTRN.32 d14, d18
+ VTRN.32 d6, d7
+ VREV64.I32 d18, d18
+
+ VSUB.S32 d15, d3, d6
+ VREV64.I32 d7, d7
+ VADD.S32 d19, d3, d6
+ VADD.S32 d13, d1, d7
+ VSUB.S32 d17, d1, d7
+
+ VREV64.I32 d17, d17
+ VTRN.32 d13, d17
+ VREV64.I32 d17, d17
+
+ subs r1, r1, #1
+
+ VST1.I32 {d12, d13, d14, d15}, [r0]!
+ VST1.I32 {d16, d17, d18, d19}, [r0]!
+ bne Radix8First_LOOP
+
+Radix8First_END:
+ ldmia sp!, {r4 - r11, pc}
+SQRT1_2:
+ .word 0x2d413ccd
+
+ @ENDP @ |Radix8First|
+
+ .section .text
+ .global Radix4First
+
+Radix4First:
+ stmdb sp!, {r4 - r11, lr}
+
+ cmp r1, #0
+ beq Radix4First_END
+
+Radix4First_LOOP:
+ VLD1.I32 {d0, d1, d2, d3}, [r0]
+
+ VADD.S32 d4, d0, d1 @ r0 = buf[0] + buf[2]@ r1 = buf[1] + buf[3]@
+ VSUB.S32 d5, d0, d1 @ r2 = buf[0] - buf[2]@ r3 = buf[1] - buf[3]@
+ VSUB.S32 d7, d2, d3 @ r6 = buf[4] - buf[6]@ r7 = buf[5] - buf[7]@
+ VADD.S32 d6, d2, d3 @ r4 = buf[4] + buf[6]@ r5 = buf[5] + buf[7]@
+
+ VREV64.I32 d7, d7 @
+
+ VADD.S32 Q4, Q2, Q3
+ VSUB.S32 Q5, Q2, Q3
+
+ VREV64.I32 d11, d11
+ VTRN.32 d9, d11
+ subs r1, r1, #1
+ VREV64.I32 d11, d11
+ VST1.I32 {d8, d9, d10, d11}, [r0]!
+
+ bne Radix4First_LOOP
+
+Radix4First_END:
+ ldmia sp!, {r4 - r11, pc}
+
+ @ENDP @ |Radix4First|
+ .end
\ No newline at end of file
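
Radix8First and Radix4First above compute the first, twiddle-free FFT stage on interleaved re/im data. For orientation, here is a scalar sketch of the simpler Radix4First butterfly reconstructed from the register comments (r0..r7); the signature, the sign convention of the ±j rotation and the exact output ordering are assumptions for illustration.

/*
 * Scalar counterpart of Radix4First (illustrative sketch). buf holds num
 * blocks of four interleaved complex values: re0, im0, re1, im1, ...
 */
#include <stdint.h>

static void Radix4First_ref(int32_t *buf, int num)
{
    for (; num != 0; num--, buf += 8) {
        int32_t r0 = buf[0] + buf[2], r1 = buf[1] + buf[3];   /* a + b */
        int32_t r2 = buf[0] - buf[2], r3 = buf[1] - buf[3];   /* a - b */
        int32_t r4 = buf[4] + buf[6], r5 = buf[5] + buf[7];   /* c + d */
        int32_t r6 = buf[4] - buf[6], r7 = buf[5] - buf[7];   /* c - d */

        buf[0] = r0 + r4;  buf[1] = r1 + r5;   /* X0 = (a+b) + (c+d)   */
        buf[4] = r0 - r4;  buf[5] = r1 - r5;   /* X2 = (a+b) - (c+d)   */
        buf[2] = r2 + r7;  buf[3] = r3 - r6;   /* X1 = (a-b) - j(c-d)  */
        buf[6] = r2 - r7;  buf[7] = r3 + r6;   /* X3 = (a-b) + j(c-d)  */
    }
}

Radix8First does the same over eight complex inputs, folding in the sqrt(1/2) rotation held in SQRT1_2 (0x2d413ccd, roughly 0.7071 in Q30) for the odd outputs.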
diff --git a/media/libstagefright/codecs/aacenc/src/asm/ARMV7/Radix4FFT_v7.s b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/Radix4FFT_v7.s
new file mode 100644
index 0000000..e1a8438
--- /dev/null
+++ b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/Radix4FFT_v7.s
@@ -0,0 +1,143 @@
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ File: Radix4FFT_v7.s
+@
+@ Content: Radix4FFT function, ARMv7 (NEON) assembly
+@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+ .section .text
+ .global Radix4FFT
+
+Radix4FFT:
+ stmdb sp!, {r4 - r11, lr}
+
+ mov r1, r1, asr #2
+ cmp r1, #0
+ beq Radix4FFT_END
+
+Radix4FFT_LOOP1:
+ mov r5, r2, lsl #1
+ mov r8, r0
+ mov r7, r1
+ mov r5, r5, lsl #2
+ cmp r1, #0
+ rsbeq r12, r5, r5, lsl #2
+ beq Radix4FFT_LOOP1_END
+
+ rsb r12, r5, r5, lsl #2
+
+Radix4FFT_LOOP2:
+ mov r6, r3
+ mov r4, r2
+ cmp r2, #0
+ beq Radix4FFT_LOOP2_END
+
+Radix4FFT_LOOP3:
+ @r0 = xptr[0]@
+ @r1 = xptr[1]@
+ VLD2.I32 {D0, D1, D2, D3}, [r8]
+ VLD2.I32 {D28, D29, D30, D31}, [r6]! @ cosx = csptr[0]@ sinx = csptr[1]@
+
+ add r8, r8, r5 @ xptr += step@
+ VLD2.I32 {D4, D5, D6,D7}, [r8] @ r2 = xptr[0]@ r3 = xptr[1]@
+
+ VQDMULH.S32 Q10, Q2, Q14 @ MULHIGH(cosx, t0)
+ VQDMULH.S32 Q11, Q3, Q15 @ MULHIGH(sinx, t1)
+ VQDMULH.S32 Q12, Q3, Q14 @ MULHIGH(cosx, t1)
+ VQDMULH.S32 Q13, Q2, Q15 @ MULHIGH(sinx, t0)
+
+ VADD.S32 Q2, Q10, Q11 @ MULHIGH(cosx, t0) + MULHIGH(sinx, t1)
+ VSUB.S32 Q3, Q12, Q13 @ MULHIGH(cosx, t1) - MULHIGH(sinx, t0)
+
+ add r8, r8, r5 @ xptr += step@
+ VSHR.S32 Q10, Q0, #2 @ t0 = r0 >> 2@
+ VSHR.S32 Q11, Q1, #2 @ t1 = r1 >> 2@
+
+ VSUB.S32 Q0, Q10, Q2 @ r0 = t0 - r2@
+ VSUB.S32 Q1, Q11, Q3 @ r1 = t1 - r3@
+ VADD.S32 Q2, Q10, Q2 @ r2 = t0 + r2@
+ VADD.S32 Q3, Q11, Q3 @ r3 = t1 + r3@
+
+ VLD2.I32 {D8, D9, D10, D11}, [r8]
+ VLD2.I32 {D28, D29, D30, D31}, [r6]!
+ add r8, r8, r5
+
+ VQDMULH.S32 Q10, Q4, Q14 @ MULHIGH(cosx, t0)
+ VQDMULH.S32 Q11, Q5, Q15 @ MULHIGH(sinx, t1)
+ VQDMULH.S32 Q12, Q5, Q14 @ MULHIGH(cosx, t1)
+ VQDMULH.S32 Q13, Q4, Q15 @ MULHIGH(sinx, t0)
+
+ VADD.S32 Q8, Q10, Q11 @ MULHIGH(cosx, t0) + MULHIGH(sinx, t1)
+ VSUB.S32 Q9, Q12, Q13 @ MULHIGH(cosx, t1) - MULHIGH(sinx, t0)
+
+ VLD2.I32 {D12, D13, D14, D15}, [r8]
+ VLD2.I32 {D28, D29, D30, D31}, [r6]!
+
+ VQDMULH.S32 Q10, Q6, Q14 @ MULHIGH(cosx, t0)
+ VQDMULH.S32 Q11, Q7, Q15 @ MULHIGH(sinx, t1)
+ VQDMULH.S32 Q12, Q7, Q14 @ MULHIGH(cosx, t1)
+ VQDMULH.S32 Q13, Q6, Q15 @ MULHIGH(sinx, t0)
+
+ VADD.S32 Q6, Q10, Q11 @ MULHIGH(cosx, t0) + MULHIGH(sinx, t1)
+ VSUB.S32 Q7, Q12, Q13 @ MULHIGH(cosx, t1) - MULHIGH(sinx, t0)
+
+ VADD.S32 Q4, Q8, Q6 @ r4 = t0 + r6@
+ VSUB.S32 Q5, Q7, Q9 @ r5 = r7 - t1@
+ VSUB.S32 Q6, Q8, Q6 @ r6 = t0 - r6@
+ VADD.S32 Q7, Q7, Q9 @ r7 = r7 + t1@
+
+ VADD.S32 Q8, Q0, Q5 @ xptr[0] = r0 + r5@
+ VADD.S32 Q9, Q1, Q6 @ xptr[1] = r1 + r6@
+ VST2.I32 {D16, D17, D18, D19}, [r8]
+
+ VSUB.S32 Q10, Q2, Q4 @ xptr[0] = r2 - r4@
+ sub r8, r8, r5 @ xptr -= step@
+ VSUB.S32 Q11, Q3, Q7 @ xptr[1] = r3 - r7@
+ VST2.I32 {D20, D21, D22, D23}, [r8]
+
+ VSUB.S32 Q8, Q0, Q5 @ xptr[0] = r0 - r5@
+ sub r8, r8, r5 @ xptr -= step@
+ VSUB.S32 Q9, Q1, Q6 @ xptr[1] = r1 - r6@
+ VST2.I32 {D16, D17, D18, D19}, [r8]
+
+ VADD.S32 Q10, Q2, Q4 @ xptr[0] = r2 + r4@
+ sub r8, r8, r5 @ xptr -= step@
+ VADD.S32 Q11, Q3, Q7 @ xptr[1] = r3 + r7@
+ VST2.I32 {D20, D21, D22, D23}, [r8]!
+
+ subs r4, r4, #4
+ bne Radix4FFT_LOOP3
+
+Radix4FFT_LOOP2_END:
+ add r8, r8, r12
+ sub r7, r7, #1
+ cmp r7, #0
+ bhi Radix4FFT_LOOP2
+
+Radix4FFT_LOOP1_END:
+ add r3, r12, r3
+ mov r2, r2, lsl #2
+ movs r1, r1, asr #2
+ bne Radix4FFT_LOOP1
+
+Radix4FFT_END:
+ ldmia sp!, {r4 - r11, pc}
+
+ @ENDP @ |Radix4FFT|
+ .end
\ No newline at end of file
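
Radix4FFT performs the remaining radix-4 stages: each butterfly multiplies three of its four inputs by (cos, sin) twiddles via VQDMULH.S32 and recombines them, writing the four outputs back at stride step. Below is a scalar sketch of one such butterfly following the comments above; the helper, the twiddle indexing and the placement of the >> 2 input scaling are assumptions, and the NEON code runs four of these butterflies per iteration.

/* Scalar view of one twiddled radix-4 butterfly (illustrative sketch). */
#include <stdint.h>

static inline int32_t MULHIGH(int32_t a, int32_t b)
{
    return (int32_t)(((int64_t)a * b) >> 32);   /* NEON uses VQDMULH.S32: (2*a*b) >> 32 */
}

/* x[0..1], x[step..], x[2*step..], x[3*step..] hold re/im pairs;
 * csptr holds three (cos, sin) pairs for this butterfly (assumed layout). */
static void radix4_butterfly(int32_t *x, int step, const int32_t *csptr)
{
    int32_t r0, r1, r2, r3, r4, r5, r6, r7, t0, t1, cosx, sinx;

    r0 = x[0] >> 2;                                      /* t0 = r0 >> 2 */
    r1 = x[1] >> 2;                                      /* t1 = r1 >> 2 */

    x += step;                                           /* second input */
    cosx = csptr[0]; sinx = csptr[1];
    r2 = MULHIGH(cosx, x[0]) + MULHIGH(sinx, x[1]);
    r3 = MULHIGH(cosx, x[1]) - MULHIGH(sinx, x[0]);
    t0 = r0 - r2;  t1 = r1 - r3;                         /* r0 = t0 - r2; r1 = t1 - r3 */
    r2 = r0 + r2;  r3 = r1 + r3;                         /* r2 = t0 + r2; r3 = t1 + r3 */
    r0 = t0;       r1 = t1;

    x += step;                                           /* third input */
    cosx = csptr[2]; sinx = csptr[3];
    r4 = MULHIGH(cosx, x[0]) + MULHIGH(sinx, x[1]);
    r5 = MULHIGH(cosx, x[1]) - MULHIGH(sinx, x[0]);

    x += step;                                           /* fourth input */
    cosx = csptr[4]; sinx = csptr[5];
    r6 = MULHIGH(cosx, x[0]) + MULHIGH(sinx, x[1]);
    r7 = MULHIGH(cosx, x[1]) - MULHIGH(sinx, x[0]);

    t0 = r4;  t1 = r5;
    r4 = t0 + r6;  r5 = r7 - t1;                         /* r4 = t0 + r6; r5 = r7 - t1 */
    r6 = t0 - r6;  r7 = r7 + t1;                         /* r6 = t0 - r6; r7 = r7 + t1 */

    x[0] = r0 + r5;  x[1] = r1 + r6;                     /* output at 3*step */
    x -= step;
    x[0] = r2 - r4;  x[1] = r3 - r7;                     /* output at 2*step */
    x -= step;
    x[0] = r0 - r5;  x[1] = r1 - r6;                     /* output at 1*step */
    x -= step;
    x[0] = r2 + r4;  x[1] = r3 + r7;                     /* output at 0      */
}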