Diffstat (limited to 'media/libstagefright/codecs/aacenc/src/asm/ARMV7')
3 files changed, 421 insertions, 421 deletions
diff --git a/media/libstagefright/codecs/aacenc/src/asm/ARMV7/PrePostMDCT_v7.s b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/PrePostMDCT_v7.s
index bf7dcba..a04c105 100644
--- a/media/libstagefright/codecs/aacenc/src/asm/ARMV7/PrePostMDCT_v7.s
+++ b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/PrePostMDCT_v7.s
@@ -1,135 +1,135 @@
-@/*
-@ ** Copyright 2003-2010, VisualOn, Inc.
-@ **
-@ ** Licensed under the Apache License, Version 2.0 (the "License");
-@ ** you may not use this file except in compliance with the License.
-@ ** You may obtain a copy of the License at
-@ **
-@ ** http://www.apache.org/licenses/LICENSE-2.0
-@ **
-@ ** Unless required by applicable law or agreed to in writing, software
-@ ** distributed under the License is distributed on an "AS IS" BASIS,
-@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-@ ** See the License for the specific language governing permissions and
-@ ** limitations under the License.
-@ */
-
-@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-@ File: PrePostMDCT_v7.s
-@
-@ Content: premdct and postmdct function armv7 assemble
-@
-@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-
- .section .text
- .global PreMDCT
-
-PreMDCT:
- stmdb sp!, {r4 - r11, lr}
-
- add r9, r0, r1, lsl #2
- sub r3, r9, #32
-
- movs r1, r1, asr #2
- beq PreMDCT_END
-
-PreMDCT_LOOP:
- VLD4.I32 {d0, d2, d4, d6}, [r2]! @ cosa = *csptr++@ sina = *csptr++@
- VLD4.I32 {d1, d3, d5, d7}, [r2]! @ cosb = *csptr++@ sinb = *csptr++@
- VLD2.I32 {d8, d9, d10, d11}, [r0] @ tr1 = *(buf0 + 0)@ ti2 = *(buf0 + 1)@
- VLD2.I32 {d13, d15}, [r3]! @ tr2 = *(buf1 - 1)@ ti1 = *(buf1 + 0)@
- VLD2.I32 {d12, d14}, [r3]! @ tr2 = *(buf1 - 1)@ ti1 = *(buf1 + 0)@
-
- VREV64.32 Q8, Q7
- VREV64.32 Q9, Q6
-
-
- VQDMULH.S32 Q10, Q0, Q4 @ MULHIGH(cosa, tr1)
- VQDMULH.S32 Q11, Q1, Q8 @ MULHIGH(sina, ti1)
- VQDMULH.S32 Q12, Q0, Q8 @ MULHIGH(cosa, ti1)
- VQDMULH.S32 Q13, Q1, Q4 @ MULHIGH(sina, tr1)
-
- VADD.S32 Q0, Q10, Q11 @ *buf0++ = MULHIGH(cosa, tr1) + MULHIGH(sina, ti1)@
- VSUB.S32 Q1, Q12, Q13 @ *buf0++ = MULHIGH(cosa, ti1) - MULHIGH(sina, tr1)@
-
- VST2.I32 {d0, d1, d2, d3}, [r0]!
- sub r3, r3, #32
-
- VQDMULH.S32 Q10, Q2, Q9 @ MULHIGH(cosb, tr2)
- VQDMULH.S32 Q11, Q3, Q5 @ MULHIGH(sinb, ti2)
- VQDMULH.S32 Q12, Q2, Q5 @ MULHIGH(cosb, ti2)
- VQDMULH.S32 Q13, Q3, Q9 @ MULHIGH(sinb, tr2)
-
- VADD.S32 Q0, Q10, Q11 @ MULHIGH(cosa, tr2) + MULHIGH(sina, ti2)@
- VSUB.S32 Q1, Q12, Q13 @ MULHIGH(cosa, ti2) - MULHIGH(sina, tr2)@
-
- VREV64.32 Q3, Q1
- VREV64.32 Q2, Q0
-
- VST2.I32 {d5, d7}, [r3]!
- VST2.I32 {d4, d6}, [r3]!
-
- subs r1, r1, #4
- sub r3, r3, #64
- bne PreMDCT_LOOP
-
-PreMDCT_END:
- ldmia sp!, {r4 - r11, pc}
- @ENDP @ |PreMDCT|
-
- .section .text
- .global PostMDCT
-
-PostMDCT:
- stmdb sp!, {r4 - r11, lr}
-
- add r9, r0, r1, lsl #2
- sub r3, r9, #32
-
- movs r1, r1, asr #2
- beq PostMDCT_END
-
-PostMDCT_LOOP:
- VLD4.I32 {d0, d2, d4, d6}, [r2]! @ cosa = *csptr++@ sina = *csptr++@
- VLD4.I32 {d1, d3, d5, d7}, [r2]! @ cosb = *csptr++@ sinb = *csptr++@
- VLD2.I32 {d8, d9, d10, d11}, [r0] @ tr1 = *(zbuf1 + 0)@ ti1 = *(zbuf1 + 1)@
- VLD2.I32 {d13, d15}, [r3]! @ tr2 = *(zbuf2 - 1)@ ti2 = *(zbuf2 + 0)@
- VLD2.I32 {d12, d14}, [r3]! @ tr2 = *(zbuf2 - 1)@ ti2 = *(zbuf2 + 0)@
-
- VREV64.32 Q8, Q6
- VREV64.32 Q9, Q7
-
- VQDMULH.S32 Q10, Q0, Q4 @ MULHIGH(cosa, tr1)
- VQDMULH.S32 Q11, Q1, Q5 @ MULHIGH(sina, ti1)
- VQDMULH.S32 Q12, Q0, Q5 @ MULHIGH(cosa, ti1)
- VQDMULH.S32 Q13, Q1, Q4 @ MULHIGH(sina, tr1)
-
- VADD.S32 Q0, Q10, Q11 @ *buf0++ = MULHIGH(cosa, tr1) + MULHIGH(sina, ti1)@
- VSUB.S32 Q5, Q13, Q12 @ *buf1-- = MULHIGH(sina, tr1) - MULHIGH(cosa, ti1)@
-
- VQDMULH.S32 Q10, Q2, Q8 @ MULHIGH(cosb, tr2)
- VQDMULH.S32 Q11, Q3, Q9 @ MULHIGH(sinb, ti2)
- VQDMULH.S32 Q12, Q2, Q9 @ MULHIGH(cosb, ti2)
- VQDMULH.S32 Q13, Q3, Q8 @ MULHIGH(sinb, tr2)
-
- VADD.S32 Q4, Q10, Q11 @ *buf1-- = MULHIGH(cosa, tr2) + MULHIGH(sina, ti2)@
- VSUB.S32 Q1, Q13, Q12 @ *buf0++ = MULHIGH(sina, tr2) - MULHIGH(cosa, ti2)@
-
- VREV64.32 Q2, Q4
- VREV64.32 Q3, Q5
-
- sub r3, r3, #32
- VST2.I32 {d0, d1, d2, d3}, [r0]!
-
- VST2.I32 {d5, d7}, [r3]!
- VST2.I32 {d4, d6}, [r3]!
-
- subs r1, r1, #4
- sub r3, r3, #64
- bne PostMDCT_LOOP
-
-PostMDCT_END:
- ldmia sp!, {r4 - r11, pc}
-
- @ENDP @ |PostMDCT|
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ File: PrePostMDCT_v7.s
+@
+@ Content: premdct and postmdct function armv7 assemble
+@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+ .section .text
+ .global PreMDCT
+
+PreMDCT:
+ stmdb sp!, {r4 - r11, lr}
+
+ add r9, r0, r1, lsl #2
+ sub r3, r9, #32
+
+ movs r1, r1, asr #2
+ beq PreMDCT_END
+
+PreMDCT_LOOP:
+ VLD4.I32 {d0, d2, d4, d6}, [r2]! @ cosa = *csptr++@ sina = *csptr++@
+ VLD4.I32 {d1, d3, d5, d7}, [r2]! @ cosb = *csptr++@ sinb = *csptr++@
+ VLD2.I32 {d8, d9, d10, d11}, [r0] @ tr1 = *(buf0 + 0)@ ti2 = *(buf0 + 1)@
+ VLD2.I32 {d13, d15}, [r3]! @ tr2 = *(buf1 - 1)@ ti1 = *(buf1 + 0)@
+ VLD2.I32 {d12, d14}, [r3]! @ tr2 = *(buf1 - 1)@ ti1 = *(buf1 + 0)@
+
+ VREV64.32 Q8, Q7
+ VREV64.32 Q9, Q6
+
+
+ VQDMULH.S32 Q10, Q0, Q4 @ MULHIGH(cosa, tr1)
+ VQDMULH.S32 Q11, Q1, Q8 @ MULHIGH(sina, ti1)
+ VQDMULH.S32 Q12, Q0, Q8 @ MULHIGH(cosa, ti1)
+ VQDMULH.S32 Q13, Q1, Q4 @ MULHIGH(sina, tr1)
+
+ VADD.S32 Q0, Q10, Q11 @ *buf0++ = MULHIGH(cosa, tr1) + MULHIGH(sina, ti1)@
+ VSUB.S32 Q1, Q12, Q13 @ *buf0++ = MULHIGH(cosa, ti1) - MULHIGH(sina, tr1)@
+
+ VST2.I32 {d0, d1, d2, d3}, [r0]!
+ sub r3, r3, #32
+
+ VQDMULH.S32 Q10, Q2, Q9 @ MULHIGH(cosb, tr2)
+ VQDMULH.S32 Q11, Q3, Q5 @ MULHIGH(sinb, ti2)
+ VQDMULH.S32 Q12, Q2, Q5 @ MULHIGH(cosb, ti2)
+ VQDMULH.S32 Q13, Q3, Q9 @ MULHIGH(sinb, tr2)
+
+ VADD.S32 Q0, Q10, Q11 @ MULHIGH(cosa, tr2) + MULHIGH(sina, ti2)@
+ VSUB.S32 Q1, Q12, Q13 @ MULHIGH(cosa, ti2) - MULHIGH(sina, tr2)@
+
+ VREV64.32 Q3, Q1
+ VREV64.32 Q2, Q0
+
+ VST2.I32 {d5, d7}, [r3]!
+ VST2.I32 {d4, d6}, [r3]!
+
+ subs r1, r1, #4
+ sub r3, r3, #64
+ bne PreMDCT_LOOP
+
+PreMDCT_END:
+ ldmia sp!, {r4 - r11, pc}
+ @ENDP @ |PreMDCT|
+
+ .section .text
+ .global PostMDCT
+
+PostMDCT:
+ stmdb sp!, {r4 - r11, lr}
+
+ add r9, r0, r1, lsl #2
+ sub r3, r9, #32
+
+ movs r1, r1, asr #2
+ beq PostMDCT_END
+
+PostMDCT_LOOP:
+ VLD4.I32 {d0, d2, d4, d6}, [r2]! @ cosa = *csptr++@ sina = *csptr++@
+ VLD4.I32 {d1, d3, d5, d7}, [r2]! @ cosb = *csptr++@ sinb = *csptr++@
+ VLD2.I32 {d8, d9, d10, d11}, [r0] @ tr1 = *(zbuf1 + 0)@ ti1 = *(zbuf1 + 1)@
+ VLD2.I32 {d13, d15}, [r3]! @ tr2 = *(zbuf2 - 1)@ ti2 = *(zbuf2 + 0)@
+ VLD2.I32 {d12, d14}, [r3]! @ tr2 = *(zbuf2 - 1)@ ti2 = *(zbuf2 + 0)@
+
+ VREV64.32 Q8, Q6
+ VREV64.32 Q9, Q7
+
+ VQDMULH.S32 Q10, Q0, Q4 @ MULHIGH(cosa, tr1)
+ VQDMULH.S32 Q11, Q1, Q5 @ MULHIGH(sina, ti1)
+ VQDMULH.S32 Q12, Q0, Q5 @ MULHIGH(cosa, ti1)
+ VQDMULH.S32 Q13, Q1, Q4 @ MULHIGH(sina, tr1)
+
+ VADD.S32 Q0, Q10, Q11 @ *buf0++ = MULHIGH(cosa, tr1) + MULHIGH(sina, ti1)@
+ VSUB.S32 Q5, Q13, Q12 @ *buf1-- = MULHIGH(sina, tr1) - MULHIGH(cosa, ti1)@
+
+ VQDMULH.S32 Q10, Q2, Q8 @ MULHIGH(cosb, tr2)
+ VQDMULH.S32 Q11, Q3, Q9 @ MULHIGH(sinb, ti2)
+ VQDMULH.S32 Q12, Q2, Q9 @ MULHIGH(cosb, ti2)
+ VQDMULH.S32 Q13, Q3, Q8 @ MULHIGH(sinb, tr2)
+
+ VADD.S32 Q4, Q10, Q11 @ *buf1-- = MULHIGH(cosa, tr2) + MULHIGH(sina, ti2)@
+ VSUB.S32 Q1, Q13, Q12 @ *buf0++ = MULHIGH(sina, tr2) - MULHIGH(cosa, ti2)@
+
+ VREV64.32 Q2, Q4
+ VREV64.32 Q3, Q5
+
+ sub r3, r3, #32
+ VST2.I32 {d0, d1, d2, d3}, [r0]!
+
+ VST2.I32 {d5, d7}, [r3]!
+ VST2.I32 {d4, d6}, [r3]!
+
+ subs r1, r1, #4
+ sub r3, r3, #64
+ bne PostMDCT_LOOP
+
+PostMDCT_END:
+ ldmia sp!, {r4 - r11, pc}
+
+ @ENDP @ |PostMDCT|
+ .end
\ No newline at end of file
diff --git a/media/libstagefright/codecs/aacenc/src/asm/ARMV7/R4R8First_v7.s b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/R4R8First_v7.s
index 99ee68b..defd45d 100644
--- a/media/libstagefright/codecs/aacenc/src/asm/ARMV7/R4R8First_v7.s
+++ b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/R4R8First_v7.s
@@ -1,146 +1,146 @@
-@/*
-@ ** Copyright 2003-2010, VisualOn, Inc.
-@ **
-@ ** Licensed under the Apache License, Version 2.0 (the "License");
-@ ** you may not use this file except in compliance with the License.
-@ ** You may obtain a copy of the License at
-@ **
-@ ** http://www.apache.org/licenses/LICENSE-2.0
-@ **
-@ ** Unless required by applicable law or agreed to in writing, software
-@ ** distributed under the License is distributed on an "AS IS" BASIS,
-@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-@ ** See the License for the specific language governing permissions and
-@ ** limitations under the License.
-@ */
-
-@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-@ File: R4R8First_v7.s
-@
-@ Content: Radix8First and Radix4First function armv7 assemble
-@
-@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-
- .section .text
- .global Radix8First
-
-Radix8First:
- stmdb sp!, {r4 - r11, lr}
-
- ldr r3, SQRT1_2
- cmp r1, #0
-
- VDUP.I32 Q15, r3
- beq Radix8First_END
-
-Radix8First_LOOP:
- VLD1.I32 {d0, d1, d2, d3}, [r0]!
- VLD1.I32 {d8, d9, d10, d11}, [r0]!
-
- VADD.S32 d4, d0, d1 @ r0 = buf[0] + buf[2]@i0 = buf[1] + buf[3]@
- VSUB.S32 d5, d0, d1 @ r1 = buf[0] - buf[2]@i1 = buf[1] - buf[3]@
- VSUB.S32 d7, d2, d3 @ r2 = buf[4] - buf[6]@i2 = buf[5] - buf[7]@
- VADD.S32 d6, d2, d3 @ r3 = buf[4] + buf[6]@i3 = buf[5] + buf[7]@
- VREV64.I32 d7, d7
-
- VADD.S32 Q0, Q2, Q3 @ r4 = (r0 + r2)@i4 = (i0 + i2)@i6 = (i1 + r3)@r7 = (r1 + i3)
- VSUB.S32 Q1, Q2, Q3 @ r5 = (r0 - r2)@i5 = (i0 - i2)@r6 = (r1 - i3)@i7 = (i1 - r3)@
-
- VREV64.I32 d3, d3
-
- VADD.S32 d4, d8, d9 @ r0 = buf[ 8] + buf[10]@i0 = buf[ 9] + buf[11]@
- VSUB.S32 d7, d10, d11 @ r1 = buf[12] - buf[14]@i1 = buf[13] - buf[15]@
- VADD.S32 d6, d10, d11 @ r2 = buf[12] + buf[14]@i2 = buf[13] + buf[15]@
- VREV64.I32 d7, d7
- VSUB.S32 d5, d8, d9 @ r3 = buf[ 8] - buf[10]@i3 = buf[ 9] - buf[11]@
-
- VTRN.32 d1, d3
-
- VADD.S32 Q4, Q2, Q3 @ t0 = (r0 + r2) >> 1@t1 = (i0 + i2) >> 1@i0 = i1 + r3@r2 = r1 + i3@
- VSUB.S32 Q5, Q2, Q3 @ t2 = (r0 - r2) >> 1@t3 = (i0 - i2) >> 1@r0 = r1 - i3@i2 = i1 - r3@
-
- VREV64.I32 d3, d3
-
- VSHR.S32 d8, d8, #1
- VSHR.S32 Q0, Q0, #1
- VREV64.I32 d10, d10
- VTRN.32 d11, d9
- VSHR.S32 Q1, Q1, #1
- VSHR.S32 d10, d10, #1
- VREV64.I32 d9, d9
-
- sub r0, r0, #0x40
-
- VADD.S32 d12, d0, d8
- VSUB.S32 d16, d0, d8
- VADD.S32 d14, d2, d10
- VSUB.S32 d18, d2, d10
-
- VSUB.S32 d4, d11, d9
- VADD.S32 d5, d11, d9
-
- VREV64.I32 d18, d18
-
- VQDMULH.S32 Q3, Q2, Q15
- VTRN.32 d14, d18
- VTRN.32 d6, d7
- VREV64.I32 d18, d18
-
- VSUB.S32 d15, d3, d6
- VREV64.I32 d7, d7
- VADD.S32 d19, d3, d6
- VADD.S32 d13, d1, d7
- VSUB.S32 d17, d1, d7
-
- VREV64.I32 d17, d17
- VTRN.32 d13, d17
- VREV64.I32 d17, d17
-
- subs r1, r1, #1
-
- VST1.I32 {d12, d13, d14, d15}, [r0]!
- VST1.I32 {d16, d17, d18, d19}, [r0]!
- bne Radix8First_LOOP
-
-Radix8First_END:
- ldmia sp!, {r4 - r11, pc}
-SQRT1_2:
- .word 0x2d413ccd
-
- @ENDP @ |Radix8First|
-
- .section .text
- .global Radix4First
-
-Radix4First:
- stmdb sp!, {r4 - r11, lr}
-
- cmp r1, #0
- beq Radix4First_END
-
-Radix4First_LOOP:
- VLD1.I32 {d0, d1, d2, d3}, [r0]
-
- VADD.S32 d4, d0, d1 @ r0 = buf[0] + buf[2]@ r1 = buf[1] + buf[3]@
- VSUB.S32 d5, d0, d1 @ r2 = buf[0] - buf[2]@ r3 = buf[1] - buf[3]@
- VSUB.S32 d7, d2, d3 @ r4 = buf[4] + buf[6]@ r5 = buf[5] + buf[7]@
- VADD.S32 d6, d2, d3 @ r6 = buf[4] - buf[6]@ r7 = buf[5] - buf[7]@
-
- VREV64.I32 d7, d7 @
-
- VADD.S32 Q4, Q2, Q3
- VSUB.S32 Q5, Q2, Q3
-
- VREV64.I32 d11, d11
- VTRN.32 d9, d11
- subs r1, r1, #1
- VREV64.I32 d11, d11
- VST1.I32 {d8, d9, d10, d11}, [r0]!
-
- bne Radix4First_LOOP
-
-Radix4First_END:
- ldmia sp!, {r4 - r11, pc}
-
- @ENDP @ |Radix4First|
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ File: R4R8First_v7.s
+@
+@ Content: Radix8First and Radix4First function armv7 assemble
+@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+ .section .text
+ .global Radix8First
+
+Radix8First:
+ stmdb sp!, {r4 - r11, lr}
+
+ ldr r3, SQRT1_2
+ cmp r1, #0
+
+ VDUP.I32 Q15, r3
+ beq Radix8First_END
+
+Radix8First_LOOP:
+ VLD1.I32 {d0, d1, d2, d3}, [r0]!
+ VLD1.I32 {d8, d9, d10, d11}, [r0]!
+
+ VADD.S32 d4, d0, d1 @ r0 = buf[0] + buf[2]@i0 = buf[1] + buf[3]@
+ VSUB.S32 d5, d0, d1 @ r1 = buf[0] - buf[2]@i1 = buf[1] - buf[3]@
+ VSUB.S32 d7, d2, d3 @ r2 = buf[4] - buf[6]@i2 = buf[5] - buf[7]@
+ VADD.S32 d6, d2, d3 @ r3 = buf[4] + buf[6]@i3 = buf[5] + buf[7]@
+ VREV64.I32 d7, d7
+
+ VADD.S32 Q0, Q2, Q3 @ r4 = (r0 + r2)@i4 = (i0 + i2)@i6 = (i1 + r3)@r7 = (r1 + i3)
+ VSUB.S32 Q1, Q2, Q3 @ r5 = (r0 - r2)@i5 = (i0 - i2)@r6 = (r1 - i3)@i7 = (i1 - r3)@
+
+ VREV64.I32 d3, d3
+
+ VADD.S32 d4, d8, d9 @ r0 = buf[ 8] + buf[10]@i0 = buf[ 9] + buf[11]@
+ VSUB.S32 d7, d10, d11 @ r1 = buf[12] - buf[14]@i1 = buf[13] - buf[15]@
+ VADD.S32 d6, d10, d11 @ r2 = buf[12] + buf[14]@i2 = buf[13] + buf[15]@
+ VREV64.I32 d7, d7
+ VSUB.S32 d5, d8, d9 @ r3 = buf[ 8] - buf[10]@i3 = buf[ 9] - buf[11]@
+
+ VTRN.32 d1, d3
+
+ VADD.S32 Q4, Q2, Q3 @ t0 = (r0 + r2) >> 1@t1 = (i0 + i2) >> 1@i0 = i1 + r3@r2 = r1 + i3@
+ VSUB.S32 Q5, Q2, Q3 @ t2 = (r0 - r2) >> 1@t3 = (i0 - i2) >> 1@r0 = r1 - i3@i2 = i1 - r3@
+
+ VREV64.I32 d3, d3
+
+ VSHR.S32 d8, d8, #1
+ VSHR.S32 Q0, Q0, #1
+ VREV64.I32 d10, d10
+ VTRN.32 d11, d9
+ VSHR.S32 Q1, Q1, #1
+ VSHR.S32 d10, d10, #1
+ VREV64.I32 d9, d9
+
+ sub r0, r0, #0x40
+
+ VADD.S32 d12, d0, d8
+ VSUB.S32 d16, d0, d8
+ VADD.S32 d14, d2, d10
+ VSUB.S32 d18, d2, d10
+
+ VSUB.S32 d4, d11, d9
+ VADD.S32 d5, d11, d9
+
+ VREV64.I32 d18, d18
+
+ VQDMULH.S32 Q3, Q2, Q15
+ VTRN.32 d14, d18
+ VTRN.32 d6, d7
+ VREV64.I32 d18, d18
+
+ VSUB.S32 d15, d3, d6
+ VREV64.I32 d7, d7
+ VADD.S32 d19, d3, d6
+ VADD.S32 d13, d1, d7
+ VSUB.S32 d17, d1, d7
+
+ VREV64.I32 d17, d17
+ VTRN.32 d13, d17
+ VREV64.I32 d17, d17
+
+ subs r1, r1, #1
+
+ VST1.I32 {d12, d13, d14, d15}, [r0]!
+ VST1.I32 {d16, d17, d18, d19}, [r0]!
+ bne Radix8First_LOOP
+
+Radix8First_END:
+ ldmia sp!, {r4 - r11, pc}
+SQRT1_2:
+ .word 0x2d413ccd
+
+ @ENDP @ |Radix8First|
+
+ .section .text
+ .global Radix4First
+
+Radix4First:
+ stmdb sp!, {r4 - r11, lr}
+
+ cmp r1, #0
+ beq Radix4First_END
+
+Radix4First_LOOP:
+ VLD1.I32 {d0, d1, d2, d3}, [r0]
+
+ VADD.S32 d4, d0, d1 @ r0 = buf[0] + buf[2]@ r1 = buf[1] + buf[3]@
+ VSUB.S32 d5, d0, d1 @ r2 = buf[0] - buf[2]@ r3 = buf[1] - buf[3]@
+ VSUB.S32 d7, d2, d3 @ r4 = buf[4] + buf[6]@ r5 = buf[5] + buf[7]@
+ VADD.S32 d6, d2, d3 @ r6 = buf[4] - buf[6]@ r7 = buf[5] - buf[7]@
+
+ VREV64.I32 d7, d7 @
+
+ VADD.S32 Q4, Q2, Q3
+ VSUB.S32 Q5, Q2, Q3
+
+ VREV64.I32 d11, d11
+ VTRN.32 d9, d11
+ subs r1, r1, #1
+ VREV64.I32 d11, d11
+ VST1.I32 {d8, d9, d10, d11}, [r0]!
+
+ bne Radix4First_LOOP
+
+Radix4First_END:
+ ldmia sp!, {r4 - r11, pc}
+
+ @ENDP @ |Radix4First|
+ .end
\ No newline at end of file
diff --git a/media/libstagefright/codecs/aacenc/src/asm/ARMV7/Radix4FFT_v7.s b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/Radix4FFT_v7.s
index e1a8438..84a4a80 100644
--- a/media/libstagefright/codecs/aacenc/src/asm/ARMV7/Radix4FFT_v7.s
+++ b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/Radix4FFT_v7.s
@@ -1,143 +1,143 @@
-@/*
-@ ** Copyright 2003-2010, VisualOn, Inc.
-@ **
-@ ** Licensed under the Apache License, Version 2.0 (the "License");
-@ ** you may not use this file except in compliance with the License.
-@ ** You may obtain a copy of the License at
-@ **
-@ ** http://www.apache.org/licenses/LICENSE-2.0
-@ **
-@ ** Unless required by applicable law or agreed to in writing, software
-@ ** distributed under the License is distributed on an "AS IS" BASIS,
-@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-@ ** See the License for the specific language governing permissions and
-@ ** limitations under the License.
-@ */
-
-@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-@ File: Radix4FFT_v7.s
-@
-@ Content: Radix4FFT armv7 assemble
-@
-@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-
- .section .text
- .global Radix4FFT
-
-Radix4FFT:
- stmdb sp!, {r4 - r11, lr}
-
- mov r1, r1, asr #2
- cmp r1, #0
- beq Radix4FFT_END
-
-Radix4FFT_LOOP1:
- mov r5, r2, lsl #1
- mov r8, r0
- mov r7, r1
- mov r5, r5, lsl #2
- cmp r1, #0
- rsbeq r12, r5, r5, lsl #2
- beq Radix4FFT_LOOP1_END
-
- rsb r12, r5, r5, lsl #2
-
-Radix4FFT_LOOP2:
- mov r6, r3
- mov r4, r2
- cmp r2, #0
- beq Radix4FFT_LOOP2_END
-
-Radix4FFT_LOOP3:
- @r0 = xptr[0]@
- @r1 = xptr[1]@
- VLD2.I32 {D0, D1, D2, D3}, [r8]
- VLD2.I32 {D28, D29, D30, D31}, [r6]! @ cosx = csptr[0]@ sinx = csptr[1]@
-
- add r8, r8, r5 @ xptr += step@
- VLD2.I32 {D4, D5, D6,D7}, [r8] @ r2 = xptr[0]@ r3 = xptr[1]@
-
- VQDMULH.S32 Q10, Q2, Q14 @ MULHIGH(cosx, t0)
- VQDMULH.S32 Q11, Q3, Q15 @ MULHIGH(sinx, t1)
- VQDMULH.S32 Q12, Q3, Q14 @ MULHIGH(cosx, t1)
- VQDMULH.S32 Q13, Q2, Q15 @ MULHIGH(sinx, t0)
-
- VADD.S32 Q2, Q10, Q11 @ MULHIGH(cosx, t0) + MULHIGH(sinx, t1)
- VSUB.S32 Q3, Q12, Q13 @ MULHIGH(cosx, t1) - MULHIGH(sinx, t0)
-
- add r8, r8, r5 @ xptr += step@
- VSHR.S32 Q10, Q0, #2 @ t0 = r0 >> 2@
- VSHR.S32 Q11, Q1, #2 @ t1 = r1 >> 2@
-
- VSUB.S32 Q0, Q10, Q2 @ r0 = t0 - r2@
- VSUB.S32 Q1, Q11, Q3 @ r1 = t1 - r3@
- VADD.S32 Q2, Q10, Q2 @ r2 = t0 + r2@
- VADD.S32 Q3, Q11, Q3 @ r3 = t1 + r3@
-
- VLD2.I32 {D8, D9, D10, D11}, [r8]
- VLD2.I32 {D28, D29, D30, D31}, [r6]!
- add r8, r8, r5
-
- VQDMULH.S32 Q10, Q4, Q14 @ MULHIGH(cosx, t0)
- VQDMULH.S32 Q11, Q5, Q15 @ MULHIGH(sinx, t1)
- VQDMULH.S32 Q12, Q5, Q14 @ MULHIGH(cosx, t1)
- VQDMULH.S32 Q13, Q4, Q15 @ MULHIGH(sinx, t0)
-
- VADD.S32 Q8, Q10, Q11 @ MULHIGH(cosx, t0) + MULHIGH(sinx, t1)
- VSUB.S32 Q9, Q12, Q13 @ MULHIGH(cosx, t1) - MULHIGH(sinx, t0)
-
- VLD2.I32 {D12, D13, D14, D15}, [r8]
- VLD2.I32 {D28, D29, D30, D31}, [r6]!
-
- VQDMULH.S32 Q10, Q6, Q14 @ MULHIGH(cosx, t0)
- VQDMULH.S32 Q11, Q7, Q15 @ MULHIGH(sinx, t1)
- VQDMULH.S32 Q12, Q7, Q14 @ MULHIGH(cosx, t1)
- VQDMULH.S32 Q13, Q6, Q15 @ MULHIGH(sinx, t0)
-
- VADD.S32 Q6, Q10, Q11 @ MULHIGH(cosx, t0) + MULHIGH(sinx, t1)
- VSUB.S32 Q7, Q12, Q13 @ MULHIGH(cosx, t1) - MULHIGH(sinx, t0)
-
- VADD.S32 Q4, Q8, Q6 @ r4 = t0 + r6@
- VSUB.S32 Q5, Q7, Q9 @ r5 = r7 - t1@
- VSUB.S32 Q6, Q8, Q6 @ r6 = t0 - r6@
- VADD.S32 Q7, Q7, Q9 @ r7 = r7 + t1@
-
- VADD.S32 Q8, Q0, Q5 @ xptr[0] = r0 + r5@
- VADD.S32 Q9, Q1, Q6 @ xptr[1] = r1 + r6@
- VST2.I32 {D16, D17, D18, D19}, [r8]
-
- VSUB.S32 Q10, Q2, Q4 @ xptr[0] = r2 - r4@
- sub r8, r8, r5 @ xptr -= step@
- VSUB.S32 Q11, Q3, Q7 @ xptr[1] = r3 - r7@
- VST2.I32 {D20, D21, D22, D23}, [r8]
-
- VSUB.S32 Q8, Q0, Q5 @ xptr[0] = r0 - r5@
- sub r8, r8, r5 @ xptr -= step@
- VSUB.S32 Q9, Q1, Q6 @ xptr[1] = r1 - r6@
- VST2.I32 {D16, D17, D18, D19}, [r8]
-
- VADD.S32 Q10, Q2, Q4 @ xptr[0] = r2 + r4@
- sub r8, r8, r5 @ xptr -= step@
- VADD.S32 Q11, Q3, Q7 @ xptr[1] = r3 + r7@
- VST2.I32 {D20, D21, D22, D23}, [r8]!
-
- subs r4, r4, #4
- bne Radix4FFT_LOOP3
-
-Radix4FFT_LOOP2_END:
- add r8, r8, r12
- sub r7, r7, #1
- cmp r7, #0
- bhi Radix4FFT_LOOP2
-
-Radix4FFT_LOOP1_END:
- add r3, r12, r3
- mov r2, r2, lsl #2
- movs r1, r1, asr #2
- bne Radix4FFT_LOOP1
-
-Radix4FFT_END:
- ldmia sp!, {r4 - r11, pc}
-
- @ENDP @ |Radix4FFT|
+@/*
+@ ** Copyright 2003-2010, VisualOn, Inc.
+@ **
+@ ** Licensed under the Apache License, Version 2.0 (the "License");
+@ ** you may not use this file except in compliance with the License.
+@ ** You may obtain a copy of the License at
+@ **
+@ ** http://www.apache.org/licenses/LICENSE-2.0
+@ **
+@ ** Unless required by applicable law or agreed to in writing, software
+@ ** distributed under the License is distributed on an "AS IS" BASIS,
+@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ ** See the License for the specific language governing permissions and
+@ ** limitations under the License.
+@ */
+
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+@ File: Radix4FFT_v7.s
+@
+@ Content: Radix4FFT armv7 assemble
+@
+@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
+
+ .section .text
+ .global Radix4FFT
+
+Radix4FFT:
+ stmdb sp!, {r4 - r11, lr}
+
+ mov r1, r1, asr #2
+ cmp r1, #0
+ beq Radix4FFT_END
+
+Radix4FFT_LOOP1:
+ mov r5, r2, lsl #1
+ mov r8, r0
+ mov r7, r1
+ mov r5, r5, lsl #2
+ cmp r1, #0
+ rsbeq r12, r5, r5, lsl #2
+ beq Radix4FFT_LOOP1_END
+
+ rsb r12, r5, r5, lsl #2
+
+Radix4FFT_LOOP2:
+ mov r6, r3
+ mov r4, r2
+ cmp r2, #0
+ beq Radix4FFT_LOOP2_END
+
+Radix4FFT_LOOP3:
+ @r0 = xptr[0]@
+ @r1 = xptr[1]@
+ VLD2.I32 {D0, D1, D2, D3}, [r8]
+ VLD2.I32 {D28, D29, D30, D31}, [r6]! @ cosx = csptr[0]@ sinx = csptr[1]@
+
+ add r8, r8, r5 @ xptr += step@
+ VLD2.I32 {D4, D5, D6,D7}, [r8] @ r2 = xptr[0]@ r3 = xptr[1]@
+
+ VQDMULH.S32 Q10, Q2, Q14 @ MULHIGH(cosx, t0)
+ VQDMULH.S32 Q11, Q3, Q15 @ MULHIGH(sinx, t1)
+ VQDMULH.S32 Q12, Q3, Q14 @ MULHIGH(cosx, t1)
+ VQDMULH.S32 Q13, Q2, Q15 @ MULHIGH(sinx, t0)
+
+ VADD.S32 Q2, Q10, Q11 @ MULHIGH(cosx, t0) + MULHIGH(sinx, t1)
+ VSUB.S32 Q3, Q12, Q13 @ MULHIGH(cosx, t1) - MULHIGH(sinx, t0)
+
+ add r8, r8, r5 @ xptr += step@
+ VSHR.S32 Q10, Q0, #2 @ t0 = r0 >> 2@
+ VSHR.S32 Q11, Q1, #2 @ t1 = r1 >> 2@
+
+ VSUB.S32 Q0, Q10, Q2 @ r0 = t0 - r2@
+ VSUB.S32 Q1, Q11, Q3 @ r1 = t1 - r3@
+ VADD.S32 Q2, Q10, Q2 @ r2 = t0 + r2@
+ VADD.S32 Q3, Q11, Q3 @ r3 = t1 + r3@
+
+ VLD2.I32 {D8, D9, D10, D11}, [r8]
+ VLD2.I32 {D28, D29, D30, D31}, [r6]!
+ add r8, r8, r5
+
+ VQDMULH.S32 Q10, Q4, Q14 @ MULHIGH(cosx, t0)
+ VQDMULH.S32 Q11, Q5, Q15 @ MULHIGH(sinx, t1)
+ VQDMULH.S32 Q12, Q5, Q14 @ MULHIGH(cosx, t1)
+ VQDMULH.S32 Q13, Q4, Q15 @ MULHIGH(sinx, t0)
+
+ VADD.S32 Q8, Q10, Q11 @ MULHIGH(cosx, t0) + MULHIGH(sinx, t1)
+ VSUB.S32 Q9, Q12, Q13 @ MULHIGH(cosx, t1) - MULHIGH(sinx, t0)
+
+ VLD2.I32 {D12, D13, D14, D15}, [r8]
+ VLD2.I32 {D28, D29, D30, D31}, [r6]!
+
+ VQDMULH.S32 Q10, Q6, Q14 @ MULHIGH(cosx, t0)
+ VQDMULH.S32 Q11, Q7, Q15 @ MULHIGH(sinx, t1)
+ VQDMULH.S32 Q12, Q7, Q14 @ MULHIGH(cosx, t1)
+ VQDMULH.S32 Q13, Q6, Q15 @ MULHIGH(sinx, t0)
+
+ VADD.S32 Q6, Q10, Q11 @ MULHIGH(cosx, t0) + MULHIGH(sinx, t1)
+ VSUB.S32 Q7, Q12, Q13 @ MULHIGH(cosx, t1) - MULHIGH(sinx, t0)
+
+ VADD.S32 Q4, Q8, Q6 @ r4 = t0 + r6@
+ VSUB.S32 Q5, Q7, Q9 @ r5 = r7 - t1@
+ VSUB.S32 Q6, Q8, Q6 @ r6 = t0 - r6@
+ VADD.S32 Q7, Q7, Q9 @ r7 = r7 + t1@
+
+ VADD.S32 Q8, Q0, Q5 @ xptr[0] = r0 + r5@
+ VADD.S32 Q9, Q1, Q6 @ xptr[1] = r1 + r6@
+ VST2.I32 {D16, D17, D18, D19}, [r8]
+
+ VSUB.S32 Q10, Q2, Q4 @ xptr[0] = r2 - r4@
+ sub r8, r8, r5 @ xptr -= step@
+ VSUB.S32 Q11, Q3, Q7 @ xptr[1] = r3 - r7@
+ VST2.I32 {D20, D21, D22, D23}, [r8]
+
+ VSUB.S32 Q8, Q0, Q5 @ xptr[0] = r0 - r5@
+ sub r8, r8, r5 @ xptr -= step@
+ VSUB.S32 Q9, Q1, Q6 @ xptr[1] = r1 - r6@
+ VST2.I32 {D16, D17, D18, D19}, [r8]
+
+ VADD.S32 Q10, Q2, Q4 @ xptr[0] = r2 + r4@
+ sub r8, r8, r5 @ xptr -= step@
+ VADD.S32 Q11, Q3, Q7 @ xptr[1] = r3 + r7@
+ VST2.I32 {D20, D21, D22, D23}, [r8]!
+
+ subs r4, r4, #4
+ bne Radix4FFT_LOOP3
+
+Radix4FFT_LOOP2_END:
+ add r8, r8, r12
+ sub r7, r7, #1
+ cmp r7, #0
+ bhi Radix4FFT_LOOP2
+
+Radix4FFT_LOOP1_END:
+ add r3, r12, r3
+ mov r2, r2, lsl #2
+ movs r1, r1, asr #2
+ bne Radix4FFT_LOOP1
+
+Radix4FFT_END:
+ ldmia sp!, {r4 - r11, pc}
+
+ @ENDP @ |Radix4FFT|
+ .end
\ No newline at end of file
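
Reader's note: the one NEON idiom used throughout all three files above is VQDMULH.S32, which the inline comments label MULHIGH(...). As a reading aid only, here is a minimal C sketch under the assumption that MULHIGH() is the usual "high half of the 64-bit product" macro from the aacenc C sources; the helper names (mulhigh, vqdmulh_s32_lane, premdct_butterfly) are hypothetical and are not part of this patch:

    #include <stdint.h>

    /* Assumed scalar model of the MULHIGH() macro referenced in the
       comments above: high 32 bits of the signed 64-bit product. */
    static inline int32_t mulhigh(int32_t a, int32_t b)
    {
        return (int32_t)(((int64_t)a * b) >> 32);
    }

    /* What one lane of VQDMULH.S32 computes: the doubling, saturating
       variant -- (2*a*b) >> 32, clamped for INT32_MIN * INT32_MIN. */
    static inline int32_t vqdmulh_s32_lane(int32_t a, int32_t b)
    {
        if (a == INT32_MIN && b == INT32_MIN)
            return INT32_MAX;               /* only saturating case */
        return (int32_t)(((int64_t)a * b) >> 31);
    }

    /* Scalar form of one PreMDCT butterfly from PrePostMDCT_v7.s, as
       spelled out in its comments: a fixed-point complex rotation of
       (tr1, ti1) by the twiddle factor (cosa, sina). */
    static inline void premdct_butterfly(int32_t cosa, int32_t sina,
                                         int32_t tr1, int32_t ti1,
                                         int32_t *re, int32_t *im)
    {
        *re = mulhigh(cosa, tr1) + mulhigh(sina, ti1);
        *im = mulhigh(cosa, ti1) - mulhigh(sina, tr1);
    }

The doubling in VQDMULH.S32 makes it a proper Q31 x Q31 -> Q31 multiply, and its saturation triggers only for INT32_MIN * INT32_MIN; each VQDMULH/VADD/VSUB group in the loops above processes four such butterflies per iteration.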