@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@
@	File:		R4R8First_v7.s
@
@	Content:	Radix8First and Radix4First function armv7 assembly
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

	.section .text
	.global	Radix8First

@ Radix8First: in-place first-stage radix-8 butterflies.
@   r0 = buf (interleaved 32-bit re/im pairs), r1 = number of 8-point groups
Radix8First:
	stmdb			sp!, {r4 - r11, lr}

	ldr				r3, SQRT1_2
	cmp				r1, #0

	VDUP.I32		Q15, r3					@ broadcast 1/sqrt(2) to all lanes of Q15
	beq				Radix8First_END

Radix8First_LOOP:
	VLD1.I32		{d0, d1, d2, d3}, [r0]!
	VLD1.I32		{d8, d9, d10, d11}, [r0]!

	VADD.S32		d4, d0, d1				@ r0 = buf[0] + buf[2]@i0 = buf[1] + buf[3]@
	VSUB.S32		d5, d0, d1				@ r1 = buf[0] - buf[2]@i1 = buf[1] - buf[3]@
	VSUB.S32		d7, d2, d3				@ r2 = buf[4] - buf[6]@i2 = buf[5] - buf[7]@
	VADD.S32		d6, d2, d3				@ r3 = buf[4] + buf[6]@i3 = buf[5] + buf[7]@

	VREV64.I32		d7, d7

	VADD.S32		Q0, Q2, Q3				@ r4 = (r0 + r2)@i4 = (i0 + i2)@i6 = (i1 + r3)@r7 = (r1 + i3)
	VSUB.S32		Q1, Q2, Q3				@ r5 = (r0 - r2)@i5 = (i0 - i2)@r6 = (r1 - i3)@i7 = (i1 - r3)@

	VREV64.I32		d3, d3

	VADD.S32		d4, d8, d9				@ r0 = buf[ 8] + buf[10]@i0 = buf[ 9] + buf[11]@
	VSUB.S32		d7, d10, d11			@ r1 = buf[12] - buf[14]@i1 = buf[13] - buf[15]@
	VADD.S32		d6, d10, d11			@ r2 = buf[12] + buf[14]@i2 = buf[13] + buf[15]@
	VREV64.I32		d7, d7
	VSUB.S32		d5, d8, d9				@ r3 = buf[ 8] - buf[10]@i3 = buf[ 9] - buf[11]@

	VTRN.32			d1, d3

	VADD.S32		Q4, Q2, Q3				@ t0 = (r0 + r2) >> 1@t1 = (i0 + i2) >> 1@i0 = i1 + r3@r2 = r1 + i3@
	VSUB.S32		Q5, Q2, Q3				@ t2 = (r0 - r2) >> 1@t3 = (i0 - i2) >> 1@r0 = r1 - i3@i2 = i1 - r3@

	VREV64.I32		d3, d3

	VSHR.S32		d8, d8, #1
	VSHR.S32		Q0, Q0, #1
	VREV64.I32		d10, d10
	VTRN.32			d11, d9
	VSHR.S32		Q1, Q1, #1
	VSHR.S32		d10, d10, #1
	VREV64.I32		d9, d9

	sub				r0, r0, #0x40			@ rewind to the start of the 16-word block just loaded

	VADD.S32		d12, d0, d8
	VSUB.S32		d16, d0, d8
	VADD.S32		d14, d2, d10
	VSUB.S32		d18, d2, d10

	VSUB.S32		d4, d11, d9
	VADD.S32		d5, d11, d9

	VREV64.I32		d18, d18

	VQDMULH.S32		Q3, Q2, Q15				@ scale by 1/sqrt(2): VQDMULH returns (2*a*b) >> 32
	VTRN.32			d14, d18
	VTRN.32			d6, d7
	VREV64.I32		d18, d18

	VSUB.S32		d15, d3, d6
	VREV64.I32		d7, d7
	VADD.S32		d19, d3, d6
	VADD.S32		d13, d1, d7
	VSUB.S32		d17, d1, d7

	VREV64.I32		d17, d17
	VTRN.32			d13, d17
	VREV64.I32		d17, d17

	subs			r1, r1, #1

	VST1.I32		{d12, d13, d14, d15}, [r0]!
	VST1.I32		{d16, d17, d18, d19}, [r0]!

	bne				Radix8First_LOOP

Radix8First_END:
	ldmia			sp!, {r4 - r11, pc}

SQRT1_2:
	.word			0x2d413ccd				@ 1/sqrt(2) in Q30 format (0.70710678 * 2^30)

	@ENDP  @ |Radix8First|

	.section .text
	.global	Radix4First

@ Radix4First: in-place first-stage radix-4 butterflies.
@   r0 = buf (interleaved 32-bit re/im pairs), r1 = number of 4-point groups
Radix4First:
	stmdb			sp!, {r4 - r11, lr}

	cmp				r1, #0
	beq				Radix4First_END

Radix4First_LOOP:
	VLD1.I32		{d0, d1, d2, d3}, [r0]

	VADD.S32		d4, d0, d1				@ r0 = buf[0] + buf[2]@ r1 = buf[1] + buf[3]@
	VSUB.S32		d5, d0, d1				@ r2 = buf[0] - buf[2]@ r3 = buf[1] - buf[3]@
	VSUB.S32		d7, d2, d3				@ r6 = buf[4] - buf[6]@ r7 = buf[5] - buf[7]@
	VADD.S32		d6, d2, d3				@ r4 = buf[4] + buf[6]@ r5 = buf[5] + buf[7]@

	VREV64.I32		d7, d7
	@
	VADD.S32		Q4, Q2, Q3
	VSUB.S32		Q5, Q2, Q3

	VREV64.I32		d11, d11
	VTRN.32			d9, d11
	subs			r1, r1, #1
	VREV64.I32		d11, d11
	VST1.I32		{d8, d9, d10, d11}, [r0]!

	bne				Radix4First_LOOP

Radix4First_END:
	ldmia			sp!, {r4 - r11, pc}

	@ENDP  @ |Radix4First|

	.end
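
@ For reference, a minimal C sketch of what one Radix4First iteration computes,
@ derived by reading the NEON sequence above (not taken from the project's C
@ source). The variable names r0..r7 follow the inline comments; the function
@ name Radix4First_ref and its signature are hypothetical.
@
@ void Radix4First_ref(int *buf, int num)
@ {
@     do {
@         int r0 = buf[0] + buf[2], r1 = buf[1] + buf[3];
@         int r2 = buf[0] - buf[2], r3 = buf[1] - buf[3];
@         int r4 = buf[4] + buf[6], r5 = buf[5] + buf[7];
@         int r6 = buf[4] - buf[6], r7 = buf[5] - buf[7];
@
@         buf[0] = r0 + r4;  buf[1] = r1 + r5;
@         buf[2] = r2 + r7;  buf[3] = r3 - r6;
@         buf[4] = r0 - r4;  buf[5] = r1 - r5;
@         buf[6] = r2 - r7;  buf[7] = r3 + r6;
@
@         buf += 8;          /* advance to the next 4-point group */
@     } while (--num != 0);
@ }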