Diffstat (limited to 'media/libstagefright/codecs/aacenc/src/asm')
9 files changed, 534 insertions(+), 534 deletions(-)
diff --git a/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/AutoCorrelation_v5.s b/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/AutoCorrelation_v5.s index e0885f1..e705197 100644 --- a/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/AutoCorrelation_v5.s +++ b/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/AutoCorrelation_v5.s @@ -22,34 +22,34 @@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ - .section .text + .section .text .global AutoCorrelation AutoCorrelation: stmdb sp!, {r4 - r11, lr} - sub r13, r13, #20 + sub r13, r13, #20 - mov r5, r0 - mov r7, r1 - mov r9, r3 - mov r2, r2, lsl #16 - mov r0, #0 - mov r4, r2, asr #16 - mov r8, #0 - cmp r4, #0 - ble L136 - - cmp r4, #8 - mov r2, #0 - blt L133 + mov r5, r0 + mov r7, r1 + mov r9, r3 + mov r2, r2, lsl #16 + mov r0, #0 + mov r4, r2, asr #16 + mov r8, #0 + cmp r4, #0 + ble L136 - sub r12, r4, #8 -L132: - ldr r6, [r5, r2] + cmp r4, #8 + mov r2, #0 + blt L133 + + sub r12, r4, #8 +L132: + ldr r6, [r5, r2] add r2, r2, #4 smulbb r3, r6, r6 - ldr r1, [r5, r2] + ldr r1, [r5, r2] smultt r10, r6, r6 mov r3, r3, asr #9 smulbb r6, r1, r1 @@ -72,95 +72,95 @@ L132: add r8, r8, #6 qadd r0, r0, r6 - cmp r8, r12 - blt L132 -L133: - ldrsh r6, [r5, r2] - mul r10, r6, r6 - add r2, r2, #2 - mov r1, r10, asr #9 + cmp r8, r12 + blt L132 +L133: + ldrsh r6, [r5, r2] + mul r10, r6, r6 + add r2, r2, #2 + mov r1, r10, asr #9 qadd r0, r0, r1 -L134: - add r8, r8, #1 - cmp r8, r4 - blt L133 -L135: -L136: - str r0, [r7, #0] - cmp r0, #0 - beq L1320 -L137: - mov r2, r9, lsl #16 - mov r8, #1 - mov r2, r2, asr #16 - cmp r2, #1 - ble L1319 -L138: -L139: - sub r4, r4, #1 - mov r14, #0 - mov r3, #0 - cmp r4, #0 - ble L1317 -L1310: - cmp r4, #6 - addlt r6, r5, r8, lsl #1 - blt L1314 -L1311: - add r6, r5, r8, lsl #1 - sub r12, r4, #6 - str r8, [r13, #8] - str r7, [r13, #4] -L1312: - mov r1, r3, lsl #1 - ldrsh r7, [r6, r1] - ldrsh r10, [r5, r1] - add r8, r1, r6 - add r9, r5, r1 +L134: + add r8, r8, #1 + cmp r8, r4 + blt L133 +L135: +L136: + str r0, [r7, #0] + cmp r0, #0 + beq L1320 +L137: + mov r2, r9, lsl #16 + mov r8, #1 + mov r2, r2, asr #16 + cmp r2, #1 + ble L1319 +L138: +L139: + sub r4, r4, #1 + mov r14, #0 + mov r3, #0 + cmp r4, #0 + ble L1317 +L1310: + cmp r4, #6 + addlt r6, r5, r8, lsl #1 + blt L1314 +L1311: + add r6, r5, r8, lsl #1 + sub r12, r4, #6 + str r8, [r13, #8] + str r7, [r13, #4] +L1312: + mov r1, r3, lsl #1 + ldrsh r7, [r6, r1] + ldrsh r10, [r5, r1] + add r8, r1, r6 + add r9, r5, r1 mul r7, r10, r7 - ldrsh r1, [r8, #2] - ldrsh r10, [r8, #4] - add r7, r14, r7, asr #9 - ldrsh r0, [r9, #2] - ldrsh r11, [r9, #4] - mul r1, r0, r1 - ldrsh r14, [r8, #6] - mul r10, r11, r10 - add r7, r7, r1, asr #9 - ldrsh r8, [r8, #8] + ldrsh r1, [r8, #2] + ldrsh r10, [r8, #4] + add r7, r14, r7, asr #9 + ldrsh r0, [r9, #2] + ldrsh r11, [r9, #4] + mul r1, r0, r1 + ldrsh r14, [r8, #6] + mul r10, r11, r10 + add r7, r7, r1, asr #9 + ldrsh r8, [r8, #8] add r3, r3, #5 - ldrsh r11, [r9, #6] - ldrsh r1, [r9, #8] - mul r14, r11, r14 - add r7, r7, r10, asr #9 - mul r1, r1, r8 - add r14, r7, r14, asr #9 - cmp r3, r12 - add r14, r14, r1, asr #9 - ble L1312 -L1313: - ldr r8, [r13, #8] - ldr r7, [r13, #4] -L1314: -L1315: - mov r12, r3, lsl #1 - ldrsh r9, [r6, r12] - ldrsh r12, [r5, r12] - add r3, r3, #1 - cmp r3, r4 - mul r12, r12, r9 - add r14, r14, r12, asr #9 - blt L1315 -L1316: -L1317: - str r14, [r7, +r8, lsl #2] - add r8, r8, #1 - cmp r8, r2 - blt L139 - + ldrsh r11, [r9, #6] + ldrsh r1, [r9, #8] + mul r14, r11, r14 + add r7, r7, r10, asr #9 + 
mul r1, r1, r8 + add r14, r7, r14, asr #9 + cmp r3, r12 + add r14, r14, r1, asr #9 + ble L1312 +L1313: + ldr r8, [r13, #8] + ldr r7, [r13, #4] +L1314: +L1315: + mov r12, r3, lsl #1 + ldrsh r9, [r6, r12] + ldrsh r12, [r5, r12] + add r3, r3, #1 + cmp r3, r4 + mul r12, r12, r9 + add r14, r14, r12, asr #9 + blt L1315 +L1316: +L1317: + str r14, [r7, +r8, lsl #2] + add r8, r8, #1 + cmp r8, r2 + blt L139 + L1319: L1320: - add r13, r13, #20 + add r13, r13, #20 ldmia sp!, {r4 - r11, pc} @ENDP @ |AutoCorrelation| diff --git a/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/CalcWindowEnergy_v5.s b/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/CalcWindowEnergy_v5.s index 75b916c..b30e8cb 100644 --- a/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/CalcWindowEnergy_v5.s +++ b/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/CalcWindowEnergy_v5.s @@ -22,91 +22,91 @@ @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@ .section .text - + .global CalcWindowEnergy CalcWindowEnergy: stmdb sp!, {r4 - r11, lr} - sub r13, r13, #20 + sub r13, r13, #20 - mov r3, r3, lsl #16 + mov r3, r3, lsl #16 ldr r10, [r0, #168] @ states0 = blockSwitchingControl->iirStates[0]; - mov r3, r3, asr #16 + mov r3, r3, asr #16 ldr r11, [r0, #172] @ states1 = blockSwitchingControl->iirStates[1]; mov r2, r2, lsl #16 - ldr r12, hiPassCoeff @ Coeff0 = hiPassCoeff[0]; + ldr r12, hiPassCoeff @ Coeff0 = hiPassCoeff[0]; mov r2, r2, asr #16 ldr r14, hiPassCoeff + 4 @ Coeff1 = hiPassCoeff[1]; - + mov r8, #0 @ w=0 mov r5, #0 @ wOffset = 0; - + BLOCK_BEGIN: - mov r6, #0 @ accuUE = 0; - mov r7, #0 @ accuFE = 0; + mov r6, #0 @ accuUE = 0; + mov r7, #0 @ accuFE = 0; mov r4, #0 @ i=0 - - str r8, [r13, #4] - str r0, [r13, #8] + + str r8, [r13, #4] + str r0, [r13, #8] str r3, [r13, #12] - -ENERGY_BEG: - mov r9, r5, lsl #1 + +ENERGY_BEG: + mov r9, r5, lsl #1 ldrsh r9, [r1, r9] @ tempUnfiltered = timeSignal[tidx]; add r5, r5, r2 @ tidx = tidx + chIncrement; - - smulwb r3, r14, r9 @ accu1 = L_mpy_ls(Coeff1, tempUnfiltered); + + smulwb r3, r14, r9 @ accu1 = L_mpy_ls(Coeff1, tempUnfiltered); smull r0, r8, r12, r11 @ accu2 = fixmul( Coeff0, states1 ); - + mov r3, r3, lsl #1 mov r8, r8, lsl #1 - sub r0, r3, r10 @ accu3 = accu1 - states0; + sub r0, r3, r10 @ accu3 = accu1 - states0; sub r8, r0, r8 @ out = accu3 - accu2; mov r10, r3 @ states0 = accu1; - mov r11, r8 @ states1 = out; - - mul r3, r9, r9 + mov r11, r8 @ states1 = out; + + mul r3, r9, r9 mov r8, r8, asr #16 - + add r4, r4, #1 add r6, r6, r3, asr #7 - mul r9, r8, r8 + mul r9, r8, r8 ldr r3, [r13, #12] add r7, r7, r9, asr #7 - - cmp r4, r3 - blt ENERGY_BEG - + + cmp r4, r3 + blt ENERGY_BEG + ldr r0, [r13, #8] ldr r8, [r13, #4] - + ENERGY_END: add r4, r0, r8, lsl #2 - str r6, [r4, #72] - add r8, r8, #1 - str r7, [r4, #136] + str r6, [r4, #72] + add r8, r8, #1 + str r7, [r4, #136] cmp r8, #8 - blt BLOCK_BEGIN + blt BLOCK_BEGIN BLOCK_END: - str r10, [r0, #168] - str r11, [r0, #172] - mov r0, #1 - - add r13, r13, #20 - ldmia sp!, {r4 - r11, pc} + str r10, [r0, #168] + str r11, [r0, #172] + mov r0, #1 + + add r13, r13, #20 + ldmia sp!, {r4 - r11, pc} hiPassCoeff: .word 0xbec8b439 .word 0x609d4952 - + @ENDP .end diff --git a/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/PrePostMDCT_v5.s b/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/PrePostMDCT_v5.s index 38fe092..103cc91 100644 --- a/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/PrePostMDCT_v5.s +++ b/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/PrePostMDCT_v5.s @@ -26,46 +26,46 @@ PreMDCT: stmdb 
sp!, {r4 - r11, lr} - + add r9, r0, r1, lsl #2 sub r3, r9, #8 movs r1, r1, asr #2 beq PreMDCT_END - + PreMDCT_LOOP: ldr r8, [r2], #4 ldr r9, [r2], #4 - + ldrd r4, [r0] ldrd r6, [r3] - + smull r14, r11, r4, r8 @ MULHIGH(tr1, cosa) smull r10, r12, r7, r8 @ MULHIGH(ti1, cosa) - + smull r14, r8, r7, r9 @ MULHIGH(ti1, sina) - smull r7, r10, r4, r9 @ MULHIGH(tr1, sina) - - add r11, r11, r8 @ MULHIGH(cosa, tr1) + MULHIGH(sina, ti1)@ + smull r7, r10, r4, r9 @ MULHIGH(tr1, sina) + + add r11, r11, r8 @ MULHIGH(cosa, tr1) + MULHIGH(sina, ti1)@ sub r7, r12, r10 @ MULHIGH(ti1, cosa) - MULHIGH(tr1, sina) - + ldr r8, [r2], #4 ldr r9, [r2], #4 - + smull r14, r4, r6, r8 @ MULHIGH(tr2, cosa) smull r10, r12, r5, r8 @ MULHIGH(ti2, cosa) - + smull r14, r8, r5, r9 @ MULHIGH(ti2, sina) smull r5, r10, r6, r9 @ MULHIGH(tr2, sina) - + add r8, r8, r4 sub r9, r12, r10 - - mov r6, r11 - strd r6, [r0] + mov r6, r11 + + strd r6, [r0] strd r8, [r3] - + subs r1, r1, #1 sub r3, r3, #8 add r0, r0, #8 @@ -74,52 +74,52 @@ PreMDCT_LOOP: PreMDCT_END: ldmia sp!, {r4 - r11, pc} @ENDP @ |PreMDCT| - + .section .text .global PostMDCT PostMDCT: stmdb sp!, {r4 - r11, lr} - + add r9, r0, r1, lsl #2 sub r3, r9, #8 movs r1, r1, asr #2 beq PostMDCT_END - + PostMDCT_LOOP: - ldr r8, [r2], #4 + ldr r8, [r2], #4 ldr r9, [r2], #4 - + ldrd r4, [r0] ldrd r6, [r3] - + smull r14, r11, r4, r8 @ MULHIGH(tr1, cosa) smull r10, r12, r5, r8 @ MULHIGH(ti1, cosa) - + smull r14, r8, r5, r9 @ MULHIGH(ti1, sina) - smull r5, r10, r4, r9 @ MULHIGH(tr1, sina) - - add r4, r11, r8 @ MULHIGH(cosa, tr1) + MULHIGH(sina, ti1)@ + smull r5, r10, r4, r9 @ MULHIGH(tr1, sina) + + add r4, r11, r8 @ MULHIGH(cosa, tr1) + MULHIGH(sina, ti1)@ sub r11, r10, r12 @ MULHIGH(ti1, cosa) - MULHIGH(tr1, sina)@ - + ldr r8, [r2], #4 @ ldr r9, [r2], #4 - + smull r14, r5, r6, r8 @ MULHIGH(tr2, cosa) smull r10, r12, r7, r8 @ MULHIGH(ti2, cosa) - + smull r14, r8, r7, r9 @ MULHIGH(ti2, sina) smull r7, r10, r6, r9 @ MULHIGH(tr2, sina) - + add r6, r8, r5 @ MULHIGH(cosb, tr2) + MULHIGH(sinb, ti2)@ sub r5, r10, r12 @ MULHIGH(sinb, tr2) - MULHIGH(cosb, ti2)@ - - mov r7, r11 + + mov r7, r11 strd r4, [r0] strd r6, [r3] - + subs r1, r1, #1 sub r3, r3, #8 add r0, r0, #8 diff --git a/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/R4R8First_v5.s b/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/R4R8First_v5.s index b30881a..72cb9a3 100644 --- a/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/R4R8First_v5.s +++ b/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/R4R8First_v5.s @@ -26,46 +26,46 @@ Radix4First: stmdb sp!, {r4 - r11, lr} - + movs r10, r1 mov r11, r0 beq Radix4First_END - + Radix4First_LOOP: ldrd r0, [r11] ldrd r2, [r11, #8] ldrd r4, [r11, #16] ldrd r6, [r11, #24] - + add r8, r0, r2 add r9, r1, r3 - + sub r0, r0, r2 sub r1, r1, r3 - + add r2, r4, r6 add r3, r5, r7 - + sub r4, r4, r6 sub r5, r5, r7 - + add r6, r8, r2 add r7, r9, r3 - + sub r8, r8, r2 sub r9, r9, r3 - + add r2, r0, r5 sub r3, r1, r4 - + sub r0, r0, r5 add r1, r1, r4 - + strd r6, [r11] strd r2, [r11, #8] strd r8, [r11, #16] strd r0, [r11, #24] - + subs r10, r10, #1 add r11, r11, #32 bne Radix4First_LOOP @@ -73,180 +73,180 @@ Radix4First_LOOP: Radix4First_END: ldmia sp!, {r4 - r11, pc} @ENDP @ |Radix4First| - + .section .text .global Radix8First Radix8First: stmdb sp!, {r4 - r11, lr} sub sp, sp, #0x24 - + mov r12, r1 mov r14, r0 cmp r12, #0 beq Radix8First_END - + Radix8First_LOOP: - ldrd r0, [r14] + ldrd r0, [r14] ldrd r2, [r14, #8] ldrd r4, [r14, #16] ldrd r6, [r14, #24] - + add r8, r0, r2 @ r0 = buf[0] + buf[2]@ add 
r9, r1, r3 @ i0 = buf[1] + buf[3]@ - + sub r0, r0, r2 @ r1 = buf[0] - buf[2]@ sub r1, r1, r3 @ i1 = buf[1] - buf[3]@ - + add r2, r4, r6 @ r2 = buf[4] + buf[6]@ add r3, r5, r7 @ i2 = buf[5] + buf[7]@ - + sub r4, r4, r6 @ r3 = buf[4] - buf[6]@ sub r5, r5, r7 @ i3 = buf[5] - buf[7]@ - + add r6, r8, r2 @ r4 = (r0 + r2) >> 1@ add r7, r9, r3 @ i4 = (i0 + i2) >> 1@ - + sub r8, r8, r2 @ r5 = (r0 - r2) >> 1@ sub r9, r9, r3 @ i5 = (i0 - i2) >> 1@ - + sub r2, r0, r5 @ r6 = (r1 - i3) >> 1@ add r3, r1, r4 @ i6 = (i1 + r3) >> 1@ - + add r0, r0, r5 @ r7 = (r1 + i3) >> 1@ sub r1, r1, r4 @ i7 = (i1 - r3) >> 1@ - + mov r6, r6, asr #1 @ mov r7, r7, asr #1 @ - + mov r8, r8, asr #1 mov r9, r9, asr #1 - + mov r2, r2, asr #1 mov r3, r3, asr #1 - + mov r0, r0, asr #1 - mov r1, r1, asr #1 - + mov r1, r1, asr #1 + str r6, [sp] str r7, [sp, #4] - + str r8, [sp, #8] str r9, [sp, #12] - + str r2, [sp, #16] - str r3, [sp, #20] - + str r3, [sp, #20] + str r0, [sp, #24] - str r1, [sp, #28] - - ldrd r2, [r14, #32] + str r1, [sp, #28] + + ldrd r2, [r14, #32] ldrd r4, [r14, #40] ldrd r6, [r14, #48] ldrd r8, [r14, #56] - + add r0, r2, r4 @ r0 = buf[ 8] + buf[10]@ add r1, r3, r5 @ i0 = buf[ 9] + buf[11]@ - + sub r2, r2, r4 @ r1 = buf[ 8] - buf[10]@ sub r3, r3, r5 @ i1 = buf[ 9] - buf[11]@ - + add r4, r6, r8 @ r2 = buf[12] + buf[14]@ add r5, r7, r9 @ i2 = buf[13] + buf[15]@ - + sub r6, r6, r8 @ r3 = buf[12] - buf[14]@ sub r7, r7, r9 @ i3 = buf[13] - buf[15]@ - + add r8, r0, r4 @ t0 = (r0 + r2) add r9, r1, r5 @ t1 = (i0 + i2) - + sub r0, r0, r4 @ t2 = (r0 - r2) sub r1, r1, r5 @ t3 = (i0 - i2) - + mov r8, r8, asr #1 ldr r4, [sp] - + mov r9, r9, asr #1 ldr r5, [sp, #4] - - mov r0, r0, asr #1 + + mov r0, r0, asr #1 mov r1, r1, asr #1 - + add r10, r4, r8 @ buf[ 0] = r4 + t0@ add r11, r5, r9 @ buf[ 1] = i4 + t1@ - + sub r4, r4, r8 @ buf[ 8] = r4 - t0@ sub r5, r5, r9 @ buf[ 9] = i4 - t1@ - + strd r10, [r14] strd r4, [r14, #32] - + ldr r10, [sp, #8] ldr r11, [sp, #12] - + add r4, r10, r1 @ buf[ 4] = r5 + t3@ sub r5, r11, r0 @ buf[ 5] = i5 - t2@ - + sub r10, r10, r1 @ buf[12] = r5 - t3@ add r11, r11, r0 @ buf[13] = i5 + t2@ - + strd r4, [r14, #16] strd r10, [r14, #48] - + sub r0, r2, r7 @ r0 = r1 - i3@ add r1, r3, r6 @ i0 = i1 + r3@ - + ldr r11, DATATab - + add r2, r2, r7 @ r2 = r1 + i3@ sub r3, r3, r6 @ i2 = i1 - r3@ - + sub r4, r0, r1 @ r0 - i0 add r5, r0, r1 @ r0 + i0 - + sub r0, r2, r3 @ r2 - i2 add r1, r2, r3 @ r2 + i2 - - smull r8, r6, r4, r11 - smull r9, r7, r5, r11 - + + smull r8, r6, r4, r11 + smull r9, r7, r5, r11 + ldr r2, [sp, #16] ldr r3, [sp, #20] - - smull r8, r4, r0, r11 - smull r9, r5, r1, r11 - + + smull r8, r4, r0, r11 + smull r9, r5, r1, r11 + ldr r10, [sp, #24] ldr r11, [sp, #28] - + sub r8, r2, r6 sub r9, r3, r7 - + add r2, r2, r6 add r3, r3, r7 - + add r6, r10, r5 sub r7, r11, r4 - + sub r0, r10, r5 add r1, r11, r4 - + strd r6, [r14, #8] strd r8, [r14, #24] strd r0, [r14, #40] strd r2, [r14, #56] - + subs r12, r12, #1 add r14, r14, #64 - + bne Radix8First_LOOP - + Radix8First_END: add sp, sp, #0x24 ldmia sp!, {r4 - r11, pc} - + DATATab: .word 0x5a82799a - + @ENDP @ |Radix8First| .end
\ No newline at end of file diff --git a/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/Radix4FFT_v5.s b/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/Radix4FFT_v5.s index bc069b4..e81c82e 100644 --- a/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/Radix4FFT_v5.s +++ b/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/Radix4FFT_v5.s @@ -25,145 +25,145 @@ Radix4FFT: stmdb sp!, {r4 - r11, lr} - sub sp, sp, #32 + sub sp, sp, #32 mov r1, r1, asr #2 - cmp r1, #0 - beq Radix4FFT_END - -Radix4FFT_LOOP1: - mov r14, r0 @ xptr = buf@ + cmp r1, #0 + beq Radix4FFT_END + +Radix4FFT_LOOP1: + mov r14, r0 @ xptr = buf@ mov r10, r1 @ i = num@ mov r9, r2, lsl #3 @ step = 2*bgn@ - cmp r10, #0 - str r0, [sp] - str r1, [sp, #4] + cmp r10, #0 + str r0, [sp] + str r1, [sp, #4] str r2, [sp, #8] - str r3, [sp, #12] - beq Radix4FFT_LOOP1_END - -Radix4FFT_LOOP2: + str r3, [sp, #12] + beq Radix4FFT_LOOP1_END + +Radix4FFT_LOOP2: mov r12, r3 @ csptr = twidTab@ mov r11, r2 @ j = bgn - cmp r11, #0 + cmp r11, #0 str r10, [sp, #16] - beq Radix4FFT_LOOP2_END - -Radix4FFT_LOOP3: - str r11, [sp, #20] - + beq Radix4FFT_LOOP2_END + +Radix4FFT_LOOP3: + str r11, [sp, #20] + ldrd r0, [r14, #0] @ r0 = xptr[0]@ r1 = xptr[1]@ add r14, r14, r9 @ xptr += step@ - - ldrd r10, [r14, #0] @ r2 = xptr[0]@ r3 = xptr[1]@ + + ldrd r10, [r14, #0] @ r2 = xptr[0]@ r3 = xptr[1]@ ldr r8, [r12], #4 @ cosxsinx = csptr[0]@ - + smulwt r4, r10, r8 @ L_mpy_wx(cosx, t0) smulwt r3, r11, r8 @ L_mpy_wx(cosx, t1) - + smlawb r2, r11, r8, r4 @ r2 = L_mpy_wx(cosx, t0) + L_mpy_wx(sinx, t1)@ smulwb r5, r10, r8 @ L_mpy_wx(sinx, t0) - + mov r10, r0, asr #2 @ t0 = r0 >> 2@ mov r11, r1, asr #2 @ t1 = r1 >> 2@ - + sub r3, r3, r5 @ r3 = L_mpy_wx(cosx, t1) - L_mpy_wx(sinx, t0)@ add r14, r14, r9 @ xptr += step@ - + sub r0, r10, r2 @ r0 = t0 - r2@ sub r1, r11, r3 @ r1 = t1 - r3@ - + add r2, r10, r2 @ r2 = t0 + r2@ add r3, r11, r3 @ r3 = t1 + r3@ - + str r2, [sp, #24] str r3, [sp, #28] - + ldrd r10, [r14, #0] @ r4 = xptr[0]@ r5 = xptr[1]@ ldr r8, [r12], #4 @ cosxsinx = csptr[1]@ - + smulwt r6, r10, r8 @ L_mpy_wx(cosx, t0) smulwt r5, r11, r8 @ L_mpy_wx(cosx, t1) - + smlawb r4, r11, r8, r6 @ r4 = L_mpy_wx(cosx, t0) + L_mpy_wx(sinx, t1)@ smulwb r7, r10, r8 @ L_mpy_wx(sinx, t0) - + add r14, r14, r9 @ xptr += step@ sub r5, r5, r7 @ r5 = L_mpy_wx(cosx, t1) - L_mpy_wx(sinx, t0)@ - + ldrd r10, [r14] @ r6 = xptr[0]@ r7 = xptr[1]@ ldr r8, [r12], #4 @ cosxsinx = csptr[1]@ - + smulwt r2, r10, r8 @ L_mpy_wx(cosx, t0) smulwt r7, r11, r8 @ L_mpy_wx(cosx, t1) - + smlawb r6, r11, r8, r2 @ r4 = L_mpy_wx(cosx, t0) + L_mpy_wx(sinx, t1)@ smulwb r3, r10, r8 @ L_mpy_wx(sinx, t0) - + mov r10, r4 @ t0 = r4@ - mov r11, r5 @ t1 = r5@ - + mov r11, r5 @ t1 = r5@ + sub r7, r7, r3 @ r5 = L_mpy_wx(cosx, t1) - L_mpy_wx(sinx, t0)@ - - add r4, r10, r6 @ r4 = t0 + r6@ + + add r4, r10, r6 @ r4 = t0 + r6@ sub r5, r7, r11 @ r5 = r7 - t1@ - + sub r6, r10, r6 @ r6 = t0 - r6@ add r7, r7, r11 @ r7 = r7 + t1@ - + ldr r2, [sp, #24] ldr r3, [sp, #28] - + add r10, r0, r5 @ xptr[0] = r0 + r5@ add r11, r1, r6 @ xptr[0] = r1 + r6 - - strd r10, [r14] + + strd r10, [r14] sub r14, r14, r9 @ xptr -= step@ - + sub r10, r2, r4 @ xptr[0] = r2 - r4@ sub r11, r3, r7 @ xptr[1] = r3 - r7@ - - strd r10, [r14] + + strd r10, [r14] sub r14, r14, r9 @ xptr -= step@ - + sub r10, r0, r5 @ xptr[0] = r0 - r5@ sub r11, r1, r6 @ xptr[0] = r1 - r6 - - strd r10, [r14] + + strd r10, [r14] sub r14, r14, r9 @ xptr -= step@ - + add r10, r2, r4 @ xptr[0] = r2 - r4@ add r11, r3, r7 @ xptr[1] = r3 - r7@ - - strd r10, [r14] + + strd r10, [r14] 
add r14, r14, #8 @ xptr += 2@ - + ldr r11, [sp, #20] subs r11, r11, #1 - bne Radix4FFT_LOOP3 - -Radix4FFT_LOOP2_END: + bne Radix4FFT_LOOP3 + +Radix4FFT_LOOP2_END: ldr r10, [sp, #16] ldr r3, [sp, #12] ldr r2, [sp, #8] - rsb r8, r9, r9, lsl #2 + rsb r8, r9, r9, lsl #2 sub r10, r10, #1 - add r14, r14, r8 - cmp r10, #0 - bhi Radix4FFT_LOOP2 - -Radix4FFT_LOOP1_END: - ldr r0, [sp] + add r14, r14, r8 + cmp r10, #0 + bhi Radix4FFT_LOOP2 + +Radix4FFT_LOOP1_END: + ldr r0, [sp] ldr r1, [sp, #4] add r3, r3, r8, asr #1 - mov r2, r2, lsl #2 - movs r1, r1, asr #2 - bne Radix4FFT_LOOP1 - -Radix4FFT_END: - add sp, sp, #32 + mov r2, r2, lsl #2 + movs r1, r1, asr #2 + bne Radix4FFT_LOOP1 + +Radix4FFT_END: + add sp, sp, #32 ldmia sp!, {r4 - r11, pc} - + @ENDP @ |Radix4FFT| .end
\ No newline at end of file diff --git a/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/band_nrg_v5.s b/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/band_nrg_v5.s index 3b88810..4789f6d 100644 --- a/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/band_nrg_v5.s +++ b/media/libstagefright/codecs/aacenc/src/asm/ARMV5E/band_nrg_v5.s @@ -26,31 +26,31 @@ .global CalcBandEnergy CalcBandEnergy: - stmdb sp!, {r4 - r11, lr} - - mov r2, r2, lsl #16 + stmdb sp!, {r4 - r11, lr} + + mov r2, r2, lsl #16 ldr r12, [r13, #36] mov r9, #0 - mov r5, r2, asr #16 - mov r4, #0 - cmp r5, #0 - ble L212 + mov r5, r2, asr #16 + mov r4, #0 + cmp r5, #0 + ble L212 L22: - mov r2, r4, lsl #1 - ldrsh r10, [r1, r2] - add r11, r1, r2 - ldrsh r2, [r11, #2] - mov r14, #0 - cmp r10, r2 - bge L28 - + mov r2, r4, lsl #1 + ldrsh r10, [r1, r2] + add r11, r1, r2 + ldrsh r2, [r11, #2] + mov r14, #0 + cmp r10, r2 + bge L28 + L23: - ldr r11, [r0, +r10, lsl #2] - add r10, r10, #1 - ldr r6, [r0, +r10, lsl #2] + ldr r11, [r0, +r10, lsl #2] + add r10, r10, #1 + ldr r6, [r0, +r10, lsl #2] smull r11, r7, r11, r11 - add r10, r10, #1 + add r10, r10, #1 smull r6, r8, r6, r6 ldr r11, [r0, +r10, lsl #2] qadd r14, r14, r7 @@ -59,71 +59,71 @@ L23: ldr r6, [r0, +r10, lsl #2] qadd r14, r14, r8 smull r6, r8, r6, r6 - add r10, r10, #1 + add r10, r10, #1 qadd r14, r14, r7 cmp r10, r2 qadd r14, r14, r8 - blt L23 + blt L23 -L28: +L28: qadd r14, r14, r14 str r14, [r3, +r4, lsl #2] - add r4, r4, #1 + add r4, r4, #1 qadd r9, r9, r14 - cmp r4, r5 + cmp r4, r5 - blt L22 + blt L22 -L212: - str r9, [r12, #0] +L212: + str r9, [r12, #0] ldmia sp!, {r4 - r11, pc} - + @ENDP ; |CalcBandEnergy| - + .global CalcBandEnergyMS CalcBandEnergyMS: stmdb sp!, {r4 - r11, lr} sub r13, r13, #24 - - mov r12, #0 - mov r3, r3, lsl #16 - mov r14, #0 - mov r3, r3, asr #16 - cmp r3, #0 - mov r4, #0 - ble L315 - -L32: + + mov r12, #0 + mov r3, r3, lsl #16 + mov r14, #0 + mov r3, r3, asr #16 + cmp r3, #0 + mov r4, #0 + ble L315 + +L32: mov r5, r4, lsl #1 mov r6, #0 ldrsh r10, [r2, r5] add r5, r2, r5 mov r7, #0 - ldrsh r11, [r5, #2] - cmp r10, r11 - bge L39 + ldrsh r11, [r5, #2] + cmp r10, r11 + bge L39 str r3, [r13, #4] str r4, [r13, #8] str r12, [r13, #12] str r14, [r13, #16] -L33: - ldr r8, [r0, +r10, lsl #2] +L33: + ldr r8, [r0, +r10, lsl #2] ldr r9, [r1, +r10, lsl #2] mov r8, r8, asr #1 add r10, r10, #1 mov r9, r9, asr #1 - ldr r12, [r0, +r10, lsl #2] - add r5, r8, r9 + ldr r12, [r0, +r10, lsl #2] + add r5, r8, r9 ldr r14, [r1, +r10, lsl #2] sub r8, r8, r9 - smull r5, r3, r5, r5 + smull r5, r3, r5, r5 mov r12, r12, asr #1 - smull r8, r4, r8, r8 + smull r8, r4, r8, r8 mov r14, r14, asr #1 qadd r6, r6, r3 @@ -131,27 +131,27 @@ L33: qadd r7, r7, r4 sub r8, r12, r14 - smull r5, r3, r5, r5 + smull r5, r3, r5, r5 add r10, r10, #1 - smull r8, r4, r8, r8 - + smull r8, r4, r8, r8 + qadd r6, r6, r3 qadd r7, r7, r4 - ldr r8, [r0, +r10, lsl #2] + ldr r8, [r0, +r10, lsl #2] ldr r9, [r1, +r10, lsl #2] mov r8, r8, asr #1 add r10, r10, #1 mov r9, r9, asr #1 - ldr r12, [r0, +r10, lsl #2] - add r5, r8, r9 + ldr r12, [r0, +r10, lsl #2] + add r5, r8, r9 ldr r14, [r1, +r10, lsl #2] sub r8, r8, r9 - smull r5, r3, r5, r5 + smull r5, r3, r5, r5 mov r12, r12, asr #1 - smull r8, r4, r8, r8 + smull r8, r4, r8, r8 mov r14, r14, asr #1 qadd r6, r6, r3 @@ -159,37 +159,37 @@ L33: qadd r7, r7, r4 sub r8, r12, r14 - smull r5, r3, r5, r5 + smull r5, r3, r5, r5 add r10, r10, #1 - smull r8, r4, r8, r8 - + smull r8, r4, r8, r8 + qadd r6, r6, r3 qadd r7, r7, r4 cmp r10, r11 - + blt L33 ldr r3, [r13, #4] - ldr 
r4, [r13, #8] + ldr r4, [r13, #8] ldr r12, [r13, #12] ldr r14, [r13, #16] -L39: +L39: qadd r6, r6, r6 - qadd r7, r7, r7 - + qadd r7, r7, r7 + ldr r8, [r13, #60] ldr r9, [r13, #68] qadd r12, r12, r6 qadd r14, r14, r7 - - str r6, [r8, +r4, lsl #2] - str r7, [r9, +r4, lsl #2] - + + str r6, [r8, +r4, lsl #2] + str r7, [r9, +r4, lsl #2] + add r4, r4, #1 cmp r4, r3 - blt L32 + blt L32 L315: ldr r8, [r13, #64] diff --git a/media/libstagefright/codecs/aacenc/src/asm/ARMV7/PrePostMDCT_v7.s b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/PrePostMDCT_v7.s index a04c105..64d767a 100644 --- a/media/libstagefright/codecs/aacenc/src/asm/ARMV7/PrePostMDCT_v7.s +++ b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/PrePostMDCT_v7.s @@ -26,53 +26,53 @@ PreMDCT: stmdb sp!, {r4 - r11, lr} - + add r9, r0, r1, lsl #2 sub r3, r9, #32 movs r1, r1, asr #2 - beq PreMDCT_END - + beq PreMDCT_END + PreMDCT_LOOP: VLD4.I32 {d0, d2, d4, d6}, [r2]! @ cosa = *csptr++@ sina = *csptr++@ VLD4.I32 {d1, d3, d5, d7}, [r2]! @ cosb = *csptr++@ sinb = *csptr++@ VLD2.I32 {d8, d9, d10, d11}, [r0] @ tr1 = *(buf0 + 0)@ ti2 = *(buf0 + 1)@ VLD2.I32 {d13, d15}, [r3]! @ tr2 = *(buf1 - 1)@ ti1 = *(buf1 + 0)@ VLD2.I32 {d12, d14}, [r3]! @ tr2 = *(buf1 - 1)@ ti1 = *(buf1 + 0)@ - - VREV64.32 Q8, Q7 + + VREV64.32 Q8, Q7 VREV64.32 Q9, Q6 - + VQDMULH.S32 Q10, Q0, Q4 @ MULHIGH(cosa, tr1) VQDMULH.S32 Q11, Q1, Q8 @ MULHIGH(sina, ti1) VQDMULH.S32 Q12, Q0, Q8 @ MULHIGH(cosa, ti1) VQDMULH.S32 Q13, Q1, Q4 @ MULHIGH(sina, tr1) - + VADD.S32 Q0, Q10, Q11 @ *buf0++ = MULHIGH(cosa, tr1) + MULHIGH(sina, ti1)@ VSUB.S32 Q1, Q12, Q13 @ *buf0++ = MULHIGH(cosa, ti1) - MULHIGH(sina, tr1)@ - + VST2.I32 {d0, d1, d2, d3}, [r0]! sub r3, r3, #32 - + VQDMULH.S32 Q10, Q2, Q9 @ MULHIGH(cosb, tr2) VQDMULH.S32 Q11, Q3, Q5 @ MULHIGH(sinb, ti2) VQDMULH.S32 Q12, Q2, Q5 @ MULHIGH(cosb, ti2) VQDMULH.S32 Q13, Q3, Q9 @ MULHIGH(sinb, tr2) - + VADD.S32 Q0, Q10, Q11 @ MULHIGH(cosa, tr2) + MULHIGH(sina, ti2)@ VSUB.S32 Q1, Q12, Q13 @ MULHIGH(cosa, ti2) - MULHIGH(sina, tr2)@ - + VREV64.32 Q3, Q1 VREV64.32 Q2, Q0 - - VST2.I32 {d5, d7}, [r3]! - VST2.I32 {d4, d6}, [r3]! - + + VST2.I32 {d5, d7}, [r3]! + VST2.I32 {d4, d6}, [r3]! + subs r1, r1, #4 - sub r3, r3, #64 + sub r3, r3, #64 bne PreMDCT_LOOP - + PreMDCT_END: ldmia sp!, {r4 - r11, pc} @ENDP @ |PreMDCT| @@ -82,50 +82,50 @@ PreMDCT_END: PostMDCT: stmdb sp!, {r4 - r11, lr} - + add r9, r0, r1, lsl #2 sub r3, r9, #32 movs r1, r1, asr #2 beq PostMDCT_END - + PostMDCT_LOOP: VLD4.I32 {d0, d2, d4, d6}, [r2]! @ cosa = *csptr++@ sina = *csptr++@ VLD4.I32 {d1, d3, d5, d7}, [r2]! @ cosb = *csptr++@ sinb = *csptr++@ VLD2.I32 {d8, d9, d10, d11}, [r0] @ tr1 = *(zbuf1 + 0)@ ti1 = *(zbuf1 + 1)@ VLD2.I32 {d13, d15}, [r3]! @ tr2 = *(zbuf2 - 1)@ ti2 = *(zbuf2 + 0)@ - VLD2.I32 {d12, d14}, [r3]! @ tr2 = *(zbuf2 - 1)@ ti2 = *(zbuf2 + 0)@ + VLD2.I32 {d12, d14}, [r3]! 
@ tr2 = *(zbuf2 - 1)@ ti2 = *(zbuf2 + 0)@ + + VREV64.32 Q8, Q6 + VREV64.32 Q9, Q7 - VREV64.32 Q8, Q6 - VREV64.32 Q9, Q7 - VQDMULH.S32 Q10, Q0, Q4 @ MULHIGH(cosa, tr1) VQDMULH.S32 Q11, Q1, Q5 @ MULHIGH(sina, ti1) VQDMULH.S32 Q12, Q0, Q5 @ MULHIGH(cosa, ti1) VQDMULH.S32 Q13, Q1, Q4 @ MULHIGH(sina, tr1) - + VADD.S32 Q0, Q10, Q11 @ *buf0++ = MULHIGH(cosa, tr1) + MULHIGH(sina, ti1)@ VSUB.S32 Q5, Q13, Q12 @ *buf1-- = MULHIGH(sina, tr1) - MULHIGH(cosa, ti1)@ - + VQDMULH.S32 Q10, Q2, Q8 @ MULHIGH(cosb, tr2) VQDMULH.S32 Q11, Q3, Q9 @ MULHIGH(sinb, ti2) VQDMULH.S32 Q12, Q2, Q9 @ MULHIGH(cosb, ti2) VQDMULH.S32 Q13, Q3, Q8 @ MULHIGH(sinb, tr2) - + VADD.S32 Q4, Q10, Q11 @ *buf1-- = MULHIGH(cosa, tr2) + MULHIGH(sina, ti2)@ - VSUB.S32 Q1, Q13, Q12 @ *buf0++ = MULHIGH(sina, tr2) - MULHIGH(cosa, ti2)@ - + VSUB.S32 Q1, Q13, Q12 @ *buf0++ = MULHIGH(sina, tr2) - MULHIGH(cosa, ti2)@ + VREV64.32 Q2, Q4 - VREV64.32 Q3, Q5 - - sub r3, r3, #32 + VREV64.32 Q3, Q5 + + sub r3, r3, #32 VST2.I32 {d0, d1, d2, d3}, [r0]! - - VST2.I32 {d5, d7}, [r3]! - VST2.I32 {d4, d6}, [r3]! - + + VST2.I32 {d5, d7}, [r3]! + VST2.I32 {d4, d6}, [r3]! + subs r1, r1, #4 - sub r3, r3, #64 + sub r3, r3, #64 bne PostMDCT_LOOP PostMDCT_END: diff --git a/media/libstagefright/codecs/aacenc/src/asm/ARMV7/R4R8First_v7.s b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/R4R8First_v7.s index defd45d..7fc5520 100644 --- a/media/libstagefright/codecs/aacenc/src/asm/ARMV7/R4R8First_v7.s +++ b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/R4R8First_v7.s @@ -29,86 +29,86 @@ Radix8First: ldr r3, SQRT1_2 cmp r1, #0 - - VDUP.I32 Q15, r3 + + VDUP.I32 Q15, r3 beq Radix8First_END - + Radix8First_LOOP: VLD1.I32 {d0, d1, d2, d3}, [r0]! VLD1.I32 {d8, d9, d10, d11}, [r0]! - + VADD.S32 d4, d0, d1 @ r0 = buf[0] + buf[2]@i0 = buf[1] + buf[3]@ - VSUB.S32 d5, d0, d1 @ r1 = buf[0] - buf[2]@i1 = buf[1] - buf[3]@ - VSUB.S32 d7, d2, d3 @ r2 = buf[4] - buf[6]@i2 = buf[5] - buf[7]@ + VSUB.S32 d5, d0, d1 @ r1 = buf[0] - buf[2]@i1 = buf[1] - buf[3]@ + VSUB.S32 d7, d2, d3 @ r2 = buf[4] - buf[6]@i2 = buf[5] - buf[7]@ VADD.S32 d6, d2, d3 @ r3 = buf[4] + buf[6]@i3 = buf[5] + buf[7]@ - VREV64.I32 d7, d7 - + VREV64.I32 d7, d7 + VADD.S32 Q0, Q2, Q3 @ r4 = (r0 + r2)@i4 = (i0 + i2)@i6 = (i1 + r3)@r7 = (r1 + i3) VSUB.S32 Q1, Q2, Q3 @ r5 = (r0 - r2)@i5 = (i0 - i2)@r6 = (r1 - i3)@i7 = (i1 - r3)@ - VREV64.I32 d3, d3 + VREV64.I32 d3, d3 VADD.S32 d4, d8, d9 @ r0 = buf[ 8] + buf[10]@i0 = buf[ 9] + buf[11]@ - VSUB.S32 d7, d10, d11 @ r1 = buf[12] - buf[14]@i1 = buf[13] - buf[15]@ + VSUB.S32 d7, d10, d11 @ r1 = buf[12] - buf[14]@i1 = buf[13] - buf[15]@ VADD.S32 d6, d10, d11 @ r2 = buf[12] + buf[14]@i2 = buf[13] + buf[15]@ - VREV64.I32 d7, d7 + VREV64.I32 d7, d7 VSUB.S32 d5, d8, d9 @ r3 = buf[ 8] - buf[10]@i3 = buf[ 9] - buf[11]@ - - VTRN.32 d1, d3 - + + VTRN.32 d1, d3 + VADD.S32 Q4, Q2, Q3 @ t0 = (r0 + r2) >> 1@t1 = (i0 + i2) >> 1@i0 = i1 + r3@r2 = r1 + i3@ VSUB.S32 Q5, Q2, Q3 @ t2 = (r0 - r2) >> 1@t3 = (i0 - i2) >> 1@r0 = r1 - i3@i2 = i1 - r3@ - + VREV64.I32 d3, d3 - - VSHR.S32 d8, d8, #1 + + VSHR.S32 d8, d8, #1 VSHR.S32 Q0, Q0, #1 VREV64.I32 d10, d10 VTRN.32 d11, d9 VSHR.S32 Q1, Q1, #1 VSHR.S32 d10, d10, #1 VREV64.I32 d9, d9 - + sub r0, r0, #0x40 - + VADD.S32 d12, d0, d8 - VSUB.S32 d16, d0, d8 + VSUB.S32 d16, d0, d8 VADD.S32 d14, d2, d10 VSUB.S32 d18, d2, d10 - + VSUB.S32 d4, d11, d9 VADD.S32 d5, d11, d9 - + VREV64.I32 d18, d18 - + VQDMULH.S32 Q3, Q2, Q15 VTRN.32 d14, d18 VTRN.32 d6, d7 - VREV64.I32 d18, d18 - + VREV64.I32 d18, d18 + VSUB.S32 d15, d3, d6 VREV64.I32 d7, d7 VADD.S32 d19, d3, 
d6 VADD.S32 d13, d1, d7 VSUB.S32 d17, d1, d7 - + VREV64.I32 d17, d17 VTRN.32 d13, d17 VREV64.I32 d17, d17 - - subs r1, r1, #1 - + + subs r1, r1, #1 + VST1.I32 {d12, d13, d14, d15}, [r0]! - VST1.I32 {d16, d17, d18, d19}, [r0]! + VST1.I32 {d16, d17, d18, d19}, [r0]! bne Radix8First_LOOP - + Radix8First_END: - ldmia sp!, {r4 - r11, pc} + ldmia sp!, {r4 - r11, pc} SQRT1_2: .word 0x2d413ccd - + @ENDP @ |Radix8First| - + .section .text .global Radix4First @@ -117,28 +117,28 @@ Radix4First: cmp r1, #0 beq Radix4First_END - + Radix4First_LOOP: - VLD1.I32 {d0, d1, d2, d3}, [r0] - - VADD.S32 d4, d0, d1 @ r0 = buf[0] + buf[2]@ r1 = buf[1] + buf[3]@ + VLD1.I32 {d0, d1, d2, d3}, [r0] + + VADD.S32 d4, d0, d1 @ r0 = buf[0] + buf[2]@ r1 = buf[1] + buf[3]@ VSUB.S32 d5, d0, d1 @ r2 = buf[0] - buf[2]@ r3 = buf[1] - buf[3]@ VSUB.S32 d7, d2, d3 @ r4 = buf[4] + buf[6]@ r5 = buf[5] + buf[7]@ VADD.S32 d6, d2, d3 @ r6 = buf[4] - buf[6]@ r7 = buf[5] - buf[7]@ - - VREV64.I32 d7, d7 @ - + + VREV64.I32 d7, d7 @ + VADD.S32 Q4, Q2, Q3 VSUB.S32 Q5, Q2, Q3 - + VREV64.I32 d11, d11 VTRN.32 d9, d11 - subs r1, r1, #1 + subs r1, r1, #1 VREV64.I32 d11, d11 VST1.I32 {d8, d9, d10, d11}, [r0]! bne Radix4First_LOOP - + Radix4First_END: ldmia sp!, {r4 - r11, pc} diff --git a/media/libstagefright/codecs/aacenc/src/asm/ARMV7/Radix4FFT_v7.s b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/Radix4FFT_v7.s index 84a4a80..b8655ae 100644 --- a/media/libstagefright/codecs/aacenc/src/asm/ARMV7/Radix4FFT_v7.s +++ b/media/libstagefright/codecs/aacenc/src/asm/ARMV7/Radix4FFT_v7.s @@ -28,116 +28,116 @@ Radix4FFT: stmdb sp!, {r4 - r11, lr} mov r1, r1, asr #2 - cmp r1, #0 - beq Radix4FFT_END - -Radix4FFT_LOOP1: - mov r5, r2, lsl #1 - mov r8, r0 - mov r7, r1 - mov r5, r5, lsl #2 - cmp r1, #0 - rsbeq r12, r5, r5, lsl #2 - beq Radix4FFT_LOOP1_END - - rsb r12, r5, r5, lsl #2 - -Radix4FFT_LOOP2: - mov r6, r3 - mov r4, r2 - cmp r2, #0 - beq Radix4FFT_LOOP2_END - -Radix4FFT_LOOP3: + cmp r1, #0 + beq Radix4FFT_END + +Radix4FFT_LOOP1: + mov r5, r2, lsl #1 + mov r8, r0 + mov r7, r1 + mov r5, r5, lsl #2 + cmp r1, #0 + rsbeq r12, r5, r5, lsl #2 + beq Radix4FFT_LOOP1_END + + rsb r12, r5, r5, lsl #2 + +Radix4FFT_LOOP2: + mov r6, r3 + mov r4, r2 + cmp r2, #0 + beq Radix4FFT_LOOP2_END + +Radix4FFT_LOOP3: @r0 = xptr[0]@ @r1 = xptr[1]@ - VLD2.I32 {D0, D1, D2, D3}, [r8] + VLD2.I32 {D0, D1, D2, D3}, [r8] VLD2.I32 {D28, D29, D30, D31}, [r6]! @ cosx = csptr[0]@ sinx = csptr[1]@ - - add r8, r8, r5 @ xptr += step@ + + add r8, r8, r5 @ xptr += step@ VLD2.I32 {D4, D5, D6,D7}, [r8] @ r2 = xptr[0]@ r3 = xptr[1]@ - + VQDMULH.S32 Q10, Q2, Q14 @ MULHIGH(cosx, t0) VQDMULH.S32 Q11, Q3, Q15 @ MULHIGH(sinx, t1) VQDMULH.S32 Q12, Q3, Q14 @ MULHIGH(cosx, t1) VQDMULH.S32 Q13, Q2, Q15 @ MULHIGH(sinx, t0) - + VADD.S32 Q2, Q10, Q11 @ MULHIGH(cosx, t0) + MULHIGH(sinx, t1) VSUB.S32 Q3, Q12, Q13 @ MULHIGH(cosx, t1) - MULHIGH(sinx, t0) - + add r8, r8, r5 @ xptr += step@ VSHR.S32 Q10, Q0, #2 @ t0 = r0 >> 2@ VSHR.S32 Q11, Q1, #2 @ t1 = r1 >> 2@ - + VSUB.S32 Q0, Q10, Q2 @ r0 = t0 - r2@ VSUB.S32 Q1, Q11, Q3 @ r1 = t1 - r3@ VADD.S32 Q2, Q10, Q2 @ r2 = t0 + r2@ VADD.S32 Q3, Q11, Q3 @ r3 = t1 + r3@ - - VLD2.I32 {D8, D9, D10, D11}, [r8] - VLD2.I32 {D28, D29, D30, D31}, [r6]! + + VLD2.I32 {D8, D9, D10, D11}, [r8] + VLD2.I32 {D28, D29, D30, D31}, [r6]! 
add r8, r8, r5 VQDMULH.S32 Q10, Q4, Q14 @ MULHIGH(cosx, t0) VQDMULH.S32 Q11, Q5, Q15 @ MULHIGH(sinx, t1) VQDMULH.S32 Q12, Q5, Q14 @ MULHIGH(cosx, t1) VQDMULH.S32 Q13, Q4, Q15 @ MULHIGH(sinx, t0) - + VADD.S32 Q8, Q10, Q11 @ MULHIGH(cosx, t0) + MULHIGH(sinx, t1) - VSUB.S32 Q9, Q12, Q13 @ MULHIGH(cosx, t1) - MULHIGH(sinx, t0) - - VLD2.I32 {D12, D13, D14, D15}, [r8] + VSUB.S32 Q9, Q12, Q13 @ MULHIGH(cosx, t1) - MULHIGH(sinx, t0) + + VLD2.I32 {D12, D13, D14, D15}, [r8] VLD2.I32 {D28, D29, D30, D31}, [r6]! - + VQDMULH.S32 Q10, Q6, Q14 @ MULHIGH(cosx, t0) VQDMULH.S32 Q11, Q7, Q15 @ MULHIGH(sinx, t1) VQDMULH.S32 Q12, Q7, Q14 @ MULHIGH(cosx, t1) VQDMULH.S32 Q13, Q6, Q15 @ MULHIGH(sinx, t0) - + VADD.S32 Q6, Q10, Q11 @ MULHIGH(cosx, t0) + MULHIGH(sinx, t1) - VSUB.S32 Q7, Q12, Q13 @ MULHIGH(cosx, t1) - MULHIGH(sinx, t0) - + VSUB.S32 Q7, Q12, Q13 @ MULHIGH(cosx, t1) - MULHIGH(sinx, t0) + VADD.S32 Q4, Q8, Q6 @ r4 = t0 + r6@ VSUB.S32 Q5, Q7, Q9 @ r5 = r7 - t1@ VSUB.S32 Q6, Q8, Q6 @ r6 = t0 - r6@ VADD.S32 Q7, Q7, Q9 @ r7 = r7 + t1@ - + VADD.S32 Q8, Q0, Q5 @ xptr[0] = r0 + r5@ VADD.S32 Q9, Q1, Q6 @ xptr[1] = r1 + r6@ VST2.I32 {D16, D17, D18, D19}, [r8] - + VSUB.S32 Q10, Q2, Q4 @ xptr[0] = r2 - r4@ sub r8, r8, r5 @ xptr -= step@ VSUB.S32 Q11, Q3, Q7 @ xptr[1] = r3 - r7@ VST2.I32 {D20, D21, D22, D23}, [r8] - + VSUB.S32 Q8, Q0, Q5 @ xptr[0] = r0 - r5@ sub r8, r8, r5 @ xptr -= step@ VSUB.S32 Q9, Q1, Q6 @ xptr[1] = r1 - r6@ VST2.I32 {D16, D17, D18, D19}, [r8] - + VADD.S32 Q10, Q2, Q4 @ xptr[0] = r2 + r4@ sub r8, r8, r5 @ xptr -= step@ VADD.S32 Q11, Q3, Q7 @ xptr[1] = r3 + r7@ VST2.I32 {D20, D21, D22, D23}, [r8]! - - subs r4, r4, #4 - bne Radix4FFT_LOOP3 - -Radix4FFT_LOOP2_END: - add r8, r8, r12 - sub r7, r7, #1 + + subs r4, r4, #4 + bne Radix4FFT_LOOP3 + +Radix4FFT_LOOP2_END: + add r8, r8, r12 + sub r7, r7, #1 cmp r7, #0 - bhi Radix4FFT_LOOP2 - -Radix4FFT_LOOP1_END: - add r3, r12, r3 - mov r2, r2, lsl #2 - movs r1, r1, asr #2 - bne Radix4FFT_LOOP1 - -Radix4FFT_END: + bhi Radix4FFT_LOOP2 + +Radix4FFT_LOOP1_END: + add r3, r12, r3 + mov r2, r2, lsl #2 + movs r1, r1, asr #2 + bne Radix4FFT_LOOP1 + +Radix4FFT_END: ldmia sp!, {r4 - r11, pc} - + @ENDP @ |Radix4FFT| .end
\ No newline at end of file
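
The comment annotations throughout these routines (MULHIGH, fixmul, L_mpy_wx) name the encoder's fixed-point helpers. As a guide to what the SMULL/SMULWB/SMULWT sequences and the NEON VQDMULH.S32 instructions above compute, here is a minimal C sketch of those semantics; the helper names are taken from the assembly comments, not from the encoder's basic-ops headers, so treat this as an illustration of the instruction behavior rather than the project's literal macros.

#include <stdint.h>

/* High 32 bits of a signed 32x32 multiply -- what SMULL leaves in its
 * high result register (e.g. "MULHIGH(cosa, tr1)" in PreMDCT above). */
static inline int32_t MULHIGH(int32_t a, int32_t b)
{
    return (int32_t)(((int64_t)a * (int64_t)b) >> 32);
}

/* 32x16 multiply keeping the top 32 bits of the 48-bit product -- the
 * SMULWB/SMULWT pattern behind "L_mpy_wx(cosx, t0)" in Radix4FFT_v5.
 * SMULWB uses the low halfword of the 16-bit operand, SMULWT the high. */
static inline int32_t L_mpy_wx(int32_t a, int16_t w)
{
    return (int32_t)(((int64_t)a * w) >> 16);
}

/* NEON VQDMULH.S32 returns the saturated, doubled high half of the
 * product, roughly 2 * MULHIGH(a, b); the ARMv7 loops rely on this
 * built-in doubling where the ARMv5E code shifts left by one after
 * SMULL (the "mov r3, r3, lsl #1" steps in CalcWindowEnergy). */
static inline int32_t vqdmulh_s32(int32_t a, int32_t b)
{
    int64_t p = ((int64_t)a * (int64_t)b) >> 31;
    if (p > INT32_MAX)
        p = INT32_MAX;  /* saturates the INT32_MIN * INT32_MIN case */
    return (int32_t)p;
}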