1 files changed, 428 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/aacdec/fft_rx4_long.cpp b/media/libstagefright/codecs/aacdec/fft_rx4_long.cpp
new file mode 100644
index 0000000..c517e7e
--- /dev/null
+++ b/media/libstagefright/codecs/aacdec/fft_rx4_long.cpp
@@ -0,0 +1,428 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+/*
+
+ Pathname: ./src/fft_rx4_long.c
+ Funtions: fft_rx4_long
+
+------------------------------------------------------------------------------
+ REVISION HISTORY
+
+ Description:
+            (1) Eliminated search for max in the main loop.
+            (2) Reduced precision on w_256rx4 from Q15 to Q10
+
+ Description:
+            (1) Created function fft_rx4_long_no_max to overcome LTP problem.
+
+ Description:
+            (1) Modified shift so the accumulation growths faster than the
+                downshift, so now the input can be as high as 1.0 and saturation
+                will not occurre. The accumulation times the Q10 format will
+                never exceed 31 bits. This increases precision
+            (2) Eliminated unneeded data moves, used before for max search.
+            (3) Eliminated function fft_rx4_long_no_max.
+
+ Description:
+            (1) Added comment to explain max search elimination and
+                Q format during multiplications
+
+ Who:                       Date:
+ Description:
+
+------------------------------------------------------------------------------
+ INPUT AND OUTPUT DEFINITIONS
+
+ Inputs:
+    Data       =  Input complex vector, arranged in the following order:
+                  real, imag, real, imag...
+                  This is a complex vector whose elements (real and Imag) are
+                  Int32.
+                  type Int32 *
+
+    peak_value =  Input,  peak value of the input vector
+                  Output,  peak value of the resulting vector
+                  type Int32 *
+
+ Local Stores/Buffers/Pointers Needed:
+    None
+
+ Global Stores/Buffers/Pointers Needed:
+    None
+
+ Outputs:
+    None
+
+ Pointers and Buffers Modified:
+    calculation are done in-place and returned in Data
+
+ Local Stores Modified:
+    None
+
+ Global Stores Modified:
+    None
+
+------------------------------------------------------------------------------
+ FUNCTION DESCRIPTION
+
+    Fast Fourier Transform, radix 4 with Decimation in Frequency and block
+    floating point arithmetic.
+    The radix-4 FFT  simply divides the FFT into four smaller FFTs. Each of
+    the smaller FFTs is then further divided into smaller ones and so on.
+    It consists of log 4 N stages and each stage consists of N/4 dragonflies.
+
+    An FFT is nothing but a bundle of multiplications and summations which
+    may overflow during calculations.
+
+
+    This routine uses a scheme to test and scale the result output from
+    each FFT stage in order to fix the accumulation overflow.
+
+    The Input Data should be in Q13 format to get the highest precision.
+    At the end of each dragonfly calculation, a test for possible bit growth
+    is made, if bit growth is possible the Data is scale down back to Q13.
+
+------------------------------------------------------------------------------
+ REQUIREMENTS
+
+    This function should provide a fixed point FFT for an input array
+    of size 256.
+
+------------------------------------------------------------------------------
+ REFERENCES
+
+    [1] Advance Digital Signal Processing, J. Proakis, C. Rader, F. Ling,
+        C. Nikias, Macmillan Pub. Co.
+
+------------------------------------------------------------------------------
+ PSEUDO-CODE
+
+
+   MODIFY( x[] )
+   RETURN( exponent )
+
+------------------------------------------------------------------------------
+ RESOURCES USED
+   When the code is written for a specific target processor the
+     the resources used should be documented below.
+
+ STACK USAGE: [stack count for this module] + [variable to represent
+          stack usage for each subroutine called]
+
+     where: [stack usage variable] = stack usage for [subroutine
+         name] (see [filename].ext)
+
+ DATA MEMORY USED: x words
+
+ PROGRAM MEMORY USED: x words
+
+ CLOCK CYCLES: [cycle count equation for this module] + [variable
+           used to represent cycle count for each subroutine
+           called]
+
+     where: [cycle count variable] = cycle count for [subroutine
+        name] (see [filename].ext)
+
+------------------------------------------------------------------------------
+*/
+/*----------------------------------------------------------------------------
+; INCLUDES
+----------------------------------------------------------------------------*/
+
+#include "pv_audio_type_defs.h"
+#include "fft_rx4.h"
+
+#include "fxp_mul32.h"
+
+/*----------------------------------------------------------------------------
+; MACROS
+; Define module specific macros here
+----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+; DEFINES
+; Include all pre-processor statements here. Include conditional
+; compile variables also.
+----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+; LOCAL FUNCTION DEFINITIONS
+; Function Prototype declaration
+----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+; LOCAL VARIABLE DEFINITIONS
+; Variable declaration - defined here and used outside this module
+----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+; EXTERNAL FUNCTION REFERENCES
+; Declare functions defined elsewhere and referenced in this module
+----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+; EXTERNAL VARIABLES REFERENCES
+; Declare variables used in this module but defined elsewhere
+----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+; EXTERNAL GLOBAL STORE/BUFFER/POINTER REFERENCES
+; Declare variables used in this module but defined elsewhere
+----------------------------------------------------------------------------*/
+
+/*----------------------------------------------------------------------------
+; FUNCTION CODE
+----------------------------------------------------------------------------*/
+
+
+void fft_rx4_long(
+    Int32      Data[],
+    Int32      *peak_value)
+
+{
+    Int     n1;
+    Int     n2;
+    Int     j;
+    Int     k;
+    Int     i;
+
+    Int32   t1;
+    Int32   t2;
+    Int32   r1;
+    Int32   r2;
+    Int32   r3;
+    Int32   r4;
+    Int32   s1;
+    Int32   s2;
+    Int32   s3;
+    Int32   *pData1;
+    Int32   *pData2;
+    Int32   *pData3;
+    Int32   *pData4;
+    Int32   temp1;
+    Int32   temp2;
+    Int32   temp3;
+    Int32   temp4;
+    Int32   max;
+
+    Int32   exp_jw1;
+    Int32   exp_jw2;
+    Int32   exp_jw3;
+
+
+
+    const Int32  *pw = W_256rx4;
+
+    n2 = FFT_RX4_LONG;
+
+    for (k = FFT_RX4_LONG; k > 4; k >>= 2)
+    {
+
+        n1 = n2;
+        n2 >>= 2;
+
+        for (i = 0; i < FFT_RX4_LONG; i += n1)
+        {
+            pData1 = &Data[ i<<1];
+            pData2 = pData1 + n1;
+
+            temp1   = *pData1;
+            temp2   = *pData2;
+
+            r1      = temp1 + temp2;
+            r2      = temp1 - temp2;
+
+            pData3 = pData1 + (n1 >> 1);
+            pData4 = pData3 + n1;
+            temp3   = *pData3++;
+            temp4   = *pData4++;
+
+            t1      = temp3 + temp4;
+
+            *(pData1++) = (r1 + t1);
+            t2      = temp3 - temp4;
+            *(pData2++) = (r1 - t1);
+
+            temp1   = *pData1;
+            temp2   = *pData2;
+
+            s1      = temp1 + temp2;
+            temp3   = *pData3;
+            s2      = temp1 - temp2;
+            temp4   = *pData4;
+            *pData3--  = (s2 - t2);
+            *pData4--  = (s2 + t2);
+
+            t1      = temp3 + temp4;
+
+            *pData1    = (s1 + t1);
+            *pData2    = (s1 - t1);
+
+            r1      = temp3 - temp4;
+
+            *pData4    = (r2 - r1);
+            *pData3    = (r2 + r1);
+
+        }  /* i */
+
+
+
+        for (j = 1; j < n2; j++)
+        {
+
+            exp_jw1 = (*pw++);
+            exp_jw2 = (*pw++);
+            exp_jw3 = (*pw++);
+
+
+            for (i = j; i < FFT_RX4_LONG; i += n1)
+            {
+                pData1 = &Data[ i<<1];
+                pData2 = pData1 + n1;
+
+                temp1   = *pData1;
+                temp2   = *pData2++;
+
+                r1      = temp1 + temp2;
+                r2      = temp1 - temp2;
+
+                pData3 = pData1 + (n1 >> 1);
+                pData4 = pData3 + n1;
+                temp3   = *pData3++;
+                temp4   = *pData4++;
+
+                r3      = temp3 + temp4;
+                r4      = temp3 - temp4;
+
+                *(pData1++) = (r1 + r3);
+                r1          = (r1 - r3) << 1;
+
+                temp2   = *pData2;
+                temp1   = *pData1;
+
+                s1      = temp1 + temp2;
+                s2      = temp1 - temp2;
+                s3      = (s2 + r4) << 1;
+                s2      = (s2 - r4) << 1;
+
+                temp3   = *pData3;
+                temp4   = *pData4;
+
+                t1      = temp3 + temp4;
+                t2      = temp3 - temp4;
+
+                *pData1  = (s1 + t1);
+                s1       = (s1 - t1) << 1;
+
+                *pData2--  = cmplx_mul32_by_16(s1, -r1, exp_jw2);
+                r3      = (r2 - t2) << 1;
+                *pData2    = cmplx_mul32_by_16(r1,  s1, exp_jw2);
+
+                r2      = (r2 + t2) << 1;
+
+                *pData3--  = cmplx_mul32_by_16(s2, -r2, exp_jw1);
+                *pData3    = cmplx_mul32_by_16(r2,  s2, exp_jw1);
+
+                *pData4--  = cmplx_mul32_by_16(s3, -r3, exp_jw3);
+                *pData4    = cmplx_mul32_by_16(r3,  s3, exp_jw3);
+
+            }  /* i */
+
+        }  /*  j */
+
+    } /* k */
+
+
+    max = 0;
+
+    pData1 = Data - 7;
+
+
+    for (i = ONE_FOURTH_FFT_RX4_LONG; i != 0 ; i--)
+    {
+        pData1 += 7;
+        pData2 = pData1 + 4;
+
+
+        temp1   = *pData1;
+        temp2   = *pData2++;
+
+        r1      = temp1 + temp2;
+        r2      = temp1 - temp2;
+
+        pData3 = pData1 + 2;
+        pData4 = pData1 + 6;
+        temp1   = *pData3++;
+        temp2   = *pData4++;
+
+        t1      = temp1 + temp2;
+        t2      = temp1 - temp2;
+
+        temp1       = (r1 + t1);
+        r1          = (r1 - t1);
+        *(pData1++) = temp1;
+        max        |= (temp1 >> 31) ^ temp1;
+
+
+
+        temp2   = *pData2;
+        temp1   = *pData1;
+
+        s1      = temp1 + temp2;
+        s2      = temp1 - temp2;
+
+
+        temp1   = *pData3;
+        temp2   = *pData4;
+
+        s3      = (s2 + t2);
+        s2      = (s2 - t2);
+
+        t1      = temp1 + temp2;
+        t2      = temp1 - temp2;
+
+        temp1      = (s1 + t1);
+        *pData1    = temp1;
+        temp2      = (s1 - t1);
+
+        max       |= (temp1 >> 31) ^ temp1;
+        *pData2--  = temp2;
+        max       |= (temp2 >> 31) ^ temp2;
+
+        *pData2    = r1;
+        max       |= (r1 >> 31) ^ r1;
+        *pData3--  = s2;
+        max       |= (s2 >> 31) ^ s2;
+        *pData4--  = s3;
+        max       |= (s3 >> 31) ^ s3;
+
+        temp1      = (r2 - t2);
+        *pData4    = temp1;
+        temp2      = (r2 + t2);
+        *pData3    = temp2;
+        max       |= (temp1 >> 31) ^ temp1;
+        max       |= (temp2 >> 31) ^ temp2;
+
+    }  /* i */
+
+    *peak_value = max;
+
+    return ;
+
+}
+