1 files changed, 407 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
new file mode 100644
index 0000000..241d188
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
@@ -0,0 +1,407 @@
+;//
+;// 
+;// File Name:  armVCM4P10_TransformResidual4x4_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+;// Description:
+;// Transform Residual 4x4 Coefficients
+;// 
+;// 
+
+        
+;// Include standard headers
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+        M_VARIANTS ARM1136JS
+        
+;// Import symbols required from other files
+;// (For example tables)
+    
+        
+        
+        
+;// Set debugging level        
+;//DEBUG_ON    SETL {TRUE}
+
+
+
+;// Guarding implementation by the processor name
+    
+    IF  ARM1136JS 
+    
+;//Input Registers
+pDst                RN  0
+pSrc                RN  1
+
+;//Output Registers
+
+
+;//Local Scratch Registers
+
+;// Packed Input pixels
+in00                RN  2                   ;// Src[0] & Src[1] 
+in02                RN  3                   ;// Src[2] & Src[3]
+in10                RN  4                   ;// Src[4] & Src[5]
+in12                RN  5                   ;// Src[6] & Src[7]
+in20                RN  6                   ;// Src[8] & Src[9]
+in22                RN  7                   ;// Src[10] & Src[11]
+in30                RN  8                   ;// Src[12] & Src[13]
+in32                RN  9                   ;// Src[14] & Src[15]
+
+;// Transpose for Row operations (Rows to cols)
+trRow00             RN  2
+trRow10             RN  10
+trRow02             RN  3
+trRow12             RN  5
+trRow20             RN  11
+trRow30             RN  12
+trRow32             RN  14
+trRow22             RN  7
+
+;// Intermediate calculations
+e0                  RN  4                   
+e1                  RN  6
+e2                  RN  8
+e3                  RN  9
+constZero           RN  1
+
+;// Row operated pixels
+rowOp00             RN  2
+rowOp10             RN  10
+rowOp20             RN  11
+rowOp30             RN  12
+rowOp02             RN  3
+rowOp12             RN  5
+rowOp22             RN  7
+rowOp32             RN  14
+
+;// Transpose for colulmn operations
+trCol00             RN  2                   
+trCol02             RN  3                   
+trCol10             RN  4                   
+trCol12             RN  5                   
+trCol20             RN  6                   
+trCol22             RN  7                   
+trCol30             RN  8                   
+trCol32             RN  9  
+
+;// Intermediate calculations
+g0                  RN  10
+g1                  RN  11
+g2                  RN  12
+g3                  RN  14   
+
+;// Coloumn operated pixels
+colOp00             RN  2                   
+colOp02             RN  3                   
+colOp10             RN  4                   
+colOp12             RN  5                   
+colOp20             RN  6                   
+colOp22             RN  7                   
+colOp30             RN  8                   
+colOp32             RN  9  
+
+
+temp1               RN  10                  ;// Temporary scratch varaibles
+const1              RN  11      
+const2              RN  12
+mask                RN  14
+
+;// Output pixels
+out00               RN  2                   
+out02               RN  3                   
+out10               RN  4                   
+out12               RN  5                   
+out20               RN  6                   
+out22               RN  7                   
+out30               RN  8                   
+out32               RN  9  
+      
+       
+       
+    ;// Allocate stack memory required by the function
+        
+
+    ;// Write function header
+        M_START armVCM4P10_TransformResidual4x4,r11
+        
+        ;******************************************************************
+        ;// The strategy used in implementing the transform is as follows:*
+        ;// Load the 4x4 block into 8 registers                           *  
+        ;// Transpose the 4x4 matrix                                      *  
+        ;// Perform the row operations (on columns) using SIMD            *  
+        ;// Transpose the 4x4 result matrix                               *  
+        ;// Perform the coloumn operations                                *
+        ;// Store the 4x4 block at one go                                 *  
+        ;******************************************************************
+
+        ;// Load all the 4x4 pixels
+        
+        LDMIA   pSrc,{in00,in02,in10,in12,in20,in22,in30,in32}
+        
+        MOV       constZero,#0                                     ;// Used to right shift by 1 
+        ;LDR       constZero,=0x00000000  
+        
+        ;*****************************************************************
+        ;//
+        ;// Transpose the matrix inorder to perform row ops as coloumn ops
+        ;// Input:   in[][] = original matrix
+        ;// Output:  trRow[][]= transposed matrix
+        ;// Step1: Obtain the LL part of the transposed matrix
+        ;// Step2: Obtain the HL part
+        ;// step3: Obtain the LH part
+        ;// Step4: Obtain the HH part
+        ;//
+        ;*****************************************************************
+        
+        ;// LL 2x2 transposed matrix 
+        ;//   d0 d1 - -
+        ;//   d4 d5 - -
+        ;//   -  -  - -
+        ;//   -  -  - -
+        
+        PKHTB   trRow10,in10,in00,ASR #16               ;// [5 4] = [f5:f1]    
+        PKHBT   trRow00,in00,in10,LSL #16               ;// [1 0] = [f4:f0]  
+        
+        ;// HL 2x2 transposed matrix  
+        ;//    -   -   - -
+        ;//    -   -   - -
+        ;//    d8  d9  - -
+        ;//   d12 d13  - -
+        
+         
+         PKHTB   trRow30,in12,in02,ASR #16              ;// [13 12] = [7 3]
+         PKHBT   trRow20,in02,in12,LSL #16              ;// [9 8] = [6 2] 
+        
+        ;// LH 2x2 transposed matrix 
+        ;//   - - d2 d3 
+        ;//   - - d6 d7 
+        ;//   - - -  -
+        ;//   - - -  -
+        
+        PKHBT   trRow02,in20,in30,LSL #16               ;// [3 2] = [f12:f8]  
+        PKHTB   trRow12,in30,in20,ASR #16               ;// [7 6] = [f13:f9] 
+        
+        
+        
+         
+        ;// HH 2x2 transposed matrix  
+        ;//    - -   -   -
+        ;//    - -   -   -
+        ;//    - -  d10 d11
+        ;//    - -  d14 d15
+        
+        PKHTB   trRow32,in32,in22,ASR #16               ;// [15 14] = [15 11]
+        PKHBT   trRow22,in22,in32,LSL #16               ;// [11 10] = [14 10]
+       
+        
+        ;**************************************** 
+        ;// Row Operations (Performed on columns)
+        ;**************************************** 
+        
+        
+        ;// SIMD operations on first two columns(two rows of the original matrix)
+        
+        
+        SADD16      e0, trRow00,trRow20                   ;//  e0 = d0 + d2 
+        SSUB16    e1, trRow00,trRow20                   ;//  e1 = d0 - d2  
+        SHADD16   e2, trRow10,constZero                 ;// (f1>>1) constZero is a register holding 0
+        SHADD16   e3, trRow30,constZero                 ;//  avoid pipeline stalls for e2 and e3
+        SSUB16    e2, e2, trRow30                       ;//  e2 = (d1>>1) - d3  
+        SADD16    e3, e3, trRow10                       ;//  e3 = d1 + (d3>>1)  
+        SADD16    rowOp00, e0, e3                       ;//  f0 = e0 + e3  
+        SADD16    rowOp10, e1, e2                       ;//  f1 = e1 + e2  
+        SSUB16    rowOp20, e1, e2                       ;//  f2 = e1 - e2  
+        SSUB16    rowOp30, e0, e3                       ;//  f3 = e0 - e3
+        
+        ;// SIMD operations on next two columns(next two rows of the original matrix)
+        
+        SADD16      e0, trRow02,trRow22
+        SSUB16    e1, trRow02,trRow22
+        SHADD16   e2, trRow12,constZero                 ;//(f1>>1) constZero is a register holding 0
+        SHADD16   e3, trRow32,constZero
+        SSUB16    e2, e2, trRow32
+        SADD16    e3, e3, trRow12
+        SADD16    rowOp02, e0, e3
+        SADD16    rowOp12, e1, e2
+        SSUB16    rowOp22, e1, e2
+        SSUB16    rowOp32, e0, e3
+        
+        
+        ;*****************************************************************
+        ;// Transpose the resultant matrix
+        ;// Input:  rowOp[][]
+        ;// Output: trCol[][] 
+        ;*****************************************************************
+        
+        ;// LL 2x2 transposed matrix 
+        ;//   d0 d1 - -
+        ;//   d4 d5 - -
+        ;//   -  -  - -
+        ;//   -  -  - -
+        
+        PKHTB   trCol10,rowOp10,rowOp00,ASR #16           ;// [5 4] = [f5:f1]
+        PKHBT   trCol00,rowOp00,rowOp10,LSL #16           ;// [1 0] = [f4:f0]  
+        
+        ;// HL 2x2 transposed matrix  
+        ;//    -   -   - -
+        ;//    -   -   - -
+        ;//    d8  d9  - -
+        ;//   d12 d13  - -
+        
+         
+         PKHTB   trCol30,rowOp12,rowOp02,ASR #16          ;// [13 12] = [7 3]
+         PKHBT   trCol20,rowOp02,rowOp12,LSL #16          ;// [9 8] = [6 2] 
+        
+        ;// LH 2x2 transposed matrix 
+        ;//   - - d2 d3 
+        ;//   - - d6 d7 
+        ;//   - - -  -
+        ;//   - - -  -
+        
+        PKHBT   trCol02,rowOp20,rowOp30,LSL #16           ;// [3 2] = [f12:f8]  
+        PKHTB   trCol12,rowOp30,rowOp20,ASR #16           ;// [7 6] = [f13:f9] 
+        
+        
+        
+         
+        ;// HH 2x2 transposed matrix  
+        ;//    - -   -   -
+        ;//    - -   -   -
+        ;//    - -  d10 d11
+        ;//    - -  d14 d15
+        
+        PKHTB   trCol32,rowOp32,rowOp22,ASR #16            ;// [15 14] = [15 11]
+        PKHBT   trCol22,rowOp22,rowOp32,LSL #16            ;// [11 10] = [14 10]
+       
+        
+        ;******************************* 
+        ;// Coloumn Operations 
+        ;******************************* 
+        
+        
+        ;// SIMD operations on first two columns
+        
+          
+        SADD16      g0, trCol00,trCol20
+        SSUB16    g1, trCol00,trCol20
+        SHADD16   g2, trCol10,constZero                     ;// (f1>>1) constZero is a register holding 0
+        SHADD16   g3, trCol30,constZero
+        SSUB16    g2, g2, trCol30
+        SADD16    g3, g3, trCol10
+        SADD16    colOp00, g0, g3
+        SADD16    colOp10, g1, g2
+        SSUB16    colOp20, g1, g2
+        SSUB16    colOp30, g0, g3
+        
+        ;// SIMD operations on next two columns
+        
+        SADD16      g0, trCol02,trCol22
+        SSUB16    g1, trCol02,trCol22
+        SHADD16   g2, trCol12,constZero                     ;// (f1>>1) constZero is a register holding 0
+        SHADD16   g3, trCol32,constZero
+        SSUB16    g2, g2, trCol32
+        SADD16    g3, g3, trCol12
+        SADD16    colOp02, g0, g3
+        SADD16    colOp12, g1, g2
+        SSUB16    colOp22, g1, g2
+        SSUB16    colOp32, g0, g3
+        
+        
+             
+                  
+             
+        ;************************************************
+        ;// Calculate final value (colOp[i][j] + 32)>>6
+        ;************************************************
+        
+        ;// const1: Serves dual purpose 
+        ;// (1) Add #32 to both the lower and higher 16bits of the SIMD result 
+        ;// (2) Convert the lower 16 bit value to an unsigned number (Add 32768)
+        
+        LDR     const1, =0x00208020             
+        
+        LDR     mask, =0xffff03ff                       ;// Used to mask the down shifted 6 bits  
+        
+        ;// const2(#512): used to convert the lower 16bit number back to signed value 
+      
+        MOV     const2,#0x200                           ;// const2 = 2^9
+        
+        ;// First Row 
+        
+        SADD16    colOp00, colOp00, const1
+        SADD16    colOp02, colOp02, const1
+        AND     colOp00, mask, colOp00, ASR #6
+        AND     colOp02, mask, colOp02, ASR #6
+        SSUB16  out00,colOp00,const2
+        SSUB16  out02,colOp02,const2    
+        
+
+        ;// Second Row
+        
+        SADD16    colOp10, colOp10, const1
+        SADD16    colOp12, colOp12, const1
+        AND     colOp10, mask, colOp10, ASR #6
+        AND     colOp12, mask, colOp12, ASR #6
+        SSUB16  out10,colOp10,const2
+        SSUB16  out12,colOp12,const2    
+        
+        
+        ;// Third Row
+        
+        SADD16    colOp20, colOp20, const1
+        SADD16    colOp22, colOp22, const1
+        AND     colOp20, mask, colOp20, ASR #6
+        AND     colOp22, mask, colOp22, ASR #6
+        SSUB16  out20,colOp20,const2
+        SSUB16  out22,colOp22,const2
+        
+        
+        ;// Fourth Row   
+        
+        SADD16    colOp30, colOp30, const1
+        SADD16    colOp32, colOp32, const1
+        AND     colOp30, mask, colOp30, ASR #6
+        AND     colOp32, mask, colOp32, ASR #6
+        SSUB16  out30,colOp30,const2
+        SSUB16  out32,colOp32,const2
+        
+        
+        
+                
+        ;***************************
+        ;// Store all the 4x4 pixels
+        ;***************************
+        
+        STMIA   pDst,{out00,out02,out10,out12,out20,out22,out30,out32}
+        
+                               
+       
+        ;// Set return value
+        
+End                
+
+        
+        ;// Write function tail
+        M_END
+        
+    ENDIF                                                           ;//ARM1136JS    
+    
+    
+
+
+
+
+
+;// Guarding implementation by the processor name
+    
+            
+    END
+\ No newline at end of file