summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
diff options
context:
space:
mode:
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s')
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s407
1 files changed, 0 insertions, 407 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
deleted file mode 100644
index 241d188..0000000
--- a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_TransformResidual4x4_s.s
+++ /dev/null
@@ -1,407 +0,0 @@
-;//
-;//
-;// File Name: armVCM4P10_TransformResidual4x4_s.s
-;// OpenMAX DL: v1.0.2
-;// Revision: 9641
-;// Date: Thursday, February 7, 2008
-;//
-;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
-;//
-;//
-;//
-;// Description:
-;// Transform Residual 4x4 Coefficients
-;//
-;//
-
-
-;// Include standard headers
-
- INCLUDE omxtypes_s.h
- INCLUDE armCOMM_s.h
-
- M_VARIANTS ARM1136JS
-
-;// Import symbols required from other files
-;// (For example tables)
-
-
-
-
-;// Set debugging level
-;//DEBUG_ON SETL {TRUE}
-
-
-
-;// Guarding implementation by the processor name
-
- IF ARM1136JS
-
-;// Input registers
-pDst RN 0 ;// Base address used by the final STMIA
-pSrc RN 1 ;// Base address used by the initial LDMIA
-
-;//Output Registers (none are named)
-
-
-;// Local scratch registers. The groups below are stage-specific names for the same
-;// physical registers, so a name is only meaningful during the stage it belongs to.
-;// Packed input pixels: two 16-bit values per register
-in00 RN 2 ;// Src[0] & Src[1]
-in02 RN 3 ;// Src[2] & Src[3]
-in10 RN 4 ;// Src[4] & Src[5]
-in12 RN 5 ;// Src[6] & Src[7]
-in20 RN 6 ;// Src[8] & Src[9]
-in22 RN 7 ;// Src[10] & Src[11]
-in30 RN 8 ;// Src[12] & Src[13]
-in32 RN 9 ;// Src[14] & Src[15]
-
-;// First transpose (rows to cols), input to the row operations. Contents are [high:low] as packed by the PKHBT/PKHTB pairs, taking the Src[] notes above at face value
-trRow00 RN 2 ;// [Src4:Src0]
-trRow10 RN 10 ;// [Src5:Src1]
-trRow02 RN 3 ;// [Src12:Src8]
-trRow12 RN 5 ;// [Src13:Src9]
-trRow20 RN 11 ;// [Src6:Src2]
-trRow30 RN 12 ;// [Src7:Src3]
-trRow32 RN 14 ;// [Src15:Src11]
-trRow22 RN 7 ;// [Src14:Src10]
-
-;// Row-pass intermediates (r4, r6, r8, r9 are free once in10/in20/in30/in32 have been transposed)
-e0 RN 4
-e1 RN 6
-e2 RN 8
-e3 RN 9
-constZero RN 1 ;// Same register as pSrc; set to 0 after the input has been loaded
-
-;// Row operated pixels (each rowOpXY shares its register with trRowXY)
-rowOp00 RN 2
-rowOp10 RN 10
-rowOp20 RN 11
-rowOp30 RN 12
-rowOp02 RN 3
-rowOp12 RN 5
-rowOp22 RN 7
-rowOp32 RN 14
-
-;// Transpose for column operations
-trCol00 RN 2
-trCol02 RN 3
-trCol10 RN 4
-trCol12 RN 5
-trCol20 RN 6
-trCol22 RN 7
-trCol30 RN 8
-trCol32 RN 9
-
-;// Column-pass intermediates (same registers as rowOp10, rowOp20, rowOp30, rowOp32)
-g0 RN 10
-g1 RN 11
-g2 RN 12
-g3 RN 14
-
-;// Column operated pixels (each colOpXY shares its register with trColXY)
-colOp00 RN 2
-colOp02 RN 3
-colOp10 RN 4
-colOp12 RN 5
-colOp20 RN 6
-colOp22 RN 7
-colOp30 RN 8
-colOp32 RN 9
-
-
-temp1 RN 10 ;// Temporary scratch variable (not referenced by the routine below)
-const1 RN 11 ;// Loaded with 0x00208020 for the final rounding
-const2 RN 12 ;// Loaded with 0x200 for the final rounding
-mask RN 14 ;// Loaded with 0xffff03ff for the final rounding
-
-;// Output pixels (same registers as colOp, listed in store order)
-out00 RN 2
-out02 RN 3
-out10 RN 4
-out12 RN 5
-out20 RN 6
-out22 RN 7
-out30 RN 8
-out32 RN 9
-
-
-
- ;// Allocate stack memory required by the function (none is allocated here)
-
-
- ;// Write function header (M_START is a macro from armCOMM_s.h, which is not shown here)
- M_START armVCM4P10_TransformResidual4x4,r11
- ;// NOTE(review): r14 is used as scratch below, so returning relies on what M_START/M_END save and restore - confirm in armCOMM_s.h
- ;******************************************************************
- ;// The strategy used in implementing the transform is as follows:*
- ;// Load the 4x4 block into 8 registers (2 halfwords each) *
- ;// Transpose the 4x4 matrix *
- ;// Perform the row operations (on columns) using SIMD *
- ;// Transpose the 4x4 result matrix *
- ;// Perform the column operations, then (x + 32) >> 6 on each *
- ;// Store the 4x4 block at one go *
- ;******************************************************************
-
- ;// Load all sixteen 16-bit inputs (eight words) with a single LDMIA
-
- LDMIA pSrc,{in00,in02,in10,in12,in20,in22,in30,in32}
-
- MOV constZero,#0 ;// r1 (pSrc) is free after the load; it becomes the zero operand that makes SHADD16 a per-halfword >>1
- ;LDR constZero,=0x00000000
-
- ;*****************************************************************
- ;// Comments below read [high:low halfword] and assume the even-indexed Src element sits in the low halfword (little-endian load)
- ;// Transpose the matrix in order to perform row ops as column ops
- ;// Input: in[][] = original matrix
- ;// Output: trRow[][]= transposed matrix
- ;// Step1: Obtain the LL part of the transposed matrix
- ;// Step2: Obtain the HL part
- ;// Step3: Obtain the LH part (after HL: it overwrites in02/in12, which HL reads)
- ;// Step4: Obtain the HH part
- ;// d0..d15 in the diagrams number the TRANSPOSED matrix row by row
- ;*****************************************************************
-
- ;// LL 2x2 transposed matrix
- ;// d0 d1 - -
- ;// d4 d5 - -
- ;// - - - -
- ;// - - - -
-
- PKHTB trRow10,in10,in00,ASR #16 ;// [d5:d4] = [Src5:Src1]
- PKHBT trRow00,in00,in10,LSL #16 ;// [d1:d0] = [Src4:Src0]
-
- ;// HL 2x2 transposed matrix
- ;// - - - -
- ;// - - - -
- ;// d8 d9 - -
- ;// d12 d13 - -
-
-
- PKHTB trRow30,in12,in02,ASR #16 ;// [d13:d12] = [Src7:Src3]
- PKHBT trRow20,in02,in12,LSL #16 ;// [d9:d8] = [Src6:Src2]
-
- ;// LH 2x2 transposed matrix
- ;// - - d2 d3
- ;// - - d6 d7
- ;// - - - -
- ;// - - - -
-
- PKHBT trRow02,in20,in30,LSL #16 ;// [d3:d2] = [Src12:Src8]
- PKHTB trRow12,in30,in20,ASR #16 ;// [d7:d6] = [Src13:Src9]
-
-
-
-
- ;// HH 2x2 transposed matrix
- ;// - - - -
- ;// - - - -
- ;// - - d10 d11
- ;// - - d14 d15
-
- PKHTB trRow32,in32,in22,ASR #16 ;// [d15:d14] = [Src15:Src11]; must come first, trRow22 overwrites in22 (r7)
- PKHBT trRow22,in22,in32,LSL #16 ;// [d11:d10] = [Src14:Src10]
-
-
- ;****************************************
- ;// Row Operations (Performed on columns)
- ;****************************************
-
-
- ;// Original rows 0 (low halfwords) and 1 (high halfwords); d0..d3 now mean one row's four inputs = trRow00/10/20/30
-
-
- SADD16 e0, trRow00,trRow20 ;// e0 = d0 + d2
- SSUB16 e1, trRow00,trRow20 ;// e1 = d0 - d2
- SHADD16 e2, trRow10,constZero ;// e2 = d1>>1 (constZero is a register holding 0)
- SHADD16 e3, trRow30,constZero ;// e3 = d3>>1 (original note: avoid pipeline stalls for e2 and e3)
- SSUB16 e2, e2, trRow30 ;// e2 = (d1>>1) - d3
- SADD16 e3, e3, trRow10 ;// e3 = d1 + (d3>>1)
- SADD16 rowOp00, e0, e3 ;// f0 = e0 + e3
- SADD16 rowOp10, e1, e2 ;// f1 = e1 + e2
- SSUB16 rowOp20, e1, e2 ;// f2 = e1 - e2
- SSUB16 rowOp30, e0, e3 ;// f3 = e0 - e3
-
- ;// Same butterfly for original rows 2 (low halfwords) and 3 (high halfwords); d0..d3 = trRow02/12/22/32
-
- SADD16 e0, trRow02,trRow22 ;// e0 = d0 + d2
- SSUB16 e1, trRow02,trRow22 ;// e1 = d0 - d2
- SHADD16 e2, trRow12,constZero ;// e2 = d1>>1 (constZero is a register holding 0)
- SHADD16 e3, trRow32,constZero ;// e3 = d3>>1
- SSUB16 e2, e2, trRow32 ;// e2 = (d1>>1) - d3
- SADD16 e3, e3, trRow12 ;// e3 = d1 + (d3>>1)
- SADD16 rowOp02, e0, e3 ;// f0 = e0 + e3
- SADD16 rowOp12, e1, e2 ;// f1 = e1 + e2
- SSUB16 rowOp22, e1, e2 ;// f2 = e1 - e2
- SSUB16 rowOp32, e0, e3 ;// f3 = e0 - e3
-
-
- ;*****************************************************************
- ;// Transpose the resultant matrix (afterwards trColR0 holds row R, columns 0-1 and trColR2 holds row R, columns 2-3)
- ;// Input: rowOp[][]
- ;// Output: trCol[][]
- ;*****************************************************************
-
- ;// LL 2x2 transposed matrix
- ;// d0 d1 - -
- ;// d4 d5 - -
- ;// - - - -
- ;// - - - -
-
- PKHTB trCol10,rowOp10,rowOp00,ASR #16 ;// trCol10 = [rowOp10.hi : rowOp00.hi]
- PKHBT trCol00,rowOp00,rowOp10,LSL #16 ;// trCol00 = [rowOp10.lo : rowOp00.lo]
-
- ;// HL 2x2 transposed matrix
- ;// - - - -
- ;// - - - -
- ;// d8 d9 - -
- ;// d12 d13 - -
-
-
- PKHTB trCol30,rowOp12,rowOp02,ASR #16 ;// trCol30 = [rowOp12.hi : rowOp02.hi]
- PKHBT trCol20,rowOp02,rowOp12,LSL #16 ;// trCol20 = [rowOp12.lo : rowOp02.lo]
-
- ;// LH 2x2 transposed matrix
- ;// - - d2 d3
- ;// - - d6 d7
- ;// - - - -
- ;// - - - -
-
- PKHBT trCol02,rowOp20,rowOp30,LSL #16 ;// trCol02 = [rowOp30.lo : rowOp20.lo]; reuses r3, so HL above had to read rowOp02 first
- PKHTB trCol12,rowOp30,rowOp20,ASR #16 ;// trCol12 = [rowOp30.hi : rowOp20.hi]; reuses r5 (rowOp12), also already consumed by HL
-
-
-
-
- ;// HH 2x2 transposed matrix
- ;// - - - -
- ;// - - - -
- ;// - - d10 d11
- ;// - - d14 d15
-
- PKHTB trCol32,rowOp32,rowOp22,ASR #16 ;// trCol32 = [rowOp32.hi : rowOp22.hi]; must come first, trCol22 overwrites rowOp22 (r7)
- PKHBT trCol22,rowOp22,rowOp32,LSL #16 ;// trCol22 = [rowOp32.lo : rowOp22.lo]
-
-
- ;*******************************
- ;// Column Operations
- ;*******************************
-
-
- ;// Columns 0 (low halfwords) and 1 (high halfwords); trCol00/10/20/30 are rows 0..3 of those columns
-
-
- SADD16 g0, trCol00,trCol20 ;// g0 = row0 + row2
- SSUB16 g1, trCol00,trCol20 ;// g1 = row0 - row2
- SHADD16 g2, trCol10,constZero ;// g2 = row1>>1 (constZero is a register holding 0)
- SHADD16 g3, trCol30,constZero ;// g3 = row3>>1
- SSUB16 g2, g2, trCol30 ;// g2 = (row1>>1) - row3
- SADD16 g3, g3, trCol10 ;// g3 = row1 + (row3>>1)
- SADD16 colOp00, g0, g3 ;// row 0 result = g0 + g3
- SADD16 colOp10, g1, g2 ;// row 1 result = g1 + g2
- SSUB16 colOp20, g1, g2 ;// row 2 result = g1 - g2
- SSUB16 colOp30, g0, g3 ;// row 3 result = g0 - g3
-
- ;// Same butterfly for columns 2 (low halfwords) and 3 (high halfwords); trCol02/12/22/32 are rows 0..3
-
- SADD16 g0, trCol02,trCol22 ;// g0 = row0 + row2
- SSUB16 g1, trCol02,trCol22 ;// g1 = row0 - row2
- SHADD16 g2, trCol12,constZero ;// g2 = row1>>1 (constZero is a register holding 0)
- SHADD16 g3, trCol32,constZero ;// g3 = row3>>1
- SSUB16 g2, g2, trCol32 ;// g2 = (row1>>1) - row3
- SADD16 g3, g3, trCol12 ;// g3 = row1 + (row3>>1)
- SADD16 colOp02, g0, g3 ;// row 0 result = g0 + g3
- SADD16 colOp12, g1, g2 ;// row 1 result = g1 + g2
- SSUB16 colOp22, g1, g2 ;// row 2 result = g1 - g2
- SSUB16 colOp32, g0, g3 ;// row 3 result = g0 - g3
-
-
-
-
-
- ;************************************************
- ;// Calculate final value (colOp[i][j] + 32)>>6
- ;************************************************
-
- ;// const1: Serves dual purpose
- ;// (1) Add #32 to both the lower and higher 16bits of the SIMD result
- ;// (2) Convert the lower 16 bit value to an unsigned number (Add 32768)
- ;// (the ASR #6 below shifts the whole 32-bit word, so only the high halfword gets a true arithmetic shift)
- LDR const1, =0x00208020 ;// high halfword 0x0020 = 32, low halfword 0x8020 = 32768 + 32
-
- LDR mask, =0xffff03ff ;// Clears bits 15:10, where the word-wide ASR #6 leaves the high halfword's low 6 bits
-
- ;// const2(#512): used to convert the lower 16bit number back to signed value
-
- MOV const2,#0x200 ;// const2 = 2^9 = 32768 >> 6, i.e. the bias as it appears after the shift
-
- ;// First Row (the other three rows repeat the same three steps)
-
- SADD16 colOp00, colOp00, const1 ;// round (+32) both halfwords and bias the low one
- SADD16 colOp02, colOp02, const1
- AND colOp00, mask, colOp00, ASR #6 ;// word-wide >>6, then clear the bits leaked into the low halfword
- AND colOp02, mask, colOp02, ASR #6
- SSUB16 out00,colOp00,const2 ;// remove the bias from the low halfword (high halfword minus 0)
- SSUB16 out02,colOp02,const2
-
-
- ;// Second Row
-
- SADD16 colOp10, colOp10, const1
- SADD16 colOp12, colOp12, const1
- AND colOp10, mask, colOp10, ASR #6
- AND colOp12, mask, colOp12, ASR #6
- SSUB16 out10,colOp10,const2
- SSUB16 out12,colOp12,const2
-
-
- ;// Third Row
-
- SADD16 colOp20, colOp20, const1
- SADD16 colOp22, colOp22, const1
- AND colOp20, mask, colOp20, ASR #6
- AND colOp22, mask, colOp22, ASR #6
- SSUB16 out20,colOp20,const2
- SSUB16 out22,colOp22,const2
-
-
- ;// Fourth Row
-
- SADD16 colOp30, colOp30, const1
- SADD16 colOp32, colOp32, const1
- AND colOp30, mask, colOp30, ASR #6
- AND colOp32, mask, colOp32, ASR #6
- SSUB16 out30,colOp30,const2
- SSUB16 out32,colOp32,const2
-
-
-
-
- ;***************************
- ;// Store all sixteen results (eight words) with a single STMIA
- ;***************************
-
- STMIA pDst,{out00,out02,out10,out12,out20,out22,out30,out32}
-
-
-
- ;// Set return value (nothing is written here)
-
-End ;// Label is not branched to by the code above
-
-
- ;// Write function tail (M_END is a macro from armCOMM_s.h, which is not shown here)
- M_END
-
- ENDIF ;//ARM1136JS
-
-
-
-
-
-
-
-;// End of the implementation guarded by the processor name (ARM1136JS)
-
-
- END \ No newline at end of file