diff options
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s')
-rwxr-xr-x | media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s | 320 |
1 files changed, 320 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s new file mode 100755 index 0000000..a73f64a --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s @@ -0,0 +1,320 @@ +; ********** +; * +; * File Name: omxVCM4P2_PredictReconCoefIntra_s.s +; * OpenMAX DL: v1.0.2 +; * Revision: 12290 +; * Date: Wednesday, April 9, 2008 +; * +; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +; * +; * +; * +; * Description: +; * Contains module for DC/AC coefficient prediction +; * +; * +; * Function: omxVCM4P2_PredictReconCoefIntra +; * +; * Description: +; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior +; * to the function call, prediction direction (predDir) should be selected +; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2. +; * +; * Remarks: +; * +; * Parameters: +; * [in] pSrcDst pointer to the coefficient buffer which contains the +; * quantized coefficient residuals (PQF) of the current +; * block; must be aligned on a 4-byte boundary. The +; * output coefficients are saturated to the range +; * [-2048, 2047]. +; * [in] pPredBufRow pointer to the coefficient row buffer; must be aligned +; * on a 4-byte boundary. +; * [in] pPredBufCol pointer to the coefficient column buffer; must be +; * aligned on a 4-byte boundary. +; * [in] curQP quantization parameter of the current block. curQP may +; * equal to predQP especially when the current block and +; * the predictor block are in the same macroblock. +; * [in] predQP quantization parameter of the predictor block +; * [in] predDir indicates the prediction direction which takes one +; * of the following values: +; * OMX_VIDEO_HORIZONTAL predict horizontally +; * OMX_VIDEO_VERTICAL predict vertically +; * [in] ACPredFlag a flag indicating if AC prediction should be +; * performed. It is equal to ac_pred_flag in the bit +; * stream syntax of MPEG-4 +; * [in] videoComp video component type (luminance, chrominance or +; * alpha) of the current block +; * [out] pSrcDst pointer to the coefficient buffer which contains +; * the quantized coefficients (QF) of the current +; * block +; * [out] pPredBufRow pointer to the updated coefficient row buffer +; * [out] pPredBufCol pointer to the updated coefficient column buffer +; * Return Value: +; * OMX_Sts_NoErr - no error +; * OMX_Sts_BadArgErr - Bad arguments +; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol. +; * - At least one the following cases: curQP <= 0, predQP <= 0, curQP >31, +; * predQP > 31, preDir exceeds [1,2]. +; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not +; * 4-byte aligned. +; * +; ********* + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS CortexA8 + + + + IMPORT armVCM4P2_Reciprocal_QP_S32 + IMPORT armVCM4P2_Reciprocal_QP_S16 + IMPORT armVCM4P2_DCScaler + + IF CortexA8 +;// Input Arguments + +pSrcDst RN 0 +pPredBufRow RN 1 +pPredBufCol RN 2 +curQP RN 3 +QP RN 3 +predQP RN 4 +predDir RN 5 +ACPredFlag RN 6 +videoComp RN 7 + +;// Local Variables + +shortVideoHeader RN 4 +dcScaler RN 4 +index RN 6 +predCoeffTable RN 7 +temp1 RN 6 +temp2 RN 9 +temp RN 14 +Const RN 8 +temppPredColBuf RN 8 +tempPred RN 9 + +absCoeffDC RN 8 +negdcScaler RN 10 +Rem RN 11 +temp3 RN 12 + +dcRowbufCoeff RN 10 +dcColBuffCoeff RN 11 +Return RN 0 + +;//NEON Registers + +qPredRowBuf QN Q0.S16 +dPredRowBuf0 DN D0.S16 +dPredRowBuf1 DN D1.S16 + + + + +qCoeffTab QN Q1.S32 + +qPredQP QN Q2.S16 +dPredQP0 DN D4.S16 +dPredQP1 DN D5.S16 + + +qtemp1 QN Q3.S32 +qtemp QN Q3.S16 + +dtemp0 DN D6.S16 +dtemp1 DN D7.S16 + +dtemp2 DN D8.S16 +dtemp3 DN D9.S16 + +dtemp4 DN D2.S16 +dtemp5 DN D3.S16 +dtemp6 DN D4.S16 +dtemp7 DN D5.S16 + +qtempPred1 QN Q5.S32 +qtempPred QN Q5.S16 + +dtempPred0 DN D10.S16 +dtempPred1 DN D11.S16 + + + + M_START omxVCM4P2_PredictReconCoefIntra,r11,d11 + + ;// Assigning pointers to Input arguments on Stack + + M_ARG predQPonStack,4 + M_ARG predDironStack,4 + M_ARG ACPredFlagonStack,4 + M_ARG videoComponStack,4 + + ;// DC Prediction + + M_LDR videoComp,videoComponStack ;// Load videoComp From Stack + + M_LDR predDir,predDironStack ;// Load Prediction direction + ;// DC Scaler calculation + LDR index, =armVCM4P2_DCScaler + ADD index,index,videoComp,LSL #5 + LDRB dcScaler,[index,QP] + + + LDR predCoeffTable, =armVCM4P2_Reciprocal_QP_S16 ;// Loading the table with entries 32767/(1 to 63) + CMP predDir,#2 ;// Check if the Prediction direction is vertical + + ;// Caulucate tempPred + + LDREQSH absCoeffDC,[pPredBufRow] ;// If vetical load the coeff from Row Prediction Buffer + LDRNESH absCoeffDC,[pPredBufCol] ;// If horizontal load the coeff from column Prediction Buffer + + RSB negdcScaler,dcScaler,#0 ;// negdcScaler=-dcScaler + MOV temp1,absCoeffDC ;// Load the Prediction coeff to temp for comparision + CMP temp1,#0 + RSBLT absCoeffDC,temp1,#0 ;// calculate absolute val of prediction coeff + + ADD temp,dcScaler,dcScaler + LDRH temp,[predCoeffTable,temp] ;// Load value from coeff table for performing division using multiplication + SMULBB tempPred,temp,absCoeffDC ;// tempped=pPredBufRow(Col)[0]*32767/dcScaler + ADD temp3,dcScaler,#1 + LSR tempPred,tempPred,#15 ;// tempped=pPredBufRow(Col)[0]/dcScaler + LSR temp3,temp3,#1 ;// temp3=round(dcScaler/2) + MLA Rem,negdcScaler,tempPred,absCoeffDC ;// Remainder Rem=abs(pPredBufRow(Col)[0])-tempPred*dcScaler + + LDRH dcRowbufCoeff,[pPredBufCol] + + CMP Rem,temp3 ;// compare Rem with (dcScaler/2) + ADDGE tempPred,#1 ;// tempPred=tempPred+1 if Rem>=(dcScaler/2) + CMP temp1,#0 + RSBLT tempPred,tempPred,#0 ;// tempPred=-tempPred if + + STRH dcRowbufCoeff,[pPredBufRow,#-16] + + + LDRH temp,[pSrcDst] ;// temp=pSrcDst[0] + ADD temp,temp,tempPred ;// temp=pSrcDst[0]+tempPred + SSAT16 temp,#12,temp ;// clip temp to [-2048,2047] + SMULBB dcColBuffCoeff,temp,dcScaler ;// temp1=clipped(pSrcDst[0])*dcScaler + M_LDR ACPredFlag,ACPredFlagonStack + STRH dcColBuffCoeff,[pPredBufCol] + + + ;// AC Prediction + + M_LDR predQP,predQPonStack + + CMP ACPredFlag,#1 ;// Check if the AC prediction flag is set or not + BNE Exit ;// If not set Exit + CMP predDir,#2 ;// Check the Prediction direction + LDR predCoeffTable, =armVCM4P2_Reciprocal_QP_S32 ;// Loading the table with entries 0x1ffff/(1 to 63) + MOV Const,#4 + MUL curQP,curQP,Const ;// curQP=4*curQP + VDUP dPredQP0,predQP + LDR temp2,[predCoeffTable,curQP] ;// temp=0x1ffff/curQP + VDUP qCoeffTab,temp2 + BNE Horizontal ;// If the Prediction direction is horizontal branch to Horizontal + + + + ;// Vertical + ;//Calculating tempPred + + VLD1 {dPredRowBuf0,dPredRowBuf1},[pPredBufRow] ;// Loading pPredBufRow[i]:i=0 t0 7 + + VMULL qtemp1,dPredRowBuf0,dPredQP0 ;//qtemp1[i]=pPredBufRow[i]*dPredQP[i]: i=0 t0 3 + VMUL qtempPred1,qtemp1,qCoeffTab ;//qtempPred1[i]=pPredBufRow[i]*dPredQP[i]*0x1ffff/curQP : i=0 t0 3 + + VMULL qtemp1,dPredRowBuf1,dPredQP0 ;//qtemp1[i]=pPredBufRow[i]*dPredQP[i] : i=4 t0 7 + + VRSHR qtempPred1,qtempPred1,#17 ;//qtempPred1[i]=round(pPredBufRow[i]*dPredQP[i]/curQP) : i=0 t0 3 + VSHRN dPredQP1,qtempPred1,#0 ;// narrow qtempPred1[i] to 16 bits + + + VMUL qtempPred1,qtemp1,qCoeffTab ;//qtempPred1[i]=pPredBufRow[i]*dPredQP[i]*0x1ffff/curQP : i=4 t0 7 + VRSHR qtempPred1,qtempPred1,#17 ;//qtempPred1[i]=round(pPredBufRow[i]*dPredQP[i]/curQP) : i=4 t0 7 + VLD1 {dtemp0,dtemp1},[pSrcDst] ;//Loading pSrcDst[i] : i=0 to 7 + VSHRN dtempPred1,qtempPred1,#0 ;// narrow qtempPred1[i] to 16 bits + VMOV dtempPred0,dPredQP1 + + ;//updating source and row prediction buffer contents + VADD qtemp,qtemp,qtempPred ;//pSrcDst[i]=pSrcDst[i]+qtempPred[i]: i=0 to 7 + VQSHL qtemp,qtemp,#4 ;//Clip to [-2048,2047] + LDRH dcRowbufCoeff,[pPredBufRow] ;//Loading Dc Value of Row Prediction buffer + VSHR qtemp,qtemp,#4 + + VST1 {dtemp0,dtemp1},[pSrcDst] ;//storing back the updated values + VST1 {dtemp0,dtemp1},[pPredBufRow] ;//storing back the updated row prediction values + STRH dcRowbufCoeff,[pPredBufRow] ;// storing the updated DC Row Prediction coeff + + B Exit + +Horizontal + + ;// Calculating Temppred + + + + VLD1 {dPredRowBuf0,dPredRowBuf1},[pPredBufCol] ;// Loading pPredBufCol[i]:i=0 t0 7 + VMULL qtemp1,dPredRowBuf0,dPredQP0 ;//qtemp1[i]=pPredBufCol[i]*dPredQP[i]: i=0 t0 3 + VMUL qtempPred1,qtemp1,qCoeffTab ;//qtempPred1[i]=pPredBufCol[i]*dPredQP[i]*0x1ffff/curQP : i=0 t0 3 + + VMULL qtemp1,dPredRowBuf1,dPredQP0 ;//qtemp1[i]=pPredBufCol[i]*dPredQP[i] : i=4 t0 7 + + VRSHR qtempPred1,qtempPred1,#17 ;//qtempPred1[i]=round(pPredBufCol[i]*dPredQP[i]/curQP) : i=0 t0 3 + VSHRN dPredQP1,qtempPred1,#0 ;// narrow qtempPred1[i] to 16 bits + + + VMUL qtempPred1,qtemp1,qCoeffTab ;//qtempPred1[i]=pPredBufCol[i]*dPredQP[i]*0x1ffff/curQP : i=4 t0 7 + + MOV temppPredColBuf,pPredBufCol + VRSHR qtempPred1,qtempPred1,#17 ;//qtempPred1[i]=round(pPredBufCol[i]*dPredQP[i]/curQP) : i=4 t0 7 + VLD4 {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst] ;// Loading coefficients Interleaving by 4 + VSHRN dtempPred1,qtempPred1,#0 ;// narrow qtempPred1[i] to 16 bits + VMOV dtempPred0,dPredQP1 + + ;// Updating source and column prediction buffer contents + ADD temp2,pSrcDst,#32 + VLD4 {dtemp4,dtemp5,dtemp6,dtemp7},[temp2] ;// Loading next 16 coefficients Interleaving by 4 + VUZP dtemp0,dtemp4 ;// Interleaving by 8 + VADD dtemp0,dtemp0,dtempPred0 ;// Adding tempPred to coeffs + VQSHL dtemp0,dtemp0,#4 ;// Clip to [-2048,2047] + VSHR dtemp0,dtemp0,#4 + VST1 {dtemp0},[pPredBufCol]! ;// Updating Pridiction column buffer + VZIP dtemp0,dtemp4 ;// deinterleaving + VST4 {dtemp0,dtemp1,dtemp2,dtemp3},[pSrcDst] ;// Updating source coeffs + VST4 {dtemp4,dtemp5,dtemp6,dtemp7},[temp2]! + + MOV temp1,temp2 + VLD4 {dtemp0,dtemp1,dtemp2,dtemp3},[temp2]! ;// Loading coefficients Interleaving by 4 + + VLD4 {dtemp4,dtemp5,dtemp6,dtemp7},[temp2] + VUZP dtemp0,dtemp4 ;// Interleaving by 8 + VADD dtemp0,dtemp0,dtempPred1 + VQSHL dtemp0,dtemp0,#4 ;// Clip to [-2048,2047] + VSHR dtemp0,dtemp0,#4 + VST1 {dtemp0},[pPredBufCol]! + VZIP dtemp0,dtemp4 + VST4 {dtemp0,dtemp1,dtemp2,dtemp3},[temp1] + STRH dcColBuffCoeff,[temppPredColBuf] + VST4 {dtemp4,dtemp5,dtemp6,dtemp7},[temp2] + +Exit + + STRH temp,[pSrcDst] + + + MOV Return,#OMX_Sts_NoErr + + M_END + ENDIF + + + END + + + |