summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
diff options
context:
space:
mode:
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s')
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s288
1 files changed, 288 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
new file mode 100755
index 0000000..0c3f4f2
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
@@ -0,0 +1,288 @@
+;//
+;//
+;// File Name: omxVCM4P10_FilterDeblockingLuma_HorEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+ IMPORT armVCM4P10_DeblockingLumabSLT4_unsafe
+ IMPORT armVCM4P10_DeblockingLumabSGE4_unsafe
+
+ IF CortexA8
+
+LOOP_COUNT EQU 0x55000000 ;// top byte 01010101b; each ADDS XY, XY, XY shifts it left one bit: carry set on every 2nd shift ends LoopX, zero after 8 shifts ends LoopY (4 x 2 passes)
+
+
+;// Function arguments (r0-r3) and the other core-register aliases used by the function body
+
+pSrcDst RN 0 ;// pixel pointer; moved back 4 rows at function entry, then walked row by row
+srcdstStep RN 1 ;// added to the pixel pointers to move from one row to the next
+pAlpha RN 2 ;// pointer to the alpha bytes
+pBeta RN 3 ;// pointer to the beta bytes
+
+pThresholds RN 5 ;// loaded from the stack argument ppThresholds
+pBS RN 4 ;// loaded from the stack argument ppBS; read one halfword per LoopX pass
+bS10 RN 12 ;// the halfword read from pBS (two bS bytes)
+
+pAlpha_0 RN 2 ;// same register as pAlpha; its byte is used for the first LoopY pass
+pBeta_0 RN 3 ;// same register as pBeta; its byte is used for the first LoopY pass
+
+pAlpha_1 RN 7 ;// pAlpha_0 + 1; its byte is used for every later LoopY pass
+pBeta_1 RN 8 ;// pBeta_0 + 1; its byte is used for every later LoopY pass
+
+
+
+;// Loop
+
+XY RN 9 ;// loop-control shift register, initialised from LOOP_COUNT
+
+pTmp RN 6 ;// second row pointer (pSrcDst + srcdstStep) for the interleaved loads; later holds row 0 of the column for the pointer restore
+step RN 10 ;// 2 * srcdstStep
+
+;// Pixels: one 8-byte row per D register; rows are loaded top to bottom as P_3, P_2, P_1, P_0, Q_0, Q_1, Q_2, Q_3
+dP_0 DN D4.U8
+dP_1 DN D5.U8
+dP_2 DN D6.U8
+dP_3 DN D7.U8
+dQ_0 DN D8.U8
+dQ_1 DN D9.U8
+dQ_2 DN D10.U8
+dQ_3 DN D11.U8
+
+
+;// Filtering Decision
+dAlpha DN D0.U8 ;// alpha byte replicated into every lane
+dBeta DN D2.U8 ;// beta byte replicated into every lane
+
+dFilt DN D16.U8 ;// per-byte filter-enable mask
+dAqflg DN D12.U8 ;// dFilt AND (beta > |q2 - q0|); shares D12 with dAp1p0
+dApflg DN D17.U8 ;// dFilt AND (beta > |p2 - p0|); shares D17 with dAq2q0
+
+dAp0q0 DN D13.U8 ;// |q0 - p0|
+dAp1p0 DN D12.U8 ;// |p0 - p1|, then max with |q1 - q0|, then the beta comparison mask
+dAq1q0 DN D18.U8 ;// |q1 - q0|
+dAp2p0 DN D19.U8 ;// |p2 - p0|, then the beta comparison mask
+dAq2q0 DN D17.U8 ;// |q2 - q0|, then the beta comparison mask
+
+;// bSLT4 (dTC0, dTC1, dTC01, dTCs, dTC and dTemp are not referenced by the function body in this file; presumably they mirror the imported bS<4 helper - confirm)
+dTC0 DN D18.U8
+dTC1 DN D19.U8
+dTC01 DN D18.U8
+
+dTCs DN D31.S8
+dTC DN D31.U8
+
+dMask_0 DN D14.U8 ;// set to 0 in every byte before the loops start
+dMask_1 DN D15.U8 ;// set to 1 in every byte before the loops start
+
+Mask_0 RN 11 ;// core register holding 0; copied into the dFilt lanes whose bS byte is 0
+
+dTemp DN D19.U8
+
+;// Computing P0,Q0 (none of these four names is referenced by the function body in this file)
+qDq0p0 QN Q10.S16
+qDp1q1 QN Q11.S16
+qDelta QN Q10.S16 ; reuse qDq0p0
+dDelta DN D20.S8
+
+
+;// Computing P1,Q1 (of the names in this group the function body references only dQ_0n, dQ_1n, dP_0n and dP_1n, as store sources)
+dRp0q0 DN D24.U8
+
+dMaxP DN D23.U8
+dMinP DN D22.U8
+
+dMaxQ DN D19.U8
+dMinQ DN D21.U8
+
+dDeltaP DN D26.U8
+dDeltaQ DN D27.U8
+
+qP_0n QN Q14.S16
+qQ_0n QN Q12.S16
+
+dQ_0n DN D24.U8 ;// stored to the Q_0 row after either helper call
+dQ_1n DN D25.U8 ;// stored to the Q_1 row after either helper call
+dP_0n DN D29.U8 ;// stored to the P_0 row after either helper call
+dP_1n DN D30.U8 ;// stored to the P_1 row after either helper call
+
+;// bSGE4 (the function body references only dP_2n and dQ_2n from the names first declared in this group)
+
+qSp0q0 QN Q10.U16
+
+qSp2q1 QN Q11.U16
+qSp0q0p1 QN Q12.U16
+qSp3p2 QN Q13.U16
+dHSp0q1 DN D28.U8
+
+qSq2p1 QN Q11.U16
+qSp0q0q1 QN Q12.U16
+qSq3q2 QN Q13.U16 ;!!
+dHSq0p1 DN D28.U8 ;!!
+
+qTemp1 QN Q11.U16 ;!!;qSp2q1
+qTemp2 QN Q12.U16 ;!!;qSp0q0p1
+
+dP_0t DN D28.U8 ;!!;dHSp0q1
+dQ_0t DN D22.U8 ;!!;Temp1
+
+dP_0n DN D29.U8 ;// NOTE(review): repeats the dP_0n declaration earlier in this table with the same register
+dP_1n DN D30.U8 ;// NOTE(review): repeats the dP_1n declaration earlier in this table with the same register
+dP_2n DN D31.U8 ;// stored to the P_2 row on the bS >= 4 path; shares D31 with dTC/dTCs
+
+dQ_0n DN D24.U8 ;!!;Temp2 -- NOTE(review): repeats the dQ_0n declaration earlier in this table with the same register
+dQ_1n DN D25.U8 ;!!;Temp2 -- NOTE(review): repeats the dQ_1n declaration earlier in this table with the same register
+dQ_2n DN D28.U8 ;!!;dQ_0t -- stored to the Q_2 row on the bS >= 4 path
+
+
+ ;// Function header. Register arguments (per the RN table in this file): r0 = pSrcDst, r1 = srcdstStep, r2 = pAlpha, r3 = pBeta. Returns OMX_Sts_NoErr in r0.
+ M_START omxVCM4P10_FilterDeblockingLuma_HorEdge_I, r11, d15 ;// macro from an included header; presumably r11/d15 name the highest registers to preserve - confirm against the macro
+ ;// Overview: 4 LoopY passes x 2 LoopX passes; each LoopX pass handles one 8-byte-wide column of 8 rows (P_3..Q_3) and one halfword (two bS bytes) from pBS.
+ ;//Arguments on the stack
+ M_ARG ppThresholds, 4 ;// 4-byte stack argument, loaded into pThresholds below
+ M_ARG ppBS, 4 ;// 4-byte stack argument, loaded into pBS below
+
+ ;// d0-dAlpha_0
+ ;// d2-dBeta_0
+
+ ADD pAlpha_1, pAlpha_0, #1 ;// address of the second alpha byte (used from the second LoopY pass on)
+ ADD pBeta_1, pBeta_0, #1 ;// address of the second beta byte (used from the second LoopY pass on)
+
+ VLD1 {dAlpha[]}, [pAlpha_0] ;// replicate the first alpha byte into every lane
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #2 ;// back up 4 rows so pSrcDst addresses the P_3 row (row 0 of the column)
+ VLD1 {dBeta[]}, [pBeta_0] ;// replicate the first beta byte into every lane
+
+ M_LDR pBS, ppBS
+ M_LDR pThresholds, ppThresholds
+
+ MOV Mask_0,#0 ;// zero source for the conditional VMOVEQ lane clears below
+
+ ;dMask_0-14
+ ;dMask_1-15
+
+ VMOV dMask_0, #0 ;// not read again in this file; presumably consumed by the imported helpers - confirm
+ VMOV dMask_1, #1 ;// not read again in this file; presumably consumed by the imported helpers - confirm
+
+ ADD step, srcdstStep, srcdstStep ;// step = 2 rows, so pSrcDst and pTmp can leapfrog through alternate rows
+
+ LDR XY,=LOOP_COUNT ;// bit pattern that drives both loop exits (see the ADDS XY, XY, XY instructions below)
+
+ ;// p0-p3 - d4-d7
+ ;// q0-q3 - d8-d11
+LoopY ;// outer loop: re-entered from ExitLoopY after pSrcDst has moved down 4 rows
+LoopX ;// inner loop: one 8-byte column per pass
+ LDRH bS10, [pBS], #2 ;// two bS bytes for this column; pBS advances by 2
+ ADD pTmp, pSrcDst, srcdstStep ;// pTmp -> row 1 (P_2)
+ CMP bS10, #0
+ BEQ NoFilterBS0 ;// both bS bytes are zero: skip the loads and the filter
+
+ VLD1 dP_3, [pSrcDst], step ;// row 0; pSrcDst -> row 2
+ VLD1 dP_2, [pTmp], step ;// row 1; pTmp -> row 3
+ VLD1 dP_1, [pSrcDst], step ;// row 2; pSrcDst -> row 4
+ VLD1 dP_0, [pTmp], step ;// row 3; pTmp -> row 5
+ VLD1 dQ_0, [pSrcDst], step ;// row 4; pSrcDst -> row 6
+ VABD dAp1p0, dP_0, dP_1 ;// |p0 - p1| (the arithmetic is interleaved with the remaining loads)
+ VLD1 dQ_1, [pTmp] ;// row 5; no writeback
+ VABD dAp0q0, dQ_0, dP_0 ;// |q0 - p0|
+ VLD1 dQ_2, [pSrcDst], srcdstStep ;// row 6; pSrcDst -> row 7
+
+ VABD dAq1q0, dQ_1, dQ_0 ;// |q1 - q0|
+ VABD dAp2p0, dP_2, dP_0 ;// |p2 - p0|
+ VCGT dFilt, dAlpha, dAp0q0 ;// lane mask: alpha > |q0 - p0|
+
+ TST bS10, #0xff ;// Z set when the first bS byte is 0
+ VMAX dAp1p0, dAq1q0, dAp1p0 ;// max(|q1 - q0|, |p0 - p1|)
+ VABD dAq2q0, dQ_2, dQ_0 ;// |q2 - q0|
+
+ VMOVEQ.U32 dFilt[0], Mask_0 ;// first bS byte is 0: clear the low 4 lanes of dFilt
+ TST bS10, #0xff00 ;// Z set when the second bS byte is 0
+
+ VCGT dAp2p0, dBeta, dAp2p0 ;// lane mask: beta > |p2 - p0|
+ VCGT dAp1p0, dBeta, dAp1p0 ;// lane mask: beta > max(|q1 - q0|, |p0 - p1|)
+
+ VMOVEQ.U32 dFilt[1], Mask_0 ;// second bS byte is 0: clear the high 4 lanes of dFilt
+
+ VCGT dAq2q0, dBeta, dAq2q0 ;// lane mask: beta > |q2 - q0|
+ VLD1 dQ_3, [pSrcDst] ;// row 7; no writeback, pSrcDst stays at row 7
+ VAND dFilt, dFilt, dAp1p0 ;// final filter mask; must precede the VAND into dAqflg, which overwrites D12 (dAp1p0 and dAqflg share it)
+ TST bS10, #4 ;// tests bit 2 of the first bS byte only; the result is consumed by BNE bSGE4 after the two VANDs
+
+ VAND dAqflg, dFilt, dAq2q0 ;// reads D17 as dAq2q0 before the next line overwrites it as dApflg
+ VAND dApflg, dFilt, dAp2p0
+
+ BNE bSGE4
+bSLT4
+ ;// bS < 4 Filtering
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #2 ;// row 7 -> row 3
+ SUB pSrcDst, pSrcDst, srcdstStep ;// row 3 -> row 2 (P_1), the first row stored below
+
+ BL armVCM4P10_DeblockingLumabSLT4_unsafe ;// imported helper, body not in this file; this path never adjusts pThresholds itself, so presumably the helper does - confirm
+
+ ;// Result Storage (dP_1n, dP_0n, dQ_0n, dQ_1n are never written in this file, so they hold whatever the helper left in them)
+ VST1 dP_1n, [pSrcDst], srcdstStep ;// row 2 (P_1)
+ VST1 dP_0n, [pSrcDst], srcdstStep ;// row 3 (P_0); pSrcDst -> row 4
+ SUB pTmp, pSrcDst, srcdstStep, LSL #2 ;// pTmp = row 0 of this column
+ VST1 dQ_0n, [pSrcDst], srcdstStep ;// row 4 (Q_0)
+ ADDS XY, XY, XY ;// shift the loop pattern left: C = bit shifted out, Z = pattern exhausted
+ VST1 dQ_1n, [pSrcDst] ;// row 5 (Q_1)
+ ADD pSrcDst, pTmp, #8 ;// next column: row 0, 8 bytes to the right
+
+ BCC LoopX ;// carry clear: the second column of this edge is still to do
+ B ExitLoopY
+
+NoFilterBS0
+ ADD pSrcDst, pSrcDst, #8 ;// nothing was loaded, so pSrcDst is still at row 0; move to the next column
+ ADDS XY, XY, XY ;// same loop-pattern shift as on the filtering paths
+ ADD pThresholds, pThresholds, #2 ;// step pThresholds past 2 bytes for this unfiltered column
+ BCC LoopX
+ B ExitLoopY
+bSGE4
+ ;// bS >= 4 Filtering
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #2 ;// row 7 -> row 3
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #1 ;// row 3 -> row 1 (P_2), the first row stored below
+ BL armVCM4P10_DeblockingLumabSGE4_unsafe ;// imported helper, body not in this file
+
+ ;// Result Storage (the six output registers are never written in this file, so they hold whatever the helper left in them)
+ VST1 dP_2n, [pSrcDst], srcdstStep ;// row 1 (P_2)
+ VST1 dP_1n, [pSrcDst], srcdstStep ;// row 2 (P_1)
+ VST1 dP_0n, [pSrcDst], srcdstStep ;// row 3 (P_0); pSrcDst -> row 4
+ SUB pTmp, pSrcDst, srcdstStep, LSL #2 ;// pTmp = row 0 of this column
+ VST1 dQ_0n, [pSrcDst], srcdstStep ;// row 4 (Q_0)
+ ADDS XY,XY,XY ;// same loop-pattern shift as on the other paths
+ VST1 dQ_1n, [pSrcDst], srcdstStep ;// row 5 (Q_1)
+ ADD pThresholds, pThresholds, #2 ;// step pThresholds past 2 bytes, as on the no-filter path
+ VST1 dQ_2n, [pSrcDst] ;// row 6 (Q_2)
+
+ ADD pSrcDst, pTmp, #8 ;// next column: row 0, 8 bytes to the right
+ BCC LoopX ;// carry set falls through to ExitLoopY
+
+ExitLoopY
+
+ SUB pSrcDst, pSrcDst, #16 ;// undo the two 8-byte column advances
+ VLD1 {dAlpha[]}, [pAlpha_1] ;// second alpha byte, used for all remaining LoopY passes
+ ADD pSrcDst, pSrcDst, srcdstStep, LSL #2 ;// move down 4 rows for the next LoopY pass
+ VLD1 {dBeta[]}, [pBeta_1] ;// second beta byte, used for all remaining LoopY passes
+ BNE LoopY ;// Z still comes from the last ADDS XY (no instruction in between sets flags); loop until the pattern is zero
+
+ MOV r0, #OMX_Sts_NoErr ;// return value
+
+ M_END
+
+ ENDIF
+
+
+
+
+ END
+
+