summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
diff options
context:
space:
mode:
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s')
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s282
1 files changed, 282 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
new file mode 100644
index 0000000..18e6c1d
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
@@ -0,0 +1,282 @@
+;//
+;//
+;// File Name: omxVCM4P10_FilterDeblockingChroma_VerEdge_I_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+ IF CortexA8
+
+ IMPORT armVCM4P10_DeblockingChromabSGE4_unsafe
+ IMPORT armVCM4P10_DeblockingChromabSLT4_unsafe
+
+LOOP_COUNT EQU 0x40000000
+MASK_3 EQU 0x03030303
+MASK_4 EQU 0x04040404
+
+;// Function arguments
+
+pSrcDst RN 0
+srcdstStep RN 1
+pAlpha RN 2
+pBeta RN 3
+
+pThresholds RN 5
+pBS RN 4
+bS3210 RN 6
+pSrcDst_P RN 10
+pSrcDst_Q RN 12
+
+pTmp RN 10
+pTmp2 RN 12
+step RN 14
+
+;// Loop
+
+XY RN 7
+
+;// Rows input
+dRow0 DN D7.U8
+dRow1 DN D8.U8
+dRow2 DN D5.U8
+dRow3 DN D10.U8
+dRow4 DN D6.U8
+dRow5 DN D9.U8
+dRow6 DN D4.U8
+dRow7 DN D11.U8
+
+
+;// Pixels
+dP_0 DN D4.U8
+dP_1 DN D5.U8
+dP_2 DN D6.U8
+dQ_0 DN D8.U8
+dQ_1 DN D9.U8
+dQ_2 DN D10.U8
+
+;// Filtering Decision
+dAlpha DN D0.U8
+dBeta DN D2.U8
+
+dFilt DN D16.U8
+dAqflg DN D12.U8
+dApflg DN D17.U8
+
+dAp0q0 DN D13.U8
+dAp1p0 DN D12.U8
+dAq1q0 DN D18.U8
+dAp2p0 DN D19.U8
+dAq2q0 DN D17.U8
+
+qBS3210 QN Q13.U16
+dBS3210 DN D26
+dMask_bs DN D27
+dFilt_bs DN D26.U16
+
+;// bSLT4
+dMask_0 DN D14.U8
+dMask_1 DN D15.U8
+dMask_4 DN D1.U16
+
+Mask_4 RN 8
+Mask_3 RN 9
+
+dTemp DN D19.U8
+
+;// Result
+dP_0t DN D13.U8
+dQ_0t DN D31.U8
+
+dP_0n DN D29.U8
+dQ_0n DN D24.U8
+
+
+ ;// Function header
+ M_START omxVCM4P10_FilterDeblockingChroma_VerEdge_I, r12, d15
+
+ ;//Arguments on the stack
+ M_ARG ppThresholds, 4
+ M_ARG ppBS, 4
+
+ ;// d0-dAlpha_0
+ ;// d2-dBeta_0
+
+ ;load alpha1,beta1 somewhere to avoid more loads
+ VLD1 {dAlpha[]}, [pAlpha]!
+ SUB pSrcDst, pSrcDst, #4
+ VLD1 {dBeta[]}, [pBeta]!
+
+ M_LDR pBS, ppBS
+ M_LDR pThresholds, ppThresholds
+
+ LDR Mask_4, =MASK_4
+ LDR Mask_3, =MASK_3
+
+ ;dMask_0-14
+ ;dMask_1-15
+ ;dMask_4-19
+
+ VMOV dMask_0, #0
+ VMOV dMask_1, #1
+ VMOV dMask_4, #4
+
+ LDR XY, =LOOP_COUNT
+
+ ;// p0-p3 - d4-d7
+ ;// q0-q3 - d8-d11
+
+
+LoopY
+ LDR bS3210, [pBS], #8
+ ADD pTmp, pSrcDst, srcdstStep
+ ADD step, srcdstStep, srcdstStep
+
+ ;1
+ VLD1 dRow0, [pSrcDst], step
+ ;1
+ VLD1 dRow1, [pTmp], step
+ VLD1 dRow2, [pSrcDst], step
+ VLD1 dRow3, [pTmp], step
+ VLD1 dRow4, [pSrcDst], step
+ VLD1 dRow5, [pTmp], step
+ VLD1 dRow6, [pSrcDst], step
+ VLD1 dRow7, [pTmp], step
+
+
+ ;// dRow0 = [q3r0 q2r0 q1r0 q0r0 p0r0 p1r0 p2r0 p3r0]
+ ;// dRow1 = [q3r1 q2r1 q1r1 q0r1 p0r1 p1r1 p2r1 p3r1]
+ ;// dRow2 = [q3r2 q2r2 q1r2 q0r2 p0r2 p1r2 p2r2 p3r2]
+ ;// dRow3 = [q3r3 q2r3 q1r3 q0r3 p0r3 p1r3 p2r3 p3r3]
+ ;// dRow4 = [q3r4 q2r4 q1r4 q0r4 p0r4 p1r4 p2r4 p3r4]
+ ;// dRow5 = [q3r5 q2r5 q1r5 q0r5 p0r5 p1r5 p2r5 p3r5]
+ ;// dRow6 = [q3r6 q2r6 q1r6 q0r6 p0r6 p1r6 p2r6 p3r6]
+ ;// dRow7 = [q3r7 q2r7 q1r7 q0r7 p0r7 p1r7 p2r7 p3r7]
+
+ ;// 8x8 Transpose
+ VZIP.8 dRow0, dRow1
+ VZIP.8 dRow2, dRow3
+ VZIP.8 dRow4, dRow5
+ VZIP.8 dRow6, dRow7
+
+ VZIP.16 dRow0, dRow2
+ VZIP.16 dRow1, dRow3
+ VZIP.16 dRow4, dRow6
+ VZIP.16 dRow5, dRow7
+
+ VZIP.32 dRow0, dRow4
+ VZIP.32 dRow2, dRow6
+ VZIP.32 dRow3, dRow7
+ VZIP.32 dRow1, dRow5
+
+
+ ;Realign the pointers
+
+ CMP bS3210, #0
+ VABD dAp2p0, dP_2, dP_0
+ VABD dAp0q0, dP_0, dQ_0
+ BEQ NoFilterBS0
+
+ VABD dAp1p0, dP_1, dP_0
+ VABD dAq1q0, dQ_1, dQ_0
+
+ VMOV.U32 dBS3210[0], bS3210
+ VCGT dFilt, dAlpha, dAp0q0
+ VMAX dAp1p0, dAq1q0, dAp1p0
+ VMOVL qBS3210, dBS3210.U8
+ VABD dAq2q0, dQ_2, dQ_0
+ VCGT dMask_bs.S16, dBS3210.S16, #0
+
+ VCGT dAp1p0, dBeta, dAp1p0
+ VCGT dAp2p0, dBeta, dAp2p0
+ VAND dFilt, dMask_bs.U8
+
+ TST bS3210, Mask_3
+
+ VCGT dAq2q0, dBeta, dAq2q0
+ VAND dFilt, dFilt, dAp1p0
+
+ VAND dAqflg, dFilt, dAq2q0
+ VAND dApflg, dFilt, dAp2p0
+
+ ;// bS < 4 Filtering
+ BLNE armVCM4P10_DeblockingChromabSLT4_unsafe
+
+ TST bS3210, Mask_4
+
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #3
+ VTST dFilt_bs, dFilt_bs, dMask_4
+
+ ;// bS == 4 Filtering
+ BLNE armVCM4P10_DeblockingChromabSGE4_unsafe
+
+ VBIT dP_0n, dP_0t, dFilt_bs
+ VBIT dQ_0n, dQ_0t, dFilt_bs
+
+ ;// Result Storage
+ ADD pSrcDst_P, pSrcDst, #3
+ VBIF dP_0n, dP_0, dFilt
+
+ ADD pTmp2, pSrcDst_P, srcdstStep
+ ADD step, srcdstStep, srcdstStep
+ VBIF dQ_0n, dQ_0, dFilt
+
+ ADDS XY, XY, XY
+
+ VST1 {dP_0n[0]}, [pSrcDst_P], step
+ VST1 {dP_0n[1]}, [pTmp2], step
+ VST1 {dP_0n[2]}, [pSrcDst_P], step
+ VST1 {dP_0n[3]}, [pTmp2], step
+ VST1 {dP_0n[4]}, [pSrcDst_P], step
+ VST1 {dP_0n[5]}, [pTmp2], step
+ VST1 {dP_0n[6]}, [pSrcDst_P], step
+ VST1 {dP_0n[7]}, [pTmp2], step
+
+ ADD pSrcDst_Q, pSrcDst, #4
+ ADD pTmp, pSrcDst_Q, srcdstStep
+
+ VST1 {dQ_0n[0]}, [pSrcDst_Q], step
+ VST1 {dQ_0n[1]}, [pTmp], step
+ VST1 {dQ_0n[2]}, [pSrcDst_Q], step
+ VST1 {dQ_0n[3]}, [pTmp], step
+ VST1 {dQ_0n[4]}, [pSrcDst_Q], step
+ VST1 {dQ_0n[5]}, [pTmp], step
+ VST1 {dQ_0n[6]}, [pSrcDst_Q], step
+ VST1 {dQ_0n[7]}, [pTmp], step
+
+ ADD pSrcDst, pSrcDst, #4
+
+ BNE LoopY
+
+ MOV r0, #OMX_Sts_NoErr
+
+ M_EXIT
+
+NoFilterBS0
+ VLD1 {dAlpha[]}, [pAlpha]
+ ADD pSrcDst, pSrcDst, #4
+ SUB pSrcDst, pSrcDst, srcdstStep, LSL #3
+ ADDS XY, XY, XY
+ VLD1 {dBeta[]}, [pBeta]
+ ADD pThresholds, pThresholds, #4
+ BNE LoopY
+
+ MOV r0, #OMX_Sts_NoErr
+
+ M_END
+
+ ENDIF
+
+
+ END
+
+