summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
diff options
context:
space:
mode:
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s')
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s198
1 files changed, 198 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
new file mode 100755
index 0000000..4c3a77c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s
@@ -0,0 +1,198 @@
+;//
+;//
+;// File Name: armVCM4P10_DeblockingChroma_unsafe_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 12290
+;// Date: Wednesday, April 9, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;//
+;//
+;//
+
+ INCLUDE omxtypes_s.h
+ INCLUDE armCOMM_s.h
+
+ M_VARIANTS CortexA8
+
+
+ IF CortexA8
+
+pAlpha RN 2 ;// r2: address read by VLD1 {dAlpha[]} in both routines
+pBeta RN 3 ;// r3: address read by VLD1 {dBeta[]} in both routines
+;// Threshold / boundary-strength registers
+pThresholds RN 5 ;// r5: source of the threshold bytes; advanced by 4 in each routine below
+pBS RN 4 ;// not referenced by either routine in this file
+bS3210 RN 6 ;// not referenced by either routine in this file
+
+;// Pixels - only dP_0, dP_1, dQ_0 and dQ_1 are read by the routines in this file
+dP_0 DN D4.U8
+dP_1 DN D5.U8
+dP_2 DN D6.U8
+dP_3 DN D7.U8
+dQ_0 DN D8.U8
+dQ_1 DN D9.U8
+dQ_2 DN D10.U8
+dQ_3 DN D11.U8
+
+
+;// Filtering Decision
+dAlpha DN D0.U8 ;// loaded (all lanes) by both routines, never read in this file
+dBeta DN D2.U8 ;// loaded (all lanes) by both routines, never read in this file
+
+dFilt DN D16.U8 ;// select mask for the VBIF in the bSLT4 routine
+dAqflg DN D12.U8 ;// not referenced in this file
+dApflg DN D17.U8 ;// not referenced in this file
+
+dAp0q0 DN D13.U8 ;// not referenced in this file; shares D13 with dHSp0q1/dP_0t
+
+;// bSLT4
+dTC3210 DN D18.U8 ;// threshold bytes loaded from pThresholds
+dTCs DN D31.S8 ;// signed view of the clipping bound
+dTC DN D31.U8 ;// unsigned view of the same register as dTCs
+
+dMask_0 DN D14.U8 ;// inserted into dTC3210 where dFilt bits are clear; presumably all zeros, set by caller - TODO confirm
+dMask_1 DN D15.U8 ;// added to dTC3210 to form dTC; presumably 1 in every byte, set by caller - TODO confirm
+dMask_4 DN D26.U16 ;// not referenced in this file
+
+dTemp DN D28.U8 ;// copy of dTC3210 used as the second VZIP operand; D28 is later overwritten via qP_0n (Q14)
+dDummy DN D17.U8 ;// not referenced in this file; same register as dApflg
+
+;// Computing delta for P0,Q0
+qDq0p0 QN Q10.S16
+qDp1q1 QN Q11.S16
+qDelta QN Q10.S16 ;// same register as qDq0p0 (result overwrites it in place)
+dDelta DN D20.S8 ;// D20 is the low half of Q10, so the narrowed delta overwrites part of qDelta
+
+
+;// Widened P0/Q0 accumulators and their narrowed results
+qP_0n QN Q14.S16
+qQ_0n QN Q12.S16
+
+dQ_0n DN D24.U8 ;// low half of Q12 (qQ_0n)
+dP_0n DN D29.U8 ;// high half of Q14 (qP_0n)
+
+;// bSGE4
+
+dHSp0q1 DN D13.U8
+dHSq0p1 DN D31.U8
+
+dBS3210 DN D28.U16 ;// not referenced in this file
+
+dP_0t DN D13.U8 ;// same register as dHSp0q1
+dQ_0t DN D31.U8 ;// same register as dHSq0p1 (and as dTC/dTCs)
+;// NOTE(review): the two aliases below repeat the dP_0n/dQ_0n definitions above with identical values
+dP_0n DN D29.U8
+dQ_0n DN D24.U8 ;// same register as the earlier dQ_0n definition
+
+;// Register usage for - armVCM4P10_DeblockingChromabSLT4_unsafe
+;//
+;// Inputs - Pixels - p0-p1: D4-D5, q0-q1: D8-D9
+;// - Filter masks - filt: D16, dMask_0: D14, dMask_1: D15
+;// - Additional Params - pAlpha: r2, pBeta: r3, pThresholds: r5
+;//
+;// Outputs - Pixels - P0: D29, Q0: D24; alpha: D0 and beta: D2 are reloaded
+;// - Additional Params - pThresholds: r5 (post-incremented by the 4 bytes loaded)
+;// dFilt is only used to replace threshold lanes (VBIF); results are produced for every lane
+;// Registers written by the instructions below - D0, D2, D18, D20-D25, D28-D29, D31
+;// (M_START/M_END come from armCOMM_s.h and are not visible here)
+
+ M_START armVCM4P10_DeblockingChromabSLT4_unsafe
+
+ ;// Load four threshold bytes into the low 32 bits of dTC3210 (D18)
+ ;// and advance pThresholds past them.
+ ;// dTC3210 - D18, dTemp - D28
+
+ VLD1 d18.U32[0], [pThresholds]! ;// d18 is dTC3210
+
+ ;// delta = (((q0-p0)<<2) + (p1-q1) + 4) >> 3;
+ ;// computed as dDelta = (((p1-q1) >> 2) + (q0-p0) + 1) >> 1
+
+ ;// qDp1q1 - Q11
+ ;// qDq0p0 - Q10
+ VSUBL qDp1q1, dP_1, dQ_1 ;// p1 - q1, widened to 16 bits
+ VMOV dTemp, dTC3210 ;// duplicate the thresholds for the VZIP below
+ VSUBL qDq0p0, dQ_0, dP_0 ;// q0 - p0, widened to 16 bits
+ VSHR qDp1q1, qDp1q1, #2 ;// (p1 - q1) >> 2
+ VZIP.8 dTC3210, dTemp ;// dTC3210 = each loaded threshold byte repeated in two adjacent lanes
+
+ ;// qDelta shares Q10 with qDq0p0
+
+ ;// dTC = dTC3210 + dMask_1 (no apflg/aqflg terms in this chroma routine)
+
+ ;// dTC3210 - D18
+ ;// dTemp - D28
+ ;// dTC - D31
+ VBIF dTC3210, dMask_0, dFilt ;// where dFilt bits are clear, take dMask_0 instead of the threshold
+ VRHADD qDelta, qDp1q1, qDq0p0 ;// (((p1-q1)>>2) + (q0-p0) + 1) >> 1
+ VADD dTC, dTC3210, dMask_1
+ VQMOVN dDelta, qDelta ;// saturating narrow to signed 8 bits
+ ;// dDelta - D20
+
+ ;// dDelta = armClip(-dTC, dTC, dDelta); the dAlpha/dBeta loads are interleaved and not used here
+ VLD1 {dAlpha[]}, [pAlpha] ;// one byte from pAlpha replicated to every lane
+ VMIN dDelta, dDelta, dTCs ;// upper clip: min(delta, tC)
+ VNEG dTCs, dTCs ;// dTCs now holds -tC
+ VLD1 {dBeta[]}, [pBeta] ;// one byte from pBeta replicated to every lane
+ ;1
+ VMAX dDelta, dDelta, dTCs ;// lower clip: max(delta, -tC)
+
+ ;// dP_0n - D29
+ ;// dQ_0n - D24
+
+ ;// Reference C: pQ0[-1*Step] = (OMX_U8)armClip(0, 255, dP_0 + delta);
+ ;// Reference C: pQ0[0*Step] = (OMX_U8)armClip(0, 255, dQ_0 - delta);
+
+ ;// dP_0n = (OMX_U8)armClip(0, 255, dP_0 + dDelta);
+ ;// dQ_0n = (OMX_U8)armClip(0, 255, dQ_0 - dDelta);
+
+ ;// qP_0n - Q14
+ ;// qQ_0n - Q12
+
+ VMOVL qP_0n, dP_0 ;// widen p0 to 16 bits
+ VMOVL qQ_0n, dQ_0 ;// widen q0 to 16 bits
+
+ ;1
+ VADDW qP_0n, qP_0n, dDelta ;// p0 + delta
+ VSUBW qQ_0n, qQ_0n, dDelta ;// q0 - delta
+
+ VQMOVUN dP_0n, qP_0n ;// saturate to 0..255 and narrow to 8 bits
+ VQMOVUN dQ_0n, qQ_0n ;// saturate to 0..255 and narrow to 8 bits
+
+ M_END
+
+;// Register usage for - armVCM4P10_DeblockingChromabSGE4_unsafe()
+;//
+;// Inputs - Pixels - p0-p1: D4-D5, q0-q1: D8-D9
+;// - Additional Params - pAlpha: r2, pBeta: r3, pThresholds: r5
+;// Outputs - Pixels - P0: D13 (dP_0t), Q0: D31 (dQ_0t); alpha: D0 and beta: D2 are reloaded
+;// - Additional Params - pThresholds: r5 (advanced by 4)
+;// P0 = (2*p1 + p0 + q1 + 2) >> 2, Q0 = (2*q1 + q0 + p1 + 2) >> 2, each built from two halving adds
+;// Registers written by the instructions below - D0, D2, D13, D31
+;// (M_START/M_END come from armCOMM_s.h and are not visible here)
+
+ M_START armVCM4P10_DeblockingChromabSGE4_unsafe
+
+ ;// dHSq0p1 - D31
+ ;// dHSp0q1 - D13
+ VHADD dHSp0q1, dP_0, dQ_1 ;// (p0 + q1) >> 1
+ VHADD dHSq0p1, dQ_0, dP_1 ;// (q0 + p1) >> 1
+
+ ;// Reload alpha/beta and step pThresholds by 4; none of these feed the results below
+
+ ;// dHSp0q1 - D13
+ ;// dP_0t - D13 (overwrites dHSp0q1)
+ ;// dHSq0p1 - D31
+ ;// dQ_0t - D31 (overwrites dHSq0p1)
+ VLD1 {dAlpha[]}, [pAlpha] ;// one byte from pAlpha replicated to every lane
+ ADD pThresholds, pThresholds, #4 ;// presumably skips the 4 threshold bytes the bSLT4 routine would load - confirm with caller
+ VLD1 {dBeta[]}, [pBeta] ;// one byte from pBeta replicated to every lane
+
+ VRHADD dP_0t, dHSp0q1, dP_1 ;// (((p0+q1)>>1) + p1 + 1) >> 1
+ VRHADD dQ_0t, dHSq0p1, dQ_1 ;// (((q0+p1)>>1) + q1 + 1) >> 1
+
+ M_END
+
+ ENDIF
+
+ END