diff options
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s')
-rwxr-xr-x | media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s | 198 |
1 files changed, 198 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s new file mode 100755 index 0000000..4c3a77c --- /dev/null +++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src/armVCM4P10_DeblockingChroma_unsafe_s.s @@ -0,0 +1,198 @@ +;// +;// +;// File Name: armVCM4P10_DeblockingChroma_unsafe_s.s +;// OpenMAX DL: v1.0.2 +;// Revision: 12290 +;// Date: Wednesday, April 9, 2008 +;// +;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved. +;// +;// +;// + + INCLUDE omxtypes_s.h + INCLUDE armCOMM_s.h + + M_VARIANTS CortexA8 + + + IF CortexA8 + +pAlpha RN 2 +pBeta RN 3 + +pThresholds RN 5 +pBS RN 4 +bS3210 RN 6 + +;// Pixels +dP_0 DN D4.U8 +dP_1 DN D5.U8 +dP_2 DN D6.U8 +dP_3 DN D7.U8 +dQ_0 DN D8.U8 +dQ_1 DN D9.U8 +dQ_2 DN D10.U8 +dQ_3 DN D11.U8 + + +;// Filtering Decision +dAlpha DN D0.U8 +dBeta DN D2.U8 + +dFilt DN D16.U8 +dAqflg DN D12.U8 +dApflg DN D17.U8 + +dAp0q0 DN D13.U8 + +;// bSLT4 +dTC3210 DN D18.U8 +dTCs DN D31.S8 +dTC DN D31.U8 + +dMask_0 DN D14.U8 +dMask_1 DN D15.U8 +dMask_4 DN D26.U16 + +dTemp DN D28.U8 +dDummy DN D17.U8 + +;// Computing P0,Q0 +qDq0p0 QN Q10.S16 +qDp1q1 QN Q11.S16 +qDelta QN Q10.S16 ; reuse qDq0p0 +dDelta DN D20.S8 + + +;// Computing P1,Q1 +qP_0n QN Q14.S16 +qQ_0n QN Q12.S16 + +dQ_0n DN D24.U8 +dP_0n DN D29.U8 + +;// bSGE4 + +dHSp0q1 DN D13.U8 +dHSq0p1 DN D31.U8 + +dBS3210 DN D28.U16 + +dP_0t DN D13.U8 ;dHSp0q1 +dQ_0t DN D31.U8 ;Temp1 + +dP_0n DN D29.U8 +dQ_0n DN D24.U8 ;Temp2 + +;// Register usage for - armVCM4P10_DeblockingLumabSLT4_unsafe +;// +;// Inputs - Pixels - p0-p3: D4-D7, q0-q3: D8-D11 +;// - Filter masks - filt: D16, aqflg: D12, apflg: D17 +;// - Additional Params - pThresholds: r5 +;// +;// Outputs - Pixels - P0-P1: D29-D30, Q0-Q1: D24-D25 +;// - Additional Params - pThresholds: r5 + +;// Registers Corrupted - D18-D31 + + + M_START armVCM4P10_DeblockingChromabSLT4_unsafe + + + ;dTC3210 -18 + ;dTemp-28 + + VLD1 d18.U32[0], [pThresholds]! ;here + + ;// delta = (((q0-p0)<<2) + (p1-q1) + 4) >> 3; + ;// dDelta = (qDp1q1 >> 2 + qDq0p0 + 1)>> 1 + + ;// qDp1q1-11 + ;// qDq0p0-10 + VSUBL qDp1q1, dP_1, dQ_1 + VMOV dTemp, dTC3210 + VSUBL qDq0p0, dQ_0, dP_0 + VSHR qDp1q1, qDp1q1, #2 + VZIP.8 dTC3210, dTemp + + ;// qDelta-qDq0p0-10 + + ;// dTC = dTC01 + (dAplg & 1) + (dAqflg & 1) + + ;// dTC3210-18 + ;// dTemp-28 + ;// dTC-31 + VBIF dTC3210, dMask_0, dFilt + VRHADD qDelta, qDp1q1, qDq0p0 + VADD dTC, dTC3210, dMask_1 + VQMOVN dDelta, qDelta + ;// dDelta-d20 + + ;// dDelta = (OMX_U8)armClip(0, 255, q0 - delta); + VLD1 {dAlpha[]}, [pAlpha] + VMIN dDelta, dDelta, dTCs + VNEG dTCs, dTCs + VLD1 {dBeta[]}, [pBeta] + ;1 + VMAX dDelta, dDelta, dTCs + + ;// dP_0n - 29 + ;// dQ_0n - 24 + + ;// pQ0[-1*Step] = (OMX_U8)armClip(0, 255, dP_0 - delta); + ;// pQ0[0*Step] = (OMX_U8)armClip(0, 255, dQ_0 - delta); + + ;// dP_0n = (OMX_U8)armClip(0, 255, dP_0 - dDelta); + ;// dQ_0n = (OMX_U8)armClip(0, 255, dP_0 - dDelta); + + ;// qP_0n - 14 + ;// qQ_0n - 12 + + VMOVL qP_0n, dP_0 + VMOVL qQ_0n, dQ_0 + + ;1 + VADDW qP_0n, qP_0n, dDelta + VSUBW qQ_0n, qQ_0n, dDelta + + VQMOVUN dP_0n, qP_0n + VQMOVUN dQ_0n, qQ_0n + + M_END + +;// Register usage for - armVCM4P10_DeblockingLumabSGE4_unsafe() +;// +;// Inputs - Pixels - p0-p3: D4-D7, q0-q3: D8-D11 +;// - Filter masks - filt: D16, aqflg: D12, apflg: D17 +;// - Additional Params - alpha: D0, dMask_1: D15 +;// +;// Outputs - Pixels - P0-P2: D29-D31, Q0-Q2: D24,D25,D28 + +;// Registers Corrupted - D18-D31 + + M_START armVCM4P10_DeblockingChromabSGE4_unsafe + + ;dHSq0p1 - 31 + ;dHSp0q1 - 13 + VHADD dHSp0q1, dP_0, dQ_1 + VHADD dHSq0p1, dQ_0, dP_1 + + ;// Prepare the bS mask + + ;// dHSp0q1-13 + ;// dP_0t-dHSp0q1-13 + ;// dHSq0p1-31 + ;// dQ_0t-Temp1-31 + VLD1 {dAlpha[]}, [pAlpha] + ADD pThresholds, pThresholds, #4 + VLD1 {dBeta[]}, [pBeta] + + VRHADD dP_0t, dHSp0q1, dP_1 + VRHADD dQ_0t, dHSq0p1, dQ_1 + + M_END + + ENDIF + + END |