diff options
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s')
-rw-r--r-- | media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s | 273 |
1 files changed, 273 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
new file mode 100644
index 0000000..b1ad17c
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p10/src/armVCM4P10_Interpolate_Chroma_s.s
@@ -0,0 +1,273 @@
+;//
+;//
+;// File Name: armVCM4P10_Interpolate_Chroma_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision: 9641
+;// Date: Thursday, February 7, 2008
+;//
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// Purpose: chroma interpolation, two columns by two rows per inner-loop pass.
+;// out = (A*p00 + B*p01 + C*p10 + D*p11 + 32) >> 6, with A..D built from dx, dy
+;// as listed ahead of the coefficient setup; dx + dy == 0 takes a plain-copy path.
+
+
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        M_VARIANTS ARM1136JS
+
+        IF ARM1136JS
+
+;// input registers
+
+pSrc                    RN 0    ;// source pointer (row x0* of the pixel layout)
+iSrcStep                RN 1    ;// source row step; r1 becomes pSrc1 at entry
+pDst                    RN 2    ;// destination pointer
+iDstStep                RN 3    ;// destination row step
+iWidth                  RN 4    ;// loaded from stack argument Width
+iHeight                 RN 5    ;// loaded from stack argument Height
+dx                      RN 6    ;// loaded from stack argument Dx
+dy                      RN 7    ;// loaded from stack argument Dy
+
+;// Several aliases below share one physical register; each is live only in its own stretch of code.
+;// local variable registers
+temp                    RN 11   ;// scratch: iWidth - 1 while packing the loop counter
+r0x20                   RN 12   ;// rounding constant 32 for the first output row
+tmp0x20                 RN 14   ;// rounding constant 32 for the second output row
+return                  RN 0    ;// status value written to r0 before exit
+dxPlusdy                RN 10   ;// not referenced by any instruction in this file
+EightMinusdx            RN 8    ;// 8 - dx
+EightMinusdy            RN 9    ;// 8 - dy
+dxEightMinusdx          RN 8    ;// (dx << 16) + (8 - dx)
+BACoeff                 RN 6    ;// (BCoeff << 16) + ACoeff; replaces dx in r6
+DCCoeff                 RN 7    ;// (DCoeff << 16) + CCoeff; replaces dy in r7
+
+iDstStepx2MinusWidth    RN 8    ;// 2*iDstStep - iWidth
+iSrcStepx2MinusWidth    RN 9    ;// 2*iSrcStep1 - iWidth
+iSrcStep1               RN 10   ;// pSrc1 - pSrc, the source row step recovered per outer pass
+
+pSrc1                   RN 1    ;// source row x1*
+pSrc2                   RN 8    ;// source row x2*
+pDst1                   RN 8    ;// second destination row (fractional path)
+pDst2                   RN 12   ;// second destination row (copy path)
+
+pix00                   RN 8    ;// copy path: row 0, column 0
+pix01                   RN 9    ;// copy path: row 0, column 1
+pix10                   RN 10   ;// copy path: row 1, column 0
+pix11                   RN 11   ;// copy path: row 1, column 1
+
+Out0100                 RN 8    ;// copy path: pix00 | pix01 << 8
+Out1110                 RN 10   ;// copy path: pix10 | pix11 << 8
+
+x00                     RN 8
+x01                     RN 10
+x02                     RN 12
+x10                     RN 9
+x11                     RN 11
+x12                     RN 14
+x20                     RN 10
+x21                     RN 12
+x22                     RN 14
+
+x01x00                  RN 8    ;// x00 | x01 << 16
+x02x01                  RN 10   ;// x01 | x02 << 16
+x11x10                  RN 9    ;// x10 | x11 << 16
+x12x11                  RN 11   ;// x11 | x12 << 16
+x21x20                  RN 10   ;// x20 | x21 << 16
+x22x21                  RN 12   ;// x21 | x22 << 16
+
+OutRow00                RN 12   ;// first output row, column 0
+OutRow01                RN 14   ;// first output row, column 1
+OutRow10                RN 10   ;// second output row, column 0
+OutRow11                RN 12   ;// second output row, column 1
+
+OutRow0100              RN 12   ;// OutRow00 | OutRow01 << 8
+OutRow1110              RN 12   ;// OutRow10 | OutRow11 << 8
+
+;//-----------------------------------------------------------------------------------------------
+;// armVCM4P10_Interpolate_Chroma_asm starts
+;//-----------------------------------------------------------------------------------------------
+
+        ;// Write function header
+        M_START armVCM4P10_Interpolate_Chroma, r11
+        ;// NOTE(review): r14 is reused as data below; assumes M_START saves lr - confirm in armCOMM_s.h
+        ;// Define stack arguments
+        M_ARG   Width, 4
+        M_ARG   Height, 4
+        M_ARG   Dx, 4
+        M_ARG   Dy, 4
+
+        ;// Load argument from the stack
+        ;// M_STALL ARM1136JS=4
+
+        M_LDR   iWidth, Width
+        M_LDR   iHeight, Height
+        M_LDR   dx, Dx
+        M_LDR   dy, Dy
+
+        ;// EightMinusdx = 8 - dx
+        ;// EightMinusdy = 8 - dy
+
+        ;// ACoeff = EightMinusdx * EightMinusdy
+        ;// BCoeff = dx * EightMinusdy
+        ;// CCoeff = EightMinusdx * dy
+        ;// DCoeff = dx * dy
+
+        ADD     pSrc1, pSrc, iSrcStep                           ;// pSrc1 = pSrc + iSrcStep; r1 no longer holds the step
+        SUB     temp, iWidth, #1                                ;// temp = iWidth - 1
+        RSB     EightMinusdx, dx, #8                            ;// r8 = 8 - dx
+        RSB     EightMinusdy, dy, #8                            ;// r9 = 8 - dy
+        CMN     dx,dy                                           ;// flags from dx + dy; Z is consumed by BEQ MVIsZero
+        ADD     dxEightMinusdx, EightMinusdx, dx, LSL #16       ;// r8 = (dx << 16) + (8 - dx)
+        ORR     iWidth, iWidth, temp, LSL #16                   ;// iWidth = ((iWidth - 1) << 16) | iWidth
+
+        ;// Packed Coeffs.
+
+        MUL     BACoeff, dxEightMinusdx, EightMinusdy           ;// r6 = (BCoeff << 16) + ACoeff; no S suffix, flags kept
+        MUL     DCCoeff, dxEightMinusdx, dy                     ;// r7 = (DCoeff << 16) + CCoeff; no S suffix, flags kept
+
+
+        ;// Checking either of dx and dy being non-zero
+
+        BEQ     MVIsZero                                        ;// Z from CMN; assumes dx, dy >= 0 so a zero sum means both are 0 - confirm
+
+;// Pixel layout:
+;//
+;// x00 x01 x02
+;// x10 x11 x12
+;// x20 x21 x22
+
+;// If fractional mv is not (0, 0)
+
+OuterLoopMVIsNotZero
+;// Each inner pass reads the 3x3 neighbourhood above and writes a 2x2 output block.
+InnerLoopMVIsNotZero
+
+        LDRB    x00, [pSrc, #+0]
+        LDRB    x10, [pSrc1, #+0]
+        LDRB    x01, [pSrc, #+1]
+        LDRB    x11, [pSrc1, #+1]
+        LDRB    x02, [pSrc, #+2]!                               ;// pre-index writeback: pSrc += 2
+        LDRB    x12, [pSrc1, #+2]!                              ;// pre-index writeback: pSrc1 += 2
+
+        ORR     x01x00, x00, x01, LSL #16                       ;// x00 | x01 << 16
+        ;// M_STALL ARM1136JS=1
+        ORR     x02x01, x01, x02, LSL #16                       ;// x01 | x02 << 16; last read of x02 in r12
+        MOV     r0x20, #32                                      ;// r12 now holds the rounding constant
+        ORR     x11x10, x10, x11, LSL #16                       ;// x10 | x11 << 16
+        ORR     x12x11, x11, x12, LSL #16                       ;// x11 | x12 << 16
+
+        SMLAD   x01x00, x01x00, BACoeff, r0x20                  ;// ACoeff*x00 + BCoeff*x01 + 32
+        SMLAD   x02x01, x02x01, BACoeff, r0x20                  ;// ACoeff*x01 + BCoeff*x02 + 32
+
+        ;// iWidth packed with MSB (top 16 bits)
+        ;// as inner loop counter value i.e.
+        ;// (iWidth - 1) and LSB (lower 16 bits)
+        ;// as original width
+
+        SUBS    iWidth, iWidth, #1<<17                          ;// counter half -= 2; flags consumed by BGT at loop end
+
+        SMLAD   OutRow00, x11x10, DCCoeff, x01x00               ;// + CCoeff*x10 + DCoeff*x11
+        SMLAD   OutRow01, x12x11, DCCoeff, x02x01               ;// + CCoeff*x11 + DCoeff*x12
+
+        RSB     pSrc2, pSrc, pSrc1, LSL #1                      ;// pSrc2 = 2*pSrc1 - pSrc = pSrc1 + (pSrc1 - pSrc)
+
+        MOV     OutRow00, OutRow00, LSR #6                      ;// >> 6
+        MOV     OutRow01, OutRow01, LSR #6                      ;// >> 6
+
+        LDRB    x20,[pSrc2, #-2]                                ;// -2 undoes the +2 already applied to pSrc/pSrc1
+
+        ORR     OutRow0100, OutRow00, OutRow01, LSL #8          ;// pack two output bytes into one halfword
+        STRH    OutRow0100, [pDst], #2                          ;// first row, pDst += 2; NOTE(review): assumes pDst is halfword-aligned - confirm
+
+        LDRB    x21,[pSrc2, #-1]
+        LDRB    x22,[pSrc2, #+0]
+
+        ADD     pDst1, pDst, iDstStep                           ;// r8 stops being pSrc2; pDst has already advanced by 2
+
+        ;// M_STALL ARM1136JS=1
+
+        ORR     x21x20, x20, x21, LSL #16                       ;// x20 | x21 << 16
+        ORR     x22x21, x21, x22, LSL #16                       ;// x21 | x22 << 16
+
+        MOV     tmp0x20, #32                                    ;// r14 now holds the rounding constant
+
+        ;// Reusing the packed data x11x10 and x12x11
+
+        SMLAD   x11x10, x11x10, BACoeff, tmp0x20                ;// ACoeff*x10 + BCoeff*x11 + 32
+        SMLAD   x12x11, x12x11, BACoeff, tmp0x20                ;// ACoeff*x11 + BCoeff*x12 + 32
+        SMLAD   OutRow10, x21x20, DCCoeff, x11x10               ;// + CCoeff*x20 + DCoeff*x21
+        SMLAD   OutRow11, x22x21, DCCoeff, x12x11               ;// + CCoeff*x21 + DCoeff*x22
+
+        MOV     OutRow10, OutRow10, LSR #6                      ;// >> 6
+        MOV     OutRow11, OutRow11, LSR #6                      ;// >> 6
+
+        ;// M_STALL ARM1136JS=1
+
+        ORR     OutRow1110, OutRow10, OutRow11, LSL #8          ;// pack two output bytes into one halfword
+
+        STRH    OutRow1110, [pDst1, #-2]                        ;// second row, same two columns as the store to pDst
+
+        BGT     InnerLoopMVIsNotZero                            ;// flags from SUBS iWidth; nothing S-suffixed in between
+
+        SUBS    iHeight, iHeight, #2                            ;// two rows done; flags consumed by BGT OuterLoopMVIsNotZero
+        ADD     iWidth, iWidth, #1<<16                          ;// counter half += 1; NOTE(review): yields plain width only if it ended at -1 (even width) - confirm
+        RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1  ;// r8 = 2*iDstStep - iWidth
+        SUB     iSrcStep1, pSrc1, pSrc                          ;// r10 = pSrc1 - pSrc
+        SUB     temp, iWidth, #1                                ;// temp = iWidth - 1
+        RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1 ;// r9 = 2*iSrcStep1 - iWidth
+        ADD     pDst, pDst, iDstStepx2MinusWidth                ;// pDst moves down two rows, back to column 0
+        ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth              ;// same for pSrc1
+        ADD     pSrc, pSrc, iSrcStepx2MinusWidth                ;// same for pSrc
+        ORR     iWidth, iWidth, temp, LSL #16                   ;// repack the inner-loop counter
+        BGT     OuterLoopMVIsNotZero                            ;// flags from SUBS iHeight; nothing S-suffixed in between
+        MOV     return, #OMX_Sts_NoErr                          ;// r0 = OMX_Sts_NoErr
+        M_EXIT
+
+;// If fractional mv is (0, 0)
+
+MVIsZero
+        ;// M_STALL ARM1136JS=4
+OuterLoopMVIsZero
+
+InnerLoopMVIsZero
+
+        LDRB    pix00, [pSrc], #+1                              ;// post-index: pSrc += 1
+        LDRB    pix01, [pSrc], #+1                              ;// post-index: pSrc += 1
+        LDRB    pix10, [pSrc1], #+1                             ;// post-index: pSrc1 += 1
+        LDRB    pix11, [pSrc1], #+1                             ;// post-index: pSrc1 += 1
+
+        ADD     pDst2, pDst, iDstStep                           ;// pDst2 = pDst + iDstStep
+        SUBS    iWidth, iWidth, #1<<17                          ;// counter half -= 2; flags consumed by BGT at loop end
+
+        ORR     Out0100, pix00, pix01, LSL #8                   ;// pix00 | pix01 << 8
+        ORR     Out1110, pix10, pix11, LSL #8                   ;// pix10 | pix11 << 8
+
+        STRH    Out0100, [pDst], #2                             ;// first row, pDst += 2
+        STRH    Out1110, [pDst2], #2                            ;// second row; pDst2 is recomputed next pass
+
+        BGT     InnerLoopMVIsZero                               ;// flags from SUBS iWidth; nothing S-suffixed in between
+
+        SUBS    iHeight, iHeight, #2                            ;// two rows done; flags consumed by BGT OuterLoopMVIsZero
+        ADD     iWidth, iWidth, #1<<16                          ;// counter half += 1; NOTE(review): same even-width assumption as the fractional path
+        RSB     iDstStepx2MinusWidth, iWidth, iDstStep, LSL #1  ;// r8 = 2*iDstStep - iWidth
+        SUB     iSrcStep1, pSrc1, pSrc                          ;// r10 = pSrc1 - pSrc
+        SUB     temp, iWidth, #1                                ;// temp = iWidth - 1
+        RSB     iSrcStepx2MinusWidth, iWidth, iSrcStep1, LSL #1 ;// r9 = 2*iSrcStep1 - iWidth
+        ADD     pDst, pDst, iDstStepx2MinusWidth                ;// pDst moves down two rows, back to column 0
+        ADD     pSrc1, pSrc1, iSrcStepx2MinusWidth              ;// same for pSrc1
+        ADD     pSrc, pSrc, iSrcStepx2MinusWidth                ;// same for pSrc
+        ORR     iWidth, iWidth, temp, LSL #16                   ;// repack the inner-loop counter
+        BGT     OuterLoopMVIsZero                               ;// flags from SUBS iHeight; nothing S-suffixed in between
+        MOV     return, #OMX_Sts_NoErr                          ;// r0 = OMX_Sts_NoErr
+        M_END
+
+        ENDIF ;// ARM1136JS
+
+
+        END
+
+;//-----------------------------------------------------------------------------------------------
+;// armVCM4P10_Interpolate_Chroma_asm ends
+;//-----------------------------------------------------------------------------------------------
+