summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor_ver.s
diff options
context:
space:
mode:
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor_ver.s')
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor_ver.s339
1 files changed, 0 insertions, 339 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor_ver.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor_ver.s
deleted file mode 100755
index 7420ad3..0000000
--- a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_chroma_hor_ver.s
+++ /dev/null
@@ -1,339 +0,0 @@
-; Copyright (C) 2009 The Android Open Source Project
-;
-; Licensed under the Apache License, Version 2.0 (the "License");
-; you may not use this file except in compliance with the License.
-; You may obtain a copy of the License at
-;
-; http://www.apache.org/licenses/LICENSE-2.0
-;
-; Unless required by applicable law or agreed to in writing, software
-; distributed under the License is distributed on an "AS IS" BASIS,
-; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-; See the License for the specific language governing permissions and
-; limitations under the License.
-
-;-------------------------------------------------------------------------------
-;--
-;-- Abstract : ARMv6 optimized version of h264bsdInterpolateChromaHorVer
-;-- function
-;--
-;-------------------------------------------------------------------------------
-
-
- IF :DEF: H264DEC_WINASM ;// build-time switch: taken when the symbol H264DEC_WINASM is defined
- ;// We don't use REQUIRE8 and PRESERVE8 for winasm
- ELSE
- REQUIRE8 ;// armasm: this code requires 8-byte stack alignment
- PRESERVE8 ;// armasm: this code preserves 8-byte stack alignment
- ENDIF
-
- AREA |.text|, CODE ;// everything that follows is assembled into the code area .text
-
-
-;// h264bsdInterpolateChromaHorVer register allocation
-;// Several names below are bound to the same register (two RN lines with the same number).
-ref RN 0 ;// r0: reference picture pointer (first argument)
-ptrA RN 0 ;// r0: running read pointer into the reference data
-
-mb RN 1 ;// r1: output pointer, loaded from predPartChroma
-block RN 1 ;// r1: address of the temporary block handed to h264bsdFillBlock
-
-x0 RN 2 ;// r2: x coordinate argument
-count RN 2 ;// r2: packed loop counters (built on top of the x0 value)
-
-y0 RN 3 ;// r3: y coordinate argument
-valY RN 3 ;// r3: 8-yFrac, later packed as |yFrac|8-yFrac| halfwords
-
-width RN 4 ;// r4: row pitch of the data being read
-
-tmp4 RN 5 ;// r5: scratch inside the pixel loops
-height RN 5 ;// r5: height of the data being read
-
-tmp1 RN 6 ;// r6: scratch
-
-tmp2 RN 7 ;// r7: scratch
-
-tmp3 RN 8 ;// r8: scratch
-
-valX RN 9 ;// r9: 8-xFrac
-
-tmp5 RN 10 ;// r10: scratch inside the pixel loops
-chrPW RN 10 ;// r10: chromaPartWidth (before the loops start)
-
-tmp6 RN 11 ;// r11: scratch inside the pixel loops
-chrPH RN 11 ;// r11: chromaPartHeight (before the loops start)
-
-xFrac RN 12 ;// r12: horizontal fraction
-
-c32 RN 14 ;// r14: rounding constant 32
-yFrac RN 14 ;// r14: vertical fraction, until it has been packed into valY
-
-;// function exports and imports
-
- IMPORT h264bsdFillBlock ;// external routine, called twice from the do_fill path
-
- EXPORT h264bsdInterpolateChromaHorVer ;// the only entry point defined in this file
-
-;// h264bsdInterpolateChromaHorVer: bilinear (horizontal and vertical) chroma interpolation of two planes, Cb then Cr.
-;// Function arguments and their sp-relative offsets once the prologue has run:
-;// u8 *ref, : 0xc4 (r0, pushed by the prologue)
-;// u8 *predPartChroma, : 0xc8 (r1, pushed by the prologue)
-;// i32 x0, : 0xcc (r2, pushed by the prologue)
-;// i32 y0, : 0xd0 (r3, pushed by the prologue)
-;// u32 width, : 0xf8 (first argument passed on the stack)
-;// u32 height, : 0xfc
-;// u32 xFrac, : 0x100
-;// u32 yFrac, : 0x104
-;// u32 chromaPartWidth, : 0x108
-;// u32 chromaPartHeight : 0x10c
-;// Output: bytes stored through predPartChroma (first plane) and predPartChroma+64 (second plane), 8 bytes per output row.
-h264bsdInterpolateChromaHorVer
- STMFD sp!, {r0-r11,lr} ;// push 13 words (0x34 bytes); r0-r3 end up at sp+0xc4..0xd0 after the SUB
- SUB sp, sp, #0xc4 ;// scratch frame: FillBlock stack args at sp+0..0x10, temp block from sp+0x1c
-
- LDR chrPW, [sp, #0x108] ;// chromaPartWidth
- LDR xFrac, [sp, #0x100] ;// xFrac
- LDR width, [sp, #0xf8] ;// width
- CMP x0, #0
- BLT do_fill ;// x0 < 0 (signed): take the fill path
-
- ADD tmp1, x0, chrPW ;// tmp1 = x0+ chromaPartWidth
- ADD tmp1, tmp1, #1 ;// tmp1 = x0+ chromaPartWidth+1
- CMP tmp1, width ;// x0+chromaPartWidth+1 > width
- BHI do_fill ;// unsigned compare: the extra column needed for interpolation lies past width
-
- CMP y0, #0
- BLT do_fill ;// y0 < 0 (signed): take the fill path
- LDR chrPH, [sp, #0x10c] ;// chromaPartHeight
- LDR height, [sp, #0xfc] ;// height
- ADD tmp1, y0, chrPH ;// tmp1 = y0 + chromaPartHeight
- ADD tmp1, tmp1, #1 ;// tmp1 = y0 + chromaPartHeight + 1
- CMP tmp1, height
- BLS skip_fill ;// all four checks passed: read straight from ref
-
-do_fill
- LDR chrPH, [sp, #0x10c] ;// chromaPartHeight
- LDR height, [sp, #0xfc] ;// height
- ADD tmp3, chrPW, #1 ;// tmp3 = chromaPartWidth+1
- ADD tmp1, chrPW, #1 ;// tmp1 = chromaPartWidth+1
- ADD tmp2, chrPH, #1 ;// tmp2 = chromaPartHeight+1
- STMIA sp,{width,height,tmp1,tmp2,tmp3} ;// stack args: width, height, chromaPW+1, chromaPH+1, chromaPW+1
- ADD block, sp, #0x1c ;// block
- BL h264bsdFillBlock ;// first plane; r0/r2/r3 still hold ref, x0, y0 from entry
-
- LDR x0, [sp, #0xcc] ;// reload x0 from the saved arguments after the call
- LDR y0, [sp, #0xd0] ;// reload y0
- LDR ref, [sp, #0xc4] ;// ref
- STMIA sp,{width,height,tmp1,tmp2,tmp3} ;// write the five stack args again for the second call
- ADD block, sp, #0x1c ;// block
- MLA ref, height, width, ref ;// ref += width * height;
- MLA block, tmp2, tmp1, block;// block + (chromaPW+1)*(chromaPH+1)
- BL h264bsdFillBlock ;// second plane, placed right behind the first one in the temp block
-
- MOV x0, #0 ;// x0 = 0
- MOV y0, #0 ;// y0 = 0
- STR x0, [sp, #0xcc] ;// overwrite the saved x0; it is read again before the Cr pass
- STR y0, [sp, #0xd0] ;// overwrite the saved y0; it is read again before the Cr pass
- ADD ref, sp, #0x1c ;// ref = block
- STR ref, [sp, #0xc4] ;// ref
-
- STR tmp2, [sp, #0xfc] ;// height
- STR tmp1, [sp, #0xf8] ;// width
- MOV width, tmp1 ;// width = chromaPartWidth+1, the row pitch of the temp block
-
-skip_fill
- MLA tmp3, y0, width, x0 ;// tmp3 = y0*width+x0
- LDR yFrac, [sp, #0x104] ;// yFrac
- LDR xFrac, [sp, #0x100] ;// xFrac, loaded again here because r12 may not survive the calls above
- ADD ptrA, ref, tmp3 ;// ptrA = ref + y0*width+x0
- RSB valX, xFrac, #8 ;// valX = 8-xFrac
- RSB valY, yFrac, #8 ;// valY = 8-yFrac
-
- LDR mb, [sp, #0xc8] ;// predPartChroma
-
-;// NOTE(review): the loop bookkeeping below only terminates correctly for even chromaPartWidth/chromaPartHeight - confirm callers.
- ;// pack values to count register
- ;// [31:28] loop_x (reloaded with chromaPartWidth-1 for every row pair)
- ;// [27:24] loop_y (starts at chromaPartHeight-1)
- ;// [23:20] chromaPartWidth-1
- ;// [19:16] chromaPartHeight-1
- ;// [15:00] not used by the loops; still holds x0 because count and x0 are both r2 (must stay below 0x10000)
-
- SUB tmp2, chrPH, #1 ;// chromaPartHeight-1
- SUB tmp1, chrPW, #1 ;// chromaPartWidth-1
- ADD count, count, tmp2, LSL #16 ;// chromaPartHeight-1
- ADD count, count, tmp2, LSL #24 ;// loop_y
- ADD count, count, tmp1, LSL #20 ;// chromaPartWidth-1
- AND tmp2, count, #0x00F00000 ;// tmp2 = (chromaPartWidth-1) << 20, reload value for loop_x
- PKHBT valY, valY, yFrac, LSL #16 ;// |yFrac|valY |
- MOV c32, #32 ;// rounding term; overwrites yFrac (same register), which is already packed into valY
-
-
- ;///////////////////////////////////////////////////////////////////////////
- ;// Cb
- ;///////////////////////////////////////////////////////////////////////////
-
- ;// 2x2 pels per iteration
- ;// bilinear vertical and horizontal interpolation
-
-loop1_y
- LDRB tmp1, [ptrA] ;// t1 = pel in row 0, first column of this row pair
- LDRB tmp3, [ptrA, width] ;// t3 = pel in row 1, same column
- LDRB tmp5, [ptrA, width, LSL #1] ;// t5 = pel in row 2, same column
-
- PKHBT tmp1, tmp1, tmp3, LSL #16 ;// |t3|t1|
- PKHBT tmp3, tmp3, tmp5, LSL #16 ;// |t5|t3|
-
- SMUAD tmp1, tmp1, valY ;// t1=(t1*valY + t3*yFrac)
- SMUAD tmp3, tmp3, valY ;// t3=(t3*valY + t5*yFrac)
-
- ADD count, count, tmp2, LSL #8 ;// loop_x = chromaPartWidth-1 (bits [31:28] are zero here)
-loop1_x
- ;// first
- LDRB tmp2, [ptrA, #1]! ;// step one column right (writeback); t2 = row 0
- LDRB tmp4, [ptrA, width] ;// t4 = row 1
- LDRB tmp6, [ptrA, width, LSL #1] ;// t6 = row 2
-
- PKHBT tmp2, tmp2, tmp4, LSL #16 ;// |t4|t2|
- PKHBT tmp4, tmp4, tmp6, LSL #16 ;// |t6|t4|
-
- SMUAD tmp2, tmp2, valY ;// t2=(t2*valY + t4*yFrac)
- MLA tmp5, tmp1, valX, c32 ;// t5=t1*valX+32
- MLA tmp5, tmp2, xFrac, tmp5 ;// t5=t2*xFrac+t5
-
- SMUAD tmp4, tmp4, valY ;// t4=(t4*valY + t6*yFrac)
- MLA tmp6, tmp3, valX, c32 ;// t6=t3*valX+32
- MLA tmp6, tmp4, xFrac, tmp6 ;// t6=t4*xFrac+t6
-
- MOV tmp6, tmp6, LSR #6 ;// scale down
- STRB tmp6, [mb, #8] ;// store pixel of the lower output row (8 bytes further on)
- MOV tmp5, tmp5, LSR #6 ;// scale down
- STRB tmp5, [mb], #1 ;// store pixel of the upper output row, then mb++
-
- ;// second
- LDRB tmp1, [ptrA, #1]! ;// step one more column right; t1 = row 0
- LDRB tmp3, [ptrA, width] ;// t3 = row 1
- LDRB tmp5, [ptrA, width, LSL #1] ;// t5 = row 2
-
- PKHBT tmp1, tmp1, tmp3, LSL #16 ;// |t3|t1|
- PKHBT tmp3, tmp3, tmp5, LSL #16 ;// |t5|t3|
-
- SMUAD tmp1, tmp1, valY ;// t1=(t1*valY + t3*yFrac)
- MLA tmp5, tmp1, xFrac, c32 ;// t5=t1*xFrac+32 (t1 is now the right-hand column)
- MLA tmp5, tmp2, valX, tmp5 ;// t5=t2*valX+t5
-
- SMUAD tmp3, tmp3, valY ;// t3=(t3*valY + t5*yFrac)
- MLA tmp6, tmp3, xFrac, c32 ;// t6=t3*xFrac+32 (t3 is now the right-hand column)
- MLA tmp6, tmp4, valX, tmp6 ;// t6=t4*valX+t6
-
- MOV tmp6, tmp6, LSR #6 ;// scale down
- STRB tmp6, [mb, #8] ;// store pixel of the lower output row
- MOV tmp5, tmp5, LSR #6 ;// scale down
- STRB tmp5, [mb], #1 ;// store pixel of the upper output row, then mb++
-
- SUBS count, count, #2<<28 ;// loop_x -= 2; t1/t3 carry over as the left column of the next pair
- BCS loop1_x ;// carry set = no borrow: more columns in this row pair
-
- AND tmp2, count, #0x00F00000 ;// tmp2 = (chromaPartWidth-1) << 20
-
- ADDS mb, mb, #16 ;// two output rows down; leaves C clear unless the address wraps
- SBC mb, mb, tmp2, LSR #20 ;// mb -= (chromaPartWidth-1) + 1 while C is clear: back to the first column
- ADD ptrA, ptrA, width, LSL #1 ;// two source rows down (flags untouched)
- SBC ptrA, ptrA, tmp2, LSR #20 ;// ptrA -= chromaPartWidth, using the same C as above
-
- ADDS count, count, #0xE << 24 ;// loop_y -= 2 modulo 16; the carry into [31:28] clears the 0xF left by loop_x
- BGE loop1_y ;// result goes negative once loop_y was 1: all rows done
-
- ;///////////////////////////////////////////////////////////////////////////
- ;// Cr (valX, valY, xFrac, c32 and width are reused from the Cb pass)
- ;///////////////////////////////////////////////////////////////////////////
- LDR height, [sp,#0xfc] ;// height
- LDR ref, [sp, #0xc4] ;// ref
- LDR tmp1, [sp, #0xd0] ;// y0
- LDR tmp2, [sp, #0xcc] ;// x0
- LDR mb, [sp, #0xc8] ;// predPartChroma
-
- ADD tmp1, height, tmp1 ;// tmp1 = height + y0
- MLA tmp3, tmp1, width, tmp2 ;// tmp3 = (height+y0)*width + x0
- ADD ptrA, ref, tmp3 ;// ptrA = same position, one plane (width*height bytes) further on
- ADD mb, mb, #64 ;// second output plane starts 64 bytes after the first
-
- AND count, count, #0x00FFFFFF ;// clear what the first pass left in loop_x/loop_y
- AND tmp1, count, #0x000F0000 ;// tmp1 = (chromaPartHeight-1) << 16
- ADD count, count, tmp1, LSL #8 ;// loop_y = chromaPartHeight-1
- AND tmp2, count, #0x00F00000 ;// tmp2 = (chromaPartWidth-1) << 20, reload value for loop_x
-
- ;// 2x2 pels per iteration
- ;// bilinear vertical and horizontal interpolation
-loop2_y
- LDRB tmp1, [ptrA] ;// t1 = pel in row 0, first column of this row pair
- LDRB tmp3, [ptrA, width] ;// t3 = pel in row 1, same column
- LDRB tmp5, [ptrA, width, LSL #1] ;// t5 = pel in row 2, same column
-
- PKHBT tmp1, tmp1, tmp3, LSL #16 ;// |t3|t1|
- PKHBT tmp3, tmp3, tmp5, LSL #16 ;// |t5|t3|
-
- SMUAD tmp1, tmp1, valY ;// t1=(t1*valY + t3*yFrac)
- SMUAD tmp3, tmp3, valY ;// t3=(t3*valY + t5*yFrac)
-
- ADD count, count, tmp2, LSL #8 ;// loop_x = chromaPartWidth-1 (bits [31:28] are zero here)
-loop2_x
- ;// first
- LDRB tmp2, [ptrA, #1]! ;// step one column right (writeback); t2 = row 0
- LDRB tmp4, [ptrA, width] ;// t4 = row 1
- LDRB tmp6, [ptrA, width, LSL #1] ;// t6 = row 2
-
- PKHBT tmp2, tmp2, tmp4, LSL #16 ;// |t4|t2|
- PKHBT tmp4, tmp4, tmp6, LSL #16 ;// |t6|t4|
-
- SMUAD tmp2, tmp2, valY ;// t2=(t2*valY + t4*yFrac)
- MLA tmp5, tmp1, valX, c32 ;// t5=t1*valX+32
- MLA tmp5, tmp2, xFrac, tmp5 ;// t5=t2*xFrac+t5
-
- SMUAD tmp4, tmp4, valY ;// t4=(t4*valY + t6*yFrac)
- MLA tmp6, tmp3, valX, c32 ;// t6=t3*valX+32
- MLA tmp6, tmp4, xFrac, tmp6 ;// t6=t4*xFrac+t6
-
- MOV tmp6, tmp6, LSR #6 ;// scale down
- STRB tmp6, [mb, #8] ;// store pixel of the lower output row (8 bytes further on)
- MOV tmp5, tmp5, LSR #6 ;// scale down
- STRB tmp5, [mb], #1 ;// store pixel of the upper output row, then mb++
-
- ;// second
- LDRB tmp1, [ptrA, #1]! ;// step one more column right; t1 = row 0
- LDRB tmp3, [ptrA, width] ;// t3 = row 1
- LDRB tmp5, [ptrA, width, LSL #1] ;// t5 = row 2
-
- PKHBT tmp1, tmp1, tmp3, LSL #16 ;// |t3|t1|
- PKHBT tmp3, tmp3, tmp5, LSL #16 ;// |t5|t3|
-
- SMUAD tmp1, tmp1, valY ;// t1=(t1*valY + t3*yFrac)
- MLA tmp5, tmp1, xFrac, c32 ;// t5=t1*xFrac+32 (t1 is now the right-hand column)
- MLA tmp5, tmp2, valX, tmp5 ;// t5=t2*valX+t5
-
- SMUAD tmp3, tmp3, valY ;// t3=(t3*valY + t5*yFrac)
- MLA tmp6, tmp3, xFrac, c32 ;// t6=t3*xFrac+32 (t3 is now the right-hand column)
- MLA tmp6, tmp4, valX, tmp6 ;// t6=t4*valX+t6
-
- MOV tmp6, tmp6, LSR #6 ;// scale down
- STRB tmp6, [mb, #8] ;// store pixel of the lower output row
- MOV tmp5, tmp5, LSR #6 ;// scale down
- STRB tmp5, [mb], #1 ;// store pixel of the upper output row, then mb++
-
- SUBS count, count, #2<<28 ;// loop_x -= 2; t1/t3 carry over as the left column of the next pair
- BCS loop2_x ;// carry set = no borrow: more columns in this row pair
-
- AND tmp2, count, #0x00F00000 ;// tmp2 = (chromaPartWidth-1) << 20
-
- ADDS mb, mb, #16 ;// two output rows down; leaves C clear unless the address wraps
- SBC mb, mb, tmp2, LSR #20 ;// mb -= (chromaPartWidth-1) + 1 while C is clear: back to the first column
- ADD ptrA, ptrA, width, LSL #1 ;// two source rows down (flags untouched)
- SBC ptrA, ptrA, tmp2, LSR #20 ;// ptrA -= chromaPartWidth, using the same C as above
-
- ADDS count, count, #0xE << 24 ;// loop_y -= 2 modulo 16; the carry into [31:28] clears the 0xF left by loop_x
- BGE loop2_y ;// result goes negative once loop_y was 1: all rows done
-
- ADD sp,sp,#0xd4 ;// drop the 0xc4 scratch frame plus the four saved argument words (r0-r3)
- LDMFD sp!,{r4-r11,pc} ;// restore r4-r11 and return by popping the saved lr into pc
-
- END ;// armasm: marks the end of this source file