summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_mid_hor.s
diff options
context:
space:
mode:
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_mid_hor.s')
-rwxr-xr-xmedia/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_mid_hor.s163
1 files changed, 163 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_mid_hor.s b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_mid_hor.s
new file mode 100755
index 0000000..a81aed7
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm11_asm/h264bsd_interpolate_mid_hor.s
@@ -0,0 +1,163 @@
+; Copyright (C) 2009 The Android Open Source Project
+;
+; Licensed under the Apache License, Version 2.0 (the "License");
+; you may not use this file except in compliance with the License.
+; You may obtain a copy of the License at
+;
+; http://www.apache.org/licenses/LICENSE-2.0
+;
+; Unless required by applicable law or agreed to in writing, software
+; distributed under the License is distributed on an "AS IS" BASIS,
+; WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+; See the License for the specific language governing permissions and
+; limitations under the License.
+
+;-------------------------------------------------------------------------------
+;--
+;-- Abstract : ARMv6 optimized version horizontal part of
+;-- h264bsdInterpolateMid functions
+;--
+;-------------------------------------------------------------------------------
+
+
+ IF :DEF: H264DEC_WINASM
+ ;// We dont use REQUIRE8 and PRESERVE8 for winasm
+ ELSE
+ REQUIRE8
+ PRESERVE8
+ ENDIF
+
+ AREA |.text|, CODE
+
+
+;// Register allocation
+
+ref RN 0 ;// pointer to current position in reference image
+mb RN 1 ;// pointer to current position in interpolated mb
+count RN 2 ;// bit-packed width and count values
+
+x_2_0 RN 4
+x_3_1 RN 5
+x_6_4 RN 6
+x_7_5 RN 7
+
+tmp1 RN 8
+tmp2 RN 9
+tmp3 RN 10
+tmp4 RN 11
+
+mult_20_01 RN 12 ;// [20, 1]
+mult_20_m5 RN 14 ;// [20, -5]
+
+
+ EXPORT h264bsdInterpolateMidHorPart
+
+;// Horizontal filter approach
+;//
+;// Basic idea in horizontal filtering is to adjust coefficients
+;// like below. Calculation is done with 16-bit maths.
+;//
+;// Reg x_2_0 x_3_1 x_6_4 x_7_5 x_2_0
+;// [ 2 0 ] [ 3 1 ] [ 6 4 ] [ 7 5 ] [ 10 8 ] ...
+;// y_0 = 20 1 20 -5 -5 1
+;// y_1 = -5 20 1 1 20 -5
+;// y_2 = 1 -5 -5 20 1 20
+;// y_3 = 1 20 -5 -5 20 1
+
+
+h264bsdInterpolateMidHorPart
+ STMFD sp!, {r4-r11, lr}
+
+ ;// pack values to count register
+ ;// [31:28] loop_x (partWidth-1)
+ ;// [27:24] loop_y (partHeight-1)
+ ;// [23:20] partWidth-1
+ ;// [19:16] partHeight-1
+ ;// [15:00] width
+
+
+ LDR mult_20_01, = 0x00140001
+ LDR mult_20_m5, = 0x0014FFFB
+ AND tmp3, count, #0x000F0000 ;// partWidth-1
+loop_y
+ LDR x_3_1, [ref, #-8]
+ ADD count, count, tmp3, LSL #12
+ LDR x_7_5, [ref, #-4]
+ UXTB16 x_2_0, x_3_1
+ UXTB16 x_3_1, x_3_1, ROR #8
+ UXTB16 x_6_4, x_7_5
+
+loop_x
+ UXTB16 x_7_5, x_7_5, ROR #8
+
+ SMUAD tmp1, x_2_0, mult_20_01
+ SMULTB tmp2, x_2_0, mult_20_m5
+ SMULTB tmp3, x_2_0, mult_20_01
+ SMULTB tmp4, x_3_1, mult_20_01
+
+ SMLAD tmp1, x_3_1, mult_20_m5, tmp1
+ SMLAD tmp2, x_3_1, mult_20_01, tmp2
+ SMLATB tmp3, x_3_1, mult_20_m5, tmp3
+ LDR x_3_1, [ref], #4
+ SMLAD tmp4, x_6_4, mult_20_m5, tmp4
+
+ SMLABB tmp1, x_6_4, mult_20_m5, tmp1
+ SMLADX tmp2, x_6_4, mult_20_01, tmp2
+ SMLADX tmp3, x_6_4, mult_20_m5, tmp3
+ SMLADX tmp4, x_7_5, mult_20_m5, tmp4
+
+ SMLABB tmp1, x_7_5, mult_20_01, tmp1
+ SMLABB tmp2, x_7_5, mult_20_m5, tmp2
+ UXTB16 x_2_0, x_3_1
+ SMLADX tmp3, x_7_5, mult_20_01, tmp3
+ SMLABB tmp4, x_2_0, mult_20_01, tmp4
+
+ SUBS count, count, #4<<28
+ STR tmp1, [mb], #4
+ STR tmp2, [mb], #4
+ STR tmp3, [mb], #4
+ STR tmp4, [mb], #4
+ BCC next_y
+
+ UXTB16 x_3_1, x_3_1, ROR #8
+
+ SMUAD tmp1, x_6_4, mult_20_01
+ SMULTB tmp2, x_6_4, mult_20_m5
+ SMULTB tmp3, x_6_4, mult_20_01
+ SMULTB tmp4, x_7_5, mult_20_01
+
+ SMLAD tmp1, x_7_5, mult_20_m5, tmp1
+ SMLAD tmp2, x_7_5, mult_20_01, tmp2
+ SMLATB tmp3, x_7_5, mult_20_m5, tmp3
+ LDR x_7_5, [ref], #4
+ SMLAD tmp4, x_2_0, mult_20_m5, tmp4
+
+ SMLABB tmp1, x_2_0, mult_20_m5, tmp1
+ SMLADX tmp2, x_2_0, mult_20_01, tmp2
+ SMLADX tmp3, x_2_0, mult_20_m5, tmp3
+ SMLADX tmp4, x_3_1, mult_20_m5, tmp4
+
+ SMLABB tmp1, x_3_1, mult_20_01, tmp1
+ SMLABB tmp2, x_3_1, mult_20_m5, tmp2
+ UXTB16 x_6_4, x_7_5
+ SMLADX tmp3, x_3_1, mult_20_01, tmp3
+ SMLABB tmp4, x_6_4, mult_20_01, tmp4
+
+ SUBS count, count, #4<<28
+ STR tmp1, [mb], #4
+ STR tmp2, [mb], #4
+ STR tmp3, [mb], #4
+ STR tmp4, [mb], #4
+ BCS loop_x
+
+next_y
+ AND tmp3, count, #0x000F0000 ;// partWidth-1
+ SMLABB ref, count, mult_20_01, ref ;// +width
+ SBC ref, ref, tmp3, LSR #16 ;// -(partWidth-1)-1
+ ADDS count, count, #(1<<28)-(1<<20)
+ BGE loop_y
+
+ LDMFD sp!, {r4-r11, pc}
+
+ END
+