summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S
diff options
context:
space:
mode:
authorJames Dong <jdong@google.com>2011-05-31 18:53:46 -0700
committerJames Dong <jdong@google.com>2011-06-02 12:32:46 -0700
commit0c1bc742181ded4930842b46e9507372f0b1b963 (patch)
treec952bfcb03ff7cce5e0f91ad7d25c67a2fdd39cb /media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S
parent92a746c3b18d035189f596ce32847bf26247aaca (diff)
downloadframeworks_av-0c1bc742181ded4930842b46e9507372f0b1b963.zip
frameworks_av-0c1bc742181ded4930842b46e9507372f0b1b963.tar.gz
frameworks_av-0c1bc742181ded4930842b46e9507372f0b1b963.tar.bz2
Initial-checkin for ON2 Software AVC/H264 decoder
o when neon is present, the performance gain of On2 AVC software decoder over PV software decoder is more than 30%. o In addition, it fixes some known PV software decoder issues like missing output frames o allow both pv and on2 software avc to be available for easy comparision o change output frames from 8 to 16 Change-Id: I567ad1842025ead7092f0c47e3513d6d9ca232dd
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S')
-rw-r--r--media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S76
1 files changed, 76 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S
new file mode 100644
index 0000000..a3a0715
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm_neon/vc/m4p10/src_gcc/omxVCM4P10_TransformDequantLumaDCFromPair_s.S
@@ -0,0 +1,76 @@
+/*
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ *
+ */
+
+ .eabi_attribute 24, 1
+ .eabi_attribute 25, 1
+
+ .arm
+ .fpu neon
+ .text
+
+ .global armVCM4P10_InvTransformDequantLumaDC4x4
+ .func armVCM4P10_InvTransformDequantLumaDC4x4
+armVCM4P10_InvTransformDequantLumaDC4x4:
+ PUSH {r4-r6,lr}
+ VPUSH {d8-d13}
+ VLD4.16 {d0,d1,d2,d3},[r0]
+ LDR r2, =armVCM4P10_QPDivTable
+ LDR r3, =armVCM4P10_VMatrixQPModTable
+ VADD.I16 d4,d0,d1
+ VADD.I16 d5,d2,d3
+ VSUB.I16 d6,d0,d1
+ LDRSB r4,[r2,r1]
+ VSUB.I16 d7,d2,d3
+ LDRSB r5,[r3,r1]
+ VADD.I16 d0,d4,d5
+ VSUB.I16 d1,d4,d5
+ VSUB.I16 d2,d6,d7
+ LSL r5,r5,r4
+ VADD.I16 d3,d6,d7
+ VTRN.16 d0,d1
+ VTRN.16 d2,d3
+ VTRN.32 q0,q1
+ VADD.I16 d4,d0,d1
+ VADD.I16 d5,d2,d3
+ VSUB.I16 d6,d0,d1
+ VSUB.I16 d7,d2,d3
+ VADD.I16 d0,d4,d5
+ VSUB.I16 d1,d4,d5
+ VSUB.I16 d2,d6,d7
+ VADD.I16 d3,d6,d7
+ VDUP.16 d5,r5
+ VMOV.I32 q3,#0x2
+ VMOV.I32 q4,#0x2
+ VMOV.I32 q5,#0x2
+ VMOV.I32 q6,#0x2
+ VMLAL.S16 q3,d0,d5
+ VMLAL.S16 q4,d1,d5
+ VMLAL.S16 q5,d2,d5
+ VMLAL.S16 q6,d3,d5
+ VSHRN.I32 d0,q3,#2
+ VSHRN.I32 d1,q4,#2
+ VSHRN.I32 d2,q5,#2
+ VSHRN.I32 d3,q6,#2
+ VST1.16 {d0,d1,d2,d3},[r0]
+ VPOP {d8-d13}
+ POP {r4-r6,pc}
+ .endfunc
+
+.global omxVCM4P10_TransformDequantLumaDCFromPair
+.func omxVCM4P10_TransformDequantLumaDCFromPair
+omxVCM4P10_TransformDequantLumaDCFromPair:
+ PUSH {r4-r6,lr}
+ MOV r4,r1
+ MOV r5,r2
+ BL armVCM4P10_UnpackBlock4x4
+ MOV r0,r4
+ MOV r1,r5
+ BL armVCM4P10_InvTransformDequantLumaDC4x4
+ MOV r0,#0
+ POP {r4-r6,pc}
+ .endfunc
+
+ .end
+