summaryrefslogtreecommitdiffstats
path: root/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc
diff options
context:
space:
mode:
authorJames Dong <jdong@google.com>2011-05-31 18:53:46 -0700
committerJames Dong <jdong@google.com>2011-06-02 12:32:46 -0700
commit0c1bc742181ded4930842b46e9507372f0b1b963 (patch)
treec952bfcb03ff7cce5e0f91ad7d25c67a2fdd39cb /media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc
parent92a746c3b18d035189f596ce32847bf26247aaca (diff)
downloadframeworks_av-0c1bc742181ded4930842b46e9507372f0b1b963.zip
frameworks_av-0c1bc742181ded4930842b46e9507372f0b1b963.tar.gz
frameworks_av-0c1bc742181ded4930842b46e9507372f0b1b963.tar.bz2
Initial-checkin for ON2 Software AVC/H264 decoder
o when neon is present, the performance gain of On2 AVC software decoder over PV software decoder is more than 30%. o In addition, it fixes some known PV software decoder issues like missing output frames o allow both pv and on2 software avc to be available for easy comparison o change output frames from 8 to 16 Change-Id: I567ad1842025ead7092f0c47e3513d6d9ca232dd
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc')
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S41
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S68
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S48
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S143
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S78
-rw-r--r--media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S157
6 files changed, 535 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S
new file mode 100644
index 0000000..f39f5c4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S
@@ -0,0 +1,41 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+@ Helper macros shared by every assembly source that includes this file.
+
+@ REQUIRE8: set EABI build attribute 24 (Tag_ABI_align_needed) to 1.
+    .macro  REQUIRE8
+    .eabi_attribute 24, 1
+    .endm
+@ PRESERVE8: set EABI build attribute 25 (Tag_ABI_align_preserved) to 1.
+    .macro  PRESERVE8
+    .eabi_attribute 25, 1
+    .endm
+@ function name, export=0: open a function. Emits the entry label, types it
+@ as a %function symbol and makes it global when export is non-zero.
+    .macro  function name, export=0
+    .ifne   \export
+    .globl  \name
+    .endif
+    .type   \name, %function
+    .func   \name
+\name:
+    .endm
+@ endfunction: close the .func region opened by the function macro.
+    .macro  endfunction
+    .endfunc
+    .endm
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S
new file mode 100644
index 0000000..c8a940e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S
@@ -0,0 +1,68 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+#include "asm_common.S"
+
+    preserve8
+
+    .fpu neon
+    .text
+
+/* Core registers: r0 and r1 arrive as arguments, r2 and r3 are scratch */
+#define pMbLayer    r0
+#define size        r1
+#define pOddQuad    r2
+#define stride      r3
+
+/* NEON registers */
+
+#define qZeros      Q0.U8
+
+/*------------------------------------------------------------------------------
+    Function: h264bsdClearMbLayer
+    Functional description:
+        Zero-fills the buffer at pMbLayer, 64 bytes per loop pass. The loop
+        stores before it tests, so size must be a non-zero multiple of 64.
+    Inputs:
+        pMbLayer (r0)   start of the buffer
+        size (r1)       number of bytes to clear
+    Outputs:
+        the buffer contents, overwritten with zeros
+    Returns:
+        no value, r0 is left pointing past the cleared area
+------------------------------------------------------------------------------*/
+
+function h264bsdClearMbLayer, export=1
+
+    MOV     stride, #32                     @ each pointer steps over the 16 bytes of the other
+    VMOV    qZeros, #0
+    ADD     pOddQuad, pMbLayer, #16         @ second pointer starts 16 bytes in
+    SUBS    size, size, #64                 @ size now counts what is left after the first pass
+
+clear_64_bytes:
+    VST1    {qZeros}, [pMbLayer], stride
+    SUBS    size, size, #64                 @ carry stays set while another full pass is due
+    VST1    {qZeros}, [pOddQuad], stride
+    VST1    {qZeros}, [pMbLayer], stride
+    VST1    {qZeros}, [pOddQuad], stride
+    BCS     clear_64_bytes                  @ the stores above leave the flags alone
+
+    BX      lr
+
+endfunction
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S
new file mode 100644
index 0000000..05253d0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S
@@ -0,0 +1,48 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+#include "asm_common.S"
+
+    preserve8
+    .arm
+    .text
+
+
+/* Core registers: r0 is both the argument and the result */
+#define operand     r0
+
+/* NEON registers: none used */
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdCountLeadingZeros
+
+    Functional description:
+        Counts the zero bits above the most significant set bit of a word.
+    Inputs:
+        operand (r0)    32-bit value to examine
+    Outputs:
+        none
+    Returns:
+        r0              CLZ result, 0..32 (32 when the input is zero)
+------------------------------------------------------------------------------*/
+
+function h264bsdCountLeadingZeros, export=1
+
+    CLZ     operand, operand                @ the result replaces the argument in r0
+    BX      lr
+
+endfunction
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S
new file mode 100644
index 0000000..6955b9a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S
@@ -0,0 +1,143 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+#include "asm_common.S"
+
+    preserve8
+    .arm                                    @ the pc-relative branch table below needs ARM state
+    .fpu neon
+    .text
+
+/* Input / output registers */
+/* tmp2 shares r2 with left: left is dead once the left fill has finished */
+#define ref         r0
+#define fill        r1
+#define left        r2
+#define tmp2        r2
+#define center      r3
+#define right       r4
+#define tmp1        r5
+
+/* -- NEON registers -- */
+
+#define qTmp0       Q0.U8
+#define qTmp1       Q1.U8
+#define dTmp0       D0.U8
+#define dTmp1       D1.U8
+#define dTmp2       D2.U8
+#define dTmp3       D3.U8
+
+/*
+void h264bsdFillRow7(const u8 * ref, u8 * fill, i32 left, i32 center,
+                     i32 right);
+Stores ref[0] left times, copies center bytes from ref, then repeats the byte
+just before the advanced ref pointer right times. Every store goes to fill. */
+function h264bsdFillRow7, export=1
+@ r0-r3 carry the first four arguments. right is the fifth and is on the stack.
+    PUSH    {r4-r6,lr}                      @ 16 bytes, r6 is never used (presumably padding that keeps sp 8-byte aligned)
+    CMP     left, #0
+    LDR     right, [sp,#0x10]               @ just above the 16 pushed bytes, LDR keeps the CMP flags
+    BEQ     switch_center                   @ nothing to replicate on the left
+    LDRB    tmp1, [ref,#0]
+
+loop_left:
+    SUBS    left, left, #1
+    STRB    tmp1, [fill], #1
+    BNE     loop_left
+@ Bulk copy of center: dispatch on center / 4 through a branch table.
+switch_center:
+    ASR     tmp2,center,#2
+    CMP     tmp2,#9
+    ADDCC   pc,pc,tmp2,LSL #2               @ pc reads as this instruction + 8, which is table entry 0
+    B       loop_center                     @ quotient above 8 (unsigned): everything is copied bytewise
+    B       loop_center                     @ entry 0: fewer than 4 bytes
+    B       case_1
+    B       case_2
+    B       case_3
+    B       case_4
+    B       case_5
+    B       case_6
+    B       case_7
+    B       case_8
+@ case_N moves 4 * N bytes. The 0..3 bytes still counted in center go to loop_center.
+case_8:
+    VLD1    {qTmp0, qTmp1}, [ref]!          @ 32 bytes
+    SUB     center, center, #32
+    VST1    {qTmp0}, [fill]!
+    VST1    {qTmp1}, [fill]!
+    B       loop_center
+case_7:
+    VLD1    {dTmp0,dTmp1,dTmp2}, [ref]!     @ 24 bytes, the word below makes it 28
+    SUB     center, center, #28
+    LDR     tmp2, [ref], #4                 @ NOTE(review): word access assumes unaligned LDR and STR are permitted, confirm
+    VST1    {dTmp0,dTmp1,dTmp2}, [fill]!
+    STR     tmp2, [fill],#4
+    B       loop_center
+case_6:
+    VLD1    {dTmp0,dTmp1,dTmp2}, [ref]!     @ 24 bytes
+    SUB     center, center, #24
+    VST1    {dTmp0,dTmp1,dTmp2}, [fill]!
+    B       loop_center
+case_5:
+    VLD1    {qTmp0}, [ref]!                 @ 16 bytes, the word below makes it 20
+    SUB     center, center, #20
+    LDR     tmp2, [ref], #4
+    VST1    {qTmp0}, [fill]!
+    STR     tmp2, [fill],#4
+    B       loop_center
+case_4:
+    VLD1    {qTmp0}, [ref]!                 @ 16 bytes
+    SUB     center, center, #16
+    VST1    {qTmp0}, [fill]!
+    B       loop_center
+case_3:
+    VLD1    {dTmp0}, [ref]!                 @ 8 bytes, the word below makes it 12
+    SUB     center, center, #12
+    LDR     tmp2, [ref], #4
+    VST1    {dTmp0}, [fill]!                @ register list written with braces, as in the load above
+    STR     tmp2, [fill],#4
+    B       loop_center
+case_2:
+    LDR     tmp2, [ref],#4
+    SUB     center, center, #4
+    STR     tmp2, [fill], #4                @ falls through into case_1 for the second word
+case_1:
+    LDR     tmp2, [ref],#4
+    SUB     center, center, #4
+    STR     tmp2, [fill], #4
+
+loop_center:
+    CMP     center, #0
+    BEQ     jump
+    LDRB    tmp2, [ref], #1
+    SUBS    center, center, #1              @ sets the flags so the branch below tests this result
+    STRB    tmp2, [fill], #1
+    BNE     loop_center
+jump:
+    CMP     right,#0
+    POPEQ   {r4-r6,pc}                      @ nothing to replicate on the right: return
+    LDRB    tmp2, [ref,#-1]                 @ byte just before the advanced ref pointer
+
+loop_right:
+    STRB    tmp2, [fill], #1
+    SUBS    right, right, #1
+    BNE     loop_right
+
+    POP     {r4-r6,pc}
+
+endfunction
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S
new file mode 100644
index 0000000..b3f3191
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S
@@ -0,0 +1,78 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+#include "asm_common.S"
+
+    preserve8
+
+    .arm
+    .text
+
+/* Register aliases: r1 and r2 are reused once their previous value is dead */
+#define pStrmData       r0
+#define numBits         r1
+#define readBits        r2
+#define strmBuffSize    r3
+#define pStrmBuffStart  r1
+#define pStrmCurrPos    r2
+#define bitPosInWord    r1
+
+/* Stream descriptor fields touched below, by byte offset:
+   0x0 pStrmBuffStart, 0x4 pStrmCurrPos, 0x8 bitPosInWord,
+   0xC strmBuffSize, 0x10 readBits (the running bit count) */
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdFlushBits
+    Functional description:
+        Adds numBits to the stored bit count, refreshes the bit position and
+        moves the current byte position unless the count passed the buffer.
+    Inputs:
+        pStrmData (r0) stream descriptor, numBits (r1) number of bits to skip
+    Outputs:
+        descriptor fields at 0x10 and 0x8 always, the one at 0x4 on success
+    Returns:
+        0 on success, 0xFFFFFFFF when the count exceeds 8 * strmBuffSize
+------------------------------------------------------------------------------*/
+function h264bsdFlushBits, export=1
+
+    LDR     strmBuffSize, [pStrmData, #0xC]
+    LDR     readBits, [pStrmData, #0x10]
+
+    ADD     readBits, readBits, numBits             @ advance the bit count
+    AND     bitPosInWord, readBits, #7              @ overwrites r1, numBits is dead from here
+
+    STR     readBits, [pStrmData, #0x10]
+    STR     bitPosInWord, [pStrmData, #0x8]
+
+    CMP     readBits, strmBuffSize, LSL #3          @ bit count against 8 * size, unsigned
+    LDR     pStrmBuffStart, [pStrmData, #0x0]       @ a load does not disturb the flags
+
+@ LS, count within 8 * strmBuffSize: move the byte position to
+@ pStrmBuffStart + count / 8 and return 0.
+    ADDLS   pStrmCurrPos, pStrmBuffStart, readBits, LSR #3
+    STRLS   pStrmCurrPos, [pStrmData, #0x4]
+    MOVLS   r0, #0
+
+@ HI, count past the end of the buffer: the byte position is left as it
+@ was and the return value is 0xFFFFFFFF.
+    MVNHI   r0, #0
+
+    BX      lr
+
+endfunction
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S
new file mode 100644
index 0000000..495d560
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S
@@ -0,0 +1,157 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+#include "asm_common.S"
+
+    require8
+    preserve8
+
+    .arm
+    .fpu neon
+    .text
+
+/* Input / output registers */
+#define image       r0
+#define data        r1
+#define width       r2
+#define luma        r3
+#define cb          r4
+#define cr          r5
+#define cwidth      r6
+
+/* -- NEON registers -- */
+/* Q0-Q15 hold the 16 luma rows. D0-D15 are the halves of Q0-Q7, reused for chroma. */
+#define qRow0       Q0.U8
+#define qRow1       Q1.U8
+#define qRow2       Q2.U8
+#define qRow3       Q3.U8
+#define qRow4       Q4.U8
+#define qRow5       Q5.U8
+#define qRow6       Q6.U8
+#define qRow7       Q7.U8
+#define qRow8       Q8.U8
+#define qRow9       Q9.U8
+#define qRow10      Q10.U8
+#define qRow11      Q11.U8
+#define qRow12      Q12.U8
+#define qRow13      Q13.U8
+#define qRow14      Q14.U8
+#define qRow15      Q15.U8
+
+#define dRow0       D0.U8
+#define dRow1       D1.U8
+#define dRow2       D2.U8
+#define dRow3       D3.U8
+#define dRow4       D4.U8
+#define dRow5       D5.U8
+#define dRow6       D6.U8
+#define dRow7       D7.U8
+#define dRow8       D8.U8
+#define dRow9       D9.U8
+#define dRow10      D10.U8
+#define dRow11      D11.U8
+#define dRow12      D12.U8
+#define dRow13      D13.U8
+#define dRow14      D14.U8
+#define dRow15      D15.U8
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdWriteMacroblock
+
+    Functional description:
+        Write one macroblock into the image. Both luma and chroma
+        components will be written at the same time.
+
+    Inputs:
+        data    (r1) pointer to macroblock data to be written, 256 values for
+                luma followed by 64 values for each chroma component (cb, cr)
+
+    Outputs:
+        image   (r0) pointer to the image where the macroblock will be written
+
+    Returns:
+        none
+
+------------------------------------------------------------------------------*/
+
+function h264bsdWriteMacroblock, export=1
+    PUSH    {r4-r6,lr}
+    VPUSH   {q4-q7}                         @ d8-d15 are callee-saved and are overwritten below
+
+    LDR     width, [image, #4]
+    LDR     luma, [image, #0xC]             @ destination of the first luma row
+    LDR     cb, [image, #0x10]              @ destination of the first cb row
+    LDR     cr, [image, #0x14]              @ destination of the first cr row
+
+@ Loads run ahead of the stores. A Q register is reloaded only after its luma row is stored.
+@ Write luma
+    VLD1    {qRow0, qRow1}, [data]!
+    LSL     width, width, #4                @ luma row stride: 16 bytes per unit of width
+    VLD1    {qRow2, qRow3}, [data]!
+    LSR     cwidth, width, #1               @ chroma row stride: half the luma stride
+    VST1    {qRow0}, [luma,:128], width
+    VLD1    {qRow4, qRow5}, [data]!
+    VST1    {qRow1}, [luma,:128], width
+    VLD1    {qRow6, qRow7}, [data]!
+    VST1    {qRow2}, [luma,:128], width
+    VLD1    {qRow8, qRow9}, [data]!
+    VST1    {qRow3}, [luma,:128], width
+    VLD1    {qRow10, qRow11}, [data]!
+    VST1    {qRow4}, [luma,:128], width
+    VLD1    {qRow12, qRow13}, [data]!
+    VST1    {qRow5}, [luma,:128], width
+    VLD1    {qRow14, qRow15}, [data]!
+    VST1    {qRow6}, [luma,:128], width
+@ Write chroma, interleaved with the remaining luma rows
+    VLD1    {qRow0, qRow1}, [data]!         @ cb rows 0,1,2,3
+    VST1    {qRow7}, [luma,:128], width
+    VLD1    {qRow2, qRow3}, [data]!         @ cb rows 4,5,6,7
+    VST1    {qRow8}, [luma,:128], width
+    VLD1    {qRow4, qRow5}, [data]!         @ cr rows 0,1,2,3
+    VST1    {qRow9}, [luma,:128], width
+    VLD1    {qRow6, qRow7}, [data]!         @ cr rows 4,5,6,7
+    VST1    {qRow10}, [luma,:128], width
+    VST1    {dRow0}, [cb,:64], cwidth
+    VST1    {dRow8}, [cr,:64], cwidth
+    VST1    {qRow11}, [luma,:128], width
+    VST1    {dRow1}, [cb,:64], cwidth
+    VST1    {dRow9}, [cr,:64], cwidth
+    VST1    {qRow12}, [luma,:128], width
+    VST1    {dRow2}, [cb,:64], cwidth
+    VST1    {dRow10}, [cr,:64], cwidth
+    VST1    {qRow13}, [luma,:128], width
+    VST1    {dRow3}, [cb,:64], cwidth
+    VST1    {dRow11}, [cr,:64], cwidth
+    VST1    {qRow14}, [luma,:128], width
+    VST1    {dRow4}, [cb,:64], cwidth
+    VST1    {dRow12}, [cr,:64], cwidth
+    VST1    {qRow15}, [luma,:128]           @ last luma row: same alignment as the rows above, no post-increment
+    VST1    {dRow5}, [cb,:64], cwidth
+    VST1    {dRow13}, [cr,:64], cwidth
+    VST1    {dRow6}, [cb,:64], cwidth
+    VST1    {dRow14}, [cr,:64], cwidth
+    VST1    {dRow7}, [cb,:64]
+    VST1    {dRow15}, [cr,:64]
+
+    VPOP    {q4-q7}
+    POP     {r4-r6,pc}                      @ return: the saved lr is popped straight into pc
+
+
+endfunction
+
+
+