diff options
author | James Dong <jdong@google.com> | 2011-05-31 18:53:46 -0700 |
---|---|---|
committer | James Dong <jdong@google.com> | 2011-06-02 12:32:46 -0700 |
commit | 0c1bc742181ded4930842b46e9507372f0b1b963 (patch) | |
tree | c952bfcb03ff7cce5e0f91ad7d25c67a2fdd39cb /media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc | |
parent | 92a746c3b18d035189f596ce32847bf26247aaca (diff) | |
download | frameworks_av-0c1bc742181ded4930842b46e9507372f0b1b963.zip frameworks_av-0c1bc742181ded4930842b46e9507372f0b1b963.tar.gz frameworks_av-0c1bc742181ded4930842b46e9507372f0b1b963.tar.bz2 |
Initial-checkin for ON2 Software AVC/H264 decoder
o when neon is present, the performance gain of On2 AVC software decoder
over PV software decoder is more than 30%.
o In addition, it fixes some known PV software decoder issues like missing
output frames
o allow both pv and on2 software avc to be available for easy comparison
o change output frames from 8 to 16
Change-Id: I567ad1842025ead7092f0c47e3513d6d9ca232dd
Diffstat (limited to 'media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc')
6 files changed, 535 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S
new file mode 100644
index 0000000..f39f5c4
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/asm_common.S
@@ -0,0 +1,41 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+
+
+
+@ REQUIRE8: emits EABI build attribute 24 with value 1.
+@ NOTE(review): tag 24 should be Tag_ABI_align_needed (code requires an
+@ 8-byte aligned stack) - confirm against the ARM build-attributes addendum.
+    .macro REQUIRE8
+        .eabi_attribute 24, 1
+    .endm
+
+@ PRESERVE8: emits EABI build attribute 25 with value 1.
+@ NOTE(review): tag 25 should be Tag_ABI_align_preserved (code keeps the
+@ stack 8-byte aligned) - confirm against the same addendum.
+    .macro PRESERVE8
+        .eabi_attribute 25, 1
+    .endm
+
+
+@ function name, export=0
+@   Opens a function definition:
+@     - when export is non-zero the symbol is declared .global,
+@     - the symbol is typed as a function (.type name, %function),
+@     - a .func debug scope is opened,
+@     - the entry label is defined.
+@   Must be paired with endfunction (or a bare .endfunc).
+    .macro function name, export=0
+.if \export
+        .global \name
+.endif
+        .type   \name, %function
+        .func   \name
+\name:
+    .endm
+
+@ endfunction
+@   Closes the .func scope opened by the function macro.
+@   It only emits .endfunc; no .size directive is generated.
+    .macro endfunction
+        .endfunc
+    .endm
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S
new file mode 100644
index 0000000..c8a940e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdClearMbLayer.S
@@ -0,0 +1,68 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+#include "asm_common.S"
+
+    preserve8
+
+    .fpu neon
+    .text
+
+/* Input / output registers */
+#define pMbLayer    r0      /* in: start of the area to clear; advanced by the loop */
+#define size        r1      /* in: byte count; used as the loop counter             */
+#define pTmp        r2      /* scratch: second store pointer, pMbLayer + 16         */
+#define step        r3      /* scratch: post-increment applied after each store     */
+
+/* -- NEON registers -- */
+
+#define qZero       Q0.U8   /* all-zero 16-byte vector that gets stored */
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdClearMbLayer
+
+        Functional description:
+          Writes zero bytes to memory starting at pMbLayer, 64 bytes per
+          loop iteration. Two pointers 16 bytes apart each advance by 32
+          bytes per store, so the four 16-byte stores of one iteration
+          cover 64 contiguous bytes.
+
+          The loop body always runs at least once, and runs size / 64
+          (rounded down) times when size is 64 or more. Consequently:
+            - a size below 64 still writes 64 bytes,
+            - a trailing size % 64 bytes is left untouched.
+          NOTE(review): callers presumably pass a non-zero multiple of 64
+          - confirm.
+
+        Inputs:
+          r0 (pMbLayer)   destination address
+          r1 (size)       number of bytes to clear
+
+        Outputs:
+          memory at pMbLayer is zeroed as described above
+
+        Returns:
+          nothing is set up as a return value; r0-r3, q0 and the
+          condition flags are overwritten
+
+------------------------------------------------------------------------------*/
+
+function h264bsdClearMbLayer, export=1
+
+    VMOV    qZero, #0
+    ADD     pTmp, pMbLayer, #16
+    MOV     step, #32
+    SUBS    size, size, #64             /* pre-bias the counter by one chunk */
+
+loop:
+    VST1    {qZero}, [pMbLayer], step   /* bytes  0..15 of this chunk */
+    SUBS    size, size, #64             /* carry clear (borrow) => last chunk */
+    VST1    {qZero}, [pTmp], step       /* bytes 16..31 */
+    VST1    {qZero}, [pMbLayer], step   /* bytes 32..47 */
+    VST1    {qZero}, [pTmp], step       /* bytes 48..63 */
+    BCS     loop                        /* tests the flags set by the SUBS above */
+
+    BX      lr
+
+endfunction
+
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S
new file mode 100644
index 0000000..05253d0
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdCountLeadingZeros.S
@@ -0,0 +1,48 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+#include "asm_common.S"
+
+    preserve8
+    .arm
+    .text
+
+
+/* Input / output registers */
+#define value   r0      /* in: word to examine; out: the result */
+
+/* -- NEON registers -- */
+/* (none used) */
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdCountLeadingZeros
+
+        Functional description:
+          Thin wrapper around the ARM CLZ instruction: counts the number
+          of zero bits above the most significant set bit of a 32-bit
+          word.
+
+        Inputs:
+          r0 (value)      32-bit word to examine
+
+        Outputs:
+          none (no memory is accessed)
+
+        Returns:
+          r0 = number of leading zero bits in value
+               (CLZ yields 32 when value is 0)
+
+------------------------------------------------------------------------------*/
+
+function h264bsdCountLeadingZeros, export=1
+
+    CLZ     value, value
+    BX      lr
+
+endfunction
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S
new file mode 100644
index 0000000..6955b9a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFillRow7.S
@@ -0,0 +1,143 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+#include "asm_common.S"
+
+    preserve8
+
+    /* The dispatch table in switch_center adds to pc and relies on ARM
+       state: pc reads as the current instruction + 8 and every branch
+       slot is 4 bytes wide. Force ARM encoding so the file cannot be
+       assembled as Thumb by accident. */
+    .arm
+    .fpu neon
+    .text
+
+/* Input / output registers */
+
+#define ref     r0      /* in: source row pointer; advanced by the centre copy  */
+#define fill    r1      /* in: destination pointer; advanced by every store     */
+#define left    r2      /* in: number of bytes to replicate from ref[0]         */
+#define tmp2    r2      /* scratch; reuses r2 once the left fill is finished    */
+#define center  r3      /* in: number of bytes to copy from ref                 */
+#define right   r4      /* in (5th argument, read from the stack): bytes to
+                           replicate from the byte preceding the final ref     */
+#define tmp1    r5      /* scratch: byte replicated by the left fill            */
+
+/* -- NEON registers -- */
+
+#define qTmp0   Q0.U8
+#define qTmp1   Q1.U8
+#define dTmp0   D0.U8
+#define dTmp1   D1.U8
+#define dTmp2   D2.U8
+#define dTmp3   D3.U8
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdFillRow7
+
+        void h264bsdFillRow7(const u8 * ref, u8 * fill, i32 left, i32 center,
+                             i32 right);
+
+        Functional description:
+          Builds one row at fill out of three consecutive parts:
+            1. left   bytes, each equal to ref[0]
+            2. center bytes copied from ref (ref advances as they are read)
+            3. right  bytes, each equal to the byte just before the final
+               ref position (ref[-1] after the centre copy)
+          Parts with a zero count are skipped.
+
+          The centre copy has a fast path: when center / 4 is in 1..8 the
+          first 4 * (center / 4) bytes are moved with NEON / word
+          transfers selected through a branch table; the remaining
+          0..3 bytes (or the whole centre part when center / 4 is 0 or
+          above 8) are copied by the byte loop.
+
+        Inputs:
+          r0 ref, r1 fill, r2 left, r3 center, [sp] right
+
+        Outputs:
+          left + center + right bytes written at fill
+
+        Returns:
+          nothing is set up as a return value. r0-r3, q0, q1 and the
+          flags are overwritten; r4-r6 are saved and restored.
+          r6 is not used by the code; pushing it keeps the saved area at
+          16 bytes (presumably to keep the stack 8-byte aligned, in line
+          with the preserve8 attribute).
+
+------------------------------------------------------------------------------*/
+
+function h264bsdFillRow7, export=1
+
+    PUSH    {r4-r6,lr}
+    CMP     left, #0
+    LDR     right, [sp,#0x10]           /* 5th argument sits above the 16 bytes just pushed */
+    BEQ     switch_center
+    LDRB    tmp1, [ref,#0]              /* byte replicated to the left of the row */
+
+loop_left:
+    SUBS    left, left, #1
+    STRB    tmp1, [fill], #1
+    BNE     loop_left
+
+switch_center:
+    ASR     tmp2, center, #2            /* tmp2 = center / 4 selects the bulk-copy case */
+    CMP     tmp2, #9
+    ADDCC   pc, pc, tmp2, LSL #2        /* tmp2 in 0..8: jump into the table below */
+    B       loop_center                 /* tmp2 >= 9 (unsigned): byte loop only */
+    B       loop_center                 /* tmp2 == 0 */
+    B       case_1
+    B       case_2
+    B       case_3
+    B       case_4
+    B       case_5
+    B       case_6
+    B       case_7
+    B       case_8
+
+case_8:                                 /* 32 bytes */
+    VLD1    {qTmp0, qTmp1}, [ref]!
+    SUB     center, center, #32
+    VST1    {qTmp0}, [fill]!
+    VST1    {qTmp1}, [fill]!
+    B       loop_center
+case_7:                                 /* 24 + 4 bytes */
+    VLD1    {dTmp0,dTmp1,dTmp2}, [ref]!
+    SUB     center, center, #28
+    LDR     tmp2, [ref], #4
+    VST1    {dTmp0,dTmp1,dTmp2}, [fill]!
+    STR     tmp2, [fill], #4
+    B       loop_center
+case_6:                                 /* 24 bytes */
+    VLD1    {dTmp0,dTmp1,dTmp2}, [ref]!
+    SUB     center, center, #24
+    VST1    {dTmp0,dTmp1,dTmp2}, [fill]!
+    B       loop_center
+case_5:                                 /* 16 + 4 bytes */
+    VLD1    {qTmp0}, [ref]!
+    SUB     center, center, #20
+    LDR     tmp2, [ref], #4
+    VST1    {qTmp0}, [fill]!
+    STR     tmp2, [fill], #4
+    B       loop_center
+case_4:                                 /* 16 bytes */
+    VLD1    {qTmp0}, [ref]!
+    SUB     center, center, #16
+    VST1    {qTmp0}, [fill]!
+    B       loop_center
+case_3:                                 /* 8 + 4 bytes */
+    VLD1    {dTmp0}, [ref]!
+    SUB     center, center, #12
+    LDR     tmp2, [ref], #4
+    VST1    {dTmp0}, [fill]!
+    STR     tmp2, [fill], #4
+    B       loop_center
+case_2:                                 /* 4 bytes, then falls through for 4 more */
+    LDR     tmp2, [ref], #4
+    SUB     center, center, #4
+    STR     tmp2, [fill], #4
+case_1:                                 /* 4 bytes */
+    LDR     tmp2, [ref], #4
+    SUB     center, center, #4
+    STR     tmp2, [fill], #4
+
+loop_center:
+    CMP     center, #0                  /* anything left after the bulk copy? */
+    BEQ     jump
+copy_center_byte:
+    LDRB    tmp2, [ref], #1
+    SUBS    center, center, #1          /* sets the flags the BNE below tests */
+    STRB    tmp2, [fill], #1
+    BNE     copy_center_byte
+jump:
+    CMP     right, #0
+    POPEQ   {r4-r6,pc}                  /* no right fill: return */
+    LDRB    tmp2, [ref,#-1]             /* byte just before the current ref position */
+
+loop_right:
+    STRB    tmp2, [fill], #1
+    SUBS    right, right, #1
+    BNE     loop_right
+
+    POP     {r4-r6,pc}
+
+endfunction
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S
new file mode 100644
index 0000000..b3f3191
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdFlushBits.S
@@ -0,0 +1,78 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+#include "asm_common.S"
+
+    preserve8
+
+    .arm
+    .text
+
+/* Byte offsets of the stream-descriptor words accessed through pStrmData.
+   The names follow the register aliases used for each word below. */
+#define STRM_BUFF_START_OFFS    0x0
+#define STRM_CURR_POS_OFFS      0x4
+#define STRM_BIT_POS_OFFS       0x8
+#define STRM_BUFF_SIZE_OFFS     0xC
+#define STRM_READ_BITS_OFFS     0x10
+
+/* Input / output registers.
+   r1 and r2 are reused as soon as their previous value is dead. */
+#define pStrmData       r0      /* in: stream descriptor; out: status       */
+#define numBits         r1      /* in: number of bits to skip               */
+#define bitPosInWord    r1      /* new bit count modulo 8                   */
+#define pStrmBuffStart  r1      /* word at STRM_BUFF_START_OFFS             */
+#define readBits        r2      /* bit count after the flush                */
+#define pStrmCurrPos    r2      /* buffer start + (bit count / 8)           */
+#define strmBuffSize    r3      /* word at STRM_BUFF_SIZE_OFFS              */
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdFlushBits
+
+        Functional description:
+          Adds numBits to the bit counter kept in the stream descriptor
+          and stores the new counter together with its low three bits
+          (the bit position). If the new counter does not exceed eight
+          times the stored buffer size (unsigned compare), the current
+          position word is set to buffer start + counter / 8. Otherwise
+          the position word is left as it was; the counter and the bit
+          position have already been updated at that point.
+
+        Inputs:
+          r0 (pStrmData)  pointer to the stream descriptor
+          r1 (numBits)    number of bits to flush
+
+        Outputs:
+          descriptor words at offsets 0x10 and 0x8 always, 0x4 on success
+
+        Returns:
+          r0 = 0            when the new counter is within the buffer
+          r0 = 0xFFFFFFFF   when it is past the end of the buffer
+
+------------------------------------------------------------------------------*/
+function h264bsdFlushBits, export=1
+
+    LDR     strmBuffSize, [pStrmData, #STRM_BUFF_SIZE_OFFS]
+    LDR     readBits, [pStrmData, #STRM_READ_BITS_OFFS]
+
+    ADD     readBits, readBits, numBits
+    STR     readBits, [pStrmData, #STRM_READ_BITS_OFFS]
+
+    AND     bitPosInWord, readBits, #7          /* numBits is dead from here on */
+    STR     bitPosInWord, [pStrmData, #STRM_BIT_POS_OFFS]
+
+    LDR     pStrmBuffStart, [pStrmData, #STRM_BUFF_START_OFFS]
+
+    CMP     readBits, strmBuffSize, LSL #3      /* bits consumed vs. bits available */
+
+    /* LS: still inside the buffer - publish the new byte position, return 0 */
+    ADDLS   pStrmCurrPos, pStrmBuffStart, readBits, LSR #3
+    STRLS   pStrmCurrPos, [pStrmData, #STRM_CURR_POS_OFFS]
+    MOVLS   r0, #0
+
+    /* HI: ran past the end of the buffer - return all ones */
+    MVNHI   r0, #0
+
+    BX      lr
+
+endfunction
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S
new file mode 100644
index 0000000..495d560
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/source/arm_neon_asm_gcc/h264bsdWriteMacroblock.S
@@ -0,0 +1,157 @@
+@
+@ Copyright (C) 2009 The Android Open Source Project
+@
+@ Licensed under the Apache License, Version 2.0 (the "License");
+@ you may not use this file except in compliance with the License.
+@ You may obtain a copy of the License at
+@
+@ http://www.apache.org/licenses/LICENSE-2.0
+@
+@ Unless required by applicable law or agreed to in writing, software
+@ distributed under the License is distributed on an "AS IS" BASIS,
+@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+@ See the License for the specific language governing permissions and
+@ limitations under the License.
+@
+
+#include "asm_common.S"
+
+    require8
+    preserve8
+
+    .arm
+    .fpu neon
+    .text
+
+/* Input / output registers */
+#define image   r0      /* in: image descriptor; words at +4, +0xC, +0x10, +0x14 are read */
+#define data    r1      /* in: macroblock samples; post-incremented by every load         */
+#define width   r2      /* word at image+4, then shifted left by 4 = luma row step        */
+#define luma    r3      /* word at image+0xC:  luma destination pointer                   */
+#define cb      r4      /* word at image+0x10: cb destination pointer                     */
+#define cr      r5      /* word at image+0x14: cr destination pointer                     */
+#define cwidth  r6      /* luma row step / 2 = chroma row step                            */
+
+/* -- NEON registers -- */
+
+/* One Q register holds one 16-byte luma row. */
+#define qRow0   Q0.U8
+#define qRow1   Q1.U8
+#define qRow2   Q2.U8
+#define qRow3   Q3.U8
+#define qRow4   Q4.U8
+#define qRow5   Q5.U8
+#define qRow6   Q6.U8
+#define qRow7   Q7.U8
+#define qRow8   Q8.U8
+#define qRow9   Q9.U8
+#define qRow10  Q10.U8
+#define qRow11  Q11.U8
+#define qRow12  Q12.U8
+#define qRow13  Q13.U8
+#define qRow14  Q14.U8
+#define qRow15  Q15.U8
+
+/* One D register holds one 8-byte chroma row. D0-D15 overlay Q0-Q7, which
+   are reloaded with chroma once their luma rows have been stored:
+   dRow0-dRow7 carry cb, dRow8-dRow15 carry cr. */
+#define dRow0   D0.U8
+#define dRow1   D1.U8
+#define dRow2   D2.U8
+#define dRow3   D3.U8
+#define dRow4   D4.U8
+#define dRow5   D5.U8
+#define dRow6   D6.U8
+#define dRow7   D7.U8
+#define dRow8   D8.U8
+#define dRow9   D9.U8
+#define dRow10  D10.U8
+#define dRow11  D11.U8
+#define dRow12  D12.U8
+#define dRow13  D13.U8
+#define dRow14  D14.U8
+#define dRow15  D15.U8
+
+/*------------------------------------------------------------------------------
+
+    Function: h264bsdWriteMacroblock
+
+        Functional description:
+            Write one macroblock into the image. Both luma and chroma
+            components will be written at the same time.
+
+            Loads and stores are interleaved: data is read strictly
+            sequentially (256 luma bytes, 64 cb bytes, 64 cr bytes) while
+            rows loaded earlier are being stored. Q0-Q7 are reused for
+            chroma only after the luma rows they held have been written,
+            so the instruction order must not be changed casually.
+
+            The stores carry :128 (luma) and :64 (chroma) alignment
+            qualifiers, so the destination pointers and row steps are
+            expected to be 16-byte / 8-byte aligned respectively.
+
+        Inputs:
+            data    pointer to macroblock data to be written, 256 values for
+                    luma followed by 64 values for both chroma components
+
+        Outputs:
+            image   pointer to the image where the macroblock will be written
+                    (NOTE(review): the word at +4 is scaled by 16 to form the
+                    luma row step, so it is presumably the image width in
+                    macroblocks - confirm against the image structure)
+
+        Returns:
+            none
+
+------------------------------------------------------------------------------*/
+
+function h264bsdWriteMacroblock, export=1
+    PUSH    {r4-r6,lr}
+    VPUSH   {q4-q7}                         /* q4-q7 are overwritten below; saved here, restored before return */
+
+    LDR     width,  [image, #4]
+    LDR     luma,   [image, #0xC]
+    LDR     cb,     [image, #0x10]
+    LDR     cr,     [image, #0x14]
+
+
+@   Write luma
+    VLD1    {qRow0, qRow1}, [data]!
+    LSL     width, width, #4                /* luma row step in bytes */
+    VLD1    {qRow2, qRow3}, [data]!
+    LSR     cwidth, width, #1               /* chroma row step in bytes */
+    VST1    {qRow0}, [luma,:128], width
+    VLD1    {qRow4, qRow5}, [data]!
+    VST1    {qRow1}, [luma,:128], width
+    VLD1    {qRow6, qRow7}, [data]!
+    VST1    {qRow2}, [luma,:128], width
+    VLD1    {qRow8, qRow9}, [data]!
+    VST1    {qRow3}, [luma,:128], width
+    VLD1    {qRow10, qRow11}, [data]!
+    VST1    {qRow4}, [luma,:128], width
+    VLD1    {qRow12, qRow13}, [data]!
+    VST1    {qRow5}, [luma,:128], width
+    VLD1    {qRow14, qRow15}, [data]!
+    VST1    {qRow6}, [luma,:128], width
+
+    /* All 256 luma bytes are loaded. Q0-Q7 are now refilled with chroma
+       as soon as the luma row each of them held has been stored. */
+    VLD1    {qRow0, qRow1}, [data]! ;//cb rows 0,1,2,3
+    VST1    {qRow7}, [luma,:128], width
+    VLD1    {qRow2, qRow3}, [data]! ;//cb rows 4,5,6,7
+    VST1    {qRow8}, [luma,:128], width
+    VLD1    {qRow4, qRow5}, [data]! ;//cr rows 0,1,2,3
+    VST1    {qRow9}, [luma,:128], width
+    VLD1    {qRow6, qRow7}, [data]! ;//cr rows 4,5,6,7
+    VST1    {qRow10}, [luma,:128], width
+    /* Remaining luma rows interleaved with the eight cb / cr rows */
+    VST1    {dRow0}, [cb,:64], cwidth
+    VST1    {dRow8}, [cr,:64], cwidth
+    VST1    {qRow11}, [luma,:128], width
+    VST1    {dRow1}, [cb,:64], cwidth
+    VST1    {dRow9}, [cr,:64], cwidth
+    VST1    {qRow12}, [luma,:128], width
+    VST1    {dRow2}, [cb,:64], cwidth
+    VST1    {dRow10}, [cr,:64], cwidth
+    VST1    {qRow13}, [luma,:128], width
+    VST1    {dRow3}, [cb,:64], cwidth
+    VST1    {dRow11}, [cr,:64], cwidth
+    VST1    {qRow14}, [luma,:128], width
+    VST1    {dRow4}, [cb,:64], cwidth
+    VST1    {dRow12}, [cr,:64], cwidth
+    VST1    {qRow15}, [luma]                /* last luma row: no post-increment and no alignment qualifier */
+    VST1    {dRow5}, [cb,:64], cwidth
+    VST1    {dRow13}, [cr,:64], cwidth
+    VST1    {dRow6}, [cb,:64], cwidth
+    VST1    {dRow14}, [cr,:64], cwidth
+    VST1    {dRow7}, [cb,:64]               /* last chroma rows: no post-increment */
+    VST1    {dRow15}, [cr,:64]
+
+    VPOP    {q4-q7}
+    POP     {r4-r6,pc}                      /* returns by loading the saved lr into pc */
+@   BX      lr
+
+    /* Closes the .func scope directly; this is the directive the
+       endfunction macro in asm_common.S expands to. */
+    .endfunc
+
+
+
|