18 files changed, 3682 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s
new file mode 100644
index 0000000..7801e57
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Clip8_s.s
@@ -0,0 +1,75 @@
+; /**
+; * 
+; * File Name:  armVCM4P2_Clip8_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains module for Clipping 16 bit value to [0,255] Range
+; */ 
+
+      INCLUDE omxtypes_s.h
+      INCLUDE armCOMM_s.h
+      
+
+      M_VARIANTS ARM1136JS
+
+        
+     
+      IF ARM1136JS
+ 
+;// Incoming arguments
+
+pSrc                 RN 0         ;// source of signed 16-bit samples, consumed sequentially
+pDst                 RN 1         ;// destination of 8-bit samples
+step                 RN 2         ;// byte distance between two destination rows
+
+;// Scratch registers; each one carries a pair of packed 16-bit samples
+
+pair0                RN 3
+pair1                RN 4
+pair2                RN 5
+pair3                RN 6
+
+rowsLeft             RN 14
+
+        ;// Saturate 8 rows of 8 samples to [0,255] and store them as bytes
+        M_START armVCM4P2_Clip8,r6
+
+
+        MOV          rowsLeft, #8
+rowLoop
+
+        LDMIA        pSrc!, {pair0, pair1}          ;// samples 0..3 of this row
+        SUBS         rowsLeft, rowsLeft, #1         ;// flags are consumed by the BGT at the bottom
+        LDMIA        pSrc!, {pair2, pair3}          ;// samples 4..7 of this row
+        USAT16       pair0, #8, pair0               ;// saturate both halfwords to 8 unsigned bits
+        USAT16       pair1, #8, pair1
+        STRB         pair0, [pDst]                  ;// low halfword -> byte 0
+        LSR          pair0, pair0, #16
+        STRB         pair0, [pDst, #1]              ;// high halfword -> byte 1
+        STRB         pair1, [pDst, #2]
+        LSR          pair1, pair1, #16
+        STRB         pair1, [pDst, #3]
+
+        USAT16       pair2, #8, pair2               ;// same treatment for the second half of the row
+        USAT16       pair3, #8, pair3
+        STRB         pair2, [pDst, #4]
+        LSR          pair2, pair2, #16
+        STRB         pair2, [pDst, #5]
+        STRB         pair3, [pDst, #6]
+        LSR          pair3, pair3, #16
+        STRB         pair3, [pDst, #7]
+        ADD          pDst, pDst, step               ;// advance to the next destination row
+
+        BGT          rowLoop                        ;// repeat while rowsLeft > 0 (8 rows in total)
+
+        M_END
+        ENDIF
+        
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
new file mode 100644
index 0000000..9e30900
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
@@ -0,0 +1,398 @@
+;/**
+; * 
+; * File Name:  armVCM4P2_DecodeVLCZigzag_AC_unsafe_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for zigzag scanning and VLC decoding
+; * for inter, intra block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_AC_unsafe
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan 
+; *
+; * 
+; *
+; * 
+; */
+
+
+      INCLUDE omxtypes_s.h
+      INCLUDE armCOMM_s.h
+      INCLUDE armCOMM_BitDec_s.h
+
+
+      M_VARIANTS ARM1136JS
+
+     
+
+
+
+     IF ARM1136JS
+     
+        
+
+
+
+;//Input Arguments
+
+ppBitStream          RN 0
+pBitOffset           RN 1
+pDst                 RN 2
+shortVideoHeader     RN 3
+
+;// NOTE: several names below share one register (r0, r4, r5 and r14 are reused)
+;//Local Variables
+
+Return               RN 0
+
+pVlcTableL0L1        RN 4
+pLMAXTableL0L1       RN 4
+pRMAXTableL0L1       RN 4
+pZigzagTable         RN 4
+
+ftype                RN 0
+temp3                RN 4
+temp                 RN 5
+Count                RN 6         ;// running coefficient index; read before any write in this routine
+Escape               RN 5
+
+;// armVCM4P2_FillVLDBuffer
+zigzag               RN 0
+storeLevel           RN 1
+temp2                RN 4
+temp1                RN 5
+sign                 RN 5
+Last                 RN 7
+storeRun             RN 14
+
+
+packRetIndex         RN 5
+
+
+markerbit            RN 5
+
+;// Scratch Registers
+
+RBitStream           RN 8
+RBitBuffer           RN 9
+RBitCount            RN 10
+
+T1                   RN 11
+T2                   RN 12
+LR                   RN 14
+
+
+
+        M_ALLOC4        pppBitStream,4
+        M_ALLOC4        ppOffset,4
+        M_ALLOC4        pLinkRegister,4
+        ;// Three 4-byte stack locals: saved ppBitStream, saved pBitOffset and saved LR
+        M_START armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+        ;// Stack-passed arguments: addresses of the VLC, LMAX, RMAX and zigzag tables
+        M_ARG           ppVlcTableL0L1,4
+        M_ARG           ppLMAXTableL0L1,4
+        M_ARG           ppRMAXTableL0L1,4
+        M_ARG           ppZigzagTable,4
+
+        ;// Clear the output block: 8 STMs x 16 bytes = 128 bytes at pDst
+
+        MOV             temp1,#0                                        ;// Zero filler for the STMs below (this is temp1, not Count)
+        MOV             Last,#0
+        M_STR           LR,pLinkRegister                                ;// Store Link Register on Stack
+        MOV             temp2,#0
+        MOV             LR,#0                                           ;// LR is reused as the fourth zero register
+
+        ;// Interleave the bit-decoder initialisation macros with the zero stores
+
+        STM             pDst!,{temp2,temp1,Last,LR}
+        M_BD_INIT0      ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+        STM             pDst!,{temp2,temp1,Last,LR}
+        M_BD_INIT1      T1, T2, T2
+        STM             pDst!,{temp2,temp1,Last,LR}
+        M_BD_INIT2      T1, T2, T2
+        STM             pDst!,{temp2,temp1,Last,LR}
+        M_STR           ppBitStream,pppBitStream                        ;// Store ppBitstream on stack
+        STM             pDst!,{temp2,temp1,Last,LR}
+        M_STR           pBitOffset,ppOffset                             ;// Store pBitOffset on stack
+        STM             pDst!,{temp2,temp1,Last,LR}
+
+        STM             pDst!,{temp2,temp1,Last,LR}
+        STM             pDst!,{temp2,temp1,Last,LR}
+
+
+        SUB             pDst,pDst,#128                                  ;// Rewind pDst to the start of the block
+
+        ;// The armVCM4P2_GetVLCBits begins
+
+getVLCbits
+        ;// Loop head. Count is never initialised in this routine (presumably set by the caller - TODO confirm)
+        M_BD_LOOK8      Escape,7                                        ;// Peek at the next 7 bits
+        LSR             Escape,Escape,#25
+        CMP             Escape,#3                                       ;// The escape code is the 7-bit value 3
+        MOVNE           ftype,#0
+        BNE             notEscapemode                                   ;// No escape code: ordinary VLC symbol (ftype 0)
+
+        M_BD_VSKIP8     #7,T1                                           ;// Consume the escape code
+        CMP             shortVideoHeader,#0                             ;// Check shortVideoHeader flag to know the type of Escape mode
+        BEQ             endFillVLD                                      ;// shortVideoHeader == 0: escape type is given by the next bits
+
+        ;// Escape Mode 4 (shortVideoHeader != 0): Last(1), Run(6) and Level(8) follow as fixed-length fields
+
+        M_BD_READ8      Last,1,T1
+        M_BD_READ8      storeRun,6,T1
+        M_BD_READ8      storeLevel,8,T1
+
+
+        ;// Check whether the Reserved values for Level are used and Exit with an Error Message if it is so
+
+        TEQ             storeLevel,#0
+        TEQNE           storeLevel,#128
+        BEQ             ExitError
+
+        ADD             temp2,storeRun,Count
+        CMP             temp2,#64
+        BGE             ExitError                                       ;// error if Count+storeRun >= 64
+
+
+        ;// Load address of zigzagTable
+
+        M_LDR           pZigzagTable,ppZigzagTable                      ;// Loading the Address of Zigzag table
+
+
+        ;// armVCM4P2_FillVLDBuffer
+
+        SXTB            storeLevel,storeLevel                           ;// Sign Extend storeLevel to 32 bits
+
+
+        ;// Skip storeRun zero coefficients, then store the level at its zigzag position
+
+        ADD             Count,Count,storeRun
+        LDRB            zigzag,[pZigzagTable,Count]
+        ADD             Count,Count,#1
+        STRH            storeLevel,[pDst,zigzag]                        ;// store Level
+        CMP             Last,#1                                         ;// Keep decoding unless this was the last coefficient
+        BNE             end                                             ;// (an unconditional exit here reported an error whenever Last == 0)
+        B               ExitOk
+
+
+endFillVLD
+
+
+        ;// Load Ftype( Escape Mode) value based on the two successive bits in the bitstream
+
+        M_BD_READ8      temp1,1,T1
+        CMP             temp1,#0
+        MOVEQ           ftype,#1
+        BEQ             notEscapemode
+        M_BD_READ8      temp1,1,T1
+        CMP             temp1,#1
+        MOVEQ           ftype,#3
+        MOVNE           ftype,#2
+
+
+notEscapemode
+
+        ;// Load optimized packed VLC table with last=0 and Last=1
+
+        M_LDR           pVlcTableL0L1,ppVlcTableL0L1                    ;// Load Combined VLC Table
+
+
+        CMP             ftype,#3                                        ;// If ftype >= 3 perform Fixed Length Decoding (Escape Mode 3)
+        BGE             EscapeMode3                                     ;// Else continue normal VLC Decoding
+
+        ;// Variable length decoding, "armUnPackVLC32"
+
+
+        M_BD_VLD        packRetIndex,T1,T2,pVlcTableL0L1,4,2
+
+
+        LDR             temp3,=0xFFF
+
+        CMP             packRetIndex,temp3                              ;// Check for invalid symbol
+        BEQ             ExitError                                       ;// if invalid symbol occurs exit with an error message
+
+        AND             Last,packRetIndex,#2                            ;// Last flag is bit 1 of the packed index, so Last is 0 or 2 here
+
+
+
+
+        LSR             storeRun,packRetIndex,#7                        ;// Get Run Value from Packed index
+        AND             storeLevel,packRetIndex,#0x7c                   ;// storeLevel=packRetIndex[2-6],storeLevel[0-1]=0
+
+
+        M_LDR           pLMAXTableL0L1,ppLMAXTableL0L1                  ;// Load LMAX table
+
+
+        LSR             storeLevel,storeLevel,#2                        ;// Level value
+
+        CMP             ftype,#1
+        BNE             ftype2
+
+        ;// ftype==1; Escape mode 1: level += LMAX[Last][run]
+
+
+        ADD            temp1, pLMAXTableL0L1, Last, LSL#4              ;// Last is 0 or 2, so this adds 32 to the table address when Last is set
+        LDRB            temp1,[temp1,storeRun]
+
+
+        ADD             storeLevel,temp1,storeLevel
+
+ftype2
+
+        ;// ftype==2; Escape mode 2: run += RMAX[Last][level-1] + 1
+
+        M_LDR           pRMAXTableL0L1,ppRMAXTableL0L1                  ;// Load RMAX Table
+
+        CMP             ftype,#2
+        BNE             FillVLDL1
+
+        ADD            temp1, pRMAXTableL0L1, Last, LSL#4               ;// Last is 0 or 2, so this adds 32 to the table address when Last is set
+        SUB             temp2,storeLevel,#1                             ;// temp2 shares r4 with the table pointer; temp1 already holds the base
+        LDRB            temp1,[temp1,temp2]
+
+
+        ADD             storeRun,storeRun,#1
+        ADD             storeRun,temp1
+
+FillVLDL1
+
+
+        ;// armVCM4P2_FillVLDBuffer
+
+        M_LDR           pZigzagTable,ppZigzagTable                     ;// Load address of zigzagTable
+
+        M_BD_READ8      sign,1,T1
+
+        CMP             sign,#1
+        RSBEQ           storeLevel,storeLevel,#0                       ;// Sign bit set: negate the level
+
+        ADD             temp1,storeRun,Count                           ;// Exit with an error message if Run + Count exceeds 63
+        CMP             temp1,#64
+        BGE             ExitError
+
+
+
+
+
+
+        ;// Skip storeRun zero coefficients
+
+        ADD             Count,Count,storeRun
+
+storeLevelL1
+
+        LDRB            zigzag,[pZigzagTable,Count]
+        CMP             Last,#2                                         ;// Last == 2 marks the final non-zero coefficient; flags are used by the BNE below
+        ADD             Count,Count,#1
+        LSR             Last,Last,#1                                    ;// Normalise Last to 0/1 for the test at ExitOk
+        STRH            storeLevel,[pDst,zigzag]
+
+        BNE             end
+
+        B               ExitOk
+
+
+
+        ;// Fixed Length Decoding Escape Mode 3
+
+EscapeMode3
+
+        M_BD_READ8      Last,1,T1
+        M_BD_READ8      storeRun,6,T1
+
+        ADD             temp2,storeRun,Count                            ;// Exit with an error message if Run + Count exceeds 63
+        CMP             temp2,#64
+        BGE             ExitError
+
+        M_BD_READ8      markerbit,1,T1
+        TEQ             markerbit,#0                                    ;// Exit with an error message if marker bit is zero
+        BEQ             ExitError
+
+        M_BD_READ16     storeLevel,12,T1
+
+        TST             storeLevel,#0x800                               ;// test if the level is negative
+        SUBNE           storeLevel,storeLevel,#4096                     ;// sign-extend the 12-bit field
+        CMP             storeLevel,#0
+        CMPNE           storeLevel,#-2048
+        BEQ             ExitError                                       ;// Exit with an error message if Level==0 or  -2048
+
+        M_LDR           pZigzagTable,ppZigzagTable                      ;// Load address of zigzagTable
+
+        M_BD_READ8      markerbit,1,T1                                  ;// Trailing marker bit: consumed but not checked
+
+
+        ;// armVCM4P2_FillVLDBuffer ( Sign not used as storeLevel is preprocessed)
+
+
+
+        ;// Skip storeRun zero coefficients
+
+        ADD             Count,Count,storeRun
+
+
+
+storeLevelLast
+
+        LDRB            zigzag,[pZigzagTable,Count]
+        CMP             Last,#1
+        ADD             Count,Count,#1
+        STRH            storeLevel,[pDst,zigzag]
+
+        BNE             end
+
+        B               ExitOk
+
+end
+
+        CMP             Count,#64                                       ;// Run the loop until Count reaches 64
+
+        BLT             getVLCbits
+
+
+ExitOk
+        ;// Exit When VLC Decoding is done Successfully
+        ;// Status is OMX_Sts_NoErr only if the final symbol had Last == 1, otherwise OMX_Sts_Err
+        ;// Loading ppBitStream and pBitOffset from stack
+
+        CMP             Last,#1
+        M_LDR           ppBitStream,pppBitStream
+        M_LDR           pBitOffset,ppOffset
+
+        ;//Ending the macro
+
+        M_BD_FINI       ppBitStream,pBitOffset
+        ;// NOTE(review): the MOVEQ/MOVNE below rely on the macros above leaving the CMP flags intact - confirm
+        MOVEQ           Return,#OMX_Sts_NoErr
+        MOVNE           Return,#OMX_Sts_Err
+        M_LDR           LR,pLinkRegister                               ;// Load the Link Register Back
+        B               exit2
+
+ExitError
+        ;// Exit When an Error occurs
+
+        M_LDR           ppBitStream,pppBitStream
+        M_LDR           pBitOffset,ppOffset
+        ;//Ending the macro
+
+        M_BD_FINI       ppBitStream,pBitOffset
+        M_LDR           LR,pLinkRegister
+        MOV             Return,#OMX_Sts_Err
+
+exit2
+
+
+        M_END
+        ENDIF
+        
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
new file mode 100644
index 0000000..ba4d058
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Huff_Tables_VLC.c
@@ -0,0 +1,211 @@
+ /**
+ * 
+ * File Name:  armVCM4P2_Huff_Tables_VLC.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File:        armVCM4P2_Huff_Tables_VLC.c
+ * Description: Contains all the Huffman tables used in MPEG4 codec
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+#include "armCOMM_Bitstream.h"
+
+
+
+
+// Contains optimized and Packed VLC tables with Last=0 and Last=1
+
+//              optimized Packed VLC table Entry Format 
+//              ---------------------------------------
+// 
+//        15 14 13 12 11 10 09 08 07 06 05 04 03 02 01 00
+//       +------------------------------------------------+
+//       |  Len   |       Run       |     Level    |L | 1 |
+//       +------------------------------------------------+
+//       |                Offset                      | 0 |
+//       +------------------------------------------------+
+// If the table entry is a leaf entry then bit 0 set:
+//    Len    = Number of bits overread  (0 to 7)  3 bits
+//    Run    = RunLength of the Symbol  (0 to 63) 6 bits
+//    Level  = Level of the Symbol      (0 to 31) 5 bits
+//    L      = Last Value of the Symbol (0 or 1)  1 bit
+//
+// If the table entry is an internal node then bit 0 is clear:
+//    Offset = Number of (16-bit) half words from the table
+//             start to the next table node
+//
+// The table is accessed by successive lookups on the
+// next Step bits of the input bitstream until a leaf node
+// is obtained. The Step sizes are supplied to the VLD macro.
+
+// The VLC tables used for intra and non-intra coefficients in non-escape mode
+// contain symbols with both Last=0 and Last=1.
+// If a symbol is not found in the table it will be coded as 0xFFF
+ 
+
+const OMX_U16 armVCM4P2_InterVlcL0L1[200] = {   /* packed VLC lookup table (leaf and offset entries); holds both Last=0 and Last=1 symbols */
+    0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x1b09,
+    0x4009, 0x4009, 0x4009, 0x4009, 0x2109, 0x2109, 0x0209, 0x0011,
+    0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058,
+    0x3fff, 0x3fff, 0x0038, 0x0040, 0x2115, 0x2115, 0x201d, 0x201d,
+    0x2059, 0x2059, 0x2051, 0x2051, 0x1c0d, 0x1b0d, 0x1a0d, 0x190d,
+    0x0911, 0x0811, 0x0711, 0x0611, 0x0511, 0x0319, 0x0219, 0x0121,
+    0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088,
+    0x2061, 0x2061, 0x2129, 0x2129, 0x3709, 0x3709, 0x3809, 0x3809,
+    0x3d0d, 0x3d0d, 0x3e0d, 0x3e0d, 0x3f0d, 0x3f0d, 0x200d, 0x200d,
+    0x0098, 0x00a0, 0x00a8, 0x00b0, 0x0131, 0x0221, 0x0419, 0x0519,
+    0x0619, 0x0a11, 0x1909, 0x1a09, 0x210d, 0x220d, 0x230d, 0x240d,
+    0x250d, 0x260d, 0x270d, 0x280d, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+    0x0049, 0x0041, 0x380d, 0x380d, 0x370d, 0x370d, 0x360d, 0x360d,
+    0x350d, 0x350d, 0x340d, 0x340d, 0x330d, 0x330d, 0x320d, 0x320d,
+    0x00e8, 0x00f0, 0x00f8, 0x0100, 0x310d, 0x310d, 0x2015, 0x2015,
+    0x3609, 0x3609, 0x3509, 0x3509, 0x3409, 0x3409, 0x3309, 0x3309,
+    0x3209, 0x3209, 0x3109, 0x3109, 0x0110, 0x0130, 0x0138, 0x0140,
+    0x0118, 0x0120, 0x0128, 0x100d, 0x3009, 0x3009, 0x2f09, 0x2f09,
+    0x2411, 0x2411, 0x2311, 0x2311, 0x2039, 0x2039, 0x2031, 0x2031,
+    0x0f0d, 0x0e0d, 0x0d0d, 0x0c0d, 0x0b0d, 0x0a0d, 0x090d, 0x0e09,
+    0x0d09, 0x0211, 0x0119, 0x0029, 0x0150, 0x0158, 0x0160, 0x0168,
+    0x280d, 0x280d, 0x270d, 0x270d, 0x260d, 0x260d, 0x250d, 0x250d,
+    0x2c09, 0x2c09, 0xb759, 0xb759, 0x2a09, 0x2a09, 0x2021, 0x2021,
+    0x040d, 0x030d, 0x0b35, 0x010d, 0x0909, 0x0809, 0x0709, 0x0609,
+    0x0111, 0x0019, 0x2509, 0x2509, 0x2409, 0x2409, 0x2309, 0x2309
+};
+
+
+const OMX_U16 armVCM4P2_IntraVlcL0L1[200] = {   /* packed VLC lookup table (leaf and offset entries); holds both Last=0 and Last=1 symbols */
+    0x0020, 0x0108, 0x0148, 0x0170, 0x0178, 0x0180, 0x0188, 0x0f09,
+    0x4009, 0x4009, 0x4009, 0x4009, 0x2011, 0x2011, 0x0109, 0x0019,
+    0x0028, 0x0060, 0x00b8, 0x00e0, 0x0030, 0x0048, 0x0050, 0x0058,
+    0x3fff, 0x3fff, 0x0038, 0x0040, 0x203d, 0x203d, 0x2035, 0x2035,
+    0x20b1, 0x20b1, 0x20a9, 0x20a9, 0x0215, 0x011d, 0x002d, 0x0d09,
+    0x0519, 0x0811, 0x0419, 0x0321, 0x0221, 0x0139, 0x00a1, 0x0099,
+    0x0068, 0x0090, 0x3fff, 0x3fff, 0x0070, 0x0078, 0x0080, 0x0088,
+    0x20b9, 0x20b9, 0x20c1, 0x20c1, 0x2141, 0x2141, 0x2911, 0x2911,
+    0x2315, 0x2315, 0x2415, 0x2415, 0x2f0d, 0x2f0d, 0x300d, 0x300d,
+    0x0098, 0x00a0, 0x00a8, 0x00b0, 0x00c9, 0x00d1, 0x00d9, 0x0149,
+    0x0619, 0x0151, 0x0229, 0x0719, 0x0e09, 0x0045, 0x0515, 0x0615,
+    0x110d, 0x120d, 0x130d, 0x140d, 0x00c0, 0x00c8, 0x00d0, 0x00d8,
+    0x0091, 0x0089, 0x2e0d, 0x2e0d, 0x2d0d, 0x2d0d, 0x2c0d, 0x2c0d,
+    0x2b0d, 0x2b0d, 0x2a0d, 0x2a0d, 0x2115, 0x2115, 0x2025, 0x2025,
+    0x00e8, 0x00f0, 0x00f8, 0x0100, 0x2c09, 0x2c09, 0x2b09, 0x2b09,
+    0x2711, 0x2711, 0x2611, 0x2611, 0x2511, 0x2511, 0x2319, 0x2319,
+    0x2219, 0x2219, 0x2131, 0x2131, 0x0110, 0x0130, 0x0138, 0x0140,
+    0x0118, 0x0120, 0x0128, 0x080d, 0x2129, 0x2129, 0x2081, 0x2081,
+    0x2411, 0x2411, 0x2079, 0x2079, 0x2071, 0x2071, 0x2069, 0x2069,
+    0x1bb5, 0x060d, 0x001d, 0xd3f9, 0x0909, 0x0809, 0x090d, 0x0311,
+    0x0121, 0x0061, 0x0059, 0x0051, 0x0150, 0x0158, 0x0160, 0x0168,
+    0x240d, 0x240d, 0x230d, 0x230d, 0x2609, 0x2609, 0x250d, 0x250d,
+    0x2709, 0x2709, 0x2211, 0x2211, 0x2119, 0x2119, 0x2049, 0x2049,
+    0x0015, 0x0509, 0x020d, 0x010d, 0x0409, 0x0309, 0x0041, 0x0039,
+    0x0111, 0x0031, 0x2209, 0x2209, 0x2029, 0x2029, 0x2021, 0x2021
+};
+
+const OMX_U16 armVCM4P2_aIntraDCLumaChromaIndex[64] = {   /* two 32-entry sub-tables stored back to back */
+    0x0020, 0x000b, 0x2009, 0x2009, 0x2007, 0x2007, 0x2001, 0x2001,
+    0x4005, 0x4005, 0x4005, 0x4005, 0x4003, 0x4003, 0x4003, 0x4003,
+    0x0028, 0x000f, 0x200d, 0x200d, 0x0030, 0x0013, 0x2011, 0x2011,
+    0x0038, 0x0017, 0x2015, 0x2015, 0x3fff, 0x3fff, 0x2019, 0x2019,
+    /* second sub-table, entries 32-63 (presumably chroma after luma, going by the name - TODO confirm) */
+	0x0020, 0x0009, 0x2007, 0x2007, 0x4005, 0x4005, 0x4005, 0x4005,
+    0x4003, 0x4003, 0x4003, 0x4003, 0x4001, 0x4001, 0x4001, 0x4001,
+    0x0028, 0x000d, 0x200b, 0x200b, 0x0030, 0x0011, 0x200f, 0x200f,
+    0x0038, 0x0015, 0x2013, 0x2013, 0x1fff, 0x0019, 0x2017, 0x2017
+};
+
+
+const OMX_U16 armVCM4P2_aVlcMVD[124] = {   /* packed VLC lookup table; presumably motion vector difference codes, going by the name - TODO confirm */
+    0x0010, 0x00f0, 0x0043, 0x003f, 0x4041, 0x4041, 0x4041, 0x4041,
+    0x0018, 0x00d8, 0x0047, 0x003b, 0x0020, 0x0080, 0x00a8, 0x00d0,
+    0x0028, 0x0048, 0x0070, 0x0078, 0x1fff, 0x0030, 0x0038, 0x0040,
+    0x0081, 0x0001, 0x007f, 0x0003, 0x207d, 0x207d, 0x2005, 0x2005,
+    0x207b, 0x207b, 0x2007, 0x2007, 0x0050, 0x0058, 0x0060, 0x0068,
+    0x2079, 0x2079, 0x2009, 0x2009, 0x2077, 0x2077, 0x200b, 0x200b,
+    0x2075, 0x2075, 0x200d, 0x200d, 0x2073, 0x2073, 0x200f, 0x200f,
+    0x0071, 0x0011, 0x006f, 0x0013, 0x006d, 0x0015, 0x006b, 0x0017,
+    0x0088, 0x0090, 0x0098, 0x00a0, 0x0069, 0x0019, 0x0067, 0x001b,
+    0x0065, 0x001d, 0x0063, 0x001f, 0x0061, 0x0021, 0x005f, 0x0023,
+    0x005d, 0x0025, 0x005b, 0x0027, 0x00b0, 0x00b8, 0x00c0, 0x00c8,
+    0x0059, 0x0029, 0x0057, 0x002b, 0x2055, 0x2055, 0x202d, 0x202d,
+    0x2053, 0x2053, 0x202f, 0x202f, 0x2051, 0x2051, 0x2031, 0x2031,
+    0x204f, 0x204f, 0x2033, 0x2033, 0x00e0, 0x00e8, 0x0049, 0x0039,
+    0x204d, 0x204d, 0x2035, 0x2035, 0x204b, 0x204b, 0x2037, 0x2037,
+    0x2045, 0x2045, 0x203d, 0x203d
+};
+
+/* LMAX table for Inter blocks (Last == 0 entries followed by Last == 1 entries)
+   Indexed by run (armVCM4P2_DecodeVLCZigzag_AC_unsafe adds the run to the table address)
+   padded armVCM4P2_InterL0L1LMAX[27-31] with zeros so that the Last=1 entries start at index 32
+
+*/
+const OMX_U8 armVCM4P2_InterL0L1LMAX[73] = 
+{
+   12,  6,  4,  3,  3,  3,  3,  2, 
+    2,  2,  2,  1,  1,  1,  1,  1,
+    1,  1,  1,  1,  1,  1,  1,  1,
+    1,  1,  1,  0,  0,  0,  0,  0,
+    3,  2,  1,  1,  1,  1,  1,  1, 
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1
+};
+
+/* RMAX table for Inter blocks (Last == 0 entries followed by Last == 1 entries)
+   Level - 1 Indexed 
+ padded armVCM4P2_InterL0L1RMAX[12-31] with zeros so that the Last=1 entries start at index 32 */
+
+
+const OMX_U8 armVCM4P2_InterL0L1RMAX[35] = 
+{
+   26, 10,  6,  2,  1,  1,   
+    0,  0,  0,  0,  0,  0,
+	0,	0,	0,	0,	0,	0,
+	0,	0,	0,	0,	0,	0,
+	0,	0,	0,	0,
+    0,  0,  0,  0,  40,  1,  0
+};
+
+/* LMAX table for Intra blocks (Last == 0 entries followed by Last == 1 entries)
+   Indexed by run (armVCM4P2_DecodeVLCZigzag_AC_unsafe adds the run to the table address)
+   padded armVCM4P2_IntraL0L1LMAX[15-31] with zeros so that the Last=1 entries start at index 32
+
+*/
+const OMX_U8 armVCM4P2_IntraL0L1LMAX[53] = 
+{
+   27, 10,  5,  4,  3,  3,  3,  
+    3,  2,  2,  1,  1,  1,  1,  1,	0,
+	0,	0,	0,	0,	0,	0,	0,	0,
+	0,	0,	0,	0,	0,	0,	0,	0,
+
+	8,  3,  2,  2,  2,  2,  2,  1, 
+	1,  1,  1,  1,  1,  1,  1,  1,
+	1,  1,  1,  1,  1
+};
+
+
+/* RMAX table for Intra blocks (Last == 0 entries followed by Last == 1 entries)
+   Level - 1 Indexed 
+ padded armVCM4P2_IntraL0L1RMAX[27-31] with zeros so that the Last=1 entries start at index 32 */
+
+
+const OMX_U8 armVCM4P2_IntraL0L1RMAX[40] =
+{
+   14,  9,  7,  3,  2,  1,	1,  
+    1,  1,  1,  0,  0,  0, 	0,  
+    0,  0,  0,  0,  0,  0,  0,  
+    0,  0,  0,  0,  0,  0,  0,
+	0,	0,	0,	0,
+	
+	20,  6,  1,  0,  0,  0,  0,  0
+
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
new file mode 100644
index 0000000..25cf8db
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Lookup_Tables.c
@@ -0,0 +1,75 @@
+ /**
+ * 
+ * File Name:  armVCM4P2_Lookup_Tables.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File:        armVCM4P2_Lookup_Tables.c
+ * Description: Contains all the Lookup tables used in MPEG4 codec
+ *
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+
+    /* * Table Entries contain Dc Scaler values (two 32-entry halves: i=0..31 and i=32..63)
+       * armVCM4P2_DCScaler[i]= 8           for i=1  to  4 and i=33 to 36
+       *                      = 2*i         for i=5  to  8
+       *                      = i+8         for i=9  to  24
+       *                      = 2*i-16      for i=25 to  31
+       *                      = (i-32+13)/2 for i=37 to  56   (integer division)
+       *                      = i-6-32      for i=57 to  63
+       *                      = 255         for i=0 and i=32
+       */
+       
+const OMX_U8 armVCM4P2_DCScaler[64]={
+	0xff, 0x8,  0x8,  0x8,  0x8,  0xa,  0xc,  0xe,  
+    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 
+    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+    0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e,
+    0xff, 0x8,  0x8,  0x8,  0x8,  0x9,  0x9,  0xa,  
+    0xa,  0xb,  0xb,  0xc,  0xc,  0xd,  0xd,  0xe,  
+    0xe,  0xf,  0xf,  0x10, 0x10, 0x11, 0x11, 0x12, 
+    0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
+
+};
+
+              
+     /*  Scaled reciprocals of the integers 1 to 63, indexed by i
+      *  armVCM4P2_Reciprocal_QP_S16[i]=round(32767/i)    e.g. [3] = 0x2aaa = 10922
+      *  armVCM4P2_Reciprocal_QP_S16[0]= 0                (placeholder: 0 has no reciprocal)
+      */
+
+const OMX_S16 armVCM4P2_Reciprocal_QP_S16[64]={
+	0x0000,0x7fff,0x4000,0x2aaa,0x2000,0x1999,0x1555,0x1249,
+    0x1000,0x0e39,0x0ccd,0x0ba3,0x0aab,0x09d9,0x0925,0x0888,
+    0x0800,0x0787,0x071c,0x06bd,0x0666,0x0618,0x05d1,0x0591,
+    0x0555,0x051f,0x04ec,0x04be,0x0492,0x046a,0x0444,0x0421,
+    0x0400,0x03e1,0x03c4,0x03a8,0x038e,0x0376,0x035e,0x0348,
+    0x0333,0x031f,0x030c,0x02fa,0x02e9,0x02d8,0x02c8,0x02b9,
+    0x02ab,0x029d,0x028f,0x0282,0x0276,0x026a,0x025f,0x0254,
+    0x0249,0x023f,0x0235,0x022b,0x0222,0x0219,0x0211,0x0208
+	   
+};
+     
+      /* Scaled reciprocals of the integers 1 to 63, indexed by i
+       * armVCM4P2_Reciprocal_QP_S32[i]=round(131071/i)   e.g. [3] = 0xaaaa = 43690
+       * armVCM4P2_Reciprocal_QP_S32[0]= 0                (placeholder: 0 has no reciprocal)
+       */
+
+const OMX_S32 armVCM4P2_Reciprocal_QP_S32[64]={
+	0x00000000,0x0001ffff,0x00010000,0x0000aaaa, 0x00008000, 0x00006666, 0x00005555, 0x00004924,
+    0x00004000,0x000038e3,0x00003333,0x00002e8c, 0x00002aab, 0x00002762, 0x00002492, 0x00002222,
+    0x00002000,0x00001e1e,0x00001c72,0x00001af2, 0x0000199a, 0x00001861, 0x00001746, 0x00001643,
+    0x00001555,0x0000147b,0x000013b1,0x000012f6, 0x00001249, 0x000011a8, 0x00001111, 0x00001084,
+    0x00001000,0x00000f84,0x00000f0f,0x00000ea1, 0x00000e39, 0x00000dd6, 0x00000d79, 0x00000d21,
+    0x00000ccd,0x00000c7d,0x00000c31,0x00000be8, 0x00000ba3, 0x00000b61, 0x00000b21, 0x00000ae5,
+    0x00000aab,0x00000a73,0x00000a3d,0x00000a0a, 0x000009d9, 0x000009a9, 0x0000097b, 0x0000094f,
+    0x00000925,0x000008fb,0x000008d4,0x000008ae, 0x00000889, 0x00000865, 0x00000842, 0x00000820
+	
+};
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
new file mode 100644
index 0000000..3f92d85
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_SetPredDir_s.s
@@ -0,0 +1,104 @@
+;//
+;// 
+;// File Name:  armVCM4P2_SetPredDir_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+; **
+; * Function: armVCM4P2_SetPredDir
+; *
+; * Description:
+; * Performs detecting the prediction direction
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] blockIndex  block index indicating the component type and
+; *                          position as defined in subclause 6.1.3.8, of ISO/IEC
+; *                          14496-2. Furthermore, indexes 6 to 9 indicate the
+; *                          alpha blocks spatially corresponding to luminance
+; *                          blocks 0 to 3 in the same macroblock.
+; * [in] pCoefBufRow pointer to the coefficient row buffer
+; * [in] pQpBuf      pointer to the quantization parameter buffer
+; * [out]predQP      quantization parameter of the predictor block
+; * [out]predDir     indicates the prediction direction which takes one
+; *                  of the following values:
+; *                  OMX_VC_HORIZONTAL    predict horizontally
+; *                  OMX_VC_VERTICAL      predict vertically
+; *
+; * Return Value:
+; * Standard OMXResult result. See enumeration for possible result codes.
+; *
+; */
+
+       INCLUDE omxtypes_s.h
+       INCLUDE armCOMM_s.h
+       INCLUDE omxVC_s.h
+
+
+       M_VARIANTS ARM1136JS
+
+
+       IF ARM1136JS
+ 
+;// Arguments (the last two are fetched from the stack inside the routine)
+BlockIndex         RN 0
+pCoefBufRow        RN 1
+pCoefBufCol        RN 2
+predDir            RN 3
+predQP             RN 4
+pQpBuf             RN 5
+
+;// Locals: r9 and r14 first hold the two gradients, then the selected QP and direction
+
+Return             RN 0
+dcLeft             RN 6
+dcTop              RN 7
+dcTopLeft          RN 8
+absGradLeft        RN 9
+chosenQP           RN 9
+absGradTop         RN 14
+chosenDir          RN 14
+
+       M_START    armVCM4P2_SetPredDir,r9
+
+       M_ARG       ppredQP,4
+       M_ARG       ppQpBuf,4
+
+       LDRH        dcTopLeft,[pCoefBufRow,#-16]         ;// DC of the block up and to the left
+       LDRH        dcLeft,[pCoefBufCol]                 ;// DC of the block to the left
+
+       TEQ         BlockIndex,#3                        ;// block 3 takes its top DC from the column buffer
+       LDREQH      dcTop,[pCoefBufCol,#-16]
+       LDRNEH      dcTop,[pCoefBufRow]
+
+       SUBS        absGradLeft,dcLeft,dcTopLeft         ;// |left - topLeft|
+       RSBLT       absGradLeft,absGradLeft,#0
+       SUBS        absGradTop,dcTopLeft,dcTop           ;// |topLeft - top|
+       RSBLT       absGradTop,absGradTop,#0
+
+       M_LDR       pQpBuf,ppQpBuf
+       M_LDR       predQP,ppredQP
+       ;// Smaller left gradient -> vertical prediction with pQpBuf[1]; otherwise horizontal with pQpBuf[0]
+       CMP         absGradLeft,absGradTop
+       MOVLT       chosenDir,#OMX_VC_VERTICAL
+       MOVGE       chosenDir,#OMX_VC_HORIZONTAL
+       LDRLTB      chosenQP,[pQpBuf,#1]
+       LDRGEB      chosenQP,[pQpBuf]
+       STR         chosenDir,[predDir]
+       STR         chosenQP,[predQP]
+       MOV         Return,#OMX_Sts_NoErr
+
+
+       M_END
+ 
+       ENDIF
+
+       END    
+    
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
new file mode 100644
index 0000000..ed17f9b
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/armVCM4P2_Zigzag_Tables.c
@@ -0,0 +1,61 @@
+/**
+ * 
+ * File Name:  armVCM4P2_Zigzag_Tables.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * File:        armVCM4P2_ZigZag_Tables.c
+ * Description: Contains the zigzag tables
+ *
+ */
+
+#include "omxtypes.h"
+
+/* Contains double the values of the reference zigzag tables.
+ * Three 64-entry scans are stored back to back in one array:
+ *   [  0.. 63] classical zigzag scan
+ *   [ 64..127] vertical scan
+ *   [128..191] horizontal scan
+ * Each 64-entry scan must be a permutation of the even values 0..126
+ * (i.e. 2 * {0..63}); a repeated or missing value drops a coefficient.
+ * NOTE(review): the doubling presumably lets an entry be used directly as a
+ * byte offset into an OMX_S16 coefficient block - confirm against the users.
+ */
+
+const OMX_U8 armVCM4P2_aClassicalZigzagScan [192] = 
+{
+    /* Classical zigzag scan */
+     0,  2,  16, 32,  18,  4,  6, 20,
+    34, 48, 64, 50, 36, 22,  8,  10,
+    24, 38, 52, 66, 80, 96, 82, 68,
+    54, 40, 26,  12,  14, 28, 42, 56, 
+    70, 84, 98, 112, 114, 100, 86, 72,
+    58, 44, 30, 46, 60, 74, 88, 102,
+    116, 118, 104, 90, 76, 62, 78, 92,
+    106, 120, 122, 108, 94, 110, 124, 126,   /* 4th entry is 2*54 = 108 */
+
+    /* Vertical scan */
+    0,  16, 32, 48,  2,  18,  4, 20,
+    34, 50, 64, 80, 96, 112, 114, 98,
+    82, 66, 52, 36,  6, 22,  8, 24,
+    38, 54, 68, 84, 100, 116, 70, 86,
+    102, 118, 40, 56,  10, 26,  12, 28,
+    42, 58, 72, 88, 104, 120, 74, 90, 
+    106, 122, 44, 60,  14, 30, 46, 62,
+    76, 92, 108, 124, 78, 94, 110, 126,
+
+    /* Horizontal scan */
+    0,  2,  4,  6,  16,  18, 32, 34,
+    20, 22,  8,  10,  12,  14, 30, 28,
+    26, 24, 38, 36, 48, 50, 64, 66,
+    52, 54, 40, 42, 44, 46, 56, 58,
+    60, 62, 68, 70, 80, 82, 96, 98,
+    84, 86, 72, 74, 76, 78, 88, 90, 
+    92, 94, 100, 102, 112, 114, 116, 118,
+    104, 106, 108, 110, 120, 122, 124, 126
+
+
+};
+
+
+
+
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
new file mode 100644
index 0000000..b63d295
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Inter.c
@@ -0,0 +1,102 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_DecodeBlockCoef_Inter.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description: 
+ * Contains modules for inter reconstruction
+ * 
+ */
+ 
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Inter
+ *
+ * Description:
+ * Decodes the INTER block coefficients. Inverse quantization, inversely zigzag
+ * positioning and IDCT, with appropriate clipping on each step, are performed
+ * on the coefficients. The results (residuals) are placed in a contiguous array
+ * of 64 elements. For INTER block, the output buffer holds the residuals for
+ * further reconstruction.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		pointer to the pointer to the current byte in
+ *								the bit stream buffer. There is no boundary
+ *								check for the bit stream buffer.
+ * [in]	pBitOffset		pointer to the bit position in the byte pointed
+ *								to by *ppBitStream. *pBitOffset is valid within
+ *								[0-7]
+ * [in]	QP				quantization parameter
+ * [in] shortVideoHeader    a flag indicating presence of short_video_header;
+ *                           shortVideoHeader==1 indicates using quantization method defined in short
+ *                           video header mode, and shortVideoHeader==0 indicates normal quantization method.
+ * [out] ppBitStream 	*ppBitStream is updated after the block is decoded, so that it points to the
+ *                      current byte in the bit stream buffer.
+ * [out] pBitOffset		*pBitOffset is updated so that it points to the current bit position in the
+ *                      byte pointed by *ppBitStream
+ * [out] pDst			pointer to the decoded residual buffer (a contiguous array of 64 elements of
+ *                      OMX_S16 data type). Must be 16-byte aligned.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *   - At least one of the following pointers is Null: ppBitStream, *ppBitStream, pBitOffset , pDst
+ *   - At least one of the below case:
+ *   - *pBitOffset exceeds [0,7], QP <= 0;
+ *	 - pDst not 16-byte aligned
+ * OMX_Sts_Err - status error
+ *
+ */
+OMXResult omxVCM4P2_DecodeBlockCoef_Inter(
+     const OMX_U8 ** ppBitStream,
+     OMX_INT * pBitOffset,
+     OMX_S16 * pDst,
+     OMX_INT QP,
+     OMX_INT shortVideoHeader
+)
+{
+    /* Backing store for one 8x8 coefficient block: 64 elements plus 15
+       spare elements so a 16-byte aligned start can be chosen inside it */
+    OMX_S16 coefStorage[79];
+    OMX_S16 *pCoef = armAlignTo16Bytes(coefStorage);
+    OMXResult status;
+
+    /* Stage 1: VLC decode and inverse zigzag into the aligned scratch block.
+       Any stage that does not report OMX_Sts_NoErr ends the decode and its
+       status is handed back to the caller. */
+    status = omxVCM4P2_DecodeVLCZigzag_Inter(ppBitStream,
+                                             pBitOffset,
+                                             pCoef,
+                                             shortVideoHeader);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 2: in-place inverse quantisation of the decoded coefficients */
+    status = omxVCM4P2_QuantInvInter_I(pCoef, QP);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Stage 3: 8x8 inverse DCT, residuals written to the caller's buffer */
+    status = omxVCM4P2_IDCT8x8blk(pCoef, pDst);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
new file mode 100644
index 0000000..c609a60
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeBlockCoef_Intra.c
@@ -0,0 +1,208 @@
+/**
+ * 
+ * File Name:  omxVCM4P2_DecodeBlockCoef_Intra.c
+ * OpenMAX DL: v1.0.2
+ * Revision:   9641
+ * Date:       Thursday, February 7, 2008
+ * 
+ * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+ * 
+ * 
+ *
+ * Description: 
+ * Contains modules for intra reconstruction
+ * 
+ */
+
+#include "omxtypes.h"
+#include "armOMX.h"
+#include "omxVC.h"
+
+#include "armCOMM.h"
+#include "armVC.h"
+
+/**
+ * Function: omxVCM4P2_DecodeBlockCoef_Intra
+ *
+ * Description:
+ * Decodes the INTRA block coefficients. Inverse quantization, inversely zigzag
+ * positioning, and IDCT, with appropriate clipping on each step, are performed
+ * on the coefficients. The results are then placed in the output frame/plane on
+ * a pixel basis. For INTRA block, the output values are clipped to [0, 255] and
+ * written to corresponding block buffer within the destination plane.
+ *
+ * Remarks:
+ *
+ * Parameters:
+ * [in]	ppBitStream		pointer to the pointer to the current byte in
+ *								the bit stream buffer. There is no boundary
+ *								check for the bit stream buffer.
+ * [in]	pBitOffset		pointer to the bit position in the byte pointed
+ *								to by *ppBitStream. *pBitOffset is valid within
+ *								[0-7].
+ * [in]	step			width of the destination plane
+ * [in/out]	pCoefBufRow		[in]  pointer to the coefficient row buffer
+ *                        [out] updated coefficient row buffer
+ * [in/out]	pCoefBufCol		[in]  pointer to the coefficient column buffer
+ *                        [out] updated coefficient column buffer
+ * [in]	curQP			quantization parameter of the macroblock which
+ *								the current block belongs to
+ * [in]	pQpBuf		 Pointer to a 2-element QP array. pQpBuf[0] holds the QP of the 8x8 block left to
+ *                   the current block(QPa). pQpBuf[1] holds the QP of the 8x8 block just above the
+ *                   current block(QPc).
+ *                   Note, in case the corresponding block is out of VOP bound, the QP value will have
+ *                   no effect to the intra-prediction process. Refer to subclause  "7.4.3.3 Adaptive
+ *                   ac coefficient prediction" of ISO/IEC 14496-2(MPEG4 Part2) for accurate description.
+ * [in]	blockIndex		block index indicating the component type and
+ *								position as defined in subclause 6.1.3.8,
+ *								Figure 6-5 of ISO/IEC 14496-2. 
+ * [in]	intraDCVLC		a code determined by intra_dc_vlc_thr and QP.
+ *								This allows a mechanism to switch between two VLC
+ *								for coding of Intra DC coefficients as per Table
+ *								6-21 of ISO/IEC 14496-2. 
+ * [in]	ACPredFlag		a flag equal to ac_pred_flag (of luminance) indicating
+ *								if the ac coefficients of the first row or first
+ *								column are differentially coded for intra coded
+ *								macroblock.
+ * [in] shortVideoHeader    a flag indicating presence of short_video_header;
+ *                           shortVideoHeader==1 selects linear intra DC mode,
+ *							and shortVideoHeader==0 selects nonlinear intra DC mode.
+ * [out]	ppBitStream		*ppBitStream is updated after the block is
+ *								decoded, so that it points to the current byte
+ *								in the bit stream buffer
+ * [out]	pBitOffset		*pBitOffset is updated so that it points to the
+ *								current bit position in the byte pointed by
+ *								*ppBitStream
+ * [out]	pDst			pointer to the block in the destination plane.
+ *								pDst should be 16-byte aligned.
+ * [out]	pCoefBufRow		pointer to the updated coefficient row buffer.
+ *
+ * Return Value:
+ * OMX_Sts_NoErr - no error
+ * OMX_Sts_BadArgErr - bad arguments
+ *   -	At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset,
+ *                                                      pCoefBufRow, pCoefBufCol, pQPBuf, pDst.
+ *      or
+ *   -  At least one of the below case: *pBitOffset exceeds [0,7], curQP exceeds (1, 31),
+ *      blockIndex exceeds [0,9], step is not the multiple of 8, intraDCVLC is zero while
+ *      blockIndex greater than 5.
+ *      or
+ *   -	pDst is not 16-byte aligned
+ * OMX_Sts_Err - status error
+ *
+ */
+
+OMXResult omxVCM4P2_DecodeBlockCoef_Intra(
+     const OMX_U8 ** ppBitStream,
+     OMX_INT *pBitOffset,
+     OMX_U8 *pDst,
+     OMX_INT step,
+     OMX_S16 *pCoefBufRow,
+     OMX_S16 *pCoefBufCol,
+     OMX_U8 curQP,
+     const OMX_U8 *pQPBuf,
+     OMX_INT blockIndex,
+     OMX_INT intraDCVLC,
+     OMX_INT ACPredFlag,
+     OMX_INT shortVideoHeader
+ )
+{
+    /* Two scratch blocks (coefficients and IDCT output); each carries 15
+       spare elements so a 16-byte aligned start can be chosen inside it */
+    OMX_S16 coefStorage[79];
+    OMX_S16 idctStorage[79];
+    OMX_S16 *pCoef = armAlignTo16Bytes(coefStorage);
+    OMX_S16 *pIdctOut = armAlignTo16Bytes(idctStorage);
+    OMX_INT predDir;
+    OMX_INT acPredDir;
+    OMX_INT predQP;
+    OMXVCM4P2VideoComponent videoComp;
+    OMXResult status;
+
+    /* Derive the prediction direction and the QP of the predictor block */
+    armVCM4P2_SetPredDir(blockIndex,
+                         pCoefBufRow,
+                         pCoefBufCol,
+                         &predDir,
+                         &predQP,
+                         pQPBuf);
+
+    /* The AC scan follows the DC prediction direction only when AC
+       prediction is switched on for this macroblock */
+    acPredDir = (ACPredFlag == 0) ? OMX_VC_NONE : predDir;
+
+    /* Blocks 0..3 are luminance, everything above is chrominance */
+    videoComp = (blockIndex <= 3) ? OMX_VC_LUMINANCE : OMX_VC_CHROMINANCE;
+
+    /* VLC decode + inverse zigzag; the DC coefficient is coded with either
+       the dedicated intra DC VLC or the AC VLC */
+    if (intraDCVLC == 1)
+    {
+        status = omxVCM4P2_DecodeVLCZigzag_IntraDCVLC(ppBitStream,
+                                                      pBitOffset,
+                                                      pCoef,
+                                                      acPredDir,
+                                                      shortVideoHeader,
+                                                      videoComp);
+    }
+    else
+    {
+        status = omxVCM4P2_DecodeVLCZigzag_IntraACVLC(ppBitStream,
+                                                      pBitOffset,
+                                                      pCoef,
+                                                      acPredDir,
+                                                      shortVideoHeader);
+    }
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* DC/AC prediction and reconstruction; updates the row/column buffers */
+    status = omxVCM4P2_PredictReconCoefIntra(pCoef,
+                                             pCoefBufRow,
+                                             pCoefBufCol,
+                                             curQP,
+                                             predQP,
+                                             predDir,
+                                             ACPredFlag,
+                                             videoComp);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* In-place inverse quantisation */
+    status = omxVCM4P2_QuantInvIntra_I(pCoef,
+                                       curQP,
+                                       videoComp,
+                                       shortVideoHeader);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* 8x8 inverse DCT into the second scratch block */
+    status = omxVCM4P2_IDCT8x8blk(pCoef, pIdctOut);
+    armRetDataErrIf((status != OMX_Sts_NoErr), status);
+
+    /* Clip to [0, 255] and scatter the linear block into the destination
+       plane using the plane stride */
+    armVCM4P2_Clip8(pIdctOut, pDst, step);
+
+    return OMX_Sts_NoErr;
+}
+
+/* End of file */
+
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
new file mode 100644
index 0000000..a1861da
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodePadMV_PVOP_s.s
@@ -0,0 +1,364 @@
+; **********
+; * 
+; * File Name:  omxVCM4P2_DecodePadMV_PVOP_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; * 
+; **
+; * Function: omxVCM4P2_DecodePadMV_PVOP
+; *
+; * Description:
+; * Decodes and pads four motion vectors of the non-intra macroblock in P-VOP.
+; * The motion vector padding process is specified in subclause 7.6.1.6 of
+; * ISO/IEC 14496-2.
+; *
+; * Remarks:
+; *
+; *
+; * Parameters:
+; * [in]    ppBitStream        pointer to the pointer to the current byte in
+; *                            the bit stream buffer
+; * [in]    pBitOffset         pointer to the bit position in the byte pointed
+; *                            to by *ppBitStream. *pBitOffset is valid within
+; *                            [0-7].
+; * [in]    pSrcMVLeftMB       pointers to the motion vector buffers of the
+; *                           macroblocks specially at the left side of the current macroblock
+; *                     respectively.
+; * [in]    pSrcMVUpperMB      pointers to the motion vector buffers of the
+; *                     macroblocks specially at the upper side of the current macroblock
+; *                     respectively.
+; * [in]    pSrcMVUpperRightMB pointers to the motion vector buffers of the
+; *                     macroblocks specially at the upper-right side of the current macroblock
+; *                     respectively.
+; * [in]    fcodeForward       a code equal to vop_fcode_forward in MPEG-4
+; *                     bit stream syntax
+; * [in]    MBType         the type of the current macroblock. If MBType
+; *                     is not equal to OMX_VC_INTER4V, the destination
+; *                     motion vector buffer is still filled with the
+; *                     same decoded vector.
+; * [out]   ppBitStream         *ppBitStream is updated after the block is decoded,
+; *                     so that it points to the current byte in the bit
+; *                     stream buffer
+; * [out]   pBitOffset         *pBitOffset is updated so that it points to the
+; *                     current bit position in the byte pointed by
+; *                     *ppBitStream
+; * [out]   pDstMVCurMB         pointer to the motion vector buffer of the current
+; *                     macroblock which contains four decoded motion vectors
+; *
+; * Return Value:
+; * OMX_Sts_NoErr -no error
+; * 
+; *                     
+; * OMX_Sts_Err - status error
+; *
+; *
+     
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        INCLUDE armCOMM_BitDec_s.h
+        INCLUDE omxVC_s.h
+        
+       M_VARIANTS ARM1136JS
+       
+                
+
+
+        IF ARM1136JS
+
+;//Input Arguments
+
+ppBitStream           RN 0
+pBitOffset            RN 1
+pSrcMVLeftMB          RN 2
+pSrcMVUpperMB         RN 3
+pSrcMVUpperRightMB    RN 4
+pDstMVCurMB           RN 5
+fcodeForward          RN 6
+MBType                RN 7
+
+;//Local Variables
+
+zero                  RN 4
+one                   RN 4
+scaleFactor           RN 1
+
+
+Return                RN 0
+
+VlcMVD                RN 0
+index                 RN 4
+Count                 RN 7
+
+mvHorData             RN 4
+mvHorResidual         RN 0
+
+mvVerData             RN 4             
+mvVerResidual         RN 0
+
+temp                  RN 1
+
+temp1                 RN 3
+High                  RN 4
+Low                   RN 2
+Range                 RN 1
+
+BlkCount              RN 14
+
+diffMVdx              RN 0
+diffMVdy              RN 1
+
+;// Scratch Registers
+
+RBitStream            RN 8
+RBitCount             RN 9
+RBitBuffer            RN 10
+
+T1                    RN 11
+T2                    RN 12
+LR                    RN 14
+
+       IMPORT          armVCM4P2_aVlcMVD
+       IMPORT          omxVCM4P2_FindMVpred
+
+       ;// Allocate stack memory        
+       
+       M_ALLOC4        ppDstMVCurMB,4
+       M_ALLOC4        pDstMVPredME,4
+       M_ALLOC4        pBlkCount,4
+       
+       M_ALLOC4        pppBitStream,4
+       M_ALLOC4        ppBitOffset,4
+       M_ALLOC4        ppSrcMVLeftMB,4
+       M_ALLOC4        ppSrcMVUpperMB,4
+       
+       M_ALLOC4        pdiffMVdx,4
+       M_ALLOC4        pdiffMVdy,4
+       M_ALLOC4        pHigh,4
+       
+              
+
+
+       ;// omxVCM4P2_DecodePadMV_PVOP
+       ;//   r0 ppBitStream, r1 pBitOffset, r2 pSrcMVLeftMB, r3 pSrcMVUpperMB,
+       ;//   stack: pSrcMVUpperRightMB, pDstMVCurMB, fcodeForward, MBType.
+       ;// Returns OMX_Sts_NoErr, or OMX_Sts_Err when the VLC decoder yields the
+       ;// invalid-symbol marker 0xFFF.
+       ;// Register aliasing to keep in mind while reading:
+       ;//   r0 ppBitStream/VlcMVD/diffMVdx/Return, r1 pBitOffset/temp/Range/diffMVdy,
+       ;//   r4 zero/one/index/mvHorData/mvVerData/High, r7 MBType/Count, r14 BlkCount/LR.
+       M_START   omxVCM4P2_DecodePadMV_PVOP,r11
+       
+       M_ARG           pSrcMVUpperRightMBonStack,4           ;// pSrcMVUpperRightMB argument on stack
+       M_ARG           pDstMVCurMBonStack,4                  ;// pDstMVCurMB argument on stack
+       M_ARG           fcodeForwardonStack,4                 ;// fcodeForward argument on stack 
+       M_ARG           MBTypeonStack,4                       ;// MBType argument on stack
+
+      
+       
+       
+       
+       ;// Initializing the BitStream Macro
+
+       M_BD_INIT0      ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+       M_LDR           MBType,MBTypeonStack                  ;// Load MBType from stack
+       M_LDR           pDstMVCurMB,pDstMVCurMBonStack        ;// Load pDstMVCurMB from stack
+       MOV             zero,#0
+
+       ;// Intra macroblocks carry no motion vectors: clear all four entries of
+       ;// pDstMVCurMB (byte offsets 0, 4, 8, 12) and return at once.
+       ;// The EQ condition is relied upon across the M_BD_INIT1/2 expansions.
+       TEQ             MBType,#OMX_VC_INTRA                  ;// Check if MBType=OMX_VC_INTRA
+       TEQNE           MBType,#OMX_VC_INTRA_Q                ;// check if MBType=OMX_VC_INTRA_Q
+       STREQ           zero,[pDstMVCurMB]
+       M_BD_INIT1      T1, T2, T2
+       STREQ           zero,[pDstMVCurMB,#4]
+       M_BD_INIT2      T1, T2, T2
+       STREQ           zero,[pDstMVCurMB,#8]
+       MOVEQ           Return,#OMX_Sts_NoErr
+       MOV             BlkCount,#0
+       STREQ           zero,[pDstMVCurMB,#12]
+       
+       BEQ             ExitOK
+
+       ;// Count (same register as MBType) = number of vectors to decode
+       TEQ             MBType,#OMX_VC_INTER4V                ;// Check if MBType=OMX_VC_INTER4V
+       TEQNE           MBType,#OMX_VC_INTER4V_Q              ;// Check if MBType=OMX_VC_INTER4V_Q
+       MOVEQ           Count,#4
+
+       TEQ             MBType,#OMX_VC_INTER                  ;// Check if MBType=OMX_VC_INTER
+       TEQNE           MBType,#OMX_VC_INTER_Q                ;// Check if MBType=OMX_VC_INTER_Q
+       MOVEQ           Count,#1
+       
+       M_LDR           fcodeForward,fcodeForwardonStack      ;// Load fcodeForward  from stack
+
+       ;// Storing the values temporarily on stack
+
+       M_STR           ppBitStream,pppBitStream              
+       M_STR           pBitOffset,ppBitOffset
+            
+
+       SUB             temp,fcodeForward,#1                  ;// temp=fcodeForward-1
+       MOV             one,#1
+       M_STR           pSrcMVLeftMB,ppSrcMVLeftMB
+       LSL             scaleFactor,one,temp                  ;// scaleFactor=1<<(fcodeForward-1)
+       M_STR           pSrcMVUpperMB,ppSrcMVUpperMB
+       LSL             scaleFactor,scaleFactor,#5            
+       M_STR           scaleFactor,pHigh                     ;// [pHigh]=32*scaleFactor
+              
+       ;// VLD Decoding
+
+
+Loop
+
+       LDR             VlcMVD, =armVCM4P2_aVlcMVD        ;// Load the optimized MVD VLC table
+
+       ;// Horizontal Data and Residual calculation
+
+       LDR             temp,=0xFFF                           
+       M_BD_VLD        index,T1,T2,VlcMVD,3,2                ;// variable length decoding using the macro
+      
+       TEQ             index,temp
+       BEQ             ExitError                             ;// Exit with an error if the decoded symbol is invalid
+       
+       SUB             mvHorData,index,#32                   ;// mvHorData=index-32             
+       MOV             mvHorResidual,#1                      ;// mvHorResidual=1
+       CMP             fcodeForward,#1
+       TEQNE           mvHorData,#0
+       MOVEQ           diffMVdx,mvHorData                    ;// if scaleFactor=1(fcodeForward=1) or mvHorData=0 diffMVdx=mvHorData         
+       BEQ             VerticalData
+       
+       SUB             temp,fcodeForward,#1
+       M_BD_VREAD8     mvHorResidual,temp,T1,T2              ;// get mvHorResidual from bitstream if fcodeForward>1 and mvHorData!=0              
+       
+       ;// The differential is scaled by f=1<<(fcodeForward-1), not by
+       ;// fcodeForward itself (the two only agree for fcodeForward<=2).
+       SUB             T1,fcodeForward,#1
+       MOV             T2,#1
+       LSL             T1,T2,T1                              ;// T1=f=1<<(fcodeForward-1)
+       CMP             mvHorData,#0
+       RSBLT           mvHorData,mvHorData,#0                ;// mvHorData=abs(mvHorData)
+       SUB             mvHorResidual,mvHorResidual,T1
+       SMLABB          diffMVdx,mvHorData,T1,mvHorResidual   ;// diffMVdx=abs(mvHorData)*f+mvHorResidual-f
+       ADD             diffMVdx,diffMVdx,#1
+       RSBLT           diffMVdx,diffMVdx,#0                  ;// negate if mvHorData was negative (flags from CMP above)
+       
+       ;// Vertical Data and Residual calculation
+
+VerticalData
+
+       M_STR           diffMVdx,pdiffMVdx                    ;// Store the diffMVdx on stack
+       LDR             VlcMVD, =armVCM4P2_aVlcMVD        ;// Loading the address of optimized VLC tables
+
+       LDR             temp,=0xFFF
+       M_BD_VLD        index,T1,T2,VlcMVD,3,2                ;// VLC decoding using the macro
+       
+       TEQ             index,temp
+       BEQ             ExitError                             ;// Exit with an error if an invalid symbol occurs
+       
+       SUB             mvVerData,index,#32                   ;// mvVerData=index-32             
+       MOV             mvVerResidual,#1     
+       CMP             fcodeForward,#1
+       TEQNE           mvVerData,#0
+       MOVEQ           diffMVdy,mvVerData                    ;// diffMVdy = mvVerData if scaleFactor=1(fcodeForward=1) or mvVerData=0
+       BEQ             FindMVPred
+
+       SUB             temp,fcodeForward,#1
+       M_BD_VREAD8     mvVerResidual,temp,T1,T2              ;// Get mvVerResidual from bit stream if fcodeForward>1 and mvVerData!=0
+             
+
+       SUB             T1,fcodeForward,#1
+       MOV             T2,#1
+       LSL             T1,T2,T1                              ;// T1=f=1<<(fcodeForward-1)
+       CMP             mvVerData,#0
+       RSBLT           mvVerData,mvVerData,#0                ;// mvVerData=abs(mvVerData)
+       SUB             mvVerResidual,mvVerResidual,T1
+       SMLABB          diffMVdy,mvVerData,T1,mvVerResidual   ;// diffMVdy=abs(mvVerData)*f+mvVerResidual-f
+       ADD             diffMVdy,diffMVdy,#1
+       RSBLT           diffMVdy,diffMVdy,#0                  ;// negate if mvVerData was negative (flags from CMP above)
+
+       ;//Calling the Function omxVCM4P2_FindMVpred
+        
+FindMVPred
+
+       M_STR           diffMVdy,pdiffMVdy
+       ADD             temp,pDstMVCurMB,BlkCount,LSL #2      ;// temp=&pDstMVCurMB[BlkCount]
+       M_STR           temp,ppDstMVCurMB                     ;// store temp on stack for passing as an argument to FindMVPred
+       
+       MOV             temp,#0
+       M_STR           temp,pDstMVPredME                     ;// Pass pDstMVPredME=NULL as an argument         
+       M_STR           BlkCount,pBlkCount                    ;// Pass BlkCount as argument through stack
+
+       ;// Shuffle r0-r3 into the argument order used for omxVCM4P2_FindMVpred
+       MOV             temp,pSrcMVLeftMB                     ;// temp (RN 1)=pSrcMVLeftMB
+       M_LDR           pSrcMVUpperRightMB,pSrcMVUpperRightMBonStack
+       MOV             pSrcMVLeftMB,pSrcMVUpperMB            ;// pSrcMVLeftMB ( RN 2) = pSrcMVUpperMB
+       MOV             ppBitStream,pDstMVCurMB               ;// ppBitStream  ( RN 0) = pDstMVCurMB
+       MOV             pSrcMVUpperMB,pSrcMVUpperRightMB      ;// pSrcMVUpperMB( RN 3) = pSrcMVUpperRightMB      
+       BL              omxVCM4P2_FindMVpred              ;// Branch to subroutine omxVCM4P2_FindMVpred
+
+       ;// Store Horizontal Motion Vector
+     
+       M_LDR           BlkCount,pBlkCount                    ;// Load BlkCount from stack (r14 was overwritten by BL)
+       M_LDR           High,pHigh                            ;// High=32*scaleFactor
+       LSL             temp1,BlkCount,#2                     ;// temp1=BlkCount*4
+       M_LDR           diffMVdx,pdiffMVdx                    ;// Load diffMVdx
+       
+       LDRSH           temp,[pDstMVCurMB,temp1]              ;// temp=pDstMVCurMB[BlkCount].dx
+       
+       
+       RSB             Low,High,#0                           ;// Low = -32*scaleFactor
+       ADD             diffMVdx,temp,diffMVdx                ;// diffMVdx=pDstMVCurMB[BlkCount].dx+diffMVdx
+       ADD             Range,High,High                       ;// Range=64*scaleFactor (overwrites temp, r1)
+       SUB             High,High,#1                          ;// High= 32*scaleFactor-1
+
+       CMP             diffMVdx,Low                          ;// If diffMVdx<Low          
+       ADDLT           diffMVdx,diffMVdx,Range               ;// diffMVdx+=Range
+        
+       CMP             diffMVdx,High                         
+       SUBGT           diffMVdx,diffMVdx,Range               ;// If diffMVdx > High diffMVdx-=Range
+       STRH            diffMVdx,[pDstMVCurMB,temp1]
+
+       ;// Store Vertical
+       ;// The predictor is loaded into scratch T1: temp shares r1 with Range,
+       ;// which must stay intact for the wrap-around below.
+
+       ADD             temp1,temp1,#2                        ;// temp1=4*BlkCount+2
+       M_LDR           diffMVdx,pdiffMVdy                    ;// Load diffMVdy
+       LDRSH           T1,[pDstMVCurMB,temp1]                ;// T1=pDstMVCurMB[BlkCount].dy
+       ADD             BlkCount,BlkCount,#1                  ;// BlkCount=BlkCount+1
+       ADD             diffMVdx,T1,diffMVdx                
+       CMP             diffMVdx,Low
+       ADDLT           diffMVdx,diffMVdx,Range               ;// If diffMVdy<Low  diffMVdy+=Range                
+       CMP             diffMVdx,High
+       SUBGT           diffMVdx,diffMVdx,Range               ;// If diffMVdy > High diffMVdy-=Range
+       STRH            diffMVdx,[pDstMVCurMB,temp1]    
+       
+       CMP             BlkCount,Count
+       M_LDR           pSrcMVLeftMB,ppSrcMVLeftMB
+       M_LDR           pSrcMVUpperMB,ppSrcMVUpperMB
+
+       BLT             Loop                                  ;// If BlkCount<Count Continue the Loop
+
+
+       ;// If MBType=OMX_VC_INTER or MBtype=OMX_VC_INTER_Q copy pDstMVCurMB[0] to
+       ;// pDstMVCurMB[1], pDstMVCurMB[2], pDstMVCurMB[3] 
+
+       M_LDR           MBType,MBTypeonStack                  ;// reload: r7 held Count during the loop
+
+       TEQ             MBType,#OMX_VC_INTER                                       
+       TEQNE           MBType,#OMX_VC_INTER_Q                            
+       LDREQ           temp,[pDstMVCurMB]
+       M_LDR           ppBitStream,pppBitStream
+       STREQ           temp,[pDstMVCurMB,#4]
+       
+       STREQ           temp,[pDstMVCurMB,#8]
+       STREQ           temp,[pDstMVCurMB,#12]
+       
+       
+       M_LDR           pBitOffset,ppBitOffset
+       ;//Ending the macro
+       M_BD_FINI       ppBitStream,pBitOffset                 ;// Finishing the Macro       
+
+       
+       MOV             Return,#OMX_Sts_NoErr
+       B               ExitOK
+ 
+ExitError
+
+       M_LDR           ppBitStream,pppBitStream
+       M_LDR           pBitOffset,ppBitOffset
+       ;//Ending the macro
+       M_BD_FINI       ppBitStream,pBitOffset
+       
+       MOV             Return,#OMX_Sts_Err
+
+ExitOK             
+
+       M_END
+       ENDIF
+       END
+
+
+   
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
new file mode 100644
index 0000000..c43b253
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_Inter_s.s
@@ -0,0 +1,132 @@
+;/**
+; * 
+; * File Name:  omxVCM4P2_DecodeVLCZigzag_Inter_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for zigzag scanning and VLC decoding
+; * for inter block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_Inter
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one inter coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in]    ppBitStream        pointer to the pointer to the current byte in
+; *                    the bitstream buffer
+; * [in]    pBitOffset        pointer to the bit position in the byte pointed
+; *                    to by *ppBitStream. *pBitOffset is valid within    [0-7].
+; * [in] shortVideoHeader     binary flag indicating presence of short_video_header;
+; *                           escape modes 0-3 are used if shortVideoHeader==0,
+; *                           and escape mode 4 is used when shortVideoHeader==1.
+; * [out]    ppBitStream        *ppBitStream is updated after the block is
+; *                    decoded, so that it points to the current byte
+; *                    in the bit stream buffer
+; * [out]    pBitOffset        *pBitOffset is updated so that it points to the
+; *                    current bit position in the byte pointed by
+; *                    *ppBitStream
+; * [out]    pDst            pointer to the coefficient buffer of current
+; *                    block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; *   -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; *   -pDst is not 16-byte aligned, or
+; *   -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; *   -At least one mark bit is equal to zero
+; *   -Encountered an illegal stream code that cannot be found in the VLC table
+; *   -Encountered an illegal code in the VLC FLC table
+; *   -The number of coefficients is greater than 64
+; *
+; */
+
+
+      INCLUDE omxtypes_s.h
+      INCLUDE armCOMM_s.h
+      INCLUDE armCOMM_BitDec_s.h
+
+
+      M_VARIANTS ARM1136JS
+
+     
+
+
+
+     IF ARM1136JS
+     
+        ;// Import various tables needed for the function
+
+        
+        IMPORT          armVCM4P2_InterVlcL0L1             ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+                                                               ;// Packed in Run:Level:Last format
+        IMPORT          armVCM4P2_InterL0L1LMAX            ;// Contains LMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_InterL0L1RMAX            ;// Contains RMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_aClassicalZigzagScan     ;// contains classical Zigzag table entries with double the original values
+        IMPORT          armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+
+
+;//Input Arguments
+
+ppBitStream          RN 0
+pBitOffset           RN 1
+pDst                 RN 2
+shortVideoHeader     RN 3
+
+;//Local Variables
+
+Return               RN 0
+
+pVlcTableL0L1        RN 4
+pLMAXTableL0L1       RN 4
+pRMAXTableL0L1       RN 4
+pZigzagTable         RN 4
+Count                RN 6
+
+
+        
+        ;// Allocate stack memory to store the VLC,Zigzag,LMAX and RMAX tables
+     
+        
+        M_ALLOC4        ppVlcTableL0L1,4
+        M_ALLOC4        ppLMAXTableL0L1,4
+        M_ALLOC4        ppRMAXTableL0L1,4
+        M_ALLOC4        ppZigzagTable,4
+        
+        
+        ;// omxVCM4P2_DecodeVLCZigzag_Inter
+        ;// Thin wrapper: stores the addresses of the inter VLC, LMAX, RMAX and
+        ;// classical zigzag tables in the stack slots declared with M_ALLOC4,
+        ;// sets Count to 0 and calls armVCM4P2_DecodeVLCZigzag_AC_unsafe.
+        ;// r0-r3 (ppBitStream, pBitOffset, pDst, shortVideoHeader) are not
+        ;// modified before the call.
+        ;// NOTE(review): the return value is presumably whatever the unsafe
+        ;// routine leaves in r0 - confirm against that routine.
+        M_START omxVCM4P2_DecodeVLCZigzag_Inter,r12
+
+        
+
+        
+        ;// r4 is reused as the staging register for every table address
+        LDR             pZigzagTable, =armVCM4P2_aClassicalZigzagScan       ;// Load zigzag table
+        M_STR           pZigzagTable,ppZigzagTable                              ;// Store zigzag table on stack to pass as argument to unsafe function
+        LDR             pVlcTableL0L1, =armVCM4P2_InterVlcL0L1              ;// Load optimized VLC table with both L=0 and L=1 entries
+        M_STR           pVlcTableL0L1,ppVlcTableL0L1                            ;// Store optimized VLC table address on stack
+        LDR             pLMAXTableL0L1, =armVCM4P2_InterL0L1LMAX            ;// Load Interleaved L=0 and L=1 LMAX Tables
+        M_STR           pLMAXTableL0L1,ppLMAXTableL0L1                          ;// Store LMAX table address on stack
+        LDR             pRMAXTableL0L1, =armVCM4P2_InterL0L1RMAX            ;// Load Interleaved L=0 and L=1 RMAX Tables
+        MOV             Count,#0                                                ;// set start=0
+        M_STR           pRMAXTableL0L1,ppRMAXTableL0L1                          ;// store RMAX table address on stack
+                
+
+        BL              armVCM4P2_DecodeVLCZigzag_AC_unsafe                 ;// call Unsafe Function for VLC Zigzag Decoding
+         
+       
+
+        M_END
+        ENDIF
+        
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
new file mode 100644
index 0000000..166729e
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
@@ -0,0 +1,136 @@
+;/**
+; * 
+; * File Name:  omxVCM4P2_DecodeVLCZigzag_IntraACVLC_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for zigzag scanning and VLC decoding
+; * for intra block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_IntraACVLC
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in]    ppBitStream        pointer to the pointer to the current byte in
+; *                    the bitstream buffer
+; * [in]    pBitOffset        pointer to the bit position in the byte pointed
+; *                    to by *ppBitStream. *pBitOffset is valid within    [0-7].
+; * [in] shortVideoHeader     binary flag indicating presence of short_video_header;
+; *                           escape modes 0-3 are used if shortVideoHeader==0,
+; *                           and escape mode 4 is used when shortVideoHeader==1.
+; * [out]    ppBitStream        *ppBitStream is updated after the block is
+; *                    decoded, so that it points to the current byte
+; *                    in the bit stream buffer
+; * [out]    pBitOffset        *pBitOffset is updated so that it points to the
+; *                    current bit position in the byte pointed by
+; *                    *ppBitStream
+; * [out]    pDst            pointer to the coefficient buffer of current
+; *                    block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; *   -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; *   -pDst is not 16-byte aligned, or
+; *   -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; *   -At least one mark bit is equal to zero
+; *   -Encountered an illegal stream code that cannot be found in the VLC table
+; *   -Encountered an illegal code in the VLC FLC table
+; *   -The number of coefficients is greater than 64
+; *
+; */
+
+
+      INCLUDE omxtypes_s.h
+      INCLUDE armCOMM_s.h
+      INCLUDE armCOMM_BitDec_s.h
+
+
+      M_VARIANTS ARM1136JS
+
+     
+
+
+
+     IF ARM1136JS
+     
+        ;// Import various tables needed for the function
+
+        
+        IMPORT          armVCM4P2_IntraVlcL0L1             ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+                                                               ;// Packed in Run:Level:Last format
+        IMPORT          armVCM4P2_IntraL0L1LMAX            ;// Contains LMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_IntraL0L1RMAX            ;// Contains RMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_aClassicalZigzagScan     ;// contains classical Zigzag table entries with double the original values
+        IMPORT          armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+;// Input arguments. PredDir and shortVideoHeader intentionally share r3: PredDir
+;// is consumed first, and shortVideoHeader is only loaded from the stack afterwards.
+ppBitStream          RN 0
+pBitOffset           RN 1
+pDst                 RN 2
+PredDir              RN 3
+shortVideoHeader     RN 3
+
+;// Local variables
+
+Return               RN 0
+;// The four table pointers below all alias r4: store each one before loading the next.
+pVlcTableL0L1        RN 4
+pLMAXTableL0L1       RN 4
+pRMAXTableL0L1       RN 4
+pZigzagTable         RN 4
+Count                RN 6
+
+
+        
+        ;// Allocate stack memory to store the optimized VLC, Zigzag, RMAX and LMAX table addresses
+     
+        M_ALLOC4        ppVlcTableL0L1,4
+        M_ALLOC4        ppLMAXTableL0L1,4
+        M_ALLOC4        ppRMAXTableL0L1,4
+        M_ALLOC4        ppZigzagTable,4
+
+        ;// Entry point. NOTE(review): r12 is presumably the highest register M_START saves - confirm in armCOMM_s.h.
+        M_START omxVCM4P2_DecodeVLCZigzag_IntraACVLC,r12
+
+        M_ARG           shortVideoHeaderonStack,4                             ;// Stack-passed input argument: shortVideoHeader
+
+        LDR             pZigzagTable, =armVCM4P2_aClassicalZigzagScan     ;// Load address of the first zigzag table
+        ADD             pZigzagTable, pZigzagTable, PredDir, LSL #6           ;// Select the table for this PredDir: base + PredDir * 64 bytes
+       
+        M_STR           pZigzagTable,ppZigzagTable                            ;// Store zigzag table address on stack
+        LDR             pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1            ;// Load address of the optimized packed VLC table (both L=0 and L=1 entries)
+        M_STR           pVlcTableL0L1,ppVlcTableL0L1                          ;// Store VLC table address on stack
+        LDR             pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX          ;// Load address of the LMAX table
+        M_STR           pLMAXTableL0L1,ppLMAXTableL0L1                        ;// Store LMAX table address on stack
+        LDR             pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX          ;// Load address of the RMAX table
+        MOV             Count,#0                                              ;// Set start=0 (sits between the load above and the store that uses its result)
+        
+        M_STR           pRMAXTableL0L1,ppRMAXTableL0L1                        ;// Store RMAX table address on stack
+              
+        ;// PredDir (r3) is no longer needed from here on, so r3 can now take
+        ;// shortVideoHeader, which is still in r3 when the unsafe function is called.
+        M_LDR           shortVideoHeader,shortVideoHeaderonStack              ;// Get the input argument from the stack
+
+        BL              armVCM4P2_DecodeVLCZigzag_AC_unsafe               ;// Call unsafe function
+
+        ;// Nothing runs between the call and M_END, so this wrapper adds no result
+        ;// handling of its own. NOTE(review): presumably the callee's r0 is the return value.
+        
+        M_END
+        ENDIF
+        
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
new file mode 100644
index 0000000..d19cb13
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
@@ -0,0 +1,224 @@
+;/**
+; * 
+; * File Name:  omxVCM4P2_DecodeVLCZigzag_IntraDCVLC_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for zigzag scanning and VLC decoding
+; * for intra block.
+; *
+; *
+; *
+; * Function: omxVCM4P2_DecodeVLCZigzag_IntraDCVLC
+; *
+; * Description:
+; * Performs VLC decoding and inverse zigzag scan for one intra coded block.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in]    ppBitStream        pointer to the pointer to the current byte in
+; *                    the bitstream buffer
+; * [in]    pBitOffset        pointer to the bit position in the byte pointed
+; *                    to by *ppBitStream. *pBitOffset is valid within    [0-7].
+; * [in] shortVideoHeader     binary flag indicating presence of short_video_header;
+; *                           escape modes 0-3 are used if shortVideoHeader==0,
+; *                           and escape mode 4 is used when shortVideoHeader==1.
+; * [out]    ppBitStream        *ppBitStream is updated after the block is
+; *                    decoded, so that it points to the current byte
+; *                    in the bit stream buffer
+; * [out]    pBitOffset        *pBitOffset is updated so that it points to the
+; *                    current bit position in the byte pointed by
+; *                    *ppBitStream
+; * [out]    pDst            pointer to the coefficient buffer of current
+; *                    block. Must be 16-byte aligned
+; *
+; * Return Value:
+; * OMX_Sts_BadArgErr - bad arguments
+; *   -At least one of the following pointers is NULL: ppBitStream, *ppBitStream, pBitOffset, pDst, or
+; *   -pDst is not 16-byte aligned, or
+; *   -*pBitOffset exceeds [0,7].
+; * OMX_Sts_Err - status error
+; *   -At least one mark bit is equal to zero
+; *   -Encountered an illegal stream code that cannot be found in the VLC table
+; *   -Encountered an illegal code in the VLC FLC table
+; *   -The number of coefficients is greater than 64
+; *
+; */
+
+
+      INCLUDE omxtypes_s.h
+      INCLUDE armCOMM_s.h
+      INCLUDE armCOMM_BitDec_s.h
+
+
+      M_VARIANTS ARM1136JS
+
+     
+      
+
+
+      IF ARM1136JS :LOR: CortexA8
+
+     
+        ;// Import various tables needed for the function
+
+        
+        IMPORT          armVCM4P2_IntraVlcL0L1             ;// Contains optimized and packed VLC Tables for both Last =1 and last=0
+                                                               ;// Packed in Run:Level:Last format
+        IMPORT          armVCM4P2_IntraL0L1LMAX            ;// Contains LMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_IntraL0L1RMAX            ;// Contains RMAX table entries with both Last=0 and Last=1
+        IMPORT          armVCM4P2_aClassicalZigzagScan     ;// contains CLassical, Horizontal, Vertical Zigzag table entries with double the original values
+        IMPORT          armVCM4P2_aIntraDCLumaChromaIndex  ;// Contains Optimized DCLuma and DCChroma Index table Entries
+        
+
+        IMPORT          armVCM4P2_DecodeVLCZigzag_AC_unsafe
+
+;// Input arguments (PredDir and shortVideoHeader share r3; videoComp is read from the stack into r5)
+
+ppBitStream          RN 0
+pBitOffset           RN 1
+pDst                 RN 2
+PredDir              RN 3
+shortVideoHeader     RN 3
+videoComp            RN 5
+;// Local variables
+
+Return               RN 0
+;// r4 is reused for every table pointer; r6 is DCValueSize during DC decoding and Count afterwards.
+pDCLumaChromaIndex   RN 4
+pDCChromaIndex       RN 7
+pVlcTableL0L1        RN 4
+pLMAXTableL0L1       RN 4
+pRMAXTableL0L1       RN 4
+pZigzagTable         RN 4
+Count                RN 6
+DCValueSize          RN 6
+powOfSize            RN 7
+temp1                RN 5
+;// pDCChromaIndex is declared but not referenced by the code in this file; temp1 reuses videoComp's r5.
+
+;// Scratch registers (bitstream reader state and temporaries)
+
+RBitStream           RN 8
+RBitBuffer           RN 9
+RBitCount            RN 10
+
+T1                   RN 11
+T2                   RN 12
+DCVal                RN 14
+;// DCVal lives in r14 (lr), which BL overwrites - hence the pDCCoeff stack slot below.
+        
+        ;// Allocate stack memory to store the optimized VLC, Zigzag, RMAX and LMAX table addresses and the decoded DC value
+     
+        M_ALLOC4        ppVlcTableL0L1,4
+        M_ALLOC4        ppLMAXTableL0L1,4
+        M_ALLOC4        ppRMAXTableL0L1,4
+        M_ALLOC4        ppZigzagTable,4
+        M_ALLOC4        pDCCoeff,4
+        
+
+        ;// Entry point. NOTE(review): r12 is presumably the highest register M_START saves - confirm in armCOMM_s.h.
+        M_START omxVCM4P2_DecodeVLCZigzag_IntraDCVLC,r12
+
+        M_ARG           shortVideoHeaderonStack,4                                  ;// Stack-passed input argument: shortVideoHeader
+        M_ARG           videoComponstack,4                                         ;// Stack-passed input argument: videoComp
+
+        
+        ;// Decode DC Coefficient
+
+        
+        LDR             pDCLumaChromaIndex, =armVCM4P2_aIntraDCLumaChromaIndex ;// Load address of the optimized DC size VLC table (luminance and chrominance)
+
+        ;// Initialise the bitstream reader; independent instructions are interleaved between the INIT steps
+
+        M_BD_INIT0      ppBitStream, pBitOffset, RBitStream, RBitBuffer, RBitCount
+        M_LDR           videoComp,videoComponstack                                 ;// Fetch the videoComp argument from the stack
+        M_BD_INIT1      T1, T2, T2
+        ADD             pDCLumaChromaIndex,pDCLumaChromaIndex,videoComp, LSL #6             ;// Select the sub-table for this videoComp: base + videoComp * 64 bytes
+        M_BD_INIT2      T1, T2, T2
+    
+        
+        M_BD_VLD        DCValueSize,T1,T2,pDCLumaChromaIndex,4,2                    ;// VLC decode of the DC size using the selected luminance/chrominance table
+
+    
+       
+
+DecodeDC
+        ;// A decoded size above 12 is treated as an error; a size of 0 means the DC coefficient is 0.
+        CMP             DCValueSize,#12     
+        BGT             ExitError
+        
+        CMP             DCValueSize,#0
+        MOVEQ           DCVal,#0                                                    ;// If DCValueSize is zero then DC coeff = 0
+        BEQ             ACDecode                                                    ;// Branch to perform AC coefficient decoding
+        
+        M_BD_VREAD16    DCVal,DCValueSize,T1,T2                                     ;// Read DCValueSize bits of DC value from the bitstream
+         
+        ;// Map the raw bits to a signed value: raw values below 2^(size-1) become raw - 2^size + 1.
+        MOV             powOfSize,#1                                                
+        LSL             powOfSize,DCValueSize                                       ;// powOfSize = pow(2,DCValueSize)
+        CMP             DCVal,powOfSize,LSR #1                                      ;// Compare DCVal with powOfSize/2
+        ADDLT           DCVal,DCVal,#1
+        SUBLT           DCVal,DCVal,powOfSize                                       ;// If less than powOfSize/2: DCVal = DCVal - powOfSize + 1
+                                                                                    ;// Else DCVal keeps the bits fetched from the bitstream
+
+CheckDCValueSize
+        
+        CMP             DCValueSize,#8                                              ;// If DCValueSize is greater than 8, check the marker bit
+
+        BLE             ACDecode
+
+        M_BD_READ8      temp1,1,T1
+        TEQ             temp1,#0                                                    ;// If the marker bit is zero, exit with an error
+        BEQ             ExitError
+
+        
+
+        ;// Decode AC Coefficient
+
+ACDecode
+        ;// DCVal is saved to the stack because the BL below overwrites lr (= DCVal's register).
+        M_STR           DCVal,pDCCoeff                                             ;// Store decoded DC coefficient on stack
+        M_BD_FINI       ppBitStream,pBitOffset                                     ;// Terminate the bitstream macro
+         
+        LDR             pZigzagTable, =armVCM4P2_aClassicalZigzagScan          ;// Load zigzag table address
+        ADD             pZigzagTable, pZigzagTable, PredDir, LSL #6                ;// Select the zigzag table for this PredDir: base + PredDir * 64 bytes
+       
+        M_STR           pZigzagTable,ppZigzagTable                                 ;// Store zigzag table address on stack
+        LDR             pVlcTableL0L1, =armVCM4P2_IntraVlcL0L1                 ;// Load address of the optimized VLC table (both Last=0 and Last=1 entries)
+        M_STR           pVlcTableL0L1,ppVlcTableL0L1                               ;// Store optimized VLC table address on stack
+        LDR             pLMAXTableL0L1, =armVCM4P2_IntraL0L1LMAX               ;// Load address of the LMAX table
+        M_STR           pLMAXTableL0L1,ppLMAXTableL0L1                             ;// Store LMAX table address on stack
+        LDR             pRMAXTableL0L1, =armVCM4P2_IntraL0L1RMAX               ;// Load address of the RMAX table
+        MOV             Count,#1                                                   ;// Set start=1 (the DC coefficient is handled by this function)
+        
+        M_STR           pRMAXTableL0L1,ppRMAXTableL0L1                             ;// Store RMAX table address on stack
+        
+        ;// PredDir (r3) has been consumed above, so r3 can now take shortVideoHeader.
+        M_LDR           shortVideoHeader,shortVideoHeaderonStack                   ;// Load the input argument from the stack
+        
+        BL              armVCM4P2_DecodeVLCZigzag_AC_unsafe                    ;// Call the unsafe function
+
+        M_LDR           DCVal,pDCCoeff                                             ;// Get the decoded DC value back from the stack
+        STRH            DCVal,[pDst]                                               ;// Store the DC value as the first 16-bit entry at pDst
+        B               ExitOK
+        
+        ;// NOTE(review): Return (r0) is not written on the path above - presumably it carries the unsafe function's result.
+
+ExitError
+ 
+        M_BD_FINI       ppBitStream,pBitOffset                                     ;// Terminate the bitstream macro in case of an error
+        MOV             Return,#OMX_Sts_Err                                        ;// Exit with an error status
+ExitOK
+      
+        M_END
+        ENDIF
+        
+        END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
new file mode 100644
index 0000000..a4bfa71
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_FindMVpred_s.s
@@ -0,0 +1,194 @@
+;//
+;// 
+;// File Name:  omxVCM4P2_FindMVpred_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+;// Function:
+;//     omxVCM4P2_FindMVpred
+;//
+        ;// Include headers
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        INCLUDE armVCCOMM_s.h
+
+        ;// Define cpu variants
+        M_VARIANTS ARM1136JS
+        
+        
+        IF ARM1136JS
+        ;// Word table holding the addresses of the four per-block case labels defined further down
+        M_TABLE armVCM4P2_pBlkIndexTable
+        DCD  OMXVCBlk0, OMXVCBlk1      ;// entries for blocks 0 and 1
+        DCD  OMXVCBlk2, OMXVCBlk3      ;// entries for blocks 2 and 3
+
+;//--------------------------------------------
+;// Declare input registers (r0-r3 arrive in registers; r4-r6 are loaded from the stack arguments)
+;//--------------------------------------------
+        
+pSrcMVCurMB            RN 0
+pSrcCandMV1            RN 1
+pSrcCandMV2            RN 2
+pSrcCandMV3            RN 3
+pDstMVPred             RN 4
+pDstMVPredME           RN 5
+iBlk                   RN 6
+;// pTable is declared but not referenced by the code in this file
+pTable                 RN 4
+CandMV                 RN 12
+;// Pointers to the three candidate motion vectors selected for the current block
+pCandMV1               RN 7
+pCandMV2               RN 8
+pCandMV3               RN 9
+;// Candidate components reuse r0-r3, so they may only be loaded once the input pointers are dead
+CandMV1dx              RN 0 
+CandMV1dy              RN 1 
+CandMV2dx              RN 2
+CandMV2dy              RN 3
+CandMV3dx              RN 10
+CandMV3dy              RN 11
+;// temp and zero share r14
+temp                   RN 14
+
+zero                   RN 14
+return                 RN 0
+        
+; ----------------------------------------------
+; Main routine
+; ----------------------------------------------        
+;// MV: 4-byte stack slot that is zeroed below and used as the vector for missing candidates
+        M_ALLOC4 MV, 4
+        
+        ;// Function header 
+        M_START omxVCM4P2_FindMVpred, r11
+        
+        ;// Define stack arguments
+        M_ARG   ppDstMVPred,  4
+        M_ARG   ppDstMVPredME, 4
+        M_ARG   Blk, 4
+        
+        M_ADR CandMV, MV
+        MOV   zero, #0
+        M_LDR iBlk, Blk
+        
+        ;// Default all three candidate pointers to the zero vector;
+        ;// this is what is used if pSrcCandMV[1|2|3] == NULL
+        MOV   pCandMV1, CandMV
+        MOV   pCandMV2, CandMV
+        MOV   pCandMV3, CandMV
+    
+        STR   zero, [CandMV]            ;// zero vector: dx = dy = 0
+
+        ;// Branch to the case based on blk number
+        M_SWITCH iBlk
+        M_CASE   OMXVCBlk0      ;// iBlk=0
+        M_CASE   OMXVCBlk1      ;// iBlk=1
+        M_CASE   OMXVCBlk2      ;// iBlk=2
+        M_CASE   OMXVCBlk3      ;// iBlk=3
+        M_ENDSWITCH
+        ;// Block 0: candidates are entry [1] of MV1, entry [2] of MV2 and entry [2] of MV3 (4 bytes per entry)
+OMXVCBlk0
+        CMP   pSrcCandMV1, #0
+        ADDNE pCandMV1, pSrcCandMV1, #4
+        
+        CMP   pSrcCandMV2, #0
+        ADDNE pCandMV2, pSrcCandMV2, #8
+
+        CMP   pSrcCandMV3, #0
+        ADDNE pCandMV3, pSrcCandMV3, #8
+        CMPEQ pSrcCandMV1, #0
+        ;// MV3 and MV1 both NULL: use candidate 2 for all three
+        MOVEQ pCandMV3, pCandMV2
+        MOVEQ pCandMV1, pCandMV2
+                
+        CMP   pSrcCandMV1, #0
+        CMPEQ pSrcCandMV2, #0
+        ;// MV1 and MV2 both NULL: use candidate 3 for all three
+        MOVEQ pCandMV1, pCandMV3
+        MOVEQ pCandMV2, pCandMV3
+        
+        CMP   pSrcCandMV2, #0
+        CMPEQ pSrcCandMV3, #0
+        ;// MV2 and MV3 both NULL: use candidate 1 for all three
+        MOVEQ pCandMV2, pCandMV1
+        MOVEQ pCandMV3, pCandMV1
+        
+        B     BlkEnd
+        ;// Block 1: candidate 1 is entry [0] of the current MB; 2 and 3 are entry [3] of MV2 and entry [2] of MV3
+OMXVCBlk1
+        MOV   pCandMV1, pSrcMVCurMB
+        CMP   pSrcCandMV3, #0
+        ADDNE pCandMV3, pSrcCandMV3, #8
+        
+        CMP   pSrcCandMV2, #0
+        ADDNE pCandMV2, pSrcCandMV2, #12
+        ;// MV2 and MV3 both NULL: use candidate 1 for all three
+        CMPEQ pSrcCandMV3, #0
+    
+        MOVEQ pCandMV2, pCandMV1
+        MOVEQ pCandMV3, pCandMV1
+            
+        B     BlkEnd
+        ;// Block 2: candidate 1 is entry [3] of MV1 (zero vector if NULL); 2 and 3 are entries [0] and [1] of the current MB
+OMXVCBlk2
+        CMP   pSrcCandMV1, #0
+        MOV   pCandMV2, pSrcMVCurMB
+        ADD   pCandMV3, pSrcMVCurMB, #4
+        ADDNE pCandMV1, pSrcCandMV1, #12
+        B     BlkEnd
+        ;// Block 3: all candidates come from the current MB (entries [2], [0] and [1])
+OMXVCBlk3
+        ADD   pCandMV1, pSrcMVCurMB, #8
+        MOV   pCandMV2, pSrcMVCurMB
+        ADD   pCandMV3, pSrcMVCurMB, #4
+    
+BlkEnd
+
+        ;// Load the signed 16-bit x component of each candidate; the post-increment
+        ;// leaves every pointer on the matching y component
+        LDRSH CandMV1dx, [pCandMV1], #2
+        LDRSH CandMV2dx, [pCandMV2], #2
+        LDRSH CandMV3dx, [pCandMV3], #2
+    
+        ;// Load argument from the stack
+        M_LDR pDstMVPredME, ppDstMVPredME
+
+        LDRSH CandMV1dy, [pCandMV1]
+        LDRSH CandMV2dy, [pCandMV2]
+        LDRSH CandMV3dy, [pCandMV3]
+
+        CMP pDstMVPredME, #0        
+
+        ;// If pDstMVPredME is not NULL, store the three candidate MVs into it (dx, dy pairs);
+        ;// these can be used in the fast algorithm if implemented 
+
+        STRHNE CandMV1dx, [pDstMVPredME], #2
+        STRHNE CandMV1dy, [pDstMVPredME], #2        
+        STRHNE CandMV2dx, [pDstMVPredME], #2
+        STRHNE CandMV2dy, [pDstMVPredME], #2
+        STRHNE CandMV3dx, [pDstMVPredME], #2
+        STRHNE CandMV3dy, [pDstMVPredME]
+           
+        ; Find the median of the 3 candidate MVs (x components); presumably returned in the third operand, which is what is stored below
+        M_MEDIAN3 CandMV1dx, CandMV2dx, CandMV3dx, temp
+
+        ;// Load argument from the stack
+        M_LDR pDstMVPred, ppDstMVPred
+
+        M_MEDIAN3 CandMV1dy, CandMV2dy, CandMV3dy, temp
+        ;// Write the predicted MV (dx, dy) to pDstMVPred
+        STRH CandMV3dx, [pDstMVPred], #2
+        STRH CandMV3dy, [pDstMVPred]
+
+        MOV return, #OMX_Sts_NoErr
+    
+        M_END
+    ENDIF ;// ARM1136JS
+    
+    END
+\ No newline at end of file
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
new file mode 100644
index 0000000..bfeb540
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_IDCT8x8blk_s.s
@@ -0,0 +1,73 @@
+;//
+;// 
+;// File Name:  omxVCM4P2_IDCT8x8blk_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+
+;// Function:
+;//     omxVCM4P2_IDCT8x8blk
+;//
+        ;// Include headers
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+
+        ;// Define cpu variants
+        M_VARIANTS ARM1136JS
+
+        INCLUDE armCOMM_IDCT_s.h        
+        
+        IMPORT armCOMM_IDCTPreScale
+        ;//
+        ;// Function prototype
+        ;//
+        ;//     OMXResult
+        ;//     omxVCM4P2_IDCT8x8blk(const OMX_S16* pSrc,
+        ;//                                       OMX_S16* pDst)
+        ;//    
+        
+    IF ARM1136JS :LOR: CortexA8
+        M_ALLOC4  ppDest, 4             ;// stack areas not referenced directly in this file;
+        M_ALLOC4  pStride, 4            ;// presumably used inside M_IDCT - confirm in armCOMM_IDCT_s.h
+        M_ALLOC8  pBlk, 2*8*8           ;// 128 bytes
+    ENDIF
+    ;// NOTE(review): M_START is guarded by ARM1136JS alone, the body and M_END by either variant.
+    IF ARM1136JS
+        M_START omxVCM4P2_IDCT8x8blk, r11
+    ENDIF
+    ;// Only ARM1136JS is named in M_VARIANTS above; presumably CortexA8 is false in this build.
+        
+    IF ARM1136JS :LOR: CortexA8
+        
+;// Declare input registers
+pSrc            RN 0
+pDst            RN 1
+
+;// Declare other intermediate registers
+Result          RN 0
+
+;// Prototype for macro M_IDCT
+;// pSrc            RN 0  ;// source data buffer
+;// Stride          RN 1  ;// destination stride in bytes
+;// pDest           RN 2  ;// destination data buffer
+;// pScale          RN 3  ;// pointer to scaling table
+;// pSrc is re-declared with the same register; Stride aliases pDst (r1) and is never written in this file.
+pSrc    RN 0    
+Stride  RN 1    
+pDest   RN 2    
+pScale  RN 3    
+                
+        MOV         pDest, pDst                         ;// M_IDCT expects the destination pointer in r2
+        LDR         pScale, =armCOMM_IDCTPreScale        ;// address of the pre-scale table
+        M_IDCT      s9, s16, 16      ;// NOTE(review): presumably input format, output format and stride - confirm in armCOMM_IDCT_s.h
+        MOV         Result, #OMX_Sts_NoErr
+        M_END       
+    ENDIF  
+        ;// ARM1136JS :LOR: CortexA8
+
+    END
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
new file mode 100644
index 0000000..20965bf
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_MCReconBlock_s.s
@@ -0,0 +1,713 @@
+;//
+;// 
+;// File Name:  omxVCM4P2_MCReconBlock_s.s
+;// OpenMAX DL: v1.0.2
+;// Revision:   9641
+;// Date:       Thursday, February 7, 2008
+;// 
+;// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+;// 
+;// 
+;//
+;// Description:
+;//
+;//
+
+;// Include standard headers
+    INCLUDE omxtypes_s.h
+    INCLUDE armCOMM_s.h
+
+;// Import symbols required from other files
+
+    M_VARIANTS ARM1136JS
+
+;// ***************************************************************************
+;// ARM1136JS implementation
+;// ***************************************************************************
+    IF  ARM1136JS
+    
+;// ***************************************************************************
+;// MACRO DEFINITIONS
+;// ***************************************************************************
+    ;// Description:
+    ;//
+    ;//   dest[j] = (x[j] + y[j] + round) >> 1,   j=0..3
+    ;//
+    ;// Similar to UHADD8 instruction, but with a rounding value of 1 added to
+    ;// each sum before dividing by two, if round is 1
+    ;//
+    ;// Syntax:
+    ;// M_UHADD8R   $dest, $x, $y, $round, $mask
+    ;//
+    ;// Inputs:
+    ;// $x        four packed bytes,   x[3] :  x[2]  :  x[1]  :  x[0]
+    ;// $y        four packed bytes,   y[3] :  y[2]  :  y[1]  :  y[0]
+    ;// $round    0 if no rounding to be added, 1 if rounding to be done
+    ;// $mask     some register set to 0x80808080
+    ;//
+    ;// Outputs:
+    ;// $dest     four packed bytes,   z[3] :  z[2]  :  z[1]  :  z[0]
+
+    MACRO
+    M_UHADD8R   $dest, $x, $y, $round, $mask
+    IF $round /= 1
+        UHADD8      $dest, $x, $y               ;// no rounding: plain per-byte halving add
+    ELSE
+        ;// Rounding: (x + y + 1) >> 1 per byte == ((x - NOT(y)) >> 1) EOR 0x80; invert whichever input may be overwritten
+        IF  $dest = $y
+            MVN         $dest, $y
+            UHSUB8      $dest, $x, $dest
+        ELSE
+            MVN         $dest, $x
+            UHSUB8      $dest, $y, $dest
+        ENDIF
+        EOR         $dest, $dest, $mask         ;// add the 0x80 bias back (mask = 0x80808080)
+    ENDIF
+    MEND
+;// ***************************************************************************
+    ;// Description:
+    ;// Load 8 bytes from $pSrc (aligned or unaligned locations)
+    ;//
+    ;// Syntax:
+    ;// M_LOAD_X    $pSrc, $srcStep, $out0, $out1, $scratch, $offset
+    ;// 
+    ;// Inputs:
+    ;// $pSrc       4 byte aligned source pointer to an address just less than 
+    ;//             or equal to the data location
+    ;// $srcStep    The stride on source
+    ;// $scratch    A scratch register, used internally for temp calculations
+    ;// $offset     Difference of source data location to the source pointer
+    ;//             Use when $offset != 0 (unaligned load)
+    ;//
+    ;// Outputs:
+    ;// $pSrc       In case the macro accepts stride, it increments the pSrc by 
+    ;//             that value, else unchanged
+    ;// $out0       four packed bytes,   z[3] :  z[2]  :  z[1]  :  z[0]
+    ;// $out1       four packed bytes,   z[7] :  z[6]  :  z[5]  :  z[4]
+    ;//
+    ;// Note: {$out0, $out1, $scratch} should be registers with ascending
+    ;// register numbering. In case offset is 0, $scratch is not modified.
+
+    MACRO
+    M_LOAD_X    $pSrc, $srcStep, $out0, $out1, $scratch, $offset
+        IF $offset = 0
+            LDM         $pSrc, {$out0, $out1}                   ;// aligned: the two words are the 8 bytes
+            ADD         $pSrc, $pSrc, $srcStep                  ;// advance to the next row
+        ELSE
+            LDM         $pSrc, {$out0, $out1, $scratch}         ;// unaligned: the 8 bytes span three words
+            ADD         $pSrc, $pSrc, $srcStep                  ;// advance to the next row
+            ;// Shift the wanted bytes down and merge in the low bytes of the next word (argument parenthesised in every shift amount)
+            MOV         $out0, $out0, LSR #(8 * ($offset))
+            ORR         $out0, $out0, $out1, LSL #(32 - 8 * ($offset))
+            MOV         $out1, $out1, LSR #(8 * ($offset))
+            ORR         $out1, $out1, $scratch, LSL #(32 - 8 * ($offset))
+        ENDIF
+    MEND
+
+;// ***************************************************************************
+    ;// Description:
+    ;// Loads three words for X interpolation, update pointer to next row. For 
+    ;// X interpolation, given a truncated-4byteAligned source pointer, 
+    ;// invariably three contiguous words are required from there to get the
+    ;// nine bytes from the source pointer for filtering. 
+    ;//
+    ;// Syntax:
+    ;// M_LOAD_XINT $pSrc, $srcStep, $offset, $word0, $word1, $word2, $word3
+    ;// 
+    ;// Inputs:
+    ;// $pSrc       4 byte aligned source pointer to an address just less than 
+    ;//             or equal to the data location
+    ;//
+    ;// $srcStep    The stride on source
+    ;//
+    ;// $offset     Difference of source data location to the source pointer
+    ;//             Use when $offset != 0 (unaligned load)
+    ;//
+    ;// Outputs:
+    ;// $pSrc       Incremented by $srcStep
+    ;//
+    ;// $word0, $word1, $word2, $word3
+    ;//             Three of these are outputs based on the $offset parameter. 
+    ;//             The outputs are specifically generated to be processed by 
+    ;//             the M_EXT_XINT macro. Following is the illustration to show 
+    ;//             how the nine bytes are spanned for different offsets from 
+    ;//             notTruncatedForAlignmentSourcePointer.
+    ;//
+    ;//              ------------------------------------------------------
+    ;//             | Offset | Aligned Ptr | word0 | word1 | word2 | word3 |
+    ;//             |------------------------------------------------------|
+    ;//             |    0   |       0     | 0123  | 4567  | 8xxx  |       |
+    ;//             |    1   |      -1     | x012  | 3456  | 78xx  |       |
+    ;//             |    2   |      -2     | xx01  | 2345  | 678x  |       |
+    ;//             |    3   |      -3     | xxx0  |       | 1234  | 5678  |
+    ;//              ------------------------------------------------------
+    ;// 
+    ;//             where the numbering (0-8) is to designate the 9 bytes from
+    ;//             start of a particular row. The illustration doesn't take in 
+    ;//             account the positioning of bytes with in the word and the 
+    ;//             macro combination with M_EXT_XINT will work only in little 
+    ;//             endian environs
+    ;// 
+    ;// Note: {$word0, $word1, $word2, $word3} should be registers with ascending
+    ;// register numbering
+
+    MACRO
+    M_LOAD_XINT $pSrc, $srcStep, $offset, $word0, $word1, $word2, $word3
+        IF $offset = 3
+            LDM         $pSrc, {$word0, $word2, $word3}     ;// offset 3: second and third words land directly in word2/word3
+        ELSE
+            LDM         $pSrc, {$word0, $word1, $word2}     ;// offsets 0-2: three consecutive words into word0..word2
+        ENDIF
+        ADD         $pSrc, $pSrc, $srcStep                  ;// advance to the next row
+    MEND
+
+;// ***************************************************************************
+    ;// Description:
+    ;// Extract four registers of four pixels for X interpolation 
+    ;// 
+    ;// Syntax:
+    ;// M_EXT_XINT $offset, $word0, $word1, $word2, $word3
+    ;// 
+    ;// Inputs:
+    ;// $offset     Difference of source data location to the source pointer
+    ;//             Use when $offset != 0 (unaligned load)
+    ;// 
+    ;// $word0, $word1, $word2, $word3
+    ;//             Three of these are inputs based on the $offset parameter. 
+    ;//             The inputs are laid out as produced by the M_LOAD_XINT
+    ;//             macro for the same offset.
+    ;//
+    ;//              ------------------------------------------------------
+    ;//             | Offset | Aligned Ptr | word0 | word1 | word2 | word3 |
+    ;//             |------------------------------------------------------|
+    ;//             |    0   |       0     | 0123  | 4567  | 8xxx  | yyyy  |
+    ;//             |    1   |      -1     | x012  | 3456  | 78xx  | yyyy  |
+    ;//             |    2   |      -2     | xx01  | 2345  | 678x  | yyyy  |
+    ;//             |    3   |      -3     | xxx0  | yyyy  | 1234  | 5678  |
+    ;//              ------------------------------------------------------
+    ;// 
+    ;// Outputs:
+    ;// $word0, $word1, $word2, $word3
+    ;//             Bytes from the original source pointer (not truncated for
+    ;//             4 byte alignment) as shown in the table. 
+    ;//              -------------------------------
+    ;//             | word0 | word1 | word2 | word3 |
+    ;//             |-------------------------------|
+    ;//             | 0123  | 4567  | 1234  | 5678  |
+    ;//              -------------------------------
+    ;//
+    ;// Note: {$word0, $word1, $word2, $word3} should be registers with ascending
+    ;// register numbering
+
+    MACRO
+    M_EXT_XINT $offset, $word0, $word1, $word2, $word3
+        IF $offset = 0
+            ; $word0 and $word1 already hold bytes 0-3 and 4-7
+            ; $word2, $word3 are the same stream shifted down by one byte ($word3 first: it needs the loaded $word2)
+            MOV         $word3, $word1, LSR #8
+            ORR         $word3, $word3, $word2, LSL #24
+            MOV         $word2, $word0, LSR #8
+            ORR         $word2, $word2, $word1, LSL #24
+        ELIF $offset = 3
+            ; $word2 and $word3 already hold bytes 1-4 and 5-8 (taken care of while loading)
+            ; build $word0 and $word1 from them
+            MOV         $word0, $word0, LSR #24
+            ORR         $word0, $word0, $word2, LSL #8
+            MOV         $word1, $word2, LSR #24
+            ORR         $word1, $word1, $word3, LSL #8
+        ELSE
+            MOV         $word0, $word0, LSR #(8 * ($offset))
+            ORR         $word0, $word0, $word1, LSL #(32 - 8 * ($offset))
+            MOV         $word1, $word1, LSR #(8 * ($offset))
+            ORR         $word1, $word1, $word2, LSL #(32 - 8 * ($offset))
+            ; $word0/$word1 now hold bytes 0-3 and 4-7; derive the one-byte-shifted pair ($word3 first: it needs the loaded $word2)
+            MOV         $word3, $word1, LSR #8
+            ORR         $word3, $word3, $word2, LSL #(32 - 8 * (($offset)+1))
+            MOV         $word2, $word0, LSR #8
+            ORR         $word2, $word2, $word1, LSL #24
+        ENDIF
+    MEND
+
+;// ***************************************************************************
+    ;// Description:
+    ;// Computes half-sum and xor of two inputs and puts them in the input 
+    ;// registers in that order
+    ;//
+    ;// Syntax:
+    ;// M_HSUM_XOR      $v0, $v1, $tmp
+    ;// 
+    ;// Inputs:
+    ;// $v0         a, first input
+    ;// $v1         b, second input
+    ;// $tmp        scratch register
+    ;// 
+    ;// Outputs:
+    ;// $v0         (a + b)/2
+    ;// $v1         a ^ b
+
+    MACRO
+    M_HSUM_XOR      $v0, $v1, $tmp
+        ;// $tmp is needed because the EOR must still see the original $v0;
+        ;// on exit $tmp holds the same half-sum as $v0.
+        UHADD8      $tmp, $v0, $v1     ;// s0 = (a + b) >> 1, per byte lane
+        EOR         $v1, $v0, $v1      ;// l0 = a ^ b
+        MOV         $v0, $tmp          ;// s0
+    MEND
+;// ***************************************************************************
+    ;// Description:
+    ;// Calculates average of 4 values (a,b,c,d) for HalfPixelXY predict type in 
+    ;// mcReconBlock module. Very specific to the implementation of 
+    ;// M_MCRECONBLOCK_HalfPixelXY done here. Uses "tmp" as scratch register and 
+    ;// "yMask" for mask variable "0x1010101x" set in it. In yMask 4 lsbs are 
+    ;// not significant and are used by the callee for row counter (y)
+    ;//
+    ;// Some points to note are:
+    ;// 1. Input is pair of pair-averages and Xors
+    ;// 2. $sum1 and $lsb1 are not modified and hence can be reused in another 
+    ;//    running average
+    ;// 3. Output is in the first argument
+    ;//
+    ;// Syntax:
+    ;// M_AVG4         $sum0, $lsb0, $sum1, $lsb1, $rndVal
+    ;// 
+    ;// Inputs:
+    ;// $sum0       (a + b) >> 1, where a and b are 1st and 2nd inputs to be averaged
+    ;// $lsb0       (a ^ b)
+    ;// $sum1       (c + d) >> 1. Not modified
+    ;// $lsb1       (c ^ d)       Not modified
+    ;// $rndVal     Assembler Variable. 0 for rounding, 1 for no rounding
+    ;// 
+    ;// Outputs:
+    ;// $sum0       (a + b + c + d + 1) / 4 : If no rounding
+    ;//             (a + b + c + d + 2) / 4 : If rounding
+
+    MACRO
+    M_AVG4          $sum0, $lsb0, $sum1, $lsb1, $rndVal
+        ;// Register usage beyond the parameters (fixed by the SETS strings
+        ;// below): the register named "tmp" is used as scratch, and yMask is
+        ;// read (shifted right by 4) as the per-byte mask.
+        ;// $lsb0 is overwritten: it is reused to hold the intermediate
+        ;// half-sum s2, so only $sum1 and $lsb1 survive the macro.
+        ;// OP1/OP2 select the logic that decides when 1 is added per byte:
+        ;//   $rndVal = 0 (rounding)    : (s0 ^ s1) | (l0 & l1)
+        ;//   $rndVal = 1 (no rounding) : (s0 ^ s1) & (l0 | l1)
+        LCLS OP1
+        LCLS OP2
+        IF $rndVal = 0 ;// rounding case
+OP1 SETS "AND"
+OP2 SETS "ORR"
+        ELSE           ;// Not rounding case
+OP1 SETS "ORR"
+OP2 SETS "AND"
+        ENDIF
+        
+        LCLS lsb2
+        LCLS sum2
+        LCLS dest
+    
+lsb2  SETS "tmp"
+sum2  SETS "$lsb0"
+dest  SETS "$sum0"
+
+        $OP1        $lsb0, $lsb0, $lsb1          ;// e0 = e0 OP1 e1 (AND if rounding, ORR if not)
+        EOR         $lsb2, $sum0, $sum1          ;// e2 = s0 ^ s1
+        $OP2        $lsb2, $lsb2, $lsb0          ;// e2 = e2 OP2 e0 (ORR if rounding, AND if not)
+        AND         $lsb2, $lsb2, yMask, LSR # 4 ;// e2 = e2 & mask (keep bit 0 of every byte)
+        UHADD8      $sum2, $sum0, $sum1          ;// s2 = (s0 + s1)/2, written into the $lsb0 register
+        UADD8       $dest, $sum2, $lsb2          ;// dest =  s2 + e2
+    MEND
+;// ***************************************************************************
+;// Motion compensation handler macros
+;// ***************************************************************************
+    ;// Description:
+    ;// Implement motion compensation routines using the named registers in 
+    ;// callee function. Each of the following 4 implement the 4 predict type
+    ;// Each handles 8 cases each ie all the combinations of 4 types of source 
+    ;// alignment offsets and 2 types of rounding flag
+    ;//
+    ;// Syntax:
+    ;// M_MCRECONBLOCK_IntegerPixel $rndVal, $offset
+    ;// M_MCRECONBLOCK_HalfPixelX   $rndVal, $offset
+    ;// M_MCRECONBLOCK_HalfPixelY   $rndVal, $offset
+    ;// M_MCRECONBLOCK_HalfPixelXY  $rndVal, $offset
+    ;// 
+    ;// Inputs:
+    ;// $rndVal     Assembler Variable. 0 for rounding, 1 for no rounding
+    ;// $offset     $pSrc MOD 4 value. Offset from 4 byte aligned location.
+    ;// 
+    ;// Outputs:
+    ;// Outputs come in the named registers of the callee functions
+    ;// The macro loads the data from the source pointer, processes it and 
+    ;// stores in the destination pointer. Does the whole prediction cycle
+    ;// of Motion Compensation routine for a particular predictType
+    ;// After this only residue addition to the predicted values remain
+
+    MACRO
+    M_MCRECONBLOCK_IntegerPixel $rndVal, $offset
+    ;// Algorithmic Description:
+    ;// This handles motion compensation for IntegerPixel predictType. Both
+    ;// rounding cases are handled by the same code base. It is just a copy
+    ;// from source to destination. Two lines are done per loop to reduce 
+    ;// stalls. Loop has been software pipelined as well for that purpose.
+    ;// 
+    ;// M_LOAD_X loads a whole row in two registers and then they are stored
+    ;//
+    ;// $rndVal is not referenced in the body; both rounding labels below mark
+    ;// the same address. Uses the caller's named registers pSrc, srcStep,
+    ;// pDst, dstStep, y and tmp1..tmp5, and leaves through
+    ;// SwitchPredictTypeEnd.
+    ;// NOTE(review): because the loads for the next two rows are issued after
+    ;// the stores, the final iteration issues two M_LOAD_X calls whose results
+    ;// are never stored. M_LOAD_X is defined outside this view; confirm that
+    ;// reading past the last row of the block is acceptable for the callers.
+    
+CaseIntegerPixelRnd0Offset$offset
+CaseIntegerPixelRnd1Offset$offset
+    ;// Prologue of the pipeline: fetch rows 0 and 1
+    M_LOAD_X    pSrc, srcStep, tmp1, tmp2, tmp3, $offset
+    M_LOAD_X    pSrc, srcStep, tmp3, tmp4, tmp5, $offset
+YloopIntegerPixelOffset$offset
+    SUBS        y, y, #2                    ;// two rows per pass; flags consumed by BGT below
+    STRD        tmp1, tmp2, [pDst], dstStep ;// store a row, then pDst += dstStep
+    STRD        tmp3, tmp4, [pDst], dstStep
+    M_LOAD_X    pSrc, srcStep, tmp1, tmp2, tmp3, $offset
+    M_LOAD_X    pSrc, srcStep, tmp3, tmp4, tmp5, $offset
+    BGT         YloopIntegerPixelOffset$offset
+
+    B           SwitchPredictTypeEnd
+    MEND
+;// ***************************************************************************
+    MACRO
+    M_MCRECONBLOCK_HalfPixelX $rndVal, $offset
+    ;// Algorithmic Description:
+    ;// This handles motion compensation for HalfPixelX predictType. The two
+    ;// rounding cases are handled by the different code base and spanned by 
+    ;// different macro calls. Loop has been software pipelined to reduce 
+    ;// stalls.
+    ;// 
+    ;// Filtering involves averaging a pixel with the next horizontal pixel.
+    ;// M_LOAD_XINT and M_EXT_XINT combination generate 4 registers, 2 with 
+    ;// all pixels in a row with 4 pixel in each register and another 2
+    ;// registers with pixels corresponding to one horizontally shifted pixel
+    ;// corresponding to the initial row pixels. These are set of packed 
+    ;// registers appropriate to do 4 lane SIMD.
+    ;// After that M_UHADD8R macro does the averaging taking care of the 
+    ;// rounding as required
+    ;//
+    ;// Uses the caller's named registers pSrc, srcStep, pDst, dstStep, y,
+    ;// mask and tmp1..tmp6, and leaves through SwitchPredictTypeEnd.
+    ;// One row is produced per pass. The M_LOAD_XINT for the following row is
+    ;// issued before the loop branch, so the last pass loads a row that is
+    ;// never used (NOTE(review): M_LOAD_XINT is defined outside this view).
+    
+CaseHalfPixelXRnd$rndVal.Offset$offset
+    ;// mask (same register as rndVal) is only set up for the rounding variant
+    IF $rndVal = 0
+        LDR mask, =0x80808080
+    ENDIF
+
+    M_LOAD_XINT pSrc, srcStep, $offset, tmp1, tmp2, tmp3, tmp4
+YloopHalfPixelXRnd$rndVal.Offset$offset
+    SUBS        y, y, #1                        ;// flags consumed by BGT below
+    M_EXT_XINT  $offset, tmp1, tmp2, tmp3, tmp4 ;// tmp1:tmp2 = bytes 0..7, tmp3:tmp4 = bytes 1..8
+    M_UHADD8R   tmp5, tmp1, tmp3, (1-$rndVal), mask
+    M_UHADD8R   tmp6, tmp2, tmp4, (1-$rndVal), mask
+    STRD        tmp5, tmp6, [pDst], dstStep     ;// store the row, then pDst += dstStep
+    M_LOAD_XINT pSrc, srcStep, $offset, tmp1, tmp2, tmp3, tmp4
+    BGT         YloopHalfPixelXRnd$rndVal.Offset$offset
+
+    B           SwitchPredictTypeEnd
+    MEND
+;// ***************************************************************************
+    MACRO
+    M_MCRECONBLOCK_HalfPixelY $rndVal, $offset
+    ;// Algorithmic Description:
+    ;// This handles motion compensation for HalfPixelY predictType. The two
+    ;// rounding cases are handled by the different code base and spanned by 
+    ;// different macro calls. PreLoading is used to avoid reload of same data. 
+    ;// 
+    ;// Filtering involves averaging a pixel with the next vertical pixel.
+    ;// M_LOAD_X generates 2 registers with all pixels in a row with 4 pixel in 
+    ;// each register. These are set of packed registers appropriate to do 
+    ;// 4 lane SIMD. After that M_UHADD8R macro does the averaging taking care 
+    ;// of the rounding as required
+    ;//
+    ;// Uses the caller's named registers pSrc, srcStep, pDst, dstStep, y,
+    ;// mask and tmp1..tmp5, and leaves through SwitchPredictTypeEnd.
+    ;// Two output rows are produced per pass; the row pair tmp1:tmp2 loaded
+    ;// in the second half is kept as the "previous row" for the next pass.
+    
+CaseHalfPixelYRnd$rndVal.Offset$offset
+    ;// mask (same register as rndVal) is only set up for the rounding variant
+    IF $rndVal = 0
+        LDR mask, =0x80808080
+    ENDIF
+
+    M_LOAD_X    pSrc, srcStep, tmp1, tmp2, tmp5, $offset ;// Pre-load
+YloopHalfPixelYRnd$rndVal.Offset$offset
+    SUBS        y, y, #2                    ;// flags consumed by BGT at the end of the pass
+    ;// Processing one line
+    M_LOAD_X    pSrc, srcStep, tmp3, tmp4, tmp5, $offset
+    M_UHADD8R   tmp1, tmp1, tmp3, (1-$rndVal), mask
+    M_UHADD8R   tmp2, tmp2, tmp4, (1-$rndVal), mask
+    STRD        tmp1, tmp2, [pDst], dstStep
+    ;// Processing another line
+    M_LOAD_X    pSrc, srcStep, tmp1, tmp2, tmp5, $offset
+    M_UHADD8R   tmp3, tmp3, tmp1, (1-$rndVal), mask
+    M_UHADD8R   tmp4, tmp4, tmp2, (1-$rndVal), mask
+    STRD        tmp3, tmp4, [pDst], dstStep
+
+    BGT         YloopHalfPixelYRnd$rndVal.Offset$offset
+
+    B           SwitchPredictTypeEnd
+    MEND
+;// ***************************************************************************
+    MACRO
+    M_MCRECONBLOCK_HalfPixelXY $rndVal, $offset
+    ;// Algorithmic Description:
+    ;// This handles motion compensation for HalfPixelXY predictType. The two
+    ;// rounding cases are handled by the different code base and spanned by 
+    ;// different macro calls. PreLoading is used to avoid reload of same data. 
+    ;// 
+    ;// Filtering involves averaging a pixel with the next vertical, horizontal 
+    ;// and right-down diagonal pixels. Just as in HalfPixelX case, M_LOAD_XINT
+    ;// and M_EXT_XINT combination generates 4 registers with a row and its
+    ;// 1 pixel right shifted version, with 4 pixels in one register. Another 
+    ;// call of that macro-combination gets another row. Then M_HSUM_XOR is 
+    ;// called to get mutual half-sum and xor combinations of a row with its
+    ;// shifted version as they are inputs to the M_AVG4 macro which computes
+    ;// the 4 element average with rounding. Note that it is the half-sum/xor 
+    ;// values that are preserved for next row as they can be re-used in the 
+    ;// next call to the M_AVG4 and saves recomputation.
+    ;// Due to lack of register, the row counter and a masking value required 
+    ;// in M_AVG4 are packed into a single register yMask where the last nibble
+    ;// holds the row counter values and rest holds the masking variable left 
+    ;// shifted by 4
+    ;//
+    ;// Loop termination:
+    ;// The row counter lives in the low nibble of yMask (8, 6, 4, 2, 0) and is
+    ;// tested with TST, which updates N and Z but leaves V untouched. The loop
+    ;// therefore has to branch on Z alone (BNE). A signed condition such as GT
+    ;// also reads V, and no instruction on the path from function entry to
+    ;// this loop sets V, so it would depend on whatever the caller left there.
+    
+CaseHalfPixelXYRnd$rndVal.Offset$offset
+    LDR         yMask, =((0x01010101 << 4) + 8)
+
+    M_LOAD_XINT pSrc, srcStep, $offset, t00, t01, t10, t11 ;// Load a, a', b, b'
+    M_EXT_XINT  $offset, t00, t01, t10, t11
+    M_HSUM_XOR  t00, t10, tmp               ;// s0, l0
+    M_HSUM_XOR  t01, t11, tmp               ;// s0', l0'
+
+YloopHalfPixelXYRnd$rndVal.Offset$offset
+    ;// Processing one line
+    ;// t00, t01, t10, t11 required from previous loop
+    M_LOAD_XINT pSrc, srcStep, $offset, t20, t21, t30, t31 ;// Load c, c', d, d'
+    SUB         yMask, yMask, #2            ;// two rows per pass; only the low nibble changes
+    M_EXT_XINT  $offset, t20, t21, t30, t31
+    M_HSUM_XOR  t20, t30, tmp               ;// s1, l1
+    M_HSUM_XOR  t21, t31, tmp               ;// s1', l1'
+    M_AVG4      t00, t10, t20, t30, $rndVal ;// s0, l0, s1, l1
+    M_AVG4      t01, t11, t21, t31, $rndVal ;// s0', l0', s1', l1'
+    STRD        t00, t01, [pDst], dstStep   ;// store the average
+    
+    ;// Processing another line
+    ;// t20, t21, t30, t31 required from above
+    M_LOAD_XINT pSrc, srcStep, $offset, t00, t01, t10, t11 ;// Load a, a', b, b'
+    TST         yMask, #7                   ;// Z set once the row counter reaches 0
+    M_EXT_XINT  $offset, t00, t01, t10, t11
+    M_HSUM_XOR  t00, t10, tmp
+    M_HSUM_XOR  t01, t11, tmp
+    M_AVG4      t20, t30, t00, t10, $rndVal
+    M_AVG4      t21, t31, t01, t11, $rndVal
+    STRD        t20, t21, [pDst], dstStep
+
+    ;// Flags still come from the TST above: nothing in between writes them
+    BNE         YloopHalfPixelXYRnd$rndVal.Offset$offset
+
+    ;// The last handler emitted (rounding 1, offset 3) falls straight through
+    ;// into SwitchPredictTypeEnd, so it needs no branch
+    IF $offset/=3 :LOR: $rndVal/=1
+        B           SwitchPredictTypeEnd
+    ENDIF
+    MEND
+;// ***************************************************************************
+;// Motion compensation handler macros end here
+;// ***************************************************************************
+    ;// Description:
+    ;// Populates all 4 kinds of offsets "cases" for each predictType and rndVal
+    ;// combination in the "switch" to prediction processing code segment
+    ;//
+    ;// Syntax:
+    ;// M_CASE_OFFSET $rnd, $predictType
+    ;// 
+    ;// Inputs:
+    ;// $rnd            0 for rounding, 1 for no rounding
+    ;// $predictType    The prediction mode
+    ;// 
+    ;// Outputs:
+    ;// Populated list of "M_CASE"s for the "M_SWITCH" macro
+
+    MACRO
+    M_CASE_OFFSET $rnd, $predictType
+        ;// One switch entry per source offset, in ascending offset order.
+        ;// The order matters: the switch index is built with the offset in
+        ;// its two lowest bits.
+        M_CASE      Case$predictType.Rnd$rnd.Offset0
+        M_CASE      Case$predictType.Rnd$rnd.Offset1
+        M_CASE      Case$predictType.Rnd$rnd.Offset2
+        M_CASE      Case$predictType.Rnd$rnd.Offset3
+    MEND
+;// ***************************************************************************
+    ;// Description:
+    ;// Populates all 2 kinds of rounding "cases" for each predictType in the 
+    ;// "switch" to prediction processing code segment
+    ;//
+    ;// Syntax:
+    ;// M_CASE_MCRECONBLOCK $predictType
+    ;// 
+    ;// Inputs:
+    ;// $predictType    The prediction mode
+    ;// 
+    ;// Outputs:
+    ;// Populated list of "M_CASE_OFFSET" macros
+
+    MACRO
+    M_CASE_MCRECONBLOCK $predictType
+        ;// Emits the 8 switch entries of one predictType: the 4 offsets for
+        ;// rounding value 0 followed by the 4 offsets for rounding value 1.
+        M_CASE_OFFSET  0, $predictType ;// 0 for rounding
+        M_CASE_OFFSET  1, $predictType ;// 1 for no rounding
+    MEND
+;// ***************************************************************************
+    ;// Description:
+    ;// Populates all 8 kinds of rounding and offset combinations handling macros 
+    ;// for the specified predictType. In case of "IntegerPixel" predictType, 
+    ;// rounding is not required so same code segment handles both cases
+    ;//
+    ;// Syntax:
+    ;// M_MCRECONBLOCK    $predictType
+    ;// 
+    ;// Inputs:
+    ;// $predictType    The prediction mode
+    ;// 
+    ;// Outputs:
+    ;// Populated list of "M_MCRECONBLOCK_<predictType>" macros for specified 
+    ;// predictType. Each 
+    ;//                 M_MCRECONBLOCK_<predictType> $rnd, $offset 
+    ;// is an code segment (starting with a label indicating the predictType, 
+    ;// rounding and offset combination)
+    ;// Four calls of this macro with the 4 prediction modes populate all the 32 
+    ;// handlers
+
+    MACRO
+    M_MCRECONBLOCK $predictType
+        ;// Rounding value 0 handlers, one per source offset. For IntegerPixel
+        ;// each of these also carries the Rnd1 label, so they serve both
+        ;// rounding values.
+        M_MCRECONBLOCK_$predictType 0, 0
+        M_MCRECONBLOCK_$predictType 0, 1
+        M_MCRECONBLOCK_$predictType 0, 2
+        M_MCRECONBLOCK_$predictType 0, 3
+    IF "$predictType" /= "IntegerPixel" ;// If not IntegerPixel then rounding makes a difference
+        ;// Rounding value 1 handlers. The (1, 3) handler is emitted last; the
+        ;// HalfPixelXY variant of it has no trailing branch and relies on
+        ;// being placed directly before SwitchPredictTypeEnd.
+        M_MCRECONBLOCK_$predictType 1, 0
+        M_MCRECONBLOCK_$predictType 1, 1
+        M_MCRECONBLOCK_$predictType 1, 2
+        M_MCRECONBLOCK_$predictType 1, 3
+    ENDIF
+    MEND
+;// ***************************************************************************
+;// Input/Output Registers
+;// Several names below deliberately share one physical register; each alias
+;// is only valid in the phase of the function where it is used:
+;//   r1  : srcStep (prediction phase)       / dst (residue phase)
+;//   r2  : arg_pSrcResidue (on entry)       / dstStep (after it is spilled)
+;//   r10 : predictType (switch index)       / tmp7, t30
+;//   r11 : rndVal (on entry)                / mask, tmp8, t31
+;//   r12 : pSrcResidue (residue phase)      / zero, tmp9, tmp
+;//   r14 : y (row counter)                  / yMask (counter + mask packed)
+pSrc                  RN 0
+srcStep               RN 1
+arg_pSrcResidue       RN 2
+pSrcResidue           RN 12
+pDst                  RN 3
+dstStep               RN 2
+predictType           RN 10
+rndVal                RN 11
+mask                  RN 11
+
+;// Local Scratch Registers
+zero                  RN 12
+y                     RN 14
+
+;// Generic scratch set used by the IntegerPixel / HalfPixelX / HalfPixelY
+;// handlers and by the residue loop
+tmp1                  RN 4
+tmp2                  RN 5
+tmp3                  RN 6
+tmp4                  RN 7
+tmp5                  RN 8
+tmp6                  RN 9
+tmp7                  RN 10
+tmp8                  RN 11
+tmp9                  RN 12
+
+;// Names used by the HalfPixelXY handler; they overlay r4-r12 above
+t00                   RN 4
+t01                   RN 5
+t10                   RN 6
+t11                   RN 7
+t20                   RN 8
+t21                   RN 9
+t30                   RN 10
+t31                   RN 11
+tmp                   RN 12
+
+yMask                 RN 14
+
+dst                   RN 1
+return                RN 0
+
+    ;// omxVCM4P2_MCReconBlock
+    ;// Builds an 8-row prediction at pDst from pSrc using one of the 32
+    ;// handlers generated below, then, when pSrcResidue is not NULL, adds the
+    ;// 16-bit residues to the predicted bytes with clipping to [0,255].
+    ;// In:  r0 = pSrc, r1 = srcStep, r2 = pSrcResidue, r3 = pDst,
+    ;//      stack = dstStep, predictType, rndVal (in that order)
+    ;// Out: r0 = OMX_Sts_NoErr (no argument checking is done here)
+    ;// NOTE(review): M_START/M_END/M_ALLOC4/M_ARG are defined outside this
+    ;// view; they are presumed to save r4-r11 and lr and to manage the frame.
+    ;// Allocate memory on stack
+    M_ALLOC4    Stk_pDst,           4
+    M_ALLOC4    Stk_pSrcResidue,    4
+    ;// Function header
+    M_START     omxVCM4P2_MCReconBlock, r11
+    ;// Define stack arguments
+    M_ARG       Arg_dstStep,        4
+    M_ARG       Arg_predictType,    4
+    M_ARG       Arg_rndVal,         4
+    ;// Save on stack
+    ;// pDst is advanced by the prediction handlers and is needed again from
+    ;// the start for the residue pass; pSrcResidue shares r2 with dstStep
+    M_STR       pDst, Stk_pDst
+    M_STR       arg_pSrcResidue, Stk_pSrcResidue
+    ;// Load argument from the stack
+    M_LDR       dstStep, Arg_dstStep
+    M_LDR       predictType, Arg_predictType
+    M_LDR       rndVal, Arg_rndVal
+    
+    MOV         y, #8
+    
+    ;// Switch index = predictType*8 + rndVal*4 + (pSrc & 3); this matches the
+    ;// order in which the M_CASE entries are emitted below
+    AND         tmp1, pSrc, #3
+    ORR         predictType, tmp1, predictType, LSL #3
+    ORR         predictType, predictType, rndVal, LSL #2
+    ;// Truncating source pointer to align to 4 byte location
+    BIC         pSrc, pSrc, #3
+
+    ;// Implementation takes care of all combinations of different 
+    ;// predictTypes, rounding cases and source pointer offsets to alignment 
+    ;// of 4 bytes in different code bases unless one of these parameter wasn't 
+    ;// making any difference to the implementation. Below M_CASE_MCRECONBLOCK
+    ;// macros branch into 8 M_CASE macros for all combinations of the 2 
+    ;// rounding cases and 4 offsets of the pSrc pointer to the 4 byte 
+    ;// alignment. 
+    M_SWITCH    predictType
+        M_CASE_MCRECONBLOCK IntegerPixel
+        M_CASE_MCRECONBLOCK HalfPixelX
+        M_CASE_MCRECONBLOCK HalfPixelY
+        M_CASE_MCRECONBLOCK HalfPixelXY
+    M_ENDSWITCH
+
+    ;// The M_MCRECONBLOCK macros populate the code bases by calling all 8 
+    ;// particular macros (4 in case of IntegerPixel as rounding makes no 
+    ;// difference there) to generate the code for all cases of rounding and 
+    ;// offsets. LTORG is used to segment the code as code size bloated beyond 
+    ;// 4KB.
+    M_MCRECONBLOCK IntegerPixel
+    M_MCRECONBLOCK HalfPixelX
+    LTORG
+    M_MCRECONBLOCK HalfPixelY
+    ;// HalfPixelXY must stay last: its final handler falls through into
+    ;// SwitchPredictTypeEnd without a branch
+    M_MCRECONBLOCK HalfPixelXY
+SwitchPredictTypeEnd
+
+    ;// Residue Addition
+    ;// This is done in 2 lane SIMD though loads are further optimized and
+    ;// 4 bytes are loaded in case of destination buffer. Algorithmic 
+    ;// details are in inlined comments
+    M_LDR       pSrcResidue, Stk_pSrcResidue
+    CMP         pSrcResidue, #0
+    BEQ         pSrcResidueConditionEnd
+pSrcResidueNotNull    
+    M_LDR       pDst, Stk_pDst
+    MOV         y, #8
+    ;// Each row is written as two words: the first store advances pDst by 4,
+    ;// the second by (dstStep - 4), giving dstStep per row in total
+    SUB         dstStep, dstStep, #4
+Yloop_pSrcResidueNotNull
+    SUBS        y, y, #1                   ;// flags consumed by BGT at the end of the row
+    LDR         dst, [pDst]                ;// dst = [dcba]
+    LDMIA       pSrcResidue!, {tmp1, tmp2} ;// tmp1=[BA] tmp2=[DC] (16-bit residues for pixels a..d)
+    PKHBT       tmp3, tmp1, tmp2, LSL #16  ;// Deltaval1 = [C A]
+    PKHTB       tmp4, tmp2, tmp1, ASR #16  ;// DeltaVal2 = [D B]
+    UXTB16      tmp1, dst                  ;// tmp1 = [0c0a]
+    UXTB16      tmp2, dst, ROR #8          ;// tmp2 = [0d0b]
+    QADD16      tmp1, tmp1, tmp3           ;// Add and saturate to 16 bits
+    QADD16      tmp2, tmp2, tmp4
+    USAT16      tmp1, #8, tmp1             ;// armClip(0, 255, tmp1)
+    USAT16      tmp2, #8, tmp2             ;// armClip(0, 255, tmp2)
+    ORR         tmp1, tmp1, tmp2, LSL #8   ;// tmp1 = [dcba]
+    STR         tmp1, [pDst], #4
+    
+    ;// Same sequence for pixels 4..7 of the row
+    LDR         dst, [pDst]
+    LDMIA       pSrcResidue!, {tmp1, tmp2}
+    PKHBT       tmp3, tmp1, tmp2, LSL #16
+    PKHTB       tmp4, tmp2, tmp1, ASR #16
+    UXTB16      tmp1, dst
+    UXTB16      tmp2, dst, ROR #8
+    QADD16      tmp1, tmp1, tmp3
+    QADD16      tmp2, tmp2, tmp4
+    USAT16      tmp1, #8, tmp1
+    USAT16      tmp2, #8, tmp2
+    ORR         tmp1, tmp1, tmp2, LSL #8
+    STR         tmp1, [pDst], dstStep
+    
+    BGT         Yloop_pSrcResidueNotNull
+pSrcResidueConditionEnd
+
+    MOV         return, #OMX_Sts_NoErr
+
+    M_END
+    ENDIF ;// ARM1136JS
+
+;// ***************************************************************************
+;// CortexA8 implementation
+;// ***************************************************************************
+    END
+;// ***************************************************************************
+;// omxVCM4P2_MCReconBlock ends
+;// ***************************************************************************
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
new file mode 100644
index 0000000..213444a
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_PredictReconCoefIntra_s.s
@@ -0,0 +1,283 @@
+; **********
+; * 
+; * File Name:  omxVCM4P2_PredictReconCoefIntra_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; * 
+; * Description:
+; * Contains module for DC/AC coefficient prediction
+; *
+; * 
+; * Function: omxVCM4P2_PredictReconCoefIntra
+; *
+; * Description:
+; * Performs adaptive DC/AC coefficient prediction for an intra block. Prior
+; * to the function call, prediction direction (predDir) should be selected
+; * as specified in subclause 7.4.3.1 of ISO/IEC 14496-2.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in]  pSrcDst      pointer to the coefficient buffer which contains the 
+; *                    quantized coefficient residuals (PQF) of the current 
+; *                    block; must be aligned on a 4-byte boundary. The 
+; *                    output coefficients are saturated to the range 
+; *                    [-2048, 2047].
+; * [in]  pPredBufRow  pointer to the coefficient row buffer; must be aligned
+; *                    on a 4-byte boundary.
+; * [in]  pPredBufCol  pointer to the coefficient column buffer; must be 
+; *                    aligned on a 4-byte boundary.
+; * [in]  curQP        quantization parameter of the current block. curQP may 
+; *                    equal to predQP especially when the current block and 
+; *                    the predictor block are in the same macroblock.
+; * [in]  predQP       quantization parameter of the predictor block
+; * [in]  predDir      indicates the prediction direction which takes one
+; *                    of the following values:
+; *                    OMX_VIDEO_HORIZONTAL    predict horizontally
+; *                    OMX_VIDEO_VERTICAL        predict vertically
+; * [in]  ACPredFlag   a flag indicating if AC prediction should be
+; *                    performed. It is equal to ac_pred_flag in the bit
+; *                    stream syntax of MPEG-4
+; * [in]  videoComp    video component type (luminance, chrominance or
+; *                    alpha) of the current block
+; * [out] pSrcDst      pointer to the coefficient buffer which contains
+; *                    the quantized coefficients (QF) of the current
+; *                    block
+; * [out] pPredBufRow  pointer to the updated coefficient row buffer
+; * [out] pPredBufCol  pointer to the updated coefficient column buffer
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - Bad arguments 
+; * - At least one of the pointers is NULL: pSrcDst, pPredBufRow, or pPredBufCol.
+; * - At least one of the following cases: curQP <= 0, predQP <= 0, curQP > 31,
+; *   predQP > 31, predDir is outside [1,2].
+; * - At least one of the pointers pSrcDst, pPredBufRow, or pPredBufCol is not 
+; *   4-byte aligned.
+; *
+; *********
+     
+        INCLUDE omxtypes_s.h
+        INCLUDE armCOMM_s.h
+        
+       M_VARIANTS ARM1136JS
+       
+             
+
+       IMPORT        armVCM4P2_Reciprocal_QP_S32
+       IMPORT        armVCM4P2_Reciprocal_QP_S16
+       IMPORT        armVCM4P2_DCScaler
+       
+
+
+        IF ARM1136JS
+
+
+;// Input Arguments
+;// r0-r3 arrive in registers; predQP, predDir, ACPredFlag and videoComp are
+;// declared as stack arguments (M_ARG) in the function and are loaded into
+;// the registers named here when needed.
+
+pSrcDst          RN 0
+pPredBufRow      RN 1
+pPredBufCol      RN 2
+curQP            RN 3
+QP               RN 3
+predQP           RN 4
+predDir          RN 5
+ACPredFlag       RN 6
+videoComp        RN 7  
+
+;// Local Variables
+;// Many locals overlay an argument register or another local; an alias is
+;// only valid between the point it is written and the point the register is
+;// reused:
+;//   r4  : dcScaler (DC phase)       / predQP (loaded after the DC phase)
+;//   r5  : predDir                   / temp2 (horizontal loop only)
+;//   r6  : temp3 (DC phase)          / ACPredFlag / absCoeffAC (AC loops)
+;//   r7  : videoComp                 / negdcScaler / negCurQP
+;//   r9  : absCoeffDC (DC phase)     / CoeffTable (AC phase)
+;//   r10 : predCoeffTable            / Count
+;//   r12 : index                     / temp1
+;// shortVideoHeader is not referenced by the function that follows.
+
+temp2            RN 5
+negCurQP         RN 7
+negdcScaler      RN 7
+tempPred         RN 8
+
+dcScaler         RN 4
+CoeffTable       RN 9
+absCoeffDC       RN 9
+temp3            RN 6
+absCoeffAC       RN 6
+
+shortVideoHeader RN 9
+predCoeffTable   RN 10
+Count            RN 10
+temp1            RN 12
+index            RN 12
+Rem              RN 14
+temp             RN 11
+Return           RN 0
+
+       
+
+       ;// omxVCM4P2_PredictReconCoefIntra
+       ;// In:  r0 = pSrcDst, r1 = pPredBufRow, r2 = pPredBufCol, r3 = curQP,
+       ;//      stack = predQP, predDir, ACPredFlag, videoComp (in that order)
+       ;// Out: r0 = OMX_Sts_NoErr (no argument checking is done here)
+       ;// Steps: (1) DC prediction: the signed DC predictor is divided by
+       ;// dcScaler with rounding (reciprocal multiply plus remainder fix-up),
+       ;// added to pSrcDst[0] and clipped to [-2048,2047];
+       ;// (2) when ACPredFlag == 1, the 7 AC coefficients of the first row
+       ;// (vertical) or first column (horizontal) are predicted as
+       ;// round(pred * predQP / curQP) with the same divide scheme.
+       ;// All predictor loads are sign-extending: the abs()/sign-restore
+       ;// sequences rely on seeing negative predictors as negative.
+       M_START   omxVCM4P2_PredictReconCoefIntra,r12
+       
+       ;// Assigning pointers to Input arguments on Stack
+    
+       M_ARG           predQPonStack,4  
+       M_ARG           predDironStack,4
+       M_ARG           ACPredFlagonStack,4
+       M_ARG           videoComponStack,4
+       
+       ;// DC Prediction
+
+       M_LDR           videoComp,videoComponStack                     ;// Load videoComp From Stack               
+       
+       M_LDR           predDir,predDironStack                         ;// Load Prediction direction
+       
+       ;// dcScaler Calculation
+       ;// armVCM4P2_DCScaler is indexed as a byte table with 32 entries per video component
+
+       LDR             index, =armVCM4P2_DCScaler
+       ADD             index,index,videoComp,LSL #5
+       LDRB            dcScaler,[index,QP]
+           
+    
+calDCVal
+      
+       
+       LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S16   ;// Loading the table with entries 32767/(1 to 63) 
+      
+       CMP             predDir,#2                                     ;// Check if the Prediction direction is vertical
+
+       ;// Calculate tempPred by performing Division
+            
+       LDREQSH         absCoeffDC,[pPredBufRow]                       ;// If vertical load the coeff from Row Prediction Buffer
+       LDRNESH         absCoeffDC,[pPredBufCol]                       ;// If horizontal load the coeff from column Prediction Buffer
+       
+       RSB             negdcScaler,dcScaler,#0                        ;// negdcScaler=-dcScaler  
+       
+       MOV             temp1,absCoeffDC                               ;// temp1=prediction coeff
+       CMP             temp1,#0
+       RSBLT           absCoeffDC,temp1,#0                            ;//absCoeffDC=abs(temp1)
+       
+       ADD             temp,dcScaler,dcScaler
+       LDRH            temp,[predCoeffTable,temp]                     ;// Load value from coeff table for performing division using multiplication
+       
+       SMULBB          tempPred,temp,absCoeffDC                       ;// tempPred=pPredBufRow(Col)[0]*32767/dcScaler
+       ADD             temp3,dcScaler,#1
+       LSR             tempPred,tempPred,#15                          ;// tempPred=pPredBufRow(Col)[0]/dcScaler          
+       LSR             temp3,temp3,#1                                 ;// temp3=round(dcScaler/2)
+       
+       MLA             Rem,negdcScaler,tempPred,absCoeffDC            ;// Rem = pPredBufRow(Col)[0]-tempPred*dcScaler      
+       
+       
+       LDRH            temp,[pPredBufCol]                             ;// temp=pPredBufCol[0] before it is overwritten below
+       CMP             Rem,temp3                                      
+       ADDGE           tempPred,#1                                    ;// If Rem>=round(dcScaler/2);tempPred=tempPred+1
+       CMP             temp1,#0
+       RSBLT           tempPred,tempPred,#0                           ;// if pPredBufRow(Col)[0]<0; tempPred=-tempPred
+             
+       
+       STRH            temp,[pPredBufRow,#-16]                        ;// pPredBufRow[-8]=previous pPredBufCol[0]
+
+       LDRH            temp,[pSrcDst]                                 ;// temp=pSrcDst[0]
+       M_LDR           ACPredFlag,ACPredFlagonStack
+       ADD             temp,temp,tempPred                             ;// temp=pSrcDst[0]+tempPred
+       SSAT16          temp,#12,temp                                  ;// clip temp to [-2048,2047]
+       
+       SMULBB          temp1,temp,dcScaler                            ;// temp1=clipped(pSrcDst[0])*dcScaler           
+       M_LDR           predQP,predQPonStack                           ;// dcScaler is dead from here on (same register)
+       STRH            temp,[pSrcDst]                                 
+       CMP             ACPredFlag,#1                                  ;// Check if the AC prediction flag is set or not
+       STRH            temp1,[pPredBufCol]                            ;// store temp1 to pPredBufCol
+ 
+       ;// AC Prediction
+
+              
+       BNE             Exit                                           ;// If not set Exit
+       
+       LDR             predCoeffTable, =armVCM4P2_Reciprocal_QP_S32   ;// Loading the table with entries 0x1ffff/(1 to 63)
+       MOV             temp1,#4
+       MUL             temp1,curQP,temp1
+       CMP             predDir,#2                                     ;// Check the Prediction direction
+       RSB             negCurQP,curQP,#0                                  
+       LDR             CoeffTable,[predCoeffTable,temp1]              ;// CoeffTable=0x1ffff/curQP
+       ADD             curQP,curQP,#1                                 ;// curQP=curQP+1
+       LSR             curQP,curQP,#1                                 ;// curQP=round(curQP/2)                
+       MOV             Count,#2                                       ;// Initializing the Loop Count
+       BNE             Horizontal                                     ;// If the Prediction direction is horizontal branch to Horizontal
+
+       
+
+loop1       
+       ;// Calculate tempPred
+       
+       LDRSH           absCoeffAC,[pPredBufRow,Count]                 ;// absCoeffAC=pPredBufRow[i], 1=<i<=7
+       MOV             temp1,absCoeffAC
+       CMP             temp1,#0                                       ;// compare pPredBufRow[i] with zero, 1=<i<=7
+       RSBLT           absCoeffAC,temp1,#0                            ;// absCoeffAC= abs(pPredBufRow[i])
+                                            
+       SMULBB          absCoeffAC,absCoeffAC,predQP                   ;// absCoeffAC=abs(pPredBufRow[i])*predQP
+       MUL             tempPred,absCoeffAC,CoeffTable                 ;// tempPred=pPredBufRow[i]*predQP*0x1ffff/curQP
+       LSR             tempPred,tempPred,#17          
+             
+       MLA             Rem,negCurQP,tempPred,absCoeffAC               ;// Rem=abs(pPredBufRow[i])*predQP-tempPred*curQP
+       LDRH            temp,[pSrcDst,Count]                           ;// temp=pSrcDst[i],1<=i<8
+       
+       CMP             Rem,curQP
+       ADDGE           tempPred,#1                                    ;// if Rem>=round(curQP/2); tempPred=tempPred+1
+       CMP             temp1,#0
+       RSBLT           tempPred,tempPred,#0                           ;// if pPredBufRow[i]<0 ; tempPred=-tempPred
+              
+       ;// Update source and Row Prediction buffers
+       
+       ADD             temp,temp,tempPred                             ;// temp=tempPred+pSrcDst[i]
+       SSAT16          temp,#12,temp                                  ;// Clip temp to [-2048,2047]
+       STRH            temp,[pSrcDst,Count]
+       STRH            temp,[pPredBufRow,Count]                       ;// pPredBufRow[i]=temp
+       ADD             Count,Count,#2                                 ;// i=i+1
+       CMP             Count,#16                                      ;// compare if i=8
+       BLT             loop1
+       B               Exit                                           ;// Branch to exit
+
+Horizontal
+
+       MOV             Count,#16                                      ;// Initializing i=8
+
+loop2  
+     
+       LSR             temp2,Count,#3                                 ;// temp2=i>>3
+       
+       ;// Calculate tempPred
+       ;// The predictor is a signed 16-bit value, so it must be loaded with
+       ;// sign extension (as in loop1); a zero-extending load would make the
+       ;// sign tests below never fire and corrupt negative predictors.
+       
+       LDRSH           absCoeffAC,[pPredBufCol,temp2]                 ;// absCoeffAC=pPredBufCol[i>>3]                       
+       MOV             temp1,absCoeffAC
+       CMP             temp1,#0                                       ;// compare pPredBufCol[i>>3] with zero
+       RSBLT           absCoeffAC,temp1,#0                            ;// absCoeffAC=abs(pPredBufCol[i>>3])
+                                      
+       SMULBB          absCoeffAC,absCoeffAC,predQP                   ;// absCoeffAC=abs(pPredBufCol[i>>3])*predQP
+       MUL             tempPred,absCoeffAC,CoeffTable                 ;// tempPred=pPredBufCol[i>>3]*predQP*0x1ffff/curQP
+       LSR             tempPred,tempPred,#17                          ;// tempPred=pPredBufCol[i>>3]*predQP/curQP
+       
+       MLA             Rem,negCurQP,tempPred,absCoeffAC
+       LDRH            temp,[pSrcDst,Count]                           ;// temp=pSrcDst[i]
+       
+       CMP             Rem,curQP                                      ;// Compare Rem with round(curQP/2)
+       ADDGE           tempPred,#1                                    ;// tempPred=tempPred+1 if Rem>=round(curQP/2)
+       CMP             temp1,#0
+       RSBLT           tempPred,tempPred,#0                           ;// if pPredBufCol[i>>3]<0 tempPred=-tempPred
+       
+       ;// Update source and Column Prediction buffers
+       
+       ADD             temp,temp,tempPred                             ;// temp=pSrcDst[i]+tempPred
+       SSAT16          temp,#12,temp                                  ;// Clip temp to [-2048,2047]
+       STRH            temp,[pSrcDst,Count]                           ;// pSrcDst[i]= clipped value
+       STRH            temp,[pPredBufCol,temp2]                       ;// pPredBufCol[i>>3]=temp
+       ADD             Count,Count,#16                                ;// i=i+8
+       CMP             Count,#128                                     ;// compare i with 64
+       BLT             loop2
+
+             
+Exit
+  
+       MOV             Return,#OMX_Sts_NoErr 
+
+       M_END
+       ENDIF
+       END
+
+
+   
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
new file mode 100644
index 0000000..c9591cb
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvInter_I_s.s
@@ -0,0 +1,141 @@
+;/**
+; * 
+; * File Name:  omxVCM4P2_QuantInvInter_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for inter reconstruction
+; * 
+; *
+; *
+; *
+; *
+; * Function: omxVCM4P2_QuantInvInter_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in] pSrcDst          pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
+; * [in] QP              quantization parameter (quantiser_scale)
+; * [in] videoComp      (Intra version only.) Video component type of the
+; *                  current block. Takes one of the following flags:
+; *                  OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; *                  OMX_VC_ALPHA.
+; * [in] shortVideoHeader a flag indicating presence of short_video_header;
+; *                       shortVideoHeader==1 selects linear intra DC mode,
+; *                  and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out]    pSrcDst      pointer to the output (dequantized) intra/inter block.  Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; *    - If pSrcDst is NULL or is not 16-byte aligned.
+; *      or
+; *    - If QP <= 0.
+; *      or
+; *    - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+; */
+
+   INCLUDE omxtypes_s.h
+   INCLUDE armCOMM_s.h
+
+   M_VARIANTS ARM1136JS
+
+         
+
+     IF ARM1136JS
+
+;//Input Arguments
+pSrcDst            RN 0      ;// r0: coefficient block, read and written in place
+QP                 RN 1      ;// r1: quantisation parameter
+
+;//Local Variables
+Return             RN 0      ;// r0: status code; same register as pSrcDst
+Count              RN 4      ;// coefficients still to process
+tempVal21          RN 2      ;// first LDRD register : two packed 16-bit coefficients
+tempVal43          RN 3      ;// second LDRD register: the next two packed coefficients
+QP1                RN 5      ;// additive term: QP if QP is odd, QP-1 if QP is even
+X2                 RN 6      ;// working value for a low-halfword coefficient
+X3                 RN 14     ;// working value for a high-halfword coefficient (r14 is lr; presumably preserved by M_START/M_END - confirm in armCOMM_s.h)
+Result1            RN 8      ;// first STRD register : packed results 0 and 1
+Result2            RN 9      ;// second STRD register: packed results 2 and 3
+two                RN 7      ;// constant 2
+
+    M_START omxVCM4P2_QuantInvInter_I,r9
+
+        ;// In-place inverse quantisation of the 64 coefficients at pSrcDst:
+        ;//   c == 0 : result = 0
+        ;//   c != 0 : result = sign(c) * (2*|c|*QP + QP1), clipped to [-2048,2047]
+        ;// SMLABB reads only the low halfwords of QP and 2*|c|.
+        ;// Each result is clipped with a 32-bit SSAT before packing; packing first
+        ;// keeps only the low 16 bits and corrupts results beyond +/-32767.
+        ;// Coefficients are loaded at the top of each pass, so nothing is read
+        ;// beyond the 128-byte block.
+
+        MOV      Count,#64                ;// 64 coefficients, four per pass
+        TST      QP,#1
+        SUBEQ    QP1,QP,#1                ;// QP even: QP1 = QP-1
+        MOVNE    QP1,QP                   ;// QP odd : QP1 = QP
+        MOV      two,#2
+
+Loop
+        LDRD     tempVal21,[pSrcDst]      ;// tempVal21 = c1:c0, tempVal43 = c3:c2
+
+        ;// c0 : low halfword of tempVal21
+        SMULBB   X2,tempVal21,two         ;// X2 = 2*c0, sign extended
+        CMP      X2,#0                    ;// these flags drive the next three instructions
+        RSBLT    X2,X2,#0                 ;// X2 = 2*|c0|
+        SMLABBNE X2,QP,X2,QP1             ;// X2 = 2*|c0|*QP + QP1 (skipped when c0 == 0)
+        RSBLT    X2,X2,#0                 ;// restore the sign of c0
+        SSAT     X2,#12,X2                ;// clip to [-2048,2047]
+
+        ;// c1 : high halfword of tempVal21
+        SMULTB   X3,tempVal21,two         ;// X3 = 2*c1, sign extended
+        CMP      X3,#0
+        RSBLT    X3,X3,#0                 ;// X3 = 2*|c1|
+        SMLABBNE X3,QP,X3,QP1             ;// X3 = 2*|c1|*QP + QP1 (skipped when c1 == 0)
+        RSBLT    X3,X3,#0                 ;// restore the sign of c1
+        SSAT     X3,#12,X3                ;// clip to [-2048,2047]
+        PKHBT    Result1,X2,X3,LSL #16    ;// Result1[0-15]=X2[0-15], Result1[16-31]=X3[0-15]
+
+        ;// c2 : low halfword of tempVal43
+        SMULBB   X2,tempVal43,two         ;// X2 = 2*c2, sign extended
+        CMP      X2,#0
+        RSBLT    X2,X2,#0                 ;// X2 = 2*|c2|
+        SMLABBNE X2,QP,X2,QP1             ;// X2 = 2*|c2|*QP + QP1 (skipped when c2 == 0)
+        RSBLT    X2,X2,#0                 ;// restore the sign of c2
+        SSAT     X2,#12,X2                ;// clip to [-2048,2047]
+
+        ;// c3 : high halfword of tempVal43
+        SMULTB   X3,tempVal43,two         ;// X3 = 2*c3, sign extended
+        CMP      X3,#0
+        RSBLT    X3,X3,#0                 ;// X3 = 2*|c3|
+        SMLABBNE X3,QP,X3,QP1             ;// X3 = 2*|c3|*QP + QP1 (skipped when c3 == 0)
+        RSBLT    X3,X3,#0                 ;// restore the sign of c3
+        SSAT     X3,#12,X3                ;// clip to [-2048,2047]
+        PKHBT    Result2,X2,X3,LSL #16    ;// Result2[0-15]=X2[0-15], Result2[16-31]=X3[0-15]
+
+        SUBS     Count,Count,#4           ;// flags survive the STRD and feed the BGT
+        STRD     Result1,[pSrcDst],#8     ;// store four results, advance to the next four
+        BGT      Loop
+
+        MOV      Return,#OMX_Sts_NoErr
+
+        M_END
+        ENDIF        
+        END
+
diff --git a/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
new file mode 100644
index 0000000..6328e01
--- /dev/null
+++ b/media/libstagefright/codecs/on2/h264dec/omxdl/arm11/vc/m4p2/src/omxVCM4P2_QuantInvIntra_I_s.s
@@ -0,0 +1,188 @@
+;/**
+; * 
+; * File Name:  omxVCM4P2_QuantInvIntra_I_s.s
+; * OpenMAX DL: v1.0.2
+; * Revision:   9641
+; * Date:       Thursday, February 7, 2008
+; * 
+; * (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
+; * 
+; * 
+; *
+; * Description: 
+; * Contains modules for intra reconstruction
+; * 
+; *
+; *
+; *
+; *
+; * 
+; * Function: omxVCM4P2_QuantInvIntra_I
+; *
+; * Description:
+; * Performs inverse quantization on intra/inter coded block.
+; * This function supports bits_per_pixel = 8. Mismatch control
+; * is performed for the first MPEG-4 mode inverse quantization method.
+; * The output coefficients are clipped to the range: [-2048, 2047].
+; * Mismatch control is performed for the first inverse quantization method.
+; *
+; * Remarks:
+; *
+; * Parameters:
+; * [in]    pSrcDst        pointer to the input (quantized) intra/inter block. Must be 16-byte aligned.
+; * [in]    QP            quantization parameter (quantiser_scale)
+; * [in]    videoComp          (Intra version only.) Video component type of the
+; *                    current block. Takes one of the following flags:
+; *                    OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE,
+; *                    OMX_VC_ALPHA.
+; * [in]    shortVideoHeader  a flag indicating presence of short_video_header;
+; *                           shortVideoHeader==1 selects linear intra DC mode,
+; *                    and shortVideoHeader==0 selects nonlinear intra DC mode.
+; * [out]    pSrcDst        pointer to the output (dequantized) intra/inter block.  Must be 16-byte aligned.
+; *
+; * Return Value:
+; * OMX_Sts_NoErr - no error
+; * OMX_Sts_BadArgErr - bad arguments
+; *    -    If pSrcDst is NULL or is not 16-byte aligned.
+; *      or
+; *    - If QP <= 0.
+; *      or
+; *    - videoComp is none of OMX_VC_LUMINANCE, OMX_VC_CHROMINANCE and OMX_VC_ALPHA.
+; *
+; */
+
+   INCLUDE omxtypes_s.h
+   INCLUDE armCOMM_s.h
+   
+   M_VARIANTS ARM1136JS
+   
+   
+   IMPORT        armVCM4P2_DCScaler
+ 
+         
+
+     IF ARM1136JS
+
+;//Input Arguments
+pSrcDst            RN 0      ;// r0: coefficient block, read and written in place
+QP                 RN 1      ;// r1: quantisation parameter
+videoComp          RN 2      ;// r2: selects the row of armVCM4P2_DCScaler
+shortVideoHeader   RN 3      ;// r3: non-zero forces dcScaler = 8
+
+;//Local Variables
+Return             RN 0      ;// r0: status code; same register as pSrcDst
+dcScaler           RN 4      ;// DC scale factor; shares r4 with tempVal21
+temp               RN 12     ;// inverse quantised DC value, held until the loop has finished
+index              RN 6      ;// address into armVCM4P2_DCScaler; shares r6 with QP1
+      
+tempVal21          RN 4      ;// first LDRD register : two packed 16-bit coefficients
+tempVal43          RN 5      ;// second LDRD register: the next two packed coefficients
+QP1                RN 6      ;// additive term: QP if QP is odd, QP-1 if QP is even
+X2                 RN 7      ;// working value for a low-halfword coefficient
+X3                 RN 14     ;// working value for a high-halfword coefficient (r14 is lr; presumably preserved by M_START/M_END - confirm in armCOMM_s.h)
+Result1            RN 8      ;// first STRD register : packed results 0 and 1
+Result2            RN 9      ;// second STRD register: packed results 2 and 3
+two                RN 10     ;// constant 2
+Count              RN 11     ;// coefficients still to process
+
+
+
+   
+    M_START omxVCM4P2_QuantInvIntra_I,r11
+
+        ;// In-place inverse quantisation of an intra block of 64 coefficients:
+        ;//   entry 0 (DC) : result = dcScaler * c, clipped to [-2048,2047]
+        ;//   c == 0       : result = 0
+        ;//   c != 0       : result = sign(c) * (2*|c|*QP + QP1), clipped to [-2048,2047]
+        ;// The loop applies the AC formula to all 64 entries; the DC result computed
+        ;// beforehand is written over entry 0 once the loop has finished.
+        ;// SMLABB reads only the low halfwords of QP and 2*|c|.
+        ;// Each result is clipped with a 32-bit SSAT before packing; packing first
+        ;// keeps only the low 16 bits and corrupts results beyond +/-32767.
+        ;// Coefficients are loaded at the top of each pass, so nothing is read
+        ;// beyond the 128-byte block.
+        ;// Register sharing: dcScaler and tempVal21 are both r4, index and QP1 are
+        ;// both r6; dcScaler and index are dead before the loop set-up reuses them.
+
+        ;// Select the DC scaler
+
+        TEQ       shortVideoHeader,#0      ;// short video header flag set?
+        MOVNE     dcScaler,#8              ;// yes: dcScaler = 8
+        BNE       calDCVal
+
+        ;// no: look the scaler up by videoComp and QP
+        LDR       index, =armVCM4P2_DCScaler
+        ADD       index,index,videoComp,LSL #5  ;// 32 table bytes per videoComp value
+        LDRB      dcScaler,[index,QP]      ;// dcScaler = table[videoComp*32 + QP]
+
+calDCVal
+
+        ;// Inverse quantise the DC coefficient and hold it in temp
+
+        LDRH     temp,[pSrcDst]
+        SMULBB   temp,temp,dcScaler       ;// temp = dcScaler * quantised DC (signed 16x16)
+        SSAT     temp,#12,temp            ;// clip to [-2048,2047]
+
+        ;// Loop set-up
+
+        MOV      Count,#64                ;// 64 coefficients, four per pass
+        TST      QP,#1
+        SUBEQ    QP1,QP,#1                ;// QP even: QP1 = QP-1
+        MOVNE    QP1,QP                   ;// QP odd : QP1 = QP
+        MOV      two,#2
+
+Loop
+
+        ;// One pass handles four consecutive coefficients c0..c3
+
+        LDRD     tempVal21,[pSrcDst]      ;// tempVal21 = c1:c0, tempVal43 = c3:c2
+
+        ;// c0 : low halfword of tempVal21
+        SMULBB   X2,tempVal21,two         ;// X2 = 2*c0, sign extended
+        CMP      X2,#0                    ;// these flags drive the next three instructions
+        RSBLT    X2,X2,#0                 ;// X2 = 2*|c0|
+        SMLABBNE X2,QP,X2,QP1             ;// X2 = 2*|c0|*QP + QP1 (skipped when c0 == 0)
+        RSBLT    X2,X2,#0                 ;// restore the sign of c0
+        SSAT     X2,#12,X2                ;// clip to [-2048,2047]
+
+        ;// c1 : high halfword of tempVal21
+        SMULTB   X3,tempVal21,two         ;// X3 = 2*c1, sign extended
+        CMP      X3,#0
+        RSBLT    X3,X3,#0                 ;// X3 = 2*|c1|
+        SMLABBNE X3,QP,X3,QP1             ;// X3 = 2*|c1|*QP + QP1 (skipped when c1 == 0)
+        RSBLT    X3,X3,#0                 ;// restore the sign of c1
+        SSAT     X3,#12,X3                ;// clip to [-2048,2047]
+        PKHBT    Result1,X2,X3,LSL #16    ;// Result1[0-15]=X2[0-15], Result1[16-31]=X3[0-15]
+
+        ;// c2 : low halfword of tempVal43
+        SMULBB   X2,tempVal43,two         ;// X2 = 2*c2, sign extended
+        CMP      X2,#0
+        RSBLT    X2,X2,#0                 ;// X2 = 2*|c2|
+        SMLABBNE X2,QP,X2,QP1             ;// X2 = 2*|c2|*QP + QP1 (skipped when c2 == 0)
+        RSBLT    X2,X2,#0                 ;// restore the sign of c2
+        SSAT     X2,#12,X2                ;// clip to [-2048,2047]
+
+        ;// c3 : high halfword of tempVal43
+        SMULTB   X3,tempVal43,two         ;// X3 = 2*c3, sign extended
+        CMP      X3,#0
+        RSBLT    X3,X3,#0                 ;// X3 = 2*|c3|
+        SMLABBNE X3,QP,X3,QP1             ;// X3 = 2*|c3|*QP + QP1 (skipped when c3 == 0)
+        RSBLT    X3,X3,#0                 ;// restore the sign of c3
+        SSAT     X3,#12,X3                ;// clip to [-2048,2047]
+        PKHBT    Result2,X2,X3,LSL #16    ;// Result2[0-15]=X2[0-15], Result2[16-31]=X3[0-15]
+
+        SUBS     Count,Count,#4           ;// flags survive the STRD and feed the BGT
+        STRD     Result1,[pSrcDst],#8     ;// store four results, advance to the next four
+        BGT      Loop
+
+        ;// pSrcDst is now 128 bytes past the start of the block.
+        ;// Replace entry 0 with the inverse quantised DC coefficient.
+
+        STRH     temp,[pSrcDst,#-128]
+
+        MOV      Return,#OMX_Sts_NoErr
+
+        M_END
+        ENDIF        
+        END
+