Diffstat (limited to 'media/libstagefright/codecs/m4v_h263/enc/src/sad_inline.h')
-rw-r--r--  media/libstagefright/codecs/m4v_h263/enc/src/sad_inline.h  539
1 files changed, 539 insertions, 0 deletions
diff --git a/media/libstagefright/codecs/m4v_h263/enc/src/sad_inline.h b/media/libstagefright/codecs/m4v_h263/enc/src/sad_inline.h
new file mode 100644
index 0000000..ba77dfd
--- /dev/null
+++ b/media/libstagefright/codecs/m4v_h263/enc/src/sad_inline.h
@@ -0,0 +1,539 @@
+/* ------------------------------------------------------------------
+ * Copyright (C) 1998-2009 PacketVideo
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
+ * express or implied.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ * -------------------------------------------------------------------
+ */
+/*********************************************************************************/
+/* Filename: sad_inline.h */
+/* Description: Implementation of inline SAD functions used in sad.cpp */
+/* Modified: */
+/*********************************************************************************/
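+/* Usage sketch (illustrative): a motion-estimation caller compares one
+   16x16 candidate block per call and keeps the best score, e.g.
+
+       Int sad = simd_sad_mb(ref, blk, dmin, lx);
+       if (sad < dmin) { dmin = sad; best_ref = ref; }
+
+   (best_ref is a hypothetical caller-side variable.)  ref walks the search
+   window with row stride lx, blk is the current macroblock packed 16 bytes
+   per row, and dmin enables an early exit once the running SAD exceeds the
+   best result found so far. */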
+#ifndef _SAD_INLINE_H_
+#define _SAD_INLINE_H_
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
+#if !defined(PV_ARM_GCC_V5) && !defined(PV_ARM_GCC_V4) /* default: portable C versions */
+
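+ /* Portable C fallback: accumulate the absolute difference
+    |tmp - tmp2| into the running SAD. */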
+ __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
+ {
+ tmp = tmp - tmp2;
+ if (tmp > 0) sad += tmp;
+ else sad -= tmp;
+
+ return sad;
+ }
+
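+ /* SWAR (SIMD-within-a-register) absolute difference of four byte lanes.
+    (src1 ^ src2) ^ (src1 - src2) exposes, at bits 8/16/24, the lanes whose
+    subtraction borrowed, i.e. came out negative; the mask (0x80808080)
+    turns those flags into 0x80 per negative lane, and (x7 << 8) - x7
+    stretches them so that x7 >> 7 is 0xFF in every negative lane.  Adding
+    0xFF and then XORing with 0xFF is a per-byte two's-complement negation
+    (~(x - 1) == -x), leaving |a - b| in each lane. */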
+ __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask)
+ {
+ int32 x7;
+
+ x7 = src2 ^ src1; /* check odd/even combination */
+ if ((uint32)src2 >= (uint32)src1)
+ {
+ src1 = src2 - src1; /* subs */
+ }
+ else
+ {
+ src1 = src1 - src2;
+ }
+ x7 = x7 ^ src1; /* only odd bytes need to add carry */
+ x7 = mask & ((uint32)x7 >> 1);
+ x7 = (x7 << 8) - x7;
+ src1 = src1 + (x7 >> 7); /* add 0xFF to the negative byte, add back carry */
+ src1 = src1 ^(x7 >> 7); /* take absolute value of negative byte */
+
+ return src1;
+ }
+
+#define NUMBER 3
+#define SHIFT 24
+
+#include "sad_mb_offset.h"
+
+#undef NUMBER
+#define NUMBER 2
+#undef SHIFT
+#define SHIFT 16
+#include "sad_mb_offset.h"
+
+#undef NUMBER
+#define NUMBER 1
+#undef SHIFT
+#define SHIFT 8
+#include "sad_mb_offset.h"
+
+
+ __inline int32 simd_sad_mb(UChar *ref, UChar *blk, Int dmin, Int lx)
+ {
+ int32 x4, x5, x6, x8, x9, x10, x11, x12, x14;
+
+ x9 = 0x80808080; /* const. */
+
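+ /* ref is fetched 32 bits at a time, so dispatch on its byte alignment:
+    misaligned rows go through the offset readers generated above. */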
+ x8 = (uint32)ref & 0x3;
+ if (x8 == 3)
+ goto SadMBOffset3;
+ if (x8 == 2)
+ goto SadMBOffset2;
+ if (x8 == 1)
+ goto SadMBOffset1;
+
+// x5 = (x4<<8)-x4; /* x5 = x4*255; */
+ x4 = x5 = 0;
+
+ x6 = 0xFFFF00FF;
+
+ ref -= lx;
+ blk -= 16;
+
+ x8 = 16;
+
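+ /* x8 counts the 16 macroblock rows; each pass below consumes one
+    16-pixel row as four 4-byte groups. */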
+LOOP_SAD0:
+ /****** process 8 pixels ******/
+ x10 = *((uint32*)(ref += lx));
+ x11 = *((uint32*)(ref + 4));
+ x12 = *((uint32*)(blk += 16));
+ x14 = *((uint32*)(blk + 4));
+
+ /* process x11 & x14 */
+ x11 = sad_4pixel(x11, x14, x9);
+
+ /* process x12 & x10 */
+ x10 = sad_4pixel(x10, x12, x9);
+
+ x5 = x5 + x10; /* accumulate low bytes */
+ x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
+ x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */
+ x5 = x5 + x11; /* accumulate low bytes */
+ x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
+ x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */
+
+ /****** process 8 pixels ******/
+ x10 = *((uint32*)(ref + 8));
+ x11 = *((uint32*)(ref + 12));
+ x12 = *((uint32*)(blk + 8));
+ x14 = *((uint32*)(blk + 12));
+
+ /* process x11 & x14 */
+ x11 = sad_4pixel(x11, x14, x9);
+
+ /* process x12 & x10 */
+ x10 = sad_4pixel(x10, x12, x9);
+
+ x5 = x5 + x10; /* accumulate low bytes */
+ x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
+ x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */
+ x5 = x5 + x11; /* accumulate low bytes */
+ x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
+ x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */
+
+ /****************/
+ x10 = x5 - (x4 << 8); /* extract low bytes */
+ x10 = x10 + x4; /* add with high bytes */
+ x10 = x10 + (x10 << 16); /* add with lower half word */
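+ /* Fold the packed accumulators into one scalar: x4 held the high-byte
+    lanes, so x5 - (x4 << 8) + x4 re-bases them, and adding x10 << 16
+    collapses the two halfwords so the total SAD lands in the top 16 bits. */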
+
+ if (((uint32)x10 >> 16) <= (uint32)dmin) /* compare with dmin */
+ {
+ if (--x8)
+ {
+ goto LOOP_SAD0;
+ }
+
+ }
+
+ return ((uint32)x10 >> 16);
+
+SadMBOffset3:
+
+ return sad_mb_offset3(ref, blk, lx, dmin);
+
+SadMBOffset2:
+
+ return sad_mb_offset2(ref, blk, lx, dmin);
+
+SadMBOffset1:
+
+ return sad_mb_offset1(ref, blk, lx, dmin);
+
+ }
+
+#elif defined(__CC_ARM) /* ARM RVCT (armcc); only works with ARM v5 */
+
+ __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
+ {
+ __asm
+ {
+ rsbs tmp, tmp, tmp2 ;
+ rsbmi tmp, tmp, #0 ;
+ add sad, sad, tmp ;
+ }
+
+ return sad;
+ }
+
+ __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask)
+ {
+ int32 x7;
+
+ __asm
+ {
+ EOR x7, src2, src1; /* check odd/even combination */
+ SUBS src1, src2, src1;
+ EOR x7, x7, src1;
+ AND x7, mask, x7, lsr #1;
+ ORRCC x7, x7, #0x80000000;
+ RSB x7, x7, x7, lsl #8;
+ ADD src1, src1, x7, asr #7; /* add 0xFF to the negative byte, add back carry */
+ EOR src1, src1, x7, asr #7; /* take absolute value of negative byte */
+ }
+
+ return src1;
+ }
+
+ __inline int32 sad_4pixelN(int32 src1, int32 src2, int32 mask)
+ {
+ int32 x7;
+
+ __asm
+ {
+ EOR x7, src2, src1; /* check odd/even combination */
+ ADDS src1, src2, src1;
+ EOR x7, x7, src1; /* only odd bytes need to add carry */
+ ANDS x7, mask, x7, rrx;
+ RSB x7, x7, x7, lsl #8;
+ SUB src1, src1, x7, asr #7; /* add 0xFF to the negative byte, add back carry */
+ EOR src1, src1, x7, asr #7; /* take absolute value of negative byte */
+ }
+
+ return src1;
+ }
+
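+ /* Shared accumulation step consumed by the sad_mb_offset.h expansions
+    below: fold a pair of packed lane results into the low-byte (x5) and
+    high-byte (x4) running sums. */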
+#define sum_accumulate __asm{ SBC x5, x5, x10; /* accumulate low bytes */ \
+ BIC x10, x6, x10; /* x10 & 0xFF00FF00 */ \
+ ADD x4, x4, x10,lsr #8; /* accumulate high bytes */ \
+ SBC x5, x5, x11; /* accumulate low bytes */ \
+ BIC x11, x6, x11; /* x11 & 0xFF00FF00 */ \
+ ADD x4, x4, x11,lsr #8; } /* accumulate high bytes */
+
+
+#define NUMBER 3
+#define SHIFT 24
+#define INC_X8 0x08000001
+
+#include "sad_mb_offset.h"
+
+#undef NUMBER
+#define NUMBER 2
+#undef SHIFT
+#define SHIFT 16
+#undef INC_X8
+#define INC_X8 0x10000001
+#include "sad_mb_offset.h"
+
+#undef NUMBER
+#define NUMBER 1
+#undef SHIFT
+#define SHIFT 8
+#undef INC_X8
+#define INC_X8 0x08000001
+#include "sad_mb_offset.h"
+
+
+ __inline int32 simd_sad_mb(UChar *ref, UChar *blk, Int dmin, Int lx)
+ {
+ int32 x4, x5, x6, x8, x9, x10, x11, x12, x14;
+
+ x9 = 0x80808080; /* const. */
+ x4 = x5 = 0;
+
+ __asm
+ {
+ MOVS x8, ref, lsl #31 ;
+ BHI SadMBOffset3;
+ BCS SadMBOffset2;
+ BMI SadMBOffset1;
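+            /* a single MOVS classifies both alignment bits of ref via the
+               flags: bit 0 -> N, bit 1 -> C, so HI means offset 3, CS means
+               offset 2 and MI means offset 1 */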
+
+ MVN x6, #0xFF00;
+ }
+LOOP_SAD0:
+ /****** process 8 pixels ******/
+ x11 = *((int32*)(ref + 12));
+ x10 = *((int32*)(ref + 8));
+ x14 = *((int32*)(blk + 12));
+ x12 = *((int32*)(blk + 8));
+
+ /* process x11 & x14 */
+ x11 = sad_4pixel(x11, x14, x9);
+
+ /* process x12 & x10 */
+ x10 = sad_4pixel(x10, x12, x9);
+
+ x5 = x5 + x10; /* accumulate low bytes */
+ x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
+ x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */
+ x5 = x5 + x11; /* accumulate low bytes */
+ x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
+ x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */
+
+ __asm
+ {
+ /****** process 8 pixels ******/
+ LDR x11, [ref, #4];
+ LDR x10, [ref], lx ;
+ LDR x14, [blk, #4];
+ LDR x12, [blk], #16 ;
+ }
+
+ /* process x11 & x14 */
+ x11 = sad_4pixel(x11, x14, x9);
+
+ /* process x12 & x10 */
+ x10 = sad_4pixel(x10, x12, x9);
+
+ x5 = x5 + x10; /* accumulate low bytes */
+ x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
+ x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */
+ x5 = x5 + x11; /* accumulate low bytes */
+ x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
+ x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */
+
+ /****************/
+ x10 = x5 - (x4 << 8); /* extract low bytes */
+ x10 = x10 + x4; /* add with high bytes */
+ x10 = x10 + (x10 << 16); /* add with lower half word */
+
+ __asm
+ {
+ /****************/
+ RSBS x11, dmin, x10, lsr #16;
+ ADDLSS x8, x8, #0x10000001;
+ BLS LOOP_SAD0;
+ }
+
+ return ((uint32)x10 >> 16);
+
+SadMBOffset3:
+
+ return sad_mb_offset3(ref, blk, lx, dmin, x8);
+
+SadMBOffset2:
+
+ return sad_mb_offset2(ref, blk, lx, dmin, x8);
+
+SadMBOffset1:
+
+ return sad_mb_offset1(ref, blk, lx, dmin, x8);
+ }
+
+
+#elif ( defined(PV_ARM_GCC_V5) || defined(PV_ARM_GCC_V4) ) /* ARM GNU COMPILER */
+
+ __inline int32 SUB_SAD(int32 sad, int32 tmp, int32 tmp2)
+ {
+ register int32 out;
+ register int32 temp1;
+ register int32 ss = sad;
+ register int32 tt = tmp;
+ register int32 uu = tmp2;
+
+ asm volatile("rsbs %1, %4, %3\n\t"
+ "rsbmi %1, %1, #0\n\t"
+ "add %0, %2, %1"
+ : "=&r"(out),
+ "=&r"(temp1)
+ : "r"(ss),
+ "r"(tt),
+ "r"(uu));
+ return out;
+ }
+
+ __inline int32 sad_4pixel(int32 src1, int32 src2, int32 mask)
+ {
+ register int32 out;
+ register int32 temp1;
+ register int32 s1 = src1;
+ register int32 s2 = src2;
+ register int32 mm = mask;
+
+ asm volatile("eor %0, %3, %2\n\t"
+ "subs %1, %3, %2\n\t"
+ "eor %0, %0, %1\n\t"
+ "and %0, %4, %0, lsr #1\n\t"
+ "orrcc %0, %0, #0x80000000\n\t"
+ "rsb %0, %0, %0, lsl #8\n\t"
+ "add %1, %1, %0, asr #7\n\t"
+ "eor %1, %1, %0, asr #7"
+ : "=&r"(out),
+ "=&r"(temp1)
+ : "r"(s1),
+ "r"(s2),
+ "r"(mm));
+
+ return temp1;
+ }
+
+ __inline int32 sad_4pixelN(int32 src1, int32 src2, int32 mask)
+ {
+ register int32 out;
+ register int32 temp1;
+ register int32 s1 = src1;
+ register int32 s2 = src2;
+ register int32 mm = mask;
+
+ asm volatile("eor %1, %3, %2\n\t"
+ "adds %0, %3, %2\n\t"
+ "eor %1, %1, %0\n\t"
+ "ands %1, %4, %1,rrx\n\t"
+ "rsb %1, %1, %1, lsl #8\n\t"
+ "sub %0, %0, %1, asr #7\n\t"
+ "eor %0, %0, %1, asr #7"
+ : "=&r"(out),
+ "=&r"(temp1)
+ : "r"(s1),
+ "r"(s2),
+ "r"(mm));
+
+ return (out);
+ }
+
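+ /* GCC flavor of the shared accumulation step used by the sad_mb_offset.h
+    expansions: same x5/x4 low-/high-byte folding as the armcc macro above. */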
+#define sum_accumulate asm volatile("sbc %0, %0, %1\n\t" \
+ "bic %1, %4, %1\n\t" \
+ "add %2, %2, %1, lsr #8\n\t" \
+ "sbc %0, %0, %3\n\t" \
+ "bic %3, %4, %3\n\t" \
+ "add %2, %2, %3, lsr #8" \
+ :"+r"(x5), "+r"(x10), "+r"(x4), "+r"(x11) \
+ :"r"(x6));
+
+#define NUMBER 3
+#define SHIFT 24
+#define INC_X8 0x08000001
+
+#include "sad_mb_offset.h"
+
+#undef NUMBER
+#define NUMBER 2
+#undef SHIFT
+#define SHIFT 16
+#undef INC_X8
+#define INC_X8 0x10000001
+#include "sad_mb_offset.h"
+
+#undef NUMBER
+#define NUMBER 1
+#undef SHIFT
+#define SHIFT 8
+#undef INC_X8
+#define INC_X8 0x08000001
+#include "sad_mb_offset.h"
+
+
+ __inline int32 simd_sad_mb(UChar *ref, UChar *blk, Int dmin, Int lx)
+ {
+ int32 x4, x5, x6, x8, x9, x10, x11, x12, x14;
+
+ x9 = 0x80808080; /* const. */
+ x4 = x5 = 0;
+
+ x8 = (uint32)ref & 0x3;
+ if (x8 == 3)
+ goto SadMBOffset3;
+ if (x8 == 2)
+ goto SadMBOffset2;
+ if (x8 == 1)
+ goto SadMBOffset1;
+
+asm volatile("mvn %0, #0xFF00": "=r"(x6));
+
+LOOP_SAD0:
+ /****** process 8 pixels ******/
+ x11 = *((int32*)(ref + 12));
+ x10 = *((int32*)(ref + 8));
+ x14 = *((int32*)(blk + 12));
+ x12 = *((int32*)(blk + 8));
+
+ /* process x11 & x14 */
+ x11 = sad_4pixel(x11, x14, x9);
+
+ /* process x12 & x10 */
+ x10 = sad_4pixel(x10, x12, x9);
+
+ x5 = x5 + x10; /* accumulate low bytes */
+ x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
+ x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */
+ x5 = x5 + x11; /* accumulate low bytes */
+ x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
+ x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */
+
+ asm volatile("ldr %0, [%4, #4]\n\t"
+ "ldr %1, [%4], %6\n\t"
+ "ldr %2, [%5, #4]\n\t"
+ "ldr %3, [%5], #16"
+ : "=r"(x11), "=r"(x10), "=r"(x14), "=r"(x12), "+r"(ref), "+r"(blk)
+ : "r"(lx));
+
+ /* process x11 & x14 */
+ x11 = sad_4pixel(x11, x14, x9);
+
+ /* process x12 & x10 */
+ x10 = sad_4pixel(x10, x12, x9);
+
+ x5 = x5 + x10; /* accumulate low bytes */
+ x10 = x10 & (x6 << 8); /* x10 & 0xFF00FF00 */
+ x4 = x4 + ((uint32)x10 >> 8); /* accumulate high bytes */
+ x5 = x5 + x11; /* accumulate low bytes */
+ x11 = x11 & (x6 << 8); /* x11 & 0xFF00FF00 */
+ x4 = x4 + ((uint32)x11 >> 8); /* accumulate high bytes */
+
+ /****************/
+ x10 = x5 - (x4 << 8); /* extract low bytes */
+ x10 = x10 + x4; /* add with high bytes */
+ x10 = x10 + (x10 << 16); /* add with lower half word */
+
+ if (((uint32)x10 >> 16) <= (uint32)dmin) /* compare with dmin */
+ {
+ if (--x8)
+ {
+ goto LOOP_SAD0;
+ }
+
+ }
+
+ return ((uint32)x10 >> 16);
+
+SadMBOffset3:
+
+ return sad_mb_offset3(ref, blk, lx, dmin);
+
+SadMBOffset2:
+
+ return sad_mb_offset2(ref, blk, lx, dmin);
+
+SadMBOffset1:
+
+ return sad_mb_offset1(ref, blk, lx, dmin);
+ }
+
+#endif // OS
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif // _SAD_INLINE_H_
+