summaryrefslogtreecommitdiffstats
path: root/sec_mm
diff options
context:
space:
mode:
authorSeungBeom Kim <sbcrux.kim@samsung.com>2011-07-18 11:18:27 +0900
committerJames Dong <jdong@google.com>2011-07-25 10:03:49 -0700
commit5af084cebef71cbe8990a96e73b37cc5fdfce462 (patch)
tree3aba03155c9f858898b66a0955fe173bff68b0b6 /sec_mm
parentabc28ea135621af9735021ea27763cdf624aada5 (diff)
downloaddevice_samsung_crespo-5af084cebef71cbe8990a96e73b37cc5fdfce462.zip
device_samsung_crespo-5af084cebef71cbe8990a96e73b37cc5fdfce462.tar.gz
device_samsung_crespo-5af084cebef71cbe8990a96e73b37cc5fdfce462.tar.bz2
Add color space convertor in SEC_OMX
Supported conversions: NV12T to YUV420P, NV12T to YUV420SP, YUV420P to NV12T, YUV420SP to NV12T. Change-Id: I769bea28953786a9191824c488a633e8c997520f Signed-off-by: SeungBeom Kim <sbcrux.kim@samsung.com>
Diffstat (limited to 'sec_mm')
-rw-r--r--sec_mm/sec_omx/sec_codecs/Android.mk2
-rw-r--r--sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/Android.mk36
-rw-r--r--sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/color_space_convertor.c1092
-rw-r--r--sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_deinterleave_memcpy.s128
-rw-r--r--sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_interleave_memcpy.s133
-rw-r--r--sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_uv_neon.s768
-rw-r--r--sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_y_neon.s680
-rw-r--r--sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_uv_neon.s573
-rw-r--r--sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_y_neon.s451
-rw-r--r--sec_mm/sec_omx/sec_codecs/video/mfc_c110/dec/src/SsbSipMfcDecAPI.c170
-rw-r--r--sec_mm/sec_omx/sec_codecs/video/mfc_c110/enc/src/SsbSipMfcEncAPI.c3
-rw-r--r--sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/SsbSipMfcApi.h6
-rw-r--r--sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/color_space_convertor.h176
-rw-r--r--sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/Android.mk4
-rw-r--r--sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/SEC_OMX_H264dec.c20
-rw-r--r--sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/Android.mk2
-rw-r--r--sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/SEC_OMX_Mpeg4dec.c20
-rw-r--r--sec_mm/sec_omx/sec_omx_component/video/enc/h264enc/Android.mk2
18 files changed, 4069 insertions, 197 deletions
diff --git a/sec_mm/sec_omx/sec_codecs/Android.mk b/sec_mm/sec_omx/sec_codecs/Android.mk
index a51a075..3c163a4 100644
--- a/sec_mm/sec_omx/sec_codecs/Android.mk
+++ b/sec_mm/sec_omx/sec_codecs/Android.mk
@@ -4,4 +4,4 @@ include $(CLEAR_VARS)
include $(SEC_CODECS)/video/mfc_c110/dec/Android.mk
include $(SEC_CODECS)/video/mfc_c110/enc/Android.mk
-
+include $(SEC_CODECS)/video/mfc_c110/csc/Android.mk \ No newline at end of file
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/Android.mk b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/Android.mk
new file mode 100644
index 0000000..4106a68
--- /dev/null
+++ b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/Android.mk
@@ -0,0 +1,36 @@
+
+LOCAL_PATH := $(call my-dir)
+include $(CLEAR_VARS)
+
+LOCAL_MODULE_TAGS := optional
+# Build the NEON assembly (.s) sources when ARCH_ARM_HAVE_NEON is true; otherwise build the C implementation.
+ifeq ($(ARCH_ARM_HAVE_NEON),true)
+LOCAL_SRC_FILES := \
+ csc_yuv420_nv12t_y_neon.s \
+ csc_yuv420_nv12t_uv_neon.s \
+ csc_nv12t_yuv420_y_neon.s \
+ csc_nv12t_yuv420_uv_neon.s \
+ csc_interleave_memcpy.s \
+ csc_deinterleave_memcpy.s
+
+else
+LOCAL_SRC_FILES := \
+ color_space_convertor.c
+
+endif
+# Name of the static library produced by BUILD_STATIC_LIBRARY below.
+LOCAL_MODULE := libseccsc
+
+LOCAL_CFLAGS :=
+
+LOCAL_ARM_MODE := arm
+
+LOCAL_STATIC_LIBRARIES :=
+
+LOCAL_SHARED_LIBRARIES := liblog
+# Header search path; color_space_convertor.h lives in mfc_c110/include.
+LOCAL_C_INCLUDES := \
+ $(SEC_CODECS)/video/mfc_c110/include
+
+include $(BUILD_STATIC_LIBRARY)
+
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/color_space_convertor.c b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/color_space_convertor.c
new file mode 100644
index 0000000..c1ac638
--- /dev/null
+++ b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/color_space_convertor.c
@@ -0,0 +1,1092 @@
+/*
+ *
+ * Copyright 2011 Samsung Electronics S.LSI Co. LTD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @file color_space_convertor.c
+ * @brief Color space conversion between NV12T (tiled) and YUV420 (linear) for SEC_OMX
+ * @author ShinWon Lee (shinwon.lee@samsung.com)
+ * @version 1.0
+ * @history
+ * 2011.7.01 : Create
+ */
+
+#include "stdlib.h" /* NOTE(review): memcpy() is called below but <string.h> is not included in this file - confirm color_space_convertor.h provides it */
+#include "color_space_convertor.h"
+
+#define TILED_SIZE 64*32 /* NOTE(review): expansion is not parenthesized; expressions such as x/TILED_SIZE would not group as intended */
+
+/*
+ * De-interleaves src: even-indexed bytes go to dest1, odd-indexed bytes go to dest2.
+ * Each destination receives src_size/2 bytes; if src_size is odd the last source byte is not copied.
+ * @param dest1
+ * Address receiving src[0], src[2], src[4], ... [out]
+ *
+ * @param dest2
+ * Address receiving src[1], src[3], src[5], ... [out]
+ *
+ * @param src
+ * Address of interleaved data[in]
+ *
+ * @param src_size
+ * Size of interleaved data in bytes[in]
+ */
+void csc_deinterleave_memcpy(char *dest1, char *dest2, char *src, int src_size)
+{
+ int i = 0;
+ for(i=0; i<src_size/2; i++) { /* one iteration per pair of source bytes */
+ dest1[i] = src[i*2];
+ dest2[i] = src[i*2+1];
+ }
+}
+
+/*
+ * Interleaves src1 and src2 into dest: dest[2*i] = src1[i], dest[2*i+1] = src2[i].
+ * Writes 2*src_size bytes to dest.
+ * @param dest
+ * Address of interleaved data[out]
+ *
+ * @param src1
+ * Address of the data written to even byte positions of dest[in]
+ *
+ * @param src2
+ * Address of the data written to odd byte positions of dest[in]
+ *
+ * @param src_size
+ * Number of bytes read from each of src1 and src2[in]
+ */
+void csc_interleave_memcpy(char *dest, char *src1, char *src2, int src_size)
+{
+ int i = 0;
+ for(i=0; i<src_size; i++) { /* one iteration per output byte pair */
+ dest[i*2] = src1[i];
+ dest[i*2+1] = src2[i];
+ }
+}
+
+/*
+ * Converts tiled data to linear (source tiles are 64 bytes x 32 lines, 2048 bytes each).
+ * 1. Y of NV12T to Y of YUV420P
+ * 2. Y of NV12T to Y of YUV420S
+ * 3. UV of NV12T to UV of YUV420S
+ * NOTE(review): the narrower column passes copy 2 or 4 lines and 4-byte groups per step without bounds checks - confirm callers pass suitable sizes.
+ * @param yuv420_dest
+ * Y or UV plane address of YUV420 (linear)[out]
+ *
+ * @param nv12t_src
+ * Y or UV plane address of NV12T (tiled)[in]
+ *
+ * @param yuv420_width
+ * Width of YUV420; also used as the byte stride of one output line[in]
+ *
+ * @param yuv420_height
+ * Y: Height of YUV420, UV: Height/2 of YUV420[in]
+ */
+void csc_tiled_to_linear(char *yuv420_dest, char *nv12t_src, int yuv420_width, int yuv420_height)
+{
+ unsigned int i, j;
+ unsigned int tiled_x_index = 0, tiled_y_index = 0;
+ unsigned int aligned_x_size = 0; /* number of leftmost columns already converted */
+ unsigned int tiled_offset = 0, tiled_offset1 = 0, tiled_offset2 = 0, tiled_offset3 = 0;
+ unsigned int temp1 = 0, temp2 = 0;
+
+ if (yuv420_width >= 1024) { /* pass 1: columns 0..1023, sixteen 64-byte tiles per line */
+ for (i=0; i<yuv420_height; i=i+1) {
+ tiled_offset = 0;
+ tiled_y_index = i>>5; /* tile row: 32 lines per tile */
+ if (tiled_y_index & 0x1) {
+ /* odd formula: 2+x_block_num*(y-1) */
+ tiled_offset = tiled_y_index-1;
+ temp1 = ((yuv420_width+127)>>7)<<7; /* width rounded up to a multiple of 128 */
+ tiled_offset = tiled_offset*(temp1>>6); /* temp1>>6: tiles per tile row */
+ tiled_offset = tiled_offset+2;
+ tiled_offset = tiled_offset<<11; /* tile index -> byte offset (2048 bytes per tile) */
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*2;
+ tiled_offset3 = tiled_offset+2048*3;
+ temp2 = 8; /* tile-index step to the next group of four tiles */
+ } else {
+ temp2 = ((yuv420_height+31)>>5)<<5; /* height rounded up to a multiple of 32 */
+ /* even formula: x_block_num*y */
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_y_index*(temp1>>6);
+ tiled_offset = tiled_offset<<11;
+ if ((i+32)<temp2) { /* another tile row follows this one */
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*6;
+ tiled_offset3 = tiled_offset+2048*7;
+ temp2 = 8;
+ } else { /* last tile row: tiles are consecutive */
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*2;
+ tiled_offset3 = tiled_offset+2048*3;
+ temp2 = 4;
+ }
+ }
+ temp1 = i&0x1F; /* line within the tile */
+ memcpy(yuv420_dest+yuv420_width*(i), nv12t_src+tiled_offset+64*(temp1), 64);
+ memcpy(yuv420_dest+yuv420_width*(i)+64*1, nv12t_src+tiled_offset1+64*(temp1), 64);
+ memcpy(yuv420_dest+yuv420_width*(i)+64*2, nv12t_src+tiled_offset2+64*(temp1), 64);
+ memcpy(yuv420_dest+yuv420_width*(i)+64*3, nv12t_src+tiled_offset3+64*(temp1), 64);
+
+ tiled_offset = tiled_offset+temp2*2048;
+ tiled_offset1 = tiled_offset1+temp2*2048;
+ tiled_offset2 = tiled_offset2+temp2*2048;
+ tiled_offset3 = tiled_offset3+temp2*2048;
+ memcpy(yuv420_dest+yuv420_width*(i)+64*4, nv12t_src+tiled_offset+64*(temp1), 64);
+ memcpy(yuv420_dest+yuv420_width*(i)+64*5, nv12t_src+tiled_offset1+64*(temp1), 64);
+ memcpy(yuv420_dest+yuv420_width*(i)+64*6, nv12t_src+tiled_offset2+64*(temp1), 64);
+ memcpy(yuv420_dest+yuv420_width*(i)+64*7, nv12t_src+tiled_offset3+64*(temp1), 64);
+
+ tiled_offset = tiled_offset+temp2*2048;
+ tiled_offset1 = tiled_offset1+temp2*2048;
+ tiled_offset2 = tiled_offset2+temp2*2048;
+ tiled_offset3 = tiled_offset3+temp2*2048;
+ memcpy(yuv420_dest+yuv420_width*(i)+64*8, nv12t_src+tiled_offset+64*(temp1), 64);
+ memcpy(yuv420_dest+yuv420_width*(i)+64*9, nv12t_src+tiled_offset1+64*(temp1), 64);
+ memcpy(yuv420_dest+yuv420_width*(i)+64*10, nv12t_src+tiled_offset2+64*(temp1), 64);
+ memcpy(yuv420_dest+yuv420_width*(i)+64*11, nv12t_src+tiled_offset3+64*(temp1), 64);
+
+ tiled_offset = tiled_offset+temp2*2048;
+ tiled_offset1 = tiled_offset1+temp2*2048;
+ tiled_offset2 = tiled_offset2+temp2*2048;
+ tiled_offset3 = tiled_offset3+temp2*2048;
+ memcpy(yuv420_dest+yuv420_width*(i)+64*12, nv12t_src+tiled_offset+64*(temp1), 64);
+ memcpy(yuv420_dest+yuv420_width*(i)+64*13, nv12t_src+tiled_offset1+64*(temp1), 64);
+ memcpy(yuv420_dest+yuv420_width*(i)+64*14, nv12t_src+tiled_offset2+64*(temp1), 64);
+ memcpy(yuv420_dest+yuv420_width*(i)+64*15, nv12t_src+tiled_offset3+64*(temp1), 64);
+ }
+ aligned_x_size = 1024;
+ }
+
+ if ((yuv420_width-aligned_x_size) >= 512) { /* pass 2: next 512 columns, eight tiles per line */
+ for (i=0; i<yuv420_height; i=i+1) {
+ tiled_offset = 0;
+ tiled_y_index = i>>5;
+ if (tiled_y_index & 0x1) {
+ /* odd formula: 2+x_block_num*(y-1) */
+ tiled_offset = tiled_y_index-1;
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_offset*(temp1>>6);
+ tiled_offset = tiled_offset+2;
+ temp1 = aligned_x_size>>5;
+ tiled_offset = tiled_offset+temp1;
+ tiled_offset = tiled_offset<<11;
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*2;
+ tiled_offset3 = tiled_offset+2048*3;
+ temp2 = 8;
+ } else {
+ temp2 = ((yuv420_height+31)>>5)<<5;
+ /* even formula: x_block_num*y */
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_y_index*(temp1>>6);
+ tiled_offset = tiled_offset<<11;
+ if ((i+32)<temp2) {
+ temp1 = aligned_x_size>>5;
+ tiled_offset = tiled_offset+(temp1<<11);
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*6;
+ tiled_offset3 = tiled_offset+2048*7;
+ temp2 = 8;
+ } else {
+ temp1 = aligned_x_size>>6;
+ tiled_offset = tiled_offset+(temp1<<11);
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*2;
+ tiled_offset3 = tiled_offset+2048*3;
+ temp2 = 4;
+ }
+ }
+ temp1 = i&0x1F;
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i), nv12t_src+tiled_offset+64*(temp1), 64);
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*1, nv12t_src+tiled_offset1+64*(temp1), 64);
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*2, nv12t_src+tiled_offset2+64*(temp1), 64);
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*3, nv12t_src+tiled_offset3+64*(temp1), 64);
+
+ tiled_offset = tiled_offset+temp2*2048;
+ tiled_offset1 = tiled_offset1+temp2*2048;
+ tiled_offset2 = tiled_offset2+temp2*2048;
+ tiled_offset3 = tiled_offset3+temp2*2048;
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*4, nv12t_src+tiled_offset+64*(temp1), 64);
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*5, nv12t_src+tiled_offset1+64*(temp1), 64);
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*6, nv12t_src+tiled_offset2+64*(temp1), 64);
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*7, nv12t_src+tiled_offset3+64*(temp1), 64);
+ }
+ aligned_x_size = aligned_x_size+512;
+ }
+
+ if ((yuv420_width-aligned_x_size) >= 256) { /* pass 3: next 256 columns, four tiles per line */
+ for (i=0; i<yuv420_height; i=i+1) {
+ tiled_offset = 0;
+ tiled_y_index = i>>5;
+ if (tiled_y_index & 0x1) {
+ /* odd formula: 2+x_block_num*(y-1) */
+ tiled_offset = tiled_y_index-1;
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_offset*(temp1>>6);
+ tiled_offset = tiled_offset+2;
+ temp1 = aligned_x_size>>5;
+ tiled_offset = tiled_offset+temp1;
+ tiled_offset = tiled_offset<<11;
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*2;
+ tiled_offset3 = tiled_offset+2048*3;
+ } else {
+ temp2 = ((yuv420_height+31)>>5)<<5;
+ /* even formula: x_block_num*y */
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_y_index*(temp1>>6);
+ tiled_offset = tiled_offset<<11;
+ if ((i+32)<temp2) {
+ temp1 = aligned_x_size>>5;
+ tiled_offset = tiled_offset+(temp1<<11);
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*6;
+ tiled_offset3 = tiled_offset+2048*7;
+ } else {
+ temp1 = aligned_x_size>>6;
+ tiled_offset = tiled_offset+(temp1<<11);
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*2;
+ tiled_offset3 = tiled_offset+2048*3;
+ }
+ }
+ temp1 = i&0x1F;
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i), nv12t_src+tiled_offset+64*(temp1), 64);
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*1, nv12t_src+tiled_offset1+64*(temp1), 64);
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*2, nv12t_src+tiled_offset2+64*(temp1), 64);
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64*3, nv12t_src+tiled_offset3+64*(temp1), 64);
+ }
+ aligned_x_size = aligned_x_size+256;
+ }
+
+ if ((yuv420_width-aligned_x_size) >= 128) { /* pass 4: next 128 columns, two lines per iteration */
+ for (i=0; i<yuv420_height; i=i+2) {
+ tiled_offset = 0;
+ tiled_y_index = i>>5;
+ if (tiled_y_index & 0x1) {
+ /* odd formula: 2+x_block_num*(y-1) */
+ tiled_offset = tiled_y_index-1;
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_offset*(temp1>>6);
+ tiled_offset = tiled_offset+2;
+ temp1 = aligned_x_size>>5;
+ tiled_offset = tiled_offset+temp1;
+ tiled_offset = tiled_offset<<11;
+ tiled_offset1 = tiled_offset+2048*1;
+ } else {
+ temp2 = ((yuv420_height+31)>>5)<<5;
+ /* even formula: x_block_num*y */
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_y_index*(temp1>>6);
+ tiled_offset = tiled_offset<<11;
+ if ((i+32)<temp2) {
+ temp1 = aligned_x_size>>5;
+ tiled_offset = tiled_offset+(temp1<<11);
+ tiled_offset1 = tiled_offset+2048*1;
+ } else {
+ temp1 = aligned_x_size>>6;
+ tiled_offset = tiled_offset+(temp1<<11);
+ tiled_offset1 = tiled_offset+2048*1;
+ }
+ }
+ temp1 = i&0x1F;
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i), nv12t_src+tiled_offset+64*(temp1), 64);
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i)+64, nv12t_src+tiled_offset1+64*(temp1), 64);
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i+1), nv12t_src+tiled_offset+64*(temp1+1), 64);
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i+1)+64, nv12t_src+tiled_offset1+64*(temp1+1), 64);
+ }
+ aligned_x_size = aligned_x_size+128;
+ }
+
+ if ((yuv420_width-aligned_x_size) >= 64) { /* pass 5: next 64 columns, four lines per iteration */
+ for (i=0; i<yuv420_height; i=i+4) {
+ tiled_offset = 0;
+ tiled_x_index = aligned_x_size>>6;
+ tiled_y_index = i>>5;
+ if (tiled_y_index & 0x1) {
+ /* odd formula: 2+x+(x>>2)<<2+x_block_num*(y-1) */
+ tiled_offset = tiled_y_index-1;
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_offset*(temp1>>6);
+ tiled_offset = tiled_offset+tiled_x_index;
+ tiled_offset = tiled_offset+2;
+ temp1 = (tiled_x_index>>2)<<2;
+ tiled_offset = tiled_offset+temp1;
+ tiled_offset = tiled_offset<<11;
+ } else {
+ temp2 = ((yuv420_height+31)>>5)<<5;
+ if ((i+32)<temp2) {
+ /* even1 formula: x+((x+2)>>2)<<2+x_block_num*y */
+ temp1 = tiled_x_index+2;
+ temp1 = (temp1>>2)<<2;
+ tiled_offset = tiled_x_index+temp1;
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_offset+tiled_y_index*(temp1>>6);
+ tiled_offset = tiled_offset<<11;
+ } else {
+ /* even2 formula: x+x_block_num*y */
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_y_index*(temp1>>6);
+ tiled_offset = tiled_offset+tiled_x_index;
+ tiled_offset = tiled_offset<<11;
+ }
+ }
+
+ temp1 = i&0x1F;
+ temp2 = aligned_x_size&0x3F; /* byte column within the tile */
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i), nv12t_src+tiled_offset+temp2+64*(temp1), 64);
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i+1), nv12t_src+tiled_offset+temp2+64*(temp1+1), 64);
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i+2), nv12t_src+tiled_offset+temp2+64*(temp1+2), 64);
+ memcpy(yuv420_dest+aligned_x_size+yuv420_width*(i+3), nv12t_src+tiled_offset+temp2+64*(temp1+3), 64);
+ }
+ aligned_x_size = aligned_x_size+64;
+ }
+
+ if (yuv420_width != aligned_x_size) { /* remainder: 4 bytes x 4 lines per step */
+ for (i=0; i<yuv420_height; i=i+4) {
+ for (j=aligned_x_size; j<yuv420_width; j=j+4) {
+ tiled_offset = 0;
+ tiled_x_index = j>>6;
+ tiled_y_index = i>>5;
+ if (tiled_y_index & 0x1) {
+ /* odd formula: 2+x+(x>>2)<<2+x_block_num*(y-1) */
+ tiled_offset = tiled_y_index-1;
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_offset*(temp1>>6);
+ tiled_offset = tiled_offset+tiled_x_index;
+ tiled_offset = tiled_offset+2;
+ temp1 = (tiled_x_index>>2)<<2;
+ tiled_offset = tiled_offset+temp1;
+ tiled_offset = tiled_offset<<11;
+ } else {
+ temp2 = ((yuv420_height+31)>>5)<<5;
+ if ((i+32)<temp2) {
+ /* even1 formula: x+((x+2)>>2)<<2+x_block_num*y */
+ temp1 = tiled_x_index+2;
+ temp1 = (temp1>>2)<<2;
+ tiled_offset = tiled_x_index+temp1;
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_offset+tiled_y_index*(temp1>>6);
+ tiled_offset = tiled_offset<<11;
+ } else {
+ /* even2 formula: x+x_block_num*y */
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_y_index*(temp1>>6);
+ tiled_offset = tiled_offset+tiled_x_index;
+ tiled_offset = tiled_offset<<11;
+ }
+ }
+
+ temp1 = i&0x1F;
+ temp2 = j&0x3F; /* byte column within the tile */
+ memcpy(yuv420_dest+j+yuv420_width*(i), nv12t_src+tiled_offset+temp2+64*(temp1), 4);
+ memcpy(yuv420_dest+j+yuv420_width*(i+1), nv12t_src+tiled_offset+temp2+64*(temp1+1), 4);
+ memcpy(yuv420_dest+j+yuv420_width*(i+2), nv12t_src+tiled_offset+temp2+64*(temp1+2), 4);
+ memcpy(yuv420_dest+j+yuv420_width*(i+3), nv12t_src+tiled_offset+temp2+64*(temp1+3), 4);
+ }
+ }
+ }
+}
+
+/*
+ * Converts and Deinterleaves tiled data to linear
+ * 1. UV of NV12T to UV of YUV420P
+ * Even source bytes go to the U plane and odd source bytes to the V plane; each output line is yuv420_width/2 bytes.
+ * @param yuv420_u_dest
+ * U plane address of YUV420P[out]
+ *
+ * @param yuv420_v_dest
+ * V plane address of YUV420P[out]
+ *
+ * @param nv12t_uv_src
+ * UV plane address of NV12T[in]
+ *
+ * @param yuv420_width
+ * Width of YUV420[in]
+ * NOTE(review): the narrower column passes convert 2 lines and 4-byte groups per step without bounds checks - confirm callers pass suitable sizes.
+ * @param yuv420_uv_height
+ * Height/2 of YUV420[in]
+ */
+void csc_tiled_to_linear_deinterleave(char *yuv420_u_dest, char *yuv420_v_dest, char *nv12t_uv_src, int yuv420_width, int yuv420_uv_height)
+{
+ unsigned int i, j;
+ unsigned int tiled_x_index = 0, tiled_y_index = 0;
+ unsigned int aligned_x_size = 0; /* number of leftmost source columns already converted */
+ unsigned int tiled_offset = 0, tiled_offset1 = 0, tiled_offset2 = 0, tiled_offset3 = 0;
+ unsigned int temp1 = 0, temp2 = 0;
+
+ if (yuv420_width >= 1024) { /* pass 1: source columns 0..1023, sixteen 64-byte tiles per line */
+ for (i=0; i<yuv420_uv_height; i=i+1) {
+ tiled_offset = 0;
+ tiled_y_index = i>>5; /* tile row: 32 lines per tile */
+ if (tiled_y_index & 0x1) {
+ /* odd formula: 2+x_block_num*(y-1) */
+ tiled_offset = tiled_y_index-1;
+ temp1 = ((yuv420_width+127)>>7)<<7; /* width rounded up to a multiple of 128 */
+ tiled_offset = tiled_offset*(temp1>>6); /* temp1>>6: tiles per tile row */
+ tiled_offset = tiled_offset+2;
+ tiled_offset = tiled_offset<<11; /* tile index -> byte offset (2048 bytes per tile) */
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*2;
+ tiled_offset3 = tiled_offset+2048*3;
+ temp2 = 8; /* tile-index step to the next group of four tiles */
+ } else {
+ temp2 = ((yuv420_uv_height+31)>>5)<<5; /* height rounded up to a multiple of 32 */
+ /* even formula: x_block_num*y */
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_y_index*(temp1>>6);
+ tiled_offset = tiled_offset<<11;
+ if ((i+32)<temp2) { /* another tile row follows this one */
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*6;
+ tiled_offset3 = tiled_offset+2048*7;
+ temp2 = 8;
+ } else { /* last tile row: tiles are consecutive */
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*2;
+ tiled_offset3 = tiled_offset+2048*3;
+ temp2 = 4;
+ }
+ }
+ temp1 = i&0x1F; /* line within the tile */
+ csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i), yuv420_v_dest+yuv420_width/2*(i), nv12t_uv_src+tiled_offset+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*1, yuv420_v_dest+yuv420_width/2*(i)+32*1, nv12t_uv_src+tiled_offset1+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*2, yuv420_v_dest+yuv420_width/2*(i)+32*2, nv12t_uv_src+tiled_offset2+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*3, yuv420_v_dest+yuv420_width/2*(i)+32*3, nv12t_uv_src+tiled_offset3+64*(temp1), 64);
+
+ tiled_offset = tiled_offset+temp2*2048;
+ tiled_offset1 = tiled_offset1+temp2*2048;
+ tiled_offset2 = tiled_offset2+temp2*2048;
+ tiled_offset3 = tiled_offset3+temp2*2048;
+ csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*4, yuv420_v_dest+yuv420_width/2*(i)+32*4, nv12t_uv_src+tiled_offset+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*5, yuv420_v_dest+yuv420_width/2*(i)+32*5, nv12t_uv_src+tiled_offset1+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*6, yuv420_v_dest+yuv420_width/2*(i)+32*6, nv12t_uv_src+tiled_offset2+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*7, yuv420_v_dest+yuv420_width/2*(i)+32*7, nv12t_uv_src+tiled_offset3+64*(temp1), 64);
+
+ tiled_offset = tiled_offset+temp2*2048;
+ tiled_offset1 = tiled_offset1+temp2*2048;
+ tiled_offset2 = tiled_offset2+temp2*2048;
+ tiled_offset3 = tiled_offset3+temp2*2048;
+ csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*8, yuv420_v_dest+yuv420_width/2*(i)+32*8, nv12t_uv_src+tiled_offset+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*9, yuv420_v_dest+yuv420_width/2*(i)+32*9, nv12t_uv_src+tiled_offset1+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*10, yuv420_v_dest+yuv420_width/2*(i)+32*10, nv12t_uv_src+tiled_offset2+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*11, yuv420_v_dest+yuv420_width/2*(i)+32*11, nv12t_uv_src+tiled_offset3+64*(temp1), 64);
+
+ tiled_offset = tiled_offset+temp2*2048;
+ tiled_offset1 = tiled_offset1+temp2*2048;
+ tiled_offset2 = tiled_offset2+temp2*2048;
+ tiled_offset3 = tiled_offset3+temp2*2048;
+ csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*12, yuv420_v_dest+yuv420_width/2*(i)+32*12, nv12t_uv_src+tiled_offset+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*13, yuv420_v_dest+yuv420_width/2*(i)+32*13, nv12t_uv_src+tiled_offset1+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*14, yuv420_v_dest+yuv420_width/2*(i)+32*14, nv12t_uv_src+tiled_offset2+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+yuv420_width/2*(i)+32*15, yuv420_v_dest+yuv420_width/2*(i)+32*15, nv12t_uv_src+tiled_offset3+64*(temp1), 64);
+ }
+ aligned_x_size = 1024;
+ }
+
+ if ((yuv420_width-aligned_x_size) >= 512) { /* pass 2: next 512 source columns, eight tiles per line */
+ for (i=0; i<yuv420_uv_height; i=i+1) {
+ tiled_offset = 0;
+ tiled_y_index = i>>5;
+ if (tiled_y_index & 0x1) {
+ /* odd formula: 2+x_block_num*(y-1) */
+ tiled_offset = tiled_y_index-1;
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_offset*(temp1>>6);
+ tiled_offset = tiled_offset+2;
+ temp1 = aligned_x_size>>5;
+ tiled_offset = tiled_offset+temp1;
+ tiled_offset = tiled_offset<<11;
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*2;
+ tiled_offset3 = tiled_offset+2048*3;
+ temp2 = 8;
+ } else {
+ temp2 = ((yuv420_uv_height+31)>>5)<<5;
+ /* even formula: x_block_num*y */
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_y_index*(temp1>>6);
+ tiled_offset = tiled_offset<<11;
+ if ((i+32)<temp2) {
+ temp1 = aligned_x_size>>5;
+ tiled_offset = tiled_offset+(temp1<<11);
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*6;
+ tiled_offset3 = tiled_offset+2048*7;
+ temp2 = 8;
+ } else {
+ temp1 = aligned_x_size>>6;
+ tiled_offset = tiled_offset+(temp1<<11);
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*2;
+ tiled_offset3 = tiled_offset+2048*3;
+ temp2 = 4;
+ }
+ }
+ temp1 = i&0x1F;
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i), yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i), nv12t_uv_src+tiled_offset+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*1, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*1, nv12t_uv_src+tiled_offset1+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*2, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*2, nv12t_uv_src+tiled_offset2+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*3, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*3, nv12t_uv_src+tiled_offset3+64*(temp1), 64);
+
+ tiled_offset = tiled_offset+temp2*2048;
+ tiled_offset1 = tiled_offset1+temp2*2048;
+ tiled_offset2 = tiled_offset2+temp2*2048;
+ tiled_offset3 = tiled_offset3+temp2*2048;
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*4, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*4, nv12t_uv_src+tiled_offset+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*5, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*5, nv12t_uv_src+tiled_offset1+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*6, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*6, nv12t_uv_src+tiled_offset2+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*7, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*7, nv12t_uv_src+tiled_offset3+64*(temp1), 64);
+ }
+ aligned_x_size = aligned_x_size+512;
+ }
+
+ if ((yuv420_width-aligned_x_size) >= 256) { /* pass 3: next 256 source columns, four tiles per line */
+ for (i=0; i<yuv420_uv_height; i=i+1) {
+ tiled_offset = 0;
+ tiled_y_index = i>>5;
+ if (tiled_y_index & 0x1) {
+ /* odd formula: 2+x_block_num*(y-1) */
+ tiled_offset = tiled_y_index-1;
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_offset*(temp1>>6);
+ tiled_offset = tiled_offset+2;
+ temp1 = aligned_x_size>>5;
+ tiled_offset = tiled_offset+temp1;
+ tiled_offset = tiled_offset<<11;
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*2;
+ tiled_offset3 = tiled_offset+2048*3;
+ } else {
+ temp2 = ((yuv420_uv_height+31)>>5)<<5;
+ /* even formula: x_block_num*y */
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_y_index*(temp1>>6);
+ tiled_offset = tiled_offset<<11;
+ if ((i+32)<temp2) {
+ temp1 = aligned_x_size>>5;
+ tiled_offset = tiled_offset+(temp1<<11);
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*6;
+ tiled_offset3 = tiled_offset+2048*7;
+ } else {
+ temp1 = aligned_x_size>>6;
+ tiled_offset = tiled_offset+(temp1<<11);
+ tiled_offset1 = tiled_offset+2048*1;
+ tiled_offset2 = tiled_offset+2048*2;
+ tiled_offset3 = tiled_offset+2048*3;
+ }
+ }
+ temp1 = i&0x1F;
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i), yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i), nv12t_uv_src+tiled_offset+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*1, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*1, nv12t_uv_src+tiled_offset1+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*2, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*2, nv12t_uv_src+tiled_offset2+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*3, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*3, nv12t_uv_src+tiled_offset3+64*(temp1), 64);
+ }
+ aligned_x_size = aligned_x_size+256;
+ }
+
+ if ((yuv420_width-aligned_x_size) >= 128) { /* pass 4: next 128 source columns, two lines per iteration */
+ for (i=0; i<yuv420_uv_height; i=i+2) {
+ tiled_offset = 0;
+ tiled_y_index = i>>5;
+ if (tiled_y_index & 0x1) {
+ /* odd formula: 2+x_block_num*(y-1) */
+ tiled_offset = tiled_y_index-1;
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_offset*(temp1>>6);
+ tiled_offset = tiled_offset+2;
+ temp1 = aligned_x_size>>5;
+ tiled_offset = tiled_offset+temp1;
+ tiled_offset = tiled_offset<<11;
+ tiled_offset1 = tiled_offset+2048*1;
+ } else {
+ temp2 = ((yuv420_uv_height+31)>>5)<<5;
+ /* even formula: x_block_num*y */
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_y_index*(temp1>>6);
+ tiled_offset = tiled_offset<<11;
+ if ((i+32)<temp2) {
+ temp1 = aligned_x_size>>5;
+ tiled_offset = tiled_offset+(temp1<<11);
+ tiled_offset1 = tiled_offset+2048*1;
+ } else {
+ temp1 = aligned_x_size>>6;
+ tiled_offset = tiled_offset+(temp1<<11);
+ tiled_offset1 = tiled_offset+2048*1;
+ }
+ }
+ temp1 = i&0x1F;
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i), yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i), nv12t_uv_src+tiled_offset+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i)+32*1, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i)+32*1, nv12t_uv_src+tiled_offset1+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i+1), yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i+1), nv12t_uv_src+tiled_offset+64*(temp1+1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i+1)+32*1, yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i+1)+32*1, nv12t_uv_src+tiled_offset1+64*(temp1+1), 64);
+ }
+ aligned_x_size = aligned_x_size+128;
+ }
+
+ if ((yuv420_width-aligned_x_size) >= 64) { /* pass 5: next 64 source columns, two lines per iteration */
+ for (i=0; i<yuv420_uv_height; i=i+2) {
+ tiled_offset = 0;
+ tiled_x_index = aligned_x_size>>6;
+ tiled_y_index = i>>5;
+ if (tiled_y_index & 0x1) {
+ /* odd formula: 2+x+(x>>2)<<2+x_block_num*(y-1) */
+ tiled_offset = tiled_y_index-1;
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_offset*(temp1>>6);
+ tiled_offset = tiled_offset+tiled_x_index;
+ tiled_offset = tiled_offset+2;
+ temp1 = (tiled_x_index>>2)<<2;
+ tiled_offset = tiled_offset+temp1;
+ tiled_offset = tiled_offset<<11;
+ } else {
+ temp2 = ((yuv420_uv_height+31)>>5)<<5;
+ if ((i+32)<temp2) {
+ /* even1 formula: x+((x+2)>>2)<<2+x_block_num*y */
+ temp1 = tiled_x_index+2;
+ temp1 = (temp1>>2)<<2;
+ tiled_offset = tiled_x_index+temp1;
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_offset+tiled_y_index*(temp1>>6);
+ tiled_offset = tiled_offset<<11;
+ } else {
+ /* even2 formula: x+x_block_num*y */
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_y_index*(temp1>>6);
+ tiled_offset = tiled_offset+tiled_x_index;
+ tiled_offset = tiled_offset<<11;
+ }
+ }
+ temp1 = i&0x1F;
+ temp2 = aligned_x_size&0x3F; /* NOTE(review): this value is not used by the two calls below */
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i), yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i), nv12t_uv_src+tiled_offset+64*(temp1), 64);
+ csc_deinterleave_memcpy(yuv420_u_dest+aligned_x_size/2+yuv420_width/2*(i+1), yuv420_v_dest+aligned_x_size/2+yuv420_width/2*(i+1), nv12t_uv_src+tiled_offset+64*(temp1+1), 64);
+ }
+ aligned_x_size = aligned_x_size+64;
+ }
+
+ if (yuv420_width != aligned_x_size) { /* remainder: 4 source bytes (2 U + 2 V) x 2 lines per step */
+ for (i=0; i<yuv420_uv_height; i=i+2) {
+ for (j=aligned_x_size; j<yuv420_width; j=j+4) {
+ tiled_offset = 0;
+ tiled_x_index = j>>6;
+ tiled_y_index = i>>5;
+ if (tiled_y_index & 0x1) {
+ /* odd formula: 2+x+(x>>2)<<2+x_block_num*(y-1) */
+ tiled_offset = tiled_y_index-1;
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_offset*(temp1>>6);
+ tiled_offset = tiled_offset+tiled_x_index;
+ tiled_offset = tiled_offset+2;
+ temp1 = (tiled_x_index>>2)<<2;
+ tiled_offset = tiled_offset+temp1;
+ tiled_offset = tiled_offset<<11;
+ } else {
+ temp2 = ((yuv420_uv_height+31)>>5)<<5;
+ if ((i+32)<temp2) {
+ /* even1 formula: x+((x+2)>>2)<<2+x_block_num*y */
+ temp1 = tiled_x_index+2;
+ temp1 = (temp1>>2)<<2;
+ tiled_offset = tiled_x_index+temp1;
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_offset+tiled_y_index*(temp1>>6);
+ tiled_offset = tiled_offset<<11;
+ } else {
+ /* even2 formula: x+x_block_num*y */
+ temp1 = ((yuv420_width+127)>>7)<<7;
+ tiled_offset = tiled_y_index*(temp1>>6);
+ tiled_offset = tiled_offset+tiled_x_index;
+ tiled_offset = tiled_offset<<11;
+ }
+ }
+ temp1 = i&0x1F;
+ temp2 = j&0x3F; /* byte column within the tile */
+ csc_deinterleave_memcpy(yuv420_u_dest+j/2+yuv420_width/2*(i), yuv420_v_dest+j/2+yuv420_width/2*(i), nv12t_uv_src+tiled_offset+temp2+64*(temp1), 4);
+ csc_deinterleave_memcpy(yuv420_u_dest+j/2+yuv420_width/2*(i+1), yuv420_v_dest+j/2+yuv420_width/2*(i+1), nv12t_uv_src+tiled_offset+temp2+64*(temp1+1), 4);
+ }
+ }
+ }
+}
+
+/*
+ * Returns the byte offset of a 64x32 tile inside an NV12T plane.
+ *
+ * @param tiled_x_index
+ *   Horizontal tile index, x>>6[in]
+ *
+ * @param row
+ *   Row of the linear plane; the vertical tile index is row>>5[in]
+ *
+ * @param x_block_num
+ *   Number of tiles per tile row: width rounded up to 128, then >>6[in]
+ *
+ * @param aligned_height
+ *   Plane height rounded up to a multiple of 32[in]
+ */
+static unsigned int csc_linear_to_tiled_offset(unsigned int tiled_x_index, unsigned int row, unsigned int x_block_num, unsigned int aligned_height)
+{
+    unsigned int tiled_y_index = row>>5;
+    unsigned int tiled_offset;
+
+    if (tiled_y_index & 0x1) {
+        /* odd formula: 2+x+((x>>2)<<2)+x_block_num*(y-1) */
+        tiled_offset = (tiled_y_index-1)*x_block_num+tiled_x_index+2+((tiled_x_index>>2)<<2);
+    } else if ((row+32) < aligned_height) {
+        /* even1 formula: x+(((x+2)>>2)<<2)+x_block_num*y */
+        tiled_offset = tiled_x_index+(((tiled_x_index+2)>>2)<<2)+tiled_y_index*x_block_num;
+    } else {
+        /* even2 formula, used when row+32 reaches the padded height: x+x_block_num*y */
+        tiled_offset = tiled_y_index*x_block_num+tiled_x_index;
+    }
+
+    /* one tile is 64*32 = 2048 bytes */
+    return tiled_offset<<11;
+}
+
+/*
+ * Converts linear data to tiled.
+ * 1. Y of YUV420P to Y of NV12T
+ * 2. Y of YUV420SP to Y of NV12T
+ * 3. UV of YUV420SP to UV of NV12T
+ *
+ * The plane is copied in three passes: whole 64x32 tiles, the rows below
+ * the last whole tile row, and the columns right of the last whole tile
+ * column. Only rows below yuv420_height and bytes below yuv420_width are
+ * read from the source; the remaining padding of the destination tiles is
+ * left untouched. Nothing is done when a pointer is NULL or a size is not
+ * positive.
+ *
+ * @param nv12t_dest
+ *   Y or UV plane address of NV12T[out]
+ *
+ * @param yuv420_src
+ *   Y or UV plane address of YUV420P(SP)[in]
+ *
+ * @param yuv420_width
+ *   Width of YUV420[in]
+ *
+ * @param yuv420_height
+ *   Y: Height of YUV420, UV: Height/2 of YUV420[in]
+ */
+void csc_linear_to_tiled(char *nv12t_dest, char *yuv420_src, int yuv420_width, int yuv420_height)
+{
+    unsigned int width, height;
+    unsigned int i, j, k;
+    unsigned int aligned_x_size, aligned_y_size;
+    unsigned int x_block_num, aligned_height;
+    unsigned int tiled_offset, rows, bytes;
+
+    /* the loop counters are unsigned: a negative size would become a huge bound */
+    if (!nv12t_dest || !yuv420_src || yuv420_width <= 0 || yuv420_height <= 0) {
+        return;
+    }
+
+    width = (unsigned int)yuv420_width;
+    height = (unsigned int)yuv420_height;
+
+    aligned_y_size = (height>>5)<<5;
+    aligned_x_size = (width>>6)<<6;
+    x_block_num = (((width+127)>>7)<<7)>>6;
+    aligned_height = ((height+31)>>5)<<5;
+
+    /* whole 64x32 tiles */
+    for (i=0; i<aligned_y_size; i=i+32) {
+        for (j=0; j<aligned_x_size; j=j+64) {
+            tiled_offset = csc_linear_to_tiled_offset(j>>6, i, x_block_num, aligned_height);
+            for (k=0; k<32; k++) {
+                memcpy(nv12t_dest+tiled_offset+64*k, yuv420_src+j+width*(i+k), 64);
+            }
+        }
+    }
+
+    /* rows below the last whole tile row, 64 bytes wide, up to 4 rows per step */
+    for (i=aligned_y_size; i<height; i=i+4) {
+        rows = height-i;
+        if (rows > 4) {
+            rows = 4;
+        }
+        for (j=0; j<aligned_x_size; j=j+64) {
+            tiled_offset = csc_linear_to_tiled_offset(j>>6, i, x_block_num, aligned_height);
+            for (k=0; k<rows; k++) {
+                memcpy(nv12t_dest+tiled_offset+64*((i&0x1F)+k), yuv420_src+j+width*(i+k), 64);
+            }
+        }
+    }
+
+    /* columns right of the last whole tile column, up to 4 bytes x 4 rows per step */
+    for (i=0; i<height; i=i+4) {
+        rows = height-i;
+        if (rows > 4) {
+            rows = 4;
+        }
+        for (j=aligned_x_size; j<width; j=j+4) {
+            bytes = width-j;
+            if (bytes > 4) {
+                bytes = 4;
+            }
+            tiled_offset = csc_linear_to_tiled_offset(j>>6, i, x_block_num, aligned_height);
+            for (k=0; k<rows; k++) {
+                memcpy(nv12t_dest+tiled_offset+(j&0x3F)+64*((i&0x1F)+k), yuv420_src+j+width*(i+k), bytes);
+            }
+        }
+    }
+}
+
+/*
+ * Returns the byte offset of a 64x32 tile inside the NV12T UV plane.
+ *
+ * @param tiled_x_index
+ *   Horizontal tile index, x>>6[in]
+ *
+ * @param row
+ *   Row of the linear plane; the vertical tile index is row>>5[in]
+ *
+ * @param x_block_num
+ *   Number of tiles per tile row: width rounded up to 128, then >>6[in]
+ *
+ * @param aligned_height
+ *   Plane height rounded up to a multiple of 32[in]
+ */
+static unsigned int csc_linear_to_tiled_interleave_offset(unsigned int tiled_x_index, unsigned int row, unsigned int x_block_num, unsigned int aligned_height)
+{
+    unsigned int tiled_y_index = row>>5;
+    unsigned int tiled_offset;
+
+    if (tiled_y_index & 0x1) {
+        /* odd formula: 2+x+((x>>2)<<2)+x_block_num*(y-1) */
+        tiled_offset = (tiled_y_index-1)*x_block_num+tiled_x_index+2+((tiled_x_index>>2)<<2);
+    } else if ((row+32) < aligned_height) {
+        /* even1 formula: x+(((x+2)>>2)<<2)+x_block_num*y */
+        tiled_offset = tiled_x_index+(((tiled_x_index+2)>>2)<<2)+tiled_y_index*x_block_num;
+    } else {
+        /* even2 formula, used when row+32 reaches the padded height: x+x_block_num*y */
+        tiled_offset = tiled_y_index*x_block_num+tiled_x_index;
+    }
+
+    /* one tile is 64*32 = 2048 bytes */
+    return tiled_offset<<11;
+}
+
+/*
+ * Converts and Interleaves linear to tiled
+ * 1. UV of YUV420P to UV of NV12T
+ *
+ * The plane is written in three passes: whole 64x32 tiles, the rows below
+ * the last whole tile row, and the columns right of the last whole tile
+ * column. Only rows below yuv420_uv_height are read from the U and V
+ * planes. Nothing is done when a pointer is NULL or a size is not positive.
+ *
+ * @param nv12t_uv_dest
+ *   UV plane address of NV12T[out]
+ *
+ * @param yuv420p_u_src
+ *   U plane address of YUV420P[in]
+ *
+ * @param yuv420p_v_src
+ *   V plane address of YUV420P[in]
+ *
+ * @param yuv420_width
+ *   Width of YUV420[in]
+ *
+ * @param yuv420_uv_height
+ *   Height/2 of YUV420[in]
+ */
+void csc_linear_to_tiled_interleave(char *nv12t_uv_dest, char *yuv420p_u_src, char *yuv420p_v_src, int yuv420_width, int yuv420_uv_height)
+{
+    unsigned int width, height, half_width;
+    unsigned int i, j, k;
+    unsigned int aligned_x_size, aligned_y_size;
+    unsigned int x_block_num, aligned_height;
+    unsigned int tiled_offset, rows, pairs, src_offset;
+    char *dest;
+
+    /* the loop counters are unsigned: a negative size would become a huge bound */
+    if (!nv12t_uv_dest || !yuv420p_u_src || !yuv420p_v_src || yuv420_width <= 0 || yuv420_uv_height <= 0) {
+        return;
+    }
+
+    width = (unsigned int)yuv420_width;
+    height = (unsigned int)yuv420_uv_height;
+    half_width = width/2;   /* stride of the U and of the V plane */
+
+    aligned_y_size = (height>>5)<<5;
+    aligned_x_size = (width>>6)<<6;
+    x_block_num = (((width+127)>>7)<<7)>>6;
+    aligned_height = ((height+31)>>5)<<5;
+
+    /* whole 64x32 tiles: 32 U bytes and 32 V bytes make one 64 byte tile row */
+    for (i=0; i<aligned_y_size; i=i+32) {
+        for (j=0; j<aligned_x_size; j=j+64) {
+            tiled_offset = csc_linear_to_tiled_interleave_offset(j>>6, i, x_block_num, aligned_height);
+            for (k=0; k<32; k++) {
+                src_offset = j/2+half_width*(i+k);
+                csc_interleave_memcpy(nv12t_uv_dest+tiled_offset+64*k, yuv420p_u_src+src_offset, yuv420p_v_src+src_offset, 32);
+            }
+        }
+    }
+
+    /* rows below the last whole tile row, up to 4 rows per step */
+    for (i=aligned_y_size; i<height; i=i+4) {
+        rows = height-i;
+        if (rows > 4) {
+            rows = 4;
+        }
+        for (j=0; j<aligned_x_size; j=j+64) {
+            tiled_offset = csc_linear_to_tiled_interleave_offset(j>>6, i, x_block_num, aligned_height);
+            for (k=0; k<rows; k++) {
+                src_offset = j/2+half_width*(i+k);
+                csc_interleave_memcpy(nv12t_uv_dest+tiled_offset+64*((i&0x1F)+k), yuv420p_u_src+src_offset, yuv420p_v_src+src_offset, 32);
+            }
+        }
+    }
+
+    /* columns right of the last whole tile column, up to 2 U/V pairs x 4 rows per step */
+    for (i=0; i<height; i=i+4) {
+        rows = height-i;
+        if (rows > 4) {
+            rows = 4;
+        }
+        for (j=aligned_x_size; j<width; j=j+4) {
+            pairs = (width-j+1)/2;
+            if (pairs > 2) {
+                pairs = 2;
+            }
+            tiled_offset = csc_linear_to_tiled_interleave_offset(j>>6, i, x_block_num, aligned_height);
+            for (k=0; k<rows; k++) {
+                src_offset = j/2+half_width*(i+k);
+                dest = nv12t_uv_dest+tiled_offset+(j&0x3F)+64*((i&0x1F)+k);
+                if (pairs == 2) {
+                    csc_interleave_memcpy(dest, yuv420p_u_src+src_offset, yuv420p_v_src+src_offset, 2);
+                } else {
+                    /* a single trailing pair is copied by hand so that no byte past the row is read */
+                    dest[0] = yuv420p_u_src[src_offset];
+                    dest[1] = yuv420p_v_src[src_offset];
+                }
+            }
+        }
+    }
+}
+
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_deinterleave_memcpy.s b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_deinterleave_memcpy.s
new file mode 100644
index 0000000..5b55080
--- /dev/null
+++ b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_deinterleave_memcpy.s
@@ -0,0 +1,128 @@
+/*
+ *
+ * Copyright 2011 Samsung Electronics S.LSI Co. LTD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @file csc_deinterleave_memcpy.s
+ * @brief Splits interleaved bytes into two separate buffers (even bytes to dest1, odd bytes to dest2)
+ * @author ShinWon Lee (shinwon.lee@samsung.com)
+ * @version 1.0
+ * @history
+ * 2011.7.01 : Create
+ */
+ .arch armv7-a
+ .text
+ .global csc_deinterleave_memcpy
+ .type csc_deinterleave_memcpy, %function
+csc_deinterleave_memcpy:
+    .fnstart
+
+    @ Splits an interleaved byte stream into two buffers:
+    @ even source bytes go to dest1, odd source bytes go to dest2.
+    @ The source is consumed in steps of 256, 128, 64, 4 and 2 bytes;
+    @ a single trailing byte is not copied.
+
+    @r0     dest1
+    @r1     dest2
+    @r2     src
+    @r3     src_size (bytes of interleaved source)
+    @r4     i (source bytes consumed so far)
+    @r5     temp1 (loop bound / bytes remaining)
+    @r6     temp2
+    @r7     temp3
+    @r8     temp4
+    @r9     temp5
+
+    stmfd       sp!, {r4-r12,r14}       @ backup registers
+
+    mov         r4, #0
+    cmp         r3, #256
+    blt         LINEAR_SIZE_128
+
+    bic         r5, r3, #0xFF           @ r5 = src_size rounded down to 256
+    vpush       {q4-q7}                 @ d8-d15 are callee-saved
+LINEAR_SIZE_256_LOOP:
+    pld         [r2, #64]
+    vld2.8      {q0, q1}, [r2]!
+    pld         [r2, #64]
+    vld2.8      {q2, q3}, [r2]!
+    pld         [r2, #64]
+    vld2.8      {q4, q5}, [r2]!
+    pld         [r2, #64]
+    vld2.8      {q6, q7}, [r2]!
+    pld         [r2, #64]
+    vld2.8      {q8, q9}, [r2]!
+    pld         [r2, #64]
+    vld2.8      {q10, q11}, [r2]!
+    vld2.8      {q12, q13}, [r2]!
+    vld2.8      {q14, q15}, [r2]!
+
+    vst1.8      {q0}, [r0]!
+    vst1.8      {q2}, [r0]!
+    vst1.8      {q4}, [r0]!
+    vst1.8      {q6}, [r0]!
+    vst1.8      {q8}, [r0]!
+    vst1.8      {q10}, [r0]!
+    vst1.8      {q12}, [r0]!
+    vst1.8      {q14}, [r0]!
+
+    vst1.8      {q1}, [r1]!
+    vst1.8      {q3}, [r1]!
+    vst1.8      {q5}, [r1]!
+    vst1.8      {q7}, [r1]!
+    vst1.8      {q9}, [r1]!
+    vst1.8      {q11}, [r1]!
+    vst1.8      {q13}, [r1]!
+    vst1.8      {q15}, [r1]!
+
+    add         r4, #256
+    cmp         r4, r5
+    blt         LINEAR_SIZE_256_LOOP
+    vpop        {q4-q7}
+
+LINEAR_SIZE_128:
+    sub         r5, r3, r4
+    cmp         r5, #128                @ the block below consumes 128 bytes
+    blt         LINEAR_SIZE_64
+    pld         [r2, #64]
+    vld2.8      {q0, q1}, [r2]!
+    pld         [r2, #64]
+    vld2.8      {q2, q3}, [r2]!
+    vld2.8      {q8, q9}, [r2]!
+    vld2.8      {q10, q11}, [r2]!
+
+    vst1.8      {q0}, [r0]!             @ stores follow the load order
+    vst1.8      {q2}, [r0]!
+    vst1.8      {q8}, [r0]!
+    vst1.8      {q10}, [r0]!
+
+    vst1.8      {q1}, [r1]!
+    vst1.8      {q3}, [r1]!
+    vst1.8      {q9}, [r1]!
+    vst1.8      {q11}, [r1]!
+
+    add         r4, #128
+
+LINEAR_SIZE_64:
+    sub         r5, r3, r4
+    cmp         r5, #64
+    blt         LINEAR_SIZE_4
+    vld2.8      {q0, q1}, [r2]!
+    vld2.8      {q2, q3}, [r2]!
+
+    vst1.8      {q0}, [r0]!
+    vst1.8      {q2}, [r0]!
+
+    vst1.8      {q1}, [r1]!
+    vst1.8      {q3}, [r1]!
+
+    add         r4, #64
+
+LINEAR_SIZE_4:
+    sub         r5, r3, r4
+    cmp         r5, #4                  @ skip the loop when less than 4 bytes remain
+    blt         LINEAR_SIZE_2
+LINEAR_SIZE_4_LOOP:
+    ldrb        r6, [r2], #1
+    ldrb        r7, [r2], #1
+    ldrb        r8, [r2], #1
+    ldrb        r9, [r2], #1
+
+    strb        r6, [r0], #1
+    strb        r8, [r0], #1
+    strb        r7, [r1], #1
+    strb        r9, [r1], #1
+
+    add         r4, #4
+    sub         r5, r3, r4
+    cmp         r5, #4
+    bge         LINEAR_SIZE_4_LOOP
+
+LINEAR_SIZE_2:
+    cmp         r5, #2                  @ r5 = bytes remaining
+    blt         RESTORE_REG
+    ldrb        r6, [r2]
+    ldrb        r7, [r2, #1]
+    strb        r6, [r0]
+    strb        r7, [r1]
+
+RESTORE_REG:
+    ldmfd       sp!, {r4-r12,r15}       @ restore registers
+    .fnend
+
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_interleave_memcpy.s b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_interleave_memcpy.s
new file mode 100644
index 0000000..54f4436
--- /dev/null
+++ b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_interleave_memcpy.s
@@ -0,0 +1,133 @@
+/*
+ *
+ * Copyright 2011 Samsung Electronics S.LSI Co. LTD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @file csc_interleave_memcpy.s
+ * @brief Interleaves the bytes of two buffers into one (src1 to even bytes, src2 to odd bytes)
+ * @author ShinWon Lee (shinwon.lee@samsung.com)
+ * @version 1.0
+ * @history
+ * 2011.7.01 : Create
+ */
+ .arch armv7-a
+ .text
+ .global csc_interleave_memcpy
+ .type csc_interleave_memcpy, %function
+csc_interleave_memcpy:
+    .fnstart
+
+    @ Interleaves two byte streams: dest receives src1[0], src2[0],
+    @ src1[1], src2[1], ... The sources are consumed in steps of
+    @ 128, 64, 2 and 1 bytes each.
+
+    @r0     dest
+    @r1     src1
+    @r2     src2
+    @r3     src_size (bytes taken from each source, dest gets twice as many)
+    @r4     i (bytes consumed from each source so far)
+    @r5     temp1 (loop bound / bytes remaining)
+    @r6     temp2
+    @r7     temp3
+    @r8     temp4
+    @r9     temp5
+
+    stmfd       sp!, {r4-r12,r14}       @ backup registers
+
+    mov         r4, #0
+    cmp         r3, #128
+    blt         LINEAR_SIZE_64
+
+    bic         r5, r3, #0x7F           @ r5 = src_size rounded down to 128
+    vpush       {q4-q7}                 @ d8-d15 are callee-saved
+LINEAR_SIZE_128_LOOP:
+    pld         [r1, #64]
+    vld1.8      {q0}, [r1]!
+    vld1.8      {q2}, [r1]!
+    vld1.8      {q4}, [r1]!
+    vld1.8      {q6}, [r1]!
+    pld         [r2]
+    vld1.8      {q8}, [r1]!
+    vld1.8      {q10}, [r1]!
+    vld1.8      {q12}, [r1]!
+    vld1.8      {q14}, [r1]!
+    pld         [r2, #64]
+    vld1.8      {q1}, [r2]!
+    vld1.8      {q3}, [r2]!
+    vld1.8      {q5}, [r2]!
+    vld1.8      {q7}, [r2]!
+    vld1.8      {q9}, [r2]!
+    vld1.8      {q11}, [r2]!
+    vld1.8      {q13}, [r2]!
+    vld1.8      {q15}, [r2]!
+
+    vst2.8      {q0, q1}, [r0]!
+    vst2.8      {q2, q3}, [r0]!
+    vst2.8      {q4, q5}, [r0]!
+    vst2.8      {q6, q7}, [r0]!
+    vst2.8      {q8, q9}, [r0]!
+    vst2.8      {q10, q11}, [r0]!
+    pld         [r1]
+    vst2.8      {q12, q13}, [r0]!
+    vst2.8      {q14, q15}, [r0]!
+
+    add         r4, #128
+    cmp         r4, r5
+    blt         LINEAR_SIZE_128_LOOP
+    vpop        {q4-q7}
+
+LINEAR_SIZE_64:
+    sub         r5, r3, r4              @ less than 128 bytes remain here
+    cmp         r5, #64
+    blt         LINEAR_SIZE_2
+    pld         [r2]
+    vld1.8      {q0}, [r1]!
+    vld1.8      {q2}, [r1]!
+    vld1.8      {q8}, [r1]!
+    vld1.8      {q10}, [r1]!
+    vld1.8      {q1}, [r2]!
+    vld1.8      {q3}, [r2]!
+    vld1.8      {q9}, [r2]!
+    vld1.8      {q11}, [r2]!
+
+    vst2.8      {q0, q1}, [r0]!
+    vst2.8      {q2, q3}, [r0]!
+    vst2.8      {q8, q9}, [r0]!
+    vst2.8      {q10, q11}, [r0]!
+
+    add         r4, #64
+
+LINEAR_SIZE_2:
+    sub         r5, r3, r4
+    cmp         r5, #2
+    blt         LINEAR_SIZE_1
+LINEAR_SIZE_2_LOOP:
+    ldrb        r6, [r1], #1
+    ldrb        r7, [r2], #1
+    ldrb        r8, [r1], #1
+    ldrb        r9, [r2], #1
+
+    strb        r6, [r0], #1
+    strb        r7, [r0], #1
+    strb        r8, [r0], #1
+    strb        r9, [r0], #1
+
+    add         r4, #2
+    sub         r5, r3, r4
+    cmp         r5, #2                  @ loop only while a whole step remains
+    bge         LINEAR_SIZE_2_LOOP
+
+LINEAR_SIZE_1:
+    cmp         r5, #1                  @ r5 = bytes remaining
+    blt         RESTORE_REG
+    ldrb        r6, [r1]
+    ldrb        r7, [r2]
+    strb        r6, [r0]
+    strb        r7, [r0, #1]
+
+RESTORE_REG:
+    ldmfd       sp!, {r4-r12,r15}       @ restore registers
+    .fnend
+
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_uv_neon.s b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_uv_neon.s
new file mode 100644
index 0000000..08e359c
--- /dev/null
+++ b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_uv_neon.s
@@ -0,0 +1,768 @@
+/*
+ *
+ * Copyright 2011 Samsung Electronics S.LSI Co. LTD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @file csc_nv12t_yuv420_uv_neon.s
+ * @brief NEON conversion of the tiled NV12T UV plane to the linear U and V planes of YUV420P
+ * @author ShinWon Lee (shinwon.lee@samsung.com)
+ * @version 1.0
+ * @history
+ * 2011.7.01 : Create
+ */
+
+/*
+ * Converts and Deinterleaves tiled data to linear
+ * 1. UV of NV12T to UV of YUV420P
+ *
+ * @param yuv420_u_dest
+ * U plane address of YUV420P[out]
+ *
+ * @param yuv420_v_dest
+ * V plane address of YUV420P[out]
+ *
+ * @param nv12t_src
+ * UV plane address of NV12T[in]
+ *
+ * @param yuv420_width
+ * Width of YUV420[in]
+ *
+ * @param yuv420_uv_height
+ * Height/2 of YUV420[in]
+ */
+
+ .arch armv7-a
+ .text
+ .global csc_tiled_to_linear_deinterleave
+ .type csc_tiled_to_linear_deinterleave, %function
+csc_tiled_to_linear_deinterleave:
+ .fnstart
+
+ @r0 linear_u_dest
+ @r1 linear_v_dest
+ @r2 tiled_uv_src
+ @r3 linear_x_size
+ @r4 linear_y_size
+ @r5 j
+ @r6 i
+ @r7 tiled_addr
+ @r8 linear_addr
+ @r9 aligned_x_size
+ @r10 temp1
+ @r11 temp2
+ @r12 temp3
+ @r14 temp4
+
+ stmfd sp!, {r4-r12,r14} @ backup registers
+
+ ldr r4, [sp, #40] @ load linear_y_size to r4
+
+ mov r9, #0
+
+LINEAR_X_SIZE_1024:
+ cmp r3, #1024
+ blt LINEAR_X_SIZE_512
+
+ mov r6, #0
+LINEAR_X_SIZE_1024_LOOP:
+ mov r7, #0 @ tiled_offset = 0@
+ mov r5, r6, asr #5 @ tiled_y_index = i>>5@
+ and r10, r5, #0x1
+ cmp r10, #0x1
+ bne LINEAR_X_SIZE_1024_LOOP_EVEN
+LINEAR_X_SIZE_1024_LOOP_ODD:
+ sub r7, r5, #1 @ tiled_offset = tiled_y_index-1@
+ add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
+ bic r10, #0x7F
+ mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)@
+ mul r7, r7, r10
+ mov r5, #8
+ mov r5, r5, lsl #11
+ sub r5, r5, #32
+ add r7, r7, #2 @ tiled_offset = tiled_offset+2@
+ mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@
+ add r11, r7, #2048
+ add r12, r7, #4096
+ add r14, r7, #6144
+ b LINEAR_X_SIZE_1024_LOOP_MEMCPY
+
+LINEAR_X_SIZE_1024_LOOP_EVEN:
+ add r11, r4, #31 @ temp2 = ((linear_y_size+31)>>5)<<5@
+ bic r11, r11, #0x1F
+ add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
+ bic r10, #0x7F
+ mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)@
+ mul r7, r5, r10
+ add r12, r6, #32
+ cmp r12, r11
+ mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@
+ add r11, r7, #2048
+ movlt r5, #8
+ addlt r12, r7, #12288
+ addlt r14, r7, #14336
+ movge r5, #4
+ addge r12, r7, #2048
+ addge r14, r7, #2048
+ mov r5, r5, lsl #11
+ sub r5, r5, #32
+
+LINEAR_X_SIZE_1024_LOOP_MEMCPY:
+ and r10, r6, #0x1F
+ mov r10, r10, lsl #6
+ add r10, r2, r10
+
+ add r7, r7, r10 @ tiled_addr = tiled_src+64*(temp1)
+ add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1)
+ pld [r11]
+ vld2.8 {q0, q1}, [r7]!
+ pld [r11, #32]
+ add r12, r12, r10 @ tiled_addr2 = tiled_src+64*(temp1)
+ vld2.8 {q2, q3}, [r7], r5
+ pld [r12]
+ vld2.8 {q4, q5}, [r11]!
+ pld [r12, #32]
+ add r14, r14, r10 @ tiled_addr3 = tiled_src+64*(temp1)
+ vld2.8 {q6, q7}, [r11], r5
+ pld [r14]
+ vld2.8 {q8, q9}, [r12]!
+ pld [r14, #32]
+ mov r10, r3, asr #1
+ vld2.8 {q10, q11}, [r12], r5
+ mul r10, r10, r6
+ vld2.8 {q12, q13}, [r14]!
+ vld2.8 {q14, q15}, [r14], r5
+
+ add r8, r0, r10
+ vst1.8 {q0}, [r8]!
+ vst1.8 {q2}, [r8]!
+ vst1.8 {q4}, [r8]!
+ vst1.8 {q6}, [r8]!
+ vst1.8 {q8}, [r8]!
+ vst1.8 {q10}, [r8]!
+ vst1.8 {q12}, [r8]!
+ vst1.8 {q14}, [r8]!
+
+ add r10, r1, r10
+ vst1.8 {q1}, [r10]!
+ vst1.8 {q3}, [r10]!
+ vst1.8 {q5}, [r10]!
+ vst1.8 {q7}, [r10]!
+ vst1.8 {q9}, [r10]!
+ vst1.8 {q11}, [r10]!
+ pld [r7]
+ vst1.8 {q13}, [r10]!
+ pld [r7, #32]
+ vst1.8 {q15}, [r10]!
+
+ pld [r11]
+ vld2.8 {q0, q1}, [r7]!
+ pld [r11, #32]
+ vld2.8 {q2, q3}, [r7], r5
+ pld [r12]
+ vld2.8 {q4, q5}, [r11]!
+ pld [r12, #32]
+ vld2.8 {q6, q7}, [r11], r5
+ pld [r14]
+ vld2.8 {q8, q9}, [r12]!
+ pld [r14, #32]
+ vld2.8 {q10, q11}, [r12], r5
+ vld2.8 {q12, q13}, [r14]!
+ vld2.8 {q14, q15}, [r14], r5
+
+ vst1.8 {q0}, [r8]!
+ vst1.8 {q2}, [r8]!
+ vst1.8 {q4}, [r8]!
+ vst1.8 {q6}, [r8]!
+ vst1.8 {q8}, [r8]!
+ vst1.8 {q10}, [r8]!
+ vst1.8 {q12}, [r8]!
+ vst1.8 {q14}, [r8]!
+
+ vst1.8 {q1}, [r10]!
+ vst1.8 {q3}, [r10]!
+ vst1.8 {q5}, [r10]!
+ vst1.8 {q7}, [r10]!
+ vst1.8 {q9}, [r10]!
+ vst1.8 {q11}, [r10]!
+ pld [r7]
+ vst1.8 {q13}, [r10]!
+ pld [r7, #32]
+ vst1.8 {q15}, [r10]!
+
+ pld [r11]
+ vld2.8 {q0, q1}, [r7]!
+ pld [r11, #32]
+ vld2.8 {q2, q3}, [r7], r5
+ pld [r12]
+ vld2.8 {q4, q5}, [r11]!
+ pld [r12, #32]
+ vld2.8 {q6, q7}, [r11], r5
+ pld [r14]
+ vld2.8 {q8, q9}, [r12]!
+ pld [r14, #32]
+ vld2.8 {q10, q11}, [r12], r5
+ vld2.8 {q12, q13}, [r14]!
+ vld2.8 {q14, q15}, [r14], r5
+
+ vst1.8 {q0}, [r8]!
+ vst1.8 {q2}, [r8]!
+ vst1.8 {q4}, [r8]!
+ vst1.8 {q6}, [r8]!
+ vst1.8 {q8}, [r8]!
+ vst1.8 {q10}, [r8]!
+ vst1.8 {q12}, [r8]!
+ vst1.8 {q14}, [r8]!
+
+ vst1.8 {q1}, [r10]!
+ vst1.8 {q3}, [r10]!
+ vst1.8 {q5}, [r10]!
+ vst1.8 {q7}, [r10]!
+ vst1.8 {q9}, [r10]!
+ vst1.8 {q11}, [r10]!
+ pld [r7]
+ vst1.8 {q13}, [r10]!
+ pld [r7, #32]
+ vst1.8 {q15}, [r10]!
+
+ pld [r11]
+ vld2.8 {q0, q1}, [r7]!
+ pld [r11, #32]
+ vld2.8 {q2, q3}, [r7]
+ pld [r12]
+ vld2.8 {q4, q5}, [r11]!
+ pld [r12, #32]
+ vld2.8 {q6, q7}, [r11]
+ pld [r14]
+ vld2.8 {q8, q9}, [r12]!
+ pld [r14, #32]
+ vld2.8 {q10, q11}, [r12]
+ vld2.8 {q12, q13}, [r14]!
+ vld2.8 {q14, q15}, [r14]
+
+ vst1.8 {q0}, [r8]!
+ vst1.8 {q2}, [r8]!
+ vst1.8 {q4}, [r8]!
+ vst1.8 {q6}, [r8]!
+ vst1.8 {q8}, [r8]!
+ vst1.8 {q10}, [r8]!
+ vst1.8 {q12}, [r8]!
+ vst1.8 {q14}, [r8]!
+
+ vst1.8 {q1}, [r10]!
+ vst1.8 {q3}, [r10]!
+ vst1.8 {q5}, [r10]!
+ vst1.8 {q7}, [r10]!
+ vst1.8 {q9}, [r10]!
+ vst1.8 {q11}, [r10]!
+ add r6, #1
+ vst1.8 {q13}, [r10]!
+ cmp r6, r4
+ vst1.8 {q15}, [r10]!
+
+ blt LINEAR_X_SIZE_1024_LOOP
+
+ mov r9, #1024
+
+LINEAR_X_SIZE_512:
+ sub r10, r3, r9
+ cmp r10, #512
+ blt LINEAR_X_SIZE_256
+
+ mov r6, #0
+LINEAR_X_SIZE_512_LOOP:
+ mov r7, #0 @ tiled_offset = 0@
+ mov r5, r6, asr #5 @ tiled_y_index = i>>5@
+ and r10, r5, #0x1
+ cmp r10, #0x1
+ bne LINEAR_X_SIZE_512_LOOP_EVEN
+LINEAR_X_SIZE_512_LOOP_ODD:
+ sub r7, r5, #1 @ tiled_offset = tiled_y_index-1@
+ add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
+ bic r10, #0x7F
+ mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)@
+ mul r7, r7, r10
+ mov r5, #8
+ mov r5, r5, lsl #11
+ add r7, r7, #2 @ tiled_offset = tiled_offset+2@
+ mov r10, r9, asr #5
+ add r7, r7, r10
+ mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@
+ add r11, r7, #2048
+ add r12, r7, #4096
+ add r14, r7, #6144
+ sub r5, r5, #32
+ b LINEAR_X_SIZE_512_LOOP_MEMCPY
+
+LINEAR_X_SIZE_512_LOOP_EVEN:
+ add r11, r4, #31 @ temp2 = ((linear_y_size+31)>>5)<<5@
+ bic r11, r11, #0x1F
+ add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
+ bic r10, #0x7F
+ mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)@
+ mul r7, r5, r10
+ add r12, r6, #32
+ cmp r12, r11
+ mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@
+ movlt r5, #8
+ movlt r10, r9, asr #5
+ movge r10, r9, asr #6
+ add r7, r7, r10, lsl #11
+ add r11, r7, #2048
+ addlt r12, r7, #12288
+ addlt r14, r7, #14336
+ movge r5, #4
+ addge r12, r7, #4096
+ addge r14, r7, #6144
+ mov r5, r5, lsl #11
+ sub r5, r5, #32
+
+LINEAR_X_SIZE_512_LOOP_MEMCPY:
+ and r10, r6, #0x1F
+ mov r10, r10, lsl #6
+ add r10, r2, r10
+
+ add r7, r7, r10 @ tiled_addr = tiled_src+64*(temp1)
+ add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1)
+ pld [r11]
+ vld2.8 {q0, q1}, [r7]!
+ pld [r11, #32]
+ add r12, r12, r10 @ tiled_addr2 = tiled_src+64*(temp1)
+ vld2.8 {q2, q3}, [r7], r5
+ pld [r12]
+ vld2.8 {q4, q5}, [r11]!
+ pld [r12, #32]
+ add r14, r14, r10 @ tiled_addr3 = tiled_src+64*(temp1)
+ vld2.8 {q6, q7}, [r11], r5
+ pld [r14]
+ mov r10, r3, asr #1
+ vld2.8 {q8, q9}, [r12]!
+ pld [r14, #32]
+ mul r10, r10, r6
+ vld2.8 {q10, q11}, [r12], r5
+ add r8, r0, r10
+ vld2.8 {q12, q13}, [r14]!
+ add r8, r8, r9, asr #1
+ vld2.8 {q14, q15}, [r14], r5
+
+ vst1.8 {q0}, [r8]!
+ vst1.8 {q2}, [r8]!
+ vst1.8 {q4}, [r8]!
+ vst1.8 {q6}, [r8]!
+ vst1.8 {q8}, [r8]!
+ vst1.8 {q10}, [r8]!
+ vst1.8 {q12}, [r8]!
+ add r10, r1, r10
+ vst1.8 {q14}, [r8]!
+
+ add r10, r10, r9, asr #1
+ vst1.8 {q1}, [r10]!
+ vst1.8 {q3}, [r10]!
+ vst1.8 {q5}, [r10]!
+ vst1.8 {q7}, [r10]!
+ vst1.8 {q9}, [r10]!
+ vst1.8 {q11}, [r10]!
+ pld [r7]
+ vst1.8 {q13}, [r10]!
+ pld [r7, #32]
+ vst1.8 {q15}, [r10]!
+
+ pld [r11]
+ vld2.8 {q0, q1}, [r7]!
+ pld [r11, #32]
+ vld2.8 {q2, q3}, [r7]
+ pld [r12]
+ vld2.8 {q4, q5}, [r11]!
+ pld [r12, #32]
+ vld2.8 {q6, q7}, [r11]
+ pld [r14]
+ vld2.8 {q8, q9}, [r12]!
+ pld [r14, #32]
+ vld2.8 {q10, q11}, [r12]
+ vld2.8 {q12, q13}, [r14]!
+ vld2.8 {q14, q15}, [r14]
+
+ vst1.8 {q0}, [r8]!
+ vst1.8 {q2}, [r8]!
+ vst1.8 {q4}, [r8]!
+ vst1.8 {q6}, [r8]!
+ vst1.8 {q8}, [r8]!
+ vst1.8 {q10}, [r8]!
+ vst1.8 {q12}, [r8]!
+ vst1.8 {q14}, [r8]!
+
+ vst1.8 {q1}, [r10]!
+ vst1.8 {q3}, [r10]!
+ vst1.8 {q5}, [r10]!
+ vst1.8 {q7}, [r10]!
+ vst1.8 {q9}, [r10]!
+ vst1.8 {q11}, [r10]!
+ add r6, #1
+ vst1.8 {q13}, [r10]!
+ cmp r6, r4
+ vst1.8 {q15}, [r10]!
+
+ blt LINEAR_X_SIZE_512_LOOP
+
+ add r9, r9, #512
+
+LINEAR_X_SIZE_256:
+ sub r10, r3, r9
+ cmp r10, #256
+ blt LINEAR_X_SIZE_128
+
+ mov r6, #0
+LINEAR_X_SIZE_256_LOOP:
+ mov r7, #0 @ tiled_offset = 0@
+ mov r5, r6, asr #5 @ tiled_y_index = i>>5@
+ and r10, r5, #0x1
+ cmp r10, #0x1
+ bne LINEAR_X_SIZE_256_LOOP_EVEN
+LINEAR_X_SIZE_256_LOOP_ODD:
+ sub r7, r5, #1 @ tiled_offset = tiled_y_index-1@
+ add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
+ bic r10, #0x7F
+ mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)@
+ mul r7, r7, r10
+ add r7, r7, #2 @ tiled_offset = tiled_offset+2@
+ mov r10, r9, asr #5
+ add r7, r7, r10
+ mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@
+ add r11, r7, #2048
+ add r12, r7, #4096
+ add r14, r7, #6144
+ b LINEAR_X_SIZE_256_LOOP_MEMCPY
+
+LINEAR_X_SIZE_256_LOOP_EVEN:
+ add r11, r4, #31 @ temp2 = ((linear_y_size+31)>>5)<<5@
+ bic r11, r11, #0x1F
+ add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
+ bic r10, #0x7F
+ mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)@
+ mul r7, r5, r10
+ mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@
+ add r12, r6, #32
+ cmp r12, r11
+ movlt r10, r9, asr #5
+ addlt r7, r7, r10, lsl #11
+ addlt r11, r7, #2048
+ addlt r12, r7, #12288
+ addlt r14, r7, #14336
+ movge r10, r9, asr #6
+ addge r7, r7, r10, lsl #11
+ addge r11, r7, #2048
+ addge r12, r7, #4096
+ addge r14, r7, #6144
+
+LINEAR_X_SIZE_256_LOOP_MEMCPY:
+ and r10, r6, #0x1F
+ mov r10, r10, lsl #6
+ add r10, r2, r10
+
+ add r7, r7, r10 @ tiled_addr = tiled_src+64*(temp1)
+ add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1)
+ pld [r11]
+ vld2.8 {q0, q1}, [r7]!
+ pld [r11, #32]
+ add r12, r12, r10 @ tiled_addr2 = tiled_src+64*(temp1)
+ vld2.8 {q2, q3}, [r7]
+ pld [r12]
+ vld2.8 {q4, q5}, [r11]!
+ pld [r12, #32]
+ add r14, r14, r10 @ tiled_addr3 = tiled_src+64*(temp1)
+ vld2.8 {q6, q7}, [r11]
+ pld [r14]
+ vld2.8 {q8, q9}, [r12]!
+ pld [r14, #32]
+ mov r10, r3, asr #1
+ vld2.8 {q10, q11}, [r12]
+ mul r10, r10, r6
+ vld2.8 {q12, q13}, [r14]!
+ add r8, r0, r10
+ vld2.8 {q14, q15}, [r14]
+
+ add r8, r8, r9, asr #1
+ vst1.8 {q0}, [r8]!
+ vst1.8 {q2}, [r8]!
+ vst1.8 {q4}, [r8]!
+ vst1.8 {q6}, [r8]!
+ vst1.8 {q8}, [r8]!
+ vst1.8 {q10}, [r8]!
+ vst1.8 {q12}, [r8]!
+ add r10, r1, r10
+ vst1.8 {q14}, [r8]!
+
+ add r10, r10, r9, asr #1
+ vst1.8 {q1}, [r10]!
+ vst1.8 {q3}, [r10]!
+ vst1.8 {q5}, [r10]!
+ vst1.8 {q7}, [r10]!
+ vst1.8 {q9}, [r10]!
+ vst1.8 {q11}, [r10]!
+ add r6, #1
+ vst1.8 {q13}, [r10]!
+ cmp r6, r4
+ vst1.8 {q15}, [r10]!
+ blt LINEAR_X_SIZE_256_LOOP
+
+ add r9, r9, #256
+
+LINEAR_X_SIZE_128:
+ sub r10, r3, r9
+ cmp r10, #128
+ blt LINEAR_X_SIZE_64
+
+ mov r6, #0
+LINEAR_X_SIZE_128_LOOP:
+ mov r7, #0 @ tiled_offset = 0@
+ mov r5, r6, asr #5 @ tiled_y_index = i>>5@
+ and r10, r5, #0x1
+ cmp r10, #0x1
+ bne LINEAR_X_SIZE_128_LOOP_EVEN
+LINEAR_X_SIZE_128_LOOP_ODD:
+ sub r7, r5, #1 @ tiled_offset = tiled_y_index-1@
+ add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
+ bic r10, #0x7F
+ mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)@
+ mul r7, r7, r10
+ add r7, r7, #2 @ tiled_offset = tiled_offset+2@
+ mov r10, r9, asr #5
+ add r7, r7, r10
+ mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@
+ add r11, r7, #2048
+ b LINEAR_X_SIZE_128_LOOP_MEMCPY
+
+LINEAR_X_SIZE_128_LOOP_EVEN:
+ add r11, r4, #31 @ temp2 = ((linear_y_size+31)>>5)<<5@
+ bic r11, r11, #0x1F
+ add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
+ bic r10, #0x7F
+ mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)@
+ mul r7, r5, r10
+ mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@
+ add r12, r6, #32
+ cmp r12, r11
+ movlt r10, r9, asr #5
+ movge r10, r9, asr #6
+ add r7, r7, r10, lsl #11
+ add r11, r7, #2048
+
+LINEAR_X_SIZE_128_LOOP_MEMCPY:
+ and r10, r6, #0x1F
+ mov r10, r10, lsl #6
+ add r10, r2, r10
+
+ add r7, r7, r10 @ tiled_addr = tiled_src+64*(temp1)
+ add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1)
+ pld [r11]
+ vld2.8 {q0, q1}, [r7]!
+ pld [r11, #32]
+ vld2.8 {q2, q3}, [r7]!
+ pld [r7]
+ vld2.8 {q4, q5}, [r11]!
+ mov r10, r3, asr #1
+ pld [r7, #32]
+ vld2.8 {q6, q7}, [r11]!
+ mul r10, r10, r6
+ pld [r11]
+ vld2.8 {q8, q9}, [r7]!
+ add r10, r10, r9, asr #1
+ pld [r11, #32]
+ vld2.8 {q10, q11}, [r7]!
+ add r8, r0, r10
+ vld2.8 {q12, q13}, [r11]!
+ mov r14, r3, asr #1
+ vld2.8 {q14, q15}, [r11]!
+
+ sub r14, r14, #48
+ vst1.8 {q0}, [r8]!
+ vst1.8 {q2}, [r8]!
+ vst1.8 {q4}, [r8]!
+ vst1.8 {q6}, [r8], r14
+ vst1.8 {q8}, [r8]!
+ vst1.8 {q10}, [r8]!
+ vst1.8 {q12}, [r8]!
+ vst1.8 {q14}, [r8]!
+
+ add r10, r1, r10
+ vst1.8 {q1}, [r10]!
+ vst1.8 {q3}, [r10]!
+ vst1.8 {q5}, [r10]!
+ vst1.8 {q7}, [r10], r14
+ vst1.8 {q9}, [r10]!
+ vst1.8 {q11}, [r10]!
+ add r6, #2
+ vst1.8 {q13}, [r10]!
+ cmp r6, r4
+ vst1.8 {q15}, [r10]!
+
+ blt LINEAR_X_SIZE_128_LOOP
+
+ add r9, r9, #128
+
+LINEAR_X_SIZE_64:
+ sub r10, r3, r9
+ cmp r10, #64
+ blt LINEAR_X_SIZE_4
+
+ mov r5, r9
+ mov r6, #0
+
+LINEAR_X_SIZE_64_LOOP:
+ bl GET_TILED_OFFSET
+
+LINEAR_X_SIZE_64_LOOP_MEMCPY:
+ and r10, r6, #0x1F
+ mov r14, r3, asr #1
+ mov r10, r10, lsl #6
+ sub r14, r14, #16
+ add r10, r2, r10
+
+ add r7, r7, r10 @ tiled_addr = tiled_src+64*(temp1)
+ pld [r7, #64]
+ vld2.8 {q0, q1}, [r7]!
+ mov r10, r3, asr #1
+ pld [r7, #64]
+ vld2.8 {q2, q3}, [r7]!
+ mul r10, r10, r6
+ vld2.8 {q4, q5}, [r7]!
+ add r10, r10, r9, asr #1
+ vld2.8 {q6, q7}, [r7]!
+ add r8, r0, r10
+
+ vst1.8 {q0}, [r8]!
+ vst1.8 {q2}, [r8], r14
+ vst1.8 {q4}, [r8]!
+ vst1.8 {q6}, [r8], r14
+
+ add r10, r1, r10
+ vst1.8 {q1}, [r10]!
+ vst1.8 {q3}, [r10], r14
+ add r6, #2
+ vst1.8 {q5}, [r10]!
+ cmp r6, r4
+ vst1.8 {q7}, [r10], r14
+
+ blt LINEAR_X_SIZE_64_LOOP
+
+ add r9, r9, #64
+
+LINEAR_X_SIZE_4:
+ cmp r9, r3
+ beq RESTORE_REG
+
+ mov r6, #0 @ i = 0
+LINEAR_Y_SIZE_4_LOOP:
+
+ mov r5, r9 @ j = aligned_x_size
+LINEAR_X_SIZE_4_LOOP:
+
+ bl GET_TILED_OFFSET
+
+ mov r11, r3, asr #1 @ temp1 = linear_x_size/2
+ mul r11, r11, r6 @ temp1 = temp1*(i)
+ add r11, r11, r5, asr #1 @ temp1 = temp1+j/2
+ mov r12, r3, asr #1 @ temp2 = linear_x_size/2
+ sub r12, r12, #1 @ temp2 = linear_x_size-1
+
+ add r8, r0, r11 @ linear_addr = linear_dest_u+temp1
+ add r11, r1, r11 @ temp1 = linear_dest_v+temp1
+ add r7, r2, r7 @ tiled_addr = tiled_src+tiled_addr
+ and r14, r6, #0x1F @ temp3 = i&0x1F@
+ mov r14, r14, lsl #6 @ temp3 = temp3*64
+ add r7, r7, r14 @ tiled_addr = tiled_addr+temp3
+ and r14, r5, #0x3F @ temp3 = j&0x3F
+ add r7, r7, r14 @ tiled_addr = tiled_addr+temp3
+
+ ldrh r10, [r7], #2
+ ldrh r14, [r7], #62
+ strb r10, [r8], #1
+ mov r10, r10, asr #8
+ strb r10, [r11], #1
+ strb r14, [r8], r12
+ mov r14, r14, asr #8
+ strb r14, [r11], r12
+
+ ldrh r10, [r7], #2
+ ldrh r14, [r7], #62
+ strb r10, [r8], #1
+ mov r10, r10, asr #8
+ strb r10, [r11], #1
+ strb r14, [r8], r12
+ mov r14, r14, asr #8
+ strb r14, [r11], r12
+
+ add r5, r5, #4 @ j = j+4
+ cmp r5, r3 @ j<linear_x_size
+ blt LINEAR_X_SIZE_4_LOOP
+
+ add r6, r6, #2 @ i = i+4
+ cmp r6, r4 @ i<linear_y_size
+ blt LINEAR_Y_SIZE_4_LOOP
+
+RESTORE_REG:
+ ldmfd sp!, {r4-r12,r15} @ restore registers
+
+GET_TILED_OFFSET:
+ @ Local helper: computes the byte offset of the 2048-byte tile that
+ @ contains linear position (j, i).
+ @ in : r5 = j (x position), r6 = i (y position),
+ @ r3 = linear_x_size, r4 = linear_y_size
+ @ out: r7 = tile index << 11 (offset of the tile; the caller adds the
+ @ in-tile row/column offset and the tiled_src base)
+ @ Clobbers r11, r12, r14 and the condition flags. lr is pushed on entry
+ @ because r14 is reused as a temporary; the return pops it into pc.
+ stmfd sp!, {r14}
+
+ mov r12, r6, asr #5 @ temp2 = i>>5 (tile row index)
+ mov r11, r5, asr #6 @ temp1 = j>>6 (tile column index)
+
+ and r14, r12, #0x1 @ if (temp2 & 0x1)
+ cmp r14, #0x1
+ bne GET_TILED_OFFSET_EVEN_FORMULA_1
+
+GET_TILED_OFFSET_ODD_FORMULA:
+ @ Odd tile row: ((temp2-1)*(aligned_x>>6) + temp1 + 2 + (temp1 & ~3)) << 11
+ sub r7, r12, #1 @ tiled_addr = temp2-1
+ add r14, r3, #127 @ temp3 = linear_x_size+127
+ bic r14, r14, #0x7F @ temp3 = (temp3 >>7)<<7
+ mov r14, r14, asr #6 @ temp3 = temp3>>6
+ mul r7, r7, r14 @ tiled_addr = tiled_addr*temp3
+ add r7, r7, r11 @ tiled_addr = tiled_addr+temp1
+ add r7, r7, #2 @ tiled_addr = tiled_addr+2
+ bic r14, r11, #0x3 @ temp3 = (temp1>>2)<<2
+ add r7, r7, r14 @ tiled_addr = tiled_addr+temp3
+ mov r7, r7, lsl #11 @ tiled_addr = tiled_addr<<11
+ b GET_TILED_OFFSET_RETURN
+
+GET_TILED_OFFSET_EVEN_FORMULA_1:
+ @ Even tile row that is not in the last 32-row band of the
+ @ 32-aligned height: (temp1 + ((temp1+2) & ~3) + temp2*(aligned_x>>6)) << 11
+ add r14, r4, #31 @ temp3 = linear_y_size+31
+ bic r14, r14, #0x1F @ temp3 = (temp3>>5)<<5
+ sub r14, r14, #32 @ temp3 = temp3 - 32
+ cmp r6, r14 @ if (i < temp3) { (32 was already subtracted above)
+ bge GET_TILED_OFFSET_EVEN_FORMULA_2
+ add r14, r11, #2 @ temp3 = temp1+2
+ bic r14, r14, #3 @ temp3 = (temp3>>2)<<2
+ add r7, r11, r14 @ tiled_addr = temp1+temp3
+ add r14, r3, #127 @ temp3 = linear_x_size+127
+ bic r14, r14, #0x7F @ temp3 = (temp3>>7)<<7
+ mov r14, r14, asr #6 @ temp3 = temp3>>6
+ mul r12, r12, r14 @ temp2 = temp2*temp3
+ add r7, r7, r12 @ tiled_addr = tiled_addr+temp2
+ mov r7, r7, lsl #11 @ tiled_addr = tiled_addr<<11
+ b GET_TILED_OFFSET_RETURN
+
+GET_TILED_OFFSET_EVEN_FORMULA_2:
+ @ Even tile row in the last 32-row band: (temp2*(aligned_x>>6) + temp1) << 11
+ add r14, r3, #127 @ temp3 = linear_x_size+127
+ bic r14, r14, #0x7F @ temp3 = (temp3>>7)<<7
+ mov r14, r14, asr #6 @ temp3 = temp3>>6
+ mul r7, r12, r14 @ tiled_addr = temp2*temp3
+ add r7, r7, r11 @ tiled_addr = tiled_addr+temp1
+ mov r7, r7, lsl #11 @ tiled_addr = tiled_addr<<11@
+
+GET_TILED_OFFSET_RETURN:
+ ldmfd sp!, {r15} @ return: pop the lr saved on entry straight into pc
+ .fnend
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_y_neon.s b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_y_neon.s
new file mode 100644
index 0000000..d71ee17
--- /dev/null
+++ b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_y_neon.s
@@ -0,0 +1,680 @@
+/*
+ *
+ * Copyright 2011 Samsung Electronics S.LSI Co. LTD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @file csc_nv12t_yuv420_y_neon.s
+ * @brief NEON routine converting a tiled NV12T plane to a linear YUV420 plane
+ * @author ShinWon Lee (shinwon.lee@samsung.com)
+ * @version 1.0
+ * @history
+ * 2011.7.01 : Create
+ */
+
+/*
+ * Converts tiled data to linear.
+ * 1. Y of NV12T to Y of YUV420P
+ * 2. Y of NV12T to Y of YUV420SP
+ * 3. UV of NV12T to UV of YUV420SP
+ *
+ * @param yuv420_dest
+ * Y or UV plane address of YUV420[out]
+ *
+ * @param nv12t_src
+ * Y or UV plane address of NV12T[in]
+ *
+ * @param yuv420_width
+ * Width of YUV420[in]
+ *
+ * @param yuv420_height
+ * Y: Height of YUV420, UV: Height/2 of YUV420[in]
+ */
+
+ .arch armv7-a
+ .text
+ .global csc_tiled_to_linear
+ .type csc_tiled_to_linear, %function
+csc_tiled_to_linear:
+ .fnstart
+
+ @ Copies one plane from the tiled NV12T layout to a linear buffer.
+ @ Tiles are 64 bytes wide by 32 rows (2048 bytes each); the linear row
+ @ pitch is linear_x_size bytes.
+ @
+ @ The columns are handled in vertical strips of 1024, 512, 256, 128 and
+ @ 64 bytes (each strip walks every row), followed by a 4-byte-wide tail
+ @ loop for whatever is left.
+ @
+ @ NOTE(review): the 128-byte strip writes 2 rows per iteration and the
+ @ 64-byte strip and the tail loop write 4 rows per iteration, so they
+ @ appear to assume linear_y_size is a multiple of 2 / 4 - confirm that
+ @ callers guarantee this.
+ @
+ @ Only r0-r3 are read as arguments; nothing is read from the stack.
+
+ @r0 linear_dest
+ @r1 tiled_src
+ @r2 linear_x_size
+ @r3 linear_y_size
+ @r4 j (also reused as the tile-to-tile load stride in the wide strips)
+ @r5 i
+ @r6 tiled_addr
+ @r7 linear_addr
+ @r8 aligned_x_size (number of columns already converted)
+ @r9 scratch (store stride / row offset)
+ @r10 temp1
+ @r11 temp2
+ @r12 temp3
+ @r14 temp4
+
+ stmfd sp!, {r4-r12,r14} @ backup registers
+ vpush {d8-d15} @ q4-q7 are callee-saved (AAPCS) and are clobbered below
+
+ mov r8, #0
+ cmp r2, #1024
+ blt LINEAR_X_SIZE_512
+
+LINEAR_X_SIZE_1024:
+ @ Columns 0..1023, one row per iteration (16 tiles x 64 bytes).
+
+ mov r5, #0
+LINEAR_X_SIZE_1024_LOOP:
+ mov r6, #0 @ tiled_offset = 0
+ mov r4, r5, asr #5 @ tiled_y_index = i>>5
+ and r10, r4, #0x1
+ cmp r10, #0x1
+ bne LINEAR_X_SIZE_1024_LOOP_EVEN
+LINEAR_X_SIZE_1024_LOOP_ODD:
+ sub r6, r4, #1 @ tiled_offset = tiled_y_index-1
+ add r10, r2, #127 @ temp1 = ((linear_x_size+127)>>7)<<7
+ bic r10, #0x7F
+ mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)
+ mul r6, r6, r10
+ mov r4, #8 @ load stride = 8 tiles ...
+ mov r4, r4, lsl #11
+ sub r4, r4, #32 @ ... minus the 32 bytes already post-incremented
+ add r6, r6, #2 @ tiled_offset = tiled_offset+2
+ mov r6, r6, lsl #11 @ tiled_offset = tiled_offset<<11
+ add r11, r6, #2048 @ next three tiles follow consecutively
+ add r12, r6, #4096
+ add r14, r6, #6144
+ b LINEAR_X_SIZE_1024_LOOP_MEMCPY
+
+LINEAR_X_SIZE_1024_LOOP_EVEN:
+ add r11, r3, #31 @ temp2 = ((linear_y_size+31)>>5)<<5
+ bic r11, r11, #0x1F
+ add r10, r2, #127 @ temp1 = ((linear_x_size+127)>>7)<<7
+ bic r10, #0x7F
+ mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)
+ mul r6, r4, r10
+ add r12, r5, #32
+ cmp r12, r11 @ lt: not the last 32-row band, ge: last band
+ mov r6, r6, lsl #11 @ tiled_offset = tiled_offset<<11
+ add r11, r6, #2048
+ movlt r4, #8 @ not last band: tiles +0,+1,+6,+7, stride 8 tiles
+ addlt r12, r6, #12288
+ addlt r14, r6, #14336
+ movge r4, #4 @ last band: tiles +0,+1,+2,+3, stride 4 tiles
+ addge r12, r6, #4096
+ addge r14, r6, #6144
+ mov r4, r4, lsl #11
+ sub r4, r4, #32
+
+LINEAR_X_SIZE_1024_LOOP_MEMCPY:
+ and r10, r5, #0x1F
+ mov r10, r10, lsl #6
+ add r10, r1, r10 @ temp1 = tiled_src + 64*(i&0x1F)
+
+ add r6, r6, r10 @ tiled_addr = tiled_src+64*(temp1)
+ add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1)
+ pld [r11]
+ vld1.8 {q0, q1}, [r6]!
+ pld [r11, #32]
+ add r12, r12, r10 @ tiled_addr2 = tiled_src+64*(temp1)
+ vld1.8 {q2, q3}, [r6], r4
+ pld [r12]
+ vld1.8 {q4, q5}, [r11]!
+ pld [r12, #32]
+ add r14, r14, r10 @ tiled_addr3 = tiled_src+64*(temp1)
+ vld1.8 {q6, q7}, [r11], r4
+ pld [r14]
+ vld1.8 {q8, q9}, [r12]!
+ pld [r14, #32]
+ mul r7, r2, r5 @ linear_addr = linear_x_size*i
+ vld1.8 {q10, q11}, [r12], r4
+ add r7, r7, r0 @ linear_addr = linear_addr+linear_dest
+ vld1.8 {q12, q13}, [r14]!
+ vld1.8 {q14, q15}, [r14], r4
+
+ @ bytes 0..255 of the row
+ vst1.8 {q0, q1}, [r7]!
+ vst1.8 {q2, q3}, [r7]!
+ vst1.8 {q4, q5}, [r7]!
+ vst1.8 {q6, q7}, [r7]!
+ vst1.8 {q8, q9}, [r7]!
+ vst1.8 {q10, q11}, [r7]!
+ pld [r6]
+ vst1.8 {q12, q13}, [r7]!
+ pld [r6, #32]
+ vst1.8 {q14, q15}, [r7]!
+
+ pld [r11]
+ vld1.8 {q0, q1}, [r6]!
+ pld [r11, #32]
+ vld1.8 {q2, q3}, [r6], r4
+
+ pld [r12]
+ vld1.8 {q4, q5}, [r11]!
+ pld [r12, #32]
+ vld1.8 {q6, q7}, [r11], r4
+ pld [r14]
+ vld1.8 {q8, q9}, [r12]!
+ pld [r14, #32]
+ vld1.8 {q10, q11}, [r12], r4
+ vld1.8 {q12, q13}, [r14]!
+ vld1.8 {q14, q15}, [r14], r4
+
+ @ bytes 256..511 of the row
+ vst1.8 {q0, q1}, [r7]!
+ vst1.8 {q2, q3}, [r7]!
+ vst1.8 {q4, q5}, [r7]!
+ vst1.8 {q6, q7}, [r7]!
+ vst1.8 {q8, q9}, [r7]!
+ vst1.8 {q10, q11}, [r7]!
+ pld [r6]
+ vst1.8 {q12, q13}, [r7]!
+ pld [r6, #32]
+ vst1.8 {q14, q15}, [r7]!
+
+ pld [r11]
+ vld1.8 {q0, q1}, [r6]!
+ pld [r11, #32]
+ vld1.8 {q2, q3}, [r6], r4
+ pld [r12]
+ vld1.8 {q4, q5}, [r11]!
+ pld [r12, #32]
+ vld1.8 {q6, q7}, [r11], r4
+ pld [r14]
+ vld1.8 {q8, q9}, [r12]!
+ pld [r14, #32]
+ vld1.8 {q10, q11}, [r12], r4
+ vld1.8 {q12, q13}, [r14]!
+ vld1.8 {q14, q15}, [r14], r4
+
+ @ bytes 512..767 of the row
+ vst1.8 {q0, q1}, [r7]!
+ vst1.8 {q2, q3}, [r7]!
+ vst1.8 {q4, q5}, [r7]!
+ vst1.8 {q6, q7}, [r7]!
+ vst1.8 {q8, q9}, [r7]!
+ vst1.8 {q10, q11}, [r7]!
+ pld [r6]
+ vst1.8 {q12, q13}, [r7]!
+ pld [r6, #32]
+ vst1.8 {q14, q15}, [r7]!
+
+ pld [r11]
+ vld1.8 {q0, q1}, [r6]!
+ pld [r11, #32]
+ vld1.8 {q2, q3}, [r6]
+ pld [r12]
+ vld1.8 {q4, q5}, [r11]!
+ pld [r12, #32]
+ vld1.8 {q6, q7}, [r11]
+ pld [r14]
+ vld1.8 {q8, q9}, [r12]!
+ pld [r14, #32]
+ vld1.8 {q10, q11}, [r12]
+ vld1.8 {q12, q13}, [r14]!
+ vld1.8 {q14, q15}, [r14]
+
+ @ bytes 768..1023 of the row
+ vst1.8 {q0, q1}, [r7]!
+ vst1.8 {q2, q3}, [r7]!
+ vst1.8 {q4, q5}, [r7]!
+ vst1.8 {q6, q7}, [r7]!
+ vst1.8 {q8, q9}, [r7]!
+ vst1.8 {q10, q11}, [r7]!
+ add r5, #1 @ i = i+1
+ vst1.8 {q12, q13}, [r7]!
+ cmp r5, r3 @ i<linear_y_size
+ vst1.8 {q14, q15}, [r7]!
+
+ blt LINEAR_X_SIZE_1024_LOOP
+
+ mov r8, #1024 @ aligned_x_size = 1024
+
+LINEAR_X_SIZE_512:
+ @ Next 512 columns (if at least 512 remain), one row per iteration.
+
+ sub r14, r2, r8
+ cmp r14, #512
+ blt LINEAR_X_SIZE_256
+
+ mov r5, #0
+LINEAR_X_SIZE_512_LOOP:
+ mov r6, #0
+ mov r4, r5, asr #5 @ tiled_y_index = i>>5
+ and r10, r4, #0x1
+ cmp r10, #0x1
+ bne LINEAR_X_SIZE_512_LOOP_EVEN
+
+LINEAR_X_SIZE_512_LOOP_ODD:
+ sub r6, r4, #1
+ add r10, r2, #127 @ temp1 = ((linear_x_size+127)>>7)<<7
+ bic r10, #0x7F
+ mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)
+ mul r6, r6, r10
+ mov r4, #8
+ mov r4, r4, lsl #11
+ sub r4, r4, #32
+ add r6, r6, #2 @ tiled_offset = tiled_offset+2
+ mov r10, r8, asr #5 @ temp1 = aligned_x_size>>5
+ add r6, r6, r10 @ tiled_offset = tiled_offset+temp1
+ mov r6, r6, lsl #11
+ add r11, r6, #2048
+ add r12, r6, #4096
+ add r14, r6, #6144
+ b LINEAR_X_SIZE_512_LOOP_MEMCPY
+
+LINEAR_X_SIZE_512_LOOP_EVEN:
+ add r11, r3, #31 @ temp2 = ((linear_y_size+31)>>5)<<5
+ bic r11, r11, #0x1F
+ add r10, r2, #127 @ temp1 = ((linear_x_size+127)>>7)<<7
+ bic r10, #0x7F
+ mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)
+ mul r6, r4, r10
+ add r12, r5, #32
+ cmp r12, r11
+ mov r6, r6, lsl #11 @ tiled_offset = tiled_offset<<11
+ movlt r4, #8
+ movlt r10, r8, asr #5 @ temp1 = aligned_x_size>>5
+ movge r10, r8, asr #6 @ temp1 = aligned_x_size>>6
+ add r6, r6, r10, lsl #11 @ tiled_offset = tiled_offset+(temp1<<11)
+ add r11, r6, #2048
+ addlt r12, r6, #12288
+ addlt r14, r6, #14336
+ movge r4, #4
+ addge r12, r6, #4096
+ addge r14, r6, #6144
+ mov r4, r4, lsl #11
+ sub r4, r4, #32
+
+LINEAR_X_SIZE_512_LOOP_MEMCPY:
+ and r10, r5, #0x1F
+ mov r10, r10, lsl #6
+ add r10, r1, r10
+
+ add r6, r6, r10 @ tiled_addr = tiled_src+64*(temp1)
+ add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1)
+ pld [r11]
+ vld1.8 {q0, q1}, [r6]!
+ pld [r11, #32]
+ add r12, r12, r10 @ tiled_addr2 = tiled_src+64*(temp1)
+ vld1.8 {q2, q3}, [r6], r4
+ pld [r12]
+ vld1.8 {q4, q5}, [r11]!
+ pld [r12, #32]
+ add r14, r14, r10 @ tiled_addr3 = tiled_src+64*(temp1)
+ vld1.8 {q6, q7}, [r11], r4
+ pld [r14]
+ vld1.8 {q8, q9}, [r12]!
+ pld [r14, #32]
+ mul r7, r2, r5 @ linear_addr = linear_x_size*i
+ vld1.8 {q10, q11}, [r12], r4
+ add r7, r7, r8 @ linear_addr = linear_addr+aligned_x_size
+ vld1.8 {q12, q13}, [r14]!
+ vld1.8 {q14, q15}, [r14], r4
+
+ add r7, r7, r0 @ linear_addr = linear_addr+linear_dest
+ vst1.8 {q0, q1}, [r7]!
+ vst1.8 {q2, q3}, [r7]!
+ vst1.8 {q4, q5}, [r7]!
+ vst1.8 {q6, q7}, [r7]!
+ vst1.8 {q8, q9}, [r7]!
+ vst1.8 {q10, q11}, [r7]!
+ pld [r6]
+ vst1.8 {q12, q13}, [r7]!
+ pld [r6, #32]
+ vst1.8 {q14, q15}, [r7]!
+
+ pld [r11]
+ vld1.8 {q0, q1}, [r6]!
+ pld [r11, #32]
+ vld1.8 {q2, q3}, [r6], r4
+ pld [r12]
+ vld1.8 {q4, q5}, [r11]!
+ pld [r12, #32]
+ vld1.8 {q6, q7}, [r11], r4
+ pld [r14]
+ vld1.8 {q8, q9}, [r12]!
+ pld [r14, #32]
+ vld1.8 {q10, q11}, [r12], r4
+ vld1.8 {q12, q13}, [r14]!
+ vld1.8 {q14, q15}, [r14], r4
+
+ vst1.8 {q0, q1}, [r7]!
+ vst1.8 {q2, q3}, [r7]!
+ vst1.8 {q4, q5}, [r7]!
+ vst1.8 {q6, q7}, [r7]!
+ vst1.8 {q8, q9}, [r7]!
+ vst1.8 {q10, q11}, [r7]!
+ add r5, #1 @ i = i+1
+ vst1.8 {q12, q13}, [r7]!
+ cmp r5, r3 @ i<linear_y_size
+ vst1.8 {q14, q15}, [r7]!
+
+ blt LINEAR_X_SIZE_512_LOOP
+
+ add r8, r8, #512 @ aligned_x_size += 512
+
+LINEAR_X_SIZE_256:
+ @ Next 256 columns (if at least 256 remain), one row per iteration.
+
+ sub r14, r2, r8
+ cmp r14, #256
+ blt LINEAR_X_SIZE_128
+
+ mov r5, #0
+LINEAR_X_SIZE_256_LOOP:
+ mov r6, #0
+ mov r4, r5, asr #5 @ tiled_y_index = i>>5
+ and r10, r4, #0x1
+ cmp r10, #0x1
+ bne LINEAR_X_SIZE_256_LOOP_EVEN
+
+LINEAR_X_SIZE_256_LOOP_ODD:
+ sub r6, r4, #1
+ add r10, r2, #127 @ temp1 = ((linear_x_size+127)>>7)<<7
+ bic r10, #0x7F
+ mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)
+ mul r6, r6, r10
+ add r6, r6, #2 @ tiled_offset = tiled_offset+2
+ mov r10, r8, asr #5 @ temp1 = aligned_x_size>>5
+ add r6, r6, r10 @ tiled_offset = tiled_offset+temp1
+ mov r6, r6, lsl #11
+ add r11, r6, #2048
+ add r12, r6, #4096
+ add r14, r6, #6144
+ b LINEAR_X_SIZE_256_LOOP_MEMCPY
+
+LINEAR_X_SIZE_256_LOOP_EVEN:
+ add r11, r3, #31 @ temp2 = ((linear_y_size+31)>>5)<<5
+ bic r11, r11, #0x1F
+ add r10, r2, #127 @ temp1 = ((linear_x_size+127)>>7)<<7
+ bic r10, #0x7F
+ mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)
+ mul r6, r4, r10
+ mov r6, r6, lsl #11 @ tiled_offset = tiled_offset<<11
+ add r12, r5, #32
+ cmp r12, r11
+ movlt r10, r8, asr #5 @ temp1 = aligned_x_size>>5
+ movge r10, r8, asr #6 @ temp1 = aligned_x_size>>6
+ add r6, r6, r10, lsl #11 @ tiled_offset = tiled_offset+(temp1<<11)
+ add r11, r6, #2048
+ addlt r12, r6, #12288
+ addlt r14, r6, #14336
+ addge r12, r6, #4096
+ addge r14, r6, #6144
+
+LINEAR_X_SIZE_256_LOOP_MEMCPY:
+ and r10, r5, #0x1F
+ mov r10, r10, lsl #6
+ add r10, r1, r10
+
+ add r6, r6, r10 @ tiled_addr = tiled_src+64*(temp1)
+ add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1)
+ pld [r11]
+ vld1.8 {q0, q1}, [r6]!
+ pld [r11, #32]
+ add r12, r12, r10 @ tiled_addr2 = tiled_src+64*(temp1)
+ vld1.8 {q2, q3}, [r6]
+ pld [r12]
+ vld1.8 {q4, q5}, [r11]!
+ pld [r12, #32]
+ add r14, r14, r10 @ tiled_addr3 = tiled_src+64*(temp1)
+ vld1.8 {q6, q7}, [r11]
+ pld [r14]
+ mul r7, r2, r5 @ linear_addr = linear_x_size*i
+ vld1.8 {q8, q9}, [r12]!
+ pld [r14, #32]
+ add r7, r7, r8 @ linear_addr = linear_addr+aligned_x_size
+ vld1.8 {q10, q11}, [r12]
+ add r7, r7, r0 @ linear_addr = linear_addr+linear_dest
+ vld1.8 {q12, q13}, [r14]!
+ vld1.8 {q14, q15}, [r14]
+
+ vst1.8 {q0, q1}, [r7]!
+ vst1.8 {q2, q3}, [r7]!
+ vst1.8 {q4, q5}, [r7]!
+ vst1.8 {q6, q7}, [r7]!
+ vst1.8 {q8, q9}, [r7]!
+ vst1.8 {q10, q11}, [r7]!
+ add r5, #1 @ i = i+1
+ vst1.8 {q12, q13}, [r7]!
+ cmp r5, r3 @ i<linear_y_size
+ vst1.8 {q14, q15}, [r7]!
+
+ blt LINEAR_X_SIZE_256_LOOP
+
+ add r8, r8, #256 @ aligned_x_size += 256
+
+LINEAR_X_SIZE_128:
+ @ Next 128 columns (if at least 128 remain), two rows per iteration.
+
+ sub r14, r2, r8
+ cmp r14, #128
+ blt LINEAR_X_SIZE_64
+
+ mov r5, #0
+LINEAR_X_SIZE_128_LOOP:
+ mov r6, #0
+ mov r4, r5, asr #5 @ tiled_y_index = i>>5
+ and r10, r4, #0x1
+ cmp r10, #0x1
+ bne LINEAR_X_SIZE_128_LOOP_EVEN
+
+LINEAR_X_SIZE_128_LOOP_ODD:
+ sub r6, r4, #1
+ add r10, r2, #127 @ temp1 = ((linear_x_size+127)>>7)<<7
+ bic r10, #0x7F
+ mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)
+ mul r6, r6, r10
+ add r6, r6, #2 @ tiled_offset = tiled_offset+2
+ mov r10, r8, asr #5 @ temp1 = aligned_x_size>>5
+ add r6, r6, r10 @ tiled_offset = tiled_offset+temp1
+ mov r6, r6, lsl #11
+ add r11, r6, #2048
+ b LINEAR_X_SIZE_128_LOOP_MEMCPY
+
+LINEAR_X_SIZE_128_LOOP_EVEN:
+ add r11, r3, #31 @ temp2 = ((linear_y_size+31)>>5)<<5
+ bic r11, r11, #0x1F
+ add r10, r2, #127 @ temp1 = ((linear_x_size+127)>>7)<<7
+ bic r10, #0x7F
+ mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)
+ mul r6, r4, r10
+ mov r6, r6, lsl #11 @ tiled_offset = tiled_offset<<11
+ add r12, r5, #32
+ cmp r12, r11
+ movlt r10, r8, asr #5 @ temp1 = aligned_x_size>>5
+ movge r10, r8, asr #6 @ temp1 = aligned_x_size>>6
+ add r6, r6, r10, lsl #11 @ tiled_offset = tiled_offset+(temp1<<11)
+ add r11, r6, #2048
+
+LINEAR_X_SIZE_128_LOOP_MEMCPY:
+ and r10, r5, #0x1F
+ mov r10, r10, lsl #6
+ add r10, r1, r10
+
+ add r6, r6, r10 @ tiled_addr = tiled_src+64*(temp1)
+ add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1)
+ pld [r6, #64]
+ vld1.8 {q0, q1}, [r6]! @ row i, first tile
+ pld [r6, #64]
+ vld1.8 {q2, q3}, [r6]!
+ mul r7, r2, r5 @ linear_addr = linear_x_size*i
+ pld [r11]
+ vld1.8 {q4, q5}, [r6]! @ row i+1, first tile
+ add r7, r7, r8 @ linear_addr = linear_addr+aligned_x_size
+ pld [r11, #32]
+ vld1.8 {q6, q7}, [r6]
+ add r7, r7, r0 @ linear_addr = linear_addr+linear_dest
+ pld [r11, #64]
+ vld1.8 {q8, q9}, [r11]! @ row i, second tile
+ pld [r11, #64]
+ vld1.8 {q10, q11}, [r11]!
+ vld1.8 {q12, q13}, [r11]! @ row i+1, second tile
+ vld1.8 {q14, q15}, [r11]
+
+ sub r9, r2, #96 @ stride taking linear_addr to the start of row i+1
+ vst1.8 {q0, q1}, [r7]!
+ vst1.8 {q2, q3}, [r7]!
+ vst1.8 {q8, q9}, [r7]!
+ vst1.8 {q10, q11}, [r7], r9
+ vst1.8 {q4, q5}, [r7]!
+ vst1.8 {q6, q7}, [r7]!
+ add r5, #2 @ i = i+2
+ vst1.8 {q12, q13}, [r7]!
+ cmp r5, r3 @ i<linear_y_size
+ vst1.8 {q14, q15}, [r7]
+
+ blt LINEAR_X_SIZE_128_LOOP
+
+ add r8, r8, #128 @ aligned_x_size += 128
+
+LINEAR_X_SIZE_64:
+ @ Next 64 columns (if at least 64 remain), four rows per iteration.
+
+ sub r14, r2, r8
+ cmp r14, #64
+ blt LINEAR_X_SIZE_4
+
+ mov r5, #0 @ i = 0
+ mov r4, r8 @ j = aligned_x_size
+
+LINEAR_X_SIZE_64_LOOP:
+
+ bl GET_TILED_OFFSET @ r6 = tile offset for (j, i); lr was saved in the prologue
+
+ add r6, r1, r6 @ tiled_addr = tiled_src+tiled_addr
+ and r11, r5, #0x1F @ temp2 = i&0x1F
+ mov r11, r11, lsl #6 @ temp2 = 64*temp2
+ add r6, r6, r11 @ tiled_addr = tiled_addr+temp2
+
+ pld [r6, #64]
+ vld1.8 {q0, q1}, [r6]! @ load {tiled_addr}
+ mul r10, r2, r5 @ temp1 = linear_x_size*(i)
+ pld [r6, #64]
+ vld1.8 {q2, q3}, [r6]!
+ pld [r6, #64]
+ vld1.8 {q4, q5}, [r6]! @ load {tiled_addr+64*1}
+ pld [r6, #64]
+ vld1.8 {q6, q7}, [r6]!
+ pld [r6, #64]
+ vld1.8 {q8, q9}, [r6]! @ load {tiled_addr+64*2}
+ pld [r6, #64]
+ vld1.8 {q10, q11}, [r6]!
+ add r7, r0, r4 @ linear_addr = linear_dest+j
+ vld1.8 {q12, q13}, [r6]! @ load {tiled_addr+64*3}
+ add r7, r7, r10 @ linear_addr = linear_addr+temp1
+ vld1.8 {q14, q15}, [r6]!
+ sub r10, r2, #32 @ temp1 = linear_x_size-32
+
+ vst1.8 {q0, q1}, [r7]! @ store {linear_addr, 64}
+ vst1.8 {q2, q3}, [r7], r10
+ vst1.8 {q4, q5}, [r7]! @ store {linear_addr+linear_x_size*1, 64}
+ vst1.8 {q6, q7}, [r7], r10
+ vst1.8 {q8, q9}, [r7]! @ store {linear_addr+linear_x_size*2, 64}
+ vst1.8 {q10, q11}, [r7], r10
+ add r5, #4 @ i = i+4
+ vst1.8 {q12, q13}, [r7]! @ store {linear_addr+linear_x_size*3, 64}
+ cmp r5, r3 @ i<linear_y_size
+ vst1.8 {q14, q15}, [r7], r10
+
+ blt LINEAR_X_SIZE_64_LOOP
+
+ add r8, r8, #64 @ aligned_x_size += 64
+
+LINEAR_X_SIZE_4:
+ @ Remaining columns: 4 bytes x 4 rows per inner iteration.
+ cmp r8, r2
+ beq RESTORE_REG
+
+ mov r5, #0 @ i = 0
+LINEAR_Y_SIZE_4_LOOP:
+
+ mov r4, r8 @ j = aligned_x_size
+LINEAR_X_SIZE_4_LOOP:
+
+ bl GET_TILED_OFFSET
+
+ and r10, r5, #0x1F @ temp1 = i&0x1F
+ and r11, r4, #0x3F @ temp2 = j&0x3F
+
+ add r6, r6, r1 @ tiled_addr = tiled_src+tiled_addr
+ add r6, r6, r11 @ tiled_addr = tiled_addr+temp2
+ add r6, r6, r10, lsl #6 @ tiled_addr = tiled_addr+64*temp1
+
+ ldr r10, [r6], #64 @ 4 bytes from each of 4 consecutive tile rows
+ add r7, r0, r4 @ linear_addr = linear_dest+j
+ ldr r11, [r6], #64
+ mul r9, r2, r5 @ r9 = linear_x_size*i
+ ldr r12, [r6], #64
+ add r7, r7, r9 @ linear_addr = linear_addr+r9
+ ldr r14, [r6], #64
+
+ str r10, [r7], r2 @ one word per linear row
+ str r11, [r7], r2
+ str r12, [r7], r2
+ str r14, [r7], r2
+
+ add r4, r4, #4 @ j = j+4
+ cmp r4, r2 @ j<linear_x_size
+ blt LINEAR_X_SIZE_4_LOOP
+
+ add r5, r5, #4 @ i = i+4
+ cmp r5, r3 @ i<linear_y_size
+ blt LINEAR_Y_SIZE_4_LOOP
+
+RESTORE_REG:
+ vpop {d8-d15} @ restore callee-saved NEON registers
+ ldmfd sp!, {r4-r12,r15} @ restore registers and return
+
+GET_TILED_OFFSET:
+ @ Local helper: computes the byte offset of the 2048-byte tile that
+ @ contains linear position (j, i).
+ @ in : r4 = j, r5 = i, r2 = linear_x_size, r3 = linear_y_size
+ @ out: r6 = tile index << 11
+ @ Clobbers r10, r11, r12 and the condition flags. Does not use the stack.
+
+ mov r11, r5, asr #5 @ temp2 = i>>5
+ mov r10, r4, asr #6 @ temp1 = j>>6
+
+ and r12, r11, #0x1 @ if (temp2 & 0x1)
+ cmp r12, #0x1
+ bne GET_TILED_OFFSET_EVEN_FORMULA_1
+
+GET_TILED_OFFSET_ODD_FORMULA:
+ sub r6, r11, #1 @ tiled_addr = temp2-1
+ add r12, r2, #127 @ temp3 = linear_x_size+127
+ bic r12, r12, #0x7F @ temp3 = (temp3 >>7)<<7
+ mov r12, r12, asr #6 @ temp3 = temp3>>6
+ mul r6, r6, r12 @ tiled_addr = tiled_addr*temp3
+ add r6, r6, r10 @ tiled_addr = tiled_addr+temp1
+ add r6, r6, #2 @ tiled_addr = tiled_addr+2
+ bic r12, r10, #0x3 @ temp3 = (temp1>>2)<<2
+ add r6, r6, r12 @ tiled_addr = tiled_addr+temp3
+ mov r6, r6, lsl #11 @ tiled_addr = tiled_addr<<11
+ b GET_TILED_OFFSET_RETURN
+
+GET_TILED_OFFSET_EVEN_FORMULA_1:
+ add r12, r3, #31 @ temp3 = linear_y_size+31
+ bic r12, r12, #0x1F @ temp3 = (temp3>>5)<<5
+ sub r12, r12, #32 @ temp3 = temp3 - 32
+ cmp r5, r12 @ if (i < temp3) { (32 was already subtracted above)
+ bge GET_TILED_OFFSET_EVEN_FORMULA_2
+ add r12, r10, #2 @ temp3 = temp1+2
+ bic r12, r12, #3 @ temp3 = (temp3>>2)<<2
+ add r6, r10, r12 @ tiled_addr = temp1+temp3
+ add r12, r2, #127 @ temp3 = linear_x_size+127
+ bic r12, r12, #0x7F @ temp3 = (temp3>>7)<<7
+ mov r12, r12, asr #6 @ temp3 = temp3>>6
+ mul r11, r11, r12 @ temp2 = temp2*temp3
+ add r6, r6, r11 @ tiled_addr = tiled_addr+temp2
+ mov r6, r6, lsl #11 @ tiled_addr = tiled_addr<<11
+ b GET_TILED_OFFSET_RETURN
+
+GET_TILED_OFFSET_EVEN_FORMULA_2:
+ add r12, r2, #127 @ temp3 = linear_x_size+127
+ bic r12, r12, #0x7F @ temp3 = (temp3>>7)<<7
+ mov r12, r12, asr #6 @ temp3 = temp3>>6
+ mul r6, r11, r12 @ tiled_addr = temp2*temp3
+ add r6, r6, r10 @ tiled_addr = tiled_addr+temp1
+ mov r6, r6, lsl #11 @ tiled_addr = tiled_addr<<11
+
+GET_TILED_OFFSET_RETURN:
+ mov pc, lr
+ .fnend
+
+
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_uv_neon.s b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_uv_neon.s
new file mode 100644
index 0000000..dd2c879
--- /dev/null
+++ b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_uv_neon.s
@@ -0,0 +1,573 @@
+/*
+ *
+ * Copyright 2011 Samsung Electronics S.LSI Co. LTD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @file csc_yuv420_nv12t_uv_neon.s
+ * @brief Interleaves the linear U and V planes of YUV420P into the tiled UV plane of NV12T (NEON)
+ * @author ShinWon Lee (shinwon.lee@samsung.com)
+ * @version 1.0
+ * @history
+ * 2011.7.01 : Create
+ */
+
+/*
+ * Converts and Interleaves linear to tiled
+ * 1. UV of YUV420P to UV of NV12T
+ *
+ * @param nv12t_uv_dest
+ * UV plane address of NV12T[out]
+ *
+ * @param yuv420p_u_src
+ * U plane address of YUV420P[in]
+ *
+ * @param yuv420p_v_src
+ * V plane address of YUV420P[in]
+ *
+ * @param yuv420_width
+ * Width of YUV420[in]
+ *
+ * @param yuv420_uv_height
+ * Height/2 of YUV420[in]
+ */
+
+ .arch armv7-a
+ .text
+ .global csc_linear_to_tiled_interleave
+ .type csc_linear_to_tiled_interleave, %function
+csc_linear_to_tiled_interleave: @ interleave linear U and V planes into the tiled UV plane
+ .fnstart
+
+ @r0 tiled_dest
+ @r1 linear_src_u
+ @r2 linear_src_v
+ @r3 linear_x_size
+ @r4 linear_y_size
+ @r5 j
+ @r6 i
+ @r7 tiled_addr
+ @r8 linear_addr
+ @r9 aligned_x_size
+ @r10 aligned_y_size
+ @r11 temp1
+ @r12 temp2
+ @r14 temp3
+
+ stmfd sp!, {r4-r12,r14} @ backup registers (10 words = 40 bytes)
+
+ ldr r4, [sp, #40] @ load linear_y_size to r4 (stack slot just above the 10 saved registers)
+
+ bic r10, r4, #0x1F @ aligned_y_size = (linear_y_size>>5)<<5
+ bic r9, r3, #0x3F @ aligned_x_size = (linear_x_size>>6)<<6
+
+ mov r6, #0 @ i = 0
+LOOP_ALIGNED_Y_SIZE:
+
+ mov r5, #0 @ j = 0
+LOOP_ALIGNED_X_SIZE:
+
+ bl GET_TILED_OFFSET
+
+ mov r11, r3, asr #1 @ temp1 = linear_x_size/2
+ mul r11, r11, r6 @ temp1 = temp1*(i)
+ add r11, r11, r5, asr #1 @ temp1 = temp1+j/2
+ mov r12, r3, asr #1 @ temp2 = linear_x_size/2
+ sub r12, r12, #16 @ temp2 = linear_x_size/2-16
+
+ add r8, r1, r11 @ linear_addr = linear_src_u+temp1
+ add r11, r2, r11 @ temp1 = linear_src_v+temp1
+ add r7, r0, r7 @ tiled_addr = tiled_dest+tiled_addr
+
+ pld [r8, r3] @ tile rows 0-3; prefetch linear_x_size bytes ahead of the U pointer
+ vld1.8 {q0}, [r8]! @ U bytes 0-15 of this row
+ vld1.8 {q2}, [r8], r12 @ U bytes 16-31, then step to the next row (16+temp2 = linear_x_size/2)
+ pld [r11, r3]
+ vld1.8 {q1}, [r11]! @ V bytes 0-15 of this row
+ vld1.8 {q3}, [r11], r12 @ V bytes 16-31, then step to the next row
+ pld [r8, r3]
+ vld1.8 {q4}, [r8]!
+ vld1.8 {q6}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q5}, [r11]!
+ vld1.8 {q7}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q8}, [r8]!
+ vld1.8 {q10}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q9}, [r11]!
+ vld1.8 {q11}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q12}, [r8]!
+ vld1.8 {q14}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q13}, [r11]!
+ vld1.8 {q15}, [r11], r12
+
+ vst2.8 {q0, q1}, [r7]! @ store U/V byte-interleaved: first 32 bytes of the 64-byte tiled row
+ vst2.8 {q2, q3}, [r7]! @ second 32 bytes of the same tiled row
+ vst2.8 {q4, q5}, [r7]!
+ vst2.8 {q6, q7}, [r7]!
+ vst2.8 {q8, q9}, [r7]!
+ vst2.8 {q10, q11}, [r7]!
+ vst2.8 {q12, q13}, [r7]!
+ vst2.8 {q14, q15}, [r7]!
+
+ pld [r8, r3] @ tile rows 4-7
+ vld1.8 {q0}, [r8]!
+ vld1.8 {q2}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q1}, [r11]!
+ vld1.8 {q3}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q4}, [r8]!
+ vld1.8 {q6}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q5}, [r11]!
+ vld1.8 {q7}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q8}, [r8]!
+ vld1.8 {q10}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q9}, [r11]!
+ vld1.8 {q11}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q12}, [r8]!
+ vld1.8 {q14}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q13}, [r11]!
+ vld1.8 {q15}, [r11], r12
+
+ vst2.8 {q0, q1}, [r7]!
+ vst2.8 {q2, q3}, [r7]!
+ vst2.8 {q4, q5}, [r7]!
+ vst2.8 {q6, q7}, [r7]!
+ vst2.8 {q8, q9}, [r7]!
+ vst2.8 {q10, q11}, [r7]!
+ vst2.8 {q12, q13}, [r7]!
+ vst2.8 {q14, q15}, [r7]!
+
+ pld [r8, r3] @ tile rows 8-11
+ vld1.8 {q0}, [r8]!
+ vld1.8 {q2}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q1}, [r11]!
+ vld1.8 {q3}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q4}, [r8]!
+ vld1.8 {q6}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q5}, [r11]!
+ vld1.8 {q7}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q8}, [r8]!
+ vld1.8 {q10}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q9}, [r11]!
+ vld1.8 {q11}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q12}, [r8]!
+ vld1.8 {q14}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q13}, [r11]!
+ vld1.8 {q15}, [r11], r12
+
+ vst2.8 {q0, q1}, [r7]!
+ vst2.8 {q2, q3}, [r7]!
+ vst2.8 {q4, q5}, [r7]!
+ vst2.8 {q6, q7}, [r7]!
+ vst2.8 {q8, q9}, [r7]!
+ vst2.8 {q10, q11}, [r7]!
+ vst2.8 {q12, q13}, [r7]!
+ vst2.8 {q14, q15}, [r7]!
+
+ pld [r8, r3] @ tile rows 12-15
+ vld1.8 {q0}, [r8]!
+ vld1.8 {q2}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q1}, [r11]!
+ vld1.8 {q3}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q4}, [r8]!
+ vld1.8 {q6}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q5}, [r11]!
+ vld1.8 {q7}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q8}, [r8]!
+ vld1.8 {q10}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q9}, [r11]!
+ vld1.8 {q11}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q12}, [r8]!
+ vld1.8 {q14}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q13}, [r11]!
+ vld1.8 {q15}, [r11], r12
+
+ vst2.8 {q0, q1}, [r7]!
+ vst2.8 {q2, q3}, [r7]!
+ vst2.8 {q4, q5}, [r7]!
+ vst2.8 {q6, q7}, [r7]!
+ vst2.8 {q8, q9}, [r7]!
+ vst2.8 {q10, q11}, [r7]!
+ vst2.8 {q12, q13}, [r7]!
+ vst2.8 {q14, q15}, [r7]!
+
+ pld [r8, r3] @ tile rows 16-19
+ vld1.8 {q0}, [r8]!
+ vld1.8 {q2}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q1}, [r11]!
+ vld1.8 {q3}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q4}, [r8]!
+ vld1.8 {q6}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q5}, [r11]!
+ vld1.8 {q7}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q8}, [r8]!
+ vld1.8 {q10}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q9}, [r11]!
+ vld1.8 {q11}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q12}, [r8]!
+ vld1.8 {q14}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q13}, [r11]!
+ vld1.8 {q15}, [r11], r12
+
+ vst2.8 {q0, q1}, [r7]!
+ vst2.8 {q2, q3}, [r7]!
+ vst2.8 {q4, q5}, [r7]!
+ vst2.8 {q6, q7}, [r7]!
+ vst2.8 {q8, q9}, [r7]!
+ vst2.8 {q10, q11}, [r7]!
+ vst2.8 {q12, q13}, [r7]!
+ vst2.8 {q14, q15}, [r7]!
+
+ pld [r8, r3] @ tile rows 20-23
+ vld1.8 {q0}, [r8]!
+ vld1.8 {q2}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q1}, [r11]!
+ vld1.8 {q3}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q4}, [r8]!
+ vld1.8 {q6}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q5}, [r11]!
+ vld1.8 {q7}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q8}, [r8]!
+ vld1.8 {q10}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q9}, [r11]!
+ vld1.8 {q11}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q12}, [r8]!
+ vld1.8 {q14}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q13}, [r11]!
+ vld1.8 {q15}, [r11], r12
+
+ vst2.8 {q0, q1}, [r7]!
+ vst2.8 {q2, q3}, [r7]!
+ vst2.8 {q4, q5}, [r7]!
+ vst2.8 {q6, q7}, [r7]!
+ vst2.8 {q8, q9}, [r7]!
+ vst2.8 {q10, q11}, [r7]!
+ vst2.8 {q12, q13}, [r7]!
+ vst2.8 {q14, q15}, [r7]!
+
+ pld [r8, r3] @ tile rows 24-27
+ vld1.8 {q0}, [r8]!
+ vld1.8 {q2}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q1}, [r11]!
+ vld1.8 {q3}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q4}, [r8]!
+ vld1.8 {q6}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q5}, [r11]!
+ vld1.8 {q7}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q8}, [r8]!
+ vld1.8 {q10}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q9}, [r11]!
+ vld1.8 {q11}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q12}, [r8]!
+ vld1.8 {q14}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q13}, [r11]!
+ vld1.8 {q15}, [r11], r12
+
+ vst2.8 {q0, q1}, [r7]!
+ vst2.8 {q2, q3}, [r7]!
+ vst2.8 {q4, q5}, [r7]!
+ vst2.8 {q6, q7}, [r7]!
+ vst2.8 {q8, q9}, [r7]!
+ vst2.8 {q10, q11}, [r7]!
+ vst2.8 {q12, q13}, [r7]!
+ vst2.8 {q14, q15}, [r7]!
+
+ pld [r8, r3] @ tile rows 28-31
+ vld1.8 {q0}, [r8]!
+ vld1.8 {q2}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q1}, [r11]!
+ vld1.8 {q3}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q4}, [r8]!
+ vld1.8 {q6}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q5}, [r11]!
+ vld1.8 {q7}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q8}, [r8]!
+ vld1.8 {q10}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q9}, [r11]!
+ vld1.8 {q11}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q12}, [r8]!
+ vld1.8 {q14}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q13}, [r11]!
+ vld1.8 {q15}, [r11], r12
+
+ vst2.8 {q0, q1}, [r7]!
+ vst2.8 {q2, q3}, [r7]!
+ vst2.8 {q4, q5}, [r7]!
+ vst2.8 {q6, q7}, [r7]!
+ vst2.8 {q8, q9}, [r7]!
+ vst2.8 {q10, q11}, [r7]!
+ vst2.8 {q12, q13}, [r7]!
+ vst2.8 {q14, q15}, [r7]!
+
+ add r5, r5, #64 @ j = j+64
+ cmp r5, r9 @ j<aligned_x_size
+ blt LOOP_ALIGNED_X_SIZE @ test is at the bottom: the body above runs at least once
+
+ add r6, r6, #32 @ i = i+32
+ cmp r6, r10 @ i<aligned_y_size
+ blt LOOP_ALIGNED_Y_SIZE @ test is at the bottom: the body above runs at least once
+
+ ldr r4, [sp, #40] @ load linear_y_size to r4
+ cmp r6, r4 @ i == linear_y_size?
+ beq LOOP_LINEAR_Y_SIZE_2_START @ yes: no rows left below the full tiles
+
+LOOP_LINEAR_Y_SIZE_1: @ remaining rows, 4 rows per pass, full 64-byte columns
+
+ mov r5, #0 @ j = 0
+LOOP_ALIGNED_X_SIZE_1:
+
+ bl GET_TILED_OFFSET
+
+ mov r11, r3, asr #1 @ temp1 = linear_x_size/2
+ mul r11, r11, r6 @ temp1 = temp1*(i)
+ add r11, r11, r5, asr #1 @ temp1 = temp1+j/2
+ mov r12, r3, asr #1 @ temp2 = linear_x_size/2
+ sub r12, r12, #16 @ temp2 = linear_x_size/2-16
+
+ add r8, r1, r11 @ linear_addr = linear_src_u+temp1
+ add r11, r2, r11 @ temp1 = linear_src_v+temp1
+ add r7, r0, r7 @ tiled_addr = tiled_dest+tiled_addr
+ and r14, r6, #0x1F @ temp3 = i&0x1F
+ mov r14, r14, lsl #6 @ temp3 = temp3*64
+ add r7, r7, r14 @ tiled_addr = tiled_addr+temp3
+
+ pld [r8, r3] @ prefetch linear_x_size bytes ahead of the U pointer
+ vld1.8 {q0}, [r8]! @ U bytes 0-15 of this row
+ vld1.8 {q2}, [r8], r12 @ U bytes 16-31, then step to the next row (16+temp2 = linear_x_size/2)
+ pld [r11, r3]
+ vld1.8 {q1}, [r11]! @ V bytes 0-15 of this row
+ vld1.8 {q3}, [r11], r12 @ V bytes 16-31, then step to the next row
+ pld [r8, r3]
+ vld1.8 {q4}, [r8]!
+ vld1.8 {q6}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q5}, [r11]!
+ vld1.8 {q7}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q8}, [r8]!
+ vld1.8 {q10}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q9}, [r11]!
+ vld1.8 {q11}, [r11], r12
+ pld [r8, r3]
+ vld1.8 {q12}, [r8]!
+ vld1.8 {q14}, [r8], r12
+ pld [r11, r3]
+ vld1.8 {q13}, [r11]!
+ vld1.8 {q15}, [r11], r12
+
+ vst2.8 {q0, q1}, [r7]! @ store {tiled_addr}, U/V byte-interleaved
+ vst2.8 {q2, q3}, [r7]!
+ vst2.8 {q4, q5}, [r7]! @ store {tiled_addr+64*1}
+ vst2.8 {q6, q7}, [r7]!
+ vst2.8 {q8, q9}, [r7]! @ store {tiled_addr+64*2}
+ vst2.8 {q10, q11}, [r7]!
+ vst2.8 {q12, q13}, [r7]! @ store {tiled_addr+64*3}
+ vst2.8 {q14, q15}, [r7]!
+
+ add r5, r5, #64 @ j = j+64
+ cmp r5, r9 @ j<aligned_x_size
+ blt LOOP_ALIGNED_X_SIZE_1 @ test is at the bottom: the body above runs at least once
+
+ add r6, r6, #4 @ i = i+4
+ cmp r6, r4 @ i<linear_y_size
+ blt LOOP_LINEAR_Y_SIZE_1
+
+LOOP_LINEAR_Y_SIZE_2_START: @ right-edge columns (j >= aligned_x_size), 4 bytes x 4 rows per pass
+ cmp r5, r3 @ j == linear_x_size after the 64-byte column loops?
+ beq RESTORE_REG @ yes: nothing left at the right edge
+
+ mov r6, #0 @ i = 0
+LOOP_LINEAR_Y_SIZE_2:
+
+ mov r5, r9 @ j = aligned_x_size
+LOOP_LINEAR_X_SIZE_2:
+
+ bl GET_TILED_OFFSET
+
+ mov r11, r3, asr #1 @ temp1 = linear_x_size/2
+ mul r11, r11, r6 @ temp1 = temp1*(i)
+ add r11, r11, r5, asr #1 @ temp1 = temp1+j/2
+ mov r12, r3, asr #1 @ temp2 = linear_x_size/2
+ sub r12, r12, #1 @ temp2 = linear_x_size/2-1
+
+ add r8, r1, r11 @ linear_addr = linear_src_u+temp1
+ add r11, r2, r11 @ temp1 = linear_src_v+temp1
+ add r7, r0, r7 @ tiled_addr = tiled_dest+tiled_addr
+ and r14, r6, #0x1F @ temp3 = i&0x1F
+ mov r14, r14, lsl #6 @ temp3 = temp3*64
+ add r7, r7, r14 @ tiled_addr = tiled_addr+temp3
+ and r14, r5, #0x3F @ temp3 = j&0x3F
+ add r7, r7, r14 @ tiled_addr = tiled_addr+temp3
+
+ ldrb r10, [r8], #1 @ row 0: U byte (r10 is scratch here; aligned_y_size is no longer read)
+ ldrb r14, [r11], #1 @ V byte
+ mov r14, r14, lsl #8
+ orr r10, r10, r14 @ halfword = U | V<<8
+ strh r10, [r7], #2 @ store first U/V pair
+ ldrb r10, [r8], r12 @ second U byte, then step to the next row (1+temp2 = linear_x_size/2)
+ ldrb r14, [r11], r12 @ second V byte, then step to the next row
+ mov r14, r14, lsl #8
+ orr r10, r10, r14
+ strh r10, [r7], #62 @ store second pair, then advance to the next tiled row (2+62 = 64)
+
+ ldrb r10, [r8], #1 @ row 1
+ ldrb r14, [r11], #1
+ mov r14, r14, lsl #8
+ orr r10, r10, r14
+ strh r10, [r7], #2
+ ldrb r10, [r8], r12
+ ldrb r14, [r11], r12
+ mov r14, r14, lsl #8
+ orr r10, r10, r14
+ strh r10, [r7], #62
+
+ ldrb r10, [r8], #1 @ row 2
+ ldrb r14, [r11], #1
+ mov r14, r14, lsl #8
+ orr r10, r10, r14
+ strh r10, [r7], #2
+ ldrb r10, [r8], r12
+ ldrb r14, [r11], r12
+ mov r14, r14, lsl #8
+ orr r10, r10, r14
+ strh r10, [r7], #62
+
+ ldrb r10, [r8], #1 @ row 3
+ ldrb r14, [r11], #1
+ mov r14, r14, lsl #8
+ orr r10, r10, r14
+ strh r10, [r7], #2
+ ldrb r10, [r8], r12
+ ldrb r14, [r11], r12
+ mov r14, r14, lsl #8
+ orr r10, r10, r14
+ strh r10, [r7], #62
+
+ add r5, r5, #4 @ j = j+4
+ cmp r5, r3 @ j<linear_x_size
+ blt LOOP_LINEAR_X_SIZE_2
+
+ add r6, r6, #4 @ i = i+4
+ cmp r6, r4 @ i<linear_y_size
+ blt LOOP_LINEAR_Y_SIZE_2
+
+RESTORE_REG:
+ ldmfd sp!, {r4-r12,r15} @ restore registers and return (the saved r14 is popped into pc)
+
+GET_TILED_OFFSET: @ in: r5=j, r6=i, r3=linear_x_size, r4=linear_y_size; out: r7=tile byte offset; clobbers r11, r12, r14
+ stmfd sp!, {r14} @ save return address; r14 is reused as temp3 below
+
+ mov r12, r6, asr #5 @ temp2 = i>>5
+ mov r11, r5, asr #6 @ temp1 = j>>6
+
+ and r14, r12, #0x1 @ if (temp2 & 0x1)
+ cmp r14, #0x1
+ bne GET_TILED_OFFSET_EVEN_FORMULA_1
+
+GET_TILED_OFFSET_ODD_FORMULA:
+ sub r7, r12, #1 @ tiled_addr = temp2-1
+ add r14, r3, #127 @ temp3 = linear_x_size+127
+ bic r14, r14, #0x7F @ temp3 = (temp3 >>7)<<7
+ mov r14, r14, asr #6 @ temp3 = temp3>>6
+ mul r7, r7, r14 @ tiled_addr = tiled_addr*temp3
+ add r7, r7, r11 @ tiled_addr = tiled_addr+temp1
+ add r7, r7, #2 @ tiled_addr = tiled_addr+2
+ bic r14, r11, #0x3 @ temp3 = (temp1>>2)<<2
+ add r7, r7, r14 @ tiled_addr = tiled_addr+temp3
+ mov r7, r7, lsl #11 @ tiled_addr = tiled_addr<<11
+ b GET_TILED_OFFSET_RETURN
+
+GET_TILED_OFFSET_EVEN_FORMULA_1:
+ add r14, r4, #31 @ temp3 = linear_y_size+31
+ bic r14, r14, #0x1F @ temp3 = (temp3>>5)<<5
+ sub r14, r14, #32 @ temp3 = temp3 - 32
+ cmp r6, r14 @ if (i<temp3) { (the 32 was already subtracted above)
+ bge GET_TILED_OFFSET_EVEN_FORMULA_2
+ add r14, r11, #2 @ temp3 = temp1+2
+ bic r14, r14, #3 @ temp3 = (temp3>>2)<<2
+ add r7, r11, r14 @ tiled_addr = temp1+temp3
+ add r14, r3, #127 @ temp3 = linear_x_size+127
+ bic r14, r14, #0x7F @ temp3 = (temp3>>7)<<7
+ mov r14, r14, asr #6 @ temp3 = temp3>>6
+ mul r12, r12, r14 @ temp2 = temp2*temp3 (temp2 holds i>>5)
+ add r7, r7, r12 @ tiled_addr = tiled_addr+temp2
+ mov r7, r7, lsl #11 @ tiled_addr = tiled_addr<<11
+ b GET_TILED_OFFSET_RETURN
+
+GET_TILED_OFFSET_EVEN_FORMULA_2:
+ add r14, r3, #127 @ temp3 = linear_x_size+127
+ bic r14, r14, #0x7F @ temp3 = (temp3>>7)<<7
+ mov r14, r14, asr #6 @ temp3 = temp3>>6
+ mul r7, r12, r14 @ tiled_addr = temp2*temp3
+ add r7, r7, r11 @ tiled_addr = tiled_addr+temp1
+ mov r7, r7, lsl #11 @ tiled_addr = tiled_addr<<11
+
+GET_TILED_OFFSET_RETURN:
+ ldmfd sp!, {r15} @ return: pop the saved return address into pc
+ .fnend
+
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_y_neon.s b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_y_neon.s
new file mode 100644
index 0000000..3f8932a
--- /dev/null
+++ b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_y_neon.s
@@ -0,0 +1,451 @@
+/*
+ *
+ * Copyright 2011 Samsung Electronics S.LSI Co. LTD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @file csc_yuv420_nv12t_y_neon.s
+ * @brief Converts a linear Y or UV plane of YUV420 into the tiled layout of NV12T (NEON)
+ * @author ShinWon Lee (shinwon.lee@samsung.com)
+ * @version 1.0
+ * @history
+ * 2011.7.01 : Create
+ */
+
+/*
+ * Converts linear data to tiled.
+ * 1. Y of YUV420P to Y of NV12T
+ * 2. Y of YUV420SP to Y of NV12T
+ * 3. UV of YUV420SP to UV of NV12T
+ *
+ * @param nv12t_dest
+ * Y or UV plane address of NV12T[out]
+ *
+ * @param yuv420_src
+ * Y or UV plane address of YUV420P(S)[in]
+ *
+ * @param yuv420_width
+ * Width of YUV420[in]
+ *
+ * @param yuv420_height
+ * Y: Height of YUV420, UV: Height/2 of YUV420[in]
+ */
+
+ .arch armv7-a
+ .text
+ .global csc_linear_to_tiled
+ .type csc_linear_to_tiled, %function
+csc_linear_to_tiled: @ copy a linear plane into the tiled plane, one 64x32-byte tile at a time
+ .fnstart
+
+ @r0 tiled_dest
+ @r1 linear_src
+ @r2 linear_x_size
+ @r3 linear_y_size
+ @r4 j
+ @r5 i
+ @r6 nn(tiled_addr)
+ @r7 mm(linear_addr)
+ @r8 aligned_x_size
+ @r9 aligned_y_size
+ @r10 temp1
+ @r11 temp2
+ @r12 temp3
+ @r14 temp4
+
+ stmfd sp!, {r4-r12,r14} @ backup registers
+
+ bic r9, r3, #0x1F @ aligned_y_size = (linear_y_size>>5)<<5
+ bic r8, r2, #0x3F @ aligned_x_size = (linear_x_size>>6)<<6
+
+ mov r5, #0 @ i = 0
+LOOP_ALIGNED_Y_SIZE:
+
+ mov r4, #0 @ j = 0
+LOOP_ALIGNED_X_SIZE:
+
+ bl GET_TILED_OFFSET
+
+ mul r10, r2, r5 @ temp1 = linear_x_size*(i)
+ add r7, r1, r4 @ linear_addr = linear_src+j
+ add r7, r7, r10 @ linear_addr = linear_addr+temp1
+ sub r10, r2, #32 @ temp1 = linear_x_size-32
+
+ pld [r7, r2, lsl #1] @ tile rows 0-3; prefetch 2*linear_x_size bytes ahead
+ vld1.8 {q0, q1}, [r7]! @ bytes 0-31 of this row
+ pld [r7, r2, lsl #1]
+ vld1.8 {q2, q3}, [r7], r10 @ bytes 32-63, then step to the next row (32+temp1 = linear_x_size)
+ pld [r7, r2, lsl #1]
+ vld1.8 {q4, q5}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q6, q7}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q8, q9}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q10, q11}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q12, q13}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q14, q15}, [r7], r10
+
+ add r6, r0, r6 @ tiled_addr = tiled_dest+tiled_addr
+
+ vst1.8 {q0, q1}, [r6]! @ first 32 bytes of the 64-byte tiled row
+ vst1.8 {q2, q3}, [r6]! @ second 32 bytes of the same tiled row
+ vst1.8 {q4, q5}, [r6]!
+ vst1.8 {q6, q7}, [r6]!
+ vst1.8 {q8, q9}, [r6]!
+ vst1.8 {q10, q11}, [r6]!
+ vst1.8 {q12, q13}, [r6]!
+ vst1.8 {q14, q15}, [r6]!
+
+ pld [r7, r2, lsl #1] @ tile rows 4-7
+ vld1.8 {q0, q1}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q2, q3}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q4, q5}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q6, q7}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q8, q9}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q10, q11}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q12, q13}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q14, q15}, [r7], r10
+
+ vst1.8 {q0, q1}, [r6]!
+ vst1.8 {q2, q3}, [r6]!
+ vst1.8 {q4, q5}, [r6]!
+ vst1.8 {q6, q7}, [r6]!
+ vst1.8 {q8, q9}, [r6]!
+ vst1.8 {q10, q11}, [r6]!
+ vst1.8 {q12, q13}, [r6]!
+ vst1.8 {q14, q15}, [r6]!
+
+ pld [r7, r2, lsl #1] @ tile rows 8-11
+ vld1.8 {q0, q1}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q2, q3}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q4, q5}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q6, q7}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q8, q9}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q10, q11}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q12, q13}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q14, q15}, [r7], r10
+
+ vst1.8 {q0, q1}, [r6]!
+ vst1.8 {q2, q3}, [r6]!
+ vst1.8 {q4, q5}, [r6]!
+ vst1.8 {q6, q7}, [r6]!
+ vst1.8 {q8, q9}, [r6]!
+ vst1.8 {q10, q11}, [r6]!
+ vst1.8 {q12, q13}, [r6]!
+ vst1.8 {q14, q15}, [r6]!
+
+ pld [r7, r2, lsl #1] @ tile rows 12-15
+ vld1.8 {q0, q1}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q2, q3}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q4, q5}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q6, q7}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q8, q9}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q10, q11}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q12, q13}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q14, q15}, [r7], r10
+
+ vst1.8 {q0, q1}, [r6]!
+ vst1.8 {q2, q3}, [r6]!
+ vst1.8 {q4, q5}, [r6]!
+ vst1.8 {q6, q7}, [r6]!
+ vst1.8 {q8, q9}, [r6]!
+ vst1.8 {q10, q11}, [r6]!
+ vst1.8 {q12, q13}, [r6]!
+ vst1.8 {q14, q15}, [r6]!
+
+ pld [r7, r2, lsl #1] @ tile rows 16-19
+ vld1.8 {q0, q1}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q2, q3}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q4, q5}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q6, q7}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q8, q9}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q10, q11}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q12, q13}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q14, q15}, [r7], r10
+
+ vst1.8 {q0, q1}, [r6]!
+ vst1.8 {q2, q3}, [r6]!
+ vst1.8 {q4, q5}, [r6]!
+ vst1.8 {q6, q7}, [r6]!
+ vst1.8 {q8, q9}, [r6]!
+ vst1.8 {q10, q11}, [r6]!
+ vst1.8 {q12, q13}, [r6]!
+ vst1.8 {q14, q15}, [r6]!
+
+ pld [r7, r2, lsl #1] @ tile rows 20-23
+ vld1.8 {q0, q1}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q2, q3}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q4, q5}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q6, q7}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q8, q9}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q10, q11}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q12, q13}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q14, q15}, [r7], r10
+
+ vst1.8 {q0, q1}, [r6]!
+ vst1.8 {q2, q3}, [r6]!
+ vst1.8 {q4, q5}, [r6]!
+ vst1.8 {q6, q7}, [r6]!
+ vst1.8 {q8, q9}, [r6]!
+ vst1.8 {q10, q11}, [r6]!
+ vst1.8 {q12, q13}, [r6]!
+ vst1.8 {q14, q15}, [r6]!
+
+ pld [r7, r2, lsl #1] @ tile rows 24-27
+ vld1.8 {q0, q1}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q2, q3}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q4, q5}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q6, q7}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q8, q9}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q10, q11}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q12, q13}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q14, q15}, [r7], r10
+
+ vst1.8 {q0, q1}, [r6]!
+ vst1.8 {q2, q3}, [r6]!
+ vst1.8 {q4, q5}, [r6]!
+ vst1.8 {q6, q7}, [r6]!
+ vst1.8 {q8, q9}, [r6]!
+ vst1.8 {q10, q11}, [r6]!
+ vst1.8 {q12, q13}, [r6]!
+ vst1.8 {q14, q15}, [r6]!
+
+ pld [r7, r2, lsl #1] @ tile rows 28-31
+ vld1.8 {q0, q1}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q2, q3}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q4, q5}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q6, q7}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q8, q9}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q10, q11}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q12, q13}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q14, q15}, [r7], r10
+
+ vst1.8 {q0, q1}, [r6]!
+ vst1.8 {q2, q3}, [r6]!
+ vst1.8 {q4, q5}, [r6]!
+ vst1.8 {q6, q7}, [r6]!
+ vst1.8 {q8, q9}, [r6]!
+ vst1.8 {q10, q11}, [r6]!
+ vst1.8 {q12, q13}, [r6]!
+ vst1.8 {q14, q15}, [r6]!
+
+ add r4, r4, #64 @ j = j+64
+ cmp r4, r8 @ j<aligned_x_size
+ blt LOOP_ALIGNED_X_SIZE @ test is at the bottom: the body above runs at least once
+
+ add r5, r5, #32 @ i = i+32
+ cmp r5, r9 @ i<aligned_y_size
+ blt LOOP_ALIGNED_Y_SIZE @ test is at the bottom: the body above runs at least once
+
+ cmp r5, r3 @ i == linear_y_size?
+ beq LOOP_LINEAR_Y_SIZE_2_START @ yes: no rows left below the full tiles
+
+LOOP_LINEAR_Y_SIZE_1: @ remaining rows, 4 rows per pass, full 64-byte columns
+
+ mov r4, #0 @ j = 0
+LOOP_ALIGNED_X_SIZE_1:
+
+ bl GET_TILED_OFFSET
+
+ mul r10, r2, r5 @ temp1 = linear_x_size*(i)
+ add r7, r1, r4 @ linear_addr = linear_src+j
+ add r7, r7, r10 @ linear_addr = linear_addr+temp1
+ sub r10, r2, #32 @ temp1 = linear_x_size-32
+
+ pld [r7, r2, lsl #1] @ prefetch 2*linear_x_size bytes ahead
+ vld1.8 {q0, q1}, [r7]! @ bytes 0-31 of this row
+ pld [r7, r2, lsl #1]
+ vld1.8 {q2, q3}, [r7], r10 @ bytes 32-63, then step to the next row (32+temp1 = linear_x_size)
+ pld [r7, r2, lsl #1]
+ vld1.8 {q4, q5}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q6, q7}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q8, q9}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q10, q11}, [r7], r10
+ pld [r7, r2, lsl #1]
+ vld1.8 {q12, q13}, [r7]!
+ pld [r7, r2, lsl #1]
+ vld1.8 {q14, q15}, [r7], r10
+
+ add r6, r0, r6 @ tiled_addr = tiled_dest+tiled_addr
+ and r11, r5, #0x1F @ temp2 = i&0x1F
+ mov r11, r11, lsl #6 @ temp2 = 64*temp2
+ add r6, r6, r11 @ tiled_addr = tiled_addr+temp2
+
+ vst1.8 {q0, q1}, [r6]! @ 4 tiled rows of 64 bytes, two stores per row
+ vst1.8 {q2, q3}, [r6]!
+ vst1.8 {q4, q5}, [r6]!
+ vst1.8 {q6, q7}, [r6]!
+ vst1.8 {q8, q9}, [r6]!
+ vst1.8 {q10, q11}, [r6]!
+ vst1.8 {q12, q13}, [r6]!
+ vst1.8 {q14, q15}, [r6]!
+
+ add r4, r4, #64 @ j = j+64
+ cmp r4, r8 @ j<aligned_x_size
+ blt LOOP_ALIGNED_X_SIZE_1 @ test is at the bottom: the body above runs at least once
+
+ add r5, r5, #4 @ i = i+4
+ cmp r5, r3 @ i<linear_y_size
+ blt LOOP_LINEAR_Y_SIZE_1
+
+LOOP_LINEAR_Y_SIZE_2_START: @ right-edge columns (j >= aligned_x_size), 4 bytes x 4 rows per pass
+ cmp r4, r2 @ j == linear_x_size after the 64-byte column loops?
+ beq RESTORE_REG @ yes: nothing left at the right edge
+
+ mov r5, #0 @ i = 0
+LOOP_LINEAR_Y_SIZE_2:
+
+ mov r4, r8 @ j = aligned_x_size
+LOOP_LINEAR_X_SIZE_2:
+
+ bl GET_TILED_OFFSET
+
+ mul r10, r2, r5 @ temp1 = linear_x_size*(i)
+ add r7, r1, r4 @ linear_addr = linear_src+j
+ add r7, r7, r10 @ linear_addr = linear_addr+temp1
+
+ add r6, r0, r6 @ tiled_addr = tiled_dest+tiled_addr
+ and r11, r5, #0x1F @ temp2 = i&0x1F
+ mov r11, r11, lsl #6 @ temp2 = 64*temp2
+ add r6, r6, r11 @ tiled_addr = tiled_addr+temp2
+ and r11, r4, #0x3F @ temp2 = j&0x3F
+ add r6, r6, r11 @ tiled_addr = tiled_addr+temp2
+
+ ldr r10, [r7], r2 @ row 0: 4 source bytes, then step one linear row
+ ldr r11, [r7], r2 @ row 1
+ ldr r12, [r7], r2 @ row 2
+ ldr r14, [r7], r2 @ row 3 (r14 is scratch; bl reloads it on the next pass)
+ str r10, [r6], #64 @ row 0 out, then advance one 64-byte tiled row
+ str r11, [r6], #64 @ row 1
+ str r12, [r6], #64 @ row 2
+ str r14, [r6], #64 @ row 3
+
+ add r4, r4, #4 @ j = j+4
+ cmp r4, r2 @ j<linear_x_size
+ blt LOOP_LINEAR_X_SIZE_2
+
+ add r5, r5, #4 @ i = i+4
+ cmp r5, r3 @ i<linear_y_size
+ blt LOOP_LINEAR_Y_SIZE_2
+
+RESTORE_REG:
+ ldmfd sp!, {r4-r12,r15} @ restore registers and return (the saved r14 is popped into pc)
+
+GET_TILED_OFFSET: @ in: r4=j, r5=i, r2=linear_x_size, r3=linear_y_size; out: r6=tile byte offset; clobbers r10, r11, r12
+
+ mov r11, r5, asr #5 @ temp2 = i>>5
+ mov r10, r4, asr #6 @ temp1 = j>>6
+
+ and r12, r11, #0x1 @ if (temp2 & 0x1)
+ cmp r12, #0x1
+ bne GET_TILED_OFFSET_EVEN_FORMULA_1
+
+GET_TILED_OFFSET_ODD_FORMULA:
+ sub r6, r11, #1 @ tiled_addr = temp2-1
+ add r12, r2, #127 @ temp3 = linear_x_size+127
+ bic r12, r12, #0x7F @ temp3 = (temp3 >>7)<<7
+ mov r12, r12, asr #6 @ temp3 = temp3>>6
+ mul r6, r6, r12 @ tiled_addr = tiled_addr*temp3
+ add r6, r6, r10 @ tiled_addr = tiled_addr+temp1
+ add r6, r6, #2 @ tiled_addr = tiled_addr+2
+ bic r12, r10, #0x3 @ temp3 = (temp1>>2)<<2
+ add r6, r6, r12 @ tiled_addr = tiled_addr+temp3
+ mov r6, r6, lsl #11 @ tiled_addr = tiled_addr<<11
+ b GET_TILED_OFFSET_RETURN
+
+GET_TILED_OFFSET_EVEN_FORMULA_1:
+ add r12, r3, #31 @ temp3 = linear_y_size+31
+ bic r12, r12, #0x1F @ temp3 = (temp3>>5)<<5
+ sub r12, r12, #32 @ temp3 = temp3 - 32
+ cmp r5, r12 @ if (i<temp3) { (the 32 was already subtracted above)
+ bge GET_TILED_OFFSET_EVEN_FORMULA_2
+ add r12, r10, #2 @ temp3 = temp1+2
+ bic r12, r12, #3 @ temp3 = (temp3>>2)<<2
+ add r6, r10, r12 @ tiled_addr = temp1+temp3
+ add r12, r2, #127 @ temp3 = linear_x_size+127
+ bic r12, r12, #0x7F @ temp3 = (temp3>>7)<<7
+ mov r12, r12, asr #6 @ temp3 = temp3>>6
+ mul r11, r11, r12 @ temp2 = temp2*temp3 (temp2 holds i>>5)
+ add r6, r6, r11 @ tiled_addr = tiled_addr+temp2
+ mov r6, r6, lsl #11 @ tiled_addr = tiled_addr<<11
+ b GET_TILED_OFFSET_RETURN
+
+GET_TILED_OFFSET_EVEN_FORMULA_2:
+ add r12, r2, #127 @ temp3 = linear_x_size+127
+ bic r12, r12, #0x7F @ temp3 = (temp3>>7)<<7
+ mov r12, r12, asr #6 @ temp3 = temp3>>6
+ mul r6, r11, r12 @ tiled_addr = temp2*temp3
+ add r6, r6, r10 @ tiled_addr = tiled_addr+temp1
+ mov r6, r6, lsl #11 @ tiled_addr = tiled_addr<<11
+
+GET_TILED_OFFSET_RETURN:
+ mov pc, lr @ return to the caller
+ .fnend
+
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/dec/src/SsbSipMfcDecAPI.c b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/dec/src/SsbSipMfcDecAPI.c
index 508f290..19b63b0 100644
--- a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/dec/src/SsbSipMfcDecAPI.c
+++ b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/dec/src/SsbSipMfcDecAPI.c
@@ -503,173 +503,3 @@ SSBSIP_MFC_ERROR_CODE SsbSipMfcDecGetConfig(void *openHandle, SSBSIP_MFC_DEC_CON
return MFC_RET_OK;
}
-
-int tile_4x2_read(int x_size, int y_size, int x_pos, int y_pos)
-{
- int pixel_x_m1, pixel_y_m1;
- int roundup_x, roundup_y;
- int linear_addr0, linear_addr1, bank_addr ;
- int x_addr;
- int trans_addr;
-
- pixel_x_m1 = x_size -1;
- pixel_y_m1 = y_size -1;
-
- roundup_x = ((pixel_x_m1 >> 7) + 1);
- roundup_y = ((pixel_x_m1 >> 6) + 1);
-
- x_addr = x_pos >> 2;
-
- if ((y_size <= y_pos+32) && ( y_pos < y_size) &&
- (((pixel_y_m1 >> 5) & 0x1) == 0) && (((y_pos >> 5) & 0x1) == 0)) {
- linear_addr0 = (((y_pos & 0x1f) <<4) | (x_addr & 0xf));
- linear_addr1 = (((y_pos >> 6) & 0xff) * roundup_x + ((x_addr >> 6) & 0x3f));
-
- if (((x_addr >> 5) & 0x1) == ((y_pos >> 5) & 0x1))
- bank_addr = ((x_addr >> 4) & 0x1);
- else
- bank_addr = 0x2 | ((x_addr >> 4) & 0x1);
- } else {
- linear_addr0 = (((y_pos & 0x1f) << 4) | (x_addr & 0xf));
- linear_addr1 = (((y_pos >> 6) & 0xff) * roundup_x + ((x_addr >> 5) & 0x7f));
-
- if (((x_addr >> 5) & 0x1) == ((y_pos >> 5) & 0x1))
- bank_addr = ((x_addr >> 4) & 0x1);
- else
- bank_addr = 0x2 | ((x_addr >> 4) & 0x1);
- }
-
- linear_addr0 = linear_addr0 << 2;
- trans_addr = (linear_addr1 <<13) | (bank_addr << 11) | linear_addr0;
-
- return trans_addr;
-}
-
-void Y_tile_to_linear_4x2(unsigned char *p_linear_addr, unsigned char *p_tiled_addr, unsigned int x_size, unsigned int y_size)
-{
- int trans_addr;
- unsigned int i, j, k, index;
- unsigned char data8[4];
- unsigned int max_index = x_size * y_size;
-
- for (i = 0; i < y_size; i = i + 16) {
- for (j = 0; j < x_size; j = j + 16) {
- trans_addr = tile_4x2_read(x_size, y_size, j, i);
- for (k = 0; k < 16; k++) {
- /* limit check - prohibit segmentation fault */
- index = (i * x_size) + (x_size * k) + j;
- /* remove equal condition to solve thumbnail bug */
- if (index + 16 > max_index) {
- continue;
- }
-
- data8[0] = p_tiled_addr[trans_addr + 64 * k + 0];
- data8[1] = p_tiled_addr[trans_addr + 64 * k + 1];
- data8[2] = p_tiled_addr[trans_addr + 64 * k + 2];
- data8[3] = p_tiled_addr[trans_addr + 64 * k + 3];
-
- p_linear_addr[index] = data8[0];
- p_linear_addr[index + 1] = data8[1];
- p_linear_addr[index + 2] = data8[2];
- p_linear_addr[index + 3] = data8[3];
-
- data8[0] = p_tiled_addr[trans_addr + 64 * k + 4];
- data8[1] = p_tiled_addr[trans_addr + 64 * k + 5];
- data8[2] = p_tiled_addr[trans_addr + 64 * k + 6];
- data8[3] = p_tiled_addr[trans_addr + 64 * k + 7];
-
- p_linear_addr[index + 4] = data8[0];
- p_linear_addr[index + 5] = data8[1];
- p_linear_addr[index + 6] = data8[2];
- p_linear_addr[index + 7] = data8[3];
-
- data8[0] = p_tiled_addr[trans_addr + 64 * k + 8];
- data8[1] = p_tiled_addr[trans_addr + 64 * k + 9];
- data8[2] = p_tiled_addr[trans_addr + 64 * k + 10];
- data8[3] = p_tiled_addr[trans_addr + 64 * k + 11];
-
- p_linear_addr[index + 8] = data8[0];
- p_linear_addr[index + 9] = data8[1];
- p_linear_addr[index + 10] = data8[2];
- p_linear_addr[index + 11] = data8[3];
-
- data8[0] = p_tiled_addr[trans_addr + 64 * k + 12];
- data8[1] = p_tiled_addr[trans_addr + 64 * k + 13];
- data8[2] = p_tiled_addr[trans_addr + 64 * k + 14];
- data8[3] = p_tiled_addr[trans_addr + 64 * k + 15];
-
- p_linear_addr[index + 12] = data8[0];
- p_linear_addr[index + 13] = data8[1];
- p_linear_addr[index + 14] = data8[2];
- p_linear_addr[index + 15] = data8[3];
- }
- }
- }
-}
-
-void CbCr_tile_to_linear_4x2(unsigned char *p_linear_addr, unsigned char *p_tiled_addr, unsigned int x_size, unsigned int y_size)
-{
- int trans_addr;
- unsigned int i, j, k, index;
- unsigned char data8[4];
- unsigned int half_y_size = y_size / 2;
- unsigned int max_index = x_size * half_y_size;
- unsigned char *pUVAddr[2];
-
- pUVAddr[0] = p_linear_addr;
- pUVAddr[1] = p_linear_addr + ((x_size * half_y_size) / 2);
-
- for (i = 0; i < half_y_size; i = i + 16) {
- for (j = 0; j < x_size; j = j + 16) {
- trans_addr = tile_4x2_read(x_size, half_y_size, j, i);
- for (k = 0; k < 16; k++) {
- /* limit check - prohibit segmentation fault */
- index = (i * x_size) + (x_size * k) + j;
- /* remove equal condition to solve thumbnail bug */
- if (index + 16 > max_index) {
- continue;
- }
-
- data8[0] = p_tiled_addr[trans_addr + 64 * k + 0];
- data8[1] = p_tiled_addr[trans_addr + 64 * k + 1];
- data8[2] = p_tiled_addr[trans_addr + 64 * k + 2];
- data8[3] = p_tiled_addr[trans_addr + 64 * k + 3];
-
- pUVAddr[index%2][index/2] = data8[0];
- pUVAddr[(index+1)%2][(index+1)/2] = data8[1];
- pUVAddr[(index+2)%2][(index+2)/2] = data8[2];
- pUVAddr[(index+3)%2][(index+3)/2] = data8[3];
-
- data8[0] = p_tiled_addr[trans_addr + 64 * k + 4];
- data8[1] = p_tiled_addr[trans_addr + 64 * k + 5];
- data8[2] = p_tiled_addr[trans_addr + 64 * k + 6];
- data8[3] = p_tiled_addr[trans_addr + 64 * k + 7];
-
- pUVAddr[(index+4)%2][(index+4)/2] = data8[0];
- pUVAddr[(index+5)%2][(index+5)/2] = data8[1];
- pUVAddr[(index+6)%2][(index+6)/2] = data8[2];
- pUVAddr[(index+7)%2][(index+7)/2] = data8[3];
-
- data8[0] = p_tiled_addr[trans_addr + 64 * k + 8];
- data8[1] = p_tiled_addr[trans_addr + 64 * k + 9];
- data8[2] = p_tiled_addr[trans_addr + 64 * k + 10];
- data8[3] = p_tiled_addr[trans_addr + 64 * k + 11];
-
- pUVAddr[(index+8)%2][(index+8)/2] = data8[0];
- pUVAddr[(index+9)%2][(index+9)/2] = data8[1];
- pUVAddr[(index+10)%2][(index+10)/2] = data8[2];
- pUVAddr[(index+11)%2][(index+11)/2] = data8[3];
-
- data8[0] = p_tiled_addr[trans_addr + 64 * k + 12];
- data8[1] = p_tiled_addr[trans_addr + 64 * k + 13];
- data8[2] = p_tiled_addr[trans_addr + 64 * k + 14];
- data8[3] = p_tiled_addr[trans_addr + 64 * k + 15];
-
- pUVAddr[(index+12)%2][(index+12)/2] = data8[0];
- pUVAddr[(index+13)%2][(index+13)/2] = data8[1];
- pUVAddr[(index+14)%2][(index+14)/2] = data8[2];
- pUVAddr[(index+15)%2][(index+15)/2] = data8[3];
- }
- }
- }
-}
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/enc/src/SsbSipMfcEncAPI.c b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/enc/src/SsbSipMfcEncAPI.c
index 2c33c5b..c31e522 100644
--- a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/enc/src/SsbSipMfcEncAPI.c
+++ b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/enc/src/SsbSipMfcEncAPI.c
@@ -467,6 +467,9 @@ SSBSIP_MFC_ERROR_CODE SsbSipMfcEncGetInBuf(void *openHandle, SSBSIP_MFC_ENC_INPU
input_info->YVirAddr = (void*)pCTX->virFrmBuf.luma;
input_info->CVirAddr = (void*)pCTX->virFrmBuf.chroma;
+ input_info->YSize = aligned_y_size;
+ input_info->CSize = aligned_c_size;
+
return MFC_RET_OK;
}
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/SsbSipMfcApi.h b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/SsbSipMfcApi.h
index e083998..87e9b2d 100644
--- a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/SsbSipMfcApi.h
+++ b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/SsbSipMfcApi.h
@@ -285,12 +285,6 @@ extern "C" {
#endif
/*--------------------------------------------------------------------------------*/
-/* Format Conversion API */
-/*--------------------------------------------------------------------------------*/
-void Y_tile_to_linear_4x2(unsigned char *p_linear_addr, unsigned char *p_tiled_addr, unsigned int x_size, unsigned int y_size);
-void CbCr_tile_to_linear_4x2(unsigned char *p_linear_addr, unsigned char *p_tiled_addr, unsigned int x_size, unsigned int y_size);
-
-/*--------------------------------------------------------------------------------*/
/* Decoding APIs */
/*--------------------------------------------------------------------------------*/
void *SsbSipMfcDecOpen(void);
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/color_space_convertor.h b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/color_space_convertor.h
new file mode 100644
index 0000000..4ad5bda
--- /dev/null
+++ b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/color_space_convertor.h
@@ -0,0 +1,176 @@
+/*
+ *
+ * Copyright 2011 Samsung Electronics S.LSI Co. LTD
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+ * @file color_space_convertor.h
+ * @brief Color space conversion API between NV12T (tiled) and YUV420 planar/semiplanar (linear) layouts.
+ * NV12T(tiled) layout:
+ * Each element is not a pixel; it is a 64x32 pixel block.
+ * uv pixel block is interleaved as u v u v u v ...
+ * y1 y2 y7 y8 y9 y10 y15 y16
+ * y3 y4 y5 y6 y11 y12 y13 y14
+ * y17 y18 y23 y24 y25 y26 y31 y32
+ * y19 y20 y21 y22 y27 y28 y29 y30
+ * uv1 uv2 uv7 uv8 uv9 uv10 uv15 uv16
+ * uv3 uv4 uv5 uv6 uv11 uv12 uv13 uv14
+ * YUV420Planar(linear) layout:
+ * Each element is not a pixel; it is a 64x32 pixel block.
+ * y1 y2 y3 y4 y5 y6 y7 y8
+ * y9 y10 y11 y12 y13 y14 y15 y16
+ * y17 y18 y19 y20 y21 y22 y23 y24
+ * y25 y26 y27 y28 y29 y30 y31 y32
+ * u1 u2 u3 u4 u5 u6 u7 u8
+ * v1 v2 v3 v4 v5 v6 v7 v8
+ * YUV420Semiplanar(linear) layout:
+ * Each element is not a pixel; it is a 64x32 pixel block.
+ * uv pixel block is interleaved as u v u v u v ...
+ * y1 y2 y3 y4 y5 y6 y7 y8
+ * y9 y10 y11 y12 y13 y14 y15 y16
+ * y17 y18 y19 y20 y21 y22 y23 y24
+ * y25 y26 y27 y28 y29 y30 y31 y32
+ * uv1 uv2 uv3 uv4 uv5 uv6 uv7 uv8
+ * uv9 uv10 uv11 uv12 uv13 uv14 uv15 uv16
+ * @author ShinWon Lee (shinwon.lee@samsung.com)
+ * @version 1.0
+ * @history
+ * 2011.7.01 : Create
+ */
+
+#ifndef COLOR_SPACE_CONVERTOR_H_
+#define COLOR_SPACE_CONVERTOR_H_
+
+/*--------------------------------------------------------------------------------*/
+/* Format Conversion API */
+/*--------------------------------------------------------------------------------*/
+/* C Code */
+/*
+ * De-interleaves src to dest1, dest2
+ *
+ * @param dest1
+ * Address of de-interleaved data[out]
+ *
+ * @param dest2
+ * Address of de-interleaved data[out]
+ *
+ * @param src
+ * Address of interleaved data[in]
+ *
+ * @param src_size
+ * Size of interleaved data[in]
+ */
+void csc_deinterleave_memcpy(char *dest1, char *dest2, char *src, int src_size);
+
+/*
+ * Interleaves src1, src2 to dest
+ *
+ * @param dest
+ * Address of interleaved data[out]
+ *
+ * @param src1
+ * Address of de-interleaved data[in]
+ *
+ * @param src2
+ * Address of de-interleaved data[in]
+ *
+ * @param src_size
+ * Size of de-interleaved data[in]
+ */
+void csc_interleave_memcpy(char *dest, char *src1, char *src2, int src_size);
+
+/*
+ * Converts tiled data to linear.
+ * 1. Y of NV12T to Y of YUV420P
+ * 2. Y of NV12T to Y of YUV420S
+ * 3. UV of NV12T to UV of YUV420S
+ *
+ * @param yuv420p_y_dest
+ * Y or UV plane address of YUV420[out]
+ *
+ * @param nv12t_y_src
+ * Y or UV plane address of NV12T[in]
+ *
+ * @param yuv420p_width
+ * Width of YUV420[in]
+ *
+ * @param yuv420p_y_height
+ * Y: Height of YUV420, UV: Height/2 of YUV420[in]
+ */
+void csc_tiled_to_linear(char *yuv420p_y_dest, char *nv12t_y_src, int yuv420p_width, int yuv420p_y_height);
+
+/*
+ * Converts and Deinterleaves tiled data to linear
+ * 1. UV of NV12T to UV of YUV420P
+ *
+ * @param yuv420p_u_dest
+ * U plane address of YUV420P[out]
+ *
+ * @param yuv420p_v_dest
+ * V plane address of YUV420P[out]
+ *
+ * @param nv12t_uv_src
+ * UV plane address of NV12T[in]
+ *
+ * @param yuv420p_width
+ * Width of YUV420[in]
+ *
+ * @param yuv420p_uv_height
+ * Height/2 of YUV420[in]
+ */
+void csc_tiled_to_linear_deinterleave(char *yuv420p_u_dest, char *yuv420p_v_dest, char *nv12t_uv_src, int yuv420p_width, int yuv420p_uv_height);
+
+/*
+ * Converts linear data to tiled.
+ * 1. Y of YUV420P to Y of NV12T
+ * 2. Y of YUV420S to Y of NV12T
+ * 3. UV of YUV420S to UV of NV12T
+ *
+ * @param nv12t_dest
+ * Y or UV plane address of NV12T[out]
+ *
+ * @param yuv420p_src
+ * Y or UV plane address of YUV420P(S)[in]
+ *
+ * @param yuv420p_width
+ * Width of YUV420[in]
+ *
+ * @param yuv420p_y_height
+ * Y: Height of YUV420, UV: Height/2 of YUV420[in]
+ */
+void csc_linear_to_tiled(char *nv12t_dest, char *yuv420p_src, int yuv420p_width, int yuv420p_y_height);
+
+/*
+ * Converts and Interleaves linear to tiled
+ * 1. UV of YUV420P to UV of NV12T
+ *
+ * @param nv12t_uv_dest
+ * UV plane address of NV12T[out]
+ *
+ * @param yuv420p_u_src
+ * U plane address of YUV420P[in]
+ *
+ * @param yuv420p_v_src
+ * V plane address of YUV420P[in]
+ *
+ * @param yuv420p_width
+ * Width of YUV420[in]
+ *
+ * @param yuv420p_uv_height
+ * Height/2 of YUV420[in]
+ */
+void csc_linear_to_tiled_interleave(char *nv12t_uv_dest, char *yuv420p_u_src, char *yuv420p_v_src, int yuv420p_width, int yuv420p_uv_height);
+
+#endif /*COLOR_SPACE_CONVERTOR_H_*/
diff --git a/sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/Android.mk b/sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/Android.mk
index 08e9874..cdf345c 100644
--- a/sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/Android.mk
+++ b/sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/Android.mk
@@ -14,7 +14,7 @@ LOCAL_CFLAGS :=
LOCAL_ARM_MODE := arm
-LOCAL_STATIC_LIBRARIES := libSEC_OMX_Vdec libsecosal libsecbasecomponent libsecmfcdecapi
+LOCAL_STATIC_LIBRARIES := libSEC_OMX_Vdec libsecosal libsecbasecomponent libsecmfcdecapi libseccsc
LOCAL_SHARED_LIBRARIES := libc libdl libcutils libutils
LOCAL_C_INCLUDES := $(SEC_OMX_INC)/khronos \
@@ -22,7 +22,7 @@ LOCAL_C_INCLUDES := $(SEC_OMX_INC)/khronos \
$(SEC_OMX_TOP)/sec_osal \
$(SEC_OMX_TOP)/sec_omx_core \
$(SEC_OMX_COMPONENT)/common \
- $(SEC_OMX_COMPONENT)/video/dec \
+ $(SEC_OMX_COMPONENT)/video/dec
LOCAL_C_INCLUDES += $(SEC_OMX_TOP)/sec_codecs/video/mfc_c110/include
diff --git a/sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/SEC_OMX_H264dec.c b/sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/SEC_OMX_H264dec.c
index 9acae4e..2c5f3bb 100644
--- a/sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/SEC_OMX_H264dec.c
+++ b/sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/SEC_OMX_H264dec.c
@@ -35,6 +35,7 @@
#include "library_register.h"
#include "SEC_OMX_H264dec.h"
#include "SsbSipMfcApi.h"
+#include "color_space_convertor.h"
#undef SEC_LOG_TAG
#define SEC_LOG_TAG "SEC_H264_DEC"
@@ -955,14 +956,17 @@ OMX_ERRORTYPE SEC_MFC_H264_Decode(OMX_COMPONENTTYPE *pOMXComponent, SEC_OMX_DATA
SEC_OSAL_Memcpy(pOutBuf + sizeof(frameSize) + (sizeof(void *) * 3), &(outputInfo.CVirAddr), sizeof(outputInfo.CVirAddr));
} else {
SEC_OSAL_Log(SEC_LOG_TRACE, "YUV420 out for ThumbnailMode");
- Y_tile_to_linear_4x2(
- (unsigned char *)pOutBuf,
- (unsigned char *)outputInfo.YVirAddr,
- bufWidth, bufHeight);
- CbCr_tile_to_linear_4x2(
- ((unsigned char *)pOutBuf) + frameSize,
- (unsigned char *)outputInfo.CVirAddr,
- bufWidth, bufHeight);
+ csc_tiled_to_linear(
+ (unsigned char *)pOutBuf,
+ (unsigned char *)outputInfo.YVirAddr,
+ bufWidth,
+ bufHeight);
+ csc_tiled_to_linear_deinterleave(
+ (unsigned char *)pOutBuf + frameSize,
+ (unsigned char *)pOutBuf + (frameSize * 5) / 4,
+ (unsigned char *)outputInfo.CVirAddr,
+ bufWidth,
+ bufHeight >> 1);
}
}
diff --git a/sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/Android.mk b/sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/Android.mk
index 92891a7..66353d6 100644
--- a/sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/Android.mk
+++ b/sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/Android.mk
@@ -14,7 +14,7 @@ LOCAL_CFLAGS :=
LOCAL_ARM_MODE := arm
-LOCAL_STATIC_LIBRARIES := libSEC_OMX_Vdec libsecosal libsecbasecomponent libsecmfcdecapi
+LOCAL_STATIC_LIBRARIES := libSEC_OMX_Vdec libsecosal libsecbasecomponent libsecmfcdecapi libseccsc
LOCAL_SHARED_LIBRARIES := libc libdl libcutils libutils
LOCAL_C_INCLUDES := $(SEC_OMX_INC)/khronos \
diff --git a/sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/SEC_OMX_Mpeg4dec.c b/sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/SEC_OMX_Mpeg4dec.c
index 7396a2c..0d64e47 100644
--- a/sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/SEC_OMX_Mpeg4dec.c
+++ b/sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/SEC_OMX_Mpeg4dec.c
@@ -35,6 +35,7 @@
#include "library_register.h"
#include "SEC_OMX_Mpeg4dec.h"
#include "SsbSipMfcApi.h"
+#include "color_space_convertor.h"
#undef SEC_LOG_TAG
#define SEC_LOG_TAG "SEC_MPEG4_DEC"
@@ -1135,14 +1136,17 @@ OMX_ERRORTYPE SEC_MFC_Mpeg4_Decode(OMX_COMPONENTTYPE *pOMXComponent, SEC_OMX_DAT
SEC_OSAL_Memcpy(pOutputBuf + sizeof(frameSize) + (sizeof(void *) * 3), &(outputInfo.CVirAddr), sizeof(outputInfo.CVirAddr));
} else {
SEC_OSAL_Log(SEC_LOG_TRACE, "YUV420 out for ThumbnailMode");
- Y_tile_to_linear_4x2(
- (unsigned char *)pOutputBuf,
- (unsigned char *)outputInfo.YVirAddr,
- bufWidth, bufHeight);
- CbCr_tile_to_linear_4x2(
- ((unsigned char *)pOutputBuf) + frameSize,
- (unsigned char *)outputInfo.CVirAddr,
- bufWidth, bufHeight);
+ csc_tiled_to_linear(
+ (unsigned char *)pOutputBuf,
+ (unsigned char *)outputInfo.YVirAddr,
+ bufWidth,
+ bufHeight);
+ csc_tiled_to_linear_deinterleave(
+ (unsigned char *)pOutputBuf + frameSize,
+ (unsigned char *)pOutputBuf + (frameSize * 5) / 4,
+ (unsigned char *)outputInfo.CVirAddr,
+ bufWidth,
+ bufHeight >> 1);
}
}
diff --git a/sec_mm/sec_omx/sec_omx_component/video/enc/h264enc/Android.mk b/sec_mm/sec_omx/sec_omx_component/video/enc/h264enc/Android.mk
index 3edcb58..cf91356 100644
--- a/sec_mm/sec_omx/sec_omx_component/video/enc/h264enc/Android.mk
+++ b/sec_mm/sec_omx/sec_omx_component/video/enc/h264enc/Android.mk
@@ -22,7 +22,7 @@ LOCAL_C_INCLUDES := $(SEC_OMX_INC)/khronos \
$(SEC_OMX_TOP)/sec_osal \
$(SEC_OMX_TOP)/sec_omx_core \
$(SEC_OMX_COMPONENT)/common \
- $(SEC_OMX_COMPONENT)/video/enc \
+ $(SEC_OMX_COMPONENT)/video/enc
LOCAL_C_INCLUDES += $(SEC_OMX_TOP)/sec_codecs/video/mfc_c110/include