summary refs log tree commit diff stats
path: root/sec_mm
diff options
context:
space:
mode:
author James Dong <jdong@google.com> 2011-07-21 19:47:52 -0700
committer James Dong <jdong@google.com> 2011-07-21 19:47:52 -0700
commit abc28ea135621af9735021ea27763cdf624aada5 (patch)
tree e3e269e94898bbb105701379ce0c1d1f5603cf9c /sec_mm
parent 9d669beeaf822d69178437f9ca8b3305cd124817 (diff)
download device_samsung_crespo-abc28ea135621af9735021ea27763cdf624aada5.zip
device_samsung_crespo-abc28ea135621af9735021ea27763cdf624aada5.tar.gz
device_samsung_crespo-abc28ea135621af9735021ea27763cdf624aada5.tar.bz2
Revert "Add color space convertor in SEC_OMX"
This reverts commit 7cf106ba5ff2dac2be536d8c84c715ca87d0a2d9.
Diffstat (limited to 'sec_mm')
-rw-r--r-- sec_mm/sec_omx/sec_codecs/Android.mk 2
-rw-r--r-- sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/Android.mk 31
-rw-r--r-- sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_deinterleave_memcpy.s 128
-rw-r--r-- sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_interleave_memcpy.s 133
-rw-r--r-- sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_uv_neon.s 768
-rw-r--r-- sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_y_neon.s 680
-rw-r--r-- sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_uv_neon.s 573
-rw-r--r-- sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_y_neon.s 451
-rw-r--r-- sec_mm/sec_omx/sec_codecs/video/mfc_c110/dec/src/SsbSipMfcDecAPI.c 170
-rw-r--r-- sec_mm/sec_omx/sec_codecs/video/mfc_c110/enc/src/SsbSipMfcEncAPI.c 3
-rw-r--r-- sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/SsbSipMfcApi.h 6
-rw-r--r-- sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/color_space_convertor.h 176
-rw-r--r-- sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/Android.mk 4
-rw-r--r-- sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/SEC_OMX_H264dec.c 20
-rw-r--r-- sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/Android.mk 2
-rw-r--r-- sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/SEC_OMX_Mpeg4dec.c 20
-rw-r--r-- sec_mm/sec_omx/sec_omx_component/video/enc/h264enc/Android.mk 2
17 files changed, 197 insertions, 2972 deletions
diff --git a/sec_mm/sec_omx/sec_codecs/Android.mk b/sec_mm/sec_omx/sec_codecs/Android.mk
index 3c163a4..a51a075 100644
--- a/sec_mm/sec_omx/sec_codecs/Android.mk
+++ b/sec_mm/sec_omx/sec_codecs/Android.mk
@@ -4,4 +4,4 @@ include $(CLEAR_VARS)
include $(SEC_CODECS)/video/mfc_c110/dec/Android.mk
include $(SEC_CODECS)/video/mfc_c110/enc/Android.mk
-include $(SEC_CODECS)/video/mfc_c110/csc/Android.mk
\ No newline at end of file
+
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/Android.mk b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/Android.mk
deleted file mode 100644
index fee8529..0000000
--- a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/Android.mk
+++ /dev/null
@@ -1,31 +0,0 @@
-
-LOCAL_PATH := $(call my-dir)
-include $(CLEAR_VARS)
-
-LOCAL_MODULE_TAGS := optional
-
-LOCAL_SRC_FILES := \
- csc_yuv420_nv12t_y_neon.s \
- csc_yuv420_nv12t_uv_neon.s \
- csc_nv12t_yuv420_y_neon.s \
- csc_nv12t_yuv420_uv_neon.s \
- csc_interleave_memcpy.s \
- csc_deinterleave_memcpy.s
-
-LOCAL_MODULE := libseccsc
-
-
-
-LOCAL_CFLAGS :=
-
-LOCAL_ARM_MODE := arm
-
-LOCAL_STATIC_LIBRARIES :=
-
-LOCAL_SHARED_LIBRARIES := liblog
-
-LOCAL_C_INCLUDES := \
- $(SEC_CODECS)/video/mfc_c110/include
-
-include $(BUILD_STATIC_LIBRARY)
-
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_deinterleave_memcpy.s b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_deinterleave_memcpy.s
deleted file mode 100644
index dc95bfa..0000000
--- a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_deinterleave_memcpy.s
+++ /dev/null
@@ -1,128 +0,0 @@
-/*
- *
- * Copyright 2011 Samsung Electronics S.LSI Co. LTD
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * @file csc_deinterleave_memcpy.s
- * @brief SEC_OMX specific define
- * @author ShinWon Lee (shinwon.lee@samsung.com)
- * @version 1.0
- * @history
- * 2011.7.01 : Create
- */
- .arch armv7-a
- .text
- .global csc_deinterleave_memcpy_neon
- .type csc_deinterleave_memcpy_neon, %function
-csc_deinterleave_memcpy_neon:
- .fnstart
-
- @r0 dest1
- @r1 dest2
- @r2 src
- @r3 src_size
- @r4 i
- @r5 temp1
- @r6 temp2
- @r7 temp3
-
- stmfd sp!, {r4-r12,r14} @ backup registers
-
- mov r4, #0
- cmp r3, #256
- blt LINEAR_SIZE_128
-
- bic r5, r3, #0xFF
-LINEAR_SIZE_256_LOOP:
- pld [r2, #64]
- vld2.8 {q0, q1}, [r2]!
- pld [r2, #64]
- vld2.8 {q2, q3}, [r2]!
- pld [r2, #64]
- vld2.8 {q4, q5}, [r2]!
- pld [r2, #64]
- vld2.8 {q6, q7}, [r2]!
- pld [r2, #64]
- vld2.8 {q8, q9}, [r2]!
- pld [r2, #64]
- vld2.8 {q10, q11}, [r2]!
- vld2.8 {q12, q13}, [r2]!
- vld2.8 {q14, q15}, [r2]!
-
- vst1.8 {q0}, [r0]!
- vst1.8 {q2}, [r0]!
- vst1.8 {q4}, [r0]!
- vst1.8 {q6}, [r0]!
- vst1.8 {q8}, [r0]!
- vst1.8 {q10}, [r0]!
- vst1.8 {q12}, [r0]!
- vst1.8 {q14}, [r0]!
-
- vst1.8 {q1}, [r1]!
- vst1.8 {q3}, [r1]!
- vst1.8 {q5}, [r1]!
- vst1.8 {q7}, [r1]!
- vst1.8 {q9}, [r1]!
- vst1.8 {q11}, [r1]!
- vst1.8 {q13}, [r1]!
- vst1.8 {q15}, [r1]!
-
- add r4, #256
- cmp r4, r5
- blt LINEAR_SIZE_256_LOOP
-
-LINEAR_SIZE_128:
- sub r5, r3, r4
- cmp r5, #64
- blt LINEAR_SIZE_4
- pld [r2, #64]
- vld2.8 {q0, q1}, [r2]!
- pld [r2, #64]
- vld2.8 {q2, q3}, [r2]!
- vld2.8 {q4, q5}, [r2]!
- vld2.8 {q6, q7}, [r2]!
-
- vst1.8 {q0}, [r0]!
- vst1.8 {q4}, [r0]!
- vst1.8 {q2}, [r0]!
- vst1.8 {q6}, [r0]!
-
- vst1.8 {q1}, [r1]!
- vst1.8 {q3}, [r1]!
- vst1.8 {q5}, [r1]!
- vst1.8 {q7}, [r1]!
-
- add r4, #128
-
-LINEAR_SIZE_4:
- ldrb r6, [r2], #1
- ldrb r7, [r2], #1
- ldrb r8, [r2], #1
- ldrb r9, [r2], #1
-
- strb r6, [r0], #1
- strb r8, [r0], #1
- strb r7, [r1], #1
- strb r9, [r1], #1
-
- add r4, #4
- cmp r4, r3
- blt LINEAR_SIZE_4
-
-RESTORE_REG:
- ldmfd sp!, {r4-r12,r15} @ restore registers
- .fnend
-
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_interleave_memcpy.s b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_interleave_memcpy.s
deleted file mode 100644
index 63fb88d..0000000
--- a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_interleave_memcpy.s
+++ /dev/null
@@ -1,133 +0,0 @@
-/*
- *
- * Copyright 2011 Samsung Electronics S.LSI Co. LTD
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * @file csc_interleave_memcpy.s
- * @brief SEC_OMX specific define
- * @author ShinWon Lee (shinwon.lee@samsung.com)
- * @version 1.0
- * @history
- * 2011.7.01 : Create
- */
- .arch armv7-a
- .text
- .global csc_interleave_memcpy_neon
- .type csc_interleave_memcpy_neon, %function
-csc_interleave_memcpy_neon:
- .fnstart
-
- @r0 dest
- @r1 src1
- @r2 src2
- @r3 src_size
- @r4 i
- @r5 temp1
- @r6 temp2
- @r7 temp3
- @r8 temp2
- @r9 temp3
-
- stmfd sp!, {r4-r12,r14} @ backup registers
-
- mov r4, #0
- cmp r3, #128
- blt LINEAR_SIZE_64
-
- bic r5, r3, #0x2F
-LINEAR_SIZE_128_LOOP:
- pld [r1, #64]
- vld1.8 {q0}, [r1]!
- vld1.8 {q2}, [r1]!
- vld1.8 {q4}, [r1]!
- vld1.8 {q6}, [r1]!
- pld [r2]
- vld1.8 {q8}, [r1]!
- vld1.8 {q10}, [r1]!
- vld1.8 {q12}, [r1]!
- vld1.8 {q14}, [r1]!
- pld [r2, #64]
- vld1.8 {q1}, [r2]!
- vld1.8 {q3}, [r2]!
- vld1.8 {q5}, [r2]!
- vld1.8 {q7}, [r2]!
- vld1.8 {q9}, [r2]!
- vld1.8 {q11}, [r2]!
- vld1.8 {q13}, [r2]!
- vld1.8 {q15}, [r2]!
-
- vst2.8 {q0, q1}, [r0]!
- vst2.8 {q2, q3}, [r0]!
- vst2.8 {q4, q5}, [r0]!
- vst2.8 {q6, q7}, [r0]!
- vst2.8 {q8, q9}, [r0]!
- vst2.8 {q10, q11}, [r0]!
- pld [r1]
- vst2.8 {q12, q13}, [r0]!
- vst2.8 {q14, q15}, [r0]!
-
- add r4, #128
- cmp r4, r5
- blt LINEAR_SIZE_128_LOOP
-
-LINEAR_SIZE_64:
- sub r5, r3, r4
- cmp r5, #64
- blt LINEAR_SIZE_2
-LINEAR_SIZE_64_LOOP:
- pld [r2]
- vld1.8 {q0}, [r1]!
- vld1.8 {q2}, [r1]!
- vld1.8 {q4}, [r1]!
- vld1.8 {q6}, [r1]!
- vld1.8 {q1}, [r2]!
- vld1.8 {q3}, [r2]!
- vld1.8 {q5}, [r2]!
- vld1.8 {q7}, [r2]!
-
- vst2.8 {q0, q1}, [r0]!
- vst2.8 {q2, q3}, [r0]!
- pld [r1]
- vst2.8 {q4, q5}, [r0]!
- vst2.8 {q6, q7}, [r0]!
-
- add r4, #64
- cmp r4, r3
- blt LINEAR_SIZE_64_LOOP
-
-LINEAR_SIZE_2:
- sub r5, r3, r4
- cmp r5, #2
- blt RESTORE_REG
-LINEAR_SIZE_2_LOOP:
- ldrb r6, [r1], #1
- ldrb r7, [r2], #1
- ldrb r8, [r1], #1
- ldrb r9, [r2], #1
-
- strb r6, [r0], #1
- strb r7, [r0], #1
- strb r8, [r0], #1
- strb r9, [r0], #1
-
- add r4, #2
- cmp r4, r3
- blt LINEAR_SIZE_2_LOOP
-
-RESTORE_REG:
- ldmfd sp!, {r4-r12,r15} @ restore registers
- .fnend
-
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_uv_neon.s b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_uv_neon.s
deleted file mode 100644
index d1e0d2f..0000000
--- a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_uv_neon.s
+++ /dev/null
@@ -1,768 +0,0 @@
-/*
- *
- * Copyright 2011 Samsung Electronics S.LSI Co. LTD
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * @file csc_nv12t_yuv420_uv_neon.s
- * @brief SEC_OMX specific define
- * @author ShinWon Lee (shinwon.lee@samsung.com)
- * @version 1.0
- * @history
- * 2011.7.01 : Create
- */
-
-/*
- * Converts and Deinterleaves tiled data to linear
- * 1. UV of NV12T to UV of YUV420P
- *
- * @param yuv420_u_dest
- * U plane address of YUV420P[out]
- *
- * @param yuv420_v_dest
- * V plane address of YUV420P[out]
- *
- * @param nv12t_src
- * UV plane address of NV12T[in]
- *
- * @param yuv420_width
- * Width of YUV420[in]
- *
- * @param yuv420_uv_height
- * Height/2 of YUV420[in]
- */
-
- .arch armv7-a
- .text
- .global csc_tiled_to_linear_deinterleave_neon
- .type csc_tiled_to_linear_deinterleave_neon, %function
-csc_tiled_to_linear_deinterleave_neon:
- .fnstart
-
- @r0 linear_u_dest
- @r1 linear_v_dest
- @r2 tiled_uv_src
- @r3 linear_x_size
- @r4 linear_y_size
- @r5 j
- @r6 i
- @r7 tiled_addr
- @r8 linear_addr
- @r9 aligned_x_size
- @r10 temp1
- @r11 temp2
- @r12 temp3
- @r14 temp4
-
- stmfd sp!, {r4-r12,r14} @ backup registers
-
- ldr r4, [sp, #40] @ load linear_y_size to r4
-
- mov r9, #0
-
-LINEAR_X_SIZE_1024:
- cmp r3, #1024
- blt LINEAR_X_SIZE_512
-
- mov r6, #0
-LINEAR_X_SIZE_1024_LOOP:
- mov r7, #0 @ tiled_offset = 0@
- mov r5, r6, asr #5 @ tiled_y_index = i>>5@
- and r10, r5, #0x1
- cmp r10, #0x1
- bne LINEAR_X_SIZE_1024_LOOP_EVEN
-LINEAR_X_SIZE_1024_LOOP_ODD:
- sub r7, r5, #1 @ tiled_offset = tiled_y_index-1@
- add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
- bic r10, #0x7F
- mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)@
- mul r7, r7, r10
- mov r5, #8
- mov r5, r5, lsl #11
- sub r5, r5, #32
- add r7, r7, #2 @ tiled_offset = tiled_offset+2@
- mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@
- add r11, r7, #2048
- add r12, r7, #4096
- add r14, r7, #6144
- b LINEAR_X_SIZE_1024_LOOP_MEMCPY
-
-LINEAR_X_SIZE_1024_LOOP_EVEN:
- add r11, r4, #31 @ temp2 = ((linear_y_size+31)>>5)<<5@
- bic r11, r11, #0x1F
- add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
- bic r10, #0x7F
- mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)@
- mul r7, r5, r10
- add r12, r6, #32
- cmp r12, r11
- mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@
- add r11, r7, #2048
- movlt r5, #8
- addlt r12, r7, #12288
- addlt r14, r7, #14336
- movge r5, #4
- addge r12, r7, #2048
- addge r14, r7, #2048
- mov r5, r5, lsl #11
- sub r5, r5, #32
-
-LINEAR_X_SIZE_1024_LOOP_MEMCPY:
- and r10, r6, #0x1F
- mov r10, r10, lsl #6
- add r10, r2, r10
-
- add r7, r7, r10 @ tiled_addr = tiled_src+64*(temp1)
- add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1)
- pld [r11]
- vld2.8 {q0, q1}, [r7]!
- pld [r11, #32]
- add r12, r12, r10 @ tiled_addr2 = tiled_src+64*(temp1)
- vld2.8 {q2, q3}, [r7], r5
- pld [r12]
- vld2.8 {q4, q5}, [r11]!
- pld [r12, #32]
- add r14, r14, r10 @ tiled_addr3 = tiled_src+64*(temp1)
- vld2.8 {q6, q7}, [r11], r5
- pld [r14]
- vld2.8 {q8, q9}, [r12]!
- pld [r14, #32]
- mov r10, r3, asr #1
- vld2.8 {q10, q11}, [r12], r5
- mul r10, r10, r6
- vld2.8 {q12, q13}, [r14]!
- vld2.8 {q14, q15}, [r14], r5
-
- add r8, r0, r10
- vst1.8 {q0}, [r8]!
- vst1.8 {q2}, [r8]!
- vst1.8 {q4}, [r8]!
- vst1.8 {q6}, [r8]!
- vst1.8 {q8}, [r8]!
- vst1.8 {q10}, [r8]!
- vst1.8 {q12}, [r8]!
- vst1.8 {q14}, [r8]!
-
- add r10, r1, r10
- vst1.8 {q1}, [r10]!
- vst1.8 {q3}, [r10]!
- vst1.8 {q5}, [r10]!
- vst1.8 {q7}, [r10]!
- vst1.8 {q9}, [r10]!
- vst1.8 {q11}, [r10]!
- pld [r7]
- vst1.8 {q13}, [r10]!
- pld [r7, #32]
- vst1.8 {q15}, [r10]!
-
- pld [r11]
- vld2.8 {q0, q1}, [r7]!
- pld [r11, #32]
- vld2.8 {q2, q3}, [r7], r5
- pld [r12]
- vld2.8 {q4, q5}, [r11]!
- pld [r12, #32]
- vld2.8 {q6, q7}, [r11], r5
- pld [r14]
- vld2.8 {q8, q9}, [r12]!
- pld [r14, #32]
- vld2.8 {q10, q11}, [r12], r5
- vld2.8 {q12, q13}, [r14]!
- vld2.8 {q14, q15}, [r14], r5
-
- vst1.8 {q0}, [r8]!
- vst1.8 {q2}, [r8]!
- vst1.8 {q4}, [r8]!
- vst1.8 {q6}, [r8]!
- vst1.8 {q8}, [r8]!
- vst1.8 {q10}, [r8]!
- vst1.8 {q12}, [r8]!
- vst1.8 {q14}, [r8]!
-
- vst1.8 {q1}, [r10]!
- vst1.8 {q3}, [r10]!
- vst1.8 {q5}, [r10]!
- vst1.8 {q7}, [r10]!
- vst1.8 {q9}, [r10]!
- vst1.8 {q11}, [r10]!
- pld [r7]
- vst1.8 {q13}, [r10]!
- pld [r7, #32]
- vst1.8 {q15}, [r10]!
-
- pld [r11]
- vld2.8 {q0, q1}, [r7]!
- pld [r11, #32]
- vld2.8 {q2, q3}, [r7], r5
- pld [r12]
- vld2.8 {q4, q5}, [r11]!
- pld [r12, #32]
- vld2.8 {q6, q7}, [r11], r5
- pld [r14]
- vld2.8 {q8, q9}, [r12]!
- pld [r14, #32]
- vld2.8 {q10, q11}, [r12], r5
- vld2.8 {q12, q13}, [r14]!
- vld2.8 {q14, q15}, [r14], r5
-
- vst1.8 {q0}, [r8]!
- vst1.8 {q2}, [r8]!
- vst1.8 {q4}, [r8]!
- vst1.8 {q6}, [r8]!
- vst1.8 {q8}, [r8]!
- vst1.8 {q10}, [r8]!
- vst1.8 {q12}, [r8]!
- vst1.8 {q14}, [r8]!
-
- vst1.8 {q1}, [r10]!
- vst1.8 {q3}, [r10]!
- vst1.8 {q5}, [r10]!
- vst1.8 {q7}, [r10]!
- vst1.8 {q9}, [r10]!
- vst1.8 {q11}, [r10]!
- pld [r7]
- vst1.8 {q13}, [r10]!
- pld [r7, #32]
- vst1.8 {q15}, [r10]!
-
- pld [r11]
- vld2.8 {q0, q1}, [r7]!
- pld [r11, #32]
- vld2.8 {q2, q3}, [r7]
- pld [r12]
- vld2.8 {q4, q5}, [r11]!
- pld [r12, #32]
- vld2.8 {q6, q7}, [r11]
- pld [r14]
- vld2.8 {q8, q9}, [r12]!
- pld [r14, #32]
- vld2.8 {q10, q11}, [r12]
- vld2.8 {q12, q13}, [r14]!
- vld2.8 {q14, q15}, [r14]
-
- vst1.8 {q0}, [r8]!
- vst1.8 {q2}, [r8]!
- vst1.8 {q4}, [r8]!
- vst1.8 {q6}, [r8]!
- vst1.8 {q8}, [r8]!
- vst1.8 {q10}, [r8]!
- vst1.8 {q12}, [r8]!
- vst1.8 {q14}, [r8]!
-
- vst1.8 {q1}, [r10]!
- vst1.8 {q3}, [r10]!
- vst1.8 {q5}, [r10]!
- vst1.8 {q7}, [r10]!
- vst1.8 {q9}, [r10]!
- vst1.8 {q11}, [r10]!
- add r6, #1
- vst1.8 {q13}, [r10]!
- cmp r6, r4
- vst1.8 {q15}, [r10]!
-
- blt LINEAR_X_SIZE_1024_LOOP
-
- mov r9, #1024
-
-LINEAR_X_SIZE_512:
- sub r10, r3, r9
- cmp r10, #512
- blt LINEAR_X_SIZE_256
-
- mov r6, #0
-LINEAR_X_SIZE_512_LOOP:
- mov r7, #0 @ tiled_offset = 0@
- mov r5, r6, asr #5 @ tiled_y_index = i>>5@
- and r10, r5, #0x1
- cmp r10, #0x1
- bne LINEAR_X_SIZE_512_LOOP_EVEN
-LINEAR_X_SIZE_512_LOOP_ODD:
- sub r7, r5, #1 @ tiled_offset = tiled_y_index-1@
- add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
- bic r10, #0x7F
- mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)@
- mul r7, r7, r10
- mov r5, #8
- mov r5, r5, lsl #11
- add r7, r7, #2 @ tiled_offset = tiled_offset+2@
- mov r10, r9, asr #5
- add r7, r7, r10
- mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@
- add r11, r7, #2048
- add r12, r7, #4096
- add r14, r7, #6144
- sub r5, r5, #32
- b LINEAR_X_SIZE_512_LOOP_MEMCPY
-
-LINEAR_X_SIZE_512_LOOP_EVEN:
- add r11, r4, #31 @ temp2 = ((linear_y_size+31)>>5)<<5@
- bic r11, r11, #0x1F
- add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
- bic r10, #0x7F
- mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)@
- mul r7, r5, r10
- add r12, r6, #32
- cmp r12, r11
- mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@
- movlt r5, #8
- movlt r10, r9, asr #5
- movge r10, r9, asr #6
- add r7, r7, r10, lsl #11
- add r11, r7, #2048
- addlt r12, r7, #12288
- addlt r14, r7, #14336
- movge r5, #4
- addge r12, r7, #4096
- addge r14, r7, #6144
- mov r5, r5, lsl #11
- sub r5, r5, #32
-
-LINEAR_X_SIZE_512_LOOP_MEMCPY:
- and r10, r6, #0x1F
- mov r10, r10, lsl #6
- add r10, r2, r10
-
- add r7, r7, r10 @ tiled_addr = tiled_src+64*(temp1)
- add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1)
- pld [r11]
- vld2.8 {q0, q1}, [r7]!
- pld [r11, #32]
- add r12, r12, r10 @ tiled_addr2 = tiled_src+64*(temp1)
- vld2.8 {q2, q3}, [r7], r5
- pld [r12]
- vld2.8 {q4, q5}, [r11]!
- pld [r12, #32]
- add r14, r14, r10 @ tiled_addr3 = tiled_src+64*(temp1)
- vld2.8 {q6, q7}, [r11], r5
- pld [r14]
- mov r10, r3, asr #1
- vld2.8 {q8, q9}, [r12]!
- pld [r14, #32]
- mul r10, r10, r6
- vld2.8 {q10, q11}, [r12], r5
- add r8, r0, r10
- vld2.8 {q12, q13}, [r14]!
- add r8, r8, r9, asr #1
- vld2.8 {q14, q15}, [r14], r5
-
- vst1.8 {q0}, [r8]!
- vst1.8 {q2}, [r8]!
- vst1.8 {q4}, [r8]!
- vst1.8 {q6}, [r8]!
- vst1.8 {q8}, [r8]!
- vst1.8 {q10}, [r8]!
- vst1.8 {q12}, [r8]!
- add r10, r1, r10
- vst1.8 {q14}, [r8]!
-
- add r10, r10, r9, asr #1
- vst1.8 {q1}, [r10]!
- vst1.8 {q3}, [r10]!
- vst1.8 {q5}, [r10]!
- vst1.8 {q7}, [r10]!
- vst1.8 {q9}, [r10]!
- vst1.8 {q11}, [r10]!
- pld [r7]
- vst1.8 {q13}, [r10]!
- pld [r7, #32]
- vst1.8 {q15}, [r10]!
-
- pld [r11]
- vld2.8 {q0, q1}, [r7]!
- pld [r11, #32]
- vld2.8 {q2, q3}, [r7]
- pld [r12]
- vld2.8 {q4, q5}, [r11]!
- pld [r12, #32]
- vld2.8 {q6, q7}, [r11]
- pld [r14]
- vld2.8 {q8, q9}, [r12]!
- pld [r14, #32]
- vld2.8 {q10, q11}, [r12]
- vld2.8 {q12, q13}, [r14]!
- vld2.8 {q14, q15}, [r14]
-
- vst1.8 {q0}, [r8]!
- vst1.8 {q2}, [r8]!
- vst1.8 {q4}, [r8]!
- vst1.8 {q6}, [r8]!
- vst1.8 {q8}, [r8]!
- vst1.8 {q10}, [r8]!
- vst1.8 {q12}, [r8]!
- vst1.8 {q14}, [r8]!
-
- vst1.8 {q1}, [r10]!
- vst1.8 {q3}, [r10]!
- vst1.8 {q5}, [r10]!
- vst1.8 {q7}, [r10]!
- vst1.8 {q9}, [r10]!
- vst1.8 {q11}, [r10]!
- add r6, #1
- vst1.8 {q13}, [r10]!
- cmp r6, r4
- vst1.8 {q15}, [r10]!
-
- blt LINEAR_X_SIZE_512_LOOP
-
- add r9, r9, #512
-
-LINEAR_X_SIZE_256:
- sub r10, r3, r9
- cmp r10, #256
- blt LINEAR_X_SIZE_128
-
- mov r6, #0
-LINEAR_X_SIZE_256_LOOP:
- mov r7, #0 @ tiled_offset = 0@
- mov r5, r6, asr #5 @ tiled_y_index = i>>5@
- and r10, r5, #0x1
- cmp r10, #0x1
- bne LINEAR_X_SIZE_256_LOOP_EVEN
-LINEAR_X_SIZE_256_LOOP_ODD:
- sub r7, r5, #1 @ tiled_offset = tiled_y_index-1@
- add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
- bic r10, #0x7F
- mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)@
- mul r7, r7, r10
- add r7, r7, #2 @ tiled_offset = tiled_offset+2@
- mov r10, r9, asr #5
- add r7, r7, r10
- mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@
- add r11, r7, #2048
- add r12, r7, #4096
- add r14, r7, #6144
- b LINEAR_X_SIZE_256_LOOP_MEMCPY
-
-LINEAR_X_SIZE_256_LOOP_EVEN:
- add r11, r4, #31 @ temp2 = ((linear_y_size+31)>>5)<<5@
- bic r11, r11, #0x1F
- add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
- bic r10, #0x7F
- mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)@
- mul r7, r5, r10
- mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@
- add r12, r6, #32
- cmp r12, r11
- movlt r10, r9, asr #5
- addlt r7, r7, r10, lsl #11
- addlt r11, r7, #2048
- addlt r12, r7, #12288
- addlt r14, r7, #14336
- movge r10, r9, asr #6
- addge r7, r7, r10, lsl #11
- addge r11, r7, #2048
- addge r12, r7, #4096
- addge r14, r7, #6144
-
-LINEAR_X_SIZE_256_LOOP_MEMCPY:
- and r10, r6, #0x1F
- mov r10, r10, lsl #6
- add r10, r2, r10
-
- add r7, r7, r10 @ tiled_addr = tiled_src+64*(temp1)
- add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1)
- pld [r11]
- vld2.8 {q0, q1}, [r7]!
- pld [r11, #32]
- add r12, r12, r10 @ tiled_addr2 = tiled_src+64*(temp1)
- vld2.8 {q2, q3}, [r7]
- pld [r12]
- vld2.8 {q4, q5}, [r11]!
- pld [r12, #32]
- add r14, r14, r10 @ tiled_addr3 = tiled_src+64*(temp1)
- vld2.8 {q6, q7}, [r11]
- pld [r14]
- vld2.8 {q8, q9}, [r12]!
- pld [r14, #32]
- mov r10, r3, asr #1
- vld2.8 {q10, q11}, [r12]
- mul r10, r10, r6
- vld2.8 {q12, q13}, [r14]!
- add r8, r0, r10
- vld2.8 {q14, q15}, [r14]
-
- add r8, r8, r9, asr #1
- vst1.8 {q0}, [r8]!
- vst1.8 {q2}, [r8]!
- vst1.8 {q4}, [r8]!
- vst1.8 {q6}, [r8]!
- vst1.8 {q8}, [r8]!
- vst1.8 {q10}, [r8]!
- vst1.8 {q12}, [r8]!
- add r10, r1, r10
- vst1.8 {q14}, [r8]!
-
- add r10, r10, r9, asr #1
- vst1.8 {q1}, [r10]!
- vst1.8 {q3}, [r10]!
- vst1.8 {q5}, [r10]!
- vst1.8 {q7}, [r10]!
- vst1.8 {q9}, [r10]!
- vst1.8 {q11}, [r10]!
- add r6, #1
- vst1.8 {q13}, [r10]!
- cmp r6, r4
- vst1.8 {q15}, [r10]!
- blt LINEAR_X_SIZE_256_LOOP
-
- add r9, r9, #256
-
-LINEAR_X_SIZE_128:
- sub r10, r3, r9
- cmp r10, #128
- blt LINEAR_X_SIZE_64
-
- mov r6, #0
-LINEAR_X_SIZE_128_LOOP:
- mov r7, #0 @ tiled_offset = 0@
- mov r5, r6, asr #5 @ tiled_y_index = i>>5@
- and r10, r5, #0x1
- cmp r10, #0x1
- bne LINEAR_X_SIZE_128_LOOP_EVEN
-LINEAR_X_SIZE_128_LOOP_ODD:
- sub r7, r5, #1 @ tiled_offset = tiled_y_index-1@
- add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
- bic r10, #0x7F
- mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)@
- mul r7, r7, r10
- add r7, r7, #2 @ tiled_offset = tiled_offset+2@
- mov r10, r9, asr #5
- add r7, r7, r10
- mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@
- add r11, r7, #2048
- b LINEAR_X_SIZE_128_LOOP_MEMCPY
-
-LINEAR_X_SIZE_128_LOOP_EVEN:
- add r11, r4, #31 @ temp2 = ((linear_y_size+31)>>5)<<5@
- bic r11, r11, #0x1F
- add r10, r3, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
- bic r10, #0x7F
- mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)@
- mul r7, r5, r10
- mov r7, r7, lsl #11 @ tiled_offset = tiled_offset<<11@
- add r12, r6, #32
- cmp r12, r11
- movlt r10, r9, asr #5
- movge r10, r9, asr #6
- add r7, r7, r10, lsl #11
- add r11, r7, #2048
-
-LINEAR_X_SIZE_128_LOOP_MEMCPY:
- and r10, r6, #0x1F
- mov r10, r10, lsl #6
- add r10, r2, r10
-
- add r7, r7, r10 @ tiled_addr = tiled_src+64*(temp1)
- add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1)
- pld [r11]
- vld2.8 {q0, q1}, [r7]!
- pld [r11, #32]
- vld2.8 {q2, q3}, [r7]!
- pld [r7]
- vld2.8 {q4, q5}, [r11]!
- mov r10, r3, asr #1
- pld [r7, #32]
- vld2.8 {q6, q7}, [r11]!
- mul r10, r10, r6
- pld [r11]
- vld2.8 {q8, q9}, [r7]!
- add r10, r10, r9, asr #1
- pld [r11, #32]
- vld2.8 {q10, q11}, [r7]!
- add r8, r0, r10
- vld2.8 {q12, q13}, [r11]!
- mov r14, r3, asr #1
- vld2.8 {q14, q15}, [r11]!
-
- sub r14, r14, #48
- vst1.8 {q0}, [r8]!
- vst1.8 {q2}, [r8]!
- vst1.8 {q4}, [r8]!
- vst1.8 {q6}, [r8], r14
- vst1.8 {q8}, [r8]!
- vst1.8 {q10}, [r8]!
- vst1.8 {q12}, [r8]!
- vst1.8 {q14}, [r8]!
-
- add r10, r1, r10
- vst1.8 {q1}, [r10]!
- vst1.8 {q3}, [r10]!
- vst1.8 {q5}, [r10]!
- vst1.8 {q7}, [r10], r14
- vst1.8 {q9}, [r10]!
- vst1.8 {q11}, [r10]!
- add r6, #2
- vst1.8 {q13}, [r10]!
- cmp r6, r4
- vst1.8 {q15}, [r10]!
-
- blt LINEAR_X_SIZE_128_LOOP
-
- add r9, r9, #128
-
-LINEAR_X_SIZE_64:
- sub r10, r3, r9
- cmp r10, #64
- blt LINEAR_X_SIZE_4
-
- mov r5, r9
- mov r6, #0
-
-LINEAR_X_SIZE_64_LOOP:
- bl GET_TILED_OFFSET
-
-LINEAR_X_SIZE_64_LOOP_MEMCPY:
- and r10, r6, #0x1F
- mov r14, r3, asr #1
- mov r10, r10, lsl #6
- sub r14, r14, #16
- add r10, r2, r10
-
- add r7, r7, r10 @ tiled_addr = tiled_src+64*(temp1)
- pld [r7, #64]
- vld2.8 {q0, q1}, [r7]!
- mov r10, r3, asr #1
- pld [r7, #64]
- vld2.8 {q2, q3}, [r7]!
- mul r10, r10, r6
- vld2.8 {q4, q5}, [r7]!
- add r10, r10, r9, asr #1
- vld2.8 {q6, q7}, [r7]!
- add r8, r0, r10
-
- vst1.8 {q0}, [r8]!
- vst1.8 {q2}, [r8], r14
- vst1.8 {q4}, [r8]!
- vst1.8 {q6}, [r8], r14
-
- add r10, r1, r10
- vst1.8 {q1}, [r10]!
- vst1.8 {q3}, [r10], r14
- add r6, #2
- vst1.8 {q5}, [r10]!
- cmp r6, r4
- vst1.8 {q7}, [r10], r14
-
- blt LINEAR_X_SIZE_64_LOOP
-
- add r9, r9, #64
-
-LINEAR_X_SIZE_4:
- cmp r9, r3
- beq RESTORE_REG
-
- mov r6, #0 @ i = 0
-LINEAR_Y_SIZE_4_LOOP:
-
- mov r5, r9 @ j = aligned_x_size
-LINEAR_X_SIZE_4_LOOP:
-
- bl GET_TILED_OFFSET
-
- mov r11, r3, asr #1 @ temp1 = linear_x_size/2
- mul r11, r11, r6 @ temp1 = temp1*(i)
- add r11, r11, r5, asr #1 @ temp1 = temp1+j/2
- mov r12, r3, asr #1 @ temp2 = linear_x_size/2
- sub r12, r12, #1 @ temp2 = linear_x_size-1
-
- add r8, r0, r11 @ linear_addr = linear_dest_u+temp1
- add r11, r1, r11 @ temp1 = linear_dest_v+temp1
- add r7, r2, r7 @ tiled_addr = tiled_src+tiled_addr
- and r14, r6, #0x1F @ temp3 = i&0x1F@
- mov r14, r14, lsl #6 @ temp3 = temp3*64
- add r7, r7, r14 @ tiled_addr = tiled_addr+temp3
- and r14, r5, #0x3F @ temp3 = j&0x3F
- add r7, r7, r14 @ tiled_addr = tiled_addr+temp3
-
- ldrh r10, [r7], #2
- ldrh r14, [r7], #62
- strb r10, [r8], #1
- mov r10, r10, asr #8
- strb r10, [r11], #1
- strb r14, [r8], r12
- mov r14, r14, asr #8
- strb r14, [r11], r12
-
- ldrh r10, [r7], #2
- ldrh r14, [r7], #62
- strb r10, [r8], #1
- mov r10, r10, asr #8
- strb r10, [r11], #1
- strb r14, [r8], r12
- mov r14, r14, asr #8
- strb r14, [r11], r12
-
- add r5, r5, #4 @ j = j+4
- cmp r5, r3 @ j<linear_x_size
- blt LINEAR_X_SIZE_4_LOOP
-
- add r6, r6, #2 @ i = i+4
- cmp r6, r4 @ i<linear_y_size
- blt LINEAR_Y_SIZE_4_LOOP
-
-RESTORE_REG:
- ldmfd sp!, {r4-r12,r15} @ restore registers
-
-GET_TILED_OFFSET:
- stmfd sp!, {r14}
-
- mov r12, r6, asr #5 @ temp2 = i>>5
- mov r11, r5, asr #6 @ temp1 = j>>6
-
- and r14, r12, #0x1 @ if (temp2 & 0x1)
- cmp r14, #0x1
- bne GET_TILED_OFFSET_EVEN_FORMULA_1
-
-GET_TILED_OFFSET_ODD_FORMULA:
- sub r7, r12, #1 @ tiled_addr = temp2-1
- add r14, r3, #127 @ temp3 = linear_x_size+127
- bic r14, r14, #0x7F @ temp3 = (temp3 >>7)<<7
- mov r14, r14, asr #6 @ temp3 = temp3>>6
- mul r7, r7, r14 @ tiled_addr = tiled_addr*temp3
- add r7, r7, r11 @ tiled_addr = tiled_addr+temp1
- add r7, r7, #2 @ tiled_addr = tiled_addr+2
- bic r14, r11, #0x3 @ temp3 = (temp1>>2)<<2
- add r7, r7, r14 @ tiled_addr = tiled_addr+temp3
- mov r7, r7, lsl #11 @ tiled_addr = tiled_addr<<11
- b GET_TILED_OFFSET_RETURN
-
-GET_TILED_OFFSET_EVEN_FORMULA_1:
- add r14, r4, #31 @ temp3 = linear_y_size+31
- bic r14, r14, #0x1F @ temp3 = (temp3>>5)<<5
- sub r14, r14, #32 @ temp3 = temp3 - 32
- cmp r6, r14 @ if (i<(temp3-32)) {
- bge GET_TILED_OFFSET_EVEN_FORMULA_2
- add r14, r11, #2 @ temp3 = temp1+2
- bic r14, r14, #3 @ temp3 = (temp3>>2)<<2
- add r7, r11, r14 @ tiled_addr = temp1+temp3
- add r14, r3, #127 @ temp3 = linear_x_size+127
- bic r14, r14, #0x7F @ temp3 = (temp3>>7)<<7
- mov r14, r14, asr #6 @ temp3 = temp3>>6
- mul r12, r12, r14 @ tiled_y_index = tiled_y_index*temp3
- add r7, r7, r12 @ tiled_addr = tiled_addr+tiled_y_index
- mov r7, r7, lsl #11 @
- b GET_TILED_OFFSET_RETURN
-
-GET_TILED_OFFSET_EVEN_FORMULA_2:
- add r14, r3, #127 @ temp3 = linear_x_size+127
- bic r14, r14, #0x7F @ temp3 = (temp3>>7)<<7
- mov r14, r14, asr #6 @ temp3 = temp3>>6
- mul r7, r12, r14 @ tiled_addr = temp2*temp3
- add r7, r7, r11 @ tiled_addr = tiled_addr+temp3
- mov r7, r7, lsl #11 @ tiled_addr = tiled_addr<<11@
-
-GET_TILED_OFFSET_RETURN:
- ldmfd sp!, {r15} @ restore registers
- .fnend
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_y_neon.s b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_y_neon.s
deleted file mode 100644
index a378579..0000000
--- a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_nv12t_yuv420_y_neon.s
+++ /dev/null
@@ -1,680 +0,0 @@
-/*
- *
- * Copyright 2011 Samsung Electronics S.LSI Co. LTD
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * @file csc_nv12t_yuv420_y_neon.s
- * @brief SEC_OMX specific define
- * @author ShinWon Lee (shinwon.lee@samsung.com)
- * @version 1.0
- * @history
- * 2011.7.01 : Create
- */
-
-/*
- * Converts tiled data to linear.
- * 1. Y of NV12T to Y of YUV420P
- * 2. Y of NV12T to Y of YUV420S
- * 3. UV of NV12T to UV of YUV420S
- *
- * @param yuv420_dest
- * Y or UV plane address of YUV420[out]
- *
- * @param nv12t_src
- * Y or UV plane address of NV12T[in]
- *
- * @param yuv420_width
- * Width of YUV420[in]
- *
- * @param yuv420_height
- * Y: Height of YUV420, UV: Height/2 of YUV420[in]
- */
-
- .arch armv7-a
- .text
- .global csc_tiled_to_linear_neon
- .type csc_tiled_to_linear_neon, %function
-csc_tiled_to_linear_neon: @ copy a tiled plane (r1) to a linear plane (r0), widest column groups first
- .fnstart
-
- @r0 linear_dest
- @r1 tiled_src
- @r2 linear_x_size
- @r3 linear_y_size
- @r4 j (in the 128-column-and-wider sections it holds tiled_y_index / a load stride instead)
- @r5 i
- @r6 tiled_addr
- @r7 linear_addr
- @r8 aligned_x_size (columns already converted; grows by each section's width)
- @r9 aligned_y_size (NOTE(review): only ever used as a scratch register in the sections below)
- @r10 temp1
- @r11 temp2
- @r12 temp3
- @r14 temp4
-
- stmfd sp!, {r4-r12,r14} @ backup registers (r14 is popped into pc at RESTORE_REG)
-
- mov r8, #0 @ aligned_x_size = 0
- cmp r2, #1024 @ if (linear_x_size < 1024)
- blt LINEAR_X_SIZE_512 @ skip the 1024-column section
-
-LINEAR_X_SIZE_1024:
-
- mov r5, #0
-LINEAR_X_SIZE_1024_LOOP:
- mov r6, #0 @ tiled_offset = 0@
- mov r4, r5, asr #5 @ tiled_y_index = i>>5@
- and r10, r4, #0x1
- cmp r10, #0x1
- bne LINEAR_X_SIZE_1024_LOOP_EVEN
-LINEAR_X_SIZE_1024_LOOP_ODD:
- sub r6, r4, #1 @ tiled_offset = tiled_y_index-1@
- add r10, r2, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
- bic r10, #0x7F
- mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)@
- mul r6, r6, r10
- mov r4, #8
- mov r4, r4, lsl #11
- sub r4, r4, #32
- add r6, r6, #2 @ tiled_offset = tiled_offset+2@
- mov r6, r6, lsl #11 @ tiled_offset = tiled_offset<<11@
- add r11, r6, #2048
- add r12, r6, #4096
- add r14, r6, #6144
- b LINEAR_X_SIZE_1024_LOOP_MEMCPY
-
-LINEAR_X_SIZE_1024_LOOP_EVEN:
- add r11, r3, #31 @ temp2 = ((linear_y_size+31)>>5)<<5@
- bic r11, r11, #0x1F
- add r10, r2, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
- bic r10, #0x7F
- mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)@
- mul r6, r4, r10
- add r12, r5, #32
- cmp r12, r11
- mov r6, r6, lsl #11 @ tiled_offset = tiled_offset<<11@
- add r11, r6, #2048
- movlt r4, #8
- addlt r12, r6, #12288
- addlt r14, r6, #14336
- movge r4, #4
- addge r12, r6, #4096
- addge r14, r6, #6144
- mov r4, r4, lsl #11
- sub r4, r4, #32
-
-LINEAR_X_SIZE_1024_LOOP_MEMCPY:
- and r10, r5, #0x1F
- mov r10, r10, lsl #6
- add r10, r1, r10
-
- add r6, r6, r10 @ tiled_addr = tiled_src+64*(temp1)
- add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1)
- pld [r11]
- vld1.8 {q0, q1}, [r6]!
- pld [r11, #32]
- add r12, r12, r10 @ tiled_addr2 = tiled_src+64*(temp1)
- vld1.8 {q2, q3}, [r6], r4
- pld [r12]
- vld1.8 {q4, q5}, [r11]!
- pld [r12, #32]
- add r14, r14, r10 @ tiled_addr3 = tiled_src+64*(temp1)
- vld1.8 {q6, q7}, [r11], r4
- pld [r14]
- vld1.8 {q8, q9}, [r12]!
- pld [r14, #32]
- mul r7, r2, r5
- vld1.8 {q10, q11}, [r12], r4
- add r7, r7, r0
- vld1.8 {q12, q13}, [r14]!
- vld1.8 {q14, q15}, [r14], r4
-
- vst1.8 {q0, q1}, [r7]!
- vst1.8 {q2, q3}, [r7]!
- vst1.8 {q4, q5}, [r7]!
- vst1.8 {q6, q7}, [r7]!
- vst1.8 {q8, q9}, [r7]!
- vst1.8 {q10, q11}, [r7]!
- pld [r6]
- vst1.8 {q12, q13}, [r7]!
- pld [r6, #32]
- vst1.8 {q14, q15}, [r7]!
-
- pld [r11]
- vld1.8 {q0, q1}, [r6]!
- pld [r11, #32]
- vld1.8 {q2, q3}, [r6], r4
-
- pld [r12]
- vld1.8 {q4, q5}, [r11]!
- pld [r12, #32]
- vld1.8 {q6, q7}, [r11], r4
- pld [r14]
- vld1.8 {q8, q9}, [r12]!
- pld [r14, #32]
- vld1.8 {q10, q11}, [r12], r4
- vld1.8 {q12, q13}, [r14]!
- vld1.8 {q14, q15}, [r14], r4
-
- vst1.8 {q0, q1}, [r7]!
- vst1.8 {q2, q3}, [r7]!
- vst1.8 {q4, q5}, [r7]!
- vst1.8 {q6, q7}, [r7]!
- vst1.8 {q8, q9}, [r7]!
- vst1.8 {q10, q11}, [r7]!
- pld [r6]
- vst1.8 {q12, q13}, [r7]!
- pld [r6, #32]
- vst1.8 {q14, q15}, [r7]!
-
- pld [r11]
- vld1.8 {q0, q1}, [r6]!
- pld [r11, #32]
- vld1.8 {q2, q3}, [r6], r4
- pld [r12]
- vld1.8 {q4, q5}, [r11]!
- pld [r12, #32]
- vld1.8 {q6, q7}, [r11], r4
- pld [r14]
- vld1.8 {q8, q9}, [r12]!
- pld [r14, #32]
- vld1.8 {q10, q11}, [r12], r4
- vld1.8 {q12, q13}, [r14]!
- vld1.8 {q14, q15}, [r14], r4
-
- vst1.8 {q0, q1}, [r7]!
- vst1.8 {q2, q3}, [r7]!
- vst1.8 {q4, q5}, [r7]!
- vst1.8 {q6, q7}, [r7]!
- vst1.8 {q8, q9}, [r7]!
- vst1.8 {q10, q11}, [r7]!
- pld [r6]
- vst1.8 {q12, q13}, [r7]!
- pld [r6, #32]
- vst1.8 {q14, q15}, [r7]!
-
- pld [r11]
- vld1.8 {q0, q1}, [r6]!
- pld [r11, #32]
- vld1.8 {q2, q3}, [r6]
- pld [r12]
- vld1.8 {q4, q5}, [r11]!
- pld [r12, #32]
- vld1.8 {q6, q7}, [r11]
- pld [r14]
- vld1.8 {q8, q9}, [r12]!
- pld [r14, #32]
- vld1.8 {q10, q11}, [r12]
- vld1.8 {q12, q13}, [r14]!
- vld1.8 {q14, q15}, [r14]
-
- vst1.8 {q0, q1}, [r7]!
- vst1.8 {q2, q3}, [r7]!
- vst1.8 {q4, q5}, [r7]!
- vst1.8 {q6, q7}, [r7]!
- vst1.8 {q8, q9}, [r7]!
- vst1.8 {q10, q11}, [r7]!
- add r5, #1
- vst1.8 {q12, q13}, [r7]!
- cmp r5, r3
- vst1.8 {q14, q15}, [r7]!
-
- blt LINEAR_X_SIZE_1024_LOOP
-
- mov r8, #1024
-
-LINEAR_X_SIZE_512:
-
- sub r14, r2, r8
- cmp r14, #512
- blt LINEAR_X_SIZE_256
-
- mov r5, #0
-LINEAR_X_SIZE_512_LOOP:
- mov r6, #0
- mov r4, r5, asr #5 @ tiled_y_index = i>>5
- and r10, r4, #0x1
- cmp r10, #0x1
- bne LINEAR_X_SIZE_512_LOOP_EVEN
-
-LINEAR_X_SIZE_512_LOOP_ODD:
- sub r6, r4, #1
- add r10, r2, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
- bic r10, #0x7F
- mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)@
- mul r6, r6, r10
- mov r4, #8
- mov r4, r4, lsl #11
- sub r4, r4, #32
- add r6, r6, #2 @ tiled_offset = tiled_offset+2@
- mov r10, r8, asr #5 @ temp1 = aligned_x_size>>5@
- add r6, r6, r10 @ tiled_offset = tiled_offset+temp1@
- mov r6, r6, lsl #11
- add r11, r6, #2048
- add r12, r6, #4096
- add r14, r6, #6144
- b LINEAR_X_SIZE_512_LOOP_MEMCPY
-
-LINEAR_X_SIZE_512_LOOP_EVEN:
- add r11, r3, #31 @ temp2 = ((linear_y_size+31)>>5)<<5@
- bic r11, r11, #0x1F
- add r10, r2, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
- bic r10, #0x7F
- mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)@
- mul r6, r4, r10
- add r12, r5, #32
- cmp r12, r11
- mov r6, r6, lsl #11 @ tiled_offset = tiled_offset<<11@
- movlt r4, #8
- movlt r10, r8, asr #5 @ temp1 = aligned_x_size>>5@
- movge r10, r8, asr #6 @ temp1 = aligned_x_size>>6@
- add r6, r6, r10, lsl #11 @ tiled_offset = tiled_offset+(temp1<<11)@
- add r11, r6, #2048
- addlt r12, r6, #12288
- addlt r14, r6, #14336
- movge r4, #4
- addge r12, r6, #4096
- addge r14, r6, #6144
- mov r4, r4, lsl #11
- sub r4, r4, #32
-
-LINEAR_X_SIZE_512_LOOP_MEMCPY:
- and r10, r5, #0x1F
- mov r10, r10, lsl #6
- add r10, r1, r10
-
- add r6, r6, r10 @ tiled_addr = tiled_src+64*(temp1)
- add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1)
- pld [r11]
- vld1.8 {q0, q1}, [r6]!
- pld [r11, #32]
- add r12, r12, r10 @ tiled_addr2 = tiled_src+64*(temp1)
- vld1.8 {q2, q3}, [r6], r4
- pld [r12]
- vld1.8 {q4, q5}, [r11]!
- pld [r12, #32]
- add r14, r14, r10 @ tiled_addr3 = tiled_src+64*(temp1)
- vld1.8 {q6, q7}, [r11], r4
- pld [r14]
- vld1.8 {q8, q9}, [r12]!
- pld [r14, #32]
- mul r7, r2, r5
- vld1.8 {q10, q11}, [r12], r4
- add r7, r7, r8
- vld1.8 {q12, q13}, [r14]!
- vld1.8 {q14, q15}, [r14], r4
-
- add r7, r7, r0
- vst1.8 {q0, q1}, [r7]!
- vst1.8 {q2, q3}, [r7]!
- vst1.8 {q4, q5}, [r7]!
- vst1.8 {q6, q7}, [r7]!
- vst1.8 {q8, q9}, [r7]!
- vst1.8 {q10, q11}, [r7]!
- pld [r6]
- vst1.8 {q12, q13}, [r7]!
- pld [r6, #32]
- vst1.8 {q14, q15}, [r7]!
-
- pld [r11]
- vld1.8 {q0, q1}, [r6]!
- pld [r11, #32]
- vld1.8 {q2, q3}, [r6], r4
- pld [r12]
- vld1.8 {q4, q5}, [r11]!
- pld [r12, #32]
- vld1.8 {q6, q7}, [r11], r4
- pld [r14]
- vld1.8 {q8, q9}, [r12]!
- pld [r14, #32]
- vld1.8 {q10, q11}, [r12], r4
- vld1.8 {q12, q13}, [r14]!
- vld1.8 {q14, q15}, [r14], r4
-
- vst1.8 {q0, q1}, [r7]!
- vst1.8 {q2, q3}, [r7]!
- vst1.8 {q4, q5}, [r7]!
- vst1.8 {q6, q7}, [r7]!
- vst1.8 {q8, q9}, [r7]!
- vst1.8 {q10, q11}, [r7]!
- add r5, #1
- vst1.8 {q12, q13}, [r7]!
- cmp r5, r3
- vst1.8 {q14, q15}, [r7]!
-
- blt LINEAR_X_SIZE_512_LOOP
-
- add r8, r8, #512
-
-LINEAR_X_SIZE_256:
-
- sub r14, r2, r8
- cmp r14, #256
- blt LINEAR_X_SIZE_128
-
- mov r5, #0
-LINEAR_X_SIZE_256_LOOP:
- mov r6, #0
- mov r4, r5, asr #5 @ tiled_y_index = i>>5
- and r10, r4, #0x1
- cmp r10, #0x1
- bne LINEAR_X_SIZE_256_LOOP_EVEN
-
-LINEAR_X_SIZE_256_LOOP_ODD:
- sub r6, r4, #1
- add r10, r2, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
- bic r10, #0x7F
- mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)@
- mul r6, r6, r10
- add r6, r6, #2 @ tiled_offset = tiled_offset+2@
- mov r10, r8, asr #5 @ temp1 = aligned_x_size>>5@
- add r6, r6, r10 @ tiled_offset = tiled_offset+temp1@
- mov r6, r6, lsl #11
- add r11, r6, #2048
- add r12, r6, #4096
- add r14, r6, #6144
- b LINEAR_X_SIZE_256_LOOP_MEMCPY
-
-LINEAR_X_SIZE_256_LOOP_EVEN:
- add r11, r3, #31 @ temp2 = ((linear_y_size+31)>>5)<<5@
- bic r11, r11, #0x1F
- add r10, r2, #127 @ temp1 = ((linear_x_size+127)>>7)<<7@
- bic r10, #0x7F
- mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)@
- mul r6, r4, r10
- mov r6, r6, lsl #11 @ tiled_offset = tiled_offset<<11@
- add r12, r5, #32
- cmp r12, r11
- movlt r10, r8, asr #5 @ temp1 = aligned_x_size>>5@
- movge r10, r8, asr #6 @ temp1 = aligned_x_size>>6@
- add r6, r6, r10, lsl #11 @ tiled_offset = tiled_offset+(temp1<<11)@
- add r11, r6, #2048
- addlt r12, r6, #12288
- addlt r14, r6, #14336
- addge r12, r6, #4096
- addge r14, r6, #6144
-
-LINEAR_X_SIZE_256_LOOP_MEMCPY:
- and r10, r5, #0x1F
- mov r10, r10, lsl #6
- add r10, r1, r10
-
- add r6, r6, r10 @ tiled_addr = tiled_src+64*(temp1)
- add r11, r11, r10 @ tiled_addr1 = tiled_src+64*(temp1)
- pld [r11]
- vld1.8 {q0, q1}, [r6]!
- pld [r11, #32]
- add r12, r12, r10 @ tiled_addr2 = tiled_src+64*(temp1)
- vld1.8 {q2, q3}, [r6]
- pld [r12]
- vld1.8 {q4, q5}, [r11]!
- pld [r12, #32]
- add r14, r14, r10 @ tiled_addr3 = tiled_src+64*(temp1)
- vld1.8 {q6, q7}, [r11]
- pld [r14]
- mul r7, r2, r5
- vld1.8 {q8, q9}, [r12]!
- pld [r14, #32]
- add r7, r7, r8
- vld1.8 {q10, q11}, [r12]
- add r7, r7, r0
- vld1.8 {q12, q13}, [r14]!
- vld1.8 {q14, q15}, [r14]
-
- vst1.8 {q0, q1}, [r7]!
- vst1.8 {q2, q3}, [r7]!
- vst1.8 {q4, q5}, [r7]!
- vst1.8 {q6, q7}, [r7]!
- vst1.8 {q8, q9}, [r7]!
- vst1.8 {q10, q11}, [r7]!
- add r5, #1
- vst1.8 {q12, q13}, [r7]!
- cmp r5, r3
- vst1.8 {q14, q15}, [r7]!
-
- blt LINEAR_X_SIZE_256_LOOP
-
- add r8, r8, #256
-
-LINEAR_X_SIZE_128: @ convert a 128-column group starting at column aligned_x_size, two rows per iteration
-
- sub r14, r2, r8 @ temp4 = linear_x_size-aligned_x_size
- cmp r14, #128
- blt LINEAR_X_SIZE_64 @ fewer than 128 columns left: skip this section
-
- mov r5, #0 @ i = 0
-LINEAR_X_SIZE_128_LOOP:
- mov r6, #0 @ tiled_offset = 0
- mov r4, r5, asr #5 @ tiled_y_index = i>>5
- and r10, r4, #0x1 @ if (tiled_y_index & 0x1)
- cmp r10, #0x1
- bne LINEAR_X_SIZE_128_LOOP_EVEN
-
-LINEAR_X_SIZE_128_LOOP_ODD:
- sub r6, r4, #1 @ tiled_offset = tiled_y_index-1
- add r10, r2, #127 @ temp1 = ((linear_x_size+127)>>7)<<7
- bic r10, #0x7F
- mov r10, r10, asr #6 @ tiled_offset = tiled_offset*(temp1>>6)
- mul r6, r6, r10
- add r6, r6, #2 @ tiled_offset = tiled_offset+2
- mov r10, r8, asr #5 @ temp1 = aligned_x_size>>5
- add r6, r6, r10 @ tiled_offset = tiled_offset+temp1
- mov r6, r6, lsl #11 @ tiled_offset = tiled_offset<<11
- add r11, r6, #2048 @ offset of the second tile = tiled_offset+2048
- b LINEAR_X_SIZE_128_LOOP_MEMCPY
-
-LINEAR_X_SIZE_128_LOOP_EVEN:
- add r11, r3, #31 @ temp2 = ((linear_y_size+31)>>5)<<5
- bic r11, r11, #0x1F
- add r10, r2, #127 @ temp1 = ((linear_x_size+127)>>7)<<7
- bic r10, #0x7F
- mov r10, r10, asr #6 @ tiled_offset = tiled_y_index*(temp1>>6)
- mul r6, r4, r10
- mov r6, r6, lsl #11 @ tiled_offset = tiled_offset<<11
- add r12, r5, #32 @ temp3 = i+32
- cmp r12, r11 @ flags for the movlt/movge pair: (i+32) vs temp2
- movlt r10, r8, asr #5 @ temp1 = aligned_x_size>>5
- movge r10, r8, asr #6 @ temp1 = aligned_x_size>>6
- add r6, r6, r10, lsl #11 @ tiled_offset = tiled_offset+(temp1<<11)
- add r11, r6, #2048 @ offset of the second tile = tiled_offset+2048
-
-LINEAR_X_SIZE_128_LOOP_MEMCPY:
- and r10, r5, #0x1F @ temp1 = i&0x1F
- mov r10, r10, lsl #6 @ temp1 = temp1*64
- add r10, r1, r10 @ temp1 = tiled_src+temp1
-
- add r6, r6, r10 @ tiled_addr = tiled_offset+tiled_src+64*(i&0x1F)
- add r11, r11, r10 @ tiled_addr1 = second tile offset+tiled_src+64*(i&0x1F)
- pld [r6, #64]
- vld1.8 {q0, q1}, [r6]! @ first tile, bytes 0..31 (row i)
- pld [r6, #64]
- vld1.8 {q2, q3}, [r6]! @ first tile, bytes 32..63 (row i)
- mul r7, r2, r5 @ linear_addr = linear_x_size*i
- pld [r11]
- vld1.8 {q4, q5}, [r6]! @ first tile, bytes 64..95 (row i+1)
- add r7, r7, r8 @ linear_addr = linear_addr+aligned_x_size
- pld [r11, #32]
- vld1.8 {q6, q7}, [r6] @ first tile, bytes 96..127 (row i+1)
- add r7, r7, r0 @ linear_addr = linear_addr+linear_dest
- pld [r11, #64]
- vld1.8 {q8, q9}, [r11]! @ second tile, bytes 0..31 (row i)
- pld [r11, #64]
- vld1.8 {q10, q11}, [r11]! @ second tile, bytes 32..63 (row i)
- vld1.8 {q12, q13}, [r11]! @ second tile, bytes 64..95 (row i+1)
- vld1.8 {q14, q15}, [r11] @ second tile, bytes 96..127 (row i+1)
-
- sub r9, r2, #96 @ stride = linear_x_size-96 (r9 used as scratch)
- vst1.8 {q0, q1}, [r7]! @ row i: 64 bytes from the first tile ...
- vst1.8 {q2, q3}, [r7]!
- vst1.8 {q8, q9}, [r7]! @ ... then 64 bytes from the second tile
- vst1.8 {q10, q11}, [r7], r9 @ 3*32 + (linear_x_size-96) advances exactly one row
- vst1.8 {q4, q5}, [r7]! @ row i+1: same layout
- vst1.8 {q6, q7}, [r7]!
- add r5, #2 @ i = i+2
- vst1.8 {q12, q13}, [r7]!
- cmp r5, r3 @ i<linear_y_size (flags survive the vst1 below)
- vst1.8 {q14, q15}, [r7]
-
- blt LINEAR_X_SIZE_128_LOOP
-
- add r8, r8, #128 @ aligned_x_size = aligned_x_size+128
-
-LINEAR_X_SIZE_64: @ convert one 64-column group at column aligned_x_size, four rows per iteration
-
- sub r14, r2, r8 @ temp4 = linear_x_size-aligned_x_size
- cmp r14, #64
- blt LINEAR_X_SIZE_4 @ fewer than 64 columns left: skip this section
-
- mov r5, #0 @ i = 0
- mov r4, r8 @ j = aligned_x_size
-
-LINEAR_X_SIZE_64_LOOP:
-
- bl GET_TILED_OFFSET @ r6 = tile offset for (j, i); clobbers r10-r12
-
- add r6, r1, r6 @ tiled_addr = tiled_src+tiled_addr
- and r11, r5, #0x1F @ temp2 = i&0x1F
- mov r11, r11, lsl #6 @ temp2 = 64*temp2
- add r6, r6, r11 @ tiled_addr = tiled_addr+temp2
-
- pld [r6, #64]
- vld1.8 {q0, q1}, [r6]! @ load {tiled_addr}
- mul r10, r2, r5 @ temp1 = linear_x_size*(i)
- pld [r6, #64]
- vld1.8 {q2, q3}, [r6]!
- pld [r6, #64]
- vld1.8 {q4, q5}, [r6]! @ load {tiled_addr+64*1}
- pld [r6, #64]
- vld1.8 {q6, q7}, [r6]!
- pld [r6, #64]
- vld1.8 {q8, q9}, [r6]! @ load {tiled_addr+64*2}
- pld [r6, #64]
- vld1.8 {q10, q11}, [r6]!
- add r7, r0, r4 @ linear_addr = linear_dest+j
- vld1.8 {q12, q13}, [r6]! @ load {tiled_addr+64*3}
- add r7, r7, r10 @ linear_addr = linear_addr+temp1
- vld1.8 {q14, q15}, [r6]!
- sub r10, r2, #32 @ temp1 = linear_x_size-32
-
- vst1.8 {q0, q1}, [r7]! @ store {linear_addr, 64}
- vst1.8 {q2, q3}, [r7], r10 @ 32 + (linear_x_size-32) advances exactly one row
- vst1.8 {q4, q5}, [r7]! @ store {linear_addr+linear_x_size*1, 64}
- vst1.8 {q6, q7}, [r7], r10
- vst1.8 {q8, q9}, [r7]! @ store {linear_addr+linear_x_size*2, 64}
- vst1.8 {q10, q11}, [r7], r10
- add r5, #4 @ i = i+4
- vst1.8 {q12, q13}, [r7]! @ store {linear_addr+linear_x_size*3, 64}
- cmp r5, r3 @ i<linear_y_size (flags survive the vst1 below)
- vst1.8 {q14, q15}, [r7], r10
-
- blt LINEAR_X_SIZE_64_LOOP
-
- add r8, r8, #64 @ aligned_x_size = aligned_x_size+64
-
-LINEAR_X_SIZE_4: @ remaining columns: copy 4-byte x 4-row pieces with scalar loads/stores
- cmp r8, r2 @ if (aligned_x_size == linear_x_size)
- beq RESTORE_REG @ nothing left to copy
-
- mov r5, #0 @ i = 0
-LINEAR_Y_SIZE_4_LOOP:
-
- mov r4, r8 @ j = aligned_x_size
-LINEAR_X_SIZE_4_LOOP:
-
- bl GET_TILED_OFFSET @ r6 = tile offset for (j, i); clobbers r10-r12 and r14 (lr)
-
- and r10, r5, #0x1F @ temp1 = i&0x1F
- and r11, r4, #0x3F @ temp2 = j&0x3F
-
- add r6, r6, r1 @ tiled_addr = tiled_addr+tiled_src
- add r6, r6, r11 @ tiled_addr = tiled_addr+temp2
- add r6, r6, r10, lsl #6 @ tiled_addr = tiled_addr+64*temp1
-
- ldr r10, [r6], #64 @ 4 bytes of row i; post-increment 64 = next row inside the tile
- add r7, r0, r4 @ linear_addr = linear_dest+j
- ldr r11, [r6], #64 @ 4 bytes of row i+1
- mul r9, r2, r5 @ r9 (scratch) = linear_x_size*i
- ldr r12, [r6], #64 @ 4 bytes of row i+2
- add r7, r7, r9 @ linear_addr = linear_addr+linear_x_size*i
- ldr r14, [r6], #64 @ 4 bytes of row i+3
-
- str r10, [r7], r2 @ store row i, then advance by linear_x_size
- str r11, [r7], r2 @ store row i+1
- str r12, [r7], r2 @ store row i+2
- str r14, [r7], r2 @ store row i+3
-
- add r4, r4, #4 @ j = j+4
- cmp r4, r2 @ j<linear_x_size
- blt LINEAR_X_SIZE_4_LOOP
-
- add r5, r5, #4 @ i = i+4
- cmp r5, r3 @ i<linear_y_size
- blt LINEAR_Y_SIZE_4_LOOP
-
-RESTORE_REG:
- ldmfd sp!, {r4-r12,r15} @ restore registers; the saved r14 is popped into pc, which returns
-
-GET_TILED_OFFSET: @ in: r2 = linear_x_size, r3 = linear_y_size, r4 = j, r5 = i; out: r6 = tile index<<11; clobbers r10-r12, flags
-
- mov r11, r5, asr #5 @ temp2 = i>>5
- mov r10, r4, asr #6 @ temp1 = j>>6
-
- and r12, r11, #0x1 @ if (temp2 & 0x1)
- cmp r12, #0x1
- bne GET_TILED_OFFSET_EVEN_FORMULA_1
-
-GET_TILED_OFFSET_ODD_FORMULA:
- sub r6, r11, #1 @ tiled_addr = temp2-1
- add r12, r2, #127 @ temp3 = linear_x_size+127
- bic r12, r12, #0x7F @ temp3 = (temp3 >>7)<<7
- mov r12, r12, asr #6 @ temp3 = temp3>>6
- mul r6, r6, r12 @ tiled_addr = tiled_addr*temp3
- add r6, r6, r10 @ tiled_addr = tiled_addr+temp1
- add r6, r6, #2 @ tiled_addr = tiled_addr+2
- bic r12, r10, #0x3 @ temp3 = (temp1>>2)<<2
- add r6, r6, r12 @ tiled_addr = tiled_addr+temp3
- mov r6, r6, lsl #11 @ tiled_addr = tiled_addr<<11
- b GET_TILED_OFFSET_RETURN
-
-GET_TILED_OFFSET_EVEN_FORMULA_1:
- add r12, r3, #31 @ temp3 = linear_y_size+31
- bic r12, r12, #0x1F @ temp3 = (temp3>>5)<<5
- sub r12, r12, #32 @ temp3 = temp3 - 32
- cmp r5, r12 @ if (i<temp3) {   (temp3 has already been reduced by 32)
- bge GET_TILED_OFFSET_EVEN_FORMULA_2
- add r12, r10, #2 @ temp3 = temp1+2
- bic r12, r12, #3 @ temp3 = (temp3>>2)<<2
- add r6, r10, r12 @ tiled_addr = temp1+temp3
- add r12, r2, #127 @ temp3 = linear_x_size+127
- bic r12, r12, #0x7F @ temp3 = (temp3>>7)<<7
- mov r12, r12, asr #6 @ temp3 = temp3>>6
- mul r11, r11, r12 @ temp2 = temp2*temp3
- add r6, r6, r11 @ tiled_addr = tiled_addr+temp2
- mov r6, r6, lsl #11 @ tiled_addr = tiled_addr<<11
- b GET_TILED_OFFSET_RETURN
-
-GET_TILED_OFFSET_EVEN_FORMULA_2:
- add r12, r2, #127 @ temp3 = linear_x_size+127
- bic r12, r12, #0x7F @ temp3 = (temp3>>7)<<7
- mov r12, r12, asr #6 @ temp3 = temp3>>6
- mul r6, r11, r12 @ tiled_addr = temp2*temp3
- add r6, r6, r10 @ tiled_addr = tiled_addr+temp1
- mov r6, r6, lsl #11 @ tiled_addr = tiled_addr<<11
-
-GET_TILED_OFFSET_RETURN:
- mov pc, lr @ return to the bl site; result is in r6
- .fnend
-
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_uv_neon.s b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_uv_neon.s
deleted file mode 100644
index 114e9bb..0000000
--- a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_uv_neon.s
+++ /dev/null
@@ -1,573 +0,0 @@
-/*
- *
- * Copyright 2011 Samsung Electronics S.LSI Co. LTD
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * @file csc_yuv420_nv12t_uv_neon.s
- * @brief SEC_OMX specific define
- * @author ShinWon Lee (shinwon.lee@samsung.com)
- * @version 1.0
- * @history
- * 2011.7.01 : Create
- */
-
-/*
- * Converts and Interleaves linear to tiled
- * 1. UV of YUV420P to UV of NV12T
- *
- * @param nv12t_uv_dest
- * UV plane address of NV12T[out]
- *
- * @param yuv420p_u_src
- * U plane address of YUV420P[in]
- *
- * @param yuv420p_v_src
- * V plane address of YUV420P[in]
- *
- * @param yuv420_width
- * Width of YUV420[in]
- *
- * @param yuv420_uv_height
- * Height/2 of YUV420[in]
- */
-
- .arch armv7-a
- .text
- .global csc_linear_to_tiled_interleave_neon
- .type csc_linear_to_tiled_interleave_neon, %function
-csc_linear_to_tiled_interleave_neon: @ interleave linear U (r1) and V (r2) planes into a tiled UV plane (r0)
- .fnstart
-
- @r0 tiled_dest
- @r1 linear_src_u
- @r2 linear_src_v
- @r3 linear_x_size
- @r4 linear_y_size (loaded from the stack below)
- @r5 j
- @r6 i
- @r7 tiled_addr
- @r8 linear_addr
- @r9 aligned_x_size
- @r10 aligned_y_size
- @r11 temp1
- @r12 temp2
- @r14 temp3
-
- stmfd sp!, {r4-r12,r14} @ backup registers (10 words = 40 bytes)
-
- ldr r4, [sp, #40] @ load linear_y_size to r4 (first stack word above the 40 bytes just pushed)
-
- bic r10, r4, #0x1F @ aligned_y_size = (linear_y_size>>5)<<5
- bic r9, r3, #0x3F @ aligned_x_size = (linear_x_size>>6)<<6
-
- mov r6, #0 @ i = 0
-LOOP_ALIGNED_Y_SIZE:
-
- mov r5, #0 @ j = 0
-LOOP_ALIGNED_X_SIZE:
-
- bl GET_TILED_OFFSET
-
- mov r11, r3, asr #1 @ temp1 = linear_x_size/2
- mul r11, r11, r6 @ temp1 = temp1*(i)
- add r11, r11, r5, asr #1 @ temp1 = temp1+j/2
- mov r12, r3, asr #1 @ temp2 = linear_x_size/2
- sub r12, r12, #16 @ temp2 = linear_x_size-16
-
- add r8, r1, r11 @ linear_addr = linear_src_u+temp1
- add r11, r2, r11 @ temp1 = linear_src_v+temp1
- add r7, r0, r7 @ tiled_addr = tiled_dest+tiled_addr
-
- pld [r8, r3]
- vld1.8 {q0}, [r8]!
- vld1.8 {q2}, [r8], r12
- pld [r11, r3]
- vld1.8 {q1}, [r11]!
- vld1.8 {q3}, [r11], r12
- pld [r8, r3]
- vld1.8 {q4}, [r8]!
- vld1.8 {q6}, [r8], r12
- pld [r11, r3]
- vld1.8 {q5}, [r11]!
- vld1.8 {q7}, [r11], r12
- pld [r8, r3]
- vld1.8 {q8}, [r8]!
- vld1.8 {q10}, [r8], r12
- pld [r11, r3]
- vld1.8 {q9}, [r11]!
- vld1.8 {q11}, [r11], r12
- pld [r8, r3]
- vld1.8 {q12}, [r8]!
- vld1.8 {q14}, [r8], r12
- pld [r11, r3]
- vld1.8 {q13}, [r11]!
- vld1.8 {q15}, [r11], r12
-
- vst2.8 {q0, q1}, [r7]!
- vst2.8 {q2, q3}, [r7]!
- vst2.8 {q4, q5}, [r7]!
- vst2.8 {q6, q7}, [r7]!
- vst2.8 {q8, q9}, [r7]!
- vst2.8 {q10, q11}, [r7]!
- vst2.8 {q12, q13}, [r7]!
- vst2.8 {q14, q15}, [r7]!
-
- pld [r8, r3]
- vld1.8 {q0}, [r8]!
- vld1.8 {q2}, [r8], r12
- pld [r11, r3]
- vld1.8 {q1}, [r11]!
- vld1.8 {q3}, [r11], r12
- pld [r8, r3]
- vld1.8 {q4}, [r8]!
- vld1.8 {q6}, [r8], r12
- pld [r11, r3]
- vld1.8 {q5}, [r11]!
- vld1.8 {q7}, [r11], r12
- pld [r8, r3]
- vld1.8 {q8}, [r8]!
- vld1.8 {q10}, [r8], r12
- pld [r11, r3]
- vld1.8 {q9}, [r11]!
- vld1.8 {q11}, [r11], r12
- pld [r8, r3]
- vld1.8 {q12}, [r8]!
- vld1.8 {q14}, [r8], r12
- pld [r11, r3]
- vld1.8 {q13}, [r11]!
- vld1.8 {q15}, [r11], r12
-
- vst2.8 {q0, q1}, [r7]!
- vst2.8 {q2, q3}, [r7]!
- vst2.8 {q4, q5}, [r7]!
- vst2.8 {q6, q7}, [r7]!
- vst2.8 {q8, q9}, [r7]!
- vst2.8 {q10, q11}, [r7]!
- vst2.8 {q12, q13}, [r7]!
- vst2.8 {q14, q15}, [r7]!
-
- pld [r8, r3]
- vld1.8 {q0}, [r8]!
- vld1.8 {q2}, [r8], r12
- pld [r11, r3]
- vld1.8 {q1}, [r11]!
- vld1.8 {q3}, [r11], r12
- pld [r8, r3]
- vld1.8 {q4}, [r8]!
- vld1.8 {q6}, [r8], r12
- pld [r11, r3]
- vld1.8 {q5}, [r11]!
- vld1.8 {q7}, [r11], r12
- pld [r8, r3]
- vld1.8 {q8}, [r8]!
- vld1.8 {q10}, [r8], r12
- pld [r11, r3]
- vld1.8 {q9}, [r11]!
- vld1.8 {q11}, [r11], r12
- pld [r8, r3]
- vld1.8 {q12}, [r8]!
- vld1.8 {q14}, [r8], r12
- pld [r11, r3]
- vld1.8 {q13}, [r11]!
- vld1.8 {q15}, [r11], r12
-
- vst2.8 {q0, q1}, [r7]!
- vst2.8 {q2, q3}, [r7]!
- vst2.8 {q4, q5}, [r7]!
- vst2.8 {q6, q7}, [r7]!
- vst2.8 {q8, q9}, [r7]!
- vst2.8 {q10, q11}, [r7]!
- vst2.8 {q12, q13}, [r7]!
- vst2.8 {q14, q15}, [r7]!
-
- pld [r8, r3]
- vld1.8 {q0}, [r8]!
- vld1.8 {q2}, [r8], r12
- pld [r11, r3]
- vld1.8 {q1}, [r11]!
- vld1.8 {q3}, [r11], r12
- pld [r8, r3]
- vld1.8 {q4}, [r8]!
- vld1.8 {q6}, [r8], r12
- pld [r11, r3]
- vld1.8 {q5}, [r11]!
- vld1.8 {q7}, [r11], r12
- pld [r8, r3]
- vld1.8 {q8}, [r8]!
- vld1.8 {q10}, [r8], r12
- pld [r11, r3]
- vld1.8 {q9}, [r11]!
- vld1.8 {q11}, [r11], r12
- pld [r8, r3]
- vld1.8 {q12}, [r8]!
- vld1.8 {q14}, [r8], r12
- pld [r11, r3]
- vld1.8 {q13}, [r11]!
- vld1.8 {q15}, [r11], r12
-
- vst2.8 {q0, q1}, [r7]!
- vst2.8 {q2, q3}, [r7]!
- vst2.8 {q4, q5}, [r7]!
- vst2.8 {q6, q7}, [r7]!
- vst2.8 {q8, q9}, [r7]!
- vst2.8 {q10, q11}, [r7]!
- vst2.8 {q12, q13}, [r7]!
- vst2.8 {q14, q15}, [r7]!
-
- pld [r8, r3]
- vld1.8 {q0}, [r8]!
- vld1.8 {q2}, [r8], r12
- pld [r11, r3]
- vld1.8 {q1}, [r11]!
- vld1.8 {q3}, [r11], r12
- pld [r8, r3]
- vld1.8 {q4}, [r8]!
- vld1.8 {q6}, [r8], r12
- pld [r11, r3]
- vld1.8 {q5}, [r11]!
- vld1.8 {q7}, [r11], r12
- pld [r8, r3]
- vld1.8 {q8}, [r8]!
- vld1.8 {q10}, [r8], r12
- pld [r11, r3]
- vld1.8 {q9}, [r11]!
- vld1.8 {q11}, [r11], r12
- pld [r8, r3]
- vld1.8 {q12}, [r8]!
- vld1.8 {q14}, [r8], r12
- pld [r11, r3]
- vld1.8 {q13}, [r11]!
- vld1.8 {q15}, [r11], r12
-
- vst2.8 {q0, q1}, [r7]!
- vst2.8 {q2, q3}, [r7]!
- vst2.8 {q4, q5}, [r7]!
- vst2.8 {q6, q7}, [r7]!
- vst2.8 {q8, q9}, [r7]!
- vst2.8 {q10, q11}, [r7]!
- vst2.8 {q12, q13}, [r7]!
- vst2.8 {q14, q15}, [r7]!
-
- pld [r8, r3]
- vld1.8 {q0}, [r8]!
- vld1.8 {q2}, [r8], r12
- pld [r11, r3]
- vld1.8 {q1}, [r11]!
- vld1.8 {q3}, [r11], r12
- pld [r8, r3]
- vld1.8 {q4}, [r8]!
- vld1.8 {q6}, [r8], r12
- pld [r11, r3]
- vld1.8 {q5}, [r11]!
- vld1.8 {q7}, [r11], r12
- pld [r8, r3]
- vld1.8 {q8}, [r8]!
- vld1.8 {q10}, [r8], r12
- pld [r11, r3]
- vld1.8 {q9}, [r11]!
- vld1.8 {q11}, [r11], r12
- pld [r8, r3]
- vld1.8 {q12}, [r8]!
- vld1.8 {q14}, [r8], r12
- pld [r11, r3]
- vld1.8 {q13}, [r11]!
- vld1.8 {q15}, [r11], r12
-
- vst2.8 {q0, q1}, [r7]!
- vst2.8 {q2, q3}, [r7]!
- vst2.8 {q4, q5}, [r7]!
- vst2.8 {q6, q7}, [r7]!
- vst2.8 {q8, q9}, [r7]!
- vst2.8 {q10, q11}, [r7]!
- vst2.8 {q12, q13}, [r7]!
- vst2.8 {q14, q15}, [r7]!
-
- pld [r8, r3]
- vld1.8 {q0}, [r8]!
- vld1.8 {q2}, [r8], r12
- pld [r11, r3]
- vld1.8 {q1}, [r11]!
- vld1.8 {q3}, [r11], r12
- pld [r8, r3]
- vld1.8 {q4}, [r8]!
- vld1.8 {q6}, [r8], r12
- pld [r11, r3]
- vld1.8 {q5}, [r11]!
- vld1.8 {q7}, [r11], r12
- pld [r8, r3]
- vld1.8 {q8}, [r8]!
- vld1.8 {q10}, [r8], r12
- pld [r11, r3]
- vld1.8 {q9}, [r11]!
- vld1.8 {q11}, [r11], r12
- pld [r8, r3]
- vld1.8 {q12}, [r8]!
- vld1.8 {q14}, [r8], r12
- pld [r11, r3]
- vld1.8 {q13}, [r11]!
- vld1.8 {q15}, [r11], r12
-
- vst2.8 {q0, q1}, [r7]!
- vst2.8 {q2, q3}, [r7]!
- vst2.8 {q4, q5}, [r7]!
- vst2.8 {q6, q7}, [r7]!
- vst2.8 {q8, q9}, [r7]!
- vst2.8 {q10, q11}, [r7]!
- vst2.8 {q12, q13}, [r7]!
- vst2.8 {q14, q15}, [r7]!
-
- pld [r8, r3]
- vld1.8 {q0}, [r8]!
- vld1.8 {q2}, [r8], r12
- pld [r11, r3]
- vld1.8 {q1}, [r11]!
- vld1.8 {q3}, [r11], r12
- pld [r8, r3]
- vld1.8 {q4}, [r8]!
- vld1.8 {q6}, [r8], r12
- pld [r11, r3]
- vld1.8 {q5}, [r11]!
- vld1.8 {q7}, [r11], r12
- pld [r8, r3]
- vld1.8 {q8}, [r8]!
- vld1.8 {q10}, [r8], r12
- pld [r11, r3]
- vld1.8 {q9}, [r11]!
- vld1.8 {q11}, [r11], r12
- pld [r8, r3]
- vld1.8 {q12}, [r8]!
- vld1.8 {q14}, [r8], r12
- pld [r11, r3]
- vld1.8 {q13}, [r11]!
- vld1.8 {q15}, [r11], r12
-
- vst2.8 {q0, q1}, [r7]!
- vst2.8 {q2, q3}, [r7]!
- vst2.8 {q4, q5}, [r7]!
- vst2.8 {q6, q7}, [r7]!
- vst2.8 {q8, q9}, [r7]!
- vst2.8 {q10, q11}, [r7]!
- vst2.8 {q12, q13}, [r7]!
- vst2.8 {q14, q15}, [r7]!
-
- add r5, r5, #64 @ j = j+64
- cmp r5, r9 @ j<aligned_x_size
- blt LOOP_ALIGNED_X_SIZE
-
- add r6, r6, #32 @ i = i+32
- cmp r6, r10 @ i<aligned_y_size
- blt LOOP_ALIGNED_Y_SIZE
-
- ldr r4, [sp, #40] @ load linear_y_size to r4
- cmp r6, r4
- beq LOOP_LINEAR_Y_SIZE_2_START
-
-LOOP_LINEAR_Y_SIZE_1: @ rows past aligned_y_size: interleave 64 output bytes x 4 rows per inner iteration
-
- mov r5, #0 @ j = 0
-LOOP_ALIGNED_X_SIZE_1:
-
- bl GET_TILED_OFFSET @ r7 = tile offset for (j, i); clobbers r11, r12
-
- mov r11, r3, asr #1 @ temp1 = linear_x_size/2
- mul r11, r11, r6 @ temp1 = temp1*(i)
- add r11, r11, r5, asr #1 @ temp1 = temp1+j/2
- mov r12, r3, asr #1 @ temp2 = linear_x_size/2
- sub r12, r12, #16 @ temp2 = linear_x_size/2-16
-
- add r8, r1, r11 @ linear_addr = linear_src_u+temp1
- add r11, r2, r11 @ temp1 = linear_src_v+temp1
- add r7, r0, r7 @ tiled_addr = tiled_dest+tiled_addr
- and r14, r6, #0x1F @ temp3 = i&0x1F
- mov r14, r14, lsl #6 @ temp3 = temp3*64
- add r7, r7, r14 @ tiled_addr = tiled_addr+temp3
-
- pld [r8, r3]
- vld1.8 {q0}, [r8]! @ U row 0, bytes 0..15
- vld1.8 {q2}, [r8], r12 @ U row 0, bytes 16..31; 16 + (linear_x_size/2-16) advances one source row
- pld [r11, r3]
- vld1.8 {q1}, [r11]! @ V row 0, bytes 0..15
- vld1.8 {q3}, [r11], r12 @ V row 0, bytes 16..31
- pld [r8, r3]
- vld1.8 {q4}, [r8]! @ U row 1
- vld1.8 {q6}, [r8], r12
- pld [r11, r3]
- vld1.8 {q5}, [r11]! @ V row 1
- vld1.8 {q7}, [r11], r12
- pld [r8, r3]
- vld1.8 {q8}, [r8]! @ U row 2
- vld1.8 {q10}, [r8], r12
- pld [r11, r3]
- vld1.8 {q9}, [r11]! @ V row 2
- vld1.8 {q11}, [r11], r12
- pld [r8, r3]
- vld1.8 {q12}, [r8]! @ U row 3
- vld1.8 {q14}, [r8], r12
- pld [r11, r3]
- vld1.8 {q13}, [r11]! @ V row 3
- vld1.8 {q15}, [r11], r12
-
- vst2.8 {q0, q1}, [r7]! @ store {tiled_addr}: vst2 interleaves the U register with the V register
- vst2.8 {q2, q3}, [r7]!
- vst2.8 {q4, q5}, [r7]! @ store {tiled_addr+64*1}
- vst2.8 {q6, q7}, [r7]!
- vst2.8 {q8, q9}, [r7]! @ store {tiled_addr+64*2}
- vst2.8 {q10, q11}, [r7]!
- vst2.8 {q12, q13}, [r7]! @ store {tiled_addr+64*3}
- vst2.8 {q14, q15}, [r7]!
-
- add r5, r5, #64 @ j = j+64
- cmp r5, r9 @ j<aligned_x_size
- blt LOOP_ALIGNED_X_SIZE_1
-
- add r6, r6, #4 @ i = i+4
- cmp r6, r4 @ i<linear_y_size
- blt LOOP_LINEAR_Y_SIZE_1
-
-LOOP_LINEAR_Y_SIZE_2_START: @ columns past aligned_x_size: byte-wise U/V interleave, 4 output bytes x 4 rows per step
- cmp r5, r3 @ if (j == linear_x_size)
- beq RESTORE_REG @ nothing left to convert
-
- mov r6, #0 @ i = 0
-LOOP_LINEAR_Y_SIZE_2:
-
- mov r5, r9 @ j = aligned_x_size
-LOOP_LINEAR_X_SIZE_2:
-
- bl GET_TILED_OFFSET @ r7 = tile offset for (j, i); clobbers r11, r12
-
- mov r11, r3, asr #1 @ temp1 = linear_x_size/2
- mul r11, r11, r6 @ temp1 = temp1*(i)
- add r11, r11, r5, asr #1 @ temp1 = temp1+j/2
- mov r12, r3, asr #1 @ temp2 = linear_x_size/2
- sub r12, r12, #1 @ temp2 = linear_x_size/2-1
-
- add r8, r1, r11 @ linear_addr = linear_src_u+temp1
- add r11, r2, r11 @ temp1 = linear_src_v+temp1
- add r7, r0, r7 @ tiled_addr = tiled_dest+tiled_addr
- and r14, r6, #0x1F @ temp3 = i&0x1F
- mov r14, r14, lsl #6 @ temp3 = temp3*64
- add r7, r7, r14 @ tiled_addr = tiled_addr+temp3
- and r14, r5, #0x3F @ temp3 = j&0x3F
- add r7, r7, r14 @ tiled_addr = tiled_addr+temp3
-
- ldrb r10, [r8], #1 @ row 0: U byte (r10 is reused as scratch; aligned_y_size is overwritten)
- ldrb r14, [r11], #1 @ V byte
- mov r14, r14, lsl #8 @ V goes to the high byte of the halfword
- orr r10, r10, r14 @ r10 = U | (V<<8)
- strh r10, [r7], #2 @ store the first UV pair
- ldrb r10, [r8], r12 @ second U byte; 1 + (linear_x_size/2-1) advances one source row
- ldrb r14, [r11], r12 @ second V byte
- mov r14, r14, lsl #8
- orr r10, r10, r14
- strh r10, [r7], #62 @ store the second UV pair; 2 + 62 = 64 advances one tile row
-
- ldrb r10, [r8], #1 @ row 1
- ldrb r14, [r11], #1
- mov r14, r14, lsl #8
- orr r10, r10, r14
- strh r10, [r7], #2
- ldrb r10, [r8], r12
- ldrb r14, [r11], r12
- mov r14, r14, lsl #8
- orr r10, r10, r14
- strh r10, [r7], #62
-
- ldrb r10, [r8], #1 @ row 2
- ldrb r14, [r11], #1
- mov r14, r14, lsl #8
- orr r10, r10, r14
- strh r10, [r7], #2
- ldrb r10, [r8], r12
- ldrb r14, [r11], r12
- mov r14, r14, lsl #8
- orr r10, r10, r14
- strh r10, [r7], #62
-
- ldrb r10, [r8], #1 @ row 3
- ldrb r14, [r11], #1
- mov r14, r14, lsl #8
- orr r10, r10, r14
- strh r10, [r7], #2
- ldrb r10, [r8], r12
- ldrb r14, [r11], r12
- mov r14, r14, lsl #8
- orr r10, r10, r14
- strh r10, [r7], #62
-
- add r5, r5, #4 @ j = j+4
- cmp r5, r3 @ j<linear_x_size
- blt LOOP_LINEAR_X_SIZE_2
-
- add r6, r6, #4 @ i = i+4
- cmp r6, r4 @ i<linear_y_size
- blt LOOP_LINEAR_Y_SIZE_2
-
-RESTORE_REG:
- ldmfd sp!, {r4-r12,r15} @ restore registers; the saved r14 is popped into pc, which returns
-
-GET_TILED_OFFSET: @ in: r3 = linear_x_size, r4 = linear_y_size, r5 = j, r6 = i; out: r7 = tile index<<11; clobbers r11, r12, flags
- stmfd sp!, {r14} @ save the return address so r14 can be used as temp3
-
- mov r12, r6, asr #5 @ temp2 = i>>5
- mov r11, r5, asr #6 @ temp1 = j>>6
-
- and r14, r12, #0x1 @ if (temp2 & 0x1)
- cmp r14, #0x1
- bne GET_TILED_OFFSET_EVEN_FORMULA_1
-
-GET_TILED_OFFSET_ODD_FORMULA:
- sub r7, r12, #1 @ tiled_addr = temp2-1
- add r14, r3, #127 @ temp3 = linear_x_size+127
- bic r14, r14, #0x7F @ temp3 = (temp3 >>7)<<7
- mov r14, r14, asr #6 @ temp3 = temp3>>6
- mul r7, r7, r14 @ tiled_addr = tiled_addr*temp3
- add r7, r7, r11 @ tiled_addr = tiled_addr+temp1
- add r7, r7, #2 @ tiled_addr = tiled_addr+2
- bic r14, r11, #0x3 @ temp3 = (temp1>>2)<<2
- add r7, r7, r14 @ tiled_addr = tiled_addr+temp3
- mov r7, r7, lsl #11 @ tiled_addr = tiled_addr<<11
- b GET_TILED_OFFSET_RETURN
-
-GET_TILED_OFFSET_EVEN_FORMULA_1:
- add r14, r4, #31 @ temp3 = linear_y_size+31
- bic r14, r14, #0x1F @ temp3 = (temp3>>5)<<5
- sub r14, r14, #32 @ temp3 = temp3 - 32
- cmp r6, r14 @ if (i<temp3) {   (temp3 has already been reduced by 32)
- bge GET_TILED_OFFSET_EVEN_FORMULA_2
- add r14, r11, #2 @ temp3 = temp1+2
- bic r14, r14, #3 @ temp3 = (temp3>>2)<<2
- add r7, r11, r14 @ tiled_addr = temp1+temp3
- add r14, r3, #127 @ temp3 = linear_x_size+127
- bic r14, r14, #0x7F @ temp3 = (temp3>>7)<<7
- mov r14, r14, asr #6 @ temp3 = temp3>>6
- mul r12, r12, r14 @ temp2 = temp2*temp3
- add r7, r7, r12 @ tiled_addr = tiled_addr+temp2
- mov r7, r7, lsl #11 @ tiled_addr = tiled_addr<<11
- b GET_TILED_OFFSET_RETURN
-
-GET_TILED_OFFSET_EVEN_FORMULA_2:
- add r14, r3, #127 @ temp3 = linear_x_size+127
- bic r14, r14, #0x7F @ temp3 = (temp3>>7)<<7
- mov r14, r14, asr #6 @ temp3 = temp3>>6
- mul r7, r12, r14 @ tiled_addr = temp2*temp3
- add r7, r7, r11 @ tiled_addr = tiled_addr+temp1
- mov r7, r7, lsl #11 @ tiled_addr = tiled_addr<<11
-
-GET_TILED_OFFSET_RETURN:
- ldmfd sp!, {r15} @ pop the saved return address into pc; result is in r7
- .fnend
-
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_y_neon.s b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_y_neon.s
deleted file mode 100644
index fb69a02..0000000
--- a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/csc/csc_yuv420_nv12t_y_neon.s
+++ /dev/null
@@ -1,451 +0,0 @@
-/*
- *
- * Copyright 2011 Samsung Electronics S.LSI Co. LTD
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * @file csc_yuv420_nv12t_y_neon.s
- * @brief SEC_OMX specific define
- * @author ShinWon Lee (shinwon.lee@samsung.com)
- * @version 1.0
- * @history
- * 2011.7.01 : Create
- */
-
-/*
- * Converts linear data to tiled.
- * 1. Y of YUV420P to Y of NV12T
- * 2. Y of YUV420S to Y of NV12T
- * 3. UV of YUV420S to UV of NV12T
- *
- * @param nv12t_dest
- * Y or UV plane address of NV12T[out]
- *
- * @param yuv420_src
- * Y or UV plane address of YUV420P(S)[in]
- *
- * @param yuv420_width
- * Width of YUV420[in]
- *
- * @param yuv420_height
- * Y: Height of YUV420, UV: Height/2 of YUV420[in]
- */
-
- .arch armv7-a
- .text
- .global csc_linear_to_tiled_neon
- .type csc_linear_to_tiled_neon, %function
-csc_linear_to_tiled_neon:
- .fnstart
-
- @ Entry point and main loop: copies every full 64x32-byte block of the
- @ linear source into the tiled destination. Leftover rows and columns are
- @ handled by the loops that follow this one.
- @
- @ Register usage:
- @r0 tiled_dest
- @r1 linear_src
- @r2 linear_x_size
- @r3 linear_y_size
- @r4 j
- @r5 i
- @r6 nn(tiled_addr)
- @r7 mm(linear_addr)
- @r8 aligned_x_size
- @r9 aligned_y_size
- @r10 temp1
- @r11 temp2
- @r12 temp3
- @r14 temp4
-
- stmfd sp!, {r4-r12,r14} @ backup registers (lr is saved because bl below overwrites it)
-
- bic r9, r3, #0x1F @ aligned_y_size = (linear_y_size>>5)<<5
- bic r8, r2, #0x3F @ aligned_x_size = (linear_x_size>>6)<<6
-
- @ NOTE(review): both loops below test their condition at the bottom, so the
- @ body runs once even when aligned_x_size or aligned_y_size is 0.
- @ Presumably callers always pass at least 64x32 - confirm.
- mov r5, #0 @ i = 0
-LOOP_ALIGNED_Y_SIZE:
-
- mov r4, #0 @ j = 0
-LOOP_ALIGNED_X_SIZE:
-
- bl GET_TILED_OFFSET @ r6 = byte offset of tile (i, j); clobbers r10-r12
-
- mul r10, r2, r5 @ temp1 = linear_x_size*(i)
- add r7, r1, r4 @ linear_addr = linear_src+j
- add r7, r7, r10 @ linear_addr = linear_addr+temp1
- sub r10, r2, #32 @ temp1 = linear_x_size-32
-
- @ Each load group below reads 4 source rows of 64 bytes into q0-q15.
- @ A row is two 32-byte loads: the first advances r7 by 32 (writeback),
- @ the second advances it by temp1, so r7 ends up one full row further on.
- @ Each pld prefetches from linear_addr + 2*linear_x_size.
- @ rows 0-3 of the tile
- pld [r7, r2, lsl #1]
- vld1.8 {q0, q1}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q2, q3}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q4, q5}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q6, q7}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q8, q9}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q10, q11}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q12, q13}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q14, q15}, [r7], r10
-
- add r6, r0, r6 @ tiled_addr = tiled_dest+tiled_addr
-
- @ Each store group writes the 4 rows back-to-back (256 contiguous bytes).
- vst1.8 {q0, q1}, [r6]!
- vst1.8 {q2, q3}, [r6]!
- vst1.8 {q4, q5}, [r6]!
- vst1.8 {q6, q7}, [r6]!
- vst1.8 {q8, q9}, [r6]!
- vst1.8 {q10, q11}, [r6]!
- vst1.8 {q12, q13}, [r6]!
- vst1.8 {q14, q15}, [r6]!
-
- @ rows 4-7 of the tile
- pld [r7, r2, lsl #1]
- vld1.8 {q0, q1}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q2, q3}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q4, q5}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q6, q7}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q8, q9}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q10, q11}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q12, q13}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q14, q15}, [r7], r10
-
- vst1.8 {q0, q1}, [r6]!
- vst1.8 {q2, q3}, [r6]!
- vst1.8 {q4, q5}, [r6]!
- vst1.8 {q6, q7}, [r6]!
- vst1.8 {q8, q9}, [r6]!
- vst1.8 {q10, q11}, [r6]!
- vst1.8 {q12, q13}, [r6]!
- vst1.8 {q14, q15}, [r6]!
-
- @ rows 8-11 of the tile
- pld [r7, r2, lsl #1]
- vld1.8 {q0, q1}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q2, q3}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q4, q5}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q6, q7}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q8, q9}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q10, q11}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q12, q13}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q14, q15}, [r7], r10
-
- vst1.8 {q0, q1}, [r6]!
- vst1.8 {q2, q3}, [r6]!
- vst1.8 {q4, q5}, [r6]!
- vst1.8 {q6, q7}, [r6]!
- vst1.8 {q8, q9}, [r6]!
- vst1.8 {q10, q11}, [r6]!
- vst1.8 {q12, q13}, [r6]!
- vst1.8 {q14, q15}, [r6]!
-
- @ rows 12-15 of the tile
- pld [r7, r2, lsl #1]
- vld1.8 {q0, q1}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q2, q3}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q4, q5}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q6, q7}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q8, q9}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q10, q11}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q12, q13}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q14, q15}, [r7], r10
-
- vst1.8 {q0, q1}, [r6]!
- vst1.8 {q2, q3}, [r6]!
- vst1.8 {q4, q5}, [r6]!
- vst1.8 {q6, q7}, [r6]!
- vst1.8 {q8, q9}, [r6]!
- vst1.8 {q10, q11}, [r6]!
- vst1.8 {q12, q13}, [r6]!
- vst1.8 {q14, q15}, [r6]!
-
- @ rows 16-19 of the tile
- pld [r7, r2, lsl #1]
- vld1.8 {q0, q1}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q2, q3}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q4, q5}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q6, q7}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q8, q9}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q10, q11}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q12, q13}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q14, q15}, [r7], r10
-
- vst1.8 {q0, q1}, [r6]!
- vst1.8 {q2, q3}, [r6]!
- vst1.8 {q4, q5}, [r6]!
- vst1.8 {q6, q7}, [r6]!
- vst1.8 {q8, q9}, [r6]!
- vst1.8 {q10, q11}, [r6]!
- vst1.8 {q12, q13}, [r6]!
- vst1.8 {q14, q15}, [r6]!
-
- @ rows 20-23 of the tile
- pld [r7, r2, lsl #1]
- vld1.8 {q0, q1}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q2, q3}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q4, q5}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q6, q7}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q8, q9}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q10, q11}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q12, q13}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q14, q15}, [r7], r10
-
- vst1.8 {q0, q1}, [r6]!
- vst1.8 {q2, q3}, [r6]!
- vst1.8 {q4, q5}, [r6]!
- vst1.8 {q6, q7}, [r6]!
- vst1.8 {q8, q9}, [r6]!
- vst1.8 {q10, q11}, [r6]!
- vst1.8 {q12, q13}, [r6]!
- vst1.8 {q14, q15}, [r6]!
-
- @ rows 24-27 of the tile
- pld [r7, r2, lsl #1]
- vld1.8 {q0, q1}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q2, q3}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q4, q5}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q6, q7}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q8, q9}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q10, q11}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q12, q13}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q14, q15}, [r7], r10
-
- vst1.8 {q0, q1}, [r6]!
- vst1.8 {q2, q3}, [r6]!
- vst1.8 {q4, q5}, [r6]!
- vst1.8 {q6, q7}, [r6]!
- vst1.8 {q8, q9}, [r6]!
- vst1.8 {q10, q11}, [r6]!
- vst1.8 {q12, q13}, [r6]!
- vst1.8 {q14, q15}, [r6]!
-
- @ rows 28-31 of the tile
- pld [r7, r2, lsl #1]
- vld1.8 {q0, q1}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q2, q3}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q4, q5}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q6, q7}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q8, q9}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q10, q11}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q12, q13}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q14, q15}, [r7], r10
-
- vst1.8 {q0, q1}, [r6]!
- vst1.8 {q2, q3}, [r6]!
- vst1.8 {q4, q5}, [r6]!
- vst1.8 {q6, q7}, [r6]!
- vst1.8 {q8, q9}, [r6]!
- vst1.8 {q10, q11}, [r6]!
- vst1.8 {q12, q13}, [r6]!
- vst1.8 {q14, q15}, [r6]!
-
- add r4, r4, #64 @ j = j+64
- cmp r4, r8 @ j<aligned_x_size
- blt LOOP_ALIGNED_X_SIZE
-
- add r5, r5, #32 @ i = i+32
- cmp r5, r9 @ i<aligned_y_size
- blt LOOP_ALIGNED_Y_SIZE
-
- cmp r5, r3 @ i == linear_y_size: no leftover rows
- beq LOOP_LINEAR_Y_SIZE_2_START
-
- @ Leftover rows: i continues from where the 32-row loop stopped and advances
- @ 4 rows per pass. Each pass copies a 64-byte-wide, 4-row strip for every
- @ 64-byte column block below aligned_x_size.
- @ NOTE(review): the inner loop tests at the bottom, so it runs once even if
- @ aligned_x_size is 0 - confirm callers never pass a width below 64.
-LOOP_LINEAR_Y_SIZE_1:
-
- mov r4, #0 @ j = 0
-LOOP_ALIGNED_X_SIZE_1:
-
- bl GET_TILED_OFFSET @ r6 = byte offset of the tile containing (i, j)
-
- mul r10, r2, r5 @ temp1 = linear_x_size*(i)
- add r7, r1, r4 @ linear_addr = linear_src+j
- add r7, r7, r10 @ linear_addr = linear_addr+temp1
- sub r10, r2, #32 @ temp1 = linear_x_size-32
-
- @ Load 4 rows of 64 bytes; each row is two 32-byte loads and r7 ends up
- @ one full source row further on after the second load of the pair.
- pld [r7, r2, lsl #1]
- vld1.8 {q0, q1}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q2, q3}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q4, q5}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q6, q7}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q8, q9}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q10, q11}, [r7], r10
- pld [r7, r2, lsl #1]
- vld1.8 {q12, q13}, [r7]!
- pld [r7, r2, lsl #1]
- vld1.8 {q14, q15}, [r7], r10
-
- add r6, r0, r6 @ tiled_addr = tiled_dest+tiled_addr
- and r11, r5, #0x1F @ temp2 = i&0x1F (row inside the 32-row tile)
- mov r11, r11, lsl #6 @ temp2 = 64*temp2 (64 bytes per tile row)
- add r6, r6, r11 @ tiled_addr = tiled_addr+temp2
-
- vst1.8 {q0, q1}, [r6]!
- vst1.8 {q2, q3}, [r6]!
- vst1.8 {q4, q5}, [r6]!
- vst1.8 {q6, q7}, [r6]!
- vst1.8 {q8, q9}, [r6]!
- vst1.8 {q10, q11}, [r6]!
- vst1.8 {q12, q13}, [r6]!
- vst1.8 {q14, q15}, [r6]!
-
- add r4, r4, #64 @ j = j+64
- cmp r4, r8 @ j<aligned_x_size
- blt LOOP_ALIGNED_X_SIZE_1
-
- add r5, r5, #4 @ i = i+4
- cmp r5, r3 @ i<linear_y_size
- blt LOOP_LINEAR_Y_SIZE_1
-
- @ Leftover columns: copies the strip from aligned_x_size up to linear_x_size
- @ for every row, as 4x4-byte blocks moved with word loads and stores.
- @ NOTE(review): i and j both advance by 4 with no partial-block handling,
- @ so this presumably expects width and height to be multiples of 4 - confirm.
-LOOP_LINEAR_Y_SIZE_2_START:
- cmp r4, r2 @ r4 still holds j from the preceding loop; j == linear_x_size means no leftover columns
- beq RESTORE_REG
-
- mov r5, #0 @ i = 0
-LOOP_LINEAR_Y_SIZE_2:
-
- mov r4, r8 @ j = aligned_x_size
-LOOP_LINEAR_X_SIZE_2:
-
- bl GET_TILED_OFFSET @ r6 = byte offset of the tile containing (i, j)
-
- mul r10, r2, r5 @ temp1 = linear_x_size*(i)
- add r7, r1, r4 @ linear_addr = linear_src+j
- add r7, r7, r10 @ linear_addr = linear_addr+temp1
-
- add r6, r0, r6 @ tiled_addr = tiled_dest+tiled_addr
- and r11, r5, #0x1F @ temp2 = i&0x1F (row inside the tile)
- mov r11, r11, lsl #6 @ temp2 = 64*temp2
- add r6, r6, r11 @ tiled_addr = tiled_addr+temp2
- and r11, r4, #0x3F @ temp2 = j&0x3F (byte column inside the tile)
- add r6, r6, r11 @ tiled_addr = tiled_addr+temp2
-
- ldr r10, [r7], r2 @ 4 bytes from each of 4 consecutive source rows
- ldr r11, [r7], r2
- ldr r12, [r7], r2
- ldr r14, [r7], r2 @ lr is free as a temp here: it was saved on entry and bl has already returned
- str r10, [r6], #64 @ tile rows are 64 bytes apart
- str r11, [r6], #64
- str r12, [r6], #64
- str r14, [r6], #64
-
- add r4, r4, #4 @ j = j+4
- cmp r4, r2 @ j<linear_x_size
- blt LOOP_LINEAR_X_SIZE_2
-
- add r5, r5, #4 @ i = i+4
- cmp r5, r3 @ i<linear_y_size
- blt LOOP_LINEAR_Y_SIZE_2
-
-RESTORE_REG:
- ldmfd sp!, {r4-r12,r15} @ restore registers; the saved lr is popped into pc, which returns to the caller
-
- @ Local subroutine (called with bl): computes the byte offset of the 2048-byte
- @ tile that contains row i, column j.
- @ in: r2 = linear_x_size, r3 = linear_y_size, r4 = j, r5 = i
- @ out: r6 = tiled_addr (offset only; the caller adds tiled_dest)
- @ clobbers: r10 (temp1), r11 (temp2), r12 (temp3)
-GET_TILED_OFFSET:
-
- mov r11, r5, asr #5 @ temp2 = i>>5
- mov r10, r4, asr #6 @ temp1 = j>>6
-
- and r12, r11, #0x1 @ if (temp2 & 0x1)
- cmp r12, #0x1
- bne GET_TILED_OFFSET_EVEN_FORMULA_1
-
- @ temp2 is odd:
- @ tiled_addr = ((temp2-1)*temp3 + temp1 + 2 + (temp1 & ~3)) << 11
- @ with temp3 = (linear_x_size rounded up to a multiple of 128) >> 6
-GET_TILED_OFFSET_ODD_FORMULA:
- sub r6, r11, #1 @ tiled_addr = temp2-1
- add r12, r2, #127 @ temp3 = linear_x_size+127
- bic r12, r12, #0x7F @ temp3 = (temp3 >>7)<<7
- mov r12, r12, asr #6 @ temp3 = temp3>>6
- mul r6, r6, r12 @ tiled_addr = tiled_addr*temp3
- add r6, r6, r10 @ tiled_addr = tiled_addr+temp1
- add r6, r6, #2 @ tiled_addr = tiled_addr+2
- bic r12, r10, #0x3 @ temp3 = (temp1>>2)<<2
- add r6, r6, r12 @ tiled_addr = tiled_addr+temp3
- mov r6, r6, lsl #11 @ tiled_addr = tiled_addr<<11
- b GET_TILED_OFFSET_RETURN
-
- @ temp2 is even and i is above the last 32-row band:
- @ tiled_addr = (temp1 + ((temp1+2) & ~3) + temp2*temp3) << 11
-GET_TILED_OFFSET_EVEN_FORMULA_1:
- add r12, r3, #31 @ temp3 = linear_y_size+31
- bic r12, r12, #0x1F @ temp3 = (temp3>>5)<<5
- sub r12, r12, #32 @ temp3 = temp3 - 32
- cmp r5, r12 @ if (i < temp3) {   (the 32 has already been subtracted)
- bge GET_TILED_OFFSET_EVEN_FORMULA_2
- add r12, r10, #2 @ temp3 = temp1+2
- bic r12, r12, #3 @ temp3 = (temp3>>2)<<2
- add r6, r10, r12 @ tiled_addr = temp1+temp3
- add r12, r2, #127 @ temp3 = linear_x_size+127
- bic r12, r12, #0x7F @ temp3 = (temp3>>7)<<7
- mov r12, r12, asr #6 @ temp3 = temp3>>6
- mul r11, r11, r12 @ temp2 = temp2*temp3
- add r6, r6, r11 @ tiled_addr = tiled_addr+temp2
- mov r6, r6, lsl #11 @ tiled_addr = tiled_addr<<11
- b GET_TILED_OFFSET_RETURN
-
- @ temp2 is even and i lies in the last 32-row band:
- @ tiled_addr = (temp2*temp3 + temp1) << 11
-GET_TILED_OFFSET_EVEN_FORMULA_2:
- add r12, r2, #127 @ temp3 = linear_x_size+127
- bic r12, r12, #0x7F @ temp3 = (temp3>>7)<<7
- mov r12, r12, asr #6 @ temp3 = temp3>>6
- mul r6, r11, r12 @ tiled_addr = temp2*temp3
- add r6, r6, r10 @ tiled_addr = tiled_addr+temp1
- mov r6, r6, lsl #11 @ tiled_addr = tiled_addr<<11
-
-GET_TILED_OFFSET_RETURN:
- mov pc, lr @ return to the instruction after the bl
- .fnend
-
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/dec/src/SsbSipMfcDecAPI.c b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/dec/src/SsbSipMfcDecAPI.c
index 19b63b0..508f290 100644
--- a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/dec/src/SsbSipMfcDecAPI.c
+++ b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/dec/src/SsbSipMfcDecAPI.c
@@ -503,3 +503,173 @@ SSBSIP_MFC_ERROR_CODE SsbSipMfcDecGetConfig(void *openHandle, SSBSIP_MFC_DEC_CON
return MFC_RET_OK;
}
+
+/*
+ * Maps a position in a linear plane to the byte offset of the matching
+ * 16-byte group inside the tiled buffer.
+ *
+ * @param x_size  width of the plane
+ * @param y_size  height of the plane (number of rows)
+ * @param x_pos   horizontal position; only x_pos >> 2 takes part in the result
+ * @param y_pos   row
+ * @return (linear_addr1 << 13) | (bank_addr << 11) | (linear_addr0 << 2), where
+ *         linear_addr0 combines the low 5 bits of y_pos and the low 4 bits of
+ *         x_pos >> 2, bank_addr is a 2-bit selector and linear_addr1 is the
+ *         coarse row/column index.
+ */
+int tile_4x2_read(int x_size, int y_size, int x_pos, int y_pos)
+{
+    int pixel_x_m1, pixel_y_m1;
+    int roundup_x;
+    int linear_addr0, linear_addr1, bank_addr;
+    int x_addr;
+    int trans_addr;
+
+    pixel_x_m1 = x_size - 1;
+    pixel_y_m1 = y_size - 1;
+
+    /* x_size in units of 128, rounded up; used as the row pitch for linear_addr1.
+     * The former roundup_y (computed from pixel_x_m1 and never read) is gone. */
+    roundup_x = ((pixel_x_m1 >> 7) + 1);
+
+    x_addr = x_pos >> 2;
+
+    /* Only the column term of linear_addr1 depends on the branch. */
+    if ((y_size <= y_pos + 32) && (y_pos < y_size) &&
+        (((pixel_y_m1 >> 5) & 0x1) == 0) && (((y_pos >> 5) & 0x1) == 0)) {
+        /* y_pos is in the last 32-row band and that band has an even index */
+        linear_addr1 = (((y_pos >> 6) & 0xff) * roundup_x + ((x_addr >> 6) & 0x3f));
+    } else {
+        linear_addr1 = (((y_pos >> 6) & 0xff) * roundup_x + ((x_addr >> 5) & 0x7f));
+    }
+
+    /* These two were duplicated verbatim in both branches. */
+    linear_addr0 = (((y_pos & 0x1f) << 4) | (x_addr & 0xf));
+
+    if (((x_addr >> 5) & 0x1) == ((y_pos >> 5) & 0x1))
+        bank_addr = ((x_addr >> 4) & 0x1);
+    else
+        bank_addr = 0x2 | ((x_addr >> 4) & 0x1);
+
+    linear_addr0 = linear_addr0 << 2;
+    trans_addr = (linear_addr1 << 13) | (bank_addr << 11) | linear_addr0;
+
+    return trans_addr;
+}
+
+/*
+ * Copies a plane from the tiled buffer into a linear buffer, 16 bytes per
+ * row segment, walking the plane in 16x16 steps.
+ *
+ * @param p_linear_addr  destination, written at offsets below x_size * y_size
+ * @param p_tiled_addr   source, read at the offsets given by tile_4x2_read()
+ * @param x_size         plane width
+ * @param y_size         plane height
+ */
+void Y_tile_to_linear_4x2(unsigned char *p_linear_addr, unsigned char *p_tiled_addr, unsigned int x_size, unsigned int y_size)
+{
+    const unsigned int plane_bytes = x_size * y_size;
+    unsigned int row0, col, line, grp, b;
+    unsigned char quad[4];
+
+    for (row0 = 0; row0 < y_size; row0 += 16) {
+        for (col = 0; col < x_size; col += 16) {
+            const int tile_off = tile_4x2_read(x_size, y_size, col, row0);
+
+            for (line = 0; line < 16; line++) {
+                const unsigned int dst = (row0 * x_size) + (x_size * line) + col;
+                const unsigned int src = tile_off + 64 * line;
+
+                /* Skip segments that would run past the end of the plane.
+                 * The comparison is strict, so a segment ending exactly at
+                 * the last byte is still copied. */
+                if (dst + 16 > plane_bytes)
+                    continue;
+
+                /* Move the 16 bytes as four read-4 / write-4 groups. */
+                for (grp = 0; grp < 16; grp += 4) {
+                    for (b = 0; b < 4; b++)
+                        quad[b] = p_tiled_addr[src + grp + b];
+                    for (b = 0; b < 4; b++)
+                        p_linear_addr[dst + grp + b] = quad[b];
+                }
+            }
+        }
+    }
+}
+
+/*
+ * Copies the interleaved plane from the tiled buffer and de-interleaves it:
+ * bytes at even linear offsets go to the first output plane, bytes at odd
+ * offsets to the second (presumably Cb then Cr - confirm against the caller).
+ *
+ * @param p_linear_addr  destination; the second plane starts at
+ *                       p_linear_addr + (x_size * (y_size / 2)) / 2
+ * @param p_tiled_addr   source, read at the offsets given by tile_4x2_read()
+ * @param x_size         plane width
+ * @param y_size         full frame height; y_size / 2 rows are processed
+ */
+void CbCr_tile_to_linear_4x2(unsigned char *p_linear_addr, unsigned char *p_tiled_addr, unsigned int x_size, unsigned int y_size)
+{
+    const unsigned int chroma_rows = y_size / 2;
+    const unsigned int plane_bytes = x_size * chroma_rows;
+    unsigned int row0, col, line, grp, b, pos;
+    unsigned char quad[4];
+    unsigned char *plane[2];
+
+    plane[0] = p_linear_addr;
+    plane[1] = p_linear_addr + (plane_bytes / 2);
+
+    for (row0 = 0; row0 < chroma_rows; row0 += 16) {
+        for (col = 0; col < x_size; col += 16) {
+            const int tile_off = tile_4x2_read(x_size, chroma_rows, col, row0);
+
+            for (line = 0; line < 16; line++) {
+                const unsigned int dst = (row0 * x_size) + (x_size * line) + col;
+                const unsigned int src = tile_off + 64 * line;
+
+                /* Skip segments that would run past the end of the plane.
+                 * The comparison is strict, so a segment ending exactly at
+                 * the last byte is still copied. */
+                if (dst + 16 > plane_bytes)
+                    continue;
+
+                /* Four read-4 / write-4 groups; each byte is routed by the
+                 * parity of its linear offset. */
+                for (grp = 0; grp < 16; grp += 4) {
+                    for (b = 0; b < 4; b++)
+                        quad[b] = p_tiled_addr[src + grp + b];
+                    for (b = 0; b < 4; b++) {
+                        pos = dst + grp + b;
+                        plane[pos % 2][pos / 2] = quad[b];
+                    }
+                }
+            }
+        }
+    }
+}
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/enc/src/SsbSipMfcEncAPI.c b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/enc/src/SsbSipMfcEncAPI.c
index c31e522..2c33c5b 100644
--- a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/enc/src/SsbSipMfcEncAPI.c
+++ b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/enc/src/SsbSipMfcEncAPI.c
@@ -467,9 +467,6 @@ SSBSIP_MFC_ERROR_CODE SsbSipMfcEncGetInBuf(void *openHandle, SSBSIP_MFC_ENC_INPU
input_info->YVirAddr = (void*)pCTX->virFrmBuf.luma;
input_info->CVirAddr = (void*)pCTX->virFrmBuf.chroma;
- input_info->YSize = aligned_y_size;
- input_info->CSize = aligned_c_size;
-
return MFC_RET_OK;
}
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/SsbSipMfcApi.h b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/SsbSipMfcApi.h
index 87e9b2d..e083998 100644
--- a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/SsbSipMfcApi.h
+++ b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/SsbSipMfcApi.h
@@ -285,6 +285,12 @@ extern "C" {
#endif
/*--------------------------------------------------------------------------------*/
+/* Format Conversion API */
+/*--------------------------------------------------------------------------------*/
+/* Copies an x_size by y_size plane from the tiled buffer p_tiled_addr into the linear buffer p_linear_addr. */
+void Y_tile_to_linear_4x2(unsigned char *p_linear_addr, unsigned char *p_tiled_addr, unsigned int x_size, unsigned int y_size);
+/* Copies y_size / 2 rows of the interleaved tiled plane and splits them into two consecutive planes at p_linear_addr (even bytes first, odd bytes second). */
+void CbCr_tile_to_linear_4x2(unsigned char *p_linear_addr, unsigned char *p_tiled_addr, unsigned int x_size, unsigned int y_size);
+
+/*--------------------------------------------------------------------------------*/
/* Decoding APIs */
/*--------------------------------------------------------------------------------*/
void *SsbSipMfcDecOpen(void);
diff --git a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/color_space_convertor.h b/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/color_space_convertor.h
deleted file mode 100644
index c5cef08..0000000
--- a/sec_mm/sec_omx/sec_codecs/video/mfc_c110/include/color_space_convertor.h
+++ /dev/null
@@ -1,176 +0,0 @@
-/*
- *
- * Copyright 2011 Samsung Electronics S.LSI Co. LTD
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/*
- * @file color_space_convertor.h
- * @brief SEC_OMX specific define.
- * NV12T(tiled) layout:
- * Each element is not pixel. It is 64x32 pixel block.
- * uv pixel block is interleaved as u v u v u v ...
- * y1 y2 y7 y8 y9 y10 y15 y16
- * y3 y4 y5 y6 y11 y12 y13 y14
- * y17 y18 y23 y24 y25 y26 y31 y32
- * y19 y20 y21 y22 y27 y28 y29 y30
- * uv1 uv2 uv7 uv8 uv9 uv10 uv15 uv16
- * uv3 uv4 uv5 uv6 uv11 uv12 uv13 uv14
- * YUV420Planar(linear) layout:
- * Each element is not pixel. It is 64x32 pixel block.
- * y1 y2 y3 y4 y5 y6 y7 y8
- * y9 y10 y11 y12 y13 y14 y15 y16
- * y17 y18 y19 y20 y21 y22 y23 y24
- * y25 y26 y27 y28 y29 y30 y31 y32
- * u1 u2 u3 u4 u5 u6 u7 u8
- * v1 v2 v3 v4 v5 v6 v7 v8
- * YUV420Semiplanar(linear) layout:
- * Each element is not pixel. It is 64x32 pixel block.
- * uv pixel block is interleaved as u v u v u v ...
- * y1 y2 y3 y4 y5 y6 y7 y8
- * y9 y10 y11 y12 y13 y14 y15 y16
- * y17 y18 y19 y20 y21 y22 y23 y24
- * y25 y26 y27 y28 y29 y30 y31 y32
- * uv1 uv2 uv3 uv4 uv5 uv6 uv7 uv8
- * uv9 uv10 uv11 uv12 uv13 uv14 uv15 uv16
- * @author ShinWon Lee (shinwon.lee@samsung.com)
- * @version 1.0
- * @history
- * 2011.7.01 : Create
- */
-
-#ifndef COLOR_SPACE_CONVERTOR_H_
-#define COLOR_SPACE_CONVERTOR_H_
-
-/*--------------------------------------------------------------------------------*/
-/* Format Conversion API */
-/*--------------------------------------------------------------------------------*/
-/* Neon Code */
-/*
- * De-interleaves src to dest1, dest2
- *
- * @param dest1
- * Address of de-interleaved data[out]
- *
- * @param dest2
- * Address of de-interleaved data[out]
- *
- * @param src
- * Address of interleaved data[in]
- *
- * @param src_size
- * Size of interleaved data[in]
- */
-void csc_deinterleave_memcpy_neon(char *dest1, char *dest2, char *src, int src_size);
-
-/*
- * Interleaves src1, src2 to dest
- *
- * @param dest
- * Address of interleaved data[out]
- *
- * @param src1
- * Address of de-interleaved data[in]
- *
- * @param src2
- * Address of de-interleaved data[in]
- *
- * @param src_size
- * Size of de-interleaved data[in]
- */
-void csc_interleave_memcpy_neon(char *dest, char *src1, char *src2, int src_size);
-
-/*
- * Converts tiled data to linear.
- * 1. Y of NV12T to Y of YUV420P
- * 2. Y of NV12T to Y of YUV420S
- * 3. UV of NV12T to UV of YUV420S
- *
- * @param yuv420_dest
- * Y or UV plane address of YUV420[out]
- *
- * @param nv12t_src
- * Y or UV plane address of NV12T[in]
- *
- * @param yuv420_width
- * Width of YUV420[in]
- *
- * @param yuv420_height
- * Y: Height of YUV420, UV: Height/2 of YUV420[in]
- */
-void csc_tiled_to_linear_neon(char *yuv420p_y_dest, char *nv12t_y_src, int yuv420p_width, int yuv420p_y_height);
-
-/*
- * Converts and Deinterleaves tiled data to linear
- * 1. UV of NV12T to UV of YUV420P
- *
- * @param yuv420_u_dest
- * U plane address of YUV420P[out]
- *
- * @param yuv420_v_dest
- * V plane address of YUV420P[out]
- *
- * @param nv12t_src
- * UV plane address of NV12T[in]
- *
- * @param yuv420_width
- * Width of YUV420[in]
- *
- * @param yuv420_uv_height
- * Height/2 of YUV420[in]
- */
-void csc_tiled_to_linear_deinterleave_neon(char *yuv420p_u_dest, char *yuv420p_v_dest, char *nv12t_uv_src, int yuv420p_width, int yuv420p_uv_height);
-
-/*
- * Converts linear data to tiled.
- * 1. Y of YUV420P to Y of NV12T
- * 2. Y of YUV420S to Y of NV12T
- * 3. UV of YUV420S to UV of NV12T
- *
- * @param nv12t_dest
- * Y or UV plane address of NV12T[out]
- *
- * @param yuv420_src
- * Y or UV plane address of YUV420P(S)[in]
- *
- * @param yuv420_width
- * Width of YUV420[in]
- *
- * @param yuv420_height
- * Y: Height of YUV420, UV: Height/2 of YUV420[in]
- */
-void csc_linear_to_tiled_neon(char *nv12t_dest, char *yuv420p_src, int yuv420p_width, int yuv420p_y_height);
-
-/*
- * Converts and Interleaves linear to tiled
- * 1. UV of YUV420P to UV of NV12T
- *
- * @param nv12t_uv_dest
- * UV plane address of NV12T[out]
- *
- * @param yuv420p_u_src
- * U plane address of YUV420P[in]
- *
- * @param yuv420p_v_src
- * V plane address of YUV420P[in]
- *
- * @param yuv420_width
- * Width of YUV420[in]
- *
- * @param yuv420_uv_height
- * Height/2 of YUV420[in]
- */
-void csc_linear_to_tiled_interleave_neon(char *nv12t_uv_dest, char *yuv420p_u_src, char *yuv420p_v_src, int yuv420p_width, int yuv420p_uv_height);
-
-#endif /*COLOR_SPACE_CONVERTOR_H_*/
diff --git a/sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/Android.mk b/sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/Android.mk
index cdf345c..08e9874 100644
--- a/sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/Android.mk
+++ b/sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/Android.mk
@@ -14,7 +14,7 @@ LOCAL_CFLAGS :=
LOCAL_ARM_MODE := arm
-LOCAL_STATIC_LIBRARIES := libSEC_OMX_Vdec libsecosal libsecbasecomponent libsecmfcdecapi libseccsc
+LOCAL_STATIC_LIBRARIES := libSEC_OMX_Vdec libsecosal libsecbasecomponent libsecmfcdecapi
LOCAL_SHARED_LIBRARIES := libc libdl libcutils libutils
LOCAL_C_INCLUDES := $(SEC_OMX_INC)/khronos \
@@ -22,7 +22,7 @@ LOCAL_C_INCLUDES := $(SEC_OMX_INC)/khronos \
$(SEC_OMX_TOP)/sec_osal \
$(SEC_OMX_TOP)/sec_omx_core \
$(SEC_OMX_COMPONENT)/common \
- $(SEC_OMX_COMPONENT)/video/dec
+ $(SEC_OMX_COMPONENT)/video/dec \
LOCAL_C_INCLUDES += $(SEC_OMX_TOP)/sec_codecs/video/mfc_c110/include
diff --git a/sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/SEC_OMX_H264dec.c b/sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/SEC_OMX_H264dec.c
index e1edf95..9acae4e 100644
--- a/sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/SEC_OMX_H264dec.c
+++ b/sec_mm/sec_omx/sec_omx_component/video/dec/h264dec/SEC_OMX_H264dec.c
@@ -35,7 +35,6 @@
#include "library_register.h"
#include "SEC_OMX_H264dec.h"
#include "SsbSipMfcApi.h"
-#include "color_space_convertor.h"
#undef SEC_LOG_TAG
#define SEC_LOG_TAG "SEC_H264_DEC"
@@ -956,17 +955,14 @@ OMX_ERRORTYPE SEC_MFC_H264_Decode(OMX_COMPONENTTYPE *pOMXComponent, SEC_OMX_DATA
SEC_OSAL_Memcpy(pOutBuf + sizeof(frameSize) + (sizeof(void *) * 3), &(outputInfo.CVirAddr), sizeof(outputInfo.CVirAddr));
} else {
SEC_OSAL_Log(SEC_LOG_TRACE, "YUV420 out for ThumbnailMode");
- csc_tiled_to_linear_neon(
- (unsigned char *)pOutBuf,
- (unsigned char *)outputInfo.YVirAddr,
- bufWidth,
- bufHeight);
- csc_tiled_to_linear_deinterleave_neon(
- (unsigned char *)pOutBuf + frameSize,
- (unsigned char *)pOutBuf + (frameSize * 5) / 4,
- (unsigned char *)outputInfo.CVirAddr,
- bufWidth,
- bufHeight >> 1);
+ Y_tile_to_linear_4x2(
+ (unsigned char *)pOutBuf,
+ (unsigned char *)outputInfo.YVirAddr,
+ bufWidth, bufHeight);
+ CbCr_tile_to_linear_4x2(
+ ((unsigned char *)pOutBuf) + frameSize,
+ (unsigned char *)outputInfo.CVirAddr,
+ bufWidth, bufHeight);
}
}
diff --git a/sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/Android.mk b/sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/Android.mk
index 66353d6..92891a7 100644
--- a/sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/Android.mk
+++ b/sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/Android.mk
@@ -14,7 +14,7 @@ LOCAL_CFLAGS :=
LOCAL_ARM_MODE := arm
-LOCAL_STATIC_LIBRARIES := libSEC_OMX_Vdec libsecosal libsecbasecomponent libsecmfcdecapi libseccsc
+LOCAL_STATIC_LIBRARIES := libSEC_OMX_Vdec libsecosal libsecbasecomponent libsecmfcdecapi
LOCAL_SHARED_LIBRARIES := libc libdl libcutils libutils
LOCAL_C_INCLUDES := $(SEC_OMX_INC)/khronos \
diff --git a/sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/SEC_OMX_Mpeg4dec.c b/sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/SEC_OMX_Mpeg4dec.c
index d7ac10a..7396a2c 100644
--- a/sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/SEC_OMX_Mpeg4dec.c
+++ b/sec_mm/sec_omx/sec_omx_component/video/dec/mpeg4dec/SEC_OMX_Mpeg4dec.c
@@ -35,7 +35,6 @@
#include "library_register.h"
#include "SEC_OMX_Mpeg4dec.h"
#include "SsbSipMfcApi.h"
-#include "color_space_convertor.h"
#undef SEC_LOG_TAG
#define SEC_LOG_TAG "SEC_MPEG4_DEC"
@@ -1136,17 +1135,14 @@ OMX_ERRORTYPE SEC_MFC_Mpeg4_Decode(OMX_COMPONENTTYPE *pOMXComponent, SEC_OMX_DAT
SEC_OSAL_Memcpy(pOutputBuf + sizeof(frameSize) + (sizeof(void *) * 3), &(outputInfo.CVirAddr), sizeof(outputInfo.CVirAddr));
} else {
SEC_OSAL_Log(SEC_LOG_TRACE, "YUV420 out for ThumbnailMode");
- csc_tiled_to_linear_neon(
- (unsigned char *)pOutputBuf,
- (unsigned char *)outputInfo.YVirAddr,
- bufWidth,
- bufHeight);
- csc_tiled_to_linear_deinterleave_neon(
- (unsigned char *)pOutputBuf + frameSize,
- (unsigned char *)pOutputBuf + (frameSize * 5) / 4,
- (unsigned char *)outputInfo.CVirAddr,
- bufWidth,
- bufHeight >> 1);
+ Y_tile_to_linear_4x2(
+ (unsigned char *)pOutputBuf,
+ (unsigned char *)outputInfo.YVirAddr,
+ bufWidth, bufHeight);
+ CbCr_tile_to_linear_4x2(
+ ((unsigned char *)pOutputBuf) + frameSize,
+ (unsigned char *)outputInfo.CVirAddr,
+ bufWidth, bufHeight);
}
}
diff --git a/sec_mm/sec_omx/sec_omx_component/video/enc/h264enc/Android.mk b/sec_mm/sec_omx/sec_omx_component/video/enc/h264enc/Android.mk
index cf91356..3edcb58 100644
--- a/sec_mm/sec_omx/sec_omx_component/video/enc/h264enc/Android.mk
+++ b/sec_mm/sec_omx/sec_omx_component/video/enc/h264enc/Android.mk
@@ -22,7 +22,7 @@ LOCAL_C_INCLUDES := $(SEC_OMX_INC)/khronos \
$(SEC_OMX_TOP)/sec_osal \
$(SEC_OMX_TOP)/sec_omx_core \
$(SEC_OMX_COMPONENT)/common \
- $(SEC_OMX_COMPONENT)/video/enc
+ $(SEC_OMX_COMPONENT)/video/enc \
LOCAL_C_INCLUDES += $(SEC_OMX_TOP)/sec_codecs/video/mfc_c110/include