author | Suman Anna <s-anna@ti.com> | 2011-04-25 13:41:57 -0500
committer | Colin Cross <ccross@android.com> | 2011-06-14 09:05:22 -0700
commit | 2b94df0645fc93bf061999b02b6931236ceffe78 (patch)
tree | cd21b7674d7c537c8b6ee67a07f8bd1abdcb8d02 /drivers/media/video
parent | f775333297bf13cd5525d9cccfd3f7f1f4475298 (diff)
download | kernel_samsung_tuna-2b94df0645fc93bf061999b02b6931236ceffe78.zip
kernel_samsung_tuna-2b94df0645fc93bf061999b02b6931236ceffe78.tar.gz
kernel_samsung_tuna-2b94df0645fc93bf061999b02b6931236ceffe78.tar.bz2
TILER-DMM: Main TILER driver implementation
This patch contains the TILER driver and the implementation of the TILER
block manipulation and mapping functions, including information on TILER
geometry, as well as the tiler_view_t object manipulation functions.
It also contains the Makefile and Kconfig file for the TILER driver.
Signed-off-by: Lajos Molnar <molnar@ti.com>
Signed-off-by: David Sin <davidsin@ti.com>
Signed-off-by: Suman Anna <s-anna@ti.com>
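[For orientation, here is a minimal, hypothetical sketch of the in-kernel allocation API this patch adds (tiler_alloc()/tiler_free() in tiler-iface.c). The struct tiler_block_t fields and TILFMT_16BIT follow the patch itself; the wrapper function name and the values are illustrative only.]

    #include <linux/kernel.h>
    #include <mach/tiler.h>

    static int tiler_alloc_example(void)
    {
            struct tiler_block_t blk = {0}; /* phys must be 0 before allocation */
            s32 err;

            blk.width = 640;        /* block dimensions in pixels */
            blk.height = 480;
            blk.key = 0xc0ffee;     /* key used later to look the block up */

            /* 2D 16-bit block with default alignment and no offset */
            err = tiler_alloc(&blk, TILFMT_16BIT, 0, 0);
            if (err)
                    return err;

            pr_info("tiler: allocated block id %#x at ssptr %#x\n",
                    blk.id, blk.phys);

            tiler_free(&blk);       /* frees the block and clears phys/id */
            return 0;
    }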
Diffstat (limited to 'drivers/media/video')
-rw-r--r-- | drivers/media/video/tiler/Kconfig | 126
-rw-r--r-- | drivers/media/video/tiler/Makefile | 7
-rw-r--r-- | drivers/media/video/tiler/_tiler.h | 168
-rw-r--r-- | drivers/media/video/tiler/tiler-geom.c | 372
-rw-r--r-- | drivers/media/video/tiler/tiler-iface.c | 273
-rw-r--r-- | drivers/media/video/tiler/tiler-main.c | 1270
-rw-r--r-- | drivers/media/video/tiler/tiler-reserve.c | 550
7 files changed, 2766 insertions, 0 deletions
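[Several of the Kconfig defaults below can be overridden at boot through the tiler module parameters (module_param_named(align, ...), module_param_named(grain, ...) and module_param(ssptr_id, ...) in tiler-main.c). A hypothetical kernel command line with illustrative values:

    tiler.grain=256 tiler.align=4096 tiler.ssptr_id=0

Since ssptr_id is registered read-only (mode 0444), the boot argument is the only way to change it.]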
diff --git a/drivers/media/video/tiler/Kconfig b/drivers/media/video/tiler/Kconfig
new file mode 100644
index 0000000..00461eb
--- /dev/null
+++ b/drivers/media/video/tiler/Kconfig
@@ -0,0 +1,126 @@
+config HAVE_TI_TILER
+	bool
+	default y
+	depends on ARCH_OMAP4
+
+menuconfig TI_TILER
+	tristate "TI TILER support"
+	default y
+	depends on HAVE_TI_TILER
+	help
+	  TILER and TILER-DMM driver for TI chips. The TI TILER device
+	  enables video rotation on certain TI chips such as OMAP4 or
+	  Netra. Video rotation will be limited without TILER support.
+
+config TILER_GRANULARITY
+	int "Allocation granularity (2^n)"
+	range 1 4096
+	default 128
+	depends on TI_TILER
+	help
+	  This option sets the default TILER allocation granularity. It can
+	  be overridden by the tiler.grain boot argument.
+
+	  The allocation granularity is the smallest TILER block size (in
+	  bytes) managed distinctly by the TILER driver. TILER blocks of any
+	  size are managed in chunks of at least this size.
+
+	  Must be a power of 2 in the range of 1 to 4096; however, the
+	  TILER driver may use a larger supported granularity.
+
+	  Supported values are: 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024,
+	  2048, 4096.
+
+config TILER_ALIGNMENT
+	int "Allocation alignment (2^n)"
+	range 1 4096
+	default 4096
+	depends on TI_TILER
+	help
+	  This option sets the default TILER allocation alignment. It can
+	  be overridden by the tiler.align boot argument.
+
+	  Must be a power of 2 in the range of 1 to 4096; however, it is
+	  naturally aligned to the TILER granularity.
+
+	  Supported values are: 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024,
+	  2048, 4096.
+
+config TILER_CACHE_LIMIT
+	int "Memory limit to cache free pages in MBytes"
+	range 0 128
+	default 40
+	depends on TI_TILER
+	help
+	  This option sets the minimum memory that TILER retains even if
+	  less TILER-allocated memory is in use. The unused memory is
+	  instead stored in a cache to speed up allocation and freeing of
+	  physical pages.
+
+	  This option can be overridden by the tiler.cache boot argument.
+
+	  While initially TILER will use less memory than this limit (0), it
+	  will not release any memory used until it reaches this limit.
+	  Thereafter, TILER will release any unused memory immediately as
+	  long as it stays above this threshold.
+
+config TILER_SECURITY
+	int "Process security"
+	range 0 1
+	default 1
+	depends on TI_TILER
+	help
+	  This option sets the default TILER process security. It can be
+	  overridden by the tiler.secure boot argument.
+
+	  If process security is enabled (1), the TILER driver uses a
+	  separate TILER buffer address space (for mmap purposes) for each
+	  process. This means that one process cannot simply map another
+	  process's TILER buffer into its memory, even for sharing. However,
+	  it can recreate the buffer by knowing the ids and secret keys for
+	  the TILER blocks involved. This is the preferred configuration.
+
+	  Disabling security (0) allows sharing buffers simply by sharing
+	  the mmap offset and size. However, because buffers can potentially
+	  be shared between processes, it delays resource cleanup while any
+	  process has an open TILER device.
+
+config TILER_SSPTR_ID
+	int "Use SSPtr for id"
+	range 0 1
+	default 1
+	depends on TI_TILER
+	help
+	  This option sets the default behavior for TILER block ids. It can
+	  be overridden by the tiler.ssptr_id boot argument.
+
+	  If true, the TILER driver uses the system-space (physical) address
+	  (SSPtr) of a TILER block as its unique id. This may help sharing
+	  TILER blocks between co-processors if using a constant key for
+	  each block.
+
+	  Note that the SSPtr is unique for each TILER block.
+
+config TILER_SECURE
+	bool "Secure TILER build"
+	default n
+	depends on TI_TILER
+	help
+	  This option forces TILER security features, bypassing the module
+	  parameters.
+
+	  If set, process security is hardwired, and the ssptr and offset
+	  lookup APIs are removed.
+
+config TILER_EXPOSE_SSPTR
+	bool "Expose SSPtr to userspace"
+	default y
+	depends on TI_TILER
+	help
+	  This option sets whether SSPtrs for blocks are exposed during
+	  TILIOC_GBLK ioctls (MemMgr_Alloc APIs). In a secure TILER build,
+	  this may be the only way for userspace code to learn the
+	  system-space addresses of TILER blocks.
+
+	  You can use this flag to see if userspace is relying on having
+	  access to the SSPtr.
diff --git a/drivers/media/video/tiler/Makefile b/drivers/media/video/tiler/Makefile
new file mode 100644
index 0000000..aeb0f05
--- /dev/null
+++ b/drivers/media/video/tiler/Makefile
@@ -0,0 +1,7 @@
+obj-$(CONFIG_TI_TILER) += tcm/
+
+obj-$(CONFIG_TI_TILER) += tiler.o
+tiler-objs = tiler-geom.o tiler-main.o tiler-iface.o tiler-reserve.o tmm-pat.o
+
+obj-$(CONFIG_TI_TILER) += tiler_dmm.o
+tiler_dmm-objs = dmm.o
diff --git a/drivers/media/video/tiler/_tiler.h b/drivers/media/video/tiler/_tiler.h
new file mode 100644
index 0000000..aa39bcf
--- /dev/null
+++ b/drivers/media/video/tiler/_tiler.h
@@ -0,0 +1,168 @@
+/*
+ * _tiler.h
+ *
+ * TI TILER driver internal shared definitions.
+ *
+ * Author: Lajos Molnar <molnar@ti.com>
+ *
+ * Copyright (C) 2009-2011 Texas Instruments, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * * Redistributions in binary form must reproduce the above copyright
+ *   notice, this list of conditions and the following disclaimer in the
+ *   documentation and/or other materials provided with the distribution.
+ *
+ * * Neither the name of Texas Instruments Incorporated nor the names of
+ *   its contributors may be used to endorse or promote products derived
+ *   from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
+ * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
+ * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
+ * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
+ * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
+ * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */ + +#ifndef _TILER_H +#define _TILER_H + +#include <linux/kernel.h> +#include <mach/tiler.h> +#include "tcm.h" + +#define TILER_FORMATS (TILFMT_MAX - TILFMT_MIN + 1) + +/* per process (thread group) info */ +struct process_info { + struct list_head list; /* other processes */ + struct list_head groups; /* my groups */ + struct list_head bufs; /* my registered buffers */ + pid_t pid; /* really: thread group ID */ + u32 refs; /* open tiler devices, 0 for processes + tracked via kernel APIs */ + bool kernel; /* tracking kernel objects */ +}; + +/* per group info (within a process) */ +struct gid_info { + struct list_head by_pid; /* other groups */ + struct list_head areas; /* all areas in this pid/gid */ + struct list_head reserved; /* areas pre-reserved */ + struct list_head onedim; /* all 1D areas in this pid/gid */ + u32 gid; /* group ID */ + int refs; /* instances directly using this ptr */ + struct process_info *pi; /* parent */ +}; + +/* info for an area reserved from a container */ +struct area_info { + struct list_head by_gid; /* areas in this pid/gid */ + struct list_head blocks; /* blocks in this area */ + u32 nblocks; /* # of blocks in this area */ + + struct tcm_area area; /* area details */ + struct gid_info *gi; /* link to parent, if still alive */ +}; + +/* info for a block */ +struct mem_info { + struct list_head global; /* reserved / global blocks */ + struct tiler_block_t blk; /* block info */ + u32 num_pg; /* number of pages in page-list */ + u32 usr; /* user space address */ + u32 *pg_ptr; /* list of mapped struct page ptrs */ + struct tcm_area area; + u32 *mem; /* list of alloced phys addresses */ + int refs; /* number of times referenced */ + bool alloced; /* still alloced */ + + struct list_head by_area; /* blocks in the same area / 1D */ + void *parent; /* area info for 2D, else group info */ +}; + +/* tiler geometry information */ +struct tiler_geom { + u32 x_shft; /* unused X-bits (as part of bpp) */ + u32 y_shft; /* unused Y-bits (as part of bpp) */ + u32 bpp; /* bytes per pixel */ + u32 slot_w; /* width of each slot (in pixels) */ + u32 slot_h; /* height of each slot (in pixels) */ + u32 bpp_m; /* modified bytes per pixel (=1 for page mode) */ +}; + +/* methods and variables shared between source files */ +struct tiler_ops { + /* block operations */ + s32 (*alloc) (enum tiler_fmt fmt, u32 width, u32 height, + u32 align, u32 offs, u32 key, + u32 gid, struct process_info *pi, + struct mem_info **info); + s32 (*map) (enum tiler_fmt fmt, u32 width, u32 height, + u32 key, u32 gid, struct process_info *pi, + struct mem_info **info, u32 usr_addr); + void (*reserve_nv12) (u32 n, u32 width, u32 height, u32 align, u32 offs, + u32 gid, struct process_info *pi); + void (*reserve) (u32 n, enum tiler_fmt fmt, u32 width, u32 height, + u32 align, u32 offs, u32 gid, struct process_info *pi); + void (*unreserve) (u32 gid, struct process_info *pi); + + /* block access operations */ + struct mem_info * (*lock) (u32 key, u32 id, struct gid_info *gi); + struct mem_info * (*lock_by_ssptr) (u32 sys_addr); + void (*describe) (struct mem_info *i, struct tiler_block_info *blk); + void (*unlock_free) (struct mem_info *mi, bool free); + + s32 (*lay_2d) (enum tiler_fmt fmt, u16 n, u16 w, u16 h, u16 band, + u16 align, u16 offs, struct gid_info *gi, + struct list_head *pos); + s32 (*lay_nv12) (int n, u16 w, u16 w1, u16 h, struct gid_info *gi, + u8 *p); + /* group operations */ + struct gid_info * (*get_gi) (struct process_info *pi, u32 gid); + void (*release_gi) (struct gid_info *gi); + void 
(*destroy_group) (struct gid_info *pi); + + /* group access operations */ + void (*add_reserved) (struct list_head *reserved, struct gid_info *gi); + void (*release) (struct list_head *reserved); + + /* area operations */ + s32 (*analize) (enum tiler_fmt fmt, u32 width, u32 height, + u16 *x_area, u16 *y_area, u16 *band, + u16 *align, u16 *offs, u16 *in_offs); + + /* process operations */ + void (*cleanup) (void); + + /* geometry operations */ + void (*xy) (u32 ssptr, u32 *x, u32 *y); + u32 (*addr) (enum tiler_fmt fmt, u32 x, u32 y); + const struct tiler_geom * (*geom) (enum tiler_fmt fmt); + + /* additional info */ + const struct file_operations *fops; + + bool nv12_packed; /* whether NV12 is packed into same container */ + u32 page; /* page size */ + u32 width; /* container width */ + u32 height; /* container height */ +}; + +void tiler_iface_init(struct tiler_ops *tiler); +void tiler_geom_init(struct tiler_ops *tiler); +void tiler_reserve_init(struct tiler_ops *tiler); + +#endif diff --git a/drivers/media/video/tiler/tiler-geom.c b/drivers/media/video/tiler/tiler-geom.c new file mode 100644 index 0000000..f95ae5c --- /dev/null +++ b/drivers/media/video/tiler/tiler-geom.c @@ -0,0 +1,372 @@ +/* + * tiler-geom.c + * + * TILER geometry functions for TI TILER hardware block. + * + * Author: Lajos Molnar <molnar@ti.com> + * + * Copyright (C) 2009-2010 Texas Instruments, Inc. + * + * This package is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#include <linux/module.h> +#include "_tiler.h" + +/* bits representing the same slot in DMM-TILER hw-block */ +#define SLOT_WIDTH_BITS 6 +#define SLOT_HEIGHT_BITS 6 + +/* bits reserved to describe coordinates in DMM-TILER hw-block */ +#define CONT_WIDTH_BITS 14 +#define CONT_HEIGHT_BITS 13 + +static struct tiler_geom geom[TILER_FORMATS] = { + { + .x_shft = 0, + .y_shft = 0, + }, + { + .x_shft = 0, + .y_shft = 1, + }, + { + .x_shft = 1, + .y_shft = 1, + }, + { + .x_shft = SLOT_WIDTH_BITS, + .y_shft = SLOT_HEIGHT_BITS, + }, +}; + +/* tiler space addressing bitfields */ +#define MASK_XY_FLIP (1 << 31) +#define MASK_Y_INVERT (1 << 30) +#define MASK_X_INVERT (1 << 29) +#define SHIFT_ACC_MODE 27 +#define MASK_ACC_MODE 3 + +/* calculated constants */ +#define TILER_PAGE (1 << (SLOT_WIDTH_BITS + SLOT_HEIGHT_BITS)) +#define TILER_WIDTH (1 << (CONT_WIDTH_BITS - SLOT_WIDTH_BITS)) +#define TILER_HEIGHT (1 << (CONT_HEIGHT_BITS - SLOT_HEIGHT_BITS)) + +#define VIEW_SIZE (1u << (CONT_WIDTH_BITS + CONT_HEIGHT_BITS)) +#define VIEW_MASK (VIEW_SIZE - 1u) + +#define MASK(bits) ((1 << (bits)) - 1) + +#define TILER_FMT(x) ((enum tiler_fmt) \ + ((x >> SHIFT_ACC_MODE) & MASK_ACC_MODE)) + +#define MASK_VIEW (MASK_X_INVERT | MASK_Y_INVERT | MASK_XY_FLIP) + +/* location of the various tiler views in physical address space */ +#define TILVIEW_8BIT 0x60000000u +#define TILVIEW_16BIT (TILVIEW_8BIT + VIEW_SIZE) +#define TILVIEW_32BIT (TILVIEW_16BIT + VIEW_SIZE) +#define TILVIEW_PAGE (TILVIEW_32BIT + VIEW_SIZE) +#define TILVIEW_END (TILVIEW_PAGE + VIEW_SIZE) + +/* create tsptr by adding view orientation and access mode */ +#define TIL_ADDR(x, orient, a)\ + ((u32) (x) | (orient) | ((a) << SHIFT_ACC_MODE)) + +bool is_tiler_addr(u32 phys) +{ + return phys >= TILVIEW_8BIT && phys < TILVIEW_END; +} +EXPORT_SYMBOL(is_tiler_addr); + +u32 tiler_bpp(const struct tiler_block_t *b) +{ + enum tiler_fmt fmt = tiler_fmt(b->phys); + BUG_ON(fmt == TILFMT_INVALID); + + return geom[fmt].bpp_m; +} +EXPORT_SYMBOL(tiler_bpp); + +/* return the stride of a tiler-block in tiler space */ +static inline s32 tiler_stride(u32 tsptr) +{ + enum tiler_fmt fmt = TILER_FMT(tsptr); + + if (fmt == TILFMT_PAGE) + return 0; + else if (tsptr & MASK_XY_FLIP) + return 1 << (CONT_HEIGHT_BITS + geom[fmt].x_shft); + else + return 1 << (CONT_WIDTH_BITS + geom[fmt].y_shft); +} + +u32 tiler_pstride(const struct tiler_block_t *b) +{ + enum tiler_fmt fmt = tiler_fmt(b->phys); + BUG_ON(fmt == TILFMT_INVALID); + + /* return the virtual stride for page mode */ + if (fmt == TILFMT_PAGE) + return tiler_vstride(b); + + return tiler_stride(b->phys & ~MASK_VIEW); +} +EXPORT_SYMBOL(tiler_pstride); + +enum tiler_fmt tiler_fmt(u32 phys) +{ + if (!is_tiler_addr(phys)) + return TILFMT_INVALID; + + return TILER_FMT(phys); +} +EXPORT_SYMBOL(tiler_fmt); + +/* returns the tiler geometry information for a format */ +static const struct tiler_geom *get_geom(enum tiler_fmt fmt) +{ + if (fmt >= TILFMT_MIN && fmt <= TILFMT_MAX) + return geom + fmt; + return NULL; +} + +/** + * Returns the natural x and y coordinates for a pixel in tiler space address. + * That is, the coordinates for the same pixel in the natural (non-rotated, + * non-mirrored) view. This allows to uniquely identify a tiler pixel in any + * view orientation. 
+ */ +static void tiler_get_natural_xy(u32 tsptr, u32 *x, u32 *y) +{ + u32 x_bits, y_bits, offset; + enum tiler_fmt fmt; + + fmt = TILER_FMT(tsptr); + + x_bits = CONT_WIDTH_BITS - geom[fmt].x_shft; + y_bits = CONT_HEIGHT_BITS - geom[fmt].y_shft; + offset = (tsptr & VIEW_MASK) >> (geom[fmt].x_shft + geom[fmt].y_shft); + + /* separate coordinate bitfields based on view orientation */ + if (tsptr & MASK_XY_FLIP) { + *x = offset >> y_bits; + *y = offset & MASK(y_bits); + } else { + *x = offset & MASK(x_bits); + *y = offset >> x_bits; + } + + /* account for mirroring */ + if (tsptr & MASK_X_INVERT) + *x ^= MASK(x_bits); + if (tsptr & MASK_Y_INVERT) + *y ^= MASK(y_bits); +} + +/* calculate the tiler space address of a pixel in a view orientation */ +static u32 tiler_get_address(u32 orient, enum tiler_fmt fmt, u32 x, u32 y) +{ + u32 x_bits, y_bits, tmp, x_mask, y_mask, alignment; + + x_bits = CONT_WIDTH_BITS - geom[fmt].x_shft; + y_bits = CONT_HEIGHT_BITS - geom[fmt].y_shft; + alignment = geom[fmt].x_shft + geom[fmt].y_shft; + + /* validate coordinate */ + x_mask = MASK(x_bits); + y_mask = MASK(y_bits); + if (x < 0 || x > x_mask || y < 0 || y > y_mask) + return 0; + + /* account for mirroring */ + if (orient & MASK_X_INVERT) + x ^= x_mask; + if (orient & MASK_Y_INVERT) + y ^= y_mask; + + /* get coordinate address */ + if (orient & MASK_XY_FLIP) + tmp = ((x << y_bits) + y); + else + tmp = ((y << x_bits) + x); + + return TIL_ADDR((tmp << alignment), orient, fmt); +} + +void tilview_create(struct tiler_view_t *view, u32 phys, u32 width, u32 height) +{ + BUG_ON(!is_tiler_addr(phys)); + + view->tsptr = phys & ~MASK_VIEW; + view->bpp = geom[TILER_FMT(phys)].bpp_m; + view->width = width; + view->height = height; + view->h_inc = view->bpp; + view->v_inc = tiler_stride(view->tsptr); +} +EXPORT_SYMBOL(tilview_create); + +void tilview_get(struct tiler_view_t *view, struct tiler_block_t *blk) +{ + view->tsptr = blk->phys & ~MASK_VIEW; + view->bpp = tiler_bpp(blk); + view->width = blk->width; + view->height = blk->height; + view->h_inc = view->bpp; + view->v_inc = tiler_stride(view->tsptr); +} +EXPORT_SYMBOL(tilview_get); + +s32 tilview_crop(struct tiler_view_t *view, u32 left, u32 top, u32 width, + u32 height) +{ + /* check for valid crop */ + if (left + width < left || left + width > view->width || + top + height < top || top + height > view->height) + return -EINVAL; + + view->tsptr += left * view->h_inc + top * view->v_inc; + view->width = width; + view->height = height; + return 0; +} +EXPORT_SYMBOL(tilview_crop); + +/* calculate tilerspace address and stride after view orientation change */ +static void reorient(struct tiler_view_t *view, u32 orient) +{ + u32 x, y; + + tiler_get_natural_xy(view->tsptr, &x, &y); + view->tsptr = tiler_get_address(orient, + TILER_FMT(view->tsptr), x, y); + view->v_inc = tiler_stride(view->tsptr); +} + +s32 tilview_rotate(struct tiler_view_t *view, s32 rotation) +{ + u32 orient; + + if (rotation % 90) + return -EINVAL; + + /* normalize rotation to quarters */ + rotation = (rotation / 90) & 3; + if (!rotation) + return 0; /* nothing to do */ + + /* PAGE mode view cannot be rotated */ + if (TILER_FMT(view->tsptr) == TILFMT_PAGE) + return -EPERM; + + /* + * first adjust top-left corner. 
NOTE: it rotates counter-clockwise: + * 0 < 3 + * v ^ + * 1 > 2 + */ + if (rotation < 3) + view->tsptr += (view->height - 1) * view->v_inc; + if (rotation > 1) + view->tsptr += (view->width - 1) * view->h_inc; + + /* then rotate view itself */ + orient = view->tsptr & MASK_VIEW; + + /* rotate first 2 quarters */ + if (rotation & 2) { + orient ^= MASK_X_INVERT; + orient ^= MASK_Y_INVERT; + } + + /* rotate last quarter */ + if (rotation & 1) { + orient ^= (orient & MASK_XY_FLIP) ? + MASK_X_INVERT : MASK_Y_INVERT; + + /* swap x & y */ + orient ^= MASK_XY_FLIP; + swap(view->height, view->width); + } + + /* finally reorient view */ + reorient(view, orient); + return 0; +} +EXPORT_SYMBOL(tilview_rotate); + +s32 tilview_flip(struct tiler_view_t *view, bool flip_x, bool flip_y) +{ + u32 orient; + orient = view->tsptr & MASK_VIEW; + + if (!flip_x && !flip_y) + return 0; /* nothing to do */ + + /* PAGE mode view cannot be flipped */ + if (TILER_FMT(view->tsptr) == TILFMT_PAGE) + return -EPERM; + + /* adjust top-left corner */ + if (flip_x) + view->tsptr += (view->width - 1) * view->h_inc; + if (flip_y) + view->tsptr += (view->height - 1) * view->v_inc; + + /* flip view orientation */ + if (orient & MASK_XY_FLIP) + swap(flip_x, flip_y); + + if (flip_x) + orient ^= MASK_X_INVERT; + if (flip_y) + orient ^= MASK_Y_INVERT; + + /* finally reorient view */ + reorient(view, orient); + return 0; +} +EXPORT_SYMBOL(tilview_flip); + +/* return the alias address for a coordinate */ +static inline u32 alias_address(enum tiler_fmt fmt, u32 x, u32 y) +{ + return tiler_get_address(0, fmt, x, y) + TILVIEW_8BIT; +} + +/* get the coordinates for an alias address */ +static inline void alias_xy(u32 ssptr, u32 *x, u32 *y) +{ + tiler_get_natural_xy(ssptr & ~MASK_VIEW, x, y); +} + +/* initialize shared geometric data */ +void tiler_geom_init(struct tiler_ops *tiler) +{ + struct tiler_geom *g; + + tiler->xy = alias_xy; + tiler->addr = alias_address; + tiler->geom = get_geom; + + tiler->page = TILER_PAGE; + tiler->width = TILER_WIDTH; + tiler->height = TILER_HEIGHT; + + /* calculate geometry */ + for (g = geom; g < geom + TILER_FORMATS; g++) { + g->bpp_m = g->bpp = 1 << (g->x_shft + g->y_shft); + g->slot_w = 1 << (SLOT_WIDTH_BITS - g->x_shft); + g->slot_h = 1 << (SLOT_HEIGHT_BITS - g->y_shft); + } + + /* set bpp_m = 1 for page mode as most applications deal in byte data */ + geom[TILFMT_PAGE].bpp_m = 1; +} diff --git a/drivers/media/video/tiler/tiler-iface.c b/drivers/media/video/tiler/tiler-iface.c new file mode 100644 index 0000000..2961707 --- /dev/null +++ b/drivers/media/video/tiler/tiler-iface.c @@ -0,0 +1,273 @@ +/* + * tiler-iface.c + * + * TILER driver interace functions for TI TILER hardware block. + * + * Authors: Lajos Molnar <molnar@ti.com> + * David Sin <davidsin@ti.com> + * + * Copyright (C) 2009-2010 Texas Instruments, Inc. + * + * This package is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/fs.h> /* fops */ +#include <linux/uaccess.h> /* copy_to_user */ +#include <linux/slab.h> /* kmalloc */ +#include <linux/sched.h> /* current */ +#include <linux/mm.h> +#include <linux/mm_types.h> +#include <asm/mach/map.h> /* for ioremap_page */ + +#include "_tiler.h" + +static bool security = CONFIG_TILER_SECURITY; +static bool ssptr_lookup = true; +static bool offset_lookup = true; + +module_param(security, bool, 0644); +MODULE_PARM_DESC(security, + "Separate allocations by different processes into different pages"); +module_param(ssptr_lookup, bool, 0644); +MODULE_PARM_DESC(ssptr_lookup, + "Allow looking up a block by ssptr - This is a security risk"); +module_param(offset_lookup, bool, 0644); +MODULE_PARM_DESC(offset_lookup, + "Allow looking up a buffer by offset - This is a security risk"); + +static struct mutex mtx; +static struct list_head procs; /* list of process info structs */ +static struct tiler_ops *ops; /* shared methods and variables */ + +/* + * process_info handling methods + * ========================================================================== + */ + +/* get process info, and increment refs for device tracking */ +static struct process_info *__get_pi(pid_t pid, bool kernel) +{ + struct process_info *pi; + + /* + * treat all processes as the same, kernel processes are still treated + * differently so not to free kernel allocated areas when a user process + * closes the tiler driver + */ + if (!security) + pid = 0; + + /* find process context */ + mutex_lock(&mtx); + list_for_each_entry(pi, &procs, list) { + if (pi->pid == pid && pi->kernel == kernel) + goto done; + } + + /* create process context */ + pi = kmalloc(sizeof(*pi), GFP_KERNEL); + if (!pi) + goto done; + memset(pi, 0, sizeof(*pi)); + + pi->pid = pid; + pi->kernel = kernel; + INIT_LIST_HEAD(&pi->groups); + INIT_LIST_HEAD(&pi->bufs); + list_add(&pi->list, &procs); +done: + /* increment reference count */ + if (pi && !kernel) + pi->refs++; + mutex_unlock(&mtx); + return pi; +} + +/* initialize tiler interface */ +void tiler_iface_init(struct tiler_ops *tiler) +{ + ops = tiler; + ops->cleanup = NULL; + ops->fops = NULL; + +#ifdef CONFIG_TILER_SECURE + security = true; + offset_lookup = ssptr_lookup = false; +#endif + + mutex_init(&mtx); + INIT_LIST_HEAD(&procs); +} + +/* + * Kernel APIs + * ========================================================================== + */ + +u32 tiler_virt2phys(u32 usr) +{ + pmd_t *pmd; + pte_t *ptep; + pgd_t *pgd = pgd_offset(current->mm, usr); + + if (pgd_none(*pgd) || pgd_bad(*pgd)) + return 0; + + pmd = pmd_offset(pgd, usr); + if (pmd_none(*pmd) || pmd_bad(*pmd)) + return 0; + + ptep = pte_offset_map(pmd, usr); + if (ptep && pte_present(*ptep)) + return (*ptep & PAGE_MASK) | (~PAGE_MASK & usr); + + return 0; +} +EXPORT_SYMBOL(tiler_virt2phys); + +void tiler_reservex(u32 n, enum tiler_fmt fmt, u32 width, u32 height, + u32 align, u32 offs, u32 gid, pid_t pid) +{ + struct process_info *pi = __get_pi(pid, true); + + if (pi) + ops->reserve(n, fmt, width, height, align, offs, gid, pi); +} +EXPORT_SYMBOL(tiler_reservex); + +void tiler_reserve(u32 n, enum tiler_fmt fmt, u32 width, u32 height, + u32 align, u32 offs) +{ + tiler_reservex(n, fmt, width, height, align, offs, 0, current->tgid); +} +EXPORT_SYMBOL(tiler_reserve); + +void tiler_reservex_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs, + u32 gid, pid_t pid) +{ + struct process_info *pi = __get_pi(pid, true); + + if (pi) + ops->reserve_nv12(n, 
width, height, align, offs, gid, pi); +} +EXPORT_SYMBOL(tiler_reservex_nv12); + +void tiler_reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs) +{ + tiler_reservex_nv12(n, width, height, align, offs, 0, current->tgid); +} +EXPORT_SYMBOL(tiler_reserve_nv12); + +s32 tiler_allocx(struct tiler_block_t *blk, enum tiler_fmt fmt, + u32 align, u32 offs, u32 gid, pid_t pid) +{ + struct mem_info *mi; + struct process_info *pi; + s32 res; + + BUG_ON(!blk || blk->phys); + + pi = __get_pi(pid, true); + if (!pi) + return -ENOMEM; + + res = ops->alloc(fmt, blk->width, blk->height, align, offs, blk->key, + gid, pi, &mi); + if (mi) { + blk->phys = mi->blk.phys; + blk->id = mi->blk.id; + } + return res; +} +EXPORT_SYMBOL(tiler_allocx); + +s32 tiler_alloc(struct tiler_block_t *blk, enum tiler_fmt fmt, + u32 align, u32 offs) +{ + return tiler_allocx(blk, fmt, align, offs, 0, current->tgid); +} +EXPORT_SYMBOL(tiler_alloc); + +s32 tiler_mapx(struct tiler_block_t *blk, enum tiler_fmt fmt, u32 gid, + pid_t pid, u32 usr_addr) +{ + struct mem_info *mi; + struct process_info *pi; + s32 res; + + BUG_ON(!blk || blk->phys); + + pi = __get_pi(pid, true); + if (!pi) + return -ENOMEM; + + res = ops->map(fmt, blk->width, blk->height, blk->key, gid, pi, &mi, + usr_addr); + if (mi) { + blk->phys = mi->blk.phys; + blk->id = mi->blk.id; + } + return res; + +} +EXPORT_SYMBOL(tiler_mapx); + +s32 tiler_map(struct tiler_block_t *blk, enum tiler_fmt fmt, u32 usr_addr) +{ + return tiler_mapx(blk, fmt, 0, current->tgid, usr_addr); +} +EXPORT_SYMBOL(tiler_map); + +s32 tiler_ioremap_blk(struct tiler_block_t *blk, u32 offs, u32 size, + u32 addr, u32 mtype) +{ + u32 v, p; + u32 len; /* area to map */ + const struct mem_type *type = get_mem_type(mtype); + + /* mapping must fit into address space */ + BUG_ON(addr > addr + size); + + /* mapping must fit into block */ + BUG_ON(offs > offs + size || offs + size > tiler_size(blk)); + + v = tiler_vstride(blk); + p = tiler_pstride(blk); + + /* move offset and address to end */ + offs += blk->phys + size; + addr += size; + + len = v - (offs % v); /* initial area to map */ + while (size) { + while (len && size) { + if (ioremap_page(addr - size, offs - size, type)) + return -EAGAIN; + len -= PAGE_SIZE; + size -= PAGE_SIZE; + } + + offs += p - v; + len = v; /* subsequent area to map */ + } + return 0; +} +EXPORT_SYMBOL(tiler_ioremap_blk); + +void tiler_free(struct tiler_block_t *blk) +{ + /* find block */ + struct mem_info *mi = ops->lock(blk->key, blk->id, NULL); + if (mi) + ops->unlock_free(mi, true); + blk->phys = blk->id = 0; +} +EXPORT_SYMBOL(tiler_free); diff --git a/drivers/media/video/tiler/tiler-main.c b/drivers/media/video/tiler/tiler-main.c new file mode 100644 index 0000000..5d72db1 --- /dev/null +++ b/drivers/media/video/tiler/tiler-main.c @@ -0,0 +1,1270 @@ +/* + * tiler-main.c + * + * TILER driver main support functions for TI TILER hardware block. + * + * Authors: Lajos Molnar <molnar@ti.com> + * David Sin <davidsin@ti.com> + * + * Copyright (C) 2009-2010 Texas Instruments, Inc. + * + * This package is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/cdev.h> /* struct cdev */ +#include <linux/kdev_t.h> /* MKDEV() */ +#include <linux/fs.h> /* register_chrdev_region() */ +#include <linux/device.h> /* struct class */ +#include <linux/platform_device.h> /* platform_device() */ +#include <linux/err.h> /* IS_ERR() */ +#include <linux/errno.h> +#include <linux/mutex.h> +#include <linux/dma-mapping.h> /* dma_alloc_coherent */ +#include <linux/pagemap.h> /* page_cache_release() */ +#include <linux/slab.h> +#include <linux/sched.h> + +#include <mach/dmm.h> +#include "tmm.h" +#include "_tiler.h" +#include "tcm/tcm-sita.h" /* TCM algorithm */ + +static bool ssptr_id = CONFIG_TILER_SSPTR_ID; +static uint default_align = CONFIG_TILER_ALIGNMENT; +static uint granularity = CONFIG_TILER_GRANULARITY; + +/* + * We can only change ssptr_id if there are no blocks allocated, so that + * pseudo-random ids and ssptrs do not potentially clash. For now make it + * read-only. + */ +module_param(ssptr_id, bool, 0444); +MODULE_PARM_DESC(ssptr_id, "Use ssptr as block ID"); +module_param_named(align, default_align, uint, 0644); +MODULE_PARM_DESC(align, "Default block ssptr alignment"); +module_param_named(grain, granularity, uint, 0644); +MODULE_PARM_DESC(grain, "Granularity (bytes)"); + +struct tiler_dev { + struct cdev cdev; +}; + +struct platform_driver tiler_driver_ldm = { + .driver = { + .owner = THIS_MODULE, + .name = "tiler", + }, + .probe = NULL, + .shutdown = NULL, + .remove = NULL, +}; + +static struct tiler_ops tiler; /* shared methods and variables */ + +static struct list_head blocks; /* all tiler blocks */ +static struct list_head orphan_areas; /* orphaned 2D areas */ +static struct list_head orphan_onedim; /* orphaned 1D areas */ + +static s32 tiler_major; +static s32 tiler_minor; +static struct tiler_dev *tiler_device; +static struct class *tilerdev_class; +static struct mutex mtx; +static struct tcm *tcm[TILER_FORMATS]; +static struct tmm *tmm[TILER_FORMATS]; +static u32 *dmac_va; +static dma_addr_t dmac_pa; + +/* + * TMM connectors + * ========================================================================== + */ +/* wrapper around tmm_map */ +static s32 refill_pat(struct tmm *tmm, struct tcm_area *area, u32 *ptr) +{ + s32 res = 0; + struct pat_area p_area = {0}; + struct tcm_area slice, area_s; + + tcm_for_each_slice(slice, *area, area_s) { + p_area.x0 = slice.p0.x; + p_area.y0 = slice.p0.y; + p_area.x1 = slice.p1.x; + p_area.y1 = slice.p1.y; + + memcpy(dmac_va, ptr, sizeof(*ptr) * tcm_sizeof(slice)); + ptr += tcm_sizeof(slice); + + if (tmm_map(tmm, p_area, dmac_pa)) { + res = -EFAULT; + break; + } + } + + return res; +} + +/* wrapper around tmm_clear */ +static void clear_pat(struct tmm *tmm, struct tcm_area *area) +{ + struct pat_area p_area = {0}; + struct tcm_area slice, area_s; + + tcm_for_each_slice(slice, *area, area_s) { + p_area.x0 = slice.p0.x; + p_area.y0 = slice.p0.y; + p_area.x1 = slice.p1.x; + p_area.y1 = slice.p1.y; + + tmm_clear(tmm, p_area); + } +} + +/* + * ID handling methods + * ========================================================================== + */ + +/* check if an id is used */ +static bool _m_id_in_use(u32 id) +{ + struct mem_info *mi; + list_for_each_entry(mi, &blocks, global) + if (mi->blk.id == id) + return 1; + return 0; +} + +/* get an id */ +static u32 _m_get_id(void) +{ + static u32 id = 0x2d7ae; + + /* ensure noone is using this id */ + while (_m_id_in_use(id)) { + /* generate a new pseudo-random ID */ + + /* Galois LSFR: 32, 22, 2, 
1 */ + id = (id >> 1) ^ (u32)((0 - (id & 1u)) & 0x80200003u); + } + + return id; +} + +/* + * gid_info handling methods + * ========================================================================== + */ + +/* get or create new gid_info object */ +static struct gid_info *_m_get_gi(struct process_info *pi, u32 gid) +{ + struct gid_info *gi; + + /* have mutex */ + + /* see if group already exist */ + list_for_each_entry(gi, &pi->groups, by_pid) { + if (gi->gid == gid) + goto done; + } + + /* create new group */ + gi = kmalloc(sizeof(*gi), GFP_KERNEL); + if (!gi) + return gi; + + memset(gi, 0, sizeof(*gi)); + INIT_LIST_HEAD(&gi->areas); + INIT_LIST_HEAD(&gi->onedim); + INIT_LIST_HEAD(&gi->reserved); + gi->pi = pi; + gi->gid = gid; + list_add(&gi->by_pid, &pi->groups); +done: + /* + * Once area is allocated, the group info's ref count will be + * decremented as the reference is no longer needed. + */ + gi->refs++; + return gi; +} + +/* free gid_info object if empty */ +static void _m_try_free_group(struct gid_info *gi) +{ + /* have mutex */ + if (gi && list_empty(&gi->areas) && list_empty(&gi->onedim) && + /* also ensure noone is still using this group */ + !gi->refs) { + BUG_ON(!list_empty(&gi->reserved)); + list_del(&gi->by_pid); + + /* if group is tracking kernel objects, we may free even + the process info */ + if (gi->pi->kernel && list_empty(&gi->pi->groups)) { + list_del(&gi->pi->list); + kfree(gi->pi); + } + + kfree(gi); + } +} + +/* --- external versions --- */ + +static struct gid_info *get_gi(struct process_info *pi, u32 gid) +{ + struct gid_info *gi; + mutex_lock(&mtx); + gi = _m_get_gi(pi, gid); + mutex_unlock(&mtx); + return gi; +} + +static void release_gi(struct gid_info *gi) +{ + mutex_lock(&mtx); + gi->refs--; + _m_try_free_group(gi); + mutex_unlock(&mtx); +} + +/* + * Area handling methods + * ========================================================================== + */ + +/* allocate an reserved area of size, alignment and link it to gi */ +/* leaves mutex locked to be able to add block to area */ +static struct area_info *area_new_m(u16 width, u16 height, u16 align, + struct tcm *tcm, struct gid_info *gi) +{ + struct area_info *ai = kmalloc(sizeof(*ai), GFP_KERNEL); + if (!ai) + return NULL; + + /* set up empty area info */ + memset(ai, 0x0, sizeof(*ai)); + INIT_LIST_HEAD(&ai->blocks); + + /* reserve an allocation area */ + if (tcm_reserve_2d(tcm, width, height, align, &ai->area)) { + kfree(ai); + return NULL; + } + + ai->gi = gi; + mutex_lock(&mtx); + list_add_tail(&ai->by_gid, &gi->areas); + return ai; +} + +/* (must have mutex) free an area */ +static inline void _m_area_free(struct area_info *ai) +{ + if (ai) { + list_del(&ai->by_gid); + kfree(ai); + } +} + +static s32 __analize_area(enum tiler_fmt fmt, u32 width, u32 height, + u16 *x_area, u16 *y_area, u16 *band, + u16 *align, u16 *offs, u16 *in_offs) +{ + /* input: width, height is in pixels, align, offs in bytes */ + /* output: x_area, y_area, band, align, offs in slots */ + + /* slot width, height, and row size */ + u32 slot_row, min_align; + const struct tiler_geom *g; + + /* width and height must be positive */ + if (!width || !height) + return -EINVAL; + + /* align must be 2 power */ + if (*align & (*align - 1)) + return -EINVAL; + + if (fmt == TILFMT_PAGE) { + /* adjust size to accomodate offset, only do page alignment */ + *align = PAGE_SIZE; + *in_offs = *offs & ~PAGE_MASK; + width += *in_offs; + + /* for 1D area keep the height (1), width is in tiler slots */ + *x_area = DIV_ROUND_UP(width, tiler.page); + 
*y_area = *band = 1; + + if (*x_area * *y_area > tiler.width * tiler.height) + return -ENOMEM; + return 0; + } + + /* format must be valid */ + g = tiler.geom(fmt); + if (!g) + return -EINVAL; + + /* get the # of bytes per row in 1 slot */ + slot_row = g->slot_w * g->bpp; + + /* how many slots are can be accessed via one physical page */ + *band = PAGE_SIZE / slot_row; + + /* minimum alignment is at least 1 slot. Use default if needed */ + min_align = max(slot_row, granularity); + *align = ALIGN(*align ? : default_align, min_align); + + /* align must still be 2 power (in case default_align is wrong) */ + if (*align & (*align - 1)) + return -EAGAIN; + + /* offset must be multiple of bpp */ + if (*offs & (g->bpp - 1) || *offs >= *align) + return -EINVAL; + + /* round down the offset to the nearest slot size, and increase width + to allow space for having the correct offset */ + width += (*offs & (min_align - 1)) / g->bpp; + if (in_offs) + *in_offs = *offs & (min_align - 1); + *offs &= ~(min_align - 1); + + /* expand width to block size */ + width = ALIGN(width, min_align / g->bpp); + + /* adjust to slots */ + *x_area = DIV_ROUND_UP(width, g->slot_w); + *y_area = DIV_ROUND_UP(height, g->slot_h); + *align /= slot_row; + *offs /= slot_row; + + if (*x_area > tiler.width || *y_area > tiler.height) + return -ENOMEM; + return 0; +} + +/** + * Find a place where a 2D block would fit into a 2D area of the + * same height. + * + * @author a0194118 (3/19/2010) + * + * @param w Width of the block. + * @param align Alignment of the block. + * @param offs Offset of the block (within alignment) + * @param ai Pointer to area info + * @param next Pointer to the variable where the next block + * will be stored. The block should be inserted + * before this block. + * + * @return the end coordinate (x1 + 1) where a block would fit, + * or 0 if it does not fit. + * + * (must have mutex) + */ +static u16 _m_blk_find_fit(u16 w, u16 align, u16 offs, + struct area_info *ai, struct list_head **before) +{ + int x = ai->area.p0.x + w + offs; + struct mem_info *mi; + + /* area blocks are sorted by x */ + list_for_each_entry(mi, &ai->blocks, by_area) { + /* check if buffer would fit before this area */ + if (x <= mi->area.p0.x) { + *before = &mi->by_area; + return x; + } + x = ALIGN(mi->area.p1.x + 1 - offs, align) + w + offs; + } + *before = &ai->blocks; + + /* check if buffer would fit after last area */ + return (x <= ai->area.p1.x + 1) ? 
x : 0; +} + +/* (must have mutex) adds a block to an area with certain x coordinates */ +static inline +struct mem_info *_m_add2area(struct mem_info *mi, struct area_info *ai, + u16 x0, u16 w, struct list_head *before) +{ + mi->parent = ai; + mi->area = ai->area; + mi->area.p0.x = x0; + mi->area.p1.x = x0 + w - 1; + list_add_tail(&mi->by_area, before); + ai->nblocks++; + return mi; +} + +static struct mem_info *get_2d_area(u16 w, u16 h, u16 align, u16 offs, u16 band, + struct gid_info *gi, struct tcm *tcm) +{ + struct area_info *ai = NULL; + struct mem_info *mi = NULL; + struct list_head *before = NULL; + u16 x = 0; /* this holds the end of a potential area */ + + /* allocate map info */ + + /* see if there is available prereserved space */ + mutex_lock(&mtx); + list_for_each_entry(mi, &gi->reserved, global) { + if (mi->area.tcm == tcm && + tcm_aheight(mi->area) == h && + tcm_awidth(mi->area) == w && + (mi->area.p0.x & (align - 1)) == offs) { + /* this area is already set up */ + + /* remove from reserved list */ + list_del(&mi->global); + goto done; + } + } + mutex_unlock(&mtx); + + /* if not, reserve a block struct */ + mi = kmalloc(sizeof(*mi), GFP_KERNEL); + if (!mi) + return mi; + memset(mi, 0, sizeof(*mi)); + + /* see if allocation fits in one of the existing areas */ + /* this sets x, ai and before */ + mutex_lock(&mtx); + list_for_each_entry(ai, &gi->areas, by_gid) { + if (ai->area.tcm == tcm && + tcm_aheight(ai->area) == h) { + x = _m_blk_find_fit(w, align, offs, ai, &before); + if (x) { + _m_add2area(mi, ai, x - w, w, before); + goto done; + } + } + } + mutex_unlock(&mtx); + + /* if no area fit, reserve a new one */ + ai = area_new_m(ALIGN(w + offs, max(band, align)), h, + max(band, align), tcm, gi); + if (ai) { + _m_add2area(mi, ai, ai->area.p0.x + offs, w, &ai->blocks); + } else { + /* clean up */ + kfree(mi); + return NULL; + } + +done: + mutex_unlock(&mtx); + return mi; +} + +/* layout reserved 2d blocks in a larger area */ +/* NOTE: band, w, h, a(lign), o(ffs) is in slots */ +static s32 lay_2d(enum tiler_fmt fmt, u16 n, u16 w, u16 h, u16 band, + u16 align, u16 offs, struct gid_info *gi, + struct list_head *pos) +{ + u16 x, x0, e = ALIGN(w, align), w_res = (n - 1) * e + w; + struct mem_info *mi = NULL; + struct area_info *ai = NULL; + + printk(KERN_INFO "packing %u %u buffers into %u width\n", + n, w, w_res); + + /* calculate dimensions, band, offs and alignment in slots */ + /* reserve an area */ + ai = area_new_m(ALIGN(w_res + offs, max(band, align)), h, + max(band, align), tcm[fmt], gi); + if (!ai) + return -ENOMEM; + + /* lay out blocks in the reserved area */ + for (n = 0, x = offs; x < w_res; x += e, n++) { + /* reserve a block struct */ + mi = kmalloc(sizeof(*mi), GFP_KERNEL); + if (!mi) + break; + + memset(mi, 0, sizeof(*mi)); + x0 = ai->area.p0.x + x; + _m_add2area(mi, ai, x0, w, &ai->blocks); + list_add(&mi->global, pos); + } + + mutex_unlock(&mtx); + return n; +} + +/* layout reserved nv12 blocks in a larger area */ +/* NOTE: area w(idth), w1 (8-bit block width), h(eight) are in slots */ +/* p is a pointer to a packing description, which is a list of offsets in + the area for consecutive 8-bit and 16-bit blocks */ +static s32 lay_nv12(int n, u16 w, u16 w1, u16 h, struct gid_info *gi, u8 *p) +{ + u16 wh = (w1 + 1) >> 1, width, x0; + int m; + int a = PAGE_SIZE / tiler.geom(TILFMT_8BIT)->slot_w; + + struct mem_info *mi = NULL; + struct area_info *ai = NULL; + struct list_head *pos; + + /* reserve area */ + ai = area_new_m(w, h, a, TILFMT_8BIT, gi); + if (!ai) + return 
-ENOMEM; + + /* lay out blocks in the reserved area */ + for (m = 0; m < 2 * n; m++) { + width = (m & 1) ? wh : w1; + x0 = ai->area.p0.x + *p++; + + /* get insertion head */ + list_for_each(pos, &ai->blocks) { + mi = list_entry(pos, struct mem_info, by_area); + if (mi->area.p0.x > x0) + break; + } + + /* reserve a block struct */ + mi = kmalloc(sizeof(*mi), GFP_KERNEL); + if (!mi) + break; + + memset(mi, 0, sizeof(*mi)); + + _m_add2area(mi, ai, x0, width, pos); + list_add(&mi->global, &gi->reserved); + } + + mutex_unlock(&mtx); + return n; +} + +/* (must have mutex) free block and any freed areas */ +static s32 _m_free(struct mem_info *mi) +{ + struct area_info *ai = NULL; + struct page *page = NULL; + s32 res = 0; + u32 i; + + /* release memory */ + if (mi->pg_ptr) { + for (i = 0; i < mi->num_pg; i++) { + page = (struct page *)mi->pg_ptr[i]; + if (page) { + if (!PageReserved(page)) + SetPageDirty(page); + page_cache_release(page); + } + } + kfree(mi->pg_ptr); + } else if (mi->mem) { + tmm_free(tmm[tiler_fmt(mi->blk.phys)], mi->mem); + } + clear_pat(tmm[tiler_fmt(mi->blk.phys)], &mi->area); + + /* safe deletion as list may not have been assigned */ + if (mi->global.next) + list_del(&mi->global); + if (mi->by_area.next) + list_del(&mi->by_area); + + /* remove block from area first if 2D */ + if (mi->area.is2d) { + ai = mi->parent; + + /* check to see if area needs removing also */ + if (ai && !--ai->nblocks) { + res = tcm_free(&ai->area); + list_del(&ai->by_gid); + /* try to remove parent if it became empty */ + _m_try_free_group(ai->gi); + kfree(ai); + ai = NULL; + } + } else { + /* remove 1D area */ + res = tcm_free(&mi->area); + /* try to remove parent if it became empty */ + _m_try_free_group(mi->parent); + } + + kfree(mi); + return res; +} + +/* (must have mutex) returns true if block was freed */ +static bool _m_chk_ref(struct mem_info *mi) +{ + /* check references */ + if (mi->refs) + return 0; + + if (_m_free(mi)) + printk(KERN_ERR "error while removing tiler block\n"); + + return 1; +} + +/* (must have mutex) */ +static inline bool _m_dec_ref(struct mem_info *mi) +{ + if (mi->refs-- <= 1) + return _m_chk_ref(mi); + + return 0; +} + +/* (must have mutex) */ +static inline void _m_inc_ref(struct mem_info *mi) +{ + mi->refs++; +} + +/* (must have mutex) returns true if block was freed */ +static inline bool _m_try_free(struct mem_info *mi) +{ + if (mi->alloced) { + mi->refs--; + mi->alloced = false; + } + return _m_chk_ref(mi); +} + +/* --- external methods --- */ + +/* find a block by key/id and lock it */ +static struct mem_info * +find_n_lock(u32 key, u32 id, struct gid_info *gi) { + struct area_info *ai = NULL; + struct mem_info *mi = NULL; + + mutex_lock(&mtx); + + /* if group is not given, look globally */ + if (!gi) { + list_for_each_entry(mi, &blocks, global) { + if (mi->blk.key == key && mi->blk.id == id) + goto done; + } + } else { + /* is id is ssptr, we know if block is 1D or 2D by the address, + so we optimize lookup */ + if (!ssptr_id || + tiler_fmt(id) == TILFMT_PAGE) { + list_for_each_entry(mi, &gi->onedim, by_area) { + if (mi->blk.key == key && mi->blk.id == id) + goto done; + } + } + + if (!ssptr_id || + tiler_fmt(id) != TILFMT_PAGE) { + list_for_each_entry(ai, &gi->areas, by_gid) { + list_for_each_entry(mi, &ai->blocks, by_area) { + if (mi->blk.key == key && + mi->blk.id == id) + goto done; + } + } + } + } + + mi = NULL; +done: + /* lock block by increasing its ref count */ + if (mi) + mi->refs++; + + mutex_unlock(&mtx); + + return mi; +} + +/* unlock a block, and 
optionally free it */ +static void unlock_n_free(struct mem_info *mi, bool free) +{ + mutex_lock(&mtx); + + _m_dec_ref(mi); + if (free) + _m_try_free(mi); + + mutex_unlock(&mtx); +} + +/** + * Free all blocks in a group: + * + * allocated blocks, and unreferenced blocks. Any blocks/areas still referenced + * will move to the orphaned lists to avoid issues if a new process is created + * with the same pid. + * + * (must have mutex) + */ +static void destroy_group(struct gid_info *gi) +{ + struct area_info *ai, *ai_; + struct mem_info *mi, *mi_; + bool ai_autofreed, need2free; + + mutex_lock(&mtx); + + /* free all allocated blocks, and remove unreferenced ones */ + + /* + * Group info structs when they become empty on an _m_try_free. + * However, if the group info is already empty, we need to + * remove it manually + */ + need2free = list_empty(&gi->areas) && list_empty(&gi->onedim); + list_for_each_entry_safe(ai, ai_, &gi->areas, by_gid) { + ai_autofreed = true; + list_for_each_entry_safe(mi, mi_, &ai->blocks, by_area) + ai_autofreed &= _m_try_free(mi); + + /* save orphaned areas for later removal */ + if (!ai_autofreed) { + need2free = true; + ai->gi = NULL; + list_move(&ai->by_gid, &orphan_areas); + } + } + + list_for_each_entry_safe(mi, mi_, &gi->onedim, by_area) { + if (!_m_try_free(mi)) { + need2free = true; + /* save orphaned 1D blocks */ + mi->parent = NULL; + list_move(&mi->by_area, &orphan_onedim); + } + } + + /* if group is still alive reserved list should have been + emptied as there should be no reference on those blocks */ + if (need2free) { + BUG_ON(!list_empty(&gi->onedim)); + BUG_ON(!list_empty(&gi->areas)); + _m_try_free_group(gi); + } + + mutex_unlock(&mtx); +} + +/* release (reserved) blocks */ +static void release_blocks(struct list_head *reserved) +{ + struct mem_info *mi, *mi_; + + mutex_lock(&mtx); + + /* find block in global list and free it */ + list_for_each_entry_safe(mi, mi_, reserved, global) { + BUG_ON(mi->refs || mi->alloced); + _m_free(mi); + } + mutex_unlock(&mtx); +} + +/* add reserved blocks to a group */ +static void add_reserved_blocks(struct list_head *reserved, struct gid_info *gi) +{ + mutex_lock(&mtx); + list_splice_init(reserved, &gi->reserved); + mutex_unlock(&mtx); +} + +/* find a block by ssptr */ +static struct mem_info *find_block_by_ssptr(u32 sys_addr) +{ + struct mem_info *i; + struct tcm_pt pt; + u32 x, y; + enum tiler_fmt fmt; + const struct tiler_geom *g; + + fmt = tiler_fmt(sys_addr); + if (fmt == TILFMT_INVALID) + return NULL; + + g = tiler.geom(fmt); + + /* convert x & y pixel coordinates to slot coordinates */ + tiler.xy(sys_addr, &x, &y); + pt.x = x / g->slot_w; + pt.y = y / g->slot_h; + + mutex_lock(&mtx); + list_for_each_entry(i, &blocks, global) { + if (tiler_fmt(i->blk.phys) == tiler_fmt(sys_addr) && + tcm_is_in(pt, i->area)) { + i->refs++; + goto found; + } + } + i = NULL; + +found: + mutex_unlock(&mtx); + return i; +} + +/* find a block by ssptr */ +static void fill_block_info(struct mem_info *i, struct tiler_block_info *blk) +{ + blk->fmt = tiler_fmt(i->blk.phys); +#ifdef CONFIG_TILER_EXPOSE_SSPTR + blk->ssptr = i->blk.phys; +#endif + if (blk->fmt == TILFMT_PAGE) { + blk->dim.len = i->blk.width; + blk->group_id = ((struct gid_info *) i->parent)->gid; + } else { + blk->stride = tiler_vstride(&i->blk); + blk->dim.area.width = i->blk.width; + blk->dim.area.height = i->blk.height; + blk->group_id = ((struct area_info *) i->parent)->gi->gid; + } + blk->id = i->blk.id; + blk->key = i->blk.key; + blk->offs = i->blk.phys & ~PAGE_MASK; 
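+	/* userspace maps blocks at page granularity, hence the in-page
+	 * offset above and the PAGE_SIZE alignment reported just below */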
+ blk->align = PAGE_SIZE; +} + +/* + * Block operations + * ========================================================================== + */ + +static struct mem_info *__get_area(enum tiler_fmt fmt, u32 width, u32 height, + u16 align, u16 offs, struct gid_info *gi) +{ + u16 x, y, band, in_offs = 0; + struct mem_info *mi = NULL; + const struct tiler_geom *g = tiler.geom(fmt); + + /* calculate dimensions, band, offs and alignment in slots */ + if (__analize_area(fmt, width, height, &x, &y, &band, &align, &offs, + &in_offs)) + return NULL; + + if (fmt == TILFMT_PAGE) { + /* 1D areas don't pack */ + mi = kmalloc(sizeof(*mi), GFP_KERNEL); + if (!mi) + return NULL; + memset(mi, 0x0, sizeof(*mi)); + + if (tcm_reserve_1d(tcm[fmt], x * y, &mi->area)) { + kfree(mi); + return NULL; + } + + mutex_lock(&mtx); + mi->parent = gi; + list_add(&mi->by_area, &gi->onedim); + } else { + mi = get_2d_area(x, y, align, offs, band, gi, tcm[fmt]); + if (!mi) + return NULL; + + mutex_lock(&mtx); + } + + list_add(&mi->global, &blocks); + mi->alloced = true; + mi->refs++; + gi->refs--; + mutex_unlock(&mtx); + + mi->blk.phys = tiler.addr(fmt, + mi->area.p0.x * g->slot_w, mi->area.p0.y * g->slot_h) + + in_offs; + return mi; +} + +static s32 alloc_block(enum tiler_fmt fmt, u32 width, u32 height, + u32 align, u32 offs, u32 key, u32 gid, struct process_info *pi, + struct mem_info **info) +{ + struct mem_info *mi = NULL; + struct gid_info *gi = NULL; + + *info = NULL; + + /* only support up to page alignment */ + if (align > PAGE_SIZE || offs >= (align ? : default_align) || !pi) + return -EINVAL; + + /* get group context */ + mutex_lock(&mtx); + gi = _m_get_gi(pi, gid); + mutex_unlock(&mtx); + + if (!gi) + return -ENOMEM; + + /* reserve area in tiler container */ + mi = __get_area(fmt, width, height, align, offs, gi); + if (!mi) { + mutex_lock(&mtx); + gi->refs--; + _m_try_free_group(gi); + mutex_unlock(&mtx); + return -ENOMEM; + } + + mi->blk.width = width; + mi->blk.height = height; + mi->blk.key = key; + if (ssptr_id) { + mi->blk.id = mi->blk.phys; + } else { + mutex_lock(&mtx); + mi->blk.id = _m_get_id(); + mutex_unlock(&mtx); + } + + /* allocate and map if mapping is supported */ + if (tmm_can_map(tmm[fmt])) { + mi->num_pg = tcm_sizeof(mi->area); + + mi->mem = tmm_get(tmm[fmt], mi->num_pg); + if (!mi->mem) + goto cleanup; + + /* Ensure the data reaches to main memory before PAT refill */ + wmb(); + + /* program PAT */ + if (refill_pat(tmm[fmt], &mi->area, mi->mem)) + goto cleanup; + } + *info = mi; + return 0; + +cleanup: + mutex_lock(&mtx); + _m_free(mi); + mutex_unlock(&mtx); + return -ENOMEM; + +} + +static s32 map_block(enum tiler_fmt fmt, u32 width, u32 height, + u32 key, u32 gid, struct process_info *pi, + struct mem_info **info, u32 usr_addr) +{ + u32 i = 0, tmp = -1, *mem = NULL; + u8 write = 0; + s32 res = -ENOMEM; + struct mem_info *mi = NULL; + struct page *page = NULL; + struct task_struct *curr_task = current; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma = NULL; + struct gid_info *gi = NULL; + + *info = NULL; + + /* we only support mapping a user buffer in page mode */ + if (fmt != TILFMT_PAGE) + return -EPERM; + + /* check if mapping is supported by tmm */ + if (!tmm_can_map(tmm[fmt])) + return -EPERM; + + /* get group context */ + mutex_lock(&mtx); + gi = _m_get_gi(pi, gid); + mutex_unlock(&mtx); + + if (!gi) + return -ENOMEM; + + /* reserve area in tiler container */ + mi = __get_area(fmt, width, height, 0, 0, gi); + if (!mi) { + mutex_lock(&mtx); + gi->refs--; + 
_m_try_free_group(gi); + mutex_unlock(&mtx); + return -ENOMEM; + } + + mi->blk.width = width; + mi->blk.height = height; + mi->blk.key = key; + if (ssptr_id) { + mi->blk.id = mi->blk.phys; + } else { + mutex_lock(&mtx); + mi->blk.id = _m_get_id(); + mutex_unlock(&mtx); + } + + mi->usr = usr_addr; + + /* allocate pages */ + mi->num_pg = tcm_sizeof(mi->area); + + mem = kmalloc(mi->num_pg * sizeof(*mem), GFP_KERNEL); + if (!mem) + goto done; + memset(mem, 0x0, sizeof(*mem) * mi->num_pg); + + mi->pg_ptr = kmalloc(mi->num_pg * sizeof(*mi->pg_ptr), GFP_KERNEL); + if (!mi->pg_ptr) + goto done; + memset(mi->pg_ptr, 0x0, sizeof(*mi->pg_ptr) * mi->num_pg); + + /* + * Important Note: usr_addr is mapped from user + * application process to current process - it must lie + * completely within the current virtual memory address + * space in order to be of use to us here. + */ + down_read(&mm->mmap_sem); + vma = find_vma(mm, mi->usr); + res = -EFAULT; + + /* + * It is observed that under some circumstances, the user + * buffer is spread across several vmas, so loop through + * and check if the entire user buffer is covered. + */ + while ((vma) && (mi->usr + width > vma->vm_end)) { + /* jump to the next VMA region */ + vma = find_vma(mm, vma->vm_end + 1); + } + if (!vma) { + printk(KERN_ERR "Failed to get the vma region for " + "user buffer.\n"); + goto fault; + } + + if (vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) + write = 1; + + tmp = mi->usr; + for (i = 0; i < mi->num_pg; i++) { + if (get_user_pages(curr_task, mm, tmp, 1, write, 1, &page, + NULL) && page) { + if (page_count(page) < 1) { + printk(KERN_ERR "Bad page count from" + "get_user_pages()\n"); + } + mi->pg_ptr[i] = (u32)page; + mem[i] = page_to_phys(page); + tmp += PAGE_SIZE; + } else { + printk(KERN_ERR "get_user_pages() failed\n"); + goto fault; + } + } + up_read(&mm->mmap_sem); + + /* Ensure the data reaches to main memory before PAT refill */ + wmb(); + + if (refill_pat(tmm[fmt], &mi->area, mem)) + goto fault; + + res = 0; + *info = mi; + goto done; +fault: + up_read(&mm->mmap_sem); +done: + if (res) { + mutex_lock(&mtx); + _m_free(mi); + mutex_unlock(&mtx); + } + kfree(mem); + return res; +} + +/* + * Driver code + * ========================================================================== + */ + +static s32 __init tiler_init(void) +{ + dev_t dev = 0; + s32 r = -1; + struct device *device = NULL; + struct tcm_pt div_pt; + struct tcm *sita = NULL; + struct tmm *tmm_pat = NULL; + + tiler.alloc = alloc_block; + tiler.map = map_block; + tiler.lock = find_n_lock; + tiler.unlock_free = unlock_n_free; + tiler.lay_2d = lay_2d; + tiler.lay_nv12 = lay_nv12; + tiler.destroy_group = destroy_group; + tiler.lock_by_ssptr = find_block_by_ssptr; + tiler.describe = fill_block_info; + tiler.get_gi = get_gi; + tiler.release_gi = release_gi; + tiler.release = release_blocks; + tiler.add_reserved = add_reserved_blocks; + tiler.analize = __analize_area; + tiler_geom_init(&tiler); + tiler_reserve_init(&tiler); + tiler_iface_init(&tiler); + + /* check module parameters for correctness */ + if (default_align > PAGE_SIZE || + default_align & (default_align - 1) || + granularity < 1 || granularity > PAGE_SIZE || + granularity & (granularity - 1)) + return -EINVAL; + + /* + * Array of physical pages for PAT programming, which must be a 16-byte + * aligned physical address. 
+	 */
+	dmac_va = dma_alloc_coherent(NULL, tiler.width * tiler.height *
+				sizeof(*dmac_va), &dmac_pa, GFP_ATOMIC);
+	if (!dmac_va)
+		return -ENOMEM;
+
+	/* Allocate tiler container manager (we share 1 on OMAP4) */
+	div_pt.x = tiler.width;   /* hardcoded default */
+	div_pt.y = (3 * tiler.height) / 4;
+	sita = sita_init(tiler.width, tiler.height, (void *)&div_pt);
+
+	tcm[TILFMT_8BIT] = sita;
+	tcm[TILFMT_16BIT] = sita;
+	tcm[TILFMT_32BIT] = sita;
+	tcm[TILFMT_PAGE] = sita;
+
+	/* Allocate tiler memory manager (must have 1 unique TMM per TCM) */
+	tmm_pat = tmm_pat_init(0);
+	tmm[TILFMT_8BIT] = tmm_pat;
+	tmm[TILFMT_16BIT] = tmm_pat;
+	tmm[TILFMT_32BIT] = tmm_pat;
+	tmm[TILFMT_PAGE] = tmm_pat;
+
+	tiler.nv12_packed = tcm[TILFMT_8BIT] == tcm[TILFMT_16BIT];
+
+	tiler_device = kmalloc(sizeof(*tiler_device), GFP_KERNEL);
+	if (!tiler_device || !sita || !tmm_pat) {
+		r = -ENOMEM;
+		goto error;
+	}
+
+	memset(tiler_device, 0x0, sizeof(*tiler_device));
+	if (tiler_major) {
+		dev = MKDEV(tiler_major, tiler_minor);
+		r = register_chrdev_region(dev, 1, "tiler");
+	} else {
+		r = alloc_chrdev_region(&dev, tiler_minor, 1, "tiler");
+		tiler_major = MAJOR(dev);
+	}
+
+	cdev_init(&tiler_device->cdev, tiler.fops);
+	tiler_device->cdev.owner = THIS_MODULE;
+	tiler_device->cdev.ops = tiler.fops;
+
+	r = cdev_add(&tiler_device->cdev, dev, 1);
+	if (r)
+		printk(KERN_ERR "cdev_add() failed\n");
+
+	tilerdev_class = class_create(THIS_MODULE, "tiler");
+
+	if (IS_ERR(tilerdev_class)) {
+		printk(KERN_ERR "class_create() failed\n");
+		goto error;
+	}
+
+	device = device_create(tilerdev_class, NULL, dev, NULL, "tiler");
+	if (device == NULL)
+		printk(KERN_ERR "device_create() failed\n");
+
+	r = platform_driver_register(&tiler_driver_ldm);
+
+	mutex_init(&mtx);
+	INIT_LIST_HEAD(&blocks);
+	INIT_LIST_HEAD(&orphan_areas);
+	INIT_LIST_HEAD(&orphan_onedim);
+
+error:
+	/* TODO: error handling for device registration */
+	if (r) {
+		kfree(tiler_device);
+		tcm_deinit(sita);
+		tmm_deinit(tmm_pat);
+		dma_free_coherent(NULL, tiler.width * tiler.height *
+					sizeof(*dmac_va), dmac_va, dmac_pa);
+	}
+
+	return r;
+}
+
+static void __exit tiler_exit(void)
+{
+	int i, j;
+
+	mutex_lock(&mtx);
+
+	/* free all process data */
+	tiler.cleanup();
+
+	/* all lists should have cleared */
+	BUG_ON(!list_empty(&blocks));
+	BUG_ON(!list_empty(&orphan_onedim));
+	BUG_ON(!list_empty(&orphan_areas));
+
+	mutex_unlock(&mtx);
+
+	dma_free_coherent(NULL, tiler.width * tiler.height * sizeof(*dmac_va),
+			  dmac_va, dmac_pa);
+
+	/* close containers only once */
+	for (i = TILFMT_MIN; i <= TILFMT_MAX; i++) {
+		/* remove identical containers (tmm is unique per tcm) */
+		for (j = i + 1; j <= TILFMT_MAX; j++)
+			if (tcm[i] == tcm[j]) {
+				tcm[j] = NULL;
+				tmm[j] = NULL;
+			}
+
+		tcm_deinit(tcm[i]);
+		tmm_deinit(tmm[i]);
+	}
+
+	mutex_destroy(&mtx);
+	platform_driver_unregister(&tiler_driver_ldm);
+	cdev_del(&tiler_device->cdev);
+	kfree(tiler_device);
+	device_destroy(tilerdev_class, MKDEV(tiler_major, tiler_minor));
+	class_destroy(tilerdev_class);
+}
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Lajos Molnar <molnar@ti.com>");
+MODULE_AUTHOR("David Sin <davidsin@ti.com>");
+module_init(tiler_init);
+module_exit(tiler_exit);
diff --git a/drivers/media/video/tiler/tiler-reserve.c b/drivers/media/video/tiler/tiler-reserve.c
new file mode 100644
index 0000000..6715d3d
--- /dev/null
+++ b/drivers/media/video/tiler/tiler-reserve.c
@@ -0,0 +1,550 @@
+/*
+ * tiler-reserve.c
+ *
+ * TILER driver area reservation functions for TI TILER hardware block.
+ *
+ * Author: Lajos Molnar <molnar@ti.com>
+ *
+ * Copyright (C) 2009-2010 Texas Instruments, Inc.
+ *
+ * This package is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include "_tiler.h"
+
+static struct tiler_ops *ops;	/* shared methods and variables */
+static int band_8;		/* size of 8-bit band in slots */
+static int band_16;		/* size of 16-bit band in slots */
+
+/**
+ * Calculate the maximum number of buffers that can be packed next to each
+ * other, and the area they occupy. This method is used for both 2D and NV12
+ * packing.
+ *
+ * @author a0194118 (7/16/2010)
+ *
+ * @param o	desired offset
+ * @param a	desired alignment
+ * @param b	band width (each block must occupy the same number of bands)
+ * @param w	width of one block (>0)
+ * @param n	pointer to the desired number of blocks to pack.  It will be
+ *		updated with the maximum number of blocks that can be packed.
+ * @param _area	pointer to store the total area needed
+ *
+ * @return packing efficiency (0-1024)
+ */
+static u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area)
+{
+	u16 m = 0, max_n = *n;		/* m is mostly n - 1 */
+	u16 e = ALIGN(w, a);		/* effective width of one block */
+	u32 eff, best_eff = 0;		/* best values */
+	u16 stride = ALIGN(o + w, b);	/* block stride */
+	u16 area = stride;		/* area needed (for m + 1 blocks) */
+
+	/* NOTE: block #m+1 occupies the range (o + m * e, o + m * e + w) */
+
+	/* see how many blocks we can pack */
+	while (m < max_n &&
+		/* blocks must fit in tiler container */
+		o + m * e + w <= ops->width &&
+		/* block stride must be correct */
+		stride == ALIGN(area - o - m * e, b)) {
+
+		m++;
+		eff = m * w * 1024 / area;
+		if (eff > best_eff) {
+			/* store packing for best efficiency & smallest area */
+			best_eff = eff;
+			*n = m;
+			if (_area)
+				*_area = area;
+		}
+		/* update area */
+		area = ALIGN(o + m * e + w, b);
+	}
+
+	return best_eff;
+}
+
+/*
+ * NV12 Reservation Functions
+ *
+ * TILER is designed so that a (w * h) * 8bit area is twice as wide as a
+ * (w/2 * h/2) * 16bit area.  Since having pairs of such 8-bit and 16-bit
+ * blocks is a common usecase for TILER, we optimize packing these into a
+ * TILER area.
+ *
+ * During reservation we want to find the most effective packing (the most
+ * used area within the smallest overall area).
+ *
+ * We have two algorithms for packing nv12 blocks: either pack 8- and 16-bit
+ * blocks into separate container areas, or pack them together into the same
+ * area.
+ */
+
+/**
+ * Calculate effectiveness of packing. We weight total area much higher than
+ * packing efficiency to get the smallest overall container use.
+ *
+ * @param w		width of one (8-bit) block
+ * @param n		buffers in a packing
+ * @param area		width of the packing area
+ * @param n_total	total number of buffers to be packed
+ * @return effectiveness, the higher the better
+ */
+static inline u32 nv12_eff(u16 w, u16 n, u16 area, u16 n_total)
+{
+	return 0x10000000 -
+		/* weigh against total area needed (for all buffers) */
+		/* 64 slots = -2048 */
+		DIV_ROUND_UP(n_total, n) * area * 32 +
+		/* packing efficiency (0 - 1024) */
+		1024 * n * ((w * 3 + 1) >> 1) / area;
+}
+
+/**
+ * Fallback nv12 packing algorithm: pack 8- and 16-bit blocks into separate
+ * areas.
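+ *
+ * Illustrative example, assuming band_8 = 64 and band_16 = 32 slots (4 KiB
+ * pages with 64-byte-wide slots at 1 and 2 bytes per pixel): for o = 0,
+ * a = 4 and w = 28 slots, tiler_best2pack() fits two 8-bit blocks into one
+ * 64-slot band (efficiency 2 * 28 * 1024 / 64 = 896) and the two 16-bit
+ * pair blocks (width 14) into a 32-slot band, so *area becomes
+ * 3 * 32 = 96 slots.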
+ *
+ * @author a0194118 (7/16/2010)
+ *
+ * @param o	desired offset (<a)
+ * @param a	desired alignment (>=2)
+ * @param w	block width (>0)
+ * @param n	number of blocks desired
+ * @param area	pointer to store the total area needed
+ *
+ * @return number of blocks that can be allocated
+ */
+static u16 nv12_separate(u16 o, u16 a, u16 w, u16 n, u16 *area)
+{
+	tiler_best2pack(o, a, band_8, w, &n, area);
+	tiler_best2pack(o >> 1, a >> 1, band_16, (w + 1) >> 1, &n, area);
+	*area *= 3;
+	return n;
+}
+
+/*
+ * Specialized NV12 Reservation Algorithms
+ *
+ * We use 4 packing methods that pack nv12 blocks into the same area.
+ * Together these 4 methods give the optimal result for most possible input
+ * parameters.
+ *
+ * For now we pack into a 64-slot area, so that we don't have to worry about
+ * stride issues (all blocks get 4K stride).  For some of the algorithms this
+ * would hold even if the area was 128 slots.
+ */
+
+/**
+ * Packing types are marked using a letter sequence, capital letters denoting
+ * 8-bit blocks, lower case letters denoting corresponding 16-bit blocks.
+ *
+ * All methods have the following parameters.  They also define the maximum
+ * number of blocks (coordinate pairs) that could potentially be packed.
+ *
+ * @param o, a, w, n	offset, alignment, width, # of blocks as usual
+ * @param area		pointer to store the area needed for packing
+ * @param p		pointer to store the packing coordinates
+ * @return number of blocks that can be packed
+ */
+
+/* Method A: progressive packing: AAAAaaaaBBbbCc into 64-slot area */
+#define MAX_A 21
+static int nv12_A(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
+{
+	u16 x = o, u, l, m = 0;
+	*area = band_8;
+
+	while (x + w < *area && m < n) {
+		/* current 8bit upper bound (a) is next 8bit lower bound (B) */
+		l = u = (*area + x) >> 1;
+
+		/* pack until upper bound */
+		while (x + w <= u && m < n) {
+			/* save packing */
+			BUG_ON(m + 1 >= MAX_A);
+			*p++ = x;
+			*p++ = l;
+			l = (*area + x + w + 1) >> 1;
+			x = ALIGN(x + w - o, a) + o;
+			m++;
+		}
+		x = ALIGN(l - o, a) + o;	/* set new lower bound */
+	}
+	return m;
+}
+
+/* Method -A: regressive packing: cCbbBBaaaaAAAA into 64-slot area */
+static int nv12_revA(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
+{
+	u16 m;
+
+	/* this is a mirrored packing of method A */
+	n = nv12_A((a - (o + w) % a) % a, a, w, n, area, p);
+
+	/* reverse packing */
+	for (m = 0; m < n; m++) {
+		*p = *area - *p - w;
+		p++;
+		*p = *area - *p - ((w + 1) >> 1);
+		p++;
+	}
+	return n;
+}
+
+/* Method B: simple layout: aAbcBdeCfgDhEFGH */
+#define MAX_B 8
+static int nv12_B(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
+{
+	u16 e = (o + w) % a;		/* end offset */
+	u16 o1 = (o >> 1) % a;		/* half offset */
+	u16 e1 = ((o + w + 1) >> 1) % a; /* half end offset */
+	u16 o2 = o1 + (a >> 2);		/* 2nd half offset */
+	u16 e2 = e1 + (a >> 2);		/* 2nd half end offset */
+	u16 m = 0;
+	*area = band_8;
+
+	/* ensure 16-bit blocks don't overlap 8-bit blocks */
+
+	/* the width cannot wrap around the alignment, the half block must be
+	   before the block, and the 2nd half can be before or after */
+	if (w < a && o < e && e1 <= o && (e2 <= o || o2 >= e))
+		while (o + w <= *area && m < n) {
+			BUG_ON(m + 1 >= MAX_B);
+			*p++ = o;
+			*p++ = o >> 1;
+			m++;
+			o += a;
+		}
+	return m;
+}
+
+/* Method C: butterfly layout: AAbbaaBB */
+#define MAX_C 20
+static int nv12_C(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
+{
+	int m = 0;
+	u16 o2, e = ALIGN(w, a), i = 0, j = 0;
+	*area = band_8;
+	o2 = *area - (a - (o + w) % a) % a;	/* end of last possible block */
+
+	m = (min(o2 - 2 * o, 2 * o2 - o -
+		 *area) / 3 - w) / e + 1;
+	for (i = j = 0; i < m && j < n; i++, j++) {
+		BUG_ON(j + 1 >= MAX_C);
+		*p++ = o + i * e;
+		*p++ = (o + i * e + *area) >> 1;
+		if (++j < n) {
+			*p++ = o2 - i * e - w;
+			*p++ = (o2 - i * e - w) >> 1;
+		}
+	}
+	return j;
+}
+
+/* Method D: for large allocation: aA or Aa */
+#define MAX_D 1
+static int nv12_D(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p)
+{
+	u16 o1, w1 = (w + 1) >> 1, d;
+	*area = ALIGN(o + w, band_8);
+
+	for (d = 0; n > 0 && d + o + w <= *area; d += a) {
+		/* try to fit 16-bit before 8-bit */
+		o1 = ((o + d) % band_8) >> 1;
+		if (o1 + w1 <= o + d) {
+			*p++ = o + d;
+			*p++ = o1;
+			return 1;
+		}
+
+		/* try to fit 16-bit after 8-bit */
+		o1 += ALIGN(d + o + w - o1, band_16);
+		if (o1 + w1 <= *area) {
+			*p++ = o;
+			*p++ = o1;
+			return 1;
+		}
+	}
+	return 0;
+}
+
+/**
+ * Umbrella nv12 packing method.  This selects the best packings from the
+ * above methods.  It also contains hardcoded packings for parameter
+ * combinations that have more efficient packings.  This method is guaranteed
+ * to provide the optimal packing if 2 <= a <= 64, w <= 64 and n is large.
+ */
+#define MAX_ANY 21	/* must be MAX(method-MAX-s, hardcoded n-s) */
+static u16 nv12_together(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *packing)
+{
+	u16 n_best, a_best, n2, a_, o_, w_;
+
+	/* algo results (packings) */
+	u8 pack_A[MAX_A * 2], pack_rA[MAX_A * 2];
+	u8 pack_B[MAX_B * 2], pack_C[MAX_C * 2];
+	u8 pack_D[MAX_D * 2];
+
+	/*
+	 * Hardcoded packings.  They are sorted by increasing area, and then
+	 * by decreasing n.  We may not get the best efficiency if fewer than
+	 * n blocks are needed as packings are not necessarily sorted in
+	 * increasing order.  However, for those n-s one of the other 4
+	 * methods may return the optimal packing.
+	 */
+	u8 packings[] = {
+		/* n=9, o=2, w=4, a=4, area=64 */
+		9, 2, 4, 4, 64,
+		/* 8-bit, 16-bit block coordinate pairs */
+		2, 33, 6, 35, 10, 37, 14, 39, 18, 41,
+		46, 23, 50, 25, 54, 27, 58, 29,
+		/* o=0, w=12, a=4, n=3 */
+		3, 0, 12, 4, 64,
+		0, 32, 12, 38, 48, 24,
+		/* end */
+		0
+	}, *p = packings, *p_best = NULL, *p_end;
+	p_end = packings + sizeof(packings) - 1;
+
+	/* see which method gives the best packing */
+
+	/* start with the smallest-area algorithms A, B & C, and stop if we
+	   can pack all buffers */
+	n_best = nv12_A(o, a, w, n, area, pack_A);
+	p_best = pack_A;
+	if (n_best < n) {
+		n2 = nv12_revA(o, a, w, n, &a_best, pack_rA);
+		if (n2 > n_best) {
+			n_best = n2;
+			p_best = pack_rA;
+			*area = a_best;
+		}
+	}
+	if (n_best < n) {
+		n2 = nv12_B(o, a, w, n, &a_best, pack_B);
+		if (n2 > n_best) {
+			n_best = n2;
+			p_best = pack_B;
+			*area = a_best;
+		}
+	}
+	if (n_best < n) {
+		n2 = nv12_C(o, a, w, n, &a_best, pack_C);
+		if (n2 > n_best) {
+			n_best = n2;
+			p_best = pack_C;
+			*area = a_best;
+		}
+	}
+
+	/* traverse any special packings */
+	while (*p) {
+		n2 = *p++;
+		o_ = *p++;
+		w_ = *p++;
+		a_ = *p++;
+		/* stop if we already have a better packing */
+		if (n2 < n_best)
+			break;
+
+		/* check if this packing is satisfactory */
+		if (a_ >= a && o + w + ALIGN(o_ - o, a) <= o_ + w_) {
+			*area = *p++;
+			n_best = min(n2, n);
+			p_best = p;
+			break;
+		}
+
+		/* skip to the next packing */
+		p += 1 + n2 * 2;
+	}
+
+	/*
+	 * If so far unsuccessful, check whether 8- and 16-bit blocks can be
+	 * co-packed.  This will actually be done in the end by the normal
+	 * allocation, but we need to reserve a big-enough area.
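+	 *
+	 * For instance (illustrative, again assuming band_8 = 64 and
+	 * band_16 = 32 slots): for o = 0, a = 2 and w = 32, nv12_D() stores
+	 * the single packing (0, 32) - the 8-bit block fills slots 0-31 and
+	 * its 16-bit pair fits right after it, an "Aa" layout within one
+	 * 64-slot band.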
+	 */
+	if (!n_best) {
+		n_best = nv12_D(o, a, w, n, area, pack_D);
+		p_best = NULL;
+	}
+
+	/* store best packing */
+	if (p_best && n_best) {
+		BUG_ON(n_best > MAX_ANY);
+		memcpy(packing, p_best, n_best * 2 * sizeof(*pack_A));
+	}
+
+	return n_best;
+}
+
+/* reserve nv12 blocks */
+static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs,
+			 u32 gid, struct process_info *pi)
+{
+	u16 w, h, band, a = align, o = offs;
+	struct gid_info *gi;
+	int res = 0, res2, i;
+	u16 n_t, n_s, area_t, area_s;
+	u8 packing[2 * MAX_ANY];
+	struct list_head reserved = LIST_HEAD_INIT(reserved);
+
+	/* adjust alignment to the largest slot width (128 bytes) */
+	a = max_t(u16, PAGE_SIZE / min(band_8, band_16), a);
+
+	/* check input parameters for correctness and support */
+	if (!width || !height || !n ||
+	    offs >= align || offs & 1 ||
+	    align >= PAGE_SIZE ||
+	    n > ops->width * ops->height / 2)
+		return;
+
+	/* calculate dimensions, band, offs and alignment in slots */
+	if (ops->analize(TILFMT_8BIT, width, height, &w, &h, &band, &a, &o,
+			 NULL))
+		return;
+
+	/* get group context */
+	gi = ops->get_gi(pi, gid);
+	if (!gi)
+		return;
+
+	/* reserve in groups until failure or all is reserved */
+	for (i = 0; i < n && res >= 0; i += res) {
+		/* check packing separately vs. together */
+		n_s = nv12_separate(o, a, w, n - i, &area_s);
+		if (ops->nv12_packed)
+			n_t = nv12_together(o, a, w, n - i, &area_t, packing);
+		else
+			n_t = 0;
+
+		/* pack based on better efficiency */
+		res = -1;
+		if (!ops->nv12_packed ||
+		    nv12_eff(w, n_s, area_s, n - i) >
+		    nv12_eff(w, n_t, area_t, n - i)) {
+
+			/*
+			 * Reserve blocks separately into a temporary list, so
+			 * that we can free them if unsuccessful.  We need to
+			 * be able to reserve both 8- and 16-bit blocks as
+			 * their offsets must match.
+			 */
+			res = ops->lay_2d(TILFMT_8BIT, n_s, w, h, band_8, a, o,
+					  gi, &reserved);
+			res2 = ops->lay_2d(TILFMT_16BIT, n_s, (w + 1) >> 1, h,
+					   band_16, a >> 1, o >> 1, gi,
+					   &reserved);
+
+			if (res2 < 0 || res < 0 || res != res2) {
+				/* clean up */
+				ops->release(&reserved);
+				res = -1;
+			} else {
+				/* add list to reserved */
+				ops->add_reserved(&reserved, gi);
+			}
+		}
+
+		/* if separate packing failed, still try to pack together */
+		if (res < 0 && ops->nv12_packed && n_t) {
+			/* pack together */
+			res = ops->lay_nv12(n_t, area_t, w, h, gi, packing);
+		}
+	}
+
+	ops->release_gi(gi);
+}
+
+/**
+ * We also optimize packing regular 2D areas as the auto-packing may result
+ * in sub-optimal efficiency.  This is most pronounced if the area is wider
+ * than half a PAGE_SIZE (e.g. 2048 in 8-bit mode, or 1024 in 16-bit mode).
+ */
+
+/* reserve 2d blocks */
+static void reserve_blocks(u32 n, enum tiler_fmt fmt, u32 width, u32 height,
+			   u32 align, u32 offs, u32 gid,
+			   struct process_info *pi)
+{
+	u32 bpt, res = 0, i;
+	u16 o = offs, a = align, band, w, h, n_try;
+	struct gid_info *gi;
+	const struct tiler_geom *g;
+
+	/* check input parameters for correctness and support */
+	if (!width || !height || !n ||
+	    align > PAGE_SIZE || offs >= align ||
+	    fmt < TILFMT_8BIT || fmt > TILFMT_32BIT)
+		return;
+
+	/* tiler slot in bytes */
+	g = ops->geom(fmt);
+	bpt = g->slot_w * g->bpp;
+
+	/*
+	 * For blocks narrower than half a PAGE_SIZE the default allocation
+	 * is sufficient.  Also check for basic area info.
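+	 *
+	 * With 4 KiB pages, for example, reservation thus only kicks in for
+	 * blocks wider than 2048 bytes: more than 2048 pixels in 8-bit mode,
+	 * 1024 in 16-bit mode, or 512 in 32-bit mode (assuming 1, 2 and 4
+	 * bytes per pixel, respectively).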
+	 */
+	if (width * g->bpp * 2 <= PAGE_SIZE ||
+	    ops->analize(fmt, width, height, &w, &h, &band, &a, &o, NULL))
+		return;
+
+	/* get group id */
+	gi = ops->get_gi(pi, gid);
+	if (!gi)
+		return;
+
+	/* reserve in groups until failure or all is reserved */
+	for (i = 0; i < n && res >= 0; i += res + 1) {
+		/* blocks to allocate in one area */
+		n_try = min(n - i, ops->width);
+		tiler_best2pack(offs, a, band, w, &n_try, NULL);
+
+		res = -1;
+		while (n_try > 1) {
+			/* adjust res so we fail on a 0 return value */
+			res = ops->lay_2d(fmt, n_try, w, h, band, a, o,
+					  gi, &gi->reserved) - 1;
+			if (res >= 0)
+				break;
+
+			/* reduce n_try if we failed to allocate the area */
+			n_try--;
+		}
+	}
+	/* keep the reserved blocks even if we failed to reserve all of them */
+
+	ops->release_gi(gi);
+}
+
+/* unreserve blocks for a group id */
+static void unreserve_blocks(u32 gid, struct process_info *pi)
+{
+	struct gid_info *gi;
+
+	gi = ops->get_gi(pi, gid);
+	if (!gi)
+		return;
+
+	ops->release(&gi->reserved);
+
+	ops->release_gi(gi);
+}
+
+/* initialize shared method pointers and global static variables */
+void tiler_reserve_init(struct tiler_ops *tiler)
+{
+	ops = tiler;
+
+	ops->reserve_nv12 = reserve_nv12;
+	ops->reserve = reserve_blocks;
+	ops->unreserve = unreserve_blocks;
+
+	band_8 = PAGE_SIZE / ops->geom(TILFMT_8BIT)->slot_w
+		/ ops->geom(TILFMT_8BIT)->bpp;
+	band_16 = PAGE_SIZE / ops->geom(TILFMT_16BIT)->slot_w
+		/ ops->geom(TILFMT_16BIT)->bpp;
+}
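+
+/*
+ * For reference, with 4 KiB pages and the slot geometry assumed in the
+ * examples above (8-bit: 64-byte slot width at 1 bpp; 16-bit: 64-byte slot
+ * width at 2 bpp), the bands work out to:
+ *
+ *	band_8  = 4096 / 64 / 1 = 64 slots
+ *	band_16 = 4096 / 64 / 2 = 32 slots
+ */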