Diffstat (limited to 'drivers/media')
29 files changed, 8680 insertions, 0 deletions
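For reference, the patch below wires a producer/consumer handshake through private ioctls (V4L2_GFX_IOC_CONSUMER, V4L2_GFX_IOC_ACQ, V4L2_GFX_IOC_REL, declared in <linux/omap_v4l2_gfx.h>, which this series provides). What follows is a minimal consumer-side sketch of that protocol, not part of the patch itself: the struct fields and return codes are taken from the vidioc_default() handler in gfx_io.c below, and the /dev/video100 node from VOUT_DEVICENODE_SUFFIX in gfx_init.c.

/*
 * Sketch only (not part of the patch): a userspace consumer loop for the
 * private ioctls this series adds. Assumes the <linux/omap_v4l2_gfx.h>
 * header introduced by the same patch set.
 */
#include <fcntl.h>
#include <sys/ioctl.h>
#include <linux/omap_v4l2_gfx.h>

int consume_frames(void)
{
	struct v4l2_gfx_consumer_params cp = {
		.type = V4L2_GFX_CONSUMER_WAITSTREAM,
		.acquire_timeout_ms = 16,	/* roughly one 60 Hz frame */
	};
	struct v4l2_gfx_buf_params bp = { 0 };
	int fd = open("/dev/video100", O_RDWR);

	if (fd < 0)
		return -1;
	/* Blocks until the producer side issues VIDIOC_STREAMON */
	if (ioctl(fd, V4L2_GFX_IOC_CONSUMER, &cp) < 0)
		return -1;
	for (;;) {
		/* Lock the current frame; driver fills bufid and crop window */
		if (ioctl(fd, V4L2_GFX_IOC_ACQ, &bp) < 0)
			break;	/* ETIMEDOUT or ENODEV: stream has gone away */
		/* ... texture from buffer-class buffer bp.bufid via SGX ... */
		ioctl(fd, V4L2_GFX_IOC_REL, &bp);	/* hand it back */
	}
	return 0;
}

ACQ hands out the most recently queued frame and its crop rectangle; REL releases it so the driver's bc_sync_status() gating can recycle the buffer to the producer.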
diff --git a/drivers/media/Kconfig b/drivers/media/Kconfig index 6995940..74383b1 100644 --- a/drivers/media/Kconfig +++ b/drivers/media/Kconfig @@ -114,4 +114,6 @@ source "drivers/media/radio/Kconfig" source "drivers/media/dvb/Kconfig" +source "drivers/media/video/tiler/Kconfig" + endif # MEDIA_SUPPORT diff --git a/drivers/media/video/Kconfig b/drivers/media/video/Kconfig index bb53de7..c474ea7 100644 --- a/drivers/media/video/Kconfig +++ b/drivers/media/video/Kconfig @@ -573,6 +573,8 @@ source "drivers/media/video/davinci/Kconfig" source "drivers/media/video/omap/Kconfig" +source "drivers/media/video/omapgfx/Kconfig" + source "drivers/media/video/bt8xx/Kconfig" config VIDEO_PMS diff --git a/drivers/media/video/Makefile b/drivers/media/video/Makefile index f0fecd6..169d259 100644 --- a/drivers/media/video/Makefile +++ b/drivers/media/video/Makefile @@ -183,6 +183,8 @@ obj-$(CONFIG_VIDEO_IR_I2C) += ir-kbd-i2c.o obj-y += davinci/ obj-$(CONFIG_ARCH_OMAP) += omap/ +obj-$(CONFIG_TI_TILER) += tiler/ +obj-$(CONFIG_VIDEO_OMAP_GFX) += omapgfx/ EXTRA_CFLAGS += -Idrivers/media/dvb/dvb-core EXTRA_CFLAGS += -Idrivers/media/dvb/frontends diff --git a/drivers/media/video/omapgfx/Kbuild b/drivers/media/video/omapgfx/Kbuild new file mode 100644 index 0000000..3ba7b68 --- /dev/null +++ b/drivers/media/video/omapgfx/Kbuild @@ -0,0 +1,6 @@ + +gfx_vout_mod-objs := gfx_init.o gfx_io.o gfx_bc.o gfx_tiler.o + +obj-$(CONFIG_VIDEO_OMAP_GFX) += gfx_vout_mod.o + +EXTRA_CFLAGS += -Idrivers/gpu/pvr diff --git a/drivers/media/video/omapgfx/Kconfig b/drivers/media/video/omapgfx/Kconfig new file mode 100644 index 0000000..1d16d8a --- /dev/null +++ b/drivers/media/video/omapgfx/Kconfig @@ -0,0 +1,8 @@ +config VIDEO_OMAP_GFX + tristate "OMAP V4L2-GFX driver" + select VIDEOBUF_GEN + select OMAP2_DSS + depends on VIDEO_DEV && (ARCH_OMAP34XX || ARCH_OMAP4) + default m + ---help--- + V4L2 GFX support for OMAP based boards. diff --git a/drivers/media/video/omapgfx/gfx_bc.c b/drivers/media/video/omapgfx/gfx_bc.c new file mode 100644 index 0000000..619d5d3 --- /dev/null +++ b/drivers/media/video/omapgfx/gfx_bc.c @@ -0,0 +1,494 @@ +/* + * Copyright (C) 2010 Texas Instruments Incorporated - http://www.ti.com/ + * Copyright(c) 2008 Imagination Technologies Ltd. All rights reserved. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + * + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/slab.h> +#include <linux/delay.h> + +#define LINUX /* Needed by IMG headers */ +#include "pvrmodule.h" +#include "img_defs.h" +#include "servicesext.h" +#include "kernelbuffer.h" +#include "gfx_bc.h" +#include "v4gfx.h" + +#define DEVICE_COUNT 1 + +#define BCLOGNM "v4l2-gfx bc: " + +#define BCERR(fmt, arg...) printk(KERN_ERR BCLOGNM fmt, ## arg) + +#define BCLOG(fmt, arg...) \ +do { \ + if (debug >= 1) \ + printk(KERN_INFO BCLOGNM fmt, ## arg); \ +} while (0) + + +struct bc_buffer { + u32 size; + unsigned long *paddrp; /* physical addr. 
array */ + PVRSRV_SYNC_DATA *pvr_sync_data; +}; + +struct gfx_bc_devinfo { + struct bc_buffer bc_buf[VIDEO_MAX_FRAME]; + int ref; + int num_bufs; + int ref_cnt; + + /* PVR data types */ + IMG_UINT32 pvr_id; + BUFFER_INFO pvr_bcinfo; + PVRSRV_BC_SRV2BUFFER_KMJTABLE pvr_s2b_jt; +}; + +static struct gfx_bc_devinfo *g_devices[DEVICE_COUNT] = { NULL }; +static PVRSRV_BC_BUFFER2SRV_KMJTABLE pvr_b2s_jt; /* Jump table from driver to SGX */ + +/* + * Service to Buffer Device API - this section covers the entry points from + * the SGX kernel services to this driver + */ +static PVRSRV_ERROR s2b_open_bc_device(IMG_UINT32 ui32DeviceID, + IMG_HANDLE *hdevicep) +{ + struct gfx_bc_devinfo *devinfo; + + BCLOG("+%s %d\n", __func__, (int)ui32DeviceID); + +#ifdef MULTIPLEBUFFERCLASSDEVICESUPPORTED + if (ui32DeviceID >= DEVICE_COUNT) { + BCERR("Attempting to open device %d, max device id is %d\n", + ui32DeviceID, DEVICE_COUNT-1); + return -EINVAL; + + } + devinfo = g_devices[ui32DeviceID]; +#else + devinfo = g_devices[0]; +#endif + *hdevicep = (IMG_HANDLE)devinfo; + return PVRSRV_OK; +} + +static PVRSRV_ERROR s2b_close_bc_device(IMG_UINT32 ui32DeviceID, + IMG_HANDLE hdevice) +{ + PVR_UNREFERENCED_PARAMETER(hdevice); + return PVRSRV_OK; +} + +static PVRSRV_ERROR s2b_get_bc_buffer(IMG_HANDLE hdevice, + IMG_UINT32 bufno, + PVRSRV_SYNC_DATA *pvr_sync_data, + IMG_HANDLE *hbufferp) +{ + struct gfx_bc_devinfo *devinfo; + BCLOG("+%s\n", __func__); + + if (!hdevice || !hbufferp) + return PVRSRV_ERROR_INVALID_PARAMS; + + devinfo = (struct gfx_bc_devinfo *) hdevice; + + if (bufno < devinfo->pvr_bcinfo.ui32BufferCount) { + devinfo->bc_buf[bufno].pvr_sync_data = pvr_sync_data; + *hbufferp = (IMG_HANDLE) &devinfo->bc_buf[bufno]; + + } else { + return PVRSRV_ERROR_INVALID_PARAMS; + } + + return PVRSRV_OK; +} + +static PVRSRV_ERROR s2b_get_bc_info(IMG_HANDLE hdevice, BUFFER_INFO *bcinfop) +{ + struct gfx_bc_devinfo *devinfo = NULL; + int rv = 0; + + if (!hdevice || !bcinfop) { + rv = PVRSRV_ERROR_INVALID_PARAMS; + } else { + devinfo = (struct gfx_bc_devinfo *) hdevice; + *bcinfop = devinfo->pvr_bcinfo; + + BCLOG("ui32BufferCount =%d", + (int)devinfo->pvr_bcinfo.ui32BufferCount); + BCLOG("pixelformat =%d", + (int)devinfo->pvr_bcinfo.pixelformat); + BCLOG("ui32Width =%d", + (int)devinfo->pvr_bcinfo.ui32Width); + BCLOG("ui32Height =%d", + (int)devinfo->pvr_bcinfo.ui32Height); + BCLOG("ui32ByteStride =%d", + (int)devinfo->pvr_bcinfo.ui32ByteStride); + BCLOG("ui32BufferDeviceID =%d", + (int)devinfo->pvr_bcinfo.ui32BufferDeviceID); + BCLOG("ui32Flags = %d", + (int)devinfo->pvr_bcinfo.ui32Flags); + + } + BCLOG("-%s %d (0x%x)\n", __func__, rv, (int)devinfo); + return rv; +} + +static PVRSRV_ERROR s2b_get_buffer_addr(IMG_HANDLE hdevice, + IMG_HANDLE hbuffer, + IMG_SYS_PHYADDR **sysaddrpp, + IMG_UINT32 *sizebytesp, + IMG_VOID **cpuvaddrpp, + IMG_HANDLE *osmapinfop, + IMG_BOOL *iscontiguousp, + IMG_UINT32 *pui32TilingStride) +{ + struct bc_buffer *bc_buf; + PVRSRV_ERROR rv = PVRSRV_OK; + BCLOG("+%s\n", __func__); + + if (!hdevice || !hbuffer || !sysaddrpp || !sizebytesp) + return PVRSRV_ERROR_INVALID_PARAMS; + + bc_buf = (struct bc_buffer *)hbuffer; + *cpuvaddrpp = NULL; + *sizebytesp = bc_buf->size; + + if (bc_buf->paddrp) { + *iscontiguousp = IMG_FALSE; + *sysaddrpp = (IMG_SYS_PHYADDR *)bc_buf->paddrp; + *osmapinfop = IMG_NULL; + *pui32TilingStride = 0; + + BCLOG("+%s paddrp[0] 0x%x, vaddr = 0x%x, sizebytes = %d", + __func__, (int)bc_buf->paddrp[0], + (int)*cpuvaddrpp, (int)*sizebytesp); + + } else { + rv =
PVRSRV_ERROR_NOT_SUPPORTED; + } + return rv; +} + +/* + * Rest of the functions + */ +static PVRSRV_PIXEL_FORMAT v4l2_to_pvr_pixfmt(u32 v4l2pixelfmt) +{ + PVRSRV_PIXEL_FORMAT pvr_fmt; + + switch (v4l2pixelfmt) { + case V4L2_PIX_FMT_RGB565: + pvr_fmt = PVRSRV_PIXEL_FORMAT_RGB565; + break; + case V4L2_PIX_FMT_RGB32: + pvr_fmt = PVRSRV_PIXEL_FORMAT_RGB888; + break; + case V4L2_PIX_FMT_YUYV: + pvr_fmt = PVRSRV_PIXEL_FORMAT_FOURCC_ORG_YUYV; + break; + case V4L2_PIX_FMT_UYVY: + pvr_fmt = PVRSRV_PIXEL_FORMAT_FOURCC_ORG_UYVY; + break; + case V4L2_PIX_FMT_NV12: + pvr_fmt = PVRSRV_PIXEL_FORMAT_NV12; + break; + default: + pvr_fmt = PVRSRV_PIXEL_FORMAT_UNKNOWN; + } + return pvr_fmt; +} + +static int gfx_bc_release_device_resources(int id) +{ + struct gfx_bc_devinfo *devinfo; + + devinfo = g_devices[id]; + if (devinfo == NULL) + return -ENOENT; + + if (!devinfo->num_bufs) + return 0; + + devinfo->num_bufs = 0; + devinfo->pvr_bcinfo.pixelformat = PVRSRV_PIXEL_FORMAT_UNKNOWN; + devinfo->pvr_bcinfo.ui32Width = 0; + devinfo->pvr_bcinfo.ui32Height = 0; + devinfo->pvr_bcinfo.ui32ByteStride = 0; + devinfo->pvr_bcinfo.ui32BufferDeviceID = id; + devinfo->pvr_bcinfo.ui32Flags = 0; + devinfo->pvr_bcinfo.ui32BufferCount = 0; + + return 0; +} + +static int gfx_bc_register(int id) +{ + struct gfx_bc_devinfo *devinfo; + int rv = 0; + BCLOG("+%s\n", __func__); + + devinfo = g_devices[id]; + + if (devinfo) { + devinfo->ref_cnt++; + BCLOG("%s device already registered\n", __func__); + rv = 0; + goto end; + } + + devinfo = (struct gfx_bc_devinfo *) + kzalloc(sizeof(*devinfo), GFP_KERNEL); + if (!devinfo) { + rv = -ENOMEM; + goto end; + } + BCLOG("%s devinfo id=%d addr=0x%x\n", __func__, id, (int)devinfo); + + devinfo->pvr_bcinfo.pixelformat = PVRSRV_PIXEL_FORMAT_UNKNOWN; + devinfo->pvr_bcinfo.ui32Width = 0; + devinfo->pvr_bcinfo.ui32Height = 0; + devinfo->pvr_bcinfo.ui32ByteStride = 0; + devinfo->pvr_bcinfo.ui32BufferDeviceID = id; + devinfo->pvr_bcinfo.ui32Flags = 0; + devinfo->pvr_bcinfo.ui32BufferCount = devinfo->num_bufs; + + devinfo->pvr_s2b_jt.ui32TableSize = + sizeof(PVRSRV_BC_SRV2BUFFER_KMJTABLE); + devinfo->pvr_s2b_jt.pfnOpenBCDevice = s2b_open_bc_device; + devinfo->pvr_s2b_jt.pfnCloseBCDevice = s2b_close_bc_device; + devinfo->pvr_s2b_jt.pfnGetBCBuffer = s2b_get_bc_buffer; + devinfo->pvr_s2b_jt.pfnGetBCInfo = s2b_get_bc_info; + devinfo->pvr_s2b_jt.pfnGetBufferAddr = s2b_get_buffer_addr; + + if (pvr_b2s_jt.pfnPVRSRVRegisterBCDevice(&devinfo->pvr_s2b_jt, + &devinfo->pvr_id) != PVRSRV_OK) { + BCLOG("RegisterBCDevice failed\n"); + rv = -EIO; + goto end; + } + + BCLOG("my device id: %d\n", (int)devinfo->pvr_id); + + devinfo->ref_cnt++; + g_devices[id] = devinfo; +end: + BCLOG("-%s [%d]\n", __func__, rv); + return rv; +} + +static int gfx_bc_unregister(int id) +{ + int rv = 0; + struct gfx_bc_devinfo *devinfo; + + devinfo = g_devices[id]; + if (devinfo == NULL) { + rv = -ENODEV; + goto end; + } + + devinfo->ref_cnt--; + + if (devinfo->ref_cnt) { + rv = -EAGAIN; + goto end; + } + + if (pvr_b2s_jt.pfnPVRSRVRemoveBCDevice(devinfo->pvr_id) != PVRSRV_OK) { + rv = -EIO; + goto end; + } + + kfree(devinfo); + g_devices[id] = NULL; + +end: + return rv; +} + +#define FIELDCOPY(dst, src, field) { (dst)->field = (src)->field; } + +#define BC_BUF_PARAMS_COPY(dst, src) { \ + FIELDCOPY(dst, src, count); \ + FIELDCOPY(dst, src, width); \ + FIELDCOPY(dst, src, height); \ + FIELDCOPY(dst, src, pixel_fmt); \ + FIELDCOPY(dst, src, stride); \ + FIELDCOPY(dst, src, size); \ + } + +static void gfx_bc_params2_to_common(struct bc_buf_params2 
*p, + struct bc_buf_params_common *pc) +{ + BC_BUF_PARAMS_COPY(pc, p); +} + +/* + * Validate the bc_buf_params and get the PVR pixel format + * + * We shouldn't need to do any further validation of the V4L2 pixelformat + * properties as this should have been taken care of in the appropriate V4L2 + * ioctl handlers. + */ +static int gfx_bc_validateparams( + int id, + struct bc_buf_params_common *p, + struct gfx_bc_devinfo **devinfop, + PVRSRV_PIXEL_FORMAT *pvr_pix_fmtp) +{ + struct gfx_bc_devinfo *devinfo; + int rv = 0; + + devinfo = g_devices[id]; + if (devinfo == NULL) { + BCLOG("%s: no such device %d", __func__, id); + rv = -ENODEV; + } + + /* validate a series of params */ + if (p->count <= 0) { + BCLOG("%s: invalid count", __func__); + rv = -EINVAL; + } + + *pvr_pix_fmtp = v4l2_to_pvr_pixfmt(p->pixel_fmt); + if (*pvr_pix_fmtp == PVRSRV_PIXEL_FORMAT_UNKNOWN) { + BCLOG("%s: invalid pixel format", __func__); + rv = -EINVAL; + } + + *devinfop = rv != 0 ? NULL : devinfo; + return rv; +} + +/* + * API for the V4L2 component + */ +int bc_init(void) +{ + int id, rv; + BCLOG("+%s\n", __func__); + + if (!PVRGetBufferClassJTable(&pvr_b2s_jt)) { + BCERR("no jump table to SGX APIs\n"); + rv = -EIO; + goto end; + } + + for (id = 0; id < DEVICE_COUNT; id++) { + rv = gfx_bc_register(id); + if (rv != 0) { + BCERR("can't register BC service\n"); + goto end; + } + } + +end: + BCLOG("-%s [%d]\n", __func__, rv); + return rv; +} + +void bc_cleanup(void) +{ + int id; + for (id = 0; id < DEVICE_COUNT; id++) { + if (gfx_bc_release_device_resources(id) != 0) + BCERR("can't release b/c device resources: %d\n", id); + if (gfx_bc_unregister(id) != 0) + BCERR("can't un-register BC service\n"); + } +} + +int bc_setup_complete(int id, struct bc_buf_params2 *p) +{ + /* Fn called after successful bc_setup() so id should be valid */ + struct gfx_bc_devinfo *devinfo = g_devices[id]; + if (p->count != devinfo->num_bufs) { + BCLOG("+%s: Count doesn't match\n", __func__); + return -ENODEV; + } + return 0; +} + +int bc_setup_buffer(int id, struct bc_buf_params2 *p, unsigned long *paddrp) +{ + int idx; + /* Fn called after successful bc_setup() so id should be valid */ + struct gfx_bc_devinfo *devinfo = g_devices[id]; + idx = devinfo->num_bufs; + if (unlikely(idx >= VIDEO_MAX_FRAME)) + return -ENOENT; + + devinfo->num_bufs++; + devinfo->pvr_bcinfo.ui32BufferCount = devinfo->num_bufs; + + memset(&devinfo->bc_buf[idx], 0, sizeof(devinfo->bc_buf[idx])); + devinfo->bc_buf[idx].paddrp = paddrp; + devinfo->bc_buf[idx].size = p->size; + devinfo->bc_buf[idx].pvr_sync_data = IMG_NULL; + return 0; +} + +int bc_setup(int id, struct bc_buf_params2 *p) +{ + struct gfx_bc_devinfo *devinfo; + int rv = 0; + PVRSRV_PIXEL_FORMAT pvr_pix_fmt; + struct bc_buf_params_common pc; + + BCLOG("+%s\n", __func__); + + gfx_bc_params2_to_common(p, &pc); + rv = gfx_bc_validateparams(id, &pc, &devinfo, &pvr_pix_fmt); + if (rv != 0) + goto end; + + p->stride = 4096; /* Tiler stride */ + p->size = p->height * p->stride; + if (p->pixel_fmt == V4L2_PIX_FMT_NV12) + p->size += (p->height / 2) * p->stride; /* UV size */ + + devinfo->num_bufs = 0; /* See bc_setup_buffer */ + + devinfo->pvr_bcinfo.pixelformat = pvr_pix_fmt; + devinfo->pvr_bcinfo.ui32Width = p->width; + devinfo->pvr_bcinfo.ui32Height = p->height; + devinfo->pvr_bcinfo.ui32ByteStride = p->stride; + devinfo->pvr_bcinfo.ui32BufferDeviceID = id; + /* I'm not 100% sure these flags are right but here goes */ + devinfo->pvr_bcinfo.ui32Flags = + PVRSRV_BC_FLAGS_YUVCSC_FULL_RANGE | + PVRSRV_BC_FLAGS_YUVCSC_BT601; + + BCLOG("buffers: count=%d, w=%d, h=%d, stride=%d, sz=%d fmt=%d\n", + p->count, p->width, p->height, p->stride, p->size, pvr_pix_fmt); +end: + BCLOG("-%s [%d]\n", __func__, rv); + return rv; +} + +/* + * The caller of this API will ensure that the arguments are valid + */ +int bc_sync_status(int id, int bufidx) +{ + struct gfx_bc_devinfo *devinfo = g_devices[id]; + int ui32ReadOpsPending, ui32ReadOpsComplete; + + ui32ReadOpsPending = + devinfo->bc_buf[bufidx].pvr_sync_data->ui32ReadOpsPending; + ui32ReadOpsComplete = + devinfo->bc_buf[bufidx].pvr_sync_data->ui32ReadOpsComplete; + + return ui32ReadOpsComplete == ui32ReadOpsPending ? 1 : 0; +} + diff --git a/drivers/media/video/omapgfx/gfx_bc.h b/drivers/media/video/omapgfx/gfx_bc.h new file mode 100644 index 0000000..ea2bf22 --- /dev/null +++ b/drivers/media/video/omapgfx/gfx_bc.h @@ -0,0 +1,76 @@ +/********************************************************************** + * + * Copyright (C) 2010 Texas Instruments Incorporated - http://www.ti.com/ + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + * + ******************************************************************************/ + +#ifndef __V4L2_GFX_BC_H__ +#define __V4L2_GFX_BC_H__ + +#include <media/v4l2-dev.h> + +struct bc_buf_params_common { + int count; /*number of buffers */ + int width; /*buffer width in pixel, multiple of 32 */ + int height; /*buffer height in pixel */ + u32 pixel_fmt; /* V4L2 buffer pixel format */ + int stride; + int size; +}; + +struct bc_buf_params { + int count; /*number of buffers (in) */ + int width; /*buffer width in pixel, multiple of 32 (in) */ + int height; /*buffer height in pixel (in) */ + u32 pixel_fmt; /* V4L2 buffer pixel format (in) */ + int stride; /*(out) */ + int size; /*(out) */ +}; + +struct bc_buf_params2 { + int count; /*number of buffers (in) */ + int width; /*buffer width in pixel, multiple of 32 (in) */ + int height; /*buffer height in pixel (in) */ + u32 pixel_fmt; /* V4L2 buffer pixel format (in) */ + int stride; /*(in) */ + int size; /*(out) */ +}; +extern int bc_init(void); +extern void bc_cleanup(void); + +/* bc_setup + * + * This API will validate the buffer parameters in order to setup a + * buffer class device. Buffers should be added with subsequent calls to + * bc_setup_buffer() + */ +extern int bc_setup(int id, struct bc_buf_params2 *p); + +/* bc_setup_buffer + * + * Only called after a successful bc_setup(), add a physical buffer reference + * to this device + */ +extern int bc_setup_buffer( + int id, struct bc_buf_params2 *p, unsigned long *paddr); + +/* bc_setup_complete + * + * Called after all physical buffers have been added to the device + */ +extern int bc_setup_complete(int id, struct bc_buf_params2 *p); + +/* bc_sync_status + * + * Return the synchronization status of this device's buffer + * + * Return values: + * 0 SGX still has pending operations on the buffer + * 1 SGX done with the buffer + */ +extern int bc_sync_status(int id, int bufidx); +#endif diff --git a/drivers/media/video/omapgfx/gfx_init.c b/drivers/media/video/omapgfx/gfx_init.c new file mode 100644 index 0000000..14ee80f --- /dev/null +++ b/drivers/media/video/omapgfx/gfx_init.c @@ -0,0 +1,297 @@ +/* + * drivers/media/video/omapgfx/gfx_init.c + * + * Copyright (C) 2010 Texas Instruments. + * + * This file is licensed under the terms of the GNU General Public License + * version 2.
This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/vmalloc.h> +#include <linux/types.h> +#include <linux/platform_device.h> +#include <linux/version.h> + +#include <linux/omap_v4l2_gfx.h> /* private ioctls */ + +#include <media/v4l2-ioctl.h> + +#include "v4gfx.h" +#include "gfx_bc.h" + +MODULE_AUTHOR("Texas Instruments."); +MODULE_DESCRIPTION("OMAP V4L2 GFX driver"); +MODULE_LICENSE("GPL"); + +/* + * Device node will be: /dev/video<VOUT_DEVICENODE_SUFFIX> + * See also /sys/devices/virtual/video4linux/<node>/name which will be + * whatever the value of VOUT_NAME is + */ +#define VOUT_DEVICENODE_SUFFIX 100 + +static struct gbl_v4gfx *gbl_dev; + +int debug; /* is used outside this compilation unit too */ +module_param(debug, int, 0644); + +/* + * If bypass is set then buffer streaming operations will be bypassed. This + * enables us to check what the raw performance of stack above the V4L2 + * driver is + */ +static int bypass; +module_param(bypass, int, 0644); + + +static int bypass_vidioc_qbuf( + struct file *file, void *fh, struct v4l2_buffer *buf) { return 0; } + +static int bypass_vidioc_dqbuf( + struct file *file, void *fh, struct v4l2_buffer *buf) { return 0; } + +static int bypass_vidioc_streamon( + struct file *file, void *fh, enum v4l2_buf_type i) { return 0; } + +static int bypass_vidioc_streamoff( + struct file *file, void *fh, enum v4l2_buf_type i) { return 0; } + +static long bypass_vidioc_default( + struct file *file, void *fh, int cmd, void *arg) +{ + struct v4l2_gfx_buf_params *parms = (struct v4l2_gfx_buf_params *)arg; + int rv = 0; + + switch (cmd) { + case V4L2_GFX_IOC_CONSUMER: + break; + case V4L2_GFX_IOC_ACQ: + /* In bypass mode default the first buffer */ + parms->bufid = 0; + break; + case V4L2_GFX_IOC_REL: + break; + default: + rv = -EINVAL; + } + return rv; +} + +/* + * If the module is put in bypass mode the following ioctls + * are effectively nops + */ +static void v4gfx_enable_bypass(void) +{ + v4gfx_ioctl_ops.vidioc_qbuf = bypass_vidioc_qbuf; + v4gfx_ioctl_ops.vidioc_dqbuf = bypass_vidioc_dqbuf; + v4gfx_ioctl_ops.vidioc_streamon = bypass_vidioc_streamon; + v4gfx_ioctl_ops.vidioc_streamoff = bypass_vidioc_streamoff; + v4gfx_ioctl_ops.vidioc_default = bypass_vidioc_default; +} + +static void v4gfx_cleanup_device(struct v4gfx_device *vout) +{ + struct video_device *vfd; + + if (!vout) + return; + vfd = vout->vfd; + + if (vfd) { + if (vfd->minor == -1) { + /* + * The device was never registered, so release the + * video_device struct directly. + */ + video_device_release(vfd); + } else { + /* + * The unregister function will release the video_device + * struct as well as unregistering it. 
+ */ + video_unregister_device(vfd); + } + } + + v4gfx_tiler_buffer_free(vout, vout->buffer_allocated, 0); + kfree(vout); +} + +static int driver_remove(struct platform_device *pdev) +{ + struct v4l2_device *v4l2_dev = platform_get_drvdata(pdev); + struct gbl_v4gfx *dev = container_of(v4l2_dev, struct + gbl_v4gfx, v4l2_dev); + int k; + + v4l2_device_unregister(v4l2_dev); + for (k = 0; k < pdev->num_resources; k++) + v4gfx_cleanup_device(dev->vouts[k]); + + kfree(gbl_dev); + return 0; +} + +static int driver_probe(struct platform_device *pdev) +{ + printk(KERN_INFO "Probing: " VOUT_NAME "\n"); + return 0; +} + +static int v4gfx_create_instance(struct v4gfx_device **voutp) +{ + int r = 0; + struct v4gfx_device *vout = NULL; + struct video_device *vfd = NULL; + + vout = kzalloc(sizeof(struct v4gfx_device), GFP_KERNEL); + if (vout == NULL) { + r = -ENOMEM; + goto end; + } + mutex_init(&vout->lock); + spin_lock_init(&vout->vbq_lock); + /* TODO set this to an invalid value, need to change unit test though */ + vout->bpp = RGB565_BPP; + vout->gbl_dev = gbl_dev; + vout->type = V4L2_BUF_TYPE_VIDEO_OUTPUT; + + init_timer(&vout->acquire_timer); + vout->acquire_timer.function = v4gfx_acquire_timer; + vout->acquire_timer.data = (unsigned long)vout; + + init_waitqueue_head(&vout->sync_done); + init_waitqueue_head(&vout->consumer_wait); + + vfd = vout->vfd = video_device_alloc(); + if (!vfd) { + r = -ENOMEM; + goto end; + } + + strlcpy(vfd->name, VOUT_NAME, sizeof(vfd->name)); + vfd->vfl_type = VFL_TYPE_GRABBER; + vfd->release = video_device_release; + vfd->ioctl_ops = &v4gfx_ioctl_ops; + vfd->fops = &v4gfx_fops; + vfd->minor = -1; + vfd->debug = debug; + + r = video_register_device(vfd, VFL_TYPE_GRABBER, + VOUT_DEVICENODE_SUFFIX); + if (r < 0) + goto end; + + video_set_drvdata(vfd, vout); + + *voutp = vout; + printk(KERN_INFO VOUT_NAME ":video device registered\n"); + return 0; +end: + + if (vfd) + video_device_release(vfd); + + kfree(vout); /* safe with null vout */ + + return r; +} + +static void v4gfx_delete_instance( + struct v4l2_device *v4l2_dev, struct v4gfx_device *vout) +{ + v4l2_info(v4l2_dev, "unregistering /dev/video%d\n", vout->vfd->num); + video_unregister_device(vout->vfd); + v4gfx_buffer_array_free(vout, vout->buffer_allocated); + kfree(vout); + return; +} + +static struct platform_driver v4gfx_driver = { + .driver = { + .name = VOUT_NAME, + }, + .probe = driver_probe, + .remove = driver_remove, +}; + +static int module_init_v4gfx(void) +{ + int rv; + bool v4l2_dev_registered = false; + bool bc_dev_registered = false; + + if (bypass) { + printk(KERN_INFO VOUT_NAME ":Enable bypass mode\n"); + v4gfx_enable_bypass(); + } + + rv = platform_driver_register(&v4gfx_driver); + if (rv != 0) { + printk(KERN_ERR VOUT_NAME ":platform_driver_register failed\n"); + goto end; + } + + gbl_dev = kzalloc(sizeof(struct gbl_v4gfx), GFP_KERNEL); + if (gbl_dev == NULL) { + rv = -ENOMEM; + goto end; + } + + snprintf(gbl_dev->v4l2_dev.name, sizeof(gbl_dev->v4l2_dev.name), + "%s-%03d", VOUT_NAME, VOUT_DEVICENODE_SUFFIX); + + rv = v4l2_device_register(NULL, &gbl_dev->v4l2_dev); + if (rv != 0) { + printk(KERN_ERR VOUT_NAME ":v4l2_device_register failed\n"); + goto end; + } + v4l2_dev_registered = true; + + rv = v4gfx_create_instance(&gbl_dev->vouts[0]); + if (rv != 0) + goto end; + + rv = bc_init(); + if (rv != 0) + goto end; + + bc_dev_registered = true; + + printk(KERN_INFO VOUT_NAME ":OMAP V4L2 GFX driver loaded ok\n"); + return rv; +end: + printk(KERN_INFO VOUT_NAME ":Error %d loading OMAP V4L2 GFX driver\n", + rv); + + if (bc_dev_registered) + bc_cleanup(); + + if (v4l2_dev_registered) + v4l2_device_unregister(&gbl_dev->v4l2_dev); + + kfree(gbl_dev); /* gbl_dev can be null */ + + return rv; +} + +static void module_exit_v4gfx(void) +{ + bc_cleanup(); + + v4gfx_delete_instance(&gbl_dev->v4l2_dev, gbl_dev->vouts[0]); + + v4l2_device_unregister(&gbl_dev->v4l2_dev); + + kfree(gbl_dev); + + platform_driver_unregister(&v4gfx_driver); +} + +module_init(module_init_v4gfx); +module_exit(module_exit_v4gfx); diff --git a/drivers/media/video/omapgfx/gfx_io.c b/drivers/media/video/omapgfx/gfx_io.c new file mode 100644 index 0000000..e753b4e --- /dev/null +++ b/drivers/media/video/omapgfx/gfx_io.c @@ -0,0 +1,1329 @@ +/* + * drivers/media/video/omapgfx/gfx_io.c + * + * Copyright (C) 2010 Texas Instruments. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + * + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/errno.h> +#include <linux/fs.h> +#include <linux/kernel.h> +#include <linux/vmalloc.h> +#include <linux/interrupt.h> +#include <linux/kdev_t.h> +#include <linux/types.h> +#include <linux/wait.h> +#include <linux/videodev2.h> +#include <linux/platform_device.h> +#include <linux/dma-mapping.h> +#include <linux/irq.h> +#include <linux/delay.h> +#include <linux/omap_v4l2_gfx.h> /* private ioctls */ + +#include <media/videobuf-dma-contig.h> +#include <media/v4l2-dev.h> +#include <media/v4l2-ioctl.h> +#include <media/v4l2-common.h> +#include <media/v4l2-device.h> + +#include "v4gfx.h" +#include "gfx_bc.h" + +#define V4GFX_WAIT_DEQUE 1 /* Poll buffer sync status during dq */ +#define V4GFX_WAIT_UNLOCK 2 /* Poll buffer sync status from render loop */ +/* + * V4GFX_WAITMETHOD is used to select how we wait for SGX to release + * buffers sent to it. + */ +/* #define V4GFX_WAITMETHOD V4GFX_WAIT_DEQUE */ +#define V4GFX_WAITMETHOD V4GFX_WAIT_UNLOCK + +#define VID_MAX_WIDTH 2048 /* Largest width */ +#define VID_MAX_HEIGHT 2048 /* Largest height */ +#define VID_MIN_WIDTH 0 +#define VID_MIN_HEIGHT 0 +#define V4GFX_FRAME_UNLOCK_TIMEOUT 16 /* ms */ + + +/* + * This will enable dumping of the mappings obtained + */ +#ifdef V4L2GFX_DUMPMMAP +#define DUMPMMAP(msg, k, vma, m, pos, p) \ + printk(KERN_NOTICE \ + "%s: vm_start+%d = 0x%lx, dma->vmalloc+%d = 0x%lx, w=0x%x\n", \ + msg, k, vma->vm_start + k, m, (pos + m), p); +#else +#define DUMPMMAP(msg, k, vma, m, pos, p) +#endif + +static struct videobuf_queue_ops video_vbq_ops; + +static u32 v4gfx_calc_buffer_size( + int bpp, u32 width, u32 height, u32 pixelformat); +static u32 v4gfx_calc_stride(int bpp, u32 width); + +/* + * List of image formats supported by the SGX buffer-class api + */ +static const struct v4l2_fmtdesc gfx_bc_formats[] = { + { + /* Note: V4L2 defines RGB565 as: + * + * Byte 0 Byte 1 + * g2 g1 g0 r4 r3 r2 r1 r0 b4 b3 b2 b1 b0 g5 g4 g3 + * + * OMAP video pipelines interpret RGB565 as: + * + * Byte 0 Byte 1 + * g2 g1 g0 b4 b3 b2 b1 b0 r4 r3 r2 r1 r0 g5 g4 g3 + * + * GFX ??
TODO + */ + .description = "RGB565, le", + .pixelformat = V4L2_PIX_FMT_RGB565, + }, + { + .description = "RGB32, le", + .pixelformat = V4L2_PIX_FMT_RGB32, + }, + { + .description = "YUYV (YUV 4:2:2), packed", + .pixelformat = V4L2_PIX_FMT_YUYV, + }, + { + .description = "UYVY, packed", + .pixelformat = V4L2_PIX_FMT_UYVY, + }, + { + .description = "NV12 - YUV420 format", + .pixelformat = V4L2_PIX_FMT_NV12, + }, +}; + +#define NUM_OUTPUT_FORMATS (ARRAY_SIZE(gfx_bc_formats)) + +int v4gfx_try_format(struct v4l2_pix_format *pix) +{ + int ifmt, bpp = 0; + + pix->height = + clamp(pix->height, (u32)VID_MIN_HEIGHT, (u32)VID_MAX_HEIGHT); + pix->width = clamp(pix->width, (u32)VID_MIN_WIDTH, (u32)VID_MAX_WIDTH); + + for (ifmt = 0; ifmt < NUM_OUTPUT_FORMATS; ifmt++) { + if (pix->pixelformat == gfx_bc_formats[ifmt].pixelformat) + break; + } + + if (ifmt >= NUM_OUTPUT_FORMATS) + ifmt = 0; /* Default V4L2_PIX_FMT_RGB565 */ + pix->pixelformat = gfx_bc_formats[ifmt].pixelformat; + + pix->field = V4L2_FIELD_ANY; + pix->priv = 0; + + switch (pix->pixelformat) { + case V4L2_PIX_FMT_YUYV: + case V4L2_PIX_FMT_UYVY: + default: + pix->colorspace = V4L2_COLORSPACE_JPEG; + bpp = YUYV_BPP; + break; + case V4L2_PIX_FMT_RGB565: + case V4L2_PIX_FMT_RGB565X: + pix->colorspace = V4L2_COLORSPACE_SRGB; + bpp = RGB565_BPP; + break; + case V4L2_PIX_FMT_RGB24: + pix->colorspace = V4L2_COLORSPACE_SRGB; + bpp = RGB24_BPP; + break; + case V4L2_PIX_FMT_RGB32: + case V4L2_PIX_FMT_BGR32: + pix->colorspace = V4L2_COLORSPACE_SRGB; + bpp = RGB32_BPP; + break; + case V4L2_PIX_FMT_NV12: + pix->colorspace = V4L2_COLORSPACE_JPEG; + bpp = 1; /* 12bits per pixel, 1 byte for Y */ + break; + } + + pix->bytesperline = v4gfx_calc_stride(bpp, pix->width); + pix->sizeimage = v4gfx_calc_buffer_size(bpp, pix->width, pix->height, + pix->pixelformat); + + if (V4L2_PIX_FMT_NV12 == pix->pixelformat) + pix->sizeimage += pix->sizeimage >> 1; + + return bpp; +} + +void v4gfx_acquire_timer(unsigned long arg) +{ + struct v4gfx_device *vout = (struct v4gfx_device *)arg; + + set_bit(1, &vout->acquire_timedout); +} + +#if V4GFX_WAITMETHOD == V4GFX_WAIT_DEQUE +static struct videobuf_buffer *v4gfx_get_next_syncframe( + struct v4gfx_device *vout) +{ + struct videobuf_buffer *buf; + mutex_lock(&vout->lock); + if (list_empty(&vout->sync_queue)) { + mutex_unlock(&vout->lock); + return NULL; + } + buf = list_entry(vout->sync_queue.next, struct videobuf_buffer, queue); + mutex_unlock(&vout->lock); + return buf; +} + +static int v4gfx_wait_on_pending(struct v4gfx_device *vout, int bufidx) +{ + int dqable = 0; + int iteration = 0; + + do { + dqable = bc_sync_status(0, bufidx); + if (!dqable) { + /* printk("w-on %d [%d]\n", bufidx, iteration); */ + if (iteration++ < V4GFX_FRAME_UNLOCK_TIMEOUT) { + msleep(1); /* milliseconds */ + } else { + /*printk("t-o %d\n", bufidx); */ + break; /* Timed out */ + } + } +/* + else { + printk("dq-o %d\n", bufidx); + } + */ + } while (!dqable); + + return dqable; +} + +static void v4gfx_done_syncframe(struct v4gfx_device *vout, + struct videobuf_buffer *sync_frame) +{ + struct timeval timevalue = {0}; + unsigned long flags; + mutex_lock(&vout->lock); + spin_lock_irqsave(&vout->vbq_lock, flags); + + list_del(&sync_frame->queue); + + do_gettimeofday(&timevalue); + sync_frame->ts = timevalue; + sync_frame->state = VIDEOBUF_DONE; + wake_up_interruptible(&sync_frame->done); + spin_unlock_irqrestore(&vout->vbq_lock, flags); + mutex_unlock(&vout->lock); +} +#endif /* V4GFX_WAIT_DEQUE */ + + +static u32 v4gfx_calc_stride(int bpp, u32 width) +{ + 
return PAGE_ALIGN(width * bpp); +} + +static u32 v4gfx_calc_buffer_size( + int bpp, u32 width, u32 height, u32 pixelformat) +{ + int stride; + stride = v4gfx_calc_stride(bpp, width); + + /* for NV12 format, the buffer is height + height / 2 */ + if (V4L2_PIX_FMT_NV12 == pixelformat) + return height * 3/2 * stride; + else + return height * stride; +} + +void v4gfx_buffer_array_free(struct v4gfx_device *vout, int cnt) +{ + /* Fn should be robust and callable with args in a dubious state */ + int i; + if (!vout || !cnt) + return; + if (vout->buf_phys_addr_array) { + for (i = 0; i < cnt; i++) + kfree(vout->buf_phys_addr_array[i]); + kfree(vout->buf_phys_addr_array); + vout->buf_phys_addr_array = NULL; + } +} + +/* + * Allocate a buffer array for all the requested buffers. + * If there is an allocation failure the function will clean up after itself + */ +static int v4gfx_buffer_array_realloc(struct v4gfx_device *vout, + int oldcnt, int newcnt) +{ + int i; + + if (vout->buf_phys_addr_array) + v4gfx_buffer_array_free(vout, oldcnt); + + vout->buf_phys_addr_array = + kzalloc(sizeof(unsigned long *) * newcnt, GFP_KERNEL); + if (!vout->buf_phys_addr_array) + return -ENOMEM; + + /* 2048 is the max image height, 2 = (2048 * 4) / CPU_PAGE_SIZE */ + for (i = 0; i < newcnt; i++) { + vout->buf_phys_addr_array[i] = + kmalloc(sizeof(unsigned long) * 2048 * 2, GFP_KERNEL); + if (!vout->buf_phys_addr_array[i]) { + v4gfx_buffer_array_free(vout, newcnt); + return -ENOMEM; + } + } + return 0; +} + +static void v4gfx_buffer_array_fill( + struct v4gfx_device *vout, + int bufno, + unsigned long tiler_paddr_in, + unsigned long tiler_paddr_uv_in) +{ + int buf_phys_idx = 0; + int m = 0, i; + int cpu_pgwidth; + int tiler_increment; + + v4gfx_tiler_image_incr(vout, &cpu_pgwidth, &tiler_increment); + + for (i = 0; i < vout->pix.height; i++) { + unsigned long pg, pgend, tiler_paddr; + + tiler_paddr = tiler_paddr_in+m; + pg = tiler_paddr; + pgend = pg + cpu_pgwidth; + do { + GFXLOGA(2, "%d %d: = %lx\n", bufno, buf_phys_idx, + (long)pg); + vout->buf_phys_addr_array[bufno][buf_phys_idx] = pg; + pg += 4096; + buf_phys_idx++; + } while (pg < pgend); + + m += tiler_increment; + } + + if (V4L2_PIX_FMT_NV12 == vout->pix.pixelformat) { + m = 0; + v4gfx_tiler_image_incr_uv(vout, &tiler_increment); + + /* UV buffer is height / 2 */ + for (i = 0; i < vout->pix.height / 2; i++) { + unsigned long pg; + + pg = tiler_paddr_uv_in+m; + vout->buf_phys_addr_array[bufno][buf_phys_idx] = pg; + m += tiler_increment; + buf_phys_idx++; + } + + GFXLOGA(1, "nv12 uv: 0x%lx\n", tiler_paddr_uv_in); + m += tiler_increment; + } +} + +static int v4gfx_frame_lock(struct v4gfx_device *vout, int *bufid) +{ + struct videobuf_buffer *oldbuf = NULL; +#if V4GFX_WAITMETHOD == V4GFX_WAIT_UNLOCK + struct timeval timevalue = {0}; +#else /* V4GFX_WAIT_DEQUE */ + int oldbufid = -1; +#endif + unsigned long flags; + int rv = 0; + + mutex_lock(&vout->lock); + spin_lock_irqsave(&vout->vbq_lock, flags); + if (!vout->streaming || !vout->cur_frm) { + GFXLOG(1, V4L2DEV(vout), + "%s: ERROR: device not streaming yet\n", __func__); + rv = -EAGAIN; + goto unlock; + } + + /* vout->cur_frm must be set if streaming */ + + if (vout->cur_frm == vout->locked_frm) { + /* + * If this frame has been locked before we will + * attempt to get the next buffer in the dma queue. + * If there is a next buffer, mark the locked + * buffer as done and then promote the next buffer + * to the current buffer whilst locking it in the + * process. + */ + if (list_empty(&vout->dma_queue)) { + *bufid = vout->cur_frm->i; + /* + * We can't do anything else here, it will be up to + * the consumer application to decide whether it wants + * to re-render the texture which depends on what the + * app is doing. + */ + goto unlock; + } + + /* Deactivate the cur_frm */ + oldbuf = vout->cur_frm; + + vout->cur_frm = list_entry(vout->dma_queue.next, + struct videobuf_buffer, queue); + + list_del(&vout->cur_frm->queue); + + vout->cur_frm->state = VIDEOBUF_ACTIVE; + + GFXLOG(2, V4L2DEV(vout), "Active frame %d\n", vout->cur_frm->i); + + vout->locked_frm = vout->cur_frm; + +#if V4GFX_WAITMETHOD == V4GFX_WAIT_UNLOCK + /* + * Mark the previous current buffer done and release it for + * dequeue + */ + do_gettimeofday(&timevalue); + oldbuf->ts = timevalue; + oldbuf->state = VIDEOBUF_DONE; + wake_up_interruptible(&oldbuf->done); +#else /* V4GFX_WAIT_DEQUE */ + oldbufid = oldbuf->i; + list_add_tail(&oldbuf->queue, &vout->sync_queue); + wake_up_interruptible(&vout->sync_done); +#endif + + } else { + /* First time we've tried to lock this frame */ + vout->locked_frm = vout->cur_frm; + /* It will be marked for dequeue next time */ + } + *bufid = vout->locked_frm->i; +unlock: + spin_unlock_irqrestore(&vout->vbq_lock, flags); + mutex_unlock(&vout->lock); + +#if V4GFX_WAITMETHOD == V4GFX_WAIT_DEQUE +/* + if (oldbufid != -1) + printk("sync_queue + %d\n", oldbufid); + */ +#endif + return rv; +} + +static int v4gfx_frame_unlock(struct v4gfx_device *vout, int bufidx) +{ + struct videobuf_buffer *vbuf; + int rv = 0; +#if V4GFX_WAITMETHOD == V4GFX_WAIT_UNLOCK + int iteration = 0; +#endif + + mutex_lock(&vout->lock); + vbuf = vout->locked_frm; + if (!vbuf) { + GFXLOG(1, V4L2DEV(vout), + "%s: ERROR: trying to unlock a non-existent frame\n", + __func__); + rv = -EINVAL; + } else if (vbuf->i != bufidx) { + GFXLOG(1, V4L2DEV(vout), + "%s: ERROR: trying to unlock wrong frame %d %d\n", + __func__, vbuf->i, bufidx); + rv = -EINVAL; + } + mutex_unlock(&vout->lock); + +#if V4GFX_WAITMETHOD == V4GFX_WAIT_UNLOCK + if (rv != 0) + goto end; + + do { + /* + * Interrogate the buffer class synch data buffer to see if SGX + * is done with this buffer + */ + rv = bc_sync_status(0, bufidx); + if (rv == 0) { + if (iteration++ < V4GFX_FRAME_UNLOCK_TIMEOUT) + msleep(1); /* milliseconds */ + } + } while (rv == 0 && iteration < V4GFX_FRAME_UNLOCK_TIMEOUT); + + if (iteration >= V4GFX_FRAME_UNLOCK_TIMEOUT) { + printk("%s: INFO: timed out\n", __func__); + rv = -ETIMEDOUT; + } else + rv = 0; +end: +#endif /* V4GFX_WAIT_UNLOCK */ + return rv; +} + +/* + * Buffer setup function is called by videobuf layer when REQBUF ioctl is + * called. This is used to set up buffers and return the size and count of + * buffers allocated. After the call to this function, the videobuf layer + * will set up the buffer queue depending on the size and count of buffers + */ +static int vbq_ops_buf_setup(struct videobuf_queue *q, unsigned int *count, + unsigned int *size) +{ + struct v4gfx_device *vout = q->priv_data; + int rv = 0; + GFXLOG(1, V4L2DEV(vout), "+%s\n", __func__); + + if (!vout || (V4L2_BUF_TYPE_VIDEO_OUTPUT != q->type)) { + rv = -EINVAL; goto end; + } + + *size = vout->buffer_size = v4gfx_calc_buffer_size( + vout->bpp, + vout->pix.width, + vout->pix.height, + vout->pix.pixelformat); + + GFXLOG(1, V4L2DEV(vout), "height=%d, size=%d\n", + vout->pix.height, *size); + + if (v4gfx_tiler_buffer_setup(vout, count, 0, &vout->pix)) { + rv = -ENOMEM; goto end; + } + +end: + GFXLOG(1, V4L2DEV(vout), "Exiting %s\n", __func__); + return rv; +} + +/* + * This function will be called when VIDIOC_QBUF ioctl is called. + * It prepares buffers before they are given out for display. This function + * converts a user-space virtual address into a physical address if the + * userptr memory exchange mechanism is used. + */ +static int vbq_ops_buf_prepare(struct videobuf_queue *q, + struct videobuf_buffer *vb, + enum v4l2_field field) +{ + struct v4gfx_device *vout = q->priv_data; + + if (VIDEOBUF_NEEDS_INIT == vb->state) { + vb->width = vout->pix.width; + vb->height = vout->pix.height; + vb->size = vb->width * vb->height * vout->bpp; + vb->field = field; + + } + vb->state = VIDEOBUF_PREPARED; + + return 0; +} + +/* + * Buffer queue function will be called from the videobuf layer when _QBUF + * ioctl is called. It is used to enqueue a buffer which is ready to be + * displayed. + */ +static void vbq_ops_buf_queue(struct videobuf_queue *q, + struct videobuf_buffer *vb) +{ + struct v4gfx_device *vout = q->priv_data; + + list_add_tail(&vb->queue, &vout->dma_queue); + vb->state = VIDEOBUF_QUEUED; +} + +/* + * Buffer release function is called from videobuf layer to release buffers + * which were already allocated + */ +static void vbq_ops_buf_release(struct videobuf_queue *q, + struct videobuf_buffer *vb) +{ + struct v4gfx_device *vout = q->priv_data; + + vb->state = VIDEOBUF_NEEDS_INIT; + + if (V4L2_MEMORY_MMAP != vout->memory) + return; +} + +/* + * File operations + */ +static void v4gfx_vm_open(struct vm_area_struct *vma) +{ + struct v4gfx_device *vout = vma->vm_private_data; + + GFXLOG(1, V4L2DEV(vout), + "vm_open [vma=%08lx-%08lx]\n", vma->vm_start, vma->vm_end); + vout->mmap_count++; +} + +static void v4gfx_vm_close(struct vm_area_struct *vma) +{ + struct v4gfx_device *vout = vma->vm_private_data; + + GFXLOG(1, V4L2DEV(vout), + "vm_close [vma=%08lx-%08lx]\n", vma->vm_start, vma->vm_end); + + vout->mmap_count--; +} + +static struct vm_operations_struct v4gfx_vm_ops = { + .open = v4gfx_vm_open, + .close = v4gfx_vm_close, +}; + +static int vidfop_mmap(struct file *file, struct vm_area_struct *vma) +{ + struct v4gfx_device *vout = file->private_data; + struct videobuf_queue *q = &vout->vbq; + int i; + void *pos; + int j = 0, k = 0, m = 0, p = 0, m_increment = 0; + + GFXLOG(1, V4L2DEV(vout), "Entering %s\n", __func__); + + /* look for the buffer to map */ + for (i = 0; i < VIDEO_MAX_FRAME; i++) { + if (NULL == q->bufs[i]) + continue; + if (V4L2_MEMORY_MMAP != q->bufs[i]->memory) + continue; + if (q->bufs[i]->boff == (vma->vm_pgoff << PAGE_SHIFT)) + break; + } + + if (VIDEO_MAX_FRAME == i) { + GFXLOG(1, V4L2DEV(vout), + "offset invalid [offset=0x%lx]\n", + (vma->vm_pgoff << PAGE_SHIFT)); + return -EINVAL; + } + q->bufs[i]->baddr = vma->vm_start; + + vma->vm_flags |= VM_RESERVED;
+ vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + vma->vm_ops = &v4gfx_vm_ops; + vma->vm_private_data = (void *) vout; + pos = (void *)vout->buf_phy_addr[i]; + + /* get line width */ + v4gfx_tiler_image_incr(vout, &p, &m_increment); + + for (j = 0; j < vout->pix.height; j++) { + /* map each page of the line */ + DUMPMMAP("Y buffer", k, vma, m, pos, p); + + vma->vm_pgoff = ((unsigned long)pos + m) >> PAGE_SHIFT; + + if (remap_pfn_range(vma, vma->vm_start + k, + ((unsigned long)pos + m) >> PAGE_SHIFT, + p, vma->vm_page_prot)) + return -EAGAIN; + k += p; + m += m_increment; + } + m = 0; + + /* UV Buffer in case of NV12 format */ + if (V4L2_PIX_FMT_NV12 == vout->pix.pixelformat) { + pos = (void *)vout->buf_phy_uv_addr[i]; + + v4gfx_tiler_image_incr_uv(vout, &m_increment); + + /* UV buffer is height / 2 */ + for (j = 0; j < vout->pix.height / 2; j++) { + /* map each page of the line */ + DUMPMMAP("UV buffer", k, vma, m, pos, p); + + vma->vm_pgoff = ((unsigned long)pos + m) >> PAGE_SHIFT; + + if (remap_pfn_range(vma, vma->vm_start + k, + ((unsigned long)pos + m) >> PAGE_SHIFT, + p, vma->vm_page_prot)) + return -EAGAIN; + k += p; + m += m_increment; + } + } + + vma->vm_flags &= ~VM_IO; /* using shared anonymous pages */ + vout->mmap_count++; + GFXLOG(1, V4L2DEV(vout), "Exiting %s\n", __func__); + return 0; +} + +static int vidfop_release(struct file *file) +{ + struct v4gfx_device *vout = file->private_data; + struct videobuf_queue *q; + unsigned int r = 0; + + GFXLOG(1, V4L2DEV(vout), "Entering %s\n", __func__); + GFXLOG(1, V4L2DEV(vout), + "current process id/pid is %d\n", current->pid); + + if (!vout) + goto end; + + vout->opened = vout->opened ? vout->opened - 1 : 0; + if (vout->opened) { + r = 0; + goto end; + } + + clear_bit(1, &vout->producer_ready); + + q = &vout->vbq; + + if (vout->streaming) { + del_timer_sync(&vout->acquire_timer); + clear_bit(1, &vout->acquire_timedout); + + vout->streaming = false; + videobuf_streamoff(q); + videobuf_queue_cancel(q); + } + + if (q->bufs[0] && (V4L2_MEMORY_MMAP == q->bufs[0]->memory)) + videobuf_mmap_free(q); + vout->mmap_count = 0; + + /* Free buffers */ + if (vout->buffer_allocated) { + v4gfx_tiler_buffer_free(vout, vout->buffer_allocated, 0); + vout->buffer_allocated = 0; + } + + memset(&vout->crop, 0, sizeof(vout->crop)); + memset(&vout->pix, 0, sizeof(vout->pix)); + + file->private_data = NULL; + +end: + GFXLOG(1, V4L2DEV(vout), "Exiting %s\n", __func__); + return r; +} + +static int vidfop_open(struct file *file) +{ + struct v4gfx_device *vout = NULL; + struct videobuf_queue *q; + int rv = 0; + + vout = video_drvdata(file); + if (vout == NULL) { + rv = -ENODEV; + goto end; + } + + GFXLOG(1, V4L2DEV(vout), "Entering %s : %x\n", __func__, (int)vout); + GFXLOG(1, V4L2DEV(vout), "current pid is %d\n", current->pid); + + vout->opened += 1; + file->private_data = vout; + + if (vout->opened > 1) { + GFXLOG(1, V4L2DEV(vout), "Another opening....\n"); + goto end; + } + + clear_bit(1, &vout->producer_ready); + + q = &vout->vbq; + video_vbq_ops.buf_setup = vbq_ops_buf_setup; + video_vbq_ops.buf_prepare = vbq_ops_buf_prepare; + video_vbq_ops.buf_release = vbq_ops_buf_release; + video_vbq_ops.buf_queue = vbq_ops_buf_queue; + + videobuf_queue_dma_contig_init(q, &video_vbq_ops, q->dev, + &vout->vbq_lock, vout->type, V4L2_FIELD_NONE, + sizeof(struct videobuf_buffer), vout); + +end: + GFXLOG(1, V4L2DEV(vout), "Exiting %s :%d\n", __func__, rv); + return rv; +} + +/* V4L2 ioctls */ +static int vidioc_querycap(struct file *file, void *fh, + struct 
v4l2_capability *cap) +{ + struct v4gfx_device *vout = fh; + GFXLOG(1, V4L2DEV(vout), "Entering %s\n", __func__); + + strlcpy(cap->driver, VOUT_NAME, sizeof(cap->driver)); + strlcpy(cap->card, vout->vfd->name, sizeof(cap->card)); + cap->bus_info[0] = '\0'; + cap->version = VOUT_VERSION; + cap->capabilities = V4L2_CAP_STREAMING | V4L2_CAP_VIDEO_OUTPUT; + return 0; +} + +static int vidioc_log_status(struct file *file, void *fh) +{ + /* struct v4gfx_device *vout = fh; */ + printk(KERN_INFO "\n"); + printk(KERN_INFO "============== START LOG STATUS ================\n"); + printk(KERN_INFO "=============== END LOG STATUS =================\n"); + printk(KERN_INFO "\n"); + return 0; +} + +static int vidioc_enum_fmt_vid_out(struct file *file, void *fh, + struct v4l2_fmtdesc *fmt) +{ + struct v4gfx_device *vout = fh; + int index = fmt->index; + enum v4l2_buf_type type = fmt->type; + int rv = 0; + + GFXLOG(1, V4L2DEV(vout), "+%s\n", __func__); + + fmt->index = index; + fmt->type = type; + if (index >= NUM_OUTPUT_FORMATS) { + rv = -EINVAL; + goto end; + } + + fmt->flags = gfx_bc_formats[index].flags; + strlcpy(fmt->description, gfx_bc_formats[index].description, + sizeof(fmt->description)); + fmt->pixelformat = gfx_bc_formats[index].pixelformat; +end: + GFXLOG(1, V4L2DEV(vout), "-%s [%d]\n", __func__, rv); + return rv; +} + +static int vidioc_g_fmt_vid_out(struct file *file, void *fh, + struct v4l2_format *f) +{ + struct v4gfx_device *vout = fh; + GFXLOG(1, V4L2DEV(vout), "+%s\n", __func__); + + f->fmt.pix = vout->pix; + + GFXLOG(1, V4L2DEV(vout), "-%s [%d]\n", __func__, 0); + return 0; + +} + +/* + * VIDIOC_TRY_FMT ioctl is equivalent to VIDIOC_S_FMT with one + * exception: it does not change driver state. It can also be called at any + * time, never returning EBUSY. + */ +static int vidioc_try_fmt_vid_out(struct file *file, void *fh, + struct v4l2_format *f) +{ + int r; + struct v4gfx_device *vout = fh; + GFXLOG(1, V4L2DEV(vout), "+%s\n", __func__); + + r = v4gfx_try_format(&f->fmt.pix); + + GFXLOG(1, V4L2DEV(vout), "-%s [%d]\n", __func__, r); + return (r >= 0) ? 0 : r; +} + +static int vidioc_s_fmt_vid_out(struct file *file, void *fh, + struct v4l2_format *f) +{ + struct v4gfx_device *vout = fh; + int rv = 0; + int bpp; + + GFXLOG(1, V4L2DEV(vout), "+%s\n", __func__); + + mutex_lock(&vout->lock); + if (vout->streaming) { + rv = -EBUSY; + goto end; + } + + bpp = v4gfx_try_format(&f->fmt.pix); + if (bpp <= 0) { + rv = bpp; + goto end; + } + + /* try & set the new output format */ + vout->bpp = bpp; + vout->pix = f->fmt.pix; + +end: + mutex_unlock(&vout->lock); + GFXLOG(1, V4L2DEV(vout), "-%s [%d]\n", __func__, rv); + return rv; +} + +static int vidioc_reqbufs(struct file *file, void *fh, + struct v4l2_requestbuffers *req) +{ + struct bc_buf_params2 bc_params; + struct v4gfx_device *vout = fh; + struct videobuf_queue *q = &vout->vbq; + unsigned int i; + int rv = 0; + + GFXLOG(1, V4L2DEV(vout), "+%s\n", __func__); + + if ((req->type != V4L2_BUF_TYPE_VIDEO_OUTPUT) || + (req->count < 0) || + (req->memory != V4L2_MEMORY_MMAP) + ) { + rv = -EINVAL; goto end; + } + + + mutex_lock(&vout->lock); + /* Cannot be requested when streaming is on */ + if (vout->streaming) { + mutex_unlock(&vout->lock); + rv = -EBUSY; goto end; + } + + /* + * TODO A count value of zero frees all buffers, after aborting or + * finishing any DMA in progress, an implicit VIDIOC_STREAMOFF. 
+ */ + + /* If buffers are already allocated free them */ + if (q->bufs[0] && (V4L2_MEMORY_MMAP == q->bufs[0]->memory)) { + if (vout->mmap_count) { + mutex_unlock(&vout->lock); + rv = -EBUSY; goto end; + } + + v4gfx_tiler_buffer_free(vout, vout->buffer_allocated, 0); + vout->buffer_allocated = 0; + + videobuf_mmap_free(q); + } + + bc_params.count = req->count; + bc_params.width = vout->pix.width; + bc_params.height = vout->pix.height; + bc_params.pixel_fmt = vout->pix.pixelformat; +/* bc_params.stride = vout->pix.bytesperline; */ + rv = bc_setup(0, &bc_params); + if (rv < 0) { + GFXLOG(1, V4L2DEV(vout), + "+%s bc_setup() failed %d\n", __func__, rv); + goto end; + } + + /* + * Note that the actual buffer allocation is done in + * vbq_ops_buf_setup + */ + rv = videobuf_reqbufs(q, req); + if (rv < 0) { + mutex_unlock(&vout->lock); + goto end; + } + + INIT_LIST_HEAD(&vout->dma_queue); + INIT_LIST_HEAD(&vout->sync_queue); + + /* + * The realloc will free the old array and allocate a new one + */ + rv = v4gfx_buffer_array_realloc(vout, vout->buffer_allocated, + req->count); + if (rv < 0) { + mutex_unlock(&vout->lock); + goto end; + } + + vout->memory = req->memory; + vout->buffer_allocated = req->count; + + for (i = 0; i < req->count; i++) { + + v4gfx_buffer_array_fill(vout, i, + vout->buf_phy_addr[i], + V4L2_PIX_FMT_NV12 == vout->pix.pixelformat ? + vout->buf_phy_uv_addr[i] : 0); + + bc_setup_buffer(0, &bc_params, vout->buf_phys_addr_array[i]); + } + bc_setup_complete(0, &bc_params); + + mutex_unlock(&vout->lock); +end: + GFXLOG(1, V4L2DEV(vout), "-%s [%d]\n", __func__, rv); + return rv; +} + +static int vidioc_querybuf(struct file *file, void *fh, + struct v4l2_buffer *b) +{ + struct v4gfx_device *vout = fh; + int rv; + + GFXLOG(1, V4L2DEV(vout), "+%s\n", __func__); + + rv = videobuf_querybuf(&vout->vbq, b); + + GFXLOG(1, V4L2DEV(vout), "-%s [%d]\n", __func__, rv); + return rv; +} + +static int vidioc_qbuf(struct file *file, void *fh, + struct v4l2_buffer *buf) +{ + struct v4gfx_device *vout = fh; + struct videobuf_queue *q = &vout->vbq; + int rv = 0; + + GFXLOG(1, V4L2DEV(vout), "qbuf buf: %d\n", buf->index); + + if ((V4L2_BUF_TYPE_VIDEO_OUTPUT != buf->type) || + (buf->index >= vout->buffer_allocated) || + (q->bufs[buf->index]->memory != buf->memory)) { + return -EINVAL; + } + if (V4L2_MEMORY_USERPTR == buf->memory) { + if ((buf->length < vout->pix.sizeimage) || + (0 == buf->m.userptr)) { + return -EINVAL; + } + } + + rv = videobuf_qbuf(q, buf); + + mutex_lock(&vout->lock); + if (vout->streaming && vout->acquire_timeout_ms) { + del_timer(&vout->acquire_timer); + mod_timer(&vout->acquire_timer, + jiffies + msecs_to_jiffies(vout->acquire_timeout_ms)); + } + mutex_unlock(&vout->lock); + + GFXLOG(2, V4L2DEV(vout), "-%s [%d]\n", __func__, rv); + return rv; +} + +static int vidioc_dqbuf(struct file *file, void *fh, + struct v4l2_buffer *buf) +{ + struct v4gfx_device *vout = fh; + struct videobuf_queue *q = &vout->vbq; + int rv = 0; + int nonblocking = file->f_flags & O_NONBLOCK ? 
1 : 0; + + GFXLOG(2, V4L2DEV(vout), "dqbuf buf: %x (%d)\n", + (int)buf, nonblocking); + + mutex_lock(&vout->lock); + if (!vout->streaming) { + mutex_unlock(&vout->lock); + return -EINVAL; + } + + mutex_unlock(&vout->lock); + +#if V4GFX_WAITMETHOD == V4GFX_WAIT_DEQUE +{ + struct videobuf_buffer *sync_frame = NULL; + + wait_event_interruptible(vout->sync_done, + !list_empty(&vout->sync_queue)); + + sync_frame = v4gfx_get_next_syncframe(vout); + + if (sync_frame) { + (void)v4gfx_wait_on_pending(vout, sync_frame->i); + v4gfx_done_syncframe(vout, sync_frame); + } else { + /* Can be from an interrupted task */ + printk(KERN_INFO "No sync frame\n"); + } +} +#endif + + rv = videobuf_dqbuf(q, buf, nonblocking); + + GFXLOG(2, V4L2DEV(vout), "-%s [%d]\n", __func__, rv); + return rv; +} + +static int vidioc_streamon(struct file *file, void *fh, + enum v4l2_buf_type i) +{ + struct v4gfx_device *vout = fh; + struct videobuf_queue *q = &vout->vbq; + int rv = 0; + GFXLOG(1, V4L2DEV(vout), "+%s\n", __func__); + + mutex_lock(&vout->lock); + + if (vout->streaming) { + rv = -EBUSY; + goto end_unlock; + } + + vout->cur_frm = NULL; + vout->locked_frm = NULL; + + rv = videobuf_streamon(q); + if (rv < 0) + goto end_unlock; + + if (list_empty(&vout->dma_queue)) { + rv = -EIO; + goto end_unlock; + } + + vout->streaming = true; + + /* Activate the next current buffer */ + vout->cur_frm = + list_entry(vout->dma_queue.next, struct videobuf_buffer, queue); + list_del(&vout->cur_frm->queue); + vout->cur_frm->state = VIDEOBUF_ACTIVE; + + set_bit(1, &vout->producer_ready); + wake_up_interruptible(&vout->consumer_wait); + +end_unlock: + mutex_unlock(&vout->lock); + GFXLOG(1, V4L2DEV(vout), "-%s [%d]\n", __func__, rv); + + return rv; +} + +static int vidioc_streamoff(struct file *file, void *fh, + enum v4l2_buf_type i) +{ + struct v4gfx_device *vout = fh; + int rv = 0; + + mutex_lock(&vout->lock); + if (!vout->streaming) { + rv = -EINVAL; + goto end; + } + + del_timer_sync(&vout->acquire_timer); + clear_bit(1, &vout->acquire_timedout); + + clear_bit(1, &vout->producer_ready); + + vout->streaming = false; + + INIT_LIST_HEAD(&vout->dma_queue); + INIT_LIST_HEAD(&vout->sync_queue); + + videobuf_streamoff(&vout->vbq); + videobuf_queue_cancel(&vout->vbq); +end: + mutex_unlock(&vout->lock); + GFXLOG(1, V4L2DEV(vout), "-%s [%d]\n", __func__, rv); + return rv; +} + +static int vidioc_cropcap(struct file *file, void *fh, + struct v4l2_cropcap *cropcap) +{ + struct v4gfx_device *vout = fh; + struct v4l2_pix_format *pix = &vout->pix; + + if (cropcap->type != V4L2_BUF_TYPE_VIDEO_OUTPUT) + return -EINVAL; + + /* Width and height are always even */ + cropcap->bounds.width = pix->width & ~1; + cropcap->bounds.height = pix->height & ~1; + cropcap->pixelaspect.numerator = 1; + cropcap->pixelaspect.denominator = 1; + return 0; +} + +static int vidioc_g_crop(struct file *file, void *fh, struct v4l2_crop *crop) +{ + struct v4gfx_device *vout = fh; + + if (crop->type != V4L2_BUF_TYPE_VIDEO_OUTPUT) + return -EINVAL; + crop->c = vout->crop; + GFXLOG(1, V4L2DEV(vout), "g_crop w:%d,h:%d\n", + crop->c.width, crop->c.height); + return 0; +} + +static int vidioc_s_crop(struct file *file, void *fh, struct v4l2_crop *crop) +{ + struct v4gfx_device *vout = fh; + GFXLOG(1, V4L2DEV(vout), "Entering %s\n", __func__); + vout->crop = crop->c; + return 0; +} + +static long vidioc_default(struct file *file, void *fh, int cmd, void *arg) +{ + int rv = 0; + struct v4gfx_device *vout = fh; + GFXLOG(1, V4L2DEV(vout), "Entering %s (c=0x%x)\n", __func__, cmd); + + switch
(cmd) { + case V4L2_GFX_IOC_CONSUMER: + { + struct v4l2_gfx_consumer_params *parms = + (struct v4l2_gfx_consumer_params *)arg; + if (parms->type != V4L2_GFX_CONSUMER_WAITSTREAM) + return -EINVAL; + + clear_bit(1, &vout->acquire_timedout); + + rv = wait_event_interruptible(vout->consumer_wait, + test_bit(1, &vout->producer_ready)); + mutex_lock(&vout->lock); + if (rv == -ERESTARTSYS) { + /* + * This condition is hit when the user process + * generates a signal, when we return this value the + * process will continue to block on the ioctl + */ + GFXLOG(1, V4L2DEV(vout), "Woke by signal: %d\n", + ERESTARTSYS); + } else { + vout->acquire_timeout_ms = parms->acquire_timeout_ms; + } + mutex_unlock(&vout->lock); + break; + + } + case V4L2_GFX_IOC_INFO: + { + struct v4l2_gfx_info_params *parms = + (struct v4l2_gfx_info_params *)arg; + parms->opencnt = vout->opened; + break; + } + case V4L2_GFX_IOC_PRODUCER: + { + struct v4l2_gfx_producer_params *parms = + (struct v4l2_gfx_producer_params *)arg; + vout->producer_flags = parms->flags; + if (!(vout->producer_flags & V4L2_GFX_PRODUCER_MASK_OPEN)) { + /* + * We decrement the count here because the Android + * mediaserver threads won't close the V4L2 device + */ + if (vout->opened) + vout->opened--; + } + break; + } + case V4L2_GFX_IOC_ACQ: + { + struct v4l2_gfx_buf_params *parms = + (struct v4l2_gfx_buf_params *)arg; + int bufid = -1; + int timedout; + rv = v4gfx_frame_lock(vout, &bufid); + if (!rv) { + parms->bufid = bufid; + parms->crop_top = vout->crop.top; + parms->crop_left = vout->crop.left; + parms->crop_width = vout->crop.width; + parms->crop_height = vout->crop.height; + GFXLOG(3, V4L2DEV(vout), "%d:%d:%d:%d:%d\n", + parms->bufid , + parms->crop_top , + parms->crop_left , + parms->crop_width , + parms->crop_height); + } + timedout = test_and_clear_bit(1, &vout->acquire_timedout); + if (timedout) { + GFXLOG(1, V4L2DEV(vout), "ACQ Timed out\n"); + rv = -ETIMEDOUT; + } + mutex_lock(&vout->lock); + if (!vout->streaming) { + GFXLOG(1, V4L2DEV(vout), "ACQ stream off\n"); + rv = -ENODEV; + } + mutex_unlock(&vout->lock); + break; + } + case V4L2_GFX_IOC_REL: + { + struct v4l2_gfx_buf_params *parms = + (struct v4l2_gfx_buf_params *)arg; + int bufid = parms->bufid; + rv = v4gfx_frame_unlock(vout, bufid); + break; + } + default: + rv = -EINVAL; + } + GFXLOG(1, V4L2DEV(vout), "Leaving %s (%d)\n", __func__, rv); + return rv; +} + +static int vidioc_s_ctrl(struct file *file, void *fh, struct v4l2_control *a) +{ + struct v4gfx_device *vout = fh; + GFXLOG(1, V4L2DEV(vout), "%s: %d\n", __func__, a->id); + return 0; +} + +struct v4l2_ioctl_ops v4gfx_ioctl_ops = { + .vidioc_querycap = vidioc_querycap, + .vidioc_log_status = vidioc_log_status, + .vidioc_enum_fmt_vid_out = vidioc_enum_fmt_vid_out, + .vidioc_g_fmt_vid_out = vidioc_g_fmt_vid_out, + .vidioc_try_fmt_vid_out = vidioc_try_fmt_vid_out, + .vidioc_s_fmt_vid_out = vidioc_s_fmt_vid_out, + .vidioc_reqbufs = vidioc_reqbufs, + .vidioc_querybuf = vidioc_querybuf, + .vidioc_qbuf = vidioc_qbuf, + .vidioc_dqbuf = vidioc_dqbuf, + .vidioc_streamon = vidioc_streamon, + .vidioc_streamoff = vidioc_streamoff, + .vidioc_cropcap = vidioc_cropcap, + .vidioc_g_crop = vidioc_g_crop, + .vidioc_s_crop = vidioc_s_crop, + .vidioc_default = vidioc_default, + .vidioc_s_ctrl = vidioc_s_ctrl, +}; + +const struct v4l2_file_operations v4gfx_fops = { + .owner = THIS_MODULE, + .ioctl = video_ioctl2, + .mmap = vidfop_mmap, + .open = vidfop_open, + .release = vidfop_release, +}; + diff --git a/drivers/media/video/omapgfx/gfx_tiler.c 
b/drivers/media/video/omapgfx/gfx_tiler.c new file mode 100644 index 0000000..1e77983 --- /dev/null +++ b/drivers/media/video/omapgfx/gfx_tiler.c @@ -0,0 +1,152 @@ +/* + * drivers/media/video/omap/gfx_tiler.c + * + * Copyright (C) 2010 Texas Instruments. + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + * + */ +#include <linux/errno.h> +#include <linux/kernel.h> +#include <linux/types.h> + +#include "v4gfx.h" +#include "gfx_bc.h" + +#ifdef CONFIG_TILER_OMAP +#include <mach/tiler.h> +#define TILER_ALLOCATE_V4L2 +#endif + +void v4gfx_tiler_buffer_free(struct v4gfx_device *vout, unsigned int count, + unsigned int startindex) +{ + int i; + + if (startindex < 0) + startindex = 0; + if (startindex + count > VIDEO_MAX_FRAME) + count = VIDEO_MAX_FRAME - startindex; + + for (i = startindex; i < startindex + count; i++) { + if (vout->buf_phy_addr_alloced[i]) + tiler_free(vout->buf_phy_addr_alloced[i]); + if (vout->buf_phy_uv_addr_alloced[i]) + tiler_free(vout->buf_phy_uv_addr_alloced[i]); + vout->buf_phy_addr[i] = 0; + vout->buf_phy_addr_alloced[i] = 0; + vout->buf_phy_uv_addr[i] = 0; + vout->buf_phy_uv_addr_alloced[i] = 0; + } +} + +/* Allocate the buffers for TILER space. Ideally, the buffers will be ONLY + in tiler space, with different rotated views available by just a convert. + */ +int v4gfx_tiler_buffer_setup(struct v4gfx_device *vout, + unsigned int *count, unsigned int startindex, + struct v4l2_pix_format *pix) +{ + /* startindex is always passed as 0, possibly tidy up? */ + int i, aligned = 1, bpp; + enum tiler_fmt fmt; + int rv = 0; + + /* normalize buffers to allocate so we stay within bounds */ + int start = (startindex < 0) ? 0 : startindex; + int n_alloc = (start + *count > VIDEO_MAX_FRAME) ? + VIDEO_MAX_FRAME - start : *count; + + GFXLOG(1, V4L2DEV(vout), "+%s\n", __func__); + bpp = v4gfx_try_format(pix); + if (bpp <= 0) { + rv = bpp; /* error condition */ + goto end; + } + + GFXLOG(1, V4L2DEV(vout), "tiler buffer alloc: " + "count = %d, start = %d :\n", *count, startindex); + + /* special allocation scheme for NV12 format */ + if (V4L2_PIX_FMT_NV12 == pix->pixelformat) { + + tiler_alloc_packed_nv12(&n_alloc, ALIGN(pix->width, 128), + pix->height, + (void **) vout->buf_phy_addr + start, + (void **) vout->buf_phy_uv_addr + start, + (void **) vout->buf_phy_addr_alloced + start, + (void **) vout->buf_phy_uv_addr_alloced + start, + aligned); + + } else { + /* Only bpp of 1, 2, and 4 is supported by tiler */ + fmt = (bpp == 1 ? TILFMT_8BIT : + bpp == 2 ? TILFMT_16BIT : + bpp == 4 ? TILFMT_32BIT : TILFMT_INVALID); + if (fmt == TILFMT_INVALID) { + rv = -ENOMEM; + goto end; + } + + tiler_alloc_packed(&n_alloc, fmt, ALIGN(pix->width, 128 / bpp), + pix->height, + (void **) vout->buf_phy_addr + start, + (void **) vout->buf_phy_addr_alloced + start, + aligned); + } + + GFXLOG(1, V4L2DEV(vout), + "allocated %d buffers\n", n_alloc); + + if (n_alloc < *count) { + if (n_alloc && (startindex == -1 || + V4L2_MEMORY_MMAP != vout->memory)) { + /* TODO: check this condition's logic */ + v4gfx_tiler_buffer_free(vout, n_alloc, start); + *count = 0; + rv = -ENOMEM; + goto end; + } + } + + for (i = start; i < start + n_alloc; i++) { + GFXLOG(1, V4L2DEV(vout), + "y=%08lx (%d) uv=%08lx (%d)\n", + vout->buf_phy_addr[i], + vout->buf_phy_addr_alloced[i] ? 1 : 0, + vout->buf_phy_uv_addr[i], + vout->buf_phy_uv_addr_alloced[i] ? 
1 : 0);
+	}
+
+	*count = n_alloc;
+end:
+	GFXLOG(1, V4L2DEV(vout), "-%s [%d]\n", __func__, rv);
+	return rv;
+}
+
+void v4gfx_tiler_image_incr(struct v4gfx_device *vout, int *cpu_pgwidth,
+			int *tiler_increment)
+{
+	/* for NV12, the Y buffer is 1 bpp */
+	if (V4L2_PIX_FMT_NV12 == vout->pix.pixelformat) {
+		*cpu_pgwidth =
+			(vout->pix.width + TILER_PAGE - 1) & ~(TILER_PAGE - 1);
+		*tiler_increment = 64 * TILER_WIDTH;
+	} else {
+		*cpu_pgwidth = (vout->pix.width * vout->bpp + TILER_PAGE - 1) &
+			~(TILER_PAGE - 1);
+		if (vout->bpp > 1)
+			*tiler_increment = 2 * 64 * TILER_WIDTH;
+		else
+			*tiler_increment = 64 * TILER_WIDTH;
+	}
+}
+
+void v4gfx_tiler_image_incr_uv(struct v4gfx_device *vout, int *tiler_increment)
+{
+	if (vout->pix.pixelformat == V4L2_PIX_FMT_NV12)
+		*tiler_increment = 2 * 64 * TILER_WIDTH;
+	/* Otherwise do nothing */
+}
diff --git a/drivers/media/video/omapgfx/v4gfx.h b/drivers/media/video/omapgfx/v4gfx.h
new file mode 100644
index 0000000..b0b72dd
--- /dev/null
+++ b/drivers/media/video/omapgfx/v4gfx.h
@@ -0,0 +1,171 @@
+/*
+ * drivers/media/video/omapgfx/v4gfx.h
+ *
+ * Copyright (C) 2010 Texas Instruments.
+ *
+ * This file is licensed under the terms of the GNU General Public License
+ * version 2. This program is licensed "as is" without any warranty of any
+ * kind, whether express or implied.
+ */
+
+#ifndef __V4L2_GFX_H__
+#define __V4L2_GFX_H__
+
+#include <linux/version.h>
+#include <media/videobuf-core.h>
+#include <media/v4l2-device.h>
+#include <asm/atomic.h>
+
+#define MAX_VOUT_DEV 3
+
+struct gbl_v4gfx {
+	struct mutex mtx;
+	int state;
+	struct v4l2_device v4l2_dev;
+	struct v4gfx_device *vouts[MAX_VOUT_DEV];
+};
+
+/* per-device data structure */
+struct v4gfx_device {
+
+	struct video_device *vfd;
+
+	struct gbl_v4gfx *gbl_dev;
+
+	int bpp;	/* bytes per pixel */
+
+	enum v4l2_buf_type type;
+
+	struct v4l2_pix_format pix;
+
+	struct v4l2_rect crop;
+
+	enum v4l2_memory memory; /* how memory is managed for the device */
+
+	/* we don't allow the image fmt/size to change once buffers have
+	 * been allocated
+	 */
+	int buffer_allocated;	/* count of buffers allocated */
+
+	/* allow reuse of a previously allocated buffer that is big enough */
+	int buffer_size;
+
+	unsigned long buf_phy_addr[VIDEO_MAX_FRAME];
+
+	unsigned long buf_phy_uv_addr[VIDEO_MAX_FRAME]; /* NV12 support */
+
+	/* keep track of which buffers we actually allocated (via tiler) */
+	unsigned long buf_phy_uv_addr_alloced[VIDEO_MAX_FRAME];
+
+	unsigned long buf_phy_addr_alloced[VIDEO_MAX_FRAME];
+
+	/*
+	 * For each V4L2 buffer requested we will have an array of page
+	 * addresses to give through the buffer class API
+	 */
+	unsigned long **buf_phys_addr_array;
+
+	int mmap_count;
+
+	int opened;	/* inc/dec on open/close of the device */
+
+	bool streaming;	/* is streaming in progress? */
+
+	struct mutex lock;	/* protect shared data structures in ioctl */
+
+	struct videobuf_buffer *cur_frm;
+
+	struct videobuf_buffer *locked_frm;
+
+	struct videobuf_queue vbq;
+
+	/*
+	 * Buffers added by QBUF from the producer application
+	 */
+	struct list_head dma_queue;
+
+	/*
+	 * Buffers marked as done with by the consumer application but could
+	 * still be in use by the GPU. DQBUF will examine this queue
+	 * for available buffers.
+	 */
+	struct list_head sync_queue;
+
+	wait_queue_head_t sync_done;
+
+	unsigned long producer_ready;
+
+	wait_queue_head_t consumer_wait;
+
+	/*
+	 * If acquire_timeout_ms is non-zero the acquire_timer will be reset
+	 * when buffers are queued.
+	 * If the timer expires, -ETIMEDOUT is returned via the
+	 * V4L2_GFX_IOC_ACQ ioctl.
+	 */
+	struct timer_list acquire_timer;
+
+	unsigned int acquire_timeout_ms;
+
+	unsigned long acquire_timedout;
+
+	spinlock_t vbq_lock;	/* spinlock for videobuf queues */
+
+	unsigned int producer_flags;
+};
+
+extern int debug;
+
+#define GFXLOG(level, dev, fmt, arg...) \
+do { \
+	if (debug >= level) \
+		printk(KERN_INFO "%s: " fmt, (dev)->name , ## arg); \
+} while (0)
+
+#define GFXLOGA(level, fmt, arg...) \
+do { \
+	if (debug >= level) \
+		printk(KERN_INFO "v4l2-gfx: " fmt, ## arg); \
+} while (0)
+
+/*
+ * Convert local handle to v4l2_dev; currently only a global dev is supported
+ */
+#define V4L2DEV(vout) (&vout->gbl_dev->v4l2_dev)
+
+/* tiler */
+void v4gfx_tiler_buffer_free(
+		struct v4gfx_device *vout, unsigned int count,
+		unsigned int startindex);
+
+int v4gfx_tiler_buffer_setup(struct v4gfx_device *vout,
+		unsigned int *count, unsigned int startindex,
+		struct v4l2_pix_format *pix);
+
+void v4gfx_tiler_image_incr(struct v4gfx_device *vout,
+		int *cpu_pgwidth, int *tiler_increment);
+
+void v4gfx_tiler_image_incr_uv(struct v4gfx_device *vout, int *tiler_increment);
+
+/* v4gfx */
+int v4gfx_try_format(struct v4l2_pix_format *pix);
+void v4gfx_buffer_array_free(struct v4gfx_device *vout, int cnt);
+extern struct v4l2_ioctl_ops v4gfx_ioctl_ops;
+extern const struct v4l2_file_operations v4gfx_fops;
+extern void v4gfx_acquire_timer(unsigned long arg);
+
+/* Other stuff */
+#define YUYV_BPP	2
+#define RGB565_BPP	2
+#define RGB24_BPP	3
+#define RGB32_BPP	4
+
+#define VOUT_NAME	"v4gfx"
+
+/* configuration macros */
+#define VOUT_MAJOR_VERSION 0
+#define VOUT_MINOR_VERSION 0
+#define VOUT_RELEASE 0
+#define VOUT_VERSION \
+	KERNEL_VERSION(VOUT_MAJOR_VERSION, VOUT_MINOR_VERSION, VOUT_RELEASE)
+
+#endif	/* ifndef __V4L2_GFX_H__ */
diff --git a/drivers/media/video/tiler/Kconfig b/drivers/media/video/tiler/Kconfig
new file mode 100644
index 0000000..202f7f8
--- /dev/null
+++ b/drivers/media/video/tiler/Kconfig
@@ -0,0 +1,129 @@
+config HAVE_TI_TILER
+	bool
+	default y
+	depends on ARCH_OMAP4
+
+menuconfig TI_TILER
+	tristate "TI TILER support"
+	default y
+	depends on HAVE_TI_TILER
+	help
+	  TILER and TILER-DMM driver for TI chips. The TI TILER device
+	  enables video rotation on certain TI chips such as OMAP4 or
+	  Netra. Video rotation will be limited without TILER support.
+
+config TILER_GRANULARITY
+	int "Allocation granularity (2^n)"
+	range 1 4096
+	default 128
+	depends on TI_TILER
+	help
+	  This option sets the default TILER allocation granularity. It can
+	  be overridden by the tiler.grain boot argument.
+
+	  The allocation granularity is the smallest TILER block size (in
+	  bytes) managed distinctly by the TILER driver. TILER blocks of any
+	  size are managed in chunks of at least this size.
+
+	  Must be a 2^n in the range of 1 to 4096; however, the TILER driver
+	  may use a larger supported granularity.
+
+	  Supported values are: 1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024,
+	  2048, 4096.
+
+config TILER_CACHE_LIMIT
+	int "Memory limit to cache free pages in MBytes"
+	range 0 128
+	default 40
+	depends on TI_TILER
+	help
+	  This option sets the minimum amount of memory that TILER retains
+	  even when less TILER-allocated memory is in use. The unused memory
+	  is instead kept in a cache to speed up allocation and freeing of
+	  physical pages.
+
+	  This option can be overridden by the tiler.cache boot argument.
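+
+	  For example, with the default of 40, up to 40 MB worth of freed
+	  pages are kept cached for reuse instead of being returned to the
+	  system.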
+
+	  Initially TILER uses less memory than this limit (starting at 0),
+	  and it will not release any memory until its usage first reaches
+	  this limit. Thereafter, TILER releases any unused memory
+	  immediately as long as its usage stays above this threshold.
+
+config TILER_SECURITY
+	int "Process security"
+	range 0 1
+	default 1
+	depends on TI_TILER
+	help
+	  This option sets the default TILER process security. It can be
+	  overridden by the tiler.secure boot argument.
+
+	  If process security is enabled (1), the TILER driver uses a
+	  separate TILER buffer address space (for mmap purposes) for each
+	  process. This means that one process cannot simply map another
+	  process's TILER buffer into its memory, even for sharing. However,
+	  it can recreate the buffer by knowing the ids and secret keys for
+	  the TILER blocks involved. This is the preferred configuration.
+
+	  Disabling security (0) allows sharing buffers simply by sharing the
+	  mmap offset and size. However, because buffers can potentially be
+	  shared between processes, it delays resource cleanup while any
+	  process has an open TILER device.
+
+config TILER_SSPTR_ID
+	int "Use SSPtr for id"
+	range 0 1
+	default 1
+	depends on TI_TILER
+	help
+	  This option sets the default behavior for TILER block ids. It can
+	  be overridden by the tiler.ssptr_id boot argument.
+
+	  If true, the TILER driver uses the system-space (physical) address
+	  (SSPtr) of a TILER block as its unique id. This may help sharing
+	  TILER blocks between co-processors if using a constant key for each
+	  block.
+
+	  Note that the SSPtr is unique for each TILER block.
+
+config TILER_SECURE
+	bool "Secure TILER build"
+	default n
+	depends on TI_TILER
+	help
+	  This option forces TILER security features on, bypassing the
+	  module parameters.
+
+	  If set, process security will be hardwired and the ssptr and
+	  offset lookup APIs are removed.
+
+config TILER_EXPOSE_SSPTR
+	bool "Expose SSPtr to userspace"
+	default y
+	depends on TI_TILER
+	help
+	  This option sets whether SSPtrs for blocks are exposed
+	  during TILIOC_GBLK ioctls (MemMgr_Alloc APIs). In a secure
+	  TILER build, this may be the only way for the userspace code
+	  to learn the system-space addresses of TILER blocks.
+
+	  You can use this flag to see if the userspace is relying on
+	  having access to the SSPtr.
+
+config TILER_ENABLE_NV12
+	bool "Enable NV12 support"
+	default y
+	depends on TI_TILER
+	help
+	  This option enables NV12 functionality in the TILER driver.
+
+	  If set, NV12 support will be compiled into the driver and its
+	  APIs will be enabled.
+
+config TILER_ENABLE_USERSPACE
+	bool "Enable userspace API"
+	default y
+	depends on TI_TILER
+	help
+	  This option enables the userspace API. If set, an ioctl interface
+	  will be available to users.
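The help texts above repeatedly refer to tiler.* boot arguments that override these Kconfig defaults. A minimal sketch of how such overrides are typically wired up with module parameters; the C variable names here (grain, cache_limit, secure, ssptr_id) are illustrative assumptions, not the driver's actual symbols:

	#include <linux/module.h>
	#include <linux/moduleparam.h>

	/* Defaults come from Kconfig; booting with tiler.grain=256 (or
	 * loading the module with grain=256) overrides them. */
	static uint grain = CONFIG_TILER_GRANULARITY;
	module_param(grain, uint, 0444);
	MODULE_PARM_DESC(grain, "Allocation granularity (2^n)");

	static uint cache_limit = CONFIG_TILER_CACHE_LIMIT;
	module_param_named(cache, cache_limit, uint, 0644);
	MODULE_PARM_DESC(cache, "Free-page cache limit in MB");

	static int secure = CONFIG_TILER_SECURITY;
	module_param(secure, int, 0444);
	MODULE_PARM_DESC(secure, "Per-process TILER address spaces (0 or 1)");

	static int ssptr_id = CONFIG_TILER_SSPTR_ID;
	module_param(ssptr_id, int, 0444);
	MODULE_PARM_DESC(ssptr_id, "Use SSPtr as TILER block id (0 or 1)");

Because the module is built as tiler.o, these parameters surface on the kernel command line as tiler.grain, tiler.cache, tiler.secure and tiler.ssptr_id, matching the names quoted in the help texts.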
diff --git a/drivers/media/video/tiler/Makefile b/drivers/media/video/tiler/Makefile new file mode 100644 index 0000000..7d4b113 --- /dev/null +++ b/drivers/media/video/tiler/Makefile @@ -0,0 +1,15 @@ +obj-$(CONFIG_TI_TILER) += tcm/ + +obj-$(CONFIG_TI_TILER) += tiler_dmm.o +tiler_dmm-objs = dmm.o + +obj-$(CONFIG_TI_TILER) += tiler.o +tiler-objs = tiler-geom.o tiler-main.o tiler-iface.o tiler-reserve.o tmm-pat.o + +ifdef CONFIG_TILER_ENABLE_NV12 +tiler-objs += tiler-nv12.o +endif + +ifdef CONFIG_TILER_ENABLE_USERSPACE +tiler-objs += tiler-ioctl.o +endif diff --git a/drivers/media/video/tiler/_tiler.h b/drivers/media/video/tiler/_tiler.h new file mode 100644 index 0000000..9da70d0 --- /dev/null +++ b/drivers/media/video/tiler/_tiler.h @@ -0,0 +1,184 @@ +/* + * _tiler.h + * + * TI TILER driver internal shared definitions. + * + * Author: Lajos Molnar <molnar@ti.com> + * + * Copyright (C) 2009-2011 Texas Instruments, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of Texas Instruments Incorporated nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#ifndef _TILER_H +#define _TILER_H + +#include <linux/kernel.h> +#include <mach/tiler.h> +#include "tcm.h" + +#define TILER_FORMATS (TILFMT_MAX - TILFMT_MIN + 1) + +/* per process (thread group) info */ +struct process_info { + struct list_head list; /* other processes */ + struct list_head groups; /* my groups */ + struct list_head bufs; /* my registered buffers */ + pid_t pid; /* really: thread group ID */ + u32 refs; /* open tiler devices, 0 for processes + tracked via kernel APIs */ + bool kernel; /* tracking kernel objects */ +}; + +struct __buf_info { + struct list_head by_pid; /* list of buffers per pid */ + struct tiler_buf_info buf_info; + struct mem_info *mi[TILER_MAX_NUM_BLOCKS]; /* blocks */ +}; + +/* per group info (within a process) */ +struct gid_info { + struct list_head by_pid; /* other groups */ + struct list_head areas; /* all areas in this pid/gid */ + struct list_head reserved; /* areas pre-reserved */ + struct list_head onedim; /* all 1D areas in this pid/gid */ + u32 gid; /* group ID */ + int refs; /* instances directly using this ptr */ + struct process_info *pi; /* parent */ +}; + +/* info for an area reserved from a container */ +struct area_info { + struct list_head by_gid; /* areas in this pid/gid */ + struct list_head blocks; /* blocks in this area */ + u32 nblocks; /* # of blocks in this area */ + + struct tcm_area area; /* area details */ + struct gid_info *gi; /* link to parent, if still alive */ +}; + +/* info for a block */ +struct mem_info { + struct list_head global; /* reserved / global blocks */ + struct tiler_block_t blk; /* block info */ + struct tiler_pa_info pa; /* pinned physical pages */ + struct tcm_area area; + int refs; /* number of times referenced */ + bool alloced; /* still alloced */ + + struct list_head by_area; /* blocks in the same area / 1D */ + void *parent; /* area info for 2D, else group info */ +}; + +/* tiler geometry information */ +struct tiler_geom { + u32 x_shft; /* unused X-bits (as part of bpp) */ + u32 y_shft; /* unused Y-bits (as part of bpp) */ + u32 bpp; /* bytes per pixel */ + u32 slot_w; /* width of each slot (in pixels) */ + u32 slot_h; /* height of each slot (in pixels) */ + u32 bpp_m; /* modified bytes per pixel (=1 for page mode) */ +}; + +/* methods and variables shared between source files */ +struct tiler_ops { + /* block operations */ + s32 (*alloc) (enum tiler_fmt fmt, u32 width, u32 height, + u32 key, + u32 gid, struct process_info *pi, + struct mem_info **info); + s32 (*pin) (enum tiler_fmt fmt, u32 width, u32 height, + u32 key, u32 gid, struct process_info *pi, + struct mem_info **info, u32 usr_addr); + void (*reserve_nv12) (u32 n, u32 width, u32 height, + u32 gid, struct process_info *pi); + void (*reserve) (u32 n, enum tiler_fmt fmt, u32 width, u32 height, + u32 gid, struct process_info *pi); + void (*unreserve) (u32 gid, struct process_info *pi); + + /* block access operations */ + struct mem_info * (*lock) (u32 key, u32 id, struct gid_info *gi); + struct mem_info * (*lock_by_ssptr) (u32 sys_addr); + void (*describe) (struct mem_info *i, struct tiler_block_info *blk); + void (*unlock_free) (struct mem_info *mi, bool free); + + s32 (*lay_2d) (enum tiler_fmt fmt, u16 n, u16 w, u16 h, u16 band, + u16 align, struct gid_info *gi, + struct list_head *pos); +#ifdef CONFIG_TILER_ENABLE_NV12 + s32 (*lay_nv12) (int n, u16 w, u16 w1, u16 h, struct gid_info *gi, + u8 *p); +#endif + /* group operations */ + struct gid_info * (*get_gi) (struct process_info *pi, u32 gid); + void (*release_gi) (struct gid_info 
*gi); + void (*destroy_group) (struct gid_info *pi); + + /* group access operations */ + void (*add_reserved) (struct list_head *reserved, struct gid_info *gi); + void (*release) (struct list_head *reserved); + + /* area operations */ + s32 (*analize) (enum tiler_fmt fmt, u32 width, u32 height, + u16 *x_area, u16 *y_area, u16 *band, u16 *align); + + /* process operations */ + void (*cleanup) (void); + + /* geometry operations */ + void (*xy) (u32 ssptr, u32 *x, u32 *y); + u32 (*addr) (enum tiler_fmt fmt, u32 x, u32 y); + const struct tiler_geom * (*geom) (enum tiler_fmt fmt); + + /* additional info */ + const struct file_operations *fops; +#ifdef CONFIG_TILER_ENABLE_NV12 + bool nv12_packed; /* whether NV12 is packed into same container */ +#endif + u32 page; /* page size */ + u32 width; /* container width */ + u32 height; /* container height */ + + struct mutex mtx; /* mutex for interfaces and ioctls */ +}; + +void tiler_iface_init(struct tiler_ops *tiler); +void tiler_geom_init(struct tiler_ops *tiler); +void tiler_reserve_init(struct tiler_ops *tiler); +void tiler_nv12_init(struct tiler_ops *tiler); +u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area); +void tiler_ioctl_init(struct tiler_ops *tiler); +struct process_info *__get_pi(pid_t pid, bool kernel); +void _m_unregister_buf(struct __buf_info *_b); +s32 tiler_notify_event(int event, void *data); +void _m_free_process_info(struct process_info *pi); + +struct process_info *__get_pi(pid_t pid, bool kernel); + +#endif diff --git a/drivers/media/video/tiler/dmm.c b/drivers/media/video/tiler/dmm.c new file mode 100644 index 0000000..ce0f07a --- /dev/null +++ b/drivers/media/video/tiler/dmm.c @@ -0,0 +1,277 @@ +/* + * dmm.c + * + * DMM driver support functions for TI OMAP processors. + * + * Authors: David Sin <davidsin@ti.com> + * Lajos Molnar <molnar@ti.com> + * + * Copyright (C) 2009-2010 Texas Instruments, Inc. + * + * This package is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/platform_device.h> /* platform_device() */ +#include <linux/io.h> /* ioremap() */ +#include <linux/errno.h> +#include <linux/slab.h> +#include <linux/delay.h> + +#include <mach/dmm.h> + +#undef __DEBUG__ + +#define MASK(msb, lsb) (((1 << ((msb) + 1 - (lsb))) - 1) << (lsb)) +#define SET_FLD(reg, msb, lsb, val) \ +(((reg) & ~MASK((msb), (lsb))) | (((val) << (lsb)) & MASK((msb), (lsb)))) + +#ifdef __DEBUG__ +#define DEBUG(x, y) printk(KERN_NOTICE "%s()::%d:%s=(0x%08x)\n", \ + __func__, __LINE__, x, (s32)y); +#else +#define DEBUG(x, y) +#endif + +static struct mutex dmm_mtx; + +static struct omap_dmm_platform_data *device_data; + +static int dmm_probe(struct platform_device *pdev) +{ + if (!pdev || !pdev->dev.platform_data) { + printk(KERN_ERR "dmm: invalid platform data\n"); + return -EINVAL; + } + + device_data = pdev->dev.platform_data; + + printk(KERN_INFO "dmm: probe base: %p, irq %d\n", + device_data->base, device_data->irq); + writel(0x88888888, device_data->base + DMM_TILER_OR__0); + writel(0x88888888, device_data->base + DMM_TILER_OR__1); + + return 0; +} + +static struct platform_driver dmm_driver_ldm = { + .probe = dmm_probe, + .driver = { + .owner = THIS_MODULE, + .name = "dmm", + }, +}; + +s32 dmm_pat_refill(struct dmm *dmm, struct pat *pd, enum pat_mode mode) +{ + s32 ret = -EFAULT; + void __iomem *r; + u32 v, i; + + /* Only manual refill supported */ + if (mode != MANUAL) + return ret; + + mutex_lock(&dmm_mtx); + + /* Check that the DMM_PAT_STATUS register has not reported an error */ + r = dmm->base + DMM_PAT_STATUS__0; + v = __raw_readl(r); + if (WARN(v & 0xFC00, KERN_ERR "Abort dmm refill, bad status\n")) { + ret = -EIO; + goto refill_error; + } + + /* Set "next" register to NULL */ + r = dmm->base + DMM_PAT_DESCR__0; + v = __raw_readl(r); + v = SET_FLD(v, 31, 4, (u32) NULL); + __raw_writel(v, r); + + /* Set area to be refilled */ + r = dmm->base + DMM_PAT_AREA__0; + v = __raw_readl(r); + v = SET_FLD(v, 30, 24, pd->area.y1); + v = SET_FLD(v, 23, 16, pd->area.x1); + v = SET_FLD(v, 14, 8, pd->area.y0); + v = SET_FLD(v, 7, 0, pd->area.x0); + __raw_writel(v, r); + wmb(); + +#ifdef __DEBUG__ + printk(KERN_NOTICE "\nx0=(%d),y0=(%d),x1=(%d),y1=(%d)\n", + (char)pd->area.x0, + (char)pd->area.y0, + (char)pd->area.x1, + (char)pd->area.y1); +#endif + + /* First, clear the DMM_PAT_IRQSTATUS register */ + r = dmm->base + DMM_PAT_IRQSTATUS; + __raw_writel(0xFFFFFFFF, r); + wmb(); + + r = dmm->base + DMM_PAT_IRQSTATUS_RAW; + i = 1000; + while(__raw_readl(r) != 0) { + if (--i == 0) { + printk(KERN_ERR "Cannot clear status register\n"); + goto refill_error; + } + udelay(1); + } + + /* Fill data register */ + r = dmm->base + DMM_PAT_DATA__0; + v = __raw_readl(r); + + /* pd->data must be 16 aligned */ + BUG_ON(pd->data & 15); + v = SET_FLD(v, 31, 4, pd->data >> 4); + __raw_writel(v, r); + wmb(); + + /* Read back PAT_DATA__0 to see if write was successful */ + i = 1000; + while(__raw_readl(r) != pd->data) { + if (--i == 0) { + printk(KERN_ERR "Write failed to PAT_DATA__0\n"); + goto refill_error; + } + udelay(1); + } + + r = dmm->base + DMM_PAT_CTRL__0; + v = __raw_readl(r); + v = SET_FLD(v, 31, 28, pd->ctrl.ini); + v = SET_FLD(v, 16, 16, pd->ctrl.sync); + v = SET_FLD(v, 9, 8, pd->ctrl.lut_id); + v = SET_FLD(v, 6, 4, pd->ctrl.dir); + v = SET_FLD(v, 0, 0, pd->ctrl.start); + __raw_writel(v, r); + wmb(); + + /* Check if PAT_IRQSTATUS_RAW is set after the PAT has been refilled */ + r = dmm->base + DMM_PAT_IRQSTATUS_RAW; + i = 
1000; + while((__raw_readl(r) & 0x3) != 0x3) { + if (--i == 0) { + printk(KERN_ERR "Status check failed after PAT refill\n"); + goto refill_error; + } + udelay(1); + } + + /* Again, clear the DMM_PAT_IRQSTATUS register */ + r = dmm->base + DMM_PAT_IRQSTATUS; + __raw_writel(0xFFFFFFFF, r); + wmb(); + + r = dmm->base + DMM_PAT_IRQSTATUS_RAW; + i = 1000; + while (__raw_readl(r) != 0x0) { + if (--i == 0) { + printk(KERN_ERR "Failed to clear DMM PAT IRQSTATUS\n"); + goto refill_error; + } + udelay(1); + } + + /* Again, set "next" register to NULL to clear any PAT STATUS errors */ + r = dmm->base + DMM_PAT_DESCR__0; + v = __raw_readl(r); + v = SET_FLD(v, 31, 4, (u32) NULL); + __raw_writel(v, r); + + /* + * Now, check that the DMM_PAT_STATUS register + * has not reported an error before exiting. + */ + r = dmm->base + DMM_PAT_STATUS__0; + v = __raw_readl(r); + if ((v & 0xFC00) != 0) { + printk(KERN_ERR "Abort dmm refill. Operation failed\n"); + goto refill_error; + } + + ret = 0; + +refill_error: + mutex_unlock(&dmm_mtx); + + return ret; +} +EXPORT_SYMBOL(dmm_pat_refill); + +struct dmm *dmm_pat_init(u32 id) +{ + u32 base; + struct dmm *dmm; + switch (id) { + case 0: + /* only support id 0 for now */ + base = DMM_BASE; + break; + default: + return NULL; + } + + dmm = kmalloc(sizeof(*dmm), GFP_KERNEL); + if (!dmm) + return NULL; + + dmm->base = ioremap(base, DMM_SIZE); + if (!dmm->base) { + kfree(dmm); + return NULL; + } + + __raw_writel(0x88888888, dmm->base + DMM_PAT_VIEW__0); + __raw_writel(0x88888888, dmm->base + DMM_PAT_VIEW__1); + __raw_writel(0x80808080, dmm->base + DMM_PAT_VIEW_MAP__0); + __raw_writel(0x80000000, dmm->base + DMM_PAT_VIEW_MAP_BASE); + __raw_writel(0x88888888, dmm->base + DMM_TILER_OR__0); + __raw_writel(0x88888888, dmm->base + DMM_TILER_OR__1); + + return dmm; +} +EXPORT_SYMBOL(dmm_pat_init); + +/** + * Clean up the physical address translator. + * @param dmm Device data + * @return an error status. + */ +void dmm_pat_release(struct dmm *dmm) +{ + if (dmm) { + iounmap(dmm->base); + kfree(dmm); + } +} +EXPORT_SYMBOL(dmm_pat_release); + +static s32 __init dmm_init(void) +{ + mutex_init(&dmm_mtx); + return platform_driver_register(&dmm_driver_ldm); +} + +static void __exit dmm_exit(void) +{ + mutex_destroy(&dmm_mtx); + platform_driver_unregister(&dmm_driver_ldm); +} + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("davidsin@ti.com"); +MODULE_AUTHOR("molnar@ti.com"); +module_init(dmm_init); +module_exit(dmm_exit); diff --git a/drivers/media/video/tiler/tcm.h b/drivers/media/video/tiler/tcm.h new file mode 100644 index 0000000..3189607 --- /dev/null +++ b/drivers/media/video/tiler/tcm.h @@ -0,0 +1,341 @@ +/* + * tcm.h + * + * TILER container manager specification and support functions for TI + * TILER driver. + * + * Author: Lajos Molnar <molnar@ti.com> + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * * Neither the name of Texas Instruments Incorporated nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef TCM_H +#define TCM_H + +struct tcm; + +/* point */ +struct tcm_pt { + u16 x; + u16 y; +}; + +/* 1d or 2d area */ +struct tcm_area { + bool is2d; /* whether area is 1d or 2d */ + struct tcm *tcm; /* parent */ + struct tcm_pt p0; + struct tcm_pt p1; +}; + +struct tcm { + u16 width, height; /* container dimensions */ + + /* 'pvt' structure shall contain any tcm details (attr) along with + linked list of allocated areas and mutex for mutually exclusive access + to the list. It may also contain copies of width and height to notice + any changes to the publicly available width and height fields. */ + void *pvt; + + /* function table */ + s32 (*reserve_2d)(struct tcm *tcm, u16 height, u16 width, u8 align, + struct tcm_area *area); + s32 (*reserve_1d)(struct tcm *tcm, u32 slots, struct tcm_area *area); + s32 (*free) (struct tcm *tcm, struct tcm_area *area); + void (*deinit) (struct tcm *tcm); +}; + +/*============================================================================= + BASIC TILER CONTAINER MANAGER INTERFACE +=============================================================================*/ + +/* + * NOTE: + * + * Since some basic parameter checking is done outside the TCM algorithms, + * TCM implementation do NOT have to check the following: + * + * area pointer is NULL + * width and height fits within container + * number of pages is more than the size of the container + * + */ + +/** + * Template for <ALGO_NAME>_tcm_init method. Define as: + * TCM_INIT(<ALGO_NAME>_tcm_init) + * + * Allocates and initializes a tiler container manager. + * + * @param width Width of container + * @param height Height of container + * @param attr Container manager specific configuration + * arguments. Please describe these in + * your header file. + * + * @return Pointer to the allocated and initialized container + * manager. NULL on failure. DO NOT leak any memory on + * failure! + */ +#define TCM_INIT(name, attr_t) \ +struct tcm *name(u16 width, u16 height, typeof(attr_t) *attr); + +/** + * Deinitialize tiler container manager. + * + * @param tcm Pointer to container manager. + * + * @return 0 on success, non-0 error value on error. The call + * should free as much memory as possible and meaningful + * even on failure. Some error codes: -ENODEV: invalid + * manager. + */ +static inline void tcm_deinit(struct tcm *tcm) +{ + if (tcm) + tcm->deinit(tcm); +} + +/** + * Reserves a 2D area in the container. + * + * @param tcm Pointer to container manager. 
+ * @param height Height(in pages) of area to be reserved. + * @param width Width(in pages) of area to be reserved. + * @param align Alignment requirement for top-left corner of area. Not + * all values may be supported by the container manager, + * but it must support 0 (1), 32 and 64. + * 0 value is equivalent to 1. + * @param area Pointer to where the reserved area should be stored. + * + * @return 0 on success. Non-0 error code on failure. Also, + * the tcm field of the area will be set to NULL on + * failure. Some error codes: -ENODEV: invalid manager, + * -EINVAL: invalid area, -ENOMEM: not enough space for + * allocation. + */ +static inline s32 tcm_reserve_2d(struct tcm *tcm, u16 width, u16 height, + u16 align, struct tcm_area *area) +{ + /* perform rudimentary error checking */ + s32 res = tcm == NULL ? -ENODEV : + (area == NULL || width == 0 || height == 0 || + /* align must be a 2 power */ + align & (align - 1)) ? -EINVAL : + (height > tcm->height || width > tcm->width) ? -ENOMEM : 0; + + if (!res) { + area->is2d = true; + res = tcm->reserve_2d(tcm, height, width, align, area); + area->tcm = res ? NULL : tcm; + } + + return res; +} + +/** + * Reserves a 1D area in the container. + * + * @param tcm Pointer to container manager. + * @param slots Number of (contiguous) slots to reserve. + * @param area Pointer to where the reserved area should be stored. + * + * @return 0 on success. Non-0 error code on failure. Also, + * the tcm field of the area will be set to NULL on + * failure. Some error codes: -ENODEV: invalid manager, + * -EINVAL: invalid area, -ENOMEM: not enough space for + * allocation. + */ +static inline s32 tcm_reserve_1d(struct tcm *tcm, u32 slots, + struct tcm_area *area) +{ + /* perform rudimentary error checking */ + s32 res = tcm == NULL ? -ENODEV : + (area == NULL || slots == 0) ? -EINVAL : + slots > (tcm->width * (u32) tcm->height) ? -ENOMEM : 0; + + if (!res) { + area->is2d = false; + res = tcm->reserve_1d(tcm, slots, area); + area->tcm = res ? NULL : tcm; + } + + return res; +} + +/** + * Free a previously reserved area from the container. + * + * @param area Pointer to area reserved by a prior call to + * tcm_reserve_1d or tcm_reserve_2d call, whether + * it was successful or not. (Note: all fields of + * the structure must match.) + * + * @return 0 on success. Non-0 error code on failure. Also, the tcm + * field of the area is set to NULL on success to avoid subsequent + * freeing. This call will succeed even if supplying + * the area from a failed reserved call. + */ +static inline s32 tcm_free(struct tcm_area *area) +{ + s32 res = 0; /* free succeeds by default */ + + if (area && area->tcm) { + res = area->tcm->free(area->tcm, area); + if (res == 0) + area->tcm = NULL; + } + + return res; +} + +/*============================================================================= + HELPER FUNCTION FOR ANY TILER CONTAINER MANAGER +=============================================================================*/ + +/** + * This method slices off the topmost 2D slice from the parent area, and stores + * it in the 'slice' parameter. The 'parent' parameter will get modified to + * contain the remaining portion of the area. If the whole parent area can + * fit in a 2D slice, its tcm pointer is set to NULL to mark that it is no + * longer a valid area. 
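+ *
+ * A worked example (illustrative): in a 16-slot-wide container, a 1D
+ * area from p0=(5,2) to p1=(9,4) is sliced into (5,2)-(15,2), then
+ * (0,3)-(15,3), then (0,4)-(9,4), after which the parent's tcm pointer
+ * is NULL.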
+ *
+ * @param parent	Pointer to a VALID parent area that will get modified
+ * @param slice	Pointer to the slice area that will get modified
+ */
+static inline void tcm_slice(struct tcm_area *parent, struct tcm_area *slice)
+{
+	*slice = *parent;
+
+	/* check if we need to slice */
+	if (slice->tcm && !slice->is2d &&
+		slice->p0.y != slice->p1.y &&
+		(slice->p0.x || (slice->p1.x != slice->tcm->width - 1))) {
+		/* set end point of slice (start always remains) */
+		slice->p1.x = slice->tcm->width - 1;
+		slice->p1.y = (slice->p0.x) ? slice->p0.y : slice->p1.y - 1;
+		/* adjust remaining area */
+		parent->p0.x = 0;
+		parent->p0.y = slice->p1.y + 1;
+	} else {
+		/* mark this as the last slice */
+		parent->tcm = NULL;
+	}
+}
+
+/* Verify if a tcm area is logically valid */
+static inline bool tcm_area_is_valid(struct tcm_area *area)
+{
+	return area && area->tcm &&
+		/* coordinate bounds */
+		area->p1.x < area->tcm->width &&
+		area->p1.y < area->tcm->height &&
+		area->p0.y <= area->p1.y &&
+		/* 1D coordinate relationship + p0.x check */
+		((!area->is2d &&
+		  area->p0.x < area->tcm->width &&
+		  area->p0.x + area->p0.y * area->tcm->width <=
+		  area->p1.x + area->p1.y * area->tcm->width) ||
+		 /* 2D coordinate relationship */
+		 (area->is2d &&
+		  area->p0.x <= area->p1.x));
+}
+
+/* see if a coordinate is within an area */
+static inline bool __tcm_is_in(struct tcm_pt *p, struct tcm_area *a)
+{
+	u16 i;
+
+	if (a->is2d) {
+		return p->x >= a->p0.x && p->x <= a->p1.x &&
+			p->y >= a->p0.y && p->y <= a->p1.y;
+	} else {
+		i = p->x + p->y * a->tcm->width;
+		return i >= a->p0.x + a->p0.y * a->tcm->width &&
+			i <= a->p1.x + a->p1.y * a->tcm->width;
+	}
+}
+
+/* calculate area width */
+static inline u16 __tcm_area_width(struct tcm_area *area)
+{
+	return area->p1.x - area->p0.x + 1;
+}
+
+/* calculate area height */
+static inline u16 __tcm_area_height(struct tcm_area *area)
+{
+	return area->p1.y - area->p0.y + 1;
+}
+
+/* calculate number of slots in an area */
+static inline u16 __tcm_sizeof(struct tcm_area *area)
+{
+	return area->is2d ?
+		__tcm_area_width(area) * __tcm_area_height(area) :
+		(area->p1.x - area->p0.x + 1) + (area->p1.y - area->p0.y) *
+		area->tcm->width;
+}
+#define tcm_sizeof(area) __tcm_sizeof(&(area))
+#define tcm_awidth(area) __tcm_area_width(&(area))
+#define tcm_aheight(area) __tcm_area_height(&(area))
+#define tcm_is_in(pt, area) __tcm_is_in(&(pt), &(area))
+
+/* limit a 1D area to the first N pages */
+static inline s32 tcm_1d_limit(struct tcm_area *a, u32 num_pg)
+{
+	if (__tcm_sizeof(a) < num_pg)
+		return -ENOMEM;
+	if (!num_pg)
+		return -EINVAL;
+
+	a->p1.x = (a->p0.x + num_pg - 1) % a->tcm->width;
+	a->p1.y = a->p0.y + ((a->p0.x + num_pg - 1) / a->tcm->width);
+	return 0;
+}
+
+/**
+ * Iterate through 2D slices of a valid area. Behaves
+ * syntactically as a for(;;) statement.
+ *
+ * @param var	Name of a local variable of type 'struct
+ *			tcm_area' (not a pointer) that will get
+ *			modified to contain each slice.
+ * @param area	The VALID parent area; it is passed by value,
+ *			so the caller's copy does not get modified
+ *			throughout the loop.
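+ * @param safe	Name of a local 'struct tcm_area' variable used as
+ *			scratch space by the iterator; it holds the not
+ *			yet visited remainder of the area.
+ *
+ * Usage sketch (buf_area is a previously reserved, valid area;
+ * process_slice() is an illustrative consumer, not a real API):
+ *
+ *	struct tcm_area slice, safe;
+ *
+ *	tcm_for_each_slice(slice, buf_area, safe)
+ *		process_slice(&slice);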
+ * + */ +#define tcm_for_each_slice(var, area, safe) \ + for (safe = area, \ + tcm_slice(&safe, &var); \ + var.tcm; tcm_slice(&safe, &var)) + +#endif diff --git a/drivers/media/video/tiler/tcm/Makefile b/drivers/media/video/tiler/tcm/Makefile new file mode 100644 index 0000000..8434607 --- /dev/null +++ b/drivers/media/video/tiler/tcm/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_TI_TILER) += tcm-sita.o diff --git a/drivers/media/video/tiler/tcm/_tcm-sita.h b/drivers/media/video/tiler/tcm/_tcm-sita.h new file mode 100644 index 0000000..20a7d86 --- /dev/null +++ b/drivers/media/video/tiler/tcm/_tcm-sita.h @@ -0,0 +1,85 @@ +/* + * _tcm_sita.h + * + * SImple Tiler Allocator (SiTA) private structures. + * + * Author: Ravi Ramachandra <r.ramachandra@ti.com> + * + * Copyright (C) 2009-2011 Texas Instruments, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of Texas Instruments Incorporated nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TCM_SITA_H +#define _TCM_SITA_H + +#include "../tcm.h" + +/* length between two coordinates */ +#define LEN(a, b) ((a) > (b) ? (a) - (b) + 1 : (b) - (a) + 1) + +enum criteria { + CR_MAX_NEIGHS = 0x01, + CR_FIRST_FOUND = 0x10, + CR_BIAS_HORIZONTAL = 0x20, + CR_BIAS_VERTICAL = 0x40, + CR_DIAGONAL_BALANCE = 0x80 +}; + +/* nearness to the beginning of the search field from 0 to 1000 */ +struct nearness_factor { + s32 x; + s32 y; +}; + +/* + * Statistics on immediately neighboring slots. Edge is the number of + * border segments that are also border segments of the scan field. Busy + * refers to the number of neighbors that are occupied. 
+ */ +struct neighbor_stats { + u16 edge; + u16 busy; +}; + +/* structure to keep the score of a potential allocation */ +struct score { + struct nearness_factor f; + struct neighbor_stats n; + struct tcm_area a; + u16 neighs; /* number of busy neighbors */ +}; + +struct sita_pvt { + struct mutex mtx; + struct tcm_pt div_pt; /* divider point splitting container */ + struct tcm_area ***map; /* pointers to the parent area for each slot */ +}; + +#endif diff --git a/drivers/media/video/tiler/tcm/tcm-sita.c b/drivers/media/video/tiler/tcm/tcm-sita.c new file mode 100644 index 0000000..d0784c6 --- /dev/null +++ b/drivers/media/video/tiler/tcm/tcm-sita.c @@ -0,0 +1,936 @@ +/* + * tcm-sita.c + * + * SImple Tiler Allocator (SiTA): 2D and 1D allocation(reservation) algorithm + * + * Authors: Ravi Ramachandra <r.ramachandra@ti.com>, + * Lajos Molnar <molnar@ti.com> + * + * Copyright (C) 2009-2010 Texas Instruments, Inc. + * + * This package is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + * + */ +#include <linux/slab.h> + +#include "_tcm-sita.h" +#include "tcm-sita.h" + +#define TCM_ALG_NAME "tcm_sita" +#include "tcm-utils.h" + +#define X_SCAN_LIMITER 1 +#define Y_SCAN_LIMITER 1 + +#define ALIGN_DOWN(value, align) ((value) & ~((align) - 1)) + +/* Individual selection criteria for different scan areas */ +static s32 CR_L2R_T2B = CR_BIAS_HORIZONTAL; +static s32 CR_R2L_T2B = CR_DIAGONAL_BALANCE; +#ifdef SCAN_BOTTOM_UP +static s32 CR_R2L_B2T = CR_FIRST_FOUND; +static s32 CR_L2R_B2T = CR_DIAGONAL_BALANCE; +#endif + +/********************************************* + * TCM API - Sita Implementation + *********************************************/ +static s32 sita_reserve_2d(struct tcm *tcm, u16 h, u16 w, u8 align, + struct tcm_area *area); +static s32 sita_reserve_1d(struct tcm *tcm, u32 slots, struct tcm_area *area); +static s32 sita_free(struct tcm *tcm, struct tcm_area *area); +static void sita_deinit(struct tcm *tcm); + +/********************************************* + * Main Scanner functions + *********************************************/ +static s32 scan_areas_and_find_fit(struct tcm *tcm, u16 w, u16 h, u16 align, + struct tcm_area *area); + +static s32 scan_l2r_t2b(struct tcm *tcm, u16 w, u16 h, u16 align, + struct tcm_area *field, struct tcm_area *area); + +static s32 scan_r2l_t2b(struct tcm *tcm, u16 w, u16 h, u16 align, + struct tcm_area *field, struct tcm_area *area); + +#ifdef SCAN_BOTTOM_UP +static s32 scan_l2r_b2t(struct tcm *tcm, u16 w, u16 h, u16 align, + struct tcm_area *field, struct tcm_area *area); + +static s32 scan_r2l_b2t(struct tcm *tcm, u16 w, u16 h, u16 align, + struct tcm_area *field, struct tcm_area *area); +#endif +static s32 scan_r2l_b2t_one_dim(struct tcm *tcm, u32 num_slots, + struct tcm_area *field, struct tcm_area *area); + +/********************************************* + * Support Infrastructure Methods + *********************************************/ +static s32 is_area_free(struct tcm_area ***map, u16 x0, u16 y0, u16 w, u16 h); + +static s32 update_candidate(struct tcm *tcm, u16 x0, u16 y0, u16 w, u16 h, + struct tcm_area *field, s32 criteria, + struct score *best); + +static void get_nearness_factor(struct tcm_area *field, + struct 
tcm_area *candidate,
+				struct nearness_factor *nf);
+
+static void get_neighbor_stats(struct tcm *tcm, struct tcm_area *area,
+				struct neighbor_stats *stat);
+
+static void fill_area(struct tcm *tcm,
+			struct tcm_area *area, struct tcm_area *parent);
+
+/*********************************************/
+
+/*********************************************
+ * Utility Methods
+ *********************************************/
+struct tcm *sita_init(u16 width, u16 height, struct tcm_pt *attr)
+{
+	struct tcm *tcm;
+	struct sita_pvt *pvt;
+	struct tcm_area area = {0};
+	s32 i;
+
+	if (width == 0 || height == 0)
+		return NULL;
+
+	tcm = kmalloc(sizeof(*tcm), GFP_KERNEL);
+	pvt = kmalloc(sizeof(*pvt), GFP_KERNEL);
+	if (!tcm || !pvt)
+		goto error;
+
+	memset(tcm, 0, sizeof(*tcm));
+	memset(pvt, 0, sizeof(*pvt));
+
+	/* Updating the pointers to SiTA implementation APIs */
+	tcm->height = height;
+	tcm->width = width;
+	tcm->reserve_2d = sita_reserve_2d;
+	tcm->reserve_1d = sita_reserve_1d;
+	tcm->free = sita_free;
+	tcm->deinit = sita_deinit;
+	tcm->pvt = (void *)pvt;
+
+	mutex_init(&(pvt->mtx));
+
+	/* Creating the tcm map: one array of slot pointers per column */
+	pvt->map = kmalloc(sizeof(*pvt->map) * tcm->width, GFP_KERNEL);
+	if (!pvt->map)
+		goto error;
+
+	for (i = 0; i < tcm->width; i++) {
+		pvt->map[i] =
+			kmalloc(sizeof(**pvt->map) * tcm->height,
+				GFP_KERNEL);
+		if (pvt->map[i] == NULL) {
+			while (i--)
+				kfree(pvt->map[i]);
+			kfree(pvt->map);
+			goto error;
+		}
+	}
+
+	if (attr && attr->x <= tcm->width && attr->y <= tcm->height) {
+		pvt->div_pt.x = attr->x;
+		pvt->div_pt.y = attr->y;
+
+	} else {
+		/* Defaulting to 3:1 ratio on width for 2D area split */
+		/* Defaulting to 3:1 ratio on height for 2D and 1D split */
+		pvt->div_pt.x = (tcm->width * 3) / 4;
+		pvt->div_pt.y = (tcm->height * 3) / 4;
+	}
+
+	mutex_lock(&(pvt->mtx));
+	assign(&area, 0, 0, width - 1, height - 1);
+	fill_area(tcm, &area, NULL);
+	mutex_unlock(&(pvt->mtx));
+	return tcm;
+
+error:
+	kfree(tcm);
+	kfree(pvt);
+	return NULL;
+}
+
+static void sita_deinit(struct tcm *tcm)
+{
+	struct sita_pvt *pvt = (struct sita_pvt *)tcm->pvt;
+	struct tcm_area area = {0};
+	s32 i;
+
+	area.p1.x = tcm->width - 1;
+	area.p1.y = tcm->height - 1;
+
+	mutex_lock(&(pvt->mtx));
+	fill_area(tcm, &area, NULL);
+	mutex_unlock(&(pvt->mtx));
+
+	mutex_destroy(&(pvt->mtx));
+
+	/* the map was allocated with one entry per column (tcm->width) */
+	for (i = 0; i < tcm->width; i++)
+		kfree(pvt->map[i]);
+	kfree(pvt->map);
+	kfree(pvt);
+}
+
+/**
+ * Reserve a 1D area in the container
+ *
+ * @param num_slots	size of 1D area
+ * @param area	pointer to the area that will be populated with the
+ *		reserved area
+ *
+ * @return 0 on success, non-0 error value on failure.
+ */
+static s32 sita_reserve_1d(struct tcm *tcm, u32 num_slots,
+			struct tcm_area *area)
+{
+	s32 ret;
+	struct tcm_area field = {0};
+	struct sita_pvt *pvt = (struct sita_pvt *)tcm->pvt;
+
+	mutex_lock(&(pvt->mtx));
+#ifdef RESTRICT_1D
+	/* scan within predefined 1D boundary */
+	assign(&field, tcm->width - 1, tcm->height - 1, 0, pvt->div_pt.y);
+#else
+	/* Scanning entire container */
+	assign(&field, tcm->width - 1, tcm->height - 1, 0, 0);
+#endif
+	ret = scan_r2l_b2t_one_dim(tcm, num_slots, &field, area);
+	if (!ret)
+		/* update map */
+		fill_area(tcm, area, area);
+
+	mutex_unlock(&(pvt->mtx));
+	return ret;
+}
+
+/**
+ * Reserve a 2D area in the container
+ *
+ * @param w	width
+ * @param h	height
+ * @param align	alignment requirement for the top-left corner of the
+ *		area; 0 (equivalent to 1), 32 and 64 are supported
+ * @param area	pointer to the area that will be populated with the
+ *		reserved area
+ *
+ * @return 0 on success, non-0 error value on failure.
+ */
+static s32 sita_reserve_2d(struct tcm *tcm, u16 h, u16 w, u8 align,
+			struct tcm_area *area)
+{
+	s32 ret;
+	struct sita_pvt *pvt = (struct sita_pvt *)tcm->pvt;
+
+	/* not supporting more than 64 as alignment */
+	if (align > 64)
+		return -EINVAL;
+
+	/* we prefer 1, 32 and 64 as alignment */
+	align = align <= 1 ? 1 : align <= 32 ? 32 : 64;
+
+	mutex_lock(&(pvt->mtx));
+	ret = scan_areas_and_find_fit(tcm, w, h, align, area);
+	if (!ret)
+		/* update map */
+		fill_area(tcm, area, area);
+
+	mutex_unlock(&(pvt->mtx));
+	return ret;
+}
+
+/**
+ * Unreserve a previously allocated 2D or 1D area
+ * @param area	area to be freed
+ * @return 0 - success
+ */
+static s32 sita_free(struct tcm *tcm, struct tcm_area *area)
+{
+	struct sita_pvt *pvt = (struct sita_pvt *)tcm->pvt;
+
+	mutex_lock(&(pvt->mtx));
+
+	/* check that this is in fact an existing area */
+	WARN_ON(pvt->map[area->p0.x][area->p0.y] != area ||
+		pvt->map[area->p1.x][area->p1.y] != area);
+
+	/* Clear the contents of the associated tiles in the map */
+	fill_area(tcm, area, NULL);
+
+	mutex_unlock(&(pvt->mtx));
+
+	return 0;
+}
+
+/**
+ * Note: In general, the coordinates of the scan field are relative to the
+ * scan sweep direction. The scan origin (e.g. top-left corner) is always
+ * the p0 member of the field. Therefore, for a scan from the top-left,
+ * p0.x <= p1.x and p0.y <= p1.y; whereas for a scan from the bottom-right,
+ * p1.x <= p0.x and p1.y <= p0.y.
+ */
+
+/**
+ * Raster scan horizontally right to left from top to bottom to find a place
+ * for a 2D area of given size inside a scan field.
+ *
+ * @param w	width of desired area
+ * @param h	height of desired area
+ * @param align	desired area alignment
+ * @param area	pointer to the area that will be set to the best position
+ * @param field	area to scan (inclusive)
+ *
+ * @return 0 on success, non-0 error value on failure.
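+ *
+ * For example, a full-container scan with this function uses the field
+ * assign(&field, tcm->width - 1, 0, 0, tcm->height - 1), i.e. p0 is
+ * the top-right corner and p1 the bottom-left corner (see
+ * scan_areas_and_find_fit()).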
+ */ +static s32 scan_r2l_t2b(struct tcm *tcm, u16 w, u16 h, u16 align, + struct tcm_area *field, struct tcm_area *area) +{ + s32 x, y; + s16 start_x, end_x, start_y, end_y, found_x = -1; + struct tcm_area ***map = ((struct sita_pvt *)tcm->pvt)->map; + struct score best = {{0}, {0}, {0}, 0}; + + PA(2, "scan_r2l_t2b:", field); + + start_x = field->p0.x; + end_x = field->p1.x; + start_y = field->p0.y; + end_y = field->p1.y; + + /* check scan area co-ordinates */ + if (field->p0.x < field->p1.x || + field->p1.y < field->p0.y) + return -EINVAL; + + /* check if allocation would fit in scan area */ + if (w > LEN(start_x, end_x) || h > LEN(end_y, start_y)) + return -ENOSPC; + + /* adjust start_x and end_y, as allocation would not fit beyond */ + start_x = ALIGN_DOWN(start_x - w + 1, align); /* - 1 to be inclusive */ + end_y = end_y - h + 1; + + /* check if allocation would still fit in scan area */ + if (start_x < end_x) + return -ENOSPC; + + P2("ali=%d x=%d..%d y=%d..%d", align, start_x, end_x, start_y, end_y); + + /* scan field top-to-bottom, right-to-left */ + for (y = start_y; y <= end_y; y++) { + for (x = start_x; x >= end_x; x -= align) { + if (is_area_free(map, x, y, w, h)) { + P3("found shoulder: %d,%d", x, y); + found_x = x; + + /* update best candidate */ + if (update_candidate(tcm, x, y, w, h, field, + CR_R2L_T2B, &best)) + goto done; + +#ifdef X_SCAN_LIMITER + /* change upper x bound */ + end_x = x + 1; +#endif + break; + } else if (map[x][y] && map[x][y]->is2d) { + /* step over 2D areas */ + x = ALIGN(map[x][y]->p0.x - w + 1, align); + P3("moving to: %d,%d", x, y); + } + } +#ifdef Y_SCAN_LIMITER + /* break if you find a free area shouldering the scan field */ + if (found_x == start_x) + break; +#endif + } + + if (!best.a.tcm) + return -ENOSPC; +done: + assign(area, best.a.p0.x, best.a.p0.y, best.a.p1.x, best.a.p1.y); + return 0; +} + +#ifdef SCAN_BOTTOM_UP +/** + * Raster scan horizontally right to left from bottom to top to find a place + * for a 2D area of given size inside a scan field. + * + * @param w width of desired area + * @param h height of desired area + * @param align desired area alignment + * @param area pointer to the area that will be set to the best position + * @param field area to scan (inclusive) + * + * @return 0 on success, non-0 error value on failure. + */ +static s32 scan_r2l_b2t(struct tcm *tcm, u16 w, u16 h, u16 align, + struct tcm_area *field, struct tcm_area *area) +{ + /* TODO: Should I check scan area? 
+ * Might have to take it as input during initialization + */ + s32 x, y; + s16 start_x, end_x, start_y, end_y, found_x = -1; + struct tcm_area ***map = ((struct sita_pvt *)tcm->pvt)->map; + struct score best = {{0}, {0}, {0}, 0}; + + PA(2, "scan_r2l_b2t:", field); + + start_x = field->p0.x; + end_x = field->p1.x; + start_y = field->p0.y; + end_y = field->p1.y; + + /* check scan area co-ordinates */ + if (field->p1.x < field->p0.x || + field->p1.y < field->p0.y) + return -EINVAL; + + /* check if allocation would fit in scan area */ + if (w > LEN(start_x, end_x) || h > LEN(start_y, end_y)) + return -ENOSPC; + + /* adjust start_x and start_y, as allocation would not fit beyond */ + start_x = ALIGN_DOWN(start_x - w + 1, align); /* + 1 to be inclusive */ + start_y = start_y - h + 1; + + /* check if allocation would still fit in scan area */ + if (start_x < end_x) + return -ENOSPC; + + P2("ali=%d x=%d..%d y=%d..%d", align, start_x, end_x, start_y, end_y); + + /* scan field bottom-to-top, right-to-left */ + for (y = start_y; y >= end_y; y--) { + for (x = start_x; x >= end_x; x -= align) { + if (is_area_free(map, x, y, w, h)) { + P3("found shoulder: %d,%d", x, y); + found_x = x; + + /* update best candidate */ + if (update_candidate(tcm, x, y, w, h, field, + CR_R2L_B2T, &best)) + goto done; +#ifdef X_SCAN_LIMITER + /* change upper x bound */ + end_x = x + 1; +#endif + break; + } else if (map[x][y] && map[x][y]->is2d) { + /* step over 2D areas */ + x = ALIGN(map[x][y]->p0.x - w + 1, align); + P3("moving to: %d,%d", x, y); + } + } +#ifdef Y_SCAN_LIMITER + /* break if you find a free area shouldering the scan field */ + if (found_x == start_x) + break; +#endif + } + + if (!best.a.tcm) + return -ENOSPC; +done: + assign(area, best.a.p0.x, best.a.p0.y, best.a.p1.x, best.a.p1.y); + return 0; +} +#endif + +/** + * Raster scan horizontally left to right from top to bottom to find a place for + * a 2D area of given size inside a scan field. + * + * @param w width of desired area + * @param h height of desired area + * @param align desired area alignment + * @param area pointer to the area that will be set to the best position + * @param field area to scan (inclusive) + * + * @return 0 on success, non-0 error value on failure. 
+ */ +static s32 scan_l2r_t2b(struct tcm *tcm, u16 w, u16 h, u16 align, + struct tcm_area *field, struct tcm_area *area) +{ + s32 x, y; + s16 start_x, end_x, start_y, end_y, found_x = -1; + struct tcm_area ***map = ((struct sita_pvt *)tcm->pvt)->map; + struct score best = {{0}, {0}, {0}, 0}; + + PA(2, "scan_l2r_t2b:", field); + + start_x = field->p0.x; + end_x = field->p1.x; + start_y = field->p0.y; + end_y = field->p1.y; + + /* check scan area co-ordinates */ + if (field->p1.x < field->p0.x || + field->p1.y < field->p0.y) + return -EINVAL; + + /* check if allocation would fit in scan area */ + if (w > LEN(end_x, start_x) || h > LEN(end_y, start_y)) + return -ENOSPC; + + start_x = ALIGN(start_x, align); + + /* check if allocation would still fit in scan area */ + if (w > LEN(end_x, start_x)) + return -ENOSPC; + + /* adjust end_x and end_y, as allocation would not fit beyond */ + end_x = end_x - w + 1; /* + 1 to be inclusive */ + end_y = end_y - h + 1; + + P2("ali=%d x=%d..%d y=%d..%d", align, start_x, end_x, start_y, end_y); + + /* scan field top-to-bottom, left-to-right */ + for (y = start_y; y <= end_y; y++) { + for (x = start_x; x <= end_x; x += align) { + if (is_area_free(map, x, y, w, h)) { + P3("found shoulder: %d,%d", x, y); + found_x = x; + + /* update best candidate */ + if (update_candidate(tcm, x, y, w, h, field, + CR_L2R_T2B, &best)) + goto done; +#ifdef X_SCAN_LIMITER + /* change upper x bound */ + end_x = x - 1; +#endif + break; + } else if (map[x][y] && map[x][y]->is2d) { + /* step over 2D areas */ + x = ALIGN_DOWN(map[x][y]->p1.x, align); + P3("moving to: %d,%d", x, y); + } + } +#ifdef Y_SCAN_LIMITER + /* break if you find a free area shouldering the scan field */ + if (found_x == start_x) + break; +#endif + } + + if (!best.a.tcm) + return -ENOSPC; +done: + assign(area, best.a.p0.x, best.a.p0.y, best.a.p1.x, best.a.p1.y); + return 0; +} + +#ifdef SCAN_BOTTOM_UP +/** + * Raster scan horizontally left to right from bottom to top to find a + * place for a 2D area of given size inside a scan field. + * + * @param w width of desired area + * @param h height of desired area + * @param align desired area alignment + * @param area pointer to the area that will be set to the best position + * @param field area to scan (inclusive) + * + * @return 0 on success, non-0 error value on failure. 
+ */ +static s32 scan_l2r_b2t(struct tcm *tcm, u16 w, u16 h, u16 align, + struct tcm_area *field, struct tcm_area *area) +{ + s32 x, y; + s16 start_x, end_x, start_y, end_y, found_x = -1; + struct tcm_area ***map = ((struct sita_pvt *)tcm->pvt)->map; + struct score best = {{0}, {0}, {0}, 0}; + + PA(2, "scan_l2r_b2t:", field); + + start_x = field->p0.x; + end_x = field->p1.x; + start_y = field->p0.y; + end_y = field->p1.y; + + /* check scan area co-ordinates */ + if (field->p1.x < field->p0.x || + field->p0.y < field->p1.y) + return -EINVAL; + + /* check if allocation would fit in scan area */ + if (w > LEN(end_x, start_x) || h > LEN(start_y, end_y)) + return -ENOSPC; + + start_x = ALIGN(start_x, align); + + /* check if allocation would still fit in scan area */ + if (w > LEN(end_x, start_x)) + return -ENOSPC; + + /* adjust end_x and start_y, as allocation would not fit beyond */ + end_x = end_x - w + 1; /* + 1 to be inclusive */ + start_y = start_y - h + 1; + + P2("ali=%d x=%d..%d y=%d..%d", align, start_x, end_x, start_y, end_y); + + /* scan field bottom-to-top, left-to-right */ + for (y = start_y; y >= end_y; y--) { + for (x = start_x; x <= end_x; x += align) { + if (is_area_free(map, x, y, w, h)) { + P3("found shoulder: %d,%d", x, y); + found_x = x; + + /* update best candidate */ + if (update_candidate(tcm, x, y, w, h, field, + CR_L2R_B2T, &best)) + goto done; +#ifdef X_SCAN_LIMITER + /* change upper x bound */ + end_x = x - 1; +#endif + break; + } else if (map[x][y] && map[x][y]->is2d) { + /* step over 2D areas */ + x = ALIGN_DOWN(map[x][y]->p1.x, align); + P3("moving to: %d,%d", x, y); + } + } + +#ifdef Y_SCAN_LIMITER + /* break if you find a free area shouldering the scan field */ + if (found_x == start_x) + break; +#endif + } + + if (!best.a.tcm) + return -ENOSPC; +done: + assign(area, best.a.p0.x, best.a.p0.y, best.a.p1.x, best.a.p1.y); + return 0; +} +#endif + +/** + * Raster scan horizontally right to left from bottom to top to find a place + * for a 1D area of given size inside a scan field. + * + * @param num_slots size of desired area + * @param align desired area alignment + * @param area pointer to the area that will be set to the best + * position + * @param field area to scan (inclusive) + * + * @return 0 on success, non-0 error value on failure. + */ +static s32 scan_r2l_b2t_one_dim(struct tcm *tcm, u32 num_slots, + struct tcm_area *field, struct tcm_area *area) +{ + s32 found = 0; + s16 x, y; + struct sita_pvt *pvt = (struct sita_pvt *)tcm->pvt; + struct tcm_area *p; + + /* check scan area co-ordinates */ + if (field->p0.y < field->p1.y) + return -EINVAL; + + PA(2, "scan_r2l_b2t_one_dim:", field); + + /** + * Currently we only support full width 1D scan field, which makes sense + * since 1D slot-ordering spans the full container width. 
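+ * E.g. for a 256-slot-wide container the field must be passed with + * reversed x coordinates, p0 = (255, y0) and p1 = (0, y1), so that + * p0.x - p1.x + 1 == 256 == tcm->width.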
+ */ + if (tcm->width != field->p0.x - field->p1.x + 1) + return -EINVAL; + + /* check if allocation would fit in scan area */ + if (num_slots > tcm->width * LEN(field->p0.y, field->p1.y)) + return -ENOSPC; + + x = field->p0.x; + y = field->p0.y; + + /* find num_slots consecutive free slots to the left */ + while (found < num_slots) { + if (y < 0) + return -ENOSPC; + + /* remember bottom-right corner */ + if (found == 0) { + area->p1.x = x; + area->p1.y = y; + } + + /* skip busy regions */ + p = pvt->map[x][y]; + if (p) { + /* move to left of 2D areas, top left of 1D */ + x = p->p0.x; + if (!p->is2d) + y = p->p0.y; + + /* start over */ + found = 0; + } else { + /* count consecutive free slots */ + found++; + if (found == num_slots) + break; + } + + /* move to the left */ + if (x == 0) + y--; + x = (x ? : tcm->width) - 1; + + } + + /* set top-left corner */ + area->p0.x = x; + area->p0.y = y; + return 0; +} + +/** + * Find a place for a 2D area of given size inside a scan field based on its + * alignment needs. + * + * @param w width of desired area + * @param h height of desired area + * @param align desired area alignment + * @param area pointer to the area that will be set to the best position + * + * @return 0 on success, non-0 error value on failure. + */ +static s32 scan_areas_and_find_fit(struct tcm *tcm, u16 w, u16 h, u16 align, + struct tcm_area *area) +{ + s32 ret = 0; + struct tcm_area field = {0}; + u16 boundary_x, boundary_y; + struct sita_pvt *pvt = (struct sita_pvt *)tcm->pvt; + + if (align > 1) { + /* prefer top-left corner */ + boundary_x = pvt->div_pt.x - 1; + boundary_y = pvt->div_pt.y - 1; + + /* expand width and height if needed */ + if (w > pvt->div_pt.x) + boundary_x = tcm->width - 1; + if (h > pvt->div_pt.y) + boundary_y = tcm->height - 1; + + assign(&field, 0, 0, boundary_x, boundary_y); + ret = scan_l2r_t2b(tcm, w, h, align, &field, area); + + /* scan whole container if failed, but do not scan 2x */ + if (ret != 0 && (boundary_x != tcm->width - 1 || + boundary_y != tcm->height - 1)) { + /* scan the entire container if nothing found */ + assign(&field, 0, 0, tcm->width - 1, tcm->height - 1); + ret = scan_l2r_t2b(tcm, w, h, align, &field, area); + } + } else if (align == 1) { + /* prefer top-right corner */ + boundary_x = pvt->div_pt.x; + boundary_y = pvt->div_pt.y - 1; + + /* expand width and height if needed */ + if (w > (tcm->width - pvt->div_pt.x)) + boundary_x = 0; + if (h > pvt->div_pt.y) + boundary_y = tcm->height - 1; + + assign(&field, tcm->width - 1, 0, boundary_x, boundary_y); + ret = scan_r2l_t2b(tcm, w, h, align, &field, area); + + /* scan whole container if failed, but do not scan 2x */ + if (ret != 0 && (boundary_x != 0 || + boundary_y != tcm->height - 1)) { + /* scan the entire container if nothing found */ + assign(&field, tcm->width - 1, 0, 0, tcm->height - 1); + ret = scan_r2l_t2b(tcm, w, h, align, &field, + area); + } + } + + return ret; +} + +/* check if an entire area is free */ +static s32 is_area_free(struct tcm_area ***map, u16 x0, u16 y0, u16 w, u16 h) +{ + u16 x = 0, y = 0; + for (y = y0; y < y0 + h; y++) { + for (x = x0; x < x0 + w; x++) { + if (map[x][y]) + return false; + } + } + return true; +} + +/* fills an area with a parent tcm_area */ +static void fill_area(struct tcm *tcm, struct tcm_area *area, + struct tcm_area *parent) +{ + s32 x, y; + struct sita_pvt *pvt = (struct sita_pvt *)tcm->pvt; + struct tcm_area a, a_; + + /* set area's tcm; otherwise, enumerator considers it invalid */ + area->tcm = tcm; + + tcm_for_each_slice(a, 
*area, a_) { + PA(2, "fill 2d area", &a); + for (x = a.p0.x; x <= a.p1.x; ++x) + for (y = a.p0.y; y <= a.p1.y; ++y) + pvt->map[x][y] = parent; + + } +} + +/** + * Compares a candidate area to the current best area, and if it is a better + * fit, it updates the best to this one. + * + * @param x0, y0, w, h left, top, width, height of candidate area + * @param field scan field + * @param criteria scan criteria + * @param best best candidate and its scores + * + * @return 1 (true) if the candidate area is known to be the final best, so no + * more searching should be performed + */ +static s32 update_candidate(struct tcm *tcm, u16 x0, u16 y0, u16 w, u16 h, + struct tcm_area *field, s32 criteria, + struct score *best) +{ + struct score me; /* score for area */ + + /* + * If first-found is enabled then we stop looking. + * NOTE: For horizontal bias we always give the first found, because our + * scan is horizontal-raster-based and the first candidate will always + * have the horizontal bias. + */ + bool first = criteria & (CR_FIRST_FOUND | CR_BIAS_HORIZONTAL); + + assign(&me.a, x0, y0, x0 + w - 1, y0 + h - 1); + + /* calculate score for current candidate */ + if (!first) { + get_neighbor_stats(tcm, &me.a, &me.n); + me.neighs = me.n.edge + me.n.busy; + get_nearness_factor(field, &me.a, &me.f); + } + + /* the 1st candidate is always the best */ + if (!best->a.tcm) + goto better; + + BUG_ON(first); + + /* see if this area is better than the best so far */ + + /* neighbor check */ + if ((criteria & CR_MAX_NEIGHS) && + me.neighs > best->neighs) + goto better; + + /* vertical bias check */ + if ((criteria & CR_BIAS_VERTICAL) && + /* + * NOTE: not checking if lengths are same, because that does not + * find new shoulders on the same row after a fit + */ + LEN(me.a.p0.y, field->p0.y) > + LEN(best->a.p0.y, field->p0.y)) + goto better; + + /* diagonal balance check */ + if ((criteria & CR_DIAGONAL_BALANCE) && + best->neighs <= me.neighs && + (best->neighs < me.neighs || + /* this implies that neighs and occupied match */ + best->n.busy < me.n.busy || + (best->n.busy == me.n.busy && + /* check the nearness factor */ + best->f.x + best->f.y > me.f.x + me.f.y))) + goto better; + + /* not better, keep going */ + return 0; + +better: + /* save current area as best */ + memcpy(best, &me, sizeof(me)); + best->a.tcm = tcm; + return first; +} + +/** + * Calculate the nearness factor of an area in a search field. The nearness + * factor is smaller if the area is closer to the search origin. + */ +static void get_nearness_factor(struct tcm_area *field, struct tcm_area *area, + struct nearness_factor *nf) +{ + /** + * Using signed math as field coordinates may be reversed if + * search direction is right-to-left or bottom-to-top.
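+ * E.g. in a right-to-left field with p0 = (255, 0) and p1 = (0, 127), an + * area starting at x = 255 scores nf->x = 0 (nearest to the origin), + * while one starting at x = 0 scores nf->x = 1000 (farthest).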
+ */ + nf->x = (s32)(area->p0.x - field->p0.x) * 1000 / + (field->p1.x - field->p0.x); + nf->y = (s32)(area->p0.y - field->p0.y) * 1000 / + (field->p1.y - field->p0.y); +} + +/* get neighbor statistics */ +static void get_neighbor_stats(struct tcm *tcm, struct tcm_area *area, + struct neighbor_stats *stat) +{ + s16 x = 0, y = 0; + struct sita_pvt *pvt = (struct sita_pvt *)tcm->pvt; + + /* Clear any existing values */ + memset(stat, 0, sizeof(*stat)); + + /* process top & bottom edges */ + for (x = area->p0.x; x <= area->p1.x; x++) { + if (area->p0.y == 0) + stat->edge++; + else if (pvt->map[x][area->p0.y - 1]) + stat->busy++; + + if (area->p1.y == tcm->height - 1) + stat->edge++; + else if (pvt->map[x][area->p1.y + 1]) + stat->busy++; + } + + /* process left & right edges */ + for (y = area->p0.y; y <= area->p1.y; ++y) { + if (area->p0.x == 0) + stat->edge++; + else if (pvt->map[area->p0.x - 1][y]) + stat->busy++; + + if (area->p1.x == tcm->width - 1) + stat->edge++; + else if (pvt->map[area->p1.x + 1][y]) + stat->busy++; + } +} diff --git a/drivers/media/video/tiler/tcm/tcm-sita.h b/drivers/media/video/tiler/tcm/tcm-sita.h new file mode 100644 index 0000000..6b604bf --- /dev/null +++ b/drivers/media/video/tiler/tcm/tcm-sita.h @@ -0,0 +1,59 @@ +/* + * tcm_sita.h + * + * SImple Tiler Allocator (SiTA) interface. + * + * Author: Ravi Ramachandra <r.ramachandra@ti.com> + * + * Copyright (C) 2009-2011 Texas Instruments, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of Texas Instruments Incorporated nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef TCM_SITA_H +#define TCM_SITA_H + +#include "../tcm.h" + +/** + * Create a SiTA tiler container manager.
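+ * + * Example (illustrative sizes): create a 256x128 container divided at + * (192, 96): + * struct tcm_pt div = { .x = 192, .y = 96 }; + * struct tcm *container = sita_init(256, 128, &div);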
+ * + * @param width Container width + * @param height Container height + * @param attr preferred division point between 64-aligned + * allocation (top left), 32-aligned allocations + * (top right), and page mode allocations (bottom) + * + * @return TCM instance + */ +struct tcm *sita_init(u16 width, u16 height, struct tcm_pt *attr); + +TCM_INIT(sita_init, struct tcm_pt); + +#endif /* TCM_SITA_H_ */ diff --git a/drivers/media/video/tiler/tcm/tcm-utils.h b/drivers/media/video/tiler/tcm/tcm-utils.h new file mode 100644 index 0000000..3fe8f7d --- /dev/null +++ b/drivers/media/video/tiler/tcm/tcm-utils.h @@ -0,0 +1,74 @@ +/* + * tcm_utils.h + * + * Utility functions for implementing TILER container managers. + * + * Author: Lajos Molnar <molnar@ti.com> + * + * Copyright (C) 2009-2011 Texas Instruments, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * * Neither the name of Texas Instruments Incorporated nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef TCM_UTILS_H +#define TCM_UTILS_H + +#include "../tcm.h" + +/* TCM_ALG_NAME must be defined to use the debug methods */ + +#ifdef DEBUG +#define IFDEBUG(x) x +#else +/* compile-check debug statements even if not DEBUG */ +#define IFDEBUG(x) do { if (0) x; } while (0) +#endif + +#define P(level, fmt, ...) \ + IFDEBUG(printk(level TCM_ALG_NAME ":%d:%s()" fmt "\n", \ + __LINE__, __func__, ##__VA_ARGS__)) + +#define P1(fmt, ...) P(KERN_NOTICE, fmt, ##__VA_ARGS__) +#define P2(fmt, ...) P(KERN_INFO, fmt, ##__VA_ARGS__) +#define P3(fmt, ...) 
P(KERN_DEBUG, fmt, ##__VA_ARGS__) + +#define PA(level, msg, p_area) P##level(msg " (%03d %03d)-(%03d %03d)\n", \ + (p_area)->p0.x, (p_area)->p0.y, (p_area)->p1.x, (p_area)->p1.y) + +/* assign coordinates to area */ +static inline +void assign(struct tcm_area *a, u16 x0, u16 y0, u16 x1, u16 y1) +{ + a->p0.x = x0; + a->p0.y = y0; + a->p1.x = x1; + a->p1.y = y1; +} + +#endif diff --git a/drivers/media/video/tiler/tiler-geom.c b/drivers/media/video/tiler/tiler-geom.c new file mode 100644 index 0000000..f95ae5c --- /dev/null +++ b/drivers/media/video/tiler/tiler-geom.c @@ -0,0 +1,372 @@ +/* + * tiler-geom.c + * + * TILER geometry functions for TI TILER hardware block. + * + * Author: Lajos Molnar <molnar@ti.com> + * + * Copyright (C) 2009-2010 Texas Instruments, Inc. + * + * This package is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include <linux/module.h> +#include "_tiler.h" + +/* bits representing the same slot in DMM-TILER hw-block */ +#define SLOT_WIDTH_BITS 6 +#define SLOT_HEIGHT_BITS 6 + +/* bits reserved to describe coordinates in DMM-TILER hw-block */ +#define CONT_WIDTH_BITS 14 +#define CONT_HEIGHT_BITS 13 + +static struct tiler_geom geom[TILER_FORMATS] = { + { + .x_shft = 0, + .y_shft = 0, + }, + { + .x_shft = 0, + .y_shft = 1, + }, + { + .x_shft = 1, + .y_shft = 1, + }, + { + .x_shft = SLOT_WIDTH_BITS, + .y_shft = SLOT_HEIGHT_BITS, + }, +}; + +/* tiler space addressing bitfields */ +#define MASK_XY_FLIP (1 << 31) +#define MASK_Y_INVERT (1 << 30) +#define MASK_X_INVERT (1 << 29) +#define SHIFT_ACC_MODE 27 +#define MASK_ACC_MODE 3 + +/* calculated constants */ +#define TILER_PAGE (1 << (SLOT_WIDTH_BITS + SLOT_HEIGHT_BITS)) +#define TILER_WIDTH (1 << (CONT_WIDTH_BITS - SLOT_WIDTH_BITS)) +#define TILER_HEIGHT (1 << (CONT_HEIGHT_BITS - SLOT_HEIGHT_BITS)) + +#define VIEW_SIZE (1u << (CONT_WIDTH_BITS + CONT_HEIGHT_BITS)) +#define VIEW_MASK (VIEW_SIZE - 1u) + +#define MASK(bits) ((1 << (bits)) - 1) + +#define TILER_FMT(x) ((enum tiler_fmt) \ + ((x >> SHIFT_ACC_MODE) & MASK_ACC_MODE)) + +#define MASK_VIEW (MASK_X_INVERT | MASK_Y_INVERT | MASK_XY_FLIP) + +/* location of the various tiler views in physical address space */ +#define TILVIEW_8BIT 0x60000000u +#define TILVIEW_16BIT (TILVIEW_8BIT + VIEW_SIZE) +#define TILVIEW_32BIT (TILVIEW_16BIT + VIEW_SIZE) +#define TILVIEW_PAGE (TILVIEW_32BIT + VIEW_SIZE) +#define TILVIEW_END (TILVIEW_PAGE + VIEW_SIZE) + +/* create tsptr by adding view orientation and access mode */ +#define TIL_ADDR(x, orient, a)\ + ((u32) (x) | (orient) | ((a) << SHIFT_ACC_MODE)) + +bool is_tiler_addr(u32 phys) +{ + return phys >= TILVIEW_8BIT && phys < TILVIEW_END; +} +EXPORT_SYMBOL(is_tiler_addr); + +u32 tiler_bpp(const struct tiler_block_t *b) +{ + enum tiler_fmt fmt = tiler_fmt(b->phys); + BUG_ON(fmt == TILFMT_INVALID); + + return geom[fmt].bpp_m; +} +EXPORT_SYMBOL(tiler_bpp); + +/* return the stride of a tiler-block in tiler space */ +static inline s32 tiler_stride(u32 tsptr) +{ + enum tiler_fmt fmt = TILER_FMT(tsptr); + + if (fmt == TILFMT_PAGE) + return 0; + else if (tsptr & MASK_XY_FLIP) + return 1 << (CONT_HEIGHT_BITS + geom[fmt].x_shft); + else + return 1 << (CONT_WIDTH_BITS + geom[fmt].y_shft); +} + +u32 tiler_pstride(const 
struct tiler_block_t *b) +{ + enum tiler_fmt fmt = tiler_fmt(b->phys); + BUG_ON(fmt == TILFMT_INVALID); + + /* return the virtual stride for page mode */ + if (fmt == TILFMT_PAGE) + return tiler_vstride(b); + + return tiler_stride(b->phys & ~MASK_VIEW); +} +EXPORT_SYMBOL(tiler_pstride); + +enum tiler_fmt tiler_fmt(u32 phys) +{ + if (!is_tiler_addr(phys)) + return TILFMT_INVALID; + + return TILER_FMT(phys); +} +EXPORT_SYMBOL(tiler_fmt); + +/* returns the tiler geometry information for a format */ +static const struct tiler_geom *get_geom(enum tiler_fmt fmt) +{ + if (fmt >= TILFMT_MIN && fmt <= TILFMT_MAX) + return geom + fmt; + return NULL; +} + +/** + * Returns the natural x and y coordinates for a pixel in tiler space address. + * That is, the coordinates for the same pixel in the natural (non-rotated, + * non-mirrored) view. This allows to uniquely identify a tiler pixel in any + * view orientation. + */ +static void tiler_get_natural_xy(u32 tsptr, u32 *x, u32 *y) +{ + u32 x_bits, y_bits, offset; + enum tiler_fmt fmt; + + fmt = TILER_FMT(tsptr); + + x_bits = CONT_WIDTH_BITS - geom[fmt].x_shft; + y_bits = CONT_HEIGHT_BITS - geom[fmt].y_shft; + offset = (tsptr & VIEW_MASK) >> (geom[fmt].x_shft + geom[fmt].y_shft); + + /* separate coordinate bitfields based on view orientation */ + if (tsptr & MASK_XY_FLIP) { + *x = offset >> y_bits; + *y = offset & MASK(y_bits); + } else { + *x = offset & MASK(x_bits); + *y = offset >> x_bits; + } + + /* account for mirroring */ + if (tsptr & MASK_X_INVERT) + *x ^= MASK(x_bits); + if (tsptr & MASK_Y_INVERT) + *y ^= MASK(y_bits); +} + +/* calculate the tiler space address of a pixel in a view orientation */ +static u32 tiler_get_address(u32 orient, enum tiler_fmt fmt, u32 x, u32 y) +{ + u32 x_bits, y_bits, tmp, x_mask, y_mask, alignment; + + x_bits = CONT_WIDTH_BITS - geom[fmt].x_shft; + y_bits = CONT_HEIGHT_BITS - geom[fmt].y_shft; + alignment = geom[fmt].x_shft + geom[fmt].y_shft; + + /* validate coordinate */ + x_mask = MASK(x_bits); + y_mask = MASK(y_bits); + if (x < 0 || x > x_mask || y < 0 || y > y_mask) + return 0; + + /* account for mirroring */ + if (orient & MASK_X_INVERT) + x ^= x_mask; + if (orient & MASK_Y_INVERT) + y ^= y_mask; + + /* get coordinate address */ + if (orient & MASK_XY_FLIP) + tmp = ((x << y_bits) + y); + else + tmp = ((y << x_bits) + x); + + return TIL_ADDR((tmp << alignment), orient, fmt); +} + +void tilview_create(struct tiler_view_t *view, u32 phys, u32 width, u32 height) +{ + BUG_ON(!is_tiler_addr(phys)); + + view->tsptr = phys & ~MASK_VIEW; + view->bpp = geom[TILER_FMT(phys)].bpp_m; + view->width = width; + view->height = height; + view->h_inc = view->bpp; + view->v_inc = tiler_stride(view->tsptr); +} +EXPORT_SYMBOL(tilview_create); + +void tilview_get(struct tiler_view_t *view, struct tiler_block_t *blk) +{ + view->tsptr = blk->phys & ~MASK_VIEW; + view->bpp = tiler_bpp(blk); + view->width = blk->width; + view->height = blk->height; + view->h_inc = view->bpp; + view->v_inc = tiler_stride(view->tsptr); +} +EXPORT_SYMBOL(tilview_get); + +s32 tilview_crop(struct tiler_view_t *view, u32 left, u32 top, u32 width, + u32 height) +{ + /* check for valid crop */ + if (left + width < left || left + width > view->width || + top + height < top || top + height > view->height) + return -EINVAL; + + view->tsptr += left * view->h_inc + top * view->v_inc; + view->width = width; + view->height = height; + return 0; +} +EXPORT_SYMBOL(tilview_crop); + +/* calculate tilerspace address and stride after view orientation change */ +static void 
reorient(struct tiler_view_t *view, u32 orient) +{ + u32 x, y; + + tiler_get_natural_xy(view->tsptr, &x, &y); + view->tsptr = tiler_get_address(orient, + TILER_FMT(view->tsptr), x, y); + view->v_inc = tiler_stride(view->tsptr); +} + +s32 tilview_rotate(struct tiler_view_t *view, s32 rotation) +{ + u32 orient; + + if (rotation % 90) + return -EINVAL; + + /* normalize rotation to quarters */ + rotation = (rotation / 90) & 3; + if (!rotation) + return 0; /* nothing to do */ + + /* PAGE mode view cannot be rotated */ + if (TILER_FMT(view->tsptr) == TILFMT_PAGE) + return -EPERM; + + /* + * first adjust top-left corner. NOTE: it rotates counter-clockwise: + * 0 < 3 + * v ^ + * 1 > 2 + */ + if (rotation < 3) + view->tsptr += (view->height - 1) * view->v_inc; + if (rotation > 1) + view->tsptr += (view->width - 1) * view->h_inc; + + /* then rotate view itself */ + orient = view->tsptr & MASK_VIEW; + + /* rotate by two quarters (180 degrees) */ + if (rotation & 2) { + orient ^= MASK_X_INVERT; + orient ^= MASK_Y_INVERT; + } + + /* rotate by the remaining quarter (90 degrees) */ + if (rotation & 1) { + orient ^= (orient & MASK_XY_FLIP) ? + MASK_X_INVERT : MASK_Y_INVERT; + + /* swap x & y */ + orient ^= MASK_XY_FLIP; + swap(view->height, view->width); + } + + /* finally reorient view */ + reorient(view, orient); + return 0; +} +EXPORT_SYMBOL(tilview_rotate); + +s32 tilview_flip(struct tiler_view_t *view, bool flip_x, bool flip_y) +{ + u32 orient; + orient = view->tsptr & MASK_VIEW; + + if (!flip_x && !flip_y) + return 0; /* nothing to do */ + + /* PAGE mode view cannot be flipped */ + if (TILER_FMT(view->tsptr) == TILFMT_PAGE) + return -EPERM; + + /* adjust top-left corner */ + if (flip_x) + view->tsptr += (view->width - 1) * view->h_inc; + if (flip_y) + view->tsptr += (view->height - 1) * view->v_inc; + + /* flip view orientation */ + if (orient & MASK_XY_FLIP) + swap(flip_x, flip_y); + + if (flip_x) + orient ^= MASK_X_INVERT; + if (flip_y) + orient ^= MASK_Y_INVERT; + + /* finally reorient view */ + reorient(view, orient); + return 0; +} +EXPORT_SYMBOL(tilview_flip); + +/* return the alias address for a coordinate */ +static inline u32 alias_address(enum tiler_fmt fmt, u32 x, u32 y) +{ + return tiler_get_address(0, fmt, x, y) + TILVIEW_8BIT; +} + +/* get the coordinates for an alias address */ +static inline void alias_xy(u32 ssptr, u32 *x, u32 *y) +{ + tiler_get_natural_xy(ssptr & ~MASK_VIEW, x, y); +} + +/* initialize shared geometric data */ +void tiler_geom_init(struct tiler_ops *tiler) +{ + struct tiler_geom *g; + + tiler->xy = alias_xy; + tiler->addr = alias_address; + tiler->geom = get_geom; + + tiler->page = TILER_PAGE; + tiler->width = TILER_WIDTH; + tiler->height = TILER_HEIGHT; + + /* calculate geometry */ + for (g = geom; g < geom + TILER_FORMATS; g++) { + g->bpp_m = g->bpp = 1 << (g->x_shft + g->y_shft); + g->slot_w = 1 << (SLOT_WIDTH_BITS - g->x_shft); + g->slot_h = 1 << (SLOT_HEIGHT_BITS - g->y_shft); + } + + /* set bpp_m = 1 for page mode as most applications deal in byte data */ + geom[TILFMT_PAGE].bpp_m = 1; +} diff --git a/drivers/media/video/tiler/tiler-iface.c b/drivers/media/video/tiler/tiler-iface.c new file mode 100644 index 0000000..b7d84d5 --- /dev/null +++ b/drivers/media/video/tiler/tiler-iface.c @@ -0,0 +1,340 @@ +/* + * tiler-iface.c + * + * TILER driver interface functions for TI TILER hardware block. + * + * Authors: Lajos Molnar <molnar@ti.com> + * David Sin <davidsin@ti.com> + * + * Copyright (C) 2009-2010 Texas Instruments, Inc.
+ * + * This package is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/fs.h> /* fops */ +#include <linux/uaccess.h> /* copy_to_user */ +#include <linux/slab.h> /* kmalloc */ +#include <linux/sched.h> /* current */ +#include <linux/mm.h> +#include <linux/mm_types.h> +#include <asm/mach/map.h> /* for ioremap_page */ + +#include "_tiler.h" + +static bool security = CONFIG_TILER_SECURITY; + +module_param(security, bool, 0644); +MODULE_PARM_DESC(security, + "Separate allocations by different processes into different pages"); + +static struct list_head procs; /* list of process info structs */ +static struct tiler_ops *ops; /* shared methods and variables */ + +/* + * process_info handling methods + * ========================================================================== + */ + +/* get process info, and increment refs for device tracking */ +struct process_info *__get_pi(pid_t pid, bool kernel) +{ + struct process_info *pi; + + /* + * if security is off, treat all user processes as one; kernel + * processes are still tracked separately so as not to free + * kernel-allocated areas when a user process closes the tiler driver + */ + if (!security) + pid = 0; + + /* find process context */ + mutex_lock(&ops->mtx); + list_for_each_entry(pi, &procs, list) { + if (pi->pid == pid && pi->kernel == kernel) + goto done; + } + + /* create process context */ + pi = kmalloc(sizeof(*pi), GFP_KERNEL); + if (!pi) + goto done; + memset(pi, 0, sizeof(*pi)); + + pi->pid = pid; + pi->kernel = kernel; + INIT_LIST_HEAD(&pi->groups); + INIT_LIST_HEAD(&pi->bufs); + list_add(&pi->list, &procs); +done: + /* increment reference count */ + if (pi && !kernel) + pi->refs++; + mutex_unlock(&ops->mtx); + return pi; +} + +/** + * Free all info kept by a process: all registered buffers, allocated blocks, + * and unreferenced blocks. Any blocks/areas still referenced will move to the + * orphaned lists to avoid issues if a new process is created with the same pid.
+ * + * caller MUST already have mtx + */ +void _m_free_process_info(struct process_info *pi) +{ + struct gid_info *gi, *gi_; +#ifdef CONFIG_TILER_ENABLE_USERSPACE + struct __buf_info *_b = NULL, *_b_ = NULL; + + if (!list_empty(&pi->bufs)) + tiler_notify_event(TILER_DEVICE_CLOSE, NULL); + + /* unregister all buffers */ + list_for_each_entry_safe(_b, _b_, &pi->bufs, by_pid) + _m_unregister_buf(_b); +#endif + BUG_ON(!list_empty(&pi->bufs)); + + /* free all allocated blocks, and remove unreferenced ones */ + list_for_each_entry_safe(gi, gi_, &pi->groups, by_pid) + ops->destroy_group(gi); + + BUG_ON(!list_empty(&pi->groups)); + list_del(&pi->list); + kfree(pi); +} + +static void destroy_processes(void) +{ + struct process_info *pi, *pi_; + + mutex_lock(&ops->mtx); + + list_for_each_entry_safe(pi, pi_, &procs, list) + _m_free_process_info(pi); + BUG_ON(!list_empty(&procs)); + + mutex_unlock(&ops->mtx); +} + + +/* initialize tiler interface */ +void tiler_iface_init(struct tiler_ops *tiler) +{ + ops = tiler; + ops->cleanup = destroy_processes; + +#ifdef CONFIG_TILER_SECURE + security = true; +#endif + INIT_LIST_HEAD(&procs); +} + +/* + * Kernel APIs + * ========================================================================== + */ + +u32 tiler_virt2phys(u32 usr) +{ + pmd_t *pmd; + pte_t *ptep; + pgd_t *pgd = pgd_offset(current->mm, usr); + + if (pgd_none(*pgd) || pgd_bad(*pgd)) + return 0; + + pmd = pmd_offset(pgd, usr); + if (pmd_none(*pmd) || pmd_bad(*pmd)) + return 0; + + ptep = pte_offset_map(pmd, usr); + if (ptep && pte_present(*ptep)) + return (*ptep & PAGE_MASK) | (~PAGE_MASK & usr); + + return 0; +} +EXPORT_SYMBOL(tiler_virt2phys); + +void tiler_reservex(u32 n, enum tiler_fmt fmt, u32 width, u32 height, + u32 gid, pid_t pid) +{ + struct process_info *pi = __get_pi(pid, true); + + if (pi) + ops->reserve(n, fmt, width, height, gid, pi); +} +EXPORT_SYMBOL(tiler_reservex); + +void tiler_reserve(u32 n, enum tiler_fmt fmt, u32 width, u32 height) +{ + tiler_reservex(n, fmt, width, height, 0, current->tgid); +} +EXPORT_SYMBOL(tiler_reserve); + +#ifdef CONFIG_TILER_ENABLE_NV12 +void tiler_reservex_nv12(u32 n, u32 width, u32 height, + u32 gid, pid_t pid) +{ + struct process_info *pi = __get_pi(pid, true); + + if (pi) + ops->reserve_nv12(n, width, height, gid, pi); +} +EXPORT_SYMBOL(tiler_reservex_nv12); + +void tiler_reserve_nv12(u32 n, u32 width, u32 height) +{ + tiler_reservex_nv12(n, width, height, 0, current->tgid); +} +EXPORT_SYMBOL(tiler_reserve_nv12); +#endif + +s32 tiler_allocx(struct tiler_block_t *blk, enum tiler_fmt fmt, + u32 gid, pid_t pid) +{ + struct mem_info *mi; + struct process_info *pi; + s32 res; + + BUG_ON(!blk || blk->phys); + + pi = __get_pi(pid, true); + if (!pi) + return -ENOMEM; + + res = ops->alloc(fmt, blk->width, blk->height, blk->key, gid, pi, &mi); + if (mi) { + blk->phys = mi->blk.phys; + blk->id = mi->blk.id; + } + return res; +} +EXPORT_SYMBOL(tiler_allocx); + +s32 tiler_alloc(struct tiler_block_t *blk, enum tiler_fmt fmt) +{ + return tiler_allocx(blk, fmt, 0, current->tgid); +} +EXPORT_SYMBOL(tiler_alloc); + +s32 tiler_mapx(struct tiler_block_t *blk, enum tiler_fmt fmt, u32 gid, + pid_t pid, u32 usr_addr) +{ + struct mem_info *mi; + struct process_info *pi; + s32 res; + + BUG_ON(!blk || blk->phys); + + pi = __get_pi(pid, true); + if (!pi) + return -ENOMEM; + + res = ops->pin(fmt, blk->width, blk->height, blk->key, gid, pi, &mi, + usr_addr); + if (mi) { + blk->phys = mi->blk.phys; + blk->id = mi->blk.id; + } + return res; + +} +EXPORT_SYMBOL(tiler_mapx); + +s32 
tiler_map(struct tiler_block_t *blk, enum tiler_fmt fmt, u32 usr_addr) +{ + return tiler_mapx(blk, fmt, 0, current->tgid, usr_addr); +} +EXPORT_SYMBOL(tiler_map); + +s32 tiler_mmap_blk(struct tiler_block_t *blk, u32 offs, u32 size, + struct vm_area_struct *vma, u32 voffs) +{ + u32 v, p, len; + + /* mapping must fit into vma */ + BUG_ON(vma->vm_start > vma->vm_start + voffs || + vma->vm_start + voffs > vma->vm_start + voffs + size || + vma->vm_start + voffs + size > vma->vm_end); + + /* mapping must fit into block */ + BUG_ON(offs > offs + size || offs + size > tiler_size(blk)); + + v = tiler_vstride(blk); + p = tiler_pstride(blk); + + /* remap block portion */ + len = v - (offs % v); /* initial area to map */ + while (size) { + /* restrict to size still needs mapping */ + if (len > size) + len = size; + + vma->vm_pgoff = (blk->phys + offs) >> PAGE_SHIFT; + if (remap_pfn_range(vma, vma->vm_start + voffs, vma->vm_pgoff, + len, vma->vm_page_prot)) + return -EAGAIN; + voffs += len; + offs += len + p - v; + size -= len; + len = v; /* subsequent area to map */ + } + return 0; +} +EXPORT_SYMBOL(tiler_mmap_blk); + +s32 tiler_ioremap_blk(struct tiler_block_t *blk, u32 offs, u32 size, + u32 addr, u32 mtype) +{ + u32 v, p; + u32 len; /* area to map */ + const struct mem_type *type = get_mem_type(mtype); + + /* mapping must fit into address space */ + BUG_ON(addr > addr + size); + + /* mapping must fit into block */ + BUG_ON(offs > offs + size || offs + size > tiler_size(blk)); + + v = tiler_vstride(blk); + p = tiler_pstride(blk); + + /* move offset and address to end */ + offs += blk->phys + size; + addr += size; + + len = v - (offs % v); /* initial area to map */ + while (size) { + while (len && size) { + if (ioremap_page(addr - size, offs - size, type)) + return -EAGAIN; + len -= PAGE_SIZE; + size -= PAGE_SIZE; + } + + offs += p - v; + len = v; /* subsequent area to map */ + } + return 0; +} +EXPORT_SYMBOL(tiler_ioremap_blk); + +void tiler_free(struct tiler_block_t *blk) +{ + /* find block */ + struct mem_info *mi = ops->lock(blk->key, blk->id, NULL); + if (mi) + ops->unlock_free(mi, true); + blk->phys = blk->id = 0; +} +EXPORT_SYMBOL(tiler_free); diff --git a/drivers/media/video/tiler/tiler-ioctl.c b/drivers/media/video/tiler/tiler-ioctl.c new file mode 100644 index 0000000..b54c39f --- /dev/null +++ b/drivers/media/video/tiler/tiler-ioctl.c @@ -0,0 +1,529 @@ +/* + * tiler-ioctl.c + * + * TILER driver userspace interface functions for TI TILER hardware block. + * + * Authors: Lajos Molnar <molnar@ti.com> + * David Sin <davidsin@ti.com> + * + * Copyright (C) 2009-2010 Texas Instruments, Inc. + * + * This package is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#include <linux/module.h> +#include <linux/mutex.h> +#include <linux/fs.h> /* fops */ +#include <linux/uaccess.h> /* copy_to_user */ +#include <linux/slab.h> /* kmalloc */ +#include <linux/sched.h> /* current */ +#include <linux/mm.h> +#include <linux/mm_types.h> +#include <asm/mach/map.h> /* for ioremap_page */ + +#include "_tiler.h" + +static bool ssptr_lookup = true; +static bool offset_lookup = true; + +module_param(ssptr_lookup, bool, 0644); +MODULE_PARM_DESC(ssptr_lookup, + "Allow looking up a block by ssptr - This is a security risk"); +module_param(offset_lookup, bool, 0644); +MODULE_PARM_DESC(offset_lookup, + "Allow looking up a buffer by offset - This is a security risk"); + +static struct tiler_ops *ops; /* shared methods and variables */ +static struct blocking_notifier_head notifier; /* notifier for events */ + +/* + * Event notification methods + * ========================================================================== + */ + +s32 tiler_notify_event(int event, void *data) +{ + return blocking_notifier_call_chain(&notifier, event, data); +} + +/* + * Buffer handling methods + * ========================================================================== + */ + +/* check if an offset is used */ +static bool _m_offs_in_use(u32 offs, u32 length, struct process_info *pi) +{ + struct __buf_info *_b; + /* have mutex */ + list_for_each_entry(_b, &pi->bufs, by_pid) + if (_b->buf_info.offset < offs + length && + _b->buf_info.offset + _b->buf_info.length > offs) + return 1; + return 0; +} + +/* get an offset */ +static u32 _m_get_offs(struct process_info *pi, u32 length) +{ + static u32 offs = 0xda7a; + + /* ensure no one is using this offset */ + while ((offs << PAGE_SHIFT) + length < length || + _m_offs_in_use(offs << PAGE_SHIFT, length, pi)) { + /* use a pseudo-random generator to get a new offset to try */ + + /* Galois LFSR: 20, 17 */ + offs = (offs >> 1) ^ (u32)((0 - (offs & 1u)) & 0x90000); + } + + return offs << PAGE_SHIFT; +} + +/* find and lock a block.
process_info is optional */ +static struct mem_info * +_m_lock_block(u32 key, u32 id, struct process_info *pi) { + struct gid_info *gi; + struct mem_info *mi; + + /* if process_info is given, look there first */ + if (pi) { + /* have mutex */ + + /* find and lock the block in one of the process's groups */ + list_for_each_entry(gi, &pi->groups, by_pid) { + mi = ops->lock(key, id, gi); + if (mi) + return mi; + } + } + + /* if not found or no process_info given, find block in global list */ + return ops->lock(key, id, NULL); +} + +/* register a buffer */ +static s32 _m_register_buf(struct __buf_info *_b, struct process_info *pi) +{ + struct mem_info *mi; + struct tiler_buf_info *b = &_b->buf_info; + u32 i, num = b->num_blocks, offs; + + /* check validity */ + if (num > TILER_MAX_NUM_BLOCKS || num == 0) + return -EINVAL; + + /* find each block */ + b->length = 0; + for (i = 0; i < num; i++) { + mi = _m_lock_block(b->blocks[i].key, b->blocks[i].id, pi); + if (!mi) { + /* unlock any blocks already found */ + while (i--) + ops->unlock_free(_b->mi[i], false); + return -EACCES; + } + _b->mi[i] = mi; + + /* we don't keep track of ptr and 1D stride so clear them */ + b->blocks[i].ptr = NULL; + b->blocks[i].stride = 0; + + ops->describe(mi, b->blocks + i); + b->length += tiler_size(&mi->blk); + } + + /* if found all, register buffer */ + offs = _b->mi[0]->blk.phys & ~PAGE_MASK; + b->offset = _m_get_offs(pi, b->length) + offs; + b->length -= offs; + + /* have mutex */ + list_add(&_b->by_pid, &pi->bufs); + + return 0; +} + +/* unregister a buffer */ +void _m_unregister_buf(struct __buf_info *_b) +{ + u32 i; + + /* unregister */ + list_del(&_b->by_pid); + + /* no longer using the blocks */ + for (i = 0; i < _b->buf_info.num_blocks; i++) + ops->unlock_free(_b->mi[i], false); + + kfree(_b); +} + + +/* + * File operations (mmap, ioctl, open, close) + * ========================================================================== + */ + +/* mmap tiler buffer into user's virtual space */ +static s32 tiler_mmap(struct file *filp, struct vm_area_struct *vma) +{ + struct __buf_info *_b; + struct tiler_buf_info *b = NULL; + u32 i, map_offs, map_size, blk_offs, blk_size, mapped_size; + struct process_info *pi = filp->private_data; + u32 offs = vma->vm_pgoff << PAGE_SHIFT; + u32 size = vma->vm_end - vma->vm_start; + + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); + + /* find tiler buffer to mmap */ + mutex_lock(&ops->mtx); + list_for_each_entry(_b, &pi->bufs, by_pid) { + /* we support partial mmapping of a whole tiler buffer */ + if (offs >= (_b->buf_info.offset & PAGE_MASK) && + offs + size <= PAGE_ALIGN(_b->buf_info.offset + + _b->buf_info.length)) { + b = &_b->buf_info; + break; + } + } + mutex_unlock(&ops->mtx); + + /* we use b to detect if we found the buffer */ + if (!b) + return -ENXIO; + + /* mmap relevant blocks */ + blk_offs = _b->buf_info.offset; + + /* start at the beginning of the region */ + mapped_size = 0; + for (i = 0; i < b->num_blocks; i++, blk_offs += blk_size) { + blk_size = tiler_size(&_b->mi[i]->blk); + /* see if tiler block is inside the requested region */ + if (offs >= blk_offs + blk_size || offs + size < blk_offs) + continue; + /* get the offset and map size for this particular block */ + map_offs = max(offs, blk_offs) - blk_offs; + map_size = min(size - mapped_size, blk_size); + + /* mmap block */ + if (tiler_mmap_blk(&_b->mi[i]->blk, map_offs, map_size, vma, + mapped_size)) + return -EAGAIN; + + /* update mmap region pointer */ + mapped_size += map_size; + } + return 0; +} + +/* ioctl handler
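. + * + * Illustrative userspace usage of the TILIOC_GBLK command handled below + * (fd is assumed to be an open tiler device node): + * struct tiler_block_info bi = {0}; + * bi.fmt = TILFMT_8BIT; + * bi.dim.area.width = 640; + * bi.dim.area.height = 480; + * ioctl(fd, TILIOC_GBLK, &bi); + * on success, bi.id identifies the block, as filled in by ops->describe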
*/ +static long tiler_ioctl(struct file *filp, u32 cmd, unsigned long arg) +{ + s32 r; + void __user *data = (void __user *)arg; + struct process_info *pi = filp->private_data; + struct __buf_info *_b; + struct tiler_buf_info buf_info = {0}; + struct tiler_block_info block_info = {0}; + struct mem_info *mi; + u32 phys_addr; + + switch (cmd) { + /* allocate block */ + case TILIOC_GBLK: + if (copy_from_user(&block_info, data, sizeof(block_info))) + return -EFAULT; + + switch (block_info.fmt) { + case TILFMT_PAGE: + r = ops->alloc(block_info.fmt, block_info.dim.len, 1, + block_info.key, block_info.group_id, + pi, &mi); + break; + case TILFMT_8BIT: + case TILFMT_16BIT: + case TILFMT_32BIT: + r = ops->alloc(block_info.fmt, + block_info.dim.area.width, + block_info.dim.area.height, + block_info.key, block_info.group_id, + pi, &mi); + break; + default: + return -EINVAL; + } + if (r) + return r; + + /* fill out block info */ + if (mi) { + block_info.ptr = NULL; + ops->describe(mi, &block_info); + } + + if (copy_to_user(data, &block_info, sizeof(block_info))) + return -EFAULT; + break; + /* free/unmap block */ + case TILIOC_FBLK: + case TILIOC_UMBLK: + if (copy_from_user(&block_info, data, sizeof(block_info))) + return -EFAULT; + + /* search current process first, then all processes */ + mutex_lock(&ops->mtx); + mi = _m_lock_block(block_info.key, block_info.id, pi); + mutex_unlock(&ops->mtx); + if (mi) + ops->unlock_free(mi, true); + + /* free always succeeds */ + break; + /* get physical address */ + case TILIOC_GSSP: + down_read(&current->mm->mmap_sem); + phys_addr = tiler_virt2phys(arg); + up_read(&current->mm->mmap_sem); + return phys_addr; + break; + /* map block */ + case TILIOC_MBLK: + if (copy_from_user(&block_info, data, sizeof(block_info))) + return -EFAULT; + + if (!block_info.ptr) + return -EFAULT; + + r = ops->pin(block_info.fmt, block_info.dim.len, 1, + block_info.key, block_info.group_id, pi, + &mi, (u32)block_info.ptr); + if (r) + return r; + + /* fill out block info */ + if (mi) + ops->describe(mi, &block_info); + + if (copy_to_user(data, &block_info, sizeof(block_info))) + return -EFAULT; + break; +#ifndef CONFIG_TILER_SECURE + /* query buffer information by offset */ + case TILIOC_QBUF: + if (!offset_lookup) + return -EPERM; + + if (copy_from_user(&buf_info, data, sizeof(buf_info))) + return -EFAULT; + + /* find buffer */ + mutex_lock(&ops->mtx); + r = -ENOENT; + /* buffer registration is per process */ + list_for_each_entry(_b, &pi->bufs, by_pid) { + if (buf_info.offset == _b->buf_info.offset) { + memcpy(&buf_info, &_b->buf_info, + sizeof(buf_info)); + r = 0; + break; + } + } + mutex_unlock(&ops->mtx); + + if (r) + return r; + + if (copy_to_user(data, &_b->buf_info, sizeof(_b->buf_info))) + return -EFAULT; + break; +#endif + /* register buffer */ + case TILIOC_RBUF: + /* save buffer information */ + _b = kmalloc(sizeof(*_b), GFP_KERNEL); + if (!_b) + return -ENOMEM; + memset(_b, 0, sizeof(*_b)); + + if (copy_from_user(&_b->buf_info, data, sizeof(_b->buf_info))) { + kfree(_b); + return -EFAULT; + } + + mutex_lock(&ops->mtx); + r = _m_register_buf(_b, pi); + mutex_unlock(&ops->mtx); + + if (r) { + kfree(_b); + return -EACCES; + } + + /* undo registration on failure */ + if (copy_to_user(data, &_b->buf_info, sizeof(_b->buf_info))) { + mutex_lock(&ops->mtx); + _m_unregister_buf(_b); + mutex_unlock(&ops->mtx); + return -EFAULT; + } + break; + /* unregister a buffer */ + case TILIOC_URBUF: + if (copy_from_user(&buf_info, data, sizeof(buf_info))) + return -EFAULT; + + /* find buffer */ + r =
-EFAULT; + mutex_lock(&ops->mtx); + /* buffer registration is per process */ + list_for_each_entry(_b, &pi->bufs, by_pid) { + if (buf_info.offset == _b->buf_info.offset) { + /* only retrieve buffer length */ + buf_info.length = _b->buf_info.length; + _m_unregister_buf(_b); + r = 0; + break; + } + } + mutex_unlock(&ops->mtx); + + if (r) + return r; + + if (copy_to_user(data, &buf_info, sizeof(buf_info))) + return -EFAULT; + break; + /* pre-reserve blocks */ + case TILIOC_PRBLK: + if (copy_from_user(&block_info, data, sizeof(block_info))) + return -EFAULT; + + if (block_info.fmt == TILFMT_8AND16) +#ifdef CONFIG_TILER_ENABLE_NV12 + ops->reserve_nv12(block_info.key, + block_info.dim.area.width, + block_info.dim.area.height, + block_info.group_id, pi); +#else + return -EINVAL; +#endif + else + ops->reserve(block_info.key, + block_info.fmt, + block_info.dim.area.width, + block_info.dim.area.height, + block_info.group_id, pi); + break; + /* unreserve blocks */ + case TILIOC_URBLK: + ops->unreserve(arg, pi); + break; + /* query a tiler block */ + case TILIOC_QBLK: + if (copy_from_user(&block_info, data, sizeof(block_info))) + return -EFAULT; + + if (block_info.id) { + /* look up by id if specified */ + mutex_lock(&ops->mtx); + mi = _m_lock_block(block_info.key, block_info.id, pi); + mutex_unlock(&ops->mtx); + } else +#ifndef CONFIG_TILER_SECURE + if (ssptr_lookup) { + /* otherwise, look up by ssptr if allowed */ + mi = ops->lock_by_ssptr(block_info.ssptr); + } else +#endif + return -EPERM; + + if (!mi) + return -EFAULT; + + /* we don't keep track of ptr and 1D stride so clear them */ + block_info.ptr = NULL; + block_info.stride = 0; + + ops->describe(mi, &block_info); + ops->unlock_free(mi, false); + + if (copy_to_user(data, &block_info, sizeof(block_info))) + return -EFAULT; + break; + default: + return -EINVAL; + } + return 0; +} + +/* open tiler driver */ +static s32 tiler_open(struct inode *ip, struct file *filp) +{ + struct process_info *pi = __get_pi(current->tgid, false); + if (!pi) + return -ENOMEM; + + filp->private_data = pi; + return 0; +} + +/* close tiler driver */ +static s32 tiler_release(struct inode *ip, struct file *filp) +{ + struct process_info *pi = filp->private_data; + + mutex_lock(&ops->mtx); + /* free resources if last device in this process */ + if (0 == --pi->refs) + _m_free_process_info(pi); + + mutex_unlock(&ops->mtx); + + return 0; +} + +/* tiler driver file operations */ +static const struct file_operations tiler_fops = { + .open = tiler_open, + .unlocked_ioctl = tiler_ioctl, + .release = tiler_release, + .mmap = tiler_mmap, +}; + + +void tiler_ioctl_init(struct tiler_ops *tiler) +{ + ops = tiler; + ops->fops = &tiler_fops; + +#ifdef CONFIG_TILER_SECURE + offset_lookup = ssptr_lookup = false; +#endif + BLOCKING_INIT_NOTIFIER_HEAD(&notifier); +} + + +s32 tiler_reg_notifier(struct notifier_block *nb) +{ + if (!nb) + return -EINVAL; + return blocking_notifier_chain_register(&notifier, nb); +} +EXPORT_SYMBOL(tiler_reg_notifier); + +s32 tiler_unreg_notifier(struct notifier_block *nb) +{ + if (!nb) + return -EINVAL; + return blocking_notifier_chain_unregister(&notifier, nb); +} +EXPORT_SYMBOL(tiler_unreg_notifier); diff --git a/drivers/media/video/tiler/tiler-main.c b/drivers/media/video/tiler/tiler-main.c new file mode 100644 index 0000000..34bb1e4 --- /dev/null +++ b/drivers/media/video/tiler/tiler-main.c @@ -0,0 +1,1772 @@ +/* + * tiler-main.c + * + * TILER driver main support functions for TI TILER hardware block.
+ * + * Authors: Lajos Molnar <molnar@ti.com> + * David Sin <davidsin@ti.com> + * + * Copyright (C) 2009-2010 Texas Instruments, Inc. + * + * This package is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/cdev.h> /* struct cdev */ +#include <linux/kdev_t.h> /* MKDEV() */ +#include <linux/fs.h> /* register_chrdev_region() */ +#include <linux/device.h> /* struct class */ +#include <linux/platform_device.h> /* platform_device() */ +#include <linux/err.h> /* IS_ERR() */ +#include <linux/errno.h> +#include <linux/mutex.h> +#include <linux/dma-mapping.h> /* dma_alloc_coherent */ +#include <linux/pagemap.h> /* page_cache_release() */ +#include <linux/slab.h> +#include <linux/sched.h> +#include <linux/seq_file.h> +#include <linux/debugfs.h> + +#include <mach/dmm.h> +#include "tmm.h" +#include "_tiler.h" +#include "tcm/tcm-sita.h" /* TCM algorithm */ + +static bool ssptr_id = CONFIG_TILER_SSPTR_ID; +static uint granularity = CONFIG_TILER_GRANULARITY; +static uint tiler_alloc_debug; + +/* + * We can only change ssptr_id if there are no blocks allocated, so that + * pseudo-random ids and ssptrs do not potentially clash. For now make it + * read-only. + */ +module_param(ssptr_id, bool, 0444); +MODULE_PARM_DESC(ssptr_id, "Use ssptr as block ID"); +module_param_named(grain, granularity, uint, 0644); +MODULE_PARM_DESC(grain, "Granularity (bytes)"); +module_param_named(alloc_debug, tiler_alloc_debug, uint, 0644); +MODULE_PARM_DESC(alloc_debug, "Allocation debug flag"); + +struct tiler_dev { + struct cdev cdev; +}; +static struct dentry *dbgfs; +static struct dentry *dbg_map; + +static struct tiler_ops tiler; /* shared methods and variables */ + +static struct list_head blocks; /* all tiler blocks */ +static struct list_head orphan_areas; /* orphaned 2D areas */ +static struct list_head orphan_onedim; /* orphaned 1D areas */ + +static s32 tiler_major; +static s32 tiler_minor; +static struct tiler_dev *tiler_device; +static struct class *tilerdev_class; +static struct mutex mtx; +static struct tcm *tcm[TILER_FORMATS]; +static struct tmm *tmm[TILER_FORMATS]; +static u32 *dmac_va; +static dma_addr_t dmac_pa; +static DEFINE_MUTEX(dmac_mtx); + +/* + * TMM connectors + * ========================================================================== + */ +/* wrapper around tmm_pin */ +static s32 pin_mem_to_area(struct tmm *tmm, struct tcm_area *area, u32 *ptr) +{ + s32 res = 0; + struct pat_area p_area = {0}; + struct tcm_area slice, area_s; + + /* Ensure the data reaches to main memory before PAT refill */ + wmb(); + + mutex_lock(&dmac_mtx); + tcm_for_each_slice(slice, *area, area_s) { + p_area.x0 = slice.p0.x; + p_area.y0 = slice.p0.y; + p_area.x1 = slice.p1.x; + p_area.y1 = slice.p1.y; + + memcpy(dmac_va, ptr, sizeof(*ptr) * tcm_sizeof(slice)); + ptr += tcm_sizeof(slice); + + /* pin memory into DMM */ + if (tmm_pin(tmm, p_area, dmac_pa)) { + res = -EFAULT; + break; + } + } + mutex_unlock(&dmac_mtx); + + return res; +} + +/* wrapper around tmm_unpin */ +static void unpin_mem_from_area(struct tmm *tmm, struct tcm_area *area) +{ + struct pat_area p_area = {0}; + struct tcm_area slice, area_s; + + 
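/* take the same lock as pin_mem_to_area so PAT updates stay serialized */ +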
mutex_lock(&dmac_mtx); + tcm_for_each_slice(slice, *area, area_s) { + p_area.x0 = slice.p0.x; + p_area.y0 = slice.p0.y; + p_area.x1 = slice.p1.x; + p_area.y1 = slice.p1.y; + + tmm_unpin(tmm, p_area); + } + mutex_unlock(&dmac_mtx); +} + +/* + * ID handling methods + * ========================================================================== + */ + +/* check if an id is used */ +static bool _m_id_in_use(u32 id) +{ + struct mem_info *mi; + list_for_each_entry(mi, &blocks, global) + if (mi->blk.id == id) + return 1; + return 0; +} + +/* get an id */ +static u32 _m_get_id(void) +{ + static u32 id = 0x2d7ae; + + /* ensure no one is using this id */ + while (_m_id_in_use(id)) { + /* generate a new pseudo-random ID */ + + /* Galois LFSR: 32, 22, 2, 1 */ + id = (id >> 1) ^ (u32)((0 - (id & 1u)) & 0x80200003u); + } + + return id; +} + +/* + * Debugfs support + * ========================================================================== + */ +struct tiler_debugfs_data { + char name[17]; + void (*func)(struct seq_file *, u32 arg); + u32 arg; +}; + +static void fill_map(char **map, int xdiv, int ydiv, struct tcm_area *a, + char c, bool ovw) +{ + int x, y; + for (y = a->p0.y / ydiv; y <= a->p1.y / ydiv; y++) + for (x = a->p0.x / xdiv; x <= a->p1.x / xdiv; x++) + if (map[y][x] == ' ' || ovw) + map[y][x] = c; +} + +static void fill_map_pt(char **map, int xdiv, int ydiv, struct tcm_pt *p, + char c) +{ + map[p->y / ydiv][p->x / xdiv] = c; +} + +static char read_map_pt(char **map, int xdiv, int ydiv, struct tcm_pt *p) +{ + return map[p->y / ydiv][p->x / xdiv]; +} + +static int map_width(int xdiv, int x0, int x1) +{ + return (x1 / xdiv) - (x0 / xdiv) + 1; +} + +static void text_map(char **map, int xdiv, char *nice, int yd, int x0, int x1) +{ + char *p = map[yd] + (x0 / xdiv); + int w = (map_width(xdiv, x0, x1) - strlen(nice)) / 2; + if (w >= 0) { + p += w; + while (*nice) + *p++ = *nice++; + } +} + +static void map_1d_info(char **map, int xdiv, int ydiv, char *nice, + struct tcm_area *a) +{ + sprintf(nice, "%dK", tcm_sizeof(*a) * 4); + if (a->p0.y + 1 < a->p1.y) { + text_map(map, xdiv, nice, (a->p0.y + a->p1.y) / 2 / ydiv, 0, + tiler.width - 1); + } else if (a->p0.y < a->p1.y) { + if (strlen(nice) < map_width(xdiv, a->p0.x, tiler.width - 1)) + text_map(map, xdiv, nice, a->p0.y / ydiv, + a->p0.x + xdiv, tiler.width - 1); + else if (strlen(nice) < map_width(xdiv, 0, a->p1.x)) + text_map(map, xdiv, nice, a->p1.y / ydiv, + 0, a->p1.x - xdiv); + } else if (strlen(nice) + 1 < map_width(xdiv, a->p0.x, a->p1.x)) { + text_map(map, xdiv, nice, a->p0.y / ydiv, a->p0.x, a->p1.x); + } +} + +static void map_2d_info(char **map, int xdiv, int ydiv, char *nice, + struct tcm_area *a) +{ + sprintf(nice, "(%d*%d)", tcm_awidth(*a), tcm_aheight(*a)); + if (strlen(nice) + 1 < map_width(xdiv, a->p0.x, a->p1.x)) + text_map(map, xdiv, nice, (a->p0.y + a->p1.y) / 2 / ydiv, + a->p0.x, a->p1.x); +} + +static void debug_allocation_map(struct seq_file *s, u32 arg) +{ + int xdiv = (arg >> 8) & 0xFF; + int ydiv = arg & 0xFF; + int i; + char **map, *global_map; + struct area_info *ai; + struct mem_info *mi; + struct tcm_area a, p; + static char *m2d = "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"; + static char *a2d = ".,:;'\"`~!^-+"; + char *m2dp = m2d, *a2dp = a2d; + char nice[128]; + + /* allocate map */ + map = kzalloc(tiler.height / ydiv * sizeof(*map), GFP_KERNEL); + global_map = kzalloc((tiler.width / xdiv + 1) * tiler.height / ydiv, + GFP_KERNEL); + if (!map || !global_map) { + printk(KERN_ERR "could not
allocate map for debug print\n"); + goto error; + } + memset(global_map, ' ', (tiler.width / xdiv + 1) * tiler.height / ydiv); + for (i = 0; i < tiler.height / ydiv; i++) { + map[i] = global_map + i * (tiler.width / xdiv + 1); + map[i][tiler.width / xdiv] = 0; + } + + /* get all allocations */ + mutex_lock(&mtx); + + list_for_each_entry(mi, &blocks, global) { + if (mi->area.is2d) { + ai = mi->parent; + fill_map(map, xdiv, ydiv, &ai->area, *a2dp, false); + fill_map(map, xdiv, ydiv, &mi->area, *m2dp, true); + if (!*++a2dp) + a2dp = a2d; + if (!*++m2dp) + m2dp = m2d; + map_2d_info(map, xdiv, ydiv, nice, &mi->area); + } else { + bool start = read_map_pt(map, xdiv, ydiv, &mi->area.p0) + == ' '; + bool end = read_map_pt(map, xdiv, ydiv, &mi->area.p1) + == ' '; + tcm_for_each_slice(a, mi->area, p) + fill_map(map, xdiv, ydiv, &a, '=', true); + fill_map_pt(map, xdiv, ydiv, &mi->area.p0, + start ? '<' : 'X'); + fill_map_pt(map, xdiv, ydiv, &mi->area.p1, + end ? '>' : 'X'); + map_1d_info(map, xdiv, ydiv, nice, &mi->area); + } + } + + seq_printf(s, "BEGIN TILER MAP\n"); + for (i = 0; i < tiler.height / ydiv; i++) + seq_printf(s, "%03d:%s\n", i * ydiv, map[i]); + seq_printf(s, "END TILER MAP\n"); + + mutex_unlock(&mtx); + +error: + kfree(map); + kfree(global_map); +} + +const struct tiler_debugfs_data debugfs_maps[] = { + { "1x1", debug_allocation_map, 0x0101 }, + { "2x1", debug_allocation_map, 0x0201 }, + { "4x1", debug_allocation_map, 0x0401 }, + { "2x2", debug_allocation_map, 0x0202 }, + { "4x2", debug_allocation_map, 0x0402 }, + { "4x4", debug_allocation_map, 0x0404 }, +}; + +static int tiler_debug_show(struct seq_file *s, void *unused) +{ + struct tiler_debugfs_data *fn = s->private; + fn->func(s, fn->arg); + return 0; +} + +static int tiler_debug_open(struct inode *inode, struct file *file) +{ + return single_open(file, tiler_debug_show, inode->i_private); +} + +static const struct file_operations tiler_debug_fops = { + .open = tiler_debug_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +/* + * gid_info handling methods + * ========================================================================== + */ + +/* get or create new gid_info object */ +static struct gid_info *_m_get_gi(struct process_info *pi, u32 gid) +{ + struct gid_info *gi; + + /* have mutex */ + + /* see if the group already exists */ + list_for_each_entry(gi, &pi->groups, by_pid) { + if (gi->gid == gid) + goto done; + } + + /* create new group */ + gi = kmalloc(sizeof(*gi), GFP_KERNEL); + if (!gi) + return gi; + + memset(gi, 0, sizeof(*gi)); + INIT_LIST_HEAD(&gi->areas); + INIT_LIST_HEAD(&gi->onedim); + INIT_LIST_HEAD(&gi->reserved); + gi->pi = pi; + gi->gid = gid; + list_add(&gi->by_pid, &pi->groups); +done: + /* + * Once the area is allocated, the group info's ref count is + * decremented again, as the reference is no longer needed.
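+ * Callers therefore balance each successful _m_get_gi()/get_gi() with a + * release_gi(), which drops the reference and may free the group.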
+ */ + gi->refs++; + return gi; +} + +/* free gid_info object if empty */ +static void _m_try_free_group(struct gid_info *gi) +{ + /* have mutex */ + if (gi && list_empty(&gi->areas) && list_empty(&gi->onedim) && + /* also ensure noone is still using this group */ + !gi->refs) { + BUG_ON(!list_empty(&gi->reserved)); + list_del(&gi->by_pid); + + /* if group is tracking kernel objects, we may free even + the process info */ + if (gi->pi->kernel && list_empty(&gi->pi->groups)) { + list_del(&gi->pi->list); + kfree(gi->pi); + } + + kfree(gi); + } +} + +/* --- external versions --- */ + +static struct gid_info *get_gi(struct process_info *pi, u32 gid) +{ + struct gid_info *gi; + mutex_lock(&mtx); + gi = _m_get_gi(pi, gid); + mutex_unlock(&mtx); + return gi; +} + +static void release_gi(struct gid_info *gi) +{ + mutex_lock(&mtx); + gi->refs--; + _m_try_free_group(gi); + mutex_unlock(&mtx); +} + +/* + * Area handling methods + * ========================================================================== + */ + +/* allocate an reserved area of size, alignment and link it to gi */ +/* leaves mutex locked to be able to add block to area */ +static struct area_info *area_new_m(u16 width, u16 height, u16 align, + struct tcm *tcm, struct gid_info *gi) +{ + struct area_info *ai = kmalloc(sizeof(*ai), GFP_KERNEL); + if (!ai) + return NULL; + + /* set up empty area info */ + memset(ai, 0x0, sizeof(*ai)); + INIT_LIST_HEAD(&ai->blocks); + + /* reserve an allocation area */ + if (tcm_reserve_2d(tcm, width, height, align, &ai->area)) { + kfree(ai); + return NULL; + } + + ai->gi = gi; + mutex_lock(&mtx); + list_add_tail(&ai->by_gid, &gi->areas); + return ai; +} + +/* (must have mutex) free an area */ +static inline void _m_area_free(struct area_info *ai) +{ + if (ai) { + list_del(&ai->by_gid); + kfree(ai); + } +} + +static s32 __analize_area(enum tiler_fmt fmt, u32 width, u32 height, + u16 *x_area, u16 *y_area, u16 *band, + u16 *align) +{ + /* input: width, height is in pixels */ + /* output: x_area, y_area, band, align */ + + /* slot width, height, and row size */ + u32 slot_row, min_align; + const struct tiler_geom *g; + + /* set alignment to page size */ + *align = PAGE_SIZE; + + /* width and height must be positive */ + if (!width || !height) + return -EINVAL; + + if (fmt == TILFMT_PAGE) { + /* for 1D area keep the height (1), width is in tiler slots */ + *x_area = DIV_ROUND_UP(width, tiler.page); + *y_area = *band = 1; + + if (*x_area * *y_area > tiler.width * tiler.height) + return -ENOMEM; + return 0; + } + + /* format must be valid */ + g = tiler.geom(fmt); + if (!g) + return -EINVAL; + + /* get the # of bytes per row in 1 slot */ + slot_row = g->slot_w * g->bpp; + + /* how many slots are can be accessed via one physical page */ + *band = PAGE_SIZE / slot_row; + + /* minimum alignment is at least 1 slot */ + min_align = max(slot_row, granularity); + *align = ALIGN(*align, min_align); + + /* adjust to slots */ + *x_area = DIV_ROUND_UP(width, g->slot_w); + *y_area = DIV_ROUND_UP(height, g->slot_h); + *align /= slot_row; + + if (*x_area > tiler.width || *y_area > tiler.height) + return -ENOMEM; + return 0; +} + +void fill_virt_array(struct tiler_block_t *blk, u32 *virt_array) +{ + u32 v, p, len, size; + u32 i = 0, offs = 0; + + if (!virt_array) + return; + + /* get page aligned stride */ + v = tiler_vstride(blk); + p = tiler_pstride(blk); + + /* get page aligned virtual size for the block */ + size = tiler_size(blk); + offs = blk->phys; + while (size) { + /* set len to length of one row (2D), or full length 
if 1D */ + len = v; + + while (len && size) { + virt_array[i++] = PAGE_ALIGN(offs); + size -= PAGE_SIZE; + len -= PAGE_SIZE; + offs += PAGE_SIZE; + } + + /* set offset to next row beginning */ + offs += p - v; + } +} + +/** + * Find a place where a 2D block would fit into a 2D area of the + * same height. + * + * @author a0194118 (3/19/2010) + * + * @param w Width of the block. + * @param align Alignment of the block. + * @param ai Pointer to area info + * @param next Pointer to the variable where the next block + * will be stored. The block should be inserted + * before this block. + * + * @return the end coordinate (x1 + 1) where a block would fit, + * or 0 if it does not fit. + * + * (must have mutex) + */ +static u16 _m_blk_find_fit(u16 w, u16 align, + struct area_info *ai, struct list_head **before) +{ + int x = ai->area.p0.x + w; + struct mem_info *mi; + + /* area blocks are sorted by x */ + list_for_each_entry(mi, &ai->blocks, by_area) { + /* check if buffer would fit before this area */ + if (x <= mi->area.p0.x) { + *before = &mi->by_area; + return x; + } + x = ALIGN(mi->area.p1.x + 1, align) + w; + } + *before = &ai->blocks; + + /* check if buffer would fit after last area */ + return (x <= ai->area.p1.x + 1) ? x : 0; +} + +/* (must have mutex) adds a block to an area with certain x coordinates */ +static inline +struct mem_info *_m_add2area(struct mem_info *mi, struct area_info *ai, + u16 x0, u16 w, struct list_head *before) +{ + mi->parent = ai; + mi->area = ai->area; + mi->area.p0.x = x0; + mi->area.p1.x = x0 + w - 1; + list_add_tail(&mi->by_area, before); + ai->nblocks++; + return mi; +} + +static struct mem_info *get_2d_area(u16 w, u16 h, u16 align, u16 band, + struct gid_info *gi, struct tcm *tcm) +{ + struct area_info *ai = NULL; + struct mem_info *mi = NULL; + struct list_head *before = NULL; + u16 x = 0; /* this holds the end of a potential area */ + + /* allocate map info */ + + /* see if there is available prereserved space */ + mutex_lock(&mtx); + list_for_each_entry(mi, &gi->reserved, global) { + if (mi->area.tcm == tcm && + tcm_aheight(mi->area) == h && + tcm_awidth(mi->area) == w && + (mi->area.p0.x & (align - 1)) == 0) { + /* this area is already set up */ + + /* remove from reserved list */ + list_del(&mi->global); + if (tiler_alloc_debug & 1) + printk(KERN_ERR "(=2d (%d-%d,%d-%d) in (%d-%d,%d-%d) prereserved)\n", + mi->area.p0.x, mi->area.p1.x, + mi->area.p0.y, mi->area.p1.y, + ((struct area_info *) mi->parent)->area.p0.x, + ((struct area_info *) mi->parent)->area.p1.x, + ((struct area_info *) mi->parent)->area.p0.y, + ((struct area_info *) mi->parent)->area.p1.y); + + goto done; + } + } + mutex_unlock(&mtx); + + /* if not, reserve a block struct */ + mi = kmalloc(sizeof(*mi), GFP_KERNEL); + if (!mi) + return mi; + memset(mi, 0, sizeof(*mi)); + + /* see if allocation fits in one of the existing areas */ + /* this sets x, ai and before */ + mutex_lock(&mtx); + list_for_each_entry(ai, &gi->areas, by_gid) { + if (ai->area.tcm == tcm && + tcm_aheight(ai->area) == h) { + x = _m_blk_find_fit(w, align, ai, &before); + if (x) { + _m_add2area(mi, ai, x - w, w, before); + + if (tiler_alloc_debug & 1) + printk(KERN_ERR "(+2d (%d-%d,%d-%d) in (%d-%d,%d-%d) existing)\n", + mi->area.p0.x, mi->area.p1.x, + mi->area.p0.y, mi->area.p1.y, + ((struct area_info *) mi->parent)->area.p0.x, + ((struct area_info *) mi->parent)->area.p1.x, + ((struct area_info *) mi->parent)->area.p0.y, + ((struct area_info *) mi->parent)->area.p1.y); + + goto done; + } + } + } + mutex_unlock(&mtx); + + 
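+ /* NOTE: on success area_new_m() returns with mtx held (so the new
+  * block can be added before anyone else sees the area); the lock is
+  * dropped at the done: label below. */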
/* if no area fit, reserve a new one */ + ai = area_new_m(ALIGN(w, max(band, align)), h, + max(band, align), tcm, gi); + if (ai) { + _m_add2area(mi, ai, ai->area.p0.x, w, &ai->blocks); + if (tiler_alloc_debug & 1) + printk(KERN_ERR "(+2d (%d-%d,%d-%d) in (%d-%d,%d-%d) new)\n", + mi->area.p0.x, mi->area.p1.x, + mi->area.p0.y, mi->area.p1.y, + ai->area.p0.x, ai->area.p1.x, + ai->area.p0.y, ai->area.p1.y); + } else { + /* clean up */ + kfree(mi); + return NULL; + } + +done: + mutex_unlock(&mtx); + return mi; +} + +/* layout reserved 2d blocks in a larger area */ +/* NOTE: band, w, h, a(lign) is in slots */ +static s32 lay_2d(enum tiler_fmt fmt, u16 n, u16 w, u16 h, u16 band, + u16 align, struct gid_info *gi, + struct list_head *pos) +{ + u16 x, x0, e = ALIGN(w, align), w_res = (n - 1) * e + w; + struct mem_info *mi = NULL; + struct area_info *ai = NULL; + + printk(KERN_INFO "packing %u %u buffers into %u width\n", + n, w, w_res); + + /* calculate dimensions, band, and alignment in slots */ + /* reserve an area */ + ai = area_new_m(ALIGN(w_res, max(band, align)), h, + max(band, align), tcm[fmt], gi); + if (!ai) + return -ENOMEM; + + /* lay out blocks in the reserved area */ + for (n = 0, x = 0; x < w_res; x += e, n++) { + /* reserve a block struct */ + mi = kmalloc(sizeof(*mi), GFP_KERNEL); + if (!mi) + break; + + memset(mi, 0, sizeof(*mi)); + x0 = ai->area.p0.x + x; + _m_add2area(mi, ai, x0, w, &ai->blocks); + list_add(&mi->global, pos); + } + + mutex_unlock(&mtx); + return n; +} + +#ifdef CONFIG_TILER_ENABLE_NV12 +/* layout reserved nv12 blocks in a larger area */ +/* NOTE: area w(idth), w1 (8-bit block width), h(eight) are in slots */ +/* p is a pointer to a packing description, which is a list of offsets in + the area for consecutive 8-bit and 16-bit blocks */ +static s32 lay_nv12(int n, u16 w, u16 w1, u16 h, struct gid_info *gi, u8 *p) +{ + u16 wh = (w1 + 1) >> 1, width, x0; + int m; + int a = PAGE_SIZE / tiler.geom(TILFMT_8BIT)->slot_w; + + struct mem_info *mi = NULL; + struct area_info *ai = NULL; + struct list_head *pos; + + /* reserve area */ + ai = area_new_m(w, h, a, TILFMT_8BIT, gi); + if (!ai) + return -ENOMEM; + + /* lay out blocks in the reserved area */ + for (m = 0; m < 2 * n; m++) { + width = (m & 1) ? wh : w1; + x0 = ai->area.p0.x + *p++; + + /* get insertion head */ + list_for_each(pos, &ai->blocks) { + mi = list_entry(pos, struct mem_info, by_area); + if (mi->area.p0.x > x0) + break; + } + + /* reserve a block struct */ + mi = kmalloc(sizeof(*mi), GFP_KERNEL); + if (!mi) + break; + + memset(mi, 0, sizeof(*mi)); + + _m_add2area(mi, ai, x0, width, pos); + list_add(&mi->global, &gi->reserved); + } + + mutex_unlock(&mtx); + return n; +} +#endif + +static void _m_unpin(struct mem_info *mi) +{ + /* release memory */ + if (mi->pa.memtype == TILER_MEM_GOT_PAGES) { + int i; + for (i = 0; i < mi->pa.num_pg; i++) { + struct page *page = phys_to_page(mi->pa.mem[i]); + if (page) { + if (!PageReserved(page)) + SetPageDirty(page); + page_cache_release(page); + } + } + } else if (mi->pa.memtype == TILER_MEM_ALLOCED && mi->pa.mem) { + tmm_free(tmm[tiler_fmt(mi->blk.phys)], mi->pa.mem); + /* + * TRICKY: tmm module uses the same mi->pa.mem pointer which + * it just freed. 
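+ * (tmm_pat_free_pages() looks a block up by its first page address and
+ * kfree()s the very array that mi->pa.mem points to.)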
We need to clear ours so we don't double free + */ + mi->pa.mem = NULL; + } + kfree(mi->pa.mem); + mi->pa.mem = NULL; + mi->pa.num_pg = 0; + unpin_mem_from_area(tmm[tiler_fmt(mi->blk.phys)], &mi->area); +} + +/* (must have mutex) free block and any freed areas */ +static s32 _m_free(struct mem_info *mi) +{ + struct area_info *ai = NULL; + s32 res = 0; + + _m_unpin(mi); + + /* safe deletion as list may not have been assigned */ + if (mi->global.next) + list_del(&mi->global); + if (mi->by_area.next) + list_del(&mi->by_area); + + /* remove block from area first if 2D */ + if (mi->area.is2d) { + ai = mi->parent; + + /* check to see if area needs removing also */ + if (ai && !--ai->nblocks) { + if (tiler_alloc_debug & 1) + printk(KERN_ERR "(-2d (%d-%d,%d-%d) in (%d-%d,%d-%d) last)\n", + mi->area.p0.x, mi->area.p1.x, + mi->area.p0.y, mi->area.p1.y, + ai->area.p0.x, ai->area.p1.x, + ai->area.p0.y, ai->area.p1.y); + + res = tcm_free(&ai->area); + list_del(&ai->by_gid); + /* try to remove parent if it became empty */ + _m_try_free_group(ai->gi); + kfree(ai); + ai = NULL; + } else if (tiler_alloc_debug & 1) + printk(KERN_ERR "(-2d (%d-%d,%d-%d) in (%d-%d,%d-%d) remaining)\n", + mi->area.p0.x, mi->area.p1.x, + mi->area.p0.y, mi->area.p1.y, + ai->area.p0.x, ai->area.p1.x, + ai->area.p0.y, ai->area.p1.y); + + } else { + if (tiler_alloc_debug & 1) + printk(KERN_ERR "(-1d: %d,%d..%d,%d)\n", + mi->area.p0.x, mi->area.p0.y, + mi->area.p1.x, mi->area.p1.y); + /* remove 1D area */ + res = tcm_free(&mi->area); + /* try to remove parent if it became empty */ + _m_try_free_group(mi->parent); + } + + kfree(mi); + return res; +} + +/* (must have mutex) returns true if block was freed */ +static bool _m_chk_ref(struct mem_info *mi) +{ + /* check references */ + if (mi->refs) + return 0; + + if (_m_free(mi)) + printk(KERN_ERR "error while removing tiler block\n"); + + return 1; +} + +/* (must have mutex) */ +static inline bool _m_dec_ref(struct mem_info *mi) +{ + if (mi->refs-- <= 1) + return _m_chk_ref(mi); + + return 0; +} + +/* (must have mutex) */ +static inline void _m_inc_ref(struct mem_info *mi) +{ + mi->refs++; +} + +/* (must have mutex) returns true if block was freed */ +static inline bool _m_try_free(struct mem_info *mi) +{ + if (mi->alloced) { + mi->refs--; + mi->alloced = false; + } + return _m_chk_ref(mi); +} + +/* --- external methods --- */ + +/* find a block by key/id and lock it */ +static struct mem_info * +find_n_lock(u32 key, u32 id, struct gid_info *gi) { + struct area_info *ai = NULL; + struct mem_info *mi = NULL; + + mutex_lock(&mtx); + + /* if group is not given, look globally */ + if (!gi) { + list_for_each_entry(mi, &blocks, global) { + if (mi->blk.key == key && mi->blk.id == id) + goto done; + } + } else { + /* is id is ssptr, we know if block is 1D or 2D by the address, + so we optimize lookup */ + if (!ssptr_id || + tiler_fmt(id) == TILFMT_PAGE) { + list_for_each_entry(mi, &gi->onedim, by_area) { + if (mi->blk.key == key && mi->blk.id == id) + goto done; + } + } + + if (!ssptr_id || + tiler_fmt(id) != TILFMT_PAGE) { + list_for_each_entry(ai, &gi->areas, by_gid) { + list_for_each_entry(mi, &ai->blocks, by_area) { + if (mi->blk.key == key && + mi->blk.id == id) + goto done; + } + } + } + } + + mi = NULL; +done: + /* lock block by increasing its ref count */ + if (mi) + mi->refs++; + + mutex_unlock(&mtx); + + return mi; +} + +/* unlock a block, and optionally free it */ +static void unlock_n_free(struct mem_info *mi, bool free) +{ + mutex_lock(&mtx); + + _m_dec_ref(mi); + if (free) + 
_m_try_free(mi); + + mutex_unlock(&mtx); +} + +/** + * Free all blocks in a group: + * + * allocated blocks, and unreferenced blocks. Any blocks/areas still referenced + * will move to the orphaned lists to avoid issues if a new process is created + * with the same pid. + * + * (must have mutex) + */ +static void destroy_group(struct gid_info *gi) +{ + struct area_info *ai, *ai_; + struct mem_info *mi, *mi_; + bool ai_autofreed, need2free; + + mutex_lock(&mtx); + + /* free all allocated blocks, and remove unreferenced ones */ + + /* + * Group info structs when they become empty on an _m_try_free. + * However, if the group info is already empty, we need to + * remove it manually + */ + need2free = list_empty(&gi->areas) && list_empty(&gi->onedim); + list_for_each_entry_safe(ai, ai_, &gi->areas, by_gid) { + ai_autofreed = true; + list_for_each_entry_safe(mi, mi_, &ai->blocks, by_area) + ai_autofreed &= _m_try_free(mi); + + /* save orphaned areas for later removal */ + if (!ai_autofreed) { + need2free = true; + ai->gi = NULL; + list_move(&ai->by_gid, &orphan_areas); + } + } + + list_for_each_entry_safe(mi, mi_, &gi->onedim, by_area) { + if (!_m_try_free(mi)) { + need2free = true; + /* save orphaned 1D blocks */ + mi->parent = NULL; + list_move(&mi->by_area, &orphan_onedim); + } + } + + /* if group is still alive reserved list should have been + emptied as there should be no reference on those blocks */ + if (need2free) { + BUG_ON(!list_empty(&gi->onedim)); + BUG_ON(!list_empty(&gi->areas)); + _m_try_free_group(gi); + } + + mutex_unlock(&mtx); +} + +/* release (reserved) blocks */ +static void release_blocks(struct list_head *reserved) +{ + struct mem_info *mi, *mi_; + + mutex_lock(&mtx); + + /* find block in global list and free it */ + list_for_each_entry_safe(mi, mi_, reserved, global) { + BUG_ON(mi->refs || mi->alloced); + _m_free(mi); + } + mutex_unlock(&mtx); +} + +/* add reserved blocks to a group */ +static void add_reserved_blocks(struct list_head *reserved, struct gid_info *gi) +{ + mutex_lock(&mtx); + list_splice_init(reserved, &gi->reserved); + mutex_unlock(&mtx); +} + +/* find a block by ssptr */ +static struct mem_info *find_block_by_ssptr(u32 sys_addr) +{ + struct mem_info *i; + struct tcm_pt pt; + u32 x, y; + enum tiler_fmt fmt; + const struct tiler_geom *g; + + fmt = tiler_fmt(sys_addr); + if (fmt == TILFMT_INVALID) + return NULL; + + g = tiler.geom(fmt); + + /* convert x & y pixel coordinates to slot coordinates */ + tiler.xy(sys_addr, &x, &y); + pt.x = x / g->slot_w; + pt.y = y / g->slot_h; + + mutex_lock(&mtx); + list_for_each_entry(i, &blocks, global) { + if (tiler_fmt(i->blk.phys) == tiler_fmt(sys_addr) && + tcm_is_in(pt, i->area)) { + i->refs++; + goto found; + } + } + i = NULL; + +found: + mutex_unlock(&mtx); + return i; +} + +/* find a block by ssptr */ +static void fill_block_info(struct mem_info *i, struct tiler_block_info *blk) +{ + blk->fmt = tiler_fmt(i->blk.phys); +#ifdef CONFIG_TILER_EXPOSE_SSPTR + blk->ssptr = i->blk.phys; +#endif + if (blk->fmt == TILFMT_PAGE) { + blk->dim.len = i->blk.width; + blk->group_id = ((struct gid_info *) i->parent)->gid; + } else { + blk->stride = tiler_vstride(&i->blk); + blk->dim.area.width = i->blk.width; + blk->dim.area.height = i->blk.height; + blk->group_id = ((struct area_info *) i->parent)->gi->gid; + } + blk->id = i->blk.id; + blk->key = i->blk.key; +} + +/* + * Block operations + * ========================================================================== + */ +static struct mem_info *alloc_area(enum tiler_fmt fmt, u32 
width, u32 height, + struct gid_info *gi) +{ + u16 x, y, band, align; + struct mem_info *mi = NULL; + const struct tiler_geom *g = tiler.geom(fmt); + + /* calculate dimensions, band, and alignment in slots */ + if (__analize_area(fmt, width, height, &x, &y, &band, &align)) + return NULL; + + if (fmt == TILFMT_PAGE) { + /* 1D areas don't pack */ + mi = kmalloc(sizeof(*mi), GFP_KERNEL); + if (!mi) + return NULL; + memset(mi, 0x0, sizeof(*mi)); + + if (tcm_reserve_1d(tcm[fmt], x * y, &mi->area)) { + kfree(mi); + return NULL; + } + + if (tiler_alloc_debug & 1) + printk(KERN_ERR "(+1d: %d,%d..%d,%d)\n", + mi->area.p0.x, mi->area.p0.y, + mi->area.p1.x, mi->area.p1.y); + + mutex_lock(&mtx); + mi->parent = gi; + list_add(&mi->by_area, &gi->onedim); + } else { + mi = get_2d_area(x, y, align, band, gi, tcm[fmt]); + if (!mi) + return NULL; + + mutex_lock(&mtx); + } + + list_add(&mi->global, &blocks); + mi->alloced = true; + mi->refs++; + gi->refs--; + mutex_unlock(&mtx); + + mi->blk.phys = tiler.addr(fmt, + mi->area.p0.x * g->slot_w, mi->area.p0.y * g->slot_h); + return mi; +} + +static struct mem_info *alloc_block_area(enum tiler_fmt fmt, u32 width, + u32 height, u32 key, u32 gid, + struct process_info *pi) +{ + struct mem_info *mi = NULL; + struct gid_info *gi = NULL; + + /* validate parameters */ + if (!pi) + return ERR_PTR(-EINVAL); + + /* get group context */ + mutex_lock(&mtx); + gi = _m_get_gi(pi, gid); + mutex_unlock(&mtx); + + if (!gi) + return ERR_PTR(-ENOMEM); + + /* reserve area in tiler container */ + mi = alloc_area(fmt, width, height, gi); + if (!mi) { + mutex_lock(&mtx); + gi->refs--; + _m_try_free_group(gi); + mutex_unlock(&mtx); + return ERR_PTR(-ENOMEM); + } + + mi->blk.width = width; + mi->blk.height = height; + mi->blk.key = key; + if (ssptr_id) { + mi->blk.id = mi->blk.phys; + } else { + mutex_lock(&mtx); + mi->blk.id = _m_get_id(); + mutex_unlock(&mtx); + } + + return mi; +} + +static s32 pin_memory(struct mem_info *mi, struct tiler_pa_info *pa) +{ + enum tiler_fmt fmt = tiler_fmt(mi->blk.phys); + struct tcm_area area = mi->area; + + /* ensure we can pin */ + if (!tmm_can_pin(tmm[fmt])) + return -EINVAL; + + /* ensure pages fit into area */ + if (pa->num_pg > tcm_sizeof(mi->area)) + return -ENOMEM; + + /* for 2D area, pages must fit exactly */ + if (fmt != TILFMT_PAGE && + pa->num_pg != tcm_sizeof(mi->area)) + return -EINVAL; + + /* save pages used */ + mi->pa = *pa; + pa->mem = NULL; /* transfered array */ + + /* only refill available pages for 1D */ + if (fmt == TILFMT_PAGE) + tcm_1d_limit(&area, pa->num_pg); + if (mi->pa.num_pg) + return pin_mem_to_area(tmm[fmt], &area, mi->pa.mem); + return 0; +} + +void tiler_pa_free(struct tiler_pa_info *pa) +{ + if (pa) + kfree(pa->mem); + kfree(pa); +} +EXPORT_SYMBOL(tiler_pa_free); + +/* allocate physical pages for a block */ +static struct tiler_pa_info *get_new_pa(struct tmm *tmm, u32 num_pg) +{ + struct tiler_pa_info *pa = NULL; + pa = kzalloc(sizeof(*pa), GFP_KERNEL); + if (!pa) + return NULL; + + pa->mem = tmm_get(tmm, num_pg); + if (pa->mem) { + pa->num_pg = num_pg; + pa->memtype = TILER_MEM_ALLOCED; + return pa; + } else { + kfree(pa); + return NULL; + } +} + +static s32 alloc_block(enum tiler_fmt fmt, u32 width, u32 height, + u32 key, u32 gid, struct process_info *pi, + struct mem_info **info) +{ + struct mem_info *mi; + struct tiler_pa_info *pa = NULL; + int res; + + *info = NULL; + + /* allocate tiler container area */ + mi = alloc_block_area(fmt, width, height, key, gid, pi); + if (IS_ERR_OR_NULL(mi)) + return mi ? 
-ENOMEM : PTR_ERR(mi); + + /* allocate memory */ + pa = get_new_pa(tmm[fmt], tcm_sizeof(mi->area)); + if (IS_ERR_OR_NULL(pa)) { + res = -ENOMEM; + goto cleanup; + } + + /* pin memory */ + res = pin_memory(mi, pa); + tiler_pa_free(pa); + if (res) + goto cleanup; + + *info = mi; + return 0; + +cleanup: + mutex_lock(&mtx); + _m_free(mi); + mutex_unlock(&mtx); + return res; +} + + +/* get physical pages of a user block */ +struct tiler_pa_info *user_block_to_pa(u32 usr_addr, u32 num_pg) +{ + struct task_struct *curr_task = current; + struct mm_struct *mm = current->mm; + struct vm_area_struct *vma = NULL; + + struct tiler_pa_info *pa = NULL; + struct page **pages = NULL; + u32 *mem = NULL, write, i; + int usr_count; + + pa = kzalloc(sizeof(*pa), GFP_KERNEL); + if (!pa) + return NULL; + + mem = kzalloc(num_pg * sizeof(*mem), GFP_KERNEL); + if (!mem) { + kfree(pa); + return NULL; + } + + pages = kmalloc(num_pg * sizeof(*pages), GFP_KERNEL); + if (!pages) { + kfree(mem); + kfree(pa); + return NULL; + } + + /* + * Important Note: usr_addr is mapped from user + * application process to current process - it must lie + * completely within the current virtual memory address + * space in order to be of use to us here. + */ + down_read(&mm->mmap_sem); + vma = find_vma(mm, usr_addr + (num_pg << PAGE_SHIFT)); + + if (!vma || (usr_addr < vma->vm_start)) { + kfree(mem); + kfree(pa); + kfree(pages); + up_read(&mm->mmap_sem); + printk(KERN_ERR "Address is outside VMA: address start = %08x, " + "user end = %08x\n", + usr_addr, (usr_addr + (num_pg << PAGE_SHIFT))); + return ERR_PTR(-EFAULT); + } + + if (vma->vm_flags & (VM_WRITE | VM_MAYWRITE)) + write = 1; + + usr_count = get_user_pages(curr_task, mm, usr_addr, num_pg, write, 1, + pages, NULL); + + if (usr_count > 0) { + /* process user allocated buffer */ + if (usr_count != num_pg) { + /* release the pages we did get */ + for (i = 0; i < usr_count; i++) + page_cache_release(pages[i]); + } else { + /* fill in the physical address information */ + for (i = 0; i < num_pg; i++) { + mem[i] = page_to_phys(pages[i]); + BUG_ON(pages[i] != phys_to_page(mem[i])); + } + } + } else { + /* fallback for kernel allocated buffers */ + for (i = 0; i < num_pg; i++) { + mem[i] = tiler_virt2phys(usr_addr); + + if (!mem[i]) { + printk(KERN_ERR "VMA not in page table\n"); + break; + } + + usr_addr += PAGE_SIZE; + } + } + + up_read(&mm->mmap_sem); + + kfree(pages); + + /* if failed to map all pages */ + if (i < num_pg) { + kfree(mem); + kfree(pa); + return ERR_PTR(-EFAULT); + } + + pa->mem = mem; + pa->memtype = usr_count > 0 ? TILER_MEM_GOT_PAGES : TILER_MEM_USING; + pa->num_pg = num_pg; + return pa; +} +EXPORT_SYMBOL(user_block_to_pa); + +/* allocate area from container and pin memory */ +static s32 pin_any_block(enum tiler_fmt fmt, u32 width, u32 height, + u32 key, u32 gid, struct process_info *pi, + struct mem_info **info, struct tiler_pa_info *pa) +{ + s32 res = -EPERM; + struct mem_info *mi = NULL; + + *info = NULL; + + /* check if mapping is supported by tmm */ + if (!tmm_can_pin(tmm[fmt])) + goto done; + + /* get allocation area */ + mi = alloc_block_area(fmt, width, height, key, gid, pi); + if (IS_ERR_OR_NULL(mi)) { + res = mi ? 
PTR_ERR(mi) : -ENOMEM; + goto done; + } + + /* pin pages to tiler container */ + res = pin_memory(mi, pa); + + /* success */ + if (!res) { + *info = mi; + } else { + mutex_lock(&mtx); + _m_free(mi); + mutex_unlock(&mtx); + } +done: + tiler_pa_free(pa); + return res; +} + +static s32 pin_block(enum tiler_fmt fmt, u32 width, u32 height, + u32 key, u32 gid, struct process_info *pi, + struct mem_info **info, u32 usr_addr) +{ + struct tiler_pa_info *pa = NULL; + + /* we only support mapping a user buffer in page mode */ + if (fmt != TILFMT_PAGE) + return -ENOMEM; + + /* get user pages */ + pa = user_block_to_pa(usr_addr, DIV_ROUND_UP(width, PAGE_SIZE)); + if (IS_ERR_OR_NULL(pa)) + return pa ? PTR_ERR(pa) : -ENOMEM; + + return pin_any_block(fmt, width, height, key, gid, pi, info, pa); +} + +s32 tiler_pin_block(tiler_blk_handle block, u32 *addr_array, u32 nents) +{ + struct tiler_pa_info *pa = NULL; + u32 *mem = NULL; + int res; + + pa = kzalloc(sizeof(*pa), GFP_KERNEL); + if (!pa) + return -ENOMEM; + + mem = kmemdup(addr_array, sizeof(*addr_array)*nents, GFP_KERNEL); + if (!mem) { + kfree(pa); + return -ENOMEM; + } + + pa->mem = mem; + pa->memtype = TILER_MEM_USING; + pa->num_pg = nents; + + res = pin_memory(block, pa); + tiler_pa_free(pa); + + return res; +} +EXPORT_SYMBOL(tiler_pin_block); + +/* + * Driver code + * ========================================================================== + */ + +#ifdef CONFIG_PM +static int tiler_resume(struct device *pdev) +{ + struct mem_info *mi; + struct pat_area area = {0}; + + /* clear out PAT entries and set dummy page */ + area.x1 = tiler.width - 1; + area.y1 = tiler.height - 1; + mutex_lock(&dmac_mtx); + tmm_unpin(tmm[TILFMT_8BIT], area); + mutex_unlock(&dmac_mtx); + + /* iterate over all the blocks and refresh the PAT entries */ + list_for_each_entry(mi, &blocks, global) { + if (mi->pa.mem) + if (pin_mem_to_area(tmm[tiler_fmt(mi->blk.phys)], + &mi->area, mi->pa.mem)) + printk(KERN_ERR "Failed PAT restore - %08x\n", + mi->blk.phys); + } + + return 0; +} + +static const struct dev_pm_ops tiler_pm_ops = { + .resume = tiler_resume, +}; +#endif + +static struct platform_driver tiler_driver_ldm = { + .driver = { + .owner = THIS_MODULE, + .name = "tiler", +#ifdef CONFIG_PM + .pm = &tiler_pm_ops, +#endif + }, +}; + +static s32 __init tiler_init(void) +{ + dev_t dev = 0; + s32 r = -1; + struct device *device = NULL; + struct tcm_pt div_pt; + struct tcm *sita = NULL; + struct tmm *tmm_pat = NULL; + struct pat_area area = {0}; + + tiler.alloc = alloc_block; + tiler.pin = pin_block; + tiler.lock = find_n_lock; + tiler.unlock_free = unlock_n_free; + tiler.lay_2d = lay_2d; +#ifdef CONFIG_TILER_ENABLE_NV12 + tiler.lay_nv12 = lay_nv12; +#endif + tiler.destroy_group = destroy_group; + tiler.lock_by_ssptr = find_block_by_ssptr; + tiler.describe = fill_block_info; + tiler.get_gi = get_gi; + tiler.release_gi = release_gi; + tiler.release = release_blocks; + tiler.add_reserved = add_reserved_blocks; + tiler.analize = __analize_area; + tiler_geom_init(&tiler); + tiler_reserve_init(&tiler); + + mutex_init(&tiler.mtx); + tiler_iface_init(&tiler); +#ifdef CONFIG_TILER_ENABLE_USERSPACE + tiler_ioctl_init(&tiler); +#endif +#ifdef CONFIG_TILER_ENABLE_NV12 + tiler_nv12_init(&tiler); +#endif + + /* check module parameters for correctness */ + if (granularity < 1 || granularity > PAGE_SIZE || + granularity & (granularity - 1)) + return -EINVAL; + + /* + * Array of physical pages for PAT programming, which must be a 16-byte + * aligned physical address. 
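+ * dma_alloc_coherent() below returns page-aligned memory, which easily
+ * satisfies this; one u32 entry is allocated per slot in the container
+ * (tiler.width * tiler.height entries in total).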
+ */ + dmac_va = dma_alloc_coherent(NULL, tiler.width * tiler.height * + sizeof(*dmac_va), &dmac_pa, GFP_ATOMIC); + if (!dmac_va) + return -ENOMEM; + + /* Allocate tiler container manager (we share 1 on OMAP4) */ + div_pt.x = tiler.width; /* hardcoded default */ + div_pt.y = (3 * tiler.height) / 4; + sita = sita_init(tiler.width, tiler.height, (void *)&div_pt); + + tcm[TILFMT_8BIT] = sita; + tcm[TILFMT_16BIT] = sita; + tcm[TILFMT_32BIT] = sita; + tcm[TILFMT_PAGE] = sita; + + /* Allocate tiler memory manager (must have 1 unique TMM per TCM ) */ + tmm_pat = tmm_pat_init(0, dmac_va, dmac_pa); + tmm[TILFMT_8BIT] = tmm_pat; + tmm[TILFMT_16BIT] = tmm_pat; + tmm[TILFMT_32BIT] = tmm_pat; + tmm[TILFMT_PAGE] = tmm_pat; + + /* Clear out all PAT entries */ + area.x1 = tiler.width - 1; + area.y1 = tiler.height - 1; + tmm_unpin(tmm_pat, area); + +#ifdef CONFIG_TILER_ENABLE_NV12 + tiler.nv12_packed = tcm[TILFMT_8BIT] == tcm[TILFMT_16BIT]; +#endif + + tiler_device = kmalloc(sizeof(*tiler_device), GFP_KERNEL); + if (!tiler_device || !sita || !tmm_pat) { + r = -ENOMEM; + goto error; + } + + memset(tiler_device, 0x0, sizeof(*tiler_device)); + if (tiler_major) { + dev = MKDEV(tiler_major, tiler_minor); + r = register_chrdev_region(dev, 1, "tiler"); + } else { + r = alloc_chrdev_region(&dev, tiler_minor, 1, "tiler"); + tiler_major = MAJOR(dev); + } + + cdev_init(&tiler_device->cdev, tiler.fops); + tiler_device->cdev.owner = THIS_MODULE; + tiler_device->cdev.ops = tiler.fops; + + r = cdev_add(&tiler_device->cdev, dev, 1); + if (r) + printk(KERN_ERR "cdev_add():failed\n"); + + tilerdev_class = class_create(THIS_MODULE, "tiler"); + + if (IS_ERR(tilerdev_class)) { + printk(KERN_ERR "class_create():failed\n"); + goto error; + } + + device = device_create(tilerdev_class, NULL, dev, NULL, "tiler"); + if (device == NULL) + printk(KERN_ERR "device_create() fail\n"); + + r = platform_driver_register(&tiler_driver_ldm); + + mutex_init(&mtx); + INIT_LIST_HEAD(&blocks); + INIT_LIST_HEAD(&orphan_areas); + INIT_LIST_HEAD(&orphan_onedim); + + dbgfs = debugfs_create_dir("tiler", NULL); + if (IS_ERR_OR_NULL(dbgfs)) + dev_warn(device, "failed to create debug files.\n"); + else + dbg_map = debugfs_create_dir("map", dbgfs); + if (!IS_ERR_OR_NULL(dbg_map)) { + int i; + for (i = 0; i < ARRAY_SIZE(debugfs_maps); i++) + debugfs_create_file(debugfs_maps[i].name, S_IRUGO, + dbg_map, (void *) (debugfs_maps + i), + &tiler_debug_fops); + } + +error: + /* TODO: error handling for device registration */ + if (r) { + kfree(tiler_device); + tcm_deinit(sita); + tmm_deinit(tmm_pat); + dma_free_coherent(NULL, tiler.width * tiler.height * + sizeof(*dmac_va), dmac_va, dmac_pa); + } + + return r; +} + +static void __exit tiler_exit(void) +{ + int i, j; + + mutex_lock(&mtx); + + debugfs_remove_recursive(dbgfs); + + /* free all process data */ + tiler.cleanup(); + + /* all lists should have cleared */ + BUG_ON(!list_empty(&blocks)); + BUG_ON(!list_empty(&orphan_onedim)); + BUG_ON(!list_empty(&orphan_areas)); + + mutex_unlock(&mtx); + + dma_free_coherent(NULL, tiler.width * tiler.height * sizeof(*dmac_va), + dmac_va, dmac_pa); + + /* close containers only once */ + for (i = TILFMT_MIN; i <= TILFMT_MAX; i++) { + /* remove identical containers (tmm is unique per tcm) */ + for (j = i + 1; j <= TILFMT_MAX; j++) + if (tcm[i] == tcm[j]) { + tcm[j] = NULL; + tmm[j] = NULL; + } + + tcm_deinit(tcm[i]); + tmm_deinit(tmm[i]); + } + + mutex_destroy(&mtx); + platform_driver_unregister(&tiler_driver_ldm); + cdev_del(&tiler_device->cdev); + kfree(tiler_device); + 
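+ /* remove the device node and class created in tiler_init() */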
device_destroy(tilerdev_class, MKDEV(tiler_major, tiler_minor)); + class_destroy(tilerdev_class); +} + +tiler_blk_handle tiler_map_1d_block(struct tiler_pa_info *pa) +{ + struct mem_info *mi = NULL; + struct tiler_pa_info *pa_tmp = kmemdup(pa, sizeof(*pa), GFP_KERNEL); + s32 res = pin_any_block(TILFMT_PAGE, pa->num_pg << PAGE_SHIFT, 1, 0, 0, + __get_pi(0, true), &mi, pa_tmp); + return res ? ERR_PTR(res) : mi; +} +EXPORT_SYMBOL(tiler_map_1d_block); + +void tiler_free_block_area(tiler_blk_handle block) +{ + mutex_lock(&mtx); + _m_try_free(block); + mutex_unlock(&mtx); +} +EXPORT_SYMBOL(tiler_free_block_area); + +tiler_blk_handle tiler_alloc_block_area(enum tiler_fmt fmt, u32 width, + u32 height, u32 *ssptr, u32 *virt_array) +{ + struct mem_info *mi; + *ssptr = 0; + + /* if tiler is not initialized fail gracefully */ + if (!tilerdev_class) + return NULL; + + mi = alloc_block_area(fmt, width, height, 0, 0, __get_pi(0, true)); + + if (IS_ERR_OR_NULL(mi)) + goto done; + + fill_virt_array(&mi->blk, virt_array); + *ssptr = mi->blk.phys; + +done: + return mi; +} +EXPORT_SYMBOL(tiler_alloc_block_area); + +void tiler_unpin_block(tiler_blk_handle block) +{ + mutex_lock(&mtx); + _m_unpin(block); + mutex_unlock(&mtx); +} +EXPORT_SYMBOL(tiler_unpin_block); + +s32 tiler_memsize(enum tiler_fmt fmt, u32 width, u32 height, u32 *alloc_pages, + u32 *virt_pages) +{ + u16 x, y, band, align; + int res; + struct tiler_block_t blk; + + *alloc_pages = *virt_pages = 0; + + res = tiler.analize(fmt, width, height, &x, &y, &align, &band); + + if (!res) { + blk.height = height; + blk.width = width; + blk.phys = tiler.addr(fmt, 0, 0); + *alloc_pages = x*y; + *virt_pages = tiler_size(&blk) / PAGE_SIZE; + } + + return res; +} +EXPORT_SYMBOL(tiler_memsize); + +u32 tiler_block_vstride(tiler_blk_handle block) +{ + return tiler_vstride(&block->blk); +} +EXPORT_SYMBOL(tiler_block_vstride); + +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Lajos Molnar <molnar@ti.com>"); +MODULE_AUTHOR("David Sin <davidsin@ti.com>"); +module_init(tiler_init); +module_exit(tiler_exit); diff --git a/drivers/media/video/tiler/tiler-nv12.c b/drivers/media/video/tiler/tiler-nv12.c new file mode 100644 index 0000000..e166122 --- /dev/null +++ b/drivers/media/video/tiler/tiler-nv12.c @@ -0,0 +1,417 @@ +/* + * tiler-nv12.c + * + * TILER driver NV12 area reservation functions for TI TILER hardware block. + * + * Author: Lajos Molnar <molnar@ti.com> + * + * Copyright (C) 2009-2010 Texas Instruments, Inc. + * + * This package is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +#include "_tiler.h" + +static struct tiler_ops *ops; /* shared methods and variables */ +static int band_8; +static int band_16; + +/* + * NV12 Reservation Functions + * + * TILER is designed so that a (w * h) * 8bit area is twice as wide as a + * (w/2 * h/2) * 16bit area. Since having pairs of such 8-bit and 16-bit + * blocks is a common usecase for TILER, we optimize packing these into a + * TILER area. 
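+ *
+ * For example, a 640x480 NV12 frame pairs a 640x480 8-bit Y block with
+ * a 320x240 16-bit interleaved-UV block; it is these pairs that the
+ * reservation methods below lay out together.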
+ * + * During reservation we want to find the most effective packing (most used area + * in the smallest overall area) + * + * We have two algorithms for packing nv12 blocks: either pack 8- and 16-bit + * blocks into separate container areas, or pack them together into same area. + */ + +/** + * Calculate effectiveness of packing. We weight total area much higher than + * packing efficiency to get the smallest overall container use. + * + * @param w width of one (8-bit) block + * @param n buffers in a packing + * @param area width of packing area + * @param n_total total number of buffers to be packed + * @return effectiveness, the higher the better + */ +static inline u32 nv12_eff(u16 w, u16 n, u16 area, u16 n_total) +{ + return 0x10000000 - + /* weigh against total area needed (for all buffers) */ + /* 64-slots = -2048 */ + DIV_ROUND_UP(n_total, n) * area * 32 + + /* packing efficiency (0 - 1024) */ + 1024 * n * ((w * 3 + 1) >> 1) / area; +} + +/** + * Fallback nv12 packing algorithm: pack 8 and 16 bit block into separate + * areas. + * + * @author a0194118 (7/16/2010) + * + * @param o desired offset (<a) + * @param a desired alignment (>=2) + * @param w block width (>0) + * @param n number of blocks desired + * @param area pointer to store total area needed + * + * @return number of blocks that can be allocated + */ +static u16 nv12_separate(u16 o, u16 a, u16 w, u16 n, u16 *area) +{ + tiler_best2pack(o, a, band_8, w, &n, area); + tiler_best2pack(o >> 1, a >> 1, band_16, (w + 1) >> 1, &n, area); + *area *= 3; + return n; +} + +/* + * Specialized NV12 Reservation Algorithms + * + * We use 4 packing methods that pack nv12 blocks into the same area. Together + * these 4 methods give the optimal result for most possible input parameters. + * + * For now we pack into a 64-slot area, so that we don't have to worry about + * stride issues (all blocks get 4K stride). For some of the algorithms this + * could be true even if the area was 128. + */ + +/** + * Packing types are marked using a letter sequence, capital letters denoting + * 8-bit blocks, lower case letters denoting corresponding 16-bit blocks. + * + * All methods have the following parameters. They also define the maximum + * number of coordinates that could potentially be packed. 
+ * + * @param o, a, w, n offset, alignment, width, # of blocks as usual + * @param area pointer to store area needed for packing + * @param p pointer to store packing coordinates + * @return number of blocks that can be packed + */ + +/* Method A: progressive packing: AAAAaaaaBBbbCc into 64-slot area */ +#define MAX_A 21 +static int nv12_A(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) +{ + u16 x = o, u, l, m = 0; + *area = band_8; + + while (x + w < *area && m < n) { + /* current 8bit upper bound (a) is next 8bit lower bound (B) */ + l = u = (*area + x) >> 1; + + /* pack until upper bound */ + while (x + w <= u && m < n) { + /* save packing */ + BUG_ON(m + 1 >= MAX_A); + *p++ = x; + *p++ = l; + l = (*area + x + w + 1) >> 1; + x = ALIGN(x + w - o, a) + o; + m++; + } + x = ALIGN(l - o, a) + o; /* set new lower bound */ + } + return m; +} + +/* Method -A: regressive packing: cCbbBBaaaaAAAA into 64-slot area */ +static int nv12_revA(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) +{ + u16 m; + + /* this is a mirrored packing of method A */ + n = nv12_A((a - (o + w) % a) % a, a, w, n, area, p); + + /* reverse packing */ + for (m = 0; m < n; m++) { + *p = *area - *p - w; + p++; + *p = *area - *p - ((w + 1) >> 1); + p++; + } + return n; +} + +/* Method B: simple layout: aAbcBdeCfgDhEFGH */ +#define MAX_B 8 +static int nv12_B(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) +{ + u16 e = (o + w) % a; /* end offset */ + u16 o1 = (o >> 1) % a; /* half offset */ + u16 e1 = ((o + w + 1) >> 1) % a; /* half end offset */ + u16 o2 = o1 + (a >> 2); /* 2nd half offset */ + u16 e2 = e1 + (a >> 2); /* 2nd half end offset */ + u16 m = 0; + *area = band_8; + + /* ensure 16-bit blocks don't overlap 8-bit blocks */ + + /* width cannot wrap around alignment, half block must be before block, + 2nd half can be before or after */ + if (w < a && o < e && e1 <= o && (e2 <= o || o2 >= e)) + while (o + w <= *area && m < n) { + BUG_ON(m + 1 >= MAX_B); + *p++ = o; + *p++ = o >> 1; + m++; + o += a; + } + return m; +} + +/* Method C: butterfly layout: AAbbaaBB */ +#define MAX_C 20 +static int nv12_C(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) +{ + int m = 0; + u16 o2, e = ALIGN(w, a), i = 0, j = 0; + *area = band_8; + o2 = *area - (a - (o + w) % a) % a; /* end of last possible block */ + + m = (min(o2 - 2 * o, 2 * o2 - o - *area) / 3 - w) / e + 1; + for (i = j = 0; i < m && j < n; i++, j++) { + BUG_ON(j + 1 >= MAX_C); + *p++ = o + i * e; + *p++ = (o + i * e + *area) >> 1; + if (++j < n) { + *p++ = o2 - i * e - w; + *p++ = (o2 - i * e - w) >> 1; + } + } + return j; +} + +/* Method D: for large allocation: aA or Aa */ +#define MAX_D 1 +static int nv12_D(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) +{ + u16 o1, w1 = (w + 1) >> 1, d; + *area = ALIGN(o + w, band_8); + + for (d = 0; n > 0 && d + o + w <= *area; d += a) { + /* try to fit 16-bit before 8-bit */ + o1 = ((o + d) % band_8) >> 1; + if (o1 + w1 <= o + d) { + *p++ = o + d; + *p++ = o1; + return 1; + } + + /* try to fit 16-bit after 8-bit */ + o1 += ALIGN(d + o + w - o1, band_16); + if (o1 + w1 <= *area) { + *p++ = o; + *p++ = o1; + return 1; + } + } + return 0; +} + +/** + * Umbrella nv12 packing method. This selects the best packings from the above + * methods. It also contains hardcoded packings for parameter combinations + * that have more efficient packings. This method provides is guaranteed to + * provide the optimal packing if 2 <= a <= 64 and w <= 64 and n is large. 
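+ *
+ * The packings[] table below is parsed as: n, o, w, a, area, followed
+ * by n pairs of (8-bit x, 16-bit x) block coordinates; an n of 0 ends
+ * the table. E.g. its first entry packs 9 blocks of width 4 (offset 2,
+ * alignment 4) into a 64-slot-wide area.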
+ */ +#define MAX_ANY 21 /* must be MAX(method-MAX-s, hardcoded n-s) */ +static u16 nv12_together(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *packing) +{ + u16 n_best, a_best, n2, a_, o_, w_; + + /* algo results (packings) */ + u8 pack_A[MAX_A * 2], pack_rA[MAX_A * 2]; + u8 pack_B[MAX_B * 2], pack_C[MAX_C * 2]; + u8 pack_D[MAX_D * 2]; + + /* + * Hardcoded packings. They are sorted by increasing area, and then by + * decreasing n. We may not get the best efficiency if less than n + * blocks are needed as packings are not necessarily sorted in + * increasing order. However, for those n-s one of the other 4 methods + * may return the optimal packing. + */ + u8 packings[] = { + /* n=9, o=2, w=4, a=4, area=64 */ + 9, 2, 4, 4, 64, + /* 8-bit, 16-bit block coordinate pairs */ + 2, 33, 6, 35, 10, 37, 14, 39, 18, 41, + 46, 23, 50, 25, 54, 27, 58, 29, + /* o=0, w=12, a=4, n=3 */ + 3, 0, 12, 4, 64, + 0, 32, 12, 38, 48, 24, + /* end */ + 0 + }, *p = packings, *p_best = NULL, *p_end; + p_end = packings + sizeof(packings) - 1; + + /* see which method gives the best packing */ + + /* start with smallest area algorithms A, B & C, stop if we can + pack all buffers */ + n_best = nv12_A(o, a, w, n, area, pack_A); + p_best = pack_A; + if (n_best < n) { + n2 = nv12_revA(o, a, w, n, &a_best, pack_rA); + if (n2 > n_best) { + n_best = n2; + p_best = pack_rA; + *area = a_best; + } + } + if (n_best < n) { + n2 = nv12_B(o, a, w, n, &a_best, pack_B); + if (n2 > n_best) { + n_best = n2; + p_best = pack_B; + *area = a_best; + } + } + if (n_best < n) { + n2 = nv12_C(o, a, w, n, &a_best, pack_C); + if (n2 > n_best) { + n_best = n2; + p_best = pack_C; + *area = a_best; + } + } + + /* traverse any special packings */ + while (*p) { + n2 = *p++; + o_ = *p++; + w_ = *p++; + a_ = *p++; + /* stop if we already have a better packing */ + if (n2 < n_best) + break; + + /* check if this packing is satisfactory */ + if (a_ >= a && o + w + ALIGN(o_ - o, a) <= o_ + w_) { + *area = *p++; + n_best = min(n2, n); + p_best = p; + break; + } + + /* skip to next packing */ + p += 1 + n2 * 2; + } + + /* + * If so far unsuccessful, check whether 8 and 16 bit blocks can be + * co-packed. This will actually be done in the end by the normal + * allocation, but we need to reserve a big-enough area. 
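+ * Method D records at most one aA (or Aa) placement, and is only
+ * consulted once methods A-C and the hardcoded packings have all
+ * failed to pack any block.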
+ */ + if (!n_best) { + n_best = nv12_D(o, a, w, n, area, pack_D); + p_best = NULL; + } + + /* store best packing */ + if (p_best && n_best) { + BUG_ON(n_best > MAX_ANY); + memcpy(packing, p_best, n_best * 2 * sizeof(*pack_A)); + } + + return n_best; +} + +/* reserve nv12 blocks */ +static void reserve_nv12(u32 n, u32 width, u32 height, + u32 gid, struct process_info *pi) +{ + u16 w, h, band, a, o = 0; + struct gid_info *gi; + int res = 0, res2, i; + u16 n_t, n_s, area_t, area_s; + u8 packing[2 * MAX_ANY]; + struct list_head reserved = LIST_HEAD_INIT(reserved); + + /* Check input parameters for correctness, and support */ + if (!width || !height || !n || + n > ops->width * ops->height / 2) + return; + + /* calculate dimensions, band, and alignment in slots */ + if (ops->analize(TILFMT_8BIT, width, height, &w, &h, &band, &a)) + return; + + /* get group context */ + gi = ops->get_gi(pi, gid); + if (!gi) + return; + + /* reserve in groups until failed or all is reserved */ + for (i = 0; i < n && res >= 0; i += res) { + /* check packing separately vs together */ + n_s = nv12_separate(o, a, w, n - i, &area_s); + if (ops->nv12_packed) + n_t = nv12_together(o, a, w, n - i, &area_t, packing); + else + n_t = 0; + + /* pack based on better efficiency */ + res = -1; + if (!ops->nv12_packed || + nv12_eff(w, n_s, area_s, n - i) > + nv12_eff(w, n_t, area_t, n - i)) { + + /* + * Reserve blocks separately into a temporary list, so + * that we can free them if unsuccessful. We need to be + * able to reserve both 8- and 16-bit blocks as the + * offsets of them must match. + */ + res = ops->lay_2d(TILFMT_8BIT, n_s, w, h, band_8, a, + gi, &reserved); + res2 = ops->lay_2d(TILFMT_16BIT, n_s, (w + 1) >> 1, h, + band_16, a >> 1, gi, &reserved); + + if (res2 < 0 || res < 0 || res != res2) { + /* clean up */ + ops->release(&reserved); + res = -1; + } else { + /* add list to reserved */ + ops->add_reserved(&reserved, gi); + } + } + + /* if separate packing failed, still try to pack together */ + if (res < 0 && ops->nv12_packed && n_t) { + /* pack together */ + res = ops->lay_nv12(n_t, area_t, w, h, gi, packing); + } + } + + ops->release_gi(gi); +} + +/* initialize shared method pointers and global static variables */ +void tiler_nv12_init(struct tiler_ops *tiler) +{ + ops = tiler; + + ops->reserve_nv12 = reserve_nv12; + + band_8 = PAGE_SIZE / ops->geom(TILFMT_8BIT)->slot_w + / ops->geom(TILFMT_8BIT)->bpp; + band_16 = PAGE_SIZE / ops->geom(TILFMT_16BIT)->slot_w + / ops->geom(TILFMT_16BIT)->bpp; +} diff --git a/drivers/media/video/tiler/tiler-reserve.c b/drivers/media/video/tiler/tiler-reserve.c new file mode 100644 index 0000000..fbabc6d --- /dev/null +++ b/drivers/media/video/tiler/tiler-reserve.c @@ -0,0 +1,154 @@ +/* + * tiler-reserve.c + * + * TILER driver area reservation functions for TI TILER hardware block. + * + * Author: Lajos Molnar <molnar@ti.com> + * + * Copyright (C) 2009-2010 Texas Instruments, Inc. + * + * This package is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. 
+ */ + +#include "_tiler.h" + +static struct tiler_ops *ops; /* shared methods and variables */ + +/** + * Calculate the maximum number buffers that can be packed next to each other, + * and the area they occupy. This method is used for both 2D and NV12 packing. + * + * @author a0194118 (7/16/2010) + * + * @param o desired offset + * @param w width of one block (>0) + * @param a desired alignment + * @param b band width (each block must occupy the same number of bands) + * @param n pointer to the desired number of blocks to pack. It will be + * updated with the maximum number of blocks that can be packed. + * @param _area pointer to store total area needed + * + * @return packing efficiency (0-1024) + */ +u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area) +{ + u16 m = 0, max_n = *n; /* m is mostly n - 1 */ + u16 e = ALIGN(w, a); /* effective width of one block */ + u32 eff, best_eff = 0; /* best values */ + u16 stride = ALIGN(o + w, b); /* block stride */ + u16 area = stride; /* area needed (for m + 1 blocks) */ + + /* NOTE: block #m+1 occupies the range (o + m * e, o + m * e + w) */ + + /* see how many blocks we can pack */ + while (m < max_n && + /* blocks must fit in tiler container */ + o + m * e + w <= ops->width && + /* block stride must be correct */ + stride == ALIGN(area - o - m * e, b)) { + + m++; + eff = m * w * 1024 / area; + if (eff > best_eff) { + /* store packing for best efficiency & smallest area */ + best_eff = eff; + *n = m; + if (_area) + *_area = area; + } + /* update area */ + area = ALIGN(o + m * e + w, b); + } + + return best_eff; +} + +/** + * We also optimize packing regular 2D areas as the auto-packing may result in + * sub-optimal efficiency. This is most pronounced if the area is wider than + * half a PAGE_SIZE (e.g. 2048 in 8-bit mode, or 1024 in 16-bit mode). + */ + +/* reserve 2d blocks */ +static void reserve_blocks(u32 n, enum tiler_fmt fmt, u32 width, u32 height, + u32 gid, + struct process_info *pi) +{ + u32 bpt, res = 0, i; + u16 a, band, w, h, n_try; + struct gid_info *gi; + const struct tiler_geom *g; + + /* Check input parameters for correctness, and support */ + if (!width || !height || !n || + fmt < TILFMT_8BIT || fmt > TILFMT_32BIT) + return; + + /* tiler slot in bytes */ + g = ops->geom(fmt); + bpt = g->slot_w * g->bpp; + + /* + * For blocks narrower than half PAGE_SIZE the default allocation is + * sufficient. Also check for basic area info. 
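+ * (With 4 KiB pages the early-out below triggers for widths up to 2048
+ * pixels in 8-bit mode or 1024 pixels in 16-bit mode, matching the
+ * examples above.)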
+ */ + if (width * g->bpp * 2 <= PAGE_SIZE || + ops->analize(fmt, width, height, &w, &h, &band, &a)) + return; + + /* get group id */ + gi = ops->get_gi(pi, gid); + if (!gi) + return; + + /* reserve in groups until failed or all is reserved */ + for (i = 0; i < n && res >= 0; i += res + 1) { + /* blocks to allocate in one area */ + n_try = min(n - i, ops->width); + tiler_best2pack(0, a, band, w, &n_try, NULL); + + res = -1; + while (n_try > 1) { + /* adjust res so we fail on 0 return value */ + res = ops->lay_2d(fmt, n_try, w, h, band, a, + gi, &gi->reserved) - 1; + if (res >= 0) + break; + + /* reduce n if failed to allocate area */ + n_try--; + } + } + /* keep reserved blocks even if failed to reserve all */ + + ops->release_gi(gi); +} + +/* unreserve blocks for a group id */ +static void unreserve_blocks(u32 gid, struct process_info *pi) +{ + struct gid_info *gi; + + gi = ops->get_gi(pi, gid); + if (!gi) + return; + + ops->release(&gi->reserved); + + ops->release_gi(gi); +} + +/* initialize shared method pointers and global static variables */ +void tiler_reserve_init(struct tiler_ops *tiler) +{ + ops = tiler; + + ops->reserve = reserve_blocks; + ops->unreserve = unreserve_blocks; +} diff --git a/drivers/media/video/tiler/tmm-pat.c b/drivers/media/video/tiler/tmm-pat.c new file mode 100644 index 0000000..2d902f9 --- /dev/null +++ b/drivers/media/video/tiler/tmm-pat.c @@ -0,0 +1,326 @@ +/* + * tmm-pat.c + * + * DMM driver support functions for TI TILER hardware block. + * + * Author: Lajos Molnar <molnar@ti.com>, David Sin <dsin@ti.com> + * + * Copyright (C) 2009-2010 Texas Instruments, Inc. + * + * This package is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED + * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ +#include <linux/init.h> +#include <linux/module.h> +#include <linux/mm.h> +#include <linux/mmzone.h> +#include <asm/cacheflush.h> +#include <linux/mutex.h> +#include <linux/list.h> +#include <linux/slab.h> + +#include "tmm.h" + +static int param_set_mem(const char *val, struct kernel_param *kp); + +/* Memory limit to cache free pages. 
TILER will eventually use this much */ +static u32 cache_limit = CONFIG_TILER_CACHE_LIMIT << 20; + +param_check_uint(cache, &cache_limit); +module_param_call(cache, param_set_mem, param_get_uint, &cache_limit, 0644); +__MODULE_PARM_TYPE(cache, "uint"); +MODULE_PARM_DESC(cache, "Cache free pages if total memory is under this limit"); + +/* global state - statically initialized */ +static LIST_HEAD(free_list); /* page cache: list of free pages */ +static u32 total_mem; /* total memory allocated (free & used) */ +static u32 refs; /* number of tmm_pat instances */ +static DEFINE_MUTEX(mtx); /* global mutex */ + +/* The page struct pointer and physical address of each page.*/ +struct mem { + struct list_head list; + struct page *pg; /* page struct */ + u32 pa; /* physical address */ +}; + +/* Used to keep track of mem per tmm_pat_get_pages call */ +struct fast { + struct list_head list; + struct mem **mem; /* array of page info */ + u32 *pa; /* array of physical addresses */ + u32 num; /* number of pages */ +}; + +/* TMM PAT private structure */ +struct dmm_mem { + struct list_head fast_list; + struct dmm *dmm; + u32 *dmac_va; /* coherent memory */ + u32 dmac_pa; /* phys.addr of coherent memory */ + struct page *dummy_pg; /* dummy page */ + u32 dummy_pa; /* phys.addr of dummy page */ +}; + +/* read mem values for a param */ +static int param_set_mem(const char *val, struct kernel_param *kp) +{ + u32 a; + char *p; + + /* must specify memory */ + if (!val) + return -EINVAL; + + /* parse value */ + a = memparse(val, &p); + if (p == val || *p) + return -EINVAL; + + /* store parsed value */ + *(uint *)kp->arg = a; + return 0; +} + +/** + * Frees pages in a fast structure. Moves pages to the free list if there + * are less pages used than max_to_keep. Otherwise, it frees the pages + */ +static void free_fast(struct fast *f) +{ + s32 i = 0; + + /* mutex is locked */ + for (i = 0; i < f->num; i++) { + if (total_mem < cache_limit) { + /* cache free page if under the limit */ + list_add(&f->mem[i]->list, &free_list); + } else { + /* otherwise, free */ + total_mem -= PAGE_SIZE; + __free_page(f->mem[i]->pg); + kfree(f->mem[i]); + } + } + kfree(f->pa); + kfree(f->mem); + /* remove only if element was added */ + if (f->list.next) + list_del(&f->list); + kfree(f); +} + +/* allocate and flush a page */ +static struct mem *alloc_mem(void) +{ + struct mem *m = kmalloc(sizeof(*m), GFP_KERNEL); + if (!m) + return NULL; + memset(m, 0, sizeof(*m)); + + m->pg = alloc_page(GFP_KERNEL | GFP_DMA); + if (!m->pg) { + kfree(m); + return NULL; + } + + m->pa = page_to_phys(m->pg); + + /* flush the cache entry for each page we allocate. 
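+ * These pages will back TILER mappings that the DMM accesses directly,
+ * so no dirty CPU cache lines may be left behind for them.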
*/ + dmac_flush_range(page_address(m->pg), + page_address(m->pg) + PAGE_SIZE); + outer_flush_range(m->pa, m->pa + PAGE_SIZE); + + return m; +} + +static void free_page_cache(void) +{ + struct mem *m, *m_; + + /* mutex is locked */ + list_for_each_entry_safe(m, m_, &free_list, list) { + __free_page(m->pg); + total_mem -= PAGE_SIZE; + list_del(&m->list); + kfree(m); + } +} + +static void tmm_pat_deinit(struct tmm *tmm) +{ + struct fast *f, *f_; + struct dmm_mem *pvt = (struct dmm_mem *) tmm->pvt; + + mutex_lock(&mtx); + + /* free all outstanding used memory */ + list_for_each_entry_safe(f, f_, &pvt->fast_list, list) + free_fast(f); + + /* if this is the last tmm_pat, free all memory */ + if (--refs == 0) + free_page_cache(); + + __free_page(pvt->dummy_pg); + + mutex_unlock(&mtx); +} + +static u32 *tmm_pat_get_pages(struct tmm *tmm, u32 n) +{ + struct mem *m; + struct fast *f; + struct dmm_mem *pvt = (struct dmm_mem *) tmm->pvt; + + f = kmalloc(sizeof(*f), GFP_KERNEL); + if (!f) + return NULL; + memset(f, 0, sizeof(*f)); + + /* array of mem struct pointers */ + f->mem = kmalloc(n * sizeof(*f->mem), GFP_KERNEL); + + /* array of physical addresses */ + f->pa = kmalloc(n * sizeof(*f->pa), GFP_KERNEL); + + /* no pages have been allocated yet (needed for cleanup) */ + f->num = 0; + + if (!f->mem || !f->pa) + goto cleanup; + + memset(f->mem, 0, n * sizeof(*f->mem)); + memset(f->pa, 0, n * sizeof(*f->pa)); + + /* fill out fast struct mem array with free pages */ + mutex_lock(&mtx); + while (f->num < n) { + /* if there is a free cached page use it */ + if (!list_empty(&free_list)) { + /* unbind first element from list */ + m = list_first_entry(&free_list, typeof(*m), list); + list_del(&m->list); + } else { + mutex_unlock(&mtx); + + /** + * Unlock mutex during allocation and cache flushing. 
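+ * alloc_page(GFP_KERNEL) may sleep and the cache maintenance in
+ * alloc_mem() is slow, so we avoid holding the global mtx across them.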
+ */ + m = alloc_mem(); + if (!m) + goto cleanup; + + mutex_lock(&mtx); + total_mem += PAGE_SIZE; + } + + f->mem[f->num] = m; + f->pa[f->num++] = m->pa; + } + + list_add(&f->list, &pvt->fast_list); + mutex_unlock(&mtx); + return f->pa; + +cleanup: + free_fast(f); + return NULL; +} + +static void tmm_pat_free_pages(struct tmm *tmm, u32 *page_list) +{ + struct dmm_mem *pvt = (struct dmm_mem *) tmm->pvt; + struct fast *f, *f_; + + mutex_lock(&mtx); + /* find fast struct based on 1st page */ + list_for_each_entry_safe(f, f_, &pvt->fast_list, list) { + if (f->pa[0] == page_list[0]) { + free_fast(f); + break; + } + } + mutex_unlock(&mtx); +} + +static s32 tmm_pat_pin(struct tmm *tmm, struct pat_area area, u32 page_pa) +{ + struct dmm_mem *pvt = (struct dmm_mem *) tmm->pvt; + struct pat pat_desc = {0}; + + /* send pat descriptor to dmm driver */ + pat_desc.ctrl.dir = 0; + pat_desc.ctrl.ini = 0; + pat_desc.ctrl.lut_id = 0; + pat_desc.ctrl.start = 1; + pat_desc.ctrl.sync = 0; + pat_desc.area = area; + pat_desc.next = NULL; + + /* must be a 16-byte aligned physical address */ + pat_desc.data = page_pa; + return dmm_pat_refill(pvt->dmm, &pat_desc, MANUAL); +} + +static void tmm_pat_unpin(struct tmm *tmm, struct pat_area area) +{ + u16 w = (u8) area.x1 - (u8) area.x0; + u16 h = (u8) area.y1 - (u8) area.y0; + u16 i = (w + 1) * (h + 1); + struct dmm_mem *pvt = (struct dmm_mem *) tmm->pvt; + + while (i--) + pvt->dmac_va[i] = pvt->dummy_pa; + + tmm_pat_pin(tmm, area, pvt->dmac_pa); +} + +struct tmm *tmm_pat_init(u32 pat_id, u32 *dmac_va, u32 dmac_pa) +{ + struct tmm *tmm = NULL; + struct dmm_mem *pvt = NULL; + + struct dmm *dmm = dmm_pat_init(pat_id); + if (dmm) + tmm = kmalloc(sizeof(*tmm), GFP_KERNEL); + if (tmm) + pvt = kmalloc(sizeof(*pvt), GFP_KERNEL); + if (pvt) + pvt->dummy_pg = alloc_page(GFP_KERNEL | GFP_DMA); + if (pvt->dummy_pg) { + /* private data */ + pvt->dmm = dmm; + pvt->dmac_pa = dmac_pa; + pvt->dmac_va = dmac_va; + pvt->dummy_pa = page_to_phys(pvt->dummy_pg); + + INIT_LIST_HEAD(&pvt->fast_list); + + /* increate tmm_pat references */ + mutex_lock(&mtx); + refs++; + mutex_unlock(&mtx); + + /* public data */ + tmm->pvt = pvt; + tmm->deinit = tmm_pat_deinit; + tmm->get = tmm_pat_get_pages; + tmm->free = tmm_pat_free_pages; + tmm->pin = tmm_pat_pin; + tmm->unpin = tmm_pat_unpin; + + return tmm; + } + + kfree(pvt); + kfree(tmm); + dmm_pat_release(dmm); + return NULL; +} +EXPORT_SYMBOL(tmm_pat_init); diff --git a/drivers/media/video/tiler/tmm.h b/drivers/media/video/tiler/tmm.h new file mode 100644 index 0000000..dc1b5b3 --- /dev/null +++ b/drivers/media/video/tiler/tmm.h @@ -0,0 +1,130 @@ +/* + * tmm.h + * + * TMM interface definition for TI TILER driver. + * + * Author: Lajos Molnar <molnar@ti.com> + * + * Copyright (C) 2009-2011 Texas Instruments, Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * * Neither the name of Texas Instruments Incorporated nor the names of + * its contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef TMM_H +#define TMM_H + +#include <mach/dmm.h> +/** + * TMM interface + */ +struct tmm { + void *pvt; + + /* function table */ + u32 *(*get) (struct tmm *tmm, u32 num_pages); + void (*free) (struct tmm *tmm, u32 *pages); + s32 (*pin) (struct tmm *tmm, struct pat_area area, u32 page_pa); + void (*unpin) (struct tmm *tmm, struct pat_area area); + void (*deinit) (struct tmm *tmm); +}; + +/** + * Request a set of pages from the DMM free page stack. + * @return a pointer to a list of physical page addresses. + */ +static inline +u32 *tmm_get(struct tmm *tmm, u32 num_pages) +{ + if (tmm && tmm->pvt) + return tmm->get(tmm, num_pages); + return NULL; +} + +/** + * Return a set of used pages to the DMM free page stack. + * @param list a pointer to a list of physical page addresses. + */ +static inline +void tmm_free(struct tmm *tmm, u32 *pages) +{ + if (tmm && tmm->pvt) + tmm->free(tmm, pages); +} + +/** + * Program the physical address translator. + * @param area PAT area + * @param list of pages + */ +static inline +s32 tmm_pin(struct tmm *tmm, struct pat_area area, u32 page_pa) +{ + if (tmm && tmm->pin && tmm->pvt) + return tmm->pin(tmm, area, page_pa); + return -ENODEV; +} + +/** + * Clears the physical address translator. + * @param area PAT area + */ +static inline +void tmm_unpin(struct tmm *tmm, struct pat_area area) +{ + if (tmm && tmm->unpin && tmm->pvt) + tmm->unpin(tmm, area); +} + +/** + * Checks whether tiler memory manager supports mapping + */ +static inline +bool tmm_can_pin(struct tmm *tmm) +{ + return tmm && tmm->pin; +} + +/** + * Deinitialize tiler memory manager + */ +static inline +void tmm_deinit(struct tmm *tmm) +{ + if (tmm && tmm->pvt) + tmm->deinit(tmm); +} + +/** + * TMM implementation for PAT support. + * + * Initialize TMM for PAT with given id. + */ +struct tmm *tmm_pat_init(u32 pat_id, u32 *dmac_va, u32 dmac_pa); + +#endif |
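
A minimal usage sketch of the tmm interface above, for orientation only.
Everything here is an assumption for illustration: the 16x16 PAT area, the
error handling, and the caller itself are hypothetical; dmac_va/dmac_pa must
come from dma_alloc_coherent() exactly as tiler_init() obtains them.

	/* Hypothetical caller of the tmm interface (sketch, not driver code) */
	#include <linux/types.h>
	#include <linux/errno.h>
	#include "tmm.h"

	static int example_pin_area(u32 *dmac_va, u32 dmac_pa)
	{
		/* a 16x16-slot region; pat_area uses inclusive coordinates */
		struct pat_area area = { .x0 = 0, .y0 = 0, .x1 = 15, .y1 = 15 };
		u32 n = 16 * 16, i, *pages;
		struct tmm *tmm;
		s32 r;

		tmm = tmm_pat_init(0, dmac_va, dmac_pa);  /* PAT instance 0 */
		if (!tmm)
			return -ENOMEM;

		pages = tmm_get(tmm, n);  /* physical addresses, one per slot */
		if (!pages) {
			tmm_deinit(tmm);
			return -ENOMEM;
		}

		if (tmm_can_pin(tmm)) {
			/* PAT fetches its refill list from the coherent array */
			for (i = 0; i < n; i++)
				dmac_va[i] = pages[i];
			r = tmm_pin(tmm, area, dmac_pa);
			if (!r)
				tmm_unpin(tmm, area);  /* restore dummy page */
		} else {
			r = -ENODEV;
		}

		tmm_free(tmm, pages);
		tmm_deinit(tmm);
		return r;
	}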