author     Andy Gross <andy.gross@ti.com>              2011-06-07 22:15:55 -0500
committer  Suman Anna <s-anna@ti.com>                  2011-06-17 22:39:57 -0500
commit     1c1a6d48cc61b034dbcac31bfeb2930ebb6a373b (patch)
tree       72f506bbdd0e7139526a856fb65fcf1bd794b38b
parent     4e3f43b498be6c96026c85577f05fd5add6e3b41 (diff)
TILER: Make tiler nv12 support a configuration option
The tiler driver now allows NV12 support to be selected as a kernel
configuration option. If enabled, NV12 support is compiled into the
driver.
Change-Id: Ie51517f9fa943af086314caf60ecad5dec52fdb7
Signed-off-by: Andy Gross <andy.gross@ti.com>
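With this change the NV12 reservation API exported from tiler-iface.c
(tiler_reserve_nv12() and tiler_reservex_nv12()) only exists when
CONFIG_TILER_ENABLE_NV12 is set. Below is a minimal sketch of how a
hypothetical in-kernel client could guard its use of that API on the same
option; the function name, header path and parameter values are illustrative
only and are not part of this patch:

    /* Hypothetical client code -- illustrative only, not part of this patch. */
    #include <linux/kernel.h>
    #include <mach/tiler.h>        /* assumed location of the TILER API header */

    static void example_prereserve_nv12_buffers(void)
    {
    #ifdef CONFIG_TILER_ENABLE_NV12
            /* pre-reserve four 1920x1080 NV12 buffers; align/offs are example values */
            tiler_reserve_nv12(4, 1920, 1080, 256, 0);
    #else
            /* NV12 support compiled out of the TILER driver: nothing to reserve */
    #endif
    }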
-rw-r--r--  drivers/media/video/tiler/Kconfig          |  10
-rw-r--r--  drivers/media/video/tiler/Makefile         |   4
-rw-r--r--  drivers/media/video/tiler/_tiler.h         |   9
-rw-r--r--  drivers/media/video/tiler/tiler-iface.c    |   6
-rw-r--r--  drivers/media/video/tiler/tiler-main.c     |   9
-rw-r--r--  drivers/media/video/tiler/tiler-nv12.c     | 423
-rw-r--r--  drivers/media/video/tiler/tiler-reserve.c  | 397
7 files changed, 460 insertions, 398 deletions
diff --git a/drivers/media/video/tiler/Kconfig b/drivers/media/video/tiler/Kconfig
index 00461eb..65bdf54 100644
--- a/drivers/media/video/tiler/Kconfig
+++ b/drivers/media/video/tiler/Kconfig
@@ -124,3 +124,13 @@ config TILER_EXPOSE_SSPTR
 	  You can use this flag to see if the userspace is relying on
 	  having access to the SSPtr.
+
+config TILER_ENABLE_NV12
+	bool "Enable NV12 support"
+	default y
+	depends on TI_TILER
+	help
+	  This option enables NV12 functionality in the TILER driver.
+
+	  If set, nv12 support will be compiled into the driver and APIs
+	  will be enabled.
diff --git a/drivers/media/video/tiler/Makefile b/drivers/media/video/tiler/Makefile
index aeb0f05..ad2dfa2 100644
--- a/drivers/media/video/tiler/Makefile
+++ b/drivers/media/video/tiler/Makefile
@@ -3,5 +3,9 @@
 obj-$(CONFIG_TI_TILER) += tcm/
 obj-$(CONFIG_TI_TILER) += tiler.o
 tiler-objs = tiler-geom.o tiler-main.o tiler-iface.o tiler-reserve.o tmm-pat.o
+ifdef CONFIG_TILER_ENABLE_NV12
+tiler-objs += tiler-nv12.o
+endif
+
 obj-$(CONFIG_TI_TILER) += tiler_dmm.o
 tiler_dmm-objs = dmm.o
diff --git a/drivers/media/video/tiler/_tiler.h b/drivers/media/video/tiler/_tiler.h
index d23ba43..372b2b1 100644
--- a/drivers/media/video/tiler/_tiler.h
+++ b/drivers/media/video/tiler/_tiler.h
@@ -125,8 +125,10 @@ struct tiler_ops {
 	s32 (*lay_2d) (enum tiler_fmt fmt, u16 n, u16 w, u16 h, u16 band,
 			u16 align, u16 offs, struct gid_info *gi,
 			struct list_head *pos);
+#ifdef CONFIG_TILER_ENABLE_NV12
 	s32 (*lay_nv12) (int n, u16 w, u16 w1, u16 h, struct gid_info *gi,
-			u8 *p);
+			u8 *p);
+#endif
 	/* group operations */
 	struct gid_info * (*get_gi) (struct process_info *pi, u32 gid);
 	void (*release_gi) (struct gid_info *gi);
@@ -151,8 +153,9 @@ struct tiler_ops {
 	/* additional info */
 	const struct file_operations *fops;
-
+#ifdef CONFIG_TILER_ENABLE_NV12
 	bool nv12_packed;	/* whether NV12 is packed into same container */
+#endif
 	u32 page;	/* page size */
 	u32 width;	/* container width */
 	u32 height;	/* container height */
@@ -161,6 +164,8 @@
 void tiler_iface_init(struct tiler_ops *tiler);
 void tiler_geom_init(struct tiler_ops *tiler);
 void tiler_reserve_init(struct tiler_ops *tiler);
+void tiler_nv12_init(struct tiler_ops *tiler);
+u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area);
 
 struct process_info *__get_pi(pid_t pid, bool kernel);
diff --git a/drivers/media/video/tiler/tiler-iface.c b/drivers/media/video/tiler/tiler-iface.c
index c96da83..44caafd 100644
--- a/drivers/media/video/tiler/tiler-iface.c
+++ b/drivers/media/video/tiler/tiler-iface.c
@@ -505,12 +505,16 @@ static long tiler_ioctl(struct file *filp, u32 cmd, unsigned long arg)
 			return -EFAULT;
 
 		if (block_info.fmt == TILFMT_8AND16)
+#ifdef CONFIG_TILER_ENABLE_NV12
 			ops->reserve_nv12(block_info.key,
 					block_info.dim.area.width,
 					block_info.dim.area.height,
 					block_info.align,
 					block_info.offs,
 					block_info.group_id, pi);
+#else
+			return -EINVAL;
+#endif
 		else
 			ops->reserve(block_info.key,
 					block_info.fmt,
@@ -672,6 +676,7 @@ void tiler_reserve(u32 n, enum tiler_fmt fmt, u32 width, u32 height,
 }
 EXPORT_SYMBOL(tiler_reserve);
 
+#ifdef CONFIG_TILER_ENABLE_NV12
 void tiler_reservex_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs,
 			u32 gid, pid_t pid)
 {
@@ -687,6 +692,7 @@ void tiler_reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs)
 {
 	tiler_reservex_nv12(n, width, height, align, offs, 0, current->tgid);
 }
 EXPORT_SYMBOL(tiler_reserve_nv12);
+#endif
 
 s32 tiler_allocx(struct tiler_block_t *blk, enum tiler_fmt fmt,
 		u32 align, u32 offs, u32 gid, pid_t pid)
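Because the NV12 entry points above are only compiled when
CONFIG_TILER_ENABLE_NV12 is set, in-kernel callers have to guard their call
sites (as in the sketch after the commit message). A common alternative,
shown here only for comparison and not part of this patch, is to let the
interface header provide empty static-inline stubs for the disabled case so
callers build unconditionally; the Makefile hunk could likewise be expressed
with the usual Kbuild form tiler-$(CONFIG_TILER_ENABLE_NV12) += tiler-nv12.o.

    /*
     * Hypothetical interface-header fragment -- not part of this patch.
     * If the TILER header exposed stubs like these, callers of the NV12
     * reservation API would not need #ifdef guards of their own.
     */
    #ifdef CONFIG_TILER_ENABLE_NV12
    void tiler_reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs);
    void tiler_reservex_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs,
                             u32 gid, pid_t pid);
    #else
    static inline void tiler_reserve_nv12(u32 n, u32 width, u32 height,
                                          u32 align, u32 offs)
    {
    }

    static inline void tiler_reservex_nv12(u32 n, u32 width, u32 height,
                                           u32 align, u32 offs, u32 gid,
                                           pid_t pid)
    {
    }
    #endif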
diff --git a/drivers/media/video/tiler/tiler-main.c b/drivers/media/video/tiler/tiler-main.c
index bffd8cc..23d130f 100644
--- a/drivers/media/video/tiler/tiler-main.c
+++ b/drivers/media/video/tiler/tiler-main.c
@@ -513,6 +513,7 @@ static s32 lay_2d(enum tiler_fmt fmt, u16 n, u16 w, u16 h, u16 band,
 	return n;
 }
 
+#ifdef CONFIG_TILER_ENABLE_NV12
 /* layout reserved nv12 blocks in a larger area */
 /* NOTE: area w(idth), w1 (8-bit block width), h(eight) are in slots */
 /* p is a pointer to a packing description, which is a list of offsets in
@@ -558,6 +559,7 @@ static s32 lay_nv12(int n, u16 w, u16 w1, u16 h, struct gid_info *gi, u8 *p)
 	mutex_unlock(&mtx);
 	return n;
 }
+#endif
 
 static void _m_unpin(struct mem_info *mi)
 {
@@ -1221,7 +1223,9 @@ static s32 __init tiler_init(void)
 	tiler.lock = find_n_lock;
 	tiler.unlock_free = unlock_n_free;
 	tiler.lay_2d = lay_2d;
+#ifdef CONFIG_TILER_ENABLE_NV12
 	tiler.lay_nv12 = lay_nv12;
+#endif
 	tiler.destroy_group = destroy_group;
 	tiler.lock_by_ssptr = find_block_by_ssptr;
 	tiler.describe = fill_block_info;
@@ -1233,6 +1237,9 @@ static s32 __init tiler_init(void)
 	tiler_geom_init(&tiler);
 	tiler_reserve_init(&tiler);
 	tiler_iface_init(&tiler);
+#ifdef CONFIG_TILER_ENABLE_NV12
+	tiler_nv12_init(&tiler);
+#endif
 
 	/* check module parameters for correctness */
 	if (default_align > PAGE_SIZE ||
@@ -1272,7 +1279,9 @@ static s32 __init tiler_init(void)
 	area.y1 = tiler.height - 1;
 	tmm_unpin(tmm_pat, area);
 
+#ifdef CONFIG_TILER_ENABLE_NV12
 	tiler.nv12_packed = tcm[TILFMT_8BIT] == tcm[TILFMT_16BIT];
+#endif
 
 	tiler_device = kmalloc(sizeof(*tiler_device), GFP_KERNEL);
 	if (!tiler_device || !sita || !tmm_pat) {
diff --git a/drivers/media/video/tiler/tiler-nv12.c b/drivers/media/video/tiler/tiler-nv12.c
new file mode 100644
index 0000000..c16a140
--- /dev/null
+++ b/drivers/media/video/tiler/tiler-nv12.c
@@ -0,0 +1,423 @@
+/*
+ * tiler-nv12.c
+ *
+ * TILER driver NV12 area reservation functions for TI TILER hardware block.
+ *
+ * Author: Lajos Molnar <molnar@ti.com>
+ *
+ * Copyright (C) 2009-2010 Texas Instruments, Inc.
+ *
+ * This package is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * THIS PACKAGE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
+ * WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+ */
+
+#include "_tiler.h"
+
+static struct tiler_ops *ops;	/* shared methods and variables */
+static int band_8;
+static int band_16;
+
+/*
+ * NV12 Reservation Functions
+ *
+ * TILER is designed so that a (w * h) * 8bit area is twice as wide as a
+ * (w/2 * h/2) * 16bit area.  Since having pairs of such 8-bit and 16-bit
+ * blocks is a common usecase for TILER, we optimize packing these into a
+ * TILER area.
+ *
+ * During reservation we want to find the most effective packing (most used area
+ * in the smallest overall area)
+ *
+ * We have two algorithms for packing nv12 blocks: either pack 8- and 16-bit
+ * blocks into separate container areas, or pack them together into same area.
+ */
+
+/**
+ * Calculate effectiveness of packing. We weight total area much higher than
+ * packing efficiency to get the smallest overall container use.
+ * + * @param w width of one (8-bit) block + * @param n buffers in a packing + * @param area width of packing area + * @param n_total total number of buffers to be packed + * @return effectiveness, the higher the better + */ +static inline u32 nv12_eff(u16 w, u16 n, u16 area, u16 n_total) +{ + return 0x10000000 - + /* weigh against total area needed (for all buffers) */ + /* 64-slots = -2048 */ + DIV_ROUND_UP(n_total, n) * area * 32 + + /* packing efficiency (0 - 1024) */ + 1024 * n * ((w * 3 + 1) >> 1) / area; +} + +/** + * Fallback nv12 packing algorithm: pack 8 and 16 bit block into separate + * areas. + * + * @author a0194118 (7/16/2010) + * + * @param o desired offset (<a) + * @param a desired alignment (>=2) + * @param w block width (>0) + * @param n number of blocks desired + * @param area pointer to store total area needed + * + * @return number of blocks that can be allocated + */ +static u16 nv12_separate(u16 o, u16 a, u16 w, u16 n, u16 *area) +{ + tiler_best2pack(o, a, band_8, w, &n, area); + tiler_best2pack(o >> 1, a >> 1, band_16, (w + 1) >> 1, &n, area); + *area *= 3; + return n; +} + +/* + * Specialized NV12 Reservation Algorithms + * + * We use 4 packing methods that pack nv12 blocks into the same area. Together + * these 4 methods give the optimal result for most possible input parameters. + * + * For now we pack into a 64-slot area, so that we don't have to worry about + * stride issues (all blocks get 4K stride). For some of the algorithms this + * could be true even if the area was 128. + */ + +/** + * Packing types are marked using a letter sequence, capital letters denoting + * 8-bit blocks, lower case letters denoting corresponding 16-bit blocks. + * + * All methods have the following parameters. They also define the maximum + * number of coordinates that could potentially be packed. 
+ * + * @param o, a, w, n offset, alignment, width, # of blocks as usual + * @param area pointer to store area needed for packing + * @param p pointer to store packing coordinates + * @return number of blocks that can be packed + */ + +/* Method A: progressive packing: AAAAaaaaBBbbCc into 64-slot area */ +#define MAX_A 21 +static int nv12_A(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) +{ + u16 x = o, u, l, m = 0; + *area = band_8; + + while (x + w < *area && m < n) { + /* current 8bit upper bound (a) is next 8bit lower bound (B) */ + l = u = (*area + x) >> 1; + + /* pack until upper bound */ + while (x + w <= u && m < n) { + /* save packing */ + BUG_ON(m + 1 >= MAX_A); + *p++ = x; + *p++ = l; + l = (*area + x + w + 1) >> 1; + x = ALIGN(x + w - o, a) + o; + m++; + } + x = ALIGN(l - o, a) + o; /* set new lower bound */ + } + return m; +} + +/* Method -A: regressive packing: cCbbBBaaaaAAAA into 64-slot area */ +static int nv12_revA(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) +{ + u16 m; + + /* this is a mirrored packing of method A */ + n = nv12_A((a - (o + w) % a) % a, a, w, n, area, p); + + /* reverse packing */ + for (m = 0; m < n; m++) { + *p = *area - *p - w; + p++; + *p = *area - *p - ((w + 1) >> 1); + p++; + } + return n; +} + +/* Method B: simple layout: aAbcBdeCfgDhEFGH */ +#define MAX_B 8 +static int nv12_B(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) +{ + u16 e = (o + w) % a; /* end offset */ + u16 o1 = (o >> 1) % a; /* half offset */ + u16 e1 = ((o + w + 1) >> 1) % a; /* half end offset */ + u16 o2 = o1 + (a >> 2); /* 2nd half offset */ + u16 e2 = e1 + (a >> 2); /* 2nd half end offset */ + u16 m = 0; + *area = band_8; + + /* ensure 16-bit blocks don't overlap 8-bit blocks */ + + /* width cannot wrap around alignment, half block must be before block, + 2nd half can be before or after */ + if (w < a && o < e && e1 <= o && (e2 <= o || o2 >= e)) + while (o + w <= *area && m < n) { + BUG_ON(m + 1 >= MAX_B); + *p++ = o; + *p++ = o >> 1; + m++; + o += a; + } + return m; +} + +/* Method C: butterfly layout: AAbbaaBB */ +#define MAX_C 20 +static int nv12_C(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) +{ + int m = 0; + u16 o2, e = ALIGN(w, a), i = 0, j = 0; + *area = band_8; + o2 = *area - (a - (o + w) % a) % a; /* end of last possible block */ + + m = (min(o2 - 2 * o, 2 * o2 - o - *area) / 3 - w) / e + 1; + for (i = j = 0; i < m && j < n; i++, j++) { + BUG_ON(j + 1 >= MAX_C); + *p++ = o + i * e; + *p++ = (o + i * e + *area) >> 1; + if (++j < n) { + *p++ = o2 - i * e - w; + *p++ = (o2 - i * e - w) >> 1; + } + } + return j; +} + +/* Method D: for large allocation: aA or Aa */ +#define MAX_D 1 +static int nv12_D(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) +{ + u16 o1, w1 = (w + 1) >> 1, d; + *area = ALIGN(o + w, band_8); + + for (d = 0; n > 0 && d + o + w <= *area; d += a) { + /* try to fit 16-bit before 8-bit */ + o1 = ((o + d) % band_8) >> 1; + if (o1 + w1 <= o + d) { + *p++ = o + d; + *p++ = o1; + return 1; + } + + /* try to fit 16-bit after 8-bit */ + o1 += ALIGN(d + o + w - o1, band_16); + if (o1 + w1 <= *area) { + *p++ = o; + *p++ = o1; + return 1; + } + } + return 0; +} + +/** + * Umbrella nv12 packing method. This selects the best packings from the above + * methods. It also contains hardcoded packings for parameter combinations + * that have more efficient packings. This method provides is guaranteed to + * provide the optimal packing if 2 <= a <= 64 and w <= 64 and n is large. 
+ */ +#define MAX_ANY 21 /* must be MAX(method-MAX-s, hardcoded n-s) */ +static u16 nv12_together(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *packing) +{ + u16 n_best, a_best, n2, a_, o_, w_; + + /* algo results (packings) */ + u8 pack_A[MAX_A * 2], pack_rA[MAX_A * 2]; + u8 pack_B[MAX_B * 2], pack_C[MAX_C * 2]; + u8 pack_D[MAX_D * 2]; + + /* + * Hardcoded packings. They are sorted by increasing area, and then by + * decreasing n. We may not get the best efficiency if less than n + * blocks are needed as packings are not necessarily sorted in + * increasing order. However, for those n-s one of the other 4 methods + * may return the optimal packing. + */ + u8 packings[] = { + /* n=9, o=2, w=4, a=4, area=64 */ + 9, 2, 4, 4, 64, + /* 8-bit, 16-bit block coordinate pairs */ + 2, 33, 6, 35, 10, 37, 14, 39, 18, 41, + 46, 23, 50, 25, 54, 27, 58, 29, + /* o=0, w=12, a=4, n=3 */ + 3, 0, 12, 4, 64, + 0, 32, 12, 38, 48, 24, + /* end */ + 0 + }, *p = packings, *p_best = NULL, *p_end; + p_end = packings + sizeof(packings) - 1; + + /* see which method gives the best packing */ + + /* start with smallest area algorithms A, B & C, stop if we can + pack all buffers */ + n_best = nv12_A(o, a, w, n, area, pack_A); + p_best = pack_A; + if (n_best < n) { + n2 = nv12_revA(o, a, w, n, &a_best, pack_rA); + if (n2 > n_best) { + n_best = n2; + p_best = pack_rA; + *area = a_best; + } + } + if (n_best < n) { + n2 = nv12_B(o, a, w, n, &a_best, pack_B); + if (n2 > n_best) { + n_best = n2; + p_best = pack_B; + *area = a_best; + } + } + if (n_best < n) { + n2 = nv12_C(o, a, w, n, &a_best, pack_C); + if (n2 > n_best) { + n_best = n2; + p_best = pack_C; + *area = a_best; + } + } + + /* traverse any special packings */ + while (*p) { + n2 = *p++; + o_ = *p++; + w_ = *p++; + a_ = *p++; + /* stop if we already have a better packing */ + if (n2 < n_best) + break; + + /* check if this packing is satisfactory */ + if (a_ >= a && o + w + ALIGN(o_ - o, a) <= o_ + w_) { + *area = *p++; + n_best = min(n2, n); + p_best = p; + break; + } + + /* skip to next packing */ + p += 1 + n2 * 2; + } + + /* + * If so far unsuccessful, check whether 8 and 16 bit blocks can be + * co-packed. This will actually be done in the end by the normal + * allocation, but we need to reserve a big-enough area. 
+ */ + if (!n_best) { + n_best = nv12_D(o, a, w, n, area, pack_D); + p_best = NULL; + } + + /* store best packing */ + if (p_best && n_best) { + BUG_ON(n_best > MAX_ANY); + memcpy(packing, p_best, n_best * 2 * sizeof(*pack_A)); + } + + return n_best; +} + +/* reserve nv12 blocks */ +static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs, + u32 gid, struct process_info *pi) +{ + u16 w, h, band, a = align, o = offs; + struct gid_info *gi; + int res = 0, res2, i; + u16 n_t, n_s, area_t, area_s; + u8 packing[2 * MAX_ANY]; + struct list_head reserved = LIST_HEAD_INIT(reserved); + + /* adjust alignment to the largest slot width (128 bytes) */ + a = max_t(u16, PAGE_SIZE / min(band_8, band_16), a); + + /* Check input parameters for correctness, and support */ + if (!width || !height || !n || + offs >= align || offs & 1 || + align >= PAGE_SIZE || + n > ops->width * ops->height / 2) + return; + + /* calculate dimensions, band, offs and alignment in slots */ + if (ops->analize(TILFMT_8BIT, width, height, &w, &h, &band, &a, &o, + NULL)) + return; + + /* get group context */ + gi = ops->get_gi(pi, gid); + if (!gi) + return; + + /* reserve in groups until failed or all is reserved */ + for (i = 0; i < n && res >= 0; i += res) { + /* check packing separately vs together */ + n_s = nv12_separate(o, a, w, n - i, &area_s); + if (ops->nv12_packed) + n_t = nv12_together(o, a, w, n - i, &area_t, packing); + else + n_t = 0; + + /* pack based on better efficiency */ + res = -1; + if (!ops->nv12_packed || + nv12_eff(w, n_s, area_s, n - i) > + nv12_eff(w, n_t, area_t, n - i)) { + + /* + * Reserve blocks separately into a temporary list, so + * that we can free them if unsuccessful. We need to be + * able to reserve both 8- and 16-bit blocks as the + * offsets of them must match. 
+ */ + res = ops->lay_2d(TILFMT_8BIT, n_s, w, h, band_8, a, o, + gi, &reserved); + res2 = ops->lay_2d(TILFMT_16BIT, n_s, (w + 1) >> 1, h, + band_16, a >> 1, o >> 1, gi, &reserved); + + if (res2 < 0 || res < 0 || res != res2) { + /* clean up */ + ops->release(&reserved); + res = -1; + } else { + /* add list to reserved */ + ops->add_reserved(&reserved, gi); + } + } + + /* if separate packing failed, still try to pack together */ + if (res < 0 && ops->nv12_packed && n_t) { + /* pack together */ + res = ops->lay_nv12(n_t, area_t, w, h, gi, packing); + } + } + + ops->release_gi(gi); +} + +/* initialize shared method pointers and global static variables */ +void tiler_nv12_init(struct tiler_ops *tiler) +{ + ops = tiler; + + ops->reserve_nv12 = reserve_nv12; + + band_8 = PAGE_SIZE / ops->geom(TILFMT_8BIT)->slot_w + / ops->geom(TILFMT_8BIT)->bpp; + band_16 = PAGE_SIZE / ops->geom(TILFMT_16BIT)->slot_w + / ops->geom(TILFMT_16BIT)->bpp; +} diff --git a/drivers/media/video/tiler/tiler-reserve.c b/drivers/media/video/tiler/tiler-reserve.c index 6715d3d..770fb07 100644 --- a/drivers/media/video/tiler/tiler-reserve.c +++ b/drivers/media/video/tiler/tiler-reserve.c @@ -19,8 +19,6 @@ #include "_tiler.h" static struct tiler_ops *ops; /* shared methods and variables */ -static int band_8; /* size of 8-bit band in slots */ -static int band_16; /* size of 16-bit band in slots */ /** * Calculate the maximum number buffers that can be packed next to each other, @@ -38,7 +36,7 @@ static int band_16; /* size of 16-bit band in slots */ * * @return packing efficiency (0-1024) */ -static u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area) +u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area) { u16 m = 0, max_n = *n; /* m is mostly n - 1 */ u16 e = ALIGN(w, a); /* effective width of one block */ @@ -71,393 +69,6 @@ static u32 tiler_best2pack(u16 o, u16 a, u16 b, u16 w, u16 *n, u16 *_area) return best_eff; } -/* - * NV12 Reservation Functions - * - * TILER is designed so that a (w * h) * 8bit area is twice as wide as a - * (w/2 * h/2) * 16bit area. Since having pairs of such 8-bit and 16-bit - * blocks is a common usecase for TILER, we optimize packing these into a - * TILER area. - * - * During reservation we want to find the most effective packing (most used area - * in the smallest overall area) - * - * We have two algorithms for packing nv12 blocks: either pack 8- and 16-bit - * blocks into separate container areas, or pack them together into same area. - */ - -/** - * Calculate effectiveness of packing. We weight total area much higher than - * packing efficiency to get the smallest overall container use. - * - * @param w width of one (8-bit) block - * @param n buffers in a packing - * @param area width of packing area - * @param n_total total number of buffers to be packed - * @return effectiveness, the higher the better - */ -static inline u32 nv12_eff(u16 w, u16 n, u16 area, u16 n_total) -{ - return 0x10000000 - - /* weigh against total area needed (for all buffers) */ - /* 64-slots = -2048 */ - DIV_ROUND_UP(n_total, n) * area * 32 + - /* packing efficiency (0 - 1024) */ - 1024 * n * ((w * 3 + 1) >> 1) / area; -} - -/** - * Fallback nv12 packing algorithm: pack 8 and 16 bit block into separate - * areas. 
- * - * @author a0194118 (7/16/2010) - * - * @param o desired offset (<a) - * @param a desired alignment (>=2) - * @param w block width (>0) - * @param n number of blocks desired - * @param area pointer to store total area needed - * - * @return number of blocks that can be allocated - */ -static u16 nv12_separate(u16 o, u16 a, u16 w, u16 n, u16 *area) -{ - tiler_best2pack(o, a, band_8, w, &n, area); - tiler_best2pack(o >> 1, a >> 1, band_16, (w + 1) >> 1, &n, area); - *area *= 3; - return n; -} - -/* - * Specialized NV12 Reservation Algorithms - * - * We use 4 packing methods that pack nv12 blocks into the same area. Together - * these 4 methods give the optimal result for most possible input parameters. - * - * For now we pack into a 64-slot area, so that we don't have to worry about - * stride issues (all blocks get 4K stride). For some of the algorithms this - * could be true even if the area was 128. - */ - -/** - * Packing types are marked using a letter sequence, capital letters denoting - * 8-bit blocks, lower case letters denoting corresponding 16-bit blocks. - * - * All methods have the following parameters. They also define the maximum - * number of coordinates that could potentially be packed. - * - * @param o, a, w, n offset, alignment, width, # of blocks as usual - * @param area pointer to store area needed for packing - * @param p pointer to store packing coordinates - * @return number of blocks that can be packed - */ - -/* Method A: progressive packing: AAAAaaaaBBbbCc into 64-slot area */ -#define MAX_A 21 -static int nv12_A(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) -{ - u16 x = o, u, l, m = 0; - *area = band_8; - - while (x + w < *area && m < n) { - /* current 8bit upper bound (a) is next 8bit lower bound (B) */ - l = u = (*area + x) >> 1; - - /* pack until upper bound */ - while (x + w <= u && m < n) { - /* save packing */ - BUG_ON(m + 1 >= MAX_A); - *p++ = x; - *p++ = l; - l = (*area + x + w + 1) >> 1; - x = ALIGN(x + w - o, a) + o; - m++; - } - x = ALIGN(l - o, a) + o; /* set new lower bound */ - } - return m; -} - -/* Method -A: regressive packing: cCbbBBaaaaAAAA into 64-slot area */ -static int nv12_revA(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) -{ - u16 m; - - /* this is a mirrored packing of method A */ - n = nv12_A((a - (o + w) % a) % a, a, w, n, area, p); - - /* reverse packing */ - for (m = 0; m < n; m++) { - *p = *area - *p - w; - p++; - *p = *area - *p - ((w + 1) >> 1); - p++; - } - return n; -} - -/* Method B: simple layout: aAbcBdeCfgDhEFGH */ -#define MAX_B 8 -static int nv12_B(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) -{ - u16 e = (o + w) % a; /* end offset */ - u16 o1 = (o >> 1) % a; /* half offset */ - u16 e1 = ((o + w + 1) >> 1) % a; /* half end offset */ - u16 o2 = o1 + (a >> 2); /* 2nd half offset */ - u16 e2 = e1 + (a >> 2); /* 2nd half end offset */ - u16 m = 0; - *area = band_8; - - /* ensure 16-bit blocks don't overlap 8-bit blocks */ - - /* width cannot wrap around alignment, half block must be before block, - 2nd half can be before or after */ - if (w < a && o < e && e1 <= o && (e2 <= o || o2 >= e)) - while (o + w <= *area && m < n) { - BUG_ON(m + 1 >= MAX_B); - *p++ = o; - *p++ = o >> 1; - m++; - o += a; - } - return m; -} - -/* Method C: butterfly layout: AAbbaaBB */ -#define MAX_C 20 -static int nv12_C(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) -{ - int m = 0; - u16 o2, e = ALIGN(w, a), i = 0, j = 0; - *area = band_8; - o2 = *area - (a - (o + w) % a) % a; /* end of last possible block */ - - m = (min(o2 - 2 * o, 2 * o2 - o - 
*area) / 3 - w) / e + 1; - for (i = j = 0; i < m && j < n; i++, j++) { - BUG_ON(j + 1 >= MAX_C); - *p++ = o + i * e; - *p++ = (o + i * e + *area) >> 1; - if (++j < n) { - *p++ = o2 - i * e - w; - *p++ = (o2 - i * e - w) >> 1; - } - } - return j; -} - -/* Method D: for large allocation: aA or Aa */ -#define MAX_D 1 -static int nv12_D(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *p) -{ - u16 o1, w1 = (w + 1) >> 1, d; - *area = ALIGN(o + w, band_8); - - for (d = 0; n > 0 && d + o + w <= *area; d += a) { - /* try to fit 16-bit before 8-bit */ - o1 = ((o + d) % band_8) >> 1; - if (o1 + w1 <= o + d) { - *p++ = o + d; - *p++ = o1; - return 1; - } - - /* try to fit 16-bit after 8-bit */ - o1 += ALIGN(d + o + w - o1, band_16); - if (o1 + w1 <= *area) { - *p++ = o; - *p++ = o1; - return 1; - } - } - return 0; -} - -/** - * Umbrella nv12 packing method. This selects the best packings from the above - * methods. It also contains hardcoded packings for parameter combinations - * that have more efficient packings. This method provides is guaranteed to - * provide the optimal packing if 2 <= a <= 64 and w <= 64 and n is large. - */ -#define MAX_ANY 21 /* must be MAX(method-MAX-s, hardcoded n-s) */ -static u16 nv12_together(u16 o, u16 a, u16 w, u16 n, u16 *area, u8 *packing) -{ - u16 n_best, a_best, n2, a_, o_, w_; - - /* algo results (packings) */ - u8 pack_A[MAX_A * 2], pack_rA[MAX_A * 2]; - u8 pack_B[MAX_B * 2], pack_C[MAX_C * 2]; - u8 pack_D[MAX_D * 2]; - - /* - * Hardcoded packings. They are sorted by increasing area, and then by - * decreasing n. We may not get the best efficiency if less than n - * blocks are needed as packings are not necessarily sorted in - * increasing order. However, for those n-s one of the other 4 methods - * may return the optimal packing. - */ - u8 packings[] = { - /* n=9, o=2, w=4, a=4, area=64 */ - 9, 2, 4, 4, 64, - /* 8-bit, 16-bit block coordinate pairs */ - 2, 33, 6, 35, 10, 37, 14, 39, 18, 41, - 46, 23, 50, 25, 54, 27, 58, 29, - /* o=0, w=12, a=4, n=3 */ - 3, 0, 12, 4, 64, - 0, 32, 12, 38, 48, 24, - /* end */ - 0 - }, *p = packings, *p_best = NULL, *p_end; - p_end = packings + sizeof(packings) - 1; - - /* see which method gives the best packing */ - - /* start with smallest area algorithms A, B & C, stop if we can - pack all buffers */ - n_best = nv12_A(o, a, w, n, area, pack_A); - p_best = pack_A; - if (n_best < n) { - n2 = nv12_revA(o, a, w, n, &a_best, pack_rA); - if (n2 > n_best) { - n_best = n2; - p_best = pack_rA; - *area = a_best; - } - } - if (n_best < n) { - n2 = nv12_B(o, a, w, n, &a_best, pack_B); - if (n2 > n_best) { - n_best = n2; - p_best = pack_B; - *area = a_best; - } - } - if (n_best < n) { - n2 = nv12_C(o, a, w, n, &a_best, pack_C); - if (n2 > n_best) { - n_best = n2; - p_best = pack_C; - *area = a_best; - } - } - - /* traverse any special packings */ - while (*p) { - n2 = *p++; - o_ = *p++; - w_ = *p++; - a_ = *p++; - /* stop if we already have a better packing */ - if (n2 < n_best) - break; - - /* check if this packing is satisfactory */ - if (a_ >= a && o + w + ALIGN(o_ - o, a) <= o_ + w_) { - *area = *p++; - n_best = min(n2, n); - p_best = p; - break; - } - - /* skip to next packing */ - p += 1 + n2 * 2; - } - - /* - * If so far unsuccessful, check whether 8 and 16 bit blocks can be - * co-packed. This will actually be done in the end by the normal - * allocation, but we need to reserve a big-enough area. 
- */ - if (!n_best) { - n_best = nv12_D(o, a, w, n, area, pack_D); - p_best = NULL; - } - - /* store best packing */ - if (p_best && n_best) { - BUG_ON(n_best > MAX_ANY); - memcpy(packing, p_best, n_best * 2 * sizeof(*pack_A)); - } - - return n_best; -} - -/* reserve nv12 blocks */ -static void reserve_nv12(u32 n, u32 width, u32 height, u32 align, u32 offs, - u32 gid, struct process_info *pi) -{ - u16 w, h, band, a = align, o = offs; - struct gid_info *gi; - int res = 0, res2, i; - u16 n_t, n_s, area_t, area_s; - u8 packing[2 * MAX_ANY]; - struct list_head reserved = LIST_HEAD_INIT(reserved); - - /* adjust alignment to the largest slot width (128 bytes) */ - a = max_t(u16, PAGE_SIZE / min(band_8, band_16), a); - - /* Check input parameters for correctness, and support */ - if (!width || !height || !n || - offs >= align || offs & 1 || - align >= PAGE_SIZE || - n > ops->width * ops->height / 2) - return; - - /* calculate dimensions, band, offs and alignment in slots */ - if (ops->analize(TILFMT_8BIT, width, height, &w, &h, &band, &a, &o, - NULL)) - return; - - /* get group context */ - gi = ops->get_gi(pi, gid); - if (!gi) - return; - - /* reserve in groups until failed or all is reserved */ - for (i = 0; i < n && res >= 0; i += res) { - /* check packing separately vs together */ - n_s = nv12_separate(o, a, w, n - i, &area_s); - if (ops->nv12_packed) - n_t = nv12_together(o, a, w, n - i, &area_t, packing); - else - n_t = 0; - - /* pack based on better efficiency */ - res = -1; - if (!ops->nv12_packed || - nv12_eff(w, n_s, area_s, n - i) > - nv12_eff(w, n_t, area_t, n - i)) { - - /* - * Reserve blocks separately into a temporary list, so - * that we can free them if unsuccessful. We need to be - * able to reserve both 8- and 16-bit blocks as the - * offsets of them must match. - */ - res = ops->lay_2d(TILFMT_8BIT, n_s, w, h, band_8, a, o, - gi, &reserved); - res2 = ops->lay_2d(TILFMT_16BIT, n_s, (w + 1) >> 1, h, - band_16, a >> 1, o >> 1, gi, &reserved); - - if (res2 < 0 || res < 0 || res != res2) { - /* clean up */ - ops->release(&reserved); - res = -1; - } else { - /* add list to reserved */ - ops->add_reserved(&reserved, gi); - } - } - - /* if separate packing failed, still try to pack together */ - if (res < 0 && ops->nv12_packed && n_t) { - /* pack together */ - res = ops->lay_nv12(n_t, area_t, w, h, gi, packing); - } - } - - ops->release_gi(gi); -} - /** * We also optimize packing regular 2D areas as the auto-packing may result in * sub-optimal efficiency. This is most pronounced if the area is wider than @@ -539,12 +150,6 @@ void tiler_reserve_init(struct tiler_ops *tiler) { ops = tiler; - ops->reserve_nv12 = reserve_nv12; ops->reserve = reserve_blocks; ops->unreserve = unreserve_blocks; - - band_8 = PAGE_SIZE / ops->geom(TILFMT_8BIT)->slot_w - / ops->geom(TILFMT_8BIT)->bpp; - band_16 = PAGE_SIZE / ops->geom(TILFMT_16BIT)->slot_w - / ops->geom(TILFMT_16BIT)->bpp; } |
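For reference, the effectiveness heuristic that reserve_nv12() uses to choose
between separate and co-packed reservation (nv12_eff(), moved into
tiler-nv12.c above) can be exercised outside the kernel. The following is a
minimal user-space sketch that copies the formula and compares two candidate
packings; the block width, packing counts and areas fed to it are made-up
example values, not output of the reservation code:

    /* User-space sketch of the nv12_eff() heuristic from tiler-nv12.c.
     * Build with:  cc -o nv12_eff_demo nv12_eff_demo.c
     */
    #include <stdio.h>

    #define DIV_ROUND_UP(n, d)      (((n) + (d) - 1) / (d))

    /* same formula as the kernel's nv12_eff(): higher is better */
    static unsigned int nv12_eff(unsigned int w, unsigned int n,
                                 unsigned int area, unsigned int n_total)
    {
            return 0x10000000 -
                    /* weigh against total area needed (for all buffers) */
                    DIV_ROUND_UP(n_total, n) * area * 32 +
                    /* packing efficiency (0 - 1024) */
                    1024 * n * ((w * 3 + 1) >> 1) / area;
    }

    int main(void)
    {
            /* made-up candidates for packing 10 NV12 pairs of width w = 20 slots */
            unsigned int n_total = 10, w = 20;
            unsigned int eff_separate = nv12_eff(w, 2, 48, n_total);
            unsigned int eff_together = nv12_eff(w, 3, 64, n_total);

            printf("separate: %u  together: %u\n", eff_separate, eff_together);

            /* mirror the comparison in reserve_nv12(): choose separate packing
             * only if it is strictly more effective */
            printf("heuristic prefers %s packing\n",
                   eff_separate > eff_together ? "separate" : "together");
            return 0;
    }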