diff options
author | Zack Rusin <zackr@vmware.com> | 2013-10-24 22:05:22 -0400 |
---|---|---|
committer | Zack Rusin <zackr@vmware.com> | 2013-11-25 13:05:03 -0500 |
commit | 0510ec67e2c5b5ddb4755564314ccfe057555984 (patch) | |
tree | 0e9c19cf4e6f3ad63d0fc0818056a2f3de41e523 /src/gallium/drivers/llvmpipe/lp_rast_tri.c | |
parent | 5455c818b558cdc618441988434eb2755cd98b67 (diff) | |
download | external_mesa3d-0510ec67e2c5b5ddb4755564314ccfe057555984.zip external_mesa3d-0510ec67e2c5b5ddb4755564314ccfe057555984.tar.gz external_mesa3d-0510ec67e2c5b5ddb4755564314ccfe057555984.tar.bz2 |
llvmpipe: support 8bit subpixel precision
8 bit subpixel precision is required by d3d10, but unfortunately it
requires a 64 bit rasterizer. This commit implements
64 bit rasterization with full support for 8 bit subpixel
precision. It is a combination of all the individual commits
from the llvmpipe-rast-64 branch.
Signed-off-by: Zack Rusin <zackr@vmware.com>
Reviewed-by: José Fonseca <jfonseca@vmware.com>
Reviewed-by: Brian Paul <brianp@vmware.com>
Reviewed-by: Roland Scheidegger <sroland@vmware.com>
Diffstat (limited to 'src/gallium/drivers/llvmpipe/lp_rast_tri.c')
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_rast_tri.c | 173 |
1 files changed, 121 insertions, 52 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_rast_tri.c b/src/gallium/drivers/llvmpipe/lp_rast_tri.c index 5ef070a..41f6fbf 100644 --- a/src/gallium/drivers/llvmpipe/lp_rast_tri.c +++ b/src/gallium/drivers/llvmpipe/lp_rast_tri.c @@ -35,9 +35,6 @@ #include "lp_perf.h" #include "lp_rast_priv.h" - - - /** * Shade all pixels in a 4x4 block. */ @@ -66,44 +63,42 @@ block_full_16(struct lp_rasterizer_task *task, block_full_4(task, tri, x + ix, y + iy); } -#if !defined(PIPE_ARCH_SSE) - static INLINE unsigned -build_mask_linear(int c, int dcdx, int dcdy) +build_mask_linear(int64_t c, int64_t dcdx, int64_t dcdy) { - int mask = 0; - - int c0 = c; - int c1 = c0 + dcdy; - int c2 = c1 + dcdy; - int c3 = c2 + dcdy; - - mask |= ((c0 + 0 * dcdx) >> 31) & (1 << 0); - mask |= ((c0 + 1 * dcdx) >> 31) & (1 << 1); - mask |= ((c0 + 2 * dcdx) >> 31) & (1 << 2); - mask |= ((c0 + 3 * dcdx) >> 31) & (1 << 3); - mask |= ((c1 + 0 * dcdx) >> 31) & (1 << 4); - mask |= ((c1 + 1 * dcdx) >> 31) & (1 << 5); - mask |= ((c1 + 2 * dcdx) >> 31) & (1 << 6); - mask |= ((c1 + 3 * dcdx) >> 31) & (1 << 7); - mask |= ((c2 + 0 * dcdx) >> 31) & (1 << 8); - mask |= ((c2 + 1 * dcdx) >> 31) & (1 << 9); - mask |= ((c2 + 2 * dcdx) >> 31) & (1 << 10); - mask |= ((c2 + 3 * dcdx) >> 31) & (1 << 11); - mask |= ((c3 + 0 * dcdx) >> 31) & (1 << 12); - mask |= ((c3 + 1 * dcdx) >> 31) & (1 << 13); - mask |= ((c3 + 2 * dcdx) >> 31) & (1 << 14); - mask |= ((c3 + 3 * dcdx) >> 31) & (1 << 15); + unsigned mask = 0; + + int64_t c0 = c; + int64_t c1 = c0 + dcdy; + int64_t c2 = c1 + dcdy; + int64_t c3 = c2 + dcdy; + + mask |= ((c0 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 0); + mask |= ((c0 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 1); + mask |= ((c0 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 2); + mask |= ((c0 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 3); + mask |= ((c1 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 4); + mask |= ((c1 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 5); + mask |= ((c1 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 6); + mask |= ((c1 + 3 * dcdx) >> 
FIXED_SHIFT) & (1 << 7); + mask |= ((c2 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 8); + mask |= ((c2 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 9); + mask |= ((c2 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 10); + mask |= ((c2 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 11); + mask |= ((c3 + 0 * dcdx) >> FIXED_SHIFT) & (1 << 12); + mask |= ((c3 + 1 * dcdx) >> FIXED_SHIFT) & (1 << 13); + mask |= ((c3 + 2 * dcdx) >> FIXED_SHIFT) & (1 << 14); + mask |= ((c3 + 3 * dcdx) >> FIXED_SHIFT) & (1 << 15); return mask; } static INLINE void -build_masks(int c, - int cdiff, - int dcdx, - int dcdy, +build_masks(int64_t c, + int64_t cdiff, + int64_t dcdx, + int64_t dcdy, unsigned *outmask, unsigned *partmask) { @@ -122,6 +117,13 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, } void +lp_rast_triangle_3_4(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + lp_rast_triangle_3_16(task, arg); +} + +void lp_rast_triangle_4_16(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { @@ -131,11 +133,33 @@ lp_rast_triangle_4_16(struct lp_rasterizer_task *task, lp_rast_triangle_4(task, arg2); } +#if !defined(PIPE_ARCH_SSE) + void -lp_rast_triangle_3_4(struct lp_rasterizer_task *task, +lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + union lp_rast_cmd_arg arg2; + arg2.triangle.tri = arg.triangle.tri; + arg2.triangle.plane_mask = (1<<3)-1; + lp_rast_triangle_32_3(task, arg2); +} + +void +lp_rast_triangle_32_4_16(struct lp_rasterizer_task *task, + const union lp_rast_cmd_arg arg) +{ + union lp_rast_cmd_arg arg2; + arg2.triangle.tri = arg.triangle.tri; + arg2.triangle.plane_mask = (1<<4)-1; + lp_rast_triangle_32_4(task, arg2); +} + +void +lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { - lp_rast_triangle_3_16(task, arg); + lp_rast_triangle_32_3_16(task, arg); } #else @@ -144,12 +168,12 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, static INLINE void -build_masks(int c, - int 
cdiff, - int dcdx, - int dcdy, - unsigned *outmask, - unsigned *partmask) +build_masks_32(int c, + int cdiff, + int dcdx, + int dcdy, + unsigned *outmask, + unsigned *partmask) { __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3); __m128i xdcdy = _mm_set1_epi32(dcdy); @@ -190,7 +214,7 @@ build_masks(int c, static INLINE unsigned -build_mask_linear(int c, int dcdx, int dcdy) +build_mask_linear_32(int c, int dcdx, int dcdy) { __m128i cstep0 = _mm_setr_epi32(c, c+dcdx, c+dcdx*2, c+dcdx*3); __m128i xdcdy = _mm_set1_epi32(dcdy); @@ -248,7 +272,7 @@ sign_bits4(const __m128i *cstep, int cdiff) void -lp_rast_triangle_3_16(struct lp_rasterizer_task *task, +lp_rast_triangle_32_3_16(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; @@ -260,9 +284,9 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, struct { unsigned mask:16; unsigned i:8; unsigned j:8; } out[16]; unsigned nr = 0; - __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, eo */ - __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ - __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ + __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */ + __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */ + __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */ __m128i zero = _mm_setzero_si128(); __m128i c; @@ -362,7 +386,7 @@ lp_rast_triangle_3_16(struct lp_rasterizer_task *task, void -lp_rast_triangle_3_4(struct lp_rasterizer_task *task, +lp_rast_triangle_32_3_4(struct lp_rasterizer_task *task, const union lp_rast_cmd_arg arg) { const struct lp_rast_triangle *tri = arg.triangle.tri; @@ -370,9 +394,9 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, unsigned x = (arg.triangle.plane_mask & 0xff) + task->x; unsigned y = (arg.triangle.plane_mask >> 8) + task->y; - __m128i p0 = _mm_load_si128((__m128i *)&plane[0]); /* c, dcdx, dcdy, 
eo */ - __m128i p1 = _mm_load_si128((__m128i *)&plane[1]); /* c, dcdx, dcdy, eo */ - __m128i p2 = _mm_load_si128((__m128i *)&plane[2]); /* c, dcdx, dcdy, eo */ + __m128i p0 = lp_plane_to_m128i(&plane[0]); /* c, dcdx, dcdy, eo */ + __m128i p1 = lp_plane_to_m128i(&plane[1]); /* c, dcdx, dcdy, eo */ + __m128i p2 = lp_plane_to_m128i(&plane[2]); /* c, dcdx, dcdy, eo */ __m128i zero = _mm_setzero_si128(); __m128i c; @@ -450,7 +474,8 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, #endif - +#define BUILD_MASKS(c, cdiff, dcdx, dcdy, omask, pmask) build_masks(c, cdiff, dcdx, dcdy, omask, pmask) +#define BUILD_MASK_LINEAR(c, dcdx, dcdy) build_mask_linear(c, dcdx, dcdy) #define TAG(x) x##_1 #define NR_PLANES 1 @@ -468,7 +493,7 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, #define TAG(x) x##_4 #define NR_PLANES 4 -#define TRI_16 lp_rast_triangle_4_16 +/*#define TRI_16 lp_rast_triangle_4_16*/ #include "lp_rast_tri_tmp.h" #define TAG(x) x##_5 @@ -487,3 +512,47 @@ lp_rast_triangle_3_4(struct lp_rasterizer_task *task, #define NR_PLANES 8 #include "lp_rast_tri_tmp.h" +#ifdef PIPE_ARCH_SSE +#undef BUILD_MASKS +#undef BUILD_MASK_LINEAR +#define BUILD_MASKS(c, cdiff, dcdx, dcdy, omask, pmask) build_masks_32((int)c, (int)cdiff, dcdx, dcdy, omask, pmask) +#define BUILD_MASK_LINEAR(c, dcdx, dcdy) build_mask_linear_32((int)c, dcdx, dcdy) +#endif + +#define TAG(x) x##_32_1 +#define NR_PLANES 1 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_32_2 +#define NR_PLANES 2 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_32_3 +#define NR_PLANES 3 +/*#define TRI_4 lp_rast_triangle_3_4*/ +/*#define TRI_16 lp_rast_triangle_3_16*/ +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_32_4 +#define NR_PLANES 4 +#ifdef PIPE_ARCH_SSE +#define TRI_16 lp_rast_triangle_32_4_16 +#endif +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_32_5 +#define NR_PLANES 5 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_32_6 +#define NR_PLANES 6 +#include "lp_rast_tri_tmp.h" + +#define 
TAG(x) x##_32_7 +#define NR_PLANES 7 +#include "lp_rast_tri_tmp.h" + +#define TAG(x) x##_32_8 +#define NR_PLANES 8 +#include "lp_rast_tri_tmp.h" + |