diff options
author | James Benton <jbenton@vmware.com> | 2012-09-13 16:04:42 +0100 |
---|---|---|
committer | José Fonseca <jfonseca@vmware.com> | 2012-11-28 19:14:36 +0000 |
commit | fa1b481c09b14e01eca1b3db8e0854033f6dee3d (patch) | |
tree | 170810687d31e60041309682e8f923f409174077 /src/gallium/auxiliary | |
parent | 1d3789bccbbcc814fd7b339e9f5b5631e30d9f0e (diff) | |
download | external_mesa3d-fa1b481c09b14e01eca1b3db8e0854033f6dee3d.zip external_mesa3d-fa1b481c09b14e01eca1b3db8e0854033f6dee3d.tar.gz external_mesa3d-fa1b481c09b14e01eca1b3db8e0854033f6dee3d.tar.bz2 |
llvmpipe: Unswizzled rendering.
Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_conv.c | 75 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_conv.h | 10 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_logic.c | 9 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_logic.h | 3 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_pack.c | 36 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_pack.h | 9 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_quad.c | 50 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_quad.h | 9 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_sample.c | 22 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_swizzle.c | 137 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_swizzle.h | 25 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c | 4 |
12 files changed, 360 insertions, 29 deletions
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.c b/src/gallium/auxiliary/gallivm/lp_bld_conv.c index af942ad..cc44236 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.c @@ -415,6 +415,81 @@ lp_build_unsigned_norm_to_float(struct gallivm_state *gallivm, /** + * Pick a suitable num_dsts for lp_build_conv to ensure optimal cases are used. + * + * Returns the number of dsts created from src + */ +int lp_build_conv_auto(struct gallivm_state *gallivm, + struct lp_type src_type, + struct lp_type* dst_type, + const LLVMValueRef *src, + unsigned num_srcs, + LLVMValueRef *dst) +{ + int i; + int num_dsts = num_srcs; + + if (src_type.floating == dst_type->floating && + src_type.width == dst_type->width && + src_type.length == dst_type->length && + src_type.fixed == dst_type->fixed && + src_type.norm == dst_type->norm && + src_type.sign == dst_type->sign) + return num_dsts; + + /* Special case 4x4f -> 1x16ub or 2x8f -> 1x16ub + */ + if (src_type.floating == 1 && + src_type.fixed == 0 && + src_type.sign == 1 && + src_type.norm == 0 && + src_type.width == 32 && + + dst_type->floating == 0 && + dst_type->fixed == 0 && + dst_type->sign == 0 && + dst_type->norm == 1 && + dst_type->width == 8) + { + /* Special case 4x4f --> 1x16ub */ + if (src_type.length == 4 && util_cpu_caps.has_sse2) + { + assert((num_srcs % 4) == 0); + + num_dsts = num_srcs / 4; + dst_type->length = 16; + + lp_build_conv(gallivm, src_type, *dst_type, src, num_srcs, dst, num_dsts); + return num_dsts; + } + + /* Special case 2x8f --> 1x16ub */ + if (src_type.length == 8 && util_cpu_caps.has_avx) + { + assert((num_srcs % 2) == 0); + + num_dsts = num_srcs / 2; + dst_type->length = 16; + + lp_build_conv(gallivm, src_type, *dst_type, src, num_srcs, dst, num_dsts); + return num_dsts; + } + } + + /* lp_build_resize does not support M:N */ + if (src_type.width == dst_type->width) { + lp_build_conv(gallivm, src_type, *dst_type, src, num_srcs, dst, num_dsts); + 
} else { + for (i = 0; i < num_srcs; ++i) { + lp_build_conv(gallivm, src_type, *dst_type, &src[i], 1, &dst[i], 1); + } + } + + return num_dsts; +} + + +/** * Generic type conversion. * * TODO: Take a precision argument, or even better, add a new precision member diff --git a/src/gallium/auxiliary/gallivm/lp_bld_conv.h b/src/gallium/auxiliary/gallivm/lp_bld_conv.h index ef22105..42a1113 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_conv.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_conv.h @@ -70,6 +70,16 @@ lp_build_conv(struct gallivm_state *gallivm, const LLVMValueRef *srcs, unsigned num_srcs, LLVMValueRef *dsts, unsigned num_dsts); + +int +lp_build_conv_auto(struct gallivm_state *gallivm, + struct lp_type src_type, + struct lp_type* dst_type, + const LLVMValueRef *src, + unsigned num_srcs, + LLVMValueRef *dst); + + void lp_build_conv_mask(struct gallivm_state *gallivm, struct lp_type src_type, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.c b/src/gallium/auxiliary/gallivm/lp_bld_logic.c index 8a77a43..f56b61b 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.c @@ -560,7 +560,8 @@ LLVMValueRef lp_build_select_aos(struct lp_build_context *bld, unsigned mask, LLVMValueRef a, - LLVMValueRef b) + LLVMValueRef b, + unsigned num_channels) { LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; @@ -594,8 +595,8 @@ lp_build_select_aos(struct lp_build_context *bld, LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) + for(j = 0; j < n; j += num_channels) + for(i = 0; i < num_channels; ++i) shuffles[j + i] = LLVMConstInt(elem_type, (mask & (1 << i) ? 
0 : n) + j + i, 0); @@ -603,7 +604,7 @@ lp_build_select_aos(struct lp_build_context *bld, return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), ""); } else { - LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, 4); + LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels); return lp_build_select(bld, mask_vec, a, b); } } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_logic.h b/src/gallium/auxiliary/gallivm/lp_bld_logic.h index 64c0a1f..f530424 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_logic.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_logic.h @@ -79,7 +79,8 @@ LLVMValueRef lp_build_select_aos(struct lp_build_context *bld, unsigned mask, LLVMValueRef a, - LLVMValueRef b); + LLVMValueRef b, + unsigned num_channels); LLVMValueRef diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.c b/src/gallium/auxiliary/gallivm/lp_bld_pack.c index e57d414..b467d56 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.c @@ -211,6 +211,42 @@ lp_build_concat(struct gallivm_state *gallivm, return tmp[0]; } + +/** + * Combines vectors to reduce from num_srcs to num_dsts. + * Returns the number of src vectors concatenated in a single dst. + * + * num_srcs must be exactly divisible by num_dsts. + * + * e.g. For num_srcs = 4 and src = [x, y, z, w] + * num_dsts = 1 dst = [xyzw] return = 4 + * num_dsts = 2 dst = [xy, zw] return = 2 + */ +int +lp_build_concat_n(struct gallivm_state *gallivm, + struct lp_type src_type, + LLVMValueRef *src, + unsigned num_srcs, + LLVMValueRef *dst, + unsigned num_dsts) +{ + int size = num_srcs / num_dsts; + int i; + + assert(num_srcs >= num_dsts); + assert((num_srcs % size) == 0); + + if (num_srcs == num_dsts) + return 1; + + for (i = 0; i < num_dsts; ++i) { + dst[i] = lp_build_concat(gallivm, &src[i * size], src_type, size); + } + + return size; +} + + /** * Interleave vector elements. 
* diff --git a/src/gallium/auxiliary/gallivm/lp_bld_pack.h b/src/gallium/auxiliary/gallivm/lp_bld_pack.h index f734c60..7cede35 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_pack.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_pack.h @@ -87,6 +87,15 @@ lp_build_concat(struct gallivm_state *gallivm, struct lp_type src_type, unsigned num_vectors); +int +lp_build_concat_n(struct gallivm_state *gallivm, + struct lp_type src_type, + LLVMValueRef *src, + unsigned num_srcs, + LLVMValueRef *dst, + unsigned num_dsts); + + LLVMValueRef lp_build_packs2(struct gallivm_state *gallivm, struct lp_type src_type, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.c b/src/gallium/auxiliary/gallivm/lp_bld_quad.c index c7c58ed..8a0efed 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_quad.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.c @@ -31,6 +31,7 @@ #include "lp_bld_const.h" #include "lp_bld_swizzle.h" #include "lp_bld_quad.h" +#include "lp_bld_pack.h" static const unsigned char @@ -156,3 +157,52 @@ lp_build_packed_ddx_ddy_twocoord(struct lp_build_context *bld, return LLVMBuildSub(builder, vec2, vec1, "ddxddyddxddy"); } + +/** + * Twiddle from quad format to row format + * + * src0 src1 + * ######### ######### ################# + * # 0 | 1 # # 4 | 5 # # 0 | 1 | 4 | 5 # src0 + * #---+---# #---+---# -> ################# + * # 2 | 3 # # 6 | 7 # # 2 | 3 | 6 | 7 # src1 + * ######### ######### ################# + * + */ +void +lp_bld_quad_twiddle(struct gallivm_state *gallivm, + struct lp_type lp_dst_type, + const LLVMValueRef* src, + unsigned src_count, + LLVMValueRef* dst) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMTypeRef dst_type_ref; + LLVMTypeRef type2_ref; + struct lp_type type2; + unsigned i; + + assert((src_count % 2) == 0); + + /* Create a type with only 2 elements */ + type2 = lp_dst_type; + type2.width = (lp_dst_type.width * lp_dst_type.length) / 2; + type2.length = 2; + type2.floating = 0; + + type2_ref = lp_build_vec_type(gallivm, type2); + 
dst_type_ref = lp_build_vec_type(gallivm, lp_dst_type); + + for (i = 0; i < src_count; i += 2) { + LLVMValueRef src0, src1; + + src0 = LLVMBuildBitCast(builder, src[i + 0], type2_ref, ""); + src1 = LLVMBuildBitCast(builder, src[i + 1], type2_ref, ""); + + dst[i + 0] = lp_build_interleave2(gallivm, type2, src0, src1, 0); + dst[i + 1] = lp_build_interleave2(gallivm, type2, src0, src1, 1); + + dst[i + 0] = LLVMBuildBitCast(builder, dst[i + 0], dst_type_ref, ""); + dst[i + 1] = LLVMBuildBitCast(builder, dst[i + 1], dst_type_ref, ""); + } +} diff --git a/src/gallium/auxiliary/gallivm/lp_bld_quad.h b/src/gallium/auxiliary/gallivm/lp_bld_quad.h index be6a1ef..e41f80e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_quad.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_quad.h @@ -88,5 +88,14 @@ LLVMValueRef lp_build_packed_ddx_ddy_onecoord(struct lp_build_context *bld, LLVMValueRef a); +/* + * Twiddle from quad format to row format + */ +void +lp_bld_quad_twiddle(struct gallivm_state *gallivm, + struct lp_type lp_dst_type, + const LLVMValueRef* src, + unsigned src_count, + LLVMValueRef* dst); #endif /* LP_BLD_QUAD_H_ */ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c index 37490e4..8ea5f5e 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c @@ -772,7 +772,7 @@ lp_build_get_mip_offsets(struct lp_build_sample_context *bld, offset1 = LLVMBuildLoad(builder, offset1, ""); offsets = LLVMBuildInsertElement(builder, offsets, offset1, indexo, ""); } - offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0); + offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, offsets, 0, 4); } else { unsigned i; @@ -849,7 +849,7 @@ lp_build_get_level_stride_vec(struct lp_build_sample_context *bld, stride1 = LLVMBuildLoad(builder, stride1, ""); stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, ""); } - stride = 
lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0); + stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0, 4); } else { LLVMValueRef stride1; @@ -1045,11 +1045,11 @@ lp_build_extract_image_sizes(struct lp_build_sample_context *bld, *out_width = size; } else if (bld->num_lods == num_quads) { - *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0); + *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4); if (dims >= 2) { - *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1); + *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4); if (dims == 3) { - *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2); + *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2, 4); } } } @@ -1246,9 +1246,9 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, signrxyz = LLVMBuildBitCast(builder, rxyz, lp_build_vec_type(gallivm, intctype), ""); signrxyz = LLVMBuildAnd(builder, signrxyz, signmask, ""); - arxs = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 0); - arys = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 1); - arzs = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 2); + arxs = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 0, 4); + arys = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 1, 4); + arzs = lp_build_swizzle_scalar_aos(coord_bld, arxyz, 2, 4); /* * select x if x >= y else select y @@ -1267,15 +1267,15 @@ lp_build_cube_lookup(struct lp_build_sample_context *bld, * snewz = signrz * rx; * tnewz = -ry; */ - signrxs = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 0); + signrxs = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 0, 4); snewx = LLVMBuildXor(builder, signrxs, rzneg, ""); tnewx = ryneg; - signrys = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 1); + signrys = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 1, 4); snewy = rx; tnewy = LLVMBuildXor(builder, signrys, rz, ""); - signrzs = lp_build_swizzle_scalar_aos(cint_bld, signrxyz, 2); + signrzs = lp_build_swizzle_scalar_aos(cint_bld, 
signrxyz, 2, 4); snewz = LLVMBuildXor(builder, signrzs, rx, ""); tnewz = ryneg; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c index 4ae4f37..377884a 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.c @@ -159,21 +159,24 @@ lp_build_extract_broadcast(struct gallivm_state *gallivm, /** - * Swizzle one channel into all other three channels. + * Swizzle one channel into other channels. */ LLVMValueRef lp_build_swizzle_scalar_aos(struct lp_build_context *bld, LLVMValueRef a, - unsigned channel) + unsigned channel, + unsigned num_channels) { LLVMBuilderRef builder = bld->gallivm->builder; const struct lp_type type = bld->type; const unsigned n = type.length; unsigned i, j; - if(a == bld->undef || a == bld->zero || a == bld->one) + if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1) return a; + assert(num_channels == 2 || num_channels == 4); + /* XXX: SSSE3 has PSHUFB which should be better than bitmasks, but forcing * using shuffles here actually causes worse results. More investigation is * needed. */ @@ -184,12 +187,55 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld, LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; - for(j = 0; j < n; j += 4) - for(i = 0; i < 4; ++i) + for(j = 0; j < n; j += num_channels) + for(i = 0; i < num_channels; ++i) shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0); return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), ""); } + else if (num_channels == 2) { + /* + * Bit mask and shifts + * + * XY XY .... XY <= input + * 0Y 0Y .... 0Y + * YY YY .... YY + * YY YY .... 
YY <= output + */ + struct lp_type type2; + LLVMValueRef tmp = NULL; + int shift; + + a = LLVMBuildAnd(builder, a, + lp_build_const_mask_aos(bld->gallivm, + type, 1 << channel, num_channels), ""); + + type2 = type; + type2.floating = FALSE; + type2.width *= 2; + type2.length /= 2; + + a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), ""); + +#ifdef PIPE_ARCH_LITTLE_ENDIAN + shift = channel == 0 ? 1 : -1; +#else + shift = channel == 0 ? -1 : 1; +#endif + + if (shift > 0) { + tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), ""); + } else if (shift < 0) { + tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), ""); + } + + assert(tmp); + if (tmp) { + a = LLVMBuildOr(builder, a, tmp, ""); + } + + return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), ""); + } else { /* * Bit mask and recursive shifts @@ -247,6 +293,45 @@ lp_build_swizzle_scalar_aos(struct lp_build_context *bld, } +/** + * Swizzle a vector consisting of an array of XYZW structs. + * + * This fills a vector of dst_len length with the swizzled channels from src. + * + * e.g. 
with swizzles = { 2, 1, 0 }, num_swizzles = 3 and dst_len = 8 results in + * RGBA RGBA = BGR BGR BG + * + * @param swizzles the swizzle array + * @param num_swizzles the number of elements in swizzles + * @param dst_len the length of the result + */ +LLVMValueRef +lp_build_swizzle_aos_n(struct gallivm_state* gallivm, + LLVMValueRef src, + const unsigned char* swizzles, + unsigned num_swizzles, + unsigned dst_len) +{ + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH]; + unsigned i; + + assert(dst_len < LP_MAX_VECTOR_WIDTH); + + for (i = 0; i < dst_len; ++i) { + int swizzle = swizzles[i % num_swizzles]; + + if (swizzle == LP_BLD_SWIZZLE_DONTCARE) { + shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context)); + } else { + shuffles[i] = lp_build_const_int32(gallivm, swizzle); + } + } + + return LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), LLVMConstVector(shuffles, dst_len), ""); +} + + LLVMValueRef lp_build_swizzle_aos(struct lp_build_context *bld, LLVMValueRef a, @@ -272,7 +357,7 @@ lp_build_swizzle_aos(struct lp_build_context *bld, case PIPE_SWIZZLE_GREEN: case PIPE_SWIZZLE_BLUE: case PIPE_SWIZZLE_ALPHA: - return lp_build_swizzle_scalar_aos(bld, a, swizzles[0]); + return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4); case PIPE_SWIZZLE_ZERO: return bld->zero; case PIPE_SWIZZLE_ONE: @@ -367,7 +452,7 @@ lp_build_swizzle_aos(struct lp_build_context *bld, cond |= 1 << chan; } } - res = lp_build_select_aos(bld, cond, bld->one, bld->zero); + res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4); /* * Build a type where each element is an integer that cover the four @@ -554,6 +639,44 @@ lp_build_transpose_aos(struct gallivm_state *gallivm, /** + * Transpose from AOS <-> SOA for num_srcs + */ +void +lp_build_transpose_aos_n(struct gallivm_state *gallivm, + struct lp_type type, + const LLVMValueRef* src, + unsigned num_srcs, + LLVMValueRef* dst) +{ + switch (num_srcs) { + case 1: + dst[0] = src[0]; + 
break; + + case 2: + { + /* Note: we must use a temporary in case src == dst */ + LLVMValueRef lo, hi; + + lo = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0); + hi = lp_build_interleave2_half(gallivm, type, src[0], src[1], 1); + + dst[0] = lo; + dst[1] = hi; + break; + } + + case 4: + lp_build_transpose_aos(gallivm, type, src, dst); + break; + + default: + assert(0); + }; +} + + +/** * Pack n-th element of aos values, * pad out to destination size. * i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _ diff --git a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h index c49d916..91ecd34 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_swizzle.h @@ -67,13 +67,14 @@ lp_build_extract_broadcast(struct gallivm_state *gallivm, /** - * Broadcast one channel of a vector composed of arrays of XYZW structures into - * all four channel. + * Broadcast one channel of a vector composed of arrays of XYZ.. structures into + * all channels XXX... 
*/ LLVMValueRef lp_build_swizzle_scalar_aos(struct lp_build_context *bld, - LLVMValueRef a, - unsigned channel); + LLVMValueRef a, + unsigned channel, + unsigned num_channels); /** @@ -88,6 +89,14 @@ lp_build_swizzle_aos(struct lp_build_context *bld, LLVMValueRef +lp_build_swizzle_aos_n(struct gallivm_state* gallivm, + LLVMValueRef src, + const unsigned char* swizzles, + unsigned num_swizzles, + unsigned dst_len); + + +LLVMValueRef lp_build_swizzle_soa_channel(struct lp_build_context *bld, const LLVMValueRef *unswizzled, unsigned swizzle); @@ -113,6 +122,14 @@ lp_build_transpose_aos(struct gallivm_state *gallivm, LLVMValueRef dst[4]); +void +lp_build_transpose_aos_n(struct gallivm_state *gallivm, + struct lp_type type, + const LLVMValueRef* src, + unsigned num_srcs, + LLVMValueRef* dst); + + LLVMValueRef lp_build_pack_aos_scalars(struct gallivm_state *gallivm, struct lp_type src_type, diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c index 44f684a..dbd9ccb 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_aos.c @@ -94,7 +94,7 @@ swizzle_scalar_aos(struct lp_build_tgsi_aos_context *bld, unsigned chan) { chan = bld->swizzles[chan]; - return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan); + return lp_build_swizzle_scalar_aos(&bld->bld_base.base, a, chan, 4); } @@ -623,7 +623,7 @@ lp_emit_instruction_aos( case TGSI_OPCODE_EX2: src0 = lp_build_emit_fetch(&bld->bld_base, inst, 0, LP_CHAN_ALL); - tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X); + tmp0 = lp_build_swizzle_scalar_aos(&bld->bld_base.base, src0, TGSI_SWIZZLE_X, TGSI_NUM_CHANNELS); dst0 = lp_build_exp2(&bld->bld_base.base, tmp0); break; |