author    Roland Scheidegger <sroland@vmware.com>    2013-03-09 01:46:33 +0100
committer Roland Scheidegger <sroland@vmware.com>    2013-03-12 00:24:22 +0100
commit    5c41d1c22282fe2fd72a77339246de8e861b4b22 (patch)
tree      426989fa6fa76bcfd859a6e826ed15d4314a2a91 /src/gallium/auxiliary/gallivm/lp_bld_sample.c
parent    b7262ac7ea650c4416af28097c66fc64f72e3c28 (diff)
gallivm: clean up passing derivatives around
Previously, the derivatives were calculated and passed in a packed form to the sample code (explicit derivatives were packed into the same format used for implicit ones). There are several reasons why this wasn't such a good idea:

1) The derivatives may not even be needed (not as bad as it sounds, since llvm will just throw away the calculations needed for them, but still).
2) The special packing format really shouldn't be part of the sampler interface.
3) Depending on what the sample code actually does, the derivatives will be processed differently, so there is no "ideal" packing. For cube maps with explicit derivatives (which we don't do yet), for instance, the packing looked downright useless, and for non-isotropic filtering we'd need different calculations too.

So, instead, just pass the derivatives as-is (for explicit derivatives), or let the rho-calculating sample code compute them itself. This still does exactly the same packing for implicit derivatives for now, though explicit ones are handled in a more straightforward manner. Quick estimates show performance should be quite similar, but the new code is much easier to follow and also does the rho calculation per-pixel until the end, which we eventually need for spec compliance anyway.

No piglit changes.

Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
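To illustrate the new explicit-derivative path, here is a minimal scalar model of the per-pixel rho computation that the vectorized code in the diff performs. This is plain C, not the actual gallivm/LLVM IR builder code; the helper name and the float-array representation are made up for this sketch, and size[] stands for the (minified) texture size the real code derives from first_level.

#include <math.h>

/* Hypothetical scalar model of the explicit-derivative rho computation:
 * for isotropic filtering, rho is the largest texel-space derivative,
 * i.e. the max over the coordinates of size[i] * max(|ddx[i]|, |ddy[i]|). */
static float
rho_explicit(const float ddx[3], const float ddy[3],
             const float size[3], unsigned dims)
{
   float rho = 0.0f;
   for (unsigned i = 0; i < dims; i++) {
      float dmax = fmaxf(fabsf(ddx[i]), fabsf(ddy[i]));  /* max |derivative| per coord */
      rho = fmaxf(rho, size[i] * dmax);                  /* scale into texel space */
   }
   return rho;                                           /* lod = log2(rho) later on */
}

The real code does the same thing on SoA vectors and then packs the per-pixel rho down to one scalar per quad (lp_build_pack_aos_scalars), since the rest of the mip selection is still per-quad.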
Diffstat (limited to 'src/gallium/auxiliary/gallivm/lp_bld_sample.c')
-rw-r--r--  src/gallium/auxiliary/gallivm/lp_bld_sample.c | 271
1 file changed, 163 insertions, 108 deletions
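For the implicit-derivative path the patch keeps the existing quad-based packing internally. As a rough sketch of what that amounts to per quad, here is a plain C model which assumes simple right-minus-left / bottom-minus-top quad differences and an assumed pixel order of {top-left, top-right, bottom-left, bottom-right}, rather than the actual packed SoA shuffles done via lp_build_packed_ddx_ddy_onecoord/twocoord in the diff below.

#include <math.h>

/* Hypothetical per-quad model: derivatives are approximated by finite
 * differences across the 2x2 quad, then combined the same way as in the
 * explicit case, yielding one rho value per quad. */
static float
rho_implicit_quad(const float s[4], const float t[4],
                  float width, float height)
{
   float dsdx = fabsf(s[1] - s[0]), dsdy = fabsf(s[2] - s[0]);
   float dtdx = fabsf(t[1] - t[0]), dtdy = fabsf(t[2] - t[0]);
   float rho_s = width  * fmaxf(dsdx, dsdy);   /* texel-space s derivative */
   float rho_t = height * fmaxf(dtdx, dtdy);   /* texel-space t derivative */
   return fmaxf(rho_s, rho_t);
}

The diff implements this with abs/swizzle/max on the packed ddx_ddy vectors and only multiplies by the (per-quad broadcast) texture size at the end; the result is mathematically the same since the size is non-negative.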
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_sample.c b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
index ef0631c..fc8bae7 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_sample.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_sample.c
@@ -46,6 +46,7 @@
#include "lp_bld_type.h"
#include "lp_bld_logic.h"
#include "lp_bld_pack.h"
+#include "lp_bld_quad.h"
/*
@@ -203,6 +204,9 @@ lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
static LLVMValueRef
lp_build_rho(struct lp_build_sample_context *bld,
unsigned texture_unit,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
const struct lp_derivatives *derivs)
{
struct gallivm_state *gallivm = bld->gallivm;
@@ -211,8 +215,8 @@ lp_build_rho(struct lp_build_sample_context *bld,
struct lp_build_context *float_bld = &bld->float_bld;
struct lp_build_context *coord_bld = &bld->coord_bld;
struct lp_build_context *perquadf_bld = &bld->perquadf_bld;
- const LLVMValueRef *ddx_ddy = derivs->ddx_ddy;
const unsigned dims = bld->dims;
+ LLVMValueRef ddx_ddy[2];
LLVMBuilderRef builder = bld->gallivm->builder;
LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
@@ -229,59 +233,7 @@ lp_build_rho(struct lp_build_sample_context *bld,
LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
LLVMValueRef rho_xvec, rho_yvec;
- abs_ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
- if (dims > 2) {
- abs_ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
- }
- else {
- abs_ddx_ddy[1] = NULL;
- }
-
- if (dims == 1) {
- static const unsigned char swizzle1[] = {
- 0, LP_BLD_SWIZZLE_DONTCARE,
- LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
- };
- static const unsigned char swizzle2[] = {
- 1, LP_BLD_SWIZZLE_DONTCARE,
- LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
- };
- rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
- rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
- }
- else if (dims == 2) {
- static const unsigned char swizzle1[] = {
- 0, 2,
- LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
- };
- static const unsigned char swizzle2[] = {
- 1, 3,
- LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
- };
- rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
- rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
- }
- else {
- LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
- LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
- assert(dims == 3);
- for (i = 0; i < num_quads; i++) {
- shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
- shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
- shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i);
- shuffles1[4*i + 3] = i32undef;
- shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
- shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
- shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 1);
- shuffles2[4*i + 3] = i32undef;
- }
- rho_xvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1],
- LLVMConstVector(shuffles1, length), "");
- rho_yvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1],
- LLVMConstVector(shuffles2, length), "");
- }
-
- rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
+ /* Note that all simplified calculations will only work for isotropic filtering */
first_level = bld->dynamic_state->first_level(bld->dynamic_state,
bld->gallivm, texture_unit);
@@ -289,76 +241,176 @@ lp_build_rho(struct lp_build_sample_context *bld,
int_size = lp_build_minify(int_size_bld, bld->int_size, first_level_vec);
float_size = lp_build_int_to_float(float_size_bld, int_size);
- if (bld->coord_type.length > 4) {
- /* expand size to each quad */
+ /* XXX ignoring explicit derivs for cube maps for now */
+ if (derivs && !(bld->static_texture_state->target == PIPE_TEXTURE_CUBE)) {
+ LLVMValueRef ddmax[3];
+ for (i = 0; i < dims; i++) {
+ LLVMValueRef ddx, ddy;
+ LLVMValueRef floatdim;
+ LLVMValueRef indexi = lp_build_const_int32(gallivm, i);
+ ddx = lp_build_abs(coord_bld, derivs->ddx[i]);
+ ddy = lp_build_abs(coord_bld, derivs->ddy[i]);
+ ddmax[i] = lp_build_max(coord_bld, ddx, ddy);
+ floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
+ coord_bld->type, float_size, indexi);
+ ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]);
+ }
+ rho_vec = ddmax[0];
if (dims > 1) {
- /* could use some broadcast_vector helper for this? */
- int num_quads = bld->coord_type.length / 4;
- LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
- for (i = 0; i < num_quads; i++) {
- src[i] = float_size;
+ rho_vec = lp_build_max(coord_bld, rho_vec, ddmax[1]);
+ if (dims > 2) {
+ rho_vec = lp_build_max(coord_bld, rho_vec, ddmax[2]);
+ }
+ }
+ /*
+ * rho_vec now still contains per-pixel rho, convert to scalar per quad
+ * since we can't handle per-pixel rho/lod from now on (TODO).
+ */
+ rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
+ perquadf_bld->type, rho_vec, 0);
+ }
+ else {
+ /*
+ * This looks all a bit complex, but it's not that bad
+ * (the shuffle code makes it look worse than it is).
+ * Still, might not be ideal for all cases.
+ */
+ if (dims < 2) {
+ ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s);
+ }
+ else if (dims >= 2) {
+ ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld,
+ s, t);
+ if (dims > 2) {
+ ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
}
- float_size = lp_build_concat(bld->gallivm, src, float_size_bld->type, num_quads);
+ }
+
+ abs_ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
+ if (dims > 2) {
+ abs_ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
}
else {
- float_size = lp_build_broadcast_scalar(coord_bld, float_size);
+ abs_ddx_ddy[1] = NULL;
}
- rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);
- if (dims <= 1) {
- rho = rho_vec;
+ if (dims == 1) {
+ static const unsigned char swizzle1[] = {
+ 0, LP_BLD_SWIZZLE_DONTCARE,
+ LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+ };
+ static const unsigned char swizzle2[] = {
+ 1, LP_BLD_SWIZZLE_DONTCARE,
+ LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+ };
+ rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
+ rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
+ }
+ else if (dims == 2) {
+ static const unsigned char swizzle1[] = {
+ 0, 2,
+ LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+ };
+ static const unsigned char swizzle2[] = {
+ 1, 3,
+ LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+ };
+ rho_xvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle1);
+ rho_yvec = lp_build_swizzle_aos(coord_bld, abs_ddx_ddy[0], swizzle2);
}
else {
- if (dims >= 2) {
- static const unsigned char swizzle1[] = {
- 0, LP_BLD_SWIZZLE_DONTCARE,
- LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
- };
- static const unsigned char swizzle2[] = {
- 1, LP_BLD_SWIZZLE_DONTCARE,
- LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
- };
- LLVMValueRef rho_s, rho_t, rho_r;
-
- rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
- rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
-
- rho = lp_build_max(coord_bld, rho_s, rho_t);
-
- if (dims >= 3) {
- static const unsigned char swizzle3[] = {
- 2, LP_BLD_SWIZZLE_DONTCARE,
+ LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
+ LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
+ assert(dims == 3);
+ for (i = 0; i < num_quads; i++) {
+ shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
+ shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
+ shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i);
+ shuffles1[4*i + 3] = i32undef;
+ shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
+ shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
+ shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 1);
+ shuffles2[4*i + 3] = i32undef;
+ }
+ rho_xvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1],
+ LLVMConstVector(shuffles1, length), "");
+ rho_yvec = LLVMBuildShuffleVector(builder, abs_ddx_ddy[0], abs_ddx_ddy[1],
+ LLVMConstVector(shuffles2, length), "");
+ }
+
+ rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);
+
+ if (bld->coord_type.length > 4) {
+ /* expand size to each quad */
+ if (dims > 1) {
+ /* could use some broadcast_vector helper for this? */
+ int num_quads = bld->coord_type.length / 4;
+ LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
+ for (i = 0; i < num_quads; i++) {
+ src[i] = float_size;
+ }
+ float_size = lp_build_concat(bld->gallivm, src, float_size_bld->type, num_quads);
+ }
+ else {
+ float_size = lp_build_broadcast_scalar(coord_bld, float_size);
+ }
+ rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);
+
+ if (dims <= 1) {
+ rho = rho_vec;
+ }
+ else {
+ if (dims >= 2) {
+ static const unsigned char swizzle1[] = {
+ 0, LP_BLD_SWIZZLE_DONTCARE,
LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
};
- rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle3);
- rho = lp_build_max(coord_bld, rho, rho_r);
+ static const unsigned char swizzle2[] = {
+ 1, LP_BLD_SWIZZLE_DONTCARE,
+ LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+ };
+ LLVMValueRef rho_s, rho_t, rho_r;
+
+ rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
+ rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
+
+ rho = lp_build_max(coord_bld, rho_s, rho_t);
+
+ if (dims >= 3) {
+ static const unsigned char swizzle3[] = {
+ 2, LP_BLD_SWIZZLE_DONTCARE,
+ LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
+ };
+ rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle3);
+ rho = lp_build_max(coord_bld, rho, rho_r);
+ }
}
}
- }
- rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
- perquadf_bld->type, rho, 0);
- }
- else {
- if (dims <= 1) {
- rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
- }
- rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
-
- if (dims <= 1) {
- rho = rho_vec;
+ rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
+ perquadf_bld->type, rho, 0);
}
else {
- if (dims >= 2) {
- LLVMValueRef rho_s, rho_t, rho_r;
+ if (dims <= 1) {
+ rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
+ }
+ rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);
+
+ if (dims <= 1) {
+ rho = rho_vec;
+ }
+ else {
+ if (dims >= 2) {
+ LLVMValueRef rho_s, rho_t, rho_r;
- rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
- rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");
+ rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
+ rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");
- rho = lp_build_max(float_bld, rho_s, rho_t);
+ rho = lp_build_max(float_bld, rho_s, rho_t);
- if (dims >= 3) {
- rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, "");
- rho = lp_build_max(float_bld, rho, rho_r);
+ if (dims >= 3) {
+ rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, "");
+ rho = lp_build_max(float_bld, rho, rho_r);
+ }
}
}
}
@@ -511,6 +563,9 @@ void
lp_build_lod_selector(struct lp_build_sample_context *bld,
unsigned texture_unit,
unsigned sampler_unit,
+ LLVMValueRef s,
+ LLVMValueRef t,
+ LLVMValueRef r,
const struct lp_derivatives *derivs,
LLVMValueRef lod_bias, /* optional */
LLVMValueRef explicit_lod, /* optional */
@@ -544,7 +599,7 @@ lp_build_lod_selector(struct lp_build_sample_context *bld,
else {
LLVMValueRef rho;
- rho = lp_build_rho(bld, texture_unit, derivs);
+ rho = lp_build_rho(bld, texture_unit, s, t, r, derivs);
/*
* Compute lod = log2(rho)