summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Roland Scheidegger <sroland@vmware.com> 2013-06-05 00:17:22 +0200
committer Roland Scheidegger <sroland@vmware.com> 2013-06-05 00:29:47 +0200
commit ffe2a1ca3c097661dd3f6e3ca5cfd72be184426c (patch)
tree 8cd2bcebb61a96090c2996e745a01e983f602d8d
parent ef3e887084fbb36b9041faafc18fb75f6cfa7501 (diff)
download external_mesa3d-ffe2a1ca3c097661dd3f6e3ca5cfd72be184426c.zip
external_mesa3d-ffe2a1ca3c097661dd3f6e3ca5cfd72be184426c.tar.gz
external_mesa3d-ffe2a1ca3c097661dd3f6e3ca5cfd72be184426c.tar.bz2
llvmpipe: reduce alignment requirement for 1d resources from 4x4 to 4x1
For rendering to buffers, we cannot have any y alignment. So make sure that tile clear commands only clear up to the fb width/height, not more (do this for all resources actually, as clearing more seems pointless for other resources too). For the jit fs function, skip execution of the lower half of the fragment shader for the 4x4 stamp completely; for depth/stencil, only load/store the values from the first row (replace the other row with undef). For the blend function, also only load half the values from fs output and replace the rest with undefs, so that everything still operates on the full 4x4 block to keep code the same between 4x1 and 4x4 (except for load/store of course, which also needs to skip (store) or replace these values with undefs (load)), at the cost of slightly less optimal code being produced in some cases. Reduce 1d and 1d array alignment too, because they can be handled the same as buffers so don't need to waste memory. v2: don't try to run special blend code for 4x1, (very) slightly less complexity if we just use the same code as for 4x4, which may or may not make it easier to optimize in the future (as we care a lot more about 4x4 performance than 1d). v2: don't use undef values for unused fs src outputs with llvm 3.1, as it apparently can trigger a bug in llvm. Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_bld_depth.c | 19
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_bld_depth.h | 2
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_rast.c | 8
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_scene.c | 2
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_scene.h | 4
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_state_fs.c | 121
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_state_fs.h | 1
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_texture.c | 24
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_texture.h | 21
9 files changed, 158 insertions, 44 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index df6a6c4..a8bd15f 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -525,6 +525,7 @@ lp_build_occlusion_count(struct gallivm_state *gallivm,
*
* \param type the data type of the fragment depth/stencil values
* \param format_desc description of the depth/stencil surface
+ * \param is_1d whether this resource has only one dimension
* \param loop_counter the current loop iteration
* \param depth_ptr pointer to the depth/stencil values of this 4x4 block
* \param depth_stride stride of the depth/stencil buffer
@@ -535,6 +536,7 @@ void
lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
struct lp_type z_src_type,
const struct util_format_description *format_desc,
+ boolean is_1d,
LLVMValueRef depth_ptr,
LLVMValueRef depth_stride,
LLVMValueRef *z_fb,
@@ -592,9 +594,14 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, "");
zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, "");
- zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
- zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
- zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");
+ if (is_1d) {
+ zs_dst2 = lp_build_undef(gallivm, zs_load_type);
+ }
+ else {
+ zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, "");
+ zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, "");
+ zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, "");
+ }
*z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2,
LLVMConstVector(shuffles, zs_type.length), "");
@@ -648,6 +655,7 @@ lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
*
* \param type the data type of the fragment depth/stencil values
* \param format_desc description of the depth/stencil surface
+ * \param is_1d whether this resource has only one dimension
* \param mask the alive/dead pixel mask for the quad (vector)
* \param z_fb z values read from fb (with padding)
* \param s_fb s values read from fb (with padding)
@@ -661,6 +669,7 @@ void
lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
struct lp_type z_src_type,
const struct util_format_description *format_desc,
+ boolean is_1d,
struct lp_build_mask_context *mask,
LLVMValueRef z_fb,
LLVMValueRef s_fb,
@@ -791,7 +800,9 @@ lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
}
LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1);
- LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
+ if (!is_1d) {
+ LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2);
+ }
}
/**
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.h b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
index 2534dc3..d169c89 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.h
@@ -74,6 +74,7 @@ void
lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm,
struct lp_type z_src_type,
const struct util_format_description *format_desc,
+ boolean is_1d,
LLVMValueRef depth_ptr,
LLVMValueRef depth_stride,
LLVMValueRef *z_fb,
@@ -84,6 +85,7 @@ void
lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm,
struct lp_type z_src_type,
const struct util_format_description *format_desc,
+ boolean is_1d,
struct lp_build_mask_context *mask,
LLVMValueRef z_fb,
LLVMValueRef s_fb,
diff --git a/src/gallium/drivers/llvmpipe/lp_rast.c b/src/gallium/drivers/llvmpipe/lp_rast.c
index be5a286..981dd71 100644
--- a/src/gallium/drivers/llvmpipe/lp_rast.c
+++ b/src/gallium/drivers/llvmpipe/lp_rast.c
@@ -95,10 +95,10 @@ lp_rast_tile_begin(struct lp_rasterizer_task *task,
task->bin = bin;
task->x = x * TILE_SIZE;
task->y = y * TILE_SIZE;
- task->width = TILE_SIZE + x * TILE_SIZE > task->scene->width_aligned ?
- task->scene->width_aligned - x * TILE_SIZE : TILE_SIZE;
- task->height = TILE_SIZE + y * TILE_SIZE > task->scene->height_aligned ?
- task->scene->height_aligned - y * TILE_SIZE : TILE_SIZE;
+ task->width = TILE_SIZE + x * TILE_SIZE > task->scene->fb.width ?
+ task->scene->fb.width - x * TILE_SIZE : TILE_SIZE;
+ task->height = TILE_SIZE + y * TILE_SIZE > task->scene->fb.height ?
+ task->scene->fb.height - y * TILE_SIZE : TILE_SIZE;
/* reset pointers to color and depth tile(s) */
memset(task->color_tiles, 0, sizeof(task->color_tiles));
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.c b/src/gallium/drivers/llvmpipe/lp_scene.c
index 2dfc7ff..771ad08 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.c
+++ b/src/gallium/drivers/llvmpipe/lp_scene.c
@@ -505,8 +505,6 @@ void lp_scene_begin_binning( struct lp_scene *scene,
scene->tiles_x = align(fb->width, TILE_SIZE) / TILE_SIZE;
scene->tiles_y = align(fb->height, TILE_SIZE) / TILE_SIZE;
- scene->width_aligned = align(fb->width, LP_RASTER_BLOCK_SIZE);
- scene->height_aligned = align(fb->height, LP_RASTER_BLOCK_SIZE);
assert(scene->tiles_x <= TILES_X);
assert(scene->tiles_y <= TILES_Y);
diff --git a/src/gallium/drivers/llvmpipe/lp_scene.h b/src/gallium/drivers/llvmpipe/lp_scene.h
index bc6c448..fa5bbca 100644
--- a/src/gallium/drivers/llvmpipe/lp_scene.h
+++ b/src/gallium/drivers/llvmpipe/lp_scene.h
@@ -144,10 +144,6 @@ struct lp_scene {
/** list of resources referenced by the scene commands */
struct resource_ref *resources;
- /** aligned scene width, height */
- unsigned width_aligned;
- unsigned height_aligned;
-
/** Total memory used by the scene (in bytes). This sums all the
* data blocks and counts all bins, state, resource references and
* other random allocations within the scene.
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index a7bd836..260d93c 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -353,7 +353,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
if (depth_mode & EARLY_DEPTH_TEST) {
lp_build_depth_stencil_load_swizzled(gallivm, type,
- zs_format_desc,
+ zs_format_desc, key->resource_1d,
depth_ptr, depth_stride,
&z_fb, &s_fb, loop_state.counter);
lp_build_depth_stencil_test(gallivm,
@@ -369,7 +369,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
!simple_shader);
if (depth_mode & EARLY_DEPTH_WRITE) {
- lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
+ lp_build_depth_stencil_write_swizzled(gallivm, type,
+ zs_format_desc, key->resource_1d,
NULL, NULL, NULL, loop_state.counter,
depth_ptr, depth_stride,
z_value, s_value);
@@ -424,7 +425,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
}
lp_build_depth_stencil_load_swizzled(gallivm, type,
- zs_format_desc,
+ zs_format_desc, key->resource_1d,
depth_ptr, depth_stride,
&z_fb, &s_fb, loop_state.counter);
@@ -441,7 +442,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
!simple_shader);
/* Late Z write */
if (depth_mode & LATE_DEPTH_WRITE) {
- lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
+ lp_build_depth_stencil_write_swizzled(gallivm, type,
+ zs_format_desc, key->resource_1d,
NULL, NULL, NULL, loop_state.counter,
depth_ptr, depth_stride,
z_value, s_value);
@@ -454,7 +456,8 @@ generate_fs_loop(struct gallivm_state *gallivm,
* depth value, update from zs_value with the new mask value and
* write that out.
*/
- lp_build_depth_stencil_write_swizzled(gallivm, type, zs_format_desc,
+ lp_build_depth_stencil_write_swizzled(gallivm, type,
+ zs_format_desc, key->resource_1d,
&mask, z_fb, s_fb, loop_state.counter,
depth_ptr, depth_stride,
z_value, s_value);
@@ -508,6 +511,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
*
* @param type fragment shader type (4x or 8x float)
* @param num_fs number of fs_src
+ * @param is_1d whether we're outputting to a 1d resource
* @param dst_channels number of output channels
* @param fs_src output from fragment shader
* @param dst pointer to store result
@@ -1345,6 +1349,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
LLVMValueRef blend_alpha;
LLVMValueRef i32_zero;
LLVMValueRef check_mask;
+ LLVMValueRef undef_src_val;
struct lp_build_mask_context mask_ctx;
struct lp_type mask_type;
@@ -1369,9 +1374,16 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
const boolean dual_source_blend = variant->key.blend.rt[0].blend_enable &&
util_blend_state_is_dual(&variant->key.blend, 0);
+ const boolean is_1d = variant->key.resource_1d;
+ unsigned num_fullblock_fs = is_1d ? 2 * num_fs : num_fs;
+
mask_type = lp_int32_vec4_type();
mask_type.length = fs_type.length;
+ for (i = num_fs; i < num_fullblock_fs; i++) {
+ fs_mask[i] = lp_build_zero(gallivm, mask_type);
+ }
+
/* Compute the alignment of the destination pointer in bytes */
#if 0
dst_alignment = (block_width * out_format_desc->block.bits + 7)/(out_format_desc->block.width * 8);
@@ -1388,7 +1400,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
if (do_branch) {
check_mask = LLVMConstNull(lp_build_int_vec_type(gallivm, mask_type));
- for (i = 0; i < num_fs; ++i) {
+ for (i = 0; i < num_fullblock_fs; ++i) {
check_mask = LLVMBuildOr(builder, check_mask, fs_mask[i], "");
}
@@ -1399,6 +1411,17 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
partial_mask |= !variant->opaque;
i32_zero = lp_build_const_int32(gallivm, 0);
+#if HAVE_LLVM < 0x0302
+ /*
+ * undef triggers a crash in LLVMBuildTrunc in convert_from_blend_type in some
+ * cases (seen with r10g10b10a2, 128bit wide vectors) (only used for 1d case).
+ */
+ undef_src_val = lp_build_zero(gallivm, fs_type);
+#else
+ undef_src_val = lp_build_undef(gallivm, fs_type);
+#endif
+
+
/* Get type from output format */
lp_blend_type_from_format_desc(out_format_desc, &row_type);
lp_mem_type_from_format_desc(out_format_desc, &dst_type);
@@ -1459,14 +1482,25 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
/*
* Load shader output
*/
- for (i = 0; i < num_fs; ++i) {
+ for (i = 0; i < num_fullblock_fs; ++i) {
/* Always load alpha for use in blending */
- LLVMValueRef alpha = LLVMBuildLoad(builder, fs_out_color[rt][alpha_channel][i], "");
+ LLVMValueRef alpha;
+ if (i < num_fs) {
+ alpha = LLVMBuildLoad(builder, fs_out_color[rt][alpha_channel][i], "");
+ }
+ else {
+ alpha = undef_src_val;
+ }
/* Load each channel */
for (j = 0; j < dst_channels; ++j) {
assert(swizzle[j] < 4);
- fs_src[i][j] = LLVMBuildLoad(builder, fs_out_color[rt][swizzle[j]][i], "");
+ if (i < num_fs) {
+ fs_src[i][j] = LLVMBuildLoad(builder, fs_out_color[rt][swizzle[j]][i], "");
+ }
+ else {
+ fs_src[i][j] = undef_src_val;
+ }
}
/* If 3 channels then pad to include alpha for 4 element transpose */
@@ -1492,12 +1526,23 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
}
if (dual_source_blend) {
/* same as above except different src/dst, skip masks and comments... */
- for (i = 0; i < num_fs; ++i) {
- LLVMValueRef alpha = LLVMBuildLoad(builder, fs_out_color[1][alpha_channel][i], "");
+ for (i = 0; i < num_fullblock_fs; ++i) {
+ LLVMValueRef alpha;
+ if (i < num_fs) {
+ alpha = LLVMBuildLoad(builder, fs_out_color[1][alpha_channel][i], "");
+ }
+ else {
+ alpha = undef_src_val;
+ }
for (j = 0; j < dst_channels; ++j) {
assert(swizzle[j] < 4);
- fs_src1[i][j] = LLVMBuildLoad(builder, fs_out_color[1][swizzle[j]][i], "");
+ if (i < num_fs) {
+ fs_src1[i][j] = LLVMBuildLoad(builder, fs_out_color[1][swizzle[j]][i], "");
+ }
+ else {
+ fs_src1[i][j] = undef_src_val;
+ }
}
if (dst_channels == 3 && !has_alpha) {
fs_src1[i][3] = alpha;
@@ -1518,7 +1563,7 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
*/
fs_type.floating = 0;
fs_type.sign = dst_type.sign;
- for (i = 0; i < num_fs; ++i) {
+ for (i = 0; i < num_fullblock_fs; ++i) {
for (j = 0; j < dst_channels; ++j) {
fs_src[i][j] = LLVMBuildBitCast(builder, fs_src[i][j],
lp_build_vec_type(gallivm, fs_type), "");
@@ -1533,16 +1578,16 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
/*
* Pixel twiddle from fragment shader order to memory order
*/
- src_count = generate_fs_twiddle(gallivm, fs_type, num_fs,
+ src_count = generate_fs_twiddle(gallivm, fs_type, num_fullblock_fs,
dst_channels, fs_src, src, pad_inline);
if (dual_source_blend) {
- generate_fs_twiddle(gallivm, fs_type, num_fs, dst_channels,
+ generate_fs_twiddle(gallivm, fs_type, num_fullblock_fs, dst_channels,
fs_src1, src1, pad_inline);
}
src_channels = dst_channels < 3 ? dst_channels : 4;
- if (src_count != num_fs * src_channels) {
- unsigned ds = src_count / (num_fs * src_channels);
+ if (src_count != num_fullblock_fs * src_channels) {
+ unsigned ds = src_count / (num_fullblock_fs * src_channels);
row_type.length /= ds;
fs_type.length = row_type.length;
}
@@ -1685,8 +1730,18 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
dst_type.length = block_width;
}
- load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height,
- dst, dst_type, dst_count, dst_alignment);
+ if (is_1d) {
+ load_unswizzled_block(gallivm, color_ptr, stride, block_width, 1,
+ dst, dst_type, dst_count / 4, dst_alignment);
+ for (i = dst_count / 4; i < dst_count; i++) {
+ dst[i] = lp_build_undef(gallivm, dst_type);
+ }
+
+ }
+ else {
+ load_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height,
+ dst, dst_type, dst_count, dst_alignment);
+ }
/*
@@ -1761,8 +1816,14 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
/*
* Store blend result to memory
*/
- store_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height,
- dst, dst_type, dst_count, dst_alignment);
+ if (is_1d) {
+ store_unswizzled_block(gallivm, color_ptr, stride, block_width, 1,
+ dst, dst_type, dst_count / 4, dst_alignment);
+ }
+ else {
+ store_unswizzled_block(gallivm, color_ptr, stride, block_width, block_height,
+ dst, dst_type, dst_count, dst_alignment);
+ }
if (do_branch) {
lp_build_mask_end(&mask_ctx);
@@ -1855,7 +1916,6 @@ generate_fragment(struct llvmpipe_context *lp,
fs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */
fs_type.width = 32; /* 32-bit float */
fs_type.length = MIN2(lp_native_vector_width / 32, 16); /* n*4 elements per vector */
- num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */
memset(&blend_type, 0, sizeof blend_type);
blend_type.floating = FALSE; /* values are integers */
@@ -1944,6 +2004,11 @@ generate_fragment(struct llvmpipe_context *lp,
/* code generated texture sampling */
sampler = lp_llvm_sampler_soa_create(key->state, context_ptr);
+ num_fs = 16 / fs_type.length; /* number of loops per 4x4 stamp */
+ /* for 1d resources only run "upper half" of stamp */
+ if (key->resource_1d)
+ num_fs /= 2;
+
{
LLVMValueRef num_loop = lp_build_const_int32(gallivm, num_fs);
LLVMTypeRef mask_type = lp_build_int_vec_type(gallivm, fs_type);
@@ -2533,6 +2598,9 @@ make_variant_key(struct llvmpipe_context *lp,
key->zsbuf_format = zsbuf_format;
memcpy(&key->stencil, &lp->depth_stencil->stencil, sizeof key->stencil);
}
+ if (llvmpipe_resource_is_1d(lp->framebuffer.zsbuf->texture)) {
+ key->resource_1d = TRUE;
+ }
}
/* alpha test only applies if render buffer 0 is non-integer (or does not exist) */
@@ -2570,6 +2638,15 @@ make_variant_key(struct llvmpipe_context *lp,
key->cbuf_format[i] = format;
+ /*
+ * Figure out if this is a 1d resource. Note that OpenGL allows crazy
+ * mixing of 2d textures with height 1 and 1d textures, so make sure
+ * we pick 1d if any cbuf or zsbuf is 1d.
+ */
+ if (llvmpipe_resource_is_1d(lp->framebuffer.cbufs[0]->texture)) {
+ key->resource_1d = TRUE;
+ }
+
format_desc = util_format_description(format);
assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB ||
format_desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.h b/src/gallium/drivers/llvmpipe/lp_state_fs.h
index c8dc1c3..3314090 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.h
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.h
@@ -75,6 +75,7 @@ struct lp_fragment_shader_variant_key
unsigned nr_sampler_views:8; /* actually derivable from just the shader */
unsigned flatshade:1;
unsigned occlusion_count:1;
+ unsigned resource_1d:1;
enum pipe_format zsbuf_format;
enum pipe_format cbuf_format[PIPE_MAX_COLOR_BUFS];
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index 22f952c..f1a1ed0 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -83,22 +83,30 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
/* Row stride and image stride */
{
- unsigned alignment, nblocksx, nblocksy, block_size;
+ unsigned align_x, align_y, nblocksx, nblocksy, block_size;
/* For non-compressed formats we need 4x4 pixel alignment
- * (for now). We also want cache line size in x direction,
+ * so we can read/write LP_RASTER_BLOCK_SIZE when rendering to them.
+ * We also want cache line size in x direction,
* otherwise same cache line could end up in multiple threads.
- * XXX this blows up 1d/1d array textures by a factor of 4.
+ * For explicit 1d resources however we reduce this to 4x1 and
+ * handle specially in render output code (as we need to do special
+ * handling there for buffers in any case).
*/
if (util_format_is_compressed(pt->format))
- alignment = 1;
- else
- alignment = LP_RASTER_BLOCK_SIZE;
+ align_x = align_y = 1;
+ else {
+ align_x = LP_RASTER_BLOCK_SIZE;
+ if (llvmpipe_resource_is_1d(&lpr->base))
+ align_y = 1;
+ else
+ align_y = LP_RASTER_BLOCK_SIZE;
+ }
nblocksx = util_format_get_nblocksx(pt->format,
- align(width, alignment));
+ align(width, align_x));
nblocksy = util_format_get_nblocksy(pt->format,
- align(height, alignment));
+ align(height, align_y));
block_size = util_format_get_blocksize(pt->format);
if (util_format_is_compressed(pt->format))
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.h b/src/gallium/drivers/llvmpipe/lp_texture.h
index faba6f2..e73d449 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.h
+++ b/src/gallium/drivers/llvmpipe/lp_texture.h
@@ -159,6 +159,27 @@ llvmpipe_resource_is_texture(const struct pipe_resource *resource)
}
+static INLINE boolean
+llvmpipe_resource_is_1d(const struct pipe_resource *resource)
+{
+ switch (resource->target) {
+ case PIPE_BUFFER:
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_1D_ARRAY:
+ return TRUE;
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_2D_ARRAY:
+ case PIPE_TEXTURE_RECT:
+ case PIPE_TEXTURE_3D:
+ case PIPE_TEXTURE_CUBE:
+ return FALSE;
+ default:
+ assert(0);
+ return FALSE;
+ }
+}
+
+
static INLINE unsigned
llvmpipe_resource_stride(struct pipe_resource *resource,
unsigned level)