summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Roland Scheidegger <sroland@vmware.com> 2013-06-04 03:20:55 +0200
committer Roland Scheidegger <sroland@vmware.com> 2013-06-05 00:29:47 +0200
commit 008fd036007a9e71f669ca49c87767fd19fd77e1 (patch)
tree bcd95f1d8e15cacb9ba5d40d1b005b1b4cf36913
parent ffe2a1ca3c097661dd3f6e3ca5cfd72be184426c (diff)
download external_mesa3d-008fd036007a9e71f669ca49c87767fd19fd77e1.zip
external_mesa3d-008fd036007a9e71f669ca49c87767fd19fd77e1.tar.gz
external_mesa3d-008fd036007a9e71f669ca49c87767fd19fd77e1.tar.bz2
llvmpipe: improve alignment calculation for fetching/storing pixels
This was always doing per-pixel alignment, which isn't necessary except for the buffer case (due to the per-element offset). The disabled code for calculating it was incorrect because it assumed that the full block would always be fetched, which may not be the case, so fix this up. The original code failed, for instance, for r10g10b10a2: the alignment would have been calculated as 4 (block_width) * 4 (bytes), so 16, but the actual fetch may have only fetched 2 values at a time, hence only alignment 8 - it is unclear what exactly would happen in this case (alignment larger than the size to fetch). So just use the (already calculated) fetch size instead and derive the alignment from that, which should always work, no matter whether 1, 2 or 4 pixels are fetched. Reviewed-by: Jose Fonseca <jfonseca@vmware.com>
-rw-r--r-- src/gallium/drivers/llvmpipe/lp_state_fs.c 33
1 files changed, 21 insertions, 12 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 260d93c..fc2ba5e 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -1384,18 +1384,6 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
fs_mask[i] = lp_build_zero(gallivm, mask_type);
}
- /* Compute the alignment of the destination pointer in bytes */
-#if 0
- dst_alignment = (block_width * out_format_desc->block.bits + 7)/(out_format_desc->block.width * 8);
-#else
- /* FIXME -- currently we're fetching pixels one by one, instead of row by row */
- dst_alignment = (1 * out_format_desc->block.bits + 7)/(out_format_desc->block.width * 8);
-#endif
- /* Force power-of-two alignment by extracting only the least-significant-bit */
- dst_alignment = 1 << (ffs(dst_alignment) - 1);
- /* Resource base and stride pointers are aligned to 16 bytes, so that's the maximum alignment we can guarantee */
- dst_alignment = MIN2(dst_alignment, 16);
-
/* Do not bother executing code when mask is empty.. */
if (do_branch) {
check_mask = LLVMConstNull(lp_build_int_vec_type(gallivm, mask_type));
@@ -1730,6 +1718,27 @@ generate_unswizzled_blend(struct gallivm_state *gallivm,
dst_type.length = block_width;
}
+ /*
+ * Compute the alignment of the destination pointer in bytes
+ * We fetch 1-4 pixels, if the format has pot alignment then those fetches
+ * are always aligned by MIN2(16, fetch_width) except for buffers (not
+ * 1d tex but can't distinguish here) so need to stick with per-pixel
+ * alignment in this case.
+ */
+ if (is_1d) {
+ dst_alignment = (out_format_desc->block.bits + 7)/(out_format_desc->block.width * 8);
+ }
+ else {
+ dst_alignment = dst_type.length * dst_type.width / 8;
+ }
+ /* Force power-of-two alignment by extracting only the least-significant-bit */
+ dst_alignment = 1 << (ffs(dst_alignment) - 1);
+ /*
+ * Resource base and stride pointers are aligned to 16 bytes, so that's
+ * the maximum alignment we can guarantee
+ */
+ dst_alignment = MIN2(16, dst_alignment);
+
if (is_1d) {
load_unswizzled_block(gallivm, color_ptr, stride, block_width, 1,
dst, dst_type, dst_count / 4, dst_alignment);