summary | refs | log | tree | commit | diff | stats
path: root/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
diff options
context:
space:
mode:
author: Eric Anholt <eric@anholt.net> 2013-05-02 17:44:28 -0700
committer: Eric Anholt <eric@anholt.net> 2013-05-29 10:20:26 -0700
commit: 0a0b32319372027a872364f1fcc7b3a5c2e96510 (patch)
tree: e3269210b445721d54fa710b41fe215b75dff14e /src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
parent: 9a31c4f9ace9ed115e54df8bc769ea9748f25d39 (diff)
download: external_mesa3d-0a0b32319372027a872364f1fcc7b3a5c2e96510.zip
external_mesa3d-0a0b32319372027a872364f1fcc7b3a5c2e96510.tar.gz
external_mesa3d-0a0b32319372027a872364f1fcc7b3a5c2e96510.tar.bz2
i965/fs: Fix test for smearing enabled on an instruction.
We were expanding the live range too far, breaking register_coalesce_2()
and compute_to_mrf() on 16-wide shaders. Turning it back on improves
GLB2.7 performance by 0.239355% +/- 0.0850649% (n=398).

shader-db stats are:
total instructions in shared programs: 1627211 -> 1609262 (-1.10%)
instructions in affected programs: 450351 -> 432402 (-3.99%)

While 33 new 16-wide shaders are gained, 70 are lost. Despite that,
tropics (the app that lost the most 16-wide) shows a .41% +/- .16%
(n=7/8, first-run outlier removed) performance improvement on my HSW.

Reviewed-by: Kenneth Graunke <kenneth@whitecape.org>
Diffstat (limited to 'src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp')
-rw-r--r-- src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp 2
1 files changed, 1 insertions, 1 deletions
diff --git a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
index 3daf8fa..f5daab2 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_live_variables.cpp
@@ -216,7 +216,7 @@ fs_visitor::calculate_live_intervals()
* pixel_x/pixel_y, which are registers of 16-bit values and thus
* would get stomped by the first decode as well.
*/
- if (dispatch_width == 16 && (inst->src[i].smear ||
+ if (dispatch_width == 16 && (inst->src[i].smear >= 0 ||
(this->pixel_x.reg == reg ||
this->pixel_y.reg == reg))) {
end_ip++;