diff options
author | Chih-Wei Huang <cwhuang@linux.org.tw> | 2017-01-09 00:30:27 +0800 |
---|---|---|
committer | Chih-Wei Huang <cwhuang@linux.org.tw> | 2017-01-09 00:30:27 +0800 |
commit | e123ad753c152491c07a5cc6d346f62624b07949 (patch) | |
tree | 9095413f68d7d91a7dfc1b18ead20ef5616b43d6 /src/mesa/drivers | |
parent | dffabc025cca524fecb7a03d0f65ec9c628025f8 (diff) | |
parent | c8ece92ded9337b9ed60aa9568b41313025a1406 (diff) | |
download | external_mesa3d-e123ad753c152491c07a5cc6d346f62624b07949.zip external_mesa3d-e123ad753c152491c07a5cc6d346f62624b07949.tar.gz external_mesa3d-e123ad753c152491c07a5cc6d346f62624b07949.tar.bz2 |
Merge remote-tracking branch 'mesa/13.0' into nougat-x86
Diffstat (limited to 'src/mesa/drivers')
-rw-r--r-- | src/mesa/drivers/dri/i965/Makefile.am | 7 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.cpp | 10 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs.h | 6 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp | 19 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp | 8 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/intel_mipmap_tree.c | 2 | ||||
-rw-r--r-- | src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp | 213 |
7 files changed, 248 insertions, 17 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile.am b/src/mesa/drivers/dri/i965/Makefile.am index a192fc0..4b00977 100644 --- a/src/mesa/drivers/dri/i965/Makefile.am +++ b/src/mesa/drivers/dri/i965/Makefile.am @@ -106,6 +106,7 @@ TEST_LIBS = \ TESTS = \ test_fs_cmod_propagation \ + test_fs_copy_propagation \ test_fs_saturate_propagation \ test_eu_compact \ test_vf_float_conversions \ @@ -121,6 +122,12 @@ test_fs_cmod_propagation_LDADD = \ $(top_builddir)/src/gtest/libgtest.la \ $(TEST_LIBS) +test_fs_copy_propagation_SOURCES = \ + test_fs_copy_propagation.cpp +test_fs_copy_propagation_LDADD = \ + $(top_builddir)/src/gtest/libgtest.la \ + $(TEST_LIBS) + test_fs_saturate_propagation_SOURCES = \ test_fs_saturate_propagation.cpp test_fs_saturate_propagation_LDADD = \ diff --git a/src/mesa/drivers/dri/i965/brw_fs.cpp b/src/mesa/drivers/dri/i965/brw_fs.cpp index afb1057..c4cbf84 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs.cpp @@ -5692,7 +5692,7 @@ fs_visitor::optimize() OPT(opt_algebraic); OPT(opt_cse); - OPT(opt_copy_propagate); + OPT(opt_copy_propagation); OPT(opt_predicated_break, this); OPT(opt_cmod_propagation); OPT(dead_code_eliminate); @@ -5716,7 +5716,7 @@ fs_visitor::optimize() } if (OPT(lower_d2x)) { - OPT(opt_copy_propagate); + OPT(opt_copy_propagation); OPT(dead_code_eliminate); } @@ -5728,12 +5728,12 @@ fs_visitor::optimize() OPT(lower_logical_sends); if (progress) { - OPT(opt_copy_propagate); + OPT(opt_copy_propagation); /* Only run after logical send lowering because it's easier to implement * in terms of physical sends. */ if (OPT(opt_zero_samples)) - OPT(opt_copy_propagate); + OPT(opt_copy_propagation); /* Run after logical send lowering to give it a chance to CSE the * LOAD_PAYLOAD instructions created to construct the payloads of * e.g. texturing messages in cases where it wasn't possible to CSE the @@ -5762,7 +5762,7 @@ fs_visitor::optimize() if (devinfo->gen <= 5 && OPT(lower_minmax)) { OPT(opt_cmod_propagation); OPT(opt_cse); - OPT(opt_copy_propagate); + OPT(opt_copy_propagation); OPT(dead_code_eliminate); } diff --git a/src/mesa/drivers/dri/i965/brw_fs.h b/src/mesa/drivers/dri/i965/brw_fs.h index da01174..3a53768 100644 --- a/src/mesa/drivers/dri/i965/brw_fs.h +++ b/src/mesa/drivers/dri/i965/brw_fs.h @@ -133,11 +133,11 @@ public: bool opt_redundant_discard_jumps(); bool opt_cse(); bool opt_cse_local(bblock_t *block); - bool opt_copy_propagate(); + bool opt_copy_propagation(); bool try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry); bool try_constant_propagate(fs_inst *inst, acp_entry *entry); - bool opt_copy_propagate_local(void *mem_ctx, bblock_t *block, - exec_list *acp); + bool opt_copy_propagation_local(void *mem_ctx, bblock_t *block, + exec_list *acp); bool opt_drop_redundant_mov_to_flags(); bool opt_register_renaming(); bool register_coalesce(); diff --git a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp index e4e6816..da02fb1 100644 --- a/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp +++ b/src/mesa/drivers/dri/i965/brw_fs_copy_propagation.cpp @@ -129,7 +129,7 @@ fs_copy_prop_dataflow::fs_copy_prop_dataflow(void *mem_ctx, cfg_t *cfg, foreach_in_list(acp_entry, entry, &out_acp[block->num][i]) { acp[next_acp] = entry; - /* opt_copy_propagate_local populates out_acp with copies created + /* opt_copy_propagation_local populates out_acp with copies created * in a block which are still live at the end of the block. This * is exactly what we want in the COPY set. */ @@ -431,7 +431,9 @@ fs_visitor::try_copy_propagate(fs_inst *inst, int arg, acp_entry *entry) if (entry->saturate) { switch(inst->opcode) { case BRW_OPCODE_SEL: - if (inst->src[1].file != IMM || + if ((inst->conditional_mod != BRW_CONDITIONAL_GE && + inst->conditional_mod != BRW_CONDITIONAL_L) || + inst->src[1].file != IMM || inst->src[1].f < 0.0 || inst->src[1].f > 1.0) { return false; @@ -735,8 +737,8 @@ can_propagate_from(fs_inst *inst) * list. */ bool -fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, - exec_list *acp) +fs_visitor::opt_copy_propagation_local(void *copy_prop_ctx, bblock_t *block, + exec_list *acp) { bool progress = false; @@ -819,7 +821,7 @@ fs_visitor::opt_copy_propagate_local(void *copy_prop_ctx, bblock_t *block, } bool -fs_visitor::opt_copy_propagate() +fs_visitor::opt_copy_propagation() { bool progress = false; void *copy_prop_ctx = ralloc_context(NULL); @@ -832,8 +834,8 @@ fs_visitor::opt_copy_propagate() * the set of copies available at the end of the block. */ foreach_block (block, cfg) { - progress = opt_copy_propagate_local(copy_prop_ctx, block, - out_acp[block->num]) || progress; + progress = opt_copy_propagation_local(copy_prop_ctx, block, + out_acp[block->num]) || progress; } /* Do dataflow analysis for those available copies. */ @@ -852,7 +854,8 @@ fs_visitor::opt_copy_propagate() } } - progress = opt_copy_propagate_local(copy_prop_ctx, block, in_acp) || progress; + progress = opt_copy_propagation_local(copy_prop_ctx, block, in_acp) || + progress; } for (int i = 0; i < cfg->num_blocks; i++) diff --git a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp index b0ee289..ac200d2 100644 --- a/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp +++ b/src/mesa/drivers/dri/i965/brw_vec4_gs_visitor.cpp @@ -780,7 +780,13 @@ brw_compile_gs(const struct brw_compiler *compiler, void *log_data, if (compiler->devinfo->gen >= 8) output_size_bytes += 32; - assert(output_size_bytes >= 1); + /* Shaders can technically set max_vertices = 0, at which point we + * may have a URB size of 0 bytes. Nothing good can come from that, + * so enforce a minimum size. + */ + if (output_size_bytes == 0) + output_size_bytes = 1; + unsigned max_output_size_bytes = GEN7_MAX_GS_URB_ENTRY_SIZE_BYTES; if (compiler->devinfo->gen == 6) max_output_size_bytes = GEN6_MAX_GS_URB_ENTRY_SIZE_BYTES; diff --git a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c index aba203a..78c7a11 100644 --- a/src/mesa/drivers/dri/i965/intel_mipmap_tree.c +++ b/src/mesa/drivers/dri/i965/intel_mipmap_tree.c @@ -2159,6 +2159,8 @@ intel_miptree_make_shareable(struct brw_context *brw, intel_miptree_release(&mt->mcs_mt); mt->fast_clear_state = INTEL_FAST_CLEAR_STATE_NO_MCS; } + + mt->disable_aux_buffers = true; } diff --git a/src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp b/src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp new file mode 100644 index 0000000..ed2f1e0 --- /dev/null +++ b/src/mesa/drivers/dri/i965/test_fs_copy_propagation.cpp @@ -0,0 +1,213 @@ +/* + * Copyright © 2016 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + */ + +#include <gtest/gtest.h> +#include "brw_fs.h" +#include "brw_cfg.h" +#include "program/program.h" + +using namespace brw; + +class copy_propagation_test : public ::testing::Test { + virtual void SetUp(); + +public: + struct brw_compiler *compiler; + struct gen_device_info *devinfo; + struct gl_context *ctx; + struct brw_wm_prog_data *prog_data; + struct gl_shader_program *shader_prog; + fs_visitor *v; +}; + +class copy_propagation_fs_visitor : public fs_visitor +{ +public: + copy_propagation_fs_visitor(struct brw_compiler *compiler, + struct brw_wm_prog_data *prog_data, + nir_shader *shader) + : fs_visitor(compiler, NULL, NULL, NULL, + &prog_data->base, (struct gl_program *) NULL, + shader, 8, -1) {} +}; + + +void copy_propagation_test::SetUp() +{ + ctx = (struct gl_context *)calloc(1, sizeof(*ctx)); + compiler = (struct brw_compiler *)calloc(1, sizeof(*compiler)); + devinfo = (struct gen_device_info *)calloc(1, sizeof(*devinfo)); + compiler->devinfo = devinfo; + + prog_data = ralloc(NULL, struct brw_wm_prog_data); + nir_shader *shader = + nir_shader_create(NULL, MESA_SHADER_FRAGMENT, NULL); + + v = new copy_propagation_fs_visitor(compiler, prog_data, shader); + + devinfo->gen = 4; +} + +static fs_inst * +instruction(bblock_t *block, int num) +{ + fs_inst *inst = (fs_inst *)block->start(); + for (int i = 0; i < num; i++) { + inst = (fs_inst *)inst->next; + } + return inst; +} + +static bool +copy_propagation(fs_visitor *v) +{ + const bool print = getenv("TEST_DEBUG"); + + if (print) { + fprintf(stderr, "= Before =\n"); + v->cfg->dump(v); + } + + bool ret = v->opt_copy_propagation(); + + if (print) { + fprintf(stderr, "\n= After =\n"); + v->cfg->dump(v); + } + + return ret; +} + +TEST_F(copy_propagation_test, basic) +{ + const fs_builder &bld = v->bld; + fs_reg vgrf0 = v->vgrf(glsl_type::float_type); + fs_reg vgrf1 = v->vgrf(glsl_type::float_type); + fs_reg vgrf2 = v->vgrf(glsl_type::float_type); + fs_reg vgrf3 = v->vgrf(glsl_type::float_type); + bld.MOV(vgrf0, vgrf2); + bld.ADD(vgrf1, vgrf0, vgrf3); + + /* = Before = + * + * 0: mov(8) vgrf0 vgrf2 + * 1: add(8) vgrf1 vgrf0 vgrf3 + * + * = After = + * 0: mov(8) vgrf0 vgrf2 + * 1: add(8) vgrf1 vgrf2 vgrf3 + */ + + v->calculate_cfg(); + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_TRUE(copy_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + fs_inst *mov = instruction(block0, 0); + EXPECT_EQ(BRW_OPCODE_MOV, mov->opcode); + EXPECT_TRUE(mov->dst.equals(vgrf0)); + EXPECT_TRUE(mov->src[0].equals(vgrf2)); + + fs_inst *add = instruction(block0, 1); + EXPECT_EQ(BRW_OPCODE_ADD, add->opcode); + EXPECT_TRUE(add->dst.equals(vgrf1)); + EXPECT_TRUE(add->src[0].equals(vgrf2)); + EXPECT_TRUE(add->src[1].equals(vgrf3)); +} + +TEST_F(copy_propagation_test, maxmax_sat_imm) +{ + const fs_builder &bld = v->bld; + fs_reg vgrf0 = v->vgrf(glsl_type::float_type); + fs_reg vgrf1 = v->vgrf(glsl_type::float_type); + fs_reg vgrf2 = v->vgrf(glsl_type::float_type); + + static const struct { + enum brw_conditional_mod conditional_mod; + float immediate; + bool expected_result; + } test[] = { + /* conditional mod, imm, expected_result */ + { BRW_CONDITIONAL_GE , 0.1f, true }, + { BRW_CONDITIONAL_L , 0.1f, true }, + { BRW_CONDITIONAL_GE , 0.5f, true }, + { BRW_CONDITIONAL_L , 0.5f, true }, + { BRW_CONDITIONAL_GE , 0.9f, true }, + { BRW_CONDITIONAL_L , 0.9f, true }, + { BRW_CONDITIONAL_GE , -1.5f, false }, + { BRW_CONDITIONAL_L , -1.5f, false }, + { BRW_CONDITIONAL_GE , 1.5f, false }, + { BRW_CONDITIONAL_L , 1.5f, false }, + + { BRW_CONDITIONAL_NONE, 0.5f, false }, + { BRW_CONDITIONAL_Z , 0.5f, false }, + { BRW_CONDITIONAL_NZ , 0.5f, false }, + { BRW_CONDITIONAL_G , 0.5f, false }, + { BRW_CONDITIONAL_LE , 0.5f, false }, + { BRW_CONDITIONAL_R , 0.5f, false }, + { BRW_CONDITIONAL_O , 0.5f, false }, + { BRW_CONDITIONAL_U , 0.5f, false }, + }; + + for (unsigned i = 0; i < sizeof(test) / sizeof(test[0]); i++) { + fs_inst *mov = set_saturate(true, bld.MOV(vgrf0, vgrf1)); + fs_inst *sel = set_condmod(test[i].conditional_mod, + bld.SEL(vgrf2, vgrf0, + brw_imm_f(test[i].immediate))); + + v->calculate_cfg(); + + bblock_t *block0 = v->cfg->blocks[0]; + + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_EQ(test[i].expected_result, copy_propagation(v)); + EXPECT_EQ(0, block0->start_ip); + EXPECT_EQ(1, block0->end_ip); + + EXPECT_EQ(BRW_OPCODE_MOV, mov->opcode); + EXPECT_TRUE(mov->saturate); + EXPECT_TRUE(mov->dst.equals(vgrf0)); + EXPECT_TRUE(mov->src[0].equals(vgrf1)); + + EXPECT_EQ(BRW_OPCODE_SEL, sel->opcode); + EXPECT_EQ(test[i].conditional_mod, sel->conditional_mod); + EXPECT_EQ(test[i].expected_result, sel->saturate); + EXPECT_TRUE(sel->dst.equals(vgrf2)); + if (test[i].expected_result) { + EXPECT_TRUE(sel->src[0].equals(vgrf1)); + } else { + EXPECT_TRUE(sel->src[0].equals(vgrf0)); + } + EXPECT_TRUE(sel->src[1].equals(brw_imm_f(test[i].immediate))); + + delete v->cfg; + v->cfg = NULL; + } +} |