diff options
author | Vadim Girlin <vadimgirlin@gmail.com> | 2013-04-30 20:51:36 +0400 |
---|---|---|
committer | Vadim Girlin <vadimgirlin@gmail.com> | 2013-04-30 21:50:47 +0400 |
commit | 2cd769179345799d383f92dd615991755ec24be1 (patch) | |
tree | 9863c9e92e645cad35a861b7de76f0c98d64b0d0 /src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp | |
parent | fbb065d629d2f79a6224fc3e5e89d5acc275e3b4 (diff) | |
download | external_mesa3d-2cd769179345799d383f92dd615991755ec24be1.zip external_mesa3d-2cd769179345799d383f92dd615991755ec24be1.tar.gz external_mesa3d-2cd769179345799d383f92dd615991755ec24be1.tar.bz2 |
r600g/sb: initial commit of the optimizing shader backend
Diffstat (limited to 'src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp')
-rw-r--r-- | src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp | 608 |
1 files changed, 608 insertions, 0 deletions
diff --git a/src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp b/src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp new file mode 100644 index 0000000..52e7668 --- /dev/null +++ b/src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp @@ -0,0 +1,608 @@ +/* + * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com> + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ *
+ * Authors:
+ *      Vadim Girlin
+ */
+
+#define RA_DEBUG 0
+// RA_DUMP(q) wraps debug-only statements: q is emitted only when RA_DEBUG is nonzero, otherwise the macro expands to nothing.
+#if RA_DEBUG
+#define RA_DUMP(q) do { q } while (0)
+#else
+#define RA_DUMP(q)
+#endif
+
+#include "sb_shader.h"
+#include "sb_pass.h"
+
+namespace r600_sb {
+
+using std::cerr;
+/*
+ * Pass entry point: runs the shader's coalescer (sh.coal) and always
+ * returns 0.
+ */
+int ra_coalesce::run() {
+
+	sh.coal.run();
+
+	return 0;
+}
+/*
+ * Records an affinity edge between values a and b with the given cost.
+ * Both values must satisfy is_sgpr() (asserted). The edge is heap-allocated
+ * and stored in `edges`; it is deleted in ~coalescer().
+ */
+void coalescer::add_edge(value* a, value* b, unsigned cost) {
+	assert(a->is_sgpr() && b->is_sgpr());
+	edges.insert(new ra_edge(a,b, cost));
+}
+/*
+ * Creates a new chunk that contains only v and makes v->chunk point to it.
+ * The chunk copies v's pin state: RCF_PIN_CHAN / RCF_PIN_REG flags and
+ * v->pin_gpr. The chunk is appended to all_chunks, which owns it (see
+ * ~coalescer()). v must satisfy is_sgpr() (asserted). Any previous value of
+ * v->chunk is simply overwritten here.
+ */
+void coalescer::create_chunk(value *v) {
+
+	assert(v->is_sgpr());
+
+	ra_chunk *c = new ra_chunk();
+
+	c->values.push_back(v);
+
+	if (v->is_chan_pinned())
+		c->flags |= RCF_PIN_CHAN;
+	if (v->is_reg_pinned()) {
+		c->flags |= RCF_PIN_REG;
+	}
+	// pin_gpr is copied unconditionally, even when neither pin flag is set
+	c->pin = v->pin_gpr;
+
+	RA_DUMP(
+		cerr << "create_chunk: ";
+		dump_chunk(c);
+	);
+
+	all_chunks.push_back(c);
+	v->chunk = c;
+
+}
+/*
+ * Merges the chunk of e->b (c2) into the chunk of e->a (c1) and deletes c2.
+ *
+ * c1 takes over c2's pinned channel and/or pinned register when c2 has that
+ * pin and c1 does not. Every value of c2 is re-pointed to c1 and appended to
+ * c1->values. c1's cost grows by c2's cost plus the cost of the edge.
+ * c2 must be present in all_chunks (asserted). build_chunks() only calls
+ * this when the two chunks are different objects.
+ */
+void coalescer::unify_chunks(ra_edge *e) {
+	ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
+
+	RA_DUMP(
+		cerr << "unify_chunks: ";
+		dump_chunk(c1);
+		dump_chunk(c2);
+	);
+
+	if (c2->is_chan_pinned() && !c1->is_chan_pinned()) {
+		c1->flags |= RCF_PIN_CHAN;
+		c1->pin = sel_chan(c1->pin.sel(), c2->pin.chan()); // keep c1's sel, take c2's chan
+	}
+
+	if (c2->is_reg_pinned() && !c1->is_reg_pinned()) {
+		c1->flags |= RCF_PIN_REG;
+		c1->pin = sel_chan(c2->pin.sel(), c1->pin.chan()); // take c2's sel, keep c1's chan
+	}
+
+	c1->values.reserve(c1->values.size() + c2->values.size());
+
+	for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
+			++I) {
+		(*I)->chunk = c1;
+		c1->values.push_back(*I);
+	}
+
+	chunk_vec::iterator F = std::find(all_chunks.begin(), all_chunks.end(), c2);
+	assert(F != all_chunks.end());
+
+	all_chunks.erase(F);
+
+	c1->cost += c2->cost + e->cost;
+	delete c2;
+}
+/*
+ * Returns true when c1 and c2 conflict, i.e. when any of these holds:
+ *  - both are channel-pinned and their pinned channels differ;
+ *  - both are register-pinned and their pinned registers differ;
+ *  - some value v1 of c1 has a value v2 of c2 in its interference set and
+ *    v1->v_equal(v2) is false.
+ * Returns false otherwise. build_chunks() merges two chunks only when this
+ * returns false.
+ */
+bool coalescer::chunks_interference(ra_chunk *c1, ra_chunk *c2) {
+	unsigned pin_flags = (c1->flags & c2->flags) &
+			(RCF_PIN_CHAN | RCF_PIN_REG); // pins that BOTH chunks have
+
+	if ((pin_flags & RCF_PIN_CHAN) &&
+			c1->pin.chan() != c2->pin.chan())
+		return true;
+
+	if ((pin_flags & RCF_PIN_REG) &&
+			c1->pin.sel() != c2->pin.sel())
+		return true;
+
+	for (vvec::iterator I = c1->values.begin(), E = c1->values.end(); I != E;
+			++I) {
+		value *v1 = *I;
+		// the inner I/E intentionally shadow the outer iterators
+		for (vvec::iterator I = c2->values.begin(), E = c2->values.end(); I != E;
+				++I) {
+			value *v2 = *I;
+
+			if (!v1->v_equal(v2) && v1->interferences.contains(v2))
+				return true;
+		}
+	}
+	return false;
+}
+/*
+ * Builds chunks from the affinity edges, visiting `edges` in its iteration
+ * order. For each edge both endpoints get a chunk if they have none; if the
+ * endpoints already share a chunk the edge cost is added to that chunk,
+ * otherwise the two chunks are merged unless chunks_interference() reports
+ * a conflict.
+ */
+void coalescer::build_chunks() {
+
+	for (edge_queue::iterator I = edges.begin(), E = edges.end();
+			I != E; ++I) {
+
+		ra_edge *e = *I;
+
+		if (!e->a->chunk)
+			create_chunk(e->a);
+
+		if (!e->b->chunk)
+			create_chunk(e->b);
+
+		ra_chunk *c1 = e->a->chunk, *c2 = e->b->chunk;
+
+		if (c1 == c2) {
+			c1->cost += e->cost;
+		} else if (!chunks_interference(c1, c2))
+			unify_chunks(e);
+	}
+}
+/*
+ * Allocates a constraint of the given kind, registers it in all_constraints
+ * (which owns it; see ~coalescer()) and returns it.
+ */
+ra_constraint* coalescer::create_constraint(constraint_kind kind) {
+	ra_constraint *c = new ra_constraint(kind);
+	all_constraints.push_back(c);
+	return c;
+}
+/*
+ * Debug helper: prints every affinity edge (both values and the cost) to
+ * cerr.
+ */
+void coalescer::dump_edges() {
+	cerr << "######## affinity edges\n";
+
+	for (edge_queue::iterator I = edges.begin(), E = edges.end();
+			I != E; ++I) {
+		ra_edge* e = *I;
+		cerr << " ra_edge ";
+		dump::dump_val(e->a);
+		cerr << " <-> ";
+		dump::dump_val(e->b);
+		cerr << " cost = " << e->cost << "\n";
+	}
+}
+/*
+ * Debug helper: prints every chunk in all_chunks to cerr via dump_chunk().
+ */
+void coalescer::dump_chunks() {
+	cerr << "######## chunks\n";
+
+	for (chunk_vec::iterator I = all_chunks.begin(), E = all_chunks.end();
+			I != E; ++I) {
+		ra_chunk* c = *I;
+		dump_chunk(c);
+	}
+}
+
+/*
+ * Debug helper: prints every constraint in the `constraints` queue to cerr
+ * via dump_constraint().
+ */
+void coalescer::dump_constraint_queue() {
+	cerr << "######## constraints\n";
+
+	for (constraint_queue::iterator I = constraints.begin(),
+			E = constraints.end(); I != E; ++I) {
+		ra_constraint* c = *I;
+		dump_constraint(c);
+	}
+}
+/*
+ * Debug helper: prints one chunk to cerr - its cost, its values, the pinned
+ * register and/or channel when the corresponding flag is set, and a GLOBAL
+ * marker when RCF_GLOBAL is set.
+ */
+void coalescer::dump_chunk(ra_chunk* c) {
+	cerr << " ra_chunk cost = " << c->cost << " : ";
+	dump::dump_vec(c->values);
+
+	if (c->flags & RCF_PIN_REG)
+		cerr << " REG = " << c->pin.sel();
+
+	if (c->flags & RCF_PIN_CHAN)
+		cerr << " CHAN = " << c->pin.chan();
+
+	cerr << (c->flags & RCF_GLOBAL ? " GLOBAL" : "");
+
+	cerr << "\n";
+}
+/*
+ * Debug helper: prints one constraint to cerr - its kind, cost and values.
+ * An unrecognized kind prints "UNKNOWN_KIND" and trips assert(0).
+ */
+void coalescer::dump_constraint(ra_constraint* c) {
+	cerr << " ra_constraint: ";
+	switch (c->kind) {
+	case CK_PACKED_BS: cerr << "PACKED_BS"; break;
+	case CK_PHI: cerr << "PHI"; break;
+	case CK_SAME_REG: cerr << "SAME_REG"; break;
+	default: cerr << "UNKNOWN_KIND"; assert(0); break;
+	}
+
+	cerr << " cost = " << c->cost << " : ";
+	dump::dump_vec(c->values);
+
+	cerr << "\n";
+}
+/*
+ * Adds to s the interference sets of all values in chunk c, then removes
+ * c's own values from s. s is not cleared first, so anything already in it
+ * is kept (minus c's values).
+ */
+void coalescer::get_chunk_interferences(ra_chunk *c, val_set &s) {
+
+	for (vvec::iterator I = c->values.begin(), E = c->values.end(); I != E;
+			++I) {
+		value *v = *I;
+		s.add_set(v->interferences);
+	}
+	s.remove_vec(c->values);
+}
+/*
+ * Fills the `chunks` queue with every chunk from all_chunks that is not
+ * fixed.
+ */
+void coalescer::build_chunk_queue() {
+	for (chunk_vec::iterator I = all_chunks.begin(),
+			E = all_chunks.end(); I != E; ++I) {
+		ra_chunk *c = *I;
+
+		if (!c->is_fixed())
+			chunks.insert(c);
+	}
+}
+/*
+ * Fills the `constraints` queue. Only constraints of kind CK_SAME_REG whose
+ * value list is non-empty and whose first value is an sgpr are queued; all
+ * others are skipped. For a queued constraint every value gets a chunk if
+ * it has none, and the constraint's cost is set to the sum of the costs of
+ * the chunks that already existed (chunks created here add nothing).
+ */
+void coalescer::build_constraint_queue() {
+	for (constraint_vec::iterator I = all_constraints.begin(),
+			E = all_constraints.end(); I != E; ++I) {
+		ra_constraint *c = *I;
+		unsigned cost = 0;
+
+		if (c->values.empty() || !c->values.front()->is_sgpr())
+			continue;
+
+		if (c->kind != CK_SAME_REG)
+			continue;
+		// the inner I/E intentionally shadow the outer iterators
+		for (vvec::iterator I = c->values.begin(), E = c->values.end();
+				I != E; ++I) {
+			value *v = *I;
+			if (!v->chunk)
+				create_chunk(v);
+			else
+				cost += v->chunk->cost;
+		}
+		c->cost = cost;
+		constraints.insert(c);
+	}
+}
+/*
+ * Assigns a color (sel_chan) to every queued chunk that is not fixed and
+ * holds more than one value; fixed and single-value chunks are skipped.
+ *
+ * The search uses up to two passes: pass 0 runs only for register-pinned
+ * chunks and tries just the pinned register; pass 1 scans registers
+ * [0, sh.num_nontemp_gpr()). Channels are limited to the pinned channel for
+ * channel-pinned chunks, otherwise 0..3. The first slot whose bit is not
+ * set in the interference bitset (or lies beyond its size) is taken.
+ *
+ * NOTE(review): color == 0 doubles as "nothing found"; this presumes that
+ * sel_chan(reg, chan) never converts to 0 for a real slot - confirm against
+ * sel_chan's definition. If nothing is found, only assert(color) guards the
+ * color_chunk() call.
+ */
+void coalescer::color_chunks() {
+
+	for (chunk_queue::iterator I = chunks.begin(), E = chunks.end();
+			I != E; ++I) {
+		ra_chunk *c = *I;
+		if (c->is_fixed() || c->values.size() == 1)
+			continue;
+
+		sb_bitset rb;
+		val_set interf;
+
+		get_chunk_interferences(c, interf);
+
+		RA_DUMP(
+			cerr << "color_chunks: ";
+			dump_chunk(c);
+			cerr << "\n interferences: ";
+			dump::dump_set(sh,interf);
+			cerr << "\n";
+		);
+
+		init_reg_bitset(rb, interf);
+
+		unsigned pass = c->is_reg_pinned() ? 0 : 1; // pass 0 = pinned register only
+
+		unsigned cs = c->is_chan_pinned() ? c->pin.chan() : 0;
+		unsigned ce = c->is_chan_pinned() ? cs + 1 : 4;
+
+		unsigned color = 0;
+
+		while (pass < 2) {
+
+			unsigned rs, re;
+
+			if (pass == 0) {
+				rs = c->pin.sel();
+				re = rs + 1;
+			} else {
+				rs = 0;
+				re = sh.num_nontemp_gpr();
+			}
+
+			for (unsigned reg = rs; reg < re; ++reg) {
+				for (unsigned chan = cs; chan < ce; ++chan) {
+					unsigned bit = sel_chan(reg, chan);
+					if (bit >= rb.size() || !rb.get(bit)) { // bits past the bitset's size count as free
+						color = bit;
+						break;
+					}
+				}
+				if (color)
+					break;
+			}
+
+			if (color)
+				break;
+
+			++pass;
+		}
+
+		assert(color);
+		color_chunk(c, color);
+	}
+}
+/*
+ * Marks in bs the gpr of every value in vs that is an sgpr and has a
+ * nonzero gpr (a zero gpr is skipped). bs is grown to v->gpr + 64 bits when
+ * the index does not fit. Bits already set in bs are left untouched.
+ */
+void coalescer::init_reg_bitset(sb_bitset &bs, val_set &vs) {
+
+	for (val_set::iterator I = vs.begin(sh), E = vs.end(sh); I != E; ++I) {
+		value *v = *I;
+
+		if (!v->is_sgpr())
+			continue;
+
+		if (v->gpr) {
+			if (v->gpr >= bs.size())
+				bs.resize(v->gpr + 64);
+			bs.set(v->gpr, 1);
+		}
+	}
+}
+/*
+ * Assigns `color` to the values of chunk c.
+ *
+ * A value that is register-pinned to a different register, or
+ * channel-pinned to a different channel, is moved into its own chunk with
+ * detach_value() and does not receive the color. Every other value gets
+ * gpr = color and is fixed if it belongs to a CK_PHI constraint.
+ * Afterwards c->pin is set to color and the chunk is fixed if it is
+ * register-pinned.
+ */
+void coalescer::color_chunk(ra_chunk *c, sel_chan color) {
+	// iterate over a copy: detach_value() erases from c->values
+	vvec vv = c->values;
+
+	for (vvec::iterator I = vv.begin(), E = vv.end(); I != E;
+			++I) {
+		value *v = *I;
+
+		if (v->is_reg_pinned() && v->pin_gpr.sel() != color.sel()) {
+			detach_value(v);
+			continue;
+		}
+
+		if (v->is_chan_pinned() && v->pin_gpr.chan() != color.chan()) {
+			detach_value(v);
+			continue;
+		}
+
+		v->gpr = color;
+
+		if (v->constraint && v->constraint->kind == CK_PHI)
+			v->fix();
+
+
+		RA_DUMP(
+			cerr << " assigned " << color << " to ";
+			dump::dump_val(v);
+			cerr << "\n";
+		);
+	}
+
+	c->pin = color;
+
+	if (c->is_reg_pinned()) {
+		c->fix();
+	}
+}
+/*
+ * Deletes every constraint, chunk and edge owned by the coalescer
+ * (all_constraints, all_chunks and edges).
+ */
+coalescer::~coalescer() {
+
+	// FIXME use pool allocator ??
+
+	for (constraint_vec::iterator I = all_constraints.begin(),
+			E = all_constraints.end(); I != E; ++I) {
+		delete (*I);
+	}
+
+	for (chunk_vec::iterator I = all_chunks.begin(),
+			E = all_chunks.end(); I != E; ++I) {
+		delete (*I);
+	}
+
+	for (edge_queue::iterator I = edges.begin(), E = edges.end();
+			I != E; ++I) {
+		delete (*I);
+	}
+}
+/*
+ * Runs the coalescer stages in order: build chunks from the affinity
+ * edges, build and color the constraint queue, then build the chunk queue
+ * and color the remaining chunks. The chunk queue is built after constraint
+ * coloring, so chunks fixed by that step are not queued.
+ */
+void coalescer::run() {
+	RA_DUMP( dump_edges(); );
+
+	build_chunks();
+	RA_DUMP( dump_chunks(); );
+
+	build_constraint_queue();
+	RA_DUMP( dump_constraint_queue(); );
+
+	color_constraints();
+
+	build_chunk_queue();
+	color_chunks();
+}
+/*
+ * Handler for CK_PHI constraints; the body is empty, so it does nothing.
+ */
+void coalescer::color_phi_constraint(ra_constraint* c) {
+}
+/*
+ * Removes v from its current chunk and places it in a new single-value
+ * chunk created by create_chunk(); the new chunk is fixed when v is
+ * register-pinned. Returns the new chunk.
+ *
+ * v must be listed in its current chunk (asserted). The old chunk is left
+ * in place - it is neither deleted nor has its cost adjusted here, even if
+ * it ends up with no values.
+ */
+ra_chunk* coalescer::detach_value(value *v) {
+
+	vvec::iterator F = std::find(v->chunk->values.begin(),
+			v->chunk->values.end(), v);
+
+	assert(F != v->chunk->values.end());
+	v->chunk->values.erase(F);
+	create_chunk(v); // overwrites v->chunk with the new chunk
+
+	if (v->is_reg_pinned()) {
+		v->chunk->fix();
+	}
+
+	RA_DUMP(
+		cerr << " detached : ";
+		dump_chunk(v->chunk);
+	);
+
+	return v->chunk;
+
+}
+/*
+ * Colors the chunks of all values of constraint c so that they share one
+ * register, each in its own channel.
+ *
+ * Step 1 collects one chunk per value. A value whose chunk is pinned to a
+ * channel already claimed by an earlier value is detached into its own
+ * chunk. The first register-pinned chunk seen supplies pin_reg. The
+ * interference bitset of each chunk is built.
+ *
+ * Step 2 searches for a register and a channel assignment (a permutation
+ * of swz). Pass 0 runs only when some chunk is register-pinned and tries
+ * just pin_reg, stopping at the first permutation that fits. Pass 1 scans
+ * registers [0, sh.num_nontemp_gpr()) over all permutations and keeps the
+ * one with the lowest register number. Permutations that contradict a
+ * chunk's pinned channel are skipped.
+ *
+ * Step 3 colors and fixes every chunk with (min_reg, min_swz[i]). A chunk
+ * that is already fixed is skipped if its pin equals that color; otherwise
+ * the value is detached and the new chunk is colored.
+ *
+ * NOTE(review): ch, interf, rb, swz and min_swz hold 4 entries and k is
+ * not bounds-checked, so this relies on c->values having at most 4
+ * entries - confirm with the code that creates constraints.
+ * NOTE(review): a failed search is caught only by assert(done); with
+ * asserts disabled min_swz would be read uninitialized in step 3.
+ */
+void coalescer::color_reg_constraint(ra_constraint *c) {
+	unsigned k, cnt = c->values.size();
+	vvec & cv = c->values;
+
+	ra_chunk *ch[4];
+	unsigned swz[4] = {0, 1, 2, 3};
+	val_set interf[4];
+	sb_bitset rb[4];
+
+	bool reg_pinned = false;
+	unsigned pin_reg = ~0;
+
+	unsigned chan_mask = 0; // bit n set = channel n claimed by an earlier value's chunk
+
+	k = 0;
+	for (vvec::iterator I = cv.begin(), E = cv.end(); I != E; ++I, ++k) {
+		value *v = *I;
+
+		if (!v->chunk)
+			create_chunk(v);
+
+		ch[k] = v->chunk;
+
+		if (v->chunk->is_chan_pinned()) {
+			unsigned chan = 1 << v->chunk->pin.chan();
+
+			if (chan & chan_mask) { // channel already in use
+				ch[k] = detach_value(v);
+				assert(!ch[k]->is_chan_pinned());
+			} else {
+				chan_mask |= chan;
+			}
+		}
+		// v->chunk is the detached chunk here if detach_value() ran above
+		if (v->chunk->is_reg_pinned()) {
+			if (!reg_pinned) {
+				reg_pinned = true;
+				pin_reg = v->chunk->pin.sel();
+			}
+		}
+
+		get_chunk_interferences(ch[k], interf[k]);
+		init_reg_bitset(rb[k], interf[k]);
+	}
+
+	unsigned start_reg, end_reg;
+
+	start_reg = 0;
+	end_reg = sh.num_nontemp_gpr();
+
+	unsigned min_reg = end_reg;
+	unsigned min_swz[4];
+	unsigned i, pass = reg_pinned ? 0 : 1;
+
+	bool done = false;
+
+	while (pass < 2) {
+
+		unsigned rs, re;
+
+		if (pass == 0) {
+			re = pin_reg + 1;
+			rs = pin_reg;
+		} else {
+			re = end_reg;
+			rs = start_reg;
+		}
+
+		min_reg = re;
+
+		// cycle on swizzle combinations
+		do {
+			for (i = 0; i < cnt; ++i) {
+				if (ch[i]->flags & RCF_PIN_CHAN)
+					if (ch[i]->pin.chan() != swz[i])
+						break;
+			}
+			if (i != cnt)
+				continue; // in a do/while this jumps to the next_permutation() condition
+
+			// looking for minimal reg number such that the constrained chunks
+			// may be colored with the current swizzle combination
+			for (unsigned reg = rs; reg < min_reg; ++reg) {
+				for (i = 0; i < cnt; ++i) {
+					unsigned bit = sel_chan(reg, swz[i]);
+					if (bit < rb[i].size() && rb[i].get(bit))
+						break;
+				}
+				if (i == cnt) {
+					done = true;
+					min_reg = reg; // later permutations must beat this register
+					std::copy(swz, swz + 4, min_swz);
+					break;
+				}
+			}
+
+			if (pass == 0 && done)
+				break;
+
+		} while (std::next_permutation(swz, swz + 4));
+
+		if (pass == 0 && done)
+			break;
+
+		++pass;
+	};
+
+	assert(done);
+
+	RA_DUMP(
+		cerr << "min reg = " << min_reg << " min_swz = "
+				<< min_swz[0] << min_swz[1] << min_swz[2] << min_swz[3] << "\n";
+	);
+
+	for (i = 0; i < cnt; ++i) {
+		sel_chan color(min_reg, min_swz[i]);
+		ra_chunk *cc = ch[i];
+
+		if (cc->is_fixed()) {
+			if (cc->pin != color)
+				cc = detach_value(cv[i]);
+			else
+				continue;
+		}
+
+		color_chunk(cc, color);
+		cc->fix();
+	}
+}
+/*
+ * Processes the `constraints` queue in its iteration order, dispatching
+ * CK_SAME_REG constraints to color_reg_constraint() and CK_PHI constraints
+ * to color_phi_constraint(); other kinds are ignored.
+ */
+void coalescer::color_constraints() {
+	for (constraint_queue::iterator I = constraints.begin(),
+			E = constraints.end(); I != E; ++I) {
+
+		ra_constraint *c = *I;
+
+		RA_DUMP(
+			cerr << "color_constraints: ";
+			dump_constraint(c);
+		);
+
+		if (c->kind == CK_SAME_REG)
+			color_reg_constraint(c);
+		else if (c->kind == CK_PHI)
+			color_phi_constraint(c);
+	}
+}
+
+} // namespace r600_sb