r600g/sb: initial commit of the optimizing shader backend

author: Vadim Girlin <vadimgirlin@gmail.com> 2013-04-30 20:51:36 +0400
committer: Vadim Girlin <vadimgirlin@gmail.com> 2013-04-30 21:50:47 +0400
commit: 2cd769179345799d383f92dd615991755ec24be1 (patch)
tree: 9863c9e92e645cad35a861b7de76f0c98d64b0d0 /src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp
parent: fbb065d629d2f79a6224fc3e5e89d5acc275e3b4 (diff)
download: external_mesa3d-2cd769179345799d383f92dd615991755ec24be1.zip
external_mesa3d-2cd769179345799d383f92dd615991755ec24be1.tar.gz
external_mesa3d-2cd769179345799d383f92dd615991755ec24be1.tar.bz2
1 files changed, 608 insertions, 0 deletions
diff --git a/src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp b/src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp
new file mode 100644
index 0000000..52e7668
--- /dev/null
+++ b/src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp
@@ -0,0 +1,608 @@
+/*
+ * Copyright 2013 Vadim Girlin <vadimgirlin@gmail.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * on the rights to use, copy, modify, merge, publish, distribute, sub
+ * license, and/or sell copies of the Software, and to permit persons to whom
+ * the Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors:
+ *      Vadim Girlin
+ */
+
+// Set RA_DEBUG to a nonzero value to compile in the RA_DUMP() tracing
+// statements used throughout this file.
+#define RA_DEBUG 0
+
+// RA_DUMP(q) expands to the statements `q` (wrapped in a do/while(0) block)
+// when RA_DEBUG is nonzero, and to nothing otherwise.
+#if RA_DEBUG
+#define RA_DUMP(q) do { q } while (0)
+#else
+#define RA_DUMP(q)
+#endif
+
+#include "sb_shader.h"
+#include "sb_pass.h"
+
+namespace r600_sb {
+
+using std::cerr;
+
+// Entry point of the ra_coalesce pass: runs the shader's coalescer.
+// Always returns 0.
+int ra_coalesce::run() {
+	sh.coal.run();
+	return 0;
+}
+
+// Records an affinity edge with the given cost between values `a` and `b`.
+// Both endpoints are asserted to satisfy is_sgpr(). The edge object is
+// heap-allocated and released in ~coalescer().
+void coalescer::add_edge(value* a, value* b, unsigned cost) {
+	assert(a->is_sgpr() && b->is_sgpr());
+
+	ra_edge *edge = new ra_edge(a, b, cost);
+	edges.insert(edge);
+}
+
+// Creates a new chunk that contains only `v`, copies the value's
+// channel/register pin state (flags and pin_gpr) into it, appends the chunk
+// to all_chunks and makes it the value's chunk.
+void coalescer::create_chunk(value *v) {
+	assert(v->is_sgpr());
+
+	ra_chunk *chunk = new ra_chunk();
+	chunk->values.push_back(v);
+
+	// the chunk inherits whatever pinning the value carries
+	if (v->is_chan_pinned())
+		chunk->flags |= RCF_PIN_CHAN;
+
+	if (v->is_reg_pinned())
+		chunk->flags |= RCF_PIN_REG;
+
+	chunk->pin = v->pin_gpr;
+
+	RA_DUMP(
+		cerr << "create_chunk: ";
+		dump_chunk(chunk);
+	);
+
+	all_chunks.push_back(chunk);
+	v->chunk = chunk;
+}
+
+// Merges the chunk of e->b into the chunk of e->a.
+//
+// Pins that only the absorbed chunk has are transferred to the surviving
+// chunk, all values of the absorbed chunk are re-pointed and appended to the
+// surviving one, and the surviving chunk's cost grows by the absorbed
+// chunk's cost plus the edge cost. The absorbed chunk is removed from
+// all_chunks and deleted.
+void coalescer::unify_chunks(ra_edge *e) {
+	ra_chunk *c1 = e->a->chunk;
+	ra_chunk *c2 = e->b->chunk;
+
+	RA_DUMP(
+		cerr << "unify_chunks: ";
+		dump_chunk(c1);
+		dump_chunk(c2);
+	);
+
+	// take over the channel pin if only c2 has one
+	if (c2->is_chan_pinned() && !c1->is_chan_pinned()) {
+		c1->flags |= RCF_PIN_CHAN;
+		c1->pin = sel_chan(c1->pin.sel(), c2->pin.chan());
+	}
+
+	// take over the register pin if only c2 has one
+	if (c2->is_reg_pinned() && !c1->is_reg_pinned()) {
+		c1->flags |= RCF_PIN_REG;
+		c1->pin = sel_chan(c2->pin.sel(), c1->pin.chan());
+	}
+
+	// move the values over, re-pointing each one to the surviving chunk
+	c1->values.reserve(c1->values.size() + c2->values.size());
+
+	vvec::iterator VI = c2->values.begin();
+	const vvec::iterator VE = c2->values.end();
+	for (; VI != VE; ++VI) {
+		value *moved = *VI;
+		moved->chunk = c1;
+		c1->values.push_back(moved);
+	}
+
+	c1->cost += c2->cost + e->cost;
+
+	// drop the absorbed chunk from the global list before freeing it
+	chunk_vec::iterator F = std::find(all_chunks.begin(), all_chunks.end(), c2);
+	assert(F != all_chunks.end());
+	all_chunks.erase(F);
+
+	delete c2;
+}
+
+// Returns true if chunks `c1` and `c2` cannot be merged, i.e. when
+//  - both are channel-pinned to different channels, or
+//  - both are register-pinned to different registers, or
+//  - some value of c1 has a value of c2 in its interference set and the two
+//    values are not v_equal().
+bool coalescer::chunks_interference(ra_chunk *c1, ra_chunk *c2) {
+	// a pin can only conflict when it is present on both chunks
+	const unsigned common = c1->flags & c2->flags;
+
+	if ((common & RCF_PIN_CHAN) && c1->pin.chan() != c2->pin.chan())
+		return true;
+
+	if ((common & RCF_PIN_REG) && c1->pin.sel() != c2->pin.sel())
+		return true;
+
+	// pairwise check of every value of c1 against every value of c2
+	for (vvec::iterator I1 = c1->values.begin(), E1 = c1->values.end();
+			I1 != E1; ++I1) {
+		value *v1 = *I1;
+
+		for (vvec::iterator I2 = c2->values.begin(), E2 = c2->values.end();
+				I2 != E2; ++I2) {
+			value *v2 = *I2;
+
+			if (v1->v_equal(v2))
+				continue;
+
+			if (v1->interferences.contains(v2))
+				return true;
+		}
+	}
+
+	return false;
+}
+
+// Walks the affinity edges in the iteration order of `edges` and tries to
+// coalesce the two endpoints of each edge into one chunk. Endpoints that
+// have no chunk yet get a fresh single-value chunk first.
+void coalescer::build_chunks() {
+
+	edge_queue::iterator I = edges.begin();
+	const edge_queue::iterator E = edges.end();
+
+	for (; I != E; ++I) {
+		ra_edge *edge = *I;
+
+		if (!edge->a->chunk)
+			create_chunk(edge->a);
+
+		if (!edge->b->chunk)
+			create_chunk(edge->b);
+
+		ra_chunk *ca = edge->a->chunk;
+		ra_chunk *cb = edge->b->chunk;
+
+		if (ca != cb) {
+			// distinct chunks: merge them unless they interfere
+			if (!chunks_interference(ca, cb))
+				unify_chunks(edge);
+			continue;
+		}
+
+		// both endpoints already share a chunk: just account for the edge
+		ca->cost += edge->cost;
+	}
+}
+
+// Allocates a constraint of the given kind, records it in all_constraints
+// and returns it. The object is released in ~coalescer().
+ra_constraint* coalescer::create_constraint(constraint_kind kind) {
+	ra_constraint *constraint = new ra_constraint(kind);
+
+	all_constraints.push_back(constraint);
+
+	return constraint;
+}
+
+// Debug helper: prints every affinity edge (both endpoints and the cost)
+// to cerr.
+void coalescer::dump_edges() {
+	cerr << "######## affinity edges\n";
+
+	edge_queue::iterator I = edges.begin();
+	const edge_queue::iterator E = edges.end();
+
+	for (; I != E; ++I) {
+		ra_edge *edge = *I;
+
+		cerr << "  ra_edge ";
+		dump::dump_val(edge->a);
+
+		cerr << " <-> ";
+		dump::dump_val(edge->b);
+
+		cerr << "   cost = ";
+		cerr << edge->cost;
+		cerr << "\n";
+	}
+}
+
+// Debug helper: prints every chunk in all_chunks to cerr.
+void coalescer::dump_chunks() {
+	cerr << "######## chunks\n";
+
+	chunk_vec::iterator I = all_chunks.begin();
+	const chunk_vec::iterator E = all_chunks.end();
+
+	for (; I != E; ++I)
+		dump_chunk(*I);
+}
+
+
+// Debug helper: prints every queued constraint to cerr, in queue order.
+void coalescer::dump_constraint_queue() {
+	cerr << "######## constraints\n";
+
+	constraint_queue::iterator I = constraints.begin();
+	const constraint_queue::iterator E = constraints.end();
+
+	for (; I != E; ++I)
+		dump_constraint(*I);
+}
+
+// Debug helper: prints one chunk to cerr on a single line - its cost, its
+// values, the pinned register/channel when the matching flag is set, and a
+// GLOBAL marker when RCF_GLOBAL is set.
+void coalescer::dump_chunk(ra_chunk* c) {
+	cerr << "  ra_chunk cost = " << c->cost << "  :  ";
+	dump::dump_vec(c->values);
+
+	if (c->flags & RCF_PIN_REG) {
+		cerr << "   REG = " << c->pin.sel();
+	}
+
+	if (c->flags & RCF_PIN_CHAN) {
+		cerr << "   CHAN = " << c->pin.chan();
+	}
+
+	if (c->flags & RCF_GLOBAL) {
+		cerr << "  GLOBAL";
+	}
+
+	cerr << "\n";
+}
+
+// Debug helper: prints one constraint to cerr - its kind, cost and values.
+// An unrecognized kind is printed as UNKNOWN_KIND and then asserts.
+void coalescer::dump_constraint(ra_constraint* c) {
+	cerr << "  ra_constraint: ";
+
+	if (c->kind == CK_PACKED_BS) {
+		cerr << "PACKED_BS";
+	} else if (c->kind == CK_PHI) {
+		cerr << "PHI";
+	} else if (c->kind == CK_SAME_REG) {
+		cerr << "SAME_REG";
+	} else {
+		cerr << "UNKNOWN_KIND";
+		assert(0);
+	}
+
+	cerr << "  cost = " << c->cost << "  : ";
+	dump::dump_vec(c->values);
+	cerr << "\n";
+}
+
+// Adds to `s` the interference sets of all values in chunk `c`, then
+// removes the chunk's own values from `s`. `s` is not cleared beforehand.
+void coalescer::get_chunk_interferences(ra_chunk *c, val_set &s) {
+	vvec &members = c->values;
+
+	for (vvec::iterator I = members.begin(), E = members.end(); I != E; ++I)
+		s.add_set((*I)->interferences);
+
+	s.remove_vec(members);
+}
+
+// Inserts every chunk from all_chunks that is not fixed into the `chunks`
+// queue.
+void coalescer::build_chunk_queue() {
+	chunk_vec::iterator I = all_chunks.begin();
+	const chunk_vec::iterator E = all_chunks.end();
+
+	for (; I != E; ++I) {
+		ra_chunk *candidate = *I;
+
+		if (candidate->is_fixed())
+			continue;
+
+		chunks.insert(candidate);
+	}
+}
+
+// Fills the `constraints` queue from all_constraints.
+//
+// Only CK_SAME_REG constraints that are non-empty and whose first value
+// satisfies is_sgpr() are queued. The cost of a queued constraint is the
+// sum of the costs of the chunks its values already belong to; values
+// without a chunk get a new single-value chunk and add nothing to the sum.
+void coalescer::build_constraint_queue() {
+	for (constraint_vec::iterator CI = all_constraints.begin(),
+			CE = all_constraints.end(); CI != CE; ++CI) {
+		ra_constraint *c = *CI;
+
+		if (c->values.empty())
+			continue;
+
+		if (!c->values.front()->is_sgpr())
+			continue;
+
+		if (c->kind != CK_SAME_REG)
+			continue;
+
+		unsigned total = 0;
+
+		for (vvec::iterator VI = c->values.begin(), VE = c->values.end();
+				VI != VE; ++VI) {
+			value *v = *VI;
+
+			if (v->chunk)
+				total += v->chunk->cost;
+			else
+				create_chunk(v);
+		}
+
+		c->cost = total;
+		constraints.insert(c);
+	}
+}
+
+// Assigns a color (register/channel id) to every queued chunk that is not
+// fixed and holds more than one value.
+//
+// For each such chunk the ids already held by interfering values are
+// collected into a bitset, then candidates are scanned in up to two passes:
+// pass 0 (only for register-pinned chunks) looks at the pinned register
+// alone, pass 1 looks at registers [0, sh.num_nontemp_gpr()). A
+// channel-pinned chunk only considers its pinned channel, otherwise
+// channels 0..3 are tried. The first free id wins; a zero `color` is
+// treated as "nothing found" and asserts.
+void coalescer::color_chunks() {
+
+	for (chunk_queue::iterator I = chunks.begin(), E = chunks.end();
+			I != E; ++I) {
+		ra_chunk *c = *I;
+
+		if (c->is_fixed())
+			continue;
+
+		if (c->values.size() == 1)
+			continue;
+
+		sb_bitset rb;
+		val_set interf;
+
+		get_chunk_interferences(c, interf);
+
+		RA_DUMP(
+			cerr << "color_chunks: ";
+			dump_chunk(c);
+			cerr << "\n interferences: ";
+			dump::dump_set(sh,interf);
+			cerr << "\n";
+		);
+
+		init_reg_bitset(rb, interf);
+
+		// start with the pinned-register pass only if there is a pin
+		unsigned pass = c->is_reg_pinned() ? 0 : 1;
+
+		// channel range [cs, ce) to scan
+		unsigned cs = 0, ce = 4;
+		if (c->is_chan_pinned()) {
+			cs = c->pin.chan();
+			ce = cs + 1;
+		}
+
+		unsigned color = 0;
+
+		for (; pass < 2; ++pass) {
+
+			// register range [rs, re) to scan in this pass
+			unsigned rs = 0, re;
+			if (pass == 0) {
+				rs = c->pin.sel();
+				re = rs + 1;
+			} else {
+				re = sh.num_nontemp_gpr();
+			}
+
+			for (unsigned reg = rs; reg < re; ++reg) {
+				for (unsigned chan = cs; chan < ce; ++chan) {
+					unsigned bit = sel_chan(reg, chan);
+					// ids beyond the bitset size are never occupied
+					bool occupied = bit < rb.size() && rb.get(bit);
+					if (!occupied) {
+						color = bit;
+						break;
+					}
+				}
+				if (color)
+					break;
+			}
+
+			if (color)
+				break;
+		}
+
+		assert(color);
+		color_chunk(c, color);
+	}
+}
+
+// Marks in `bs` the gpr id of every value in `vs` that satisfies is_sgpr()
+// and has a nonzero gpr. The bitset is grown (to gpr + 64 bits) whenever an
+// id does not fit.
+void coalescer::init_reg_bitset(sb_bitset &bs, val_set &vs) {
+
+	for (val_set::iterator I = vs.begin(sh), E = vs.end(sh); I != E; ++I) {
+		value *v = *I;
+
+		if (v->is_sgpr() && v->gpr) {
+			if (v->gpr >= bs.size())
+				bs.resize(v->gpr + 64);
+
+			bs.set(v->gpr, 1);
+		}
+	}
+}
+
+// Assigns `color` to the values of chunk `c`.
+//
+// A value whose own register or channel pin disagrees with `color` is
+// detached into its own chunk instead of being colored. Colored values that
+// belong to a CK_PHI constraint are fixed. Afterwards the chunk's pin is set
+// to `color`, and the chunk itself is fixed if it is register-pinned.
+void coalescer::color_chunk(ra_chunk *c, sel_chan color) {
+
+	// iterate over a copy: detach_value() erases from the value list of the
+	// value's chunk
+	vvec members = c->values;
+
+	for (vvec::iterator I = members.begin(), E = members.end(); I != E;
+			++I) {
+		value *v = *I;
+
+		if ((v->is_reg_pinned() && v->pin_gpr.sel() != color.sel()) ||
+				(v->is_chan_pinned() && v->pin_gpr.chan() != color.chan())) {
+			detach_value(v);
+			continue;
+		}
+
+		v->gpr = color;
+
+		if (v->constraint && v->constraint->kind == CK_PHI) {
+			v->fix();
+		}
+
+		RA_DUMP(
+			cerr << " assigned " << color << " to ";
+			dump::dump_val(v);
+			cerr << "\n";
+		);
+	}
+
+	c->pin = color;
+
+	if (c->is_reg_pinned())
+		c->fix();
+}
+
+// Releases every constraint, chunk and edge still registered with the
+// coalescer.
+coalescer::~coalescer() {
+
+	// FIXME use pool allocator ??
+
+	for (constraint_vec::iterator CI = all_constraints.begin(),
+			CE = all_constraints.end(); CI != CE; ++CI)
+		delete *CI;
+
+	for (chunk_vec::iterator KI = all_chunks.begin(),
+			KE = all_chunks.end(); KI != KE; ++KI)
+		delete *KI;
+
+	for (edge_queue::iterator EI = edges.begin(), EE = edges.end();
+			EI != EE; ++EI)
+		delete *EI;
+}
+
+// Runs the complete coalescing sequence: builds chunks from the affinity
+// edges, queues and colors the constraints, then queues the chunks that are
+// not fixed and colors them. With RA_DEBUG enabled the intermediate state
+// is dumped between the steps.
+void coalescer::run() {
+	RA_DUMP( dump_edges(); );
+
+	// merge values connected by affinity edges into chunks
+	build_chunks();
+	RA_DUMP( dump_chunks(); );
+
+	// constraints are colored before the remaining chunks
+	build_constraint_queue();
+	RA_DUMP( dump_constraint_queue(); );
+
+	color_constraints();
+
+	// the chunk queue is built only now, so it skips chunks that are
+	// already fixed at this point
+	build_chunk_queue();
+	color_chunks();
+}
+
+// Hook for coloring CK_PHI constraints (called from color_constraints()).
+// The body is empty: no coloring is performed for this kind here.
+void coalescer::color_phi_constraint(ra_constraint* c) {
+}
+
+// Removes `v` from the chunk it currently belongs to and gives it a fresh
+// single-value chunk, which is fixed right away if the value is
+// register-pinned. Returns the new chunk.
+ra_chunk* coalescer::detach_value(value *v) {
+
+	vvec::iterator F = std::find(v->chunk->values.begin(),
+	                             v->chunk->values.end(), v);
+
+	assert(F != v->chunk->values.end());
+	v->chunk->values.erase(F);
+
+	// create_chunk() re-points v->chunk to the newly created chunk
+	create_chunk(v);
+	ra_chunk *fresh = v->chunk;
+
+	if (v->is_reg_pinned())
+		fresh->fix();
+
+	RA_DUMP(
+		cerr << "           detached : ";
+		dump_chunk(fresh);
+	);
+
+	return fresh;
+}
+
+// Colors the values of a constraint so that they all land in the same
+// register, each in a different channel.
+//
+// Steps:
+//  1. Every value gets a chunk if it has none. If the chunk's pinned channel
+//     was already claimed by an earlier value of this constraint, the value
+//     is detached into its own chunk. The first register pin seen is
+//     remembered. For each resulting chunk the ids occupied by interfering
+//     values are collected into a bitset.
+//  2. Channel assignments are enumerated as permutations of {0,1,2,3};
+//     permutations that contradict a chunk's channel pin are skipped.
+//     Pass 0 (only when a register pin was seen) tries the pinned register
+//     alone and stops at the first fit. Pass 1 scans registers
+//     [0, sh.num_nontemp_gpr()) and keeps the lowest register that fits
+//     over all permutations.
+//  3. Every chunk is colored with (min_reg, min_swz[i]) and fixed. A chunk
+//     that is already fixed with that same color is skipped; one fixed with
+//     a different color has the value detached and the new chunk colored.
+//
+// The per-value working arrays hold four entries, so the constraint must not
+// contain more than four values (asserted).
+void coalescer::color_reg_constraint(ra_constraint *c) {
+	unsigned k, cnt = c->values.size();
+	vvec & cv = c->values;
+
+	// ch[], interf[] and rb[] below are indexed by value position
+	assert(cnt <= 4);
+
+	ra_chunk *ch[4];
+	unsigned swz[4] = {0, 1, 2, 3};
+	val_set interf[4];
+	sb_bitset rb[4];
+
+	bool reg_pinned = false;
+	unsigned pin_reg = ~0;
+
+	// bit n set: channel n is already claimed by a channel-pinned chunk
+	unsigned chan_mask = 0;
+
+	k = 0;
+	for (vvec::iterator I = cv.begin(), E = cv.end(); I != E; ++I, ++k) {
+		value *v = *I;
+
+		if (!v->chunk)
+			create_chunk(v);
+
+		ch[k] = v->chunk;
+
+		if (v->chunk->is_chan_pinned()) {
+			unsigned chan = 1 << v->chunk->pin.chan();
+
+			if (chan & chan_mask) { // channel already in use
+				ch[k] = detach_value(v);
+				assert(!ch[k]->is_chan_pinned());
+			} else {
+				chan_mask |= chan;
+			}
+		}
+
+		// only the first register pin encountered is honored
+		if (v->chunk->is_reg_pinned() && !reg_pinned) {
+			reg_pinned = true;
+			pin_reg = v->chunk->pin.sel();
+		}
+
+		get_chunk_interferences(ch[k], interf[k]);
+		init_reg_bitset(rb[k], interf[k]);
+	}
+
+	unsigned start_reg, end_reg;
+
+	start_reg = 0;
+	end_reg = sh.num_nontemp_gpr();
+
+	unsigned min_reg = end_reg;
+	// initialized so that a failed search never reads indeterminate values
+	unsigned min_swz[4] = {0, 1, 2, 3};
+	unsigned i, pass = reg_pinned ? 0 : 1;
+
+	bool done = false;
+
+	while (pass < 2) {
+
+		unsigned rs, re;
+
+		if (pass == 0) {
+			re = pin_reg + 1;
+			rs = pin_reg;
+		} else {
+			re = end_reg;
+			rs = start_reg;
+		}
+
+		min_reg = re;
+
+		// cycle on swizzle combinations
+		// (when next_permutation() returns false it has wrapped swz back to
+		// the sorted order, so the next pass starts from {0,1,2,3} again)
+		do {
+			// skip combinations that violate a chunk's channel pin
+			for (i = 0; i < cnt; ++i) {
+				if (ch[i]->flags & RCF_PIN_CHAN)
+					if (ch[i]->pin.chan() != swz[i])
+						break;
+			}
+			if (i != cnt)
+				continue;
+
+			// looking for minimal reg number such that the constrained chunks
+			// may be colored with the current swizzle combination
+			for (unsigned reg = rs; reg < min_reg; ++reg) {
+				for (i = 0; i < cnt; ++i) {
+					unsigned bit = sel_chan(reg, swz[i]);
+					if (bit < rb[i].size() && rb[i].get(bit))
+						break;
+				}
+				if (i == cnt) {
+					done = true;
+					min_reg = reg;
+					std::copy(swz, swz + 4, min_swz);
+					break;
+				}
+			}
+
+			// the pinned register is the only candidate in pass 0, so the
+			// first fit is final
+			if (pass == 0 && done)
+				break;
+
+		} while (std::next_permutation(swz, swz + 4));
+
+		if (pass == 0 && done)
+			break;
+
+		++pass;
+	}
+
+	assert(done);
+
+	RA_DUMP(
+	cerr << "min reg = " << min_reg << "   min_swz = "
+			<< min_swz[0] << min_swz[1] << min_swz[2] << min_swz[3] << "\n";
+	);
+
+	for (i = 0; i < cnt; ++i) {
+		sel_chan color(min_reg, min_swz[i]);
+		ra_chunk *cc = ch[i];
+
+		if (cc->is_fixed()) {
+			// already fixed to the chosen color: nothing to do
+			if (cc->pin == color)
+				continue;
+
+			// fixed elsewhere: split the value off and color the new chunk
+			cc = detach_value(cv[i]);
+		}
+
+		color_chunk(cc, color);
+		cc->fix();
+	}
+}
+
+// Processes the queued constraints in queue order, dispatching on the
+// constraint kind: CK_SAME_REG goes to color_reg_constraint(), CK_PHI to
+// color_phi_constraint(); any other kind is ignored.
+void coalescer::color_constraints() {
+	constraint_queue::iterator I = constraints.begin();
+	const constraint_queue::iterator E = constraints.end();
+
+	for (; I != E; ++I) {
+		ra_constraint *c = *I;
+
+		RA_DUMP(
+			cerr << "color_constraints: ";
+			dump_constraint(c);
+		);
+
+		switch (c->kind) {
+		case CK_SAME_REG:
+			color_reg_constraint(c);
+			break;
+		case CK_PHI:
+			color_phi_constraint(c);
+			break;
+		default:
+			break;
+		}
+	}
+}
+
+} // namespace r600_sb
author	Vadim Girlin <vadimgirlin@gmail.com>	2013-04-30 20:51:36 +0400
committer	Vadim Girlin <vadimgirlin@gmail.com>	2013-04-30 21:50:47 +0400
commit	2cd769179345799d383f92dd615991755ec24be1 (patch)
tree	9863c9e92e645cad35a861b7de76f0c98d64b0d0 /src/gallium/drivers/r600/sb/sb_ra_coalesce.cpp
parent	fbb065d629d2f79a6224fc3e5e89d5acc275e3b4 (diff)
download	external_mesa3d-2cd769179345799d383f92dd615991755ec24be1.zip external_mesa3d-2cd769179345799d383f92dd615991755ec24be1.tar.gz external_mesa3d-2cd769179345799d383f92dd615991755ec24be1.tar.bz2