Diffstat (limited to 'src/compiler/nir')
 src/compiler/nir/glsl_to_nir.cpp                   |  25
 src/compiler/nir/nir.c                             |  33
 src/compiler/nir/nir.h                             |  77
 src/compiler/nir/nir_algebraic.py                  |  31
 src/compiler/nir/nir_builder.h                     |  62
 src/compiler/nir/nir_clone.c                       |  20
 src/compiler/nir/nir_constant_expressions.h        |   2
 src/compiler/nir/nir_constant_expressions.py       | 246
 src/compiler/nir/nir_from_ssa.c                    |   6
 src/compiler/nir/nir_gs_count_vertices.c           |   4
 src/compiler/nir/nir_instr_set.c                   |  24
 src/compiler/nir/nir_lower_alu_to_scalar.c         |  11
 src/compiler/nir/nir_lower_atomics.c               |  12
 src/compiler/nir/nir_lower_clip.c                  |   2
 src/compiler/nir/nir_lower_indirect_derefs.c       |   6
 src/compiler/nir/nir_lower_io.c                    |   6
 src/compiler/nir/nir_lower_load_const_to_scalar.c  |   2
 src/compiler/nir/nir_lower_locals_to_regs.c        |   9
 src/compiler/nir/nir_lower_phis_to_scalar.c        |  10
 src/compiler/nir/nir_lower_system_values.c         |   6
 src/compiler/nir/nir_lower_tex.c                   |   8
 src/compiler/nir/nir_lower_two_sided_color.c       |   2
 src/compiler/nir/nir_lower_var_copies.c            |   5
 src/compiler/nir/nir_lower_vars_to_ssa.c           |   5
 src/compiler/nir/nir_opcodes.py                    | 138
 src/compiler/nir/nir_opt_algebraic.py              | 151
 src/compiler/nir/nir_opt_constant_folding.c        |  31
 src/compiler/nir/nir_opt_dead_cf.c                 |   2
 src/compiler/nir/nir_opt_peephole_select.c         |   3
 src/compiler/nir/nir_phi_builder.c                 |  65
 src/compiler/nir/nir_phi_builder.h                 |  33
 src/compiler/nir/nir_print.c                       |   4
 src/compiler/nir/nir_repair_ssa.c                  |   3
 src/compiler/nir/nir_search.c                      | 254
 src/compiler/nir/nir_search.h                      |  14
 src/compiler/nir/nir_to_ssa.c                      |   8
 src/compiler/nir/nir_validate.c                    |  42
 src/compiler/nir/spirv/spirv_to_nir.c              |  36
 src/compiler/nir/spirv/vtn_glsl450.c               |   4
 src/compiler/nir/spirv/vtn_variables.c             |  11
 40 files changed, 1028 insertions(+), 385 deletions(-)
diff --git a/src/compiler/nir/glsl_to_nir.cpp b/src/compiler/nir/glsl_to_nir.cpp
index da5d730..7b8b466 100644
--- a/src/compiler/nir/glsl_to_nir.cpp
+++ b/src/compiler/nir/glsl_to_nir.cpp
@@ -731,7 +731,7 @@ nir_visitor::visit(ir_call *ir)
ir_dereference *param =
(ir_dereference *) ir->actual_parameters.get_head();
instr->variables[0] = evaluate_deref(&instr->instr, param);
- nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
nir_builder_instr_insert(&b, &instr->instr);
break;
}
@@ -765,7 +765,7 @@ nir_visitor::visit(ir_call *ir)
const nir_intrinsic_info *info =
&nir_intrinsic_infos[instr->intrinsic];
nir_ssa_dest_init(&instr->instr, &instr->dest,
- info->dest_components, NULL);
+ info->dest_components, 32, NULL);
}
if (op == nir_intrinsic_image_size ||
@@ -826,7 +826,7 @@ nir_visitor::visit(ir_call *ir)
nir_builder_instr_insert(&b, &instr->instr);
break;
case nir_intrinsic_shader_clock:
- nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
nir_builder_instr_insert(&b, &instr->instr);
break;
case nir_intrinsic_store_ssbo: {
@@ -867,7 +867,7 @@ nir_visitor::visit(ir_call *ir)
/* Setup destination register */
nir_ssa_dest_init(&instr->instr, &instr->dest,
- type->vector_elements, NULL);
+ type->vector_elements, 32, NULL);
/* Insert the created nir instruction now since in the case of boolean
* result we will need to emit another instruction after it
@@ -890,7 +890,7 @@ nir_visitor::visit(ir_call *ir)
load_ssbo_compare->src[1].swizzle[i] = 0;
nir_ssa_dest_init(&load_ssbo_compare->instr,
&load_ssbo_compare->dest.dest,
- type->vector_elements, NULL);
+ type->vector_elements, 32, NULL);
load_ssbo_compare->dest.write_mask = (1 << type->vector_elements) - 1;
nir_builder_instr_insert(&b, &load_ssbo_compare->instr);
dest = &load_ssbo_compare->dest.dest;
@@ -936,7 +936,7 @@ nir_visitor::visit(ir_call *ir)
/* Atomic result */
assert(ir->return_deref);
nir_ssa_dest_init(&instr->instr, &instr->dest,
- ir->return_deref->type->vector_elements, NULL);
+ ir->return_deref->type->vector_elements, 32, NULL);
nir_builder_instr_insert(&b, &instr->instr);
break;
}
@@ -951,8 +951,9 @@ nir_visitor::visit(ir_call *ir)
instr->num_components = type->vector_elements;
/* Setup destination register */
+ unsigned bit_size = glsl_get_bit_size(type->base_type);
nir_ssa_dest_init(&instr->instr, &instr->dest,
- type->vector_elements, NULL);
+ type->vector_elements, bit_size, NULL);
nir_builder_instr_insert(&b, &instr->instr);
break;
@@ -1013,8 +1014,10 @@ nir_visitor::visit(ir_call *ir)
/* Atomic result */
assert(ir->return_deref);
+ unsigned bit_size = glsl_get_bit_size(ir->return_deref->type->base_type);
nir_ssa_dest_init(&instr->instr, &instr->dest,
- ir->return_deref->type->vector_elements, NULL);
+ ir->return_deref->type->vector_elements,
+ bit_size, NULL);
nir_builder_instr_insert(&b, &instr->instr);
break;
}
@@ -1061,6 +1064,9 @@ nir_visitor::visit(ir_assignment *ir)
{
unsigned num_components = ir->lhs->type->vector_elements;
+ b.exact = ir->lhs->variable_referenced()->data.invariant ||
+ ir->lhs->variable_referenced()->data.precise;
+
if ((ir->rhs->as_dereference() || ir->rhs->as_constant()) &&
(ir->write_mask == (1 << num_components) - 1 || ir->write_mask == 0)) {
/* We're doing a plain-as-can-be copy, so emit a copy_var */
@@ -1163,7 +1169,7 @@ nir_visitor::add_instr(nir_instr *instr, unsigned num_components)
nir_dest *dest = get_instr_dest(instr);
if (dest)
- nir_ssa_dest_init(instr, dest, num_components, NULL);
+ nir_ssa_dest_init(instr, dest, num_components, 32, NULL);
nir_builder_instr_insert(&b, instr);
@@ -1203,6 +1209,7 @@ nir_visitor::visit(ir_expression *ir)
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(this->shader, nir_intrinsic_load_ubo);
load->num_components = ir->type->vector_elements;
+ load->dest.ssa.bit_size = glsl_get_bit_size(ir->type->base_type);
load->src[0] = nir_src_for_ssa(evaluate_rvalue(ir->operands[0]));
load->src[1] = nir_src_for_ssa(evaluate_rvalue(ir->operands[1]));
add_instr(&load->instr, ir->type->vector_elements);
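
[note] The change that threads through this whole file is the new bit-size argument to nir_ssa_dest_init(): plain 32-bit destinations pass a literal 32, while type-dependent sites derive the width from the GLSL type. A minimal sketch of the new call pattern (the surrounding intrinsic and 'type' are hypothetical context, mirroring the hunks above):

    /* Sketch: initializing an SSA destination after this patch. */
    nir_intrinsic_instr *load =
       nir_intrinsic_instr_create(shader, nir_intrinsic_load_var);
    load->num_components = type->vector_elements;

    /* New: the destination now carries an explicit bit size. */
    unsigned bit_size = glsl_get_bit_size(type->base_type);
    nir_ssa_dest_init(&load->instr, &load->dest,
                      type->vector_elements, bit_size, NULL);
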
diff --git a/src/compiler/nir/nir.c b/src/compiler/nir/nir.c
index 7e41ed3..b67916d 100644
--- a/src/compiler/nir/nir.c
+++ b/src/compiler/nir/nir.c
@@ -70,6 +70,7 @@ reg_create(void *mem_ctx, struct exec_list *list)
list_inithead(&reg->if_uses);
reg->num_components = 0;
+ reg->bit_size = 32;
reg->num_array_elems = 0;
reg->is_packed = false;
reg->name = NULL;
@@ -473,7 +474,7 @@ nir_load_const_instr_create(nir_shader *shader, unsigned num_components)
nir_load_const_instr *instr = ralloc(shader, nir_load_const_instr);
instr_init(&instr->instr, nir_instr_type_load_const);
- nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
+ nir_ssa_def_init(&instr->instr, &instr->def, num_components, 32, NULL);
return instr;
}
@@ -562,7 +563,7 @@ nir_ssa_undef_instr_create(nir_shader *shader, unsigned num_components)
nir_ssa_undef_instr *instr = ralloc(shader, nir_ssa_undef_instr);
instr_init(&instr->instr, nir_instr_type_ssa_undef);
- nir_ssa_def_init(&instr->instr, &instr->def, num_components, NULL);
+ nir_ssa_def_init(&instr->instr, &instr->def, num_components, 32, NULL);
return instr;
}
@@ -699,10 +700,10 @@ nir_deref_get_const_initializer_load(nir_shader *shader, nir_deref_var *deref)
case GLSL_TYPE_FLOAT:
case GLSL_TYPE_INT:
case GLSL_TYPE_UINT:
- load->value.u[i] = constant->value.u[matrix_offset + i];
+ load->value.u32[i] = constant->value.u[matrix_offset + i];
break;
case GLSL_TYPE_BOOL:
- load->value.u[i] = constant->value.b[matrix_offset + i] ?
+ load->value.u32[i] = constant->value.b[matrix_offset + i] ?
NIR_TRUE : NIR_FALSE;
break;
default:
@@ -731,18 +732,11 @@ reduce_cursor(nir_cursor cursor)
{
switch (cursor.option) {
case nir_cursor_before_block:
+ assert(nir_cf_node_prev(&cursor.block->cf_node) == NULL ||
+ nir_cf_node_prev(&cursor.block->cf_node)->type != nir_cf_node_block);
if (exec_list_is_empty(&cursor.block->instr_list)) {
/* Empty block. After is as good as before. */
cursor.option = nir_cursor_after_block;
- } else {
- /* Try to switch to after the previous block if there is one.
- * (This isn't likely, but it can happen.)
- */
- nir_cf_node *prev_node = nir_cf_node_prev(&cursor.block->cf_node);
- if (prev_node && prev_node->type == nir_cf_node_block) {
- cursor.block = nir_cf_node_as_block(prev_node);
- cursor.option = nir_cursor_after_block;
- }
}
return cursor;
@@ -1379,15 +1373,18 @@ nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest, nir_dest new_dest)
src_add_all_uses(dest->reg.indirect, instr, NULL);
}
+/* note: does *not* take ownership of 'name' */
void
nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
- unsigned num_components, const char *name)
+ unsigned num_components,
+ unsigned bit_size, const char *name)
{
- def->name = name;
+ def->name = ralloc_strdup(instr, name);
def->parent_instr = instr;
list_inithead(&def->uses);
list_inithead(&def->if_uses);
def->num_components = num_components;
+ def->bit_size = bit_size;
if (instr->block) {
nir_function_impl *impl =
@@ -1399,12 +1396,14 @@ nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
}
}
+/* note: does *not* take ownership of 'name' */
void
nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
- unsigned num_components, const char *name)
+ unsigned num_components, unsigned bit_size,
+ const char *name)
{
dest->is_ssa = true;
- nir_ssa_def_init(instr, &dest->ssa, num_components, name);
+ nir_ssa_def_init(instr, &dest->ssa, num_components, bit_size, name);
}
void
diff --git a/src/compiler/nir/nir.h b/src/compiler/nir/nir.h
index ab1afdb..2fd75ec 100644
--- a/src/compiler/nir/nir.h
+++ b/src/compiler/nir/nir.h
@@ -101,6 +101,7 @@ union nir_constant_data {
int i[16];
float f[16];
bool b[16];
+ double d[16];
};
typedef struct nir_constant {
@@ -381,6 +382,9 @@ typedef struct nir_register {
unsigned num_components; /** < number of vector components */
unsigned num_array_elems; /** < size of array (0 for no array) */
+ /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
+ uint8_t bit_size;
+
/** generic register index. */
unsigned index;
@@ -488,6 +492,9 @@ typedef struct nir_ssa_def {
struct list_head if_uses;
uint8_t num_components;
+
+ /* The bit-size of each channel; must be one of 8, 16, 32, or 64 */
+ uint8_t bit_size;
} nir_ssa_def;
struct nir_src;
@@ -594,6 +601,18 @@ nir_dest_for_reg(nir_register *reg)
return dest;
}
+static inline unsigned
+nir_src_bit_size(nir_src src)
+{
+ return src.is_ssa ? src.ssa->bit_size : src.reg.reg->bit_size;
+}
+
+static inline unsigned
+nir_dest_bit_size(nir_dest dest)
+{
+ return dest.is_ssa ? dest.ssa.bit_size : dest.reg.reg->bit_size;
+}
+
void nir_src_copy(nir_src *dest, const nir_src *src, void *instr_or_if);
void nir_dest_copy(nir_dest *dest, const nir_dest *src, nir_instr *instr);
@@ -649,9 +668,36 @@ typedef enum {
nir_type_float,
nir_type_int,
nir_type_uint,
- nir_type_bool
+ nir_type_bool,
+ nir_type_bool32 = 32 | nir_type_bool,
+ nir_type_int8 = 8 | nir_type_int,
+ nir_type_int16 = 16 | nir_type_int,
+ nir_type_int32 = 32 | nir_type_int,
+ nir_type_int64 = 64 | nir_type_int,
+ nir_type_uint8 = 8 | nir_type_uint,
+ nir_type_uint16 = 16 | nir_type_uint,
+ nir_type_uint32 = 32 | nir_type_uint,
+ nir_type_uint64 = 64 | nir_type_uint,
+ nir_type_float16 = 16 | nir_type_float,
+ nir_type_float32 = 32 | nir_type_float,
+ nir_type_float64 = 64 | nir_type_float,
} nir_alu_type;
+#define NIR_ALU_TYPE_SIZE_MASK 0xfffffff8
+#define NIR_ALU_TYPE_BASE_TYPE_MASK 0x00000007
+
+static inline unsigned
+nir_alu_type_get_type_size(nir_alu_type type)
+{
+ return type & NIR_ALU_TYPE_SIZE_MASK;
+}
+
+static inline unsigned
+nir_alu_type_get_base_type(nir_alu_type type)
+{
+ return type & NIR_ALU_TYPE_BASE_TYPE_MASK;
+}
+
typedef enum {
NIR_OP_IS_COMMUTATIVE = (1 << 0),
NIR_OP_IS_ASSOCIATIVE = (1 << 1),
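
[note] A quick illustration of the encoding: the low three bits hold the base type and the remaining bits hold the width, so each mask recovers its half directly. Unsized types (plain nir_type_float and friends) report a size of 0, which nir_build_alu below treats as "infer from the sources":

    nir_alu_type t = nir_type_float64;            /* 64 | nir_type_float */
    assert(nir_alu_type_get_type_size(t) == 64);
    assert(nir_alu_type_get_base_type(t) == nir_type_float);
    assert(nir_alu_type_get_type_size(nir_type_float) == 0);  /* unsized */
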
@@ -708,6 +754,17 @@ extern const nir_op_info nir_op_infos[nir_num_opcodes];
typedef struct nir_alu_instr {
nir_instr instr;
nir_op op;
+
+ /** Indicates that this ALU instruction generates an exact value
+ *
+ * This is kind of a mixture of GLSL "precise" and "invariant" and not
+ * really equivalent to either. This indicates that the value generated by
+ * this operation is high-precision and any code transformations that touch
+ * it must ensure that the resulting value is bit-for-bit identical to the
+ * original.
+ */
+ bool exact;
+
nir_alu_dest dest;
nir_alu_src src[];
} nir_alu_instr;
@@ -1218,9 +1275,12 @@ nir_tex_instr_src_index(nir_tex_instr *instr, nir_tex_src_type type)
typedef struct {
union {
- float f[4];
- int32_t i[4];
- uint32_t u[4];
+ float f32[4];
+ double f64[4];
+ int32_t i32[4];
+ uint32_t u32[4];
+ int64_t i64[4];
+ uint64_t u64[4];
};
} nir_const_value;
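
[note] Since nir_load_const_instr_create() still defaults the new def to 32 bits (see the nir.c hunk above), a wider constant has to pick the matching union member and override the bit size by hand, the same way nir_lower_vars_to_ssa patches up its undefs later in this series. A sketch under that assumption, with 'b' an existing nir_builder:

    nir_load_const_instr *load = nir_load_const_instr_create(b.shader, 1);
    load->def.bit_size = 64;        /* override the 32-bit default */
    load->value.f64[0] = 1.0;       /* write the matching union member */
    nir_builder_instr_insert(&b, &load->instr);
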
@@ -2061,9 +2121,11 @@ void nir_instr_rewrite_dest(nir_instr *instr, nir_dest *dest,
nir_dest new_dest);
void nir_ssa_dest_init(nir_instr *instr, nir_dest *dest,
- unsigned num_components, const char *name);
+ unsigned num_components, unsigned bit_size,
+ const char *name);
void nir_ssa_def_init(nir_instr *instr, nir_ssa_def *def,
- unsigned num_components, const char *name);
+ unsigned num_components, unsigned bit_size,
+ const char *name);
void nir_ssa_def_rewrite_uses(nir_ssa_def *def, nir_src new_src);
void nir_ssa_def_rewrite_uses_after(nir_ssa_def *def, nir_src new_src,
nir_instr *after_me);
@@ -2094,9 +2156,10 @@ void nir_index_blocks(nir_function_impl *impl);
void nir_print_shader(nir_shader *shader, FILE *fp);
void nir_print_instr(const nir_instr *instr, FILE *fp);
-nir_shader * nir_shader_clone(void *mem_ctx, const nir_shader *s);
+nir_shader *nir_shader_clone(void *mem_ctx, const nir_shader *s);
nir_function_impl *nir_function_impl_clone(const nir_function_impl *fi);
nir_constant *nir_constant_clone(const nir_constant *c, nir_variable *var);
+nir_variable *nir_variable_clone(const nir_variable *c, nir_shader *shader);
#ifdef DEBUG
void nir_validate_shader(nir_shader *shader);
diff --git a/src/compiler/nir/nir_algebraic.py b/src/compiler/nir/nir_algebraic.py
index 2357b57..d05564f 100644
--- a/src/compiler/nir/nir_algebraic.py
+++ b/src/compiler/nir/nir_algebraic.py
@@ -63,12 +63,13 @@ class Value(object):
static const ${val.c_type} ${val.name} = {
{ ${val.type_enum} },
% if isinstance(val, Constant):
- { ${hex(val)} /* ${val.value} */ },
+ ${val.type()}, { ${hex(val)} /* ${val.value} */ },
% elif isinstance(val, Variable):
${val.index}, /* ${val.var_name} */
${'true' if val.is_constant else 'false'},
- nir_type_${ val.required_type or 'invalid' },
+ ${val.type() or 'nir_type_invalid' },
% elif isinstance(val, Expression):
+ ${'true' if val.inexact else 'false'},
nir_op_${val.opcode},
{ ${', '.join(src.c_ptr for src in val.sources)} },
% endif
@@ -107,10 +108,18 @@ class Constant(Value):
if isinstance(self.value, (int, long)):
return hex(self.value)
elif isinstance(self.value, float):
- return hex(struct.unpack('I', struct.pack('f', self.value))[0])
+ return hex(struct.unpack('Q', struct.pack('d', self.value))[0])
else:
assert False
+ def type(self):
+ if isinstance(self.value, (bool)):
+ return "nir_type_bool32"
+ elif isinstance(self.value, (int, long)):
+ return "nir_type_int"
+ elif isinstance(self.value, float):
+ return "nir_type_float"
+
_var_name_re = re.compile(r"(?P<const>#)?(?P<name>\w+)(?:@(?P<type>\w+))?")
class Variable(Value):
@@ -129,12 +138,26 @@ class Variable(Value):
self.index = varset[self.var_name]
+ def type(self):
+ if self.required_type == 'bool':
+ return "nir_type_bool32"
+ elif self.required_type in ('int', 'unsigned'):
+ return "nir_type_int"
+ elif self.required_type == 'float':
+ return "nir_type_float"
+
+_opcode_re = re.compile(r"(?P<inexact>~)?(?P<opcode>\w+)")
+
class Expression(Value):
def __init__(self, expr, name_base, varset):
Value.__init__(self, name_base, "expression")
assert isinstance(expr, tuple)
- self.opcode = expr[0]
+ m = _opcode_re.match(expr[0])
+ assert m and m.group('opcode') is not None
+
+ self.opcode = m.group('opcode')
+ self.inexact = m.group('inexact') is not None
self.sources = [ Value.create(src, "{0}_{1}".format(name_base, i), varset)
for (i, src) in enumerate(expr[1:]) ]
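
[note] With these template changes, prefixing an opcode with '~' in an algebraic rule (e.g. writing the search pattern as ('~fadd', a, b)) sets the new inexact flag on the generated search expression, so the rule is skipped for instructions marked exact. The emitted C ends up looking roughly like this (identifier names hypothetical):

    static const nir_search_expression search0 = {
       { nir_search_value_expression },
       true,                    /* inexact: only matches non-exact fadds */
       nir_op_fadd,
       { &search0_0.value, &search0_1.value },
    };
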
diff --git a/src/compiler/nir/nir_builder.h b/src/compiler/nir/nir_builder.h
index b4dde54..94f183c 100644
--- a/src/compiler/nir/nir_builder.h
+++ b/src/compiler/nir/nir_builder.h
@@ -31,6 +31,9 @@ struct exec_list;
typedef struct nir_builder {
nir_cursor cursor;
+ /* Whether new ALU instructions will be marked "exact" */
+ bool exact;
+
nir_shader *shader;
nir_function_impl *impl;
} nir_builder;
@@ -39,6 +42,7 @@ static inline void
nir_builder_init(nir_builder *build, nir_function_impl *impl)
{
memset(build, 0, sizeof(*build));
+ build->exact = false;
build->impl = impl;
build->shader = impl->function->shader;
}
@@ -50,6 +54,7 @@ nir_builder_init_simple_shader(nir_builder *build, void *mem_ctx,
{
build->shader = nir_shader_create(mem_ctx, stage, options);
nir_function *func = nir_function_create(build->shader, "main");
+ build->exact = false;
build->impl = nir_function_impl_create(func);
build->cursor = nir_after_cf_list(&build->impl->body);
}
@@ -104,7 +109,7 @@ nir_imm_float(nir_builder *build, float x)
nir_const_value v;
memset(&v, 0, sizeof(v));
- v.f[0] = x;
+ v.f32[0] = x;
return nir_build_imm(build, 1, v);
}
@@ -115,10 +120,10 @@ nir_imm_vec4(nir_builder *build, float x, float y, float z, float w)
nir_const_value v;
memset(&v, 0, sizeof(v));
- v.f[0] = x;
- v.f[1] = y;
- v.f[2] = z;
- v.f[3] = w;
+ v.f32[0] = x;
+ v.f32[1] = y;
+ v.f32[2] = z;
+ v.f32[3] = w;
return nir_build_imm(build, 4, v);
}
@@ -129,7 +134,7 @@ nir_imm_int(nir_builder *build, int x)
nir_const_value v;
memset(&v, 0, sizeof(v));
- v.i[0] = x;
+ v.i32[0] = x;
return nir_build_imm(build, 1, v);
}
@@ -140,10 +145,10 @@ nir_imm_ivec4(nir_builder *build, int x, int y, int z, int w)
nir_const_value v;
memset(&v, 0, sizeof(v));
- v.i[0] = x;
- v.i[1] = y;
- v.i[2] = z;
- v.i[3] = w;
+ v.i32[0] = x;
+ v.i32[1] = y;
+ v.i32[2] = z;
+ v.i32[3] = w;
return nir_build_imm(build, 4, v);
}
@@ -157,6 +162,8 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
if (!instr)
return NULL;
+ instr->exact = build->exact;
+
instr->src[0].src = nir_src_for_ssa(src0);
if (src1)
instr->src[1].src = nir_src_for_ssa(src1);
@@ -178,6 +185,25 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
}
assert(num_components != 0);
+ /* Figure out the bitwidth based on the source bitwidth if the instruction
+ * is variable-width.
+ */
+ unsigned bit_size = nir_alu_type_get_type_size(op_info->output_type);
+ if (bit_size == 0) {
+ for (unsigned i = 0; i < op_info->num_inputs; i++) {
+ unsigned src_bit_size = instr->src[i].src.ssa->bit_size;
+ if (nir_alu_type_get_type_size(op_info->input_types[i]) == 0) {
+ if (bit_size)
+ assert(src_bit_size == bit_size);
+ else
+ bit_size = src_bit_size;
+ } else {
+ assert(src_bit_size ==
+ nir_alu_type_get_type_size(op_info->input_types[i]));
+ }
+ }
+ }
+
/* Make sure we don't swizzle from outside of our source vector (like if a
* scalar value was passed into a multiply with a vector).
*/
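
[note] The effect of the inference loop above, sketched with the generated per-opcode builder wrappers (x and y stand for hypothetical 64-bit defs): opcodes with an unsized output take their width from the unsized sources, while sized outputs such as comparisons stay fixed.

    nir_ssa_def *sum = nir_fadd(b, x, y);  /* output unsized: infers 64 */
    nir_ssa_def *cmp = nir_flt(b, x, y);   /* output bool32: stays 32 */
    assert(sum->bit_size == 64 && cmp->bit_size == 32);
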
@@ -187,7 +213,8 @@ nir_build_alu(nir_builder *build, nir_op op, nir_ssa_def *src0,
}
}
- nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
+ nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
+ bit_size, NULL);
instr->dest.write_mask = (1 << num_components) - 1;
nir_builder_instr_insert(build, &instr->instr);
@@ -252,7 +279,9 @@ static inline nir_ssa_def *
nir_fmov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
{
nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_fmov);
- nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components,
+ nir_src_bit_size(src.src), NULL);
+ mov->exact = build->exact;
mov->dest.write_mask = (1 << num_components) - 1;
mov->src[0] = src;
nir_builder_instr_insert(build, &mov->instr);
@@ -264,7 +293,9 @@ static inline nir_ssa_def *
nir_imov_alu(nir_builder *build, nir_alu_src src, unsigned num_components)
{
nir_alu_instr *mov = nir_alu_instr_create(build->shader, nir_op_imov);
- nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components, NULL);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, num_components,
+ nir_src_bit_size(src.src), NULL);
+ mov->exact = build->exact;
mov->dest.write_mask = (1 << num_components) - 1;
mov->src[0] = src;
nir_builder_instr_insert(build, &mov->instr);
@@ -360,7 +391,8 @@ nir_load_var(nir_builder *build, nir_variable *var)
nir_intrinsic_instr_create(build->shader, nir_intrinsic_load_var);
load->num_components = num_components;
load->variables[0] = nir_deref_var_create(load, var);
- nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
+ nir_ssa_dest_init(&load->instr, &load->dest, num_components,
+ glsl_get_bit_size(glsl_get_base_type(var->type)), NULL);
nir_builder_instr_insert(build, &load->instr);
return &load->dest.ssa;
}
@@ -426,7 +458,7 @@ nir_load_system_value(nir_builder *build, nir_intrinsic_op op, int index)
load->num_components = nir_intrinsic_infos[op].dest_components;
load->const_index[0] = index;
nir_ssa_dest_init(&load->instr, &load->dest,
- nir_intrinsic_infos[op].dest_components, NULL);
+ nir_intrinsic_infos[op].dest_components, 32, NULL);
nir_builder_instr_insert(build, &load->instr);
return &load->dest.ssa;
}
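
[note] The new exact flag is sticky builder state rather than a per-call argument: set it before emitting, and every ALU instruction built until it is cleared is marked exact (glsl_to_nir uses exactly this for 'invariant'/'precise' assignments). A sketch, with impl, x, and y hypothetical:

    nir_builder b;
    nir_builder_init(&b, impl);
    b.cursor = nir_after_cf_list(&impl->body);

    b.exact = true;                        /* e.g. storing to an invariant lhs */
    nir_ssa_def *v = nir_fmul(&b, x, y);   /* resulting fmul has exact set */
    b.exact = false;
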
diff --git a/src/compiler/nir/nir_clone.c b/src/compiler/nir/nir_clone.c
index 3268deb..7d2e383 100644
--- a/src/compiler/nir/nir_clone.c
+++ b/src/compiler/nir/nir_clone.c
@@ -127,11 +127,10 @@ nir_constant_clone(const nir_constant *c, nir_variable *nvar)
/* NOTE: for cloning nir_variable's, bypass nir_variable_create to avoid
* having to deal with locals and globals separately:
*/
-static nir_variable *
-clone_variable(clone_state *state, const nir_variable *var)
+nir_variable *
+nir_variable_clone(const nir_variable *var, nir_shader *shader)
{
- nir_variable *nvar = rzalloc(state->ns, nir_variable);
- add_remap(state, nvar, var);
+ nir_variable *nvar = rzalloc(shader, nir_variable);
nvar->type = var->type;
nvar->name = ralloc_strdup(nvar, var->name);
@@ -149,6 +148,15 @@ clone_variable(clone_state *state, const nir_variable *var)
return nvar;
}
+static nir_variable *
+clone_variable(clone_state *state, const nir_variable *var)
+{
+ nir_variable *nvar = nir_variable_clone(var, state->ns);
+ add_remap(state, nvar, var);
+
+ return nvar;
+}
+
/* clone list of nir_variable: */
static void
clone_var_list(clone_state *state, struct exec_list *dst,
@@ -220,7 +228,8 @@ __clone_dst(clone_state *state, nir_instr *ninstr,
{
ndst->is_ssa = dst->is_ssa;
if (dst->is_ssa) {
- nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components, dst->ssa.name);
+ nir_ssa_dest_init(ninstr, ndst, dst->ssa.num_components,
+ dst->ssa.bit_size, dst->ssa.name);
add_remap(state, &ndst->ssa, &dst->ssa);
} else {
ndst->reg.reg = remap_reg(state, dst->reg.reg);
@@ -303,6 +312,7 @@ static nir_alu_instr *
clone_alu(clone_state *state, const nir_alu_instr *alu)
{
nir_alu_instr *nalu = nir_alu_instr_create(state->ns, alu->op);
+ nalu->exact = alu->exact;
__clone_dst(state, &nalu->instr, &nalu->dest.dest, &alu->dest.dest);
nalu->dest.saturate = alu->dest.saturate;
diff --git a/src/compiler/nir/nir_constant_expressions.h b/src/compiler/nir/nir_constant_expressions.h
index 97997f2..201f278 100644
--- a/src/compiler/nir/nir_constant_expressions.h
+++ b/src/compiler/nir/nir_constant_expressions.h
@@ -28,4 +28,4 @@
#include "nir.h"
nir_const_value nir_eval_const_opcode(nir_op op, unsigned num_components,
- nir_const_value *src);
+ unsigned bit_size, nir_const_value *src);
diff --git a/src/compiler/nir/nir_constant_expressions.py b/src/compiler/nir/nir_constant_expressions.py
index 32784f6..e36dc48 100644
--- a/src/compiler/nir/nir_constant_expressions.py
+++ b/src/compiler/nir/nir_constant_expressions.py
@@ -1,4 +1,43 @@
#! /usr/bin/python2
+
+def type_has_size(type_):
+ return type_[-1:].isdigit()
+
+def type_sizes(type_):
+ if type_.endswith("8"):
+ return [8]
+ elif type_.endswith("16"):
+ return [16]
+ elif type_.endswith("32"):
+ return [32]
+ elif type_.endswith("64"):
+ return [64]
+ else:
+ return [32, 64]
+
+def type_add_size(type_, size):
+ if type_has_size(type_):
+ return type_
+ return type_ + str(size)
+
+def get_const_field(type_):
+ if type_ == "int32":
+ return "i32"
+ if type_ == "uint32":
+ return "u32"
+ if type_ == "int64":
+ return "i64"
+ if type_ == "uint64":
+ return "u64"
+ if type_ == "bool32":
+ return "u32"
+ if type_ == "float32":
+ return "f32"
+ if type_ == "float64":
+ return "f64"
+ raise Exception(str(type_))
+ assert(0)
+
template = """\
/*
* Copyright (C) 2014 Intel Corporation
@@ -205,110 +244,140 @@ unpack_half_1x16(uint16_t u)
}
/* Some typed vector structures to make things like src0.y work */
-% for type in ["float", "int", "uint", "bool"]:
-struct ${type}_vec {
- ${type} x;
- ${type} y;
- ${type} z;
- ${type} w;
+typedef float float32_t;
+typedef double float64_t;
+typedef bool bool32_t;
+% for type in ["float", "int", "uint"]:
+% for width in [32, 64]:
+struct ${type}${width}_vec {
+ ${type}${width}_t x;
+ ${type}${width}_t y;
+ ${type}${width}_t z;
+ ${type}${width}_t w;
};
% endfor
+% endfor
+
+struct bool32_vec {
+ bool x;
+ bool y;
+ bool z;
+ bool w;
+};
% for name, op in sorted(opcodes.iteritems()):
static nir_const_value
-evaluate_${name}(unsigned num_components, nir_const_value *_src)
+evaluate_${name}(unsigned num_components, unsigned bit_size,
+ nir_const_value *_src)
{
nir_const_value _dst_val = { { {0, 0, 0, 0} } };
- ## For each non-per-component input, create a variable srcN that
- ## contains x, y, z, and w elements which are filled in with the
- ## appropriately-typed values.
- % for j in range(op.num_inputs):
- % if op.input_sizes[j] == 0:
- <% continue %>
- % elif "src" + str(j) not in op.const_expr:
- ## Avoid unused variable warnings
- <% continue %>
- %endif
-
- struct ${op.input_types[j]}_vec src${j} = {
- % for k in range(op.input_sizes[j]):
- % if op.input_types[j] == "bool":
- _src[${j}].u[${k}] != 0,
- % else:
- _src[${j}].${op.input_types[j][:1]}[${k}],
- % endif
- % endfor
- };
- % endfor
+ switch (bit_size) {
+ % for bit_size in [32, 64]:
+ case ${bit_size}: {
+ <%
+ output_type = type_add_size(op.output_type, bit_size)
+ input_types = [type_add_size(type_, bit_size) for type_ in op.input_types]
+ %>
+
+ ## For each non-per-component input, create a variable srcN that
+ ## contains x, y, z, and w elements which are filled in with the
+ ## appropriately-typed values.
+ % for j in range(op.num_inputs):
+ % if op.input_sizes[j] == 0:
+ <% continue %>
+ % elif "src" + str(j) not in op.const_expr:
+ ## Avoid unused variable warnings
+ <% continue %>
+ %endif
- % if op.output_size == 0:
- ## For per-component instructions, we need to iterate over the
- ## components and apply the constant expression one component
- ## at a time.
- for (unsigned _i = 0; _i < num_components; _i++) {
- ## For each per-component input, create a variable srcN that
- ## contains the value of the current (_i'th) component.
- % for j in range(op.num_inputs):
- % if op.input_sizes[j] != 0:
- <% continue %>
- % elif "src" + str(j) not in op.const_expr:
- ## Avoid unused variable warnings
- <% continue %>
- % elif op.input_types[j] == "bool":
- bool src${j} = _src[${j}].u[_i] != 0;
+ struct ${input_types[j]}_vec src${j} = {
+ % for k in range(op.input_sizes[j]):
+ % if input_types[j] == "bool32":
+ _src[${j}].u32[${k}] != 0,
% else:
- ${op.input_types[j]} src${j} = _src[${j}].${op.input_types[j][:1]}[_i];
+ _src[${j}].${get_const_field(input_types[j])}[${k}],
% endif
% endfor
+ };
+ % endfor
+
+ % if op.output_size == 0:
+ ## For per-component instructions, we need to iterate over the
+ ## components and apply the constant expression one component
+ ## at a time.
+ for (unsigned _i = 0; _i < num_components; _i++) {
+ ## For each per-component input, create a variable srcN that
+ ## contains the value of the current (_i'th) component.
+ % for j in range(op.num_inputs):
+ % if op.input_sizes[j] != 0:
+ <% continue %>
+ % elif "src" + str(j) not in op.const_expr:
+ ## Avoid unused variable warnings
+ <% continue %>
+ % elif input_types[j] == "bool32":
+ bool src${j} = _src[${j}].u32[_i] != 0;
+ % else:
+ ${input_types[j]}_t src${j} =
+ _src[${j}].${get_const_field(input_types[j])}[_i];
+ % endif
+ % endfor
+
+ ## Create an appropriately-typed variable dst and assign the
+ ## result of the const_expr to it. If const_expr already contains
+ ## writes to dst, just include const_expr directly.
+ % if "dst" in op.const_expr:
+ ${output_type}_t dst;
+ ${op.const_expr}
+ % else:
+ ${output_type}_t dst = ${op.const_expr};
+ % endif
+
+ ## Store the current component of the actual destination to the
+ ## value of dst.
+ % if output_type == "bool32":
+ ## Sanitize the C value to a proper NIR bool
+ _dst_val.u32[_i] = dst ? NIR_TRUE : NIR_FALSE;
+ % else:
+ _dst_val.${get_const_field(output_type)}[_i] = dst;
+ % endif
+ }
+ % else:
+ ## In the non-per-component case, create a struct dst with
+ ## appropriately-typed elements x, y, z, and w and assign the result
+ ## of the const_expr to all components of dst, or include the
+ ## const_expr directly if it writes to dst already.
+ struct ${output_type}_vec dst;
- ## Create an appropriately-typed variable dst and assign the
- ## result of the const_expr to it. If const_expr already contains
- ## writes to dst, just include const_expr directly.
% if "dst" in op.const_expr:
- ${op.output_type} dst;
${op.const_expr}
% else:
- ${op.output_type} dst = ${op.const_expr};
+ ## Splat the value to all components. This way expressions which
+ ## write the same value to all components don't need to explicitly
+ ## write to dest. One such example is fnoise which has a
+ ## const_expr of 0.0f.
+ dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
% endif
- ## Store the current component of the actual destination to the
- ## value of dst.
- % if op.output_type == "bool":
- ## Sanitize the C value to a proper NIR bool
- _dst_val.u[_i] = dst ? NIR_TRUE : NIR_FALSE;
- % else:
- _dst_val.${op.output_type[:1]}[_i] = dst;
- % endif
- }
- % else:
- ## In the non-per-component case, create a struct dst with
- ## appropriately-typed elements x, y, z, and w and assign the result
- ## of the const_expr to all components of dst, or include the
- ## const_expr directly if it writes to dst already.
- struct ${op.output_type}_vec dst;
-
- % if "dst" in op.const_expr:
- ${op.const_expr}
- % else:
- ## Splat the value to all components. This way expressions which
- ## write the same value to all components don't need to explicitly
- ## write to dest. One such example is fnoise which has a
- ## const_expr of 0.0f.
- dst.x = dst.y = dst.z = dst.w = ${op.const_expr};
+ ## For each component in the destination, copy the value of dst to
+ ## the actual destination.
+ % for k in range(op.output_size):
+ % if output_type == "bool32":
+ ## Sanitize the C value to a proper NIR bool
+ _dst_val.u32[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
+ % else:
+ _dst_val.${get_const_field(output_type)}[${k}] = dst.${"xyzw"[k]};
+ % endif
+ % endfor
% endif
- ## For each component in the destination, copy the value of dst to
- ## the actual destination.
- % for k in range(op.output_size):
- % if op.output_type == "bool":
- ## Sanitize the C value to a proper NIR bool
- _dst_val.u[${k}] = dst.${"xyzw"[k]} ? NIR_TRUE : NIR_FALSE;
- % else:
- _dst_val.${op.output_type[:1]}[${k}] = dst.${"xyzw"[k]};
- % endif
- % endfor
- % endif
+ break;
+ }
+ % endfor
+
+ default:
+ unreachable("unknown bit width");
+ }
return _dst_val;
}
@@ -316,12 +385,12 @@ evaluate_${name}(unsigned num_components, nir_const_value *_src)
nir_const_value
nir_eval_const_opcode(nir_op op, unsigned num_components,
- nir_const_value *src)
+ unsigned bit_width, nir_const_value *src)
{
switch (op) {
% for name in sorted(opcodes.iterkeys()):
case nir_op_${name}: {
- return evaluate_${name}(num_components, src);
+ return evaluate_${name}(num_components, bit_width, src);
break;
}
% endfor
@@ -333,4 +402,7 @@ nir_eval_const_opcode(nir_op op, unsigned num_components,
from nir_opcodes import opcodes
from mako.template import Template
-print Template(template).render(opcodes=opcodes)
+print Template(template).render(opcodes=opcodes, type_sizes=type_sizes,
+ type_has_size=type_has_size,
+ type_add_size=type_add_size,
+ get_const_field=get_const_field)
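
[note] For a per-component opcode such as fadd, the evaluator this template now generates has roughly the following shape (hand-simplified here; the real output comes from the Mako template above):

    static nir_const_value
    evaluate_fadd(unsigned num_components, unsigned bit_size,
                  nir_const_value *_src)
    {
       nir_const_value _dst_val = { { {0, 0, 0, 0} } };
       switch (bit_size) {
       case 32:
          for (unsigned _i = 0; _i < num_components; _i++) {
             float32_t src0 = _src[0].f32[_i];
             float32_t src1 = _src[1].f32[_i];
             _dst_val.f32[_i] = src0 + src1;
          }
          break;
       case 64:
          for (unsigned _i = 0; _i < num_components; _i++) {
             float64_t src0 = _src[0].f64[_i];
             float64_t src1 = _src[1].f64[_i];
             _dst_val.f64[_i] = src0 + src1;
          }
          break;
       default:
          unreachable("unknown bit width");
       }
       return _dst_val;
    }
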
diff --git a/src/compiler/nir/nir_from_ssa.c b/src/compiler/nir/nir_from_ssa.c
index 8bc9f24..82317c2 100644
--- a/src/compiler/nir/nir_from_ssa.c
+++ b/src/compiler/nir/nir_from_ssa.c
@@ -342,7 +342,8 @@ isolate_phi_nodes_block(nir_block *block, void *void_state)
nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx,
nir_parallel_copy_entry);
nir_ssa_dest_init(&pcopy->instr, &entry->dest,
- phi->dest.ssa.num_components, src->src.ssa->name);
+ phi->dest.ssa.num_components,
+ phi->dest.ssa.bit_size, src->src.ssa->name);
exec_list_push_tail(&pcopy->entries, &entry->node);
assert(src->src.is_ssa);
@@ -355,7 +356,8 @@ isolate_phi_nodes_block(nir_block *block, void *void_state)
nir_parallel_copy_entry *entry = rzalloc(state->dead_ctx,
nir_parallel_copy_entry);
nir_ssa_dest_init(&block_pcopy->instr, &entry->dest,
- phi->dest.ssa.num_components, phi->dest.ssa.name);
+ phi->dest.ssa.num_components, phi->dest.ssa.bit_size,
+ phi->dest.ssa.name);
exec_list_push_tail(&block_pcopy->entries, &entry->node);
nir_ssa_def_rewrite_uses(&phi->dest.ssa,
diff --git a/src/compiler/nir/nir_gs_count_vertices.c b/src/compiler/nir/nir_gs_count_vertices.c
index db15d16..3c1bd2a 100644
--- a/src/compiler/nir/nir_gs_count_vertices.c
+++ b/src/compiler/nir/nir_gs_count_vertices.c
@@ -77,13 +77,13 @@ nir_gs_count_vertices(const nir_shader *shader)
return -1;
if (count == -1)
- count = val->i[0];
+ count = val->i32[0];
/* We've found contradictory set_vertex_count intrinsics.
* This can happen if there are early-returns in main() and
* different paths emit different numbers of vertices.
*/
- if (count != val->i[0])
+ if (count != val->i32[0])
return -1;
}
}
diff --git a/src/compiler/nir/nir_instr_set.c b/src/compiler/nir/nir_instr_set.c
index 159ded0..e244122 100644
--- a/src/compiler/nir/nir_instr_set.c
+++ b/src/compiler/nir/nir_instr_set.c
@@ -52,6 +52,7 @@ hash_alu(uint32_t hash, const nir_alu_instr *instr)
{
hash = HASH(hash, instr->op);
hash = HASH(hash, instr->dest.dest.ssa.num_components);
+ /* We explicitly don't hash instr->exact */
if (nir_op_infos[instr->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
assert(nir_op_infos[instr->op].num_inputs == 2);
@@ -81,9 +82,9 @@ hash_load_const(uint32_t hash, const nir_load_const_instr *instr)
{
hash = HASH(hash, instr->def.num_components);
- hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f,
+ hash = _mesa_fnv32_1a_accumulate_block(hash, instr->value.f32,
instr->def.num_components
- * sizeof(instr->value.f[0]));
+ * sizeof(instr->value.f32[0]));
return hash;
}
@@ -267,6 +268,8 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
if (alu1->dest.dest.ssa.num_components != alu2->dest.dest.ssa.num_components)
return false;
+ /* We explicitly don't compare instr->exact */
+
if (nir_op_infos[alu1->op].algebraic_properties & NIR_OP_IS_COMMUTATIVE) {
assert(nir_op_infos[alu1->op].num_inputs == 2);
return (nir_alu_srcs_equal(alu1, alu2, 0, 0) &&
@@ -322,8 +325,8 @@ nir_instrs_equal(const nir_instr *instr1, const nir_instr *instr2)
if (load1->def.num_components != load2->def.num_components)
return false;
- return memcmp(load1->value.f, load2->value.f,
- load1->def.num_components * sizeof(*load2->value.f)) == 0;
+ return memcmp(load1->value.f32, load2->value.f32,
+ load1->def.num_components * sizeof(*load2->value.f32)) == 0;
}
case nir_instr_type_phi: {
nir_phi_instr *phi1 = nir_instr_as_phi(instr1);
@@ -496,8 +499,17 @@ nir_instr_set_add_or_rewrite(struct set *instr_set, nir_instr *instr)
struct set_entry *entry = _mesa_set_search(instr_set, instr);
if (entry) {
nir_ssa_def *def = nir_instr_get_dest_ssa_def(instr);
- nir_ssa_def *new_def =
- nir_instr_get_dest_ssa_def((nir_instr *) entry->key);
+ nir_instr *match = (nir_instr *) entry->key;
+ nir_ssa_def *new_def = nir_instr_get_dest_ssa_def(match);
+
+ /* It's safe to replace an exact instruction with an inexact one as
+ * long as we make it exact. If we got here, the two instructions are
+ * exactly identical in every other way so, once we've set the exact
+ * bit, they are the same.
+ */
+ if (instr->type == nir_instr_type_alu && nir_instr_as_alu(instr)->exact)
+ nir_instr_as_alu(match)->exact = true;
+
nir_ssa_def_rewrite_uses(def, nir_src_for_ssa(new_def));
return true;
}
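
[note] The practical consequence for CSE: two ALU instructions that are identical except for the exact bit still land in the same set entry, and whichever instruction survives the merge is promoted to exact, so the guarantee is never weakened. Sketched with the builder (x and y hypothetical):

    b.exact = true;
    nir_ssa_def *p = nir_fadd(&b, x, y);   /* exact */
    b.exact = false;
    nir_ssa_def *q = nir_fadd(&b, x, y);   /* inexact duplicate */
    /* After CSE, one fadd remains, all uses point at it, and it is exact. */
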
diff --git a/src/compiler/nir/nir_lower_alu_to_scalar.c b/src/compiler/nir/nir_lower_alu_to_scalar.c
index 312d2f9..e8ba640 100644
--- a/src/compiler/nir/nir_lower_alu_to_scalar.c
+++ b/src/compiler/nir/nir_lower_alu_to_scalar.c
@@ -31,9 +31,11 @@
*/
static void
-nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components)
+nir_alu_ssa_dest_init(nir_alu_instr *instr, unsigned num_components,
+ unsigned bit_size)
{
- nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, NULL);
+ nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
+ bit_size, NULL);
instr->dest.write_mask = (1 << num_components) - 1;
}
@@ -46,7 +48,7 @@ lower_reduction(nir_alu_instr *instr, nir_op chan_op, nir_op merge_op,
nir_ssa_def *last = NULL;
for (unsigned i = 0; i < num_components; i++) {
nir_alu_instr *chan = nir_alu_instr_create(builder->shader, chan_op);
- nir_alu_ssa_dest_init(chan, 1);
+ nir_alu_ssa_dest_init(chan, 1, instr->dest.dest.ssa.bit_size);
nir_alu_src_copy(&chan->src[0], &instr->src[0], chan);
chan->src[0].swizzle[0] = chan->src[0].swizzle[i];
if (nir_op_infos[chan_op].num_inputs > 1) {
@@ -80,6 +82,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
assert(instr->dest.write_mask != 0);
b->cursor = nir_before_instr(&instr->instr);
+ b->exact = instr->exact;
#define LOWER_REDUCTION(name, chan, merge) \
case name##2: \
@@ -220,7 +223,7 @@ lower_alu_instr_scalar(nir_alu_instr *instr, nir_builder *b)
lower->src[i].swizzle[j] = instr->src[i].swizzle[src_chan];
}
- nir_alu_ssa_dest_init(lower, 1);
+ nir_alu_ssa_dest_init(lower, 1, instr->dest.dest.ssa.bit_size);
lower->dest.saturate = instr->dest.saturate;
comps[chan] = &lower->dest.dest.ssa;
diff --git a/src/compiler/nir/nir_lower_atomics.c b/src/compiler/nir/nir_lower_atomics.c
index eefcb55..70381a7 100644
--- a/src/compiler/nir/nir_lower_atomics.c
+++ b/src/compiler/nir/nir_lower_atomics.c
@@ -75,7 +75,7 @@ lower_instr(nir_intrinsic_instr *instr,
state->shader_program->UniformStorage[uniform_loc].opaque[state->shader->stage].index);
nir_load_const_instr *offset_const = nir_load_const_instr_create(mem_ctx, 1);
- offset_const->value.u[0] = instr->variables[0]->var->data.offset;
+ offset_const->value.u32[0] = instr->variables[0]->var->data.offset;
nir_instr_insert_before(&instr->instr, &offset_const->instr);
@@ -90,17 +90,17 @@ lower_instr(nir_intrinsic_instr *instr,
unsigned child_array_elements = tail->child != NULL ?
glsl_get_aoa_size(tail->type) : 1;
- offset_const->value.u[0] += deref_array->base_offset *
+ offset_const->value.u32[0] += deref_array->base_offset *
child_array_elements * ATOMIC_COUNTER_SIZE;
if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
nir_load_const_instr *atomic_counter_size =
nir_load_const_instr_create(mem_ctx, 1);
- atomic_counter_size->value.u[0] = child_array_elements * ATOMIC_COUNTER_SIZE;
+ atomic_counter_size->value.u32[0] = child_array_elements * ATOMIC_COUNTER_SIZE;
nir_instr_insert_before(&instr->instr, &atomic_counter_size->instr);
nir_alu_instr *mul = nir_alu_instr_create(mem_ctx, nir_op_imul);
- nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
+ nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, 32, NULL);
mul->dest.write_mask = 0x1;
nir_src_copy(&mul->src[0].src, &deref_array->indirect, mul);
mul->src[1].src.is_ssa = true;
@@ -108,7 +108,7 @@ lower_instr(nir_intrinsic_instr *instr,
nir_instr_insert_before(&instr->instr, &mul->instr);
nir_alu_instr *add = nir_alu_instr_create(mem_ctx, nir_op_iadd);
- nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
+ nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, 32, NULL);
add->dest.write_mask = 0x1;
add->src[0].src.is_ssa = true;
add->src[0].src.ssa = &mul->dest.dest.ssa;
@@ -125,7 +125,7 @@ lower_instr(nir_intrinsic_instr *instr,
if (instr->dest.is_ssa) {
nir_ssa_dest_init(&new_instr->instr, &new_instr->dest,
- instr->dest.ssa.num_components, NULL);
+ instr->dest.ssa.num_components, 32, NULL);
nir_ssa_def_rewrite_uses(&instr->dest.ssa,
nir_src_for_ssa(&new_instr->dest.ssa));
} else {
diff --git a/src/compiler/nir/nir_lower_clip.c b/src/compiler/nir/nir_lower_clip.c
index bcbad53..c711230 100644
--- a/src/compiler/nir/nir_lower_clip.c
+++ b/src/compiler/nir/nir_lower_clip.c
@@ -88,7 +88,7 @@ load_clipdist_input(nir_builder *b, nir_variable *in, nir_ssa_def **val)
load->num_components = 4;
nir_intrinsic_set_base(load, in->data.driver_location);
load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
- nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+ nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
nir_builder_instr_insert(b, &load->instr);
val[0] = nir_channel(b, &load->dest.ssa, 0);
diff --git a/src/compiler/nir/nir_lower_indirect_derefs.c b/src/compiler/nir/nir_lower_indirect_derefs.c
index a4affa7..62b8c84 100644
--- a/src/compiler/nir/nir_lower_indirect_derefs.c
+++ b/src/compiler/nir/nir_lower_indirect_derefs.c
@@ -75,8 +75,9 @@ emit_indirect_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr,
if (src == NULL) {
/* We're a load. We need to insert a phi node */
nir_phi_instr *phi = nir_phi_instr_create(b->shader);
+ unsigned bit_size = then_dest->bit_size;
nir_ssa_dest_init(&phi->instr, &phi->dest,
- then_dest->num_components, NULL);
+ then_dest->num_components, bit_size, NULL);
nir_phi_src *src0 = ralloc(phi, nir_phi_src);
src0->pred = nir_cf_node_as_block(nir_if_last_then_node(if_stmt));
@@ -125,8 +126,9 @@ emit_load_store(nir_builder *b, nir_intrinsic_instr *orig_instr,
load->num_components = orig_instr->num_components;
load->variables[0] =
nir_deref_as_var(nir_copy_deref(load, &deref->deref));
+ unsigned bit_size = orig_instr->dest.ssa.bit_size;
nir_ssa_dest_init(&load->instr, &load->dest,
- load->num_components, NULL);
+ load->num_components, bit_size, NULL);
nir_builder_instr_insert(b, &load->instr);
*dest = &load->dest.ssa;
} else {
diff --git a/src/compiler/nir/nir_lower_io.c b/src/compiler/nir/nir_lower_io.c
index 9d502ee..a30061d 100644
--- a/src/compiler/nir/nir_lower_io.c
+++ b/src/compiler/nir/nir_lower_io.c
@@ -289,7 +289,8 @@ nir_lower_io_block(nir_block *block, void *void_state)
if (intrin->dest.is_ssa) {
nir_ssa_dest_init(&load->instr, &load->dest,
- intrin->num_components, NULL);
+ intrin->num_components,
+ intrin->dest.ssa.bit_size, NULL);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
nir_src_for_ssa(&load->dest.ssa));
} else {
@@ -369,7 +370,8 @@ nir_lower_io_block(nir_block *block, void *void_state)
if (intrin->dest.is_ssa) {
nir_ssa_dest_init(&atomic->instr, &atomic->dest,
- intrin->dest.ssa.num_components, NULL);
+ intrin->dest.ssa.num_components,
+ intrin->dest.ssa.bit_size, NULL);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
nir_src_for_ssa(&atomic->dest.ssa));
} else {
diff --git a/src/compiler/nir/nir_lower_load_const_to_scalar.c b/src/compiler/nir/nir_lower_load_const_to_scalar.c
index 1eeed13..b5df464 100644
--- a/src/compiler/nir/nir_lower_load_const_to_scalar.c
+++ b/src/compiler/nir/nir_lower_load_const_to_scalar.c
@@ -49,7 +49,7 @@ lower_load_const_instr_scalar(nir_load_const_instr *lower)
nir_ssa_def *loads[4];
for (unsigned i = 0; i < lower->def.num_components; i++) {
nir_load_const_instr *load_comp = nir_load_const_instr_create(b.shader, 1);
- load_comp->value.u[0] = lower->value.u[i];
+ load_comp->value.u32[0] = lower->value.u32[i];
nir_builder_instr_insert(&b, &load_comp->instr);
loads[i] = &load_comp->def;
}
diff --git a/src/compiler/nir/nir_lower_locals_to_regs.c b/src/compiler/nir/nir_lower_locals_to_regs.c
index 45036fa..0438802 100644
--- a/src/compiler/nir/nir_lower_locals_to_regs.c
+++ b/src/compiler/nir/nir_lower_locals_to_regs.c
@@ -161,7 +161,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
if (src.reg.indirect) {
nir_load_const_instr *load_const =
nir_load_const_instr_create(state->shader, 1);
- load_const->value.u[0] = glsl_get_length(parent_type);
+ load_const->value.u32[0] = glsl_get_length(parent_type);
nir_instr_insert_before(instr, &load_const->instr);
nir_alu_instr *mul = nir_alu_instr_create(state->shader, nir_op_imul);
@@ -169,7 +169,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
mul->src[1].src.is_ssa = true;
mul->src[1].src.ssa = &load_const->def;
mul->dest.write_mask = 1;
- nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, NULL);
+ nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, 32, NULL);
nir_instr_insert_before(instr, &mul->instr);
src.reg.indirect->is_ssa = true;
@@ -187,7 +187,7 @@ get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
add->src[0].src = *src.reg.indirect;
nir_src_copy(&add->src[1].src, &deref_array->indirect, add);
add->dest.write_mask = 1;
- nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, NULL);
+ nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, 32, NULL);
nir_instr_insert_before(instr, &add->instr);
src.reg.indirect->is_ssa = true;
@@ -221,7 +221,8 @@ lower_locals_to_regs_block(nir_block *block, void *void_state)
mov->dest.write_mask = (1 << intrin->num_components) - 1;
if (intrin->dest.is_ssa) {
nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
- intrin->num_components, NULL);
+ intrin->num_components,
+ intrin->dest.ssa.bit_size, NULL);
nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
nir_src_for_ssa(&mov->dest.dest.ssa));
} else {
diff --git a/src/compiler/nir/nir_lower_phis_to_scalar.c b/src/compiler/nir/nir_lower_phis_to_scalar.c
index dd2abcf..026c866 100644
--- a/src/compiler/nir/nir_lower_phis_to_scalar.c
+++ b/src/compiler/nir/nir_lower_phis_to_scalar.c
@@ -188,6 +188,8 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state)
if (!should_lower_phi(phi, state))
continue;
+ unsigned bit_size = phi->dest.ssa.bit_size;
+
/* Create a vecN operation to combine the results. Most of these
* will be redundant, but copy propagation should clean them up for
* us. No need to add the complexity here.
@@ -202,12 +204,14 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state)
nir_alu_instr *vec = nir_alu_instr_create(state->mem_ctx, vec_op);
nir_ssa_dest_init(&vec->instr, &vec->dest.dest,
- phi->dest.ssa.num_components, NULL);
+ phi->dest.ssa.num_components,
+ bit_size, NULL);
vec->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
for (unsigned i = 0; i < phi->dest.ssa.num_components; i++) {
nir_phi_instr *new_phi = nir_phi_instr_create(state->mem_ctx);
- nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1, NULL);
+ nir_ssa_dest_init(&new_phi->instr, &new_phi->dest, 1,
+ phi->dest.ssa.bit_size, NULL);
vec->src[i].src = nir_src_for_ssa(&new_phi->dest.ssa);
@@ -215,7 +219,7 @@ lower_phis_to_scalar_block(nir_block *block, void *void_state)
/* We need to insert a mov to grab the i'th component of src */
nir_alu_instr *mov = nir_alu_instr_create(state->mem_ctx,
nir_op_imov);
- nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, NULL);
+ nir_ssa_dest_init(&mov->instr, &mov->dest.dest, 1, bit_size, NULL);
mov->dest.write_mask = 1;
nir_src_copy(&mov->src[0].src, &src->src, state->mem_ctx);
mov->src[0].swizzle[0] = i;
diff --git a/src/compiler/nir/nir_lower_system_values.c b/src/compiler/nir/nir_lower_system_values.c
index 79f6bed..c1cd139 100644
--- a/src/compiler/nir/nir_lower_system_values.c
+++ b/src/compiler/nir/nir_lower_system_values.c
@@ -65,9 +65,9 @@ convert_block(nir_block *block, void *void_state)
*/
nir_const_value local_size;
- local_size.u[0] = b->shader->info.cs.local_size[0];
- local_size.u[1] = b->shader->info.cs.local_size[1];
- local_size.u[2] = b->shader->info.cs.local_size[2];
+ local_size.u32[0] = b->shader->info.cs.local_size[0];
+ local_size.u32[1] = b->shader->info.cs.local_size[1];
+ local_size.u32[2] = b->shader->info.cs.local_size[2];
nir_ssa_def *group_id =
nir_load_system_value(b, nir_intrinsic_load_work_group_id, 0);
diff --git a/src/compiler/nir/nir_lower_tex.c b/src/compiler/nir/nir_lower_tex.c
index 806acd8..4999603 100644
--- a/src/compiler/nir/nir_lower_tex.c
+++ b/src/compiler/nir/nir_lower_tex.c
@@ -140,7 +140,7 @@ get_texture_size(nir_builder *b, nir_tex_instr *tex)
txs->src[0].src = nir_src_for_ssa(nir_imm_int(b, 0));
txs->src[0].src_type = nir_tex_src_lod;
- nir_ssa_dest_init(&txs->instr, &txs->dest, 2, NULL);
+ nir_ssa_dest_init(&txs->instr, &txs->dest, 2, 32, NULL);
nir_builder_instr_insert(b, &txs->instr);
return nir_i2f(b, &txs->dest.ssa);
@@ -223,13 +223,13 @@ get_zero_or_one(nir_builder *b, nir_alu_type type, uint8_t swizzle_val)
memset(&v, 0, sizeof(v));
if (swizzle_val == 4) {
- v.u[0] = v.u[1] = v.u[2] = v.u[3] = 0;
+ v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 0;
} else {
assert(swizzle_val == 5);
if (type == nir_type_float)
- v.f[0] = v.f[1] = v.f[2] = v.f[3] = 1.0;
+ v.f32[0] = v.f32[1] = v.f32[2] = v.f32[3] = 1.0;
else
- v.u[0] = v.u[1] = v.u[2] = v.u[3] = 1;
+ v.u32[0] = v.u32[1] = v.u32[2] = v.u32[3] = 1;
}
return nir_build_imm(b, 4, v);
diff --git a/src/compiler/nir/nir_lower_two_sided_color.c b/src/compiler/nir/nir_lower_two_sided_color.c
index fe3507c..c7fb67e 100644
--- a/src/compiler/nir/nir_lower_two_sided_color.c
+++ b/src/compiler/nir/nir_lower_two_sided_color.c
@@ -74,7 +74,7 @@ load_input(nir_builder *b, nir_variable *in)
load->num_components = 4;
nir_intrinsic_set_base(load, in->data.driver_location);
load->src[0] = nir_src_for_ssa(nir_imm_int(b, 0));
- nir_ssa_dest_init(&load->instr, &load->dest, 4, NULL);
+ nir_ssa_dest_init(&load->instr, &load->dest, 4, 32, NULL);
nir_builder_instr_insert(b, &load->instr);
return &load->dest.ssa;
diff --git a/src/compiler/nir/nir_lower_var_copies.c b/src/compiler/nir/nir_lower_var_copies.c
index 7db9839..c994f0f 100644
--- a/src/compiler/nir/nir_lower_var_copies.c
+++ b/src/compiler/nir/nir_lower_var_copies.c
@@ -116,12 +116,15 @@ emit_copy_load_store(nir_intrinsic_instr *copy_instr,
assert(src_tail->type == dest_tail->type);
unsigned num_components = glsl_get_vector_elements(src_tail->type);
+ unsigned bit_size =
+ glsl_get_bit_size(glsl_get_base_type(src_tail->type));
nir_intrinsic_instr *load =
nir_intrinsic_instr_create(mem_ctx, nir_intrinsic_load_var);
load->num_components = num_components;
load->variables[0] = nir_deref_as_var(nir_copy_deref(load, &src_head->deref));
- nir_ssa_dest_init(&load->instr, &load->dest, num_components, NULL);
+ nir_ssa_dest_init(&load->instr, &load->dest, num_components, bit_size,
+ NULL);
nir_instr_insert_before(&copy_instr->instr, &load->instr);
diff --git a/src/compiler/nir/nir_lower_vars_to_ssa.c b/src/compiler/nir/nir_lower_vars_to_ssa.c
index a3f3fcf..9f9e454 100644
--- a/src/compiler/nir/nir_lower_vars_to_ssa.c
+++ b/src/compiler/nir/nir_lower_vars_to_ssa.c
@@ -505,6 +505,7 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
nir_ssa_undef_instr *undef =
nir_ssa_undef_instr_create(state->shader,
intrin->num_components);
+ undef->def.bit_size = intrin->dest.ssa.bit_size;
nir_instr_insert_before(&intrin->instr, &undef->instr);
nir_instr_remove(&intrin->instr);
@@ -528,7 +529,8 @@ rename_variables_block(nir_block *block, struct lower_variables_state *state)
mov->dest.write_mask = (1 << intrin->num_components) - 1;
nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
- intrin->num_components, NULL);
+ intrin->num_components,
+ intrin->dest.ssa.bit_size, NULL);
nir_instr_insert_before(&intrin->instr, &mov->instr);
nir_instr_remove(&intrin->instr);
@@ -719,6 +721,7 @@ nir_lower_vars_to_ssa_impl(nir_function_impl *impl)
node->pb_value =
nir_phi_builder_add_value(state.phi_builder,
glsl_get_vector_elements(node->type),
+ glsl_get_bit_size(glsl_get_base_type(node->type)),
store_blocks);
if (node->deref->var->constant_initializer) {
diff --git a/src/compiler/nir/nir_opcodes.py b/src/compiler/nir/nir_opcodes.py
index 60ade4a..d6b658d 100644
--- a/src/compiler/nir/nir_opcodes.py
+++ b/src/compiler/nir/nir_opcodes.py
@@ -90,8 +90,12 @@ class Opcode(object):
# helper variables for strings
tfloat = "float"
tint = "int"
-tbool = "bool"
+tbool = "bool32"
tuint = "uint"
+tfloat32 = "float32"
+tint32 = "int32"
+tuint32 = "uint32"
+tfloat64 = "float64"
commutative = "commutative "
associative = "associative "
@@ -155,57 +159,57 @@ unop("frsq", tfloat, "1.0f / sqrtf(src0)")
unop("fsqrt", tfloat, "sqrtf(src0)")
unop("fexp2", tfloat, "exp2f(src0)")
unop("flog2", tfloat, "log2f(src0)")
-unop_convert("f2i", tint, tfloat, "src0") # Float-to-integer conversion.
-unop_convert("f2u", tuint, tfloat, "src0") # Float-to-unsigned conversion
-unop_convert("i2f", tfloat, tint, "src0") # Integer-to-float conversion.
+unop_convert("f2i", tint32, tfloat32, "src0") # Float-to-integer conversion.
+unop_convert("f2u", tuint32, tfloat32, "src0") # Float-to-unsigned conversion
+unop_convert("i2f", tfloat32, tint32, "src0") # Integer-to-float conversion.
# Float-to-boolean conversion
-unop_convert("f2b", tbool, tfloat, "src0 != 0.0f")
+unop_convert("f2b", tbool, tfloat32, "src0 != 0.0f")
# Boolean-to-float conversion
-unop_convert("b2f", tfloat, tbool, "src0 ? 1.0f : 0.0f")
+unop_convert("b2f", tfloat32, tbool, "src0 ? 1.0f : 0.0f")
# Int-to-boolean conversion
-unop_convert("i2b", tbool, tint, "src0 != 0")
-unop_convert("b2i", tint, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
-unop_convert("u2f", tfloat, tuint, "src0") # Unsigned-to-float conversion.
+unop_convert("i2b", tbool, tint32, "src0 != 0")
+unop_convert("b2i", tint32, tbool, "src0 ? 1 : 0") # Boolean-to-int conversion
+unop_convert("u2f", tfloat32, tuint32, "src0") # Unsigned-to-float conversion.
# Unary floating-point rounding operations.
-unop("ftrunc", tfloat, "truncf(src0)")
-unop("fceil", tfloat, "ceilf(src0)")
-unop("ffloor", tfloat, "floorf(src0)")
-unop("ffract", tfloat, "src0 - floorf(src0)")
-unop("fround_even", tfloat, "_mesa_roundevenf(src0)")
+unop("ftrunc", tfloat, "bit_size == 64 ? trunc(src0) : truncf(src0)")
+unop("fceil", tfloat, "bit_size == 64 ? ceil(src0) : ceilf(src0)")
+unop("ffloor", tfloat, "bit_size == 64 ? floor(src0) : floorf(src0)")
+unop("ffract", tfloat, "src0 - (bit_size == 64 ? floor(src0) : floorf(src0))")
+unop("fround_even", tfloat, "bit_size == 64 ? _mesa_roundeven(src0) : _mesa_roundevenf(src0)")
unop("fquantize2f16", tfloat, "(fabs(src0) < ldexpf(1.0, -14)) ? copysignf(0.0f, src0) : _mesa_half_to_float(_mesa_float_to_half(src0))")
# Trigonometric operations.
-unop("fsin", tfloat, "sinf(src0)")
-unop("fcos", tfloat, "cosf(src0)")
+unop("fsin", tfloat, "bit_size == 64 ? sin(src0) : sinf(src0)")
+unop("fcos", tfloat, "bit_size == 64 ? cos(src0) : cosf(src0)")
# Partial derivatives.
-unop("fddx", tfloat, "0.0f") # the derivative of a constant is 0.
-unop("fddy", tfloat, "0.0f")
-unop("fddx_fine", tfloat, "0.0f")
-unop("fddy_fine", tfloat, "0.0f")
-unop("fddx_coarse", tfloat, "0.0f")
-unop("fddy_coarse", tfloat, "0.0f")
+unop("fddx", tfloat, "0.0") # the derivative of a constant is 0.
+unop("fddy", tfloat, "0.0")
+unop("fddx_fine", tfloat, "0.0")
+unop("fddy_fine", tfloat, "0.0")
+unop("fddx_coarse", tfloat, "0.0")
+unop("fddy_coarse", tfloat, "0.0")
# Floating point pack and unpack operations.
def pack_2x16(fmt):
- unop_horiz("pack_" + fmt + "_2x16", 1, tuint, 2, tfloat, """
+ unop_horiz("pack_" + fmt + "_2x16", 1, tuint32, 2, tfloat32, """
dst.x = (uint32_t) pack_fmt_1x16(src0.x);
dst.x |= ((uint32_t) pack_fmt_1x16(src0.y)) << 16;
""".replace("fmt", fmt))
def pack_4x8(fmt):
- unop_horiz("pack_" + fmt + "_4x8", 1, tuint, 4, tfloat, """
+ unop_horiz("pack_" + fmt + "_4x8", 1, tuint32, 4, tfloat32, """
dst.x = (uint32_t) pack_fmt_1x8(src0.x);
dst.x |= ((uint32_t) pack_fmt_1x8(src0.y)) << 8;
dst.x |= ((uint32_t) pack_fmt_1x8(src0.z)) << 16;
@@ -213,13 +217,13 @@ dst.x |= ((uint32_t) pack_fmt_1x8(src0.w)) << 24;
""".replace("fmt", fmt))
def unpack_2x16(fmt):
- unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat, 1, tuint, """
+ unop_horiz("unpack_" + fmt + "_2x16", 2, tfloat32, 1, tuint32, """
dst.x = unpack_fmt_1x16((uint16_t)(src0.x & 0xffff));
dst.y = unpack_fmt_1x16((uint16_t)(src0.x << 16));
""".replace("fmt", fmt))
def unpack_4x8(fmt):
- unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat, 1, tuint, """
+ unop_horiz("unpack_" + fmt + "_4x8", 4, tfloat32, 1, tuint32, """
dst.x = unpack_fmt_1x8((uint8_t)(src0.x & 0xff));
dst.y = unpack_fmt_1x8((uint8_t)((src0.x >> 8) & 0xff));
dst.z = unpack_fmt_1x8((uint8_t)((src0.x >> 16) & 0xff));
@@ -238,11 +242,11 @@ unpack_2x16("unorm")
unpack_4x8("unorm")
unpack_2x16("half")
-unop_horiz("pack_uvec2_to_uint", 1, tuint, 2, tuint, """
+unop_horiz("pack_uvec2_to_uint", 1, tuint32, 2, tuint32, """
dst.x = (src0.x & 0xffff) | (src0.y << 16);
""")
-unop_horiz("pack_uvec4_to_uint", 1, tuint, 4, tuint, """
+unop_horiz("pack_uvec4_to_uint", 1, tuint32, 4, tuint32, """
dst.x = (src0.x << 0) |
(src0.y << 8) |
(src0.z << 16) |
@@ -252,22 +256,22 @@ dst.x = (src0.x << 0) |
# Lowered floating point unpacking operations.
-unop_horiz("unpack_half_2x16_split_x", 1, tfloat, 1, tuint,
+unop_horiz("unpack_half_2x16_split_x", 1, tfloat32, 1, tuint32,
"unpack_half_1x16((uint16_t)(src0.x & 0xffff))")
-unop_horiz("unpack_half_2x16_split_y", 1, tfloat, 1, tuint,
+unop_horiz("unpack_half_2x16_split_y", 1, tfloat32, 1, tuint32,
"unpack_half_1x16((uint16_t)(src0.x >> 16))")
# Bit operations, part of ARB_gpu_shader5.
-unop("bitfield_reverse", tuint, """
+unop("bitfield_reverse", tuint32, """
/* we're not winning any awards for speed here, but that's ok */
dst = 0;
for (unsigned bit = 0; bit < 32; bit++)
dst |= ((src0 >> bit) & 1) << (31 - bit);
""")
-unop("bit_count", tuint, """
+unop("bit_count", tuint32, """
dst = 0;
for (unsigned bit = 0; bit < 32; bit++) {
if ((src0 >> bit) & 1)
@@ -275,7 +279,7 @@ for (unsigned bit = 0; bit < 32; bit++) {
}
""")
-unop_convert("ufind_msb", tint, tuint, """
+unop_convert("ufind_msb", tint32, tuint32, """
dst = -1;
for (int bit = 31; bit > 0; bit--) {
if ((src0 >> bit) & 1) {
@@ -285,7 +289,7 @@ for (int bit = 31; bit > 0; bit--) {
}
""")
-unop("ifind_msb", tint, """
+unop("ifind_msb", tint32, """
dst = -1;
for (int bit = 31; bit >= 0; bit--) {
/* If src0 < 0, we're looking for the first 0 bit.
@@ -299,7 +303,7 @@ for (int bit = 31; bit >= 0; bit--) {
}
""")
-unop("find_lsb", tint, """
+unop("find_lsb", tint32, """
dst = -1;
for (unsigned bit = 0; bit < 32; bit++) {
if ((src0 >> bit) & 1) {
@@ -359,10 +363,10 @@ binop("fmul", tfloat, commutative + associative, "src0 * src1")
# low 32-bits of signed/unsigned integer multiply
binop("imul", tint, commutative + associative, "src0 * src1")
# high 32-bits of signed integer multiply
-binop("imul_high", tint, commutative,
+binop("imul_high", tint32, commutative,
"(int32_t)(((int64_t) src0 * (int64_t) src1) >> 32)")
# high 32-bits of unsigned integer multiply
-binop("umul_high", tuint, commutative,
+binop("umul_high", tuint32, commutative,
"(uint32_t)(((uint64_t) src0 * (uint64_t) src1) >> 32)")
binop("fdiv", tfloat, "", "src0 / src1")
@@ -427,18 +431,18 @@ binop_reduce("bany_inequal", 1, tbool, tint, "{src0} != {src1}",
# non-integer-aware GLSL-style comparisons that return 0.0 or 1.0
-binop_reduce("fall_equal", 1, tfloat, tfloat, "{src0} == {src1}",
+binop_reduce("fall_equal", 1, tfloat32, tfloat32, "{src0} == {src1}",
"{src0} && {src1}", "{src} ? 1.0f : 0.0f")
-binop_reduce("fany_nequal", 1, tfloat, tfloat, "{src0} != {src1}",
+binop_reduce("fany_nequal", 1, tfloat32, tfloat32, "{src0} != {src1}",
"{src0} || {src1}", "{src} ? 1.0f : 0.0f")
# These comparisons for integer-less hardware return 1.0 and 0.0 for true
# and false respectively
-binop("slt", tfloat, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
-binop("sge", tfloat, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
-binop("seq", tfloat, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
-binop("sne", tfloat, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
+binop("slt", tfloat32, "", "(src0 < src1) ? 1.0f : 0.0f") # Set on Less Than
+binop("sge", tfloat32, "", "(src0 >= src1) ? 1.0f : 0.0f") # Set on Greater or Equal
+binop("seq", tfloat32, commutative, "(src0 == src1) ? 1.0f : 0.0f") # Set on Equal
+binop("sne", tfloat32, commutative, "(src0 != src1) ? 1.0f : 0.0f") # Set on Not Equal
binop("ishl", tint, "", "src0 << src1")
@@ -461,11 +465,11 @@ binop("ixor", tuint, commutative + associative, "src0 ^ src1")
# These use (src != 0.0) for testing the truth of the input, and output 1.0
# for true and 0.0 for false
-binop("fand", tfloat, commutative,
+binop("fand", tfloat32, commutative,
"((src0 != 0.0f) && (src1 != 0.0f)) ? 1.0f : 0.0f")
-binop("for", tfloat, commutative,
+binop("for", tfloat32, commutative,
"((src0 != 0.0f) || (src1 != 0.0f)) ? 1.0f : 0.0f")
-binop("fxor", tfloat, commutative,
+binop("fxor", tfloat32, commutative,
"(src0 != 0.0f && src1 == 0.0f) || (src0 == 0.0f && src1 != 0.0f) ? 1.0f : 0.0f")
binop_reduce("fdot", 1, tfloat, tfloat, "{src0} * {src1}", "{src0} + {src1}",
@@ -487,7 +491,7 @@ binop("imax", tint, commutative + associative, "src1 > src0 ? src1 : src0")
binop("umax", tuint, commutative + associative, "src1 > src0 ? src1 : src0")
# Saturated vector add for 4 8bit ints.
-binop("usadd_4x8", tint, commutative + associative, """
+binop("usadd_4x8", tint32, commutative + associative, """
dst = 0;
for (int i = 0; i < 32; i += 8) {
dst |= MIN2(((src0 >> i) & 0xff) + ((src1 >> i) & 0xff), 0xff) << i;
@@ -495,7 +499,7 @@ for (int i = 0; i < 32; i += 8) {
""")
# Saturated vector subtract for 4 8bit ints.
-binop("ussub_4x8", tint, "", """
+binop("ussub_4x8", tint32, "", """
dst = 0;
for (int i = 0; i < 32; i += 8) {
int src0_chan = (src0 >> i) & 0xff;
@@ -506,7 +510,7 @@ for (int i = 0; i < 32; i += 8) {
""")
# vector min for 4 8bit ints.
-binop("umin_4x8", tint, commutative + associative, """
+binop("umin_4x8", tint32, commutative + associative, """
dst = 0;
for (int i = 0; i < 32; i += 8) {
dst |= MIN2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
@@ -514,7 +518,7 @@ for (int i = 0; i < 32; i += 8) {
""")
# vector max for 4 8bit ints.
-binop("umax_4x8", tint, commutative + associative, """
+binop("umax_4x8", tint32, commutative + associative, """
dst = 0;
for (int i = 0; i < 32; i += 8) {
dst |= MAX2((src0 >> i) & 0xff, (src1 >> i) & 0xff) << i;
@@ -522,7 +526,7 @@ for (int i = 0; i < 32; i += 8) {
""")
# unorm multiply: (a * b) / 255.
-binop("umul_unorm_4x8", tint, commutative + associative, """
+binop("umul_unorm_4x8", tint32, commutative + associative, """
dst = 0;
for (int i = 0; i < 32; i += 8) {
int src0_chan = (src0 >> i) & 0xff;
@@ -531,15 +535,15 @@ for (int i = 0; i < 32; i += 8) {
}
""")
-binop("fpow", tfloat, "", "powf(src0, src1)")
+binop("fpow", tfloat, "", "bit_size == 64 ? powf(src0, src1) : pow(src0, src1)")
-binop_horiz("pack_half_2x16_split", 1, tuint, 1, tfloat, 1, tfloat,
+binop_horiz("pack_half_2x16_split", 1, tuint32, 1, tfloat32, 1, tfloat32,
"pack_half_1x16(src0.x) | (pack_half_1x16(src1.x) << 16)")
# bfm implements the behavior of the first operation of the SM5 "bfi" assembly
# and that of the "bfi1" i965 instruction. That is, it has undefined behavior
# if either of its arguments are 32.
-binop_convert("bfm", tuint, tint, "", """
+binop_convert("bfm", tuint32, tint32, "", """
int bits = src0, offset = src1;
if (offset < 0 || bits < 0 || offset > 31 || bits > 31 || offset + bits > 32)
dst = 0; /* undefined */
@@ -548,7 +552,7 @@ else
""")
opcode("ldexp", 0, tfloat, [0, 0], [tfloat, tint], "", """
-dst = ldexpf(src0, src1);
+dst = (bit_size == 64) ? ldexp(src0, src1) : ldexpf(src0, src1);
/* flush denormals to zero. */
if (!isnormal(dst))
dst = copysignf(0.0f, src0);
@@ -588,12 +592,12 @@ triop("flrp", tfloat, "src0 * (1 - src2) + src1 * src2")
# bools (0.0 vs 1.0) and one for integer bools (0 vs ~0).
-triop("fcsel", tfloat, "(src0 != 0.0f) ? src1 : src2")
+triop("fcsel", tfloat32, "(src0 != 0.0f) ? src1 : src2")
opcode("bcsel", 0, tuint, [0, 0, 0],
[tbool, tuint, tuint], "", "src0 ? src1 : src2")
# SM5 bfi assembly
-triop("bfi", tuint, """
+triop("bfi", tuint32, """
unsigned mask = src0, insert = src1, base = src2;
if (mask == 0) {
dst = base;
@@ -608,8 +612,8 @@ if (mask == 0) {
""")
# SM5 ubfe/ibfe assembly
-opcode("ubfe", 0, tuint,
- [0, 0, 0], [tuint, tint, tint], "", """
+opcode("ubfe", 0, tuint32,
+ [0, 0, 0], [tuint32, tint32, tint32], "", """
unsigned base = src0;
int offset = src1, bits = src2;
if (bits == 0) {
@@ -622,8 +626,8 @@ if (bits == 0) {
dst = base >> offset;
}
""")
-opcode("ibfe", 0, tint,
- [0, 0, 0], [tint, tint, tint], "", """
+opcode("ibfe", 0, tint32,
+ [0, 0, 0], [tint32, tint32, tint32], "", """
int base = src0;
int offset = src1, bits = src2;
if (bits == 0) {
@@ -638,8 +642,8 @@ if (bits == 0) {
""")
# GLSL bitfieldExtract()
-opcode("ubitfield_extract", 0, tuint,
- [0, 0, 0], [tuint, tint, tint], "", """
+opcode("ubitfield_extract", 0, tuint32,
+ [0, 0, 0], [tuint32, tint32, tint32], "", """
unsigned base = src0;
int offset = src1, bits = src2;
if (bits == 0) {
@@ -650,8 +654,8 @@ if (bits == 0) {
dst = (base >> offset) & ((1ull << bits) - 1);
}
""")
-opcode("ibitfield_extract", 0, tint,
- [0, 0, 0], [tint, tint, tint], "", """
+opcode("ibitfield_extract", 0, tint32,
+ [0, 0, 0], [tint32, tint32, tint32], "", """
int base = src0;
int offset = src1, bits = src2;
if (bits == 0) {
@@ -678,8 +682,8 @@ def quadop_horiz(name, output_size, src1_size, src2_size, src3_size,
[tuint, tuint, tuint, tuint],
"", const_expr)
-opcode("bitfield_insert", 0, tuint, [0, 0, 0, 0],
- [tuint, tuint, tint, tint], "", """
+opcode("bitfield_insert", 0, tuint32, [0, 0, 0, 0],
+ [tuint32, tuint32, tint32, tint32], "", """
unsigned base = src0, insert = src1;
int offset = src2, bits = src3;
if (bits == 0) {
diff --git a/src/compiler/nir/nir_opt_algebraic.py b/src/compiler/nir/nir_opt_algebraic.py
index 54f7d86..ed21c5d 100644
--- a/src/compiler/nir/nir_opt_algebraic.py
+++ b/src/compiler/nir/nir_opt_algebraic.py
@@ -35,10 +35,17 @@ d = 'd'
# Written in the form (<search>, <replace>) where <search> is an expression
# and <replace> is either an expression or a value. An expression is
-# defined as a tuple of the form (<op>, <src0>, <src1>, <src2>, <src3>)
+# defined as a tuple of the form ([~]<op>, <src0>, <src1>, <src2>, <src3>)
# where each source is either an expression or a value. A value can be
# either a numeric constant or a string representing a variable name.
#
+# If the opcode in a search expression is prefixed by a '~' character, this
+# indicates that the operation is inexact. Such operations will only get
+# applied to SSA values that do not have the exact bit set. This should be
+# used by any optimizations that are not bit-for-bit exact. It should not,
+# however, be used for backend-requested lowering operations as those need to
+# happen regardless of precision.
+#
# Variable names are specified as "[#]name[@type]" where "#" indicates that
# the given variable will only match constants and the type indicates that
# the given variable will only match values from ALU instructions with the
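As a concrete illustration of the '~' marker described above, a rule such as (('~fadd', a, 0.0), a), which appears below, is inexact because IEEE 754 addition does not preserve the sign of zero. A minimal standalone C sketch (not part of the patch) demonstrating the difference:

   #include <math.h>
   #include <stdio.h>

   int main(void)
   {
      float a = -0.0f;
      /* fadd(a, 0.0) evaluates to +0.0, but the replacement is just a,
       * which is -0.0, so the rewrite can flip the sign bit. */
      printf("signbit(a + 0.0f) = %d\n", (int)signbit(a + 0.0f)); /* 0 */
      printf("signbit(a)        = %d\n", (int)signbit(a));        /* 1 */
      return 0;
   }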
@@ -55,19 +62,19 @@ optimizations = [
(('fabs', ('fneg', a)), ('fabs', a)),
(('iabs', ('iabs', a)), ('iabs', a)),
(('iabs', ('ineg', a)), ('iabs', a)),
- (('fadd', a, 0.0), a),
+ (('~fadd', a, 0.0), a),
(('iadd', a, 0), a),
(('usadd_4x8', a, 0), a),
(('usadd_4x8', a, ~0), ~0),
- (('fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
+ (('~fadd', ('fmul', a, b), ('fmul', a, c)), ('fmul', a, ('fadd', b, c))),
(('iadd', ('imul', a, b), ('imul', a, c)), ('imul', a, ('iadd', b, c))),
- (('fadd', ('fneg', a), a), 0.0),
+ (('~fadd', ('fneg', a), a), 0.0),
(('iadd', ('ineg', a), a), 0),
(('iadd', ('ineg', a), ('iadd', a, b)), b),
(('iadd', a, ('iadd', ('ineg', a), b)), b),
- (('fadd', ('fneg', a), ('fadd', a, b)), b),
- (('fadd', a, ('fadd', ('fneg', a), b)), b),
- (('fmul', a, 0.0), 0.0),
+ (('~fadd', ('fneg', a), ('fadd', a, b)), b),
+ (('~fadd', a, ('fadd', ('fneg', a), b)), b),
+ (('~fmul', a, 0.0), 0.0),
(('imul', a, 0), 0),
(('umul_unorm_4x8', a, 0), 0),
(('umul_unorm_4x8', a, ~0), a),
@@ -76,32 +83,48 @@ optimizations = [
(('fmul', a, -1.0), ('fneg', a)),
(('imul', a, -1), ('ineg', a)),
(('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
- (('ffma', 0.0, a, b), b),
- (('ffma', a, 0.0, b), b),
- (('ffma', a, b, 0.0), ('fmul', a, b)),
+ (('~ffma', 0.0, a, b), b),
+ (('~ffma', a, 0.0, b), b),
+ (('~ffma', a, b, 0.0), ('fmul', a, b)),
(('ffma', a, 1.0, b), ('fadd', a, b)),
(('ffma', 1.0, a, b), ('fadd', a, b)),
- (('flrp', a, b, 0.0), a),
- (('flrp', a, b, 1.0), b),
- (('flrp', a, a, b), a),
- (('flrp', 0.0, a, b), ('fmul', a, b)),
+ (('~flrp', a, b, 0.0), a),
+ (('~flrp', a, b, 1.0), b),
+ (('~flrp', a, a, b), a),
+ (('~flrp', 0.0, a, b), ('fmul', a, b)),
+ (('~flrp', a, b, ('b2f', c)), ('bcsel', c, b, a), 'options->lower_flrp'),
(('flrp', a, b, c), ('fadd', ('fmul', c, ('fsub', b, a)), a), 'options->lower_flrp'),
(('ffract', a), ('fsub', a, ('ffloor', a)), 'options->lower_ffract'),
- (('fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c))), ('fmul', b, c)), ('flrp', a, b, c), '!options->lower_flrp'),
- (('fadd', a, ('fmul', c, ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
+ (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', ('b2f', c)))), ('fmul', b, ('b2f', c))), ('bcsel', c, b, a), 'options->lower_flrp'),
+ (('~fadd', ('fmul', a, ('fadd', 1.0, ('fneg', c ))), ('fmul', b, c )), ('flrp', a, b, c), '!options->lower_flrp'),
+ (('~fadd', a, ('fmul', ('b2f', c), ('fadd', b, ('fneg', a)))), ('bcsel', c, b, a), 'options->lower_flrp'),
+ (('~fadd', a, ('fmul', c , ('fadd', b, ('fneg', a)))), ('flrp', a, b, c), '!options->lower_flrp'),
(('ffma', a, b, c), ('fadd', ('fmul', a, b), c), 'options->lower_ffma'),
- (('fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
+ (('~fadd', ('fmul', a, b), c), ('ffma', a, b, c), '!options->lower_ffma'),
# Comparison simplifications
- (('inot', ('flt', a, b)), ('fge', a, b)),
- (('inot', ('fge', a, b)), ('flt', a, b)),
- (('inot', ('feq', a, b)), ('fne', a, b)),
- (('inot', ('fne', a, b)), ('feq', a, b)),
+ (('~inot', ('flt', a, b)), ('fge', a, b)),
+ (('~inot', ('fge', a, b)), ('flt', a, b)),
+ (('~inot', ('feq', a, b)), ('fne', a, b)),
+ (('~inot', ('fne', a, b)), ('feq', a, b)),
(('inot', ('ilt', a, b)), ('ige', a, b)),
(('inot', ('ige', a, b)), ('ilt', a, b)),
(('inot', ('ieq', a, b)), ('ine', a, b)),
(('inot', ('ine', a, b)), ('ieq', a, b)),
+
+ # 0.0 >= b2f(a)
+ # b2f(a) <= 0.0
+ # b2f(a) == 0.0 because b2f(a) can only be 0 or 1
+ # inot(a)
+ (('fge', 0.0, ('b2f', a)), ('inot', a)),
+
+ # 0.0 < fabs(a)
+ # fabs(a) > 0.0
+ # fabs(a) != 0.0 because fabs(a) must be >= 0
+ # a != 0.0
+ (('flt', 0.0, ('fabs', a)), ('fne', a, 0.0)),
+
(('fge', ('fneg', ('fabs', a)), 0.0), ('feq', a, 0.0)),
- (('bcsel', ('flt', a, b), a, b), ('fmin', a, b)),
+ (('bcsel', ('flt', b, a), b, a), ('fmin', a, b)),
(('bcsel', ('flt', a, b), b, a), ('fmax', a, b)),
(('bcsel', ('inot', 'a@bool'), b, c), ('bcsel', a, c, b)),
(('bcsel', a, ('bcsel', a, b, c), d), ('bcsel', a, b, d)),
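Expanding the new b2f-based lowering rules above shows why flrp with a boolean interpolant reduces to a select:

   flrp(a, b, b2f(c)) = a * (1 - b2f(c)) + b * b2f(c)
     c false: b2f(c) = 0.0  ->  a * 1.0 + b * 0.0 = a
     c true:  b2f(c) = 1.0  ->  a * 0.0 + b * 1.0 = b

which is exactly bcsel(c, b, a), matching the replacements on the lower_flrp lines.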
@@ -111,15 +134,19 @@ optimizations = [
(('imax', a, a), a),
(('umin', a, a), a),
(('umax', a, a), a),
- (('fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
- (('fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
+ (('~fmin', ('fmax', a, 0.0), 1.0), ('fsat', a), '!options->lower_fsat'),
+ (('~fmax', ('fmin', a, 1.0), 0.0), ('fsat', a), '!options->lower_fsat'),
(('fsat', a), ('fmin', ('fmax', a, 0.0), 1.0), 'options->lower_fsat'),
(('fsat', ('fsat', a)), ('fsat', a)),
(('fmin', ('fmax', ('fmin', ('fmax', a, 0.0), 1.0), 0.0), 1.0), ('fmin', ('fmax', a, 0.0), 1.0)),
- (('ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
- (('ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)),
- (('ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
- (('ior', ('fge', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)),
+ (('~ior', ('flt', a, b), ('flt', a, c)), ('flt', a, ('fmax', b, c))),
+ (('~ior', ('flt', a, c), ('flt', b, c)), ('flt', ('fmin', a, b), c)),
+ (('~ior', ('fge', a, b), ('fge', a, c)), ('fge', a, ('fmin', b, c))),
+ (('~ior', ('fge', a, c), ('fge', b, c)), ('fge', ('fmax', a, b), c)),
+ (('fabs', ('slt', a, b)), ('slt', a, b)),
+ (('fabs', ('sge', a, b)), ('sge', a, b)),
+ (('fabs', ('seq', a, b)), ('seq', a, b)),
+ (('fabs', ('sne', a, b)), ('sne', a, b)),
(('slt', a, b), ('b2f', ('flt', a, b)), 'options->lower_scmp'),
(('sge', a, b), ('b2f', ('fge', a, b)), 'options->lower_scmp'),
(('seq', a, b), ('b2f', ('feq', a, b)), 'options->lower_scmp'),
@@ -151,7 +178,6 @@ optimizations = [
(('ior', a, 0), a),
(('fxor', a, a), 0.0),
(('ixor', a, a), 0),
- (('fxor', a, 0.0), a),
(('ixor', a, 0), a),
(('inot', ('inot', a)), a),
# DeMorgan's Laws
@@ -167,35 +193,35 @@ optimizations = [
(('iand', 0xff, ('ushr', a, 24)), ('ushr', a, 24)),
(('iand', 0xffff, ('ushr', a, 16)), ('ushr', a, 16)),
# Exponential/logarithmic identities
- (('fexp2', ('flog2', a)), a), # 2^lg2(a) = a
- (('flog2', ('fexp2', a)), a), # lg2(2^a) = a
+ (('~fexp2', ('flog2', a)), a), # 2^lg2(a) = a
+ (('~flog2', ('fexp2', a)), a), # lg2(2^a) = a
(('fpow', a, b), ('fexp2', ('fmul', ('flog2', a), b)), 'options->lower_fpow'), # a^b = 2^(lg2(a)*b)
- (('fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
- (('fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))),
- ('fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a) * b + lg2(c) + d) = a^b * c^d
- (('fpow', a, 1.0), a),
- (('fpow', a, 2.0), ('fmul', a, a)),
- (('fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
- (('fpow', 2.0, a), ('fexp2', a)),
- (('fpow', ('fpow', a, 2.2), 0.454545), a),
- (('fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)),
- (('fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
- (('frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
- (('frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
- (('flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
- (('flog2', ('frcp', a)), ('fneg', ('flog2', a))),
- (('flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
- (('flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
- (('fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
- (('fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
- (('fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
+ (('~fexp2', ('fmul', ('flog2', a), b)), ('fpow', a, b), '!options->lower_fpow'), # 2^(lg2(a)*b) = a^b
+ (('~fexp2', ('fadd', ('fmul', ('flog2', a), b), ('fmul', ('flog2', c), d))),
+   ('~fmul', ('fpow', a, b), ('fpow', c, d)), '!options->lower_fpow'), # 2^(lg2(a) * b + lg2(c) * d) = a^b * c^d
+ (('~fpow', a, 1.0), a),
+ (('~fpow', a, 2.0), ('fmul', a, a)),
+ (('~fpow', a, 4.0), ('fmul', ('fmul', a, a), ('fmul', a, a))),
+ (('~fpow', 2.0, a), ('fexp2', a)),
+ (('~fpow', ('fpow', a, 2.2), 0.454545), a),
+ (('~fpow', ('fabs', ('fpow', a, 2.2)), 0.454545), ('fabs', a)),
+ (('~fsqrt', ('fexp2', a)), ('fexp2', ('fmul', 0.5, a))),
+ (('~frcp', ('fexp2', a)), ('fexp2', ('fneg', a))),
+ (('~frsq', ('fexp2', a)), ('fexp2', ('fmul', -0.5, a))),
+ (('~flog2', ('fsqrt', a)), ('fmul', 0.5, ('flog2', a))),
+ (('~flog2', ('frcp', a)), ('fneg', ('flog2', a))),
+ (('~flog2', ('frsq', a)), ('fmul', -0.5, ('flog2', a))),
+ (('~flog2', ('fpow', a, b)), ('fmul', b, ('flog2', a))),
+ (('~fadd', ('flog2', a), ('flog2', b)), ('flog2', ('fmul', a, b))),
+ (('~fadd', ('flog2', a), ('fneg', ('flog2', b))), ('flog2', ('fdiv', a, b))),
+ (('~fmul', ('fexp2', a), ('fexp2', b)), ('fexp2', ('fadd', a, b))),
# Division and reciprocal
- (('fdiv', 1.0, a), ('frcp', a)),
+ (('~fdiv', 1.0, a), ('frcp', a)),
(('fdiv', a, b), ('fmul', a, ('frcp', b)), 'options->lower_fdiv'),
- (('frcp', ('frcp', a)), a),
- (('frcp', ('fsqrt', a)), ('frsq', a)),
+ (('~frcp', ('frcp', a)), a),
+ (('~frcp', ('fsqrt', a)), ('frsq', a)),
(('fsqrt', a), ('frcp', ('frsq', a)), 'options->lower_fsqrt'),
- (('frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'),
+ (('~frcp', ('frsq', a)), ('fsqrt', a), '!options->lower_fsqrt'),
# Boolean simplifications
(('ieq', 'a@bool', True), a),
(('ine', 'a@bool', True), ('inot', a)),
@@ -216,6 +242,10 @@ optimizations = [
(('i2b', ('b2i', a)), a),
(('f2i', ('ftrunc', a)), ('f2i', a)),
(('f2u', ('ftrunc', a)), ('f2u', a)),
+ (('i2b', ('ineg', a)), ('i2b', a)),
+ (('i2b', ('iabs', a)), ('i2b', a)),
+ (('fabs', ('b2f', a)), ('b2f', a)),
+ (('iabs', ('b2i', a)), ('b2i', a)),
# Byte extraction
(('ushr', a, 24), ('extract_u8', a, 3), '!options->lower_extract_byte'),
@@ -228,7 +258,7 @@ optimizations = [
(('iand', 0xffff, a), ('extract_u16', a, 0), '!options->lower_extract_word'),
# Subtracts
- (('fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
+ (('~fsub', a, ('fsub', 0.0, b)), ('fadd', a, b)),
(('isub', a, ('isub', 0, b)), ('iadd', a, b)),
(('ussub_4x8', a, 0), a),
(('ussub_4x8', a, ~0), 0),
@@ -236,7 +266,7 @@ optimizations = [
(('isub', a, b), ('iadd', a, ('ineg', b)), 'options->lower_sub'),
(('fneg', a), ('fsub', 0.0, a), 'options->lower_negate'),
(('ineg', a), ('isub', 0, a), 'options->lower_negate'),
- (('fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
+ (('~fadd', a, ('fsub', 0.0, b)), ('fsub', a, b)),
(('iadd', a, ('isub', 0, b)), ('isub', a, b)),
(('fabs', ('fsub', 0.0, a)), ('fabs', a)),
(('iabs', ('isub', 0, a)), ('iabs', a)),
@@ -368,10 +398,13 @@ for op in ['flt', 'fge', 'feq', 'fne',
# they help code generation but do not necessarily produce code that is
# more easily optimizable.
late_optimizations = [
+  # Most of these optimizations aren't quite safe when you get infinity or
+  # NaN involved, but the first one should be fine.
(('flt', ('fadd', a, b), 0.0), ('flt', a, ('fneg', b))),
- (('fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
- (('feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
- (('fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+ (('~fge', ('fadd', a, b), 0.0), ('fge', a, ('fneg', b))),
+ (('~feq', ('fadd', a, b), 0.0), ('feq', a, ('fneg', b))),
+ (('~fne', ('fadd', a, b), 0.0), ('fne', a, ('fneg', b))),
+
(('fdot2', a, b), ('fdot_replicated2', a, b), 'options->fdot_replicates'),
(('fdot3', a, b), ('fdot_replicated3', a, b), 'options->fdot_replicates'),
(('fdot4', a, b), ('fdot_replicated4', a, b), 'options->fdot_replicates'),
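The '~' markers on the fge/feq/fne forms above can be checked with a standalone C sketch (not part of the patch): for a = +inf and b = -inf, the original and rewritten comparisons disagree, while the flt form happens to agree for this case.

   #include <math.h>
   #include <stdio.h>

   int main(void)
   {
      float a = INFINITY, b = -INFINITY;
      /* a + b is NaN, and NaN >= 0.0f is false ... */
      printf("fge(a + b, 0): %d\n", (a + b) >= 0.0f);  /* 0 */
      /* ... but inf >= inf is true, so fge(a, -b) differs. */
      printf("fge(a, -b)   : %d\n", a >= -b);          /* 1 */
      /* flt: NaN < 0.0f and inf < inf are both false. */
      printf("flt(a + b, 0): %d  flt(a, -b): %d\n",
             (a + b) < 0.0f, a < -b);                  /* 0 0 */
      return 0;
   }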
diff --git a/src/compiler/nir/nir_opt_constant_folding.c b/src/compiler/nir/nir_opt_constant_folding.c
index 04876a4..e64ca36 100644
--- a/src/compiler/nir/nir_opt_constant_folding.c
+++ b/src/compiler/nir/nir_opt_constant_folding.c
@@ -46,10 +46,28 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
if (!instr->dest.dest.is_ssa)
return false;
+ /* If any outputs/inputs have unsized types, then we need to guess the
+ * bit-size. In this case, the validator ensures that all bit-sizes
+ * match, so we can just take the bit-size from the first output/input
+ * with an unsized type. If all the outputs/inputs are sized, then we
+ * don't need to guess the bit-size at all because the code we generate
+ * for constant opcodes in this case already knows the sizes of the
+ * types involved and does not need the provided bit-size for anything
+ * (although it still requires a valid bit-size).
+ */
+ unsigned bit_size = 0;
+ if (!nir_alu_type_get_type_size(nir_op_infos[instr->op].output_type))
+ bit_size = instr->dest.dest.ssa.bit_size;
+
for (unsigned i = 0; i < nir_op_infos[instr->op].num_inputs; i++) {
if (!instr->src[i].src.is_ssa)
return false;
+ if (bit_size == 0 &&
+ !nir_alu_type_get_type_size(nir_op_infos[instr->op].input_sizes[i])) {
+ bit_size = instr->src[i].src.ssa->bit_size;
+ }
+
nir_instr *src_instr = instr->src[i].src.ssa->parent_instr;
if (src_instr->type != nir_instr_type_load_const)
@@ -58,24 +76,31 @@ constant_fold_alu_instr(nir_alu_instr *instr, void *mem_ctx)
for (unsigned j = 0; j < nir_ssa_alu_instr_src_components(instr, i);
j++) {
- src[i].u[j] = load_const->value.u[instr->src[i].swizzle[j]];
+ if (load_const->def.bit_size == 64)
+ src[i].u64[j] = load_const->value.u64[instr->src[i].swizzle[j]];
+ else
+ src[i].u32[j] = load_const->value.u32[instr->src[i].swizzle[j]];
}
/* We shouldn't have any source modifiers in the optimization loop. */
assert(!instr->src[i].abs && !instr->src[i].negate);
}
+ if (bit_size == 0)
+ bit_size = 32;
+
/* We shouldn't have any saturate modifiers in the optimization loop. */
assert(!instr->dest.saturate);
nir_const_value dest =
nir_eval_const_opcode(instr->op, instr->dest.dest.ssa.num_components,
- src);
+ bit_size, src);
nir_load_const_instr *new_instr =
nir_load_const_instr_create(mem_ctx,
instr->dest.dest.ssa.num_components);
+ new_instr->def.bit_size = instr->dest.dest.ssa.bit_size;
new_instr->value = dest;
nir_instr_insert_before(&instr->instr, &new_instr->instr);
@@ -106,7 +131,7 @@ constant_fold_deref(nir_instr *instr, nir_deref_var *deref)
nir_load_const_instr *indirect =
nir_instr_as_load_const(arr->indirect.ssa->parent_instr);
- arr->base_offset += indirect->value.u[0];
+ arr->base_offset += indirect->value.u32[0];
/* Clear out the source */
nir_instr_rewrite_src(instr, &arr->indirect, nir_src_for_ssa(NULL));
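With the bit size plumbed through as above, folding a 64-bit ALU operation looks roughly like this hedged sketch (the values and opcode are illustrative; the nir_eval_const_opcode() signature and the f64 union member are the ones used by this series):

   nir_const_value srcs[2];
   srcs[0].f64[0] = 1.5;
   srcs[1].f64[0] = 2.25;

   /* 1 component, 64-bit: evaluates fadd in double precision. */
   nir_const_value res =
      nir_eval_const_opcode(nir_op_fadd, 1, 64, srcs);
   /* res.f64[0] == 3.75 */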
diff --git a/src/compiler/nir/nir_opt_dead_cf.c b/src/compiler/nir/nir_opt_dead_cf.c
index 4cc6798..4658b23 100644
--- a/src/compiler/nir/nir_opt_dead_cf.c
+++ b/src/compiler/nir/nir_opt_dead_cf.c
@@ -228,7 +228,7 @@ dead_cf_block(nir_block *block)
if (!const_value)
return false;
- opt_constant_if(following_if, const_value->u[0] != 0);
+ opt_constant_if(following_if, const_value->u32[0] != 0);
return true;
}
diff --git a/src/compiler/nir/nir_opt_peephole_select.c b/src/compiler/nir/nir_opt_peephole_select.c
index 0fc658d..bad9dc4 100644
--- a/src/compiler/nir/nir_opt_peephole_select.c
+++ b/src/compiler/nir/nir_opt_peephole_select.c
@@ -210,7 +210,8 @@ nir_opt_peephole_select_block(nir_block *block, void *void_state)
}
nir_ssa_dest_init(&sel->instr, &sel->dest.dest,
- phi->dest.ssa.num_components, phi->dest.ssa.name);
+ phi->dest.ssa.num_components,
+ phi->dest.ssa.bit_size, phi->dest.ssa.name);
sel->dest.write_mask = (1 << phi->dest.ssa.num_components) - 1;
nir_ssa_def_rewrite_uses(&phi->dest.ssa,
diff --git a/src/compiler/nir/nir_phi_builder.c b/src/compiler/nir/nir_phi_builder.c
index 5429083..a39e360 100644
--- a/src/compiler/nir/nir_phi_builder.c
+++ b/src/compiler/nir/nir_phi_builder.c
@@ -52,6 +52,7 @@ struct nir_phi_builder_value {
/* Needed so we can create phis and undefs */
unsigned num_components;
+ unsigned bit_size;
/* The list of phi nodes associated with this value. Phi nodes are not
* added directly. Instead, they are created, the instr->block pointer
@@ -61,8 +62,18 @@ struct nir_phi_builder_value {
*/
struct exec_list phis;
- /* Array of SSA defs, indexed by block. If a phi needs to be inserted
- * in a given block, it will have the magic value NEEDS_PHI.
+ /* Array of SSA defs, indexed by block. For each block, this array has
+ * one of three types of values:
+ *
+ * - NULL. Indicates that there is no known definition in this block. If
+ * you need to find one, look at the block's immediate dominator.
+ *
+ * - NEEDS_PHI. Indicates that the block may need a phi node but none has
+ * been created yet. If a def is requested for a block, a phi will need
+ * to be created.
+ *
+ * - A regular SSA def. This will be either the result of a phi node or
+ * one of the defs provided by nir_phi_builder_value_set_block_def().
*/
nir_ssa_def *defs[0];
};
@@ -101,7 +112,7 @@ nir_phi_builder_create(nir_function_impl *impl)
struct nir_phi_builder_value *
nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
- const BITSET_WORD *defs)
+ unsigned bit_size, const BITSET_WORD *defs)
{
struct nir_phi_builder_value *val;
unsigned i, w_start = 0, w_end = 0;
@@ -109,6 +120,7 @@ nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
val = rzalloc_size(pb, sizeof(*val) + sizeof(val->defs[0]) * pb->num_blocks);
val->builder = pb;
val->num_components = num_components;
+ val->bit_size = bit_size;
exec_list_make_empty(&val->phis);
exec_list_push_tail(&pb->values, &val->node);
@@ -127,8 +139,7 @@ nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
set_foreach(cur->dom_frontier, dom_entry) {
nir_block *next = (nir_block *) dom_entry->key;
- /*
- * If there's more than one return statement, then the end block
+ /* If there's more than one return statement, then the end block
* can be a join point for some definitions. However, there are
* no instructions in the end block, so nothing would use those
* phi nodes. Of course, we couldn't place those phi nodes
@@ -139,6 +150,10 @@ nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
continue;
if (val->defs[next->index] == NULL) {
+ /* Instead of creating a phi node immediately, we simply set the
+ * value to the magic value NEEDS_PHI. Later, we create phi nodes
+ * on demand in nir_phi_builder_value_get_block_def().
+ */
val->defs[next->index] = NEEDS_PHI;
if (pb->work[next->index] < pb->iter_count) {
@@ -163,7 +178,9 @@ nir_ssa_def *
nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val,
nir_block *block)
{
+ /* For each block, we have one of three types of values */
if (val->defs[block->index] == NULL) {
+ /* NULL indicates that we have no SSA def for this block. */
if (block->imm_dom) {
/* Grab it from our immediate dominator. We'll stash it here for
* easy access later.
@@ -185,17 +202,36 @@ nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val,
return &undef->def;
}
} else if (val->defs[block->index] == NEEDS_PHI) {
- /* If we need a phi instruction, go ahead and create one but don't
- * add it to the program yet. Later, we'll go through and set up phi
- * sources and add the instructions will be added at that time.
+ /* The magic value NEEDS_PHI indicates that the block needs a phi node
+ * but none has been created. We need to create one now so we can
+ * return it to the caller.
+ *
+ * Because a phi node may use SSA defs that it does not dominate (this
+ * happens in loops), we do not yet have enough information to fully
+ * fill out the phi node. Instead, the phi nodes we create here will be
+ * empty (have no sources) and won't actually be placed in the block's
+ * instruction list yet. Later, in nir_phi_builder_finish(), we walk
+ * over all of the phi instructions, fill out the sources lists, and
+ * place them at the top of their respective block's instruction list.
+ *
+ * Creating phi nodes on-demand allows us to avoid creating dead phi
+ * nodes that will just get deleted later. While this probably isn't a
+ * big win for a full into-SSA pass, other users may use the phi builder
+ * to make small SSA form repairs where most of the phi nodes will never
+ * be used.
*/
nir_phi_instr *phi = nir_phi_instr_create(val->builder->shader);
- nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components, NULL);
+ nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components,
+ val->bit_size, NULL);
phi->instr.block = block;
exec_list_push_tail(&val->phis, &phi->instr.node);
val->defs[block->index] = &phi->dest.ssa;
return &phi->dest.ssa;
} else {
+ /* In this case, we have an actual SSA def. It's either the result of a
+ * phi node created by the case above or one passed to us through
+ * nir_phi_builder_value_set_block_def().
+ */
return val->defs[block->index];
}
}
@@ -216,9 +252,14 @@ nir_phi_builder_finish(struct nir_phi_builder *pb)
NIR_VLA(nir_block *, preds, num_blocks);
foreach_list_typed(struct nir_phi_builder_value, val, node, &pb->values) {
- /* We can't iterate over the list of phis normally because we are
- * removing them as we go and, in some cases, adding new phis as we
- * build the source lists of others.
+ /* We treat the linked list of phi nodes like a worklist. The list is
+ * pre-populated by calls to nir_phi_builder_value_get_block_def() that
+ * create phi nodes. As we fill in the sources of phi nodes, more may
+ * be created and are added to the end of the list.
+ *
+ * Because we are adding and removing phi nodes from the list as we go,
+ * we can't iterate over it normally. Instead, we just iterate until
+ * the list is empty.
*/
while (!exec_list_is_empty(&val->phis)) {
struct exec_node *head = exec_list_get_head(&val->phis);
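A worked example of the defs[] states described above (a sketch): suppose a value is written in blocks A and C of the diamond

      A
     / \
    B   C
     \ /
      D

nir_phi_builder_add_value() visits the dominance frontier of the defining blocks and marks defs[D] = NEEDS_PHI. A call to nir_phi_builder_value_get_block_def() for B finds NULL, so it recurses to B's immediate dominator A, stashes A's def, and returns it. The same call for D sees NEEDS_PHI and creates an empty phi; nir_phi_builder_finish() later fills in that phi's sources from the defs reaching the ends of B and C.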
diff --git a/src/compiler/nir/nir_phi_builder.h b/src/compiler/nir/nir_phi_builder.h
index 50251bf..edc5302 100644
--- a/src/compiler/nir/nir_phi_builder.h
+++ b/src/compiler/nir/nir_phi_builder.h
@@ -25,7 +25,38 @@
#include "nir.h"
+/** A helper for placing phi nodes in a NIR shader
+ *
+ * Basic usage goes something like this:
+ *
+ * each variable, var, has:
+ * a bitset var.defs of blocks where the variable is defined
+ * a struct nir_phi_builder_value *pb_val
+ *
+ * // initialize bitsets
+ * foreach block:
+ * foreach def of variable var:
+ * var.defs[def.block] = true;
+ *
+ * // initialize phi builder
+ * pb = nir_phi_builder_create()
+ * foreach var:
+ * var.pb_val = nir_phi_builder_add_value(pb, var.num_components,
+ * var.bit_size, var.defs)
+ *
+ * // Visit each block. This needs to visit dominators first;
+ * // nir_for_each_block() will be ok.
+ * foreach block:
+ * foreach instruction:
+ * foreach use of variable var:
+ * replace use with nir_phi_builder_value_get_block_def(var.pb_val)
+ * foreach def of variable var:
+ * create ssa def, register with
+ * nir_phi_builder_value_set_block_def(var.pb_val)
+ *
+ * nir_phi_builder_finish(pb)
+ */
struct nir_phi_builder;
+
struct nir_phi_builder_value;
/* Create a new phi builder.
@@ -43,7 +74,7 @@ struct nir_phi_builder *nir_phi_builder_create(nir_function_impl *impl);
*/
struct nir_phi_builder_value *
nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
- const BITSET_WORD *defs);
+ unsigned bit_size, const BITSET_WORD *defs);
/* Register a definition for the given value and block.
*
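Putting the outline above into code, a pass using the builder for a single one-component, 32-bit value might look like this hedged sketch (signatures as modified by this patch; the block walk and bitset setup are elided):

   struct nir_phi_builder *pb = nir_phi_builder_create(impl);

   /* 'defs' is a per-block bitset marking where the value is written. */
   struct nir_phi_builder_value *val =
      nir_phi_builder_add_value(pb, 1 /* components */, 32 /* bits */, defs);

   /* While visiting blocks (dominators first): rewrite each use ... */
   nir_ssa_def *def = nir_phi_builder_value_get_block_def(val, block);

   /* ... and register each new definition. */
   nir_phi_builder_value_set_block_def(val, block, new_def);

   nir_phi_builder_finish(pb);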
diff --git a/src/compiler/nir/nir_print.c b/src/compiler/nir/nir_print.c
index 24d5281..60b74d1 100644
--- a/src/compiler/nir/nir_print.c
+++ b/src/compiler/nir/nir_print.c
@@ -207,6 +207,8 @@ print_alu_instr(nir_alu_instr *instr, print_state *state)
print_alu_dest(&instr->dest, state);
fprintf(fp, " = %s", nir_op_infos[instr->op].name);
+ if (instr->exact)
+ fprintf(fp, "!");
if (instr->dest.saturate)
fprintf(fp, ".sat");
fprintf(fp, " ");
@@ -714,7 +716,7 @@ print_load_const_instr(nir_load_const_instr *instr, print_state *state)
* and then print the float in a comment for readability.
*/
- fprintf(fp, "0x%08x /* %f */", instr->value.u[i], instr->value.f[i]);
+ fprintf(fp, "0x%08x /* %f */", instr->value.u32[i], instr->value.f32[i]);
}
fprintf(fp, ")");
diff --git a/src/compiler/nir/nir_repair_ssa.c b/src/compiler/nir/nir_repair_ssa.c
index 3ab4f0f..96c791c 100644
--- a/src/compiler/nir/nir_repair_ssa.c
+++ b/src/compiler/nir/nir_repair_ssa.c
@@ -85,7 +85,8 @@ repair_ssa_def(nir_ssa_def *def, void *void_state)
BITSET_SET(state->def_set, def->parent_instr->block->index);
struct nir_phi_builder_value *val =
- nir_phi_builder_add_value(pb, def->num_components, state->def_set);
+ nir_phi_builder_add_value(pb, def->num_components, def->bit_size,
+ state->def_set);
nir_phi_builder_value_set_block_def(val, def->parent_instr->block, def);
diff --git a/src/compiler/nir/nir_search.c b/src/compiler/nir/nir_search.c
index 56d7e81..6e63063 100644
--- a/src/compiler/nir/nir_search.c
+++ b/src/compiler/nir/nir_search.c
@@ -62,7 +62,8 @@ alu_instr_is_bool(nir_alu_instr *instr)
case nir_op_inot:
return src_is_bool(instr->src[0].src);
default:
- return nir_op_infos[instr->op].output_type == nir_type_bool;
+ return (nir_alu_type_get_base_type(nir_op_infos[instr->op].output_type)
+ == nir_type_bool);
}
}
@@ -125,8 +126,10 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
nir_alu_instr *src_alu =
nir_instr_as_alu(instr->src[src].src.ssa->parent_instr);
- if (nir_op_infos[src_alu->op].output_type != var->type &&
- !(var->type == nir_type_bool && alu_instr_is_bool(src_alu)))
+ if (nir_alu_type_get_base_type(nir_op_infos[src_alu->op].output_type) !=
+ var->type &&
+ !(nir_alu_type_get_base_type(var->type) == nir_type_bool &&
+ alu_instr_is_bool(src_alu)))
return false;
}
@@ -158,21 +161,65 @@ match_value(const nir_search_value *value, nir_alu_instr *instr, unsigned src,
nir_load_const_instr *load =
nir_instr_as_load_const(instr->src[src].src.ssa->parent_instr);
- switch (nir_op_infos[instr->op].input_types[src]) {
+ switch (const_val->type) {
case nir_type_float:
for (unsigned i = 0; i < num_components; ++i) {
- if (load->value.f[new_swizzle[i]] != const_val->data.f)
+ double val;
+ switch (load->def.bit_size) {
+ case 32:
+ val = load->value.f32[new_swizzle[i]];
+ break;
+ case 64:
+ val = load->value.f64[new_swizzle[i]];
+ break;
+ default:
+ unreachable("unknown bit size");
+ }
+
+ if (val != const_val->data.d)
return false;
}
return true;
+
case nir_type_int:
+ for (unsigned i = 0; i < num_components; ++i) {
+ int64_t val;
+ switch (load->def.bit_size) {
+ case 32:
+ val = load->value.i32[new_swizzle[i]];
+ break;
+ case 64:
+ val = load->value.i64[new_swizzle[i]];
+ break;
+ default:
+ unreachable("unknown bit size");
+ }
+
+ if (val != const_val->data.i)
+ return false;
+ }
+ return true;
+
case nir_type_uint:
- case nir_type_bool:
+ case nir_type_bool32:
for (unsigned i = 0; i < num_components; ++i) {
- if (load->value.i[new_swizzle[i]] != const_val->data.i)
+ uint64_t val;
+ switch (load->def.bit_size) {
+ case 32:
+ val = load->value.u32[new_swizzle[i]];
+ break;
+ case 64:
+ val = load->value.u64[new_swizzle[i]];
+ break;
+ default:
+ unreachable("unknown bit size");
+ }
+
+ if (val != const_val->data.u)
return false;
}
return true;
+
default:
unreachable("Invalid alu source type");
}
@@ -191,6 +238,10 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
if (instr->op != expr->opcode)
return false;
+ assert(instr->dest.dest.is_ssa);
+ if (expr->inexact && instr->exact)
+ return false;
+
assert(!instr->dest.saturate);
assert(nir_op_infos[instr->op].num_inputs > 0);
@@ -244,9 +295,123 @@ match_expression(const nir_search_expression *expr, nir_alu_instr *instr,
}
}
+typedef struct bitsize_tree {
+ unsigned num_srcs;
+ struct bitsize_tree *srcs[4];
+
+ unsigned common_size;
+ bool is_src_sized[4];
+ bool is_dest_sized;
+
+ unsigned dest_size;
+ unsigned src_size[4];
+} bitsize_tree;
+
+static bitsize_tree *
+build_bitsize_tree(void *mem_ctx, struct match_state *state,
+ const nir_search_value *value)
+{
+   bitsize_tree *tree = rzalloc(mem_ctx, bitsize_tree);
+
+ switch (value->type) {
+ case nir_search_value_expression: {
+ nir_search_expression *expr = nir_search_value_as_expression(value);
+ nir_op_info info = nir_op_infos[expr->opcode];
+ tree->num_srcs = info.num_inputs;
+ tree->common_size = 0;
+ for (unsigned i = 0; i < info.num_inputs; i++) {
+ tree->is_src_sized[i] = !!nir_alu_type_get_type_size(info.input_types[i]);
+ if (tree->is_src_sized[i])
+ tree->src_size[i] = nir_alu_type_get_type_size(info.input_types[i]);
+ tree->srcs[i] = build_bitsize_tree(mem_ctx, state, expr->srcs[i]);
+ }
+ tree->is_dest_sized = !!nir_alu_type_get_type_size(info.output_type);
+ if (tree->is_dest_sized)
+ tree->dest_size = nir_alu_type_get_type_size(info.output_type);
+ break;
+ }
+
+ case nir_search_value_variable: {
+ nir_search_variable *var = nir_search_value_as_variable(value);
+ tree->num_srcs = 0;
+ tree->is_dest_sized = true;
+ tree->dest_size = nir_src_bit_size(state->variables[var->variable].src);
+ break;
+ }
+
+ case nir_search_value_constant: {
+ tree->num_srcs = 0;
+ tree->is_dest_sized = false;
+ tree->common_size = 0;
+ break;
+ }
+ }
+
+ return tree;
+}
+
+static unsigned
+bitsize_tree_filter_up(bitsize_tree *tree)
+{
+ for (unsigned i = 0; i < tree->num_srcs; i++) {
+ unsigned src_size = bitsize_tree_filter_up(tree->srcs[i]);
+ if (src_size == 0)
+ continue;
+
+ if (tree->is_src_sized[i]) {
+ assert(src_size == tree->src_size[i]);
+ } else if (tree->common_size != 0) {
+ assert(src_size == tree->common_size);
+ tree->src_size[i] = src_size;
+ } else {
+ tree->common_size = src_size;
+ tree->src_size[i] = src_size;
+ }
+ }
+
+ if (tree->num_srcs && tree->common_size) {
+ if (tree->dest_size == 0)
+ tree->dest_size = tree->common_size;
+ else if (!tree->is_dest_sized)
+ assert(tree->dest_size == tree->common_size);
+
+ for (unsigned i = 0; i < tree->num_srcs; i++) {
+ if (!tree->src_size[i])
+ tree->src_size[i] = tree->common_size;
+ }
+ }
+
+ return tree->dest_size;
+}
+
+static void
+bitsize_tree_filter_down(bitsize_tree *tree, unsigned size)
+{
+ if (tree->dest_size)
+ assert(tree->dest_size == size);
+ else
+ tree->dest_size = size;
+
+ if (!tree->is_dest_sized) {
+ if (tree->common_size)
+ assert(tree->common_size == size);
+ else
+ tree->common_size = size;
+ }
+
+ for (unsigned i = 0; i < tree->num_srcs; i++) {
+ if (!tree->src_size[i]) {
+ assert(tree->common_size);
+ tree->src_size[i] = tree->common_size;
+ }
+ bitsize_tree_filter_down(tree->srcs[i], tree->src_size[i]);
+ }
+}
+
static nir_alu_src
-construct_value(const nir_search_value *value, nir_alu_type type,
- unsigned num_components, struct match_state *state,
+construct_value(const nir_search_value *value,
+ unsigned num_components, bitsize_tree *bitsize, bool exact,
+ struct match_state *state,
nir_instr *instr, void *mem_ctx)
{
switch (value->type) {
@@ -257,7 +422,9 @@ construct_value(const nir_search_value *value, nir_alu_type type,
num_components = nir_op_infos[expr->opcode].output_size;
nir_alu_instr *alu = nir_alu_instr_create(mem_ctx, expr->opcode);
- nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components, NULL);
+ nir_ssa_dest_init(&alu->instr, &alu->dest.dest, num_components,
+ bitsize->dest_size, NULL);
+ alu->exact = exact;
alu->dest.write_mask = (1 << num_components) - 1;
alu->dest.saturate = false;
@@ -269,8 +436,7 @@ construct_value(const nir_search_value *value, nir_alu_type type,
num_components = nir_op_infos[alu->op].input_sizes[i];
alu->src[i] = construct_value(expr->srcs[i],
- nir_op_infos[alu->op].input_types[i],
- num_components,
+ num_components, bitsize->srcs[i], exact,
state, instr, mem_ctx);
}
@@ -301,23 +467,57 @@ construct_value(const nir_search_value *value, nir_alu_type type,
const nir_search_constant *c = nir_search_value_as_constant(value);
nir_load_const_instr *load = nir_load_const_instr_create(mem_ctx, 1);
- switch (type) {
+ switch (c->type) {
case nir_type_float:
- load->def.name = ralloc_asprintf(mem_ctx, "%f", c->data.f);
- load->value.f[0] = c->data.f;
+ load->def.name = ralloc_asprintf(load, "%f", c->data.d);
+ switch (bitsize->dest_size) {
+ case 32:
+ load->value.f32[0] = c->data.d;
+ break;
+ case 64:
+ load->value.f64[0] = c->data.d;
+ break;
+ default:
+ unreachable("unknown bit size");
+ }
break;
+
case nir_type_int:
- load->def.name = ralloc_asprintf(mem_ctx, "%d", c->data.i);
- load->value.i[0] = c->data.i;
+ load->def.name = ralloc_asprintf(load, "%ld", c->data.i);
+ switch (bitsize->dest_size) {
+ case 32:
+ load->value.i32[0] = c->data.i;
+ break;
+ case 64:
+ load->value.i64[0] = c->data.i;
+ break;
+ default:
+ unreachable("unknown bit size");
+ }
break;
+
case nir_type_uint:
- case nir_type_bool:
- load->value.u[0] = c->data.u;
+ load->def.name = ralloc_asprintf(load, "%lu", c->data.u);
+ switch (bitsize->dest_size) {
+ case 32:
+ load->value.u32[0] = c->data.u;
+ break;
+ case 64:
+ load->value.u64[0] = c->data.u;
+ break;
+ default:
+ unreachable("unknown bit size");
+      }
+      break;
+
+ case nir_type_bool32:
+ load->value.u32[0] = c->data.u;
break;
default:
unreachable("Invalid alu source type");
}
+ load->def.bit_size = bitsize->dest_size;
+
nir_instr_insert_before(instr, &load->instr);
nir_alu_src val;
@@ -352,6 +552,11 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
swizzle, &state))
return NULL;
+ void *bitsize_ctx = ralloc_context(NULL);
+ bitsize_tree *tree = build_bitsize_tree(bitsize_ctx, &state, replace);
+ bitsize_tree_filter_up(tree);
+ bitsize_tree_filter_down(tree, instr->dest.dest.ssa.bit_size);
+
/* Inserting a mov may be unnecessary. However, it's much easier to
* simply let copy propagation clean this up than to try to go through
* and rewrite swizzles ourselves.
@@ -359,11 +564,12 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
nir_alu_instr *mov = nir_alu_instr_create(mem_ctx, nir_op_imov);
mov->dest.write_mask = instr->dest.write_mask;
nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
- instr->dest.dest.ssa.num_components, NULL);
+ instr->dest.dest.ssa.num_components,
+ instr->dest.dest.ssa.bit_size, NULL);
- mov->src[0] = construct_value(replace, nir_op_infos[instr->op].output_type,
- instr->dest.dest.ssa.num_components, &state,
- &instr->instr, mem_ctx);
+ mov->src[0] = construct_value(replace,
+ instr->dest.dest.ssa.num_components, tree,
+ instr->exact, &state, &instr->instr, mem_ctx);
nir_instr_insert_before(&instr->instr, &mov->instr);
nir_ssa_def_rewrite_uses(&instr->dest.dest.ssa,
@@ -375,5 +581,7 @@ nir_replace_instr(nir_alu_instr *instr, const nir_search_expression *search,
*/
nir_instr_remove(&instr->instr);
+ ralloc_free(bitsize_ctx);
+
return mov;
}
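A worked run of the two filter passes above (a sketch): take the fdiv lowering replacement ('fmul', a, ('frcp', b)) applied to an instruction whose variables matched 64-bit SSA defs. fmul and frcp use the unsized float type, so nothing is sized by the opcode info:

   build:       the leaves for a and b report dest_size = 64 (from the
                matched sources); both expression nodes start with
                common_size = 0.
   filter_up:   frcp sees a 64-bit source, so its common_size and then
                dest_size become 64; fmul then sees two 64-bit sources
                and becomes 64 as well.
   filter_down: the root is forced to the original instruction's
                destination bit size, and any source still unsized
                inherits common_size on the way down.

construct_value() then reads dest_size at each node to size the new SSA destinations and load_const instructions.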
diff --git a/src/compiler/nir/nir_search.h b/src/compiler/nir/nir_search.h
index 7d47792..61742f1 100644
--- a/src/compiler/nir/nir_search.h
+++ b/src/compiler/nir/nir_search.h
@@ -71,16 +71,24 @@ typedef struct {
typedef struct {
nir_search_value value;
+ nir_alu_type type;
+
union {
- uint32_t u;
- int32_t i;
- float f;
+ uint64_t u;
+ int64_t i;
+ double d;
} data;
} nir_search_constant;
typedef struct {
nir_search_value value;
+ /* When set on a search expression, the expression will only match an SSA
+ * value that does *not* have the exact bit set. If unset, the exact bit
+ * on the SSA value is ignored.
+ */
+ bool inexact;
+
nir_op opcode;
const nir_search_value *srcs[4];
} nir_search_expression;
diff --git a/src/compiler/nir/nir_to_ssa.c b/src/compiler/nir/nir_to_ssa.c
index 44a5054..d588d7d 100644
--- a/src/compiler/nir/nir_to_ssa.c
+++ b/src/compiler/nir/nir_to_ssa.c
@@ -219,7 +219,9 @@ rewrite_def_forwards(nir_dest *dest, void *_state)
state->states[index].num_defs);
list_del(&dest->reg.def_link);
- nir_ssa_dest_init(state->parent_instr, dest, reg->num_components, name);
+ nir_ssa_dest_init(state->parent_instr, dest, reg->num_components,
+ reg->bit_size, name);
+ ralloc_free(name);
/* push our SSA destination on the stack */
state->states[index].index++;
@@ -271,7 +273,9 @@ rewrite_alu_instr_forward(nir_alu_instr *instr, rewrite_state *state)
instr->dest.write_mask = (1 << num_components) - 1;
list_del(&instr->dest.dest.reg.def_link);
- nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components, name);
+ nir_ssa_dest_init(&instr->instr, &instr->dest.dest, num_components,
+ reg->bit_size, name);
+ ralloc_free(name);
if (nir_op_infos[instr->op].output_size == 0) {
/*
diff --git a/src/compiler/nir/nir_validate.c b/src/compiler/nir/nir_validate.c
index 0c32d5f..9f18d1c 100644
--- a/src/compiler/nir/nir_validate.c
+++ b/src/compiler/nir/nir_validate.c
@@ -179,9 +179,12 @@ validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state)
nir_alu_src *src = &instr->src[index];
unsigned num_components;
- if (src->src.is_ssa)
+ unsigned src_bit_size;
+ if (src->src.is_ssa) {
+ src_bit_size = src->src.ssa->bit_size;
num_components = src->src.ssa->num_components;
- else {
+ } else {
+ src_bit_size = src->src.reg.reg->bit_size;
if (src->src.reg.reg->is_packed)
num_components = 4; /* can't check anything */
else
@@ -194,6 +197,24 @@ validate_alu_src(nir_alu_instr *instr, unsigned index, validate_state *state)
assert(src->swizzle[i] < num_components);
}
+ nir_alu_type src_type = nir_op_infos[instr->op].input_types[index];
+
+ /* 8-bit float isn't a thing */
+ if (nir_alu_type_get_base_type(src_type) == nir_type_float)
+ assert(src_bit_size == 16 || src_bit_size == 32 || src_bit_size == 64);
+
+ if (nir_alu_type_get_type_size(src_type)) {
+ /* This source has an explicit bit size */
+ assert(nir_alu_type_get_type_size(src_type) == src_bit_size);
+ } else {
+ if (!nir_alu_type_get_type_size(nir_op_infos[instr->op].output_type)) {
+ unsigned dest_bit_size =
+ instr->dest.dest.is_ssa ? instr->dest.dest.ssa.bit_size
+ : instr->dest.dest.reg.reg->bit_size;
+ assert(dest_bit_size == src_bit_size);
+ }
+ }
+
validate_src(&src->src, state);
}
@@ -263,8 +284,10 @@ validate_dest(nir_dest *dest, validate_state *state)
}
static void
-validate_alu_dest(nir_alu_dest *dest, validate_state *state)
+validate_alu_dest(nir_alu_instr *instr, validate_state *state)
{
+ nir_alu_dest *dest = &instr->dest;
+
unsigned dest_size =
dest->dest.is_ssa ? dest->dest.ssa.num_components
: dest->dest.reg.reg->num_components;
@@ -282,6 +305,17 @@ validate_alu_dest(nir_alu_dest *dest, validate_state *state)
assert(nir_op_infos[alu->op].output_type == nir_type_float ||
!dest->saturate);
+ unsigned bit_size = dest->dest.is_ssa ? dest->dest.ssa.bit_size
+ : dest->dest.reg.reg->bit_size;
+ nir_alu_type type = nir_op_infos[instr->op].output_type;
+
+ /* 8-bit float isn't a thing */
+ if (nir_alu_type_get_base_type(type) == nir_type_float)
+ assert(bit_size == 16 || bit_size == 32 || bit_size == 64);
+
+ assert(nir_alu_type_get_type_size(type) == 0 ||
+ nir_alu_type_get_type_size(type) == bit_size);
+
validate_dest(&dest->dest, state);
}
@@ -294,7 +328,7 @@ validate_alu_instr(nir_alu_instr *instr, validate_state *state)
validate_alu_src(instr, i, state);
}
- validate_alu_dest(&instr->dest, state);
+ validate_alu_dest(instr, state);
}
static void
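Concretely, the new per-source checks reject bit-size mismatches on opcodes with unsized types. A hedged sketch of IR that now trips the asserts (the nir_builder helpers are used for brevity, and the bit_size is patched by hand purely for illustration):

   nir_ssa_def *x = nir_imm_float(&b, 1.0f);  /* 32-bit float */
   nir_ssa_def *sum = nir_fadd(&b, x, x);     /* fadd is unsized ... */
   sum->bit_size = 64;                        /* ... so a 64-bit dest */
   nir_validate_shader(b.shader);             /* fails src == dest size */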
diff --git a/src/compiler/nir/spirv/spirv_to_nir.c b/src/compiler/nir/spirv/spirv_to_nir.c
index 5a7184a..42a1f95 100644
--- a/src/compiler/nir/spirv/spirv_to_nir.c
+++ b/src/compiler/nir/spirv/spirv_to_nir.c
@@ -92,7 +92,7 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
nir_load_const_instr_create(b->shader, num_components);
for (unsigned i = 0; i < num_components; i++)
- load->value.u[i] = constant->value.u[i];
+ load->value.u32[i] = constant->value.u[i];
nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
val->def = &load->def;
@@ -109,7 +109,7 @@ vtn_const_ssa_value(struct vtn_builder *b, nir_constant *constant,
nir_load_const_instr_create(b->shader, rows);
for (unsigned j = 0; j < rows; j++)
- load->value.u[j] = constant->value.u[rows * i + j];
+ load->value.u32[j] = constant->value.u[rows * i + j];
nir_instr_insert_before_cf_list(&b->impl->body, &load->instr);
col_val->def = &load->def;
@@ -1035,6 +1035,8 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
nir_op op = vtn_nir_alu_op_for_spirv_opcode(opcode, &swap);
unsigned num_components = glsl_get_vector_elements(val->const_type);
+ unsigned bit_size =
+ glsl_get_bit_size(glsl_get_base_type(val->const_type));
nir_const_value src[3];
assert(count <= 7);
@@ -1043,14 +1045,16 @@ vtn_handle_constant(struct vtn_builder *b, SpvOp opcode,
vtn_value(b, w[4 + i], vtn_value_type_constant)->constant;
unsigned j = swap ? 1 - i : i;
+ assert(bit_size == 32);
for (unsigned k = 0; k < num_components; k++)
- src[j].u[k] = c->value.u[k];
+ src[j].u32[k] = c->value.u[k];
}
- nir_const_value res = nir_eval_const_opcode(op, num_components, src);
+ nir_const_value res = nir_eval_const_opcode(op, num_components,
+ bit_size, src);
for (unsigned k = 0; k < num_components; k++)
- val->constant->value.u[k] = res.u[k];
+ val->constant->value.u[k] = res.u32[k];
return;
} /* default */
@@ -1414,7 +1418,7 @@ vtn_handle_texture(struct vtn_builder *b, SpvOp opcode,
}
nir_ssa_dest_init(&instr->instr, &instr->dest,
- nir_tex_instr_dest_size(instr), NULL);
+ nir_tex_instr_dest_size(instr), 32, NULL);
assert(glsl_get_vector_elements(ret_type->type) ==
nir_tex_instr_dest_size(instr));
@@ -1600,7 +1604,7 @@ vtn_handle_image(struct vtn_builder *b, SpvOp opcode,
if (opcode != SpvOpImageWrite) {
struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
- nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, NULL);
+ nir_ssa_dest_init(&intrin->instr, &intrin->dest, 4, 32, NULL);
nir_builder_instr_insert(&b->nb, &intrin->instr);
@@ -1738,7 +1742,7 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
fill_common_atomic_sources(b, opcode, w, &atomic->src[2]);
}
- nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, NULL);
+ nir_ssa_dest_init(&atomic->instr, &atomic->dest, 1, 32, NULL);
struct vtn_type *type = vtn_value(b, w[1], vtn_value_type_type)->type;
struct vtn_value *val = vtn_push_value(b, w[2], vtn_value_type_ssa);
@@ -1750,7 +1754,7 @@ vtn_handle_ssbo_or_shared_atomic(struct vtn_builder *b, SpvOp opcode,
}
static nir_alu_instr *
-create_vec(nir_shader *shader, unsigned num_components)
+create_vec(nir_shader *shader, unsigned num_components, unsigned bit_size)
{
nir_op op;
switch (num_components) {
@@ -1762,7 +1766,8 @@ create_vec(nir_shader *shader, unsigned num_components)
}
nir_alu_instr *vec = nir_alu_instr_create(shader, op);
- nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components, NULL);
+ nir_ssa_dest_init(&vec->instr, &vec->dest.dest, num_components,
+ bit_size, NULL);
vec->dest.write_mask = (1 << num_components) - 1;
return vec;
@@ -1779,7 +1784,8 @@ vtn_ssa_transpose(struct vtn_builder *b, struct vtn_ssa_value *src)
for (unsigned i = 0; i < glsl_get_matrix_columns(dest->type); i++) {
nir_alu_instr *vec = create_vec(b->shader,
- glsl_get_matrix_columns(src->type));
+ glsl_get_matrix_columns(src->type),
+ glsl_get_bit_size(glsl_get_base_type(src->type)));
if (glsl_type_is_vector_or_scalar(src->type)) {
vec->src[0].src = nir_src_for_ssa(src->def);
vec->src[0].swizzle[0] = i;
@@ -1809,7 +1815,8 @@ nir_ssa_def *
vtn_vector_insert(struct vtn_builder *b, nir_ssa_def *src, nir_ssa_def *insert,
unsigned index)
{
- nir_alu_instr *vec = create_vec(b->shader, src->num_components);
+ nir_alu_instr *vec = create_vec(b->shader, src->num_components,
+ src->bit_size);
for (unsigned i = 0; i < src->num_components; i++) {
if (i == index) {
@@ -1854,7 +1861,7 @@ vtn_vector_shuffle(struct vtn_builder *b, unsigned num_components,
nir_ssa_def *src0, nir_ssa_def *src1,
const uint32_t *indices)
{
- nir_alu_instr *vec = create_vec(b->shader, num_components);
+ nir_alu_instr *vec = create_vec(b->shader, num_components, src0->bit_size);
nir_ssa_undef_instr *undef = nir_ssa_undef_instr_create(b->shader, 1);
nir_builder_instr_insert(&b->nb, &undef->instr);
@@ -1884,7 +1891,8 @@ static nir_ssa_def *
vtn_vector_construct(struct vtn_builder *b, unsigned num_components,
unsigned num_srcs, nir_ssa_def **srcs)
{
- nir_alu_instr *vec = create_vec(b->shader, num_components);
+ nir_alu_instr *vec = create_vec(b->shader, num_components,
+ srcs[0]->bit_size);
unsigned dest_idx = 0;
for (unsigned i = 0; i < num_srcs; i++) {
diff --git a/src/compiler/nir/spirv/vtn_glsl450.c b/src/compiler/nir/spirv/vtn_glsl450.c
index 6b649fd..3360fda 100644
--- a/src/compiler/nir/spirv/vtn_glsl450.c
+++ b/src/compiler/nir/spirv/vtn_glsl450.c
@@ -627,7 +627,9 @@ handle_glsl450_alu(struct vtn_builder *b, enum GLSLstd450 entrypoint,
nir_alu_instr *instr = nir_alu_instr_create(b->shader, op);
nir_ssa_dest_init(&instr->instr, &instr->dest.dest,
- glsl_get_vector_elements(val->ssa->type), val->name);
+ glsl_get_vector_elements(val->ssa->type),
+ glsl_get_bit_size(glsl_get_base_type(val->ssa->type)),
+ val->name);
instr->dest.write_mask = (1 << instr->dest.dest.ssa.num_components) - 1;
val->ssa->def = &instr->dest.dest.ssa;
diff --git a/src/compiler/nir/spirv/vtn_variables.c b/src/compiler/nir/spirv/vtn_variables.c
index 31bf416..3cbac1e 100644
--- a/src/compiler/nir/spirv/vtn_variables.c
+++ b/src/compiler/nir/spirv/vtn_variables.c
@@ -190,7 +190,9 @@ _vtn_local_load_store(struct vtn_builder *b, bool load, nir_deref_var *deref,
if (load) {
nir_ssa_dest_init(&intrin->instr, &intrin->dest,
- intrin->num_components, NULL);
+ intrin->num_components,
+ glsl_get_bit_size(glsl_get_base_type(tail->type)),
+ NULL);
inout->def = &intrin->dest.ssa;
} else {
nir_intrinsic_set_write_mask(intrin, (1 << intrin->num_components) - 1);
@@ -322,7 +324,7 @@ get_vulkan_resource_index(struct vtn_builder *b, struct vtn_access_chain *chain,
nir_intrinsic_set_desc_set(instr, chain->var->descriptor_set);
nir_intrinsic_set_binding(instr, chain->var->binding);
- nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
nir_builder_instr_insert(&b->nb, &instr->instr);
return &instr->dest.ssa;
@@ -411,7 +413,8 @@ _vtn_load_store_tail(struct vtn_builder *b, nir_intrinsic_op op, bool load,
if (load) {
nir_ssa_dest_init(&instr->instr, &instr->dest,
- instr->num_components, NULL);
+ instr->num_components,
+ glsl_get_bit_size(glsl_get_base_type(type)), NULL);
(*inout)->def = &instr->dest.ssa;
}
@@ -1385,7 +1388,7 @@ vtn_handle_variables(struct vtn_builder *b, SpvOp opcode,
nir_intrinsic_instr_create(b->nb.shader,
nir_intrinsic_get_buffer_size);
instr->src[0] = nir_src_for_ssa(index);
- nir_ssa_dest_init(&instr->instr, &instr->dest, 1, NULL);
+ nir_ssa_dest_init(&instr->instr, &instr->dest, 1, 32, NULL);
nir_builder_instr_insert(&b->nb, &instr->instr);
nir_ssa_def *buf_size = &instr->dest.ssa;