summaryrefslogtreecommitdiffstats
path: root/src/mesa/drivers/dri/i965
diff options
context:
space:
mode:
authorEric Anholt <eric@anholt.net>2011-05-02 09:45:40 -0700
committerEric Anholt <eric@anholt.net>2011-08-16 13:04:41 -0700
commitaf3c9803d818fd33139f1247a387d64b967b8992 (patch)
tree45f6e1b4be0a9296a14a887cb4de3af0ff636e6c /src/mesa/drivers/dri/i965
parent65b5cbbcf783f6c668ab5b31a0734680dd396794 (diff)
downloadexternal_mesa3d-af3c9803d818fd33139f1247a387d64b967b8992.zip
external_mesa3d-af3c9803d818fd33139f1247a387d64b967b8992.tar.gz
external_mesa3d-af3c9803d818fd33139f1247a387d64b967b8992.tar.bz2
i965: Start adding the VS visitor and codegen.
The low-level IR is a mashup of brw_fs.cpp and ir_to_mesa.cpp. It's currently controlled by the INTEL_NEW_VS=1 environment variable, and only tested for the trivial "gl_Position = gl_Vertex;" shader so far.
Diffstat (limited to 'src/mesa/drivers/dri/i965')
-rw-r--r--src/mesa/drivers/dri/i965/Makefile5
-rw-r--r--src/mesa/drivers/dri/i965/brw_context.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_defines.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_eu.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_fs_emit.cpp33
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.cpp26
-rw-r--r--src/mesa/drivers/dri/i965/brw_shader.h2
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4.h434
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_emit.cpp568
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp77
-rw-r--r--src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp1649
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.c12
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs.h3
-rw-r--r--src/mesa/drivers/dri/i965/brw_vs_emit.c2
14 files changed, 2781 insertions, 37 deletions
diff --git a/src/mesa/drivers/dri/i965/Makefile b/src/mesa/drivers/dri/i965/Makefile
index 44f28cd..45a5350 100644
--- a/src/mesa/drivers/dri/i965/Makefile
+++ b/src/mesa/drivers/dri/i965/Makefile
@@ -124,7 +124,10 @@ CXX_SOURCES = \
brw_fs_reg_allocate.cpp \
brw_fs_schedule_instructions.cpp \
brw_fs_vector_splitting.cpp \
- brw_shader.cpp
+ brw_shader.cpp \
+ brw_vec4_emit.cpp \
+ brw_vec4_reg_allocate.cpp \
+ brw_vec4_visitor.cpp
ASM_SOURCES =
diff --git a/src/mesa/drivers/dri/i965/brw_context.h b/src/mesa/drivers/dri/i965/brw_context.h
index cc11d06..7b6b64c 100644
--- a/src/mesa/drivers/dri/i965/brw_context.h
+++ b/src/mesa/drivers/dri/i965/brw_context.h
@@ -529,7 +529,7 @@ struct brw_context
* the CURBE, the depth buffer, and a query BO.
*/
drm_intel_bo *validated_bos[VERT_ATTRIB_MAX + BRW_WM_MAX_SURF + 16];
- int validated_bo_count;
+ unsigned int validated_bo_count;
} state;
struct brw_cache cache;
diff --git a/src/mesa/drivers/dri/i965/brw_defines.h b/src/mesa/drivers/dri/i965/brw_defines.h
index da8d016..e3823c6 100644
--- a/src/mesa/drivers/dri/i965/brw_defines.h
+++ b/src/mesa/drivers/dri/i965/brw_defines.h
@@ -638,6 +638,8 @@ enum opcode {
FS_OPCODE_SPILL,
FS_OPCODE_UNSPILL,
FS_OPCODE_PULL_CONSTANT_LOAD,
+
+ VS_OPCODE_URB_WRITE,
};
#define BRW_PREDICATE_NONE 0
diff --git a/src/mesa/drivers/dri/i965/brw_eu.h b/src/mesa/drivers/dri/i965/brw_eu.h
index 72d50ea..38dd99b 100644
--- a/src/mesa/drivers/dri/i965/brw_eu.h
+++ b/src/mesa/drivers/dri/i965/brw_eu.h
@@ -44,6 +44,9 @@
#define BRW_SWIZZLE_NOOP BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XYZW BRW_SWIZZLE4(0,1,2,3)
#define BRW_SWIZZLE_XXXX BRW_SWIZZLE4(0,0,0,0)
+#define BRW_SWIZZLE_YYYY BRW_SWIZZLE4(1,1,1,1)
+#define BRW_SWIZZLE_ZZZZ BRW_SWIZZLE4(2,2,2,2)
+#define BRW_SWIZZLE_WWWW BRW_SWIZZLE4(3,3,3,3)
#define BRW_SWIZZLE_XYXY BRW_SWIZZLE4(0,1,0,1)
diff --git a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
index 285ba46..7367cca 100644
--- a/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
+++ b/src/mesa/drivers/dri/i965/brw_fs_emit.cpp
@@ -146,38 +146,7 @@ void
fs_visitor::generate_math(fs_inst *inst,
struct brw_reg dst, struct brw_reg *src)
{
- int op;
-
- switch (inst->opcode) {
- case SHADER_OPCODE_RCP:
- op = BRW_MATH_FUNCTION_INV;
- break;
- case SHADER_OPCODE_RSQ:
- op = BRW_MATH_FUNCTION_RSQ;
- break;
- case SHADER_OPCODE_SQRT:
- op = BRW_MATH_FUNCTION_SQRT;
- break;
- case SHADER_OPCODE_EXP2:
- op = BRW_MATH_FUNCTION_EXP;
- break;
- case SHADER_OPCODE_LOG2:
- op = BRW_MATH_FUNCTION_LOG;
- break;
- case SHADER_OPCODE_POW:
- op = BRW_MATH_FUNCTION_POW;
- break;
- case SHADER_OPCODE_SIN:
- op = BRW_MATH_FUNCTION_SIN;
- break;
- case SHADER_OPCODE_COS:
- op = BRW_MATH_FUNCTION_COS;
- break;
- default:
- assert(!"not reached: unknown math function");
- op = 0;
- break;
- }
+ int op = brw_math_function(inst->opcode);
if (intel->gen >= 6) {
assert(inst->mlen == 0);
diff --git a/src/mesa/drivers/dri/i965/brw_shader.cpp b/src/mesa/drivers/dri/i965/brw_shader.cpp
index f4005f8..2eeeec2 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.cpp
+++ b/src/mesa/drivers/dri/i965/brw_shader.cpp
@@ -199,3 +199,29 @@ brw_conditional_for_comparison(unsigned int op)
return BRW_CONDITIONAL_NZ;
}
}
+
+uint32_t
+brw_math_function(enum opcode op)
+{
+ switch (op) {
+ case SHADER_OPCODE_RCP:
+ return BRW_MATH_FUNCTION_INV;
+ case SHADER_OPCODE_RSQ:
+ return BRW_MATH_FUNCTION_RSQ;
+ case SHADER_OPCODE_SQRT:
+ return BRW_MATH_FUNCTION_SQRT;
+ case SHADER_OPCODE_EXP2:
+ return BRW_MATH_FUNCTION_EXP;
+ case SHADER_OPCODE_LOG2:
+ return BRW_MATH_FUNCTION_LOG;
+ case SHADER_OPCODE_POW:
+ return BRW_MATH_FUNCTION_POW;
+ case SHADER_OPCODE_SIN:
+ return BRW_MATH_FUNCTION_SIN;
+ case SHADER_OPCODE_COS:
+ return BRW_MATH_FUNCTION_COS;
+ default:
+ assert(!"not reached: unknown math function");
+ return 0;
+ }
+}
diff --git a/src/mesa/drivers/dri/i965/brw_shader.h b/src/mesa/drivers/dri/i965/brw_shader.h
index 21671d1..1054d7a 100644
--- a/src/mesa/drivers/dri/i965/brw_shader.h
+++ b/src/mesa/drivers/dri/i965/brw_shader.h
@@ -22,8 +22,10 @@
*/
#include <stdint.h>
+#include "brw_defines.h"
#pragma once
int brw_type_for_base_type(const struct glsl_type *type);
uint32_t brw_conditional_for_comparison(unsigned int op);
+uint32_t brw_math_function(enum opcode op);
diff --git a/src/mesa/drivers/dri/i965/brw_vec4.h b/src/mesa/drivers/dri/i965/brw_vec4.h
new file mode 100644
index 0000000..10168fc
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4.h
@@ -0,0 +1,434 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#ifndef BRW_VEC4_H
+#define BRW_VEC4_H
+
+#include <stdint.h>
+#include "brw_shader.h"
+#include "main/compiler.h"
+#include "program/hash_table.h"
+
+extern "C" {
+#include "brw_vs.h"
+#include "brw_context.h"
+#include "brw_eu.h"
+};
+
+#include "../glsl/ir.h"
+
+namespace brw {
+
+class dst_reg;
+
+/**
+ * Common helper for constructing swizzles. When only a subset of
+ * channels of a vec4 are used, we don't want to reference the other
+ * channels, as that will tell optimization passes that those other
+ * channels are used.
+ */
+static int
+swizzle_for_size(int size)
+{
+ int size_swizzles[4] = {
+ BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_X, SWIZZLE_X, SWIZZLE_X),
+ BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Y, SWIZZLE_Y),
+ BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_Z),
+ BRW_SWIZZLE4(SWIZZLE_X, SWIZZLE_Y, SWIZZLE_Z, SWIZZLE_W),
+ };
+
+ assert((size >= 1) && (size <= 4));
+ return size_swizzles[size - 1];
+}
+
+enum register_file {
+ ARF = BRW_ARCHITECTURE_REGISTER_FILE,
+ GRF = BRW_GENERAL_REGISTER_FILE,
+ MRF = BRW_MESSAGE_REGISTER_FILE,
+ IMM = BRW_IMMEDIATE_VALUE,
+ HW_REG, /* a struct brw_reg */
+ ATTR,
+ UNIFORM, /* prog_data->params[hw_reg] */
+ BAD_FILE
+};
+
+class reg
+{
+public:
+ /** Register file: ARF, GRF, MRF, IMM. */
+ enum register_file file;
+ /** virtual register number. 0 = fixed hw reg */
+ int reg;
+ /** Offset within the virtual register. */
+ int reg_offset;
+ /** Register type. BRW_REGISTER_TYPE_* */
+ int type;
+ bool sechalf;
+ struct brw_reg fixed_hw_reg;
+ int smear; /* -1, or a channel of the reg to smear to all channels. */
+
+ /** Value for file == BRW_IMMMEDIATE_FILE */
+ union {
+ int32_t i;
+ uint32_t u;
+ float f;
+ } imm;
+};
+
+class src_reg : public reg
+{
+public:
+ /* Callers of this ralloc-based new need not call delete. It's
+ * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+ static void* operator new(size_t size, void *ctx)
+ {
+ void *node;
+
+ node = ralloc_size(ctx, size);
+ assert(node != NULL);
+
+ return node;
+ }
+
+ void init()
+ {
+ memset(this, 0, sizeof(*this));
+
+ this->file = BAD_FILE;
+ }
+
+ src_reg(register_file file, int reg, const glsl_type *type)
+ {
+ init();
+
+ this->file = file;
+ this->reg = reg;
+ if (type && (type->is_scalar() || type->is_vector() || type->is_matrix()))
+ this->swizzle = swizzle_for_size(type->vector_elements);
+ else
+ this->swizzle = SWIZZLE_XYZW;
+ }
+
+ /** Generic unset register constructor. */
+ src_reg()
+ {
+ init();
+ }
+
+ src_reg(float f)
+ {
+ init();
+
+ this->file = IMM;
+ this->type = BRW_REGISTER_TYPE_F;
+ this->imm.f = f;
+ }
+
+ src_reg(uint32_t u)
+ {
+ init();
+
+ this->file = IMM;
+ this->type = BRW_REGISTER_TYPE_UD;
+ this->imm.f = u;
+ }
+
+ src_reg(int32_t i)
+ {
+ init();
+
+ this->file = IMM;
+ this->type = BRW_REGISTER_TYPE_D;
+ this->imm.i = i;
+ }
+
+ src_reg(class vec4_visitor *v, const struct glsl_type *type);
+
+ explicit src_reg(dst_reg reg);
+
+ GLuint swizzle; /**< SWIZZLE_XYZW swizzles from Mesa. */
+ bool negate;
+ bool abs;
+};
+
+class dst_reg : public reg
+{
+public:
+ /* Callers of this ralloc-based new need not call delete. It's
+ * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+ static void* operator new(size_t size, void *ctx)
+ {
+ void *node;
+
+ node = ralloc_size(ctx, size);
+ assert(node != NULL);
+
+ return node;
+ }
+
+ void init()
+ {
+ memset(this, 0, sizeof(*this));
+ this->file = BAD_FILE;
+ this->writemask = WRITEMASK_XYZW;
+ }
+
+ dst_reg()
+ {
+ init();
+ }
+
+ dst_reg(register_file file, int reg)
+ {
+ init();
+
+ this->file = file;
+ this->reg = reg;
+ }
+
+ dst_reg(struct brw_reg reg)
+ {
+ init();
+
+ this->file = HW_REG;
+ this->fixed_hw_reg = reg;
+ }
+
+ dst_reg(class vec4_visitor *v, const struct glsl_type *type);
+
+ explicit dst_reg(src_reg reg);
+
+ int writemask; /**< Bitfield of WRITEMASK_[XYZW] */
+};
+
+class vec4_instruction : public exec_node {
+public:
+ /* Callers of this ralloc-based new need not call delete. It's
+ * easier to just ralloc_free 'ctx' (or any of its ancestors). */
+ static void* operator new(size_t size, void *ctx)
+ {
+ void *node;
+
+ node = rzalloc_size(ctx, size);
+ assert(node != NULL);
+
+ return node;
+ }
+
+ struct brw_reg get_dst(void);
+ struct brw_reg get_src(int i);
+
+ enum opcode opcode; /* BRW_OPCODE_* or FS_OPCODE_* */
+ dst_reg dst;
+ src_reg src[3];
+
+ bool saturate;
+ bool predicate_inverse;
+ uint32_t predicate;
+
+ int conditional_mod; /**< BRW_CONDITIONAL_* */
+
+ int sampler;
+ int target; /**< MRT target. */
+ bool shadow_compare;
+
+ bool eot;
+ bool header_present;
+ int mlen; /**< SEND message length */
+ int base_mrf; /**< First MRF in the SEND message, if mlen is nonzero. */
+
+ uint32_t offset; /* spill/unspill offset */
+ /** @{
+ * Annotation for the generated IR. One of the two can be set.
+ */
+ ir_instruction *ir;
+ const char *annotation;
+};
+
+class vec4_visitor : public ir_visitor
+{
+public:
+ vec4_visitor(struct brw_vs_compile *c,
+ struct gl_shader_program *prog, struct brw_shader *shader);
+ ~vec4_visitor();
+
+ dst_reg dst_null_f()
+ {
+ return dst_reg(brw_null_reg());
+ }
+
+ dst_reg dst_null_d()
+ {
+ return dst_reg(retype(brw_null_reg(), BRW_REGISTER_TYPE_D));
+ }
+
+ dst_reg dst_null_cmp()
+ {
+ if (intel->gen > 4)
+ return dst_null_d();
+ else
+ return dst_null_f();
+ }
+
+ struct brw_context *brw;
+ const struct gl_vertex_program *vp;
+ struct intel_context *intel;
+ struct gl_context *ctx;
+ struct brw_vs_compile *c;
+ struct brw_vs_prog_data *prog_data;
+ struct brw_compile *p;
+ struct brw_shader *shader;
+ struct gl_shader_program *prog;
+ void *mem_ctx;
+ exec_list instructions;
+
+ char *fail_msg;
+ bool failed;
+
+ /**
+ * GLSL IR currently being processed, which is associated with our
+ * driver IR instructions for debugging purposes.
+ */
+ ir_instruction *base_ir;
+ const char *current_annotation;
+
+ int *virtual_grf_sizes;
+ int virtual_grf_count;
+ int virtual_grf_array_size;
+ int first_non_payload_grf;
+
+ dst_reg *variable_storage(ir_variable *var);
+
+ void reladdr_to_temp(ir_instruction *ir, src_reg *reg, int *num_reladdr);
+
+ src_reg src_reg_for_float(float val);
+
+ /**
+ * \name Visit methods
+ *
+ * As typical for the visitor pattern, there must be one \c visit method for
+ * each concrete subclass of \c ir_instruction. Virtual base classes within
+ * the hierarchy should not have \c visit methods.
+ */
+ /*@{*/
+ virtual void visit(ir_variable *);
+ virtual void visit(ir_loop *);
+ virtual void visit(ir_loop_jump *);
+ virtual void visit(ir_function_signature *);
+ virtual void visit(ir_function *);
+ virtual void visit(ir_expression *);
+ virtual void visit(ir_swizzle *);
+ virtual void visit(ir_dereference_variable *);
+ virtual void visit(ir_dereference_array *);
+ virtual void visit(ir_dereference_record *);
+ virtual void visit(ir_assignment *);
+ virtual void visit(ir_constant *);
+ virtual void visit(ir_call *);
+ virtual void visit(ir_return *);
+ virtual void visit(ir_discard *);
+ virtual void visit(ir_texture *);
+ virtual void visit(ir_if *);
+ /*@}*/
+
+ src_reg result;
+
+ /* Regs for vertex results. Generated at ir_variable visiting time
+ * for the ir->location's used.
+ */
+ dst_reg output_reg[VERT_RESULT_MAX];
+
+ struct hash_table *variable_ht;
+
+ bool run(void);
+ void fail(const char *msg, ...);
+
+ int virtual_grf_alloc(int size);
+ int setup_attributes(int payload_reg);
+ void setup_payload();
+ void reg_allocate_trivial();
+ void reg_allocate();
+
+ vec4_instruction *emit(enum opcode opcode);
+
+ vec4_instruction *emit(enum opcode opcode, dst_reg dst, src_reg src0);
+
+ vec4_instruction *emit(enum opcode opcode, dst_reg dst,
+ src_reg src0, src_reg src1);
+
+ vec4_instruction *emit(enum opcode opcode, dst_reg dst,
+ src_reg src0, src_reg src1, src_reg src2);
+
+ /** Walks an exec_list of ir_instruction and sends it through this visitor. */
+ void visit_instructions(const exec_list *list);
+
+ void emit_bool_to_cond_code(ir_rvalue *ir);
+ void emit_bool_comparison(unsigned int op, dst_reg dst, src_reg src0, src_reg src1);
+ void emit_if_gen6(ir_if *ir);
+
+ void emit_block_move(ir_assignment *ir);
+
+ /**
+ * Emit the correct dot-product instruction for the type of arguments
+ */
+ void emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements);
+
+ void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+ dst_reg dst, src_reg src0);
+
+ void emit_scalar(ir_instruction *ir, enum prog_opcode op,
+ dst_reg dst, src_reg src0, src_reg src1);
+
+ void emit_scs(ir_instruction *ir, enum prog_opcode op,
+ dst_reg dst, const src_reg &src);
+
+ void emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src);
+ void emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src);
+ void emit_math(enum opcode opcode, dst_reg dst, src_reg src);
+ void emit_math2_gen6(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+ void emit_math2_gen4(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+ void emit_math(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1);
+
+ int emit_vue_header_gen6(int header_mrf);
+ int emit_vue_header_gen4(int header_mrf);
+ void emit_urb_writes(void);
+
+ GLboolean try_emit_sat(ir_expression *ir);
+
+ bool process_move_condition(ir_rvalue *ir);
+
+ void generate_code();
+ void generate_vs_instruction(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg *src);
+ void generate_math1_gen4(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
+ void generate_math1_gen6(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src);
+ void generate_urb_write(vec4_instruction *inst);
+};
+
+} /* namespace brw */
+
+#endif /* BRW_VEC4_H */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
new file mode 100644
index 0000000..bdc7a79
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_emit.cpp
@@ -0,0 +1,568 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+#include "../glsl/ir_print_visitor.h"
+
+extern "C" {
+#include "brw_eu.h"
+};
+
+using namespace brw;
+
+namespace brw {
+
+int
+vec4_visitor::setup_attributes(int payload_reg)
+{
+ int nr_attributes;
+ int attribute_map[VERT_ATTRIB_MAX];
+
+ nr_attributes = 0;
+ for (int i = 0; i < VERT_ATTRIB_MAX; i++) {
+ if (prog_data->inputs_read & BITFIELD64_BIT(i)) {
+ attribute_map[i] = payload_reg + nr_attributes;
+ nr_attributes++;
+ }
+ }
+
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)iter.get();
+
+ for (int i = 0; i < 3; i++) {
+ if (inst->src[i].file != ATTR)
+ continue;
+
+ inst->src[i].file = HW_REG;
+ inst->src[i].fixed_hw_reg = brw_vec8_grf(attribute_map[inst->src[i].reg], 0);
+ inst->src[i].fixed_hw_reg.dw1.bits.swizzle = inst->src[i].swizzle;
+ }
+ }
+
+ /* The BSpec says we always have to read at least one thing from
+ * the VF, and it appears that the hardware wedges otherwise.
+ */
+ if (nr_attributes == 0)
+ nr_attributes = 1;
+
+ prog_data->urb_read_length = (nr_attributes + 1) / 2;
+
+ return nr_attributes;
+}
+
+void
+vec4_visitor::setup_payload(void)
+{
+ int reg = 0;
+
+ /* r0 is always reserved, as it contains the payload with the URB
+ * handles that are passed on to the URB write at the end of the
+ * thread.
+ */
+ reg++;
+
+ /* User clip planes from curbe:
+ */
+ if (c->key.nr_userclip) {
+ if (intel->gen >= 6) {
+ for (int i = 0; i < c->key.nr_userclip; i++) {
+ c->userplane[i] = stride(brw_vec4_grf(reg + i / 2,
+ (i % 2) * 4), 0, 4, 1);
+ }
+ reg += ALIGN(c->key.nr_userclip, 2) / 2;
+ } else {
+ for (int i = 0; i < c->key.nr_userclip; i++) {
+ c->userplane[i] = stride(brw_vec4_grf(reg + (6 + i) / 2,
+ (i % 2) * 4), 0, 4, 1);
+ }
+ reg += (ALIGN(6 + c->key.nr_userclip, 4) / 4) * 2;
+ }
+ }
+
+ /* FINISHME: push constants */
+ c->prog_data.curb_read_length = reg - 1;
+ c->prog_data.nr_params = 0;
+ /* XXX 0 causes a bug elsewhere... */
+ if (intel->gen < 6 && c->prog_data.nr_params == 0)
+ c->prog_data.nr_params = 4;
+
+ reg += setup_attributes(reg);
+
+ this->first_non_payload_grf = reg;
+}
+
+struct brw_reg
+vec4_instruction::get_dst(void)
+{
+ struct brw_reg brw_reg;
+
+ switch (dst.file) {
+ case GRF:
+ assert(dst.reg_offset == 0);
+ brw_reg = brw_vec8_grf(dst.reg + dst.reg_offset, 0);
+ brw_reg = retype(brw_reg, dst.type);
+ brw_reg.dw1.bits.writemask = dst.writemask;
+ break;
+
+ case HW_REG:
+ brw_reg = dst.fixed_hw_reg;
+ break;
+
+ case BAD_FILE:
+ brw_reg = brw_null_reg();
+ break;
+
+ default:
+ assert(!"not reached");
+ brw_reg = brw_null_reg();
+ break;
+ }
+ return brw_reg;
+}
+
+struct brw_reg
+vec4_instruction::get_src(int i)
+{
+ struct brw_reg brw_reg;
+
+ switch (src[i].file) {
+ case GRF:
+ brw_reg = brw_vec8_grf(src[i].reg + src[i].reg_offset, 0);
+ brw_reg = retype(brw_reg, src[i].type);
+ brw_reg.dw1.bits.swizzle = src[i].swizzle;
+ if (src[i].abs)
+ brw_reg = brw_abs(brw_reg);
+ if (src[i].negate)
+ brw_reg = negate(brw_reg);
+ break;
+
+ case IMM:
+ switch (src[i].type) {
+ case BRW_REGISTER_TYPE_F:
+ brw_reg = brw_imm_f(src[i].imm.f);
+ break;
+ case BRW_REGISTER_TYPE_D:
+ brw_reg = brw_imm_d(src[i].imm.i);
+ break;
+ case BRW_REGISTER_TYPE_UD:
+ brw_reg = brw_imm_ud(src[i].imm.u);
+ break;
+ default:
+ assert(!"not reached");
+ brw_reg = brw_null_reg();
+ break;
+ }
+ break;
+
+ case HW_REG:
+ brw_reg = src[i].fixed_hw_reg;
+ break;
+
+ case BAD_FILE:
+ /* Probably unused. */
+ brw_reg = brw_null_reg();
+ break;
+ case ATTR:
+ default:
+ assert(!"not reached");
+ brw_reg = brw_null_reg();
+ break;
+ }
+
+ return brw_reg;
+}
+
+void
+vec4_visitor::generate_math1_gen4(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ brw_math(p,
+ dst,
+ brw_math_function(inst->opcode),
+ BRW_MATH_SATURATE_NONE,
+ inst->base_mrf,
+ src,
+ BRW_MATH_DATA_SCALAR,
+ BRW_MATH_PRECISION_FULL);
+}
+
+void
+vec4_visitor::generate_math1_gen6(vec4_instruction *inst,
+ struct brw_reg dst,
+ struct brw_reg src)
+{
+ brw_math(p,
+ dst,
+ brw_math_function(inst->opcode),
+ BRW_MATH_SATURATE_NONE,
+ inst->base_mrf,
+ src,
+ BRW_MATH_DATA_SCALAR,
+ BRW_MATH_PRECISION_FULL);
+}
+
+void
+vec4_visitor::generate_urb_write(vec4_instruction *inst)
+{
+ brw_urb_WRITE(p,
+ brw_null_reg(), /* dest */
+ inst->base_mrf, /* starting mrf reg nr */
+ brw_vec8_grf(0, 0), /* src */
+ false, /* allocate */
+ true, /* used */
+ inst->mlen,
+ 0, /* response len */
+ inst->eot, /* eot */
+ inst->eot, /* writes complete */
+ inst->offset, /* urb destination offset */
+ BRW_URB_SWIZZLE_INTERLEAVE);
+}
+
+void
+vec4_visitor::generate_vs_instruction(vec4_instruction *instruction,
+ struct brw_reg dst,
+ struct brw_reg *src)
+{
+ vec4_instruction *inst = (vec4_instruction *)instruction;
+
+ switch (inst->opcode) {
+ case SHADER_OPCODE_RCP:
+ case SHADER_OPCODE_RSQ:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_EXP2:
+ case SHADER_OPCODE_LOG2:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
+ if (intel->gen >= 6) {
+ generate_math1_gen6(inst, dst, src[0]);
+ } else {
+ generate_math1_gen4(inst, dst, src[0]);
+ }
+ break;
+
+ case SHADER_OPCODE_POW:
+ assert(!"finishme");
+ break;
+
+ case VS_OPCODE_URB_WRITE:
+ generate_urb_write(inst);
+ break;
+
+ default:
+ if (inst->opcode < (int)ARRAY_SIZE(brw_opcodes)) {
+ fail("unsupported opcode in `%s' in VS\n",
+ brw_opcodes[inst->opcode].name);
+ } else {
+ fail("Unsupported opcode %d in VS", inst->opcode);
+ }
+ }
+}
+
+bool
+vec4_visitor::run()
+{
+ /* Generate FS IR for main(). (the visitor only descends into
+ * functions called "main").
+ */
+ foreach_iter(exec_list_iterator, iter, *shader->ir) {
+ ir_instruction *ir = (ir_instruction *)iter.get();
+ base_ir = ir;
+ ir->accept(this);
+ }
+
+ emit_urb_writes();
+
+ if (failed)
+ return false;
+
+ setup_payload();
+ reg_allocate();
+
+ brw_set_access_mode(p, BRW_ALIGN_16);
+
+ generate_code();
+
+ return !failed;
+}
+
+void
+vec4_visitor::generate_code()
+{
+ int last_native_inst = p->nr_insn;
+ const char *last_annotation_string = NULL;
+ ir_instruction *last_annotation_ir = NULL;
+
+ int loop_stack_array_size = 16;
+ int loop_stack_depth = 0;
+ brw_instruction **loop_stack =
+ rzalloc_array(this->mem_ctx, brw_instruction *, loop_stack_array_size);
+ int *if_depth_in_loop =
+ rzalloc_array(this->mem_ctx, int, loop_stack_array_size);
+
+
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+ printf("Native code for vertex shader %d:\n", prog->Name);
+ }
+
+ foreach_list(node, &this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)node;
+ struct brw_reg src[3], dst;
+
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+ if (last_annotation_ir != inst->ir) {
+ last_annotation_ir = inst->ir;
+ if (last_annotation_ir) {
+ printf(" ");
+ last_annotation_ir->print();
+ printf("\n");
+ }
+ }
+ if (last_annotation_string != inst->annotation) {
+ last_annotation_string = inst->annotation;
+ if (last_annotation_string)
+ printf(" %s\n", last_annotation_string);
+ }
+ }
+
+ for (unsigned int i = 0; i < 3; i++) {
+ src[i] = inst->get_src(i);
+ }
+ dst = inst->get_dst();
+
+ brw_set_conditionalmod(p, inst->conditional_mod);
+ brw_set_predicate_control(p, inst->predicate);
+ brw_set_predicate_inverse(p, inst->predicate_inverse);
+ brw_set_saturate(p, inst->saturate);
+
+ switch (inst->opcode) {
+ case BRW_OPCODE_MOV:
+ brw_MOV(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_ADD:
+ brw_ADD(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_MUL:
+ brw_MUL(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_FRC:
+ brw_FRC(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_RNDD:
+ brw_RNDD(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_RNDE:
+ brw_RNDE(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_RNDZ:
+ brw_RNDZ(p, dst, src[0]);
+ break;
+
+ case BRW_OPCODE_AND:
+ brw_AND(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_OR:
+ brw_OR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_XOR:
+ brw_XOR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_NOT:
+ brw_NOT(p, dst, src[0]);
+ break;
+ case BRW_OPCODE_ASR:
+ brw_ASR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_SHR:
+ brw_SHR(p, dst, src[0], src[1]);
+ break;
+ case BRW_OPCODE_SHL:
+ brw_SHL(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_CMP:
+ brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]);
+ break;
+ case BRW_OPCODE_SEL:
+ brw_SEL(p, dst, src[0], src[1]);
+ break;
+
+ case BRW_OPCODE_IF:
+ if (inst->src[0].file != BAD_FILE) {
+ /* The instruction has an embedded compare (only allowed on gen6) */
+ assert(intel->gen == 6);
+ gen6_IF(p, inst->conditional_mod, src[0], src[1]);
+ } else {
+ brw_IF(p, BRW_EXECUTE_8);
+ }
+ if_depth_in_loop[loop_stack_depth]++;
+ break;
+
+ case BRW_OPCODE_ELSE:
+ brw_ELSE(p);
+ break;
+ case BRW_OPCODE_ENDIF:
+ brw_ENDIF(p);
+ if_depth_in_loop[loop_stack_depth]--;
+ break;
+
+ case BRW_OPCODE_DO:
+ loop_stack[loop_stack_depth++] = brw_DO(p, BRW_EXECUTE_8);
+ if (loop_stack_array_size <= loop_stack_depth) {
+ loop_stack_array_size *= 2;
+ loop_stack = reralloc(this->mem_ctx, loop_stack, brw_instruction *,
+ loop_stack_array_size);
+ if_depth_in_loop = reralloc(this->mem_ctx, if_depth_in_loop, int,
+ loop_stack_array_size);
+ }
+ if_depth_in_loop[loop_stack_depth] = 0;
+ break;
+
+ case BRW_OPCODE_BREAK:
+ brw_BREAK(p, if_depth_in_loop[loop_stack_depth]);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+ case BRW_OPCODE_CONTINUE:
+ /* FINISHME: We need to write the loop instruction support still. */
+ if (intel->gen >= 6)
+ gen6_CONT(p, loop_stack[loop_stack_depth - 1]);
+ else
+ brw_CONT(p, if_depth_in_loop[loop_stack_depth]);
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+ break;
+
+ case BRW_OPCODE_WHILE: {
+ struct brw_instruction *inst0, *inst1;
+ GLuint br = 1;
+
+ if (intel->gen >= 5)
+ br = 2;
+
+ assert(loop_stack_depth > 0);
+ loop_stack_depth--;
+ inst0 = inst1 = brw_WHILE(p, loop_stack[loop_stack_depth]);
+ if (intel->gen < 6) {
+ /* patch all the BREAK/CONT instructions from last BGNLOOP */
+ while (inst0 > loop_stack[loop_stack_depth]) {
+ inst0--;
+ if (inst0->header.opcode == BRW_OPCODE_BREAK &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0 + 1);
+ }
+ else if (inst0->header.opcode == BRW_OPCODE_CONTINUE &&
+ inst0->bits3.if_else.jump_count == 0) {
+ inst0->bits3.if_else.jump_count = br * (inst1 - inst0);
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ generate_vs_instruction(inst, dst, src);
+ break;
+ }
+
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+ for (unsigned int i = last_native_inst; i < p->nr_insn; i++) {
+ if (0) {
+ printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+ ((uint32_t *)&p->store[i])[3],
+ ((uint32_t *)&p->store[i])[2],
+ ((uint32_t *)&p->store[i])[1],
+ ((uint32_t *)&p->store[i])[0]);
+ }
+ brw_disasm(stdout, &p->store[i], intel->gen);
+ }
+ }
+
+ last_native_inst = p->nr_insn;
+ }
+
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+ printf("\n");
+ }
+
+ ralloc_free(loop_stack);
+ ralloc_free(if_depth_in_loop);
+
+ brw_set_uip_jip(p);
+
+ /* OK, while the INTEL_DEBUG=vs above is very nice for debugging VS
+ * emit issues, it doesn't get the jump distances into the output,
+ * which is often something we want to debug. So this is here in
+ * case you're doing that.
+ */
+ if (0) {
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+ for (unsigned int i = 0; i < p->nr_insn; i++) {
+ printf("0x%08x 0x%08x 0x%08x 0x%08x ",
+ ((uint32_t *)&p->store[i])[3],
+ ((uint32_t *)&p->store[i])[2],
+ ((uint32_t *)&p->store[i])[1],
+ ((uint32_t *)&p->store[i])[0]);
+ brw_disasm(stdout, &p->store[i], intel->gen);
+ }
+ }
+ }
+}
+
+extern "C" {
+
+bool
+brw_vs_emit(struct brw_vs_compile *c)
+{
+ struct brw_compile *p = &c->func;
+ struct brw_context *brw = p->brw;
+ struct intel_context *intel = &brw->intel;
+ struct gl_context *ctx = &intel->ctx;
+ struct gl_shader_program *prog = ctx->Shader.CurrentVertexProgram;
+
+ if (!prog)
+ return false;
+
+ struct brw_shader *shader =
+ (brw_shader *) prog->_LinkedShaders[MESA_SHADER_VERTEX];
+ if (!shader)
+ return false;
+
+ if (unlikely(INTEL_DEBUG & DEBUG_VS)) {
+ printf("GLSL IR for native vertex shader %d:\n", prog->Name);
+ _mesa_print_ir(shader->ir, NULL);
+ printf("\n\n");
+ }
+
+ vec4_visitor v(c, prog, shader);
+ if (!v.run()) {
+ /* FINISHME: Cleanly fail, test at link time, etc. */
+ assert(!"not reached");
+ return false;
+ }
+
+ return true;
+}
+
+} /* extern "C" */
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
new file mode 100644
index 0000000..e7f6b28
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_reg_allocate.cpp
@@ -0,0 +1,77 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+#include "../glsl/ir_print_visitor.h"
+
+using namespace brw;
+
+namespace brw {
+
+static void
+assign(int *reg_hw_locations, reg *reg)
+{
+ if (reg->file == GRF) {
+ reg->reg = reg_hw_locations[reg->reg];
+ }
+}
+
+void
+vec4_visitor::reg_allocate_trivial()
+{
+ int last_grf = 0;
+ int hw_reg_mapping[this->virtual_grf_count];
+ int i;
+ int next;
+
+ /* Note that compressed instructions require alignment to 2 registers. */
+ hw_reg_mapping[0] = this->first_non_payload_grf;
+ next = hw_reg_mapping[0] + this->virtual_grf_sizes[0];
+ for (i = 1; i < this->virtual_grf_count; i++) {
+ hw_reg_mapping[i] = next;
+ next += this->virtual_grf_sizes[i];
+ }
+ prog_data->total_grf = next;
+
+ foreach_iter(exec_list_iterator, iter, this->instructions) {
+ vec4_instruction *inst = (vec4_instruction *)iter.get();
+
+ assign(hw_reg_mapping, &inst->dst);
+ assign(hw_reg_mapping, &inst->src[0]);
+ assign(hw_reg_mapping, &inst->src[1]);
+ assign(hw_reg_mapping, &inst->src[2]);
+ }
+
+ if (last_grf >= BRW_MAX_GRF) {
+ fail("Ran out of regs on trivial allocator (%d/%d)\n",
+ last_grf, BRW_MAX_GRF);
+ }
+}
+
+void
+vec4_visitor::reg_allocate()
+{
+ reg_allocate_trivial();
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
new file mode 100644
index 0000000..bba1d81
--- /dev/null
+++ b/src/mesa/drivers/dri/i965/brw_vec4_visitor.cpp
@@ -0,0 +1,1649 @@
+/*
+ * Copyright © 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+#include "brw_vec4.h"
+#include "main/macros.h"
+
+namespace brw {
+
+src_reg::src_reg(dst_reg reg)
+{
+ init();
+
+ this->file = reg.file;
+ this->reg = reg.reg;
+ this->reg_offset = reg.reg_offset;
+ this->type = reg.type;
+
+ int swizzles[4];
+ int next_chan = 0;
+ int last = 0;
+
+ for (int i = 0; i < 4; i++) {
+ if (!(reg.writemask & (1 << i)))
+ continue;
+
+ swizzles[next_chan++] = last = i;
+ }
+
+ for (; next_chan < 4; next_chan++) {
+ swizzles[next_chan] = last;
+ }
+
+ this->swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
+ swizzles[2], swizzles[3]);
+}
+
+dst_reg::dst_reg(src_reg reg)
+{
+ init();
+
+ this->file = reg.file;
+ this->reg = reg.reg;
+ this->reg_offset = reg.reg_offset;
+ this->type = reg.type;
+ this->writemask = WRITEMASK_XYZW;
+}
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst,
+ src_reg src0, src_reg src1, src_reg src2)
+{
+ vec4_instruction *inst = new(mem_ctx) vec4_instruction();
+
+ inst->opcode = opcode;
+ inst->dst = dst;
+ inst->src[0] = src0;
+ inst->src[1] = src1;
+ inst->src[2] = src2;
+ inst->ir = this->base_ir;
+ inst->annotation = this->current_annotation;
+
+ this->instructions.push_tail(inst);
+
+ return inst;
+}
+
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0, src_reg src1)
+{
+ return emit(opcode, dst, src0, src1, src_reg());
+}
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode, dst_reg dst, src_reg src0)
+{
+ assert(dst.writemask != 0);
+ return emit(opcode, dst, src0, src_reg(), src_reg());
+}
+
+vec4_instruction *
+vec4_visitor::emit(enum opcode opcode)
+{
+ return emit(opcode, dst_reg(), src_reg(), src_reg(), src_reg());
+}
+
+void
+vec4_visitor::emit_dp(dst_reg dst, src_reg src0, src_reg src1, unsigned elements)
+{
+ static enum opcode dot_opcodes[] = {
+ BRW_OPCODE_DP2, BRW_OPCODE_DP3, BRW_OPCODE_DP4
+ };
+
+ emit(dot_opcodes[elements - 2], dst, src0, src1);
+}
+
+void
+vec4_visitor::emit_math1_gen6(enum opcode opcode, dst_reg dst, src_reg src)
+{
+ /* The gen6 math instruction ignores the source modifiers --
+ * swizzle, abs, negate, and at least some parts of the register
+ * region description. Move the source to the corresponding slots
+ * of the destination generally work.
+ */
+ src_reg expanded = src_reg(this, glsl_type::float_type);
+ emit(BRW_OPCODE_MOV, dst, src);
+ src = expanded;
+
+ emit(opcode, dst, src);
+}
+
+void
+vec4_visitor::emit_math1_gen4(enum opcode opcode, dst_reg dst, src_reg src)
+{
+ vec4_instruction *inst = emit(opcode, dst, src);
+ inst->base_mrf = 1;
+ inst->mlen = 1;
+}
+
+void
+vec4_visitor::emit_math(opcode opcode, dst_reg dst, src_reg src)
+{
+ switch (opcode) {
+ case SHADER_OPCODE_RCP:
+ case SHADER_OPCODE_RSQ:
+ case SHADER_OPCODE_SQRT:
+ case SHADER_OPCODE_EXP2:
+ case SHADER_OPCODE_LOG2:
+ case SHADER_OPCODE_SIN:
+ case SHADER_OPCODE_COS:
+ break;
+ default:
+ assert(!"not reached: bad math opcode");
+ return;
+ }
+
+ if (intel->gen >= 6) {
+ return emit_math1_gen6(opcode, dst, src);
+ } else {
+ return emit_math1_gen4(opcode, dst, src);
+ }
+}
+
+void
+vec4_visitor::emit_math2_gen6(enum opcode opcode,
+ dst_reg dst, src_reg src0, src_reg src1)
+{
+ src_reg expanded;
+
+ /* The gen6 math instruction ignores the source modifiers --
+ * swizzle, abs, negate, and at least some parts of the register
+ * region description. Move the sources to temporaries to make it
+ * generally work.
+ */
+
+ expanded = src_reg(this, glsl_type::vec4_type);
+ emit(BRW_OPCODE_MOV, dst, src0);
+ src0 = expanded;
+
+ expanded = src_reg(this, glsl_type::vec4_type);
+ emit(BRW_OPCODE_MOV, dst, src1);
+ src1 = expanded;
+
+ emit(opcode, dst, src0, src1);
+}
+
+void
+vec4_visitor::emit_math2_gen4(enum opcode opcode,
+ dst_reg dst, src_reg src0, src_reg src1)
+{
+ vec4_instruction *inst = emit(opcode, dst, src0, src1);
+ inst->base_mrf = 1;
+ inst->mlen = 2;
+}
+
+void
+vec4_visitor::emit_math(enum opcode opcode,
+ dst_reg dst, src_reg src0, src_reg src1)
+{
+ assert(opcode == SHADER_OPCODE_POW);
+
+ if (intel->gen >= 6) {
+ return emit_math2_gen6(opcode, dst, src0, src1);
+ } else {
+ return emit_math2_gen4(opcode, dst, src0, src1);
+ }
+}
+
+void
+vec4_visitor::visit_instructions(const exec_list *list)
+{
+ foreach_iter(exec_list_iterator, iter, *list) {
+ ir_instruction *ir = (ir_instruction *)iter.get();
+
+ base_ir = ir;
+ ir->accept(this);
+ }
+}
+
+
+static int
+type_size(const struct glsl_type *type)
+{
+ unsigned int i;
+ int size;
+
+ switch (type->base_type) {
+ case GLSL_TYPE_UINT:
+ case GLSL_TYPE_INT:
+ case GLSL_TYPE_FLOAT:
+ case GLSL_TYPE_BOOL:
+ if (type->is_matrix()) {
+ return type->matrix_columns;
+ } else {
+ /* Regardless of size of vector, it gets a vec4. This is bad
+ * packing for things like floats, but otherwise arrays become a
+ * mess. Hopefully a later pass over the code can pack scalars
+ * down if appropriate.
+ */
+ return 1;
+ }
+ case GLSL_TYPE_ARRAY:
+ assert(type->length > 0);
+ return type_size(type->fields.array) * type->length;
+ case GLSL_TYPE_STRUCT:
+ size = 0;
+ for (i = 0; i < type->length; i++) {
+ size += type_size(type->fields.structure[i].type);
+ }
+ return size;
+ case GLSL_TYPE_SAMPLER:
+ /* Samplers take up one slot in UNIFORMS[], but they're baked in
+ * at link time.
+ */
+ return 1;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+int
+vec4_visitor::virtual_grf_alloc(int size)
+{
+ if (virtual_grf_array_size <= virtual_grf_count) {
+ if (virtual_grf_array_size == 0)
+ virtual_grf_array_size = 16;
+ else
+ virtual_grf_array_size *= 2;
+ virtual_grf_sizes = reralloc(mem_ctx, virtual_grf_sizes, int,
+ virtual_grf_array_size);
+ }
+ virtual_grf_sizes[virtual_grf_count] = size;
+ return virtual_grf_count++;
+}
+
+src_reg::src_reg(class vec4_visitor *v, const struct glsl_type *type)
+{
+ init();
+
+ this->file = GRF;
+ this->reg = v->virtual_grf_alloc(type_size(type));
+
+ if (type->is_array() || type->is_record()) {
+ this->swizzle = BRW_SWIZZLE_NOOP;
+ } else {
+ this->swizzle = swizzle_for_size(type->vector_elements);
+ }
+
+ this->type = brw_type_for_base_type(type);
+}
+
+dst_reg::dst_reg(class vec4_visitor *v, const struct glsl_type *type)
+{
+ init();
+
+ this->file = GRF;
+ this->reg = v->virtual_grf_alloc(type_size(type));
+
+ if (type->is_array() || type->is_record()) {
+ this->writemask = WRITEMASK_XYZW;
+ } else {
+ this->writemask = (1 << type->vector_elements) - 1;
+ }
+
+ this->type = brw_type_for_base_type(type);
+}
+
+dst_reg *
+vec4_visitor::variable_storage(ir_variable *var)
+{
+ return (dst_reg *)hash_table_find(this->variable_ht, var);
+}
+
+void
+vec4_visitor::emit_bool_to_cond_code(ir_rvalue *ir)
+{
+ ir_expression *expr = ir->as_expression();
+
+ if (expr) {
+ src_reg op[2];
+ vec4_instruction *inst;
+
+ assert(expr->get_num_operands() <= 2);
+ for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
+ assert(expr->operands[i]->type->is_scalar());
+
+ expr->operands[i]->accept(this);
+ op[i] = this->result;
+ }
+
+ switch (expr->operation) {
+ case ir_unop_logic_not:
+ inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], src_reg(1));
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+ break;
+
+ case ir_binop_logic_xor:
+ inst = emit(BRW_OPCODE_XOR, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_binop_logic_or:
+ inst = emit(BRW_OPCODE_OR, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_binop_logic_and:
+ inst = emit(BRW_OPCODE_AND, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_unop_f2b:
+ if (intel->gen >= 6) {
+ inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0.0f));
+ } else {
+ inst = emit(BRW_OPCODE_MOV, dst_null_f(), op[0]);
+ }
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_unop_i2b:
+ if (intel->gen >= 6) {
+ inst = emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+ } else {
+ inst = emit(BRW_OPCODE_MOV, dst_null_d(), op[0]);
+ }
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ break;
+
+ case ir_binop_greater:
+ case ir_binop_gequal:
+ case ir_binop_less:
+ case ir_binop_lequal:
+ case ir_binop_equal:
+ case ir_binop_all_equal:
+ case ir_binop_nequal:
+ case ir_binop_any_nequal:
+ inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+ inst->conditional_mod =
+ brw_conditional_for_comparison(expr->operation);
+ break;
+
+ default:
+ assert(!"not reached");
+ break;
+ }
+ return;
+ }
+
+ ir->accept(this);
+
+ if (intel->gen >= 6) {
+ vec4_instruction *inst = emit(BRW_OPCODE_AND, dst_null_d(),
+ this->result, src_reg(1));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ } else {
+ vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst_null_d(), this->result);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ }
+}
+
+/**
+ * Emit a gen6 IF statement with the comparison folded into the IF
+ * instruction.
+ */
+void
+vec4_visitor::emit_if_gen6(ir_if *ir)
+{
+ ir_expression *expr = ir->condition->as_expression();
+
+ if (expr) {
+ src_reg op[2];
+ vec4_instruction *inst;
+ dst_reg temp;
+
+ assert(expr->get_num_operands() <= 2);
+ for (unsigned int i = 0; i < expr->get_num_operands(); i++) {
+ assert(expr->operands[i]->type->is_scalar());
+
+ expr->operands[i]->accept(this);
+ op[i] = this->result;
+ }
+
+ switch (expr->operation) {
+ case ir_unop_logic_not:
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+ return;
+
+ case ir_binop_logic_xor:
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_binop_logic_or:
+ temp = dst_reg(this, glsl_type::bool_type);
+ emit(BRW_OPCODE_OR, temp, op[0], op[1]);
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_binop_logic_and:
+ temp = dst_reg(this, glsl_type::bool_type);
+ emit(BRW_OPCODE_AND, temp, op[0], op[1]);
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), src_reg(temp), src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_unop_f2b:
+ inst = emit(BRW_OPCODE_IF, dst_null_f(), op[0], src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_unop_i2b:
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+
+ case ir_binop_greater:
+ case ir_binop_gequal:
+ case ir_binop_less:
+ case ir_binop_lequal:
+ case ir_binop_equal:
+ case ir_binop_all_equal:
+ case ir_binop_nequal:
+ case ir_binop_any_nequal:
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], op[1]);
+ inst->conditional_mod =
+ brw_conditional_for_comparison(expr->operation);
+ return;
+ default:
+ assert(!"not reached");
+ inst = emit(BRW_OPCODE_IF, dst_null_d(), op[0], src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ return;
+ }
+ return;
+ }
+
+ ir->condition->accept(this);
+
+ vec4_instruction *inst = emit(BRW_OPCODE_IF, dst_null_d(),
+ this->result, src_reg(0));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+}
+
+void
+vec4_visitor::visit(ir_variable *ir)
+{
+ dst_reg *reg = NULL;
+
+ if (variable_storage(ir))
+ return;
+
+ switch (ir->mode) {
+ case ir_var_in:
+ reg = new(mem_ctx) dst_reg(ATTR, ir->location);
+ reg->type = brw_type_for_base_type(ir->type);
+ hash_table_insert(this->variable_ht, reg, ir);
+ break;
+
+ case ir_var_out:
+ reg = new(mem_ctx) dst_reg(this, ir->type);
+ hash_table_insert(this->variable_ht, reg, ir);
+
+ for (int i = 0; i < type_size(ir->type); i++) {
+ output_reg[ir->location + i] = *reg;
+ output_reg[ir->location + i].reg_offset = i;
+ }
+ break;
+
+ case ir_var_temporary:
+ reg = new(mem_ctx) dst_reg(this, ir->type);
+ hash_table_insert(this->variable_ht, reg, ir);
+
+ break;
+
+ case ir_var_uniform:
+ /* FINISHME: uniforms */
+ break;
+ }
+}
+
+void
+vec4_visitor::visit(ir_loop *ir)
+{
+ ir_dereference_variable *counter = NULL;
+
+ /* We don't want debugging output to print the whole body of the
+ * loop as the annotation.
+ */
+ this->base_ir = NULL;
+
+ if (ir->counter != NULL)
+ counter = new(ir) ir_dereference_variable(ir->counter);
+
+ if (ir->from != NULL) {
+ assert(ir->counter != NULL);
+
+ ir_assignment *a = new(ir) ir_assignment(counter, ir->from, NULL);
+
+ a->accept(this);
+ delete a;
+ }
+
+ emit(BRW_OPCODE_DO);
+
+ if (ir->to) {
+ ir_expression *e =
+ new(ir) ir_expression(ir->cmp, glsl_type::bool_type,
+ counter, ir->to);
+ ir_if *if_stmt = new(ir) ir_if(e);
+
+ ir_loop_jump *brk = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
+
+ if_stmt->then_instructions.push_tail(brk);
+
+ if_stmt->accept(this);
+
+ delete if_stmt;
+ delete e;
+ delete brk;
+ }
+
+ visit_instructions(&ir->body_instructions);
+
+ if (ir->increment) {
+ ir_expression *e =
+ new(ir) ir_expression(ir_binop_add, counter->type,
+ counter, ir->increment);
+
+ ir_assignment *a = new(ir) ir_assignment(counter, e, NULL);
+
+ a->accept(this);
+ delete a;
+ delete e;
+ }
+
+ emit(BRW_OPCODE_WHILE);
+}
+
+void
+vec4_visitor::visit(ir_loop_jump *ir)
+{
+ switch (ir->mode) {
+ case ir_loop_jump::jump_break:
+ emit(BRW_OPCODE_BREAK);
+ break;
+ case ir_loop_jump::jump_continue:
+ emit(BRW_OPCODE_CONTINUE);
+ break;
+ }
+}
+
+
+void
+vec4_visitor::visit(ir_function_signature *ir)
+{
+ assert(0);
+ (void)ir;
+}
+
+void
+vec4_visitor::visit(ir_function *ir)
+{
+ /* Ignore function bodies other than main() -- we shouldn't see calls to
+ * them since they should all be inlined.
+ */
+ if (strcmp(ir->name, "main") == 0) {
+ const ir_function_signature *sig;
+ exec_list empty;
+
+ sig = ir->matching_signature(&empty);
+
+ assert(sig);
+
+ visit_instructions(&sig->body);
+ }
+}
+
+GLboolean
+vec4_visitor::try_emit_sat(ir_expression *ir)
+{
+ ir_rvalue *sat_src = ir->as_rvalue_to_saturate();
+ if (!sat_src)
+ return false;
+
+ sat_src->accept(this);
+ src_reg src = this->result;
+
+ this->result = src_reg(this, ir->type);
+ vec4_instruction *inst;
+ inst = emit(BRW_OPCODE_MOV, dst_reg(this->result), src);
+ inst->saturate = true;
+
+ return true;
+}
+
+void
+vec4_visitor::emit_bool_comparison(unsigned int op,
+ dst_reg dst, src_reg src0, src_reg src1)
+{
+ /* original gen4 does destination conversion before comparison. */
+ if (intel->gen < 5)
+ dst.type = src0.type;
+
+ vec4_instruction *inst = emit(BRW_OPCODE_CMP, dst, src0, src1);
+ inst->conditional_mod = brw_conditional_for_comparison(op);
+
+ dst.type = BRW_REGISTER_TYPE_D;
+ emit(BRW_OPCODE_AND, dst, src_reg(dst), src_reg(0x1));
+}
+
+void
+vec4_visitor::visit(ir_expression *ir)
+{
+ unsigned int operand;
+ src_reg op[Elements(ir->operands)];
+ src_reg result_src;
+ dst_reg result_dst;
+ vec4_instruction *inst;
+
+ if (try_emit_sat(ir))
+ return;
+
+ for (operand = 0; operand < ir->get_num_operands(); operand++) {
+ this->result.file = BAD_FILE;
+ ir->operands[operand]->accept(this);
+ if (this->result.file == BAD_FILE) {
+ printf("Failed to get tree for expression operand:\n");
+ ir->operands[operand]->print();
+ exit(1);
+ }
+ op[operand] = this->result;
+
+ /* Matrix expression operands should have been broken down to vector
+ * operations already.
+ */
+ assert(!ir->operands[operand]->type->is_matrix());
+ }
+
+ int vector_elements = ir->operands[0]->type->vector_elements;
+ if (ir->operands[1]) {
+ vector_elements = MAX2(vector_elements,
+ ir->operands[1]->type->vector_elements);
+ }
+
+ this->result.file = BAD_FILE;
+
+ /* Storage for our result. Ideally for an assignment we'd be using
+ * the actual storage for the result here, instead.
+ */
+ result_src = src_reg(this, ir->type);
+ /* convenience for the emit functions below. */
+ result_dst = dst_reg(result_src);
+ /* If nothing special happens, this is the result. */
+ this->result = result_src;
+ /* Limit writes to the channels that will be used by result_src later.
+ * This does limit this temp's use as a temporary for multi-instruction
+ * sequences.
+ */
+ result_dst.writemask = (1 << ir->type->vector_elements) - 1;
+
+ switch (ir->operation) {
+ case ir_unop_logic_not:
+ /* Note that BRW_OPCODE_NOT is not appropriate here, since it is
+ * ones complement of the whole register, not just bit 0.
+ */
+ emit(BRW_OPCODE_XOR, result_dst, op[0], src_reg(1));
+ break;
+ case ir_unop_neg:
+ op[0].negate = !op[0].negate;
+ this->result = op[0];
+ break;
+ case ir_unop_abs:
+ op[0].abs = true;
+ op[0].negate = false;
+ this->result = op[0];
+ break;
+
+ case ir_unop_sign:
+ emit(BRW_OPCODE_MOV, result_dst, src_reg(0.0f));
+
+ inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
+ inst->conditional_mod = BRW_CONDITIONAL_G;
+ inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1.0f));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+
+ inst = emit(BRW_OPCODE_CMP, dst_null_f(), op[0], src_reg(0.0f));
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+ inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(-1.0f));
+ inst->predicate = BRW_PREDICATE_NORMAL;
+
+ break;
+
+ case ir_unop_rcp:
+ emit_math(SHADER_OPCODE_RCP, result_dst, op[0]);
+ break;
+
+ case ir_unop_exp2:
+ emit_math(SHADER_OPCODE_EXP2, result_dst, op[0]);
+ break;
+ case ir_unop_log2:
+ emit_math(SHADER_OPCODE_LOG2, result_dst, op[0]);
+ break;
+ case ir_unop_exp:
+ case ir_unop_log:
+ assert(!"not reached: should be handled by ir_explog_to_explog2");
+ break;
+ case ir_unop_sin:
+ case ir_unop_sin_reduced:
+ emit_math(SHADER_OPCODE_SIN, result_dst, op[0]);
+ break;
+ case ir_unop_cos:
+ case ir_unop_cos_reduced:
+ emit_math(SHADER_OPCODE_COS, result_dst, op[0]);
+ break;
+
+ case ir_unop_dFdx:
+ case ir_unop_dFdy:
+ assert(!"derivatives not valid in vertex shader");
+ break;
+
+ case ir_unop_noise:
+ assert(!"not reached: should be handled by lower_noise");
+ break;
+
+ case ir_binop_add:
+ emit(BRW_OPCODE_ADD, result_dst, op[0], op[1]);
+ break;
+ case ir_binop_sub:
+ assert(!"not reached: should be handled by ir_sub_to_add_neg");
+ break;
+
+ case ir_binop_mul:
+ emit(BRW_OPCODE_MUL, result_dst, op[0], op[1]);
+ break;
+ case ir_binop_div:
+ assert(!"not reached: should be handled by ir_div_to_mul_rcp");
+ case ir_binop_mod:
+ assert(!"ir_binop_mod should have been converted to b * fract(a/b)");
+ break;
+
+ case ir_binop_less:
+ case ir_binop_greater:
+ case ir_binop_lequal:
+ case ir_binop_gequal:
+ case ir_binop_equal:
+ case ir_binop_nequal: {
+ dst_reg temp = result_dst;
+ /* original gen4 does implicit conversion before comparison. */
+ if (intel->gen < 5)
+ temp.type = op[0].type;
+
+ inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+ inst->conditional_mod = brw_conditional_for_comparison(ir->operation);
+ emit(BRW_OPCODE_AND, result_dst, this->result, src_reg(0x1));
+ break;
+ }
+
+ case ir_binop_all_equal:
+ /* "==" operator producing a scalar boolean. */
+ if (ir->operands[0]->type->is_vector() ||
+ ir->operands[1]->type->is_vector()) {
+ inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_Z;
+
+ emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+ inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+ inst->predicate = BRW_PREDICATE_ALIGN16_ALL4H;
+ } else {
+ dst_reg temp = result_dst;
+ /* original gen4 does implicit conversion before comparison. */
+ if (intel->gen < 5)
+ temp.type = op[0].type;
+
+ inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
+ }
+ break;
+ case ir_binop_any_nequal:
+ /* "!=" operator producing a scalar boolean. */
+ if (ir->operands[0]->type->is_vector() ||
+ ir->operands[1]->type->is_vector()) {
+ inst = emit(BRW_OPCODE_CMP, dst_null_cmp(), op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+
+ emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+ inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+ inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+ } else {
+ dst_reg temp = result_dst;
+ /* original gen4 does implicit conversion before comparison. */
+ if (intel->gen < 5)
+ temp.type = op[0].type;
+
+ inst = emit(BRW_OPCODE_CMP, temp, op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(0x1));
+ }
+ break;
+
+ case ir_unop_any:
+ emit(BRW_OPCODE_CMP, dst_null_d(), op[0], src_reg(0));
+ emit(BRW_OPCODE_MOV, result_dst, src_reg(0));
+
+ inst = emit(BRW_OPCODE_MOV, result_dst, src_reg(1));
+ inst->predicate = BRW_PREDICATE_ALIGN16_ANY4H;
+ break;
+
+ case ir_binop_logic_xor:
+ emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
+ break;
+
+ case ir_binop_logic_or:
+ emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
+ break;
+
+ case ir_binop_logic_and:
+ emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
+ break;
+
+ case ir_binop_dot:
+ assert(ir->operands[0]->type->is_vector());
+ assert(ir->operands[0]->type == ir->operands[1]->type);
+ emit_dp(result_dst, op[0], op[1], ir->operands[0]->type->vector_elements);
+ break;
+
+ case ir_unop_sqrt:
+ emit_math(SHADER_OPCODE_SQRT, result_dst, op[0]);
+ break;
+ case ir_unop_rsq:
+ emit_math(SHADER_OPCODE_RSQ, result_dst, op[0]);
+ break;
+ case ir_unop_i2f:
+ case ir_unop_i2u:
+ case ir_unop_u2i:
+ case ir_unop_u2f:
+ case ir_unop_b2f:
+ case ir_unop_b2i:
+ case ir_unop_f2i:
+ emit(BRW_OPCODE_MOV, result_dst, op[0]);
+ break;
+ case ir_unop_f2b:
+ case ir_unop_i2b: {
+ dst_reg temp = result_dst;
+ /* original gen4 does implicit conversion before comparison. */
+ if (intel->gen < 5)
+ temp.type = op[0].type;
+
+ inst = emit(BRW_OPCODE_CMP, temp, op[0], src_reg(0.0f));
+ inst->conditional_mod = BRW_CONDITIONAL_NZ;
+ inst = emit(BRW_OPCODE_AND, result_dst, result_src, src_reg(1));
+ break;
+ }
+
+ case ir_unop_trunc:
+ emit(BRW_OPCODE_RNDZ, result_dst, op[0]);
+ break;
+ case ir_unop_ceil:
+ op[0].negate = !op[0].negate;
+ inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
+ this->result.negate = true;
+ break;
+ case ir_unop_floor:
+ inst = emit(BRW_OPCODE_RNDD, result_dst, op[0]);
+ break;
+ case ir_unop_fract:
+ inst = emit(BRW_OPCODE_FRC, result_dst, op[0]);
+ break;
+ case ir_unop_round_even:
+ emit(BRW_OPCODE_RNDE, result_dst, op[0]);
+ break;
+
+ case ir_binop_min:
+ inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+
+ inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ break;
+ case ir_binop_max:
+ inst = emit(BRW_OPCODE_CMP, result_dst, op[0], op[1]);
+ inst->conditional_mod = BRW_CONDITIONAL_G;
+
+ inst = emit(BRW_OPCODE_SEL, result_dst, op[0], op[1]);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ break;
+
+ case ir_binop_pow:
+ emit_math(SHADER_OPCODE_POW, result_dst, op[0], op[1]);
+ break;
+
+ case ir_unop_bit_not:
+ inst = emit(BRW_OPCODE_NOT, result_dst, op[0]);
+ break;
+ case ir_binop_bit_and:
+ inst = emit(BRW_OPCODE_AND, result_dst, op[0], op[1]);
+ break;
+ case ir_binop_bit_xor:
+ inst = emit(BRW_OPCODE_XOR, result_dst, op[0], op[1]);
+ break;
+ case ir_binop_bit_or:
+ inst = emit(BRW_OPCODE_OR, result_dst, op[0], op[1]);
+ break;
+
+ case ir_binop_lshift:
+ case ir_binop_rshift:
+ assert(!"GLSL 1.30 features unsupported");
+ break;
+
+ case ir_quadop_vector:
+ assert(!"not reached: should be handled by lower_quadop_vector");
+ break;
+ }
+}
+
+
+void
+vec4_visitor::visit(ir_swizzle *ir)
+{
+ src_reg src;
+ int i = 0;
+ int swizzle[4];
+
+ /* Note that this is only swizzles in expressions, not those on the left
+ * hand side of an assignment, which do write masking. See ir_assignment
+ * for that.
+ */
+
+ ir->val->accept(this);
+ src = this->result;
+ assert(src.file != BAD_FILE);
+
+ if (i < ir->type->vector_elements) {
+ switch (i) {
+ case 0:
+ swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.x);
+ break;
+ case 1:
+ swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.y);
+ break;
+ case 2:
+ swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.z);
+ break;
+ case 3:
+ swizzle[i] = BRW_GET_SWZ(src.swizzle, ir->mask.w);
+ break;
+ }
+ }
+ for (; i < 4; i++) {
+ /* Replicate the last channel out. */
+ swizzle[i] = swizzle[ir->type->vector_elements - 1];
+ }
+
+ src.swizzle = BRW_SWIZZLE4(swizzle[0], swizzle[1], swizzle[2], swizzle[3]);
+
+ this->result = src;
+}
+
+void
+vec4_visitor::visit(ir_dereference_variable *ir)
+{
+ dst_reg *reg = variable_storage(ir->var);
+
+ if (!reg) {
+ fail("Failed to find variable storage for %s\n", ir->var->name);
+ this->result = src_reg(brw_null_reg());
+ return;
+ }
+
+ this->result = src_reg(*reg);
+}
+
+void
+vec4_visitor::visit(ir_dereference_array *ir)
+{
+ ir_constant *constant_index;
+ src_reg src;
+ int element_size = type_size(ir->type);
+
+ constant_index = ir->array_index->constant_expression_value();
+
+ ir->array->accept(this);
+ src = this->result;
+
+ if (constant_index) {
+ src.reg_offset += constant_index->value.i[0] * element_size;
+ } else {
+#if 0 /* Variable array index */
+ /* Variable index array dereference. It eats the "vec4" of the
+ * base of the array and an index that offsets the Mesa register
+ * index.
+ */
+ ir->array_index->accept(this);
+
+ src_reg index_reg;
+
+ if (element_size == 1) {
+ index_reg = this->result;
+ } else {
+ index_reg = src_reg(this, glsl_type::float_type);
+
+ emit(BRW_OPCODE_MUL, dst_reg(index_reg),
+ this->result, src_reg_for_float(element_size));
+ }
+
+ src.reladdr = ralloc(mem_ctx, src_reg);
+ memcpy(src.reladdr, &index_reg, sizeof(index_reg));
+#endif
+ }
+
+ /* If the type is smaller than a vec4, replicate the last channel out. */
+ if (ir->type->is_scalar() || ir->type->is_vector())
+ src.swizzle = swizzle_for_size(ir->type->vector_elements);
+ else
+ src.swizzle = BRW_SWIZZLE_NOOP;
+
+ this->result = src;
+}
+
+void
+vec4_visitor::visit(ir_dereference_record *ir)
+{
+ unsigned int i;
+ const glsl_type *struct_type = ir->record->type;
+ int offset = 0;
+
+ ir->record->accept(this);
+
+ for (i = 0; i < struct_type->length; i++) {
+ if (strcmp(struct_type->fields.structure[i].name, ir->field) == 0)
+ break;
+ offset += type_size(struct_type->fields.structure[i].type);
+ }
+
+ /* If the type is smaller than a vec4, replicate the last channel out. */
+ if (ir->type->is_scalar() || ir->type->is_vector())
+ this->result.swizzle = swizzle_for_size(ir->type->vector_elements);
+ else
+ this->result.swizzle = BRW_SWIZZLE_NOOP;
+
+ this->result.reg_offset += offset;
+}
+
+/**
+ * We want to be careful in assignment setup to hit the actual storage
+ * instead of potentially using a temporary like we might with the
+ * ir_dereference handler.
+ */
+static dst_reg
+get_assignment_lhs(ir_dereference *ir, vec4_visitor *v)
+{
+ /* The LHS must be a dereference. If the LHS is a variable indexed array
+ * access of a vector, it must be separated into a series conditional moves
+ * before reaching this point (see ir_vec_index_to_cond_assign).
+ */
+ assert(ir->as_dereference());
+ ir_dereference_array *deref_array = ir->as_dereference_array();
+ if (deref_array) {
+ assert(!deref_array->array->type->is_vector());
+ }
+
+ /* Use the rvalue deref handler for the most part. We'll ignore
+ * swizzles in it and write swizzles using writemask, though.
+ */
+ ir->accept(v);
+ return dst_reg(v->result);
+}
+
+void
+vec4_visitor::emit_block_move(ir_assignment *ir)
+{
+ ir->rhs->accept(this);
+ src_reg src = this->result;
+
+ dst_reg dst = get_assignment_lhs(ir->lhs, this);
+
+ /* FINISHME: This should really set to the correct maximal writemask for each
+ * FINISHME: component written (in the loops below).
+ */
+ dst.writemask = WRITEMASK_XYZW;
+
+ for (int i = 0; i < type_size(ir->lhs->type); i++) {
+ vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
+ if (ir->condition)
+ inst->predicate = BRW_PREDICATE_NORMAL;
+
+ dst.reg_offset++;
+ src.reg_offset++;
+ }
+}
+
+void
+vec4_visitor::visit(ir_assignment *ir)
+{
+ if (!ir->lhs->type->is_scalar() &&
+ !ir->lhs->type->is_vector()) {
+ emit_block_move(ir);
+ return;
+ }
+
+ /* Now we're down to just a scalar/vector with writemasks. */
+ int i;
+
+ ir->rhs->accept(this);
+ src_reg src = this->result;
+
+ dst_reg dst = get_assignment_lhs(ir->lhs, this);
+
+ int swizzles[4];
+ int first_enabled_chan = 0;
+ int src_chan = 0;
+
+ assert(ir->lhs->type->is_vector());
+ dst.writemask = ir->write_mask;
+
+ for (int i = 0; i < 4; i++) {
+ if (dst.writemask & (1 << i)) {
+ first_enabled_chan = BRW_GET_SWZ(src.swizzle, i);
+ break;
+ }
+ }
+
+ /* Swizzle a small RHS vector into the channels being written.
+ *
+ * glsl ir treats write_mask as dictating how many channels are
+ * present on the RHS while in our instructions we need to make
+ * those channels appear in the slots of the vec4 they're written to.
+ */
+ for (int i = 0; i < 4; i++) {
+ if (dst.writemask & (1 << i))
+ swizzles[i] = BRW_GET_SWZ(src.swizzle, src_chan++);
+ else
+ swizzles[i] = first_enabled_chan;
+ }
+ src.swizzle = BRW_SWIZZLE4(swizzles[0], swizzles[1],
+ swizzles[2], swizzles[3]);
+
+ if (ir->condition) {
+ emit_bool_to_cond_code(ir->condition);
+ }
+
+ for (i = 0; i < type_size(ir->lhs->type); i++) {
+ vec4_instruction *inst = emit(BRW_OPCODE_MOV, dst, src);
+
+ if (ir->condition)
+ inst->predicate = BRW_PREDICATE_NORMAL;
+
+ dst.reg_offset++;
+ src.reg_offset++;
+ }
+}
+
+
+void
+vec4_visitor::visit(ir_constant *ir)
+{
+ if (ir->type->base_type == GLSL_TYPE_STRUCT) {
+ src_reg temp_base = src_reg(this, ir->type);
+ dst_reg temp = dst_reg(temp_base);
+
+ foreach_iter(exec_list_iterator, iter, ir->components) {
+ ir_constant *field_value = (ir_constant *)iter.get();
+ int size = type_size(field_value->type);
+
+ assert(size > 0);
+
+ field_value->accept(this);
+ src_reg src = this->result;
+
+ for (int i = 0; i < (unsigned int)size; i++) {
+ emit(BRW_OPCODE_MOV, temp, src);
+
+ src.reg_offset++;
+ temp.reg_offset++;
+ }
+ }
+ this->result = temp_base;
+ return;
+ }
+
+ if (ir->type->is_array()) {
+ src_reg temp_base = src_reg(this, ir->type);
+ dst_reg temp = dst_reg(temp_base);
+ int size = type_size(ir->type->fields.array);
+
+ assert(size > 0);
+
+ for (unsigned int i = 0; i < ir->type->length; i++) {
+ ir->array_elements[i]->accept(this);
+ src_reg src = this->result;
+ for (int j = 0; j < size; j++) {
+ emit(BRW_OPCODE_MOV, temp, src);
+
+ src.reg_offset++;
+ temp.reg_offset++;
+ }
+ }
+ this->result = temp_base;
+ return;
+ }
+
+ if (ir->type->is_matrix()) {
+ this->result = src_reg(this, ir->type);
+ dst_reg dst = dst_reg(this->result);
+
+ assert(ir->type->base_type == GLSL_TYPE_FLOAT);
+
+ for (int i = 0; i < ir->type->matrix_columns; i++) {
+ for (int j = 0; j < ir->type->vector_elements; j++) {
+ dst.writemask = 1 << j;
+ emit(BRW_OPCODE_MOV, dst,
+ src_reg(ir->value.f[i * ir->type->vector_elements + j]));
+ }
+ dst.reg_offset++;
+ }
+ return;
+ }
+
+ for (int i = 0; i < ir->type->vector_elements; i++) {
+ this->result = src_reg(this, ir->type);
+ dst_reg dst = dst_reg(this->result);
+
+ dst.writemask = 1 << i;
+
+ switch (ir->type->base_type) {
+ case GLSL_TYPE_FLOAT:
+ emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.f[i]));
+ break;
+ case GLSL_TYPE_INT:
+ emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.i[i]));
+ break;
+ case GLSL_TYPE_UINT:
+ emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.u[i]));
+ break;
+ case GLSL_TYPE_BOOL:
+ emit(BRW_OPCODE_MOV, dst, src_reg(ir->value.b[i]));
+ break;
+ default:
+ assert(!"Non-float/uint/int/bool constant");
+ break;
+ }
+ }
+}
+
+void
+vec4_visitor::visit(ir_call *ir)
+{
+ assert(!"not reached");
+}
+
+void
+vec4_visitor::visit(ir_texture *ir)
+{
+ assert(!"not reached");
+}
+
+void
+vec4_visitor::visit(ir_return *ir)
+{
+ assert(!"not reached");
+}
+
+void
+vec4_visitor::visit(ir_discard *ir)
+{
+ assert(!"not reached");
+}
+
+void
+vec4_visitor::visit(ir_if *ir)
+{
+ this->base_ir = ir->condition;
+ ir->condition->accept(this);
+ assert(this->result.file != BAD_FILE);
+
+ /* FINISHME: condcode */
+ emit(BRW_OPCODE_IF);
+
+ visit_instructions(&ir->then_instructions);
+
+ if (!ir->else_instructions.is_empty()) {
+ this->base_ir = ir->condition;
+ emit(BRW_OPCODE_ELSE);
+
+ visit_instructions(&ir->else_instructions);
+ }
+
+ this->base_ir = ir->condition;
+ emit(BRW_OPCODE_ENDIF);
+}
+
+int
+vec4_visitor::emit_vue_header_gen4(int header_mrf)
+{
+ /* Get the position */
+ src_reg pos = src_reg(output_reg[VERT_RESULT_HPOS]);
+
+ /* Build ndc coords, which are (x/w, y/w, z/w, 1/w) */
+ dst_reg ndc = dst_reg(this, glsl_type::vec4_type);
+
+ current_annotation = "NDC";
+ dst_reg ndc_w = ndc;
+ ndc_w.writemask = WRITEMASK_W;
+ src_reg pos_w = pos;
+ pos_w.swizzle = BRW_SWIZZLE4(SWIZZLE_W, SWIZZLE_W, SWIZZLE_W, SWIZZLE_W);
+ emit_math(SHADER_OPCODE_RCP, ndc_w, pos_w);
+
+ dst_reg ndc_xyz = ndc;
+ ndc_xyz.writemask = WRITEMASK_XYZ;
+
+ emit(BRW_OPCODE_MUL, ndc_xyz, pos, src_reg(ndc_w));
+
+ if ((c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) ||
+ c->key.nr_userclip || brw->has_negative_rhw_bug) {
+ dst_reg header1 = dst_reg(this, glsl_type::uvec4_type);
+ GLuint i;
+
+ emit(BRW_OPCODE_MOV, header1, 0u);
+
+ if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
+ assert(!"finishme: psiz");
+ src_reg psiz;
+
+ header1.writemask = WRITEMASK_W;
+ emit(BRW_OPCODE_MUL, header1, psiz, 1u << 11);
+ emit(BRW_OPCODE_AND, header1, src_reg(header1), 0x7ff << 8);
+ }
+
+ for (i = 0; i < c->key.nr_userclip; i++) {
+ vec4_instruction *inst;
+
+ inst = emit(BRW_OPCODE_DP4, dst_reg(brw_null_reg()),
+ pos, src_reg(c->userplane[i]));
+ inst->conditional_mod = BRW_CONDITIONAL_L;
+
+ emit(BRW_OPCODE_OR, header1, src_reg(header1), 1u << i);
+ inst->predicate = BRW_PREDICATE_NORMAL;
+ }
+
+ /* i965 clipping workaround:
+ * 1) Test for -ve rhw
+ * 2) If set,
+ * set ndc = (0,0,0,0)
+ * set ucp[6] = 1
+ *
+ * Later, clipping will detect ucp[6] and ensure the primitive is
+ * clipped against all fixed planes.
+ */
+ if (brw->has_negative_rhw_bug) {
+#if 0
+ /* FINISHME */
+ brw_CMP(p,
+ vec8(brw_null_reg()),
+ BRW_CONDITIONAL_L,
+ brw_swizzle1(ndc, 3),
+ brw_imm_f(0));
+
+ brw_OR(p, brw_writemask(header1, WRITEMASK_W), header1, brw_imm_ud(1<<6));
+ brw_MOV(p, ndc, brw_imm_f(0));
+ brw_set_predicate_control(p, BRW_PREDICATE_NONE);
+#endif
+ }
+
+ header1.writemask = WRITEMASK_XYZW;
+ emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(header1));
+ } else {
+ emit(BRW_OPCODE_MOV, retype(brw_message_reg(header_mrf++),
+ BRW_REGISTER_TYPE_UD), 0u);
+ }
+
+ if (intel->gen == 5) {
+ /* There are 20 DWs (D0-D19) in VUE header on Ironlake:
+ * dword 0-3 (m1) of the header is indices, point width, clip flags.
+ * dword 4-7 (m2) is the ndc position (set above)
+ * dword 8-11 (m3) of the vertex header is the 4D space position
+ * dword 12-19 (m4,m5) of the vertex header is the user clip distance.
+ * m6 is a pad so that the vertex element data is aligned
+ * m7 is the first vertex data we fill, which is the vertex position.
+ */
+ current_annotation = "NDC";
+ emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
+
+ current_annotation = "gl_Position";
+ emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
+
+ /* user clip distance. */
+ header_mrf += 2;
+
+ /* Pad so that vertex element data (starts with position) is aligned. */
+ header_mrf++;
+ } else {
+ /* There are 8 dwords in VUE header pre-Ironlake:
+ * dword 0-3 (m1) is indices, point width, clip flags.
+ * dword 4-7 (m2) is ndc position (set above)
+ *
+ * dword 8-11 (m3) is the first vertex data, which we always have be the
+ * vertex position.
+ */
+ current_annotation = "NDC";
+ emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), src_reg(ndc));
+
+ current_annotation = "gl_Position";
+ emit(BRW_OPCODE_MOV, brw_message_reg(header_mrf++), pos);
+ }
+
+ return header_mrf;
+}
+
+int
+vec4_visitor::emit_vue_header_gen6(int header_mrf)
+{
+ struct brw_reg reg;
+
+ /* There are 8 or 16 DWs (D0-D15) in VUE header on Sandybridge:
+ * dword 0-3 (m2) of the header is indices, point width, clip flags.
+ * dword 4-7 (m3) is the 4D space position
+ * dword 8-15 (m4,m5) of the vertex header is the user clip distance if
+ * enabled.
+ *
+ * m4 or 6 is the first vertex element data we fill, which is
+ * the vertex position.
+ */
+
+ current_annotation = "indices, point width, clip flags";
+ reg = brw_message_reg(header_mrf++);
+ emit(BRW_OPCODE_MOV, retype(reg, BRW_REGISTER_TYPE_D), src_reg(0));
+ if (c->prog_data.outputs_written & BITFIELD64_BIT(VERT_RESULT_PSIZ)) {
+ emit(BRW_OPCODE_MOV, brw_writemask(reg, WRITEMASK_W),
+ src_reg(output_reg[VERT_RESULT_PSIZ]));
+ }
+
+ current_annotation = "gl_Position";
+ emit(BRW_OPCODE_MOV,
+ brw_message_reg(header_mrf++), src_reg(output_reg[VERT_RESULT_HPOS]));
+
+ current_annotation = "user clip distances";
+ if (c->key.nr_userclip) {
+ for (int i = 0; i < c->key.nr_userclip; i++) {
+ struct brw_reg m;
+ if (i < 4)
+ m = brw_message_reg(header_mrf);
+ else
+ m = brw_message_reg(header_mrf + 1);
+
+ emit(BRW_OPCODE_DP4,
+ dst_reg(brw_writemask(m, 1 << (i & 7))),
+ src_reg(c->userplane[i]));
+ }
+ header_mrf += 2;
+ }
+
+ current_annotation = NULL;
+
+ return header_mrf;
+}
+
+static int
+align_interleaved_urb_mlen(struct brw_context *brw, int mlen)
+{
+ struct intel_context *intel = &brw->intel;
+
+ if (intel->gen >= 6) {
+ /* URB data written (does not include the message header reg) must
+ * be a multiple of 256 bits, or 2 VS registers. See vol5c.5,
+ * section 5.4.3.2.2: URB_INTERLEAVED.
+ *
+ * URB entries are allocated on a multiple of 1024 bits, so an
+ * extra 128 bits written here to make the end align to 256 is
+ * no problem.
+ */
+ if ((mlen % 2) != 1)
+ mlen++;
+ }
+
+ return mlen;
+}
+
+/**
+ * Generates the VUE payload plus the 1 or 2 URB write instructions to
+ * complete the VS thread.
+ *
+ * The VUE layout is documented in Volume 2a.
+ */
+void
+vec4_visitor::emit_urb_writes()
+{
+ int base_mrf = 1;
+ int mrf = base_mrf;
+ int urb_entry_size;
+
+ /* FINISHME: edgeflag */
+
+ /* First mrf is the g0-based message header containing URB handles and such,
+ * which is implied in VS_OPCODE_URB_WRITE.
+ */
+ mrf++;
+
+ if (intel->gen >= 6) {
+ mrf = emit_vue_header_gen6(mrf);
+ } else {
+ mrf = emit_vue_header_gen4(mrf);
+ }
+
+ int attr;
+ for (attr = 0; attr < VERT_RESULT_MAX; attr++) {
+ if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
+ continue;
+
+ /* This is loaded into the VUE header, and thus doesn't occupy
+ * an attribute slot.
+ */
+ if (attr == VERT_RESULT_PSIZ)
+ continue;
+
+ emit(BRW_OPCODE_MOV, brw_message_reg(mrf++), src_reg(output_reg[attr]));
+
+ /* If this is MRF 15, we can't fit anything more into this URB
+ * WRITE. Note that base_mrf of 1 means that MRF 15 is an
+ * even-numbered amount of URB write data, which will meet
+ * gen6's requirements for length alignment.
+ */
+ if (mrf == 15)
+ break;
+ }
+
+ vec4_instruction *inst = emit(VS_OPCODE_URB_WRITE);
+ inst->base_mrf = base_mrf;
+ inst->mlen = align_interleaved_urb_mlen(brw, mrf - base_mrf);
+ inst->eot = true;
+
+ urb_entry_size = mrf - base_mrf;
+
+ for (; attr < VERT_RESULT_MAX; attr++) {
+ if (!(c->prog_data.outputs_written & BITFIELD64_BIT(attr)))
+ continue;
+ fail("Second URB write not supported.\n");
+ break;
+ }
+
+ if (intel->gen == 6)
+ c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 8) / 8;
+ else
+ c->prog_data.urb_entry_size = ALIGN(urb_entry_size, 4) / 4;
+}
+
+vec4_visitor::vec4_visitor(struct brw_vs_compile *c,
+ struct gl_shader_program *prog,
+ struct brw_shader *shader)
+{
+ this->c = c;
+ this->p = &c->func;
+ this->brw = p->brw;
+ this->intel = &brw->intel;
+ this->ctx = &intel->ctx;
+ this->prog = prog;
+ this->shader = shader;
+
+ this->mem_ctx = ralloc_context(NULL);
+ this->failed = false;
+
+ this->base_ir = NULL;
+ this->current_annotation = NULL;
+
+ this->c = c;
+ this->prog_data = &c->prog_data;
+
+ this->variable_ht = hash_table_ctor(0,
+ hash_table_pointer_hash,
+ hash_table_pointer_compare);
+
+ this->virtual_grf_sizes = NULL;
+ this->virtual_grf_count = 0;
+ this->virtual_grf_array_size = 0;
+}
+
+vec4_visitor::~vec4_visitor()
+{
+ hash_table_dtor(this->variable_ht);
+}
+
+
+void
+vec4_visitor::fail(const char *format, ...)
+{
+ va_list va;
+ char *msg;
+
+ if (failed)
+ return;
+
+ failed = true;
+
+ va_start(va, format);
+ msg = ralloc_vasprintf(mem_ctx, format, va);
+ va_end(va);
+ msg = ralloc_asprintf(mem_ctx, "VS compile failed: %s\n", msg);
+
+ this->fail_msg = msg;
+
+ if (INTEL_DEBUG & DEBUG_VS) {
+ fprintf(stderr, "%s", msg);
+ }
+}
+
+} /* namespace brw */
diff --git a/src/mesa/drivers/dri/i965/brw_vs.c b/src/mesa/drivers/dri/i965/brw_vs.c
index a9ad531..bd0677d 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.c
+++ b/src/mesa/drivers/dri/i965/brw_vs.c
@@ -30,6 +30,7 @@
*/
+#include "main/compiler.h"
#include "brw_context.h"
#include "brw_vs.h"
#include "brw_util.h"
@@ -50,6 +51,7 @@ static void do_vs_prog( struct brw_context *brw,
void *mem_ctx;
int aux_size;
int i;
+ static int new_vs = -1;
memset(&c, 0, sizeof(c));
memcpy(&c.key, key, sizeof(*key));
@@ -85,7 +87,15 @@ static void do_vs_prog( struct brw_context *brw,
/* Emit GEN4 code.
*/
- brw_vs_emit(&c);
+ if (new_vs == -1)
+ new_vs = getenv("INTEL_NEW_VS") != NULL;
+
+ if (new_vs) {
+ if (!brw_vs_emit(&c))
+ brw_old_vs_emit(&c);
+ } else {
+ brw_old_vs_emit(&c);
+ }
/* get the program
*/
diff --git a/src/mesa/drivers/dri/i965/brw_vs.h b/src/mesa/drivers/dri/i965/brw_vs.h
index 432994a..9f9fed3 100644
--- a/src/mesa/drivers/dri/i965/brw_vs.h
+++ b/src/mesa/drivers/dri/i965/brw_vs.h
@@ -92,6 +92,7 @@ struct brw_vs_compile {
GLboolean needs_stack;
};
-void brw_vs_emit( struct brw_vs_compile *c );
+bool brw_vs_emit(struct brw_vs_compile *c);
+void brw_old_vs_emit(struct brw_vs_compile *c);
#endif
diff --git a/src/mesa/drivers/dri/i965/brw_vs_emit.c b/src/mesa/drivers/dri/i965/brw_vs_emit.c
index dbabb44..a06a2bb 100644
--- a/src/mesa/drivers/dri/i965/brw_vs_emit.c
+++ b/src/mesa/drivers/dri/i965/brw_vs_emit.c
@@ -1903,7 +1903,7 @@ brw_vs_rescale_gl_fixed(struct brw_vs_compile *c)
/* Emit the vertex program instructions here.
*/
-void brw_vs_emit(struct brw_vs_compile *c )
+void brw_old_vs_emit(struct brw_vs_compile *c )
{
#define MAX_IF_DEPTH 32
#define MAX_LOOP_DEPTH 32