summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/gallium/auxiliary/draw/draw_context.c17
-rw-r--r--src/gallium/auxiliary/draw/draw_context.h5
-rw-r--r--src/gallium/auxiliary/draw/draw_gs.c300
-rw-r--r--src/gallium/auxiliary/draw/draw_gs.h35
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.c567
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.h161
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c75
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi.h27
-rw-r--r--src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c173
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_fs.c4
10 files changed, 1285 insertions, 79 deletions
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c
index 6b70ac8..d64b82b 100644
--- a/src/gallium/auxiliary/draw/draw_context.c
+++ b/src/gallium/auxiliary/draw/draw_context.c
@@ -46,7 +46,7 @@
#include "gallivm/lp_bld_limits.h"
#include "draw_llvm.h"
-static boolean
+boolean
draw_get_option_use_llvm(void)
{
static boolean first = TRUE;
@@ -808,16 +808,15 @@ draw_set_mapped_texture(struct draw_context *draw,
uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS],
uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS])
{
- if (shader_stage == PIPE_SHADER_VERTEX) {
#ifdef HAVE_LLVM
- if (draw->llvm)
- draw_llvm_set_mapped_texture(draw,
- sview_idx,
- width, height, depth, first_level,
- last_level, base_ptr,
- row_stride, img_stride, mip_offsets);
+ if (draw->llvm)
+ draw_llvm_set_mapped_texture(draw,
+ shader_stage,
+ sview_idx,
+ width, height, depth, first_level,
+ last_level, base_ptr,
+ row_stride, img_stride, mip_offsets);
#endif
- }
}
/**
diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h
index 18c8595..369f6c8 100644
--- a/src/gallium/auxiliary/draw/draw_context.h
+++ b/src/gallium/auxiliary/draw/draw_context.h
@@ -282,4 +282,9 @@ draw_get_shader_param(unsigned shader, enum pipe_shader_cap param);
int
draw_get_shader_param_no_llvm(unsigned shader, enum pipe_shader_cap param);
+#ifdef HAVE_LLVM
+boolean
+draw_get_option_use_llvm(void);
+#endif
+
#endif /* DRAW_CONTEXT_H */
diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c
index cde0756..c8ed95a 100644
--- a/src/gallium/auxiliary/draw/draw_gs.c
+++ b/src/gallium/auxiliary/draw/draw_gs.c
@@ -29,6 +29,9 @@
#include "draw_private.h"
#include "draw_context.h"
+#ifdef HAVE_LLVM
+#include "draw_llvm.h"
+#endif
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_exec.h"
@@ -67,7 +70,7 @@ draw_gs_get_input_index(int semantic, int index,
static INLINE boolean
draw_gs_should_flush(struct draw_geometry_shader *shader)
{
- return (shader->fetched_prim_count == 4);
+ return (shader->fetched_prim_count == shader->vector_length);
}
/*#define DEBUG_OUTPUTS 1*/
@@ -182,7 +185,7 @@ static void tgsi_fetch_gs_input(struct draw_geometry_shader *shader,
}
static void tgsi_gs_prepare(struct draw_geometry_shader *shader,
- const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS])
{
struct tgsi_exec_machine *machine = shader->machine;
@@ -205,10 +208,148 @@ static unsigned tgsi_gs_run(struct draw_geometry_shader *shader,
/* run interpreter */
tgsi_exec_machine_run(machine);
- return
+ return
machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0];
}
+#ifdef HAVE_LLVM
+
+/**
+ * Copy the vertices of one input primitive into the SoA input buffer
+ * (shader->gs_input->data) consumed by the JIT-compiled geometry shader.
+ *
+ * shader        geometry shader whose gs_input buffer is written
+ * indices       one index per vertex; each selects a vertex in shader->input
+ *               (byte offset = index * shader->input_vertex_stride)
+ * num_vertices  number of vertices making up the primitive
+ * prim_idx      position of this primitive in the innermost array dimension
+ *
+ * The destination is indexed as data[vertex][slot][channel][prim_idx].
+ */
+static void
+llvm_fetch_gs_input(struct draw_geometry_shader *shader,
+                    unsigned *indices,
+                    unsigned num_vertices,
+                    unsigned prim_idx)
+{
+   unsigned slot, vs_slot, i, chan;
+   unsigned input_vertex_stride = shader->input_vertex_stride;
+   const float (*input_ptr)[4];
+   float (*input_data)[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS] = &shader->gs_input->data;
+
+   input_ptr = shader->input;
+
+   for (i = 0; i < num_vertices; ++i) {
+      const float (*input)[4];
+#if DEBUG_INPUTS
+      debug_printf("%d) vertex index = %d (prim idx = %d)\n",
+                   i, indices[i], prim_idx);
+#endif
+      input = (const float (*)[4])(
+         (const char *)input_ptr + (indices[i] * input_vertex_stride));
+      for (slot = 0; slot < shader->info.num_inputs; ++slot) {
+         if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) {
+            /* The primitive id is not a fetched attribute: broadcast the
+             * running input primitive counter to all four channels. */
+            for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+               (*input_data)[i][slot][chan][prim_idx] =
+                  (float)shader->in_prim_idx;
+            }
+         } else {
+            /* Map the GS input semantic to the matching slot of the
+             * previous stage's output. */
+            vs_slot = draw_gs_get_input_index(
+               shader->info.input_semantic_name[slot],
+               shader->info.input_semantic_index[slot],
+               shader->input_info);
+#if DEBUG_INPUTS
+            /* Was printing an undeclared 'idx'; the lane is prim_idx. */
+            debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n",
+                         slot, vs_slot, prim_idx);
+#endif
+            for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
+               (*input_data)[i][slot][chan][prim_idx] = input[vs_slot][chan];
+            }
+#if DEBUG_INPUTS
+            debug_printf("\t\t%f %f %f %f\n",
+                         (*input_data)[i][slot][0][prim_idx],
+                         (*input_data)[i][slot][1][prim_idx],
+                         (*input_data)[i][slot][2][prim_idx],
+                         (*input_data)[i][slot][3][prim_idx]);
+#endif
+         }
+      }
+   }
+}
+
+/**
+ * Gather the results of one JIT geometry shader invocation.
+ *
+ * The generated code (see draw_gs_llvm_emit_vertex) stores the vertices of
+ * lane i starting at vertex index i * max_output_vertices of its output
+ * buffer, so lanes that emitted fewer vertices leave gaps.  This function
+ * packs the lanes back to back, copies the per-lane primitive lengths into
+ * shader->primitive_lengths and advances the emitted vertex/primitive
+ * totals.
+ *
+ * shader          geometry shader that was just run
+ * num_primitives  unused; kept for the fetch_outputs callback signature
+ * p_output        unused; kept for the fetch_outputs callback signature
+ */
+static void
+llvm_fetch_gs_outputs(struct draw_geometry_shader *shader,
+                      unsigned num_primitives,
+                      float (**p_output)[4])
+{
+   int total_verts = 0;
+   int vertex_count = 0;
+   int total_prims = 0;
+   char *output_ptr = (char*)shader->gs_output;
+   int i, j, prim_idx;
+
+   (void) num_primitives;
+   (void) p_output;
+
+   for (i = 0; i < shader->vector_length; ++i) {
+      total_prims += shader->llvm_emitted_primitives[i];
+      total_verts += shader->llvm_emitted_vertices[i];
+   }
+
+   /* This invocation wrote after the vertices emitted by earlier ones
+    * (same base as computed in llvm_gs_run). */
+   output_ptr += shader->emitted_vertices * shader->vertex_size;
+
+   /* Lane 0 is already in place.  Move every following lane from its
+    * fixed slot down to the end of the packed data.  The destination never
+    * lies above the source, but the two ranges may overlap, hence memmove
+    * rather than memcpy.  Only the vertices the lane really emitted are
+    * moved, and the source is the lane's own slot, so sparse lanes no
+    * longer drag garbage along or get left behind. */
+   for (i = 1; i < shader->vector_length; ++i) {
+      int lane_verts = shader->llvm_emitted_vertices[i];
+      int lane_start = i * shader->max_output_vertices;
+
+      vertex_count += shader->llvm_emitted_vertices[i - 1];
+
+      if (lane_verts > 0 && lane_start != vertex_count) {
+         memmove(output_ptr + vertex_count * shader->vertex_size,
+                 output_ptr + lane_start * shader->vertex_size,
+                 shader->vertex_size * lane_verts);
+      }
+   }
+
+   /* llvm_prim_lengths is indexed [primitive][lane]; flatten it lane by
+    * lane so the order matches the packed vertices. */
+   prim_idx = 0;
+   for (i = 0; i < shader->vector_length; ++i) {
+      int num_prims = shader->llvm_emitted_primitives[i];
+      for (j = 0; j < num_prims; ++j) {
+         int prim_length =
+            shader->llvm_prim_lengths[j][i];
+         shader->primitive_lengths[shader->emitted_primitives + prim_idx] =
+            prim_length;
+         ++prim_idx;
+      }
+   }
+
+   shader->emitted_primitives += total_prims;
+   shader->emitted_vertices += total_verts;
+}
+
+/**
+ * 'prepare' callback for the LLVM path.  Intentionally a no-op: nothing is
+ * done with the constant buffers here.
+ */
+static void
+llvm_gs_prepare(struct draw_geometry_shader *shader,
+                const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+                const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS])
+{
+   (void) shader;
+   (void) constants;
+   (void) constants_size;
+}
+
+/**
+ * Invoke the current JIT-compiled variant on the primitives gathered in
+ * shader->gs_input and return whatever the generated function returns.
+ */
+static unsigned
+llvm_gs_run(struct draw_geometry_shader *shader,
+            unsigned input_primitives)
+{
+   /* Point the shader past the vertices emitted so far. */
+   char *out_base = (char*)shader->gs_output;
+
+   out_base += (shader->emitted_vertices * shader->vertex_size);
+
+   return shader->current_variant->jit_func(shader->jit_context,
+                                            shader->gs_input->data,
+                                            (struct vertex_header*)out_base,
+                                            input_primitives,
+                                            shader->draw->instance_id);
+}
+
+#endif
+
static void gs_flush(struct draw_geometry_shader *shader)
{
unsigned out_prim_count;
@@ -219,13 +360,15 @@ static void gs_flush(struct draw_geometry_shader *shader)
input_primitives <= 4);
out_prim_count = shader->run(shader, input_primitives);
+ shader->fetch_outputs(shader, out_prim_count,
+ &shader->tmp_output);
+
#if 0
debug_printf("PRIM emitted prims = %d (verts=%d), cur prim count = %d\n",
shader->emitted_primitives, shader->emitted_vertices,
out_prim_count);
#endif
- shader->fetch_outputs(shader, out_prim_count,
- &shader->tmp_output);
+
shader->fetched_prim_count = 0;
}
@@ -331,8 +474,8 @@ static void gs_tri_adj(struct draw_geometry_shader *shader,
* Execute geometry shader.
*/
int draw_geometry_shader_run(struct draw_geometry_shader *shader,
- const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
- const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
+ const void *constants[PIPE_MAX_CONSTANT_BUFFERS],
+ const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS],
const struct draw_vertex_info *input_verts,
const struct draw_prim_info *input_prim,
const struct tgsi_shader_info *input_info,
@@ -344,14 +487,20 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
unsigned num_outputs = shader->info.num_outputs;
unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * sizeof(float);
unsigned num_input_verts = input_prim->linear ?
- input_verts->count :
- input_prim->count;
+ input_verts->count :
+ input_prim->count;
unsigned num_in_primitives =
- MAX2(u_gs_prims_for_vertices(input_prim->prim, num_input_verts),
- u_gs_prims_for_vertices(shader->input_primitive, num_input_verts));
+ align(
+ MAX2(u_gs_prims_for_vertices(input_prim->prim, num_input_verts),
+ u_gs_prims_for_vertices(shader->input_primitive, num_input_verts)),
+ shader->vector_length);
unsigned max_out_prims = u_gs_prims_for_vertices(shader->output_primitive,
shader->max_output_vertices)
- * num_in_primitives;
+ * num_in_primitives;
+
+ //Assume at least one primitive
+ max_out_prims = MAX2(max_out_prims, 1);
+
output_verts->vertex_size = vertex_size;
output_verts->stride = output_verts->vertex_size;
@@ -385,6 +534,34 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader,
FREE(shader->primitive_lengths);
shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned));
+
+#ifdef HAVE_LLVM
+ if (draw_get_option_use_llvm()) {
+ shader->gs_output = output_verts->verts;
+ if (max_out_prims > shader->max_out_prims) {
+ unsigned i;
+ if (shader->llvm_prim_lengths) {
+ for (i = 0; i < shader->max_out_prims; ++i) {
+ align_free(shader->llvm_prim_lengths[i]);
+ }
+ FREE(shader->llvm_prim_lengths);
+ }
+
+ shader->llvm_prim_lengths = MALLOC(max_out_prims * sizeof(unsigned*));
+ for (i = 0; i < max_out_prims; ++i) {
+ int vector_size = shader->vector_length * sizeof(unsigned);
+ shader->llvm_prim_lengths[i] =
+ align_malloc(vector_size, vector_size);
+ }
+
+ shader->max_out_prims = max_out_prims;
+ }
+ shader->jit_context->prim_lengths = shader->llvm_prim_lengths;
+ shader->jit_context->emitted_vertices = shader->llvm_emitted_vertices;
+ shader->jit_context->emitted_prims = shader->llvm_emitted_primitives;
+ }
+#endif
+
shader->prepare(shader, constants, constants_size);
if (input_prim->linear)
@@ -464,10 +641,27 @@ struct draw_geometry_shader *
draw_create_geometry_shader(struct draw_context *draw,
const struct pipe_shader_state *state)
{
+#ifdef HAVE_LLVM
+ struct llvm_geometry_shader *llvm_gs;
+#endif
struct draw_geometry_shader *gs;
unsigned i;
- gs = CALLOC_STRUCT(draw_geometry_shader);
+#ifdef HAVE_LLVM
+ if (draw_get_option_use_llvm()) {
+ llvm_gs = CALLOC_STRUCT(llvm_geometry_shader);
+
+ if (llvm_gs == NULL)
+ return NULL;
+
+ gs = &llvm_gs->base;
+
+ make_empty_list(&llvm_gs->variants);
+ } else
+#endif
+ {
+ gs = CALLOC_STRUCT(draw_geometry_shader);
+ }
if (!gs)
return NULL;
@@ -486,6 +680,17 @@ draw_create_geometry_shader(struct draw_context *draw,
gs->input_primitive = PIPE_PRIM_TRIANGLES;
gs->output_primitive = PIPE_PRIM_TRIANGLE_STRIP;
gs->max_output_vertices = 32;
+ gs->max_out_prims = 0;
+
+ if (draw_get_option_use_llvm()) {
+ /* TODO: change the input array to handle the following
+ vector length, instead of the currently hardcoded
+ TGSI_NUM_CHANNELS
+ gs->vector_length = lp_native_vector_width / 32;*/
+ gs->vector_length = TGSI_NUM_CHANNELS;
+ } else {
+ gs->vector_length = TGSI_NUM_CHANNELS;
+ }
for (i = 0; i < gs->info.num_properties; ++i) {
if (gs->info.properties[i].name ==
@@ -507,10 +712,36 @@ draw_create_geometry_shader(struct draw_context *draw,
gs->machine = draw->gs.tgsi.machine;
- gs->fetch_outputs = tgsi_fetch_gs_outputs;
- gs->fetch_inputs = tgsi_fetch_gs_input;
- gs->prepare = tgsi_gs_prepare;
- gs->run = tgsi_gs_run;
+#ifdef HAVE_LLVM
+ if (draw_get_option_use_llvm()) {
+ int vector_size = gs->vector_length * sizeof(float);
+ gs->gs_input = align_malloc(sizeof(struct draw_gs_inputs), 16);
+ memset(gs->gs_input, 0, sizeof(struct draw_gs_inputs));
+ gs->llvm_prim_lengths = 0;
+
+ gs->llvm_emitted_primitives = align_malloc(vector_size, vector_size);
+ gs->llvm_emitted_vertices = align_malloc(vector_size, vector_size);
+
+ gs->fetch_outputs = llvm_fetch_gs_outputs;
+ gs->fetch_inputs = llvm_fetch_gs_input;
+ gs->prepare = llvm_gs_prepare;
+ gs->run = llvm_gs_run;
+
+ gs->jit_context = &draw->llvm->gs_jit_context;
+
+
+ llvm_gs->variant_key_size =
+ draw_gs_llvm_variant_key_size(
+ MAX2(gs->info.file_max[TGSI_FILE_SAMPLER]+1,
+ gs->info.file_max[TGSI_FILE_SAMPLER_VIEW]+1));
+ } else
+#endif
+ {
+ gs->fetch_outputs = tgsi_fetch_gs_outputs;
+ gs->fetch_inputs = tgsi_fetch_gs_input;
+ gs->prepare = tgsi_gs_prepare;
+ gs->run = tgsi_gs_run;
+ }
return gs;
}
@@ -535,7 +766,42 @@ void draw_bind_geometry_shader(struct draw_context *draw,
void draw_delete_geometry_shader(struct draw_context *draw,
struct draw_geometry_shader *dgs)
{
+#ifdef HAVE_LLVM
+ if (draw_get_option_use_llvm()) {
+ struct llvm_geometry_shader *shader = llvm_geometry_shader(dgs);
+ struct draw_gs_llvm_variant_list_item *li;
+
+ li = first_elem(&shader->variants);
+ while(!at_end(&shader->variants, li)) {
+ struct draw_gs_llvm_variant_list_item *next = next_elem(li);
+ draw_gs_llvm_destroy_variant(li->base);
+ li = next;
+ }
+
+ assert(shader->variants_cached == 0);
+
+ if (dgs->llvm_prim_lengths) {
+ unsigned i;
+ for (i = 0; i < dgs->max_out_prims; ++i) {
+ align_free(dgs->llvm_prim_lengths[i]);
+ }
+ FREE(dgs->llvm_prim_lengths);
+ }
+ align_free(dgs->llvm_emitted_primitives);
+ align_free(dgs->llvm_emitted_vertices);
+
+ align_free(dgs->gs_input);
+ }
+#endif
+
FREE(dgs->primitive_lengths);
FREE((void*) dgs->state.tokens);
FREE(dgs);
}
+
+
+void draw_gs_set_current_variant(struct draw_geometry_shader *shader,
+ struct draw_gs_llvm_variant *variant)
+{
+ shader->current_variant = variant;
+}
diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h
index 7ab4f04..e62b34a 100644
--- a/src/gallium/auxiliary/draw/draw_gs.h
+++ b/src/gallium/auxiliary/draw/draw_gs.h
@@ -31,11 +31,28 @@
#include "draw_context.h"
#include "draw_private.h"
-
#define MAX_TGSI_PRIMITIVES 4
struct draw_context;
+#ifdef HAVE_LLVM
+struct draw_gs_jit_context;
+struct draw_gs_llvm_variant;
+
+/**
+ * Structure holding the inputs to the geometry shader. It uses SOA layout
+ * and is indexed as data[vertex][attribute][channel][primitive].
+ * The dimensions are as follows:
+ * - maximum number of vertices for a geometry shader input primitive
+ * (6 for triangle_adjacency)
+ * - maximum number of attributes for each vertex
+ * - four channels per each attribute (x,y,z,w)
+ * - number of input primitives equal to the SOA vector length
+ * (currently hardcoded to TGSI_NUM_CHANNELS)
+ */
+struct draw_gs_inputs {
+ float data[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS];
+};
+#endif
+
/**
* Private version of the compiled geometry shader
*/
@@ -66,6 +83,19 @@ struct draw_geometry_shader {
unsigned fetched_prim_count;
const float (*input)[4];
const struct tgsi_shader_info *input_info;
+ unsigned vector_length;
+ unsigned max_out_prims;
+
+#ifdef HAVE_LLVM
+ struct draw_gs_inputs *gs_input;
+ struct draw_gs_jit_context *jit_context;
+ struct draw_gs_llvm_variant *current_variant;
+ struct vertex_header *gs_output;
+
+ int **llvm_prim_lengths;
+ int *llvm_emitted_primitives;
+ int *llvm_emitted_vertices;
+#endif
void (*fetch_inputs)(struct draw_geometry_shader *shader,
unsigned *indices,
@@ -102,4 +132,7 @@ void draw_geometry_shader_prepare(struct draw_geometry_shader *shader,
int draw_gs_max_output_vertices(struct draw_geometry_shader *shader,
unsigned pipe_prim);
+void draw_gs_set_current_variant(struct draw_geometry_shader *shader,
+ struct draw_gs_llvm_variant *variant);
+
#endif
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index 602839d..e46195b 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -29,6 +29,7 @@
#include "draw_context.h"
#include "draw_vs.h"
+#include "draw_gs.h"
#include "gallivm/lp_bld_arit.h"
#include "gallivm/lp_bld_logic.h"
@@ -229,6 +230,85 @@ create_jit_context_type(struct gallivm_state *gallivm,
/**
+ * Create LLVM type for struct draw_gs_jit_context
+ */
+static LLVMTypeRef
+create_gs_jit_context_type(struct gallivm_state *gallivm,
+ unsigned vector_length,
+ LLVMTypeRef texture_type, LLVMTypeRef sampler_type,
+ const char *struct_name)
+{
+ LLVMTargetDataRef target = gallivm->target;
+ LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context);
+ LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context);
+ LLVMTypeRef elem_types[8]; /* one entry per member of struct draw_gs_jit_context; see the LP_CHECK_* lines below */
+ LLVMTypeRef context_type;
+
+ elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */
+ LP_MAX_TGSI_CONST_BUFFERS);
+ elem_types[1] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4),
+ DRAW_TOTAL_CLIP_PLANES), 0); /* planes */
+ elem_types[2] = LLVMPointerType(float_type, 0); /* viewport */
+
+ elem_types[3] = LLVMPointerType(LLVMPointerType(int_type, 0), 0); /* prim_lengths */
+ elem_types[4] = LLVMPointerType(LLVMVectorType(int_type,
+ vector_length), 0); /* emitted_vertices: pointer to one int vector, a lane per primitive */
+ elem_types[5] = LLVMPointerType(LLVMVectorType(int_type,
+ vector_length), 0); /* emitted_prims: pointer to one int vector, a lane per primitive */
+
+ elem_types[6] = LLVMArrayType(texture_type,
+ PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */
+ elem_types[7] = LLVMArrayType(sampler_type,
+ PIPE_MAX_SAMPLERS); /* samplers */
+
+ context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
+ Elements(elem_types), 0); /* last argument 0: not a packed struct */
+#if HAVE_LLVM < 0x0300
+ LLVMAddTypeName(gallivm->module, struct_name, context_type); /* struct_name is only used on this pre-3.0 path */
+
+ LLVMInvalidateStructLayout(gallivm->target, context_type);
+#endif
+
+ LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, constants, /* NOTE(review): presumably asserts the C and LLVM layouts agree - confirm against the macro */
+ target, context_type, 0);
+ LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, planes,
+ target, context_type, 1);
+ LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewport,
+ target, context_type, 2);
+ LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths,
+ target, context_type, 3);
+ LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices,
+ target, context_type, 4);
+ LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims,
+ target, context_type, 5);
+ LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, textures,
+ target, context_type,
+ DRAW_GS_JIT_CTX_TEXTURES); /* presumably 6, matching elem_types[6] - TODO confirm */
+ LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, samplers,
+ target, context_type,
+ DRAW_GS_JIT_CTX_SAMPLERS); /* presumably 7, matching elem_types[7] - TODO confirm */
+ LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context,
+ target, context_type);
+
+ return context_type;
+}
+
+
+/**
+ * Build the LLVM type of the geometry shader input argument: a pointer to
+ * [PIPE_MAX_SHADER_INPUTS x [TGSI_NUM_CHANNELS x <TGSI_NUM_CHANNELS x float>]].
+ * The pointee describes one vertex; the pointer is what gets indexed by the
+ * vertex number within the primitive.
+ */
+static LLVMTypeRef
+create_gs_jit_input_type(struct gallivm_state *gallivm)
+{
+   LLVMTypeRef f32 = LLVMFloatTypeInContext(gallivm->context);
+   /* one value per input primitive */
+   LLVMTypeRef per_channel = LLVMVectorType(f32, TGSI_NUM_CHANNELS);
+   /* x, y, z, w of one attribute */
+   LLVMTypeRef per_attrib = LLVMArrayType(per_channel, TGSI_NUM_CHANNELS);
+   /* all attributes of one vertex */
+   LLVMTypeRef per_vertex = LLVMArrayType(per_attrib, PIPE_MAX_SHADER_INPUTS);
+
+   return LLVMPointerType(per_vertex, 0);
+}
+
+/**
* Create LLVM type for struct pipe_vertex_buffer
*/
static LLVMTypeRef
@@ -397,6 +477,9 @@ draw_llvm_create(struct draw_context *draw)
llvm->nr_variants = 0;
make_empty_list(&llvm->vs_variants_list);
+ llvm->nr_gs_variants = 0;
+ make_empty_list(&llvm->gs_variants_list);
+
return llvm;
}
@@ -498,7 +581,8 @@ generate_vs(struct draw_llvm_variant *variant,
inputs,
outputs,
sampler,
- &llvm->draw->vs.vertex_shader->info);
+ &llvm->draw->vs.vertex_shader->info,
+ NULL);
{
LLVMValueRef out;
@@ -695,6 +779,7 @@ static void
store_aos_array(struct gallivm_state *gallivm,
struct lp_type soa_type,
LLVMValueRef io_ptr,
+ LLVMValueRef *indices,
LLVMValueRef* aos,
int attrib,
int num_outputs,
@@ -707,11 +792,15 @@ store_aos_array(struct gallivm_state *gallivm,
LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
int vector_length = soa_type.length;
int i;
-
+
debug_assert(TGSI_NUM_CHANNELS == 4);
for (i = 0; i < vector_length; i++) {
- inds[i] = lp_build_const_int32(gallivm, i);
+ if (indices) {
+ inds[i] = indices[i];
+ } else {
+ inds[i] = lp_build_const_int32(gallivm, i);
+ }
io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");
}
@@ -753,6 +842,7 @@ store_aos_array(struct gallivm_state *gallivm,
static void
convert_to_aos(struct gallivm_state *gallivm,
LLVMValueRef io,
+ LLVMValueRef *indices,
LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
LLVMValueRef clipmask,
int num_outputs,
@@ -803,7 +893,7 @@ convert_to_aos(struct gallivm_state *gallivm,
store_aos_array(gallivm,
soa_type,
- io,
+ io, indices,
aos,
attrib,
num_outputs,
@@ -821,14 +911,14 @@ convert_to_aos(struct gallivm_state *gallivm,
static void
store_clip(struct gallivm_state *gallivm,
const struct lp_type vs_type,
- LLVMValueRef io_ptr,
+ LLVMValueRef io_ptr,
LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
boolean pre_clip_pos, int idx)
{
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef soa[4];
LLVMValueRef aos[LP_MAX_VECTOR_LENGTH];
- LLVMValueRef indices[2];
+ LLVMValueRef indices[2];
LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32];
LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32];
LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32];
@@ -836,7 +926,7 @@ store_clip(struct gallivm_state *gallivm,
indices[0] =
indices[1] = lp_build_const_int32(gallivm, 0);
-
+
for (i = 0; i < vs_type.length; i++) {
inds[i] = lp_build_const_int32(gallivm, i);
io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, "");
@@ -893,13 +983,13 @@ generate_viewport(struct draw_llvm_variant *variant,
struct lp_type f32_type = vs_type;
LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type);
LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 .. wn*/
- LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0); /*1.0 1.0 1.0 1.0*/
+ LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0); /*1.0 1.0 1.0 1.0*/
LLVMValueRef vp_ptr = draw_jit_context_viewport(gallivm, context_ptr);
/* for 1/w convention*/
out3 = LLVMBuildFDiv(builder, const1, out3, "");
LLVMBuildStore(builder, out3, outputs[0][3]);
-
+
/* Viewport Mapping */
for (i=0; i<3; i++) {
LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 .. xn*/
@@ -908,7 +998,7 @@ generate_viewport(struct draw_llvm_variant *variant,
LLVMValueRef scale_i;
LLVMValueRef trans_i;
LLVMValueRef index;
-
+
index = lp_build_const_int32(gallivm, i);
scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, "");
@@ -930,14 +1020,14 @@ generate_viewport(struct draw_llvm_variant *variant,
/* store transformed outputs */
LLVMBuildStore(builder, out, outputs[0][i]);
}
-
+
}
/**
* Returns clipmask as nxi32 bitmask for the n vertices
*/
-static LLVMValueRef
+static LLVMValueRef
generate_clipmask(struct draw_llvm *llvm,
struct gallivm_state *gallivm,
struct lp_type vs_type,
@@ -952,7 +1042,7 @@ generate_clipmask(struct draw_llvm *llvm,
{
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef mask; /* stores the <nxi32> clipmasks */
- LLVMValueRef test, temp;
+ LLVMValueRef test, temp;
LLVMValueRef zero, shift;
LLVMValueRef pos_x, pos_y, pos_z, pos_w;
LLVMValueRef cv_x, cv_y, cv_z, cv_w;
@@ -967,7 +1057,7 @@ generate_clipmask(struct draw_llvm *llvm,
cd[0] = draw_current_shader_clipdistance_output(llvm->draw, 0);
cd[1] = draw_current_shader_clipdistance_output(llvm->draw, 1);
-
+
if (cd[0] != pos || cd[1] != pos)
have_cd = true;
@@ -1002,27 +1092,27 @@ generate_clipmask(struct draw_llvm *llvm,
/* plane 1 */
test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
temp = shift;
- test = LLVMBuildAnd(builder, test, temp, "");
+ test = LLVMBuildAnd(builder, test, temp, "");
mask = test;
-
+
/* plane 2 */
test = LLVMBuildFAdd(builder, pos_x, pos_w, "");
test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
temp = LLVMBuildShl(builder, temp, shift, "");
- test = LLVMBuildAnd(builder, test, temp, "");
+ test = LLVMBuildAnd(builder, test, temp, "");
mask = LLVMBuildOr(builder, mask, test, "");
-
+
/* plane 3 */
test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w);
temp = LLVMBuildShl(builder, temp, shift, "");
- test = LLVMBuildAnd(builder, test, temp, "");
+ test = LLVMBuildAnd(builder, test, temp, "");
mask = LLVMBuildOr(builder, mask, test, "");
/* plane 4 */
test = LLVMBuildFAdd(builder, pos_y, pos_w, "");
test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
temp = LLVMBuildShl(builder, temp, shift, "");
- test = LLVMBuildAnd(builder, test, temp, "");
+ test = LLVMBuildAnd(builder, test, temp, "");
mask = LLVMBuildOr(builder, mask, test, "");
}
@@ -1031,22 +1121,22 @@ generate_clipmask(struct draw_llvm *llvm,
if (clip_halfz) {
/* plane 5 */
test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
- test = LLVMBuildAnd(builder, test, temp, "");
+ test = LLVMBuildAnd(builder, test, temp, "");
mask = LLVMBuildOr(builder, mask, test, "");
- }
+ }
else {
/* plane 5 */
test = LLVMBuildFAdd(builder, pos_z, pos_w, "");
test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test);
- test = LLVMBuildAnd(builder, test, temp, "");
+ test = LLVMBuildAnd(builder, test, temp, "");
mask = LLVMBuildOr(builder, mask, test, "");
}
/* plane 6 */
test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w);
temp = LLVMBuildShl(builder, temp, shift, "");
- test = LLVMBuildAnd(builder, test, temp, "");
+ test = LLVMBuildAnd(builder, test, temp, "");
mask = LLVMBuildOr(builder, mask, test, "");
- }
+ }
if (clip_user) {
LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr);
@@ -1118,7 +1208,7 @@ generate_clipmask(struct draw_llvm *llvm,
/**
* Returns boolean if any clipping has occurred
- * Used zero/non-zero i32 value to represent boolean
+ * Used zero/non-zero i32 value to represent boolean
*/
static LLVMValueRef
clipmask_booli32(struct gallivm_state *gallivm,
@@ -1144,6 +1234,94 @@ clipmask_booli32(struct gallivm_state *gallivm,
return ret;
}
+/**
+ * Emit-vertex callback: generate code that stores the current shader
+ * outputs, in AoS form, into the output vertex buffer.
+ *
+ * Lane n writes to vertex index n * max_output_vertices + (number of
+ * vertices lane n has emitted so far), relative to variant->io_ptr.
+ * The clip mask written with each vertex is always zero.
+ */
+static void
+draw_gs_llvm_emit_vertex(struct lp_build_tgsi_context * bld_base,
+                         LLVMValueRef (*outputs)[4],
+                         LLVMValueRef emitted_vertices_vec,
+                         void *user_data)
+{
+   struct draw_gs_llvm_variant *variant = user_data;
+   struct gallivm_state *gallivm = variant->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   const struct tgsi_shader_info *gs_info = &variant->shader->base.info;
+   struct lp_type gs_type = bld_base->base.type;
+   LLVMValueRef zero_clipmask =
+      lp_build_const_int_vec(gallivm, lp_int_type(gs_type), 0);
+   LLVMValueRef lane_stride =
+      lp_build_const_int32(gallivm, variant->shader->base.max_output_vertices);
+   LLVMValueRef vertex_index[LP_MAX_VECTOR_LENGTH];
+   unsigned lane;
+
+   for (lane = 0; lane < gs_type.length; ++lane) {
+      LLVMValueRef lane_const = lp_build_const_int32(gallivm, lane);
+      LLVMValueRef already_emitted =
+         LLVMBuildExtractElement(builder, emitted_vertices_vec, lane_const, "");
+      LLVMValueRef lane_base =
+         LLVMBuildMul(builder, lane_const, lane_stride, "");
+
+      vertex_index[lane] =
+         LLVMBuildAdd(builder, lane_base, already_emitted, "");
+   }
+
+   convert_to_aos(gallivm, variant->io_ptr, vertex_index,
+                  outputs, zero_clipmask,
+                  gs_info->num_outputs, gs_type,
+                  FALSE);
+}
+
+/**
+ * End-primitive callback: for every lane, generate code that records the
+ * vertex count of the primitive just finished at
+ * prim_lengths[primitives emitted by that lane][lane].
+ */
+static void
+draw_gs_llvm_end_primitive(struct lp_build_tgsi_context * bld_base,
+                           LLVMValueRef verts_per_prim_vec,
+                           LLVMValueRef emitted_prims_vec,
+                           void *user_data)
+{
+   struct draw_gs_llvm_variant *variant = user_data;
+   struct gallivm_state *gallivm = variant->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef prim_lengths_ptr =
+      draw_gs_jit_prim_lengths(variant->gallivm, variant->context_ptr);
+   unsigned lane;
+
+   for (lane = 0; lane < bld_base->base.type.length; ++lane) {
+      LLVMValueRef lane_const = lp_build_const_int32(gallivm, lane);
+      LLVMValueRef prim_slot =
+         LLVMBuildExtractElement(builder, emitted_prims_vec, lane_const, "");
+      LLVMValueRef vertex_total =
+         LLVMBuildExtractElement(builder, verts_per_prim_vec, lane_const, "");
+      LLVMValueRef row_ptr, row, dst;
+
+      /* prim_lengths[prim_slot] is itself a pointer to one int per lane */
+      row_ptr = LLVMBuildGEP(builder, prim_lengths_ptr, &prim_slot, 1, "");
+      row = LLVMBuildLoad(builder, row_ptr, "");
+      dst = LLVMBuildGEP(builder, row, &lane_const, 1, "");
+      LLVMBuildStore(builder, vertex_total, dst);
+   }
+}
+
+/**
+ * Epilogue callback: generate code that writes the per-lane totals of
+ * emitted vertices and emitted primitives to the locations referenced by
+ * the JIT context.
+ */
+static void
+draw_gs_llvm_epilogue(struct lp_build_tgsi_context * bld_base,
+                      LLVMValueRef total_emitted_vertices_vec,
+                      LLVMValueRef emitted_prims_vec,
+                      void *user_data)
+{
+   struct draw_gs_llvm_variant *variant = user_data;
+   struct gallivm_state *gallivm = variant->gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef verts_dst =
+      draw_gs_jit_emitted_vertices(gallivm, variant->context_ptr);
+   LLVMValueRef prims_dst =
+      draw_gs_jit_emitted_prims(gallivm, variant->context_ptr);
+   LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
+
+   /* GEPs with an index count of 0, exactly as the code always issued them */
+   verts_dst = LLVMBuildGEP(builder, verts_dst, &zero, 0, "");
+   prims_dst = LLVMBuildGEP(builder, prims_dst, &zero, 0, "");
+
+   LLVMBuildStore(builder, total_emitted_vertices_vec, verts_dst);
+   LLVMBuildStore(builder, emitted_prims_vec, prims_dst);
+}
static void
draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
@@ -1323,7 +1501,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
&true_index, 1, "");
true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt");
}
-
+
system_values.vertex_id = LLVMBuildInsertElement(gallivm->builder,
system_values.vertex_id, true_index,
lp_build_const_int32(gallivm, i), "");
@@ -1387,11 +1565,11 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
clipmask = lp_build_const_int_vec(gallivm, lp_int_type(vs_type), 0);
}
- /* store clipmask in vertex header,
- * original positions in clip
- * and transformed positions in data
- */
- convert_to_aos(gallivm, io, outputs, clipmask,
+ /* store clipmask in vertex header,
+ * original positions in clip
+ * and transformed positions in data
+ */
+ convert_to_aos(gallivm, io, NULL, outputs, clipmask,
vs_info->num_outputs, vs_type,
have_clipdist);
}
@@ -1437,8 +1615,8 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules;
key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
+ key->has_gs = llvm->draw->gs.geometry_shader != NULL;
key->pad1 = 0;
- key->pad2 = 0;
/* All variants of this shader will have the same value for
* nr_samplers. Not yet trying to compact away holes in the
@@ -1487,6 +1665,7 @@ draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
debug_printf("bypass_viewport = %u\n", key->bypass_viewport);
debug_printf("clip_halfz = %u\n", key->clip_halfz);
debug_printf("need_edgeflags = %u\n", key->need_edgeflags);
+ debug_printf("has_gs = %u\n", key->has_gs);
debug_printf("ucp_enable = %u\n", key->ucp_enable);
for (i = 0 ; i < key->nr_vertex_elements; i++) {
@@ -1504,6 +1683,7 @@ draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key)
void
draw_llvm_set_mapped_texture(struct draw_context *draw,
+ unsigned shader_stage,
unsigned sview_idx,
uint32_t width, uint32_t height, uint32_t depth,
uint32_t first_level, uint32_t last_level,
@@ -1515,9 +1695,18 @@ draw_llvm_set_mapped_texture(struct draw_context *draw,
unsigned j;
struct draw_jit_texture *jit_tex;
- assert(sview_idx < Elements(draw->llvm->jit_context.textures));
+ assert(shader_stage == PIPE_SHADER_VERTEX ||
+ shader_stage == PIPE_SHADER_GEOMETRY);
+
+ if (shader_stage == PIPE_SHADER_VERTEX) {
+ assert(sview_idx < Elements(draw->llvm->jit_context.textures));
- jit_tex = &draw->llvm->jit_context.textures[sview_idx];
+ jit_tex = &draw->llvm->jit_context.textures[sview_idx];
+ } else if (shader_stage == PIPE_SHADER_GEOMETRY) {
+ assert(sview_idx < Elements(draw->llvm->gs_jit_context.textures));
+
+ jit_tex = &draw->llvm->gs_jit_context.textures[sview_idx];
+ }
jit_tex->width = width;
jit_tex->height = height;
@@ -1551,6 +1740,19 @@ draw_llvm_set_sampler_state(struct draw_context *draw)
COPY_4V(jit_sam->border_color, s->border_color.f);
}
}
+
+   /* Mirror the bound geometry-shader samplers into the GS JIT context. */
+   for (i = 0; i < draw->num_samplers[PIPE_SHADER_GEOMETRY]; i++) {
+      struct draw_jit_sampler *jit_sam = &draw->llvm->gs_jit_context.samplers[i];
+      const struct pipe_sampler_state *s =
+         draw->samplers[PIPE_SHADER_GEOMETRY][i];
+
+      /* draw->samplers is indexed [stage][slot], so the slot that must be
+       * tested for NULL is the geometry-stage one that is read below;
+       * draw->samplers[i] alone is not a sampler pointer.
+       */
+      if (s) {
+         jit_sam->min_lod = s->min_lod;
+         jit_sam->max_lod = s->max_lod;
+         jit_sam->lod_bias = s->lod_bias;
+         COPY_4V(jit_sam->border_color, s->border_color.f);
+      }
+   }
}
@@ -1577,3 +1779,298 @@ draw_llvm_destroy_variant(struct draw_llvm_variant *variant)
llvm->nr_variants--;
FREE(variant);
}
+
+
+/**
+ * Build the LLVM types needed by the geometry shader JIT function and
+ * store them on the variant: the pointer-to-context type and the input
+ * array type.
+ */
+static void
+create_gs_jit_types(struct draw_gs_llvm_variant *var)
+{
+   struct gallivm_state *gallivm = var->gallivm;
+   LLVMTypeRef tex_ty = create_jit_texture_type(gallivm, "texture");
+   LLVMTypeRef samp_ty = create_jit_sampler_type(gallivm, "sampler");
+   LLVMTypeRef ctx_ty =
+      create_gs_jit_context_type(gallivm,
+                                 var->shader->base.vector_length,
+                                 tex_ty, samp_ty,
+                                 "draw_gs_jit_context");
+
+   var->context_ptr_type = LLVMPointerType(ctx_ty, 0);
+   var->input_array_type = create_gs_jit_input_type(gallivm);
+}
+
+/**
+ * Return the variant's context pointer type, creating the GS JIT types
+ * first when the cached type is still NULL.
+ */
+static LLVMTypeRef
+get_gs_context_ptr_type(struct draw_gs_llvm_variant *variant)
+{
+   if (variant->context_ptr_type == NULL) {
+      create_gs_jit_types(variant);
+   }
+
+   return variant->context_ptr_type;
+}
+
+/**
+ * Build the initial execution mask value for the geometry shader.
+ *
+ * For every lane k (0-based) the scalar comparison num_prims >= k + 1 is
+ * emitted, the per-lane results are inserted into a vector, and that
+ * vector is finally compared against zero with PIPE_FUNC_NOTEQUAL.
+ */
+static LLVMValueRef
+generate_mask_value(struct draw_gs_llvm_variant *variant,
+                    struct lp_type gs_type)
+{
+   struct gallivm_state *gallivm = variant->gallivm;
+   const struct lp_type mask_type = lp_int_type(gs_type);
+   const struct lp_type mask_elem_type = lp_elem_type(mask_type);
+   LLVMValueRef lane_bits[16];
+   LLVMValueRef mask_val;
+   unsigned lane;
+
+   mask_val = lp_build_const_vec(gallivm, mask_type, 0);
+
+   assert(gs_type.length <= Elements(lane_bits));
+
+   /* Emit the comparisons for the highest lane first. */
+   for (lane = gs_type.length; lane-- > 0; ) {
+      LLVMValueRef threshold = lp_build_const_int32(gallivm, lane + 1);
+
+      lane_bits[lane] = lp_build_compare(gallivm,
+                                         mask_elem_type, PIPE_FUNC_GEQUAL,
+                                         variant->num_prims, threshold);
+   }
+
+   /* Pack the per-lane results into the vector, lowest lane first. */
+   for (lane = 0; lane < gs_type.length; ++lane) {
+      mask_val = LLVMBuildInsertElement(gallivm->builder, mask_val,
+                                        lane_bits[lane],
+                                        lp_build_const_int32(gallivm, lane),
+                                        "");
+   }
+
+   return lp_build_compare(gallivm,
+                           mask_type, PIPE_FUNC_NOTEQUAL,
+                           mask_val,
+                           lp_build_const_int_vec(gallivm, mask_type, 0));
+}
+
+/**
+ * Generate the LLVM IR for one geometry shader variant.
+ *
+ * Declares the "draw_geometry_shader" function with the parameters
+ * (context, input, vertex_header, num_prims, instance_id), stores the
+ * parameter values the GS callbacks need on the variant, and translates
+ * the shader's TGSI tokens through lp_build_tgsi_soa() with the draw GS
+ * interface callbacks installed.  The generated function returns 0.
+ *
+ * \param llvm     draw LLVM state; provides the bound geometry shader info
+ * \param variant  variant being built; its gallivm state, shader, key and
+ *                 vertex_header_ptr_type must already be set
+ */
+static void
+draw_gs_llvm_generate(struct draw_llvm *llvm,
+                      struct draw_gs_llvm_variant *variant)
+{
+   struct gallivm_state *gallivm = variant->gallivm;
+   LLVMContextRef context = gallivm->context;
+   LLVMTypeRef int32_type = LLVMInt32TypeInContext(context);
+   LLVMTypeRef arg_types[5];
+   LLVMTypeRef func_type;
+   LLVMValueRef variant_func;
+   LLVMValueRef context_ptr;
+   LLVMBasicBlockRef block;
+   LLVMBuilderRef builder;
+   LLVMValueRef io_ptr, input_array, num_prims, mask_val;
+   struct lp_build_sampler_soa *sampler = NULL;
+   struct lp_build_context bld;
+   struct lp_bld_tgsi_system_values system_values;
+   struct lp_type gs_type;
+   unsigned i;
+   struct lp_build_tgsi_gs_iface gs_iface;
+   const struct tgsi_token *tokens = variant->shader->base.state.tokens;
+   LLVMValueRef consts_ptr;
+   LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS];
+   struct lp_build_mask_context mask;
+
+   memset(&system_values, 0, sizeof(system_values));
+
+   assert(variant->vertex_header_ptr_type);
+
+   arg_types[0] = get_gs_context_ptr_type(variant);    /* context */
+   arg_types[1] = variant->input_array_type;           /* input */
+   arg_types[2] = variant->vertex_header_ptr_type;     /* vertex_header */
+   arg_types[3] = int32_type;                          /* num_prims */
+   arg_types[4] = int32_type;                          /* instance_id */
+
+   func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0);
+
+   variant_func = LLVMAddFunction(gallivm->module, "draw_geometry_shader",
+                                  func_type);
+   variant->function = variant_func;
+
+   LLVMSetFunctionCallConv(variant_func, LLVMCCallConv);
+
+   /* Every pointer parameter is marked noalias. */
+   for (i = 0; i < Elements(arg_types); ++i) {
+      if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
+         LLVMAddAttribute(LLVMGetParam(variant_func, i),
+                          LLVMNoAliasAttribute);
+      }
+   }
+
+   context_ptr               = LLVMGetParam(variant_func, 0);
+   input_array               = LLVMGetParam(variant_func, 1);
+   io_ptr                    = LLVMGetParam(variant_func, 2);
+   num_prims                 = LLVMGetParam(variant_func, 3);
+   system_values.instance_id = LLVMGetParam(variant_func, 4);
+
+   /* Give each parameter its own name in the IR; num_prims must be named
+    * through its own value, not through io_ptr.
+    */
+   lp_build_name(context_ptr, "context");
+   lp_build_name(input_array, "input");
+   lp_build_name(io_ptr, "io");
+   lp_build_name(num_prims, "num_prims");
+   lp_build_name(system_values.instance_id, "instance_id");
+
+   variant->context_ptr = context_ptr;
+   variant->io_ptr = io_ptr;
+   variant->num_prims = num_prims;
+
+   gs_iface.input = input_array;
+   gs_iface.emit_vertex = draw_gs_llvm_emit_vertex;
+   gs_iface.end_primitive = draw_gs_llvm_end_primitive;
+   gs_iface.gs_epilogue = draw_gs_llvm_epilogue;
+   gs_iface.user_data = variant;
+
+   /*
+    * Function body
+    */
+
+   block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry");
+   builder = gallivm->builder;
+   LLVMPositionBuilderAtEnd(builder, block);
+
+   lp_build_context_init(&bld, gallivm, lp_type_int(32));
+
+   memset(&gs_type, 0, sizeof gs_type);
+   gs_type.floating = TRUE; /* floating point values */
+   gs_type.sign = TRUE;     /* values are signed */
+   gs_type.norm = FALSE;    /* values are not limited to [0,1] or [-1,1] */
+   gs_type.width = 32;      /* 32-bit float */
+   gs_type.length = variant->shader->base.vector_length;
+
+   consts_ptr = draw_gs_jit_context_constants(variant->gallivm, context_ptr);
+
+   /* code generated texture sampling */
+   sampler = draw_llvm_sampler_soa_create(variant->key.samplers,
+                                          context_ptr);
+
+   /* Lanes beyond num_prims start out masked off. */
+   mask_val = generate_mask_value(variant, gs_type);
+   lp_build_mask_begin(&mask, gallivm, gs_type, mask_val);
+
+   lp_build_tgsi_soa(variant->gallivm,
+                     tokens,
+                     gs_type,
+                     &mask,
+                     consts_ptr,
+                     &system_values,
+                     NULL /*pos*/,
+                     NULL,
+                     outputs,
+                     sampler,
+                     &llvm->draw->gs.geometry_shader->info,
+                     &gs_iface);
+
+   /* The sampler code generator is only used while translating the
+    * shader; release it so it is not leaked per variant.
+    */
+   sampler->destroy(sampler);
+
+   lp_build_mask_end(&mask);
+
+   LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32)));
+
+   gallivm_verify_function(gallivm, variant_func);
+}
+
+
+/**
+ * Allocate, generate, compile and JIT a geometry shader variant.
+ *
+ * The variant is sized for the shader's variable-length key, which is
+ * copied from \p key.  The variant is not linked into any list here; the
+ * list items only get their back-pointers set.
+ *
+ * \param llvm         draw LLVM state owning the variant
+ * \param num_outputs  output count used to build the vertex header type
+ * \param key          variant key, shader->variant_key_size bytes long
+ * \return the new variant, or NULL if the allocation failed
+ */
+struct draw_gs_llvm_variant *
+draw_gs_llvm_create_variant(struct draw_llvm *llvm,
+                            unsigned num_outputs,
+                            const struct draw_gs_llvm_variant_key *key)
+{
+   struct draw_gs_llvm_variant *variant;
+   struct llvm_geometry_shader *shader =
+      llvm_geometry_shader(llvm->draw->gs.geometry_shader);
+   LLVMTypeRef vertex_header;
+
+   /* The struct already contains one fixed-size key; swap that for the
+    * shader's real key size.
+    */
+   variant = MALLOC(sizeof *variant +
+                    shader->variant_key_size -
+                    sizeof variant->key);
+   if (variant == NULL)
+      return NULL;
+
+   variant->llvm = llvm;
+   variant->shader = shader;
+
+   variant->gallivm = gallivm_create();
+
+   create_gs_jit_types(variant);
+
+   memcpy(&variant->key, key, shader->variant_key_size);
+
+   vertex_header = create_jit_vertex_header(variant->gallivm, num_outputs);
+
+   variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0);
+
+   draw_gs_llvm_generate(llvm, variant);
+
+   gallivm_compile_module(variant->gallivm);
+
+   variant->jit_func = (draw_gs_jit_func)
+      gallivm_jit_function(variant->gallivm, variant->function);
+
+   /* Each list item points back at its variant (set once per item). */
+   variant->list_item_global.base = variant;
+   variant->list_item_local.base = variant;
+   shader->variants_created++;
+
+   return variant;
+}
+
+/**
+ * Release a geometry shader variant: free its JIT function and gallivm
+ * state, unlink it from the per-shader and global lists while updating
+ * both counters, then free the variant itself.
+ */
+void
+draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant)
+{
+   struct draw_llvm *llvm = variant->llvm;
+   struct llvm_geometry_shader *owner = variant->shader;
+
+   if (variant->function != NULL) {
+      gallivm_free_function(variant->gallivm,
+                            variant->function, variant->jit_func);
+   }
+
+   gallivm_destroy(variant->gallivm);
+
+   /* per-shader list */
+   remove_from_list(&variant->list_item_local);
+   owner->variants_cached--;
+
+   /* global list */
+   remove_from_list(&variant->list_item_global);
+   llvm->nr_gs_variants--;
+
+   FREE(variant);
+}
+
+/**
+ * Fill in the variant key for the currently bound geometry shader.
+ *
+ * \param llvm   draw LLVM state; the bound geometry shader, samplers and
+ *               sampler views are read from llvm->draw
+ * \param store  caller-provided storage the key is built in; it must be
+ *               large enough for a full struct draw_gs_llvm_variant_key
+ *               plus the sampler entries used by the shader
+ * \return \p store reinterpreted as the key
+ */
+struct draw_gs_llvm_variant_key *
+draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
+{
+   unsigned i;
+   struct draw_gs_llvm_variant_key *key;
+   struct draw_sampler_static_state *draw_sampler;
+   const struct tgsi_shader_info *info =
+      &llvm->draw->gs.geometry_shader->info;
+
+   key = (struct draw_gs_llvm_variant_key *)store;
+
+   /* Keys are compared byte-wise, so the bits of the header that no
+    * bit-field covers must not hold whatever was left in the storage.
+    * Zero the fixed part of the key before any field is assigned.
+    */
+   memset(key, 0, sizeof *key);
+
+   /* All variants of this shader will have the same value for
+    * nr_samplers.  Not yet trying to compact away holes in the
+    * sampler array.
+    */
+   key->nr_samplers = info->file_max[TGSI_FILE_SAMPLER] + 1;
+   if (info->file_max[TGSI_FILE_SAMPLER_VIEW] != -1) {
+      key->nr_sampler_views = info->file_max[TGSI_FILE_SAMPLER_VIEW] + 1;
+   }
+   else {
+      /* No explicit sampler views declared: one view per sampler. */
+      key->nr_sampler_views = key->nr_samplers;
+   }
+
+   draw_sampler = key->samplers;
+
+   memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler);
+
+   for (i = 0 ; i < key->nr_samplers; i++) {
+      lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state,
+                                      llvm->draw->samplers[PIPE_SHADER_GEOMETRY][i]);
+   }
+   for (i = 0 ; i < key->nr_sampler_views; i++) {
+      lp_sampler_static_texture_state(&draw_sampler[i].texture_state,
+                                      llvm->draw->sampler_views[PIPE_SHADER_GEOMETRY][i]);
+   }
+
+   return key;
+}
+
+/**
+ * Print the texture format name of every sampler view in the key
+ * through debug_printf().
+ */
+void
+draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key)
+{
+   const struct draw_sampler_static_state *states = key->samplers;
+   unsigned view;
+
+   for (view = 0; view < key->nr_sampler_views; ++view) {
+      debug_printf("sampler[%i].src_format = %s\n", view,
+                   util_format_name(states[view].texture_state.format));
+   }
+}
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
index c03c69e..fc0d2bd 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -31,6 +31,8 @@
#include "draw/draw_private.h"
#include "draw/draw_vs.h"
+#include "draw/draw_gs.h"
+
#include "gallivm/lp_bld_sample.h"
#include "gallivm/lp_bld_limits.h"
@@ -40,6 +42,7 @@
struct draw_llvm;
struct llvm_vertex_shader;
+struct llvm_geometry_shader;
struct draw_jit_texture
{
@@ -166,6 +169,61 @@ struct draw_jit_context
lp_build_struct_get(_gallivm, _ptr, 1, "buffer_offset")
+/**
+ * This structure is passed directly to the generated geometry shader.
+ *
+ * It contains the derived state.
+ *
+ * Changes here must be reflected in the draw_gs_jit_context_* and
+ * draw_gs_jit_* accessor macros, which address members by position.
+ * Changes to the ordering should be avoided.
+ *
+ * Only use types with a clear size and padding here, in particular prefer the
+ * stdint.h types to the basic integer types.
+ */
+struct draw_gs_jit_context
+{
+ /* member 0: one constant buffer pointer per slot */
+ const float *constants[LP_MAX_TGSI_CONST_BUFFERS];
+ /* member 1: pointer to the clip plane array */
+ float (*planes) [DRAW_TOTAL_CLIP_PLANES][4];
+ /* member 2: viewport data */
+ float *viewport;
+
+ /* members 3-5: NOTE(review): presumably the output bookkeeping written
+ * by the generated shader's emit/end-primitive code -- confirm against
+ * the draw_gs.c callbacks, which are not part of this header.
+ */
+ int **prim_lengths;
+ int *emitted_vertices;
+ int *emitted_prims;
+
+ /* members 6 and 7: see DRAW_GS_JIT_CTX_TEXTURES / DRAW_GS_JIT_CTX_SAMPLERS */
+ struct draw_jit_texture textures[PIPE_MAX_SHADER_SAMPLER_VIEWS];
+ struct draw_jit_sampler samplers[PIPE_MAX_SAMPLERS];
+};
+
+
+/*
+ * Accessors for the members of struct draw_gs_jit_context.  The numeric
+ * argument of each macro is the member's position in the struct, so the
+ * two must be kept in sync.  The array members (constants, textures,
+ * samplers) use lp_build_struct_get_ptr(); the others use
+ * lp_build_struct_get().
+ */
+#define draw_gs_jit_context_constants(_gallivm, _ptr) \
+ lp_build_struct_get_ptr(_gallivm, _ptr, 0, "constants")
+
+#define draw_gs_jit_context_planes(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, 1, "planes")
+
+#define draw_gs_jit_context_viewport(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, 2, "viewport")
+
+/* Note: the next three accessors are named draw_gs_jit_*, without "context". */
+#define draw_gs_jit_prim_lengths(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, 3, "prim_lengths")
+
+#define draw_gs_jit_emitted_vertices(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, 4, "emitted_vertices")
+
+#define draw_gs_jit_emitted_prims(_gallivm, _ptr) \
+ lp_build_struct_get(_gallivm, _ptr, 5, "emitted_prims")
+
+/* Member positions of the textures and samplers arrays. */
+#define DRAW_GS_JIT_CTX_TEXTURES 6
+#define DRAW_GS_JIT_CTX_SAMPLERS 7
+
+#define draw_gs_jit_context_textures(_gallivm, _ptr) \
+ lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_GS_JIT_CTX_TEXTURES, "textures")
+
+#define draw_gs_jit_context_samplers(_gallivm, _ptr) \
+ lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_GS_JIT_CTX_SAMPLERS, "samplers")
+
+
+
typedef int
(*draw_jit_vert_func)(struct draw_jit_context *context,
struct vertex_header *io,
@@ -187,6 +245,14 @@ typedef int
struct pipe_vertex_buffer *vertex_buffers,
unsigned instance_id);
+
+/**
+ * Signature of a JIT-compiled geometry shader variant.  The parameters
+ * are, in order: context, input array, output vertex headers, number of
+ * primitives and instance id.
+ *
+ * NOTE(review): the meaning of the individual input dimensions (the
+ * leading 6 and the doubled TGSI_NUM_CHANNELS) is not stated here --
+ * confirm against create_gs_jit_input_type().
+ */
+typedef int
+(*draw_gs_jit_func)(struct draw_gs_jit_context *context,
+ float inputs[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS],
+ struct vertex_header *output,
+ unsigned num_prims,
+ unsigned instance_id);
+
struct draw_llvm_variant_key
{
unsigned nr_vertex_elements:8;
@@ -199,13 +265,13 @@ struct draw_llvm_variant_key
unsigned clip_halfz:1;
unsigned bypass_viewport:1;
unsigned need_edgeflags:1;
+ unsigned has_gs:1;
/*
* it is important there are no holes in this struct
* (and all padding gets zeroed).
*/
- unsigned pad1:1;
unsigned ucp_enable:PIPE_MAX_CLIP_PLANES;
- unsigned pad2:32-PIPE_MAX_CLIP_PLANES;
+ unsigned pad1:32-PIPE_MAX_CLIP_PLANES;
/* Variable number of vertex elements:
*/
@@ -216,11 +282,23 @@ struct draw_llvm_variant_key
/* struct draw_sampler_static_state sampler; */
};
+/**
+ * Key identifying one compiled variant of a geometry shader.
+ *
+ * The key is variable-sized: samplers[] is declared with one element but
+ * is used as a trailing array (see draw_gs_llvm_variant_key_size()).
+ *
+ * NOTE(review): the two 8-bit fields do not fill their storage unit, so
+ * code that compares keys byte-wise needs the remaining bits zeroed.
+ */
+struct draw_gs_llvm_variant_key
+{
+ unsigned nr_samplers:8;
+ unsigned nr_sampler_views:8;
+
+ /* first element of the trailing sampler/texture static state array */
+ struct draw_sampler_static_state samplers[1];
+};
+
#define DRAW_LLVM_MAX_VARIANT_KEY_SIZE \
(sizeof(struct draw_llvm_variant_key) + \
PIPE_MAX_SHADER_SAMPLER_VIEWS * sizeof(struct draw_sampler_static_state) + \
(PIPE_MAX_ATTRIBS-1) * sizeof(struct pipe_vertex_element))
+/* Upper bound on the size of a geometry shader variant key: the key
+ * struct plus PIPE_MAX_SHADER_SAMPLER_VIEWS sampler static states.
+ */
+#define DRAW_GS_LLVM_MAX_VARIANT_KEY_SIZE \
+ (sizeof(struct draw_gs_llvm_variant_key) + \
+ PIPE_MAX_SHADER_SAMPLER_VIEWS * sizeof(struct draw_sampler_static_state))
+
static INLINE size_t
draw_llvm_variant_key_size(unsigned nr_vertex_elements,
@@ -232,6 +310,14 @@ draw_llvm_variant_key_size(unsigned nr_vertex_elements,
}
+/**
+ * Size in bytes of a geometry shader variant key that carries
+ * \p nr_samplers trailing draw_sampler_static_state entries.
+ *
+ * struct draw_gs_llvm_variant_key already embeds one entry, so one entry
+ * is subtracted from the total.  The subtraction is done on the struct
+ * size rather than on the count: "nr_samplers - 1" would wrap around in
+ * unsigned arithmetic for nr_samplers == 0 and, once widened to size_t,
+ * yield a huge size where size_t is wider than unsigned.
+ *
+ * \param nr_samplers  number of sampler entries in the key (may be 0)
+ * \return key size in bytes
+ */
+static INLINE size_t
+draw_gs_llvm_variant_key_size(unsigned nr_samplers)
+{
+   const size_t entry_size = sizeof(struct draw_sampler_static_state);
+
+   return sizeof(struct draw_gs_llvm_variant_key) - entry_size +
+          (size_t) nr_samplers * entry_size;
+}
+
+
static INLINE struct draw_sampler_static_state *
draw_llvm_variant_key_samplers(struct draw_llvm_variant_key *key)
{
@@ -246,6 +332,13 @@ struct draw_llvm_variant_list_item
struct draw_llvm_variant_list_item *next, *prev;
};
+/**
+ * Doubly-linked list node for geometry shader variants.
+ */
+struct draw_gs_llvm_variant_list_item
+{
+ /* the variant this node belongs to */
+ struct draw_gs_llvm_variant *base;
+ struct draw_gs_llvm_variant_list_item *next, *prev;
+};
+
+
struct draw_llvm_variant
{
struct gallivm_state *gallivm;
@@ -271,6 +364,32 @@ struct draw_llvm_variant
struct draw_llvm_variant_key key;
};
+
+/**
+ * One JIT-compiled variant of a geometry shader, specialised for the
+ * state captured in its key.
+ */
+struct draw_gs_llvm_variant
+{
+ struct gallivm_state *gallivm;
+
+ /* LLVM JIT builder types */
+ LLVMTypeRef context_ptr_type;
+ LLVMTypeRef vertex_header_ptr_type;
+ LLVMTypeRef input_array_type;
+
+ /* parameters of the generated function, kept for the GS callbacks */
+ LLVMValueRef context_ptr;
+ LLVMValueRef io_ptr;
+ LLVMValueRef num_prims;
+ /* the generated LLVM function and its compiled entry point */
+ LLVMValueRef function;
+ draw_gs_jit_func jit_func;
+
+ /* shader this variant was generated from */
+ struct llvm_geometry_shader *shader;
+
+ struct draw_llvm *llvm;
+ /* node in draw_llvm::gs_variants_list */
+ struct draw_gs_llvm_variant_list_item list_item_global;
+ /* node in llvm_geometry_shader::variants */
+ struct draw_gs_llvm_variant_list_item list_item_local;
+
+ /* key is variable-sized, must be last */
+ struct draw_gs_llvm_variant_key key;
+};
+
struct llvm_vertex_shader {
struct draw_vertex_shader base;
@@ -280,13 +399,27 @@ struct llvm_vertex_shader {
unsigned variants_cached;
};
+/**
+ * Geometry shader with LLVM variant bookkeeping.  'base' is the first
+ * member so a draw_geometry_shader pointer can be cast to this type
+ * (see llvm_geometry_shader()).
+ */
+struct llvm_geometry_shader {
+ struct draw_geometry_shader base;
+
+ /* size in bytes of this shader's variant keys */
+ unsigned variant_key_size;
+ /* head of the list of variants created for this shader */
+ struct draw_gs_llvm_variant_list_item variants;
+ /* number of variants ever created / currently linked for this shader */
+ unsigned variants_created;
+ unsigned variants_cached;
+};
+
+
struct draw_llvm {
struct draw_context *draw;
struct draw_jit_context jit_context;
+ struct draw_gs_jit_context gs_jit_context;
struct draw_llvm_variant_list_item vs_variants_list;
int nr_variants;
+
+ struct draw_gs_llvm_variant_list_item gs_variants_list;
+ int nr_gs_variants;
};
@@ -296,6 +429,14 @@ llvm_vertex_shader(struct draw_vertex_shader *vs)
return (struct llvm_vertex_shader *)vs;
}
+/**
+ * Cast a draw geometry shader pointer to the LLVM geometry shader type.
+ */
+static INLINE struct llvm_geometry_shader *
+llvm_geometry_shader(struct draw_geometry_shader *gs)
+{
+   struct llvm_geometry_shader *llvm_gs = (struct llvm_geometry_shader *) gs;
+
+   return llvm_gs;
+}
+
+
+
struct draw_llvm *
draw_llvm_create(struct draw_context *draw);
@@ -317,6 +458,21 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store);
void
draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key);
+
+/**
+ * Create and compile a geometry shader variant for \p key.
+ * num_vertex_header_attribs is the attribute count of the vertex header
+ * type the variant writes.
+ */
+struct draw_gs_llvm_variant *
+draw_gs_llvm_create_variant(struct draw_llvm *llvm,
+ unsigned num_vertex_header_attribs,
+ const struct draw_gs_llvm_variant_key *key);
+
+/** Destroy a variant created by draw_gs_llvm_create_variant(). */
+void
+draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant);
+
+/** Build the variant key for the bound geometry shader inside \p store. */
+struct draw_gs_llvm_variant_key *
+draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store);
+
+/** Print the contents of a geometry shader variant key for debugging. */
+void
+draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key);
+
struct lp_build_sampler_soa *
draw_llvm_sampler_soa_create(const struct draw_sampler_static_state *static_state,
LLVMValueRef context_ptr);
@@ -326,6 +482,7 @@ draw_llvm_set_sampler_state(struct draw_context *draw);
void
draw_llvm_set_mapped_texture(struct draw_context *draw,
+ unsigned shader_stage,
unsigned sview_idx,
uint32_t width, uint32_t height, uint32_t depth,
uint32_t first_level, uint32_t last_level,
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index fab168c..ec0f758 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -57,6 +57,71 @@ struct llvm_middle_end {
};
+/**
+ * Select the geometry shader variant matching the current state and make
+ * it the shader's current variant.
+ *
+ * An existing variant with an identical key is reused and moved to the
+ * head of the global list.  Otherwise a new variant is created; if the
+ * global variant count has reached DRAW_MAX_SHADER_VARIANTS, a quarter of
+ * that many variants are destroyed from the tail of the global list
+ * first.  current_variant is left NULL if creation fails.
+ */
+static void
+llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme)
+{
+   struct draw_llvm *llvm = fpme->llvm;
+   struct draw_geometry_shader *gs = fpme->draw->gs.geometry_shader;
+   struct llvm_geometry_shader *shader = llvm_geometry_shader(gs);
+   struct draw_gs_llvm_variant *found = NULL;
+   struct draw_gs_llvm_variant_list_item *node;
+   struct draw_gs_llvm_variant_key *key;
+   char store[DRAW_GS_LLVM_MAX_VARIANT_KEY_SIZE];
+   unsigned evicted;
+
+   key = draw_gs_llvm_make_variant_key(llvm, store);
+
+   /* Look for a variant of this shader with the same key. */
+   for (node = first_elem(&shader->variants);
+        !at_end(&shader->variants, node);
+        node = next_elem(node)) {
+      if (memcmp(&node->base->key, key, shader->variant_key_size) == 0) {
+         found = node->base;
+         break;
+      }
+   }
+
+   if (found != NULL) {
+      /* Cache hit: mark as most recently used. */
+      move_to_head(&llvm->gs_variants_list, &found->list_item_global);
+      gs->current_variant = found;
+      return;
+   }
+
+   /* Cache miss.  Before creating a variant, make room if too many
+    * exist: drop up to 25% of the limit from the least recently used
+    * end.  XXX: should we flush here ?
+    */
+   if (llvm->nr_gs_variants >= DRAW_MAX_SHADER_VARIANTS) {
+      for (evicted = 0;
+           evicted < DRAW_MAX_SHADER_VARIANTS / 4 &&
+           !is_empty_list(&llvm->gs_variants_list);
+           evicted++) {
+         struct draw_gs_llvm_variant_list_item *victim =
+            last_elem(&llvm->gs_variants_list);
+
+         assert(victim);
+         assert(victim->base);
+         draw_gs_llvm_destroy_variant(victim->base);
+      }
+   }
+
+   found = draw_gs_llvm_create_variant(llvm, gs->info.num_outputs, key);
+
+   if (found != NULL) {
+      insert_at_head(&shader->variants, &found->list_item_local);
+      insert_at_head(&llvm->gs_variants_list, &found->list_item_global);
+      llvm->nr_gs_variants++;
+      shader->variants_cached++;
+   }
+
+   gs->current_variant = found;
+}
+
/**
* Prepare/validate middle part of the vertex pipeline.
* NOTE: if you change this function, also look at the non-LLVM
@@ -180,6 +245,10 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle,
fpme->current_variant = variant;
}
+
+ if (gs) {
+ llvm_middle_end_prepare_gs(fpme);
+ }
}
@@ -199,11 +268,17 @@ llvm_middle_end_bind_parameters(struct draw_pt_middle_end *middle)
for (i = 0; i < Elements(fpme->llvm->jit_context.vs_constants); ++i) {
fpme->llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i];
}
+ for (i = 0; i < Elements(fpme->llvm->gs_jit_context.constants); ++i) {
+ fpme->llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i];
+ }
fpme->llvm->jit_context.planes =
(float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];
+ fpme->llvm->gs_jit_context.planes =
+ (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0];
fpme->llvm->jit_context.viewport = (float *) draw->viewport.scale;
+ fpme->llvm->gs_jit_context.viewport = (float *) draw->viewport.scale;
}
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
index 6e65e12..62d4707 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h
@@ -61,6 +61,7 @@ struct tgsi_shader_info;
struct lp_build_mask_context;
struct gallivm_state;
struct lp_derivatives;
+struct lp_build_tgsi_gs_iface;
enum lp_build_tex_modifier {
@@ -224,7 +225,8 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
const LLVMValueRef (*inputs)[4],
LLVMValueRef (*outputs)[4],
struct lp_build_sampler_soa *sampler,
- const struct tgsi_shader_info *info);
+ const struct tgsi_shader_info *info,
+ const struct lp_build_tgsi_gs_iface *gs_iface);
void
@@ -361,6 +363,24 @@ struct lp_build_tgsi_context
void (*emit_epilogue)(struct lp_build_tgsi_context*);
};
+/**
+ * Geometry shader hooks supplied by the caller of lp_build_tgsi_soa().
+ * A non-NULL interface switches the TGSI translation into geometry
+ * shader mode; user_data is handed back unchanged to every callback.
+ */
+struct lp_build_tgsi_gs_iface
+{
+ /* value the shader's input registers are fetched from */
+ LLVMValueRef input;
+ /* called for the TGSI EMIT opcode */
+ void (*emit_vertex)(struct lp_build_tgsi_context * bld_base,
+ LLVMValueRef (*outputs)[4],
+ LLVMValueRef emitted_vertices_vec,
+ void *user_data);
+ /* called for the TGSI ENDPRIM opcode */
+ void (*end_primitive)(struct lp_build_tgsi_context * bld_base,
+ LLVMValueRef verts_per_prim_vec,
+ LLVMValueRef emitted_prims_vec,
+ void *user_data);
+ /* called from the shader epilogue with the final counters */
+ void (*gs_epilogue)(struct lp_build_tgsi_context * bld_base,
+ LLVMValueRef total_emitted_vertices_vec,
+ LLVMValueRef emitted_prims_vec,
+ void *user_data);
+ void *user_data;
+};
+
struct lp_build_tgsi_soa_context
{
struct lp_build_tgsi_context bld_base;
@@ -368,6 +388,11 @@ struct lp_build_tgsi_soa_context
/* Builder for scalar elements of shader's data type (float) */
struct lp_build_context elem_bld;
+ const struct lp_build_tgsi_gs_iface *gs_iface;
+ LLVMValueRef emitted_prims_vec;
+ LLVMValueRef total_emitted_vertices_vec;
+ LLVMValueRef emitted_vertices_vec;
+
LLVMValueRef consts_ptr;
const LLVMValueRef *pos;
const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS];
diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
index cafc61f..6f174a5 100644
--- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
+++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c
@@ -437,6 +437,26 @@ lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
}
}
+/*
+ * When outputs are addressed indirectly, fill every caller-provided
+ * output slot with the value returned by lp_get_output_ptr(), so outputs
+ * are always delivered through the same interface.  Does nothing when
+ * the output file is not indirectly addressed.
+ */
+static void
+gather_outputs(struct lp_build_tgsi_soa_context * bld)
+{
+   const struct tgsi_shader_info *info = bld->bld_base.info;
+   unsigned slot, channel;
+
+   if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
+      return;
+   }
+
+   assert(info->num_outputs <= info->file_max[TGSI_FILE_OUTPUT] + 1);
+
+   for (slot = 0; slot < info->num_outputs; ++slot) {
+      for (channel = 0; channel < TGSI_NUM_CHANNELS; ++channel) {
+         bld->outputs[slot][channel] = lp_get_output_ptr(bld, slot, channel);
+      }
+   }
+}
+
/**
* Gather vector.
* XXX the lp_build_gather() function should be capable of doing this
@@ -757,6 +777,60 @@ emit_fetch_input(
return res;
}
+
+/**
+ * Fetch one channel of a geometry shader input register.
+ *
+ * The value is loaded from the GS interface's input through a GEP
+ * indexed by (vertex, attribute, swizzle); the vertex and attribute
+ * indices are either constants or indirect indices.  The loaded value is
+ * bitcast to the unsigned or signed integer vector type when \p stype
+ * asks for it.
+ */
+static LLVMValueRef
+emit_fetch_gs_input(
+   struct lp_build_tgsi_context * bld_base,
+   const struct tgsi_full_src_register * reg,
+   enum tgsi_opcode_type stype,
+   unsigned swizzle)
+{
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
+   LLVMBuilderRef builder = gallivm->builder;
+   LLVMValueRef gep_indices[3];
+   LLVMValueRef elem_ptr;
+   LLVMValueRef value;
+
+   /* attribute index */
+   if (reg->Register.Indirect) {
+      gep_indices[1] = get_indirect_index(bld,
+                                          reg->Register.File,
+                                          reg->Register.Index,
+                                          &reg->Indirect);
+   } else {
+      gep_indices[1] = lp_build_const_int32(gallivm, reg->Register.Index);
+   }
+
+   /* vertex index */
+   if (reg->Dimension.Indirect) {
+      gep_indices[0] = get_indirect_index(bld,
+                                          reg->Register.File,
+                                          reg->Dimension.Index,
+                                          &reg->DimIndirect);
+   } else {
+      gep_indices[0] = lp_build_const_int32(gallivm, reg->Dimension.Index);
+   }
+
+   /* channel */
+   gep_indices[2] = lp_build_const_int32(gallivm, swizzle);
+
+   elem_ptr = LLVMBuildGEP(builder, bld->gs_iface->input, gep_indices, 3, "");
+   value = LLVMBuildLoad(builder, elem_ptr, "");
+
+   assert(value);
+
+   switch (stype) {
+   case TGSI_TYPE_UNSIGNED:
+      return LLVMBuildBitCast(builder, value, bld_base->uint_bld.vec_type, "");
+   case TGSI_TYPE_SIGNED:
+      return LLVMBuildBitCast(builder, value, bld_base->int_bld.vec_type, "");
+   default:
+      return value;
+   }
+}
+
static LLVMValueRef
emit_fetch_temporary(
struct lp_build_tgsi_context * bld_base,
@@ -2081,6 +2155,66 @@ sviewinfo_emit(
emit_size_query(bld, emit_data->inst, emit_data->output, TRUE);
}
+/**
+ * Return the integer "one" vector ANDed with the execution mask (when
+ * one is active) and with the shader's current mask value.
+ */
+static LLVMValueRef
+mask_to_one_vec(struct lp_build_tgsi_context *bld_base)
+{
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+   LLVMValueRef ones = bld_base->int_bld.one;
+
+   if (bld->exec_mask.has_mask) {
+      ones = LLVMBuildAnd(builder, ones, bld->exec_mask.exec_mask, "");
+   }
+
+   return LLVMBuildAnd(builder, ones, lp_build_mask_value(bld->mask), "");
+}
+
+/**
+ * TGSI EMIT: hand the current outputs to the GS interface's emit_vertex
+ * callback, then add the masked "one" vector to both the per-primitive
+ * and the total emitted-vertex counters.  Does nothing when the
+ * interface has no emit_vertex callback.
+ */
+static void
+emit_vertex(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+   const struct lp_build_tgsi_gs_iface *iface = bld->gs_iface;
+   LLVMValueRef increment;
+
+   if (!iface->emit_vertex) {
+      return;
+   }
+
+   increment = mask_to_one_vec(bld_base);
+   gather_outputs(bld);
+
+   /* The callback sees the total count from before this vertex. */
+   iface->emit_vertex(&bld->bld_base, bld->outputs,
+                      bld->total_emitted_vertices_vec,
+                      iface->user_data);
+
+   bld->emitted_vertices_vec =
+      LLVMBuildAdd(builder, bld->emitted_vertices_vec, increment, "");
+   bld->total_emitted_vertices_vec =
+      LLVMBuildAdd(builder, bld->total_emitted_vertices_vec, increment, "");
+}
+
+
+/**
+ * TGSI ENDPRIM: pass the per-primitive vertex count and the primitive
+ * count to the GS interface's end_primitive callback, add the masked
+ * "one" vector to the primitive counter and reset the per-primitive
+ * vertex counter to zero.  Does nothing when the interface has no
+ * end_primitive callback.
+ */
+static void
+end_primitive(
+   const struct lp_build_tgsi_action * action,
+   struct lp_build_tgsi_context * bld_base,
+   struct lp_build_emit_data * emit_data)
+{
+   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
+   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
+   const struct lp_build_tgsi_gs_iface *iface = bld->gs_iface;
+   LLVMValueRef increment;
+
+   if (!iface->end_primitive) {
+      return;
+   }
+
+   increment = mask_to_one_vec(bld_base);
+
+   /* The callback sees the primitive count from before this primitive. */
+   iface->end_primitive(&bld->bld_base,
+                        bld->emitted_vertices_vec,
+                        bld->emitted_prims_vec,
+                        iface->user_data);
+
+   bld->emitted_prims_vec =
+      LLVMBuildAdd(builder, bld->emitted_prims_vec, increment, "");
+   bld->emitted_vertices_vec = bld_base->uint_bld.zero;
+}
+
static void
cal_emit(
const struct lp_build_tgsi_action * action,
@@ -2324,7 +2458,7 @@ static void emit_prologue(struct lp_build_tgsi_context * bld_base)
/* If we have indirect addressing in inputs we need to copy them into
* our alloca array to be able to iterate over them */
- if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
+ if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
unsigned index, chan;
LLVMTypeRef vec_type = bld_base->base.vec_type;
LLVMValueRef array_size = lp_build_const_int32(gallivm,
@@ -2349,6 +2483,13 @@ static void emit_prologue(struct lp_build_tgsi_context * bld_base)
}
}
}
+
+ if (bld->gs_iface) {
+ struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
+ bld->emitted_prims_vec = uint_bld->zero;
+ bld->emitted_vertices_vec = uint_bld->zero;
+ bld->total_emitted_vertices_vec = uint_bld->zero;
+ }
}
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
@@ -2361,16 +2502,14 @@ static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
}
/* If we have indirect addressing in outputs we need to copy our alloca array
- * to the outputs slots specified by the called */
- if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
- unsigned index, chan;
- assert(bld_base->info->num_outputs <=
- bld_base->info->file_max[TGSI_FILE_OUTPUT] + 1);
- for (index = 0; index < bld_base->info->num_outputs; ++index) {
- for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
- bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
- }
- }
+ * to the outputs slots specified by the caller */
+ if (bld->gs_iface) {
+ bld->gs_iface->gs_epilogue(&bld->bld_base,
+ bld->total_emitted_vertices_vec,
+ bld->emitted_prims_vec,
+ bld->gs_iface->user_data);
+ } else {
+ gather_outputs(bld);
}
}
@@ -2385,7 +2524,8 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS],
LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
struct lp_build_sampler_soa *sampler,
- const struct tgsi_shader_info *info)
+ const struct tgsi_shader_info *info,
+ const struct lp_build_tgsi_gs_iface *gs_iface)
{
struct lp_build_tgsi_soa_context bld;
@@ -2463,6 +2603,15 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm,
bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
+ if (gs_iface) {
+ /* inputs are always indirect with gs */
+ bld.indirect_files |= (1 << TGSI_FILE_INPUT);
+ bld.gs_iface = gs_iface;
+ bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
+ bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
+ bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;
+ }
+
lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base);
bld.system_values = *system_values;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index de51f39..ea41bd6 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -354,7 +354,7 @@ generate_fs(struct gallivm_state *gallivm,
lp_build_tgsi_soa(gallivm, tokens, type, &mask,
consts_ptr, &system_values,
interp->pos, interp->inputs,
- outputs, sampler, &shader->info.base);
+ outputs, sampler, &shader->info.base, NULL);
/* Alpha test */
if (key->alpha.enabled) {
@@ -607,7 +607,7 @@ generate_fs_loop(struct gallivm_state *gallivm,
lp_build_tgsi_soa(gallivm, tokens, type, &mask,
consts_ptr, &system_values,
interp->pos, interp->inputs,
- outputs, sampler, &shader->info.base);
+ outputs, sampler, &shader->info.base, NULL);
/* Alpha test */
if (key->alpha.enabled) {