diff options
-rw-r--r-- | src/gallium/auxiliary/draw/draw_context.c | 17 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_context.h | 5 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_gs.c | 300 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_gs.h | 35 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_llvm.c | 567 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_llvm.h | 161 | ||||
-rw-r--r-- | src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c | 75 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi.h | 27 | ||||
-rw-r--r-- | src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c | 173 | ||||
-rw-r--r-- | src/gallium/drivers/llvmpipe/lp_state_fs.c | 4 |
10 files changed, 1285 insertions, 79 deletions
diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 6b70ac8..d64b82b 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -46,7 +46,7 @@ #include "gallivm/lp_bld_limits.h" #include "draw_llvm.h" -static boolean +boolean draw_get_option_use_llvm(void) { static boolean first = TRUE; @@ -808,16 +808,15 @@ draw_set_mapped_texture(struct draw_context *draw, uint32_t img_stride[PIPE_MAX_TEXTURE_LEVELS], uint32_t mip_offsets[PIPE_MAX_TEXTURE_LEVELS]) { - if (shader_stage == PIPE_SHADER_VERTEX) { #ifdef HAVE_LLVM - if (draw->llvm) - draw_llvm_set_mapped_texture(draw, - sview_idx, - width, height, depth, first_level, - last_level, base_ptr, - row_stride, img_stride, mip_offsets); + if (draw->llvm) + draw_llvm_set_mapped_texture(draw, + shader_stage, + sview_idx, + width, height, depth, first_level, + last_level, base_ptr, + row_stride, img_stride, mip_offsets); #endif - } } /** diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 18c8595..369f6c8 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -282,4 +282,9 @@ draw_get_shader_param(unsigned shader, enum pipe_shader_cap param); int draw_get_shader_param_no_llvm(unsigned shader, enum pipe_shader_cap param); +#ifdef HAVE_LLVM +boolean +draw_get_option_use_llvm(void); +#endif + #endif /* DRAW_CONTEXT_H */ diff --git a/src/gallium/auxiliary/draw/draw_gs.c b/src/gallium/auxiliary/draw/draw_gs.c index cde0756..c8ed95a 100644 --- a/src/gallium/auxiliary/draw/draw_gs.c +++ b/src/gallium/auxiliary/draw/draw_gs.c @@ -29,6 +29,9 @@ #include "draw_private.h" #include "draw_context.h" +#ifdef HAVE_LLVM +#include "draw_llvm.h" +#endif #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_exec.h" @@ -67,7 +70,7 @@ draw_gs_get_input_index(int semantic, int index, static INLINE boolean draw_gs_should_flush(struct draw_geometry_shader 
*shader) { - return (shader->fetched_prim_count == 4); + return (shader->fetched_prim_count == shader->vector_length); } /*#define DEBUG_OUTPUTS 1*/ @@ -182,7 +185,7 @@ static void tgsi_fetch_gs_input(struct draw_geometry_shader *shader, } static void tgsi_gs_prepare(struct draw_geometry_shader *shader, - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS]) { struct tgsi_exec_machine *machine = shader->machine; @@ -205,10 +208,148 @@ static unsigned tgsi_gs_run(struct draw_geometry_shader *shader, /* run interpreter */ tgsi_exec_machine_run(machine); - return + return machine->Temps[TGSI_EXEC_TEMP_PRIMITIVE_I].xyzw[TGSI_EXEC_TEMP_PRIMITIVE_C].u[0]; } +#ifdef HAVE_LLVM + +static void +llvm_fetch_gs_input(struct draw_geometry_shader *shader, + unsigned *indices, + unsigned num_vertices, + unsigned prim_idx) +{ + unsigned slot, vs_slot, i; + unsigned input_vertex_stride = shader->input_vertex_stride; + const float (*input_ptr)[4]; + float (*input_data)[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS] = &shader->gs_input->data; + + input_ptr = shader->input; + + for (i = 0; i < num_vertices; ++i) { + const float (*input)[4]; +#if DEBUG_INPUTS + debug_printf("%d) vertex index = %d (prim idx = %d)\n", + i, indices[i], prim_idx); +#endif + input = (const float (*)[4])( + (const char *)input_ptr + (indices[i] * input_vertex_stride)); + for (slot = 0, vs_slot = 0; slot < shader->info.num_inputs; ++slot) { + if (shader->info.input_semantic_name[slot] == TGSI_SEMANTIC_PRIMID) { + (*input_data)[i][slot][0][prim_idx] = (float)shader->in_prim_idx; + (*input_data)[i][slot][1][prim_idx] = (float)shader->in_prim_idx; + (*input_data)[i][slot][2][prim_idx] = (float)shader->in_prim_idx; + (*input_data)[i][slot][3][prim_idx] = (float)shader->in_prim_idx; + } else { + vs_slot = draw_gs_get_input_index( + shader->info.input_semantic_name[slot], + 
shader->info.input_semantic_index[slot], + shader->input_info); +#if DEBUG_INPUTS + debug_printf("\tSlot = %d, vs_slot = %d, idx = %d:\n", + slot, vs_slot, idx); +#endif +#if 0 + assert(!util_is_inf_or_nan(input[vs_slot][0])); + assert(!util_is_inf_or_nan(input[vs_slot][1])); + assert(!util_is_inf_or_nan(input[vs_slot][2])); + assert(!util_is_inf_or_nan(input[vs_slot][3])); +#endif + (*input_data)[i][slot][0][prim_idx] = input[vs_slot][0]; + (*input_data)[i][slot][1][prim_idx] = input[vs_slot][1]; + (*input_data)[i][slot][2][prim_idx] = input[vs_slot][2]; + (*input_data)[i][slot][3][prim_idx] = input[vs_slot][3]; +#if DEBUG_INPUTS + debug_printf("\t\t%f %f %f %f\n", + (*input_data)[i][slot][0][prim_idx], + (*input_data)[i][slot][1][prim_idx], + (*input_data)[i][slot][2][prim_idx], + (*input_data)[i][slot][3][prim_idx]); +#endif + ++vs_slot; + } + } + } +} + +static void +llvm_fetch_gs_outputs(struct draw_geometry_shader *shader, + unsigned num_primitives, + float (**p_output)[4]) +{ + int total_verts = 0; + int vertex_count = 0; + int total_prims = 0; + int max_prims_per_invocation = 0; + char *output_ptr = (char*)shader->gs_output; + int i, j, prim_idx; + + for (i = 0; i < shader->vector_length; ++i) { + int prims = shader->llvm_emitted_primitives[i]; + total_prims += prims; + max_prims_per_invocation = MAX2(max_prims_per_invocation, prims); + } + for (i = 0; i < shader->vector_length; ++i) { + total_verts += shader->llvm_emitted_vertices[i]; + } + + + output_ptr += shader->emitted_vertices * shader->vertex_size; + for (i = 0; i < shader->vector_length - 1; ++i) { + int current_verts = shader->llvm_emitted_vertices[i]; + + if (current_verts != shader->max_output_vertices) { + memcpy(output_ptr + (vertex_count + current_verts) * shader->vertex_size, + output_ptr + (vertex_count + shader->max_output_vertices) * shader->vertex_size, + shader->vertex_size * (total_verts - vertex_count - current_verts)); + } + vertex_count += current_verts; + } + + prim_idx = 0; + for 
(i = 0; i < shader->vector_length; ++i) { + int num_prims = shader->llvm_emitted_primitives[i]; + for (j = 0; j < num_prims; ++j) { + int prim_length = + shader->llvm_prim_lengths[j][i]; + shader->primitive_lengths[shader->emitted_primitives + prim_idx] = + prim_length; + ++prim_idx; + } + } + + shader->emitted_primitives += total_prims; + shader->emitted_vertices += total_verts; +} + +static void +llvm_gs_prepare(struct draw_geometry_shader *shader, + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS]) +{ +} + +static unsigned +llvm_gs_run(struct draw_geometry_shader *shader, + unsigned input_primitives) +{ + unsigned ret; + char *input = (char*)shader->gs_output; + + input += (shader->emitted_vertices * shader->vertex_size); + + ret = shader->current_variant->jit_func( + shader->jit_context, shader->gs_input->data, + (struct vertex_header*)input, + input_primitives, + shader->draw->instance_id); + + return ret; +} + +#endif + static void gs_flush(struct draw_geometry_shader *shader) { unsigned out_prim_count; @@ -219,13 +360,15 @@ static void gs_flush(struct draw_geometry_shader *shader) input_primitives <= 4); out_prim_count = shader->run(shader, input_primitives); + shader->fetch_outputs(shader, out_prim_count, + &shader->tmp_output); + #if 0 debug_printf("PRIM emitted prims = %d (verts=%d), cur prim count = %d\n", shader->emitted_primitives, shader->emitted_vertices, out_prim_count); #endif - shader->fetch_outputs(shader, out_prim_count, - &shader->tmp_output); + shader->fetched_prim_count = 0; } @@ -331,8 +474,8 @@ static void gs_tri_adj(struct draw_geometry_shader *shader, * Execute geometry shader. 
*/ int draw_geometry_shader_run(struct draw_geometry_shader *shader, - const void *constants[PIPE_MAX_CONSTANT_BUFFERS], - const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], + const void *constants[PIPE_MAX_CONSTANT_BUFFERS], + const unsigned constants_size[PIPE_MAX_CONSTANT_BUFFERS], const struct draw_vertex_info *input_verts, const struct draw_prim_info *input_prim, const struct tgsi_shader_info *input_info, @@ -344,14 +487,20 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, unsigned num_outputs = shader->info.num_outputs; unsigned vertex_size = sizeof(struct vertex_header) + num_outputs * 4 * sizeof(float); unsigned num_input_verts = input_prim->linear ? - input_verts->count : - input_prim->count; + input_verts->count : + input_prim->count; unsigned num_in_primitives = - MAX2(u_gs_prims_for_vertices(input_prim->prim, num_input_verts), - u_gs_prims_for_vertices(shader->input_primitive, num_input_verts)); + align( + MAX2(u_gs_prims_for_vertices(input_prim->prim, num_input_verts), + u_gs_prims_for_vertices(shader->input_primitive, num_input_verts)), + shader->vector_length); unsigned max_out_prims = u_gs_prims_for_vertices(shader->output_primitive, shader->max_output_vertices) - * num_in_primitives; + * num_in_primitives; + + //Assume at least one primitive + max_out_prims = MAX2(max_out_prims, 1); + output_verts->vertex_size = vertex_size; output_verts->stride = output_verts->vertex_size; @@ -385,6 +534,34 @@ int draw_geometry_shader_run(struct draw_geometry_shader *shader, FREE(shader->primitive_lengths); shader->primitive_lengths = MALLOC(max_out_prims * sizeof(unsigned)); + +#ifdef HAVE_LLVM + if (draw_get_option_use_llvm()) { + shader->gs_output = output_verts->verts; + if (max_out_prims > shader->max_out_prims) { + unsigned i; + if (shader->llvm_prim_lengths) { + for (i = 0; i < shader->max_out_prims; ++i) { + align_free(shader->llvm_prim_lengths[i]); + } + FREE(shader->llvm_prim_lengths); + } + + shader->llvm_prim_lengths = 
MALLOC(max_out_prims * sizeof(unsigned*)); + for (i = 0; i < max_out_prims; ++i) { + int vector_size = shader->vector_length * sizeof(unsigned); + shader->llvm_prim_lengths[i] = + align_malloc(vector_size, vector_size); + } + + shader->max_out_prims = max_out_prims; + } + shader->jit_context->prim_lengths = shader->llvm_prim_lengths; + shader->jit_context->emitted_vertices = shader->llvm_emitted_vertices; + shader->jit_context->emitted_prims = shader->llvm_emitted_primitives; + } +#endif + shader->prepare(shader, constants, constants_size); if (input_prim->linear) @@ -464,10 +641,27 @@ struct draw_geometry_shader * draw_create_geometry_shader(struct draw_context *draw, const struct pipe_shader_state *state) { +#ifdef HAVE_LLVM + struct llvm_geometry_shader *llvm_gs; +#endif struct draw_geometry_shader *gs; unsigned i; - gs = CALLOC_STRUCT(draw_geometry_shader); +#ifdef HAVE_LLVM + if (draw_get_option_use_llvm()) { + llvm_gs = CALLOC_STRUCT(llvm_geometry_shader); + + if (llvm_gs == NULL) + return NULL; + + gs = &llvm_gs->base; + + make_empty_list(&llvm_gs->variants); + } else +#endif + { + gs = CALLOC_STRUCT(draw_geometry_shader); + } if (!gs) return NULL; @@ -486,6 +680,17 @@ draw_create_geometry_shader(struct draw_context *draw, gs->input_primitive = PIPE_PRIM_TRIANGLES; gs->output_primitive = PIPE_PRIM_TRIANGLE_STRIP; gs->max_output_vertices = 32; + gs->max_out_prims = 0; + + if (draw_get_option_use_llvm()) { + /* TODO: change the input array to handle the following + vector length, instead of the currently hardcoded + TGSI_NUM_CHANNELS + gs->vector_length = lp_native_vector_width / 32;*/ + gs->vector_length = TGSI_NUM_CHANNELS; + } else { + gs->vector_length = TGSI_NUM_CHANNELS; + } for (i = 0; i < gs->info.num_properties; ++i) { if (gs->info.properties[i].name == @@ -507,10 +712,36 @@ draw_create_geometry_shader(struct draw_context *draw, gs->machine = draw->gs.tgsi.machine; - gs->fetch_outputs = tgsi_fetch_gs_outputs; - gs->fetch_inputs = tgsi_fetch_gs_input; 
- gs->prepare = tgsi_gs_prepare; - gs->run = tgsi_gs_run; +#ifdef HAVE_LLVM + if (draw_get_option_use_llvm()) { + int vector_size = gs->vector_length * sizeof(float); + gs->gs_input = align_malloc(sizeof(struct draw_gs_inputs), 16); + memset(gs->gs_input, 0, sizeof(struct draw_gs_inputs)); + gs->llvm_prim_lengths = 0; + + gs->llvm_emitted_primitives = align_malloc(vector_size, vector_size); + gs->llvm_emitted_vertices = align_malloc(vector_size, vector_size); + + gs->fetch_outputs = llvm_fetch_gs_outputs; + gs->fetch_inputs = llvm_fetch_gs_input; + gs->prepare = llvm_gs_prepare; + gs->run = llvm_gs_run; + + gs->jit_context = &draw->llvm->gs_jit_context; + + + llvm_gs->variant_key_size = + draw_gs_llvm_variant_key_size( + MAX2(gs->info.file_max[TGSI_FILE_SAMPLER]+1, + gs->info.file_max[TGSI_FILE_SAMPLER_VIEW]+1)); + } else +#endif + { + gs->fetch_outputs = tgsi_fetch_gs_outputs; + gs->fetch_inputs = tgsi_fetch_gs_input; + gs->prepare = tgsi_gs_prepare; + gs->run = tgsi_gs_run; + } return gs; } @@ -535,7 +766,42 @@ void draw_bind_geometry_shader(struct draw_context *draw, void draw_delete_geometry_shader(struct draw_context *draw, struct draw_geometry_shader *dgs) { +#ifdef HAVE_LLVM + if (draw_get_option_use_llvm()) { + struct llvm_geometry_shader *shader = llvm_geometry_shader(dgs); + struct draw_gs_llvm_variant_list_item *li; + + li = first_elem(&shader->variants); + while(!at_end(&shader->variants, li)) { + struct draw_gs_llvm_variant_list_item *next = next_elem(li); + draw_gs_llvm_destroy_variant(li->base); + li = next; + } + + assert(shader->variants_cached == 0); + + if (dgs->llvm_prim_lengths) { + unsigned i; + for (i = 0; i < dgs->max_out_prims; ++i) { + align_free(dgs->llvm_prim_lengths[i]); + } + FREE(dgs->llvm_prim_lengths); + } + align_free(dgs->llvm_emitted_primitives); + align_free(dgs->llvm_emitted_vertices); + + align_free(dgs->gs_input); + } +#endif + FREE(dgs->primitive_lengths); FREE((void*) dgs->state.tokens); FREE(dgs); } + + +void 
draw_gs_set_current_variant(struct draw_geometry_shader *shader, + struct draw_gs_llvm_variant *variant) +{ + shader->current_variant = variant; +} diff --git a/src/gallium/auxiliary/draw/draw_gs.h b/src/gallium/auxiliary/draw/draw_gs.h index 7ab4f04..e62b34a 100644 --- a/src/gallium/auxiliary/draw/draw_gs.h +++ b/src/gallium/auxiliary/draw/draw_gs.h @@ -31,11 +31,28 @@ #include "draw_context.h" #include "draw_private.h" - #define MAX_TGSI_PRIMITIVES 4 struct draw_context; +#ifdef HAVE_LLVM +struct draw_gs_jit_context; +struct draw_gs_llvm_variant; + +/** + * Structure holding the inputs to the geometry shader. It uses SOA layout. + * The dimensions are as follows: + * - maximum number of vertices for a geometry shader input primitive + * (6 for triangle_adjacency) + * - maximum number of attributes for each vertex + * - four channels per each attribute (x,y,z,w) + * - number of input primitives equal to the SOA vector length + */ +struct draw_gs_inputs { + float data[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS]; +}; +#endif + /** * Private version of the compiled geometry shader */ @@ -66,6 +83,19 @@ struct draw_geometry_shader { unsigned fetched_prim_count; const float (*input)[4]; const struct tgsi_shader_info *input_info; + unsigned vector_length; + unsigned max_out_prims; + +#ifdef HAVE_LLVM + struct draw_gs_inputs *gs_input; + struct draw_gs_jit_context *jit_context; + struct draw_gs_llvm_variant *current_variant; + struct vertex_header *gs_output; + + int **llvm_prim_lengths; + int *llvm_emitted_primitives; + int *llvm_emitted_vertices; +#endif void (*fetch_inputs)(struct draw_geometry_shader *shader, unsigned *indices, @@ -102,4 +132,7 @@ void draw_geometry_shader_prepare(struct draw_geometry_shader *shader, int draw_gs_max_output_vertices(struct draw_geometry_shader *shader, unsigned pipe_prim); +void draw_gs_set_current_variant(struct draw_geometry_shader *shader, + struct draw_gs_llvm_variant *variant); + #endif diff --git 
a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c index 602839d..e46195b 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.c +++ b/src/gallium/auxiliary/draw/draw_llvm.c @@ -29,6 +29,7 @@ #include "draw_context.h" #include "draw_vs.h" +#include "draw_gs.h" #include "gallivm/lp_bld_arit.h" #include "gallivm/lp_bld_logic.h" @@ -229,6 +230,85 @@ create_jit_context_type(struct gallivm_state *gallivm, /** + * Create LLVM type for struct draw_gs_jit_context + */ +static LLVMTypeRef +create_gs_jit_context_type(struct gallivm_state *gallivm, + unsigned vector_length, + LLVMTypeRef texture_type, LLVMTypeRef sampler_type, + const char *struct_name) +{ + LLVMTargetDataRef target = gallivm->target; + LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); + LLVMTypeRef int_type = LLVMInt32TypeInContext(gallivm->context); + LLVMTypeRef elem_types[8]; + LLVMTypeRef context_type; + + elem_types[0] = LLVMArrayType(LLVMPointerType(float_type, 0), /* constants */ + LP_MAX_TGSI_CONST_BUFFERS); + elem_types[1] = LLVMPointerType(LLVMArrayType(LLVMArrayType(float_type, 4), + DRAW_TOTAL_CLIP_PLANES), 0); + elem_types[2] = LLVMPointerType(float_type, 0); /* viewport */ + + elem_types[3] = LLVMPointerType(LLVMPointerType(int_type, 0), 0); + elem_types[4] = LLVMPointerType(LLVMVectorType(int_type, + vector_length), 0); + elem_types[5] = LLVMPointerType(LLVMVectorType(int_type, + vector_length), 0); + + elem_types[6] = LLVMArrayType(texture_type, + PIPE_MAX_SHADER_SAMPLER_VIEWS); /* textures */ + elem_types[7] = LLVMArrayType(sampler_type, + PIPE_MAX_SAMPLERS); /* samplers */ + + context_type = LLVMStructTypeInContext(gallivm->context, elem_types, + Elements(elem_types), 0); +#if HAVE_LLVM < 0x0300 + LLVMAddTypeName(gallivm->module, struct_name, context_type); + + LLVMInvalidateStructLayout(gallivm->target, context_type); +#endif + + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, constants, + target, context_type, 0); + 
LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, planes, + target, context_type, 1); + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, viewport, + target, context_type, 2); + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, prim_lengths, + target, context_type, 3); + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_vertices, + target, context_type, 4); + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, emitted_prims, + target, context_type, 5); + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, textures, + target, context_type, + DRAW_GS_JIT_CTX_TEXTURES); + LP_CHECK_MEMBER_OFFSET(struct draw_gs_jit_context, samplers, + target, context_type, + DRAW_GS_JIT_CTX_SAMPLERS); + LP_CHECK_STRUCT_SIZE(struct draw_gs_jit_context, + target, context_type); + + return context_type; +} + + +static LLVMTypeRef +create_gs_jit_input_type(struct gallivm_state *gallivm) +{ + LLVMTypeRef float_type = LLVMFloatTypeInContext(gallivm->context); + LLVMTypeRef input_array; + + input_array = LLVMVectorType(float_type, TGSI_NUM_CHANNELS); /* num primitives */ + input_array = LLVMArrayType(input_array, TGSI_NUM_CHANNELS); /* num channels */ + input_array = LLVMArrayType(input_array, PIPE_MAX_SHADER_INPUTS); /* num attrs per vertex */ + input_array = LLVMPointerType(input_array, 0); /* num vertices per prim */ + + return input_array; +} + +/** * Create LLVM type for struct pipe_vertex_buffer */ static LLVMTypeRef @@ -397,6 +477,9 @@ draw_llvm_create(struct draw_context *draw) llvm->nr_variants = 0; make_empty_list(&llvm->vs_variants_list); + llvm->nr_gs_variants = 0; + make_empty_list(&llvm->gs_variants_list); + return llvm; } @@ -498,7 +581,8 @@ generate_vs(struct draw_llvm_variant *variant, inputs, outputs, sampler, - &llvm->draw->vs.vertex_shader->info); + &llvm->draw->vs.vertex_shader->info, + NULL); { LLVMValueRef out; @@ -695,6 +779,7 @@ static void store_aos_array(struct gallivm_state *gallivm, struct lp_type soa_type, LLVMValueRef io_ptr, + LLVMValueRef *indices, 
LLVMValueRef* aos, int attrib, int num_outputs, @@ -707,11 +792,15 @@ store_aos_array(struct gallivm_state *gallivm, LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32]; int vector_length = soa_type.length; int i; - + debug_assert(TGSI_NUM_CHANNELS == 4); for (i = 0; i < vector_length; i++) { - inds[i] = lp_build_const_int32(gallivm, i); + if (indices) { + inds[i] = indices[i]; + } else { + inds[i] = lp_build_const_int32(gallivm, i); + } io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, ""); } @@ -753,6 +842,7 @@ store_aos_array(struct gallivm_state *gallivm, static void convert_to_aos(struct gallivm_state *gallivm, LLVMValueRef io, + LLVMValueRef *indices, LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], LLVMValueRef clipmask, int num_outputs, @@ -803,7 +893,7 @@ convert_to_aos(struct gallivm_state *gallivm, store_aos_array(gallivm, soa_type, - io, + io, indices, aos, attrib, num_outputs, @@ -821,14 +911,14 @@ convert_to_aos(struct gallivm_state *gallivm, static void store_clip(struct gallivm_state *gallivm, const struct lp_type vs_type, - LLVMValueRef io_ptr, + LLVMValueRef io_ptr, LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], boolean pre_clip_pos, int idx) { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef soa[4]; LLVMValueRef aos[LP_MAX_VECTOR_LENGTH]; - LLVMValueRef indices[2]; + LLVMValueRef indices[2]; LLVMValueRef io_ptrs[LP_MAX_VECTOR_WIDTH / 32]; LLVMValueRef inds[LP_MAX_VECTOR_WIDTH / 32]; LLVMValueRef clip_ptrs[LP_MAX_VECTOR_WIDTH / 32]; @@ -836,7 +926,7 @@ store_clip(struct gallivm_state *gallivm, indices[0] = indices[1] = lp_build_const_int32(gallivm, 0); - + for (i = 0; i < vs_type.length; i++) { inds[i] = lp_build_const_int32(gallivm, i); io_ptrs[i] = LLVMBuildGEP(builder, io_ptr, &inds[i], 1, ""); @@ -893,13 +983,13 @@ generate_viewport(struct draw_llvm_variant *variant, struct lp_type f32_type = vs_type; LLVMTypeRef vs_type_llvm = lp_build_vec_type(gallivm, vs_type); LLVMValueRef out3 = LLVMBuildLoad(builder, outputs[0][3], ""); /*w0 w1 .. 
wn*/ - LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0); /*1.0 1.0 1.0 1.0*/ + LLVMValueRef const1 = lp_build_const_vec(gallivm, f32_type, 1.0); /*1.0 1.0 1.0 1.0*/ LLVMValueRef vp_ptr = draw_jit_context_viewport(gallivm, context_ptr); /* for 1/w convention*/ out3 = LLVMBuildFDiv(builder, const1, out3, ""); LLVMBuildStore(builder, out3, outputs[0][3]); - + /* Viewport Mapping */ for (i=0; i<3; i++) { LLVMValueRef out = LLVMBuildLoad(builder, outputs[0][i], ""); /*x0 x1 .. xn*/ @@ -908,7 +998,7 @@ generate_viewport(struct draw_llvm_variant *variant, LLVMValueRef scale_i; LLVMValueRef trans_i; LLVMValueRef index; - + index = lp_build_const_int32(gallivm, i); scale_i = LLVMBuildGEP(builder, vp_ptr, &index, 1, ""); @@ -930,14 +1020,14 @@ generate_viewport(struct draw_llvm_variant *variant, /* store transformed outputs */ LLVMBuildStore(builder, out, outputs[0][i]); } - + } /** * Returns clipmask as nxi32 bitmask for the n vertices */ -static LLVMValueRef +static LLVMValueRef generate_clipmask(struct draw_llvm *llvm, struct gallivm_state *gallivm, struct lp_type vs_type, @@ -952,7 +1042,7 @@ generate_clipmask(struct draw_llvm *llvm, { LLVMBuilderRef builder = gallivm->builder; LLVMValueRef mask; /* stores the <nxi32> clipmasks */ - LLVMValueRef test, temp; + LLVMValueRef test, temp; LLVMValueRef zero, shift; LLVMValueRef pos_x, pos_y, pos_z, pos_w; LLVMValueRef cv_x, cv_y, cv_z, cv_w; @@ -967,7 +1057,7 @@ generate_clipmask(struct draw_llvm *llvm, cd[0] = draw_current_shader_clipdistance_output(llvm->draw, 0); cd[1] = draw_current_shader_clipdistance_output(llvm->draw, 1); - + if (cd[0] != pos || cd[1] != pos) have_cd = true; @@ -1002,27 +1092,27 @@ generate_clipmask(struct draw_llvm *llvm, /* plane 1 */ test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w); temp = shift; - test = LLVMBuildAnd(builder, test, temp, ""); + test = LLVMBuildAnd(builder, test, temp, ""); mask = test; - + /* plane 2 */ test = LLVMBuildFAdd(builder, pos_x, 
pos_w, ""); test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test); temp = LLVMBuildShl(builder, temp, shift, ""); - test = LLVMBuildAnd(builder, test, temp, ""); + test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); - + /* plane 3 */ test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_y, pos_w); temp = LLVMBuildShl(builder, temp, shift, ""); - test = LLVMBuildAnd(builder, test, temp, ""); + test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); /* plane 4 */ test = LLVMBuildFAdd(builder, pos_y, pos_w, ""); test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test); temp = LLVMBuildShl(builder, temp, shift, ""); - test = LLVMBuildAnd(builder, test, temp, ""); + test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); } @@ -1031,22 +1121,22 @@ generate_clipmask(struct draw_llvm *llvm, if (clip_halfz) { /* plane 5 */ test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z); - test = LLVMBuildAnd(builder, test, temp, ""); + test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); - } + } else { /* plane 5 */ test = LLVMBuildFAdd(builder, pos_z, pos_w, ""); test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, test); - test = LLVMBuildAnd(builder, test, temp, ""); + test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); } /* plane 6 */ test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_z, pos_w); temp = LLVMBuildShl(builder, temp, shift, ""); - test = LLVMBuildAnd(builder, test, temp, ""); + test = LLVMBuildAnd(builder, test, temp, ""); mask = LLVMBuildOr(builder, mask, test, ""); - } + } if (clip_user) { LLVMValueRef planes_ptr = draw_jit_context_planes(gallivm, context_ptr); @@ -1118,7 +1208,7 @@ generate_clipmask(struct draw_llvm *llvm, /** * Returns boolean if any clipping has occurred - * Used 
zero/non-zero i32 value to represent boolean + * Used zero/non-zero i32 value to represent boolean */ static LLVMValueRef clipmask_booli32(struct gallivm_state *gallivm, @@ -1144,6 +1234,94 @@ clipmask_booli32(struct gallivm_state *gallivm, return ret; } +static void +draw_gs_llvm_emit_vertex(struct lp_build_tgsi_context * bld_base, + LLVMValueRef (*outputs)[4], + LLVMValueRef emitted_vertices_vec, + void *user_data) +{ + struct draw_gs_llvm_variant *variant = + (struct draw_gs_llvm_variant *)user_data; + struct gallivm_state *gallivm = variant->gallivm; + LLVMBuilderRef builder = gallivm->builder; + struct lp_type gs_type = bld_base->base.type; + LLVMValueRef clipmask = lp_build_const_int_vec(gallivm, + lp_int_type(gs_type), 0); + LLVMValueRef indices[LP_MAX_VECTOR_LENGTH]; + LLVMValueRef max_output_vertices = + lp_build_const_int32(gallivm, variant->shader->base.max_output_vertices); + LLVMValueRef io = variant->io_ptr; + unsigned i; + const struct tgsi_shader_info *gs_info = &variant->shader->base.info; + + for (i = 0; i < gs_type.length; ++i) { + LLVMValueRef ind = lp_build_const_int32(gallivm, i); + LLVMValueRef currently_emitted = + LLVMBuildExtractElement(builder, emitted_vertices_vec, ind, ""); + indices[i] = LLVMBuildMul(builder, ind, max_output_vertices, ""); + indices[i] = LLVMBuildAdd(builder, indices[i], currently_emitted, ""); + } + + convert_to_aos(gallivm, io, indices, + outputs, clipmask, + gs_info->num_outputs, gs_type, + FALSE); +} + +static void +draw_gs_llvm_end_primitive(struct lp_build_tgsi_context * bld_base, + LLVMValueRef verts_per_prim_vec, + LLVMValueRef emitted_prims_vec, + void *user_data) +{ + struct draw_gs_llvm_variant *variant = + (struct draw_gs_llvm_variant *)user_data; + struct gallivm_state *gallivm = variant->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef prim_lengts_ptr = + draw_gs_jit_prim_lengths(variant->gallivm, variant->context_ptr); + unsigned i; + + for (i = 0; i < bld_base->base.type.length; ++i) 
{ + LLVMValueRef ind = lp_build_const_int32(gallivm, i); + LLVMValueRef prims_emitted = + LLVMBuildExtractElement(builder, emitted_prims_vec, ind, ""); + LLVMValueRef store_ptr; + LLVMValueRef num_vertices = + LLVMBuildExtractElement(builder, verts_per_prim_vec, ind, ""); + + /*lp_build_printf(gallivm, "XXXX emitting vertices, %d\n\n", + num_vertices);*/ + + store_ptr = LLVMBuildGEP(builder, prim_lengts_ptr, &prims_emitted, 1, ""); + store_ptr = LLVMBuildLoad(builder, store_ptr, ""); + store_ptr = LLVMBuildGEP(builder, store_ptr, &ind, 1, ""); + LLVMBuildStore(builder, num_vertices, store_ptr); + } +} + +static void +draw_gs_llvm_epilogue(struct lp_build_tgsi_context * bld_base, + LLVMValueRef total_emitted_vertices_vec, + LLVMValueRef emitted_prims_vec, + void *user_data) +{ + struct draw_gs_llvm_variant *variant = + (struct draw_gs_llvm_variant *)user_data; + struct gallivm_state *gallivm = variant->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef emitted_verts_ptr = + draw_gs_jit_emitted_vertices(gallivm, variant->context_ptr); + LLVMValueRef emitted_prims_ptr = + draw_gs_jit_emitted_prims(gallivm, variant->context_ptr); + LLVMValueRef zero = lp_build_const_int32(gallivm, 0); + + emitted_verts_ptr = LLVMBuildGEP(builder, emitted_verts_ptr, &zero, 0, ""); + emitted_prims_ptr = LLVMBuildGEP(builder, emitted_prims_ptr, &zero, 0, ""); + + LLVMBuildStore(builder, total_emitted_vertices_vec, emitted_verts_ptr); + LLVMBuildStore(builder, emitted_prims_vec, emitted_prims_ptr); +} static void draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, @@ -1323,7 +1501,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant, &true_index, 1, ""); true_index = LLVMBuildLoad(builder, fetch_ptr, "fetch_elt"); } - + system_values.vertex_id = LLVMBuildInsertElement(gallivm->builder, system_values.vertex_id, true_index, lp_build_const_int32(gallivm, i), ""); @@ -1387,11 +1565,11 @@ draw_llvm_generate(struct 
draw_llvm *llvm, struct draw_llvm_variant *variant, clipmask = lp_build_const_int_vec(gallivm, lp_int_type(vs_type), 0); } - /* store clipmask in vertex header, - * original positions in clip - * and transformed positions in data - */ - convert_to_aos(gallivm, io, outputs, clipmask, + /* store clipmask in vertex header, + * original positions in clip + * and transformed positions in data + */ + convert_to_aos(gallivm, io, NULL, outputs, clipmask, vs_info->num_outputs, vs_type, have_clipdist); } @@ -1437,8 +1615,8 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store) key->clip_halfz = !llvm->draw->rasterizer->gl_rasterization_rules; key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE); key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable; + key->has_gs = llvm->draw->gs.geometry_shader != NULL; key->pad1 = 0; - key->pad2 = 0; /* All variants of this shader will have the same value for * nr_samplers. Not yet trying to compact away holes in the @@ -1487,6 +1665,7 @@ draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key) debug_printf("bypass_viewport = %u\n", key->bypass_viewport); debug_printf("clip_halfz = %u\n", key->clip_halfz); debug_printf("need_edgeflags = %u\n", key->need_edgeflags); + debug_printf("has_gs = %u\n", key->has_gs); debug_printf("ucp_enable = %u\n", key->ucp_enable); for (i = 0 ; i < key->nr_vertex_elements; i++) { @@ -1504,6 +1683,7 @@ draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key) void draw_llvm_set_mapped_texture(struct draw_context *draw, + unsigned shader_stage, unsigned sview_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t first_level, uint32_t last_level, @@ -1515,9 +1695,18 @@ draw_llvm_set_mapped_texture(struct draw_context *draw, unsigned j; struct draw_jit_texture *jit_tex; - assert(sview_idx < Elements(draw->llvm->jit_context.textures)); + assert(shader_stage == PIPE_SHADER_VERTEX || + shader_stage == PIPE_SHADER_GEOMETRY); + + if (shader_stage == 
PIPE_SHADER_VERTEX) { + assert(sview_idx < Elements(draw->llvm->jit_context.textures)); - jit_tex = &draw->llvm->jit_context.textures[sview_idx]; + jit_tex = &draw->llvm->jit_context.textures[sview_idx]; + } else if (shader_stage == PIPE_SHADER_GEOMETRY) { + assert(sview_idx < Elements(draw->llvm->gs_jit_context.textures)); + + jit_tex = &draw->llvm->gs_jit_context.textures[sview_idx]; + } jit_tex->width = width; jit_tex->height = height; @@ -1551,6 +1740,19 @@ draw_llvm_set_sampler_state(struct draw_context *draw) COPY_4V(jit_sam->border_color, s->border_color.f); } } + + for (i = 0; i < draw->num_samplers[PIPE_SHADER_GEOMETRY]; i++) { + struct draw_jit_sampler *jit_sam = &draw->llvm->gs_jit_context.samplers[i]; + + if (draw->samplers[i]) { + const struct pipe_sampler_state *s + = draw->samplers[PIPE_SHADER_GEOMETRY][i]; + jit_sam->min_lod = s->min_lod; + jit_sam->max_lod = s->max_lod; + jit_sam->lod_bias = s->lod_bias; + COPY_4V(jit_sam->border_color, s->border_color.f); + } + } } @@ -1577,3 +1779,298 @@ draw_llvm_destroy_variant(struct draw_llvm_variant *variant) llvm->nr_variants--; FREE(variant); } + + +/** + * Create LLVM types for various structures. 
+ */ +static void +create_gs_jit_types(struct draw_gs_llvm_variant *var) +{ + struct gallivm_state *gallivm = var->gallivm; + LLVMTypeRef texture_type, sampler_type, context_type; + + texture_type = create_jit_texture_type(gallivm, "texture"); + sampler_type = create_jit_sampler_type(gallivm, "sampler"); + + context_type = create_gs_jit_context_type(gallivm, + var->shader->base.vector_length, + texture_type, sampler_type, + "draw_gs_jit_context"); + var->context_ptr_type = LLVMPointerType(context_type, 0); + + var->input_array_type = create_gs_jit_input_type(gallivm); +} + +static LLVMTypeRef +get_gs_context_ptr_type(struct draw_gs_llvm_variant *variant) +{ + if (!variant->context_ptr_type) + create_gs_jit_types(variant); + return variant->context_ptr_type; +} + +static LLVMValueRef +generate_mask_value(struct draw_gs_llvm_variant *variant, + struct lp_type gs_type) +{ + struct gallivm_state *gallivm = variant->gallivm; + LLVMBuilderRef builder = gallivm->builder; + LLVMValueRef bits[16]; + struct lp_type mask_type = lp_int_type(gs_type); + struct lp_type mask_elem_type = lp_elem_type(mask_type); + LLVMValueRef mask_val = lp_build_const_vec(gallivm, + mask_type, + 0); + unsigned i; + + assert(gs_type.length <= Elements(bits)); + + for (i = gs_type.length; i >= 1; --i) { + int idx = i - 1; + LLVMValueRef ind = lp_build_const_int32(gallivm, i); + bits[idx] = lp_build_compare(gallivm, + mask_elem_type, PIPE_FUNC_GEQUAL, + variant->num_prims, ind); + } + for (i = 0; i < gs_type.length; ++i) { + LLVMValueRef ind = lp_build_const_int32(gallivm, i); + mask_val = LLVMBuildInsertElement(builder, mask_val, bits[i], ind, ""); + } + mask_val = lp_build_compare(gallivm, + mask_type, PIPE_FUNC_NOTEQUAL, + mask_val, + lp_build_const_int_vec(gallivm, mask_type, 0)); + + return mask_val; +} + +static void +draw_gs_llvm_generate(struct draw_llvm *llvm, + struct draw_gs_llvm_variant *variant) +{ + struct gallivm_state *gallivm = variant->gallivm; + LLVMContextRef context = 
gallivm->context; + LLVMTypeRef int32_type = LLVMInt32TypeInContext(context); + LLVMTypeRef arg_types[5]; + LLVMTypeRef func_type; + LLVMValueRef variant_func; + LLVMValueRef context_ptr; + LLVMBasicBlockRef block; + LLVMBuilderRef builder; + LLVMValueRef io_ptr, input_array, num_prims, mask_val; + struct lp_build_sampler_soa *sampler = 0; + struct lp_build_context bld; + struct lp_bld_tgsi_system_values system_values; + struct lp_type gs_type; + unsigned i; + struct lp_build_tgsi_gs_iface gs_iface; + const struct tgsi_token *tokens = variant->shader->base.state.tokens; + LLVMValueRef consts_ptr; + LLVMValueRef outputs[PIPE_MAX_SHADER_OUTPUTS][TGSI_NUM_CHANNELS]; + struct lp_build_mask_context mask; + + memset(&system_values, 0, sizeof(system_values)); + + assert(variant->vertex_header_ptr_type); + + arg_types[0] = get_gs_context_ptr_type(variant); /* context */ + arg_types[1] = variant->input_array_type; /* input */ + arg_types[2] = variant->vertex_header_ptr_type; /* vertex_header */ + arg_types[3] = int32_type; /* num_prims */ + arg_types[4] = int32_type; /* instance_id */ + + func_type = LLVMFunctionType(int32_type, arg_types, Elements(arg_types), 0); + + variant_func = LLVMAddFunction(gallivm->module, "draw_geometry_shader", + func_type); + variant->function = variant_func; + + LLVMSetFunctionCallConv(variant_func, LLVMCCallConv); + + for (i = 0; i < Elements(arg_types); ++i) + if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) + LLVMAddAttribute(LLVMGetParam(variant_func, i), + LLVMNoAliasAttribute); + + context_ptr = LLVMGetParam(variant_func, 0); + input_array = LLVMGetParam(variant_func, 1); + io_ptr = LLVMGetParam(variant_func, 2); + num_prims = LLVMGetParam(variant_func, 3); + system_values.instance_id = LLVMGetParam(variant_func, 4); + + lp_build_name(context_ptr, "context"); + lp_build_name(input_array, "input"); + lp_build_name(io_ptr, "io"); + lp_build_name(io_ptr, "num_prims"); + lp_build_name(system_values.instance_id, "instance_id"); + + 
variant->context_ptr = context_ptr; + variant->io_ptr = io_ptr; + variant->num_prims = num_prims; + + gs_iface.input = input_array; + gs_iface.emit_vertex = draw_gs_llvm_emit_vertex; + gs_iface.end_primitive = draw_gs_llvm_end_primitive; + gs_iface.gs_epilogue = draw_gs_llvm_epilogue; + gs_iface.user_data = variant; + + /* + * Function body + */ + + block = LLVMAppendBasicBlockInContext(gallivm->context, variant_func, "entry"); + builder = gallivm->builder; + LLVMPositionBuilderAtEnd(builder, block); + + lp_build_context_init(&bld, gallivm, lp_type_int(32)); + + memset(&gs_type, 0, sizeof gs_type); + gs_type.floating = TRUE; /* floating point values */ + gs_type.sign = TRUE; /* values are signed */ + gs_type.norm = FALSE; /* values are not limited to [0,1] or [-1,1] */ + gs_type.width = 32; /* 32-bit float */ + gs_type.length = variant->shader->base.vector_length; + + consts_ptr = draw_gs_jit_context_constants(variant->gallivm, context_ptr); + + /* code generated texture sampling */ + sampler = draw_llvm_sampler_soa_create(variant->key.samplers, + context_ptr); + + mask_val = generate_mask_value(variant, gs_type); + lp_build_mask_begin(&mask, gallivm, gs_type, mask_val); + + lp_build_tgsi_soa(variant->gallivm, + tokens, + gs_type, + &mask, + consts_ptr, + &system_values, + NULL /*pos*/, + NULL, + outputs, + sampler, + &llvm->draw->gs.geometry_shader->info, + &gs_iface); + + lp_build_mask_end(&mask); + + LLVMBuildRet(builder, lp_build_zero(gallivm, lp_type_uint(32))); + + gallivm_verify_function(gallivm, variant_func); +} + + +struct draw_gs_llvm_variant * +draw_gs_llvm_create_variant(struct draw_llvm *llvm, + unsigned num_outputs, + const struct draw_gs_llvm_variant_key *key) +{ + struct draw_gs_llvm_variant *variant; + struct llvm_geometry_shader *shader = + llvm_geometry_shader(llvm->draw->gs.geometry_shader); + LLVMTypeRef vertex_header; + + variant = MALLOC(sizeof *variant + + shader->variant_key_size - + sizeof variant->key); + if (variant == NULL) + return 
NULL; + + variant->llvm = llvm; + variant->shader = shader; + + variant->gallivm = gallivm_create(); + + create_gs_jit_types(variant); + + memcpy(&variant->key, key, shader->variant_key_size); + + vertex_header = create_jit_vertex_header(variant->gallivm, num_outputs); + + variant->vertex_header_ptr_type = LLVMPointerType(vertex_header, 0); + + draw_gs_llvm_generate(llvm, variant); + + gallivm_compile_module(variant->gallivm); + + variant->jit_func = (draw_gs_jit_func) + gallivm_jit_function(variant->gallivm, variant->function); + + variant->list_item_global.base = variant; + variant->list_item_local.base = variant; + /*variant->no = */shader->variants_created++; + variant->list_item_global.base = variant; + + return variant; +} + +void +draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant) +{ + struct draw_llvm *llvm = variant->llvm; + + if (variant->function) { + gallivm_free_function(variant->gallivm, + variant->function, variant->jit_func); + } + + gallivm_destroy(variant->gallivm); + + remove_from_list(&variant->list_item_local); + variant->shader->variants_cached--; + remove_from_list(&variant->list_item_global); + llvm->nr_gs_variants--; + FREE(variant); +} + +struct draw_gs_llvm_variant_key * +draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store) +{ + unsigned i; + struct draw_gs_llvm_variant_key *key; + struct draw_sampler_static_state *draw_sampler; + + key = (struct draw_gs_llvm_variant_key *)store; + + /* All variants of this shader will have the same value for + * nr_samplers. Not yet trying to compact away holes in the + * sampler array. 
+ */ + key->nr_samplers = llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER] + 1; + if (llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] != -1) { + key->nr_sampler_views = + llvm->draw->gs.geometry_shader->info.file_max[TGSI_FILE_SAMPLER_VIEW] + 1; + } + else { + key->nr_sampler_views = key->nr_samplers; + } + + draw_sampler = key->samplers; + + memset(draw_sampler, 0, MAX2(key->nr_samplers, key->nr_sampler_views) * sizeof *draw_sampler); + + for (i = 0 ; i < key->nr_samplers; i++) { + lp_sampler_static_sampler_state(&draw_sampler[i].sampler_state, + llvm->draw->samplers[PIPE_SHADER_GEOMETRY][i]); + } + for (i = 0 ; i < key->nr_sampler_views; i++) { + lp_sampler_static_texture_state(&draw_sampler[i].texture_state, + llvm->draw->sampler_views[PIPE_SHADER_GEOMETRY][i]); + } + + return key; +} + +void +draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key) +{ + unsigned i; + struct draw_sampler_static_state *sampler = key->samplers; + + for (i = 0 ; i < key->nr_sampler_views; i++) { + debug_printf("sampler[%i].src_format = %s\n", i, + util_format_name(sampler[i].texture_state.format)); + } +} diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h index c03c69e..fc0d2bd 100644 --- a/src/gallium/auxiliary/draw/draw_llvm.h +++ b/src/gallium/auxiliary/draw/draw_llvm.h @@ -31,6 +31,8 @@ #include "draw/draw_private.h" #include "draw/draw_vs.h" +#include "draw/draw_gs.h" + #include "gallivm/lp_bld_sample.h" #include "gallivm/lp_bld_limits.h" @@ -40,6 +42,7 @@ struct draw_llvm; struct llvm_vertex_shader; +struct llvm_geometry_shader; struct draw_jit_texture { @@ -166,6 +169,61 @@ struct draw_jit_context lp_build_struct_get(_gallivm, _ptr, 1, "buffer_offset") +/** + * This structure is passed directly to the generated geometry shader. + * + * It contains the derived state. + * + * Changes here must be reflected in the draw_gs_jit_context_* macros. 
+ * Changes to the ordering should be avoided. + * + * Only use types with a clear size and padding here, in particular prefer the + * stdint.h types to the basic integer types. + */ +struct draw_gs_jit_context +{ + const float *constants[LP_MAX_TGSI_CONST_BUFFERS]; + float (*planes) [DRAW_TOTAL_CLIP_PLANES][4]; + float *viewport; + + int **prim_lengths; + int *emitted_vertices; + int *emitted_prims; + + struct draw_jit_texture textures[PIPE_MAX_SHADER_SAMPLER_VIEWS]; + struct draw_jit_sampler samplers[PIPE_MAX_SAMPLERS]; +}; + + +#define draw_gs_jit_context_constants(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, 0, "constants") + +#define draw_gs_jit_context_planes(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, 1, "planes") + +#define draw_gs_jit_context_viewport(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, 2, "viewport") + +#define draw_gs_jit_prim_lengths(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, 3, "prim_lengths") + +#define draw_gs_jit_emitted_vertices(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, 4, "emitted_vertices") + +#define draw_gs_jit_emitted_prims(_gallivm, _ptr) \ + lp_build_struct_get(_gallivm, _ptr, 5, "emitted_prims") + +#define DRAW_GS_JIT_CTX_TEXTURES 6 +#define DRAW_GS_JIT_CTX_SAMPLERS 7 + +#define draw_gs_jit_context_textures(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_GS_JIT_CTX_TEXTURES, "textures") + +#define draw_gs_jit_context_samplers(_gallivm, _ptr) \ + lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_GS_JIT_CTX_SAMPLERS, "samplers") + + + typedef int (*draw_jit_vert_func)(struct draw_jit_context *context, struct vertex_header *io, @@ -187,6 +245,14 @@ typedef int struct pipe_vertex_buffer *vertex_buffers, unsigned instance_id); + +typedef int +(*draw_gs_jit_func)(struct draw_gs_jit_context *context, + float inputs[6][PIPE_MAX_SHADER_INPUTS][TGSI_NUM_CHANNELS][TGSI_NUM_CHANNELS], + struct vertex_header *output, + unsigned num_prims, + unsigned instance_id); + 
struct draw_llvm_variant_key { unsigned nr_vertex_elements:8; @@ -199,13 +265,13 @@ struct draw_llvm_variant_key unsigned clip_halfz:1; unsigned bypass_viewport:1; unsigned need_edgeflags:1; + unsigned has_gs:1; /* * it is important there are no holes in this struct * (and all padding gets zeroed). */ - unsigned pad1:1; unsigned ucp_enable:PIPE_MAX_CLIP_PLANES; - unsigned pad2:32-PIPE_MAX_CLIP_PLANES; + unsigned pad1:32-PIPE_MAX_CLIP_PLANES; /* Variable number of vertex elements: */ @@ -216,11 +282,23 @@ struct draw_llvm_variant_key /* struct draw_sampler_static_state sampler; */ }; +struct draw_gs_llvm_variant_key +{ + unsigned nr_samplers:8; + unsigned nr_sampler_views:8; + + struct draw_sampler_static_state samplers[1]; +}; + #define DRAW_LLVM_MAX_VARIANT_KEY_SIZE \ (sizeof(struct draw_llvm_variant_key) + \ PIPE_MAX_SHADER_SAMPLER_VIEWS * sizeof(struct draw_sampler_static_state) + \ (PIPE_MAX_ATTRIBS-1) * sizeof(struct pipe_vertex_element)) +#define DRAW_GS_LLVM_MAX_VARIANT_KEY_SIZE \ + (sizeof(struct draw_gs_llvm_variant_key) + \ + PIPE_MAX_SHADER_SAMPLER_VIEWS * sizeof(struct draw_sampler_static_state)) + static INLINE size_t draw_llvm_variant_key_size(unsigned nr_vertex_elements, @@ -232,6 +310,14 @@ draw_llvm_variant_key_size(unsigned nr_vertex_elements, } +static INLINE size_t +draw_gs_llvm_variant_key_size(unsigned nr_samplers) +{ + return (sizeof(struct draw_gs_llvm_variant_key) + + (nr_samplers - 1) * sizeof(struct draw_sampler_static_state)); +} + + static INLINE struct draw_sampler_static_state * draw_llvm_variant_key_samplers(struct draw_llvm_variant_key *key) { @@ -246,6 +332,13 @@ struct draw_llvm_variant_list_item struct draw_llvm_variant_list_item *next, *prev; }; +struct draw_gs_llvm_variant_list_item +{ + struct draw_gs_llvm_variant *base; + struct draw_gs_llvm_variant_list_item *next, *prev; +}; + + struct draw_llvm_variant { struct gallivm_state *gallivm; @@ -271,6 +364,32 @@ struct draw_llvm_variant struct draw_llvm_variant_key key; }; + 
+struct draw_gs_llvm_variant +{ + struct gallivm_state *gallivm; + + /* LLVM JIT builder types */ + LLVMTypeRef context_ptr_type; + LLVMTypeRef vertex_header_ptr_type; + LLVMTypeRef input_array_type; + + LLVMValueRef context_ptr; + LLVMValueRef io_ptr; + LLVMValueRef num_prims; + LLVMValueRef function; + draw_gs_jit_func jit_func; + + struct llvm_geometry_shader *shader; + + struct draw_llvm *llvm; + struct draw_gs_llvm_variant_list_item list_item_global; + struct draw_gs_llvm_variant_list_item list_item_local; + + /* key is variable-sized, must be last */ + struct draw_gs_llvm_variant_key key; +}; + struct llvm_vertex_shader { struct draw_vertex_shader base; @@ -280,13 +399,27 @@ struct llvm_vertex_shader { unsigned variants_cached; }; +struct llvm_geometry_shader { + struct draw_geometry_shader base; + + unsigned variant_key_size; + struct draw_gs_llvm_variant_list_item variants; + unsigned variants_created; + unsigned variants_cached; +}; + + struct draw_llvm { struct draw_context *draw; struct draw_jit_context jit_context; + struct draw_gs_jit_context gs_jit_context; struct draw_llvm_variant_list_item vs_variants_list; int nr_variants; + + struct draw_gs_llvm_variant_list_item gs_variants_list; + int nr_gs_variants; }; @@ -296,6 +429,14 @@ llvm_vertex_shader(struct draw_vertex_shader *vs) return (struct llvm_vertex_shader *)vs; } +static INLINE struct llvm_geometry_shader * +llvm_geometry_shader(struct draw_geometry_shader *gs) +{ + return (struct llvm_geometry_shader *)gs; +} + + + struct draw_llvm * draw_llvm_create(struct draw_context *draw); @@ -317,6 +458,21 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store); void draw_llvm_dump_variant_key(struct draw_llvm_variant_key *key); + +struct draw_gs_llvm_variant * +draw_gs_llvm_create_variant(struct draw_llvm *llvm, + unsigned num_vertex_header_attribs, + const struct draw_gs_llvm_variant_key *key); + +void +draw_gs_llvm_destroy_variant(struct draw_gs_llvm_variant *variant); + +struct 
draw_gs_llvm_variant_key * +draw_gs_llvm_make_variant_key(struct draw_llvm *llvm, char *store); + +void +draw_gs_llvm_dump_variant_key(struct draw_gs_llvm_variant_key *key); + struct lp_build_sampler_soa * draw_llvm_sampler_soa_create(const struct draw_sampler_static_state *static_state, LLVMValueRef context_ptr); @@ -326,6 +482,7 @@ draw_llvm_set_sampler_state(struct draw_context *draw); void draw_llvm_set_mapped_texture(struct draw_context *draw, + unsigned shader_stage, unsigned sview_idx, uint32_t width, uint32_t height, uint32_t depth, uint32_t first_level, uint32_t last_level, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c index fab168c..ec0f758 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c @@ -57,6 +57,71 @@ struct llvm_middle_end { }; +static void +llvm_middle_end_prepare_gs(struct llvm_middle_end *fpme) +{ + struct draw_context *draw = fpme->draw; + struct draw_geometry_shader *gs = draw->gs.geometry_shader; + struct draw_gs_llvm_variant_key *key; + struct draw_gs_llvm_variant *variant = NULL; + struct draw_gs_llvm_variant_list_item *li; + struct llvm_geometry_shader *shader = llvm_geometry_shader(gs); + char store[DRAW_GS_LLVM_MAX_VARIANT_KEY_SIZE]; + unsigned i; + + key = draw_gs_llvm_make_variant_key(fpme->llvm, store); + + /* Search shader's list of variants for the key */ + li = first_elem(&shader->variants); + while (!at_end(&shader->variants, li)) { + if (memcmp(&li->base->key, key, shader->variant_key_size) == 0) { + variant = li->base; + break; + } + li = next_elem(li); + } + + if (variant) { + /* found the variant, move to head of global list (for LRU) */ + move_to_head(&fpme->llvm->gs_variants_list, + &variant->list_item_global); + } + else { + /* Need to create new variant */ + + /* First check if we've created too many variants. 
If so, free + * 25% of the LRU to avoid using too much memory. + */ + if (fpme->llvm->nr_gs_variants >= DRAW_MAX_SHADER_VARIANTS) { + /* + * XXX: should we flush here ? + */ + for (i = 0; i < DRAW_MAX_SHADER_VARIANTS / 4; i++) { + struct draw_gs_llvm_variant_list_item *item; + if (is_empty_list(&fpme->llvm->gs_variants_list)) { + break; + } + item = last_elem(&fpme->llvm->gs_variants_list); + assert(item); + assert(item->base); + draw_gs_llvm_destroy_variant(item->base); + } + } + + variant = draw_gs_llvm_create_variant(fpme->llvm, gs->info.num_outputs, key); + + if (variant) { + insert_at_head(&shader->variants, &variant->list_item_local); + insert_at_head(&fpme->llvm->gs_variants_list, + &variant->list_item_global); + fpme->llvm->nr_gs_variants++; + shader->variants_cached++; + } + } + + gs->current_variant = variant; +} + /** * Prepare/validate middle part of the vertex pipeline. * NOTE: if you change this function, also look at the non-LLVM @@ -180,6 +245,10 @@ llvm_middle_end_prepare( struct draw_pt_middle_end *middle, fpme->current_variant = variant; } + + if (gs) { + llvm_middle_end_prepare_gs(fpme); + } } @@ -199,11 +268,17 @@ llvm_middle_end_bind_parameters(struct draw_pt_middle_end *middle) for (i = 0; i < Elements(fpme->llvm->jit_context.vs_constants); ++i) { fpme->llvm->jit_context.vs_constants[i] = draw->pt.user.vs_constants[i]; } + for (i = 0; i < Elements(fpme->llvm->gs_jit_context.constants); ++i) { + fpme->llvm->gs_jit_context.constants[i] = draw->pt.user.gs_constants[i]; + } fpme->llvm->jit_context.planes = (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0]; + fpme->llvm->gs_jit_context.planes = + (float (*)[DRAW_TOTAL_CLIP_PLANES][4]) draw->pt.user.planes[0]; fpme->llvm->jit_context.viewport = (float *) draw->viewport.scale; + fpme->llvm->gs_jit_context.viewport = (float *) draw->viewport.scale; } diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h index 6e65e12..62d4707 100644 --- 
a/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi.h @@ -61,6 +61,7 @@ struct tgsi_shader_info; struct lp_build_mask_context; struct gallivm_state; struct lp_derivatives; +struct lp_build_tgsi_gs_iface; enum lp_build_tex_modifier { @@ -224,7 +225,8 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, const LLVMValueRef (*inputs)[4], LLVMValueRef (*outputs)[4], struct lp_build_sampler_soa *sampler, - const struct tgsi_shader_info *info); + const struct tgsi_shader_info *info, + const struct lp_build_tgsi_gs_iface *gs_iface); void @@ -361,6 +363,24 @@ struct lp_build_tgsi_context void (*emit_epilogue)(struct lp_build_tgsi_context*); }; +struct lp_build_tgsi_gs_iface +{ + LLVMValueRef input; + void (*emit_vertex)(struct lp_build_tgsi_context * bld_base, + LLVMValueRef (*outputs)[4], + LLVMValueRef emitted_vertices_vec, + void *user_data); + void (*end_primitive)(struct lp_build_tgsi_context * bld_base, + LLVMValueRef verts_per_prim_vec, + LLVMValueRef emitted_prims_vec, + void *user_data); + void (*gs_epilogue)(struct lp_build_tgsi_context * bld_base, + LLVMValueRef total_emitted_vertices_vec, + LLVMValueRef emitted_prims_vec, + void *user_data); + void *user_data; +}; + struct lp_build_tgsi_soa_context { struct lp_build_tgsi_context bld_base; @@ -368,6 +388,11 @@ struct lp_build_tgsi_soa_context /* Builder for scalar elements of shader's data type (float) */ struct lp_build_context elem_bld; + const struct lp_build_tgsi_gs_iface *gs_iface; + LLVMValueRef emitted_prims_vec; + LLVMValueRef total_emitted_vertices_vec; + LLVMValueRef emitted_vertices_vec; + LLVMValueRef consts_ptr; const LLVMValueRef *pos; const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS]; diff --git a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c index cafc61f..6f174a5 100644 --- a/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c +++ b/src/gallium/auxiliary/gallivm/lp_bld_tgsi_soa.c @@ -437,6 +437,26 @@ 
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld, } } +/* + * If we have indirect addressing in outputs copy our alloca array + * to the outputs slots specified by the caller to make sure + * our outputs are delivered consistently via the same interface. + */ +static void +gather_outputs(struct lp_build_tgsi_soa_context * bld) +{ + if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { + unsigned index, chan; + assert(bld->bld_base.info->num_outputs <= + bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1); + for (index = 0; index < bld->bld_base.info->num_outputs; ++index) { + for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { + bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan); + } + } + } +} + /** * Gather vector. * XXX the lp_build_gather() function should be capable of doing this @@ -757,6 +777,60 @@ emit_fetch_input( return res; } + +static LLVMValueRef +emit_fetch_gs_input( + struct lp_build_tgsi_context * bld_base, + const struct tgsi_full_src_register * reg, + enum tgsi_opcode_type stype, + unsigned swizzle) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + struct gallivm_state *gallivm = bld->bld_base.base.gallivm; + LLVMBuilderRef builder = gallivm->builder; + //struct lp_build_context *uint_bld = &bld_base->uint_bld; + LLVMValueRef attrib_index = NULL; + LLVMValueRef vertex_index = NULL; + LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle); + LLVMValueRef indices[3]; + LLVMValueRef res; + + if (reg->Register.Indirect) { + attrib_index = get_indirect_index(bld, + reg->Register.File, + reg->Register.Index, + &reg->Indirect); + } else { + attrib_index = lp_build_const_int32(gallivm, reg->Register.Index); + } + + if (reg->Dimension.Indirect) { + vertex_index = get_indirect_index(bld, + reg->Register.File, + reg->Dimension.Index, + &reg->DimIndirect); + } else { + vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index); + } + + indices[0] = vertex_index; + indices[1] = attrib_index; +
indices[2] = swizzle_index; + + res = LLVMBuildGEP(builder, bld->gs_iface->input, indices, 3, ""); + res = LLVMBuildLoad(builder, res, ""); + + assert(res); + + if (stype == TGSI_TYPE_UNSIGNED) { + res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, ""); + } else if (stype == TGSI_TYPE_SIGNED) { + res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, ""); + } + + return res; +} + static LLVMValueRef emit_fetch_temporary( struct lp_build_tgsi_context * bld_base, @@ -2081,6 +2155,66 @@ sviewinfo_emit( emit_size_query(bld, emit_data->inst, emit_data->output, TRUE); } +static LLVMValueRef +mask_to_one_vec(struct lp_build_tgsi_context *bld_base) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + LLVMValueRef one_vec = bld_base->int_bld.one; + struct lp_exec_mask *exec_mask = &bld->exec_mask; + + if (exec_mask->has_mask) { + one_vec = LLVMBuildAnd(builder, one_vec, exec_mask->exec_mask, ""); + } + one_vec = LLVMBuildAnd(builder, one_vec, + lp_build_mask_value(bld->mask), ""); + return one_vec; +} + +static void +emit_vertex( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + + if (bld->gs_iface->emit_vertex) { + LLVMValueRef masked_ones = mask_to_one_vec(bld_base); + gather_outputs(bld); + bld->gs_iface->emit_vertex(&bld->bld_base, bld->outputs, + bld->total_emitted_vertices_vec, + bld->gs_iface->user_data); + bld->emitted_vertices_vec = + LLVMBuildAdd(builder, bld->emitted_vertices_vec, masked_ones, ""); + bld->total_emitted_vertices_vec = + LLVMBuildAdd(builder, bld->total_emitted_vertices_vec, masked_ones, ""); + } +} + + +static void +end_primitive( + const struct lp_build_tgsi_action * action, + struct lp_build_tgsi_context * bld_base, + 
struct lp_build_emit_data * emit_data) +{ + struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); + LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; + + if (bld->gs_iface->end_primitive) { + LLVMValueRef masked_ones = mask_to_one_vec(bld_base); + bld->gs_iface->end_primitive(&bld->bld_base, + bld->emitted_vertices_vec, + bld->emitted_prims_vec, + bld->gs_iface->user_data); + bld->emitted_prims_vec = + LLVMBuildAdd(builder, bld->emitted_prims_vec, masked_ones, ""); + bld->emitted_vertices_vec = bld_base->uint_bld.zero; + } +} + static void cal_emit( const struct lp_build_tgsi_action * action, @@ -2324,7 +2458,7 @@ static void emit_prologue(struct lp_build_tgsi_context * bld_base) /* If we have indirect addressing in inputs we need to copy them into * our alloca array to be able to iterate over them */ - if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) { + if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) { unsigned index, chan; LLVMTypeRef vec_type = bld_base->base.vec_type; LLVMValueRef array_size = lp_build_const_int32(gallivm, @@ -2349,6 +2483,13 @@ static void emit_prologue(struct lp_build_tgsi_context * bld_base) } } } + + if (bld->gs_iface) { + struct lp_build_context *uint_bld = &bld->bld_base.uint_bld; + bld->emitted_prims_vec = uint_bld->zero; + bld->emitted_vertices_vec = uint_bld->zero; + bld->total_emitted_vertices_vec = uint_bld->zero; + } } static void emit_epilogue(struct lp_build_tgsi_context * bld_base) @@ -2361,16 +2502,14 @@ static void emit_epilogue(struct lp_build_tgsi_context * bld_base) } /* If we have indirect addressing in outputs we need to copy our alloca array - * to the outputs slots specified by the called */ - if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) { - unsigned index, chan; - assert(bld_base->info->num_outputs <= - bld_base->info->file_max[TGSI_FILE_OUTPUT] + 1); - for (index = 0; index < bld_base->info->num_outputs; ++index) { - for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { 
- bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan); - } - } + * to the outputs slots specified by the caller */ + if (bld->gs_iface) { + bld->gs_iface->gs_epilogue(&bld->bld_base, + bld->total_emitted_vertices_vec, + bld->emitted_prims_vec, + bld->gs_iface->user_data); + } else { + gather_outputs(bld); } } @@ -2385,7 +2524,8 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS], LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], struct lp_build_sampler_soa *sampler, - const struct tgsi_shader_info *info) + const struct tgsi_shader_info *info, + const struct lp_build_tgsi_gs_iface *gs_iface) { struct lp_build_tgsi_soa_context bld; @@ -2463,6 +2603,15 @@ lp_build_tgsi_soa(struct gallivm_state *gallivm, bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit; bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit; + if (gs_iface) { + /* inputs are always indirect with gs */ + bld.indirect_files |= (1 << TGSI_FILE_INPUT); + bld.gs_iface = gs_iface; + bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input; + bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex; + bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive; + } + lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.base); bld.system_values = *system_values; diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c index de51f39..ea41bd6 100644 --- a/src/gallium/drivers/llvmpipe/lp_state_fs.c +++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c @@ -354,7 +354,7 @@ generate_fs(struct gallivm_state *gallivm, lp_build_tgsi_soa(gallivm, tokens, type, &mask, consts_ptr, &system_values, interp->pos, interp->inputs, - outputs, sampler, &shader->info.base); + outputs, sampler, &shader->info.base, NULL); /* Alpha test */ if (key->alpha.enabled) { @@ -607,7 +607,7 @@ generate_fs_loop(struct gallivm_state *gallivm, lp_build_tgsi_soa(gallivm, tokens, type, &mask, consts_ptr, 
&system_values, interp->pos, interp->inputs, - outputs, sampler, &shader->info.base); + outputs, sampler, &shader->info.base, NULL); /* Alpha test */ if (key->alpha.enabled) { |