summaryrefslogtreecommitdiffstats
path: root/src/gallium
diff options
context:
space:
mode:
Diffstat (limited to 'src/gallium')
-rw-r--r--src/gallium/auxiliary/Makefile.am18
-rw-r--r--src/gallium/auxiliary/draw/draw_cliptest_tmp.h32
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.c140
-rw-r--r--src/gallium/auxiliary/draw/draw_llvm.h10
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_aaline.c9
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_clip.c100
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_pstipple.c7
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h5
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_emit.c62
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c1
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_post_vs.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_so_emit.c2
-rw-r--r--src/gallium/auxiliary/nir/tgsi_to_nir.c9
-rw-r--r--src/gallium/auxiliary/util/u_blitter.h9
-rw-r--r--src/gallium/auxiliary/util/u_simple_shaders.h1
-rw-r--r--src/gallium/drivers/ddebug/dd_draw.c42
-rw-r--r--src/gallium/drivers/ddebug/dd_pipe.h3
-rw-r--r--src/gallium/drivers/ddebug/dd_screen.c9
-rw-r--r--src/gallium/drivers/freedreno/a2xx/a2xx.xml.h8
-rw-r--r--src/gallium/drivers/freedreno/a3xx/a3xx.xml.h176
-rw-r--r--src/gallium/drivers/freedreno/a3xx/fd3_program.c7
-rw-r--r--src/gallium/drivers/freedreno/a4xx/a4xx.xml.h110
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_emit.c6
-rw-r--r--src/gallium/drivers/freedreno/a4xx/fd4_program.c4
-rw-r--r--src/gallium/drivers/freedreno/adreno_common.xml.h8
-rw-r--r--src/gallium/drivers/freedreno/adreno_pm4.xml.h14
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_cmdline.c4
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c20
-rw-r--r--src/gallium/drivers/freedreno/ir3/ir3_print.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_bld_interp.h3
-rw-r--r--src/gallium/drivers/llvmpipe/lp_context.h18
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_context.h8
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_point.c2
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_derived.c78
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_setup.c29
-rw-r--r--src/gallium/drivers/llvmpipe/lp_state_setup.h9
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp66
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp2
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp35
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp136
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h10
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp109
-rw-r--r--src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp1
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.c11
-rw-r--r--src/gallium/drivers/nouveau/nouveau_screen.h1
-rw-r--r--src/gallium/drivers/nouveau/nouveau_winsys.h7
-rw-r--r--src/gallium/drivers/nouveau/nv30/nv30_screen.c23
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_miptree.c3
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw.c6
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c3
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c4
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_screen.c19
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_tex.c3
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv50_vbo.c3
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv84_video.c8
-rw-r--r--src/gallium/drivers/nouveau/nv50/nv98_video.c57
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c3
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query.c2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c6
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c10
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c19
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.c31
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c5
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_surface.c2
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_tex.c4
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c3
-rw-r--r--src/gallium/drivers/r600/evergreen_state.c14
-rw-r--r--src/gallium/drivers/r600/r600_pipe.h1
-rw-r--r--src/gallium/drivers/r600/r600_state_common.c16
-rw-r--r--src/gallium/drivers/radeon/Makefile.am5
-rw-r--r--src/gallium/drivers/radeon/r600_perfcounter.c3
-rw-r--r--src/gallium/drivers/radeon/r600_query.c6
-rw-r--r--src/gallium/drivers/radeon/radeon_llvm_emit.c5
-rw-r--r--src/gallium/drivers/radeonsi/si_perfcounter.c2
-rw-r--r--src/gallium/drivers/radeonsi/si_state.c5
-rw-r--r--src/gallium/drivers/svga/svga_context.h7
-rw-r--r--src/gallium/drivers/svga/svga_draw.c17
-rw-r--r--src/gallium/drivers/svga/svga_state.c4
-rw-r--r--src/gallium/drivers/svga/svga_state_sampler.c22
-rw-r--r--src/gallium/drivers/vc4/Makefile.sources1
-rw-r--r--src/gallium/drivers/vc4/vc4_blit.c10
-rw-r--r--src/gallium/drivers/vc4/vc4_context.c17
-rw-r--r--src/gallium/drivers/vc4/vc4_drm.h45
-rw-r--r--src/gallium/drivers/vc4/vc4_job.c12
-rw-r--r--src/gallium/drivers/vc4/vc4_program.c5
-rw-r--r--src/gallium/drivers/vc4/vc4_qir.h1
-rw-r--r--src/gallium/drivers/vc4/vc4_qir_schedule.c619
-rw-r--r--src/gallium/drivers/vc4/vc4_qpu_schedule.c85
-rw-r--r--src/gallium/drivers/vc4/vc4_resource.c12
-rw-r--r--src/gallium/drivers/vc4/vc4_screen.c4
-rw-r--r--src/gallium/drivers/vc4/vc4_screen.h1
-rw-r--r--src/gallium/drivers/vc4/vc4_simulator.c89
-rw-r--r--src/gallium/drivers/vc4/vc4_simulator_validate.h2
-rw-r--r--src/gallium/drivers/vc4/vc4_state.c4
-rw-r--r--src/gallium/state_trackers/osmesa/osmesa.c96
-rw-r--r--src/gallium/state_trackers/va/buffer.c9
-rw-r--r--src/gallium/state_trackers/va/image.c3
-rw-r--r--src/gallium/state_trackers/va/picture.c1
-rw-r--r--src/gallium/state_trackers/va/picture_hevc.c5
-rw-r--r--src/gallium/state_trackers/va/postproc.c49
-rw-r--r--src/gallium/state_trackers/va/surface.c18
-rw-r--r--src/gallium/state_trackers/va/va_private.h2
-rw-r--r--src/gallium/targets/opencl/Makefile.am5
-rw-r--r--src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c39
106 files changed, 2095 insertions, 709 deletions
diff --git a/src/gallium/auxiliary/Makefile.am b/src/gallium/auxiliary/Makefile.am
index 7b026b5..bcdf297 100644
--- a/src/gallium/auxiliary/Makefile.am
+++ b/src/gallium/auxiliary/Makefile.am
@@ -1,11 +1,10 @@
include Makefile.sources
include $(top_srcdir)/src/gallium/Automake.inc
-noinst_LTLIBRARIES = libgallium.la
+noinst_LTLIBRARIES = libgallium_nir.la
AM_CFLAGS = \
-I$(top_srcdir)/src/loader \
- -I$(top_builddir)/src/glsl/nir \
-I$(top_srcdir)/src/gallium/auxiliary/util \
$(GALLIUM_CFLAGS) \
$(VISIBILITY_CFLAGS) \
@@ -15,11 +14,24 @@ AM_CXXFLAGS = \
$(VISIBILITY_CXXFLAGS) \
$(MSVC2008_COMPAT_CXXFLAGS)
+libgallium_nir_la_SOURCES = \
+ $(NIR_SOURCES)
+
+libgallium_nir_la_CFLAGS = \
+ -I$(top_builddir)/src/glsl/nir \
+ $(GALLIUM_CFLAGS) \
+ $(VISIBILITY_CFLAGS) \
+ $(MSVC2013_COMPAT_CFLAGS)
+
+noinst_LTLIBRARIES += libgallium.la
+
libgallium_la_SOURCES = \
$(C_SOURCES) \
- $(NIR_SOURCES) \
$(GENERATED_SOURCES)
+libgallium_la_LIBADD = \
+ libgallium_nir.la
+
if HAVE_MESA_LLVM
AM_CFLAGS += \
diff --git a/src/gallium/auxiliary/draw/draw_cliptest_tmp.h b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
index 779b237..34add82 100644
--- a/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_cliptest_tmp.h
@@ -91,34 +91,34 @@ static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
}
for (i = 0; i < 4; i++) {
- out->clip[i] = clipvertex[i];
- out->pre_clip_pos[i] = position[i];
+ out->clip_pos[i] = position[i];
}
+ /* Be careful with NaNs. Comparisons must be true for them. */
/* Do the hardwired planes first:
*/
if (flags & DO_CLIP_XY_GUARD_BAND) {
- if (-0.50 * position[0] + position[3] < 0) mask |= (1<<0);
- if ( 0.50 * position[0] + position[3] < 0) mask |= (1<<1);
- if (-0.50 * position[1] + position[3] < 0) mask |= (1<<2);
- if ( 0.50 * position[1] + position[3] < 0) mask |= (1<<3);
+ if (!(-0.50 * position[0] + position[3] >= 0)) mask |= (1<<0);
+ if (!( 0.50 * position[0] + position[3] >= 0)) mask |= (1<<1);
+ if (!(-0.50 * position[1] + position[3] >= 0)) mask |= (1<<2);
+ if (!( 0.50 * position[1] + position[3] >= 0)) mask |= (1<<3);
}
else if (flags & DO_CLIP_XY) {
- if (-position[0] + position[3] < 0) mask |= (1<<0);
- if ( position[0] + position[3] < 0) mask |= (1<<1);
- if (-position[1] + position[3] < 0) mask |= (1<<2);
- if ( position[1] + position[3] < 0) mask |= (1<<3);
+ if (!(-position[0] + position[3] >= 0)) mask |= (1<<0);
+ if (!( position[0] + position[3] >= 0)) mask |= (1<<1);
+ if (!(-position[1] + position[3] >= 0)) mask |= (1<<2);
+ if (!( position[1] + position[3] >= 0)) mask |= (1<<3);
}
/* Clip Z planes according to full cube, half cube or none.
*/
if (flags & DO_CLIP_FULL_Z) {
- if ( position[2] + position[3] < 0) mask |= (1<<4);
- if (-position[2] + position[3] < 0) mask |= (1<<5);
+ if (!( position[2] + position[3] >= 0)) mask |= (1<<4);
+ if (!(-position[2] + position[3] >= 0)) mask |= (1<<5);
}
else if (flags & DO_CLIP_HALF_Z) {
- if ( position[2] < 0) mask |= (1<<4);
- if (-position[2] + position[3] < 0) mask |= (1<<5);
+ if (!( position[2] >= 0)) mask |= (1<<4);
+ if (!(-position[2] + position[3] >= 0)) mask |= (1<<5);
}
if (flags & DO_CLIP_USER) {
@@ -137,7 +137,6 @@ static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
if (have_cd && num_written_clipdistance) {
float clipdist;
i = plane_idx - 6;
- out->have_clipdist = 1;
/* first four clip distance in first vector etc. */
if (i < 4)
clipdist = out->data[cd[0]][i];
@@ -146,7 +145,7 @@ static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
if (clipdist < 0 || util_is_inf_or_nan(clipdist))
mask |= 1 << plane_idx;
} else {
- if (dot4(clipvertex, plane[plane_idx]) < 0)
+ if (!(dot4(clipvertex, plane[plane_idx]) >= 0))
mask |= 1 << plane_idx;
}
}
@@ -192,7 +191,6 @@ static boolean TAG(do_cliptest)( struct pt_post_vs *pvs,
out = (struct vertex_header *)( (char *)out + info->stride );
}
-
return need_pipeline != 0;
}
diff --git a/src/gallium/auxiliary/draw/draw_llvm.c b/src/gallium/auxiliary/draw/draw_llvm.c
index ee97424..f25dafe 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_llvm.c
@@ -188,6 +188,7 @@ create_jit_sampler_type(struct gallivm_state *gallivm, const char *struct_name)
sampler_type = LLVMStructTypeInContext(gallivm->context, elem_types,
Elements(elem_types), 0);
+ (void) target; /* silence unused var warning for non-debug build */
LP_CHECK_MEMBER_OFFSET(struct draw_jit_sampler, min_lod,
target, sampler_type,
DRAW_JIT_SAMPLER_MIN_LOD);
@@ -234,6 +235,8 @@ create_jit_context_type(struct gallivm_state *gallivm,
PIPE_MAX_SAMPLERS); /* samplers */
context_type = LLVMStructTypeInContext(gallivm->context, elem_types,
Elements(elem_types), 0);
+
+ (void) target; /* silence unused var warning for non-debug build */
LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, vs_constants,
target, context_type, DRAW_JIT_CTX_CONSTANTS);
LP_CHECK_MEMBER_OFFSET(struct draw_jit_context, num_vs_constants,
@@ -375,15 +378,14 @@ static LLVMTypeRef
create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
{
LLVMTargetDataRef target = gallivm->target;
- LLVMTypeRef elem_types[4];
+ LLVMTypeRef elem_types[3];
LLVMTypeRef vertex_header;
char struct_name[24];
util_snprintf(struct_name, 23, "vertex_header%d", data_elems);
elem_types[DRAW_JIT_VERTEX_VERTEX_ID] = LLVMIntTypeInContext(gallivm->context, 32);
- elem_types[DRAW_JIT_VERTEX_CLIP] = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
- elem_types[DRAW_JIT_VERTEX_PRE_CLIP_POS] = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
+ elem_types[DRAW_JIT_VERTEX_CLIP_POS] = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
elem_types[DRAW_JIT_VERTEX_DATA] = LLVMArrayType(elem_types[1], data_elems);
vertex_header = LLVMStructTypeInContext(gallivm->context, elem_types,
@@ -403,12 +405,10 @@ create_jit_vertex_header(struct gallivm_state *gallivm, int data_elems)
target, vertex_header,
DRAW_JIT_VERTEX_VERTEX_ID);
*/
- LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip,
- target, vertex_header,
- DRAW_JIT_VERTEX_CLIP);
- LP_CHECK_MEMBER_OFFSET(struct vertex_header, pre_clip_pos,
+ (void) target; /* silence unused var warning for non-debug build */
+ LP_CHECK_MEMBER_OFFSET(struct vertex_header, clip_pos,
target, vertex_header,
- DRAW_JIT_VERTEX_PRE_CLIP_POS);
+ DRAW_JIT_VERTEX_CLIP_POS);
LP_CHECK_MEMBER_OFFSET(struct vertex_header, data,
target, vertex_header,
DRAW_JIT_VERTEX_DATA);
@@ -826,7 +826,7 @@ store_aos(struct gallivm_state *gallivm,
* struct vertex_header {
* unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;
* unsigned edgeflag:1;
- * unsigned have_clipdist:1;
+ * unsigned pad:1;
* unsigned vertex_id:16;
* [...]
* }
@@ -838,7 +838,7 @@ store_aos(struct gallivm_state *gallivm,
* {
* return (x >> 16) | // vertex_id
* ((x & 0x3fff) << 18) | // clipmask
- * ((x & 0x4000) << 3) | // have_clipdist
+ * ((x & 0x4000) << 3) | // pad
* ((x & 0x8000) << 1); // edgeflag
* }
*/
@@ -850,19 +850,23 @@ adjust_mask(struct gallivm_state *gallivm,
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef vertex_id;
LLVMValueRef clipmask;
- LLVMValueRef have_clipdist;
+ LLVMValueRef pad;
LLVMValueRef edgeflag;
vertex_id = LLVMBuildLShr(builder, mask, lp_build_const_int32(gallivm, 16), "");
clipmask = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x3fff), "");
clipmask = LLVMBuildShl(builder, clipmask, lp_build_const_int32(gallivm, 18), "");
- have_clipdist = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");
- have_clipdist = LLVMBuildShl(builder, have_clipdist, lp_build_const_int32(gallivm, 3), "");
+ if (0) {
+ pad = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x4000), "");
+ pad = LLVMBuildShl(builder, pad, lp_build_const_int32(gallivm, 3), "");
+ }
edgeflag = LLVMBuildAnd(builder, mask, lp_build_const_int32(gallivm, 0x8000), "");
edgeflag = LLVMBuildShl(builder, edgeflag, lp_build_const_int32(gallivm, 1), "");
mask = LLVMBuildOr(builder, vertex_id, clipmask, "");
- mask = LLVMBuildOr(builder, mask, have_clipdist, "");
+ if (0) {
+ mask = LLVMBuildOr(builder, mask, pad, "");
+ }
mask = LLVMBuildOr(builder, mask, edgeflag, "");
#endif
return mask;
@@ -877,7 +881,7 @@ store_aos_array(struct gallivm_state *gallivm,
int attrib,
int num_outputs,
LLVMValueRef clipmask,
- boolean have_clipdist)
+ boolean need_edgeflag)
{
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef attr_index = lp_build_const_int32(gallivm, attrib);
@@ -908,11 +912,15 @@ store_aos_array(struct gallivm_state *gallivm,
* code here. See struct vertex_header in draw_private.h.
*/
assert(DRAW_TOTAL_CLIP_PLANES==14);
- /* initialize vertex id:16 = 0xffff, have_clipdist:1 = 0, edgeflag:1 = 1 */
- vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
- if (have_clipdist)
- vertex_id_pad_edgeflag |= 1 << (DRAW_TOTAL_CLIP_PLANES+1);
- val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type), vertex_id_pad_edgeflag);
+ /* initialize vertex id:16 = 0xffff, pad:1 = 0, edgeflag:1 = 1 */
+ if (!need_edgeflag) {
+ vertex_id_pad_edgeflag = (0xffff << 16) | (1 << DRAW_TOTAL_CLIP_PLANES);
+ }
+ else {
+ vertex_id_pad_edgeflag = (0xffff << 16);
+ }
+ val = lp_build_const_int_vec(gallivm, lp_int_type(soa_type),
+ vertex_id_pad_edgeflag);
/* OR with the clipmask */
cliptmp = LLVMBuildOr(builder, val, clipmask, "");
for (i = 0; i < vector_length; i++) {
@@ -942,7 +950,7 @@ convert_to_aos(struct gallivm_state *gallivm,
LLVMValueRef clipmask,
int num_outputs,
struct lp_type soa_type,
- boolean have_clipdist)
+ boolean need_edgeflag)
{
LLVMBuilderRef builder = gallivm->builder;
unsigned chan, attrib, i;
@@ -998,7 +1006,8 @@ convert_to_aos(struct gallivm_state *gallivm,
aos,
attrib,
num_outputs,
- clipmask, have_clipdist);
+ clipmask,
+ need_edgeflag);
}
#if DEBUG_STORE
lp_build_printf(gallivm, " # storing end\n");
@@ -1014,7 +1023,7 @@ store_clip(struct gallivm_state *gallivm,
const struct lp_type vs_type,
LLVMValueRef io_ptr,
LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
- boolean pre_clip_pos, int idx)
+ int idx)
{
LLVMBuilderRef builder = gallivm->builder;
LLVMValueRef soa[4];
@@ -1041,14 +1050,8 @@ store_clip(struct gallivm_state *gallivm,
soa[2] = LLVMBuildLoad(builder, outputs[idx][2], ""); /*z0 z1 .. zn*/
soa[3] = LLVMBuildLoad(builder, outputs[idx][3], ""); /*w0 w1 .. wn*/
- if (!pre_clip_pos) {
- for (i = 0; i < vs_type.length; i++) {
- clip_ptrs[i] = draw_jit_header_clip(gallivm, io_ptrs[i]);
- }
- } else {
- for (i = 0; i < vs_type.length; i++) {
- clip_ptrs[i] = draw_jit_header_pre_clip_pos(gallivm, io_ptrs[i]);
- }
+ for (i = 0; i < vs_type.length; i++) {
+ clip_ptrs[i] = draw_jit_header_clip_pos(gallivm, io_ptrs[i]);
}
lp_build_transpose_aos(gallivm, vs_type, soa, soa);
@@ -1140,11 +1143,7 @@ generate_clipmask(struct draw_llvm *llvm,
struct gallivm_state *gallivm,
struct lp_type vs_type,
LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS],
- boolean clip_xy,
- boolean clip_z,
- boolean clip_user,
- boolean clip_halfz,
- unsigned ucp_enable,
+ struct draw_llvm_variant_key *key,
LLVMValueRef context_ptr,
boolean *have_clipdist)
{
@@ -1160,7 +1159,9 @@ generate_clipmask(struct draw_llvm *llvm,
const unsigned pos = llvm->draw->vs.position_output;
const unsigned cv = llvm->draw->vs.clipvertex_output;
int num_written_clipdistance = llvm->draw->vs.vertex_shader->info.num_written_clipdistance;
- bool have_cd = false;
+ boolean have_cd = false;
+ boolean clip_user = key->clip_user;
+ unsigned ucp_enable = key->ucp_enable;
unsigned cd[2];
cd[0] = llvm->draw->vs.clipdistance_output[0];
@@ -1200,8 +1201,16 @@ generate_clipmask(struct draw_llvm *llvm,
cv_w = pos_w;
}
+ /*
+ * Be careful with the comparisons and NaNs (using llvm's unordered
+ * comparisons here).
+ */
/* Cliptest, for hardwired planes */
- if (clip_xy) {
+ /*
+ * XXX should take guardband into account (currently not in key).
+ * Otherwise might run the draw pipeline stages for nothing.
+ */
+ if (key->clip_xy) {
/* plane 1 */
test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, pos_x , pos_w);
temp = shift;
@@ -1229,9 +1238,9 @@ generate_clipmask(struct draw_llvm *llvm,
mask = LLVMBuildOr(builder, mask, test, "");
}
- if (clip_z) {
+ if (key->clip_z) {
temp = lp_build_const_int_vec(gallivm, i32_type, 16);
- if (clip_halfz) {
+ if (key->clip_halfz) {
/* plane 5 */
test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_GREATER, zero, pos_z);
test = LLVMBuildAnd(builder, test, temp, "");
@@ -1318,6 +1327,20 @@ generate_clipmask(struct draw_llvm *llvm,
}
}
}
+ if (key->need_edgeflags) {
+ /*
+ * This isn't really part of clipmask but stored the same in vertex
+ * header later, so do it here.
+ */
+ unsigned edge_attr = llvm->draw->vs.edgeflag_output;
+ LLVMValueRef one = lp_build_const_vec(gallivm, f32_type, 1.0);
+ LLVMValueRef edgeflag = LLVMBuildLoad(builder, outputs[edge_attr][0], "");
+ test = lp_build_compare(gallivm, f32_type, PIPE_FUNC_EQUAL, one, edgeflag);
+ temp = lp_build_const_int_vec(gallivm, i32_type,
+ 1LL << DRAW_TOTAL_CLIP_PLANES);
+ test = LLVMBuildAnd(builder, test, temp, "");
+ mask = LLVMBuildOr(builder, mask, test, "");
+ }
return mask;
}
@@ -1329,7 +1352,8 @@ generate_clipmask(struct draw_llvm *llvm,
static LLVMValueRef
clipmask_booli32(struct gallivm_state *gallivm,
const struct lp_type vs_type,
- LLVMValueRef clipmask_bool_ptr)
+ LLVMValueRef clipmask_bool_ptr,
+ boolean edgeflag_in_clipmask)
{
LLVMBuilderRef builder = gallivm->builder;
LLVMTypeRef int32_type = LLVMInt32TypeInContext(gallivm->context);
@@ -1339,8 +1363,18 @@ clipmask_booli32(struct gallivm_state *gallivm,
int i;
/*
- * Can do this with log2(vector length) pack instructions and one extract
- * (as we don't actually need a or) with sse2 which would be way better.
+ * We need to invert the edgeflag bit from the clipmask here
+ * (because the result is really if we want to run the pipeline or not
+ * and we (may) need it if edgeflag was 0).
+ */
+ if (edgeflag_in_clipmask) {
+ struct lp_type i32_type = lp_int_type(vs_type);
+ LLVMValueRef edge = lp_build_const_int_vec(gallivm, i32_type,
+ 1LL << DRAW_TOTAL_CLIP_PLANES);
+ clipmask_bool = LLVMBuildXor(builder, clipmask_bool, edge, "");
+ }
+ /*
+ * Could do much better with just cmp/movmskps.
*/
for (i=0; i < vs_type.length; i++) {
temp = LLVMBuildExtractElement(builder, clipmask_bool,
@@ -1536,8 +1570,9 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
const boolean bypass_viewport = key->has_gs || key->bypass_viewport ||
llvm->draw->vs.vertex_shader->info.writes_viewport_index;
const boolean enable_cliptest = !key->has_gs && (key->clip_xy ||
- key->clip_z ||
- key->clip_user);
+ key->clip_z ||
+ key->clip_user ||
+ key->need_edgeflags);
LLVMValueRef variant_func;
const unsigned pos = llvm->draw->vs.position_output;
const unsigned cv = llvm->draw->vs.clipvertex_output;
@@ -1766,8 +1801,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
if (pos != -1 && cv != -1) {
/* store original positions in clip before further manipulation */
- store_clip(gallivm, vs_type, io, outputs, FALSE, key->clip_user ? cv : pos);
- store_clip(gallivm, vs_type, io, outputs, TRUE, pos);
+ store_clip(gallivm, vs_type, io, outputs, pos);
/* do cliptest */
if (enable_cliptest) {
@@ -1777,11 +1811,7 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
gallivm,
vs_type,
outputs,
- key->clip_xy,
- key->clip_z,
- key->clip_user,
- key->clip_halfz,
- key->ucp_enable,
+ key,
context_ptr, &have_clipdist);
temp = LLVMBuildOr(builder, clipmask, temp, "");
/* store temporary clipping boolean value */
@@ -1806,14 +1836,15 @@ draw_llvm_generate(struct draw_llvm *llvm, struct draw_llvm_variant *variant,
*/
convert_to_aos(gallivm, io, NULL, outputs, clipmask,
vs_info->num_outputs, vs_type,
- have_clipdist);
+ enable_cliptest && key->need_edgeflags);
}
lp_build_loop_end_cond(&lp_loop, count, step, LLVMIntUGE);
sampler->destroy(sampler);
/* return clipping boolean value for function */
- ret = clipmask_booli32(gallivm, vs_type, clipmask_bool_ptr);
+ ret = clipmask_booli32(gallivm, vs_type, clipmask_bool_ptr,
+ enable_cliptest && key->need_edgeflags);
LLVMBuildRet(builder, ret);
@@ -1847,6 +1878,7 @@ draw_llvm_make_variant_key(struct draw_llvm *llvm, char *store)
key->clip_user = llvm->draw->clip_user;
key->bypass_viewport = llvm->draw->bypass_viewport;
key->clip_halfz = llvm->draw->rasterizer->clip_halfz;
+ /* XXX assumes edgeflag output not at 0 */
key->need_edgeflags = (llvm->draw->vs.edgeflag_output ? TRUE : FALSE);
key->ucp_enable = llvm->draw->rasterizer->clip_plane_enable;
key->has_gs = llvm->draw->gs.geometry_shader != NULL;
diff --git a/src/gallium/auxiliary/draw/draw_llvm.h b/src/gallium/auxiliary/draw/draw_llvm.h
index d153c16..f617a29 100644
--- a/src/gallium/auxiliary/draw/draw_llvm.h
+++ b/src/gallium/auxiliary/draw/draw_llvm.h
@@ -104,8 +104,7 @@ enum {
enum {
DRAW_JIT_VERTEX_VERTEX_ID = 0,
- DRAW_JIT_VERTEX_CLIP,
- DRAW_JIT_VERTEX_PRE_CLIP_POS,
+ DRAW_JIT_VERTEX_CLIP_POS,
DRAW_JIT_VERTEX_DATA
};
@@ -162,11 +161,8 @@ enum {
#define draw_jit_header_id(_gallivm, _ptr) \
lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_VERTEX_VERTEX_ID, "id")
-#define draw_jit_header_clip(_gallivm, _ptr) \
- lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_VERTEX_CLIP, "clip")
-
-#define draw_jit_header_pre_clip_pos(_gallivm, _ptr) \
- lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_VERTEX_PRE_CLIP_POS, "pre_clip_pos")
+#define draw_jit_header_clip_pos(_gallivm, _ptr) \
+ lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_VERTEX_CLIP_POS, "clip_pos")
#define draw_jit_header_data(_gallivm, _ptr) \
lp_build_struct_get_ptr(_gallivm, _ptr, DRAW_JIT_VERTEX_DATA, "data")
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index 877db59..3ce550a 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -646,6 +646,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
struct pipe_context *pipe = draw->pipe;
const struct pipe_rasterizer_state *rast = draw->rasterizer;
uint num_samplers;
+ uint num_sampler_views;
void *r;
assert(draw->rasterizer->line_smooth);
@@ -667,9 +668,9 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
draw_aaline_prepare_outputs(draw, draw->pipeline.aaline);
/* how many samplers? */
- /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
- num_samplers = MAX2(aaline->num_sampler_views, aaline->num_samplers);
- num_samplers = MAX2(num_samplers, aaline->fs->sampler_unit + 1);
+ /* we'll use sampler/texture[aaline->sampler_unit] for the alpha texture */
+ num_samplers = MAX2(aaline->num_samplers, aaline->fs->sampler_unit + 1);
+ num_sampler_views = MAX2(num_samplers, aaline->num_sampler_views);
aaline->state.sampler[aaline->fs->sampler_unit] = aaline->sampler_cso;
pipe_sampler_view_reference(&aaline->state.sampler_views[aaline->fs->sampler_unit],
@@ -681,7 +682,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header)
num_samplers, aaline->state.sampler);
aaline->driver_set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0,
- num_samplers, aaline->state.sampler_views);
+ num_sampler_views, aaline->state.sampler_views);
/* Disable triangle culling, stippling, unfilled mode etc. */
r = draw_get_rasterizer_no_cull(draw, rast->scissor, rast->flatshade);
diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c
index 47765cd..67d8eca 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_clip.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c
@@ -59,6 +59,8 @@ struct clip_stage {
struct draw_stage stage; /**< base class */
unsigned pos_attr;
+ boolean have_clipdist;
+ int cv_attr;
/* List of the attributes to be constant interpolated. */
uint num_const_attribs;
@@ -145,20 +147,23 @@ static void interp(const struct clip_stage *clip,
*/
dst->clipmask = 0;
dst->edgeflag = 0; /* will get overwritten later */
- dst->have_clipdist = in->have_clipdist;
+ dst->pad = 0;
dst->vertex_id = UNDEFINED_VERTEX_ID;
/* Interpolate the clip-space coords.
*/
- interp_attr(dst->clip, t, in->clip, out->clip);
+ if (clip->cv_attr >= 0) {
+ interp_attr(dst->data[clip->cv_attr], t,
+ in->data[clip->cv_attr], out->data[clip->cv_attr]);
+ }
/* interpolate the clip-space position */
- interp_attr(dst->pre_clip_pos, t, in->pre_clip_pos, out->pre_clip_pos);
+ interp_attr(dst->clip_pos, t, in->clip_pos, out->clip_pos);
/* Do the projective divide and viewport transformation to get
* new window coordinates:
*/
{
- const float *pos = dst->pre_clip_pos;
+ const float *pos = dst->clip_pos;
const float *scale =
clip->stage.draw->viewports[viewport_index].scale;
const float *trans =
@@ -192,11 +197,11 @@ static void interp(const struct clip_stage *clip,
t_nopersp = t;
/* find either in.x != out.x or in.y != out.y */
for (k = 0; k < 2; k++) {
- if (in->pre_clip_pos[k] != out->pre_clip_pos[k]) {
+ if (in->clip_pos[k] != out->clip_pos[k]) {
/* do divide by W, then compute linear interpolation factor */
- float in_coord = in->pre_clip_pos[k] / in->pre_clip_pos[3];
- float out_coord = out->pre_clip_pos[k] / out->pre_clip_pos[3];
- float dst_coord = dst->pre_clip_pos[k] / dst->pre_clip_pos[3];
+ float in_coord = in->clip_pos[k] / in->clip_pos[3];
+ float out_coord = out->clip_pos[k] / out->clip_pos[3];
+ float dst_coord = dst->clip_pos[k] / dst->clip_pos[3];
t_nopersp = (dst_coord - out_coord) / (in_coord - out_coord);
break;
}
@@ -214,9 +219,9 @@ static void interp(const struct clip_stage *clip,
* Triangle is considered null/empty if its area is equal to zero.
*/
static inline boolean
-is_tri_null(struct draw_context *draw, const struct prim_header *header)
+is_tri_null(const struct clip_stage *clip, const struct prim_header *header)
{
- const unsigned pos_attr = draw_current_shader_position_output(draw);
+ const unsigned pos_attr = clip->pos_attr;
float x1 = header->v[1]->data[pos_attr][0] - header->v[0]->data[pos_attr][0];
float y1 = header->v[1]->data[pos_attr][1] - header->v[0]->data[pos_attr][1];
float z1 = header->v[1]->data[pos_attr][2] - header->v[0]->data[pos_attr][2];
@@ -242,6 +247,7 @@ static void emit_poly(struct draw_stage *stage,
unsigned n,
const struct prim_header *origPrim)
{
+ const struct clip_stage *clipper = clip_stage(stage);
struct prim_header header;
unsigned i;
ushort edge_first, edge_middle, edge_last;
@@ -281,7 +287,7 @@ static void emit_poly(struct draw_stage *stage,
header.v[2] = inlist[0]; /* the provoking vertex */
}
- tri_null = is_tri_null(stage->draw, &header);
+ tri_null = is_tri_null(clipper, &header);
/* If we generated a triangle with an area, aka. non-null triangle,
* or if the previous triangle was also null then skip all subsequent
* null triangles */
@@ -306,11 +312,18 @@ static void emit_poly(struct draw_stage *stage,
debug_printf("Clipped tri: (flat-shade-first = %d)\n",
stage->draw->rasterizer->flatshade_first);
for (j = 0; j < 3; j++) {
- debug_printf(" Vert %d: clip: %f %f %f %f\n", j,
- header.v[j]->clip[0],
- header.v[j]->clip[1],
- header.v[j]->clip[2],
- header.v[j]->clip[3]);
+ debug_printf(" Vert %d: clip pos: %f %f %f %f\n", j,
+ header.v[j]->clip_pos[0],
+ header.v[j]->clip_pos[1],
+ header.v[j]->clip_pos[2],
+ header.v[j]->clip_pos[3]);
+ if (clipper->cv_attr >= 0) {
+ debug_printf(" Vert %d: cv: %f %f %f %f\n", j,
+ header.v[j]->data[clipper->cv_attr][0],
+ header.v[j]->data[clipper->cv_attr][1],
+ header.v[j]->data[clipper->cv_attr][2],
+ header.v[j]->data[clipper->cv_attr][3]);
+ }
for (k = 0; k < draw_num_shader_outputs(stage->draw); k++) {
debug_printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k,
header.v[j]->data[k][0],
@@ -320,7 +333,7 @@ static void emit_poly(struct draw_stage *stage,
}
}
}
- stage->next->tri( stage->next, &header );
+ stage->next->tri(stage->next, &header);
}
}
@@ -345,15 +358,28 @@ static inline float getclipdist(const struct clip_stage *clipper,
{
const float *plane;
float dp;
- if (vert->have_clipdist && plane_idx >= 6) {
+ if (plane_idx < 6) {
+ /* ordinary xyz view volume clipping uses pos output */
+ plane = clipper->plane[plane_idx];
+ dp = dot4(vert->clip_pos, plane);
+ }
+ else if (clipper->have_clipdist) {
/* pick the correct clipdistance element from the output vectors */
int _idx = plane_idx - 6;
int cdi = _idx >= 4;
int vidx = cdi ? _idx - 4 : _idx;
dp = vert->data[draw_current_shader_clipdistance_output(clipper->stage.draw, cdi)][vidx];
} else {
+ /*
+ * legacy user clip planes or gl_ClipVertex
+ */
plane = clipper->plane[plane_idx];
- dp = dot4(vert->clip, plane);
+ if (clipper->cv_attr >= 0) {
+ dp = dot4(vert->data[clipper->cv_attr], plane);
+ }
+ else {
+ dp = dot4(vert->clip_pos, plane);
+ }
}
return dp;
}
@@ -400,13 +426,22 @@ do_clip_tri(struct draw_stage *stage,
viewport_index = draw_viewport_index(clipper->stage.draw, prov_vertex);
if (DEBUG_CLIP) {
- const float *v0 = header->v[0]->clip;
- const float *v1 = header->v[1]->clip;
- const float *v2 = header->v[2]->clip;
- debug_printf("Clip triangle:\n");
+ const float *v0 = header->v[0]->clip_pos;
+ const float *v1 = header->v[1]->clip_pos;
+ const float *v2 = header->v[2]->clip_pos;
+ debug_printf("Clip triangle pos:\n");
debug_printf(" %f, %f, %f, %f\n", v0[0], v0[1], v0[2], v0[3]);
debug_printf(" %f, %f, %f, %f\n", v1[0], v1[1], v1[2], v1[3]);
debug_printf(" %f, %f, %f, %f\n", v2[0], v2[1], v2[2], v2[3]);
+ if (clipper->cv_attr >= 0) {
+ const float *v0 = header->v[0]->data[clipper->cv_attr];
+ const float *v1 = header->v[1]->data[clipper->cv_attr];
+ const float *v2 = header->v[2]->data[clipper->cv_attr];
+ debug_printf("Clip triangle cv:\n");
+ debug_printf(" %f, %f, %f, %f\n", v0[0], v0[1], v0[2], v0[3]);
+ debug_printf(" %f, %f, %f, %f\n", v1[0], v1[1], v1[2], v1[3]);
+ debug_printf(" %f, %f, %f, %f\n", v2[0], v2[1], v2[2], v2[3]);
+ }
}
/*
@@ -555,7 +590,7 @@ do_clip_tri(struct draw_stage *stage,
/* Emit the polygon as triangles to the setup stage:
*/
- emit_poly( stage, inlist, inEdges, n, header );
+ emit_poly(stage, inlist, inEdges, n, header);
}
}
@@ -567,7 +602,7 @@ do_clip_line(struct draw_stage *stage,
struct prim_header *header,
unsigned clipmask)
{
- const struct clip_stage *clipper = clip_stage( stage );
+ const struct clip_stage *clipper = clip_stage(stage);
struct vertex_header *v0 = header->v[0];
struct vertex_header *v1 = header->v[1];
struct vertex_header *prov_vertex;
@@ -671,9 +706,9 @@ clip_point_guard_xy(struct draw_stage *stage, struct prim_header *header)
* automatically). These would usually be captured by depth clip
* too but this can be disabled.
*/
- if (header->v[0]->clip[3] <= 0.0f ||
- util_is_inf_or_nan(header->v[0]->clip[0]) ||
- util_is_inf_or_nan(header->v[0]->clip[1]))
+ if (header->v[0]->clip_pos[3] <= 0.0f ||
+ util_is_inf_or_nan(header->v[0]->clip_pos[0]) ||
+ util_is_inf_or_nan(header->v[0]->clip_pos[1]))
return;
}
stage->next->point(stage->next, header);
@@ -773,7 +808,7 @@ find_interp(const struct draw_fragment_shader *fs, int *indexed_interp,
static void
clip_init_state(struct draw_stage *stage)
{
- struct clip_stage *clipper = clip_stage( stage );
+ struct clip_stage *clipper = clip_stage(stage);
const struct draw_context *draw = stage->draw;
const struct draw_fragment_shader *fs = draw->fs.fragment_shader;
const struct tgsi_shader_info *info = draw_get_shader_info(draw);
@@ -781,6 +816,13 @@ clip_init_state(struct draw_stage *stage)
int indexed_interp[2];
clipper->pos_attr = draw_current_shader_position_output(draw);
+ clipper->have_clipdist = draw_current_shader_num_written_clipdistances(draw) > 0;
+ if (draw_current_shader_clipvertex_output(draw) != clipper->pos_attr) {
+ clipper->cv_attr = (int)draw_current_shader_clipvertex_output(draw);
+ }
+ else {
+ clipper->cv_attr = -1;
+ }
/* We need to know for each attribute what kind of interpolation is
* done on it (flat, smooth or noperspective). But the information
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index b58d753..cf52ca4 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -477,6 +477,7 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header)
struct pipe_context *pipe = pstip->pipe;
struct draw_context *draw = stage->draw;
uint num_samplers;
+ uint num_sampler_views;
assert(stage->draw->rasterizer->poly_stipple_enable);
@@ -490,8 +491,8 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header)
/* how many samplers? */
/* we'll use sampler/texture[pstip->sampler_unit] for the stipple */
- num_samplers = MAX2(pstip->num_sampler_views, pstip->num_samplers);
- num_samplers = MAX2(num_samplers, pstip->fs->sampler_unit + 1);
+ num_samplers = MAX2(pstip->num_samplers, pstip->fs->sampler_unit + 1);
+ num_sampler_views = MAX2(pstip->num_sampler_views, num_samplers);
/* plug in our sampler, texture */
pstip->state.samplers[pstip->fs->sampler_unit] = pstip->sampler_cso;
@@ -506,7 +507,7 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header)
num_samplers, pstip->state.samplers);
pstip->driver_set_sampler_views(pipe, PIPE_SHADER_FRAGMENT, 0,
- num_samplers, pstip->state.sampler_views);
+ num_sampler_views, pstip->state.sampler_views);
draw->suspend_flushing = FALSE;
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 5584c4a..8774beb 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -86,11 +86,10 @@ struct draw_vertex_buffer {
struct vertex_header {
unsigned clipmask:DRAW_TOTAL_CLIP_PLANES;
unsigned edgeflag:1;
- unsigned have_clipdist:1;
+ unsigned pad:1;
unsigned vertex_id:16;
- float clip[4];
- float pre_clip_pos[4];
+ float clip_pos[4];
/* This will probably become float (*data)[4] soon:
*/
diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c
index d1eafd8..0b9fab5 100644
--- a/src/gallium/auxiliary/draw/draw_pt_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_emit.c
@@ -60,7 +60,7 @@ draw_pt_emit_prepare(struct pt_emit *emit,
/* XXX: need to flush to get prim_vbuf.c to release its allocation??
*/
- draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+ draw_do_flush(draw, DRAW_FLUSH_BACKEND);
/* XXX: may need to defensively reset this later on as clipping can
* clobber this state in the render backend.
@@ -80,7 +80,7 @@ draw_pt_emit_prepare(struct pt_emit *emit,
unsigned emit_sz = 0;
unsigned src_buffer = 0;
unsigned output_format;
- unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) );
+ unsigned src_offset = vinfo->attrib[i].src_index * 4 * sizeof(float);
output_format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
emit_sz = draw_translate_vinfo_size(vinfo->attrib[i].emit);
@@ -89,8 +89,8 @@ draw_pt_emit_prepare(struct pt_emit *emit,
assert(emit_sz != 0);
if (vinfo->attrib[i].emit == EMIT_1F_PSIZE) {
- src_buffer = 1;
- src_offset = 0;
+ src_buffer = 1;
+ src_offset = 0;
}
hw_key.element[i].type = TRANSLATE_ELEMENT_NORMAL;
@@ -138,7 +138,7 @@ draw_pt_emit(struct pt_emit *emit,
/* XXX: need to flush to get prim_vbuf.c to release its allocation??
*/
- draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+ draw_do_flush(draw, DRAW_FLUSH_BACKEND);
if (vertex_count == 0)
return;
@@ -152,31 +152,31 @@ draw_pt_emit(struct pt_emit *emit,
(ushort)translate->key.output_stride,
(ushort)vertex_count);
- hw_verts = render->map_vertices( render );
+ hw_verts = render->map_vertices(render);
if (!hw_verts) {
debug_warn_once("map of vertex buffer failed (out of memory?)");
return;
}
translate->set_buffer(translate,
- 0,
- vertex_data,
- stride,
- ~0);
+ 0,
+ vertex_data,
+ stride,
+ ~0);
translate->set_buffer(translate,
- 1,
- &draw->rasterizer->point_size,
- 0,
- ~0);
+ 1,
+ &draw->rasterizer->point_size,
+ 0,
+ ~0);
/* fetch/translate vertex attribs to fill hw_verts[] */
translate->run(translate,
- 0,
- vertex_count,
- draw->start_instance,
- draw->instance_id,
- hw_verts );
+ 0,
+ vertex_count,
+ 0,
+ 0,
+ hw_verts);
render->unmap_vertices(render, 0, vertex_count - 1);
@@ -212,7 +212,7 @@ draw_pt_emit_linear(struct pt_emit *emit,
#endif
/* XXX: need to flush to get prim_vbuf.c to release its allocation??
*/
- draw_do_flush( draw, DRAW_FLUSH_BACKEND );
+ draw_do_flush(draw, DRAW_FLUSH_BACKEND);
/* XXX: and work out some way to coordinate the render primitive
* between vbuf.c and here...
@@ -224,35 +224,35 @@ draw_pt_emit_linear(struct pt_emit *emit,
(ushort)count))
goto fail;
- hw_verts = render->map_vertices( render );
+ hw_verts = render->map_vertices(render);
if (!hw_verts)
goto fail;
translate->set_buffer(translate, 0,
- vertex_data, stride, count - 1);
+ vertex_data, stride, count - 1);
translate->set_buffer(translate, 1,
- &draw->rasterizer->point_size,
- 0, ~0);
+ &draw->rasterizer->point_size,
+ 0, ~0);
translate->run(translate,
0,
count,
- draw->start_instance,
- draw->instance_id,
+ 0,
+ 0,
hw_verts);
if (0) {
unsigned i;
for (i = 0; i < count; i++) {
debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i);
- draw_dump_emitted_vertex( emit->vinfo,
- (const uint8_t *)hw_verts +
- translate->key.output_stride * i );
+ draw_dump_emitted_vertex(emit->vinfo,
+ (const uint8_t *)hw_verts +
+ translate->key.output_stride * i);
}
}
- render->unmap_vertices( render, 0, count - 1 );
+ render->unmap_vertices(render, 0, count - 1);
for (start = i = 0;
i < prim_info->primitive_count;
@@ -262,7 +262,7 @@ draw_pt_emit_linear(struct pt_emit *emit,
start,
prim_info->primitive_lengths[i]);
}
-
+
render->release_vertices(render);
return;
diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
index 2d7569b..edd4541 100644
--- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline_llvm.c
@@ -453,6 +453,7 @@ llvm_pipeline_generic(struct draw_pt_middle_end *middle,
draw->vs.vertex_shader->info.writes_viewport_index)) {
clipped = draw_pt_post_vs_run( fpme->post_vs, vert_info, prim_info );
}
+ /* "clipped" also includes non-one edgeflag */
if (clipped) {
opt |= PT_PIPELINE;
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
index f0d5e0f..3a9101a 100644
--- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c
+++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c
@@ -58,7 +58,7 @@ initialize_vertex_header(struct vertex_header *header)
{
header->clipmask = 0;
header->edgeflag = 1;
- header->have_clipdist = 0;
+ header->pad = 0;
header->vertex_id = UNDEFINED_VERTEX_ID;
}
diff --git a/src/gallium/auxiliary/draw/draw_pt_so_emit.c b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
index 20de26f..261bd34 100644
--- a/src/gallium/auxiliary/draw/draw_pt_so_emit.c
+++ b/src/gallium/auxiliary/draw/draw_pt_so_emit.c
@@ -275,7 +275,7 @@ void draw_pt_so_emit( struct pt_so_emit *emit,
emit->generated_primitives = 0;
emit->input_vertex_stride = input_verts->stride;
if (emit->use_pre_clip_pos)
- emit->pre_clip_pos = input_verts->verts->pre_clip_pos;
+ emit->pre_clip_pos = input_verts->verts->clip_pos;
emit->inputs = (const float (*)[4])input_verts->verts->data;
diff --git a/src/gallium/auxiliary/nir/tgsi_to_nir.c b/src/gallium/auxiliary/nir/tgsi_to_nir.c
index 5def6d3..2cb723c 100644
--- a/src/gallium/auxiliary/nir/tgsi_to_nir.c
+++ b/src/gallium/auxiliary/nir/tgsi_to_nir.c
@@ -22,10 +22,6 @@
* IN THE SOFTWARE.
*/
-#ifdef __GNUC__
-#pragma GCC diagnostic ignored "-Wdeclaration-after-statement"
-#endif
-
#include "util/ralloc.h"
#include "glsl/nir/nir.h"
#include "glsl/nir/nir_control_flow.h"
@@ -1069,7 +1065,9 @@ ttn_kill(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
static void
ttn_kill_if(nir_builder *b, nir_op op, nir_alu_dest dest, nir_ssa_def **src)
{
- nir_ssa_def *cmp = nir_bany4(b, nir_flt(b, src[0], nir_imm_float(b, 0.0)));
+ nir_ssa_def *cmp = nir_bany_inequal4(b, nir_flt(b, src[0],
+ nir_imm_float(b, 0.0)),
+ nir_imm_int(b, 0));
nir_intrinsic_instr *discard =
nir_intrinsic_instr_create(b->shader, nir_intrinsic_discard_if);
discard->src[0] = nir_src_for_ssa(cmp);
@@ -1901,6 +1899,7 @@ ttn_emit_instruction(struct ttn_compile *c)
&tgsi_dst->Indirect : NULL;
store->num_components = 4;
+ store->const_index[0] = 0xf;
store->variables[0] = ttn_array_deref(c, store, var, offset, indirect);
store->src[0] = nir_src_for_reg(dest.dest.reg.reg);
diff --git a/src/gallium/auxiliary/util/u_blitter.h b/src/gallium/auxiliary/util/u_blitter.h
index eab48c5..ed4a232 100644
--- a/src/gallium/auxiliary/util/u_blitter.h
+++ b/src/gallium/auxiliary/util/u_blitter.h
@@ -398,9 +398,8 @@ util_blitter_save_stencil_ref(struct blitter_context *blitter,
blitter->saved_stencil_ref = *state;
}
-static inline
-void util_blitter_save_rasterizer(struct blitter_context *blitter,
- void *state)
+static inline void
+util_blitter_save_rasterizer(struct blitter_context *blitter, void *state)
{
blitter->saved_rs_state = state;
}
@@ -459,8 +458,8 @@ util_blitter_save_scissor(struct blitter_context *blitter,
blitter->saved_scissor = *state;
}
-static inline
-void util_blitter_save_fragment_sampler_states(
+static inline void
+util_blitter_save_fragment_sampler_states(
struct blitter_context *blitter,
unsigned num_sampler_states,
void **sampler_states)
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h
index cda0f2e..fe1917f 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.h
+++ b/src/gallium/auxiliary/util/u_simple_shaders.h
@@ -31,6 +31,7 @@
#include "pipe/p_compiler.h"
+#include "pipe/p_shader_tokens.h"
struct pipe_context;
diff --git a/src/gallium/drivers/ddebug/dd_draw.c b/src/gallium/drivers/ddebug/dd_draw.c
index b443c5b..0d7ee9a 100644
--- a/src/gallium/drivers/ddebug/dd_draw.c
+++ b/src/gallium/drivers/ddebug/dd_draw.c
@@ -588,8 +588,11 @@ dd_context_flush(struct pipe_context *_pipe,
static void
dd_before_draw(struct dd_context *dctx)
{
- if (dd_screen(dctx->base.screen)->mode == DD_DETECT_HANGS &&
- !dd_screen(dctx->base.screen)->no_flush)
+ struct dd_screen *dscreen = dd_screen(dctx->base.screen);
+
+ if (dscreen->mode == DD_DETECT_HANGS &&
+ !dscreen->no_flush &&
+ dctx->num_draw_calls >= dscreen->skip_count)
dd_flush_and_handle_hang(dctx, NULL, 0,
"GPU hang most likely caused by internal "
"driver commands");
@@ -598,22 +601,31 @@ dd_before_draw(struct dd_context *dctx)
static void
dd_after_draw(struct dd_context *dctx, struct dd_call *call)
{
- switch (dd_screen(dctx->base.screen)->mode) {
- case DD_DETECT_HANGS:
- if (!dd_screen(dctx->base.screen)->no_flush &&
- dd_flush_and_check_hang(dctx, NULL, 0)) {
- dd_dump_call(dctx, call, PIPE_DEBUG_DEVICE_IS_HUNG);
+ struct dd_screen *dscreen = dd_screen(dctx->base.screen);
- /* Terminate the process to prevent future hangs. */
- dd_kill_process();
+ if (dctx->num_draw_calls >= dscreen->skip_count) {
+ switch (dscreen->mode) {
+ case DD_DETECT_HANGS:
+ if (!dscreen->no_flush &&
+ dd_flush_and_check_hang(dctx, NULL, 0)) {
+ dd_dump_call(dctx, call, PIPE_DEBUG_DEVICE_IS_HUNG);
+
+ /* Terminate the process to prevent future hangs. */
+ dd_kill_process();
+ }
+ break;
+ case DD_DUMP_ALL_CALLS:
+ dd_dump_call(dctx, call, 0);
+ break;
+ default:
+ assert(0);
}
- break;
- case DD_DUMP_ALL_CALLS:
- dd_dump_call(dctx, call, 0);
- break;
- default:
- assert(0);
}
+
+ ++dctx->num_draw_calls;
+ if (dscreen->skip_count && dctx->num_draw_calls % 10000 == 0)
+ fprintf(stderr, "Gallium debugger reached %u draw calls.\n",
+ dctx->num_draw_calls);
}
static void
diff --git a/src/gallium/drivers/ddebug/dd_pipe.h b/src/gallium/drivers/ddebug/dd_pipe.h
index 34f5920..a045518 100644
--- a/src/gallium/drivers/ddebug/dd_pipe.h
+++ b/src/gallium/drivers/ddebug/dd_pipe.h
@@ -45,6 +45,7 @@ struct dd_screen
unsigned timeout_ms;
enum dd_mode mode;
bool no_flush;
+ unsigned skip_count;
};
struct dd_query
@@ -110,6 +111,8 @@ struct dd_context
struct pipe_scissor_state scissors[PIPE_MAX_VIEWPORTS];
struct pipe_viewport_state viewports[PIPE_MAX_VIEWPORTS];
float tess_default_levels[6];
+
+ unsigned num_draw_calls;
};
diff --git a/src/gallium/drivers/ddebug/dd_screen.c b/src/gallium/drivers/ddebug/dd_screen.c
index a776580..2716845 100644
--- a/src/gallium/drivers/ddebug/dd_screen.c
+++ b/src/gallium/drivers/ddebug/dd_screen.c
@@ -290,6 +290,9 @@ ddebug_screen_create(struct pipe_screen *screen)
puts(" $HOME/"DD_DIR"/ when a hang is detected.");
puts(" If 'noflush' is specified, only detect hangs in pipe->flush.");
puts("");
+ puts(" GALLIUM_DDEBUG_SKIP=[count]");
+ puts(" Skip flush and hang detection for the given initial number of draw calls.");
+ puts("");
exit(0);
}
@@ -349,5 +352,11 @@ ddebug_screen_create(struct pipe_screen *screen)
assert(0);
}
+ dscreen->skip_count = debug_get_num_option("GALLIUM_DDEBUG_SKIP", 0);
+ if (dscreen->skip_count > 0) {
+ fprintf(stderr, "Gallium debugger skipping the first %u draw calls.\n",
+ dscreen->skip_count);
+ }
+
return &dscreen->base;
}
diff --git a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
index 77f708f..d231113 100644
--- a/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
+++ b/src/gallium/drivers/freedreno/a2xx/a2xx.xml.h
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
diff --git a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
index a6940df..c4f253b 100644
--- a/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
+++ b/src/gallium/drivers/freedreno/a3xx/a3xx.xml.h
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
@@ -1421,15 +1421,23 @@ static inline uint32_t A3XX_PC_PRIM_VTX_CNTL_POLYMODE_BACK_PTYPE(enum adreno_pa_
#define REG_A3XX_PC_RESTART_INDEX 0x000021ed
#define REG_A3XX_HLSQ_CONTROL_0_REG 0x00002200
-#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000010
+#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK 0x00000030
#define A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT 4
static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE(enum a3xx_threadsize val)
{
return ((val) << A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_FSTHREADSIZE__MASK;
}
#define A3XX_HLSQ_CONTROL_0_REG_FSSUPERTHREADENABLE 0x00000040
+#define A3XX_HLSQ_CONTROL_0_REG_COMPUTEMODE 0x00000100
#define A3XX_HLSQ_CONTROL_0_REG_SPSHADERRESTART 0x00000200
#define A3XX_HLSQ_CONTROL_0_REG_RESERVED2 0x00000400
+#define A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__MASK 0x00fff000
+#define A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__SHIFT 12
+static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC(uint32_t val)
+{
+ return ((val) << A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__SHIFT) & A3XX_HLSQ_CONTROL_0_REG_CYCLETIMEOUTLIMITVPC__MASK;
+}
+#define A3XX_HLSQ_CONTROL_0_REG_FSONLYTEX 0x02000000
#define A3XX_HLSQ_CONTROL_0_REG_CHUNKDISABLE 0x04000000
#define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__MASK 0x08000000
#define A3XX_HLSQ_CONTROL_0_REG_CONSTMODE__SHIFT 27
@@ -1443,17 +1451,39 @@ static inline uint32_t A3XX_HLSQ_CONTROL_0_REG_CONSTMODE(uint32_t val)
#define A3XX_HLSQ_CONTROL_0_REG_SINGLECONTEXT 0x80000000
#define REG_A3XX_HLSQ_CONTROL_1_REG 0x00002201
-#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x00000040
+#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK 0x000000c0
#define A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT 6
static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(enum a3xx_threadsize val)
{
return ((val) << A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE__MASK;
}
#define A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE 0x00000100
-#define A3XX_HLSQ_CONTROL_1_REG_RESERVED1 0x00000200
-#define A3XX_HLSQ_CONTROL_1_REG_ZWCOORD 0x02000000
+#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__MASK 0x00ff0000
+#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__SHIFT 16
+static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(uint32_t val)
+{
+ return ((val) << A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID__MASK;
+}
+#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__MASK 0xff000000
+#define A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__SHIFT 24
+static inline uint32_t A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(uint32_t val)
+{
+ return ((val) << A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__SHIFT) & A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID__MASK;
+}
#define REG_A3XX_HLSQ_CONTROL_2_REG 0x00002202
+#define A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__MASK 0x000003fc
+#define A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__SHIFT 2
+static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID(uint32_t val)
+{
+ return ((val) << A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_FACENESSREGID__MASK;
+}
+#define A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__MASK 0x03fc0000
+#define A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__SHIFT 18
+static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID(uint32_t val)
+{
+ return ((val) << A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__SHIFT) & A3XX_HLSQ_CONTROL_2_REG_COVVALUEREGID__MASK;
+}
#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__MASK 0xfc000000
#define A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD__SHIFT 26
static inline uint32_t A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(uint32_t val)
@@ -1470,13 +1500,13 @@ static inline uint32_t A3XX_HLSQ_CONTROL_3_REG_REGID(uint32_t val)
}
#define REG_A3XX_HLSQ_VS_CONTROL_REG 0x00002204
-#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x00000fff
+#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK 0x000003ff
#define A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT 0
static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(uint32_t val)
{
return ((val) << A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH__MASK;
}
-#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x00fff000
+#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x001ff000
#define A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12
static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val)
{
@@ -1490,13 +1520,13 @@ static inline uint32_t A3XX_HLSQ_VS_CONTROL_REG_INSTRLENGTH(uint32_t val)
}
#define REG_A3XX_HLSQ_FS_CONTROL_REG 0x00002205
-#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x00000fff
+#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK 0x000003ff
#define A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT 0
static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH(uint32_t val)
{
return ((val) << A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__SHIFT) & A3XX_HLSQ_FS_CONTROL_REG_CONSTLENGTH__MASK;
}
-#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x00fff000
+#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__MASK 0x001ff000
#define A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET__SHIFT 12
static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_CONSTSTARTOFFSET(uint32_t val)
{
@@ -1510,13 +1540,13 @@ static inline uint32_t A3XX_HLSQ_FS_CONTROL_REG_INSTRLENGTH(uint32_t val)
}
#define REG_A3XX_HLSQ_CONST_VSPRESV_RANGE_REG 0x00002206
-#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK 0x0000ffff
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK 0x000001ff
#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT 0
static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY(uint32_t val)
{
return ((val) << A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_STARTENTRY__MASK;
}
-#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK 0xffff0000
+#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__MASK 0x01ff0000
#define A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY__SHIFT 16
static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
{
@@ -1524,13 +1554,13 @@ static inline uint32_t A3XX_HLSQ_CONST_VSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
}
#define REG_A3XX_HLSQ_CONST_FSPRESV_RANGE_REG 0x00002207
-#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK 0x0000ffff
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK 0x000001ff
#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT 0
static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY(uint32_t val)
{
return ((val) << A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__SHIFT) & A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_STARTENTRY__MASK;
}
-#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK 0xffff0000
+#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__MASK 0x01ff0000
#define A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY__SHIFT 16
static inline uint32_t A3XX_HLSQ_CONST_FSPRESV_RANGE_REG_ENDENTRY(uint32_t val)
{
@@ -2012,24 +2042,19 @@ static inline uint32_t A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffe
return ((val) << A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_VS_CTRL_REG0_INSTRBUFFERMODE__MASK;
}
#define A3XX_SP_VS_CTRL_REG0_CACHEINVALID 0x00000004
+#define A3XX_SP_VS_CTRL_REG0_ALUSCHMODE 0x00000008
#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0
#define A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4
static inline uint32_t A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
{
return ((val) << A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT__MASK;
}
-#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0003fc00
+#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00
#define A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10
static inline uint32_t A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val)
{
return ((val) << A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT__MASK;
}
-#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000
-#define A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18
-static inline uint32_t A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val)
-{
- return ((val) << A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP__MASK;
-}
#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK 0x00100000
#define A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT 20
static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
@@ -2037,8 +2062,6 @@ static inline uint32_t A3XX_SP_VS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
return ((val) << A3XX_SP_VS_CTRL_REG0_THREADSIZE__SHIFT) & A3XX_SP_VS_CTRL_REG0_THREADSIZE__MASK;
}
#define A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE 0x00200000
-#define A3XX_SP_VS_CTRL_REG0_PIXLODENABLE 0x00400000
-#define A3XX_SP_VS_CTRL_REG0_COMPUTEMODE 0x00800000
#define A3XX_SP_VS_CTRL_REG0_LENGTH__MASK 0xff000000
#define A3XX_SP_VS_CTRL_REG0_LENGTH__SHIFT 24
static inline uint32_t A3XX_SP_VS_CTRL_REG0_LENGTH(uint32_t val)
@@ -2079,7 +2102,8 @@ static inline uint32_t A3XX_SP_VS_PARAM_REG_PSIZEREGID(uint32_t val)
{
return ((val) << A3XX_SP_VS_PARAM_REG_PSIZEREGID__SHIFT) & A3XX_SP_VS_PARAM_REG_PSIZEREGID__MASK;
}
-#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0xfff00000
+#define A3XX_SP_VS_PARAM_REG_POS2DMODE 0x00010000
+#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__MASK 0x01f00000
#define A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR__SHIFT 20
static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val)
{
@@ -2089,24 +2113,26 @@ static inline uint32_t A3XX_SP_VS_PARAM_REG_TOTALVSOUTVAR(uint32_t val)
static inline uint32_t REG_A3XX_SP_VS_OUT(uint32_t i0) { return 0x000022c7 + 0x1*i0; }
static inline uint32_t REG_A3XX_SP_VS_OUT_REG(uint32_t i0) { return 0x000022c7 + 0x1*i0; }
-#define A3XX_SP_VS_OUT_REG_A_REGID__MASK 0x000001ff
+#define A3XX_SP_VS_OUT_REG_A_REGID__MASK 0x000000ff
#define A3XX_SP_VS_OUT_REG_A_REGID__SHIFT 0
static inline uint32_t A3XX_SP_VS_OUT_REG_A_REGID(uint32_t val)
{
return ((val) << A3XX_SP_VS_OUT_REG_A_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_A_REGID__MASK;
}
+#define A3XX_SP_VS_OUT_REG_A_HALF 0x00000100
#define A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK 0x00001e00
#define A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT 9
static inline uint32_t A3XX_SP_VS_OUT_REG_A_COMPMASK(uint32_t val)
{
return ((val) << A3XX_SP_VS_OUT_REG_A_COMPMASK__SHIFT) & A3XX_SP_VS_OUT_REG_A_COMPMASK__MASK;
}
-#define A3XX_SP_VS_OUT_REG_B_REGID__MASK 0x01ff0000
+#define A3XX_SP_VS_OUT_REG_B_REGID__MASK 0x00ff0000
#define A3XX_SP_VS_OUT_REG_B_REGID__SHIFT 16
static inline uint32_t A3XX_SP_VS_OUT_REG_B_REGID(uint32_t val)
{
return ((val) << A3XX_SP_VS_OUT_REG_B_REGID__SHIFT) & A3XX_SP_VS_OUT_REG_B_REGID__MASK;
}
+#define A3XX_SP_VS_OUT_REG_B_HALF 0x01000000
#define A3XX_SP_VS_OUT_REG_B_COMPMASK__MASK 0x1e000000
#define A3XX_SP_VS_OUT_REG_B_COMPMASK__SHIFT 25
static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val)
@@ -2117,25 +2143,25 @@ static inline uint32_t A3XX_SP_VS_OUT_REG_B_COMPMASK(uint32_t val)
static inline uint32_t REG_A3XX_SP_VS_VPC_DST(uint32_t i0) { return 0x000022d0 + 0x1*i0; }
static inline uint32_t REG_A3XX_SP_VS_VPC_DST_REG(uint32_t i0) { return 0x000022d0 + 0x1*i0; }
-#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x000000ff
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK 0x0000007f
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT 0
static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC0(uint32_t val)
{
return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC0__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC0__MASK;
}
-#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x0000ff00
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK 0x00007f00
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT 8
static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC1(uint32_t val)
{
return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC1__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC1__MASK;
}
-#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x00ff0000
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK 0x007f0000
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT 16
static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC2(uint32_t val)
{
return ((val) << A3XX_SP_VS_VPC_DST_REG_OUTLOC2__SHIFT) & A3XX_SP_VS_VPC_DST_REG_OUTLOC2__MASK;
}
-#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0xff000000
+#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__MASK 0x7f000000
#define A3XX_SP_VS_VPC_DST_REG_OUTLOC3__SHIFT 24
static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val)
{
@@ -2143,6 +2169,12 @@ static inline uint32_t A3XX_SP_VS_VPC_DST_REG_OUTLOC3(uint32_t val)
}
#define REG_A3XX_SP_VS_OBJ_OFFSET_REG 0x000022d4
+#define A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK 0x0000ffff
+#define A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT 0
+static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET(uint32_t val)
+{
+ return ((val) << A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT) & A3XX_SP_VS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK;
+}
#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
#define A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
@@ -2159,8 +2191,38 @@ static inline uint32_t A3XX_SP_VS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
#define REG_A3XX_SP_VS_OBJ_START_REG 0x000022d5
#define REG_A3XX_SP_VS_PVT_MEM_PARAM_REG 0x000022d6
+#define A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK 0x000000ff
+#define A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT 0
+static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM(uint32_t val)
+{
+ return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK;
+}
+#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK 0x00ffff00
+#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT 8
+static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET(uint32_t val)
+{
+ return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK;
+}
+#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK 0xff000000
+#define A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT 24
+static inline uint32_t A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD(uint32_t val)
+{
+ return ((val) << A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT) & A3XX_SP_VS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK;
+}
#define REG_A3XX_SP_VS_PVT_MEM_ADDR_REG 0x000022d7
+#define A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__MASK 0x0000001f
+#define A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT 0
+static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val)
+{
+ return ((val) << A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT) & A3XX_SP_VS_PVT_MEM_ADDR_REG_BURSTLEN__MASK;
+}
+#define A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK 0xffffffe0
+#define A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT 5
+static inline uint32_t A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val)
+{
+ return ((val >> 5) << A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_VS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK;
+}
#define REG_A3XX_SP_VS_PVT_MEM_SIZE_REG 0x000022d8
@@ -2186,24 +2248,22 @@ static inline uint32_t A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE(enum a3xx_instrbuffe
return ((val) << A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__SHIFT) & A3XX_SP_FS_CTRL_REG0_INSTRBUFFERMODE__MASK;
}
#define A3XX_SP_FS_CTRL_REG0_CACHEINVALID 0x00000004
+#define A3XX_SP_FS_CTRL_REG0_ALUSCHMODE 0x00000008
#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK 0x000003f0
#define A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT 4
static inline uint32_t A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(uint32_t val)
{
return ((val) << A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT__MASK;
}
-#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0003fc00
+#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK 0x0000fc00
#define A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT 10
static inline uint32_t A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(uint32_t val)
{
return ((val) << A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__SHIFT) & A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT__MASK;
}
-#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK 0x000c0000
-#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT 18
-static inline uint32_t A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(uint32_t val)
-{
- return ((val) << A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__SHIFT) & A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP__MASK;
-}
+#define A3XX_SP_FS_CTRL_REG0_FSBYPASSENABLE 0x00020000
+#define A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP 0x00040000
+#define A3XX_SP_FS_CTRL_REG0_OUTORDERED 0x00080000
#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__MASK 0x00100000
#define A3XX_SP_FS_CTRL_REG0_THREADSIZE__SHIFT 20
static inline uint32_t A3XX_SP_FS_CTRL_REG0_THREADSIZE(enum a3xx_threadsize val)
@@ -2239,7 +2299,7 @@ static inline uint32_t A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING(uint32_t val)
{
return ((val) << A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__SHIFT) & A3XX_SP_FS_CTRL_REG1_INITIALOUTSTANDING__MASK;
}
-#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK 0x3f000000
+#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__MASK 0x7f000000
#define A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET__SHIFT 24
static inline uint32_t A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(uint32_t val)
{
@@ -2247,6 +2307,12 @@ static inline uint32_t A3XX_SP_FS_CTRL_REG1_HALFPRECVAROFFSET(uint32_t val)
}
#define REG_A3XX_SP_FS_OBJ_OFFSET_REG 0x000022e2
+#define A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK 0x0000ffff
+#define A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT 0
+static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET(uint32_t val)
+{
+ return ((val) << A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__SHIFT) & A3XX_SP_FS_OBJ_OFFSET_REG_FIRSTEXECINSTROFFSET__MASK;
+}
#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__MASK 0x01ff0000
#define A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET__SHIFT 16
static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_CONSTOBJECTOFFSET(uint32_t val)
@@ -2263,8 +2329,38 @@ static inline uint32_t A3XX_SP_FS_OBJ_OFFSET_REG_SHADEROBJOFFSET(uint32_t val)
#define REG_A3XX_SP_FS_OBJ_START_REG 0x000022e3
#define REG_A3XX_SP_FS_PVT_MEM_PARAM_REG 0x000022e4
+#define A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK 0x000000ff
+#define A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT 0
+static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM(uint32_t val)
+{
+ return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_MEMSIZEPERITEM__MASK;
+}
+#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK 0x00ffff00
+#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT 8
+static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET(uint32_t val)
+{
+ return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKOFFSET__MASK;
+}
+#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK 0xff000000
+#define A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT 24
+static inline uint32_t A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD(uint32_t val)
+{
+ return ((val) << A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__SHIFT) & A3XX_SP_FS_PVT_MEM_PARAM_REG_HWSTACKSIZEPERTHREAD__MASK;
+}
#define REG_A3XX_SP_FS_PVT_MEM_ADDR_REG 0x000022e5
+#define A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__MASK 0x0000001f
+#define A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT 0
+static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN(uint32_t val)
+{
+ return ((val) << A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__SHIFT) & A3XX_SP_FS_PVT_MEM_ADDR_REG_BURSTLEN__MASK;
+}
+#define A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK 0xffffffe0
+#define A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT 5
+static inline uint32_t A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS(uint32_t val)
+{
+ return ((val >> 5) << A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__SHIFT) & A3XX_SP_FS_PVT_MEM_ADDR_REG_SHADERSTARTADDRESS__MASK;
+}
#define REG_A3XX_SP_FS_PVT_MEM_SIZE_REG 0x000022e6
diff --git a/src/gallium/drivers/freedreno/a3xx/fd3_program.c b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
index 7361516..a64ecf1 100644
--- a/src/gallium/drivers/freedreno/a3xx/fd3_program.c
+++ b/src/gallium/drivers/freedreno/a3xx/fd3_program.c
@@ -229,7 +229,8 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
A3XX_HLSQ_CONTROL_0_REG_SPCONSTFULLUPDATE);
OUT_RING(ring, A3XX_HLSQ_CONTROL_1_REG_VSTHREADSIZE(TWO_QUADS) |
A3XX_HLSQ_CONTROL_1_REG_VSSUPERTHREADENABLE |
- COND(fp->frag_coord, A3XX_HLSQ_CONTROL_1_REG_ZWCOORD));
+ COND(fp->frag_coord, A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDXYREGID(regid(0,0)) |
+ A3XX_HLSQ_CONTROL_1_REG_FRAGCOORDZWREGID(regid(0,2))));
OUT_RING(ring, A3XX_HLSQ_CONTROL_2_REG_PRIMALLOCTHRESHOLD(31));
OUT_RING(ring, A3XX_HLSQ_CONTROL_3_REG_REGID(fp->pos_regid));
OUT_RING(ring, A3XX_HLSQ_VS_CONTROL_REG_CONSTLENGTH(vp->constlen) |
@@ -254,10 +255,8 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
COND(vpbuffer == CACHE, A3XX_SP_VS_CTRL_REG0_CACHEINVALID) |
A3XX_SP_VS_CTRL_REG0_HALFREGFOOTPRINT(vsi->max_half_reg + 1) |
A3XX_SP_VS_CTRL_REG0_FULLREGFOOTPRINT(vsi->max_reg + 1) |
- A3XX_SP_VS_CTRL_REG0_INOUTREGOVERLAP(0) |
A3XX_SP_VS_CTRL_REG0_THREADSIZE(TWO_QUADS) |
A3XX_SP_VS_CTRL_REG0_SUPERTHREADMODE |
- COND(vp->has_samp, A3XX_SP_VS_CTRL_REG0_PIXLODENABLE) |
A3XX_SP_VS_CTRL_REG0_LENGTH(vpbuffersz));
OUT_RING(ring, A3XX_SP_VS_CTRL_REG1_CONSTLENGTH(vp->constlen) |
A3XX_SP_VS_CTRL_REG1_INITIALOUTSTANDING(vp->total_in) |
@@ -336,7 +335,7 @@ fd3_program_emit(struct fd_ringbuffer *ring, struct fd3_emit *emit,
COND(fpbuffer == CACHE, A3XX_SP_FS_CTRL_REG0_CACHEINVALID) |
A3XX_SP_FS_CTRL_REG0_HALFREGFOOTPRINT(fsi->max_half_reg + 1) |
A3XX_SP_FS_CTRL_REG0_FULLREGFOOTPRINT(fsi->max_reg + 1) |
- A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP(1) |
+ A3XX_SP_FS_CTRL_REG0_INOUTREGOVERLAP |
A3XX_SP_FS_CTRL_REG0_THREADSIZE(FOUR_QUADS) |
A3XX_SP_FS_CTRL_REG0_SUPERTHREADMODE |
COND(fp->has_samp > 0, A3XX_SP_FS_CTRL_REG0_PIXLODENABLE) |
diff --git a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
index a450379..e8df429 100644
--- a/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
+++ b/src/gallium/drivers/freedreno/a4xx/a4xx.xml.h
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
@@ -157,58 +157,62 @@ enum a4xx_vtx_fmt {
VFMT4_10_10_10_2_UNORM = 57,
VFMT4_10_10_10_2_SINT = 58,
VFMT4_10_10_10_2_SNORM = 59,
+ VFMT4_2_10_10_10_UINT = 60,
+ VFMT4_2_10_10_10_UNORM = 61,
+ VFMT4_2_10_10_10_SINT = 62,
+ VFMT4_2_10_10_10_SNORM = 63,
};
enum a4xx_tex_fmt {
- TFMT4_5_6_5_UNORM = 11,
- TFMT4_5_5_5_1_UNORM = 9,
- TFMT4_4_4_4_4_UNORM = 8,
- TFMT4_X8Z24_UNORM = 71,
- TFMT4_10_10_10_2_UNORM = 33,
- TFMT4_10_10_10_2_UINT = 34,
TFMT4_A8_UNORM = 3,
- TFMT4_L8_A8_UNORM = 13,
TFMT4_8_UNORM = 4,
- TFMT4_8_8_UNORM = 14,
- TFMT4_8_8_8_8_UNORM = 28,
TFMT4_8_SNORM = 5,
- TFMT4_8_8_SNORM = 15,
- TFMT4_8_8_8_8_SNORM = 29,
TFMT4_8_UINT = 6,
- TFMT4_8_8_UINT = 16,
- TFMT4_8_8_8_8_UINT = 30,
TFMT4_8_SINT = 7,
+ TFMT4_4_4_4_4_UNORM = 8,
+ TFMT4_5_5_5_1_UNORM = 9,
+ TFMT4_5_6_5_UNORM = 11,
+ TFMT4_L8_A8_UNORM = 13,
+ TFMT4_8_8_UNORM = 14,
+ TFMT4_8_8_SNORM = 15,
+ TFMT4_8_8_UINT = 16,
TFMT4_8_8_SINT = 17,
- TFMT4_8_8_8_8_SINT = 31,
TFMT4_16_UNORM = 18,
- TFMT4_16_16_UNORM = 38,
- TFMT4_16_16_16_16_UNORM = 51,
TFMT4_16_SNORM = 19,
- TFMT4_16_16_SNORM = 39,
- TFMT4_16_16_16_16_SNORM = 52,
+ TFMT4_16_FLOAT = 20,
TFMT4_16_UINT = 21,
- TFMT4_16_16_UINT = 41,
- TFMT4_16_16_16_16_UINT = 54,
TFMT4_16_SINT = 22,
+ TFMT4_8_8_8_8_UNORM = 28,
+ TFMT4_8_8_8_8_SNORM = 29,
+ TFMT4_8_8_8_8_UINT = 30,
+ TFMT4_8_8_8_8_SINT = 31,
+ TFMT4_9_9_9_E5_FLOAT = 32,
+ TFMT4_10_10_10_2_UNORM = 33,
+ TFMT4_10_10_10_2_UINT = 34,
+ TFMT4_11_11_10_FLOAT = 37,
+ TFMT4_16_16_UNORM = 38,
+ TFMT4_16_16_SNORM = 39,
+ TFMT4_16_16_FLOAT = 40,
+ TFMT4_16_16_UINT = 41,
TFMT4_16_16_SINT = 42,
- TFMT4_16_16_16_16_SINT = 55,
+ TFMT4_32_FLOAT = 43,
TFMT4_32_UINT = 44,
- TFMT4_32_32_UINT = 57,
- TFMT4_32_32_32_32_UINT = 64,
TFMT4_32_SINT = 45,
- TFMT4_32_32_SINT = 58,
- TFMT4_32_32_32_32_SINT = 65,
- TFMT4_16_FLOAT = 20,
- TFMT4_16_16_FLOAT = 40,
+ TFMT4_16_16_16_16_UNORM = 51,
+ TFMT4_16_16_16_16_SNORM = 52,
TFMT4_16_16_16_16_FLOAT = 53,
- TFMT4_32_FLOAT = 43,
+ TFMT4_16_16_16_16_UINT = 54,
+ TFMT4_16_16_16_16_SINT = 55,
TFMT4_32_32_FLOAT = 56,
- TFMT4_32_32_32_32_FLOAT = 63,
+ TFMT4_32_32_UINT = 57,
+ TFMT4_32_32_SINT = 58,
TFMT4_32_32_32_FLOAT = 59,
TFMT4_32_32_32_UINT = 60,
TFMT4_32_32_32_SINT = 61,
- TFMT4_9_9_9_E5_FLOAT = 32,
- TFMT4_11_11_10_FLOAT = 37,
+ TFMT4_32_32_32_32_FLOAT = 63,
+ TFMT4_32_32_32_32_UINT = 64,
+ TFMT4_32_32_32_32_SINT = 65,
+ TFMT4_X8Z24_UNORM = 71,
TFMT4_DXT1 = 86,
TFMT4_DXT3 = 87,
TFMT4_DXT5 = 88,
@@ -800,6 +804,7 @@ static inline uint32_t A4XX_RB_DEPTH_CONTROL_ZFUNC(enum adreno_compare_func val)
}
#define A4XX_RB_DEPTH_CONTROL_BF_ENABLE 0x00000080
#define A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE 0x00010000
+#define A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS 0x00020000
#define A4XX_RB_DEPTH_CONTROL_Z_TEST_ENABLE 0x80000000
#define REG_A4XX_RB_DEPTH_CLEAR 0x00002102
@@ -1060,6 +1065,9 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_TP_REG(uint32_t i0) { return 0x
#define REG_A4XX_RBBM_CFG_DEBBUS_SEL_D 0x0000004d
+#define REG_A4XX_RBBM_POWER_CNTL_IP 0x00000098
+#define A4XX_RBBM_POWER_CNTL_IP_SW_COLLAPSE 0x00000001
+
#define REG_A4XX_RBBM_PERFCTR_CP_0_LO 0x0000009c
static inline uint32_t REG_A4XX_RBBM_CLOCK_CTL_SP(uint32_t i0) { return 0x00000068 + 0x1*i0; }
@@ -1110,6 +1118,10 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1(uint32_t i0) { r
static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0) { return 0x0000008e + 0x1*i0; }
+#define REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_0 0x00000099
+
+#define REG_A4XX_RBBM_SP_REGFILE_SLEEP_CNTL_1 0x0000009a
+
#define REG_A4XX_RBBM_PERFCTR_PWR_1_LO 0x00000168
#define REG_A4XX_RBBM_PERFCTR_CTL 0x00000170
@@ -1163,6 +1175,11 @@ static inline uint32_t REG_A4XX_RBBM_CLOCK_DELAY_RB_MARB_CCU_L1_REG(uint32_t i0)
#define REG_A4XX_RBBM_INTERFACE_RRDY_STATUS5 0x0000019f
+#define REG_A4XX_RBBM_POWER_STATUS 0x000001b0
+#define A4XX_RBBM_POWER_STATUS_SP_TP_PWR_ON 0x00100000
+
+#define REG_A4XX_RBBM_WAIT_IDLE_CLOCKS_CTL2 0x000001b8
+
#define REG_A4XX_CP_SCRATCH_UMASK 0x00000228
#define REG_A4XX_CP_SCRATCH_ADDR 0x00000229
@@ -1265,6 +1282,28 @@ static inline uint32_t REG_A4XX_CP_SCRATCH_REG(uint32_t i0) { return 0x00000578
#define REG_A4XX_SP_MODE_CONTROL 0x00000ec3
+#define REG_A4XX_SP_PERFCTR_SP_SEL_0 0x00000ec4
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_1 0x00000ec5
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_2 0x00000ec6
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_3 0x00000ec7
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_4 0x00000ec8
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_5 0x00000ec9
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_6 0x00000eca
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_7 0x00000ecb
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_8 0x00000ecc
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_9 0x00000ecd
+
+#define REG_A4XX_SP_PERFCTR_SP_SEL_10 0x00000ece
+
#define REG_A4XX_SP_PERFCTR_SP_SEL_11 0x00000ecf
#define REG_A4XX_SP_SP_CTRL_REG 0x000022c0
@@ -2180,6 +2219,7 @@ static inline uint32_t A4XX_GRAS_SU_POINT_SIZE(float val)
#define REG_A4XX_GRAS_ALPHA_CONTROL 0x00002073
#define A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE 0x00000004
+#define A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS 0x00000008
#define REG_A4XX_GRAS_SU_POLY_OFFSET_SCALE 0x00002074
#define A4XX_GRAS_SU_POLY_OFFSET_SCALE__MASK 0xffffffff
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
index f220fc7..b9a2814 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_emit.c
@@ -529,14 +529,16 @@ fd4_emit_state(struct fd_context *ctx, struct fd_ringbuffer *ring,
OUT_PKT0(ring, REG_A4XX_RB_DEPTH_CONTROL, 1);
OUT_RING(ring, zsa->rb_depth_control |
- COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE));
+ COND(fragz, A4XX_RB_DEPTH_CONTROL_EARLY_Z_DISABLE) |
+ COND(fragz && fp->frag_coord, A4XX_RB_DEPTH_CONTROL_FORCE_FRAGZ_TO_FS));
/* maybe this register/bitfield needs a better name.. this
* appears to be just disabling early-z
*/
OUT_PKT0(ring, REG_A4XX_GRAS_ALPHA_CONTROL, 1);
OUT_RING(ring, zsa->gras_alpha_control |
- COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE));
+ COND(fragz, A4XX_GRAS_ALPHA_CONTROL_ALPHA_TEST_ENABLE) |
+ COND(fragz && fp->frag_coord, A4XX_GRAS_ALPHA_CONTROL_FORCE_FRAGZ_TO_FS));
}
if (dirty & FD_DIRTY_RASTERIZER) {
diff --git a/src/gallium/drivers/freedreno/a4xx/fd4_program.c b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
index 0e861b9..32b8fce 100644
--- a/src/gallium/drivers/freedreno/a4xx/fd4_program.c
+++ b/src/gallium/drivers/freedreno/a4xx/fd4_program.c
@@ -420,9 +420,7 @@ fd4_program_emit(struct fd_ringbuffer *ring, struct fd4_emit *emit,
COND(s[FS].v->frag_face, A4XX_RB_RENDER_CONTROL2_FACENESS) |
COND(s[FS].v->frag_coord, A4XX_RB_RENDER_CONTROL2_XCOORD |
A4XX_RB_RENDER_CONTROL2_YCOORD |
-// TODO enabling gl_FragCoord.z is causing lockups on 0ad (but seems
-// to work everywhere else).
-// A4XX_RB_RENDER_CONTROL2_ZCOORD |
+ A4XX_RB_RENDER_CONTROL2_ZCOORD |
A4XX_RB_RENDER_CONTROL2_WCOORD));
OUT_PKT0(ring, REG_A4XX_RB_FS_OUTPUT_REG, 1);
diff --git a/src/gallium/drivers/freedreno/adreno_common.xml.h b/src/gallium/drivers/freedreno/adreno_common.xml.h
index 0e0f0e6..f9c0e6a 100644
--- a/src/gallium/drivers/freedreno/adreno_common.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_common.xml.h
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
diff --git a/src/gallium/drivers/freedreno/adreno_pm4.xml.h b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
index 4aabc08..c674189 100644
--- a/src/gallium/drivers/freedreno/adreno_pm4.xml.h
+++ b/src/gallium/drivers/freedreno/adreno_pm4.xml.h
@@ -11,10 +11,10 @@ The rules-ng-ng source files this header was generated from are:
- /home/robclark/src/freedreno/envytools/rnndb/adreno.xml ( 398 bytes, from 2015-09-24 17:25:31)
- /home/robclark/src/freedreno/envytools/rnndb/freedreno_copyright.xml ( 1453 bytes, from 2015-05-20 20:03:07)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/a2xx.xml ( 32901 bytes, from 2015-05-20 20:03:14)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 10755 bytes, from 2015-09-14 20:46:55)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 14968 bytes, from 2015-05-20 20:12:27)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 68291 bytes, from 2015-11-17 16:39:59)
-- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 64038 bytes, from 2015-11-17 16:37:36)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_common.xml ( 11518 bytes, from 2015-11-24 14:39:00)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/adreno_pm4.xml ( 15149 bytes, from 2015-11-20 16:22:25)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a3xx.xml ( 69600 bytes, from 2015-11-24 14:39:00)
+- /home/robclark/src/freedreno/envytools/rnndb/adreno/a4xx.xml ( 67220 bytes, from 2015-12-13 17:58:09)
- /home/robclark/src/freedreno/envytools/rnndb/adreno/ocmem.xml ( 1773 bytes, from 2015-09-24 17:30:00)
Copyright (C) 2013-2015 by the following authors:
@@ -199,7 +199,11 @@ enum adreno_state_type {
enum adreno_state_src {
SS_DIRECT = 0,
+ SS_INVALID_ALL_IC = 2,
+ SS_INVALID_PART_IC = 3,
SS_INDIRECT = 4,
+ SS_INDIRECT_TCM = 5,
+ SS_INDIRECT_STM = 6,
};
enum a4xx_index_size {
@@ -227,7 +231,7 @@ static inline uint32_t CP_LOAD_STATE_0_STATE_BLOCK(enum adreno_state_block val)
{
return ((val) << CP_LOAD_STATE_0_STATE_BLOCK__SHIFT) & CP_LOAD_STATE_0_STATE_BLOCK__MASK;
}
-#define CP_LOAD_STATE_0_NUM_UNIT__MASK 0x7fc00000
+#define CP_LOAD_STATE_0_NUM_UNIT__MASK 0xffc00000
#define CP_LOAD_STATE_0_NUM_UNIT__SHIFT 22
static inline uint32_t CP_LOAD_STATE_0_NUM_UNIT(uint32_t val)
{
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
index e768e61..d55daee 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_cmdline.c
@@ -114,8 +114,6 @@ int main(int argc, char **argv)
void *ptr;
size_t size;
- fd_mesa_debug |= FD_DBG_DISASM;
-
memset(&s, 0, sizeof(s));
memset(&v, 0, sizeof(v));
@@ -128,7 +126,7 @@ int main(int argc, char **argv)
while (n < argc) {
if (!strcmp(argv[n], "--verbose")) {
- fd_mesa_debug |= FD_DBG_MSGS | FD_DBG_OPTMSGS;
+ fd_mesa_debug |= FD_DBG_MSGS | FD_DBG_OPTMSGS | FD_DBG_DISASM;
n++;
continue;
}
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
index eea5c5e..44c74b8 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_compiler_nir.c
@@ -1264,7 +1264,7 @@ emit_intrinsic_load_ubo(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
/* handles array reads: */
static void
-emit_intrinisic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
+emit_intrinsic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
struct ir3_instruction **dst)
{
nir_deref_var *dvar = intr->variables[0];
@@ -1305,7 +1305,7 @@ emit_intrinisic_load_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr,
/* handles array writes: */
static void
-emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
+emit_intrinsic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
{
nir_deref_var *dvar = intr->variables[0];
nir_deref_array *darr = nir_deref_as_array(dvar->deref.child);
@@ -1321,6 +1321,10 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
case nir_deref_array_type_direct:
/* direct access does not require anything special: */
for (int i = 0; i < intr->num_components; i++) {
+ /* ttn doesn't generate partial writemasks */
+ assert(intr->const_index[0] ==
+ (1 << intr->num_components) - 1);
+
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
arr->arr[n] = src[i];
@@ -1333,6 +1337,10 @@ emit_intrinisic_store_var(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
struct ir3_instruction *addr =
get_addr(ctx, get_src(ctx, &darr->indirect)[0]);
for (int i = 0; i < intr->num_components; i++) {
+ /* ttn doesn't generate partial writemasks */
+ assert(intr->const_index[0] ==
+ (1 << intr->num_components) - 1);
+
struct ir3_instruction *store;
unsigned n = darr->base_offset * 4 + i;
compile_assert(ctx, n < arr->length);
@@ -1392,7 +1400,7 @@ static void add_sysval_input(struct ir3_compile *ctx, gl_system_value slot,
}
static void
-emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
+emit_intrinsic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
{
const nir_intrinsic_info *info = &nir_intrinsic_infos[intr->intrinsic];
struct ir3_instruction **dst, **src;
@@ -1454,10 +1462,10 @@ emit_intrinisic(struct ir3_compile *ctx, nir_intrinsic_instr *intr)
}
break;
case nir_intrinsic_load_var:
- emit_intrinisic_load_var(ctx, intr, dst);
+ emit_intrinsic_load_var(ctx, intr, dst);
break;
case nir_intrinsic_store_var:
- emit_intrinisic_store_var(ctx, intr);
+ emit_intrinsic_store_var(ctx, intr);
break;
case nir_intrinsic_store_output:
const_offset = nir_src_as_const_value(intr->src[1]);
@@ -1927,7 +1935,7 @@ emit_instr(struct ir3_compile *ctx, nir_instr *instr)
emit_alu(ctx, nir_instr_as_alu(instr));
break;
case nir_instr_type_intrinsic:
- emit_intrinisic(ctx, nir_instr_as_intrinsic(instr));
+ emit_intrinsic(ctx, nir_instr_as_intrinsic(instr));
break;
case nir_instr_type_load_const:
emit_load_const(ctx, nir_instr_as_load_const(instr));
diff --git a/src/gallium/drivers/freedreno/ir3/ir3_print.c b/src/gallium/drivers/freedreno/ir3/ir3_print.c
index 07e03d2..a84e798 100644
--- a/src/gallium/drivers/freedreno/ir3/ir3_print.c
+++ b/src/gallium/drivers/freedreno/ir3/ir3_print.c
@@ -143,7 +143,7 @@ block_id(struct ir3_block *block)
#ifdef DEBUG
return block->serialno;
#else
- return (uint32_t)(uint64_t)block;
+ return (uint32_t)(unsigned long)block;
#endif
}
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_interp.h b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
index 9029d2a..0a52642 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_interp.h
+++ b/src/gallium/drivers/llvmpipe/lp_bld_interp.h
@@ -63,7 +63,8 @@ enum lp_interp {
LP_INTERP_LINEAR,
LP_INTERP_PERSPECTIVE,
LP_INTERP_POSITION,
- LP_INTERP_FACING
+ LP_INTERP_FACING,
+ LP_INTERP_ZERO
};
struct lp_shader_input {
diff --git a/src/gallium/drivers/llvmpipe/lp_context.h b/src/gallium/drivers/llvmpipe/lp_context.h
index c9a5d67..9dcc102 100644
--- a/src/gallium/drivers/llvmpipe/lp_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_context.h
@@ -108,22 +108,28 @@ struct llvmpipe_context {
struct vertex_info vertex_info;
/** Which vertex shader output slot contains color */
- int color_slot[2];
+ uint8_t color_slot[2];
/** Which vertex shader output slot contains bcolor */
- int bcolor_slot[2];
+ uint8_t bcolor_slot[2];
/** Which vertex shader output slot contains point size */
- int psize_slot;
+ uint8_t psize_slot;
/** Which vertex shader output slot contains viewport index */
- int viewport_index_slot;
+ uint8_t viewport_index_slot;
/** Which geometry shader output slot contains layer */
- int layer_slot;
+ uint8_t layer_slot;
/** A fake frontface output for unfilled primitives */
- int face_slot;
+ uint8_t face_slot;
+
+ /** Which output slot is used for the fake vp index info */
+ uint8_t fake_vpindex_slot;
+
+ /** Which output slot is used for the fake layer info */
+ uint8_t fake_layer_slot;
/** Depth format and bias settings. */
boolean floating_point_depth;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup.c b/src/gallium/drivers/llvmpipe/lp_setup.c
index 1778b13..ddbb88e 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup.c
@@ -1207,7 +1207,7 @@ lp_setup_update_state( struct lp_setup_context *setup,
/* Will probably need to move this somewhere else, just need
* to know about vertex shader point size attribute.
*/
- setup->psize = lp->psize_slot;
+ setup->psize_slot = lp->psize_slot;
setup->viewport_index_slot = lp->viewport_index_slot;
setup->layer_slot = lp->layer_slot;
setup->face_slot = lp->face_slot;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_context.h b/src/gallium/drivers/llvmpipe/lp_setup_context.h
index 2410e23..4451284 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_context.h
+++ b/src/gallium/drivers/llvmpipe/lp_setup_context.h
@@ -105,10 +105,10 @@ struct lp_setup_context
float pixel_offset;
float line_width;
float point_size;
- float psize;
- unsigned viewport_index_slot;
- unsigned layer_slot;
- int face_slot;
+ uint8_t psize_slot;
+ uint8_t viewport_index_slot;
+ uint8_t layer_slot;
+ uint8_t face_slot;
struct pipe_framebuffer_state fb;
struct u_rect framebuffer;
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_point.c b/src/gallium/drivers/llvmpipe/lp_setup_point.c
index 75544b5..14c389f 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_point.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_point.c
@@ -328,7 +328,7 @@ try_setup_point( struct lp_setup_context *setup,
struct llvmpipe_context *lp_context = (struct llvmpipe_context *)setup->pipe;
/* x/y positions in fixed point */
const struct lp_setup_variant_key *key = &setup->setup.variant->key;
- const int sizeAttr = setup->psize;
+ const int sizeAttr = setup->psize_slot;
const float size
= (setup->point_size_per_vertex && sizeAttr > 0) ? v0[sizeAttr][0]
: setup->point_size;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_derived.c b/src/gallium/drivers/llvmpipe/lp_state_derived.c
index a25e832..f5bcfb2 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_derived.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_derived.c
@@ -55,10 +55,14 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
draw_prepare_shader_outputs(llvmpipe->draw);
- llvmpipe->color_slot[0] = -1;
- llvmpipe->color_slot[1] = -1;
- llvmpipe->bcolor_slot[0] = -1;
- llvmpipe->bcolor_slot[1] = -1;
+ llvmpipe->color_slot[0] = 0;
+ llvmpipe->color_slot[1] = 0;
+ llvmpipe->bcolor_slot[0] = 0;
+ llvmpipe->bcolor_slot[1] = 0;
+ llvmpipe->viewport_index_slot = 0;
+ llvmpipe->layer_slot = 0;
+ llvmpipe->face_slot = 0;
+ llvmpipe->psize_slot = 0;
/*
* Match FS inputs against VS outputs, emitting the necessary
@@ -86,7 +90,7 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_COLOR &&
lpfs->info.base.input_semantic_index[i] < 2) {
int idx = lpfs->info.base.input_semantic_index[i];
- llvmpipe->color_slot[idx] = (int)vinfo->num_attribs;
+ llvmpipe->color_slot[idx] = vinfo->num_attribs;
}
if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_FACE) {
@@ -94,6 +98,30 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
} else if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_PRIMID) {
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+ /*
+ * For vp index and layer, if the fs requires them but the vs doesn't
+ * provide them, store the slot - we'll later replace the data directly
+ * with zero (as required by ARB_fragment_layer_viewport). This is
+ * because draw itself just redirects them to whatever was at output 0.
+ * We'll also store the real vpindex/layer slot for setup use.
+ */
+ } else if (lpfs->info.base.input_semantic_name[i] ==
+ TGSI_SEMANTIC_VIEWPORT_INDEX) {
+ if (vs_index >= 0) {
+ llvmpipe->viewport_index_slot = vinfo->num_attribs;
+ }
+ else {
+ llvmpipe->fake_vpindex_slot = vinfo->num_attribs;
+ }
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+ } else if (lpfs->info.base.input_semantic_name[i] == TGSI_SEMANTIC_LAYER) {
+ if (vs_index >= 0) {
+ llvmpipe->layer_slot = vinfo->num_attribs;
+ }
+ else {
+ llvmpipe->fake_layer_slot = vinfo->num_attribs;
+ }
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
} else {
/*
* Emit the requested fs attribute for all but position.
@@ -101,6 +129,7 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
}
}
+
/* Figure out if we need bcolor as well.
*/
for (i = 0; i < 2; i++) {
@@ -108,12 +137,11 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
TGSI_SEMANTIC_BCOLOR, i);
if (vs_index >= 0) {
- llvmpipe->bcolor_slot[i] = (int)vinfo->num_attribs;
+ llvmpipe->bcolor_slot[i] = vinfo->num_attribs;
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, vs_index);
}
}
-
/* Figure out if we need pointsize as well.
*/
vs_index = draw_find_shader_output(llvmpipe->draw,
@@ -124,26 +152,26 @@ compute_vertex_info(struct llvmpipe_context *llvmpipe)
draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
}
- /* Figure out if we need viewport index */
- vs_index = draw_find_shader_output(llvmpipe->draw,
- TGSI_SEMANTIC_VIEWPORT_INDEX,
- 0);
- if (vs_index >= 0) {
- llvmpipe->viewport_index_slot = vinfo->num_attribs;
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
- } else {
- llvmpipe->viewport_index_slot = 0;
+ /* Figure out if we need viewport index (if it wasn't already in fs input) */
+ if (llvmpipe->viewport_index_slot == 0) {
+ vs_index = draw_find_shader_output(llvmpipe->draw,
+ TGSI_SEMANTIC_VIEWPORT_INDEX,
+ 0);
+ if (vs_index >= 0) {
+ llvmpipe->viewport_index_slot = vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+ }
}
- /* Figure out if we need layer */
- vs_index = draw_find_shader_output(llvmpipe->draw,
- TGSI_SEMANTIC_LAYER,
- 0);
- if (vs_index >= 0) {
- llvmpipe->layer_slot = vinfo->num_attribs;
- draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
- } else {
- llvmpipe->layer_slot = 0;
+ /* Figure out if we need layer (if it wasn't already in fs input) */
+ if (llvmpipe->layer_slot == 0) {
+ vs_index = draw_find_shader_output(llvmpipe->draw,
+ TGSI_SEMANTIC_LAYER,
+ 0);
+ if (vs_index >= 0) {
+ llvmpipe->layer_slot = vinfo->num_attribs;
+ draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, vs_index);
+ }
}
draw_compute_vertex_size(vinfo);
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.c b/src/gallium/drivers/llvmpipe/lp_state_setup.c
index 64215be..d7ba5c8 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_setup.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.c
@@ -372,9 +372,9 @@ load_attribute(struct gallivm_state *gallivm,
/* Potentially modify it according to twoside, etc:
*/
if (key->twoside) {
- if (vert_attr == key->color_slot && key->bcolor_slot >= 0)
+ if (vert_attr == key->color_slot && key->bcolor_slot > 0)
lp_twoside(gallivm, args, key, key->bcolor_slot, attribv);
- else if (vert_attr == key->spec_slot && key->bspec_slot >= 0)
+ else if (vert_attr == key->spec_slot && key->bspec_slot > 0)
lp_twoside(gallivm, args, key, key->bspec_slot, attribv);
}
}
@@ -602,6 +602,13 @@ emit_tri_coef( struct gallivm_state *gallivm,
*/
break;
+ case LP_INTERP_ZERO:
+ /*
+ * The information we get from the output is bogus, replace it
+ * with zero.
+ */
+ emit_constant_coef4(gallivm, args, slot+1, args->bld.zero);
+ break;
case LP_INTERP_FACING:
emit_facing_coef(gallivm, args, slot+1);
break;
@@ -848,14 +855,10 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp,
key->size = Offset(struct lp_setup_variant_key,
inputs[key->num_inputs]);
- key->color_slot = lp->color_slot [0];
+ key->color_slot = lp->color_slot[0];
key->bcolor_slot = lp->bcolor_slot[0];
- key->spec_slot = lp->color_slot [1];
- key->bspec_slot = lp->bcolor_slot[1];
- assert(key->color_slot == lp->color_slot [0]);
- assert(key->bcolor_slot == lp->bcolor_slot[0]);
- assert(key->spec_slot == lp->color_slot [1]);
- assert(key->bspec_slot == lp->bcolor_slot[1]);
+ key->spec_slot = lp->color_slot[1];
+ key->bspec_slot = lp->bcolor_slot[1];
/*
* If depth is floating point, depth bias is calculated with respect
@@ -876,7 +879,13 @@ lp_make_setup_variant_key(struct llvmpipe_context *lp,
key->pad = 0;
memcpy(key->inputs, fs->inputs, key->num_inputs * sizeof key->inputs[0]);
for (i = 0; i < key->num_inputs; i++) {
- if (key->inputs[i].interp == LP_INTERP_COLOR) {
+ if (key->inputs[i].interp == LP_INTERP_CONSTANT) {
+ if (key->inputs[i].src_index == lp->fake_vpindex_slot ||
+ key->inputs[i].src_index == lp->fake_layer_slot) {
+ key->inputs[i].interp = LP_INTERP_ZERO;
+ }
+ }
+ else if (key->inputs[i].interp == LP_INTERP_COLOR) {
if (lp->rasterizer->flatshade)
key->inputs[i].interp = LP_INTERP_CONSTANT;
else
diff --git a/src/gallium/drivers/llvmpipe/lp_state_setup.h b/src/gallium/drivers/llvmpipe/lp_state_setup.h
index 82af835..6cee6fe 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_setup.h
+++ b/src/gallium/drivers/llvmpipe/lp_state_setup.h
@@ -17,11 +17,10 @@ struct lp_setup_variant_list_item
struct lp_setup_variant_key {
unsigned size:16;
unsigned num_inputs:8;
- int color_slot:8;
-
- int bcolor_slot:8;
- int spec_slot:8;
- int bspec_slot:8;
+ unsigned color_slot:8;
+ unsigned bcolor_slot:8;
+ unsigned spec_slot:8;
+ unsigned bspec_slot:8;
unsigned flatshade_first:1;
unsigned pixel_center_half:1;
unsigned twoside:1;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
index 216e119..c126c08 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_emit_nv50.cpp
@@ -1124,12 +1124,15 @@ CodeEmitterNV50::emitIMUL(const Instruction *i)
{
code[0] = 0x40000000;
+ if (i->src(1).getFile() == FILE_IMMEDIATE) {
+ if (i->sType == TYPE_S16)
+ code[0] |= 0x8100;
+ code[1] = 0;
+ emitForm_IMM(i);
+ } else
if (i->encSize == 8) {
code[1] = (i->sType == TYPE_S16) ? (0x8000 | 0x4000) : 0x0000;
- if (i->src(1).getFile() == FILE_IMMEDIATE)
- emitForm_IMM(i);
- else
- emitForm_MAD(i);
+ emitForm_MAD(i);
} else {
if (i->sType == TYPE_S16)
code[0] |= 0x8100;
@@ -1190,29 +1193,45 @@ CodeEmitterNV50::emitDMUL(const Instruction *i)
void
CodeEmitterNV50::emitIMAD(const Instruction *i)
{
+ int mode;
code[0] = 0x60000000;
- if (isSignedType(i->sType))
- code[1] = i->saturate ? 0x40000000 : 0x20000000;
- else
- code[1] = 0x00000000;
- int neg1 = i->src(0).mod.neg() ^ i->src(1).mod.neg();
- int neg2 = i->src(2).mod.neg();
-
- assert(!(neg1 & neg2));
- code[1] |= neg1 << 27;
- code[1] |= neg2 << 26;
+ assert(!i->src(0).mod && !i->src(1).mod && !i->src(2).mod);
+ if (!isSignedType(i->sType))
+ mode = 0;
+ else if (i->saturate)
+ mode = 2;
+ else
+ mode = 1;
- if (i->src(1).getFile() == FILE_IMMEDIATE)
+ if (i->src(1).getFile() == FILE_IMMEDIATE) {
+ code[1] = 0;
emitForm_IMM(i);
- else
+ code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
+ if (i->flagsSrc >= 0) {
+ assert(!(code[0] & 0x10400000));
+ assert(SDATA(i->src(i->flagsSrc)).id == 0);
+ code[0] |= 0x10400000;
+ }
+ } else
+ if (i->encSize == 4) {
+ emitForm_MUL(i);
+ code[0] |= (mode & 1) << 8 | (mode & 2) << 14;
+ if (i->flagsSrc >= 0) {
+ assert(!(code[0] & 0x10400000));
+ assert(SDATA(i->src(i->flagsSrc)).id == 0);
+ code[0] |= 0x10400000;
+ }
+ } else {
+ code[1] = mode << 29;
emitForm_MAD(i);
- if (i->flagsSrc >= 0) {
- // add with carry from $cX
- assert(!(code[1] & 0x0c000000) && !i->getPredicate());
- code[1] |= 0xc << 24;
- srcId(i->src(i->flagsSrc), 32 + 12);
+ if (i->flagsSrc >= 0) {
+ // add with carry from $cX
+ assert(!(code[1] & 0x0c000000) && !i->getPredicate());
+ code[1] |= 0xc << 24;
+ srcId(i->src(i->flagsSrc), 32 + 12);
+ }
}
}
@@ -2054,8 +2073,9 @@ CodeEmitterNV50::getMinEncodingSize(const Instruction *i) const
// check constraints on short MAD
if (info.srcNr >= 2 && i->srcExists(2)) {
- if (!i->defExists(0) || !isFloatType(i->dType) ||
- i->def(0).rep()->reg.data.id != i->src(2).rep()->reg.data.id)
+ if (!i->defExists(0) ||
+ (i->flagsSrc >= 0 && SDATA(i->src(i->flagsSrc)).id > 0) ||
+ DDATA(i->def(0)).id != SDATA(i->src(2)).id)
return 8;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
index 1d2caab..b233860 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_from_tgsi.cpp
@@ -1897,7 +1897,7 @@ Converter::handleTEX(Value *dst[4], int R, int S, int L, int C, int Dx, int Dy)
shd = fetchSrc(C >> 4, C & 3);
if (texi->op == OP_TXD) {
- for (c = 0; c < tgt.getDim(); ++c) {
+ for (c = 0; c < tgt.getDim() + tgt.isCube(); ++c) {
texi->dPdx[c].set(fetchSrc(Dx >> 4, (Dx & 3) + c));
texi->dPdy[c].set(fetchSrc(Dy >> 4, (Dy & 3) + c));
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
index 420cc4e..0b90378 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_gm107.cpp
@@ -57,7 +57,7 @@ GM107LoweringPass::handleManualTXD(TexInstruction *i)
Instruction *tex, *add;
Value *zero = bld.loadImm(bld.getSSA(), 0);
int l, c;
- const int dim = i->tex.target.getDim();
+ const int dim = i->tex.target.getDim() + i->tex.target.isCube();
const int array = i->tex.target.isArray();
i->op = OP_TEX; // no need to clone dPdx/dPdy later
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
index 64f5fc0..8752b0c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nv50.cpp
@@ -44,6 +44,8 @@ static bool
expandIntegerMUL(BuildUtil *bld, Instruction *mul)
{
const bool highResult = mul->subOp == NV50_IR_SUBOP_MUL_HIGH;
+ ImmediateValue src1;
+ bool src1imm = mul->src(1).getImmediate(src1);
DataType fTy; // full type
switch (mul->sType) {
@@ -72,24 +74,41 @@ expandIntegerMUL(BuildUtil *bld, Instruction *mul)
for (int j = 0; j < 4; ++j)
t[j] = bld->getSSA(fullSize);
- s[0] = mul->getSrc(0);
- s[1] = mul->getSrc(1);
-
if (isSignedType(mul->sType) && highResult) {
s[0] = bld->getSSA(fullSize);
s[1] = bld->getSSA(fullSize);
bld->mkOp1(OP_ABS, mul->sType, s[0], mul->getSrc(0));
bld->mkOp1(OP_ABS, mul->sType, s[1], mul->getSrc(1));
+ src1.reg.data.s32 = abs(src1.reg.data.s32);
+ } else {
+ s[0] = mul->getSrc(0);
+ s[1] = mul->getSrc(1);
}
// split sources into halves
i[0] = bld->mkSplit(a, halfSize, s[0]);
i[1] = bld->mkSplit(b, halfSize, s[1]);
- i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0], b[1]);
- i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]);
+ if (src1imm && (src1.reg.data.u32 & 0xffff0000) == 0) {
+ i[2] = i[3] = bld->mkOp2(OP_MUL, fTy, t[1], a[1],
+ bld->mkImm(src1.reg.data.u32 & 0xffff));
+ } else {
+ i[2] = bld->mkOp2(OP_MUL, fTy, t[0], a[0],
+ src1imm ? bld->mkImm(src1.reg.data.u32 >> 16) : b[1]);
+ if (src1imm && (src1.reg.data.u32 & 0x0000ffff) == 0) {
+ i[3] = i[2];
+ t[1] = t[0];
+ } else {
+ i[3] = bld->mkOp3(OP_MAD, fTy, t[1], a[1], b[0], t[0]);
+ }
+ }
i[7] = bld->mkOp2(OP_SHL, fTy, t[2], t[1], bld->mkImm(halfSize * 8));
- i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]);
+ if (src1imm && (src1.reg.data.u32 & 0x0000ffff) == 0) {
+ i[4] = i[3];
+ t[3] = t[2];
+ } else {
+ i[4] = bld->mkOp3(OP_MAD, fTy, t[3], a[0], b[0], t[2]);
+ }
if (highResult) {
Value *c[2];
@@ -911,7 +930,7 @@ NV50LoweringPreSSA::handleTXD(TexInstruction *i)
Instruction *tex;
Value *zero = bld.loadImm(bld.getSSA(), 0);
int l, c;
- const int dim = i->tex.target.getDim();
+ const int dim = i->tex.target.getDim() + i->tex.target.isCube();
handleTEX(i);
i->op = OP_TEX; // no need to clone dPdx/dPdy later
@@ -1225,7 +1244,7 @@ NV50LoweringPreSSA::handleEXPORT(Instruction *i)
i->setDef(0, new_LValue(func, FILE_GPR));
i->getDef(0)->reg.data.id = id;
- prog->maxGPR = MAX2(prog->maxGPR, id);
+ prog->maxGPR = MAX2(prog->maxGPR, id * 2);
}
}
return true;
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
index 0f575f2..e67bf3e 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.cpp
@@ -186,92 +186,68 @@ NVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses,
uses.push_back(TexUse(usei, texi));
}
+// While it might be tempting to use the an algorithm that just looks at tex
+// uses, not all texture results are guaranteed to be used on all paths. In
+// the case where along some control flow path a texture result is never used,
+// we might reuse that register for something else, creating a
+// write-after-write hazard. So we have to manually look through all
+// instructions looking for ones that reference the registers in question.
void
-NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi,
- Instruction *insn,
- const BasicBlock *term,
- std::list<TexUse> &uses)
+NVC0LegalizePostRA::findFirstUses(
+ Instruction *texi, std::list<TexUse> &uses)
{
- while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0)))
- insn = insn->getSrc(0)->getUniqueInsn();
-
- // NOTE: the tex itself is, of course, not an overwriting definition
- if (insn == texi || !insn->bb->reachableBy(texi->bb, term))
- return;
+ int minGPR = texi->def(0).rep()->reg.data.id;
+ int maxGPR = minGPR + texi->def(0).rep()->reg.size / 4 - 1;
- switch (insn->op) {
- /* Values not connected to the tex's definition through any of these should
- * not be conflicting.
- */
- case OP_SPLIT:
- case OP_MERGE:
- case OP_PHI:
- case OP_UNION:
- /* recurse again */
- for (int s = 0; insn->srcExists(s); ++s)
- findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsn(), term,
- uses);
- break;
- default:
- // if (!isTextureOp(insn->op)) // TODO: are TEXes always ordered ?
- addTexUse(uses, insn, texi);
- break;
- }
+ unordered_set<const BasicBlock *> visited;
+ findFirstUsesBB(minGPR, maxGPR, texi->next, texi, uses, visited);
}
void
-NVC0LegalizePostRA::findFirstUses(
- const Instruction *texi,
- const Instruction *insn,
- std::list<TexUse> &uses,
- unordered_set<const Instruction *>& visited)
+NVC0LegalizePostRA::findFirstUsesBB(
+ int minGPR, int maxGPR, Instruction *start,
+ const Instruction *texi, std::list<TexUse> &uses,
+ unordered_set<const BasicBlock *> &visited)
{
- for (int d = 0; insn->defExists(d); ++d) {
- Value *v = insn->getDef(d);
- for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) {
- Instruction *usei = (*u)->getInsn();
-
- // NOTE: In case of a loop that overwrites a value but never uses
- // it, it can happen that we have a cycle of uses that consists only
- // of phis and no-op moves and will thus cause an infinite loop here
- // since these are not considered actual uses.
- // The most obvious (and perhaps the only) way to prevent this is to
- // remember which instructions we've already visited.
-
- if (visited.find(usei) != visited.end())
- continue;
+ const BasicBlock *bb = start->bb;
+
+ // We don't process the whole bb the first time around. This is correct,
+ // however we might be in a loop and hit this BB again, and need to process
+ // the full thing. So only mark a bb as visited if we processed it from the
+ // beginning.
+ if (start == bb->getEntry()) {
+ if (visited.find(bb) != visited.end())
+ return;
+ visited.insert(bb);
+ }
- visited.insert(usei);
-
- if (usei->op == OP_PHI || usei->op == OP_UNION) {
- // need a barrier before WAW cases, like:
- // %r0 = tex
- // if ...
- // texbar <- is required or tex might replace x again
- // %r1 = x <- overwriting def
- // %r2 = phi %r0, %r1
- for (int s = 0; usei->srcExists(s); ++s) {
- Instruction *defi = usei->getSrc(s)->getUniqueInsn();
- if (defi && &usei->src(s) != *u)
- findOverwritingDefs(texi, defi, usei->bb, uses);
- }
- }
+ for (Instruction *insn = start; insn != bb->getExit(); insn = insn->next) {
+ if (insn->isNop())
+ continue;
- if (usei->op == OP_SPLIT ||
- usei->op == OP_MERGE ||
- usei->op == OP_PHI ||
- usei->op == OP_UNION) {
- // these uses don't manifest in the machine code
- findFirstUses(texi, usei, uses, visited);
- } else
- if (usei->op == OP_MOV && usei->getDef(0)->equals(usei->getSrc(0)) &&
- usei->subOp != NV50_IR_SUBOP_MOV_FINAL) {
- findFirstUses(texi, usei, uses, visited);
- } else {
- addTexUse(uses, usei, texi);
- }
+ for (int d = 0; insn->defExists(d); ++d) {
+ if (insn->def(d).getFile() != FILE_GPR ||
+ insn->def(d).rep()->reg.data.id < minGPR ||
+ insn->def(d).rep()->reg.data.id > maxGPR)
+ continue;
+ addTexUse(uses, insn, texi);
+ return;
+ }
+
+ for (int s = 0; insn->srcExists(s); ++s) {
+ if (insn->src(s).getFile() != FILE_GPR ||
+ insn->src(s).rep()->reg.data.id < minGPR ||
+ insn->src(s).rep()->reg.data.id > maxGPR)
+ continue;
+ addTexUse(uses, insn, texi);
+ return;
}
}
+
+ for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
+ findFirstUsesBB(minGPR, maxGPR, BasicBlock::get(ei.getNode())->getEntry(),
+ texi, uses, visited);
+ }
}
// Texture barriers:
@@ -323,8 +299,7 @@ NVC0LegalizePostRA::insertTextureBarriers(Function *fn)
if (!uses)
return false;
for (size_t i = 0; i < texes.size(); ++i) {
- unordered_set<const Instruction *> visited;
- findFirstUses(texes[i], texes[i], uses[i], visited);
+ findFirstUses(texes[i], uses[i]);
}
// determine the barrier level at each use
@@ -870,7 +845,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
Instruction *tex;
Value *zero = bld.loadImm(bld.getSSA(), 0);
int l, c;
- const int dim = i->tex.target.getDim();
+ const int dim = i->tex.target.getDim() + i->tex.target.isCube();
const int array = i->tex.target.isArray();
i->op = OP_TEX; // no need to clone dPdx/dPdy later
@@ -917,7 +892,7 @@ NVC0LoweringPass::handleManualTXD(TexInstruction *i)
bool
NVC0LoweringPass::handleTXD(TexInstruction *txd)
{
- int dim = txd->tex.target.getDim();
+ int dim = txd->tex.target.getDim() + txd->tex.target.isCube();
unsigned arg = txd->tex.target.getArgCount();
unsigned expected_args = arg;
const int chipset = prog->getTarget()->getChipset();
@@ -937,8 +912,7 @@ NVC0LoweringPass::handleTXD(TexInstruction *txd)
if (expected_args > 4 ||
dim > 2 ||
- txd->tex.target.isShadow() ||
- txd->tex.target.isCube())
+ txd->tex.target.isShadow())
txd->op = OP_TEX;
handleTEX(txd);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
index 2ce52e5..adb400a 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_lowering_nvc0.h
@@ -69,12 +69,10 @@ private:
};
bool insertTextureBarriers(Function *);
inline bool insnDominatedBy(const Instruction *, const Instruction *) const;
- void findFirstUses(const Instruction *tex, const Instruction *def,
- std::list<TexUse>&,
- unordered_set<const Instruction *>&);
- void findOverwritingDefs(const Instruction *tex, Instruction *insn,
- const BasicBlock *term,
- std::list<TexUse>&);
+ void findFirstUses(Instruction *texi, std::list<TexUse> &uses);
+ void findFirstUsesBB(int minGPR, int maxGPR, Instruction *start,
+ const Instruction *texi, std::list<TexUse> &uses,
+ unordered_set<const BasicBlock *> &visited);
void addTexUse(std::list<TexUse>&, Instruction *, const Instruction *);
const Instruction *recurseDef(const Instruction *);
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
index 805be5f..022626c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_peephole.cpp
@@ -1501,6 +1501,7 @@ private:
void handleSLCT(Instruction *);
void handleLOGOP(Instruction *);
void handleCVT_NEG(Instruction *);
+ void handleCVT_CVT(Instruction *);
void handleCVT_EXTBF(Instruction *);
void handleSUCLAMP(Instruction *);
@@ -1792,6 +1793,47 @@ AlgebraicOpt::handleCVT_NEG(Instruction *cvt)
delete_Instruction(prog, cvt);
}
+// F2I(TRUNC()) and so on can be expressed as a single CVT. If the earlier CVT
+// does a type conversion, this becomes trickier as there might be range
+// changes/etc. We could handle those in theory as long as the range was being
+// reduced or kept the same.
+void
+AlgebraicOpt::handleCVT_CVT(Instruction *cvt)
+{
+ Instruction *insn = cvt->getSrc(0)->getInsn();
+ RoundMode rnd = insn->rnd;
+
+ if (insn->saturate ||
+ insn->subOp ||
+ insn->dType != insn->sType ||
+ insn->dType != cvt->sType)
+ return;
+
+ switch (insn->op) {
+ case OP_CEIL:
+ rnd = ROUND_PI;
+ break;
+ case OP_FLOOR:
+ rnd = ROUND_MI;
+ break;
+ case OP_TRUNC:
+ rnd = ROUND_ZI;
+ break;
+ case OP_CVT:
+ break;
+ default:
+ return;
+ }
+
+ if (!isFloatType(cvt->dType) || !isFloatType(insn->sType))
+ rnd = (RoundMode)(rnd & 3);
+
+ cvt->rnd = rnd;
+ cvt->setSrc(0, insn->getSrc(0));
+ cvt->src(0).mod *= insn->src(0).mod;
+ cvt->sType = insn->sType;
+}
+
// Some shaders extract packed bytes out of words and convert them to
// e.g. float. The Fermi+ CVT instruction can extract those directly, as can
// nv50 for word sizes.
@@ -1961,6 +2003,7 @@ AlgebraicOpt::visit(BasicBlock *bb)
break;
case OP_CVT:
handleCVT_NEG(i);
+ handleCVT_CVT(i);
if (prog->getTarget()->isOpSupported(OP_EXTBF, TYPE_U32))
handleCVT_EXTBF(i);
break;
@@ -2532,6 +2575,7 @@ MemoryOpt::runOpt(BasicBlock *bb)
class FlatteningPass : public Pass
{
private:
+ virtual bool visit(Function *);
virtual bool visit(BasicBlock *);
bool tryPredicateConditional(BasicBlock *);
@@ -2540,6 +2584,8 @@ private:
inline bool isConstantCondition(Value *pred);
inline bool mayPredicate(const Instruction *, const Value *pred) const;
inline void removeFlow(Instruction *);
+
+ uint8_t gpr_unit;
};
bool
@@ -2561,9 +2607,15 @@ FlatteningPass::isConstantCondition(Value *pred)
file = ld->src(0).getFile();
} else {
file = insn->src(s).getFile();
- // catch $r63 on NVC0
- if (file == FILE_GPR && insn->getSrc(s)->reg.data.id > prog->maxGPR)
- file = FILE_IMMEDIATE;
+ // catch $r63 on NVC0 and $r63/$r127 on NV50. Unfortunately maxGPR is
+ // in register "units", which can vary between targets.
+ if (file == FILE_GPR) {
+ Value *v = insn->getSrc(s);
+ int bytes = v->reg.data.id * MIN2(v->reg.size, 4);
+ int units = bytes >> gpr_unit;
+ if (units > prog->maxGPR)
+ file = FILE_IMMEDIATE;
+ }
}
if (file != FILE_IMMEDIATE && file != FILE_MEMORY_CONST)
return false;
@@ -2669,6 +2721,14 @@ FlatteningPass::tryPropagateBranch(BasicBlock *bb)
}
bool
+FlatteningPass::visit(Function *fn)
+{
+ gpr_unit = prog->getTarget()->getFileUnit(FILE_GPR);
+
+ return true;
+}
+
+bool
FlatteningPass::visit(BasicBlock *bb)
{
if (tryPredicateConditional(bb))
@@ -2774,6 +2834,15 @@ private:
virtual bool visit(BasicBlock *);
};
+static bool
+post_ra_dead(Instruction *i)
+{
+ for (int d = 0; i->defExists(d); ++d)
+ if (i->getDef(d)->refCount())
+ return false;
+ return true;
+}
+
bool
NV50PostRaConstantFolding::visit(BasicBlock *bb)
{
@@ -2787,24 +2856,48 @@ NV50PostRaConstantFolding::visit(BasicBlock *bb)
i->src(0).getFile() != FILE_GPR ||
i->src(1).getFile() != FILE_GPR ||
i->src(2).getFile() != FILE_GPR ||
- i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id ||
- !isFloatType(i->dType))
+ i->getDef(0)->reg.data.id != i->getSrc(2)->reg.data.id)
break;
if (i->getDef(0)->reg.data.id >= 64 ||
i->getSrc(0)->reg.data.id >= 64)
break;
+ if (i->flagsSrc >= 0 && i->getSrc(i->flagsSrc)->reg.data.id != 0)
+ break;
+
+ if (i->getPredicate())
+ break;
+
def = i->getSrc(1)->getInsn();
+ if (def && def->op == OP_SPLIT && typeSizeof(def->sType) == 4)
+ def = def->getSrc(0)->getInsn();
if (def && def->op == OP_MOV && def->src(0).getFile() == FILE_IMMEDIATE) {
vtmp = i->getSrc(1);
- i->setSrc(1, def->getSrc(0));
+ if (isFloatType(i->sType)) {
+ i->setSrc(1, def->getSrc(0));
+ } else {
+ ImmediateValue val;
+ bool ret = def->src(0).getImmediate(val);
+ assert(ret);
+ if (i->getSrc(1)->reg.data.id & 1)
+ val.reg.data.u32 >>= 16;
+ val.reg.data.u32 &= 0xffff;
+ i->setSrc(1, new_ImmediateValue(bb->getProgram(), val.reg.data.u32));
+ }
/* There's no post-RA dead code elimination, so do it here
* XXX: if we add more code-removing post-RA passes, we might
* want to create a post-RA dead-code elim pass */
- if (vtmp->refCount() == 0)
- delete_Instruction(bb->getProgram(), def);
+ if (post_ra_dead(vtmp->getInsn())) {
+ Value *src = vtmp->getInsn()->getSrc(0);
+ // Careful -- splits will have already been removed from the
+ // functions. Don't double-delete.
+ if (vtmp->getInsn()->bb)
+ delete_Instruction(prog, vtmp->getInsn());
+ if (src->getInsn() && post_ra_dead(src->getInsn()))
+ delete_Instruction(prog, src->getInsn());
+ }
break;
}
diff --git a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
index b32bc13..cd8c42c 100644
--- a/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
+++ b/src/gallium/drivers/nouveau/codegen/nv50_ir_ra.cpp
@@ -1473,7 +1473,6 @@ GCRA::allocateRegisters(ArrayList& insns)
// Short encoding only possible if they're all GPRs, no need to
// affect them otherwise.
if (insn->flagsDef < 0 &&
- isFloatType(insn->dType) &&
insn->src(0).getFile() == FILE_GPR &&
insn->src(1).getFile() == FILE_GPR &&
insn->src(2).getFile() == FILE_GPR)
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.c b/src/gallium/drivers/nouveau/nouveau_screen.c
index a6065e4..4ca9e5c 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.c
+++ b/src/gallium/drivers/nouveau/nouveau_screen.c
@@ -147,6 +147,12 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
if (nv_dbg)
nouveau_mesa_debug = atoi(nv_dbg);
+ /* These must be set before any failure is possible, as the cleanup
+ * paths assume they're responsible for deleting them.
+ */
+ screen->drm = nouveau_drm(&dev->object);
+ screen->device = dev;
+
/*
* this is initialized to 1 in nouveau_drm_screen_create after screen
* is fully constructed and added to the global screen list.
@@ -175,7 +181,6 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
data, size, &screen->channel);
if (ret)
return ret;
- screen->device = dev;
ret = nouveau_client_new(screen->device, &screen->client);
if (ret)
@@ -229,6 +234,8 @@ nouveau_screen_init(struct nouveau_screen *screen, struct nouveau_device *dev)
void
nouveau_screen_fini(struct nouveau_screen *screen)
{
+ int fd = screen->drm->fd;
+
nouveau_mm_destroy(screen->mm_GART);
nouveau_mm_destroy(screen->mm_VRAM);
@@ -238,6 +245,8 @@ nouveau_screen_fini(struct nouveau_screen *screen)
nouveau_object_del(&screen->channel);
nouveau_device_del(&screen->device);
+ nouveau_drm_del(&screen->drm);
+ close(fd);
}
static void
diff --git a/src/gallium/drivers/nouveau/nouveau_screen.h b/src/gallium/drivers/nouveau/nouveau_screen.h
index 328646f..28c4760 100644
--- a/src/gallium/drivers/nouveau/nouveau_screen.h
+++ b/src/gallium/drivers/nouveau/nouveau_screen.h
@@ -17,6 +17,7 @@ struct nouveau_bo;
struct nouveau_screen {
struct pipe_screen base;
+ struct nouveau_drm *drm;
struct nouveau_device *device;
struct nouveau_object *channel;
struct nouveau_client *client;
diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h
index 1319c32..f13988e 100644
--- a/src/gallium/drivers/nouveau/nouveau_winsys.h
+++ b/src/gallium/drivers/nouveau/nouveau_winsys.h
@@ -6,6 +6,7 @@
#include "pipe/p_defines.h"
+#include <drm.h>
#include <nouveau.h>
#ifndef NV04_PFIFO_MAX_PACKET_LEN
@@ -79,13 +80,13 @@ nouveau_screen_transfer_flags(unsigned pipe)
return flags;
}
-extern struct pipe_screen *
+extern struct nouveau_screen *
nv30_screen_create(struct nouveau_device *);
-extern struct pipe_screen *
+extern struct nouveau_screen *
nv50_screen_create(struct nouveau_device *);
-extern struct pipe_screen *
+extern struct nouveau_screen *
nvc0_screen_create(struct nouveau_device *);
#endif
diff --git a/src/gallium/drivers/nouveau/nv30/nv30_screen.c b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
index 154c3d3..854f70c 100644
--- a/src/gallium/drivers/nouveau/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nouveau/nv30/nv30_screen.c
@@ -413,23 +413,20 @@ nv30_screen_destroy(struct pipe_screen *pscreen)
#define FAIL_SCREEN_INIT(str, err) \
do { \
NOUVEAU_ERR(str, err); \
- nv30_screen_destroy(pscreen); \
- return NULL; \
+ screen->base.base.context_create = NULL; \
+ return &screen->base; \
} while(0)
-struct pipe_screen *
+struct nouveau_screen *
nv30_screen_create(struct nouveau_device *dev)
{
- struct nv30_screen *screen = CALLOC_STRUCT(nv30_screen);
+ struct nv30_screen *screen;
struct pipe_screen *pscreen;
struct nouveau_pushbuf *push;
struct nv04_fifo *fifo;
unsigned oclass = 0;
int ret, i;
- if (!screen)
- return NULL;
-
switch (dev->chipset & 0xf0) {
case 0x30:
if (RANKINE_0397_CHIPSET & (1 << (dev->chipset & 0x0f)))
@@ -458,10 +455,16 @@ nv30_screen_create(struct nouveau_device *dev)
if (!oclass) {
NOUVEAU_ERR("unknown 3d class for 0x%02x\n", dev->chipset);
- FREE(screen);
return NULL;
}
+ screen = CALLOC_STRUCT(nv30_screen);
+ if (!screen)
+ return NULL;
+
+ pscreen = &screen->base.base;
+ pscreen->destroy = nv30_screen_destroy;
+
/*
* Some modern apps try to use msaa without keeping in mind the
* restrictions on videomem of older cards. Resulting in dmesg saying:
@@ -479,8 +482,6 @@ nv30_screen_create(struct nouveau_device *dev)
if (screen->max_sample_count > 4)
screen->max_sample_count = 4;
- pscreen = &screen->base.base;
- pscreen->destroy = nv30_screen_destroy;
pscreen->get_param = nv30_screen_get_param;
pscreen->get_paramf = nv30_screen_get_paramf;
pscreen->get_shader_param = nv30_screen_get_shader_param;
@@ -693,5 +694,5 @@ nv30_screen_create(struct nouveau_device *dev)
nouveau_pushbuf_kick(push, push->channel);
nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
- return pscreen;
+ return &screen->base;
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
index 812d10c..7450119 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_miptree.c
@@ -336,9 +336,10 @@ nv50_miptree_create(struct pipe_screen *pscreen,
const struct pipe_resource *templ)
{
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+ struct nouveau_drm *drm = nouveau_screen(pscreen)->drm;
struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
struct pipe_resource *pt = &mt->base.base;
- bool compressed = dev->drm_version >= 0x01000101;
+ bool compressed = drm->version >= 0x01000101;
int ret;
union nouveau_bo_config bo_config;
uint32_t bo_flags;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
index b6ebbbf..cccd3b7 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw.c
@@ -113,6 +113,12 @@ static void
nv50_hw_destroy_query(struct nv50_context *nv50, struct nv50_query *q)
{
struct nv50_hw_query *hq = nv50_hw_query(q);
+
+ if (hq->funcs && hq->funcs->destroy_query) {
+ hq->funcs->destroy_query(nv50, hq);
+ return;
+ }
+
nv50_hw_query_allocate(nv50, q, 0);
nouveau_fence_ref(NULL, &hq->fence);
FREE(hq);
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c
index d1bccb9..4a605f2 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_metric.c
@@ -71,7 +71,8 @@ nv50_hw_metric_destroy_query(struct nv50_context *nv50,
unsigned i;
for (i = 0; i < hmq->num_queries; i++)
- hmq->queries[i]->funcs->destroy_query(nv50, hmq->queries[i]);
+ if (hmq->queries[i]->funcs->destroy_query)
+ hmq->queries[i]->funcs->destroy_query(nv50, hmq->queries[i]);
FREE(hmq);
}
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
index 8453ce7..79c7023 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_query_hw_sm.c
@@ -153,7 +153,9 @@ static void
nv50_hw_sm_destroy_query(struct nv50_context *nv50, struct nv50_hw_query *hq)
{
struct nv50_query *q = &hq->base;
- q->funcs->destroy_query(nv50, q);
+ nv50_hw_query_allocate(nv50, q, 0);
+ nouveau_fence_ref(NULL, &hq->fence);
+ FREE(hq);
}
static boolean
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_screen.c b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
index 1e4b75f..272e1d4 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_screen.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_screen.c
@@ -405,6 +405,11 @@ nv50_screen_destroy(struct pipe_screen *pscreen)
if (screen->blitter)
nv50_blitter_destroy(screen);
+ if (screen->pm.prog) {
+ screen->pm.prog->code = NULL; /* hardcoded, don't FREE */
+ nv50_program_destroy(NULL, screen->pm.prog);
+ FREE(screen->pm.prog);
+ }
nouveau_bo_ref(NULL, &screen->code);
nouveau_bo_ref(NULL, &screen->tls_bo);
@@ -518,11 +523,11 @@ nv50_screen_init_hwctx(struct nv50_screen *screen)
}
BEGIN_NV04(push, NV50_3D(ZETA_COMP_ENABLE), 1);
- PUSH_DATA(push, screen->base.device->drm_version >= 0x01000101);
+ PUSH_DATA(push, screen->base.drm->version >= 0x01000101);
BEGIN_NV04(push, NV50_3D(RT_COMP_ENABLE(0)), 8);
for (i = 0; i < 8; ++i)
- PUSH_DATA(push, screen->base.device->drm_version >= 0x01000101);
+ PUSH_DATA(push, screen->base.drm->version >= 0x01000101);
BEGIN_NV04(push, NV50_3D(RT_CONTROL), 1);
PUSH_DATA (push, 1);
@@ -747,7 +752,7 @@ int nv50_tls_realloc(struct nv50_screen *screen, unsigned tls_space)
return 1;
}
-struct pipe_screen *
+struct nouveau_screen *
nv50_screen_create(struct nouveau_device *dev)
{
struct nv50_screen *screen;
@@ -762,6 +767,7 @@ nv50_screen_create(struct nouveau_device *dev)
if (!screen)
return NULL;
pscreen = &screen->base.base;
+ pscreen->destroy = nv50_screen_destroy;
ret = nouveau_screen_init(&screen->base, dev);
if (ret) {
@@ -782,7 +788,6 @@ nv50_screen_create(struct nouveau_device *dev)
chan = screen->base.channel;
- pscreen->destroy = nv50_screen_destroy;
pscreen->context_create = nv50_create;
pscreen->is_format_supported = nv50_screen_is_format_supported;
pscreen->get_param = nv50_screen_get_param;
@@ -961,11 +966,11 @@ nv50_screen_create(struct nouveau_device *dev)
nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
- return pscreen;
+ return &screen->base;
fail:
- nv50_screen_destroy(pscreen);
- return NULL;
+ screen->base.base.context_create = NULL;
+ return &screen->base;
}
int
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_tex.c b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
index 6083ea9..c3f4336 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_tex.c
@@ -192,8 +192,7 @@ nv50_create_texture_view(struct pipe_context *pipe,
tic[2] |= NV50_TIC_2_TARGET_BUFFER | NV50_TIC_2_LINEAR;
break;
default:
- NOUVEAU_ERR("invalid texture target: %d\n", mt->base.base.target);
- return false;
+ unreachable("unexpected/invalid texture target");
}
tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
diff --git a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
index 85878d5..7de2f1f 100644
--- a/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
+++ b/src/gallium/drivers/nouveau/nv50/nv50_vbo.c
@@ -91,6 +91,9 @@ nv50_vertex_state_create(struct pipe_context *pipe,
}
so->element[i].state = nv50_format_table[fmt].vtx;
so->need_conversion = true;
+ pipe_debug_message(&nouveau_context(pipe)->debug, FALLBACK,
+ "Converting vertex element %d, no hw format %s",
+ i, util_format_name(ve->src_format));
}
so->element[i].state |= i;
diff --git a/src/gallium/drivers/nouveau/nv50/nv84_video.c b/src/gallium/drivers/nouveau/nv50/nv84_video.c
index 7a4670f..88655db 100644
--- a/src/gallium/drivers/nouveau/nv50/nv84_video.c
+++ b/src/gallium/drivers/nouveau/nv50/nv84_video.c
@@ -756,8 +756,8 @@ firmware_present(struct pipe_screen *pscreen, enum pipe_video_format codec)
int present, ret;
if (!FIRMWARE_PRESENT(checked, VP_KERN)) {
- nouveau_object_new(screen->channel, 0, 0x7476, NULL, 0, &obj);
- if (obj)
+ ret = nouveau_object_new(screen->channel, 0, 0x7476, NULL, 0, &obj);
+ if (!ret)
screen->firmware_info.profiles_present |= FIRMWARE_VP_KERN;
nouveau_object_del(&obj);
screen->firmware_info.profiles_checked |= FIRMWARE_VP_KERN;
@@ -765,8 +765,8 @@ firmware_present(struct pipe_screen *pscreen, enum pipe_video_format codec)
if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
if (!FIRMWARE_PRESENT(checked, BSP_KERN)) {
- nouveau_object_new(screen->channel, 0, 0x74b0, NULL, 0, &obj);
- if (obj)
+ ret = nouveau_object_new(screen->channel, 0, 0x74b0, NULL, 0, &obj);
+ if (!ret)
screen->firmware_info.profiles_present |= FIRMWARE_BSP_KERN;
nouveau_object_del(&obj);
screen->firmware_info.profiles_checked |= FIRMWARE_BSP_KERN;
diff --git a/src/gallium/drivers/nouveau/nv50/nv98_video.c b/src/gallium/drivers/nouveau/nv50/nv98_video.c
index 20ea547..177a7e0 100644
--- a/src/gallium/drivers/nouveau/nv50/nv98_video.c
+++ b/src/gallium/drivers/nouveau/nv50/nv98_video.c
@@ -25,6 +25,8 @@
#include "util/u_sampler.h"
#include "util/u_format.h"
+#include <nvif/class.h>
+
static void
nv98_decoder_decode_bitstream(struct pipe_video_codec *decoder,
struct pipe_video_buffer *video_target,
@@ -56,6 +58,28 @@ nv98_decoder_decode_bitstream(struct pipe_video_codec *decoder,
nv98_decoder_ppp(dec, desc, target, comm_seq);
}
+static const struct nouveau_mclass
+nv98_decoder_msvld[] = {
+ { G98_MSVLD, -1 },
+ { IGT21A_MSVLD, -1 },
+ { GT212_MSVLD, -1 },
+ {}
+};
+
+static const struct nouveau_mclass
+nv98_decoder_mspdec[] = {
+ { G98_MSPDEC, -1 },
+ { GT212_MSPDEC, -1 },
+ {}
+};
+
+static const struct nouveau_mclass
+nv98_decoder_msppp[] = {
+ { G98_MSPPP, -1 },
+ { GT212_MSPPP, -1 },
+ {}
+};
+
struct pipe_video_codec *
nv98_create_decoder(struct pipe_context *context,
const struct pipe_video_codec *templ)
@@ -103,12 +127,33 @@ nv98_create_decoder(struct pipe_context *context,
}
push = dec->pushbuf;
- if (!ret)
- ret = nouveau_object_new(dec->channel[0], 0x390b1, 0x85b1, NULL, 0, &dec->bsp);
- if (!ret)
- ret = nouveau_object_new(dec->channel[1], 0x190b2, 0x85b2, NULL, 0, &dec->vp);
- if (!ret)
- ret = nouveau_object_new(dec->channel[2], 0x290b3, 0x85b3, NULL, 0, &dec->ppp);
+ if (!ret) {
+ ret = nouveau_object_mclass(dec->channel[0], nv98_decoder_msvld);
+ if (ret >= 0) {
+ ret = nouveau_object_new(dec->channel[0], 0xbeef85b1,
+ nv98_decoder_msvld[ret].oclass, NULL, 0,
+ &dec->bsp);
+ }
+ }
+
+ if (!ret) {
+ ret = nouveau_object_mclass(dec->channel[1], nv98_decoder_mspdec);
+ if (ret >= 0) {
+ ret = nouveau_object_new(dec->channel[1], 0xbeef85b2,
+ nv98_decoder_mspdec[ret].oclass, NULL, 0,
+ &dec->vp);
+ }
+ }
+
+ if (!ret) {
+ ret = nouveau_object_mclass(dec->channel[2], nv98_decoder_msppp);
+ if (ret >= 0) {
+ ret = nouveau_object_new(dec->channel[2], 0xbeef85b3,
+ nv98_decoder_msppp[ret].oclass, NULL, 0,
+ &dec->ppp);
+ }
+ }
+
if (ret)
goto fail;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
index 15991c3..ed1ac48 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
@@ -248,9 +248,10 @@ nvc0_miptree_create(struct pipe_screen *pscreen,
const struct pipe_resource *templ)
{
struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+ struct nouveau_drm *drm = nouveau_screen(pscreen)->drm;
struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
struct pipe_resource *pt = &mt->base.base;
- bool compressed = dev->drm_version >= 0x01000101;
+ bool compressed = drm->version >= 0x01000101;
int ret;
union nouveau_bo_config bo_config;
uint32_t bo_flags;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
index 3845d61..7497317 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -184,7 +184,7 @@ nvc0_screen_get_driver_query_group_info(struct pipe_screen *pscreen,
count++;
#endif
- if (screen->base.device->drm_version >= 0x01000101) {
+ if (screen->base.drm->version >= 0x01000101) {
if (screen->compute) {
if (screen->base.class_3d == NVE4_3D_CLASS) {
count += 2;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
index 90ee82f..a70d524 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw.c
@@ -116,6 +116,12 @@ static void
nvc0_hw_destroy_query(struct nvc0_context *nvc0, struct nvc0_query *q)
{
struct nvc0_hw_query *hq = nvc0_hw_query(q);
+
+ if (hq->funcs && hq->funcs->destroy_query) {
+ hq->funcs->destroy_query(nvc0, hq);
+ return;
+ }
+
nvc0_hw_query_allocate(nvc0, q, 0);
nouveau_fence_ref(NULL, &hq->fence);
FREE(hq);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
index 12fb609..7a64b69 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
@@ -293,7 +293,8 @@ nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0,
unsigned i;
for (i = 0; i < hmq->num_queries; i++)
- hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]);
+ if (hmq->queries[i]->funcs->destroy_query)
+ hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]);
FREE(hmq);
}
@@ -420,7 +421,10 @@ sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])
{
switch (hq->base.type - NVE4_HW_METRIC_QUERY(0)) {
case NVE4_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:
- return sm20_hw_metric_calc_result(hq, res64);
+ /* (active_warps / active_cycles) / max. number of warps on a MP */
+ if (res64[1])
+ return (res64[0] / (double)res64[1]) / 64;
+ break;
case NVE4_HW_METRIC_QUERY_BRANCH_EFFICIENCY:
return sm20_hw_metric_calc_result(hq, res64);
case NVE4_HW_METRIC_QUERY_INST_ISSUED:
@@ -561,7 +565,7 @@ nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
uint16_t class_3d = screen->base.class_3d;
int count = 0;
- if (screen->base.device->drm_version >= 0x01000101) {
+ if (screen->base.drm->version >= 0x01000101) {
if (screen->compute) {
if (screen->base.class_3d == NVE4_3D_CLASS) {
count += NVE4_HW_METRIC_QUERY_COUNT;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
index 7d1e75f..721857e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_sm.c
@@ -782,7 +782,9 @@ static void
nvc0_hw_sm_destroy_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)
{
struct nvc0_query *q = &hq->base;
- q->funcs->destroy_query(nvc0, q);
+ nvc0_hw_query_allocate(nvc0, q, 0);
+ nouveau_fence_ref(NULL, &hq->fence);
+ FREE(hq);
}
static boolean
@@ -1075,17 +1077,6 @@ nve4_hw_sm_query_read_data(uint32_t count[32][8],
return true;
}
-/* Metric calculations:
- * sum(x) ... sum of x over all MPs
- * avg(x) ... average of x over all MPs
- *
- * IPC : sum(inst_executed) / clock
- * INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued)
- * MP_OCCUPANCY : avg((active_warps / 64) / active_cycles)
- * MP_EFFICIENCY : avg(active_cycles / clock)
- *
- * NOTE: Interpretation of IPC requires knowledge of MP count.
- */
static boolean
nvc0_hw_sm_get_query_result(struct nvc0_context *nvc0, struct nvc0_hw_query *hq,
boolean wait, union pipe_query_result *result)
@@ -1130,7 +1121,7 @@ nvc0_hw_sm_create_query(struct nvc0_context *nvc0, unsigned type)
struct nvc0_hw_query *hq;
unsigned space;
- if (nvc0->screen->base.device->drm_version < 0x01000101)
+ if (nvc0->screen->base.drm->version < 0x01000101)
return NULL;
if ((type < NVE4_HW_SM_QUERY(0) || type > NVE4_HW_SM_QUERY_LAST) &&
@@ -1225,7 +1216,7 @@ nvc0_hw_sm_get_driver_query_info(struct nvc0_screen *screen, unsigned id,
{
int count = 0;
- if (screen->base.device->drm_version >= 0x01000101) {
+ if (screen->base.drm->version >= 0x01000101) {
if (screen->compute) {
if (screen->base.class_3d == NVE4_3D_CLASS) {
count += NVE4_HW_SM_QUERY_COUNT;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
index 461fcaa..3995446 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -22,6 +22,7 @@
#include <xf86drm.h>
#include <nouveau_drm.h>
+#include <nvif/class.h>
#include "util/u_format.h"
#include "util/u_format_s3tc.h"
#include "pipe/p_screen.h"
@@ -428,6 +429,7 @@ nvc0_screen_destroy(struct pipe_screen *pscreen)
if (screen->pm.prog) {
screen->pm.prog->code = NULL; /* hardcoded, don't FREE */
nvc0_program_destroy(NULL, screen->pm.prog);
+ FREE(screen->pm.prog);
}
nouveau_bo_ref(NULL, &screen->text);
@@ -617,11 +619,10 @@ nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
#define FAIL_SCREEN_INIT(str, err) \
do { \
NOUVEAU_ERR(str, err); \
- nvc0_screen_destroy(pscreen); \
- return NULL; \
+ goto fail; \
} while(0)
-struct pipe_screen *
+struct nouveau_screen *
nvc0_screen_create(struct nouveau_device *dev)
{
struct nvc0_screen *screen;
@@ -650,6 +651,7 @@ nvc0_screen_create(struct nouveau_device *dev)
if (!screen)
return NULL;
pscreen = &screen->base.base;
+ pscreen->destroy = nvc0_screen_destroy;
ret = nouveau_screen_init(&screen->base, dev);
if (ret) {
@@ -672,7 +674,6 @@ nvc0_screen_create(struct nouveau_device *dev)
screen->base.vidmem_bindings = 0;
}
- pscreen->destroy = nvc0_screen_destroy;
pscreen->context_create = nvc0_create;
pscreen->is_format_supported = nvc0_screen_is_format_supported;
pscreen->get_param = nvc0_screen_get_param;
@@ -687,7 +688,7 @@ nvc0_screen_create(struct nouveau_device *dev)
screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported;
flags = NOUVEAU_BO_GART | NOUVEAU_BO_MAP;
- if (dev->drm_version >= 0x01000202)
+ if (screen->base.drm->version >= 0x01000202)
flags |= NOUVEAU_BO_COHERENT;
ret = nouveau_bo_new(dev, flags, 0, 4096, NULL, &screen->fence.bo);
@@ -699,12 +700,13 @@ nvc0_screen_create(struct nouveau_device *dev)
screen->base.fence.update = nvc0_screen_fence_update;
- ret = nouveau_object_new(chan,
- (dev->chipset < 0xe0) ? 0x1f906e : 0x906e, 0x906e,
- NULL, 0, &screen->nvsw);
+ ret = nouveau_object_new(chan, (dev->chipset < 0xe0) ? 0x1f906e : 0x906e,
+ NVIF_CLASS_SW_GF100, NULL, 0, &screen->nvsw);
if (ret)
FAIL_SCREEN_INIT("Error creating SW object: %d\n", ret);
+ BEGIN_NVC0(push, SUBC_SW(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->nvsw->handle);
switch (dev->chipset & ~0xf) {
case 0x110:
@@ -811,10 +813,11 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATA (push, 0x17);
}
- IMMED_NVC0(push, NVC0_3D(ZETA_COMP_ENABLE), dev->drm_version >= 0x01000101);
+ IMMED_NVC0(push, NVC0_3D(ZETA_COMP_ENABLE),
+ screen->base.drm->version >= 0x01000101);
BEGIN_NVC0(push, NVC0_3D(RT_COMP_ENABLE(0)), 8);
for (i = 0; i < 8; ++i)
- PUSH_DATA(push, dev->drm_version >= 0x01000101);
+ PUSH_DATA(push, screen->base.drm->version >= 0x01000101);
BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
PUSH_DATA (push, 1);
@@ -910,7 +913,7 @@ nvc0_screen_create(struct nouveau_device *dev)
PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
- if (dev->drm_version >= 0x01000101) {
+ if (screen->base.drm->version >= 0x01000101) {
ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
if (ret) {
NOUVEAU_ERR("NOUVEAU_GETPARAM_GRAPH_UNITS failed.\n");
@@ -1061,11 +1064,11 @@ nvc0_screen_create(struct nouveau_device *dev)
nouveau_fence_new(&screen->base, &screen->base.fence.current, false);
- return pscreen;
+ return &screen->base;
fail:
- nvc0_screen_destroy(pscreen);
- return NULL;
+ screen->base.base.context_create = NULL;
+ return &screen->base;
}
int
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
index 7e2e999..5e84ca9 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -236,11 +236,8 @@ nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
struct nouveau_pushbuf *push = nvc0->base.pushbuf;
struct nvc0_program *gp = nvc0->gmtyprog;
- if (gp)
- nvc0_program_validate(nvc0, gp);
-
/* we allow GPs with no code for specifying stream output state only */
- if (gp && gp->code_size) {
+ if (gp && nvc0_program_validate(nvc0, gp) && gp->code_size) {
const bool gp_selects_layer = !!(gp->hdr[13] & (1 << 9));
BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
index f8e1efb..4e43c4e 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -1030,9 +1030,11 @@ nvc0_blitctx_post_blit(struct nvc0_blitctx *blit)
nvc0->base.pipe.render_condition(&nvc0->base.pipe, nvc0->cond_query,
nvc0->cond_cond, nvc0->cond_mode);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP);
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0));
nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1));
+ nouveau_scratch_done(&nvc0->base);
nvc0->dirty = blit->saved.dirty |
(NVC0_NEW_FRAMEBUFFER | NVC0_NEW_SCISSOR | NVC0_NEW_SAMPLE_MASK |
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
index 2dd100f..74090ce 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -193,9 +193,7 @@ nvc0_create_texture_view(struct pipe_context *pipe,
tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY;
break;
default:
- NOUVEAU_ERR("unexpected/invalid texture target: %d\n",
- mt->base.base.target);
- return false;
+ unreachable("unexpected/invalid texture target");
}
tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
index c464904..54443bd 100644
--- a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -95,6 +95,9 @@ nvc0_vertex_state_create(struct pipe_context *pipe,
}
so->element[i].state = nvc0_format_table[fmt].vtx;
so->need_conversion = true;
+ pipe_debug_message(&nouveau_context(pipe)->debug, FALLBACK,
+ "Converting vertex element %d, no hw format %s",
+ i, util_format_name(ve->src_format));
}
size = util_format_get_blocksize(fmt);
diff --git a/src/gallium/drivers/r600/evergreen_state.c b/src/gallium/drivers/r600/evergreen_state.c
index 02d0c7f..1443bc0 100644
--- a/src/gallium/drivers/r600/evergreen_state.c
+++ b/src/gallium/drivers/r600/evergreen_state.c
@@ -2414,7 +2414,7 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
struct r600_command_buffer *cb = &rctx->start_cs_cmd;
int tmp, i;
- r600_init_command_buffer(cb, 342);
+ r600_init_command_buffer(cb, 338);
/* This must be first. */
r600_store_value(cb, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
@@ -2468,10 +2468,6 @@ static void cayman_init_atom_start_cs(struct r600_context *rctx)
r600_store_context_reg(cb, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0);
- r600_store_context_reg_seq(cb, R_028AB4_VGT_REUSE_OFF, 2);
- r600_store_value(cb, 0); /* R_028AB4_VGT_REUSE_OFF */
- r600_store_value(cb, 0); /* R_028AB8_VGT_VTX_CNT_EN */
-
r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1);
r600_store_context_reg_seq(cb, CM_R_028BD4_PA_SC_CENTROID_PRIORITY_0, 2);
@@ -2671,7 +2667,7 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
return;
}
- r600_init_command_buffer(cb, 342);
+ r600_init_command_buffer(cb, 338);
/* This must be first. */
r600_store_value(cb, PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
@@ -2896,10 +2892,6 @@ void evergreen_init_atom_start_cs(struct r600_context *rctx)
r600_store_value(cb, 0); /* R_028A3C_VGT_GROUP_VECT_1_FMT_CNTL */
r600_store_value(cb, 0); /* R_028A40_VGT_GS_MODE */
- r600_store_context_reg_seq(cb, R_028AB4_VGT_REUSE_OFF, 2);
- r600_store_value(cb, 0); /* R_028AB4_VGT_REUSE_OFF */
- r600_store_value(cb, 0); /* R_028AB8_VGT_VTX_CNT_EN */
-
r600_store_config_reg(cb, R_008A14_PA_CL_ENHANCE, (3 << 1) | 1);
r600_store_context_reg(cb, R_0288F0_SQ_VTX_SEMANTIC_CLEAR, ~0);
@@ -3731,7 +3723,7 @@ void evergreen_init_state_functions(struct r600_context *rctx)
r600_init_atom(rctx, &rctx->blend_color.atom, id++, r600_emit_blend_color, 6);
r600_init_atom(rctx, &rctx->blend_state.atom, id++, r600_emit_cso_state, 0);
r600_init_atom(rctx, &rctx->cb_misc_state.atom, id++, evergreen_emit_cb_misc_state, 4);
- r600_init_atom(rctx, &rctx->clip_misc_state.atom, id++, r600_emit_clip_misc_state, 6);
+ r600_init_atom(rctx, &rctx->clip_misc_state.atom, id++, r600_emit_clip_misc_state, 9);
r600_init_atom(rctx, &rctx->clip_state.atom, id++, evergreen_emit_clip_state, 26);
r600_init_atom(rctx, &rctx->db_misc_state.atom, id++, evergreen_emit_db_misc_state, 10);
r600_init_atom(rctx, &rctx->db_state.atom, id++, evergreen_emit_db_state, 14);
diff --git a/src/gallium/drivers/r600/r600_pipe.h b/src/gallium/drivers/r600/r600_pipe.h
index 795fb9a..31f2a72 100644
--- a/src/gallium/drivers/r600/r600_pipe.h
+++ b/src/gallium/drivers/r600/r600_pipe.h
@@ -152,6 +152,7 @@ struct r600_clip_misc_state {
unsigned clip_plane_enable; /* from rasterizer */
unsigned clip_dist_write; /* from vertex shader */
boolean clip_disable; /* from vertex shader */
+ boolean vs_out_viewport; /* from vertex shader */
};
struct r600_alphatest_state {
diff --git a/src/gallium/drivers/r600/r600_state_common.c b/src/gallium/drivers/r600/r600_state_common.c
index 6a66634..ca589fa 100644
--- a/src/gallium/drivers/r600/r600_state_common.c
+++ b/src/gallium/drivers/r600/r600_state_common.c
@@ -1377,11 +1377,13 @@ static void r600_update_clip_state(struct r600_context *rctx,
{
if (current->pa_cl_vs_out_cntl != rctx->clip_misc_state.pa_cl_vs_out_cntl ||
current->shader.clip_dist_write != rctx->clip_misc_state.clip_dist_write ||
- current->shader.vs_position_window_space != rctx->clip_misc_state.clip_disable) {
- rctx->clip_misc_state.pa_cl_vs_out_cntl = current->pa_cl_vs_out_cntl;
- rctx->clip_misc_state.clip_dist_write = current->shader.clip_dist_write;
- rctx->clip_misc_state.clip_disable = current->shader.vs_position_window_space;
- r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
+ current->shader.vs_position_window_space != rctx->clip_misc_state.clip_disable ||
+ current->shader.vs_out_viewport != rctx->clip_misc_state.vs_out_viewport) {
+ rctx->clip_misc_state.pa_cl_vs_out_cntl = current->pa_cl_vs_out_cntl;
+ rctx->clip_misc_state.clip_dist_write = current->shader.clip_dist_write;
+ rctx->clip_misc_state.clip_disable = current->shader.vs_position_window_space;
+ rctx->clip_misc_state.vs_out_viewport = current->shader.vs_out_viewport;
+ r600_mark_atom_dirty(rctx, &rctx->clip_misc_state.atom);
}
}
@@ -1656,6 +1658,10 @@ void r600_emit_clip_misc_state(struct r600_context *rctx, struct r600_atom *atom
radeon_set_context_reg(cs, R_02881C_PA_CL_VS_OUT_CNTL,
state->pa_cl_vs_out_cntl |
(state->clip_plane_enable & state->clip_dist_write));
+ /* reuse needs to be set off if we write oViewport */
+ if (rctx->b.chip_class >= EVERGREEN)
+ radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF,
+ S_028AB4_REUSE_OFF(state->vs_out_viewport));
}
static void r600_draw_vbo(struct pipe_context *ctx, const struct pipe_draw_info *dinfo)
diff --git a/src/gallium/drivers/radeon/Makefile.am b/src/gallium/drivers/radeon/Makefile.am
index 13d8976..a6fc145 100644
--- a/src/gallium/drivers/radeon/Makefile.am
+++ b/src/gallium/drivers/radeon/Makefile.am
@@ -16,7 +16,8 @@ libradeon_la_SOURCES = \
if NEED_RADEON_LLVM
AM_CFLAGS += \
- $(LLVM_CFLAGS)
+ $(LLVM_CFLAGS) \
+ $(LIBELF_CFLAGS)
libradeon_la_SOURCES += \
$(LLVM_C_FILES)
@@ -24,7 +25,7 @@ libradeon_la_SOURCES += \
libradeon_la_LIBADD = \
$(CLOCK_LIB) \
$(LLVM_LIBS) \
- $(ELF_LIB)
+ $(LIBELF_LIBS)
libradeon_la_LDFLAGS = \
$(LLVM_LDFLAGS)
diff --git a/src/gallium/drivers/radeon/r600_perfcounter.c b/src/gallium/drivers/radeon/r600_perfcounter.c
index a835aee..fad7bde 100644
--- a/src/gallium/drivers/radeon/r600_perfcounter.c
+++ b/src/gallium/drivers/radeon/r600_perfcounter.c
@@ -202,9 +202,6 @@ static void r600_pc_query_add_result(struct r600_common_context *ctx,
for (i = 0; i < query->num_counters; ++i) {
struct r600_pc_counter *counter = &query->counters[i];
- if (counter->base == ~0)
- continue;
-
for (j = 0; j < counter->dwords; ++j) {
uint32_t value = results[counter->base + j * counter->stride];
result->batch[i].u32 += value;
diff --git a/src/gallium/drivers/radeon/r600_query.c b/src/gallium/drivers/radeon/r600_query.c
index ed0aefc..0aa19cd 100644
--- a/src/gallium/drivers/radeon/r600_query.c
+++ b/src/gallium/drivers/radeon/r600_query.c
@@ -119,7 +119,7 @@ static void r600_query_sw_end(struct r600_common_context *rctx,
rctx->b.flush(&rctx->b, &query->fence, 0);
break;
case R600_QUERY_DRAW_CALLS:
- query->begin_result = rctx->num_draw_calls;
+ query->end_result = rctx->num_draw_calls;
break;
case R600_QUERY_REQUESTED_VRAM:
case R600_QUERY_REQUESTED_GTT:
@@ -141,10 +141,10 @@ static void r600_query_sw_end(struct r600_common_context *rctx,
query->begin_result = 0;
break;
case R600_QUERY_NUM_COMPILATIONS:
- query->begin_result = p_atomic_read(&rctx->screen->num_compilations);
+ query->end_result = p_atomic_read(&rctx->screen->num_compilations);
break;
case R600_QUERY_NUM_SHADERS_CREATED:
- query->begin_result = p_atomic_read(&rctx->screen->num_shaders_created);
+ query->end_result = p_atomic_read(&rctx->screen->num_shaders_created);
break;
default:
unreachable("r600_query_sw_end: bad query type");
diff --git a/src/gallium/drivers/radeon/radeon_llvm_emit.c b/src/gallium/drivers/radeon/radeon_llvm_emit.c
index 6b2ebde..61ed940 100644
--- a/src/gallium/drivers/radeon/radeon_llvm_emit.c
+++ b/src/gallium/drivers/radeon/radeon_llvm_emit.c
@@ -188,8 +188,8 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
if (mem_err) {
fprintf(stderr, "%s: %s", __FUNCTION__, err);
FREE(err);
- LLVMDisposeTargetMachine(tm);
- return 1;
+ rval = 1;
+ goto out;
}
if (0 != rval) {
@@ -205,6 +205,7 @@ unsigned radeon_llvm_compile(LLVMModuleRef M, struct radeon_shader_binary *binar
/* Clean up */
LLVMDisposeMemoryBuffer(out_buffer);
+out:
if (dispose_tm) {
LLVMDisposeTargetMachine(tm);
}
diff --git a/src/gallium/drivers/radeonsi/si_perfcounter.c b/src/gallium/drivers/radeonsi/si_perfcounter.c
index a0ddff6..7ee1dae 100644
--- a/src/gallium/drivers/radeonsi/si_perfcounter.c
+++ b/src/gallium/drivers/radeonsi/si_perfcounter.c
@@ -436,7 +436,7 @@ static void si_pc_emit_select(struct r600_common_context *ctx,
dw = count + regs->num_prelude;
if (count >= regs->num_multi)
- count += regs->num_multi;
+ dw += regs->num_multi;
radeon_set_uconfig_reg_seq(cs, regs->select0, dw);
for (idx = 0; idx < regs->num_prelude; ++idx)
radeon_emit(cs, 0);
diff --git a/src/gallium/drivers/radeonsi/si_state.c b/src/gallium/drivers/radeonsi/si_state.c
index 4086819..2a6d2c6 100644
--- a/src/gallium/drivers/radeonsi/si_state.c
+++ b/src/gallium/drivers/radeonsi/si_state.c
@@ -605,6 +605,10 @@ static void si_emit_clip_regs(struct si_context *sctx, struct r600_atom *atom)
(clipdist_mask ? 0 :
sctx->queued.named.rasterizer->clip_plane_enable & SIX_BITS) |
S_028810_CLIP_DISABLE(window_space));
+
+ /* reuse needs to be set off if we write oViewport */
+ radeon_set_context_reg(cs, R_028AB4_VGT_REUSE_OFF,
+ S_028AB4_REUSE_OFF(info->writes_viewport_index));
}
static void si_set_scissor_states(struct pipe_context *ctx,
@@ -3468,7 +3472,6 @@ static void si_init_config(struct si_context *sctx)
si_pm4_set_reg(pm4, R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET, 0);
si_pm4_set_reg(pm4, R_028B98_VGT_STRMOUT_BUFFER_CONFIG, 0x0);
- si_pm4_set_reg(pm4, R_028AB4_VGT_REUSE_OFF, 0);
si_pm4_set_reg(pm4, R_028AB8_VGT_VTX_CNT_EN, 0x0);
if (sctx->b.chip_class < CIK)
si_pm4_set_reg(pm4, R_008A14_PA_CL_ENHANCE, S_008A14_NUM_CLIP_SEQ(3) |
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
index c4284cc..78e346a 100644
--- a/src/gallium/drivers/svga/svga_context.h
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -343,6 +343,13 @@ struct svga_hw_draw_state
SVGA3dElementLayoutId layout_id;
SVGA3dPrimitiveType topology;
+ struct svga_winsys_surface *ib; /**< index buffer for drawing */
+ SVGA3dSurfaceFormat ib_format;
+ unsigned ib_offset;
+
+ unsigned num_samplers[PIPE_SHADER_TYPES];
+ SVGA3dSamplerId samplers[PIPE_SHADER_TYPES][PIPE_MAX_SAMPLERS];
+
/* used for rebinding */
unsigned num_sampler_views[PIPE_SHADER_TYPES];
unsigned default_constbuf_size[PIPE_SHADER_TYPES];
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
index cca499a..2d3631d 100644
--- a/src/gallium/drivers/svga/svga_draw.c
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -539,11 +539,18 @@ draw_vgpu10(struct svga_hwtnl *hwtnl,
SVGA3dSurfaceFormat indexFormat = xlate_index_format(range->indexWidth);
/* setup index buffer */
- ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle,
- indexFormat,
- range->indexArray.offset);
- if (ret != PIPE_OK)
- return ret;
+ if (ib_handle != svga->state.hw_draw.ib ||
+ indexFormat != svga->state.hw_draw.ib_format ||
+ range->indexArray.offset != svga->state.hw_draw.ib_offset) {
+ ret = SVGA3D_vgpu10_SetIndexBuffer(svga->swc, ib_handle,
+ indexFormat,
+ range->indexArray.offset);
+ if (ret != PIPE_OK)
+ return ret;
+ svga->state.hw_draw.ib = ib_handle;
+ svga->state.hw_draw.ib_format = indexFormat;
+ svga->state.hw_draw.ib_offset = range->indexArray.offset;
+ }
if (instance_count > 1) {
ret = SVGA3D_vgpu10_DrawIndexedInstanced(svga->swc,
diff --git a/src/gallium/drivers/svga/svga_state.c b/src/gallium/drivers/svga/svga_state.c
index 722b369..4479a27 100644
--- a/src/gallium/drivers/svga/svga_state.c
+++ b/src/gallium/drivers/svga/svga_state.c
@@ -129,7 +129,11 @@ update_state(struct svga_context *svga,
const struct svga_tracked_state *atoms[],
unsigned *state)
{
+#ifdef DEBUG
boolean debug = TRUE;
+#else
+ boolean debug = FALSE;
+#endif
enum pipe_error ret = PIPE_OK;
unsigned i;
diff --git a/src/gallium/drivers/svga/svga_state_sampler.c b/src/gallium/drivers/svga/svga_state_sampler.c
index c5d52bb..b070f65 100644
--- a/src/gallium/drivers/svga/svga_state_sampler.c
+++ b/src/gallium/drivers/svga/svga_state_sampler.c
@@ -301,13 +301,21 @@ update_samplers(struct svga_context *svga, unsigned dirty )
}
if (count > 0) {
- ret = SVGA3D_vgpu10_SetSamplers(svga->swc,
- count,
- 0, /* start */
- svga_shader_type(shader), /* type */
- ids);
- if (ret != PIPE_OK)
- return ret;
+ if (count != svga->state.hw_draw.num_samplers[shader] ||
+ memcmp(ids, svga->state.hw_draw.samplers[shader],
+ count * sizeof(ids[0])) != 0) {
+ /* HW state is really changing */
+ ret = SVGA3D_vgpu10_SetSamplers(svga->swc,
+ count,
+ 0, /* start */
+ svga_shader_type(shader), /* type */
+ ids);
+ if (ret != PIPE_OK)
+ return ret;
+ memcpy(svga->state.hw_draw.samplers[shader], ids,
+ count * sizeof(ids[0]));
+ svga->state.hw_draw.num_samplers[shader] = count;
+ }
}
}
diff --git a/src/gallium/drivers/vc4/Makefile.sources b/src/gallium/drivers/vc4/Makefile.sources
index 24b577a..a9a2742 100644
--- a/src/gallium/drivers/vc4/Makefile.sources
+++ b/src/gallium/drivers/vc4/Makefile.sources
@@ -32,6 +32,7 @@ C_SOURCES := \
vc4_program.c \
vc4_qir.c \
vc4_qir_lower_uniforms.c \
+ vc4_qir_schedule.c \
vc4_qir.h \
vc4_qpu.c \
vc4_qpu_defines.h \
diff --git a/src/gallium/drivers/vc4/vc4_blit.c b/src/gallium/drivers/vc4/vc4_blit.c
index 16dcece..876c296 100644
--- a/src/gallium/drivers/vc4/vc4_blit.c
+++ b/src/gallium/drivers/vc4/vc4_blit.c
@@ -54,8 +54,8 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
bool old_msaa = vc4->msaa;
int old_tile_width = vc4->tile_width;
int old_tile_height = vc4->tile_height;
- bool msaa = (info->src.resource->nr_samples ||
- info->dst.resource->nr_samples);
+ bool msaa = (info->src.resource->nr_samples > 1 ||
+ info->dst.resource->nr_samples > 1);
int tile_width = msaa ? 32 : 64;
int tile_height = msaa ? 32 : 64;
@@ -110,9 +110,11 @@ vc4_tile_blit(struct pipe_context *pctx, const struct pipe_blit_info *info)
pipe_surface_reference(&vc4->color_read, src_surf);
pipe_surface_reference(&vc4->color_write,
- dst_surf->texture->nr_samples ? NULL : dst_surf);
+ dst_surf->texture->nr_samples > 1 ?
+ NULL : dst_surf);
pipe_surface_reference(&vc4->msaa_color_write,
- dst_surf->texture->nr_samples ? dst_surf : NULL);
+ dst_surf->texture->nr_samples > 1 ?
+ dst_surf : NULL);
pipe_surface_reference(&vc4->zs_read, NULL);
pipe_surface_reference(&vc4->zs_write, NULL);
pipe_surface_reference(&vc4->msaa_zs_write, NULL);
diff --git a/src/gallium/drivers/vc4/vc4_context.c b/src/gallium/drivers/vc4/vc4_context.c
index 1cd1676..312b006 100644
--- a/src/gallium/drivers/vc4/vc4_context.c
+++ b/src/gallium/drivers/vc4/vc4_context.c
@@ -67,15 +67,13 @@ vc4_flush(struct pipe_context *pctx)
cl_u8(&bcl, VC4_PACKET_FLUSH);
cl_end(&vc4->bcl, bcl);
- vc4->msaa = false;
if (cbuf && (vc4->resolve & PIPE_CLEAR_COLOR0)) {
pipe_surface_reference(&vc4->color_write,
- cbuf->texture->nr_samples ? NULL : cbuf);
+ cbuf->texture->nr_samples > 1 ?
+ NULL : cbuf);
pipe_surface_reference(&vc4->msaa_color_write,
- cbuf->texture->nr_samples ? cbuf : NULL);
-
- if (cbuf->texture->nr_samples)
- vc4->msaa = true;
+ cbuf->texture->nr_samples > 1 ?
+ cbuf : NULL);
if (!(vc4->cleared & PIPE_CLEAR_COLOR0)) {
pipe_surface_reference(&vc4->color_read, cbuf);
@@ -92,15 +90,12 @@ vc4_flush(struct pipe_context *pctx)
if (vc4->framebuffer.zsbuf &&
(vc4->resolve & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
pipe_surface_reference(&vc4->zs_write,
- zsbuf->texture->nr_samples ?
+ zsbuf->texture->nr_samples > 1 ?
NULL : zsbuf);
pipe_surface_reference(&vc4->msaa_zs_write,
- zsbuf->texture->nr_samples ?
+ zsbuf->texture->nr_samples > 1 ?
zsbuf : NULL);
- if (zsbuf->texture->nr_samples)
- vc4->msaa = true;
-
if (!(vc4->cleared & (PIPE_CLEAR_DEPTH | PIPE_CLEAR_STENCIL))) {
pipe_surface_reference(&vc4->zs_read, zsbuf);
} else {
diff --git a/src/gallium/drivers/vc4/vc4_drm.h b/src/gallium/drivers/vc4/vc4_drm.h
index bf58c6c..110c783 100644
--- a/src/gallium/drivers/vc4/vc4_drm.h
+++ b/src/gallium/drivers/vc4/vc4_drm.h
@@ -32,6 +32,7 @@
#define DRM_VC4_CREATE_BO 0x03
#define DRM_VC4_MMAP_BO 0x04
#define DRM_VC4_CREATE_SHADER_BO 0x05
+#define DRM_VC4_GET_HANG_STATE 0x06
#define DRM_IOCTL_VC4_SUBMIT_CL DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_SUBMIT_CL, struct drm_vc4_submit_cl)
#define DRM_IOCTL_VC4_WAIT_SEQNO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_WAIT_SEQNO, struct drm_vc4_wait_seqno)
@@ -39,6 +40,7 @@
#define DRM_IOCTL_VC4_CREATE_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_BO, struct drm_vc4_create_bo)
#define DRM_IOCTL_VC4_MMAP_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_MMAP_BO, struct drm_vc4_mmap_bo)
#define DRM_IOCTL_VC4_CREATE_SHADER_BO DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_CREATE_SHADER_BO, struct drm_vc4_create_shader_bo)
+#define DRM_IOCTL_VC4_GET_HANG_STATE DRM_IOWR( DRM_COMMAND_BASE + DRM_VC4_GET_HANG_STATE, struct drm_vc4_get_hang_state)
struct drm_vc4_submit_rcl_surface {
uint32_t hindex; /* Handle index, or ~0 if not present. */
@@ -231,4 +233,47 @@ struct drm_vc4_mmap_bo {
uint64_t offset;
};
+struct drm_vc4_get_hang_state_bo {
+ uint32_t handle;
+ uint32_t paddr;
+ uint32_t size;
+ uint32_t pad;
+};
+
+/**
+ * struct drm_vc4_hang_state - ioctl argument for collecting state
+ * from a GPU hang for analysis.
+*/
+struct drm_vc4_get_hang_state {
+ /** Pointer to array of struct drm_vc4_get_hang_state_bo. */
+ uint64_t bo;
+ /**
+ * On input, the size of the bo array. Output is the number
+ * of bos to be returned.
+ */
+ uint32_t bo_count;
+
+ uint32_t start_bin, start_render;
+
+ uint32_t ct0ca, ct0ea;
+ uint32_t ct1ca, ct1ea;
+ uint32_t ct0cs, ct1cs;
+ uint32_t ct0ra0, ct1ra0;
+
+ uint32_t bpca, bpcs;
+ uint32_t bpoa, bpos;
+
+ uint32_t vpmbase;
+
+ uint32_t dbge;
+ uint32_t fdbgo;
+ uint32_t fdbgb;
+ uint32_t fdbgr;
+ uint32_t fdbgs;
+ uint32_t errstat;
+
+ /* Pad that we may save more registers into in the future. */
+ uint32_t pad[16];
+};
+
#endif /* _UAPI_VC4_DRM_H_ */
diff --git a/src/gallium/drivers/vc4/vc4_job.c b/src/gallium/drivers/vc4/vc4_job.c
index 79bf2c4..5d071ec 100644
--- a/src/gallium/drivers/vc4/vc4_job.c
+++ b/src/gallium/drivers/vc4/vc4_job.c
@@ -89,7 +89,7 @@ vc4_submit_setup_rcl_surface(struct vc4_context *vc4,
submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
submit_surf->offset = surf->offset;
- if (psurf->texture->nr_samples == 0) {
+ if (psurf->texture->nr_samples <= 1) {
if (is_depth) {
submit_surf->bits =
VC4_SET_FIELD(VC4_LOADSTORE_TILE_BUFFER_ZS,
@@ -132,7 +132,7 @@ vc4_submit_setup_rcl_render_config_surface(struct vc4_context *vc4,
submit_surf->hindex = vc4_gem_hindex(vc4, rsc->bo);
submit_surf->offset = surf->offset;
- if (psurf->texture->nr_samples == 0) {
+ if (psurf->texture->nr_samples <= 1) {
submit_surf->bits =
VC4_SET_FIELD(vc4_rt_format_is_565(surf->base.format) ?
VC4_RENDER_CONFIG_FORMAT_BGR565 :
@@ -240,9 +240,11 @@ vc4_job_submit(struct vc4_context *vc4)
#else
ret = vc4_simulator_flush(vc4, &submit);
#endif
- if (ret) {
- fprintf(stderr, "VC4 submit failed\n");
- abort();
+ static bool warned = false;
+ if (ret && !warned) {
+ fprintf(stderr, "Draw call returned %s. "
+ "Expect corruption.\n", strerror(errno));
+ warned = true;
}
}
diff --git a/src/gallium/drivers/vc4/vc4_program.c b/src/gallium/drivers/vc4/vc4_program.c
index caad05c..d11c4e3 100644
--- a/src/gallium/drivers/vc4/vc4_program.c
+++ b/src/gallium/drivers/vc4/vc4_program.c
@@ -1848,12 +1848,15 @@ vc4_shader_ntq(struct vc4_context *vc4, enum qstage stage,
qir_optimize(c);
qir_lower_uniforms(c);
+ qir_schedule_instructions(c);
+
if (vc4_debug & VC4_DEBUG_QIR) {
fprintf(stderr, "%s prog %d/%d QIR:\n",
qir_get_stage_name(c->stage),
c->program_id, c->variant_id);
qir_dump(c);
}
+
qir_reorder_uniforms(c);
vc4_generate_code(vc4, c);
@@ -2043,7 +2046,7 @@ vc4_setup_shared_key(struct vc4_context *vc4, struct vc4_key *key,
key->tex[i].swizzle[2] = sampler->swizzle_b;
key->tex[i].swizzle[3] = sampler->swizzle_a;
- if (sampler->texture->nr_samples) {
+ if (sampler->texture->nr_samples > 1) {
key->tex[i].msaa_width = sampler->texture->width0;
key->tex[i].msaa_height = sampler->texture->height0;
} else if (sampler){
diff --git a/src/gallium/drivers/vc4/vc4_qir.h b/src/gallium/drivers/vc4/vc4_qir.h
index c34dce3..b0fbb4c 100644
--- a/src/gallium/drivers/vc4/vc4_qir.h
+++ b/src/gallium/drivers/vc4/vc4_qir.h
@@ -459,6 +459,7 @@ void qir_remove_instruction(struct vc4_compile *c, struct qinst *qinst);
struct qreg qir_uniform(struct vc4_compile *c,
enum quniform_contents contents,
uint32_t data);
+void qir_schedule_instructions(struct vc4_compile *c);
void qir_reorder_uniforms(struct vc4_compile *c);
void qir_emit(struct vc4_compile *c, struct qinst *inst);
diff --git a/src/gallium/drivers/vc4/vc4_qir_schedule.c b/src/gallium/drivers/vc4/vc4_qir_schedule.c
new file mode 100644
index 0000000..d20815f
--- /dev/null
+++ b/src/gallium/drivers/vc4/vc4_qir_schedule.c
@@ -0,0 +1,619 @@
+/*
+ * Copyright © 2010 Intel Corporation
+ * Copyright © 2014-2015 Broadcom
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
+ * IN THE SOFTWARE.
+ */
+
+/**
+ * @file vc4_qir_schedule.c
+ *
+ * The basic model of the list scheduler is to take a basic block, compute a
+ * DAG of the dependencies from the bottom up, and make a list of the DAG
+ * heads. Heuristically pick a DAG head and schedule (remove) it, then put
+ * all the parents that are now DAG heads into the list of things to
+ * schedule.
+ *
+ * The goal of scheduling here, before register allocation and conversion to
+ * QPU instructions, is to reduce register pressure by reordering instructions
+ * to consume values when possible.
+ */
+
+#include "vc4_qir.h"
+
+static bool debug;
+
+struct schedule_node {
+ struct list_head link;
+ struct qinst *inst;
+
+ struct schedule_node **children;
+ uint32_t child_count;
+ uint32_t child_array_size;
+ uint32_t parent_count;
+
+ /* Length of the longest (latency) chain from a DAG head to the this
+ * instruction.
+ */
+ uint32_t delay;
+
+ /* Longest time + latency_between(parent, this) of any parent of this
+ * node.
+ */
+ uint32_t unblocked_time;
+};
+
+struct schedule_state {
+ /* List of struct schedule_node *. This starts out with all
+ * instructions, and after dependency updates it's trimmed to be just
+ * the DAG heads.
+ */
+ struct list_head worklist;
+
+ uint32_t time;
+
+ uint32_t *temp_writes;
+
+ BITSET_WORD *temp_live;
+};
+
+/* When walking the instructions in reverse, we need to swap before/after in
+ * add_dep().
+ */
+enum direction { F, R };
+
+/**
+ * Marks a dependency between two intructions, that @after must appear after
+ * @before.
+ *
+ * Our dependencies are tracked as a DAG. Since we're scheduling bottom-up,
+ * the latest instructions with nothing left to schedule are the DAG heads,
+ * and their inputs are their children.
+ */
+static void
+add_dep(enum direction dir,
+ struct schedule_node *before,
+ struct schedule_node *after)
+{
+ if (!before || !after)
+ return;
+
+ assert(before != after);
+
+ if (dir == R) {
+ struct schedule_node *t = before;
+ before = after;
+ after = t;
+ }
+
+ for (int i = 0; i < after->child_count; i++) {
+ if (after->children[i] == after)
+ return;
+ }
+
+ if (after->child_array_size <= after->child_count) {
+ after->child_array_size = MAX2(after->child_array_size * 2, 16);
+ after->children = reralloc(after, after->children,
+ struct schedule_node *,
+ after->child_array_size);
+ }
+
+ after->children[after->child_count] = before;
+ after->child_count++;
+ before->parent_count++;
+}
+
+static void
+add_write_dep(enum direction dir,
+ struct schedule_node **before,
+ struct schedule_node *after)
+{
+ add_dep(dir, *before, after);
+ *before = after;
+}
+
+struct schedule_setup_state {
+ struct schedule_node **last_temp_write;
+ struct schedule_node *last_sf;
+ struct schedule_node *last_vary_read;
+ struct schedule_node *last_vpm_read;
+ struct schedule_node *last_vpm_write;
+ struct schedule_node *last_tex_coord;
+ struct schedule_node *last_tex_result;
+ struct schedule_node *last_tlb;
+ enum direction dir;
+
+ /**
+ * Texture FIFO tracking. This is done top-to-bottom, and is used to
+ * track the QOP_TEX_RESULTs and add dependencies on previous ones
+ * when trying to submit texture coords with TFREQ full or new texture
+ * fetches with TXRCV full.
+ */
+ struct {
+ struct schedule_node *node;
+ int coords;
+ } tex_fifo[8];
+ int tfreq_count; /**< Number of texture coords outstanding. */
+ int tfrcv_count; /**< Number of texture results outstanding. */
+ int tex_fifo_pos;
+};
+
+static void
+block_until_tex_result(struct schedule_setup_state *state, struct schedule_node *n)
+{
+ add_dep(state->dir, state->tex_fifo[0].node, n);
+
+ state->tfreq_count -= state->tex_fifo[0].coords;
+ state->tfrcv_count--;
+
+ memmove(&state->tex_fifo[0],
+ &state->tex_fifo[1],
+ state->tex_fifo_pos * sizeof(state->tex_fifo[0]));
+ state->tex_fifo_pos--;
+}
+
+/**
+ * Common code for dependencies that need to be tracked both forward and
+ * backward.
+ *
+ * This is for things like "all VPM reads have to happen in order."
+ */
+static void
+calculate_deps(struct schedule_setup_state *state, struct schedule_node *n)
+{
+ struct qinst *inst = n->inst;
+ enum direction dir = state->dir;
+
+
+ /* Add deps for temp registers and varyings accesses. Note that we
+ * ignore uniforms accesses, because qir_reorder_uniforms() happens
+ * after this.
+ */
+ for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+ switch (inst->src[i].file) {
+ case QFILE_TEMP:
+ add_dep(dir,
+ state->last_temp_write[inst->src[i].index], n);
+ break;
+
+ case QFILE_VARY:
+ add_write_dep(dir, &state->last_vary_read, n);
+ break;
+
+ case QFILE_VPM:
+ add_write_dep(dir, &state->last_vpm_read, n);
+ break;
+
+ default:
+ break;
+ }
+ }
+
+ switch (inst->op) {
+ case QOP_VARY_ADD_C:
+ add_dep(dir, state->last_vary_read, n);
+ break;
+
+ case QOP_TEX_S:
+ case QOP_TEX_T:
+ case QOP_TEX_R:
+ case QOP_TEX_B:
+ case QOP_TEX_DIRECT:
+ /* Texturing setup gets scheduled in order, because
+ * the uniforms referenced by them have to land in a
+ * specific order.
+ */
+ add_write_dep(dir, &state->last_tex_coord, n);
+ break;
+
+ case QOP_TEX_RESULT:
+ /* Results have to be fetched in order. */
+ add_write_dep(dir, &state->last_tex_result, n);
+ break;
+
+ case QOP_TLB_COLOR_WRITE:
+ case QOP_TLB_COLOR_READ:
+ case QOP_TLB_Z_WRITE:
+ case QOP_TLB_STENCIL_SETUP:
+ case QOP_MS_MASK:
+ add_write_dep(dir, &state->last_tlb, n);
+ break;
+
+ case QOP_TLB_DISCARD_SETUP:
+ add_write_dep(dir, &state->last_sf, n);
+ add_write_dep(dir, &state->last_tlb, n);
+ break;
+
+ default:
+ break;
+ }
+
+ if (inst->dst.file == QFILE_VPM)
+ add_write_dep(dir, &state->last_vpm_write, n);
+ else if (inst->dst.file == QFILE_TEMP)
+ add_write_dep(dir, &state->last_temp_write[inst->dst.index], n);
+
+ if (inst->sf)
+ add_write_dep(dir, &state->last_sf, n);
+
+ if (qir_depends_on_flags(inst)) {
+ add_dep(dir, state->last_sf, n);
+ }
+}
+
+static void
+calculate_forward_deps(struct vc4_compile *c, void *mem_ctx,
+ struct list_head *schedule_list)
+{
+ struct schedule_setup_state state;
+
+ memset(&state, 0, sizeof(state));
+ state.last_temp_write = rzalloc_array(mem_ctx, struct schedule_node *,
+ c->num_temps);
+ state.dir = F;
+
+ list_for_each_entry(struct schedule_node, n, schedule_list, link) {
+ struct qinst *inst = n->inst;
+
+ calculate_deps(&state, n);
+
+ switch (inst->op) {
+ case QOP_TEX_S:
+ case QOP_TEX_T:
+ case QOP_TEX_R:
+ case QOP_TEX_B:
+ case QOP_TEX_DIRECT:
+ /* If the texture coordinate fifo is full,
+ * block this on the last QOP_TEX_RESULT.
+ */
+ if (state.tfreq_count == 8) {
+ block_until_tex_result(&state, n);
+ }
+
+ /* If the texture result fifo is full, block
+ * adding any more to it until the last
+ * QOP_TEX_RESULT.
+ */
+ if (inst->op == QOP_TEX_S ||
+ inst->op == QOP_TEX_DIRECT) {
+ if (state.tfrcv_count == 4)
+ block_until_tex_result(&state, n);
+ state.tfrcv_count++;
+ }
+
+ state.tex_fifo[state.tex_fifo_pos].coords++;
+ state.tfreq_count++;
+ break;
+
+ case QOP_TEX_RESULT:
+ /* Results have to be fetched after the
+ * coordinate setup. Note that we're assuming
+ * here that our input shader has the texture
+ * coord setup and result fetch in order,
+ * which is true initially but not of our
+ * instruction stream after this pass.
+ */
+ add_dep(state.dir, state.last_tex_coord, n);
+
+ state.tex_fifo[state.tex_fifo_pos].node = n;
+
+ state.tex_fifo_pos++;
+ memset(&state.tex_fifo[state.tex_fifo_pos], 0,
+ sizeof(state.tex_fifo[0]));
+ break;
+ default:
+ assert(!qir_is_tex(inst));
+ break;
+ }
+ }
+}
+
+static void
+calculate_reverse_deps(struct vc4_compile *c, void *mem_ctx,
+ struct list_head *schedule_list)
+{
+ struct schedule_setup_state state;
+
+ memset(&state, 0, sizeof(state));
+ state.dir = R;
+ state.last_temp_write = rzalloc_array(mem_ctx, struct schedule_node *,
+ c->num_temps);
+
+ list_for_each_entry_rev(struct schedule_node, n, schedule_list, link) {
+ calculate_deps(&state, n);
+ }
+}
+
+static int
+get_register_pressure_cost(struct schedule_state *state, struct qinst *inst)
+{
+ int cost = 0;
+
+ if (inst->dst.file == QFILE_TEMP &&
+ state->temp_writes[inst->dst.index] == 1)
+ cost--;
+
+ for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+ if (inst->src[i].file == QFILE_TEMP &&
+ !BITSET_TEST(state->temp_live, inst->src[i].index)) {
+ cost++;
+ }
+ }
+
+ return cost;
+}
+
+static bool
+locks_scoreboard(struct qinst *inst)
+{
+ switch (inst->op) {
+ case QOP_TLB_Z_WRITE:
+ case QOP_TLB_COLOR_WRITE:
+ case QOP_TLB_COLOR_WRITE_MS:
+ case QOP_TLB_COLOR_READ:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static struct schedule_node *
+choose_instruction(struct schedule_state *state)
+{
+ struct schedule_node *chosen = NULL;
+
+ list_for_each_entry(struct schedule_node, n, &state->worklist, link) {
+ if (!chosen) {
+ chosen = n;
+ continue;
+ }
+
+ /* Prefer scheduling things that lock the scoreboard, so that
+ * they appear late in the program and we get more parallelism
+ * between shaders on multiple QPUs hitting the same fragment.
+ */
+ if (locks_scoreboard(n->inst) &&
+ !locks_scoreboard(chosen->inst)) {
+ chosen = n;
+ continue;
+ } else if (!locks_scoreboard(n->inst) &&
+ locks_scoreboard(chosen->inst)) {
+ continue;
+ }
+
+ /* If we would block on the previously chosen node, but would
+ * block less on this one, then then prefer it.
+ */
+ if (chosen->unblocked_time > state->time &&
+ n->unblocked_time < chosen->unblocked_time) {
+ chosen = n;
+ continue;
+ } else if (n->unblocked_time > state->time &&
+ n->unblocked_time > chosen->unblocked_time) {
+ continue;
+ }
+
+ /* If we can definitely reduce register pressure, do so
+ * immediately.
+ */
+ int register_pressure_cost =
+ get_register_pressure_cost(state, n->inst);
+ int chosen_register_pressure_cost =
+ get_register_pressure_cost(state, chosen->inst);
+
+ if (register_pressure_cost < chosen_register_pressure_cost) {
+ chosen = n;
+ continue;
+ } else if (register_pressure_cost >
+ chosen_register_pressure_cost) {
+ continue;
+ }
+
+ /* Otherwise, prefer instructions with the deepest chain to
+ * the end of the program. This avoids the problem of
+ * "everything generates a temp, nothing finishes freeing one,
+ * guess I'll just keep emitting varying mul/adds".
+ */
+ if (n->delay > chosen->delay) {
+ chosen = n;
+ continue;
+ } else if (n->delay < chosen->delay) {
+ continue;
+ }
+ }
+
+ return chosen;
+}
+
+static void
+dump_state(struct vc4_compile *c, struct schedule_state *state)
+{
+ uint32_t i = 0;
+ list_for_each_entry(struct schedule_node, n, &state->worklist, link) {
+ fprintf(stderr, "%3d: ", i++);
+ qir_dump_inst(c, n->inst);
+ fprintf(stderr, " (%d cost)\n",
+ get_register_pressure_cost(state, n->inst));
+
+ for (int i = 0; i < n->child_count; i++) {
+ struct schedule_node *child = n->children[i];
+ fprintf(stderr, " - ");
+ qir_dump_inst(c, child->inst);
+ fprintf(stderr, " (%d parents)\n", child->parent_count);
+ }
+ }
+}
+
+/* Estimate of how many instructions we should schedule between operations.
+ *
+ * These aren't in real cycle counts, because we're just estimating cycle
+ * times anyway. QIR instructions will get paired up when turned into QPU
+ * instructions, or extra NOP delays will have to be added due to register
+ * allocation choices.
+ */
+static uint32_t
+latency_between(struct schedule_node *before, struct schedule_node *after)
+{
+ if ((before->inst->op == QOP_TEX_S ||
+ before->inst->op == QOP_TEX_DIRECT) &&
+ after->inst->op == QOP_TEX_RESULT)
+ return 100;
+
+ return 1;
+}
+
+/** Recursive computation of the delay member of a node. */
+static void
+compute_delay(struct schedule_node *n)
+{
+ if (!n->child_count) {
+ /* The color read needs to be scheduled late, to avoid locking
+ * the scoreboard early. This is our best tool for
+ * encouraging that. The other scoreboard locking ops will
+ * have this happen by default, since they are generally the
+ * DAG heads or close to them.
+ */
+ if (n->inst->op == QOP_TLB_COLOR_READ)
+ n->delay = 1000;
+ else
+ n->delay = 1;
+ } else {
+ for (int i = 0; i < n->child_count; i++) {
+ if (!n->children[i]->delay)
+ compute_delay(n->children[i]);
+ n->delay = MAX2(n->delay,
+ n->children[i]->delay +
+ latency_between(n, n->children[i]));
+ }
+ }
+}
+
+static void
+schedule_instructions(struct vc4_compile *c, struct schedule_state *state)
+{
+ if (debug) {
+ fprintf(stderr, "initial deps:\n");
+ dump_state(c, state);
+ }
+
+ /* Remove non-DAG heads from the list. */
+ list_for_each_entry_safe(struct schedule_node, n,
+ &state->worklist, link) {
+ if (n->parent_count != 0)
+ list_del(&n->link);
+ }
+
+ state->time = 0;
+ while (!list_empty(&state->worklist)) {
+ struct schedule_node *chosen = choose_instruction(state);
+ struct qinst *inst = chosen->inst;
+
+ if (debug) {
+ fprintf(stderr, "current list:\n");
+ dump_state(c, state);
+ fprintf(stderr, "chose: ");
+ qir_dump_inst(c, inst);
+ fprintf(stderr, " (%d cost)\n",
+ get_register_pressure_cost(state, inst));
+ }
+
+ state->time = MAX2(state->time, chosen->unblocked_time);
+
+ /* Schedule this instruction back onto the QIR list. */
+ list_del(&chosen->link);
+ list_add(&inst->link, &c->instructions);
+
+ /* Now that we've scheduled a new instruction, some of its
+ * children can be promoted to the list of instructions ready to
+ * be scheduled. Update the children's unblocked time for this
+ * DAG edge as we do so.
+ */
+ for (int i = chosen->child_count - 1; i >= 0; i--) {
+ struct schedule_node *child = chosen->children[i];
+
+ child->unblocked_time = MAX2(child->unblocked_time,
+ state->time +
+ latency_between(chosen,
+ child));
+ child->parent_count--;
+ if (child->parent_count == 0)
+ list_add(&child->link, &state->worklist);
+ }
+
+ /* Update our tracking of register pressure. */
+ for (int i = 0; i < qir_get_op_nsrc(inst->op); i++) {
+ if (inst->src[i].file == QFILE_TEMP)
+ BITSET_SET(state->temp_live, inst->src[i].index);
+ }
+ if (inst->dst.file == QFILE_TEMP) {
+ state->temp_writes[inst->dst.index]--;
+ if (state->temp_writes[inst->dst.index] == 0)
+ BITSET_CLEAR(state->temp_live, inst->dst.index);
+ }
+
+ state->time++;
+ }
+}
+
+void
+qir_schedule_instructions(struct vc4_compile *c)
+{
+ void *mem_ctx = ralloc_context(NULL);
+ struct schedule_state state = { 0 };
+
+ if (debug) {
+ fprintf(stderr, "Pre-schedule instructions\n");
+ qir_dump(c);
+ }
+
+ state.temp_writes = rzalloc_array(mem_ctx, uint32_t, c->num_temps);
+ state.temp_live = rzalloc_array(mem_ctx, BITSET_WORD,
+ BITSET_WORDS(c->num_temps));
+ list_inithead(&state.worklist);
+
+ /* Wrap each instruction in a scheduler structure. */
+ list_for_each_entry_safe(struct qinst, inst, &c->instructions, link) {
+ struct schedule_node *n = rzalloc(mem_ctx, struct schedule_node);
+
+ n->inst = inst;
+ list_del(&inst->link);
+ list_addtail(&n->link, &state.worklist);
+
+ if (inst->dst.file == QFILE_TEMP)
+ state.temp_writes[inst->dst.index]++;
+ }
+
+ /* Dependencies tracked top-to-bottom. */
+ calculate_forward_deps(c, mem_ctx, &state.worklist);
+ /* Dependencies tracked bottom-to-top. */
+ calculate_reverse_deps(c, mem_ctx, &state.worklist);
+
+ list_for_each_entry(struct schedule_node, n, &state.worklist, link)
+ compute_delay(n);
+
+ schedule_instructions(c, &state);
+
+ if (debug) {
+ fprintf(stderr, "Post-schedule instructions\n");
+ qir_dump(c);
+ }
+
+ ralloc_free(mem_ctx);
+}
diff --git a/src/gallium/drivers/vc4/vc4_qpu_schedule.c b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
index 98b7b60..09164b7 100644
--- a/src/gallium/drivers/vc4/vc4_qpu_schedule.c
+++ b/src/gallium/drivers/vc4/vc4_qpu_schedule.c
@@ -50,7 +50,7 @@ struct schedule_node {
uint32_t child_array_size;
uint32_t parent_count;
- /* Longest cycles + n->latency of any parent of this node. */
+ /* Longest cycles + instruction_latency() of any parent of this node. */
uint32_t unblocked_time;
/**
@@ -259,7 +259,8 @@ process_waddr_deps(struct schedule_state *state, struct schedule_node *n,
}
} else if (is_tmu_write(waddr)) {
add_write_dep(state, &state->last_tmu_write, n);
- } else if (qpu_waddr_is_tlb(waddr)) {
+ } else if (qpu_waddr_is_tlb(waddr) ||
+ waddr == QPU_W_MS_FLAGS) {
add_write_dep(state, &state->last_tlb, n);
} else {
switch (waddr) {
@@ -623,6 +624,46 @@ dump_state(struct list_head *schedule_list)
}
}
+static uint32_t waddr_latency(uint32_t waddr, uint64_t after)
+{
+ if (waddr < 32)
+ return 2;
+
+ /* Apply some huge latency between texture fetch requests and getting
+ * their results back.
+ */
+ if (waddr == QPU_W_TMU0_S) {
+ if (QPU_GET_FIELD(after, QPU_SIG) == QPU_SIG_LOAD_TMU0)
+ return 100;
+ }
+ if (waddr == QPU_W_TMU1_S) {
+ if (QPU_GET_FIELD(after, QPU_SIG) == QPU_SIG_LOAD_TMU1)
+ return 100;
+ }
+
+ switch(waddr) {
+ case QPU_W_SFU_RECIP:
+ case QPU_W_SFU_RECIPSQRT:
+ case QPU_W_SFU_EXP:
+ case QPU_W_SFU_LOG:
+ return 3;
+ default:
+ return 1;
+ }
+}
+
+static uint32_t
+instruction_latency(struct schedule_node *before, struct schedule_node *after)
+{
+ uint64_t before_inst = before->inst->inst;
+ uint64_t after_inst = after->inst->inst;
+
+ return MAX2(waddr_latency(QPU_GET_FIELD(before_inst, QPU_WADDR_ADD),
+ after_inst),
+ waddr_latency(QPU_GET_FIELD(before_inst, QPU_WADDR_MUL),
+ after_inst));
+}
+
/** Recursive computation of the delay member of a node. */
static void
compute_delay(struct schedule_node *n)
@@ -634,7 +675,8 @@ compute_delay(struct schedule_node *n)
if (!n->children[i].node->delay)
compute_delay(n->children[i].node);
n->delay = MAX2(n->delay,
- n->children[i].node->delay + n->latency);
+ n->children[i].node->delay +
+ instruction_latency(n, n->children[i].node));
}
}
}
@@ -663,9 +705,14 @@ mark_instruction_scheduled(struct list_head *schedule_list,
* immediately after (or paired with!) the thing reading the
* destination.
*/
- int latency_from_previous = war_only ? 0 : node->latency;
+ uint32_t latency = 0;
+ if (!war_only) {
+ latency = instruction_latency(node,
+ node->children[i].node);
+ }
+
child->unblocked_time = MAX2(child->unblocked_time,
- time + latency_from_previous);
+ time + latency);
child->parent_count--;
if (child->parent_count == 0)
list_add(&child->link, schedule_list);
@@ -798,33 +845,6 @@ schedule_instructions(struct vc4_compile *c, struct list_head *schedule_list)
return time;
}
-static uint32_t waddr_latency(uint32_t waddr)
-{
- if (waddr < 32)
- return 2;
-
- /* Some huge number, really. */
- if (waddr >= QPU_W_TMU0_S && waddr <= QPU_W_TMU1_B)
- return 100;
-
- switch(waddr) {
- case QPU_W_SFU_RECIP:
- case QPU_W_SFU_RECIPSQRT:
- case QPU_W_SFU_EXP:
- case QPU_W_SFU_LOG:
- return 3;
- default:
- return 1;
- }
-}
-
-static uint32_t
-instruction_latency(uint64_t inst)
-{
- return MAX2(waddr_latency(QPU_GET_FIELD(inst, QPU_WADDR_ADD)),
- waddr_latency(QPU_GET_FIELD(inst, QPU_WADDR_MUL)));
-}
-
uint32_t
qpu_schedule_instructions(struct vc4_compile *c)
{
@@ -851,7 +871,6 @@ qpu_schedule_instructions(struct vc4_compile *c)
struct schedule_node *n = rzalloc(mem_ctx, struct schedule_node);
n->inst = inst;
- n->latency = instruction_latency(inst->inst);
if (reads_uniform(inst->inst)) {
n->uniform = next_uniform++;
diff --git a/src/gallium/drivers/vc4/vc4_resource.c b/src/gallium/drivers/vc4/vc4_resource.c
index e7069a4..9e6678a 100644
--- a/src/gallium/drivers/vc4/vc4_resource.c
+++ b/src/gallium/drivers/vc4/vc4_resource.c
@@ -207,7 +207,7 @@ vc4_resource_transfer_map(struct pipe_context *pctx,
/* If the resource is multisampled, we need to resolve to single
* sample. This seems like it should be handled at a higher layer.
*/
- if (prsc->nr_samples) {
+ if (prsc->nr_samples > 1) {
trans->ss_resource = vc4_get_temp_resource(pctx, prsc, box);
if (!trans->ss_resource)
goto fail;
@@ -377,7 +377,7 @@ vc4_setup_slices(struct vc4_resource *rsc)
if (!rsc->tiled) {
slice->tiling = VC4_TILING_FORMAT_LINEAR;
- if (prsc->nr_samples) {
+ if (prsc->nr_samples > 1) {
/* MSAA (4x) surfaces are stored as raw tile buffer contents. */
level_width = align(level_width, 32);
level_height = align(level_height, 32);
@@ -458,7 +458,7 @@ vc4_resource_setup(struct pipe_screen *pscreen,
prsc->screen = pscreen;
rsc->base.vtbl = &vc4_resource_vtbl;
- if (prsc->nr_samples == 0)
+ if (prsc->nr_samples <= 1)
rsc->cpp = util_format_get_blocksize(tmpl->format);
else
rsc->cpp = sizeof(uint32_t);
@@ -475,7 +475,7 @@ get_resource_texture_format(struct pipe_resource *prsc)
uint8_t format = vc4_get_tex_format(prsc->format);
if (!rsc->tiled) {
- if (prsc->nr_samples) {
+ if (prsc->nr_samples > 1) {
return ~0;
} else {
assert(format == VC4_TEXTURE_TYPE_RGBA8888);
@@ -497,7 +497,7 @@ vc4_resource_create(struct pipe_screen *pscreen,
* communicate metadata about tiling currently.
*/
if (tmpl->target == PIPE_BUFFER ||
- tmpl->nr_samples ||
+ tmpl->nr_samples > 1 ||
(tmpl->bind & (PIPE_BIND_SCANOUT |
PIPE_BIND_LINEAR |
PIPE_BIND_SHARED |
@@ -832,7 +832,7 @@ vc4_dump_surface(struct pipe_surface *psurf)
if (!psurf)
return;
- if (psurf->texture->nr_samples)
+ if (psurf->texture->nr_samples > 1)
vc4_dump_surface_msaa(psurf);
else
vc4_dump_surface_non_msaa(psurf);
diff --git a/src/gallium/drivers/vc4/vc4_screen.c b/src/gallium/drivers/vc4/vc4_screen.c
index 090579c..8ddf086 100644
--- a/src/gallium/drivers/vc4/vc4_screen.c
+++ b/src/gallium/drivers/vc4/vc4_screen.c
@@ -57,6 +57,10 @@ static const struct debug_named_value debug_options[] = {
"Flush after each draw call" },
{ "always_sync", VC4_DEBUG_ALWAYS_SYNC,
"Wait for finish after each flush" },
+#if USE_VC4_SIMULATOR
+ { "dump", VC4_DEBUG_DUMP,
+ "Write a GPU command stream trace file" },
+#endif
{ NULL }
};
diff --git a/src/gallium/drivers/vc4/vc4_screen.h b/src/gallium/drivers/vc4/vc4_screen.h
index 5992e37..03f76b2 100644
--- a/src/gallium/drivers/vc4/vc4_screen.h
+++ b/src/gallium/drivers/vc4/vc4_screen.h
@@ -41,6 +41,7 @@ struct vc4_bo;
#define VC4_DEBUG_ALWAYS_FLUSH 0x0080
#define VC4_DEBUG_ALWAYS_SYNC 0x0100
#define VC4_DEBUG_NIR 0x0200
+#define VC4_DEBUG_DUMP 0x0400
#define VC4_MAX_MIP_LEVELS 12
#define VC4_MAX_TEXTURE_SAMPLERS 16
diff --git a/src/gallium/drivers/vc4/vc4_simulator.c b/src/gallium/drivers/vc4/vc4_simulator.c
index 4b1df92..521ef50 100644
--- a/src/gallium/drivers/vc4/vc4_simulator.c
+++ b/src/gallium/drivers/vc4/vc4_simulator.c
@@ -131,6 +131,93 @@ vc4_simulator_unpin_bos(struct vc4_exec_info *exec)
return 0;
}
+static void
+vc4_dump_to_file(struct vc4_exec_info *exec)
+{
+ static int dumpno = 0;
+ struct drm_vc4_get_hang_state *state;
+ struct drm_vc4_get_hang_state_bo *bo_state;
+ unsigned int dump_version = 0;
+
+ if (!(vc4_debug & VC4_DEBUG_DUMP))
+ return;
+
+ state = calloc(1, sizeof(*state));
+
+ int unref_count = 0;
+ list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec->unref_list,
+ unref_head) {
+ unref_count++;
+ }
+
+ /* Add one more for the overflow area that isn't wrapped in a BO. */
+ state->bo_count = exec->bo_count + unref_count + 1;
+ bo_state = calloc(state->bo_count, sizeof(*bo_state));
+
+ char *filename = NULL;
+ asprintf(&filename, "vc4-dri-%d.dump", dumpno++);
+ FILE *f = fopen(filename, "w+");
+ if (!f) {
+ fprintf(stderr, "Couldn't open %s: %s", filename,
+ strerror(errno));
+ return;
+ }
+
+ fwrite(&dump_version, sizeof(dump_version), 1, f);
+
+ state->ct0ca = exec->ct0ca;
+ state->ct0ea = exec->ct0ea;
+ state->ct1ca = exec->ct1ca;
+ state->ct1ea = exec->ct1ea;
+ state->start_bin = exec->ct0ca;
+ state->start_render = exec->ct1ca;
+ fwrite(state, sizeof(*state), 1, f);
+
+ int i;
+ for (i = 0; i < exec->bo_count; i++) {
+ struct drm_gem_cma_object *cma_bo = exec->bo[i];
+ bo_state[i].handle = i; /* Not used by the parser. */
+ bo_state[i].paddr = cma_bo->paddr;
+ bo_state[i].size = cma_bo->base.size;
+ }
+
+ list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec->unref_list,
+ unref_head) {
+ struct drm_gem_cma_object *cma_bo = &bo->base;
+ bo_state[i].handle = 0;
+ bo_state[i].paddr = cma_bo->paddr;
+ bo_state[i].size = cma_bo->base.size;
+ i++;
+ }
+
+ /* Add the static overflow memory area. */
+ bo_state[i].handle = exec->bo_count;
+ bo_state[i].paddr = 0;
+ bo_state[i].size = OVERFLOW_SIZE;
+ i++;
+
+ fwrite(bo_state, sizeof(*bo_state), state->bo_count, f);
+
+ for (int i = 0; i < exec->bo_count; i++) {
+ struct drm_gem_cma_object *cma_bo = exec->bo[i];
+ fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f);
+ }
+
+ list_for_each_entry_safe(struct drm_vc4_bo, bo, &exec->unref_list,
+ unref_head) {
+ struct drm_gem_cma_object *cma_bo = &bo->base;
+ fwrite(cma_bo->vaddr, cma_bo->base.size, 1, f);
+ }
+
+ void *overflow = calloc(1, OVERFLOW_SIZE);
+ fwrite(overflow, 1, OVERFLOW_SIZE, f);
+ free(overflow);
+
+ free(state);
+ free(bo_state);
+ fclose(f);
+}
+
int
vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
{
@@ -183,6 +270,8 @@ vc4_simulator_flush(struct vc4_context *vc4, struct drm_vc4_submit_cl *args)
exec.ct1ea - exec.ct1ca, true);
}
+ vc4_dump_to_file(&exec);
+
if (exec.ct0ca != exec.ct0ea) {
int bfc = simpenrose_do_binning(exec.ct0ca, exec.ct0ea);
if (bfc != 1) {
diff --git a/src/gallium/drivers/vc4/vc4_simulator_validate.h b/src/gallium/drivers/vc4/vc4_simulator_validate.h
index 40d3ada..e1f8b5a 100644
--- a/src/gallium/drivers/vc4/vc4_simulator_validate.h
+++ b/src/gallium/drivers/vc4/vc4_simulator_validate.h
@@ -45,7 +45,7 @@ struct vc4_exec_info;
#define roundup(x, y) align(x, y)
#define round_up(x, y) align(x, y)
#define max(x, y) MAX2(x, y)
-#define min(x, y) MiN2(x, y)
+#define min(x, y) MIN2(x, y)
#define BUG_ON(condition) assert(!(condition))
static inline int
diff --git a/src/gallium/drivers/vc4/vc4_state.c b/src/gallium/drivers/vc4/vc4_state.c
index d9c0f55..7e0ada7 100644
--- a/src/gallium/drivers/vc4/vc4_state.c
+++ b/src/gallium/drivers/vc4/vc4_state.c
@@ -462,9 +462,9 @@ vc4_set_framebuffer_state(struct pipe_context *pctx,
vc4->msaa = false;
if (cso->cbufs[0])
- vc4->msaa = cso->cbufs[0]->texture->nr_samples != 0;
+ vc4->msaa = cso->cbufs[0]->texture->nr_samples > 1;
else if (cso->zsbuf)
- vc4->msaa = cso->zsbuf->texture->nr_samples != 0;
+ vc4->msaa = cso->zsbuf->texture->nr_samples > 1;
if (vc4->msaa) {
vc4->tile_width = 32;
diff --git a/src/gallium/state_trackers/osmesa/osmesa.c b/src/gallium/state_trackers/osmesa/osmesa.c
index 0f27ba8..38d684b 100644
--- a/src/gallium/state_trackers/osmesa/osmesa.c
+++ b/src/gallium/state_trackers/osmesa/osmesa.c
@@ -544,11 +544,39 @@ GLAPI OSMesaContext GLAPIENTRY
OSMesaCreateContextExt(GLenum format, GLint depthBits, GLint stencilBits,
GLint accumBits, OSMesaContext sharelist)
{
+ int attribs[100], n = 0;
+
+ attribs[n++] = OSMESA_FORMAT;
+ attribs[n++] = format;
+ attribs[n++] = OSMESA_DEPTH_BITS;
+ attribs[n++] = depthBits;
+ attribs[n++] = OSMESA_STENCIL_BITS;
+ attribs[n++] = stencilBits;
+ attribs[n++] = OSMESA_ACCUM_BITS;
+ attribs[n++] = accumBits;
+ attribs[n++] = 0;
+
+ return OSMesaCreateContextAttribs(attribs, sharelist);
+}
+
+
+/**
+ * New in Mesa 11.2
+ *
+ * Create context with attribute list.
+ */
+GLAPI OSMesaContext GLAPIENTRY
+OSMesaCreateContextAttribs(const int *attribList, OSMesaContext sharelist)
+{
OSMesaContext osmesa;
struct st_context_iface *st_shared;
enum st_context_error st_error = 0;
struct st_context_attribs attribs;
struct st_api *stapi = get_st_api();
+ GLenum format = GL_RGBA;
+ int depthBits = 0, stencilBits = 0, accumBits = 0;
+ int profile = OSMESA_COMPAT_PROFILE, version_major = 1, version_minor = 0;
+ int i;
if (sharelist) {
st_shared = sharelist->stctx;
@@ -557,6 +585,64 @@ OSMesaCreateContextExt(GLenum format, GLint depthBits, GLint stencilBits,
st_shared = NULL;
}
+ for (i = 0; attribList[i]; i += 2) {
+ switch (attribList[i]) {
+ case OSMESA_FORMAT:
+ format = attribList[i+1];
+ switch (format) {
+ case OSMESA_COLOR_INDEX:
+ case OSMESA_RGBA:
+ case OSMESA_BGRA:
+ case OSMESA_ARGB:
+ case OSMESA_RGB:
+ case OSMESA_BGR:
+ case OSMESA_RGB_565:
+ /* legal */
+ break;
+ default:
+ return NULL;
+ }
+ break;
+ case OSMESA_DEPTH_BITS:
+ depthBits = attribList[i+1];
+ if (depthBits < 0)
+ return NULL;
+ break;
+ case OSMESA_STENCIL_BITS:
+ stencilBits = attribList[i+1];
+ if (stencilBits < 0)
+ return NULL;
+ break;
+ case OSMESA_ACCUM_BITS:
+ accumBits = attribList[i+1];
+ if (accumBits < 0)
+ return NULL;
+ break;
+ case OSMESA_PROFILE:
+ profile = attribList[i+1];
+ if (profile != OSMESA_CORE_PROFILE &&
+ profile != OSMESA_COMPAT_PROFILE)
+ return NULL;
+ break;
+ case OSMESA_CONTEXT_MAJOR_VERSION:
+ version_major = attribList[i+1];
+ if (version_major < 1)
+ return NULL;
+ break;
+ case OSMESA_CONTEXT_MINOR_VERSION:
+ version_minor = attribList[i+1];
+ if (version_minor < 0)
+ return NULL;
+ break;
+ case 0:
+ /* end of list */
+ break;
+ default:
+ fprintf(stderr, "Bad attribute in OSMesaCreateContextAttribs()\n");
+ return NULL;
+ }
+ }
+
osmesa = (OSMesaContext) CALLOC_STRUCT(osmesa_context);
if (!osmesa)
return NULL;
@@ -581,9 +667,11 @@ OSMesaCreateContextExt(GLenum format, GLint depthBits, GLint stencilBits,
/*
* Create the rendering context
*/
- attribs.profile = ST_PROFILE_DEFAULT;
- attribs.major = 2;
- attribs.minor = 1;
+ memset(&attribs, 0, sizeof(attribs));
+ attribs.profile = (profile == OSMESA_CORE_PROFILE)
+ ? ST_PROFILE_OPENGL_CORE : ST_PROFILE_DEFAULT;
+ attribs.major = version_major;
+ attribs.minor = version_minor;
attribs.flags = 0; /* ST_CONTEXT_FLAG_x */
attribs.options.force_glsl_extensions_warn = FALSE;
attribs.options.disable_blend_func_extended = FALSE;
@@ -614,6 +702,7 @@ OSMesaCreateContextExt(GLenum format, GLint depthBits, GLint stencilBits,
}
+
/**
* Destroy an Off-Screen Mesa rendering context.
*
@@ -883,6 +972,7 @@ struct name_function
static struct name_function functions[] = {
{ "OSMesaCreateContext", (OSMESAproc) OSMesaCreateContext },
{ "OSMesaCreateContextExt", (OSMESAproc) OSMesaCreateContextExt },
+ { "OSMesaCreateContextAttribs", (OSMESAproc) OSMesaCreateContextAttribs },
{ "OSMesaDestroyContext", (OSMESAproc) OSMesaDestroyContext },
{ "OSMesaMakeCurrent", (OSMESAproc) OSMesaMakeCurrent },
{ "OSMesaGetCurrentContext", (OSMESAproc) OSMesaGetCurrentContext },
diff --git a/src/gallium/state_trackers/va/buffer.c b/src/gallium/state_trackers/va/buffer.c
index 769305e..8de7935 100644
--- a/src/gallium/state_trackers/va/buffer.c
+++ b/src/gallium/state_trackers/va/buffer.c
@@ -212,6 +212,7 @@ VAStatus
vlVaAcquireBufferHandle(VADriverContextP ctx, VABufferID buf_id,
VABufferInfo *out_buf_info)
{
+ vlVaDriver *drv;
uint32_t i;
uint32_t mem_type;
vlVaBuffer *buf ;
@@ -255,13 +256,9 @@ vlVaAcquireBufferHandle(VADriverContextP ctx, VABufferID buf_id,
if (!buf->derived_surface.resource)
return VA_STATUS_ERROR_INVALID_BUFFER;
+ drv = VL_VA_DRIVER(ctx);
screen = VL_VA_PSCREEN(ctx);
- if (buf->derived_surface.fence) {
- screen->fence_finish(screen, buf->derived_surface.fence, PIPE_TIMEOUT_INFINITE);
- screen->fence_reference(screen, &buf->derived_surface.fence, NULL);
- }
-
if (buf->export_refcount > 0) {
if (buf->export_state.mem_type != mem_type)
return VA_STATUS_ERROR_INVALID_PARAMETER;
@@ -272,6 +269,8 @@ vlVaAcquireBufferHandle(VADriverContextP ctx, VABufferID buf_id,
case VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME: {
struct winsys_handle whandle;
+ drv->pipe->flush(drv->pipe, NULL, 0);
+
memset(&whandle, 0, sizeof(whandle));
whandle.type = DRM_API_HANDLE_TYPE_FD;
diff --git a/src/gallium/state_trackers/va/image.c b/src/gallium/state_trackers/va/image.c
index ae07da8..ccc263f 100644
--- a/src/gallium/state_trackers/va/image.c
+++ b/src/gallium/state_trackers/va/image.c
@@ -264,9 +264,8 @@ vlVaDeriveImage(VADriverContextP ctx, VASurfaceID surface, VAImage *image)
img->image_id = handle_table_add(drv->htab, img);
img_buf->type = VAImageBufferType;
- img_buf->size = image->data_size;
+ img_buf->size = img->data_size;
img_buf->num_elements = 1;
- img_buf->derived_surface.fence = surf->fence;
pipe_resource_reference(&img_buf->derived_surface.resource, surfaces[0]->texture);
diff --git a/src/gallium/state_trackers/va/picture.c b/src/gallium/state_trackers/va/picture.c
index 8623139..7b30bf8 100644
--- a/src/gallium/state_trackers/va/picture.c
+++ b/src/gallium/state_trackers/va/picture.c
@@ -92,7 +92,6 @@ vlVaGetReferenceFrame(vlVaDriver *drv, VASurfaceID surface_id,
static VAStatus
handlePictureParameterBuffer(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf)
{
- unsigned int i;
VAStatus vaStatus = VA_STATUS_SUCCESS;
switch (u_reduce_video_profile(context->templat.profile)) {
diff --git a/src/gallium/state_trackers/va/picture_hevc.c b/src/gallium/state_trackers/va/picture_hevc.c
index dc66b0f..28743ee 100644
--- a/src/gallium/state_trackers/va/picture_hevc.c
+++ b/src/gallium/state_trackers/va/picture_hevc.c
@@ -159,11 +159,6 @@ void vlVaHandlePictureParameterBufferHEVC(vlVaDriver *drv, vlVaContext *context,
for (i = 0 ; i < 15 ; i++) {
context->desc.h265.PicOrderCntVal[i] = hevc->ReferenceFrames[i].pic_order_cnt;
- unsigned int index = hevc->ReferenceFrames[i].picture_id & 0x7F;
-
- if (index == 0x7F)
- continue;
-
vlVaGetReferenceFrame(drv, hevc->ReferenceFrames[i].picture_id, &context->desc.h265.ref[i]);
if ((hevc->ReferenceFrames[i].flags & VA_PICTURE_HEVC_RPS_ST_CURR_BEFORE) && (iBefore < 8)) {
diff --git a/src/gallium/state_trackers/va/postproc.c b/src/gallium/state_trackers/va/postproc.c
index 1fdb4e7..15053a9 100644
--- a/src/gallium/state_trackers/va/postproc.c
+++ b/src/gallium/state_trackers/va/postproc.c
@@ -25,25 +25,35 @@
*
**************************************************************************/
-//#include "pipe/p_video_codec.h"
-
#include "util/u_handle_table.h"
-//#include "util/u_video.h"
-
-//#include "vl/vl_vlc.h"
-//#include "vl/vl_winsys.h"
#include "va_private.h"
+static const VARectangle *
+vlVaRegionDefault(const VARectangle *region, struct pipe_video_buffer *buf,
+ VARectangle *def)
+{
+ if (region)
+ return region;
+
+ def->x = 0;
+ def->y = 0;
+ def->width = buf->width;
+ def->height = buf->height;
+
+ return def;
+}
+
VAStatus
vlVaHandleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContext *context, vlVaBuffer *buf)
{
+ VARectangle def_src_region, def_dst_region;
+ const VARectangle *src_region, *dst_region;
struct u_rect src_rect;
struct u_rect dst_rect;
vlVaSurface *src_surface;
VAProcPipelineParameterBuffer *pipeline_param;
struct pipe_surface **surfaces;
- struct pipe_screen *screen;
struct pipe_surface *psurf;
if (!drv || !context)
@@ -66,27 +76,28 @@ vlVaHandleVAProcPipelineParameterBufferType(vlVaDriver *drv, vlVaContext *contex
if (!surfaces || !surfaces[0])
return VA_STATUS_ERROR_INVALID_SURFACE;
- screen = drv->pipe->screen;
-
psurf = surfaces[0];
- src_rect.x0 = pipeline_param->surface_region->x;
- src_rect.y0 = pipeline_param->surface_region->y;
- src_rect.x1 = pipeline_param->surface_region->x + pipeline_param->surface_region->width;
- src_rect.y1 = pipeline_param->surface_region->y + pipeline_param->surface_region->height;
+ src_region = vlVaRegionDefault(pipeline_param->surface_region, src_surface->buffer, &def_src_region);
+ dst_region = vlVaRegionDefault(pipeline_param->output_region, context->target, &def_dst_region);
+
+ src_rect.x0 = src_region->x;
+ src_rect.y0 = src_region->y;
+ src_rect.x1 = src_region->x + src_region->width;
+ src_rect.y1 = src_region->y + src_region->height;
- dst_rect.x0 = pipeline_param->output_region->x;
- dst_rect.y0 = pipeline_param->output_region->y;
- dst_rect.x1 = pipeline_param->output_region->x + pipeline_param->output_region->width;
- dst_rect.y1 = pipeline_param->output_region->y + pipeline_param->output_region->height;
+ dst_rect.x0 = dst_region->x;
+ dst_rect.y0 = dst_region->y;
+ dst_rect.x1 = dst_region->x + dst_region->width;
+ dst_rect.y1 = dst_region->y + dst_region->height;
vl_compositor_clear_layers(&drv->cstate);
vl_compositor_set_buffer_layer(&drv->cstate, &drv->compositor, 0, src_surface->buffer, &src_rect, NULL, VL_COMPOSITOR_WEAVE);
vl_compositor_set_layer_dst_area(&drv->cstate, 0, &dst_rect);
vl_compositor_render(&drv->cstate, &drv->compositor, psurf, NULL, false);
- screen->fence_reference(screen, &src_surface->fence, NULL);
- drv->pipe->flush(drv->pipe, &src_surface->fence, 0);
+ // TODO: figure out why this is necessary for DMA-buf sharing
+ drv->pipe->flush(drv->pipe, NULL, 0);
return VA_STATUS_SUCCESS;
}
diff --git a/src/gallium/state_trackers/va/surface.c b/src/gallium/state_trackers/va/surface.c
index c052c8f..5ddaf04 100644
--- a/src/gallium/state_trackers/va/surface.c
+++ b/src/gallium/state_trackers/va/surface.c
@@ -72,8 +72,6 @@ vlVaDestroySurfaces(VADriverContextP ctx, VASurfaceID *surface_list, int num_sur
vlVaSurface *surf = handle_table_get(drv->htab, surface_list[i]);
if (surf->buffer)
surf->buffer->destroy(surf->buffer);
- if(surf->fence)
- drv->pipe->screen->fence_reference(drv->pipe->screen, &surf->fence, NULL);
util_dynarray_fini(&surf->subpics);
FREE(surf);
handle_table_remove(drv->htab, surface_list[i]);
@@ -245,11 +243,6 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID surface_id, void* draw, short s
screen = drv->pipe->screen;
vscreen = drv->vscreen;
- if(surf->fence) {
- screen->fence_finish(screen, surf->fence, PIPE_TIMEOUT_INFINITE);
- screen->fence_reference(screen, &surf->fence, NULL);
- }
-
tex = vscreen->texture_from_drawable(vscreen, draw);
if (!tex)
return VA_STATUS_ERROR_INVALID_DISPLAY;
@@ -281,8 +274,7 @@ vlVaPutSurface(VADriverContextP ctx, VASurfaceID surface_id, void* draw, short s
screen->flush_frontbuffer(screen, tex, 0, 0,
vscreen->get_private(vscreen), NULL);
- screen->fence_reference(screen, &surf->fence, NULL);
- drv->pipe->flush(drv->pipe, &surf->fence, 0);
+ drv->pipe->flush(drv->pipe, NULL, 0);
pipe_resource_reference(&tex, NULL);
pipe_surface_reference(&surf_draw, NULL);
@@ -697,11 +689,11 @@ vlVaQueryVideoProcFilterCaps(VADriverContextP ctx, VAContextID context,
return VA_STATUS_SUCCESS;
}
-static VAProcColorStandardType vpp_input_color_standards[VAProcColorStandardCount] = {
+static VAProcColorStandardType vpp_input_color_standards[] = {
VAProcColorStandardBT601
};
-static VAProcColorStandardType vpp_output_color_standards[VAProcColorStandardCount] = {
+static VAProcColorStandardType vpp_output_color_standards[] = {
VAProcColorStandardBT601
};
@@ -725,9 +717,9 @@ vlVaQueryVideoProcPipelineCaps(VADriverContextP ctx, VAContextID context,
pipeline_cap->filter_flags = 0;
pipeline_cap->num_forward_references = 0;
pipeline_cap->num_backward_references = 0;
- pipeline_cap->num_input_color_standards = 1;
+ pipeline_cap->num_input_color_standards = Elements(vpp_input_color_standards);
pipeline_cap->input_color_standards = vpp_input_color_standards;
- pipeline_cap->num_output_color_standards = 1;
+ pipeline_cap->num_output_color_standards = Elements(vpp_output_color_standards);
pipeline_cap->output_color_standards = vpp_output_color_standards;
for (i = 0; i < num_filters; i++) {
diff --git a/src/gallium/state_trackers/va/va_private.h b/src/gallium/state_trackers/va/va_private.h
index 6739efc..fa6e0fb 100644
--- a/src/gallium/state_trackers/va/va_private.h
+++ b/src/gallium/state_trackers/va/va_private.h
@@ -244,7 +244,6 @@ typedef struct {
struct {
struct pipe_resource *resource;
struct pipe_transfer *transfer;
- struct pipe_fence_handle *fence;
} derived_surface;
unsigned int export_refcount;
VABufferInfo export_state;
@@ -252,7 +251,6 @@ typedef struct {
typedef struct {
struct pipe_video_buffer templat, *buffer;
- struct pipe_fence_handle *fence;
struct util_dynarray subpics; /* vlVaSubpicture */
} vlVaSurface;
diff --git a/src/gallium/targets/opencl/Makefile.am b/src/gallium/targets/opencl/Makefile.am
index 08f95e8..f3ba1e3 100644
--- a/src/gallium/targets/opencl/Makefile.am
+++ b/src/gallium/targets/opencl/Makefile.am
@@ -2,6 +2,9 @@ include $(top_srcdir)/src/gallium/Automake.inc
lib_LTLIBRARIES = lib@OPENCL_LIBNAME@.la
+AM_CPPFLAGS = \
+ $(LIBELF_CFLAGS)
+
lib@OPENCL_LIBNAME@_la_LDFLAGS = \
$(LLVM_LDFLAGS) \
-no-undefined \
@@ -19,7 +22,7 @@ lib@OPENCL_LIBNAME@_la_LIBADD = \
$(top_builddir)/src/gallium/state_trackers/clover/libclover.la \
$(top_builddir)/src/gallium/auxiliary/libgallium.la \
$(top_builddir)/src/util/libmesautil.la \
- $(ELF_LIB) \
+ $(LIBELF_LIBS) \
$(DLOPEN_LIBS) \
-lclangCodeGen \
-lclangFrontendTool \
diff --git a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
index c6603e3..c44424f 100644
--- a/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
+++ b/src/gallium/winsys/nouveau/drm/nouveau_drm_winsys.c
@@ -13,6 +13,9 @@
#include "nouveau/nouveau_winsys.h"
#include "nouveau/nouveau_screen.h"
+#include <nvif/class.h>
+#include <nvif/cl0080.h>
+
static struct util_hash_table *fd_tab = NULL;
pipe_static_mutex(nouveau_screen_mutex);
@@ -27,7 +30,7 @@ bool nouveau_drm_screen_unref(struct nouveau_screen *screen)
ret = --screen->refcount;
assert(ret >= 0);
if (ret == 0)
- util_hash_table_remove(fd_tab, intptr_to_pointer(screen->device->fd));
+ util_hash_table_remove(fd_tab, intptr_to_pointer(screen->drm->fd));
pipe_mutex_unlock(nouveau_screen_mutex);
return ret == 0;
}
@@ -57,16 +60,19 @@ static int compare_fd(void *key1, void *key2)
PUBLIC struct pipe_screen *
nouveau_drm_screen_create(int fd)
{
+ struct nouveau_drm *drm = NULL;
struct nouveau_device *dev = NULL;
- struct pipe_screen *(*init)(struct nouveau_device *);
- struct nouveau_screen *screen;
- int ret, dupfd = -1;
+ struct nouveau_screen *(*init)(struct nouveau_device *);
+ struct nouveau_screen *screen = NULL;
+ int ret, dupfd;
pipe_mutex_lock(nouveau_screen_mutex);
if (!fd_tab) {
fd_tab = util_hash_table_create(hash_fd, compare_fd);
- if (!fd_tab)
- goto err;
+ if (!fd_tab) {
+ pipe_mutex_unlock(nouveau_screen_mutex);
+ return NULL;
+ }
}
screen = util_hash_table_get(fd_tab, intptr_to_pointer(fd));
@@ -86,7 +92,15 @@ nouveau_drm_screen_create(int fd)
* creation error.
*/
dupfd = dup(fd);
- ret = nouveau_device_wrap(dupfd, 1, &dev);
+
+ ret = nouveau_drm_new(dupfd, &drm);
+ if (ret)
+ goto err;
+
+ ret = nouveau_device_new(&drm->client, NV_DEVICE,
+ &(struct nv_device_v0) {
+ .device = ~0ULL,
+ }, sizeof(struct nv_device_v0), &dev);
if (ret)
goto err;
@@ -116,8 +130,8 @@ nouveau_drm_screen_create(int fd)
goto err;
}
- screen = (struct nouveau_screen*)init(dev);
- if (!screen)
+ screen = init(dev);
+ if (!screen || !screen->base.context_create)
goto err;
/* Use dupfd in hash table, to avoid errors if the original fd gets
@@ -130,10 +144,13 @@ nouveau_drm_screen_create(int fd)
return &screen->base;
err:
- if (dev)
+ if (screen) {
+ screen->base.destroy(&screen->base);
+ } else {
nouveau_device_del(&dev);
- else if (dupfd >= 0)
+ nouveau_drm_del(&drm);
close(dupfd);
+ }
pipe_mutex_unlock(nouveau_screen_mutex);
return NULL;
}