Diffstat (limited to 'src/gallium/drivers/nouveau/nvc0')
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_2d.xml.h | 380
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h | 1350
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_3ddefs.xml.h | 98
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_compute.c | 271
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_compute.h | 10
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_compute.xml.h | 410
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_context.c | 402
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_context.h | 357
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_draw.c | 88
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_formats.c | 25
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_graph_macros.h | 236
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_m2mf.xml.h | 138
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c | 358
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_program.c | 811
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_program.h | 68
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_push.c | 409
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_query.c | 1448
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_resource.c | 62
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_resource.h | 58
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_screen.c | 967
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_screen.h | 325
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c | 278
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_state.c | 1247
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c | 577
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h | 77
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_surface.c | 1265
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_tex.c | 814
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c | 558
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c | 891
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c | 649
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_video.c | 331
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_video.h | 48
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c | 155
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c | 143
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c | 202
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h | 144
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nve4_compute.c | 652
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nve4_compute.h | 131
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h | 429
-rw-r--r--  src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h | 107
40 files changed, 16969 insertions, 0 deletions
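
The headers added by this patch are generated by the rules-ng-ng headergen tool and follow its naming scheme: bare #defines for method offsets, __MASK/__SHIFT pairs for bitfields, enumerated field values that are already pre-shifted into position, and (i0)-indexed macros with __ESIZE/__LEN for register arrays. As a minimal usage sketch (the helper name and its parameters are hypothetical and not part of this patch), a NVC0_3D_VERTEX_ATTRIB_FORMAT word from the nvc0_3d.xml.h header below could be packed like this:

#include <stdint.h>
#include "nvc0_3d.xml.h"

/* Hypothetical sketch: plain integer fields are placed with their
 * __SHIFT/__MASK pair, enumerated values are already pre-shifted
 * and are simply OR'd in. */
static inline uint32_t
vtx_attrib_format(uint32_t buffer, uint32_t offset)
{
   uint32_t fmt;

   fmt  = (buffer << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT) &
          NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK;
   fmt |= (offset << NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT) &
          NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__MASK;
   fmt |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32_32;
   fmt |= NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT;
   return fmt;
}
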
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_2d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_2d.xml.h
new file mode 100644
index 0000000..9a488c1
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_2d.xml.h
@@ -0,0 +1,380 @@
+#ifndef NVC0_2D_XML
+#define NVC0_2D_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_2d.xml ( 9454 bytes, from 2010-10-16 16:03:11)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24)
+- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_2D_DST_FORMAT 0x00000200
+
+#define NVC0_2D_DST_LINEAR 0x00000204
+
+#define NVC0_2D_DST_TILE_MODE 0x00000208
+
+#define NVC0_2D_DST_DEPTH 0x0000020c
+
+#define NVC0_2D_DST_LAYER 0x00000210
+
+#define NVC0_2D_DST_PITCH 0x00000214
+
+#define NVC0_2D_DST_WIDTH 0x00000218
+
+#define NVC0_2D_DST_HEIGHT 0x0000021c
+
+#define NVC0_2D_DST_ADDRESS_HIGH 0x00000220
+
+#define NVC0_2D_DST_ADDRESS_LOW 0x00000224
+
+#define NVC0_2D_UNK228 0x00000228
+
+#define NVC0_2D_SRC_FORMAT 0x00000230
+
+#define NVC0_2D_SRC_LINEAR 0x00000234
+
+#define NVC0_2D_SRC_TILE_MODE 0x00000238
+
+#define NVC0_2D_SRC_DEPTH 0x0000023c
+
+#define NVC0_2D_SRC_LAYER 0x00000240
+
+#define NVC0_2D_SRC_PITCH 0x00000244
+#define NVC0_2D_SRC_PITCH__MAX 0x00040000
+
+#define NVC0_2D_SRC_WIDTH 0x00000248
+#define NVC0_2D_SRC_WIDTH__MAX 0x00010000
+
+#define NVC0_2D_SRC_HEIGHT 0x0000024c
+#define NVC0_2D_SRC_HEIGHT__MAX 0x00010000
+
+#define NVC0_2D_SRC_ADDRESS_HIGH 0x00000250
+
+#define NVC0_2D_SRC_ADDRESS_LOW 0x00000254
+
+#define NVC0_2D_UNK258 0x00000258
+
+#define NVC0_2D_SINGLE_GPC 0x00000260
+
+#define NVC0_2D_COND_ADDRESS_HIGH 0x00000264
+
+#define NVC0_2D_COND_ADDRESS_LOW 0x00000268
+
+#define NVC0_2D_COND_MODE 0x0000026c
+#define NVC0_2D_COND_MODE_NEVER 0x00000000
+#define NVC0_2D_COND_MODE_ALWAYS 0x00000001
+#define NVC0_2D_COND_MODE_RES_NON_ZERO 0x00000002
+#define NVC0_2D_COND_MODE_EQUAL 0x00000003
+#define NVC0_2D_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NVC0_2D_CLIP_X 0x00000280
+
+#define NVC0_2D_CLIP_Y 0x00000284
+
+#define NVC0_2D_CLIP_W 0x00000288
+
+#define NVC0_2D_CLIP_H 0x0000028c
+
+#define NVC0_2D_CLIP_ENABLE 0x00000290
+
+#define NVC0_2D_COLOR_KEY_FORMAT 0x00000294
+#define NVC0_2D_COLOR_KEY_FORMAT_16BPP 0x00000000
+#define NVC0_2D_COLOR_KEY_FORMAT_15BPP 0x00000001
+#define NVC0_2D_COLOR_KEY_FORMAT_24BPP 0x00000002
+#define NVC0_2D_COLOR_KEY_FORMAT_30BPP 0x00000003
+#define NVC0_2D_COLOR_KEY_FORMAT_8BPP 0x00000004
+#define NVC0_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005
+#define NVC0_2D_COLOR_KEY_FORMAT_32BPP 0x00000006
+
+#define NVC0_2D_COLOR_KEY 0x00000298
+
+#define NVC0_2D_COLOR_KEY_ENABLE 0x0000029c
+
+#define NVC0_2D_ROP 0x000002a0
+
+#define NVC0_2D_BETA1 0x000002a4
+
+#define NVC0_2D_BETA4 0x000002a8
+
+#define NVC0_2D_OPERATION 0x000002ac
+#define NVC0_2D_OPERATION_SRCCOPY_AND 0x00000000
+#define NVC0_2D_OPERATION_ROP_AND 0x00000001
+#define NVC0_2D_OPERATION_BLEND 0x00000002
+#define NVC0_2D_OPERATION_SRCCOPY 0x00000003
+#define NVC0_2D_OPERATION_ROP 0x00000004
+#define NVC0_2D_OPERATION_SRCCOPY_PREMULT 0x00000005
+#define NVC0_2D_OPERATION_BLEND_PREMULT 0x00000006
+
+#define NVC0_2D_UNK2B0 0x000002b0
+#define NVC0_2D_UNK2B0_UNK0__MASK 0x0000003f
+#define NVC0_2D_UNK2B0_UNK0__SHIFT 0
+#define NVC0_2D_UNK2B0_UNK1__MASK 0x00003f00
+#define NVC0_2D_UNK2B0_UNK1__SHIFT 8
+
+#define NVC0_2D_PATTERN_SELECT 0x000002b4
+#define NVC0_2D_PATTERN_SELECT_MONO_8X8 0x00000000
+#define NVC0_2D_PATTERN_SELECT_MONO_64X1 0x00000001
+#define NVC0_2D_PATTERN_SELECT_MONO_1X64 0x00000002
+#define NVC0_2D_PATTERN_SELECT_COLOR 0x00000003
+
+#define NVC0_2D_PATTERN_COLOR_FORMAT 0x000002e8
+#define NVC0_2D_PATTERN_COLOR_FORMAT_16BPP 0x00000000
+#define NVC0_2D_PATTERN_COLOR_FORMAT_15BPP 0x00000001
+#define NVC0_2D_PATTERN_COLOR_FORMAT_32BPP 0x00000002
+#define NVC0_2D_PATTERN_COLOR_FORMAT_8BPP 0x00000003
+#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK4 0x00000004
+#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK5 0x00000005
+
+#define NVC0_2D_PATTERN_MONO_FORMAT 0x000002ec
+#define NVC0_2D_PATTERN_MONO_FORMAT_CGA6 0x00000000
+#define NVC0_2D_PATTERN_MONO_FORMAT_LE 0x00000001
+
+#define NVC0_2D_PATTERN_COLOR(i0) (0x000002f0 + 0x4*(i0))
+#define NVC0_2D_PATTERN_COLOR__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_COLOR__LEN 0x00000002
+
+#define NVC0_2D_PATTERN_BITMAP(i0) (0x000002f8 + 0x4*(i0))
+#define NVC0_2D_PATTERN_BITMAP__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_BITMAP__LEN 0x00000002
+
+#define NVC0_2D_PATTERN_X8R8G8B8(i0) (0x00000300 + 0x4*(i0))
+#define NVC0_2D_PATTERN_X8R8G8B8__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_X8R8G8B8__LEN 0x00000040
+#define NVC0_2D_PATTERN_X8R8G8B8_B__MASK 0x000000ff
+#define NVC0_2D_PATTERN_X8R8G8B8_B__SHIFT 0
+#define NVC0_2D_PATTERN_X8R8G8B8_G__MASK 0x0000ff00
+#define NVC0_2D_PATTERN_X8R8G8B8_G__SHIFT 8
+#define NVC0_2D_PATTERN_X8R8G8B8_R__MASK 0x00ff0000
+#define NVC0_2D_PATTERN_X8R8G8B8_R__SHIFT 16
+
+#define NVC0_2D_PATTERN_R5G6B5(i0) (0x00000400 + 0x4*(i0))
+#define NVC0_2D_PATTERN_R5G6B5__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_R5G6B5__LEN 0x00000020
+#define NVC0_2D_PATTERN_R5G6B5_B0__MASK 0x0000001f
+#define NVC0_2D_PATTERN_R5G6B5_B0__SHIFT 0
+#define NVC0_2D_PATTERN_R5G6B5_G0__MASK 0x000007e0
+#define NVC0_2D_PATTERN_R5G6B5_G0__SHIFT 5
+#define NVC0_2D_PATTERN_R5G6B5_R0__MASK 0x0000f800
+#define NVC0_2D_PATTERN_R5G6B5_R0__SHIFT 11
+#define NVC0_2D_PATTERN_R5G6B5_B1__MASK 0x001f0000
+#define NVC0_2D_PATTERN_R5G6B5_B1__SHIFT 16
+#define NVC0_2D_PATTERN_R5G6B5_G1__MASK 0x07e00000
+#define NVC0_2D_PATTERN_R5G6B5_G1__SHIFT 21
+#define NVC0_2D_PATTERN_R5G6B5_R1__MASK 0xf8000000
+#define NVC0_2D_PATTERN_R5G6B5_R1__SHIFT 27
+
+#define NVC0_2D_PATTERN_X1R5G5B5(i0) (0x00000480 + 0x4*(i0))
+#define NVC0_2D_PATTERN_X1R5G5B5__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_X1R5G5B5__LEN 0x00000020
+#define NVC0_2D_PATTERN_X1R5G5B5_B0__MASK 0x0000001f
+#define NVC0_2D_PATTERN_X1R5G5B5_B0__SHIFT 0
+#define NVC0_2D_PATTERN_X1R5G5B5_G0__MASK 0x000003e0
+#define NVC0_2D_PATTERN_X1R5G5B5_G0__SHIFT 5
+#define NVC0_2D_PATTERN_X1R5G5B5_R0__MASK 0x00007c00
+#define NVC0_2D_PATTERN_X1R5G5B5_R0__SHIFT 10
+#define NVC0_2D_PATTERN_X1R5G5B5_B1__MASK 0x001f0000
+#define NVC0_2D_PATTERN_X1R5G5B5_B1__SHIFT 16
+#define NVC0_2D_PATTERN_X1R5G5B5_G1__MASK 0x03e00000
+#define NVC0_2D_PATTERN_X1R5G5B5_G1__SHIFT 21
+#define NVC0_2D_PATTERN_X1R5G5B5_R1__MASK 0x7c000000
+#define NVC0_2D_PATTERN_X1R5G5B5_R1__SHIFT 26
+
+#define NVC0_2D_PATTERN_Y8(i0) (0x00000500 + 0x4*(i0))
+#define NVC0_2D_PATTERN_Y8__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_Y8__LEN 0x00000010
+#define NVC0_2D_PATTERN_Y8_Y0__MASK 0x000000ff
+#define NVC0_2D_PATTERN_Y8_Y0__SHIFT 0
+#define NVC0_2D_PATTERN_Y8_Y1__MASK 0x0000ff00
+#define NVC0_2D_PATTERN_Y8_Y1__SHIFT 8
+#define NVC0_2D_PATTERN_Y8_Y2__MASK 0x00ff0000
+#define NVC0_2D_PATTERN_Y8_Y2__SHIFT 16
+#define NVC0_2D_PATTERN_Y8_Y3__MASK 0xff000000
+#define NVC0_2D_PATTERN_Y8_Y3__SHIFT 24
+
+#define NVC0_2D_DRAW_SHAPE 0x00000580
+#define NVC0_2D_DRAW_SHAPE_POINTS 0x00000000
+#define NVC0_2D_DRAW_SHAPE_LINES 0x00000001
+#define NVC0_2D_DRAW_SHAPE_LINE_STRIP 0x00000002
+#define NVC0_2D_DRAW_SHAPE_TRIANGLES 0x00000003
+#define NVC0_2D_DRAW_SHAPE_RECTANGLES 0x00000004
+
+#define NVC0_2D_DRAW_COLOR_FORMAT 0x00000584
+
+#define NVC0_2D_DRAW_COLOR 0x00000588
+
+#define NVC0_2D_UNK58C 0x0000058c
+#define NVC0_2D_UNK58C_0 0x00000001
+#define NVC0_2D_UNK58C_1 0x00000010
+#define NVC0_2D_UNK58C_2 0x00000100
+#define NVC0_2D_UNK58C_3 0x00001000
+
+#define NVC0_2D_DRAW_POINT16 0x000005e0
+#define NVC0_2D_DRAW_POINT16_X__MASK 0x0000ffff
+#define NVC0_2D_DRAW_POINT16_X__SHIFT 0
+#define NVC0_2D_DRAW_POINT16_Y__MASK 0xffff0000
+#define NVC0_2D_DRAW_POINT16_Y__SHIFT 16
+
+#define NVC0_2D_DRAW_POINT32_X(i0) (0x00000600 + 0x8*(i0))
+#define NVC0_2D_DRAW_POINT32_X__ESIZE 0x00000008
+#define NVC0_2D_DRAW_POINT32_X__LEN 0x00000040
+
+#define NVC0_2D_DRAW_POINT32_Y(i0) (0x00000604 + 0x8*(i0))
+#define NVC0_2D_DRAW_POINT32_Y__ESIZE 0x00000008
+#define NVC0_2D_DRAW_POINT32_Y__LEN 0x00000040
+
+#define NVC0_2D_SIFC_BITMAP_ENABLE 0x00000800
+
+#define NVC0_2D_SIFC_FORMAT 0x00000804
+
+#define NVC0_2D_SIFC_BITMAP_FORMAT 0x00000808
+#define NVC0_2D_SIFC_BITMAP_FORMAT_I1 0x00000000
+#define NVC0_2D_SIFC_BITMAP_FORMAT_I4 0x00000001
+#define NVC0_2D_SIFC_BITMAP_FORMAT_I8 0x00000002
+
+#define NVC0_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c
+
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002
+
+#define NVC0_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814
+
+#define NVC0_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818
+
+#define NVC0_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c
+
+#define NVC0_2D_SIFC_WIDTH 0x00000838
+
+#define NVC0_2D_SIFC_HEIGHT 0x0000083c
+
+#define NVC0_2D_SIFC_DX_DU_FRACT 0x00000840
+
+#define NVC0_2D_SIFC_DX_DU_INT 0x00000844
+
+#define NVC0_2D_SIFC_DY_DV_FRACT 0x00000848
+
+#define NVC0_2D_SIFC_DY_DV_INT 0x0000084c
+
+#define NVC0_2D_SIFC_DST_X_FRACT 0x00000850
+
+#define NVC0_2D_SIFC_DST_X_INT 0x00000854
+
+#define NVC0_2D_SIFC_DST_Y_FRACT 0x00000858
+
+#define NVC0_2D_SIFC_DST_Y_INT 0x0000085c
+
+#define NVC0_2D_SIFC_DATA 0x00000860
+
+#define NVC0_2D_UNK0870 0x00000870
+
+#define NVC0_2D_UNK0880 0x00000880
+
+#define NVC0_2D_UNK0884 0x00000884
+
+#define NVC0_2D_UNK0888 0x00000888
+
+#define NVC0_2D_BLIT_CONTROL 0x0000088c
+#define NVC0_2D_BLIT_CONTROL_ORIGIN__MASK 0x00000001
+#define NVC0_2D_BLIT_CONTROL_ORIGIN__SHIFT 0
+#define NVC0_2D_BLIT_CONTROL_ORIGIN_CENTER 0x00000000
+#define NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER 0x00000001
+#define NVC0_2D_BLIT_CONTROL_FILTER__MASK 0x00000010
+#define NVC0_2D_BLIT_CONTROL_FILTER__SHIFT 4
+#define NVC0_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE 0x00000000
+#define NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR 0x00000010
+
+#define NVC0_2D_BLIT_DST_X 0x000008b0
+
+#define NVC0_2D_BLIT_DST_Y 0x000008b4
+
+#define NVC0_2D_BLIT_DST_W 0x000008b8
+
+#define NVC0_2D_BLIT_DST_H 0x000008bc
+
+#define NVC0_2D_BLIT_DU_DX_FRACT 0x000008c0
+
+#define NVC0_2D_BLIT_DU_DX_INT 0x000008c4
+
+#define NVC0_2D_BLIT_DV_DY_FRACT 0x000008c8
+
+#define NVC0_2D_BLIT_DV_DY_INT 0x000008cc
+
+#define NVC0_2D_BLIT_SRC_X_FRACT 0x000008d0
+
+#define NVC0_2D_BLIT_SRC_X_INT 0x000008d4
+
+#define NVC0_2D_BLIT_SRC_Y_FRACT 0x000008d8
+
+#define NVC0_2D_BLIT_SRC_Y_INT 0x000008dc
+
+
+#endif /* NVC0_2D_XML */
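
A similar hypothetical sketch (not part of this patch) for the 2D class defined above: packing a NVC0_2D_DRAW_POINT16 coordinate pair, and building a BLIT_CONTROL word from pre-shifted enum values.

#include <stdint.h>
#include "nvc0_2d.xml.h"

/* Hypothetical sketch: X and Y share one 32-bit method word. */
static inline uint32_t
draw_point16(uint16_t x, uint16_t y)
{
   return ((uint32_t)x << NVC0_2D_DRAW_POINT16_X__SHIFT) |
          ((uint32_t)y << NVC0_2D_DRAW_POINT16_Y__SHIFT);
}

/* Enumerated BLIT_CONTROL values are pre-shifted, so they OR together. */
static const uint32_t blit_ctrl = NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER |
                                  NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR;
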
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
new file mode 100644
index 0000000..d3f719d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
@@ -0,0 +1,1350 @@
+#ifndef NVC0_3D_XML
+#define NVC0_3D_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_3d.xml ( 30827 bytes, from 2011-01-13 18:23:07)
+- copyright.xml ( 6452 bytes, from 2010-11-25 23:28:20)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+- nv_3ddefs.xml ( 16394 bytes, from 2010-12-17 15:10:40)
+- nv_object.xml ( 11898 bytes, from 2010-12-23 14:14:20)
+- nvchipsets.xml ( 3074 bytes, from 2010-11-07 00:36:28)
+- nv50_defs.xml ( 4487 bytes, from 2010-12-10 00:37:17)
+
+Copyright (C) 2006-2011 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_3D_NOTIFY_ADDRESS_HIGH 0x00000104
+#define NVC0_3D_NOTIFY_ADDRESS_LOW 0x00000108
+#define NVC0_3D_NOTIFY 0x0000010c
+
+#define NVC0_3D_SERIALIZE 0x00000110
+
+#define NVC0_3D_LINE_WIDTH_SEPARATE 0x0000020c
+
+#define NVC0_3D_FORCE_EARLY_FRAGMENT_TESTS 0x00000210
+
+#define NVC0_3D_MEM_BARRIER 0x0000021c
+#define NVC0_3D_MEM_BARRIER_UNK0 0x00000001
+#define NVC0_3D_MEM_BARRIER_UNK1 0x00000002
+#define NVC0_3D_MEM_BARRIER_UNK2 0x00000004
+#define NVC0_3D_MEM_BARRIER_UNK4 0x00000010
+#define NVC0_3D_MEM_BARRIER_UNK8 0x00000100
+#define NVC0_3D_MEM_BARRIER_UNK12 0x00001000
+
+#define NVC0_3D_CACHE_SPLIT 0x00000308
+#define NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1 0x00000001
+#define NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1 0x00000002
+#define NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1 0x00000003
+
+#define NVC0_3D_TESS_MODE 0x00000320
+#define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f
+#define NVC0_3D_TESS_MODE_PRIM__SHIFT 0
+#define NVC0_3D_TESS_MODE_PRIM_ISOLINES 0x00000000
+#define NVC0_3D_TESS_MODE_PRIM_TRIANGLES 0x00000001
+#define NVC0_3D_TESS_MODE_PRIM_QUADS 0x00000002
+#define NVC0_3D_TESS_MODE_SPACING__MASK 0x000000f0
+#define NVC0_3D_TESS_MODE_SPACING__SHIFT 4
+#define NVC0_3D_TESS_MODE_SPACING_EQUAL 0x00000000
+#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD 0x00000010
+#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN 0x00000020
+#define NVC0_3D_TESS_MODE_CW 0x00000100
+#define NVC0_3D_TESS_MODE_CONNECTED 0x00000200
+
+#define NVC0_3D_TESS_LEVEL_OUTER(i0) (0x00000324 + 0x4*(i0))
+#define NVC0_3D_TESS_LEVEL_OUTER__ESIZE 0x00000004
+#define NVC0_3D_TESS_LEVEL_OUTER__LEN 0x00000004
+
+#define NVC0_3D_TESS_LEVEL_INNER(i0) (0x00000334 + 0x4*(i0))
+#define NVC0_3D_TESS_LEVEL_INNER__ESIZE 0x00000004
+#define NVC0_3D_TESS_LEVEL_INNER__LEN 0x00000002
+
+#define NVC0_3D_RASTERIZE_ENABLE 0x0000037c
+
+#define NVC0_3D_TFB(i0) (0x00000380 + 0x20*(i0))
+#define NVC0_3D_TFB__ESIZE 0x00000020
+#define NVC0_3D_TFB__LEN 0x00000004
+
+#define NVC0_3D_TFB_BUFFER_ENABLE(i0) (0x00000380 + 0x20*(i0))
+
+#define NVC0_3D_TFB_ADDRESS_HIGH(i0) (0x00000384 + 0x20*(i0))
+
+#define NVC0_3D_TFB_ADDRESS_LOW(i0) (0x00000388 + 0x20*(i0))
+
+#define NVC0_3D_TFB_BUFFER_SIZE(i0) (0x0000038c + 0x20*(i0))
+
+#define NVC0_3D_TFB_BUFFER_OFFSET(i0) (0x00000390 + 0x20*(i0))
+
+#define NVC0_3D_TFB_STREAM(i0) (0x00000700 + 0x10*(i0))
+#define NVC0_3D_TFB_STREAM__ESIZE 0x00000010
+#define NVC0_3D_TFB_STREAM__LEN 0x00000004
+
+#define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0))
+#define NVC0_3D_TFB_VARYING_COUNT__ESIZE 0x00000010
+#define NVC0_3D_TFB_VARYING_COUNT__LEN 0x00000004
+
+#define NVC0_3D_TFB_BUFFER_STRIDE(i0) (0x00000708 + 0x10*(i0))
+#define NVC0_3D_TFB_BUFFER_STRIDE__ESIZE 0x00000010
+#define NVC0_3D_TFB_BUFFER_STRIDE__LEN 0x00000004
+
+#define NVC0_3D_TFB_ENABLE 0x00000744
+
+#define NVC0_3D_SAMPLE_SHADING 0x00000754
+#define NVC0_3D_SAMPLE_SHADING_MIN_SAMPLES__MASK 0x0000000f
+#define NVC0_3D_SAMPLE_SHADING_MIN_SAMPLES__SHIFT 0
+#define NVC0_3D_SAMPLE_SHADING_ENABLE 0x00000010
+
+#define NVC0_3D_LOCAL_BASE 0x0000077c
+
+#define NVC0_3D_TEMP_ADDRESS_HIGH 0x00000790
+
+#define NVC0_3D_TEMP_ADDRESS_LOW 0x00000794
+
+#define NVC0_3D_TEMP_SIZE_HIGH 0x00000798
+
+#define NVC0_3D_TEMP_SIZE_LOW 0x0000079c
+
+#define NVC0_3D_WARP_TEMP_ALLOC 0x000007a0
+
+#define NVC0_3D_ZCULL_WIDTH 0x000007c0
+
+#define NVC0_3D_ZCULL_HEIGHT 0x000007c4
+
+#define NVC0_3D_ZCULL_ADDRESS_HIGH 0x000007e8
+
+#define NVC0_3D_ZCULL_ADDRESS_LOW 0x000007ec
+
+#define NVC0_3D_ZCULL_LIMIT_HIGH 0x000007f0
+
+#define NVC0_3D_ZCULL_LIMIT_LOW 0x000007f4
+
+#define NVC0_3D_RT(i0) (0x00000800 + 0x40*(i0))
+#define NVC0_3D_RT__ESIZE 0x00000040
+#define NVC0_3D_RT__LEN 0x00000008
+
+#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x40*(i0))
+
+#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x40*(i0))
+
+#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x40*(i0))
+
+#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x40*(i0))
+
+#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x40*(i0))
+
+#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x40*(i0))
+#define NVC0_3D_RT_TILE_MODE_X 0x00000001
+#define NVC0_3D_RT_TILE_MODE_Y__MASK 0x00000070
+#define NVC0_3D_RT_TILE_MODE_Y__SHIFT 4
+#define NVC0_3D_RT_TILE_MODE_Z__MASK 0x00000700
+#define NVC0_3D_RT_TILE_MODE_Z__SHIFT 8
+#define NVC0_3D_RT_TILE_MODE_LINEAR 0x00001000
+#define NVC0_3D_RT_TILE_MODE_UNK16 0x00010000
+
+#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x40*(i0))
+#define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff
+#define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0
+#define NVC0_3D_RT_ARRAY_MODE_VOLUME 0x00010000
+
+#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x40*(i0))
+
+#define NVC0_3D_RT_BASE_LAYER(i0) (0x00000820 + 0x40*(i0))
+
+#define NVC0_3D_RT_UNK14(i0) (0x00000824 + 0x40*(i0))
+
+#define NVC0_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_SCALE_X__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_SCALE_Y(i0) (0x00000a04 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_SCALE_Y__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_SCALE_Y__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_SCALE_Z(i0) (0x00000a08 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_SCALE_Z__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_SCALE_Z__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_TRANSLATE_X(i0) (0x00000a0c + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_TRANSLATE_X__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_TRANSLATE_X__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_TRANSLATE_Y(i0) (0x00000a10 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_TRANSLATE_Y__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_TRANSLATE_Y__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_TRANSLATE_Z(i0) (0x00000a14 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_TRANSLATE_Z__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_TRANSLATE_Z__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_HORIZ(i0) (0x00000c00 + 0x10*(i0))
+#define NVC0_3D_VIEWPORT_HORIZ__ESIZE 0x00000010
+#define NVC0_3D_VIEWPORT_HORIZ__LEN 0x00000010
+#define NVC0_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff
+#define NVC0_3D_VIEWPORT_HORIZ_X__SHIFT 0
+#define NVC0_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000
+#define NVC0_3D_VIEWPORT_HORIZ_W__SHIFT 16
+
+#define NVC0_3D_VIEWPORT_VERT(i0) (0x00000c04 + 0x10*(i0))
+#define NVC0_3D_VIEWPORT_VERT__ESIZE 0x00000010
+#define NVC0_3D_VIEWPORT_VERT__LEN 0x00000010
+#define NVC0_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff
+#define NVC0_3D_VIEWPORT_VERT_Y__SHIFT 0
+#define NVC0_3D_VIEWPORT_VERT_H__MASK 0xffff0000
+#define NVC0_3D_VIEWPORT_VERT_H__SHIFT 16
+
+#define NVC0_3D_DEPTH_RANGE_NEAR(i0) (0x00000c08 + 0x10*(i0))
+#define NVC0_3D_DEPTH_RANGE_NEAR__ESIZE 0x00000010
+#define NVC0_3D_DEPTH_RANGE_NEAR__LEN 0x00000010
+
+#define NVC0_3D_DEPTH_RANGE_FAR(i0) (0x00000c0c + 0x10*(i0))
+#define NVC0_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010
+#define NVC0_3D_DEPTH_RANGE_FAR__LEN 0x00000010
+
+#define NVC0_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0))
+#define NVC0_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008
+#define NVC0_3D_CLIP_RECT_HORIZ__LEN 0x00000008
+#define NVC0_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff
+#define NVC0_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0
+#define NVC0_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000
+#define NVC0_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16
+
+#define NVC0_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0))
+#define NVC0_3D_CLIP_RECT_VERT__ESIZE 0x00000008
+#define NVC0_3D_CLIP_RECT_VERT__LEN 0x00000008
+#define NVC0_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff
+#define NVC0_3D_CLIP_RECT_VERT_MIN__SHIFT 0
+#define NVC0_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000
+#define NVC0_3D_CLIP_RECT_VERT_MAX__SHIFT 16
+
+#define NVC0_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0))
+#define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008
+#define NVC0_3D_CLIPID_REGION_HORIZ__LEN 0x00000004
+#define NVC0_3D_CLIPID_REGION_HORIZ_X__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_REGION_HORIZ_X__SHIFT 0
+#define NVC0_3D_CLIPID_REGION_HORIZ_W__MASK 0xffff0000
+#define NVC0_3D_CLIPID_REGION_HORIZ_W__SHIFT 16
+
+#define NVC0_3D_CLIPID_REGION_VERT(i0) (0x00000d44 + 0x8*(i0))
+#define NVC0_3D_CLIPID_REGION_VERT__ESIZE 0x00000008
+#define NVC0_3D_CLIPID_REGION_VERT__LEN 0x00000004
+#define NVC0_3D_CLIPID_REGION_VERT_Y__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_REGION_VERT_Y__SHIFT 0
+#define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000
+#define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16
+
+#define NVC0_3D_CALL_LIMIT_LOG 0x00000d64
+
+#define NVC0_3D_COUNTER_ENABLE 0x00000d68
+#define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001
+#define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002
+#define NVC0_3D_COUNTER_ENABLE_UNK02 0x00000004
+#define NVC0_3D_COUNTER_ENABLE_UNK03 0x00000008
+#define NVC0_3D_COUNTER_ENABLE_UNK04 0x00000010
+#define NVC0_3D_COUNTER_ENABLE_EMITTED_PRIMITIVES 0x00000020
+#define NVC0_3D_COUNTER_ENABLE_UNK06 0x00000040
+#define NVC0_3D_COUNTER_ENABLE_UNK07 0x00000080
+#define NVC0_3D_COUNTER_ENABLE_UNK08 0x00000100
+#define NVC0_3D_COUNTER_ENABLE_UNK09 0x00000200
+#define NVC0_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000400
+#define NVC0_3D_COUNTER_ENABLE_UNK0B 0x00000800
+#define NVC0_3D_COUNTER_ENABLE_UNK0C 0x00001000
+#define NVC0_3D_COUNTER_ENABLE_UNK0D 0x00002000
+#define NVC0_3D_COUNTER_ENABLE_UNK0E 0x00004000
+#define NVC0_3D_COUNTER_ENABLE_UNK0F 0x00008000
+
+#define NVC0_3D_VERTEX_BUFFER_FIRST 0x00000d74
+
+#define NVC0_3D_VERTEX_BUFFER_COUNT 0x00000d78
+
+#define NVC0_3D_CLEAR_COLOR(i0) (0x00000d80 + 0x4*(i0))
+#define NVC0_3D_CLEAR_COLOR__ESIZE 0x00000004
+#define NVC0_3D_CLEAR_COLOR__LEN 0x00000004
+
+#define NVC0_3D_CLEAR_DEPTH 0x00000d90
+
+#define NVC0_3D_CLEAR_STENCIL 0x00000da0
+
+#define NVC0_3D_POLYGON_SMOOTH_ENABLE 0x00000db4
+
+#define NVC0_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0
+
+#define NVC0_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4
+
+#define NVC0_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8
+
+#define NVC0_3D_PATCH_VERTICES 0x00000dcc
+
+#define NVC0_3D_WATCHDOG_TIMER 0x00000de4
+
+#define NVC0_3D_WINDOW_OFFSET_X 0x00000df8
+
+#define NVC0_3D_WINDOW_OFFSET_Y 0x00000dfc
+
+#define NVC0_3D_SCISSOR_ENABLE(i0) (0x00000e00 + 0x10*(i0))
+#define NVC0_3D_SCISSOR_ENABLE__ESIZE 0x00000010
+#define NVC0_3D_SCISSOR_ENABLE__LEN 0x00000010
+
+#define NVC0_3D_SCISSOR_HORIZ(i0) (0x00000e04 + 0x10*(i0))
+#define NVC0_3D_SCISSOR_HORIZ__ESIZE 0x00000010
+#define NVC0_3D_SCISSOR_HORIZ__LEN 0x00000010
+#define NVC0_3D_SCISSOR_HORIZ_MIN__MASK 0x0000ffff
+#define NVC0_3D_SCISSOR_HORIZ_MIN__SHIFT 0
+#define NVC0_3D_SCISSOR_HORIZ_MAX__MASK 0xffff0000
+#define NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT 16
+
+#define NVC0_3D_SCISSOR_VERT(i0) (0x00000e08 + 0x10*(i0))
+#define NVC0_3D_SCISSOR_VERT__ESIZE 0x00000010
+#define NVC0_3D_SCISSOR_VERT__LEN 0x00000010
+#define NVC0_3D_SCISSOR_VERT_MIN__MASK 0x0000ffff
+#define NVC0_3D_SCISSOR_VERT_MIN__SHIFT 0
+#define NVC0_3D_SCISSOR_VERT_MAX__MASK 0xffff0000
+#define NVC0_3D_SCISSOR_VERT_MAX__SHIFT 16
+
+#define NVC0_3D_STENCIL_BACK_FUNC_REF 0x00000f54
+
+#define NVC0_3D_STENCIL_BACK_MASK 0x00000f58
+
+#define NVC0_3D_STENCIL_BACK_FUNC_MASK 0x00000f5c
+
+#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_HIGH 0x00000f84
+
+#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88
+
+#define NVC0_3D_COLOR_MASK_COMMON 0x00000f90
+
+#define NVC0_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0))
+#define NVC0_3D_DEPTH_BOUNDS__ESIZE 0x00000004
+#define NVC0_3D_DEPTH_BOUNDS__LEN 0x00000002
+
+#define NVC0_3D_RT_SEPARATE_FRAG_DATA 0x00000fac
+
+#define NVC0_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0))
+#define NVC0_3D_MSAA_MASK__ESIZE 0x00000004
+#define NVC0_3D_MSAA_MASK__LEN 0x00000004
+
+#define NVC0_3D_CLIPID_ADDRESS_HIGH 0x00000fcc
+
+#define NVC0_3D_CLIPID_ADDRESS_LOW 0x00000fd0
+
+#define NVC0_3D_ZETA_ADDRESS_HIGH 0x00000fe0
+
+#define NVC0_3D_ZETA_ADDRESS_LOW 0x00000fe4
+
+#define NVC0_3D_ZETA_FORMAT 0x00000fe8
+
+#define NVC0_3D_ZETA_TILE_MODE 0x00000fec
+
+#define NVC0_3D_ZETA_LAYER_STRIDE 0x00000ff0
+
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ 0x00000ff4
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__MASK 0xffff0000
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT 16
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__MASK 0x0000ffff
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT 0
+
+#define NVC0_3D_SCREEN_SCISSOR_VERT 0x00000ff8
+#define NVC0_3D_SCREEN_SCISSOR_VERT_H__MASK 0xffff0000
+#define NVC0_3D_SCREEN_SCISSOR_VERT_H__SHIFT 16
+#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff
+#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0
+
+#define NVC0_3D_CLEAR_FLAGS 0x000010f8
+#define NVC0_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001
+#define NVC0_3D_CLEAR_FLAGS_UNK4 0x00000010
+#define NVC0_3D_CLEAR_FLAGS_SCISSOR 0x00000100
+#define NVC0_3D_CLEAR_FLAGS_VIEWPORT 0x00001000
+
+#define NVC0_3D_VERTEX_ID 0x00001118
+
+#define NVC0_3D_VTX_ATTR_DEFINE 0x0000114c
+#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__MASK 0x000000ff
+#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT 0
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MASK 0x00000700
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT 8
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MIN 0x00000001
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MAX 0x00000004
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__MASK 0x00007000
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT 12
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_8 0x00001000
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_16 0x00002000
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_32 0x00004000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__MASK 0x00070000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT 16
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SNORM 0x00010000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000
+
+#define NVC0_3D_VTX_ATTR_DATA(i0) (0x00001150 + 0x4*(i0))
+#define NVC0_3D_VTX_ATTR_DATA__ESIZE 0x00000004
+#define NVC0_3D_VTX_ATTR_DATA__LEN 0x00000004
+
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT(i0) (0x00001160 + 0x4*(i0))
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT__ESIZE 0x00000004
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT__LEN 0x00000020
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK 0x0000001f
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT 0
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST 0x00000040
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__MASK 0x001fff80
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT 7
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__MASK 0x07e00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__SHIFT 21
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32_32 0x00200000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32 0x00400000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16_16 0x00600000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32 0x00800000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16 0x00a00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8_8 0x01400000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16 0x01e00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 0x02400000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8 0x02600000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8 0x03000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16 0x03600000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8 0x03a00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_10_10_10_2 0x06000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__MASK 0x38000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__SHIFT 27
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SNORM 0x08000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UNORM 0x10000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SINT 0x18000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT 0x20000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_USCALED 0x28000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SSCALED 0x30000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT 0x38000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BGRA 0x80000000
+
+#define NVC0_3D_RT_CONTROL 0x0000121c
+#define NVC0_3D_RT_CONTROL_COUNT__MASK 0x0000000f
+#define NVC0_3D_RT_CONTROL_COUNT__SHIFT 0
+#define NVC0_3D_RT_CONTROL_MAP0__MASK 0x00000070
+#define NVC0_3D_RT_CONTROL_MAP0__SHIFT 4
+#define NVC0_3D_RT_CONTROL_MAP1__MASK 0x00000380
+#define NVC0_3D_RT_CONTROL_MAP1__SHIFT 7
+#define NVC0_3D_RT_CONTROL_MAP2__MASK 0x00001c00
+#define NVC0_3D_RT_CONTROL_MAP2__SHIFT 10
+#define NVC0_3D_RT_CONTROL_MAP3__MASK 0x0000e000
+#define NVC0_3D_RT_CONTROL_MAP3__SHIFT 13
+#define NVC0_3D_RT_CONTROL_MAP4__MASK 0x00070000
+#define NVC0_3D_RT_CONTROL_MAP4__SHIFT 16
+#define NVC0_3D_RT_CONTROL_MAP5__MASK 0x00380000
+#define NVC0_3D_RT_CONTROL_MAP5__SHIFT 19
+#define NVC0_3D_RT_CONTROL_MAP6__MASK 0x01c00000
+#define NVC0_3D_RT_CONTROL_MAP6__SHIFT 22
+#define NVC0_3D_RT_CONTROL_MAP7__MASK 0x0e000000
+#define NVC0_3D_RT_CONTROL_MAP7__SHIFT 25
+
+#define NVC0_3D_ZETA_HORIZ 0x00001228
+
+#define NVC0_3D_ZETA_VERT 0x0000122c
+
+#define NVC0_3D_ZETA_ARRAY_MODE 0x00001230
+#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__MASK 0x0000ffff
+#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT 0
+#define NVC0_3D_ZETA_ARRAY_MODE_UNK 0x00010000
+
+#define NVC0_3D_LINKED_TSC 0x00001234
+
+#define NVC0_3D_DRAW_TFB_BYTES 0x0000123c
+
+#define NVC0_3D_FP_RESULT_COUNT 0x00001298
+
+#define NVC0_3D_DEPTH_TEST_ENABLE 0x000012cc
+
+#define NVC0_3D_D3D_FILL_MODE 0x000012d0
+#define NVC0_3D_D3D_FILL_MODE_POINT 0x00000001
+#define NVC0_3D_D3D_FILL_MODE_WIREFRAME 0x00000002
+#define NVC0_3D_D3D_FILL_MODE_SOLID 0x00000003
+
+#define NVC0_3D_SHADE_MODEL 0x000012d4
+#define NVC0_3D_SHADE_MODEL_FLAT 0x00001d00
+#define NVC0_3D_SHADE_MODEL_SMOOTH 0x00001d01
+
+#define NVC0_3D_BLEND_INDEPENDENT 0x000012e4
+
+#define NVC0_3D_DEPTH_WRITE_ENABLE 0x000012e8
+
+#define NVC0_3D_ALPHA_TEST_ENABLE 0x000012ec
+
+#define NVC0_3D_VB_ELEMENT_U8_SETUP 0x00001300
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK 0xc0000000
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT 30
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK 0x3fffffff
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT 0
+
+#define NVC0_3D_VB_ELEMENT_U8 0x00001304
+#define NVC0_3D_VB_ELEMENT_U8_I0__MASK 0x000000ff
+#define NVC0_3D_VB_ELEMENT_U8_I0__SHIFT 0
+#define NVC0_3D_VB_ELEMENT_U8_I1__MASK 0x0000ff00
+#define NVC0_3D_VB_ELEMENT_U8_I1__SHIFT 8
+#define NVC0_3D_VB_ELEMENT_U8_I2__MASK 0x00ff0000
+#define NVC0_3D_VB_ELEMENT_U8_I2__SHIFT 16
+#define NVC0_3D_VB_ELEMENT_U8_I3__MASK 0xff000000
+#define NVC0_3D_VB_ELEMENT_U8_I3__SHIFT 24
+
+#define NVC0_3D_D3D_CULL_MODE 0x00001308
+#define NVC0_3D_D3D_CULL_MODE_NONE 0x00000001
+#define NVC0_3D_D3D_CULL_MODE_FRONT 0x00000002
+#define NVC0_3D_D3D_CULL_MODE_BACK 0x00000003
+
+#define NVC0_3D_DEPTH_TEST_FUNC 0x0000130c
+#define NVC0_3D_DEPTH_TEST_FUNC_NEVER 0x00000200
+#define NVC0_3D_DEPTH_TEST_FUNC_LESS 0x00000201
+#define NVC0_3D_DEPTH_TEST_FUNC_EQUAL 0x00000202
+#define NVC0_3D_DEPTH_TEST_FUNC_LEQUAL 0x00000203
+#define NVC0_3D_DEPTH_TEST_FUNC_GREATER 0x00000204
+#define NVC0_3D_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205
+#define NVC0_3D_DEPTH_TEST_FUNC_GEQUAL 0x00000206
+#define NVC0_3D_DEPTH_TEST_FUNC_ALWAYS 0x00000207
+
+#define NVC0_3D_ALPHA_TEST_REF 0x00001310
+
+#define NVC0_3D_ALPHA_TEST_FUNC 0x00001314
+#define NVC0_3D_ALPHA_TEST_FUNC_NEVER 0x00000200
+#define NVC0_3D_ALPHA_TEST_FUNC_LESS 0x00000201
+#define NVC0_3D_ALPHA_TEST_FUNC_EQUAL 0x00000202
+#define NVC0_3D_ALPHA_TEST_FUNC_LEQUAL 0x00000203
+#define NVC0_3D_ALPHA_TEST_FUNC_GREATER 0x00000204
+#define NVC0_3D_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205
+#define NVC0_3D_ALPHA_TEST_FUNC_GEQUAL 0x00000206
+#define NVC0_3D_ALPHA_TEST_FUNC_ALWAYS 0x00000207
+
+#define NVC0_3D_DRAW_TFB_STRIDE 0x00001318
+#define NVC0_3D_DRAW_TFB_STRIDE__MIN 0x00000001
+#define NVC0_3D_DRAW_TFB_STRIDE__MAX 0x00000fff
+
+#define NVC0_3D_BLEND_COLOR(i0) (0x0000131c + 0x4*(i0))
+#define NVC0_3D_BLEND_COLOR__ESIZE 0x00000004
+#define NVC0_3D_BLEND_COLOR__LEN 0x00000004
+
+#define NVC0_3D_TSC_FLUSH 0x00001330
+#define NVC0_3D_TSC_FLUSH_SPECIFIC 0x00000001
+#define NVC0_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVC0_3D_TSC_FLUSH_ENTRY__SHIFT 4
+
+#define NVC0_3D_TIC_FLUSH 0x00001334
+#define NVC0_3D_TIC_FLUSH_SPECIFIC 0x00000001
+#define NVC0_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVC0_3D_TIC_FLUSH_ENTRY__SHIFT 4
+
+#define NVC0_3D_TEX_CACHE_CTL 0x00001338
+#define NVC0_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030
+#define NVC0_3D_TEX_CACHE_CTL_UNK1__SHIFT 4
+
+#define NVC0_3D_BLEND_SEPARATE_ALPHA 0x0000133c
+
+#define NVC0_3D_BLEND_EQUATION_RGB 0x00001340
+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NVC0_3D_BLEND_EQUATION_RGB_MIN 0x00008007
+#define NVC0_3D_BLEND_EQUATION_RGB_MAX 0x00008008
+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVC0_3D_BLEND_FUNC_SRC_RGB 0x00001344
+
+#define NVC0_3D_BLEND_FUNC_DST_RGB 0x00001348
+
+#define NVC0_3D_BLEND_EQUATION_ALPHA 0x0000134c
+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
+#define NVC0_3D_BLEND_EQUATION_ALPHA_MIN 0x00008007
+#define NVC0_3D_BLEND_EQUATION_ALPHA_MAX 0x00008008
+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVC0_3D_BLEND_FUNC_SRC_ALPHA 0x00001350
+
+#define NVC0_3D_BLEND_FUNC_DST_ALPHA 0x00001358
+
+#define NVC0_3D_BLEND_ENABLE_COMMON 0x0000135c
+
+#define NVC0_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0))
+#define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004
+#define NVC0_3D_BLEND_ENABLE__LEN 0x00000008
+
+#define NVC0_3D_STENCIL_ENABLE 0x00001380
+
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL 0x00001384
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL 0x00001388
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS 0x0000138c
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC 0x00001390
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207
+
+#define NVC0_3D_STENCIL_FRONT_FUNC_REF 0x00001394
+
+#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x00001398
+
+#define NVC0_3D_STENCIL_FRONT_MASK 0x0000139c
+
+#define NVC0_3D_DRAW_TFB_BASE 0x000013a4
+
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN 0x000013a8
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_0 0x00000001
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_1 0x00000010
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_2 0x00000100
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_3 0x00001000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_4 0x00010000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_5 0x00100000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_6 0x01000000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_7 0x10000000
+
+#define NVC0_3D_SCREEN_Y_CONTROL 0x000013ac
+#define NVC0_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001
+#define NVC0_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010
+
+#define NVC0_3D_LINE_WIDTH_SMOOTH 0x000013b0
+
+#define NVC0_3D_LINE_WIDTH_ALIASED 0x000013b4
+
+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420
+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001
+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MAX 0x00000400
+
+#define NVC0_3D_VERTEX_ARRAY_FLUSH 0x0000142c
+
+#define NVC0_3D_VB_ELEMENT_BASE 0x00001434
+
+#define NVC0_3D_VB_INSTANCE_BASE 0x00001438
+
+#define NVC0_3D_CODE_CB_FLUSH 0x00001440
+
+#define NVC0_3D_CLIPID_HEIGHT 0x00001504
+#define NVC0_3D_CLIPID_HEIGHT__MAX 0x00002000
+
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ 0x00001508
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16
+
+#define NVC0_3D_CLIPID_FILL_RECT_VERT 0x0000150c
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16
+
+#define NVC0_3D_CLIP_DISTANCE_ENABLE 0x00001510
+#define NVC0_3D_CLIP_DISTANCE_ENABLE_0 0x00000001
+#define NVC0_3D_CLIP_DISTANCE_ENABLE_1 0x00000002
+#define NVC0_3D_CLIP_DISTANCE_ENABLE_2 0x00000004
+#define NVC0_3D_CLIP_DISTANCE_ENABLE_3 0x00000008
+#define NVC0_3D_CLIP_DISTANCE_ENABLE_4 0x00000010
+#define NVC0_3D_CLIP_DISTANCE_ENABLE_5 0x00000020
+#define NVC0_3D_CLIP_DISTANCE_ENABLE_6 0x00000040
+#define NVC0_3D_CLIP_DISTANCE_ENABLE_7 0x00000080
+
+#define NVC0_3D_SAMPLECNT_ENABLE 0x00001514
+
+#define NVC0_3D_POINT_SIZE 0x00001518
+
+#define NVC0_3D_ZCULL_STATCTRS_ENABLE 0x0000151c
+
+#define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520
+
+#define NVC0_3D_COUNTER_RESET 0x00001530
+#define NVC0_3D_COUNTER_RESET_SAMPLECNT 0x00000001
+#define NVC0_3D_COUNTER_RESET_UNK02 0x00000002
+#define NVC0_3D_COUNTER_RESET_UNK03 0x00000003
+#define NVC0_3D_COUNTER_RESET_UNK04 0x00000004
+#define NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES 0x00000010
+#define NVC0_3D_COUNTER_RESET_UNK11 0x00000011
+#define NVC0_3D_COUNTER_RESET_UNK12 0x00000012
+#define NVC0_3D_COUNTER_RESET_UNK13 0x00000013
+#define NVC0_3D_COUNTER_RESET_UNK15 0x00000015
+#define NVC0_3D_COUNTER_RESET_UNK16 0x00000016
+#define NVC0_3D_COUNTER_RESET_UNK17 0x00000017
+#define NVC0_3D_COUNTER_RESET_UNK18 0x00000018
+#define NVC0_3D_COUNTER_RESET_UNK1A 0x0000001a
+#define NVC0_3D_COUNTER_RESET_UNK1B 0x0000001b
+#define NVC0_3D_COUNTER_RESET_UNK1C 0x0000001c
+#define NVC0_3D_COUNTER_RESET_UNK1D 0x0000001d
+#define NVC0_3D_COUNTER_RESET_UNK1E 0x0000001e
+#define NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x0000001f
+
+#define NVC0_3D_MULTISAMPLE_ENABLE 0x00001534
+
+#define NVC0_3D_ZETA_ENABLE 0x00001538
+
+#define NVC0_3D_MULTISAMPLE_CTRL 0x0000153c
+#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE 0x00000001
+#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE 0x00000010
+
+#define NVC0_3D_COND_ADDRESS_HIGH 0x00001550
+
+#define NVC0_3D_COND_ADDRESS_LOW 0x00001554
+
+#define NVC0_3D_COND_MODE 0x00001558
+#define NVC0_3D_COND_MODE_NEVER 0x00000000
+#define NVC0_3D_COND_MODE_ALWAYS 0x00000001
+#define NVC0_3D_COND_MODE_RES_NON_ZERO 0x00000002
+#define NVC0_3D_COND_MODE_EQUAL 0x00000003
+#define NVC0_3D_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NVC0_3D_TSC_ADDRESS_HIGH 0x0000155c
+
+#define NVC0_3D_TSC_ADDRESS_LOW 0x00001560
+#define NVC0_3D_TSC_ADDRESS_LOW__ALIGN 0x00000020
+
+#define NVC0_3D_TSC_LIMIT 0x00001564
+#define NVC0_3D_TSC_LIMIT__MAX 0x00001fff
+
+#define NVC0_3D_POLYGON_OFFSET_FACTOR 0x0000156c
+
+#define NVC0_3D_LINE_SMOOTH_ENABLE 0x00001570
+
+#define NVC0_3D_TIC_ADDRESS_HIGH 0x00001574
+
+#define NVC0_3D_TIC_ADDRESS_LOW 0x00001578
+
+#define NVC0_3D_TIC_LIMIT 0x0000157c
+
+#define NVC0_3D_ZCULL_REGION 0x00001590
+
+#define NVC0_3D_STENCIL_TWO_SIDE_ENABLE 0x00001594
+
+#define NVC0_3D_STENCIL_BACK_OP_FAIL 0x00001598
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_ZERO 0x00000000
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR 0x00001e02
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR 0x00001e03
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL 0x0000159c
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS 0x000015a0
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC 0x000015a4
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207
+
+#define NVC0_3D_CSAA_ENABLE 0x000015b4
+
+#define NVC0_3D_FRAMEBUFFER_SRGB 0x000015b8
+
+#define NVC0_3D_POLYGON_OFFSET_UNITS 0x000015bc
+
+#define NVC0_3D_LAYER 0x000015cc
+#define NVC0_3D_LAYER_IDX__MASK 0x0000ffff
+#define NVC0_3D_LAYER_IDX__SHIFT 0
+#define NVC0_3D_LAYER_USE_GP 0x00010000
+
+#define NVC0_3D_MULTISAMPLE_MODE 0x000015d0
+#define NVC0_3D_MULTISAMPLE_MODE_MS1 0x00000000
+#define NVC0_3D_MULTISAMPLE_MODE_MS2 0x00000001
+#define NVC0_3D_MULTISAMPLE_MODE_MS4 0x00000002
+#define NVC0_3D_MULTISAMPLE_MODE_MS8 0x00000003
+#define NVC0_3D_MULTISAMPLE_MODE_MS8_ALT 0x00000004
+#define NVC0_3D_MULTISAMPLE_MODE_MS2_ALT 0x00000005
+#define NVC0_3D_MULTISAMPLE_MODE_UNK6 0x00000006
+#define NVC0_3D_MULTISAMPLE_MODE_MS4_CS4 0x00000008
+#define NVC0_3D_MULTISAMPLE_MODE_MS4_CS12 0x00000009
+#define NVC0_3D_MULTISAMPLE_MODE_MS8_CS8 0x0000000a
+#define NVC0_3D_MULTISAMPLE_MODE_MS8_CS24 0x0000000b
+
+#define NVC0_3D_VERTEX_BEGIN_D3D 0x000015d4
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT 0
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS 0x00000001
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES 0x00000002
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP 0x00000003
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES 0x00000004
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP 0x00000005
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY 0x0000000a
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
+#define NVC0_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT 0x10000000
+
+#define NVC0_3D_VERTEX_END_D3D 0x000015d8
+#define NVC0_3D_VERTEX_END_D3D_UNK0 0x00000001
+#define NVC0_3D_VERTEX_END_D3D_UNK1 0x00000002
+
+#define NVC0_3D_EDGEFLAG 0x000015e4
+
+#define NVC0_3D_VB_ELEMENT_U32 0x000015e8
+
+#define NVC0_3D_VB_ELEMENT_U16_SETUP 0x000015ec
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK 0xc0000000
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT 30
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK 0x3fffffff
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT 0
+
+#define NVC0_3D_VB_ELEMENT_U16 0x000015f0
+#define NVC0_3D_VB_ELEMENT_U16_I0__MASK 0x0000ffff
+#define NVC0_3D_VB_ELEMENT_U16_I0__SHIFT 0
+#define NVC0_3D_VB_ELEMENT_U16_I1__MASK 0xffff0000
+#define NVC0_3D_VB_ELEMENT_U16_I1__SHIFT 16
+
+#define NVC0_3D_VERTEX_BASE_HIGH 0x000015f4
+
+#define NVC0_3D_VERTEX_BASE_LOW 0x000015f8
+
+#define NVC0_3D_ZCULL_WINDOW_OFFSET_X 0x000015fc
+
+#define NVC0_3D_ZCULL_WINDOW_OFFSET_Y 0x00001600
+
+#define NVC0_3D_POINT_COORD_REPLACE 0x00001604
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__MASK 0x00000004
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__SHIFT 2
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT 0x00000000
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT 0x00000004
+#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__MASK 0x000007f8
+#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__SHIFT 3
+
+#define NVC0_3D_CODE_ADDRESS_HIGH 0x00001608
+
+#define NVC0_3D_CODE_ADDRESS_LOW 0x0000160c
+
+#define NVC0_3D_VERTEX_END_GL 0x00001614
+#define NVC0_3D_VERTEX_END_GL_UNK0 0x00000001
+#define NVC0_3D_VERTEX_END_GL_UNK1 0x00000002
+
+#define NVC0_3D_VERTEX_BEGIN_GL 0x00001618
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK 0x0fffffff
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT 0
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS 0x00000000
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES 0x00000001
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP 0x00000002
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP 0x00000003
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES 0x00000004
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP 0x00000005
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN 0x00000006
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS 0x00000007
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP 0x00000008
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON 0x00000009
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY 0x0000000a
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_PATCHES 0x0000000e
+#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x04000000
+#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT 0x08000000
+
+#define NVC0_3D_VERTEX_ID_REPLACE 0x0000161c
+#define NVC0_3D_VERTEX_ID_REPLACE_ENABLE 0x00000001
+#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE__MASK 0x00000ff0
+#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE__SHIFT 4
+
+#define NVC0_3D_VERTEX_DATA 0x00001640
+
+#define NVC0_3D_PRIM_RESTART_ENABLE 0x00001644
+
+#define NVC0_3D_PRIM_RESTART_INDEX 0x00001648
+
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN 0x0000164c
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID 0x00000001
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID 0x00000010
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID 0x00000100
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_UNK12 0x00001000
+
+#define NVC0_3D_POINT_SMOOTH_ENABLE 0x00001658
+
+#define NVC0_3D_POINT_RASTER_RULES 0x0000165c
+#define NVC0_3D_POINT_RASTER_RULES_OGL 0x00000000
+#define NVC0_3D_POINT_RASTER_RULES_D3D 0x00000001
+
+#define NVC0_3D_TEX_MISC 0x00001664
+#define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004
+
+#define NVC0_3D_LINE_STIPPLE_ENABLE 0x0000166c
+
+#define NVC0_3D_LINE_STIPPLE_PATTERN 0x00001680
+
+#define NVC0_3D_PROVOKING_VERTEX_LAST 0x00001684
+
+#define NVC0_3D_VERTEX_TWO_SIDE_ENABLE 0x00001688
+
+#define NVC0_3D_POLYGON_STIPPLE_ENABLE 0x0000168c
+
+#define NVC0_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001700 + 0x4*(i0))
+#define NVC0_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004
+#define NVC0_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020
+
+#define NVC0_3D_ZETA_BASE_LAYER 0x0000179c
+
+#define NVC0_3D_VERTEX_QUARANTINE_ADDRESS_HIGH 0x000017bc
+
+#define NVC0_3D_VERTEX_QUARANTINE_ADDRESS_LOW 0x000017c0
+
+#define NVC0_3D_VERTEX_QUARANTINE_SIZE 0x000017c4
+#define NVC0_3D_VERTEX_QUARANTINE_SIZE_16K 0x00000001
+#define NVC0_3D_VERTEX_QUARANTINE_SIZE_32K 0x00000002
+#define NVC0_3D_VERTEX_QUARANTINE_SIZE_64K 0x00000003
+
+#define NVC0_3D_STRMOUT_UNK1780(i0) (0x00001780 + 0x4*(i0))
+#define NVC0_3D_STRMOUT_UNK1780__ESIZE 0x00000004
+#define NVC0_3D_STRMOUT_UNK1780__LEN 0x00000004
+
+#define NVC0_3D_UNK17BC_ADDRESS_HIGH 0x000017bc
+
+#define NVC0_3D_UNK17BC_ADDRESS_LOW 0x000017c0
+
+#define NVC0_3D_UNK17BC_LIMIT 0x000017c4
+
+#define NVC0_3D_INDEX_ARRAY_START_HIGH 0x000017c8
+
+#define NVC0_3D_INDEX_ARRAY_START_LOW 0x000017cc
+
+#define NVC0_3D_INDEX_ARRAY_LIMIT_HIGH 0x000017d0
+
+#define NVC0_3D_INDEX_ARRAY_LIMIT_LOW 0x000017d4
+
+#define NVC0_3D_INDEX_LOG2_SIZE 0x000017d8
+
+#define NVC0_3D_INDEX_BATCH_FIRST 0x000017dc
+
+#define NVC0_3D_INDEX_BATCH_COUNT 0x000017e0
+
+#define NVC0_3D_POLYGON_OFFSET_CLAMP 0x0000187c
+
+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(i0) (0x00001880 + 0x4*(i0))
+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE 0x00000004
+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__LEN 0x00000020
+
+#define NVC0_3D_VP_POINT_SIZE_EN 0x00001910
+
+#define NVC0_3D_CULL_FACE_ENABLE 0x00001918
+
+#define NVC0_3D_FRONT_FACE 0x0000191c
+#define NVC0_3D_FRONT_FACE_CW 0x00000900
+#define NVC0_3D_FRONT_FACE_CCW 0x00000901
+
+#define NVC0_3D_CULL_FACE 0x00001920
+#define NVC0_3D_CULL_FACE_FRONT 0x00000404
+#define NVC0_3D_CULL_FACE_BACK 0x00000405
+#define NVC0_3D_CULL_FACE_FRONT_AND_BACK 0x00000408
+
+#define NVC0_3D_LINE_LAST_PIXEL 0x00001924
+
+#define NVC0_3D_VIEWPORT_TRANSFORM_EN 0x0000192c
+
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1 0x00000001
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__MASK 0x00000006
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__SHIFT 1
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK0 0x00000000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 0x00000002
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK2 0x00000004
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000
+
+#define NVC0_3D_CLIP_DISTANCE_MODE 0x00001940
+#define NVC0_3D_CLIP_DISTANCE_MODE_0__MASK 0x00000001
+#define NVC0_3D_CLIP_DISTANCE_MODE_0__SHIFT 0
+#define NVC0_3D_CLIP_DISTANCE_MODE_0_CLIP 0x00000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_0_CULL 0x00000001
+#define NVC0_3D_CLIP_DISTANCE_MODE_1__MASK 0x00000010
+#define NVC0_3D_CLIP_DISTANCE_MODE_1__SHIFT 4
+#define NVC0_3D_CLIP_DISTANCE_MODE_1_CLIP 0x00000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_1_CULL 0x00000010
+#define NVC0_3D_CLIP_DISTANCE_MODE_2__MASK 0x00000100
+#define NVC0_3D_CLIP_DISTANCE_MODE_2__SHIFT 8
+#define NVC0_3D_CLIP_DISTANCE_MODE_2_CLIP 0x00000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_2_CULL 0x00000100
+#define NVC0_3D_CLIP_DISTANCE_MODE_3__MASK 0x00001000
+#define NVC0_3D_CLIP_DISTANCE_MODE_3__SHIFT 12
+#define NVC0_3D_CLIP_DISTANCE_MODE_3_CLIP 0x00000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_3_CULL 0x00001000
+#define NVC0_3D_CLIP_DISTANCE_MODE_4__MASK 0x00010000
+#define NVC0_3D_CLIP_DISTANCE_MODE_4__SHIFT 16
+#define NVC0_3D_CLIP_DISTANCE_MODE_4_CLIP 0x00000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_4_CULL 0x00010000
+#define NVC0_3D_CLIP_DISTANCE_MODE_5__MASK 0x00100000
+#define NVC0_3D_CLIP_DISTANCE_MODE_5__SHIFT 20
+#define NVC0_3D_CLIP_DISTANCE_MODE_5_CLIP 0x00000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_5_CULL 0x00100000
+#define NVC0_3D_CLIP_DISTANCE_MODE_6__MASK 0x01000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_6__SHIFT 24
+#define NVC0_3D_CLIP_DISTANCE_MODE_6_CLIP 0x00000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_6_CULL 0x01000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_7__MASK 0x10000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_7__SHIFT 28
+#define NVC0_3D_CLIP_DISTANCE_MODE_7_CLIP 0x00000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_7_CULL 0x10000000
+
+#define NVC0_3D_CLIP_RECTS_EN 0x0000194c
+
+#define NVC0_3D_CLIP_RECTS_MODE 0x00001950
+#define NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000
+#define NVC0_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001
+#define NVC0_3D_CLIP_RECTS_MODE_NEVER 0x00000002
+
+#define NVC0_3D_ZCULL_INVALIDATE 0x00001958
+
+#define NVC0_3D_ZCULL_TEST_MASK 0x0000196c
+#define NVC0_3D_ZCULL_TEST_MASK_FAIL_GT_PASS_LT 0x00000001
+#define NVC0_3D_ZCULL_TEST_MASK_PASS_GT_FAIL_LT 0x00000010
+
+#define NVC0_3D_FP_ZORDER_CTRL 0x0000196c
+#define NVC0_3D_FP_ZORDER_CTRL_0 0x00000001
+#define NVC0_3D_FP_ZORDER_CTRL_1 0x00000010
+
+#define NVC0_3D_CLIPID_ENABLE 0x0000197c
+
+#define NVC0_3D_CLIPID_WIDTH 0x00001980
+#define NVC0_3D_CLIPID_WIDTH__MAX 0x00002000
+#define NVC0_3D_CLIPID_WIDTH__ALIGN 0x00000040
+
+#define NVC0_3D_CLIPID_ID 0x00001984
+
+#define NVC0_3D_DEPTH_BOUNDS_EN 0x000019bc
+
+#define NVC0_3D_LOGIC_OP_ENABLE 0x000019c4
+
+#define NVC0_3D_LOGIC_OP 0x000019c8
+#define NVC0_3D_LOGIC_OP_CLEAR 0x00001500
+#define NVC0_3D_LOGIC_OP_AND 0x00001501
+#define NVC0_3D_LOGIC_OP_AND_REVERSE 0x00001502
+#define NVC0_3D_LOGIC_OP_COPY 0x00001503
+#define NVC0_3D_LOGIC_OP_AND_INVERTED 0x00001504
+#define NVC0_3D_LOGIC_OP_NOOP 0x00001505
+#define NVC0_3D_LOGIC_OP_XOR 0x00001506
+#define NVC0_3D_LOGIC_OP_OR 0x00001507
+#define NVC0_3D_LOGIC_OP_NOR 0x00001508
+#define NVC0_3D_LOGIC_OP_EQUIV 0x00001509
+#define NVC0_3D_LOGIC_OP_INVERT 0x0000150a
+#define NVC0_3D_LOGIC_OP_OR_REVERSE 0x0000150b
+#define NVC0_3D_LOGIC_OP_COPY_INVERTED 0x0000150c
+#define NVC0_3D_LOGIC_OP_OR_INVERTED 0x0000150d
+#define NVC0_3D_LOGIC_OP_NAND 0x0000150e
+#define NVC0_3D_LOGIC_OP_SET 0x0000150f
+
+#define NVC0_3D_ZETA_COMP_ENABLE 0x000019cc
+
+#define NVC0_3D_CLEAR_BUFFERS 0x000019d0
+#define NVC0_3D_CLEAR_BUFFERS_Z 0x00000001
+#define NVC0_3D_CLEAR_BUFFERS_S 0x00000002
+#define NVC0_3D_CLEAR_BUFFERS_R 0x00000004
+#define NVC0_3D_CLEAR_BUFFERS_G 0x00000008
+#define NVC0_3D_CLEAR_BUFFERS_B 0x00000010
+#define NVC0_3D_CLEAR_BUFFERS_A 0x00000020
+#define NVC0_3D_CLEAR_BUFFERS_RT__MASK 0x000003c0
+#define NVC0_3D_CLEAR_BUFFERS_RT__SHIFT 6
+#define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00
+#define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT 10
+
+#define NVC0_3D_CLIPID_FILL 0x000019d4
+
+#define NVC0_3D_RT_COMP_ENABLE(i0) (0x000019e0 + 0x4*(i0))
+#define NVC0_3D_RT_COMP_ENABLE__ESIZE 0x00000004
+#define NVC0_3D_RT_COMP_ENABLE__LEN 0x00000008
+
+#define NVC0_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0))
+#define NVC0_3D_COLOR_MASK__ESIZE 0x00000004
+#define NVC0_3D_COLOR_MASK__LEN 0x00000008
+#define NVC0_3D_COLOR_MASK_R 0x0000000f
+#define NVC0_3D_COLOR_MASK_G 0x000000f0
+#define NVC0_3D_COLOR_MASK_B 0x00000f00
+#define NVC0_3D_COLOR_MASK_A 0x0000f000
+
+#define NVC0_3D_QUERY_ADDRESS_HIGH 0x00001b00
+
+#define NVC0_3D_QUERY_ADDRESS_LOW 0x00001b04
+
+#define NVC0_3D_QUERY_SEQUENCE 0x00001b08
+
+#define NVC0_3D_QUERY_GET 0x00001b0c
+#define NVC0_3D_QUERY_GET_MODE__MASK 0x00000003
+#define NVC0_3D_QUERY_GET_MODE__SHIFT 0
+#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000
+#define NVC0_3D_QUERY_GET_MODE_SYNC 0x00000001
+#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002
+#define NVC0_3D_QUERY_GET_FENCE 0x00000010
+#define NVC0_3D_QUERY_GET_STREAM__MASK 0x000000e0
+#define NVC0_3D_QUERY_GET_STREAM__SHIFT 5
+#define NVC0_3D_QUERY_GET_UNK8 0x00000100
+#define NVC0_3D_QUERY_GET_UNIT__MASK 0x0000f000
+#define NVC0_3D_QUERY_GET_UNIT__SHIFT 12
+#define NVC0_3D_QUERY_GET_SYNC_COND__MASK 0x00010000
+#define NVC0_3D_QUERY_GET_SYNC_COND__SHIFT 16
+#define NVC0_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000
+#define NVC0_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000
+#define NVC0_3D_QUERY_GET_INTR 0x00100000
+#define NVC0_3D_QUERY_GET_UNK21 0x00200000
+#define NVC0_3D_QUERY_GET_SELECT__MASK 0x0f800000
+#define NVC0_3D_QUERY_GET_SELECT__SHIFT 23
+#define NVC0_3D_QUERY_GET_SELECT_ZERO 0x00000000
+#define NVC0_3D_QUERY_GET_SELECT_SAMPLECNT 0x01000000
+#define NVC0_3D_QUERY_GET_SELECT_EMITTED_PRIMS 0x05800000
+#define NVC0_3D_QUERY_GET_SELECT_GENERATED_PRIMS 0x09000000
+#define NVC0_3D_QUERY_GET_SHORT 0x10000000
+
+#define NVC0_3D_VERTEX_ARRAY_FETCH(i0) (0x00001c00 + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010
+#define NVC0_3D_VERTEX_ARRAY_FETCH__LEN 0x00000020
+#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK 0x00000fff
+#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT 0
+#define NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE 0x00001000
+
+#define NVC0_3D_VERTEX_ARRAY_START_HIGH(i0) (0x00001c04 + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_START_HIGH__ESIZE 0x00000010
+#define NVC0_3D_VERTEX_ARRAY_START_HIGH__LEN 0x00000020
+
+#define NVC0_3D_VERTEX_ARRAY_START_LOW(i0) (0x00001c08 + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_START_LOW__ESIZE 0x00000010
+#define NVC0_3D_VERTEX_ARRAY_START_LOW__LEN 0x00000020
+
+#define NVC0_3D_VERTEX_ARRAY_DIVISOR(i0) (0x00001c0c + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_DIVISOR__ESIZE 0x00000010
+#define NVC0_3D_VERTEX_ARRAY_DIVISOR__LEN 0x00000020
+
+#define NVC0_3D_IBLEND(i0) (0x00001e00 + 0x20*(i0))
+#define NVC0_3D_IBLEND__ESIZE 0x00000020
+#define NVC0_3D_IBLEND__LEN 0x00000008
+
+#define NVC0_3D_IBLEND_EQUATION_RGB(i0) (0x00001e04 + 0x20*(i0))
+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NVC0_3D_IBLEND_EQUATION_RGB_MIN 0x00008007
+#define NVC0_3D_IBLEND_EQUATION_RGB_MAX 0x00008008
+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVC0_3D_IBLEND_FUNC_SRC_RGB(i0) (0x00001e08 + 0x20*(i0))
+
+#define NVC0_3D_IBLEND_FUNC_DST_RGB(i0) (0x00001e0c + 0x20*(i0))
+
+#define NVC0_3D_IBLEND_EQUATION_ALPHA(i0) (0x00001e10 + 0x20*(i0))
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_MIN 0x00008007
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_MAX 0x00008008
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVC0_3D_IBLEND_FUNC_SRC_ALPHA(i0) (0x00001e14 + 0x20*(i0))
+
+#define NVC0_3D_IBLEND_FUNC_DST_ALPHA(i0) (0x00001e18 + 0x20*(i0))
+
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00001f00 + 0x8*(i0))
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__ESIZE 0x00000008
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__LEN 0x00000020
+
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00001f04 + 0x8*(i0))
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__ESIZE 0x00000008
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__LEN 0x00000020
+
+#define NVC0_3D_SP(i0) (0x00002000 + 0x40*(i0))
+#define NVC0_3D_SP__ESIZE 0x00000040
+#define NVC0_3D_SP__LEN 0x00000006
+
+#define NVC0_3D_SP_SELECT(i0) (0x00002000 + 0x40*(i0))
+#define NVC0_3D_SP_SELECT_ENABLE 0x00000001
+#define NVC0_3D_SP_SELECT_PROGRAM__MASK 0x00000070
+#define NVC0_3D_SP_SELECT_PROGRAM__SHIFT 4
+#define NVC0_3D_SP_SELECT_PROGRAM_VP_A 0x00000000
+#define NVC0_3D_SP_SELECT_PROGRAM_VP_B 0x00000010
+#define NVC0_3D_SP_SELECT_PROGRAM_TCP 0x00000020
+#define NVC0_3D_SP_SELECT_PROGRAM_TEP 0x00000030
+#define NVC0_3D_SP_SELECT_PROGRAM_GP 0x00000040
+#define NVC0_3D_SP_SELECT_PROGRAM_FP 0x00000050
+
+#define NVC0_3D_SP_START_ID(i0) (0x00002004 + 0x40*(i0))
+
+#define NVC0_3D_SP_GPR_ALLOC(i0) (0x0000200c + 0x40*(i0))
+
+#define NVC0_3D_TEX_LIMITS(i0) (0x00002200 + 0x10*(i0))
+#define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010
+#define NVC0_3D_TEX_LIMITS__LEN 0x00000005
+
+#define NVC0_3D_FIRMWARE(i0) (0x00002300 + 0x4*(i0))
+#define NVC0_3D_FIRMWARE__ESIZE 0x00000004
+#define NVC0_3D_FIRMWARE__LEN 0x00000020
+
+#define NVC0_3D_CB_SIZE 0x00002380
+
+#define NVC0_3D_CB_ADDRESS_HIGH 0x00002384
+
+#define NVC0_3D_CB_ADDRESS_LOW 0x00002388
+
+#define NVC0_3D_CB_POS 0x0000238c
+
+#define NVC0_3D_CB_DATA(i0) (0x00002390 + 0x4*(i0))
+#define NVC0_3D_CB_DATA__ESIZE 0x00000004
+#define NVC0_3D_CB_DATA__LEN 0x00000010
+
+#define NVC0_3D_BIND_TSC(i0) (0x00002400 + 0x20*(i0))
+#define NVC0_3D_BIND_TSC__ESIZE 0x00000020
+#define NVC0_3D_BIND_TSC__LEN 0x00000005
+#define NVC0_3D_BIND_TSC_ACTIVE 0x00000001
+#define NVC0_3D_BIND_TSC_SAMPLER__MASK 0x00000ff0
+#define NVC0_3D_BIND_TSC_SAMPLER__SHIFT 4
+#define NVC0_3D_BIND_TSC_TSC__MASK 0x01fff000
+#define NVC0_3D_BIND_TSC_TSC__SHIFT 12
+
+#define NVC0_3D_BIND_TIC(i0) (0x00002404 + 0x20*(i0))
+#define NVC0_3D_BIND_TIC__ESIZE 0x00000020
+#define NVC0_3D_BIND_TIC__LEN 0x00000005
+#define NVC0_3D_BIND_TIC_ACTIVE 0x00000001
+#define NVC0_3D_BIND_TIC_TEXTURE__MASK 0x000001fe
+#define NVC0_3D_BIND_TIC_TEXTURE__SHIFT 1
+#define NVC0_3D_BIND_TIC_TIC__MASK 0x7ffffe00
+#define NVC0_3D_BIND_TIC_TIC__SHIFT 9
+
+#define NVC0_3D_CB_BIND(i0) (0x00002410 + 0x20*(i0))
+#define NVC0_3D_CB_BIND__ESIZE 0x00000020
+#define NVC0_3D_CB_BIND__LEN 0x00000005
+#define NVC0_3D_CB_BIND_VALID 0x00000001
+#define NVC0_3D_CB_BIND_INDEX__MASK 0x000000f0
+#define NVC0_3D_CB_BIND_INDEX__SHIFT 4
+
+#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600
+
+#define NVE4_3D_TEX_CB_INDEX 0x00002608
+#define NVE4_3D_TEX_CB_INDEX__MIN 0x00000000
+#define NVE4_3D_TEX_CB_INDEX__MAX 0x00000010
+
+#define NVC0_3D_TFB_VARYING_LOCS(i0, i1) (0x00002800 + 0x80*(i0) + 0x4*(i1))
+#define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004
+#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020
+
+#define NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE 0x00003800
+
+#define NVC0_3D_MACRO_VERTEX_ARRAY_SELECT 0x00003808
+
+#define NVC0_3D_MACRO_BLEND_ENABLES 0x00003810
+
+#define NVC0_3D_MACRO_POLYGON_MODE_FRONT 0x00003818
+#define NVC0_3D_MACRO_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NVC0_3D_MACRO_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NVC0_3D_MACRO_POLYGON_MODE_FRONT_FILL 0x00001b02
+
+#define NVC0_3D_MACRO_POLYGON_MODE_BACK 0x00003820
+#define NVC0_3D_MACRO_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NVC0_3D_MACRO_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NVC0_3D_MACRO_POLYGON_MODE_BACK_FILL 0x00001b02
+
+#define NVC0_3D_MACRO_GP_SELECT 0x00003828
+
+#define NVC0_3D_MACRO_TEP_SELECT 0x00003830
+
+
+#endif /* NVC0_3D_XML */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3ddefs.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3ddefs.xml.h
new file mode 100644
index 0000000..84b1522
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3ddefs.xml.h
@@ -0,0 +1,98 @@
+#ifndef NV_3DDEFS_XML
+#define NV_3DDEFS_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_3d.xml ( 26312 bytes, from 2010-10-08 10:10:01)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+- nv_3ddefs.xml ( 16397 bytes, from 2010-10-08 13:30:38)
+- nv_object.xml ( 11249 bytes, from 2010-10-07 15:31:28)
+- nvchipsets.xml ( 2824 bytes, from 2010-07-07 13:41:20)
+- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define NV50_3D_BLEND_FACTOR_ZERO 0x00004000
+#define NV50_3D_BLEND_FACTOR_ONE 0x00004001
+#define NV50_3D_BLEND_FACTOR_SRC_COLOR 0x00004300
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x00004301
+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA 0x00004302
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NV50_3D_BLEND_FACTOR_DST_ALPHA 0x00004304
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x00004305
+#define NV50_3D_BLEND_FACTOR_DST_COLOR 0x00004306
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x00004307
+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE 0x00004308
+#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR 0x0000c001
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA 0x0000c003
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NV50_3D_BLEND_FACTOR_SRC1_COLOR 0x0000c900
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA 0x0000c902
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA 0x0000c903
+
+#endif /* NV_3DDEFS_XML */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
new file mode 100644
index 0000000..b49f1ae
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2013 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christoph Bumiller, Samuel Pitoiset
+ */
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_compute.h"
+
+int
+nvc0_screen_compute_setup(struct nvc0_screen *screen,
+ struct nouveau_pushbuf *push)
+{
+ struct nouveau_object *chan = screen->base.channel;
+ struct nouveau_device *dev = screen->base.device;
+ uint32_t obj_class;
+ int ret;
+ int i;
+
+ switch (dev->chipset & 0xf0) {
+ case 0xc0:
+ if (dev->chipset == 0xc8)
+ obj_class = NVC8_COMPUTE_CLASS;
+ else
+ obj_class = NVC0_COMPUTE_CLASS;
+ break;
+ case 0xd0:
+ obj_class = NVC0_COMPUTE_CLASS;
+ break;
+ default:
+ NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
+ return -1;
+ }
+
+ ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,
+ &screen->compute);
+ if (ret) {
+ NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
+ return ret;
+ }
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 1 << 12, NULL,
+ &screen->parm);
+ if (ret)
+ return ret;
+
+ BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->compute->oclass);
+
+ /* hardware limit */
+ BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1);
+ PUSH_DATA (push, screen->mp_count);
+ BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1);
+ PUSH_DATA (push, 0xf);
+
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1);
+ PUSH_DATA (push, 0x8000);
+
+ /* global memory setup */
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+ PUSH_DATA (push, 0);
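+   /* identity-map all 256 global memory windows: entry i gets high byte i
+    * and is flagged readable and writable (0xc << 28 == READ_OK | WRITE_OK) */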
+ BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100);
+ for (i = 0; i <= 0xff; i++)
+ PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+ PUSH_DATA (push, 1);
+
+ /* local memory and cstack setup */
+ BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->tls->offset);
+ PUSH_DATA (push, screen->tls->offset);
+ BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2);
+ PUSH_DATAh(push, screen->tls->size);
+ PUSH_DATA (push, screen->tls->size);
+ BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1);
+ PUSH_DATA (push, 0);
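+   /* 1 << 24 and 2 << 24 below are presumably the base offsets of the l[]
+    * and s[] windows in the unified address space */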
+ BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1);
+ PUSH_DATA (push, 1 << 24);
+
+ /* shared memory setup */
+ BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1);
+ PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
+ BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1);
+ PUSH_DATA (push, 2 << 24);
+ BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1);
+ PUSH_DATA (push, 0);
+
+ /* code segment setup */
+ BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->text->offset);
+ PUSH_DATA (push, screen->text->offset);
+
+ /* bind parameters buffer */
+ BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
+ PUSH_DATA (push, screen->parm->size);
+ PUSH_DATAh(push, screen->parm->offset);
+ PUSH_DATA (push, screen->parm->offset);
+ BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
+ PUSH_DATA (push, (0 << 8) | 1);
+
+ /* TODO: textures & samplers */
+
+ return 0;
+}
+
+boolean
+nvc0_compute_validate_program(struct nvc0_context *nvc0)
+{
+ struct nvc0_program *prog = nvc0->compprog;
+
+ if (prog->mem)
+ return TRUE;
+
+ if (!prog->translated) {
+ prog->translated = nvc0_program_translate(
+ prog, nvc0->screen->base.device->chipset);
+ if (!prog->translated)
+ return FALSE;
+ }
+ if (unlikely(!prog->code_size))
+ return FALSE;
+
+   if (nvc0_program_upload_code(nvc0, prog)) {
+      struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+      BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+      PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
+      return TRUE;
+   }
+ return FALSE;
+}
+
+static boolean
+nvc0_compute_state_validate(struct nvc0_context *nvc0)
+{
+ if (!nvc0_compute_validate_program(nvc0))
+ return FALSE;
+
+ /* TODO: textures, samplers, surfaces, global memory buffers */
+
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);
+
+ nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
+ if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
+ return FALSE;
+ if (unlikely(nvc0->state.flushed))
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);
+
+ return TRUE;
+}
+
+static void
+nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nvc0_program *cp = nvc0->compprog;
+
+ if (cp->parm_size) {
+ BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
+ PUSH_DATA (push, align(cp->parm_size, 0x100));
+ PUSH_DATAh(push, screen->parm->offset);
+ PUSH_DATA (push, screen->parm->offset);
+ BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
+ PUSH_DATA (push, (0 << 8) | 1);
+ /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
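+      /* BEGIN_1IC0 increments the method once, so the first word sets CB_POS
+       * and the remaining words stream into CB_DATA */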
+ BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4);
+ PUSH_DATA (push, 0);
+ PUSH_DATAp(push, input, cp->parm_size / 4);
+
+ BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
+ }
+}
+
+void
+nvc0_launch_grid(struct pipe_context *pipe,
+ const uint *block_layout, const uint *grid_layout,
+ uint32_t label,
+ const void *input)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_program *cp = nvc0->compprog;
+ unsigned s, i;
+ int ret;
+
+ ret = !nvc0_compute_state_validate(nvc0);
+ if (ret)
+ goto out;
+
+ nvc0_compute_upload_input(nvc0, input);
+
+ BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1);
+ PUSH_DATA (push, nvc0_program_symbol_offset(cp, label));
+
+ BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3);
+ PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */
+
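+   /* SHARED_SIZE, THREADS_ALLOC and BARRIER_ALLOC are consecutive methods:
+    * shared memory per block, threads per block and barrier count */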
+ BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3);
+ PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
+ PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]);
+ PUSH_DATA (push, cp->num_barriers);
+ BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
+ PUSH_DATA (push, cp->num_gprs);
+
+ /* grid/block setup */
+ BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
+ PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]);
+ PUSH_DATA (push, grid_layout[2]);
+ BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
+ PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]);
+ PUSH_DATA (push, block_layout[2]);
+
+ /* launch preliminary setup */
+ BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
+ PUSH_DATA (push, 0x1);
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
+
+ /* kernel launching */
+ BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
+ PUSH_DATA (push, 0x1000);
+ BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
+ PUSH_DATA (push, 0x1);
+
+ /* rebind all the 3D constant buffers
+ * (looks like binding a CB on COMPUTE clobbers 3D state) */
+ nvc0->dirty |= NVC0_NEW_CONSTBUF;
+ for (s = 0; s < 6; s++) {
+ for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++)
+ if (nvc0->constbuf[s][i].u.buf)
+ nvc0->constbuf_dirty[s] |= 1 << i;
+ }
+ memset(nvc0->state.uniform_buffer_bound, 0,
+ sizeof(nvc0->state.uniform_buffer_bound));
+
+out:
+ if (ret)
+      NOUVEAU_ERR("Failed to launch grid!\n");
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
new file mode 100644
index 0000000..9a1a717
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
@@ -0,0 +1,10 @@
+#ifndef NVC0_COMPUTE_H
+#define NVC0_COMPUTE_H
+
+#include "nv50/nv50_defs.xml.h"
+#include "nvc0/nvc0_compute.xml.h"
+
+boolean
+nvc0_compute_validate_program(struct nvc0_context *nvc0);
+
+#endif /* NVC0_COMPUTE_H */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.xml.h
new file mode 100644
index 0000000..35e6bfd
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.xml.h
@@ -0,0 +1,410 @@
+#ifndef NVC0_COMPUTE_XML
+#define NVC0_COMPUTE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_compute.xml ( 11145 bytes, from 2013-04-27 14:00:13)
+- copyright.xml ( 6452 bytes, from 2013-02-27 22:13:22)
+- nvchipsets.xml ( 3954 bytes, from 2013-04-27 14:00:13)
+- nv_object.xml ( 14395 bytes, from 2013-04-27 14:00:13)
+- nv_defs.xml ( 4437 bytes, from 2013-02-27 22:13:22)
+- nv50_defs.xml ( 16652 bytes, from 2013-06-20 13:45:33)
+
+Copyright (C) 2006-2013 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_COMPUTE_LOCAL_POS_ALLOC 0x00000204
+
+#define NVC0_COMPUTE_LOCAL_NEG_ALLOC 0x00000208
+
+#define NVC0_COMPUTE_WARP_CSTACK_SIZE 0x0000020c
+
+#define NVC0_COMPUTE_TEX_LIMITS 0x00000210
+#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f
+#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0
+#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000
+#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004
+#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0
+#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4
+#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000
+#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007
+
+#define NVC0_COMPUTE_SHARED_BASE 0x00000214
+
+#define NVC0_COMPUTE_MEM_BARRIER 0x0000021c
+#define NVC0_COMPUTE_MEM_BARRIER_UNK0 0x00000001
+#define NVC0_COMPUTE_MEM_BARRIER_UNK1 0x00000002
+#define NVC0_COMPUTE_MEM_BARRIER_UNK2 0x00000004
+#define NVC0_COMPUTE_MEM_BARRIER_UNK4 0x00000010
+#define NVC0_COMPUTE_MEM_BARRIER_UNK8 0x00000100
+#define NVC0_COMPUTE_MEM_BARRIER_UNK12 0x00001000
+
+#define NVC0_COMPUTE_BIND_TSC 0x00000228
+#define NVC0_COMPUTE_BIND_TSC_ACTIVE 0x00000001
+#define NVC0_COMPUTE_BIND_TSC_SAMPLER__MASK 0x00000ff0
+#define NVC0_COMPUTE_BIND_TSC_SAMPLER__SHIFT 4
+#define NVC0_COMPUTE_BIND_TSC_TSC__MASK 0x01fff000
+#define NVC0_COMPUTE_BIND_TSC_TSC__SHIFT 12
+
+#define NVC0_COMPUTE_BIND_TIC 0x0000022c
+#define NVC0_COMPUTE_BIND_TIC_ACTIVE 0x00000001
+#define NVC0_COMPUTE_BIND_TIC_TEXTURE__MASK 0x000001fe
+#define NVC0_COMPUTE_BIND_TIC_TEXTURE__SHIFT 1
+#define NVC0_COMPUTE_BIND_TIC_TIC__MASK 0x7ffffe00
+#define NVC0_COMPUTE_BIND_TIC_TIC__SHIFT 9
+
+#define NVC0_COMPUTE_BIND_TSC2 0x00000230
+#define NVC0_COMPUTE_BIND_TSC2_ACTIVE 0x00000001
+#define NVC0_COMPUTE_BIND_TSC2_SAMPLER__MASK 0x00000010
+#define NVC0_COMPUTE_BIND_TSC2_SAMPLER__SHIFT 4
+#define NVC0_COMPUTE_BIND_TSC2_TSC__MASK 0x01fff000
+#define NVC0_COMPUTE_BIND_TSC2_TSC__SHIFT 12
+
+#define NVC0_COMPUTE_BIND_TIC2 0x00000234
+#define NVC0_COMPUTE_BIND_TIC2_ACTIVE 0x00000001
+#define NVC0_COMPUTE_BIND_TIC2_TEXTURE__MASK 0x00000002
+#define NVC0_COMPUTE_BIND_TIC2_TEXTURE__SHIFT 1
+#define NVC0_COMPUTE_BIND_TIC2_TIC__MASK 0x7ffffe00
+#define NVC0_COMPUTE_BIND_TIC2_TIC__SHIFT 9
+
+#define NVC0_COMPUTE_GRIDDIM_YX 0x00000238
+#define NVC0_COMPUTE_GRIDDIM_YX_X__MASK 0x0000ffff
+#define NVC0_COMPUTE_GRIDDIM_YX_X__SHIFT 0
+#define NVC0_COMPUTE_GRIDDIM_YX_Y__MASK 0xffff0000
+#define NVC0_COMPUTE_GRIDDIM_YX_Y__SHIFT 16
+
+#define NVC0_COMPUTE_GRIDDIM_Z 0x0000023c
+
+#define NVC0_COMPUTE_UNK244_TIC_FLUSH 0x00000244
+
+#define NVC0_COMPUTE_SHARED_SIZE 0x0000024c
+
+#define NVC0_COMPUTE_THREADS_ALLOC 0x00000250
+
+#define NVC0_COMPUTE_BARRIER_ALLOC 0x00000254
+
+#define NVC0_COMPUTE_UNK028C 0x0000028c
+
+#define NVC0_COMPUTE_COMPUTE_BEGIN 0x0000029c
+#define NVC0_COMPUTE_COMPUTE_BEGIN_UNK0 0x00000001
+
+#define NVC0_COMPUTE_UNK02A0 0x000002a0
+
+#define NVC0_COMPUTE_CP_GPR_ALLOC 0x000002c0
+
+#define NVC0_COMPUTE_UNK02C4 0x000002c4
+
+#define NVC0_COMPUTE_GLOBAL_BASE 0x000002c8
+#define NVC0_COMPUTE_GLOBAL_BASE_HIGH__MASK 0x000000ff
+#define NVC0_COMPUTE_GLOBAL_BASE_HIGH__SHIFT 0
+#define NVC0_COMPUTE_GLOBAL_BASE_INDEX__MASK 0x00ff0000
+#define NVC0_COMPUTE_GLOBAL_BASE_INDEX__SHIFT 16
+#define NVC0_COMPUTE_GLOBAL_BASE_READ_OK 0x40000000
+#define NVC0_COMPUTE_GLOBAL_BASE_WRITE_OK 0x80000000
+
+#define NVC8_COMPUTE_UNK02E0 0x000002e0
+
+#define NVC0_COMPUTE_CACHE_SPLIT 0x00000308
+#define NVC0_COMPUTE_CACHE_SPLIT_16K_SHARED_48K_L1 0x00000001
+#define NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1 0x00000003
+
+#define NVC0_COMPUTE_UNK030C 0x0000030c
+
+#define NVC0_COMPUTE_UNK0360 0x00000360
+#define NVC0_COMPUTE_UNK0360_UNK0 0x00000001
+#define NVC0_COMPUTE_UNK0360_UNK8__MASK 0x00000300
+#define NVC0_COMPUTE_UNK0360_UNK8__SHIFT 8
+#define NVC8_COMPUTE_UNK0360_UNK10__MASK 0x00000c00
+#define NVC8_COMPUTE_UNK0360_UNK10__SHIFT 10
+
+#define NVC0_COMPUTE_LAUNCH 0x00000368
+
+#define NVC0_COMPUTE_UNK036C 0x0000036c
+#define NVC0_COMPUTE_UNK036C_UNK0__MASK 0x00000003
+#define NVC0_COMPUTE_UNK036C_UNK0__SHIFT 0
+#define NVC8_COMPUTE_UNK036C_UNK2__MASK 0x0000000c
+#define NVC8_COMPUTE_UNK036C_UNK2__SHIFT 2
+
+#define NVC0_COMPUTE_BLOCKDIM_YX 0x000003ac
+#define NVC0_COMPUTE_BLOCKDIM_YX_X__MASK 0x0000ffff
+#define NVC0_COMPUTE_BLOCKDIM_YX_X__SHIFT 0
+#define NVC0_COMPUTE_BLOCKDIM_YX_Y__MASK 0xffff0000
+#define NVC0_COMPUTE_BLOCKDIM_YX_Y__SHIFT 16
+
+#define NVC0_COMPUTE_BLOCKDIM_Z 0x000003b0
+
+#define NVC0_COMPUTE_CP_START_ID 0x000003b4
+
+#define NVC0_COMPUTE_FIRMWARE(i0) (0x00000500 + 0x4*(i0))
+#define NVC0_COMPUTE_FIRMWARE__ESIZE 0x00000004
+#define NVC0_COMPUTE_FIRMWARE__LEN 0x00000020
+
+#define NVC0_COMPUTE_MP_LIMIT 0x00000758
+
+#define NVC0_COMPUTE_LOCAL_BASE 0x0000077c
+
+#define NVC0_COMPUTE_GRIDID 0x00000780
+
+#define NVC0_COMPUTE_TEMP_ADDRESS_HIGH 0x00000790
+
+#define NVC0_COMPUTE_TEMP_ADDRESS_LOW 0x00000794
+
+#define NVC0_COMPUTE_TEMP_SIZE_HIGH 0x00000798
+
+#define NVC0_COMPUTE_TEMP_SIZE_LOW 0x0000079c
+
+#define NVC0_COMPUTE_WARP_TEMP_ALLOC 0x000007a0
+
+#define NVC0_COMPUTE_COMPUTE_END 0x00000a04
+#define NVC0_COMPUTE_COMPUTE_END_UNK0 0x00000001
+
+#define NVC0_COMPUTE_UNK0A08 0x00000a08
+
+#define NVC0_COMPUTE_CALL_LIMIT_LOG 0x00000d64
+
+#define NVC0_COMPUTE_UNK0D94 0x00000d94
+
+#define NVC0_COMPUTE_WATCHDOG_TIMER 0x00000de4
+
+#define NVC0_COMPUTE_UNK10F4 0x000010f4
+#define NVC0_COMPUTE_UNK10F4_UNK0 0x00000001
+#define NVC0_COMPUTE_UNK10F4_UNK4 0x00000010
+#define NVC0_COMPUTE_UNK10F4_UNK8 0x00000100
+
+#define NVC0_COMPUTE_LINKED_TSC 0x00001234
+
+#define NVC0_COMPUTE_UNK1288_TIC_FLUSH 0x00001288
+
+#define NVC0_COMPUTE_UNK12AC 0x000012ac
+
+#define NVC0_COMPUTE_TSC_FLUSH 0x00001330
+#define NVC0_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001
+#define NVC0_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVC0_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4
+
+#define NVC0_COMPUTE_TIC_FLUSH 0x00001334
+#define NVC0_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001
+#define NVC0_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVC0_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4
+
+#define NVC0_COMPUTE_TEX_CACHE_CTL 0x00001338
+#define NVC0_COMPUTE_TEX_CACHE_CTL_UNK0__MASK 0x00000007
+#define NVC0_COMPUTE_TEX_CACHE_CTL_UNK0__SHIFT 0
+#define NVC0_COMPUTE_TEX_CACHE_CTL_ENTRY__MASK 0x03fffff0
+#define NVC0_COMPUTE_TEX_CACHE_CTL_ENTRY__SHIFT 4
+
+#define NVC0_COMPUTE_UNK1354 0x00001354
+
+#define NVC0_COMPUTE_UNK1424_TSC_FLUSH 0x00001424
+
+#define NVC0_COMPUTE_COND_ADDRESS_HIGH 0x00001550
+
+#define NVC0_COMPUTE_COND_ADDRESS_LOW 0x00001554
+
+#define NVC0_COMPUTE_COND_MODE 0x00001558
+#define NVC0_COMPUTE_COND_MODE_NEVER 0x00000000
+#define NVC0_COMPUTE_COND_MODE_ALWAYS 0x00000001
+#define NVC0_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002
+#define NVC0_COMPUTE_COND_MODE_EQUAL 0x00000003
+#define NVC0_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NVC0_COMPUTE_TSC_ADDRESS_HIGH 0x0000155c
+
+#define NVC0_COMPUTE_TSC_ADDRESS_LOW 0x00001560
+
+#define NVC0_COMPUTE_TSC_LIMIT 0x00001564
+
+#define NVC0_COMPUTE_TIC_ADDRESS_HIGH 0x00001574
+
+#define NVC0_COMPUTE_TIC_ADDRESS_LOW 0x00001578
+
+#define NVC0_COMPUTE_TIC_LIMIT 0x0000157c
+
+#define NVC0_COMPUTE_CODE_ADDRESS_HIGH 0x00001608
+
+#define NVC0_COMPUTE_CODE_ADDRESS_LOW 0x0000160c
+
+#define NVC0_COMPUTE_TEX_MISC 0x00001664
+#define NVC0_COMPUTE_TEX_MISC_UNK 0x00000001
+#define NVC0_COMPUTE_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000002
+
+#define NVC0_COMPUTE_UNK1690 0x00001690
+#define NVC0_COMPUTE_UNK1690_ALWAYS_DERIV 0x00000001
+#define NVC0_COMPUTE_UNK1690_UNK16 0x00010000
+
+#define NVC0_COMPUTE_CB_BIND 0x00001694
+#define NVC0_COMPUTE_CB_BIND_VALID 0x00000001
+#define NVC0_COMPUTE_CB_BIND_INDEX__MASK 0x00001f00
+#define NVC0_COMPUTE_CB_BIND_INDEX__SHIFT 8
+
+#define NVC0_COMPUTE_FLUSH 0x00001698
+#define NVC0_COMPUTE_FLUSH_CODE 0x00000001
+#define NVC0_COMPUTE_FLUSH_GLOBAL 0x00000010
+#define NVC0_COMPUTE_FLUSH_UNK8 0x00000100
+#define NVC0_COMPUTE_FLUSH_CB 0x00001000
+
+#define NVC0_COMPUTE_UNK1930 0x00001930
+
+#define NVC0_COMPUTE_UNK1944 0x00001944
+
+#define NVC0_COMPUTE_DELAY 0x00001a24
+
+#define NVC0_COMPUTE_UNK1A2C(i0) (0x00001a2c + 0x4*(i0))
+#define NVC0_COMPUTE_UNK1A2C__ESIZE 0x00000004
+#define NVC0_COMPUTE_UNK1A2C__LEN 0x00000005
+
+#define NVC0_COMPUTE_QUERY_ADDRESS_HIGH 0x00001b00
+
+#define NVC0_COMPUTE_QUERY_ADDRESS_LOW 0x00001b04
+
+#define NVC0_COMPUTE_QUERY_SEQUENCE 0x00001b08
+
+#define NVC0_COMPUTE_QUERY_GET 0x00001b0c
+#define NVC0_COMPUTE_QUERY_GET_MODE__MASK 0x00000003
+#define NVC0_COMPUTE_QUERY_GET_MODE__SHIFT 0
+#define NVC0_COMPUTE_QUERY_GET_MODE_WRITE 0x00000000
+#define NVC0_COMPUTE_QUERY_GET_MODE_WRITE_INTR_NRHOST 0x00000003
+#define NVC0_COMPUTE_QUERY_GET_INTR 0x00100000
+#define NVC0_COMPUTE_QUERY_GET_SHORT 0x10000000
+
+#define NVC0_COMPUTE_CB_SIZE 0x00002380
+
+#define NVC0_COMPUTE_CB_ADDRESS_HIGH 0x00002384
+
+#define NVC0_COMPUTE_CB_ADDRESS_LOW 0x00002388
+
+#define NVC0_COMPUTE_CB_POS 0x0000238c
+
+#define NVC0_COMPUTE_CB_DATA(i0) (0x00002390 + 0x4*(i0))
+#define NVC0_COMPUTE_CB_DATA__ESIZE 0x00000004
+#define NVC0_COMPUTE_CB_DATA__LEN 0x00000010
+
+#define NVC0_COMPUTE_IMAGE(i0) (0x00002700 + 0x20*(i0))
+#define NVC0_COMPUTE_IMAGE__ESIZE 0x00000020
+#define NVC0_COMPUTE_IMAGE__LEN 0x00000008
+
+#define NVC0_COMPUTE_IMAGE_ADDRESS_HIGH(i0) (0x00002700 + 0x20*(i0))
+
+#define NVC0_COMPUTE_IMAGE_ADDRESS_LOW(i0) (0x00002704 + 0x20*(i0))
+
+#define NVC0_COMPUTE_IMAGE_WIDTH(i0) (0x00002708 + 0x20*(i0))
+
+#define NVC0_COMPUTE_IMAGE_HEIGHT(i0) (0x0000270c + 0x20*(i0))
+#define NVC0_COMPUTE_IMAGE_HEIGHT_HEIGHT__MASK 0x0000ffff
+#define NVC0_COMPUTE_IMAGE_HEIGHT_HEIGHT__SHIFT 0
+#define NVC0_COMPUTE_IMAGE_HEIGHT_UNK16 0x00010000
+#define NVC0_COMPUTE_IMAGE_HEIGHT_LINEAR 0x00100000
+
+#define NVC0_COMPUTE_IMAGE_FORMAT(i0) (0x00002710 + 0x20*(i0))
+#define NVC0_COMPUTE_IMAGE_FORMAT_UNK0 0x00000001
+#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_COLOR__MASK 0x00000ff0
+#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_COLOR__SHIFT 4
+#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_ZETA__MASK 0x0001f000
+#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_ZETA__SHIFT 12
+
+#define NVC0_COMPUTE_IMAGE_TILE_MODE(i0) (0x00002714 + 0x20*(i0))
+
+#define NVC0_COMPUTE_MP_PM_SET(i0) (0x0000335c + 0x4*(i0))
+#define NVC0_COMPUTE_MP_PM_SET__ESIZE 0x00000004
+#define NVC0_COMPUTE_MP_PM_SET__LEN 0x00000008
+
+#define NVC0_COMPUTE_MP_PM_SIGSEL(i0) (0x0000337c + 0x4*(i0))
+#define NVC0_COMPUTE_MP_PM_SIGSEL__ESIZE 0x00000004
+#define NVC0_COMPUTE_MP_PM_SIGSEL__LEN 0x00000008
+
+#define NVC0_COMPUTE_MP_PM_SRCSEL(i0) (0x0000339c + 0x4*(i0))
+#define NVC0_COMPUTE_MP_PM_SRCSEL__ESIZE 0x00000004
+#define NVC0_COMPUTE_MP_PM_SRCSEL__LEN 0x00000008
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP0__MASK 0x00000007
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP0__SHIFT 0
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG0__MASK 0x00000070
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG0__SHIFT 4
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP1__MASK 0x00000700
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP1__SHIFT 8
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG1__MASK 0x00007000
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG1__SHIFT 12
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP2__MASK 0x00070000
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP2__SHIFT 16
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG2__MASK 0x00700000
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG2__SHIFT 20
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP3__MASK 0x07000000
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP3__SHIFT 24
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG3__MASK 0x70000000
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG3__SHIFT 28
+
+#define NVC0_COMPUTE_MP_PM_OP(i0) (0x000033bc + 0x4*(i0))
+#define NVC0_COMPUTE_MP_PM_OP__ESIZE 0x00000004
+#define NVC0_COMPUTE_MP_PM_OP__LEN 0x00000008
+#define NVC0_COMPUTE_MP_PM_OP_MODE__MASK 0x00000001
+#define NVC0_COMPUTE_MP_PM_OP_MODE__SHIFT 0
+#define NVC0_COMPUTE_MP_PM_OP_MODE_LOGOP 0x00000000
+#define NVC0_COMPUTE_MP_PM_OP_MODE_LOGOP_PULSE 0x00000001
+#define NVC0_COMPUTE_MP_PM_OP_FUNC__MASK 0x000ffff0
+#define NVC0_COMPUTE_MP_PM_OP_FUNC__SHIFT 4
+
+#define NVC0_COMPUTE_MP_PM_UNK33DC 0x000033dc
+
+
+#endif /* NVC0_COMPUTE_XML */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
new file mode 100644
index 0000000..e0c2b74
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_framebuffer.h"
+
+#ifdef NVC0_WITH_DRAW_MODULE
+#include "draw/draw_context.h"
+#endif
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_screen.h"
+#include "nvc0/nvc0_resource.h"
+
+static void
+nvc0_flush(struct pipe_context *pipe,
+ struct pipe_fence_handle **fence,
+ unsigned flags)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_screen *screen = &nvc0->screen->base;
+
+ if (fence)
+ nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence);
+
+ PUSH_KICK(nvc0->base.pushbuf); /* fencing handled in kick_notify */
+
+ nouveau_context_update_frame_stats(&nvc0->base);
+}
+
+static void
+nvc0_texture_barrier(struct pipe_context *pipe)
+{
+ struct nouveau_pushbuf *push = nvc0_context(pipe)->base.pushbuf;
+
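+   /* wait for previous work, then flush the texture cache so subsequent
+    * sampling sees prior framebuffer writes */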
+ IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
+ IMMED_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 0);
+}
+
+static void
+nvc0_context_unreference_resources(struct nvc0_context *nvc0)
+{
+ unsigned s, i;
+
+ nouveau_bufctx_del(&nvc0->bufctx_3d);
+ nouveau_bufctx_del(&nvc0->bufctx);
+ nouveau_bufctx_del(&nvc0->bufctx_cp);
+
+ util_unreference_framebuffer_state(&nvc0->framebuffer);
+
+ for (i = 0; i < nvc0->num_vtxbufs; ++i)
+ pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL);
+
+ pipe_resource_reference(&nvc0->idxbuf.buffer, NULL);
+
+ for (s = 0; s < 6; ++s) {
+ for (i = 0; i < nvc0->num_textures[s]; ++i)
+ pipe_sampler_view_reference(&nvc0->textures[s][i], NULL);
+
+ for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; ++i)
+ if (!nvc0->constbuf[s][i].user)
+ pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, NULL);
+ }
+
+ for (s = 0; s < 2; ++s) {
+ for (i = 0; i < NVC0_MAX_SURFACE_SLOTS; ++i)
+ pipe_surface_reference(&nvc0->surfaces[s][i], NULL);
+ }
+
+ for (i = 0; i < nvc0->num_tfbbufs; ++i)
+ pipe_so_target_reference(&nvc0->tfbbuf[i], NULL);
+
+ for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);
+ ++i) {
+ struct pipe_resource **res = util_dynarray_element(
+ &nvc0->global_residents, struct pipe_resource *, i);
+ pipe_resource_reference(res, NULL);
+ }
+ util_dynarray_fini(&nvc0->global_residents);
+}
+
+static void
+nvc0_destroy(struct pipe_context *pipe)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ if (nvc0->screen->cur_ctx == nvc0) {
+ nvc0->base.pushbuf->kick_notify = NULL;
+ nvc0->screen->cur_ctx = NULL;
+ nouveau_pushbuf_bufctx(nvc0->base.pushbuf, NULL);
+ }
+ nouveau_pushbuf_kick(nvc0->base.pushbuf, nvc0->base.pushbuf->channel);
+
+ nvc0_context_unreference_resources(nvc0);
+ nvc0_blitctx_destroy(nvc0);
+
+#ifdef NVC0_WITH_DRAW_MODULE
+ draw_destroy(nvc0->draw);
+#endif
+
+ nouveau_context_destroy(&nvc0->base);
+}
+
+void
+nvc0_default_kick_notify(struct nouveau_pushbuf *push)
+{
+ struct nvc0_screen *screen = push->user_priv;
+
+ if (screen) {
+ nouveau_fence_next(&screen->base);
+ nouveau_fence_update(&screen->base, TRUE);
+ if (screen->cur_ctx)
+ screen->cur_ctx->state.flushed = TRUE;
+      NOUVEAU_DRV_STAT(&screen->base, pushbuf_count, 1);
+   }
+}
+
+static int
+nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
+ struct pipe_resource *res,
+ int ref)
+{
+ struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe);
+ unsigned s, i;
+
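+   /* ref is the number of references the caller knows about; each binding we
+    * invalidate decrements it, so we can stop as soon as it reaches zero */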
+ if (res->bind & PIPE_BIND_RENDER_TARGET) {
+ for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) {
+ if (nvc0->framebuffer.cbufs[i] &&
+ nvc0->framebuffer.cbufs[i]->texture == res) {
+ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (nvc0->framebuffer.zsbuf &&
+ nvc0->framebuffer.zsbuf->texture == res) {
+ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+ if (!--ref)
+ return ref;
+ }
+ }
+
+ if (res->bind & PIPE_BIND_VERTEX_BUFFER) {
+ for (i = 0; i < nvc0->num_vtxbufs; ++i) {
+ if (nvc0->vtxbuf[i].buffer == res) {
+ nvc0->dirty |= NVC0_NEW_ARRAYS;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ if (res->bind & PIPE_BIND_INDEX_BUFFER) {
+ if (nvc0->idxbuf.buffer == res) {
+ nvc0->dirty |= NVC0_NEW_IDXBUF;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_IDX);
+ if (!--ref)
+ return ref;
+ }
+ }
+
+ if (res->bind & PIPE_BIND_SAMPLER_VIEW) {
+ for (s = 0; s < 5; ++s) {
+ for (i = 0; i < nvc0->num_textures[s]; ++i) {
+ if (nvc0->textures[s][i] &&
+ nvc0->textures[s][i]->texture == res) {
+ nvc0->textures_dirty[s] |= 1 << i;
+ nvc0->dirty |= NVC0_NEW_TEXTURES;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i));
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ }
+
+ if (res->bind & PIPE_BIND_CONSTANT_BUFFER) {
+      for (s = 0; s < 5; ++s) {
+         for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; ++i) {
+            if (!nvc0->constbuf[s][i].user &&
+                nvc0->constbuf[s][i].u.buf == res) {
+               nvc0->dirty |= NVC0_NEW_CONSTBUF;
+               nvc0->constbuf_dirty[s] |= 1 << i;
+               nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i));
+               if (!--ref)
+                  return ref;
+            }
+         }
+      }
+ }
+
+ return ref;
+}
+
+static void
+nvc0_context_get_sample_position(struct pipe_context *, unsigned, unsigned,
+ float *);
+
+struct pipe_context *
+nvc0_create(struct pipe_screen *pscreen, void *priv)
+{
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+ struct nvc0_context *nvc0;
+ struct pipe_context *pipe;
+ int ret;
+ uint32_t flags;
+
+ nvc0 = CALLOC_STRUCT(nvc0_context);
+ if (!nvc0)
+ return NULL;
+ pipe = &nvc0->base.pipe;
+
+ if (!nvc0_blitctx_create(nvc0))
+ goto out_err;
+
+ nvc0->base.pushbuf = screen->base.pushbuf;
+ nvc0->base.client = screen->base.client;
+
+ ret = nouveau_bufctx_new(screen->base.client, 2, &nvc0->bufctx);
+ if (!ret)
+ ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_3D_COUNT,
+ &nvc0->bufctx_3d);
+ if (!ret)
+ ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_CP_COUNT,
+ &nvc0->bufctx_cp);
+ if (ret)
+ goto out_err;
+
+ nvc0->screen = screen;
+ nvc0->base.screen = &screen->base;
+
+ pipe->screen = pscreen;
+ pipe->priv = priv;
+
+ pipe->destroy = nvc0_destroy;
+
+ pipe->draw_vbo = nvc0_draw_vbo;
+ pipe->clear = nvc0_clear;
+ pipe->launch_grid = (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) ?
+ nve4_launch_grid : nvc0_launch_grid;
+
+ pipe->flush = nvc0_flush;
+ pipe->texture_barrier = nvc0_texture_barrier;
+ pipe->get_sample_position = nvc0_context_get_sample_position;
+
+ if (!screen->cur_ctx) {
+ screen->cur_ctx = nvc0;
+ nouveau_pushbuf_bufctx(screen->base.pushbuf, nvc0->bufctx);
+ }
+ screen->base.pushbuf->kick_notify = nvc0_default_kick_notify;
+
+ nvc0_init_query_functions(nvc0);
+ nvc0_init_surface_functions(nvc0);
+ nvc0_init_state_functions(nvc0);
+ nvc0_init_transfer_functions(nvc0);
+ nvc0_init_resource_functions(pipe);
+
+ nvc0->base.invalidate_resource_storage = nvc0_invalidate_resource_storage;
+
+#ifdef NVC0_WITH_DRAW_MODULE
+ /* no software fallbacks implemented */
+ nvc0->draw = draw_create(pipe);
+ assert(nvc0->draw);
+ draw_set_rasterize_stage(nvc0->draw, nvc0_draw_render_stage(nvc0));
+#endif
+
+ pipe->create_video_codec = nvc0_create_decoder;
+ pipe->create_video_buffer = nvc0_video_buffer_create;
+
+ /* shader builtin library is per-screen, but we need a context for m2mf */
+ nvc0_program_library_upload(nvc0);
+
+   /* add permanently resident buffers to bufctxs */
+
+ flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+
+ BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text);
+ BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo);
+ BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->txc);
+ if (screen->compute) {
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->text);
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->txc);
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->parm);
+ }
+
+ flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
+
+ BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->poly_cache);
+ if (screen->compute)
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->tls);
+
+ flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
+
+ BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->fence.bo);
+ BCTX_REFN_bo(nvc0->bufctx, FENCE, flags, screen->fence.bo);
+ if (screen->compute)
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->fence.bo);
+
+ nvc0->base.scratch.bo_size = 2 << 20;
+
+ memset(nvc0->tex_handles, ~0, sizeof(nvc0->tex_handles));
+
+ util_dynarray_init(&nvc0->global_residents);
+
+ return pipe;
+
+out_err:
+ if (nvc0) {
+ if (nvc0->bufctx_3d)
+ nouveau_bufctx_del(&nvc0->bufctx_3d);
+ if (nvc0->bufctx_cp)
+ nouveau_bufctx_del(&nvc0->bufctx_cp);
+ if (nvc0->bufctx)
+ nouveau_bufctx_del(&nvc0->bufctx);
+ if (nvc0->blit)
+ FREE(nvc0->blit);
+ FREE(nvc0);
+ }
+ return NULL;
+}
+
+void
+nvc0_bufctx_fence(struct nvc0_context *nvc0, struct nouveau_bufctx *bufctx,
+ boolean on_flush)
+{
+ struct nouveau_list *list = on_flush ? &bufctx->current : &bufctx->pending;
+ struct nouveau_list *it;
+ NOUVEAU_DRV_STAT_IFD(unsigned count = 0);
+
+ for (it = list->next; it != list; it = it->next) {
+ struct nouveau_bufref *ref = (struct nouveau_bufref *)it;
+ struct nv04_resource *res = ref->priv;
+ if (res)
+ nvc0_resource_validate(res, (unsigned)ref->priv_data);
+ NOUVEAU_DRV_STAT_IFD(count++);
+ }
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, resource_validate_count, count);
+}
+
+static void
+nvc0_context_get_sample_position(struct pipe_context *pipe,
+ unsigned sample_count, unsigned sample_index,
+ float *xy)
+{
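+   /* sample positions are stored in 1/16ths of a pixel, hence the 0.0625
+    * scale applied at the end */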
+ static const uint8_t ms1[1][2] = { { 0x8, 0x8 } };
+ static const uint8_t ms2[2][2] = {
+ { 0x4, 0x4 }, { 0xc, 0xc } }; /* surface coords (0,0), (1,0) */
+ static const uint8_t ms4[4][2] = {
+ { 0x6, 0x2 }, { 0xe, 0x6 }, /* (0,0), (1,0) */
+ { 0x2, 0xa }, { 0xa, 0xe } }; /* (0,1), (1,1) */
+ static const uint8_t ms8[8][2] = {
+ { 0x1, 0x7 }, { 0x5, 0x3 }, /* (0,0), (1,0) */
+ { 0x3, 0xd }, { 0x7, 0xb }, /* (0,1), (1,1) */
+ { 0x9, 0x5 }, { 0xf, 0x1 }, /* (2,0), (3,0) */
+ { 0xb, 0xf }, { 0xd, 0x9 } }; /* (2,1), (3,1) */
+#if 0
+ /* NOTE: there are alternative modes for MS2 and MS8, currently not used */
+ static const uint8_t ms8_alt[8][2] = {
+ { 0x9, 0x5 }, { 0x7, 0xb }, /* (2,0), (1,1) */
+ { 0xd, 0x9 }, { 0x5, 0x3 }, /* (3,1), (1,0) */
+ { 0x3, 0xd }, { 0x1, 0x7 }, /* (0,1), (0,0) */
+ { 0xb, 0xf }, { 0xf, 0x1 } }; /* (2,1), (3,0) */
+#endif
+
+ const uint8_t (*ptr)[2];
+
+ switch (sample_count) {
+ case 0:
+ case 1: ptr = ms1; break;
+ case 2: ptr = ms2; break;
+ case 4: ptr = ms4; break;
+ case 8: ptr = ms8; break;
+ default:
+ assert(0);
+ return; /* bad sample count -> undefined locations */
+ }
+ xy[0] = ptr[sample_index][0] * 0.0625f;
+ xy[1] = ptr[sample_index][1] * 0.0625f;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
new file mode 100644
index 0000000..3fbecdc
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -0,0 +1,357 @@
+#ifndef __NVC0_CONTEXT_H__
+#define __NVC0_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_inlines.h"
+#include "util/u_dynarray.h"
+
+#ifdef NVC0_WITH_DRAW_MODULE
+#include "draw/draw_vertex.h"
+#endif
+
+#include "nv50/nv50_debug.h"
+#include "nvc0/nvc0_winsys.h"
+#include "nvc0/nvc0_stateobj.h"
+#include "nvc0/nvc0_screen.h"
+#include "nvc0/nvc0_program.h"
+#include "nvc0/nvc0_resource.h"
+
+#include "nv50/nv50_transfer.h"
+
+#include "nouveau_context.h"
+
+#include "nvc0/nvc0_3ddefs.xml.h"
+#include "nvc0/nvc0_3d.xml.h"
+#include "nvc0/nvc0_2d.xml.h"
+#include "nvc0/nvc0_m2mf.xml.h"
+#include "nvc0/nve4_p2mf.xml.h"
+
+/* NOTE: must keep NVC0_NEW_...PROG in consecutive bits in this order */
+#define NVC0_NEW_BLEND (1 << 0)
+#define NVC0_NEW_RASTERIZER (1 << 1)
+#define NVC0_NEW_ZSA (1 << 2)
+#define NVC0_NEW_VERTPROG (1 << 3)
+#define NVC0_NEW_TCTLPROG (1 << 4)
+#define NVC0_NEW_TEVLPROG (1 << 5)
+#define NVC0_NEW_GMTYPROG (1 << 6)
+#define NVC0_NEW_FRAGPROG (1 << 7)
+#define NVC0_NEW_BLEND_COLOUR (1 << 8)
+#define NVC0_NEW_STENCIL_REF (1 << 9)
+#define NVC0_NEW_CLIP (1 << 10)
+#define NVC0_NEW_SAMPLE_MASK (1 << 11)
+#define NVC0_NEW_FRAMEBUFFER (1 << 12)
+#define NVC0_NEW_STIPPLE (1 << 13)
+#define NVC0_NEW_SCISSOR (1 << 14)
+#define NVC0_NEW_VIEWPORT (1 << 15)
+#define NVC0_NEW_ARRAYS (1 << 16)
+#define NVC0_NEW_VERTEX (1 << 17)
+#define NVC0_NEW_CONSTBUF (1 << 18)
+#define NVC0_NEW_TEXTURES (1 << 19)
+#define NVC0_NEW_SAMPLERS (1 << 20)
+#define NVC0_NEW_TFB_TARGETS (1 << 21)
+#define NVC0_NEW_IDXBUF (1 << 22)
+#define NVC0_NEW_SURFACES (1 << 23)
+
+#define NVC0_NEW_CP_PROGRAM (1 << 0)
+#define NVC0_NEW_CP_SURFACES (1 << 1)
+#define NVC0_NEW_CP_TEXTURES (1 << 2)
+#define NVC0_NEW_CP_SAMPLERS (1 << 3)
+#define NVC0_NEW_CP_CONSTBUF (1 << 4)
+#define NVC0_NEW_CP_GLOBALS (1 << 5)
+
+/* 3d bufctx (during draw_vbo, blit_3d) */
+#define NVC0_BIND_FB 0
+#define NVC0_BIND_VTX 1
+#define NVC0_BIND_VTX_TMP 2
+#define NVC0_BIND_IDX 3
+#define NVC0_BIND_TEX(s, i) ( 4 + 32 * (s) + (i))
+#define NVC0_BIND_CB(s, i) (164 + 16 * (s) + (i))
+#define NVC0_BIND_TFB 244
+#define NVC0_BIND_SUF 245
+#define NVC0_BIND_SCREEN 246
+#define NVC0_BIND_TLS 247
+#define NVC0_BIND_3D_COUNT 248
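+/* The 3D bind points are laid out per shader stage: 5 stages x 32 textures
+ * occupy slots 4..163 (e.g. NVC0_BIND_TEX(4, 0) = 132 for the first fragment
+ * texture), followed by 5 stages x 16 constant buffers in slots 164..243.
+ */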
+
+/* compute bufctx (during launch_grid) */
+#define NVC0_BIND_CP_CB(i) ( 0 + (i))
+#define NVC0_BIND_CP_TEX(i) ( 16 + (i))
+#define NVC0_BIND_CP_SUF 48
+#define NVC0_BIND_CP_GLOBAL 49
+#define NVC0_BIND_CP_DESC 50
+#define NVC0_BIND_CP_SCREEN 51
+#define NVC0_BIND_CP_QUERY 52
+#define NVC0_BIND_CP_COUNT 53
+
+/* bufctx for other operations */
+#define NVC0_BIND_2D 0
+#define NVC0_BIND_M2MF 0
+#define NVC0_BIND_FENCE 1
+
+
+struct nvc0_blitctx;
+
+boolean nvc0_blitctx_create(struct nvc0_context *);
+void nvc0_blitctx_destroy(struct nvc0_context *);
+
+struct nvc0_context {
+ struct nouveau_context base;
+
+ struct nouveau_bufctx *bufctx_3d;
+ struct nouveau_bufctx *bufctx;
+ struct nouveau_bufctx *bufctx_cp;
+
+ struct nvc0_screen *screen;
+
+ void (*m2mf_copy_rect)(struct nvc0_context *,
+ const struct nv50_m2mf_rect *dst,
+ const struct nv50_m2mf_rect *src,
+ uint32_t nblocksx, uint32_t nblocksy);
+
+ uint32_t dirty;
+ uint32_t dirty_cp; /* dirty flags for compute state */
+
+ struct {
+ boolean flushed;
+ boolean rasterizer_discard;
+ boolean early_z_forced;
+ boolean prim_restart;
+ uint32_t instance_elts; /* bitmask of per-instance elements */
+ uint32_t instance_base;
+ uint32_t constant_vbos;
+ uint32_t constant_elts;
+ int32_t index_bias;
+ uint16_t scissor;
+ uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
+ uint8_t num_vtxbufs;
+ uint8_t num_vtxelts;
+ uint8_t num_textures[6];
+ uint8_t num_samplers[6];
+ uint8_t tls_required; /* bitmask of shader types using l[] */
+ uint8_t c14_bound; /* whether immediate array constbuf is bound */
+ uint8_t clip_enable;
+ uint32_t clip_mode;
+ uint32_t uniform_buffer_bound[5];
+ struct nvc0_transform_feedback_state *tfb;
+ } state;
+
+ struct nvc0_blend_stateobj *blend;
+ struct nvc0_rasterizer_stateobj *rast;
+ struct nvc0_zsa_stateobj *zsa;
+ struct nvc0_vertex_stateobj *vertex;
+
+ struct nvc0_program *vertprog;
+ struct nvc0_program *tctlprog;
+ struct nvc0_program *tevlprog;
+ struct nvc0_program *gmtyprog;
+ struct nvc0_program *fragprog;
+ struct nvc0_program *compprog;
+
+ struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS];
+ uint16_t constbuf_dirty[6];
+
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ unsigned num_vtxbufs;
+ struct pipe_index_buffer idxbuf;
+ uint32_t constant_vbos;
+ uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
+ uint32_t vb_elt_first; /* from pipe_draw_info, for vertex upload */
+ uint32_t vb_elt_limit; /* max - min element (count - 1) */
+ uint32_t instance_off; /* current base vertex for instanced arrays */
+ uint32_t instance_max; /* last instance for current draw call */
+
+ struct pipe_sampler_view *textures[6][PIPE_MAX_SAMPLERS];
+ unsigned num_textures[6];
+ uint32_t textures_dirty[6];
+ struct nv50_tsc_entry *samplers[6][PIPE_MAX_SAMPLERS];
+ unsigned num_samplers[6];
+ uint16_t samplers_dirty[6];
+
+ uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */
+
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_blend_color blend_colour;
+ struct pipe_stencil_ref stencil_ref;
+ struct pipe_poly_stipple stipple;
+ struct pipe_scissor_state scissor;
+ struct pipe_viewport_state viewport;
+ struct pipe_clip_state clip;
+
+ unsigned sample_mask;
+
+ boolean vbo_push_hint;
+
+ uint8_t tfbbuf_dirty;
+ struct pipe_stream_output_target *tfbbuf[4];
+ unsigned num_tfbbufs;
+
+ struct pipe_query *cond_query;
+ boolean cond_cond;
+ uint cond_mode;
+
+ struct nvc0_blitctx *blit;
+
+ struct pipe_surface *surfaces[2][NVC0_MAX_SURFACE_SLOTS];
+ uint16_t surfaces_dirty[2];
+ uint16_t surfaces_valid[2];
+ uint32_t vport_int[2];
+
+ struct util_dynarray global_residents;
+
+#ifdef NVC0_WITH_DRAW_MODULE
+ struct draw_context *draw;
+#endif
+};
+
+static INLINE struct nvc0_context *
+nvc0_context(struct pipe_context *pipe)
+{
+ return (struct nvc0_context *)pipe;
+}
+
+static INLINE unsigned
+nvc0_shader_stage(unsigned pipe)
+{
+ switch (pipe) {
+ case PIPE_SHADER_VERTEX: return 0;
+/* case PIPE_SHADER_TESSELLATION_CONTROL: return 1; */
+/* case PIPE_SHADER_TESSELLATION_EVALUATION: return 2; */
+ case PIPE_SHADER_GEOMETRY: return 3;
+ case PIPE_SHADER_FRAGMENT: return 4;
+ case PIPE_SHADER_COMPUTE: return 5;
+ default:
+ assert(!"invalid PIPE_SHADER type");
+ return 0;
+ }
+}
+
+
+/* nvc0_context.c */
+struct pipe_context *nvc0_create(struct pipe_screen *, void *);
+void nvc0_bufctx_fence(struct nvc0_context *, struct nouveau_bufctx *,
+ boolean on_flush);
+void nvc0_default_kick_notify(struct nouveau_pushbuf *);
+
+/* nvc0_draw.c */
+extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
+
+/* nvc0_program.c */
+boolean nvc0_program_translate(struct nvc0_program *, uint16_t chipset);
+boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
+void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
+void nvc0_program_library_upload(struct nvc0_context *);
+uint32_t nvc0_program_symbol_offset(const struct nvc0_program *,
+ uint32_t label);
+
+/* nvc0_query.c */
+void nvc0_init_query_functions(struct nvc0_context *);
+void nvc0_query_pushbuf_submit(struct nouveau_pushbuf *,
+ struct pipe_query *, unsigned result_offset);
+void nvc0_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
+void nvc0_so_target_save_offset(struct pipe_context *,
+ struct pipe_stream_output_target *, unsigned i,
+ boolean *serialize);
+
+#define NVC0_QUERY_TFB_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
+
+/* nvc0_shader_state.c */
+void nvc0_vertprog_validate(struct nvc0_context *);
+void nvc0_tctlprog_validate(struct nvc0_context *);
+void nvc0_tevlprog_validate(struct nvc0_context *);
+void nvc0_gmtyprog_validate(struct nvc0_context *);
+void nvc0_fragprog_validate(struct nvc0_context *);
+
+void nvc0_tfb_validate(struct nvc0_context *);
+
+/* nvc0_state.c */
+extern void nvc0_init_state_functions(struct nvc0_context *);
+
+/* nvc0_state_validate.c */
+void nvc0_validate_global_residents(struct nvc0_context *,
+ struct nouveau_bufctx *, int bin);
+extern boolean nvc0_state_validate(struct nvc0_context *, uint32_t state_mask,
+ unsigned space_words);
+
+/* nvc0_surface.c */
+extern void nvc0_clear(struct pipe_context *, unsigned buffers,
+ const union pipe_color_union *color,
+ double depth, unsigned stencil);
+extern void nvc0_init_surface_functions(struct nvc0_context *);
+
+/* nvc0_tex.c */
+boolean nve4_validate_tsc(struct nvc0_context *nvc0, int s);
+void nvc0_validate_textures(struct nvc0_context *);
+void nvc0_validate_samplers(struct nvc0_context *);
+void nve4_set_tex_handles(struct nvc0_context *);
+void nvc0_validate_surfaces(struct nvc0_context *);
+void nve4_set_surface_info(struct nouveau_pushbuf *, struct pipe_surface *,
+ struct nvc0_screen *);
+
+struct pipe_sampler_view *
+nvc0_create_texture_view(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_sampler_view *,
+ uint32_t flags,
+ enum pipe_texture_target);
+struct pipe_sampler_view *
+nvc0_create_sampler_view(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_sampler_view *);
+
+/* nvc0_transfer.c */
+void
+nvc0_init_transfer_functions(struct nvc0_context *);
+
+void
+nvc0_m2mf_push_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, const void *data);
+void
+nve4_p2mf_push_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, const void *data);
+void
+nvc0_cb_push(struct nouveau_context *,
+ struct nouveau_bo *bo, unsigned domain,
+ unsigned base, unsigned size,
+ unsigned offset, unsigned words, const uint32_t *data);
+
+/* nvc0_vbo.c */
+void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
+
+void *
+nvc0_vertex_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements);
+void
+nvc0_vertex_state_delete(struct pipe_context *pipe, void *hwcso);
+
+void nvc0_vertex_arrays_validate(struct nvc0_context *);
+
+void nvc0_idxbuf_validate(struct nvc0_context *);
+
+/* nvc0_video.c */
+struct pipe_video_codec *
+nvc0_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ);
+
+struct pipe_video_buffer *
+nvc0_video_buffer_create(struct pipe_context *pipe,
+ const struct pipe_video_buffer *templat);
+
+/* nvc0_push.c */
+void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *);
+
+/* nve4_compute.c */
+void nve4_launch_grid(struct pipe_context *,
+ const uint *, const uint *, uint32_t, const void *);
+
+/* nvc0_compute.c */
+void nvc0_launch_grid(struct pipe_context *,
+ const uint *, const uint *, uint32_t, const void *);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_draw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_draw.c
new file mode 100644
index 0000000..e261d50
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_draw.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "draw/draw_pipe.h"
+
+#include "nvc0/nvc0_context.h"
+
+struct nvc0_render_stage {
+ struct draw_stage stage;
+ struct nvc0_context *nvc0;
+};
+
+static INLINE struct nvc0_render_stage *
+nvc0_render_stage(struct draw_stage *stage)
+{
+ return (struct nvc0_render_stage *)stage;
+}
+
+static void
+nvc0_render_point(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nvc0_render_line(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nvc0_render_tri(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nvc0_render_flush(struct draw_stage *stage, unsigned flags)
+{
+}
+
+static void
+nvc0_render_reset_stipple_counter(struct draw_stage *stage)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nvc0_render_destroy(struct draw_stage *stage)
+{
+ FREE(stage);
+}
+
+struct draw_stage *
+nvc0_draw_render_stage(struct nvc0_context *nvc0)
+{
+ struct nvc0_render_stage *rs = CALLOC_STRUCT(nvc0_render_stage);
+
+ rs->nvc0 = nvc0;
+ rs->stage.draw = nvc0->draw;
+ rs->stage.destroy = nvc0_render_destroy;
+ rs->stage.point = nvc0_render_point;
+ rs->stage.line = nvc0_render_line;
+ rs->stage.tri = nvc0_render_tri;
+ rs->stage.flush = nvc0_render_flush;
+ rs->stage.reset_stipple_counter = nvc0_render_reset_stipple_counter;
+
+ return &rs->stage;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_formats.c b/src/gallium/drivers/nouveau/nvc0/nvc0_formats.c
new file mode 100644
index 0000000..2bfdb0e
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_formats.c
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define NOUVEAU_DRIVER 0xc0
+
+#include "nv50/nv50_formats.c"
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_graph_macros.h
new file mode 100644
index 0000000..f009980
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_graph_macros.h
@@ -0,0 +1,236 @@
+
+#ifndef __NVC0_PGRAPH_MACROS_H__
+#define __NVC0_PGRAPH_MACROS_H__
+
+/* extrinsrt r1, r2, src, size, dst: replace bits [dst:dst+size) in r1
+ * with bits [src:src+size) in r2
+ *
+ * bra(n)z annul: no delay slot
+ */
+
+/* Bitfield version of NVC0_3D_VERTEX_ARRAY_PER_INSTANCE[].
+ * Args: size, bitfield
+ */
+static const uint32_t nvc0_9097_per_instance_bf[] =
+{
+ 0x00000301, /* parm $r3 (the bitfield) */
+ 0x00000211, /* mov $r2 0 */
+ 0x05880021, /* maddr [NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(0), increment = 4] */
+ 0xffffc911, /* mov $r1 (add $r1 -0x1) */
+ 0x0040d043, /* send (extrshl $r3 $r2 0x1 0) */
+ 0xffff8897, /* exit branz $r1 0x3 */
+ 0x00005211 /* mov $r2 (add $r2 0x1) */
+};
+
+/* The comments above the macros describe what they *should* be doing,
+ * but we use less functionality for now.
+ */
+
+/*
+ * for (i = 0; i < 8; ++i)
+ * [NVC0_3D_BLEND_ENABLE(i)] = BIT(i of arg);
+ *
+ * [3428] = arg;
+ *
+ * if (arg == 0 || [NVC0_3D_MULTISAMPLE_ENABLE] == 0)
+ * [0d9c] = 0;
+ * else
+ * [0d9c] = [342c];
+ */
+static const uint32_t nvc0_9097_blend_enables[] =
+{
+ 0x05360021, /* 0x00: maddr [NVC0_3D_BLEND_ENABLE(0), increment = 4] */
+ 0x00404042, /* 0x01: send extrinsrt 0 $r1 0 0x1 0 */
+ 0x00424042, /* 0x02: send extrinsrt 0 $r1 0x1 0x1 0 */
+ 0x00444042, /* 0x03: send extrinsrt 0 $r1 0x2 0x1 0 */
+ 0x00464042, /* 0x04: send extrinsrt 0 $r1 0x3 0x1 0 */
+ 0x00484042, /* 0x05: send extrinsrt 0 $r1 0x4 0x1 0 */
+ 0x004a4042, /* 0x06: send extrinsrt 0 $r1 0x5 0x1 0 */
+ 0x004c40c2, /* 0x07: exit send extrinsrt 0 $r1 0x6 0x1 0 */
+ 0x004e4042, /* 0x08: send extrinsrt 0 $r1 0x7 0x1 0 */
+};
+
+/*
+ * uint64 limit = (parm(0) << 32) | parm(1);
+ * uint64 start = (parm(2) << 32);
+ *
+ * if (limit) {
+ * start |= parm(3);
+ * --limit;
+ * } else {
+ * start |= 1;
+ * }
+ *
+ * [0x1c04 + (arg & 0xf) * 16 + 0] = (start >> 32) & 0xff;
+ * [0x1c04 + (arg & 0xf) * 16 + 4] = start & 0xffffffff;
+ * [0x1f00 + (arg & 0xf) * 8 + 0] = (limit >> 32) & 0xff;
+ * [0x1f00 + (arg & 0xf) * 8 + 4] = limit & 0xffffffff;
+ */
+static const uint32_t nvc0_9097_vertex_array_select[] =
+{
+ 0x00000201, /* 0x00: parm $r2 */
+ 0x00000301, /* 0x01: parm $r3 */
+ 0x00000401, /* 0x02: parm $r4 */
+ 0x00000501, /* 0x03: parm $r5 */
+ 0x11004612, /* 0x04: mov $r6 extrinsrt 0 $r1 0 4 2 */
+ 0x09004712, /* 0x05: mov $r7 extrinsrt 0 $r1 0 4 1 */
+ 0x05c07621, /* 0x06: maddr $r6 add $6 0x1701 */
+ 0x00002041, /* 0x07: send $r4 */
+ 0x00002841, /* 0x08: send $r5 */
+ 0x05f03f21, /* 0x09: maddr $r7 add $7 0x17c0 */
+ 0x000010c1, /* 0x0a: exit send $r2 */
+ 0x00001841, /* 0x0b: send $r3 */
+};
+
+/*
+ * [GL_POLYGON_MODE_FRONT] = arg;
+ *
+ * if (BIT(31 of [0x3410]))
+ * [1a24] = 0x7353;
+ *
+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
+ * [02ec] = 0;
+ * else
+ * if ([GL_POLYGON_MODE_BACK] == GL_LINE || arg == GL_LINE)
+ * [02ec] = BYTE(1 of [0x3410]) << 4;
+ * else
+ * [02ec] = BYTE(0 of [0x3410]) << 4;
+ */
+static const uint32_t nvc0_9097_poly_mode_front[] =
+{
+ 0x00db0215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_BACK] */
+ 0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */
+ 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */
+ 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
+ 0x00004211, /* 0x04: mov $r2 0x1 */
+ 0x00180611, /* 0x05: mov $r6 0x60 */
+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x07: braz $r7 0xa */
+ 0x00dac021, /* 0x08: maddr 0x36b */
+ 0x00800611, /* 0x09: mov $r6 0x200 */
+ 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */
+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x0c: braz $r7 0xf */
+ 0x00000841, /* 0x0d: send $r1 */
+ 0x00000611, /* 0x0e: mov $r6 0 */
+ 0x002ec0a1, /* 0x0f: exit maddr [02ec] */
+ 0x00003041 /* 0x10: send $r6 */
+};
+
+/*
+ * [GL_POLYGON_MODE_BACK] = arg;
+ *
+ * if (BIT(31 of [0x3410]))
+ * [1a24] = 0x7353;
+ *
+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
+ * [02ec] = 0;
+ * else
+ * if ([GL_POLYGON_MODE_FRONT] == GL_LINE || arg == GL_LINE)
+ * [02ec] = BYTE(1 of [0x3410]) << 4;
+ * else
+ * [02ec] = BYTE(0 of [0x3410]) << 4;
+ */
+/* NOTE: 0x3410 = 0x80002006 by default,
+ * POLYGON_MODE == GL_LINE check replaced by (MODE & 1)
+ * SP_SELECT(i) == (i << 4) | 1 check replaced by SP_SELECT(i) & 1
+ */
+static const uint32_t nvc0_9097_poly_mode_back[] =
+{
+ 0x00dac215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_FRONT] */
+ 0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */
+ 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */
+ 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
+ 0x00004211, /* 0x04: mov $r2 0x1 */
+ 0x00180611, /* 0x05: mov $r6 0x60 */
+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x07: braz $r7 0xa */
+ 0x00db0021, /* 0x08: maddr 0x36c */
+ 0x00800611, /* 0x09: mov $r6 0x200 */
+ 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */
+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x0c: braz $r7 0xf */
+ 0x00000841, /* 0x0d: send $r1 */
+ 0x00000611, /* 0x0e: mov $r6 0 */
+ 0x002ec0a1, /* 0x0f: exit maddr [02ec] */
+ 0x00003041 /* 0x10: send $r6 */
+};
+
+/*
+ * [NVC0_3D_SP_SELECT(4)] = arg
+ *
+ * if BIT(31 of [0x3410]) == 0
+ * [1a24] = 0x7353;
+ *
+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || arg == 0x41)
+ * [02ec] = 0
+ * else
+ * if (any POLYGON MODE == LINE)
+ * [02ec] = BYTE(1 of [3410]) << 4;
+ * else
+ * [02ec] = BYTE(0 of [3410]) << 4; // 02ec valid bits are 0xff1
+ */
+static const uint32_t nvc0_9097_gp_select[] = /* 0x0f */
+{
+ 0x00dac215, /* 0x00: read $r2 0x36b */
+ 0x00db0315, /* 0x01: read $r3 0x36c */
+ 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */
+ 0x020c0415, /* 0x03: read $r4 0x830 */
+ 0x00004211, /* 0x04: mov $r2 0x1 */
+ 0x00180611, /* 0x05: mov $r6 0x60 */
+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x07: braz $r7 0xa */
+ 0x02100021, /* 0x08: maddr 0x840 */
+ 0x00800611, /* 0x09: mov $r6 0x200 */
+ 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */
+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x0c: braz $r7 0xf */
+ 0x00000841, /* 0x0d: send $r1 */
+ 0x00000611, /* 0x0e: mov $r6 0 */
+ 0x002ec0a1, /* 0x0f: exit maddr 0xbb */
+ 0x00003041, /* 0x10: send $r6 */
+};
+
+/*
+ * [NVC0_3D_SP_SELECT(3)] = arg
+ *
+ * if BIT(31 of [0x3410]) == 0
+ * [1a24] = 0x7353;
+ *
+ * if (arg == 0x31) {
+ * if (BIT(2 of [0x3430])) {
+ * int i = 15; do { --i; } while(i);
+ * [0x1a2c] = 0;
+ * }
+ * }
+ *
+ * if ([NVC0_3D_SP_SELECT(4)] == 0x41 || arg == 0x31)
+ * [02ec] = 0
+ * else
+ * if ([any POLYGON_MODE] == GL_LINE)
+ * [02ec] = BYTE(1 of [3410]) << 4;
+ * else
+ * [02ec] = BYTE(0 of [3410]) << 4;
+ */
+static const uint32_t nvc0_9097_tep_select[] = /* 0x10 */
+{
+ 0x00dac215, /* 0x00: read $r2 0x36b */
+ 0x00db0315, /* 0x01: read $r3 0x36c */
+ 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */
+ 0x02100415, /* 0x03: read $r4 0x840 */
+ 0x00004211, /* 0x04: mov $r2 0x1 */
+ 0x00180611, /* 0x05: mov $r6 0x60 */
+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x07: braz $r7 0xa */
+ 0x020c0021, /* 0x08: maddr 0x830 */
+ 0x00800611, /* 0x09: mov $r6 0x200 */
+ 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */
+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x0c: braz $r7 0xf */
+ 0x00000841, /* 0x0d: send $r1 */
+ 0x00000611, /* 0x0e: mov $r6 0 */
+ 0x002ec0a1, /* 0x0f: exit maddr 0xbb */
+ 0x00003041, /* 0x10: send $r6 */
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_m2mf.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_m2mf.xml.h
new file mode 100644
index 0000000..3bf628d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_m2mf.xml.h
@@ -0,0 +1,138 @@
+#ifndef NVC0_M2MF_XML
+#define NVC0_M2MF_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_m2mf.xml ( 2227 bytes, from 2010-10-16 16:10:29)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24)
+- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_M2MF_TILING_MODE_IN 0x00000204
+
+#define NVC0_M2MF_TILING_PITCH_IN 0x00000208
+
+#define NVC0_M2MF_TILING_HEIGHT_IN 0x0000020c
+
+#define NVC0_M2MF_TILING_DEPTH_IN 0x00000210
+
+#define NVC0_M2MF_TILING_POSITION_IN_Z 0x00000214
+
+#define NVC0_M2MF_TILING_MODE_OUT 0x00000220
+
+#define NVC0_M2MF_TILING_PITCH_OUT 0x00000224
+
+#define NVC0_M2MF_TILING_HEIGHT_OUT 0x00000228
+
+#define NVC0_M2MF_TILING_DEPTH_OUT 0x0000022c
+
+#define NVC0_M2MF_TILING_POSITION_OUT_Z 0x00000230
+
+#define NVC0_M2MF_OFFSET_OUT_HIGH 0x00000238
+
+#define NVC0_M2MF_OFFSET_OUT_LOW 0x0000023c
+
+#define NVC0_M2MF_EXEC 0x00000300
+#define NVC0_M2MF_EXEC_PUSH 0x00000001
+#define NVC0_M2MF_EXEC_LINEAR_IN 0x00000010
+#define NVC0_M2MF_EXEC_LINEAR_OUT 0x00000100
+#define NVC0_M2MF_EXEC_NOTIFY 0x00002000
+#define NVC0_M2MF_EXEC_INC__MASK 0x00f00000
+#define NVC0_M2MF_EXEC_INC__SHIFT 20
+
+#define NVC0_M2MF_DATA 0x00000304
+
+#define NVC0_M2MF_OFFSET_IN_HIGH 0x0000030c
+
+#define NVC0_M2MF_OFFSET_IN_LOW 0x00000310
+
+#define NVC0_M2MF_PITCH_IN 0x00000314
+
+#define NVC0_M2MF_PITCH_OUT 0x00000318
+
+#define NVC0_M2MF_LINE_LENGTH_IN 0x0000031c
+
+#define NVC0_M2MF_LINE_COUNT 0x00000320
+
+#define NVC0_M2MF_NOTIFY_ADDRESS_HIGH 0x0000032c
+
+#define NVC0_M2MF_NOTIFY_ADDRESS_LOW 0x00000330
+
+#define NVC0_M2MF_NOTIFY 0x00000334
+
+#define NVC0_M2MF_TILING_POSITION_IN_X 0x00000344
+
+#define NVC0_M2MF_TILING_POSITION_IN_Y 0x00000348
+
+#define NVC0_M2MF_TILING_POSITION_OUT_X 0x0000034c
+
+#define NVC0_M2MF_TILING_POSITION_OUT_Y 0x00000350
+
+
+#endif /* NVC0_M2MF_XML */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
new file mode 100644
index 0000000..79c9390
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
@@ -0,0 +1,358 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_resource.h"
+
+static uint32_t
+nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz)
+{
+ return nv50_tex_choose_tile_dims_helper(nx, ny, nz);
+}
+
+static uint32_t
+nvc0_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed)
+{
+ const unsigned ms = util_logbase2(mt->base.base.nr_samples);
+
+ uint32_t tile_flags;
+
+ if (unlikely(mt->base.base.bind & PIPE_BIND_CURSOR))
+ return 0;
+ if (unlikely(mt->base.base.flags & NOUVEAU_RESOURCE_FLAG_LINEAR))
+ return 0;
+
+ switch (mt->base.base.format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ if (compressed)
+ tile_flags = 0x02 + ms;
+ else
+ tile_flags = 0x01;
+ break;
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ if (compressed)
+ tile_flags = 0x51 + ms;
+ else
+ tile_flags = 0x46;
+ break;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ if (compressed)
+ tile_flags = 0x17 + ms;
+ else
+ tile_flags = 0x11;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ if (compressed)
+ tile_flags = 0x86 + ms;
+ else
+ tile_flags = 0x7b;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ if (compressed)
+ tile_flags = 0xce + ms;
+ else
+ tile_flags = 0xc3;
+ break;
+ default:
+ switch (util_format_get_blocksizebits(mt->base.base.format)) {
+ case 128:
+ if (compressed)
+ tile_flags = 0xf4 + ms * 2;
+ else
+ tile_flags = 0xfe;
+ break;
+ case 64:
+ if (compressed) {
+ switch (ms) {
+ case 0: tile_flags = 0xe6; break;
+ case 1: tile_flags = 0xeb; break;
+ case 2: tile_flags = 0xed; break;
+ case 3: tile_flags = 0xf2; break;
+ default:
+ return 0;
+ }
+ } else {
+ tile_flags = 0xfe;
+ }
+ break;
+ case 32:
+ if (compressed && ms) {
+ switch (ms) {
+ /* This one makes things blurry:
+ case 0: tile_flags = 0xdb; break;
+ */
+ case 1: tile_flags = 0xdd; break;
+ case 2: tile_flags = 0xdf; break;
+ case 3: tile_flags = 0xe4; break;
+ default:
+ return 0;
+ }
+ } else {
+ tile_flags = 0xfe;
+ }
+ break;
+ case 16:
+ case 8:
+ tile_flags = 0xfe;
+ break;
+ default:
+ return 0;
+ }
+ break;
+ }
+
+ return tile_flags;
+}
+
+static INLINE boolean
+nvc0_miptree_init_ms_mode(struct nv50_miptree *mt)
+{
+ switch (mt->base.base.nr_samples) {
+ case 8:
+ mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS8;
+ mt->ms_x = 2;
+ mt->ms_y = 1;
+ break;
+ case 4:
+ mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS4;
+ mt->ms_x = 1;
+ mt->ms_y = 1;
+ break;
+ case 2:
+ mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS2;
+ mt->ms_x = 1;
+ break;
+ case 1:
+ case 0:
+ mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
+ break;
+ default:
+ NOUVEAU_ERR("invalid nr_samples: %u\n", mt->base.base.nr_samples);
+ return FALSE;
+ }
+ return TRUE;
+}
+
+static void
+nvc0_miptree_init_layout_video(struct nv50_miptree *mt)
+{
+ const struct pipe_resource *pt = &mt->base.base;
+ const unsigned blocksize = util_format_get_blocksize(pt->format);
+
+ assert(pt->last_level == 0);
+ assert(mt->ms_x == 0 && mt->ms_y == 0);
+ assert(!util_format_is_compressed(pt->format));
+
+ mt->layout_3d = pt->target == PIPE_TEXTURE_3D;
+
+ mt->level[0].tile_mode = 0x10;
+ mt->level[0].pitch = align(pt->width0 * blocksize, 64);
+ mt->total_size = align(pt->height0, 16) * mt->level[0].pitch * (mt->layout_3d ? pt->depth0 : 1);
+
+ if (pt->array_size > 1) {
+ mt->layer_stride = align(mt->total_size, NVC0_TILE_SIZE(0x10));
+ mt->total_size = mt->layer_stride * pt->array_size;
+ }
+}
+
+static void
+nvc0_miptree_init_layout_tiled(struct nv50_miptree *mt)
+{
+ struct pipe_resource *pt = &mt->base.base;
+ unsigned w, h, d, l;
+ const unsigned blocksize = util_format_get_blocksize(pt->format);
+
+ mt->layout_3d = pt->target == PIPE_TEXTURE_3D;
+
+ w = pt->width0 << mt->ms_x;
+ h = pt->height0 << mt->ms_y;
+
+ /* For 3D textures, a mipmap is spanned by all the layers; for array
+ * textures and cube maps, each layer contains its own mipmaps.
+ */
+ d = mt->layout_3d ? pt->depth0 : 1;
+
+ assert(!mt->ms_mode || !pt->last_level);
+
+ for (l = 0; l <= pt->last_level; ++l) {
+ struct nv50_miptree_level *lvl = &mt->level[l];
+ unsigned tsx, tsy, tsz;
+ unsigned nbx = util_format_get_nblocksx(pt->format, w);
+ unsigned nby = util_format_get_nblocksy(pt->format, h);
+
+ lvl->offset = mt->total_size;
+
+ lvl->tile_mode = nvc0_tex_choose_tile_dims(nbx, nby, d);
+
+ tsx = NVC0_TILE_SIZE_X(lvl->tile_mode); /* x is tile row pitch in bytes */
+ tsy = NVC0_TILE_SIZE_Y(lvl->tile_mode);
+ tsz = NVC0_TILE_SIZE_Z(lvl->tile_mode);
+
+ lvl->pitch = align(nbx * blocksize, tsx);
+
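+ /* Pad each level to whole tiles: the pitch is already a multiple of the
+ * tile width, and the block rows and depth are rounded up to the tile
+ * height and depth.
+ */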
+ mt->total_size += lvl->pitch * align(nby, tsy) * align(d, tsz);
+
+ w = u_minify(w, 1);
+ h = u_minify(h, 1);
+ d = u_minify(d, 1);
+ }
+
+ if (pt->array_size > 1) {
+ mt->layer_stride = align(mt->total_size,
+ NVC0_TILE_SIZE(mt->level[0].tile_mode));
+ mt->total_size = mt->layer_stride * pt->array_size;
+ }
+}
+
+const struct u_resource_vtbl nvc0_miptree_vtbl =
+{
+ nv50_miptree_get_handle, /* get_handle */
+ nv50_miptree_destroy, /* resource_destroy */
+ nvc0_miptree_transfer_map, /* transfer_map */
+ u_default_transfer_flush_region, /* transfer_flush_region */
+ nvc0_miptree_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
+};
+
+struct pipe_resource *
+nvc0_miptree_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ)
+{
+ struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+ struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
+ struct pipe_resource *pt = &mt->base.base;
+ boolean compressed = dev->drm_version >= 0x01000101;
+ int ret;
+ union nouveau_bo_config bo_config;
+ uint32_t bo_flags;
+
+ if (!mt)
+ return NULL;
+
+ mt->base.vtbl = &nvc0_miptree_vtbl;
+ *pt = *templ;
+ pipe_reference_init(&pt->reference, 1);
+ pt->screen = pscreen;
+
+ if (pt->usage == PIPE_USAGE_STAGING) {
+ switch (pt->target) {
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
+ if (pt->last_level == 0 &&
+ !util_format_is_depth_or_stencil(pt->format) &&
+ pt->nr_samples <= 1)
+ pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (pt->bind & PIPE_BIND_LINEAR)
+ pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR;
+
+ bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed);
+
+ if (!nvc0_miptree_init_ms_mode(mt)) {
+ FREE(mt);
+ return NULL;
+ }
+
+ if (unlikely(pt->flags & NVC0_RESOURCE_FLAG_VIDEO)) {
+ nvc0_miptree_init_layout_video(mt);
+ } else
+ if (likely(bo_config.nvc0.memtype)) {
+ nvc0_miptree_init_layout_tiled(mt);
+ } else
+ if (!nv50_miptree_init_layout_linear(mt, 128)) {
+ FREE(mt);
+ return NULL;
+ }
+ bo_config.nvc0.tile_mode = mt->level[0].tile_mode;
+
+ if (!bo_config.nvc0.memtype && pt->usage == PIPE_USAGE_STAGING)
+ mt->base.domain = NOUVEAU_BO_GART;
+ else
+ mt->base.domain = NOUVEAU_BO_VRAM;
+
+ bo_flags = mt->base.domain | NOUVEAU_BO_NOSNOOP;
+
+ if (mt->base.base.bind & (PIPE_BIND_CURSOR | PIPE_BIND_DISPLAY_TARGET))
+ bo_flags |= NOUVEAU_BO_CONTIG;
+
+ ret = nouveau_bo_new(dev, bo_flags, 4096, mt->total_size, &bo_config,
+ &mt->base.bo);
+ if (ret) {
+ FREE(mt);
+ return NULL;
+ }
+ mt->base.address = mt->base.bo->offset;
+
+ NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_count, 1);
+ NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_bytes,
+ mt->total_size);
+
+ return pt;
+}
+
+/* Offset of zslice @z from start of level @l. */
+INLINE unsigned
+nvc0_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z)
+{
+ const struct pipe_resource *pt = &mt->base.base;
+
+ unsigned tds = NVC0_TILE_SHIFT_Z(mt->level[l].tile_mode);
+ unsigned ths = NVC0_TILE_SHIFT_Y(mt->level[l].tile_mode);
+
+ unsigned nby = util_format_get_nblocksy(pt->format,
+ u_minify(pt->height0, l));
+
+ /* to next 2D tile slice within a 3D tile */
+ unsigned stride_2d = NVC0_TILE_SIZE_2D(mt->level[l].tile_mode);
+
+ /* to slice in the next (in z direction) 3D tile */
+ unsigned stride_3d = (align(nby, (1 << ths)) * mt->level[l].pitch) << tds;
+
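+ /* The low tds bits of z select the 2D slice within the current 3D tile,
+ * the upper bits select how many whole 3D tiles to skip.
+ */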
+ return (z & ((1 << tds) - 1)) * stride_2d + (z >> tds) * stride_3d;
+}
+
+/* Surface functions.
+ */
+
+struct pipe_surface *
+nvc0_miptree_surface_new(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ const struct pipe_surface *templ)
+{
+ struct nv50_surface *ns = nv50_surface_from_miptree(nv50_miptree(pt), templ);
+ if (!ns)
+ return NULL;
+ ns->base.context = pipe;
+ return &ns->base;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
new file mode 100644
index 0000000..71deb34
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -0,0 +1,811 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_defines.h"
+
+#include "nvc0/nvc0_context.h"
+
+#include "codegen/nv50_ir_driver.h"
+#include "nvc0/nve4_compute.h"
+
+/* NOTE: Using a[0x270] in FP may cause an error even if we're using less than
+ * 124 scalar varying values.
+ */
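+/* The returned values are byte addresses in the shader's attribute space;
+ * e.g. a GENERIC input with si = 2 and ubase = 0x80 is read from
+ * 0x80 + 2 * 0x10 = 0xa0.
+ */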
+static uint32_t
+nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase)
+{
+ switch (sn) {
+ case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4;
+ case TGSI_SEMANTIC_PRIMID: return 0x060;
+ case TGSI_SEMANTIC_PSIZE: return 0x06c;
+ case TGSI_SEMANTIC_POSITION: return 0x070;
+ case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
+ case TGSI_SEMANTIC_FOG: return 0x2e8;
+ case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10;
+ case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10;
+ case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4;
+ case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10;
+ case TGSI_SEMANTIC_CLIPVERTEX: return 0x270;
+ case TGSI_SEMANTIC_PCOORD: return 0x2e0;
+ case NV50_SEMANTIC_TESSCOORD: return 0x2f0;
+ case TGSI_SEMANTIC_INSTANCEID: return 0x2f8;
+ case TGSI_SEMANTIC_VERTEXID: return 0x2fc;
+ case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10;
+ case TGSI_SEMANTIC_FACE: return 0x3fc;
+ case NV50_SEMANTIC_INVOCATIONID: return ~0;
+ default:
+ assert(!"invalid TGSI input semantic");
+ return ~0;
+ }
+}
+
+static uint32_t
+nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase)
+{
+ switch (sn) {
+ case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4;
+ case TGSI_SEMANTIC_PRIMID: return 0x060;
+ case NV50_SEMANTIC_LAYER: return 0x064;
+ case NV50_SEMANTIC_VIEWPORTINDEX: return 0x068;
+ case TGSI_SEMANTIC_PSIZE: return 0x06c;
+ case TGSI_SEMANTIC_POSITION: return 0x070;
+ case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
+ case TGSI_SEMANTIC_FOG: return 0x2e8;
+ case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10;
+ case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10;
+ case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4;
+ case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10;
+ case TGSI_SEMANTIC_CLIPVERTEX: return 0x270;
+ case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10;
+ case TGSI_SEMANTIC_EDGEFLAG: return ~0;
+ default:
+ assert(!"invalid TGSI output semantic");
+ return ~0;
+ }
+}
+
+static int
+nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info)
+{
+ unsigned i, c, n;
+
+ for (n = 0, i = 0; i < info->numInputs; ++i) {
+ switch (info->in[i].sn) {
+ case TGSI_SEMANTIC_INSTANCEID: /* for SM4 only, in TGSI they're SVs */
+ case TGSI_SEMANTIC_VERTEXID:
+ info->in[i].mask = 0x1;
+ info->in[i].slot[0] =
+ nvc0_shader_input_address(info->in[i].sn, 0, 0) / 4;
+ continue;
+ default:
+ break;
+ }
+ for (c = 0; c < 4; ++c)
+ info->in[i].slot[c] = (0x80 + n * 0x10 + c * 0x4) / 4;
+ ++n;
+ }
+
+ return 0;
+}
+
+static int
+nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info)
+{
+ unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10);
+ unsigned offset;
+ unsigned i, c;
+
+ for (i = 0; i < info->numInputs; ++i) {
+ offset = nvc0_shader_input_address(info->in[i].sn,
+ info->in[i].si, ubase);
+ if (info->in[i].patch && offset >= 0x20)
+ offset = 0x20 + info->in[i].si * 0x10;
+
+ if (info->in[i].sn == NV50_SEMANTIC_TESSCOORD)
+ info->in[i].mask &= 3;
+
+ for (c = 0; c < 4; ++c)
+ info->in[i].slot[c] = (offset + c * 0x4) / 4;
+ }
+
+ return 0;
+}
+
+static int
+nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info)
+{
+ unsigned count = info->prop.fp.numColourResults * 4;
+ unsigned i, c;
+
+ for (i = 0; i < info->numOutputs; ++i)
+ if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
+ for (c = 0; c < 4; ++c)
+ info->out[i].slot[c] = info->out[i].si * 4 + c;
+
+ if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
+ info->out[info->io.sampleMask].slot[0] = count++;
+ else
+ if (info->target >= 0xe0)
+ count++; /* on Kepler, depth is always last colour reg + 2 */
+
+ if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
+ info->out[info->io.fragDepth].slot[2] = count;
+
+ return 0;
+}
+
+static int
+nvc0_sp_assign_output_slots(struct nv50_ir_prog_info *info)
+{
+ unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10);
+ unsigned offset;
+ unsigned i, c;
+
+ for (i = 0; i < info->numOutputs; ++i) {
+ offset = nvc0_shader_output_address(info->out[i].sn,
+ info->out[i].si, ubase);
+ if (info->out[i].patch && offset >= 0x20)
+ offset = 0x20 + info->out[i].si * 0x10;
+
+ for (c = 0; c < 4; ++c)
+ info->out[i].slot[c] = (offset + c * 0x4) / 4;
+ }
+
+ return 0;
+}
+
+static int
+nvc0_program_assign_varying_slots(struct nv50_ir_prog_info *info)
+{
+ int ret;
+
+ if (info->type == PIPE_SHADER_VERTEX)
+ ret = nvc0_vp_assign_input_slots(info);
+ else
+ ret = nvc0_sp_assign_input_slots(info);
+ if (ret)
+ return ret;
+
+ if (info->type == PIPE_SHADER_FRAGMENT)
+ ret = nvc0_fp_assign_output_slots(info);
+ else
+ ret = nvc0_sp_assign_output_slots(info);
+ return ret;
+}
+
+static INLINE void
+nvc0_vtgp_hdr_update_oread(struct nvc0_program *vp, uint8_t slot)
+{
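+ /* hdr[4] packs the min (bits 12..19) and max (bits 24..31) slot of outputs
+ * read back by a later stage; the header generators start it out at 0xff000.
+ */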
+ uint8_t min = (vp->hdr[4] >> 12) & 0xff;
+ uint8_t max = (vp->hdr[4] >> 24);
+
+ min = MIN2(min, slot);
+ max = MAX2(max, slot);
+
+ vp->hdr[4] = (max << 24) | (min << 12);
+}
+
+/* Common part of header generation for VP, TCP, TEP and GP. */
+static int
+nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
+{
+ unsigned i, c, a;
+
+ for (i = 0; i < info->numInputs; ++i) {
+ if (info->in[i].patch)
+ continue;
+ for (c = 0; c < 4; ++c) {
+ a = info->in[i].slot[c];
+ if (info->in[i].mask & (1 << c)) {
+ if (info->in[i].sn != NV50_SEMANTIC_TESSCOORD)
+ vp->hdr[5 + a / 32] |= 1 << (a % 32);
+ else
+ nvc0_vtgp_hdr_update_oread(vp, info->in[i].slot[c]);
+ }
+ }
+ }
+
+ for (i = 0; i < info->numOutputs; ++i) {
+ if (info->out[i].patch)
+ continue;
+ for (c = 0; c < 4; ++c) {
+ if (!(info->out[i].mask & (1 << c)))
+ continue;
+ assert(info->out[i].slot[c] >= 0x40 / 4);
+ a = info->out[i].slot[c] - 0x40 / 4;
+ vp->hdr[13 + a / 32] |= 1 << (a % 32);
+ if (info->out[i].oread)
+ nvc0_vtgp_hdr_update_oread(vp, info->out[i].slot[c]);
+ }
+ }
+
+ for (i = 0; i < info->numSysVals; ++i) {
+ switch (info->sv[i].sn) {
+ case TGSI_SEMANTIC_PRIMID:
+ vp->hdr[5] |= 1 << 24;
+ break;
+ case TGSI_SEMANTIC_INSTANCEID:
+ vp->hdr[10] |= 1 << 30;
+ break;
+ case TGSI_SEMANTIC_VERTEXID:
+ vp->hdr[10] |= 1 << 31;
+ break;
+ default:
+ break;
+ }
+ }
+
+ vp->vp.clip_enable = info->io.clipDistanceMask;
+ for (i = 0; i < 8; ++i)
+ if (info->io.cullDistanceMask & (1 << i))
+ vp->vp.clip_mode |= 1 << (i * 4);
+
+ if (info->io.genUserClip < 0)
+ vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */
+
+ return 0;
+}
+
+static int
+nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
+{
+ vp->hdr[0] = 0x20061 | (1 << 10);
+ vp->hdr[4] = 0xff000;
+
+ vp->hdr[18] = info->io.clipDistanceMask;
+
+ return nvc0_vtgp_gen_header(vp, info);
+}
+
+#if defined(PIPE_SHADER_HULL) || defined(PIPE_SHADER_DOMAIN)
+static void
+nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info)
+{
+ if (info->prop.tp.outputPrim == PIPE_PRIM_MAX) {
+ tp->tp.tess_mode = ~0;
+ return;
+ }
+ switch (info->prop.tp.domain) {
+ case PIPE_PRIM_LINES:
+ tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_ISOLINES;
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_TRIANGLES;
+ if (info->prop.tp.winding > 0)
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW;
+ break;
+ case PIPE_PRIM_QUADS:
+ tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_QUADS;
+ break;
+ default:
+ tp->tp.tess_mode = ~0;
+ return;
+ }
+ if (info->prop.tp.outputPrim != PIPE_PRIM_POINTS)
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CONNECTED;
+
+ switch (info->prop.tp.partitioning) {
+ case PIPE_TESS_PART_INTEGER:
+ case PIPE_TESS_PART_POW2:
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_EQUAL;
+ break;
+ case PIPE_TESS_PART_FRACT_ODD:
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD;
+ break;
+ case PIPE_TESS_PART_FRACT_EVEN:
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN;
+ break;
+ default:
+ assert(!"invalid tessellator partitioning");
+ break;
+ }
+}
+#endif
+
+#ifdef PIPE_SHADER_HULL
+static int
+nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info *info)
+{
+ unsigned opcs = 6; /* output patch constants (at least the TessFactors) */
+
+ tcp->tp.input_patch_size = info->prop.tp.inputPatchSize;
+
+ if (info->numPatchConstants)
+ opcs = 8 + info->numPatchConstants * 4;
+
+ tcp->hdr[0] = 0x20061 | (2 << 10);
+
+ tcp->hdr[1] = opcs << 24;
+ tcp->hdr[2] = info->prop.tp.outputPatchSize << 24;
+
+ tcp->hdr[4] = 0xff000; /* initial min/max parallel output read address */
+
+ nvc0_vtgp_gen_header(tcp, info);
+
+ nvc0_tp_get_tess_mode(tcp, info);
+
+ return 0;
+}
+#endif
+
+#ifdef PIPE_SHADER_DOMAIN
+static int
+nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info)
+{
+ tep->tp.input_patch_size = ~0;
+
+ tep->hdr[0] = 0x20061 | (3 << 10);
+ tep->hdr[4] = 0xff000;
+
+ nvc0_vtgp_gen_header(tep, info);
+
+ nvc0_tp_get_tess_mode(tep, info);
+
+ tep->hdr[18] |= 0x3 << 12; /* ? */
+
+ return 0;
+}
+#endif
+
+static int
+nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info)
+{
+ gp->hdr[0] = 0x20061 | (4 << 10);
+
+ gp->hdr[2] = MIN2(info->prop.gp.instanceCount, 32) << 24;
+
+ switch (info->prop.gp.outputPrim) {
+ case PIPE_PRIM_POINTS:
+ gp->hdr[3] = 0x01000000;
+ gp->hdr[0] |= 0xf0000000;
+ break;
+ case PIPE_PRIM_LINE_STRIP:
+ gp->hdr[3] = 0x06000000;
+ gp->hdr[0] |= 0x10000000;
+ break;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ gp->hdr[3] = 0x07000000;
+ gp->hdr[0] |= 0x10000000;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ gp->hdr[4] = info->prop.gp.maxVertices & 0x1ff;
+
+ return nvc0_vtgp_gen_header(gp, info);
+}
+
+#define NVC0_INTERP_FLAT (1 << 0)
+#define NVC0_INTERP_PERSPECTIVE (2 << 0)
+#define NVC0_INTERP_LINEAR (3 << 0)
+#define NVC0_INTERP_CENTROID (1 << 2)
+
+static uint8_t
+nvc0_hdr_interp_mode(const struct nv50_ir_varying *var)
+{
+ if (var->linear)
+ return NVC0_INTERP_LINEAR;
+ if (var->flat)
+ return NVC0_INTERP_FLAT;
+ return NVC0_INTERP_PERSPECTIVE;
+}
+
+static int
+nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
+{
+ unsigned i, c, a, m;
+
+ /* just 00062 on Kepler */
+ fp->hdr[0] = 0x20062 | (5 << 10);
+ fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
+
+ if (info->prop.fp.usesDiscard)
+ fp->hdr[0] |= 0x8000;
+ if (info->prop.fp.numColourResults > 1)
+ fp->hdr[0] |= 0x4000;
+ if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
+ fp->hdr[19] |= 0x1;
+ if (info->prop.fp.writesDepth) {
+ fp->hdr[19] |= 0x2;
+ fp->flags[0] = 0x11; /* deactivate ZCULL */
+ }
+
+ for (i = 0; i < info->numInputs; ++i) {
+ m = nvc0_hdr_interp_mode(&info->in[i]);
+ for (c = 0; c < 4; ++c) {
+ if (!(info->in[i].mask & (1 << c)))
+ continue;
+ a = info->in[i].slot[c];
+ if (info->in[i].slot[0] >= (0x060 / 4) &&
+ info->in[i].slot[0] <= (0x07c / 4)) {
+ fp->hdr[5] |= 1 << (24 + (a - 0x060 / 4));
+ } else
+ if (info->in[i].slot[0] >= (0x2c0 / 4) &&
+ info->in[i].slot[0] <= (0x2fc / 4)) {
+ fp->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x07ff0000;
+ } else {
+ if (info->in[i].slot[c] < (0x040 / 4) ||
+ info->in[i].slot[c] > (0x380 / 4))
+ continue;
+ a *= 2;
+ if (info->in[i].slot[0] >= (0x300 / 4))
+ a -= 32;
+ fp->hdr[4 + a / 32] |= m << (a % 32);
+ }
+ }
+ }
+
+ for (i = 0; i < info->numOutputs; ++i) {
+ if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
+ fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0];
+ }
+
+ fp->fp.early_z = info->prop.fp.earlyFragTests;
+
+ return 0;
+}
+
+static struct nvc0_transform_feedback_state *
+nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info,
+ const struct pipe_stream_output_info *pso)
+{
+ struct nvc0_transform_feedback_state *tfb;
+ unsigned b, i, c;
+
+ tfb = MALLOC_STRUCT(nvc0_transform_feedback_state);
+ if (!tfb)
+ return NULL;
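+ /* pipe_stream_output_info strides are in dwords; the hardware state built
+ * here wants bytes, hence the * 4.
+ */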
+ for (b = 0; b < 4; ++b) {
+ tfb->stride[b] = pso->stride[b] * 4;
+ tfb->varying_count[b] = 0;
+ }
+ memset(tfb->varying_index, 0xff, sizeof(tfb->varying_index)); /* = skip */
+
+ for (i = 0; i < pso->num_outputs; ++i) {
+ unsigned s = pso->output[i].start_component;
+ unsigned p = pso->output[i].dst_offset;
+ b = pso->output[i].output_buffer;
+
+ for (c = 0; c < pso->output[i].num_components; ++c)
+ tfb->varying_index[b][p++] =
+ info->out[pso->output[i].register_index].slot[s + c];
+
+ tfb->varying_count[b] = MAX2(tfb->varying_count[b], p);
+ }
+ for (b = 0; b < 4; ++b) // zero unused indices (looks nicer)
+ for (c = tfb->varying_count[b]; c & 3; ++c)
+ tfb->varying_index[b][c] = 0;
+
+ return tfb;
+}
+
+#ifdef DEBUG
+static void
+nvc0_program_dump(struct nvc0_program *prog)
+{
+ unsigned pos;
+
+ if (prog->type != PIPE_SHADER_COMPUTE) {
+ for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos)
+ debug_printf("HDR[%02lx] = 0x%08x\n",
+ pos * sizeof(prog->hdr[0]), prog->hdr[pos]);
+ }
+ debug_printf("shader binary code (0x%x bytes):", prog->code_size);
+ for (pos = 0; pos < prog->code_size / 4; ++pos) {
+ if ((pos % 8) == 0)
+ debug_printf("\n");
+ debug_printf("%08x ", prog->code[pos]);
+ }
+ debug_printf("\n");
+}
+#endif
+
+boolean
+nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
+{
+ struct nv50_ir_prog_info *info;
+ int ret;
+
+ info = CALLOC_STRUCT(nv50_ir_prog_info);
+ if (!info)
+ return FALSE;
+
+ info->type = prog->type;
+ info->target = chipset;
+ info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
+ info->bin.source = (void *)prog->pipe.tokens;
+
+ info->io.genUserClip = prog->vp.num_ucps;
+ info->io.ucpBase = 256;
+ info->io.ucpCBSlot = 15;
+
+ if (prog->type == PIPE_SHADER_COMPUTE) {
+ if (chipset >= NVISA_GK104_CHIPSET) {
+ info->io.resInfoCBSlot = 0;
+ info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
+ info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
+ info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);
+ }
+ info->io.msInfoCBSlot = 0;
+ info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
+ } else {
+ if (chipset >= NVISA_GK104_CHIPSET) {
+ info->io.resInfoCBSlot = 15;
+ info->io.texBindBase = 0x20;
+ info->io.suInfoBase = 0; /* TODO */
+ }
+ info->io.msInfoCBSlot = 15;
+ info->io.msInfoBase = 0; /* TODO */
+ }
+
+ info->assignSlots = nvc0_program_assign_varying_slots;
+
+#ifdef DEBUG
+ info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
+ info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
+#else
+ info->optLevel = 3;
+#endif
+
+ ret = nv50_ir_generate_code(info);
+ if (ret) {
+ NOUVEAU_ERR("shader translation failed: %i\n", ret);
+ goto out;
+ }
+ if (prog->type != PIPE_SHADER_COMPUTE)
+ FREE(info->bin.syms);
+
+ prog->code = info->bin.code;
+ prog->code_size = info->bin.codeSize;
+ prog->immd_data = info->immd.buf;
+ prog->immd_size = info->immd.bufSize;
+ prog->relocs = info->bin.relocData;
+ prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
+ prog->num_barriers = info->numBarriers;
+
+ prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
+
+ if (info->io.edgeFlagOut < PIPE_MAX_ATTRIBS)
+ info->out[info->io.edgeFlagOut].mask = 0; /* for headergen */
+ prog->vp.edgeflag = info->io.edgeFlagIn;
+
+ switch (prog->type) {
+ case PIPE_SHADER_VERTEX:
+ ret = nvc0_vp_gen_header(prog, info);
+ break;
+#ifdef PIPE_SHADER_HULL
+ case PIPE_SHADER_HULL:
+ ret = nvc0_tcp_gen_header(prog, info);
+ break;
+#endif
+#ifdef PIPE_SHADER_DOMAIN
+ case PIPE_SHADER_DOMAIN:
+ ret = nvc0_tep_gen_header(prog, info);
+ break;
+#endif
+ case PIPE_SHADER_GEOMETRY:
+ ret = nvc0_gp_gen_header(prog, info);
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ ret = nvc0_fp_gen_header(prog, info);
+ break;
+ case PIPE_SHADER_COMPUTE:
+ prog->cp.syms = info->bin.syms;
+ prog->cp.num_syms = info->bin.numSyms;
+ break;
+ default:
+ ret = -1;
+ NOUVEAU_ERR("unknown program type: %u\n", prog->type);
+ break;
+ }
+ if (ret)
+ goto out;
+
+ if (info->bin.tlsSpace) {
+ assert(info->bin.tlsSpace < (1 << 24));
+ prog->hdr[0] |= 1 << 26;
+ prog->hdr[1] |= info->bin.tlsSpace; /* l[] size */
+ prog->need_tls = TRUE;
+ }
+ /* TODO: factor 2 only needed where joinat/precont is used,
+ * and we only have to count non-uniform branches
+ */
+ /*
+ if ((info->maxCFDepth * 2) > 16) {
+ prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200;
+ prog->need_tls = TRUE;
+ }
+ */
+ if (info->io.globalAccess)
+ prog->hdr[0] |= 1 << 16;
+
+ if (prog->pipe.stream_output.num_outputs)
+ prog->tfb = nvc0_program_create_tfb_state(info,
+ &prog->pipe.stream_output);
+
+out:
+ FREE(info);
+ return !ret;
+}
+
+boolean
+nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ const boolean is_cp = prog->type == PIPE_SHADER_COMPUTE;
+ int ret;
+ uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
+ uint32_t lib_pos = screen->lib_code->start;
+ uint32_t code_pos;
+
+ /* c[] bindings need to be aligned to 0x100, but we could use relocations
+ * to save space. */
+ if (prog->immd_size) {
+ prog->immd_base = size;
+ size = align(size, 0x40);
+ size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */
+ }
+ /* On Fermi, SP_START_ID must be aligned to 0x40.
+ * On Kepler, the first instruction must be aligned to 0x80 because
+ * latency information is expected only at certain positions.
+ */
+ if (screen->base.class_3d >= NVE4_3D_CLASS)
+ size = size + (is_cp ? 0x40 : 0x70);
+ size = align(size, 0x40);
+
+ ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem);
+ if (ret) {
+ struct nouveau_heap *heap = screen->text_heap;
+ struct nouveau_heap *iter;
+ for (iter = heap; iter && iter->next != heap; iter = iter->next) {
+ struct nvc0_program *evict = iter->priv;
+ if (evict)
+ nouveau_heap_free(&evict->mem);
+ }
+ debug_printf("WARNING: out of code space, evicting all shaders.\n");
+ ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
+ if (ret) {
+ NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
+ return FALSE;
+ }
+ IMMED_NVC0(nvc0->base.pushbuf, NVC0_3D(SERIALIZE), 0);
+ }
+ prog->code_base = prog->mem->start;
+ prog->immd_base = align(prog->mem->start + prog->immd_base, 0x100);
+ assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <=
+ prog->mem->start + prog->mem->size));
+
+ if (!is_cp) {
+ if (screen->base.class_3d >= NVE4_3D_CLASS) {
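+         /* adjust code_base so that the first instruction after the 0x50-byte
+          * shader header lands on a 0x80-aligned address (Kepler requirement,
+          * see the alignment note above) */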
+ switch (prog->mem->start & 0xff) {
+ case 0x40: prog->code_base += 0x70; break;
+ case 0x80: prog->code_base += 0x30; break;
+ case 0xc0: prog->code_base += 0x70; break;
+ default:
+ prog->code_base += 0x30;
+ assert((prog->mem->start & 0xff) == 0x00);
+ break;
+ }
+ }
+ code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE;
+ } else {
+ if (screen->base.class_3d >= NVE4_3D_CLASS) {
+ if (prog->mem->start & 0x40)
+ prog->code_base += 0x40;
+ assert((prog->code_base & 0x7f) == 0x00);
+ }
+ code_pos = prog->code_base;
+ }
+
+ if (prog->relocs)
+ nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0);
+
+#ifdef DEBUG
+ if (debug_get_bool_option("NV50_PROG_DEBUG", FALSE))
+ nvc0_program_dump(prog);
+#endif
+
+ if (!is_cp)
+ nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
+ NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
+ nvc0->base.push_data(&nvc0->base, screen->text, code_pos,
+ NOUVEAU_BO_VRAM, prog->code_size, prog->code);
+ if (prog->immd_size)
+ nvc0->base.push_data(&nvc0->base,
+ screen->text, prog->immd_base, NOUVEAU_BO_VRAM,
+ prog->immd_size, prog->immd_data);
+
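+   /* memory barrier: flush so the freshly uploaded header/code data is
+    * visible to the 3D engine before the program is used */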
+ BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1);
+ PUSH_DATA (nvc0->base.pushbuf, 0x1011);
+
+ return TRUE;
+}
+
+/* Upload code for builtin functions like integer division emulation. */
+void
+nvc0_program_library_upload(struct nvc0_context *nvc0)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ int ret;
+ uint32_t size;
+ const uint32_t *code;
+
+ if (screen->lib_code)
+ return;
+
+ nv50_ir_get_target_library(screen->base.device->chipset, &code, &size);
+ if (!size)
+ return;
+
+ ret = nouveau_heap_alloc(screen->text_heap, align(size, 0x100), NULL,
+ &screen->lib_code);
+ if (ret)
+ return;
+
+ nvc0->base.push_data(&nvc0->base,
+ screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM,
+ size, code);
+ /* no need for a memory barrier, will be emitted with first program */
+}
+
+void
+nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
+{
+ const struct pipe_shader_state pipe = prog->pipe;
+ const ubyte type = prog->type;
+
+ if (prog->mem)
+ nouveau_heap_free(&prog->mem);
+ if (prog->code)
+ FREE(prog->code); /* may be 0 for hardcoded shaders */
+ FREE(prog->immd_data);
+ FREE(prog->relocs);
+ if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms)
+ FREE(prog->cp.syms);
+ if (prog->tfb) {
+ if (nvc0->state.tfb == prog->tfb)
+ nvc0->state.tfb = NULL;
+ FREE(prog->tfb);
+ }
+
+ memset(prog, 0, sizeof(*prog));
+
+ prog->pipe = pipe;
+ prog->type = type;
+}
+
+uint32_t
+nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label)
+{
+ const struct nv50_ir_prog_symbol *syms =
+ (const struct nv50_ir_prog_symbol *)prog->cp.syms;
+ unsigned base = 0;
+ unsigned i;
+ if (prog->type != PIPE_SHADER_COMPUTE)
+ base = NVC0_SHADER_HEADER_SIZE;
+ for (i = 0; i < prog->cp.num_syms; ++i)
+ if (syms[i].label == label)
+ return prog->code_base + base + syms[i].offset;
+ return prog->code_base; /* no symbols or symbol not found */
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
new file mode 100644
index 0000000..9c184d1
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -0,0 +1,68 @@
+
+#ifndef __NVC0_PROGRAM_H__
+#define __NVC0_PROGRAM_H__
+
+#include "pipe/p_state.h"
+
+#define NVC0_CAP_MAX_PROGRAM_TEMPS 128
+
+
+struct nvc0_transform_feedback_state {
+ uint32_t stride[4];
+ uint8_t varying_count[4];
+ uint8_t varying_index[4][128];
+};
+
+
+#define NVC0_SHADER_HEADER_SIZE (20 * 4)
+
+struct nvc0_program {
+ struct pipe_shader_state pipe;
+
+ ubyte type;
+ boolean translated;
+ boolean need_tls;
+ uint8_t num_gprs;
+
+ uint32_t *code;
+ uint32_t *immd_data;
+ unsigned code_base;
+ unsigned code_size;
+ unsigned immd_base;
+ unsigned immd_size; /* size of immediate array data */
+ unsigned parm_size; /* size of non-bindable uniforms (c0[]) */
+
+ uint32_t hdr[20];
+ uint32_t flags[2];
+
+ struct {
+ uint32_t clip_mode; /* clip/cull selection */
+ uint8_t clip_enable; /* mask of defined clip planes */
+ uint8_t num_ucps; /* also set to max if ClipDistance is used */
+ uint8_t edgeflag; /* attribute index of edgeflag input */
+ boolean need_vertex_id;
+ } vp;
+ struct {
+ uint8_t early_z;
+ uint8_t in_pos[PIPE_MAX_SHADER_INPUTS];
+ } fp;
+ struct {
+ uint32_t tess_mode; /* ~0 if defined by the other stage */
+ uint32_t input_patch_size;
+ } tp;
+ struct {
+ uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */
+ uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */
+ void *syms;
+ unsigned num_syms;
+ } cp;
+ uint8_t num_barriers;
+
+ void *relocs;
+
+ struct nvc0_transform_feedback_state *tfb;
+
+ struct nouveau_heap *mem;
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_push.c b/src/gallium/drivers/nouveau/nvc0/nvc0_push.c
new file mode 100644
index 0000000..15e8be6
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_push.c
@@ -0,0 +1,409 @@
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_resource.h"
+
+#include "nvc0/nvc0_3d.xml.h"
+
+struct push_context {
+ struct nouveau_pushbuf *push;
+
+ void *idxbuf;
+
+ uint32_t vertex_words;
+ uint32_t packet_vertex_limit;
+
+ struct translate *translate;
+
+ boolean primitive_restart;
+ boolean need_vertex_id;
+ uint32_t prim;
+ uint32_t restart_index;
+ uint32_t instance_id;
+
+ struct {
+ int buffer;
+ float value;
+ uint8_t *data;
+ unsigned offset;
+ unsigned stride;
+ } edgeflag;
+};
+
+static void
+init_push_context(struct nvc0_context *nvc0, struct push_context *ctx)
+{
+ struct pipe_vertex_element *ve;
+
+ ctx->push = nvc0->base.pushbuf;
+ ctx->translate = nvc0->vertex->translate;
+
+ if (likely(nvc0->vertex->num_elements < 32))
+ ctx->need_vertex_id = nvc0->vertprog->vp.need_vertex_id;
+ else
+ ctx->need_vertex_id = FALSE;
+
+ ctx->edgeflag.buffer = -1;
+ ctx->edgeflag.value = 0.5f;
+
+ if (unlikely(nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS)) {
+ ve = &nvc0->vertex->element[nvc0->vertprog->vp.edgeflag].pipe;
+ ctx->edgeflag.buffer = ve->vertex_buffer_index;
+ ctx->edgeflag.offset = ve->src_offset;
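+      /* the edgeflag attribute may change from vertex to vertex, so emit only
+       * one vertex per VERTEX_DATA packet to allow toggling EDGEFLAG between
+       * them */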
+ ctx->packet_vertex_limit = 1;
+ } else {
+ ctx->packet_vertex_limit = nvc0->vertex->vtx_per_packet_max;
+ if (unlikely(ctx->need_vertex_id))
+ ctx->packet_vertex_limit = 1;
+ }
+
+ ctx->vertex_words = nvc0->vertex->vtx_size;
+}
+
+static INLINE void
+set_edgeflag(struct push_context *ctx, unsigned vtx_id)
+{
+ float f = *(float *)(ctx->edgeflag.data + vtx_id * ctx->edgeflag.stride);
+
+ if (ctx->edgeflag.value != f) {
+ ctx->edgeflag.value = f;
+ IMMED_NVC0(ctx->push, NVC0_3D(EDGEFLAG), f ? 1 : 0);
+ }
+}
+
+static INLINE void
+set_vertexid(struct push_context *ctx, uint32_t vtx_id)
+{
+#if 0
+ BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_ID), 1); /* broken on nvc0 */
+#else
+ BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_DATA), 1); /* as last attribute */
+#endif
+ PUSH_DATA (ctx->push, vtx_id);
+}
+
+static INLINE unsigned
+prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
+{
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
+}
+
+static INLINE unsigned
+prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
+{
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
+}
+
+static INLINE unsigned
+prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
+{
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
+}
+
+static void
+emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint8_t *restrict elts = (uint8_t *)ctx->idxbuf + start;
+
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
+
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i08(elts, push, ctx->restart_index);
+
+ if (unlikely(ctx->edgeflag.buffer >= 0) && likely(nr))
+ set_edgeflag(ctx, elts[0]);
+
+ size = ctx->vertex_words * nr;
+
+ BEGIN_NIC0(ctx->push, NVC0_3D(VERTEX_DATA), size);
+
+ ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->instance_id,
+ ctx->push->cur);
+ ctx->push->cur += size;
+
+ if (unlikely(ctx->need_vertex_id) && likely(size))
+ set_vertexid(ctx, elts[0]);
+
+ count -= nr;
+ elts += nr;
+
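+      /* nr != push means a primitive restart index was hit: skip it, then
+       * close the current primitive and reopen it within the same instance */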
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_END_GL), 2);
+ PUSH_DATA (ctx->push, 0);
+ PUSH_DATA (ctx->push, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT |
+ (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT));
+ }
+ }
+}
+
+static void
+emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint16_t *restrict elts = (uint16_t *)ctx->idxbuf + start;
+
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
+
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i16(elts, push, ctx->restart_index);
+
+ if (unlikely(ctx->edgeflag.buffer >= 0) && likely(nr))
+ set_edgeflag(ctx, elts[0]);
+
+ size = ctx->vertex_words * nr;
+
+ BEGIN_NIC0(ctx->push, NVC0_3D(VERTEX_DATA), size);
+
+ ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->instance_id,
+ ctx->push->cur);
+ ctx->push->cur += size;
+
+ if (unlikely(ctx->need_vertex_id))
+ set_vertexid(ctx, elts[0]);
+
+ count -= nr;
+ elts += nr;
+
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_END_GL), 2);
+ PUSH_DATA (ctx->push, 0);
+ PUSH_DATA (ctx->push, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT |
+ (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT));
+ }
+ }
+}
+
+static void
+emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint32_t *restrict elts = (uint32_t *)ctx->idxbuf + start;
+
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
+
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i32(elts, push, ctx->restart_index);
+
+ if (unlikely(ctx->edgeflag.buffer >= 0) && likely(nr))
+ set_edgeflag(ctx, elts[0]);
+
+ size = ctx->vertex_words * nr;
+
+ BEGIN_NIC0(ctx->push, NVC0_3D(VERTEX_DATA), size);
+
+ ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->instance_id,
+ ctx->push->cur);
+ ctx->push->cur += size;
+
+ if (unlikely(ctx->need_vertex_id))
+ set_vertexid(ctx, elts[0]);
+
+ count -= nr;
+ elts += nr;
+
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_END_GL), 2);
+ PUSH_DATA (ctx->push, 0);
+ PUSH_DATA (ctx->push, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT |
+ (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT));
+ }
+ }
+}
+
+static void
+emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
+{
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size = ctx->vertex_words * push;
+
+ if (unlikely(ctx->edgeflag.buffer >= 0))
+ set_edgeflag(ctx, start);
+
+ BEGIN_NIC0(ctx->push, NVC0_3D(VERTEX_DATA), size);
+
+ ctx->translate->run(ctx->translate, start, push, 0, ctx->instance_id,
+ ctx->push->cur);
+ ctx->push->cur += size;
+
+ if (unlikely(ctx->need_vertex_id))
+ set_vertexid(ctx, start);
+
+ count -= push;
+ start += push;
+ }
+}
+
+
+#define NVC0_PRIM_GL_CASE(n) \
+ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+static INLINE unsigned
+nvc0_prim_gl(unsigned prim)
+{
+ switch (prim) {
+ NVC0_PRIM_GL_CASE(POINTS);
+ NVC0_PRIM_GL_CASE(LINES);
+ NVC0_PRIM_GL_CASE(LINE_LOOP);
+ NVC0_PRIM_GL_CASE(LINE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLES);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
+ NVC0_PRIM_GL_CASE(QUADS);
+ NVC0_PRIM_GL_CASE(QUAD_STRIP);
+ NVC0_PRIM_GL_CASE(POLYGON);
+ NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+ /*
+ NVC0_PRIM_GL_CASE(PATCHES); */
+ default:
+ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+ break;
+ }
+}
+
+void
+nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
+{
+ struct push_context ctx;
+ unsigned i, index_size;
+ unsigned inst_count = info->instance_count;
+ unsigned vert_count = info->count;
+ boolean apply_bias = info->indexed && info->index_bias;
+
+ init_push_context(nvc0, &ctx);
+
+ for (i = 0; i < nvc0->num_vtxbufs; ++i) {
+ uint8_t *data;
+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i];
+ struct nv04_resource *res = nv04_resource(vb->buffer);
+
+ data = nouveau_resource_map_offset(&nvc0->base, res,
+ vb->buffer_offset, NOUVEAU_BO_RD);
+
+ if (apply_bias && likely(!(nvc0->vertex->instance_bufs & (1 << i))))
+ data += info->index_bias * vb->stride;
+
+ ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
+
+ if (unlikely(i == ctx.edgeflag.buffer)) {
+ ctx.edgeflag.data = data + ctx.edgeflag.offset;
+ ctx.edgeflag.stride = vb->stride;
+ }
+ }
+
+ if (info->indexed) {
+ ctx.idxbuf =
+ nouveau_resource_map_offset(&nvc0->base,
+ nv04_resource(nvc0->idxbuf.buffer),
+ nvc0->idxbuf.offset, NOUVEAU_BO_RD);
+ if (!ctx.idxbuf)
+ return;
+ index_size = nvc0->idxbuf.index_size;
+ ctx.primitive_restart = info->primitive_restart;
+ ctx.restart_index = info->restart_index;
+ } else {
+ ctx.idxbuf = NULL;
+ index_size = 0;
+ ctx.primitive_restart = FALSE;
+ ctx.restart_index = 0;
+
+ if (info->count_from_stream_output) {
+ struct pipe_context *pipe = &nvc0->base.pipe;
+ struct nvc0_so_target *targ;
+ targ = nvc0_so_target(info->count_from_stream_output);
+ pipe->get_query_result(pipe, targ->pq, TRUE, (void*)&vert_count);
+ vert_count /= targ->stride;
+ }
+ }
+
+ ctx.instance_id = info->start_instance;
+ ctx.prim = nvc0_prim_gl(info->mode);
+
+ if (unlikely(ctx.need_vertex_id)) {
+ const unsigned a = nvc0->vertex->num_elements;
+ BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1);
+ PUSH_DATA (ctx.push, (a << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT) |
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32);
+ BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 1);
+ PUSH_DATA (ctx.push, (((0x80 + a * 0x10) / 4) << 4) | 1);
+ }
+
+ while (inst_count--) {
+ BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (ctx.push, ctx.prim);
+ switch (index_size) {
+ case 0:
+ emit_vertices_seq(&ctx, info->start, vert_count);
+ break;
+ case 1:
+ emit_vertices_i08(&ctx, info->start, vert_count);
+ break;
+ case 2:
+ emit_vertices_i16(&ctx, info->start, vert_count);
+ break;
+ case 4:
+ emit_vertices_i32(&ctx, info->start, vert_count);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_END_GL), 0);
+
+ ctx.instance_id++;
+ ctx.prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+
+ if (unlikely(ctx.edgeflag.value == 0.0f))
+ IMMED_NVC0(ctx.push, NVC0_3D(EDGEFLAG), 1);
+
+ if (unlikely(ctx.need_vertex_id)) {
+ const unsigned a = nvc0->vertex->num_elements;
+ IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 0);
+ BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1);
+ PUSH_DATA (ctx.push,
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST |
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32);
+ }
+
+ if (info->indexed)
+ nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer));
+
+ for (i = 0; i < nvc0->num_vtxbufs; ++i)
+ nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer));
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
new file mode 100644
index 0000000..21aa358
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -0,0 +1,1448 @@
+/*
+ * Copyright 2011 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christoph Bumiller
+ */
+
+#define NVC0_PUSH_EXPLICIT_SPACE_CHECKING
+
+#include "nvc0/nvc0_context.h"
+#include "nv_object.xml.h"
+#include "nvc0/nve4_compute.xml.h"
+#include "nvc0/nvc0_compute.xml.h"
+
+#define NVC0_QUERY_STATE_READY 0
+#define NVC0_QUERY_STATE_ACTIVE 1
+#define NVC0_QUERY_STATE_ENDED 2
+#define NVC0_QUERY_STATE_FLUSHED 3
+
+struct nvc0_query {
+ uint32_t *data;
+ uint16_t type;
+ uint16_t index;
+ int8_t ctr[4];
+ uint32_t sequence;
+ struct nouveau_bo *bo;
+ uint32_t base;
+ uint32_t offset; /* base + i * rotate */
+ uint8_t state;
+ boolean is64bit;
+ uint8_t rotate;
+ int nesting; /* only used for occlusion queries */
+ union {
+ struct nouveau_mm_allocation *mm;
+ uint64_t value;
+ } u;
+ struct nouveau_fence *fence;
+};
+
+#define NVC0_QUERY_ALLOC_SPACE 256
+
+static void nvc0_mp_pm_query_begin(struct nvc0_context *, struct nvc0_query *);
+static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *);
+static boolean nvc0_mp_pm_query_result(struct nvc0_context *,
+ struct nvc0_query *, void *, boolean);
+
+static INLINE struct nvc0_query *
+nvc0_query(struct pipe_query *pipe)
+{
+ return (struct nvc0_query *)pipe;
+}
+
+static boolean
+nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ int ret;
+
+ if (q->bo) {
+ nouveau_bo_ref(NULL, &q->bo);
+ if (q->u.mm) {
+ if (q->state == NVC0_QUERY_STATE_READY)
+ nouveau_mm_free(q->u.mm);
+ else
+ nouveau_fence_work(screen->base.fence.current,
+ nouveau_mm_free_work, q->u.mm);
+ }
+ }
+ if (size) {
+ q->u.mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
+ if (!q->bo)
+ return FALSE;
+ q->offset = q->base;
+
+ ret = nouveau_bo_map(q->bo, 0, screen->base.client);
+ if (ret) {
+ nvc0_query_allocate(nvc0, q, 0);
+ return FALSE;
+ }
+ q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
+ }
+ return TRUE;
+}
+
+static void
+nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0);
+ nouveau_fence_ref(NULL, &nvc0_query(pq)->fence);
+ FREE(nvc0_query(pq));
+}
+
+static struct pipe_query *
+nvc0_query_create(struct pipe_context *pipe, unsigned type)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nvc0_query *q;
+ unsigned space = NVC0_QUERY_ALLOC_SPACE;
+
+ q = CALLOC_STRUCT(nvc0_query);
+ if (!q)
+ return NULL;
+
+ switch (type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ q->rotate = 32;
+ space = NVC0_QUERY_ALLOC_SPACE;
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ q->is64bit = TRUE;
+ space = 512;
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ q->is64bit = TRUE;
+ space = 64;
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ q->is64bit = TRUE;
+ space = 32;
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ case PIPE_QUERY_TIMESTAMP:
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ case PIPE_QUERY_GPU_FINISHED:
+ space = 32;
+ break;
+ case NVC0_QUERY_TFB_BUFFER_OFFSET:
+ space = 16;
+ break;
+ default:
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+ if (type >= NVC0_QUERY_DRV_STAT(0) && type <= NVC0_QUERY_DRV_STAT_LAST) {
+ space = 0;
+ q->is64bit = true;
+ q->index = type - NVC0_QUERY_DRV_STAT(0);
+ break;
+ } else
+#endif
+ if (nvc0->screen->base.device->drm_version >= 0x01000101) {
+ if (type >= NVE4_PM_QUERY(0) && type <= NVE4_PM_QUERY_LAST) {
+ /* for each MP:
+ * [00] = WS0.C0
+ * [04] = WS0.C1
+ * [08] = WS0.C2
+ * [0c] = WS0.C3
+ * [10] = WS1.C0
+ * [14] = WS1.C1
+ * [18] = WS1.C2
+ * [1c] = WS1.C3
+ * [20] = WS2.C0
+ * [24] = WS2.C1
+ * [28] = WS2.C2
+ * [2c] = WS2.C3
+ * [30] = WS3.C0
+ * [34] = WS3.C1
+ * [38] = WS3.C2
+ * [3c] = WS3.C3
+ * [40] = MP.C4
+ * [44] = MP.C5
+ * [48] = MP.C6
+ * [4c] = MP.C7
+ * [50] = WS0.sequence
+ * [54] = WS1.sequence
+ * [58] = WS2.sequence
+ * [5c] = WS3.sequence
+ */
+ space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
+ break;
+ } else
+ if (type >= NVC0_PM_QUERY(0) && type <= NVC0_PM_QUERY_LAST) {
+ /* for each MP:
+ * [00] = MP.C0
+ * [04] = MP.C1
+ * [08] = MP.C2
+ * [0c] = MP.C3
+ * [10] = MP.C4
+ * [14] = MP.C5
+ * [18] = MP.C6
+ * [1c] = MP.C7
+ * [20] = MP.sequence
+ */
+ space = (8 + 1) * nvc0->screen->mp_count * sizeof(uint32_t);
+ break;
+ }
+ }
+ debug_printf("invalid query type: %u\n", type);
+ FREE(q);
+ return NULL;
+ }
+ if (!nvc0_query_allocate(nvc0, q, space)) {
+ FREE(q);
+ return NULL;
+ }
+
+ q->type = type;
+
+ if (q->rotate) {
+ /* we advance before query_begin ! */
+ q->offset -= q->rotate;
+ q->data -= q->rotate / sizeof(*q->data);
+ } else
+ if (!q->is64bit)
+ q->data[0] = 0; /* initialize sequence */
+
+ return (struct pipe_query *)q;
+}
+
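+/* Write a query report: the 3D engine stores q->sequence together with the
+ * value selected by 'get' into the query buffer at the given offset. */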
+static void
+nvc0_query_get(struct nouveau_pushbuf *push, struct nvc0_query *q,
+ unsigned offset, uint32_t get)
+{
+ offset += q->offset;
+
+ PUSH_SPACE(push, 5);
+ PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
+ PUSH_DATAh(push, q->bo->offset + offset);
+ PUSH_DATA (push, q->bo->offset + offset);
+ PUSH_DATA (push, q->sequence);
+ PUSH_DATA (push, get);
+}
+
+static void
+nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+ q->offset += q->rotate;
+ q->data += q->rotate / sizeof(*q->data);
+ if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
+ nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
+}
+
+static void
+nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_query *q = nvc0_query(pq);
+
+ /* For occlusion queries we have to change the storage, because a previous
+    * query might set the initial render condition to FALSE even *after* we re-
+ * initialized it to TRUE.
+ */
+ if (q->rotate) {
+ nvc0_query_rotate(nvc0, q);
+
+ /* XXX: can we do this with the GPU, and sync with respect to a previous
+ * query ?
+ */
+ q->data[0] = q->sequence; /* initialize sequence */
+ q->data[1] = 1; /* initial render condition = TRUE */
+ q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
+ q->data[5] = 0;
+ }
+ q->sequence++;
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ q->nesting = nvc0->screen->num_occlusion_queries_active++;
+ if (q->nesting) {
+ nvc0_query_get(push, q, 0x10, 0x0100f002);
+ } else {
+ PUSH_SPACE(push, 3);
+ BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1);
+ PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT);
+ IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
+ }
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nvc0_query_get(push, q, 0x10, 0x09005002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ nvc0_query_get(push, q, 0x10, 0x05805002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ nvc0_query_get(push, q, 0x20, 0x05805002 | (q->index << 5));
+ nvc0_query_get(push, q, 0x30, 0x06805002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ nvc0_query_get(push, q, 0x10, 0x03005002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ nvc0_query_get(push, q, 0x10, 0x00005002);
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ nvc0_query_get(push, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
+ nvc0_query_get(push, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
+ nvc0_query_get(push, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
+ nvc0_query_get(push, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
+ nvc0_query_get(push, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
+ nvc0_query_get(push, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
+ nvc0_query_get(push, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
+ nvc0_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
+ nvc0_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
+ nvc0_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
+ break;
+ default:
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+ if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
+ q->type <= NVC0_QUERY_DRV_STAT_LAST) {
+ if (q->index >= 5)
+ q->u.value = nvc0->screen->base.stats.v[q->index];
+ else
+ q->u.value = 0;
+ } else
+#endif
+ if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
+ (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
+ nvc0_mp_pm_query_begin(nvc0, q);
+ }
+ break;
+ }
+ q->state = NVC0_QUERY_STATE_ACTIVE;
+}
+
+static void
+nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_query *q = nvc0_query(pq);
+
+ if (q->state != NVC0_QUERY_STATE_ACTIVE) {
+ /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
+ if (q->rotate)
+ nvc0_query_rotate(nvc0, q);
+ q->sequence++;
+ }
+ q->state = NVC0_QUERY_STATE_ENDED;
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ nvc0_query_get(push, q, 0, 0x0100f002);
+ if (--nvc0->screen->num_occlusion_queries_active == 0) {
+ PUSH_SPACE(push, 1);
+ IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
+ }
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nvc0_query_get(push, q, 0, 0x09005002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ nvc0_query_get(push, q, 0, 0x05805002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ nvc0_query_get(push, q, 0x00, 0x05805002 | (q->index << 5));
+ nvc0_query_get(push, q, 0x10, 0x06805002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ /* TODO: How do we sum over all streams for render condition ? */
+ /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */
+ nvc0_query_get(push, q, 0x00, 0x03005002 | (q->index << 5));
+ nvc0_query_get(push, q, 0x20, 0x00005002);
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ case PIPE_QUERY_TIME_ELAPSED:
+ nvc0_query_get(push, q, 0, 0x00005002);
+ break;
+ case PIPE_QUERY_GPU_FINISHED:
+ nvc0_query_get(push, q, 0, 0x1000f010);
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ nvc0_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
+ nvc0_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
+ nvc0_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
+ nvc0_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
+ nvc0_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
+ nvc0_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
+ nvc0_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
+ nvc0_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
+ nvc0_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
+ nvc0_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
+ break;
+ case NVC0_QUERY_TFB_BUFFER_OFFSET:
+ /* indexed by TFB buffer instead of by vertex stream */
+ nvc0_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5));
+ break;
+ default:
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+ if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
+ q->type <= NVC0_QUERY_DRV_STAT_LAST) {
+ q->u.value = nvc0->screen->base.stats.v[q->index] - q->u.value;
+ return;
+ } else
+#endif
+ if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
+ (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
+ nvc0_mp_pm_query_end(nvc0, q);
+ }
+ break;
+ }
+ if (q->is64bit)
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &q->fence);
+}
+
+static INLINE void
+nvc0_query_update(struct nouveau_client *cli, struct nvc0_query *q)
+{
+ if (q->is64bit) {
+ if (nouveau_fence_signalled(q->fence))
+ q->state = NVC0_QUERY_STATE_READY;
+ } else {
+ if (q->data[0] == q->sequence)
+ q->state = NVC0_QUERY_STATE_READY;
+ }
+}
+
+static boolean
+nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+ boolean wait, union pipe_query_result *result)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nvc0_query *q = nvc0_query(pq);
+ uint64_t *res64 = (uint64_t*)result;
+ uint32_t *res32 = (uint32_t*)result;
+ boolean *res8 = (boolean*)result;
+ uint64_t *data64 = (uint64_t *)q->data;
+ unsigned i;
+
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+ if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
+ q->type <= NVC0_QUERY_DRV_STAT_LAST) {
+ res64[0] = q->u.value;
+ return TRUE;
+ } else
+#endif
+ if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
+ (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
+ return nvc0_mp_pm_query_result(nvc0, q, result, wait);
+ }
+
+ if (q->state != NVC0_QUERY_STATE_READY)
+ nvc0_query_update(nvc0->screen->base.client, q);
+
+ if (q->state != NVC0_QUERY_STATE_READY) {
+ if (!wait) {
+ if (q->state != NVC0_QUERY_STATE_FLUSHED) {
+ q->state = NVC0_QUERY_STATE_FLUSHED;
+ /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
+ PUSH_KICK(nvc0->base.pushbuf);
+ }
+ return FALSE;
+ }
+ if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
+ return FALSE;
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
+ }
+ q->state = NVC0_QUERY_STATE_READY;
+
+ switch (q->type) {
+ case PIPE_QUERY_GPU_FINISHED:
+ res8[0] = TRUE;
+ break;
+ case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
+ res64[0] = q->data[1] - q->data[5];
+ break;
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ res8[0] = q->data[1] != q->data[5];
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
+ case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
+ res64[0] = data64[0] - data64[2];
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ res64[0] = data64[0] - data64[4];
+ res64[1] = data64[2] - data64[6];
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ res8[0] = data64[0] != data64[2];
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ res64[0] = data64[1];
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ res64[0] = 1000000000;
+ res8[8] = FALSE;
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ res64[0] = data64[1] - data64[3];
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ for (i = 0; i < 10; ++i)
+ res64[i] = data64[i * 2] - data64[24 + i * 2];
+ break;
+ case NVC0_QUERY_TFB_BUFFER_OFFSET:
+ res32[0] = q->data[1];
+ break;
+ default:
+ assert(0); /* can't happen, we don't create queries with invalid type */
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
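+/* Make the FIFO wait (semaphore acquire) until the query's sequence value has
+ * been written, i.e. until the result is available. */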
+void
+nvc0_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
+{
+ struct nvc0_query *q = nvc0_query(pq);
+ unsigned offset = q->offset;
+
+ if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) offset += 0x20;
+
+ PUSH_SPACE(push, 5);
+ PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
+ PUSH_DATAh(push, q->bo->offset + offset);
+ PUSH_DATA (push, q->bo->offset + offset);
+ PUSH_DATA (push, q->sequence);
+ PUSH_DATA (push, (1 << 12) |
+ NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
+}
+
+static void
+nvc0_render_condition(struct pipe_context *pipe,
+ struct pipe_query *pq,
+ boolean condition, uint mode)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_query *q;
+ uint32_t cond;
+ boolean negated = FALSE;
+ boolean wait =
+ mode != PIPE_RENDER_COND_NO_WAIT &&
+ mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
+
+ nvc0->cond_query = pq;
+ nvc0->cond_cond = condition;
+ nvc0->cond_mode = mode;
+
+ if (!pq) {
+ PUSH_SPACE(push, 1);
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+ return;
+ }
+ q = nvc0_query(pq);
+
+ /* NOTE: comparison of 2 queries only works if both have completed */
+ switch (q->type) {
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ cond = negated ? NVC0_3D_COND_MODE_EQUAL :
+ NVC0_3D_COND_MODE_NOT_EQUAL;
+ wait = TRUE;
+ break;
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ if (likely(!negated)) {
+ if (unlikely(q->nesting))
+ cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
+ NVC0_3D_COND_MODE_ALWAYS;
+ else
+ cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
+ } else {
+ cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
+ }
+ break;
+ default:
+ assert(!"render condition query not a predicate");
+ mode = NVC0_3D_COND_MODE_ALWAYS;
+ break;
+ }
+
+ if (wait)
+ nvc0_query_fifo_wait(push, pq);
+
+ PUSH_SPACE(push, 4);
+ PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, q->bo->offset + q->offset);
+ PUSH_DATA (push, q->bo->offset + q->offset);
+ PUSH_DATA (push, cond);
+}
+
+void
+nvc0_query_pushbuf_submit(struct nouveau_pushbuf *push,
+ struct pipe_query *pq, unsigned result_offset)
+{
+ struct nvc0_query *q = nvc0_query(pq);
+
+#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
+
+ nouveau_pushbuf_space(push, 0, 0, 1);
+ nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
+ NVC0_IB_ENTRY_1_NO_PREFETCH);
+}
+
+void
+nvc0_so_target_save_offset(struct pipe_context *pipe,
+ struct pipe_stream_output_target *ptarg,
+ unsigned index, boolean *serialize)
+{
+ struct nvc0_so_target *targ = nvc0_so_target(ptarg);
+
+ if (*serialize) {
+ *serialize = FALSE;
+ PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1);
+ IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0);
+
+ NOUVEAU_DRV_STAT(nouveau_screen(pipe->screen), gpu_serialize_count, 1);
+ }
+
+ nvc0_query(targ->pq)->index = index;
+
+ nvc0_query_end(pipe, targ->pq);
+}
+
+
+/* === DRIVER STATISTICS === */
+
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+
+static const char *nvc0_drv_stat_names[] =
+{
+ "drv-tex_obj_current_count",
+ "drv-tex_obj_current_bytes",
+ "drv-buf_obj_current_count",
+ "drv-buf_obj_current_bytes_vid",
+ "drv-buf_obj_current_bytes_sys",
+ "drv-tex_transfers_rd",
+ "drv-tex_transfers_wr",
+ "drv-tex_copy_count",
+ "drv-tex_blit_count",
+ "drv-tex_cache_flush_count",
+ "drv-buf_transfers_rd",
+ "drv-buf_transfers_wr",
+ "drv-buf_read_bytes_staging_vid",
+ "drv-buf_write_bytes_direct",
+ "drv-buf_write_bytes_staging_vid",
+ "drv-buf_write_bytes_staging_sys",
+ "drv-buf_copy_bytes",
+ "drv-buf_non_kernel_fence_sync_count",
+ "drv-any_non_kernel_fence_sync_count",
+ "drv-query_sync_count",
+ "drv-gpu_serialize_count",
+ "drv-draw_calls_array",
+ "drv-draw_calls_indexed",
+ "drv-draw_calls_fallback_count",
+ "drv-user_buffer_upload_bytes",
+ "drv-constbuf_upload_count",
+ "drv-constbuf_upload_bytes",
+ "drv-pushbuf_count",
+ "drv-resource_validate_count"
+};
+
+#endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */
+
+
+/* === PERFORMANCE MONITORING COUNTERS for NVE4+ === */
+
+/* Code to read out MP counters: They are accessible via mmio, too, but let's
+ * just avoid mapping registers in userspace. We'd have to know which MPs are
+ * enabled/present, too, and that information is not presently exposed.
+ * We could add a kernel interface for it, but reading the counters like this
+ * has the advantage of being async (if get_result isn't called immediately).
+ */
+static const uint64_t nve4_read_mp_pm_counters_code[] =
+{
+ /* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20
+ * mov b32 $r8 $tidx
+ * mov b32 $r12 $physid
+ * mov b32 $r0 $pm0
+ * mov b32 $r1 $pm1
+ * mov b32 $r2 $pm2
+ * mov b32 $r3 $pm3
+ * mov b32 $r4 $pm4
+ * sched 0x20 0x20 0x23 0x04 0x20 0x04 0x2b
+ * mov b32 $r5 $pm5
+ * mov b32 $r6 $pm6
+ * mov b32 $r7 $pm7
+ * set $p0 0x1 eq u32 $r8 0x0
+ * mov b32 $r10 c0[0x0]
+ * ext u32 $r8 $r12 0x414
+ * mov b32 $r11 c0[0x4]
+ * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04
+ * ext u32 $r9 $r12 0x208
+ * (not $p0) exit
+ * set $p1 0x1 eq u32 $r9 0x0
+ * mul $r8 u32 $r8 u32 96
+ * mul $r12 u32 $r9 u32 16
+ * mul $r13 u32 $r9 u32 4
+ * add b32 $r9 $r8 $r13
+ * sched 0x28 0x04 0x2c 0x04 0x2c 0x04 0x2c
+ * add b32 $r8 $r8 $r12
+ * mov b32 $r12 $r10
+ * add b32 $r10 $c $r10 $r8
+ * mov b32 $r13 $r11
+ * add b32 $r11 $r11 0x0 $c
+ * add b32 $r12 $c $r12 $r9
+ * st b128 wt g[$r10d] $r0q
+ * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00
+ * mov b32 $r0 c0[0x8]
+ * add b32 $r13 $r13 0x0 $c
+ * $p1 st b128 wt g[$r12d+0x40] $r4q
+ * st b32 wt g[$r12d+0x50] $r0
+ * exit */
+ 0x2202020202020207ULL,
+ 0x2c00000084021c04ULL,
+ 0x2c0000000c031c04ULL,
+ 0x2c00000010001c04ULL,
+ 0x2c00000014005c04ULL,
+ 0x2c00000018009c04ULL,
+ 0x2c0000001c00dc04ULL,
+ 0x2c00000020011c04ULL,
+ 0x22b0420042320207ULL,
+ 0x2c00000024015c04ULL,
+ 0x2c00000028019c04ULL,
+ 0x2c0000002c01dc04ULL,
+ 0x190e0000fc81dc03ULL,
+ 0x2800400000029de4ULL,
+ 0x7000c01050c21c03ULL,
+ 0x280040001002dde4ULL,
+ 0x204282020042e047ULL,
+ 0x7000c00820c25c03ULL,
+ 0x80000000000021e7ULL,
+ 0x190e0000fc93dc03ULL,
+ 0x1000000180821c02ULL,
+ 0x1000000040931c02ULL,
+ 0x1000000010935c02ULL,
+ 0x4800000034825c03ULL,
+ 0x22c042c042c04287ULL,
+ 0x4800000030821c03ULL,
+ 0x2800000028031de4ULL,
+ 0x4801000020a29c03ULL,
+ 0x280000002c035de4ULL,
+ 0x0800000000b2dc42ULL,
+ 0x4801000024c31c03ULL,
+ 0x9400000000a01fc5ULL,
+ 0x200002e04202c047ULL,
+ 0x2800400020001de4ULL,
+ 0x0800000000d35c42ULL,
+ 0x9400000100c107c5ULL,
+ 0x9400000140c01f85ULL,
+ 0x8000000000001de7ULL
+};
+
+/* NOTE: intentionally using the same names as NV */
+static const char *nve4_pm_query_names[] =
+{
+ /* MP counters */
+ "prof_trigger_00",
+ "prof_trigger_01",
+ "prof_trigger_02",
+ "prof_trigger_03",
+ "prof_trigger_04",
+ "prof_trigger_05",
+ "prof_trigger_06",
+ "prof_trigger_07",
+ "warps_launched",
+ "threads_launched",
+ "sm_cta_launched",
+ "inst_issued1",
+ "inst_issued2",
+ "inst_executed",
+ "local_load",
+ "local_store",
+ "shared_load",
+ "shared_store",
+ "l1_local_load_hit",
+ "l1_local_load_miss",
+ "l1_local_store_hit",
+ "l1_local_store_miss",
+ "gld_request",
+ "gst_request",
+ "l1_global_load_hit",
+ "l1_global_load_miss",
+ "uncached_global_load_transaction",
+ "global_store_transaction",
+ "branch",
+ "divergent_branch",
+ "active_warps",
+ "active_cycles",
+ "inst_issued",
+ "atom_count",
+ "gred_count",
+ "shared_load_replay",
+ "shared_store_replay",
+ "local_load_transactions",
+ "local_store_transactions",
+ "l1_shared_load_transactions",
+ "l1_shared_store_transactions",
+ "global_ld_mem_divergence_replays",
+ "global_st_mem_divergence_replays",
+ /* metrics, i.e. functions of the MP counters */
+ "metric-ipc", /* inst_executed, clock */
+ "metric-ipac", /* inst_executed, active_cycles */
+ "metric-ipec", /* inst_executed, (bool)inst_executed */
+ "metric-achieved_occupancy", /* active_warps, active_cycles */
+ "metric-sm_efficiency", /* active_cycles, clock */
+ "metric-inst_replay_overhead" /* inst_issued, inst_executed */
+};
+
+/* For simplicity, we will allocate as many group slots as we allocate counter
+ * slots. This means that a single counter which wants to source from 2 groups
+ * will have to be declared as using 2 counter slots. This shouldn't really be
+ * a problem because such queries don't make much sense ... (unless someone is
+ * really creative).
+ */
+struct nvc0_mp_counter_cfg
+{
+ uint32_t func : 16; /* mask or 4-bit logic op (depending on mode) */
+ uint32_t mode : 4; /* LOGOP,B6,LOGOP_B6(_PULSE) */
+ uint32_t num_src : 3; /* number of sources (1 - 6, only for NVC0:NVE4) */
+ uint32_t sig_dom : 1; /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */
+ uint32_t sig_sel : 8; /* signal group */
+ uint64_t src_sel; /* signal selection for up to 6 sources (48 bit) */
+};
+
+#define NVC0_COUNTER_OPn_SUM 0
+#define NVC0_COUNTER_OPn_OR 1
+#define NVC0_COUNTER_OPn_AND 2
+#define NVC0_COUNTER_OP2_REL_SUM_MM 3 /* (sum(ctr0) - sum(ctr1)) / sum(ctr0) */
+#define NVC0_COUNTER_OP2_DIV_SUM_M0 4 /* sum(ctr0) / ctr1 of MP[0]) */
+#define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
+#define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */
+
+struct nvc0_mp_pm_query_cfg
+{
+ struct nvc0_mp_counter_cfg ctr[4];
+ uint8_t num_counters;
+ uint8_t op;
+ uint8_t norm[2]; /* normalization num,denom */
+};
+
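+/* _Q1A/_Q1B define single-counter queries sourcing from signal domain A (per
+ * warp scheduler) or B (per MP); _M2A/_M2B/_M2AB define metrics computed from
+ * two counters. */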
+#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
+#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
+#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
+ { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
+ { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \
+ {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
+#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
+ { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \
+ { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
+ {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
+#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
+ { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
+ { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
+ {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
+
+/* NOTES:
+ * active_warps: bit 0 alternates between 0 and 1 for an odd number of warps
+ * inst_executed etc.: we only count a single warp scheduler
+ * metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers;
+ * this is inaccurate !
+ */
+static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
+{
+ _Q1A(PROF_TRIGGER_0, 0x0001, B6, USER, 0x00000000, 1, 1),
+ _Q1A(PROF_TRIGGER_1, 0x0001, B6, USER, 0x00000004, 1, 1),
+ _Q1A(PROF_TRIGGER_2, 0x0001, B6, USER, 0x00000008, 1, 1),
+ _Q1A(PROF_TRIGGER_3, 0x0001, B6, USER, 0x0000000c, 1, 1),
+ _Q1A(PROF_TRIGGER_4, 0x0001, B6, USER, 0x00000010, 1, 1),
+ _Q1A(PROF_TRIGGER_5, 0x0001, B6, USER, 0x00000014, 1, 1),
+ _Q1A(PROF_TRIGGER_6, 0x0001, B6, USER, 0x00000018, 1, 1),
+ _Q1A(PROF_TRIGGER_7, 0x0001, B6, USER, 0x0000001c, 1, 1),
+ _Q1A(LAUNCHED_WARPS, 0x0001, B6, LAUNCH, 0x00000004, 1, 1),
+ _Q1A(LAUNCHED_THREADS, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1),
+ _Q1B(LAUNCHED_CTA, 0x0001, B6, WARP, 0x0000001c, 1, 1),
+ _Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1),
+ _Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1),
+ _Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1),
+ _Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1),
+ _Q1A(LD_SHARED, 0x0001, B6, LDST, 0x00000000, 1, 1),
+ _Q1A(ST_SHARED, 0x0001, B6, LDST, 0x00000004, 1, 1),
+ _Q1A(LD_LOCAL, 0x0001, B6, LDST, 0x00000008, 1, 1),
+ _Q1A(ST_LOCAL, 0x0001, B6, LDST, 0x0000000c, 1, 1),
+ _Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1),
+ _Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1),
+ _Q1B(L1_LOCAL_LOAD_HIT, 0x0001, B6, L1, 0x00000000, 1, 1),
+ _Q1B(L1_LOCAL_LOAD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1),
+ _Q1B(L1_LOCAL_STORE_HIT, 0x0001, B6, L1, 0x00000008, 1, 1),
+ _Q1B(L1_LOCAL_STORE_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1),
+ _Q1B(L1_GLOBAL_LOAD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1),
+ _Q1B(L1_GLOBAL_LOAD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1),
+ _Q1B(GLD_TRANSACTIONS_UNCACHED, 0x0001, B6, MEM, 0x00000000, 1, 1),
+ _Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1),
+ _Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1),
+ _Q1A(BRANCH_DIVERGENT, 0x0001, B6, BRANCH, 0x00000010, 1, 1),
+ _Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1),
+ _Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
+ _Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1),
+ _Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1),
+ _Q1B(LD_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1),
+ _Q1B(ST_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1),
+ _Q1B(LD_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1),
+ _Q1B(ST_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1),
+ _Q1B(L1_LD_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1),
+ _Q1B(L1_ST_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1),
+ _Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1),
+ _Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1),
+ _M2AB(IPC, 0x3, B6, EXEC, 0x398, 0xffff, LOGOP, WARP, 0x0, DIV_SUM_M0, 10, 1),
+ _M2AB(IPAC, 0x3, B6, EXEC, 0x398, 0x1, B6, WARP, 0x0, AVG_DIV_MM, 10, 1),
+ _M2A(IPEC, 0x3, B6, EXEC, 0x398, 0xe, LOGOP, EXEC, 0x398, AVG_DIV_MM, 10, 1),
+ _M2A(INST_REPLAY_OHEAD, 0x3, B6, ISSUE, 0x104, 0x3, B6, EXEC, 0x398, REL_SUM_MM, 100, 1),
+ _M2B(MP_OCCUPANCY, 0x3f, B6, WARP, 0x31483104, 0x01, B6, WARP, 0x0, AVG_DIV_MM, 200, 64),
+ _M2B(MP_EFFICIENCY, 0x01, B6, WARP, 0x0, 0xffff, LOGOP, WARP, 0x0, AVG_DIV_M0, 100, 1),
+};
+
+#undef _Q1A
+#undef _Q1B
+#undef _M2A
+#undef _M2B
+#undef _M2AB
+
+/* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
+static const uint64_t nvc0_read_mp_pm_counters_code[] =
+{
+ /* mov b32 $r8 $tidx
+ * mov b32 $r9 $physid
+ * mov b32 $r0 $pm0
+ * mov b32 $r1 $pm1
+ * mov b32 $r2 $pm2
+ * mov b32 $r3 $pm3
+ * mov b32 $r4 $pm4
+ * mov b32 $r5 $pm5
+ * mov b32 $r6 $pm6
+ * mov b32 $r7 $pm7
+ * set $p0 0x1 eq u32 $r8 0x0
+ * mov b32 $r10 c0[0x0]
+ * mov b32 $r11 c0[0x4]
+ * ext u32 $r8 $r9 0x414
+ * (not $p0) exit
+ * mul $r8 u32 $r8 u32 36
+ * add b32 $r10 $c $r10 $r8
+ * add b32 $r11 $r11 0x0 $c
+ * mov b32 $r8 c0[0x8]
+ * st b128 wt g[$r10d+0x00] $r0q
+ * st b128 wt g[$r10d+0x10] $r4q
+ * st b32 wt g[$r10d+0x20] $r8
+ * exit */
+ 0x2c00000084021c04ULL,
+ 0x2c0000000c025c04ULL,
+ 0x2c00000010001c04ULL,
+ 0x2c00000014005c04ULL,
+ 0x2c00000018009c04ULL,
+ 0x2c0000001c00dc04ULL,
+ 0x2c00000020011c04ULL,
+ 0x2c00000024015c04ULL,
+ 0x2c00000028019c04ULL,
+ 0x2c0000002c01dc04ULL,
+ 0x190e0000fc81dc03ULL,
+ 0x2800400000029de4ULL,
+ 0x280040001002dde4ULL,
+ 0x7000c01050921c03ULL,
+ 0x80000000000021e7ULL,
+ 0x1000000090821c02ULL,
+ 0x4801000020a29c03ULL,
+ 0x0800000000b2dc42ULL,
+ 0x2800400020021de4ULL,
+ 0x9400000000a01fc5ULL,
+ 0x9400000040a11fc5ULL,
+ 0x9400000080a21f85ULL,
+ 0x8000000000001de7ULL
+};
+
+static const char *nvc0_pm_query_names[] =
+{
+ /* MP counters */
+ "inst_executed",
+ "branch",
+ "divergent_branch",
+ "active_warps",
+ "active_cycles",
+ "warps_launched",
+ "threads_launched",
+ "shared_load",
+ "shared_store",
+ "local_load",
+ "local_store",
+ "gred_count",
+ "atom_count",
+ "gld_request",
+ "gst_request",
+ "inst_issued1_0",
+ "inst_issued1_1",
+ "inst_issued2_0",
+ "inst_issued2_1",
+ "thread_inst_executed_0",
+ "thread_inst_executed_1",
+ "thread_inst_executed_2",
+ "thread_inst_executed_3",
+ "prof_trigger_00",
+ "prof_trigger_01",
+ "prof_trigger_02",
+ "prof_trigger_03",
+ "prof_trigger_04",
+ "prof_trigger_05",
+ "prof_trigger_06",
+ "prof_trigger_07",
+};
+
+#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_PM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }
+
+static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
+{
+ _Q(INST_EXECUTED, 0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00),
+ _Q(BRANCH, 0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00),
+ _Q(BRANCH_DIVERGENT, 0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00),
+ _Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
+ _Q(ACTIVE_CYCLES, 0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(LAUNCHED_WARPS, 0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(LAUNCHED_THREADS, 0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
+ _Q(LD_SHARED, 0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(ST_SHARED, 0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(LD_LOCAL, 0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(ST_LOCAL, 0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(GRED_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(ATOM_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(GLD_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(GST_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(INST_ISSUED1_0, 0xaaaa, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(INST_ISSUED1_1, 0xaaaa, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(INST_ISSUED2_0, 0xaaaa, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(INST_ISSUED2_1, 0xaaaa, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(TH_INST_EXECUTED_0, 0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
+ _Q(TH_INST_EXECUTED_1, 0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
+ _Q(TH_INST_EXECUTED_2, 0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
+ _Q(TH_INST_EXECUTED_3, 0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
+ _Q(PROF_TRIGGER_0, 0xaaaa, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(PROF_TRIGGER_1, 0xaaaa, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(PROF_TRIGGER_2, 0xaaaa, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(PROF_TRIGGER_3, 0xaaaa, LOGOP, 0x01, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(PROF_TRIGGER_4, 0xaaaa, LOGOP, 0x01, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(PROF_TRIGGER_5, 0xaaaa, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(PROF_TRIGGER_6, 0xaaaa, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(PROF_TRIGGER_7, 0xaaaa, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00),
+};
+
+#undef _Q
+
+static const struct nvc0_mp_pm_query_cfg *
+nvc0_mp_pm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+
+ if (screen->base.class_3d >= NVE4_3D_CLASS)
+ return &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
+ return &nvc0_mp_pm_queries[q->type - NVC0_PM_QUERY(0)];
+}
+
+static void
+nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const struct nvc0_mp_pm_query_cfg *cfg;
+ unsigned i, c;
+ unsigned num_ab[2] = { 0, 0 };
+
+ cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
+
+ /* check if we have enough free counter slots */
+ for (i = 0; i < cfg->num_counters; ++i)
+ num_ab[cfg->ctr[i].sig_dom]++;
+
+ if (screen->pm.num_mp_pm_active[0] + num_ab[0] > 4 ||
+ screen->pm.num_mp_pm_active[1] + num_ab[1] > 4) {
+ NOUVEAU_ERR("Not enough free MP counter slots !\n");
+ return;
+ }
+
+ assert(cfg->num_counters <= 4);
+ PUSH_SPACE(push, 4 * 8 + 6);
+
+ if (!screen->pm.mp_counters_enabled) {
+ screen->pm.mp_counters_enabled = TRUE;
+ BEGIN_NVC0(push, SUBC_SW(0x06ac), 1);
+ PUSH_DATA (push, 0x1fcb);
+ }
+
+ /* set sequence field to 0 (used to check if result is available) */
+ for (i = 0; i < screen->mp_count; ++i)
+ q->data[i * 10 + 10] = 0;
+
+ for (i = 0; i < cfg->num_counters; ++i) {
+ const unsigned d = cfg->ctr[i].sig_dom;
+
+ if (!screen->pm.num_mp_pm_active[d]) {
+ uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
+ if (screen->pm.num_mp_pm_active[!d])
+ m |= 1 << (7 + (8 * d));
+ BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
+ PUSH_DATA (push, m);
+ }
+ screen->pm.num_mp_pm_active[d]++;
+
+ for (c = d * 4; c < (d * 4 + 4); ++c) {
+ if (!screen->pm.mp_counter[c]) {
+ q->ctr[i] = c;
+ screen->pm.mp_counter[c] = (struct pipe_query *)q;
+ break;
+ }
+ }
+ assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */
+
+ /* configure and reset the counter(s) */
+ if (screen->base.class_3d >= NVE4_3D_CLASS) {
+ if (d == 0)
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1);
+ else
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1);
+ PUSH_DATA (push, cfg->ctr[i].sig_sel);
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1);
+ PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3));
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1);
+ PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1);
+ PUSH_DATA (push, 0);
+ } else {
+ unsigned s;
+
+ for (s = 0; s < cfg->ctr[i].num_src; s++) {
+ BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SIGSEL(s)), 1);
+ PUSH_DATA (push, cfg->ctr[i].sig_sel);
+ BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SRCSEL(s)), 1);
+ PUSH_DATA (push, (cfg->ctr[i].src_sel >> (s * 8)) & 0xff);
+ BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(s)), 1);
+ PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
+ BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SET(s)), 1);
+ PUSH_DATA (push, 0);
+ }
+ }
+ }
+}
+
+static void
+nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ struct pipe_context *pipe = &nvc0->base.pipe;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const boolean is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
+ uint32_t mask;
+ uint32_t input[3];
+ const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
+ const uint grid[3] = { screen->mp_count, 1, 1 };
+ unsigned c;
+ const struct nvc0_mp_pm_query_cfg *cfg;
+
+ cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
+
+ if (unlikely(!screen->pm.prog)) {
+ struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
+ prog->type = PIPE_SHADER_COMPUTE;
+ prog->translated = TRUE;
+ prog->num_gprs = 14;
+ prog->parm_size = 12;
+ if (is_nve4) {
+ prog->code = (uint32_t *)nve4_read_mp_pm_counters_code;
+ prog->code_size = sizeof(nve4_read_mp_pm_counters_code);
+ } else {
+ prog->code = (uint32_t *)nvc0_read_mp_pm_counters_code;
+ prog->code_size = sizeof(nvc0_read_mp_pm_counters_code);
+ }
+ screen->pm.prog = prog;
+ }
+
+ /* disable all counting */
+ PUSH_SPACE(push, 8);
+ for (c = 0; c < 8; ++c)
+ if (screen->pm.mp_counter[c]) {
+ if (is_nve4) {
+ IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0);
+ } else {
+ IMMED_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 0);
+ }
+ }
+ /* release counters for this query */
+ for (c = 0; c < 8; ++c) {
+ if (nvc0_query(screen->pm.mp_counter[c]) == q) {
+ screen->pm.num_mp_pm_active[c / 4]--;
+ screen->pm.mp_counter[c] = NULL;
+ }
+ }
+
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
+ q->bo);
+
+ PUSH_SPACE(push, 1);
+ IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);
+
+ pipe->bind_compute_state(pipe, screen->pm.prog);
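+   /* the read-back shader expects the destination address in c0[0x0]/c0[0x4]
+    * and the sequence value to store with the results in c0[0x8] */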
+ input[0] = (q->bo->offset + q->base);
+ input[1] = (q->bo->offset + q->base) >> 32;
+ input[2] = q->sequence;
+ pipe->launch_grid(pipe, block, grid, 0, input);
+
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);
+
+ /* re-activate other counters */
+ PUSH_SPACE(push, 16);
+ mask = 0;
+ for (c = 0; c < 8; ++c) {
+ unsigned i;
+ q = nvc0_query(screen->pm.mp_counter[c]);
+ if (!q)
+ continue;
+ cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
+ for (i = 0; i < cfg->num_counters; ++i) {
+ if (mask & (1 << q->ctr[i]))
+ break;
+ mask |= 1 << q->ctr[i];
+ if (is_nve4) {
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(q->ctr[i])), 1);
+ } else {
+ BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(q->ctr[i])), 1);
+ }
+ PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
+ }
+ }
+}
+
+static INLINE boolean
+nvc0_mp_pm_query_read_data(uint32_t count[32][4],
+ struct nvc0_context *nvc0, boolean wait,
+ struct nvc0_query *q,
+ const struct nvc0_mp_pm_query_cfg *cfg,
+ unsigned mp_count)
+{
+ unsigned p, c;
+
+ for (p = 0; p < mp_count; ++p) {
+ const unsigned b = (0x24 / 4) * p;
+
+ for (c = 0; c < cfg->num_counters; ++c) {
+ if (q->data[b + 8] != q->sequence) {
+ if (!wait)
+ return FALSE;
+ if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
+ return FALSE;
+ }
+ count[p][c] = q->data[b + q->ctr[c]];
+ }
+ }
+ return TRUE;
+}
+
+static INLINE boolean
+nve4_mp_pm_query_read_data(uint32_t count[32][4],
+ struct nvc0_context *nvc0, boolean wait,
+ struct nvc0_query *q,
+ const struct nvc0_mp_pm_query_cfg *cfg,
+ unsigned mp_count)
+{
+ unsigned p, c, d;
+
+ for (p = 0; p < mp_count; ++p) {
+ const unsigned b = (0x60 / 4) * p;
+
+ for (c = 0; c < cfg->num_counters; ++c) {
+ count[p][c] = 0;
+ for (d = 0; d < ((q->ctr[c] & ~3) ? 1 : 4); ++d) {
+ if (q->data[b + 20 + d] != q->sequence) {
+ if (!wait)
+ return FALSE;
+ if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
+ return FALSE;
+ }
+ if (q->ctr[c] & ~0x3)
+ count[p][c] = q->data[b + 16 + (q->ctr[c] & 3)];
+ else
+ count[p][c] += q->data[b + d * 4 + q->ctr[c]];
+ }
+ }
+ }
+ return TRUE;
+}
+
+/* Metric calculations:
+ * sum(x) ... sum of x over all MPs
+ * avg(x) ... average of x over all MPs
+ *
+ * IPC : sum(inst_executed) / clock
+ * INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued)
+ * MP_OCCUPANCY : avg((active_warps / 64) / active_cycles)
+ * MP_EFFICIENCY : avg(active_cycles / clock)
+ *
+ * NOTE: Interpretation of IPC requires knowledge of MP count.
+ */
+static boolean
+nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
+ void *result, boolean wait)
+{
+ uint32_t count[32][4];
+ uint64_t value = 0;
+ unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
+ unsigned p, c;
+ const struct nvc0_mp_pm_query_cfg *cfg;
+ boolean ret;
+
+ cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
+
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
+ ret = nve4_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
+ else
+ ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
+ if (!ret)
+ return FALSE;
+
+ if (cfg->op == NVC0_COUNTER_OPn_SUM) {
+ for (c = 0; c < cfg->num_counters; ++c)
+ for (p = 0; p < mp_count; ++p)
+ value += count[p][c];
+ value = (value * cfg->norm[0]) / cfg->norm[1];
+ } else
+ if (cfg->op == NVC0_COUNTER_OPn_OR) {
+ uint32_t v = 0;
+ for (c = 0; c < cfg->num_counters; ++c)
+ for (p = 0; p < mp_count; ++p)
+ v |= count[p][c];
+ value = (v * cfg->norm[0]) / cfg->norm[1];
+ } else
+ if (cfg->op == NVC0_COUNTER_OPn_AND) {
+ uint32_t v = ~0;
+ for (c = 0; c < cfg->num_counters; ++c)
+ for (p = 0; p < mp_count; ++p)
+ v &= count[p][c];
+ value = (v * cfg->norm[0]) / cfg->norm[1];
+ } else
+ if (cfg->op == NVC0_COUNTER_OP2_REL_SUM_MM) {
+ uint64_t v[2] = { 0, 0 };
+ for (p = 0; p < mp_count; ++p) {
+ v[0] += count[p][0];
+ v[1] += count[p][1];
+ }
+ if (v[0])
+ value = ((v[0] - v[1]) * cfg->norm[0]) / (v[0] * cfg->norm[1]);
+ } else
+ if (cfg->op == NVC0_COUNTER_OP2_DIV_SUM_M0) {
+ for (p = 0; p < mp_count; ++p)
+ value += count[p][0];
+ if (count[0][1])
+ value = (value * cfg->norm[0]) / (count[0][1] * cfg->norm[1]);
+ else
+ value = 0;
+ } else
+ if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_MM) {
+ unsigned mp_used = 0;
+ for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
+ if (count[p][1])
+ value += (count[p][0] * cfg->norm[0]) / count[p][1];
+ if (mp_used)
+ value /= mp_used * cfg->norm[1];
+ } else
+ if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_M0) {
+ unsigned mp_used = 0;
+ for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
+ value += count[p][0];
+ if (count[0][1] && mp_used) {
+ value *= cfg->norm[0];
+ value /= count[0][1] * mp_used * cfg->norm[1];
+ } else {
+ value = 0;
+ }
+ }
+
+ *(uint64_t *)result = value;
+ return TRUE;
+}
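+/* Worked example (editor's illustrative note, not part of the original patch;
+ * the counter values are hypothetical): for the OP2_AVG_DIV_MM case above,
+ * with norm = { n0, n1 } and two MPs reporting (count[p][0], count[p][1]) =
+ * (100, 400) and (200, 400), the loop accumulates value = (100 * n0) / 400 +
+ * (200 * n0) / 400 with mp_used = 2, and the final division by mp_used * n1
+ * averages the per-MP ratios instead of summing them.
+ */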
+
+int
+nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
+ unsigned id,
+ struct pipe_driver_query_info *info)
+{
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+ int count = 0;
+
+ count += NVC0_QUERY_DRV_STAT_COUNT;
+
+ if (screen->base.device->drm_version >= 0x01000101) {
+ if (screen->base.class_3d >= NVE4_3D_CLASS) {
+ count += NVE4_PM_QUERY_COUNT;
+ } else
+ if (screen->compute) {
+ count += NVC0_PM_QUERY_COUNT; /* NVC0_COMPUTE is not always enabled */
+ }
+ }
+
+ if (!info)
+ return count;
+
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+ if (id < NVC0_QUERY_DRV_STAT_COUNT) {
+ info->name = nvc0_drv_stat_names[id];
+ info->query_type = NVC0_QUERY_DRV_STAT(id);
+ info->max_value = ~0ULL;
+ info->uses_byte_units = !!strstr(info->name, "bytes");
+ return 1;
+ } else
+#endif
+ if (id < count) {
+ if (screen->base.class_3d >= NVE4_3D_CLASS) {
+ info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
+ info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
+ info->max_value = (id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ?
+ ~0ULL : 100;
+ info->uses_byte_units = FALSE;
+ return 1;
+ } else
+ if (screen->compute) {
+ info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
+ info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
+ info->max_value = ~0ULL;
+ info->uses_byte_units = FALSE;
+ return 1;
+ }
+ }
+   /* user asked for info about a nonexistent query */
+ info->name = "this_is_not_the_query_you_are_looking_for";
+ info->query_type = 0xdeadd01d;
+ info->max_value = 0;
+ info->uses_byte_units = FALSE;
+ return 0;
+}
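+/* Editor's sketch, not part of the original patch: as the early-out above
+ * shows, callers pass info == NULL once to learn how many driver-specific
+ * queries exist and then iterate over the ids.  The helper below is purely
+ * hypothetical and only illustrates that calling pattern.
+ */
+#if 0
+static void
+example_list_driver_queries(struct pipe_screen *pscreen)
+{
+   struct pipe_driver_query_info info;
+   int i, n = pscreen->get_driver_query_info(pscreen, 0, NULL);
+
+   for (i = 0; i < n; ++i)
+      if (pscreen->get_driver_query_info(pscreen, i, &info))
+         debug_printf("driver query %d: %s\n", i, info.name);
+}
+#endif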
+
+void
+nvc0_init_query_functions(struct nvc0_context *nvc0)
+{
+ struct pipe_context *pipe = &nvc0->base.pipe;
+
+ pipe->create_query = nvc0_query_create;
+ pipe->destroy_query = nvc0_query_destroy;
+ pipe->begin_query = nvc0_query_begin;
+ pipe->end_query = nvc0_query_end;
+ pipe->get_query_result = nvc0_query_result;
+ pipe->render_condition = nvc0_render_condition;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
new file mode 100644
index 0000000..4e70903
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
@@ -0,0 +1,62 @@
+
+#include "pipe/p_context.h"
+#include "nvc0/nvc0_resource.h"
+#include "nouveau_screen.h"
+
+
+static struct pipe_resource *
+nvc0_resource_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ)
+{
+ switch (templ->target) {
+ case PIPE_BUFFER:
+ return nouveau_buffer_create(screen, templ);
+ default:
+ return nvc0_miptree_create(screen, templ);
+ }
+}
+
+static struct pipe_resource *
+nvc0_resource_from_handle(struct pipe_screen * screen,
+ const struct pipe_resource *templ,
+ struct winsys_handle *whandle)
+{
+ if (templ->target == PIPE_BUFFER) {
+ return NULL;
+ } else {
+ struct pipe_resource *res = nv50_miptree_from_handle(screen,
+ templ, whandle);
+ nv04_resource(res)->vtbl = &nvc0_miptree_vtbl;
+ return res;
+ }
+}
+
+static struct pipe_surface *
+nvc0_surface_create(struct pipe_context *pipe,
+ struct pipe_resource *pres,
+ const struct pipe_surface *templ)
+{
+ if (unlikely(pres->target == PIPE_BUFFER))
+ return nv50_surface_from_buffer(pipe, pres, templ);
+ return nvc0_miptree_surface_new(pipe, pres, templ);
+}
+
+void
+nvc0_init_resource_functions(struct pipe_context *pcontext)
+{
+ pcontext->transfer_map = u_transfer_map_vtbl;
+ pcontext->transfer_flush_region = u_transfer_flush_region_vtbl;
+ pcontext->transfer_unmap = u_transfer_unmap_vtbl;
+ pcontext->transfer_inline_write = u_transfer_inline_write_vtbl;
+ pcontext->create_surface = nvc0_surface_create;
+ pcontext->surface_destroy = nv50_surface_destroy;
+}
+
+void
+nvc0_screen_init_resource_functions(struct pipe_screen *pscreen)
+{
+ pscreen->resource_create = nvc0_resource_create;
+ pscreen->resource_from_handle = nvc0_resource_from_handle;
+ pscreen->resource_get_handle = u_resource_get_handle_vtbl;
+ pscreen->resource_destroy = u_resource_destroy_vtbl;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.h b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.h
new file mode 100644
index 0000000..0d5f026
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.h
@@ -0,0 +1,58 @@
+
+#ifndef __NVC0_RESOURCE_H__
+#define __NVC0_RESOURCE_H__
+
+#include "nv50/nv50_resource.h"
+
+#define NVC0_RESOURCE_FLAG_VIDEO (NOUVEAU_RESOURCE_FLAG_DRV_PRIV << 0)
+
+
+#define NVC0_TILE_SHIFT_X(m) ((((m) >> 0) & 0xf) + 6)
+#define NVC0_TILE_SHIFT_Y(m) ((((m) >> 4) & 0xf) + 3)
+#define NVC0_TILE_SHIFT_Z(m) ((((m) >> 8) & 0xf) + 0)
+
+#define NVC0_TILE_SIZE_X(m) (64 << (((m) >> 0) & 0xf))
+#define NVC0_TILE_SIZE_Y(m) ( 8 << (((m) >> 4) & 0xf))
+#define NVC0_TILE_SIZE_Z(m) ( 1 << (((m) >> 8) & 0xf))
+
+/* The three shift fields are each at most 5, so their sum (at most 15) still
+ * fits in the low nibble and masking only once at the end is sufficient. */
+
+#define NVC0_TILE_SIZE_2D(m) ((64 * 8) << (((m) + ((m) >> 4)) & 0xf))
+
+#define NVC0_TILE_SIZE(m) ((64 * 8) << (((m) + ((m) >> 4) + ((m) >> 8)) & 0xf))
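+/* Example (editor's note): a tile mode of 0x020 has shift fields x = 0,
+ * y = 2, z = 0, so a tile spans 64 bytes in X, 8 << 2 = 32 rows in Y and a
+ * single layer in Z; NVC0_TILE_SIZE(0x020) accordingly evaluates to
+ * (64 * 8) << 2 = 2048 bytes = 64 * 32 * 1. */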
+
+
+void
+nvc0_init_resource_functions(struct pipe_context *pcontext);
+
+void
+nvc0_screen_init_resource_functions(struct pipe_screen *pscreen);
+
+/* Internal functions:
+ */
+struct pipe_resource *
+nvc0_miptree_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *tmp);
+
+const struct u_resource_vtbl nvc0_miptree_vtbl;
+
+struct pipe_surface *
+nvc0_miptree_surface_new(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_surface *templ);
+
+unsigned
+nvc0_mt_zslice_offset(const struct nv50_miptree *, unsigned l, unsigned z);
+
+void *
+nvc0_miptree_transfer_map(struct pipe_context *pctx,
+ struct pipe_resource *res,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ struct pipe_transfer **ptransfer);
+void
+nvc0_miptree_transfer_unmap(struct pipe_context *pcontext,
+ struct pipe_transfer *ptx);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
new file mode 100644
index 0000000..ff7890b
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -0,0 +1,967 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+#include "util/u_format_s3tc.h"
+#include "pipe/p_screen.h"
+
+#include "vl/vl_decoder.h"
+#include "vl/vl_video_buffer.h"
+
+#include "nouveau_vp3_video.h"
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_screen.h"
+
+#include "nvc0/nvc0_graph_macros.h"
+
+#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
+# define NOUVEAU_GETPARAM_GRAPH_UNITS 13
+#endif
+
+static boolean
+nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned bindings)
+{
+ if (sample_count > 8)
+ return FALSE;
+ if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
+ return FALSE;
+
+ if (!util_format_is_supported(format, bindings))
+ return FALSE;
+
+ if ((bindings & PIPE_BIND_SAMPLER_VIEW) && (target != PIPE_BUFFER))
+ if (util_format_get_blocksizebits(format) == 3 * 32)
+ return FALSE;
+
+ /* transfers & shared are always supported */
+ bindings &= ~(PIPE_BIND_TRANSFER_READ |
+ PIPE_BIND_TRANSFER_WRITE |
+ PIPE_BIND_SHARED);
+
+ return (nvc0_format_table[format].usage & bindings) == bindings;
+}
+
+static int
+nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+ const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
+
+ switch (param) {
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return 16 * 5;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 15;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return (class_3d >= NVE4_3D_CLASS) ? 13 : 12;
+ case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+ return 2048;
+ case PIPE_CAP_MIN_TEXEL_OFFSET:
+ return -8;
+ case PIPE_CAP_MAX_TEXEL_OFFSET:
+ return 7;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ case PIPE_CAP_NPOT_TEXTURES:
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP:
+ case PIPE_CAP_CUBE_MAP_ARRAY:
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ case PIPE_CAP_TEXTURE_MULTISAMPLE:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+ return 65536;
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ case PIPE_CAP_DEPTH_CLIP_DISABLE:
+ case PIPE_CAP_POINT_SPRITE:
+ case PIPE_CAP_TGSI_TEXCOORD:
+ return 1;
+ case PIPE_CAP_SM3:
+ return 1;
+ case PIPE_CAP_GLSL_FEATURE_LEVEL:
+ return 150;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 8;
+ case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+ return 1;
+ case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+ case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+ case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+ return 1;
+ case PIPE_CAP_QUERY_TIMESTAMP:
+ case PIPE_CAP_QUERY_TIME_ELAPSED:
+ case PIPE_CAP_OCCLUSION_QUERY:
+ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+ return 1;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+ return 4;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+ case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+ return 128;
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ return 0;
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ return 0;
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+ case PIPE_CAP_CONDITIONAL_RENDER:
+ case PIPE_CAP_TEXTURE_BARRIER:
+ case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+ case PIPE_CAP_START_INSTANCE:
+ return 1;
+ case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+ return 0; /* state trackers will know better */
+ case PIPE_CAP_USER_CONSTANT_BUFFERS:
+ case PIPE_CAP_USER_INDEX_BUFFERS:
+ case PIPE_CAP_USER_VERTEX_BUFFERS:
+ return 1;
+ case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+ return 256;
+ case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+ return 1; /* 256 for binding as RT, but that's not possible in GL */
+ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+ return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
+ case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+ return 0;
+ case PIPE_CAP_COMPUTE:
+ return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
+ case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ return 1;
+ case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+ return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50;
+ case PIPE_CAP_ENDIANNESS:
+ return PIPE_ENDIAN_LITTLE;
+ default:
+ NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static int
+nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
+ enum pipe_shader_cap param)
+{
+ const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
+
+ switch (shader) {
+ case PIPE_SHADER_VERTEX:
+ /*
+ case PIPE_SHADER_TESSELLATION_CONTROL:
+ case PIPE_SHADER_TESSELLATION_EVALUATION:
+ */
+ case PIPE_SHADER_GEOMETRY:
+ case PIPE_SHADER_FRAGMENT:
+ break;
+ case PIPE_SHADER_COMPUTE:
+ if (class_3d < NVE4_3D_CLASS)
+ return 0;
+ break;
+ default:
+ return 0;
+ }
+
+ switch (param) {
+ case PIPE_SHADER_CAP_PREFERRED_IR:
+ return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+ return 16384;
+ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+ return 16;
+ case PIPE_SHADER_CAP_MAX_INPUTS:
+ if (shader == PIPE_SHADER_VERTEX)
+ return 32;
+ /* NOTE: These only count our slots for GENERIC varyings.
+ * The address space may be larger, but the actual hard limit seems to be
+ * less than what the address space layout permits, so don't add TEXCOORD,
+ * COLOR, etc. here.
+ */
+ if (shader == PIPE_SHADER_FRAGMENT)
+ return 0x1f0 / 16;
+ /* Actually this counts CLIPVERTEX, which occupies the last generic slot,
+ * and excludes 0x60 per-patch inputs.
+ */
+ return 0x200 / 16;
+ case PIPE_SHADER_CAP_MAX_CONSTS:
+ return 65536 / 16;
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+ if (shader == PIPE_SHADER_COMPUTE && class_3d >= NVE4_3D_CLASS)
+ return NVE4_MAX_PIPE_CONSTBUFS_COMPUTE;
+ return NVC0_MAX_PIPE_CONSTBUFS;
+ case PIPE_SHADER_CAP_MAX_ADDRS:
+ return 1;
+ case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+ return shader != PIPE_SHADER_FRAGMENT;
+ case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+ return 1;
+ case PIPE_SHADER_CAP_MAX_PREDS:
+ return 0;
+ case PIPE_SHADER_CAP_MAX_TEMPS:
+ return NVC0_CAP_MAX_PROGRAM_TEMPS;
+ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+ return 1;
+ case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+ return 0;
+ case PIPE_SHADER_CAP_SUBROUTINES:
+ return 1;
+ case PIPE_SHADER_CAP_INTEGERS:
+ return 1;
+ case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+ return 16; /* would be 32 in linked (OpenGL-style) mode */
+ /*
+ case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLER_VIEWS:
+ return 32;
+ */
+ default:
+ NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
+{
+ switch (param) {
+ case PIPE_CAPF_MAX_LINE_WIDTH:
+ case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+ return 10.0f;
+ case PIPE_CAPF_MAX_POINT_WIDTH:
+ return 63.0f;
+ case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+ return 63.375f;
+ case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+ return 16.0f;
+ case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+ return 15.0f;
+ default:
+ NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
+ return 0.0f;
+ }
+}
+
+static int
+nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
+ enum pipe_compute_cap param, void *data)
+{
+ uint64_t *data64 = (uint64_t *)data;
+ const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
+
+ switch (param) {
+ case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+ data64[0] = 3;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+ data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 0x7fffffff : 65535;
+ data64[1] = 65535;
+ data64[2] = 65535;
+ return 24;
+ case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+ data64[0] = 1024;
+ data64[1] = 1024;
+ data64[2] = 64;
+ return 24;
+ case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+ data64[0] = 1024;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
+ data64[0] = (uint64_t)1 << 40;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
+ data64[0] = 48 << 10;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
+ data64[0] = 512 << 10;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
+ data64[0] = 4096;
+ return 8;
+ default:
+ return 0;
+ }
+}
+
+static void
+nvc0_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+
+ if (screen->base.fence.current) {
+ nouveau_fence_wait(screen->base.fence.current);
+ nouveau_fence_ref(NULL, &screen->base.fence.current);
+ }
+ if (screen->base.pushbuf)
+ screen->base.pushbuf->user_priv = NULL;
+
+ if (screen->blitter)
+ nvc0_blitter_destroy(screen);
+ if (screen->pm.prog) {
+ screen->pm.prog->code = NULL; /* hardcoded, don't FREE */
+ nvc0_program_destroy(NULL, screen->pm.prog);
+ }
+
+ nouveau_bo_ref(NULL, &screen->text);
+ nouveau_bo_ref(NULL, &screen->uniform_bo);
+ nouveau_bo_ref(NULL, &screen->tls);
+ nouveau_bo_ref(NULL, &screen->txc);
+ nouveau_bo_ref(NULL, &screen->fence.bo);
+ nouveau_bo_ref(NULL, &screen->poly_cache);
+ nouveau_bo_ref(NULL, &screen->parm);
+
+ nouveau_heap_destroy(&screen->lib_code);
+ nouveau_heap_destroy(&screen->text_heap);
+
+ FREE(screen->tic.entries);
+
+ nouveau_mm_destroy(screen->mm_VRAM_fe0);
+
+ nouveau_object_del(&screen->eng3d);
+ nouveau_object_del(&screen->eng2d);
+ nouveau_object_del(&screen->m2mf);
+ nouveau_object_del(&screen->compute);
+
+ nouveau_screen_fini(&screen->base);
+
+ FREE(screen);
+}
+
+static int
+nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
+ unsigned size, const uint32_t *data)
+{
+ struct nouveau_pushbuf *push = screen->base.pushbuf;
+
+ size /= 4;
+
+ BEGIN_NVC0(push, SUBC_3D(NVC0_GRAPH_MACRO_ID), 2);
+ PUSH_DATA (push, (m - 0x3800) / 8);
+ PUSH_DATA (push, pos);
+ BEGIN_1IC0(push, SUBC_3D(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1);
+ PUSH_DATA (push, pos);
+ PUSH_DATAp(push, data, size);
+
+ return pos + size;
+}
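+/* Editor's note: the return value is the first free position after the
+ * uploaded macro code, so successive calls can be chained to pack all macros
+ * back to back, which is what MK_MACRO() in nvc0_screen_create() below does. */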
+
+static void
+nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
+{
+ BEGIN_NVC0(push, SUBC_3D(0x10cc), 1);
+ PUSH_DATA (push, 0xff);
+ BEGIN_NVC0(push, SUBC_3D(0x10e0), 2);
+ PUSH_DATA (push, 0xff);
+ PUSH_DATA (push, 0xff);
+ BEGIN_NVC0(push, SUBC_3D(0x10ec), 2);
+ PUSH_DATA (push, 0xff);
+ PUSH_DATA (push, 0xff);
+ BEGIN_NVC0(push, SUBC_3D(0x074c), 1);
+ PUSH_DATA (push, 0x3f);
+
+ BEGIN_NVC0(push, SUBC_3D(0x16a8), 1);
+ PUSH_DATA (push, (3 << 16) | 3);
+ BEGIN_NVC0(push, SUBC_3D(0x1794), 1);
+ PUSH_DATA (push, (2 << 16) | 2);
+ BEGIN_NVC0(push, SUBC_3D(0x0de8), 1);
+ PUSH_DATA (push, 1);
+
+ BEGIN_NVC0(push, SUBC_3D(0x12ac), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_3D(0x0218), 1);
+ PUSH_DATA (push, 0x10);
+ BEGIN_NVC0(push, SUBC_3D(0x10fc), 1);
+ PUSH_DATA (push, 0x10);
+ BEGIN_NVC0(push, SUBC_3D(0x1290), 1);
+ PUSH_DATA (push, 0x10);
+ BEGIN_NVC0(push, SUBC_3D(0x12d8), 2);
+ PUSH_DATA (push, 0x10);
+ PUSH_DATA (push, 0x10);
+ BEGIN_NVC0(push, SUBC_3D(0x1140), 1);
+ PUSH_DATA (push, 0x10);
+ BEGIN_NVC0(push, SUBC_3D(0x1610), 1);
+ PUSH_DATA (push, 0xe);
+
+ BEGIN_NVC0(push, SUBC_3D(0x164c), 1);
+ PUSH_DATA (push, 1 << 12);
+ BEGIN_NVC0(push, SUBC_3D(0x030c), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_3D(0x0300), 1);
+ PUSH_DATA (push, 3);
+
+ BEGIN_NVC0(push, SUBC_3D(0x02d0), 1);
+ PUSH_DATA (push, 0x3fffff);
+ BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, SUBC_3D(0x19c0), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, SUBC_3D(0x075c), 1);
+ PUSH_DATA (push, 3);
+
+ if (obj_class >= NVE4_3D_CLASS) {
+ BEGIN_NVC0(push, SUBC_3D(0x07fc), 1);
+ PUSH_DATA (push, 1);
+ }
+
+ /* TODO: find out what software methods 0x1528, 0x1280 and (on nve4) 0x02dc
+ * are supposed to do */
+}
+
+static void
+nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
+{
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+ struct nouveau_pushbuf *push = screen->base.pushbuf;
+
+ /* we need to do it after possible flush in MARK_RING */
+ *sequence = ++screen->base.fence.sequence;
+
+ BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
+ PUSH_DATAh(push, screen->fence.bo->offset);
+ PUSH_DATA (push, screen->fence.bo->offset);
+ PUSH_DATA (push, *sequence);
+ PUSH_DATA (push, NVC0_3D_QUERY_GET_FENCE | NVC0_3D_QUERY_GET_SHORT |
+ (0xf << NVC0_3D_QUERY_GET_UNIT__SHIFT));
+}
+
+static u32
+nvc0_screen_fence_update(struct pipe_screen *pscreen)
+{
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+ return screen->fence.map[0];
+}
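+/* Editor's sketch, not part of the original patch: emit() writes a new
+ * sequence number to the fence BO through the 3D engine's QUERY_GET path,
+ * and update() reads back the value the GPU last wrote, so waiting boils
+ * down to polling until the read value catches up.  The helper below is
+ * hypothetical; the driver actually waits through the shared nouveau_fence
+ * code.
+ */
+#if 0
+static void
+example_spin_on_fence(struct pipe_screen *pscreen, u32 sequence)
+{
+   while ((int)(nvc0_screen_fence_update(pscreen) - sequence) < 0)
+      ; /* a real wait would kick the pushbuf and yield the CPU */
+}
+#endif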
+
+static int
+nvc0_screen_init_compute(struct nvc0_screen *screen)
+{
+ screen->base.base.get_compute_param = nvc0_screen_get_compute_param;
+
+ switch (screen->base.device->chipset & 0xf0) {
+ case 0xc0:
+ case 0xd0:
+      /* Using COMPUTE has weird effects on 3D state; we need to
+       * investigate this further before enabling it by default.
+       */
+ if (debug_get_bool_option("NVC0_COMPUTE", FALSE))
+ return nvc0_screen_compute_setup(screen, screen->base.pushbuf);
+ return 0;
+ case 0xe0:
+ case 0xf0:
+ return nve4_screen_compute_setup(screen, screen->base.pushbuf);
+ default:
+ return -1;
+ }
+}
+
+boolean
+nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
+ uint32_t lpos, uint32_t lneg, uint32_t cstack)
+{
+ struct nouveau_bo *bo = NULL;
+ int ret;
+ uint64_t size = (lpos + lneg) * 32 + cstack;
+
+ if (size >= (1 << 20)) {
+ NOUVEAU_ERR("requested TLS size too large: 0x%"PRIx64"\n", size);
+ return FALSE;
+ }
+
+ size *= (screen->base.device->chipset >= 0xe0) ? 64 : 48; /* max warps */
+ size = align(size, 0x8000);
+ size *= screen->mp_count;
+
+ size = align(size, 1 << 17);
+
+ ret = nouveau_bo_new(screen->base.device, NOUVEAU_BO_VRAM, 1 << 17, size,
+ NULL, &bo);
+ if (ret) {
+ NOUVEAU_ERR("failed to allocate TLS area, size: 0x%"PRIx64"\n", size);
+ return FALSE;
+ }
+ nouveau_bo_ref(NULL, &screen->tls);
+ screen->tls = bo;
+ return TRUE;
+}
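+/* Worked example (editor's note, assuming a hypothetical mp_count of 8): the
+ * default request from nvc0_screen_create() below is lpos = 128 * 16,
+ * lneg = 0, cstack = 0x200, so size starts at 2048 * 32 + 512 = 66048 bytes,
+ * the 64-warp maximum on >= 0xe0 chips brings it to 4227072 (already
+ * 0x8000-aligned), and 8 MPs then need 33816576 bytes, about 32.25 MiB,
+ * which also satisfies the final 1 << 17 alignment. */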
+
+#define FAIL_SCREEN_INIT(str, err) \
+ do { \
+ NOUVEAU_ERR(str, err); \
+ nvc0_screen_destroy(pscreen); \
+ return NULL; \
+ } while(0)
+
+struct pipe_screen *
+nvc0_screen_create(struct nouveau_device *dev)
+{
+ struct nvc0_screen *screen;
+ struct pipe_screen *pscreen;
+ struct nouveau_object *chan;
+ struct nouveau_pushbuf *push;
+ uint64_t value;
+ uint32_t obj_class;
+ int ret;
+ unsigned i;
+ union nouveau_bo_config mm_config;
+
+ switch (dev->chipset & ~0xf) {
+ case 0xc0:
+ case 0xd0:
+ case 0xe0:
+ case 0xf0:
+ break;
+ default:
+ return NULL;
+ }
+
+ screen = CALLOC_STRUCT(nvc0_screen);
+ if (!screen)
+ return NULL;
+ pscreen = &screen->base.base;
+
+ ret = nouveau_screen_init(&screen->base, dev);
+ if (ret) {
+ nvc0_screen_destroy(pscreen);
+ return NULL;
+ }
+ chan = screen->base.channel;
+ push = screen->base.pushbuf;
+ push->user_priv = screen;
+ push->rsvd_kick = 5;
+
+ screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
+ PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
+ screen->base.sysmem_bindings |=
+ PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
+
+ pscreen->destroy = nvc0_screen_destroy;
+ pscreen->context_create = nvc0_create;
+ pscreen->is_format_supported = nvc0_screen_is_format_supported;
+ pscreen->get_param = nvc0_screen_get_param;
+ pscreen->get_shader_param = nvc0_screen_get_shader_param;
+ pscreen->get_paramf = nvc0_screen_get_paramf;
+ pscreen->get_driver_query_info = nvc0_screen_get_driver_query_info;
+
+ nvc0_screen_init_resource_functions(pscreen);
+
+ screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param;
+ screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported;
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, NULL,
+ &screen->fence.bo);
+ if (ret)
+ goto fail;
+ nouveau_bo_map(screen->fence.bo, 0, NULL);
+ screen->fence.map = screen->fence.bo->map;
+ screen->base.fence.emit = nvc0_screen_fence_emit;
+ screen->base.fence.update = nvc0_screen_fence_update;
+
+ switch (dev->chipset & 0xf0) {
+ case 0xf0:
+ obj_class = NVF0_P2MF_CLASS;
+ break;
+ case 0xe0:
+ obj_class = NVE4_P2MF_CLASS;
+ break;
+ default:
+ obj_class = NVC0_M2MF_CLASS;
+ break;
+ }
+ ret = nouveau_object_new(chan, 0xbeef323f, obj_class, NULL, 0,
+ &screen->m2mf);
+ if (ret)
+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret);
+
+ BEGIN_NVC0(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->m2mf->oclass);
+ if (screen->m2mf->oclass == NVE4_P2MF_CLASS) {
+ BEGIN_NVC0(push, SUBC_COPY(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, 0xa0b5);
+ }
+
+ ret = nouveau_object_new(chan, 0xbeef902d, NVC0_2D_CLASS, NULL, 0,
+ &screen->eng2d);
+ if (ret)
+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for 2D: %d\n", ret);
+
+ BEGIN_NVC0(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->eng2d->oclass);
+ BEGIN_NVC0(push, NVC0_2D(SINGLE_GPC), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_2D(OPERATION), 1);
+ PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY);
+ BEGIN_NVC0(push, NVC0_2D(CLIP_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_2D(COLOR_KEY_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_2D(0x0884), 1);
+ PUSH_DATA (push, 0x3f);
+ BEGIN_NVC0(push, SUBC_2D(0x0888), 1);
+ PUSH_DATA (push, 1);
+
+ BEGIN_NVC0(push, SUBC_2D(NVC0_GRAPH_NOTIFY_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->fence.bo->offset + 16);
+ PUSH_DATA (push, screen->fence.bo->offset + 16);
+
+ switch (dev->chipset & 0xf0) {
+ case 0xf0:
+ obj_class = NVF0_3D_CLASS;
+ break;
+ case 0xe0:
+ obj_class = NVE4_3D_CLASS;
+ break;
+ case 0xd0:
+ case 0xc0:
+ default:
+ switch (dev->chipset) {
+ case 0xd9:
+ case 0xc8:
+ obj_class = NVC8_3D_CLASS;
+ break;
+ case 0xc1:
+ obj_class = NVC1_3D_CLASS;
+ break;
+ default:
+ obj_class = NVC0_3D_CLASS;
+ break;
+ }
+ break;
+ }
+ ret = nouveau_object_new(chan, 0xbeef003d, obj_class, NULL, 0,
+ &screen->eng3d);
+ if (ret)
+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret);
+ screen->base.class_3d = obj_class;
+
+ BEGIN_NVC0(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->eng3d->oclass);
+
+ BEGIN_NVC0(push, NVC0_3D(COND_MODE), 1);
+ PUSH_DATA (push, NVC0_3D_COND_MODE_ALWAYS);
+
+ if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) {
+ /* kill shaders after about 1 second (at 100 MHz) */
+ BEGIN_NVC0(push, NVC0_3D(WATCHDOG_TIMER), 1);
+ PUSH_DATA (push, 0x17);
+ }
+
+ IMMED_NVC0(push, NVC0_3D(ZETA_COMP_ENABLE), dev->drm_version >= 0x01000101);
+ BEGIN_NVC0(push, NVC0_3D(RT_COMP_ENABLE(0)), 8);
+ for (i = 0; i < 8; ++i)
+ PUSH_DATA(push, dev->drm_version >= 0x01000101);
+
+ BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, 1);
+
+ BEGIN_NVC0(push, NVC0_3D(CSAA_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 1);
+ PUSH_DATA (push, NVC0_3D_MULTISAMPLE_MODE_MS1);
+ BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_CTRL), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(LINE_WIDTH_SEPARATE), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_3D(LINE_LAST_PIXEL), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(BLEND_SEPARATE_ALPHA), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1);
+ PUSH_DATA (push, 0);
+ if (screen->eng3d->oclass < NVE4_3D_CLASS) {
+ BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1);
+ PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
+ } else {
+ BEGIN_NVC0(push, NVE4_3D(TEX_CB_INDEX), 1);
+ PUSH_DATA (push, 15);
+ }
+ BEGIN_NVC0(push, NVC0_3D(CALL_LIMIT_LOG), 1);
+ PUSH_DATA (push, 8); /* 128 */
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_STATCTRS_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ if (screen->eng3d->oclass >= NVC1_3D_CLASS) {
+ BEGIN_NVC0(push, NVC0_3D(CACHE_SPLIT), 1);
+ PUSH_DATA (push, NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1);
+ }
+
+ nvc0_magic_3d_init(push, screen->eng3d->oclass);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL,
+ &screen->text);
+ if (ret)
+ goto fail;
+
+   /* XXX: we get a page fault at the end of the code buffer every few
+    * launches; as a workaround, don't use the last 256 bytes (instruction
+    * prefetch running past the end ?).
+    */
+ nouveau_heap_init(&screen->text_heap, 0, (1 << 20) - 0x100);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, NULL,
+ &screen->uniform_bo);
+ if (ret)
+ goto fail;
+
+ for (i = 0; i < 5; ++i) {
+ /* TIC and TSC entries for each unit (nve4+ only) */
+ /* auxiliary constants (6 user clip planes, base instance id) */
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 512);
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
+ BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
+ PUSH_DATA (push, (15 << 4) | 1);
+ if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
+ unsigned j;
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 9);
+ PUSH_DATA (push, 0);
+ for (j = 0; j < 8; ++j)
+ PUSH_DATA(push, j);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1);
+ PUSH_DATA (push, 0x54);
+ }
+ }
+ BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
+ PUSH_DATA (push, 0);
+
+ /* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 256);
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 5);
+ PUSH_DATA (push, 0);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+
+ if (dev->drm_version >= 0x01000101) {
+ ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
+ if (ret) {
+ NOUVEAU_ERR("NOUVEAU_GETPARAM_GRAPH_UNITS failed.\n");
+ goto fail;
+ }
+ } else {
+ if (dev->chipset >= 0xe0 && dev->chipset < 0xf0)
+ value = (8 << 8) | 4;
+ else
+ value = (16 << 8) | 4;
+ }
+ screen->mp_count = value >> 8;
+ screen->mp_count_compute = screen->mp_count;
+
+ nvc0_screen_resize_tls_area(screen, 128 * 16, 0, 0x200);
+
+ BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->text->offset);
+ PUSH_DATA (push, screen->text->offset);
+ BEGIN_NVC0(push, NVC0_3D(TEMP_ADDRESS_HIGH), 4);
+ PUSH_DATAh(push, screen->tls->offset);
+ PUSH_DATA (push, screen->tls->offset);
+ PUSH_DATA (push, screen->tls->size >> 32);
+ PUSH_DATA (push, screen->tls->size);
+ BEGIN_NVC0(push, NVC0_3D(WARP_TEMP_ALLOC), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1);
+ PUSH_DATA (push, 0);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL,
+ &screen->poly_cache);
+ if (ret)
+ goto fail;
+
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->poly_cache->offset);
+ PUSH_DATA (push, screen->poly_cache->offset);
+ PUSH_DATA (push, 3);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, NULL,
+ &screen->txc);
+ if (ret)
+ goto fail;
+
+ BEGIN_NVC0(push, NVC0_3D(TIC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset);
+ PUSH_DATA (push, screen->txc->offset);
+ PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
+
+ BEGIN_NVC0(push, NVC0_3D(TSC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset + 65536);
+ PUSH_DATA (push, screen->txc->offset + 65536);
+ PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
+
+ BEGIN_NVC0(push, NVC0_3D(SCREEN_Y_CONTROL), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(WINDOW_OFFSET_X), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1); /* deactivate ZCULL */
+ PUSH_DATA (push, 0x3f);
+
+ BEGIN_NVC0(push, NVC0_3D(CLIP_RECTS_MODE), 1);
+ PUSH_DATA (push, NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY);
+ BEGIN_NVC0(push, NVC0_3D(CLIP_RECT_HORIZ(0)), 8 * 2);
+ for (i = 0; i < 8 * 2; ++i)
+ PUSH_DATA(push, 0);
+ BEGIN_NVC0(push, NVC0_3D(CLIP_RECTS_EN), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(CLIPID_ENABLE), 1);
+ PUSH_DATA (push, 0);
+
+   /* scissors, viewport and stencil mask should not affect clears */
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_FLAGS), 1);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(0)), 2);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 1.0f);
+ BEGIN_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 1);
+ PUSH_DATA (push, NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1);
+
+ /* We use scissors instead of exact view volume clipping,
+ * so they're always enabled.
+ */
+ BEGIN_NVC0(push, NVC0_3D(SCISSOR_ENABLE(0)), 3);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 8192 << 16);
+ PUSH_DATA (push, 8192 << 16);
+
+#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n);
+
+ i = 0;
+ MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, nvc0_9097_per_instance_bf);
+ MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, nvc0_9097_blend_enables);
+ MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, nvc0_9097_vertex_array_select);
+ MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, nvc0_9097_tep_select);
+ MK_MACRO(NVC0_3D_MACRO_GP_SELECT, nvc0_9097_gp_select);
+ MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, nvc0_9097_poly_mode_front);
+ MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, nvc0_9097_poly_mode_back);
+
+ BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_3D(RT_SEPARATE_FRAG_DATA), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
+ PUSH_DATA (push, 0x40);
+ BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
+ PUSH_DATA (push, 0x30);
+ BEGIN_NVC0(push, NVC0_3D(PATCH_VERTICES), 1);
+ PUSH_DATA (push, 3);
+ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
+ PUSH_DATA (push, 0x20);
+ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(0)), 1);
+ PUSH_DATA (push, 0x00);
+
+ BEGIN_NVC0(push, NVC0_3D(POINT_COORD_REPLACE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(POINT_RASTER_RULES), 1);
+ PUSH_DATA (push, NVC0_3D_POINT_RASTER_RULES_OGL);
+
+ IMMED_NVC0(push, NVC0_3D(EDGEFLAG), 1);
+
+ if (nvc0_screen_init_compute(screen))
+ goto fail;
+
+ PUSH_KICK (push);
+
+ screen->tic.entries = CALLOC(4096, sizeof(void *));
+ screen->tsc.entries = screen->tic.entries + 2048;
+
+ mm_config.nvc0.tile_mode = 0;
+ mm_config.nvc0.memtype = 0xfe0;
+ screen->mm_VRAM_fe0 = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, &mm_config);
+
+ if (!nvc0_blitter_create(screen))
+ goto fail;
+
+ nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
+
+ return pscreen;
+
+fail:
+ nvc0_screen_destroy(pscreen);
+ return NULL;
+}
+
+int
+nvc0_screen_tic_alloc(struct nvc0_screen *screen, void *entry)
+{
+ int i = screen->tic.next;
+
+ while (screen->tic.lock[i / 32] & (1 << (i % 32)))
+ i = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1);
+
+ screen->tic.next = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1);
+
+ if (screen->tic.entries[i])
+ nv50_tic_entry(screen->tic.entries[i])->id = -1;
+
+ screen->tic.entries[i] = entry;
+ return i;
+}
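+/* Editor's note: this is a simple ring allocator over the TIC slots; entries
+ * whose lock bit is set (cleared again by nvc0_screen_tic_unlock() in
+ * nvc0_screen.h) are skipped, and whatever entry previously occupied the
+ * chosen slot is evicted by invalidating its id.  nvc0_screen_tsc_alloc()
+ * below does the same for sampler state entries. */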
+
+int
+nvc0_screen_tsc_alloc(struct nvc0_screen *screen, void *entry)
+{
+ int i = screen->tsc.next;
+
+ while (screen->tsc.lock[i / 32] & (1 << (i % 32)))
+ i = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1);
+
+ screen->tsc.next = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1);
+
+ if (screen->tsc.entries[i])
+ nv50_tsc_entry(screen->tsc.entries[i])->id = -1;
+
+ screen->tsc.entries[i] = entry;
+ return i;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
new file mode 100644
index 0000000..27a0c5f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -0,0 +1,325 @@
+#ifndef __NVC0_SCREEN_H__
+#define __NVC0_SCREEN_H__
+
+#include "nouveau_screen.h"
+#include "nouveau_mm.h"
+#include "nouveau_fence.h"
+#include "nouveau_heap.h"
+
+#include "nv_object.xml.h"
+
+#include "nvc0/nvc0_winsys.h"
+#include "nvc0/nvc0_stateobj.h"
+
+#define NVC0_TIC_MAX_ENTRIES 2048
+#define NVC0_TSC_MAX_ENTRIES 2048
+
+/* doesn't count reserved slots (for auxiliary constants, immediates, etc.) */
+#define NVC0_MAX_PIPE_CONSTBUFS 14
+#define NVE4_MAX_PIPE_CONSTBUFS_COMPUTE 7
+
+#define NVC0_MAX_SURFACE_SLOTS 16
+
+struct nvc0_context;
+
+struct nvc0_blitter;
+
+struct nvc0_screen {
+ struct nouveau_screen base;
+
+ struct nvc0_context *cur_ctx;
+
+ int num_occlusion_queries_active;
+
+ struct nouveau_bo *text;
+ struct nouveau_bo *parm; /* for COMPUTE */
+ struct nouveau_bo *uniform_bo; /* for 3D */
+ struct nouveau_bo *tls;
+ struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
+ struct nouveau_bo *poly_cache;
+
+ uint16_t mp_count;
+ uint16_t mp_count_compute; /* magic reg can make compute use fewer MPs */
+
+ struct nouveau_heap *text_heap;
+ struct nouveau_heap *lib_code; /* allocated from text_heap */
+
+ struct nvc0_blitter *blitter;
+
+ struct {
+ void **entries;
+ int next;
+ uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32];
+ } tic;
+
+ struct {
+ void **entries;
+ int next;
+ uint32_t lock[NVC0_TSC_MAX_ENTRIES / 32];
+ } tsc;
+
+ struct {
+ struct nouveau_bo *bo;
+ uint32_t *map;
+ } fence;
+
+ struct {
+ struct nvc0_program *prog; /* compute state object to read MP counters */
+      struct pipe_query *mp_counter[8]; /* query each HW counter is allocated to */
+ uint8_t num_mp_pm_active[2];
+ boolean mp_counters_enabled;
+ } pm;
+
+ struct nouveau_mman *mm_VRAM_fe0;
+
+ struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
+ struct nouveau_object *eng2d;
+ struct nouveau_object *m2mf;
+ struct nouveau_object *compute;
+};
+
+static INLINE struct nvc0_screen *
+nvc0_screen(struct pipe_screen *screen)
+{
+ return (struct nvc0_screen *)screen;
+}
+
+
+/* Performance counter queries:
+ */
+#define NVE4_PM_QUERY_COUNT 49
+#define NVE4_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
+#define NVE4_PM_QUERY_LAST NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1)
+#define NVE4_PM_QUERY_PROF_TRIGGER_0 0
+#define NVE4_PM_QUERY_PROF_TRIGGER_1 1
+#define NVE4_PM_QUERY_PROF_TRIGGER_2 2
+#define NVE4_PM_QUERY_PROF_TRIGGER_3 3
+#define NVE4_PM_QUERY_PROF_TRIGGER_4 4
+#define NVE4_PM_QUERY_PROF_TRIGGER_5 5
+#define NVE4_PM_QUERY_PROF_TRIGGER_6 6
+#define NVE4_PM_QUERY_PROF_TRIGGER_7 7
+#define NVE4_PM_QUERY_LAUNCHED_WARPS 8
+#define NVE4_PM_QUERY_LAUNCHED_THREADS 9
+#define NVE4_PM_QUERY_LAUNCHED_CTA 10
+#define NVE4_PM_QUERY_INST_ISSUED1 11
+#define NVE4_PM_QUERY_INST_ISSUED2 12
+#define NVE4_PM_QUERY_INST_EXECUTED 13
+#define NVE4_PM_QUERY_LD_LOCAL 14
+#define NVE4_PM_QUERY_ST_LOCAL 15
+#define NVE4_PM_QUERY_LD_SHARED 16
+#define NVE4_PM_QUERY_ST_SHARED 17
+#define NVE4_PM_QUERY_L1_LOCAL_LOAD_HIT 18
+#define NVE4_PM_QUERY_L1_LOCAL_LOAD_MISS 19
+#define NVE4_PM_QUERY_L1_LOCAL_STORE_HIT 20
+#define NVE4_PM_QUERY_L1_LOCAL_STORE_MISS 21
+#define NVE4_PM_QUERY_GLD_REQUEST 22
+#define NVE4_PM_QUERY_GST_REQUEST 23
+#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_HIT 24
+#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_MISS 25
+#define NVE4_PM_QUERY_GLD_TRANSACTIONS_UNCACHED 26
+#define NVE4_PM_QUERY_GST_TRANSACTIONS 27
+#define NVE4_PM_QUERY_BRANCH 28
+#define NVE4_PM_QUERY_BRANCH_DIVERGENT 29
+#define NVE4_PM_QUERY_ACTIVE_WARPS 30
+#define NVE4_PM_QUERY_ACTIVE_CYCLES 31
+#define NVE4_PM_QUERY_INST_ISSUED 32
+#define NVE4_PM_QUERY_ATOM_COUNT 33
+#define NVE4_PM_QUERY_GRED_COUNT 34
+#define NVE4_PM_QUERY_LD_SHARED_REPLAY 35
+#define NVE4_PM_QUERY_ST_SHARED_REPLAY 36
+#define NVE4_PM_QUERY_LD_LOCAL_TRANSACTIONS 37
+#define NVE4_PM_QUERY_ST_LOCAL_TRANSACTIONS 38
+#define NVE4_PM_QUERY_L1_LD_SHARED_TRANSACTIONS 39
+#define NVE4_PM_QUERY_L1_ST_SHARED_TRANSACTIONS 40
+#define NVE4_PM_QUERY_GLD_MEM_DIV_REPLAY 41
+#define NVE4_PM_QUERY_GST_MEM_DIV_REPLAY 42
+#define NVE4_PM_QUERY_METRIC_IPC 43
+#define NVE4_PM_QUERY_METRIC_IPAC 44
+#define NVE4_PM_QUERY_METRIC_IPEC 45
+#define NVE4_PM_QUERY_METRIC_MP_OCCUPANCY 46
+#define NVE4_PM_QUERY_METRIC_MP_EFFICIENCY 47
+#define NVE4_PM_QUERY_METRIC_INST_REPLAY_OHEAD 48
+
+/*
+#define NVE4_PM_QUERY_GR_IDLE 50
+#define NVE4_PM_QUERY_BSP_IDLE 51
+#define NVE4_PM_QUERY_VP_IDLE 52
+#define NVE4_PM_QUERY_PPP_IDLE 53
+#define NVE4_PM_QUERY_CE0_IDLE 54
+#define NVE4_PM_QUERY_CE1_IDLE 55
+#define NVE4_PM_QUERY_CE2_IDLE 56
+*/
+/* L2 queries (PCOUNTER) */
+/*
+#define NVE4_PM_QUERY_L2_SUBP_WRITE_L1_SECTOR_QUERIES 57
+...
+*/
+/* TEX queries (PCOUNTER) */
+/*
+#define NVE4_PM_QUERY_TEX0_CACHE_SECTOR_QUERIES 58
+...
+*/
+
+#define NVC0_PM_QUERY_COUNT 31
+#define NVC0_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
+#define NVC0_PM_QUERY_LAST NVC0_PM_QUERY(NVC0_PM_QUERY_COUNT - 1)
+#define NVC0_PM_QUERY_INST_EXECUTED 0
+#define NVC0_PM_QUERY_BRANCH 1
+#define NVC0_PM_QUERY_BRANCH_DIVERGENT 2
+#define NVC0_PM_QUERY_ACTIVE_WARPS 3
+#define NVC0_PM_QUERY_ACTIVE_CYCLES 4
+#define NVC0_PM_QUERY_LAUNCHED_WARPS 5
+#define NVC0_PM_QUERY_LAUNCHED_THREADS 6
+#define NVC0_PM_QUERY_LD_SHARED 7
+#define NVC0_PM_QUERY_ST_SHARED 8
+#define NVC0_PM_QUERY_LD_LOCAL 9
+#define NVC0_PM_QUERY_ST_LOCAL 10
+#define NVC0_PM_QUERY_GRED_COUNT 11
+#define NVC0_PM_QUERY_ATOM_COUNT 12
+#define NVC0_PM_QUERY_GLD_REQUEST 13
+#define NVC0_PM_QUERY_GST_REQUEST 14
+#define NVC0_PM_QUERY_INST_ISSUED1_0 15
+#define NVC0_PM_QUERY_INST_ISSUED1_1 16
+#define NVC0_PM_QUERY_INST_ISSUED2_0 17
+#define NVC0_PM_QUERY_INST_ISSUED2_1 18
+#define NVC0_PM_QUERY_TH_INST_EXECUTED_0 19
+#define NVC0_PM_QUERY_TH_INST_EXECUTED_1 20
+#define NVC0_PM_QUERY_TH_INST_EXECUTED_2 21
+#define NVC0_PM_QUERY_TH_INST_EXECUTED_3 22
+#define NVC0_PM_QUERY_PROF_TRIGGER_0 23
+#define NVC0_PM_QUERY_PROF_TRIGGER_1 24
+#define NVC0_PM_QUERY_PROF_TRIGGER_2 25
+#define NVC0_PM_QUERY_PROF_TRIGGER_3 26
+#define NVC0_PM_QUERY_PROF_TRIGGER_4 27
+#define NVC0_PM_QUERY_PROF_TRIGGER_5 28
+#define NVC0_PM_QUERY_PROF_TRIGGER_6 29
+#define NVC0_PM_QUERY_PROF_TRIGGER_7 30
+
+/* Driver statistics queries:
+ */
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+
+#define NVC0_QUERY_DRV_STAT(i) (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i))
+#define NVC0_QUERY_DRV_STAT_COUNT 29
+#define NVC0_QUERY_DRV_STAT_LAST NVC0_QUERY_DRV_STAT(NVC0_QUERY_DRV_STAT_COUNT - 1)
+#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT 0
+#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES 1
+#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT 2
+#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID 3
+#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS 4
+#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ 5
+#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE 6
+#define NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT 7
+#define NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT 8
+#define NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT 9
+#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ 10
+#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE 11
+#define NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID 12
+#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT 13
+#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID 14
+#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS 15
+#define NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES 16
+#define NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT 17
+#define NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT 18
+#define NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT 19
+#define NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT 20
+#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY 21
+#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED 22
+#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT 23
+#define NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES 24
+#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT 25
+#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES 26
+#define NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT 27
+#define NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT 28
+
+#else
+
+#define NVC0_QUERY_DRV_STAT_COUNT 0
+
+#endif
+
+int nvc0_screen_get_driver_query_info(struct pipe_screen *, unsigned,
+ struct pipe_driver_query_info *);
+
+boolean nvc0_blitter_create(struct nvc0_screen *);
+void nvc0_blitter_destroy(struct nvc0_screen *);
+
+void nvc0_screen_make_buffers_resident(struct nvc0_screen *);
+
+int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
+int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
+
+int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
+int nvc0_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
+
+boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
+ uint32_t lneg, uint32_t cstack);
+
+static INLINE void
+nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
+{
+ struct nvc0_screen *screen = nvc0_screen(res->base.screen);
+
+ if (res->mm) {
+ nouveau_fence_ref(screen->base.fence.current, &res->fence);
+ if (flags & NOUVEAU_BO_WR)
+ nouveau_fence_ref(screen->base.fence.current, &res->fence_wr);
+ }
+}
+
+static INLINE void
+nvc0_resource_validate(struct nv04_resource *res, uint32_t flags)
+{
+ if (likely(res->bo)) {
+ if (flags & NOUVEAU_BO_WR)
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING |
+ NOUVEAU_BUFFER_STATUS_DIRTY;
+ if (flags & NOUVEAU_BO_RD)
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ nvc0_resource_fence(res, flags);
+ }
+}
+
+struct nvc0_format {
+ uint32_t rt;
+ uint32_t tic;
+ uint32_t vtx;
+ uint32_t usage;
+};
+
+extern const struct nvc0_format nvc0_format_table[];
+
+static INLINE void
+nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
+{
+ if (tic->id >= 0)
+ screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
+}
+
+static INLINE void
+nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
+{
+ if (tsc->id >= 0)
+ screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
+}
+
+static INLINE void
+nvc0_screen_tic_free(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
+{
+ if (tic->id >= 0) {
+ screen->tic.entries[tic->id] = NULL;
+ screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
+ }
+}
+
+static INLINE void
+nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
+{
+ if (tsc->id >= 0) {
+ screen->tsc.entries[tsc->id] = NULL;
+ screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
+ }
+}
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
new file mode 100644
index 0000000..b820ef2
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+
+#include "nvc0/nvc0_context.h"
+
+static INLINE void
+nvc0_program_update_context_state(struct nvc0_context *nvc0,
+ struct nvc0_program *prog, int stage)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ if (prog && prog->need_tls) {
+ const uint32_t flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
+ if (!nvc0->state.tls_required)
+ BCTX_REFN_bo(nvc0->bufctx_3d, TLS, flags, nvc0->screen->tls);
+ nvc0->state.tls_required |= 1 << stage;
+ } else {
+ if (nvc0->state.tls_required == (1 << stage))
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TLS);
+ nvc0->state.tls_required &= ~(1 << stage);
+ }
+
+ if (prog && prog->immd_size) {
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ /* NOTE: may overlap code of a different shader */
+ PUSH_DATA (push, align(prog->immd_size, 0x100));
+ PUSH_DATAh(push, nvc0->screen->text->offset + prog->immd_base);
+ PUSH_DATA (push, nvc0->screen->text->offset + prog->immd_base);
+ BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1);
+ PUSH_DATA (push, (14 << 4) | 1);
+
+ nvc0->state.c14_bound |= 1 << stage;
+ } else
+ if (nvc0->state.c14_bound & (1 << stage)) {
+ BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1);
+ PUSH_DATA (push, (14 << 4) | 0);
+
+ nvc0->state.c14_bound &= ~(1 << stage);
+ }
+}
+
+static INLINE boolean
+nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
+{
+ if (prog->mem)
+ return TRUE;
+
+ if (!prog->translated) {
+ prog->translated = nvc0_program_translate(
+ prog, nvc0->screen->base.device->chipset);
+ if (!prog->translated)
+ return FALSE;
+ }
+
+ if (likely(prog->code_size))
+ return nvc0_program_upload_code(nvc0, prog);
+ return TRUE; /* stream output info only */
+}
+
+void
+nvc0_vertprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_program *vp = nvc0->vertprog;
+
+ if (!nvc0_program_validate(nvc0, vp))
+ return;
+ nvc0_program_update_context_state(nvc0, vp, 0);
+
+ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 2);
+ PUSH_DATA (push, 0x11);
+ PUSH_DATA (push, vp->code_base);
+ BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1);
+ PUSH_DATA (push, vp->num_gprs);
+
+ // BEGIN_NVC0(push, NVC0_3D_(0x163c), 1);
+ // PUSH_DATA (push, 0);
+}
+
+void
+nvc0_fragprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_program *fp = nvc0->fragprog;
+
+ if (!nvc0_program_validate(nvc0, fp))
+ return;
+ nvc0_program_update_context_state(nvc0, fp, 4);
+
+ if (fp->fp.early_z != nvc0->state.early_z_forced) {
+ nvc0->state.early_z_forced = fp->fp.early_z;
+ IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp->fp.early_z);
+ }
+
+ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2);
+ PUSH_DATA (push, 0x51);
+ PUSH_DATA (push, fp->code_base);
+ BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);
+ PUSH_DATA (push, fp->num_gprs);
+
+ BEGIN_NVC0(push, SUBC_3D(0x0360), 2);
+ PUSH_DATA (push, 0x20164010);
+ PUSH_DATA (push, 0x20);
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_TEST_MASK), 1);
+ PUSH_DATA (push, fp->flags[0]);
+}
+
+void
+nvc0_tctlprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_program *tp = nvc0->tctlprog;
+
+ if (tp && nvc0_program_validate(nvc0, tp)) {
+ if (tp->tp.tess_mode != ~0) {
+ BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
+ PUSH_DATA (push, tp->tp.tess_mode);
+ }
+ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2);
+ PUSH_DATA (push, 0x21);
+ PUSH_DATA (push, tp->code_base);
+ BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
+ PUSH_DATA (push, tp->num_gprs);
+
+ if (tp->tp.input_patch_size <= 32)
+ IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), tp->tp.input_patch_size);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
+ PUSH_DATA (push, 0x20);
+ }
+ nvc0_program_update_context_state(nvc0, tp, 1);
+}
+
+void
+nvc0_tevlprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_program *tp = nvc0->tevlprog;
+
+ if (tp && nvc0_program_validate(nvc0, tp)) {
+ if (tp->tp.tess_mode != ~0) {
+ BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
+ PUSH_DATA (push, tp->tp.tess_mode);
+ }
+ BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
+ PUSH_DATA (push, 0x31);
+ BEGIN_NVC0(push, NVC0_3D(SP_START_ID(3)), 1);
+ PUSH_DATA (push, tp->code_base);
+ BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1);
+ PUSH_DATA (push, tp->num_gprs);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
+ PUSH_DATA (push, 0x30);
+ }
+ nvc0_program_update_context_state(nvc0, tp, 2);
+}
+
+void
+nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_program *gp = nvc0->gmtyprog;
+
+ if (gp)
+ nvc0_program_validate(nvc0, gp);
+
+   /* GPs with no code are allowed; they only specify stream output state */
+ if (gp && gp->code_size) {
+ const boolean gp_selects_layer = gp->hdr[13] & (1 << 9);
+
+ BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
+ PUSH_DATA (push, 0x41);
+ BEGIN_NVC0(push, NVC0_3D(SP_START_ID(4)), 1);
+ PUSH_DATA (push, gp->code_base);
+ BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1);
+ PUSH_DATA (push, gp->num_gprs);
+ BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
+ PUSH_DATA (push, gp_selects_layer ? NVC0_3D_LAYER_USE_GP : 0);
+ } else {
+ IMMED_NVC0(push, NVC0_3D(LAYER), 0);
+ BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
+ PUSH_DATA (push, 0x40);
+ }
+ nvc0_program_update_context_state(nvc0, gp, 3);
+}
+
+void
+nvc0_tfb_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_transform_feedback_state *tfb;
+ unsigned b;
+
+ if (nvc0->gmtyprog) tfb = nvc0->gmtyprog->tfb;
+ else
+ if (nvc0->tevlprog) tfb = nvc0->tevlprog->tfb;
+ else
+ tfb = nvc0->vertprog->tfb;
+
+ IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 1 : 0);
+
+ if (tfb && tfb != nvc0->state.tfb) {
+ for (b = 0; b < 4; ++b) {
+ if (tfb->varying_count[b]) {
+ unsigned n = (tfb->varying_count[b] + 3) / 4;
+
+ BEGIN_NVC0(push, NVC0_3D(TFB_STREAM(b)), 3);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, tfb->varying_count[b]);
+ PUSH_DATA (push, tfb->stride[b]);
+ BEGIN_NVC0(push, NVC0_3D(TFB_VARYING_LOCS(b, 0)), n);
+ PUSH_DATAp(push, tfb->varying_index[b], n);
+
+ if (nvc0->tfbbuf[b])
+ nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b];
+ } else {
+ IMMED_NVC0(push, NVC0_3D(TFB_VARYING_COUNT(b)), 0);
+ }
+ }
+ }
+ nvc0->state.tfb = tfb;
+
+ if (!(nvc0->dirty & NVC0_NEW_TFB_TARGETS))
+ return;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TFB);
+
+ for (b = 0; b < nvc0->num_tfbbufs; ++b) {
+ struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]);
+ struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);
+
+ if (tfb)
+ targ->stride = tfb->stride[b];
+
+ if (!(nvc0->tfbbuf_dirty & (1 << b)))
+ continue;
+
+ if (!targ->clean)
+ nvc0_query_fifo_wait(push, targ->pq);
+ BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
+ PUSH_DATA (push, 1);
+ PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
+ PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
+ PUSH_DATA (push, targ->pipe.buffer_size);
+ if (!targ->clean) {
+ nvc0_query_pushbuf_submit(push, targ->pq, 0x4);
+ } else {
+ PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */
+ targ->clean = FALSE;
+ }
+ BCTX_REFN(nvc0->bufctx_3d, TFB, buf, WR);
+ }
+ for (; b < 4; ++b)
+ IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0);
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
new file mode 100644
index 0000000..e56ef01
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -0,0 +1,1247 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_helpers.h"
+#include "util/u_inlines.h"
+#include "util/u_transfer.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nvc0/nvc0_stateobj.h"
+#include "nvc0/nvc0_context.h"
+
+#include "nvc0/nvc0_3d.xml.h"
+#include "nv50/nv50_texture.xml.h"
+
+#include "nouveau_gldefs.h"
+
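+/* Convert a PIPE_MASK_* colormask into the hardware encoding, which uses
+ * one nibble per component (R, G, B, A from low to high). */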
+static INLINE uint32_t
+nvc0_colormask(unsigned mask)
+{
+ uint32_t ret = 0;
+
+ if (mask & PIPE_MASK_R)
+ ret |= 0x0001;
+ if (mask & PIPE_MASK_G)
+ ret |= 0x0010;
+ if (mask & PIPE_MASK_B)
+ ret |= 0x0100;
+ if (mask & PIPE_MASK_A)
+ ret |= 0x1000;
+
+ return ret;
+}
+
+#define NVC0_BLEND_FACTOR_CASE(a, b) \
+ case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b
+
+static INLINE uint32_t
+nvc0_blend_fac(unsigned factor)
+{
+ switch (factor) {
+ NVC0_BLEND_FACTOR_CASE(ONE, ONE);
+ NVC0_BLEND_FACTOR_CASE(SRC_COLOR, SRC_COLOR);
+ NVC0_BLEND_FACTOR_CASE(SRC_ALPHA, SRC_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(DST_ALPHA, DST_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(DST_COLOR, DST_COLOR);
+ NVC0_BLEND_FACTOR_CASE(SRC_ALPHA_SATURATE, SRC_ALPHA_SATURATE);
+ NVC0_BLEND_FACTOR_CASE(CONST_COLOR, CONSTANT_COLOR);
+ NVC0_BLEND_FACTOR_CASE(CONST_ALPHA, CONSTANT_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(SRC1_COLOR, SRC1_COLOR);
+ NVC0_BLEND_FACTOR_CASE(SRC1_ALPHA, SRC1_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(ZERO, ZERO);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC_COLOR, ONE_MINUS_SRC_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC_ALPHA, ONE_MINUS_SRC_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(INV_DST_ALPHA, ONE_MINUS_DST_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(INV_DST_COLOR, ONE_MINUS_DST_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_CONST_COLOR, ONE_MINUS_CONSTANT_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_CONST_ALPHA, ONE_MINUS_CONSTANT_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC1_COLOR, ONE_MINUS_SRC1_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC1_ALPHA, ONE_MINUS_SRC1_ALPHA);
+ default:
+ return NV50_3D_BLEND_FACTOR_ZERO;
+ }
+}
+
+static void *
+nvc0_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nvc0_blend_stateobj *so = CALLOC_STRUCT(nvc0_blend_stateobj);
+ int i;
+ int r; /* reference */
+ uint32_t ms;
+ uint8_t blend_en = 0;
+ boolean indep_masks = FALSE;
+ boolean indep_funcs = FALSE;
+
+ so->pipe = *cso;
+
+ /* check which states actually have differing values */
+ if (cso->independent_blend_enable) {
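+ /* find the first RT with blending enabled; it serves as the reference
+  * when checking whether the other RTs need independent blend funcs */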
+ for (r = 0; r < 8 && !cso->rt[r].blend_enable; ++r);
+ blend_en |= 1 << r;
+ for (i = r + 1; i < 8; ++i) {
+ if (!cso->rt[i].blend_enable)
+ continue;
+ blend_en |= 1 << i;
+ if (cso->rt[i].rgb_func != cso->rt[r].rgb_func ||
+ cso->rt[i].rgb_src_factor != cso->rt[r].rgb_src_factor ||
+ cso->rt[i].rgb_dst_factor != cso->rt[r].rgb_dst_factor ||
+ cso->rt[i].alpha_func != cso->rt[r].alpha_func ||
+ cso->rt[i].alpha_src_factor != cso->rt[r].alpha_src_factor ||
+ cso->rt[i].alpha_dst_factor != cso->rt[r].alpha_dst_factor) {
+ indep_funcs = TRUE;
+ break;
+ }
+ }
+ for (; i < 8; ++i)
+ blend_en |= (cso->rt[i].blend_enable ? 1 : 0) << i;
+
+ for (i = 1; i < 8; ++i) {
+ if (cso->rt[i].colormask != cso->rt[0].colormask) {
+ indep_masks = TRUE;
+ break;
+ }
+ }
+ } else {
+ r = 0;
+ if (cso->rt[0].blend_enable)
+ blend_en = 0xff;
+ }
+
+ if (cso->logicop_enable) {
+ SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_logicop_func(cso->logicop_func));
+
+ SB_IMMED_3D(so, MACRO_BLEND_ENABLES, 0);
+ } else {
+ SB_IMMED_3D(so, LOGIC_OP_ENABLE, 0);
+
+ SB_IMMED_3D(so, BLEND_INDEPENDENT, indep_funcs);
+ SB_IMMED_3D(so, MACRO_BLEND_ENABLES, blend_en);
+ if (indep_funcs) {
+ for (i = 0; i < 8; ++i) {
+ if (cso->rt[i].blend_enable) {
+ SB_BEGIN_3D(so, IBLEND_EQUATION_RGB(i), 6);
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[i].rgb_func));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_src_factor));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_dst_factor));
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[i].alpha_func));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_src_factor));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_dst_factor));
+ }
+ }
+ } else
+ if (blend_en) {
+ SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5);
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[r].rgb_func));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[r].rgb_src_factor));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[r].rgb_dst_factor));
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[r].alpha_func));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[r].alpha_src_factor));
+ SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1);
+ SB_DATA (so, nvc0_blend_fac(cso->rt[r].alpha_dst_factor));
+ }
+
+ SB_IMMED_3D(so, COLOR_MASK_COMMON, !indep_masks);
+ if (indep_masks) {
+ SB_BEGIN_3D(so, COLOR_MASK(0), 8);
+ for (i = 0; i < 8; ++i)
+ SB_DATA(so, nvc0_colormask(cso->rt[i].colormask));
+ } else {
+ SB_BEGIN_3D(so, COLOR_MASK(0), 1);
+ SB_DATA (so, nvc0_colormask(cso->rt[0].colormask));
+ }
+ }
+
+ ms = 0;
+ if (cso->alpha_to_coverage)
+ ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
+ if (cso->alpha_to_one)
+ ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
+
+ SB_BEGIN_3D(so, MULTISAMPLE_CTRL, 1);
+ SB_DATA (so, ms);
+
+ assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
+ return so;
+}
+
+static void
+nvc0_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->blend = hwcso;
+ nvc0->dirty |= NVC0_NEW_BLEND;
+}
+
+static void
+nvc0_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+/* NOTE: ignoring line_last_pixel, using FALSE (set on screen init) */
+static void *
+nvc0_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nvc0_rasterizer_stateobj *so;
+ uint32_t reg;
+
+ so = CALLOC_STRUCT(nvc0_rasterizer_stateobj);
+ if (!so)
+ return NULL;
+ so->pipe = *cso;
+
+ /* Scissor enables are handled in scissor state; we do not want to
+ * always emit 16 commands, one for each scissor rectangle, here.
+ */
+
+ SB_BEGIN_3D(so, SHADE_MODEL, 1);
+ SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT :
+ NVC0_3D_SHADE_MODEL_SMOOTH);
+ SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first);
+ SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside);
+
+ SB_IMMED_3D(so, VERT_COLOR_CLAMP_EN, cso->clamp_vertex_color);
+ SB_BEGIN_3D(so, FRAG_COLOR_CLAMP_EN, 1);
+ SB_DATA (so, cso->clamp_fragment_color ? 0x11111111 : 0x00000000);
+
+ SB_IMMED_3D(so, MULTISAMPLE_ENABLE, cso->multisample);
+
+ SB_IMMED_3D(so, LINE_SMOOTH_ENABLE, cso->line_smooth);
+ if (cso->line_smooth)
+ SB_BEGIN_3D(so, LINE_WIDTH_SMOOTH, 1);
+ else
+ SB_BEGIN_3D(so, LINE_WIDTH_ALIASED, 1);
+ SB_DATA (so, fui(cso->line_width));
+
+ SB_IMMED_3D(so, LINE_STIPPLE_ENABLE, cso->line_stipple_enable);
+ if (cso->line_stipple_enable) {
+ SB_BEGIN_3D(so, LINE_STIPPLE_PATTERN, 1);
+ SB_DATA (so, (cso->line_stipple_pattern << 8) |
+ cso->line_stipple_factor);
+ }
+
+ SB_IMMED_3D(so, VP_POINT_SIZE_EN, cso->point_size_per_vertex);
+ if (!cso->point_size_per_vertex) {
+ SB_BEGIN_3D(so, POINT_SIZE, 1);
+ SB_DATA (so, fui(cso->point_size));
+ }
+
+ reg = (cso->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT) ?
+ NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT :
+ NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT;
+
+ SB_BEGIN_3D(so, POINT_COORD_REPLACE, 1);
+ SB_DATA (so, ((cso->sprite_coord_enable & 0xff) << 3) | reg);
+ SB_IMMED_3D(so, POINT_SPRITE_ENABLE, cso->point_quad_rasterization);
+ SB_IMMED_3D(so, POINT_SMOOTH_ENABLE, cso->point_smooth);
+
+ SB_BEGIN_3D(so, MACRO_POLYGON_MODE_FRONT, 1);
+ SB_DATA (so, nvgl_polygon_mode(cso->fill_front));
+ SB_BEGIN_3D(so, MACRO_POLYGON_MODE_BACK, 1);
+ SB_DATA (so, nvgl_polygon_mode(cso->fill_back));
+ SB_IMMED_3D(so, POLYGON_SMOOTH_ENABLE, cso->poly_smooth);
+
+ SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3);
+ SB_DATA (so, cso->cull_face != PIPE_FACE_NONE);
+ SB_DATA (so, cso->front_ccw ? NVC0_3D_FRONT_FACE_CCW :
+ NVC0_3D_FRONT_FACE_CW);
+ switch (cso->cull_face) {
+ case PIPE_FACE_FRONT_AND_BACK:
+ SB_DATA(so, NVC0_3D_CULL_FACE_FRONT_AND_BACK);
+ break;
+ case PIPE_FACE_FRONT:
+ SB_DATA(so, NVC0_3D_CULL_FACE_FRONT);
+ break;
+ case PIPE_FACE_BACK:
+ default:
+ SB_DATA(so, NVC0_3D_CULL_FACE_BACK);
+ break;
+ }
+
+ SB_IMMED_3D(so, POLYGON_STIPPLE_ENABLE, cso->poly_stipple_enable);
+ SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3);
+ SB_DATA (so, cso->offset_point);
+ SB_DATA (so, cso->offset_line);
+ SB_DATA (so, cso->offset_tri);
+
+ if (cso->offset_point || cso->offset_line || cso->offset_tri) {
+ SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1);
+ SB_DATA (so, fui(cso->offset_scale));
+ SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1);
+ SB_DATA (so, fui(cso->offset_units * 2.0f));
+ SB_BEGIN_3D(so, POLYGON_OFFSET_CLAMP, 1);
+ SB_DATA (so, fui(cso->offset_clamp));
+ }
+
+ if (cso->depth_clip)
+ reg = NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1;
+ else
+ reg =
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 |
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR |
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR |
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2;
+
+ SB_BEGIN_3D(so, VIEW_VOLUME_CLIP_CTRL, 1);
+ SB_DATA (so, reg);
+
+ assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
+ return (void *)so;
+}
+
+static void
+nvc0_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->rast = hwcso;
+ nvc0->dirty |= NVC0_NEW_RASTERIZER;
+}
+
+static void
+nvc0_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void *
+nvc0_zsa_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nvc0_zsa_stateobj *so = CALLOC_STRUCT(nvc0_zsa_stateobj);
+
+ so->pipe = *cso;
+
+ SB_IMMED_3D(so, DEPTH_TEST_ENABLE, cso->depth.enabled);
+ if (cso->depth.enabled) {
+ SB_IMMED_3D(so, DEPTH_WRITE_ENABLE, cso->depth.writemask);
+ SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1);
+ SB_DATA (so, nvgl_comparison_op(cso->depth.func));
+ }
+
+ if (cso->stencil[0].enabled) {
+ SB_BEGIN_3D(so, STENCIL_ENABLE, 5);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
+ SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func));
+ SB_BEGIN_3D(so, STENCIL_FRONT_FUNC_MASK, 2);
+ SB_DATA (so, cso->stencil[0].valuemask);
+ SB_DATA (so, cso->stencil[0].writemask);
+ } else {
+ SB_IMMED_3D(so, STENCIL_ENABLE, 0);
+ }
+
+ if (cso->stencil[1].enabled) {
+ assert(cso->stencil[0].enabled);
+ SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
+ SB_DATA (so, nvgl_comparison_op(cso->stencil[1].func));
+ SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2);
+ SB_DATA (so, cso->stencil[1].writemask);
+ SB_DATA (so, cso->stencil[1].valuemask);
+ } else
+ if (cso->stencil[0].enabled) {
+ SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0);
+ }
+
+ SB_IMMED_3D(so, ALPHA_TEST_ENABLE, cso->alpha.enabled);
+ if (cso->alpha.enabled) {
+ SB_BEGIN_3D(so, ALPHA_TEST_REF, 2);
+ SB_DATA (so, fui(cso->alpha.ref_value));
+ SB_DATA (so, nvgl_comparison_op(cso->alpha.func));
+ }
+
+ assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
+ return (void *)so;
+}
+
+static void
+nvc0_zsa_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->zsa = hwcso;
+ nvc0->dirty |= NVC0_NEW_ZSA;
+}
+
+static void
+nvc0_zsa_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+/* ====================== SAMPLERS AND TEXTURES ================================
+ */
+
+#define NV50_TSC_WRAP_CASE(n) \
+ case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n
+
+static INLINE unsigned
+nv50_tsc_wrap_mode(unsigned wrap)
+{
+ switch (wrap) {
+ NV50_TSC_WRAP_CASE(REPEAT);
+ NV50_TSC_WRAP_CASE(MIRROR_REPEAT);
+ NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE);
+ NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER);
+ NV50_TSC_WRAP_CASE(CLAMP);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP);
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ return NV50_TSC_WRAP_REPEAT;
+ }
+}
+
+static void
+nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ unsigned s, i;
+
+ for (s = 0; s < 5; ++s)
+ for (i = 0; i < nvc0_context(pipe)->num_samplers[s]; ++i)
+ if (nvc0_context(pipe)->samplers[s][i] == hwcso)
+ nvc0_context(pipe)->samplers[s][i] = NULL;
+
+ nvc0_screen_tsc_free(nvc0_context(pipe)->screen, nv50_tsc_entry(hwcso));
+
+ FREE(hwcso);
+}
+
+static INLINE void
+nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s,
+ unsigned nr, void **hwcso)
+{
+ unsigned i;
+
+ for (i = 0; i < nr; ++i) {
+ struct nv50_tsc_entry *old = nvc0->samplers[s][i];
+
+ if (hwcso[i] == old)
+ continue;
+ nvc0->samplers_dirty[s] |= 1 << i;
+
+ nvc0->samplers[s][i] = nv50_tsc_entry(hwcso[i]);
+ if (old)
+ nvc0_screen_tsc_unlock(nvc0->screen, old);
+ }
+ for (; i < nvc0->num_samplers[s]; ++i) {
+ if (nvc0->samplers[s][i]) {
+ nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]);
+ nvc0->samplers[s][i] = NULL;
+ }
+ }
+
+ nvc0->num_samplers[s] = nr;
+
+ nvc0->dirty |= NVC0_NEW_SAMPLERS;
+}
+
+static void
+nvc0_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 0, nr, s);
+}
+
+static void
+nvc0_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 4, nr, s);
+}
+
+static void
+nvc0_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s);
+}
+
+static void
+nvc0_stage_sampler_states_bind_range(struct nvc0_context *nvc0,
+ const unsigned s,
+ unsigned start, unsigned nr, void **cso)
+{
+ const unsigned end = start + nr;
+ int last_valid = -1;
+ unsigned i;
+
+ if (cso) {
+ for (i = start; i < end; ++i) {
+ const unsigned p = i - start;
+ if (cso[p])
+ last_valid = i;
+ if (cso[p] == nvc0->samplers[s][i])
+ continue;
+ nvc0->samplers_dirty[s] |= 1 << i;
+
+ if (nvc0->samplers[s][i])
+ nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]);
+ nvc0->samplers[s][i] = cso[p];
+ }
+ } else {
+ for (i = start; i < end; ++i) {
+ if (nvc0->samplers[s][i]) {
+ nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]);
+ nvc0->samplers[s][i] = NULL;
+ nvc0->samplers_dirty[s] |= 1 << i;
+ }
+ }
+ }
+
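+ /* if the updated range extends to or past the old count, recompute
+  * num_samplers as one past the highest slot still holding a sampler */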
+ if (nvc0->num_samplers[s] <= end) {
+ if (last_valid < 0) {
+ for (i = start; i && !nvc0->samplers[s][i - 1]; --i);
+ nvc0->num_samplers[s] = i;
+ } else {
+ nvc0->num_samplers[s] = last_valid + 1;
+ }
+ }
+}
+
+static void
+nvc0_cp_sampler_states_bind(struct pipe_context *pipe,
+ unsigned start, unsigned nr, void **cso)
+{
+ nvc0_stage_sampler_states_bind_range(nvc0_context(pipe), 5, start, nr, cso);
+
+ nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
+}
+
+/* NOTE: only called when not referenced anywhere, won't be bound */
+static void
+nvc0_sampler_view_destroy(struct pipe_context *pipe,
+ struct pipe_sampler_view *view)
+{
+ pipe_resource_reference(&view->texture, NULL);
+
+ nvc0_screen_tic_free(nvc0_context(pipe)->screen, nv50_tic_entry(view));
+
+ FREE(nv50_tic_entry(view));
+}
+
+static INLINE void
+nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ unsigned i;
+
+ for (i = 0; i < nr; ++i) {
+ struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
+
+ if (views[i] == nvc0->textures[s][i])
+ continue;
+ nvc0->textures_dirty[s] |= 1 << i;
+
+ if (old) {
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i));
+ nvc0_screen_tic_unlock(nvc0->screen, old);
+ }
+
+ pipe_sampler_view_reference(&nvc0->textures[s][i], views[i]);
+ }
+
+ for (i = nr; i < nvc0->num_textures[s]; ++i) {
+ struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
+ if (old) {
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i));
+ nvc0_screen_tic_unlock(nvc0->screen, old);
+ pipe_sampler_view_reference(&nvc0->textures[s][i], NULL);
+ }
+ }
+
+ nvc0->num_textures[s] = nr;
+
+ nvc0->dirty |= NVC0_NEW_TEXTURES;
+}
+
+static void
+nvc0_vp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 0, nr, views);
+}
+
+static void
+nvc0_fp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 4, nr, views);
+}
+
+static void
+nvc0_gp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views);
+}
+
+static void
+nvc0_stage_set_sampler_views_range(struct nvc0_context *nvc0, const unsigned s,
+ unsigned start, unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ struct nouveau_bufctx *bctx = (s == 5) ? nvc0->bufctx_cp : nvc0->bufctx_3d;
+ const unsigned end = start + nr;
+ const unsigned bin = (s == 5) ? NVC0_BIND_CP_TEX(0) : NVC0_BIND_TEX(s, 0);
+ int last_valid = -1;
+ unsigned i;
+
+ if (views) {
+ for (i = start; i < end; ++i) {
+ const unsigned p = i - start;
+ if (views[p])
+ last_valid = i;
+ if (views[p] == nvc0->textures[s][i])
+ continue;
+ nvc0->textures_dirty[s] |= 1 << i;
+
+ if (nvc0->textures[s][i]) {
+ struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
+ nouveau_bufctx_reset(bctx, bin + i);
+ nvc0_screen_tic_unlock(nvc0->screen, old);
+ }
+ pipe_sampler_view_reference(&nvc0->textures[s][i], views[p]);
+ }
+ } else {
+ for (i = start; i < end; ++i) {
+ struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
+ if (!old)
+ continue;
+ nvc0->textures_dirty[s] |= 1 << i;
+
+ nvc0_screen_tic_unlock(nvc0->screen, old);
+ pipe_sampler_view_reference(&nvc0->textures[s][i], NULL);
+ nouveau_bufctx_reset(bctx, bin + i);
+ }
+ }
+
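+ /* recompute num_textures, analogous to the sampler handling above */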
+ if (nvc0->num_textures[s] <= end) {
+ if (last_valid < 0) {
+ for (i = start; i && !nvc0->textures[s][i - 1]; --i);
+ nvc0->num_textures[s] = i;
+ } else {
+ nvc0->num_textures[s] = last_valid + 1;
+ }
+ }
+}
+
+static void
+nvc0_cp_set_sampler_views(struct pipe_context *pipe,
+ unsigned start, unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nvc0_stage_set_sampler_views_range(nvc0_context(pipe), 5, start, nr, views);
+
+ nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_TEXTURES;
+}
+
+/* ============================= SHADERS =======================================
+ */
+
+static void *
+nvc0_sp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso, unsigned type)
+{
+ struct nvc0_program *prog;
+
+ prog = CALLOC_STRUCT(nvc0_program);
+ if (!prog)
+ return NULL;
+
+ prog->type = type;
+
+ if (cso->tokens)
+ prog->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ if (cso->stream_output.num_outputs)
+ prog->pipe.stream_output = cso->stream_output;
+
+ return (void *)prog;
+}
+
+static void
+nvc0_sp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_program *prog = (struct nvc0_program *)hwcso;
+
+ nvc0_program_destroy(nvc0_context(pipe), prog);
+
+ FREE((void *)prog->pipe.tokens);
+ FREE(prog);
+}
+
+static void *
+nvc0_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX);
+}
+
+static void
+nvc0_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->vertprog = hwcso;
+ nvc0->dirty |= NVC0_NEW_VERTPROG;
+}
+
+static void *
+nvc0_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT);
+}
+
+static void
+nvc0_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->fragprog = hwcso;
+ nvc0->dirty |= NVC0_NEW_FRAGPROG;
+}
+
+static void *
+nvc0_gp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_GEOMETRY);
+}
+
+static void
+nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->gmtyprog = hwcso;
+ nvc0->dirty |= NVC0_NEW_GMTYPROG;
+}
+
+static void *
+nvc0_cp_state_create(struct pipe_context *pipe,
+ const struct pipe_compute_state *cso)
+{
+ struct nvc0_program *prog;
+
+ prog = CALLOC_STRUCT(nvc0_program);
+ if (!prog)
+ return NULL;
+ prog->type = PIPE_SHADER_COMPUTE;
+
+ prog->cp.smem_size = cso->req_local_mem;
+ prog->cp.lmem_size = cso->req_private_mem;
+ prog->parm_size = cso->req_input_mem;
+
+ prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog);
+
+ return (void *)prog;
+}
+
+static void
+nvc0_cp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->compprog = hwcso;
+ nvc0->dirty_cp |= NVC0_NEW_CP_PROGRAM;
+}
+
+static void
+nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ struct pipe_constant_buffer *cb)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct pipe_resource *res = cb ? cb->buffer : NULL;
+ const unsigned s = nvc0_shader_stage(shader);
+ const unsigned i = index;
+
+ if (unlikely(shader == PIPE_SHADER_COMPUTE)) {
+ assert(!cb || !cb->user_buffer);
+ if (nvc0->constbuf[s][i].u.buf)
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_CB(i));
+
+ nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
+ } else {
+ if (nvc0->constbuf[s][i].user)
+ nvc0->constbuf[s][i].u.buf = NULL;
+ else
+ if (nvc0->constbuf[s][i].u.buf)
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i));
+
+ nvc0->dirty |= NVC0_NEW_CONSTBUF;
+ }
+ nvc0->constbuf_dirty[s] |= 1 << i;
+
+ pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res);
+
+ nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? TRUE : FALSE;
+ if (nvc0->constbuf[s][i].user) {
+ nvc0->constbuf[s][i].u.data = cb->user_buffer;
+ nvc0->constbuf[s][i].size = cb->buffer_size;
+ } else
+ if (cb) {
+ nvc0->constbuf[s][i].offset = cb->buffer_offset;
+ nvc0->constbuf[s][i].size = align(cb->buffer_size, 0x100);
+ }
+}
+
+/* =============================================================================
+ */
+
+static void
+nvc0_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->blend_colour = *bcol;
+ nvc0->dirty |= NVC0_NEW_BLEND_COLOUR;
+}
+
+static void
+nvc0_set_stencil_ref(struct pipe_context *pipe,
+ const struct pipe_stencil_ref *sr)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->stencil_ref = *sr;
+ nvc0->dirty |= NVC0_NEW_STENCIL_REF;
+}
+
+static void
+nvc0_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ memcpy(nvc0->clip.ucp, clip->ucp, sizeof(clip->ucp));
+
+ nvc0->dirty |= NVC0_NEW_CLIP;
+}
+
+static void
+nvc0_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->sample_mask = sample_mask;
+ nvc0->dirty |= NVC0_NEW_SAMPLE_MASK;
+}
+
+static void
+nvc0_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ unsigned i;
+
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+
+ for (i = 0; i < fb->nr_cbufs; ++i)
+ pipe_surface_reference(&nvc0->framebuffer.cbufs[i], fb->cbufs[i]);
+ for (; i < nvc0->framebuffer.nr_cbufs; ++i)
+ pipe_surface_reference(&nvc0->framebuffer.cbufs[i], NULL);
+
+ nvc0->framebuffer.nr_cbufs = fb->nr_cbufs;
+
+ nvc0->framebuffer.width = fb->width;
+ nvc0->framebuffer.height = fb->height;
+
+ pipe_surface_reference(&nvc0->framebuffer.zsbuf, fb->zsbuf);
+
+ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+}
+
+static void
+nvc0_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->stipple = *stipple;
+ nvc0->dirty |= NVC0_NEW_STIPPLE;
+}
+
+static void
+nvc0_set_scissor_states(struct pipe_context *pipe,
+ unsigned start_slot,
+ unsigned num_scissors,
+ const struct pipe_scissor_state *scissor)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->scissor = *scissor;
+ nvc0->dirty |= NVC0_NEW_SCISSOR;
+}
+
+static void
+nvc0_set_viewport_states(struct pipe_context *pipe,
+ unsigned start_slot,
+ unsigned num_viewports,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->viewport = *vpt;
+ nvc0->dirty |= NVC0_NEW_VIEWPORT;
+}
+
+static void
+nvc0_set_vertex_buffers(struct pipe_context *pipe,
+ unsigned start_slot, unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ unsigned i;
+
+ util_set_vertex_buffers_count(nvc0->vtxbuf, &nvc0->num_vtxbufs, vb,
+ start_slot, count);
+
+ if (!vb) {
+ nvc0->vbo_user &= ~(((1ull << count) - 1) << start_slot);
+ nvc0->constant_vbos &= ~(((1ull << count) - 1) << start_slot);
+ return;
+ }
+
+ for (i = 0; i < count; ++i) {
+ unsigned dst_index = start_slot + i;
+
+ if (vb[i].user_buffer) {
+ nvc0->vbo_user |= 1 << dst_index;
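+ /* a stride of 0 makes every vertex fetch the same data, so treat the
+  * attribute as a constant */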
+ if (!vb[i].stride)
+ nvc0->constant_vbos |= 1 << dst_index;
+ else
+ nvc0->constant_vbos &= ~(1 << dst_index);
+ } else {
+ nvc0->vbo_user &= ~(1 << dst_index);
+ nvc0->constant_vbos &= ~(1 << dst_index);
+ }
+ }
+
+ nvc0->dirty |= NVC0_NEW_ARRAYS;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
+}
+
+static void
+nvc0_set_index_buffer(struct pipe_context *pipe,
+ const struct pipe_index_buffer *ib)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ if (nvc0->idxbuf.buffer)
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_IDX);
+
+ if (ib) {
+ pipe_resource_reference(&nvc0->idxbuf.buffer, ib->buffer);
+ nvc0->idxbuf.index_size = ib->index_size;
+ if (ib->buffer) {
+ nvc0->idxbuf.offset = ib->offset;
+ nvc0->dirty |= NVC0_NEW_IDXBUF;
+ } else {
+ nvc0->idxbuf.user_buffer = ib->user_buffer;
+ nvc0->dirty &= ~NVC0_NEW_IDXBUF;
+ }
+ } else {
+ nvc0->dirty &= ~NVC0_NEW_IDXBUF;
+ pipe_resource_reference(&nvc0->idxbuf.buffer, NULL);
+ }
+}
+
+static void
+nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->vertex = hwcso;
+ nvc0->dirty |= NVC0_NEW_VERTEX;
+}
+
+static struct pipe_stream_output_target *
+nvc0_so_target_create(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size)
+{
+ struct nvc0_so_target *targ = MALLOC_STRUCT(nvc0_so_target);
+ if (!targ)
+ return NULL;
+
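+ /* internal query used to save and restore the GPU-written buffer offset
+  * of this stream output target */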
+ targ->pq = pipe->create_query(pipe, NVC0_QUERY_TFB_BUFFER_OFFSET);
+ if (!targ->pq) {
+ FREE(targ);
+ return NULL;
+ }
+ targ->clean = TRUE;
+
+ targ->pipe.buffer_size = size;
+ targ->pipe.buffer_offset = offset;
+ targ->pipe.context = pipe;
+ targ->pipe.buffer = NULL;
+ pipe_resource_reference(&targ->pipe.buffer, res);
+ pipe_reference_init(&targ->pipe.reference, 1);
+
+ return &targ->pipe;
+}
+
+static void
+nvc0_so_target_destroy(struct pipe_context *pipe,
+ struct pipe_stream_output_target *ptarg)
+{
+ struct nvc0_so_target *targ = nvc0_so_target(ptarg);
+ pipe->destroy_query(pipe, targ->pq);
+ pipe_resource_reference(&targ->pipe.buffer, NULL);
+ FREE(targ);
+}
+
+static void
+nvc0_set_transform_feedback_targets(struct pipe_context *pipe,
+ unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ unsigned append_mask)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ unsigned i;
+ boolean serialize = TRUE;
+
+ assert(num_targets <= 4);
+
+ for (i = 0; i < num_targets; ++i) {
+ if (nvc0->tfbbuf[i] == targets[i] && (append_mask & (1 << i)))
+ continue;
+ nvc0->tfbbuf_dirty |= 1 << i;
+
+ if (nvc0->tfbbuf[i] && nvc0->tfbbuf[i] != targets[i])
+ nvc0_so_target_save_offset(pipe, nvc0->tfbbuf[i], i, &serialize);
+
+ if (targets[i] && !(append_mask & (1 << i)))
+ nvc0_so_target(targets[i])->clean = TRUE;
+
+ pipe_so_target_reference(&nvc0->tfbbuf[i], targets[i]);
+ }
+ for (; i < nvc0->num_tfbbufs; ++i) {
+ nvc0->tfbbuf_dirty |= 1 << i;
+ nvc0_so_target_save_offset(pipe, nvc0->tfbbuf[i], i, &serialize);
+ pipe_so_target_reference(&nvc0->tfbbuf[i], NULL);
+ }
+ nvc0->num_tfbbufs = num_targets;
+
+ if (nvc0->tfbbuf_dirty)
+ nvc0->dirty |= NVC0_NEW_TFB_TARGETS;
+}
+
+static void
+nvc0_bind_surfaces_range(struct nvc0_context *nvc0, const unsigned t,
+ unsigned start, unsigned nr,
+ struct pipe_surface **psurfaces)
+{
+ const unsigned end = start + nr;
+ const unsigned mask = ((1 << nr) - 1) << start;
+ unsigned i;
+
+ if (psurfaces) {
+ for (i = start; i < end; ++i) {
+ const unsigned p = i - start;
+ if (psurfaces[p])
+ nvc0->surfaces_valid[t] |= (1 << i);
+ else
+ nvc0->surfaces_valid[t] &= ~(1 << i);
+ pipe_surface_reference(&nvc0->surfaces[t][i], psurfaces[p]);
+ }
+ } else {
+ for (i = start; i < end; ++i)
+ pipe_surface_reference(&nvc0->surfaces[t][i], NULL);
+ nvc0->surfaces_valid[t] &= ~mask;
+ }
+ nvc0->surfaces_dirty[t] |= mask;
+
+ if (t == 0)
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_SUF);
+ else
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
+}
+
+static void
+nvc0_set_compute_resources(struct pipe_context *pipe,
+ unsigned start, unsigned nr,
+ struct pipe_surface **resources)
+{
+ nvc0_bind_surfaces_range(nvc0_context(pipe), 1, start, nr, resources);
+
+ nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_SURFACES;
+}
+
+static void
+nvc0_set_shader_resources(struct pipe_context *pipe,
+ unsigned start, unsigned nr,
+ struct pipe_surface **resources)
+{
+ nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, resources);
+
+ nvc0_context(pipe)->dirty |= NVC0_NEW_SURFACES;
+}
+
+static INLINE void
+nvc0_set_global_handle(uint32_t *phandle, struct pipe_resource *res)
+{
+ struct nv04_resource *buf = nv04_resource(res);
+ if (buf) {
+ uint64_t limit = (buf->address + buf->base.width0) - 1;
+ if (limit < (1ULL << 32)) {
+ *phandle = (uint32_t)buf->address;
+ } else {
+ NOUVEAU_ERR("Cannot map into TGSI_RESOURCE_GLOBAL: "
+ "resource not contained within 32-bit address space !\n");
+ *phandle = 0;
+ }
+ } else {
+ *phandle = 0;
+ }
+}
+
+static void
+nvc0_set_global_bindings(struct pipe_context *pipe,
+ unsigned start, unsigned nr,
+ struct pipe_resource **resources,
+ uint32_t **handles)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct pipe_resource **ptr;
+ unsigned i;
+ const unsigned end = start + nr;
+
+ if (nvc0->global_residents.size <= (end * sizeof(struct pipe_resource *))) {
+ const unsigned old_size = nvc0->global_residents.size;
+ const unsigned req_size = end * sizeof(struct pipe_resource *);
+ util_dynarray_resize(&nvc0->global_residents, req_size);
+ memset((uint8_t *)nvc0->global_residents.data + old_size, 0,
+ req_size - old_size);
+ }
+
+ if (resources) {
+ ptr = util_dynarray_element(
+ &nvc0->global_residents, struct pipe_resource *, start);
+ for (i = 0; i < nr; ++i) {
+ pipe_resource_reference(&ptr[i], resources[i]);
+ nvc0_set_global_handle(handles[i], resources[i]);
+ }
+ } else {
+ ptr = util_dynarray_element(
+ &nvc0->global_residents, struct pipe_resource *, start);
+ for (i = 0; i < nr; ++i)
+ pipe_resource_reference(&ptr[i], NULL);
+ }
+
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL);
+
+ nvc0->dirty_cp = NVC0_NEW_CP_GLOBALS;
+}
+
+void
+nvc0_init_state_functions(struct nvc0_context *nvc0)
+{
+ struct pipe_context *pipe = &nvc0->base.pipe;
+
+ pipe->create_blend_state = nvc0_blend_state_create;
+ pipe->bind_blend_state = nvc0_blend_state_bind;
+ pipe->delete_blend_state = nvc0_blend_state_delete;
+
+ pipe->create_rasterizer_state = nvc0_rasterizer_state_create;
+ pipe->bind_rasterizer_state = nvc0_rasterizer_state_bind;
+ pipe->delete_rasterizer_state = nvc0_rasterizer_state_delete;
+
+ pipe->create_depth_stencil_alpha_state = nvc0_zsa_state_create;
+ pipe->bind_depth_stencil_alpha_state = nvc0_zsa_state_bind;
+ pipe->delete_depth_stencil_alpha_state = nvc0_zsa_state_delete;
+
+ pipe->create_sampler_state = nv50_sampler_state_create;
+ pipe->delete_sampler_state = nvc0_sampler_state_delete;
+ pipe->bind_vertex_sampler_states = nvc0_vp_sampler_states_bind;
+ pipe->bind_fragment_sampler_states = nvc0_fp_sampler_states_bind;
+ pipe->bind_geometry_sampler_states = nvc0_gp_sampler_states_bind;
+ pipe->bind_compute_sampler_states = nvc0_cp_sampler_states_bind;
+
+ pipe->create_sampler_view = nvc0_create_sampler_view;
+ pipe->sampler_view_destroy = nvc0_sampler_view_destroy;
+ pipe->set_vertex_sampler_views = nvc0_vp_set_sampler_views;
+ pipe->set_fragment_sampler_views = nvc0_fp_set_sampler_views;
+ pipe->set_geometry_sampler_views = nvc0_gp_set_sampler_views;
+ pipe->set_compute_sampler_views = nvc0_cp_set_sampler_views;
+
+ pipe->create_vs_state = nvc0_vp_state_create;
+ pipe->create_fs_state = nvc0_fp_state_create;
+ pipe->create_gs_state = nvc0_gp_state_create;
+ pipe->bind_vs_state = nvc0_vp_state_bind;
+ pipe->bind_fs_state = nvc0_fp_state_bind;
+ pipe->bind_gs_state = nvc0_gp_state_bind;
+ pipe->delete_vs_state = nvc0_sp_state_delete;
+ pipe->delete_fs_state = nvc0_sp_state_delete;
+ pipe->delete_gs_state = nvc0_sp_state_delete;
+
+ pipe->create_compute_state = nvc0_cp_state_create;
+ pipe->bind_compute_state = nvc0_cp_state_bind;
+ pipe->delete_compute_state = nvc0_sp_state_delete;
+
+ pipe->set_blend_color = nvc0_set_blend_color;
+ pipe->set_stencil_ref = nvc0_set_stencil_ref;
+ pipe->set_clip_state = nvc0_set_clip_state;
+ pipe->set_sample_mask = nvc0_set_sample_mask;
+ pipe->set_constant_buffer = nvc0_set_constant_buffer;
+ pipe->set_framebuffer_state = nvc0_set_framebuffer_state;
+ pipe->set_polygon_stipple = nvc0_set_polygon_stipple;
+ pipe->set_scissor_states = nvc0_set_scissor_states;
+ pipe->set_viewport_states = nvc0_set_viewport_states;
+
+ pipe->create_vertex_elements_state = nvc0_vertex_state_create;
+ pipe->delete_vertex_elements_state = nvc0_vertex_state_delete;
+ pipe->bind_vertex_elements_state = nvc0_vertex_state_bind;
+
+ pipe->set_vertex_buffers = nvc0_set_vertex_buffers;
+ pipe->set_index_buffer = nvc0_set_index_buffer;
+
+ pipe->create_stream_output_target = nvc0_so_target_create;
+ pipe->stream_output_target_destroy = nvc0_so_target_destroy;
+ pipe->set_stream_output_targets = nvc0_set_transform_feedback_targets;
+
+ pipe->set_global_binding = nvc0_set_global_bindings;
+ pipe->set_compute_resources = nvc0_set_compute_resources;
+ pipe->set_shader_resources = nvc0_set_shader_resources;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
new file mode 100644
index 0000000..0ba4bad
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -0,0 +1,577 @@
+
+#include "util/u_math.h"
+
+#include "nvc0/nvc0_context.h"
+
+#if 0
+static void
+nvc0_validate_zcull(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+ struct nv50_surface *sf = nv50_surface(fb->zsbuf);
+ struct nv50_miptree *mt = nv50_miptree(sf->base.texture);
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t size;
+ uint32_t offset = align(mt->total_size, 1 << 17);
+ unsigned width, height;
+
+ assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2);
+
+ size = mt->total_size * 2;
+
+ height = align(fb->height, 32);
+ width = fb->width % 224;
+ if (width)
+ width = fb->width + (224 - width);
+ else
+ width = fb->width;
+
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, bo->offset + offset);
+ PUSH_DATA (push, bo->offset + offset);
+ offset += 1 << 17;
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_LIMIT_HIGH), 2);
+ PUSH_DATAh(push, bo->offset + offset);
+ PUSH_DATA (push, bo->offset + offset);
+ BEGIN_NVC0(push, SUBC_3D(0x07e0), 2);
+ PUSH_DATA (push, size);
+ PUSH_DATA (push, size >> 16);
+ BEGIN_NVC0(push, SUBC_3D(0x15c8), 1); /* bits 0x3 */
+ PUSH_DATA (push, 2);
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_WIDTH), 4);
+ PUSH_DATA (push, width);
+ PUSH_DATA (push, height);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_WINDOW_OFFSET_X), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_INVALIDATE), 1);
+ PUSH_DATA (push, 0);
+}
+#endif
+
+static void
+nvc0_validate_fb(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+ unsigned i;
+ unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
+ boolean serialize = FALSE;
+
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+
+ BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
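+ /* 076543210 is octal: an identity mapping of RTs 7..0; the low nibble
+  * holds the number of colour buffers */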
+ PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs);
+ BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
+ PUSH_DATA (push, fb->width << 16);
+ PUSH_DATA (push, fb->height << 16);
+
+ for (i = 0; i < fb->nr_cbufs; ++i) {
+ struct nv50_surface *sf = nv50_surface(fb->cbufs[i]);
+ struct nv04_resource *res = nv04_resource(sf->base.texture);
+ struct nouveau_bo *bo = res->bo;
+
+ BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9);
+ PUSH_DATAh(push, res->address + sf->offset);
+ PUSH_DATA (push, res->address + sf->offset);
+ if (likely(nouveau_bo_memtype(bo))) {
+ struct nv50_miptree *mt = nv50_miptree(sf->base.texture);
+
+ assert(sf->base.texture->target != PIPE_BUFFER);
+
+ PUSH_DATA(push, sf->width);
+ PUSH_DATA(push, sf->height);
+ PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
+ PUSH_DATA(push, (mt->layout_3d << 16) |
+ mt->level[sf->base.u.tex.level].tile_mode);
+ PUSH_DATA(push, sf->base.u.tex.first_layer + sf->depth);
+ PUSH_DATA(push, mt->layer_stride >> 2);
+ PUSH_DATA(push, sf->base.u.tex.first_layer);
+
+ ms_mode = mt->ms_mode;
+ } else {
+ if (res->base.target == PIPE_BUFFER) {
+ PUSH_DATA(push, 262144);
+ PUSH_DATA(push, 1);
+ } else {
+ PUSH_DATA(push, nv50_miptree(sf->base.texture)->level[0].pitch);
+ PUSH_DATA(push, sf->height);
+ }
+ PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
+ PUSH_DATA(push, 1 << 12);
+ PUSH_DATA(push, 1);
+ PUSH_DATA(push, 0);
+ PUSH_DATA(push, 0);
+
+ nvc0_resource_fence(res, NOUVEAU_BO_WR);
+
+ assert(!fb->zsbuf);
+ }
+
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_READING)
+ serialize = TRUE;
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ /* only register for writing, otherwise we'd always serialize here */
+ BCTX_REFN(nvc0->bufctx_3d, FB, res, WR);
+ }
+
+ if (fb->zsbuf) {
+ struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture);
+ struct nv50_surface *sf = nv50_surface(fb->zsbuf);
+ int unk = mt->base.base.target == PIPE_TEXTURE_2D;
+
+ BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5);
+ PUSH_DATAh(push, mt->base.address + sf->offset);
+ PUSH_DATA (push, mt->base.address + sf->offset);
+ PUSH_DATA (push, nvc0_format_table[fb->zsbuf->format].rt);
+ PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
+ PUSH_DATA (push, mt->layer_stride >> 2);
+ BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3);
+ PUSH_DATA (push, sf->width);
+ PUSH_DATA (push, sf->height);
+ PUSH_DATA (push, (unk << 16) |
+ (sf->base.u.tex.first_layer + sf->depth));
+ BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
+ PUSH_DATA (push, sf->base.u.tex.first_layer);
+
+ ms_mode = mt->ms_mode;
+
+ if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
+ serialize = TRUE;
+ mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ BCTX_REFN(nvc0->bufctx_3d, FB, &mt->base, WR);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ }
+
+ IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode);
+
+ if (serialize)
+ IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
+
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, gpu_serialize_count, serialize);
+}
+
+static void
+nvc0_validate_blend_colour(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ BEGIN_NVC0(push, NVC0_3D(BLEND_COLOR(0)), 4);
+ PUSH_DATAf(push, nvc0->blend_colour.color[0]);
+ PUSH_DATAf(push, nvc0->blend_colour.color[1]);
+ PUSH_DATAf(push, nvc0->blend_colour.color[2]);
+ PUSH_DATAf(push, nvc0->blend_colour.color[3]);
+}
+
+static void
+nvc0_validate_stencil_ref(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const ubyte *ref = &nvc0->stencil_ref.ref_value[0];
+
+ IMMED_NVC0(push, NVC0_3D(STENCIL_FRONT_FUNC_REF), ref[0]);
+ IMMED_NVC0(push, NVC0_3D(STENCIL_BACK_FUNC_REF), ref[1]);
+}
+
+static void
+nvc0_validate_stipple(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ unsigned i;
+
+ BEGIN_NVC0(push, NVC0_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
+ for (i = 0; i < 32; ++i)
+ PUSH_DATA(push, util_bswap32(nvc0->stipple.stipple[i]));
+}
+
+static void
+nvc0_validate_scissor(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct pipe_scissor_state *s = &nvc0->scissor;
+
+ if (!(nvc0->dirty & NVC0_NEW_SCISSOR) &&
+ nvc0->rast->pipe.scissor == nvc0->state.scissor)
+ return;
+ nvc0->state.scissor = nvc0->rast->pipe.scissor;
+
+ BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2);
+ if (nvc0->rast->pipe.scissor) {
+ PUSH_DATA(push, (s->maxx << 16) | s->minx);
+ PUSH_DATA(push, (s->maxy << 16) | s->miny);
+ } else {
+ PUSH_DATA(push, (0xffff << 16) | 0);
+ PUSH_DATA(push, (0xffff << 16) | 0);
+ }
+}
+
+static void
+nvc0_validate_viewport(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct pipe_viewport_state *vp = &nvc0->viewport;
+ int x, y, w, h;
+ float zmin, zmax;
+
+ BEGIN_NVC0(push, NVC0_3D(VIEWPORT_TRANSLATE_X(0)), 3);
+ PUSH_DATAf(push, vp->translate[0]);
+ PUSH_DATAf(push, vp->translate[1]);
+ PUSH_DATAf(push, vp->translate[2]);
+ BEGIN_NVC0(push, NVC0_3D(VIEWPORT_SCALE_X(0)), 3);
+ PUSH_DATAf(push, vp->scale[0]);
+ PUSH_DATAf(push, vp->scale[1]);
+ PUSH_DATAf(push, vp->scale[2]);
+
+ /* now set the viewport rectangle to viewport dimensions for clipping */
+
+ x = util_iround(MAX2(0.0f, vp->translate[0] - fabsf(vp->scale[0])));
+ y = util_iround(MAX2(0.0f, vp->translate[1] - fabsf(vp->scale[1])));
+ w = util_iround(vp->translate[0] + fabsf(vp->scale[0])) - x;
+ h = util_iround(vp->translate[1] + fabsf(vp->scale[1])) - y;
+
+ zmin = vp->translate[2] - fabsf(vp->scale[2]);
+ zmax = vp->translate[2] + fabsf(vp->scale[2]);
+
+ nvc0->vport_int[0] = (w << 16) | x;
+ nvc0->vport_int[1] = (h << 16) | y;
+ BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2);
+ PUSH_DATA (push, nvc0->vport_int[0]);
+ PUSH_DATA (push, nvc0->vport_int[1]);
+ BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(0)), 2);
+ PUSH_DATAf(push, zmin);
+ PUSH_DATAf(push, zmax);
+}
+
+static INLINE void
+nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nouveau_bo *bo = nvc0->screen->uniform_bo;
+
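+ /* bind the driver's per-stage constant buffer (assumed to live at
+  * 0x50000 + s * 512 in the uniform BO) and upload the user clip planes
+  * at offset 256 within it */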
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 512);
+ PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 9));
+ PUSH_DATA (push, bo->offset + (5 << 16) + (s << 9));
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
+ PUSH_DATA (push, 256);
+ PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
+}
+
+static INLINE void
+nvc0_check_program_ucps(struct nvc0_context *nvc0,
+ struct nvc0_program *vp, uint8_t mask)
+{
+ const unsigned n = util_logbase2(mask) + 1;
+
+ if (vp->vp.num_ucps >= n)
+ return;
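+ /* rebuild the program so that it exports n clip distances */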
+ nvc0_program_destroy(nvc0, vp);
+
+ vp->vp.num_ucps = n;
+ if (likely(vp == nvc0->vertprog))
+ nvc0_vertprog_validate(nvc0);
+ else
+ if (likely(vp == nvc0->gmtyprog))
+ nvc0_gmtyprog_validate(nvc0);
+ else
+ nvc0_tevlprog_validate(nvc0);
+}
+
+static void
+nvc0_validate_clip(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_program *vp;
+ unsigned stage;
+ uint8_t clip_enable = nvc0->rast->pipe.clip_plane_enable;
+
+ if (nvc0->gmtyprog) {
+ stage = 3;
+ vp = nvc0->gmtyprog;
+ } else
+ if (nvc0->tevlprog) {
+ stage = 2;
+ vp = nvc0->tevlprog;
+ } else {
+ stage = 0;
+ vp = nvc0->vertprog;
+ }
+
+ if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES)
+ nvc0_check_program_ucps(nvc0, vp, clip_enable);
+
+ if (nvc0->dirty & (NVC0_NEW_CLIP | (NVC0_NEW_VERTPROG << stage)))
+ if (vp->vp.num_ucps > 0 && vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES)
+ nvc0_upload_uclip_planes(nvc0, stage);
+
+ clip_enable &= vp->vp.clip_enable;
+
+ if (nvc0->state.clip_enable != clip_enable) {
+ nvc0->state.clip_enable = clip_enable;
+ IMMED_NVC0(push, NVC0_3D(CLIP_DISTANCE_ENABLE), clip_enable);
+ }
+ if (nvc0->state.clip_mode != vp->vp.clip_mode) {
+ nvc0->state.clip_mode = vp->vp.clip_mode;
+ BEGIN_NVC0(push, NVC0_3D(CLIP_DISTANCE_MODE), 1);
+ PUSH_DATA (push, vp->vp.clip_mode);
+ }
+}
+
+static void
+nvc0_validate_blend(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ PUSH_SPACE(push, nvc0->blend->size);
+ PUSH_DATAp(push, nvc0->blend->state, nvc0->blend->size);
+}
+
+static void
+nvc0_validate_zsa(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ PUSH_SPACE(push, nvc0->zsa->size);
+ PUSH_DATAp(push, nvc0->zsa->state, nvc0->zsa->size);
+}
+
+static void
+nvc0_validate_rasterizer(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ PUSH_SPACE(push, nvc0->rast->size);
+ PUSH_DATAp(push, nvc0->rast->state, nvc0->rast->size);
+}
+
+static void
+nvc0_constbufs_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ unsigned s;
+
+ for (s = 0; s < 5; ++s) {
+ while (nvc0->constbuf_dirty[s]) {
+ int i = ffs(nvc0->constbuf_dirty[s]) - 1;
+ nvc0->constbuf_dirty[s] &= ~(1 << i);
+
+ if (nvc0->constbuf[s][i].user) {
+ struct nouveau_bo *bo = nvc0->screen->uniform_bo;
+ const unsigned base = s << 16;
+ const unsigned size = nvc0->constbuf[s][0].size;
+ assert(i == 0); /* we really only want OpenGL uniforms here */
+ assert(nvc0->constbuf[s][0].u.data);
+
+ if (nvc0->state.uniform_buffer_bound[s] < size) {
+ nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);
+
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
+ PUSH_DATAh(push, bo->offset + base);
+ PUSH_DATA (push, bo->offset + base);
+ BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
+ PUSH_DATA (push, (0 << 4) | 1);
+ }
+ nvc0_cb_push(&nvc0->base, bo, NOUVEAU_BO_VRAM,
+ base, nvc0->state.uniform_buffer_bound[s],
+ 0, (size + 3) / 4,
+ nvc0->constbuf[s][0].u.data);
+ } else {
+ struct nv04_resource *res =
+ nv04_resource(nvc0->constbuf[s][i].u.buf);
+ if (res) {
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, nvc0->constbuf[s][i].size);
+ PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
+ PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
+ BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
+ PUSH_DATA (push, (i << 4) | 1);
+
+ BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
+ PUSH_DATA (push, (i << 4) | 0);
+ }
+ if (i == 0)
+ nvc0->state.uniform_buffer_bound[s] = 0;
+ }
+ }
+ }
+}
+
+static void
+nvc0_validate_sample_mask(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ unsigned mask[4] =
+ {
+ nvc0->sample_mask & 0xffff,
+ nvc0->sample_mask & 0xffff,
+ nvc0->sample_mask & 0xffff,
+ nvc0->sample_mask & 0xffff
+ };
+
+ BEGIN_NVC0(push, NVC0_3D(MSAA_MASK(0)), 4);
+ PUSH_DATA (push, mask[0]);
+ PUSH_DATA (push, mask[1]);
+ PUSH_DATA (push, mask[2]);
+ PUSH_DATA (push, mask[3]);
+ BEGIN_NVC0(push, NVC0_3D(SAMPLE_SHADING), 1);
+ PUSH_DATA (push, 0x01);
+}
+
+void
+nvc0_validate_global_residents(struct nvc0_context *nvc0,
+ struct nouveau_bufctx *bctx, int bin)
+{
+ unsigned i;
+
+ for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);
+ ++i) {
+ struct pipe_resource *res = *util_dynarray_element(
+ &nvc0->global_residents, struct pipe_resource *, i);
+ if (res)
+ nvc0_add_resident(bctx, bin, nv04_resource(res), NOUVEAU_BO_RDWR);
+ }
+}
+
+static void
+nvc0_validate_derived_1(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ boolean rasterizer_discard;
+
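+ /* even without explicit rasterizer_discard, rasterization is pointless
+  * when neither depth/stencil tests nor any FP colour outputs are active
+  * (hdr[18] is taken to be the FP colour output mask) */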
+ if (nvc0->rast && nvc0->rast->pipe.rasterizer_discard) {
+ rasterizer_discard = TRUE;
+ } else {
+ boolean zs = nvc0->zsa &&
+ (nvc0->zsa->pipe.depth.enabled || nvc0->zsa->pipe.stencil[0].enabled);
+ rasterizer_discard = !zs &&
+ (!nvc0->fragprog || !nvc0->fragprog->hdr[18]);
+ }
+
+ if (rasterizer_discard != nvc0->state.rasterizer_discard) {
+ nvc0->state.rasterizer_discard = rasterizer_discard;
+ IMMED_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), !rasterizer_discard);
+ }
+}
+
+static void
+nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
+{
+ struct nvc0_context *ctx_from = ctx_to->screen->cur_ctx;
+ unsigned s;
+
+ if (ctx_from)
+ ctx_to->state = ctx_from->state;
+
+ ctx_to->dirty = ~0;
+
+ for (s = 0; s < 5; ++s) {
+ ctx_to->samplers_dirty[s] = ~0;
+ ctx_to->textures_dirty[s] = ~0;
+ }
+
+ if (!ctx_to->vertex)
+ ctx_to->dirty &= ~(NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS);
+ if (!ctx_to->idxbuf.buffer)
+ ctx_to->dirty &= ~NVC0_NEW_IDXBUF;
+
+ if (!ctx_to->vertprog)
+ ctx_to->dirty &= ~NVC0_NEW_VERTPROG;
+ if (!ctx_to->fragprog)
+ ctx_to->dirty &= ~NVC0_NEW_FRAGPROG;
+
+ if (!ctx_to->blend)
+ ctx_to->dirty &= ~NVC0_NEW_BLEND;
+ if (!ctx_to->rast)
+ ctx_to->dirty &= ~(NVC0_NEW_RASTERIZER | NVC0_NEW_SCISSOR);
+ if (!ctx_to->zsa)
+ ctx_to->dirty &= ~NVC0_NEW_ZSA;
+
+ ctx_to->screen->cur_ctx = ctx_to;
+}
+
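+/* run in array order; each function is called when any of its dirty bits
+ * are set in nvc0_state_validate() */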
+static struct state_validate {
+ void (*func)(struct nvc0_context *);
+ uint32_t states;
+} validate_list[] = {
+ { nvc0_validate_fb, NVC0_NEW_FRAMEBUFFER },
+ { nvc0_validate_blend, NVC0_NEW_BLEND },
+ { nvc0_validate_zsa, NVC0_NEW_ZSA },
+ { nvc0_validate_sample_mask, NVC0_NEW_SAMPLE_MASK },
+ { nvc0_validate_rasterizer, NVC0_NEW_RASTERIZER },
+ { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR },
+ { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF },
+ { nvc0_validate_stipple, NVC0_NEW_STIPPLE },
+ { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_RASTERIZER },
+ { nvc0_validate_viewport, NVC0_NEW_VIEWPORT },
+ { nvc0_vertprog_validate, NVC0_NEW_VERTPROG },
+ { nvc0_tctlprog_validate, NVC0_NEW_TCTLPROG },
+ { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
+ { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
+ { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG },
+ { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
+ NVC0_NEW_RASTERIZER },
+ { nvc0_validate_clip, NVC0_NEW_CLIP | NVC0_NEW_RASTERIZER |
+ NVC0_NEW_VERTPROG |
+ NVC0_NEW_TEVLPROG |
+ NVC0_NEW_GMTYPROG },
+ { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF },
+ { nvc0_validate_textures, NVC0_NEW_TEXTURES },
+ { nvc0_validate_samplers, NVC0_NEW_SAMPLERS },
+ { nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS },
+ { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
+ { nvc0_validate_surfaces, NVC0_NEW_SURFACES },
+ { nvc0_idxbuf_validate, NVC0_NEW_IDXBUF },
+ { nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG }
+};
+#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
+
+boolean
+nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask, unsigned words)
+{
+ uint32_t state_mask;
+ int ret;
+ unsigned i;
+
+ if (nvc0->screen->cur_ctx != nvc0)
+ nvc0_switch_pipe_context(nvc0);
+
+ state_mask = nvc0->dirty & mask;
+
+ if (state_mask) {
+ for (i = 0; i < validate_list_len; ++i) {
+ struct state_validate *validate = &validate_list[i];
+
+ if (state_mask & validate->states)
+ validate->func(nvc0);
+ }
+ nvc0->dirty &= ~state_mask;
+
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, FALSE);
+ }
+
+ nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_3d);
+ ret = nouveau_pushbuf_validate(nvc0->base.pushbuf);
+
+ if (unlikely(nvc0->state.flushed)) {
+ nvc0->state.flushed = FALSE;
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, TRUE);
+ }
+ return !ret;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
new file mode 100644
index 0000000..80c3342
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
@@ -0,0 +1,77 @@
+
+#ifndef __NVC0_STATEOBJ_H__
+#define __NVC0_STATEOBJ_H__
+
+#include "pipe/p_state.h"
+
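+/* State objects hold pre-encoded command words: _SQ builds a header for a
+ * sequence of incrementing methods, _IL packs an immediate value inline. */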
+#define SB_BEGIN_3D(so, m, s) \
+ (so)->state[(so)->size++] = NVC0_FIFO_PKHDR_SQ(NVC0_3D(m), s)
+
+#define SB_IMMED_3D(so, m, d) \
+ (so)->state[(so)->size++] = NVC0_FIFO_PKHDR_IL(NVC0_3D(m), d)
+
+#define SB_DATA(so, u) (so)->state[(so)->size++] = (u)
+
+#include "nv50/nv50_stateobj_tex.h"
+
+struct nvc0_blend_stateobj {
+ struct pipe_blend_state pipe;
+ int size;
+ uint32_t state[72];
+};
+
+struct nvc0_rasterizer_stateobj {
+ struct pipe_rasterizer_state pipe;
+ int size;
+ uint32_t state[43];
+};
+
+struct nvc0_zsa_stateobj {
+ struct pipe_depth_stencil_alpha_state pipe;
+ int size;
+ uint32_t state[26];
+};
+
+struct nvc0_constbuf {
+ union {
+ struct pipe_resource *buf;
+ const void *data;
+ } u;
+ uint32_t size;
+ uint32_t offset;
+ boolean user; /* should only be TRUE if u.data is valid and non-NULL */
+};
+
+struct nvc0_vertex_element {
+ struct pipe_vertex_element pipe;
+ uint32_t state;
+ uint32_t state_alt; /* buffer 0 and with source offset (for translate) */
+};
+
+struct nvc0_vertex_stateobj {
+ uint32_t min_instance_div[PIPE_MAX_ATTRIBS];
+ uint16_t vb_access_size[PIPE_MAX_ATTRIBS];
+ struct translate *translate;
+ unsigned num_elements;
+ uint32_t instance_elts;
+ uint32_t instance_bufs;
+ boolean shared_slots;
+ boolean need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */
+ unsigned size; /* size of vertex in bytes (when packed) */
+ struct nvc0_vertex_element element[0];
+};
+
+struct nvc0_so_target {
+ struct pipe_stream_output_target pipe;
+ struct pipe_query *pq;
+ unsigned stride;
+ boolean clean;
+};
+
+static INLINE struct nvc0_so_target *
+nvc0_so_target(struct pipe_stream_output_target *ptarg)
+{
+ return (struct nvc0_so_target *)ptarg;
+}
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
new file mode 100644
index 0000000..5070df8
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -0,0 +1,1265 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdint.h>
+
+#include "pipe/p_defines.h"
+
+#include "util/u_inlines.h"
+#include "util/u_pack_color.h"
+#include "util/u_format.h"
+#include "util/u_surface.h"
+
+#include "os/os_thread.h"
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_resource.h"
+
+#include "nv50/nv50_defs.xml.h"
+#include "nv50/nv50_texture.xml.h"
+
+/* these are used in nv50_blit.h */
+#define NV50_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL
+#define NV50_ENG2D_NOCONVERT_FORMATS 0x009cc02000000000ULL
+#define NV50_ENG2D_LUMINANCE_FORMATS 0x001cc02000000000ULL
+#define NV50_ENG2D_INTENSITY_FORMATS 0x0080000000000000ULL
+#define NV50_ENG2D_OPERATION_FORMATS 0x060001c000638000ULL
+
+#define NOUVEAU_DRIVER 0xc0
+#include "nv50/nv50_blit.h"
+
+static INLINE uint8_t
+nvc0_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
+{
+ uint8_t id = nvc0_format_table[format].rt;
+
+ /* A8_UNORM is treated as I8_UNORM as far as the 2D engine is concerned. */
+ if (!dst && unlikely(format == PIPE_FORMAT_I8_UNORM) && !dst_src_equal)
+ return NV50_SURFACE_FORMAT_A8_UNORM;
+
+ /* Hardware values for color formats range from 0xc0 to 0xff,
+ * but the 2D engine doesn't support all of them.
+ */
+ if (nv50_2d_format_supported(format))
+ return id;
+ assert(dst_src_equal);
+
+ switch (util_format_get_blocksize(format)) {
+ case 1:
+ return NV50_SURFACE_FORMAT_R8_UNORM;
+ case 2:
+ return NV50_SURFACE_FORMAT_R16_UNORM;
+ case 4:
+ return NV50_SURFACE_FORMAT_BGRA8_UNORM;
+ case 8:
+ return NV50_SURFACE_FORMAT_RGBA16_UNORM;
+ case 16:
+ return NV50_SURFACE_FORMAT_RGBA32_FLOAT;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static int
+nvc0_2d_texture_set(struct nouveau_pushbuf *push, boolean dst,
+ struct nv50_miptree *mt, unsigned level, unsigned layer,
+ enum pipe_format pformat, boolean dst_src_pformat_equal)
+{
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t width, height, depth;
+ uint32_t format;
+ uint32_t mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT;
+ uint32_t offset = mt->level[level].offset;
+
+ format = nvc0_2d_format(pformat, dst, dst_src_pformat_equal);
+ if (!format) {
+ NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
+ util_format_name(pformat));
+ return 1;
+ }
+
+ width = u_minify(mt->base.base.width0, level) << mt->ms_x;
+ height = u_minify(mt->base.base.height0, level) << mt->ms_y;
+ depth = u_minify(mt->base.base.depth0, level);
+
+ /* layer has to be < depth, and depth > tile depth / 2 */
+
+ if (!mt->layout_3d) {
+ offset += mt->layer_stride * layer;
+ layer = 0;
+ depth = 1;
+ } else
+ if (!dst) {
+ offset += nvc0_mt_zslice_offset(mt, level, layer);
+ layer = 0;
+ }
+
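+   /* Linear surfaces are described by their pitch only; tiled surfaces need
+    * the tile mode plus depth and layer index, hence the two method layouts.
+    */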
+ if (!nouveau_bo_memtype(bo)) {
+ BEGIN_NVC0(push, SUBC_2D(mthd), 2);
+ PUSH_DATA (push, format);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, SUBC_2D(mthd + 0x14), 5);
+ PUSH_DATA (push, mt->level[level].pitch);
+ PUSH_DATA (push, width);
+ PUSH_DATA (push, height);
+ PUSH_DATAh(push, bo->offset + offset);
+ PUSH_DATA (push, bo->offset + offset);
+ } else {
+ BEGIN_NVC0(push, SUBC_2D(mthd), 5);
+ PUSH_DATA (push, format);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, mt->level[level].tile_mode);
+ PUSH_DATA (push, depth);
+ PUSH_DATA (push, layer);
+ BEGIN_NVC0(push, SUBC_2D(mthd + 0x18), 4);
+ PUSH_DATA (push, width);
+ PUSH_DATA (push, height);
+ PUSH_DATAh(push, bo->offset + offset);
+ PUSH_DATA (push, bo->offset + offset);
+ }
+
+#if 0
+ if (dst) {
+ BEGIN_NVC0(push, SUBC_2D(NVC0_2D_CLIP_X), 4);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, width);
+ PUSH_DATA (push, height);
+ }
+#endif
+ return 0;
+}
+
+static int
+nvc0_2d_texture_do_copy(struct nouveau_pushbuf *push,
+ struct nv50_miptree *dst, unsigned dst_level,
+ unsigned dx, unsigned dy, unsigned dz,
+ struct nv50_miptree *src, unsigned src_level,
+ unsigned sx, unsigned sy, unsigned sz,
+ unsigned w, unsigned h)
+{
+ const enum pipe_format dfmt = dst->base.base.format;
+ const enum pipe_format sfmt = src->base.base.format;
+ int ret;
+ boolean eqfmt = dfmt == sfmt;
+
+ if (!PUSH_SPACE(push, 2 * 16 + 32))
+ return PIPE_ERROR;
+
+ ret = nvc0_2d_texture_set(push, TRUE, dst, dst_level, dz, dfmt, eqfmt);
+ if (ret)
+ return ret;
+
+ ret = nvc0_2d_texture_set(push, FALSE, src, src_level, sz, sfmt, eqfmt);
+ if (ret)
+ return ret;
+
+ IMMED_NVC0(push, NVC0_2D(BLIT_CONTROL), 0x00);
+ BEGIN_NVC0(push, NVC0_2D(BLIT_DST_X), 4);
+ PUSH_DATA (push, dx << dst->ms_x);
+ PUSH_DATA (push, dy << dst->ms_y);
+ PUSH_DATA (push, w << dst->ms_x);
+ PUSH_DATA (push, h << dst->ms_y);
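+   /* unscaled copy: du/dx = dv/dy = 1.0, pushed as {fraction, integer} pairs */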
+ BEGIN_NVC0(push, NVC0_2D(BLIT_DU_DX_FRACT), 4);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_X_FRACT), 4);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, sx << src->ms_x);
+ PUSH_DATA (push, 0);
+   PUSH_DATA (push, sy << src->ms_y);

+
+ return 0;
+}
+
+static void
+nvc0_resource_copy_region(struct pipe_context *pipe,
+ struct pipe_resource *dst, unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src, unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ int ret;
+ boolean m2mf;
+ unsigned dst_layer = dstz, src_layer = src_box->z;
+
+ if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
+ nouveau_copy_buffer(&nvc0->base,
+ nv04_resource(dst), dstx,
+ nv04_resource(src), src_box->x, src_box->width);
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, buf_copy_bytes, src_box->width);
+ return;
+ }
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_copy_count, 1);
+
+ /* 0 and 1 are equal, only supporting 0/1, 2, 4 and 8 */
+ assert((src->nr_samples | 1) == (dst->nr_samples | 1));
+
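+   /* Prefer the M2MF engine for raw copies; it performs no format conversion,
+    * so the formats must match or at least have equal block sizes.
+    */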
+ m2mf = (src->format == dst->format) ||
+ (util_format_get_blocksizebits(src->format) ==
+ util_format_get_blocksizebits(dst->format));
+
+ nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+
+ if (m2mf) {
+ struct nv50_m2mf_rect drect, srect;
+ unsigned i;
+ unsigned nx = util_format_get_nblocksx(src->format, src_box->width);
+ unsigned ny = util_format_get_nblocksy(src->format, src_box->height);
+
+ nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz);
+ nv50_m2mf_rect_setup(&srect, src, src_level,
+ src_box->x, src_box->y, src_box->z);
+
+ for (i = 0; i < src_box->depth; ++i) {
+ nvc0->m2mf_copy_rect(nvc0, &drect, &srect, nx, ny);
+
+ if (nv50_miptree(dst)->layout_3d)
+ drect.z++;
+ else
+ drect.base += nv50_miptree(dst)->layer_stride;
+
+ if (nv50_miptree(src)->layout_3d)
+ srect.z++;
+ else
+ srect.base += nv50_miptree(src)->layer_stride;
+ }
+ return;
+ }
+
+ assert(nv50_2d_dst_format_faithful(dst->format));
+ assert(nv50_2d_src_format_faithful(src->format));
+
+ BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(src), RD);
+ BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(dst), WR);
+ nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx);
+ nouveau_pushbuf_validate(nvc0->base.pushbuf);
+
+ for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) {
+ ret = nvc0_2d_texture_do_copy(nvc0->base.pushbuf,
+ nv50_miptree(dst), dst_level,
+ dstx, dsty, dst_layer,
+ nv50_miptree(src), src_level,
+ src_box->x, src_box->y, src_layer,
+ src_box->width, src_box->height);
+ if (ret)
+ break;
+ }
+ nouveau_bufctx_reset(nvc0->bufctx, 0);
+}
+
+static void
+nvc0_clear_render_target(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ const union pipe_color_union *color,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv50_surface *sf = nv50_surface(dst);
+ struct nv04_resource *res = nv04_resource(sf->base.texture);
+ unsigned z;
+
+ if (!PUSH_SPACE(push, 32 + sf->depth))
+ return;
+
+ PUSH_REFN (push, res->bo, res->domain | NOUVEAU_BO_WR);
+
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4);
+ PUSH_DATAf(push, color->f[0]);
+ PUSH_DATAf(push, color->f[1]);
+ PUSH_DATAf(push, color->f[2]);
+ PUSH_DATAf(push, color->f[3]);
+
+ BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
+ PUSH_DATA (push, ( width << 16) | dstx);
+ PUSH_DATA (push, (height << 16) | dsty);
+
+ BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9);
+ PUSH_DATAh(push, res->address + sf->offset);
+ PUSH_DATA (push, res->address + sf->offset);
+ if (likely(nouveau_bo_memtype(res->bo))) {
+ struct nv50_miptree *mt = nv50_miptree(dst->texture);
+
+ PUSH_DATA(push, sf->width);
+ PUSH_DATA(push, sf->height);
+ PUSH_DATA(push, nvc0_format_table[dst->format].rt);
+ PUSH_DATA(push, (mt->layout_3d << 16) |
+ mt->level[sf->base.u.tex.level].tile_mode);
+ PUSH_DATA(push, dst->u.tex.first_layer + sf->depth);
+ PUSH_DATA(push, mt->layer_stride >> 2);
+ PUSH_DATA(push, dst->u.tex.first_layer);
+ } else {
+ if (res->base.target == PIPE_BUFFER) {
+ PUSH_DATA(push, 262144);
+ PUSH_DATA(push, 1);
+ } else {
+ PUSH_DATA(push, nv50_miptree(&res->base)->level[0].pitch);
+ PUSH_DATA(push, sf->height);
+ }
+ PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
+ PUSH_DATA(push, 1 << 12);
+ PUSH_DATA(push, 1);
+ PUSH_DATA(push, 0);
+ PUSH_DATA(push, 0);
+
+ IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0);
+
+ /* tiled textures don't have to be fenced, they're not mapped directly */
+ nvc0_resource_fence(res, NOUVEAU_BO_WR);
+ }
+
+ BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
+ for (z = 0; z < sf->depth; ++z) {
+ PUSH_DATA (push, 0x3c |
+ (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
+ }
+
+ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+}
+
+static void
+nvc0_clear_depth_stencil(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned clear_flags,
+ double depth,
+ unsigned stencil,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv50_miptree *mt = nv50_miptree(dst->texture);
+ struct nv50_surface *sf = nv50_surface(dst);
+ uint32_t mode = 0;
+ int unk = mt->base.base.target == PIPE_TEXTURE_2D;
+ unsigned z;
+
+ if (!PUSH_SPACE(push, 32 + sf->depth))
+ return;
+
+ PUSH_REFN (push, mt->base.bo, mt->base.domain | NOUVEAU_BO_WR);
+
+ if (clear_flags & PIPE_CLEAR_DEPTH) {
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_DEPTH), 1);
+ PUSH_DATAf(push, depth);
+ mode |= NVC0_3D_CLEAR_BUFFERS_Z;
+ }
+
+ if (clear_flags & PIPE_CLEAR_STENCIL) {
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_STENCIL), 1);
+ PUSH_DATA (push, stencil & 0xff);
+ mode |= NVC0_3D_CLEAR_BUFFERS_S;
+ }
+
+ BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
+ PUSH_DATA (push, ( width << 16) | dstx);
+ PUSH_DATA (push, (height << 16) | dsty);
+
+ BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5);
+ PUSH_DATAh(push, mt->base.address + sf->offset);
+ PUSH_DATA (push, mt->base.address + sf->offset);
+ PUSH_DATA (push, nvc0_format_table[dst->format].rt);
+ PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
+ PUSH_DATA (push, mt->layer_stride >> 2);
+ BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3);
+ PUSH_DATA (push, sf->width);
+ PUSH_DATA (push, sf->height);
+ PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + sf->depth));
+ BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
+ PUSH_DATA (push, dst->u.tex.first_layer);
+
+ BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
+ for (z = 0; z < sf->depth; ++z) {
+ PUSH_DATA (push, mode |
+ (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
+ }
+
+ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+}
+
+void
+nvc0_clear(struct pipe_context *pipe, unsigned buffers,
+ const union pipe_color_union *color,
+ double depth, unsigned stencil)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+ unsigned i;
+ uint32_t mode = 0;
+
+ /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
+ if (!nvc0_state_validate(nvc0, NVC0_NEW_FRAMEBUFFER, 9 + (fb->nr_cbufs * 2)))
+ return;
+
+ if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4);
+ PUSH_DATAf(push, color->f[0]);
+ PUSH_DATAf(push, color->f[1]);
+ PUSH_DATAf(push, color->f[2]);
+ PUSH_DATAf(push, color->f[3]);
+ mode =
+ NVC0_3D_CLEAR_BUFFERS_R | NVC0_3D_CLEAR_BUFFERS_G |
+ NVC0_3D_CLEAR_BUFFERS_B | NVC0_3D_CLEAR_BUFFERS_A;
+ }
+
+ if (buffers & PIPE_CLEAR_DEPTH) {
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_DEPTH), 1);
+ PUSH_DATA (push, fui(depth));
+ mode |= NVC0_3D_CLEAR_BUFFERS_Z;
+ }
+
+ if (buffers & PIPE_CLEAR_STENCIL) {
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_STENCIL), 1);
+ PUSH_DATA (push, stencil & 0xff);
+ mode |= NVC0_3D_CLEAR_BUFFERS_S;
+ }
+
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 1);
+ PUSH_DATA (push, mode);
+
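+   /* RT 0 (and Z/S) are covered above; issue a colour-only clear
+    * (0x3c = RGBA bits) for each additional render target.
+    */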
+ for (i = 1; i < fb->nr_cbufs; i++) {
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 1);
+ PUSH_DATA (push, (i << 6) | 0x3c);
+ }
+}
+
+
+/* =============================== BLIT CODE ===================================
+ */
+
+struct nvc0_blitter
+{
+ struct nvc0_program *fp[NV50_BLIT_MAX_TEXTURE_TYPES][NV50_BLIT_MODES];
+ struct nvc0_program vp;
+
+ struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */
+
+ pipe_mutex mutex;
+
+ struct nvc0_screen *screen;
+};
+
+struct nvc0_blitctx
+{
+ struct nvc0_context *nvc0;
+ struct nvc0_program *fp;
+ uint8_t mode;
+ uint16_t color_mask;
+ uint8_t filter;
+ enum pipe_texture_target target;
+ struct {
+ struct pipe_framebuffer_state fb;
+ struct nvc0_rasterizer_stateobj *rast;
+ struct nvc0_program *vp;
+ struct nvc0_program *tcp;
+ struct nvc0_program *tep;
+ struct nvc0_program *gp;
+ struct nvc0_program *fp;
+ unsigned num_textures[5];
+ unsigned num_samplers[5];
+ struct pipe_sampler_view *texture[2];
+ struct nv50_tsc_entry *sampler[2];
+ uint32_t dirty;
+ } saved;
+ struct nvc0_rasterizer_stateobj rast;
+};
+
+static void
+nvc0_blitter_make_vp(struct nvc0_blitter *blit)
+{
+ static const uint32_t code_nvc0[] =
+ {
+ 0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */
+ 0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */
+ 0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */
+ 0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */
+ 0x00001de7, 0x80000000, /* exit */
+ };
+ static const uint32_t code_nve4[] =
+ {
+ 0x00000007, 0x20000000, /* sched */
+ 0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */
+ 0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */
+ 0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */
+ 0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */
+ 0x00001de7, 0x80000000, /* exit */
+ };
+
+ blit->vp.type = PIPE_SHADER_VERTEX;
+ blit->vp.translated = TRUE;
+ if (blit->screen->base.class_3d >= NVE4_3D_CLASS) {
+ blit->vp.code = (uint32_t *)code_nve4; /* const_cast */
+ blit->vp.code_size = sizeof(code_nve4);
+ } else {
+ blit->vp.code = (uint32_t *)code_nvc0; /* const_cast */
+ blit->vp.code_size = sizeof(code_nvc0);
+ }
+ blit->vp.num_gprs = 6;
+ blit->vp.vp.edgeflag = PIPE_MAX_ATTRIBS;
+
+ blit->vp.hdr[0] = 0x00020461; /* vertprog magic */
+ blit->vp.hdr[4] = 0x000ff000; /* no outputs read */
+ blit->vp.hdr[6] = 0x00000073; /* a[0x80].xy, a[0x90].xyz */
+ blit->vp.hdr[13] = 0x00073000; /* o[0x70].xy, o[0x80].xyz */
+}
+
+static void
+nvc0_blitter_make_sampler(struct nvc0_blitter *blit)
+{
+ /* clamp to edge, min/max lod = 0, nearest filtering */
+
+ blit->sampler[0].id = -1;
+
+ blit->sampler[0].tsc[0] = NV50_TSC_0_SRGB_CONVERSION_ALLOWED |
+ (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPS__SHIFT) |
+ (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPT__SHIFT) |
+ (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPR__SHIFT);
+ blit->sampler[0].tsc[1] =
+ NV50_TSC_1_MAGF_NEAREST | NV50_TSC_1_MINF_NEAREST | NV50_TSC_1_MIPF_NONE;
+
+ /* clamp to edge, min/max lod = 0, bilinear filtering */
+
+ blit->sampler[1].id = -1;
+
+ blit->sampler[1].tsc[0] = blit->sampler[0].tsc[0];
+ blit->sampler[1].tsc[1] =
+ NV50_TSC_1_MAGF_LINEAR | NV50_TSC_1_MINF_LINEAR | NV50_TSC_1_MIPF_NONE;
+}
+
+static void
+nvc0_blit_select_fp(struct nvc0_blitctx *ctx, const struct pipe_blit_info *info)
+{
+ struct nvc0_blitter *blitter = ctx->nvc0->screen->blitter;
+
+ const enum pipe_texture_target ptarg =
+ nv50_blit_reinterpret_pipe_texture_target(info->src.resource->target);
+
+ const unsigned targ = nv50_blit_texture_type(ptarg);
+ const unsigned mode = ctx->mode;
+
+ if (!blitter->fp[targ][mode]) {
+ pipe_mutex_lock(blitter->mutex);
+ if (!blitter->fp[targ][mode])
+ blitter->fp[targ][mode] =
+ nv50_blitter_make_fp(&ctx->nvc0->base.pipe, mode, ptarg);
+ pipe_mutex_unlock(blitter->mutex);
+ }
+ ctx->fp = blitter->fp[targ][mode];
+}
+
+static void
+nvc0_blit_set_dst(struct nvc0_blitctx *ctx,
+ struct pipe_resource *res, unsigned level, unsigned layer,
+ enum pipe_format format)
+{
+ struct nvc0_context *nvc0 = ctx->nvc0;
+ struct pipe_context *pipe = &nvc0->base.pipe;
+ struct pipe_surface templ;
+
+ if (util_format_is_depth_or_stencil(format))
+ templ.format = nv50_blit_zeta_to_colour_format(format);
+ else
+ templ.format = format;
+
+ templ.u.tex.level = level;
+ templ.u.tex.first_layer = templ.u.tex.last_layer = layer;
+
+ if (layer == -1) {
+ templ.u.tex.first_layer = 0;
+ templ.u.tex.last_layer =
+ (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1;
+ }
+
+ nvc0->framebuffer.cbufs[0] = nvc0_miptree_surface_new(pipe, res, &templ);
+ nvc0->framebuffer.nr_cbufs = 1;
+ nvc0->framebuffer.zsbuf = NULL;
+ nvc0->framebuffer.width = nvc0->framebuffer.cbufs[0]->width;
+ nvc0->framebuffer.height = nvc0->framebuffer.cbufs[0]->height;
+}
+
+static void
+nvc0_blit_set_src(struct nvc0_blitctx *ctx,
+ struct pipe_resource *res, unsigned level, unsigned layer,
+ enum pipe_format format, const uint8_t filter)
+{
+ struct nvc0_context *nvc0 = ctx->nvc0;
+ struct pipe_context *pipe = &nvc0->base.pipe;
+ struct pipe_sampler_view templ;
+ uint32_t flags;
+ unsigned s;
+ enum pipe_texture_target target;
+
+ target = nv50_blit_reinterpret_pipe_texture_target(res->target);
+
+ templ.format = format;
+ templ.u.tex.first_layer = templ.u.tex.last_layer = layer;
+ templ.u.tex.first_level = templ.u.tex.last_level = level;
+ templ.swizzle_r = PIPE_SWIZZLE_RED;
+ templ.swizzle_g = PIPE_SWIZZLE_GREEN;
+ templ.swizzle_b = PIPE_SWIZZLE_BLUE;
+ templ.swizzle_a = PIPE_SWIZZLE_ALPHA;
+
+ if (layer == -1) {
+ templ.u.tex.first_layer = 0;
+ templ.u.tex.last_layer =
+ (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1;
+ }
+
+ flags = res->last_level ? 0 : NV50_TEXVIEW_SCALED_COORDS;
+ flags |= NV50_TEXVIEW_ACCESS_RESOLVE;
+ if (filter && res->nr_samples == 8)
+ flags |= NV50_TEXVIEW_FILTER_MSAA8;
+
+ nvc0->textures[4][0] = nvc0_create_texture_view(
+ pipe, res, &templ, flags, target);
+ nvc0->textures[4][1] = NULL;
+
+ for (s = 0; s <= 3; ++s)
+ nvc0->num_textures[s] = 0;
+ nvc0->num_textures[4] = 1;
+
+ templ.format = nv50_zs_to_s_format(format);
+ if (templ.format != format) {
+ nvc0->textures[4][1] = nvc0_create_texture_view(
+ pipe, res, &templ, flags, target);
+ nvc0->num_textures[4] = 2;
+ }
+}
+
+static void
+nvc0_blitctx_prepare_state(struct nvc0_blitctx *blit)
+{
+ struct nouveau_pushbuf *push = blit->nvc0->base.pushbuf;
+
+ /* TODO: maybe make this a MACRO (if we need more logic) ? */
+
+ if (blit->nvc0->cond_query)
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+
+ /* blend state */
+ BEGIN_NVC0(push, NVC0_3D(COLOR_MASK(0)), 1);
+ PUSH_DATA (push, blit->color_mask);
+ IMMED_NVC0(push, NVC0_3D(BLEND_ENABLE(0)), 0);
+ IMMED_NVC0(push, NVC0_3D(LOGIC_OP_ENABLE), 0);
+
+ /* rasterizer state */
+ IMMED_NVC0(push, NVC0_3D(FRAG_COLOR_CLAMP_EN), 0);
+ IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_ENABLE), 0);
+ BEGIN_NVC0(push, NVC0_3D(MSAA_MASK(0)), 4);
+ PUSH_DATA (push, 0xffff);
+ PUSH_DATA (push, 0xffff);
+ PUSH_DATA (push, 0xffff);
+ PUSH_DATA (push, 0xffff);
+ BEGIN_NVC0(push, NVC0_3D(MACRO_POLYGON_MODE_FRONT), 1);
+ PUSH_DATA (push, NVC0_3D_MACRO_POLYGON_MODE_FRONT_FILL);
+ BEGIN_NVC0(push, NVC0_3D(MACRO_POLYGON_MODE_BACK), 1);
+ PUSH_DATA (push, NVC0_3D_MACRO_POLYGON_MODE_BACK_FILL);
+ IMMED_NVC0(push, NVC0_3D(POLYGON_SMOOTH_ENABLE), 0);
+ IMMED_NVC0(push, NVC0_3D(POLYGON_OFFSET_FILL_ENABLE), 0);
+ IMMED_NVC0(push, NVC0_3D(POLYGON_STIPPLE_ENABLE), 0);
+ IMMED_NVC0(push, NVC0_3D(CULL_FACE_ENABLE), 0);
+
+ /* zsa state */
+ IMMED_NVC0(push, NVC0_3D(DEPTH_TEST_ENABLE), 0);
+ IMMED_NVC0(push, NVC0_3D(STENCIL_ENABLE), 0);
+ IMMED_NVC0(push, NVC0_3D(ALPHA_TEST_ENABLE), 0);
+
+ /* disable transform feedback */
+ IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), 0);
+}
+
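+/* Save the context state the blit will clobber and install the internal
+ * blitter programs and samplers; nvc0_blitctx_post_blit restores it.
+ */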
+static void
+nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx)
+{
+ struct nvc0_context *nvc0 = ctx->nvc0;
+ struct nvc0_blitter *blitter = nvc0->screen->blitter;
+ int s;
+
+ ctx->saved.fb.width = nvc0->framebuffer.width;
+ ctx->saved.fb.height = nvc0->framebuffer.height;
+ ctx->saved.fb.nr_cbufs = nvc0->framebuffer.nr_cbufs;
+ ctx->saved.fb.cbufs[0] = nvc0->framebuffer.cbufs[0];
+ ctx->saved.fb.zsbuf = nvc0->framebuffer.zsbuf;
+
+ ctx->saved.rast = nvc0->rast;
+
+ ctx->saved.vp = nvc0->vertprog;
+ ctx->saved.tcp = nvc0->tctlprog;
+ ctx->saved.tep = nvc0->tevlprog;
+ ctx->saved.gp = nvc0->gmtyprog;
+ ctx->saved.fp = nvc0->fragprog;
+
+ nvc0->rast = &ctx->rast;
+
+ nvc0->vertprog = &blitter->vp;
+ nvc0->tctlprog = NULL;
+ nvc0->tevlprog = NULL;
+ nvc0->gmtyprog = NULL;
+ nvc0->fragprog = ctx->fp;
+
+ for (s = 0; s <= 4; ++s) {
+ ctx->saved.num_textures[s] = nvc0->num_textures[s];
+ ctx->saved.num_samplers[s] = nvc0->num_samplers[s];
+ nvc0->textures_dirty[s] = (1 << nvc0->num_textures[s]) - 1;
+ nvc0->samplers_dirty[s] = (1 << nvc0->num_samplers[s]) - 1;
+ }
+ ctx->saved.texture[0] = nvc0->textures[4][0];
+ ctx->saved.texture[1] = nvc0->textures[4][1];
+ ctx->saved.sampler[0] = nvc0->samplers[4][0];
+ ctx->saved.sampler[1] = nvc0->samplers[4][1];
+
+ nvc0->samplers[4][0] = &blitter->sampler[ctx->filter];
+ nvc0->samplers[4][1] = &blitter->sampler[ctx->filter];
+
+ for (s = 0; s <= 3; ++s)
+ nvc0->num_samplers[s] = 0;
+ nvc0->num_samplers[4] = 2;
+
+ ctx->saved.dirty = nvc0->dirty;
+
+ nvc0->textures_dirty[4] |= 3;
+ nvc0->samplers_dirty[4] |= 3;
+
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1));
+
+ nvc0->dirty = NVC0_NEW_FRAMEBUFFER |
+ NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG |
+ NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG |
+ NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS;
+}
+
+static void
+nvc0_blitctx_post_blit(struct nvc0_blitctx *blit)
+{
+ struct nvc0_context *nvc0 = blit->nvc0;
+ int s;
+
+ pipe_surface_reference(&nvc0->framebuffer.cbufs[0], NULL);
+
+ nvc0->framebuffer.width = blit->saved.fb.width;
+ nvc0->framebuffer.height = blit->saved.fb.height;
+ nvc0->framebuffer.nr_cbufs = blit->saved.fb.nr_cbufs;
+ nvc0->framebuffer.cbufs[0] = blit->saved.fb.cbufs[0];
+ nvc0->framebuffer.zsbuf = blit->saved.fb.zsbuf;
+
+ nvc0->rast = blit->saved.rast;
+
+ nvc0->vertprog = blit->saved.vp;
+ nvc0->tctlprog = blit->saved.tcp;
+ nvc0->tevlprog = blit->saved.tep;
+ nvc0->gmtyprog = blit->saved.gp;
+ nvc0->fragprog = blit->saved.fp;
+
+ pipe_sampler_view_reference(&nvc0->textures[4][0], NULL);
+ pipe_sampler_view_reference(&nvc0->textures[4][1], NULL);
+
+ for (s = 0; s <= 4; ++s) {
+ nvc0->num_textures[s] = blit->saved.num_textures[s];
+ nvc0->num_samplers[s] = blit->saved.num_samplers[s];
+ nvc0->textures_dirty[s] = (1 << nvc0->num_textures[s]) - 1;
+ nvc0->samplers_dirty[s] = (1 << nvc0->num_samplers[s]) - 1;
+ }
+ nvc0->textures[4][0] = blit->saved.texture[0];
+ nvc0->textures[4][1] = blit->saved.texture[1];
+ nvc0->samplers[4][0] = blit->saved.sampler[0];
+ nvc0->samplers[4][1] = blit->saved.sampler[1];
+
+ nvc0->textures_dirty[4] |= 3;
+ nvc0->samplers_dirty[4] |= 3;
+
+ if (nvc0->cond_query)
+ nvc0->base.pipe.render_condition(&nvc0->base.pipe, nvc0->cond_query,
+ nvc0->cond_cond, nvc0->cond_mode);
+
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1));
+
+ nvc0->dirty = blit->saved.dirty |
+ (NVC0_NEW_FRAMEBUFFER | NVC0_NEW_SCISSOR | NVC0_NEW_SAMPLE_MASK |
+ NVC0_NEW_RASTERIZER | NVC0_NEW_ZSA | NVC0_NEW_BLEND |
+ NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS |
+ NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG |
+ NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG |
+ NVC0_NEW_TFB_TARGETS);
+}
+
+static void
+nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
+{
+ struct nvc0_blitctx *blit = nvc0->blit;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct pipe_resource *src = info->src.resource;
+ struct pipe_resource *dst = info->dst.resource;
+ int32_t minx, maxx, miny, maxy;
+ int32_t i;
+ float x0, x1, y0, y1, z;
+ float dz;
+ float x_range, y_range;
+
+ blit->mode = nv50_blit_select_mode(info);
+ blit->color_mask = nv50_blit_derive_color_mask(info);
+ blit->filter = nv50_blit_get_filter(info);
+
+ nvc0_blit_select_fp(blit, info);
+ nvc0_blitctx_pre_blit(blit);
+
+ nvc0_blit_set_dst(blit, dst, info->dst.level, -1, info->dst.format);
+ nvc0_blit_set_src(blit, src, info->src.level, -1, info->src.format,
+ blit->filter);
+
+ nvc0_blitctx_prepare_state(blit);
+
+ nvc0_state_validate(nvc0, ~0, 48);
+
+ x_range = (float)info->src.box.width / (float)info->dst.box.width;
+ y_range = (float)info->src.box.height / (float)info->dst.box.height;
+
+ x0 = (float)info->src.box.x - x_range * (float)info->dst.box.x;
+ y0 = (float)info->src.box.y - y_range * (float)info->dst.box.y;
+
+ x1 = x0 + 16384.0f * x_range;
+ y1 = y0 + 16384.0f * y_range;
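+   /* The blit triangle spans 16384 pixels in each direction (see the vertex
+    * coordinates below), so the source window is extrapolated to the same
+    * extent at the computed scale.
+    */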
+
+ x0 *= (float)(1 << nv50_miptree(src)->ms_x);
+ x1 *= (float)(1 << nv50_miptree(src)->ms_x);
+ y0 *= (float)(1 << nv50_miptree(src)->ms_y);
+ y1 *= (float)(1 << nv50_miptree(src)->ms_y);
+
+ if (src->last_level > 0) {
+ /* If there are mip maps, GPU always assumes normalized coordinates. */
+ const unsigned l = info->src.level;
+ const float fh = u_minify(src->width0 << nv50_miptree(src)->ms_x, l);
+ const float fv = u_minify(src->height0 << nv50_miptree(src)->ms_y, l);
+ x0 /= fh;
+ x1 /= fh;
+ y0 /= fv;
+ y1 /= fv;
+ }
+
+ dz = (float)info->src.box.depth / (float)info->dst.box.depth;
+ z = (float)info->src.box.z;
+ if (nv50_miptree(src)->layout_3d)
+ z += 0.5f * dz;
+
+ IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0);
+ IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 |
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1);
+ BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2);
+ PUSH_DATA (push, nvc0->framebuffer.width << 16);
+ PUSH_DATA (push, nvc0->framebuffer.height << 16);
+
+ /* Draw a large triangle in screen coordinates covering the whole
+ * render target, with scissors defining the destination region.
+ * The vertex is supplied with non-normalized texture coordinates
+ * arranged in a way to yield the desired offset and scale.
+ */
+
+ minx = info->dst.box.x;
+ maxx = info->dst.box.x + info->dst.box.width;
+ miny = info->dst.box.y;
+ maxy = info->dst.box.y + info->dst.box.height;
+ if (info->scissor_enable) {
+ minx = MAX2(minx, info->scissor.minx);
+ maxx = MIN2(maxx, info->scissor.maxx);
+ miny = MAX2(miny, info->scissor.miny);
+ maxy = MIN2(maxy, info->scissor.maxy);
+ }
+ BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2);
+ PUSH_DATA (push, (maxx << 16) | minx);
+ PUSH_DATA (push, (maxy << 16) | miny);
+
+ for (i = 0; i < info->dst.box.depth; ++i, z += dz) {
+ if (info->dst.box.z + i) {
+ BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
+ PUSH_DATA (push, info->dst.box.z + i);
+ }
+
+ IMMED_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL),
+ NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES);
+
+ BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 4);
+ PUSH_DATA (push, 0x74301);
+ PUSH_DATAf(push, x0);
+ PUSH_DATAf(push, y0);
+ PUSH_DATAf(push, z);
+ BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 3);
+ PUSH_DATA (push, 0x74200);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 4);
+ PUSH_DATA (push, 0x74301);
+ PUSH_DATAf(push, x1);
+ PUSH_DATAf(push, y0);
+ PUSH_DATAf(push, z);
+ BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 3);
+ PUSH_DATA (push, 0x74200);
+ PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_x);
+ PUSH_DATAf(push, 0.0f);
+ BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 4);
+ PUSH_DATA (push, 0x74301);
+ PUSH_DATAf(push, x0);
+ PUSH_DATAf(push, y1);
+ PUSH_DATAf(push, z);
+ BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 3);
+ PUSH_DATA (push, 0x74200);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_y);
+
+ IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
+ }
+ if (info->dst.box.z + info->dst.box.depth - 1)
+ IMMED_NVC0(push, NVC0_3D(LAYER), 0);
+
+ nvc0_blitctx_post_blit(blit);
+
+ /* restore viewport */
+
+ BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2);
+ PUSH_DATA (push, nvc0->vport_int[0]);
+ PUSH_DATA (push, nvc0->vport_int[1]);
+ IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1);
+}
+
+static void
+nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv50_miptree *dst = nv50_miptree(info->dst.resource);
+ struct nv50_miptree *src = nv50_miptree(info->src.resource);
+ const int32_t srcx_adj = info->src.box.width < 0 ? -1 : 0;
+ const int32_t srcy_adj = info->src.box.height < 0 ? -1 : 0;
+ const int dz = info->dst.box.z;
+ const int sz = info->src.box.z;
+ uint32_t dstw, dsth;
+ int32_t dstx, dsty;
+ int64_t srcx, srcy;
+ int64_t du_dx, dv_dy;
+ int i;
+ uint32_t mode;
+ uint32_t mask = nv50_blit_eng2d_get_mask(info);
+ boolean b;
+
+ mode = nv50_blit_get_filter(info) ?
+ NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR :
+ NVC0_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE;
+ mode |= (src->base.base.nr_samples > dst->base.base.nr_samples) ?
+ NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER : NVC0_2D_BLIT_CONTROL_ORIGIN_CENTER;
+
+ du_dx = ((int64_t)info->src.box.width << 32) / info->dst.box.width;
+ dv_dy = ((int64_t)info->src.box.height << 32) / info->dst.box.height;
+
+ b = info->dst.format == info->src.format;
+ nvc0_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format, b);
+ nvc0_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format, b);
+
+ if (info->scissor_enable) {
+ BEGIN_NVC0(push, NVC0_2D(CLIP_X), 5);
+ PUSH_DATA (push, info->scissor.minx << dst->ms_x);
+ PUSH_DATA (push, info->scissor.miny << dst->ms_y);
+ PUSH_DATA (push, (info->scissor.maxx - info->scissor.minx) << dst->ms_x);
+ PUSH_DATA (push, (info->scissor.maxy - info->scissor.miny) << dst->ms_y);
+ PUSH_DATA (push, 1); /* enable */
+ }
+
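+   /* Partial component writes: ROP 0xca (DPSDxax) takes source bits where the
+    * pattern is 1 and keeps destination bits where it is 0, so the component
+    * mask is loaded as a constant 32bpp pattern colour.
+    */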
+ if (mask != 0xffffffff) {
+ IMMED_NVC0(push, NVC0_2D(ROP), 0xca); /* DPSDxax */
+ IMMED_NVC0(push, NVC0_2D(PATTERN_COLOR_FORMAT),
+ NVC0_2D_PATTERN_COLOR_FORMAT_32BPP);
+ BEGIN_NVC0(push, NVC0_2D(PATTERN_COLOR(0)), 4);
+ PUSH_DATA (push, 0x00000000);
+ PUSH_DATA (push, mask);
+ PUSH_DATA (push, 0xffffffff);
+ PUSH_DATA (push, 0xffffffff);
+ IMMED_NVC0(push, NVC0_2D(OPERATION), NVC0_2D_OPERATION_ROP);
+ } else
+ if (info->src.format != info->dst.format) {
+ if (info->src.format == PIPE_FORMAT_R8_UNORM ||
+ info->src.format == PIPE_FORMAT_R8_SNORM ||
+ info->src.format == PIPE_FORMAT_R16_UNORM ||
+ info->src.format == PIPE_FORMAT_R16_SNORM ||
+ info->src.format == PIPE_FORMAT_R16_FLOAT ||
+ info->src.format == PIPE_FORMAT_R32_FLOAT) {
+ mask = 0xffff0000; /* also makes condition for OPERATION reset true */
+ BEGIN_NVC0(push, NVC0_2D(BETA4), 2);
+ PUSH_DATA (push, mask);
+ PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY_PREMULT);
+ } else
+ if (info->src.format == PIPE_FORMAT_A8_UNORM) {
+ mask = 0xff000000;
+ BEGIN_NVC0(push, NVC0_2D(BETA4), 2);
+ PUSH_DATA (push, mask);
+ PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY_PREMULT);
+ }
+ }
+
+ if (src->ms_x > dst->ms_x || src->ms_y > dst->ms_y) {
+ /* ms_x is always >= ms_y */
+ du_dx <<= src->ms_x - dst->ms_x;
+ dv_dy <<= src->ms_y - dst->ms_y;
+ } else {
+ du_dx >>= dst->ms_x - src->ms_x;
+ dv_dy >>= dst->ms_y - src->ms_y;
+ }
+
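+   /* source coordinates and the du/dx, dv/dy steps are 32.32 fixed point */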
+ srcx = (int64_t)(info->src.box.x + srcx_adj) << (src->ms_x + 32);
+ srcy = (int64_t)(info->src.box.y + srcy_adj) << (src->ms_y + 32);
+
+ if (src->base.base.nr_samples > dst->base.base.nr_samples) {
+      /* center src coordinates for proper MS resolve filtering */
+ srcx += (int64_t)(src->ms_x + 0) << 32;
+ srcy += (int64_t)(src->ms_y + 1) << 31;
+ }
+
+ dstx = info->dst.box.x << dst->ms_x;
+ dsty = info->dst.box.y << dst->ms_y;
+
+ dstw = info->dst.box.width << dst->ms_x;
+ dsth = info->dst.box.height << dst->ms_y;
+
+ if (dstx < 0) {
+ dstw += dstx;
+ srcx -= du_dx * dstx;
+ dstx = 0;
+ }
+ if (dsty < 0) {
+ dsth += dsty;
+ srcy -= dv_dy * dsty;
+ dsty = 0;
+ }
+
+ IMMED_NVC0(push, NVC0_2D(BLIT_CONTROL), mode);
+ BEGIN_NVC0(push, NVC0_2D(BLIT_DST_X), 4);
+ PUSH_DATA (push, dstx);
+ PUSH_DATA (push, dsty);
+ PUSH_DATA (push, dstw);
+ PUSH_DATA (push, dsth);
+ BEGIN_NVC0(push, NVC0_2D(BLIT_DU_DX_FRACT), 4);
+ PUSH_DATA (push, du_dx);
+ PUSH_DATA (push, du_dx >> 32);
+ PUSH_DATA (push, dv_dy);
+ PUSH_DATA (push, dv_dy >> 32);
+
+ BCTX_REFN(nvc0->bufctx, 2D, &dst->base, WR);
+ BCTX_REFN(nvc0->bufctx, 2D, &src->base, RD);
+ nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx);
+ if (nouveau_pushbuf_validate(nvc0->base.pushbuf))
+ return;
+
+ for (i = 0; i < info->dst.box.depth; ++i) {
+ if (i > 0) {
+ /* no scaling in z-direction possible for eng2d blits */
+ if (dst->layout_3d) {
+ BEGIN_NVC0(push, NVC0_2D(DST_LAYER), 1);
+ PUSH_DATA (push, info->dst.box.z + i);
+ } else {
+ const unsigned z = info->dst.box.z + i;
+ BEGIN_NVC0(push, NVC0_2D(DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, dst->base.address + z * dst->layer_stride);
+ PUSH_DATA (push, dst->base.address + z * dst->layer_stride);
+ }
+ if (src->layout_3d) {
+ /* not possible because of depth tiling */
+ assert(0);
+ } else {
+ const unsigned z = info->src.box.z + i;
+ BEGIN_NVC0(push, NVC0_2D(SRC_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, src->base.address + z * src->layer_stride);
+ PUSH_DATA (push, src->base.address + z * src->layer_stride);
+ }
+ BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_Y_INT), 1); /* trigger */
+ PUSH_DATA (push, srcy >> 32);
+ } else {
+ BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_X_FRACT), 4);
+ PUSH_DATA (push, srcx);
+ PUSH_DATA (push, srcx >> 32);
+ PUSH_DATA (push, srcy);
+ PUSH_DATA (push, srcy >> 32);
+ }
+ }
+ nvc0_resource_validate(&dst->base, NOUVEAU_BO_WR);
+ nvc0_resource_validate(&src->base, NOUVEAU_BO_RD);
+
+ nouveau_bufctx_reset(nvc0->bufctx, NVC0_BIND_2D);
+
+ if (info->scissor_enable)
+ IMMED_NVC0(push, NVC0_2D(CLIP_ENABLE), 0);
+ if (mask != 0xffffffff)
+ IMMED_NVC0(push, NVC0_2D(OPERATION), NVC0_2D_OPERATION_SRCCOPY);
+}
+
+static void
+nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ boolean eng3d = FALSE;
+
+ if (util_format_is_depth_or_stencil(info->dst.resource->format)) {
+ if (!(info->mask & PIPE_MASK_ZS))
+ return;
+ if (info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT ||
+ info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+ eng3d = TRUE;
+ if (info->filter != PIPE_TEX_FILTER_NEAREST)
+ eng3d = TRUE;
+ } else {
+ if (!(info->mask & PIPE_MASK_RGBA))
+ return;
+ if (info->mask != PIPE_MASK_RGBA)
+ eng3d = TRUE;
+ }
+
+ if (nv50_miptree(info->src.resource)->layout_3d) {
+ eng3d = TRUE;
+ } else
+ if (info->src.box.depth != info->dst.box.depth) {
+ eng3d = TRUE;
+      debug_printf("blit: cannot filter array or cube textures in z direction\n");
+ }
+
+ if (!eng3d && info->dst.format != info->src.format) {
+ if (!nv50_2d_dst_format_faithful(info->dst.format)) {
+ eng3d = TRUE;
+ } else
+ if (!nv50_2d_src_format_faithful(info->src.format)) {
+ if (!util_format_is_luminance(info->src.format)) {
+ if (util_format_is_intensity(info->src.format))
+ eng3d = info->src.format != PIPE_FORMAT_I8_UNORM;
+ else
+ if (!nv50_2d_dst_format_ops_supported(info->dst.format))
+ eng3d = TRUE;
+ else
+ eng3d = !nv50_2d_format_supported(info->src.format);
+ }
+ } else
+ if (util_format_is_luminance_alpha(info->src.format))
+ eng3d = TRUE;
+ }
+
+ if (info->src.resource->nr_samples == 8 &&
+ info->dst.resource->nr_samples <= 1)
+ eng3d = TRUE;
+#if 0
+ /* FIXME: can't make this work with eng2d anymore, at least not on nv50 */
+ if (info->src.resource->nr_samples > 1 ||
+ info->dst.resource->nr_samples > 1)
+ eng3d = TRUE;
+#endif
+ /* FIXME: find correct src coordinates adjustments */
+ if ((info->src.box.width != info->dst.box.width &&
+ info->src.box.width != -info->dst.box.width) ||
+ (info->src.box.height != info->dst.box.height &&
+ info->src.box.height != -info->dst.box.height))
+ eng3d = TRUE;
+
+ if (!eng3d)
+ nvc0_blit_eng2d(nvc0, info);
+ else
+ nvc0_blit_3d(nvc0, info);
+
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_blit_count, 1);
+}
+
+boolean
+nvc0_blitter_create(struct nvc0_screen *screen)
+{
+ screen->blitter = CALLOC_STRUCT(nvc0_blitter);
+ if (!screen->blitter) {
+ NOUVEAU_ERR("failed to allocate blitter struct\n");
+ return FALSE;
+ }
+ screen->blitter->screen = screen;
+
+ pipe_mutex_init(screen->blitter->mutex);
+
+ nvc0_blitter_make_vp(screen->blitter);
+ nvc0_blitter_make_sampler(screen->blitter);
+
+ return TRUE;
+}
+
+void
+nvc0_blitter_destroy(struct nvc0_screen *screen)
+{
+ struct nvc0_blitter *blitter = screen->blitter;
+ unsigned i, m;
+
+ for (i = 0; i < NV50_BLIT_MAX_TEXTURE_TYPES; ++i) {
+ for (m = 0; m < NV50_BLIT_MODES; ++m) {
+ struct nvc0_program *prog = blitter->fp[i][m];
+ if (prog) {
+ nvc0_program_destroy(NULL, prog);
+ FREE((void *)prog->pipe.tokens);
+ FREE(prog);
+ }
+ }
+ }
+
+ FREE(blitter);
+}
+
+boolean
+nvc0_blitctx_create(struct nvc0_context *nvc0)
+{
+ nvc0->blit = CALLOC_STRUCT(nvc0_blitctx);
+ if (!nvc0->blit) {
+ NOUVEAU_ERR("failed to allocate blit context\n");
+ return FALSE;
+ }
+
+ nvc0->blit->nvc0 = nvc0;
+
+ nvc0->blit->rast.pipe.half_pixel_center = 1;
+
+ return TRUE;
+}
+
+void
+nvc0_blitctx_destroy(struct nvc0_context *nvc0)
+{
+ if (nvc0->blit)
+ FREE(nvc0->blit);
+}
+
+void
+nvc0_init_surface_functions(struct nvc0_context *nvc0)
+{
+ struct pipe_context *pipe = &nvc0->base.pipe;
+
+ pipe->resource_copy_region = nvc0_resource_copy_region;
+ pipe->blit = nvc0_blit;
+ pipe->clear_render_target = nvc0_clear_render_target;
+ pipe->clear_depth_stencil = nvc0_clear_depth_stencil;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
new file mode 100644
index 0000000..765cd2d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -0,0 +1,814 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_resource.h"
+#include "nv50/nv50_texture.xml.h"
+#include "nv50/nv50_defs.xml.h"
+
+#include "util/u_format.h"
+
+#define NVE4_TIC_ENTRY_INVALID 0x000fffff
+#define NVE4_TSC_ENTRY_INVALID 0xfff00000
+
+#define NV50_TIC_0_SWIZZLE__MASK \
+ (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \
+ NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
+
+static INLINE uint32_t
+nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int)
+{
+ switch (swz) {
+ case PIPE_SWIZZLE_RED:
+ return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT;
+ case PIPE_SWIZZLE_GREEN:
+ return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT;
+ case PIPE_SWIZZLE_BLUE:
+ return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT;
+ case PIPE_SWIZZLE_ALPHA:
+ return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT;
+ case PIPE_SWIZZLE_ONE:
+ return tex_int ? NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT;
+ case PIPE_SWIZZLE_ZERO:
+ default:
+ return NV50_TIC_MAP_ZERO;
+ }
+}
+
+struct pipe_sampler_view *
+nvc0_create_sampler_view(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ const struct pipe_sampler_view *templ)
+{
+ uint32_t flags = 0;
+
+ if (res->target == PIPE_TEXTURE_RECT || res->target == PIPE_BUFFER)
+ flags |= NV50_TEXVIEW_SCALED_COORDS;
+
+ return nvc0_create_texture_view(pipe, res, templ, flags, res->target);
+}
+
+struct pipe_sampler_view *
+nvc0_create_texture_view(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *templ,
+ uint32_t flags,
+ enum pipe_texture_target target)
+{
+ const struct util_format_description *desc;
+ uint64_t address;
+ uint32_t *tic;
+ uint32_t swz[4];
+ uint32_t width, height;
+ uint32_t depth;
+ struct nv50_tic_entry *view;
+ struct nv50_miptree *mt;
+ boolean tex_int;
+
+ view = MALLOC_STRUCT(nv50_tic_entry);
+ if (!view)
+ return NULL;
+ mt = nv50_miptree(texture);
+
+ view->pipe = *templ;
+ view->pipe.reference.count = 1;
+ view->pipe.texture = NULL;
+ view->pipe.context = pipe;
+
+ view->id = -1;
+
+ pipe_resource_reference(&view->pipe.texture, texture);
+
+ tic = &view->tic[0];
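+   /* Build the 8-dword texture image control (TIC) entry; it is uploaded to
+    * the screen's TIC table the first time the view is bound (tic->id < 0).
+    */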
+
+ desc = util_format_description(view->pipe.format);
+
+ tic[0] = nvc0_format_table[view->pipe.format].tic;
+
+ tex_int = util_format_is_pure_integer(view->pipe.format);
+
+ swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int);
+ swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int);
+ swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int);
+ swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int);
+ tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) |
+ (swz[0] << NV50_TIC_0_MAPR__SHIFT) |
+ (swz[1] << NV50_TIC_0_MAPG__SHIFT) |
+ (swz[2] << NV50_TIC_0_MAPB__SHIFT) |
+ (swz[3] << NV50_TIC_0_MAPA__SHIFT);
+
+ address = mt->base.address;
+
+ tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER;
+
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+ tic[2] |= NV50_TIC_2_COLORSPACE_SRGB;
+
+ if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
+ tic[2] |= NV50_TIC_2_NORMALIZED_COORDS;
+
+ /* check for linear storage type */
+ if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
+ if (texture->target == PIPE_BUFFER) {
+ assert(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS));
+ address +=
+ view->pipe.u.buf.first_element * desc->block.bits / 8;
+ tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER;
+ tic[3] = 0;
+ tic[4] = /* width */
+ view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1;
+ tic[5] = 0;
+ } else {
+ /* must be 2D texture without mip maps */
+ tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT;
+ tic[3] = mt->level[0].pitch;
+ tic[4] = mt->base.base.width0;
+ tic[5] = (1 << 16) | mt->base.base.height0;
+ }
+ tic[6] =
+ tic[7] = 0;
+ tic[1] = address;
+ tic[2] |= address >> 32;
+ return &view->pipe;
+ }
+
+ tic[2] |=
+ ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) |
+ ((mt->level[0].tile_mode & 0xf00) << (25 - 8));
+
+ depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
+
+ if (mt->base.base.array_size > 1) {
+ /* there doesn't seem to be a base layer field in TIC */
+ address += view->pipe.u.tex.first_layer * mt->layer_stride;
+ depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
+ }
+ tic[1] = address;
+ tic[2] |= address >> 32;
+
+ switch (target) {
+ case PIPE_TEXTURE_1D:
+ tic[2] |= NV50_TIC_2_TARGET_1D;
+ break;
+ case PIPE_TEXTURE_2D:
+ tic[2] |= NV50_TIC_2_TARGET_2D;
+ break;
+ case PIPE_TEXTURE_RECT:
+ tic[2] |= NV50_TIC_2_TARGET_RECT;
+ break;
+ case PIPE_TEXTURE_3D:
+ tic[2] |= NV50_TIC_2_TARGET_3D;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ depth /= 6;
+ tic[2] |= NV50_TIC_2_TARGET_CUBE;
+ break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY;
+ break;
+ case PIPE_TEXTURE_2D_ARRAY:
+ tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY;
+ break;
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ depth /= 6;
+ tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY;
+ break;
+ default:
+ NOUVEAU_ERR("unexpected/invalid texture target: %d\n",
+ mt->base.base.target);
+      return NULL;
+ }
+
+ tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
+
+ if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
+ width = mt->base.base.width0 << mt->ms_x;
+ height = mt->base.base.height0 << mt->ms_y;
+ } else {
+ width = mt->base.base.width0;
+ height = mt->base.base.height0;
+ }
+
+ tic[4] = (1 << 31) | width;
+
+ tic[5] = height & 0xffff;
+ tic[5] |= depth << 16;
+ tic[5] |= mt->base.base.last_level << 28;
+
+ /* sampling points: (?) */
+ if (flags & NV50_TEXVIEW_ACCESS_RESOLVE)
+ tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000;
+ else
+ tic[6] = 0x03000000;
+
+ tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
+ tic[7] |= mt->ms_mode << 12;
+
+ return &view->pipe;
+}
+
+static boolean
+nvc0_validate_tic(struct nvc0_context *nvc0, int s)
+{
+ uint32_t commands[32];
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nouveau_bo *txc = nvc0->screen->txc;
+ unsigned i;
+ unsigned n = 0;
+ boolean need_flush = FALSE;
+
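+   /* Each BIND_TIC word is (tic_id << 9) | (slot << 1) | enable. */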
+ for (i = 0; i < nvc0->num_textures[s]; ++i) {
+ struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
+ struct nv04_resource *res;
+ const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
+
+ if (!tic) {
+ if (dirty)
+ commands[n++] = (i << 1) | 0;
+ continue;
+ }
+ res = nv04_resource(tic->pipe.texture);
+
+ if (tic->id < 0) {
+ tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
+
+ PUSH_SPACE(push, 17);
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
+ PUSH_DATAh(push, txc->offset + (tic->id * 32));
+ PUSH_DATA (push, txc->offset + (tic->id * 32));
+ BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 32);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
+ PUSH_DATA (push, 0x100111);
+ BEGIN_NIC0(push, NVC0_M2MF(DATA), 8);
+ PUSH_DATAp(push, &tic->tic[0], 8);
+
+ need_flush = TRUE;
+ } else
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
+ PUSH_DATA (push, (tic->id << 4) | 1);
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
+ }
+ nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
+
+ res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ if (!dirty)
+ continue;
+ commands[n++] = (tic->id << 9) | (i << 1) | 1;
+
+ BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD);
+ }
+ for (; i < nvc0->state.num_textures[s]; ++i)
+ commands[n++] = (i << 1) | 0;
+
+ nvc0->state.num_textures[s] = nvc0->num_textures[s];
+
+ if (n) {
+ BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);
+ PUSH_DATAp(push, commands, n);
+ }
+ nvc0->textures_dirty[s] = 0;
+
+ return need_flush;
+}
+
+static boolean
+nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
+{
+ struct nouveau_bo *txc = nvc0->screen->txc;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ unsigned i;
+ boolean need_flush = FALSE;
+
+ for (i = 0; i < nvc0->num_textures[s]; ++i) {
+ struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
+ struct nv04_resource *res;
+ const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
+
+ if (!tic) {
+ nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
+ continue;
+ }
+ res = nv04_resource(tic->pipe.texture);
+
+ if (tic->id < 0) {
+ tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
+
+ PUSH_SPACE(push, 16);
+ BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, txc->offset + (tic->id * 32));
+ PUSH_DATA (push, txc->offset + (tic->id * 32));
+ BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 32);
+ PUSH_DATA (push, 1);
+ BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9);
+ PUSH_DATA (push, 0x1001);
+ PUSH_DATAp(push, &tic->tic[0], 8);
+
+ need_flush = TRUE;
+ } else
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
+ PUSH_DATA (push, (tic->id << 4) | 1);
+ }
+ nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
+
+ res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
+ nvc0->tex_handles[s][i] |= tic->id;
+ if (dirty)
+ BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD);
+ }
+ for (; i < nvc0->state.num_textures[s]; ++i) {
+ nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
+ nvc0->textures_dirty[s] |= 1 << i;
+ }
+
+ nvc0->state.num_textures[s] = nvc0->num_textures[s];
+
+ return need_flush;
+}
+
+void nvc0_validate_textures(struct nvc0_context *nvc0)
+{
+ boolean need_flush;
+
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
+ need_flush = nve4_validate_tic(nvc0, 0);
+ need_flush |= nve4_validate_tic(nvc0, 3);
+ need_flush |= nve4_validate_tic(nvc0, 4);
+ } else {
+ need_flush = nvc0_validate_tic(nvc0, 0);
+ need_flush |= nvc0_validate_tic(nvc0, 3);
+ need_flush |= nvc0_validate_tic(nvc0, 4);
+ }
+
+ if (need_flush) {
+ BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1);
+ PUSH_DATA (nvc0->base.pushbuf, 0);
+ }
+}
+
+static boolean
+nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
+{
+ uint32_t commands[16];
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ unsigned i;
+ unsigned n = 0;
+ boolean need_flush = FALSE;
+
+ for (i = 0; i < nvc0->num_samplers[s]; ++i) {
+ struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
+
+ if (!(nvc0->samplers_dirty[s] & (1 << i)))
+ continue;
+ if (!tsc) {
+ commands[n++] = (i << 4) | 0;
+ continue;
+ }
+ if (tsc->id < 0) {
+ tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
+
+ nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,
+ 65536 + tsc->id * 32, NOUVEAU_BO_VRAM,
+ 32, tsc->tsc);
+ need_flush = TRUE;
+ }
+ nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
+
+ commands[n++] = (tsc->id << 12) | (i << 4) | 1;
+ }
+ for (; i < nvc0->state.num_samplers[s]; ++i)
+ commands[n++] = (i << 4) | 0;
+
+ nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
+
+ if (n) {
+ BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n);
+ PUSH_DATAp(push, commands, n);
+ }
+ nvc0->samplers_dirty[s] = 0;
+
+ return need_flush;
+}
+
+boolean
+nve4_validate_tsc(struct nvc0_context *nvc0, int s)
+{
+ struct nouveau_bo *txc = nvc0->screen->txc;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ unsigned i;
+ boolean need_flush = FALSE;
+
+ for (i = 0; i < nvc0->num_samplers[s]; ++i) {
+ struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
+
+ if (!tsc) {
+ nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
+ continue;
+ }
+ if (tsc->id < 0) {
+ tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
+
+ PUSH_SPACE(push, 16);
+ BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, txc->offset + 65536 + (tsc->id * 32));
+ PUSH_DATA (push, txc->offset + 65536 + (tsc->id * 32));
+ BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 32);
+ PUSH_DATA (push, 1);
+ BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9);
+ PUSH_DATA (push, 0x1001);
+ PUSH_DATAp(push, &tsc->tsc[0], 8);
+
+ need_flush = TRUE;
+ }
+ nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
+
+ nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID;
+ nvc0->tex_handles[s][i] |= tsc->id << 20;
+ }
+ for (; i < nvc0->state.num_samplers[s]; ++i) {
+ nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
+ nvc0->samplers_dirty[s] |= 1 << i;
+ }
+
+ nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
+
+ return need_flush;
+}
+
+void nvc0_validate_samplers(struct nvc0_context *nvc0)
+{
+ boolean need_flush;
+
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
+ need_flush = nve4_validate_tsc(nvc0, 0);
+ need_flush |= nve4_validate_tsc(nvc0, 3);
+ need_flush |= nve4_validate_tsc(nvc0, 4);
+ } else {
+ need_flush = nvc0_validate_tsc(nvc0, 0);
+ need_flush |= nvc0_validate_tsc(nvc0, 3);
+ need_flush |= nvc0_validate_tsc(nvc0, 4);
+ }
+
+ if (need_flush) {
+ BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1);
+ PUSH_DATA (nvc0->base.pushbuf, 0);
+ }
+}
+
+/* Upload the "diagonal" entries for the possible texture sources ($t == $s).
+ * At some point we might want to get a list of the combinations used by a
+ * shader and fill in those entries instead of having it extract the handles.
+ */
+void
+nve4_set_tex_handles(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ uint64_t address;
+ unsigned s;
+
+ if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
+ return;
+ address = nvc0->screen->uniform_bo->offset + (5 << 16);
+
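+   /* A combined handle is (tsc_id << 20) | tic_id; it is written to the
+    * driver constant buffer of each stage at word 8 + slot.
+    */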
+ for (s = 0; s < 5; ++s, address += (1 << 9)) {
+ uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
+ if (!dirty)
+ continue;
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 512);
+ PUSH_DATAh(push, address);
+ PUSH_DATA (push, address);
+ do {
+ int i = ffs(dirty) - 1;
+ dirty &= ~(1 << i);
+
+ BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
+ PUSH_DATA (push, (8 + i) * 4);
+ PUSH_DATA (push, nvc0->tex_handles[s][i]);
+ } while (dirty);
+
+ nvc0->textures_dirty[s] = 0;
+ nvc0->samplers_dirty[s] = 0;
+ }
+}
+
+
+static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
+static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
+static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
+
+void
+nve4_set_surface_info(struct nouveau_pushbuf *push,
+ struct pipe_surface *psf,
+ struct nvc0_screen *screen)
+{
+ struct nv50_surface *sf = nv50_surface(psf);
+ struct nv04_resource *res;
+ uint64_t address;
+ uint32_t *const info = push->cur;
+ uint8_t log2cpp;
+
+ if (psf && !nve4_su_format_map[psf->format])
+ NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");
+
+ push->cur += 16;
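+   /* the 16-dword surface descriptor is written in place into the pushbuf */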
+
+ if (!psf || !nve4_su_format_map[psf->format]) {
+ memset(info, 0, 16 * sizeof(*info));
+
+ info[0] = 0xbadf0000;
+ info[1] = 0x80004000;
+ info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] +
+ screen->lib_code->start;
+ return;
+ }
+ res = nv04_resource(sf->base.texture);
+
+ address = res->address + sf->offset;
+
+ info[8] = sf->width;
+ info[9] = sf->height;
+ info[10] = sf->depth;
+ switch (res->base.target) {
+ case PIPE_TEXTURE_1D_ARRAY:
+ info[11] = 1;
+ break;
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
+ info[11] = 2;
+ break;
+ case PIPE_TEXTURE_3D:
+ info[11] = 3;
+ break;
+ case PIPE_TEXTURE_2D_ARRAY:
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ info[11] = 4;
+ break;
+ default:
+ info[11] = 0;
+ break;
+ }
+ log2cpp = (0xf000 & nve4_su_format_aux_map[sf->base.format]) >> 12;
+
+ info[12] = nve4_suldp_lib_offset[sf->base.format] + screen->lib_code->start;
+
+ /* limit in bytes for raw access */
+ info[13] = (0x06 << 22) | ((sf->width << log2cpp) - 1);
+
+ info[1] = nve4_su_format_map[sf->base.format];
+
+#if 0
+ switch (util_format_get_blocksizebits(sf->base.format)) {
+ case 16: info[1] |= 1 << 16; break;
+ case 32: info[1] |= 2 << 16; break;
+ case 64: info[1] |= 3 << 16; break;
+ case 128: info[1] |= 4 << 16; break;
+ default:
+ break;
+ }
+#else
+ info[1] |= log2cpp << 16;
+ info[1] |= 0x4000;
+ info[1] |= (0x0f00 & nve4_su_format_aux_map[sf->base.format]);
+#endif
+
+ if (res->base.target == PIPE_BUFFER) {
+ info[0] = address >> 8;
+ info[2] = sf->width - 1;
+ info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22;
+ info[3] = 0;
+ info[4] = 0;
+ info[5] = 0;
+ info[6] = 0;
+ info[7] = 0;
+ info[14] = 0;
+ info[15] = 0;
+ } else {
+ struct nv50_miptree *mt = nv50_miptree(&res->base);
+ struct nv50_miptree_level *lvl = &mt->level[sf->base.u.tex.level];
+ const unsigned z = sf->base.u.tex.first_layer;
+
+ if (z) {
+ if (mt->layout_3d) {
+ address += nvc0_mt_zslice_offset(mt, psf->u.tex.level, z);
+ /* doesn't work if z passes z-tile boundary */
+ assert(sf->depth == 1);
+ } else {
+ address += mt->layer_stride * z;
+ }
+ }
+ info[0] = address >> 8;
+ info[2] = sf->width - 1;
+   /* NOTE: it is really important that the aux format bits get ORed in here: */
+ info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22;
+ info[3] = (0x88 << 24) | (lvl->pitch / 64);
+ info[4] = sf->height - 1;
+ info[4] |= (lvl->tile_mode & 0x0f0) << 25;
+ info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
+ info[5] = mt->layer_stride >> 8;
+ info[6] = sf->depth - 1;
+ info[6] |= (lvl->tile_mode & 0xf00) << 21;
+ info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;
+ info[7] = 0;
+ info[14] = mt->ms_x;
+ info[15] = mt->ms_y;
+ }
+}
+
+static INLINE void
+nvc0_update_surface_bindings(struct nvc0_context *nvc0)
+{
+ /* TODO */
+}
+
+static INLINE void
+nve4_update_surface_bindings(struct nvc0_context *nvc0)
+{
+ /* TODO */
+}
+
+void
+nvc0_validate_surfaces(struct nvc0_context *nvc0)
+{
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
+ nve4_update_surface_bindings(nvc0);
+ } else {
+ nvc0_update_surface_bindings(nvc0);
+ }
+}
+
+
+static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
+{
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = NVE4_IMAGE_FORMAT_RGBA32_FLOAT,
+ [PIPE_FORMAT_R32G32B32A32_SINT] = NVE4_IMAGE_FORMAT_RGBA32_SINT,
+ [PIPE_FORMAT_R32G32B32A32_UINT] = NVE4_IMAGE_FORMAT_RGBA32_UINT,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = NVE4_IMAGE_FORMAT_RGBA16_FLOAT,
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = NVE4_IMAGE_FORMAT_RGBA16_UNORM,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = NVE4_IMAGE_FORMAT_RGBA16_SNORM,
+ [PIPE_FORMAT_R16G16B16A16_SINT] = NVE4_IMAGE_FORMAT_RGBA16_SINT,
+ [PIPE_FORMAT_R16G16B16A16_UINT] = NVE4_IMAGE_FORMAT_RGBA16_UINT,
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = NVE4_IMAGE_FORMAT_RGBA8_UNORM,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = NVE4_IMAGE_FORMAT_RGBA8_SNORM,
+ [PIPE_FORMAT_R8G8B8A8_SINT] = NVE4_IMAGE_FORMAT_RGBA8_SINT,
+ [PIPE_FORMAT_R8G8B8A8_UINT] = NVE4_IMAGE_FORMAT_RGBA8_UINT,
+ [PIPE_FORMAT_R11G11B10_FLOAT] = NVE4_IMAGE_FORMAT_R11G11B10_FLOAT,
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = NVE4_IMAGE_FORMAT_RGB10_A2_UNORM,
+/* [PIPE_FORMAT_R10G10B10A2_UINT] = NVE4_IMAGE_FORMAT_RGB10_A2_UINT, */
+ [PIPE_FORMAT_R32G32_FLOAT] = NVE4_IMAGE_FORMAT_RG32_FLOAT,
+ [PIPE_FORMAT_R32G32_SINT] = NVE4_IMAGE_FORMAT_RG32_SINT,
+ [PIPE_FORMAT_R32G32_UINT] = NVE4_IMAGE_FORMAT_RG32_UINT,
+ [PIPE_FORMAT_R16G16_FLOAT] = NVE4_IMAGE_FORMAT_RG16_FLOAT,
+ [PIPE_FORMAT_R16G16_UNORM] = NVE4_IMAGE_FORMAT_RG16_UNORM,
+ [PIPE_FORMAT_R16G16_SNORM] = NVE4_IMAGE_FORMAT_RG16_SNORM,
+ [PIPE_FORMAT_R16G16_SINT] = NVE4_IMAGE_FORMAT_RG16_SINT,
+ [PIPE_FORMAT_R16G16_UINT] = NVE4_IMAGE_FORMAT_RG16_UINT,
+ [PIPE_FORMAT_R8G8_UNORM] = NVE4_IMAGE_FORMAT_RG8_UNORM,
+ [PIPE_FORMAT_R8G8_SNORM] = NVE4_IMAGE_FORMAT_RG8_SNORM,
+ [PIPE_FORMAT_R8G8_SINT] = NVE4_IMAGE_FORMAT_RG8_SINT,
+ [PIPE_FORMAT_R8G8_UINT] = NVE4_IMAGE_FORMAT_RG8_UINT,
+ [PIPE_FORMAT_R32_FLOAT] = NVE4_IMAGE_FORMAT_R32_FLOAT,
+ [PIPE_FORMAT_R32_SINT] = NVE4_IMAGE_FORMAT_R32_SINT,
+ [PIPE_FORMAT_R32_UINT] = NVE4_IMAGE_FORMAT_R32_UINT,
+ [PIPE_FORMAT_R16_FLOAT] = NVE4_IMAGE_FORMAT_R16_FLOAT,
+ [PIPE_FORMAT_R16_UNORM] = NVE4_IMAGE_FORMAT_R16_UNORM,
+ [PIPE_FORMAT_R16_SNORM] = NVE4_IMAGE_FORMAT_R16_SNORM,
+ [PIPE_FORMAT_R16_SINT] = NVE4_IMAGE_FORMAT_R16_SINT,
+ [PIPE_FORMAT_R16_UINT] = NVE4_IMAGE_FORMAT_R16_UINT,
+ [PIPE_FORMAT_R8_UNORM] = NVE4_IMAGE_FORMAT_R8_UNORM,
+ [PIPE_FORMAT_R8_SNORM] = NVE4_IMAGE_FORMAT_R8_SNORM,
+ [PIPE_FORMAT_R8_SINT] = NVE4_IMAGE_FORMAT_R8_SINT,
+ [PIPE_FORMAT_R8_UINT] = NVE4_IMAGE_FORMAT_R8_UINT,
+};
+
+/* Auxiliary format description values for surface instructions.
+ * (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22
+ */
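+/* For example, 0x4842 (the 16-byte RGBA32 formats) decodes to
+ * log2(bytes per pixel) = 4, unk8 = 0x8, unk22 = 0x42.
+ */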
+static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] =
+{
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842,
+ [PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842,
+ [PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842,
+
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933,
+ [PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933,
+ [PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933,
+
+ [PIPE_FORMAT_R32G32_FLOAT] = 0x3433,
+ [PIPE_FORMAT_R32G32_SINT] = 0x3433,
+ [PIPE_FORMAT_R32G32_UINT] = 0x3433,
+
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24,
+/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24, */
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24,
+ [PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24,
+ [PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24,
+ [PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24,
+
+ [PIPE_FORMAT_R16G16_UNORM] = 0x2524,
+ [PIPE_FORMAT_R16G16_SNORM] = 0x2524,
+ [PIPE_FORMAT_R16G16_SINT] = 0x2524,
+ [PIPE_FORMAT_R16G16_UINT] = 0x2524,
+ [PIPE_FORMAT_R16G16_FLOAT] = 0x2524,
+
+ [PIPE_FORMAT_R32_SINT] = 0x2024,
+ [PIPE_FORMAT_R32_UINT] = 0x2024,
+ [PIPE_FORMAT_R32_FLOAT] = 0x2024,
+
+ [PIPE_FORMAT_R8G8_UNORM] = 0x1615,
+ [PIPE_FORMAT_R8G8_SNORM] = 0x1615,
+ [PIPE_FORMAT_R8G8_SINT] = 0x1615,
+ [PIPE_FORMAT_R8G8_UINT] = 0x1615,
+
+ [PIPE_FORMAT_R16_UNORM] = 0x1115,
+ [PIPE_FORMAT_R16_SNORM] = 0x1115,
+ [PIPE_FORMAT_R16_SINT] = 0x1115,
+ [PIPE_FORMAT_R16_UINT] = 0x1115,
+ [PIPE_FORMAT_R16_FLOAT] = 0x1115,
+
+ [PIPE_FORMAT_R8_UNORM] = 0x0206,
+ [PIPE_FORMAT_R8_SNORM] = 0x0206,
+ [PIPE_FORMAT_R8_SINT] = 0x0206,
+ [PIPE_FORMAT_R8_UINT] = 0x0206
+};
+
+/* NOTE: These are hardcoded offsets for the shader library.
+ * TODO: Automate them.
+ */
+static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] =
+{
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218,
+ [PIPE_FORMAT_R32G32B32A32_SINT] = 0x218,
+ [PIPE_FORMAT_R32G32B32A32_UINT] = 0x218,
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8,
+ [PIPE_FORMAT_R16G16B16A16_SINT] = 0x330,
+ [PIPE_FORMAT_R16G16B16A16_UINT] = 0x388,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8,
+ [PIPE_FORMAT_R32G32_FLOAT] = 0x428,
+ [PIPE_FORMAT_R32G32_SINT] = 0x468,
+ [PIPE_FORMAT_R32G32_UINT] = 0x468,
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x4a8,
+/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x530, */
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x588,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x5f8,
+ [PIPE_FORMAT_R8G8B8A8_SINT] = 0x670,
+ [PIPE_FORMAT_R8G8B8A8_UINT] = 0x6c8,
+ [PIPE_FORMAT_B5G6R5_UNORM] = 0x718,
+ [PIPE_FORMAT_B5G5R5X1_UNORM] = 0x7a0,
+ [PIPE_FORMAT_R16G16_UNORM] = 0x828,
+ [PIPE_FORMAT_R16G16_SNORM] = 0x890,
+ [PIPE_FORMAT_R16G16_SINT] = 0x8f0,
+ [PIPE_FORMAT_R16G16_UINT] = 0x948,
+ [PIPE_FORMAT_R16G16_FLOAT] = 0x998,
+ [PIPE_FORMAT_R32_FLOAT] = 0x9e8,
+ [PIPE_FORMAT_R32_SINT] = 0xa30,
+ [PIPE_FORMAT_R32_UINT] = 0xa30,
+ [PIPE_FORMAT_R8G8_UNORM] = 0xa78,
+ [PIPE_FORMAT_R8G8_SNORM] = 0xae0,
+ [PIPE_FORMAT_R8G8_UINT] = 0xb48,
+ [PIPE_FORMAT_R8G8_SINT] = 0xb98,
+ [PIPE_FORMAT_R16_UNORM] = 0xbe8,
+ [PIPE_FORMAT_R16_SNORM] = 0xc48,
+ [PIPE_FORMAT_R16_SINT] = 0xca0,
+ [PIPE_FORMAT_R16_UINT] = 0xce8,
+ [PIPE_FORMAT_R16_FLOAT] = 0xd30,
+ [PIPE_FORMAT_R8_UNORM] = 0xd88,
+ [PIPE_FORMAT_R8_SNORM] = 0xde0,
+ [PIPE_FORMAT_R8_SINT] = 0xe38,
+ [PIPE_FORMAT_R8_UINT] = 0xe88,
+ [PIPE_FORMAT_R11G11B10_FLOAT] = 0xed0
+};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
new file mode 100644
index 0000000..82f1ffc
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -0,0 +1,558 @@
+
+#include "util/u_format.h"
+
+#include "nvc0/nvc0_context.h"
+
+#include "nv50/nv50_defs.xml.h"
+
+struct nvc0_transfer {
+ struct pipe_transfer base;
+ struct nv50_m2mf_rect rect[2];
+ uint32_t nblocksx;
+ uint16_t nblocksy;
+ uint16_t nlayers;
+};
+
+static void
+nvc0_m2mf_transfer_rect(struct nvc0_context *nvc0,
+ const struct nv50_m2mf_rect *dst,
+ const struct nv50_m2mf_rect *src,
+ uint32_t nblocksx, uint32_t nblocksy)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nouveau_bufctx *bctx = nvc0->bufctx;
+ const int cpp = dst->cpp;
+ uint32_t src_ofst = src->base;
+ uint32_t dst_ofst = dst->base;
+ uint32_t height = nblocksy;
+ uint32_t sy = src->y;
+ uint32_t dy = dst->y;
+ uint32_t exec = (1 << 20);
+
+ assert(dst->cpp == src->cpp);
+
+ nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD);
+ nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, bctx);
+ nouveau_pushbuf_validate(push);
+
+ if (nouveau_bo_memtype(src->bo)) {
+ BEGIN_NVC0(push, NVC0_M2MF(TILING_MODE_IN), 5);
+ PUSH_DATA (push, src->tile_mode);
+ PUSH_DATA (push, src->width * cpp);
+ PUSH_DATA (push, src->height);
+ PUSH_DATA (push, src->depth);
+ PUSH_DATA (push, src->z);
+ } else {
+ src_ofst += src->y * src->pitch + src->x * cpp;
+
+ BEGIN_NVC0(push, NVC0_M2MF(PITCH_IN), 1);
+ PUSH_DATA (push, src->width * cpp);
+
+ exec |= NVC0_M2MF_EXEC_LINEAR_IN;
+ }
+
+ if (nouveau_bo_memtype(dst->bo)) {
+ BEGIN_NVC0(push, NVC0_M2MF(TILING_MODE_OUT), 5);
+ PUSH_DATA (push, dst->tile_mode);
+ PUSH_DATA (push, dst->width * cpp);
+ PUSH_DATA (push, dst->height);
+ PUSH_DATA (push, dst->depth);
+ PUSH_DATA (push, dst->z);
+ } else {
+ dst_ofst += dst->y * dst->pitch + dst->x * cpp;
+
+ BEGIN_NVC0(push, NVC0_M2MF(PITCH_OUT), 1);
+ PUSH_DATA (push, dst->width * cpp);
+
+ exec |= NVC0_M2MF_EXEC_LINEAR_OUT;
+ }
+
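+   /* the copy seems limited to 2047 lines per EXEC, so split the transfer */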
+ while (height) {
+ int line_count = height > 2047 ? 2047 : height;
+
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_IN_HIGH), 2);
+ PUSH_DATAh(push, src->bo->offset + src_ofst);
+ PUSH_DATA (push, src->bo->offset + src_ofst);
+
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
+ PUSH_DATAh(push, dst->bo->offset + dst_ofst);
+ PUSH_DATA (push, dst->bo->offset + dst_ofst);
+
+ if (!(exec & NVC0_M2MF_EXEC_LINEAR_IN)) {
+ BEGIN_NVC0(push, NVC0_M2MF(TILING_POSITION_IN_X), 2);
+ PUSH_DATA (push, src->x * cpp);
+ PUSH_DATA (push, sy);
+ } else {
+ src_ofst += line_count * src->pitch;
+ }
+ if (!(exec & NVC0_M2MF_EXEC_LINEAR_OUT)) {
+ BEGIN_NVC0(push, NVC0_M2MF(TILING_POSITION_OUT_X), 2);
+ PUSH_DATA (push, dst->x * cpp);
+ PUSH_DATA (push, dy);
+ } else {
+ dst_ofst += line_count * dst->pitch;
+ }
+
+ BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, nblocksx * cpp);
+ PUSH_DATA (push, line_count);
+ BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
+ PUSH_DATA (push, exec);
+
+ height -= line_count;
+ sy += line_count;
+ dy += line_count;
+ }
+
+ nouveau_bufctx_reset(bctx, 0);
+}
+
+static void
+nve4_m2mf_transfer_rect(struct nvc0_context *nvc0,
+ const struct nv50_m2mf_rect *dst,
+ const struct nv50_m2mf_rect *src,
+ uint32_t nblocksx, uint32_t nblocksy)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nouveau_bufctx *bctx = nvc0->bufctx;
+ uint32_t exec;
+ uint32_t src_base = src->base;
+ uint32_t dst_base = dst->base;
+ const int cpp = dst->cpp;
+
+ assert(dst->cpp == src->cpp);
+
+ nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR);
+ nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD);
+ nouveau_pushbuf_bufctx(push, bctx);
+ nouveau_pushbuf_validate(push);
+
+ exec = 0x200 /* 2D_ENABLE */ | 0x6 /* UNK */;
+
+ if (!nouveau_bo_memtype(dst->bo)) {
+ assert(!dst->z);
+ dst_base += dst->y * dst->pitch + dst->x * cpp;
+ exec |= 0x100; /* DST_MODE_2D_LINEAR */
+ }
+ if (!nouveau_bo_memtype(src->bo)) {
+ assert(!src->z);
+ src_base += src->y * src->pitch + src->x * cpp;
+ exec |= 0x080; /* SRC_MODE_2D_LINEAR */
+ }
+
+ BEGIN_NVC0(push, SUBC_COPY(0x070c), 6);
+ PUSH_DATA (push, 0x1000 | dst->tile_mode);
+ PUSH_DATA (push, dst->pitch);
+ PUSH_DATA (push, dst->height);
+ PUSH_DATA (push, dst->depth);
+ PUSH_DATA (push, dst->z);
+ PUSH_DATA (push, (dst->y << 16) | (dst->x * cpp));
+
+ BEGIN_NVC0(push, SUBC_COPY(0x0728), 6);
+ PUSH_DATA (push, 0x1000 | src->tile_mode);
+ PUSH_DATA (push, src->pitch);
+ PUSH_DATA (push, src->height);
+ PUSH_DATA (push, src->depth);
+ PUSH_DATA (push, src->z);
+ PUSH_DATA (push, (src->y << 16) | (src->x * cpp));
+
+ BEGIN_NVC0(push, SUBC_COPY(0x0400), 8);
+ PUSH_DATAh(push, src->bo->offset + src_base);
+ PUSH_DATA (push, src->bo->offset + src_base);
+ PUSH_DATAh(push, dst->bo->offset + dst_base);
+ PUSH_DATA (push, dst->bo->offset + dst_base);
+ PUSH_DATA (push, src->pitch);
+ PUSH_DATA (push, dst->pitch);
+ PUSH_DATA (push, nblocksx * cpp);
+ PUSH_DATA (push, nblocksy);
+
+ BEGIN_NVC0(push, SUBC_COPY(0x0300), 1);
+ PUSH_DATA (push, exec);
+
+ nouveau_bufctx_reset(bctx, 0);
+}
+
+void
+nvc0_m2mf_push_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, const void *data)
+{
+ struct nvc0_context *nvc0 = nvc0_context(&nv->pipe);
+ struct nouveau_pushbuf *push = nv->pushbuf;
+ uint32_t *src = (uint32_t *)data;
+ unsigned count = (size + 3) / 4;
+
+ nouveau_bufctx_refn(nvc0->bufctx, 0, dst, domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nvc0->bufctx);
+ nouveau_pushbuf_validate(push);
+
+ while (count) {
+ unsigned nr;
+
+ if (!PUSH_SPACE(push, 16))
+ break;
+ nr = PUSH_AVAIL(push);
+ assert(nr >= 16);
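+      /* leave room for the 9 words of headers and parameters emitted before
+       * the inline data below
+       */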
+ nr = MIN2(count, nr - 9);
+ nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
+
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
+ PUSH_DATAh(push, dst->offset + offset);
+ PUSH_DATA (push, dst->offset + offset);
+ BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, MIN2(size, nr * 4));
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
+ PUSH_DATA (push, 0x100111);
+
+ /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
+ BEGIN_NIC0(push, NVC0_M2MF(DATA), nr);
+ PUSH_DATAp(push, src, nr);
+
+ count -= nr;
+ src += nr;
+ offset += nr * 4;
+ size -= nr * 4;
+ }
+
+ nouveau_bufctx_reset(nvc0->bufctx, 0);
+}
+
+void
+nve4_p2mf_push_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, const void *data)
+{
+ struct nvc0_context *nvc0 = nvc0_context(&nv->pipe);
+ struct nouveau_pushbuf *push = nv->pushbuf;
+ uint32_t *src = (uint32_t *)data;
+ unsigned count = (size + 3) / 4;
+
+ nouveau_bufctx_refn(nvc0->bufctx, 0, dst, domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nvc0->bufctx);
+ nouveau_pushbuf_validate(push);
+
+ while (count) {
+ unsigned nr;
+
+ if (!PUSH_SPACE(push, 16))
+ break;
+ nr = PUSH_AVAIL(push);
+ assert(nr >= 16);
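+      /* leave room for the 8 words of headers and parameters emitted before
+       * the inline data below
+       */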
+ nr = MIN2(count, nr - 8);
+ nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1));
+
+ BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, dst->offset + offset);
+ PUSH_DATA (push, dst->offset + offset);
+ BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, MIN2(size, nr * 4));
+ PUSH_DATA (push, 1);
+ /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
+ BEGIN_1IC0(push, NVE4_P2MF(EXEC), nr + 1);
+ PUSH_DATA (push, 0x1001);
+ PUSH_DATAp(push, src, nr);
+
+ count -= nr;
+ src += nr;
+ offset += nr * 4;
+ size -= nr * 4;
+ }
+
+ nouveau_bufctx_reset(nvc0->bufctx, 0);
+}
+
+static void
+nvc0_m2mf_copy_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
+ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
+ unsigned size)
+{
+ struct nouveau_pushbuf *push = nv->pushbuf;
+ struct nouveau_bufctx *bctx = nvc0_context(&nv->pipe)->bufctx;
+
+ nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD);
+ nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, bctx);
+ nouveau_pushbuf_validate(push);
+
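+   /* copy in chunks of at most 128 KiB, using LINE_LENGTH_IN as the size */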
+ while (size) {
+ unsigned bytes = MIN2(size, 1 << 17);
+
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
+ PUSH_DATAh(push, dst->offset + dstoff);
+ PUSH_DATA (push, dst->offset + dstoff);
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_IN_HIGH), 2);
+ PUSH_DATAh(push, src->offset + srcoff);
+ PUSH_DATA (push, src->offset + srcoff);
+ BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, bytes);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
+ PUSH_DATA (push, (1 << NVC0_M2MF_EXEC_INC__SHIFT) |
+ NVC0_M2MF_EXEC_LINEAR_IN | NVC0_M2MF_EXEC_LINEAR_OUT);
+
+ srcoff += bytes;
+ dstoff += bytes;
+ size -= bytes;
+ }
+
+ nouveau_bufctx_reset(bctx, 0);
+}
+
+static void
+nve4_m2mf_copy_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
+ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
+ unsigned size)
+{
+ struct nouveau_pushbuf *push = nv->pushbuf;
+ struct nouveau_bufctx *bctx = nvc0_context(&nv->pipe)->bufctx;
+
+ nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD);
+ nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, bctx);
+ nouveau_pushbuf_validate(push);
+
+ BEGIN_NVC0(push, SUBC_COPY(0x0400), 4);
+ PUSH_DATAh(push, src->offset + srcoff);
+ PUSH_DATA (push, src->offset + srcoff);
+ PUSH_DATAh(push, dst->offset + dstoff);
+ PUSH_DATA (push, dst->offset + dstoff);
+ BEGIN_NVC0(push, SUBC_COPY(0x0418), 1);
+ PUSH_DATA (push, size);
+ BEGIN_NVC0(push, SUBC_COPY(0x0300), 1);
+ PUSH_DATA (push, 0x186);
+
+ nouveau_bufctx_reset(bctx, 0);
+}
+
+
+static INLINE boolean
+nvc0_mt_transfer_can_map_directly(struct nv50_miptree *mt)
+{
+ if (mt->base.domain == NOUVEAU_BO_VRAM)
+ return FALSE;
+ if (mt->base.base.usage != PIPE_USAGE_STAGING)
+ return FALSE;
+ return !nouveau_bo_memtype(mt->base.bo);
+}
+
+static INLINE boolean
+nvc0_mt_sync(struct nvc0_context *nvc0, struct nv50_miptree *mt, unsigned usage)
+{
+ if (!mt->base.mm) {
+ uint32_t access = (usage & PIPE_TRANSFER_WRITE) ?
+ NOUVEAU_BO_WR : NOUVEAU_BO_RD;
+ return !nouveau_bo_wait(mt->base.bo, access, nvc0->base.client);
+ }
+ if (usage & PIPE_TRANSFER_WRITE)
+ return !mt->base.fence || nouveau_fence_wait(mt->base.fence);
+ return !mt->base.fence_wr || nouveau_fence_wait(mt->base.fence_wr);
+}
+
+void *
+nvc0_miptree_transfer_map(struct pipe_context *pctx,
+ struct pipe_resource *res,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ struct pipe_transfer **ptransfer)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pctx);
+ struct nouveau_device *dev = nvc0->screen->base.device;
+ struct nv50_miptree *mt = nv50_miptree(res);
+ struct nvc0_transfer *tx;
+ uint32_t size;
+ int ret;
+ unsigned flags = 0;
+
+ if (nvc0_mt_transfer_can_map_directly(mt)) {
+ ret = !nvc0_mt_sync(nvc0, mt, usage);
+ if (!ret)
+ ret = nouveau_bo_map(mt->base.bo, 0, NULL);
+ if (ret &&
+ (usage & PIPE_TRANSFER_MAP_DIRECTLY))
+ return NULL;
+ if (!ret)
+ usage |= PIPE_TRANSFER_MAP_DIRECTLY;
+ } else
+ if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
+ return NULL;
+
+ tx = CALLOC_STRUCT(nvc0_transfer);
+ if (!tx)
+ return NULL;
+
+ pipe_resource_reference(&tx->base.resource, res);
+
+ tx->base.level = level;
+ tx->base.usage = usage;
+ tx->base.box = *box;
+
+ if (util_format_is_plain(res->format)) {
+ tx->nblocksx = box->width << mt->ms_x;
+ tx->nblocksy = box->height << mt->ms_y;
+ } else {
+ tx->nblocksx = util_format_get_nblocksx(res->format, box->width);
+ tx->nblocksy = util_format_get_nblocksy(res->format, box->height);
+ }
+ tx->nlayers = box->depth;
+
+ tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format);
+ tx->base.layer_stride = tx->nblocksy * tx->base.stride;
+
+ if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
+ tx->base.stride = align(tx->base.stride, 128);
+ *ptransfer = &tx->base;
+ return mt->base.bo->map + mt->base.offset;
+ }
+
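+   /* Otherwise, go through a linear GART bounce buffer: rect[0] describes
+    * the miptree region, rect[1] the staging buffer; data is copied one
+    * layer at a time below.
+    */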
+ nv50_m2mf_rect_setup(&tx->rect[0], res, level, box->x, box->y, box->z);
+
+ size = tx->base.layer_stride;
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
+ size * tx->nlayers, NULL, &tx->rect[1].bo);
+ if (ret) {
+ pipe_resource_reference(&tx->base.resource, NULL);
+ FREE(tx);
+ return NULL;
+ }
+
+ tx->rect[1].cpp = tx->rect[0].cpp;
+ tx->rect[1].width = tx->nblocksx;
+ tx->rect[1].height = tx->nblocksy;
+ tx->rect[1].depth = 1;
+ tx->rect[1].pitch = tx->base.stride;
+ tx->rect[1].domain = NOUVEAU_BO_GART;
+
+ if (usage & PIPE_TRANSFER_READ) {
+ unsigned base = tx->rect[0].base;
+ unsigned z = tx->rect[0].z;
+ unsigned i;
+ for (i = 0; i < tx->nlayers; ++i) {
+ nvc0->m2mf_copy_rect(nvc0, &tx->rect[1], &tx->rect[0],
+ tx->nblocksx, tx->nblocksy);
+ if (mt->layout_3d)
+ tx->rect[0].z++;
+ else
+ tx->rect[0].base += mt->layer_stride;
+ tx->rect[1].base += size;
+ }
+ tx->rect[0].z = z;
+ tx->rect[0].base = base;
+ tx->rect[1].base = 0;
+ }
+
+ if (tx->rect[1].bo->map) {
+ *ptransfer = &tx->base;
+ return tx->rect[1].bo->map;
+ }
+
+ if (usage & PIPE_TRANSFER_READ)
+ flags = NOUVEAU_BO_RD;
+ if (usage & PIPE_TRANSFER_WRITE)
+ flags |= NOUVEAU_BO_WR;
+
+ ret = nouveau_bo_map(tx->rect[1].bo, flags, nvc0->screen->base.client);
+ if (ret) {
+ pipe_resource_reference(&tx->base.resource, NULL);
+ nouveau_bo_ref(NULL, &tx->rect[1].bo);
+ FREE(tx);
+ return NULL;
+ }
+
+ *ptransfer = &tx->base;
+ return tx->rect[1].bo->map;
+}
+
+void
+nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
+ struct pipe_transfer *transfer)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pctx);
+ struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
+ struct nv50_miptree *mt = nv50_miptree(tx->base.resource);
+ unsigned i;
+
+ if (tx->base.usage & PIPE_TRANSFER_MAP_DIRECTLY) {
+ pipe_resource_reference(&transfer->resource, NULL);
+
+ FREE(tx);
+ return;
+ }
+
+ if (tx->base.usage & PIPE_TRANSFER_WRITE) {
+ for (i = 0; i < tx->nlayers; ++i) {
+ nvc0->m2mf_copy_rect(nvc0, &tx->rect[0], &tx->rect[1],
+ tx->nblocksx, tx->nblocksy);
+ if (mt->layout_3d)
+ tx->rect[0].z++;
+ else
+ tx->rect[0].base += mt->layer_stride;
+ tx->rect[1].base += tx->nblocksy * tx->base.stride;
+ }
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_wr, 1);
+ }
+ if (tx->base.usage & PIPE_TRANSFER_READ)
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_rd, 1);
+
+ nouveau_bo_ref(NULL, &tx->rect[1].bo);
+ pipe_resource_reference(&transfer->resource, NULL);
+
+ FREE(tx);
+}
+
+/* This happens rather often with DTD9/st. */
+void
+nvc0_cb_push(struct nouveau_context *nv,
+ struct nouveau_bo *bo, unsigned domain,
+ unsigned base, unsigned size,
+ unsigned offset, unsigned words, const uint32_t *data)
+{
+ struct nouveau_pushbuf *push = nv->pushbuf;
+
+ NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_count, 1);
+ NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_bytes, words * 4);
+
+ assert(!(offset & 3));
+ size = align(size, 0x100);
+
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, size);
+ PUSH_DATAh(push, bo->offset + base);
+ PUSH_DATA (push, bo->offset + base);
+
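+   /* BEGIN_1IC0 takes one extra data word for the CB_POS offset, hence the
+    * NV04_PFIFO_MAX_PACKET_LEN - 1 below.
+    */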
+ while (words) {
+ unsigned nr = PUSH_AVAIL(push);
+ nr = MIN2(nr, words);
+ nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);
+
+ PUSH_SPACE(push, nr + 2);
+ PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain);
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), nr + 1);
+ PUSH_DATA (push, offset);
+ PUSH_DATAp(push, data, nr);
+
+ words -= nr;
+ data += nr;
+ offset += nr * 4;
+ }
+}
+
+void
+nvc0_init_transfer_functions(struct nvc0_context *nvc0)
+{
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
+ nvc0->m2mf_copy_rect = nve4_m2mf_transfer_rect;
+ nvc0->base.copy_data = nve4_m2mf_copy_linear;
+ nvc0->base.push_data = nve4_p2mf_push_linear;
+ } else {
+ nvc0->m2mf_copy_rect = nvc0_m2mf_transfer_rect;
+ nvc0->base.copy_data = nvc0_m2mf_copy_linear;
+ nvc0->base.push_data = nvc0_m2mf_push_linear;
+ }
+ nvc0->base.push_cb = nvc0_cb_push;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
new file mode 100644
index 0000000..c4bc7dc
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -0,0 +1,891 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define NVC0_PUSH_EXPLICIT_SPACE_CHECKING
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_resource.h"
+
+#include "nvc0/nvc0_3d.xml.h"
+
+void
+nvc0_vertex_state_delete(struct pipe_context *pipe,
+ void *hwcso)
+{
+ struct nvc0_vertex_stateobj *so = hwcso;
+
+ if (so->translate)
+ so->translate->release(so->translate);
+ FREE(hwcso);
+}
+
+void *
+nvc0_vertex_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements)
+{
+ struct nvc0_vertex_stateobj *so;
+ struct translate_key transkey;
+ unsigned i;
+ unsigned src_offset_max = 0;
+
+ so = MALLOC(sizeof(*so) +
+ num_elements * sizeof(struct nvc0_vertex_element));
+ if (!so)
+ return NULL;
+ so->num_elements = num_elements;
+ so->instance_elts = 0;
+ so->instance_bufs = 0;
+ so->shared_slots = FALSE;
+ so->need_conversion = FALSE;
+
+ memset(so->vb_access_size, 0, sizeof(so->vb_access_size));
+
+ for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
+ so->min_instance_div[i] = 0xffffffff;
+
+ transkey.nr_elements = 0;
+ transkey.output_stride = 0;
+
+ for (i = 0; i < num_elements; ++i) {
+ const struct pipe_vertex_element *ve = &elements[i];
+ const unsigned vbi = ve->vertex_buffer_index;
+ unsigned size;
+ enum pipe_format fmt = ve->src_format;
+
+ so->element[i].pipe = elements[i];
+ so->element[i].state = nvc0_format_table[fmt].vtx;
+
+ if (!so->element[i].state) {
+ switch (util_format_get_nr_components(fmt)) {
+ case 1: fmt = PIPE_FORMAT_R32_FLOAT; break;
+ case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break;
+ case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break;
+ case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break;
+ default:
+ assert(0);
+ FREE(so);
+ return NULL;
+ }
+ so->element[i].state = nvc0_format_table[fmt].vtx;
+ so->need_conversion = TRUE;
+ }
+ size = util_format_get_blocksize(fmt);
+
+ src_offset_max = MAX2(src_offset_max, ve->src_offset);
+
+ if (so->vb_access_size[vbi] < (ve->src_offset + size))
+ so->vb_access_size[vbi] = ve->src_offset + size;
+
+ if (unlikely(ve->instance_divisor)) {
+ so->instance_elts |= 1 << i;
+ so->instance_bufs |= 1 << vbi;
+ if (ve->instance_divisor < so->min_instance_div[vbi])
+ so->min_instance_div[vbi] = ve->instance_divisor;
+ }
+
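+      /* Always add a translate key entry, so the translate/push fallback
+       * path can convert this element too.
+       */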
+ if (1) {
+ unsigned ca;
+ unsigned j = transkey.nr_elements++;
+
+ ca = util_format_description(fmt)->channel[0].size / 8;
+ if (ca != 1 && ca != 2)
+ ca = 4;
+
+ transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL;
+ transkey.element[j].input_format = ve->src_format;
+ transkey.element[j].input_buffer = vbi;
+ transkey.element[j].input_offset = ve->src_offset;
+ transkey.element[j].instance_divisor = ve->instance_divisor;
+
+ transkey.output_stride = align(transkey.output_stride, ca);
+ transkey.element[j].output_format = fmt;
+ transkey.element[j].output_offset = transkey.output_stride;
+ transkey.output_stride += size;
+
+ so->element[i].state_alt = so->element[i].state;
+ so->element[i].state_alt |= transkey.element[j].output_offset << 7;
+ }
+
+ so->element[i].state |= i << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT;
+ }
+ transkey.output_stride = align(transkey.output_stride, 4);
+
+ so->size = transkey.output_stride;
+ so->translate = translate_create(&transkey);
+
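+   /* Without instancing, and as long as every src_offset fits into the
+    * (apparently 14-bit) OFFSET field of the attrib format, buffer index and
+    * offset can be baked into the format words so that elements reading from
+    * the same pipe vertex buffer can share one hardware vertex array slot:
+    */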
+ if (so->instance_elts || src_offset_max >= (1 << 14))
+ return so;
+ so->shared_slots = TRUE;
+
+ for (i = 0; i < num_elements; ++i) {
+ const unsigned b = elements[i].vertex_buffer_index;
+ const unsigned s = elements[i].src_offset;
+ so->element[i].state &= ~NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK;
+ so->element[i].state |= b << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT;
+ so->element[i].state |= s << NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT;
+ }
+ return so;
+}
+
+#define NVC0_3D_VERTEX_ATTRIB_INACTIVE \
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | \
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST
+
+#define VTX_ATTR(a, c, t, s) \
+ ((NVC0_3D_VTX_ATTR_DEFINE_TYPE_##t) | \
+ (NVC0_3D_VTX_ATTR_DEFINE_SIZE_##s) | \
+ ((a) << NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT) | \
+ ((c) << NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT))
+
+static void
+nvc0_set_constant_vertex_attrib(struct nvc0_context *nvc0, const unsigned a)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct pipe_vertex_element *ve = &nvc0->vertex->element[a].pipe;
+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index];
+ uint32_t mode;
+ const struct util_format_description *desc;
+ void *dst;
+ const void *src = (const uint8_t *)vb->user_buffer + ve->src_offset;
+ assert(!vb->buffer);
+
+ desc = util_format_description(ve->src_format);
+
+ PUSH_SPACE(push, 6);
+ BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 5);
+ dst = &push->cur[1];
+ if (desc->channel[0].pure_integer) {
+ if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+ mode = VTX_ATTR(a, 4, SINT, 32);
+ desc->unpack_rgba_sint(dst, 0, src, 0, 1, 1);
+ } else {
+ mode = VTX_ATTR(a, 4, UINT, 32);
+ desc->unpack_rgba_uint(dst, 0, src, 0, 1, 1);
+ }
+ } else {
+ mode = VTX_ATTR(a, 4, FLOAT, 32);
+ desc->unpack_rgba_float(dst, 0, src, 0, 1, 1);
+ }
+ push->cur[0] = mode;
+ push->cur += 5;
+}
+
+static INLINE void
+nvc0_user_vbuf_range(struct nvc0_context *nvc0, int vbi,
+ uint32_t *base, uint32_t *size)
+{
+ if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) {
+ const uint32_t div = nvc0->vertex->min_instance_div[vbi];
+ *base = nvc0->instance_off * nvc0->vtxbuf[vbi].stride;
+ *size = (nvc0->instance_max / div) * nvc0->vtxbuf[vbi].stride +
+ nvc0->vertex->vb_access_size[vbi];
+ } else {
+ /* NOTE: if there are user buffers, we *must* have index bounds */
+ assert(nvc0->vb_elt_limit != ~0);
+ *base = nvc0->vb_elt_first * nvc0->vtxbuf[vbi].stride;
+ *size = nvc0->vb_elt_limit * nvc0->vtxbuf[vbi].stride +
+ nvc0->vertex->vb_access_size[vbi];
+ }
+}
+
+static INLINE void
+nvc0_release_user_vbufs(struct nvc0_context *nvc0)
+{
+ if (nvc0->vbo_user) {
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP);
+ nouveau_scratch_done(&nvc0->base);
+ }
+}
+
+static void
+nvc0_update_user_vbufs(struct nvc0_context *nvc0)
+{
+ uint64_t address[PIPE_MAX_ATTRIBS];
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ int i;
+ uint32_t written = 0;
+
+ PUSH_SPACE(push, nvc0->vertex->num_elements * 8);
+ for (i = 0; i < nvc0->vertex->num_elements; ++i) {
+ struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe;
+ const unsigned b = ve->vertex_buffer_index;
+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
+ uint32_t base, size;
+
+ if (!(nvc0->vbo_user & (1 << b)))
+ continue;
+ if (!vb->stride) {
+ nvc0_set_constant_vertex_attrib(nvc0, i);
+ continue;
+ }
+ nvc0_user_vbuf_range(nvc0, b, &base, &size);
+
+ if (!(written & (1 << b))) {
+ struct nouveau_bo *bo;
+ const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART;
+ written |= 1 << b;
+ address[b] = nouveau_scratch_data(&nvc0->base, vb->user_buffer,
+ base, size, &bo);
+ if (bo)
+ BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo);
+
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, user_buffer_upload_bytes, size);
+ }
+
+ BEGIN_1IC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_SELECT), 5);
+ PUSH_DATA (push, i);
+ PUSH_DATAh(push, address[b] + base + size - 1);
+ PUSH_DATA (push, address[b] + base + size - 1);
+ PUSH_DATAh(push, address[b] + ve->src_offset);
+ PUSH_DATA (push, address[b] + ve->src_offset);
+ }
+ nvc0->base.vbo_dirty = TRUE;
+}
+
+static void
+nvc0_update_user_vbufs_shared(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ uint32_t mask = nvc0->vbo_user & ~nvc0->constant_vbos;
+
+ PUSH_SPACE(push, nvc0->num_vtxbufs * 8);
+ while (mask) {
+ struct nouveau_bo *bo;
+ const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART;
+ uint64_t address;
+ uint32_t base, size;
+ const int b = ffs(mask) - 1;
+ mask &= ~(1 << b);
+
+ nvc0_user_vbuf_range(nvc0, b, &base, &size);
+
+ address = nouveau_scratch_data(&nvc0->base, nvc0->vtxbuf[b].user_buffer,
+ base, size, &bo);
+ if (bo)
+ BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo);
+
+ BEGIN_1IC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_SELECT), 5);
+ PUSH_DATA (push, b);
+ PUSH_DATAh(push, address + base + size - 1);
+ PUSH_DATA (push, address + base + size - 1);
+ PUSH_DATAh(push, address);
+ PUSH_DATA (push, address);
+
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, user_buffer_upload_bytes, size);
+ }
+
+ mask = nvc0->state.constant_elts;
+ while (mask) {
+ int i = ffs(mask) - 1;
+ mask &= ~(1 << i);
+ nvc0_set_constant_vertex_attrib(nvc0, i);
+ }
+}
+
+static void
+nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const struct nvc0_vertex_stateobj *vertex = nvc0->vertex;
+ uint32_t refd = 0;
+ unsigned i;
+
+ PUSH_SPACE(push, vertex->num_elements * 8);
+ for (i = 0; i < vertex->num_elements; ++i) {
+ const struct nvc0_vertex_element *ve;
+ const struct pipe_vertex_buffer *vb;
+ struct nv04_resource *res;
+ unsigned b;
+ unsigned limit, offset;
+
+ if (nvc0->state.constant_elts & (1 << i))
+ continue;
+ ve = &vertex->element[i];
+ b = ve->pipe.vertex_buffer_index;
+ vb = &nvc0->vtxbuf[b];
+
+ if (!vb->buffer) {
+ if (vb->stride) {
+ if (ve->pipe.instance_divisor) {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_DIVISOR(i)), 1);
+ PUSH_DATA (push, ve->pipe.instance_divisor);
+ }
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ PUSH_DATA (push, (1 << 12) | vb->stride);
+ }
+ /* address/value set in nvc0_update_user_vbufs */
+ continue;
+ }
+ res = nv04_resource(vb->buffer);
+ offset = ve->pipe.src_offset + vb->buffer_offset;
+ limit = vb->buffer->width0 - 1;
+
+ if (unlikely(ve->pipe.instance_divisor)) {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4);
+ PUSH_DATA (push, (1 << 12) | vb->stride);
+ PUSH_DATAh(push, res->address + offset);
+ PUSH_DATA (push, res->address + offset);
+ PUSH_DATA (push, ve->pipe.instance_divisor);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 3);
+ PUSH_DATA (push, (1 << 12) | vb->stride);
+ PUSH_DATAh(push, res->address + offset);
+ PUSH_DATA (push, res->address + offset);
+ }
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
+ PUSH_DATAh(push, res->address + limit);
+ PUSH_DATA (push, res->address + limit);
+
+ if (!(refd & (1 << b))) {
+ refd |= 1 << b;
+ BCTX_REFN(nvc0->bufctx_3d, VTX, res, RD);
+ }
+ }
+ if (nvc0->vbo_user)
+ nvc0_update_user_vbufs(nvc0);
+}
+
+static void
+nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ unsigned b;
+ const uint32_t mask = nvc0->vbo_user;
+
+ PUSH_SPACE(push, nvc0->num_vtxbufs * 8);
+ for (b = 0; b < nvc0->num_vtxbufs; ++b) {
+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
+ struct nv04_resource *buf;
+ uint32_t offset, limit;
+
+ if (mask & (1 << b)) {
+ if (vb->stride) {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 1);
+ PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
+ }
+ /* address/value set in nvc0_update_user_vbufs_shared */
+ continue;
+ }
+ buf = nv04_resource(vb->buffer);
+ offset = vb->buffer_offset;
+ limit = buf->base.width0 - 1;
+
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 3);
+ PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
+ PUSH_DATAh(push, buf->address + limit);
+ PUSH_DATA (push, buf->address + limit);
+
+ BCTX_REFN(nvc0->bufctx_3d, VTX, buf, RD);
+ }
+ if (nvc0->vbo_user)
+ nvc0_update_user_vbufs_shared(nvc0);
+}
+
+void
+nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_vertex_stateobj *vertex = nvc0->vertex;
+ struct nvc0_vertex_element *ve;
+ uint32_t const_vbos;
+ unsigned i;
+ uint8_t vbo_mode;
+ boolean update_vertex;
+
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
+
+ if (unlikely(vertex->need_conversion) ||
+ unlikely(nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS)) {
+ vbo_mode = 3;
+ } else {
+ vbo_mode = (nvc0->vbo_user && nvc0->vbo_push_hint) ? 1 : 0;
+ }
+ const_vbos = vbo_mode ? 0 : nvc0->constant_vbos;
+
+ update_vertex = (nvc0->dirty & NVC0_NEW_VERTEX) ||
+ (const_vbos != nvc0->state.constant_vbos) ||
+ (vbo_mode != nvc0->state.vbo_mode);
+
+ if (update_vertex) {
+ const unsigned n = MAX2(vertex->num_elements, nvc0->state.num_vtxelts);
+
+ nvc0->state.constant_vbos = const_vbos;
+ nvc0->state.constant_elts = 0;
+ nvc0->state.num_vtxelts = vertex->num_elements;
+ nvc0->state.vbo_mode = vbo_mode;
+
+ if (unlikely(vbo_mode)) {
+ if (unlikely(nvc0->state.instance_elts & 3)) {
+ /* translate mode uses only 2 vertex buffers */
+ nvc0->state.instance_elts &= ~3;
+ PUSH_SPACE(push, 3);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(0)), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ }
+
+ PUSH_SPACE(push, n * 2 + 4);
+
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), n);
+ for (i = 0; i < vertex->num_elements; ++i)
+ PUSH_DATA(push, vertex->element[i].state_alt);
+ for (; i < n; ++i)
+ PUSH_DATA(push, NVC0_3D_VERTEX_ATTRIB_INACTIVE);
+
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(0)), 1);
+ PUSH_DATA (push, (1 << 12) | vertex->size);
+ for (i = 1; i < n; ++i)
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
+ } else {
+ uint32_t *restrict data;
+
+ if (unlikely(vertex->instance_elts != nvc0->state.instance_elts)) {
+ nvc0->state.instance_elts = vertex->instance_elts;
+ assert(n); /* if (n == 0), both masks should be 0 */
+ PUSH_SPACE(push, 3);
+ BEGIN_NVC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_PER_INSTANCE), 2);
+ PUSH_DATA (push, n);
+ PUSH_DATA (push, vertex->instance_elts);
+ }
+
+ PUSH_SPACE(push, n * 2 + 1);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), n);
+ data = push->cur;
+ push->cur += n;
+ for (i = 0; i < vertex->num_elements; ++i) {
+ ve = &vertex->element[i];
+ data[i] = ve->state;
+ if (unlikely(const_vbos & (1 << ve->pipe.vertex_buffer_index))) {
+ nvc0->state.constant_elts |= 1 << i;
+ data[i] |= NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST;
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
+ }
+ }
+ for (; i < n; ++i) {
+ data[i] = NVC0_3D_VERTEX_ATTRIB_INACTIVE;
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
+ }
+ }
+ }
+ if (nvc0->state.vbo_mode) /* using translate, don't set up arrays here */
+ return;
+
+ if (vertex->shared_slots)
+ nvc0_validate_vertex_buffers_shared(nvc0);
+ else
+ nvc0_validate_vertex_buffers(nvc0);
+}
+
+void
+nvc0_idxbuf_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv04_resource *buf = nv04_resource(nvc0->idxbuf.buffer);
+
+ assert(buf);
+ assert(nouveau_resource_mapped_by_gpu(&buf->base));
+
+ PUSH_SPACE(push, 6);
+ BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5);
+ PUSH_DATAh(push, buf->address + nvc0->idxbuf.offset);
+ PUSH_DATA (push, buf->address + nvc0->idxbuf.offset);
+ PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
+ PUSH_DATA (push, buf->address + buf->base.width0 - 1);
+ PUSH_DATA (push, nvc0->idxbuf.index_size >> 1);
+
+ BCTX_REFN(nvc0->bufctx_3d, IDX, buf, RD);
+}
+
+#define NVC0_PRIM_GL_CASE(n) \
+ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+static INLINE unsigned
+nvc0_prim_gl(unsigned prim)
+{
+ switch (prim) {
+ NVC0_PRIM_GL_CASE(POINTS);
+ NVC0_PRIM_GL_CASE(LINES);
+ NVC0_PRIM_GL_CASE(LINE_LOOP);
+ NVC0_PRIM_GL_CASE(LINE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLES);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
+ NVC0_PRIM_GL_CASE(QUADS);
+ NVC0_PRIM_GL_CASE(QUAD_STRIP);
+ NVC0_PRIM_GL_CASE(POLYGON);
+ NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+ /*
+ NVC0_PRIM_GL_CASE(PATCHES); */
+ default:
+ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+ }
+}
+
+static void
+nvc0_draw_vbo_kick_notify(struct nouveau_pushbuf *push)
+{
+ struct nvc0_screen *screen = push->user_priv;
+
+ nouveau_fence_update(&screen->base, TRUE);
+
+ NOUVEAU_DRV_STAT(&screen->base, pushbuf_count, 1);
+}
+
+static void
+nvc0_draw_arrays(struct nvc0_context *nvc0,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned instance_count)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ unsigned prim;
+
+ if (nvc0->state.index_bias) {
+ PUSH_SPACE(push, 1);
+ IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 0);
+ nvc0->state.index_bias = 0;
+ }
+
+ prim = nvc0_prim_gl(mode);
+
+ while (instance_count--) {
+ PUSH_SPACE(push, 6);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (push, prim);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
+ PUSH_DATA (push, start);
+ PUSH_DATA (push, count);
+ IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
+
+ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_array, 1);
+}
+
+static void
+nvc0_draw_elements_inline_u08(struct nouveau_pushbuf *push, const uint8_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 3) {
+ unsigned i;
+ PUSH_SPACE(push, 4);
+ BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U32), count & 3);
+ for (i = 0; i < (count & 3); ++i)
+ PUSH_DATA(push, *map++);
+ count &= ~3;
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4;
+
+ PUSH_SPACE(push, nr + 1);
+ BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U8), nr);
+ for (i = 0; i < nr; ++i) {
+ PUSH_DATA(push,
+ (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]);
+ map += 4;
+ }
+ count -= nr * 4;
+ }
+}
+
+static void
+nvc0_draw_elements_inline_u16(struct nouveau_pushbuf *push, const uint16_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 1) {
+ count &= ~1;
+ PUSH_SPACE(push, 2);
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, *map++);
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+ PUSH_SPACE(push, nr + 1);
+ BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U16), nr);
+ for (i = 0; i < nr; ++i) {
+ PUSH_DATA(push, (map[1] << 16) | map[0]);
+ map += 2;
+ }
+ count -= nr * 2;
+ }
+}
+
+static void
+nvc0_draw_elements_inline_u32(struct nouveau_pushbuf *push, const uint32_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ while (count) {
+ const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
+
+ PUSH_SPACE(push, nr + 1);
+ BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U32), nr);
+ PUSH_DATAp(push, map, nr);
+
+ map += nr;
+ count -= nr;
+ }
+}
+
+static void
+nvc0_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
+ const uint32_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 1) {
+ count--;
+ PUSH_SPACE(push, 1);
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, *map++);
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+ PUSH_SPACE(push, nr + 1);
+ BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U16), nr);
+ for (i = 0; i < nr; ++i) {
+ PUSH_DATA(push, (map[1] << 16) | map[0]);
+ map += 2;
+ }
+ count -= nr * 2;
+ }
+}
+
+static void
+nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned instance_count, int32_t index_bias)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ unsigned prim;
+ const unsigned index_size = nvc0->idxbuf.index_size;
+
+ prim = nvc0_prim_gl(mode);
+
+ if (index_bias != nvc0->state.index_bias) {
+ PUSH_SPACE(push, 2);
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 1);
+ PUSH_DATA (push, index_bias);
+ nvc0->state.index_bias = index_bias;
+ }
+
+ if (nvc0->idxbuf.buffer) {
+ PUSH_SPACE(push, 1);
+ IMMED_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), prim);
+ do {
+ PUSH_SPACE(push, 7);
+ BEGIN_NVC0(push, NVC0_3D(INDEX_BATCH_FIRST), 2);
+ PUSH_DATA (push, start);
+ PUSH_DATA (push, count);
+ if (--instance_count) {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_END_GL), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, prim | NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT);
+ }
+ } while (instance_count);
+ IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
+ } else {
+ const void *data = nvc0->idxbuf.user_buffer;
+
+ while (instance_count--) {
+ PUSH_SPACE(push, 2);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (push, prim);
+ switch (index_size) {
+ case 1:
+ nvc0_draw_elements_inline_u08(push, data, start, count);
+ break;
+ case 2:
+ nvc0_draw_elements_inline_u16(push, data, start, count);
+ break;
+ case 4:
+ if (shorten)
+ nvc0_draw_elements_inline_u32_short(push, data, start, count);
+ else
+ nvc0_draw_elements_inline_u32(push, data, start, count);
+ break;
+ default:
+ assert(0);
+ return;
+ }
+ PUSH_SPACE(push, 1);
+ IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
+
+ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+ }
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_indexed, 1);
+}
+
+static void
+nvc0_draw_stream_output(struct nvc0_context *nvc0,
+ const struct pipe_draw_info *info)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_so_target *so = nvc0_so_target(info->count_from_stream_output);
+ struct nv04_resource *res = nv04_resource(so->pipe.buffer);
+ unsigned mode = nvc0_prim_gl(info->mode);
+ unsigned num_instances = info->instance_count;
+
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ PUSH_SPACE(push, 2);
+ IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
+ nvc0_query_fifo_wait(push, so->pq);
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
+
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, gpu_serialize_count, 1);
+ }
+
+ while (num_instances--) {
+ PUSH_SPACE(push, 8);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (push, mode);
+ BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BASE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_STRIDE), 1);
+ PUSH_DATA (push, so->stride);
+ BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BYTES), 1);
+ nvc0_query_pushbuf_submit(push, so->pq, 0x4);
+ IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
+
+ mode |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+}
+
+void
+nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
+ nvc0->vb_elt_first = info->min_index + info->index_bias;
+ nvc0->vb_elt_limit = info->max_index - info->min_index;
+ nvc0->instance_off = info->start_instance;
+ nvc0->instance_max = info->instance_count - 1;
+
+   /* For picking only a few vertices from a large user buffer, pushing the
+    * vertices through the FIFO is better; if the index count is large and we
+    * expect repeated vertices, suggest uploading the buffer instead.
+    */
+ nvc0->vbo_push_hint =
+ info->indexed && (nvc0->vb_elt_limit >= (info->count * 2));
+
+ /* Check whether we want to switch vertex-submission mode. */
+ if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_ARRAYS | NVC0_NEW_VERTEX))) {
+ if (nvc0->vbo_push_hint != !!nvc0->state.vbo_mode)
+ if (nvc0->state.vbo_mode != 3)
+ nvc0->dirty |= NVC0_NEW_ARRAYS;
+
+ if (!(nvc0->dirty & NVC0_NEW_ARRAYS) && nvc0->state.vbo_mode == 0) {
+ if (nvc0->vertex->shared_slots)
+ nvc0_update_user_vbufs_shared(nvc0);
+ else
+ nvc0_update_user_vbufs(nvc0);
+ }
+ }
+
+ /* 8 as minimum to avoid immediate double validation of new buffers */
+ nvc0_state_validate(nvc0, ~0, 8);
+
+ push->kick_notify = nvc0_draw_vbo_kick_notify;
+
+ if (nvc0->state.vbo_mode) {
+ nvc0_push_vbo(nvc0, info);
+ push->kick_notify = nvc0_default_kick_notify;
+ return;
+ }
+
+ /* space for base instance, flush, and prim restart */
+ PUSH_SPACE(push, 8);
+
+ if (nvc0->state.instance_base != info->start_instance) {
+ nvc0->state.instance_base = info->start_instance;
+ /* NOTE: this does not affect the shader input, should it ? */
+ BEGIN_NVC0(push, NVC0_3D(VB_INSTANCE_BASE), 1);
+ PUSH_DATA (push, info->start_instance);
+ }
+
+ if (nvc0->base.vbo_dirty) {
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
+ nvc0->base.vbo_dirty = FALSE;
+ }
+
+ if (info->indexed) {
+ boolean shorten = info->max_index <= 65535;
+
+ if (info->primitive_restart != nvc0->state.prim_restart) {
+ if (info->primitive_restart) {
+ BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 2);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, info->restart_index);
+
+ if (info->restart_index > 65535)
+ shorten = FALSE;
+ } else {
+ IMMED_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
+ }
+ nvc0->state.prim_restart = info->primitive_restart;
+ } else
+ if (info->primitive_restart) {
+ BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_INDEX), 1);
+ PUSH_DATA (push, info->restart_index);
+
+ if (info->restart_index > 65535)
+ shorten = FALSE;
+ }
+
+ nvc0_draw_elements(nvc0, shorten,
+ info->mode, info->start, info->count,
+ info->instance_count, info->index_bias);
+ } else
+ if (unlikely(info->count_from_stream_output)) {
+ nvc0_draw_stream_output(nvc0, info);
+ } else {
+ nvc0_draw_arrays(nvc0,
+ info->mode, info->start, info->count,
+ info->instance_count);
+ }
+ push->kick_notify = nvc0_default_kick_notify;
+
+ nvc0_release_user_vbufs(nvc0);
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
new file mode 100644
index 0000000..51e751c
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
@@ -0,0 +1,649 @@
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_resource.h"
+
+#include "nvc0/nvc0_3d.xml.h"
+
+struct push_context {
+ struct nouveau_pushbuf *push;
+
+ struct translate *translate;
+ void *dest;
+ const void *idxbuf;
+
+ uint32_t vertex_size;
+ uint32_t restart_index;
+ uint32_t instance_id;
+
+ boolean prim_restart;
+ boolean need_vertex_id;
+
+ struct {
+ boolean enabled;
+ boolean value;
+ unsigned stride;
+ const uint8_t *data;
+ } edgeflag;
+};
+
+static void nvc0_push_upload_vertex_ids(struct push_context *,
+ struct nvc0_context *,
+ const struct pipe_draw_info *);
+
+static void
+nvc0_push_context_init(struct nvc0_context *nvc0, struct push_context *ctx)
+{
+ ctx->push = nvc0->base.pushbuf;
+
+ ctx->translate = nvc0->vertex->translate;
+ ctx->vertex_size = nvc0->vertex->size;
+
+ ctx->need_vertex_id =
+ nvc0->vertprog->vp.need_vertex_id && (nvc0->vertex->num_elements < 32);
+
+ ctx->edgeflag.value = TRUE;
+ ctx->edgeflag.enabled = nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS;
+
+ /* silence warnings */
+ ctx->edgeflag.data = NULL;
+ ctx->edgeflag.stride = 0;
+}
+
+static INLINE void
+nvc0_vertex_configure_translate(struct nvc0_context *nvc0, int32_t index_bias)
+{
+ struct translate *translate = nvc0->vertex->translate;
+ unsigned i;
+
+ for (i = 0; i < nvc0->num_vtxbufs; ++i) {
+ const uint8_t *map;
+ const struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i];
+
+ if (likely(!vb->buffer))
+ map = (const uint8_t *)vb->user_buffer;
+ else
+ map = nouveau_resource_map_offset(&nvc0->base,
+ nv04_resource(vb->buffer), vb->buffer_offset, NOUVEAU_BO_RD);
+
+ if (index_bias && !unlikely(nvc0->vertex->instance_bufs & (1 << i)))
+ map += (intptr_t)index_bias * vb->stride;
+
+ translate->set_buffer(translate, i, map, vb->stride, ~0);
+ }
+}
+
+static INLINE void
+nvc0_push_map_idxbuf(struct push_context *ctx, struct nvc0_context *nvc0)
+{
+ if (nvc0->idxbuf.buffer) {
+ struct nv04_resource *buf = nv04_resource(nvc0->idxbuf.buffer);
+ ctx->idxbuf = nouveau_resource_map_offset(&nvc0->base,
+ buf, nvc0->idxbuf.offset, NOUVEAU_BO_RD);
+ } else {
+ ctx->idxbuf = nvc0->idxbuf.user_buffer;
+ }
+}
+
+static INLINE void
+nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0,
+ int32_t index_bias)
+{
+ unsigned attr = nvc0->vertprog->vp.edgeflag;
+ struct pipe_vertex_element *ve = &nvc0->vertex->element[attr].pipe;
+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index];
+ struct nv04_resource *buf = nv04_resource(vb->buffer);
+ unsigned offset = vb->buffer_offset + ve->src_offset;
+
+ ctx->edgeflag.stride = vb->stride;
+ ctx->edgeflag.data = nouveau_resource_map_offset(&nvc0->base,
+ buf, offset, NOUVEAU_BO_RD);
+ if (index_bias)
+ ctx->edgeflag.data += (intptr_t)index_bias * vb->stride;
+}
+
+static INLINE unsigned
+prim_restart_search_i08(const uint8_t *elts, unsigned push, uint8_t index)
+{
+ unsigned i;
+ for (i = 0; i < push && elts[i] != index; ++i);
+ return i;
+}
+
+static INLINE unsigned
+prim_restart_search_i16(const uint16_t *elts, unsigned push, uint16_t index)
+{
+ unsigned i;
+ for (i = 0; i < push && elts[i] != index; ++i);
+ return i;
+}
+
+static INLINE unsigned
+prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index)
+{
+ unsigned i;
+ for (i = 0; i < push && elts[i] != index; ++i);
+ return i;
+}
+
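+/* Edge flags are read as floats straight from the mapped vertex buffer; any
+ * non-zero value counts as TRUE.
+ */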
+static INLINE boolean
+ef_value(const struct push_context *ctx, uint32_t index)
+{
+ float *pf = (float *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
+ return *pf ? TRUE : FALSE;
+}
+
+static INLINE boolean
+ef_toggle(struct push_context *ctx)
+{
+ ctx->edgeflag.value = !ctx->edgeflag.value;
+ return ctx->edgeflag.value;
+}
+
+static INLINE unsigned
+ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n)
+{
+ unsigned i;
+ for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
+ return i;
+}
+
+static INLINE unsigned
+ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n)
+{
+ unsigned i;
+ for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
+ return i;
+}
+
+static INLINE unsigned
+ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n)
+{
+ unsigned i;
+ for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
+ return i;
+}
+
+static INLINE unsigned
+ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n)
+{
+ unsigned i;
+ for (i = 0; i < n && ef_value(ctx, start++) == ctx->edgeflag.value; ++i);
+ return i;
+}
+
+static INLINE void *
+nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nouveau_bo *bo;
+ uint64_t va;
+ const unsigned size = count * nvc0->vertex->size;
+
+ void *const dest = nouveau_scratch_get(&nvc0->base, size, &va, &bo);
+
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_START_HIGH(0)), 2);
+ PUSH_DATAh(push, va);
+ PUSH_DATA (push, va);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
+ PUSH_DATAh(push, va + size - 1);
+ PUSH_DATA (push, va + size - 1);
+
+ BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
+ bo);
+ nouveau_pushbuf_validate(push);
+
+ return dest;
+}
+
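+/* The disp_vertices_* helpers below run the translate module to convert the
+ * (indexed) vertices into the scratch vertex array set up by
+ * nvc0_push_setup_vertex_array() and then emit VERTEX_BUFFER_FIRST/COUNT
+ * ranges, falling back to single VB_ELEMENT_U32 submissions around edge flag
+ * toggles and re-emitting the restart index where primitive restart cuts a
+ * run.
+ */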
+static void
+disp_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
+{
+ struct nouveau_pushbuf *push = ctx->push;
+ struct translate *translate = ctx->translate;
+ const uint8_t *restrict elts = (uint8_t *)ctx->idxbuf + start;
+ unsigned pos = 0;
+
+ do {
+ unsigned nR = count;
+
+ if (unlikely(ctx->prim_restart))
+ nR = prim_restart_search_i08(elts, nR, ctx->restart_index);
+
+ translate->run_elts8(translate, elts, nR, 0, ctx->instance_id, ctx->dest);
+ count -= nR;
+ ctx->dest += nR * ctx->vertex_size;
+
+ while (nR) {
+ unsigned nE = nR;
+
+ if (unlikely(ctx->edgeflag.enabled))
+ nE = ef_toggle_search_i08(ctx, elts, nR);
+
+ PUSH_SPACE(push, 4);
+ if (likely(nE >= 2)) {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
+ PUSH_DATA (push, pos);
+ PUSH_DATA (push, nE);
+ } else
+ if (nE) {
+ if (pos <= 0xff) {
+ IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, pos);
+ }
+ }
+ if (unlikely(nE != nR))
+ IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));
+
+ pos += nE;
+ elts += nE;
+ nR -= nE;
+ }
+ if (count) {
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, ctx->restart_index);
+ ++elts;
+ ctx->dest += ctx->vertex_size;
+ ++pos;
+ --count;
+ }
+ } while (count);
+}
+
+static void
+disp_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
+{
+ struct nouveau_pushbuf *push = ctx->push;
+ struct translate *translate = ctx->translate;
+ const uint16_t *restrict elts = (uint16_t *)ctx->idxbuf + start;
+ unsigned pos = 0;
+
+ do {
+ unsigned nR = count;
+
+ if (unlikely(ctx->prim_restart))
+ nR = prim_restart_search_i16(elts, nR, ctx->restart_index);
+
+ translate->run_elts16(translate, elts, nR, 0, ctx->instance_id, ctx->dest);
+ count -= nR;
+ ctx->dest += nR * ctx->vertex_size;
+
+ while (nR) {
+ unsigned nE = nR;
+
+ if (unlikely(ctx->edgeflag.enabled))
+ nE = ef_toggle_search_i16(ctx, elts, nR);
+
+ PUSH_SPACE(push, 4);
+ if (likely(nE >= 2)) {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
+ PUSH_DATA (push, pos);
+ PUSH_DATA (push, nE);
+ } else
+ if (nE) {
+ if (pos <= 0xff) {
+ IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, pos);
+ }
+ }
+ if (unlikely(nE != nR))
+ IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));
+
+ pos += nE;
+ elts += nE;
+ nR -= nE;
+ }
+ if (count) {
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, ctx->restart_index);
+ ++elts;
+ ctx->dest += ctx->vertex_size;
+ ++pos;
+ --count;
+ }
+ } while (count);
+}
+
+static void
+disp_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
+{
+ struct nouveau_pushbuf *push = ctx->push;
+ struct translate *translate = ctx->translate;
+ const uint32_t *restrict elts = (uint32_t *)ctx->idxbuf + start;
+ unsigned pos = 0;
+
+ do {
+ unsigned nR = count;
+
+ if (unlikely(ctx->prim_restart))
+ nR = prim_restart_search_i32(elts, nR, ctx->restart_index);
+
+ translate->run_elts(translate, elts, nR, 0, ctx->instance_id, ctx->dest);
+ count -= nR;
+ ctx->dest += nR * ctx->vertex_size;
+
+ while (nR) {
+ unsigned nE = nR;
+
+ if (unlikely(ctx->edgeflag.enabled))
+ nE = ef_toggle_search_i32(ctx, elts, nR);
+
+ PUSH_SPACE(push, 4);
+ if (likely(nE >= 2)) {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
+ PUSH_DATA (push, pos);
+ PUSH_DATA (push, nE);
+ } else
+ if (nE) {
+ if (pos <= 0xff) {
+ IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, pos);
+ }
+ }
+ if (unlikely(nE != nR))
+ IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));
+
+ pos += nE;
+ elts += nE;
+ nR -= nE;
+ }
+ if (count) {
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, ctx->restart_index);
+ ++elts;
+ ctx->dest += ctx->vertex_size;
+ ++pos;
+ --count;
+ }
+ } while (count);
+}
+
+static void
+disp_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
+{
+ struct nouveau_pushbuf *push = ctx->push;
+ struct translate *translate = ctx->translate;
+ unsigned pos = 0;
+
+ translate->run(translate, start, count, 0, ctx->instance_id, ctx->dest);
+ do {
+ unsigned nr = count;
+
+ if (unlikely(ctx->edgeflag.enabled))
+ nr = ef_toggle_search_seq(ctx, start + pos, nr);
+
+ PUSH_SPACE(push, 4);
+ if (likely(nr)) {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
+ PUSH_DATA (push, pos);
+ PUSH_DATA (push, nr);
+ }
+ if (unlikely(nr != count))
+ IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));
+
+ pos += nr;
+ count -= nr;
+ } while (count);
+}
+
+
+#define NVC0_PRIM_GL_CASE(n) \
+ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+static INLINE unsigned
+nvc0_prim_gl(unsigned prim)
+{
+ switch (prim) {
+ NVC0_PRIM_GL_CASE(POINTS);
+ NVC0_PRIM_GL_CASE(LINES);
+ NVC0_PRIM_GL_CASE(LINE_LOOP);
+ NVC0_PRIM_GL_CASE(LINE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLES);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
+ NVC0_PRIM_GL_CASE(QUADS);
+ NVC0_PRIM_GL_CASE(QUAD_STRIP);
+ NVC0_PRIM_GL_CASE(POLYGON);
+ NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+ /*
+ NVC0_PRIM_GL_CASE(PATCHES); */
+ default:
+ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+ }
+}
+
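+/* Fallback draw path: vertex data is converted on the CPU with the
+ * translate module into a scratch buffer and drawn from there, one
+ * instance at a time.
+ */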
+void
+nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
+{
+ struct push_context ctx;
+ unsigned i, index_size;
+ unsigned inst_count = info->instance_count;
+ unsigned vert_count = info->count;
+ unsigned prim;
+
+ nvc0_push_context_init(nvc0, &ctx);
+
+ nvc0_vertex_configure_translate(nvc0, info->index_bias);
+
+ if (unlikely(ctx.edgeflag.enabled))
+ nvc0_push_map_edgeflag(&ctx, nvc0, info->index_bias);
+
+ ctx.prim_restart = info->primitive_restart;
+ ctx.restart_index = info->restart_index;
+
+ if (info->indexed) {
+ nvc0_push_map_idxbuf(&ctx, nvc0);
+ index_size = nvc0->idxbuf.index_size;
+
+ if (info->primitive_restart) {
+ BEGIN_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 2);
+ PUSH_DATA (ctx.push, 1);
+ PUSH_DATA (ctx.push, info->restart_index);
+ } else
+ if (nvc0->state.prim_restart) {
+ IMMED_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
+ }
+ nvc0->state.prim_restart = info->primitive_restart;
+ } else {
+ if (unlikely(info->count_from_stream_output)) {
+ struct pipe_context *pipe = &nvc0->base.pipe;
+ struct nvc0_so_target *targ;
+ targ = nvc0_so_target(info->count_from_stream_output);
+ pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
+ vert_count /= targ->stride;
+ }
+ ctx.idxbuf = NULL; /* shut up warnings */
+ index_size = 0;
+ }
+
+ ctx.instance_id = info->start_instance;
+
+ prim = nvc0_prim_gl(info->mode);
+ do {
+ PUSH_SPACE(ctx.push, 9);
+
+ ctx.dest = nvc0_push_setup_vertex_array(nvc0, vert_count);
+ if (unlikely(!ctx.dest))
+ break;
+
+ if (unlikely(ctx.need_vertex_id))
+ nvc0_push_upload_vertex_ids(&ctx, nvc0, info);
+
+ IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
+ BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (ctx.push, prim);
+ switch (index_size) {
+ case 1:
+ disp_vertices_i08(&ctx, info->start, vert_count);
+ break;
+ case 2:
+ disp_vertices_i16(&ctx, info->start, vert_count);
+ break;
+ case 4:
+ disp_vertices_i32(&ctx, info->start, vert_count);
+ break;
+ default:
+ assert(index_size == 0);
+ disp_vertices_seq(&ctx, info->start, vert_count);
+ break;
+ }
+ PUSH_SPACE(ctx.push, 1);
+ IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_END_GL), 0);
+
+ if (--inst_count) {
+ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ ++ctx.instance_id;
+ }
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP);
+ nouveau_scratch_done(&nvc0->base);
+ } while (inst_count);
+
+
+ /* reset state and unmap buffers (no-op) */
+
+ if (unlikely(!ctx.edgeflag.value)) {
+ PUSH_SPACE(ctx.push, 1);
+ IMMED_NVC0(ctx.push, NVC0_3D(EDGEFLAG), 1);
+ }
+
+ if (unlikely(ctx.need_vertex_id)) {
+ PUSH_SPACE(ctx.push, 4);
+ IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 0);
+ BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(1)), 1);
+ PUSH_DATA (ctx.push,
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST |
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32);
+ IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 0);
+ }
+
+ if (info->indexed)
+ nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer));
+ for (i = 0; i < nvc0->num_vtxbufs; ++i)
+ nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer));
+
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_fallback_count, 1);
+}
+
+static INLINE void
+copy_indices_u8(uint32_t *dst, const uint8_t *elts, uint32_t bias, unsigned n)
+{
+ unsigned i;
+ for (i = 0; i < n; ++i)
+ dst[i] = elts[i] + bias;
+}
+
+static INLINE void
+copy_indices_u16(uint32_t *dst, const uint16_t *elts, uint32_t bias, unsigned n)
+{
+ unsigned i;
+ for (i = 0; i < n; ++i)
+ dst[i] = elts[i] + bias;
+}
+
+static INLINE void
+copy_indices_u32(uint32_t *dst, const uint32_t *elts, uint32_t bias, unsigned n)
+{
+ unsigned i;
+ for (i = 0; i < n; ++i)
+ dst[i] = elts[i] + bias;
+}
+
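+/* Upload the (possibly biased) indices as an extra vertex attribute so the
+ * shader still sees correct vertex IDs: the data is bound as attribute 'a'
+ * fetched from vertex array 1, and VERTEX_ID_REPLACE sources the vertex ID
+ * from that attribute.
+ */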
+static void
+nvc0_push_upload_vertex_ids(struct push_context *ctx,
+ struct nvc0_context *nvc0,
+ const struct pipe_draw_info *info)
+{
+ struct nouveau_pushbuf *push = ctx->push;
+ struct nouveau_bo *bo;
+ uint64_t va;
+ uint32_t *data;
+ uint32_t format;
+ unsigned index_size = nvc0->idxbuf.index_size;
+ unsigned i;
+ unsigned a = nvc0->vertex->num_elements;
+
+ if (!index_size || info->index_bias)
+ index_size = 4;
+ data = (uint32_t *)nouveau_scratch_get(&nvc0->base,
+ info->count * index_size, &va, &bo);
+
+ BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
+ bo);
+ nouveau_pushbuf_validate(push);
+
+ if (info->indexed) {
+ if (!info->index_bias) {
+ memcpy(data, ctx->idxbuf, info->count * index_size);
+ } else {
+ switch (nvc0->idxbuf.index_size) {
+ case 1:
+ copy_indices_u8(data, ctx->idxbuf, info->index_bias, info->count);
+ break;
+ case 2:
+ copy_indices_u16(data, ctx->idxbuf, info->index_bias, info->count);
+ break;
+ default:
+ copy_indices_u32(data, ctx->idxbuf, info->index_bias, info->count);
+ break;
+ }
+ }
+ } else {
+ for (i = 0; i < info->count; ++i)
+ data[i] = i + (info->start + info->index_bias);
+ }
+
+ format = (1 << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT) |
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT;
+
+ switch (index_size) {
+ case 1:
+ format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8;
+ break;
+ case 2:
+ format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16;
+ break;
+ default:
+ format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32;
+ break;
+ }
+
+ PUSH_SPACE(push, 12);
+
+ if (unlikely(nvc0->state.instance_elts & 2)) {
+ nvc0->state.instance_elts &= ~2;
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(1)), 0);
+ }
+
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1);
+ PUSH_DATA (push, format);
+
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 3);
+ PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size);
+ PUSH_DATAh(push, va);
+ PUSH_DATA (push, va);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
+ PUSH_DATAh(push, va + info->count * index_size - 1);
+ PUSH_DATA (push, va + info->count * index_size - 1);
+
+#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) \
+ (((0x80 + (a) * 0x10) / 4) << NVC0_3D_VERTEX_ID_REPLACE_SOURCE__SHIFT)
+
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ID_REPLACE), 1);
+ PUSH_DATA (push, NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) | 1);
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
new file mode 100644
index 0000000..5871f59
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_video.h"
+
+#include "util/u_sampler.h"
+#include "util/u_format.h"
+
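+/* Decoding runs on three separate engines: BSP parses the bitstream, VP
+ * does the actual decoding and PPP writes out the final NV12 picture; each
+ * stage is kicked in turn per frame.
+ */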
+static void
+nvc0_decoder_decode_bitstream(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *video_target,
+ struct pipe_picture_desc *picture,
+ unsigned num_buffers,
+ const void *const *data,
+ const unsigned *num_bytes)
+{
+ struct nouveau_vp3_decoder *dec = (struct nouveau_vp3_decoder *)decoder;
+ struct nouveau_vp3_video_buffer *target = (struct nouveau_vp3_video_buffer *)video_target;
+ uint32_t comm_seq = ++dec->fence_seq;
+ union pipe_desc desc;
+
+ unsigned vp_caps, is_ref, ret;
+ struct nouveau_vp3_video_buffer *refs[16] = {};
+
+ desc.base = picture;
+
+ assert(target->base.buffer_format == PIPE_FORMAT_NV12);
+
+ ret = nvc0_decoder_bsp(dec, desc, target, comm_seq,
+ num_buffers, data, num_bytes,
+ &vp_caps, &is_ref, refs);
+
+ /* did we decode the bitstream correctly? */
+ assert(ret == 2);
+
+ nvc0_decoder_vp(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
+ nvc0_decoder_ppp(dec, desc, target, comm_seq);
+}
+
+struct pipe_video_codec *
+nvc0_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ)
+{
+ struct nouveau_screen *screen = &((struct nvc0_context *)context)->screen->base;
+ struct nouveau_vp3_decoder *dec;
+ struct nouveau_pushbuf **push;
+ union nouveau_bo_config cfg;
+ bool kepler = screen->device->chipset >= 0xe0;
+
+ cfg.nvc0.tile_mode = 0x10;
+ cfg.nvc0.memtype = 0xfe;
+
+ int ret, i;
+ uint32_t codec = 1, ppp_codec = 3;
+ uint32_t timeout;
+ u32 tmp_size = 0;
+
+ if (getenv("XVMC_VL"))
+ return vl_create_decoder(context, templ);
+
+ if (templ->entrypoint != PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
+ debug_printf("%x\n", templ->entrypoint);
+ return NULL;
+ }
+
+ dec = CALLOC_STRUCT(nouveau_vp3_decoder);
+ if (!dec)
+ return NULL;
+ dec->client = screen->client;
+ dec->base = *templ;
+ nouveau_vp3_decoder_init_common(&dec->base);
+
+ if (!kepler) {
+ dec->bsp_idx = 5;
+ dec->vp_idx = 6;
+ dec->ppp_idx = 7;
+ } else {
+ dec->bsp_idx = 2;
+ dec->vp_idx = 2;
+ dec->ppp_idx = 2;
+ }
+
+ for (i = 0; i < 3; ++i)
+ if (i && !kepler) {
+ dec->channel[i] = dec->channel[0];
+ dec->pushbuf[i] = dec->pushbuf[0];
+ } else {
+ void *data;
+ u32 size;
+ struct nvc0_fifo nvc0_args = {};
+ struct nve0_fifo nve0_args = {};
+
+ if (!kepler) {
+ size = sizeof(nvc0_args);
+ data = &nvc0_args;
+ } else {
+ unsigned engine[] = {
+ NVE0_FIFO_ENGINE_BSP,
+ NVE0_FIFO_ENGINE_VP,
+ NVE0_FIFO_ENGINE_PPP
+ };
+
+ nve0_args.engine = engine[i];
+ size = sizeof(nve0_args);
+ data = &nve0_args;
+ }
+
+ ret = nouveau_object_new(&screen->device->object, 0,
+ NOUVEAU_FIFO_CHANNEL_CLASS,
+ data, size, &dec->channel[i]);
+
+ if (!ret)
+ ret = nouveau_pushbuf_new(screen->client, dec->channel[i], 4,
+ 32 * 1024, true, &dec->pushbuf[i]);
+ if (ret)
+ break;
+ }
+ push = dec->pushbuf;
+
+ if (!kepler) {
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[0], 0x390b1, 0x90b1, NULL, 0, &dec->bsp);
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[1], 0x190b2, 0x90b2, NULL, 0, &dec->vp);
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[2], 0x290b3, 0x90b3, NULL, 0, &dec->ppp);
+ } else {
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[0], 0x95b1, 0x95b1, NULL, 0, &dec->bsp);
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[1], 0x95b2, 0x95b2, NULL, 0, &dec->vp);
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[2], 0x90b3, 0x90b3, NULL, 0, &dec->ppp);
+ }
+ if (ret)
+ goto fail;
+
+ BEGIN_NVC0(push[0], SUBC_BSP(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push[0], dec->bsp->handle);
+
+ BEGIN_NVC0(push[1], SUBC_VP(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push[1], dec->vp->handle);
+
+ BEGIN_NVC0(push[2], SUBC_PPP(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push[2], dec->ppp->handle);
+
+ dec->base.context = context;
+ dec->base.decode_bitstream = nvc0_decoder_decode_bitstream;
+
+ for (i = 0; i < NOUVEAU_VP3_VIDEO_QDEPTH && !ret; ++i)
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
+ 0, 1 << 20, &cfg, &dec->bsp_bo[i]);
+ if (!ret)
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
+ 0x100, 4 << 20, &cfg, &dec->inter_bo[0]);
+ if (!ret) {
+ if (!kepler)
+ nouveau_bo_ref(dec->inter_bo[0], &dec->inter_bo[1]);
+ else
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
+ 0x100, dec->inter_bo[0]->size, &cfg,
+ &dec->inter_bo[1]);
+ }
+ if (ret)
+ goto fail;
+
+ switch (u_reduce_video_profile(templ->profile)) {
+ case PIPE_VIDEO_FORMAT_MPEG12: {
+ codec = 1;
+ assert(templ->max_references <= 2);
+ break;
+ }
+ case PIPE_VIDEO_FORMAT_MPEG4: {
+ codec = 4;
+ tmp_size = mb(templ->height)*16 * mb(templ->width)*16;
+ assert(templ->max_references <= 2);
+ break;
+ }
+ case PIPE_VIDEO_FORMAT_VC1: {
+ ppp_codec = codec = 2;
+ tmp_size = mb(templ->height)*16 * mb(templ->width)*16;
+ assert(templ->max_references <= 2);
+ break;
+ }
+ case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
+ codec = 3;
+ dec->tmp_stride = 16 * mb_half(templ->width) * nouveau_vp3_video_align(templ->height) * 3 / 2;
+ tmp_size = dec->tmp_stride * (templ->max_references + 1);
+ assert(templ->max_references <= 16);
+ break;
+ }
+ default:
+ fprintf(stderr, "invalid codec\n");
+ goto fail;
+ }
+
+ if (screen->device->chipset < 0xd0) {
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0,
+ 0x4000, &cfg, &dec->fw_bo);
+ if (ret)
+ goto fail;
+
+ ret = nouveau_vp3_load_firmware(dec, templ->profile, screen->device->chipset);
+ if (ret)
+ goto fw_fail;
+ }
+
+ if (codec != 3) {
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0,
+ 0x400, &cfg, &dec->bitplane_bo);
+ if (ret)
+ goto fail;
+ }
+
+ dec->ref_stride = mb(templ->width)*16 * (mb_half(templ->height)*32 + nouveau_vp3_video_align(templ->height)/2);
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0,
+ dec->ref_stride * (templ->max_references+2) + tmp_size,
+ &cfg, &dec->ref_bo);
+ if (ret)
+ goto fail;
+
+ timeout = 0;
+
+ BEGIN_NVC0(push[0], SUBC_BSP(0x200), 2);
+ PUSH_DATA (push[0], codec);
+ PUSH_DATA (push[0], timeout);
+
+ BEGIN_NVC0(push[1], SUBC_VP(0x200), 2);
+ PUSH_DATA (push[1], codec);
+ PUSH_DATA (push[1], timeout);
+
+ BEGIN_NVC0(push[2], SUBC_PPP(0x200), 2);
+ PUSH_DATA (push[2], ppp_codec);
+ PUSH_DATA (push[2], timeout);
+
+ ++dec->fence_seq;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART|NOUVEAU_BO_MAP,
+ 0, 0x1000, NULL, &dec->fence_bo);
+ if (ret)
+ goto fail;
+
+ nouveau_bo_map(dec->fence_bo, NOUVEAU_BO_RDWR, screen->client);
+ dec->fence_map = dec->fence_bo->map;
+ dec->fence_map[0] = dec->fence_map[4] = dec->fence_map[8] = 0;
+ dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map)));
+
+ /* So let's test whether the fence is working. */
+ nouveau_pushbuf_space(push[0], 6, 1, 0);
+ PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
+ BEGIN_NVC0(push[0], SUBC_BSP(0x240), 3);
+ PUSH_DATAh(push[0], dec->fence_bo->offset);
+ PUSH_DATA (push[0], dec->fence_bo->offset);
+ PUSH_DATA (push[0], dec->fence_seq);
+
+ BEGIN_NVC0(push[0], SUBC_BSP(0x304), 1);
+ PUSH_DATA (push[0], 0);
+ PUSH_KICK (push[0]);
+
+ nouveau_pushbuf_space(push[1], 6, 1, 0);
+ PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
+ BEGIN_NVC0(push[1], SUBC_VP(0x240), 3);
+ PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10));
+ PUSH_DATA (push[1], (dec->fence_bo->offset + 0x10));
+ PUSH_DATA (push[1], dec->fence_seq);
+
+ BEGIN_NVC0(push[1], SUBC_VP(0x304), 1);
+ PUSH_DATA (push[1], 0);
+ PUSH_KICK (push[1]);
+
+ nouveau_pushbuf_space(push[2], 6, 1, 0);
+ PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
+ BEGIN_NVC0(push[2], SUBC_PPP(0x240), 3);
+ PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20));
+ PUSH_DATA (push[2], (dec->fence_bo->offset + 0x20));
+ PUSH_DATA (push[2], dec->fence_seq);
+
+ BEGIN_NVC0(push[2], SUBC_PPP(0x304), 1);
+ PUSH_DATA (push[2], 0);
+ PUSH_KICK (push[2]);
+
+ usleep(100);
+ while (dec->fence_seq > dec->fence_map[0] ||
+ dec->fence_seq > dec->fence_map[4] ||
+ dec->fence_seq > dec->fence_map[8]) {
+ debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]);
+ usleep(100);
+ }
+ debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]);
+#endif
+
+ return &dec->base;
+
+fw_fail:
+ debug_printf("Cannot create decoder without firmware.\n");
+ dec->base.destroy(&dec->base);
+ return NULL;
+
+fail:
+ debug_printf("Creation failed: %s (%i)\n", strerror(-ret), ret);
+ dec->base.destroy(&dec->base);
+ return NULL;
+}
+
+struct pipe_video_buffer *
+nvc0_video_buffer_create(struct pipe_context *pipe,
+ const struct pipe_video_buffer *templat)
+{
+ return nouveau_vp3_video_buffer_create(
+ pipe, templat, NVC0_RESOURCE_FLAG_VIDEO);
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video.h b/src/gallium/drivers/nouveau/nvc0/nvc0_video.h
new file mode 100644
index 0000000..9ee0280
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_screen.h"
+#include "nouveau_vp3_video.h"
+
+#include "vl/vl_decoder.h"
+#include "vl/vl_types.h"
+
+#include "util/u_video.h"
+
+extern unsigned
+nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target,
+ unsigned comm_seq, unsigned num_buffers,
+ const void *const *data, const unsigned *num_bytes,
+ unsigned *vp_caps, unsigned *is_ref,
+ struct nouveau_vp3_video_buffer *refs[16]);
+
+extern void
+nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target, unsigned comm_seq,
+ unsigned caps, unsigned is_ref,
+ struct nouveau_vp3_video_buffer *refs[16]);
+
+extern void
+nvc0_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target, unsigned comm_seq);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
new file mode 100644
index 0000000..40696fa
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_video.h"
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+static void dump_comm_bsp(struct comm *comm)
+{
+ unsigned idx = comm->bsp_cur_index & 0xf;
+ debug_printf("Cur seq: %x, bsp byte ofs: %x\n", comm->bsp_cur_index, comm->byte_ofs);
+ debug_printf("Status: %08x, pos: %08x\n", comm->status[idx], comm->pos[idx]);
+}
+#endif
+
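+/* Upload the bitstream and picture parameters into the per-frame BSP buffer
+ * and kick the BSP engine; the status it reports back (2 on success) is
+ * checked by the caller.
+ */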
+unsigned
+nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target,
+ unsigned comm_seq, unsigned num_buffers,
+ const void *const *data, const unsigned *num_bytes,
+ unsigned *vp_caps, unsigned *is_ref,
+ struct nouveau_vp3_video_buffer *refs[16])
+{
+ struct nouveau_pushbuf *push = dec->pushbuf[0];
+ enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
+ uint32_t bsp_addr, comm_addr, inter_addr;
+ uint32_t slice_size, bucket_size, ring_size;
+ uint32_t caps;
+ int ret;
+ struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
+ struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
+ unsigned fence_extra = 0;
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+ { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+#if NOUVEAU_VP3_DEBUG_FENCE
+ { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
+#endif
+ { dec->bitplane_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ };
+ int num_refs = sizeof(bo_refs)/sizeof(*bo_refs);
+
+ if (!dec->bitplane_bo)
+ num_refs--;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ fence_extra = 4;
+#endif
+
+ ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client);
+ if (ret) {
+ debug_printf("map failed: %i %s\n", ret, strerror(-ret));
+ return -1;
+ }
+
+ caps = nouveau_vp3_bsp(dec, desc, target, comm_seq,
+ num_buffers, data, num_bytes);
+
+ nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
+
+ nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 7) + fence_extra + 2, num_refs, 0);
+ nouveau_pushbuf_refn(push, bo_refs, num_refs);
+
+ bsp_addr = bsp_bo->offset >> 8;
+ inter_addr = inter_bo->offset >> 8;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ memset(dec->comm, 0, 0x200);
+ comm_addr = (dec->fence_bo->offset + COMM_OFFSET) >> 8;
+#else
+ comm_addr = bsp_addr + (COMM_OFFSET>>8);
+#endif
+
+ BEGIN_NVC0(push, SUBC_BSP(0x700), 5);
+ PUSH_DATA (push, caps); // 700 cmd
+ PUSH_DATA (push, bsp_addr + 1); // 704 strparm_bsp
+ PUSH_DATA (push, bsp_addr + 7); // 708 str addr
+ PUSH_DATA (push, comm_addr); // 70c comm
+ PUSH_DATA (push, comm_seq); // 710 seq
+
+ if (codec != PIPE_VIDEO_FORMAT_MPEG4_AVC) {
+ u32 bitplane_addr;
+
+ bitplane_addr = dec->bitplane_bo->offset >> 8;
+
+ nouveau_vp3_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size);
+ BEGIN_NVC0(push, SUBC_BSP(0x400), 6);
+ PUSH_DATA (push, bsp_addr); // 400 picparm addr
+ PUSH_DATA (push, inter_addr); // 404 interparm addr
+ PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 408 interdata addr
+ PUSH_DATA (push, ring_size << 8); // 40c interdata_size
+ PUSH_DATA (push, bitplane_addr); // 410 BITPLANE_DATA
+ PUSH_DATA (push, 0x400); // 414 BITPLANE_DATA_SIZE
+ } else {
+ nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
+ BEGIN_NVC0(push, SUBC_BSP(0x400), 8);
+ PUSH_DATA (push, bsp_addr); // 400 picparm addr
+ PUSH_DATA (push, inter_addr); // 404 interparm addr
+ PUSH_DATA (push, slice_size << 8); // 408 interparm size?
+ PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 40c interdata addr
+ PUSH_DATA (push, ring_size << 8); // 410 interdata size
+ PUSH_DATA (push, inter_addr + slice_size); // 414 bucket?
+ PUSH_DATA (push, bucket_size << 8); // 418 bucket size? unshifted..
+ PUSH_DATA (push, 0); // 41c targets
+ // TODO: Double check 414 / 418 with nvidia trace
+ }
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ BEGIN_NVC0(push, SUBC_BSP(0x240), 3);
+ PUSH_DATAh(push, dec->fence_bo->offset);
+ PUSH_DATA (push, dec->fence_bo->offset);
+ PUSH_DATA (push, dec->fence_seq);
+
+ BEGIN_NVC0(push, SUBC_BSP(0x300), 1);
+ PUSH_DATA (push, 1);
+ PUSH_KICK (push);
+
+ {
+ unsigned spin = 0;
+ do {
+ usleep(100);
+ if ((spin++ & 0xff) == 0xff) {
+ debug_printf("b%u: %u\n", dec->fence_seq, dec->fence_map[0]);
+ dump_comm_bsp(dec->comm);
+ }
+ } while (dec->fence_seq > dec->fence_map[0]);
+ }
+
+ dump_comm_bsp(dec->comm);
+ return dec->comm->status[comm_seq & 0xf];
+#else
+ BEGIN_NVC0(push, SUBC_BSP(0x300), 1);
+ PUSH_DATA (push, 0);
+ PUSH_KICK (push);
+ return 2;
+#endif
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c
new file mode 100644
index 0000000..4ceec4f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_video.h"
+
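+/* Point the PPP engine at the decoded source picture and at the luma and
+ * chroma planes of the destination video buffer.
+ */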
+static void
+nvc0_decoder_setup_ppp(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target, uint32_t low700) {
+ struct nouveau_pushbuf *push = dec->pushbuf[2];
+
+ uint32_t stride_in = mb(dec->base.width);
+ uint32_t stride_out = mb(target->resources[0]->width0);
+ uint32_t dec_h = mb(dec->base.height);
+ uint32_t dec_w = mb(dec->base.width);
+ uint64_t in_addr;
+ uint32_t y2, cbcr, cbcr2, i;
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+ { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+ { dec->ref_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+#if NOUVEAU_VP3_DEBUG_FENCE
+ { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
+#endif
+ };
+ unsigned num_refs = sizeof(bo_refs)/sizeof(*bo_refs);
+
+ for (i = 0; i < 2; ++i) {
+ struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i];
+ bo_refs[i].bo = mt->base.bo;
+ }
+
+ nouveau_pushbuf_refn(push, bo_refs, num_refs);
+ nouveau_vp3_ycbcr_offsets(dec, &y2, &cbcr, &cbcr2);
+
+ BEGIN_NVC0(push, SUBC_PPP(0x700), 10);
+ in_addr = nouveau_vp3_video_addr(dec, target) >> 8;
+
+ PUSH_DATA (push, (stride_out << 24) | (stride_out << 16) | low700); // 700
+ PUSH_DATA (push, (stride_in << 24) | (stride_in << 16) | (dec_h << 8) | dec_w); // 704
+ assert(dec_w == stride_in);
+
+ /* Input: */
+ PUSH_DATA (push, in_addr); // 708
+ PUSH_DATA (push, in_addr + y2); // 70c
+ PUSH_DATA (push, in_addr + cbcr); // 710
+ PUSH_DATA (push, in_addr + cbcr2); // 714
+
+ for (i = 0; i < 2; ++i) {
+ struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i];
+
+ PUSH_DATA (push, mt->base.address >> 8);
+ PUSH_DATA (push, (mt->base.address + mt->total_size/2) >> 8);
+ mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ }
+}
+
+static uint32_t
+nvc0_decoder_vc1_ppp(struct nouveau_vp3_decoder *dec, struct pipe_vc1_picture_desc *desc, struct nouveau_vp3_video_buffer *target) {
+ struct nouveau_pushbuf *push = dec->pushbuf[2];
+
+ nvc0_decoder_setup_ppp(dec, target, 0x1412);
+ assert(!desc->deblockEnable);
+ assert(!(dec->base.width & 0xf));
+ assert(!(dec->base.height & 0xf));
+
+ BEGIN_NVC0(push, SUBC_PPP(0x400), 1);
+ PUSH_DATA (push, desc->pquant << 11);
+
+ // 728 = wtf?
+ return 0x10;
+}
+
+void
+nvc0_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct nouveau_vp3_video_buffer *target, unsigned comm_seq) {
+ enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
+ struct nouveau_pushbuf *push = dec->pushbuf[2];
+ unsigned ppp_caps = 0x10;
+ unsigned fence_extra = 0;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ fence_extra = 4;
+#endif
+
+ nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 2 : 0) + 3 + fence_extra + 2, 4, 0);
+
+ switch (codec) {
+ case PIPE_VIDEO_FORMAT_MPEG12: {
+ unsigned mpeg2 = dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1;
+ nvc0_decoder_setup_ppp(dec, target, 0x1410 | mpeg2);
+ break;
+ }
+ case PIPE_VIDEO_FORMAT_MPEG4: nvc0_decoder_setup_ppp(dec, target, 0x1414); break;
+ case PIPE_VIDEO_FORMAT_VC1: ppp_caps = nvc0_decoder_vc1_ppp(dec, desc.vc1, target); break;
+ case PIPE_VIDEO_FORMAT_MPEG4_AVC: nvc0_decoder_setup_ppp(dec, target, 0x1413); break;
+ default: assert(0);
+ }
+ BEGIN_NVC0(push, SUBC_PPP(0x734), 2);
+ PUSH_DATA (push, comm_seq);
+ PUSH_DATA (push, ppp_caps);
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ BEGIN_NVC0(push, SUBC_PPP(0x240), 3);
+ PUSH_DATAh(push, (dec->fence_bo->offset + 0x20));
+ PUSH_DATA (push, (dec->fence_bo->offset + 0x20));
+ PUSH_DATA (push, dec->fence_seq);
+
+ BEGIN_NVC0(push, SUBC_PPP(0x300), 1);
+ PUSH_DATA (push, 1);
+ PUSH_KICK (push);
+
+ {
+ unsigned spin = 0;
+
+ do {
+ usleep(100);
+ if ((spin++ & 0xff) == 0xff)
+ debug_printf("p%u: %u\n", dec->fence_seq, dec->fence_map[8]);
+ } while (dec->fence_seq > dec->fence_map[8]);
+ }
+#else
+ BEGIN_NVC0(push, SUBC_PPP(0x300), 1);
+ PUSH_DATA (push, 0);
+ PUSH_KICK (push);
+#endif
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c
new file mode 100644
index 0000000..0d152b9
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_video.h"
+#include <sys/mman.h>
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32 comm_seq,
+ struct nouveau_bo *inter_bo, unsigned slice_size)
+{
+ unsigned i, idx = comm->pvp_cur_index & 0xf;
+ debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage);
+#if 0
+ debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs);
+ debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index);
+
+ for (i = 0; i != comm->irq_index; ++i)
+ debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]);
+ for (i = 0; i != comm->parse_endpos_index; ++i)
+ debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]);
+#endif
+ debug_printf("mb_y = %u\n", comm->mb_y[idx]);
+ if (comm->status_vp[idx] == 1)
+ return;
+
+ if ((comm->pvp_stage & 0xff) != 0xff) {
+ unsigned *map;
+ assert(nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client) >= 0);
+ map = inter_bo->map;
+ for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) {
+ debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]);
+ }
+ munmap(inter_bo->map, inter_bo->size);
+ inter_bo->map = NULL;
+ }
+ assert((comm->pvp_stage & 0xff) == 0xff);
+}
+#endif
+
+static void
+nvc0_decoder_kick_ref(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target)
+{
+ dec->refs[target->valid_ref].vidbuf = NULL;
+ dec->refs[target->valid_ref].last_used = 0;
+// debug_printf("Unreffed %p\n", target);
+}
+
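+/* Kick the VP engine for one frame: it gets the picture parameters produced
+ * by the BSP stage, the intermediate buffer, the firmware image (when a
+ * separate one is loaded) and the reference/target picture addresses.
+ */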
+void
+nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target, unsigned comm_seq,
+ unsigned caps, unsigned is_ref,
+ struct nouveau_vp3_video_buffer *refs[16])
+{
+ struct nouveau_pushbuf *push = dec->pushbuf[1];
+ uint32_t bsp_addr, comm_addr, inter_addr, ucode_addr, pic_addr[17], last_addr, null_addr;
+ uint32_t slice_size, bucket_size, ring_size, i;
+ enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
+ struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
+ struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
+ u32 fence_extra = 0, codec_extra = 0;
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+ { dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+ { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+#if NOUVEAU_VP3_DEBUG_FENCE
+ { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
+#endif
+ { dec->fw_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+ };
+ int num_refs = sizeof(bo_refs)/sizeof(*bo_refs) - !dec->fw_bo;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ fence_extra = 4;
+#endif
+
+ if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
+ nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
+ codec_extra += 2;
+ } else
+ nouveau_vp3_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size);
+
+ if (dec->base.max_references > 2)
+ codec_extra += 1 + (dec->base.max_references - 2);
+
+ pic_addr[16] = nouveau_vp3_video_addr(dec, target) >> 8;
+ last_addr = null_addr = nouveau_vp3_video_addr(dec, NULL) >> 8;
+
+ for (i = 0; i < dec->base.max_references; ++i) {
+ if (!refs[i])
+ pic_addr[i] = last_addr;
+ else if (dec->refs[refs[i]->valid_ref].vidbuf == refs[i])
+ last_addr = pic_addr[i] = nouveau_vp3_video_addr(dec, refs[i]) >> 8;
+ else
+ pic_addr[i] = null_addr;
+ }
+ if (!is_ref)
+ nvc0_decoder_kick_ref(dec, target);
+
+ nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) +
+ 6 + codec_extra + fence_extra + 2, num_refs, 0);
+
+ nouveau_pushbuf_refn(push, bo_refs, num_refs);
+
+ bsp_addr = bsp_bo->offset >> 8;
+#if NOUVEAU_VP3_DEBUG_FENCE
+ comm_addr = (dec->fence_bo->offset + COMM_OFFSET)>>8;
+#else
+ comm_addr = bsp_addr + (COMM_OFFSET>>8);
+#endif
+ inter_addr = inter_bo->offset >> 8;
+ if (dec->fw_bo)
+ ucode_addr = dec->fw_bo->offset >> 8;
+ else
+ ucode_addr = 0;
+
+ BEGIN_NVC0(push, SUBC_VP(0x700), 7);
+ PUSH_DATA (push, caps); // 700
+ PUSH_DATA (push, comm_seq); // 704
+ PUSH_DATA (push, 0); // 708 fuc targets, ignored for nvc0
+ PUSH_DATA (push, dec->fw_sizes); // 70c
+ PUSH_DATA (push, bsp_addr+(VP_OFFSET>>8)); // 710 picparm_addr
+ PUSH_DATA (push, inter_addr); // 714 inter_parm
+ PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 718 inter_data_ofs
+
+ if (bucket_size) {
+ uint64_t tmpimg_addr = dec->ref_bo->offset + dec->ref_stride * (dec->base.max_references+2);
+
+ BEGIN_NVC0(push, SUBC_VP(0x71c), 2);
+ PUSH_DATA (push, tmpimg_addr >> 8); // 71c
+ PUSH_DATA (push, inter_addr + slice_size); // 720 bucket_ofs
+ }
+
+ BEGIN_NVC0(push, SUBC_VP(0x724), 5);
+ PUSH_DATA (push, comm_addr); // 724
+ PUSH_DATA (push, ucode_addr); // 728
+ PUSH_DATA (push, pic_addr[16]); // 734
+ PUSH_DATA (push, pic_addr[0]); // 72c
+ PUSH_DATA (push, pic_addr[1]); // 730
+
+ if (dec->base.max_references > 2) {
+ int i;
+
+ BEGIN_NVC0(push, SUBC_VP(0x400), dec->base.max_references - 2);
+ for (i = 2; i < dec->base.max_references; ++i) {
+ assert(0x400 + (i - 2) * 4 < 0x438);
+ PUSH_DATA (push, pic_addr[i]);
+ }
+ }
+
+ if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
+ BEGIN_NVC0(push, SUBC_VP(0x438), 1);
+ PUSH_DATA (push, desc.h264->slice_count);
+ }
+
+ //debug_printf("Decoding %08lx with %08lx and %08lx\n", pic_addr[16], pic_addr[0], pic_addr[1]);
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ BEGIN_NVC0(push, SUBC_VP(0x240), 3);
+ PUSH_DATAh(push, (dec->fence_bo->offset + 0x10));
+ PUSH_DATA (push, (dec->fence_bo->offset + 0x10));
+ PUSH_DATA (push, dec->fence_seq);
+
+ BEGIN_NVC0(push, SUBC_VP(0x300), 1);
+ PUSH_DATA (push, 1);
+ PUSH_KICK(push);
+
+ {
+ unsigned spin = 0;
+ do {
+ usleep(100);
+ if ((spin++ & 0xff) == 0xff) {
+ debug_printf("v%u: %u\n", dec->fence_seq, dec->fence_map[4]);
+ dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8);
+ }
+ } while (dec->fence_seq > dec->fence_map[4]);
+ }
+ dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8);
+#else
+ BEGIN_NVC0(push, SUBC_VP(0x300), 1);
+ PUSH_DATA (push, 0);
+ PUSH_KICK (push);
+#endif
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
new file mode 100644
index 0000000..3514d9d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
@@ -0,0 +1,144 @@
+
+#ifndef __NVC0_WINSYS_H__
+#define __NVC0_WINSYS_H__
+
+#include <stdint.h>
+#include <unistd.h>
+
+#include "pipe/p_defines.h"
+
+#include "nouveau_winsys.h"
+#include "nouveau_buffer.h"
+
+#ifndef NV04_PFIFO_MAX_PACKET_LEN
+#define NV04_PFIFO_MAX_PACKET_LEN 2047
+#endif
+
+
+static INLINE void
+nv50_add_bufctx_resident_bo(struct nouveau_bufctx *bufctx, int bin,
+ unsigned flags, struct nouveau_bo *bo)
+{
+ nouveau_bufctx_refn(bufctx, bin, bo, flags)->priv = NULL;
+}
+
+static INLINE void
+nvc0_add_resident(struct nouveau_bufctx *bufctx, int bin,
+ struct nv04_resource *res, unsigned flags)
+{
+ struct nouveau_bufref *ref =
+ nouveau_bufctx_refn(bufctx, bin, res->bo, flags | res->domain);
+ ref->priv = res;
+ ref->priv_data = flags;
+}
+
+#define BCTX_REFN_bo(ctx, bin, fl, bo) \
+ nv50_add_bufctx_resident_bo(ctx, NVC0_BIND_##bin, fl, bo);
+
+#define BCTX_REFN(bctx, bin, res, acc) \
+ nvc0_add_resident(bctx, NVC0_BIND_##bin, res, NOUVEAU_BO_##acc)
+
+static INLINE void
+PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
+{
+ struct nouveau_pushbuf_refn ref = { bo, flags };
+ nouveau_pushbuf_refn(push, &ref, 1);
+}
+
+
+#define SUBC_3D(m) 0, (m)
+#define NVC0_3D(n) SUBC_3D(NVC0_3D_##n)
+#define NVE4_3D(n) SUBC_3D(NVE4_3D_##n)
+
+#define SUBC_COMPUTE(m) 1, (m)
+#define NVC0_COMPUTE(n) SUBC_COMPUTE(NVC0_COMPUTE_##n)
+#define NVE4_COMPUTE(n) SUBC_COMPUTE(NVE4_COMPUTE_##n)
+
+#define SUBC_M2MF(m) 2, (m)
+#define SUBC_P2MF(m) 2, (m)
+#define NVC0_M2MF(n) SUBC_M2MF(NVC0_M2MF_##n)
+#define NVE4_P2MF(n) SUBC_P2MF(NVE4_P2MF_##n)
+
+#define SUBC_2D(m) 3, (m)
+#define NVC0_2D(n) SUBC_2D(NVC0_2D_##n)
+
+#define SUBC_COPY(m) 4, (m)
+#define NVE4_COPY(n) SUBC_COPY(NVE4_COPY_##n)
+
+#define SUBC_SW(m) 7, (m)
+
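+/* Fermi command stream headers: bits 31:29 select the mode, 28:16 hold the
+ * word count (or the immediate value), 15:13 the subchannel and 12:0 the
+ * method address divided by 4.  SQ = incrementing methods, NI =
+ * non-incrementing, IL = inline immediate data, 1I = increment once after
+ * the first data word.
+ */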
+static INLINE uint32_t
+NVC0_FIFO_PKHDR_SQ(int subc, int mthd, unsigned size)
+{
+ return 0x20000000 | (size << 16) | (subc << 13) | (mthd >> 2);
+}
+
+static INLINE uint32_t
+NVC0_FIFO_PKHDR_NI(int subc, int mthd, unsigned size)
+{
+ return 0x60000000 | (size << 16) | (subc << 13) | (mthd >> 2);
+}
+
+static INLINE uint32_t
+NVC0_FIFO_PKHDR_IL(int subc, int mthd, uint8_t data)
+{
+ return 0x80000000 | (data << 16) | (subc << 13) | (mthd >> 2);
+}
+
+static INLINE uint32_t
+NVC0_FIFO_PKHDR_1I(int subc, int mthd, unsigned size)
+{
+ return 0xa0000000 | (size << 16) | (subc << 13) | (mthd >> 2);
+}
+
+
+static INLINE uint8_t
+nouveau_bo_memtype(const struct nouveau_bo *bo)
+{
+ return bo->config.nvc0.memtype;
+}
+
+
+static INLINE void
+PUSH_DATAh(struct nouveau_pushbuf *push, uint64_t data)
+{
+ *push->cur++ = (uint32_t)(data >> 32);
+}
+
+static INLINE void
+BEGIN_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
+{
+#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
+ PUSH_SPACE(push, size + 1);
+#endif
+ PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(subc, mthd, size));
+}
+
+static INLINE void
+BEGIN_NIC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
+{
+#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
+ PUSH_SPACE(push, size + 1);
+#endif
+ PUSH_DATA (push, NVC0_FIFO_PKHDR_NI(subc, mthd, size));
+}
+
+static INLINE void
+BEGIN_1IC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
+{
+#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
+ PUSH_SPACE(push, size + 1);
+#endif
+ PUSH_DATA (push, NVC0_FIFO_PKHDR_1I(subc, mthd, size));
+}
+
+static INLINE void
+IMMED_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, uint8_t data)
+{
+#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
+ PUSH_SPACE(push, 1);
+#endif
+ PUSH_DATA (push, NVC0_FIFO_PKHDR_IL(subc, mthd, data));
+}
+
+#endif /* __NVC0_WINSYS_H__ */
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
new file mode 100644
index 0000000..06c914f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -0,0 +1,652 @@
+/*
+ * Copyright 2012 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christoph Bumiller
+ */
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_compute.h"
+#include "nvc0/nve4_compute.h"
+
+#include "codegen/nv50_ir_driver.h"
+
+#ifdef DEBUG
+static void nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *);
+#endif
+
+
+int
+nve4_screen_compute_setup(struct nvc0_screen *screen,
+ struct nouveau_pushbuf *push)
+{
+ struct nouveau_device *dev = screen->base.device;
+ struct nouveau_object *chan = screen->base.channel;
+ unsigned i;
+ int ret;
+ uint32_t obj_class;
+
+ switch (dev->chipset & 0xf0) {
+ case 0xf0:
+ obj_class = NVF0_COMPUTE_CLASS; /* GK110 */
+ break;
+ case 0xe0:
+ obj_class = NVE4_COMPUTE_CLASS; /* GK104 */
+ break;
+ default:
+ NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
+ return -1;
+ }
+
+ ret = nouveau_object_new(chan, 0xbeef00c0, obj_class, NULL, 0,
+ &screen->compute);
+ if (ret) {
+ NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
+ return ret;
+ }
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, NVE4_CP_PARAM_SIZE, NULL,
+ &screen->parm);
+ if (ret)
+ return ret;
+
+ BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->compute->oclass);
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->tls->offset);
+ PUSH_DATA (push, screen->tls->offset);
+ /* No idea why there are 2. Divide size by 2 to be safe.
+ * Actually this might be per-MP TEMP size and looks like I'm only using
+ * 2 MPs instead of all 8.
+ */
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(0)), 3);
+ PUSH_DATAh(push, screen->tls->size / screen->mp_count);
+ PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
+ PUSH_DATA (push, 0xff);
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(1)), 3);
+ PUSH_DATAh(push, screen->tls->size / screen->mp_count);
+ PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
+ PUSH_DATA (push, 0xff);
+
+ /* Unified address space? Who needs that? Certainly not OpenCL.
+ *
+ * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be
+ * accessible. We cannot prevent that at the moment, so expect failure.
+ */
+ BEGIN_NVC0(push, NVE4_COMPUTE(LOCAL_BASE), 1);
+ PUSH_DATA (push, 1 << 24);
+ BEGIN_NVC0(push, NVE4_COMPUTE(SHARED_BASE), 1);
+ PUSH_DATA (push, 2 << 24);
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(CODE_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->text->offset);
+ PUSH_DATA (push, screen->text->offset);
+
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x0310), 1);
+ PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300);
+
+ /* NOTE: these do not affect the state used by the 3D object */
+ BEGIN_NVC0(push, NVE4_COMPUTE(TIC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset);
+ PUSH_DATA (push, screen->txc->offset);
+ PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
+ BEGIN_NVC0(push, NVE4_COMPUTE(TSC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset + 65536);
+ PUSH_DATA (push, screen->txc->offset + 65536);
+ PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
+
+ if (obj_class >= NVF0_COMPUTE_CLASS) {
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x0248), 1);
+ PUSH_DATA (push, 0x100);
+ BEGIN_NIC0(push, SUBC_COMPUTE(0x0248), 63);
+ for (i = 63; i >= 1; --i)
+ PUSH_DATA(push, 0x38000 | i);
+ IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);
+ IMMED_NVC0(push, SUBC_COMPUTE(0x518), 0);
+ }
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(TEX_CB_INDEX), 1);
+ PUSH_DATA (push, 0); /* does not interfere with 3D */
+
+ if (obj_class >= NVF0_COMPUTE_CLASS)
+ IMMED_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+
+ /* MS sample coordinate offsets: these do not work with _ALT modes! */
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
+ PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 64);
+ PUSH_DATA (push, 1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+ PUSH_DATA (push, 0); /* 0 */
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1); /* 1 */
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0); /* 2 */
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 1); /* 3 */
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 2); /* 4 */
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 3); /* 5 */
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 2); /* 6 */
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 3); /* 7 */
+ PUSH_DATA (push, 1);
+
+#ifdef DEBUG
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
+ PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 28);
+ PUSH_DATA (push, 1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 8);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);
+ PUSH_DATAh(push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);
+ PUSH_DATA (push, screen->tls->offset);
+ PUSH_DATAh(push, screen->tls->offset);
+ PUSH_DATA (push, screen->tls->size / 2); /* MP TEMP block size */
+ PUSH_DATA (push, screen->tls->size / 2 / 64); /* warp TEMP block size */
+ PUSH_DATA (push, 0); /* warp cfstack size */
+#endif
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
+
+ return 0;
+}
+
+
+static void
+nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv50_surface *sf;
+ struct nv04_resource *res;
+ uint32_t mask;
+ unsigned i;
+ const unsigned t = 1;
+
+ mask = nvc0->surfaces_dirty[t];
+ while (mask) {
+ i = ffs(mask) - 1;
+ mask &= ~(1 << i);
+
+ /*
+ * NVE4's surface load/store instructions receive all the information
+ * directly instead of via binding points, so we have to supply them.
+ */
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
+ PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 64);
+ PUSH_DATA (push, 1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+
+ nve4_set_surface_info(push, nvc0->surfaces[t][i], screen);
+
+ sf = nv50_surface(nvc0->surfaces[t][i]);
+ if (sf) {
+ res = nv04_resource(sf->base.texture);
+
+ if (sf->base.writable)
+ BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
+ else
+ BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
+ }
+ }
+ if (nvc0->surfaces_dirty[t]) {
+ BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
+ }
+
+ /* re-reference non-dirty surfaces */
+ mask = nvc0->surfaces_valid[t] & ~nvc0->surfaces_dirty[t];
+ while (mask) {
+ i = ffs(mask) - 1;
+ mask &= ~(1 << i);
+
+ sf = nv50_surface(nvc0->surfaces[t][i]);
+ res = nv04_resource(sf->base.texture);
+
+ if (sf->base.writable)
+ BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
+ else
+ BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
+ }
+
+ nvc0->surfaces_dirty[t] = 0;
+}
+
+
+/* Thankfully, textures with samplers follow the normal rules. */
+static void
+nve4_compute_validate_samplers(struct nvc0_context *nvc0)
+{
+ boolean need_flush = nve4_validate_tsc(nvc0, 5);
+ if (need_flush) {
+ BEGIN_NVC0(nvc0->base.pushbuf, NVE4_COMPUTE(TSC_FLUSH), 1);
+ PUSH_DATA (nvc0->base.pushbuf, 0);
+ }
+}
+/* (Code duplicated at bottom for various non-convincing reasons.
+ * E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC
+ * entries to avoid a subchannel switch.
+ * Same for texture cache flushes.
+ * Also, the bufctx differs, and more IFs in the 3D version looks ugly.)
+ */
+static void nve4_compute_validate_textures(struct nvc0_context *);
+
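+/* Texture and sampler bindings reach the compute kernel as handles in the
+ * driver parameter buffer; upload the dirty range and flush the CB.
+ */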
+static void
+nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ uint64_t address;
+ const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE);
+ unsigned i, n;
+ uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
+
+ if (!dirty)
+ return;
+ i = ffs(dirty) - 1;
+ n = util_logbase2(dirty) + 1 - i;
+ assert(n);
+
+ address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i);
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, address);
+ PUSH_DATA (push, address);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, n * 4);
+ PUSH_DATA (push, 0x1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + n);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+ PUSH_DATAp(push, &nvc0->tex_handles[s][i], n);
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
+
+ nvc0->textures_dirty[s] = 0;
+ nvc0->samplers_dirty[s] = 0;
+}
+
+
+static boolean
+nve4_compute_state_validate(struct nvc0_context *nvc0)
+{
+ if (!nvc0_compute_validate_program(nvc0))
+ return FALSE;
+ if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES)
+ nve4_compute_validate_textures(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS)
+ nve4_compute_validate_samplers(nvc0);
+ if (nvc0->dirty_cp & (NVC0_NEW_CP_TEXTURES | NVC0_NEW_CP_SAMPLERS))
+ nve4_compute_set_tex_handles(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_SURFACES)
+ nve4_compute_validate_surfaces(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_GLOBALS)
+ nvc0_validate_global_residents(nvc0,
+ nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL);
+
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);
+
+ nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
+ if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
+ return FALSE;
+ if (unlikely(nvc0->state.flushed))
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);
+
+ return TRUE;
+}
+
+
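+/* Upload user parameters and the block/grid layout (3 + 3 dwords plus a pad
+ * word) into the driver parameter buffer, then flush the constant cache so
+ * the kernel sees them.
+ */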
+static void
+nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
+ const uint *block_layout,
+ const uint *grid_layout)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_program *cp = nvc0->compprog;
+
+ if (cp->parm_size) {
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->parm->offset);
+ PUSH_DATA (push, screen->parm->offset);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, cp->parm_size);
+ PUSH_DATA (push, 0x1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+ PUSH_DATAp(push, input, cp->parm_size / 4);
+ }
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
+ PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 7 * 4);
+ PUSH_DATA (push, 0x1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + 7);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+ PUSH_DATAp(push, block_layout, 3);
+ PUSH_DATAp(push, grid_layout, 3);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
+}
+
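+/* The Kepler MP has 64 KiB of on-chip memory split between shared memory and
+ * L1 cache; pick the smallest shared partition that can hold the requested
+ * shared size (16K, 32K or 48K shared).
+ */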
+static INLINE uint8_t
+nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size)
+{
+ if (shared_size > (32 << 10))
+ return NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1;
+ if (shared_size > (16 << 10))
+ return NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1;
+ return NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1;
+}
+
+static void
+nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
+ struct nve4_cp_launch_desc *desc,
+ uint32_t label,
+ const uint *block_layout,
+ const uint *grid_layout)
+{
+ const struct nvc0_screen *screen = nvc0->screen;
+ const struct nvc0_program *cp = nvc0->compprog;
+ unsigned i;
+
+ nve4_cp_launch_desc_init_default(desc);
+
+ desc->entry = nvc0_program_symbol_offset(cp, label);
+
+ desc->griddim_x = grid_layout[0];
+ desc->griddim_y = grid_layout[1];
+ desc->griddim_z = grid_layout[2];
+ desc->blockdim_x = block_layout[0];
+ desc->blockdim_y = block_layout[1];
+ desc->blockdim_z = block_layout[2];
+
+ desc->shared_size = align(cp->cp.smem_size, 0x100);
+ desc->local_size_p = align(cp->cp.lmem_size, 0x10);
+ desc->local_size_n = 0;
+ desc->cstack_size = 0x800;
+ desc->cache_split = nve4_compute_derive_cache_split(nvc0, cp->cp.smem_size);
+
+ desc->gpr_alloc = cp->num_gprs;
+ desc->bar_alloc = cp->num_barriers;
+
+ for (i = 0; i < 7; ++i) {
+ const unsigned s = 5;
+ if (nvc0->constbuf[s][i].u.buf)
+ nve4_cp_launch_desc_set_ctx_cb(desc, i + 1, &nvc0->constbuf[s][i]);
+ }
+ nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE);
+}
+
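+/* The launch descriptor is 256 bytes and LAUNCH_DESC_ADDRESS takes its GPU
+ * address shifted right by 8, so it must sit on a 256-byte boundary; a
+ * 512-byte scratch allocation always contains a suitably aligned window.
+ */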
+static INLINE struct nve4_cp_launch_desc *
+nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
+ struct nouveau_bo **pbo, uint64_t *pgpuaddr)
+{
+ uint8_t *ptr = nouveau_scratch_get(nv, 512, pgpuaddr, pbo);
+ if (!ptr)
+ return NULL;
+ if (*pgpuaddr & 255) {
+ unsigned adj = 256 - (*pgpuaddr & 255);
+ ptr += adj;
+ *pgpuaddr += adj;
+ }
+ return (struct nve4_cp_launch_desc *)ptr;
+}
+
+void
+nve4_launch_grid(struct pipe_context *pipe,
+ const uint *block_layout, const uint *grid_layout,
+ uint32_t label,
+ const void *input)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nve4_cp_launch_desc *desc;
+ uint64_t desc_gpuaddr;
+ struct nouveau_bo *desc_bo;
+ int ret;
+
+ desc = nve4_compute_alloc_launch_desc(&nvc0->base, &desc_bo, &desc_gpuaddr);
+ if (!desc) {
+ ret = -1;
+ goto out;
+ }
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_DESC, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
+ desc_bo);
+
+ ret = !nve4_compute_state_validate(nvc0);
+ if (ret)
+ goto out;
+
+ nve4_compute_setup_launch_desc(nvc0, desc, label, block_layout, grid_layout);
+#ifdef DEBUG
+ if (debug_get_num_option("NV50_PROG_DEBUG", 0))
+ nve4_compute_dump_launch_desc(desc);
+#endif
+
+ nve4_compute_upload_input(nvc0, input, block_layout, grid_layout);
+
+ /* upload descriptor and flush */
+#if 0
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, desc_gpuaddr);
+ PUSH_DATA (push, desc_gpuaddr);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 256);
+ PUSH_DATA (push, 1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (256 / 4));
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));
+ PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4);
+ BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB | NVE4_COMPUTE_FLUSH_CODE);
+#endif
+ BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH_DESC_ADDRESS), 1);
+ PUSH_DATA (push, desc_gpuaddr >> 8);
+ BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH), 1);
+ PUSH_DATA (push, 0x3);
+ BEGIN_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
+ PUSH_DATA (push, 0);
+
+out:
+ if (ret)
+      NOUVEAU_ERR("Failed to launch grid!\n");
+ nouveau_scratch_done(&nvc0->base);
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC);
+}
+
+
+#define NVE4_TIC_ENTRY_INVALID 0x000fffff
+
+static void
+nve4_compute_validate_textures(struct nvc0_context *nvc0)
+{
+ struct nouveau_bo *txc = nvc0->screen->txc;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const unsigned s = 5;
+ unsigned i;
+ uint32_t commands[2][NVE4_CP_INPUT_TEX_MAX];
+ unsigned n[2] = { 0, 0 };
+
+ for (i = 0; i < nvc0->num_textures[s]; ++i) {
+ struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
+ struct nv04_resource *res;
+ const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
+
+ if (!tic) {
+ nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
+ continue;
+ }
+ res = nv04_resource(tic->pipe.texture);
+
+ if (tic->id < 0) {
+ tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
+
+ PUSH_SPACE(push, 16);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, txc->offset + (tic->id * 32));
+ PUSH_DATA (push, txc->offset + (tic->id * 32));
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 32);
+ PUSH_DATA (push, 1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 9);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+ PUSH_DATAp(push, &tic->tic[0], 8);
+
+ commands[0][n[0]++] = (tic->id << 4) | 1;
+ } else
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ commands[1][n[1]++] = (tic->id << 4) | 1;
+ }
+ nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
+
+ res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
+ nvc0->tex_handles[s][i] |= tic->id;
+ if (dirty)
+ BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
+ }
+ for (; i < nvc0->state.num_textures[s]; ++i)
+ nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
+
+ if (n[0]) {
+ BEGIN_NIC0(push, NVE4_COMPUTE(TIC_FLUSH), n[0]);
+ PUSH_DATAp(push, commands[0], n[0]);
+ }
+ if (n[1]) {
+ BEGIN_NIC0(push, NVE4_COMPUTE(TEX_CACHE_CTL), n[1]);
+ PUSH_DATAp(push, commands[1], n[1]);
+ }
+
+ nvc0->state.num_textures[s] = nvc0->num_textures[s];
+}
+
+
+#ifdef DEBUG
+static const char *nve4_cache_split_name(unsigned value)
+{
+ switch (value) {
+ case NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1: return "16K_SHARED_48K_L1";
+ case NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1: return "32K_SHARED_32K_L1";
+ case NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1: return "48K_SHARED_16K_L1";
+ default:
+ return "(invalid)";
+ }
+}
+
+static void
+nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc)
+{
+ const uint32_t *data = (const uint32_t *)desc;
+ unsigned i;
+ boolean zero = FALSE;
+
+ debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n");
+
+ for (i = 0; i < sizeof(*desc); i += 4) {
+ if (data[i / 4]) {
+ debug_printf("[%x]: 0x%08x\n", i, data[i / 4]);
+ zero = FALSE;
+ } else
+ if (!zero) {
+ debug_printf("...\n");
+ zero = TRUE;
+ }
+ }
+
+ debug_printf("entry = 0x%x\n", desc->entry);
+ debug_printf("grid dimensions = %ux%ux%u\n",
+ desc->griddim_x, desc->griddim_y, desc->griddim_z);
+ debug_printf("block dimensions = %ux%ux%u\n",
+ desc->blockdim_x, desc->blockdim_y, desc->blockdim_z);
+ debug_printf("s[] size: 0x%x\n", desc->shared_size);
+ debug_printf("l[] size: -0x%x / +0x%x\n",
+ desc->local_size_n, desc->local_size_p);
+ debug_printf("stack size: 0x%x\n", desc->cstack_size);
+ debug_printf("barrier count: %u\n", desc->bar_alloc);
+ debug_printf("$r count: %u\n", desc->gpr_alloc);
+ debug_printf("cache split: %s\n", nve4_cache_split_name(desc->cache_split));
+
+ for (i = 0; i < 8; ++i) {
+ uint64_t address;
+ uint32_t size = desc->cb[i].size;
+ boolean valid = !!(desc->cb_mask & (1 << i));
+
+ address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l;
+
+ if (!valid && !address && !size)
+ continue;
+ debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n",
+ i, address, size, valid ? "" : " (invalid)");
+ }
+}
+#endif
+
+#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
+static void
+nve4_compute_trap_info(struct nvc0_context *nvc0)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nouveau_bo *bo = screen->parm;
+ int ret, i;
+ volatile struct nve4_mp_trap_info *info;
+ uint8_t *map;
+
+ ret = nouveau_bo_map(bo, NOUVEAU_BO_RDWR, nvc0->base.client);
+ if (ret)
+ return;
+ map = (uint8_t *)bo->map;
+ info = (volatile struct nve4_mp_trap_info *)(map + NVE4_CP_PARAM_TRAP_INFO);
+
+ if (info->lock) {
+ debug_printf("trapstat = %08x\n", info->trapstat);
+ debug_printf("warperr = %08x\n", info->warperr);
+ debug_printf("PC = %x\n", info->pc);
+ debug_printf("tid = %u %u %u\n",
+ info->tid[0], info->tid[1], info->tid[2]);
+ debug_printf("ctaid = %u %u %u\n",
+ info->ctaid[0], info->ctaid[1], info->ctaid[2]);
+ for (i = 0; i <= 63; ++i)
+ debug_printf("$r%i = %08x\n", i, info->r[i]);
+ for (i = 0; i <= 6; ++i)
+ debug_printf("$p%i = %i\n", i, (info->flags >> i) & 1);
+ debug_printf("$c = %x\n", info->flags >> 12);
+ }
+ info->lock = 0;
+}
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
new file mode 100644
index 0000000..79862b7
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
@@ -0,0 +1,131 @@
+
+#ifndef NVE4_COMPUTE_H
+#define NVE4_COMPUTE_H
+
+#include "nv50/nv50_defs.xml.h"
+#include "nvc0/nve4_compute.xml.h"
+
+/* Input space is implemented as c0[], to which we bind the screen->parm bo.
+ */
+#define NVE4_CP_INPUT_USER 0x0000
+#define NVE4_CP_INPUT_USER_LIMIT 0x1000
+#define NVE4_CP_INPUT_GRID_INFO(i) (0x1000 + (i) * 4)
+#define NVE4_CP_INPUT_NTID(i) (0x1000 + (i) * 4)
+#define NVE4_CP_INPUT_NCTAID(i) (0x100c + (i) * 4)
+#define NVE4_CP_INPUT_GRIDID 0x1018
+#define NVE4_CP_INPUT_TEX(i) (0x1040 + (i) * 4)
+#define NVE4_CP_INPUT_TEX_STRIDE 4
+#define NVE4_CP_INPUT_TEX_MAX 32
+#define NVE4_CP_INPUT_MS_OFFSETS 0x10c0
+#define NVE4_CP_INPUT_SUF_STRIDE 64
+#define NVE4_CP_INPUT_SUF(i) (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE)
+#define NVE4_CP_INPUT_SUF_MAX 32
+#define NVE4_CP_INPUT_TRAP_INFO_PTR 0x1900
+#define NVE4_CP_INPUT_TEMP_PTR 0x1908
+#define NVE4_CP_INPUT_MP_TEMP_SIZE 0x1910
+#define NVE4_CP_INPUT_WARP_TEMP_SIZE 0x1914
+#define NVE4_CP_INPUT_CSTACK_SIZE 0x1918
+#define NVE4_CP_INPUT_SIZE 0x1a00
+#define NVE4_CP_PARAM_TRAP_INFO 0x2000
+#define NVE4_CP_PARAM_TRAP_INFO_SZ (1 << 16)
+#define NVE4_CP_PARAM_SIZE (NVE4_CP_PARAM_TRAP_INFO + (1 << 16))
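+
+/* Layout of the parameter buffer (screen->parm): user kernel parameters at
+ * the bottom (up to NVE4_CP_INPUT_USER_LIMIT), followed by driver-generated
+ * data (grid info, texture handles, surface info, temp/stack/trap pointers)
+ * up to NVE4_CP_INPUT_SIZE, which is the range bound as c0[]; the trap info
+ * record itself lives above that at NVE4_CP_PARAM_TRAP_INFO.
+ */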
+
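+/* In-memory view of the 256-byte launch descriptor
+ * (NVE4_COMPUTE_LAUNCH_DESC__SIZE); field offsets correspond to the
+ * NVE4_COMPUTE_LAUNCH_DESC_* register descriptions in nve4_compute.xml.h.
+ */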
+struct nve4_cp_launch_desc
+{
+ u32 unk0[8];
+ u32 entry;
+ u32 unk9[3];
+ u32 griddim_x : 31;
+ u32 unk12 : 1;
+ u16 griddim_y;
+ u16 griddim_z;
+ u32 unk14[3];
+ u16 shared_size; /* must be aligned to 0x100 */
+ u16 unk15;
+ u16 unk16;
+ u16 blockdim_x;
+ u16 blockdim_y;
+ u16 blockdim_z;
+ u32 cb_mask : 8;
+ u32 unk20_8 : 21;
+ u32 cache_split : 2;
+ u32 unk20_31 : 1;
+ u32 unk21[8];
+ struct {
+ u32 address_l;
+ u32 address_h : 8;
+ u32 reserved : 7;
+ u32 size : 17;
+ } cb[8];
+ u32 local_size_p : 20;
+ u32 unk45_20 : 7;
+ u32 bar_alloc : 5;
+ u32 local_size_n : 20;
+ u32 unk46_20 : 4;
+ u32 gpr_alloc : 8;
+ u32 cstack_size : 20;
+ u32 unk47_20 : 12;
+ u32 unk48[16];
+};
+
+static INLINE void
+nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc)
+{
+ memset(desc, 0, sizeof(*desc));
+
+ desc->unk0[7] = 0xbc000000;
+ desc->unk9[2] = 0x44014000;
+ desc->unk47_20 = 0x300;
+}
+
+static INLINE void
+nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc,
+ unsigned index,
+ struct nouveau_bo *bo,
+ uint32_t base, uint16_t size)
+{
+ uint64_t address = bo->offset + base;
+
+ assert(index < 8);
+ assert(!(base & 0xff));
+ assert(size <= 65536);
+
+ desc->cb[index].address_l = address;
+ desc->cb[index].address_h = address >> 32;
+ desc->cb[index].size = size;
+
+ desc->cb_mask |= 1 << index;
+}
+
+static INLINE void
+nve4_cp_launch_desc_set_ctx_cb(struct nve4_cp_launch_desc *desc,
+ unsigned index,
+ const struct nvc0_constbuf *cb)
+{
+ assert(index < 8);
+
+ if (!cb->u.buf) {
+ desc->cb_mask &= ~(1 << index);
+ } else {
+ const struct nv04_resource *buf = nv04_resource(cb->u.buf);
+ assert(!cb->user);
+ nve4_cp_launch_desc_set_cb(desc, index,
+ buf->bo, buf->offset + cb->offset, cb->size);
+ }
+}
+
+struct nve4_mp_trap_info {
+ u32 lock;
+ u32 pc;
+ u32 trapstat;
+ u32 warperr;
+ u32 tid[3];
+ u32 ctaid[3];
+ u32 pad028[2];
+ u32 r[64];
+ u32 flags;
+ u32 pad134[3];
+ u32 s[0x3000];
+};
+
+#endif /* NVE4_COMPUTE_H */
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
new file mode 100644
index 0000000..e971fc1
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
@@ -0,0 +1,429 @@
+#ifndef NVE4_COMPUTE_XML
+#define NVE4_COMPUTE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nve4_compute.xml ( 10168 bytes, from 2013-06-04 13:57:02)
+- copyright.xml ( 6452 bytes, from 2012-04-16 22:51:01)
+- nvchipsets.xml ( 3954 bytes, from 2013-06-04 13:57:02)
+- nv_object.xml ( 14395 bytes, from 2013-06-04 13:57:02)
+- nv_defs.xml ( 4437 bytes, from 2012-04-16 22:51:01)
+- nv50_defs.xml ( 16877 bytes, from 2013-07-17 09:10:01)
+- nve4_p2mf.xml ( 2373 bytes, from 2013-06-04 13:57:02)
+
+Copyright (C) 2006-2013 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+
+#define NVE4_COMPUTE_UNK0144 0x00000144
+
+#define NVE4_COMPUTE_UPLOAD 0x00000000
+
+#define NVE4_COMPUTE_UPLOAD_LINE_LENGTH_IN 0x00000180
+
+#define NVE4_COMPUTE_UPLOAD_LINE_COUNT 0x00000184
+
+#define NVE4_COMPUTE_UPLOAD_DST_ADDRESS_HIGH 0x00000188
+
+#define NVE4_COMPUTE_UPLOAD_DST_ADDRESS_LOW 0x0000018c
+
+#define NVE4_COMPUTE_UPLOAD_DST_PITCH 0x00000190
+
+#define NVE4_COMPUTE_UPLOAD_DST_TILE_MODE 0x00000194
+
+#define NVE4_COMPUTE_UPLOAD_DST_WIDTH 0x00000198
+
+#define NVE4_COMPUTE_UPLOAD_DST_HEIGHT 0x0000019c
+
+#define NVE4_COMPUTE_UPLOAD_DST_DEPTH 0x000001a0
+
+#define NVE4_COMPUTE_UPLOAD_DST_Z 0x000001a4
+
+#define NVE4_COMPUTE_UPLOAD_DST_X 0x000001a8
+
+#define NVE4_COMPUTE_UPLOAD_DST_Y 0x000001ac
+
+#define NVE4_COMPUTE_UPLOAD_EXEC 0x000001b0
+#define NVE4_COMPUTE_UPLOAD_EXEC_LINEAR 0x00000001
+#define NVE4_COMPUTE_UPLOAD_EXEC_UNK1__MASK 0x0000007e
+#define NVE4_COMPUTE_UPLOAD_EXEC_UNK1__SHIFT 1
+#define NVE4_COMPUTE_UPLOAD_EXEC_BUF_NOTIFY 0x00000300
+#define NVE4_COMPUTE_UPLOAD_EXEC_UNK12__MASK 0x0000f000
+#define NVE4_COMPUTE_UPLOAD_EXEC_UNK12__SHIFT 12
+
+#define NVE4_COMPUTE_UPLOAD_DATA 0x000001b4
+
+#define NVE4_COMPUTE_UPLOAD_QUERY_ADDRESS_HIGH 0x000001dc
+
+#define NVE4_COMPUTE_UPLOAD_QUERY_ADDRESS_LOW 0x000001e0
+
+#define NVE4_COMPUTE_UPLOAD_QUERY_SEQUENCE 0x000001e4
+
+#define NVE4_COMPUTE_UPLOAD_UNK01F0 0x000001f0
+
+#define NVE4_COMPUTE_UPLOAD_UNK01F4 0x000001f4
+
+#define NVE4_COMPUTE_UPLOAD_UNK01F8 0x000001f8
+
+#define NVE4_COMPUTE_UPLOAD_UNK01FC 0x000001fc
+
+#define NVE4_COMPUTE_SHARED_BASE 0x00000214
+
+#define NVE4_COMPUTE_MEM_BARRIER 0x0000021c
+#define NVE4_COMPUTE_MEM_BARRIER_UNK0__MASK 0x00000007
+#define NVE4_COMPUTE_MEM_BARRIER_UNK0__SHIFT 0
+#define NVE4_COMPUTE_MEM_BARRIER_UNK4 0x00000010
+#define NVE4_COMPUTE_MEM_BARRIER_UNK12 0x00001000
+
+#define NVE4_COMPUTE_UNK0240 0x00000240
+
+#define NVE4_COMPUTE_UNK244_TIC_FLUSH 0x00000244
+
+#define NVE4_COMPUTE_UNK0248 0x00000248
+#define NVE4_COMPUTE_UNK0248_UNK0__MASK 0x0000003f
+#define NVE4_COMPUTE_UNK0248_UNK0__SHIFT 0
+#define NVE4_COMPUTE_UNK0248_UNK8__MASK 0x00ffff00
+#define NVE4_COMPUTE_UNK0248_UNK8__SHIFT 8
+
+#define NVE4_COMPUTE_UNK0274 0x00000274
+
+#define NVE4_COMPUTE_UNK0278 0x00000278
+
+#define NVE4_COMPUTE_UNK027C 0x0000027c
+
+#define NVE4_COMPUTE_UNK0280 0x00000280
+
+#define NVE4_COMPUTE_UNK0284 0x00000284
+
+#define NVE4_COMPUTE_UNK0288 0x00000288
+
+#define NVE4_COMPUTE_UNK0290 0x00000290
+
+#define NVE4_COMPUTE_UNK02B0 0x000002b0
+
+#define NVE4_COMPUTE_LAUNCH_DESC_ADDRESS 0x000002b4
+#define NVE4_COMPUTE_LAUNCH_DESC_ADDRESS__SHR 8
+
+#define NVE4_COMPUTE_UNK02B8 0x000002b8
+
+#define NVE4_COMPUTE_LAUNCH 0x000002bc
+
+#define NVE4_COMPUTE_MP_TEMP_SIZE(i0) (0x000002e4 + 0xc*(i0))
+#define NVE4_COMPUTE_MP_TEMP_SIZE__ESIZE 0x0000000c
+#define NVE4_COMPUTE_MP_TEMP_SIZE__LEN 0x00000002
+
+#define NVE4_COMPUTE_MP_TEMP_SIZE_HIGH(i0) (0x000002e4 + 0xc*(i0))
+
+#define NVE4_COMPUTE_MP_TEMP_SIZE_LOW(i0) (0x000002e8 + 0xc*(i0))
+
+#define NVE4_COMPUTE_MP_TEMP_SIZE_MASK(i0) (0x000002ec + 0xc*(i0))
+
+#define NVE4_COMPUTE_UNK0310 0x00000310
+
+#define NVE4_COMPUTE_FIRMWARE(i0) (0x00000500 + 0x4*(i0))
+#define NVE4_COMPUTE_FIRMWARE__ESIZE 0x00000004
+#define NVE4_COMPUTE_FIRMWARE__LEN 0x00000020
+
+#define NVE4_COMPUTE_LOCAL_BASE 0x0000077c
+
+#define NVE4_COMPUTE_TEMP_ADDRESS_HIGH 0x00000790
+
+#define NVE4_COMPUTE_TEMP_ADDRESS_LOW 0x00000794
+
+#define NVE4_COMPUTE_UNK0D94 0x00000d94
+
+#define NVE4_COMPUTE_WATCHDOG_TIMER 0x00000de4
+
+#define NVE4_COMPUTE_UNK0F44(i0) (0x00000f44 + 0x4*(i0))
+#define NVE4_COMPUTE_UNK0F44__ESIZE 0x00000004
+#define NVE4_COMPUTE_UNK0F44__LEN 0x00000004
+
+#define NVE4_COMPUTE_UNK1040(i0) (0x00001040 + 0x4*(i0))
+#define NVE4_COMPUTE_UNK1040__ESIZE 0x00000004
+#define NVE4_COMPUTE_UNK1040__LEN 0x0000000c
+
+#define NVE4_COMPUTE_UNK1288_TIC_FLUSH 0x00001288
+
+#define NVE4_COMPUTE_TSC_FLUSH 0x00001330
+#define NVE4_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001
+#define NVE4_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVE4_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4
+
+#define NVE4_COMPUTE_TIC_FLUSH 0x00001334
+#define NVE4_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001
+#define NVE4_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVE4_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4
+
+#define NVE4_COMPUTE_TEX_CACHE_CTL 0x00001338
+#define NVE4_COMPUTE_TEX_CACHE_CTL_UNK0 0x00000001
+#define NVE4_COMPUTE_TEX_CACHE_CTL_ENTRY__MASK 0x03fffff0
+#define NVE4_COMPUTE_TEX_CACHE_CTL_ENTRY__SHIFT 4
+
+#define NVE4_COMPUTE_UNK1424_TSC_FLUSH 0x00001424
+
+#define NVE4_COMPUTE_COND_ADDRESS_HIGH 0x00001550
+
+#define NVE4_COMPUTE_COND_ADDRESS_LOW 0x00001554
+
+#define NVE4_COMPUTE_COND_MODE 0x00001558
+#define NVE4_COMPUTE_COND_MODE_NEVER 0x00000000
+#define NVE4_COMPUTE_COND_MODE_ALWAYS 0x00000001
+#define NVE4_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002
+#define NVE4_COMPUTE_COND_MODE_EQUAL 0x00000003
+#define NVE4_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NVE4_COMPUTE_TSC_ADDRESS_HIGH 0x0000155c
+
+#define NVE4_COMPUTE_TSC_ADDRESS_LOW 0x00001560
+
+#define NVE4_COMPUTE_TSC_LIMIT 0x00001564
+
+#define NVE4_COMPUTE_TIC_ADDRESS_HIGH 0x00001574
+
+#define NVE4_COMPUTE_TIC_ADDRESS_LOW 0x00001578
+
+#define NVE4_COMPUTE_TIC_LIMIT 0x0000157c
+
+#define NVE4_COMPUTE_CODE_ADDRESS_HIGH 0x00001608
+
+#define NVE4_COMPUTE_CODE_ADDRESS_LOW 0x0000160c
+
+#define NVE4_COMPUTE_UNK1690 0x00001690
+
+#define NVE4_COMPUTE_FLUSH 0x00001698
+#define NVE4_COMPUTE_FLUSH_CODE 0x00000001
+#define NVE4_COMPUTE_FLUSH_GLOBAL 0x00000010
+#define NVE4_COMPUTE_FLUSH_CB 0x00001000
+
+#define NVE4_COMPUTE_UNK1944 0x00001944
+
+#define NVE4_COMPUTE_DELAY 0x00001a24
+
+#define NVE4_COMPUTE_UNK1A2C(i0) (0x00001a2c + 0x4*(i0))
+#define NVE4_COMPUTE_UNK1A2C__ESIZE 0x00000004
+#define NVE4_COMPUTE_UNK1A2C__LEN 0x00000005
+
+#define NVE4_COMPUTE_QUERY_ADDRESS_HIGH 0x00001b00
+
+#define NVE4_COMPUTE_QUERY_ADDRESS_LOW 0x00001b04
+
+#define NVE4_COMPUTE_QUERY_SEQUENCE 0x00001b08
+
+#define NVE4_COMPUTE_QUERY_GET 0x00001b0c
+#define NVE4_COMPUTE_QUERY_GET_MODE__MASK 0x00000003
+#define NVE4_COMPUTE_QUERY_GET_MODE__SHIFT 0
+#define NVE4_COMPUTE_QUERY_GET_MODE_WRITE 0x00000000
+#define NVE4_COMPUTE_QUERY_GET_MODE_WRITE_INTR_NRHOST 0x00000003
+#define NVE4_COMPUTE_QUERY_GET_INTR 0x00100000
+#define NVE4_COMPUTE_QUERY_GET_SHORT 0x10000000
+
+#define NVE4_COMPUTE_TEX_CB_INDEX 0x00002608
+
+#define NVE4_COMPUTE_UNK260C 0x0000260c
+
+#define NVE4_COMPUTE_MP_PM_SET(i0) (0x0000335c + 0x4*(i0))
+#define NVE4_COMPUTE_MP_PM_SET__ESIZE 0x00000004
+#define NVE4_COMPUTE_MP_PM_SET__LEN 0x00000008
+
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL(i0) (0x0000337c + 0x4*(i0))
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL__ESIZE 0x00000004
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL__LEN 0x00000004
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL_NONE 0x00000000
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL_USER 0x00000001
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL_LAUNCH 0x00000003
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL_EXEC 0x00000004
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL_ISSUE 0x00000005
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL_LDST 0x0000001b
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL_BRANCH 0x0000001c
+
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL(i0) (0x0000338c + 0x4*(i0))
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL__ESIZE 0x00000004
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL__LEN 0x00000004
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL_NONE 0x00000000
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL_WARP 0x00000002
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL_REPLAY 0x00000008
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL_TRANSACTION 0x0000000e
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL_L1 0x00000010
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL_MEM 0x00000011
+
+#define NVE4_COMPUTE_MP_PM_SRCSEL(i0) (0x0000339c + 0x4*(i0))
+#define NVE4_COMPUTE_MP_PM_SRCSEL__ESIZE 0x00000004
+#define NVE4_COMPUTE_MP_PM_SRCSEL__LEN 0x00000008
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP0__MASK 0x00000003
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP0__SHIFT 0
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG0__MASK 0x0000001c
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG0__SHIFT 2
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP1__MASK 0x00000060
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP1__SHIFT 5
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG1__MASK 0x00000380
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG1__SHIFT 7
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP2__MASK 0x00000c00
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP2__SHIFT 10
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG2__MASK 0x00007000
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG2__SHIFT 12
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP3__MASK 0x00018000
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP3__SHIFT 15
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG3__MASK 0x000e0000
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG3__SHIFT 17
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP4__MASK 0x00300000
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP4__SHIFT 20
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG4__MASK 0x01c00000
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG4__SHIFT 22
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP5__MASK 0x06000000
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP5__SHIFT 25
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG5__MASK 0x38000000
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG5__SHIFT 27
+
+#define NVE4_COMPUTE_MP_PM_FUNC(i0) (0x000033bc + 0x4*(i0))
+#define NVE4_COMPUTE_MP_PM_FUNC__ESIZE 0x00000004
+#define NVE4_COMPUTE_MP_PM_FUNC__LEN 0x00000008
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE__MASK 0x0000000f
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE__SHIFT 0
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_LOGOP 0x00000000
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_LOGOP_PULSE 0x00000001
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_B6 0x00000002
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_UNK3 0x00000003
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_LOGOP_B6 0x00000004
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_LOGOP_B6_PULSE 0x00000005
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_UNK6 0x00000006
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_UNK7 0x00000007
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_UNK8 0x00000008
+#define NVE4_COMPUTE_MP_PM_FUNC_FUNC__MASK 0x000ffff0
+#define NVE4_COMPUTE_MP_PM_FUNC_FUNC__SHIFT 4
+
+#define NVE4_COMPUTE_MP_PM_UNK33DC 0x000033dc
+
+#define NVE4_COMPUTE_LAUNCH_DESC__SIZE 0x00000100
+#define NVE4_COMPUTE_LAUNCH_DESC_6 0x00000018
+#define NVE4_COMPUTE_LAUNCH_DESC_6_NOTIFY__MASK 0x00000c00
+#define NVE4_COMPUTE_LAUNCH_DESC_6_NOTIFY__SHIFT 10
+
+#define NVE4_COMPUTE_LAUNCH_DESC_PROG_START 0x00000020
+
+#define NVE4_COMPUTE_LAUNCH_DESC_12 0x00000030
+#define NVE4_COMPUTE_LAUNCH_DESC_12_GRIDDIM_X__MASK 0x7fffffff
+#define NVE4_COMPUTE_LAUNCH_DESC_12_GRIDDIM_X__SHIFT 0
+
+#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ 0x00000034
+#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Y__MASK 0x0000ffff
+#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Y__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Z__MASK 0xffff0000
+#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Z__SHIFT 16
+
+#define NVE4_COMPUTE_LAUNCH_DESC_17 0x00000044
+#define NVE4_COMPUTE_LAUNCH_DESC_17_SHARED_ALLOC__MASK 0x0000ffff
+#define NVE4_COMPUTE_LAUNCH_DESC_17_SHARED_ALLOC__SHIFT 0
+
+#define NVE4_COMPUTE_LAUNCH_DESC_18 0x00000048
+#define NVE4_COMPUTE_LAUNCH_DESC_18_BLOCKDIM_X__MASK 0xffff0000
+#define NVE4_COMPUTE_LAUNCH_DESC_18_BLOCKDIM_X__SHIFT 16
+
+#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ 0x0000004c
+#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Y__MASK 0x0000ffff
+#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Y__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Z__MASK 0xffff0000
+#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Z__SHIFT 16
+
+#define NVE4_COMPUTE_LAUNCH_DESC_20 0x00000050
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CB_VALID__MASK 0x000000ff
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CB_VALID__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT__MASK 0x60000000
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT__SHIFT 29
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_16K_SHARED_48K_L1 0x20000000
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_32K_SHARED_32K_L1 0x40000000
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_48K_SHARED_16K_L1 0x60000000
+
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0(i0) (0x00000074 + 0x8*(i0))
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0__ESIZE 0x00000008
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0__LEN 0x00000008
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0_ADDRESS_LOW__MASK 0xffffffff
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0_ADDRESS_LOW__SHIFT 0
+
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1(i0) (0x00000078 + 0x8*(i0))
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1__ESIZE 0x00000008
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1__LEN 0x00000008
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_ADDRESS_HIGH__MASK 0x000000ff
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_ADDRESS_HIGH__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_SIZE__MASK 0xffff8000
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_SIZE__SHIFT 15
+
+#define NVE4_COMPUTE_LAUNCH_DESC_45 0x000000b4
+#define NVE4_COMPUTE_LAUNCH_DESC_45_LOCAL_POS_ALLOC__MASK 0x000fffff
+#define NVE4_COMPUTE_LAUNCH_DESC_45_LOCAL_POS_ALLOC__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_45_BARRIER_ALLOC__MASK 0xf8000000
+#define NVE4_COMPUTE_LAUNCH_DESC_45_BARRIER_ALLOC__SHIFT 27
+
+#define NVE4_COMPUTE_LAUNCH_DESC_46 0x000000b8
+#define NVE4_COMPUTE_LAUNCH_DESC_46_LOCAL_NEG_ALLOC__MASK 0x000fffff
+#define NVE4_COMPUTE_LAUNCH_DESC_46_LOCAL_NEG_ALLOC__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_46_GPR_ALLOC__MASK 0x3f000000
+#define NVE4_COMPUTE_LAUNCH_DESC_46_GPR_ALLOC__SHIFT 24
+
+#define NVE4_COMPUTE_LAUNCH_DESC_47 0x000000bc
+#define NVE4_COMPUTE_LAUNCH_DESC_47_WARP_CSTACK_SIZE__MASK 0x000fffff
+#define NVE4_COMPUTE_LAUNCH_DESC_47_WARP_CSTACK_SIZE__SHIFT 0
+
+
+#endif /* NVE4_COMPUTE_XML */
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h b/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
new file mode 100644
index 0000000..68a742f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
@@ -0,0 +1,107 @@
+#ifndef RNNDB_NVE4_P2MF_XML
+#define RNNDB_NVE4_P2MF_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- rnndb/nve4_p2mf.xml ( 1400 bytes, from 2012-04-14 21:29:11)
+- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nv_object.xml ( 12736 bytes, from 2012-04-14 21:30:24)
+- ./rnndb/nvchipsets.xml ( 3701 bytes, from 2012-03-22 20:40:59)
+- ./rnndb/nv_defs.xml ( 4437 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-08-11 18:25:12)
+
+Copyright (C) 2006-2012 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVE4_P2MF_LINE_LENGTH_IN 0x00000180
+
+#define NVE4_P2MF_LINE_COUNT 0x00000184
+
+#define NVE4_P2MF_DST_ADDRESS_HIGH 0x00000188
+
+#define NVE4_P2MF_DST_ADDRESS_LOW 0x0000018c
+
+#define NVE4_P2MF_DST_TILE_MODE 0x00000194
+
+#define NVE4_P2MF_DST_PITCH 0x00000198
+
+#define NVE4_P2MF_DST_HEIGHT 0x0000019c
+
+#define NVE4_P2MF_DST_DEPTH 0x000001a0
+
+#define NVE4_P2MF_DST_Z 0x000001a4
+
+#define NVE4_P2MF_DST_X 0x000001a8
+
+#define NVE4_P2MF_DST_Y 0x000001ac
+
+#define NVE4_P2MF_EXEC 0x000001b0
+#define NVE4_P2MF_EXEC_LINEAR 0x00000001
+#define NVE4_P2MF_EXEC_UNK12 0x00001000
+
+#define NVE4_P2MF_DATA 0x000001b4
+
+
+#endif /* RNNDB_NVE4_P2MF_XML */