summaryrefslogtreecommitdiffstats
path: root/src/gallium/drivers/nouveau/nvc0
diff options
context:
space:
mode:
authorJohannes Obermayr <johannesobermayr@gmx.de>2013-08-20 20:14:00 +0200
committerChristoph Bumiller <e0425955@student.tuwien.ac.at>2013-09-11 21:47:07 +0200
commit5eb7ff1175a644ffe3b0f1a75cb235400355f9fb (patch)
tree613342591e12a96725df715853a5e579ba1ec8ea /src/gallium/drivers/nouveau/nvc0
parentebcdaa7bbc3a10fe59447ae77b508ee85eaa582f (diff)
downloadexternal_mesa3d-5eb7ff1175a644ffe3b0f1a75cb235400355f9fb.zip
external_mesa3d-5eb7ff1175a644ffe3b0f1a75cb235400355f9fb.tar.gz
external_mesa3d-5eb7ff1175a644ffe3b0f1a75cb235400355f9fb.tar.bz2
Move nv30, nv50 and nvc0 to nouveau.
It is planned to ship openSUSE 13.1 with -shared libs. nouveau.la, nv30.la, nv50.la and nvc0.la are currently LIBADDs in all nouveau related targets. This change makes it possible to easily build one shared libnouveau.so which is then LIBADDed. Also dlopen will be faster for one library instead of three and build time on -jX will be reduced. Whitespace fixes were requested by 'git am'. Signed-off-by: Johannes Obermayr <johannesobermayr@gmx.de> Acked-by: Christoph Bumiller <christoph.bumiller@speed.at> Acked-by: Ian Romanick <ian.d.romanick@intel.com>
Diffstat (limited to 'src/gallium/drivers/nouveau/nvc0')
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_2d.xml.h380
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h1350
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_3ddefs.xml.h98
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_compute.c271
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_compute.h10
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_compute.xml.h410
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.c402
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_context.h357
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_draw.c88
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_formats.c25
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_graph_macros.h236
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_m2mf.xml.h138
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c358
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.c811
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_program.h68
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_push.c409
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_query.c1448
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_resource.c62
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_resource.h58
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.c967
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_screen.h325
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c278
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state.c1247
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c577
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h77
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_surface.c1265
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_tex.c814
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c558
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c891
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c649
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_video.c331
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_video.h48
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c155
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c143
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c202
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h144
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nve4_compute.c652
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nve4_compute.h131
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h429
-rw-r--r--src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h107
40 files changed, 16969 insertions, 0 deletions
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_2d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_2d.xml.h
new file mode 100644
index 0000000..9a488c1
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_2d.xml.h
@@ -0,0 +1,380 @@
+#ifndef NVC0_2D_XML
+#define NVC0_2D_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_2d.xml ( 9454 bytes, from 2010-10-16 16:03:11)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24)
+- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_2D_DST_FORMAT 0x00000200
+
+#define NVC0_2D_DST_LINEAR 0x00000204
+
+#define NVC0_2D_DST_TILE_MODE 0x00000208
+
+#define NVC0_2D_DST_DEPTH 0x0000020c
+
+#define NVC0_2D_DST_LAYER 0x00000210
+
+#define NVC0_2D_DST_PITCH 0x00000214
+
+#define NVC0_2D_DST_WIDTH 0x00000218
+
+#define NVC0_2D_DST_HEIGHT 0x0000021c
+
+#define NVC0_2D_DST_ADDRESS_HIGH 0x00000220
+
+#define NVC0_2D_DST_ADDRESS_LOW 0x00000224
+
+#define NVC0_2D_UNK228 0x00000228
+
+#define NVC0_2D_SRC_FORMAT 0x00000230
+
+#define NVC0_2D_SRC_LINEAR 0x00000234
+
+#define NVC0_2D_SRC_TILE_MODE 0x00000238
+
+#define NVC0_2D_SRC_DEPTH 0x0000023c
+
+#define NVC0_2D_SRC_LAYER 0x00000240
+
+#define NVC0_2D_SRC_PITCH 0x00000244
+#define NVC0_2D_SRC_PITCH__MAX 0x00040000
+
+#define NVC0_2D_SRC_WIDTH 0x00000248
+#define NVC0_2D_SRC_WIDTH__MAX 0x00010000
+
+#define NVC0_2D_SRC_HEIGHT 0x0000024c
+#define NVC0_2D_SRC_HEIGHT__MAX 0x00010000
+
+#define NVC0_2D_SRC_ADDRESS_HIGH 0x00000250
+
+#define NVC0_2D_SRC_ADDRESS_LOW 0x00000254
+
+#define NVC0_2D_UNK258 0x00000258
+
+#define NVC0_2D_SINGLE_GPC 0x00000260
+
+#define NVC0_2D_COND_ADDRESS_HIGH 0x00000264
+
+#define NVC0_2D_COND_ADDRESS_LOW 0x00000268
+
+#define NVC0_2D_COND_MODE 0x0000026c
+#define NVC0_2D_COND_MODE_NEVER 0x00000000
+#define NVC0_2D_COND_MODE_ALWAYS 0x00000001
+#define NVC0_2D_COND_MODE_RES_NON_ZERO 0x00000002
+#define NVC0_2D_COND_MODE_EQUAL 0x00000003
+#define NVC0_2D_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NVC0_2D_CLIP_X 0x00000280
+
+#define NVC0_2D_CLIP_Y 0x00000284
+
+#define NVC0_2D_CLIP_W 0x00000288
+
+#define NVC0_2D_CLIP_H 0x0000028c
+
+#define NVC0_2D_CLIP_ENABLE 0x00000290
+
+#define NVC0_2D_COLOR_KEY_FORMAT 0x00000294
+#define NVC0_2D_COLOR_KEY_FORMAT_16BPP 0x00000000
+#define NVC0_2D_COLOR_KEY_FORMAT_15BPP 0x00000001
+#define NVC0_2D_COLOR_KEY_FORMAT_24BPP 0x00000002
+#define NVC0_2D_COLOR_KEY_FORMAT_30BPP 0x00000003
+#define NVC0_2D_COLOR_KEY_FORMAT_8BPP 0x00000004
+#define NVC0_2D_COLOR_KEY_FORMAT_16BPP2 0x00000005
+#define NVC0_2D_COLOR_KEY_FORMAT_32BPP 0x00000006
+
+#define NVC0_2D_COLOR_KEY 0x00000298
+
+#define NVC0_2D_COLOR_KEY_ENABLE 0x0000029c
+
+#define NVC0_2D_ROP 0x000002a0
+
+#define NVC0_2D_BETA1 0x000002a4
+
+#define NVC0_2D_BETA4 0x000002a8
+
+#define NVC0_2D_OPERATION 0x000002ac
+#define NVC0_2D_OPERATION_SRCCOPY_AND 0x00000000
+#define NVC0_2D_OPERATION_ROP_AND 0x00000001
+#define NVC0_2D_OPERATION_BLEND 0x00000002
+#define NVC0_2D_OPERATION_SRCCOPY 0x00000003
+#define NVC0_2D_OPERATION_ROP 0x00000004
+#define NVC0_2D_OPERATION_SRCCOPY_PREMULT 0x00000005
+#define NVC0_2D_OPERATION_BLEND_PREMULT 0x00000006
+
+#define NVC0_2D_UNK2B0 0x000002b0
+#define NVC0_2D_UNK2B0_UNK0__MASK 0x0000003f
+#define NVC0_2D_UNK2B0_UNK0__SHIFT 0
+#define NVC0_2D_UNK2B0_UNK1__MASK 0x00003f00
+#define NVC0_2D_UNK2B0_UNK1__SHIFT 8
+
+#define NVC0_2D_PATTERN_SELECT 0x000002b4
+#define NVC0_2D_PATTERN_SELECT_MONO_8X8 0x00000000
+#define NVC0_2D_PATTERN_SELECT_MONO_64X1 0x00000001
+#define NVC0_2D_PATTERN_SELECT_MONO_1X64 0x00000002
+#define NVC0_2D_PATTERN_SELECT_COLOR 0x00000003
+
+#define NVC0_2D_PATTERN_COLOR_FORMAT 0x000002e8
+#define NVC0_2D_PATTERN_COLOR_FORMAT_16BPP 0x00000000
+#define NVC0_2D_PATTERN_COLOR_FORMAT_15BPP 0x00000001
+#define NVC0_2D_PATTERN_COLOR_FORMAT_32BPP 0x00000002
+#define NVC0_2D_PATTERN_COLOR_FORMAT_8BPP 0x00000003
+#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK4 0x00000004
+#define NVC0_2D_PATTERN_COLOR_FORMAT_UNK5 0x00000005
+
+#define NVC0_2D_PATTERN_MONO_FORMAT 0x000002ec
+#define NVC0_2D_PATTERN_MONO_FORMAT_CGA6 0x00000000
+#define NVC0_2D_PATTERN_MONO_FORMAT_LE 0x00000001
+
+#define NVC0_2D_PATTERN_COLOR(i0) (0x000002f0 + 0x4*(i0))
+#define NVC0_2D_PATTERN_COLOR__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_COLOR__LEN 0x00000002
+
+#define NVC0_2D_PATTERN_BITMAP(i0) (0x000002f8 + 0x4*(i0))
+#define NVC0_2D_PATTERN_BITMAP__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_BITMAP__LEN 0x00000002
+
+#define NVC0_2D_PATTERN_X8R8G8B8(i0) (0x00000300 + 0x4*(i0))
+#define NVC0_2D_PATTERN_X8R8G8B8__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_X8R8G8B8__LEN 0x00000040
+#define NVC0_2D_PATTERN_X8R8G8B8_B__MASK 0x000000ff
+#define NVC0_2D_PATTERN_X8R8G8B8_B__SHIFT 0
+#define NVC0_2D_PATTERN_X8R8G8B8_G__MASK 0x0000ff00
+#define NVC0_2D_PATTERN_X8R8G8B8_G__SHIFT 8
+#define NVC0_2D_PATTERN_X8R8G8B8_R__MASK 0x00ff0000
+#define NVC0_2D_PATTERN_X8R8G8B8_R__SHIFT 16
+
+#define NVC0_2D_PATTERN_R5G6B5(i0) (0x00000400 + 0x4*(i0))
+#define NVC0_2D_PATTERN_R5G6B5__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_R5G6B5__LEN 0x00000020
+#define NVC0_2D_PATTERN_R5G6B5_B0__MASK 0x0000001f
+#define NVC0_2D_PATTERN_R5G6B5_B0__SHIFT 0
+#define NVC0_2D_PATTERN_R5G6B5_G0__MASK 0x000007e0
+#define NVC0_2D_PATTERN_R5G6B5_G0__SHIFT 5
+#define NVC0_2D_PATTERN_R5G6B5_R0__MASK 0x0000f800
+#define NVC0_2D_PATTERN_R5G6B5_R0__SHIFT 11
+#define NVC0_2D_PATTERN_R5G6B5_B1__MASK 0x001f0000
+#define NVC0_2D_PATTERN_R5G6B5_B1__SHIFT 16
+#define NVC0_2D_PATTERN_R5G6B5_G1__MASK 0x07e00000
+#define NVC0_2D_PATTERN_R5G6B5_G1__SHIFT 21
+#define NVC0_2D_PATTERN_R5G6B5_R1__MASK 0xf8000000
+#define NVC0_2D_PATTERN_R5G6B5_R1__SHIFT 27
+
+#define NVC0_2D_PATTERN_X1R5G5B5(i0) (0x00000480 + 0x4*(i0))
+#define NVC0_2D_PATTERN_X1R5G5B5__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_X1R5G5B5__LEN 0x00000020
+#define NVC0_2D_PATTERN_X1R5G5B5_B0__MASK 0x0000001f
+#define NVC0_2D_PATTERN_X1R5G5B5_B0__SHIFT 0
+#define NVC0_2D_PATTERN_X1R5G5B5_G0__MASK 0x000003e0
+#define NVC0_2D_PATTERN_X1R5G5B5_G0__SHIFT 5
+#define NVC0_2D_PATTERN_X1R5G5B5_R0__MASK 0x00007c00
+#define NVC0_2D_PATTERN_X1R5G5B5_R0__SHIFT 10
+#define NVC0_2D_PATTERN_X1R5G5B5_B1__MASK 0x001f0000
+#define NVC0_2D_PATTERN_X1R5G5B5_B1__SHIFT 16
+#define NVC0_2D_PATTERN_X1R5G5B5_G1__MASK 0x03e00000
+#define NVC0_2D_PATTERN_X1R5G5B5_G1__SHIFT 21
+#define NVC0_2D_PATTERN_X1R5G5B5_R1__MASK 0x7c000000
+#define NVC0_2D_PATTERN_X1R5G5B5_R1__SHIFT 26
+
+#define NVC0_2D_PATTERN_Y8(i0) (0x00000500 + 0x4*(i0))
+#define NVC0_2D_PATTERN_Y8__ESIZE 0x00000004
+#define NVC0_2D_PATTERN_Y8__LEN 0x00000010
+#define NVC0_2D_PATTERN_Y8_Y0__MASK 0x000000ff
+#define NVC0_2D_PATTERN_Y8_Y0__SHIFT 0
+#define NVC0_2D_PATTERN_Y8_Y1__MASK 0x0000ff00
+#define NVC0_2D_PATTERN_Y8_Y1__SHIFT 8
+#define NVC0_2D_PATTERN_Y8_Y2__MASK 0x00ff0000
+#define NVC0_2D_PATTERN_Y8_Y2__SHIFT 16
+#define NVC0_2D_PATTERN_Y8_Y3__MASK 0xff000000
+#define NVC0_2D_PATTERN_Y8_Y3__SHIFT 24
+
+#define NVC0_2D_DRAW_SHAPE 0x00000580
+#define NVC0_2D_DRAW_SHAPE_POINTS 0x00000000
+#define NVC0_2D_DRAW_SHAPE_LINES 0x00000001
+#define NVC0_2D_DRAW_SHAPE_LINE_STRIP 0x00000002
+#define NVC0_2D_DRAW_SHAPE_TRIANGLES 0x00000003
+#define NVC0_2D_DRAW_SHAPE_RECTANGLES 0x00000004
+
+#define NVC0_2D_DRAW_COLOR_FORMAT 0x00000584
+
+#define NVC0_2D_DRAW_COLOR 0x00000588
+
+#define NVC0_2D_UNK58C 0x0000058c
+#define NVC0_2D_UNK58C_0 0x00000001
+#define NVC0_2D_UNK58C_1 0x00000010
+#define NVC0_2D_UNK58C_2 0x00000100
+#define NVC0_2D_UNK58C_3 0x00001000
+
+#define NVC0_2D_DRAW_POINT16 0x000005e0
+#define NVC0_2D_DRAW_POINT16_X__MASK 0x0000ffff
+#define NVC0_2D_DRAW_POINT16_X__SHIFT 0
+#define NVC0_2D_DRAW_POINT16_Y__MASK 0xffff0000
+#define NVC0_2D_DRAW_POINT16_Y__SHIFT 16
+
+#define NVC0_2D_DRAW_POINT32_X(i0) (0x00000600 + 0x8*(i0))
+#define NVC0_2D_DRAW_POINT32_X__ESIZE 0x00000008
+#define NVC0_2D_DRAW_POINT32_X__LEN 0x00000040
+
+#define NVC0_2D_DRAW_POINT32_Y(i0) (0x00000604 + 0x8*(i0))
+#define NVC0_2D_DRAW_POINT32_Y__ESIZE 0x00000008
+#define NVC0_2D_DRAW_POINT32_Y__LEN 0x00000040
+
+#define NVC0_2D_SIFC_BITMAP_ENABLE 0x00000800
+
+#define NVC0_2D_SIFC_FORMAT 0x00000804
+
+#define NVC0_2D_SIFC_BITMAP_FORMAT 0x00000808
+#define NVC0_2D_SIFC_BITMAP_FORMAT_I1 0x00000000
+#define NVC0_2D_SIFC_BITMAP_FORMAT_I4 0x00000001
+#define NVC0_2D_SIFC_BITMAP_FORMAT_I8 0x00000002
+
+#define NVC0_2D_SIFC_BITMAP_LSB_FIRST 0x0000080c
+
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE 0x00000810
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_PACKED 0x00000000
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_BYTE 0x00000001
+#define NVC0_2D_SIFC_BITMAP_LINE_PACK_MODE_ALIGN_WORD 0x00000002
+
+#define NVC0_2D_SIFC_BITMAP_COLOR_BIT0 0x00000814
+
+#define NVC0_2D_SIFC_BITMAP_COLOR_BIT1 0x00000818
+
+#define NVC0_2D_SIFC_BITMAP_WRITE_BIT0_ENABLE 0x0000081c
+
+#define NVC0_2D_SIFC_WIDTH 0x00000838
+
+#define NVC0_2D_SIFC_HEIGHT 0x0000083c
+
+#define NVC0_2D_SIFC_DX_DU_FRACT 0x00000840
+
+#define NVC0_2D_SIFC_DX_DU_INT 0x00000844
+
+#define NVC0_2D_SIFC_DY_DV_FRACT 0x00000848
+
+#define NVC0_2D_SIFC_DY_DV_INT 0x0000084c
+
+#define NVC0_2D_SIFC_DST_X_FRACT 0x00000850
+
+#define NVC0_2D_SIFC_DST_X_INT 0x00000854
+
+#define NVC0_2D_SIFC_DST_Y_FRACT 0x00000858
+
+#define NVC0_2D_SIFC_DST_Y_INT 0x0000085c
+
+#define NVC0_2D_SIFC_DATA 0x00000860
+
+#define NVC0_2D_UNK0870 0x00000870
+
+#define NVC0_2D_UNK0880 0x00000880
+
+#define NVC0_2D_UNK0884 0x00000884
+
+#define NVC0_2D_UNK0888 0x00000888
+
+#define NVC0_2D_BLIT_CONTROL 0x0000088c
+#define NVC0_2D_BLIT_CONTROL_ORIGIN__MASK 0x00000001
+#define NVC0_2D_BLIT_CONTROL_ORIGIN__SHIFT 0
+#define NVC0_2D_BLIT_CONTROL_ORIGIN_CENTER 0x00000000
+#define NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER 0x00000001
+#define NVC0_2D_BLIT_CONTROL_FILTER__MASK 0x00000010
+#define NVC0_2D_BLIT_CONTROL_FILTER__SHIFT 4
+#define NVC0_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE 0x00000000
+#define NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR 0x00000010
+
+#define NVC0_2D_BLIT_DST_X 0x000008b0
+
+#define NVC0_2D_BLIT_DST_Y 0x000008b4
+
+#define NVC0_2D_BLIT_DST_W 0x000008b8
+
+#define NVC0_2D_BLIT_DST_H 0x000008bc
+
+#define NVC0_2D_BLIT_DU_DX_FRACT 0x000008c0
+
+#define NVC0_2D_BLIT_DU_DX_INT 0x000008c4
+
+#define NVC0_2D_BLIT_DV_DY_FRACT 0x000008c8
+
+#define NVC0_2D_BLIT_DV_DY_INT 0x000008cc
+
+#define NVC0_2D_BLIT_SRC_X_FRACT 0x000008d0
+
+#define NVC0_2D_BLIT_SRC_X_INT 0x000008d4
+
+#define NVC0_2D_BLIT_SRC_Y_FRACT 0x000008d8
+
+#define NVC0_2D_BLIT_SRC_Y_INT 0x000008dc
+
+
+#endif /* NVC0_2D_XML */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
new file mode 100644
index 0000000..d3f719d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3d.xml.h
@@ -0,0 +1,1350 @@
+#ifndef NVC0_3D_XML
+#define NVC0_3D_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_3d.xml ( 30827 bytes, from 2011-01-13 18:23:07)
+- copyright.xml ( 6452 bytes, from 2010-11-25 23:28:20)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+- nv_3ddefs.xml ( 16394 bytes, from 2010-12-17 15:10:40)
+- nv_object.xml ( 11898 bytes, from 2010-12-23 14:14:20)
+- nvchipsets.xml ( 3074 bytes, from 2010-11-07 00:36:28)
+- nv50_defs.xml ( 4487 bytes, from 2010-12-10 00:37:17)
+
+Copyright (C) 2006-2011 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_3D_NOTIFY_ADDRESS_HIGH 0x00000104
+#define NVC0_3D_NOTIFY_ADDRESS_LOW 0x00000108
+#define NVC0_3D_NOTIFY 0x0000010c
+
+#define NVC0_3D_SERIALIZE 0x00000110
+
+#define NVC0_3D_LINE_WIDTH_SEPARATE 0x0000020c
+
+#define NVC0_3D_FORCE_EARLY_FRAGMENT_TESTS 0x00000210
+
+#define NVC0_3D_MEM_BARRIER 0x0000021c
+#define NVC0_3D_MEM_BARRIER_UNK0 0x00000001
+#define NVC0_3D_MEM_BARRIER_UNK1 0x00000002
+#define NVC0_3D_MEM_BARRIER_UNK2 0x00000004
+#define NVC0_3D_MEM_BARRIER_UNK4 0x00000010
+#define NVC0_3D_MEM_BARRIER_UNK8 0x00000100
+#define NVC0_3D_MEM_BARRIER_UNK12 0x00001000
+
+#define NVC0_3D_CACHE_SPLIT 0x00000308
+#define NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1 0x00000001
+#define NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1 0x00000002
+#define NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1 0x00000003
+
+#define NVC0_3D_TESS_MODE 0x00000320
+#define NVC0_3D_TESS_MODE_PRIM__MASK 0x0000000f
+#define NVC0_3D_TESS_MODE_PRIM__SHIFT 0
+#define NVC0_3D_TESS_MODE_PRIM_ISOLINES 0x00000000
+#define NVC0_3D_TESS_MODE_PRIM_TRIANGLES 0x00000001
+#define NVC0_3D_TESS_MODE_PRIM_QUADS 0x00000002
+#define NVC0_3D_TESS_MODE_SPACING__MASK 0x000000f0
+#define NVC0_3D_TESS_MODE_SPACING__SHIFT 4
+#define NVC0_3D_TESS_MODE_SPACING_EQUAL 0x00000000
+#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD 0x00000010
+#define NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN 0x00000020
+#define NVC0_3D_TESS_MODE_CW 0x00000100
+#define NVC0_3D_TESS_MODE_CONNECTED 0x00000200
+
+#define NVC0_3D_TESS_LEVEL_OUTER(i0) (0x00000324 + 0x4*(i0))
+#define NVC0_3D_TESS_LEVEL_OUTER__ESIZE 0x00000004
+#define NVC0_3D_TESS_LEVEL_OUTER__LEN 0x00000004
+
+#define NVC0_3D_TESS_LEVEL_INNER(i0) (0x00000334 + 0x4*(i0))
+#define NVC0_3D_TESS_LEVEL_INNER__ESIZE 0x00000004
+#define NVC0_3D_TESS_LEVEL_INNER__LEN 0x00000002
+
+#define NVC0_3D_RASTERIZE_ENABLE 0x0000037c
+
+#define NVC0_3D_TFB(i0) (0x00000380 + 0x20*(i0))
+#define NVC0_3D_TFB__ESIZE 0x00000020
+#define NVC0_3D_TFB__LEN 0x00000004
+
+#define NVC0_3D_TFB_BUFFER_ENABLE(i0) (0x00000380 + 0x20*(i0))
+
+#define NVC0_3D_TFB_ADDRESS_HIGH(i0) (0x00000384 + 0x20*(i0))
+
+#define NVC0_3D_TFB_ADDRESS_LOW(i0) (0x00000388 + 0x20*(i0))
+
+#define NVC0_3D_TFB_BUFFER_SIZE(i0) (0x0000038c + 0x20*(i0))
+
+#define NVC0_3D_TFB_BUFFER_OFFSET(i0) (0x00000390 + 0x20*(i0))
+
+#define NVC0_3D_TFB_STREAM(i0) (0x00000700 + 0x10*(i0))
+#define NVC0_3D_TFB_STREAM__ESIZE 0x00000010
+#define NVC0_3D_TFB_STREAM__LEN 0x00000004
+
+#define NVC0_3D_TFB_VARYING_COUNT(i0) (0x00000704 + 0x10*(i0))
+#define NVC0_3D_TFB_VARYING_COUNT__ESIZE 0x00000010
+#define NVC0_3D_TFB_VARYING_COUNT__LEN 0x00000004
+
+#define NVC0_3D_TFB_BUFFER_STRIDE(i0) (0x00000708 + 0x10*(i0))
+#define NVC0_3D_TFB_BUFFER_STRIDE__ESIZE 0x00000010
+#define NVC0_3D_TFB_BUFFER_STRIDE__LEN 0x00000004
+
+#define NVC0_3D_TFB_ENABLE 0x00000744
+
+#define NVC0_3D_SAMPLE_SHADING 0x00000754
+#define NVC0_3D_SAMPLE_SHADING_MIN_SAMPLES__MASK 0x0000000f
+#define NVC0_3D_SAMPLE_SHADING_MIN_SAMPLES__SHIFT 0
+#define NVC0_3D_SAMPLE_SHADING_ENABLE 0x00000010
+
+#define NVC0_3D_LOCAL_BASE 0x0000077c
+
+#define NVC0_3D_TEMP_ADDRESS_HIGH 0x00000790
+
+#define NVC0_3D_TEMP_ADDRESS_LOW 0x00000794
+
+#define NVC0_3D_TEMP_SIZE_HIGH 0x00000798
+
+#define NVC0_3D_TEMP_SIZE_LOW 0x0000079c
+
+#define NVC0_3D_WARP_TEMP_ALLOC 0x000007a0
+
+#define NVC0_3D_ZCULL_WIDTH 0x000007c0
+
+#define NVC0_3D_ZCULL_HEIGHT 0x000007c4
+
+#define NVC0_3D_ZCULL_ADDRESS_HIGH 0x000007e8
+
+#define NVC0_3D_ZCULL_ADDRESS_LOW 0x000007ec
+
+#define NVC0_3D_ZCULL_LIMIT_HIGH 0x000007f0
+
+#define NVC0_3D_ZCULL_LIMIT_LOW 0x000007f4
+
+#define NVC0_3D_RT(i0) (0x00000800 + 0x40*(i0))
+#define NVC0_3D_RT__ESIZE 0x00000040
+#define NVC0_3D_RT__LEN 0x00000008
+
+#define NVC0_3D_RT_ADDRESS_HIGH(i0) (0x00000800 + 0x40*(i0))
+
+#define NVC0_3D_RT_ADDRESS_LOW(i0) (0x00000804 + 0x40*(i0))
+
+#define NVC0_3D_RT_HORIZ(i0) (0x00000808 + 0x40*(i0))
+
+#define NVC0_3D_RT_VERT(i0) (0x0000080c + 0x40*(i0))
+
+#define NVC0_3D_RT_FORMAT(i0) (0x00000810 + 0x40*(i0))
+
+#define NVC0_3D_RT_TILE_MODE(i0) (0x00000814 + 0x40*(i0))
+#define NVC0_3D_RT_TILE_MODE_X 0x00000001
+#define NVC0_3D_RT_TILE_MODE_Y__MASK 0x00000070
+#define NVC0_3D_RT_TILE_MODE_Y__SHIFT 4
+#define NVC0_3D_RT_TILE_MODE_Z__MASK 0x00000700
+#define NVC0_3D_RT_TILE_MODE_Z__SHIFT 8
+#define NVC0_3D_RT_TILE_MODE_LINEAR 0x00001000
+#define NVC0_3D_RT_TILE_MODE_UNK16 0x00010000
+
+#define NVC0_3D_RT_ARRAY_MODE(i0) (0x00000818 + 0x40*(i0))
+#define NVC0_3D_RT_ARRAY_MODE_LAYERS__MASK 0x0000ffff
+#define NVC0_3D_RT_ARRAY_MODE_LAYERS__SHIFT 0
+#define NVC0_3D_RT_ARRAY_MODE_VOLUME 0x00010000
+
+#define NVC0_3D_RT_LAYER_STRIDE(i0) (0x0000081c + 0x40*(i0))
+
+#define NVC0_3D_RT_BASE_LAYER(i0) (0x00000820 + 0x40*(i0))
+
+#define NVC0_3D_RT_UNK14(i0) (0x00000824 + 0x40*(i0))
+
+#define NVC0_3D_VIEWPORT_SCALE_X(i0) (0x00000a00 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_SCALE_X__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_SCALE_X__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_SCALE_Y(i0) (0x00000a04 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_SCALE_Y__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_SCALE_Y__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_SCALE_Z(i0) (0x00000a08 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_SCALE_Z__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_SCALE_Z__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_TRANSLATE_X(i0) (0x00000a0c + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_TRANSLATE_X__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_TRANSLATE_X__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_TRANSLATE_Y(i0) (0x00000a10 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_TRANSLATE_Y__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_TRANSLATE_Y__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_TRANSLATE_Z(i0) (0x00000a14 + 0x20*(i0))
+#define NVC0_3D_VIEWPORT_TRANSLATE_Z__ESIZE 0x00000020
+#define NVC0_3D_VIEWPORT_TRANSLATE_Z__LEN 0x00000010
+
+#define NVC0_3D_VIEWPORT_HORIZ(i0) (0x00000c00 + 0x10*(i0))
+#define NVC0_3D_VIEWPORT_HORIZ__ESIZE 0x00000010
+#define NVC0_3D_VIEWPORT_HORIZ__LEN 0x00000010
+#define NVC0_3D_VIEWPORT_HORIZ_X__MASK 0x0000ffff
+#define NVC0_3D_VIEWPORT_HORIZ_X__SHIFT 0
+#define NVC0_3D_VIEWPORT_HORIZ_W__MASK 0xffff0000
+#define NVC0_3D_VIEWPORT_HORIZ_W__SHIFT 16
+
+#define NVC0_3D_VIEWPORT_VERT(i0) (0x00000c04 + 0x10*(i0))
+#define NVC0_3D_VIEWPORT_VERT__ESIZE 0x00000010
+#define NVC0_3D_VIEWPORT_VERT__LEN 0x00000010
+#define NVC0_3D_VIEWPORT_VERT_Y__MASK 0x0000ffff
+#define NVC0_3D_VIEWPORT_VERT_Y__SHIFT 0
+#define NVC0_3D_VIEWPORT_VERT_H__MASK 0xffff0000
+#define NVC0_3D_VIEWPORT_VERT_H__SHIFT 16
+
+#define NVC0_3D_DEPTH_RANGE_NEAR(i0) (0x00000c08 + 0x10*(i0))
+#define NVC0_3D_DEPTH_RANGE_NEAR__ESIZE 0x00000010
+#define NVC0_3D_DEPTH_RANGE_NEAR__LEN 0x00000010
+
+#define NVC0_3D_DEPTH_RANGE_FAR(i0) (0x00000c0c + 0x10*(i0))
+#define NVC0_3D_DEPTH_RANGE_FAR__ESIZE 0x00000010
+#define NVC0_3D_DEPTH_RANGE_FAR__LEN 0x00000010
+
+#define NVC0_3D_CLIP_RECT_HORIZ(i0) (0x00000d00 + 0x8*(i0))
+#define NVC0_3D_CLIP_RECT_HORIZ__ESIZE 0x00000008
+#define NVC0_3D_CLIP_RECT_HORIZ__LEN 0x00000008
+#define NVC0_3D_CLIP_RECT_HORIZ_MIN__MASK 0x0000ffff
+#define NVC0_3D_CLIP_RECT_HORIZ_MIN__SHIFT 0
+#define NVC0_3D_CLIP_RECT_HORIZ_MAX__MASK 0xffff0000
+#define NVC0_3D_CLIP_RECT_HORIZ_MAX__SHIFT 16
+
+#define NVC0_3D_CLIP_RECT_VERT(i0) (0x00000d04 + 0x8*(i0))
+#define NVC0_3D_CLIP_RECT_VERT__ESIZE 0x00000008
+#define NVC0_3D_CLIP_RECT_VERT__LEN 0x00000008
+#define NVC0_3D_CLIP_RECT_VERT_MIN__MASK 0x0000ffff
+#define NVC0_3D_CLIP_RECT_VERT_MIN__SHIFT 0
+#define NVC0_3D_CLIP_RECT_VERT_MAX__MASK 0xffff0000
+#define NVC0_3D_CLIP_RECT_VERT_MAX__SHIFT 16
+
+#define NVC0_3D_CLIPID_REGION_HORIZ(i0) (0x00000d40 + 0x8*(i0))
+#define NVC0_3D_CLIPID_REGION_HORIZ__ESIZE 0x00000008
+#define NVC0_3D_CLIPID_REGION_HORIZ__LEN 0x00000004
+#define NVC0_3D_CLIPID_REGION_HORIZ_X__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_REGION_HORIZ_X__SHIFT 0
+#define NVC0_3D_CLIPID_REGION_HORIZ_W__MASK 0xffff0000
+#define NVC0_3D_CLIPID_REGION_HORIZ_W__SHIFT 16
+
+#define NVC0_3D_CLIPID_REGION_VERT(i0) (0x00000d44 + 0x8*(i0))
+#define NVC0_3D_CLIPID_REGION_VERT__ESIZE 0x00000008
+#define NVC0_3D_CLIPID_REGION_VERT__LEN 0x00000004
+#define NVC0_3D_CLIPID_REGION_VERT_Y__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_REGION_VERT_Y__SHIFT 0
+#define NVC0_3D_CLIPID_REGION_VERT_H__MASK 0xffff0000
+#define NVC0_3D_CLIPID_REGION_VERT_H__SHIFT 16
+
+#define NVC0_3D_CALL_LIMIT_LOG 0x00000d64
+
+#define NVC0_3D_COUNTER_ENABLE 0x00000d68
+#define NVC0_3D_COUNTER_ENABLE_UNK00 0x00000001
+#define NVC0_3D_COUNTER_ENABLE_UNK01 0x00000002
+#define NVC0_3D_COUNTER_ENABLE_UNK02 0x00000004
+#define NVC0_3D_COUNTER_ENABLE_UNK03 0x00000008
+#define NVC0_3D_COUNTER_ENABLE_UNK04 0x00000010
+#define NVC0_3D_COUNTER_ENABLE_EMITTED_PRIMITIVES 0x00000020
+#define NVC0_3D_COUNTER_ENABLE_UNK06 0x00000040
+#define NVC0_3D_COUNTER_ENABLE_UNK07 0x00000080
+#define NVC0_3D_COUNTER_ENABLE_UNK08 0x00000100
+#define NVC0_3D_COUNTER_ENABLE_UNK09 0x00000200
+#define NVC0_3D_COUNTER_ENABLE_GENERATED_PRIMITIVES 0x00000400
+#define NVC0_3D_COUNTER_ENABLE_UNK0B 0x00000800
+#define NVC0_3D_COUNTER_ENABLE_UNK0C 0x00001000
+#define NVC0_3D_COUNTER_ENABLE_UNK0D 0x00002000
+#define NVC0_3D_COUNTER_ENABLE_UNK0E 0x00004000
+#define NVC0_3D_COUNTER_ENABLE_UNK0F 0x00008000
+
+#define NVC0_3D_VERTEX_BUFFER_FIRST 0x00000d74
+
+#define NVC0_3D_VERTEX_BUFFER_COUNT 0x00000d78
+
+#define NVC0_3D_CLEAR_COLOR(i0) (0x00000d80 + 0x4*(i0))
+#define NVC0_3D_CLEAR_COLOR__ESIZE 0x00000004
+#define NVC0_3D_CLEAR_COLOR__LEN 0x00000004
+
+#define NVC0_3D_CLEAR_DEPTH 0x00000d90
+
+#define NVC0_3D_CLEAR_STENCIL 0x00000da0
+
+#define NVC0_3D_POLYGON_SMOOTH_ENABLE 0x00000db4
+
+#define NVC0_3D_POLYGON_OFFSET_POINT_ENABLE 0x00000dc0
+
+#define NVC0_3D_POLYGON_OFFSET_LINE_ENABLE 0x00000dc4
+
+#define NVC0_3D_POLYGON_OFFSET_FILL_ENABLE 0x00000dc8
+
+#define NVC0_3D_PATCH_VERTICES 0x00000dcc
+
+#define NVC0_3D_WATCHDOG_TIMER 0x00000de4
+
+#define NVC0_3D_WINDOW_OFFSET_X 0x00000df8
+
+#define NVC0_3D_WINDOW_OFFSET_Y 0x00000dfc
+
+#define NVC0_3D_SCISSOR_ENABLE(i0) (0x00000e00 + 0x10*(i0))
+#define NVC0_3D_SCISSOR_ENABLE__ESIZE 0x00000010
+#define NVC0_3D_SCISSOR_ENABLE__LEN 0x00000010
+
+#define NVC0_3D_SCISSOR_HORIZ(i0) (0x00000e04 + 0x10*(i0))
+#define NVC0_3D_SCISSOR_HORIZ__ESIZE 0x00000010
+#define NVC0_3D_SCISSOR_HORIZ__LEN 0x00000010
+#define NVC0_3D_SCISSOR_HORIZ_MIN__MASK 0x0000ffff
+#define NVC0_3D_SCISSOR_HORIZ_MIN__SHIFT 0
+#define NVC0_3D_SCISSOR_HORIZ_MAX__MASK 0xffff0000
+#define NVC0_3D_SCISSOR_HORIZ_MAX__SHIFT 16
+
+#define NVC0_3D_SCISSOR_VERT(i0) (0x00000e08 + 0x10*(i0))
+#define NVC0_3D_SCISSOR_VERT__ESIZE 0x00000010
+#define NVC0_3D_SCISSOR_VERT__LEN 0x00000010
+#define NVC0_3D_SCISSOR_VERT_MIN__MASK 0x0000ffff
+#define NVC0_3D_SCISSOR_VERT_MIN__SHIFT 0
+#define NVC0_3D_SCISSOR_VERT_MAX__MASK 0xffff0000
+#define NVC0_3D_SCISSOR_VERT_MAX__SHIFT 16
+
+#define NVC0_3D_STENCIL_BACK_FUNC_REF 0x00000f54
+
+#define NVC0_3D_STENCIL_BACK_MASK 0x00000f58
+
+#define NVC0_3D_STENCIL_BACK_FUNC_MASK 0x00000f5c
+
+#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_HIGH 0x00000f84
+
+#define NVC0_3D_VERTEX_RUNOUT_ADDRESS_LOW 0x00000f88
+
+#define NVC0_3D_COLOR_MASK_COMMON 0x00000f90
+
+#define NVC0_3D_DEPTH_BOUNDS(i0) (0x00000f9c + 0x4*(i0))
+#define NVC0_3D_DEPTH_BOUNDS__ESIZE 0x00000004
+#define NVC0_3D_DEPTH_BOUNDS__LEN 0x00000002
+
+#define NVC0_3D_RT_SEPARATE_FRAG_DATA 0x00000fac
+
+#define NVC0_3D_MSAA_MASK(i0) (0x00000fbc + 0x4*(i0))
+#define NVC0_3D_MSAA_MASK__ESIZE 0x00000004
+#define NVC0_3D_MSAA_MASK__LEN 0x00000004
+
+#define NVC0_3D_CLIPID_ADDRESS_HIGH 0x00000fcc
+
+#define NVC0_3D_CLIPID_ADDRESS_LOW 0x00000fd0
+
+#define NVC0_3D_ZETA_ADDRESS_HIGH 0x00000fe0
+
+#define NVC0_3D_ZETA_ADDRESS_LOW 0x00000fe4
+
+#define NVC0_3D_ZETA_FORMAT 0x00000fe8
+
+#define NVC0_3D_ZETA_TILE_MODE 0x00000fec
+
+#define NVC0_3D_ZETA_LAYER_STRIDE 0x00000ff0
+
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ 0x00000ff4
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__MASK 0xffff0000
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_W__SHIFT 16
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__MASK 0x0000ffff
+#define NVC0_3D_SCREEN_SCISSOR_HORIZ_X__SHIFT 0
+
+#define NVC0_3D_SCREEN_SCISSOR_VERT 0x00000ff8
+#define NVC0_3D_SCREEN_SCISSOR_VERT_H__MASK 0xffff0000
+#define NVC0_3D_SCREEN_SCISSOR_VERT_H__SHIFT 16
+#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__MASK 0x0000ffff
+#define NVC0_3D_SCREEN_SCISSOR_VERT_Y__SHIFT 0
+
+#define NVC0_3D_CLEAR_FLAGS 0x000010f8
+#define NVC0_3D_CLEAR_FLAGS_STENCIL_MASK 0x00000001
+#define NVC0_3D_CLEAR_FLAGS_UNK4 0x00000010
+#define NVC0_3D_CLEAR_FLAGS_SCISSOR 0x00000100
+#define NVC0_3D_CLEAR_FLAGS_VIEWPORT 0x00001000
+
+#define NVC0_3D_VERTEX_ID 0x00001118
+
+#define NVC0_3D_VTX_ATTR_DEFINE 0x0000114c
+#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__MASK 0x000000ff
+#define NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT 0
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MASK 0x00000700
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT 8
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MIN 0x00000001
+#define NVC0_3D_VTX_ATTR_DEFINE_COMP__MAX 0x00000004
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__MASK 0x00007000
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE__SHIFT 12
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_8 0x00001000
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_16 0x00002000
+#define NVC0_3D_VTX_ATTR_DEFINE_SIZE_32 0x00004000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__MASK 0x00070000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE__SHIFT 16
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SNORM 0x00010000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UNORM 0x00020000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SINT 0x00030000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_UINT 0x00040000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_USCALED 0x00050000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_SSCALED 0x00060000
+#define NVC0_3D_VTX_ATTR_DEFINE_TYPE_FLOAT 0x00070000
+
+#define NVC0_3D_VTX_ATTR_DATA(i0) (0x00001150 + 0x4*(i0))
+#define NVC0_3D_VTX_ATTR_DATA__ESIZE 0x00000004
+#define NVC0_3D_VTX_ATTR_DATA__LEN 0x00000004
+
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT(i0) (0x00001160 + 0x4*(i0))
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT__ESIZE 0x00000004
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT__LEN 0x00000020
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK 0x0000001f
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT 0
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST 0x00000040
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__MASK 0x001fff80
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT 7
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__MASK 0x07e00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE__SHIFT 21
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32_32 0x00200000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32_32 0x00400000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16_16 0x00600000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32_32 0x00800000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16_16 0x00a00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8_8 0x01400000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16_16 0x01e00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 0x02400000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8_8 0x02600000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8_8 0x03000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16 0x03600000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8 0x03a00000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_10_10_10_2 0x06000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__MASK 0x38000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE__SHIFT 27
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SNORM 0x08000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UNORM 0x10000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SINT 0x18000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT 0x20000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_USCALED 0x28000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_SSCALED 0x30000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT 0x38000000
+#define NVC0_3D_VERTEX_ATTRIB_FORMAT_BGRA 0x80000000
+
+#define NVC0_3D_RT_CONTROL 0x0000121c
+#define NVC0_3D_RT_CONTROL_COUNT__MASK 0x0000000f
+#define NVC0_3D_RT_CONTROL_COUNT__SHIFT 0
+#define NVC0_3D_RT_CONTROL_MAP0__MASK 0x00000070
+#define NVC0_3D_RT_CONTROL_MAP0__SHIFT 4
+#define NVC0_3D_RT_CONTROL_MAP1__MASK 0x00000380
+#define NVC0_3D_RT_CONTROL_MAP1__SHIFT 7
+#define NVC0_3D_RT_CONTROL_MAP2__MASK 0x00001c00
+#define NVC0_3D_RT_CONTROL_MAP2__SHIFT 10
+#define NVC0_3D_RT_CONTROL_MAP3__MASK 0x0000e000
+#define NVC0_3D_RT_CONTROL_MAP3__SHIFT 13
+#define NVC0_3D_RT_CONTROL_MAP4__MASK 0x00070000
+#define NVC0_3D_RT_CONTROL_MAP4__SHIFT 16
+#define NVC0_3D_RT_CONTROL_MAP5__MASK 0x00380000
+#define NVC0_3D_RT_CONTROL_MAP5__SHIFT 19
+#define NVC0_3D_RT_CONTROL_MAP6__MASK 0x01c00000
+#define NVC0_3D_RT_CONTROL_MAP6__SHIFT 22
+#define NVC0_3D_RT_CONTROL_MAP7__MASK 0x0e000000
+#define NVC0_3D_RT_CONTROL_MAP7__SHIFT 25
+
+#define NVC0_3D_ZETA_HORIZ 0x00001228
+
+#define NVC0_3D_ZETA_VERT 0x0000122c
+
+#define NVC0_3D_ZETA_ARRAY_MODE 0x00001230
+#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__MASK 0x0000ffff
+#define NVC0_3D_ZETA_ARRAY_MODE_LAYERS__SHIFT 0
+#define NVC0_3D_ZETA_ARRAY_MODE_UNK 0x00010000
+
+#define NVC0_3D_LINKED_TSC 0x00001234
+
+#define NVC0_3D_DRAW_TFB_BYTES 0x0000123c
+
+#define NVC0_3D_FP_RESULT_COUNT 0x00001298
+
+#define NVC0_3D_DEPTH_TEST_ENABLE 0x000012cc
+
+#define NVC0_3D_D3D_FILL_MODE 0x000012d0
+#define NVC0_3D_D3D_FILL_MODE_POINT 0x00000001
+#define NVC0_3D_D3D_FILL_MODE_WIREFRAME 0x00000002
+#define NVC0_3D_D3D_FILL_MODE_SOLID 0x00000003
+
+#define NVC0_3D_SHADE_MODEL 0x000012d4
+#define NVC0_3D_SHADE_MODEL_FLAT 0x00001d00
+#define NVC0_3D_SHADE_MODEL_SMOOTH 0x00001d01
+
+#define NVC0_3D_BLEND_INDEPENDENT 0x000012e4
+
+#define NVC0_3D_DEPTH_WRITE_ENABLE 0x000012e8
+
+#define NVC0_3D_ALPHA_TEST_ENABLE 0x000012ec
+
+#define NVC0_3D_VB_ELEMENT_U8_SETUP 0x00001300
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__MASK 0xc0000000
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_OFFSET__SHIFT 30
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__MASK 0x3fffffff
+#define NVC0_3D_VB_ELEMENT_U8_SETUP_COUNT__SHIFT 0
+
+#define NVC0_3D_VB_ELEMENT_U8 0x00001304
+#define NVC0_3D_VB_ELEMENT_U8_I0__MASK 0x000000ff
+#define NVC0_3D_VB_ELEMENT_U8_I0__SHIFT 0
+#define NVC0_3D_VB_ELEMENT_U8_I1__MASK 0x0000ff00
+#define NVC0_3D_VB_ELEMENT_U8_I1__SHIFT 8
+#define NVC0_3D_VB_ELEMENT_U8_I2__MASK 0x00ff0000
+#define NVC0_3D_VB_ELEMENT_U8_I2__SHIFT 16
+#define NVC0_3D_VB_ELEMENT_U8_I3__MASK 0xff000000
+#define NVC0_3D_VB_ELEMENT_U8_I3__SHIFT 24
+
+#define NVC0_3D_D3D_CULL_MODE 0x00001308
+#define NVC0_3D_D3D_CULL_MODE_NONE 0x00000001
+#define NVC0_3D_D3D_CULL_MODE_FRONT 0x00000002
+#define NVC0_3D_D3D_CULL_MODE_BACK 0x00000003
+
+#define NVC0_3D_DEPTH_TEST_FUNC 0x0000130c
+#define NVC0_3D_DEPTH_TEST_FUNC_NEVER 0x00000200
+#define NVC0_3D_DEPTH_TEST_FUNC_LESS 0x00000201
+#define NVC0_3D_DEPTH_TEST_FUNC_EQUAL 0x00000202
+#define NVC0_3D_DEPTH_TEST_FUNC_LEQUAL 0x00000203
+#define NVC0_3D_DEPTH_TEST_FUNC_GREATER 0x00000204
+#define NVC0_3D_DEPTH_TEST_FUNC_NOTEQUAL 0x00000205
+#define NVC0_3D_DEPTH_TEST_FUNC_GEQUAL 0x00000206
+#define NVC0_3D_DEPTH_TEST_FUNC_ALWAYS 0x00000207
+
+#define NVC0_3D_ALPHA_TEST_REF 0x00001310
+
+#define NVC0_3D_ALPHA_TEST_FUNC 0x00001314
+#define NVC0_3D_ALPHA_TEST_FUNC_NEVER 0x00000200
+#define NVC0_3D_ALPHA_TEST_FUNC_LESS 0x00000201
+#define NVC0_3D_ALPHA_TEST_FUNC_EQUAL 0x00000202
+#define NVC0_3D_ALPHA_TEST_FUNC_LEQUAL 0x00000203
+#define NVC0_3D_ALPHA_TEST_FUNC_GREATER 0x00000204
+#define NVC0_3D_ALPHA_TEST_FUNC_NOTEQUAL 0x00000205
+#define NVC0_3D_ALPHA_TEST_FUNC_GEQUAL 0x00000206
+#define NVC0_3D_ALPHA_TEST_FUNC_ALWAYS 0x00000207
+
+#define NVC0_3D_DRAW_TFB_STRIDE 0x00001318
+#define NVC0_3D_DRAW_TFB_STRIDE__MIN 0x00000001
+#define NVC0_3D_DRAW_TFB_STRIDE__MAX 0x00000fff
+
+#define NVC0_3D_BLEND_COLOR(i0) (0x0000131c + 0x4*(i0))
+#define NVC0_3D_BLEND_COLOR__ESIZE 0x00000004
+#define NVC0_3D_BLEND_COLOR__LEN 0x00000004
+
+#define NVC0_3D_TSC_FLUSH 0x00001330
+#define NVC0_3D_TSC_FLUSH_SPECIFIC 0x00000001
+#define NVC0_3D_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVC0_3D_TSC_FLUSH_ENTRY__SHIFT 4
+
+#define NVC0_3D_TIC_FLUSH 0x00001334
+#define NVC0_3D_TIC_FLUSH_SPECIFIC 0x00000001
+#define NVC0_3D_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVC0_3D_TIC_FLUSH_ENTRY__SHIFT 4
+
+#define NVC0_3D_TEX_CACHE_CTL 0x00001338
+#define NVC0_3D_TEX_CACHE_CTL_UNK1__MASK 0x00000030
+#define NVC0_3D_TEX_CACHE_CTL_UNK1__SHIFT 4
+
+#define NVC0_3D_BLEND_SEPARATE_ALPHA 0x0000133c
+
+#define NVC0_3D_BLEND_EQUATION_RGB 0x00001340
+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NVC0_3D_BLEND_EQUATION_RGB_MIN 0x00008007
+#define NVC0_3D_BLEND_EQUATION_RGB_MAX 0x00008008
+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NVC0_3D_BLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVC0_3D_BLEND_FUNC_SRC_RGB 0x00001344
+
+#define NVC0_3D_BLEND_FUNC_DST_RGB 0x00001348
+
+#define NVC0_3D_BLEND_EQUATION_ALPHA 0x0000134c
+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
+#define NVC0_3D_BLEND_EQUATION_ALPHA_MIN 0x00008007
+#define NVC0_3D_BLEND_EQUATION_ALPHA_MAX 0x00008008
+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NVC0_3D_BLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVC0_3D_BLEND_FUNC_SRC_ALPHA 0x00001350
+
+#define NVC0_3D_BLEND_FUNC_DST_ALPHA 0x00001358
+
+#define NVC0_3D_BLEND_ENABLE_COMMON 0x0000135c
+
+#define NVC0_3D_BLEND_ENABLE(i0) (0x00001360 + 0x4*(i0))
+#define NVC0_3D_BLEND_ENABLE__ESIZE 0x00000004
+#define NVC0_3D_BLEND_ENABLE__LEN 0x00000008
+
+#define NVC0_3D_STENCIL_ENABLE 0x00001380
+
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL 0x00001384
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_ZERO 0x00000000
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR 0x00001e02
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR 0x00001e03
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_FRONT_OP_FAIL_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL 0x00001388
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_ZERO 0x00000000
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR 0x00001e02
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR 0x00001e03
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_FRONT_OP_ZFAIL_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS 0x0000138c
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_ZERO 0x00000000
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR 0x00001e02
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR 0x00001e03
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_FRONT_OP_ZPASS_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC 0x00001390
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NEVER 0x00000200
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LESS 0x00000201
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_EQUAL 0x00000202
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_LEQUAL 0x00000203
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GREATER 0x00000204
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_GEQUAL 0x00000206
+#define NVC0_3D_STENCIL_FRONT_FUNC_FUNC_ALWAYS 0x00000207
+
+#define NVC0_3D_STENCIL_FRONT_FUNC_REF 0x00001394
+
+#define NVC0_3D_STENCIL_FRONT_FUNC_MASK 0x00001398
+
+#define NVC0_3D_STENCIL_FRONT_MASK 0x0000139c
+
+#define NVC0_3D_DRAW_TFB_BASE 0x000013a4
+
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN 0x000013a8
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_0 0x00000001
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_1 0x00000010
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_2 0x00000100
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_3 0x00001000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_4 0x00010000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_5 0x00100000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_6 0x01000000
+#define NVC0_3D_FRAG_COLOR_CLAMP_EN_7 0x10000000
+
+#define NVC0_3D_SCREEN_Y_CONTROL 0x000013ac
+#define NVC0_3D_SCREEN_Y_CONTROL_Y_NEGATE 0x00000001
+#define NVC0_3D_SCREEN_Y_CONTROL_TRIANGLE_RAST_FLIP 0x00000010
+
+#define NVC0_3D_LINE_WIDTH_SMOOTH 0x000013b0
+
+#define NVC0_3D_LINE_WIDTH_ALIASED 0x000013b4
+
+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT 0x00001420
+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MIN 0x00000001
+#define NVC0_3D_GP_VERTEX_OUTPUT_COUNT__MAX 0x00000400
+
+#define NVC0_3D_VERTEX_ARRAY_FLUSH 0x0000142c
+
+#define NVC0_3D_VB_ELEMENT_BASE 0x00001434
+
+#define NVC0_3D_VB_INSTANCE_BASE 0x00001438
+
+#define NVC0_3D_CODE_CB_FLUSH 0x00001440
+
+#define NVC0_3D_CLIPID_HEIGHT 0x00001504
+#define NVC0_3D_CLIPID_HEIGHT__MAX 0x00002000
+
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ 0x00001508
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_LOW__SHIFT 0
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__MASK 0xffff0000
+#define NVC0_3D_CLIPID_FILL_RECT_HORIZ_HIGH__SHIFT 16
+
+#define NVC0_3D_CLIPID_FILL_RECT_VERT 0x0000150c
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__MASK 0x0000ffff
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_LOW__SHIFT 0
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__MASK 0xffff0000
+#define NVC0_3D_CLIPID_FILL_RECT_VERT_HIGH__SHIFT 16
+
+#define NVC0_3D_CLIP_DISTANCE_ENABLE 0x00001510
+#define NVC0_3D_CLIP_DISTANCE_ENABLE_0 0x00000001
+#define NVC0_3D_CLIP_DISTANCE_ENABLE_1 0x00000002
+#define NVC0_3D_CLIP_DISTANCE_ENABLE_2 0x00000004
+#define NVC0_3D_CLIP_DISTANCE_ENABLE_3 0x00000008
+#define NVC0_3D_CLIP_DISTANCE_ENABLE_4 0x00000010
+#define NVC0_3D_CLIP_DISTANCE_ENABLE_5 0x00000020
+#define NVC0_3D_CLIP_DISTANCE_ENABLE_6 0x00000040
+#define NVC0_3D_CLIP_DISTANCE_ENABLE_7 0x00000080
+
+#define NVC0_3D_SAMPLECNT_ENABLE 0x00001514
+
+#define NVC0_3D_POINT_SIZE 0x00001518
+
+#define NVC0_3D_ZCULL_STATCTRS_ENABLE 0x0000151c
+
+#define NVC0_3D_POINT_SPRITE_ENABLE 0x00001520
+
+#define NVC0_3D_COUNTER_RESET 0x00001530
+#define NVC0_3D_COUNTER_RESET_SAMPLECNT 0x00000001
+#define NVC0_3D_COUNTER_RESET_UNK02 0x00000002
+#define NVC0_3D_COUNTER_RESET_UNK03 0x00000003
+#define NVC0_3D_COUNTER_RESET_UNK04 0x00000004
+#define NVC0_3D_COUNTER_RESET_EMITTED_PRIMITIVES 0x00000010
+#define NVC0_3D_COUNTER_RESET_UNK11 0x00000011
+#define NVC0_3D_COUNTER_RESET_UNK12 0x00000012
+#define NVC0_3D_COUNTER_RESET_UNK13 0x00000013
+#define NVC0_3D_COUNTER_RESET_UNK15 0x00000015
+#define NVC0_3D_COUNTER_RESET_UNK16 0x00000016
+#define NVC0_3D_COUNTER_RESET_UNK17 0x00000017
+#define NVC0_3D_COUNTER_RESET_UNK18 0x00000018
+#define NVC0_3D_COUNTER_RESET_UNK1A 0x0000001a
+#define NVC0_3D_COUNTER_RESET_UNK1B 0x0000001b
+#define NVC0_3D_COUNTER_RESET_UNK1C 0x0000001c
+#define NVC0_3D_COUNTER_RESET_UNK1D 0x0000001d
+#define NVC0_3D_COUNTER_RESET_UNK1E 0x0000001e
+#define NVC0_3D_COUNTER_RESET_GENERATED_PRIMITIVES 0x0000001f
+
+#define NVC0_3D_MULTISAMPLE_ENABLE 0x00001534
+
+#define NVC0_3D_ZETA_ENABLE 0x00001538
+
+#define NVC0_3D_MULTISAMPLE_CTRL 0x0000153c
+#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE 0x00000001
+#define NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE 0x00000010
+
+#define NVC0_3D_COND_ADDRESS_HIGH 0x00001550
+
+#define NVC0_3D_COND_ADDRESS_LOW 0x00001554
+
+#define NVC0_3D_COND_MODE 0x00001558
+#define NVC0_3D_COND_MODE_NEVER 0x00000000
+#define NVC0_3D_COND_MODE_ALWAYS 0x00000001
+#define NVC0_3D_COND_MODE_RES_NON_ZERO 0x00000002
+#define NVC0_3D_COND_MODE_EQUAL 0x00000003
+#define NVC0_3D_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NVC0_3D_TSC_ADDRESS_HIGH 0x0000155c
+
+#define NVC0_3D_TSC_ADDRESS_LOW 0x00001560
+#define NVC0_3D_TSC_ADDRESS_LOW__ALIGN 0x00000020
+
+#define NVC0_3D_TSC_LIMIT 0x00001564
+#define NVC0_3D_TSC_LIMIT__MAX 0x00001fff
+
+#define NVC0_3D_POLYGON_OFFSET_FACTOR 0x0000156c
+
+#define NVC0_3D_LINE_SMOOTH_ENABLE 0x00001570
+
+#define NVC0_3D_TIC_ADDRESS_HIGH 0x00001574
+
+#define NVC0_3D_TIC_ADDRESS_LOW 0x00001578
+
+#define NVC0_3D_TIC_LIMIT 0x0000157c
+
+#define NVC0_3D_ZCULL_REGION 0x00001590
+
+#define NVC0_3D_STENCIL_TWO_SIDE_ENABLE 0x00001594
+
+#define NVC0_3D_STENCIL_BACK_OP_FAIL 0x00001598
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_ZERO 0x00000000
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR 0x00001e02
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR 0x00001e03
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_BACK_OP_FAIL_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL 0x0000159c
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_ZERO 0x00000000
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR 0x00001e02
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR 0x00001e03
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_BACK_OP_ZFAIL_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS 0x000015a0
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_ZERO 0x00000000
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INVERT 0x0000150a
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_KEEP 0x00001e00
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_REPLACE 0x00001e01
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR 0x00001e02
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR 0x00001e03
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_INCR_WRAP 0x00008507
+#define NVC0_3D_STENCIL_BACK_OP_ZPASS_DECR_WRAP 0x00008508
+
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC 0x000015a4
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NEVER 0x00000200
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LESS 0x00000201
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_EQUAL 0x00000202
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_LEQUAL 0x00000203
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GREATER 0x00000204
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_NOTEQUAL 0x00000205
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_GEQUAL 0x00000206
+#define NVC0_3D_STENCIL_BACK_FUNC_FUNC_ALWAYS 0x00000207
+
+#define NVC0_3D_CSAA_ENABLE 0x000015b4
+
+#define NVC0_3D_FRAMEBUFFER_SRGB 0x000015b8
+
+#define NVC0_3D_POLYGON_OFFSET_UNITS 0x000015bc
+
+#define NVC0_3D_LAYER 0x000015cc
+#define NVC0_3D_LAYER_IDX__MASK 0x0000ffff
+#define NVC0_3D_LAYER_IDX__SHIFT 0
+#define NVC0_3D_LAYER_USE_GP 0x00010000
+
+#define NVC0_3D_MULTISAMPLE_MODE 0x000015d0
+#define NVC0_3D_MULTISAMPLE_MODE_MS1 0x00000000
+#define NVC0_3D_MULTISAMPLE_MODE_MS2 0x00000001
+#define NVC0_3D_MULTISAMPLE_MODE_MS4 0x00000002
+#define NVC0_3D_MULTISAMPLE_MODE_MS8 0x00000003
+#define NVC0_3D_MULTISAMPLE_MODE_MS8_ALT 0x00000004
+#define NVC0_3D_MULTISAMPLE_MODE_MS2_ALT 0x00000005
+#define NVC0_3D_MULTISAMPLE_MODE_UNK6 0x00000006
+#define NVC0_3D_MULTISAMPLE_MODE_MS4_CS4 0x00000008
+#define NVC0_3D_MULTISAMPLE_MODE_MS4_CS12 0x00000009
+#define NVC0_3D_MULTISAMPLE_MODE_MS8_CS8 0x0000000a
+#define NVC0_3D_MULTISAMPLE_MODE_MS8_CS24 0x0000000b
+
+#define NVC0_3D_VERTEX_BEGIN_D3D 0x000015d4
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__MASK 0x0fffffff
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE__SHIFT 0
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_POINTS 0x00000001
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES 0x00000002
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP 0x00000003
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES 0x00000004
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP 0x00000005
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINES_ADJACENCY 0x0000000a
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
+#define NVC0_3D_VERTEX_BEGIN_D3D_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
+#define NVC0_3D_VERTEX_BEGIN_D3D_INSTANCE_NEXT 0x10000000
+
+#define NVC0_3D_VERTEX_END_D3D 0x000015d8
+#define NVC0_3D_VERTEX_END_D3D_UNK0 0x00000001
+#define NVC0_3D_VERTEX_END_D3D_UNK1 0x00000002
+
+#define NVC0_3D_EDGEFLAG 0x000015e4
+
+#define NVC0_3D_VB_ELEMENT_U32 0x000015e8
+
+#define NVC0_3D_VB_ELEMENT_U16_SETUP 0x000015ec
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__MASK 0xc0000000
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_OFFSET__SHIFT 30
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__MASK 0x3fffffff
+#define NVC0_3D_VB_ELEMENT_U16_SETUP_COUNT__SHIFT 0
+
+#define NVC0_3D_VB_ELEMENT_U16 0x000015f0
+#define NVC0_3D_VB_ELEMENT_U16_I0__MASK 0x0000ffff
+#define NVC0_3D_VB_ELEMENT_U16_I0__SHIFT 0
+#define NVC0_3D_VB_ELEMENT_U16_I1__MASK 0xffff0000
+#define NVC0_3D_VB_ELEMENT_U16_I1__SHIFT 16
+
+#define NVC0_3D_VERTEX_BASE_HIGH 0x000015f4
+
+#define NVC0_3D_VERTEX_BASE_LOW 0x000015f8
+
+#define NVC0_3D_ZCULL_WINDOW_OFFSET_X 0x000015fc
+
+#define NVC0_3D_ZCULL_WINDOW_OFFSET_Y 0x00001600
+
+#define NVC0_3D_POINT_COORD_REPLACE 0x00001604
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__MASK 0x00000004
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN__SHIFT 2
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT 0x00000000
+#define NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT 0x00000004
+#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__MASK 0x000007f8
+#define NVC0_3D_POINT_COORD_REPLACE_ENABLE__SHIFT 3
+
+#define NVC0_3D_CODE_ADDRESS_HIGH 0x00001608
+
+#define NVC0_3D_CODE_ADDRESS_LOW 0x0000160c
+
+#define NVC0_3D_VERTEX_END_GL 0x00001614
+#define NVC0_3D_VERTEX_END_GL_UNK0 0x00000001
+#define NVC0_3D_VERTEX_END_GL_UNK1 0x00000002
+
+#define NVC0_3D_VERTEX_BEGIN_GL 0x00001618
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__MASK 0x0fffffff
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE__SHIFT 0
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS 0x00000000
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES 0x00000001
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_LOOP 0x00000002
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP 0x00000003
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES 0x00000004
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP 0x00000005
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_FAN 0x00000006
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUADS 0x00000007
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_QUAD_STRIP 0x00000008
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POLYGON 0x00000009
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINES_ADJACENCY 0x0000000a
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_LINE_STRIP_ADJACENCY 0x0000000b
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES_ADJACENCY 0x0000000c
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLE_STRIP_ADJACENCY 0x0000000d
+#define NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_PATCHES 0x0000000e
+#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT 0x04000000
+#define NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT 0x08000000
+
+#define NVC0_3D_VERTEX_ID_REPLACE 0x0000161c
+#define NVC0_3D_VERTEX_ID_REPLACE_ENABLE 0x00000001
+#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE__MASK 0x00000ff0
+#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE__SHIFT 4
+
+#define NVC0_3D_VERTEX_DATA 0x00001640
+
+#define NVC0_3D_PRIM_RESTART_ENABLE 0x00001644
+
+#define NVC0_3D_PRIM_RESTART_INDEX 0x00001648
+
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN 0x0000164c
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_VERTEX_ID 0x00000001
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_INSTANCE_ID 0x00000010
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_PRIMITIVE_ID 0x00000100
+#define NVC0_3D_VP_GP_BUILTIN_ATTR_EN_UNK12 0x00001000
+
+#define NVC0_3D_POINT_SMOOTH_ENABLE 0x00001658
+
+#define NVC0_3D_POINT_RASTER_RULES 0x0000165c
+#define NVC0_3D_POINT_RASTER_RULES_OGL 0x00000000
+#define NVC0_3D_POINT_RASTER_RULES_D3D 0x00000001
+
+#define NVC0_3D_TEX_MISC 0x00001664
+#define NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000004
+
+#define NVC0_3D_LINE_STIPPLE_ENABLE 0x0000166c
+
+#define NVC0_3D_LINE_STIPPLE_PATTERN 0x00001680
+
+#define NVC0_3D_PROVOKING_VERTEX_LAST 0x00001684
+
+#define NVC0_3D_VERTEX_TWO_SIDE_ENABLE 0x00001688
+
+#define NVC0_3D_POLYGON_STIPPLE_ENABLE 0x0000168c
+
+#define NVC0_3D_POLYGON_STIPPLE_PATTERN(i0) (0x00001700 + 0x4*(i0))
+#define NVC0_3D_POLYGON_STIPPLE_PATTERN__ESIZE 0x00000004
+#define NVC0_3D_POLYGON_STIPPLE_PATTERN__LEN 0x00000020
+
+#define NVC0_3D_ZETA_BASE_LAYER 0x0000179c
+
+#define NVC0_3D_VERTEX_QUARANTINE_ADDRESS_HIGH 0x000017bc
+
+#define NVC0_3D_VERTEX_QUARANTINE_ADDRESS_LOW 0x000017c0
+
+#define NVC0_3D_VERTEX_QUARANTINE_SIZE 0x000017c4
+#define NVC0_3D_VERTEX_QUARANTINE_SIZE_16K 0x00000001
+#define NVC0_3D_VERTEX_QUARANTINE_SIZE_32K 0x00000002
+#define NVC0_3D_VERTEX_QUARANTINE_SIZE_64K 0x00000003
+
+#define NVC0_3D_STRMOUT_UNK1780(i0) (0x00001780 + 0x4*(i0))
+#define NVC0_3D_STRMOUT_UNK1780__ESIZE 0x00000004
+#define NVC0_3D_STRMOUT_UNK1780__LEN 0x00000004
+
+#define NVC0_3D_UNK17BC_ADDRESS_HIGH 0x000017bc
+
+#define NVC0_3D_UNK17BC_ADDRESS_LOW 0x000017c0
+
+#define NVC0_3D_UNK17BC_LIMIT 0x000017c4
+
+#define NVC0_3D_INDEX_ARRAY_START_HIGH 0x000017c8
+
+#define NVC0_3D_INDEX_ARRAY_START_LOW 0x000017cc
+
+#define NVC0_3D_INDEX_ARRAY_LIMIT_HIGH 0x000017d0
+
+#define NVC0_3D_INDEX_ARRAY_LIMIT_LOW 0x000017d4
+
+#define NVC0_3D_INDEX_LOG2_SIZE 0x000017d8
+
+#define NVC0_3D_INDEX_BATCH_FIRST 0x000017dc
+
+#define NVC0_3D_INDEX_BATCH_COUNT 0x000017e0
+
+#define NVC0_3D_POLYGON_OFFSET_CLAMP 0x0000187c
+
+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(i0) (0x00001880 + 0x4*(i0))
+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__ESIZE 0x00000004
+#define NVC0_3D_VERTEX_ARRAY_PER_INSTANCE__LEN 0x00000020
+
+#define NVC0_3D_VP_POINT_SIZE_EN 0x00001910
+
+#define NVC0_3D_CULL_FACE_ENABLE 0x00001918
+
+#define NVC0_3D_FRONT_FACE 0x0000191c
+#define NVC0_3D_FRONT_FACE_CW 0x00000900
+#define NVC0_3D_FRONT_FACE_CCW 0x00000901
+
+#define NVC0_3D_CULL_FACE 0x00001920
+#define NVC0_3D_CULL_FACE_FRONT 0x00000404
+#define NVC0_3D_CULL_FACE_BACK 0x00000405
+#define NVC0_3D_CULL_FACE_FRONT_AND_BACK 0x00000408
+
+#define NVC0_3D_LINE_LAST_PIXEL 0x00001924
+
+#define NVC0_3D_VIEWPORT_TRANSFORM_EN 0x0000192c
+
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL 0x0000193c
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1 0x00000001
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__MASK 0x00000006
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1__SHIFT 1
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK0 0x00000000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 0x00000002
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK2 0x00000004
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR 0x00000008
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR 0x00000010
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK7 0x00000080
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK10 0x00000400
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK11 0x00000800
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__MASK 0x00003000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12__SHIFT 12
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK0 0x00000000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK1 0x00001000
+#define NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2 0x00002000
+
+#define NVC0_3D_CLIP_DISTANCE_MODE 0x00001940
+#define NVC0_3D_CLIP_DISTANCE_MODE_0__MASK 0x00000001
+#define NVC0_3D_CLIP_DISTANCE_MODE_0__SHIFT 0
+#define NVC0_3D_CLIP_DISTANCE_MODE_0_CLIP 0x00000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_0_CULL 0x00000001
+#define NVC0_3D_CLIP_DISTANCE_MODE_1__MASK 0x00000010
+#define NVC0_3D_CLIP_DISTANCE_MODE_1__SHIFT 4
+#define NVC0_3D_CLIP_DISTANCE_MODE_1_CLIP 0x00000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_1_CULL 0x00000010
+#define NVC0_3D_CLIP_DISTANCE_MODE_2__MASK 0x00000100
+#define NVC0_3D_CLIP_DISTANCE_MODE_2__SHIFT 8
+#define NVC0_3D_CLIP_DISTANCE_MODE_2_CLIP 0x00000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_2_CULL 0x00000100
+#define NVC0_3D_CLIP_DISTANCE_MODE_3__MASK 0x00001000
+#define NVC0_3D_CLIP_DISTANCE_MODE_3__SHIFT 12
+#define NVC0_3D_CLIP_DISTANCE_MODE_3_CLIP 0x00000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_3_CULL 0x00001000
+#define NVC0_3D_CLIP_DISTANCE_MODE_4__MASK 0x00010000
+#define NVC0_3D_CLIP_DISTANCE_MODE_4__SHIFT 16
+#define NVC0_3D_CLIP_DISTANCE_MODE_4_CLIP 0x00000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_4_CULL 0x00010000
+#define NVC0_3D_CLIP_DISTANCE_MODE_5__MASK 0x00100000
+#define NVC0_3D_CLIP_DISTANCE_MODE_5__SHIFT 20
+#define NVC0_3D_CLIP_DISTANCE_MODE_5_CLIP 0x00000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_5_CULL 0x00100000
+#define NVC0_3D_CLIP_DISTANCE_MODE_6__MASK 0x01000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_6__SHIFT 24
+#define NVC0_3D_CLIP_DISTANCE_MODE_6_CLIP 0x00000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_6_CULL 0x01000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_7__MASK 0x10000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_7__SHIFT 28
+#define NVC0_3D_CLIP_DISTANCE_MODE_7_CLIP 0x00000000
+#define NVC0_3D_CLIP_DISTANCE_MODE_7_CULL 0x10000000
+
+#define NVC0_3D_CLIP_RECTS_EN 0x0000194c
+
+#define NVC0_3D_CLIP_RECTS_MODE 0x00001950
+#define NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY 0x00000000
+#define NVC0_3D_CLIP_RECTS_MODE_OUTSIDE_ALL 0x00000001
+#define NVC0_3D_CLIP_RECTS_MODE_NEVER 0x00000002
+
+#define NVC0_3D_ZCULL_INVALIDATE 0x00001958
+
+#define NVC0_3D_ZCULL_TEST_MASK 0x0000196c
+#define NVC0_3D_ZCULL_TEST_MASK_FAIL_GT_PASS_LT 0x00000001
+#define NVC0_3D_ZCULL_TEST_MASK_PASS_GT_FAIL_LT 0x00000010
+
+#define NVC0_3D_FP_ZORDER_CTRL 0x0000196c
+#define NVC0_3D_FP_ZORDER_CTRL_0 0x00000001
+#define NVC0_3D_FP_ZORDER_CTRL_1 0x00000010
+
+#define NVC0_3D_CLIPID_ENABLE 0x0000197c
+
+#define NVC0_3D_CLIPID_WIDTH 0x00001980
+#define NVC0_3D_CLIPID_WIDTH__MAX 0x00002000
+#define NVC0_3D_CLIPID_WIDTH__ALIGN 0x00000040
+
+#define NVC0_3D_CLIPID_ID 0x00001984
+
+#define NVC0_3D_DEPTH_BOUNDS_EN 0x000019bc
+
+#define NVC0_3D_LOGIC_OP_ENABLE 0x000019c4
+
+#define NVC0_3D_LOGIC_OP 0x000019c8
+#define NVC0_3D_LOGIC_OP_CLEAR 0x00001500
+#define NVC0_3D_LOGIC_OP_AND 0x00001501
+#define NVC0_3D_LOGIC_OP_AND_REVERSE 0x00001502
+#define NVC0_3D_LOGIC_OP_COPY 0x00001503
+#define NVC0_3D_LOGIC_OP_AND_INVERTED 0x00001504
+#define NVC0_3D_LOGIC_OP_NOOP 0x00001505
+#define NVC0_3D_LOGIC_OP_XOR 0x00001506
+#define NVC0_3D_LOGIC_OP_OR 0x00001507
+#define NVC0_3D_LOGIC_OP_NOR 0x00001508
+#define NVC0_3D_LOGIC_OP_EQUIV 0x00001509
+#define NVC0_3D_LOGIC_OP_INVERT 0x0000150a
+#define NVC0_3D_LOGIC_OP_OR_REVERSE 0x0000150b
+#define NVC0_3D_LOGIC_OP_COPY_INVERTED 0x0000150c
+#define NVC0_3D_LOGIC_OP_OR_INVERTED 0x0000150d
+#define NVC0_3D_LOGIC_OP_NAND 0x0000150e
+#define NVC0_3D_LOGIC_OP_SET 0x0000150f
+
+#define NVC0_3D_ZETA_COMP_ENABLE 0x000019cc
+
+#define NVC0_3D_CLEAR_BUFFERS 0x000019d0
+#define NVC0_3D_CLEAR_BUFFERS_Z 0x00000001
+#define NVC0_3D_CLEAR_BUFFERS_S 0x00000002
+#define NVC0_3D_CLEAR_BUFFERS_R 0x00000004
+#define NVC0_3D_CLEAR_BUFFERS_G 0x00000008
+#define NVC0_3D_CLEAR_BUFFERS_B 0x00000010
+#define NVC0_3D_CLEAR_BUFFERS_A 0x00000020
+#define NVC0_3D_CLEAR_BUFFERS_RT__MASK 0x000003c0
+#define NVC0_3D_CLEAR_BUFFERS_RT__SHIFT 6
+#define NVC0_3D_CLEAR_BUFFERS_LAYER__MASK 0x001ffc00
+#define NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT 10
+
+#define NVC0_3D_CLIPID_FILL 0x000019d4
+
+#define NVC0_3D_RT_COMP_ENABLE(i0) (0x000019e0 + 0x4*(i0))
+#define NVC0_3D_RT_COMP_ENABLE__ESIZE 0x00000004
+#define NVC0_3D_RT_COMP_ENABLE__LEN 0x00000008
+
+#define NVC0_3D_COLOR_MASK(i0) (0x00001a00 + 0x4*(i0))
+#define NVC0_3D_COLOR_MASK__ESIZE 0x00000004
+#define NVC0_3D_COLOR_MASK__LEN 0x00000008
+#define NVC0_3D_COLOR_MASK_R 0x0000000f
+#define NVC0_3D_COLOR_MASK_G 0x000000f0
+#define NVC0_3D_COLOR_MASK_B 0x00000f00
+#define NVC0_3D_COLOR_MASK_A 0x0000f000
+
+#define NVC0_3D_QUERY_ADDRESS_HIGH 0x00001b00
+
+#define NVC0_3D_QUERY_ADDRESS_LOW 0x00001b04
+
+#define NVC0_3D_QUERY_SEQUENCE 0x00001b08
+
+#define NVC0_3D_QUERY_GET 0x00001b0c
+#define NVC0_3D_QUERY_GET_MODE__MASK 0x00000003
+#define NVC0_3D_QUERY_GET_MODE__SHIFT 0
+#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK0 0x00000000
+#define NVC0_3D_QUERY_GET_MODE_SYNC 0x00000001
+#define NVC0_3D_QUERY_GET_MODE_WRITE_UNK2 0x00000002
+#define NVC0_3D_QUERY_GET_FENCE 0x00000010
+#define NVC0_3D_QUERY_GET_STREAM__MASK 0x000000e0
+#define NVC0_3D_QUERY_GET_STREAM__SHIFT 5
+#define NVC0_3D_QUERY_GET_UNK8 0x00000100
+#define NVC0_3D_QUERY_GET_UNIT__MASK 0x0000f000
+#define NVC0_3D_QUERY_GET_UNIT__SHIFT 12
+#define NVC0_3D_QUERY_GET_SYNC_COND__MASK 0x00010000
+#define NVC0_3D_QUERY_GET_SYNC_COND__SHIFT 16
+#define NVC0_3D_QUERY_GET_SYNC_COND_NEQUAL 0x00000000
+#define NVC0_3D_QUERY_GET_SYNC_COND_GREATER 0x00010000
+#define NVC0_3D_QUERY_GET_INTR 0x00100000
+#define NVC0_3D_QUERY_GET_UNK21 0x00200000
+#define NVC0_3D_QUERY_GET_SELECT__MASK 0x0f800000
+#define NVC0_3D_QUERY_GET_SELECT__SHIFT 23
+#define NVC0_3D_QUERY_GET_SELECT_ZERO 0x00000000
+#define NVC0_3D_QUERY_GET_SELECT_SAMPLECNT 0x01000000
+#define NVC0_3D_QUERY_GET_SELECT_EMITTED_PRIMS 0x05800000
+#define NVC0_3D_QUERY_GET_SELECT_GENERATED_PRIMS 0x09000000
+#define NVC0_3D_QUERY_GET_SHORT 0x10000000
+
+#define NVC0_3D_VERTEX_ARRAY_FETCH(i0) (0x00001c00 + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_FETCH__ESIZE 0x00000010
+#define NVC0_3D_VERTEX_ARRAY_FETCH__LEN 0x00000020
+#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__MASK 0x00000fff
+#define NVC0_3D_VERTEX_ARRAY_FETCH_STRIDE__SHIFT 0
+#define NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE 0x00001000
+
+#define NVC0_3D_VERTEX_ARRAY_START_HIGH(i0) (0x00001c04 + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_START_HIGH__ESIZE 0x00000010
+#define NVC0_3D_VERTEX_ARRAY_START_HIGH__LEN 0x00000020
+
+#define NVC0_3D_VERTEX_ARRAY_START_LOW(i0) (0x00001c08 + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_START_LOW__ESIZE 0x00000010
+#define NVC0_3D_VERTEX_ARRAY_START_LOW__LEN 0x00000020
+
+#define NVC0_3D_VERTEX_ARRAY_DIVISOR(i0) (0x00001c0c + 0x10*(i0))
+#define NVC0_3D_VERTEX_ARRAY_DIVISOR__ESIZE 0x00000010
+#define NVC0_3D_VERTEX_ARRAY_DIVISOR__LEN 0x00000020
+
+#define NVC0_3D_IBLEND(i0) (0x00001e00 + 0x20*(i0))
+#define NVC0_3D_IBLEND__ESIZE 0x00000020
+#define NVC0_3D_IBLEND__LEN 0x00000008
+
+#define NVC0_3D_IBLEND_EQUATION_RGB(i0) (0x00001e04 + 0x20*(i0))
+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_ADD 0x00008006
+#define NVC0_3D_IBLEND_EQUATION_RGB_MIN 0x00008007
+#define NVC0_3D_IBLEND_EQUATION_RGB_MAX 0x00008008
+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_SUBTRACT 0x0000800a
+#define NVC0_3D_IBLEND_EQUATION_RGB_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVC0_3D_IBLEND_FUNC_SRC_RGB(i0) (0x00001e08 + 0x20*(i0))
+
+#define NVC0_3D_IBLEND_FUNC_DST_RGB(i0) (0x00001e0c + 0x20*(i0))
+
+#define NVC0_3D_IBLEND_EQUATION_ALPHA(i0) (0x00001e10 + 0x20*(i0))
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_ADD 0x00008006
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_MIN 0x00008007
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_MAX 0x00008008
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_SUBTRACT 0x0000800a
+#define NVC0_3D_IBLEND_EQUATION_ALPHA_FUNC_REVERSE_SUBTRACT 0x0000800b
+
+#define NVC0_3D_IBLEND_FUNC_SRC_ALPHA(i0) (0x00001e14 + 0x20*(i0))
+
+#define NVC0_3D_IBLEND_FUNC_DST_ALPHA(i0) (0x00001e18 + 0x20*(i0))
+
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH(i0) (0x00001f00 + 0x8*(i0))
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__ESIZE 0x00000008
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_HIGH__LEN 0x00000020
+
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW(i0) (0x00001f04 + 0x8*(i0))
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__ESIZE 0x00000008
+#define NVC0_3D_VERTEX_ARRAY_LIMIT_LOW__LEN 0x00000020
+
+#define NVC0_3D_SP(i0) (0x00002000 + 0x40*(i0))
+#define NVC0_3D_SP__ESIZE 0x00000040
+#define NVC0_3D_SP__LEN 0x00000006
+
+#define NVC0_3D_SP_SELECT(i0) (0x00002000 + 0x40*(i0))
+#define NVC0_3D_SP_SELECT_ENABLE 0x00000001
+#define NVC0_3D_SP_SELECT_PROGRAM__MASK 0x00000070
+#define NVC0_3D_SP_SELECT_PROGRAM__SHIFT 4
+#define NVC0_3D_SP_SELECT_PROGRAM_VP_A 0x00000000
+#define NVC0_3D_SP_SELECT_PROGRAM_VP_B 0x00000010
+#define NVC0_3D_SP_SELECT_PROGRAM_TCP 0x00000020
+#define NVC0_3D_SP_SELECT_PROGRAM_TEP 0x00000030
+#define NVC0_3D_SP_SELECT_PROGRAM_GP 0x00000040
+#define NVC0_3D_SP_SELECT_PROGRAM_FP 0x00000050
+
+#define NVC0_3D_SP_START_ID(i0) (0x00002004 + 0x40*(i0))
+
+#define NVC0_3D_SP_GPR_ALLOC(i0) (0x0000200c + 0x40*(i0))
+
+#define NVC0_3D_TEX_LIMITS(i0) (0x00002200 + 0x10*(i0))
+#define NVC0_3D_TEX_LIMITS__ESIZE 0x00000010
+#define NVC0_3D_TEX_LIMITS__LEN 0x00000005
+
+#define NVC0_3D_FIRMWARE(i0) (0x00002300 + 0x4*(i0))
+#define NVC0_3D_FIRMWARE__ESIZE 0x00000004
+#define NVC0_3D_FIRMWARE__LEN 0x00000020
+
+#define NVC0_3D_CB_SIZE 0x00002380
+
+#define NVC0_3D_CB_ADDRESS_HIGH 0x00002384
+
+#define NVC0_3D_CB_ADDRESS_LOW 0x00002388
+
+#define NVC0_3D_CB_POS 0x0000238c
+
+#define NVC0_3D_CB_DATA(i0) (0x00002390 + 0x4*(i0))
+#define NVC0_3D_CB_DATA__ESIZE 0x00000004
+#define NVC0_3D_CB_DATA__LEN 0x00000010
+
+#define NVC0_3D_BIND_TSC(i0) (0x00002400 + 0x20*(i0))
+#define NVC0_3D_BIND_TSC__ESIZE 0x00000020
+#define NVC0_3D_BIND_TSC__LEN 0x00000005
+#define NVC0_3D_BIND_TSC_ACTIVE 0x00000001
+#define NVC0_3D_BIND_TSC_SAMPLER__MASK 0x00000ff0
+#define NVC0_3D_BIND_TSC_SAMPLER__SHIFT 4
+#define NVC0_3D_BIND_TSC_TSC__MASK 0x01fff000
+#define NVC0_3D_BIND_TSC_TSC__SHIFT 12
+
+#define NVC0_3D_BIND_TIC(i0) (0x00002404 + 0x20*(i0))
+#define NVC0_3D_BIND_TIC__ESIZE 0x00000020
+#define NVC0_3D_BIND_TIC__LEN 0x00000005
+#define NVC0_3D_BIND_TIC_ACTIVE 0x00000001
+#define NVC0_3D_BIND_TIC_TEXTURE__MASK 0x000001fe
+#define NVC0_3D_BIND_TIC_TEXTURE__SHIFT 1
+#define NVC0_3D_BIND_TIC_TIC__MASK 0x7ffffe00
+#define NVC0_3D_BIND_TIC_TIC__SHIFT 9
+
+#define NVC0_3D_CB_BIND(i0) (0x00002410 + 0x20*(i0))
+#define NVC0_3D_CB_BIND__ESIZE 0x00000020
+#define NVC0_3D_CB_BIND__LEN 0x00000005
+#define NVC0_3D_CB_BIND_VALID 0x00000001
+#define NVC0_3D_CB_BIND_INDEX__MASK 0x000000f0
+#define NVC0_3D_CB_BIND_INDEX__SHIFT 4
+
+#define NVC0_3D_VERT_COLOR_CLAMP_EN 0x00002600
+
+#define NVE4_3D_TEX_CB_INDEX 0x00002608
+#define NVE4_3D_TEX_CB_INDEX__MIN 0x00000000
+#define NVE4_3D_TEX_CB_INDEX__MAX 0x00000010
+
+#define NVC0_3D_TFB_VARYING_LOCS(i0, i1) (0x00002800 + 0x80*(i0) + 0x4*(i1))
+#define NVC0_3D_TFB_VARYING_LOCS__ESIZE 0x00000004
+#define NVC0_3D_TFB_VARYING_LOCS__LEN 0x00000020
+
+#define NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE 0x00003800
+
+#define NVC0_3D_MACRO_VERTEX_ARRAY_SELECT 0x00003808
+
+#define NVC0_3D_MACRO_BLEND_ENABLES 0x00003810
+
+#define NVC0_3D_MACRO_POLYGON_MODE_FRONT 0x00003818
+#define NVC0_3D_MACRO_POLYGON_MODE_FRONT_POINT 0x00001b00
+#define NVC0_3D_MACRO_POLYGON_MODE_FRONT_LINE 0x00001b01
+#define NVC0_3D_MACRO_POLYGON_MODE_FRONT_FILL 0x00001b02
+
+#define NVC0_3D_MACRO_POLYGON_MODE_BACK 0x00003820
+#define NVC0_3D_MACRO_POLYGON_MODE_BACK_POINT 0x00001b00
+#define NVC0_3D_MACRO_POLYGON_MODE_BACK_LINE 0x00001b01
+#define NVC0_3D_MACRO_POLYGON_MODE_BACK_FILL 0x00001b02
+
+#define NVC0_3D_MACRO_GP_SELECT 0x00003828
+
+#define NVC0_3D_MACRO_TEP_SELECT 0x00003830
+
+
+#endif /* NVC0_3D_XML */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_3ddefs.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_3ddefs.xml.h
new file mode 100644
index 0000000..84b1522
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_3ddefs.xml.h
@@ -0,0 +1,98 @@
+#ifndef NV_3DDEFS_XML
+#define NV_3DDEFS_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_3d.xml ( 26312 bytes, from 2010-10-08 10:10:01)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+- nv_3ddefs.xml ( 16397 bytes, from 2010-10-08 13:30:38)
+- nv_object.xml ( 11249 bytes, from 2010-10-07 15:31:28)
+- nvchipsets.xml ( 2824 bytes, from 2010-07-07 13:41:20)
+- nv50_defs.xml ( 4482 bytes, from 2010-10-03 13:18:37)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+#define NV50_3D_BLEND_FACTOR_ZERO 0x00004000
+#define NV50_3D_BLEND_FACTOR_ONE 0x00004001
+#define NV50_3D_BLEND_FACTOR_SRC_COLOR 0x00004300
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_COLOR 0x00004301
+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA 0x00004302
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA 0x00004303
+#define NV50_3D_BLEND_FACTOR_DST_ALPHA 0x00004304
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_ALPHA 0x00004305
+#define NV50_3D_BLEND_FACTOR_DST_COLOR 0x00004306
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_DST_COLOR 0x00004307
+#define NV50_3D_BLEND_FACTOR_SRC_ALPHA_SATURATE 0x00004308
+#define NV50_3D_BLEND_FACTOR_CONSTANT_COLOR 0x0000c001
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR 0x0000c002
+#define NV50_3D_BLEND_FACTOR_CONSTANT_ALPHA 0x0000c003
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA 0x0000c004
+#define NV50_3D_BLEND_FACTOR_SRC1_COLOR 0x0000c900
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR 0x0000c901
+#define NV50_3D_BLEND_FACTOR_SRC1_ALPHA 0x0000c902
+#define NV50_3D_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA 0x0000c903
+
+#endif /* NV_3DDEFS_XML */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
new file mode 100644
index 0000000..b49f1ae
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.c
@@ -0,0 +1,271 @@
+/*
+ * Copyright 2013 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christoph Bumiller, Samuel Pitoiset
+ */
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_compute.h"
+
+int
+nvc0_screen_compute_setup(struct nvc0_screen *screen,
+ struct nouveau_pushbuf *push)
+{
+ struct nouveau_object *chan = screen->base.channel;
+ struct nouveau_device *dev = screen->base.device;
+ uint32_t obj_class;
+ int ret;
+ int i;
+
+ switch (dev->chipset & 0xf0) {
+ case 0xc0:
+ if (dev->chipset == 0xc8)
+ obj_class = NVC8_COMPUTE_CLASS;
+ else
+ obj_class = NVC0_COMPUTE_CLASS;
+ break;
+ case 0xd0:
+ obj_class = NVC0_COMPUTE_CLASS;
+ break;
+ default:
+ NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
+ return -1;
+ }
+
+ ret = nouveau_object_new(chan, 0xbeef90c0, obj_class, NULL, 0,
+ &screen->compute);
+ if (ret) {
+ NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
+ return ret;
+ }
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, 1 << 12, NULL,
+ &screen->parm);
+ if (ret)
+ return ret;
+
+ BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->compute->oclass);
+
+ /* hardware limit */
+ BEGIN_NVC0(push, NVC0_COMPUTE(MP_LIMIT), 1);
+ PUSH_DATA (push, screen->mp_count);
+ BEGIN_NVC0(push, NVC0_COMPUTE(CALL_LIMIT_LOG), 1);
+ PUSH_DATA (push, 0xf);
+
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x02a0), 1);
+ PUSH_DATA (push, 0x8000);
+
+ /* global memory setup */
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NIC0(push, NVC0_COMPUTE(GLOBAL_BASE), 0x100);
+ for (i = 0; i <= 0xff; i++)
+ PUSH_DATA (push, (0xc << 28) | (i << 16) | i);
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+ PUSH_DATA (push, 1);
+
+ /* local memory and cstack setup */
+ BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->tls->offset);
+ PUSH_DATA (push, screen->tls->offset);
+ BEGIN_NVC0(push, NVC0_COMPUTE(TEMP_SIZE_HIGH), 2);
+ PUSH_DATAh(push, screen->tls->size);
+ PUSH_DATA (push, screen->tls->size);
+ BEGIN_NVC0(push, NVC0_COMPUTE(WARP_TEMP_ALLOC), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_BASE), 1);
+ PUSH_DATA (push, 1 << 24);
+
+ /* shared memory setup */
+ BEGIN_NVC0(push, NVC0_COMPUTE(CACHE_SPLIT), 1);
+ PUSH_DATA (push, NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1);
+ BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_BASE), 1);
+ PUSH_DATA (push, 2 << 24);
+ BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 1);
+ PUSH_DATA (push, 0);
+
+ /* code segment setup */
+ BEGIN_NVC0(push, NVC0_COMPUTE(CODE_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->text->offset);
+ PUSH_DATA (push, screen->text->offset);
+
+ /* bind parameters buffer */
+ BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
+ PUSH_DATA (push, screen->parm->size);
+ PUSH_DATAh(push, screen->parm->offset);
+ PUSH_DATA (push, screen->parm->offset);
+ BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
+ PUSH_DATA (push, (0 << 8) | 1);
+
+ /* TODO: textures & samplers */
+
+ return 0;
+}
+
+boolean
+nvc0_compute_validate_program(struct nvc0_context *nvc0)
+{
+ struct nvc0_program *prog = nvc0->compprog;
+
+ if (prog->mem)
+ return TRUE;
+
+ if (!prog->translated) {
+ prog->translated = nvc0_program_translate(
+ prog, nvc0->screen->base.device->chipset);
+ if (!prog->translated)
+ return FALSE;
+ }
+ if (unlikely(!prog->code_size))
+ return FALSE;
+
+ if (likely(prog->code_size)) {
+ if (nvc0_program_upload_code(nvc0, prog)) {
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CODE);
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
+static boolean
+nvc0_compute_state_validate(struct nvc0_context *nvc0)
+{
+ if (!nvc0_compute_validate_program(nvc0))
+ return FALSE;
+
+ /* TODO: textures, samplers, surfaces, global memory buffers */
+
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);
+
+ nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
+ if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
+ return FALSE;
+ if (unlikely(nvc0->state.flushed))
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);
+
+ return TRUE;
+
+}
+
+static void
+nvc0_compute_upload_input(struct nvc0_context *nvc0, const void *input)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nvc0_program *cp = nvc0->compprog;
+
+ if (cp->parm_size) {
+ BEGIN_NVC0(push, NVC0_COMPUTE(CB_SIZE), 3);
+ PUSH_DATA (push, align(cp->parm_size, 0x100));
+ PUSH_DATAh(push, screen->parm->offset);
+ PUSH_DATA (push, screen->parm->offset);
+ BEGIN_NVC0(push, NVC0_COMPUTE(CB_BIND), 1);
+ PUSH_DATA (push, (0 << 8) | 1);
+ /* NOTE: size is limited to 4 KiB, which is < NV04_PFIFO_MAX_PACKET_LEN */
+ BEGIN_1IC0(push, NVC0_COMPUTE(CB_POS), 1 + cp->parm_size / 4);
+ PUSH_DATA (push, 0);
+ PUSH_DATAp(push, input, cp->parm_size / 4);
+
+ BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVC0_COMPUTE_FLUSH_CB);
+ }
+}
+
+void
+nvc0_launch_grid(struct pipe_context *pipe,
+ const uint *block_layout, const uint *grid_layout,
+ uint32_t label,
+ const void *input)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_program *cp = nvc0->compprog;
+ unsigned s, i;
+ int ret;
+
+ ret = !nvc0_compute_state_validate(nvc0);
+ if (ret)
+ goto out;
+
+ nvc0_compute_upload_input(nvc0, input);
+
+ BEGIN_NVC0(push, NVC0_COMPUTE(CP_START_ID), 1);
+ PUSH_DATA (push, nvc0_program_symbol_offset(cp, label));
+
+ BEGIN_NVC0(push, NVC0_COMPUTE(LOCAL_POS_ALLOC), 3);
+ PUSH_DATA (push, align(cp->cp.lmem_size, 0x10));
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0x800); /* WARP_CSTACK_SIZE */
+
+ BEGIN_NVC0(push, NVC0_COMPUTE(SHARED_SIZE), 3);
+ PUSH_DATA (push, align(cp->cp.smem_size, 0x100));
+ PUSH_DATA (push, block_layout[0] * block_layout[1] * block_layout[2]);
+ PUSH_DATA (push, cp->num_barriers);
+ BEGIN_NVC0(push, NVC0_COMPUTE(CP_GPR_ALLOC), 1);
+ PUSH_DATA (push, cp->num_gprs);
+
+ /* grid/block setup */
+ BEGIN_NVC0(push, NVC0_COMPUTE(GRIDDIM_YX), 2);
+ PUSH_DATA (push, (grid_layout[1] << 16) | grid_layout[0]);
+ PUSH_DATA (push, grid_layout[2]);
+ BEGIN_NVC0(push, NVC0_COMPUTE(BLOCKDIM_YX), 2);
+ PUSH_DATA (push, (block_layout[1] << 16) | block_layout[0]);
+ PUSH_DATA (push, block_layout[2]);
+
+ /* launch preliminary setup */
+ BEGIN_NVC0(push, NVC0_COMPUTE(GRIDID), 1);
+ PUSH_DATA (push, 0x1);
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x036c), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVC0_COMPUTE_FLUSH_GLOBAL | NVC0_COMPUTE_FLUSH_UNK8);
+
+ /* kernel launching */
+ BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_BEGIN), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x0a08), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_COMPUTE(LAUNCH), 1);
+ PUSH_DATA (push, 0x1000);
+ BEGIN_NVC0(push, NVC0_COMPUTE(COMPUTE_END), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x0360), 1);
+ PUSH_DATA (push, 0x1);
+
+ /* rebind all the 3D constant buffers
+ * (looks like binding a CB on COMPUTE clobbers 3D state) */
+ nvc0->dirty |= NVC0_NEW_CONSTBUF;
+ for (s = 0; s < 6; s++) {
+ for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; i++)
+ if (nvc0->constbuf[s][i].u.buf)
+ nvc0->constbuf_dirty[s] |= 1 << i;
+ }
+ memset(nvc0->state.uniform_buffer_bound, 0,
+ sizeof(nvc0->state.uniform_buffer_bound));
+
+out:
+ if (ret)
+ NOUVEAU_ERR("Failed to launch grid !\n");
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
new file mode 100644
index 0000000..9a1a717
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.h
@@ -0,0 +1,10 @@
+#ifndef NVC0_COMPUTE_H
+#define NVC0_COMPUTE_H
+
+#include "nv50/nv50_defs.xml.h"
+#include "nvc0/nvc0_compute.xml.h"
+
+boolean
+nvc0_compute_validate_program(struct nvc0_context *nvc0);
+
+#endif /* NVC0_COMPUTE_H */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_compute.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.xml.h
new file mode 100644
index 0000000..35e6bfd
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_compute.xml.h
@@ -0,0 +1,410 @@
+#ifndef NVC0_COMPUTE_XML
+#define NVC0_COMPUTE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_compute.xml ( 11145 bytes, from 2013-04-27 14:00:13)
+- copyright.xml ( 6452 bytes, from 2013-02-27 22:13:22)
+- nvchipsets.xml ( 3954 bytes, from 2013-04-27 14:00:13)
+- nv_object.xml ( 14395 bytes, from 2013-04-27 14:00:13)
+- nv_defs.xml ( 4437 bytes, from 2013-02-27 22:13:22)
+- nv50_defs.xml ( 16652 bytes, from 2013-06-20 13:45:33)
+
+Copyright (C) 2006-2013 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_COMPUTE_LOCAL_POS_ALLOC 0x00000204
+
+#define NVC0_COMPUTE_LOCAL_NEG_ALLOC 0x00000208
+
+#define NVC0_COMPUTE_WARP_CSTACK_SIZE 0x0000020c
+
+#define NVC0_COMPUTE_TEX_LIMITS 0x00000210
+#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MASK 0x0000000f
+#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__SHIFT 0
+#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MIN 0x00000000
+#define NVC0_COMPUTE_TEX_LIMITS_SAMPLERS_LOG2__MAX 0x00000004
+#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MASK 0x000000f0
+#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__SHIFT 4
+#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MIN 0x00000000
+#define NVC0_COMPUTE_TEX_LIMITS_TEXTURES_LOG2__MAX 0x00000007
+
+#define NVC0_COMPUTE_SHARED_BASE 0x00000214
+
+#define NVC0_COMPUTE_MEM_BARRIER 0x0000021c
+#define NVC0_COMPUTE_MEM_BARRIER_UNK0 0x00000001
+#define NVC0_COMPUTE_MEM_BARRIER_UNK1 0x00000002
+#define NVC0_COMPUTE_MEM_BARRIER_UNK2 0x00000004
+#define NVC0_COMPUTE_MEM_BARRIER_UNK4 0x00000010
+#define NVC0_COMPUTE_MEM_BARRIER_UNK8 0x00000100
+#define NVC0_COMPUTE_MEM_BARRIER_UNK12 0x00001000
+
+#define NVC0_COMPUTE_BIND_TSC 0x00000228
+#define NVC0_COMPUTE_BIND_TSC_ACTIVE 0x00000001
+#define NVC0_COMPUTE_BIND_TSC_SAMPLER__MASK 0x00000ff0
+#define NVC0_COMPUTE_BIND_TSC_SAMPLER__SHIFT 4
+#define NVC0_COMPUTE_BIND_TSC_TSC__MASK 0x01fff000
+#define NVC0_COMPUTE_BIND_TSC_TSC__SHIFT 12
+
+#define NVC0_COMPUTE_BIND_TIC 0x0000022c
+#define NVC0_COMPUTE_BIND_TIC_ACTIVE 0x00000001
+#define NVC0_COMPUTE_BIND_TIC_TEXTURE__MASK 0x000001fe
+#define NVC0_COMPUTE_BIND_TIC_TEXTURE__SHIFT 1
+#define NVC0_COMPUTE_BIND_TIC_TIC__MASK 0x7ffffe00
+#define NVC0_COMPUTE_BIND_TIC_TIC__SHIFT 9
+
+#define NVC0_COMPUTE_BIND_TSC2 0x00000230
+#define NVC0_COMPUTE_BIND_TSC2_ACTIVE 0x00000001
+#define NVC0_COMPUTE_BIND_TSC2_SAMPLER__MASK 0x00000010
+#define NVC0_COMPUTE_BIND_TSC2_SAMPLER__SHIFT 4
+#define NVC0_COMPUTE_BIND_TSC2_TSC__MASK 0x01fff000
+#define NVC0_COMPUTE_BIND_TSC2_TSC__SHIFT 12
+
+#define NVC0_COMPUTE_BIND_TIC2 0x00000234
+#define NVC0_COMPUTE_BIND_TIC2_ACTIVE 0x00000001
+#define NVC0_COMPUTE_BIND_TIC2_TEXTURE__MASK 0x00000002
+#define NVC0_COMPUTE_BIND_TIC2_TEXTURE__SHIFT 1
+#define NVC0_COMPUTE_BIND_TIC2_TIC__MASK 0x7ffffe00
+#define NVC0_COMPUTE_BIND_TIC2_TIC__SHIFT 9
+
+#define NVC0_COMPUTE_GRIDDIM_YX 0x00000238
+#define NVC0_COMPUTE_GRIDDIM_YX_X__MASK 0x0000ffff
+#define NVC0_COMPUTE_GRIDDIM_YX_X__SHIFT 0
+#define NVC0_COMPUTE_GRIDDIM_YX_Y__MASK 0xffff0000
+#define NVC0_COMPUTE_GRIDDIM_YX_Y__SHIFT 16
+
+#define NVC0_COMPUTE_GRIDDIM_Z 0x0000023c
+
+#define NVC0_COMPUTE_UNK244_TIC_FLUSH 0x00000244
+
+#define NVC0_COMPUTE_SHARED_SIZE 0x0000024c
+
+#define NVC0_COMPUTE_THREADS_ALLOC 0x00000250
+
+#define NVC0_COMPUTE_BARRIER_ALLOC 0x00000254
+
+#define NVC0_COMPUTE_UNK028C 0x0000028c
+
+#define NVC0_COMPUTE_COMPUTE_BEGIN 0x0000029c
+#define NVC0_COMPUTE_COMPUTE_BEGIN_UNK0 0x00000001
+
+#define NVC0_COMPUTE_UNK02A0 0x000002a0
+
+#define NVC0_COMPUTE_CP_GPR_ALLOC 0x000002c0
+
+#define NVC0_COMPUTE_UNK02C4 0x000002c4
+
+#define NVC0_COMPUTE_GLOBAL_BASE 0x000002c8
+#define NVC0_COMPUTE_GLOBAL_BASE_HIGH__MASK 0x000000ff
+#define NVC0_COMPUTE_GLOBAL_BASE_HIGH__SHIFT 0
+#define NVC0_COMPUTE_GLOBAL_BASE_INDEX__MASK 0x00ff0000
+#define NVC0_COMPUTE_GLOBAL_BASE_INDEX__SHIFT 16
+#define NVC0_COMPUTE_GLOBAL_BASE_READ_OK 0x40000000
+#define NVC0_COMPUTE_GLOBAL_BASE_WRITE_OK 0x80000000
+
+#define NVC8_COMPUTE_UNK02E0 0x000002e0
+
+#define NVC0_COMPUTE_CACHE_SPLIT 0x00000308
+#define NVC0_COMPUTE_CACHE_SPLIT_16K_SHARED_48K_L1 0x00000001
+#define NVC0_COMPUTE_CACHE_SPLIT_48K_SHARED_16K_L1 0x00000003
+
+#define NVC0_COMPUTE_UNK030C 0x0000030c
+
+#define NVC0_COMPUTE_UNK0360 0x00000360
+#define NVC0_COMPUTE_UNK0360_UNK0 0x00000001
+#define NVC0_COMPUTE_UNK0360_UNK8__MASK 0x00000300
+#define NVC0_COMPUTE_UNK0360_UNK8__SHIFT 8
+#define NVC8_COMPUTE_UNK0360_UNK10__MASK 0x00000c00
+#define NVC8_COMPUTE_UNK0360_UNK10__SHIFT 10
+
+#define NVC0_COMPUTE_LAUNCH 0x00000368
+
+#define NVC0_COMPUTE_UNK036C 0x0000036c
+#define NVC0_COMPUTE_UNK036C_UNK0__MASK 0x00000003
+#define NVC0_COMPUTE_UNK036C_UNK0__SHIFT 0
+#define NVC8_COMPUTE_UNK036C_UNK2__MASK 0x0000000c
+#define NVC8_COMPUTE_UNK036C_UNK2__SHIFT 2
+
+#define NVC0_COMPUTE_BLOCKDIM_YX 0x000003ac
+#define NVC0_COMPUTE_BLOCKDIM_YX_X__MASK 0x0000ffff
+#define NVC0_COMPUTE_BLOCKDIM_YX_X__SHIFT 0
+#define NVC0_COMPUTE_BLOCKDIM_YX_Y__MASK 0xffff0000
+#define NVC0_COMPUTE_BLOCKDIM_YX_Y__SHIFT 16
+
+#define NVC0_COMPUTE_BLOCKDIM_Z 0x000003b0
+
+#define NVC0_COMPUTE_CP_START_ID 0x000003b4
+
+#define NVC0_COMPUTE_FIRMWARE(i0) (0x00000500 + 0x4*(i0))
+#define NVC0_COMPUTE_FIRMWARE__ESIZE 0x00000004
+#define NVC0_COMPUTE_FIRMWARE__LEN 0x00000020
+
+#define NVC0_COMPUTE_MP_LIMIT 0x00000758
+
+#define NVC0_COMPUTE_LOCAL_BASE 0x0000077c
+
+#define NVC0_COMPUTE_GRIDID 0x00000780
+
+#define NVC0_COMPUTE_TEMP_ADDRESS_HIGH 0x00000790
+
+#define NVC0_COMPUTE_TEMP_ADDRESS_LOW 0x00000794
+
+#define NVC0_COMPUTE_TEMP_SIZE_HIGH 0x00000798
+
+#define NVC0_COMPUTE_TEMP_SIZE_LOW 0x0000079c
+
+#define NVC0_COMPUTE_WARP_TEMP_ALLOC 0x000007a0
+
+#define NVC0_COMPUTE_COMPUTE_END 0x00000a04
+#define NVC0_COMPUTE_COMPUTE_END_UNK0 0x00000001
+
+#define NVC0_COMPUTE_UNK0A08 0x00000a08
+
+#define NVC0_COMPUTE_CALL_LIMIT_LOG 0x00000d64
+
+#define NVC0_COMPUTE_UNK0D94 0x00000d94
+
+#define NVC0_COMPUTE_WATCHDOG_TIMER 0x00000de4
+
+#define NVC0_COMPUTE_UNK10F4 0x000010f4
+#define NVC0_COMPUTE_UNK10F4_UNK0 0x00000001
+#define NVC0_COMPUTE_UNK10F4_UNK4 0x00000010
+#define NVC0_COMPUTE_UNK10F4_UNK8 0x00000100
+
+#define NVC0_COMPUTE_LINKED_TSC 0x00001234
+
+#define NVC0_COMPUTE_UNK1288_TIC_FLUSH 0x00001288
+
+#define NVC0_COMPUTE_UNK12AC 0x000012ac
+
+#define NVC0_COMPUTE_TSC_FLUSH 0x00001330
+#define NVC0_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001
+#define NVC0_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVC0_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4
+
+#define NVC0_COMPUTE_TIC_FLUSH 0x00001334
+#define NVC0_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001
+#define NVC0_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVC0_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4
+
+#define NVC0_COMPUTE_TEX_CACHE_CTL 0x00001338
+#define NVC0_COMPUTE_TEX_CACHE_CTL_UNK0__MASK 0x00000007
+#define NVC0_COMPUTE_TEX_CACHE_CTL_UNK0__SHIFT 0
+#define NVC0_COMPUTE_TEX_CACHE_CTL_ENTRY__MASK 0x03fffff0
+#define NVC0_COMPUTE_TEX_CACHE_CTL_ENTRY__SHIFT 4
+
+#define NVC0_COMPUTE_UNK1354 0x00001354
+
+#define NVC0_COMPUTE_UNK1424_TSC_FLUSH 0x00001424
+
+#define NVC0_COMPUTE_COND_ADDRESS_HIGH 0x00001550
+
+#define NVC0_COMPUTE_COND_ADDRESS_LOW 0x00001554
+
+#define NVC0_COMPUTE_COND_MODE 0x00001558
+#define NVC0_COMPUTE_COND_MODE_NEVER 0x00000000
+#define NVC0_COMPUTE_COND_MODE_ALWAYS 0x00000001
+#define NVC0_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002
+#define NVC0_COMPUTE_COND_MODE_EQUAL 0x00000003
+#define NVC0_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NVC0_COMPUTE_TSC_ADDRESS_HIGH 0x0000155c
+
+#define NVC0_COMPUTE_TSC_ADDRESS_LOW 0x00001560
+
+#define NVC0_COMPUTE_TSC_LIMIT 0x00001564
+
+#define NVC0_COMPUTE_TIC_ADDRESS_HIGH 0x00001574
+
+#define NVC0_COMPUTE_TIC_ADDRESS_LOW 0x00001578
+
+#define NVC0_COMPUTE_TIC_LIMIT 0x0000157c
+
+#define NVC0_COMPUTE_CODE_ADDRESS_HIGH 0x00001608
+
+#define NVC0_COMPUTE_CODE_ADDRESS_LOW 0x0000160c
+
+#define NVC0_COMPUTE_TEX_MISC 0x00001664
+#define NVC0_COMPUTE_TEX_MISC_UNK 0x00000001
+#define NVC0_COMPUTE_TEX_MISC_SEAMLESS_CUBE_MAP 0x00000002
+
+#define NVC0_COMPUTE_UNK1690 0x00001690
+#define NVC0_COMPUTE_UNK1690_ALWAYS_DERIV 0x00000001
+#define NVC0_COMPUTE_UNK1690_UNK16 0x00010000
+
+#define NVC0_COMPUTE_CB_BIND 0x00001694
+#define NVC0_COMPUTE_CB_BIND_VALID 0x00000001
+#define NVC0_COMPUTE_CB_BIND_INDEX__MASK 0x00001f00
+#define NVC0_COMPUTE_CB_BIND_INDEX__SHIFT 8
+
+#define NVC0_COMPUTE_FLUSH 0x00001698
+#define NVC0_COMPUTE_FLUSH_CODE 0x00000001
+#define NVC0_COMPUTE_FLUSH_GLOBAL 0x00000010
+#define NVC0_COMPUTE_FLUSH_UNK8 0x00000100
+#define NVC0_COMPUTE_FLUSH_CB 0x00001000
+
+#define NVC0_COMPUTE_UNK1930 0x00001930
+
+#define NVC0_COMPUTE_UNK1944 0x00001944
+
+#define NVC0_COMPUTE_DELAY 0x00001a24
+
+#define NVC0_COMPUTE_UNK1A2C(i0) (0x00001a2c + 0x4*(i0))
+#define NVC0_COMPUTE_UNK1A2C__ESIZE 0x00000004
+#define NVC0_COMPUTE_UNK1A2C__LEN 0x00000005
+
+#define NVC0_COMPUTE_QUERY_ADDRESS_HIGH 0x00001b00
+
+#define NVC0_COMPUTE_QUERY_ADDRESS_LOW 0x00001b04
+
+#define NVC0_COMPUTE_QUERY_SEQUENCE 0x00001b08
+
+#define NVC0_COMPUTE_QUERY_GET 0x00001b0c
+#define NVC0_COMPUTE_QUERY_GET_MODE__MASK 0x00000003
+#define NVC0_COMPUTE_QUERY_GET_MODE__SHIFT 0
+#define NVC0_COMPUTE_QUERY_GET_MODE_WRITE 0x00000000
+#define NVC0_COMPUTE_QUERY_GET_MODE_WRITE_INTR_NRHOST 0x00000003
+#define NVC0_COMPUTE_QUERY_GET_INTR 0x00100000
+#define NVC0_COMPUTE_QUERY_GET_SHORT 0x10000000
+
+#define NVC0_COMPUTE_CB_SIZE 0x00002380
+
+#define NVC0_COMPUTE_CB_ADDRESS_HIGH 0x00002384
+
+#define NVC0_COMPUTE_CB_ADDRESS_LOW 0x00002388
+
+#define NVC0_COMPUTE_CB_POS 0x0000238c
+
+#define NVC0_COMPUTE_CB_DATA(i0) (0x00002390 + 0x4*(i0))
+#define NVC0_COMPUTE_CB_DATA__ESIZE 0x00000004
+#define NVC0_COMPUTE_CB_DATA__LEN 0x00000010
+
+#define NVC0_COMPUTE_IMAGE(i0) (0x00002700 + 0x20*(i0))
+#define NVC0_COMPUTE_IMAGE__ESIZE 0x00000020
+#define NVC0_COMPUTE_IMAGE__LEN 0x00000008
+
+#define NVC0_COMPUTE_IMAGE_ADDRESS_HIGH(i0) (0x00002700 + 0x20*(i0))
+
+#define NVC0_COMPUTE_IMAGE_ADDRESS_LOW(i0) (0x00002704 + 0x20*(i0))
+
+#define NVC0_COMPUTE_IMAGE_WIDTH(i0) (0x00002708 + 0x20*(i0))
+
+#define NVC0_COMPUTE_IMAGE_HEIGHT(i0) (0x0000270c + 0x20*(i0))
+#define NVC0_COMPUTE_IMAGE_HEIGHT_HEIGHT__MASK 0x0000ffff
+#define NVC0_COMPUTE_IMAGE_HEIGHT_HEIGHT__SHIFT 0
+#define NVC0_COMPUTE_IMAGE_HEIGHT_UNK16 0x00010000
+#define NVC0_COMPUTE_IMAGE_HEIGHT_LINEAR 0x00100000
+
+#define NVC0_COMPUTE_IMAGE_FORMAT(i0) (0x00002710 + 0x20*(i0))
+#define NVC0_COMPUTE_IMAGE_FORMAT_UNK0 0x00000001
+#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_COLOR__MASK 0x00000ff0
+#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_COLOR__SHIFT 4
+#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_ZETA__MASK 0x0001f000
+#define NVC0_COMPUTE_IMAGE_FORMAT_FORMAT_ZETA__SHIFT 12
+
+#define NVC0_COMPUTE_IMAGE_TILE_MODE(i0) (0x00002714 + 0x20*(i0))
+
+#define NVC0_COMPUTE_MP_PM_SET(i0) (0x0000335c + 0x4*(i0))
+#define NVC0_COMPUTE_MP_PM_SET__ESIZE 0x00000004
+#define NVC0_COMPUTE_MP_PM_SET__LEN 0x00000008
+
+#define NVC0_COMPUTE_MP_PM_SIGSEL(i0) (0x0000337c + 0x4*(i0))
+#define NVC0_COMPUTE_MP_PM_SIGSEL__ESIZE 0x00000004
+#define NVC0_COMPUTE_MP_PM_SIGSEL__LEN 0x00000008
+
+#define NVC0_COMPUTE_MP_PM_SRCSEL(i0) (0x0000339c + 0x4*(i0))
+#define NVC0_COMPUTE_MP_PM_SRCSEL__ESIZE 0x00000004
+#define NVC0_COMPUTE_MP_PM_SRCSEL__LEN 0x00000008
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP0__MASK 0x00000007
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP0__SHIFT 0
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG0__MASK 0x00000070
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG0__SHIFT 4
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP1__MASK 0x00000700
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP1__SHIFT 8
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG1__MASK 0x00007000
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG1__SHIFT 12
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP2__MASK 0x00070000
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP2__SHIFT 16
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG2__MASK 0x00700000
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG2__SHIFT 20
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP3__MASK 0x07000000
+#define NVC0_COMPUTE_MP_PM_SRCSEL_GRP3__SHIFT 24
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG3__MASK 0x70000000
+#define NVC0_COMPUTE_MP_PM_SRCSEL_SIG3__SHIFT 28
+
+#define NVC0_COMPUTE_MP_PM_OP(i0) (0x000033bc + 0x4*(i0))
+#define NVC0_COMPUTE_MP_PM_OP__ESIZE 0x00000004
+#define NVC0_COMPUTE_MP_PM_OP__LEN 0x00000008
+#define NVC0_COMPUTE_MP_PM_OP_MODE__MASK 0x00000001
+#define NVC0_COMPUTE_MP_PM_OP_MODE__SHIFT 0
+#define NVC0_COMPUTE_MP_PM_OP_MODE_LOGOP 0x00000000
+#define NVC0_COMPUTE_MP_PM_OP_MODE_LOGOP_PULSE 0x00000001
+#define NVC0_COMPUTE_MP_PM_OP_FUNC__MASK 0x000ffff0
+#define NVC0_COMPUTE_MP_PM_OP_FUNC__SHIFT 4
+
+#define NVC0_COMPUTE_MP_PM_UNK33DC 0x000033dc
+
+
+#endif /* NVC0_COMPUTE_XML */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.c b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
new file mode 100644
index 0000000..e0c2b74
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_framebuffer.h"
+
+#ifdef NVC0_WITH_DRAW_MODULE
+#include "draw/draw_context.h"
+#endif
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_screen.h"
+#include "nvc0/nvc0_resource.h"
+
+static void
+nvc0_flush(struct pipe_context *pipe,
+ struct pipe_fence_handle **fence,
+ unsigned flags)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_screen *screen = &nvc0->screen->base;
+
+ if (fence)
+ nouveau_fence_ref(screen->fence.current, (struct nouveau_fence **)fence);
+
+ PUSH_KICK(nvc0->base.pushbuf); /* fencing handled in kick_notify */
+
+ nouveau_context_update_frame_stats(&nvc0->base);
+}
+
+static void
+nvc0_texture_barrier(struct pipe_context *pipe)
+{
+ struct nouveau_pushbuf *push = nvc0_context(pipe)->base.pushbuf;
+
+ IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
+ IMMED_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 0);
+}
+
+static void
+nvc0_context_unreference_resources(struct nvc0_context *nvc0)
+{
+ unsigned s, i;
+
+ nouveau_bufctx_del(&nvc0->bufctx_3d);
+ nouveau_bufctx_del(&nvc0->bufctx);
+ nouveau_bufctx_del(&nvc0->bufctx_cp);
+
+ util_unreference_framebuffer_state(&nvc0->framebuffer);
+
+ for (i = 0; i < nvc0->num_vtxbufs; ++i)
+ pipe_resource_reference(&nvc0->vtxbuf[i].buffer, NULL);
+
+ pipe_resource_reference(&nvc0->idxbuf.buffer, NULL);
+
+ for (s = 0; s < 6; ++s) {
+ for (i = 0; i < nvc0->num_textures[s]; ++i)
+ pipe_sampler_view_reference(&nvc0->textures[s][i], NULL);
+
+ for (i = 0; i < NVC0_MAX_PIPE_CONSTBUFS; ++i)
+ if (!nvc0->constbuf[s][i].user)
+ pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, NULL);
+ }
+
+ for (s = 0; s < 2; ++s) {
+ for (i = 0; i < NVC0_MAX_SURFACE_SLOTS; ++i)
+ pipe_surface_reference(&nvc0->surfaces[s][i], NULL);
+ }
+
+ for (i = 0; i < nvc0->num_tfbbufs; ++i)
+ pipe_so_target_reference(&nvc0->tfbbuf[i], NULL);
+
+ for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);
+ ++i) {
+ struct pipe_resource **res = util_dynarray_element(
+ &nvc0->global_residents, struct pipe_resource *, i);
+ pipe_resource_reference(res, NULL);
+ }
+ util_dynarray_fini(&nvc0->global_residents);
+}
+
+static void
+nvc0_destroy(struct pipe_context *pipe)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ if (nvc0->screen->cur_ctx == nvc0) {
+ nvc0->base.pushbuf->kick_notify = NULL;
+ nvc0->screen->cur_ctx = NULL;
+ nouveau_pushbuf_bufctx(nvc0->base.pushbuf, NULL);
+ }
+ nouveau_pushbuf_kick(nvc0->base.pushbuf, nvc0->base.pushbuf->channel);
+
+ nvc0_context_unreference_resources(nvc0);
+ nvc0_blitctx_destroy(nvc0);
+
+#ifdef NVC0_WITH_DRAW_MODULE
+ draw_destroy(nvc0->draw);
+#endif
+
+ nouveau_context_destroy(&nvc0->base);
+}
+
+void
+nvc0_default_kick_notify(struct nouveau_pushbuf *push)
+{
+ struct nvc0_screen *screen = push->user_priv;
+
+ if (screen) {
+ nouveau_fence_next(&screen->base);
+ nouveau_fence_update(&screen->base, TRUE);
+ if (screen->cur_ctx)
+ screen->cur_ctx->state.flushed = TRUE;
+ }
+ NOUVEAU_DRV_STAT(&screen->base, pushbuf_count, 1);
+}
+
+static int
+nvc0_invalidate_resource_storage(struct nouveau_context *ctx,
+ struct pipe_resource *res,
+ int ref)
+{
+ struct nvc0_context *nvc0 = nvc0_context(&ctx->pipe);
+ unsigned s, i;
+
+ if (res->bind & PIPE_BIND_RENDER_TARGET) {
+ for (i = 0; i < nvc0->framebuffer.nr_cbufs; ++i) {
+ if (nvc0->framebuffer.cbufs[i] &&
+ nvc0->framebuffer.cbufs[i]->texture == res) {
+ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ if (res->bind & PIPE_BIND_DEPTH_STENCIL) {
+ if (nvc0->framebuffer.zsbuf &&
+ nvc0->framebuffer.zsbuf->texture == res) {
+ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+ if (!--ref)
+ return ref;
+ }
+ }
+
+ if (res->bind & PIPE_BIND_VERTEX_BUFFER) {
+ for (i = 0; i < nvc0->num_vtxbufs; ++i) {
+ if (nvc0->vtxbuf[i].buffer == res) {
+ nvc0->dirty |= NVC0_NEW_ARRAYS;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ if (res->bind & PIPE_BIND_INDEX_BUFFER) {
+ if (nvc0->idxbuf.buffer == res) {
+ nvc0->dirty |= NVC0_NEW_IDXBUF;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_IDX);
+ if (!--ref)
+ return ref;
+ }
+ }
+
+ if (res->bind & PIPE_BIND_SAMPLER_VIEW) {
+ for (s = 0; s < 5; ++s) {
+ for (i = 0; i < nvc0->num_textures[s]; ++i) {
+ if (nvc0->textures[s][i] &&
+ nvc0->textures[s][i]->texture == res) {
+ nvc0->textures_dirty[s] |= 1 << i;
+ nvc0->dirty |= NVC0_NEW_TEXTURES;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i));
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ }
+
+ if (res->bind & PIPE_BIND_CONSTANT_BUFFER) {
+ for (s = 0; s < 5; ++s) {
+ for (i = 0; i < nvc0->num_vtxbufs; ++i) {
+ if (!nvc0->constbuf[s][i].user &&
+ nvc0->constbuf[s][i].u.buf == res) {
+ nvc0->dirty |= NVC0_NEW_CONSTBUF;
+ nvc0->constbuf_dirty[s] |= 1 << i;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i));
+ if (!--ref)
+ return ref;
+ }
+ }
+ }
+ }
+
+ return ref;
+}
+
+static void
+nvc0_context_get_sample_position(struct pipe_context *, unsigned, unsigned,
+ float *);
+
+struct pipe_context *
+nvc0_create(struct pipe_screen *pscreen, void *priv)
+{
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+ struct nvc0_context *nvc0;
+ struct pipe_context *pipe;
+ int ret;
+ uint32_t flags;
+
+ nvc0 = CALLOC_STRUCT(nvc0_context);
+ if (!nvc0)
+ return NULL;
+ pipe = &nvc0->base.pipe;
+
+ if (!nvc0_blitctx_create(nvc0))
+ goto out_err;
+
+ nvc0->base.pushbuf = screen->base.pushbuf;
+ nvc0->base.client = screen->base.client;
+
+ ret = nouveau_bufctx_new(screen->base.client, 2, &nvc0->bufctx);
+ if (!ret)
+ ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_3D_COUNT,
+ &nvc0->bufctx_3d);
+ if (!ret)
+ ret = nouveau_bufctx_new(screen->base.client, NVC0_BIND_CP_COUNT,
+ &nvc0->bufctx_cp);
+ if (ret)
+ goto out_err;
+
+ nvc0->screen = screen;
+ nvc0->base.screen = &screen->base;
+
+ pipe->screen = pscreen;
+ pipe->priv = priv;
+
+ pipe->destroy = nvc0_destroy;
+
+ pipe->draw_vbo = nvc0_draw_vbo;
+ pipe->clear = nvc0_clear;
+ pipe->launch_grid = (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) ?
+ nve4_launch_grid : nvc0_launch_grid;
+
+ pipe->flush = nvc0_flush;
+ pipe->texture_barrier = nvc0_texture_barrier;
+ pipe->get_sample_position = nvc0_context_get_sample_position;
+
+ if (!screen->cur_ctx) {
+ screen->cur_ctx = nvc0;
+ nouveau_pushbuf_bufctx(screen->base.pushbuf, nvc0->bufctx);
+ }
+ screen->base.pushbuf->kick_notify = nvc0_default_kick_notify;
+
+ nvc0_init_query_functions(nvc0);
+ nvc0_init_surface_functions(nvc0);
+ nvc0_init_state_functions(nvc0);
+ nvc0_init_transfer_functions(nvc0);
+ nvc0_init_resource_functions(pipe);
+
+ nvc0->base.invalidate_resource_storage = nvc0_invalidate_resource_storage;
+
+#ifdef NVC0_WITH_DRAW_MODULE
+ /* no software fallbacks implemented */
+ nvc0->draw = draw_create(pipe);
+ assert(nvc0->draw);
+ draw_set_rasterize_stage(nvc0->draw, nvc0_draw_render_stage(nvc0));
+#endif
+
+ pipe->create_video_codec = nvc0_create_decoder;
+ pipe->create_video_buffer = nvc0_video_buffer_create;
+
+ /* shader builtin library is per-screen, but we need a context for m2mf */
+ nvc0_program_library_upload(nvc0);
+
+ /* add permanently resident buffers to bufctxts */
+
+ flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RD;
+
+ BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->text);
+ BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->uniform_bo);
+ BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->txc);
+ if (screen->compute) {
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->text);
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->txc);
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->parm);
+ }
+
+ flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
+
+ BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->poly_cache);
+ if (screen->compute)
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->tls);
+
+ flags = NOUVEAU_BO_GART | NOUVEAU_BO_WR;
+
+ BCTX_REFN_bo(nvc0->bufctx_3d, SCREEN, flags, screen->fence.bo);
+ BCTX_REFN_bo(nvc0->bufctx, FENCE, flags, screen->fence.bo);
+ if (screen->compute)
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_SCREEN, flags, screen->fence.bo);
+
+ nvc0->base.scratch.bo_size = 2 << 20;
+
+ memset(nvc0->tex_handles, ~0, sizeof(nvc0->tex_handles));
+
+ util_dynarray_init(&nvc0->global_residents);
+
+ return pipe;
+
+out_err:
+ if (nvc0) {
+ if (nvc0->bufctx_3d)
+ nouveau_bufctx_del(&nvc0->bufctx_3d);
+ if (nvc0->bufctx_cp)
+ nouveau_bufctx_del(&nvc0->bufctx_cp);
+ if (nvc0->bufctx)
+ nouveau_bufctx_del(&nvc0->bufctx);
+ if (nvc0->blit)
+ FREE(nvc0->blit);
+ FREE(nvc0);
+ }
+ return NULL;
+}
+
+void
+nvc0_bufctx_fence(struct nvc0_context *nvc0, struct nouveau_bufctx *bufctx,
+ boolean on_flush)
+{
+ struct nouveau_list *list = on_flush ? &bufctx->current : &bufctx->pending;
+ struct nouveau_list *it;
+ NOUVEAU_DRV_STAT_IFD(unsigned count = 0);
+
+ for (it = list->next; it != list; it = it->next) {
+ struct nouveau_bufref *ref = (struct nouveau_bufref *)it;
+ struct nv04_resource *res = ref->priv;
+ if (res)
+ nvc0_resource_validate(res, (unsigned)ref->priv_data);
+ NOUVEAU_DRV_STAT_IFD(count++);
+ }
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, resource_validate_count, count);
+}
+
+static void
+nvc0_context_get_sample_position(struct pipe_context *pipe,
+ unsigned sample_count, unsigned sample_index,
+ float *xy)
+{
+ static const uint8_t ms1[1][2] = { { 0x8, 0x8 } };
+ static const uint8_t ms2[2][2] = {
+ { 0x4, 0x4 }, { 0xc, 0xc } }; /* surface coords (0,0), (1,0) */
+ static const uint8_t ms4[4][2] = {
+ { 0x6, 0x2 }, { 0xe, 0x6 }, /* (0,0), (1,0) */
+ { 0x2, 0xa }, { 0xa, 0xe } }; /* (0,1), (1,1) */
+ static const uint8_t ms8[8][2] = {
+ { 0x1, 0x7 }, { 0x5, 0x3 }, /* (0,0), (1,0) */
+ { 0x3, 0xd }, { 0x7, 0xb }, /* (0,1), (1,1) */
+ { 0x9, 0x5 }, { 0xf, 0x1 }, /* (2,0), (3,0) */
+ { 0xb, 0xf }, { 0xd, 0x9 } }; /* (2,1), (3,1) */
+#if 0
+ /* NOTE: there are alternative modes for MS2 and MS8, currently not used */
+ static const uint8_t ms8_alt[8][2] = {
+ { 0x9, 0x5 }, { 0x7, 0xb }, /* (2,0), (1,1) */
+ { 0xd, 0x9 }, { 0x5, 0x3 }, /* (3,1), (1,0) */
+ { 0x3, 0xd }, { 0x1, 0x7 }, /* (0,1), (0,0) */
+ { 0xb, 0xf }, { 0xf, 0x1 } }; /* (2,1), (3,0) */
+#endif
+
+ const uint8_t (*ptr)[2];
+
+ switch (sample_count) {
+ case 0:
+ case 1: ptr = ms1; break;
+ case 2: ptr = ms2; break;
+ case 4: ptr = ms4; break;
+ case 8: ptr = ms8; break;
+ default:
+ assert(0);
+ return; /* bad sample count -> undefined locations */
+ }
+ xy[0] = ptr[sample_index][0] * 0.0625f;
+ xy[1] = ptr[sample_index][1] * 0.0625f;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_context.h b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
new file mode 100644
index 0000000..3fbecdc
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_context.h
@@ -0,0 +1,357 @@
+#ifndef __NVC0_CONTEXT_H__
+#define __NVC0_CONTEXT_H__
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "util/u_inlines.h"
+#include "util/u_dynarray.h"
+
+#ifdef NVC0_WITH_DRAW_MODULE
+#include "draw/draw_vertex.h"
+#endif
+
+#include "nv50/nv50_debug.h"
+#include "nvc0/nvc0_winsys.h"
+#include "nvc0/nvc0_stateobj.h"
+#include "nvc0/nvc0_screen.h"
+#include "nvc0/nvc0_program.h"
+#include "nvc0/nvc0_resource.h"
+
+#include "nv50/nv50_transfer.h"
+
+#include "nouveau_context.h"
+
+#include "nvc0/nvc0_3ddefs.xml.h"
+#include "nvc0/nvc0_3d.xml.h"
+#include "nvc0/nvc0_2d.xml.h"
+#include "nvc0/nvc0_m2mf.xml.h"
+#include "nvc0/nve4_p2mf.xml.h"
+
+/* NOTE: must keep NVC0_NEW_...PROG in consecutive bits in this order */
+#define NVC0_NEW_BLEND (1 << 0)
+#define NVC0_NEW_RASTERIZER (1 << 1)
+#define NVC0_NEW_ZSA (1 << 2)
+#define NVC0_NEW_VERTPROG (1 << 3)
+#define NVC0_NEW_TCTLPROG (1 << 4)
+#define NVC0_NEW_TEVLPROG (1 << 5)
+#define NVC0_NEW_GMTYPROG (1 << 6)
+#define NVC0_NEW_FRAGPROG (1 << 7)
+#define NVC0_NEW_BLEND_COLOUR (1 << 8)
+#define NVC0_NEW_STENCIL_REF (1 << 9)
+#define NVC0_NEW_CLIP (1 << 10)
+#define NVC0_NEW_SAMPLE_MASK (1 << 11)
+#define NVC0_NEW_FRAMEBUFFER (1 << 12)
+#define NVC0_NEW_STIPPLE (1 << 13)
+#define NVC0_NEW_SCISSOR (1 << 14)
+#define NVC0_NEW_VIEWPORT (1 << 15)
+#define NVC0_NEW_ARRAYS (1 << 16)
+#define NVC0_NEW_VERTEX (1 << 17)
+#define NVC0_NEW_CONSTBUF (1 << 18)
+#define NVC0_NEW_TEXTURES (1 << 19)
+#define NVC0_NEW_SAMPLERS (1 << 20)
+#define NVC0_NEW_TFB_TARGETS (1 << 21)
+#define NVC0_NEW_IDXBUF (1 << 22)
+#define NVC0_NEW_SURFACES (1 << 23)
+
+#define NVC0_NEW_CP_PROGRAM (1 << 0)
+#define NVC0_NEW_CP_SURFACES (1 << 1)
+#define NVC0_NEW_CP_TEXTURES (1 << 2)
+#define NVC0_NEW_CP_SAMPLERS (1 << 3)
+#define NVC0_NEW_CP_CONSTBUF (1 << 4)
+#define NVC0_NEW_CP_GLOBALS (1 << 5)
+
+/* 3d bufctx (during draw_vbo, blit_3d) */
+#define NVC0_BIND_FB 0
+#define NVC0_BIND_VTX 1
+#define NVC0_BIND_VTX_TMP 2
+#define NVC0_BIND_IDX 3
+#define NVC0_BIND_TEX(s, i) ( 4 + 32 * (s) + (i))
+#define NVC0_BIND_CB(s, i) (164 + 16 * (s) + (i))
+#define NVC0_BIND_TFB 244
+#define NVC0_BIND_SUF 245
+#define NVC0_BIND_SCREEN 246
+#define NVC0_BIND_TLS 247
+#define NVC0_BIND_3D_COUNT 248
+
+/* compute bufctx (during launch_grid) */
+#define NVC0_BIND_CP_CB(i) ( 0 + (i))
+#define NVC0_BIND_CP_TEX(i) ( 16 + (i))
+#define NVC0_BIND_CP_SUF 48
+#define NVC0_BIND_CP_GLOBAL 49
+#define NVC0_BIND_CP_DESC 50
+#define NVC0_BIND_CP_SCREEN 51
+#define NVC0_BIND_CP_QUERY 52
+#define NVC0_BIND_CP_COUNT 53
+
+/* bufctx for other operations */
+#define NVC0_BIND_2D 0
+#define NVC0_BIND_M2MF 0
+#define NVC0_BIND_FENCE 1
+
+
+struct nvc0_blitctx;
+
+boolean nvc0_blitctx_create(struct nvc0_context *);
+void nvc0_blitctx_destroy(struct nvc0_context *);
+
+struct nvc0_context {
+ struct nouveau_context base;
+
+ struct nouveau_bufctx *bufctx_3d;
+ struct nouveau_bufctx *bufctx;
+ struct nouveau_bufctx *bufctx_cp;
+
+ struct nvc0_screen *screen;
+
+ void (*m2mf_copy_rect)(struct nvc0_context *,
+ const struct nv50_m2mf_rect *dst,
+ const struct nv50_m2mf_rect *src,
+ uint32_t nblocksx, uint32_t nblocksy);
+
+ uint32_t dirty;
+ uint32_t dirty_cp; /* dirty flags for compute state */
+
+ struct {
+ boolean flushed;
+ boolean rasterizer_discard;
+ boolean early_z_forced;
+ boolean prim_restart;
+ uint32_t instance_elts; /* bitmask of per-instance elements */
+ uint32_t instance_base;
+ uint32_t constant_vbos;
+ uint32_t constant_elts;
+ int32_t index_bias;
+ uint16_t scissor;
+ uint8_t vbo_mode; /* 0 = normal, 1 = translate, 3 = translate, forced */
+ uint8_t num_vtxbufs;
+ uint8_t num_vtxelts;
+ uint8_t num_textures[6];
+ uint8_t num_samplers[6];
+ uint8_t tls_required; /* bitmask of shader types using l[] */
+ uint8_t c14_bound; /* whether immediate array constbuf is bound */
+ uint8_t clip_enable;
+ uint32_t clip_mode;
+ uint32_t uniform_buffer_bound[5];
+ struct nvc0_transform_feedback_state *tfb;
+ } state;
+
+ struct nvc0_blend_stateobj *blend;
+ struct nvc0_rasterizer_stateobj *rast;
+ struct nvc0_zsa_stateobj *zsa;
+ struct nvc0_vertex_stateobj *vertex;
+
+ struct nvc0_program *vertprog;
+ struct nvc0_program *tctlprog;
+ struct nvc0_program *tevlprog;
+ struct nvc0_program *gmtyprog;
+ struct nvc0_program *fragprog;
+ struct nvc0_program *compprog;
+
+ struct nvc0_constbuf constbuf[6][NVC0_MAX_PIPE_CONSTBUFS];
+ uint16_t constbuf_dirty[6];
+
+ struct pipe_vertex_buffer vtxbuf[PIPE_MAX_ATTRIBS];
+ unsigned num_vtxbufs;
+ struct pipe_index_buffer idxbuf;
+ uint32_t constant_vbos;
+ uint32_t vbo_user; /* bitmask of vertex buffers pointing to user memory */
+ uint32_t vb_elt_first; /* from pipe_draw_info, for vertex upload */
+ uint32_t vb_elt_limit; /* max - min element (count - 1) */
+ uint32_t instance_off; /* current base vertex for instanced arrays */
+ uint32_t instance_max; /* last instance for current draw call */
+
+ struct pipe_sampler_view *textures[6][PIPE_MAX_SAMPLERS];
+ unsigned num_textures[6];
+ uint32_t textures_dirty[6];
+ struct nv50_tsc_entry *samplers[6][PIPE_MAX_SAMPLERS];
+ unsigned num_samplers[6];
+ uint16_t samplers_dirty[6];
+
+ uint32_t tex_handles[6][PIPE_MAX_SAMPLERS]; /* for nve4 */
+
+ struct pipe_framebuffer_state framebuffer;
+ struct pipe_blend_color blend_colour;
+ struct pipe_stencil_ref stencil_ref;
+ struct pipe_poly_stipple stipple;
+ struct pipe_scissor_state scissor;
+ struct pipe_viewport_state viewport;
+ struct pipe_clip_state clip;
+
+ unsigned sample_mask;
+
+ boolean vbo_push_hint;
+
+ uint8_t tfbbuf_dirty;
+ struct pipe_stream_output_target *tfbbuf[4];
+ unsigned num_tfbbufs;
+
+ struct pipe_query *cond_query;
+ boolean cond_cond;
+ uint cond_mode;
+
+ struct nvc0_blitctx *blit;
+
+ struct pipe_surface *surfaces[2][NVC0_MAX_SURFACE_SLOTS];
+ uint16_t surfaces_dirty[2];
+ uint16_t surfaces_valid[2];
+ uint32_t vport_int[2];
+
+ struct util_dynarray global_residents;
+
+#ifdef NVC0_WITH_DRAW_MODULE
+ struct draw_context *draw;
+#endif
+};
+
+static INLINE struct nvc0_context *
+nvc0_context(struct pipe_context *pipe)
+{
+ return (struct nvc0_context *)pipe;
+}
+
+static INLINE unsigned
+nvc0_shader_stage(unsigned pipe)
+{
+ switch (pipe) {
+ case PIPE_SHADER_VERTEX: return 0;
+/* case PIPE_SHADER_TESSELLATION_CONTROL: return 1; */
+/* case PIPE_SHADER_TESSELLATION_EVALUATION: return 2; */
+ case PIPE_SHADER_GEOMETRY: return 3;
+ case PIPE_SHADER_FRAGMENT: return 4;
+ case PIPE_SHADER_COMPUTE: return 5;
+ default:
+ assert(!"invalid PIPE_SHADER type");
+ return 0;
+ }
+}
+
+
+/* nvc0_context.c */
+struct pipe_context *nvc0_create(struct pipe_screen *, void *);
+void nvc0_bufctx_fence(struct nvc0_context *, struct nouveau_bufctx *,
+ boolean on_flush);
+void nvc0_default_kick_notify(struct nouveau_pushbuf *);
+
+/* nvc0_draw.c */
+extern struct draw_stage *nvc0_draw_render_stage(struct nvc0_context *);
+
+/* nvc0_program.c */
+boolean nvc0_program_translate(struct nvc0_program *, uint16_t chipset);
+boolean nvc0_program_upload_code(struct nvc0_context *, struct nvc0_program *);
+void nvc0_program_destroy(struct nvc0_context *, struct nvc0_program *);
+void nvc0_program_library_upload(struct nvc0_context *);
+uint32_t nvc0_program_symbol_offset(const struct nvc0_program *,
+ uint32_t label);
+
+/* nvc0_query.c */
+void nvc0_init_query_functions(struct nvc0_context *);
+void nvc0_query_pushbuf_submit(struct nouveau_pushbuf *,
+ struct pipe_query *, unsigned result_offset);
+void nvc0_query_fifo_wait(struct nouveau_pushbuf *, struct pipe_query *);
+void nvc0_so_target_save_offset(struct pipe_context *,
+ struct pipe_stream_output_target *, unsigned i,
+ boolean *serialize);
+
+#define NVC0_QUERY_TFB_BUFFER_OFFSET (PIPE_QUERY_TYPES + 0)
+
+/* nvc0_shader_state.c */
+void nvc0_vertprog_validate(struct nvc0_context *);
+void nvc0_tctlprog_validate(struct nvc0_context *);
+void nvc0_tevlprog_validate(struct nvc0_context *);
+void nvc0_gmtyprog_validate(struct nvc0_context *);
+void nvc0_fragprog_validate(struct nvc0_context *);
+
+void nvc0_tfb_validate(struct nvc0_context *);
+
+/* nvc0_state.c */
+extern void nvc0_init_state_functions(struct nvc0_context *);
+
+/* nvc0_state_validate.c */
+void nvc0_validate_global_residents(struct nvc0_context *,
+ struct nouveau_bufctx *, int bin);
+extern boolean nvc0_state_validate(struct nvc0_context *, uint32_t state_mask,
+ unsigned space_words);
+
+/* nvc0_surface.c */
+extern void nvc0_clear(struct pipe_context *, unsigned buffers,
+ const union pipe_color_union *color,
+ double depth, unsigned stencil);
+extern void nvc0_init_surface_functions(struct nvc0_context *);
+
+/* nvc0_tex.c */
+boolean nve4_validate_tsc(struct nvc0_context *nvc0, int s);
+void nvc0_validate_textures(struct nvc0_context *);
+void nvc0_validate_samplers(struct nvc0_context *);
+void nve4_set_tex_handles(struct nvc0_context *);
+void nvc0_validate_surfaces(struct nvc0_context *);
+void nve4_set_surface_info(struct nouveau_pushbuf *, struct pipe_surface *,
+ struct nvc0_screen *);
+
+struct pipe_sampler_view *
+nvc0_create_texture_view(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_sampler_view *,
+ uint32_t flags,
+ enum pipe_texture_target);
+struct pipe_sampler_view *
+nvc0_create_sampler_view(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_sampler_view *);
+
+/* nvc0_transfer.c */
+void
+nvc0_init_transfer_functions(struct nvc0_context *);
+
+void
+nvc0_m2mf_push_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, const void *data);
+void
+nve4_p2mf_push_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, const void *data);
+void
+nvc0_cb_push(struct nouveau_context *,
+ struct nouveau_bo *bo, unsigned domain,
+ unsigned base, unsigned size,
+ unsigned offset, unsigned words, const uint32_t *data);
+
+/* nvc0_vbo.c */
+void nvc0_draw_vbo(struct pipe_context *, const struct pipe_draw_info *);
+
+void *
+nvc0_vertex_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements);
+void
+nvc0_vertex_state_delete(struct pipe_context *pipe, void *hwcso);
+
+void nvc0_vertex_arrays_validate(struct nvc0_context *);
+
+void nvc0_idxbuf_validate(struct nvc0_context *);
+
+/* nvc0_video.c */
+struct pipe_video_codec *
+nvc0_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ);
+
+struct pipe_video_buffer *
+nvc0_video_buffer_create(struct pipe_context *pipe,
+ const struct pipe_video_buffer *templat);
+
+/* nvc0_push.c */
+void nvc0_push_vbo(struct nvc0_context *, const struct pipe_draw_info *);
+
+/* nve4_compute.c */
+void nve4_launch_grid(struct pipe_context *,
+ const uint *, const uint *, uint32_t, const void *);
+
+/* nvc0_compute.c */
+void nvc0_launch_grid(struct pipe_context *,
+ const uint *, const uint *, uint32_t, const void *);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_draw.c b/src/gallium/drivers/nouveau/nvc0/nvc0_draw.c
new file mode 100644
index 0000000..e261d50
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_draw.c
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "draw/draw_pipe.h"
+
+#include "nvc0/nvc0_context.h"
+
+struct nvc0_render_stage {
+ struct draw_stage stage;
+ struct nvc0_context *nvc0;
+};
+
+static INLINE struct nvc0_render_stage *
+nvc0_render_stage(struct draw_stage *stage)
+{
+ return (struct nvc0_render_stage *)stage;
+}
+
+static void
+nvc0_render_point(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nvc0_render_line(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nvc0_render_tri(struct draw_stage *stage, struct prim_header *prim)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nvc0_render_flush(struct draw_stage *stage, unsigned flags)
+{
+}
+
+static void
+nvc0_render_reset_stipple_counter(struct draw_stage *stage)
+{
+ NOUVEAU_ERR("\n");
+}
+
+static void
+nvc0_render_destroy(struct draw_stage *stage)
+{
+ FREE(stage);
+}
+
+struct draw_stage *
+nvc0_draw_render_stage(struct nvc0_context *nvc0)
+{
+ struct nvc0_render_stage *rs = CALLOC_STRUCT(nvc0_render_stage);
+
+ rs->nvc0 = nvc0;
+ rs->stage.draw = nvc0->draw;
+ rs->stage.destroy = nvc0_render_destroy;
+ rs->stage.point = nvc0_render_point;
+ rs->stage.line = nvc0_render_line;
+ rs->stage.tri = nvc0_render_tri;
+ rs->stage.flush = nvc0_render_flush;
+ rs->stage.reset_stipple_counter = nvc0_render_reset_stipple_counter;
+
+ return &rs->stage;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_formats.c b/src/gallium/drivers/nouveau/nvc0/nvc0_formats.c
new file mode 100644
index 0000000..2bfdb0e
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_formats.c
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define NOUVEAU_DRIVER 0xc0
+
+#include "nv50/nv50_formats.c"
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_graph_macros.h b/src/gallium/drivers/nouveau/nvc0/nvc0_graph_macros.h
new file mode 100644
index 0000000..f009980
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_graph_macros.h
@@ -0,0 +1,236 @@
+
+#ifndef __NVC0_PGRAPH_MACROS_H__
+#define __NVC0_PGRAPH_MACROS_H__
+
+/* extrinsrt r1, r2, src, size, dst: replace bits [dst:dst+size) in r1
+ * with bits [src:src+size) in r2
+ *
+ * bra(n)z annul: no delay slot
+ */
+
+/* Bitfield version of NVC0_3D_VERTEX_ARRAY_PER_INSTANCE[].
+ * Args: size, bitfield
+ */
+static const uint32_t nvc0_9097_per_instance_bf[] =
+{
+ 0x00000301, /* parm $r3 (the bitfield) */
+ 0x00000211, /* mov $r2 0 */
+ 0x05880021, /* maddr [NVC0_3D_VERTEX_ARRAY_PER_INSTANCE(0), increment = 4] */
+ 0xffffc911, /* mov $r1 (add $r1 -0x1) */
+ 0x0040d043, /* send (extrshl $r3 $r2 0x1 0) */
+ 0xffff8897, /* exit branz $r1 0x3 */
+ 0x00005211 /* mov $r2 (add $r2 0x1) */
+};
+
+/* The comments above the macros describe what they *should* be doing,
+ * but we use less functionality for now.
+ */
+
+/*
+ * for (i = 0; i < 8; ++i)
+ * [NVC0_3D_BLEND_ENABLE(i)] = BIT(i of arg);
+ *
+ * [3428] = arg;
+ *
+ * if (arg == 0 || [NVC0_3D_MULTISAMPLE_ENABLE] == 0)
+ * [0d9c] = 0;
+ * else
+ * [0d9c] = [342c];
+ */
+static const uint32_t nvc0_9097_blend_enables[] =
+{
+ 0x05360021, /* 0x00: maddr [NVC0_3D_BLEND_ENABLE(0), increment = 4] */
+ 0x00404042, /* 0x01: send extrinsrt 0 $r1 0 0x1 0 */
+ 0x00424042, /* 0x02: send extrinsrt 0 $r1 0x1 0x1 0 */
+ 0x00444042, /* 0x03: send extrinsrt 0 $r1 0x2 0x1 0 */
+ 0x00464042, /* 0x04: send extrinsrt 0 $r1 0x3 0x1 0 */
+ 0x00484042, /* 0x05: send extrinsrt 0 $r1 0x4 0x1 0 */
+ 0x004a4042, /* 0x06: send extrinsrt 0 $r1 0x5 0x1 0 */
+ 0x004c40c2, /* 0x07: exit send extrinsrt 0 $r1 0x6 0x1 0 */
+ 0x004e4042, /* 0x08: send extrinsrt 0 $r1 0x7 0x1 0 */
+};
+
+/*
+ * uint64 limit = (parm(0) << 32) | parm(1);
+ * uint64 start = (parm(2) << 32);
+ *
+ * if (limit) {
+ * start |= parm(3);
+ * --limit;
+ * } else {
+ * start |= 1;
+ * }
+ *
+ * [0x1c04 + (arg & 0xf) * 16 + 0] = (start >> 32) & 0xff;
+ * [0x1c04 + (arg & 0xf) * 16 + 4] = start & 0xffffffff;
+ * [0x1f00 + (arg & 0xf) * 8 + 0] = (limit >> 32) & 0xff;
+ * [0x1f00 + (arg & 0xf) * 8 + 4] = limit & 0xffffffff;
+ */
+static const uint32_t nvc0_9097_vertex_array_select[] =
+{
+ 0x00000201, /* 0x00: parm $r2 */
+ 0x00000301, /* 0x01: parm $r3 */
+ 0x00000401, /* 0x02: parm $r4 */
+ 0x00000501, /* 0x03: parm $r5 */
+ 0x11004612, /* 0x04: mov $r6 extrinsrt 0 $r1 0 4 2 */
+ 0x09004712, /* 0x05: mov $r7 extrinsrt 0 $r1 0 4 1 */
+ 0x05c07621, /* 0x06: maddr $r6 add $6 0x1701 */
+ 0x00002041, /* 0x07: send $r4 */
+ 0x00002841, /* 0x08: send $r5 */
+ 0x05f03f21, /* 0x09: maddr $r7 add $7 0x17c0 */
+ 0x000010c1, /* 0x0a: exit send $r2 */
+ 0x00001841, /* 0x0b: send $r3 */
+};
+
+/*
+ * [GL_POLYGON_MODE_FRONT] = arg;
+ *
+ * if (BIT(31 of [0x3410]))
+ * [1a24] = 0x7353;
+ *
+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
+ * [02ec] = 0;
+ * else
+ * if ([GL_POLYGON_MODE_BACK] == GL_LINE || arg == GL_LINE)
+ * [02ec] = BYTE(1 of [0x3410]) << 4;
+ * else
+ * [02ec] = BYTE(0 of [0x3410]) << 4;
+ */
+static const uint32_t nvc0_9097_poly_mode_front[] =
+{
+ 0x00db0215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_BACK] */
+ 0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */
+ 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */
+ 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
+ 0x00004211, /* 0x04: mov $r2 0x1 */
+ 0x00180611, /* 0x05: mov $r6 0x60 */
+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x07: braz $r7 0xa */
+ 0x00dac021, /* 0x08: maddr 0x36b */
+ 0x00800611, /* 0x09: mov $r6 0x200 */
+ 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */
+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x0c: braz $r7 0xf */
+ 0x00000841, /* 0x0d: send $r1 */
+ 0x00000611, /* 0x0e: mov $r6 0 */
+ 0x002ec0a1, /* 0x0f: exit maddr [02ec] */
+ 0x00003041 /* 0x10: send $r6 */
+};
+
+/*
+ * [GL_POLYGON_MODE_BACK] = arg;
+ *
+ * if (BIT(31 of [0x3410]))
+ * [1a24] = 0x7353;
+ *
+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || [NVC0_3D_SP_SELECT(4)] == 0x41)
+ * [02ec] = 0;
+ * else
+ * if ([GL_POLYGON_MODE_FRONT] == GL_LINE || arg == GL_LINE)
+ * [02ec] = BYTE(1 of [0x3410]) << 4;
+ * else
+ * [02ec] = BYTE(0 of [0x3410]) << 4;
+ */
+/* NOTE: 0x3410 = 0x80002006 by default,
+ * POLYGON_MODE == GL_LINE check replaced by (MODE & 1)
+ * SP_SELECT(i) == (i << 4) | 1 check replaced by SP_SELECT(i) & 1
+ */
+static const uint32_t nvc0_9097_poly_mode_back[] =
+{
+ 0x00dac215, /* 0x00: read $r2 [NVC0_3D_POLYGON_MODE_FRONT] */
+ 0x020c0315, /* 0x01: read $r3 [NVC0_3D_SP_SELECT(3)] */
+ 0x00128f10, /* 0x02: mov $r7 or $r1 $r2 */
+ 0x02100415, /* 0x03: read $r4 [NVC0_3D_SP_SELECT(4)] */
+ 0x00004211, /* 0x04: mov $r2 0x1 */
+ 0x00180611, /* 0x05: mov $r6 0x60 */
+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x07: braz $r7 0xa */
+ 0x00db0021, /* 0x08: maddr 0x36c */
+ 0x00800611, /* 0x09: mov $r6 0x200 */
+ 0x00131f10, /* 0x0a: mov $r7 or $r3 $r4 */
+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x0c: braz $r7 0xf */
+ 0x00000841, /* 0x0d: send $r1 */
+ 0x00000611, /* 0x0e: mov $r6 0 */
+ 0x002ec0a1, /* 0x0f: exit maddr [02ec] */
+ 0x00003041 /* 0x10: send $r6 */
+};
+
+/*
+ * [NVC0_3D_SP_SELECT(4)] = arg
+ *
+ * if BIT(31 of [0x3410]) == 0
+ * [1a24] = 0x7353;
+ *
+ * if ([NVC0_3D_SP_SELECT(3)] == 0x31 || arg == 0x41)
+ * [02ec] = 0
+ * else
+ * if (any POLYGON MODE == LINE)
+ * [02ec] = BYTE(1 of [3410]) << 4;
+ * else
+ * [02ec] = BYTE(0 of [3410]) << 4; // 02ec valid bits are 0xff1
+ */
+static const uint32_t nvc0_9097_gp_select[] = /* 0x0f */
+{
+ 0x00dac215, /* 0x00: read $r2 0x36b */
+ 0x00db0315, /* 0x01: read $r3 0x36c */
+ 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */
+ 0x020c0415, /* 0x03: read $r4 0x830 */
+ 0x00004211, /* 0x04: mov $r2 0x1 */
+ 0x00180611, /* 0x05: mov $r6 0x60 */
+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x07: braz $r7 0xa */
+ 0x02100021, /* 0x08: maddr 0x840 */
+ 0x00800611, /* 0x09: mov $r6 0x200 */
+ 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */
+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x0c: braz $r7 0xf */
+ 0x00000841, /* 0x0d: send $r1 */
+ 0x00000611, /* 0x0e: mov $r6 0 */
+ 0x002ec0a1, /* 0x0f: exit maddr 0xbb */
+ 0x00003041, /* 0x10: send $r6 */
+};
+
+/*
+ * [NVC0_3D_SP_SELECT(3)] = arg
+ *
+ * if BIT(31 of [0x3410]) == 0
+ * [1a24] = 0x7353;
+ *
+ * if (arg == 0x31) {
+ * if (BIT(2 of [0x3430])) {
+ * int i = 15; do { --i; } while(i);
+ * [0x1a2c] = 0;
+ * }
+ * }
+ *
+ * if ([NVC0_3D_SP_SELECT(4)] == 0x41 || arg == 0x31)
+ * [02ec] = 0
+ * else
+ * if ([any POLYGON_MODE] == GL_LINE)
+ * [02ec] = BYTE(1 of [3410]) << 4;
+ * else
+ * [02ec] = BYTE(0 of [3410]) << 4;
+ */
+static const uint32_t nvc0_9097_tep_select[] = /* 0x10 */
+{
+ 0x00dac215, /* 0x00: read $r2 0x36b */
+ 0x00db0315, /* 0x01: read $r3 0x36c */
+ 0x0012d710, /* 0x02: mov $r7 or $r2 $r3 */
+ 0x02100415, /* 0x03: read $r4 0x840 */
+ 0x00004211, /* 0x04: mov $r2 0x1 */
+ 0x00180611, /* 0x05: mov $r6 0x60 */
+ 0x0014bf10, /* 0x06: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x07: braz $r7 0xa */
+ 0x020c0021, /* 0x08: maddr 0x830 */
+ 0x00800611, /* 0x09: mov $r6 0x200 */
+ 0x00130f10, /* 0x0a: mov $r7 or $r1 $r4 */
+ 0x0014bf10, /* 0x0b: mov $r7 and $r7 $r2 */
+ 0x0000f807, /* 0x0c: braz $r7 0xf */
+ 0x00000841, /* 0x0d: send $r1 */
+ 0x00000611, /* 0x0e: mov $r6 0 */
+ 0x002ec0a1, /* 0x0f: exit maddr 0xbb */
+ 0x00003041, /* 0x10: send $r6 */
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_m2mf.xml.h b/src/gallium/drivers/nouveau/nvc0/nvc0_m2mf.xml.h
new file mode 100644
index 0000000..3bf628d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_m2mf.xml.h
@@ -0,0 +1,138 @@
+#ifndef NVC0_M2MF_XML
+#define NVC0_M2MF_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nvc0_m2mf.xml ( 2227 bytes, from 2010-10-16 16:10:29)
+- copyright.xml ( 6498 bytes, from 2010-10-03 13:18:37)
+- nv_object.xml ( 11379 bytes, from 2010-10-16 11:43:24)
+- nvchipsets.xml ( 2907 bytes, from 2010-10-15 16:28:21)
+- nv_defs.xml ( 4437 bytes, from 2010-07-06 07:43:58)
+
+Copyright (C) 2006-2010 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro, curro_, currojerez)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVC0_M2MF_TILING_MODE_IN 0x00000204
+
+#define NVC0_M2MF_TILING_PITCH_IN 0x00000208
+
+#define NVC0_M2MF_TILING_HEIGHT_IN 0x0000020c
+
+#define NVC0_M2MF_TILING_DEPTH_IN 0x00000210
+
+#define NVC0_M2MF_TILING_POSITION_IN_Z 0x00000214
+
+#define NVC0_M2MF_TILING_MODE_OUT 0x00000220
+
+#define NVC0_M2MF_TILING_PITCH_OUT 0x00000224
+
+#define NVC0_M2MF_TILING_HEIGHT_OUT 0x00000228
+
+#define NVC0_M2MF_TILING_DEPTH_OUT 0x0000022c
+
+#define NVC0_M2MF_TILING_POSITION_OUT_Z 0x00000230
+
+#define NVC0_M2MF_OFFSET_OUT_HIGH 0x00000238
+
+#define NVC0_M2MF_OFFSET_OUT_LOW 0x0000023c
+
+#define NVC0_M2MF_EXEC 0x00000300
+#define NVC0_M2MF_EXEC_PUSH 0x00000001
+#define NVC0_M2MF_EXEC_LINEAR_IN 0x00000010
+#define NVC0_M2MF_EXEC_LINEAR_OUT 0x00000100
+#define NVC0_M2MF_EXEC_NOTIFY 0x00002000
+#define NVC0_M2MF_EXEC_INC__MASK 0x00f00000
+#define NVC0_M2MF_EXEC_INC__SHIFT 20
+
+#define NVC0_M2MF_DATA 0x00000304
+
+#define NVC0_M2MF_OFFSET_IN_HIGH 0x0000030c
+
+#define NVC0_M2MF_OFFSET_IN_LOW 0x00000310
+
+#define NVC0_M2MF_PITCH_IN 0x00000314
+
+#define NVC0_M2MF_PITCH_OUT 0x00000318
+
+#define NVC0_M2MF_LINE_LENGTH_IN 0x0000031c
+
+#define NVC0_M2MF_LINE_COUNT 0x00000320
+
+#define NVC0_M2MF_NOTIFY_ADDRESS_HIGH 0x0000032c
+
+#define NVC0_M2MF_NOTIFY_ADDRESS_LOW 0x00000330
+
+#define NVC0_M2MF_NOTIFY 0x00000334
+
+#define NVC0_M2MF_TILING_POSITION_IN_X 0x00000344
+
+#define NVC0_M2MF_TILING_POSITION_IN_Y 0x00000348
+
+#define NVC0_M2MF_TILING_POSITION_OUT_X 0x0000034c
+
+#define NVC0_M2MF_TILING_POSITION_OUT_Y 0x00000350
+
+
+#endif /* NVC0_M2MF_XML */
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
new file mode 100644
index 0000000..79c9390
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_miptree.c
@@ -0,0 +1,358 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_resource.h"
+
+static uint32_t
+nvc0_tex_choose_tile_dims(unsigned nx, unsigned ny, unsigned nz)
+{
+ return nv50_tex_choose_tile_dims_helper(nx, ny, nz);
+}
+
+static uint32_t
+nvc0_mt_choose_storage_type(struct nv50_miptree *mt, boolean compressed)
+{
+ const unsigned ms = util_logbase2(mt->base.base.nr_samples);
+
+ uint32_t tile_flags;
+
+ if (unlikely(mt->base.base.bind & PIPE_BIND_CURSOR))
+ return 0;
+ if (unlikely(mt->base.base.flags & NOUVEAU_RESOURCE_FLAG_LINEAR))
+ return 0;
+
+ switch (mt->base.base.format) {
+ case PIPE_FORMAT_Z16_UNORM:
+ if (compressed)
+ tile_flags = 0x02 + ms;
+ else
+ tile_flags = 0x01;
+ break;
+ case PIPE_FORMAT_S8_UINT_Z24_UNORM:
+ if (compressed)
+ tile_flags = 0x51 + ms;
+ else
+ tile_flags = 0x46;
+ break;
+ case PIPE_FORMAT_Z24X8_UNORM:
+ case PIPE_FORMAT_Z24_UNORM_S8_UINT:
+ if (compressed)
+ tile_flags = 0x17 + ms;
+ else
+ tile_flags = 0x11;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT:
+ if (compressed)
+ tile_flags = 0x86 + ms;
+ else
+ tile_flags = 0x7b;
+ break;
+ case PIPE_FORMAT_Z32_FLOAT_S8X24_UINT:
+ if (compressed)
+ tile_flags = 0xce + ms;
+ else
+ tile_flags = 0xc3;
+ break;
+ default:
+ switch (util_format_get_blocksizebits(mt->base.base.format)) {
+ case 128:
+ if (compressed)
+ tile_flags = 0xf4 + ms * 2;
+ else
+ tile_flags = 0xfe;
+ break;
+ case 64:
+ if (compressed) {
+ switch (ms) {
+ case 0: tile_flags = 0xe6; break;
+ case 1: tile_flags = 0xeb; break;
+ case 2: tile_flags = 0xed; break;
+ case 3: tile_flags = 0xf2; break;
+ default:
+ return 0;
+ }
+ } else {
+ tile_flags = 0xfe;
+ }
+ break;
+ case 32:
+ if (compressed && ms) {
+ switch (ms) {
+ /* This one makes things blurry:
+ case 0: tile_flags = 0xdb; break;
+ */
+ case 1: tile_flags = 0xdd; break;
+ case 2: tile_flags = 0xdf; break;
+ case 3: tile_flags = 0xe4; break;
+ default:
+ return 0;
+ }
+ } else {
+ tile_flags = 0xfe;
+ }
+ break;
+ case 16:
+ case 8:
+ tile_flags = 0xfe;
+ break;
+ default:
+ return 0;
+ }
+ break;
+ }
+
+ return tile_flags;
+}
+
+static INLINE boolean
+nvc0_miptree_init_ms_mode(struct nv50_miptree *mt)
+{
+ switch (mt->base.base.nr_samples) {
+ case 8:
+ mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS8;
+ mt->ms_x = 2;
+ mt->ms_y = 1;
+ break;
+ case 4:
+ mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS4;
+ mt->ms_x = 1;
+ mt->ms_y = 1;
+ break;
+ case 2:
+ mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS2;
+ mt->ms_x = 1;
+ break;
+ case 1:
+ case 0:
+ mt->ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
+ break;
+ default:
+ NOUVEAU_ERR("invalid nr_samples: %u\n", mt->base.base.nr_samples);
+ return FALSE;
+ }
+ return TRUE;
+}
+
+static void
+nvc0_miptree_init_layout_video(struct nv50_miptree *mt)
+{
+ const struct pipe_resource *pt = &mt->base.base;
+ const unsigned blocksize = util_format_get_blocksize(pt->format);
+
+ assert(pt->last_level == 0);
+ assert(mt->ms_x == 0 && mt->ms_y == 0);
+ assert(!util_format_is_compressed(pt->format));
+
+ mt->layout_3d = pt->target == PIPE_TEXTURE_3D;
+
+ mt->level[0].tile_mode = 0x10;
+ mt->level[0].pitch = align(pt->width0 * blocksize, 64);
+ mt->total_size = align(pt->height0, 16) * mt->level[0].pitch * (mt->layout_3d ? pt->depth0 : 1);
+
+ if (pt->array_size > 1) {
+ mt->layer_stride = align(mt->total_size, NVC0_TILE_SIZE(0x10));
+ mt->total_size = mt->layer_stride * pt->array_size;
+ }
+}
+
+static void
+nvc0_miptree_init_layout_tiled(struct nv50_miptree *mt)
+{
+ struct pipe_resource *pt = &mt->base.base;
+ unsigned w, h, d, l;
+ const unsigned blocksize = util_format_get_blocksize(pt->format);
+
+ mt->layout_3d = pt->target == PIPE_TEXTURE_3D;
+
+ w = pt->width0 << mt->ms_x;
+ h = pt->height0 << mt->ms_y;
+
+ /* For 3D textures, a mipmap is spanned by all the layers, for array
+ * textures and cube maps, each layer contains its own mipmaps.
+ */
+ d = mt->layout_3d ? pt->depth0 : 1;
+
+ assert(!mt->ms_mode || !pt->last_level);
+
+ for (l = 0; l <= pt->last_level; ++l) {
+ struct nv50_miptree_level *lvl = &mt->level[l];
+ unsigned tsx, tsy, tsz;
+ unsigned nbx = util_format_get_nblocksx(pt->format, w);
+ unsigned nby = util_format_get_nblocksy(pt->format, h);
+
+ lvl->offset = mt->total_size;
+
+ lvl->tile_mode = nvc0_tex_choose_tile_dims(nbx, nby, d);
+
+ tsx = NVC0_TILE_SIZE_X(lvl->tile_mode); /* x is tile row pitch in bytes */
+ tsy = NVC0_TILE_SIZE_Y(lvl->tile_mode);
+ tsz = NVC0_TILE_SIZE_Z(lvl->tile_mode);
+
+ lvl->pitch = align(nbx * blocksize, tsx);
+
+ mt->total_size += lvl->pitch * align(nby, tsy) * align(d, tsz);
+
+ w = u_minify(w, 1);
+ h = u_minify(h, 1);
+ d = u_minify(d, 1);
+ }
+
+ if (pt->array_size > 1) {
+ mt->layer_stride = align(mt->total_size,
+ NVC0_TILE_SIZE(mt->level[0].tile_mode));
+ mt->total_size = mt->layer_stride * pt->array_size;
+ }
+}
+
+const struct u_resource_vtbl nvc0_miptree_vtbl =
+{
+ nv50_miptree_get_handle, /* get_handle */
+ nv50_miptree_destroy, /* resource_destroy */
+ nvc0_miptree_transfer_map, /* transfer_map */
+ u_default_transfer_flush_region, /* transfer_flush_region */
+ nvc0_miptree_transfer_unmap, /* transfer_unmap */
+ u_default_transfer_inline_write /* transfer_inline_write */
+};
+
+struct pipe_resource *
+nvc0_miptree_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *templ)
+{
+ struct nouveau_device *dev = nouveau_screen(pscreen)->device;
+ struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
+ struct pipe_resource *pt = &mt->base.base;
+ boolean compressed = dev->drm_version >= 0x01000101;
+ int ret;
+ union nouveau_bo_config bo_config;
+ uint32_t bo_flags;
+
+ if (!mt)
+ return NULL;
+
+ mt->base.vtbl = &nvc0_miptree_vtbl;
+ *pt = *templ;
+ pipe_reference_init(&pt->reference, 1);
+ pt->screen = pscreen;
+
+ if (pt->usage == PIPE_USAGE_STAGING) {
+ switch (pt->target) {
+ case PIPE_TEXTURE_1D:
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
+ if (pt->last_level == 0 &&
+ !util_format_is_depth_or_stencil(pt->format) &&
+ pt->nr_samples <= 1)
+ pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (pt->bind & PIPE_BIND_LINEAR)
+ pt->flags |= NOUVEAU_RESOURCE_FLAG_LINEAR;
+
+ bo_config.nvc0.memtype = nvc0_mt_choose_storage_type(mt, compressed);
+
+ if (!nvc0_miptree_init_ms_mode(mt)) {
+ FREE(mt);
+ return NULL;
+ }
+
+ if (unlikely(pt->flags & NVC0_RESOURCE_FLAG_VIDEO)) {
+ nvc0_miptree_init_layout_video(mt);
+ } else
+ if (likely(bo_config.nvc0.memtype)) {
+ nvc0_miptree_init_layout_tiled(mt);
+ } else
+ if (!nv50_miptree_init_layout_linear(mt, 128)) {
+ FREE(mt);
+ return NULL;
+ }
+ bo_config.nvc0.tile_mode = mt->level[0].tile_mode;
+
+ if (!bo_config.nvc0.memtype && pt->usage == PIPE_USAGE_STAGING)
+ mt->base.domain = NOUVEAU_BO_GART;
+ else
+ mt->base.domain = NOUVEAU_BO_VRAM;
+
+ bo_flags = mt->base.domain | NOUVEAU_BO_NOSNOOP;
+
+ if (mt->base.base.bind & (PIPE_BIND_CURSOR | PIPE_BIND_DISPLAY_TARGET))
+ bo_flags |= NOUVEAU_BO_CONTIG;
+
+ ret = nouveau_bo_new(dev, bo_flags, 4096, mt->total_size, &bo_config,
+ &mt->base.bo);
+ if (ret) {
+ FREE(mt);
+ return NULL;
+ }
+ mt->base.address = mt->base.bo->offset;
+
+ NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_count, 1);
+ NOUVEAU_DRV_STAT(nouveau_screen(pscreen), tex_obj_current_bytes,
+ mt->total_size);
+
+ return pt;
+}
+
+/* Offset of zslice @z from start of level @l. */
+INLINE unsigned
+nvc0_mt_zslice_offset(const struct nv50_miptree *mt, unsigned l, unsigned z)
+{
+ const struct pipe_resource *pt = &mt->base.base;
+
+ unsigned tds = NVC0_TILE_SHIFT_Z(mt->level[l].tile_mode);
+ unsigned ths = NVC0_TILE_SHIFT_Y(mt->level[l].tile_mode);
+
+ unsigned nby = util_format_get_nblocksy(pt->format,
+ u_minify(pt->height0, l));
+
+ /* to next 2D tile slice within a 3D tile */
+ unsigned stride_2d = NVC0_TILE_SIZE_2D(mt->level[l].tile_mode);
+
+ /* to slice in the next (in z direction) 3D tile */
+ unsigned stride_3d = (align(nby, (1 << ths)) * mt->level[l].pitch) << tds;
+
+ return (z & (1 << (tds - 1))) * stride_2d + (z >> tds) * stride_3d;
+}
+
+/* Surface functions.
+ */
+
+struct pipe_surface *
+nvc0_miptree_surface_new(struct pipe_context *pipe,
+ struct pipe_resource *pt,
+ const struct pipe_surface *templ)
+{
+ struct nv50_surface *ns = nv50_surface_from_miptree(nv50_miptree(pt), templ);
+ if (!ns)
+ return NULL;
+ ns->base.context = pipe;
+ return &ns->base;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.c b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
new file mode 100644
index 0000000..71deb34
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.c
@@ -0,0 +1,811 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_defines.h"
+
+#include "nvc0/nvc0_context.h"
+
+#include "codegen/nv50_ir_driver.h"
+#include "nvc0/nve4_compute.h"
+
+/* NOTE: Using a[0x270] in FP may cause an error even if we're using less than
+ * 124 scalar varying values.
+ */
+static uint32_t
+nvc0_shader_input_address(unsigned sn, unsigned si, unsigned ubase)
+{
+ switch (sn) {
+ case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4;
+ case TGSI_SEMANTIC_PRIMID: return 0x060;
+ case TGSI_SEMANTIC_PSIZE: return 0x06c;
+ case TGSI_SEMANTIC_POSITION: return 0x070;
+ case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
+ case TGSI_SEMANTIC_FOG: return 0x2e8;
+ case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10;
+ case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10;
+ case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4;
+ case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10;
+ case TGSI_SEMANTIC_CLIPVERTEX: return 0x270;
+ case TGSI_SEMANTIC_PCOORD: return 0x2e0;
+ case NV50_SEMANTIC_TESSCOORD: return 0x2f0;
+ case TGSI_SEMANTIC_INSTANCEID: return 0x2f8;
+ case TGSI_SEMANTIC_VERTEXID: return 0x2fc;
+ case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10;
+ case TGSI_SEMANTIC_FACE: return 0x3fc;
+ case NV50_SEMANTIC_INVOCATIONID: return ~0;
+ default:
+ assert(!"invalid TGSI input semantic");
+ return ~0;
+ }
+}
+
+static uint32_t
+nvc0_shader_output_address(unsigned sn, unsigned si, unsigned ubase)
+{
+ switch (sn) {
+ case NV50_SEMANTIC_TESSFACTOR: return 0x000 + si * 0x4;
+ case TGSI_SEMANTIC_PRIMID: return 0x060;
+ case NV50_SEMANTIC_LAYER: return 0x064;
+ case NV50_SEMANTIC_VIEWPORTINDEX: return 0x068;
+ case TGSI_SEMANTIC_PSIZE: return 0x06c;
+ case TGSI_SEMANTIC_POSITION: return 0x070;
+ case TGSI_SEMANTIC_GENERIC: return ubase + si * 0x10;
+ case TGSI_SEMANTIC_FOG: return 0x2e8;
+ case TGSI_SEMANTIC_COLOR: return 0x280 + si * 0x10;
+ case TGSI_SEMANTIC_BCOLOR: return 0x2a0 + si * 0x10;
+ case NV50_SEMANTIC_CLIPDISTANCE: return 0x2c0 + si * 0x4;
+ case TGSI_SEMANTIC_CLIPDIST: return 0x2c0 + si * 0x10;
+ case TGSI_SEMANTIC_CLIPVERTEX: return 0x270;
+ case TGSI_SEMANTIC_TEXCOORD: return 0x300 + si * 0x10;
+ case TGSI_SEMANTIC_EDGEFLAG: return ~0;
+ default:
+ assert(!"invalid TGSI output semantic");
+ return ~0;
+ }
+}
+
+static int
+nvc0_vp_assign_input_slots(struct nv50_ir_prog_info *info)
+{
+ unsigned i, c, n;
+
+ for (n = 0, i = 0; i < info->numInputs; ++i) {
+ switch (info->in[i].sn) {
+ case TGSI_SEMANTIC_INSTANCEID: /* for SM4 only, in TGSI they're SVs */
+ case TGSI_SEMANTIC_VERTEXID:
+ info->in[i].mask = 0x1;
+ info->in[i].slot[0] =
+ nvc0_shader_input_address(info->in[i].sn, 0, 0) / 4;
+ continue;
+ default:
+ break;
+ }
+ for (c = 0; c < 4; ++c)
+ info->in[i].slot[c] = (0x80 + n * 0x10 + c * 0x4) / 4;
+ ++n;
+ }
+
+ return 0;
+}
+
+static int
+nvc0_sp_assign_input_slots(struct nv50_ir_prog_info *info)
+{
+ unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10);
+ unsigned offset;
+ unsigned i, c;
+
+ for (i = 0; i < info->numInputs; ++i) {
+ offset = nvc0_shader_input_address(info->in[i].sn,
+ info->in[i].si, ubase);
+ if (info->in[i].patch && offset >= 0x20)
+ offset = 0x20 + info->in[i].si * 0x10;
+
+ if (info->in[i].sn == NV50_SEMANTIC_TESSCOORD)
+ info->in[i].mask &= 3;
+
+ for (c = 0; c < 4; ++c)
+ info->in[i].slot[c] = (offset + c * 0x4) / 4;
+ }
+
+ return 0;
+}
+
+static int
+nvc0_fp_assign_output_slots(struct nv50_ir_prog_info *info)
+{
+ unsigned count = info->prop.fp.numColourResults * 4;
+ unsigned i, c;
+
+ for (i = 0; i < info->numOutputs; ++i)
+ if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
+ for (c = 0; c < 4; ++c)
+ info->out[i].slot[c] = info->out[i].si * 4 + c;
+
+ if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
+ info->out[info->io.sampleMask].slot[0] = count++;
+ else
+ if (info->target >= 0xe0)
+ count++; /* on Kepler, depth is always last colour reg + 2 */
+
+ if (info->io.fragDepth < PIPE_MAX_SHADER_OUTPUTS)
+ info->out[info->io.fragDepth].slot[2] = count;
+
+ return 0;
+}
+
+static int
+nvc0_sp_assign_output_slots(struct nv50_ir_prog_info *info)
+{
+ unsigned ubase = MAX2(0x80, 0x20 + info->numPatchConstants * 0x10);
+ unsigned offset;
+ unsigned i, c;
+
+ for (i = 0; i < info->numOutputs; ++i) {
+ offset = nvc0_shader_output_address(info->out[i].sn,
+ info->out[i].si, ubase);
+ if (info->out[i].patch && offset >= 0x20)
+ offset = 0x20 + info->out[i].si * 0x10;
+
+ for (c = 0; c < 4; ++c)
+ info->out[i].slot[c] = (offset + c * 0x4) / 4;
+ }
+
+ return 0;
+}
+
+static int
+nvc0_program_assign_varying_slots(struct nv50_ir_prog_info *info)
+{
+ int ret;
+
+ if (info->type == PIPE_SHADER_VERTEX)
+ ret = nvc0_vp_assign_input_slots(info);
+ else
+ ret = nvc0_sp_assign_input_slots(info);
+ if (ret)
+ return ret;
+
+ if (info->type == PIPE_SHADER_FRAGMENT)
+ ret = nvc0_fp_assign_output_slots(info);
+ else
+ ret = nvc0_sp_assign_output_slots(info);
+ return ret;
+}
+
+static INLINE void
+nvc0_vtgp_hdr_update_oread(struct nvc0_program *vp, uint8_t slot)
+{
+ uint8_t min = (vp->hdr[4] >> 12) & 0xff;
+ uint8_t max = (vp->hdr[4] >> 24);
+
+ min = MIN2(min, slot);
+ max = MAX2(max, slot);
+
+ vp->hdr[4] = (max << 24) | (min << 12);
+}
+
+/* Common part of header generation for VP, TCP, TEP and GP. */
+static int
+nvc0_vtgp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
+{
+ unsigned i, c, a;
+
+ for (i = 0; i < info->numInputs; ++i) {
+ if (info->in[i].patch)
+ continue;
+ for (c = 0; c < 4; ++c) {
+ a = info->in[i].slot[c];
+ if (info->in[i].mask & (1 << c)) {
+ if (info->in[i].sn != NV50_SEMANTIC_TESSCOORD)
+ vp->hdr[5 + a / 32] |= 1 << (a % 32);
+ else
+ nvc0_vtgp_hdr_update_oread(vp, info->in[i].slot[c]);
+ }
+ }
+ }
+
+ for (i = 0; i < info->numOutputs; ++i) {
+ if (info->out[i].patch)
+ continue;
+ for (c = 0; c < 4; ++c) {
+ if (!(info->out[i].mask & (1 << c)))
+ continue;
+ assert(info->out[i].slot[c] >= 0x40 / 4);
+ a = info->out[i].slot[c] - 0x40 / 4;
+ vp->hdr[13 + a / 32] |= 1 << (a % 32);
+ if (info->out[i].oread)
+ nvc0_vtgp_hdr_update_oread(vp, info->out[i].slot[c]);
+ }
+ }
+
+ for (i = 0; i < info->numSysVals; ++i) {
+ switch (info->sv[i].sn) {
+ case TGSI_SEMANTIC_PRIMID:
+ vp->hdr[5] |= 1 << 24;
+ break;
+ case TGSI_SEMANTIC_INSTANCEID:
+ vp->hdr[10] |= 1 << 30;
+ break;
+ case TGSI_SEMANTIC_VERTEXID:
+ vp->hdr[10] |= 1 << 31;
+ break;
+ default:
+ break;
+ }
+ }
+
+ vp->vp.clip_enable = info->io.clipDistanceMask;
+ for (i = 0; i < 8; ++i)
+ if (info->io.cullDistanceMask & (1 << i))
+ vp->vp.clip_mode |= 1 << (i * 4);
+
+ if (info->io.genUserClip < 0)
+ vp->vp.num_ucps = PIPE_MAX_CLIP_PLANES + 1; /* prevent rebuilding */
+
+ return 0;
+}
+
+static int
+nvc0_vp_gen_header(struct nvc0_program *vp, struct nv50_ir_prog_info *info)
+{
+ vp->hdr[0] = 0x20061 | (1 << 10);
+ vp->hdr[4] = 0xff000;
+
+ vp->hdr[18] = info->io.clipDistanceMask;
+
+ return nvc0_vtgp_gen_header(vp, info);
+}
+
+#if defined(PIPE_SHADER_HULL) || defined(PIPE_SHADER_DOMAIN)
+static void
+nvc0_tp_get_tess_mode(struct nvc0_program *tp, struct nv50_ir_prog_info *info)
+{
+ if (info->prop.tp.outputPrim == PIPE_PRIM_MAX) {
+ tp->tp.tess_mode = ~0;
+ return;
+ }
+ switch (info->prop.tp.domain) {
+ case PIPE_PRIM_LINES:
+ tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_ISOLINES;
+ break;
+ case PIPE_PRIM_TRIANGLES:
+ tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_TRIANGLES;
+ if (info->prop.tp.winding > 0)
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CW;
+ break;
+ case PIPE_PRIM_QUADS:
+ tp->tp.tess_mode = NVC0_3D_TESS_MODE_PRIM_QUADS;
+ break;
+ default:
+ tp->tp.tess_mode = ~0;
+ return;
+ }
+ if (info->prop.tp.outputPrim != PIPE_PRIM_POINTS)
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_CONNECTED;
+
+ switch (info->prop.tp.partitioning) {
+ case PIPE_TESS_PART_INTEGER:
+ case PIPE_TESS_PART_POW2:
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_EQUAL;
+ break;
+ case PIPE_TESS_PART_FRACT_ODD:
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_ODD;
+ break;
+ case PIPE_TESS_PART_FRACT_EVEN:
+ tp->tp.tess_mode |= NVC0_3D_TESS_MODE_SPACING_FRACTIONAL_EVEN;
+ break;
+ default:
+ assert(!"invalid tessellator partitioning");
+ break;
+ }
+}
+#endif
+
+#ifdef PIPE_SHADER_HULL
+static int
+nvc0_tcp_gen_header(struct nvc0_program *tcp, struct nv50_ir_prog_info *info)
+{
+ unsigned opcs = 6; /* output patch constants (at least the TessFactors) */
+
+ tcp->tp.input_patch_size = info->prop.tp.inputPatchSize;
+
+ if (info->numPatchConstants)
+ opcs = 8 + info->numPatchConstants * 4;
+
+ tcp->hdr[0] = 0x20061 | (2 << 10);
+
+ tcp->hdr[1] = opcs << 24;
+ tcp->hdr[2] = info->prop.tp.outputPatchSize << 24;
+
+ tcp->hdr[4] = 0xff000; /* initial min/max parallel output read address */
+
+ nvc0_vtgp_gen_header(tcp, info);
+
+ nvc0_tp_get_tess_mode(tcp, info);
+
+ return 0;
+}
+#endif
+
+#ifdef PIPE_SHADER_DOMAIN
+static int
+nvc0_tep_gen_header(struct nvc0_program *tep, struct nv50_ir_prog_info *info)
+{
+ tep->tp.input_patch_size = ~0;
+
+ tep->hdr[0] = 0x20061 | (3 << 10);
+ tep->hdr[4] = 0xff000;
+
+ nvc0_vtgp_gen_header(tep, info);
+
+ nvc0_tp_get_tess_mode(tep, info);
+
+ tep->hdr[18] |= 0x3 << 12; /* ? */
+
+ return 0;
+}
+#endif
+
+static int
+nvc0_gp_gen_header(struct nvc0_program *gp, struct nv50_ir_prog_info *info)
+{
+ gp->hdr[0] = 0x20061 | (4 << 10);
+
+ gp->hdr[2] = MIN2(info->prop.gp.instanceCount, 32) << 24;
+
+ switch (info->prop.gp.outputPrim) {
+ case PIPE_PRIM_POINTS:
+ gp->hdr[3] = 0x01000000;
+ gp->hdr[0] |= 0xf0000000;
+ break;
+ case PIPE_PRIM_LINE_STRIP:
+ gp->hdr[3] = 0x06000000;
+ gp->hdr[0] |= 0x10000000;
+ break;
+ case PIPE_PRIM_TRIANGLE_STRIP:
+ gp->hdr[3] = 0x07000000;
+ gp->hdr[0] |= 0x10000000;
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ gp->hdr[4] = info->prop.gp.maxVertices & 0x1ff;
+
+ return nvc0_vtgp_gen_header(gp, info);
+}
+
+#define NVC0_INTERP_FLAT (1 << 0)
+#define NVC0_INTERP_PERSPECTIVE (2 << 0)
+#define NVC0_INTERP_LINEAR (3 << 0)
+#define NVC0_INTERP_CENTROID (1 << 2)
+
+static uint8_t
+nvc0_hdr_interp_mode(const struct nv50_ir_varying *var)
+{
+ if (var->linear)
+ return NVC0_INTERP_LINEAR;
+ if (var->flat)
+ return NVC0_INTERP_FLAT;
+ return NVC0_INTERP_PERSPECTIVE;
+}
+
+static int
+nvc0_fp_gen_header(struct nvc0_program *fp, struct nv50_ir_prog_info *info)
+{
+ unsigned i, c, a, m;
+
+ /* just 00062 on Kepler */
+ fp->hdr[0] = 0x20062 | (5 << 10);
+ fp->hdr[5] = 0x80000000; /* getting a trap if FRAG_COORD_UMASK.w = 0 */
+
+ if (info->prop.fp.usesDiscard)
+ fp->hdr[0] |= 0x8000;
+ if (info->prop.fp.numColourResults > 1)
+ fp->hdr[0] |= 0x4000;
+ if (info->io.sampleMask < PIPE_MAX_SHADER_OUTPUTS)
+ fp->hdr[19] |= 0x1;
+ if (info->prop.fp.writesDepth) {
+ fp->hdr[19] |= 0x2;
+ fp->flags[0] = 0x11; /* deactivate ZCULL */
+ }
+
+ for (i = 0; i < info->numInputs; ++i) {
+ m = nvc0_hdr_interp_mode(&info->in[i]);
+ for (c = 0; c < 4; ++c) {
+ if (!(info->in[i].mask & (1 << c)))
+ continue;
+ a = info->in[i].slot[c];
+ if (info->in[i].slot[0] >= (0x060 / 4) &&
+ info->in[i].slot[0] <= (0x07c / 4)) {
+ fp->hdr[5] |= 1 << (24 + (a - 0x060 / 4));
+ } else
+ if (info->in[i].slot[0] >= (0x2c0 / 4) &&
+ info->in[i].slot[0] <= (0x2fc / 4)) {
+ fp->hdr[14] |= (1 << (a - 0x280 / 4)) & 0x07ff0000;
+ } else {
+ if (info->in[i].slot[c] < (0x040 / 4) ||
+ info->in[i].slot[c] > (0x380 / 4))
+ continue;
+ a *= 2;
+ if (info->in[i].slot[0] >= (0x300 / 4))
+ a -= 32;
+ fp->hdr[4 + a / 32] |= m << (a % 32);
+ }
+ }
+ }
+
+ for (i = 0; i < info->numOutputs; ++i) {
+ if (info->out[i].sn == TGSI_SEMANTIC_COLOR)
+ fp->hdr[18] |= info->out[i].mask << info->out[i].slot[0];
+ }
+
+ fp->fp.early_z = info->prop.fp.earlyFragTests;
+
+ return 0;
+}
+
+static struct nvc0_transform_feedback_state *
+nvc0_program_create_tfb_state(const struct nv50_ir_prog_info *info,
+ const struct pipe_stream_output_info *pso)
+{
+ struct nvc0_transform_feedback_state *tfb;
+ unsigned b, i, c;
+
+ tfb = MALLOC_STRUCT(nvc0_transform_feedback_state);
+ if (!tfb)
+ return NULL;
+ for (b = 0; b < 4; ++b) {
+ tfb->stride[b] = pso->stride[b] * 4;
+ tfb->varying_count[b] = 0;
+ }
+ memset(tfb->varying_index, 0xff, sizeof(tfb->varying_index)); /* = skip */
+
+ for (i = 0; i < pso->num_outputs; ++i) {
+ unsigned s = pso->output[i].start_component;
+ unsigned p = pso->output[i].dst_offset;
+ b = pso->output[i].output_buffer;
+
+ for (c = 0; c < pso->output[i].num_components; ++c)
+ tfb->varying_index[b][p++] =
+ info->out[pso->output[i].register_index].slot[s + c];
+
+ tfb->varying_count[b] = MAX2(tfb->varying_count[b], p);
+ }
+ for (b = 0; b < 4; ++b) // zero unused indices (looks nicer)
+ for (c = tfb->varying_count[b]; c & 3; ++c)
+ tfb->varying_index[b][c] = 0;
+
+ return tfb;
+}
+
+#ifdef DEBUG
+static void
+nvc0_program_dump(struct nvc0_program *prog)
+{
+ unsigned pos;
+
+ if (prog->type != PIPE_SHADER_COMPUTE) {
+ for (pos = 0; pos < sizeof(prog->hdr) / sizeof(prog->hdr[0]); ++pos)
+ debug_printf("HDR[%02lx] = 0x%08x\n",
+ pos * sizeof(prog->hdr[0]), prog->hdr[pos]);
+ }
+ debug_printf("shader binary code (0x%x bytes):", prog->code_size);
+ for (pos = 0; pos < prog->code_size / 4; ++pos) {
+ if ((pos % 8) == 0)
+ debug_printf("\n");
+ debug_printf("%08x ", prog->code[pos]);
+ }
+ debug_printf("\n");
+}
+#endif
+
+boolean
+nvc0_program_translate(struct nvc0_program *prog, uint16_t chipset)
+{
+ struct nv50_ir_prog_info *info;
+ int ret;
+
+ info = CALLOC_STRUCT(nv50_ir_prog_info);
+ if (!info)
+ return FALSE;
+
+ info->type = prog->type;
+ info->target = chipset;
+ info->bin.sourceRep = NV50_PROGRAM_IR_TGSI;
+ info->bin.source = (void *)prog->pipe.tokens;
+
+ info->io.genUserClip = prog->vp.num_ucps;
+ info->io.ucpBase = 256;
+ info->io.ucpCBSlot = 15;
+
+ if (prog->type == PIPE_SHADER_COMPUTE) {
+ if (chipset >= NVISA_GK104_CHIPSET) {
+ info->io.resInfoCBSlot = 0;
+ info->io.texBindBase = NVE4_CP_INPUT_TEX(0);
+ info->io.suInfoBase = NVE4_CP_INPUT_SUF(0);
+ info->prop.cp.gridInfoBase = NVE4_CP_INPUT_GRID_INFO(0);
+ }
+ info->io.msInfoCBSlot = 0;
+ info->io.msInfoBase = NVE4_CP_INPUT_MS_OFFSETS;
+ } else {
+ if (chipset >= NVISA_GK104_CHIPSET) {
+ info->io.resInfoCBSlot = 15;
+ info->io.texBindBase = 0x20;
+ info->io.suInfoBase = 0; /* TODO */
+ }
+ info->io.msInfoCBSlot = 15;
+ info->io.msInfoBase = 0; /* TODO */
+ }
+
+ info->assignSlots = nvc0_program_assign_varying_slots;
+
+#ifdef DEBUG
+ info->optLevel = debug_get_num_option("NV50_PROG_OPTIMIZE", 3);
+ info->dbgFlags = debug_get_num_option("NV50_PROG_DEBUG", 0);
+#else
+ info->optLevel = 3;
+#endif
+
+ ret = nv50_ir_generate_code(info);
+ if (ret) {
+ NOUVEAU_ERR("shader translation failed: %i\n", ret);
+ goto out;
+ }
+ if (prog->type != PIPE_SHADER_COMPUTE)
+ FREE(info->bin.syms);
+
+ prog->code = info->bin.code;
+ prog->code_size = info->bin.codeSize;
+ prog->immd_data = info->immd.buf;
+ prog->immd_size = info->immd.bufSize;
+ prog->relocs = info->bin.relocData;
+ prog->num_gprs = MAX2(4, (info->bin.maxGPR + 1));
+ prog->num_barriers = info->numBarriers;
+
+ prog->vp.need_vertex_id = info->io.vertexId < PIPE_MAX_SHADER_INPUTS;
+
+ if (info->io.edgeFlagOut < PIPE_MAX_ATTRIBS)
+ info->out[info->io.edgeFlagOut].mask = 0; /* for headergen */
+ prog->vp.edgeflag = info->io.edgeFlagIn;
+
+ switch (prog->type) {
+ case PIPE_SHADER_VERTEX:
+ ret = nvc0_vp_gen_header(prog, info);
+ break;
+#ifdef PIPE_SHADER_HULL
+ case PIPE_SHADER_HULL:
+ ret = nvc0_tcp_gen_header(prog, info);
+ break;
+#endif
+#ifdef PIPE_SHADER_DOMAIN
+ case PIPE_SHADER_DOMAIN:
+ ret = nvc0_tep_gen_header(prog, info);
+ break;
+#endif
+ case PIPE_SHADER_GEOMETRY:
+ ret = nvc0_gp_gen_header(prog, info);
+ break;
+ case PIPE_SHADER_FRAGMENT:
+ ret = nvc0_fp_gen_header(prog, info);
+ break;
+ case PIPE_SHADER_COMPUTE:
+ prog->cp.syms = info->bin.syms;
+ prog->cp.num_syms = info->bin.numSyms;
+ break;
+ default:
+ ret = -1;
+ NOUVEAU_ERR("unknown program type: %u\n", prog->type);
+ break;
+ }
+ if (ret)
+ goto out;
+
+ if (info->bin.tlsSpace) {
+ assert(info->bin.tlsSpace < (1 << 24));
+ prog->hdr[0] |= 1 << 26;
+ prog->hdr[1] |= info->bin.tlsSpace; /* l[] size */
+ prog->need_tls = TRUE;
+ }
+ /* TODO: factor 2 only needed where joinat/precont is used,
+ * and we only have to count non-uniform branches
+ */
+ /*
+ if ((info->maxCFDepth * 2) > 16) {
+ prog->hdr[2] |= (((info->maxCFDepth * 2) + 47) / 48) * 0x200;
+ prog->need_tls = TRUE;
+ }
+ */
+ if (info->io.globalAccess)
+ prog->hdr[0] |= 1 << 16;
+
+ if (prog->pipe.stream_output.num_outputs)
+ prog->tfb = nvc0_program_create_tfb_state(info,
+ &prog->pipe.stream_output);
+
+out:
+ FREE(info);
+ return !ret;
+}
+
+boolean
+nvc0_program_upload_code(struct nvc0_context *nvc0, struct nvc0_program *prog)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ const boolean is_cp = prog->type == PIPE_SHADER_COMPUTE;
+ int ret;
+ uint32_t size = prog->code_size + (is_cp ? 0 : NVC0_SHADER_HEADER_SIZE);
+ uint32_t lib_pos = screen->lib_code->start;
+ uint32_t code_pos;
+
+ /* c[] bindings need to be aligned to 0x100, but we could use relocations
+ * to save space. */
+ if (prog->immd_size) {
+ prog->immd_base = size;
+ size = align(size, 0x40);
+ size += prog->immd_size + 0xc0; /* add 0xc0 for align 0x40 -> 0x100 */
+ }
+ /* On Fermi, SP_START_ID must be aligned to 0x40.
+ * On Kepler, the first instruction must be aligned to 0x80 because
+ * latency information is expected only at certain positions.
+ */
+ if (screen->base.class_3d >= NVE4_3D_CLASS)
+ size = size + (is_cp ? 0x40 : 0x70);
+ size = align(size, 0x40);
+
+ ret = nouveau_heap_alloc(screen->text_heap, size, prog, &prog->mem);
+ if (ret) {
+ struct nouveau_heap *heap = screen->text_heap;
+ struct nouveau_heap *iter;
+ for (iter = heap; iter && iter->next != heap; iter = iter->next) {
+ struct nvc0_program *evict = iter->priv;
+ if (evict)
+ nouveau_heap_free(&evict->mem);
+ }
+ debug_printf("WARNING: out of code space, evicting all shaders.\n");
+ ret = nouveau_heap_alloc(heap, size, prog, &prog->mem);
+ if (ret) {
+ NOUVEAU_ERR("shader too large (0x%x) to fit in code space ?\n", size);
+ return FALSE;
+ }
+ IMMED_NVC0(nvc0->base.pushbuf, NVC0_3D(SERIALIZE), 0);
+ }
+ prog->code_base = prog->mem->start;
+ prog->immd_base = align(prog->mem->start + prog->immd_base, 0x100);
+ assert((prog->immd_size == 0) || (prog->immd_base + prog->immd_size <=
+ prog->mem->start + prog->mem->size));
+
+ if (!is_cp) {
+ if (screen->base.class_3d >= NVE4_3D_CLASS) {
+ switch (prog->mem->start & 0xff) {
+ case 0x40: prog->code_base += 0x70; break;
+ case 0x80: prog->code_base += 0x30; break;
+ case 0xc0: prog->code_base += 0x70; break;
+ default:
+ prog->code_base += 0x30;
+ assert((prog->mem->start & 0xff) == 0x00);
+ break;
+ }
+ }
+ code_pos = prog->code_base + NVC0_SHADER_HEADER_SIZE;
+ } else {
+ if (screen->base.class_3d >= NVE4_3D_CLASS) {
+ if (prog->mem->start & 0x40)
+ prog->code_base += 0x40;
+ assert((prog->code_base & 0x7f) == 0x00);
+ }
+ code_pos = prog->code_base;
+ }
+
+ if (prog->relocs)
+ nv50_ir_relocate_code(prog->relocs, prog->code, code_pos, lib_pos, 0);
+
+#ifdef DEBUG
+ if (debug_get_bool_option("NV50_PROG_DEBUG", FALSE))
+ nvc0_program_dump(prog);
+#endif
+
+ if (!is_cp)
+ nvc0->base.push_data(&nvc0->base, screen->text, prog->code_base,
+ NOUVEAU_BO_VRAM, NVC0_SHADER_HEADER_SIZE, prog->hdr);
+ nvc0->base.push_data(&nvc0->base, screen->text, code_pos,
+ NOUVEAU_BO_VRAM, prog->code_size, prog->code);
+ if (prog->immd_size)
+ nvc0->base.push_data(&nvc0->base,
+ screen->text, prog->immd_base, NOUVEAU_BO_VRAM,
+ prog->immd_size, prog->immd_data);
+
+ BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(MEM_BARRIER), 1);
+ PUSH_DATA (nvc0->base.pushbuf, 0x1011);
+
+ return TRUE;
+}
+
+/* Upload code for builtin functions like integer division emulation. */
+void
+nvc0_program_library_upload(struct nvc0_context *nvc0)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ int ret;
+ uint32_t size;
+ const uint32_t *code;
+
+ if (screen->lib_code)
+ return;
+
+ nv50_ir_get_target_library(screen->base.device->chipset, &code, &size);
+ if (!size)
+ return;
+
+ ret = nouveau_heap_alloc(screen->text_heap, align(size, 0x100), NULL,
+ &screen->lib_code);
+ if (ret)
+ return;
+
+ nvc0->base.push_data(&nvc0->base,
+ screen->text, screen->lib_code->start, NOUVEAU_BO_VRAM,
+ size, code);
+ /* no need for a memory barrier, will be emitted with first program */
+}
+
+void
+nvc0_program_destroy(struct nvc0_context *nvc0, struct nvc0_program *prog)
+{
+ const struct pipe_shader_state pipe = prog->pipe;
+ const ubyte type = prog->type;
+
+ if (prog->mem)
+ nouveau_heap_free(&prog->mem);
+ if (prog->code)
+ FREE(prog->code); /* may be 0 for hardcoded shaders */
+ FREE(prog->immd_data);
+ FREE(prog->relocs);
+ if (prog->type == PIPE_SHADER_COMPUTE && prog->cp.syms)
+ FREE(prog->cp.syms);
+ if (prog->tfb) {
+ if (nvc0->state.tfb == prog->tfb)
+ nvc0->state.tfb = NULL;
+ FREE(prog->tfb);
+ }
+
+ memset(prog, 0, sizeof(*prog));
+
+ prog->pipe = pipe;
+ prog->type = type;
+}
+
+uint32_t
+nvc0_program_symbol_offset(const struct nvc0_program *prog, uint32_t label)
+{
+ const struct nv50_ir_prog_symbol *syms =
+ (const struct nv50_ir_prog_symbol *)prog->cp.syms;
+ unsigned base = 0;
+ unsigned i;
+ if (prog->type != PIPE_SHADER_COMPUTE)
+ base = NVC0_SHADER_HEADER_SIZE;
+ for (i = 0; i < prog->cp.num_syms; ++i)
+ if (syms[i].label == label)
+ return prog->code_base + base + syms[i].offset;
+ return prog->code_base; /* no symbols or symbol not found */
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_program.h b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
new file mode 100644
index 0000000..9c184d1
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_program.h
@@ -0,0 +1,68 @@
+
+#ifndef __NVC0_PROGRAM_H__
+#define __NVC0_PROGRAM_H__
+
+#include "pipe/p_state.h"
+
+#define NVC0_CAP_MAX_PROGRAM_TEMPS 128
+
+
+struct nvc0_transform_feedback_state {
+ uint32_t stride[4];
+ uint8_t varying_count[4];
+ uint8_t varying_index[4][128];
+};
+
+
+#define NVC0_SHADER_HEADER_SIZE (20 * 4)
+
+struct nvc0_program {
+ struct pipe_shader_state pipe;
+
+ ubyte type;
+ boolean translated;
+ boolean need_tls;
+ uint8_t num_gprs;
+
+ uint32_t *code;
+ uint32_t *immd_data;
+ unsigned code_base;
+ unsigned code_size;
+ unsigned immd_base;
+ unsigned immd_size; /* size of immediate array data */
+ unsigned parm_size; /* size of non-bindable uniforms (c0[]) */
+
+ uint32_t hdr[20];
+ uint32_t flags[2];
+
+ struct {
+ uint32_t clip_mode; /* clip/cull selection */
+ uint8_t clip_enable; /* mask of defined clip planes */
+ uint8_t num_ucps; /* also set to max if ClipDistance is used */
+ uint8_t edgeflag; /* attribute index of edgeflag input */
+ boolean need_vertex_id;
+ } vp;
+ struct {
+ uint8_t early_z;
+ uint8_t in_pos[PIPE_MAX_SHADER_INPUTS];
+ } fp;
+ struct {
+ uint32_t tess_mode; /* ~0 if defined by the other stage */
+ uint32_t input_patch_size;
+ } tp;
+ struct {
+ uint32_t lmem_size; /* local memory (TGSI PRIVATE resource) size */
+ uint32_t smem_size; /* shared memory (TGSI LOCAL resource) size */
+ void *syms;
+ unsigned num_syms;
+ } cp;
+ uint8_t num_barriers;
+
+ void *relocs;
+
+ struct nvc0_transform_feedback_state *tfb;
+
+ struct nouveau_heap *mem;
+};
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_push.c b/src/gallium/drivers/nouveau/nvc0/nvc0_push.c
new file mode 100644
index 0000000..15e8be6
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_push.c
@@ -0,0 +1,409 @@
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_resource.h"
+
+#include "nvc0/nvc0_3d.xml.h"
+
+struct push_context {
+ struct nouveau_pushbuf *push;
+
+ void *idxbuf;
+
+ uint32_t vertex_words;
+ uint32_t packet_vertex_limit;
+
+ struct translate *translate;
+
+ boolean primitive_restart;
+ boolean need_vertex_id;
+ uint32_t prim;
+ uint32_t restart_index;
+ uint32_t instance_id;
+
+ struct {
+ int buffer;
+ float value;
+ uint8_t *data;
+ unsigned offset;
+ unsigned stride;
+ } edgeflag;
+};
+
+static void
+init_push_context(struct nvc0_context *nvc0, struct push_context *ctx)
+{
+ struct pipe_vertex_element *ve;
+
+ ctx->push = nvc0->base.pushbuf;
+ ctx->translate = nvc0->vertex->translate;
+
+ if (likely(nvc0->vertex->num_elements < 32))
+ ctx->need_vertex_id = nvc0->vertprog->vp.need_vertex_id;
+ else
+ ctx->need_vertex_id = FALSE;
+
+ ctx->edgeflag.buffer = -1;
+ ctx->edgeflag.value = 0.5f;
+
+ if (unlikely(nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS)) {
+ ve = &nvc0->vertex->element[nvc0->vertprog->vp.edgeflag].pipe;
+ ctx->edgeflag.buffer = ve->vertex_buffer_index;
+ ctx->edgeflag.offset = ve->src_offset;
+ ctx->packet_vertex_limit = 1;
+ } else {
+ ctx->packet_vertex_limit = nvc0->vertex->vtx_per_packet_max;
+ if (unlikely(ctx->need_vertex_id))
+ ctx->packet_vertex_limit = 1;
+ }
+
+ ctx->vertex_words = nvc0->vertex->vtx_size;
+}
+
+static INLINE void
+set_edgeflag(struct push_context *ctx, unsigned vtx_id)
+{
+ float f = *(float *)(ctx->edgeflag.data + vtx_id * ctx->edgeflag.stride);
+
+ if (ctx->edgeflag.value != f) {
+ ctx->edgeflag.value = f;
+ IMMED_NVC0(ctx->push, NVC0_3D(EDGEFLAG), f ? 1 : 0);
+ }
+}
+
+static INLINE void
+set_vertexid(struct push_context *ctx, uint32_t vtx_id)
+{
+#if 0
+ BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_ID), 1); /* broken on nvc0 */
+#else
+ BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_DATA), 1); /* as last attribute */
+#endif
+ PUSH_DATA (ctx->push, vtx_id);
+}
+
+static INLINE unsigned
+prim_restart_search_i08(uint8_t *elts, unsigned push, uint8_t index)
+{
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
+}
+
+static INLINE unsigned
+prim_restart_search_i16(uint16_t *elts, unsigned push, uint16_t index)
+{
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
+}
+
+static INLINE unsigned
+prim_restart_search_i32(uint32_t *elts, unsigned push, uint32_t index)
+{
+ unsigned i;
+ for (i = 0; i < push; ++i)
+ if (elts[i] == index)
+ break;
+ return i;
+}
+
+static void
+emit_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint8_t *restrict elts = (uint8_t *)ctx->idxbuf + start;
+
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
+
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i08(elts, push, ctx->restart_index);
+
+ if (unlikely(ctx->edgeflag.buffer >= 0) && likely(nr))
+ set_edgeflag(ctx, elts[0]);
+
+ size = ctx->vertex_words * nr;
+
+ BEGIN_NIC0(ctx->push, NVC0_3D(VERTEX_DATA), size);
+
+ ctx->translate->run_elts8(ctx->translate, elts, nr, 0, ctx->instance_id,
+ ctx->push->cur);
+ ctx->push->cur += size;
+
+ if (unlikely(ctx->need_vertex_id) && likely(size))
+ set_vertexid(ctx, elts[0]);
+
+ count -= nr;
+ elts += nr;
+
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_END_GL), 2);
+ PUSH_DATA (ctx->push, 0);
+ PUSH_DATA (ctx->push, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT |
+ (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT));
+ }
+ }
+}
+
+static void
+emit_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint16_t *restrict elts = (uint16_t *)ctx->idxbuf + start;
+
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
+
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i16(elts, push, ctx->restart_index);
+
+ if (unlikely(ctx->edgeflag.buffer >= 0) && likely(nr))
+ set_edgeflag(ctx, elts[0]);
+
+ size = ctx->vertex_words * nr;
+
+ BEGIN_NIC0(ctx->push, NVC0_3D(VERTEX_DATA), size);
+
+ ctx->translate->run_elts16(ctx->translate, elts, nr, 0, ctx->instance_id,
+ ctx->push->cur);
+ ctx->push->cur += size;
+
+ if (unlikely(ctx->need_vertex_id))
+ set_vertexid(ctx, elts[0]);
+
+ count -= nr;
+ elts += nr;
+
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_END_GL), 2);
+ PUSH_DATA (ctx->push, 0);
+ PUSH_DATA (ctx->push, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT |
+ (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT));
+ }
+ }
+}
+
+static void
+emit_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
+{
+ uint32_t *restrict elts = (uint32_t *)ctx->idxbuf + start;
+
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size, nr;
+
+ nr = push;
+ if (ctx->primitive_restart)
+ nr = prim_restart_search_i32(elts, push, ctx->restart_index);
+
+ if (unlikely(ctx->edgeflag.buffer >= 0) && likely(nr))
+ set_edgeflag(ctx, elts[0]);
+
+ size = ctx->vertex_words * nr;
+
+ BEGIN_NIC0(ctx->push, NVC0_3D(VERTEX_DATA), size);
+
+ ctx->translate->run_elts(ctx->translate, elts, nr, 0, ctx->instance_id,
+ ctx->push->cur);
+ ctx->push->cur += size;
+
+ if (unlikely(ctx->need_vertex_id))
+ set_vertexid(ctx, elts[0]);
+
+ count -= nr;
+ elts += nr;
+
+ if (nr != push) {
+ count--;
+ elts++;
+ BEGIN_NVC0(ctx->push, NVC0_3D(VERTEX_END_GL), 2);
+ PUSH_DATA (ctx->push, 0);
+ PUSH_DATA (ctx->push, NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_CONT |
+ (ctx->prim & ~NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT));
+ }
+ }
+}
+
+static void
+emit_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
+{
+ while (count) {
+ unsigned push = MIN2(count, ctx->packet_vertex_limit);
+ unsigned size = ctx->vertex_words * push;
+
+ if (unlikely(ctx->edgeflag.buffer >= 0))
+ set_edgeflag(ctx, start);
+
+ BEGIN_NIC0(ctx->push, NVC0_3D(VERTEX_DATA), size);
+
+ ctx->translate->run(ctx->translate, start, push, 0, ctx->instance_id,
+ ctx->push->cur);
+ ctx->push->cur += size;
+
+ if (unlikely(ctx->need_vertex_id))
+ set_vertexid(ctx, start);
+
+ count -= push;
+ start += push;
+ }
+}
+
+
+#define NVC0_PRIM_GL_CASE(n) \
+ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+static INLINE unsigned
+nvc0_prim_gl(unsigned prim)
+{
+ switch (prim) {
+ NVC0_PRIM_GL_CASE(POINTS);
+ NVC0_PRIM_GL_CASE(LINES);
+ NVC0_PRIM_GL_CASE(LINE_LOOP);
+ NVC0_PRIM_GL_CASE(LINE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLES);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
+ NVC0_PRIM_GL_CASE(QUADS);
+ NVC0_PRIM_GL_CASE(QUAD_STRIP);
+ NVC0_PRIM_GL_CASE(POLYGON);
+ NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+ /*
+ NVC0_PRIM_GL_CASE(PATCHES); */
+ default:
+ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+ break;
+ }
+}
+
+void
+nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
+{
+ struct push_context ctx;
+ unsigned i, index_size;
+ unsigned inst_count = info->instance_count;
+ unsigned vert_count = info->count;
+ boolean apply_bias = info->indexed && info->index_bias;
+
+ init_push_context(nvc0, &ctx);
+
+ for (i = 0; i < nvc0->num_vtxbufs; ++i) {
+ uint8_t *data;
+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i];
+ struct nv04_resource *res = nv04_resource(vb->buffer);
+
+ data = nouveau_resource_map_offset(&nvc0->base, res,
+ vb->buffer_offset, NOUVEAU_BO_RD);
+
+ if (apply_bias && likely(!(nvc0->vertex->instance_bufs & (1 << i))))
+ data += info->index_bias * vb->stride;
+
+ ctx.translate->set_buffer(ctx.translate, i, data, vb->stride, ~0);
+
+ if (unlikely(i == ctx.edgeflag.buffer)) {
+ ctx.edgeflag.data = data + ctx.edgeflag.offset;
+ ctx.edgeflag.stride = vb->stride;
+ }
+ }
+
+ if (info->indexed) {
+ ctx.idxbuf =
+ nouveau_resource_map_offset(&nvc0->base,
+ nv04_resource(nvc0->idxbuf.buffer),
+ nvc0->idxbuf.offset, NOUVEAU_BO_RD);
+ if (!ctx.idxbuf)
+ return;
+ index_size = nvc0->idxbuf.index_size;
+ ctx.primitive_restart = info->primitive_restart;
+ ctx.restart_index = info->restart_index;
+ } else {
+ ctx.idxbuf = NULL;
+ index_size = 0;
+ ctx.primitive_restart = FALSE;
+ ctx.restart_index = 0;
+
+ if (info->count_from_stream_output) {
+ struct pipe_context *pipe = &nvc0->base.pipe;
+ struct nvc0_so_target *targ;
+ targ = nvc0_so_target(info->count_from_stream_output);
+ pipe->get_query_result(pipe, targ->pq, TRUE, (void*)&vert_count);
+ vert_count /= targ->stride;
+ }
+ }
+
+ ctx.instance_id = info->start_instance;
+ ctx.prim = nvc0_prim_gl(info->mode);
+
+ if (unlikely(ctx.need_vertex_id)) {
+ const unsigned a = nvc0->vertex->num_elements;
+ BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1);
+ PUSH_DATA (ctx.push, (a << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT) |
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32);
+ BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 1);
+ PUSH_DATA (ctx.push, (((0x80 + a * 0x10) / 4) << 4) | 1);
+ }
+
+ while (inst_count--) {
+ BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (ctx.push, ctx.prim);
+ switch (index_size) {
+ case 0:
+ emit_vertices_seq(&ctx, info->start, vert_count);
+ break;
+ case 1:
+ emit_vertices_i08(&ctx, info->start, vert_count);
+ break;
+ case 2:
+ emit_vertices_i16(&ctx, info->start, vert_count);
+ break;
+ case 4:
+ emit_vertices_i32(&ctx, info->start, vert_count);
+ break;
+ default:
+ assert(0);
+ break;
+ }
+ IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_END_GL), 0);
+
+ ctx.instance_id++;
+ ctx.prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+
+ if (unlikely(ctx.edgeflag.value == 0.0f))
+ IMMED_NVC0(ctx.push, NVC0_3D(EDGEFLAG), 1);
+
+ if (unlikely(ctx.need_vertex_id)) {
+ const unsigned a = nvc0->vertex->num_elements;
+ IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 0);
+ BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1);
+ PUSH_DATA (ctx.push,
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST |
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32);
+ }
+
+ if (info->indexed)
+ nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer));
+
+ for (i = 0; i < nvc0->num_vtxbufs; ++i)
+ nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer));
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_query.c b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
new file mode 100644
index 0000000..21aa358
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_query.c
@@ -0,0 +1,1448 @@
+/*
+ * Copyright 2011 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christoph Bumiller
+ */
+
+#define NVC0_PUSH_EXPLICIT_SPACE_CHECKING
+
+#include "nvc0/nvc0_context.h"
+#include "nv_object.xml.h"
+#include "nvc0/nve4_compute.xml.h"
+#include "nvc0/nvc0_compute.xml.h"
+
+#define NVC0_QUERY_STATE_READY 0
+#define NVC0_QUERY_STATE_ACTIVE 1
+#define NVC0_QUERY_STATE_ENDED 2
+#define NVC0_QUERY_STATE_FLUSHED 3
+
+struct nvc0_query {
+ uint32_t *data;
+ uint16_t type;
+ uint16_t index;
+ int8_t ctr[4];
+ uint32_t sequence;
+ struct nouveau_bo *bo;
+ uint32_t base;
+ uint32_t offset; /* base + i * rotate */
+ uint8_t state;
+ boolean is64bit;
+ uint8_t rotate;
+ int nesting; /* only used for occlusion queries */
+ union {
+ struct nouveau_mm_allocation *mm;
+ uint64_t value;
+ } u;
+ struct nouveau_fence *fence;
+};
+
+#define NVC0_QUERY_ALLOC_SPACE 256
+
+static void nvc0_mp_pm_query_begin(struct nvc0_context *, struct nvc0_query *);
+static void nvc0_mp_pm_query_end(struct nvc0_context *, struct nvc0_query *);
+static boolean nvc0_mp_pm_query_result(struct nvc0_context *,
+ struct nvc0_query *, void *, boolean);
+
+static INLINE struct nvc0_query *
+nvc0_query(struct pipe_query *pipe)
+{
+ return (struct nvc0_query *)pipe;
+}
+
+static boolean
+nvc0_query_allocate(struct nvc0_context *nvc0, struct nvc0_query *q, int size)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ int ret;
+
+ if (q->bo) {
+ nouveau_bo_ref(NULL, &q->bo);
+ if (q->u.mm) {
+ if (q->state == NVC0_QUERY_STATE_READY)
+ nouveau_mm_free(q->u.mm);
+ else
+ nouveau_fence_work(screen->base.fence.current,
+ nouveau_mm_free_work, q->u.mm);
+ }
+ }
+ if (size) {
+ q->u.mm = nouveau_mm_allocate(screen->base.mm_GART, size, &q->bo, &q->base);
+ if (!q->bo)
+ return FALSE;
+ q->offset = q->base;
+
+ ret = nouveau_bo_map(q->bo, 0, screen->base.client);
+ if (ret) {
+ nvc0_query_allocate(nvc0, q, 0);
+ return FALSE;
+ }
+ q->data = (uint32_t *)((uint8_t *)q->bo->map + q->base);
+ }
+ return TRUE;
+}
+
+static void
+nvc0_query_destroy(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ nvc0_query_allocate(nvc0_context(pipe), nvc0_query(pq), 0);
+ nouveau_fence_ref(NULL, &nvc0_query(pq)->fence);
+ FREE(nvc0_query(pq));
+}
+
+static struct pipe_query *
+nvc0_query_create(struct pipe_context *pipe, unsigned type)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nvc0_query *q;
+ unsigned space = NVC0_QUERY_ALLOC_SPACE;
+
+ q = CALLOC_STRUCT(nvc0_query);
+ if (!q)
+ return NULL;
+
+ switch (type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ q->rotate = 32;
+ space = NVC0_QUERY_ALLOC_SPACE;
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ q->is64bit = TRUE;
+ space = 512;
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ q->is64bit = TRUE;
+ space = 64;
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ q->is64bit = TRUE;
+ space = 32;
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ case PIPE_QUERY_TIMESTAMP:
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ case PIPE_QUERY_GPU_FINISHED:
+ space = 32;
+ break;
+ case NVC0_QUERY_TFB_BUFFER_OFFSET:
+ space = 16;
+ break;
+ default:
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+ if (type >= NVC0_QUERY_DRV_STAT(0) && type <= NVC0_QUERY_DRV_STAT_LAST) {
+ space = 0;
+ q->is64bit = true;
+ q->index = type - NVC0_QUERY_DRV_STAT(0);
+ break;
+ } else
+#endif
+ if (nvc0->screen->base.device->drm_version >= 0x01000101) {
+ if (type >= NVE4_PM_QUERY(0) && type <= NVE4_PM_QUERY_LAST) {
+ /* for each MP:
+ * [00] = WS0.C0
+ * [04] = WS0.C1
+ * [08] = WS0.C2
+ * [0c] = WS0.C3
+ * [10] = WS1.C0
+ * [14] = WS1.C1
+ * [18] = WS1.C2
+ * [1c] = WS1.C3
+ * [20] = WS2.C0
+ * [24] = WS2.C1
+ * [28] = WS2.C2
+ * [2c] = WS2.C3
+ * [30] = WS3.C0
+ * [34] = WS3.C1
+ * [38] = WS3.C2
+ * [3c] = WS3.C3
+ * [40] = MP.C4
+ * [44] = MP.C5
+ * [48] = MP.C6
+ * [4c] = MP.C7
+ * [50] = WS0.sequence
+ * [54] = WS1.sequence
+ * [58] = WS2.sequence
+ * [5c] = WS3.sequence
+ */
+ space = (4 * 4 + 4 + 4) * nvc0->screen->mp_count * sizeof(uint32_t);
+ break;
+ } else
+ if (type >= NVC0_PM_QUERY(0) && type <= NVC0_PM_QUERY_LAST) {
+ /* for each MP:
+ * [00] = MP.C0
+ * [04] = MP.C1
+ * [08] = MP.C2
+ * [0c] = MP.C3
+ * [10] = MP.C4
+ * [14] = MP.C5
+ * [18] = MP.C6
+ * [1c] = MP.C7
+ * [20] = MP.sequence
+ */
+ space = (8 + 1) * nvc0->screen->mp_count * sizeof(uint32_t);
+ break;
+ }
+ }
+ debug_printf("invalid query type: %u\n", type);
+ FREE(q);
+ return NULL;
+ }
+ if (!nvc0_query_allocate(nvc0, q, space)) {
+ FREE(q);
+ return NULL;
+ }
+
+ q->type = type;
+
+ if (q->rotate) {
+ /* we advance before query_begin ! */
+ q->offset -= q->rotate;
+ q->data -= q->rotate / sizeof(*q->data);
+ } else
+ if (!q->is64bit)
+ q->data[0] = 0; /* initialize sequence */
+
+ return (struct pipe_query *)q;
+}
+
+static void
+nvc0_query_get(struct nouveau_pushbuf *push, struct nvc0_query *q,
+ unsigned offset, uint32_t get)
+{
+ offset += q->offset;
+
+ PUSH_SPACE(push, 5);
+ PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_WR);
+ BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
+ PUSH_DATAh(push, q->bo->offset + offset);
+ PUSH_DATA (push, q->bo->offset + offset);
+ PUSH_DATA (push, q->sequence);
+ PUSH_DATA (push, get);
+}
+
+static void
+nvc0_query_rotate(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+ q->offset += q->rotate;
+ q->data += q->rotate / sizeof(*q->data);
+ if (q->offset - q->base == NVC0_QUERY_ALLOC_SPACE)
+ nvc0_query_allocate(nvc0, q, NVC0_QUERY_ALLOC_SPACE);
+}
+
+static void
+nvc0_query_begin(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_query *q = nvc0_query(pq);
+
+ /* For occlusion queries we have to change the storage, because a previous
+ * query might set the initial render conition to FALSE even *after* we re-
+ * initialized it to TRUE.
+ */
+ if (q->rotate) {
+ nvc0_query_rotate(nvc0, q);
+
+ /* XXX: can we do this with the GPU, and sync with respect to a previous
+ * query ?
+ */
+ q->data[0] = q->sequence; /* initialize sequence */
+ q->data[1] = 1; /* initial render condition = TRUE */
+ q->data[4] = q->sequence + 1; /* for comparison COND_MODE */
+ q->data[5] = 0;
+ }
+ q->sequence++;
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ q->nesting = nvc0->screen->num_occlusion_queries_active++;
+ if (q->nesting) {
+ nvc0_query_get(push, q, 0x10, 0x0100f002);
+ } else {
+ PUSH_SPACE(push, 3);
+ BEGIN_NVC0(push, NVC0_3D(COUNTER_RESET), 1);
+ PUSH_DATA (push, NVC0_3D_COUNTER_RESET_SAMPLECNT);
+ IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 1);
+ }
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nvc0_query_get(push, q, 0x10, 0x09005002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ nvc0_query_get(push, q, 0x10, 0x05805002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ nvc0_query_get(push, q, 0x20, 0x05805002 | (q->index << 5));
+ nvc0_query_get(push, q, 0x30, 0x06805002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ nvc0_query_get(push, q, 0x10, 0x03005002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ nvc0_query_get(push, q, 0x10, 0x00005002);
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ nvc0_query_get(push, q, 0xc0 + 0x00, 0x00801002); /* VFETCH, VERTICES */
+ nvc0_query_get(push, q, 0xc0 + 0x10, 0x01801002); /* VFETCH, PRIMS */
+ nvc0_query_get(push, q, 0xc0 + 0x20, 0x02802002); /* VP, LAUNCHES */
+ nvc0_query_get(push, q, 0xc0 + 0x30, 0x03806002); /* GP, LAUNCHES */
+ nvc0_query_get(push, q, 0xc0 + 0x40, 0x04806002); /* GP, PRIMS_OUT */
+ nvc0_query_get(push, q, 0xc0 + 0x50, 0x07804002); /* RAST, PRIMS_IN */
+ nvc0_query_get(push, q, 0xc0 + 0x60, 0x08804002); /* RAST, PRIMS_OUT */
+ nvc0_query_get(push, q, 0xc0 + 0x70, 0x0980a002); /* ROP, PIXELS */
+ nvc0_query_get(push, q, 0xc0 + 0x80, 0x0d808002); /* TCP, LAUNCHES */
+ nvc0_query_get(push, q, 0xc0 + 0x90, 0x0e809002); /* TEP, LAUNCHES */
+ break;
+ default:
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+ if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
+ q->type <= NVC0_QUERY_DRV_STAT_LAST) {
+ if (q->index >= 5)
+ q->u.value = nvc0->screen->base.stats.v[q->index];
+ else
+ q->u.value = 0;
+ } else
+#endif
+ if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
+ (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
+ nvc0_mp_pm_query_begin(nvc0, q);
+ }
+ break;
+ }
+ q->state = NVC0_QUERY_STATE_ACTIVE;
+}
+
+static void
+nvc0_query_end(struct pipe_context *pipe, struct pipe_query *pq)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_query *q = nvc0_query(pq);
+
+ if (q->state != NVC0_QUERY_STATE_ACTIVE) {
+ /* some queries don't require 'begin' to be called (e.g. GPU_FINISHED) */
+ if (q->rotate)
+ nvc0_query_rotate(nvc0, q);
+ q->sequence++;
+ }
+ q->state = NVC0_QUERY_STATE_ENDED;
+
+ switch (q->type) {
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ nvc0_query_get(push, q, 0, 0x0100f002);
+ if (--nvc0->screen->num_occlusion_queries_active == 0) {
+ PUSH_SPACE(push, 1);
+ IMMED_NVC0(push, NVC0_3D(SAMPLECNT_ENABLE), 0);
+ }
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED:
+ nvc0_query_get(push, q, 0, 0x09005002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_PRIMITIVES_EMITTED:
+ nvc0_query_get(push, q, 0, 0x05805002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ nvc0_query_get(push, q, 0x00, 0x05805002 | (q->index << 5));
+ nvc0_query_get(push, q, 0x10, 0x06805002 | (q->index << 5));
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ /* TODO: How do we sum over all streams for render condition ? */
+ /* PRIMS_DROPPED doesn't write sequence, use a ZERO query to sync on */
+ nvc0_query_get(push, q, 0x00, 0x03005002 | (q->index << 5));
+ nvc0_query_get(push, q, 0x20, 0x00005002);
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ case PIPE_QUERY_TIME_ELAPSED:
+ nvc0_query_get(push, q, 0, 0x00005002);
+ break;
+ case PIPE_QUERY_GPU_FINISHED:
+ nvc0_query_get(push, q, 0, 0x1000f010);
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ nvc0_query_get(push, q, 0x00, 0x00801002); /* VFETCH, VERTICES */
+ nvc0_query_get(push, q, 0x10, 0x01801002); /* VFETCH, PRIMS */
+ nvc0_query_get(push, q, 0x20, 0x02802002); /* VP, LAUNCHES */
+ nvc0_query_get(push, q, 0x30, 0x03806002); /* GP, LAUNCHES */
+ nvc0_query_get(push, q, 0x40, 0x04806002); /* GP, PRIMS_OUT */
+ nvc0_query_get(push, q, 0x50, 0x07804002); /* RAST, PRIMS_IN */
+ nvc0_query_get(push, q, 0x60, 0x08804002); /* RAST, PRIMS_OUT */
+ nvc0_query_get(push, q, 0x70, 0x0980a002); /* ROP, PIXELS */
+ nvc0_query_get(push, q, 0x80, 0x0d808002); /* TCP, LAUNCHES */
+ nvc0_query_get(push, q, 0x90, 0x0e809002); /* TEP, LAUNCHES */
+ break;
+ case NVC0_QUERY_TFB_BUFFER_OFFSET:
+ /* indexed by TFB buffer instead of by vertex stream */
+ nvc0_query_get(push, q, 0x00, 0x0d005002 | (q->index << 5));
+ break;
+ default:
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+ if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
+ q->type <= NVC0_QUERY_DRV_STAT_LAST) {
+ q->u.value = nvc0->screen->base.stats.v[q->index] - q->u.value;
+ return;
+ } else
+#endif
+ if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
+ (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
+ nvc0_mp_pm_query_end(nvc0, q);
+ }
+ break;
+ }
+ if (q->is64bit)
+ nouveau_fence_ref(nvc0->screen->base.fence.current, &q->fence);
+}
+
+static INLINE void
+nvc0_query_update(struct nouveau_client *cli, struct nvc0_query *q)
+{
+ if (q->is64bit) {
+ if (nouveau_fence_signalled(q->fence))
+ q->state = NVC0_QUERY_STATE_READY;
+ } else {
+ if (q->data[0] == q->sequence)
+ q->state = NVC0_QUERY_STATE_READY;
+ }
+}
+
+static boolean
+nvc0_query_result(struct pipe_context *pipe, struct pipe_query *pq,
+ boolean wait, union pipe_query_result *result)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nvc0_query *q = nvc0_query(pq);
+ uint64_t *res64 = (uint64_t*)result;
+ uint32_t *res32 = (uint32_t*)result;
+ boolean *res8 = (boolean*)result;
+ uint64_t *data64 = (uint64_t *)q->data;
+ unsigned i;
+
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+ if (q->type >= NVC0_QUERY_DRV_STAT(0) &&
+ q->type <= NVC0_QUERY_DRV_STAT_LAST) {
+ res64[0] = q->u.value;
+ return TRUE;
+ } else
+#endif
+ if ((q->type >= NVE4_PM_QUERY(0) && q->type <= NVE4_PM_QUERY_LAST) ||
+ (q->type >= NVC0_PM_QUERY(0) && q->type <= NVC0_PM_QUERY_LAST)) {
+ return nvc0_mp_pm_query_result(nvc0, q, result, wait);
+ }
+
+ if (q->state != NVC0_QUERY_STATE_READY)
+ nvc0_query_update(nvc0->screen->base.client, q);
+
+ if (q->state != NVC0_QUERY_STATE_READY) {
+ if (!wait) {
+ if (q->state != NVC0_QUERY_STATE_FLUSHED) {
+ q->state = NVC0_QUERY_STATE_FLUSHED;
+ /* flush for silly apps that spin on GL_QUERY_RESULT_AVAILABLE */
+ PUSH_KICK(nvc0->base.pushbuf);
+ }
+ return FALSE;
+ }
+ if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->screen->base.client))
+ return FALSE;
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, query_sync_count, 1);
+ }
+ q->state = NVC0_QUERY_STATE_READY;
+
+ switch (q->type) {
+ case PIPE_QUERY_GPU_FINISHED:
+ res8[0] = TRUE;
+ break;
+ case PIPE_QUERY_OCCLUSION_COUNTER: /* u32 sequence, u32 count, u64 time */
+ res64[0] = q->data[1] - q->data[5];
+ break;
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ res8[0] = q->data[1] != q->data[5];
+ break;
+ case PIPE_QUERY_PRIMITIVES_GENERATED: /* u64 count, u64 time */
+ case PIPE_QUERY_PRIMITIVES_EMITTED: /* u64 count, u64 time */
+ res64[0] = data64[0] - data64[2];
+ break;
+ case PIPE_QUERY_SO_STATISTICS:
+ res64[0] = data64[0] - data64[4];
+ res64[1] = data64[2] - data64[6];
+ break;
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ res8[0] = data64[0] != data64[2];
+ break;
+ case PIPE_QUERY_TIMESTAMP:
+ res64[0] = data64[1];
+ break;
+ case PIPE_QUERY_TIMESTAMP_DISJOINT:
+ res64[0] = 1000000000;
+ res8[8] = FALSE;
+ break;
+ case PIPE_QUERY_TIME_ELAPSED:
+ res64[0] = data64[1] - data64[3];
+ break;
+ case PIPE_QUERY_PIPELINE_STATISTICS:
+ for (i = 0; i < 10; ++i)
+ res64[i] = data64[i * 2] - data64[24 + i * 2];
+ break;
+ case NVC0_QUERY_TFB_BUFFER_OFFSET:
+ res32[0] = q->data[1];
+ break;
+ default:
+ assert(0); /* can't happen, we don't create queries with invalid type */
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
+void
+nvc0_query_fifo_wait(struct nouveau_pushbuf *push, struct pipe_query *pq)
+{
+ struct nvc0_query *q = nvc0_query(pq);
+ unsigned offset = q->offset;
+
+ if (q->type == PIPE_QUERY_SO_OVERFLOW_PREDICATE) offset += 0x20;
+
+ PUSH_SPACE(push, 5);
+ PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ BEGIN_NVC0(push, SUBC_3D(NV84_SUBCHAN_SEMAPHORE_ADDRESS_HIGH), 4);
+ PUSH_DATAh(push, q->bo->offset + offset);
+ PUSH_DATA (push, q->bo->offset + offset);
+ PUSH_DATA (push, q->sequence);
+ PUSH_DATA (push, (1 << 12) |
+ NV84_SUBCHAN_SEMAPHORE_TRIGGER_ACQUIRE_EQUAL);
+}
+
+static void
+nvc0_render_condition(struct pipe_context *pipe,
+ struct pipe_query *pq,
+ boolean condition, uint mode)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_query *q;
+ uint32_t cond;
+ boolean negated = FALSE;
+ boolean wait =
+ mode != PIPE_RENDER_COND_NO_WAIT &&
+ mode != PIPE_RENDER_COND_BY_REGION_NO_WAIT;
+
+ nvc0->cond_query = pq;
+ nvc0->cond_cond = condition;
+ nvc0->cond_mode = mode;
+
+ if (!pq) {
+ PUSH_SPACE(push, 1);
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+ return;
+ }
+ q = nvc0_query(pq);
+
+ /* NOTE: comparison of 2 queries only works if both have completed */
+ switch (q->type) {
+ case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
+ cond = negated ? NVC0_3D_COND_MODE_EQUAL :
+ NVC0_3D_COND_MODE_NOT_EQUAL;
+ wait = TRUE;
+ break;
+ case PIPE_QUERY_OCCLUSION_COUNTER:
+ case PIPE_QUERY_OCCLUSION_PREDICATE:
+ if (likely(!negated)) {
+ if (unlikely(q->nesting))
+ cond = wait ? NVC0_3D_COND_MODE_NOT_EQUAL :
+ NVC0_3D_COND_MODE_ALWAYS;
+ else
+ cond = NVC0_3D_COND_MODE_RES_NON_ZERO;
+ } else {
+ cond = wait ? NVC0_3D_COND_MODE_EQUAL : NVC0_3D_COND_MODE_ALWAYS;
+ }
+ break;
+ default:
+ assert(!"render condition query not a predicate");
+ mode = NVC0_3D_COND_MODE_ALWAYS;
+ break;
+ }
+
+ if (wait)
+ nvc0_query_fifo_wait(push, pq);
+
+ PUSH_SPACE(push, 4);
+ PUSH_REFN (push, q->bo, NOUVEAU_BO_GART | NOUVEAU_BO_RD);
+ BEGIN_NVC0(push, NVC0_3D(COND_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, q->bo->offset + q->offset);
+ PUSH_DATA (push, q->bo->offset + q->offset);
+ PUSH_DATA (push, cond);
+}
+
+void
+nvc0_query_pushbuf_submit(struct nouveau_pushbuf *push,
+ struct pipe_query *pq, unsigned result_offset)
+{
+ struct nvc0_query *q = nvc0_query(pq);
+
+#define NVC0_IB_ENTRY_1_NO_PREFETCH (1 << (31 - 8))
+
+ nouveau_pushbuf_space(push, 0, 0, 1);
+ nouveau_pushbuf_data(push, q->bo, q->offset + result_offset, 4 |
+ NVC0_IB_ENTRY_1_NO_PREFETCH);
+}
+
+void
+nvc0_so_target_save_offset(struct pipe_context *pipe,
+ struct pipe_stream_output_target *ptarg,
+ unsigned index, boolean *serialize)
+{
+ struct nvc0_so_target *targ = nvc0_so_target(ptarg);
+
+ if (*serialize) {
+ *serialize = FALSE;
+ PUSH_SPACE(nvc0_context(pipe)->base.pushbuf, 1);
+ IMMED_NVC0(nvc0_context(pipe)->base.pushbuf, NVC0_3D(SERIALIZE), 0);
+
+ NOUVEAU_DRV_STAT(nouveau_screen(pipe->screen), gpu_serialize_count, 1);
+ }
+
+ nvc0_query(targ->pq)->index = index;
+
+ nvc0_query_end(pipe, targ->pq);
+}
+
+
+/* === DRIVER STATISTICS === */
+
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+
+static const char *nvc0_drv_stat_names[] =
+{
+ "drv-tex_obj_current_count",
+ "drv-tex_obj_current_bytes",
+ "drv-buf_obj_current_count",
+ "drv-buf_obj_current_bytes_vid",
+ "drv-buf_obj_current_bytes_sys",
+ "drv-tex_transfers_rd",
+ "drv-tex_transfers_wr",
+ "drv-tex_copy_count",
+ "drv-tex_blit_count",
+ "drv-tex_cache_flush_count",
+ "drv-buf_transfers_rd",
+ "drv-buf_transfers_wr",
+ "drv-buf_read_bytes_staging_vid",
+ "drv-buf_write_bytes_direct",
+ "drv-buf_write_bytes_staging_vid",
+ "drv-buf_write_bytes_staging_sys",
+ "drv-buf_copy_bytes",
+ "drv-buf_non_kernel_fence_sync_count",
+ "drv-any_non_kernel_fence_sync_count",
+ "drv-query_sync_count",
+ "drv-gpu_serialize_count",
+ "drv-draw_calls_array",
+ "drv-draw_calls_indexed",
+ "drv-draw_calls_fallback_count",
+ "drv-user_buffer_upload_bytes",
+ "drv-constbuf_upload_count",
+ "drv-constbuf_upload_bytes",
+ "drv-pushbuf_count",
+ "drv-resource_validate_count"
+};
+
+#endif /* NOUVEAU_ENABLE_DRIVER_STATISTICS */
+
+
+/* === PERFORMANCE MONITORING COUNTERS for NVE4+ === */
+
+/* Code to read out MP counters: They are accessible via mmio, too, but let's
+ * just avoid mapping registers in userspace. We'd have to know which MPs are
+ * enabled/present, too, and that information is not presently exposed.
+ * We could add a kernel interface for it, but reading the counters like this
+ * has the advantage of being async (if get_result isn't called immediately).
+ */
+static const uint64_t nve4_read_mp_pm_counters_code[] =
+{
+ /* sched 0x20 0x20 0x20 0x20 0x20 0x20 0x20
+ * mov b32 $r8 $tidx
+ * mov b32 $r12 $physid
+ * mov b32 $r0 $pm0
+ * mov b32 $r1 $pm1
+ * mov b32 $r2 $pm2
+ * mov b32 $r3 $pm3
+ * mov b32 $r4 $pm4
+ * sched 0x20 0x20 0x23 0x04 0x20 0x04 0x2b
+ * mov b32 $r5 $pm5
+ * mov b32 $r6 $pm6
+ * mov b32 $r7 $pm7
+ * set $p0 0x1 eq u32 $r8 0x0
+ * mov b32 $r10 c0[0x0]
+ * ext u32 $r8 $r12 0x414
+ * mov b32 $r11 c0[0x4]
+ * sched 0x04 0x2e 0x04 0x20 0x20 0x28 0x04
+ * ext u32 $r9 $r12 0x208
+ * (not $p0) exit
+ * set $p1 0x1 eq u32 $r9 0x0
+ * mul $r8 u32 $r8 u32 96
+ * mul $r12 u32 $r9 u32 16
+ * mul $r13 u32 $r9 u32 4
+ * add b32 $r9 $r8 $r13
+ * sched 0x28 0x04 0x2c 0x04 0x2c 0x04 0x2c
+ * add b32 $r8 $r8 $r12
+ * mov b32 $r12 $r10
+ * add b32 $r10 $c $r10 $r8
+ * mov b32 $r13 $r11
+ * add b32 $r11 $r11 0x0 $c
+ * add b32 $r12 $c $r12 $r9
+ * st b128 wt g[$r10d] $r0q
+ * sched 0x4 0x2c 0x20 0x04 0x2e 0x00 0x00
+ * mov b32 $r0 c0[0x8]
+ * add b32 $r13 $r13 0x0 $c
+ * $p1 st b128 wt g[$r12d+0x40] $r4q
+ * st b32 wt g[$r12d+0x50] $r0
+ * exit */
+ 0x2202020202020207ULL,
+ 0x2c00000084021c04ULL,
+ 0x2c0000000c031c04ULL,
+ 0x2c00000010001c04ULL,
+ 0x2c00000014005c04ULL,
+ 0x2c00000018009c04ULL,
+ 0x2c0000001c00dc04ULL,
+ 0x2c00000020011c04ULL,
+ 0x22b0420042320207ULL,
+ 0x2c00000024015c04ULL,
+ 0x2c00000028019c04ULL,
+ 0x2c0000002c01dc04ULL,
+ 0x190e0000fc81dc03ULL,
+ 0x2800400000029de4ULL,
+ 0x7000c01050c21c03ULL,
+ 0x280040001002dde4ULL,
+ 0x204282020042e047ULL,
+ 0x7000c00820c25c03ULL,
+ 0x80000000000021e7ULL,
+ 0x190e0000fc93dc03ULL,
+ 0x1000000180821c02ULL,
+ 0x1000000040931c02ULL,
+ 0x1000000010935c02ULL,
+ 0x4800000034825c03ULL,
+ 0x22c042c042c04287ULL,
+ 0x4800000030821c03ULL,
+ 0x2800000028031de4ULL,
+ 0x4801000020a29c03ULL,
+ 0x280000002c035de4ULL,
+ 0x0800000000b2dc42ULL,
+ 0x4801000024c31c03ULL,
+ 0x9400000000a01fc5ULL,
+ 0x200002e04202c047ULL,
+ 0x2800400020001de4ULL,
+ 0x0800000000d35c42ULL,
+ 0x9400000100c107c5ULL,
+ 0x9400000140c01f85ULL,
+ 0x8000000000001de7ULL
+};
+
+/* NOTE: intentionally using the same names as NV */
+static const char *nve4_pm_query_names[] =
+{
+ /* MP counters */
+ "prof_trigger_00",
+ "prof_trigger_01",
+ "prof_trigger_02",
+ "prof_trigger_03",
+ "prof_trigger_04",
+ "prof_trigger_05",
+ "prof_trigger_06",
+ "prof_trigger_07",
+ "warps_launched",
+ "threads_launched",
+ "sm_cta_launched",
+ "inst_issued1",
+ "inst_issued2",
+ "inst_executed",
+ "local_load",
+ "local_store",
+ "shared_load",
+ "shared_store",
+ "l1_local_load_hit",
+ "l1_local_load_miss",
+ "l1_local_store_hit",
+ "l1_local_store_miss",
+ "gld_request",
+ "gst_request",
+ "l1_global_load_hit",
+ "l1_global_load_miss",
+ "uncached_global_load_transaction",
+ "global_store_transaction",
+ "branch",
+ "divergent_branch",
+ "active_warps",
+ "active_cycles",
+ "inst_issued",
+ "atom_count",
+ "gred_count",
+ "shared_load_replay",
+ "shared_store_replay",
+ "local_load_transactions",
+ "local_store_transactions",
+ "l1_shared_load_transactions",
+ "l1_shared_store_transactions",
+ "global_ld_mem_divergence_replays",
+ "global_st_mem_divergence_replays",
+ /* metrics, i.e. functions of the MP counters */
+ "metric-ipc", /* inst_executed, clock */
+ "metric-ipac", /* inst_executed, active_cycles */
+ "metric-ipec", /* inst_executed, (bool)inst_executed */
+ "metric-achieved_occupancy", /* active_warps, active_cycles */
+ "metric-sm_efficiency", /* active_cycles, clock */
+ "metric-inst_replay_overhead" /* inst_issued, inst_executed */
+};
+
+/* For simplicity, we will allocate as many group slots as we allocate counter
+ * slots. This means that a single counter which wants to source from 2 groups
+ * will have to be declared as using 2 counter slots. This shouldn't really be
+ * a problem because such queries don't make much sense ... (unless someone is
+ * really creative).
+ */
+struct nvc0_mp_counter_cfg
+{
+ uint32_t func : 16; /* mask or 4-bit logic op (depending on mode) */
+ uint32_t mode : 4; /* LOGOP,B6,LOGOP_B6(_PULSE) */
+ uint32_t num_src : 3; /* number of sources (1 - 6, only for NVC0:NVE4) */
+ uint32_t sig_dom : 1; /* if 0, MP_PM_A (per warp-sched), if 1, MP_PM_B */
+ uint32_t sig_sel : 8; /* signal group */
+ uint64_t src_sel; /* signal selection for up to 6 sources (48 bit) */
+};
+
+#define NVC0_COUNTER_OPn_SUM 0
+#define NVC0_COUNTER_OPn_OR 1
+#define NVC0_COUNTER_OPn_AND 2
+#define NVC0_COUNTER_OP2_REL_SUM_MM 3 /* (sum(ctr0) - sum(ctr1)) / sum(ctr0) */
+#define NVC0_COUNTER_OP2_DIV_SUM_M0 4 /* sum(ctr0) / ctr1 of MP[0]) */
+#define NVC0_COUNTER_OP2_AVG_DIV_MM 5 /* avg(ctr0 / ctr1) */
+#define NVC0_COUNTER_OP2_AVG_DIV_M0 6 /* avg(ctr0) / ctr1 of MP[0]) */
+
+struct nvc0_mp_pm_query_cfg
+{
+ struct nvc0_mp_counter_cfg ctr[4];
+ uint8_t num_counters;
+ uint8_t op;
+ uint8_t norm[2]; /* normalization num,denom */
+};
+
+#define _Q1A(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
+#define _Q1B(n, f, m, g, s, nu, dn) [NVE4_PM_QUERY_##n] = { { { f, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g, s }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { nu, dn } }
+#define _M2A(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
+ { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
+ { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g1, s1 }, \
+ {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
+#define _M2B(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
+ { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g0, s0 }, \
+ { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
+ {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
+#define _M2AB(n, f0, m0, g0, s0, f1, m1, g1, s1, o, nu, dn) [NVE4_PM_QUERY_METRIC_##n] = { { \
+ { f0, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m0, 0, 0, NVE4_COMPUTE_MP_PM_A_SIGSEL_##g0, s0 }, \
+ { f1, NVE4_COMPUTE_MP_PM_FUNC_MODE_##m1, 0, 1, NVE4_COMPUTE_MP_PM_B_SIGSEL_##g1, s1 }, \
+ {}, {}, }, 2, NVC0_COUNTER_OP2_##o, { nu, dn } }
+
+/* NOTES:
+ * active_warps: bit 0 alternates btw 0 and 1 for odd nr of warps
+ * inst_executed etc.: we only count a single warp scheduler
+ * metric-ipXc: we simply multiply by 4 to account for the 4 warp schedulers;
+ * this is inaccurate !
+ */
+static const struct nvc0_mp_pm_query_cfg nve4_mp_pm_queries[] =
+{
+ _Q1A(PROF_TRIGGER_0, 0x0001, B6, USER, 0x00000000, 1, 1),
+ _Q1A(PROF_TRIGGER_1, 0x0001, B6, USER, 0x00000004, 1, 1),
+ _Q1A(PROF_TRIGGER_2, 0x0001, B6, USER, 0x00000008, 1, 1),
+ _Q1A(PROF_TRIGGER_3, 0x0001, B6, USER, 0x0000000c, 1, 1),
+ _Q1A(PROF_TRIGGER_4, 0x0001, B6, USER, 0x00000010, 1, 1),
+ _Q1A(PROF_TRIGGER_5, 0x0001, B6, USER, 0x00000014, 1, 1),
+ _Q1A(PROF_TRIGGER_6, 0x0001, B6, USER, 0x00000018, 1, 1),
+ _Q1A(PROF_TRIGGER_7, 0x0001, B6, USER, 0x0000001c, 1, 1),
+ _Q1A(LAUNCHED_WARPS, 0x0001, B6, LAUNCH, 0x00000004, 1, 1),
+ _Q1A(LAUNCHED_THREADS, 0x003f, B6, LAUNCH, 0x398a4188, 1, 1),
+ _Q1B(LAUNCHED_CTA, 0x0001, B6, WARP, 0x0000001c, 1, 1),
+ _Q1A(INST_ISSUED1, 0x0001, B6, ISSUE, 0x00000004, 1, 1),
+ _Q1A(INST_ISSUED2, 0x0001, B6, ISSUE, 0x00000008, 1, 1),
+ _Q1A(INST_ISSUED, 0x0003, B6, ISSUE, 0x00000104, 1, 1),
+ _Q1A(INST_EXECUTED, 0x0003, B6, EXEC, 0x00000398, 1, 1),
+ _Q1A(LD_SHARED, 0x0001, B6, LDST, 0x00000000, 1, 1),
+ _Q1A(ST_SHARED, 0x0001, B6, LDST, 0x00000004, 1, 1),
+ _Q1A(LD_LOCAL, 0x0001, B6, LDST, 0x00000008, 1, 1),
+ _Q1A(ST_LOCAL, 0x0001, B6, LDST, 0x0000000c, 1, 1),
+ _Q1A(GLD_REQUEST, 0x0001, B6, LDST, 0x00000010, 1, 1),
+ _Q1A(GST_REQUEST, 0x0001, B6, LDST, 0x00000014, 1, 1),
+ _Q1B(L1_LOCAL_LOAD_HIT, 0x0001, B6, L1, 0x00000000, 1, 1),
+ _Q1B(L1_LOCAL_LOAD_MISS, 0x0001, B6, L1, 0x00000004, 1, 1),
+ _Q1B(L1_LOCAL_STORE_HIT, 0x0001, B6, L1, 0x00000008, 1, 1),
+ _Q1B(L1_LOCAL_STORE_MISS, 0x0001, B6, L1, 0x0000000c, 1, 1),
+ _Q1B(L1_GLOBAL_LOAD_HIT, 0x0001, B6, L1, 0x00000010, 1, 1),
+ _Q1B(L1_GLOBAL_LOAD_MISS, 0x0001, B6, L1, 0x00000014, 1, 1),
+ _Q1B(GLD_TRANSACTIONS_UNCACHED, 0x0001, B6, MEM, 0x00000000, 1, 1),
+ _Q1B(GST_TRANSACTIONS, 0x0001, B6, MEM, 0x00000004, 1, 1),
+ _Q1A(BRANCH, 0x0001, B6, BRANCH, 0x0000000c, 1, 1),
+ _Q1A(BRANCH_DIVERGENT, 0x0001, B6, BRANCH, 0x00000010, 1, 1),
+ _Q1B(ACTIVE_WARPS, 0x003f, B6, WARP, 0x31483104, 2, 1),
+ _Q1B(ACTIVE_CYCLES, 0x0001, B6, WARP, 0x00000000, 1, 1),
+ _Q1A(ATOM_COUNT, 0x0001, B6, BRANCH, 0x00000000, 1, 1),
+ _Q1A(GRED_COUNT, 0x0001, B6, BRANCH, 0x00000008, 1, 1),
+ _Q1B(LD_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x00000008, 1, 1),
+ _Q1B(ST_SHARED_REPLAY, 0x0001, B6, REPLAY, 0x0000000c, 1, 1),
+ _Q1B(LD_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000000, 1, 1),
+ _Q1B(ST_LOCAL_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000004, 1, 1),
+ _Q1B(L1_LD_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x00000008, 1, 1),
+ _Q1B(L1_ST_SHARED_TRANSACTIONS, 0x0001, B6, TRANSACTION, 0x0000000c, 1, 1),
+ _Q1B(GLD_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000010, 1, 1),
+ _Q1B(GST_MEM_DIV_REPLAY, 0x0001, B6, REPLAY, 0x00000014, 1, 1),
+ _M2AB(IPC, 0x3, B6, EXEC, 0x398, 0xffff, LOGOP, WARP, 0x0, DIV_SUM_M0, 10, 1),
+ _M2AB(IPAC, 0x3, B6, EXEC, 0x398, 0x1, B6, WARP, 0x0, AVG_DIV_MM, 10, 1),
+ _M2A(IPEC, 0x3, B6, EXEC, 0x398, 0xe, LOGOP, EXEC, 0x398, AVG_DIV_MM, 10, 1),
+ _M2A(INST_REPLAY_OHEAD, 0x3, B6, ISSUE, 0x104, 0x3, B6, EXEC, 0x398, REL_SUM_MM, 100, 1),
+ _M2B(MP_OCCUPANCY, 0x3f, B6, WARP, 0x31483104, 0x01, B6, WARP, 0x0, AVG_DIV_MM, 200, 64),
+ _M2B(MP_EFFICIENCY, 0x01, B6, WARP, 0x0, 0xffff, LOGOP, WARP, 0x0, AVG_DIV_M0, 100, 1),
+};
+
+#undef _Q1A
+#undef _Q1B
+#undef _M2A
+#undef _M2B
+
+/* === PERFORMANCE MONITORING COUNTERS for NVC0:NVE4 === */
+static const uint64_t nvc0_read_mp_pm_counters_code[] =
+{
+ /* mov b32 $r8 $tidx
+ * mov b32 $r9 $physid
+ * mov b32 $r0 $pm0
+ * mov b32 $r1 $pm1
+ * mov b32 $r2 $pm2
+ * mov b32 $r3 $pm3
+ * mov b32 $r4 $pm4
+ * mov b32 $r5 $pm5
+ * mov b32 $r6 $pm6
+ * mov b32 $r7 $pm7
+ * set $p0 0x1 eq u32 $r8 0x0
+ * mov b32 $r10 c0[0x0]
+ * mov b32 $r11 c0[0x4]
+ * ext u32 $r8 $r9 0x414
+ * (not $p0) exit
+ * mul $r8 u32 $r8 u32 36
+ * add b32 $r10 $c $r10 $r8
+ * add b32 $r11 $r11 0x0 $c
+ * mov b32 $r8 c0[0x8]
+ * st b128 wt g[$r10d+0x00] $r0q
+ * st b128 wt g[$r10d+0x10] $r4q
+ * st b32 wt g[$r10d+0x20] $r8
+ * exit */
+ 0x2c00000084021c04ULL,
+ 0x2c0000000c025c04ULL,
+ 0x2c00000010001c04ULL,
+ 0x2c00000014005c04ULL,
+ 0x2c00000018009c04ULL,
+ 0x2c0000001c00dc04ULL,
+ 0x2c00000020011c04ULL,
+ 0x2c00000024015c04ULL,
+ 0x2c00000028019c04ULL,
+ 0x2c0000002c01dc04ULL,
+ 0x190e0000fc81dc03ULL,
+ 0x2800400000029de4ULL,
+ 0x280040001002dde4ULL,
+ 0x7000c01050921c03ULL,
+ 0x80000000000021e7ULL,
+ 0x1000000090821c02ULL,
+ 0x4801000020a29c03ULL,
+ 0x0800000000b2dc42ULL,
+ 0x2800400020021de4ULL,
+ 0x9400000000a01fc5ULL,
+ 0x9400000040a11fc5ULL,
+ 0x9400000080a21f85ULL,
+ 0x8000000000001de7ULL
+};
+
+static const char *nvc0_pm_query_names[] =
+{
+ /* MP counters */
+ "inst_executed",
+ "branch",
+ "divergent_branch",
+ "active_warps",
+ "active_cycles",
+ "warps_launched",
+ "threads_launched",
+ "shared_load",
+ "shared_store",
+ "local_load",
+ "local_store",
+ "gred_count",
+ "atom_count",
+ "gld_request",
+ "gst_request",
+ "inst_issued1_0",
+ "inst_issued1_1",
+ "inst_issued2_0",
+ "inst_issued2_1",
+ "thread_inst_executed_0",
+ "thread_inst_executed_1",
+ "thread_inst_executed_2",
+ "thread_inst_executed_3",
+ "prof_trigger_00",
+ "prof_trigger_01",
+ "prof_trigger_02",
+ "prof_trigger_03",
+ "prof_trigger_04",
+ "prof_trigger_05",
+ "prof_trigger_06",
+ "prof_trigger_07",
+};
+
+#define _Q(n, f, m, g, c, s0, s1, s2, s3, s4, s5) [NVC0_PM_QUERY_##n] = { { { f, NVC0_COMPUTE_MP_PM_OP_MODE_##m, c, 0, g, s0|(s1 << 8)|(s2 << 16)|(s3 << 24)|(s4##ULL << 32)|(s5##ULL << 40) }, {}, {}, {} }, 1, NVC0_COUNTER_OPn_SUM, { 1, 1 } }
+
+static const struct nvc0_mp_pm_query_cfg nvc0_mp_pm_queries[] =
+{
+ _Q(INST_EXECUTED, 0xaaaa, LOGOP, 0x2d, 3, 0x00, 0x11, 0x22, 0x00, 0x00, 0x00),
+ _Q(BRANCH, 0xaaaa, LOGOP, 0x1a, 2, 0x00, 0x11, 0x00, 0x00, 0x00, 0x00),
+ _Q(BRANCH_DIVERGENT, 0xaaaa, LOGOP, 0x19, 2, 0x20, 0x31, 0x00, 0x00, 0x00, 0x00),
+ _Q(ACTIVE_WARPS, 0xaaaa, LOGOP, 0x24, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
+ _Q(ACTIVE_CYCLES, 0xaaaa, LOGOP, 0x11, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(LAUNCHED_WARPS, 0xaaaa, LOGOP, 0x26, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(LAUNCHED_THREADS, 0xaaaa, LOGOP, 0x26, 6, 0x10, 0x21, 0x32, 0x43, 0x54, 0x65),
+ _Q(LD_SHARED, 0xaaaa, LOGOP, 0x64, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(ST_SHARED, 0xaaaa, LOGOP, 0x64, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(LD_LOCAL, 0xaaaa, LOGOP, 0x64, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(ST_LOCAL, 0xaaaa, LOGOP, 0x64, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(GRED_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(ATOM_COUNT, 0xaaaa, LOGOP, 0x63, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(GLD_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(GST_REQUEST, 0xaaaa, LOGOP, 0x64, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(INST_ISSUED1_0, 0xaaaa, LOGOP, 0x7e, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(INST_ISSUED1_1, 0xaaaa, LOGOP, 0x7e, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(INST_ISSUED2_0, 0xaaaa, LOGOP, 0x7e, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(INST_ISSUED2_1, 0xaaaa, LOGOP, 0x7e, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(TH_INST_EXECUTED_0, 0xaaaa, LOGOP, 0xa3, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
+ _Q(TH_INST_EXECUTED_1, 0xaaaa, LOGOP, 0xa5, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
+ _Q(TH_INST_EXECUTED_2, 0xaaaa, LOGOP, 0xa4, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
+ _Q(TH_INST_EXECUTED_3, 0xaaaa, LOGOP, 0xa6, 6, 0x00, 0x11, 0x22, 0x33, 0x44, 0x55),
+ _Q(PROF_TRIGGER_0, 0xaaaa, LOGOP, 0x01, 1, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(PROF_TRIGGER_1, 0xaaaa, LOGOP, 0x01, 1, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(PROF_TRIGGER_2, 0xaaaa, LOGOP, 0x01, 1, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(PROF_TRIGGER_3, 0xaaaa, LOGOP, 0x01, 1, 0x30, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(PROF_TRIGGER_4, 0xaaaa, LOGOP, 0x01, 1, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(PROF_TRIGGER_5, 0xaaaa, LOGOP, 0x01, 1, 0x50, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(PROF_TRIGGER_6, 0xaaaa, LOGOP, 0x01, 1, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00),
+ _Q(PROF_TRIGGER_7, 0xaaaa, LOGOP, 0x01, 1, 0x70, 0x00, 0x00, 0x00, 0x00, 0x00),
+};
+
+#undef _Q
+
+static const struct nvc0_mp_pm_query_cfg *
+nvc0_mp_pm_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+
+ if (screen->base.class_3d >= NVE4_3D_CLASS)
+ return &nve4_mp_pm_queries[q->type - PIPE_QUERY_DRIVER_SPECIFIC];
+ return &nvc0_mp_pm_queries[q->type - NVC0_PM_QUERY(0)];
+}
+
+void
+nvc0_mp_pm_query_begin(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const struct nvc0_mp_pm_query_cfg *cfg;
+ unsigned i, c;
+ unsigned num_ab[2] = { 0, 0 };
+
+ cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
+
+ /* check if we have enough free counter slots */
+ for (i = 0; i < cfg->num_counters; ++i)
+ num_ab[cfg->ctr[i].sig_dom]++;
+
+ if (screen->pm.num_mp_pm_active[0] + num_ab[0] > 4 ||
+ screen->pm.num_mp_pm_active[1] + num_ab[1] > 4) {
+ NOUVEAU_ERR("Not enough free MP counter slots !\n");
+ return;
+ }
+
+ assert(cfg->num_counters <= 4);
+ PUSH_SPACE(push, 4 * 8 + 6);
+
+ if (!screen->pm.mp_counters_enabled) {
+ screen->pm.mp_counters_enabled = TRUE;
+ BEGIN_NVC0(push, SUBC_SW(0x06ac), 1);
+ PUSH_DATA (push, 0x1fcb);
+ }
+
+ /* set sequence field to 0 (used to check if result is available) */
+ for (i = 0; i < screen->mp_count; ++i)
+ q->data[i * 10 + 10] = 0;
+
+ for (i = 0; i < cfg->num_counters; ++i) {
+ const unsigned d = cfg->ctr[i].sig_dom;
+
+ if (!screen->pm.num_mp_pm_active[d]) {
+ uint32_t m = (1 << 22) | (1 << (7 + (8 * !d)));
+ if (screen->pm.num_mp_pm_active[!d])
+ m |= 1 << (7 + (8 * d));
+ BEGIN_NVC0(push, SUBC_SW(0x0600), 1);
+ PUSH_DATA (push, m);
+ }
+ screen->pm.num_mp_pm_active[d]++;
+
+ for (c = d * 4; c < (d * 4 + 4); ++c) {
+ if (!screen->pm.mp_counter[c]) {
+ q->ctr[i] = c;
+ screen->pm.mp_counter[c] = (struct pipe_query *)q;
+ break;
+ }
+ }
+ assert(c <= (d * 4 + 3)); /* must succeed, already checked for space */
+
+ /* configure and reset the counter(s) */
+ if (screen->base.class_3d >= NVE4_3D_CLASS) {
+ if (d == 0)
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_A_SIGSEL(c & 3)), 1);
+ else
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_B_SIGSEL(c & 3)), 1);
+ PUSH_DATA (push, cfg->ctr[i].sig_sel);
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SRCSEL(c)), 1);
+ PUSH_DATA (push, cfg->ctr[i].src_sel + 0x2108421 * (c & 3));
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 1);
+ PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_SET(c)), 1);
+ PUSH_DATA (push, 0);
+ } else {
+ unsigned s;
+
+ for (s = 0; s < cfg->ctr[i].num_src; s++) {
+ BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SIGSEL(s)), 1);
+ PUSH_DATA (push, cfg->ctr[i].sig_sel);
+ BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SRCSEL(s)), 1);
+ PUSH_DATA (push, (cfg->ctr[i].src_sel >> (s * 8)) & 0xff);
+ BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(s)), 1);
+ PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
+ BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_SET(s)), 1);
+ PUSH_DATA (push, 0);
+ }
+ }
+ }
+}
+
+static void
+nvc0_mp_pm_query_end(struct nvc0_context *nvc0, struct nvc0_query *q)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ struct pipe_context *pipe = &nvc0->base.pipe;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const boolean is_nve4 = screen->base.class_3d >= NVE4_3D_CLASS;
+ uint32_t mask;
+ uint32_t input[3];
+ const uint block[3] = { 32, is_nve4 ? 4 : 1, 1 };
+ const uint grid[3] = { screen->mp_count, 1, 1 };
+ unsigned c;
+ const struct nvc0_mp_pm_query_cfg *cfg;
+
+ cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
+
+ if (unlikely(!screen->pm.prog)) {
+ struct nvc0_program *prog = CALLOC_STRUCT(nvc0_program);
+ prog->type = PIPE_SHADER_COMPUTE;
+ prog->translated = TRUE;
+ prog->num_gprs = 14;
+ prog->parm_size = 12;
+ if (is_nve4) {
+ prog->code = (uint32_t *)nve4_read_mp_pm_counters_code;
+ prog->code_size = sizeof(nve4_read_mp_pm_counters_code);
+ } else {
+ prog->code = (uint32_t *)nvc0_read_mp_pm_counters_code;
+ prog->code_size = sizeof(nvc0_read_mp_pm_counters_code);
+ }
+ screen->pm.prog = prog;
+ }
+
+ /* disable all counting */
+ PUSH_SPACE(push, 8);
+ for (c = 0; c < 8; ++c)
+ if (screen->pm.mp_counter[c]) {
+ if (is_nve4) {
+ IMMED_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(c)), 0);
+ } else {
+ IMMED_NVC0(push, NVC0_COMPUTE(MP_PM_OP(c)), 0);
+ }
+ }
+ /* release counters for this query */
+ for (c = 0; c < 8; ++c) {
+ if (nvc0_query(screen->pm.mp_counter[c]) == q) {
+ screen->pm.num_mp_pm_active[c / 4]--;
+ screen->pm.mp_counter[c] = NULL;
+ }
+ }
+
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_QUERY, NOUVEAU_BO_GART | NOUVEAU_BO_WR,
+ q->bo);
+
+ PUSH_SPACE(push, 1);
+ IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);
+
+ pipe->bind_compute_state(pipe, screen->pm.prog);
+ input[0] = (q->bo->offset + q->base);
+ input[1] = (q->bo->offset + q->base) >> 32;
+ input[2] = q->sequence;
+ pipe->launch_grid(pipe, block, grid, 0, input);
+
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_QUERY);
+
+ /* re-activate other counters */
+ PUSH_SPACE(push, 16);
+ mask = 0;
+ for (c = 0; c < 8; ++c) {
+ unsigned i;
+ q = nvc0_query(screen->pm.mp_counter[c]);
+ if (!q)
+ continue;
+ cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
+ for (i = 0; i < cfg->num_counters; ++i) {
+ if (mask & (1 << q->ctr[i]))
+ break;
+ mask |= 1 << q->ctr[i];
+ if (is_nve4) {
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_PM_FUNC(q->ctr[i])), 1);
+ } else {
+ BEGIN_NVC0(push, NVC0_COMPUTE(MP_PM_OP(q->ctr[i])), 1);
+ }
+ PUSH_DATA (push, (cfg->ctr[i].func << 4) | cfg->ctr[i].mode);
+ }
+ }
+}
+
+static INLINE boolean
+nvc0_mp_pm_query_read_data(uint32_t count[32][4],
+ struct nvc0_context *nvc0, boolean wait,
+ struct nvc0_query *q,
+ const struct nvc0_mp_pm_query_cfg *cfg,
+ unsigned mp_count)
+{
+ unsigned p, c;
+
+ for (p = 0; p < mp_count; ++p) {
+ const unsigned b = (0x24 / 4) * p;
+
+ for (c = 0; c < cfg->num_counters; ++c) {
+ if (q->data[b + 8] != q->sequence) {
+ if (!wait)
+ return FALSE;
+ if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
+ return FALSE;
+ }
+ count[p][c] = q->data[b + q->ctr[c]];
+ }
+ }
+ return TRUE;
+}
+
+static INLINE boolean
+nve4_mp_pm_query_read_data(uint32_t count[32][4],
+ struct nvc0_context *nvc0, boolean wait,
+ struct nvc0_query *q,
+ const struct nvc0_mp_pm_query_cfg *cfg,
+ unsigned mp_count)
+{
+ unsigned p, c, d;
+
+ for (p = 0; p < mp_count; ++p) {
+ const unsigned b = (0x60 / 4) * p;
+
+ for (c = 0; c < cfg->num_counters; ++c) {
+ count[p][c] = 0;
+ for (d = 0; d < ((q->ctr[c] & ~3) ? 1 : 4); ++d) {
+ if (q->data[b + 20 + d] != q->sequence) {
+ if (!wait)
+ return FALSE;
+ if (nouveau_bo_wait(q->bo, NOUVEAU_BO_RD, nvc0->base.client))
+ return FALSE;
+ }
+ if (q->ctr[c] & ~0x3)
+ count[p][c] = q->data[b + 16 + (q->ctr[c] & 3)];
+ else
+ count[p][c] += q->data[b + d * 4 + q->ctr[c]];
+ }
+ }
+ }
+ return TRUE;
+}
+
+/* Metric calculations:
+ * sum(x) ... sum of x over all MPs
+ * avg(x) ... average of x over all MPs
+ *
+ * IPC : sum(inst_executed) / clock
+ * INST_REPLAY_OHEAD: (sum(inst_issued) - sum(inst_executed)) / sum(inst_issued)
+ * MP_OCCUPANCY : avg((active_warps / 64) / active_cycles)
+ * MP_EFFICIENCY : avg(active_cycles / clock)
+ *
+ * NOTE: Interpretation of IPC requires knowledge of MP count.
+ */
+static boolean
+nvc0_mp_pm_query_result(struct nvc0_context *nvc0, struct nvc0_query *q,
+ void *result, boolean wait)
+{
+ uint32_t count[32][4];
+ uint64_t value = 0;
+ unsigned mp_count = MIN2(nvc0->screen->mp_count_compute, 32);
+ unsigned p, c;
+ const struct nvc0_mp_pm_query_cfg *cfg;
+ boolean ret;
+
+ cfg = nvc0_mp_pm_query_get_cfg(nvc0, q);
+
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS)
+ ret = nve4_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
+ else
+ ret = nvc0_mp_pm_query_read_data(count, nvc0, wait, q, cfg, mp_count);
+ if (!ret)
+ return FALSE;
+
+ if (cfg->op == NVC0_COUNTER_OPn_SUM) {
+ for (c = 0; c < cfg->num_counters; ++c)
+ for (p = 0; p < mp_count; ++p)
+ value += count[p][c];
+ value = (value * cfg->norm[0]) / cfg->norm[1];
+ } else
+ if (cfg->op == NVC0_COUNTER_OPn_OR) {
+ uint32_t v = 0;
+ for (c = 0; c < cfg->num_counters; ++c)
+ for (p = 0; p < mp_count; ++p)
+ v |= count[p][c];
+ value = (v * cfg->norm[0]) / cfg->norm[1];
+ } else
+ if (cfg->op == NVC0_COUNTER_OPn_AND) {
+ uint32_t v = ~0;
+ for (c = 0; c < cfg->num_counters; ++c)
+ for (p = 0; p < mp_count; ++p)
+ v &= count[p][c];
+ value = (v * cfg->norm[0]) / cfg->norm[1];
+ } else
+ if (cfg->op == NVC0_COUNTER_OP2_REL_SUM_MM) {
+ uint64_t v[2] = { 0, 0 };
+ for (p = 0; p < mp_count; ++p) {
+ v[0] += count[p][0];
+ v[1] += count[p][1];
+ }
+ if (v[0])
+ value = ((v[0] - v[1]) * cfg->norm[0]) / (v[0] * cfg->norm[1]);
+ } else
+ if (cfg->op == NVC0_COUNTER_OP2_DIV_SUM_M0) {
+ for (p = 0; p < mp_count; ++p)
+ value += count[p][0];
+ if (count[0][1])
+ value = (value * cfg->norm[0]) / (count[0][1] * cfg->norm[1]);
+ else
+ value = 0;
+ } else
+ if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_MM) {
+ unsigned mp_used = 0;
+ for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
+ if (count[p][1])
+ value += (count[p][0] * cfg->norm[0]) / count[p][1];
+ if (mp_used)
+ value /= mp_used * cfg->norm[1];
+ } else
+ if (cfg->op == NVC0_COUNTER_OP2_AVG_DIV_M0) {
+ unsigned mp_used = 0;
+ for (p = 0; p < mp_count; ++p, mp_used += !!count[p][0])
+ value += count[p][0];
+ if (count[0][1] && mp_used) {
+ value *= cfg->norm[0];
+ value /= count[0][1] * mp_used * cfg->norm[1];
+ } else {
+ value = 0;
+ }
+ }
+
+ *(uint64_t *)result = value;
+ return TRUE;
+}
+
+int
+nvc0_screen_get_driver_query_info(struct pipe_screen *pscreen,
+ unsigned id,
+ struct pipe_driver_query_info *info)
+{
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+ int count = 0;
+
+ count += NVC0_QUERY_DRV_STAT_COUNT;
+
+ if (screen->base.device->drm_version >= 0x01000101) {
+ if (screen->base.class_3d >= NVE4_3D_CLASS) {
+ count += NVE4_PM_QUERY_COUNT;
+ } else
+ if (screen->compute) {
+ count += NVC0_PM_QUERY_COUNT; /* NVC0_COMPUTE is not always enabled */
+ }
+ }
+
+ if (!info)
+ return count;
+
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+ if (id < NVC0_QUERY_DRV_STAT_COUNT) {
+ info->name = nvc0_drv_stat_names[id];
+ info->query_type = NVC0_QUERY_DRV_STAT(id);
+ info->max_value = ~0ULL;
+ info->uses_byte_units = !!strstr(info->name, "bytes");
+ return 1;
+ } else
+#endif
+ if (id < count) {
+ if (screen->base.class_3d >= NVE4_3D_CLASS) {
+ info->name = nve4_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
+ info->query_type = NVE4_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
+ info->max_value = (id < NVE4_PM_QUERY_METRIC_MP_OCCUPANCY) ?
+ ~0ULL : 100;
+ info->uses_byte_units = FALSE;
+ return 1;
+ } else
+ if (screen->compute) {
+ info->name = nvc0_pm_query_names[id - NVC0_QUERY_DRV_STAT_COUNT];
+ info->query_type = NVC0_PM_QUERY(id - NVC0_QUERY_DRV_STAT_COUNT);
+ info->max_value = ~0ULL;
+ info->uses_byte_units = FALSE;
+ return 1;
+ }
+ }
+ /* user asked for info about non-existing query */
+ info->name = "this_is_not_the_query_you_are_looking_for";
+ info->query_type = 0xdeadd01d;
+ info->max_value = 0;
+ info->uses_byte_units = FALSE;
+ return 0;
+}
+
+void
+nvc0_init_query_functions(struct nvc0_context *nvc0)
+{
+ struct pipe_context *pipe = &nvc0->base.pipe;
+
+ pipe->create_query = nvc0_query_create;
+ pipe->destroy_query = nvc0_query_destroy;
+ pipe->begin_query = nvc0_query_begin;
+ pipe->end_query = nvc0_query_end;
+ pipe->get_query_result = nvc0_query_result;
+ pipe->render_condition = nvc0_render_condition;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
new file mode 100644
index 0000000..4e70903
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.c
@@ -0,0 +1,62 @@
+
+#include "pipe/p_context.h"
+#include "nvc0/nvc0_resource.h"
+#include "nouveau_screen.h"
+
+
+static struct pipe_resource *
+nvc0_resource_create(struct pipe_screen *screen,
+ const struct pipe_resource *templ)
+{
+ switch (templ->target) {
+ case PIPE_BUFFER:
+ return nouveau_buffer_create(screen, templ);
+ default:
+ return nvc0_miptree_create(screen, templ);
+ }
+}
+
+static struct pipe_resource *
+nvc0_resource_from_handle(struct pipe_screen * screen,
+ const struct pipe_resource *templ,
+ struct winsys_handle *whandle)
+{
+ if (templ->target == PIPE_BUFFER) {
+ return NULL;
+ } else {
+ struct pipe_resource *res = nv50_miptree_from_handle(screen,
+ templ, whandle);
+ nv04_resource(res)->vtbl = &nvc0_miptree_vtbl;
+ return res;
+ }
+}
+
+static struct pipe_surface *
+nvc0_surface_create(struct pipe_context *pipe,
+ struct pipe_resource *pres,
+ const struct pipe_surface *templ)
+{
+ if (unlikely(pres->target == PIPE_BUFFER))
+ return nv50_surface_from_buffer(pipe, pres, templ);
+ return nvc0_miptree_surface_new(pipe, pres, templ);
+}
+
+void
+nvc0_init_resource_functions(struct pipe_context *pcontext)
+{
+ pcontext->transfer_map = u_transfer_map_vtbl;
+ pcontext->transfer_flush_region = u_transfer_flush_region_vtbl;
+ pcontext->transfer_unmap = u_transfer_unmap_vtbl;
+ pcontext->transfer_inline_write = u_transfer_inline_write_vtbl;
+ pcontext->create_surface = nvc0_surface_create;
+ pcontext->surface_destroy = nv50_surface_destroy;
+}
+
+void
+nvc0_screen_init_resource_functions(struct pipe_screen *pscreen)
+{
+ pscreen->resource_create = nvc0_resource_create;
+ pscreen->resource_from_handle = nvc0_resource_from_handle;
+ pscreen->resource_get_handle = u_resource_get_handle_vtbl;
+ pscreen->resource_destroy = u_resource_destroy_vtbl;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_resource.h b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.h
new file mode 100644
index 0000000..0d5f026
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_resource.h
@@ -0,0 +1,58 @@
+
+#ifndef __NVC0_RESOURCE_H__
+#define __NVC0_RESOURCE_H__
+
+#include "nv50/nv50_resource.h"
+
+#define NVC0_RESOURCE_FLAG_VIDEO (NOUVEAU_RESOURCE_FLAG_DRV_PRIV << 0)
+
+
+#define NVC0_TILE_SHIFT_X(m) ((((m) >> 0) & 0xf) + 6)
+#define NVC0_TILE_SHIFT_Y(m) ((((m) >> 4) & 0xf) + 3)
+#define NVC0_TILE_SHIFT_Z(m) ((((m) >> 8) & 0xf) + 0)
+
+#define NVC0_TILE_SIZE_X(m) (64 << (((m) >> 0) & 0xf))
+#define NVC0_TILE_SIZE_Y(m) ( 8 << (((m) >> 4) & 0xf))
+#define NVC0_TILE_SIZE_Z(m) ( 1 << (((m) >> 8) & 0xf))
+
+/* it's ok to mask only in the end because max value is 3 * 5 */
+
+#define NVC0_TILE_SIZE_2D(m) ((64 * 8) << (((m) + ((m) >> 4)) & 0xf))
+
+#define NVC0_TILE_SIZE(m) ((64 * 8) << (((m) + ((m) >> 4) + ((m) >> 8)) & 0xf))
+
+
+void
+nvc0_init_resource_functions(struct pipe_context *pcontext);
+
+void
+nvc0_screen_init_resource_functions(struct pipe_screen *pscreen);
+
+/* Internal functions:
+ */
+struct pipe_resource *
+nvc0_miptree_create(struct pipe_screen *pscreen,
+ const struct pipe_resource *tmp);
+
+const struct u_resource_vtbl nvc0_miptree_vtbl;
+
+struct pipe_surface *
+nvc0_miptree_surface_new(struct pipe_context *,
+ struct pipe_resource *,
+ const struct pipe_surface *templ);
+
+unsigned
+nvc0_mt_zslice_offset(const struct nv50_miptree *, unsigned l, unsigned z);
+
+void *
+nvc0_miptree_transfer_map(struct pipe_context *pctx,
+ struct pipe_resource *res,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ struct pipe_transfer **ptransfer);
+void
+nvc0_miptree_transfer_unmap(struct pipe_context *pcontext,
+ struct pipe_transfer *ptx);
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
new file mode 100644
index 0000000..ff7890b
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.c
@@ -0,0 +1,967 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "util/u_format.h"
+#include "util/u_format_s3tc.h"
+#include "pipe/p_screen.h"
+
+#include "vl/vl_decoder.h"
+#include "vl/vl_video_buffer.h"
+
+#include "nouveau_vp3_video.h"
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_screen.h"
+
+#include "nvc0/nvc0_graph_macros.h"
+
+#ifndef NOUVEAU_GETPARAM_GRAPH_UNITS
+# define NOUVEAU_GETPARAM_GRAPH_UNITS 13
+#endif
+
+static boolean
+nvc0_screen_is_format_supported(struct pipe_screen *pscreen,
+ enum pipe_format format,
+ enum pipe_texture_target target,
+ unsigned sample_count,
+ unsigned bindings)
+{
+ if (sample_count > 8)
+ return FALSE;
+ if (!(0x117 & (1 << sample_count))) /* 0, 1, 2, 4 or 8 */
+ return FALSE;
+
+ if (!util_format_is_supported(format, bindings))
+ return FALSE;
+
+ if ((bindings & PIPE_BIND_SAMPLER_VIEW) && (target != PIPE_BUFFER))
+ if (util_format_get_blocksizebits(format) == 3 * 32)
+ return FALSE;
+
+ /* transfers & shared are always supported */
+ bindings &= ~(PIPE_BIND_TRANSFER_READ |
+ PIPE_BIND_TRANSFER_WRITE |
+ PIPE_BIND_SHARED);
+
+ return (nvc0_format_table[format].usage & bindings) == bindings;
+}
+
+static int
+nvc0_screen_get_param(struct pipe_screen *pscreen, enum pipe_cap param)
+{
+ const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
+
+ switch (param) {
+ case PIPE_CAP_MAX_COMBINED_SAMPLERS:
+ return 16 * 5;
+ case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+ case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+ return 15;
+ case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+ return (class_3d >= NVE4_3D_CLASS) ? 13 : 12;
+ case PIPE_CAP_MAX_TEXTURE_ARRAY_LAYERS:
+ return 2048;
+ case PIPE_CAP_MIN_TEXEL_OFFSET:
+ return -8;
+ case PIPE_CAP_MAX_TEXEL_OFFSET:
+ return 7;
+ case PIPE_CAP_TEXTURE_MIRROR_CLAMP:
+ case PIPE_CAP_TEXTURE_SWIZZLE:
+ case PIPE_CAP_TEXTURE_SHADOW_MAP:
+ case PIPE_CAP_NPOT_TEXTURES:
+ case PIPE_CAP_ANISOTROPIC_FILTER:
+ case PIPE_CAP_SEAMLESS_CUBE_MAP:
+ case PIPE_CAP_CUBE_MAP_ARRAY:
+ case PIPE_CAP_TEXTURE_BUFFER_OBJECTS:
+ case PIPE_CAP_TEXTURE_MULTISAMPLE:
+ return 1;
+ case PIPE_CAP_MAX_TEXTURE_BUFFER_SIZE:
+ return 65536;
+ case PIPE_CAP_SEAMLESS_CUBE_MAP_PER_TEXTURE:
+ return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
+ case PIPE_CAP_TWO_SIDED_STENCIL:
+ case PIPE_CAP_DEPTH_CLIP_DISABLE:
+ case PIPE_CAP_POINT_SPRITE:
+ case PIPE_CAP_TGSI_TEXCOORD:
+ return 1;
+ case PIPE_CAP_SM3:
+ return 1;
+ case PIPE_CAP_GLSL_FEATURE_LEVEL:
+ return 150;
+ case PIPE_CAP_MAX_RENDER_TARGETS:
+ return 8;
+ case PIPE_CAP_MAX_DUAL_SOURCE_RENDER_TARGETS:
+ return 1;
+ case PIPE_CAP_FRAGMENT_COLOR_CLAMPED:
+ case PIPE_CAP_VERTEX_COLOR_UNCLAMPED:
+ case PIPE_CAP_VERTEX_COLOR_CLAMPED:
+ return 1;
+ case PIPE_CAP_QUERY_TIMESTAMP:
+ case PIPE_CAP_QUERY_TIME_ELAPSED:
+ case PIPE_CAP_OCCLUSION_QUERY:
+ case PIPE_CAP_STREAM_OUTPUT_PAUSE_RESUME:
+ case PIPE_CAP_QUERY_PIPELINE_STATISTICS:
+ return 1;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_BUFFERS:
+ return 4;
+ case PIPE_CAP_MAX_STREAM_OUTPUT_SEPARATE_COMPONENTS:
+ case PIPE_CAP_MAX_STREAM_OUTPUT_INTERLEAVED_COMPONENTS:
+ return 128;
+ case PIPE_CAP_BLEND_EQUATION_SEPARATE:
+ case PIPE_CAP_INDEP_BLEND_ENABLE:
+ case PIPE_CAP_INDEP_BLEND_FUNC:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_UPPER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_HALF_INTEGER:
+ return 1;
+ case PIPE_CAP_TGSI_FS_COORD_ORIGIN_LOWER_LEFT:
+ case PIPE_CAP_TGSI_FS_COORD_PIXEL_CENTER_INTEGER:
+ return 0;
+ case PIPE_CAP_SHADER_STENCIL_EXPORT:
+ return 0;
+ case PIPE_CAP_PRIMITIVE_RESTART:
+ case PIPE_CAP_TGSI_INSTANCEID:
+ case PIPE_CAP_VERTEX_ELEMENT_INSTANCE_DIVISOR:
+ case PIPE_CAP_MIXED_COLORBUFFER_FORMATS:
+ case PIPE_CAP_CONDITIONAL_RENDER:
+ case PIPE_CAP_TEXTURE_BARRIER:
+ case PIPE_CAP_QUADS_FOLLOW_PROVOKING_VERTEX_CONVENTION:
+ case PIPE_CAP_START_INSTANCE:
+ return 1;
+ case PIPE_CAP_TGSI_CAN_COMPACT_CONSTANTS:
+ return 0; /* state trackers will know better */
+ case PIPE_CAP_USER_CONSTANT_BUFFERS:
+ case PIPE_CAP_USER_INDEX_BUFFERS:
+ case PIPE_CAP_USER_VERTEX_BUFFERS:
+ return 1;
+ case PIPE_CAP_CONSTANT_BUFFER_OFFSET_ALIGNMENT:
+ return 256;
+ case PIPE_CAP_TEXTURE_BUFFER_OFFSET_ALIGNMENT:
+ return 1; /* 256 for binding as RT, but that's not possible in GL */
+ case PIPE_CAP_MIN_MAP_BUFFER_ALIGNMENT:
+ return NOUVEAU_MIN_BUFFER_MAP_ALIGN;
+ case PIPE_CAP_VERTEX_BUFFER_OFFSET_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_BUFFER_STRIDE_4BYTE_ALIGNED_ONLY:
+ case PIPE_CAP_VERTEX_ELEMENT_SRC_OFFSET_4BYTE_ALIGNED_ONLY:
+ return 0;
+ case PIPE_CAP_COMPUTE:
+ return (class_3d >= NVE4_3D_CLASS) ? 1 : 0;
+ case PIPE_CAP_PREFER_BLIT_BASED_TEXTURE_TRANSFER:
+ return 1;
+ case PIPE_CAP_TEXTURE_BORDER_COLOR_QUIRK:
+ return PIPE_QUIRK_TEXTURE_BORDER_COLOR_SWIZZLE_NV50;
+ case PIPE_CAP_ENDIANNESS:
+ return PIPE_ENDIAN_LITTLE;
+ default:
+ NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static int
+nvc0_screen_get_shader_param(struct pipe_screen *pscreen, unsigned shader,
+ enum pipe_shader_cap param)
+{
+ const uint16_t class_3d = nouveau_screen(pscreen)->class_3d;
+
+ switch (shader) {
+ case PIPE_SHADER_VERTEX:
+ /*
+ case PIPE_SHADER_TESSELLATION_CONTROL:
+ case PIPE_SHADER_TESSELLATION_EVALUATION:
+ */
+ case PIPE_SHADER_GEOMETRY:
+ case PIPE_SHADER_FRAGMENT:
+ break;
+ case PIPE_SHADER_COMPUTE:
+ if (class_3d < NVE4_3D_CLASS)
+ return 0;
+ break;
+ default:
+ return 0;
+ }
+
+ switch (param) {
+ case PIPE_SHADER_CAP_PREFERRED_IR:
+ return PIPE_SHADER_IR_TGSI;
+ case PIPE_SHADER_CAP_MAX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_ALU_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INSTRUCTIONS:
+ case PIPE_SHADER_CAP_MAX_TEX_INDIRECTIONS:
+ return 16384;
+ case PIPE_SHADER_CAP_MAX_CONTROL_FLOW_DEPTH:
+ return 16;
+ case PIPE_SHADER_CAP_MAX_INPUTS:
+ if (shader == PIPE_SHADER_VERTEX)
+ return 32;
+ /* NOTE: These only count our slots for GENERIC varyings.
+ * The address space may be larger, but the actual hard limit seems to be
+ * less than what the address space layout permits, so don't add TEXCOORD,
+ * COLOR, etc. here.
+ */
+ if (shader == PIPE_SHADER_FRAGMENT)
+ return 0x1f0 / 16;
+ /* Actually this counts CLIPVERTEX, which occupies the last generic slot,
+ * and excludes 0x60 per-patch inputs.
+ */
+ return 0x200 / 16;
+ case PIPE_SHADER_CAP_MAX_CONSTS:
+ return 65536 / 16;
+ case PIPE_SHADER_CAP_MAX_CONST_BUFFERS:
+ if (shader == PIPE_SHADER_COMPUTE && class_3d >= NVE4_3D_CLASS)
+ return NVE4_MAX_PIPE_CONSTBUFS_COMPUTE;
+ return NVC0_MAX_PIPE_CONSTBUFS;
+ case PIPE_SHADER_CAP_MAX_ADDRS:
+ return 1;
+ case PIPE_SHADER_CAP_INDIRECT_INPUT_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_OUTPUT_ADDR:
+ return shader != PIPE_SHADER_FRAGMENT;
+ case PIPE_SHADER_CAP_INDIRECT_TEMP_ADDR:
+ case PIPE_SHADER_CAP_INDIRECT_CONST_ADDR:
+ return 1;
+ case PIPE_SHADER_CAP_MAX_PREDS:
+ return 0;
+ case PIPE_SHADER_CAP_MAX_TEMPS:
+ return NVC0_CAP_MAX_PROGRAM_TEMPS;
+ case PIPE_SHADER_CAP_TGSI_CONT_SUPPORTED:
+ return 1;
+ case PIPE_SHADER_CAP_TGSI_SQRT_SUPPORTED:
+ return 0;
+ case PIPE_SHADER_CAP_SUBROUTINES:
+ return 1;
+ case PIPE_SHADER_CAP_INTEGERS:
+ return 1;
+ case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLERS:
+ return 16; /* would be 32 in linked (OpenGL-style) mode */
+ /*
+ case PIPE_SHADER_CAP_MAX_TEXTURE_SAMPLER_VIEWS:
+ return 32;
+ */
+ default:
+ NOUVEAU_ERR("unknown PIPE_SHADER_CAP %d\n", param);
+ return 0;
+ }
+}
+
+static float
+nvc0_screen_get_paramf(struct pipe_screen *pscreen, enum pipe_capf param)
+{
+ switch (param) {
+ case PIPE_CAPF_MAX_LINE_WIDTH:
+ case PIPE_CAPF_MAX_LINE_WIDTH_AA:
+ return 10.0f;
+ case PIPE_CAPF_MAX_POINT_WIDTH:
+ return 63.0f;
+ case PIPE_CAPF_MAX_POINT_WIDTH_AA:
+ return 63.375f;
+ case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY:
+ return 16.0f;
+ case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS:
+ return 15.0f;
+ default:
+ NOUVEAU_ERR("unknown PIPE_CAP %d\n", param);
+ return 0.0f;
+ }
+}
+
+static int
+nvc0_screen_get_compute_param(struct pipe_screen *pscreen,
+ enum pipe_compute_cap param, void *data)
+{
+ uint64_t *data64 = (uint64_t *)data;
+ const uint16_t obj_class = nvc0_screen(pscreen)->compute->oclass;
+
+ switch (param) {
+ case PIPE_COMPUTE_CAP_GRID_DIMENSION:
+ data64[0] = 3;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_GRID_SIZE:
+ data64[0] = (obj_class >= NVE4_COMPUTE_CLASS) ? 0x7fffffff : 65535;
+ data64[1] = 65535;
+ data64[2] = 65535;
+ return 24;
+ case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE:
+ data64[0] = 1024;
+ data64[1] = 1024;
+ data64[2] = 64;
+ return 24;
+ case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK:
+ data64[0] = 1024;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: /* g[] */
+ data64[0] = (uint64_t)1 << 40;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: /* s[] */
+ data64[0] = 48 << 10;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: /* l[] */
+ data64[0] = 512 << 10;
+ return 8;
+ case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: /* c[], arbitrary limit */
+ data64[0] = 4096;
+ return 8;
+ default:
+ return 0;
+ }
+}
+
+static void
+nvc0_screen_destroy(struct pipe_screen *pscreen)
+{
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+
+ if (screen->base.fence.current) {
+ nouveau_fence_wait(screen->base.fence.current);
+ nouveau_fence_ref(NULL, &screen->base.fence.current);
+ }
+ if (screen->base.pushbuf)
+ screen->base.pushbuf->user_priv = NULL;
+
+ if (screen->blitter)
+ nvc0_blitter_destroy(screen);
+ if (screen->pm.prog) {
+ screen->pm.prog->code = NULL; /* hardcoded, don't FREE */
+ nvc0_program_destroy(NULL, screen->pm.prog);
+ }
+
+ nouveau_bo_ref(NULL, &screen->text);
+ nouveau_bo_ref(NULL, &screen->uniform_bo);
+ nouveau_bo_ref(NULL, &screen->tls);
+ nouveau_bo_ref(NULL, &screen->txc);
+ nouveau_bo_ref(NULL, &screen->fence.bo);
+ nouveau_bo_ref(NULL, &screen->poly_cache);
+ nouveau_bo_ref(NULL, &screen->parm);
+
+ nouveau_heap_destroy(&screen->lib_code);
+ nouveau_heap_destroy(&screen->text_heap);
+
+ FREE(screen->tic.entries);
+
+ nouveau_mm_destroy(screen->mm_VRAM_fe0);
+
+ nouveau_object_del(&screen->eng3d);
+ nouveau_object_del(&screen->eng2d);
+ nouveau_object_del(&screen->m2mf);
+ nouveau_object_del(&screen->compute);
+
+ nouveau_screen_fini(&screen->base);
+
+ FREE(screen);
+}
+
+static int
+nvc0_graph_set_macro(struct nvc0_screen *screen, uint32_t m, unsigned pos,
+ unsigned size, const uint32_t *data)
+{
+ struct nouveau_pushbuf *push = screen->base.pushbuf;
+
+ size /= 4;
+
+ BEGIN_NVC0(push, SUBC_3D(NVC0_GRAPH_MACRO_ID), 2);
+ PUSH_DATA (push, (m - 0x3800) / 8);
+ PUSH_DATA (push, pos);
+ BEGIN_1IC0(push, SUBC_3D(NVC0_GRAPH_MACRO_UPLOAD_POS), size + 1);
+ PUSH_DATA (push, pos);
+ PUSH_DATAp(push, data, size);
+
+ return pos + size;
+}
+
+static void
+nvc0_magic_3d_init(struct nouveau_pushbuf *push, uint16_t obj_class)
+{
+ BEGIN_NVC0(push, SUBC_3D(0x10cc), 1);
+ PUSH_DATA (push, 0xff);
+ BEGIN_NVC0(push, SUBC_3D(0x10e0), 2);
+ PUSH_DATA (push, 0xff);
+ PUSH_DATA (push, 0xff);
+ BEGIN_NVC0(push, SUBC_3D(0x10ec), 2);
+ PUSH_DATA (push, 0xff);
+ PUSH_DATA (push, 0xff);
+ BEGIN_NVC0(push, SUBC_3D(0x074c), 1);
+ PUSH_DATA (push, 0x3f);
+
+ BEGIN_NVC0(push, SUBC_3D(0x16a8), 1);
+ PUSH_DATA (push, (3 << 16) | 3);
+ BEGIN_NVC0(push, SUBC_3D(0x1794), 1);
+ PUSH_DATA (push, (2 << 16) | 2);
+ BEGIN_NVC0(push, SUBC_3D(0x0de8), 1);
+ PUSH_DATA (push, 1);
+
+ BEGIN_NVC0(push, SUBC_3D(0x12ac), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_3D(0x0218), 1);
+ PUSH_DATA (push, 0x10);
+ BEGIN_NVC0(push, SUBC_3D(0x10fc), 1);
+ PUSH_DATA (push, 0x10);
+ BEGIN_NVC0(push, SUBC_3D(0x1290), 1);
+ PUSH_DATA (push, 0x10);
+ BEGIN_NVC0(push, SUBC_3D(0x12d8), 2);
+ PUSH_DATA (push, 0x10);
+ PUSH_DATA (push, 0x10);
+ BEGIN_NVC0(push, SUBC_3D(0x1140), 1);
+ PUSH_DATA (push, 0x10);
+ BEGIN_NVC0(push, SUBC_3D(0x1610), 1);
+ PUSH_DATA (push, 0xe);
+
+ BEGIN_NVC0(push, SUBC_3D(0x164c), 1);
+ PUSH_DATA (push, 1 << 12);
+ BEGIN_NVC0(push, SUBC_3D(0x030c), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_3D(0x0300), 1);
+ PUSH_DATA (push, 3);
+
+ BEGIN_NVC0(push, SUBC_3D(0x02d0), 1);
+ PUSH_DATA (push, 0x3fffff);
+ BEGIN_NVC0(push, SUBC_3D(0x0fdc), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, SUBC_3D(0x19c0), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, SUBC_3D(0x075c), 1);
+ PUSH_DATA (push, 3);
+
+ if (obj_class >= NVE4_3D_CLASS) {
+ BEGIN_NVC0(push, SUBC_3D(0x07fc), 1);
+ PUSH_DATA (push, 1);
+ }
+
+ /* TODO: find out what software methods 0x1528, 0x1280 and (on nve4) 0x02dc
+ * are supposed to do */
+}
+
+static void
+nvc0_screen_fence_emit(struct pipe_screen *pscreen, u32 *sequence)
+{
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+ struct nouveau_pushbuf *push = screen->base.pushbuf;
+
+ /* we need to do it after possible flush in MARK_RING */
+ *sequence = ++screen->base.fence.sequence;
+
+ BEGIN_NVC0(push, NVC0_3D(QUERY_ADDRESS_HIGH), 4);
+ PUSH_DATAh(push, screen->fence.bo->offset);
+ PUSH_DATA (push, screen->fence.bo->offset);
+ PUSH_DATA (push, *sequence);
+ PUSH_DATA (push, NVC0_3D_QUERY_GET_FENCE | NVC0_3D_QUERY_GET_SHORT |
+ (0xf << NVC0_3D_QUERY_GET_UNIT__SHIFT));
+}
+
+static u32
+nvc0_screen_fence_update(struct pipe_screen *pscreen)
+{
+ struct nvc0_screen *screen = nvc0_screen(pscreen);
+ return screen->fence.map[0];
+}
+
+static int
+nvc0_screen_init_compute(struct nvc0_screen *screen)
+{
+ screen->base.base.get_compute_param = nvc0_screen_get_compute_param;
+
+ switch (screen->base.device->chipset & 0xf0) {
+ case 0xc0:
+ case 0xd0:
+ /* Using COMPUTE has weird effects on 3D state, we need to
+ * investigate this further before enabling it by default.
+ */
+ if (debug_get_bool_option("NVC0_COMPUTE", FALSE))
+ return nvc0_screen_compute_setup(screen, screen->base.pushbuf);
+ return 0;
+ case 0xe0:
+ case 0xf0:
+ return nve4_screen_compute_setup(screen, screen->base.pushbuf);
+ default:
+ return -1;
+ }
+}
+
+boolean
+nvc0_screen_resize_tls_area(struct nvc0_screen *screen,
+ uint32_t lpos, uint32_t lneg, uint32_t cstack)
+{
+ struct nouveau_bo *bo = NULL;
+ int ret;
+ uint64_t size = (lpos + lneg) * 32 + cstack;
+
+ if (size >= (1 << 20)) {
+ NOUVEAU_ERR("requested TLS size too large: 0x%"PRIx64"\n", size);
+ return FALSE;
+ }
+
+ size *= (screen->base.device->chipset >= 0xe0) ? 64 : 48; /* max warps */
+ size = align(size, 0x8000);
+ size *= screen->mp_count;
+
+ size = align(size, 1 << 17);
+
+ ret = nouveau_bo_new(screen->base.device, NOUVEAU_BO_VRAM, 1 << 17, size,
+ NULL, &bo);
+ if (ret) {
+ NOUVEAU_ERR("failed to allocate TLS area, size: 0x%"PRIx64"\n", size);
+ return FALSE;
+ }
+ nouveau_bo_ref(NULL, &screen->tls);
+ screen->tls = bo;
+ return TRUE;
+}
+
+#define FAIL_SCREEN_INIT(str, err) \
+ do { \
+ NOUVEAU_ERR(str, err); \
+ nvc0_screen_destroy(pscreen); \
+ return NULL; \
+ } while(0)
+
+struct pipe_screen *
+nvc0_screen_create(struct nouveau_device *dev)
+{
+ struct nvc0_screen *screen;
+ struct pipe_screen *pscreen;
+ struct nouveau_object *chan;
+ struct nouveau_pushbuf *push;
+ uint64_t value;
+ uint32_t obj_class;
+ int ret;
+ unsigned i;
+ union nouveau_bo_config mm_config;
+
+ switch (dev->chipset & ~0xf) {
+ case 0xc0:
+ case 0xd0:
+ case 0xe0:
+ case 0xf0:
+ break;
+ default:
+ return NULL;
+ }
+
+ screen = CALLOC_STRUCT(nvc0_screen);
+ if (!screen)
+ return NULL;
+ pscreen = &screen->base.base;
+
+ ret = nouveau_screen_init(&screen->base, dev);
+ if (ret) {
+ nvc0_screen_destroy(pscreen);
+ return NULL;
+ }
+ chan = screen->base.channel;
+ push = screen->base.pushbuf;
+ push->user_priv = screen;
+ push->rsvd_kick = 5;
+
+ screen->base.vidmem_bindings |= PIPE_BIND_CONSTANT_BUFFER |
+ PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
+ screen->base.sysmem_bindings |=
+ PIPE_BIND_VERTEX_BUFFER | PIPE_BIND_INDEX_BUFFER;
+
+ pscreen->destroy = nvc0_screen_destroy;
+ pscreen->context_create = nvc0_create;
+ pscreen->is_format_supported = nvc0_screen_is_format_supported;
+ pscreen->get_param = nvc0_screen_get_param;
+ pscreen->get_shader_param = nvc0_screen_get_shader_param;
+ pscreen->get_paramf = nvc0_screen_get_paramf;
+ pscreen->get_driver_query_info = nvc0_screen_get_driver_query_info;
+
+ nvc0_screen_init_resource_functions(pscreen);
+
+ screen->base.base.get_video_param = nouveau_vp3_screen_get_video_param;
+ screen->base.base.is_video_format_supported = nouveau_vp3_screen_video_supported;
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0, 4096, NULL,
+ &screen->fence.bo);
+ if (ret)
+ goto fail;
+ nouveau_bo_map(screen->fence.bo, 0, NULL);
+ screen->fence.map = screen->fence.bo->map;
+ screen->base.fence.emit = nvc0_screen_fence_emit;
+ screen->base.fence.update = nvc0_screen_fence_update;
+
+ switch (dev->chipset & 0xf0) {
+ case 0xf0:
+ obj_class = NVF0_P2MF_CLASS;
+ break;
+ case 0xe0:
+ obj_class = NVE4_P2MF_CLASS;
+ break;
+ default:
+ obj_class = NVC0_M2MF_CLASS;
+ break;
+ }
+ ret = nouveau_object_new(chan, 0xbeef323f, obj_class, NULL, 0,
+ &screen->m2mf);
+ if (ret)
+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for M2MF: %d\n", ret);
+
+ BEGIN_NVC0(push, SUBC_M2MF(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->m2mf->oclass);
+ if (screen->m2mf->oclass == NVE4_P2MF_CLASS) {
+ BEGIN_NVC0(push, SUBC_COPY(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, 0xa0b5);
+ }
+
+ ret = nouveau_object_new(chan, 0xbeef902d, NVC0_2D_CLASS, NULL, 0,
+ &screen->eng2d);
+ if (ret)
+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for 2D: %d\n", ret);
+
+ BEGIN_NVC0(push, SUBC_2D(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->eng2d->oclass);
+ BEGIN_NVC0(push, NVC0_2D(SINGLE_GPC), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_2D(OPERATION), 1);
+ PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY);
+ BEGIN_NVC0(push, NVC0_2D(CLIP_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_2D(COLOR_KEY_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, SUBC_2D(0x0884), 1);
+ PUSH_DATA (push, 0x3f);
+ BEGIN_NVC0(push, SUBC_2D(0x0888), 1);
+ PUSH_DATA (push, 1);
+
+ BEGIN_NVC0(push, SUBC_2D(NVC0_GRAPH_NOTIFY_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->fence.bo->offset + 16);
+ PUSH_DATA (push, screen->fence.bo->offset + 16);
+
+ switch (dev->chipset & 0xf0) {
+ case 0xf0:
+ obj_class = NVF0_3D_CLASS;
+ break;
+ case 0xe0:
+ obj_class = NVE4_3D_CLASS;
+ break;
+ case 0xd0:
+ case 0xc0:
+ default:
+ switch (dev->chipset) {
+ case 0xd9:
+ case 0xc8:
+ obj_class = NVC8_3D_CLASS;
+ break;
+ case 0xc1:
+ obj_class = NVC1_3D_CLASS;
+ break;
+ default:
+ obj_class = NVC0_3D_CLASS;
+ break;
+ }
+ break;
+ }
+ ret = nouveau_object_new(chan, 0xbeef003d, obj_class, NULL, 0,
+ &screen->eng3d);
+ if (ret)
+ FAIL_SCREEN_INIT("Error allocating PGRAPH context for 3D: %d\n", ret);
+ screen->base.class_3d = obj_class;
+
+ BEGIN_NVC0(push, SUBC_3D(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->eng3d->oclass);
+
+ BEGIN_NVC0(push, NVC0_3D(COND_MODE), 1);
+ PUSH_DATA (push, NVC0_3D_COND_MODE_ALWAYS);
+
+ if (debug_get_bool_option("NOUVEAU_SHADER_WATCHDOG", TRUE)) {
+ /* kill shaders after about 1 second (at 100 MHz) */
+ BEGIN_NVC0(push, NVC0_3D(WATCHDOG_TIMER), 1);
+ PUSH_DATA (push, 0x17);
+ }
+
+ IMMED_NVC0(push, NVC0_3D(ZETA_COMP_ENABLE), dev->drm_version >= 0x01000101);
+ BEGIN_NVC0(push, NVC0_3D(RT_COMP_ENABLE(0)), 8);
+ for (i = 0; i < 8; ++i)
+ PUSH_DATA(push, dev->drm_version >= 0x01000101);
+
+ BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, 1);
+
+ BEGIN_NVC0(push, NVC0_3D(CSAA_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), 1);
+ PUSH_DATA (push, NVC0_3D_MULTISAMPLE_MODE_MS1);
+ BEGIN_NVC0(push, NVC0_3D(MULTISAMPLE_CTRL), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(LINE_WIDTH_SEPARATE), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_3D(LINE_LAST_PIXEL), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(BLEND_SEPARATE_ALPHA), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_3D(BLEND_ENABLE_COMMON), 1);
+ PUSH_DATA (push, 0);
+ if (screen->eng3d->oclass < NVE4_3D_CLASS) {
+ BEGIN_NVC0(push, NVC0_3D(TEX_MISC), 1);
+ PUSH_DATA (push, NVC0_3D_TEX_MISC_SEAMLESS_CUBE_MAP);
+ } else {
+ BEGIN_NVC0(push, NVE4_3D(TEX_CB_INDEX), 1);
+ PUSH_DATA (push, 15);
+ }
+ BEGIN_NVC0(push, NVC0_3D(CALL_LIMIT_LOG), 1);
+ PUSH_DATA (push, 8); /* 128 */
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_STATCTRS_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ if (screen->eng3d->oclass >= NVC1_3D_CLASS) {
+ BEGIN_NVC0(push, NVC0_3D(CACHE_SPLIT), 1);
+ PUSH_DATA (push, NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1);
+ }
+
+ nvc0_magic_3d_init(push, screen->eng3d->oclass);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL,
+ &screen->text);
+ if (ret)
+ goto fail;
+
+ /* XXX: getting a page fault at the end of the code buffer every few
+ * launches, don't use the last 256 bytes to work around them - prefetch ?
+ */
+ nouveau_heap_init(&screen->text_heap, 0, (1 << 20) - 0x100);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 12, 6 << 16, NULL,
+ &screen->uniform_bo);
+ if (ret)
+ goto fail;
+
+ for (i = 0; i < 5; ++i) {
+ /* TIC and TSC entries for each unit (nve4+ only) */
+ /* auxiliary constants (6 user clip planes, base instance id) */
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 512);
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (i << 9));
+ BEGIN_NVC0(push, NVC0_3D(CB_BIND(i)), 1);
+ PUSH_DATA (push, (15 << 4) | 1);
+ if (screen->eng3d->oclass >= NVE4_3D_CLASS) {
+ unsigned j;
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 9);
+ PUSH_DATA (push, 0);
+ for (j = 0; j < 8; ++j)
+ PUSH_DATA(push, j);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(TEX_LIMITS(i)), 1);
+ PUSH_DATA (push, 0x54);
+ }
+ }
+ BEGIN_NVC0(push, NVC0_3D(LINKED_TSC), 1);
+ PUSH_DATA (push, 0);
+
+ /* return { 0.0, 0.0, 0.0, 0.0 } for out-of-bounds vtxbuf access */
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 256);
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), 5);
+ PUSH_DATA (push, 0);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_RUNOUT_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+ PUSH_DATA (push, screen->uniform_bo->offset + (5 << 16) + (6 << 9));
+
+ if (dev->drm_version >= 0x01000101) {
+ ret = nouveau_getparam(dev, NOUVEAU_GETPARAM_GRAPH_UNITS, &value);
+ if (ret) {
+ NOUVEAU_ERR("NOUVEAU_GETPARAM_GRAPH_UNITS failed.\n");
+ goto fail;
+ }
+ } else {
+ if (dev->chipset >= 0xe0 && dev->chipset < 0xf0)
+ value = (8 << 8) | 4;
+ else
+ value = (16 << 8) | 4;
+ }
+ screen->mp_count = value >> 8;
+ screen->mp_count_compute = screen->mp_count;
+
+ nvc0_screen_resize_tls_area(screen, 128 * 16, 0, 0x200);
+
+ BEGIN_NVC0(push, NVC0_3D(CODE_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->text->offset);
+ PUSH_DATA (push, screen->text->offset);
+ BEGIN_NVC0(push, NVC0_3D(TEMP_ADDRESS_HIGH), 4);
+ PUSH_DATAh(push, screen->tls->offset);
+ PUSH_DATA (push, screen->tls->offset);
+ PUSH_DATA (push, screen->tls->size >> 32);
+ PUSH_DATA (push, screen->tls->size);
+ BEGIN_NVC0(push, NVC0_3D(WARP_TEMP_ALLOC), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(LOCAL_BASE), 1);
+ PUSH_DATA (push, 0);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 20, NULL,
+ &screen->poly_cache);
+ if (ret)
+ goto fail;
+
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_QUARANTINE_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->poly_cache->offset);
+ PUSH_DATA (push, screen->poly_cache->offset);
+ PUSH_DATA (push, 3);
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 1 << 17, 1 << 17, NULL,
+ &screen->txc);
+ if (ret)
+ goto fail;
+
+ BEGIN_NVC0(push, NVC0_3D(TIC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset);
+ PUSH_DATA (push, screen->txc->offset);
+ PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
+
+ BEGIN_NVC0(push, NVC0_3D(TSC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset + 65536);
+ PUSH_DATA (push, screen->txc->offset + 65536);
+ PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
+
+ BEGIN_NVC0(push, NVC0_3D(SCREEN_Y_CONTROL), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(WINDOW_OFFSET_X), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1); /* deactivate ZCULL */
+ PUSH_DATA (push, 0x3f);
+
+ BEGIN_NVC0(push, NVC0_3D(CLIP_RECTS_MODE), 1);
+ PUSH_DATA (push, NVC0_3D_CLIP_RECTS_MODE_INSIDE_ANY);
+ BEGIN_NVC0(push, NVC0_3D(CLIP_RECT_HORIZ(0)), 8 * 2);
+ for (i = 0; i < 8 * 2; ++i)
+ PUSH_DATA(push, 0);
+ BEGIN_NVC0(push, NVC0_3D(CLIP_RECTS_EN), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(CLIPID_ENABLE), 1);
+ PUSH_DATA (push, 0);
+
+ /* neither scissors, viewport nor stencil mask should affect clears */
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_FLAGS), 1);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(0)), 2);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 1.0f);
+ BEGIN_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 1);
+ PUSH_DATA (push, NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1);
+
+ /* We use scissors instead of exact view volume clipping,
+ * so they're always enabled.
+ */
+ BEGIN_NVC0(push, NVC0_3D(SCISSOR_ENABLE(0)), 3);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 8192 << 16);
+ PUSH_DATA (push, 8192 << 16);
+
+#define MK_MACRO(m, n) i = nvc0_graph_set_macro(screen, m, i, sizeof(n), n);
+
+ i = 0;
+ MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_PER_INSTANCE, nvc0_9097_per_instance_bf);
+ MK_MACRO(NVC0_3D_MACRO_BLEND_ENABLES, nvc0_9097_blend_enables);
+ MK_MACRO(NVC0_3D_MACRO_VERTEX_ARRAY_SELECT, nvc0_9097_vertex_array_select);
+ MK_MACRO(NVC0_3D_MACRO_TEP_SELECT, nvc0_9097_tep_select);
+ MK_MACRO(NVC0_3D_MACRO_GP_SELECT, nvc0_9097_gp_select);
+ MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_FRONT, nvc0_9097_poly_mode_front);
+ MK_MACRO(NVC0_3D_MACRO_POLYGON_MODE_BACK, nvc0_9097_poly_mode_back);
+
+ BEGIN_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_3D(RT_SEPARATE_FRAG_DATA), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
+ PUSH_DATA (push, 0x40);
+ BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
+ PUSH_DATA (push, 0x30);
+ BEGIN_NVC0(push, NVC0_3D(PATCH_VERTICES), 1);
+ PUSH_DATA (push, 3);
+ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
+ PUSH_DATA (push, 0x20);
+ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(0)), 1);
+ PUSH_DATA (push, 0x00);
+
+ BEGIN_NVC0(push, NVC0_3D(POINT_COORD_REPLACE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(POINT_RASTER_RULES), 1);
+ PUSH_DATA (push, NVC0_3D_POINT_RASTER_RULES_OGL);
+
+ IMMED_NVC0(push, NVC0_3D(EDGEFLAG), 1);
+
+ if (nvc0_screen_init_compute(screen))
+ goto fail;
+
+ PUSH_KICK (push);
+
+ screen->tic.entries = CALLOC(4096, sizeof(void *));
+ screen->tsc.entries = screen->tic.entries + 2048;
+
+ mm_config.nvc0.tile_mode = 0;
+ mm_config.nvc0.memtype = 0xfe0;
+ screen->mm_VRAM_fe0 = nouveau_mm_create(dev, NOUVEAU_BO_VRAM, &mm_config);
+
+ if (!nvc0_blitter_create(screen))
+ goto fail;
+
+ nouveau_fence_new(&screen->base, &screen->base.fence.current, FALSE);
+
+ return pscreen;
+
+fail:
+ nvc0_screen_destroy(pscreen);
+ return NULL;
+}
+
+int
+nvc0_screen_tic_alloc(struct nvc0_screen *screen, void *entry)
+{
+ int i = screen->tic.next;
+
+ while (screen->tic.lock[i / 32] & (1 << (i % 32)))
+ i = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1);
+
+ screen->tic.next = (i + 1) & (NVC0_TIC_MAX_ENTRIES - 1);
+
+ if (screen->tic.entries[i])
+ nv50_tic_entry(screen->tic.entries[i])->id = -1;
+
+ screen->tic.entries[i] = entry;
+ return i;
+}
+
+int
+nvc0_screen_tsc_alloc(struct nvc0_screen *screen, void *entry)
+{
+ int i = screen->tsc.next;
+
+ while (screen->tsc.lock[i / 32] & (1 << (i % 32)))
+ i = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1);
+
+ screen->tsc.next = (i + 1) & (NVC0_TSC_MAX_ENTRIES - 1);
+
+ if (screen->tsc.entries[i])
+ nv50_tsc_entry(screen->tsc.entries[i])->id = -1;
+
+ screen->tsc.entries[i] = entry;
+ return i;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
new file mode 100644
index 0000000..27a0c5f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_screen.h
@@ -0,0 +1,325 @@
+#ifndef __NVC0_SCREEN_H__
+#define __NVC0_SCREEN_H__
+
+#include "nouveau_screen.h"
+#include "nouveau_mm.h"
+#include "nouveau_fence.h"
+#include "nouveau_heap.h"
+
+#include "nv_object.xml.h"
+
+#include "nvc0/nvc0_winsys.h"
+#include "nvc0/nvc0_stateobj.h"
+
+#define NVC0_TIC_MAX_ENTRIES 2048
+#define NVC0_TSC_MAX_ENTRIES 2048
+
+/* doesn't count reserved slots (for auxiliary constants, immediates, etc.) */
+#define NVC0_MAX_PIPE_CONSTBUFS 14
+#define NVE4_MAX_PIPE_CONSTBUFS_COMPUTE 7
+
+#define NVC0_MAX_SURFACE_SLOTS 16
+
+struct nvc0_context;
+
+struct nvc0_blitter;
+
+struct nvc0_screen {
+ struct nouveau_screen base;
+
+ struct nvc0_context *cur_ctx;
+
+ int num_occlusion_queries_active;
+
+ struct nouveau_bo *text;
+ struct nouveau_bo *parm; /* for COMPUTE */
+ struct nouveau_bo *uniform_bo; /* for 3D */
+ struct nouveau_bo *tls;
+ struct nouveau_bo *txc; /* TIC (offset 0) and TSC (65536) */
+ struct nouveau_bo *poly_cache;
+
+ uint16_t mp_count;
+ uint16_t mp_count_compute; /* magic reg can make compute use fewer MPs */
+
+ struct nouveau_heap *text_heap;
+ struct nouveau_heap *lib_code; /* allocated from text_heap */
+
+ struct nvc0_blitter *blitter;
+
+ struct {
+ void **entries;
+ int next;
+ uint32_t lock[NVC0_TIC_MAX_ENTRIES / 32];
+ } tic;
+
+ struct {
+ void **entries;
+ int next;
+ uint32_t lock[NVC0_TSC_MAX_ENTRIES / 32];
+ } tsc;
+
+ struct {
+ struct nouveau_bo *bo;
+ uint32_t *map;
+ } fence;
+
+ struct {
+ struct nvc0_program *prog; /* compute state object to read MP counters */
+ struct pipe_query *mp_counter[8]; /* counter to query allocation */
+ uint8_t num_mp_pm_active[2];
+ boolean mp_counters_enabled;
+ } pm;
+
+ struct nouveau_mman *mm_VRAM_fe0;
+
+ struct nouveau_object *eng3d; /* sqrt(1/2)|kepler> + sqrt(1/2)|fermi> */
+ struct nouveau_object *eng2d;
+ struct nouveau_object *m2mf;
+ struct nouveau_object *compute;
+};
+
+static INLINE struct nvc0_screen *
+nvc0_screen(struct pipe_screen *screen)
+{
+ return (struct nvc0_screen *)screen;
+}
+
+
+/* Performance counter queries:
+ */
+#define NVE4_PM_QUERY_COUNT 49
+#define NVE4_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + (i))
+#define NVE4_PM_QUERY_LAST NVE4_PM_QUERY(NVE4_PM_QUERY_COUNT - 1)
+#define NVE4_PM_QUERY_PROF_TRIGGER_0 0
+#define NVE4_PM_QUERY_PROF_TRIGGER_1 1
+#define NVE4_PM_QUERY_PROF_TRIGGER_2 2
+#define NVE4_PM_QUERY_PROF_TRIGGER_3 3
+#define NVE4_PM_QUERY_PROF_TRIGGER_4 4
+#define NVE4_PM_QUERY_PROF_TRIGGER_5 5
+#define NVE4_PM_QUERY_PROF_TRIGGER_6 6
+#define NVE4_PM_QUERY_PROF_TRIGGER_7 7
+#define NVE4_PM_QUERY_LAUNCHED_WARPS 8
+#define NVE4_PM_QUERY_LAUNCHED_THREADS 9
+#define NVE4_PM_QUERY_LAUNCHED_CTA 10
+#define NVE4_PM_QUERY_INST_ISSUED1 11
+#define NVE4_PM_QUERY_INST_ISSUED2 12
+#define NVE4_PM_QUERY_INST_EXECUTED 13
+#define NVE4_PM_QUERY_LD_LOCAL 14
+#define NVE4_PM_QUERY_ST_LOCAL 15
+#define NVE4_PM_QUERY_LD_SHARED 16
+#define NVE4_PM_QUERY_ST_SHARED 17
+#define NVE4_PM_QUERY_L1_LOCAL_LOAD_HIT 18
+#define NVE4_PM_QUERY_L1_LOCAL_LOAD_MISS 19
+#define NVE4_PM_QUERY_L1_LOCAL_STORE_HIT 20
+#define NVE4_PM_QUERY_L1_LOCAL_STORE_MISS 21
+#define NVE4_PM_QUERY_GLD_REQUEST 22
+#define NVE4_PM_QUERY_GST_REQUEST 23
+#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_HIT 24
+#define NVE4_PM_QUERY_L1_GLOBAL_LOAD_MISS 25
+#define NVE4_PM_QUERY_GLD_TRANSACTIONS_UNCACHED 26
+#define NVE4_PM_QUERY_GST_TRANSACTIONS 27
+#define NVE4_PM_QUERY_BRANCH 28
+#define NVE4_PM_QUERY_BRANCH_DIVERGENT 29
+#define NVE4_PM_QUERY_ACTIVE_WARPS 30
+#define NVE4_PM_QUERY_ACTIVE_CYCLES 31
+#define NVE4_PM_QUERY_INST_ISSUED 32
+#define NVE4_PM_QUERY_ATOM_COUNT 33
+#define NVE4_PM_QUERY_GRED_COUNT 34
+#define NVE4_PM_QUERY_LD_SHARED_REPLAY 35
+#define NVE4_PM_QUERY_ST_SHARED_REPLAY 36
+#define NVE4_PM_QUERY_LD_LOCAL_TRANSACTIONS 37
+#define NVE4_PM_QUERY_ST_LOCAL_TRANSACTIONS 38
+#define NVE4_PM_QUERY_L1_LD_SHARED_TRANSACTIONS 39
+#define NVE4_PM_QUERY_L1_ST_SHARED_TRANSACTIONS 40
+#define NVE4_PM_QUERY_GLD_MEM_DIV_REPLAY 41
+#define NVE4_PM_QUERY_GST_MEM_DIV_REPLAY 42
+#define NVE4_PM_QUERY_METRIC_IPC 43
+#define NVE4_PM_QUERY_METRIC_IPAC 44
+#define NVE4_PM_QUERY_METRIC_IPEC 45
+#define NVE4_PM_QUERY_METRIC_MP_OCCUPANCY 46
+#define NVE4_PM_QUERY_METRIC_MP_EFFICIENCY 47
+#define NVE4_PM_QUERY_METRIC_INST_REPLAY_OHEAD 48
+
+/*
+#define NVE4_PM_QUERY_GR_IDLE 50
+#define NVE4_PM_QUERY_BSP_IDLE 51
+#define NVE4_PM_QUERY_VP_IDLE 52
+#define NVE4_PM_QUERY_PPP_IDLE 53
+#define NVE4_PM_QUERY_CE0_IDLE 54
+#define NVE4_PM_QUERY_CE1_IDLE 55
+#define NVE4_PM_QUERY_CE2_IDLE 56
+*/
+/* L2 queries (PCOUNTER) */
+/*
+#define NVE4_PM_QUERY_L2_SUBP_WRITE_L1_SECTOR_QUERIES 57
+...
+*/
+/* TEX queries (PCOUNTER) */
+/*
+#define NVE4_PM_QUERY_TEX0_CACHE_SECTOR_QUERIES 58
+...
+*/
+
+#define NVC0_PM_QUERY_COUNT 31
+#define NVC0_PM_QUERY(i) (PIPE_QUERY_DRIVER_SPECIFIC + 2048 + (i))
+#define NVC0_PM_QUERY_LAST NVC0_PM_QUERY(NVC0_PM_QUERY_COUNT - 1)
+#define NVC0_PM_QUERY_INST_EXECUTED 0
+#define NVC0_PM_QUERY_BRANCH 1
+#define NVC0_PM_QUERY_BRANCH_DIVERGENT 2
+#define NVC0_PM_QUERY_ACTIVE_WARPS 3
+#define NVC0_PM_QUERY_ACTIVE_CYCLES 4
+#define NVC0_PM_QUERY_LAUNCHED_WARPS 5
+#define NVC0_PM_QUERY_LAUNCHED_THREADS 6
+#define NVC0_PM_QUERY_LD_SHARED 7
+#define NVC0_PM_QUERY_ST_SHARED 8
+#define NVC0_PM_QUERY_LD_LOCAL 9
+#define NVC0_PM_QUERY_ST_LOCAL 10
+#define NVC0_PM_QUERY_GRED_COUNT 11
+#define NVC0_PM_QUERY_ATOM_COUNT 12
+#define NVC0_PM_QUERY_GLD_REQUEST 13
+#define NVC0_PM_QUERY_GST_REQUEST 14
+#define NVC0_PM_QUERY_INST_ISSUED1_0 15
+#define NVC0_PM_QUERY_INST_ISSUED1_1 16
+#define NVC0_PM_QUERY_INST_ISSUED2_0 17
+#define NVC0_PM_QUERY_INST_ISSUED2_1 18
+#define NVC0_PM_QUERY_TH_INST_EXECUTED_0 19
+#define NVC0_PM_QUERY_TH_INST_EXECUTED_1 20
+#define NVC0_PM_QUERY_TH_INST_EXECUTED_2 21
+#define NVC0_PM_QUERY_TH_INST_EXECUTED_3 22
+#define NVC0_PM_QUERY_PROF_TRIGGER_0 23
+#define NVC0_PM_QUERY_PROF_TRIGGER_1 24
+#define NVC0_PM_QUERY_PROF_TRIGGER_2 25
+#define NVC0_PM_QUERY_PROF_TRIGGER_3 26
+#define NVC0_PM_QUERY_PROF_TRIGGER_4 27
+#define NVC0_PM_QUERY_PROF_TRIGGER_5 28
+#define NVC0_PM_QUERY_PROF_TRIGGER_6 29
+#define NVC0_PM_QUERY_PROF_TRIGGER_7 30
+
+/* Driver statistics queries:
+ */
+#ifdef NOUVEAU_ENABLE_DRIVER_STATISTICS
+
+#define NVC0_QUERY_DRV_STAT(i) (PIPE_QUERY_DRIVER_SPECIFIC + 1024 + (i))
+#define NVC0_QUERY_DRV_STAT_COUNT 29
+#define NVC0_QUERY_DRV_STAT_LAST NVC0_QUERY_DRV_STAT(NVC0_QUERY_DRV_STAT_COUNT - 1)
+#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_COUNT 0
+#define NVC0_QUERY_DRV_STAT_TEX_OBJECT_CURRENT_BYTES 1
+#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_COUNT 2
+#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_VID 3
+#define NVC0_QUERY_DRV_STAT_BUF_OBJECT_CURRENT_BYTES_SYS 4
+#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_READ 5
+#define NVC0_QUERY_DRV_STAT_TEX_TRANSFERS_WRITE 6
+#define NVC0_QUERY_DRV_STAT_TEX_COPY_COUNT 7
+#define NVC0_QUERY_DRV_STAT_TEX_BLIT_COUNT 8
+#define NVC0_QUERY_DRV_STAT_TEX_CACHE_FLUSH_COUNT 9
+#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_READ 10
+#define NVC0_QUERY_DRV_STAT_BUF_TRANSFERS_WRITE 11
+#define NVC0_QUERY_DRV_STAT_BUF_READ_BYTES_STAGING_VID 12
+#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_DIRECT 13
+#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_VID 14
+#define NVC0_QUERY_DRV_STAT_BUF_WRITE_BYTES_STAGING_SYS 15
+#define NVC0_QUERY_DRV_STAT_BUF_COPY_BYTES 16
+#define NVC0_QUERY_DRV_STAT_BUF_NON_KERNEL_FENCE_SYNC_COUNT 17
+#define NVC0_QUERY_DRV_STAT_ANY_NON_KERNEL_FENCE_SYNC_COUNT 18
+#define NVC0_QUERY_DRV_STAT_QUERY_SYNC_COUNT 19
+#define NVC0_QUERY_DRV_STAT_GPU_SERIALIZE_COUNT 20
+#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_ARRAY 21
+#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_INDEXED 22
+#define NVC0_QUERY_DRV_STAT_DRAW_CALLS_FALLBACK_COUNT 23
+#define NVC0_QUERY_DRV_STAT_USER_BUFFER_UPLOAD_BYTES 24
+#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_COUNT 25
+#define NVC0_QUERY_DRV_STAT_CONSTBUF_UPLOAD_BYTES 26
+#define NVC0_QUERY_DRV_STAT_PUSHBUF_COUNT 27
+#define NVC0_QUERY_DRV_STAT_RESOURCE_VALIDATE_COUNT 28
+
+#else
+
+#define NVC0_QUERY_DRV_STAT_COUNT 0
+
+#endif
+
+int nvc0_screen_get_driver_query_info(struct pipe_screen *, unsigned,
+ struct pipe_driver_query_info *);
+
+boolean nvc0_blitter_create(struct nvc0_screen *);
+void nvc0_blitter_destroy(struct nvc0_screen *);
+
+void nvc0_screen_make_buffers_resident(struct nvc0_screen *);
+
+int nvc0_screen_tic_alloc(struct nvc0_screen *, void *);
+int nvc0_screen_tsc_alloc(struct nvc0_screen *, void *);
+
+int nve4_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
+int nvc0_screen_compute_setup(struct nvc0_screen *, struct nouveau_pushbuf *);
+
+boolean nvc0_screen_resize_tls_area(struct nvc0_screen *, uint32_t lpos,
+ uint32_t lneg, uint32_t cstack);
+
+static INLINE void
+nvc0_resource_fence(struct nv04_resource *res, uint32_t flags)
+{
+ struct nvc0_screen *screen = nvc0_screen(res->base.screen);
+
+ if (res->mm) {
+ nouveau_fence_ref(screen->base.fence.current, &res->fence);
+ if (flags & NOUVEAU_BO_WR)
+ nouveau_fence_ref(screen->base.fence.current, &res->fence_wr);
+ }
+}
+
+static INLINE void
+nvc0_resource_validate(struct nv04_resource *res, uint32_t flags)
+{
+ if (likely(res->bo)) {
+ if (flags & NOUVEAU_BO_WR)
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING |
+ NOUVEAU_BUFFER_STATUS_DIRTY;
+ if (flags & NOUVEAU_BO_RD)
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ nvc0_resource_fence(res, flags);
+ }
+}
+
+struct nvc0_format {
+ uint32_t rt;
+ uint32_t tic;
+ uint32_t vtx;
+ uint32_t usage;
+};
+
+extern const struct nvc0_format nvc0_format_table[];
+
+static INLINE void
+nvc0_screen_tic_unlock(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
+{
+ if (tic->id >= 0)
+ screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
+}
+
+static INLINE void
+nvc0_screen_tsc_unlock(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
+{
+ if (tsc->id >= 0)
+ screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
+}
+
+static INLINE void
+nvc0_screen_tic_free(struct nvc0_screen *screen, struct nv50_tic_entry *tic)
+{
+ if (tic->id >= 0) {
+ screen->tic.entries[tic->id] = NULL;
+ screen->tic.lock[tic->id / 32] &= ~(1 << (tic->id % 32));
+ }
+}
+
+static INLINE void
+nvc0_screen_tsc_free(struct nvc0_screen *screen, struct nv50_tsc_entry *tsc)
+{
+ if (tsc->id >= 0) {
+ screen->tsc.entries[tsc->id] = NULL;
+ screen->tsc.lock[tsc->id / 32] &= ~(1 << (tsc->id % 32));
+ }
+}
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
new file mode 100644
index 0000000..b820ef2
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_shader_state.c
@@ -0,0 +1,278 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+
+#include "nvc0/nvc0_context.h"
+
+static INLINE void
+nvc0_program_update_context_state(struct nvc0_context *nvc0,
+ struct nvc0_program *prog, int stage)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ if (prog && prog->need_tls) {
+ const uint32_t flags = NOUVEAU_BO_VRAM | NOUVEAU_BO_RDWR;
+ if (!nvc0->state.tls_required)
+ BCTX_REFN_bo(nvc0->bufctx_3d, TLS, flags, nvc0->screen->tls);
+ nvc0->state.tls_required |= 1 << stage;
+ } else {
+ if (nvc0->state.tls_required == (1 << stage))
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TLS);
+ nvc0->state.tls_required &= ~(1 << stage);
+ }
+
+ if (prog && prog->immd_size) {
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ /* NOTE: may overlap code of a different shader */
+ PUSH_DATA (push, align(prog->immd_size, 0x100));
+ PUSH_DATAh(push, nvc0->screen->text->offset + prog->immd_base);
+ PUSH_DATA (push, nvc0->screen->text->offset + prog->immd_base);
+ BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1);
+ PUSH_DATA (push, (14 << 4) | 1);
+
+ nvc0->state.c14_bound |= 1 << stage;
+ } else
+ if (nvc0->state.c14_bound & (1 << stage)) {
+ BEGIN_NVC0(push, NVC0_3D(CB_BIND(stage)), 1);
+ PUSH_DATA (push, (14 << 4) | 0);
+
+ nvc0->state.c14_bound &= ~(1 << stage);
+ }
+}
+
+static INLINE boolean
+nvc0_program_validate(struct nvc0_context *nvc0, struct nvc0_program *prog)
+{
+ if (prog->mem)
+ return TRUE;
+
+ if (!prog->translated) {
+ prog->translated = nvc0_program_translate(
+ prog, nvc0->screen->base.device->chipset);
+ if (!prog->translated)
+ return FALSE;
+ }
+
+ if (likely(prog->code_size))
+ return nvc0_program_upload_code(nvc0, prog);
+ return TRUE; /* stream output info only */
+}
+
+void
+nvc0_vertprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_program *vp = nvc0->vertprog;
+
+ if (!nvc0_program_validate(nvc0, vp))
+ return;
+ nvc0_program_update_context_state(nvc0, vp, 0);
+
+ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(1)), 2);
+ PUSH_DATA (push, 0x11);
+ PUSH_DATA (push, vp->code_base);
+ BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(1)), 1);
+ PUSH_DATA (push, vp->num_gprs);
+
+ // BEGIN_NVC0(push, NVC0_3D_(0x163c), 1);
+ // PUSH_DATA (push, 0);
+}
+
+void
+nvc0_fragprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_program *fp = nvc0->fragprog;
+
+ if (!nvc0_program_validate(nvc0, fp))
+ return;
+ nvc0_program_update_context_state(nvc0, fp, 4);
+
+ if (fp->fp.early_z != nvc0->state.early_z_forced) {
+ nvc0->state.early_z_forced = fp->fp.early_z;
+ IMMED_NVC0(push, NVC0_3D(FORCE_EARLY_FRAGMENT_TESTS), fp->fp.early_z);
+ }
+
+ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(5)), 2);
+ PUSH_DATA (push, 0x51);
+ PUSH_DATA (push, fp->code_base);
+ BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(5)), 1);
+ PUSH_DATA (push, fp->num_gprs);
+
+ BEGIN_NVC0(push, SUBC_3D(0x0360), 2);
+ PUSH_DATA (push, 0x20164010);
+ PUSH_DATA (push, 0x20);
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_TEST_MASK), 1);
+ PUSH_DATA (push, fp->flags[0]);
+}
+
+void
+nvc0_tctlprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_program *tp = nvc0->tctlprog;
+
+ if (tp && nvc0_program_validate(nvc0, tp)) {
+ if (tp->tp.tess_mode != ~0) {
+ BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
+ PUSH_DATA (push, tp->tp.tess_mode);
+ }
+ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 2);
+ PUSH_DATA (push, 0x21);
+ PUSH_DATA (push, tp->code_base);
+ BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(2)), 1);
+ PUSH_DATA (push, tp->num_gprs);
+
+ if (tp->tp.input_patch_size <= 32)
+ IMMED_NVC0(push, NVC0_3D(PATCH_VERTICES), tp->tp.input_patch_size);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(SP_SELECT(2)), 1);
+ PUSH_DATA (push, 0x20);
+ }
+ nvc0_program_update_context_state(nvc0, tp, 1);
+}
+
+void
+nvc0_tevlprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_program *tp = nvc0->tevlprog;
+
+ if (tp && nvc0_program_validate(nvc0, tp)) {
+ if (tp->tp.tess_mode != ~0) {
+ BEGIN_NVC0(push, NVC0_3D(TESS_MODE), 1);
+ PUSH_DATA (push, tp->tp.tess_mode);
+ }
+ BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
+ PUSH_DATA (push, 0x31);
+ BEGIN_NVC0(push, NVC0_3D(SP_START_ID(3)), 1);
+ PUSH_DATA (push, tp->code_base);
+ BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(3)), 1);
+ PUSH_DATA (push, tp->num_gprs);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(MACRO_TEP_SELECT), 1);
+ PUSH_DATA (push, 0x30);
+ }
+ nvc0_program_update_context_state(nvc0, tp, 2);
+}
+
+void
+nvc0_gmtyprog_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_program *gp = nvc0->gmtyprog;
+
+ if (gp)
+ nvc0_program_validate(nvc0, gp);
+
+ /* we allow GPs with no code for specifying stream output state only */
+ if (gp && gp->code_size) {
+ const boolean gp_selects_layer = gp->hdr[13] & (1 << 9);
+
+ BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
+ PUSH_DATA (push, 0x41);
+ BEGIN_NVC0(push, NVC0_3D(SP_START_ID(4)), 1);
+ PUSH_DATA (push, gp->code_base);
+ BEGIN_NVC0(push, NVC0_3D(SP_GPR_ALLOC(4)), 1);
+ PUSH_DATA (push, gp->num_gprs);
+ BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
+ PUSH_DATA (push, gp_selects_layer ? NVC0_3D_LAYER_USE_GP : 0);
+ } else {
+ IMMED_NVC0(push, NVC0_3D(LAYER), 0);
+ BEGIN_NVC0(push, NVC0_3D(MACRO_GP_SELECT), 1);
+ PUSH_DATA (push, 0x40);
+ }
+ nvc0_program_update_context_state(nvc0, gp, 3);
+}
+
+void
+nvc0_tfb_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_transform_feedback_state *tfb;
+ unsigned b;
+
+ if (nvc0->gmtyprog) tfb = nvc0->gmtyprog->tfb;
+ else
+ if (nvc0->tevlprog) tfb = nvc0->tevlprog->tfb;
+ else
+ tfb = nvc0->vertprog->tfb;
+
+ IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), (tfb && nvc0->num_tfbbufs) ? 1 : 0);
+
+ if (tfb && tfb != nvc0->state.tfb) {
+ for (b = 0; b < 4; ++b) {
+ if (tfb->varying_count[b]) {
+ unsigned n = (tfb->varying_count[b] + 3) / 4;
+
+ BEGIN_NVC0(push, NVC0_3D(TFB_STREAM(b)), 3);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, tfb->varying_count[b]);
+ PUSH_DATA (push, tfb->stride[b]);
+ BEGIN_NVC0(push, NVC0_3D(TFB_VARYING_LOCS(b, 0)), n);
+ PUSH_DATAp(push, tfb->varying_index[b], n);
+
+ if (nvc0->tfbbuf[b])
+ nvc0_so_target(nvc0->tfbbuf[b])->stride = tfb->stride[b];
+ } else {
+ IMMED_NVC0(push, NVC0_3D(TFB_VARYING_COUNT(b)), 0);
+ }
+ }
+ }
+ nvc0->state.tfb = tfb;
+
+ if (!(nvc0->dirty & NVC0_NEW_TFB_TARGETS))
+ return;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TFB);
+
+ for (b = 0; b < nvc0->num_tfbbufs; ++b) {
+ struct nvc0_so_target *targ = nvc0_so_target(nvc0->tfbbuf[b]);
+ struct nv04_resource *buf = nv04_resource(targ->pipe.buffer);
+
+ if (tfb)
+ targ->stride = tfb->stride[b];
+
+ if (!(nvc0->tfbbuf_dirty & (1 << b)))
+ continue;
+
+ if (!targ->clean)
+ nvc0_query_fifo_wait(push, targ->pq);
+ BEGIN_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 5);
+ PUSH_DATA (push, 1);
+ PUSH_DATAh(push, buf->address + targ->pipe.buffer_offset);
+ PUSH_DATA (push, buf->address + targ->pipe.buffer_offset);
+ PUSH_DATA (push, targ->pipe.buffer_size);
+ if (!targ->clean) {
+ nvc0_query_pushbuf_submit(push, targ->pq, 0x4);
+ } else {
+ PUSH_DATA(push, 0); /* TFB_BUFFER_OFFSET */
+ targ->clean = FALSE;
+ }
+ BCTX_REFN(nvc0->bufctx_3d, TFB, buf, WR);
+ }
+ for (; b < 4; ++b)
+ IMMED_NVC0(push, NVC0_3D(TFB_BUFFER_ENABLE(b)), 0);
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
new file mode 100644
index 0000000..e56ef01
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state.c
@@ -0,0 +1,1247 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "pipe/p_defines.h"
+#include "util/u_helpers.h"
+#include "util/u_inlines.h"
+#include "util/u_transfer.h"
+
+#include "tgsi/tgsi_parse.h"
+
+#include "nvc0/nvc0_stateobj.h"
+#include "nvc0/nvc0_context.h"
+
+#include "nvc0/nvc0_3d.xml.h"
+#include "nv50/nv50_texture.xml.h"
+
+#include "nouveau_gldefs.h"
+
+static INLINE uint32_t
+nvc0_colormask(unsigned mask)
+{
+ uint32_t ret = 0;
+
+ if (mask & PIPE_MASK_R)
+ ret |= 0x0001;
+ if (mask & PIPE_MASK_G)
+ ret |= 0x0010;
+ if (mask & PIPE_MASK_B)
+ ret |= 0x0100;
+ if (mask & PIPE_MASK_A)
+ ret |= 0x1000;
+
+ return ret;
+}
+
+#define NVC0_BLEND_FACTOR_CASE(a, b) \
+ case PIPE_BLENDFACTOR_##a: return NV50_3D_BLEND_FACTOR_##b
+
+static INLINE uint32_t
+nvc0_blend_fac(unsigned factor)
+{
+ switch (factor) {
+ NVC0_BLEND_FACTOR_CASE(ONE, ONE);
+ NVC0_BLEND_FACTOR_CASE(SRC_COLOR, SRC_COLOR);
+ NVC0_BLEND_FACTOR_CASE(SRC_ALPHA, SRC_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(DST_ALPHA, DST_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(DST_COLOR, DST_COLOR);
+ NVC0_BLEND_FACTOR_CASE(SRC_ALPHA_SATURATE, SRC_ALPHA_SATURATE);
+ NVC0_BLEND_FACTOR_CASE(CONST_COLOR, CONSTANT_COLOR);
+ NVC0_BLEND_FACTOR_CASE(CONST_ALPHA, CONSTANT_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(SRC1_COLOR, SRC1_COLOR);
+ NVC0_BLEND_FACTOR_CASE(SRC1_ALPHA, SRC1_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(ZERO, ZERO);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC_COLOR, ONE_MINUS_SRC_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC_ALPHA, ONE_MINUS_SRC_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(INV_DST_ALPHA, ONE_MINUS_DST_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(INV_DST_COLOR, ONE_MINUS_DST_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_CONST_COLOR, ONE_MINUS_CONSTANT_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_CONST_ALPHA, ONE_MINUS_CONSTANT_ALPHA);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC1_COLOR, ONE_MINUS_SRC1_COLOR);
+ NVC0_BLEND_FACTOR_CASE(INV_SRC1_ALPHA, ONE_MINUS_SRC1_ALPHA);
+ default:
+ return NV50_3D_BLEND_FACTOR_ZERO;
+ }
+}
+
+static void *
+nvc0_blend_state_create(struct pipe_context *pipe,
+ const struct pipe_blend_state *cso)
+{
+ struct nvc0_blend_stateobj *so = CALLOC_STRUCT(nvc0_blend_stateobj);
+ int i;
+ int r; /* reference */
+ uint32_t ms;
+ uint8_t blend_en = 0;
+ boolean indep_masks = FALSE;
+ boolean indep_funcs = FALSE;
+
+ so->pipe = *cso;
+
+ /* check which states actually have differing values */
+ if (cso->independent_blend_enable) {
+ for (r = 0; r < 8 && !cso->rt[r].blend_enable; ++r);
+ blend_en |= 1 << r;
+ for (i = r + 1; i < 8; ++i) {
+ if (!cso->rt[i].blend_enable)
+ continue;
+ blend_en |= 1 << i;
+ if (cso->rt[i].rgb_func != cso->rt[r].rgb_func ||
+ cso->rt[i].rgb_src_factor != cso->rt[r].rgb_src_factor ||
+ cso->rt[i].rgb_dst_factor != cso->rt[r].rgb_dst_factor ||
+ cso->rt[i].alpha_func != cso->rt[r].alpha_func ||
+ cso->rt[i].alpha_src_factor != cso->rt[r].alpha_src_factor ||
+ cso->rt[i].alpha_dst_factor != cso->rt[r].alpha_dst_factor) {
+ indep_funcs = TRUE;
+ break;
+ }
+ }
+ for (; i < 8; ++i)
+ blend_en |= (cso->rt[i].blend_enable ? 1 : 0) << i;
+
+ for (i = 1; i < 8; ++i) {
+ if (cso->rt[i].colormask != cso->rt[0].colormask) {
+ indep_masks = TRUE;
+ break;
+ }
+ }
+ } else {
+ r = 0;
+ if (cso->rt[0].blend_enable)
+ blend_en = 0xff;
+ }
+
+ if (cso->logicop_enable) {
+ SB_BEGIN_3D(so, LOGIC_OP_ENABLE, 2);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_logicop_func(cso->logicop_func));
+
+ SB_IMMED_3D(so, MACRO_BLEND_ENABLES, 0);
+ } else {
+ SB_IMMED_3D(so, LOGIC_OP_ENABLE, 0);
+
+ SB_IMMED_3D(so, BLEND_INDEPENDENT, indep_funcs);
+ SB_IMMED_3D(so, MACRO_BLEND_ENABLES, blend_en);
+ if (indep_funcs) {
+ for (i = 0; i < 8; ++i) {
+ if (cso->rt[i].blend_enable) {
+ SB_BEGIN_3D(so, IBLEND_EQUATION_RGB(i), 6);
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[i].rgb_func));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_src_factor));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].rgb_dst_factor));
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[i].alpha_func));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_src_factor));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[i].alpha_dst_factor));
+ }
+ }
+ } else
+ if (blend_en) {
+ SB_BEGIN_3D(so, BLEND_EQUATION_RGB, 5);
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[r].rgb_func));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[r].rgb_src_factor));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[r].rgb_dst_factor));
+ SB_DATA (so, nvgl_blend_eqn(cso->rt[r].alpha_func));
+ SB_DATA (so, nvc0_blend_fac(cso->rt[r].alpha_src_factor));
+ SB_BEGIN_3D(so, BLEND_FUNC_DST_ALPHA, 1);
+ SB_DATA (so, nvc0_blend_fac(cso->rt[r].alpha_dst_factor));
+ }
+
+ SB_IMMED_3D(so, COLOR_MASK_COMMON, !indep_masks);
+ if (indep_masks) {
+ SB_BEGIN_3D(so, COLOR_MASK(0), 8);
+ for (i = 0; i < 8; ++i)
+ SB_DATA(so, nvc0_colormask(cso->rt[i].colormask));
+ } else {
+ SB_BEGIN_3D(so, COLOR_MASK(0), 1);
+ SB_DATA (so, nvc0_colormask(cso->rt[0].colormask));
+ }
+ }
+
+ ms = 0;
+ if (cso->alpha_to_coverage)
+ ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_COVERAGE;
+ if (cso->alpha_to_one)
+ ms |= NVC0_3D_MULTISAMPLE_CTRL_ALPHA_TO_ONE;
+
+ SB_BEGIN_3D(so, MULTISAMPLE_CTRL, 1);
+ SB_DATA (so, ms);
+
+ assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
+ return so;
+}
+
+static void
+nvc0_blend_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->blend = hwcso;
+ nvc0->dirty |= NVC0_NEW_BLEND;
+}
+
+static void
+nvc0_blend_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+/* NOTE: ignoring line_last_pixel, using FALSE (set on screen init) */
+static void *
+nvc0_rasterizer_state_create(struct pipe_context *pipe,
+ const struct pipe_rasterizer_state *cso)
+{
+ struct nvc0_rasterizer_stateobj *so;
+ uint32_t reg;
+
+ so = CALLOC_STRUCT(nvc0_rasterizer_stateobj);
+ if (!so)
+ return NULL;
+ so->pipe = *cso;
+
+ /* Scissor enables are handled in scissor state, we will not want to
+ * always emit 16 commands, one for each scissor rectangle, here.
+ */
+
+ SB_BEGIN_3D(so, SHADE_MODEL, 1);
+ SB_DATA (so, cso->flatshade ? NVC0_3D_SHADE_MODEL_FLAT :
+ NVC0_3D_SHADE_MODEL_SMOOTH);
+ SB_IMMED_3D(so, PROVOKING_VERTEX_LAST, !cso->flatshade_first);
+ SB_IMMED_3D(so, VERTEX_TWO_SIDE_ENABLE, cso->light_twoside);
+
+ SB_IMMED_3D(so, VERT_COLOR_CLAMP_EN, cso->clamp_vertex_color);
+ SB_BEGIN_3D(so, FRAG_COLOR_CLAMP_EN, 1);
+ SB_DATA (so, cso->clamp_fragment_color ? 0x11111111 : 0x00000000);
+
+ SB_IMMED_3D(so, MULTISAMPLE_ENABLE, cso->multisample);
+
+ SB_IMMED_3D(so, LINE_SMOOTH_ENABLE, cso->line_smooth);
+ if (cso->line_smooth)
+ SB_BEGIN_3D(so, LINE_WIDTH_SMOOTH, 1);
+ else
+ SB_BEGIN_3D(so, LINE_WIDTH_ALIASED, 1);
+ SB_DATA (so, fui(cso->line_width));
+
+ SB_IMMED_3D(so, LINE_STIPPLE_ENABLE, cso->line_stipple_enable);
+ if (cso->line_stipple_enable) {
+ SB_BEGIN_3D(so, LINE_STIPPLE_PATTERN, 1);
+ SB_DATA (so, (cso->line_stipple_pattern << 8) |
+ cso->line_stipple_factor);
+
+ }
+
+ SB_IMMED_3D(so, VP_POINT_SIZE_EN, cso->point_size_per_vertex);
+ if (!cso->point_size_per_vertex) {
+ SB_BEGIN_3D(so, POINT_SIZE, 1);
+ SB_DATA (so, fui(cso->point_size));
+ }
+
+ reg = (cso->sprite_coord_mode == PIPE_SPRITE_COORD_UPPER_LEFT) ?
+ NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_UPPER_LEFT :
+ NVC0_3D_POINT_COORD_REPLACE_COORD_ORIGIN_LOWER_LEFT;
+
+ SB_BEGIN_3D(so, POINT_COORD_REPLACE, 1);
+ SB_DATA (so, ((cso->sprite_coord_enable & 0xff) << 3) | reg);
+ SB_IMMED_3D(so, POINT_SPRITE_ENABLE, cso->point_quad_rasterization);
+ SB_IMMED_3D(so, POINT_SMOOTH_ENABLE, cso->point_smooth);
+
+ SB_BEGIN_3D(so, MACRO_POLYGON_MODE_FRONT, 1);
+ SB_DATA (so, nvgl_polygon_mode(cso->fill_front));
+ SB_BEGIN_3D(so, MACRO_POLYGON_MODE_BACK, 1);
+ SB_DATA (so, nvgl_polygon_mode(cso->fill_back));
+ SB_IMMED_3D(so, POLYGON_SMOOTH_ENABLE, cso->poly_smooth);
+
+ SB_BEGIN_3D(so, CULL_FACE_ENABLE, 3);
+ SB_DATA (so, cso->cull_face != PIPE_FACE_NONE);
+ SB_DATA (so, cso->front_ccw ? NVC0_3D_FRONT_FACE_CCW :
+ NVC0_3D_FRONT_FACE_CW);
+ switch (cso->cull_face) {
+ case PIPE_FACE_FRONT_AND_BACK:
+ SB_DATA(so, NVC0_3D_CULL_FACE_FRONT_AND_BACK);
+ break;
+ case PIPE_FACE_FRONT:
+ SB_DATA(so, NVC0_3D_CULL_FACE_FRONT);
+ break;
+ case PIPE_FACE_BACK:
+ default:
+ SB_DATA(so, NVC0_3D_CULL_FACE_BACK);
+ break;
+ }
+
+ SB_IMMED_3D(so, POLYGON_STIPPLE_ENABLE, cso->poly_stipple_enable);
+ SB_BEGIN_3D(so, POLYGON_OFFSET_POINT_ENABLE, 3);
+ SB_DATA (so, cso->offset_point);
+ SB_DATA (so, cso->offset_line);
+ SB_DATA (so, cso->offset_tri);
+
+ if (cso->offset_point || cso->offset_line || cso->offset_tri) {
+ SB_BEGIN_3D(so, POLYGON_OFFSET_FACTOR, 1);
+ SB_DATA (so, fui(cso->offset_scale));
+ SB_BEGIN_3D(so, POLYGON_OFFSET_UNITS, 1);
+ SB_DATA (so, fui(cso->offset_units * 2.0f));
+ SB_BEGIN_3D(so, POLYGON_OFFSET_CLAMP, 1);
+ SB_DATA (so, fui(cso->offset_clamp));
+ }
+
+ if (cso->depth_clip)
+ reg = NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1;
+ else
+ reg =
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK1_UNK1 |
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_NEAR |
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_CLAMP_FAR |
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_UNK12_UNK2;
+
+ SB_BEGIN_3D(so, VIEW_VOLUME_CLIP_CTRL, 1);
+ SB_DATA (so, reg);
+
+ assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
+ return (void *)so;
+}
+
+static void
+nvc0_rasterizer_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->rast = hwcso;
+ nvc0->dirty |= NVC0_NEW_RASTERIZER;
+}
+
+static void
+nvc0_rasterizer_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+static void *
+nvc0_zsa_state_create(struct pipe_context *pipe,
+ const struct pipe_depth_stencil_alpha_state *cso)
+{
+ struct nvc0_zsa_stateobj *so = CALLOC_STRUCT(nvc0_zsa_stateobj);
+
+ so->pipe = *cso;
+
+ SB_IMMED_3D(so, DEPTH_TEST_ENABLE, cso->depth.enabled);
+ if (cso->depth.enabled) {
+ SB_IMMED_3D(so, DEPTH_WRITE_ENABLE, cso->depth.writemask);
+ SB_BEGIN_3D(so, DEPTH_TEST_FUNC, 1);
+ SB_DATA (so, nvgl_comparison_op(cso->depth.func));
+ }
+
+ if (cso->stencil[0].enabled) {
+ SB_BEGIN_3D(so, STENCIL_ENABLE, 5);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].fail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zfail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[0].zpass_op));
+ SB_DATA (so, nvgl_comparison_op(cso->stencil[0].func));
+ SB_BEGIN_3D(so, STENCIL_FRONT_FUNC_MASK, 2);
+ SB_DATA (so, cso->stencil[0].valuemask);
+ SB_DATA (so, cso->stencil[0].writemask);
+ } else {
+ SB_IMMED_3D(so, STENCIL_ENABLE, 0);
+ }
+
+ if (cso->stencil[1].enabled) {
+ assert(cso->stencil[0].enabled);
+ SB_BEGIN_3D(so, STENCIL_TWO_SIDE_ENABLE, 5);
+ SB_DATA (so, 1);
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].fail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zfail_op));
+ SB_DATA (so, nvgl_stencil_op(cso->stencil[1].zpass_op));
+ SB_DATA (so, nvgl_comparison_op(cso->stencil[1].func));
+ SB_BEGIN_3D(so, STENCIL_BACK_MASK, 2);
+ SB_DATA (so, cso->stencil[1].writemask);
+ SB_DATA (so, cso->stencil[1].valuemask);
+ } else
+ if (cso->stencil[0].enabled) {
+ SB_IMMED_3D(so, STENCIL_TWO_SIDE_ENABLE, 0);
+ }
+
+ SB_IMMED_3D(so, ALPHA_TEST_ENABLE, cso->alpha.enabled);
+ if (cso->alpha.enabled) {
+ SB_BEGIN_3D(so, ALPHA_TEST_REF, 2);
+ SB_DATA (so, fui(cso->alpha.ref_value));
+ SB_DATA (so, nvgl_comparison_op(cso->alpha.func));
+ }
+
+ assert(so->size <= (sizeof(so->state) / sizeof(so->state[0])));
+ return (void *)so;
+}
+
+static void
+nvc0_zsa_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->zsa = hwcso;
+ nvc0->dirty |= NVC0_NEW_ZSA;
+}
+
+static void
+nvc0_zsa_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ FREE(hwcso);
+}
+
+/* ====================== SAMPLERS AND TEXTURES ================================
+ */
+
+#define NV50_TSC_WRAP_CASE(n) \
+ case PIPE_TEX_WRAP_##n: return NV50_TSC_WRAP_##n
+
+static INLINE unsigned
+nv50_tsc_wrap_mode(unsigned wrap)
+{
+ switch (wrap) {
+ NV50_TSC_WRAP_CASE(REPEAT);
+ NV50_TSC_WRAP_CASE(MIRROR_REPEAT);
+ NV50_TSC_WRAP_CASE(CLAMP_TO_EDGE);
+ NV50_TSC_WRAP_CASE(CLAMP_TO_BORDER);
+ NV50_TSC_WRAP_CASE(CLAMP);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_EDGE);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP_TO_BORDER);
+ NV50_TSC_WRAP_CASE(MIRROR_CLAMP);
+ default:
+ NOUVEAU_ERR("unknown wrap mode: %d\n", wrap);
+ return NV50_TSC_WRAP_REPEAT;
+ }
+}
+
+static void
+nvc0_sampler_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ unsigned s, i;
+
+ for (s = 0; s < 5; ++s)
+ for (i = 0; i < nvc0_context(pipe)->num_samplers[s]; ++i)
+ if (nvc0_context(pipe)->samplers[s][i] == hwcso)
+ nvc0_context(pipe)->samplers[s][i] = NULL;
+
+ nvc0_screen_tsc_free(nvc0_context(pipe)->screen, nv50_tsc_entry(hwcso));
+
+ FREE(hwcso);
+}
+
+static INLINE void
+nvc0_stage_sampler_states_bind(struct nvc0_context *nvc0, int s,
+ unsigned nr, void **hwcso)
+{
+ unsigned i;
+
+ for (i = 0; i < nr; ++i) {
+ struct nv50_tsc_entry *old = nvc0->samplers[s][i];
+
+ if (hwcso[i] == old)
+ continue;
+ nvc0->samplers_dirty[s] |= 1 << i;
+
+ nvc0->samplers[s][i] = nv50_tsc_entry(hwcso[i]);
+ if (old)
+ nvc0_screen_tsc_unlock(nvc0->screen, old);
+ }
+ for (; i < nvc0->num_samplers[s]; ++i) {
+ if (nvc0->samplers[s][i]) {
+ nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]);
+ nvc0->samplers[s][i] = NULL;
+ }
+ }
+
+ nvc0->num_samplers[s] = nr;
+
+ nvc0->dirty |= NVC0_NEW_SAMPLERS;
+}
+
+static void
+nvc0_vp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 0, nr, s);
+}
+
+static void
+nvc0_fp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 4, nr, s);
+}
+
+static void
+nvc0_gp_sampler_states_bind(struct pipe_context *pipe, unsigned nr, void **s)
+{
+ nvc0_stage_sampler_states_bind(nvc0_context(pipe), 3, nr, s);
+}
+
+static void
+nvc0_stage_sampler_states_bind_range(struct nvc0_context *nvc0,
+ const unsigned s,
+ unsigned start, unsigned nr, void **cso)
+{
+ const unsigned end = start + nr;
+ int last_valid = -1;
+ unsigned i;
+
+ if (cso) {
+ for (i = start; i < end; ++i) {
+ const unsigned p = i - start;
+ if (cso[p])
+ last_valid = i;
+ if (cso[p] == nvc0->samplers[s][i])
+ continue;
+ nvc0->samplers_dirty[s] |= 1 << i;
+
+ if (nvc0->samplers[s][i])
+ nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]);
+ nvc0->samplers[s][i] = cso[p];
+ }
+ } else {
+ for (i = start; i < end; ++i) {
+ if (nvc0->samplers[s][i]) {
+ nvc0_screen_tsc_unlock(nvc0->screen, nvc0->samplers[s][i]);
+ nvc0->samplers[s][i] = NULL;
+ nvc0->samplers_dirty[s] |= 1 << i;
+ }
+ }
+ }
+
+ if (nvc0->num_samplers[s] <= end) {
+ if (last_valid < 0) {
+ for (i = start; i && !nvc0->samplers[s][i - 1]; --i);
+ nvc0->num_samplers[s] = i;
+ } else {
+ nvc0->num_samplers[s] = last_valid + 1;
+ }
+ }
+}
+
+static void
+nvc0_cp_sampler_states_bind(struct pipe_context *pipe,
+ unsigned start, unsigned nr, void **cso)
+{
+ nvc0_stage_sampler_states_bind_range(nvc0_context(pipe), 5, start, nr, cso);
+
+ nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_SAMPLERS;
+}
+
+/* NOTE: only called when not referenced anywhere, won't be bound */
+static void
+nvc0_sampler_view_destroy(struct pipe_context *pipe,
+ struct pipe_sampler_view *view)
+{
+ pipe_resource_reference(&view->texture, NULL);
+
+ nvc0_screen_tic_free(nvc0_context(pipe)->screen, nv50_tic_entry(view));
+
+ FREE(nv50_tic_entry(view));
+}
+
+static INLINE void
+nvc0_stage_set_sampler_views(struct nvc0_context *nvc0, int s,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ unsigned i;
+
+ for (i = 0; i < nr; ++i) {
+ struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
+
+ if (views[i] == nvc0->textures[s][i])
+ continue;
+ nvc0->textures_dirty[s] |= 1 << i;
+
+ if (old) {
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i));
+ nvc0_screen_tic_unlock(nvc0->screen, old);
+ }
+
+ pipe_sampler_view_reference(&nvc0->textures[s][i], views[i]);
+ }
+
+ for (i = nr; i < nvc0->num_textures[s]; ++i) {
+ struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
+ if (old) {
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(s, i));
+ nvc0_screen_tic_unlock(nvc0->screen, old);
+ pipe_sampler_view_reference(&nvc0->textures[s][i], NULL);
+ }
+ }
+
+ nvc0->num_textures[s] = nr;
+
+ nvc0->dirty |= NVC0_NEW_TEXTURES;
+}
+
+static void
+nvc0_vp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 0, nr, views);
+}
+
+static void
+nvc0_fp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 4, nr, views);
+}
+
+static void
+nvc0_gp_set_sampler_views(struct pipe_context *pipe,
+ unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nvc0_stage_set_sampler_views(nvc0_context(pipe), 3, nr, views);
+}
+
+static void
+nvc0_stage_set_sampler_views_range(struct nvc0_context *nvc0, const unsigned s,
+ unsigned start, unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ struct nouveau_bufctx *bctx = (s == 5) ? nvc0->bufctx_cp : nvc0->bufctx_3d;
+ const unsigned end = start + nr;
+ const unsigned bin = (s == 5) ? NVC0_BIND_CP_TEX(0) : NVC0_BIND_TEX(s, 0);
+ int last_valid = -1;
+ unsigned i;
+
+ if (views) {
+ for (i = start; i < end; ++i) {
+ const unsigned p = i - start;
+ if (views[p])
+ last_valid = i;
+ if (views[p] == nvc0->textures[s][i])
+ continue;
+ nvc0->textures_dirty[s] |= 1 << i;
+
+ if (nvc0->textures[s][i]) {
+ struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
+ nouveau_bufctx_reset(bctx, bin + i);
+ nvc0_screen_tic_unlock(nvc0->screen, old);
+ }
+ pipe_sampler_view_reference(&nvc0->textures[s][i], views[p]);
+ }
+ } else {
+ for (i = start; i < end; ++i) {
+ struct nv50_tic_entry *old = nv50_tic_entry(nvc0->textures[s][i]);
+ if (!old)
+ continue;
+ nvc0->textures_dirty[s] |= 1 << i;
+
+ nvc0_screen_tic_unlock(nvc0->screen, old);
+ pipe_sampler_view_reference(&nvc0->textures[s][i], NULL);
+ nouveau_bufctx_reset(bctx, bin + i);
+ }
+ }
+
+ if (nvc0->num_textures[s] <= end) {
+ if (last_valid < 0) {
+ for (i = start; i && !nvc0->textures[s][i - 1]; --i);
+ nvc0->num_textures[s] = i;
+ } else {
+ nvc0->num_textures[s] = last_valid + 1;
+ }
+ }
+}
+
+static void
+nvc0_cp_set_sampler_views(struct pipe_context *pipe,
+ unsigned start, unsigned nr,
+ struct pipe_sampler_view **views)
+{
+ nvc0_stage_set_sampler_views_range(nvc0_context(pipe), 5, start, nr, views);
+
+ nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_TEXTURES;
+}
+
+
+/* ============================= SHADERS =======================================
+ */
+
+static void *
+nvc0_sp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso, unsigned type)
+{
+ struct nvc0_program *prog;
+
+ prog = CALLOC_STRUCT(nvc0_program);
+ if (!prog)
+ return NULL;
+
+ prog->type = type;
+
+ if (cso->tokens)
+ prog->pipe.tokens = tgsi_dup_tokens(cso->tokens);
+
+ if (cso->stream_output.num_outputs)
+ prog->pipe.stream_output = cso->stream_output;
+
+ return (void *)prog;
+}
+
+static void
+nvc0_sp_state_delete(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_program *prog = (struct nvc0_program *)hwcso;
+
+ nvc0_program_destroy(nvc0_context(pipe), prog);
+
+ FREE((void *)prog->pipe.tokens);
+ FREE(prog);
+}
+
+static void *
+nvc0_vp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_VERTEX);
+}
+
+static void
+nvc0_vp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->vertprog = hwcso;
+ nvc0->dirty |= NVC0_NEW_VERTPROG;
+}
+
+static void *
+nvc0_fp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_FRAGMENT);
+}
+
+static void
+nvc0_fp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->fragprog = hwcso;
+ nvc0->dirty |= NVC0_NEW_FRAGPROG;
+}
+
+static void *
+nvc0_gp_state_create(struct pipe_context *pipe,
+ const struct pipe_shader_state *cso)
+{
+ return nvc0_sp_state_create(pipe, cso, PIPE_SHADER_GEOMETRY);
+}
+
+static void
+nvc0_gp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->gmtyprog = hwcso;
+ nvc0->dirty |= NVC0_NEW_GMTYPROG;
+}
+
+static void *
+nvc0_cp_state_create(struct pipe_context *pipe,
+ const struct pipe_compute_state *cso)
+{
+ struct nvc0_program *prog;
+
+ prog = CALLOC_STRUCT(nvc0_program);
+ if (!prog)
+ return NULL;
+ prog->type = PIPE_SHADER_COMPUTE;
+
+ prog->cp.smem_size = cso->req_local_mem;
+ prog->cp.lmem_size = cso->req_private_mem;
+ prog->parm_size = cso->req_input_mem;
+
+ prog->pipe.tokens = tgsi_dup_tokens((const struct tgsi_token *)cso->prog);
+
+ return (void *)prog;
+}
+
+static void
+nvc0_cp_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->compprog = hwcso;
+ nvc0->dirty_cp |= NVC0_NEW_CP_PROGRAM;
+}
+
+static void
+nvc0_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index,
+ struct pipe_constant_buffer *cb)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct pipe_resource *res = cb ? cb->buffer : NULL;
+ const unsigned s = nvc0_shader_stage(shader);
+ const unsigned i = index;
+
+ if (unlikely(shader == PIPE_SHADER_COMPUTE)) {
+ assert(!cb || !cb->user_buffer);
+ if (nvc0->constbuf[s][i].u.buf)
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_CB(i));
+
+ nvc0->dirty_cp |= NVC0_NEW_CP_CONSTBUF;
+ } else {
+ if (nvc0->constbuf[s][i].user)
+ nvc0->constbuf[s][i].u.buf = NULL;
+ else
+ if (nvc0->constbuf[s][i].u.buf)
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_CB(s, i));
+
+ nvc0->dirty |= NVC0_NEW_CONSTBUF;
+ }
+ nvc0->constbuf_dirty[s] |= 1 << i;
+
+ pipe_resource_reference(&nvc0->constbuf[s][i].u.buf, res);
+
+ nvc0->constbuf[s][i].user = (cb && cb->user_buffer) ? TRUE : FALSE;
+ if (nvc0->constbuf[s][i].user) {
+ nvc0->constbuf[s][i].u.data = cb->user_buffer;
+ nvc0->constbuf[s][i].size = cb->buffer_size;
+ } else
+ if (cb) {
+ nvc0->constbuf[s][i].offset = cb->buffer_offset;
+ nvc0->constbuf[s][i].size = align(cb->buffer_size, 0x100);
+ }
+}
+
+/* =============================================================================
+ */
+
+static void
+nvc0_set_blend_color(struct pipe_context *pipe,
+ const struct pipe_blend_color *bcol)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->blend_colour = *bcol;
+ nvc0->dirty |= NVC0_NEW_BLEND_COLOUR;
+}
+
+static void
+nvc0_set_stencil_ref(struct pipe_context *pipe,
+ const struct pipe_stencil_ref *sr)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->stencil_ref = *sr;
+ nvc0->dirty |= NVC0_NEW_STENCIL_REF;
+}
+
+static void
+nvc0_set_clip_state(struct pipe_context *pipe,
+ const struct pipe_clip_state *clip)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ memcpy(nvc0->clip.ucp, clip->ucp, sizeof(clip->ucp));
+
+ nvc0->dirty |= NVC0_NEW_CLIP;
+}
+
+static void
+nvc0_set_sample_mask(struct pipe_context *pipe, unsigned sample_mask)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->sample_mask = sample_mask;
+ nvc0->dirty |= NVC0_NEW_SAMPLE_MASK;
+}
+
+
+static void
+nvc0_set_framebuffer_state(struct pipe_context *pipe,
+ const struct pipe_framebuffer_state *fb)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ unsigned i;
+
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+
+ for (i = 0; i < fb->nr_cbufs; ++i)
+ pipe_surface_reference(&nvc0->framebuffer.cbufs[i], fb->cbufs[i]);
+ for (; i < nvc0->framebuffer.nr_cbufs; ++i)
+ pipe_surface_reference(&nvc0->framebuffer.cbufs[i], NULL);
+
+ nvc0->framebuffer.nr_cbufs = fb->nr_cbufs;
+
+ nvc0->framebuffer.width = fb->width;
+ nvc0->framebuffer.height = fb->height;
+
+ pipe_surface_reference(&nvc0->framebuffer.zsbuf, fb->zsbuf);
+
+ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+}
+
+static void
+nvc0_set_polygon_stipple(struct pipe_context *pipe,
+ const struct pipe_poly_stipple *stipple)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->stipple = *stipple;
+ nvc0->dirty |= NVC0_NEW_STIPPLE;
+}
+
+static void
+nvc0_set_scissor_states(struct pipe_context *pipe,
+ unsigned start_slot,
+ unsigned num_scissors,
+ const struct pipe_scissor_state *scissor)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->scissor = *scissor;
+ nvc0->dirty |= NVC0_NEW_SCISSOR;
+}
+
+static void
+nvc0_set_viewport_states(struct pipe_context *pipe,
+ unsigned start_slot,
+ unsigned num_viewports,
+ const struct pipe_viewport_state *vpt)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->viewport = *vpt;
+ nvc0->dirty |= NVC0_NEW_VIEWPORT;
+}
+
+static void
+nvc0_set_vertex_buffers(struct pipe_context *pipe,
+ unsigned start_slot, unsigned count,
+ const struct pipe_vertex_buffer *vb)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ unsigned i;
+
+ util_set_vertex_buffers_count(nvc0->vtxbuf, &nvc0->num_vtxbufs, vb,
+ start_slot, count);
+
+ if (!vb) {
+ nvc0->vbo_user &= ~(((1ull << count) - 1) << start_slot);
+ nvc0->constant_vbos &= ~(((1ull << count) - 1) << start_slot);
+ return;
+ }
+
+ for (i = 0; i < count; ++i) {
+ unsigned dst_index = start_slot + i;
+
+ if (vb[i].user_buffer) {
+ nvc0->vbo_user |= 1 << dst_index;
+ if (!vb[i].stride)
+ nvc0->constant_vbos |= 1 << dst_index;
+ else
+ nvc0->constant_vbos &= ~(1 << dst_index);
+ } else {
+ nvc0->vbo_user &= ~(1 << dst_index);
+ nvc0->constant_vbos &= ~(1 << dst_index);
+ }
+ }
+
+ nvc0->dirty |= NVC0_NEW_ARRAYS;
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
+}
+
+static void
+nvc0_set_index_buffer(struct pipe_context *pipe,
+ const struct pipe_index_buffer *ib)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ if (nvc0->idxbuf.buffer)
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_IDX);
+
+ if (ib) {
+ pipe_resource_reference(&nvc0->idxbuf.buffer, ib->buffer);
+ nvc0->idxbuf.index_size = ib->index_size;
+ if (ib->buffer) {
+ nvc0->idxbuf.offset = ib->offset;
+ nvc0->dirty |= NVC0_NEW_IDXBUF;
+ } else {
+ nvc0->idxbuf.user_buffer = ib->user_buffer;
+ nvc0->dirty &= ~NVC0_NEW_IDXBUF;
+ }
+ } else {
+ nvc0->dirty &= ~NVC0_NEW_IDXBUF;
+ pipe_resource_reference(&nvc0->idxbuf.buffer, NULL);
+ }
+}
+
+static void
+nvc0_vertex_state_bind(struct pipe_context *pipe, void *hwcso)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+
+ nvc0->vertex = hwcso;
+ nvc0->dirty |= NVC0_NEW_VERTEX;
+}
+
+static struct pipe_stream_output_target *
+nvc0_so_target_create(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ unsigned offset, unsigned size)
+{
+ struct nvc0_so_target *targ = MALLOC_STRUCT(nvc0_so_target);
+ if (!targ)
+ return NULL;
+
+ targ->pq = pipe->create_query(pipe, NVC0_QUERY_TFB_BUFFER_OFFSET);
+ if (!targ->pq) {
+ FREE(targ);
+ return NULL;
+ }
+ targ->clean = TRUE;
+
+ targ->pipe.buffer_size = size;
+ targ->pipe.buffer_offset = offset;
+ targ->pipe.context = pipe;
+ targ->pipe.buffer = NULL;
+ pipe_resource_reference(&targ->pipe.buffer, res);
+ pipe_reference_init(&targ->pipe.reference, 1);
+
+ return &targ->pipe;
+}
+
+static void
+nvc0_so_target_destroy(struct pipe_context *pipe,
+ struct pipe_stream_output_target *ptarg)
+{
+ struct nvc0_so_target *targ = nvc0_so_target(ptarg);
+ pipe->destroy_query(pipe, targ->pq);
+ pipe_resource_reference(&targ->pipe.buffer, NULL);
+ FREE(targ);
+}
+
+static void
+nvc0_set_transform_feedback_targets(struct pipe_context *pipe,
+ unsigned num_targets,
+ struct pipe_stream_output_target **targets,
+ unsigned append_mask)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ unsigned i;
+ boolean serialize = TRUE;
+
+ assert(num_targets <= 4);
+
+ for (i = 0; i < num_targets; ++i) {
+ if (nvc0->tfbbuf[i] == targets[i] && (append_mask & (1 << i)))
+ continue;
+ nvc0->tfbbuf_dirty |= 1 << i;
+
+ if (nvc0->tfbbuf[i] && nvc0->tfbbuf[i] != targets[i])
+ nvc0_so_target_save_offset(pipe, nvc0->tfbbuf[i], i, &serialize);
+
+ if (targets[i] && !(append_mask & (1 << i)))
+ nvc0_so_target(targets[i])->clean = TRUE;
+
+ pipe_so_target_reference(&nvc0->tfbbuf[i], targets[i]);
+ }
+ for (; i < nvc0->num_tfbbufs; ++i) {
+ nvc0->tfbbuf_dirty |= 1 << i;
+ nvc0_so_target_save_offset(pipe, nvc0->tfbbuf[i], i, &serialize);
+ pipe_so_target_reference(&nvc0->tfbbuf[i], NULL);
+ }
+ nvc0->num_tfbbufs = num_targets;
+
+ if (nvc0->tfbbuf_dirty)
+ nvc0->dirty |= NVC0_NEW_TFB_TARGETS;
+}
+
+static void
+nvc0_bind_surfaces_range(struct nvc0_context *nvc0, const unsigned t,
+ unsigned start, unsigned nr,
+ struct pipe_surface **psurfaces)
+{
+ const unsigned end = start + nr;
+ const unsigned mask = ((1 << nr) - 1) << start;
+ unsigned i;
+
+ if (psurfaces) {
+ for (i = start; i < end; ++i) {
+ const unsigned p = i - start;
+ if (psurfaces[p])
+ nvc0->surfaces_valid[t] |= (1 << i);
+ else
+ nvc0->surfaces_valid[t] &= ~(1 << i);
+ pipe_surface_reference(&nvc0->surfaces[t][i], psurfaces[p]);
+ }
+ } else {
+ for (i = start; i < end; ++i)
+ pipe_surface_reference(&nvc0->surfaces[t][i], NULL);
+ nvc0->surfaces_valid[t] &= ~mask;
+ }
+ nvc0->surfaces_dirty[t] |= mask;
+
+ if (t == 0)
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_SUF);
+ else
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_SUF);
+}
+
+static void
+nvc0_set_compute_resources(struct pipe_context *pipe,
+ unsigned start, unsigned nr,
+ struct pipe_surface **resources)
+{
+ nvc0_bind_surfaces_range(nvc0_context(pipe), 1, start, nr, resources);
+
+ nvc0_context(pipe)->dirty_cp |= NVC0_NEW_CP_SURFACES;
+}
+
+static void
+nvc0_set_shader_resources(struct pipe_context *pipe,
+ unsigned start, unsigned nr,
+ struct pipe_surface **resources)
+{
+ nvc0_bind_surfaces_range(nvc0_context(pipe), 0, start, nr, resources);
+
+ nvc0_context(pipe)->dirty |= NVC0_NEW_SURFACES;
+}
+
+static INLINE void
+nvc0_set_global_handle(uint32_t *phandle, struct pipe_resource *res)
+{
+ struct nv04_resource *buf = nv04_resource(res);
+ if (buf) {
+ uint64_t limit = (buf->address + buf->base.width0) - 1;
+ if (limit < (1ULL << 32)) {
+ *phandle = (uint32_t)buf->address;
+ } else {
+ NOUVEAU_ERR("Cannot map into TGSI_RESOURCE_GLOBAL: "
+ "resource not contained within 32-bit address space !\n");
+ *phandle = 0;
+ }
+ } else {
+ *phandle = 0;
+ }
+}
+
+static void
+nvc0_set_global_bindings(struct pipe_context *pipe,
+ unsigned start, unsigned nr,
+ struct pipe_resource **resources,
+ uint32_t **handles)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct pipe_resource **ptr;
+ unsigned i;
+ const unsigned end = start + nr;
+
+ if (nvc0->global_residents.size <= (end * sizeof(struct pipe_resource *))) {
+ const unsigned old_size = nvc0->global_residents.size;
+ const unsigned req_size = end * sizeof(struct pipe_resource *);
+ util_dynarray_resize(&nvc0->global_residents, req_size);
+ memset((uint8_t *)nvc0->global_residents.data + old_size, 0,
+ req_size - old_size);
+ }
+
+ if (resources) {
+ ptr = util_dynarray_element(
+ &nvc0->global_residents, struct pipe_resource *, start);
+ for (i = 0; i < nr; ++i) {
+ pipe_resource_reference(&ptr[i], resources[i]);
+ nvc0_set_global_handle(handles[i], resources[i]);
+ }
+ } else {
+ ptr = util_dynarray_element(
+ &nvc0->global_residents, struct pipe_resource *, start);
+ for (i = 0; i < nr; ++i)
+ pipe_resource_reference(&ptr[i], NULL);
+ }
+
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL);
+
+ nvc0->dirty_cp = NVC0_NEW_CP_GLOBALS;
+}
+
+void
+nvc0_init_state_functions(struct nvc0_context *nvc0)
+{
+ struct pipe_context *pipe = &nvc0->base.pipe;
+
+ pipe->create_blend_state = nvc0_blend_state_create;
+ pipe->bind_blend_state = nvc0_blend_state_bind;
+ pipe->delete_blend_state = nvc0_blend_state_delete;
+
+ pipe->create_rasterizer_state = nvc0_rasterizer_state_create;
+ pipe->bind_rasterizer_state = nvc0_rasterizer_state_bind;
+ pipe->delete_rasterizer_state = nvc0_rasterizer_state_delete;
+
+ pipe->create_depth_stencil_alpha_state = nvc0_zsa_state_create;
+ pipe->bind_depth_stencil_alpha_state = nvc0_zsa_state_bind;
+ pipe->delete_depth_stencil_alpha_state = nvc0_zsa_state_delete;
+
+ pipe->create_sampler_state = nv50_sampler_state_create;
+ pipe->delete_sampler_state = nvc0_sampler_state_delete;
+ pipe->bind_vertex_sampler_states = nvc0_vp_sampler_states_bind;
+ pipe->bind_fragment_sampler_states = nvc0_fp_sampler_states_bind;
+ pipe->bind_geometry_sampler_states = nvc0_gp_sampler_states_bind;
+ pipe->bind_compute_sampler_states = nvc0_cp_sampler_states_bind;
+
+ pipe->create_sampler_view = nvc0_create_sampler_view;
+ pipe->sampler_view_destroy = nvc0_sampler_view_destroy;
+ pipe->set_vertex_sampler_views = nvc0_vp_set_sampler_views;
+ pipe->set_fragment_sampler_views = nvc0_fp_set_sampler_views;
+ pipe->set_geometry_sampler_views = nvc0_gp_set_sampler_views;
+ pipe->set_compute_sampler_views = nvc0_cp_set_sampler_views;
+
+ pipe->create_vs_state = nvc0_vp_state_create;
+ pipe->create_fs_state = nvc0_fp_state_create;
+ pipe->create_gs_state = nvc0_gp_state_create;
+ pipe->bind_vs_state = nvc0_vp_state_bind;
+ pipe->bind_fs_state = nvc0_fp_state_bind;
+ pipe->bind_gs_state = nvc0_gp_state_bind;
+ pipe->delete_vs_state = nvc0_sp_state_delete;
+ pipe->delete_fs_state = nvc0_sp_state_delete;
+ pipe->delete_gs_state = nvc0_sp_state_delete;
+
+ pipe->create_compute_state = nvc0_cp_state_create;
+ pipe->bind_compute_state = nvc0_cp_state_bind;
+ pipe->delete_compute_state = nvc0_sp_state_delete;
+
+ pipe->set_blend_color = nvc0_set_blend_color;
+ pipe->set_stencil_ref = nvc0_set_stencil_ref;
+ pipe->set_clip_state = nvc0_set_clip_state;
+ pipe->set_sample_mask = nvc0_set_sample_mask;
+ pipe->set_constant_buffer = nvc0_set_constant_buffer;
+ pipe->set_framebuffer_state = nvc0_set_framebuffer_state;
+ pipe->set_polygon_stipple = nvc0_set_polygon_stipple;
+ pipe->set_scissor_states = nvc0_set_scissor_states;
+ pipe->set_viewport_states = nvc0_set_viewport_states;
+
+ pipe->create_vertex_elements_state = nvc0_vertex_state_create;
+ pipe->delete_vertex_elements_state = nvc0_vertex_state_delete;
+ pipe->bind_vertex_elements_state = nvc0_vertex_state_bind;
+
+ pipe->set_vertex_buffers = nvc0_set_vertex_buffers;
+ pipe->set_index_buffer = nvc0_set_index_buffer;
+
+ pipe->create_stream_output_target = nvc0_so_target_create;
+ pipe->stream_output_target_destroy = nvc0_so_target_destroy;
+ pipe->set_stream_output_targets = nvc0_set_transform_feedback_targets;
+
+ pipe->set_global_binding = nvc0_set_global_bindings;
+ pipe->set_compute_resources = nvc0_set_compute_resources;
+ pipe->set_shader_resources = nvc0_set_shader_resources;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
new file mode 100644
index 0000000..0ba4bad
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_state_validate.c
@@ -0,0 +1,577 @@
+
+#include "util/u_math.h"
+
+#include "nvc0/nvc0_context.h"
+
+#if 0
+static void
+nvc0_validate_zcull(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+ struct nv50_surface *sf = nv50_surface(fb->zsbuf);
+ struct nv50_miptree *mt = nv50_miptree(sf->base.texture);
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t size;
+ uint32_t offset = align(mt->total_size, 1 << 17);
+ unsigned width, height;
+
+ assert(mt->base.base.depth0 == 1 && mt->base.base.array_size < 2);
+
+ size = mt->total_size * 2;
+
+ height = align(fb->height, 32);
+ width = fb->width % 224;
+ if (width)
+ width = fb->width + (224 - width);
+ else
+ width = fb->width;
+
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_REGION), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, bo->offset + offset);
+ PUSH_DATA (push, bo->offset + offset);
+ offset += 1 << 17;
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_LIMIT_HIGH), 2);
+ PUSH_DATAh(push, bo->offset + offset);
+ PUSH_DATA (push, bo->offset + offset);
+ BEGIN_NVC0(push, SUBC_3D(0x07e0), 2);
+ PUSH_DATA (push, size);
+ PUSH_DATA (push, size >> 16);
+ BEGIN_NVC0(push, SUBC_3D(0x15c8), 1); /* bits 0x3 */
+ PUSH_DATA (push, 2);
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_WIDTH), 4);
+ PUSH_DATA (push, width);
+ PUSH_DATA (push, height);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_WINDOW_OFFSET_X), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(ZCULL_INVALIDATE), 1);
+ PUSH_DATA (push, 0);
+}
+#endif
+
+static void
+nvc0_validate_fb(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+ unsigned i;
+ unsigned ms_mode = NVC0_3D_MULTISAMPLE_MODE_MS1;
+ boolean serialize = FALSE;
+
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+
+ BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, (076543210 << 4) | fb->nr_cbufs);
+ BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
+ PUSH_DATA (push, fb->width << 16);
+ PUSH_DATA (push, fb->height << 16);
+
+ for (i = 0; i < fb->nr_cbufs; ++i) {
+ struct nv50_surface *sf = nv50_surface(fb->cbufs[i]);
+ struct nv04_resource *res = nv04_resource(sf->base.texture);
+ struct nouveau_bo *bo = res->bo;
+
+ BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(i)), 9);
+ PUSH_DATAh(push, res->address + sf->offset);
+ PUSH_DATA (push, res->address + sf->offset);
+ if (likely(nouveau_bo_memtype(bo))) {
+ struct nv50_miptree *mt = nv50_miptree(sf->base.texture);
+
+ assert(sf->base.texture->target != PIPE_BUFFER);
+
+ PUSH_DATA(push, sf->width);
+ PUSH_DATA(push, sf->height);
+ PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
+ PUSH_DATA(push, (mt->layout_3d << 16) |
+ mt->level[sf->base.u.tex.level].tile_mode);
+ PUSH_DATA(push, sf->base.u.tex.first_layer + sf->depth);
+ PUSH_DATA(push, mt->layer_stride >> 2);
+ PUSH_DATA(push, sf->base.u.tex.first_layer);
+
+ ms_mode = mt->ms_mode;
+ } else {
+ if (res->base.target == PIPE_BUFFER) {
+ PUSH_DATA(push, 262144);
+ PUSH_DATA(push, 1);
+ } else {
+ PUSH_DATA(push, nv50_miptree(sf->base.texture)->level[0].pitch);
+ PUSH_DATA(push, sf->height);
+ }
+ PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
+ PUSH_DATA(push, 1 << 12);
+ PUSH_DATA(push, 1);
+ PUSH_DATA(push, 0);
+ PUSH_DATA(push, 0);
+
+ nvc0_resource_fence(res, NOUVEAU_BO_WR);
+
+ assert(!fb->zsbuf);
+ }
+
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_READING)
+ serialize = TRUE;
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ /* only register for writing, otherwise we'd always serialize here */
+ BCTX_REFN(nvc0->bufctx_3d, FB, res, WR);
+ }
+
+ if (fb->zsbuf) {
+ struct nv50_miptree *mt = nv50_miptree(fb->zsbuf->texture);
+ struct nv50_surface *sf = nv50_surface(fb->zsbuf);
+ int unk = mt->base.base.target == PIPE_TEXTURE_2D;
+
+ BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5);
+ PUSH_DATAh(push, mt->base.address + sf->offset);
+ PUSH_DATA (push, mt->base.address + sf->offset);
+ PUSH_DATA (push, nvc0_format_table[fb->zsbuf->format].rt);
+ PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
+ PUSH_DATA (push, mt->layer_stride >> 2);
+ BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3);
+ PUSH_DATA (push, sf->width);
+ PUSH_DATA (push, sf->height);
+ PUSH_DATA (push, (unk << 16) |
+ (sf->base.u.tex.first_layer + sf->depth));
+ BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
+ PUSH_DATA (push, sf->base.u.tex.first_layer);
+
+ ms_mode = mt->ms_mode;
+
+ if (mt->base.status & NOUVEAU_BUFFER_STATUS_GPU_READING)
+ serialize = TRUE;
+ mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ mt->base.status &= ~NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ BCTX_REFN(nvc0->bufctx_3d, FB, &mt->base, WR);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
+ PUSH_DATA (push, 0);
+ }
+
+ IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_MODE), ms_mode);
+
+ if (serialize)
+ IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
+
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, gpu_serialize_count, serialize);
+}
+
+static void
+nvc0_validate_blend_colour(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ BEGIN_NVC0(push, NVC0_3D(BLEND_COLOR(0)), 4);
+ PUSH_DATAf(push, nvc0->blend_colour.color[0]);
+ PUSH_DATAf(push, nvc0->blend_colour.color[1]);
+ PUSH_DATAf(push, nvc0->blend_colour.color[2]);
+ PUSH_DATAf(push, nvc0->blend_colour.color[3]);
+}
+
+static void
+nvc0_validate_stencil_ref(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const ubyte *ref = &nvc0->stencil_ref.ref_value[0];
+
+ IMMED_NVC0(push, NVC0_3D(STENCIL_FRONT_FUNC_REF), ref[0]);
+ IMMED_NVC0(push, NVC0_3D(STENCIL_BACK_FUNC_REF), ref[1]);
+}
+
+static void
+nvc0_validate_stipple(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ unsigned i;
+
+ BEGIN_NVC0(push, NVC0_3D(POLYGON_STIPPLE_PATTERN(0)), 32);
+ for (i = 0; i < 32; ++i)
+ PUSH_DATA(push, util_bswap32(nvc0->stipple.stipple[i]));
+}
+
+static void
+nvc0_validate_scissor(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct pipe_scissor_state *s = &nvc0->scissor;
+
+ if (!(nvc0->dirty & NVC0_NEW_SCISSOR) &&
+ nvc0->rast->pipe.scissor == nvc0->state.scissor)
+ return;
+ nvc0->state.scissor = nvc0->rast->pipe.scissor;
+
+ BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2);
+ if (nvc0->rast->pipe.scissor) {
+ PUSH_DATA(push, (s->maxx << 16) | s->minx);
+ PUSH_DATA(push, (s->maxy << 16) | s->miny);
+ } else {
+ PUSH_DATA(push, (0xffff << 16) | 0);
+ PUSH_DATA(push, (0xffff << 16) | 0);
+ }
+}
+
+static void
+nvc0_validate_viewport(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct pipe_viewport_state *vp = &nvc0->viewport;
+ int x, y, w, h;
+ float zmin, zmax;
+
+ BEGIN_NVC0(push, NVC0_3D(VIEWPORT_TRANSLATE_X(0)), 3);
+ PUSH_DATAf(push, vp->translate[0]);
+ PUSH_DATAf(push, vp->translate[1]);
+ PUSH_DATAf(push, vp->translate[2]);
+ BEGIN_NVC0(push, NVC0_3D(VIEWPORT_SCALE_X(0)), 3);
+ PUSH_DATAf(push, vp->scale[0]);
+ PUSH_DATAf(push, vp->scale[1]);
+ PUSH_DATAf(push, vp->scale[2]);
+
+ /* now set the viewport rectangle to viewport dimensions for clipping */
+
+ x = util_iround(MAX2(0.0f, vp->translate[0] - fabsf(vp->scale[0])));
+ y = util_iround(MAX2(0.0f, vp->translate[1] - fabsf(vp->scale[1])));
+ w = util_iround(vp->translate[0] + fabsf(vp->scale[0])) - x;
+ h = util_iround(vp->translate[1] + fabsf(vp->scale[1])) - y;
+
+ zmin = vp->translate[2] - fabsf(vp->scale[2]);
+ zmax = vp->translate[2] + fabsf(vp->scale[2]);
+
+ nvc0->vport_int[0] = (w << 16) | x;
+ nvc0->vport_int[1] = (h << 16) | y;
+ BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2);
+ PUSH_DATA (push, nvc0->vport_int[0]);
+ PUSH_DATA (push, nvc0->vport_int[1]);
+ BEGIN_NVC0(push, NVC0_3D(DEPTH_RANGE_NEAR(0)), 2);
+ PUSH_DATAf(push, zmin);
+ PUSH_DATAf(push, zmax);
+}
+
+static INLINE void
+nvc0_upload_uclip_planes(struct nvc0_context *nvc0, unsigned s)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nouveau_bo *bo = nvc0->screen->uniform_bo;
+
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 512);
+ PUSH_DATAh(push, bo->offset + (5 << 16) + (s << 9));
+ PUSH_DATA (push, bo->offset + (5 << 16) + (s << 9));
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), PIPE_MAX_CLIP_PLANES * 4 + 1);
+ PUSH_DATA (push, 256);
+ PUSH_DATAp(push, &nvc0->clip.ucp[0][0], PIPE_MAX_CLIP_PLANES * 4);
+}
+
+static INLINE void
+nvc0_check_program_ucps(struct nvc0_context *nvc0,
+ struct nvc0_program *vp, uint8_t mask)
+{
+ const unsigned n = util_logbase2(mask) + 1;
+
+ if (vp->vp.num_ucps >= n)
+ return;
+ nvc0_program_destroy(nvc0, vp);
+
+ vp->vp.num_ucps = n;
+ if (likely(vp == nvc0->vertprog))
+ nvc0_vertprog_validate(nvc0);
+ else
+ if (likely(vp == nvc0->gmtyprog))
+ nvc0_vertprog_validate(nvc0);
+ else
+ nvc0_tevlprog_validate(nvc0);
+}
+
+static void
+nvc0_validate_clip(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_program *vp;
+ unsigned stage;
+ uint8_t clip_enable = nvc0->rast->pipe.clip_plane_enable;
+
+ if (nvc0->gmtyprog) {
+ stage = 3;
+ vp = nvc0->gmtyprog;
+ } else
+ if (nvc0->tevlprog) {
+ stage = 2;
+ vp = nvc0->tevlprog;
+ } else {
+ stage = 0;
+ vp = nvc0->vertprog;
+ }
+
+ if (clip_enable && vp->vp.num_ucps < PIPE_MAX_CLIP_PLANES)
+ nvc0_check_program_ucps(nvc0, vp, clip_enable);
+
+ if (nvc0->dirty & (NVC0_NEW_CLIP | (NVC0_NEW_VERTPROG << stage)))
+ if (vp->vp.num_ucps > 0 && vp->vp.num_ucps <= PIPE_MAX_CLIP_PLANES)
+ nvc0_upload_uclip_planes(nvc0, stage);
+
+ clip_enable &= vp->vp.clip_enable;
+
+ if (nvc0->state.clip_enable != clip_enable) {
+ nvc0->state.clip_enable = clip_enable;
+ IMMED_NVC0(push, NVC0_3D(CLIP_DISTANCE_ENABLE), clip_enable);
+ }
+ if (nvc0->state.clip_mode != vp->vp.clip_mode) {
+ nvc0->state.clip_mode = vp->vp.clip_mode;
+ BEGIN_NVC0(push, NVC0_3D(CLIP_DISTANCE_MODE), 1);
+ PUSH_DATA (push, vp->vp.clip_mode);
+ }
+}
+
+static void
+nvc0_validate_blend(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ PUSH_SPACE(push, nvc0->blend->size);
+ PUSH_DATAp(push, nvc0->blend->state, nvc0->blend->size);
+}
+
+static void
+nvc0_validate_zsa(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ PUSH_SPACE(push, nvc0->zsa->size);
+ PUSH_DATAp(push, nvc0->zsa->state, nvc0->zsa->size);
+}
+
+static void
+nvc0_validate_rasterizer(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ PUSH_SPACE(push, nvc0->rast->size);
+ PUSH_DATAp(push, nvc0->rast->state, nvc0->rast->size);
+}
+
+static void
+nvc0_constbufs_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ unsigned s;
+
+ for (s = 0; s < 5; ++s) {
+ while (nvc0->constbuf_dirty[s]) {
+ int i = ffs(nvc0->constbuf_dirty[s]) - 1;
+ nvc0->constbuf_dirty[s] &= ~(1 << i);
+
+ if (nvc0->constbuf[s][i].user) {
+ struct nouveau_bo *bo = nvc0->screen->uniform_bo;
+ const unsigned base = s << 16;
+ const unsigned size = nvc0->constbuf[s][0].size;
+ assert(i == 0); /* we really only want OpenGL uniforms here */
+ assert(nvc0->constbuf[s][0].u.data);
+
+ if (nvc0->state.uniform_buffer_bound[s] < size) {
+ nvc0->state.uniform_buffer_bound[s] = align(size, 0x100);
+
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, nvc0->state.uniform_buffer_bound[s]);
+ PUSH_DATAh(push, bo->offset + base);
+ PUSH_DATA (push, bo->offset + base);
+ BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
+ PUSH_DATA (push, (0 << 4) | 1);
+ }
+ nvc0_cb_push(&nvc0->base, bo, NOUVEAU_BO_VRAM,
+ base, nvc0->state.uniform_buffer_bound[s],
+ 0, (size + 3) / 4,
+ nvc0->constbuf[s][0].u.data);
+ } else {
+ struct nv04_resource *res =
+ nv04_resource(nvc0->constbuf[s][i].u.buf);
+ if (res) {
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, nvc0->constbuf[s][i].size);
+ PUSH_DATAh(push, res->address + nvc0->constbuf[s][i].offset);
+ PUSH_DATA (push, res->address + nvc0->constbuf[s][i].offset);
+ BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
+ PUSH_DATA (push, (i << 4) | 1);
+
+ BCTX_REFN(nvc0->bufctx_3d, CB(s, i), res, RD);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(CB_BIND(s)), 1);
+ PUSH_DATA (push, (i << 4) | 0);
+ }
+ if (i == 0)
+ nvc0->state.uniform_buffer_bound[s] = 0;
+ }
+ }
+ }
+}
+
+static void
+nvc0_validate_sample_mask(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ unsigned mask[4] =
+ {
+ nvc0->sample_mask & 0xffff,
+ nvc0->sample_mask & 0xffff,
+ nvc0->sample_mask & 0xffff,
+ nvc0->sample_mask & 0xffff
+ };
+
+ BEGIN_NVC0(push, NVC0_3D(MSAA_MASK(0)), 4);
+ PUSH_DATA (push, mask[0]);
+ PUSH_DATA (push, mask[1]);
+ PUSH_DATA (push, mask[2]);
+ PUSH_DATA (push, mask[3]);
+ BEGIN_NVC0(push, NVC0_3D(SAMPLE_SHADING), 1);
+ PUSH_DATA (push, 0x01);
+}
+
+void
+nvc0_validate_global_residents(struct nvc0_context *nvc0,
+ struct nouveau_bufctx *bctx, int bin)
+{
+ unsigned i;
+
+ for (i = 0; i < nvc0->global_residents.size / sizeof(struct pipe_resource *);
+ ++i) {
+ struct pipe_resource *res = *util_dynarray_element(
+ &nvc0->global_residents, struct pipe_resource *, i);
+ if (res)
+ nvc0_add_resident(bctx, bin, nv04_resource(res), NOUVEAU_BO_RDWR);
+ }
+}
+
+static void
+nvc0_validate_derived_1(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ boolean rasterizer_discard;
+
+ if (nvc0->rast && nvc0->rast->pipe.rasterizer_discard) {
+ rasterizer_discard = TRUE;
+ } else {
+ boolean zs = nvc0->zsa &&
+ (nvc0->zsa->pipe.depth.enabled || nvc0->zsa->pipe.stencil[0].enabled);
+ rasterizer_discard = !zs &&
+ (!nvc0->fragprog || !nvc0->fragprog->hdr[18]);
+ }
+
+ if (rasterizer_discard != nvc0->state.rasterizer_discard) {
+ nvc0->state.rasterizer_discard = rasterizer_discard;
+ IMMED_NVC0(push, NVC0_3D(RASTERIZE_ENABLE), !rasterizer_discard);
+ }
+}
+
+static void
+nvc0_switch_pipe_context(struct nvc0_context *ctx_to)
+{
+ struct nvc0_context *ctx_from = ctx_to->screen->cur_ctx;
+ unsigned s;
+
+ if (ctx_from)
+ ctx_to->state = ctx_from->state;
+
+ ctx_to->dirty = ~0;
+
+ for (s = 0; s < 5; ++s) {
+ ctx_to->samplers_dirty[s] = ~0;
+ ctx_to->textures_dirty[s] = ~0;
+ }
+
+ if (!ctx_to->vertex)
+ ctx_to->dirty &= ~(NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS);
+ if (!ctx_to->idxbuf.buffer)
+ ctx_to->dirty &= ~NVC0_NEW_IDXBUF;
+
+ if (!ctx_to->vertprog)
+ ctx_to->dirty &= ~NVC0_NEW_VERTPROG;
+ if (!ctx_to->fragprog)
+ ctx_to->dirty &= ~NVC0_NEW_FRAGPROG;
+
+ if (!ctx_to->blend)
+ ctx_to->dirty &= ~NVC0_NEW_BLEND;
+ if (!ctx_to->rast)
+ ctx_to->dirty &= ~(NVC0_NEW_RASTERIZER | NVC0_NEW_SCISSOR);
+ if (!ctx_to->zsa)
+ ctx_to->dirty &= ~NVC0_NEW_ZSA;
+
+ ctx_to->screen->cur_ctx = ctx_to;
+}
+
+static struct state_validate {
+ void (*func)(struct nvc0_context *);
+ uint32_t states;
+} validate_list[] = {
+ { nvc0_validate_fb, NVC0_NEW_FRAMEBUFFER },
+ { nvc0_validate_blend, NVC0_NEW_BLEND },
+ { nvc0_validate_zsa, NVC0_NEW_ZSA },
+ { nvc0_validate_sample_mask, NVC0_NEW_SAMPLE_MASK },
+ { nvc0_validate_rasterizer, NVC0_NEW_RASTERIZER },
+ { nvc0_validate_blend_colour, NVC0_NEW_BLEND_COLOUR },
+ { nvc0_validate_stencil_ref, NVC0_NEW_STENCIL_REF },
+ { nvc0_validate_stipple, NVC0_NEW_STIPPLE },
+ { nvc0_validate_scissor, NVC0_NEW_SCISSOR | NVC0_NEW_RASTERIZER },
+ { nvc0_validate_viewport, NVC0_NEW_VIEWPORT },
+ { nvc0_vertprog_validate, NVC0_NEW_VERTPROG },
+ { nvc0_tctlprog_validate, NVC0_NEW_TCTLPROG },
+ { nvc0_tevlprog_validate, NVC0_NEW_TEVLPROG },
+ { nvc0_gmtyprog_validate, NVC0_NEW_GMTYPROG },
+ { nvc0_fragprog_validate, NVC0_NEW_FRAGPROG },
+ { nvc0_validate_derived_1, NVC0_NEW_FRAGPROG | NVC0_NEW_ZSA |
+ NVC0_NEW_RASTERIZER },
+ { nvc0_validate_clip, NVC0_NEW_CLIP | NVC0_NEW_RASTERIZER |
+ NVC0_NEW_VERTPROG |
+ NVC0_NEW_TEVLPROG |
+ NVC0_NEW_GMTYPROG },
+ { nvc0_constbufs_validate, NVC0_NEW_CONSTBUF },
+ { nvc0_validate_textures, NVC0_NEW_TEXTURES },
+ { nvc0_validate_samplers, NVC0_NEW_SAMPLERS },
+ { nve4_set_tex_handles, NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS },
+ { nvc0_vertex_arrays_validate, NVC0_NEW_VERTEX | NVC0_NEW_ARRAYS },
+ { nvc0_validate_surfaces, NVC0_NEW_SURFACES },
+ { nvc0_idxbuf_validate, NVC0_NEW_IDXBUF },
+ { nvc0_tfb_validate, NVC0_NEW_TFB_TARGETS | NVC0_NEW_GMTYPROG }
+};
+#define validate_list_len (sizeof(validate_list) / sizeof(validate_list[0]))
+
+boolean
+nvc0_state_validate(struct nvc0_context *nvc0, uint32_t mask, unsigned words)
+{
+ uint32_t state_mask;
+ int ret;
+ unsigned i;
+
+ if (nvc0->screen->cur_ctx != nvc0)
+ nvc0_switch_pipe_context(nvc0);
+
+ state_mask = nvc0->dirty & mask;
+
+ if (state_mask) {
+ for (i = 0; i < validate_list_len; ++i) {
+ struct state_validate *validate = &validate_list[i];
+
+ if (state_mask & validate->states)
+ validate->func(nvc0);
+ }
+ nvc0->dirty &= ~state_mask;
+
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, FALSE);
+ }
+
+ nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_3d);
+ ret = nouveau_pushbuf_validate(nvc0->base.pushbuf);
+
+ if (unlikely(nvc0->state.flushed)) {
+ nvc0->state.flushed = FALSE;
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_3d, TRUE);
+ }
+ return !ret;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
new file mode 100644
index 0000000..80c3342
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_stateobj.h
@@ -0,0 +1,77 @@
+
+#ifndef __NVC0_STATEOBJ_H__
+#define __NVC0_STATEOBJ_H__
+
+#include "pipe/p_state.h"
+
+#define SB_BEGIN_3D(so, m, s) \
+ (so)->state[(so)->size++] = NVC0_FIFO_PKHDR_SQ(NVC0_3D(m), s)
+
+#define SB_IMMED_3D(so, m, d) \
+ (so)->state[(so)->size++] = NVC0_FIFO_PKHDR_IL(NVC0_3D(m), d)
+
+#define SB_DATA(so, u) (so)->state[(so)->size++] = (u)
+
+#include "nv50/nv50_stateobj_tex.h"
+
+struct nvc0_blend_stateobj {
+ struct pipe_blend_state pipe;
+ int size;
+ uint32_t state[72];
+};
+
+struct nvc0_rasterizer_stateobj {
+ struct pipe_rasterizer_state pipe;
+ int size;
+ uint32_t state[43];
+};
+
+struct nvc0_zsa_stateobj {
+ struct pipe_depth_stencil_alpha_state pipe;
+ int size;
+ uint32_t state[26];
+};
+
+struct nvc0_constbuf {
+ union {
+ struct pipe_resource *buf;
+ const void *data;
+ } u;
+ uint32_t size;
+ uint32_t offset;
+ boolean user; /* should only be TRUE if u.data is valid and non-NULL */
+};
+
+struct nvc0_vertex_element {
+ struct pipe_vertex_element pipe;
+ uint32_t state;
+ uint32_t state_alt; /* buffer 0 and with source offset (for translate) */
+};
+
+struct nvc0_vertex_stateobj {
+ uint32_t min_instance_div[PIPE_MAX_ATTRIBS];
+ uint16_t vb_access_size[PIPE_MAX_ATTRIBS];
+ struct translate *translate;
+ unsigned num_elements;
+ uint32_t instance_elts;
+ uint32_t instance_bufs;
+ boolean shared_slots;
+ boolean need_conversion; /* e.g. VFETCH cannot convert f64 to f32 */
+ unsigned size; /* size of vertex in bytes (when packed) */
+ struct nvc0_vertex_element element[0];
+};
+
+struct nvc0_so_target {
+ struct pipe_stream_output_target pipe;
+ struct pipe_query *pq;
+ unsigned stride;
+ boolean clean;
+};
+
+static INLINE struct nvc0_so_target *
+nvc0_so_target(struct pipe_stream_output_target *ptarg)
+{
+ return (struct nvc0_so_target *)ptarg;
+}
+
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
new file mode 100644
index 0000000..5070df8
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_surface.c
@@ -0,0 +1,1265 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include <stdint.h>
+
+#include "pipe/p_defines.h"
+
+#include "util/u_inlines.h"
+#include "util/u_pack_color.h"
+#include "util/u_format.h"
+#include "util/u_surface.h"
+
+#include "os/os_thread.h"
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_resource.h"
+
+#include "nv50/nv50_defs.xml.h"
+#include "nv50/nv50_texture.xml.h"
+
+/* these are used in nv50_blit.h */
+#define NV50_ENG2D_SUPPORTED_FORMATS 0xff9ccfe1cce3ccc9ULL
+#define NV50_ENG2D_NOCONVERT_FORMATS 0x009cc02000000000ULL
+#define NV50_ENG2D_LUMINANCE_FORMATS 0x001cc02000000000ULL
+#define NV50_ENG2D_INTENSITY_FORMATS 0x0080000000000000ULL
+#define NV50_ENG2D_OPERATION_FORMATS 0x060001c000638000ULL
+
+#define NOUVEAU_DRIVER 0xc0
+#include "nv50/nv50_blit.h"
+
+static INLINE uint8_t
+nvc0_2d_format(enum pipe_format format, boolean dst, boolean dst_src_equal)
+{
+ uint8_t id = nvc0_format_table[format].rt;
+
+ /* A8_UNORM is treated as I8_UNORM as far as the 2D engine is concerned. */
+ if (!dst && unlikely(format == PIPE_FORMAT_I8_UNORM) && !dst_src_equal)
+ return NV50_SURFACE_FORMAT_A8_UNORM;
+
+ /* Hardware values for color formats range from 0xc0 to 0xff,
+ * but the 2D engine doesn't support all of them.
+ */
+ if (nv50_2d_format_supported(format))
+ return id;
+ assert(dst_src_equal);
+
+ switch (util_format_get_blocksize(format)) {
+ case 1:
+ return NV50_SURFACE_FORMAT_R8_UNORM;
+ case 2:
+ return NV50_SURFACE_FORMAT_R16_UNORM;
+ case 4:
+ return NV50_SURFACE_FORMAT_BGRA8_UNORM;
+ case 8:
+ return NV50_SURFACE_FORMAT_RGBA16_UNORM;
+ case 16:
+ return NV50_SURFACE_FORMAT_RGBA32_FLOAT;
+ default:
+ assert(0);
+ return 0;
+ }
+}
+
+static int
+nvc0_2d_texture_set(struct nouveau_pushbuf *push, boolean dst,
+ struct nv50_miptree *mt, unsigned level, unsigned layer,
+ enum pipe_format pformat, boolean dst_src_pformat_equal)
+{
+ struct nouveau_bo *bo = mt->base.bo;
+ uint32_t width, height, depth;
+ uint32_t format;
+ uint32_t mthd = dst ? NVC0_2D_DST_FORMAT : NVC0_2D_SRC_FORMAT;
+ uint32_t offset = mt->level[level].offset;
+
+ format = nvc0_2d_format(pformat, dst, dst_src_pformat_equal);
+ if (!format) {
+ NOUVEAU_ERR("invalid/unsupported surface format: %s\n",
+ util_format_name(pformat));
+ return 1;
+ }
+
+ width = u_minify(mt->base.base.width0, level) << mt->ms_x;
+ height = u_minify(mt->base.base.height0, level) << mt->ms_y;
+ depth = u_minify(mt->base.base.depth0, level);
+
+ /* layer has to be < depth, and depth > tile depth / 2 */
+
+ if (!mt->layout_3d) {
+ offset += mt->layer_stride * layer;
+ layer = 0;
+ depth = 1;
+ } else
+ if (!dst) {
+ offset += nvc0_mt_zslice_offset(mt, level, layer);
+ layer = 0;
+ }
+
+ if (!nouveau_bo_memtype(bo)) {
+ BEGIN_NVC0(push, SUBC_2D(mthd), 2);
+ PUSH_DATA (push, format);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, SUBC_2D(mthd + 0x14), 5);
+ PUSH_DATA (push, mt->level[level].pitch);
+ PUSH_DATA (push, width);
+ PUSH_DATA (push, height);
+ PUSH_DATAh(push, bo->offset + offset);
+ PUSH_DATA (push, bo->offset + offset);
+ } else {
+ BEGIN_NVC0(push, SUBC_2D(mthd), 5);
+ PUSH_DATA (push, format);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, mt->level[level].tile_mode);
+ PUSH_DATA (push, depth);
+ PUSH_DATA (push, layer);
+ BEGIN_NVC0(push, SUBC_2D(mthd + 0x18), 4);
+ PUSH_DATA (push, width);
+ PUSH_DATA (push, height);
+ PUSH_DATAh(push, bo->offset + offset);
+ PUSH_DATA (push, bo->offset + offset);
+ }
+
+#if 0
+ if (dst) {
+ BEGIN_NVC0(push, SUBC_2D(NVC0_2D_CLIP_X), 4);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, width);
+ PUSH_DATA (push, height);
+ }
+#endif
+ return 0;
+}
+
+static int
+nvc0_2d_texture_do_copy(struct nouveau_pushbuf *push,
+ struct nv50_miptree *dst, unsigned dst_level,
+ unsigned dx, unsigned dy, unsigned dz,
+ struct nv50_miptree *src, unsigned src_level,
+ unsigned sx, unsigned sy, unsigned sz,
+ unsigned w, unsigned h)
+{
+ const enum pipe_format dfmt = dst->base.base.format;
+ const enum pipe_format sfmt = src->base.base.format;
+ int ret;
+ boolean eqfmt = dfmt == sfmt;
+
+ if (!PUSH_SPACE(push, 2 * 16 + 32))
+ return PIPE_ERROR;
+
+ ret = nvc0_2d_texture_set(push, TRUE, dst, dst_level, dz, dfmt, eqfmt);
+ if (ret)
+ return ret;
+
+ ret = nvc0_2d_texture_set(push, FALSE, src, src_level, sz, sfmt, eqfmt);
+ if (ret)
+ return ret;
+
+ IMMED_NVC0(push, NVC0_2D(BLIT_CONTROL), 0x00);
+ BEGIN_NVC0(push, NVC0_2D(BLIT_DST_X), 4);
+ PUSH_DATA (push, dx << dst->ms_x);
+ PUSH_DATA (push, dy << dst->ms_y);
+ PUSH_DATA (push, w << dst->ms_x);
+ PUSH_DATA (push, h << dst->ms_y);
+ BEGIN_NVC0(push, NVC0_2D(BLIT_DU_DX_FRACT), 4);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_X_FRACT), 4);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, sx << src->ms_x);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, sy << src->ms_x);
+
+ return 0;
+}
+
+static void
+nvc0_resource_copy_region(struct pipe_context *pipe,
+ struct pipe_resource *dst, unsigned dst_level,
+ unsigned dstx, unsigned dsty, unsigned dstz,
+ struct pipe_resource *src, unsigned src_level,
+ const struct pipe_box *src_box)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ int ret;
+ boolean m2mf;
+ unsigned dst_layer = dstz, src_layer = src_box->z;
+
+ if (dst->target == PIPE_BUFFER && src->target == PIPE_BUFFER) {
+ nouveau_copy_buffer(&nvc0->base,
+ nv04_resource(dst), dstx,
+ nv04_resource(src), src_box->x, src_box->width);
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, buf_copy_bytes, src_box->width);
+ return;
+ }
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_copy_count, 1);
+
+ /* 0 and 1 are equal, only supporting 0/1, 2, 4 and 8 */
+ assert((src->nr_samples | 1) == (dst->nr_samples | 1));
+
+ m2mf = (src->format == dst->format) ||
+ (util_format_get_blocksizebits(src->format) ==
+ util_format_get_blocksizebits(dst->format));
+
+ nv04_resource(dst)->status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+
+ if (m2mf) {
+ struct nv50_m2mf_rect drect, srect;
+ unsigned i;
+ unsigned nx = util_format_get_nblocksx(src->format, src_box->width);
+ unsigned ny = util_format_get_nblocksy(src->format, src_box->height);
+
+ nv50_m2mf_rect_setup(&drect, dst, dst_level, dstx, dsty, dstz);
+ nv50_m2mf_rect_setup(&srect, src, src_level,
+ src_box->x, src_box->y, src_box->z);
+
+ for (i = 0; i < src_box->depth; ++i) {
+ nvc0->m2mf_copy_rect(nvc0, &drect, &srect, nx, ny);
+
+ if (nv50_miptree(dst)->layout_3d)
+ drect.z++;
+ else
+ drect.base += nv50_miptree(dst)->layer_stride;
+
+ if (nv50_miptree(src)->layout_3d)
+ srect.z++;
+ else
+ srect.base += nv50_miptree(src)->layer_stride;
+ }
+ return;
+ }
+
+ assert(nv50_2d_dst_format_faithful(dst->format));
+ assert(nv50_2d_src_format_faithful(src->format));
+
+ BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(src), RD);
+ BCTX_REFN(nvc0->bufctx, 2D, nv04_resource(dst), WR);
+ nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx);
+ nouveau_pushbuf_validate(nvc0->base.pushbuf);
+
+ for (; dst_layer < dstz + src_box->depth; ++dst_layer, ++src_layer) {
+ ret = nvc0_2d_texture_do_copy(nvc0->base.pushbuf,
+ nv50_miptree(dst), dst_level,
+ dstx, dsty, dst_layer,
+ nv50_miptree(src), src_level,
+ src_box->x, src_box->y, src_layer,
+ src_box->width, src_box->height);
+ if (ret)
+ break;
+ }
+ nouveau_bufctx_reset(nvc0->bufctx, 0);
+}
+
+static void
+nvc0_clear_render_target(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ const union pipe_color_union *color,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv50_surface *sf = nv50_surface(dst);
+ struct nv04_resource *res = nv04_resource(sf->base.texture);
+ unsigned z;
+
+ if (!PUSH_SPACE(push, 32 + sf->depth))
+ return;
+
+ PUSH_REFN (push, res->bo, res->domain | NOUVEAU_BO_WR);
+
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4);
+ PUSH_DATAf(push, color->f[0]);
+ PUSH_DATAf(push, color->f[1]);
+ PUSH_DATAf(push, color->f[2]);
+ PUSH_DATAf(push, color->f[3]);
+
+ BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
+ PUSH_DATA (push, ( width << 16) | dstx);
+ PUSH_DATA (push, (height << 16) | dsty);
+
+ BEGIN_NVC0(push, NVC0_3D(RT_CONTROL), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_3D(RT_ADDRESS_HIGH(0)), 9);
+ PUSH_DATAh(push, res->address + sf->offset);
+ PUSH_DATA (push, res->address + sf->offset);
+ if (likely(nouveau_bo_memtype(res->bo))) {
+ struct nv50_miptree *mt = nv50_miptree(dst->texture);
+
+ PUSH_DATA(push, sf->width);
+ PUSH_DATA(push, sf->height);
+ PUSH_DATA(push, nvc0_format_table[dst->format].rt);
+ PUSH_DATA(push, (mt->layout_3d << 16) |
+ mt->level[sf->base.u.tex.level].tile_mode);
+ PUSH_DATA(push, dst->u.tex.first_layer + sf->depth);
+ PUSH_DATA(push, mt->layer_stride >> 2);
+ PUSH_DATA(push, dst->u.tex.first_layer);
+ } else {
+ if (res->base.target == PIPE_BUFFER) {
+ PUSH_DATA(push, 262144);
+ PUSH_DATA(push, 1);
+ } else {
+ PUSH_DATA(push, nv50_miptree(&res->base)->level[0].pitch);
+ PUSH_DATA(push, sf->height);
+ }
+ PUSH_DATA(push, nvc0_format_table[sf->base.format].rt);
+ PUSH_DATA(push, 1 << 12);
+ PUSH_DATA(push, 1);
+ PUSH_DATA(push, 0);
+ PUSH_DATA(push, 0);
+
+ IMMED_NVC0(push, NVC0_3D(ZETA_ENABLE), 0);
+
+ /* tiled textures don't have to be fenced, they're not mapped directly */
+ nvc0_resource_fence(res, NOUVEAU_BO_WR);
+ }
+
+ BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
+ for (z = 0; z < sf->depth; ++z) {
+ PUSH_DATA (push, 0x3c |
+ (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
+ }
+
+ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+}
+
+static void
+nvc0_clear_depth_stencil(struct pipe_context *pipe,
+ struct pipe_surface *dst,
+ unsigned clear_flags,
+ double depth,
+ unsigned stencil,
+ unsigned dstx, unsigned dsty,
+ unsigned width, unsigned height)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv50_miptree *mt = nv50_miptree(dst->texture);
+ struct nv50_surface *sf = nv50_surface(dst);
+ uint32_t mode = 0;
+ int unk = mt->base.base.target == PIPE_TEXTURE_2D;
+ unsigned z;
+
+ if (!PUSH_SPACE(push, 32 + sf->depth))
+ return;
+
+ PUSH_REFN (push, mt->base.bo, mt->base.domain | NOUVEAU_BO_WR);
+
+ if (clear_flags & PIPE_CLEAR_DEPTH) {
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_DEPTH), 1);
+ PUSH_DATAf(push, depth);
+ mode |= NVC0_3D_CLEAR_BUFFERS_Z;
+ }
+
+ if (clear_flags & PIPE_CLEAR_STENCIL) {
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_STENCIL), 1);
+ PUSH_DATA (push, stencil & 0xff);
+ mode |= NVC0_3D_CLEAR_BUFFERS_S;
+ }
+
+ BEGIN_NVC0(push, NVC0_3D(SCREEN_SCISSOR_HORIZ), 2);
+ PUSH_DATA (push, ( width << 16) | dstx);
+ PUSH_DATA (push, (height << 16) | dsty);
+
+ BEGIN_NVC0(push, NVC0_3D(ZETA_ADDRESS_HIGH), 5);
+ PUSH_DATAh(push, mt->base.address + sf->offset);
+ PUSH_DATA (push, mt->base.address + sf->offset);
+ PUSH_DATA (push, nvc0_format_table[dst->format].rt);
+ PUSH_DATA (push, mt->level[sf->base.u.tex.level].tile_mode);
+ PUSH_DATA (push, mt->layer_stride >> 2);
+ BEGIN_NVC0(push, NVC0_3D(ZETA_ENABLE), 1);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_3D(ZETA_HORIZ), 3);
+ PUSH_DATA (push, sf->width);
+ PUSH_DATA (push, sf->height);
+ PUSH_DATA (push, (unk << 16) | (dst->u.tex.first_layer + sf->depth));
+ BEGIN_NVC0(push, NVC0_3D(ZETA_BASE_LAYER), 1);
+ PUSH_DATA (push, dst->u.tex.first_layer);
+
+ BEGIN_NIC0(push, NVC0_3D(CLEAR_BUFFERS), sf->depth);
+ for (z = 0; z < sf->depth; ++z) {
+ PUSH_DATA (push, mode |
+ (z << NVC0_3D_CLEAR_BUFFERS_LAYER__SHIFT));
+ }
+
+ nvc0->dirty |= NVC0_NEW_FRAMEBUFFER;
+}
+
+void
+nvc0_clear(struct pipe_context *pipe, unsigned buffers,
+ const union pipe_color_union *color,
+ double depth, unsigned stencil)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct pipe_framebuffer_state *fb = &nvc0->framebuffer;
+ unsigned i;
+ uint32_t mode = 0;
+
+ /* don't need NEW_BLEND, COLOR_MASK doesn't affect CLEAR_BUFFERS */
+ if (!nvc0_state_validate(nvc0, NVC0_NEW_FRAMEBUFFER, 9 + (fb->nr_cbufs * 2)))
+ return;
+
+ if (buffers & PIPE_CLEAR_COLOR && fb->nr_cbufs) {
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_COLOR(0)), 4);
+ PUSH_DATAf(push, color->f[0]);
+ PUSH_DATAf(push, color->f[1]);
+ PUSH_DATAf(push, color->f[2]);
+ PUSH_DATAf(push, color->f[3]);
+ mode =
+ NVC0_3D_CLEAR_BUFFERS_R | NVC0_3D_CLEAR_BUFFERS_G |
+ NVC0_3D_CLEAR_BUFFERS_B | NVC0_3D_CLEAR_BUFFERS_A;
+ }
+
+ if (buffers & PIPE_CLEAR_DEPTH) {
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_DEPTH), 1);
+ PUSH_DATA (push, fui(depth));
+ mode |= NVC0_3D_CLEAR_BUFFERS_Z;
+ }
+
+ if (buffers & PIPE_CLEAR_STENCIL) {
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_STENCIL), 1);
+ PUSH_DATA (push, stencil & 0xff);
+ mode |= NVC0_3D_CLEAR_BUFFERS_S;
+ }
+
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 1);
+ PUSH_DATA (push, mode);
+
+ for (i = 1; i < fb->nr_cbufs; i++) {
+ BEGIN_NVC0(push, NVC0_3D(CLEAR_BUFFERS), 1);
+ PUSH_DATA (push, (i << 6) | 0x3c);
+ }
+}
+
+
+/* =============================== BLIT CODE ===================================
+ */
+
+struct nvc0_blitter
+{
+ struct nvc0_program *fp[NV50_BLIT_MAX_TEXTURE_TYPES][NV50_BLIT_MODES];
+ struct nvc0_program vp;
+
+ struct nv50_tsc_entry sampler[2]; /* nearest, bilinear */
+
+ pipe_mutex mutex;
+
+ struct nvc0_screen *screen;
+};
+
+struct nvc0_blitctx
+{
+ struct nvc0_context *nvc0;
+ struct nvc0_program *fp;
+ uint8_t mode;
+ uint16_t color_mask;
+ uint8_t filter;
+ enum pipe_texture_target target;
+ struct {
+ struct pipe_framebuffer_state fb;
+ struct nvc0_rasterizer_stateobj *rast;
+ struct nvc0_program *vp;
+ struct nvc0_program *tcp;
+ struct nvc0_program *tep;
+ struct nvc0_program *gp;
+ struct nvc0_program *fp;
+ unsigned num_textures[5];
+ unsigned num_samplers[5];
+ struct pipe_sampler_view *texture[2];
+ struct nv50_tsc_entry *sampler[2];
+ uint32_t dirty;
+ } saved;
+ struct nvc0_rasterizer_stateobj rast;
+};
+
+static void
+nvc0_blitter_make_vp(struct nvc0_blitter *blit)
+{
+ static const uint32_t code_nvc0[] =
+ {
+ 0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */
+ 0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */
+ 0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */
+ 0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */
+ 0x00001de7, 0x80000000, /* exit */
+ };
+ static const uint32_t code_nve4[] =
+ {
+ 0x00000007, 0x20000000, /* sched */
+ 0xfff11c26, 0x06000080, /* vfetch b64 $r4:$r5 a[0x80] */
+ 0xfff01c46, 0x06000090, /* vfetch b96 $r0:$r1:$r2 a[0x90] */
+ 0x13f01c26, 0x0a7e0070, /* export b64 o[0x70] $r4:$r5 */
+ 0x03f01c46, 0x0a7e0080, /* export b96 o[0x80] $r0:$r1:$r2 */
+ 0x00001de7, 0x80000000, /* exit */
+ };
+
+ blit->vp.type = PIPE_SHADER_VERTEX;
+ blit->vp.translated = TRUE;
+ if (blit->screen->base.class_3d >= NVE4_3D_CLASS) {
+ blit->vp.code = (uint32_t *)code_nve4; /* const_cast */
+ blit->vp.code_size = sizeof(code_nve4);
+ } else {
+ blit->vp.code = (uint32_t *)code_nvc0; /* const_cast */
+ blit->vp.code_size = sizeof(code_nvc0);
+ }
+ blit->vp.num_gprs = 6;
+ blit->vp.vp.edgeflag = PIPE_MAX_ATTRIBS;
+
+ blit->vp.hdr[0] = 0x00020461; /* vertprog magic */
+ blit->vp.hdr[4] = 0x000ff000; /* no outputs read */
+ blit->vp.hdr[6] = 0x00000073; /* a[0x80].xy, a[0x90].xyz */
+ blit->vp.hdr[13] = 0x00073000; /* o[0x70].xy, o[0x80].xyz */
+}
+
+static void
+nvc0_blitter_make_sampler(struct nvc0_blitter *blit)
+{
+ /* clamp to edge, min/max lod = 0, nearest filtering */
+
+ blit->sampler[0].id = -1;
+
+ blit->sampler[0].tsc[0] = NV50_TSC_0_SRGB_CONVERSION_ALLOWED |
+ (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPS__SHIFT) |
+ (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPT__SHIFT) |
+ (NV50_TSC_WRAP_CLAMP_TO_EDGE << NV50_TSC_0_WRAPR__SHIFT);
+ blit->sampler[0].tsc[1] =
+ NV50_TSC_1_MAGF_NEAREST | NV50_TSC_1_MINF_NEAREST | NV50_TSC_1_MIPF_NONE;
+
+ /* clamp to edge, min/max lod = 0, bilinear filtering */
+
+ blit->sampler[1].id = -1;
+
+ blit->sampler[1].tsc[0] = blit->sampler[0].tsc[0];
+ blit->sampler[1].tsc[1] =
+ NV50_TSC_1_MAGF_LINEAR | NV50_TSC_1_MINF_LINEAR | NV50_TSC_1_MIPF_NONE;
+}
+
+static void
+nvc0_blit_select_fp(struct nvc0_blitctx *ctx, const struct pipe_blit_info *info)
+{
+ struct nvc0_blitter *blitter = ctx->nvc0->screen->blitter;
+
+ const enum pipe_texture_target ptarg =
+ nv50_blit_reinterpret_pipe_texture_target(info->src.resource->target);
+
+ const unsigned targ = nv50_blit_texture_type(ptarg);
+ const unsigned mode = ctx->mode;
+
+ if (!blitter->fp[targ][mode]) {
+ pipe_mutex_lock(blitter->mutex);
+ if (!blitter->fp[targ][mode])
+ blitter->fp[targ][mode] =
+ nv50_blitter_make_fp(&ctx->nvc0->base.pipe, mode, ptarg);
+ pipe_mutex_unlock(blitter->mutex);
+ }
+ ctx->fp = blitter->fp[targ][mode];
+}
+
+static void
+nvc0_blit_set_dst(struct nvc0_blitctx *ctx,
+ struct pipe_resource *res, unsigned level, unsigned layer,
+ enum pipe_format format)
+{
+ struct nvc0_context *nvc0 = ctx->nvc0;
+ struct pipe_context *pipe = &nvc0->base.pipe;
+ struct pipe_surface templ;
+
+ if (util_format_is_depth_or_stencil(format))
+ templ.format = nv50_blit_zeta_to_colour_format(format);
+ else
+ templ.format = format;
+
+ templ.u.tex.level = level;
+ templ.u.tex.first_layer = templ.u.tex.last_layer = layer;
+
+ if (layer == -1) {
+ templ.u.tex.first_layer = 0;
+ templ.u.tex.last_layer =
+ (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1;
+ }
+
+ nvc0->framebuffer.cbufs[0] = nvc0_miptree_surface_new(pipe, res, &templ);
+ nvc0->framebuffer.nr_cbufs = 1;
+ nvc0->framebuffer.zsbuf = NULL;
+ nvc0->framebuffer.width = nvc0->framebuffer.cbufs[0]->width;
+ nvc0->framebuffer.height = nvc0->framebuffer.cbufs[0]->height;
+}
+
+static void
+nvc0_blit_set_src(struct nvc0_blitctx *ctx,
+ struct pipe_resource *res, unsigned level, unsigned layer,
+ enum pipe_format format, const uint8_t filter)
+{
+ struct nvc0_context *nvc0 = ctx->nvc0;
+ struct pipe_context *pipe = &nvc0->base.pipe;
+ struct pipe_sampler_view templ;
+ uint32_t flags;
+ unsigned s;
+ enum pipe_texture_target target;
+
+ target = nv50_blit_reinterpret_pipe_texture_target(res->target);
+
+ templ.format = format;
+ templ.u.tex.first_layer = templ.u.tex.last_layer = layer;
+ templ.u.tex.first_level = templ.u.tex.last_level = level;
+ templ.swizzle_r = PIPE_SWIZZLE_RED;
+ templ.swizzle_g = PIPE_SWIZZLE_GREEN;
+ templ.swizzle_b = PIPE_SWIZZLE_BLUE;
+ templ.swizzle_a = PIPE_SWIZZLE_ALPHA;
+
+ if (layer == -1) {
+ templ.u.tex.first_layer = 0;
+ templ.u.tex.last_layer =
+ (res->target == PIPE_TEXTURE_3D ? res->depth0 : res->array_size) - 1;
+ }
+
+ flags = res->last_level ? 0 : NV50_TEXVIEW_SCALED_COORDS;
+ flags |= NV50_TEXVIEW_ACCESS_RESOLVE;
+ if (filter && res->nr_samples == 8)
+ flags |= NV50_TEXVIEW_FILTER_MSAA8;
+
+ nvc0->textures[4][0] = nvc0_create_texture_view(
+ pipe, res, &templ, flags, target);
+ nvc0->textures[4][1] = NULL;
+
+ for (s = 0; s <= 3; ++s)
+ nvc0->num_textures[s] = 0;
+ nvc0->num_textures[4] = 1;
+
+ templ.format = nv50_zs_to_s_format(format);
+ if (templ.format != format) {
+ nvc0->textures[4][1] = nvc0_create_texture_view(
+ pipe, res, &templ, flags, target);
+ nvc0->num_textures[4] = 2;
+ }
+}
+
+static void
+nvc0_blitctx_prepare_state(struct nvc0_blitctx *blit)
+{
+ struct nouveau_pushbuf *push = blit->nvc0->base.pushbuf;
+
+ /* TODO: maybe make this a MACRO (if we need more logic) ? */
+
+ if (blit->nvc0->cond_query)
+ IMMED_NVC0(push, NVC0_3D(COND_MODE), NVC0_3D_COND_MODE_ALWAYS);
+
+ /* blend state */
+ BEGIN_NVC0(push, NVC0_3D(COLOR_MASK(0)), 1);
+ PUSH_DATA (push, blit->color_mask);
+ IMMED_NVC0(push, NVC0_3D(BLEND_ENABLE(0)), 0);
+ IMMED_NVC0(push, NVC0_3D(LOGIC_OP_ENABLE), 0);
+
+ /* rasterizer state */
+ IMMED_NVC0(push, NVC0_3D(FRAG_COLOR_CLAMP_EN), 0);
+ IMMED_NVC0(push, NVC0_3D(MULTISAMPLE_ENABLE), 0);
+ BEGIN_NVC0(push, NVC0_3D(MSAA_MASK(0)), 4);
+ PUSH_DATA (push, 0xffff);
+ PUSH_DATA (push, 0xffff);
+ PUSH_DATA (push, 0xffff);
+ PUSH_DATA (push, 0xffff);
+ BEGIN_NVC0(push, NVC0_3D(MACRO_POLYGON_MODE_FRONT), 1);
+ PUSH_DATA (push, NVC0_3D_MACRO_POLYGON_MODE_FRONT_FILL);
+ BEGIN_NVC0(push, NVC0_3D(MACRO_POLYGON_MODE_BACK), 1);
+ PUSH_DATA (push, NVC0_3D_MACRO_POLYGON_MODE_BACK_FILL);
+ IMMED_NVC0(push, NVC0_3D(POLYGON_SMOOTH_ENABLE), 0);
+ IMMED_NVC0(push, NVC0_3D(POLYGON_OFFSET_FILL_ENABLE), 0);
+ IMMED_NVC0(push, NVC0_3D(POLYGON_STIPPLE_ENABLE), 0);
+ IMMED_NVC0(push, NVC0_3D(CULL_FACE_ENABLE), 0);
+
+ /* zsa state */
+ IMMED_NVC0(push, NVC0_3D(DEPTH_TEST_ENABLE), 0);
+ IMMED_NVC0(push, NVC0_3D(STENCIL_ENABLE), 0);
+ IMMED_NVC0(push, NVC0_3D(ALPHA_TEST_ENABLE), 0);
+
+ /* disable transform feedback */
+ IMMED_NVC0(push, NVC0_3D(TFB_ENABLE), 0);
+}
+
+static void
+nvc0_blitctx_pre_blit(struct nvc0_blitctx *ctx)
+{
+ struct nvc0_context *nvc0 = ctx->nvc0;
+ struct nvc0_blitter *blitter = nvc0->screen->blitter;
+ int s;
+
+ ctx->saved.fb.width = nvc0->framebuffer.width;
+ ctx->saved.fb.height = nvc0->framebuffer.height;
+ ctx->saved.fb.nr_cbufs = nvc0->framebuffer.nr_cbufs;
+ ctx->saved.fb.cbufs[0] = nvc0->framebuffer.cbufs[0];
+ ctx->saved.fb.zsbuf = nvc0->framebuffer.zsbuf;
+
+ ctx->saved.rast = nvc0->rast;
+
+ ctx->saved.vp = nvc0->vertprog;
+ ctx->saved.tcp = nvc0->tctlprog;
+ ctx->saved.tep = nvc0->tevlprog;
+ ctx->saved.gp = nvc0->gmtyprog;
+ ctx->saved.fp = nvc0->fragprog;
+
+ nvc0->rast = &ctx->rast;
+
+ nvc0->vertprog = &blitter->vp;
+ nvc0->tctlprog = NULL;
+ nvc0->tevlprog = NULL;
+ nvc0->gmtyprog = NULL;
+ nvc0->fragprog = ctx->fp;
+
+ for (s = 0; s <= 4; ++s) {
+ ctx->saved.num_textures[s] = nvc0->num_textures[s];
+ ctx->saved.num_samplers[s] = nvc0->num_samplers[s];
+ nvc0->textures_dirty[s] = (1 << nvc0->num_textures[s]) - 1;
+ nvc0->samplers_dirty[s] = (1 << nvc0->num_samplers[s]) - 1;
+ }
+ ctx->saved.texture[0] = nvc0->textures[4][0];
+ ctx->saved.texture[1] = nvc0->textures[4][1];
+ ctx->saved.sampler[0] = nvc0->samplers[4][0];
+ ctx->saved.sampler[1] = nvc0->samplers[4][1];
+
+ nvc0->samplers[4][0] = &blitter->sampler[ctx->filter];
+ nvc0->samplers[4][1] = &blitter->sampler[ctx->filter];
+
+ for (s = 0; s <= 3; ++s)
+ nvc0->num_samplers[s] = 0;
+ nvc0->num_samplers[4] = 2;
+
+ ctx->saved.dirty = nvc0->dirty;
+
+ nvc0->textures_dirty[4] |= 3;
+ nvc0->samplers_dirty[4] |= 3;
+
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1));
+
+ nvc0->dirty = NVC0_NEW_FRAMEBUFFER |
+ NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG |
+ NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG |
+ NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS;
+}
+
+static void
+nvc0_blitctx_post_blit(struct nvc0_blitctx *blit)
+{
+ struct nvc0_context *nvc0 = blit->nvc0;
+ int s;
+
+ pipe_surface_reference(&nvc0->framebuffer.cbufs[0], NULL);
+
+ nvc0->framebuffer.width = blit->saved.fb.width;
+ nvc0->framebuffer.height = blit->saved.fb.height;
+ nvc0->framebuffer.nr_cbufs = blit->saved.fb.nr_cbufs;
+ nvc0->framebuffer.cbufs[0] = blit->saved.fb.cbufs[0];
+ nvc0->framebuffer.zsbuf = blit->saved.fb.zsbuf;
+
+ nvc0->rast = blit->saved.rast;
+
+ nvc0->vertprog = blit->saved.vp;
+ nvc0->tctlprog = blit->saved.tcp;
+ nvc0->tevlprog = blit->saved.tep;
+ nvc0->gmtyprog = blit->saved.gp;
+ nvc0->fragprog = blit->saved.fp;
+
+ pipe_sampler_view_reference(&nvc0->textures[4][0], NULL);
+ pipe_sampler_view_reference(&nvc0->textures[4][1], NULL);
+
+ for (s = 0; s <= 4; ++s) {
+ nvc0->num_textures[s] = blit->saved.num_textures[s];
+ nvc0->num_samplers[s] = blit->saved.num_samplers[s];
+ nvc0->textures_dirty[s] = (1 << nvc0->num_textures[s]) - 1;
+ nvc0->samplers_dirty[s] = (1 << nvc0->num_samplers[s]) - 1;
+ }
+ nvc0->textures[4][0] = blit->saved.texture[0];
+ nvc0->textures[4][1] = blit->saved.texture[1];
+ nvc0->samplers[4][0] = blit->saved.sampler[0];
+ nvc0->samplers[4][1] = blit->saved.sampler[1];
+
+ nvc0->textures_dirty[4] |= 3;
+ nvc0->samplers_dirty[4] |= 3;
+
+ if (nvc0->cond_query)
+ nvc0->base.pipe.render_condition(&nvc0->base.pipe, nvc0->cond_query,
+ nvc0->cond_cond, nvc0->cond_mode);
+
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_FB);
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 0));
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_TEX(4, 1));
+
+ nvc0->dirty = blit->saved.dirty |
+ (NVC0_NEW_FRAMEBUFFER | NVC0_NEW_SCISSOR | NVC0_NEW_SAMPLE_MASK |
+ NVC0_NEW_RASTERIZER | NVC0_NEW_ZSA | NVC0_NEW_BLEND |
+ NVC0_NEW_TEXTURES | NVC0_NEW_SAMPLERS |
+ NVC0_NEW_VERTPROG | NVC0_NEW_FRAGPROG |
+ NVC0_NEW_TCTLPROG | NVC0_NEW_TEVLPROG | NVC0_NEW_GMTYPROG |
+ NVC0_NEW_TFB_TARGETS);
+}
+
+static void
+nvc0_blit_3d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
+{
+ struct nvc0_blitctx *blit = nvc0->blit;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct pipe_resource *src = info->src.resource;
+ struct pipe_resource *dst = info->dst.resource;
+ int32_t minx, maxx, miny, maxy;
+ int32_t i;
+ float x0, x1, y0, y1, z;
+ float dz;
+ float x_range, y_range;
+
+ blit->mode = nv50_blit_select_mode(info);
+ blit->color_mask = nv50_blit_derive_color_mask(info);
+ blit->filter = nv50_blit_get_filter(info);
+
+ nvc0_blit_select_fp(blit, info);
+ nvc0_blitctx_pre_blit(blit);
+
+ nvc0_blit_set_dst(blit, dst, info->dst.level, -1, info->dst.format);
+ nvc0_blit_set_src(blit, src, info->src.level, -1, info->src.format,
+ blit->filter);
+
+ nvc0_blitctx_prepare_state(blit);
+
+ nvc0_state_validate(nvc0, ~0, 48);
+
+ x_range = (float)info->src.box.width / (float)info->dst.box.width;
+ y_range = (float)info->src.box.height / (float)info->dst.box.height;
+
+ x0 = (float)info->src.box.x - x_range * (float)info->dst.box.x;
+ y0 = (float)info->src.box.y - y_range * (float)info->dst.box.y;
+
+ x1 = x0 + 16384.0f * x_range;
+ y1 = y0 + 16384.0f * y_range;
+
+ x0 *= (float)(1 << nv50_miptree(src)->ms_x);
+ x1 *= (float)(1 << nv50_miptree(src)->ms_x);
+ y0 *= (float)(1 << nv50_miptree(src)->ms_y);
+ y1 *= (float)(1 << nv50_miptree(src)->ms_y);
+
+ if (src->last_level > 0) {
+ /* If there are mip maps, GPU always assumes normalized coordinates. */
+ const unsigned l = info->src.level;
+ const float fh = u_minify(src->width0 << nv50_miptree(src)->ms_x, l);
+ const float fv = u_minify(src->height0 << nv50_miptree(src)->ms_y, l);
+ x0 /= fh;
+ x1 /= fh;
+ y0 /= fv;
+ y1 /= fv;
+ }
+
+ dz = (float)info->src.box.depth / (float)info->dst.box.depth;
+ z = (float)info->src.box.z;
+ if (nv50_miptree(src)->layout_3d)
+ z += 0.5f * dz;
+
+ IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 0);
+ IMMED_NVC0(push, NVC0_3D(VIEW_VOLUME_CLIP_CTRL), 0x2 |
+ NVC0_3D_VIEW_VOLUME_CLIP_CTRL_DEPTH_RANGE_0_1);
+ BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2);
+ PUSH_DATA (push, nvc0->framebuffer.width << 16);
+ PUSH_DATA (push, nvc0->framebuffer.height << 16);
+
+ /* Draw a large triangle in screen coordinates covering the whole
+ * render target, with scissors defining the destination region.
+ * The vertex is supplied with non-normalized texture coordinates
+ * arranged in a way to yield the desired offset and scale.
+ */
+
+ minx = info->dst.box.x;
+ maxx = info->dst.box.x + info->dst.box.width;
+ miny = info->dst.box.y;
+ maxy = info->dst.box.y + info->dst.box.height;
+ if (info->scissor_enable) {
+ minx = MAX2(minx, info->scissor.minx);
+ maxx = MIN2(maxx, info->scissor.maxx);
+ miny = MAX2(miny, info->scissor.miny);
+ maxy = MIN2(maxy, info->scissor.maxy);
+ }
+ BEGIN_NVC0(push, NVC0_3D(SCISSOR_HORIZ(0)), 2);
+ PUSH_DATA (push, (maxx << 16) | minx);
+ PUSH_DATA (push, (maxy << 16) | miny);
+
+ for (i = 0; i < info->dst.box.depth; ++i, z += dz) {
+ if (info->dst.box.z + i) {
+ BEGIN_NVC0(push, NVC0_3D(LAYER), 1);
+ PUSH_DATA (push, info->dst.box.z + i);
+ }
+
+ IMMED_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL),
+ NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_TRIANGLES);
+
+ BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 4);
+ PUSH_DATA (push, 0x74301);
+ PUSH_DATAf(push, x0);
+ PUSH_DATAf(push, y0);
+ PUSH_DATAf(push, z);
+ BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 3);
+ PUSH_DATA (push, 0x74200);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 0.0f);
+ BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 4);
+ PUSH_DATA (push, 0x74301);
+ PUSH_DATAf(push, x1);
+ PUSH_DATAf(push, y0);
+ PUSH_DATAf(push, z);
+ BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 3);
+ PUSH_DATA (push, 0x74200);
+ PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_x);
+ PUSH_DATAf(push, 0.0f);
+ BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 4);
+ PUSH_DATA (push, 0x74301);
+ PUSH_DATAf(push, x0);
+ PUSH_DATAf(push, y1);
+ PUSH_DATAf(push, z);
+ BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 3);
+ PUSH_DATA (push, 0x74200);
+ PUSH_DATAf(push, 0.0f);
+ PUSH_DATAf(push, 16384 << nv50_miptree(dst)->ms_y);
+
+ IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
+ }
+ if (info->dst.box.z + info->dst.box.depth - 1)
+ IMMED_NVC0(push, NVC0_3D(LAYER), 0);
+
+ nvc0_blitctx_post_blit(blit);
+
+ /* restore viewport */
+
+ BEGIN_NVC0(push, NVC0_3D(VIEWPORT_HORIZ(0)), 2);
+ PUSH_DATA (push, nvc0->vport_int[0]);
+ PUSH_DATA (push, nvc0->vport_int[1]);
+ IMMED_NVC0(push, NVC0_3D(VIEWPORT_TRANSFORM_EN), 1);
+}
+
+static void
+nvc0_blit_eng2d(struct nvc0_context *nvc0, const struct pipe_blit_info *info)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv50_miptree *dst = nv50_miptree(info->dst.resource);
+ struct nv50_miptree *src = nv50_miptree(info->src.resource);
+ const int32_t srcx_adj = info->src.box.width < 0 ? -1 : 0;
+ const int32_t srcy_adj = info->src.box.height < 0 ? -1 : 0;
+ const int dz = info->dst.box.z;
+ const int sz = info->src.box.z;
+ uint32_t dstw, dsth;
+ int32_t dstx, dsty;
+ int64_t srcx, srcy;
+ int64_t du_dx, dv_dy;
+ int i;
+ uint32_t mode;
+ uint32_t mask = nv50_blit_eng2d_get_mask(info);
+ boolean b;
+
+ mode = nv50_blit_get_filter(info) ?
+ NVC0_2D_BLIT_CONTROL_FILTER_BILINEAR :
+ NVC0_2D_BLIT_CONTROL_FILTER_POINT_SAMPLE;
+ mode |= (src->base.base.nr_samples > dst->base.base.nr_samples) ?
+ NVC0_2D_BLIT_CONTROL_ORIGIN_CORNER : NVC0_2D_BLIT_CONTROL_ORIGIN_CENTER;
+
+ du_dx = ((int64_t)info->src.box.width << 32) / info->dst.box.width;
+ dv_dy = ((int64_t)info->src.box.height << 32) / info->dst.box.height;
+
+ b = info->dst.format == info->src.format;
+ nvc0_2d_texture_set(push, 1, dst, info->dst.level, dz, info->dst.format, b);
+ nvc0_2d_texture_set(push, 0, src, info->src.level, sz, info->src.format, b);
+
+ if (info->scissor_enable) {
+ BEGIN_NVC0(push, NVC0_2D(CLIP_X), 5);
+ PUSH_DATA (push, info->scissor.minx << dst->ms_x);
+ PUSH_DATA (push, info->scissor.miny << dst->ms_y);
+ PUSH_DATA (push, (info->scissor.maxx - info->scissor.minx) << dst->ms_x);
+ PUSH_DATA (push, (info->scissor.maxy - info->scissor.miny) << dst->ms_y);
+ PUSH_DATA (push, 1); /* enable */
+ }
+
+ if (mask != 0xffffffff) {
+ IMMED_NVC0(push, NVC0_2D(ROP), 0xca); /* DPSDxax */
+ IMMED_NVC0(push, NVC0_2D(PATTERN_COLOR_FORMAT),
+ NVC0_2D_PATTERN_COLOR_FORMAT_32BPP);
+ BEGIN_NVC0(push, NVC0_2D(PATTERN_COLOR(0)), 4);
+ PUSH_DATA (push, 0x00000000);
+ PUSH_DATA (push, mask);
+ PUSH_DATA (push, 0xffffffff);
+ PUSH_DATA (push, 0xffffffff);
+ IMMED_NVC0(push, NVC0_2D(OPERATION), NVC0_2D_OPERATION_ROP);
+ } else
+ if (info->src.format != info->dst.format) {
+ if (info->src.format == PIPE_FORMAT_R8_UNORM ||
+ info->src.format == PIPE_FORMAT_R8_SNORM ||
+ info->src.format == PIPE_FORMAT_R16_UNORM ||
+ info->src.format == PIPE_FORMAT_R16_SNORM ||
+ info->src.format == PIPE_FORMAT_R16_FLOAT ||
+ info->src.format == PIPE_FORMAT_R32_FLOAT) {
+ mask = 0xffff0000; /* also makes condition for OPERATION reset true */
+ BEGIN_NVC0(push, NVC0_2D(BETA4), 2);
+ PUSH_DATA (push, mask);
+ PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY_PREMULT);
+ } else
+ if (info->src.format == PIPE_FORMAT_A8_UNORM) {
+ mask = 0xff000000;
+ BEGIN_NVC0(push, NVC0_2D(BETA4), 2);
+ PUSH_DATA (push, mask);
+ PUSH_DATA (push, NVC0_2D_OPERATION_SRCCOPY_PREMULT);
+ }
+ }
+
+ if (src->ms_x > dst->ms_x || src->ms_y > dst->ms_y) {
+ /* ms_x is always >= ms_y */
+ du_dx <<= src->ms_x - dst->ms_x;
+ dv_dy <<= src->ms_y - dst->ms_y;
+ } else {
+ du_dx >>= dst->ms_x - src->ms_x;
+ dv_dy >>= dst->ms_y - src->ms_y;
+ }
+
+ srcx = (int64_t)(info->src.box.x + srcx_adj) << (src->ms_x + 32);
+ srcy = (int64_t)(info->src.box.y + srcy_adj) << (src->ms_y + 32);
+
+ if (src->base.base.nr_samples > dst->base.base.nr_samples) {
+ /* center src coorinates for proper MS resolve filtering */
+ srcx += (int64_t)(src->ms_x + 0) << 32;
+ srcy += (int64_t)(src->ms_y + 1) << 31;
+ }
+
+ dstx = info->dst.box.x << dst->ms_x;
+ dsty = info->dst.box.y << dst->ms_y;
+
+ dstw = info->dst.box.width << dst->ms_x;
+ dsth = info->dst.box.height << dst->ms_y;
+
+ if (dstx < 0) {
+ dstw += dstx;
+ srcx -= du_dx * dstx;
+ dstx = 0;
+ }
+ if (dsty < 0) {
+ dsth += dsty;
+ srcy -= dv_dy * dsty;
+ dsty = 0;
+ }
+
+ IMMED_NVC0(push, NVC0_2D(BLIT_CONTROL), mode);
+ BEGIN_NVC0(push, NVC0_2D(BLIT_DST_X), 4);
+ PUSH_DATA (push, dstx);
+ PUSH_DATA (push, dsty);
+ PUSH_DATA (push, dstw);
+ PUSH_DATA (push, dsth);
+ BEGIN_NVC0(push, NVC0_2D(BLIT_DU_DX_FRACT), 4);
+ PUSH_DATA (push, du_dx);
+ PUSH_DATA (push, du_dx >> 32);
+ PUSH_DATA (push, dv_dy);
+ PUSH_DATA (push, dv_dy >> 32);
+
+ BCTX_REFN(nvc0->bufctx, 2D, &dst->base, WR);
+ BCTX_REFN(nvc0->bufctx, 2D, &src->base, RD);
+ nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx);
+ if (nouveau_pushbuf_validate(nvc0->base.pushbuf))
+ return;
+
+ for (i = 0; i < info->dst.box.depth; ++i) {
+ if (i > 0) {
+ /* no scaling in z-direction possible for eng2d blits */
+ if (dst->layout_3d) {
+ BEGIN_NVC0(push, NVC0_2D(DST_LAYER), 1);
+ PUSH_DATA (push, info->dst.box.z + i);
+ } else {
+ const unsigned z = info->dst.box.z + i;
+ BEGIN_NVC0(push, NVC0_2D(DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, dst->base.address + z * dst->layer_stride);
+ PUSH_DATA (push, dst->base.address + z * dst->layer_stride);
+ }
+ if (src->layout_3d) {
+ /* not possible because of depth tiling */
+ assert(0);
+ } else {
+ const unsigned z = info->src.box.z + i;
+ BEGIN_NVC0(push, NVC0_2D(SRC_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, src->base.address + z * src->layer_stride);
+ PUSH_DATA (push, src->base.address + z * src->layer_stride);
+ }
+ BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_Y_INT), 1); /* trigger */
+ PUSH_DATA (push, srcy >> 32);
+ } else {
+ BEGIN_NVC0(push, NVC0_2D(BLIT_SRC_X_FRACT), 4);
+ PUSH_DATA (push, srcx);
+ PUSH_DATA (push, srcx >> 32);
+ PUSH_DATA (push, srcy);
+ PUSH_DATA (push, srcy >> 32);
+ }
+ }
+ nvc0_resource_validate(&dst->base, NOUVEAU_BO_WR);
+ nvc0_resource_validate(&src->base, NOUVEAU_BO_RD);
+
+ nouveau_bufctx_reset(nvc0->bufctx, NVC0_BIND_2D);
+
+ if (info->scissor_enable)
+ IMMED_NVC0(push, NVC0_2D(CLIP_ENABLE), 0);
+ if (mask != 0xffffffff)
+ IMMED_NVC0(push, NVC0_2D(OPERATION), NVC0_2D_OPERATION_SRCCOPY);
+}
+
+static void
+nvc0_blit(struct pipe_context *pipe, const struct pipe_blit_info *info)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ boolean eng3d = FALSE;
+
+ if (util_format_is_depth_or_stencil(info->dst.resource->format)) {
+ if (!(info->mask & PIPE_MASK_ZS))
+ return;
+ if (info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT ||
+ info->dst.resource->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT)
+ eng3d = TRUE;
+ if (info->filter != PIPE_TEX_FILTER_NEAREST)
+ eng3d = TRUE;
+ } else {
+ if (!(info->mask & PIPE_MASK_RGBA))
+ return;
+ if (info->mask != PIPE_MASK_RGBA)
+ eng3d = TRUE;
+ }
+
+ if (nv50_miptree(info->src.resource)->layout_3d) {
+ eng3d = TRUE;
+ } else
+ if (info->src.box.depth != info->dst.box.depth) {
+ eng3d = TRUE;
+ debug_printf("blit: cannot filter array or cube textures in z direction");
+ }
+
+ if (!eng3d && info->dst.format != info->src.format) {
+ if (!nv50_2d_dst_format_faithful(info->dst.format)) {
+ eng3d = TRUE;
+ } else
+ if (!nv50_2d_src_format_faithful(info->src.format)) {
+ if (!util_format_is_luminance(info->src.format)) {
+ if (util_format_is_intensity(info->src.format))
+ eng3d = info->src.format != PIPE_FORMAT_I8_UNORM;
+ else
+ if (!nv50_2d_dst_format_ops_supported(info->dst.format))
+ eng3d = TRUE;
+ else
+ eng3d = !nv50_2d_format_supported(info->src.format);
+ }
+ } else
+ if (util_format_is_luminance_alpha(info->src.format))
+ eng3d = TRUE;
+ }
+
+ if (info->src.resource->nr_samples == 8 &&
+ info->dst.resource->nr_samples <= 1)
+ eng3d = TRUE;
+#if 0
+ /* FIXME: can't make this work with eng2d anymore, at least not on nv50 */
+ if (info->src.resource->nr_samples > 1 ||
+ info->dst.resource->nr_samples > 1)
+ eng3d = TRUE;
+#endif
+ /* FIXME: find correct src coordinates adjustments */
+ if ((info->src.box.width != info->dst.box.width &&
+ info->src.box.width != -info->dst.box.width) ||
+ (info->src.box.height != info->dst.box.height &&
+ info->src.box.height != -info->dst.box.height))
+ eng3d = TRUE;
+
+ if (!eng3d)
+ nvc0_blit_eng2d(nvc0, info);
+ else
+ nvc0_blit_3d(nvc0, info);
+
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_blit_count, 1);
+}
+
+boolean
+nvc0_blitter_create(struct nvc0_screen *screen)
+{
+ screen->blitter = CALLOC_STRUCT(nvc0_blitter);
+ if (!screen->blitter) {
+ NOUVEAU_ERR("failed to allocate blitter struct\n");
+ return FALSE;
+ }
+ screen->blitter->screen = screen;
+
+ pipe_mutex_init(screen->blitter->mutex);
+
+ nvc0_blitter_make_vp(screen->blitter);
+ nvc0_blitter_make_sampler(screen->blitter);
+
+ return TRUE;
+}
+
+void
+nvc0_blitter_destroy(struct nvc0_screen *screen)
+{
+ struct nvc0_blitter *blitter = screen->blitter;
+ unsigned i, m;
+
+ for (i = 0; i < NV50_BLIT_MAX_TEXTURE_TYPES; ++i) {
+ for (m = 0; m < NV50_BLIT_MODES; ++m) {
+ struct nvc0_program *prog = blitter->fp[i][m];
+ if (prog) {
+ nvc0_program_destroy(NULL, prog);
+ FREE((void *)prog->pipe.tokens);
+ FREE(prog);
+ }
+ }
+ }
+
+ FREE(blitter);
+}
+
+boolean
+nvc0_blitctx_create(struct nvc0_context *nvc0)
+{
+ nvc0->blit = CALLOC_STRUCT(nvc0_blitctx);
+ if (!nvc0->blit) {
+ NOUVEAU_ERR("failed to allocate blit context\n");
+ return FALSE;
+ }
+
+ nvc0->blit->nvc0 = nvc0;
+
+ nvc0->blit->rast.pipe.half_pixel_center = 1;
+
+ return TRUE;
+}
+
+void
+nvc0_blitctx_destroy(struct nvc0_context *nvc0)
+{
+ if (nvc0->blit)
+ FREE(nvc0->blit);
+}
+
+void
+nvc0_init_surface_functions(struct nvc0_context *nvc0)
+{
+ struct pipe_context *pipe = &nvc0->base.pipe;
+
+ pipe->resource_copy_region = nvc0_resource_copy_region;
+ pipe->blit = nvc0_blit;
+ pipe->clear_render_target = nvc0_clear_render_target;
+ pipe->clear_depth_stencil = nvc0_clear_depth_stencil;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
new file mode 100644
index 0000000..765cd2d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_tex.c
@@ -0,0 +1,814 @@
+/*
+ * Copyright 2008 Ben Skeggs
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_resource.h"
+#include "nv50/nv50_texture.xml.h"
+#include "nv50/nv50_defs.xml.h"
+
+#include "util/u_format.h"
+
+#define NVE4_TIC_ENTRY_INVALID 0x000fffff
+#define NVE4_TSC_ENTRY_INVALID 0xfff00000
+
+#define NV50_TIC_0_SWIZZLE__MASK \
+ (NV50_TIC_0_MAPA__MASK | NV50_TIC_0_MAPB__MASK | \
+ NV50_TIC_0_MAPG__MASK | NV50_TIC_0_MAPR__MASK)
+
+static INLINE uint32_t
+nv50_tic_swizzle(uint32_t tc, unsigned swz, boolean tex_int)
+{
+ switch (swz) {
+ case PIPE_SWIZZLE_RED:
+ return (tc & NV50_TIC_0_MAPR__MASK) >> NV50_TIC_0_MAPR__SHIFT;
+ case PIPE_SWIZZLE_GREEN:
+ return (tc & NV50_TIC_0_MAPG__MASK) >> NV50_TIC_0_MAPG__SHIFT;
+ case PIPE_SWIZZLE_BLUE:
+ return (tc & NV50_TIC_0_MAPB__MASK) >> NV50_TIC_0_MAPB__SHIFT;
+ case PIPE_SWIZZLE_ALPHA:
+ return (tc & NV50_TIC_0_MAPA__MASK) >> NV50_TIC_0_MAPA__SHIFT;
+ case PIPE_SWIZZLE_ONE:
+ return tex_int ? NV50_TIC_MAP_ONE_INT : NV50_TIC_MAP_ONE_FLOAT;
+ case PIPE_SWIZZLE_ZERO:
+ default:
+ return NV50_TIC_MAP_ZERO;
+ }
+}
+
+struct pipe_sampler_view *
+nvc0_create_sampler_view(struct pipe_context *pipe,
+ struct pipe_resource *res,
+ const struct pipe_sampler_view *templ)
+{
+ uint32_t flags = 0;
+
+ if (res->target == PIPE_TEXTURE_RECT || res->target == PIPE_BUFFER)
+ flags |= NV50_TEXVIEW_SCALED_COORDS;
+
+ return nvc0_create_texture_view(pipe, res, templ, flags, res->target);
+}
+
+struct pipe_sampler_view *
+nvc0_create_texture_view(struct pipe_context *pipe,
+ struct pipe_resource *texture,
+ const struct pipe_sampler_view *templ,
+ uint32_t flags,
+ enum pipe_texture_target target)
+{
+ const struct util_format_description *desc;
+ uint64_t address;
+ uint32_t *tic;
+ uint32_t swz[4];
+ uint32_t width, height;
+ uint32_t depth;
+ struct nv50_tic_entry *view;
+ struct nv50_miptree *mt;
+ boolean tex_int;
+
+ view = MALLOC_STRUCT(nv50_tic_entry);
+ if (!view)
+ return NULL;
+ mt = nv50_miptree(texture);
+
+ view->pipe = *templ;
+ view->pipe.reference.count = 1;
+ view->pipe.texture = NULL;
+ view->pipe.context = pipe;
+
+ view->id = -1;
+
+ pipe_resource_reference(&view->pipe.texture, texture);
+
+ tic = &view->tic[0];
+
+ desc = util_format_description(view->pipe.format);
+
+ tic[0] = nvc0_format_table[view->pipe.format].tic;
+
+ tex_int = util_format_is_pure_integer(view->pipe.format);
+
+ swz[0] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_r, tex_int);
+ swz[1] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_g, tex_int);
+ swz[2] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_b, tex_int);
+ swz[3] = nv50_tic_swizzle(tic[0], view->pipe.swizzle_a, tex_int);
+ tic[0] = (tic[0] & ~NV50_TIC_0_SWIZZLE__MASK) |
+ (swz[0] << NV50_TIC_0_MAPR__SHIFT) |
+ (swz[1] << NV50_TIC_0_MAPG__SHIFT) |
+ (swz[2] << NV50_TIC_0_MAPB__SHIFT) |
+ (swz[3] << NV50_TIC_0_MAPA__SHIFT);
+
+ address = mt->base.address;
+
+ tic[2] = 0x10001000 | NV50_TIC_2_NO_BORDER;
+
+ if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB)
+ tic[2] |= NV50_TIC_2_COLORSPACE_SRGB;
+
+ if (!(flags & NV50_TEXVIEW_SCALED_COORDS))
+ tic[2] |= NV50_TIC_2_NORMALIZED_COORDS;
+
+ /* check for linear storage type */
+ if (unlikely(!nouveau_bo_memtype(nv04_resource(texture)->bo))) {
+ if (texture->target == PIPE_BUFFER) {
+ assert(!(tic[2] & NV50_TIC_2_NORMALIZED_COORDS));
+ address +=
+ view->pipe.u.buf.first_element * desc->block.bits / 8;
+ tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_BUFFER;
+ tic[3] = 0;
+ tic[4] = /* width */
+ view->pipe.u.buf.last_element - view->pipe.u.buf.first_element + 1;
+ tic[5] = 0;
+ } else {
+ /* must be 2D texture without mip maps */
+ tic[2] |= NV50_TIC_2_LINEAR | NV50_TIC_2_TARGET_RECT;
+ tic[3] = mt->level[0].pitch;
+ tic[4] = mt->base.base.width0;
+ tic[5] = (1 << 16) | mt->base.base.height0;
+ }
+ tic[6] =
+ tic[7] = 0;
+ tic[1] = address;
+ tic[2] |= address >> 32;
+ return &view->pipe;
+ }
+
+ tic[2] |=
+ ((mt->level[0].tile_mode & 0x0f0) << (22 - 4)) |
+ ((mt->level[0].tile_mode & 0xf00) << (25 - 8));
+
+ depth = MAX2(mt->base.base.array_size, mt->base.base.depth0);
+
+ if (mt->base.base.array_size > 1) {
+ /* there doesn't seem to be a base layer field in TIC */
+ address += view->pipe.u.tex.first_layer * mt->layer_stride;
+ depth = view->pipe.u.tex.last_layer - view->pipe.u.tex.first_layer + 1;
+ }
+ tic[1] = address;
+ tic[2] |= address >> 32;
+
+ switch (target) {
+ case PIPE_TEXTURE_1D:
+ tic[2] |= NV50_TIC_2_TARGET_1D;
+ break;
+ case PIPE_TEXTURE_2D:
+ tic[2] |= NV50_TIC_2_TARGET_2D;
+ break;
+ case PIPE_TEXTURE_RECT:
+ tic[2] |= NV50_TIC_2_TARGET_RECT;
+ break;
+ case PIPE_TEXTURE_3D:
+ tic[2] |= NV50_TIC_2_TARGET_3D;
+ break;
+ case PIPE_TEXTURE_CUBE:
+ depth /= 6;
+ tic[2] |= NV50_TIC_2_TARGET_CUBE;
+ break;
+ case PIPE_TEXTURE_1D_ARRAY:
+ tic[2] |= NV50_TIC_2_TARGET_1D_ARRAY;
+ break;
+ case PIPE_TEXTURE_2D_ARRAY:
+ tic[2] |= NV50_TIC_2_TARGET_2D_ARRAY;
+ break;
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ depth /= 6;
+ tic[2] |= NV50_TIC_2_TARGET_CUBE_ARRAY;
+ break;
+ default:
+ NOUVEAU_ERR("unexpected/invalid texture target: %d\n",
+ mt->base.base.target);
+ return FALSE;
+ }
+
+ tic[3] = (flags & NV50_TEXVIEW_FILTER_MSAA8) ? 0x20000000 : 0x00300000;
+
+ if (flags & NV50_TEXVIEW_ACCESS_RESOLVE) {
+ width = mt->base.base.width0 << mt->ms_x;
+ height = mt->base.base.height0 << mt->ms_y;
+ } else {
+ width = mt->base.base.width0;
+ height = mt->base.base.height0;
+ }
+
+ tic[4] = (1 << 31) | width;
+
+ tic[5] = height & 0xffff;
+ tic[5] |= depth << 16;
+ tic[5] |= mt->base.base.last_level << 28;
+
+ /* sampling points: (?) */
+ if (flags & NV50_TEXVIEW_ACCESS_RESOLVE)
+ tic[6] = (mt->ms_x > 1) ? 0x88000000 : 0x03000000;
+ else
+ tic[6] = 0x03000000;
+
+ tic[7] = (view->pipe.u.tex.last_level << 4) | view->pipe.u.tex.first_level;
+ tic[7] |= mt->ms_mode << 12;
+
+ return &view->pipe;
+}
+
+static boolean
+nvc0_validate_tic(struct nvc0_context *nvc0, int s)
+{
+ uint32_t commands[32];
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nouveau_bo *txc = nvc0->screen->txc;
+ unsigned i;
+ unsigned n = 0;
+ boolean need_flush = FALSE;
+
+ for (i = 0; i < nvc0->num_textures[s]; ++i) {
+ struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
+ struct nv04_resource *res;
+ const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
+
+ if (!tic) {
+ if (dirty)
+ commands[n++] = (i << 1) | 0;
+ continue;
+ }
+ res = nv04_resource(tic->pipe.texture);
+
+ if (tic->id < 0) {
+ tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
+
+ PUSH_SPACE(push, 17);
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
+ PUSH_DATAh(push, txc->offset + (tic->id * 32));
+ PUSH_DATA (push, txc->offset + (tic->id * 32));
+ BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 32);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
+ PUSH_DATA (push, 0x100111);
+ BEGIN_NIC0(push, NVC0_M2MF(DATA), 8);
+ PUSH_DATAp(push, &tic->tic[0], 8);
+
+ need_flush = TRUE;
+ } else
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
+ PUSH_DATA (push, (tic->id << 4) | 1);
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_cache_flush_count, 1);
+ }
+ nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
+
+ res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ if (!dirty)
+ continue;
+ commands[n++] = (tic->id << 9) | (i << 1) | 1;
+
+ BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD);
+ }
+ for (; i < nvc0->state.num_textures[s]; ++i)
+ commands[n++] = (i << 1) | 0;
+
+ nvc0->state.num_textures[s] = nvc0->num_textures[s];
+
+ if (n) {
+ BEGIN_NIC0(push, NVC0_3D(BIND_TIC(s)), n);
+ PUSH_DATAp(push, commands, n);
+ }
+ nvc0->textures_dirty[s] = 0;
+
+ return need_flush;
+}
+
+static boolean
+nve4_validate_tic(struct nvc0_context *nvc0, unsigned s)
+{
+ struct nouveau_bo *txc = nvc0->screen->txc;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ unsigned i;
+ boolean need_flush = FALSE;
+
+ for (i = 0; i < nvc0->num_textures[s]; ++i) {
+ struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
+ struct nv04_resource *res;
+ const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
+
+ if (!tic) {
+ nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
+ continue;
+ }
+ res = nv04_resource(tic->pipe.texture);
+
+ if (tic->id < 0) {
+ tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
+
+ PUSH_SPACE(push, 16);
+ BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, txc->offset + (tic->id * 32));
+ PUSH_DATA (push, txc->offset + (tic->id * 32));
+ BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 32);
+ PUSH_DATA (push, 1);
+ BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9);
+ PUSH_DATA (push, 0x1001);
+ PUSH_DATAp(push, &tic->tic[0], 8);
+
+ need_flush = TRUE;
+ } else
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ BEGIN_NVC0(push, NVC0_3D(TEX_CACHE_CTL), 1);
+ PUSH_DATA (push, (tic->id << 4) | 1);
+ }
+ nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
+
+ res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
+ nvc0->tex_handles[s][i] |= tic->id;
+ if (dirty)
+ BCTX_REFN(nvc0->bufctx_3d, TEX(s, i), res, RD);
+ }
+ for (; i < nvc0->state.num_textures[s]; ++i) {
+ nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
+ nvc0->textures_dirty[s] |= 1 << i;
+ }
+
+ nvc0->state.num_textures[s] = nvc0->num_textures[s];
+
+ return need_flush;
+}
+
+void nvc0_validate_textures(struct nvc0_context *nvc0)
+{
+ boolean need_flush;
+
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
+ need_flush = nve4_validate_tic(nvc0, 0);
+ need_flush |= nve4_validate_tic(nvc0, 3);
+ need_flush |= nve4_validate_tic(nvc0, 4);
+ } else {
+ need_flush = nvc0_validate_tic(nvc0, 0);
+ need_flush |= nvc0_validate_tic(nvc0, 3);
+ need_flush |= nvc0_validate_tic(nvc0, 4);
+ }
+
+ if (need_flush) {
+ BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TIC_FLUSH), 1);
+ PUSH_DATA (nvc0->base.pushbuf, 0);
+ }
+}
+
+static boolean
+nvc0_validate_tsc(struct nvc0_context *nvc0, int s)
+{
+ uint32_t commands[16];
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ unsigned i;
+ unsigned n = 0;
+ boolean need_flush = FALSE;
+
+ for (i = 0; i < nvc0->num_samplers[s]; ++i) {
+ struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
+
+ if (!(nvc0->samplers_dirty[s] & (1 << i)))
+ continue;
+ if (!tsc) {
+ commands[n++] = (i << 4) | 0;
+ continue;
+ }
+ if (tsc->id < 0) {
+ tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
+
+ nvc0_m2mf_push_linear(&nvc0->base, nvc0->screen->txc,
+ 65536 + tsc->id * 32, NOUVEAU_BO_VRAM,
+ 32, tsc->tsc);
+ need_flush = TRUE;
+ }
+ nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
+
+ commands[n++] = (tsc->id << 12) | (i << 4) | 1;
+ }
+ for (; i < nvc0->state.num_samplers[s]; ++i)
+ commands[n++] = (i << 4) | 0;
+
+ nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
+
+ if (n) {
+ BEGIN_NIC0(push, NVC0_3D(BIND_TSC(s)), n);
+ PUSH_DATAp(push, commands, n);
+ }
+ nvc0->samplers_dirty[s] = 0;
+
+ return need_flush;
+}
+
+boolean
+nve4_validate_tsc(struct nvc0_context *nvc0, int s)
+{
+ struct nouveau_bo *txc = nvc0->screen->txc;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ unsigned i;
+ boolean need_flush = FALSE;
+
+ for (i = 0; i < nvc0->num_samplers[s]; ++i) {
+ struct nv50_tsc_entry *tsc = nv50_tsc_entry(nvc0->samplers[s][i]);
+
+ if (!tsc) {
+ nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
+ continue;
+ }
+ if (tsc->id < 0) {
+ tsc->id = nvc0_screen_tsc_alloc(nvc0->screen, tsc);
+
+ PUSH_SPACE(push, 16);
+ BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, txc->offset + 65536 + (tsc->id * 32));
+ PUSH_DATA (push, txc->offset + 65536 + (tsc->id * 32));
+ BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 32);
+ PUSH_DATA (push, 1);
+ BEGIN_1IC0(push, NVE4_P2MF(EXEC), 9);
+ PUSH_DATA (push, 0x1001);
+ PUSH_DATAp(push, &tsc->tsc[0], 8);
+
+ need_flush = TRUE;
+ }
+ nvc0->screen->tsc.lock[tsc->id / 32] |= 1 << (tsc->id % 32);
+
+ nvc0->tex_handles[s][i] &= ~NVE4_TSC_ENTRY_INVALID;
+ nvc0->tex_handles[s][i] |= tsc->id << 20;
+ }
+ for (; i < nvc0->state.num_samplers[s]; ++i) {
+ nvc0->tex_handles[s][i] |= NVE4_TSC_ENTRY_INVALID;
+ nvc0->samplers_dirty[s] |= 1 << i;
+ }
+
+ nvc0->state.num_samplers[s] = nvc0->num_samplers[s];
+
+ return need_flush;
+}
+
+void nvc0_validate_samplers(struct nvc0_context *nvc0)
+{
+ boolean need_flush;
+
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
+ need_flush = nve4_validate_tsc(nvc0, 0);
+ need_flush |= nve4_validate_tsc(nvc0, 3);
+ need_flush |= nve4_validate_tsc(nvc0, 4);
+ } else {
+ need_flush = nvc0_validate_tsc(nvc0, 0);
+ need_flush |= nvc0_validate_tsc(nvc0, 3);
+ need_flush |= nvc0_validate_tsc(nvc0, 4);
+ }
+
+ if (need_flush) {
+ BEGIN_NVC0(nvc0->base.pushbuf, NVC0_3D(TSC_FLUSH), 1);
+ PUSH_DATA (nvc0->base.pushbuf, 0);
+ }
+}
+
+/* Upload the "diagonal" entries for the possible texture sources ($t == $s).
+ * At some point we might want to get a list of the combinations used by a
+ * shader and fill in those entries instead of having it extract the handles.
+ */
+void
+nve4_set_tex_handles(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ uint64_t address;
+ unsigned s;
+
+ if (nvc0->screen->base.class_3d < NVE4_3D_CLASS)
+ return;
+ address = nvc0->screen->uniform_bo->offset + (5 << 16);
+
+ for (s = 0; s < 5; ++s, address += (1 << 9)) {
+ uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
+ if (!dirty)
+ continue;
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, 512);
+ PUSH_DATAh(push, address);
+ PUSH_DATA (push, address);
+ do {
+ int i = ffs(dirty) - 1;
+ dirty &= ~(1 << i);
+
+ BEGIN_NVC0(push, NVC0_3D(CB_POS), 2);
+ PUSH_DATA (push, (8 + i) * 4);
+ PUSH_DATA (push, nvc0->tex_handles[s][i]);
+ } while (dirty);
+
+ nvc0->textures_dirty[s] = 0;
+ nvc0->samplers_dirty[s] = 0;
+ }
+}
+
+
+static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT];
+static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT];
+static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT];
+
+void
+nve4_set_surface_info(struct nouveau_pushbuf *push,
+ struct pipe_surface *psf,
+ struct nvc0_screen *screen)
+{
+ struct nv50_surface *sf = nv50_surface(psf);
+ struct nv04_resource *res;
+ uint64_t address;
+ uint32_t *const info = push->cur;
+ uint8_t log2cpp;
+
+ if (psf && !nve4_su_format_map[psf->format])
+ NOUVEAU_ERR("unsupported surface format, try is_format_supported() !\n");
+
+ push->cur += 16;
+
+ if (!psf || !nve4_su_format_map[psf->format]) {
+ memset(info, 0, 16 * sizeof(*info));
+
+ info[0] = 0xbadf0000;
+ info[1] = 0x80004000;
+ info[12] = nve4_suldp_lib_offset[PIPE_FORMAT_R32G32B32A32_UINT] +
+ screen->lib_code->start;
+ return;
+ }
+ res = nv04_resource(sf->base.texture);
+
+ address = res->address + sf->offset;
+
+ info[8] = sf->width;
+ info[9] = sf->height;
+ info[10] = sf->depth;
+ switch (res->base.target) {
+ case PIPE_TEXTURE_1D_ARRAY:
+ info[11] = 1;
+ break;
+ case PIPE_TEXTURE_2D:
+ case PIPE_TEXTURE_RECT:
+ info[11] = 2;
+ break;
+ case PIPE_TEXTURE_3D:
+ info[11] = 3;
+ break;
+ case PIPE_TEXTURE_2D_ARRAY:
+ case PIPE_TEXTURE_CUBE:
+ case PIPE_TEXTURE_CUBE_ARRAY:
+ info[11] = 4;
+ break;
+ default:
+ info[11] = 0;
+ break;
+ }
+ log2cpp = (0xf000 & nve4_su_format_aux_map[sf->base.format]) >> 12;
+
+ info[12] = nve4_suldp_lib_offset[sf->base.format] + screen->lib_code->start;
+
+ /* limit in bytes for raw access */
+ info[13] = (0x06 << 22) | ((sf->width << log2cpp) - 1);
+
+ info[1] = nve4_su_format_map[sf->base.format];
+
+#if 0
+ switch (util_format_get_blocksizebits(sf->base.format)) {
+ case 16: info[1] |= 1 << 16; break;
+ case 32: info[1] |= 2 << 16; break;
+ case 64: info[1] |= 3 << 16; break;
+ case 128: info[1] |= 4 << 16; break;
+ default:
+ break;
+ }
+#else
+ info[1] |= log2cpp << 16;
+ info[1] |= 0x4000;
+ info[1] |= (0x0f00 & nve4_su_format_aux_map[sf->base.format]);
+#endif
+
+ if (res->base.target == PIPE_BUFFER) {
+ info[0] = address >> 8;
+ info[2] = sf->width - 1;
+ info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22;
+ info[3] = 0;
+ info[4] = 0;
+ info[5] = 0;
+ info[6] = 0;
+ info[7] = 0;
+ info[14] = 0;
+ info[15] = 0;
+ } else {
+ struct nv50_miptree *mt = nv50_miptree(&res->base);
+ struct nv50_miptree_level *lvl = &mt->level[sf->base.u.tex.level];
+ const unsigned z = sf->base.u.tex.first_layer;
+
+ if (z) {
+ if (mt->layout_3d) {
+ address += nvc0_mt_zslice_offset(mt, psf->u.tex.level, z);
+ /* doesn't work if z passes z-tile boundary */
+ assert(sf->depth == 1);
+ } else {
+ address += mt->layer_stride * z;
+ }
+ }
+ info[0] = address >> 8;
+ info[2] = sf->width - 1;
+ /* NOTE: this is really important: */
+ info[2] |= (0xff & nve4_su_format_aux_map[sf->base.format]) << 22;
+ info[3] = (0x88 << 24) | (lvl->pitch / 64);
+ info[4] = sf->height - 1;
+ info[4] |= (lvl->tile_mode & 0x0f0) << 25;
+ info[4] |= NVC0_TILE_SHIFT_Y(lvl->tile_mode) << 22;
+ info[5] = mt->layer_stride >> 8;
+ info[6] = sf->depth - 1;
+ info[6] |= (lvl->tile_mode & 0xf00) << 21;
+ info[6] |= NVC0_TILE_SHIFT_Z(lvl->tile_mode) << 22;
+ info[7] = 0;
+ info[14] = mt->ms_x;
+ info[15] = mt->ms_y;
+ }
+}
+
+static INLINE void
+nvc0_update_surface_bindings(struct nvc0_context *nvc0)
+{
+ /* TODO */
+}
+
+static INLINE void
+nve4_update_surface_bindings(struct nvc0_context *nvc0)
+{
+ /* TODO */
+}
+
+void
+nvc0_validate_surfaces(struct nvc0_context *nvc0)
+{
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
+ nve4_update_surface_bindings(nvc0);
+ } else {
+ nvc0_update_surface_bindings(nvc0);
+ }
+}
+
+
+static const uint8_t nve4_su_format_map[PIPE_FORMAT_COUNT] =
+{
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = NVE4_IMAGE_FORMAT_RGBA32_FLOAT,
+ [PIPE_FORMAT_R32G32B32A32_SINT] = NVE4_IMAGE_FORMAT_RGBA32_SINT,
+ [PIPE_FORMAT_R32G32B32A32_UINT] = NVE4_IMAGE_FORMAT_RGBA32_UINT,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = NVE4_IMAGE_FORMAT_RGBA16_FLOAT,
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = NVE4_IMAGE_FORMAT_RGBA16_UNORM,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = NVE4_IMAGE_FORMAT_RGBA16_SNORM,
+ [PIPE_FORMAT_R16G16B16A16_SINT] = NVE4_IMAGE_FORMAT_RGBA16_SINT,
+ [PIPE_FORMAT_R16G16B16A16_UINT] = NVE4_IMAGE_FORMAT_RGBA16_UINT,
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = NVE4_IMAGE_FORMAT_RGBA8_UNORM,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = NVE4_IMAGE_FORMAT_RGBA8_SNORM,
+ [PIPE_FORMAT_R8G8B8A8_SINT] = NVE4_IMAGE_FORMAT_RGBA8_SINT,
+ [PIPE_FORMAT_R8G8B8A8_UINT] = NVE4_IMAGE_FORMAT_RGBA8_UINT,
+ [PIPE_FORMAT_R11G11B10_FLOAT] = NVE4_IMAGE_FORMAT_R11G11B10_FLOAT,
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = NVE4_IMAGE_FORMAT_RGB10_A2_UNORM,
+/* [PIPE_FORMAT_R10G10B10A2_UINT] = NVE4_IMAGE_FORMAT_RGB10_A2_UINT, */
+ [PIPE_FORMAT_R32G32_FLOAT] = NVE4_IMAGE_FORMAT_RG32_FLOAT,
+ [PIPE_FORMAT_R32G32_SINT] = NVE4_IMAGE_FORMAT_RG32_SINT,
+ [PIPE_FORMAT_R32G32_UINT] = NVE4_IMAGE_FORMAT_RG32_UINT,
+ [PIPE_FORMAT_R16G16_FLOAT] = NVE4_IMAGE_FORMAT_RG16_FLOAT,
+ [PIPE_FORMAT_R16G16_UNORM] = NVE4_IMAGE_FORMAT_RG16_UNORM,
+ [PIPE_FORMAT_R16G16_SNORM] = NVE4_IMAGE_FORMAT_RG16_SNORM,
+ [PIPE_FORMAT_R16G16_SINT] = NVE4_IMAGE_FORMAT_RG16_SINT,
+ [PIPE_FORMAT_R16G16_UINT] = NVE4_IMAGE_FORMAT_RG16_UINT,
+ [PIPE_FORMAT_R8G8_UNORM] = NVE4_IMAGE_FORMAT_RG8_UNORM,
+ [PIPE_FORMAT_R8G8_SNORM] = NVE4_IMAGE_FORMAT_RG8_SNORM,
+ [PIPE_FORMAT_R8G8_SINT] = NVE4_IMAGE_FORMAT_RG8_SINT,
+ [PIPE_FORMAT_R8G8_UINT] = NVE4_IMAGE_FORMAT_RG8_UINT,
+ [PIPE_FORMAT_R32_FLOAT] = NVE4_IMAGE_FORMAT_R32_FLOAT,
+ [PIPE_FORMAT_R32_SINT] = NVE4_IMAGE_FORMAT_R32_SINT,
+ [PIPE_FORMAT_R32_UINT] = NVE4_IMAGE_FORMAT_R32_UINT,
+ [PIPE_FORMAT_R16_FLOAT] = NVE4_IMAGE_FORMAT_R16_FLOAT,
+ [PIPE_FORMAT_R16_UNORM] = NVE4_IMAGE_FORMAT_R16_UNORM,
+ [PIPE_FORMAT_R16_SNORM] = NVE4_IMAGE_FORMAT_R16_SNORM,
+ [PIPE_FORMAT_R16_SINT] = NVE4_IMAGE_FORMAT_R16_SINT,
+ [PIPE_FORMAT_R16_UINT] = NVE4_IMAGE_FORMAT_R16_UINT,
+ [PIPE_FORMAT_R8_UNORM] = NVE4_IMAGE_FORMAT_R8_UNORM,
+ [PIPE_FORMAT_R8_SNORM] = NVE4_IMAGE_FORMAT_R8_SNORM,
+ [PIPE_FORMAT_R8_SINT] = NVE4_IMAGE_FORMAT_R8_SINT,
+ [PIPE_FORMAT_R8_UINT] = NVE4_IMAGE_FORMAT_R8_UINT,
+};
+
+/* Auxiliary format description values for surface instructions.
+ * (log2(bytes per pixel) << 12) | (unk8 << 8) | unk22
+ */
+static const uint16_t nve4_su_format_aux_map[PIPE_FORMAT_COUNT] =
+{
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x4842,
+ [PIPE_FORMAT_R32G32B32A32_SINT] = 0x4842,
+ [PIPE_FORMAT_R32G32B32A32_UINT] = 0x4842,
+
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x3933,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x3933,
+ [PIPE_FORMAT_R16G16B16A16_SINT] = 0x3933,
+ [PIPE_FORMAT_R16G16B16A16_UINT] = 0x3933,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3933,
+
+ [PIPE_FORMAT_R32G32_FLOAT] = 0x3433,
+ [PIPE_FORMAT_R32G32_SINT] = 0x3433,
+ [PIPE_FORMAT_R32G32_UINT] = 0x3433,
+
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x2a24,
+/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x2a24, */
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x2a24,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x2a24,
+ [PIPE_FORMAT_R8G8B8A8_SINT] = 0x2a24,
+ [PIPE_FORMAT_R8G8B8A8_UINT] = 0x2a24,
+ [PIPE_FORMAT_R11G11B10_FLOAT] = 0x2a24,
+
+ [PIPE_FORMAT_R16G16_UNORM] = 0x2524,
+ [PIPE_FORMAT_R16G16_SNORM] = 0x2524,
+ [PIPE_FORMAT_R16G16_SINT] = 0x2524,
+ [PIPE_FORMAT_R16G16_UINT] = 0x2524,
+ [PIPE_FORMAT_R16G16_FLOAT] = 0x2524,
+
+ [PIPE_FORMAT_R32_SINT] = 0x2024,
+ [PIPE_FORMAT_R32_UINT] = 0x2024,
+ [PIPE_FORMAT_R32_FLOAT] = 0x2024,
+
+ [PIPE_FORMAT_R8G8_UNORM] = 0x1615,
+ [PIPE_FORMAT_R8G8_SNORM] = 0x1615,
+ [PIPE_FORMAT_R8G8_SINT] = 0x1615,
+ [PIPE_FORMAT_R8G8_UINT] = 0x1615,
+
+ [PIPE_FORMAT_R16_UNORM] = 0x1115,
+ [PIPE_FORMAT_R16_SNORM] = 0x1115,
+ [PIPE_FORMAT_R16_SINT] = 0x1115,
+ [PIPE_FORMAT_R16_UINT] = 0x1115,
+ [PIPE_FORMAT_R16_FLOAT] = 0x1115,
+
+ [PIPE_FORMAT_R8_UNORM] = 0x0206,
+ [PIPE_FORMAT_R8_SNORM] = 0x0206,
+ [PIPE_FORMAT_R8_SINT] = 0x0206,
+ [PIPE_FORMAT_R8_UINT] = 0x0206
+};
+
+/* NOTE: These are hardcoded offsets for the shader library.
+ * TODO: Automate them.
+ */
+static const uint16_t nve4_suldp_lib_offset[PIPE_FORMAT_COUNT] =
+{
+ [PIPE_FORMAT_R32G32B32A32_FLOAT] = 0x218,
+ [PIPE_FORMAT_R32G32B32A32_SINT] = 0x218,
+ [PIPE_FORMAT_R32G32B32A32_UINT] = 0x218,
+ [PIPE_FORMAT_R16G16B16A16_UNORM] = 0x248,
+ [PIPE_FORMAT_R16G16B16A16_SNORM] = 0x2b8,
+ [PIPE_FORMAT_R16G16B16A16_SINT] = 0x330,
+ [PIPE_FORMAT_R16G16B16A16_UINT] = 0x388,
+ [PIPE_FORMAT_R16G16B16A16_FLOAT] = 0x3d8,
+ [PIPE_FORMAT_R32G32_FLOAT] = 0x428,
+ [PIPE_FORMAT_R32G32_SINT] = 0x468,
+ [PIPE_FORMAT_R32G32_UINT] = 0x468,
+ [PIPE_FORMAT_R10G10B10A2_UNORM] = 0x4a8,
+/* [PIPE_FORMAT_R10G10B10A2_UINT] = 0x530, */
+ [PIPE_FORMAT_R8G8B8A8_UNORM] = 0x588,
+ [PIPE_FORMAT_R8G8B8A8_SNORM] = 0x5f8,
+ [PIPE_FORMAT_R8G8B8A8_SINT] = 0x670,
+ [PIPE_FORMAT_R8G8B8A8_UINT] = 0x6c8,
+ [PIPE_FORMAT_B5G6R5_UNORM] = 0x718,
+ [PIPE_FORMAT_B5G5R5X1_UNORM] = 0x7a0,
+ [PIPE_FORMAT_R16G16_UNORM] = 0x828,
+ [PIPE_FORMAT_R16G16_SNORM] = 0x890,
+ [PIPE_FORMAT_R16G16_SINT] = 0x8f0,
+ [PIPE_FORMAT_R16G16_UINT] = 0x948,
+ [PIPE_FORMAT_R16G16_FLOAT] = 0x998,
+ [PIPE_FORMAT_R32_FLOAT] = 0x9e8,
+ [PIPE_FORMAT_R32_SINT] = 0xa30,
+ [PIPE_FORMAT_R32_UINT] = 0xa30,
+ [PIPE_FORMAT_R8G8_UNORM] = 0xa78,
+ [PIPE_FORMAT_R8G8_SNORM] = 0xae0,
+ [PIPE_FORMAT_R8G8_UINT] = 0xb48,
+ [PIPE_FORMAT_R8G8_SINT] = 0xb98,
+ [PIPE_FORMAT_R16_UNORM] = 0xbe8,
+ [PIPE_FORMAT_R16_SNORM] = 0xc48,
+ [PIPE_FORMAT_R16_SINT] = 0xca0,
+ [PIPE_FORMAT_R16_UINT] = 0xce8,
+ [PIPE_FORMAT_R16_FLOAT] = 0xd30,
+ [PIPE_FORMAT_R8_UNORM] = 0xd88,
+ [PIPE_FORMAT_R8_SNORM] = 0xde0,
+ [PIPE_FORMAT_R8_SINT] = 0xe38,
+ [PIPE_FORMAT_R8_UINT] = 0xe88,
+ [PIPE_FORMAT_R11G11B10_FLOAT] = 0xed0
+};
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
new file mode 100644
index 0000000..82f1ffc
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_transfer.c
@@ -0,0 +1,558 @@
+
+#include "util/u_format.h"
+
+#include "nvc0/nvc0_context.h"
+
+#include "nv50/nv50_defs.xml.h"
+
+struct nvc0_transfer {
+ struct pipe_transfer base;
+ struct nv50_m2mf_rect rect[2];
+ uint32_t nblocksx;
+ uint16_t nblocksy;
+ uint16_t nlayers;
+};
+
+static void
+nvc0_m2mf_transfer_rect(struct nvc0_context *nvc0,
+ const struct nv50_m2mf_rect *dst,
+ const struct nv50_m2mf_rect *src,
+ uint32_t nblocksx, uint32_t nblocksy)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nouveau_bufctx *bctx = nvc0->bufctx;
+ const int cpp = dst->cpp;
+ uint32_t src_ofst = src->base;
+ uint32_t dst_ofst = dst->base;
+ uint32_t height = nblocksy;
+ uint32_t sy = src->y;
+ uint32_t dy = dst->y;
+ uint32_t exec = (1 << 20);
+
+ assert(dst->cpp == src->cpp);
+
+ nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD);
+ nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, bctx);
+ nouveau_pushbuf_validate(push);
+
+ if (nouveau_bo_memtype(src->bo)) {
+ BEGIN_NVC0(push, NVC0_M2MF(TILING_MODE_IN), 5);
+ PUSH_DATA (push, src->tile_mode);
+ PUSH_DATA (push, src->width * cpp);
+ PUSH_DATA (push, src->height);
+ PUSH_DATA (push, src->depth);
+ PUSH_DATA (push, src->z);
+ } else {
+ src_ofst += src->y * src->pitch + src->x * cpp;
+
+ BEGIN_NVC0(push, NVC0_M2MF(PITCH_IN), 1);
+ PUSH_DATA (push, src->width * cpp);
+
+ exec |= NVC0_M2MF_EXEC_LINEAR_IN;
+ }
+
+ if (nouveau_bo_memtype(dst->bo)) {
+ BEGIN_NVC0(push, NVC0_M2MF(TILING_MODE_OUT), 5);
+ PUSH_DATA (push, dst->tile_mode);
+ PUSH_DATA (push, dst->width * cpp);
+ PUSH_DATA (push, dst->height);
+ PUSH_DATA (push, dst->depth);
+ PUSH_DATA (push, dst->z);
+ } else {
+ dst_ofst += dst->y * dst->pitch + dst->x * cpp;
+
+ BEGIN_NVC0(push, NVC0_M2MF(PITCH_OUT), 1);
+ PUSH_DATA (push, dst->width * cpp);
+
+ exec |= NVC0_M2MF_EXEC_LINEAR_OUT;
+ }
+
+ while (height) {
+ int line_count = height > 2047 ? 2047 : height;
+
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_IN_HIGH), 2);
+ PUSH_DATAh(push, src->bo->offset + src_ofst);
+ PUSH_DATA (push, src->bo->offset + src_ofst);
+
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
+ PUSH_DATAh(push, dst->bo->offset + dst_ofst);
+ PUSH_DATA (push, dst->bo->offset + dst_ofst);
+
+ if (!(exec & NVC0_M2MF_EXEC_LINEAR_IN)) {
+ BEGIN_NVC0(push, NVC0_M2MF(TILING_POSITION_IN_X), 2);
+ PUSH_DATA (push, src->x * cpp);
+ PUSH_DATA (push, sy);
+ } else {
+ src_ofst += line_count * src->pitch;
+ }
+ if (!(exec & NVC0_M2MF_EXEC_LINEAR_OUT)) {
+ BEGIN_NVC0(push, NVC0_M2MF(TILING_POSITION_OUT_X), 2);
+ PUSH_DATA (push, dst->x * cpp);
+ PUSH_DATA (push, dy);
+ } else {
+ dst_ofst += line_count * dst->pitch;
+ }
+
+ BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, nblocksx * cpp);
+ PUSH_DATA (push, line_count);
+ BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
+ PUSH_DATA (push, exec);
+
+ height -= line_count;
+ sy += line_count;
+ dy += line_count;
+ }
+
+ nouveau_bufctx_reset(bctx, 0);
+}
+
+static void
+nve4_m2mf_transfer_rect(struct nvc0_context *nvc0,
+ const struct nv50_m2mf_rect *dst,
+ const struct nv50_m2mf_rect *src,
+ uint32_t nblocksx, uint32_t nblocksy)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nouveau_bufctx *bctx = nvc0->bufctx;
+ uint32_t exec;
+ uint32_t src_base = src->base;
+ uint32_t dst_base = dst->base;
+ const int cpp = dst->cpp;
+
+ assert(dst->cpp == src->cpp);
+
+ nouveau_bufctx_refn(bctx, 0, dst->bo, dst->domain | NOUVEAU_BO_WR);
+ nouveau_bufctx_refn(bctx, 0, src->bo, src->domain | NOUVEAU_BO_RD);
+ nouveau_pushbuf_bufctx(push, bctx);
+ nouveau_pushbuf_validate(push);
+
+ exec = 0x200 /* 2D_ENABLE */ | 0x6 /* UNK */;
+
+ if (!nouveau_bo_memtype(dst->bo)) {
+ assert(!dst->z);
+ dst_base += dst->y * dst->pitch + dst->x * cpp;
+ exec |= 0x100; /* DST_MODE_2D_LINEAR */
+ }
+ if (!nouveau_bo_memtype(src->bo)) {
+ assert(!src->z);
+ src_base += src->y * src->pitch + src->x * cpp;
+ exec |= 0x080; /* SRC_MODE_2D_LINEAR */
+ }
+
+ BEGIN_NVC0(push, SUBC_COPY(0x070c), 6);
+ PUSH_DATA (push, 0x1000 | dst->tile_mode);
+ PUSH_DATA (push, dst->pitch);
+ PUSH_DATA (push, dst->height);
+ PUSH_DATA (push, dst->depth);
+ PUSH_DATA (push, dst->z);
+ PUSH_DATA (push, (dst->y << 16) | (dst->x * cpp));
+
+ BEGIN_NVC0(push, SUBC_COPY(0x0728), 6);
+ PUSH_DATA (push, 0x1000 | src->tile_mode);
+ PUSH_DATA (push, src->pitch);
+ PUSH_DATA (push, src->height);
+ PUSH_DATA (push, src->depth);
+ PUSH_DATA (push, src->z);
+ PUSH_DATA (push, (src->y << 16) | (src->x * cpp));
+
+ BEGIN_NVC0(push, SUBC_COPY(0x0400), 8);
+ PUSH_DATAh(push, src->bo->offset + src_base);
+ PUSH_DATA (push, src->bo->offset + src_base);
+ PUSH_DATAh(push, dst->bo->offset + dst_base);
+ PUSH_DATA (push, dst->bo->offset + dst_base);
+ PUSH_DATA (push, src->pitch);
+ PUSH_DATA (push, dst->pitch);
+ PUSH_DATA (push, nblocksx * cpp);
+ PUSH_DATA (push, nblocksy);
+
+ BEGIN_NVC0(push, SUBC_COPY(0x0300), 1);
+ PUSH_DATA (push, exec);
+
+ nouveau_bufctx_reset(bctx, 0);
+}
+
+void
+nvc0_m2mf_push_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, const void *data)
+{
+ struct nvc0_context *nvc0 = nvc0_context(&nv->pipe);
+ struct nouveau_pushbuf *push = nv->pushbuf;
+ uint32_t *src = (uint32_t *)data;
+ unsigned count = (size + 3) / 4;
+
+ nouveau_bufctx_refn(nvc0->bufctx, 0, dst, domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nvc0->bufctx);
+ nouveau_pushbuf_validate(push);
+
+ while (count) {
+ unsigned nr;
+
+ if (!PUSH_SPACE(push, 16))
+ break;
+ nr = PUSH_AVAIL(push);
+ assert(nr >= 16);
+ nr = MIN2(count, nr - 9);
+ nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN);
+
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
+ PUSH_DATAh(push, dst->offset + offset);
+ PUSH_DATA (push, dst->offset + offset);
+ BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, MIN2(size, nr * 4));
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
+ PUSH_DATA (push, 0x100111);
+
+ /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
+ BEGIN_NIC0(push, NVC0_M2MF(DATA), nr);
+ PUSH_DATAp(push, src, nr);
+
+ count -= nr;
+ src += nr;
+ offset += nr * 4;
+ size -= nr * 4;
+ }
+
+ nouveau_bufctx_reset(nvc0->bufctx, 0);
+}
+
+void
+nve4_p2mf_push_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned offset, unsigned domain,
+ unsigned size, const void *data)
+{
+ struct nvc0_context *nvc0 = nvc0_context(&nv->pipe);
+ struct nouveau_pushbuf *push = nv->pushbuf;
+ uint32_t *src = (uint32_t *)data;
+ unsigned count = (size + 3) / 4;
+
+ nouveau_bufctx_refn(nvc0->bufctx, 0, dst, domain | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, nvc0->bufctx);
+ nouveau_pushbuf_validate(push);
+
+ while (count) {
+ unsigned nr;
+
+ if (!PUSH_SPACE(push, 16))
+ break;
+ nr = PUSH_AVAIL(push);
+ assert(nr >= 16);
+ nr = MIN2(count, nr - 8);
+ nr = MIN2(nr, (NV04_PFIFO_MAX_PACKET_LEN - 1));
+
+ BEGIN_NVC0(push, NVE4_P2MF(DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, dst->offset + offset);
+ PUSH_DATA (push, dst->offset + offset);
+ BEGIN_NVC0(push, NVE4_P2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, MIN2(size, nr * 4));
+ PUSH_DATA (push, 1);
+ /* must not be interrupted (trap on QUERY fence, 0x50 works however) */
+ BEGIN_1IC0(push, NVE4_P2MF(EXEC), nr + 1);
+ PUSH_DATA (push, 0x1001);
+ PUSH_DATAp(push, src, nr);
+
+ count -= nr;
+ src += nr;
+ offset += nr * 4;
+ size -= nr * 4;
+ }
+
+ nouveau_bufctx_reset(nvc0->bufctx, 0);
+}
+
+static void
+nvc0_m2mf_copy_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
+ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
+ unsigned size)
+{
+ struct nouveau_pushbuf *push = nv->pushbuf;
+ struct nouveau_bufctx *bctx = nvc0_context(&nv->pipe)->bufctx;
+
+ nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD);
+ nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, bctx);
+ nouveau_pushbuf_validate(push);
+
+ while (size) {
+ unsigned bytes = MIN2(size, 1 << 17);
+
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_OUT_HIGH), 2);
+ PUSH_DATAh(push, dst->offset + dstoff);
+ PUSH_DATA (push, dst->offset + dstoff);
+ BEGIN_NVC0(push, NVC0_M2MF(OFFSET_IN_HIGH), 2);
+ PUSH_DATAh(push, src->offset + srcoff);
+ PUSH_DATA (push, src->offset + srcoff);
+ BEGIN_NVC0(push, NVC0_M2MF(LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, bytes);
+ PUSH_DATA (push, 1);
+ BEGIN_NVC0(push, NVC0_M2MF(EXEC), 1);
+ PUSH_DATA (push, (1 << NVC0_M2MF_EXEC_INC__SHIFT) |
+ NVC0_M2MF_EXEC_LINEAR_IN | NVC0_M2MF_EXEC_LINEAR_OUT);
+
+ srcoff += bytes;
+ dstoff += bytes;
+ size -= bytes;
+ }
+
+ nouveau_bufctx_reset(bctx, 0);
+}
+
+static void
+nve4_m2mf_copy_linear(struct nouveau_context *nv,
+ struct nouveau_bo *dst, unsigned dstoff, unsigned dstdom,
+ struct nouveau_bo *src, unsigned srcoff, unsigned srcdom,
+ unsigned size)
+{
+ struct nouveau_pushbuf *push = nv->pushbuf;
+ struct nouveau_bufctx *bctx = nvc0_context(&nv->pipe)->bufctx;
+
+ nouveau_bufctx_refn(bctx, 0, src, srcdom | NOUVEAU_BO_RD);
+ nouveau_bufctx_refn(bctx, 0, dst, dstdom | NOUVEAU_BO_WR);
+ nouveau_pushbuf_bufctx(push, bctx);
+ nouveau_pushbuf_validate(push);
+
+ BEGIN_NVC0(push, SUBC_COPY(0x0400), 4);
+ PUSH_DATAh(push, src->offset + srcoff);
+ PUSH_DATA (push, src->offset + srcoff);
+ PUSH_DATAh(push, dst->offset + dstoff);
+ PUSH_DATA (push, dst->offset + dstoff);
+ BEGIN_NVC0(push, SUBC_COPY(0x0418), 1);
+ PUSH_DATA (push, size);
+ BEGIN_NVC0(push, SUBC_COPY(0x0300), 1);
+ PUSH_DATA (push, 0x186);
+
+ nouveau_bufctx_reset(bctx, 0);
+}
+
+
+static INLINE boolean
+nvc0_mt_transfer_can_map_directly(struct nv50_miptree *mt)
+{
+ if (mt->base.domain == NOUVEAU_BO_VRAM)
+ return FALSE;
+ if (mt->base.base.usage != PIPE_USAGE_STAGING)
+ return FALSE;
+ return !nouveau_bo_memtype(mt->base.bo);
+}
+
+static INLINE boolean
+nvc0_mt_sync(struct nvc0_context *nvc0, struct nv50_miptree *mt, unsigned usage)
+{
+ if (!mt->base.mm) {
+ uint32_t access = (usage & PIPE_TRANSFER_WRITE) ?
+ NOUVEAU_BO_WR : NOUVEAU_BO_RD;
+ return !nouveau_bo_wait(mt->base.bo, access, nvc0->base.client);
+ }
+ if (usage & PIPE_TRANSFER_WRITE)
+ return !mt->base.fence || nouveau_fence_wait(mt->base.fence);
+ return !mt->base.fence_wr || nouveau_fence_wait(mt->base.fence_wr);
+}
+
+void *
+nvc0_miptree_transfer_map(struct pipe_context *pctx,
+ struct pipe_resource *res,
+ unsigned level,
+ unsigned usage,
+ const struct pipe_box *box,
+ struct pipe_transfer **ptransfer)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pctx);
+ struct nouveau_device *dev = nvc0->screen->base.device;
+ struct nv50_miptree *mt = nv50_miptree(res);
+ struct nvc0_transfer *tx;
+ uint32_t size;
+ int ret;
+ unsigned flags = 0;
+
+ if (nvc0_mt_transfer_can_map_directly(mt)) {
+ ret = !nvc0_mt_sync(nvc0, mt, usage);
+ if (!ret)
+ ret = nouveau_bo_map(mt->base.bo, 0, NULL);
+ if (ret &&
+ (usage & PIPE_TRANSFER_MAP_DIRECTLY))
+ return NULL;
+ if (!ret)
+ usage |= PIPE_TRANSFER_MAP_DIRECTLY;
+ } else
+ if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
+ return NULL;
+
+ tx = CALLOC_STRUCT(nvc0_transfer);
+ if (!tx)
+ return NULL;
+
+ pipe_resource_reference(&tx->base.resource, res);
+
+ tx->base.level = level;
+ tx->base.usage = usage;
+ tx->base.box = *box;
+
+ if (util_format_is_plain(res->format)) {
+ tx->nblocksx = box->width << mt->ms_x;
+ tx->nblocksy = box->height << mt->ms_y;
+ } else {
+ tx->nblocksx = util_format_get_nblocksx(res->format, box->width);
+ tx->nblocksy = util_format_get_nblocksy(res->format, box->height);
+ }
+ tx->nlayers = box->depth;
+
+ tx->base.stride = tx->nblocksx * util_format_get_blocksize(res->format);
+ tx->base.layer_stride = tx->nblocksy * tx->base.stride;
+
+ if (usage & PIPE_TRANSFER_MAP_DIRECTLY) {
+ tx->base.stride = align(tx->base.stride, 128);
+ *ptransfer = &tx->base;
+ return mt->base.bo->map + mt->base.offset;
+ }
+
+ nv50_m2mf_rect_setup(&tx->rect[0], res, level, box->x, box->y, box->z);
+
+ size = tx->base.layer_stride;
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_GART | NOUVEAU_BO_MAP, 0,
+ size * tx->nlayers, NULL, &tx->rect[1].bo);
+ if (ret) {
+ pipe_resource_reference(&tx->base.resource, NULL);
+ FREE(tx);
+ return NULL;
+ }
+
+ tx->rect[1].cpp = tx->rect[0].cpp;
+ tx->rect[1].width = tx->nblocksx;
+ tx->rect[1].height = tx->nblocksy;
+ tx->rect[1].depth = 1;
+ tx->rect[1].pitch = tx->base.stride;
+ tx->rect[1].domain = NOUVEAU_BO_GART;
+
+ if (usage & PIPE_TRANSFER_READ) {
+ unsigned base = tx->rect[0].base;
+ unsigned z = tx->rect[0].z;
+ unsigned i;
+ for (i = 0; i < tx->nlayers; ++i) {
+ nvc0->m2mf_copy_rect(nvc0, &tx->rect[1], &tx->rect[0],
+ tx->nblocksx, tx->nblocksy);
+ if (mt->layout_3d)
+ tx->rect[0].z++;
+ else
+ tx->rect[0].base += mt->layer_stride;
+ tx->rect[1].base += size;
+ }
+ tx->rect[0].z = z;
+ tx->rect[0].base = base;
+ tx->rect[1].base = 0;
+ }
+
+ if (tx->rect[1].bo->map) {
+ *ptransfer = &tx->base;
+ return tx->rect[1].bo->map;
+ }
+
+ if (usage & PIPE_TRANSFER_READ)
+ flags = NOUVEAU_BO_RD;
+ if (usage & PIPE_TRANSFER_WRITE)
+ flags |= NOUVEAU_BO_WR;
+
+ ret = nouveau_bo_map(tx->rect[1].bo, flags, nvc0->screen->base.client);
+ if (ret) {
+ pipe_resource_reference(&tx->base.resource, NULL);
+ nouveau_bo_ref(NULL, &tx->rect[1].bo);
+ FREE(tx);
+ return NULL;
+ }
+
+ *ptransfer = &tx->base;
+ return tx->rect[1].bo->map;
+}
+
+void
+nvc0_miptree_transfer_unmap(struct pipe_context *pctx,
+ struct pipe_transfer *transfer)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pctx);
+ struct nvc0_transfer *tx = (struct nvc0_transfer *)transfer;
+ struct nv50_miptree *mt = nv50_miptree(tx->base.resource);
+ unsigned i;
+
+ if (tx->base.usage & PIPE_TRANSFER_MAP_DIRECTLY) {
+ pipe_resource_reference(&transfer->resource, NULL);
+
+ FREE(tx);
+ return;
+ }
+
+ if (tx->base.usage & PIPE_TRANSFER_WRITE) {
+ for (i = 0; i < tx->nlayers; ++i) {
+ nvc0->m2mf_copy_rect(nvc0, &tx->rect[0], &tx->rect[1],
+ tx->nblocksx, tx->nblocksy);
+ if (mt->layout_3d)
+ tx->rect[0].z++;
+ else
+ tx->rect[0].base += mt->layer_stride;
+ tx->rect[1].base += tx->nblocksy * tx->base.stride;
+ }
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_wr, 1);
+ }
+ if (tx->base.usage & PIPE_TRANSFER_READ)
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, tex_transfers_rd, 1);
+
+ nouveau_bo_ref(NULL, &tx->rect[1].bo);
+ pipe_resource_reference(&transfer->resource, NULL);
+
+ FREE(tx);
+}
+
+/* This happens rather often with DTD9/st. */
+void
+nvc0_cb_push(struct nouveau_context *nv,
+ struct nouveau_bo *bo, unsigned domain,
+ unsigned base, unsigned size,
+ unsigned offset, unsigned words, const uint32_t *data)
+{
+ struct nouveau_pushbuf *push = nv->pushbuf;
+
+ NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_count, 1);
+ NOUVEAU_DRV_STAT(nv->screen, constbuf_upload_bytes, words * 4);
+
+ assert(!(offset & 3));
+ size = align(size, 0x100);
+
+ BEGIN_NVC0(push, NVC0_3D(CB_SIZE), 3);
+ PUSH_DATA (push, size);
+ PUSH_DATAh(push, bo->offset + base);
+ PUSH_DATA (push, bo->offset + base);
+
+ while (words) {
+ unsigned nr = PUSH_AVAIL(push);
+ nr = MIN2(nr, words);
+ nr = MIN2(nr, NV04_PFIFO_MAX_PACKET_LEN - 1);
+
+ PUSH_SPACE(push, nr + 2);
+ PUSH_REFN (push, bo, NOUVEAU_BO_WR | domain);
+ BEGIN_1IC0(push, NVC0_3D(CB_POS), nr + 1);
+ PUSH_DATA (push, offset);
+ PUSH_DATAp(push, data, nr);
+
+ words -= nr;
+ data += nr;
+ offset += nr * 4;
+ }
+}
+
+void
+nvc0_init_transfer_functions(struct nvc0_context *nvc0)
+{
+ if (nvc0->screen->base.class_3d >= NVE4_3D_CLASS) {
+ nvc0->m2mf_copy_rect = nve4_m2mf_transfer_rect;
+ nvc0->base.copy_data = nve4_m2mf_copy_linear;
+ nvc0->base.push_data = nve4_p2mf_push_linear;
+ } else {
+ nvc0->m2mf_copy_rect = nvc0_m2mf_transfer_rect;
+ nvc0->base.copy_data = nvc0_m2mf_copy_linear;
+ nvc0->base.push_data = nvc0_m2mf_push_linear;
+ }
+ nvc0->base.push_cb = nvc0_cb_push;
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
new file mode 100644
index 0000000..c4bc7dc
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo.c
@@ -0,0 +1,891 @@
+/*
+ * Copyright 2010 Christoph Bumiller
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#define NVC0_PUSH_EXPLICIT_SPACE_CHECKING
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_resource.h"
+
+#include "nvc0/nvc0_3d.xml.h"
+
+void
+nvc0_vertex_state_delete(struct pipe_context *pipe,
+ void *hwcso)
+{
+ struct nvc0_vertex_stateobj *so = hwcso;
+
+ if (so->translate)
+ so->translate->release(so->translate);
+ FREE(hwcso);
+}
+
+void *
+nvc0_vertex_state_create(struct pipe_context *pipe,
+ unsigned num_elements,
+ const struct pipe_vertex_element *elements)
+{
+ struct nvc0_vertex_stateobj *so;
+ struct translate_key transkey;
+ unsigned i;
+ unsigned src_offset_max = 0;
+
+ so = MALLOC(sizeof(*so) +
+ num_elements * sizeof(struct nvc0_vertex_element));
+ if (!so)
+ return NULL;
+ so->num_elements = num_elements;
+ so->instance_elts = 0;
+ so->instance_bufs = 0;
+ so->shared_slots = FALSE;
+ so->need_conversion = FALSE;
+
+ memset(so->vb_access_size, 0, sizeof(so->vb_access_size));
+
+ for (i = 0; i < PIPE_MAX_ATTRIBS; ++i)
+ so->min_instance_div[i] = 0xffffffff;
+
+ transkey.nr_elements = 0;
+ transkey.output_stride = 0;
+
+ for (i = 0; i < num_elements; ++i) {
+ const struct pipe_vertex_element *ve = &elements[i];
+ const unsigned vbi = ve->vertex_buffer_index;
+ unsigned size;
+ enum pipe_format fmt = ve->src_format;
+
+ so->element[i].pipe = elements[i];
+ so->element[i].state = nvc0_format_table[fmt].vtx;
+
+ if (!so->element[i].state) {
+ switch (util_format_get_nr_components(fmt)) {
+ case 1: fmt = PIPE_FORMAT_R32_FLOAT; break;
+ case 2: fmt = PIPE_FORMAT_R32G32_FLOAT; break;
+ case 3: fmt = PIPE_FORMAT_R32G32B32_FLOAT; break;
+ case 4: fmt = PIPE_FORMAT_R32G32B32A32_FLOAT; break;
+ default:
+ assert(0);
+ FREE(so);
+ return NULL;
+ }
+ so->element[i].state = nvc0_format_table[fmt].vtx;
+ so->need_conversion = TRUE;
+ }
+ size = util_format_get_blocksize(fmt);
+
+ src_offset_max = MAX2(src_offset_max, ve->src_offset);
+
+ if (so->vb_access_size[vbi] < (ve->src_offset + size))
+ so->vb_access_size[vbi] = ve->src_offset + size;
+
+ if (unlikely(ve->instance_divisor)) {
+ so->instance_elts |= 1 << i;
+ so->instance_bufs |= 1 << vbi;
+ if (ve->instance_divisor < so->min_instance_div[vbi])
+ so->min_instance_div[vbi] = ve->instance_divisor;
+ }
+
+ if (1) {
+ unsigned ca;
+ unsigned j = transkey.nr_elements++;
+
+ ca = util_format_description(fmt)->channel[0].size / 8;
+ if (ca != 1 && ca != 2)
+ ca = 4;
+
+ transkey.element[j].type = TRANSLATE_ELEMENT_NORMAL;
+ transkey.element[j].input_format = ve->src_format;
+ transkey.element[j].input_buffer = vbi;
+ transkey.element[j].input_offset = ve->src_offset;
+ transkey.element[j].instance_divisor = ve->instance_divisor;
+
+ transkey.output_stride = align(transkey.output_stride, ca);
+ transkey.element[j].output_format = fmt;
+ transkey.element[j].output_offset = transkey.output_stride;
+ transkey.output_stride += size;
+
+ so->element[i].state_alt = so->element[i].state;
+ so->element[i].state_alt |= transkey.element[j].output_offset << 7;
+ }
+
+ so->element[i].state |= i << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT;
+ }
+ transkey.output_stride = align(transkey.output_stride, 4);
+
+ so->size = transkey.output_stride;
+ so->translate = translate_create(&transkey);
+
+ if (so->instance_elts || src_offset_max >= (1 << 14))
+ return so;
+ so->shared_slots = TRUE;
+
+ for (i = 0; i < num_elements; ++i) {
+ const unsigned b = elements[i].vertex_buffer_index;
+ const unsigned s = elements[i].src_offset;
+ so->element[i].state &= ~NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__MASK;
+ so->element[i].state |= b << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT;
+ so->element[i].state |= s << NVC0_3D_VERTEX_ATTRIB_FORMAT_OFFSET__SHIFT;
+ }
+ return so;
+}
+
+#define NVC0_3D_VERTEX_ATTRIB_INACTIVE \
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT | \
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32 | NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST
+
+#define VTX_ATTR(a, c, t, s) \
+ ((NVC0_3D_VTX_ATTR_DEFINE_TYPE_##t) | \
+ (NVC0_3D_VTX_ATTR_DEFINE_SIZE_##s) | \
+ ((a) << NVC0_3D_VTX_ATTR_DEFINE_ATTR__SHIFT) | \
+ ((c) << NVC0_3D_VTX_ATTR_DEFINE_COMP__SHIFT))
+
+static void
+nvc0_set_constant_vertex_attrib(struct nvc0_context *nvc0, const unsigned a)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct pipe_vertex_element *ve = &nvc0->vertex->element[a].pipe;
+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index];
+ uint32_t mode;
+ const struct util_format_description *desc;
+ void *dst;
+ const void *src = (const uint8_t *)vb->user_buffer + ve->src_offset;
+ assert(!vb->buffer);
+
+ desc = util_format_description(ve->src_format);
+
+ PUSH_SPACE(push, 6);
+ BEGIN_NVC0(push, NVC0_3D(VTX_ATTR_DEFINE), 5);
+ dst = &push->cur[1];
+ if (desc->channel[0].pure_integer) {
+ if (desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
+ mode = VTX_ATTR(a, 4, SINT, 32);
+ desc->unpack_rgba_sint(dst, 0, src, 0, 1, 1);
+ } else {
+ mode = VTX_ATTR(a, 4, UINT, 32);
+ desc->unpack_rgba_uint(dst, 0, src, 0, 1, 1);
+ }
+ } else {
+ mode = VTX_ATTR(a, 4, FLOAT, 32);
+ desc->unpack_rgba_float(dst, 0, src, 0, 1, 1);
+ }
+ push->cur[0] = mode;
+ push->cur += 5;
+}
+
+static INLINE void
+nvc0_user_vbuf_range(struct nvc0_context *nvc0, int vbi,
+ uint32_t *base, uint32_t *size)
+{
+ if (unlikely(nvc0->vertex->instance_bufs & (1 << vbi))) {
+ const uint32_t div = nvc0->vertex->min_instance_div[vbi];
+ *base = nvc0->instance_off * nvc0->vtxbuf[vbi].stride;
+ *size = (nvc0->instance_max / div) * nvc0->vtxbuf[vbi].stride +
+ nvc0->vertex->vb_access_size[vbi];
+ } else {
+ /* NOTE: if there are user buffers, we *must* have index bounds */
+ assert(nvc0->vb_elt_limit != ~0);
+ *base = nvc0->vb_elt_first * nvc0->vtxbuf[vbi].stride;
+ *size = nvc0->vb_elt_limit * nvc0->vtxbuf[vbi].stride +
+ nvc0->vertex->vb_access_size[vbi];
+ }
+}
+
+static INLINE void
+nvc0_release_user_vbufs(struct nvc0_context *nvc0)
+{
+ if (nvc0->vbo_user) {
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP);
+ nouveau_scratch_done(&nvc0->base);
+ }
+}
+
+static void
+nvc0_update_user_vbufs(struct nvc0_context *nvc0)
+{
+ uint64_t address[PIPE_MAX_ATTRIBS];
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ int i;
+ uint32_t written = 0;
+
+ PUSH_SPACE(push, nvc0->vertex->num_elements * 8);
+ for (i = 0; i < nvc0->vertex->num_elements; ++i) {
+ struct pipe_vertex_element *ve = &nvc0->vertex->element[i].pipe;
+ const unsigned b = ve->vertex_buffer_index;
+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
+ uint32_t base, size;
+
+ if (!(nvc0->vbo_user & (1 << b)))
+ continue;
+ if (!vb->stride) {
+ nvc0_set_constant_vertex_attrib(nvc0, i);
+ continue;
+ }
+ nvc0_user_vbuf_range(nvc0, b, &base, &size);
+
+ if (!(written & (1 << b))) {
+ struct nouveau_bo *bo;
+ const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART;
+ written |= 1 << b;
+ address[b] = nouveau_scratch_data(&nvc0->base, vb->user_buffer,
+ base, size, &bo);
+ if (bo)
+ BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo);
+
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, user_buffer_upload_bytes, size);
+ }
+
+ BEGIN_1IC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_SELECT), 5);
+ PUSH_DATA (push, i);
+ PUSH_DATAh(push, address[b] + base + size - 1);
+ PUSH_DATA (push, address[b] + base + size - 1);
+ PUSH_DATAh(push, address[b] + ve->src_offset);
+ PUSH_DATA (push, address[b] + ve->src_offset);
+ }
+ nvc0->base.vbo_dirty = TRUE;
+}
+
+static void
+nvc0_update_user_vbufs_shared(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ uint32_t mask = nvc0->vbo_user & ~nvc0->constant_vbos;
+
+ PUSH_SPACE(push, nvc0->num_vtxbufs * 8);
+ while (mask) {
+ struct nouveau_bo *bo;
+ const uint32_t bo_flags = NOUVEAU_BO_RD | NOUVEAU_BO_GART;
+ uint64_t address;
+ uint32_t base, size;
+ const int b = ffs(mask) - 1;
+ mask &= ~(1 << b);
+
+ nvc0_user_vbuf_range(nvc0, b, &base, &size);
+
+ address = nouveau_scratch_data(&nvc0->base, nvc0->vtxbuf[b].user_buffer,
+ base, size, &bo);
+ if (bo)
+ BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, bo_flags, bo);
+
+ BEGIN_1IC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_SELECT), 5);
+ PUSH_DATA (push, b);
+ PUSH_DATAh(push, address + base + size - 1);
+ PUSH_DATA (push, address + base + size - 1);
+ PUSH_DATAh(push, address);
+ PUSH_DATA (push, address);
+
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, user_buffer_upload_bytes, size);
+ }
+
+ mask = nvc0->state.constant_elts;
+ while (mask) {
+ int i = ffs(mask) - 1;
+ mask &= ~(1 << i);
+ nvc0_set_constant_vertex_attrib(nvc0, i);
+ }
+}
+
+static void
+nvc0_validate_vertex_buffers(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const struct nvc0_vertex_stateobj *vertex = nvc0->vertex;
+ uint32_t refd = 0;
+ unsigned i;
+
+ PUSH_SPACE(push, vertex->num_elements * 8);
+ for (i = 0; i < vertex->num_elements; ++i) {
+ const struct nvc0_vertex_element *ve;
+ const struct pipe_vertex_buffer *vb;
+ struct nv04_resource *res;
+ unsigned b;
+ unsigned limit, offset;
+
+ if (nvc0->state.constant_elts & (1 << i))
+ continue;
+ ve = &vertex->element[i];
+ b = ve->pipe.vertex_buffer_index;
+ vb = &nvc0->vtxbuf[b];
+
+ if (!vb->buffer) {
+ if (vb->stride) {
+ if (ve->pipe.instance_divisor) {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_DIVISOR(i)), 1);
+ PUSH_DATA (push, ve->pipe.instance_divisor);
+ }
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 1);
+ PUSH_DATA (push, (1 << 12) | vb->stride);
+ }
+ /* address/value set in nvc0_update_user_vbufs */
+ continue;
+ }
+ res = nv04_resource(vb->buffer);
+ offset = ve->pipe.src_offset + vb->buffer_offset;
+ limit = vb->buffer->width0 - 1;
+
+ if (unlikely(ve->pipe.instance_divisor)) {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 4);
+ PUSH_DATA (push, (1 << 12) | vb->stride);
+ PUSH_DATAh(push, res->address + offset);
+ PUSH_DATA (push, res->address + offset);
+ PUSH_DATA (push, ve->pipe.instance_divisor);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 3);
+ PUSH_DATA (push, (1 << 12) | vb->stride);
+ PUSH_DATAh(push, res->address + offset);
+ PUSH_DATA (push, res->address + offset);
+ }
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(i)), 2);
+ PUSH_DATAh(push, res->address + limit);
+ PUSH_DATA (push, res->address + limit);
+
+ if (!(refd & (1 << b))) {
+ refd |= 1 << b;
+ BCTX_REFN(nvc0->bufctx_3d, VTX, res, RD);
+ }
+ }
+ if (nvc0->vbo_user)
+ nvc0_update_user_vbufs(nvc0);
+}
+
+static void
+nvc0_validate_vertex_buffers_shared(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ unsigned b;
+ const uint32_t mask = nvc0->vbo_user;
+
+ PUSH_SPACE(push, nvc0->num_vtxbufs * 8);
+ for (b = 0; b < nvc0->num_vtxbufs; ++b) {
+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[b];
+ struct nv04_resource *buf;
+ uint32_t offset, limit;
+
+ if (mask & (1 << b)) {
+ if (vb->stride) {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 1);
+ PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
+ }
+ /* address/value set in nvc0_update_user_vbufs_shared */
+ continue;
+ }
+ buf = nv04_resource(vb->buffer);
+ offset = vb->buffer_offset;
+ limit = buf->base.width0 - 1;
+
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(b)), 3);
+ PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | vb->stride);
+ PUSH_DATAh(push, buf->address + offset);
+ PUSH_DATA (push, buf->address + offset);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(b)), 2);
+ PUSH_DATAh(push, buf->address + limit);
+ PUSH_DATA (push, buf->address + limit);
+
+ BCTX_REFN(nvc0->bufctx_3d, VTX, buf, RD);
+ }
+ if (nvc0->vbo_user)
+ nvc0_update_user_vbufs_shared(nvc0);
+}
+
+void
+nvc0_vertex_arrays_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_vertex_stateobj *vertex = nvc0->vertex;
+ struct nvc0_vertex_element *ve;
+ uint32_t const_vbos;
+ unsigned i;
+ uint8_t vbo_mode;
+ boolean update_vertex;
+
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX);
+
+ if (unlikely(vertex->need_conversion) ||
+ unlikely(nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS)) {
+ vbo_mode = 3;
+ } else {
+ vbo_mode = (nvc0->vbo_user && nvc0->vbo_push_hint) ? 1 : 0;
+ }
+ const_vbos = vbo_mode ? 0 : nvc0->constant_vbos;
+
+ update_vertex = (nvc0->dirty & NVC0_NEW_VERTEX) ||
+ (const_vbos != nvc0->state.constant_vbos) ||
+ (vbo_mode != nvc0->state.vbo_mode);
+
+ if (update_vertex) {
+ const unsigned n = MAX2(vertex->num_elements, nvc0->state.num_vtxelts);
+
+ nvc0->state.constant_vbos = const_vbos;
+ nvc0->state.constant_elts = 0;
+ nvc0->state.num_vtxelts = vertex->num_elements;
+ nvc0->state.vbo_mode = vbo_mode;
+
+ if (unlikely(vbo_mode)) {
+ if (unlikely(nvc0->state.instance_elts & 3)) {
+ /* translate mode uses only 2 vertex buffers */
+ nvc0->state.instance_elts &= ~3;
+ PUSH_SPACE(push, 3);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(0)), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0);
+ }
+
+ PUSH_SPACE(push, n * 2 + 4);
+
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), n);
+ for (i = 0; i < vertex->num_elements; ++i)
+ PUSH_DATA(push, vertex->element[i].state_alt);
+ for (; i < n; ++i)
+ PUSH_DATA(push, NVC0_3D_VERTEX_ATTRIB_INACTIVE);
+
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(0)), 1);
+ PUSH_DATA (push, (1 << 12) | vertex->size);
+ for (i = 1; i < n; ++i)
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
+ } else {
+ uint32_t *restrict data;
+
+ if (unlikely(vertex->instance_elts != nvc0->state.instance_elts)) {
+ nvc0->state.instance_elts = vertex->instance_elts;
+ assert(n); /* if (n == 0), both masks should be 0 */
+ PUSH_SPACE(push, 3);
+ BEGIN_NVC0(push, NVC0_3D(MACRO_VERTEX_ARRAY_PER_INSTANCE), 2);
+ PUSH_DATA (push, n);
+ PUSH_DATA (push, vertex->instance_elts);
+ }
+
+ PUSH_SPACE(push, n * 2 + 1);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(0)), n);
+ data = push->cur;
+ push->cur += n;
+ for (i = 0; i < vertex->num_elements; ++i) {
+ ve = &vertex->element[i];
+ data[i] = ve->state;
+ if (unlikely(const_vbos & (1 << ve->pipe.vertex_buffer_index))) {
+ nvc0->state.constant_elts |= 1 << i;
+ data[i] |= NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST;
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
+ }
+ }
+ for (; i < n; ++i) {
+ data[i] = NVC0_3D_VERTEX_ATTRIB_INACTIVE;
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(i)), 0);
+ }
+ }
+ }
+ if (nvc0->state.vbo_mode) /* using translate, don't set up arrays here */
+ return;
+
+ if (vertex->shared_slots)
+ nvc0_validate_vertex_buffers_shared(nvc0);
+ else
+ nvc0_validate_vertex_buffers(nvc0);
+}
+
+void
+nvc0_idxbuf_validate(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv04_resource *buf = nv04_resource(nvc0->idxbuf.buffer);
+
+ assert(buf);
+ assert(nouveau_resource_mapped_by_gpu(&buf->base));
+
+ PUSH_SPACE(push, 6);
+ BEGIN_NVC0(push, NVC0_3D(INDEX_ARRAY_START_HIGH), 5);
+ PUSH_DATAh(push, buf->address + nvc0->idxbuf.offset);
+ PUSH_DATA (push, buf->address + nvc0->idxbuf.offset);
+ PUSH_DATAh(push, buf->address + buf->base.width0 - 1);
+ PUSH_DATA (push, buf->address + buf->base.width0 - 1);
+ PUSH_DATA (push, nvc0->idxbuf.index_size >> 1);
+
+ BCTX_REFN(nvc0->bufctx_3d, IDX, buf, RD);
+}
+
+#define NVC0_PRIM_GL_CASE(n) \
+ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+static INLINE unsigned
+nvc0_prim_gl(unsigned prim)
+{
+ switch (prim) {
+ NVC0_PRIM_GL_CASE(POINTS);
+ NVC0_PRIM_GL_CASE(LINES);
+ NVC0_PRIM_GL_CASE(LINE_LOOP);
+ NVC0_PRIM_GL_CASE(LINE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLES);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
+ NVC0_PRIM_GL_CASE(QUADS);
+ NVC0_PRIM_GL_CASE(QUAD_STRIP);
+ NVC0_PRIM_GL_CASE(POLYGON);
+ NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+ /*
+ NVC0_PRIM_GL_CASE(PATCHES); */
+ default:
+ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+ }
+}
+
+static void
+nvc0_draw_vbo_kick_notify(struct nouveau_pushbuf *push)
+{
+ struct nvc0_screen *screen = push->user_priv;
+
+ nouveau_fence_update(&screen->base, TRUE);
+
+ NOUVEAU_DRV_STAT(&screen->base, pushbuf_count, 1);
+}
+
+static void
+nvc0_draw_arrays(struct nvc0_context *nvc0,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned instance_count)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ unsigned prim;
+
+ if (nvc0->state.index_bias) {
+ PUSH_SPACE(push, 1);
+ IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 0);
+ nvc0->state.index_bias = 0;
+ }
+
+ prim = nvc0_prim_gl(mode);
+
+ while (instance_count--) {
+ PUSH_SPACE(push, 6);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (push, prim);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
+ PUSH_DATA (push, start);
+ PUSH_DATA (push, count);
+ IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
+
+ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_array, 1);
+}
+
+static void
+nvc0_draw_elements_inline_u08(struct nouveau_pushbuf *push, const uint8_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 3) {
+ unsigned i;
+ PUSH_SPACE(push, 4);
+ BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U32), count & 3);
+ for (i = 0; i < (count & 3); ++i)
+ PUSH_DATA(push, *map++);
+ count &= ~3;
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 4) / 4;
+
+ PUSH_SPACE(push, nr + 1);
+ BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U8), nr);
+ for (i = 0; i < nr; ++i) {
+ PUSH_DATA(push,
+ (map[3] << 24) | (map[2] << 16) | (map[1] << 8) | map[0]);
+ map += 4;
+ }
+ count -= nr * 4;
+ }
+}
+
+static void
+nvc0_draw_elements_inline_u16(struct nouveau_pushbuf *push, const uint16_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 1) {
+ count &= ~1;
+ PUSH_SPACE(push, 2);
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, *map++);
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+ PUSH_SPACE(push, nr + 1);
+ BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U16), nr);
+ for (i = 0; i < nr; ++i) {
+ PUSH_DATA(push, (map[1] << 16) | map[0]);
+ map += 2;
+ }
+ count -= nr * 2;
+ }
+}
+
+static void
+nvc0_draw_elements_inline_u32(struct nouveau_pushbuf *push, const uint32_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ while (count) {
+ const unsigned nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN);
+
+ PUSH_SPACE(push, nr + 1);
+ BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U32), nr);
+ PUSH_DATAp(push, map, nr);
+
+ map += nr;
+ count -= nr;
+ }
+}
+
+static void
+nvc0_draw_elements_inline_u32_short(struct nouveau_pushbuf *push,
+ const uint32_t *map,
+ unsigned start, unsigned count)
+{
+ map += start;
+
+ if (count & 1) {
+ count--;
+ PUSH_SPACE(push, 1);
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, *map++);
+ }
+ while (count) {
+ unsigned i, nr = MIN2(count, NV04_PFIFO_MAX_PACKET_LEN * 2) / 2;
+
+ PUSH_SPACE(push, nr + 1);
+ BEGIN_NIC0(push, NVC0_3D(VB_ELEMENT_U16), nr);
+ for (i = 0; i < nr; ++i) {
+ PUSH_DATA(push, (map[1] << 16) | map[0]);
+ map += 2;
+ }
+ count -= nr * 2;
+ }
+}
+
+static void
+nvc0_draw_elements(struct nvc0_context *nvc0, boolean shorten,
+ unsigned mode, unsigned start, unsigned count,
+ unsigned instance_count, int32_t index_bias)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ unsigned prim;
+ const unsigned index_size = nvc0->idxbuf.index_size;
+
+ prim = nvc0_prim_gl(mode);
+
+ if (index_bias != nvc0->state.index_bias) {
+ PUSH_SPACE(push, 2);
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_BASE), 1);
+ PUSH_DATA (push, index_bias);
+ nvc0->state.index_bias = index_bias;
+ }
+
+ if (nvc0->idxbuf.buffer) {
+ PUSH_SPACE(push, 1);
+ IMMED_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), prim);
+ do {
+ PUSH_SPACE(push, 7);
+ BEGIN_NVC0(push, NVC0_3D(INDEX_BATCH_FIRST), 2);
+ PUSH_DATA (push, start);
+ PUSH_DATA (push, count);
+ if (--instance_count) {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_END_GL), 2);
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, prim | NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT);
+ }
+ } while (instance_count);
+ IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
+ } else {
+ const void *data = nvc0->idxbuf.user_buffer;
+
+ while (instance_count--) {
+ PUSH_SPACE(push, 2);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (push, prim);
+ switch (index_size) {
+ case 1:
+ nvc0_draw_elements_inline_u08(push, data, start, count);
+ break;
+ case 2:
+ nvc0_draw_elements_inline_u16(push, data, start, count);
+ break;
+ case 4:
+ if (shorten)
+ nvc0_draw_elements_inline_u32_short(push, data, start, count);
+ else
+ nvc0_draw_elements_inline_u32(push, data, start, count);
+ break;
+ default:
+ assert(0);
+ return;
+ }
+ PUSH_SPACE(push, 1);
+ IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
+
+ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+ }
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_indexed, 1);
+}
+
+static void
+nvc0_draw_stream_output(struct nvc0_context *nvc0,
+ const struct pipe_draw_info *info)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_so_target *so = nvc0_so_target(info->count_from_stream_output);
+ struct nv04_resource *res = nv04_resource(so->pipe.buffer);
+ unsigned mode = nvc0_prim_gl(info->mode);
+ unsigned num_instances = info->instance_count;
+
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ PUSH_SPACE(push, 2);
+ IMMED_NVC0(push, NVC0_3D(SERIALIZE), 0);
+ nvc0_query_fifo_wait(push, so->pq);
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
+
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, gpu_serialize_count, 1);
+ }
+
+ while (num_instances--) {
+ PUSH_SPACE(push, 8);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (push, mode);
+ BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BASE), 1);
+ PUSH_DATA (push, 0);
+ BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_STRIDE), 1);
+ PUSH_DATA (push, so->stride);
+ BEGIN_NVC0(push, NVC0_3D(DRAW_TFB_BYTES), 1);
+ nvc0_query_pushbuf_submit(push, so->pq, 0x4);
+ IMMED_NVC0(push, NVC0_3D(VERTEX_END_GL), 0);
+
+ mode |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ }
+}
+
+void
+nvc0_draw_vbo(struct pipe_context *pipe, const struct pipe_draw_info *info)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+
+ /* NOTE: caller must ensure that (min_index + index_bias) is >= 0 */
+ nvc0->vb_elt_first = info->min_index + info->index_bias;
+ nvc0->vb_elt_limit = info->max_index - info->min_index;
+ nvc0->instance_off = info->start_instance;
+ nvc0->instance_max = info->instance_count - 1;
+
+ /* For picking only a few vertices from a large user buffer, push is better,
+ * if index count is larger and we expect repeated vertices, suggest upload.
+ */
+ nvc0->vbo_push_hint =
+ info->indexed && (nvc0->vb_elt_limit >= (info->count * 2));
+
+ /* Check whether we want to switch vertex-submission mode. */
+ if (nvc0->vbo_user && !(nvc0->dirty & (NVC0_NEW_ARRAYS | NVC0_NEW_VERTEX))) {
+ if (nvc0->vbo_push_hint != !!nvc0->state.vbo_mode)
+ if (nvc0->state.vbo_mode != 3)
+ nvc0->dirty |= NVC0_NEW_ARRAYS;
+
+ if (!(nvc0->dirty & NVC0_NEW_ARRAYS) && nvc0->state.vbo_mode == 0) {
+ if (nvc0->vertex->shared_slots)
+ nvc0_update_user_vbufs_shared(nvc0);
+ else
+ nvc0_update_user_vbufs(nvc0);
+ }
+ }
+
+ /* 8 as minimum to avoid immediate double validation of new buffers */
+ nvc0_state_validate(nvc0, ~0, 8);
+
+ push->kick_notify = nvc0_draw_vbo_kick_notify;
+
+ if (nvc0->state.vbo_mode) {
+ nvc0_push_vbo(nvc0, info);
+ push->kick_notify = nvc0_default_kick_notify;
+ return;
+ }
+
+ /* space for base instance, flush, and prim restart */
+ PUSH_SPACE(push, 8);
+
+ if (nvc0->state.instance_base != info->start_instance) {
+ nvc0->state.instance_base = info->start_instance;
+ /* NOTE: this does not affect the shader input, should it ? */
+ BEGIN_NVC0(push, NVC0_3D(VB_INSTANCE_BASE), 1);
+ PUSH_DATA (push, info->start_instance);
+ }
+
+ if (nvc0->base.vbo_dirty) {
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
+ nvc0->base.vbo_dirty = FALSE;
+ }
+
+ if (info->indexed) {
+ boolean shorten = info->max_index <= 65535;
+
+ if (info->primitive_restart != nvc0->state.prim_restart) {
+ if (info->primitive_restart) {
+ BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 2);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, info->restart_index);
+
+ if (info->restart_index > 65535)
+ shorten = FALSE;
+ } else {
+ IMMED_NVC0(push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
+ }
+ nvc0->state.prim_restart = info->primitive_restart;
+ } else
+ if (info->primitive_restart) {
+ BEGIN_NVC0(push, NVC0_3D(PRIM_RESTART_INDEX), 1);
+ PUSH_DATA (push, info->restart_index);
+
+ if (info->restart_index > 65535)
+ shorten = FALSE;
+ }
+
+ nvc0_draw_elements(nvc0, shorten,
+ info->mode, info->start, info->count,
+ info->instance_count, info->index_bias);
+ } else
+ if (unlikely(info->count_from_stream_output)) {
+ nvc0_draw_stream_output(nvc0, info);
+ } else {
+ nvc0_draw_arrays(nvc0,
+ info->mode, info->start, info->count,
+ info->instance_count);
+ }
+ push->kick_notify = nvc0_default_kick_notify;
+
+ nvc0_release_user_vbufs(nvc0);
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
new file mode 100644
index 0000000..51e751c
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_vbo_translate.c
@@ -0,0 +1,649 @@
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "util/u_inlines.h"
+#include "util/u_format.h"
+#include "translate/translate.h"
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_resource.h"
+
+#include "nvc0/nvc0_3d.xml.h"
+
+struct push_context {
+ struct nouveau_pushbuf *push;
+
+ struct translate *translate;
+ void *dest;
+ const void *idxbuf;
+
+ uint32_t vertex_size;
+ uint32_t restart_index;
+ uint32_t instance_id;
+
+ boolean prim_restart;
+ boolean need_vertex_id;
+
+ struct {
+ boolean enabled;
+ boolean value;
+ unsigned stride;
+ const uint8_t *data;
+ } edgeflag;
+};
+
+static void nvc0_push_upload_vertex_ids(struct push_context *,
+ struct nvc0_context *,
+ const struct pipe_draw_info *);
+
+static void
+nvc0_push_context_init(struct nvc0_context *nvc0, struct push_context *ctx)
+{
+ ctx->push = nvc0->base.pushbuf;
+
+ ctx->translate = nvc0->vertex->translate;
+ ctx->vertex_size = nvc0->vertex->size;
+
+ ctx->need_vertex_id =
+ nvc0->vertprog->vp.need_vertex_id && (nvc0->vertex->num_elements < 32);
+
+ ctx->edgeflag.value = TRUE;
+ ctx->edgeflag.enabled = nvc0->vertprog->vp.edgeflag < PIPE_MAX_ATTRIBS;
+
+ /* silence warnings */
+ ctx->edgeflag.data = NULL;
+ ctx->edgeflag.stride = 0;
+}
+
+static INLINE void
+nvc0_vertex_configure_translate(struct nvc0_context *nvc0, int32_t index_bias)
+{
+ struct translate *translate = nvc0->vertex->translate;
+ unsigned i;
+
+ for (i = 0; i < nvc0->num_vtxbufs; ++i) {
+ const uint8_t *map;
+ const struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[i];
+
+ if (likely(!vb->buffer))
+ map = (const uint8_t *)vb->user_buffer;
+ else
+ map = nouveau_resource_map_offset(&nvc0->base,
+ nv04_resource(vb->buffer), vb->buffer_offset, NOUVEAU_BO_RD);
+
+ if (index_bias && !unlikely(nvc0->vertex->instance_bufs & (1 << i)))
+ map += (intptr_t)index_bias * vb->stride;
+
+ translate->set_buffer(translate, i, map, vb->stride, ~0);
+ }
+}
+
+static INLINE void
+nvc0_push_map_idxbuf(struct push_context *ctx, struct nvc0_context *nvc0)
+{
+ if (nvc0->idxbuf.buffer) {
+ struct nv04_resource *buf = nv04_resource(nvc0->idxbuf.buffer);
+ ctx->idxbuf = nouveau_resource_map_offset(&nvc0->base,
+ buf, nvc0->idxbuf.offset, NOUVEAU_BO_RD);
+ } else {
+ ctx->idxbuf = nvc0->idxbuf.user_buffer;
+ }
+}
+
+static INLINE void
+nvc0_push_map_edgeflag(struct push_context *ctx, struct nvc0_context *nvc0,
+ int32_t index_bias)
+{
+ unsigned attr = nvc0->vertprog->vp.edgeflag;
+ struct pipe_vertex_element *ve = &nvc0->vertex->element[attr].pipe;
+ struct pipe_vertex_buffer *vb = &nvc0->vtxbuf[ve->vertex_buffer_index];
+ struct nv04_resource *buf = nv04_resource(vb->buffer);
+ unsigned offset = vb->buffer_offset + ve->src_offset;
+
+ ctx->edgeflag.stride = vb->stride;
+ ctx->edgeflag.data = nouveau_resource_map_offset(&nvc0->base,
+ buf, offset, NOUVEAU_BO_RD);
+ if (index_bias)
+ ctx->edgeflag.data += (intptr_t)index_bias * vb->stride;
+}
+
+static INLINE unsigned
+prim_restart_search_i08(const uint8_t *elts, unsigned push, uint8_t index)
+{
+ unsigned i;
+ for (i = 0; i < push && elts[i] != index; ++i);
+ return i;
+}
+
+static INLINE unsigned
+prim_restart_search_i16(const uint16_t *elts, unsigned push, uint16_t index)
+{
+ unsigned i;
+ for (i = 0; i < push && elts[i] != index; ++i);
+ return i;
+}
+
+static INLINE unsigned
+prim_restart_search_i32(const uint32_t *elts, unsigned push, uint32_t index)
+{
+ unsigned i;
+ for (i = 0; i < push && elts[i] != index; ++i);
+ return i;
+}
+
+static INLINE boolean
+ef_value(const struct push_context *ctx, uint32_t index)
+{
+ float *pf = (float *)&ctx->edgeflag.data[index * ctx->edgeflag.stride];
+ return *pf ? TRUE : FALSE;
+}
+
+static INLINE boolean
+ef_toggle(struct push_context *ctx)
+{
+ ctx->edgeflag.value = !ctx->edgeflag.value;
+ return ctx->edgeflag.value;
+}
+
+static INLINE unsigned
+ef_toggle_search_i08(struct push_context *ctx, const uint8_t *elts, unsigned n)
+{
+ unsigned i;
+ for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
+ return i;
+}
+
+static INLINE unsigned
+ef_toggle_search_i16(struct push_context *ctx, const uint16_t *elts, unsigned n)
+{
+ unsigned i;
+ for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
+ return i;
+}
+
+static INLINE unsigned
+ef_toggle_search_i32(struct push_context *ctx, const uint32_t *elts, unsigned n)
+{
+ unsigned i;
+ for (i = 0; i < n && ef_value(ctx, elts[i]) == ctx->edgeflag.value; ++i);
+ return i;
+}
+
+static INLINE unsigned
+ef_toggle_search_seq(struct push_context *ctx, unsigned start, unsigned n)
+{
+ unsigned i;
+ for (i = 0; i < n && ef_value(ctx, start++) == ctx->edgeflag.value; ++i);
+ return i;
+}
+
+static INLINE void *
+nvc0_push_setup_vertex_array(struct nvc0_context *nvc0, const unsigned count)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nouveau_bo *bo;
+ uint64_t va;
+ const unsigned size = count * nvc0->vertex->size;
+
+ void *const dest = nouveau_scratch_get(&nvc0->base, size, &va, &bo);
+
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_START_HIGH(0)), 2);
+ PUSH_DATAh(push, va);
+ PUSH_DATA (push, va);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(0)), 2);
+ PUSH_DATAh(push, va + size - 1);
+ PUSH_DATA (push, va + size - 1);
+
+ BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
+ bo);
+ nouveau_pushbuf_validate(push);
+
+ return dest;
+}
+
+static void
+disp_vertices_i08(struct push_context *ctx, unsigned start, unsigned count)
+{
+ struct nouveau_pushbuf *push = ctx->push;
+ struct translate *translate = ctx->translate;
+ const uint8_t *restrict elts = (uint8_t *)ctx->idxbuf + start;
+ unsigned pos = 0;
+
+ do {
+ unsigned nR = count;
+
+ if (unlikely(ctx->prim_restart))
+ nR = prim_restart_search_i08(elts, nR, ctx->restart_index);
+
+ translate->run_elts8(translate, elts, nR, 0, ctx->instance_id, ctx->dest);
+ count -= nR;
+ ctx->dest += nR * ctx->vertex_size;
+
+ while (nR) {
+ unsigned nE = nR;
+
+ if (unlikely(ctx->edgeflag.enabled))
+ nE = ef_toggle_search_i08(ctx, elts, nR);
+
+ PUSH_SPACE(push, 4);
+ if (likely(nE >= 2)) {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
+ PUSH_DATA (push, pos);
+ PUSH_DATA (push, nE);
+ } else
+ if (nE) {
+ if (pos <= 0xff) {
+ IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, pos);
+ }
+ }
+ if (unlikely(nE != nR))
+ IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));
+
+ pos += nE;
+ elts += nE;
+ nR -= nE;
+ }
+ if (count) {
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, ctx->restart_index);
+ ++elts;
+ ctx->dest += ctx->vertex_size;
+ ++pos;
+ --count;
+ }
+ } while (count);
+}
+
+static void
+disp_vertices_i16(struct push_context *ctx, unsigned start, unsigned count)
+{
+ struct nouveau_pushbuf *push = ctx->push;
+ struct translate *translate = ctx->translate;
+ const uint16_t *restrict elts = (uint16_t *)ctx->idxbuf + start;
+ unsigned pos = 0;
+
+ do {
+ unsigned nR = count;
+
+ if (unlikely(ctx->prim_restart))
+ nR = prim_restart_search_i16(elts, nR, ctx->restart_index);
+
+ translate->run_elts16(translate, elts, nR, 0, ctx->instance_id, ctx->dest);
+ count -= nR;
+ ctx->dest += nR * ctx->vertex_size;
+
+ while (nR) {
+ unsigned nE = nR;
+
+ if (unlikely(ctx->edgeflag.enabled))
+ nE = ef_toggle_search_i16(ctx, elts, nR);
+
+ PUSH_SPACE(push, 4);
+ if (likely(nE >= 2)) {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
+ PUSH_DATA (push, pos);
+ PUSH_DATA (push, nE);
+ } else
+ if (nE) {
+ if (pos <= 0xff) {
+ IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, pos);
+ }
+ }
+ if (unlikely(nE != nR))
+ IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));
+
+ pos += nE;
+ elts += nE;
+ nR -= nE;
+ }
+ if (count) {
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, ctx->restart_index);
+ ++elts;
+ ctx->dest += ctx->vertex_size;
+ ++pos;
+ --count;
+ }
+ } while (count);
+}
+
+static void
+disp_vertices_i32(struct push_context *ctx, unsigned start, unsigned count)
+{
+ struct nouveau_pushbuf *push = ctx->push;
+ struct translate *translate = ctx->translate;
+ const uint32_t *restrict elts = (uint32_t *)ctx->idxbuf + start;
+ unsigned pos = 0;
+
+ do {
+ unsigned nR = count;
+
+ if (unlikely(ctx->prim_restart))
+ nR = prim_restart_search_i32(elts, nR, ctx->restart_index);
+
+ translate->run_elts(translate, elts, nR, 0, ctx->instance_id, ctx->dest);
+ count -= nR;
+ ctx->dest += nR * ctx->vertex_size;
+
+ while (nR) {
+ unsigned nE = nR;
+
+ if (unlikely(ctx->edgeflag.enabled))
+ nE = ef_toggle_search_i32(ctx, elts, nR);
+
+ PUSH_SPACE(push, 4);
+ if (likely(nE >= 2)) {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
+ PUSH_DATA (push, pos);
+ PUSH_DATA (push, nE);
+ } else
+ if (nE) {
+ if (pos <= 0xff) {
+ IMMED_NVC0(push, NVC0_3D(VB_ELEMENT_U32), pos);
+ } else {
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, pos);
+ }
+ }
+ if (unlikely(nE != nR))
+ IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));
+
+ pos += nE;
+ elts += nE;
+ nR -= nE;
+ }
+ if (count) {
+ BEGIN_NVC0(push, NVC0_3D(VB_ELEMENT_U32), 1);
+ PUSH_DATA (push, ctx->restart_index);
+ ++elts;
+ ctx->dest += ctx->vertex_size;
+ ++pos;
+ --count;
+ }
+ } while (count);
+}
+
+static void
+disp_vertices_seq(struct push_context *ctx, unsigned start, unsigned count)
+{
+ struct nouveau_pushbuf *push = ctx->push;
+ struct translate *translate = ctx->translate;
+ unsigned pos = 0;
+
+ translate->run(translate, start, count, 0, ctx->instance_id, ctx->dest);
+ do {
+ unsigned nr = count;
+
+ if (unlikely(ctx->edgeflag.enabled))
+ nr = ef_toggle_search_seq(ctx, start + pos, nr);
+
+ PUSH_SPACE(push, 4);
+ if (likely(nr)) {
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_BUFFER_FIRST), 2);
+ PUSH_DATA (push, pos);
+ PUSH_DATA (push, nr);
+ }
+ if (unlikely(nr != count))
+ IMMED_NVC0(push, NVC0_3D(EDGEFLAG), ef_toggle(ctx));
+
+ pos += nr;
+ count -= nr;
+ } while (count);
+}
+
+
+#define NVC0_PRIM_GL_CASE(n) \
+ case PIPE_PRIM_##n: return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_##n
+
+static INLINE unsigned
+nvc0_prim_gl(unsigned prim)
+{
+ switch (prim) {
+ NVC0_PRIM_GL_CASE(POINTS);
+ NVC0_PRIM_GL_CASE(LINES);
+ NVC0_PRIM_GL_CASE(LINE_LOOP);
+ NVC0_PRIM_GL_CASE(LINE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLES);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP);
+ NVC0_PRIM_GL_CASE(TRIANGLE_FAN);
+ NVC0_PRIM_GL_CASE(QUADS);
+ NVC0_PRIM_GL_CASE(QUAD_STRIP);
+ NVC0_PRIM_GL_CASE(POLYGON);
+ NVC0_PRIM_GL_CASE(LINES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(LINE_STRIP_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLES_ADJACENCY);
+ NVC0_PRIM_GL_CASE(TRIANGLE_STRIP_ADJACENCY);
+ /*
+ NVC0_PRIM_GL_CASE(PATCHES); */
+ default:
+ return NVC0_3D_VERTEX_BEGIN_GL_PRIMITIVE_POINTS;
+ }
+}
+
+void
+nvc0_push_vbo(struct nvc0_context *nvc0, const struct pipe_draw_info *info)
+{
+ struct push_context ctx;
+ unsigned i, index_size;
+ unsigned inst_count = info->instance_count;
+ unsigned vert_count = info->count;
+ unsigned prim;
+
+ nvc0_push_context_init(nvc0, &ctx);
+
+ nvc0_vertex_configure_translate(nvc0, info->index_bias);
+
+ if (unlikely(ctx.edgeflag.enabled))
+ nvc0_push_map_edgeflag(&ctx, nvc0, info->index_bias);
+
+ ctx.prim_restart = info->primitive_restart;
+ ctx.restart_index = info->restart_index;
+
+ if (info->indexed) {
+ nvc0_push_map_idxbuf(&ctx, nvc0);
+ index_size = nvc0->idxbuf.index_size;
+
+ if (info->primitive_restart) {
+ BEGIN_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 2);
+ PUSH_DATA (ctx.push, 1);
+ PUSH_DATA (ctx.push, info->restart_index);
+ } else
+ if (nvc0->state.prim_restart) {
+ IMMED_NVC0(ctx.push, NVC0_3D(PRIM_RESTART_ENABLE), 0);
+ }
+ nvc0->state.prim_restart = info->primitive_restart;
+ } else {
+ if (unlikely(info->count_from_stream_output)) {
+ struct pipe_context *pipe = &nvc0->base.pipe;
+ struct nvc0_so_target *targ;
+ targ = nvc0_so_target(info->count_from_stream_output);
+ pipe->get_query_result(pipe, targ->pq, TRUE, (void *)&vert_count);
+ vert_count /= targ->stride;
+ }
+ ctx.idxbuf = NULL; /* shut up warnings */
+ index_size = 0;
+ }
+
+ ctx.instance_id = info->start_instance;
+
+ prim = nvc0_prim_gl(info->mode);
+ do {
+ PUSH_SPACE(ctx.push, 9);
+
+ ctx.dest = nvc0_push_setup_vertex_array(nvc0, vert_count);
+ if (unlikely(!ctx.dest))
+ break;
+
+ if (unlikely(ctx.need_vertex_id))
+ nvc0_push_upload_vertex_ids(&ctx, nvc0, info);
+
+ IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FLUSH), 0);
+ BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_BEGIN_GL), 1);
+ PUSH_DATA (ctx.push, prim);
+ switch (index_size) {
+ case 1:
+ disp_vertices_i08(&ctx, info->start, vert_count);
+ break;
+ case 2:
+ disp_vertices_i16(&ctx, info->start, vert_count);
+ break;
+ case 4:
+ disp_vertices_i32(&ctx, info->start, vert_count);
+ break;
+ default:
+ assert(index_size == 0);
+ disp_vertices_seq(&ctx, info->start, vert_count);
+ break;
+ }
+ PUSH_SPACE(ctx.push, 1);
+ IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_END_GL), 0);
+
+ if (--inst_count) {
+ prim |= NVC0_3D_VERTEX_BEGIN_GL_INSTANCE_NEXT;
+ ++ctx.instance_id;
+ }
+ nouveau_bufctx_reset(nvc0->bufctx_3d, NVC0_BIND_VTX_TMP);
+ nouveau_scratch_done(&nvc0->base);
+ } while (inst_count);
+
+
+ /* reset state and unmap buffers (no-op) */
+
+ if (unlikely(!ctx.edgeflag.value)) {
+ PUSH_SPACE(ctx.push, 1);
+ IMMED_NVC0(ctx.push, NVC0_3D(EDGEFLAG), 1);
+ }
+
+ if (unlikely(ctx.need_vertex_id)) {
+ PUSH_SPACE(ctx.push, 4);
+ IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ID_REPLACE), 0);
+ BEGIN_NVC0(ctx.push, NVC0_3D(VERTEX_ATTRIB_FORMAT(1)), 1);
+ PUSH_DATA (ctx.push,
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_CONST |
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_FLOAT |
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32);
+ IMMED_NVC0(ctx.push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 0);
+ }
+
+ if (info->indexed)
+ nouveau_resource_unmap(nv04_resource(nvc0->idxbuf.buffer));
+ for (i = 0; i < nvc0->num_vtxbufs; ++i)
+ nouveau_resource_unmap(nv04_resource(nvc0->vtxbuf[i].buffer));
+
+ NOUVEAU_DRV_STAT(&nvc0->screen->base, draw_calls_fallback_count, 1);
+}
+
+static INLINE void
+copy_indices_u8(uint32_t *dst, const uint8_t *elts, uint32_t bias, unsigned n)
+{
+ unsigned i;
+ for (i = 0; i < n; ++i)
+ dst[i] = elts[i] + bias;
+}
+
+static INLINE void
+copy_indices_u16(uint32_t *dst, const uint16_t *elts, uint32_t bias, unsigned n)
+{
+ unsigned i;
+ for (i = 0; i < n; ++i)
+ dst[i] = elts[i] + bias;
+}
+
+static INLINE void
+copy_indices_u32(uint32_t *dst, const uint32_t *elts, uint32_t bias, unsigned n)
+{
+ unsigned i;
+ for (i = 0; i < n; ++i)
+ dst[i] = elts[i] + bias;
+}
+
+static void
+nvc0_push_upload_vertex_ids(struct push_context *ctx,
+ struct nvc0_context *nvc0,
+ const struct pipe_draw_info *info)
+
+{
+ struct nouveau_pushbuf *push = ctx->push;
+ struct nouveau_bo *bo;
+ uint64_t va;
+ uint32_t *data;
+ uint32_t format;
+ unsigned index_size = nvc0->idxbuf.index_size;
+ unsigned i;
+ unsigned a = nvc0->vertex->num_elements;
+
+ if (!index_size || info->index_bias)
+ index_size = 4;
+ data = (uint32_t *)nouveau_scratch_get(&nvc0->base,
+ info->count * index_size, &va, &bo);
+
+ BCTX_REFN_bo(nvc0->bufctx_3d, VTX_TMP, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
+ bo);
+ nouveau_pushbuf_validate(push);
+
+ if (info->indexed) {
+ if (!info->index_bias) {
+ memcpy(data, ctx->idxbuf, info->count * index_size);
+ } else {
+ switch (nvc0->idxbuf.index_size) {
+ case 1:
+ copy_indices_u8(data, ctx->idxbuf, info->index_bias, info->count);
+ break;
+ case 2:
+ copy_indices_u16(data, ctx->idxbuf, info->index_bias, info->count);
+ break;
+ default:
+ copy_indices_u32(data, ctx->idxbuf, info->index_bias, info->count);
+ break;
+ }
+ }
+ } else {
+ for (i = 0; i < info->count; ++i)
+ data[i] = i + (info->start + info->index_bias);
+ }
+
+ format = (1 << NVC0_3D_VERTEX_ATTRIB_FORMAT_BUFFER__SHIFT) |
+ NVC0_3D_VERTEX_ATTRIB_FORMAT_TYPE_UINT;
+
+ switch (index_size) {
+ case 1:
+ format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_8;
+ break;
+ case 2:
+ format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_16;
+ break;
+ default:
+ format |= NVC0_3D_VERTEX_ATTRIB_FORMAT_SIZE_32;
+ break;
+ }
+
+ PUSH_SPACE(push, 12);
+
+ if (unlikely(nvc0->state.instance_elts & 2)) {
+ nvc0->state.instance_elts &= ~2;
+ IMMED_NVC0(push, NVC0_3D(VERTEX_ARRAY_PER_INSTANCE(1)), 0);
+ }
+
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ATTRIB_FORMAT(a)), 1);
+ PUSH_DATA (push, format);
+
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_FETCH(1)), 3);
+ PUSH_DATA (push, NVC0_3D_VERTEX_ARRAY_FETCH_ENABLE | index_size);
+ PUSH_DATAh(push, va);
+ PUSH_DATA (push, va);
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ARRAY_LIMIT_HIGH(1)), 2);
+ PUSH_DATAh(push, va + info->count * index_size - 1);
+ PUSH_DATA (push, va + info->count * index_size - 1);
+
+#define NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) \
+ (((0x80 + (a) * 0x10) / 4) << NVC0_3D_VERTEX_ID_REPLACE_SOURCE__SHIFT)
+
+ BEGIN_NVC0(push, NVC0_3D(VERTEX_ID_REPLACE), 1);
+ PUSH_DATA (push, NVC0_3D_VERTEX_ID_REPLACE_SOURCE_ATTR_X(a) | 1);
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
new file mode 100644
index 0000000..5871f59
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video.c
@@ -0,0 +1,331 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_video.h"
+
+#include "util/u_sampler.h"
+#include "util/u_format.h"
+
+static void
+nvc0_decoder_decode_bitstream(struct pipe_video_codec *decoder,
+ struct pipe_video_buffer *video_target,
+ struct pipe_picture_desc *picture,
+ unsigned num_buffers,
+ const void *const *data,
+ const unsigned *num_bytes)
+{
+ struct nouveau_vp3_decoder *dec = (struct nouveau_vp3_decoder *)decoder;
+ struct nouveau_vp3_video_buffer *target = (struct nouveau_vp3_video_buffer *)video_target;
+ uint32_t comm_seq = ++dec->fence_seq;
+ union pipe_desc desc;
+
+ unsigned vp_caps, is_ref, ret;
+ struct nouveau_vp3_video_buffer *refs[16] = {};
+
+ desc.base = picture;
+
+ assert(target->base.buffer_format == PIPE_FORMAT_NV12);
+
+ ret = nvc0_decoder_bsp(dec, desc, target, comm_seq,
+ num_buffers, data, num_bytes,
+ &vp_caps, &is_ref, refs);
+
+ /* did we decode bitstream correctly? */
+ assert(ret == 2);
+
+ nvc0_decoder_vp(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
+ nvc0_decoder_ppp(dec, desc, target, comm_seq);
+}
+
+struct pipe_video_codec *
+nvc0_create_decoder(struct pipe_context *context,
+ const struct pipe_video_codec *templ)
+{
+ struct nouveau_screen *screen = &((struct nvc0_context *)context)->screen->base;
+ struct nouveau_vp3_decoder *dec;
+ struct nouveau_pushbuf **push;
+ union nouveau_bo_config cfg;
+ bool kepler = screen->device->chipset >= 0xe0;
+
+ cfg.nvc0.tile_mode = 0x10;
+ cfg.nvc0.memtype = 0xfe;
+
+ int ret, i;
+ uint32_t codec = 1, ppp_codec = 3;
+ uint32_t timeout;
+ u32 tmp_size = 0;
+
+ if (getenv("XVMC_VL"))
+ return vl_create_decoder(context, templ);
+
+ if (templ->entrypoint != PIPE_VIDEO_ENTRYPOINT_BITSTREAM) {
+ debug_printf("%x\n", templ->entrypoint);
+ return NULL;
+ }
+
+ dec = CALLOC_STRUCT(nouveau_vp3_decoder);
+ if (!dec)
+ return NULL;
+ dec->client = screen->client;
+ dec->base = *templ;
+ nouveau_vp3_decoder_init_common(&dec->base);
+
+ if (!kepler) {
+ dec->bsp_idx = 5;
+ dec->vp_idx = 6;
+ dec->ppp_idx = 7;
+ } else {
+ dec->bsp_idx = 2;
+ dec->vp_idx = 2;
+ dec->ppp_idx = 2;
+ }
+
+ for (i = 0; i < 3; ++i)
+ if (i && !kepler) {
+ dec->channel[i] = dec->channel[0];
+ dec->pushbuf[i] = dec->pushbuf[0];
+ } else {
+ void *data;
+ u32 size;
+ struct nvc0_fifo nvc0_args = {};
+ struct nve0_fifo nve0_args = {};
+
+ if (!kepler) {
+ size = sizeof(nvc0_args);
+ data = &nvc0_args;
+ } else {
+ unsigned engine[] = {
+ NVE0_FIFO_ENGINE_BSP,
+ NVE0_FIFO_ENGINE_VP,
+ NVE0_FIFO_ENGINE_PPP
+ };
+
+ nve0_args.engine = engine[i];
+ size = sizeof(nve0_args);
+ data = &nve0_args;
+ }
+
+ ret = nouveau_object_new(&screen->device->object, 0,
+ NOUVEAU_FIFO_CHANNEL_CLASS,
+ data, size, &dec->channel[i]);
+
+ if (!ret)
+ ret = nouveau_pushbuf_new(screen->client, dec->channel[i], 4,
+ 32 * 1024, true, &dec->pushbuf[i]);
+ if (ret)
+ break;
+ }
+ push = dec->pushbuf;
+
+ if (!kepler) {
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[0], 0x390b1, 0x90b1, NULL, 0, &dec->bsp);
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[1], 0x190b2, 0x90b2, NULL, 0, &dec->vp);
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[2], 0x290b3, 0x90b3, NULL, 0, &dec->ppp);
+ } else {
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[0], 0x95b1, 0x95b1, NULL, 0, &dec->bsp);
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[1], 0x95b2, 0x95b2, NULL, 0, &dec->vp);
+ if (!ret)
+ ret = nouveau_object_new(dec->channel[2], 0x90b3, 0x90b3, NULL, 0, &dec->ppp);
+ }
+ if (ret)
+ goto fail;
+
+ BEGIN_NVC0(push[0], SUBC_BSP(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push[0], dec->bsp->handle);
+
+ BEGIN_NVC0(push[1], SUBC_VP(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push[1], dec->vp->handle);
+
+ BEGIN_NVC0(push[2], SUBC_PPP(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push[2], dec->ppp->handle);
+
+ dec->base.context = context;
+ dec->base.decode_bitstream = nvc0_decoder_decode_bitstream;
+
+ for (i = 0; i < NOUVEAU_VP3_VIDEO_QDEPTH && !ret; ++i)
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
+ 0, 1 << 20, &cfg, &dec->bsp_bo[i]);
+ if (!ret)
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
+ 0x100, 4 << 20, &cfg, &dec->inter_bo[0]);
+ if (!ret) {
+ if (!kepler)
+ nouveau_bo_ref(dec->inter_bo[0], &dec->inter_bo[1]);
+ else
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM,
+ 0x100, dec->inter_bo[0]->size, &cfg,
+ &dec->inter_bo[1]);
+ }
+ if (ret)
+ goto fail;
+
+ switch (u_reduce_video_profile(templ->profile)) {
+ case PIPE_VIDEO_FORMAT_MPEG12: {
+ codec = 1;
+ assert(templ->max_references <= 2);
+ break;
+ }
+ case PIPE_VIDEO_FORMAT_MPEG4: {
+ codec = 4;
+ tmp_size = mb(templ->height)*16 * mb(templ->width)*16;
+ assert(templ->max_references <= 2);
+ break;
+ }
+ case PIPE_VIDEO_FORMAT_VC1: {
+ ppp_codec = codec = 2;
+ tmp_size = mb(templ->height)*16 * mb(templ->width)*16;
+ assert(templ->max_references <= 2);
+ break;
+ }
+ case PIPE_VIDEO_FORMAT_MPEG4_AVC: {
+ codec = 3;
+ dec->tmp_stride = 16 * mb_half(templ->width) * nouveau_vp3_video_align(templ->height) * 3 / 2;
+ tmp_size = dec->tmp_stride * (templ->max_references + 1);
+ assert(templ->max_references <= 16);
+ break;
+ }
+ default:
+ fprintf(stderr, "invalid codec\n");
+ goto fail;
+ }
+
+ if (screen->device->chipset < 0xd0) {
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0,
+ 0x4000, &cfg, &dec->fw_bo);
+ if (ret)
+ goto fail;
+
+ ret = nouveau_vp3_load_firmware(dec, templ->profile, screen->device->chipset);
+ if (ret)
+ goto fw_fail;
+ }
+
+ if (codec != 3) {
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0,
+ 0x400, &cfg, &dec->bitplane_bo);
+ if (ret)
+ goto fail;
+ }
+
+ dec->ref_stride = mb(templ->width)*16 * (mb_half(templ->height)*32 + nouveau_vp3_video_align(templ->height)/2);
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_VRAM, 0,
+ dec->ref_stride * (templ->max_references+2) + tmp_size,
+ &cfg, &dec->ref_bo);
+ if (ret)
+ goto fail;
+
+ timeout = 0;
+
+ BEGIN_NVC0(push[0], SUBC_BSP(0x200), 2);
+ PUSH_DATA (push[0], codec);
+ PUSH_DATA (push[0], timeout);
+
+ BEGIN_NVC0(push[1], SUBC_VP(0x200), 2);
+ PUSH_DATA (push[1], codec);
+ PUSH_DATA (push[1], timeout);
+
+ BEGIN_NVC0(push[2], SUBC_PPP(0x200), 2);
+ PUSH_DATA (push[2], ppp_codec);
+ PUSH_DATA (push[2], timeout);
+
+ ++dec->fence_seq;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ ret = nouveau_bo_new(screen->device, NOUVEAU_BO_GART|NOUVEAU_BO_MAP,
+ 0, 0x1000, NULL, &dec->fence_bo);
+ if (ret)
+ goto fail;
+
+ nouveau_bo_map(dec->fence_bo, NOUVEAU_BO_RDWR, screen->client);
+ dec->fence_map = dec->fence_bo->map;
+ dec->fence_map[0] = dec->fence_map[4] = dec->fence_map[8] = 0;
+ dec->comm = (struct comm *)(dec->fence_map + (COMM_OFFSET/sizeof(*dec->fence_map)));
+
+ /* So lets test if the fence is working? */
+ nouveau_pushbuf_space(push[0], 6, 1, 0);
+ PUSH_REFN (push[0], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
+ BEGIN_NVC0(push[0], SUBC_BSP(0x240), 3);
+ PUSH_DATAh(push[0], dec->fence_bo->offset);
+ PUSH_DATA (push[0], dec->fence_bo->offset);
+ PUSH_DATA (push[0], dec->fence_seq);
+
+ BEGIN_NVC0(push[0], SUBC_BSP(0x304), 1);
+ PUSH_DATA (push[0], 0);
+ PUSH_KICK (push[0]);
+
+ nouveau_pushbuf_space(push[1], 6, 1, 0);
+ PUSH_REFN (push[1], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
+ BEGIN_NVC0(push[1], SUBC_VP(0x240), 3);
+ PUSH_DATAh(push[1], (dec->fence_bo->offset + 0x10));
+ PUSH_DATA (push[1], (dec->fence_bo->offset + 0x10));
+ PUSH_DATA (push[1], dec->fence_seq);
+
+ BEGIN_NVC0(push[1], SUBC_VP(0x304), 1);
+ PUSH_DATA (push[1], 0);
+ PUSH_KICK (push[1]);
+
+ nouveau_pushbuf_space(push[2], 6, 1, 0);
+ PUSH_REFN (push[2], dec->fence_bo, NOUVEAU_BO_GART|NOUVEAU_BO_RDWR);
+ BEGIN_NVC0(push[2], SUBC_PPP(0x240), 3);
+ PUSH_DATAh(push[2], (dec->fence_bo->offset + 0x20));
+ PUSH_DATA (push[2], (dec->fence_bo->offset + 0x20));
+ PUSH_DATA (push[2], dec->fence_seq);
+
+ BEGIN_NVC0(push[2], SUBC_PPP(0x304), 1);
+ PUSH_DATA (push[2], 0);
+ PUSH_KICK (push[2]);
+
+ usleep(100);
+ while (dec->fence_seq > dec->fence_map[0] ||
+ dec->fence_seq > dec->fence_map[4] ||
+ dec->fence_seq > dec->fence_map[8]) {
+ debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]);
+ usleep(100);
+ }
+ debug_printf("%u: %u %u %u\n", dec->fence_seq, dec->fence_map[0], dec->fence_map[4], dec->fence_map[8]);
+#endif
+
+ return &dec->base;
+
+fw_fail:
+ debug_printf("Cannot create decoder without firmware..\n");
+ dec->base.destroy(&dec->base);
+ return NULL;
+
+fail:
+ debug_printf("Creation failed: %s (%i)\n", strerror(-ret), ret);
+ dec->base.destroy(&dec->base);
+ return NULL;
+}
+
+struct pipe_video_buffer *
+nvc0_video_buffer_create(struct pipe_context *pipe,
+ const struct pipe_video_buffer *templat)
+{
+ return nouveau_vp3_video_buffer_create(
+ pipe, templat, NVC0_RESOURCE_FLAG_VIDEO);
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video.h b/src/gallium/drivers/nouveau/nvc0/nvc0_video.h
new file mode 100644
index 0000000..9ee0280
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video.h
@@ -0,0 +1,48 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_screen.h"
+#include "nouveau_vp3_video.h"
+
+#include "vl/vl_decoder.h"
+#include "vl/vl_types.h"
+
+#include "util/u_video.h"
+
+extern unsigned
+nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target,
+ unsigned comm_seq, unsigned num_buffers,
+ const void *const *data, const unsigned *num_bytes,
+ unsigned *vp_caps, unsigned *is_ref,
+ struct nouveau_vp3_video_buffer *refs[16]);
+
+extern void
+nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target, unsigned comm_seq,
+ unsigned caps, unsigned is_ref,
+ struct nouveau_vp3_video_buffer *refs[16]);
+
+extern void
+nvc0_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target, unsigned comm_seq);
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
new file mode 100644
index 0000000..40696fa
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_bsp.c
@@ -0,0 +1,155 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_video.h"
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+static void dump_comm_bsp(struct comm *comm)
+{
+ unsigned idx = comm->bsp_cur_index & 0xf;
+ debug_printf("Cur seq: %x, bsp byte ofs: %x\n", comm->bsp_cur_index, comm->byte_ofs);
+ debug_printf("Status: %08x, pos: %08x\n", comm->status[idx], comm->pos[idx]);
+}
+#endif
+
+unsigned
+nvc0_decoder_bsp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target,
+ unsigned comm_seq, unsigned num_buffers,
+ const void *const *data, const unsigned *num_bytes,
+ unsigned *vp_caps, unsigned *is_ref,
+ struct nouveau_vp3_video_buffer *refs[16])
+{
+ struct nouveau_pushbuf *push = dec->pushbuf[0];
+ enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
+ uint32_t bsp_addr, comm_addr, inter_addr;
+ uint32_t slice_size, bucket_size, ring_size;
+ uint32_t caps;
+ int ret;
+ struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
+ struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
+ unsigned fence_extra = 0;
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+ { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+#if NOUVEAU_VP3_DEBUG_FENCE
+ { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
+#endif
+ { dec->bitplane_bo, NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM },
+ };
+ int num_refs = sizeof(bo_refs)/sizeof(*bo_refs);
+
+ if (!dec->bitplane_bo)
+ num_refs--;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ fence_extra = 4;
+#endif
+
+ ret = nouveau_bo_map(bsp_bo, NOUVEAU_BO_WR, dec->client);
+ if (ret) {
+ debug_printf("map failed: %i %s\n", ret, strerror(-ret));
+ return -1;
+ }
+
+ caps = nouveau_vp3_bsp(dec, desc, target, comm_seq,
+ num_buffers, data, num_bytes);
+
+ nouveau_vp3_vp_caps(dec, desc, target, comm_seq, vp_caps, is_ref, refs);
+
+ nouveau_pushbuf_space(push, 6 + (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC ? 9 : 7) + fence_extra + 2, num_refs, 0);
+ nouveau_pushbuf_refn(push, bo_refs, num_refs);
+
+ bsp_addr = bsp_bo->offset >> 8;
+ inter_addr = inter_bo->offset >> 8;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ memset(dec->comm, 0, 0x200);
+ comm_addr = (dec->fence_bo->offset + COMM_OFFSET) >> 8;
+#else
+ comm_addr = bsp_addr + (COMM_OFFSET>>8);
+#endif
+
+ BEGIN_NVC0(push, SUBC_BSP(0x700), 5);
+ PUSH_DATA (push, caps); // 700 cmd
+ PUSH_DATA (push, bsp_addr + 1); // 704 strparm_bsp
+ PUSH_DATA (push, bsp_addr + 7); // 708 str addr
+ PUSH_DATA (push, comm_addr); // 70c comm
+ PUSH_DATA (push, comm_seq); // 710 seq
+
+ if (codec != PIPE_VIDEO_FORMAT_MPEG4_AVC) {
+ u32 bitplane_addr;
+
+ bitplane_addr = dec->bitplane_bo->offset >> 8;
+
+ nouveau_vp3_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size);
+ BEGIN_NVC0(push, SUBC_BSP(0x400), 6);
+ PUSH_DATA (push, bsp_addr); // 400 picparm addr
+ PUSH_DATA (push, inter_addr); // 404 interparm addr
+ PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 408 interdata addr
+ PUSH_DATA (push, ring_size << 8); // 40c interdata_size
+ PUSH_DATA (push, bitplane_addr); // 410 BITPLANE_DATA
+ PUSH_DATA (push, 0x400); // 414 BITPLANE_DATA_SIZE
+ } else {
+ nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
+ BEGIN_NVC0(push, SUBC_BSP(0x400), 8);
+ PUSH_DATA (push, bsp_addr); // 400 picparm addr
+ PUSH_DATA (push, inter_addr); // 404 interparm addr
+ PUSH_DATA (push, slice_size << 8); // 408 interparm size?
+ PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 40c interdata addr
+ PUSH_DATA (push, ring_size << 8); // 410 interdata size
+ PUSH_DATA (push, inter_addr + slice_size); // 414 bucket?
+ PUSH_DATA (push, bucket_size << 8); // 418 bucket size? unshifted..
+ PUSH_DATA (push, 0); // 41c targets
+ // TODO: Double check 414 / 418 with nvidia trace
+ }
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ BEGIN_NVC0(push, SUBC_BSP(0x240), 3);
+ PUSH_DATAh(push, dec->fence_bo->offset);
+ PUSH_DATA (push, dec->fence_bo->offset);
+ PUSH_DATA (push, dec->fence_seq);
+
+ BEGIN_NVC0(push, SUBC_BSP(0x300), 1);
+ PUSH_DATA (push, 1);
+ PUSH_KICK (push);
+
+ {
+ unsigned spin = 0;
+ do {
+ usleep(100);
+ if ((spin++ & 0xff) == 0xff) {
+ debug_printf("b%u: %u\n", dec->fence_seq, dec->fence_map[0]);
+ dump_comm_bsp(dec->comm);
+ }
+ } while (dec->fence_seq > dec->fence_map[0]);
+ }
+
+ dump_comm_bsp(dec->comm);
+ return dec->comm->status[comm_seq & 0xf];
+#else
+ BEGIN_NVC0(push, SUBC_BSP(0x300), 1);
+ PUSH_DATA (push, 0);
+ PUSH_KICK (push);
+ return 2;
+#endif
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c
new file mode 100644
index 0000000..4ceec4f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_ppp.c
@@ -0,0 +1,143 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_video.h"
+
+static void
+nvc0_decoder_setup_ppp(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target, uint32_t low700) {
+ struct nouveau_pushbuf *push = dec->pushbuf[2];
+
+ uint32_t stride_in = mb(dec->base.width);
+ uint32_t stride_out = mb(target->resources[0]->width0);
+ uint32_t dec_h = mb(dec->base.height);
+ uint32_t dec_w = mb(dec->base.width);
+ uint64_t in_addr;
+ uint32_t y2, cbcr, cbcr2, i;
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+ { NULL, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+ { dec->ref_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+#if NOUVEAU_VP3_DEBUG_FENCE
+ { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
+#endif
+ };
+ unsigned num_refs = sizeof(bo_refs)/sizeof(*bo_refs);
+
+ for (i = 0; i < 2; ++i) {
+ struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i];
+ bo_refs[i].bo = mt->base.bo;
+ }
+
+ nouveau_pushbuf_refn(push, bo_refs, num_refs);
+ nouveau_vp3_ycbcr_offsets(dec, &y2, &cbcr, &cbcr2);
+
+ BEGIN_NVC0(push, SUBC_PPP(0x700), 10);
+ in_addr = nouveau_vp3_video_addr(dec, target) >> 8;
+
+ PUSH_DATA (push, (stride_out << 24) | (stride_out << 16) | low700); // 700
+ PUSH_DATA (push, (stride_in << 24) | (stride_in << 16) | (dec_h << 8) | dec_w); // 704
+ assert(dec_w == stride_in);
+
+ /* Input: */
+ PUSH_DATA (push, in_addr); // 708
+ PUSH_DATA (push, in_addr + y2); // 70c
+ PUSH_DATA (push, in_addr + cbcr); // 710
+ PUSH_DATA (push, in_addr + cbcr2); // 714
+
+ for (i = 0; i < 2; ++i) {
+ struct nv50_miptree *mt = (struct nv50_miptree *)target->resources[i];
+
+ PUSH_DATA (push, mt->base.address >> 8);
+ PUSH_DATA (push, (mt->base.address + mt->total_size/2) >> 8);
+ mt->base.status |= NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ }
+}
+
+static uint32_t
+nvc0_decoder_vc1_ppp(struct nouveau_vp3_decoder *dec, struct pipe_vc1_picture_desc *desc, struct nouveau_vp3_video_buffer *target) {
+ struct nouveau_pushbuf *push = dec->pushbuf[2];
+
+ nvc0_decoder_setup_ppp(dec, target, 0x1412);
+ assert(!desc->deblockEnable);
+ assert(!(dec->base.width & 0xf));
+ assert(!(dec->base.height & 0xf));
+
+ BEGIN_NVC0(push, SUBC_PPP(0x400), 1);
+ PUSH_DATA (push, desc->pquant << 11);
+
+ // 728 = wtf?
+ return 0x10;
+}
+
+void
+nvc0_decoder_ppp(struct nouveau_vp3_decoder *dec, union pipe_desc desc, struct nouveau_vp3_video_buffer *target, unsigned comm_seq) {
+ enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
+ struct nouveau_pushbuf *push = dec->pushbuf[2];
+ unsigned ppp_caps = 0x10;
+ unsigned fence_extra = 0;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ fence_extra = 4;
+#endif
+
+ nouveau_pushbuf_space(push, 11 + (codec == PIPE_VIDEO_FORMAT_VC1 ? 2 : 0) + 3 + fence_extra + 2, 4, 0);
+
+ switch (codec) {
+ case PIPE_VIDEO_FORMAT_MPEG12: {
+ unsigned mpeg2 = dec->base.profile != PIPE_VIDEO_PROFILE_MPEG1;
+ nvc0_decoder_setup_ppp(dec, target, 0x1410 | mpeg2);
+ break;
+ }
+ case PIPE_VIDEO_FORMAT_MPEG4: nvc0_decoder_setup_ppp(dec, target, 0x1414); break;
+ case PIPE_VIDEO_FORMAT_VC1: ppp_caps = nvc0_decoder_vc1_ppp(dec, desc.vc1, target); break;
+ case PIPE_VIDEO_FORMAT_MPEG4_AVC: nvc0_decoder_setup_ppp(dec, target, 0x1413); break;
+ default: assert(0);
+ }
+ BEGIN_NVC0(push, SUBC_PPP(0x734), 2);
+ PUSH_DATA (push, comm_seq);
+ PUSH_DATA (push, ppp_caps);
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ BEGIN_NVC0(push, SUBC_PPP(0x240), 3);
+ PUSH_DATAh(push, (dec->fence_bo->offset + 0x20));
+ PUSH_DATA (push, (dec->fence_bo->offset + 0x20));
+ PUSH_DATA (push, dec->fence_seq);
+
+ BEGIN_NVC0(push, SUBC_PPP(0x300), 1);
+ PUSH_DATA (push, 1);
+ PUSH_KICK (push);
+
+ {
+ unsigned spin = 0;
+
+ do {
+ usleep(100);
+ if ((spin++ & 0xff) == 0xff)
+ debug_printf("p%u: %u\n", dec->fence_seq, dec->fence_map[8]);
+ } while (dec->fence_seq > dec->fence_map[8]);
+ }
+#else
+ BEGIN_NVC0(push, SUBC_PPP(0x300), 1);
+ PUSH_DATA (push, 0);
+ PUSH_KICK (push);
+#endif
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c b/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c
new file mode 100644
index 0000000..0d152b9
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_video_vp.c
@@ -0,0 +1,202 @@
+/*
+ * Copyright 2011-2013 Maarten Lankhorst
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "nvc0/nvc0_video.h"
+#include <sys/mman.h>
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+static void dump_comm_vp(struct nouveau_vp3_decoder *dec, struct comm *comm, u32 comm_seq,
+ struct nouveau_bo *inter_bo, unsigned slice_size)
+{
+ unsigned i, idx = comm->pvp_cur_index & 0xf;
+ debug_printf("Status: %08x, stage: %08x\n", comm->status_vp[idx], comm->pvp_stage);
+#if 0
+ debug_printf("Acked byte ofs: %x, bsp byte ofs: %x\n", comm->acked_byte_ofs, comm->byte_ofs);
+ debug_printf("Irq/parse indexes: %i %i\n", comm->irq_index, comm->parse_endpos_index);
+
+ for (i = 0; i != comm->irq_index; ++i)
+ debug_printf("irq[%i] = { @ %08x -> %04x }\n", i, comm->irq_pos[i], comm->irq_470[i]);
+ for (i = 0; i != comm->parse_endpos_index; ++i)
+ debug_printf("parse_endpos[%i] = { @ %08x}\n", i, comm->parse_endpos[i]);
+#endif
+ debug_printf("mb_y = %u\n", comm->mb_y[idx]);
+ if (comm->status_vp[idx] == 1)
+ return;
+
+ if ((comm->pvp_stage & 0xff) != 0xff) {
+ unsigned *map;
+ assert(nouveau_bo_map(inter_bo, NOUVEAU_BO_RD|NOUVEAU_BO_NOBLOCK, dec->client) >= 0);
+ map = inter_bo->map;
+ for (i = 0; i < comm->byte_ofs + slice_size; i += 0x10) {
+ debug_printf("%05x: %08x %08x %08x %08x\n", i, map[i/4], map[i/4+1], map[i/4+2], map[i/4+3]);
+ }
+ munmap(inter_bo->map, inter_bo->size);
+ inter_bo->map = NULL;
+ }
+ assert((comm->pvp_stage & 0xff) == 0xff);
+}
+#endif
+
+static void
+nvc0_decoder_kick_ref(struct nouveau_vp3_decoder *dec, struct nouveau_vp3_video_buffer *target)
+{
+ dec->refs[target->valid_ref].vidbuf = NULL;
+ dec->refs[target->valid_ref].last_used = 0;
+// debug_printf("Unreffed %p\n", target);
+}
+
+void
+nvc0_decoder_vp(struct nouveau_vp3_decoder *dec, union pipe_desc desc,
+ struct nouveau_vp3_video_buffer *target, unsigned comm_seq,
+ unsigned caps, unsigned is_ref,
+ struct nouveau_vp3_video_buffer *refs[16])
+{
+ struct nouveau_pushbuf *push = dec->pushbuf[1];
+ uint32_t bsp_addr, comm_addr, inter_addr, ucode_addr, pic_addr[17], last_addr, null_addr;
+ uint32_t slice_size, bucket_size, ring_size, i;
+ enum pipe_video_format codec = u_reduce_video_profile(dec->base.profile);
+ struct nouveau_bo *bsp_bo = dec->bsp_bo[comm_seq % NOUVEAU_VP3_VIDEO_QDEPTH];
+ struct nouveau_bo *inter_bo = dec->inter_bo[comm_seq & 1];
+ u32 fence_extra = 0, codec_extra = 0;
+ struct nouveau_pushbuf_refn bo_refs[] = {
+ { inter_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+ { dec->ref_bo, NOUVEAU_BO_WR | NOUVEAU_BO_VRAM },
+ { bsp_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+#if NOUVEAU_VP3_DEBUG_FENCE
+ { dec->fence_bo, NOUVEAU_BO_WR | NOUVEAU_BO_GART },
+#endif
+ { dec->fw_bo, NOUVEAU_BO_RD | NOUVEAU_BO_VRAM },
+ };
+ int num_refs = sizeof(bo_refs)/sizeof(*bo_refs) - !dec->fw_bo;
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ fence_extra = 4;
+#endif
+
+ if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
+ nouveau_vp3_inter_sizes(dec, desc.h264->slice_count, &slice_size, &bucket_size, &ring_size);
+ codec_extra += 2;
+ } else
+ nouveau_vp3_inter_sizes(dec, 1, &slice_size, &bucket_size, &ring_size);
+
+ if (dec->base.max_references > 2)
+ codec_extra += 1 + (dec->base.max_references - 2);
+
+ pic_addr[16] = nouveau_vp3_video_addr(dec, target) >> 8;
+ last_addr = null_addr = nouveau_vp3_video_addr(dec, NULL) >> 8;
+
+ for (i = 0; i < dec->base.max_references; ++i) {
+ if (!refs[i])
+ pic_addr[i] = last_addr;
+ else if (dec->refs[refs[i]->valid_ref].vidbuf == refs[i])
+ last_addr = pic_addr[i] = nouveau_vp3_video_addr(dec, refs[i]) >> 8;
+ else
+ pic_addr[i] = null_addr;
+ }
+ if (!is_ref)
+ nvc0_decoder_kick_ref(dec, target);
+
+ nouveau_pushbuf_space(push, 8 + 3 * (codec != PIPE_VIDEO_FORMAT_MPEG12) +
+ 6 + codec_extra + fence_extra + 2, num_refs, 0);
+
+ nouveau_pushbuf_refn(push, bo_refs, num_refs);
+
+ bsp_addr = bsp_bo->offset >> 8;
+#if NOUVEAU_VP3_DEBUG_FENCE
+ comm_addr = (dec->fence_bo->offset + COMM_OFFSET)>>8;
+#else
+ comm_addr = bsp_addr + (COMM_OFFSET>>8);
+#endif
+ inter_addr = inter_bo->offset >> 8;
+ if (dec->fw_bo)
+ ucode_addr = dec->fw_bo->offset >> 8;
+ else
+ ucode_addr = 0;
+
+ BEGIN_NVC0(push, SUBC_VP(0x700), 7);
+ PUSH_DATA (push, caps); // 700
+ PUSH_DATA (push, comm_seq); // 704
+ PUSH_DATA (push, 0); // 708 fuc targets, ignored for nvc0
+ PUSH_DATA (push, dec->fw_sizes); // 70c
+ PUSH_DATA (push, bsp_addr+(VP_OFFSET>>8)); // 710 picparm_addr
+ PUSH_DATA (push, inter_addr); // 714 inter_parm
+ PUSH_DATA (push, inter_addr + slice_size + bucket_size); // 718 inter_data_ofs
+
+ if (bucket_size) {
+ uint64_t tmpimg_addr = dec->ref_bo->offset + dec->ref_stride * (dec->base.max_references+2);
+
+ BEGIN_NVC0(push, SUBC_VP(0x71c), 2);
+ PUSH_DATA (push, tmpimg_addr >> 8); // 71c
+ PUSH_DATA (push, inter_addr + slice_size); // 720 bucket_ofs
+ }
+
+ BEGIN_NVC0(push, SUBC_VP(0x724), 5);
+ PUSH_DATA (push, comm_addr); // 724
+ PUSH_DATA (push, ucode_addr); // 728
+ PUSH_DATA (push, pic_addr[16]); // 734
+ PUSH_DATA (push, pic_addr[0]); // 72c
+ PUSH_DATA (push, pic_addr[1]); // 730
+
+ if (dec->base.max_references > 2) {
+ int i;
+
+ BEGIN_NVC0(push, SUBC_VP(0x400), dec->base.max_references - 2);
+ for (i = 2; i < dec->base.max_references; ++i) {
+ assert(0x400 + (i - 2) * 4 < 0x438);
+ PUSH_DATA (push, pic_addr[i]);
+ }
+ }
+
+ if (codec == PIPE_VIDEO_FORMAT_MPEG4_AVC) {
+ BEGIN_NVC0(push, SUBC_VP(0x438), 1);
+ PUSH_DATA (push, desc.h264->slice_count);
+ }
+
+ //debug_printf("Decoding %08lx with %08lx and %08lx\n", pic_addr[16], pic_addr[0], pic_addr[1]);
+
+#if NOUVEAU_VP3_DEBUG_FENCE
+ BEGIN_NVC0(push, SUBC_VP(0x240), 3);
+ PUSH_DATAh(push, (dec->fence_bo->offset + 0x10));
+ PUSH_DATA (push, (dec->fence_bo->offset + 0x10));
+ PUSH_DATA (push, dec->fence_seq);
+
+ BEGIN_NVC0(push, SUBC_VP(0x300), 1);
+ PUSH_DATA (push, 1);
+ PUSH_KICK(push);
+
+ {
+ unsigned spin = 0;
+ do {
+ usleep(100);
+ if ((spin++ & 0xff) == 0xff) {
+ debug_printf("v%u: %u\n", dec->fence_seq, dec->fence_map[4]);
+ dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8);
+ }
+ } while (dec->fence_seq > dec->fence_map[4]);
+ }
+ dump_comm_vp(dec, dec->comm, comm_seq, inter_bo, slice_size << 8);
+#else
+ BEGIN_NVC0(push, SUBC_VP(0x300), 1);
+ PUSH_DATA (push, 0);
+ PUSH_KICK (push);
+#endif
+}
diff --git a/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
new file mode 100644
index 0000000..3514d9d
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nvc0_winsys.h
@@ -0,0 +1,144 @@
+
+#ifndef __NVC0_WINSYS_H__
+#define __NVC0_WINSYS_H__
+
+#include <stdint.h>
+#include <unistd.h>
+
+#include "pipe/p_defines.h"
+
+#include "nouveau_winsys.h"
+#include "nouveau_buffer.h"
+
+#ifndef NV04_PFIFO_MAX_PACKET_LEN
+#define NV04_PFIFO_MAX_PACKET_LEN 2047
+#endif
+
+
+static INLINE void
+nv50_add_bufctx_resident_bo(struct nouveau_bufctx *bufctx, int bin,
+ unsigned flags, struct nouveau_bo *bo)
+{
+ nouveau_bufctx_refn(bufctx, bin, bo, flags)->priv = NULL;
+}
+
+static INLINE void
+nvc0_add_resident(struct nouveau_bufctx *bufctx, int bin,
+ struct nv04_resource *res, unsigned flags)
+{
+ struct nouveau_bufref *ref =
+ nouveau_bufctx_refn(bufctx, bin, res->bo, flags | res->domain);
+ ref->priv = res;
+ ref->priv_data = flags;
+}
+
+#define BCTX_REFN_bo(ctx, bin, fl, bo) \
+ nv50_add_bufctx_resident_bo(ctx, NVC0_BIND_##bin, fl, bo);
+
+#define BCTX_REFN(bctx, bin, res, acc) \
+ nvc0_add_resident(bctx, NVC0_BIND_##bin, res, NOUVEAU_BO_##acc)
+
+static INLINE void
+PUSH_REFN(struct nouveau_pushbuf *push, struct nouveau_bo *bo, uint32_t flags)
+{
+ struct nouveau_pushbuf_refn ref = { bo, flags };
+ nouveau_pushbuf_refn(push, &ref, 1);
+}
+
+
+#define SUBC_3D(m) 0, (m)
+#define NVC0_3D(n) SUBC_3D(NVC0_3D_##n)
+#define NVE4_3D(n) SUBC_3D(NVE4_3D_##n)
+
+#define SUBC_COMPUTE(m) 1, (m)
+#define NVC0_COMPUTE(n) SUBC_COMPUTE(NVC0_COMPUTE_##n)
+#define NVE4_COMPUTE(n) SUBC_COMPUTE(NVE4_COMPUTE_##n)
+
+#define SUBC_M2MF(m) 2, (m)
+#define SUBC_P2MF(m) 2, (m)
+#define NVC0_M2MF(n) SUBC_M2MF(NVC0_M2MF_##n)
+#define NVE4_P2MF(n) SUBC_P2MF(NVE4_P2MF_##n)
+
+#define SUBC_2D(m) 3, (m)
+#define NVC0_2D(n) SUBC_2D(NVC0_2D_##n)
+
+#define SUBC_COPY(m) 4, (m)
+#define NVE4_COPY(m) SUBC_COPY(NVE4_COPY_##n)
+
+#define SUBC_SW(m) 7, (m)
+
+static INLINE uint32_t
+NVC0_FIFO_PKHDR_SQ(int subc, int mthd, unsigned size)
+{
+ return 0x20000000 | (size << 16) | (subc << 13) | (mthd >> 2);
+}
+
+static INLINE uint32_t
+NVC0_FIFO_PKHDR_NI(int subc, int mthd, unsigned size)
+{
+ return 0x60000000 | (size << 16) | (subc << 13) | (mthd >> 2);
+}
+
+static INLINE uint32_t
+NVC0_FIFO_PKHDR_IL(int subc, int mthd, uint8_t data)
+{
+ return 0x80000000 | (data << 16) | (subc << 13) | (mthd >> 2);
+}
+
+static INLINE uint32_t
+NVC0_FIFO_PKHDR_1I(int subc, int mthd, unsigned size)
+{
+ return 0xa0000000 | (size << 16) | (subc << 13) | (mthd >> 2);
+}
+
+
+static INLINE uint8_t
+nouveau_bo_memtype(const struct nouveau_bo *bo)
+{
+ return bo->config.nvc0.memtype;
+}
+
+
+static INLINE void
+PUSH_DATAh(struct nouveau_pushbuf *push, uint64_t data)
+{
+ *push->cur++ = (uint32_t)(data >> 32);
+}
+
+static INLINE void
+BEGIN_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
+{
+#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
+ PUSH_SPACE(push, size + 1);
+#endif
+ PUSH_DATA (push, NVC0_FIFO_PKHDR_SQ(subc, mthd, size));
+}
+
+static INLINE void
+BEGIN_NIC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
+{
+#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
+ PUSH_SPACE(push, size + 1);
+#endif
+ PUSH_DATA (push, NVC0_FIFO_PKHDR_NI(subc, mthd, size));
+}
+
+static INLINE void
+BEGIN_1IC0(struct nouveau_pushbuf *push, int subc, int mthd, unsigned size)
+{
+#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
+ PUSH_SPACE(push, size + 1);
+#endif
+ PUSH_DATA (push, NVC0_FIFO_PKHDR_1I(subc, mthd, size));
+}
+
+static INLINE void
+IMMED_NVC0(struct nouveau_pushbuf *push, int subc, int mthd, uint8_t data)
+{
+#ifndef NVC0_PUSH_EXPLICIT_SPACE_CHECKING
+ PUSH_SPACE(push, 1);
+#endif
+ PUSH_DATA (push, NVC0_FIFO_PKHDR_IL(subc, mthd, data));
+}
+
+#endif /* __NVC0_WINSYS_H__ */
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.c b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
new file mode 100644
index 0000000..06c914f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.c
@@ -0,0 +1,652 @@
+/*
+ * Copyright 2012 Nouveau Project
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
+ * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
+ * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ *
+ * Authors: Christoph Bumiller
+ */
+
+#include "nvc0/nvc0_context.h"
+#include "nvc0/nvc0_compute.h"
+#include "nvc0/nve4_compute.h"
+
+#include "codegen/nv50_ir_driver.h"
+
+#ifdef DEBUG
+static void nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *);
+#endif
+
+
+int
+nve4_screen_compute_setup(struct nvc0_screen *screen,
+ struct nouveau_pushbuf *push)
+{
+ struct nouveau_device *dev = screen->base.device;
+ struct nouveau_object *chan = screen->base.channel;
+ unsigned i;
+ int ret;
+ uint32_t obj_class;
+
+ switch (dev->chipset & 0xf0) {
+ case 0xf0:
+ obj_class = NVF0_COMPUTE_CLASS; /* GK110 */
+ break;
+ case 0xe0:
+ obj_class = NVE4_COMPUTE_CLASS; /* GK104 */
+ break;
+ default:
+ NOUVEAU_ERR("unsupported chipset: NV%02x\n", dev->chipset);
+ return -1;
+ }
+
+ ret = nouveau_object_new(chan, 0xbeef00c0, obj_class, NULL, 0,
+ &screen->compute);
+ if (ret) {
+ NOUVEAU_ERR("Failed to allocate compute object: %d\n", ret);
+ return ret;
+ }
+
+ ret = nouveau_bo_new(dev, NOUVEAU_BO_VRAM, 0, NVE4_CP_PARAM_SIZE, NULL,
+ &screen->parm);
+ if (ret)
+ return ret;
+
+ BEGIN_NVC0(push, SUBC_COMPUTE(NV01_SUBCHAN_OBJECT), 1);
+ PUSH_DATA (push, screen->compute->oclass);
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(TEMP_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->tls->offset);
+ PUSH_DATA (push, screen->tls->offset);
+ /* No idea why there are 2. Divide size by 2 to be safe.
+ * Actually this might be per-MP TEMP size and looks like I'm only using
+ * 2 MPs instead of all 8.
+ */
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(0)), 3);
+ PUSH_DATAh(push, screen->tls->size / screen->mp_count);
+ PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
+ PUSH_DATA (push, 0xff);
+ BEGIN_NVC0(push, NVE4_COMPUTE(MP_TEMP_SIZE_HIGH(1)), 3);
+ PUSH_DATAh(push, screen->tls->size / screen->mp_count);
+ PUSH_DATA (push, (screen->tls->size / screen->mp_count) & ~0x7fff);
+ PUSH_DATA (push, 0xff);
+
+ /* Unified address space ? Who needs that ? Certainly not OpenCL.
+ *
+ * FATAL: Buffers with addresses inside [0x1000000, 0x3000000] will NOT be
+ * accessible. We cannot prevent that at the moment, so expect failure.
+ */
+ BEGIN_NVC0(push, NVE4_COMPUTE(LOCAL_BASE), 1);
+ PUSH_DATA (push, 1 << 24);
+ BEGIN_NVC0(push, NVE4_COMPUTE(SHARED_BASE), 1);
+ PUSH_DATA (push, 2 << 24);
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(CODE_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->text->offset);
+ PUSH_DATA (push, screen->text->offset);
+
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x0310), 1);
+ PUSH_DATA (push, (obj_class >= NVF0_COMPUTE_CLASS) ? 0x400 : 0x300);
+
+ /* NOTE: these do not affect the state used by the 3D object */
+ BEGIN_NVC0(push, NVE4_COMPUTE(TIC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset);
+ PUSH_DATA (push, screen->txc->offset);
+ PUSH_DATA (push, NVC0_TIC_MAX_ENTRIES - 1);
+ BEGIN_NVC0(push, NVE4_COMPUTE(TSC_ADDRESS_HIGH), 3);
+ PUSH_DATAh(push, screen->txc->offset + 65536);
+ PUSH_DATA (push, screen->txc->offset + 65536);
+ PUSH_DATA (push, NVC0_TSC_MAX_ENTRIES - 1);
+
+ if (obj_class >= NVF0_COMPUTE_CLASS) {
+ BEGIN_NVC0(push, SUBC_COMPUTE(0x0248), 1);
+ PUSH_DATA (push, 0x100);
+ BEGIN_NIC0(push, SUBC_COMPUTE(0x0248), 63);
+ for (i = 63; i >= 1; --i)
+ PUSH_DATA(push, 0x38000 | i);
+ IMMED_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 0);
+ IMMED_NVC0(push, SUBC_COMPUTE(0x518), 0);
+ }
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(TEX_CB_INDEX), 1);
+ PUSH_DATA (push, 0); /* does not interefere with 3D */
+
+ if (obj_class >= NVF0_COMPUTE_CLASS)
+ IMMED_NVC0(push, SUBC_COMPUTE(0x02c4), 1);
+
+ /* MS sample coordinate offsets: these do not work with _ALT modes ! */
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
+ PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_MS_OFFSETS);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 64);
+ PUSH_DATA (push, 1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+ PUSH_DATA (push, 0); /* 0 */
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 1); /* 1 */
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 0); /* 2 */
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 1); /* 3 */
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 2); /* 4 */
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 3); /* 5 */
+ PUSH_DATA (push, 0);
+ PUSH_DATA (push, 2); /* 6 */
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, 3); /* 7 */
+ PUSH_DATA (push, 1);
+
+#ifdef DEBUG
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
+ PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_TRAP_INFO_PTR);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 28);
+ PUSH_DATA (push, 1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 8);
+ PUSH_DATA (push, 1);
+ PUSH_DATA (push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);
+ PUSH_DATAh(push, screen->parm->offset + NVE4_CP_PARAM_TRAP_INFO);
+ PUSH_DATA (push, screen->tls->offset);
+ PUSH_DATAh(push, screen->tls->offset);
+ PUSH_DATA (push, screen->tls->size / 2); /* MP TEMP block size */
+ PUSH_DATA (push, screen->tls->size / 2 / 64); /* warp TEMP block size */
+ PUSH_DATA (push, 0); /* warp cfstack size */
+#endif
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
+
+ return 0;
+}
+
+
+static void
+nve4_compute_validate_surfaces(struct nvc0_context *nvc0)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nv50_surface *sf;
+ struct nv04_resource *res;
+ uint32_t mask;
+ unsigned i;
+ const unsigned t = 1;
+
+ mask = nvc0->surfaces_dirty[t];
+ while (mask) {
+ i = ffs(mask) - 1;
+ mask &= ~(1 << i);
+
+ /*
+ * NVE4's surface load/store instructions receive all the information
+ * directly instead of via binding points, so we have to supply them.
+ */
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
+ PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_SUF(i));
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 64);
+ PUSH_DATA (push, 1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 17);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+
+ nve4_set_surface_info(push, nvc0->surfaces[t][i], screen);
+
+ sf = nv50_surface(nvc0->surfaces[t][i]);
+ if (sf) {
+ res = nv04_resource(sf->base.texture);
+
+ if (sf->base.writable)
+ BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
+ else
+ BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
+ }
+ }
+ if (nvc0->surfaces_dirty[t]) {
+ BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
+ }
+
+ /* re-reference non-dirty surfaces */
+ mask = nvc0->surfaces_valid[t] & ~nvc0->surfaces_dirty[t];
+ while (mask) {
+ i = ffs(mask) - 1;
+ mask &= ~(1 << i);
+
+ sf = nv50_surface(nvc0->surfaces[t][i]);
+ res = nv04_resource(sf->base.texture);
+
+ if (sf->base.writable)
+ BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RDWR);
+ else
+ BCTX_REFN(nvc0->bufctx_cp, CP_SUF, res, RD);
+ }
+
+ nvc0->surfaces_dirty[t] = 0;
+}
+
+
+/* Thankfully, textures with samplers follow the normal rules. */
+static void
+nve4_compute_validate_samplers(struct nvc0_context *nvc0)
+{
+ boolean need_flush = nve4_validate_tsc(nvc0, 5);
+ if (need_flush) {
+ BEGIN_NVC0(nvc0->base.pushbuf, NVE4_COMPUTE(TSC_FLUSH), 1);
+ PUSH_DATA (nvc0->base.pushbuf, 0);
+ }
+}
+/* (Code duplicated at bottom for various non-convincing reasons.
+ * E.g. we might want to use the COMPUTE subchannel to upload TIC/TSC
+ * entries to avoid a subchannel switch.
+ * Same for texture cache flushes.
+ * Also, the bufctx differs, and more IFs in the 3D version looks ugly.)
+ */
+static void nve4_compute_validate_textures(struct nvc0_context *);
+
+static void
+nve4_compute_set_tex_handles(struct nvc0_context *nvc0)
+{
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ uint64_t address;
+ const unsigned s = nvc0_shader_stage(PIPE_SHADER_COMPUTE);
+ unsigned i, n;
+ uint32_t dirty = nvc0->textures_dirty[s] | nvc0->samplers_dirty[s];
+
+ if (!dirty)
+ return;
+ i = ffs(dirty) - 1;
+ n = util_logbase2(dirty) + 1 - i;
+ assert(n);
+
+ address = nvc0->screen->parm->offset + NVE4_CP_INPUT_TEX(i);
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, address);
+ PUSH_DATA (push, address);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, n * 4);
+ PUSH_DATA (push, 0x1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + n);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+ PUSH_DATAp(push, &nvc0->tex_handles[s][i], n);
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
+
+ nvc0->textures_dirty[s] = 0;
+ nvc0->samplers_dirty[s] = 0;
+}
+
+
+static boolean
+nve4_compute_state_validate(struct nvc0_context *nvc0)
+{
+ if (!nvc0_compute_validate_program(nvc0))
+ return FALSE;
+ if (nvc0->dirty_cp & NVC0_NEW_CP_TEXTURES)
+ nve4_compute_validate_textures(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_SAMPLERS)
+ nve4_compute_validate_samplers(nvc0);
+ if (nvc0->dirty_cp & (NVC0_NEW_CP_TEXTURES | NVC0_NEW_CP_SAMPLERS))
+ nve4_compute_set_tex_handles(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_SURFACES)
+ nve4_compute_validate_surfaces(nvc0);
+ if (nvc0->dirty_cp & NVC0_NEW_CP_GLOBALS)
+ nvc0_validate_global_residents(nvc0,
+ nvc0->bufctx_cp, NVC0_BIND_CP_GLOBAL);
+
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, FALSE);
+
+ nouveau_pushbuf_bufctx(nvc0->base.pushbuf, nvc0->bufctx_cp);
+ if (unlikely(nouveau_pushbuf_validate(nvc0->base.pushbuf)))
+ return FALSE;
+ if (unlikely(nvc0->state.flushed))
+ nvc0_bufctx_fence(nvc0, nvc0->bufctx_cp, TRUE);
+
+ return TRUE;
+}
+
+
+static void
+nve4_compute_upload_input(struct nvc0_context *nvc0, const void *input,
+ const uint *block_layout,
+ const uint *grid_layout)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nvc0_program *cp = nvc0->compprog;
+
+ if (cp->parm_size) {
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->parm->offset);
+ PUSH_DATA (push, screen->parm->offset);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, cp->parm_size);
+ PUSH_DATA (push, 0x1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (cp->parm_size / 4));
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+ PUSH_DATAp(push, input, cp->parm_size / 4);
+ }
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
+ PUSH_DATA (push, screen->parm->offset + NVE4_CP_INPUT_GRID_INFO(0));
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 7 * 4);
+ PUSH_DATA (push, 0x1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + 7);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+ PUSH_DATAp(push, block_layout, 3);
+ PUSH_DATAp(push, grid_layout, 3);
+ PUSH_DATA (push, 0);
+
+ BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB);
+}
+
+static INLINE uint8_t
+nve4_compute_derive_cache_split(struct nvc0_context *nvc0, uint32_t shared_size)
+{
+ if (shared_size > (32 << 10))
+ return NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1;
+ if (shared_size > (16 << 10))
+ return NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1;
+ return NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1;
+}
+
+static void
+nve4_compute_setup_launch_desc(struct nvc0_context *nvc0,
+ struct nve4_cp_launch_desc *desc,
+ uint32_t label,
+ const uint *block_layout,
+ const uint *grid_layout)
+{
+ const struct nvc0_screen *screen = nvc0->screen;
+ const struct nvc0_program *cp = nvc0->compprog;
+ unsigned i;
+
+ nve4_cp_launch_desc_init_default(desc);
+
+ desc->entry = nvc0_program_symbol_offset(cp, label);
+
+ desc->griddim_x = grid_layout[0];
+ desc->griddim_y = grid_layout[1];
+ desc->griddim_z = grid_layout[2];
+ desc->blockdim_x = block_layout[0];
+ desc->blockdim_y = block_layout[1];
+ desc->blockdim_z = block_layout[2];
+
+ desc->shared_size = align(cp->cp.smem_size, 0x100);
+ desc->local_size_p = align(cp->cp.lmem_size, 0x10);
+ desc->local_size_n = 0;
+ desc->cstack_size = 0x800;
+ desc->cache_split = nve4_compute_derive_cache_split(nvc0, cp->cp.smem_size);
+
+ desc->gpr_alloc = cp->num_gprs;
+ desc->bar_alloc = cp->num_barriers;
+
+ for (i = 0; i < 7; ++i) {
+ const unsigned s = 5;
+ if (nvc0->constbuf[s][i].u.buf)
+ nve4_cp_launch_desc_set_ctx_cb(desc, i + 1, &nvc0->constbuf[s][i]);
+ }
+ nve4_cp_launch_desc_set_cb(desc, 0, screen->parm, 0, NVE4_CP_INPUT_SIZE);
+}
+
+static INLINE struct nve4_cp_launch_desc *
+nve4_compute_alloc_launch_desc(struct nouveau_context *nv,
+ struct nouveau_bo **pbo, uint64_t *pgpuaddr)
+{
+ uint8_t *ptr = nouveau_scratch_get(nv, 512, pgpuaddr, pbo);
+ if (!ptr)
+ return NULL;
+ if (*pgpuaddr & 255) {
+ unsigned adj = 256 - (*pgpuaddr & 255);
+ ptr += adj;
+ *pgpuaddr += adj;
+ }
+ return (struct nve4_cp_launch_desc *)ptr;
+}
+
+void
+nve4_launch_grid(struct pipe_context *pipe,
+ const uint *block_layout, const uint *grid_layout,
+ uint32_t label,
+ const void *input)
+{
+ struct nvc0_context *nvc0 = nvc0_context(pipe);
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ struct nve4_cp_launch_desc *desc;
+ uint64_t desc_gpuaddr;
+ struct nouveau_bo *desc_bo;
+ int ret;
+
+ desc = nve4_compute_alloc_launch_desc(&nvc0->base, &desc_bo, &desc_gpuaddr);
+ if (!desc) {
+ ret = -1;
+ goto out;
+ }
+ BCTX_REFN_bo(nvc0->bufctx_cp, CP_DESC, NOUVEAU_BO_GART | NOUVEAU_BO_RD,
+ desc_bo);
+
+ ret = !nve4_compute_state_validate(nvc0);
+ if (ret)
+ goto out;
+
+ nve4_compute_setup_launch_desc(nvc0, desc, label, block_layout, grid_layout);
+#ifdef DEBUG
+ if (debug_get_num_option("NV50_PROG_DEBUG", 0))
+ nve4_compute_dump_launch_desc(desc);
+#endif
+
+ nve4_compute_upload_input(nvc0, input, block_layout, grid_layout);
+
+ /* upload descriptor and flush */
+#if 0
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, desc_gpuaddr);
+ PUSH_DATA (push, desc_gpuaddr);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 256);
+ PUSH_DATA (push, 1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 1 + (256 / 4));
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x08 << 1));
+ PUSH_DATAp(push, (const uint32_t *)desc, 256 / 4);
+ BEGIN_NVC0(push, NVE4_COMPUTE(FLUSH), 1);
+ PUSH_DATA (push, NVE4_COMPUTE_FLUSH_CB | NVE4_COMPUTE_FLUSH_CODE);
+#endif
+ BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH_DESC_ADDRESS), 1);
+ PUSH_DATA (push, desc_gpuaddr >> 8);
+ BEGIN_NVC0(push, NVE4_COMPUTE(LAUNCH), 1);
+ PUSH_DATA (push, 0x3);
+ BEGIN_NVC0(push, SUBC_COMPUTE(NV50_GRAPH_SERIALIZE), 1);
+ PUSH_DATA (push, 0);
+
+out:
+ if (ret)
+ NOUVEAU_ERR("Failed to launch grid !\n");
+ nouveau_scratch_done(&nvc0->base);
+ nouveau_bufctx_reset(nvc0->bufctx_cp, NVC0_BIND_CP_DESC);
+}
+
+
+#define NVE4_TIC_ENTRY_INVALID 0x000fffff
+
+static void
+nve4_compute_validate_textures(struct nvc0_context *nvc0)
+{
+ struct nouveau_bo *txc = nvc0->screen->txc;
+ struct nouveau_pushbuf *push = nvc0->base.pushbuf;
+ const unsigned s = 5;
+ unsigned i;
+ uint32_t commands[2][NVE4_CP_INPUT_TEX_MAX];
+ unsigned n[2] = { 0, 0 };
+
+ for (i = 0; i < nvc0->num_textures[s]; ++i) {
+ struct nv50_tic_entry *tic = nv50_tic_entry(nvc0->textures[s][i]);
+ struct nv04_resource *res;
+ const boolean dirty = !!(nvc0->textures_dirty[s] & (1 << i));
+
+ if (!tic) {
+ nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
+ continue;
+ }
+ res = nv04_resource(tic->pipe.texture);
+
+ if (tic->id < 0) {
+ tic->id = nvc0_screen_tic_alloc(nvc0->screen, tic);
+
+ PUSH_SPACE(push, 16);
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_DST_ADDRESS_HIGH), 2);
+ PUSH_DATAh(push, txc->offset + (tic->id * 32));
+ PUSH_DATA (push, txc->offset + (tic->id * 32));
+ BEGIN_NVC0(push, NVE4_COMPUTE(UPLOAD_LINE_LENGTH_IN), 2);
+ PUSH_DATA (push, 32);
+ PUSH_DATA (push, 1);
+ BEGIN_1IC0(push, NVE4_COMPUTE(UPLOAD_EXEC), 9);
+ PUSH_DATA (push, NVE4_COMPUTE_UPLOAD_EXEC_LINEAR | (0x20 << 1));
+ PUSH_DATAp(push, &tic->tic[0], 8);
+
+ commands[0][n[0]++] = (tic->id << 4) | 1;
+ } else
+ if (res->status & NOUVEAU_BUFFER_STATUS_GPU_WRITING) {
+ commands[1][n[1]++] = (tic->id << 4) | 1;
+ }
+ nvc0->screen->tic.lock[tic->id / 32] |= 1 << (tic->id % 32);
+
+ res->status &= ~NOUVEAU_BUFFER_STATUS_GPU_WRITING;
+ res->status |= NOUVEAU_BUFFER_STATUS_GPU_READING;
+
+ nvc0->tex_handles[s][i] &= ~NVE4_TIC_ENTRY_INVALID;
+ nvc0->tex_handles[s][i] |= tic->id;
+ if (dirty)
+ BCTX_REFN(nvc0->bufctx_cp, CP_TEX(i), res, RD);
+ }
+ for (; i < nvc0->state.num_textures[s]; ++i)
+ nvc0->tex_handles[s][i] |= NVE4_TIC_ENTRY_INVALID;
+
+ if (n[0]) {
+ BEGIN_NIC0(push, NVE4_COMPUTE(TIC_FLUSH), n[0]);
+ PUSH_DATAp(push, commands[0], n[0]);
+ }
+ if (n[1]) {
+ BEGIN_NIC0(push, NVE4_COMPUTE(TEX_CACHE_CTL), n[1]);
+ PUSH_DATAp(push, commands[1], n[1]);
+ }
+
+ nvc0->state.num_textures[s] = nvc0->num_textures[s];
+}
+
+
+#ifdef DEBUG
+static const char *nve4_cache_split_name(unsigned value)
+{
+ switch (value) {
+ case NVC1_3D_CACHE_SPLIT_16K_SHARED_48K_L1: return "16K_SHARED_48K_L1";
+ case NVE4_3D_CACHE_SPLIT_32K_SHARED_32K_L1: return "32K_SHARED_32K_L1";
+ case NVC0_3D_CACHE_SPLIT_48K_SHARED_16K_L1: return "48K_SHARED_16K_L1";
+ default:
+ return "(invalid)";
+ }
+}
+
+static void
+nve4_compute_dump_launch_desc(const struct nve4_cp_launch_desc *desc)
+{
+ const uint32_t *data = (const uint32_t *)desc;
+ unsigned i;
+ boolean zero = FALSE;
+
+ debug_printf("COMPUTE LAUNCH DESCRIPTOR:\n");
+
+ for (i = 0; i < sizeof(*desc); i += 4) {
+ if (data[i / 4]) {
+ debug_printf("[%x]: 0x%08x\n", i, data[i / 4]);
+ zero = FALSE;
+ } else
+ if (!zero) {
+ debug_printf("...\n");
+ zero = TRUE;
+ }
+ }
+
+ debug_printf("entry = 0x%x\n", desc->entry);
+ debug_printf("grid dimensions = %ux%ux%u\n",
+ desc->griddim_x, desc->griddim_y, desc->griddim_z);
+ debug_printf("block dimensions = %ux%ux%u\n",
+ desc->blockdim_x, desc->blockdim_y, desc->blockdim_z);
+ debug_printf("s[] size: 0x%x\n", desc->shared_size);
+ debug_printf("l[] size: -0x%x / +0x%x\n",
+ desc->local_size_n, desc->local_size_p);
+ debug_printf("stack size: 0x%x\n", desc->cstack_size);
+ debug_printf("barrier count: %u\n", desc->bar_alloc);
+ debug_printf("$r count: %u\n", desc->gpr_alloc);
+ debug_printf("cache split: %s\n", nve4_cache_split_name(desc->cache_split));
+
+ for (i = 0; i < 8; ++i) {
+ uint64_t address;
+ uint32_t size = desc->cb[i].size;
+ boolean valid = !!(desc->cb_mask & (1 << i));
+
+ address = ((uint64_t)desc->cb[i].address_h << 32) | desc->cb[i].address_l;
+
+ if (!valid && !address && !size)
+ continue;
+ debug_printf("CB[%u]: address = 0x%"PRIx64", size 0x%x%s\n",
+ i, address, size, valid ? "" : " (invalid)");
+ }
+}
+#endif
+
+#ifdef NOUVEAU_NVE4_MP_TRAP_HANDLER
+static void
+nve4_compute_trap_info(struct nvc0_context *nvc0)
+{
+ struct nvc0_screen *screen = nvc0->screen;
+ struct nouveau_bo *bo = screen->parm;
+ int ret, i;
+ volatile struct nve4_mp_trap_info *info;
+ uint8_t *map;
+
+ ret = nouveau_bo_map(bo, NOUVEAU_BO_RDWR, nvc0->base.client);
+ if (ret)
+ return;
+ map = (uint8_t *)bo->map;
+ info = (volatile struct nve4_mp_trap_info *)(map + NVE4_CP_PARAM_TRAP_INFO);
+
+ if (info->lock) {
+ debug_printf("trapstat = %08x\n", info->trapstat);
+ debug_printf("warperr = %08x\n", info->warperr);
+ debug_printf("PC = %x\n", info->pc);
+ debug_printf("tid = %u %u %u\n",
+ info->tid[0], info->tid[1], info->tid[2]);
+ debug_printf("ctaid = %u %u %u\n",
+ info->ctaid[0], info->ctaid[1], info->ctaid[2]);
+ for (i = 0; i <= 63; ++i)
+ debug_printf("$r%i = %08x\n", i, info->r[i]);
+ for (i = 0; i <= 6; ++i)
+ debug_printf("$p%i = %i\n", i, (info->flags >> i) & 1);
+ debug_printf("$c = %x\n", info->flags >> 12);
+ }
+ info->lock = 0;
+}
+#endif
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
new file mode 100644
index 0000000..79862b7
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.h
@@ -0,0 +1,131 @@
+
+#ifndef NVE4_COMPUTE_H
+#define NVE4_COMPUTE_H
+
+#include "nv50/nv50_defs.xml.h"
+#include "nvc0/nve4_compute.xml.h"
+
+/* Input space is implemented as c0[], to which we bind the screen->parm bo.
+ */
+#define NVE4_CP_INPUT_USER 0x0000
+#define NVE4_CP_INPUT_USER_LIMIT 0x1000
+#define NVE4_CP_INPUT_GRID_INFO(i) (0x1000 + (i) * 4)
+#define NVE4_CP_INPUT_NTID(i) (0x1000 + (i) * 4)
+#define NVE4_CP_INPUT_NCTAID(i) (0x100c + (i) * 4)
+#define NVE4_CP_INPUT_GRIDID 0x1018
+#define NVE4_CP_INPUT_TEX(i) (0x1040 + (i) * 4)
+#define NVE4_CP_INPUT_TEX_STRIDE 4
+#define NVE4_CP_INPUT_TEX_MAX 32
+#define NVE4_CP_INPUT_MS_OFFSETS 0x10c0
+#define NVE4_CP_INPUT_SUF_STRIDE 64
+#define NVE4_CP_INPUT_SUF(i) (0x1100 + (i) * NVE4_CP_INPUT_SUF_STRIDE)
+#define NVE4_CP_INPUT_SUF_MAX 32
+#define NVE4_CP_INPUT_TRAP_INFO_PTR 0x1900
+#define NVE4_CP_INPUT_TEMP_PTR 0x1908
+#define NVE4_CP_INPUT_MP_TEMP_SIZE 0x1910
+#define NVE4_CP_INPUT_WARP_TEMP_SIZE 0x1914
+#define NVE4_CP_INPUT_CSTACK_SIZE 0x1918
+#define NVE4_CP_INPUT_SIZE 0x1a00
+#define NVE4_CP_PARAM_TRAP_INFO 0x2000
+#define NVE4_CP_PARAM_TRAP_INFO_SZ (1 << 16)
+#define NVE4_CP_PARAM_SIZE (NVE4_CP_PARAM_TRAP_INFO + (1 << 16))
+
+struct nve4_cp_launch_desc
+{
+ u32 unk0[8];
+ u32 entry;
+ u32 unk9[3];
+ u32 griddim_x : 31;
+ u32 unk12 : 1;
+ u16 griddim_y;
+ u16 griddim_z;
+ u32 unk14[3];
+ u16 shared_size; /* must be aligned to 0x100 */
+ u16 unk15;
+ u16 unk16;
+ u16 blockdim_x;
+ u16 blockdim_y;
+ u16 blockdim_z;
+ u32 cb_mask : 8;
+ u32 unk20_8 : 21;
+ u32 cache_split : 2;
+ u32 unk20_31 : 1;
+ u32 unk21[8];
+ struct {
+ u32 address_l;
+ u32 address_h : 8;
+ u32 reserved : 7;
+ u32 size : 17;
+ } cb[8];
+ u32 local_size_p : 20;
+ u32 unk45_20 : 7;
+ u32 bar_alloc : 5;
+ u32 local_size_n : 20;
+ u32 unk46_20 : 4;
+ u32 gpr_alloc : 8;
+ u32 cstack_size : 20;
+ u32 unk47_20 : 12;
+ u32 unk48[16];
+};
+
+static INLINE void
+nve4_cp_launch_desc_init_default(struct nve4_cp_launch_desc *desc)
+{
+ memset(desc, 0, sizeof(*desc));
+
+ desc->unk0[7] = 0xbc000000;
+ desc->unk9[2] = 0x44014000;
+ desc->unk47_20 = 0x300;
+}
+
+static INLINE void
+nve4_cp_launch_desc_set_cb(struct nve4_cp_launch_desc *desc,
+ unsigned index,
+ struct nouveau_bo *bo,
+ uint32_t base, uint16_t size)
+{
+ uint64_t address = bo->offset + base;
+
+ assert(index < 8);
+ assert(!(base & 0xff));
+ assert(size <= 65536);
+
+ desc->cb[index].address_l = address;
+ desc->cb[index].address_h = address >> 32;
+ desc->cb[index].size = size;
+
+ desc->cb_mask |= 1 << index;
+}
+
+static INLINE void
+nve4_cp_launch_desc_set_ctx_cb(struct nve4_cp_launch_desc *desc,
+ unsigned index,
+ const struct nvc0_constbuf *cb)
+{
+ assert(index < 8);
+
+ if (!cb->u.buf) {
+ desc->cb_mask &= ~(1 << index);
+ } else {
+ const struct nv04_resource *buf = nv04_resource(cb->u.buf);
+ assert(!cb->user);
+ nve4_cp_launch_desc_set_cb(desc, index,
+ buf->bo, buf->offset + cb->offset, cb->size);
+ }
+}
+
+struct nve4_mp_trap_info {
+ u32 lock;
+ u32 pc;
+ u32 trapstat;
+ u32 warperr;
+ u32 tid[3];
+ u32 ctaid[3];
+ u32 pad028[2];
+ u32 r[64];
+ u32 flags;
+ u32 pad134[3];
+ u32 s[0x3000];
+};
+
+#endif /* NVE4_COMPUTE_H */
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h b/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
new file mode 100644
index 0000000..e971fc1
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_compute.xml.h
@@ -0,0 +1,429 @@
+#ifndef NVE4_COMPUTE_XML
+#define NVE4_COMPUTE_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- nve4_compute.xml ( 10168 bytes, from 2013-06-04 13:57:02)
+- copyright.xml ( 6452 bytes, from 2012-04-16 22:51:01)
+- nvchipsets.xml ( 3954 bytes, from 2013-06-04 13:57:02)
+- nv_object.xml ( 14395 bytes, from 2013-06-04 13:57:02)
+- nv_defs.xml ( 4437 bytes, from 2012-04-16 22:51:01)
+- nv50_defs.xml ( 16877 bytes, from 2013-07-17 09:10:01)
+- nve4_p2mf.xml ( 2373 bytes, from 2013-06-04 13:57:02)
+
+Copyright (C) 2006-2013 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+
+#define NVE4_COMPUTE_UNK0144 0x00000144
+
+#define NVE4_COMPUTE_UPLOAD 0x00000000
+
+#define NVE4_COMPUTE_UPLOAD_LINE_LENGTH_IN 0x00000180
+
+#define NVE4_COMPUTE_UPLOAD_LINE_COUNT 0x00000184
+
+#define NVE4_COMPUTE_UPLOAD_DST_ADDRESS_HIGH 0x00000188
+
+#define NVE4_COMPUTE_UPLOAD_DST_ADDRESS_LOW 0x0000018c
+
+#define NVE4_COMPUTE_UPLOAD_DST_PITCH 0x00000190
+
+#define NVE4_COMPUTE_UPLOAD_DST_TILE_MODE 0x00000194
+
+#define NVE4_COMPUTE_UPLOAD_DST_WIDTH 0x00000198
+
+#define NVE4_COMPUTE_UPLOAD_DST_HEIGHT 0x0000019c
+
+#define NVE4_COMPUTE_UPLOAD_DST_DEPTH 0x000001a0
+
+#define NVE4_COMPUTE_UPLOAD_DST_Z 0x000001a4
+
+#define NVE4_COMPUTE_UPLOAD_DST_X 0x000001a8
+
+#define NVE4_COMPUTE_UPLOAD_DST_Y 0x000001ac
+
+#define NVE4_COMPUTE_UPLOAD_EXEC 0x000001b0
+#define NVE4_COMPUTE_UPLOAD_EXEC_LINEAR 0x00000001
+#define NVE4_COMPUTE_UPLOAD_EXEC_UNK1__MASK 0x0000007e
+#define NVE4_COMPUTE_UPLOAD_EXEC_UNK1__SHIFT 1
+#define NVE4_COMPUTE_UPLOAD_EXEC_BUF_NOTIFY 0x00000300
+#define NVE4_COMPUTE_UPLOAD_EXEC_UNK12__MASK 0x0000f000
+#define NVE4_COMPUTE_UPLOAD_EXEC_UNK12__SHIFT 12
+
+#define NVE4_COMPUTE_UPLOAD_DATA 0x000001b4
+
+#define NVE4_COMPUTE_UPLOAD_QUERY_ADDRESS_HIGH 0x000001dc
+
+#define NVE4_COMPUTE_UPLOAD_QUERY_ADDRESS_LOW 0x000001e0
+
+#define NVE4_COMPUTE_UPLOAD_QUERY_SEQUENCE 0x000001e4
+
+#define NVE4_COMPUTE_UPLOAD_UNK01F0 0x000001f0
+
+#define NVE4_COMPUTE_UPLOAD_UNK01F4 0x000001f4
+
+#define NVE4_COMPUTE_UPLOAD_UNK01F8 0x000001f8
+
+#define NVE4_COMPUTE_UPLOAD_UNK01FC 0x000001fc
+
+#define NVE4_COMPUTE_SHARED_BASE 0x00000214
+
+#define NVE4_COMPUTE_MEM_BARRIER 0x0000021c
+#define NVE4_COMPUTE_MEM_BARRIER_UNK0__MASK 0x00000007
+#define NVE4_COMPUTE_MEM_BARRIER_UNK0__SHIFT 0
+#define NVE4_COMPUTE_MEM_BARRIER_UNK4 0x00000010
+#define NVE4_COMPUTE_MEM_BARRIER_UNK12 0x00001000
+
+#define NVE4_COMPUTE_UNK0240 0x00000240
+
+#define NVE4_COMPUTE_UNK244_TIC_FLUSH 0x00000244
+
+#define NVE4_COMPUTE_UNK0248 0x00000248
+#define NVE4_COMPUTE_UNK0248_UNK0__MASK 0x0000003f
+#define NVE4_COMPUTE_UNK0248_UNK0__SHIFT 0
+#define NVE4_COMPUTE_UNK0248_UNK8__MASK 0x00ffff00
+#define NVE4_COMPUTE_UNK0248_UNK8__SHIFT 8
+
+#define NVE4_COMPUTE_UNK0274 0x00000274
+
+#define NVE4_COMPUTE_UNK0278 0x00000278
+
+#define NVE4_COMPUTE_UNK027C 0x0000027c
+
+#define NVE4_COMPUTE_UNK0280 0x00000280
+
+#define NVE4_COMPUTE_UNK0284 0x00000284
+
+#define NVE4_COMPUTE_UNK0288 0x00000288
+
+#define NVE4_COMPUTE_UNK0290 0x00000290
+
+#define NVE4_COMPUTE_UNK02B0 0x000002b0
+
+#define NVE4_COMPUTE_LAUNCH_DESC_ADDRESS 0x000002b4
+#define NVE4_COMPUTE_LAUNCH_DESC_ADDRESS__SHR 8
+
+#define NVE4_COMPUTE_UNK02B8 0x000002b8
+
+#define NVE4_COMPUTE_LAUNCH 0x000002bc
+
+#define NVE4_COMPUTE_MP_TEMP_SIZE(i0) (0x000002e4 + 0xc*(i0))
+#define NVE4_COMPUTE_MP_TEMP_SIZE__ESIZE 0x0000000c
+#define NVE4_COMPUTE_MP_TEMP_SIZE__LEN 0x00000002
+
+#define NVE4_COMPUTE_MP_TEMP_SIZE_HIGH(i0) (0x000002e4 + 0xc*(i0))
+
+#define NVE4_COMPUTE_MP_TEMP_SIZE_LOW(i0) (0x000002e8 + 0xc*(i0))
+
+#define NVE4_COMPUTE_MP_TEMP_SIZE_MASK(i0) (0x000002ec + 0xc*(i0))
+
+#define NVE4_COMPUTE_UNK0310 0x00000310
+
+#define NVE4_COMPUTE_FIRMWARE(i0) (0x00000500 + 0x4*(i0))
+#define NVE4_COMPUTE_FIRMWARE__ESIZE 0x00000004
+#define NVE4_COMPUTE_FIRMWARE__LEN 0x00000020
+
+#define NVE4_COMPUTE_LOCAL_BASE 0x0000077c
+
+#define NVE4_COMPUTE_TEMP_ADDRESS_HIGH 0x00000790
+
+#define NVE4_COMPUTE_TEMP_ADDRESS_LOW 0x00000794
+
+#define NVE4_COMPUTE_UNK0D94 0x00000d94
+
+#define NVE4_COMPUTE_WATCHDOG_TIMER 0x00000de4
+
+#define NVE4_COMPUTE_UNK0F44(i0) (0x00000f44 + 0x4*(i0))
+#define NVE4_COMPUTE_UNK0F44__ESIZE 0x00000004
+#define NVE4_COMPUTE_UNK0F44__LEN 0x00000004
+
+#define NVE4_COMPUTE_UNK1040(i0) (0x00001040 + 0x4*(i0))
+#define NVE4_COMPUTE_UNK1040__ESIZE 0x00000004
+#define NVE4_COMPUTE_UNK1040__LEN 0x0000000c
+
+#define NVE4_COMPUTE_UNK1288_TIC_FLUSH 0x00001288
+
+#define NVE4_COMPUTE_TSC_FLUSH 0x00001330
+#define NVE4_COMPUTE_TSC_FLUSH_SPECIFIC 0x00000001
+#define NVE4_COMPUTE_TSC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVE4_COMPUTE_TSC_FLUSH_ENTRY__SHIFT 4
+
+#define NVE4_COMPUTE_TIC_FLUSH 0x00001334
+#define NVE4_COMPUTE_TIC_FLUSH_SPECIFIC 0x00000001
+#define NVE4_COMPUTE_TIC_FLUSH_ENTRY__MASK 0x03fffff0
+#define NVE4_COMPUTE_TIC_FLUSH_ENTRY__SHIFT 4
+
+#define NVE4_COMPUTE_TEX_CACHE_CTL 0x00001338
+#define NVE4_COMPUTE_TEX_CACHE_CTL_UNK0 0x00000001
+#define NVE4_COMPUTE_TEX_CACHE_CTL_ENTRY__MASK 0x03fffff0
+#define NVE4_COMPUTE_TEX_CACHE_CTL_ENTRY__SHIFT 4
+
+#define NVE4_COMPUTE_UNK1424_TSC_FLUSH 0x00001424
+
+#define NVE4_COMPUTE_COND_ADDRESS_HIGH 0x00001550
+
+#define NVE4_COMPUTE_COND_ADDRESS_LOW 0x00001554
+
+#define NVE4_COMPUTE_COND_MODE 0x00001558
+#define NVE4_COMPUTE_COND_MODE_NEVER 0x00000000
+#define NVE4_COMPUTE_COND_MODE_ALWAYS 0x00000001
+#define NVE4_COMPUTE_COND_MODE_RES_NON_ZERO 0x00000002
+#define NVE4_COMPUTE_COND_MODE_EQUAL 0x00000003
+#define NVE4_COMPUTE_COND_MODE_NOT_EQUAL 0x00000004
+
+#define NVE4_COMPUTE_TSC_ADDRESS_HIGH 0x0000155c
+
+#define NVE4_COMPUTE_TSC_ADDRESS_LOW 0x00001560
+
+#define NVE4_COMPUTE_TSC_LIMIT 0x00001564
+
+#define NVE4_COMPUTE_TIC_ADDRESS_HIGH 0x00001574
+
+#define NVE4_COMPUTE_TIC_ADDRESS_LOW 0x00001578
+
+#define NVE4_COMPUTE_TIC_LIMIT 0x0000157c
+
+#define NVE4_COMPUTE_CODE_ADDRESS_HIGH 0x00001608
+
+#define NVE4_COMPUTE_CODE_ADDRESS_LOW 0x0000160c
+
+#define NVE4_COMPUTE_UNK1690 0x00001690
+
+#define NVE4_COMPUTE_FLUSH 0x00001698
+#define NVE4_COMPUTE_FLUSH_CODE 0x00000001
+#define NVE4_COMPUTE_FLUSH_GLOBAL 0x00000010
+#define NVE4_COMPUTE_FLUSH_CB 0x00001000
+
+#define NVE4_COMPUTE_UNK1944 0x00001944
+
+#define NVE4_COMPUTE_DELAY 0x00001a24
+
+#define NVE4_COMPUTE_UNK1A2C(i0) (0x00001a2c + 0x4*(i0))
+#define NVE4_COMPUTE_UNK1A2C__ESIZE 0x00000004
+#define NVE4_COMPUTE_UNK1A2C__LEN 0x00000005
+
+#define NVE4_COMPUTE_QUERY_ADDRESS_HIGH 0x00001b00
+
+#define NVE4_COMPUTE_QUERY_ADDRESS_LOW 0x00001b04
+
+#define NVE4_COMPUTE_QUERY_SEQUENCE 0x00001b08
+
+#define NVE4_COMPUTE_QUERY_GET 0x00001b0c
+#define NVE4_COMPUTE_QUERY_GET_MODE__MASK 0x00000003
+#define NVE4_COMPUTE_QUERY_GET_MODE__SHIFT 0
+#define NVE4_COMPUTE_QUERY_GET_MODE_WRITE 0x00000000
+#define NVE4_COMPUTE_QUERY_GET_MODE_WRITE_INTR_NRHOST 0x00000003
+#define NVE4_COMPUTE_QUERY_GET_INTR 0x00100000
+#define NVE4_COMPUTE_QUERY_GET_SHORT 0x10000000
+
+#define NVE4_COMPUTE_TEX_CB_INDEX 0x00002608
+
+#define NVE4_COMPUTE_UNK260C 0x0000260c
+
+#define NVE4_COMPUTE_MP_PM_SET(i0) (0x0000335c + 0x4*(i0))
+#define NVE4_COMPUTE_MP_PM_SET__ESIZE 0x00000004
+#define NVE4_COMPUTE_MP_PM_SET__LEN 0x00000008
+
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL(i0) (0x0000337c + 0x4*(i0))
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL__ESIZE 0x00000004
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL__LEN 0x00000004
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL_NONE 0x00000000
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL_USER 0x00000001
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL_LAUNCH 0x00000003
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL_EXEC 0x00000004
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL_ISSUE 0x00000005
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL_LDST 0x0000001b
+#define NVE4_COMPUTE_MP_PM_A_SIGSEL_BRANCH 0x0000001c
+
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL(i0) (0x0000338c + 0x4*(i0))
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL__ESIZE 0x00000004
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL__LEN 0x00000004
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL_NONE 0x00000000
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL_WARP 0x00000002
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL_REPLAY 0x00000008
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL_TRANSACTION 0x0000000e
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL_L1 0x00000010
+#define NVE4_COMPUTE_MP_PM_B_SIGSEL_MEM 0x00000011
+
+#define NVE4_COMPUTE_MP_PM_SRCSEL(i0) (0x0000339c + 0x4*(i0))
+#define NVE4_COMPUTE_MP_PM_SRCSEL__ESIZE 0x00000004
+#define NVE4_COMPUTE_MP_PM_SRCSEL__LEN 0x00000008
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP0__MASK 0x00000003
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP0__SHIFT 0
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG0__MASK 0x0000001c
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG0__SHIFT 2
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP1__MASK 0x00000060
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP1__SHIFT 5
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG1__MASK 0x00000380
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG1__SHIFT 7
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP2__MASK 0x00000c00
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP2__SHIFT 10
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG2__MASK 0x00007000
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG2__SHIFT 12
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP3__MASK 0x00018000
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP3__SHIFT 15
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG3__MASK 0x000e0000
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG3__SHIFT 17
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP4__MASK 0x00300000
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP4__SHIFT 20
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG4__MASK 0x01c00000
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG4__SHIFT 22
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP5__MASK 0x06000000
+#define NVE4_COMPUTE_MP_PM_SRCSEL_GRP5__SHIFT 25
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG5__MASK 0x38000000
+#define NVE4_COMPUTE_MP_PM_SRCSEL_SIG5__SHIFT 27
+
+#define NVE4_COMPUTE_MP_PM_FUNC(i0) (0x000033bc + 0x4*(i0))
+#define NVE4_COMPUTE_MP_PM_FUNC__ESIZE 0x00000004
+#define NVE4_COMPUTE_MP_PM_FUNC__LEN 0x00000008
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE__MASK 0x0000000f
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE__SHIFT 0
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_LOGOP 0x00000000
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_LOGOP_PULSE 0x00000001
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_B6 0x00000002
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_UNK3 0x00000003
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_LOGOP_B6 0x00000004
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_LOGOP_B6_PULSE 0x00000005
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_UNK6 0x00000006
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_UNK7 0x00000007
+#define NVE4_COMPUTE_MP_PM_FUNC_MODE_UNK8 0x00000008
+#define NVE4_COMPUTE_MP_PM_FUNC_FUNC__MASK 0x000ffff0
+#define NVE4_COMPUTE_MP_PM_FUNC_FUNC__SHIFT 4
+
+#define NVE4_COMPUTE_MP_PM_UNK33DC 0x000033dc
+
+#define NVE4_COMPUTE_LAUNCH_DESC__SIZE 0x00000100
+#define NVE4_COMPUTE_LAUNCH_DESC_6 0x00000018
+#define NVE4_COMPUTE_LAUNCH_DESC_6_NOTIFY__MASK 0x00000c00
+#define NVE4_COMPUTE_LAUNCH_DESC_6_NOTIFY__SHIFT 10
+
+#define NVE4_COMPUTE_LAUNCH_DESC_PROG_START 0x00000020
+
+#define NVE4_COMPUTE_LAUNCH_DESC_12 0x00000030
+#define NVE4_COMPUTE_LAUNCH_DESC_12_GRIDDIM_X__MASK 0x7fffffff
+#define NVE4_COMPUTE_LAUNCH_DESC_12_GRIDDIM_X__SHIFT 0
+
+#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ 0x00000034
+#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Y__MASK 0x0000ffff
+#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Y__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Z__MASK 0xffff0000
+#define NVE4_COMPUTE_LAUNCH_DESC_GRIDDIM_YZ_Z__SHIFT 16
+
+#define NVE4_COMPUTE_LAUNCH_DESC_17 0x00000044
+#define NVE4_COMPUTE_LAUNCH_DESC_17_SHARED_ALLOC__MASK 0x0000ffff
+#define NVE4_COMPUTE_LAUNCH_DESC_17_SHARED_ALLOC__SHIFT 0
+
+#define NVE4_COMPUTE_LAUNCH_DESC_18 0x00000048
+#define NVE4_COMPUTE_LAUNCH_DESC_18_BLOCKDIM_X__MASK 0xffff0000
+#define NVE4_COMPUTE_LAUNCH_DESC_18_BLOCKDIM_X__SHIFT 16
+
+#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ 0x0000004c
+#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Y__MASK 0x0000ffff
+#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Y__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Z__MASK 0xffff0000
+#define NVE4_COMPUTE_LAUNCH_DESC_BLOCKDIM_YZ_Z__SHIFT 16
+
+#define NVE4_COMPUTE_LAUNCH_DESC_20 0x00000050
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CB_VALID__MASK 0x000000ff
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CB_VALID__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT__MASK 0x60000000
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT__SHIFT 29
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_16K_SHARED_48K_L1 0x20000000
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_32K_SHARED_32K_L1 0x40000000
+#define NVE4_COMPUTE_LAUNCH_DESC_20_CACHE_SPLIT_48K_SHARED_16K_L1 0x60000000
+
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0(i0) (0x00000074 + 0x8*(i0))
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0__ESIZE 0x00000008
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0__LEN 0x00000008
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0_ADDRESS_LOW__MASK 0xffffffff
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_0_ADDRESS_LOW__SHIFT 0
+
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1(i0) (0x00000078 + 0x8*(i0))
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1__ESIZE 0x00000008
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1__LEN 0x00000008
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_ADDRESS_HIGH__MASK 0x000000ff
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_ADDRESS_HIGH__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_SIZE__MASK 0xffff8000
+#define NVE4_COMPUTE_LAUNCH_DESC_CB_CONFIG_1_SIZE__SHIFT 15
+
+#define NVE4_COMPUTE_LAUNCH_DESC_45 0x000000b4
+#define NVE4_COMPUTE_LAUNCH_DESC_45_LOCAL_POS_ALLOC__MASK 0x000fffff
+#define NVE4_COMPUTE_LAUNCH_DESC_45_LOCAL_POS_ALLOC__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_45_BARRIER_ALLOC__MASK 0xf8000000
+#define NVE4_COMPUTE_LAUNCH_DESC_45_BARRIER_ALLOC__SHIFT 27
+
+#define NVE4_COMPUTE_LAUNCH_DESC_46 0x000000b8
+#define NVE4_COMPUTE_LAUNCH_DESC_46_LOCAL_NEG_ALLOC__MASK 0x000fffff
+#define NVE4_COMPUTE_LAUNCH_DESC_46_LOCAL_NEG_ALLOC__SHIFT 0
+#define NVE4_COMPUTE_LAUNCH_DESC_46_GPR_ALLOC__MASK 0x3f000000
+#define NVE4_COMPUTE_LAUNCH_DESC_46_GPR_ALLOC__SHIFT 24
+
+#define NVE4_COMPUTE_LAUNCH_DESC_47 0x000000bc
+#define NVE4_COMPUTE_LAUNCH_DESC_47_WARP_CSTACK_SIZE__MASK 0x000fffff
+#define NVE4_COMPUTE_LAUNCH_DESC_47_WARP_CSTACK_SIZE__SHIFT 0
+
+
+#endif /* NVE4_COMPUTE_XML */
diff --git a/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h b/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
new file mode 100644
index 0000000..68a742f
--- /dev/null
+++ b/src/gallium/drivers/nouveau/nvc0/nve4_p2mf.xml.h
@@ -0,0 +1,107 @@
+#ifndef RNNDB_NVE4_P2MF_XML
+#define RNNDB_NVE4_P2MF_XML
+
+/* Autogenerated file, DO NOT EDIT manually!
+
+This file was generated by the rules-ng-ng headergen tool in this git repository:
+http://0x04.net/cgit/index.cgi/rules-ng-ng
+git clone git://0x04.net/rules-ng-ng
+
+The rules-ng-ng source files this header was generated from are:
+- rnndb/nve4_p2mf.xml ( 1400 bytes, from 2012-04-14 21:29:11)
+- ./rnndb/copyright.xml ( 6452 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nv_object.xml ( 12736 bytes, from 2012-04-14 21:30:24)
+- ./rnndb/nvchipsets.xml ( 3701 bytes, from 2012-03-22 20:40:59)
+- ./rnndb/nv_defs.xml ( 4437 bytes, from 2011-08-11 18:25:12)
+- ./rnndb/nv50_defs.xml ( 5468 bytes, from 2011-08-11 18:25:12)
+
+Copyright (C) 2006-2012 by the following authors:
+- Artur Huillet <arthur.huillet@free.fr> (ahuillet)
+- Ben Skeggs (darktama, darktama_)
+- B. R. <koala_br@users.sourceforge.net> (koala_br)
+- Carlos Martin <carlosmn@users.sf.net> (carlosmn)
+- Christoph Bumiller <e0425955@student.tuwien.ac.at> (calim, chrisbmr)
+- Dawid Gajownik <gajownik@users.sf.net> (gajownik)
+- Dmitry Baryshkov
+- Dmitry Eremin-Solenikov <lumag@users.sf.net> (lumag)
+- EdB <edb_@users.sf.net> (edb_)
+- Erik Waling <erikwailing@users.sf.net> (erikwaling)
+- Francisco Jerez <currojerez@riseup.net> (curro)
+- imirkin <imirkin@users.sf.net> (imirkin)
+- jb17bsome <jb17bsome@bellsouth.net> (jb17bsome)
+- Jeremy Kolb <kjeremy@users.sf.net> (kjeremy)
+- Laurent Carlier <lordheavym@gmail.com> (lordheavy)
+- Luca Barbieri <luca@luca-barbieri.com> (lb, lb1)
+- Maarten Maathuis <madman2003@gmail.com> (stillunknown)
+- Marcin Kościelnicki <koriakin@0x04.net> (mwk, koriakin)
+- Mark Carey <mark.carey@gmail.com> (careym)
+- Matthieu Castet <matthieu.castet@parrot.com> (mat-c)
+- nvidiaman <nvidiaman@users.sf.net> (nvidiaman)
+- Patrice Mandin <patmandin@gmail.com> (pmandin, pmdata)
+- Pekka Paalanen <pq@iki.fi> (pq, ppaalanen)
+- Peter Popov <ironpeter@users.sf.net> (ironpeter)
+- Richard Hughes <hughsient@users.sf.net> (hughsient)
+- Rudi Cilibrasi <cilibrar@users.sf.net> (cilibrar)
+- Serge Martin
+- Simon Raffeiner
+- Stephane Loeuillet <leroutier@users.sf.net> (leroutier)
+- Stephane Marchesin <stephane.marchesin@gmail.com> (marcheu)
+- sturmflut <sturmflut@users.sf.net> (sturmflut)
+- Sylvain Munaut <tnt@246tNt.com>
+- Victor Stinner <victor.stinner@haypocalc.com> (haypo)
+- Wladmir van der Laan <laanwj@gmail.com> (miathan6)
+- Younes Manton <younes.m@gmail.com> (ymanton)
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice (including the
+next paragraph) shall be included in all copies or substantial
+portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+*/
+
+
+
+#define NVE4_P2MF_LINE_LENGTH_IN 0x00000180
+
+#define NVE4_P2MF_LINE_COUNT 0x00000184
+
+#define NVE4_P2MF_DST_ADDRESS_HIGH 0x00000188
+
+#define NVE4_P2MF_DST_ADDRESS_LOW 0x0000018c
+
+#define NVE4_P2MF_DST_TILE_MODE 0x00000194
+
+#define NVE4_P2MF_DST_PITCH 0x00000198
+
+#define NVE4_P2MF_DST_HEIGHT 0x0000019c
+
+#define NVE4_P2MF_DST_DEPTH 0x000001a0
+
+#define NVE4_P2MF_DST_Z 0x000001a4
+
+#define NVE4_P2MF_DST_X 0x000001a8
+
+#define NVE4_P2MF_DST_Y 0x000001ac
+
+#define NVE4_P2MF_EXEC 0x000001b0
+#define NVE4_P2MF_EXEC_LINEAR 0x00000001
+#define NVE4_P2MF_EXEC_UNK12 0x00001000
+
+#define NVE4_P2MF_DATA 0x000001b4
+
+
+#endif /* RNNDB_NVE4_P2MF_XML */