diff --git a/.clang-format b/.clang-format index 56804459cbc..21973f605e3 100644 --- a/.clang-format +++ b/.clang-format @@ -71,8 +71,8 @@ IndentWidth: 4 AccessModifierOffset: -4 IndentWrappedFunctionNames: false KeepEmptyLinesAtTheStartOfBlocks: false -MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? -MacroBlockEnd: '.*_END$' +#MacroBlockBegin: '.*_BEGIN$' # only PREC_BEGIN ? +#MacroBlockEnd: '.*_END$' MaxEmptyLinesToKeep: 2 #PenaltyBreakBeforeFirstCallParameter: 19 #PenaltyBreakComment: 300 diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml index e226945185a..b7a61eb8517 100644 --- a/.github/workflows/build.yml +++ b/.github/workflows/build.yml @@ -67,7 +67,7 @@ jobs: build_param: artifact_name: xemu-win-release env: - DOCKER_IMAGE_NAME: ghcr.io/xemu-project/xemu-win64-toolchain:sha-8152913 + DOCKER_IMAGE_NAME: ghcr.io/xemu-project/xemu-win64-toolchain:sha-c6bad86 steps: - name: Download source package diff --git a/.gitmodules b/.gitmodules index 4118661130c..420d7d9cd2d 100644 --- a/.gitmodules +++ b/.gitmodules @@ -82,9 +82,18 @@ [submodule "tomlplusplus"] path = tomlplusplus url = https://github.com/marzer/tomlplusplus -[submodule "hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu"] - path = hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu +[submodule "hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu"] + path = hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu url = https://github.com/abaire/nv2a_vsh_cpu.git [submodule "ui/thirdparty/httplib"] path = ui/thirdparty/httplib url = https://github.com/yhirose/cpp-httplib +[submodule "hw/xbox/nv2a/pgraph/vk/thirdparty/VulkanMemoryAllocator"] + path = thirdparty/VulkanMemoryAllocator + url = https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator +[submodule "thirdparty/volk"] + path = thirdparty/volk + url = https://github.com/zeux/volk +[submodule "thirdparty/SPIRV-Reflect"] + path = thirdparty/SPIRV-Reflect + url = https://github.com/KhronosGroup/SPIRV-Reflect diff --git a/config_spec.yml b/config_spec.yml index 
b858606e685..087d255faef 100644 --- a/config_spec.yml +++ b/config_spec.yml @@ -130,6 +130,14 @@ input: default: 18 # w display: + renderer: + type: enum + values: ["NULL", OPENGL, VULKAN] + default: OPENGL + vulkan: + validation_layers: bool + debug_shaders: bool + assert_on_validation_msg: bool quality: surface_scale: type: integer diff --git a/configure b/configure index 761cd5e4313..5de197723de 100755 --- a/configure +++ b/configure @@ -237,7 +237,7 @@ else git_submodules_action="ignore" fi -git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu" +git_submodules="ui/keycodemapdb ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu thirdparty/volk thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect" git="git" # Don't accept a target_list environment variable. diff --git a/debian/control b/debian/control index 91ed61433f4..0792bdd71d9 100644 --- a/debian/control +++ b/debian/control @@ -16,6 +16,7 @@ Build-Depends: debhelper (>= 11), libssl-dev, libpcap-dev, libslirp-dev, + libvulkan-dev, Standards-Version: 3.9.8 Homepage: https://xemu.app XS-Debian-Vcs-Browser: https://github.com/mborgerson/xemu diff --git a/debian/rules b/debian/rules index 25a0f7b9d08..184c3a848c4 100755 --- a/debian/rules +++ b/debian/rules @@ -63,7 +63,7 @@ override_dh_auto_configure: override_dh_auto_build: ./build.sh ${XEMU_BUILD_OPTIONS} ${common_configure_opts} || \ - { echo ===== BUILD FAILED ===; tail -n 50 config.log; exit 1; } + { echo ===== BUILD FAILED ===; cat build/meson-logs/meson-log.txt; exit 1; } cp debian/copyright debian/qemu.deb.copyright cp dist/LICENSE.txt debian/copyright diff --git a/hw/xbox/nv2a/debug.h b/hw/xbox/nv2a/debug.h index 0c2c3d5f769..a843df3259a 100644 --- a/hw/xbox/nv2a/debug.h +++ b/hw/xbox/nv2a/debug.h @@ -1,8 +1,9 @@ /* - * QEMU Geforce NV2A debug 
helpers + * QEMU Geforce NV2A profiling and debug helpers * - * Copyright (c) 2015 Jannik Vogel * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2023 Matt Borgerson * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public @@ -18,8 +19,8 @@ * License along with this library; if not, see . */ -#ifndef HW_NV2A_DEBUG_H -#define HW_NV2A_DEBUG_H +#ifndef HW_XBOX_NV2A_DEBUG_H +#define HW_XBOX_NV2A_DEBUG_H #include @@ -36,54 +37,6 @@ # define NV2A_DPRINTF(format, ...) do { } while (0) #endif -// #define DEBUG_NV2A_GL -#ifdef DEBUG_NV2A_GL - -#include -#include "gl/gloffscreen.h" -#include "config-host.h" - -void gl_debug_initialize(void); -void gl_debug_message(bool cc, const char *fmt, ...); -void gl_debug_group_begin(const char *fmt, ...); -void gl_debug_group_end(void); -void gl_debug_label(GLenum target, GLuint name, const char *fmt, ...); -void gl_debug_frame_terminator(void); - -# define NV2A_GL_DPRINTF(cc, format, ...) \ - gl_debug_message(cc, "nv2a: " format, ## __VA_ARGS__) -# define NV2A_GL_DGROUP_BEGIN(format, ...) \ - gl_debug_group_begin("nv2a: " format, ## __VA_ARGS__) -# define NV2A_GL_DGROUP_END() \ - gl_debug_group_end() -# define NV2A_GL_DLABEL(target, name, format, ...) \ - gl_debug_label(target, name, "nv2a: { " format " }", ## __VA_ARGS__) -#define NV2A_GL_DFRAME_TERMINATOR() \ - gl_debug_frame_terminator() - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef CONFIG_RENDERDOC -bool nv2a_dbg_renderdoc_available(void); -void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames); -#endif - -#ifdef __cplusplus -} -#endif - -#else -# define NV2A_GL_DPRINTF(cc, format, ...) do { \ - if (cc) NV2A_DPRINTF(format "\n", ##__VA_ARGS__ ); \ - } while (0) -# define NV2A_GL_DGROUP_BEGIN(format, ...) do { } while (0) -# define NV2A_GL_DGROUP_END() do { } while (0) -# define NV2A_GL_DLABEL(target, name, format, ...) 
do { } while (0) -# define NV2A_GL_DFRAME_TERMINATOR() do { } while (0) -#endif - /* Debug prints to identify when unimplemented or unconfirmed features * are being exercised. These cases likely result in graphical problems of * varying degree, but should otherwise not crash the system. Enable this @@ -111,6 +64,22 @@ void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames); #endif #define NV2A_PROF_COUNTERS_XMAC \ + _X(NV2A_PROF_FINISH_VERTEX_BUFFER_DIRTY) \ + _X(NV2A_PROF_FINISH_SURFACE_CREATE) \ + _X(NV2A_PROF_FINISH_SURFACE_DOWN) \ + _X(NV2A_PROF_FINISH_NEED_BUFFER_SPACE) \ + _X(NV2A_PROF_FINISH_FRAMEBUFFER_DIRTY) \ + _X(NV2A_PROF_FINISH_PRESENTING) \ + _X(NV2A_PROF_FINISH_FLIP_STALL) \ + _X(NV2A_PROF_FINISH_FLUSH) \ + _X(NV2A_PROF_FINISH_STALLED) \ + _X(NV2A_PROF_CLEAR) \ + _X(NV2A_PROF_QUEUE_SUBMIT) \ + _X(NV2A_PROF_QUEUE_SUBMIT_AUX) \ + _X(NV2A_PROF_PIPELINE_NOTDIRTY) \ + _X(NV2A_PROF_PIPELINE_GEN) \ + _X(NV2A_PROF_PIPELINE_BIND) \ + _X(NV2A_PROF_PIPELINE_RENDERPASSES) \ _X(NV2A_PROF_BEGIN_ENDS) \ _X(NV2A_PROF_DRAW_ARRAYS) \ _X(NV2A_PROF_INLINE_BUFFERS) \ @@ -120,18 +89,26 @@ void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames); _X(NV2A_PROF_SHADER_GEN) \ _X(NV2A_PROF_SHADER_BIND) \ _X(NV2A_PROF_SHADER_BIND_NOTDIRTY) \ + _X(NV2A_PROF_SHADER_UBO_DIRTY) \ + _X(NV2A_PROF_SHADER_UBO_NOTDIRTY) \ _X(NV2A_PROF_ATTR_BIND) \ _X(NV2A_PROF_TEX_UPLOAD) \ - _X(NV2A_PROF_TEX_BIND) \ _X(NV2A_PROF_GEOM_BUFFER_UPDATE_1) \ _X(NV2A_PROF_GEOM_BUFFER_UPDATE_2) \ _X(NV2A_PROF_GEOM_BUFFER_UPDATE_3) \ _X(NV2A_PROF_GEOM_BUFFER_UPDATE_4) \ _X(NV2A_PROF_GEOM_BUFFER_UPDATE_4_NOTDIRTY) \ + _X(NV2A_PROF_SURF_SWIZZLE) \ + _X(NV2A_PROF_SURF_CREATE) \ _X(NV2A_PROF_SURF_DOWNLOAD) \ _X(NV2A_PROF_SURF_UPLOAD) \ _X(NV2A_PROF_SURF_TO_TEX) \ _X(NV2A_PROF_SURF_TO_TEX_FALLBACK) \ + _X(NV2A_PROF_QUEUE_SUBMIT_1) \ + _X(NV2A_PROF_QUEUE_SUBMIT_2) \ + _X(NV2A_PROF_QUEUE_SUBMIT_3) \ + _X(NV2A_PROF_QUEUE_SUBMIT_4) \ + _X(NV2A_PROF_QUEUE_SUBMIT_5) \ enum NV2A_PROF_COUNTERS_ENUM { #define _X(x) x, 
@@ -161,6 +138,21 @@ extern NV2AStats g_nv2a_stats; const char *nv2a_profile_get_counter_name(unsigned int cnt); int nv2a_profile_get_counter_value(unsigned int cnt); +void nv2a_profile_increment(void); +void nv2a_profile_flip_stall(void); + +static inline void nv2a_profile_inc_counter(enum NV2A_PROF_COUNTERS_ENUM cnt) +{ + g_nv2a_stats.frame_working.counters[cnt] += 1; +} + +#ifdef CONFIG_RENDERDOC +void nv2a_dbg_renderdoc_init(void); +void *nv2a_dbg_renderdoc_get_api(void); +bool nv2a_dbg_renderdoc_available(void); +void nv2a_dbg_renderdoc_capture_frames(int num_frames); +extern int renderdoc_capture_frames; +#endif #ifdef __cplusplus } diff --git a/hw/xbox/nv2a/gl/meson.build b/hw/xbox/nv2a/gl/meson.build deleted file mode 100644 index 973a9aa8c11..00000000000 --- a/hw/xbox/nv2a/gl/meson.build +++ /dev/null @@ -1,6 +0,0 @@ -softmmu_ss.add([sdl, files( - 'gloffscreen_common.c', - 'gloffscreen_sdl.c', - )]) - -# gloffscreen_sdl.o-cflags := $(SDL_CFLAGS) diff --git a/hw/xbox/nv2a/meson.build b/hw/xbox/nv2a/meson.build index d3b159a3bc8..29eff86e273 100644 --- a/hw/xbox/nv2a/meson.build +++ b/hw/xbox/nv2a/meson.build @@ -1,27 +1,17 @@ specific_ss.add(files( 'nv2a.c', - 'debug.c', 'pbus.c', 'pcrtc.c', 'pfb.c', 'pfifo.c', - 'pgraph.c', 'pmc.c', 'pramdac.c', 'prmcio.c', 'prmdio.c', 'prmvio.c', - 'psh.c', 'ptimer.c', 'pvideo.c', - 'shaders.c', 'stubs.c', 'user.c', - 'vsh.c', - 'swizzle.c', - 's3tc.c', )) -subdir('gl') - -subdir('thirdparty') -specific_ss.add(nv2a_vsh_cpu) +subdir('pgraph') diff --git a/hw/xbox/nv2a/nv2a.c b/hw/xbox/nv2a/nv2a.c index e068f76dc91..fd1bcbaba6c 100644 --- a/hw/xbox/nv2a/nv2a.c +++ b/hw/xbox/nv2a/nv2a.c @@ -172,6 +172,16 @@ static void nv2a_get_offsets(VGACommonState *s, *pline_compare = line_compare; } +const uint8_t *nv2a_get_dac_palette(void) +{ + return g_nv2a->puserdac.palette; +} + +int nv2a_get_screen_off(void) +{ + return g_nv2a->vga.sr[VGA_SEQ_CLOCK_MODE] & VGA_SR01_SCREEN_OFF; +} + static void nv2a_vga_gfx_update(void *opaque) { 
VGACommonState *vga = opaque; @@ -277,7 +287,7 @@ static void nv2a_reset(NV2AState *d) } memset(d->pfifo.regs, 0, sizeof(d->pfifo.regs)); - memset(d->pgraph.regs, 0, sizeof(d->pgraph.regs)); + memset(d->pgraph.regs_, 0, sizeof(d->pgraph.regs_)); memset(d->pvideo.regs, 0, sizeof(d->pvideo.regs)); d->pcrtc.start = 0; @@ -365,11 +375,10 @@ static void nv2a_vm_state_change(void *opaque, bool running, RunState state) if (state == RUN_STATE_SAVE_VM) { nv2a_lock_fifo(d); qatomic_set(&d->pfifo.halt, true); - qatomic_set(&d->pgraph.download_dirty_surfaces_pending, true); - qemu_event_reset(&d->pgraph.dirty_surfaces_download_complete); + pgraph_pre_savevm_trigger(d); nv2a_unlock_fifo(d); qemu_mutex_unlock_iothread(); - qemu_event_wait(&d->pgraph.dirty_surfaces_download_complete); + pgraph_pre_savevm_wait(d); qemu_mutex_lock_iothread(); nv2a_lock_fifo(d); } else if (state == RUN_STATE_RESTORE_VM) { @@ -382,11 +391,10 @@ static void nv2a_vm_state_change(void *opaque, bool running, RunState state) nv2a_unlock_fifo(d); } else if (state == RUN_STATE_SHUTDOWN) { nv2a_lock_fifo(d); - qatomic_set(&d->pgraph.shader_cache_writeback_pending, true); - qemu_event_reset(&d->pgraph.shader_cache_writeback_complete); + pgraph_pre_shutdown_trigger(d); nv2a_unlock_fifo(d); qemu_mutex_unlock_iothread(); - qemu_event_wait(&d->pgraph.shader_cache_writeback_complete); + pgraph_pre_shutdown_wait(d); qemu_mutex_lock_iothread(); } } @@ -515,9 +523,9 @@ static const VMStateDescription vmstate_nv2a = { VMSTATE_UINT32(pgraph.inline_buffer_length, NV2AState), // fixme VMSTATE_UINT32(pgraph.draw_arrays_length, NV2AState), VMSTATE_UINT32(pgraph.draw_arrays_max_count, NV2AState), - VMSTATE_INT32_ARRAY(pgraph.gl_draw_arrays_start, NV2AState, 1250), - VMSTATE_INT32_ARRAY(pgraph.gl_draw_arrays_count, NV2AState, 1250), - VMSTATE_UINT32_ARRAY(pgraph.regs, NV2AState, 0x2000), + VMSTATE_INT32_ARRAY(pgraph.draw_arrays_start, NV2AState, 1250), + VMSTATE_INT32_ARRAY(pgraph.draw_arrays_count, NV2AState, 1250), + 
VMSTATE_UINT32_ARRAY(pgraph.regs_, NV2AState, 0x2000), VMSTATE_UINT32(pmc.pending_interrupts, NV2AState), VMSTATE_UINT32(pmc.enabled_interrupts, NV2AState), VMSTATE_UINT32(pfifo.pending_interrupts, NV2AState), diff --git a/hw/xbox/nv2a/nv2a.h b/hw/xbox/nv2a/nv2a.h index 35b63749e46..2a9b7312d4b 100644 --- a/hw/xbox/nv2a/nv2a.h +++ b/hw/xbox/nv2a/nv2a.h @@ -22,8 +22,9 @@ #define HW_NV2A_H void nv2a_init(PCIBus *bus, int devfn, MemoryRegion *ram); -void nv2a_gl_context_init(void); +void nv2a_context_init(void); int nv2a_get_framebuffer_surface(void); +void nv2a_release_framebuffer_surface(void); void nv2a_set_surface_scale_factor(unsigned int scale); unsigned int nv2a_get_surface_scale_factor(void); const uint8_t *nv2a_get_dac_palette(void); diff --git a/hw/xbox/nv2a/nv2a_int.h b/hw/xbox/nv2a/nv2a_int.h index 31ab6d89ca6..9b0189ebc8a 100644 --- a/hw/xbox/nv2a/nv2a_int.h +++ b/hw/xbox/nv2a/nv2a_int.h @@ -44,25 +44,12 @@ #include "cpu.h" #include "trace.h" -#include "swizzle.h" -#include "lru.h" -#include "gl/gloffscreen.h" #include "nv2a.h" +#include "pgraph/pgraph.h" #include "debug.h" -#include "shaders.h" #include "nv2a_regs.h" -#define GET_MASK(v, mask) (((v) & (mask)) >> ctz32(mask)) - -#define SET_MASK(v, mask, val) \ - ({ \ - const unsigned int __val = (val); \ - const unsigned int __mask = (mask); \ - (v) &= ~(__mask); \ - (v) |= ((__val) << ctz32(__mask)) & (__mask); \ - }) - #define NV2A_DEVICE(obj) OBJECT_CHECK(NV2AState, (obj), "nv2a") enum FIFOEngine { @@ -78,347 +65,6 @@ typedef struct DMAObject { hwaddr limit; } DMAObject; -typedef struct VertexAttribute { - bool dma_select; - hwaddr offset; - - /* inline arrays are packed in order? 
- * Need to pass the offset to converted attributes */ - unsigned int inline_array_offset; - - float inline_value[4]; - - unsigned int format; - unsigned int size; /* size of the data type */ - unsigned int count; /* number of components */ - uint32_t stride; - - bool needs_conversion; - - float *inline_buffer; - bool inline_buffer_populated; - - GLint gl_count; - GLenum gl_type; - GLboolean gl_normalize; - - GLuint gl_inline_buffer; -} VertexAttribute; - -typedef struct SurfaceFormatInfo { - unsigned int bytes_per_pixel; - GLint gl_internal_format; - GLenum gl_format; - GLenum gl_type; - GLenum gl_attachment; -} SurfaceFormatInfo; - -typedef struct Surface { - bool draw_dirty; - bool buffer_dirty; - bool write_enabled_cache; - unsigned int pitch; - - hwaddr offset; -} Surface; - -typedef struct SurfaceShape { - unsigned int z_format; - unsigned int color_format; - unsigned int zeta_format; - unsigned int log_width, log_height; - unsigned int clip_x, clip_y; - unsigned int clip_width, clip_height; - unsigned int anti_aliasing; -} SurfaceShape; - -typedef struct SurfaceBinding { - QTAILQ_ENTRY(SurfaceBinding) entry; - MemAccessCallback *access_cb; - - hwaddr vram_addr; - - SurfaceFormatInfo fmt; - SurfaceShape shape; - uintptr_t dma_addr; - uintptr_t dma_len; - bool color; - bool swizzle; - - unsigned int width; - unsigned int height; - unsigned int pitch; - size_t size; - - GLuint gl_buffer; - - bool cleared; - int frame_time; - int draw_time; - bool draw_dirty; - bool download_pending; - bool upload_pending; -} SurfaceBinding; - -typedef struct TextureShape { - bool cubemap; - unsigned int dimensionality; - unsigned int color_format; - unsigned int levels; - unsigned int width, height, depth; - bool border; - - unsigned int min_mipmap_level, max_mipmap_level; - unsigned int pitch; -} TextureShape; - -typedef struct TextureBinding { - GLenum gl_target; - GLuint gl_texture; - unsigned int refcnt; - int draw_time; - uint64_t data_hash; - unsigned int scale; - 
unsigned int min_filter; - unsigned int mag_filter; - unsigned int addru; - unsigned int addrv; - unsigned int addrp; - uint32_t border_color; - bool border_color_set; -} TextureBinding; - -typedef struct TextureKey { - TextureShape state; - hwaddr texture_vram_offset; - hwaddr texture_length; - hwaddr palette_vram_offset; - hwaddr palette_length; -} TextureKey; - -typedef struct TextureLruNode { - LruNode node; - TextureKey key; - TextureBinding *binding; - bool possibly_dirty; -} TextureLruNode; - -typedef struct VertexKey { - size_t count; - GLuint gl_type; - GLboolean gl_normalize; - size_t stride; - hwaddr addr; -} VertexKey; - -typedef struct VertexLruNode { - LruNode node; - VertexKey key; - GLuint gl_buffer; - bool initialized; -} VertexLruNode; - -typedef struct KelvinState { - hwaddr object_instance; -} KelvinState; - -typedef struct ContextSurfaces2DState { - hwaddr object_instance; - hwaddr dma_image_source; - hwaddr dma_image_dest; - unsigned int color_format; - unsigned int source_pitch, dest_pitch; - hwaddr source_offset, dest_offset; -} ContextSurfaces2DState; - -typedef struct ImageBlitState { - hwaddr object_instance; - hwaddr context_surfaces; - unsigned int operation; - unsigned int in_x, in_y; - unsigned int out_x, out_y; - unsigned int width, height; -} ImageBlitState; - -typedef struct BetaState { - hwaddr object_instance; - uint32_t beta; -} BetaState; - -typedef struct QueryReport { - QSIMPLEQ_ENTRY(QueryReport) entry; - bool clear; - uint32_t parameter; - unsigned int query_count; - GLuint *queries; -} QueryReport; - -typedef struct PGRAPHState { - QemuMutex lock; - - uint32_t pending_interrupts; - uint32_t enabled_interrupts; - - int frame_time; - int draw_time; - - struct s2t_rndr { - GLuint fbo, vao, vbo, prog; - GLuint tex_loc, surface_size_loc; - } s2t_rndr; - - struct disp_rndr { - GLuint fbo, vao, vbo, prog; - GLuint display_size_loc; - GLuint line_offset_loc; - GLuint tex_loc; - GLuint pvideo_tex; - GLint pvideo_enable_loc; - GLint 
pvideo_tex_loc; - GLint pvideo_in_pos_loc; - GLint pvideo_pos_loc; - GLint pvideo_scale_loc; - GLint pvideo_color_key_enable_loc; - GLint pvideo_color_key_loc; - GLint palette_loc[256]; - } disp_rndr; - - /* subchannels state we're not sure the location of... */ - ContextSurfaces2DState context_surfaces_2d; - ImageBlitState image_blit; - KelvinState kelvin; - BetaState beta; - - hwaddr dma_color, dma_zeta; - Surface surface_color, surface_zeta; - unsigned int surface_type; - SurfaceShape surface_shape; - SurfaceShape last_surface_shape; - QTAILQ_HEAD(, SurfaceBinding) surfaces; - SurfaceBinding *color_binding, *zeta_binding; - struct { - int clip_x; - int clip_width; - int clip_y; - int clip_height; - int width; - int height; - } surface_binding_dim; // FIXME: Refactor - - hwaddr dma_a, dma_b; - Lru texture_cache; - TextureLruNode *texture_cache_entries; - bool texture_dirty[NV2A_MAX_TEXTURES]; - TextureBinding *texture_binding[NV2A_MAX_TEXTURES]; - - Lru shader_cache; - ShaderLruNode *shader_cache_entries; - ShaderBinding *shader_binding; - QemuMutex shader_cache_lock; - QemuThread shader_disk_thread; - - bool texture_matrix_enable[NV2A_MAX_TEXTURES]; - - GLuint gl_framebuffer; - - GLuint gl_display_buffer; - GLint gl_display_buffer_internal_format; - GLsizei gl_display_buffer_width; - GLsizei gl_display_buffer_height; - GLenum gl_display_buffer_format; - GLenum gl_display_buffer_type; - - hwaddr dma_state; - hwaddr dma_notifies; - hwaddr dma_semaphore; - - hwaddr dma_report; - hwaddr report_offset; - bool zpass_pixel_count_enable; - unsigned int zpass_pixel_count_result; - unsigned int gl_zpass_pixel_count_query_count; - GLuint *gl_zpass_pixel_count_queries; - QSIMPLEQ_HEAD(, QueryReport) report_queue; - - hwaddr dma_vertex_a, dma_vertex_b; - - uint32_t primitive_mode; - - bool enable_vertex_program_write; - - uint32_t vertex_state_shader_v0[4]; - uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE]; - bool program_data_dirty; - - uint32_t 
vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; - bool vsh_constants_dirty[NV2A_VERTEXSHADER_CONSTANTS]; - - /* lighting constant arrays */ - uint32_t ltctxa[NV2A_LTCTXA_COUNT][4]; - bool ltctxa_dirty[NV2A_LTCTXA_COUNT]; - uint32_t ltctxb[NV2A_LTCTXB_COUNT][4]; - bool ltctxb_dirty[NV2A_LTCTXB_COUNT]; - uint32_t ltc1[NV2A_LTC1_COUNT][4]; - bool ltc1_dirty[NV2A_LTC1_COUNT]; - - float material_alpha; - - // should figure out where these are in lighting context - float light_infinite_half_vector[NV2A_MAX_LIGHTS][3]; - float light_infinite_direction[NV2A_MAX_LIGHTS][3]; - float light_local_position[NV2A_MAX_LIGHTS][3]; - float light_local_attenuation[NV2A_MAX_LIGHTS][3]; - - float point_params[8]; - - VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES]; - uint16_t compressed_attrs; - - Lru element_cache; - VertexLruNode *element_cache_entries; - - unsigned int inline_array_length; - uint32_t inline_array[NV2A_MAX_BATCH_LENGTH]; - GLuint gl_inline_array_buffer; - - unsigned int inline_elements_length; - uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH]; - - unsigned int inline_buffer_length; - - unsigned int draw_arrays_length; - unsigned int draw_arrays_min_start; - unsigned int draw_arrays_max_count; - /* FIXME: Unknown size, possibly endless, 1250 will do for now */ - /* Keep in sync with size used in nv2a.c */ - GLint gl_draw_arrays_start[1250]; - GLsizei gl_draw_arrays_count[1250]; - bool draw_arrays_prevent_connect; - - GLuint gl_memory_buffer; - GLuint gl_vertex_array; - - uint32_t regs[0x2000]; - - bool clearing; - bool waiting_for_nop; - bool waiting_for_flip; - bool waiting_for_context_switch; - bool downloads_pending; - bool download_dirty_surfaces_pending; - bool flush_pending; - bool gl_sync_pending; - bool shader_cache_writeback_pending; - QemuEvent downloads_complete; - QemuEvent dirty_surfaces_download_complete; - QemuEvent flush_complete; - QemuEvent gl_sync_complete; - QemuEvent shader_cache_writeback_complete; - - unsigned int 
surface_scale_factor; - uint8_t *scale_buf; -} PGRAPHState; - typedef struct NV2AState { /*< private >*/ PCIDevice parent_obj; @@ -512,9 +158,6 @@ typedef struct NV2ABlockInfo { } NV2ABlockInfo; extern const NV2ABlockInfo blocktable[NV_NUM_BLOCKS]; -extern GloContext *g_nv2a_context_render; -extern GloContext *g_nv2a_context_display; - void nv2a_update_irq(NV2AState *d); static inline @@ -566,20 +209,5 @@ DEFINE_PROTO(user) DMAObject nv_dma_load(NV2AState *d, hwaddr dma_obj_address); void *nv_dma_map(NV2AState *d, hwaddr dma_obj_address, hwaddr *len); -void pgraph_init(NV2AState *d); -void pgraph_destroy(PGRAPHState *pg); -void pgraph_context_switch(NV2AState *d, unsigned int channel_id); -int pgraph_method(NV2AState *d, unsigned int subchannel, unsigned int method, - uint32_t parameter, uint32_t *parameters, - size_t num_words_available, size_t max_lookahead_words, - bool inc); -void pgraph_gl_sync(NV2AState *d); -void pgraph_process_pending_reports(NV2AState *d); -void pgraph_process_pending_downloads(NV2AState *d); -void pgraph_download_dirty_surfaces(NV2AState *d); -void pgraph_flush(NV2AState *d); - -void *pfifo_thread(void *arg); -void pfifo_kick(NV2AState *d); #endif diff --git a/hw/xbox/nv2a/nv2a_regs.h b/hw/xbox/nv2a/nv2a_regs.h index 108db8f716d..78a9091eb54 100644 --- a/hw/xbox/nv2a/nv2a_regs.h +++ b/hw/xbox/nv2a/nv2a_regs.h @@ -21,6 +21,17 @@ #ifndef HW_NV2A_REGS_H #define HW_NV2A_REGS_H + +#define GET_MASK(v, mask) (((v) & (mask)) >> ctz32(mask)) + +#define SET_MASK(v, mask, val) \ + ({ \ + const unsigned int __val = (val); \ + const unsigned int __mask = (mask); \ + (v) &= ~(__mask); \ + (v) |= ((__val) << ctz32(__mask)) & (__mask); \ + }) + #define NV_NUM_BLOCKS 21 #define NV_PMC 0 /* card master control */ #define NV_PBUS 1 /* bus control */ diff --git a/hw/xbox/nv2a/pfifo.c b/hw/xbox/nv2a/pfifo.c index 77dd175098b..0e55826ad3c 100644 --- a/hw/xbox/nv2a/pfifo.c +++ b/hw/xbox/nv2a/pfifo.c @@ -95,23 +95,25 @@ void pfifo_kick(NV2AState *d) 
qemu_cond_broadcast(&d->pfifo.fifo_cond); } -static bool pgraph_can_fifo_access(NV2AState *d) { - return qatomic_read(&d->pgraph.regs[NV_PGRAPH_FIFO]) & NV_PGRAPH_FIFO_ACCESS; +static bool can_fifo_access(NV2AState *d) { + return qatomic_read(&d->pgraph.regs_[NV_PGRAPH_FIFO]) & + NV_PGRAPH_FIFO_ACCESS; } /* If NV097_FLIP_STALL was executed, check if the flip has completed. * This will usually happen in the VSYNC interrupt handler. */ -static bool pgraph_is_flip_stall_complete(NV2AState *d) +static bool is_flip_stall_complete(NV2AState *d) { PGRAPHState *pg = &d->pgraph; + uint32_t s = pgraph_reg_r(pg, NV_PGRAPH_SURFACE); + NV2A_DPRINTF("flip stall read: %d, write: %d, modulo: %d\n", - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D), - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D), - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_MODULO_3D)); + GET_MASK(s, NV_PGRAPH_SURFACE_READ_3D), + GET_MASK(s, NV_PGRAPH_SURFACE_WRITE_3D), + GET_MASK(s, NV_PGRAPH_SURFACE_MODULO_3D)); - uint32_t s = pg->regs[NV_PGRAPH_SURFACE]; if (GET_MASK(s, NV_PGRAPH_SURFACE_READ_3D) != GET_MASK(s, NV_PGRAPH_SURFACE_WRITE_3D)) { return true; @@ -126,7 +128,7 @@ static bool pfifo_stall_for_flip(NV2AState *d) if (qatomic_read(&d->pgraph.waiting_for_flip)) { qemu_mutex_lock(&d->pgraph.lock); - if (!pgraph_is_flip_stall_complete(d)) { + if (!is_flip_stall_complete(d)) { should_stall = true; } else { d->pgraph.waiting_for_flip = false; @@ -141,7 +143,7 @@ static bool pfifo_puller_should_stall(NV2AState *d) { return pfifo_stall_for_flip(d) || qatomic_read(&d->pgraph.waiting_for_nop) || qatomic_read(&d->pgraph.waiting_for_context_switch) || - !pgraph_can_fifo_access(d); + !can_fifo_access(d); } static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry, @@ -187,7 +189,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry, qemu_mutex_lock(&d->pgraph.lock); // Switch contexts if necessary - if (pgraph_can_fifo_access(d)) { + if 
(can_fifo_access(d)) { pgraph_context_switch(d, entry.channel_id); if (!d->pgraph.waiting_for_context_switch) { num_proc = @@ -221,7 +223,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry, qemu_mutex_unlock(&d->pfifo.lock); qemu_mutex_lock(&d->pgraph.lock); - if (pgraph_can_fifo_access(d)) { + if (can_fifo_access(d)) { num_proc = pgraph_method(d, subchannel, method, parameter, parameters, num_words_available, max_lookahead_words, inc); @@ -242,7 +244,7 @@ static ssize_t pfifo_run_puller(NV2AState *d, uint32_t method_entry, static bool pfifo_pusher_should_stall(NV2AState *d) { - return !pgraph_can_fifo_access(d) || + return !can_fifo_access(d) || qatomic_read(&d->pgraph.waiting_for_nop); } @@ -447,39 +449,11 @@ static void pfifo_run_pusher(NV2AState *d) } } -static void process_requests(NV2AState *d) -{ - if (qatomic_read(&d->pgraph.downloads_pending) || - qatomic_read(&d->pgraph.download_dirty_surfaces_pending) || - qatomic_read(&d->pgraph.gl_sync_pending) || - qatomic_read(&d->pgraph.flush_pending) || - qatomic_read(&d->pgraph.shader_cache_writeback_pending)) { - qemu_mutex_unlock(&d->pfifo.lock); - qemu_mutex_lock(&d->pgraph.lock); - if (qatomic_read(&d->pgraph.downloads_pending)) { - pgraph_process_pending_downloads(d); - } - if (qatomic_read(&d->pgraph.download_dirty_surfaces_pending)) { - pgraph_download_dirty_surfaces(d); - } - if (qatomic_read(&d->pgraph.gl_sync_pending)) { - pgraph_gl_sync(d); - } - if (qatomic_read(&d->pgraph.flush_pending)) { - pgraph_flush(d); - } - if (qatomic_read(&d->pgraph.shader_cache_writeback_pending)) { - shader_write_cache_reload_list(&d->pgraph); - } - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock(&d->pfifo.lock); - } -} - void *pfifo_thread(void *arg) { NV2AState *d = (NV2AState *)arg; - glo_set_current(g_nv2a_context_render); + + pgraph_init_thread(d); rcu_register_thread(); @@ -487,7 +461,7 @@ void *pfifo_thread(void *arg) while (true) { d->pfifo.fifo_kick = false; - process_requests(d); + 
pgraph_process_pending(d); if (!d->pfifo.halt) { pfifo_run_pusher(d); diff --git a/hw/xbox/nv2a/pgraph.c b/hw/xbox/nv2a/pgraph.c deleted file mode 100644 index 0127371df2b..00000000000 --- a/hw/xbox/nv2a/pgraph.c +++ /dev/null @@ -1,7768 +0,0 @@ -/* - * QEMU Geforce NV2A implementation - * - * Copyright (c) 2012 espes - * Copyright (c) 2015 Jannik Vogel - * Copyright (c) 2018-2021 Matt Borgerson - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . 
- */ - -#include "nv2a_int.h" - -#include - -#include "nv2a_vsh_emulator.h" -#include "s3tc.h" -#include "ui/xemu-settings.h" -#include "qemu/fast-hash.h" - -const float f16_max = 511.9375f; -const float f24_max = 1.0E30; - -static NV2AState *g_nv2a; -GloContext *g_nv2a_context_render; -GloContext *g_nv2a_context_display; - -NV2AStats g_nv2a_stats; - -static void nv2a_profile_increment(void) -{ - int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME); - const int64_t fps_update_interval = 250000; - g_nv2a_stats.last_flip_time = now; - - static int64_t frame_count = 0; - frame_count++; - - static int64_t ts = 0; - int64_t delta = now - ts; - if (delta >= fps_update_interval) { - g_nv2a_stats.increment_fps = frame_count * 1000000 / delta; - ts = now; - frame_count = 0; - } -} - -static void nv2a_profile_flip_stall(void) -{ - glFinish(); - - int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME); - int64_t render_time = (now-g_nv2a_stats.last_flip_time)/1000; - - g_nv2a_stats.frame_working.mspf = render_time; - g_nv2a_stats.frame_history[g_nv2a_stats.frame_ptr] = - g_nv2a_stats.frame_working; - g_nv2a_stats.frame_ptr = - (g_nv2a_stats.frame_ptr + 1) % NV2A_PROF_NUM_FRAMES; - g_nv2a_stats.frame_count++; - memset(&g_nv2a_stats.frame_working, 0, sizeof(g_nv2a_stats.frame_working)); -} - -static void nv2a_profile_inc_counter(enum NV2A_PROF_COUNTERS_ENUM cnt) -{ - g_nv2a_stats.frame_working.counters[cnt] += 1; -} - -const char *nv2a_profile_get_counter_name(unsigned int cnt) -{ - const char *default_names[NV2A_PROF__COUNT] = { - #define _X(x) stringify(x), - NV2A_PROF_COUNTERS_XMAC - #undef _X - }; - - assert(cnt < NV2A_PROF__COUNT); - return default_names[cnt] + 10; /* 'NV2A_PROF_' */ -} - -int nv2a_profile_get_counter_value(unsigned int cnt) -{ - assert(cnt < NV2A_PROF__COUNT); - unsigned int idx = (g_nv2a_stats.frame_ptr + NV2A_PROF_NUM_FRAMES - 1) % - NV2A_PROF_NUM_FRAMES; - return g_nv2a_stats.frame_history[idx].counters[cnt]; -} - -static const GLenum 
pgraph_texture_min_filter_map[] = { - 0, - GL_NEAREST, - GL_LINEAR, - GL_NEAREST_MIPMAP_NEAREST, - GL_LINEAR_MIPMAP_NEAREST, - GL_NEAREST_MIPMAP_LINEAR, - GL_LINEAR_MIPMAP_LINEAR, - GL_LINEAR, -}; - -static const GLenum pgraph_texture_mag_filter_map[] = { - 0, - GL_NEAREST, - GL_LINEAR, - 0, - GL_LINEAR /* TODO: Convolution filter... */ -}; - -static const GLenum pgraph_texture_addr_map[] = { - 0, - GL_REPEAT, - GL_MIRRORED_REPEAT, - GL_CLAMP_TO_EDGE, - GL_CLAMP_TO_BORDER, - GL_CLAMP_TO_EDGE, /* Approximate GL_CLAMP */ -}; - -static const GLenum pgraph_blend_factor_map[] = { - GL_ZERO, - GL_ONE, - GL_SRC_COLOR, - GL_ONE_MINUS_SRC_COLOR, - GL_SRC_ALPHA, - GL_ONE_MINUS_SRC_ALPHA, - GL_DST_ALPHA, - GL_ONE_MINUS_DST_ALPHA, - GL_DST_COLOR, - GL_ONE_MINUS_DST_COLOR, - GL_SRC_ALPHA_SATURATE, - 0, - GL_CONSTANT_COLOR, - GL_ONE_MINUS_CONSTANT_COLOR, - GL_CONSTANT_ALPHA, - GL_ONE_MINUS_CONSTANT_ALPHA, -}; - -static const GLenum pgraph_blend_equation_map[] = { - GL_FUNC_SUBTRACT, - GL_FUNC_REVERSE_SUBTRACT, - GL_FUNC_ADD, - GL_MIN, - GL_MAX, - GL_FUNC_REVERSE_SUBTRACT, - GL_FUNC_ADD, -}; - -/* FIXME -static const GLenum pgraph_blend_logicop_map[] = { - GL_CLEAR, - GL_AND, - GL_AND_REVERSE, - GL_COPY, - GL_AND_INVERTED, - GL_NOOP, - GL_XOR, - GL_OR, - GL_NOR, - GL_EQUIV, - GL_INVERT, - GL_OR_REVERSE, - GL_COPY_INVERTED, - GL_OR_INVERTED, - GL_NAND, - GL_SET, -}; -*/ - -static const GLenum pgraph_cull_face_map[] = { - 0, - GL_FRONT, - GL_BACK, - GL_FRONT_AND_BACK -}; - -static const GLenum pgraph_depth_func_map[] = { - GL_NEVER, - GL_LESS, - GL_EQUAL, - GL_LEQUAL, - GL_GREATER, - GL_NOTEQUAL, - GL_GEQUAL, - GL_ALWAYS, -}; - -static const GLenum pgraph_stencil_func_map[] = { - GL_NEVER, - GL_LESS, - GL_EQUAL, - GL_LEQUAL, - GL_GREATER, - GL_NOTEQUAL, - GL_GEQUAL, - GL_ALWAYS, -}; - -static const GLenum pgraph_stencil_op_map[] = { - 0, - GL_KEEP, - GL_ZERO, - GL_REPLACE, - GL_INCR, - GL_DECR, - GL_INVERT, - GL_INCR_WRAP, - GL_DECR_WRAP, -}; - -typedef struct ColorFormatInfo { - 
unsigned int bytes_per_pixel; - bool linear; - GLint gl_internal_format; - GLenum gl_format; - GLenum gl_type; - GLenum gl_swizzle_mask[4]; - bool depth; -} ColorFormatInfo; - -static const ColorFormatInfo kelvin_color_format_map[66] = { - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = - {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_ONE}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = - {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_RED}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = - {2, false, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = - {2, false, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = - {2, false, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = - {2, false, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = - {4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = - {4, false, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, - - /* paletted texture */ - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = - {1, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = - {4, false, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, 0, GL_RGBA}, - [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = - {4, false, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, 0, GL_RGBA}, - [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = - {4, false, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, 0, GL_RGBA}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = - {2, true, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = - {2, true, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = - {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, - 
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = - {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_ONE}}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = - {2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, - {GL_RED, GL_GREEN, GL_RED, GL_GREEN}}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = - {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_ONE, GL_ONE, GL_ONE, GL_RED}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = - {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_GREEN}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = - {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_RED}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = - {2, true, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = - {2, true, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = - {4, true, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = - {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, - {GL_ONE, GL_ONE, GL_ONE, GL_RED}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = - {2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, - {GL_RED, GL_RED, GL_RED, GL_GREEN}}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = - {2, false, GL_RGB8_SNORM, GL_RGB, GL_BYTE}, /* FIXME: This might be signed */ - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = - {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, - {GL_RED, GL_GREEN, GL_RED, GL_GREEN}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = - {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, - {GL_GREEN, GL_RED, GL_RED, GL_GREEN}}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = - {2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = - {2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, - - /* Additional information is passed to the pixel shader via the swizzle: - * RED: The 
depth value. - * GREEN: 0 for 16-bit, 1 for 24 bit - * BLUE: 0 for fixed, 1 for float - */ - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = - {2, false, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, - {GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = - {4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, - {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = - /* FIXME: Uses fixed-point format to match surface format hack below. */ - {4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, - {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = - {2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, - {GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = - {2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_HALF_FLOAT, - {GL_RED, GL_ZERO, GL_ONE, GL_ZERO}, true}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = - {2, true, GL_R16, GL_RED, GL_UNSIGNED_SHORT, - {GL_RED, GL_RED, GL_RED, GL_ONE}}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = - {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = - {4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = - {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, - - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = - {4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = - {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8}, - [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = - {4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8} -}; - -static const SurfaceFormatInfo kelvin_surface_color_format_map[] = { - [NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] 
= - {2, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, GL_COLOR_ATTACHMENT0}, - [NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] = - {2, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, GL_COLOR_ATTACHMENT0}, - [NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] = - {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0}, - [NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] = - {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0}, - - // FIXME: Map channel color - [NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] = - {1, GL_R8, GL_RED, GL_UNSIGNED_BYTE, GL_COLOR_ATTACHMENT0}, - [NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] = - {2, GL_RG8, GL_RG, GL_UNSIGNED_SHORT, GL_COLOR_ATTACHMENT0}, -}; - -static const SurfaceFormatInfo kelvin_surface_zeta_float_format_map[] = { - [NV097_SET_SURFACE_FORMAT_ZETA_Z16] = - {2, GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_HALF_FLOAT, GL_DEPTH_ATTACHMENT}, - [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = - /* FIXME: GL does not support packing floating-point Z24S8 OOTB, so for - * now just emulate this with fixed-point Z24S8. Possible compat - * improvement with custom conversion. 
- */ - {4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT}, -}; - -static const SurfaceFormatInfo kelvin_surface_zeta_fixed_format_map[] = { - [NV097_SET_SURFACE_FORMAT_ZETA_Z16] = - {2, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, GL_DEPTH_ATTACHMENT}, - [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = - {4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT}, -}; - - -// static void pgraph_set_context_user(NV2AState *d, uint32_t val); -static void pgraph_gl_fence(void); -static GLuint pgraph_compile_shader(const char *vs_src, const char *fs_src); -static void pgraph_init_render_to_texture(NV2AState *d); -static void pgraph_init_display_renderer(NV2AState *d); -static void pgraph_method_log(unsigned int subchannel, unsigned int graphics_class, unsigned int method, uint32_t parameter); -static void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr); -static void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg); -static void pgraph_shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, bool binding_changed, bool vertex_program, bool fixed_function); -static void pgraph_bind_shaders(PGRAPHState *pg); -static bool pgraph_framebuffer_dirty(PGRAPHState *pg); -static bool pgraph_color_write_enabled(PGRAPHState *pg); -static bool pgraph_zeta_write_enabled(PGRAPHState *pg); -static void pgraph_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta); -static void pgraph_wait_for_surface_download(SurfaceBinding *e); -static void pgraph_surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr, hwaddr len, bool write); -static SurfaceBinding *pgraph_surface_put(NV2AState *d, hwaddr addr, SurfaceBinding *e); -static SurfaceBinding *pgraph_surface_get(NV2AState *d, hwaddr addr); -static SurfaceBinding *pgraph_surface_get_within(NV2AState *d, hwaddr addr); -static void pgraph_unbind_surface(NV2AState *d, bool color); -static void 
pgraph_surface_invalidate(NV2AState *d, SurfaceBinding *e); -static void pgraph_surface_evict_old(NV2AState *d); -static void pgraph_download_surface_data_if_dirty(NV2AState *d, SurfaceBinding *surface); -static void pgraph_download_surface_data(NV2AState *d, SurfaceBinding *surface, bool force); -static void pgraph_download_surface_data_to_buffer(NV2AState *d, - SurfaceBinding *surface, - bool swizzle, bool flip, - bool downscale, - uint8_t *pixels); -static void pgraph_upload_surface_data(NV2AState *d, SurfaceBinding *surface, bool force); -static bool pgraph_check_surface_compatibility(SurfaceBinding *s1, SurfaceBinding *s2, bool strict); -static bool pgraph_check_surface_to_texture_compatibility(const SurfaceBinding *surface, const TextureShape *shape); -static void pgraph_render_surface_to_texture(NV2AState *d, SurfaceBinding *surface, TextureBinding *texture, TextureShape *texture_shape, int texture_unit); -static void pgraph_update_surface_part(NV2AState *d, bool upload, bool color); -static void pgraph_update_surface(NV2AState *d, bool upload, bool color_write, bool zeta_write); -static void pgraph_bind_textures(NV2AState *d); -static void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg, unsigned int *width, unsigned int *height); -static void pgraph_apply_scaling_factor(PGRAPHState *pg, unsigned int *width, unsigned int *height); -static void pgraph_get_surface_dimensions(PGRAPHState *pg, unsigned int *width, unsigned int *height); -static void pgraph_update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size, bool quick); -static void pgraph_bind_vertex_attributes(NV2AState *d, unsigned int min_element, unsigned int max_element, bool inline_data, unsigned int inline_stride, unsigned int provoking_element); -static unsigned int pgraph_bind_inline_array(NV2AState *d); -static bool pgraph_is_texture_stage_active(PGRAPHState *pg, unsigned int stage); - -static float convert_f16_to_float(uint16_t f16); -static float convert_f24_to_float(uint32_t f24); 
-static uint8_t cliptobyte(int x); -static void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix, uint8_t *r, uint8_t *g, uint8_t* b); -static void convert_uyvy_to_rgb(const uint8_t *line, unsigned int ix, uint8_t *r, uint8_t *g, uint8_t* b); -static uint8_t* convert_texture_data(const TextureShape s, const uint8_t *data, const uint8_t *palette_data, unsigned int width, unsigned int height, unsigned int depth, unsigned int row_pitch, unsigned int slice_pitch); -static void upload_gl_texture(GLenum gl_target, const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data); -static TextureBinding* generate_texture(const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data); -static void texture_binding_destroy(gpointer data); -static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key); -static void texture_cache_entry_post_evict(Lru *lru, LruNode *node); -static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key); - -static void vertex_cache_entry_init(Lru *lru, LruNode *node, void *key) -{ - VertexLruNode *vnode = container_of(node, VertexLruNode, node); - memcpy(&vnode->key, key, sizeof(struct VertexKey)); - vnode->initialized = false; -} - -static bool vertex_cache_entry_compare(Lru *lru, LruNode *node, void *key) -{ - VertexLruNode *vnode = container_of(node, VertexLruNode, node); - return memcmp(&vnode->key, key, sizeof(VertexKey)); -} - -static void pgraph_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, hwaddr size); -static bool pgraph_check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size); -static unsigned int kelvin_map_stencil_op(uint32_t parameter); -static unsigned int kelvin_map_polygon_mode(uint32_t parameter); -static unsigned int kelvin_map_texgen(uint32_t parameter, unsigned int channel); -static void pgraph_reload_surface_scale_factor(NV2AState *d); - -static uint32_t pgraph_rdi_read(PGRAPHState *pg, - unsigned int select, unsigned int address) -{ - uint32_t r = 
0; - switch(select) { - case RDI_INDEX_VTX_CONSTANTS0: - case RDI_INDEX_VTX_CONSTANTS1: - assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS); - r = pg->vsh_constants[address / 4][3 - address % 4]; - break; - default: - fprintf(stderr, "nv2a: unknown rdi read select 0x%x address 0x%x\n", - select, address); - assert(false); - break; - } - return r; -} - -static void pgraph_rdi_write(PGRAPHState *pg, - unsigned int select, unsigned int address, - uint32_t val) -{ - switch(select) { - case RDI_INDEX_VTX_CONSTANTS0: - case RDI_INDEX_VTX_CONSTANTS1: - assert(false); /* Untested */ - assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS); - pg->vsh_constants_dirty[address / 4] |= - (val != pg->vsh_constants[address / 4][3 - address % 4]); - pg->vsh_constants[address / 4][3 - address % 4] = val; - break; - default: - NV2A_DPRINTF("unknown rdi write select 0x%x, address 0x%x, val 0x%08x\n", - select, address, val); - break; - } -} - -uint64_t pgraph_read(void *opaque, hwaddr addr, unsigned int size) -{ - NV2AState *d = (NV2AState *)opaque; - PGRAPHState *pg = &d->pgraph; - - qemu_mutex_lock(&pg->lock); - - uint64_t r = 0; - switch (addr) { - case NV_PGRAPH_INTR: - r = pg->pending_interrupts; - break; - case NV_PGRAPH_INTR_EN: - r = pg->enabled_interrupts; - break; - case NV_PGRAPH_RDI_DATA: { - unsigned int select = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_SELECT); - unsigned int address = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_ADDRESS); - - r = pgraph_rdi_read(pg, select, address); - - /* FIXME: Overflow into select? 
*/ - assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS, - NV_PGRAPH_RDI_INDEX_ADDRESS)); - SET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1); - break; - } - default: - r = pg->regs[addr]; - break; - } - - qemu_mutex_unlock(&pg->lock); - - nv2a_reg_log_read(NV_PGRAPH, addr, size, r); - return r; -} - -void pgraph_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) -{ - NV2AState *d = (NV2AState *)opaque; - PGRAPHState *pg = &d->pgraph; - - nv2a_reg_log_write(NV_PGRAPH, addr, size, val); - - qemu_mutex_lock(&d->pfifo.lock); // FIXME: Factor out fifo lock here - qemu_mutex_lock(&pg->lock); - - switch (addr) { - case NV_PGRAPH_INTR: - pg->pending_interrupts &= ~val; - - if (!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR)) { - pg->waiting_for_nop = false; - } - if (!(pg->pending_interrupts & NV_PGRAPH_INTR_CONTEXT_SWITCH)) { - pg->waiting_for_context_switch = false; - } - pfifo_kick(d); - break; - case NV_PGRAPH_INTR_EN: - pg->enabled_interrupts = val; - break; - case NV_PGRAPH_INCREMENT: - if (val & NV_PGRAPH_INCREMENT_READ_3D) { - SET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_READ_3D, - (GET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_READ_3D)+1) - % GET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_MODULO_3D) ); - nv2a_profile_increment(); - pfifo_kick(d); - } - break; - case NV_PGRAPH_RDI_DATA: { - unsigned int select = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_SELECT); - unsigned int address = GET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_ADDRESS); - - pgraph_rdi_write(pg, select, address, val); - - /* FIXME: Overflow into select? 
*/ - assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS, - NV_PGRAPH_RDI_INDEX_ADDRESS)); - SET_MASK(pg->regs[NV_PGRAPH_RDI_INDEX], - NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1); - break; - } - case NV_PGRAPH_CHANNEL_CTX_TRIGGER: { - hwaddr context_address = - GET_MASK(pg->regs[NV_PGRAPH_CHANNEL_CTX_POINTER], - NV_PGRAPH_CHANNEL_CTX_POINTER_INST) << 4; - - if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_READ_IN) { -#ifdef DEBUG_NV2A - unsigned pgraph_channel_id = - GET_MASK(pg->regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID); -#endif - NV2A_DPRINTF("PGRAPH: read channel %d context from %" HWADDR_PRIx "\n", - pgraph_channel_id, context_address); - - assert(context_address < memory_region_size(&d->ramin)); - - uint8_t *context_ptr = d->ramin_ptr + context_address; - uint32_t context_user = ldl_le_p((uint32_t*)context_ptr); - - NV2A_DPRINTF(" - CTX_USER = 0x%x\n", context_user); - - pg->regs[NV_PGRAPH_CTX_USER] = context_user; - // pgraph_set_context_user(d, context_user); - } - if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_WRITE_OUT) { - /* do stuff ... 
*/ - } - - break; - } - default: - pg->regs[addr] = val; - break; - } - - // events - switch (addr) { - case NV_PGRAPH_FIFO: - pfifo_kick(d); - break; - } - - qemu_mutex_unlock(&pg->lock); - qemu_mutex_unlock(&d->pfifo.lock); -} - -void pgraph_flush(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - - bool update_surface = (pg->color_binding || pg->zeta_binding); - - /* Clear last surface shape to force recreation of buffers at next draw */ - pg->surface_color.draw_dirty = false; - pg->surface_zeta.draw_dirty = false; - memset(&pg->last_surface_shape, 0, sizeof(pg->last_surface_shape)); - pgraph_unbind_surface(d, true); - pgraph_unbind_surface(d, false); - - SurfaceBinding *s, *next; - QTAILQ_FOREACH_SAFE(s, &d->pgraph.surfaces, entry, next) { - pgraph_surface_invalidate(d, s); - } - - pgraph_mark_textures_possibly_dirty(d, 0, memory_region_size(d->vram)); - - /* Sync all RAM */ - glBindBuffer(GL_ARRAY_BUFFER, d->pgraph.gl_memory_buffer); - glBufferSubData(GL_ARRAY_BUFFER, 0, memory_region_size(d->vram), d->vram_ptr); - - /* FIXME: Flush more? 
*/ - - pgraph_reload_surface_scale_factor(d); - - if (update_surface) { - pgraph_update_surface(d, true, true, true); - } - - qatomic_set(&d->pgraph.flush_pending, false); - qemu_event_set(&d->pgraph.flush_complete); -} - -#define METHOD_ADDR(gclass, name) \ - gclass ## _ ## name -#define METHOD_ADDR_TO_INDEX(x) ((x)>>2) -#define METHOD_NAME_STR(gclass, name) \ - tostring(gclass ## _ ## name) -#define METHOD_FUNC_NAME(gclass, name) \ - pgraph_ ## gclass ## _ ## name ## _handler -#define METHOD_HANDLER_ARG_DECL \ - NV2AState *d, PGRAPHState *pg, \ - unsigned int subchannel, unsigned int method, \ - uint32_t parameter, uint32_t *parameters, \ - size_t num_words_available, size_t *num_words_consumed, bool inc -#define METHOD_HANDLER_ARGS \ - d, pg, subchannel, method, parameter, parameters, \ - num_words_available, num_words_consumed, inc -#define DEF_METHOD_PROTO(gclass, name) \ - static void METHOD_FUNC_NAME(gclass, name)(METHOD_HANDLER_ARG_DECL) - -#define DEF_METHOD(gclass, name) \ - DEF_METHOD_PROTO(gclass, name); -#define DEF_METHOD_RANGE(gclass, name, range) \ - DEF_METHOD_PROTO(gclass, name); -#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) /* Drop */ -#define DEF_METHOD_CASE_4(gclass, name, stride) \ - DEF_METHOD_PROTO(gclass, name); -#include "pgraph_methods.h" -#undef DEF_METHOD -#undef DEF_METHOD_RANGE -#undef DEF_METHOD_CASE_4_OFFSET -#undef DEF_METHOD_CASE_4 - -typedef void (*MethodFunc)(METHOD_HANDLER_ARG_DECL); -static const struct { - uint32_t base; - const char *name; - MethodFunc handler; -} pgraph_kelvin_methods[0x800] = { -#define DEF_METHOD(gclass, name) \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name))] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, -#define DEF_METHOD_RANGE(gclass, name, range) \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name)) \ - ... 
METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + 4*range - 1)] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, -#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset)] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride)] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride * 2)] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, \ - [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride * 3)] = \ - { \ - METHOD_ADDR(gclass, name), \ - METHOD_NAME_STR(gclass, name), \ - METHOD_FUNC_NAME(gclass, name), \ - }, -#define DEF_METHOD_CASE_4(gclass, name, stride) \ - DEF_METHOD_CASE_4_OFFSET(gclass, name, 0, stride) -#include "pgraph_methods.h" -#undef DEF_METHOD -#undef DEF_METHOD_RANGE -#undef DEF_METHOD_CASE_4_OFFSET -#undef DEF_METHOD_CASE_4 -}; - -#define METHOD_RANGE_END_NAME(gclass, name) \ - pgraph_ ## gclass ## _ ## name ## __END -#define DEF_METHOD(gclass, name) \ - static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ - METHOD_ADDR(gclass, name) + 4; -#define DEF_METHOD_RANGE(gclass, name, range) \ - static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ - METHOD_ADDR(gclass, name) + 4*range; -#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) /* drop */ -#define DEF_METHOD_CASE_4(gclass, name, stride) \ - static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ - METHOD_ADDR(gclass, name) + 4*stride; -#include "pgraph_methods.h" -#undef DEF_METHOD -#undef DEF_METHOD_RANGE -#undef DEF_METHOD_CASE_4_OFFSET -#undef DEF_METHOD_CASE_4 - -static void 
pgraph_method_inc(MethodFunc handler, uint32_t end, - METHOD_HANDLER_ARG_DECL) -{ - if (!inc) { - handler(METHOD_HANDLER_ARGS); - return; - } - size_t count = MIN(num_words_available, (end - method) / 4); - for (size_t i = 0; i < count; i++) { - parameter = ldl_le_p(parameters + i); - if (i) { - pgraph_method_log(subchannel, NV_KELVIN_PRIMITIVE, method, - parameter); - } - handler(METHOD_HANDLER_ARGS); - method += 4; - } - *num_words_consumed = count; -} - -static void pgraph_method_non_inc(MethodFunc handler, METHOD_HANDLER_ARG_DECL) -{ - if (inc) { - handler(METHOD_HANDLER_ARGS); - return; - } - - for (size_t i = 0; i < num_words_available; i++) { - parameter = ldl_le_p(parameters + i); - if (i) { - pgraph_method_log(subchannel, NV_KELVIN_PRIMITIVE, method, - parameter); - } - handler(METHOD_HANDLER_ARGS); - } - *num_words_consumed = num_words_available; -} - -#define METHOD_FUNC_NAME_INT(gclass, name) METHOD_FUNC_NAME(gclass, name##_int) -#define DEF_METHOD_INT(gclass, name) DEF_METHOD(gclass, name##_int) -#define DEF_METHOD(gclass, name) DEF_METHOD_PROTO(gclass, name) - -#define DEF_METHOD_INC(gclass, name) \ - DEF_METHOD_INT(gclass, name); \ - DEF_METHOD(gclass, name) \ - { \ - pgraph_method_inc(METHOD_FUNC_NAME_INT(gclass, name), \ - METHOD_RANGE_END_NAME(gclass, name), \ - METHOD_HANDLER_ARGS); \ - } \ - DEF_METHOD_INT(gclass, name) - -#define DEF_METHOD_NON_INC(gclass, name) \ - DEF_METHOD_INT(gclass, name); \ - DEF_METHOD(gclass, name) \ - { \ - pgraph_method_non_inc(METHOD_FUNC_NAME_INT(gclass, name), \ - METHOD_HANDLER_ARGS); \ - } \ - DEF_METHOD_INT(gclass, name) - -// TODO: Optimize. Ideally this should all be done via OpenGL. 
-static void pgraph_image_blit(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - ContextSurfaces2DState *context_surfaces = &pg->context_surfaces_2d; - ImageBlitState *image_blit = &pg->image_blit; - BetaState *beta = &pg->beta; - - pgraph_update_surface(d, false, true, true); - - assert(context_surfaces->object_instance == image_blit->context_surfaces); - - unsigned int bytes_per_pixel; - switch (context_surfaces->color_format) { - case NV062_SET_COLOR_FORMAT_LE_Y8: - bytes_per_pixel = 1; - break; - case NV062_SET_COLOR_FORMAT_LE_R5G6B5: - bytes_per_pixel = 2; - break; - case NV062_SET_COLOR_FORMAT_LE_A8R8G8B8: - case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: - case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: - case NV062_SET_COLOR_FORMAT_LE_Y32: - bytes_per_pixel = 4; - break; - default: - fprintf(stderr, "Unknown blit surface format: 0x%x\n", - context_surfaces->color_format); - assert(false); - break; - } - - hwaddr source_dma_len, dest_dma_len; - - uint8_t *source = (uint8_t *)nv_dma_map( - d, context_surfaces->dma_image_source, &source_dma_len); - assert(context_surfaces->source_offset < source_dma_len); - source += context_surfaces->source_offset; - - uint8_t *dest = (uint8_t *)nv_dma_map(d, context_surfaces->dma_image_dest, - &dest_dma_len); - assert(context_surfaces->dest_offset < dest_dma_len); - dest += context_surfaces->dest_offset; - - hwaddr source_addr = source - d->vram_ptr; - hwaddr dest_addr = dest - d->vram_ptr; - - SurfaceBinding *surf_src = pgraph_surface_get(d, source_addr); - if (surf_src) { - pgraph_download_surface_data_if_dirty(d, surf_src); - } - - SurfaceBinding *surf_dest = pgraph_surface_get(d, dest_addr); - if (surf_dest) { - if (image_blit->height < surf_dest->height || - image_blit->width < surf_dest->width) { - pgraph_download_surface_data_if_dirty(d, surf_dest); - } else { - // The blit will completely replace the surface so any pending - // download should be discarded. 
- surf_dest->download_pending = false; - surf_dest->draw_dirty = false; - } - surf_dest->upload_pending = true; - pg->draw_time++; - } - - hwaddr source_offset = image_blit->in_y * context_surfaces->source_pitch + - image_blit->in_x * bytes_per_pixel; - hwaddr dest_offset = image_blit->out_y * context_surfaces->dest_pitch + - image_blit->out_x * bytes_per_pixel; - - hwaddr source_size = - (image_blit->height - 1) * context_surfaces->source_pitch + - image_blit->width * bytes_per_pixel; - hwaddr dest_size = (image_blit->height - 1) * context_surfaces->dest_pitch + - image_blit->width * bytes_per_pixel; - - /* FIXME: What does hardware do in this case? */ - assert(source_addr + source_offset + source_size <= - memory_region_size(d->vram)); - assert(dest_addr + dest_offset + dest_size <= memory_region_size(d->vram)); - - uint8_t *source_row = source + source_offset; - uint8_t *dest_row = dest + dest_offset; - - if (image_blit->operation == NV09F_SET_OPERATION_SRCCOPY) { - NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_SRCCOPY"); - for (unsigned int y = 0; y < image_blit->height; y++) { - memmove(dest_row, source_row, image_blit->width * bytes_per_pixel); - source_row += context_surfaces->source_pitch; - dest_row += context_surfaces->dest_pitch; - } - } else if (image_blit->operation == NV09F_SET_OPERATION_BLEND_AND) { - NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_BLEND_AND"); - uint32_t max_beta_mult = 0x7f80; - uint32_t beta_mult = beta->beta >> 16; - uint32_t inv_beta_mult = max_beta_mult - beta_mult; - for (unsigned int y = 0; y < image_blit->height; y++) { - for (unsigned int x = 0; x < image_blit->width; x++) { - for (unsigned int ch = 0; ch < 3; ch++) { - uint32_t a = source_row[x * 4 + ch] * beta_mult; - uint32_t b = dest_row[x * 4 + ch] * inv_beta_mult; - dest_row[x * 4 + ch] = (a + b) / max_beta_mult; - } - } - source_row += context_surfaces->source_pitch; - dest_row += context_surfaces->dest_pitch; - } - } else { - fprintf(stderr, "Unknown blit operation: 
0x%x\n", - image_blit->operation); - assert(false && "Unknown blit operation"); - } - - NV2A_DPRINTF(" - 0x%tx -> 0x%tx\n", source_addr, dest_addr); - - bool needs_alpha_patching; - uint8_t alpha_override; - switch (context_surfaces->color_format) { - case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: - needs_alpha_patching = true; - alpha_override = 0xff; - break; - case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: - needs_alpha_patching = true; - alpha_override = 0; - break; - default: - needs_alpha_patching = false; - alpha_override = 0; - } - - if (needs_alpha_patching) { - dest_row = dest + dest_offset; - for (unsigned int y = 0; y < image_blit->height; y++) { - for (unsigned int x = 0; x < image_blit->width; x++) { - dest_row[x * 4 + 3] = alpha_override; - } - dest_row += context_surfaces->dest_pitch; - } - } - - dest_addr += dest_offset; - memory_region_set_client_dirty(d->vram, dest_addr, dest_size, - DIRTY_MEMORY_VGA); - memory_region_set_client_dirty(d->vram, dest_addr, dest_size, - DIRTY_MEMORY_NV2A_TEX); -} - -int pgraph_method(NV2AState *d, unsigned int subchannel, - unsigned int method, uint32_t parameter, - uint32_t *parameters, size_t num_words_available, - size_t max_lookahead_words, bool inc) -{ - int num_processed = 1; - - assert(glGetError() == GL_NO_ERROR); - - PGRAPHState *pg = &d->pgraph; - - bool channel_valid = - d->pgraph.regs[NV_PGRAPH_CTX_CONTROL] & NV_PGRAPH_CTX_CONTROL_CHID; - assert(channel_valid); - - ContextSurfaces2DState *context_surfaces_2d = &pg->context_surfaces_2d; - ImageBlitState *image_blit = &pg->image_blit; - BetaState *beta = &pg->beta; - - assert(subchannel < 8); - - if (method == NV_SET_OBJECT) { - assert(parameter < memory_region_size(&d->ramin)); - uint8_t *obj_ptr = d->ramin_ptr + parameter; - - uint32_t ctx_1 = ldl_le_p((uint32_t*)obj_ptr); - uint32_t ctx_2 = ldl_le_p((uint32_t*)(obj_ptr+4)); - uint32_t ctx_3 = ldl_le_p((uint32_t*)(obj_ptr+8)); - uint32_t ctx_4 = ldl_le_p((uint32_t*)(obj_ptr+12)); - uint32_t ctx_5 = 
parameter; - - pg->regs[NV_PGRAPH_CTX_CACHE1 + subchannel * 4] = ctx_1; - pg->regs[NV_PGRAPH_CTX_CACHE2 + subchannel * 4] = ctx_2; - pg->regs[NV_PGRAPH_CTX_CACHE3 + subchannel * 4] = ctx_3; - pg->regs[NV_PGRAPH_CTX_CACHE4 + subchannel * 4] = ctx_4; - pg->regs[NV_PGRAPH_CTX_CACHE5 + subchannel * 4] = ctx_5; - } - - // is this right? - pg->regs[NV_PGRAPH_CTX_SWITCH1] = pg->regs[NV_PGRAPH_CTX_CACHE1 + subchannel * 4]; - pg->regs[NV_PGRAPH_CTX_SWITCH2] = pg->regs[NV_PGRAPH_CTX_CACHE2 + subchannel * 4]; - pg->regs[NV_PGRAPH_CTX_SWITCH3] = pg->regs[NV_PGRAPH_CTX_CACHE3 + subchannel * 4]; - pg->regs[NV_PGRAPH_CTX_SWITCH4] = pg->regs[NV_PGRAPH_CTX_CACHE4 + subchannel * 4]; - pg->regs[NV_PGRAPH_CTX_SWITCH5] = pg->regs[NV_PGRAPH_CTX_CACHE5 + subchannel * 4]; - - uint32_t graphics_class = GET_MASK(pg->regs[NV_PGRAPH_CTX_SWITCH1], - NV_PGRAPH_CTX_SWITCH1_GRCLASS); - - pgraph_method_log(subchannel, graphics_class, method, parameter); - - if (subchannel != 0) { - // catches context switching issues on xbox d3d - assert(graphics_class != 0x97); - } - - /* ugly switch for now */ - switch (graphics_class) { - case NV_BETA: { - switch (method) { - case NV012_SET_OBJECT: - beta->object_instance = parameter; - break; - case NV012_SET_BETA: - if (parameter & 0x80000000) { - beta->beta = 0; - } else { - // The parameter is a signed fixed-point number with a sign bit - // and 31 fractional bits. Note that negative values are clamped - // to 0, and only 8 fractional bits are actually implemented in - // hardware. 
- beta->beta = parameter & 0x7f800000; - } - break; - default: - goto unhandled; - } - break; - } - case NV_CONTEXT_PATTERN: { - switch (method) { - case NV044_SET_MONOCHROME_COLOR0: - pg->regs[NV_PGRAPH_PATT_COLOR0] = parameter; - break; - default: - goto unhandled; - } - break; - } - case NV_CONTEXT_SURFACES_2D: { - switch (method) { - case NV062_SET_OBJECT: - context_surfaces_2d->object_instance = parameter; - break; - case NV062_SET_CONTEXT_DMA_IMAGE_SOURCE: - context_surfaces_2d->dma_image_source = parameter; - break; - case NV062_SET_CONTEXT_DMA_IMAGE_DESTIN: - context_surfaces_2d->dma_image_dest = parameter; - break; - case NV062_SET_COLOR_FORMAT: - context_surfaces_2d->color_format = parameter; - break; - case NV062_SET_PITCH: - context_surfaces_2d->source_pitch = parameter & 0xFFFF; - context_surfaces_2d->dest_pitch = parameter >> 16; - break; - case NV062_SET_OFFSET_SOURCE: - context_surfaces_2d->source_offset = parameter & 0x07FFFFFF; - break; - case NV062_SET_OFFSET_DESTIN: - context_surfaces_2d->dest_offset = parameter & 0x07FFFFFF; - break; - default: - goto unhandled; - } - break; - } - case NV_IMAGE_BLIT: { - switch (method) { - case NV09F_SET_OBJECT: - image_blit->object_instance = parameter; - break; - case NV09F_SET_CONTEXT_SURFACES: - image_blit->context_surfaces = parameter; - break; - case NV09F_SET_OPERATION: - image_blit->operation = parameter; - break; - case NV09F_CONTROL_POINT_IN: - image_blit->in_x = parameter & 0xFFFF; - image_blit->in_y = parameter >> 16; - break; - case NV09F_CONTROL_POINT_OUT: - image_blit->out_x = parameter & 0xFFFF; - image_blit->out_y = parameter >> 16; - break; - case NV09F_SIZE: - image_blit->width = parameter & 0xFFFF; - image_blit->height = parameter >> 16; - - if (image_blit->width && image_blit->height) { - pgraph_image_blit(d); - } - break; - default: - goto unhandled; - } - break; - } - case NV_KELVIN_PRIMITIVE: { - MethodFunc handler = - pgraph_kelvin_methods[METHOD_ADDR_TO_INDEX(method)].handler; - if 
(handler == NULL) { - goto unhandled; - } - size_t num_words_consumed = 1; - handler(d, pg, subchannel, method, parameter, parameters, - num_words_available, &num_words_consumed, inc); - - /* Squash repeated BEGIN,DRAW_ARRAYS,END */ - #define LAM(i, mthd) ((parameters[i*2+1] & 0x31fff) == (mthd)) - #define LAP(i, prm) (parameters[i*2+2] == (prm)) - #define LAMP(i, mthd, prm) (LAM(i, mthd) && LAP(i, prm)) - - if (method == NV097_DRAW_ARRAYS && (max_lookahead_words >= 7) && - pg->inline_elements_length == 0 && - pg->draw_arrays_length < - (ARRAY_SIZE(pg->gl_draw_arrays_start) - 1) && - LAMP(0, NV097_SET_BEGIN_END, NV097_SET_BEGIN_END_OP_END) && - LAMP(1, NV097_SET_BEGIN_END, pg->primitive_mode) && - LAM(2, NV097_DRAW_ARRAYS)) { - num_words_consumed += 4; - pg->draw_arrays_prevent_connect = true; - } - - #undef LAM - #undef LAP - #undef LAMP - - num_processed = num_words_consumed; - break; - } - default: - goto unhandled; - } - - return num_processed; - -unhandled: - trace_nv2a_pgraph_method_unhandled(subchannel, graphics_class, - method, parameter); - return num_processed; -} - -DEF_METHOD(NV097, SET_OBJECT) -{ - pg->kelvin.object_instance = parameter; -} - -DEF_METHOD(NV097, NO_OPERATION) -{ - /* The bios uses nop as a software method call - - * it seems to expect a notify interrupt if the parameter isn't 0. - * According to a nouveau guy it should still be a nop regardless - * of the parameter. It's possible a debug register enables this, - * but nothing obvious sticks out. Weird. 
- */ - if (parameter == 0) { - return; - } - - unsigned channel_id = - GET_MASK(pg->regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID); - - assert(!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR)); - - SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR], NV_PGRAPH_TRAPPED_ADDR_CHID, - channel_id); - SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR], NV_PGRAPH_TRAPPED_ADDR_SUBCH, - subchannel); - SET_MASK(pg->regs[NV_PGRAPH_TRAPPED_ADDR], NV_PGRAPH_TRAPPED_ADDR_MTHD, - method); - pg->regs[NV_PGRAPH_TRAPPED_DATA_LOW] = parameter; - pg->regs[NV_PGRAPH_NSOURCE] = - NV_PGRAPH_NSOURCE_NOTIFICATION; /* TODO: check this */ - pg->pending_interrupts |= NV_PGRAPH_INTR_ERROR; - pg->waiting_for_nop = true; - - qemu_mutex_unlock(&pg->lock); - qemu_mutex_lock_iothread(); - nv2a_update_irq(d); - qemu_mutex_unlock_iothread(); - qemu_mutex_lock(&pg->lock); -} - -DEF_METHOD(NV097, WAIT_FOR_IDLE) -{ - pgraph_update_surface(d, false, true, true); -} - -DEF_METHOD(NV097, SET_FLIP_READ) -{ - SET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_READ_3D, - parameter); -} - -DEF_METHOD(NV097, SET_FLIP_WRITE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D, - parameter); -} - -DEF_METHOD(NV097, SET_FLIP_MODULO) -{ - SET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_MODULO_3D, - parameter); -} - -DEF_METHOD(NV097, FLIP_INCREMENT_WRITE) -{ - uint32_t old = - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D); - - SET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_WRITE_3D, - (GET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_WRITE_3D)+1) - % GET_MASK(pg->regs[NV_PGRAPH_SURFACE], - NV_PGRAPH_SURFACE_MODULO_3D) ); - - uint32_t new = - GET_MASK(pg->regs[NV_PGRAPH_SURFACE], NV_PGRAPH_SURFACE_WRITE_3D); - - trace_nv2a_pgraph_flip_increment_write(old, new); - NV2A_GL_DFRAME_TERMINATOR(); - pg->frame_time++; -} - -DEF_METHOD(NV097, FLIP_STALL) -{ - trace_nv2a_pgraph_flip_stall(); - pgraph_update_surface(d, false, true, true); - nv2a_profile_flip_stall(); - 
pg->waiting_for_flip = true; -} - -// TODO: these should be loading the dma objects from ramin here? - -DEF_METHOD(NV097, SET_CONTEXT_DMA_NOTIFIES) -{ - pg->dma_notifies = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_A) -{ - pg->dma_a = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_B) -{ - pg->dma_b = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_STATE) -{ - pg->dma_state = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_COLOR) -{ - /* try to get any straggling draws in before the surface's changed :/ */ - pgraph_update_surface(d, false, true, true); - - pg->dma_color = parameter; - pg->surface_color.buffer_dirty = true; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_ZETA) -{ - pg->dma_zeta = parameter; - pg->surface_zeta.buffer_dirty = true; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_VERTEX_A) -{ - pg->dma_vertex_a = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_VERTEX_B) -{ - pg->dma_vertex_b = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_SEMAPHORE) -{ - pg->dma_semaphore = parameter; -} - -DEF_METHOD(NV097, SET_CONTEXT_DMA_REPORT) -{ - pgraph_process_pending_reports(d); - - pg->dma_report = parameter; -} - -DEF_METHOD(NV097, SET_SURFACE_CLIP_HORIZONTAL) -{ - pgraph_update_surface(d, false, true, true); - - pg->surface_shape.clip_x = - GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_X); - pg->surface_shape.clip_width = - GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_WIDTH); -} - -DEF_METHOD(NV097, SET_SURFACE_CLIP_VERTICAL) -{ - pgraph_update_surface(d, false, true, true); - - pg->surface_shape.clip_y = - GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_Y); - pg->surface_shape.clip_height = - GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_HEIGHT); -} - -DEF_METHOD(NV097, SET_SURFACE_FORMAT) -{ - pgraph_update_surface(d, false, true, true); - - pg->surface_shape.color_format = - GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_COLOR); - pg->surface_shape.zeta_format = - GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ZETA); - 
pg->surface_shape.anti_aliasing = - GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ANTI_ALIASING); - pg->surface_shape.log_width = - GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_WIDTH); - pg->surface_shape.log_height = - GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_HEIGHT); - - int surface_type = GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_TYPE); - if (surface_type != pg->surface_type) { - pg->surface_type = surface_type; - pg->surface_color.buffer_dirty = true; - pg->surface_zeta.buffer_dirty = true; - } -} - -DEF_METHOD(NV097, SET_SURFACE_PITCH) -{ - pgraph_update_surface(d, false, true, true); - unsigned int color_pitch = GET_MASK(parameter, NV097_SET_SURFACE_PITCH_COLOR); - unsigned int zeta_pitch = GET_MASK(parameter, NV097_SET_SURFACE_PITCH_ZETA); - - pg->surface_color.buffer_dirty |= (pg->surface_color.pitch != color_pitch); - pg->surface_color.pitch = color_pitch; - - pg->surface_zeta.buffer_dirty |= (pg->surface_zeta.pitch != zeta_pitch); - pg->surface_zeta.pitch = zeta_pitch; -} - -DEF_METHOD(NV097, SET_SURFACE_COLOR_OFFSET) -{ - pgraph_update_surface(d, false, true, true); - pg->surface_color.buffer_dirty |= (pg->surface_color.offset != parameter); - pg->surface_color.offset = parameter; -} - -DEF_METHOD(NV097, SET_SURFACE_ZETA_OFFSET) -{ - pgraph_update_surface(d, false, true, true); - pg->surface_zeta.buffer_dirty |= (pg->surface_zeta.offset != parameter); - pg->surface_zeta.offset = parameter; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_ALPHA_ICW) -{ - int slot = (method - NV097_SET_COMBINER_ALPHA_ICW) / 4; - pg->regs[NV_PGRAPH_COMBINEALPHAI0 + slot*4] = parameter; -} - -DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW0) -{ - pg->regs[NV_PGRAPH_COMBINESPECFOG0] = parameter; -} - -DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW1) -{ - pg->regs[NV_PGRAPH_COMBINESPECFOG1] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_ADDRESS) -{ - int slot = (method - NV097_SET_TEXTURE_ADDRESS) / 64; - pg->regs[NV_PGRAPH_TEXADDRESS0 + slot * 4] = parameter; -} - 
-DEF_METHOD(NV097, SET_CONTROL0) -{ - pgraph_update_surface(d, false, true, true); - - bool stencil_write_enable = - parameter & NV097_SET_CONTROL0_STENCIL_WRITE_ENABLE; - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE, - stencil_write_enable); - - uint32_t z_format = GET_MASK(parameter, NV097_SET_CONTROL0_Z_FORMAT); - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_Z_FORMAT, z_format); - - bool z_perspective = - parameter & NV097_SET_CONTROL0_Z_PERSPECTIVE_ENABLE; - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE, - z_perspective); -} - -DEF_METHOD(NV097, SET_COLOR_MATERIAL) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_EMISSION, - (parameter >> 0) & 3); - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_AMBIENT, - (parameter >> 2) & 3); - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_DIFFUSE, - (parameter >> 4) & 3); - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_SPECULAR, - (parameter >> 6) & 3); -} - -DEF_METHOD(NV097, SET_FOG_MODE) -{ - /* FIXME: There is also NV_PGRAPH_CSV0_D_FOG_MODE */ - unsigned int mode; - switch (parameter) { - case NV097_SET_FOG_MODE_V_LINEAR: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR; break; - case NV097_SET_FOG_MODE_V_EXP: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP; break; - case NV097_SET_FOG_MODE_V_EXP2: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2; break; - case NV097_SET_FOG_MODE_V_EXP_ABS: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP_ABS; break; - case NV097_SET_FOG_MODE_V_EXP2_ABS: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2_ABS; break; - case NV097_SET_FOG_MODE_V_LINEAR_ABS: - mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR_ABS; break; - default: - assert(false); - break; - } - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_FOG_MODE, - mode); -} - -DEF_METHOD(NV097, SET_FOG_GEN_MODE) -{ - unsigned int mode; - switch (parameter) { - case NV097_SET_FOG_GEN_MODE_V_SPEC_ALPHA: - mode = 
NV_PGRAPH_CSV0_D_FOGGENMODE_SPEC_ALPHA; break; - case NV097_SET_FOG_GEN_MODE_V_RADIAL: - mode = NV_PGRAPH_CSV0_D_FOGGENMODE_RADIAL; break; - case NV097_SET_FOG_GEN_MODE_V_PLANAR: - mode = NV_PGRAPH_CSV0_D_FOGGENMODE_PLANAR; break; - case NV097_SET_FOG_GEN_MODE_V_ABS_PLANAR: - mode = NV_PGRAPH_CSV0_D_FOGGENMODE_ABS_PLANAR; break; - case NV097_SET_FOG_GEN_MODE_V_FOG_X: - mode = NV_PGRAPH_CSV0_D_FOGGENMODE_FOG_X; break; - default: - assert(false); - break; - } - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_FOGGENMODE, mode); -} - -DEF_METHOD(NV097, SET_FOG_ENABLE) -{ - /* - FIXME: There is also: - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_FOGENABLE, - parameter); - */ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_FOGENABLE, - parameter); -} - -DEF_METHOD(NV097, SET_FOG_COLOR) -{ - /* PGRAPH channels are ARGB, parameter channels are ABGR */ - uint8_t red = GET_MASK(parameter, NV097_SET_FOG_COLOR_RED); - uint8_t green = GET_MASK(parameter, NV097_SET_FOG_COLOR_GREEN); - uint8_t blue = GET_MASK(parameter, NV097_SET_FOG_COLOR_BLUE); - uint8_t alpha = GET_MASK(parameter, NV097_SET_FOG_COLOR_ALPHA); - SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_RED, red); - SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_GREEN, green); - SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_BLUE, blue); - SET_MASK(pg->regs[NV_PGRAPH_FOGCOLOR], NV_PGRAPH_FOGCOLOR_ALPHA, alpha); -} - -DEF_METHOD(NV097, SET_WINDOW_CLIP_TYPE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE, parameter); -} - -DEF_METHOD_INC(NV097, SET_WINDOW_CLIP_HORIZONTAL) -{ - int slot = (method - NV097_SET_WINDOW_CLIP_HORIZONTAL) / 4; - for (; slot < 8; ++slot) { - pg->regs[NV_PGRAPH_WINDOWCLIPX0 + slot * 4] = parameter; - } -} - -DEF_METHOD_INC(NV097, SET_WINDOW_CLIP_VERTICAL) -{ - int slot = (method - NV097_SET_WINDOW_CLIP_VERTICAL) / 4; - for (; slot < 8; ++slot) { - pg->regs[NV_PGRAPH_WINDOWCLIPY0 + slot * 4] = 
parameter; - } -} - -DEF_METHOD(NV097, SET_ALPHA_TEST_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHATESTENABLE, parameter); -} - -DEF_METHOD(NV097, SET_BLEND_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_EN, parameter); -} - -DEF_METHOD(NV097, SET_CULL_FACE_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_CULLENABLE, - parameter); -} - -DEF_METHOD(NV097, SET_DEPTH_TEST_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], NV_PGRAPH_CONTROL_0_ZENABLE, - parameter); -} - -DEF_METHOD(NV097, SET_DITHER_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_DITHERENABLE, parameter); -} - -DEF_METHOD(NV097, SET_LIGHTING_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_LIGHTING, - parameter); -} - -DEF_METHOD(NV097, SET_POINT_PARAMS_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_POINTPARAMSENABLE, - parameter); - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], - NV_PGRAPH_CONTROL_3_POINTPARAMSENABLE, parameter); -} - -DEF_METHOD(NV097, SET_POINT_SMOOTH_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE, parameter); -} - -DEF_METHOD(NV097, SET_LINE_SMOOTH_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE, parameter); -} - -DEF_METHOD(NV097, SET_POLY_SMOOTH_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE, parameter); -} - -DEF_METHOD(NV097, SET_SKIN_MODE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_SKIN, - parameter); -} - -DEF_METHOD(NV097, SET_STENCIL_TEST_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE, parameter); -} - -DEF_METHOD(NV097, SET_POLY_OFFSET_POINT_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE, parameter); -} - -DEF_METHOD(NV097, SET_POLY_OFFSET_LINE_ENABLE) -{ - 
SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE, parameter); -} - -DEF_METHOD(NV097, SET_POLY_OFFSET_FILL_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE, parameter); -} - -DEF_METHOD(NV097, SET_ALPHA_FUNC) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHAFUNC, parameter & 0xF); -} - -DEF_METHOD(NV097, SET_ALPHA_REF) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHAREF, parameter); -} - -DEF_METHOD(NV097, SET_BLEND_FUNC_SFACTOR) -{ - unsigned int factor; - switch (parameter) { - case NV097_SET_BLEND_FUNC_SFACTOR_V_ZERO: - factor = NV_PGRAPH_BLEND_SFACTOR_ZERO; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_COLOR: - factor = NV_PGRAPH_BLEND_SFACTOR_SRC_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_COLOR: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_ALPHA; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_DST_ALPHA; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_ALPHA; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_COLOR: - factor = NV_PGRAPH_BLEND_SFACTOR_DST_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_COLOR: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA_SATURATE: - factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA_SATURATE; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_COLOR: - factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_COLOR: - factor = 
NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_COLOR; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_ALPHA; break; - case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_ALPHA: - factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_ALPHA; break; - default: - NV2A_DPRINTF("Unknown blend source factor: 0x%08x\n", parameter); - return; /* discard */ - } - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_SFACTOR, factor); -} - -DEF_METHOD(NV097, SET_BLEND_FUNC_DFACTOR) -{ - unsigned int factor; - switch (parameter) { - case NV097_SET_BLEND_FUNC_DFACTOR_V_ZERO: - factor = NV_PGRAPH_BLEND_DFACTOR_ZERO; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_COLOR: - factor = NV_PGRAPH_BLEND_DFACTOR_SRC_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_COLOR: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_ALPHA; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_DST_ALPHA; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_ALPHA; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_COLOR: - factor = NV_PGRAPH_BLEND_DFACTOR_DST_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_COLOR: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA_SATURATE: - factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA_SATURATE; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_COLOR: - factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_COLOR: - factor = 
NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_COLOR; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_ALPHA; break; - case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_ALPHA: - factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_ALPHA; break; - default: - NV2A_DPRINTF("Unknown blend destination factor: 0x%08x\n", parameter); - return; /* discard */ - } - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_DFACTOR, factor); -} - -DEF_METHOD(NV097, SET_BLEND_COLOR) -{ - pg->regs[NV_PGRAPH_BLENDCOLOR] = parameter; -} - -DEF_METHOD(NV097, SET_BLEND_EQUATION) -{ - unsigned int equation; - switch (parameter) { - case NV097_SET_BLEND_EQUATION_V_FUNC_SUBTRACT: - equation = 0; break; - case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT: - equation = 1; break; - case NV097_SET_BLEND_EQUATION_V_FUNC_ADD: - equation = 2; break; - case NV097_SET_BLEND_EQUATION_V_MIN: - equation = 3; break; - case NV097_SET_BLEND_EQUATION_V_MAX: - equation = 4; break; - case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT_SIGNED: - equation = 5; break; - case NV097_SET_BLEND_EQUATION_V_FUNC_ADD_SIGNED: - equation = 6; break; - default: - NV2A_DPRINTF("Unknown blend equation: 0x%08x\n", parameter); - return; /* discard */ - } - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_EQN, equation); -} - -DEF_METHOD(NV097, SET_DEPTH_FUNC) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], NV_PGRAPH_CONTROL_0_ZFUNC, - parameter & 0xF); -} - -DEF_METHOD(NV097, SET_COLOR_MASK) -{ - pg->surface_color.write_enabled_cache |= pgraph_color_write_enabled(pg); - - bool alpha = parameter & NV097_SET_COLOR_MASK_ALPHA_WRITE_ENABLE; - bool red = parameter & NV097_SET_COLOR_MASK_RED_WRITE_ENABLE; - bool green = parameter & NV097_SET_COLOR_MASK_GREEN_WRITE_ENABLE; - bool blue = parameter & NV097_SET_COLOR_MASK_BLUE_WRITE_ENABLE; - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE, alpha); - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - 
NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE, red); - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE, green); - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE, blue); -} - -DEF_METHOD(NV097, SET_DEPTH_MASK) -{ - pg->surface_zeta.write_enabled_cache |= pgraph_zeta_write_enabled(pg); - - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ZWRITEENABLE, parameter); -} - -DEF_METHOD(NV097, SET_STENCIL_MASK) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE, parameter); -} - -DEF_METHOD(NV097, SET_STENCIL_FUNC) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_FUNC, parameter & 0xF); -} - -DEF_METHOD(NV097, SET_STENCIL_FUNC_REF) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_REF, parameter); -} - -DEF_METHOD(NV097, SET_STENCIL_FUNC_MASK) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ, parameter); -} - -DEF_METHOD(NV097, SET_STENCIL_OP_FAIL) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL, - kelvin_map_stencil_op(parameter)); -} - -DEF_METHOD(NV097, SET_STENCIL_OP_ZFAIL) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL, - kelvin_map_stencil_op(parameter)); -} - -DEF_METHOD(NV097, SET_STENCIL_OP_ZPASS) -{ - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS, - kelvin_map_stencil_op(parameter)); -} - -DEF_METHOD(NV097, SET_SHADE_MODE) -{ - switch (parameter) { - case NV097_SET_SHADE_MODE_V_FLAT: - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_SHADEMODE, - NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT); - break; - case NV097_SET_SHADE_MODE_V_SMOOTH: - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_SHADEMODE, - NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH); - break; - default: - /* Discard */ - break; - } -} - -DEF_METHOD(NV097, SET_POLYGON_OFFSET_SCALE_FACTOR) -{ - 
pg->regs[NV_PGRAPH_ZOFFSETFACTOR] = parameter; -} - -DEF_METHOD(NV097, SET_POLYGON_OFFSET_BIAS) -{ - pg->regs[NV_PGRAPH_ZOFFSETBIAS] = parameter; -} - -DEF_METHOD(NV097, SET_FRONT_POLYGON_MODE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_FRONTFACEMODE, - kelvin_map_polygon_mode(parameter)); -} - -DEF_METHOD(NV097, SET_BACK_POLYGON_MODE) -{ - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_BACKFACEMODE, - kelvin_map_polygon_mode(parameter)); -} - -DEF_METHOD(NV097, SET_CLIP_MIN) -{ - pg->regs[NV_PGRAPH_ZCLIPMIN] = parameter; -} - -DEF_METHOD(NV097, SET_CLIP_MAX) -{ - pg->regs[NV_PGRAPH_ZCLIPMAX] = parameter; -} - -DEF_METHOD(NV097, SET_CULL_FACE) -{ - unsigned int face; - switch (parameter) { - case NV097_SET_CULL_FACE_V_FRONT: - face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT; break; - case NV097_SET_CULL_FACE_V_BACK: - face = NV_PGRAPH_SETUPRASTER_CULLCTRL_BACK; break; - case NV097_SET_CULL_FACE_V_FRONT_AND_BACK: - face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT_AND_BACK; break; - default: - assert(false); - break; - } - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_CULLCTRL, - face); -} - -DEF_METHOD(NV097, SET_FRONT_FACE) -{ - bool ccw; - switch (parameter) { - case NV097_SET_FRONT_FACE_V_CW: - ccw = false; break; - case NV097_SET_FRONT_FACE_V_CCW: - ccw = true; break; - default: - NV2A_DPRINTF("Unknown front face: 0x%08x\n", parameter); - return; /* discard */ - } - SET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_FRONTFACE, - ccw ? 
1 : 0); -} - -DEF_METHOD(NV097, SET_NORMALIZATION_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], - NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE, - parameter); -} - -DEF_METHOD_INC(NV097, SET_MATERIAL_EMISSION) -{ - int slot = (method - NV097_SET_MATERIAL_EMISSION) / 4; - // FIXME: Verify NV_IGRAPH_XF_LTCTXA_CM_COL is correct - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_CM_COL][slot] = parameter; - pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_CM_COL] = true; -} - -DEF_METHOD(NV097, SET_MATERIAL_ALPHA) -{ - pg->material_alpha = *(float*)¶meter; -} - -DEF_METHOD(NV097, SET_LIGHT_ENABLE_MASK) -{ - SET_MASK(d->pgraph.regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_LIGHTS, - parameter); -} - -DEF_METHOD(NV097, SET_TEXGEN_S) -{ - int slot = (method - NV097_SET_TEXGEN_S) / 16; - unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A - : NV_PGRAPH_CSV1_B; - unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_S - : NV_PGRAPH_CSV1_A_T0_S; - SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 0)); -} - -DEF_METHOD(NV097, SET_TEXGEN_T) -{ - int slot = (method - NV097_SET_TEXGEN_T) / 16; - unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A - : NV_PGRAPH_CSV1_B; - unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_T - : NV_PGRAPH_CSV1_A_T0_T; - SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 1)); -} - -DEF_METHOD(NV097, SET_TEXGEN_R) -{ - int slot = (method - NV097_SET_TEXGEN_R) / 16; - unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A - : NV_PGRAPH_CSV1_B; - unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_R - : NV_PGRAPH_CSV1_A_T0_R; - SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 2)); -} - -DEF_METHOD(NV097, SET_TEXGEN_Q) -{ - int slot = (method - NV097_SET_TEXGEN_Q) / 16; - unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A - : NV_PGRAPH_CSV1_B; - unsigned int mask = (slot % 2) ? 
NV_PGRAPH_CSV1_A_T1_Q - : NV_PGRAPH_CSV1_A_T0_Q; - SET_MASK(pg->regs[reg], mask, kelvin_map_texgen(parameter, 3)); -} - -DEF_METHOD_INC(NV097, SET_TEXTURE_MATRIX_ENABLE) -{ - int slot = (method - NV097_SET_TEXTURE_MATRIX_ENABLE) / 4; - pg->texture_matrix_enable[slot] = parameter; -} - -DEF_METHOD(NV097, SET_POINT_SIZE) -{ - SET_MASK(pg->regs[NV_PGRAPH_POINTSIZE], NV097_SET_POINT_SIZE_V, parameter); -} - -DEF_METHOD_INC(NV097, SET_PROJECTION_MATRIX) -{ - int slot = (method - NV097_SET_PROJECTION_MATRIX) / 4; - // pg->projection_matrix[slot] = *(float*)¶meter; - unsigned int row = NV_IGRAPH_XF_XFCTX_PMAT0 + slot/4; - pg->vsh_constants[row][slot%4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD_INC(NV097, SET_MODEL_VIEW_MATRIX) -{ - int slot = (method - NV097_SET_MODEL_VIEW_MATRIX) / 4; - unsigned int matnum = slot / 16; - unsigned int entry = slot % 16; - unsigned int row = NV_IGRAPH_XF_XFCTX_MMAT0 + matnum*8 + entry/4; - pg->vsh_constants[row][entry % 4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD_INC(NV097, SET_INVERSE_MODEL_VIEW_MATRIX) -{ - int slot = (method - NV097_SET_INVERSE_MODEL_VIEW_MATRIX) / 4; - unsigned int matnum = slot / 16; - unsigned int entry = slot % 16; - unsigned int row = NV_IGRAPH_XF_XFCTX_IMMAT0 + matnum*8 + entry/4; - pg->vsh_constants[row][entry % 4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD_INC(NV097, SET_COMPOSITE_MATRIX) -{ - int slot = (method - NV097_SET_COMPOSITE_MATRIX) / 4; - unsigned int row = NV_IGRAPH_XF_XFCTX_CMAT0 + slot/4; - pg->vsh_constants[row][slot%4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD_INC(NV097, SET_TEXTURE_MATRIX) -{ - int slot = (method - NV097_SET_TEXTURE_MATRIX) / 4; - unsigned int tex = slot / 16; - unsigned int entry = slot % 16; - unsigned int row = NV_IGRAPH_XF_XFCTX_T0MAT + tex*8 + entry/4; - pg->vsh_constants[row][entry%4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD_INC(NV097, 
SET_FOG_PARAMS) -{ - int slot = (method - NV097_SET_FOG_PARAMS) / 4; - if (slot < 2) { - pg->regs[NV_PGRAPH_FOGPARAM0 + slot*4] = parameter; - } else { - /* FIXME: No idea where slot = 2 is */ - } - - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FOG_K][slot] = parameter; - pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FOG_K] = true; -} - -/* Handles NV097_SET_TEXGEN_PLANE_S,T,R,Q */ -DEF_METHOD_INC(NV097, SET_TEXGEN_PLANE_S) -{ - int slot = (method - NV097_SET_TEXGEN_PLANE_S) / 4; - unsigned int tex = slot / 16; - unsigned int entry = slot % 16; - unsigned int row = NV_IGRAPH_XF_XFCTX_TG0MAT + tex*8 + entry/4; - pg->vsh_constants[row][entry%4] = parameter; - pg->vsh_constants_dirty[row] = true; -} - -DEF_METHOD(NV097, SET_TEXGEN_VIEW_MODEL) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_TEXGEN_REF, - parameter); -} - -DEF_METHOD_INC(NV097, SET_FOG_PLANE) -{ - int slot = (method - NV097_SET_FOG_PLANE) / 4; - pg->vsh_constants[NV_IGRAPH_XF_XFCTX_FOG][slot] = parameter; - pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_FOG] = true; -} - -DEF_METHOD_INC(NV097, SET_SCENE_AMBIENT_COLOR) -{ - int slot = (method - NV097_SET_SCENE_AMBIENT_COLOR) / 4; - // ?? - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FR_AMB][slot] = parameter; - pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FR_AMB] = true; -} - -DEF_METHOD_INC(NV097, SET_VIEWPORT_OFFSET) -{ - int slot = (method - NV097_SET_VIEWPORT_OFFSET) / 4; - pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][slot] = parameter; - pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPOFF] = true; -} - -DEF_METHOD_INC(NV097, SET_POINT_PARAMS) -{ - int slot = (method - NV097_SET_POINT_PARAMS) / 4; - pg->point_params[slot] = *(float *)¶meter; /* FIXME: Where? 
*/ -} - -DEF_METHOD_INC(NV097, SET_EYE_POSITION) -{ - int slot = (method - NV097_SET_EYE_POSITION) / 4; - pg->vsh_constants[NV_IGRAPH_XF_XFCTX_EYEP][slot] = parameter; - pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_EYEP] = true; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_FACTOR0) -{ - int slot = (method - NV097_SET_COMBINER_FACTOR0) / 4; - pg->regs[NV_PGRAPH_COMBINEFACTOR0 + slot*4] = parameter; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_FACTOR1) -{ - int slot = (method - NV097_SET_COMBINER_FACTOR1) / 4; - pg->regs[NV_PGRAPH_COMBINEFACTOR1 + slot*4] = parameter; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_ALPHA_OCW) -{ - int slot = (method - NV097_SET_COMBINER_ALPHA_OCW) / 4; - pg->regs[NV_PGRAPH_COMBINEALPHAO0 + slot*4] = parameter; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_COLOR_ICW) -{ - int slot = (method - NV097_SET_COMBINER_COLOR_ICW) / 4; - pg->regs[NV_PGRAPH_COMBINECOLORI0 + slot*4] = parameter; -} - -DEF_METHOD_INC(NV097, SET_VIEWPORT_SCALE) -{ - int slot = (method - NV097_SET_VIEWPORT_SCALE) / 4; - pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPSCL][slot] = parameter; - pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPSCL] = true; -} - -DEF_METHOD_INC(NV097, SET_TRANSFORM_PROGRAM) -{ - int slot = (method - NV097_SET_TRANSFORM_PROGRAM) / 4; - - int program_load = GET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR); - - assert(program_load < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); - pg->program_data[program_load][slot%4] = parameter; - pg->program_data_dirty = true; - - if (slot % 4 == 3) { - SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, program_load+1); - } -} - -DEF_METHOD_INC(NV097, SET_TRANSFORM_CONSTANT) -{ - int slot = (method - NV097_SET_TRANSFORM_CONSTANT) / 4; - int const_load = GET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR); - - assert(const_load < NV2A_VERTEXSHADER_CONSTANTS); - // VertexShaderConstant *constant = &pg->constants[const_load]; - 
pg->vsh_constants_dirty[const_load] |= - (parameter != pg->vsh_constants[const_load][slot%4]); - pg->vsh_constants[const_load][slot%4] = parameter; - - if (slot % 4 == 3) { - SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, const_load+1); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX3F) -{ - int slot = (method - NV097_SET_VERTEX3F) / 4; - VertexAttribute *attribute = - &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION]; - pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION); - attribute->inline_value[slot] = *(float*)¶meter; - attribute->inline_value[3] = 1.0f; - if (slot == 2) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -/* Handles NV097_SET_BACK_LIGHT_* */ -DEF_METHOD_INC(NV097, SET_BACK_LIGHT_AMBIENT_COLOR) -{ - int slot = (method - NV097_SET_BACK_LIGHT_AMBIENT_COLOR) / 4; - unsigned int part = NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4 + slot % 16; - slot /= 16; /* [Light index] */ - assert(slot < 8); - switch(part * 4) { - case NV097_SET_BACK_LIGHT_AMBIENT_COLOR ... - NV097_SET_BACK_LIGHT_AMBIENT_COLOR + 8: - part -= NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6] = true; - break; - case NV097_SET_BACK_LIGHT_DIFFUSE_COLOR ... - NV097_SET_BACK_LIGHT_DIFFUSE_COLOR + 8: - part -= NV097_SET_BACK_LIGHT_DIFFUSE_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6] = true; - break; - case NV097_SET_BACK_LIGHT_SPECULAR_COLOR ... 
- NV097_SET_BACK_LIGHT_SPECULAR_COLOR + 8: - part -= NV097_SET_BACK_LIGHT_SPECULAR_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6] = true; - break; - default: - assert(false); - break; - } -} - -/* Handles all the light source props except for NV097_SET_BACK_LIGHT_* */ -DEF_METHOD_INC(NV097, SET_LIGHT_AMBIENT_COLOR) -{ - int slot = (method - NV097_SET_LIGHT_AMBIENT_COLOR) / 4; - unsigned int part = NV097_SET_LIGHT_AMBIENT_COLOR / 4 + slot % 32; - slot /= 32; /* [Light index] */ - assert(slot < 8); - switch(part * 4) { - case NV097_SET_LIGHT_AMBIENT_COLOR ... - NV097_SET_LIGHT_AMBIENT_COLOR + 8: - part -= NV097_SET_LIGHT_AMBIENT_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6] = true; - break; - case NV097_SET_LIGHT_DIFFUSE_COLOR ... - NV097_SET_LIGHT_DIFFUSE_COLOR + 8: - part -= NV097_SET_LIGHT_DIFFUSE_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6] = true; - break; - case NV097_SET_LIGHT_SPECULAR_COLOR ... - NV097_SET_LIGHT_SPECULAR_COLOR + 8: - part -= NV097_SET_LIGHT_SPECULAR_COLOR / 4; - pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6][part] = parameter; - pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6] = true; - break; - case NV097_SET_LIGHT_LOCAL_RANGE: - pg->ltc1[NV_IGRAPH_XF_LTC1_r0 + slot][0] = parameter; - pg->ltc1_dirty[NV_IGRAPH_XF_LTC1_r0 + slot] = true; - break; - case NV097_SET_LIGHT_INFINITE_HALF_VECTOR ... - NV097_SET_LIGHT_INFINITE_HALF_VECTOR + 8: - part -= NV097_SET_LIGHT_INFINITE_HALF_VECTOR / 4; - pg->light_infinite_half_vector[slot][part] = *(float*)¶meter; - break; - case NV097_SET_LIGHT_INFINITE_DIRECTION ... 
- NV097_SET_LIGHT_INFINITE_DIRECTION + 8: - part -= NV097_SET_LIGHT_INFINITE_DIRECTION / 4; - pg->light_infinite_direction[slot][part] = *(float*)¶meter; - break; - case NV097_SET_LIGHT_SPOT_FALLOFF ... - NV097_SET_LIGHT_SPOT_FALLOFF + 8: - part -= NV097_SET_LIGHT_SPOT_FALLOFF / 4; - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2][part] = parameter; - pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2] = true; - break; - case NV097_SET_LIGHT_SPOT_DIRECTION ... - NV097_SET_LIGHT_SPOT_DIRECTION + 12: - part -= NV097_SET_LIGHT_SPOT_DIRECTION / 4; - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2][part] = parameter; - pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2] = true; - break; - case NV097_SET_LIGHT_LOCAL_POSITION ... - NV097_SET_LIGHT_LOCAL_POSITION + 8: - part -= NV097_SET_LIGHT_LOCAL_POSITION / 4; - pg->light_local_position[slot][part] = *(float*)¶meter; - break; - case NV097_SET_LIGHT_LOCAL_ATTENUATION ... - NV097_SET_LIGHT_LOCAL_ATTENUATION + 8: - part -= NV097_SET_LIGHT_LOCAL_ATTENUATION / 4; - pg->light_local_attenuation[slot][part] = *(float*)¶meter; - break; - default: - assert(false); - break; - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX4F) -{ - int slot = (method - NV097_SET_VERTEX4F) / 4; - VertexAttribute *attribute = - &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION]; - pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION); - attribute->inline_value[slot] = *(float*)¶meter; - if (slot == 3) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD_INC(NV097, SET_NORMAL3S) -{ - int slot = (method - NV097_SET_NORMAL3S) / 4; - unsigned int part = slot % 2; - VertexAttribute *attribute = - &pg->vertex_attributes[NV2A_VERTEX_ATTR_NORMAL]; - pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_NORMAL); - int16_t val = parameter & 0xFFFF; - attribute->inline_value[part * 2 + 0] = MAX(-1.0f, (float)val / 32767.0f); - val = parameter >> 16; - attribute->inline_value[part * 2 + 1] = MAX(-1.0f, (float)val / 32767.0f); -} - 
-#define SET_VERTEX_ATTRIBUTE_4S(command, attr_index) \ - do { \ - int slot = (method - (command)) / 4; \ - unsigned int part = slot % 2; \ - VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ - attribute->inline_value[part * 2 + 0] = \ - (float)(int16_t)(parameter & 0xFFFF); \ - attribute->inline_value[part * 2 + 1] = \ - (float)(int16_t)(parameter >> 16); \ - } while (0) - -DEF_METHOD_INC(NV097, SET_TEXCOORD0_4S) -{ - SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD0_4S, NV2A_VERTEX_ATTR_TEXTURE0); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD1_4S) -{ - SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD1_4S, NV2A_VERTEX_ATTR_TEXTURE1); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD2_4S) -{ - SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD2_4S, NV2A_VERTEX_ATTR_TEXTURE2); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD3_4S) -{ - SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD3_4S, NV2A_VERTEX_ATTR_TEXTURE3); -} - -#undef SET_VERTEX_ATTRIBUTE_4S - -#define SET_VERTEX_ATRIBUTE_TEX_2S(attr_index) \ - do { \ - VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ - attribute->inline_value[0] = (float)(int16_t)(parameter & 0xFFFF); \ - attribute->inline_value[1] = (float)(int16_t)(parameter >> 16); \ - attribute->inline_value[2] = 0.0f; \ - attribute->inline_value[3] = 1.0f; \ - } while (0) - -DEF_METHOD_INC(NV097, SET_TEXCOORD0_2S) -{ - SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE0); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD1_2S) -{ - SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE1); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD2_2S) -{ - SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE2); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD3_2S) -{ - SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE3); -} - -#undef SET_VERTEX_ATRIBUTE_TEX_2S - -#define SET_VERTEX_COLOR_3F(command, attr_index) \ - do { \ - int slot = (method - (command)) / 4; \ - 
VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ - attribute->inline_value[slot] = *(float*)¶meter; \ - attribute->inline_value[3] = 1.0f; \ - } while (0) - -DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR3F) -{ - SET_VERTEX_COLOR_3F(NV097_SET_DIFFUSE_COLOR3F, NV2A_VERTEX_ATTR_DIFFUSE); -} - -DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR3F) -{ - SET_VERTEX_COLOR_3F(NV097_SET_SPECULAR_COLOR3F, NV2A_VERTEX_ATTR_SPECULAR); -} - -#undef SET_VERTEX_COLOR_3F - -#define SET_VERTEX_ATTRIBUTE_F(command, attr_index) \ - do { \ - int slot = (method - (command)) / 4; \ - VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ - attribute->inline_value[slot] = *(float*)¶meter; \ - } while (0) - -DEF_METHOD_INC(NV097, SET_NORMAL3F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_NORMAL3F, NV2A_VERTEX_ATTR_NORMAL); -} - -DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_DIFFUSE_COLOR4F, NV2A_VERTEX_ATTR_DIFFUSE); -} - -DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_SPECULAR_COLOR4F, - NV2A_VERTEX_ATTR_SPECULAR); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD0_4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD0_4F, NV2A_VERTEX_ATTR_TEXTURE0); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD1_4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD1_4F, NV2A_VERTEX_ATTR_TEXTURE1); -} - - -DEF_METHOD_INC(NV097, SET_TEXCOORD2_4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD2_4F, NV2A_VERTEX_ATTR_TEXTURE2); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD3_4F) -{ - SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD3_4F, NV2A_VERTEX_ATTR_TEXTURE3); -} - -#undef SET_VERTEX_ATTRIBUTE_F - -#define SET_VERTEX_ATRIBUTE_TEX_2F(command, attr_index) \ - do { \ - int slot = (method - (command)) / 4; \ - VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ 
- attribute->inline_value[slot] = *(float*)¶meter; \ - attribute->inline_value[2] = 0.0f; \ - attribute->inline_value[3] = 1.0f; \ - } while (0) - -DEF_METHOD_INC(NV097, SET_TEXCOORD0_2F) -{ - SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD0_2F, - NV2A_VERTEX_ATTR_TEXTURE0); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD1_2F) -{ - SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD1_2F, - NV2A_VERTEX_ATTR_TEXTURE1); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD2_2F) -{ - SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD2_2F, - NV2A_VERTEX_ATTR_TEXTURE2); -} - -DEF_METHOD_INC(NV097, SET_TEXCOORD3_2F) -{ - SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD3_2F, - NV2A_VERTEX_ATTR_TEXTURE3); -} - -#undef SET_VERTEX_ATRIBUTE_TEX_2F - -#define SET_VERTEX_ATTRIBUTE_4UB(command, attr_index) \ - do { \ - VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ - pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ - attribute->inline_value[0] = (parameter & 0xFF) / 255.0f; \ - attribute->inline_value[1] = ((parameter >> 8) & 0xFF) / 255.0f; \ - attribute->inline_value[2] = ((parameter >> 16) & 0xFF) / 255.0f; \ - attribute->inline_value[3] = ((parameter >> 24) & 0xFF) / 255.0f; \ - } while (0) - -DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR4UB) -{ - SET_VERTEX_ATTRIBUTE_4UB(NV097_SET_DIFFUSE_COLOR4UB, - NV2A_VERTEX_ATTR_DIFFUSE); -} - -DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR4UB) -{ - SET_VERTEX_ATTRIBUTE_4UB(NV097_SET_SPECULAR_COLOR4UB, - NV2A_VERTEX_ATTR_SPECULAR); -} - -#undef SET_VERTEX_ATTRIBUTE_4UB - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA_ARRAY_FORMAT) -{ - int slot = (method - NV097_SET_VERTEX_DATA_ARRAY_FORMAT) / 4; - VertexAttribute *attr = &pg->vertex_attributes[slot]; - attr->format = GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE); - attr->count = GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_SIZE); - attr->stride = GET_MASK(parameter, - NV097_SET_VERTEX_DATA_ARRAY_FORMAT_STRIDE); - attr->gl_count = attr->count; - - NV2A_DPRINTF("vertex data array 
format=%d, count=%d, stride=%d\n", - attr->format, attr->count, attr->stride); - - switch (attr->format) { - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: - attr->gl_type = GL_UNSIGNED_BYTE; - attr->gl_normalize = GL_TRUE; - attr->size = 1; - assert(attr->count == 4); - // http://www.opengl.org/registry/specs/ARB/vertex_array_bgra.txt - attr->gl_count = GL_BGRA; - attr->needs_conversion = false; - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: - attr->gl_type = GL_UNSIGNED_BYTE; - attr->gl_normalize = GL_TRUE; - attr->size = 1; - attr->needs_conversion = false; - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: - attr->gl_type = GL_SHORT; - attr->gl_normalize = GL_TRUE; - attr->size = 2; - attr->needs_conversion = false; - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: - attr->gl_type = GL_FLOAT; - attr->gl_normalize = GL_FALSE; - attr->size = 4; - attr->needs_conversion = false; - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: - attr->gl_type = GL_SHORT; - attr->gl_normalize = GL_FALSE; - attr->size = 2; - attr->needs_conversion = false; - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: - /* 3 signed, normalized components packed in 32-bits. 
(11,11,10) */ - attr->gl_type = GL_INT; - attr->size = 4; - assert(attr->count == 1); - attr->needs_conversion = true; - break; - default: - fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format); - assert(false); - break; - } - - if (attr->needs_conversion) { - pg->compressed_attrs |= (1 << slot); - } else { - pg->compressed_attrs &= ~(1 << slot); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA_ARRAY_OFFSET) -{ - int slot = (method - NV097_SET_VERTEX_DATA_ARRAY_OFFSET) / 4; - - pg->vertex_attributes[slot].dma_select = parameter & 0x80000000; - pg->vertex_attributes[slot].offset = parameter & 0x7fffffff; -} - -DEF_METHOD(NV097, SET_LOGIC_OP_ENABLE) -{ - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_LOGICOP_ENABLE, - parameter); -} - -DEF_METHOD(NV097, SET_LOGIC_OP) -{ - SET_MASK(pg->regs[NV_PGRAPH_BLEND], NV_PGRAPH_BLEND_LOGICOP, - parameter & 0xF); -} - -static void pgraph_process_pending_report(NV2AState *d, QueryReport *r) -{ - PGRAPHState *pg = &d->pgraph; - - if (r->clear) { - pg->zpass_pixel_count_result = 0; - return; - } - - uint8_t type = GET_MASK(r->parameter, NV097_GET_REPORT_TYPE); - assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT); - - /* FIXME: Multisampling affects this (both: OGL and Xbox GPU), - * not sure if CLEARs also count - */ - /* FIXME: What about clipping regions etc? */ - for (int i = 0; i < r->query_count; i++) { - GLuint gl_query_result = 0; - glGetQueryObjectuiv(r->queries[i], GL_QUERY_RESULT, &gl_query_result); - gl_query_result /= pg->surface_scale_factor * pg->surface_scale_factor; - pg->zpass_pixel_count_result += gl_query_result; - } - - if (r->query_count) { - glDeleteQueries(r->query_count, r->queries); - g_free(r->queries); - } - - uint64_t timestamp = 0x0011223344556677; /* FIXME: Update timestamp?! 
*/ - uint32_t done = 0; - - hwaddr report_dma_len; - uint8_t *report_data = - (uint8_t *)nv_dma_map(d, pg->dma_report, &report_dma_len); - - hwaddr offset = GET_MASK(r->parameter, NV097_GET_REPORT_OFFSET); - assert(offset < report_dma_len); - report_data += offset; - - stq_le_p((uint64_t *)&report_data[0], timestamp); - stl_le_p((uint32_t *)&report_data[8], pg->zpass_pixel_count_result); - stl_le_p((uint32_t *)&report_data[12], done); -} - -void pgraph_process_pending_reports(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - QueryReport *r, *next; - - QSIMPLEQ_FOREACH_SAFE(r, &pg->report_queue, entry, next) { - pgraph_process_pending_report(d, r); - QSIMPLEQ_REMOVE_HEAD(&pg->report_queue, entry); - g_free(r); - } -} - -DEF_METHOD(NV097, CLEAR_REPORT_VALUE) -{ - /* FIXME: Does this have a value in parameter? Also does this (also?) modify - * the report memory block? - */ - if (pg->gl_zpass_pixel_count_query_count) { - glDeleteQueries(pg->gl_zpass_pixel_count_query_count, - pg->gl_zpass_pixel_count_queries); - pg->gl_zpass_pixel_count_query_count = 0; - } - - QueryReport *r = g_malloc(sizeof(QueryReport)); - r->clear = true; - QSIMPLEQ_INSERT_TAIL(&pg->report_queue, r, entry); -} - -DEF_METHOD(NV097, SET_ZPASS_PIXEL_COUNT_ENABLE) -{ - pg->zpass_pixel_count_enable = parameter; -} - -DEF_METHOD(NV097, GET_REPORT) -{ - uint8_t type = GET_MASK(parameter, NV097_GET_REPORT_TYPE); - assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT); - - QueryReport *r = g_malloc(sizeof(QueryReport)); - r->clear = false; - r->parameter = parameter; - r->query_count = pg->gl_zpass_pixel_count_query_count; - r->queries = pg->gl_zpass_pixel_count_queries; - QSIMPLEQ_INSERT_TAIL(&pg->report_queue, r, entry); - - pg->gl_zpass_pixel_count_query_count = 0; - pg->gl_zpass_pixel_count_queries = NULL; -} - -DEF_METHOD_INC(NV097, SET_EYE_DIRECTION) -{ - int slot = (method - NV097_SET_EYE_DIRECTION) / 4; - pg->ltctxa[NV_IGRAPH_XF_LTCTXA_EYED][slot] = parameter; - 
pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_EYED] = true; -} - -static void pgraph_reset_draw_arrays(PGRAPHState *pg) -{ - pg->draw_arrays_length = 0; - pg->draw_arrays_min_start = -1; - pg->draw_arrays_max_count = 0; - pg->draw_arrays_prevent_connect = false; -} - -static void pgraph_reset_inline_buffers(PGRAPHState *pg) -{ - pg->inline_elements_length = 0; - pg->inline_array_length = 0; - pg->inline_buffer_length = 0; - pgraph_reset_draw_arrays(pg); -} - -static void pgraph_flush_draw(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - if (!(pg->color_binding || pg->zeta_binding)) { - pgraph_reset_inline_buffers(pg); - return; - } - assert(pg->shader_binding); - - if (pg->draw_arrays_length) { - NV2A_GL_DPRINTF(false, "Draw Arrays"); - nv2a_profile_inc_counter(NV2A_PROF_DRAW_ARRAYS); - assert(pg->inline_elements_length == 0); - assert(pg->inline_buffer_length == 0); - assert(pg->inline_array_length == 0); - - pgraph_bind_vertex_attributes(d, pg->draw_arrays_min_start, - pg->draw_arrays_max_count - 1, - false, 0, - pg->draw_arrays_max_count - 1); - glMultiDrawArrays(pg->shader_binding->gl_primitive_mode, - pg->gl_draw_arrays_start, - pg->gl_draw_arrays_count, - pg->draw_arrays_length); - } else if (pg->inline_elements_length) { - NV2A_GL_DPRINTF(false, "Inline Elements"); - nv2a_profile_inc_counter(NV2A_PROF_INLINE_ELEMENTS); - assert(pg->inline_buffer_length == 0); - assert(pg->inline_array_length == 0); - - uint32_t min_element = (uint32_t)-1; - uint32_t max_element = 0; - for (int i=0; i < pg->inline_elements_length; i++) { - max_element = MAX(pg->inline_elements[i], max_element); - min_element = MIN(pg->inline_elements[i], min_element); - } - - pgraph_bind_vertex_attributes( - d, min_element, max_element, false, 0, - pg->inline_elements[pg->inline_elements_length - 1]); - - VertexKey k; - memset(&k, 0, sizeof(VertexKey)); - k.count = pg->inline_elements_length; - k.gl_type = GL_UNSIGNED_INT; - k.gl_normalize = GL_FALSE; - k.stride = sizeof(uint32_t); - uint64_t h = 
fast_hash((uint8_t*)pg->inline_elements, - pg->inline_elements_length * 4); - - LruNode *node = lru_lookup(&pg->element_cache, h, &k); - VertexLruNode *found = container_of(node, VertexLruNode, node); - glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, found->gl_buffer); - if (!found->initialized) { - nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4); - glBufferData(GL_ELEMENT_ARRAY_BUFFER, - pg->inline_elements_length * 4, - pg->inline_elements, GL_STATIC_DRAW); - found->initialized = true; - } else { - nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4_NOTDIRTY); - } - glDrawElements(pg->shader_binding->gl_primitive_mode, - pg->inline_elements_length, GL_UNSIGNED_INT, - (void *)0); - } else if (pg->inline_buffer_length) { - NV2A_GL_DPRINTF(false, "Inline Buffer"); - nv2a_profile_inc_counter(NV2A_PROF_INLINE_BUFFERS); - assert(pg->inline_array_length == 0); - - if (pg->compressed_attrs) { - pg->compressed_attrs = 0; - pgraph_bind_shaders(pg); - } - - for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - VertexAttribute *attr = &pg->vertex_attributes[i]; - if (attr->inline_buffer_populated) { - nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_3); - glBindBuffer(GL_ARRAY_BUFFER, attr->gl_inline_buffer); - glBufferData(GL_ARRAY_BUFFER, - pg->inline_buffer_length * sizeof(float) * 4, - attr->inline_buffer, GL_STREAM_DRAW); - glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, 0, 0); - glEnableVertexAttribArray(i); - attr->inline_buffer_populated = false; - memcpy(attr->inline_value, - attr->inline_buffer + (pg->inline_buffer_length - 1) * 4, - sizeof(attr->inline_value)); - } else { - glDisableVertexAttribArray(i); - glVertexAttrib4fv(i, attr->inline_value); - } - } - - glDrawArrays(pg->shader_binding->gl_primitive_mode, - 0, pg->inline_buffer_length); - } else if (pg->inline_array_length) { - NV2A_GL_DPRINTF(false, "Inline Array"); - nv2a_profile_inc_counter(NV2A_PROF_INLINE_ARRAYS); - - unsigned int index_count = pgraph_bind_inline_array(d); - 
glDrawArrays(pg->shader_binding->gl_primitive_mode, - 0, index_count); - } else { - NV2A_GL_DPRINTF(true, "EMPTY NV097_SET_BEGIN_END"); - NV2A_UNCONFIRMED("EMPTY NV097_SET_BEGIN_END"); - } - - pgraph_reset_inline_buffers(pg); -} - -DEF_METHOD(NV097, SET_BEGIN_END) -{ - uint32_t control_0 = pg->regs[NV_PGRAPH_CONTROL_0]; - bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE; - bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE; - bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE; - bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE; - bool color_write = mask_alpha || mask_red || mask_green || mask_blue; - bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE; - bool stencil_test = - pg->regs[NV_PGRAPH_CONTROL_1] & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE; - bool is_nop_draw = !(color_write || depth_test || stencil_test); - - if (parameter == NV097_SET_BEGIN_END_OP_END) { - if (pg->primitive_mode == PRIM_TYPE_INVALID) { - NV2A_DPRINTF("End without Begin!\n"); - } - nv2a_profile_inc_counter(NV2A_PROF_BEGIN_ENDS); - - if (is_nop_draw) { - // FIXME: Check PGRAPH register 0x880. - // HW uses bit 11 in 0x880 to enable or disable a color/zeta limit - // check that will raise an exception in the case that a draw should - // modify the color and/or zeta buffer but the target(s) are masked - // off. This check only seems to trigger during the fragment - // processing, it is legal to attempt a draw that is entirely - // clipped regardless of 0x880. See xemu#635 for context. 
- return; - } - - pgraph_flush_draw(d); - - /* End of visibility testing */ - if (pg->zpass_pixel_count_enable) { - nv2a_profile_inc_counter(NV2A_PROF_QUERY); - glEndQuery(GL_SAMPLES_PASSED); - } - - pg->draw_time++; - if (pg->color_binding && pgraph_color_write_enabled(pg)) { - pg->color_binding->draw_time = pg->draw_time; - } - if (pg->zeta_binding && pgraph_zeta_write_enabled(pg)) { - pg->zeta_binding->draw_time = pg->draw_time; - } - - pgraph_set_surface_dirty(pg, color_write, depth_test || stencil_test); - - NV2A_GL_DGROUP_END(); - pg->primitive_mode = PRIM_TYPE_INVALID; - } else { - NV2A_GL_DGROUP_BEGIN("NV097_SET_BEGIN_END: 0x%x", parameter); - if (pg->primitive_mode != PRIM_TYPE_INVALID) { - NV2A_DPRINTF("Begin without End!\n"); - } - assert(parameter <= NV097_SET_BEGIN_END_OP_POLYGON); - pg->primitive_mode = parameter; - - pgraph_update_surface(d, true, true, depth_test || stencil_test); - pgraph_reset_inline_buffers(pg); - - if (is_nop_draw) { - return; - } - - assert(pg->color_binding || pg->zeta_binding); - - pgraph_bind_textures(d); - pgraph_bind_shaders(pg); - - glColorMask(mask_red, mask_green, mask_blue, mask_alpha); - glDepthMask(!!(control_0 & NV_PGRAPH_CONTROL_0_ZWRITEENABLE)); - glStencilMask(GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE)); - - if (pg->regs[NV_PGRAPH_BLEND] & NV_PGRAPH_BLEND_EN) { - glEnable(GL_BLEND); - uint32_t sfactor = GET_MASK(pg->regs[NV_PGRAPH_BLEND], - NV_PGRAPH_BLEND_SFACTOR); - uint32_t dfactor = GET_MASK(pg->regs[NV_PGRAPH_BLEND], - NV_PGRAPH_BLEND_DFACTOR); - assert(sfactor < ARRAY_SIZE(pgraph_blend_factor_map)); - assert(dfactor < ARRAY_SIZE(pgraph_blend_factor_map)); - glBlendFunc(pgraph_blend_factor_map[sfactor], - pgraph_blend_factor_map[dfactor]); - - uint32_t equation = GET_MASK(pg->regs[NV_PGRAPH_BLEND], - NV_PGRAPH_BLEND_EQN); - assert(equation < ARRAY_SIZE(pgraph_blend_equation_map)); - glBlendEquation(pgraph_blend_equation_map[equation]); - - uint32_t blend_color = 
pg->regs[NV_PGRAPH_BLENDCOLOR]; - glBlendColor( ((blend_color >> 16) & 0xFF) / 255.0f, /* red */ - ((blend_color >> 8) & 0xFF) / 255.0f, /* green */ - (blend_color & 0xFF) / 255.0f, /* blue */ - ((blend_color >> 24) & 0xFF) / 255.0f);/* alpha */ - } else { - glDisable(GL_BLEND); - } - - /* Face culling */ - if (pg->regs[NV_PGRAPH_SETUPRASTER] - & NV_PGRAPH_SETUPRASTER_CULLENABLE) { - uint32_t cull_face = GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_CULLCTRL); - assert(cull_face < ARRAY_SIZE(pgraph_cull_face_map)); - glCullFace(pgraph_cull_face_map[cull_face]); - glEnable(GL_CULL_FACE); - } else { - glDisable(GL_CULL_FACE); - } - - /* Clipping */ - glEnable(GL_CLIP_DISTANCE0); - glEnable(GL_CLIP_DISTANCE1); - - /* Front-face select */ - glFrontFace(pg->regs[NV_PGRAPH_SETUPRASTER] - & NV_PGRAPH_SETUPRASTER_FRONTFACE - ? GL_CCW : GL_CW); - - /* Polygon offset */ - /* FIXME: GL implementation-specific, maybe do this in VS? */ - if (pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE) { - glEnable(GL_POLYGON_OFFSET_FILL); - } else { - glDisable(GL_POLYGON_OFFSET_FILL); - } - if (pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE) { - glEnable(GL_POLYGON_OFFSET_LINE); - } else { - glDisable(GL_POLYGON_OFFSET_LINE); - } - if (pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE) { - glEnable(GL_POLYGON_OFFSET_POINT); - } else { - glDisable(GL_POLYGON_OFFSET_POINT); - } - if (pg->regs[NV_PGRAPH_SETUPRASTER] & - (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE | - NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) { - GLfloat zfactor = *(float*)&pg->regs[NV_PGRAPH_ZOFFSETFACTOR]; - GLfloat zbias = *(float*)&pg->regs[NV_PGRAPH_ZOFFSETBIAS]; - glPolygonOffset(zfactor, zbias); - } - - /* Depth testing */ - if (depth_test) { - glEnable(GL_DEPTH_TEST); - - uint32_t depth_func = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ZFUNC); - 
assert(depth_func < ARRAY_SIZE(pgraph_depth_func_map)); - glDepthFunc(pgraph_depth_func_map[depth_func]); - } else { - glDisable(GL_DEPTH_TEST); - } - - if (GET_MASK(pg->regs[NV_PGRAPH_ZCOMPRESSOCCLUDE], - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) == - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) { - glEnable(GL_DEPTH_CLAMP); - } else { - glDisable(GL_DEPTH_CLAMP); - } - - if (GET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], - NV_PGRAPH_CONTROL_3_SHADEMODE) == - NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT) { - glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); - } - - if (stencil_test) { - glEnable(GL_STENCIL_TEST); - - uint32_t stencil_func = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_FUNC); - uint32_t stencil_ref = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_REF); - uint32_t func_mask = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_1], - NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ); - uint32_t op_fail = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL); - uint32_t op_zfail = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL); - uint32_t op_zpass = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_2], - NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS); - - assert(stencil_func < ARRAY_SIZE(pgraph_stencil_func_map)); - assert(op_fail < ARRAY_SIZE(pgraph_stencil_op_map)); - assert(op_zfail < ARRAY_SIZE(pgraph_stencil_op_map)); - assert(op_zpass < ARRAY_SIZE(pgraph_stencil_op_map)); - - glStencilFunc( - pgraph_stencil_func_map[stencil_func], - stencil_ref, - func_mask); - - glStencilOp( - pgraph_stencil_op_map[op_fail], - pgraph_stencil_op_map[op_zfail], - pgraph_stencil_op_map[op_zpass]); - - } else { - glDisable(GL_STENCIL_TEST); - } - - /* Dither */ - /* FIXME: GL implementation dependent */ - if (pg->regs[NV_PGRAPH_CONTROL_0] & - NV_PGRAPH_CONTROL_0_DITHERENABLE) { - glEnable(GL_DITHER); - } else { - glDisable(GL_DITHER); - } - - glEnable(GL_PROGRAM_POINT_SIZE); - - bool anti_aliasing = 
GET_MASK(pg->regs[NV_PGRAPH_ANTIALIASING], NV_PGRAPH_ANTIALIASING_ENABLE); - - /* Edge Antialiasing */ - if (!anti_aliasing && pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) { - glEnable(GL_LINE_SMOOTH); - } else { - glDisable(GL_LINE_SMOOTH); - } - if (!anti_aliasing && pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) { - glEnable(GL_POLYGON_SMOOTH); - } else { - glDisable(GL_POLYGON_SMOOTH); - } - - unsigned int vp_width = pg->surface_binding_dim.width, - vp_height = pg->surface_binding_dim.height; - pgraph_apply_scaling_factor(pg, &vp_width, &vp_height); - glViewport(0, 0, vp_width, vp_height); - - /* Surface clip */ - /* FIXME: Consider moving to PSH w/ window clip */ - unsigned int xmin = pg->surface_shape.clip_x - pg->surface_binding_dim.clip_x, - ymin = pg->surface_shape.clip_y - pg->surface_binding_dim.clip_y; - unsigned int xmax = xmin + pg->surface_shape.clip_width - 1, - ymax = ymin + pg->surface_shape.clip_height - 1; - - unsigned int scissor_width = xmax - xmin + 1, - scissor_height = ymax - ymin + 1; - pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); - pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); - ymin = pg->surface_binding_dim.height - (ymin + scissor_height); - pgraph_apply_scaling_factor(pg, &xmin, &ymin); - pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height); - - glEnable(GL_SCISSOR_TEST); - glScissor(xmin, ymin, scissor_width, scissor_height); - - /* Visibility testing */ - if (pg->zpass_pixel_count_enable) { - pg->gl_zpass_pixel_count_query_count++; - pg->gl_zpass_pixel_count_queries = (GLuint*)g_realloc( - pg->gl_zpass_pixel_count_queries, - sizeof(GLuint) * pg->gl_zpass_pixel_count_query_count); - - GLuint gl_query; - glGenQueries(1, &gl_query); - pg->gl_zpass_pixel_count_queries[ - pg->gl_zpass_pixel_count_query_count - 1] = gl_query; - glBeginQuery(GL_SAMPLES_PASSED, gl_query); - } - } -} - -DEF_METHOD(NV097, SET_TEXTURE_OFFSET) -{ - int slot 
= (method - NV097_SET_TEXTURE_OFFSET) / 64; - pg->regs[NV_PGRAPH_TEXOFFSET0 + slot * 4] = parameter; - pg->texture_dirty[slot] = true; -} - -DEF_METHOD(NV097, SET_TEXTURE_FORMAT) -{ - int slot = (method - NV097_SET_TEXTURE_FORMAT) / 64; - - bool dma_select = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CONTEXT_DMA) == 2; - bool cubemap = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CUBEMAP_ENABLE); - unsigned int border_source = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE); - unsigned int dimensionality = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_DIMENSIONALITY); - unsigned int color_format = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_COLOR); - unsigned int levels = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_MIPMAP_LEVELS); - unsigned int log_width = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_U); - unsigned int log_height = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_V); - unsigned int log_depth = - GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_P); - - uint32_t *reg = &pg->regs[NV_PGRAPH_TEXFMT0 + slot * 4]; - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_CONTEXT_DMA, dma_select); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE, cubemap); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BORDER_SOURCE, border_source); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_DIMENSIONALITY, dimensionality); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_COLOR, color_format); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS, levels); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_U, log_width); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_V, log_height); - SET_MASK(*reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_P, log_depth); - - pg->texture_dirty[slot] = true; -} - -DEF_METHOD(NV097, SET_TEXTURE_CONTROL0) -{ - int slot = (method - NV097_SET_TEXTURE_CONTROL0) / 64; - pg->regs[NV_PGRAPH_TEXCTL0_0 + slot*4] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_CONTROL1) -{ - int slot = (method - NV097_SET_TEXTURE_CONTROL1) / 64; - pg->regs[NV_PGRAPH_TEXCTL1_0 + slot*4] = 
parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_FILTER) -{ - int slot = (method - NV097_SET_TEXTURE_FILTER) / 64; - pg->regs[NV_PGRAPH_TEXFILTER0 + slot * 4] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_IMAGE_RECT) -{ - int slot = (method - NV097_SET_TEXTURE_IMAGE_RECT) / 64; - pg->regs[NV_PGRAPH_TEXIMAGERECT0 + slot * 4] = parameter; - pg->texture_dirty[slot] = true; -} - -DEF_METHOD(NV097, SET_TEXTURE_PALETTE) -{ - int slot = (method - NV097_SET_TEXTURE_PALETTE) / 64; - - bool dma_select = - GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_CONTEXT_DMA) == 1; - unsigned int length = - GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_LENGTH); - unsigned int offset = - GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_OFFSET); - - uint32_t *reg = &pg->regs[NV_PGRAPH_TEXPALETTE0 + slot * 4]; - SET_MASK(*reg, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA, dma_select); - SET_MASK(*reg, NV_PGRAPH_TEXPALETTE0_LENGTH, length); - SET_MASK(*reg, NV_PGRAPH_TEXPALETTE0_OFFSET, offset); - - pg->texture_dirty[slot] = true; -} - -DEF_METHOD(NV097, SET_TEXTURE_BORDER_COLOR) -{ - int slot = (method - NV097_SET_TEXTURE_BORDER_COLOR) / 64; - pg->regs[NV_PGRAPH_BORDERCOLOR0 + slot * 4] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_MAT) -{ - int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_MAT) / 4; - if (slot < 16) { - /* discard */ - return; - } - - slot -= 16; - const int swizzle[4] = { NV_PGRAPH_BUMPMAT00, NV_PGRAPH_BUMPMAT01, - NV_PGRAPH_BUMPMAT11, NV_PGRAPH_BUMPMAT10 }; - pg->regs[swizzle[slot % 4] + slot / 4] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_SCALE) -{ - int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_SCALE) / 64; - if (slot == 0) { - /* discard */ - return; - } - - slot--; - pg->regs[NV_PGRAPH_BUMPSCALE1 + slot * 4] = parameter; -} - -DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_OFFSET) -{ - int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_OFFSET) / 64; - if (slot == 0) { - /* discard */ - return; - } - - slot--; - 
pg->regs[NV_PGRAPH_BUMPOFFSET1 + slot * 4] = parameter; -} - -static void pgraph_expand_draw_arrays(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - GLint start = pg->gl_draw_arrays_start[pg->draw_arrays_length - 1]; - GLsizei count = pg->gl_draw_arrays_count[pg->draw_arrays_length - 1]; - - /* Render any previously squashed DRAW_ARRAYS calls. This case would be - * triggered if a set of BEGIN+DA+END triplets is followed by the - * BEGIN+DA+ARRAY_ELEMENT+... chain that caused this expansion. */ - if (pg->draw_arrays_length > 1) { - pgraph_flush_draw(d); - } - assert((pg->inline_elements_length + count) < NV2A_MAX_BATCH_LENGTH); - for (unsigned int i = 0; i < count; i++) { - pg->inline_elements[pg->inline_elements_length++] = start + i; - } - - pgraph_reset_draw_arrays(pg); -} - -static void pgraph_check_within_begin_end_block(PGRAPHState *pg) -{ - if (pg->primitive_mode == PRIM_TYPE_INVALID) { - NV2A_DPRINTF("Vertex data being sent outside of begin/end block!\n"); - } -} - -DEF_METHOD_NON_INC(NV097, ARRAY_ELEMENT16) -{ - pgraph_check_within_begin_end_block(pg); - - if (pg->draw_arrays_length) { - pgraph_expand_draw_arrays(d); - } - - assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH); - pg->inline_elements[pg->inline_elements_length++] = parameter & 0xFFFF; - pg->inline_elements[pg->inline_elements_length++] = parameter >> 16; -} - -DEF_METHOD_NON_INC(NV097, ARRAY_ELEMENT32) -{ - pgraph_check_within_begin_end_block(pg); - - if (pg->draw_arrays_length) { - pgraph_expand_draw_arrays(d); - } - - assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH); - pg->inline_elements[pg->inline_elements_length++] = parameter; -} - -DEF_METHOD(NV097, DRAW_ARRAYS) -{ - pgraph_check_within_begin_end_block(pg); - - unsigned int start = GET_MASK(parameter, NV097_DRAW_ARRAYS_START_INDEX); - unsigned int count = GET_MASK(parameter, NV097_DRAW_ARRAYS_COUNT) + 1; - - if (pg->inline_elements_length) { - /* FIXME: Determine HW behavior for overflow case. 
*/ - assert((pg->inline_elements_length + count) < NV2A_MAX_BATCH_LENGTH); - assert(!pg->draw_arrays_prevent_connect); - - for (unsigned int i = 0; i < count; i++) { - pg->inline_elements[pg->inline_elements_length++] = start + i; - } - return; - } - - pg->draw_arrays_min_start = MIN(pg->draw_arrays_min_start, start); - pg->draw_arrays_max_count = MAX(pg->draw_arrays_max_count, start + count); - - assert(pg->draw_arrays_length < ARRAY_SIZE(pg->gl_draw_arrays_start)); - - /* Attempt to connect contiguous primitives */ - if (!pg->draw_arrays_prevent_connect && pg->draw_arrays_length > 0) { - unsigned int last_start = - pg->gl_draw_arrays_start[pg->draw_arrays_length - 1]; - GLsizei* last_count = - &pg->gl_draw_arrays_count[pg->draw_arrays_length - 1]; - if (start == (last_start + *last_count)) { - *last_count += count; - return; - } - } - - pg->gl_draw_arrays_start[pg->draw_arrays_length] = start; - pg->gl_draw_arrays_count[pg->draw_arrays_length] = count; - pg->draw_arrays_length++; - pg->draw_arrays_prevent_connect = false; -} - -DEF_METHOD_NON_INC(NV097, INLINE_ARRAY) -{ - pgraph_check_within_begin_end_block(pg); - assert(pg->inline_array_length < NV2A_MAX_BATCH_LENGTH); - pg->inline_array[pg->inline_array_length++] = parameter; -} - -DEF_METHOD_INC(NV097, SET_EYE_VECTOR) -{ - int slot = (method - NV097_SET_EYE_VECTOR) / 4; - pg->regs[NV_PGRAPH_EYEVEC0 + slot * 4] = parameter; -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA2F_M) -{ - int slot = (method - NV097_SET_VERTEX_DATA2F_M) / 4; - unsigned int part = slot % 2; - slot /= 2; - VertexAttribute *attribute = &pg->vertex_attributes[slot]; - pgraph_allocate_inline_buffer_vertices(pg, slot); - attribute->inline_value[part] = *(float*)¶meter; - /* FIXME: Should these really be set to 0.0 and 1.0 ? Conditions? 
*/ - attribute->inline_value[2] = 0.0; - attribute->inline_value[3] = 1.0; - if ((slot == 0) && (part == 1)) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA4F_M) -{ - int slot = (method - NV097_SET_VERTEX_DATA4F_M) / 4; - unsigned int part = slot % 4; - slot /= 4; - VertexAttribute *attribute = &pg->vertex_attributes[slot]; - pgraph_allocate_inline_buffer_vertices(pg, slot); - attribute->inline_value[part] = *(float*)¶meter; - if ((slot == 0) && (part == 3)) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA2S) -{ - int slot = (method - NV097_SET_VERTEX_DATA2S) / 4; - VertexAttribute *attribute = &pg->vertex_attributes[slot]; - pgraph_allocate_inline_buffer_vertices(pg, slot); - attribute->inline_value[0] = (float)(int16_t)(parameter & 0xFFFF); - attribute->inline_value[1] = (float)(int16_t)(parameter >> 16); - attribute->inline_value[2] = 0.0; - attribute->inline_value[3] = 1.0; - if (slot == 0) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA4UB) -{ - int slot = (method - NV097_SET_VERTEX_DATA4UB) / 4; - VertexAttribute *attribute = &pg->vertex_attributes[slot]; - pgraph_allocate_inline_buffer_vertices(pg, slot); - attribute->inline_value[0] = (parameter & 0xFF) / 255.0; - attribute->inline_value[1] = ((parameter >> 8) & 0xFF) / 255.0; - attribute->inline_value[2] = ((parameter >> 16) & 0xFF) / 255.0; - attribute->inline_value[3] = ((parameter >> 24) & 0xFF) / 255.0; - if (slot == 0) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD_INC(NV097, SET_VERTEX_DATA4S_M) -{ - int slot = (method - NV097_SET_VERTEX_DATA4S_M) / 4; - unsigned int part = slot % 2; - slot /= 2; - VertexAttribute *attribute = &pg->vertex_attributes[slot]; - pgraph_allocate_inline_buffer_vertices(pg, slot); - - attribute->inline_value[part * 2 + 0] = (float)(int16_t)(parameter & 0xFFFF); - attribute->inline_value[part * 2 + 1] = 
(float)(int16_t)(parameter >> 16); - if ((slot == 0) && (part == 1)) { - pgraph_finish_inline_buffer_vertex(pg); - } -} - -DEF_METHOD(NV097, SET_SEMAPHORE_OFFSET) -{ - pg->regs[NV_PGRAPH_SEMAPHOREOFFSET] = parameter; -} - -DEF_METHOD(NV097, BACK_END_WRITE_SEMAPHORE_RELEASE) -{ - pgraph_update_surface(d, false, true, true); - - //qemu_mutex_unlock(&d->pgraph.lock); - //qemu_mutex_lock_iothread(); - - uint32_t semaphore_offset = pg->regs[NV_PGRAPH_SEMAPHOREOFFSET]; - - hwaddr semaphore_dma_len; - uint8_t *semaphore_data = (uint8_t*)nv_dma_map(d, pg->dma_semaphore, - &semaphore_dma_len); - assert(semaphore_offset < semaphore_dma_len); - semaphore_data += semaphore_offset; - - stl_le_p((uint32_t*)semaphore_data, parameter); - - //qemu_mutex_lock(&d->pgraph.lock); - //qemu_mutex_unlock_iothread(); -} - -DEF_METHOD(NV097, SET_ZMIN_MAX_CONTROL) -{ - switch (GET_MASK(parameter, NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN)) { - case NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN_CULL: - SET_MASK(pg->regs[NV_PGRAPH_ZCOMPRESSOCCLUDE], - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN, - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CULL); - break; - case NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN_CLAMP: - SET_MASK(pg->regs[NV_PGRAPH_ZCOMPRESSOCCLUDE], - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN, - NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP); - break; - default: - /* FIXME: Should raise NV_PGRAPH_NSOURCE_DATA_ERROR_PENDING */ - assert(!"Invalid zclamp value"); - break; - } -} - -DEF_METHOD(NV097, SET_ANTI_ALIASING_CONTROL) -{ - SET_MASK(pg->regs[NV_PGRAPH_ANTIALIASING], NV_PGRAPH_ANTIALIASING_ENABLE, - GET_MASK(parameter, NV097_SET_ANTI_ALIASING_CONTROL_ENABLE)); - // FIXME: Handle the remaining bits (observed values 0xFFFF0000, 0xFFFF0001) -} - -DEF_METHOD(NV097, SET_ZSTENCIL_CLEAR_VALUE) -{ - pg->regs[NV_PGRAPH_ZSTENCILCLEARVALUE] = parameter; -} - -DEF_METHOD(NV097, SET_COLOR_CLEAR_VALUE) -{ - pg->regs[NV_PGRAPH_COLORCLEARVALUE] = parameter; -} - -DEF_METHOD(NV097, CLEAR_SURFACE) -{ - pg->clearing = true; - - 
NV2A_DPRINTF("---------PRE CLEAR ------\n"); - GLbitfield gl_mask = 0; - - bool write_color = (parameter & NV097_CLEAR_SURFACE_COLOR); - bool write_zeta = - (parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL)); - - if (write_zeta) { - uint32_t clear_zstencil = - d->pgraph.regs[NV_PGRAPH_ZSTENCILCLEARVALUE]; - GLint gl_clear_stencil; - GLfloat gl_clear_depth; - - switch(pg->surface_shape.zeta_format) { - case NV097_SET_SURFACE_FORMAT_ZETA_Z16: { - uint16_t z = clear_zstencil & 0xFFFF; - /* FIXME: Remove bit for stencil clear? */ - if (pg->surface_shape.z_format) { - gl_clear_depth = convert_f16_to_float(z) / f16_max; - } else { - gl_clear_depth = z / (float)0xFFFF; - } - break; - } - case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: { - gl_clear_stencil = clear_zstencil & 0xFF; - uint32_t z = clear_zstencil >> 8; - if (pg->surface_shape.z_format) { - gl_clear_depth = convert_f24_to_float(z) / f24_max; - } else { - gl_clear_depth = z / (float)0xFFFFFF; - } - break; - } - default: - fprintf(stderr, "Unknown zeta surface format: 0x%x\n", pg->surface_shape.zeta_format); - assert(false); - break; - } - if (parameter & NV097_CLEAR_SURFACE_Z) { - gl_mask |= GL_DEPTH_BUFFER_BIT; - glDepthMask(GL_TRUE); - glClearDepth(gl_clear_depth); - } - if (parameter & NV097_CLEAR_SURFACE_STENCIL) { - gl_mask |= GL_STENCIL_BUFFER_BIT; - glStencilMask(0xff); - glClearStencil(gl_clear_stencil); - } - } - if (write_color) { - gl_mask |= GL_COLOR_BUFFER_BIT; - glColorMask((parameter & NV097_CLEAR_SURFACE_R) - ? GL_TRUE : GL_FALSE, - (parameter & NV097_CLEAR_SURFACE_G) - ? GL_TRUE : GL_FALSE, - (parameter & NV097_CLEAR_SURFACE_B) - ? GL_TRUE : GL_FALSE, - (parameter & NV097_CLEAR_SURFACE_A) - ? 
GL_TRUE : GL_FALSE); - uint32_t clear_color = d->pgraph.regs[NV_PGRAPH_COLORCLEARVALUE]; - - /* Handle RGB */ - GLfloat red, green, blue; - switch(pg->surface_shape.color_format) { - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_O1R5G5B5: - red = ((clear_color >> 10) & 0x1F) / 31.0f; - green = ((clear_color >> 5) & 0x1F) / 31.0f; - blue = (clear_color & 0x1F) / 31.0f; - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: - red = ((clear_color >> 11) & 0x1F) / 31.0f; - green = ((clear_color >> 5) & 0x3F) / 63.0f; - blue = (clear_color & 0x1F) / 31.0f; - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_O8R8G8B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: - red = ((clear_color >> 16) & 0xFF) / 255.0f; - green = ((clear_color >> 8) & 0xFF) / 255.0f; - blue = (clear_color & 0xFF) / 255.0f; - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8: - /* Xbox D3D doesn't support clearing those */ - default: - red = 1.0f; - green = 0.0f; - blue = 1.0f; - fprintf(stderr, "CLEAR_SURFACE for color_format 0x%x unsupported", - pg->surface_shape.color_format); - assert(false); - break; - } - - /* Handle alpha */ - GLfloat alpha; - switch(pg->surface_shape.color_format) { - /* FIXME: CLEAR_SURFACE seems to work like memset, so maybe we - * also have to clear non-alpha bits with alpha value? - * As GL doesn't own those pixels we'd have to do this on - * our own in xbox memory. 
- */ - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8: - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8: - alpha = ((clear_color >> 24) & 0x7F) / 127.0f; - assert(false); /* Untested */ - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: - alpha = ((clear_color >> 24) & 0xFF) / 255.0f; - break; - default: - alpha = 1.0f; - break; - } - - glClearColor(red, green, blue, alpha); - } - - pgraph_update_surface(d, true, write_color, write_zeta); - - /* FIXME: Needs confirmation */ - unsigned int xmin = - GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTX], NV_PGRAPH_CLEARRECTX_XMIN); - unsigned int xmax = - GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTX], NV_PGRAPH_CLEARRECTX_XMAX); - unsigned int ymin = - GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTY], NV_PGRAPH_CLEARRECTY_YMIN); - unsigned int ymax = - GET_MASK(pg->regs[NV_PGRAPH_CLEARRECTY], NV_PGRAPH_CLEARRECTY_YMAX); - - NV2A_DPRINTF( - "------------------CLEAR 0x%x %d,%d - %d,%d %x---------------\n", - parameter, xmin, ymin, xmax, ymax, - d->pgraph.regs[NV_PGRAPH_COLORCLEARVALUE]); - - unsigned int scissor_width = xmax - xmin + 1, - scissor_height = ymax - ymin + 1; - pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); - pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); - ymin = pg->surface_binding_dim.height - (ymin + scissor_height); - - NV2A_DPRINTF("Translated clear rect to %d,%d - %d,%d\n", xmin, ymin, - xmin + scissor_width - 1, ymin + scissor_height - 1); - - bool full_clear = !xmin && !ymin && - scissor_width >= pg->surface_binding_dim.width && - scissor_height >= pg->surface_binding_dim.height; - - pgraph_apply_scaling_factor(pg, &xmin, &ymin); - pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height); - - /* FIXME: Respect window clip?!?! */ - glEnable(GL_SCISSOR_TEST); - glScissor(xmin, ymin, scissor_width, scissor_height); - - /* Dither */ - /* FIXME: Maybe also disable it here? 
+ GL implementation dependent */ - if (pg->regs[NV_PGRAPH_CONTROL_0] & NV_PGRAPH_CONTROL_0_DITHERENABLE) { - glEnable(GL_DITHER); - } else { - glDisable(GL_DITHER); - } - - glClear(gl_mask); - - glDisable(GL_SCISSOR_TEST); - - pgraph_set_surface_dirty(pg, write_color, write_zeta); - - if (pg->color_binding) { - pg->color_binding->cleared = full_clear && write_color; - } - if (pg->zeta_binding) { - pg->zeta_binding->cleared = full_clear && write_zeta; - } - - pg->clearing = false; -} - -DEF_METHOD(NV097, SET_CLEAR_RECT_HORIZONTAL) -{ - pg->regs[NV_PGRAPH_CLEARRECTX] = parameter; -} - -DEF_METHOD(NV097, SET_CLEAR_RECT_VERTICAL) -{ - pg->regs[NV_PGRAPH_CLEARRECTY] = parameter; -} - -DEF_METHOD_INC(NV097, SET_SPECULAR_FOG_FACTOR) -{ - int slot = (method - NV097_SET_SPECULAR_FOG_FACTOR) / 4; - pg->regs[NV_PGRAPH_SPECFOGFACTOR0 + slot*4] = parameter; -} - -DEF_METHOD(NV097, SET_SHADER_CLIP_PLANE_MODE) -{ - pg->regs[NV_PGRAPH_SHADERCLIPMODE] = parameter; -} - -DEF_METHOD_INC(NV097, SET_COMBINER_COLOR_OCW) -{ - int slot = (method - NV097_SET_COMBINER_COLOR_OCW) / 4; - pg->regs[NV_PGRAPH_COMBINECOLORO0 + slot*4] = parameter; -} - -DEF_METHOD(NV097, SET_COMBINER_CONTROL) -{ - pg->regs[NV_PGRAPH_COMBINECTL] = parameter; -} - -DEF_METHOD(NV097, SET_SHADOW_ZSLOPE_THRESHOLD) -{ - pg->regs[NV_PGRAPH_SHADOWZSLOPETHRESHOLD] = parameter; - assert(parameter == 0x7F800000); /* FIXME: Unimplemented */ -} - -DEF_METHOD(NV097, SET_SHADOW_DEPTH_FUNC) -{ - SET_MASK(pg->regs[NV_PGRAPH_SHADOWCTL], NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC, - parameter); -} - -DEF_METHOD(NV097, SET_SHADER_STAGE_PROGRAM) -{ - pg->regs[NV_PGRAPH_SHADERPROG] = parameter; -} - -DEF_METHOD(NV097, SET_DOT_RGBMAPPING) -{ - SET_MASK(pg->regs[NV_PGRAPH_SHADERCTL], 0xFFF, - GET_MASK(parameter, 0xFFF)); -} - -DEF_METHOD(NV097, SET_SHADER_OTHER_STAGE_INPUT) -{ - SET_MASK(pg->regs[NV_PGRAPH_SHADERCTL], 0xFFFF000, - GET_MASK(parameter, 0xFFFF000)); -} - -DEF_METHOD_INC(NV097, SET_TRANSFORM_DATA) -{ - int slot = (method - 
NV097_SET_TRANSFORM_DATA) / 4; - pg->vertex_state_shader_v0[slot] = parameter; -} - -DEF_METHOD(NV097, LAUNCH_TRANSFORM_PROGRAM) -{ - unsigned int program_start = parameter; - assert(program_start < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); - Nv2aVshProgram program; - Nv2aVshParseResult result = nv2a_vsh_parse_program( - &program, - pg->program_data[program_start], - NV2A_MAX_TRANSFORM_PROGRAM_LENGTH - program_start); - assert(result == NV2AVPR_SUCCESS); - - Nv2aVshCPUXVSSExecutionState state_linkage; - Nv2aVshExecutionState state = nv2a_vsh_emu_initialize_xss_execution_state( - &state_linkage, (float*)pg->vsh_constants); - memcpy(state_linkage.input_regs, pg->vertex_state_shader_v0, sizeof(pg->vertex_state_shader_v0)); - - nv2a_vsh_emu_execute_track_context_writes(&state, &program, pg->vsh_constants_dirty); - - nv2a_vsh_program_destroy(&program); -} - -DEF_METHOD(NV097, SET_TRANSFORM_EXECUTION_MODE) -{ - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_MODE, - GET_MASK(parameter, - NV097_SET_TRANSFORM_EXECUTION_MODE_MODE)); - SET_MASK(pg->regs[NV_PGRAPH_CSV0_D], NV_PGRAPH_CSV0_D_RANGE_MODE, - GET_MASK(parameter, - NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE)); -} - -DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_CXT_WRITE_EN) -{ - pg->enable_vertex_program_write = parameter; -} - -DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_LOAD) -{ - assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); - SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, parameter); -} - -DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_START) -{ - assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); - SET_MASK(pg->regs[NV_PGRAPH_CSV0_C], - NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START, parameter); -} - -DEF_METHOD(NV097, SET_TRANSFORM_CONSTANT_LOAD) -{ - assert(parameter < NV2A_VERTEXSHADER_CONSTANTS); - SET_MASK(pg->regs[NV_PGRAPH_CHEOPS_OFFSET], - NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, parameter); -} - - -void pgraph_context_switch(NV2AState *d, unsigned int channel_id) -{ - bool 
channel_valid = - d->pgraph.regs[NV_PGRAPH_CTX_CONTROL] & NV_PGRAPH_CTX_CONTROL_CHID; - unsigned pgraph_channel_id = GET_MASK(d->pgraph.regs[NV_PGRAPH_CTX_USER], NV_PGRAPH_CTX_USER_CHID); - - bool valid = channel_valid && pgraph_channel_id == channel_id; - if (!valid) { - SET_MASK(d->pgraph.regs[NV_PGRAPH_TRAPPED_ADDR], - NV_PGRAPH_TRAPPED_ADDR_CHID, channel_id); - - NV2A_DPRINTF("pgraph switching to ch %d\n", channel_id); - - /* TODO: hardware context switching */ - assert(!(d->pgraph.regs[NV_PGRAPH_DEBUG_3] - & NV_PGRAPH_DEBUG_3_HW_CONTEXT_SWITCH)); - - d->pgraph.waiting_for_context_switch = true; - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock_iothread(); - d->pgraph.pending_interrupts |= NV_PGRAPH_INTR_CONTEXT_SWITCH; - nv2a_update_irq(d); - qemu_mutex_unlock_iothread(); - qemu_mutex_lock(&d->pgraph.lock); - } -} - -static void pgraph_method_log(unsigned int subchannel, - unsigned int graphics_class, - unsigned int method, uint32_t parameter) -{ - const char *method_name = "?"; - static unsigned int last = 0; - static unsigned int count = 0; - - if (last == NV097_ARRAY_ELEMENT16 && method != last) { - method_name = "NV097_ARRAY_ELEMENT16"; - trace_nv2a_pgraph_method_abbrev(subchannel, graphics_class, last, - method_name, count); - NV2A_GL_DPRINTF(false, "pgraph method (%d) 0x%x %s * %d", subchannel, - last, method_name, count); - } - - if (method != NV097_ARRAY_ELEMENT16) { - uint32_t base = method; - switch (graphics_class) { - case NV_KELVIN_PRIMITIVE: { - int idx = METHOD_ADDR_TO_INDEX(method); - if (idx < ARRAY_SIZE(pgraph_kelvin_methods) && - pgraph_kelvin_methods[idx].handler) { - method_name = pgraph_kelvin_methods[idx].name; - base = pgraph_kelvin_methods[idx].base; - } - break; - } - default: - break; - } - - uint32_t offset = method - base; - trace_nv2a_pgraph_method(subchannel, graphics_class, method, - method_name, offset, parameter); - NV2A_GL_DPRINTF(false, - "pgraph method (%d): 0x%" PRIx32 " -> 0x%04" PRIx32 - " %s[%" PRId32 "] 0x%" 
PRIx32, - subchannel, graphics_class, method, method_name, offset, - parameter); - } - - if (method == last) { - count++; - } else { - count = 0; - } - last = method; -} - -static void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, - unsigned int attr) -{ - VertexAttribute *attribute = &pg->vertex_attributes[attr]; - - if (attribute->inline_buffer_populated || pg->inline_buffer_length == 0) { - return; - } - - /* Now upload the previous attribute value */ - attribute->inline_buffer_populated = true; - for (int i = 0; i < pg->inline_buffer_length; i++) { - memcpy(&attribute->inline_buffer[i * 4], attribute->inline_value, - sizeof(float) * 4); - } -} - -static void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg) -{ - pgraph_check_within_begin_end_block(pg); - assert(pg->inline_buffer_length < NV2A_MAX_BATCH_LENGTH); - - for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - VertexAttribute *attribute = &pg->vertex_attributes[i]; - if (attribute->inline_buffer_populated) { - memcpy(&attribute->inline_buffer[pg->inline_buffer_length * 4], - attribute->inline_value, sizeof(float) * 4); - } - } - - pg->inline_buffer_length++; -} - -void nv2a_gl_context_init(void) -{ - g_nv2a_context_render = glo_context_create(); - g_nv2a_context_display = glo_context_create(); -} - -void nv2a_set_surface_scale_factor(unsigned int scale) -{ - NV2AState *d = g_nv2a; - - g_config.display.quality.surface_scale = scale < 1 ? 
1 : scale; - - qemu_mutex_unlock_iothread(); - - qemu_mutex_lock(&d->pfifo.lock); - qatomic_set(&d->pfifo.halt, true); - qemu_mutex_unlock(&d->pfifo.lock); - - qemu_mutex_lock(&d->pgraph.lock); - qemu_event_reset(&d->pgraph.dirty_surfaces_download_complete); - qatomic_set(&d->pgraph.download_dirty_surfaces_pending, true); - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock(&d->pfifo.lock); - pfifo_kick(d); - qemu_mutex_unlock(&d->pfifo.lock); - qemu_event_wait(&d->pgraph.dirty_surfaces_download_complete); - - qemu_mutex_lock(&d->pgraph.lock); - qemu_event_reset(&d->pgraph.flush_complete); - qatomic_set(&d->pgraph.flush_pending, true); - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock(&d->pfifo.lock); - pfifo_kick(d); - qemu_mutex_unlock(&d->pfifo.lock); - qemu_event_wait(&d->pgraph.flush_complete); - - qemu_mutex_lock(&d->pfifo.lock); - qatomic_set(&d->pfifo.halt, false); - pfifo_kick(d); - qemu_mutex_unlock(&d->pfifo.lock); - - qemu_mutex_lock_iothread(); -} - -unsigned int nv2a_get_surface_scale_factor(void) -{ - return g_nv2a->pgraph.surface_scale_factor; -} - -static void pgraph_reload_surface_scale_factor(NV2AState *d) -{ - int factor = g_config.display.quality.surface_scale; - d->pgraph.surface_scale_factor = factor < 1 ? 
1 : factor; -} - -void pgraph_init(NV2AState *d) -{ - int i; - - g_nv2a = d; - PGRAPHState *pg = &d->pgraph; - - pgraph_reload_surface_scale_factor(d); - - pg->frame_time = 0; - pg->draw_time = 0; - pg->downloads_pending = false; - - qemu_mutex_init(&pg->lock); - qemu_mutex_init(&pg->shader_cache_lock); - qemu_event_init(&pg->gl_sync_complete, false); - qemu_event_init(&pg->downloads_complete, false); - qemu_event_init(&pg->dirty_surfaces_download_complete, false); - qemu_event_init(&pg->flush_complete, false); - qemu_event_init(&pg->shader_cache_writeback_complete, false); - - /* fire up opengl */ - glo_set_current(g_nv2a_context_render); - -#ifdef DEBUG_NV2A_GL - gl_debug_initialize(); -#endif - - /* DXT textures */ - assert(glo_check_extension("GL_EXT_texture_compression_s3tc")); - /* Internal RGB565 texture format */ - assert(glo_check_extension("GL_ARB_ES2_compatibility")); - - GLint max_vertex_attributes; - glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attributes); - assert(max_vertex_attributes >= NV2A_VERTEXSHADER_ATTRIBUTES); - - - glGenFramebuffers(1, &pg->gl_framebuffer); - glBindFramebuffer(GL_FRAMEBUFFER, pg->gl_framebuffer); - - pgraph_init_render_to_texture(d); - QTAILQ_INIT(&pg->surfaces); - - QSIMPLEQ_INIT(&pg->report_queue); - - //glPolygonMode( GL_FRONT_AND_BACK, GL_LINE ); - - // Initialize texture cache - const size_t texture_cache_size = 512; - lru_init(&pg->texture_cache); - pg->texture_cache_entries = malloc(texture_cache_size * sizeof(TextureLruNode)); - assert(pg->texture_cache_entries != NULL); - for (i = 0; i < texture_cache_size; i++) { - lru_add_free(&pg->texture_cache, &pg->texture_cache_entries[i].node); - } - - pg->texture_cache.init_node = texture_cache_entry_init; - pg->texture_cache.compare_nodes = texture_cache_entry_compare; - pg->texture_cache.post_node_evict = texture_cache_entry_post_evict; - - // Initialize element cache - const size_t element_cache_size = 50*1024; - lru_init(&pg->element_cache); - 
pg->element_cache_entries = malloc(element_cache_size * sizeof(VertexLruNode)); - assert(pg->element_cache_entries != NULL); - GLuint element_cache_buffers[element_cache_size]; - glGenBuffers(element_cache_size, element_cache_buffers); - for (i = 0; i < element_cache_size; i++) { - pg->element_cache_entries[i].gl_buffer = element_cache_buffers[i]; - lru_add_free(&pg->element_cache, &pg->element_cache_entries[i].node); - } - - pg->element_cache.init_node = vertex_cache_entry_init; - pg->element_cache.compare_nodes = vertex_cache_entry_compare; - - shader_cache_init(pg); - - pg->material_alpha = 0.0f; - SET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], NV_PGRAPH_CONTROL_3_SHADEMODE, - NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH); - pg->primitive_mode = PRIM_TYPE_INVALID; - - for (i=0; ivertex_attributes[i]; - glGenBuffers(1, &attribute->gl_inline_buffer); - attribute->inline_buffer = (float*)g_malloc(NV2A_MAX_BATCH_LENGTH - * sizeof(float) * 4); - attribute->inline_buffer_populated = false; - } - glGenBuffers(1, &pg->gl_inline_array_buffer); - - glGenBuffers(1, &pg->gl_memory_buffer); - glBindBuffer(GL_ARRAY_BUFFER, pg->gl_memory_buffer); - glBufferData(GL_ARRAY_BUFFER, memory_region_size(d->vram), - NULL, GL_DYNAMIC_DRAW); - - glGenVertexArrays(1, &pg->gl_vertex_array); - glBindVertexArray(pg->gl_vertex_array); - - assert(glGetError() == GL_NO_ERROR); - - glo_set_current(g_nv2a_context_display); - pgraph_init_display_renderer(d); - - glo_set_current(NULL); -} - -void pgraph_destroy(PGRAPHState *pg) -{ - qemu_mutex_destroy(&pg->lock); - qemu_mutex_destroy(&pg->shader_cache_lock); - - glo_set_current(g_nv2a_context_render); - - // TODO: clear out surfaces - - glDeleteFramebuffers(1, &pg->gl_framebuffer); - - // Clear out shader cache - shader_write_cache_reload_list(pg); - free(pg->shader_cache_entries); - - // Clear out texture cache - lru_flush(&pg->texture_cache); - free(pg->texture_cache_entries); - - glo_set_current(NULL); - glo_context_destroy(g_nv2a_context_render); - 
glo_context_destroy(g_nv2a_context_display); -} - -static void pgraph_shader_update_constants(PGRAPHState *pg, - ShaderBinding *binding, - bool binding_changed, - bool vertex_program, - bool fixed_function) -{ - int i, j; - - /* update combiner constants */ - for (i = 0; i < 9; i++) { - uint32_t constant[2]; - if (i == 8) { - /* final combiner */ - constant[0] = pg->regs[NV_PGRAPH_SPECFOGFACTOR0]; - constant[1] = pg->regs[NV_PGRAPH_SPECFOGFACTOR1]; - } else { - constant[0] = pg->regs[NV_PGRAPH_COMBINEFACTOR0 + i * 4]; - constant[1] = pg->regs[NV_PGRAPH_COMBINEFACTOR1 + i * 4]; - } - - for (j = 0; j < 2; j++) { - GLint loc = binding->psh_constant_loc[i][j]; - if (loc != -1) { - float value[4]; - value[0] = (float) ((constant[j] >> 16) & 0xFF) / 255.0f; - value[1] = (float) ((constant[j] >> 8) & 0xFF) / 255.0f; - value[2] = (float) (constant[j] & 0xFF) / 255.0f; - value[3] = (float) ((constant[j] >> 24) & 0xFF) / 255.0f; - - glUniform4fv(loc, 1, value); - } - } - } - if (binding->alpha_ref_loc != -1) { - float alpha_ref = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHAREF) / 255.0; - glUniform1f(binding->alpha_ref_loc, alpha_ref); - } - - - /* For each texture stage */ - for (i = 0; i < NV2A_MAX_TEXTURES; i++) { - GLint loc; - - /* Bump luminance only during stages 1 - 3 */ - if (i > 0) { - loc = binding->bump_mat_loc[i]; - if (loc != -1) { - float m[4]; - m[0] = *(float*)&pg->regs[NV_PGRAPH_BUMPMAT00 + 4 * (i - 1)]; - m[1] = *(float*)&pg->regs[NV_PGRAPH_BUMPMAT01 + 4 * (i - 1)]; - m[2] = *(float*)&pg->regs[NV_PGRAPH_BUMPMAT10 + 4 * (i - 1)]; - m[3] = *(float*)&pg->regs[NV_PGRAPH_BUMPMAT11 + 4 * (i - 1)]; - glUniformMatrix2fv(loc, 1, GL_FALSE, m); - } - loc = binding->bump_scale_loc[i]; - if (loc != -1) { - glUniform1f(loc, *(float*)&pg->regs[ - NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4]); - } - loc = binding->bump_offset_loc[i]; - if (loc != -1) { - glUniform1f(loc, *(float*)&pg->regs[ - NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4]); - } - } - - loc = 
pg->shader_binding->tex_scale_loc[i]; - if (loc != -1) { - assert(pg->texture_binding[i] != NULL); - glUniform1f(loc, (float)pg->texture_binding[i]->scale); - } - } - - if (binding->fog_color_loc != -1) { - uint32_t fog_color = pg->regs[NV_PGRAPH_FOGCOLOR]; - glUniform4f(binding->fog_color_loc, - GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0, - GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0, - GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0, - GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0); - } - if (binding->fog_param_loc[0] != -1) { - glUniform1f(binding->fog_param_loc[0], - *(float*)&pg->regs[NV_PGRAPH_FOGPARAM0]); - } - if (binding->fog_param_loc[1] != -1) { - glUniform1f(binding->fog_param_loc[1], - *(float*)&pg->regs[NV_PGRAPH_FOGPARAM1]); - } - - float zmax; - switch (pg->surface_shape.zeta_format) { - case NV097_SET_SURFACE_FORMAT_ZETA_Z16: - zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF; - break; - case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: - zmax = pg->surface_shape.z_format ? 
f24_max : (float)0xFFFFFF; - break; - default: - assert(0); - } - - if (fixed_function) { - /* update lighting constants */ - struct { - uint32_t* v; - bool* dirty; - GLint* locs; - size_t len; - } lighting_arrays[] = { - {&pg->ltctxa[0][0], &pg->ltctxa_dirty[0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT}, - {&pg->ltctxb[0][0], &pg->ltctxb_dirty[0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT}, - {&pg->ltc1[0][0], &pg->ltc1_dirty[0], binding->ltc1_loc, NV2A_LTC1_COUNT}, - }; - - for (i=0; ilight_infinite_half_vector_loc[i]; - if (loc != -1) { - glUniform3fv(loc, 1, pg->light_infinite_half_vector[i]); - } - loc = binding->light_infinite_direction_loc[i]; - if (loc != -1) { - glUniform3fv(loc, 1, pg->light_infinite_direction[i]); - } - - loc = binding->light_local_position_loc[i]; - if (loc != -1) { - glUniform3fv(loc, 1, pg->light_local_position[i]); - } - loc = binding->light_local_attenuation_loc[i]; - if (loc != -1) { - glUniform3fv(loc, 1, pg->light_local_attenuation[i]); - } - } - - /* estimate the viewport by assuming it matches the surface ... 
*/ - unsigned int aa_width = 1, aa_height = 1; - pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); - - float m11 = 0.5 * (pg->surface_binding_dim.width/aa_width); - float m22 = -0.5 * (pg->surface_binding_dim.height/aa_height); - float m33 = zmax; - float m41 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0]; - float m42 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1]; - - float invViewport[16] = { - 1.0/m11, 0, 0, 0, - 0, 1.0/m22, 0, 0, - 0, 0, 1.0/m33, 0, - -1.0+m41/m11, 1.0+m42/m22, 0, 1.0 - }; - - if (binding->inv_viewport_loc != -1) { - glUniformMatrix4fv(binding->inv_viewport_loc, - 1, GL_FALSE, &invViewport[0]); - } - } - - /* update vertex program constants */ - for (i=0; ivsh_constants_dirty[i] && !binding_changed) continue; - - GLint loc = binding->vsh_constant_loc[i]; - if ((loc != -1) && - memcmp(binding->vsh_constants[i], pg->vsh_constants[i], - sizeof(pg->vsh_constants[1]))) { - glUniform4fv(loc, 1, (const GLfloat *)pg->vsh_constants[i]); - memcpy(binding->vsh_constants[i], pg->vsh_constants[i], - sizeof(pg->vsh_constants[i])); - } - - pg->vsh_constants_dirty[i] = false; - } - - if (binding->surface_size_loc != -1) { - unsigned int aa_width = 1, aa_height = 1; - pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); - glUniform2f(binding->surface_size_loc, - pg->surface_binding_dim.width / aa_width, - pg->surface_binding_dim.height / aa_height); - } - - if (binding->clip_range_loc != -1) { - float zclip_min = *(float*)&pg->regs[NV_PGRAPH_ZCLIPMIN] / zmax * 2.0 - 1.0; - float zclip_max = *(float*)&pg->regs[NV_PGRAPH_ZCLIPMAX] / zmax * 2.0 - 1.0; - glUniform4f(binding->clip_range_loc, 0, zmax, zclip_min, zclip_max); - } - - /* Clipping regions */ - unsigned int max_gl_width = pg->surface_binding_dim.width; - unsigned int max_gl_height = pg->surface_binding_dim.height; - pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height); - - for (i = 0; i < 8; i++) { - uint32_t x = pg->regs[NV_PGRAPH_WINDOWCLIPX0 + i * 
4]; - unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN); - unsigned int x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1; - uint32_t y = pg->regs[NV_PGRAPH_WINDOWCLIPY0 + i * 4]; - unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN); - unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1; - pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min); - pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max); - - pgraph_apply_scaling_factor(pg, &x_min, &y_min); - pgraph_apply_scaling_factor(pg, &x_max, &y_max); - - /* Translate for the GL viewport origin */ - int y_min_xlat = MAX((int)max_gl_height - (int)y_max, 0); - int y_max_xlat = MIN((int)max_gl_height - (int)y_min, max_gl_height); - - glUniform4i(pg->shader_binding->clip_region_loc[i], - x_min, y_min_xlat, x_max, y_max_xlat); - } - - if (binding->material_alpha_loc != -1) { - glUniform1f(binding->material_alpha_loc, pg->material_alpha); - } -} - -static bool pgraph_bind_shaders_test_dirty(PGRAPHState *pg) -{ - #define CR_1(reg) CR_x(reg, 1) - #define CR_4(reg) CR_x(reg, 4) - #define CR_8(reg) CR_x(reg, 8) - #define CF(src, name) CF_x(typeof(src), (&src), name, 1) - #define CFA(src, name) CF_x(typeof(src[0]), src, name, ARRAY_SIZE(src)) - #define CNAME(name) reg_check__ ## name - #define CX_x__define(type, name, x) static type CNAME(name)[x]; - #define CR_x__define(reg, x) CX_x__define(uint32_t, reg, x) - #define CF_x__define(type, src, name, x) CX_x__define(type, name, x) - #define CR_x__check(reg, x) \ - for (int i = 0; i < x; i++) { if (pg->regs[reg+i*4] != CNAME(reg)[i]) goto dirty; } - #define CF_x__check(type, src, name, x) \ - for (int i = 0; i < x; i++) { if (src[i] != CNAME(name)[i]) goto dirty; } - #define CR_x__update(reg, x) \ - for (int i = 0; i < x; i++) { CNAME(reg)[i] = pg->regs[reg+i*4]; } - #define CF_x__update(type, src, name, x) \ - for (int i = 0; i < x; i++) { CNAME(name)[i] = src[i]; } - - #define DIRTY_REGS \ - CR_1(NV_PGRAPH_COMBINECTL) \ - 
CR_1(NV_PGRAPH_SHADERCTL) \ - CR_1(NV_PGRAPH_SHADOWCTL) \ - CR_1(NV_PGRAPH_COMBINESPECFOG0) \ - CR_1(NV_PGRAPH_COMBINESPECFOG1) \ - CR_1(NV_PGRAPH_CONTROL_0) \ - CR_1(NV_PGRAPH_CONTROL_3) \ - CR_1(NV_PGRAPH_CSV0_C) \ - CR_1(NV_PGRAPH_CSV0_D) \ - CR_1(NV_PGRAPH_CSV1_A) \ - CR_1(NV_PGRAPH_CSV1_B) \ - CR_1(NV_PGRAPH_SETUPRASTER) \ - CR_1(NV_PGRAPH_SHADERPROG) \ - CR_8(NV_PGRAPH_COMBINECOLORI0) \ - CR_8(NV_PGRAPH_COMBINECOLORO0) \ - CR_8(NV_PGRAPH_COMBINEALPHAI0) \ - CR_8(NV_PGRAPH_COMBINEALPHAO0) \ - CR_8(NV_PGRAPH_COMBINEFACTOR0) \ - CR_8(NV_PGRAPH_COMBINEFACTOR1) \ - CR_1(NV_PGRAPH_SHADERCLIPMODE) \ - CR_4(NV_PGRAPH_TEXCTL0_0) \ - CR_4(NV_PGRAPH_TEXFMT0) \ - CR_4(NV_PGRAPH_TEXFILTER0) \ - CR_8(NV_PGRAPH_WINDOWCLIPX0) \ - CR_8(NV_PGRAPH_WINDOWCLIPY0) \ - CF(pg->primitive_mode, primitive_mode) \ - CF(pg->surface_scale_factor, surface_scale_factor) \ - CF(pg->compressed_attrs, compressed_attrs) \ - CFA(pg->texture_matrix_enable, texture_matrix_enable) - - #define CR_x(reg, x) CR_x__define(reg, x) - #define CF_x(type, src, name, x) CF_x__define(type, src, name, x) - DIRTY_REGS - #undef CR_x - #undef CF_x - - #define CR_x(reg, x) CR_x__check(reg, x) - #define CF_x(type, src, name, x) CF_x__check(type, src, name, x) - DIRTY_REGS - #undef CR_x - #undef CF_x - return false; - -dirty: - #define CR_x(reg, x) CR_x__update(reg, x) - #define CF_x(type, src, name, x) CF_x__update(type, src, name, x) - DIRTY_REGS - #undef CR_x - #undef CF_x - return true; -} - -static void pgraph_bind_shaders(PGRAPHState *pg) -{ - int i, j; - - bool vertex_program = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_MODE) == 2; - - bool fixed_function = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_MODE) == 0; - - int program_start = GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], - NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START); - - NV2A_GL_DGROUP_BEGIN("%s (VP: %s FFP: %s)", __func__, - vertex_program ? "yes" : "no", - fixed_function ? 
"yes" : "no"); - - bool binding_changed = false; - if (!pgraph_bind_shaders_test_dirty(pg) && !pg->program_data_dirty) { - nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY); - goto update_constants; - } - - pg->program_data_dirty = false; - - ShaderBinding* old_binding = pg->shader_binding; - - ShaderState state; - memset(&state, 0, sizeof(ShaderState)); - - state.surface_scale_factor = pg->surface_scale_factor; - - state.compressed_attrs = pg->compressed_attrs; - - /* register combiner stuff */ - state.psh.window_clip_exclusive = pg->regs[NV_PGRAPH_SETUPRASTER] - & NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE; - state.psh.combiner_control = pg->regs[NV_PGRAPH_COMBINECTL]; - state.psh.shader_stage_program = pg->regs[NV_PGRAPH_SHADERPROG]; - state.psh.other_stage_input = pg->regs[NV_PGRAPH_SHADERCTL]; - state.psh.final_inputs_0 = pg->regs[NV_PGRAPH_COMBINESPECFOG0]; - state.psh.final_inputs_1 = pg->regs[NV_PGRAPH_COMBINESPECFOG1]; - - state.psh.alpha_test = pg->regs[NV_PGRAPH_CONTROL_0] - & NV_PGRAPH_CONTROL_0_ALPHATESTENABLE; - state.psh.alpha_func = (enum PshAlphaFunc)GET_MASK(pg->regs[NV_PGRAPH_CONTROL_0], - NV_PGRAPH_CONTROL_0_ALPHAFUNC); - - state.psh.point_sprite = pg->regs[NV_PGRAPH_SETUPRASTER] & - NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE; - - state.psh.shadow_depth_func = (enum PshShadowDepthFunc)GET_MASK( - pg->regs[NV_PGRAPH_SHADOWCTL], NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC); - - state.fixed_function = fixed_function; - - /* fixed function stuff */ - if (fixed_function) { - state.skinning = (enum VshSkinning)GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_SKIN); - state.lighting = GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], - NV_PGRAPH_CSV0_C_LIGHTING); - state.normalization = pg->regs[NV_PGRAPH_CSV0_C] - & NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE; - - /* color material */ - state.emission_src = (enum MaterialColorSource)GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_EMISSION); - state.ambient_src = (enum 
MaterialColorSource)GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_AMBIENT); - state.diffuse_src = (enum MaterialColorSource)GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_DIFFUSE); - state.specular_src = (enum MaterialColorSource)GET_MASK(pg->regs[NV_PGRAPH_CSV0_C], NV_PGRAPH_CSV0_C_SPECULAR); - } - - /* vertex program stuff */ - state.vertex_program = vertex_program, - state.z_perspective = pg->regs[NV_PGRAPH_CONTROL_0] - & NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE; - - state.point_params_enable = GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_POINTPARAMSENABLE); - state.point_size = - GET_MASK(pg->regs[NV_PGRAPH_POINTSIZE], NV097_SET_POINT_SIZE_V) / 8.0f; - if (state.point_params_enable) { - for (int i = 0; i < 8; i++) { - state.point_params[i] = pg->point_params[i]; - } - } - - /* geometry shader stuff */ - state.primitive_mode = (enum ShaderPrimitiveMode)pg->primitive_mode; - state.polygon_front_mode = (enum ShaderPolygonMode)GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_FRONTFACEMODE); - state.polygon_back_mode = (enum ShaderPolygonMode)GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_BACKFACEMODE); - - state.smooth_shading = GET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], - NV_PGRAPH_CONTROL_3_SHADEMODE) == - NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH; - state.psh.smooth_shading = state.smooth_shading; - - state.program_length = 0; - - if (vertex_program) { - // copy in vertex program tokens - for (i = program_start; i < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH; i++) { - uint32_t *cur_token = (uint32_t*)&pg->program_data[i]; - memcpy(&state.program_data[state.program_length], - cur_token, - VSH_TOKEN_SIZE * sizeof(uint32_t)); - state.program_length++; - - if (vsh_get_field(cur_token, FLD_FINAL)) { - break; - } - } - } - - /* Texgen */ - for (i = 0; i < 4; i++) { - unsigned int reg = (i < 2) ? NV_PGRAPH_CSV1_A : NV_PGRAPH_CSV1_B; - for (j = 0; j < 4; j++) { - unsigned int masks[] = { - (i % 2) ? 
NV_PGRAPH_CSV1_A_T1_S : NV_PGRAPH_CSV1_A_T0_S, - (i % 2) ? NV_PGRAPH_CSV1_A_T1_T : NV_PGRAPH_CSV1_A_T0_T, - (i % 2) ? NV_PGRAPH_CSV1_A_T1_R : NV_PGRAPH_CSV1_A_T0_R, - (i % 2) ? NV_PGRAPH_CSV1_A_T1_Q : NV_PGRAPH_CSV1_A_T0_Q - }; - state.texgen[i][j] = (enum VshTexgen)GET_MASK(pg->regs[reg], masks[j]); - } - } - - /* Fog */ - state.fog_enable = pg->regs[NV_PGRAPH_CONTROL_3] - & NV_PGRAPH_CONTROL_3_FOGENABLE; - if (state.fog_enable) { - /*FIXME: Use CSV0_D? */ - state.fog_mode = (enum VshFogMode)GET_MASK(pg->regs[NV_PGRAPH_CONTROL_3], - NV_PGRAPH_CONTROL_3_FOG_MODE); - state.foggen = (enum VshFoggen)GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_FOGGENMODE); - } else { - /* FIXME: Do we still pass the fogmode? */ - state.fog_mode = (enum VshFogMode)0; - state.foggen = (enum VshFoggen)0; - } - - /* Texture matrices */ - for (i = 0; i < 4; i++) { - state.texture_matrix_enable[i] = pg->texture_matrix_enable[i]; - } - - /* Lighting */ - if (state.lighting) { - for (i = 0; i < NV2A_MAX_LIGHTS; i++) { - state.light[i] = (enum VshLight)GET_MASK(pg->regs[NV_PGRAPH_CSV0_D], - NV_PGRAPH_CSV0_D_LIGHT0 << (i * 2)); - } - } - - /* Copy content of enabled combiner stages */ - int num_stages = pg->regs[NV_PGRAPH_COMBINECTL] & 0xFF; - for (i = 0; i < num_stages; i++) { - state.psh.rgb_inputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORI0 + i * 4]; - state.psh.rgb_outputs[i] = pg->regs[NV_PGRAPH_COMBINECOLORO0 + i * 4]; - state.psh.alpha_inputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAI0 + i * 4]; - state.psh.alpha_outputs[i] = pg->regs[NV_PGRAPH_COMBINEALPHAO0 + i * 4]; - //constant_0[i] = pg->regs[NV_PGRAPH_COMBINEFACTOR0 + i * 4]; - //constant_1[i] = pg->regs[NV_PGRAPH_COMBINEFACTOR1 + i * 4]; - } - - for (i = 0; i < 4; i++) { - for (j = 0; j < 4; j++) { - state.psh.compare_mode[i][j] = - (pg->regs[NV_PGRAPH_SHADERCLIPMODE] >> (4 * i + j)) & 1; - } - - uint32_t ctl_0 = pg->regs[NV_PGRAPH_TEXCTL0_0 + i*4]; - bool enabled = pgraph_is_texture_stage_active(pg, i) && - (ctl_0 & 
NV_PGRAPH_TEXCTL0_0_ENABLE); - if (!enabled) { - continue; - } - - state.psh.alphakill[i] = ctl_0 & NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN; - - uint32_t tex_fmt = pg->regs[NV_PGRAPH_TEXFMT0 + i*4]; - unsigned int color_format = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_COLOR); - ColorFormatInfo f = kelvin_color_format_map[color_format]; - state.psh.rect_tex[i] = f.linear; - - uint32_t border_source = GET_MASK(tex_fmt, - NV_PGRAPH_TEXFMT0_BORDER_SOURCE); - bool cubemap = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE); - state.psh.border_logical_size[i][0] = 0.0f; - state.psh.border_logical_size[i][1] = 0.0f; - state.psh.border_logical_size[i][2] = 0.0f; - if (border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR) { - if (!f.linear && !cubemap) { - // The actual texture will be (at least) double the reported - // size and shifted by a 4 texel border but texture coordinates - // will still be relative to the reported size. - unsigned int reported_width = - 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U); - unsigned int reported_height = - 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V); - unsigned int reported_depth = - 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P); - - state.psh.border_logical_size[i][0] = reported_width; - state.psh.border_logical_size[i][1] = reported_height; - state.psh.border_logical_size[i][2] = reported_depth; - - if (reported_width < 8) { - state.psh.border_inv_real_size[i][0] = 0.0625f; - } else { - state.psh.border_inv_real_size[i][0] = - 1.0f / (reported_width * 2.0f); - } - if (reported_height < 8) { - state.psh.border_inv_real_size[i][1] = 0.0625f; - } else { - state.psh.border_inv_real_size[i][1] = - 1.0f / (reported_height * 2.0f); - } - if (reported_depth < 8) { - state.psh.border_inv_real_size[i][2] = 0.0625f; - } else { - state.psh.border_inv_real_size[i][2] = - 1.0f / (reported_depth * 2.0f); - } - } else { - NV2A_UNIMPLEMENTED("Border source texture with linear %d cubemap %d", - f.linear, cubemap); - } - } - - /* Keep 
track of whether texture data has been loaded as signed - * normalized integers or not. This dictates whether or not we will need - * to re-map in fragment shader for certain texture modes (e.g. - * bumpenvmap). - * - * FIXME: When signed texture data is loaded as unsigned and remapped in - * fragment shader, there may be interpolation artifacts. Fix this to - * support signed textures more appropriately. - */ - state.psh.snorm_tex[i] = (f.gl_internal_format == GL_RGB8_SNORM) - || (f.gl_internal_format == GL_RG8_SNORM); - - state.psh.shadow_map[i] = f.depth; - - uint32_t filter = pg->regs[NV_PGRAPH_TEXFILTER0 + i*4]; - unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN); - enum ConvolutionFilter kernel = CONVOLUTION_FILTER_DISABLED; - /* FIXME: We do not distinguish between min and mag when - * performing convolution. Just use it if specified for min (common AA - * case). - */ - if (min_filter == NV_PGRAPH_TEXFILTER0_MIN_CONVOLUTION_2D_LOD0) { - int k = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL); - assert(k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_QUINCUNX || - k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_GAUSSIAN_3); - kernel = (enum ConvolutionFilter)k; - } - - state.psh.conv_tex[i] = kernel; - } - - uint64_t shader_state_hash = fast_hash((uint8_t*) &state, sizeof(ShaderState)); - qemu_mutex_lock(&pg->shader_cache_lock); - LruNode *node = lru_lookup(&pg->shader_cache, shader_state_hash, &state); - ShaderLruNode *snode = container_of(node, ShaderLruNode, node); - if (snode->binding || shader_load_from_memory(snode)) { - pg->shader_binding = snode->binding; - } else { - pg->shader_binding = generate_shaders(&state); - nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN); - - /* cache it */ - snode->binding = pg->shader_binding; - if (g_config.perf.cache_shaders) { - shader_cache_to_disk(snode); - } - } - - qemu_mutex_unlock(&pg->shader_cache_lock); - - binding_changed = (pg->shader_binding != old_binding); - if (binding_changed) { - 
nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND); - glUseProgram(pg->shader_binding->gl_program); - } - -update_constants: - pgraph_shader_update_constants(pg, pg->shader_binding, binding_changed, - vertex_program, fixed_function); - - NV2A_GL_DGROUP_END(); -} - -static bool pgraph_framebuffer_dirty(PGRAPHState *pg) -{ - bool shape_changed = memcmp(&pg->surface_shape, &pg->last_surface_shape, - sizeof(SurfaceShape)) != 0; - if (!shape_changed || (!pg->surface_shape.color_format - && !pg->surface_shape.zeta_format)) { - return false; - } - return true; -} - -static bool pgraph_color_write_enabled(PGRAPHState *pg) -{ - return pg->regs[NV_PGRAPH_CONTROL_0] & ( - NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE - | NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE - | NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE - | NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE); -} - -static bool pgraph_zeta_write_enabled(PGRAPHState *pg) -{ - return pg->regs[NV_PGRAPH_CONTROL_0] & ( - NV_PGRAPH_CONTROL_0_ZWRITEENABLE - | NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE); -} - -static void pgraph_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta) -{ - NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n", - color, zeta, - pgraph_color_write_enabled(pg), pgraph_zeta_write_enabled(pg)); - /* FIXME: Does this apply to CLEARs too? 
*/ - color = color && pgraph_color_write_enabled(pg); - zeta = zeta && pgraph_zeta_write_enabled(pg); - pg->surface_color.draw_dirty |= color; - pg->surface_zeta.draw_dirty |= zeta; - - if (pg->color_binding) { - pg->color_binding->draw_dirty |= color; - pg->color_binding->frame_time = pg->frame_time; - pg->color_binding->cleared = false; - - } - - if (pg->zeta_binding) { - pg->zeta_binding->draw_dirty |= zeta; - pg->zeta_binding->frame_time = pg->frame_time; - pg->zeta_binding->cleared = false; - - } -} - -static GLuint pgraph_compile_shader(const char *vs_src, const char *fs_src) -{ - GLint status; - char err_buf[512]; - - // Compile vertex shader - GLuint vs = glCreateShader(GL_VERTEX_SHADER); - glShaderSource(vs, 1, &vs_src, NULL); - glCompileShader(vs); - glGetShaderiv(vs, GL_COMPILE_STATUS, &status); - if (status != GL_TRUE) { - glGetShaderInfoLog(vs, sizeof(err_buf), NULL, err_buf); - err_buf[sizeof(err_buf)-1] = '\0'; - fprintf(stderr, "Vertex shader compilation failed: %s\n", err_buf); - exit(1); - } - - // Compile fragment shader - GLuint fs = glCreateShader(GL_FRAGMENT_SHADER); - glShaderSource(fs, 1, &fs_src, NULL); - glCompileShader(fs); - glGetShaderiv(fs, GL_COMPILE_STATUS, &status); - if (status != GL_TRUE) { - glGetShaderInfoLog(fs, sizeof(err_buf), NULL, err_buf); - err_buf[sizeof(err_buf)-1] = '\0'; - fprintf(stderr, "Fragment shader compilation failed: %s\n", err_buf); - exit(1); - } - - // Link vertex and fragment shaders - GLuint prog = glCreateProgram(); - glAttachShader(prog, vs); - glAttachShader(prog, fs); - glLinkProgram(prog); - glUseProgram(prog); - - // Flag shaders for deletion (will still be retained for lifetime of prog) - glDeleteShader(vs); - glDeleteShader(fs); - - return prog; -} - -static void pgraph_init_render_to_texture(NV2AState *d) -{ - struct PGRAPHState *pg = &d->pgraph; - const char *vs = - "#version 330\n" - "void main()\n" - "{\n" - " float x = -1.0 + float((gl_VertexID & 1) << 2);\n" - " float y = -1.0 + 
float((gl_VertexID & 2) << 1);\n" - " gl_Position = vec4(x, y, 0, 1);\n" - "}\n"; - const char *fs = - "#version 330\n" - "uniform sampler2D tex;\n" - "uniform vec2 surface_size;\n" - "layout(location = 0) out vec4 out_Color;\n" - "void main()\n" - "{\n" - " vec2 texCoord;\n" - " texCoord.x = gl_FragCoord.x;\n" - " texCoord.y = (surface_size.y - gl_FragCoord.y)\n" - " + (textureSize(tex,0).y - surface_size.y);\n" - " texCoord /= textureSize(tex,0).xy;\n" - " out_Color.rgba = texture(tex, texCoord);\n" - "}\n"; - - pg->s2t_rndr.prog = pgraph_compile_shader(vs, fs); - pg->s2t_rndr.tex_loc = glGetUniformLocation(pg->s2t_rndr.prog, "tex"); - pg->s2t_rndr.surface_size_loc = glGetUniformLocation(pg->s2t_rndr.prog, - "surface_size"); - - glGenVertexArrays(1, &pg->s2t_rndr.vao); - glBindVertexArray(pg->s2t_rndr.vao); - glGenBuffers(1, &pg->s2t_rndr.vbo); - glBindBuffer(GL_ARRAY_BUFFER, pg->s2t_rndr.vbo); - glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW); - glGenFramebuffers(1, &pg->s2t_rndr.fbo); -} - -static bool pgraph_surface_to_texture_can_fastpath(SurfaceBinding *surface, - TextureShape *shape) -{ - // FIXME: Better checks/handling on formats and surface-texture compat - - int surface_fmt = surface->shape.color_format; - int texture_fmt = shape->color_format; - - if (!surface->color) { - // FIXME: Support zeta to color - return false; - } - - switch (surface_fmt) { - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true; - case 
NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true; - default: break; - } - break; - default: break; - } - - trace_nv2a_pgraph_surface_texture_compat_failed( - surface_fmt, texture_fmt); - return false; -} - - -static void pgraph_render_surface_to(NV2AState *d, SurfaceBinding *surface, - int texture_unit, GLuint gl_target, - GLuint gl_texture, unsigned int width, - unsigned int height) -{ - glActiveTexture(GL_TEXTURE0 + texture_unit); - glBindFramebuffer(GL_FRAMEBUFFER, d->pgraph.s2t_rndr.fbo); - - GLenum draw_buffers[1] = { GL_COLOR_ATTACHMENT0 }; - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gl_target, - gl_texture, 0); - glDrawBuffers(1, draw_buffers); - assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); - assert(glGetError() == GL_NO_ERROR); - - float color[] = { 0.0f, 0.0f, 0.0f, 0.0f }; - glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER); - glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, color); - - glBindVertexArray(d->pgraph.s2t_rndr.vao); - glBindBuffer(GL_ARRAY_BUFFER, d->pgraph.s2t_rndr.vbo); - glUseProgram(d->pgraph.s2t_rndr.prog); - glProgramUniform1i(d->pgraph.s2t_rndr.prog, d->pgraph.s2t_rndr.tex_loc, - texture_unit); - glProgramUniform2f(d->pgraph.s2t_rndr.prog, - d->pgraph.s2t_rndr.surface_size_loc, width, height); - - glViewport(0, 0, width, height); - glColorMask(true, true, true, true); - glDisable(GL_DITHER); - glDisable(GL_SCISSOR_TEST); - glDisable(GL_BLEND); - 
glDisable(GL_STENCIL_TEST); - glDisable(GL_CULL_FACE); - glDisable(GL_DEPTH_TEST); - glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); - glClearColor(0.0f, 0.0f, 1.0f, 1.0f); - glClear(GL_COLOR_BUFFER_BIT); - glDrawArrays(GL_TRIANGLES, 0, 3); - - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gl_target, 0, - 0); - glBindFramebuffer(GL_FRAMEBUFFER, d->pgraph.gl_framebuffer); - glBindVertexArray(d->pgraph.gl_vertex_array); - glBindTexture(gl_target, gl_texture); - glUseProgram( - d->pgraph.shader_binding ? d->pgraph.shader_binding->gl_program : 0); -} - -static void pgraph_render_surface_to_texture_slow( - NV2AState *d, SurfaceBinding *surface, TextureBinding *texture, - TextureShape *texture_shape, int texture_unit) -{ - PGRAPHState *pg = &d->pgraph; - - const ColorFormatInfo *f = &kelvin_color_format_map[texture_shape->color_format]; - assert(texture_shape->color_format < ARRAY_SIZE(kelvin_color_format_map)); - nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX_FALLBACK); - - glActiveTexture(GL_TEXTURE0 + texture_unit); - glBindTexture(texture->gl_target, texture->gl_texture); - - unsigned int width = surface->width, - height = surface->height; - pgraph_apply_scaling_factor(pg, &width, &height); - - size_t bufsize = width * height * surface->fmt.bytes_per_pixel; - - uint8_t *buf = g_malloc(bufsize); - pgraph_download_surface_data_to_buffer(d, surface, false, true, false, buf); - - width = texture_shape->width; - height = texture_shape->height; - pgraph_apply_scaling_factor(pg, &width, &height); - - glTexImage2D(texture->gl_target, 0, f->gl_internal_format, width, height, 0, - f->gl_format, f->gl_type, buf); - g_free(buf); - glBindTexture(texture->gl_target, texture->gl_texture); -} - -/* Note: This function is intended to be called before PGRAPH configures GL - * state for rendering; it will configure GL state here but only restore a - * couple of items. 
- */ -static void pgraph_render_surface_to_texture(NV2AState *d, - SurfaceBinding *surface, - TextureBinding *texture, - TextureShape *texture_shape, - int texture_unit) -{ - PGRAPHState *pg = &d->pgraph; - - const ColorFormatInfo *f = - &kelvin_color_format_map[texture_shape->color_format]; - assert(texture_shape->color_format < ARRAY_SIZE(kelvin_color_format_map)); - - nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX); - - if (!pgraph_surface_to_texture_can_fastpath(surface, texture_shape)) { - pgraph_render_surface_to_texture_slow(d, surface, texture, - texture_shape, texture_unit); - return; - } - - - unsigned int width = texture_shape->width, - height = texture_shape->height; - pgraph_apply_scaling_factor(pg, &width, &height); - - glActiveTexture(GL_TEXTURE0 + texture_unit); - glBindTexture(texture->gl_target, texture->gl_texture); - glTexParameteri(texture->gl_target, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(texture->gl_target, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(texture->gl_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - glTexImage2D(texture->gl_target, 0, f->gl_internal_format, width, height, 0, - f->gl_format, f->gl_type, NULL); - glBindTexture(texture->gl_target, 0); - pgraph_render_surface_to(d, surface, texture_unit, texture->gl_target, - texture->gl_texture, width, height); - glBindTexture(texture->gl_target, texture->gl_texture); - glUseProgram( - d->pgraph.shader_binding ? 
d->pgraph.shader_binding->gl_program : 0); -} - -static void pgraph_gl_fence(void) -{ - GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); - int result = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, - (GLuint64)(5000000000)); - assert(result == GL_CONDITION_SATISFIED || result == GL_ALREADY_SIGNALED); - glDeleteSync(fence); -} - -static void pgraph_init_display_renderer(NV2AState *d) -{ - struct PGRAPHState *pg = &d->pgraph; - - glGenTextures(1, &pg->gl_display_buffer); - pg->gl_display_buffer_internal_format = 0; - pg->gl_display_buffer_width = 0; - pg->gl_display_buffer_height = 0; - pg->gl_display_buffer_format = 0; - pg->gl_display_buffer_type = 0; - - const char *vs = - "#version 330\n" - "void main()\n" - "{\n" - " float x = -1.0 + float((gl_VertexID & 1) << 2);\n" - " float y = -1.0 + float((gl_VertexID & 2) << 1);\n" - " gl_Position = vec4(x, y, 0, 1);\n" - "}\n"; - /* FIXME: improve interlace handling, pvideo */ - - const char *fs = - "#version 330\n" - "uniform sampler2D tex;\n" - "uniform bool pvideo_enable;\n" - "uniform sampler2D pvideo_tex;\n" - "uniform vec2 pvideo_in_pos;\n" - "uniform vec4 pvideo_pos;\n" - "uniform vec3 pvideo_scale;\n" - "uniform bool pvideo_color_key_enable;\n" - "uniform vec4 pvideo_color_key;\n" - "uniform vec2 display_size;\n" - "uniform float line_offset;\n" - "layout(location = 0) out vec4 out_Color;\n" - "void main()\n" - "{\n" - " vec2 texCoord = gl_FragCoord.xy/display_size;\n" - " float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n" - " texCoord.y = 1 + rel*(texCoord.y - 1);" - " out_Color.rgba = texture(tex, texCoord);\n" - " if (pvideo_enable) {\n" - " vec2 screenCoord = gl_FragCoord.xy - 0.5;\n" - " vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n" - " bvec4 clip = bvec4(lessThan(screenCoord, output_region.xy),\n" - " greaterThan(screenCoord, output_region.zw));\n" - " if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n" - " 
vec2 out_xy = (screenCoord - pvideo_pos.xy) * pvideo_scale.z;\n" - " vec2 in_st = (pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n" - " in_st.y *= -1.0;\n" - " out_Color.rgba = texture(pvideo_tex, in_st);\n" - " }\n" - " }\n" - "}\n"; - - pg->disp_rndr.prog = pgraph_compile_shader(vs, fs); - pg->disp_rndr.tex_loc = glGetUniformLocation(pg->disp_rndr.prog, "tex"); - pg->disp_rndr.pvideo_enable_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_enable"); - pg->disp_rndr.pvideo_tex_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_tex"); - pg->disp_rndr.pvideo_in_pos_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_in_pos"); - pg->disp_rndr.pvideo_pos_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_pos"); - pg->disp_rndr.pvideo_scale_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_scale"); - pg->disp_rndr.pvideo_color_key_enable_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_color_key_enable"); - pg->disp_rndr.pvideo_color_key_loc = glGetUniformLocation(pg->disp_rndr.prog, "pvideo_color_key"); - pg->disp_rndr.display_size_loc = glGetUniformLocation(pg->disp_rndr.prog, "display_size"); - pg->disp_rndr.line_offset_loc = glGetUniformLocation(pg->disp_rndr.prog, "line_offset"); - - glGenVertexArrays(1, &pg->disp_rndr.vao); - glBindVertexArray(pg->disp_rndr.vao); - glGenBuffers(1, &pg->disp_rndr.vbo); - glBindBuffer(GL_ARRAY_BUFFER, pg->disp_rndr.vbo); - glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW); - glGenFramebuffers(1, &pg->disp_rndr.fbo); - glGenTextures(1, &pg->disp_rndr.pvideo_tex); - assert(glGetError() == GL_NO_ERROR); -} - -static uint8_t *convert_texture_data__CR8YB8CB8YA8(const uint8_t *data, - unsigned int width, - unsigned int height, - unsigned int pitch) -{ - uint8_t *converted_data = (uint8_t *)g_malloc(width * height * 4); - int x, y; - for (y = 0; y < height; y++) { - const uint8_t *line = &data[y * pitch]; - const uint32_t row_offset = y * width; - for (x = 0; x < width; x++) { - 
uint8_t *pixel = &converted_data[(row_offset + x) * 4]; - convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]); - pixel[3] = 255; - } - } - return converted_data; -} - -static inline float pvideo_calculate_scale(unsigned int din_dout, - unsigned int output_size) -{ - float calculated_in = din_dout * (output_size - 1); - calculated_in = floorf(calculated_in / (1 << 20) + 0.5f); - return (calculated_in + 1.0f) / output_size; -} - -static void pgraph_render_display_pvideo_overlay(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - - // FIXME: This check against PVIDEO_SIZE_IN does not match HW behavior. - // Many games seem to pass this value when initializing or tearing down - // PVIDEO. On its own, this generally does not result in the overlay being - // hidden, however there are certain games (e.g., Ultimate Beach Soccer) - // that use an unknown mechanism to hide the overlay without explicitly - // stopping it. - // Since the value seems to be set to 0xFFFFFFFF only in cases where the - // content is not valid, it is probably good enough to treat it as an - // implicit stop. 
- bool enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE) - && d->pvideo.regs[NV_PVIDEO_SIZE_IN] != 0xFFFFFFFF; - glUniform1ui(d->pgraph.disp_rndr.pvideo_enable_loc, enabled); - if (!enabled) { - return; - } - - hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE]; - hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT]; - hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET]; - - int in_width = - GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH); - int in_height = - GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT); - - int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], - NV_PVIDEO_POINT_IN_S); - int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], - NV_PVIDEO_POINT_IN_T); - - int in_pitch = - GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_PITCH); - int in_color = - GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_COLOR); - - unsigned int out_width = - GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_WIDTH); - unsigned int out_height = - GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_HEIGHT); - - float scale_x = 1.0f; - float scale_y = 1.0f; - unsigned int ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX]; - unsigned int dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY]; - if (ds_dx != NV_PVIDEO_DIN_DOUT_UNITY) { - scale_x = pvideo_calculate_scale(ds_dx, out_width); - } - if (dt_dy != NV_PVIDEO_DIN_DOUT_UNITY) { - scale_y = pvideo_calculate_scale(dt_dy, out_height); - } - - // On HW, setting NV_PVIDEO_SIZE_IN larger than NV_PVIDEO_SIZE_OUT results - // in them being capped to the output size, content is not scaled. This is - // particularly important as NV_PVIDEO_SIZE_IN may be set to 0xFFFFFFFF - // during initialization or teardown. 
- if (in_width > out_width) { - in_width = floorf((float)out_width * scale_x + 0.5f); - } - if (in_height > out_height) { - in_height = floorf((float)out_height * scale_y + 0.5f); - } - - /* TODO: support other color formats */ - assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8); - - unsigned int out_x = - GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_X); - unsigned int out_y = - GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_Y); - - unsigned int color_key_enabled = - GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_DISPLAY); - glUniform1ui(d->pgraph.disp_rndr.pvideo_color_key_enable_loc, - color_key_enabled); - - // TODO: Verify that masking off the top byte is correct. - // SeaBlade sets a color key of 0x80000000 but the texture passed into the - // shader is cleared to 0 alpha. - unsigned int color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF; - glUniform4f(d->pgraph.disp_rndr.pvideo_color_key_loc, - GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0, - GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0, - GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0, - GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0); - - assert(offset + in_pitch * in_height <= limit); - hwaddr end = base + offset + in_pitch * in_height; - assert(end <= memory_region_size(d->vram)); - - pgraph_apply_scaling_factor(pg, &out_x, &out_y); - pgraph_apply_scaling_factor(pg, &out_width, &out_height); - - // Translate for the GL viewport origin. 
- out_y = MAX(pg->gl_display_buffer_height - 1 - (int)(out_y + out_height), 0); - - glActiveTexture(GL_TEXTURE0 + 1); - glBindTexture(GL_TEXTURE_2D, g_nv2a->pgraph.disp_rndr.pvideo_tex); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - uint8_t *tex_rgba = convert_texture_data__CR8YB8CB8YA8( - d->vram_ptr + base + offset, in_width, in_height, in_pitch); - glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, in_width, in_height, 0, GL_RGBA, - GL_UNSIGNED_BYTE, tex_rgba); - g_free(tex_rgba); - glUniform1i(d->pgraph.disp_rndr.pvideo_tex_loc, 1); - glUniform2f(d->pgraph.disp_rndr.pvideo_in_pos_loc, in_s, in_t); - glUniform4f(d->pgraph.disp_rndr.pvideo_pos_loc, - out_x, out_y, out_width, out_height); - glUniform3f(d->pgraph.disp_rndr.pvideo_scale_loc, - scale_x, scale_y, 1.0f / pg->surface_scale_factor); -} - -static void pgraph_render_display(NV2AState *d, SurfaceBinding *surface) -{ - struct PGRAPHState *pg = &d->pgraph; - - unsigned int width, height; - uint32_t pline_offset, pstart_addr, pline_compare; - d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height); - d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); - int line_offset = surface->pitch / pline_offset; - - /* Adjust viewport height for interlaced mode, used only in 1080i */ - if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) { - height *= 2; - } - - pgraph_apply_scaling_factor(pg, &width, &height); - - glBindFramebuffer(GL_FRAMEBUFFER, d->pgraph.disp_rndr.fbo); - glActiveTexture(GL_TEXTURE0); - glBindTexture(GL_TEXTURE_2D, pg->gl_display_buffer); - bool recreate = ( - surface->fmt.gl_internal_format != pg->gl_display_buffer_internal_format - || width != pg->gl_display_buffer_width - || height != pg->gl_display_buffer_height - || surface->fmt.gl_format != pg->gl_display_buffer_format - || surface->fmt.gl_type != 
pg->gl_display_buffer_type - ); - - if (recreate) { - /* XXX: There's apparently a bug in some Intel OpenGL drivers for - * Windows that will leak this texture when its orphaned after use in - * another context, apparently regardless of which thread it's created - * or released on. - * - * Driver: 27.20.100.8729 9/11/2020 W10 x64 - * Track: https://community.intel.com/t5/Graphics/OpenGL-Windows-drivers-for-Intel-HD-630-leaking-GPU-memory-when/td-p/1274423 - */ - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - pg->gl_display_buffer_internal_format = surface->fmt.gl_internal_format; - pg->gl_display_buffer_width = width; - pg->gl_display_buffer_height = height; - pg->gl_display_buffer_format = surface->fmt.gl_format; - pg->gl_display_buffer_type = surface->fmt.gl_type; - glTexImage2D(GL_TEXTURE_2D, 0, - pg->gl_display_buffer_internal_format, - pg->gl_display_buffer_width, - pg->gl_display_buffer_height, - 0, - pg->gl_display_buffer_format, - pg->gl_display_buffer_type, - NULL); - } - - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D, pg->gl_display_buffer, 0); - GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0}; - glDrawBuffers(1, DrawBuffers); - assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); - - glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); - glBindVertexArray(pg->disp_rndr.vao); - glBindBuffer(GL_ARRAY_BUFFER, pg->disp_rndr.vbo); - glUseProgram(pg->disp_rndr.prog); - glProgramUniform1i(pg->disp_rndr.prog, pg->disp_rndr.tex_loc, 0); - glUniform2f(d->pgraph.disp_rndr.display_size_loc, width, height); - glUniform1f(d->pgraph.disp_rndr.line_offset_loc, line_offset); - pgraph_render_display_pvideo_overlay(d); - - glViewport(0, 0, width, height); - glColorMask(true, true, true, true); - glDisable(GL_SCISSOR_TEST); - glDisable(GL_BLEND); - glDisable(GL_STENCIL_TEST); - 
glDisable(GL_CULL_FACE); - glDisable(GL_DEPTH_TEST); - glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); - glClearColor(0.0f, 0.0f, 0.0f, 1.0f); - glClear(GL_COLOR_BUFFER_BIT); - glDrawArrays(GL_TRIANGLES, 0, 3); - - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D, 0, 0); -} - -void pgraph_gl_sync(NV2AState *d) -{ - uint32_t pline_offset, pstart_addr, pline_compare; - d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); - SurfaceBinding *surface = pgraph_surface_get_within(d, d->pcrtc.start + pline_offset); - if (surface == NULL) { - qemu_event_set(&d->pgraph.gl_sync_complete); - return; - } - - /* FIXME: Sanity check surface dimensions */ - - /* Wait for queued commands to complete */ - pgraph_upload_surface_data(d, surface, !tcg_enabled()); - pgraph_gl_fence(); - assert(glGetError() == GL_NO_ERROR); - - /* Render framebuffer in display context */ - glo_set_current(g_nv2a_context_display); - pgraph_render_display(d, surface); - pgraph_gl_fence(); - assert(glGetError() == GL_NO_ERROR); - - /* Switch back to original context */ - glo_set_current(g_nv2a_context_render); - - qatomic_set(&d->pgraph.gl_sync_pending, false); - qemu_event_set(&d->pgraph.gl_sync_complete); -} - -const uint8_t *nv2a_get_dac_palette(void) -{ - return g_nv2a->puserdac.palette; -} - -int nv2a_get_screen_off(void) -{ - return g_nv2a->vga.sr[VGA_SEQ_CLOCK_MODE] & VGA_SR01_SCREEN_OFF; -} - -int nv2a_get_framebuffer_surface(void) -{ - NV2AState *d = g_nv2a; - PGRAPHState *pg = &d->pgraph; - - qemu_mutex_lock(&d->pfifo.lock); - // FIXME: Possible race condition with pgraph, consider lock - uint32_t pline_offset, pstart_addr, pline_compare; - d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); - SurfaceBinding *surface = pgraph_surface_get_within(d, d->pcrtc.start + pline_offset); - if (surface == NULL || !surface->color) { - qemu_mutex_unlock(&d->pfifo.lock); - return 0; - } - - assert(surface->color); - 
assert(surface->fmt.gl_attachment == GL_COLOR_ATTACHMENT0); - assert(surface->fmt.gl_format == GL_RGBA - || surface->fmt.gl_format == GL_RGB - || surface->fmt.gl_format == GL_BGR - || surface->fmt.gl_format == GL_BGRA - ); - - surface->frame_time = pg->frame_time; - qemu_event_reset(&d->pgraph.gl_sync_complete); - qatomic_set(&pg->gl_sync_pending, true); - pfifo_kick(d); - qemu_mutex_unlock(&d->pfifo.lock); - qemu_event_wait(&d->pgraph.gl_sync_complete); - - return pg->gl_display_buffer; -} - -static bool pgraph_check_surface_to_texture_compatibility( - const SurfaceBinding *surface, - const TextureShape *shape) -{ - // FIXME: Better checks/handling on formats and surface-texture compat - - if ((!surface->swizzle && surface->pitch != shape->pitch) || - surface->width != shape->width || - surface->height != shape->height) { - return false; - } - - int surface_fmt = surface->shape.color_format; - int texture_fmt = shape->color_format; - - if (!surface->color) { - // FIXME: Support zeta to color - return false; - } - - if (shape->cubemap) { - // FIXME: Support rendering surface to cubemap face - return false; - } - - if (shape->levels > 1) { - // FIXME: Support rendering surface to mip levels - return false; - } - - switch (surface_fmt) { - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) { - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true; - default: break; - } - break; - case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) { - 
case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true; - case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true; - default: break; - } - break; - default: - break; - } - - trace_nv2a_pgraph_surface_texture_compat_failed( - surface_fmt, texture_fmt); - return false; -} - -static void pgraph_wait_for_surface_download(SurfaceBinding *e) -{ - NV2AState *d = g_nv2a; - - if (qatomic_read(&e->draw_dirty)) { - qemu_mutex_lock(&d->pfifo.lock); - qemu_event_reset(&d->pgraph.downloads_complete); - qatomic_set(&e->download_pending, true); - qatomic_set(&d->pgraph.downloads_pending, true); - pfifo_kick(d); - qemu_mutex_unlock(&d->pfifo.lock); - qemu_event_wait(&d->pgraph.downloads_complete); - } -} - -static void pgraph_surface_access_callback( - void *opaque, - MemoryRegion *mr, - hwaddr addr, - hwaddr len, - bool write) -{ - SurfaceBinding *e = opaque; - assert(addr >= e->vram_addr); - hwaddr offset = addr - e->vram_addr; - assert(offset < e->size); - - if (qatomic_read(&e->draw_dirty)) { - trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset); - pgraph_wait_for_surface_download(e); - } - - if (write && !qatomic_read(&e->upload_pending)) { - trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset); - qatomic_set(&e->upload_pending, true); - } -} - -static SurfaceBinding *pgraph_surface_put(NV2AState *d, - hwaddr addr, - SurfaceBinding *surface_in) -{ - assert(pgraph_surface_get(d, addr) == NULL); - - SurfaceBinding *surface, *next; - uintptr_t e_end = surface_in->vram_addr + surface_in->size - 1; - QTAILQ_FOREACH_SAFE(surface, &d->pgraph.surfaces, entry, next) { - uintptr_t s_end = surface->vram_addr + surface->size - 1; - bool overlapping = !(surface->vram_addr > e_end - || surface_in->vram_addr > s_end); - if (overlapping) { - trace_nv2a_pgraph_surface_evict_overlapping( - surface->vram_addr, surface->width, 
surface->height, - surface->pitch); - pgraph_download_surface_data_if_dirty(d, surface); - pgraph_surface_invalidate(d, surface); - } - } - - SurfaceBinding *surface_out = g_malloc(sizeof(SurfaceBinding)); - assert(surface_out != NULL); - *surface_out = *surface_in; - - if (tcg_enabled()) { - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock_iothread(); - mem_access_callback_insert(qemu_get_cpu(0), - d->vram, surface_out->vram_addr, surface_out->size, - &surface_out->access_cb, &pgraph_surface_access_callback, - surface_out); - qemu_mutex_unlock_iothread(); - qemu_mutex_lock(&d->pgraph.lock); - } - - QTAILQ_INSERT_TAIL(&d->pgraph.surfaces, surface_out, entry); - - return surface_out; -} - -static SurfaceBinding *pgraph_surface_get(NV2AState *d, hwaddr addr) -{ - SurfaceBinding *surface; - QTAILQ_FOREACH (surface, &d->pgraph.surfaces, entry) { - if (surface->vram_addr == addr) { - return surface; - } - } - - return NULL; -} - -static SurfaceBinding *pgraph_surface_get_within(NV2AState *d, hwaddr addr) -{ - SurfaceBinding *surface; - QTAILQ_FOREACH (surface, &d->pgraph.surfaces, entry) { - if (addr >= surface->vram_addr && - addr < (surface->vram_addr + surface->size)) { - return surface; - } - } - - return NULL; -} - -static void pgraph_surface_invalidate(NV2AState *d, SurfaceBinding *surface) -{ - trace_nv2a_pgraph_surface_invalidated(surface->vram_addr); - - if (surface == d->pgraph.color_binding) { - assert(d->pgraph.surface_color.buffer_dirty); - pgraph_unbind_surface(d, true); - } - if (surface == d->pgraph.zeta_binding) { - assert(d->pgraph.surface_zeta.buffer_dirty); - pgraph_unbind_surface(d, false); - } - - if (tcg_enabled()) { - qemu_mutex_unlock(&d->pgraph.lock); - qemu_mutex_lock_iothread(); - mem_access_callback_remove_by_ref(qemu_get_cpu(0), surface->access_cb); - qemu_mutex_unlock_iothread(); - qemu_mutex_lock(&d->pgraph.lock); - } - - glDeleteTextures(1, &surface->gl_buffer); - - QTAILQ_REMOVE(&d->pgraph.surfaces, surface, entry); - 
g_free(surface); -} - -static void pgraph_surface_evict_old(NV2AState *d) -{ - const int surface_age_limit = 5; - - SurfaceBinding *s, *next; - QTAILQ_FOREACH_SAFE(s, &d->pgraph.surfaces, entry, next) { - int last_used = d->pgraph.frame_time - s->frame_time; - if (last_used >= surface_age_limit) { - trace_nv2a_pgraph_surface_evict_reason("old", s->vram_addr); - pgraph_download_surface_data_if_dirty(d, s); - pgraph_surface_invalidate(d, s); - } - } -} - -static bool pgraph_check_surface_compatibility(SurfaceBinding *s1, - SurfaceBinding *s2, bool strict) -{ - bool format_compatible = - (s1->color == s2->color) && - (s1->fmt.gl_attachment == s2->fmt.gl_attachment) && - (s1->fmt.gl_internal_format == s2->fmt.gl_internal_format) && - (s1->pitch == s2->pitch) && - (s1->shape.clip_x <= s2->shape.clip_x) && - (s1->shape.clip_y <= s2->shape.clip_y); - if (!format_compatible) { - return false; - } - - if (!strict) { - return (s1->width >= s2->width) && (s1->height >= s2->height); - } else { - return (s1->width == s2->width) && (s1->height == s2->height); - } -} - -static void pgraph_download_surface_data_if_dirty(NV2AState *d, - SurfaceBinding *surface) -{ - if (surface->draw_dirty) { - pgraph_download_surface_data(d, surface, true); - } -} - -static void pgraph_bind_current_surface(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - - if (pg->color_binding) { - glFramebufferTexture2D(GL_FRAMEBUFFER, pg->color_binding->fmt.gl_attachment, - GL_TEXTURE_2D, pg->color_binding->gl_buffer, 0); - } - - if (pg->zeta_binding) { - glFramebufferTexture2D(GL_FRAMEBUFFER, pg->zeta_binding->fmt.gl_attachment, - GL_TEXTURE_2D, pg->zeta_binding->gl_buffer, 0); - } - - if (pg->color_binding || pg->zeta_binding) { - assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == - GL_FRAMEBUFFER_COMPLETE); - } -} - -static void surface_copy_shrink_row(uint8_t *out, uint8_t *in, - unsigned int width, - unsigned int bytes_per_pixel, - unsigned int factor) -{ - if (bytes_per_pixel == 4) { - for (unsigned int 
x = 0; x < width; x++) { - *(uint32_t *)out = *(uint32_t *)in; - out += 4; - in += 4 * factor; - } - } else if (bytes_per_pixel == 2) { - for (unsigned int x = 0; x < width; x++) { - *(uint16_t *)out = *(uint16_t *)in; - out += 2; - in += 2 * factor; - } - } else { - for (unsigned int x = 0; x < width; x++) { - memcpy(out, in, bytes_per_pixel); - out += bytes_per_pixel; - in += bytes_per_pixel * factor; - } - } -} - - -static void pgraph_download_surface_data_to_buffer(NV2AState *d, - SurfaceBinding *surface, - bool swizzle, bool flip, - bool downscale, - uint8_t *pixels) -{ - PGRAPHState *pg = &d->pgraph; - swizzle &= surface->swizzle; - downscale &= (pg->surface_scale_factor != 1); - - trace_nv2a_pgraph_surface_download( - surface->color ? "COLOR" : "ZETA", - surface->swizzle ? "sz" : "lin", surface->vram_addr, - surface->width, surface->height, surface->pitch, - surface->fmt.bytes_per_pixel); - - /* Bind destination surface to framebuffer */ - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment, - GL_TEXTURE_2D, surface->gl_buffer, 0); - - assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); - - /* Read surface into memory */ - uint8_t *gl_read_buf = pixels; - - uint8_t *swizzle_buf = pixels; - if (swizzle) { - /* FIXME: Allocate big buffer up front and re-alloc if necessary. 
- * FIXME: Consider swizzle in shader - */ - assert(pg->surface_scale_factor == 1 || downscale); - swizzle_buf = (uint8_t *)g_malloc(surface->size); - gl_read_buf = swizzle_buf; - } - - if (downscale) { - pg->scale_buf = (uint8_t *)g_realloc( - pg->scale_buf, pg->surface_scale_factor * pg->surface_scale_factor * - surface->size); - gl_read_buf = pg->scale_buf; - } - - glo_readpixels( - surface->fmt.gl_format, surface->fmt.gl_type, surface->fmt.bytes_per_pixel, - pg->surface_scale_factor * surface->pitch, - pg->surface_scale_factor * surface->width, - pg->surface_scale_factor * surface->height, flip, gl_read_buf); - - /* FIXME: Replace this with a hw accelerated version */ - if (downscale) { - assert(surface->pitch >= (surface->width * surface->fmt.bytes_per_pixel)); - uint8_t *out = swizzle_buf, *in = pg->scale_buf; - for (unsigned int y = 0; y < surface->height; y++) { - surface_copy_shrink_row(out, in, surface->width, - surface->fmt.bytes_per_pixel, - pg->surface_scale_factor); - in += surface->pitch * pg->surface_scale_factor * - pg->surface_scale_factor; - out += surface->pitch; - } - } - - if (swizzle) { - swizzle_rect(swizzle_buf, surface->width, surface->height, pixels, - surface->pitch, surface->fmt.bytes_per_pixel); - g_free(swizzle_buf); - } - - /* Re-bind original framebuffer target */ - glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment, - GL_TEXTURE_2D, 0, 0); - pgraph_bind_current_surface(d); -} - -static void pgraph_download_surface_data(NV2AState *d, SurfaceBinding *surface, - bool force) -{ - if (!(surface->download_pending || force)) { - return; - } - - /* FIXME: Respect write enable at last TOU? 
*/ - - nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD); - - pgraph_download_surface_data_to_buffer( - d, surface, true, true, true, d->vram_ptr + surface->vram_addr); - - memory_region_set_client_dirty(d->vram, surface->vram_addr, - surface->pitch * surface->height, - DIRTY_MEMORY_VGA); - memory_region_set_client_dirty(d->vram, surface->vram_addr, - surface->pitch * surface->height, - DIRTY_MEMORY_NV2A_TEX); - - surface->download_pending = false; - surface->draw_dirty = false; -} - -void pgraph_process_pending_downloads(NV2AState *d) -{ - SurfaceBinding *surface; - QTAILQ_FOREACH(surface, &d->pgraph.surfaces, entry) { - pgraph_download_surface_data(d, surface, false); - } - - qatomic_set(&d->pgraph.downloads_pending, false); - qemu_event_set(&d->pgraph.downloads_complete); -} - -void pgraph_download_dirty_surfaces(NV2AState *d) -{ - SurfaceBinding *surface; - QTAILQ_FOREACH(surface, &d->pgraph.surfaces, entry) { - pgraph_download_surface_data_if_dirty(d, surface); - } - - qatomic_set(&d->pgraph.download_dirty_surfaces_pending, false); - qemu_event_set(&d->pgraph.dirty_surfaces_download_complete); -} - - -static void surface_copy_expand_row(uint8_t *out, uint8_t *in, - unsigned int width, - unsigned int bytes_per_pixel, - unsigned int factor) -{ - if (bytes_per_pixel == 4) { - for (unsigned int x = 0; x < width; x++) { - for (unsigned int i = 0; i < factor; i++) { - *(uint32_t *)out = *(uint32_t *)in; - out += bytes_per_pixel; - } - in += bytes_per_pixel; - } - } else if (bytes_per_pixel == 2) { - for (unsigned int x = 0; x < width; x++) { - for (unsigned int i = 0; i < factor; i++) { - *(uint16_t *)out = *(uint16_t *)in; - out += bytes_per_pixel; - } - in += bytes_per_pixel; - } - } else { - for (unsigned int x = 0; x < width; x++) { - for (unsigned int i = 0; i < factor; i++) { - memcpy(out, in, bytes_per_pixel); - out += bytes_per_pixel; - } - in += bytes_per_pixel; - } - } -} - -static void surface_copy_expand(uint8_t *out, uint8_t *in, unsigned int width, - 
unsigned int height, - unsigned int bytes_per_pixel, - unsigned int factor) -{ - size_t out_pitch = width * bytes_per_pixel * factor; - - for (unsigned int y = 0; y < height; y++) { - surface_copy_expand_row(out, in, width, bytes_per_pixel, factor); - uint8_t *row_in = out; - for (unsigned int i = 1; i < factor; i++) { - out += out_pitch; - memcpy(out, row_in, out_pitch); - } - in += width * bytes_per_pixel; - out += out_pitch; - } -} - -static void pgraph_upload_surface_data(NV2AState *d, SurfaceBinding *surface, - bool force) -{ - if (!(surface->upload_pending || force)) { - return; - } - - nv2a_profile_inc_counter(NV2A_PROF_SURF_UPLOAD); - - trace_nv2a_pgraph_surface_upload( - surface->color ? "COLOR" : "ZETA", - surface->swizzle ? "sz" : "lin", surface->vram_addr, - surface->width, surface->height, surface->pitch, - surface->fmt.bytes_per_pixel); - - PGRAPHState *pg = &d->pgraph; - - surface->upload_pending = false; - surface->draw_time = pg->draw_time; - - // FIXME: Don't query GL for texture binding - GLint last_texture_binding; - glGetIntegerv(GL_TEXTURE_BINDING_2D, &last_texture_binding); - - // FIXME: Replace with FBO to not disturb current state - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, - 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, - 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, - GL_TEXTURE_2D, 0, 0); - - uint8_t *data = d->vram_ptr; - uint8_t *buf = data + surface->vram_addr; - - if (surface->swizzle) { - buf = (uint8_t*)g_malloc(surface->size); - unswizzle_rect(data + surface->vram_addr, - surface->width, surface->height, - buf, - surface->pitch, - surface->fmt.bytes_per_pixel); - } - - /* FIXME: Replace this flip/scaling */ - - // This is VRAM so we can't do this inplace! 
- uint8_t *flipped_buf = (uint8_t *)g_malloc( - surface->height * surface->width * surface->fmt.bytes_per_pixel); - unsigned int irow; - for (irow = 0; irow < surface->height; irow++) { - memcpy(&flipped_buf[surface->width * (surface->height - irow - 1) - * surface->fmt.bytes_per_pixel], - &buf[surface->pitch * irow], - surface->width * surface->fmt.bytes_per_pixel); - } - - uint8_t *gl_read_buf = flipped_buf; - unsigned int width = surface->width, height = surface->height; - - if (pg->surface_scale_factor > 1) { - pgraph_apply_scaling_factor(pg, &width, &height); - pg->scale_buf = (uint8_t *)g_realloc( - pg->scale_buf, width * height * surface->fmt.bytes_per_pixel); - gl_read_buf = pg->scale_buf; - uint8_t *out = gl_read_buf, *in = flipped_buf; - surface_copy_expand(out, in, surface->width, surface->height, - surface->fmt.bytes_per_pixel, - d->pgraph.surface_scale_factor); - } - - int prev_unpack_alignment; - glGetIntegerv(GL_UNPACK_ALIGNMENT, &prev_unpack_alignment); - if (unlikely((width * surface->fmt.bytes_per_pixel) % 4 != 0)) { - glPixelStorei(GL_UNPACK_ALIGNMENT, 1); - } else { - glPixelStorei(GL_UNPACK_ALIGNMENT, 4); - } - - glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); - glTexImage2D(GL_TEXTURE_2D, 0, surface->fmt.gl_internal_format, width, - height, 0, surface->fmt.gl_format, surface->fmt.gl_type, - gl_read_buf); - glPixelStorei(GL_UNPACK_ALIGNMENT, prev_unpack_alignment); - g_free(flipped_buf); - if (surface->swizzle) { - g_free(buf); - } - - // Rebind previous framebuffer binding - glBindTexture(GL_TEXTURE_2D, last_texture_binding); - - pgraph_bind_current_surface(d); -} - -static void pgraph_compare_surfaces(SurfaceBinding *s1, SurfaceBinding *s2) -{ - #define DO_CMP(fld) \ - if (s1->fld != s2->fld) \ - trace_nv2a_pgraph_surface_compare_mismatch( \ - #fld, (long int)s1->fld, (long int)s2->fld); - DO_CMP(shape.clip_x) - DO_CMP(shape.clip_width) - DO_CMP(shape.clip_y) - DO_CMP(shape.clip_height) - DO_CMP(gl_buffer) - DO_CMP(fmt.bytes_per_pixel) - 
DO_CMP(fmt.gl_attachment) - DO_CMP(fmt.gl_internal_format) - DO_CMP(fmt.gl_format) - DO_CMP(fmt.gl_type) - DO_CMP(color) - DO_CMP(swizzle) - DO_CMP(vram_addr) - DO_CMP(width) - DO_CMP(height) - DO_CMP(pitch) - DO_CMP(size) - DO_CMP(dma_addr) - DO_CMP(dma_len) - DO_CMP(frame_time) - DO_CMP(draw_time) - #undef DO_CMP -} - -static void pgraph_populate_surface_binding_entry_sized(NV2AState *d, - bool color, - unsigned int width, - unsigned int height, - SurfaceBinding *entry) -{ - PGRAPHState *pg = &d->pgraph; - Surface *surface; - hwaddr dma_address; - SurfaceFormatInfo fmt; - - if (color) { - surface = &pg->surface_color; - dma_address = pg->dma_color; - assert(pg->surface_shape.color_format != 0); - assert(pg->surface_shape.color_format < - ARRAY_SIZE(kelvin_surface_color_format_map)); - fmt = kelvin_surface_color_format_map[pg->surface_shape.color_format]; - if (fmt.bytes_per_pixel == 0) { - fprintf(stderr, "nv2a: unimplemented color surface format 0x%x\n", - pg->surface_shape.color_format); - abort(); - } - } else { - surface = &pg->surface_zeta; - dma_address = pg->dma_zeta; - assert(pg->surface_shape.zeta_format != 0); - assert(pg->surface_shape.zeta_format < - ARRAY_SIZE(kelvin_surface_zeta_float_format_map)); - const SurfaceFormatInfo *map = - pg->surface_shape.z_format ? kelvin_surface_zeta_float_format_map : - kelvin_surface_zeta_fixed_format_map; - fmt = map[pg->surface_shape.zeta_format]; - } - - DMAObject dma = nv_dma_load(d, dma_address); - /* There's a bunch of bugs that could cause us to hit this function - * at the wrong time and get a invalid dma object. - * Check that it's sane. */ - assert(dma.dma_class == NV_DMA_IN_MEMORY_CLASS); - // assert(dma.address + surface->offset != 0); - assert(surface->offset <= dma.limit); - assert(surface->offset + surface->pitch * height <= dma.limit + 1); - assert(surface->pitch % fmt.bytes_per_pixel == 0); - assert((dma.address & ~0x07FFFFFF) == 0); - - entry->shape = (color || !pg->color_binding) ? 
pg->surface_shape : - pg->color_binding->shape; - entry->gl_buffer = 0; - entry->fmt = fmt; - entry->color = color; - entry->swizzle = - (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); - entry->vram_addr = dma.address + surface->offset; - entry->width = width; - entry->height = height; - entry->pitch = surface->pitch; - entry->size = height * MAX(surface->pitch, width * fmt.bytes_per_pixel); - entry->upload_pending = true; - entry->download_pending = false; - entry->draw_dirty = false; - entry->dma_addr = dma.address; - entry->dma_len = dma.limit; - entry->frame_time = pg->frame_time; - entry->draw_time = pg->draw_time; - entry->cleared = false; -} - -static void pgraph_populate_surface_binding_entry(NV2AState *d, bool color, - SurfaceBinding *entry) -{ - PGRAPHState *pg = &d->pgraph; - unsigned int width, height; - - if (color || !pg->color_binding) { - pgraph_get_surface_dimensions(pg, &width, &height); - pgraph_apply_anti_aliasing_factor(pg, &width, &height); - - /* Since we determine surface dimensions based on the clipping - * rectangle, make sure to include the surface offset as well. - */ - if (pg->surface_type != NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) { - width += pg->surface_shape.clip_x; - height += pg->surface_shape.clip_y; - } - } else { - width = pg->color_binding->width; - height = pg->color_binding->height; - } - - pgraph_populate_surface_binding_entry_sized(d, color, width, height, entry); -} - -static void pgraph_update_surface_part(NV2AState *d, bool upload, bool color) -{ - PGRAPHState *pg = &d->pgraph; - - SurfaceBinding entry; - pgraph_populate_surface_binding_entry(d, color, &entry); - - Surface *surface = color ? 
&pg->surface_color : &pg->surface_zeta; - - bool mem_dirty = !tcg_enabled() && memory_region_test_and_clear_dirty( - d->vram, entry.vram_addr, entry.size, - DIRTY_MEMORY_NV2A); - - if (upload && (surface->buffer_dirty || mem_dirty)) { - pgraph_unbind_surface(d, color); - - SurfaceBinding *found = pgraph_surface_get(d, entry.vram_addr); - if (found != NULL) { - /* FIXME: Support same color/zeta surface target? In the mean time, - * if the surface we just found is currently bound, just unbind it. - */ - SurfaceBinding *other = (color ? pg->zeta_binding - : pg->color_binding); - if (found == other) { - NV2A_UNIMPLEMENTED("Same color & zeta surface offset"); - pgraph_unbind_surface(d, !color); - } - } - - trace_nv2a_pgraph_surface_target( - color ? "COLOR" : "ZETA", entry.vram_addr, - entry.swizzle ? "sz" : "ln", - pg->surface_shape.anti_aliasing, - pg->surface_shape.clip_x, - pg->surface_shape.clip_width, pg->surface_shape.clip_y, - pg->surface_shape.clip_height); - - bool should_create = true; - - if (found != NULL) { - bool is_compatible = - pgraph_check_surface_compatibility(found, &entry, false); - -#define TRACE_ARGS found->vram_addr, found->width, found->height, \ - found->swizzle ? "sz" : "ln", \ - found->shape.anti_aliasing, found->shape.clip_x, \ - found->shape.clip_width, found->shape.clip_y, \ - found->shape.clip_height, found->pitch - if (found->color) { - trace_nv2a_pgraph_surface_match_color(TRACE_ARGS); - } else { - trace_nv2a_pgraph_surface_match_zeta(TRACE_ARGS); - } -#undef TRACE_ARGS - - assert(!(entry.swizzle && pg->clearing)); - - if (found->swizzle != entry.swizzle) { - /* Clears should only be done on linear surfaces. Avoid - * synchronization by allowing (1) a surface marked swizzled to - * be cleared under the assumption the entire surface is - * destined to be cleared and (2) a fully cleared linear surface - * to be marked swizzled. Strictly match size to avoid - * pathological cases. 
- */ - is_compatible &= (pg->clearing || found->cleared) && - pgraph_check_surface_compatibility(found, &entry, true); - if (is_compatible) { - trace_nv2a_pgraph_surface_migrate_type( - entry.swizzle ? "swizzled" : "linear"); - } - } - - if (is_compatible && color && - !pgraph_check_surface_compatibility(found, &entry, true)) { - SurfaceBinding zeta_entry; - pgraph_populate_surface_binding_entry_sized( - d, !color, found->width, found->height, &zeta_entry); - hwaddr color_end = found->vram_addr + found->size; - hwaddr zeta_end = zeta_entry.vram_addr + zeta_entry.size; - is_compatible &= found->vram_addr >= zeta_end || - zeta_entry.vram_addr >= color_end; - } - - if (is_compatible && !color && pg->color_binding) { - is_compatible &= (found->width == pg->color_binding->width) && - (found->height == pg->color_binding->height); - } - - if (is_compatible) { - /* FIXME: Refactor */ - pg->surface_binding_dim.width = found->width; - pg->surface_binding_dim.clip_x = found->shape.clip_x; - pg->surface_binding_dim.clip_width = found->shape.clip_width; - pg->surface_binding_dim.height = found->height; - pg->surface_binding_dim.clip_y = found->shape.clip_y; - pg->surface_binding_dim.clip_height = found->shape.clip_height; - found->upload_pending |= mem_dirty; - pg->surface_zeta.buffer_dirty |= color; - should_create = false; - } else { - trace_nv2a_pgraph_surface_evict_reason( - "incompatible", found->vram_addr); - pgraph_compare_surfaces(found, &entry); - pgraph_download_surface_data_if_dirty(d, found); - pgraph_surface_invalidate(d, found); - } - } - - if (should_create) { - glGenTextures(1, &entry.gl_buffer); - glBindTexture(GL_TEXTURE_2D, entry.gl_buffer); - NV2A_GL_DLABEL(GL_TEXTURE, entry.gl_buffer, - "%s format: %0X, width: %d, height: %d " - "(addr %" HWADDR_PRIx ")", - color ? "color" : "zeta", - color ? 
pg->surface_shape.color_format - : pg->surface_shape.zeta_format, - entry.width, entry.height, surface->offset); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); - glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); - unsigned int width = entry.width, height = entry.height; - pgraph_apply_scaling_factor(pg, &width, &height); - glTexImage2D(GL_TEXTURE_2D, 0, entry.fmt.gl_internal_format, width, - height, 0, entry.fmt.gl_format, entry.fmt.gl_type, - NULL); - found = pgraph_surface_put(d, entry.vram_addr, &entry); - - /* FIXME: Refactor */ - pg->surface_binding_dim.width = entry.width; - pg->surface_binding_dim.clip_x = entry.shape.clip_x; - pg->surface_binding_dim.clip_width = entry.shape.clip_width; - pg->surface_binding_dim.height = entry.height; - pg->surface_binding_dim.clip_y = entry.shape.clip_y; - pg->surface_binding_dim.clip_height = entry.shape.clip_height; - - if (color && pg->zeta_binding && (pg->zeta_binding->width != entry.width || pg->zeta_binding->height != entry.height)) { - pg->surface_zeta.buffer_dirty = true; - } - } - -#define TRACE_ARGS found->vram_addr, found->width, found->height, \ - found->swizzle ? 
"sz" : "ln", found->shape.anti_aliasing, \ - found->shape.clip_x, found->shape.clip_width, \ - found->shape.clip_y, found->shape.clip_height, found->pitch - - if (color) { - if (should_create) { - trace_nv2a_pgraph_surface_create_color(TRACE_ARGS); - } else { - trace_nv2a_pgraph_surface_hit_color(TRACE_ARGS); - } - - pg->color_binding = found; - } else { - if (should_create) { - trace_nv2a_pgraph_surface_create_zeta(TRACE_ARGS); - } else { - trace_nv2a_pgraph_surface_hit_zeta(TRACE_ARGS); - } - pg->zeta_binding = found; - } -#undef TRACE_ARGS - - glFramebufferTexture2D(GL_FRAMEBUFFER, entry.fmt.gl_attachment, - GL_TEXTURE_2D, found->gl_buffer, 0); - assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == - GL_FRAMEBUFFER_COMPLETE); - - surface->buffer_dirty = false; - } - - if (!upload && surface->draw_dirty) { - if (!tcg_enabled()) { - /* FIXME: Cannot monitor for reads/writes; flush now */ - pgraph_download_surface_data(d, - color ? pg->color_binding : pg->zeta_binding, true); - } - - surface->write_enabled_cache = false; - surface->draw_dirty = false; - } -} - -static void pgraph_unbind_surface(NV2AState *d, bool color) -{ - PGRAPHState *pg = &d->pgraph; - - if (color) { - if (pg->color_binding) { - glFramebufferTexture2D(GL_FRAMEBUFFER, - GL_COLOR_ATTACHMENT0, - GL_TEXTURE_2D, 0, 0); - pg->color_binding = NULL; - } - } else { - if (pg->zeta_binding) { - glFramebufferTexture2D(GL_FRAMEBUFFER, - GL_DEPTH_ATTACHMENT, - GL_TEXTURE_2D, 0, 0); - glFramebufferTexture2D(GL_FRAMEBUFFER, - GL_DEPTH_STENCIL_ATTACHMENT, - GL_TEXTURE_2D, 0, 0); - pg->zeta_binding = NULL; - } - } -} - -static void pgraph_update_surface(NV2AState *d, bool upload, - bool color_write, bool zeta_write) -{ - PGRAPHState *pg = &d->pgraph; - - pg->surface_shape.z_format = GET_MASK(pg->regs[NV_PGRAPH_SETUPRASTER], - NV_PGRAPH_SETUPRASTER_Z_FORMAT); - - color_write = color_write && - (pg->clearing || pgraph_color_write_enabled(pg)); - zeta_write = zeta_write && (pg->clearing || 
pgraph_zeta_write_enabled(pg)); - - if (upload) { - bool fb_dirty = pgraph_framebuffer_dirty(pg); - if (fb_dirty) { - memcpy(&pg->last_surface_shape, &pg->surface_shape, - sizeof(SurfaceShape)); - pg->surface_color.buffer_dirty = true; - pg->surface_zeta.buffer_dirty = true; - } - - if (pg->surface_color.buffer_dirty) { - pgraph_unbind_surface(d, true); - } - - if (color_write) { - pgraph_update_surface_part(d, true, true); - } - - if (pg->surface_zeta.buffer_dirty) { - pgraph_unbind_surface(d, false); - } - - if (zeta_write) { - pgraph_update_surface_part(d, true, false); - } - } else { - if ((color_write || pg->surface_color.write_enabled_cache) - && pg->surface_color.draw_dirty) { - pgraph_update_surface_part(d, false, true); - } - if ((zeta_write || pg->surface_zeta.write_enabled_cache) - && pg->surface_zeta.draw_dirty) { - pgraph_update_surface_part(d, false, false); - } - } - - if (upload) { - pg->draw_time++; - } - - bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); - - if (pg->color_binding) { - pg->color_binding->frame_time = pg->frame_time; - if (upload) { - pgraph_upload_surface_data(d, pg->color_binding, false); - pg->color_binding->draw_time = pg->draw_time; - pg->color_binding->swizzle = swizzle; - } - } - - if (pg->zeta_binding) { - pg->zeta_binding->frame_time = pg->frame_time; - if (upload) { - pgraph_upload_surface_data(d, pg->zeta_binding, false); - pg->zeta_binding->draw_time = pg->draw_time; - pg->zeta_binding->swizzle = swizzle; - } - } - - // Sanity check color and zeta dimensions match - if (pg->color_binding && pg->zeta_binding) { - assert((pg->color_binding->width == pg->zeta_binding->width) - && (pg->color_binding->height == pg->zeta_binding->height)); - } - - pgraph_surface_evict_old(d); -} - -struct pgraph_texture_possibly_dirty_struct { - hwaddr addr, end; -}; - -static void pgraph_mark_textures_possibly_dirty_visitor(Lru *lru, LruNode *node, void *opaque) -{ - struct pgraph_texture_possibly_dirty_struct *test 
= - (struct pgraph_texture_possibly_dirty_struct *)opaque; - - struct TextureLruNode *tnode = container_of(node, TextureLruNode, node); - if (tnode->binding == NULL || tnode->possibly_dirty) { - return; - } - - uintptr_t k_tex_addr = tnode->key.texture_vram_offset; - uintptr_t k_tex_end = k_tex_addr + tnode->key.texture_length - 1; - bool overlapping = !(test->addr > k_tex_end || k_tex_addr > test->end); - - if (tnode->key.palette_length > 0) { - uintptr_t k_pal_addr = tnode->key.palette_vram_offset; - uintptr_t k_pal_end = k_pal_addr + tnode->key.palette_length - 1; - overlapping |= !(test->addr > k_pal_end || k_pal_addr > test->end); - } - - tnode->possibly_dirty |= overlapping; -} - - -static void pgraph_mark_textures_possibly_dirty(NV2AState *d, - hwaddr addr, hwaddr size) -{ - hwaddr end = TARGET_PAGE_ALIGN(addr + size) - 1; - addr &= TARGET_PAGE_MASK; - assert(end <= memory_region_size(d->vram)); - - struct pgraph_texture_possibly_dirty_struct test = { - .addr = addr, - .end = end, - }; - - lru_visit_active(&d->pgraph.texture_cache, - pgraph_mark_textures_possibly_dirty_visitor, - &test); -} - -static bool pgraph_check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size) -{ - hwaddr end = TARGET_PAGE_ALIGN(addr + size); - addr &= TARGET_PAGE_MASK; - assert(end < memory_region_size(d->vram)); - return memory_region_test_and_clear_dirty(d->vram, addr, end - addr, - DIRTY_MEMORY_NV2A_TEX); -} - -static bool pgraph_is_texture_stage_active(PGRAPHState *pg, unsigned int stage) -{ - assert(stage < NV2A_MAX_TEXTURES); - uint32_t mode = (pg->regs[NV_PGRAPH_SHADERPROG] >> (stage * 5)) & 0x1F; - return !!mode; -} - -// Check if any of the pages spanned by the a texture are dirty. 
-static bool pgraph_check_texture_possibly_dirty(NV2AState *d, hwaddr texture_vram_offset, unsigned int length, hwaddr palette_vram_offset, unsigned int palette_length) -{ - bool possibly_dirty = false; - if (pgraph_check_texture_dirty(d, texture_vram_offset, length)) { - possibly_dirty = true; - pgraph_mark_textures_possibly_dirty(d, texture_vram_offset, length); - } - if (palette_length && pgraph_check_texture_dirty(d, palette_vram_offset, - palette_length)) { - possibly_dirty = true; - pgraph_mark_textures_possibly_dirty(d, palette_vram_offset, - palette_length); - } - return possibly_dirty; -} - -static void apply_texture_parameters(TextureBinding *binding, - const ColorFormatInfo *f, - unsigned int dimensionality, - unsigned int filter, - unsigned int address, - bool is_bordered, - uint32_t border_color) -{ - unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN); - unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG); - unsigned int addru = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU); - unsigned int addrv = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRV); - unsigned int addrp = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRP); - - if (f->linear) { - /* somtimes games try to set mipmap min filters on linear textures. - * this could indicate a bug... 
*/ - switch (min_filter) { - case NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD: - case NV_PGRAPH_TEXFILTER0_MIN_BOX_TENT_LOD: - min_filter = NV_PGRAPH_TEXFILTER0_MIN_BOX_LOD0; - break; - case NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD: - case NV_PGRAPH_TEXFILTER0_MIN_TENT_TENT_LOD: - min_filter = NV_PGRAPH_TEXFILTER0_MIN_TENT_LOD0; - break; - } - } - - if (min_filter != binding->min_filter) { - glTexParameteri(binding->gl_target, GL_TEXTURE_MIN_FILTER, - pgraph_texture_min_filter_map[min_filter]); - binding->min_filter = min_filter; - } - if (mag_filter != binding->mag_filter) { - glTexParameteri(binding->gl_target, GL_TEXTURE_MAG_FILTER, - pgraph_texture_mag_filter_map[mag_filter]); - binding->mag_filter = mag_filter; - } - - /* Texture wrapping */ - assert(addru < ARRAY_SIZE(pgraph_texture_addr_map)); - if (addru != binding->addru) { - glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_S, - pgraph_texture_addr_map[addru]); - binding->addru = addru; - } - bool needs_border_color = binding->addru == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER; - if (dimensionality > 1) { - if (addrv != binding->addrv) { - assert(addrv < ARRAY_SIZE(pgraph_texture_addr_map)); - glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_T, - pgraph_texture_addr_map[addrv]); - binding->addrv = addrv; - } - needs_border_color = needs_border_color || binding->addrv == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER; - } - if (dimensionality > 2) { - if (addrp != binding->addrp) { - assert(addrp < ARRAY_SIZE(pgraph_texture_addr_map)); - glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_R, - pgraph_texture_addr_map[addrp]); - binding->addrp = addrp; - } - needs_border_color = needs_border_color || binding->addrp == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER; - } - - if (!is_bordered && needs_border_color) { - if (!binding->border_color_set || binding->border_color != border_color) { - GLfloat gl_border_color[] = { - /* FIXME: Color channels might be wrong order */ - ((border_color >> 16) & 0xFF) / 255.0f, /* red */ - 
((border_color >> 8) & 0xFF) / 255.0f, /* green */ - (border_color & 0xFF) / 255.0f, /* blue */ - ((border_color >> 24) & 0xFF) / 255.0f /* alpha */ - }; - glTexParameterfv(binding->gl_target, GL_TEXTURE_BORDER_COLOR, - gl_border_color); - - binding->border_color_set = true; - binding->border_color = border_color; - } - } -} - -static void pgraph_bind_textures(NV2AState *d) -{ - int i; - PGRAPHState *pg = &d->pgraph; - - NV2A_GL_DGROUP_BEGIN("%s", __func__); - - for (i=0; iregs[NV_PGRAPH_TEXCTL0_0 + i*4]; - bool enabled = pgraph_is_texture_stage_active(pg, i) && - GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_ENABLE); - /* FIXME: What happens if texture is disabled but stage is active? */ - - glActiveTexture(GL_TEXTURE0 + i); - if (!enabled) { - glBindTexture(GL_TEXTURE_CUBE_MAP, 0); - glBindTexture(GL_TEXTURE_RECTANGLE, 0); - glBindTexture(GL_TEXTURE_1D, 0); - glBindTexture(GL_TEXTURE_2D, 0); - glBindTexture(GL_TEXTURE_3D, 0); - continue; - } - - uint32_t ctl_1 = pg->regs[NV_PGRAPH_TEXCTL1_0 + i*4]; - uint32_t fmt = pg->regs[NV_PGRAPH_TEXFMT0 + i*4]; - uint32_t filter = pg->regs[NV_PGRAPH_TEXFILTER0 + i*4]; - uint32_t address = pg->regs[NV_PGRAPH_TEXADDRESS0 + i*4]; - uint32_t palette = pg->regs[NV_PGRAPH_TEXPALETTE0 + i*4]; - - unsigned int min_mipmap_level = - GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MIN_LOD_CLAMP); - unsigned int max_mipmap_level = - GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MAX_LOD_CLAMP); - - unsigned int pitch = - GET_MASK(ctl_1, NV_PGRAPH_TEXCTL1_0_IMAGE_PITCH); - - unsigned int dma_select = - GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CONTEXT_DMA); - bool cubemap = - GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE); - unsigned int dimensionality = - GET_MASK(fmt, NV_PGRAPH_TEXFMT0_DIMENSIONALITY); - unsigned int color_format = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_COLOR); - unsigned int levels = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS); - unsigned int log_width = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U); - unsigned int log_height = GET_MASK(fmt, 
NV_PGRAPH_TEXFMT0_BASE_SIZE_V); - unsigned int log_depth = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P); - - unsigned int rect_width = - GET_MASK(pg->regs[NV_PGRAPH_TEXIMAGERECT0 + i*4], - NV_PGRAPH_TEXIMAGERECT0_WIDTH); - unsigned int rect_height = - GET_MASK(pg->regs[NV_PGRAPH_TEXIMAGERECT0 + i*4], - NV_PGRAPH_TEXIMAGERECT0_HEIGHT); -#ifdef DEBUG_NV2A - unsigned int lod_bias = - GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIPMAP_LOD_BIAS); -#endif - unsigned int border_source = GET_MASK(fmt, - NV_PGRAPH_TEXFMT0_BORDER_SOURCE); - uint32_t border_color = pg->regs[NV_PGRAPH_BORDERCOLOR0 + i*4]; - - hwaddr offset = pg->regs[NV_PGRAPH_TEXOFFSET0 + i*4]; - - bool palette_dma_select = - GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA); - unsigned int palette_length_index = - GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_LENGTH); - unsigned int palette_offset = - palette & NV_PGRAPH_TEXPALETTE0_OFFSET; - - unsigned int palette_length = 0; - switch (palette_length_index) { - case NV_PGRAPH_TEXPALETTE0_LENGTH_256: palette_length = 256; break; - case NV_PGRAPH_TEXPALETTE0_LENGTH_128: palette_length = 128; break; - case NV_PGRAPH_TEXPALETTE0_LENGTH_64: palette_length = 64; break; - case NV_PGRAPH_TEXPALETTE0_LENGTH_32: palette_length = 32; break; - default: assert(false); break; - } - - /* Check for unsupported features */ - if (filter & NV_PGRAPH_TEXFILTER0_ASIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_ASIGNED"); - if (filter & NV_PGRAPH_TEXFILTER0_RSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_RSIGNED"); - if (filter & NV_PGRAPH_TEXFILTER0_GSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_GSIGNED"); - if (filter & NV_PGRAPH_TEXFILTER0_BSIGNED) NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_BSIGNED"); - - nv2a_profile_inc_counter(NV2A_PROF_TEX_BIND); - - hwaddr dma_len; - uint8_t *texture_data; - if (dma_select) { - texture_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &dma_len); - } else { - texture_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &dma_len); - } - assert(offset < dma_len); 
- texture_data += offset; - hwaddr texture_vram_offset = texture_data - d->vram_ptr; - - hwaddr palette_dma_len; - uint8_t *palette_data; - if (palette_dma_select) { - palette_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &palette_dma_len); - } else { - palette_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &palette_dma_len); - } - assert(palette_offset < palette_dma_len); - palette_data += palette_offset; - hwaddr palette_vram_offset = palette_data - d->vram_ptr; - - NV2A_DPRINTF(" texture %d is format 0x%x, " - "off 0x%" HWADDR_PRIx " (r %d, %d or %d, %d, %d; %d%s)," - " filter %x %x, levels %d-%d %d bias %d\n", - i, color_format, offset, - rect_width, rect_height, - 1 << log_width, 1 << log_height, 1 << log_depth, - pitch, - cubemap ? "; cubemap" : "", - GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN), - GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG), - min_mipmap_level, max_mipmap_level, levels, - lod_bias); - - assert(color_format < ARRAY_SIZE(kelvin_color_format_map)); - ColorFormatInfo f = kelvin_color_format_map[color_format]; - if (f.bytes_per_pixel == 0) { - fprintf(stderr, "nv2a: unimplemented texture color format 0x%x\n", - color_format); - abort(); - } - - unsigned int width, height, depth; - if (f.linear) { - assert(dimensionality == 2); - width = rect_width; - height = rect_height; - depth = 1; - } else { - width = 1 << log_width; - height = 1 << log_height; - depth = 1 << log_depth; - pitch = 0; - - levels = MIN(levels, max_mipmap_level + 1); - - /* Discard mipmap levels that would be smaller than 1x1. - * FIXME: Is this actually needed? - * - * >> Level 0: 32 x 4 - * Level 1: 16 x 2 - * Level 2: 8 x 1 - * Level 3: 4 x 1 - * Level 4: 2 x 1 - * Level 5: 1 x 1 - */ - levels = MIN(levels, MAX(log_width, log_height) + 1); - assert(levels > 0); - - if (dimensionality == 3) { - /* FIXME: What about 3D mipmaps? */ - if (log_width < 2 || log_height < 2) { - /* Base level is smaller than 4x4... 
*/ - levels = 1; - } else { - levels = MIN(levels, MIN(log_width, log_height) - 1); - } - } - min_mipmap_level = MIN(levels-1, min_mipmap_level); - max_mipmap_level = MIN(levels-1, max_mipmap_level); - } - - size_t length = 0; - if (f.linear) { - assert(cubemap == false); - assert(dimensionality == 2); - length = height * pitch; - } else { - if (dimensionality >= 2) { - unsigned int w = width, h = height; - int level; - if (f.gl_format != 0) { - for (level = 0; level < levels; level++) { - w = MAX(w, 1); - h = MAX(h, 1); - length += w * h * f.bytes_per_pixel; - w /= 2; - h /= 2; - } - } else { - /* Compressed textures are a bit different */ - unsigned int block_size = - f.gl_internal_format == - GL_COMPRESSED_RGBA_S3TC_DXT1_EXT ? - 8 : 16; - for (level = 0; level < levels; level++) { - w = MAX(w, 1); - h = MAX(h, 1); - unsigned int phys_w = (w + 3) & ~3, - phys_h = (h + 3) & ~3; - length += phys_w/4 * phys_h/4 * block_size; - w /= 2; - h /= 2; - } - } - if (cubemap) { - assert(dimensionality == 2); - length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1); - length *= 6; - } - if (dimensionality >= 3) { - length *= depth; - } - } - } - - bool is_bordered = border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR; - - assert((texture_vram_offset + length) < memory_region_size(d->vram)); - assert((palette_vram_offset + palette_length) - < memory_region_size(d->vram)); - bool is_indexed = (color_format == - NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8); - bool possibly_dirty = false; - bool possibly_dirty_checked = false; - - SurfaceBinding *surface = pgraph_surface_get(d, texture_vram_offset); - TextureBinding *tbind = pg->texture_binding[i]; - if (!pg->texture_dirty[i] && tbind) { - bool reusable = false; - if (surface && tbind->draw_time == surface->draw_time) { - reusable = true; - } else if (!surface) { - possibly_dirty = pgraph_check_texture_possibly_dirty( - d, - texture_vram_offset, - length, - palette_vram_offset, - is_indexed 
? palette_length : 0); - possibly_dirty_checked = true; - reusable = !possibly_dirty; - } - - if (reusable) { - glBindTexture(pg->texture_binding[i]->gl_target, - pg->texture_binding[i]->gl_texture); - apply_texture_parameters(pg->texture_binding[i], - &f, - dimensionality, - filter, - address, - is_bordered, - border_color); - continue; - } - } - - TextureShape state; - memset(&state, 0, sizeof(TextureShape)); - state.cubemap = cubemap; - state.dimensionality = dimensionality; - state.color_format = color_format; - state.levels = levels; - state.width = width; - state.height = height; - state.depth = depth; - state.min_mipmap_level = min_mipmap_level; - state.max_mipmap_level = max_mipmap_level; - state.pitch = pitch; - state.border = is_bordered; - - /* - * Check active surfaces to see if this texture was a render target - */ - bool surf_to_tex = false; - if (surface != NULL) { - surf_to_tex = pgraph_check_surface_to_texture_compatibility( - surface, &state); - - if (surf_to_tex && surface->upload_pending) { - pgraph_upload_surface_data(d, surface, false); - } - } - - if (!surf_to_tex) { - // FIXME: Restructure to support rendering surfaces to cubemap faces - - // Writeback any surfaces which this texture may index - hwaddr tex_vram_end = texture_vram_offset + length - 1; - QTAILQ_FOREACH(surface, &d->pgraph.surfaces, entry) { - hwaddr surf_vram_end = surface->vram_addr + surface->size - 1; - bool overlapping = !(surface->vram_addr >= tex_vram_end - || texture_vram_offset >= surf_vram_end); - if (overlapping) { - pgraph_download_surface_data_if_dirty(d, surface); - } - } - } - - TextureKey key; - memset(&key, 0, sizeof(TextureKey)); - key.state = state; - key.texture_vram_offset = texture_vram_offset; - key.texture_length = length; - if (is_indexed) { - key.palette_vram_offset = palette_vram_offset; - key.palette_length = palette_length; - } - - // Search for existing texture binding in cache - uint64_t tex_binding_hash = fast_hash((uint8_t*)&key, sizeof(key)); - 
LruNode *found = lru_lookup(&pg->texture_cache, - tex_binding_hash, &key); - TextureLruNode *key_out = container_of(found, TextureLruNode, node); - possibly_dirty |= (key_out->binding == NULL) || key_out->possibly_dirty; - - if (!surf_to_tex && !possibly_dirty_checked) { - possibly_dirty |= pgraph_check_texture_possibly_dirty( - d, - texture_vram_offset, - length, - palette_vram_offset, - is_indexed ? palette_length : 0); - } - - // Calculate hash of texture data, if necessary - uint64_t tex_data_hash = 0; - if (!surf_to_tex && possibly_dirty) { - tex_data_hash = fast_hash(texture_data, length); - if (is_indexed) { - tex_data_hash ^= fast_hash(palette_data, palette_length); - } - } - - // Free existing binding, if texture data has changed - bool must_destroy = (key_out->binding != NULL) - && possibly_dirty - && (key_out->binding->data_hash != tex_data_hash); - if (must_destroy) { - texture_binding_destroy(key_out->binding); - key_out->binding = NULL; - } - - if (key_out->binding == NULL) { - // Must create the texture - key_out->binding = generate_texture(state, texture_data, palette_data); - key_out->binding->data_hash = tex_data_hash; - key_out->binding->scale = 1; - } else { - // Saved an upload! Reuse existing texture in graphics memory. 
- glBindTexture(key_out->binding->gl_target, - key_out->binding->gl_texture); - } - - key_out->possibly_dirty = false; - TextureBinding *binding = key_out->binding; - binding->refcnt++; - - if (surf_to_tex && binding->draw_time < surface->draw_time) { - - trace_nv2a_pgraph_surface_render_to_texture( - surface->vram_addr, surface->width, surface->height); - pgraph_render_surface_to_texture(d, surface, binding, &state, i); - binding->draw_time = surface->draw_time; - if (binding->gl_target == GL_TEXTURE_RECTANGLE) { - binding->scale = pg->surface_scale_factor; - } else { - binding->scale = 1; - } - } - - apply_texture_parameters(binding, - &f, - dimensionality, - filter, - address, - is_bordered, - border_color); - - if (pg->texture_binding[i]) { - if (pg->texture_binding[i]->gl_target != binding->gl_target) { - glBindTexture(pg->texture_binding[i]->gl_target, 0); - } - texture_binding_destroy(pg->texture_binding[i]); - } - pg->texture_binding[i] = binding; - pg->texture_dirty[i] = false; - } - NV2A_GL_DGROUP_END(); -} - -static void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg, - unsigned int *width, - unsigned int *height) -{ - switch (pg->surface_shape.anti_aliasing) { - case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_1: - break; - case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_CORNER_2: - if (width) { *width *= 2; } - break; - case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_SQUARE_OFFSET_4: - if (width) { *width *= 2; } - if (height) { *height *= 2; } - break; - default: - assert(false); - break; - } -} - -static void pgraph_apply_scaling_factor(PGRAPHState *pg, - unsigned int *width, - unsigned int *height) -{ - *width *= pg->surface_scale_factor; - *height *= pg->surface_scale_factor; -} - -static void pgraph_get_surface_dimensions(PGRAPHState *pg, - unsigned int *width, - unsigned int *height) -{ - bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); - if (swizzle) { - *width = 1 << pg->surface_shape.log_width; - *height = 1 << 
pg->surface_shape.log_height; - } else { - *width = pg->surface_shape.clip_width; - *height = pg->surface_shape.clip_height; - } -} - -static void pgraph_update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size, - bool quick) -{ - glBindBuffer(GL_ARRAY_BUFFER, d->pgraph.gl_memory_buffer); - - hwaddr end = TARGET_PAGE_ALIGN(addr + size); - addr &= TARGET_PAGE_MASK; - assert(end < memory_region_size(d->vram)); - - static hwaddr last_addr, last_end; - if (quick && (addr >= last_addr) && (end <= last_end)) { - return; - } - last_addr = addr; - last_end = end; - - size = end - addr; - if (memory_region_test_and_clear_dirty(d->vram, addr, size, - DIRTY_MEMORY_NV2A)) { - glBufferSubData(GL_ARRAY_BUFFER, addr, size, - d->vram_ptr + addr); - nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_1); - } -} - -static void pgraph_update_inline_value(VertexAttribute *attr, - const uint8_t *data) -{ - assert(attr->count <= 4); - attr->inline_value[0] = 0.0f; - attr->inline_value[1] = 0.0f; - attr->inline_value[2] = 0.0f; - attr->inline_value[3] = 1.0f; - - switch (attr->format) { - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: - for (uint32_t i = 0; i < attr->count; ++i) { - attr->inline_value[i] = (float)data[i] / 255.0f; - } - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: { - const int16_t *val = (const int16_t *) data; - for (uint32_t i = 0; i < attr->count; ++i, ++val) { - attr->inline_value[i] = MAX(-1.0f, (float) *val / 32767.0f); - } - break; - } - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: - memcpy(attr->inline_value, data, attr->size * attr->count); - break; - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: { - const int16_t *val = (const int16_t *) data; - for (uint32_t i = 0; i < attr->count; ++i, ++val) { - attr->inline_value[i] = (float)*val; - } - break; - } - case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: { - /* 3 signed, normalized components packed in 32-bits. 
(11,11,10) */ - const int32_t val = *(const int32_t *)data; - int32_t x = val & 0x7FF; - if (x & 0x400) { - x |= 0xFFFFF800; - } - int32_t y = (val >> 11) & 0x7FF; - if (y & 0x400) { - y |= 0xFFFFF800; - } - int32_t z = (val >> 22) & 0x7FF; - if (z & 0x200) { - z |= 0xFFFFFC00; - } - - attr->inline_value[0] = MAX(-1.0f, (float)x / 1023.0f); - attr->inline_value[1] = MAX(-1.0f, (float)y / 1023.0f); - attr->inline_value[2] = MAX(-1.0f, (float)z / 511.0f); - break; - } - default: - fprintf(stderr, "Unknown vertex attribute type: 0x%x for format 0x%x\n", - attr->gl_type, attr->format); - assert(!"Unsupported attribute type"); - break; - } -} - -static void pgraph_bind_vertex_attributes(NV2AState *d, - unsigned int min_element, - unsigned int max_element, - bool inline_data, - unsigned int inline_stride, - unsigned int provoking_element) -{ - PGRAPHState *pg = &d->pgraph; - bool updated_memory_buffer = false; - unsigned int num_elements = max_element - min_element + 1; - - if (inline_data) { - NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)", - __func__, num_elements, inline_stride); - } else { - NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements); - } - - pg->compressed_attrs = 0; - - for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - VertexAttribute *attr = &pg->vertex_attributes[i]; - - if (!attr->count) { - glDisableVertexAttribArray(i); - glVertexAttrib4fv(i, attr->inline_value); - continue; - } - - nv2a_profile_inc_counter(NV2A_PROF_ATTR_BIND); - hwaddr attrib_data_addr; - size_t stride; - - if (attr->needs_conversion) { - pg->compressed_attrs |= (1 << i); - } - - hwaddr start = 0; - if (inline_data) { - glBindBuffer(GL_ARRAY_BUFFER, pg->gl_inline_array_buffer); - attrib_data_addr = attr->inline_array_offset; - stride = inline_stride; - } else { - hwaddr dma_len; - uint8_t *attr_data = (uint8_t *)nv_dma_map( - d, attr->dma_select ? 
pg->dma_vertex_b : pg->dma_vertex_a, - &dma_len); - assert(attr->offset < dma_len); - attrib_data_addr = attr_data + attr->offset - d->vram_ptr; - stride = attr->stride; - start = attrib_data_addr + min_element * stride; - pgraph_update_memory_buffer(d, start, num_elements * stride, - updated_memory_buffer); - updated_memory_buffer = true; - } - - uint32_t provoking_element_index = provoking_element - min_element; - size_t element_size = attr->size * attr->count; - assert(element_size <= sizeof(attr->inline_value)); - const uint8_t *last_entry; - - if (inline_data) { - last_entry = (uint8_t*)pg->inline_array + attr->inline_array_offset; - } else { - last_entry = d->vram_ptr + start; - } - if (!stride) { - // Stride of 0 indicates that only the first element should be - // used. - pgraph_update_inline_value(attr, last_entry); - glDisableVertexAttribArray(i); - glVertexAttrib4fv(i, attr->inline_value); - continue; - } - - if (attr->needs_conversion) { - glVertexAttribIPointer(i, attr->gl_count, attr->gl_type, stride, - (void *)attrib_data_addr); - } else { - glVertexAttribPointer(i, attr->gl_count, attr->gl_type, - attr->gl_normalize, stride, - (void *)attrib_data_addr); - } - - glEnableVertexAttribArray(i); - last_entry += stride * provoking_element_index; - pgraph_update_inline_value(attr, last_entry); - } - - NV2A_GL_DGROUP_END(); -} - -static unsigned int pgraph_bind_inline_array(NV2AState *d) -{ - PGRAPHState *pg = &d->pgraph; - - unsigned int offset = 0; - for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - VertexAttribute *attr = &pg->vertex_attributes[i]; - if (attr->count == 0) { - continue; - } - - /* FIXME: Double check */ - offset = ROUND_UP(offset, attr->size); - attr->inline_array_offset = offset; - NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n", - i, attr->size, attr->count); - offset += attr->size * attr->count; - offset = ROUND_UP(offset, attr->size); - } - - unsigned int vertex_size = offset; - unsigned int index_count = 
/* 16 bit to [0.0, F16_MAX = 511.9375].
 * NV2A "f16" is an unsigned fixed-layout float: shifting the 16 payload bits
 * into an IEEE-754 single and rebasing the exponent by +0x3C000000. */
static float convert_f16_to_float(uint16_t f16)
{
    if (f16 == 0x0000) {
        return 0.0f;
    }
    uint32_t i = ((uint32_t)f16 << 11) + 0x3C000000;
    /* Type-pun via memcpy: the old `*(float*)&i` cast violates the C
     * effective-type (strict aliasing) rules and is undefined behavior. */
    float ret;
    memcpy(&ret, &i, sizeof(ret));
    return ret;
}

/* 24 bit to [0.0, F24_MAX].
 * Same trick as above: the 24 payload bits shifted into an IEEE-754 single. */
static float convert_f24_to_float(uint32_t f24)
{
    assert(!(f24 >> 24));
    f24 &= 0xFFFFFF;
    if (f24 == 0x000000) {
        return 0.0f;
    }
    uint32_t i = f24 << 7;
    float ret;
    memcpy(&ret, &i, sizeof(ret));
    return ret;
}

/* Clamp an int to the [0, 255] byte range. */
static uint8_t cliptobyte(int x)
{
    return (uint8_t)((x < 0) ? 0 : ((x > 255) ? 255 : x));
}

/* Convert the pixel at `ix` of a YUY2 (Y0 U Y1 V) scanline to RGB using the
 * BT.601 integer approximation. Chroma is shared between pixel pairs, so odd
 * pixels read U/V from the preceding pair. */
static void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix,
                                uint8_t *r, uint8_t *g, uint8_t* b) {
    int c, d, e;
    c = (int)line[ix * 2] - 16;
    if (ix % 2) {
        d = (int)line[ix * 2 - 1] - 128;
        e = (int)line[ix * 2 + 1] - 128;
    } else {
        d = (int)line[ix * 2 + 1] - 128;
        e = (int)line[ix * 2 + 3] - 128;
    }
    *r = cliptobyte((298 * c + 409 * e + 128) >> 8);
    *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8);
    *b = cliptobyte((298 * c + 516 * d + 128) >> 8);
}

/* Convert the pixel at `ix` of a UYVY (U Y0 V Y1) scanline to RGB; identical
 * math to convert_yuy2_to_rgb with the luma/chroma byte order swapped. */
static void convert_uyvy_to_rgb(const uint8_t *line, unsigned int ix,
                                uint8_t *r, uint8_t *g, uint8_t* b) {
    int c, d, e;
    c = (int)line[ix * 2 + 1] - 16;
    if (ix % 2) {
        d = (int)line[ix * 2 - 2] - 128;
        e = (int)line[ix * 2 + 0] - 128;
    } else {
        d = (int)line[ix * 2 + 0] - 128;
        e = (int)line[ix * 2 + 2] - 128;
    }
    *r = cliptobyte((298 * c + 409 * e + 128) >> 8);
    *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8);
    *b = cliptobyte((298 * c + 516 * d + 128) >> 8);
}
- // Generally the hardware asserts when attempting to use volumetric - // textures in linear formats. - assert(depth == 1); /* FIXME */ - // FIXME: only valid if control0 register allows for colorspace conversion - uint8_t* converted_data = (uint8_t*)g_malloc(width * height * 4); - int x, y; - uint8_t* pixel = converted_data; - for (y = 0; y < height; y++) { - const uint8_t* line = &data[y * row_pitch * depth]; - for (x = 0; x < width; x++, pixel += 4) { - if (s.color_format - == NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8) { - convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]); - } else { - convert_uyvy_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]); - } - pixel[3] = 255; - } - } - return converted_data; - } else if (s.color_format - == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5) { - assert(depth == 1); /* FIXME */ - uint8_t *converted_data = (uint8_t*)g_malloc(width * height * 3); - int x, y; - for (y = 0; y < height; y++) { - for (x = 0; x < width; x++) { - uint16_t rgb655 = *(uint16_t*)(data + y * row_pitch + x * 2); - int8_t *pixel = (int8_t*)&converted_data[(y * width + x) * 3]; - /* Maps 5 bit G and B signed value range to 8 bit - * signed values. R is probably unsigned. 
- */ - rgb655 ^= (1 << 9) | (1 << 4); - pixel[0] = ((rgb655 & 0xFC00) >> 10) * 0x7F / 0x3F; - pixel[1] = ((rgb655 & 0x03E0) >> 5) * 0xFF / 0x1F - 0x80; - pixel[2] = (rgb655 & 0x001F) * 0xFF / 0x1F - 0x80; - } - } - return converted_data; - } else { - return NULL; - } -} - -static void upload_gl_texture(GLenum gl_target, - const TextureShape s, - const uint8_t *texture_data, - const uint8_t *palette_data) -{ - ColorFormatInfo f = kelvin_color_format_map[s.color_format]; - nv2a_profile_inc_counter(NV2A_PROF_TEX_UPLOAD); - - unsigned int adjusted_width = s.width; - unsigned int adjusted_height = s.height; - unsigned int adjusted_pitch = s.pitch; - unsigned int adjusted_depth = s.depth; - if (!f.linear && s.border) { - adjusted_width = MAX(16, adjusted_width * 2); - adjusted_height = MAX(16, adjusted_height * 2); - adjusted_pitch = adjusted_width * (s.pitch / s.width); - adjusted_depth = MAX(16, s.depth * 2); - } - - switch(gl_target) { - case GL_TEXTURE_1D: - assert(false); - break; - case GL_TEXTURE_RECTANGLE: { - /* Can't handle strides unaligned to pixels */ - assert(s.pitch % f.bytes_per_pixel == 0); - - uint8_t *converted = convert_texture_data(s, texture_data, - palette_data, - adjusted_width, - adjusted_height, 1, - adjusted_pitch, 0); - glPixelStorei(GL_UNPACK_ROW_LENGTH, - converted ? 0 : adjusted_pitch / f.bytes_per_pixel); - glTexImage2D(gl_target, 0, f.gl_internal_format, - adjusted_width, adjusted_height, 0, - f.gl_format, f.gl_type, - converted ? 
converted : texture_data); - - if (converted) { - g_free(converted); - } - - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - break; - } - case GL_TEXTURE_2D: - case GL_TEXTURE_CUBE_MAP_POSITIVE_X: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: - case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: - case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: { - - unsigned int width = adjusted_width, height = adjusted_height; - - int level; - for (level = 0; level < s.levels; level++) { - width = MAX(width, 1); - height = MAX(height, 1); - - if (f.gl_format == 0) { /* compressed */ - // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression#virtual-size-versus-physical-size - unsigned int block_size = - f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT ? - 8 : 16; - unsigned int physical_width = (width + 3) & ~3, - physical_height = (height + 3) & ~3; - if (physical_width != width) { - glPixelStorei(GL_UNPACK_ROW_LENGTH, physical_width); - } - uint8_t *converted = decompress_2d_texture_data( - f.gl_internal_format, texture_data, physical_width, - physical_height); - unsigned int tex_width = width; - unsigned int tex_height = height; - - if (s.cubemap && adjusted_width != s.width) { - // FIXME: Consider preserving the border. - // There does not seem to be a way to reference the border - // texels in a cubemap, so they are discarded. 
- glPixelStorei(GL_UNPACK_SKIP_PIXELS, 4); - glPixelStorei(GL_UNPACK_SKIP_ROWS, 4); - tex_width = s.width; - tex_height = s.height; - if (physical_width == width) { - glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); - } - } - - glTexImage2D(gl_target, level, GL_RGBA, tex_width, tex_height, 0, - GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, converted); - g_free(converted); - if (physical_width != width) { - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - } - if (s.cubemap && adjusted_width != s.width) { - glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0); - glPixelStorei(GL_UNPACK_SKIP_ROWS, 0); - if (physical_width == width) { - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - } - } - texture_data += - physical_width / 4 * physical_height / 4 * block_size; - } else { - unsigned int pitch = width * f.bytes_per_pixel; - uint8_t *unswizzled = (uint8_t*)g_malloc(height * pitch); - unswizzle_rect(texture_data, width, height, - unswizzled, pitch, f.bytes_per_pixel); - uint8_t *converted = convert_texture_data(s, unswizzled, - palette_data, - width, height, 1, - pitch, 0); - uint8_t *pixel_data = converted ? converted : unswizzled; - unsigned int tex_width = width; - unsigned int tex_height = height; - - if (s.cubemap && adjusted_width != s.width) { - // FIXME: Consider preserving the border. - // There does not seem to be a way to reference the border - // texels in a cubemap, so they are discarded. 
- glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); - tex_width = s.width; - tex_height = s.height; - pixel_data += 4 * f.bytes_per_pixel + 4 * pitch; - } - - glTexImage2D(gl_target, level, f.gl_internal_format, tex_width, - tex_height, 0, f.gl_format, f.gl_type, - pixel_data); - if (s.cubemap && s.border) { - glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); - } - if (converted) { - g_free(converted); - } - g_free(unswizzled); - - texture_data += width * height * f.bytes_per_pixel; - } - - width /= 2; - height /= 2; - } - - break; - } - case GL_TEXTURE_3D: { - - unsigned int width = adjusted_width; - unsigned int height = adjusted_height; - unsigned int depth = adjusted_depth; - - assert(f.linear == false); - - int level; - for (level = 0; level < s.levels; level++) { - if (f.gl_format == 0) { /* compressed */ - assert(width % 4 == 0 && height % 4 == 0 && - "Compressed 3D texture virtual size"); - width = MAX(width, 4); - height = MAX(height, 4); - depth = MAX(depth, 1); - - unsigned int block_size; - if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { - block_size = 8; - } else { - block_size = 16; - } - - size_t texture_size = width/4 * height/4 * depth * block_size; - - uint8_t *converted = decompress_3d_texture_data(f.gl_internal_format, texture_data, width, height, depth); - - glTexImage3D(gl_target, level, GL_RGBA8, - width, height, depth, 0, - GL_RGBA, GL_UNSIGNED_INT_8_8_8_8, - converted); - - g_free(converted); - - texture_data += texture_size; - } else { - width = MAX(width, 1); - height = MAX(height, 1); - depth = MAX(depth, 1); - - unsigned int row_pitch = width * f.bytes_per_pixel; - unsigned int slice_pitch = row_pitch * height; - uint8_t *unswizzled = (uint8_t*)g_malloc(slice_pitch * depth); - unswizzle_box(texture_data, width, height, depth, unswizzled, - row_pitch, slice_pitch, f.bytes_per_pixel); - - uint8_t *converted = convert_texture_data(s, unswizzled, - palette_data, - width, height, depth, - row_pitch, slice_pitch); - - 
glTexImage3D(gl_target, level, f.gl_internal_format, - width, height, depth, 0, - f.gl_format, f.gl_type, - converted ? converted : unswizzled); - - if (converted) { - g_free(converted); - } - g_free(unswizzled); - - texture_data += width * height * depth * f.bytes_per_pixel; - } - - width /= 2; - height /= 2; - depth /= 2; - } - break; - } - default: - assert(false); - break; - } -} - -static TextureBinding* generate_texture(const TextureShape s, - const uint8_t *texture_data, - const uint8_t *palette_data) -{ - ColorFormatInfo f = kelvin_color_format_map[s.color_format]; - - /* Create a new opengl texture */ - GLuint gl_texture; - glGenTextures(1, &gl_texture); - - GLenum gl_target; - if (s.cubemap) { - assert(f.linear == false); - assert(s.dimensionality == 2); - gl_target = GL_TEXTURE_CUBE_MAP; - } else { - if (f.linear) { - /* linear textures use unnormalised texcoords. - * GL_TEXTURE_RECTANGLE_ARB conveniently also does, but - * does not allow repeat and mirror wrap modes. - * (or mipmapping, but xbox d3d says 'Non swizzled and non - * compressed textures cannot be mip mapped.') - * Not sure if that'll be an issue. */ - - /* FIXME: GLSL 330 provides us with textureSize()! Use that? */ - gl_target = GL_TEXTURE_RECTANGLE; - assert(s.dimensionality == 2); - } else { - switch(s.dimensionality) { - case 1: gl_target = GL_TEXTURE_1D; break; - case 2: gl_target = GL_TEXTURE_2D; break; - case 3: gl_target = GL_TEXTURE_3D; break; - default: - assert(false); - break; - } - } - } - - glBindTexture(gl_target, gl_texture); - - NV2A_GL_DLABEL(GL_TEXTURE, gl_texture, - "offset: 0x%08lx, format: 0x%02X%s, %d dimensions%s, " - "width: %d, height: %d, depth: %d", - texture_data - g_nv2a->vram_ptr, - s.color_format, f.linear ? "" : " (SZ)", - s.dimensionality, s.cubemap ? 
" (Cubemap)" : "", - s.width, s.height, s.depth); - - if (gl_target == GL_TEXTURE_CUBE_MAP) { - - ColorFormatInfo f = kelvin_color_format_map[s.color_format]; - unsigned int block_size; - if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { - block_size = 8; - } else { - block_size = 16; - } - - size_t length = 0; - unsigned int w = s.width; - unsigned int h = s.height; - if (!f.linear && s.border) { - w = MAX(16, w * 2); - h = MAX(16, h * 2); - } - - int level; - for (level = 0; level < s.levels; level++) { - if (f.gl_format == 0) { - length += w/4 * h/4 * block_size; - } else { - length += w * h * f.bytes_per_pixel; - } - - w /= 2; - h /= 2; - } - - length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1); - - upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_X, - s, texture_data + 0 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, - s, texture_data + 1 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, - s, texture_data + 2 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, - s, texture_data + 3 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, - s, texture_data + 4 * length, palette_data); - upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, - s, texture_data + 5 * length, palette_data); - } else { - upload_gl_texture(gl_target, s, texture_data, palette_data); - } - - /* Linear textures don't support mipmapping */ - if (!f.linear) { - glTexParameteri(gl_target, GL_TEXTURE_BASE_LEVEL, - s.min_mipmap_level); - glTexParameteri(gl_target, GL_TEXTURE_MAX_LEVEL, - s.levels - 1); - } - - if (f.gl_swizzle_mask[0] != 0 || f.gl_swizzle_mask[1] != 0 - || f.gl_swizzle_mask[2] != 0 || f.gl_swizzle_mask[3] != 0) { - glTexParameteriv(gl_target, GL_TEXTURE_SWIZZLE_RGBA, - (const GLint *)f.gl_swizzle_mask); - } - - TextureBinding* ret = (TextureBinding *)g_malloc(sizeof(TextureBinding)); - ret->gl_target = gl_target; - ret->gl_texture 
= gl_texture; - ret->refcnt = 1; - ret->draw_time = 0; - ret->data_hash = 0; - ret->min_filter = 0xFFFFFFFF; - ret->mag_filter = 0xFFFFFFFF; - ret->addru = 0xFFFFFFFF; - ret->addrv = 0xFFFFFFFF; - ret->addrp = 0xFFFFFFFF; - ret->border_color_set = false; - return ret; -} - -static void texture_binding_destroy(gpointer data) -{ - TextureBinding *binding = (TextureBinding *)data; - assert(binding->refcnt > 0); - binding->refcnt--; - if (binding->refcnt == 0) { - glDeleteTextures(1, &binding->gl_texture); - g_free(binding); - } -} - -/* functions for texture LRU cache */ -static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key) -{ - TextureLruNode *tnode = container_of(node, TextureLruNode, node); - memcpy(&tnode->key, key, sizeof(TextureKey)); - - tnode->binding = NULL; - tnode->possibly_dirty = false; -} - -static void texture_cache_entry_post_evict(Lru *lru, LruNode *node) -{ - TextureLruNode *tnode = container_of(node, TextureLruNode, node); - if (tnode->binding) { - texture_binding_destroy(tnode->binding); - tnode->binding = NULL; - tnode->possibly_dirty = false; - } -} - -static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key) -{ - TextureLruNode *tnode = container_of(node, TextureLruNode, node); - return memcmp(&tnode->key, key, sizeof(TextureKey)); -} - -static unsigned int kelvin_map_stencil_op(uint32_t parameter) -{ - unsigned int op; - switch (parameter) { - case NV097_SET_STENCIL_OP_V_KEEP: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_KEEP; break; - case NV097_SET_STENCIL_OP_V_ZERO: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_ZERO; break; - case NV097_SET_STENCIL_OP_V_REPLACE: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_REPLACE; break; - case NV097_SET_STENCIL_OP_V_INCRSAT: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCRSAT; break; - case NV097_SET_STENCIL_OP_V_DECRSAT: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECRSAT; break; - case NV097_SET_STENCIL_OP_V_INVERT: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INVERT; break; - case 
NV097_SET_STENCIL_OP_V_INCR: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCR; break; - case NV097_SET_STENCIL_OP_V_DECR: - op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECR; break; - default: - assert(false); - break; - } - return op; -} - -static unsigned int kelvin_map_polygon_mode(uint32_t parameter) -{ - unsigned int mode; - switch (parameter) { - case NV097_SET_FRONT_POLYGON_MODE_V_POINT: - mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_POINT; break; - case NV097_SET_FRONT_POLYGON_MODE_V_LINE: - mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_LINE; break; - case NV097_SET_FRONT_POLYGON_MODE_V_FILL: - mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_FILL; break; - default: - assert(false); - break; - } - return mode; -} - -static unsigned int kelvin_map_texgen(uint32_t parameter, unsigned int channel) -{ - assert(channel < 4); - unsigned int texgen; - switch (parameter) { - case NV097_SET_TEXGEN_S_DISABLE: - texgen = NV_PGRAPH_CSV1_A_T0_S_DISABLE; break; - case NV097_SET_TEXGEN_S_EYE_LINEAR: - texgen = NV_PGRAPH_CSV1_A_T0_S_EYE_LINEAR; break; - case NV097_SET_TEXGEN_S_OBJECT_LINEAR: - texgen = NV_PGRAPH_CSV1_A_T0_S_OBJECT_LINEAR; break; - case NV097_SET_TEXGEN_S_SPHERE_MAP: - assert(channel < 2); - texgen = NV_PGRAPH_CSV1_A_T0_S_SPHERE_MAP; break; - case NV097_SET_TEXGEN_S_REFLECTION_MAP: - assert(channel < 3); - texgen = NV_PGRAPH_CSV1_A_T0_S_REFLECTION_MAP; break; - case NV097_SET_TEXGEN_S_NORMAL_MAP: - assert(channel < 3); - texgen = NV_PGRAPH_CSV1_A_T0_S_NORMAL_MAP; break; - default: - assert(false); - break; - } - return texgen; -} diff --git a/hw/xbox/nv2a/pgraph/debug_renderdoc.c b/hw/xbox/nv2a/pgraph/debug_renderdoc.c new file mode 100644 index 00000000000..273e3079737 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/debug_renderdoc.c @@ -0,0 +1,95 @@ +/* + * Geforce NV2A PGRAPH Renderdoc Helpers + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as 
published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" + +#include +#include + +#pragma GCC diagnostic ignored "-Wstrict-prototypes" +#include "thirdparty/renderdoc_app.h" + +#include "hw/xbox/nv2a/debug.h" + +#ifdef _WIN32 +#include +#else +#include +#endif + +static RENDERDOC_API_1_6_0 *rdoc_api = NULL; + +int renderdoc_capture_frames = 0; + +void nv2a_dbg_renderdoc_init(void) +{ + if (rdoc_api) { + return; + } + +#ifdef _WIN32 + HMODULE renderdoc = GetModuleHandleA("renderdoc.dll"); + if (!renderdoc) { + fprintf(stderr, "Error: Failed to open renderdoc library: 0x%lx\n", + GetLastError()); + return; + } + pRENDERDOC_GetAPI RENDERDOC_GetAPI = + (pRENDERDOC_GetAPI)GetProcAddress(renderdoc, "RENDERDOC_GetAPI"); +#else // _WIN32 +#ifdef __APPLE__ + void *renderdoc = dlopen("librenderdoc.dylib", RTLD_LAZY); +#else + void *renderdoc = dlopen("librenderdoc.so", RTLD_LAZY); +#endif + if (!renderdoc) { + fprintf(stderr, "Error: Failed to open renderdoc library: %s\n", + dlerror()); + return; + } + pRENDERDOC_GetAPI RENDERDOC_GetAPI = + (pRENDERDOC_GetAPI)dlsym(renderdoc, "RENDERDOC_GetAPI"); +#endif // _WIN32 + + if (!RENDERDOC_GetAPI) { + fprintf(stderr, "Error: Could not get RENDERDOC_GetAPI address\n"); + return; + } + + int ret = + RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_6_0, (void **)&rdoc_api); + if (ret != 1) { + fprintf(stderr, "Error: Failed to retrieve RenderDoc API.\n"); + } +} + +void *nv2a_dbg_renderdoc_get_api(void) +{ + return (void*)rdoc_api; +} + +bool 
nv2a_dbg_renderdoc_available(void) +{ + return rdoc_api != NULL; +} + +void nv2a_dbg_renderdoc_capture_frames(int num_frames) +{ + renderdoc_capture_frames += num_frames; +} diff --git a/hw/xbox/nv2a/pgraph/gl/blit.c b/hw/xbox/nv2a/pgraph/gl/blit.c new file mode 100644 index 00000000000..b4cce8a5ef1 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/blit.c @@ -0,0 +1,174 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "renderer.h" + +// TODO: Optimize. Ideally this should all be done via OpenGL. 
+void pgraph_gl_image_blit(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + ContextSurfaces2DState *context_surfaces = &pg->context_surfaces_2d; + ImageBlitState *image_blit = &pg->image_blit; + BetaState *beta = &pg->beta; + + pgraph_gl_surface_update(d, false, true, true); + + assert(context_surfaces->object_instance == image_blit->context_surfaces); + + unsigned int bytes_per_pixel; + switch (context_surfaces->color_format) { + case NV062_SET_COLOR_FORMAT_LE_Y8: + bytes_per_pixel = 1; + break; + case NV062_SET_COLOR_FORMAT_LE_R5G6B5: + bytes_per_pixel = 2; + break; + case NV062_SET_COLOR_FORMAT_LE_A8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_Y32: + bytes_per_pixel = 4; + break; + default: + fprintf(stderr, "Unknown blit surface format: 0x%x\n", + context_surfaces->color_format); + assert(false); + break; + } + + hwaddr source_dma_len, dest_dma_len; + + uint8_t *source = (uint8_t *)nv_dma_map( + d, context_surfaces->dma_image_source, &source_dma_len); + assert(context_surfaces->source_offset < source_dma_len); + source += context_surfaces->source_offset; + + uint8_t *dest = (uint8_t *)nv_dma_map(d, context_surfaces->dma_image_dest, + &dest_dma_len); + assert(context_surfaces->dest_offset < dest_dma_len); + dest += context_surfaces->dest_offset; + + hwaddr source_addr = source - d->vram_ptr; + hwaddr dest_addr = dest - d->vram_ptr; + + SurfaceBinding *surf_src = pgraph_gl_surface_get(d, source_addr); + if (surf_src) { + pgraph_gl_surface_download_if_dirty(d, surf_src); + } + + SurfaceBinding *surf_dest = pgraph_gl_surface_get(d, dest_addr); + if (surf_dest) { + if (image_blit->height < surf_dest->height || + image_blit->width < surf_dest->width) { + pgraph_gl_surface_download_if_dirty(d, surf_dest); + } else { + // The blit will completely replace the surface so any pending + // download should be discarded. 
+ surf_dest->download_pending = false; + surf_dest->draw_dirty = false; + } + surf_dest->upload_pending = true; + pg->draw_time++; + } + + hwaddr source_offset = image_blit->in_y * context_surfaces->source_pitch + + image_blit->in_x * bytes_per_pixel; + hwaddr dest_offset = image_blit->out_y * context_surfaces->dest_pitch + + image_blit->out_x * bytes_per_pixel; + + hwaddr source_size = + (image_blit->height - 1) * context_surfaces->source_pitch + + image_blit->width * bytes_per_pixel; + hwaddr dest_size = (image_blit->height - 1) * context_surfaces->dest_pitch + + image_blit->width * bytes_per_pixel; + + /* FIXME: What does hardware do in this case? */ + assert(source_addr + source_offset + source_size <= + memory_region_size(d->vram)); + assert(dest_addr + dest_offset + dest_size <= memory_region_size(d->vram)); + + uint8_t *source_row = source + source_offset; + uint8_t *dest_row = dest + dest_offset; + + if (image_blit->operation == NV09F_SET_OPERATION_SRCCOPY) { + // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_SRCCOPY"); + for (unsigned int y = 0; y < image_blit->height; y++) { + memmove(dest_row, source_row, image_blit->width * bytes_per_pixel); + source_row += context_surfaces->source_pitch; + dest_row += context_surfaces->dest_pitch; + } + } else if (image_blit->operation == NV09F_SET_OPERATION_BLEND_AND) { + // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_BLEND_AND"); + uint32_t max_beta_mult = 0x7f80; + uint32_t beta_mult = beta->beta >> 16; + uint32_t inv_beta_mult = max_beta_mult - beta_mult; + for (unsigned int y = 0; y < image_blit->height; y++) { + for (unsigned int x = 0; x < image_blit->width; x++) { + for (unsigned int ch = 0; ch < 3; ch++) { + uint32_t a = source_row[x * 4 + ch] * beta_mult; + uint32_t b = dest_row[x * 4 + ch] * inv_beta_mult; + dest_row[x * 4 + ch] = (a + b) / max_beta_mult; + } + } + source_row += context_surfaces->source_pitch; + dest_row += context_surfaces->dest_pitch; + } + } else { + fprintf(stderr, "Unknown blit operation: 
0x%x\n", + image_blit->operation); + assert(false && "Unknown blit operation"); + } + + NV2A_DPRINTF(" - 0x%tx -> 0x%tx\n", source_addr, dest_addr); + + bool needs_alpha_patching; + uint8_t alpha_override; + switch (context_surfaces->color_format) { + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: + needs_alpha_patching = true; + alpha_override = 0xff; + break; + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: + needs_alpha_patching = true; + alpha_override = 0; + break; + default: + needs_alpha_patching = false; + alpha_override = 0; + } + + if (needs_alpha_patching) { + dest_row = dest + dest_offset; + for (unsigned int y = 0; y < image_blit->height; y++) { + for (unsigned int x = 0; x < image_blit->width; x++) { + dest_row[x * 4 + 3] = alpha_override; + } + dest_row += context_surfaces->dest_pitch; + } + } + + dest_addr += dest_offset; + memory_region_set_client_dirty(d->vram, dest_addr, dest_size, + DIRTY_MEMORY_VGA); + memory_region_set_client_dirty(d->vram, dest_addr, dest_size, + DIRTY_MEMORY_NV2A_TEX); +} diff --git a/hw/xbox/nv2a/pgraph/gl/constants.h b/hw/xbox/nv2a/pgraph/gl/constants.h new file mode 100644 index 00000000000..d78b0054e38 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/constants.h @@ -0,0 +1,322 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GL_CONSTANTS_H +#define HW_XBOX_NV2A_PGRAPH_GL_CONSTANTS_H + +#include "qemu/osdep.h" +#include "hw/xbox/nv2a/nv2a_regs.h" +#include "gloffscreen.h" + +static const GLenum pgraph_texture_min_filter_gl_map[] = { + 0, + GL_NEAREST, + GL_LINEAR, + GL_NEAREST_MIPMAP_NEAREST, + GL_LINEAR_MIPMAP_NEAREST, + GL_NEAREST_MIPMAP_LINEAR, + GL_LINEAR_MIPMAP_LINEAR, + GL_LINEAR, +}; + +static const GLenum pgraph_texture_mag_filter_gl_map[] = { + 0, + GL_NEAREST, + GL_LINEAR, + 0, + GL_LINEAR /* TODO: Convolution filter... */ +}; + +static const GLenum pgraph_texture_addr_gl_map[] = { + 0, + GL_REPEAT, + GL_MIRRORED_REPEAT, + GL_CLAMP_TO_EDGE, + GL_CLAMP_TO_BORDER, + GL_CLAMP_TO_EDGE, /* Approximate GL_CLAMP */ +}; + +static const GLenum pgraph_blend_factor_gl_map[] = { + GL_ZERO, + GL_ONE, + GL_SRC_COLOR, + GL_ONE_MINUS_SRC_COLOR, + GL_SRC_ALPHA, + GL_ONE_MINUS_SRC_ALPHA, + GL_DST_ALPHA, + GL_ONE_MINUS_DST_ALPHA, + GL_DST_COLOR, + GL_ONE_MINUS_DST_COLOR, + GL_SRC_ALPHA_SATURATE, + 0, + GL_CONSTANT_COLOR, + GL_ONE_MINUS_CONSTANT_COLOR, + GL_CONSTANT_ALPHA, + GL_ONE_MINUS_CONSTANT_ALPHA, +}; + +static const GLenum pgraph_blend_equation_gl_map[] = { + GL_FUNC_SUBTRACT, + GL_FUNC_REVERSE_SUBTRACT, + GL_FUNC_ADD, + GL_MIN, + GL_MAX, + GL_FUNC_REVERSE_SUBTRACT, + GL_FUNC_ADD, +}; + +/* FIXME +static const GLenum pgraph_blend_logicop_map[] = { + GL_CLEAR, + GL_AND, + GL_AND_REVERSE, + GL_COPY, + GL_AND_INVERTED, + GL_NOOP, + GL_XOR, + GL_OR, + GL_NOR, + GL_EQUIV, + GL_INVERT, + GL_OR_REVERSE, + GL_COPY_INVERTED, + GL_OR_INVERTED, + GL_NAND, + GL_SET, +}; +*/ + +static const GLenum pgraph_cull_face_gl_map[] = { + 0, + GL_FRONT, + GL_BACK, + GL_FRONT_AND_BACK +}; + +static const GLenum pgraph_depth_func_gl_map[] = { + GL_NEVER, + GL_LESS, + GL_EQUAL, + GL_LEQUAL, + GL_GREATER, + GL_NOTEQUAL, + GL_GEQUAL, + GL_ALWAYS, +}; + +static 
const GLenum pgraph_stencil_func_gl_map[] = { + GL_NEVER, + GL_LESS, + GL_EQUAL, + GL_LEQUAL, + GL_GREATER, + GL_NOTEQUAL, + GL_GEQUAL, + GL_ALWAYS, +}; + +static const GLenum pgraph_stencil_op_gl_map[] = { + 0, + GL_KEEP, + GL_ZERO, + GL_REPLACE, + GL_INCR, + GL_DECR, + GL_INVERT, + GL_INCR_WRAP, + GL_DECR_WRAP, +}; + +typedef struct ColorFormatInfo { + unsigned int bytes_per_pixel; + bool linear; + GLint gl_internal_format; + GLenum gl_format; + GLenum gl_type; + GLenum gl_swizzle_mask[4]; + bool depth; +} ColorFormatInfo; + +static const ColorFormatInfo kelvin_color_format_gl_map[66] = { + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = + {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_ONE}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = + {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_RED}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = + {2, false, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = + {2, false, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = + {2, false, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = + {2, false, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = + {4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = + {4, false, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, + + /* paletted texture */ + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = + {1, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = + {4, false, GL_COMPRESSED_RGBA_S3TC_DXT1_EXT, 0, GL_RGBA}, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = + {4, false, GL_COMPRESSED_RGBA_S3TC_DXT3_EXT, 0, GL_RGBA}, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = + {4, false, GL_COMPRESSED_RGBA_S3TC_DXT5_EXT, 0, 
GL_RGBA}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = + {2, true, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = + {2, true, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = + {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = + {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_ONE}}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = + {2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, + {GL_RED, GL_GREEN, GL_RED, GL_GREEN}}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = + {1, false, GL_R8, GL_RED, GL_UNSIGNED_BYTE, + {GL_ONE, GL_ONE, GL_ONE, GL_RED}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = + {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_GREEN}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = + {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_RED}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = + {2, true, GL_RGB5, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = + {2, true, GL_RGBA4, GL_BGRA, GL_UNSIGNED_SHORT_4_4_4_4_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = + {4, true, GL_RGB8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = + {1, true, GL_R8, GL_RED, GL_UNSIGNED_BYTE, + {GL_ONE, GL_ONE, GL_ONE, GL_RED}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = + {2, true, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, + {GL_RED, GL_RED, GL_RED, GL_GREEN}}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = + {2, false, GL_RGB8_SNORM, GL_RGB, GL_BYTE}, /* FIXME: This might be signed */ + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = + {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, + {GL_RED, GL_GREEN, GL_RED, GL_GREEN}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = + {2, false, GL_RG8, GL_RG, GL_UNSIGNED_BYTE, + {GL_GREEN, 
GL_RED, GL_RED, GL_GREEN}}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = + {2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = + {2, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, + + /* Additional information is passed to the pixel shader via the swizzle: + * RED: The depth value. + * GREEN: 0 for 16-bit, 1 for 24 bit + * BLUE: 0 for fixed, 1 for float + */ + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = + {2, false, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, + {GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = + {4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, + {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = + /* FIXME: Uses fixed-point format to match surface format hack below. */ + {4, true, GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, + {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}, true}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = + {2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, + {GL_RED, GL_ZERO, GL_ZERO, GL_ZERO}, true}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = + {2, true, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_HALF_FLOAT, + {GL_RED, GL_ZERO, GL_ONE, GL_ZERO}, true}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = + {2, true, GL_R16, GL_RED, GL_UNSIGNED_SHORT, + {GL_RED, GL_RED, GL_RED, GL_ONE}}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = + {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = + {4, false, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = + {4, false, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8}, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = + {4, true, GL_RGBA8, GL_RGBA, 
GL_UNSIGNED_INT_8_8_8_8_REV}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = + {4, true, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8}, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = + {4, true, GL_RGBA8, GL_RGBA, GL_UNSIGNED_INT_8_8_8_8} +}; + +typedef struct SurfaceFormatInfo { + unsigned int bytes_per_pixel; + GLint gl_internal_format; + GLenum gl_format; + GLenum gl_type; + GLenum gl_attachment; +} SurfaceFormatInfo; + +static const SurfaceFormatInfo kelvin_surface_color_format_gl_map[] = { + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] = + {2, GL_RGB5_A1, GL_BGRA, GL_UNSIGNED_SHORT_1_5_5_5_REV, GL_COLOR_ATTACHMENT0}, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] = + {2, GL_RGB565, GL_RGB, GL_UNSIGNED_SHORT_5_6_5, GL_COLOR_ATTACHMENT0}, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] = + {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0}, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] = + {4, GL_RGBA8, GL_BGRA, GL_UNSIGNED_INT_8_8_8_8_REV, GL_COLOR_ATTACHMENT0}, + + // FIXME: Map channel color + [NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] = + {1, GL_R8, GL_RED, GL_UNSIGNED_BYTE, GL_COLOR_ATTACHMENT0}, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] = + {2, GL_RG8, GL_RG, GL_UNSIGNED_SHORT, GL_COLOR_ATTACHMENT0}, +}; + +static const SurfaceFormatInfo kelvin_surface_zeta_float_format_gl_map[] = { + [NV097_SET_SURFACE_FORMAT_ZETA_Z16] = + {2, GL_DEPTH_COMPONENT32F, GL_DEPTH_COMPONENT, GL_HALF_FLOAT, GL_DEPTH_ATTACHMENT}, + [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = + /* FIXME: GL does not support packing floating-point Z24S8 OOTB, so for + * now just emulate this with fixed-point Z24S8. Possible compat + * improvement with custom conversion. 
+ */ + {4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT}, +}; + +static const SurfaceFormatInfo kelvin_surface_zeta_fixed_format_gl_map[] = { + [NV097_SET_SURFACE_FORMAT_ZETA_Z16] = + {2, GL_DEPTH_COMPONENT16, GL_DEPTH_COMPONENT, GL_UNSIGNED_SHORT, GL_DEPTH_ATTACHMENT}, + [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = + {4, GL_DEPTH24_STENCIL8, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, GL_DEPTH_STENCIL_ATTACHMENT}, +}; + +#endif diff --git a/hw/xbox/nv2a/debug.c b/hw/xbox/nv2a/pgraph/gl/debug.c similarity index 77% rename from hw/xbox/nv2a/debug.c rename to hw/xbox/nv2a/pgraph/gl/debug.c index def94cdba1a..8e7f49e47c7 100644 --- a/hw/xbox/nv2a/debug.c +++ b/hw/xbox/nv2a/pgraph/gl/debug.c @@ -1,5 +1,5 @@ /* - * QEMU Geforce NV2A debug helpers + * Geforce NV2A PGRAPH OpenGL Renderer * * Copyright (c) 2015 Jannik Vogel * Copyright (c) 2012 espes @@ -18,6 +18,7 @@ * License along with this library; if not, see . */ +#include "renderer.h" #include "debug.h" #ifdef DEBUG_NV2A_GL @@ -28,15 +29,8 @@ #include #ifdef CONFIG_RENDERDOC +#pragma GCC diagnostic ignored "-Wstrict-prototypes" #include "thirdparty/renderdoc_app.h" -#ifdef _WIN32 -#include -#else -#include -#endif - -static RENDERDOC_API_1_1_2 *rdoc_api = NULL; -static int32_t renderdoc_capture_frames = 0; #endif #define CHECK_GL_ERROR() do { \ @@ -74,31 +68,7 @@ void gl_debug_initialize(void) } #ifdef CONFIG_RENDERDOC - const char *renderdoc_lib; - void* renderdoc; -#ifdef __APPLE__ - renderdoc_lib = "librenderdoc.dylib"; -#elif _WIN32 - renderdoc_lib = "renderdoc.dll"; -#else - renderdoc_lib = "librenderdoc.so"; -#endif - -#ifdef _WIN32 - renderdoc = GetModuleHandleA(renderdoc_lib); - if (renderdoc) { - pRENDERDOC_GetAPI RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)GetProcAddress( - renderdoc, "RENDERDOC_GetAPI"); -#else - renderdoc = dlopen(renderdoc_lib, RTLD_NOW | RTLD_NOLOAD); - if (renderdoc) { - pRENDERDOC_GetAPI RENDERDOC_GetAPI = (pRENDERDOC_GetAPI)dlsym( - renderdoc, 
"RENDERDOC_GetAPI"); -#endif - int ret = RENDERDOC_GetAPI(eRENDERDOC_API_Version_1_1_2, - (void **)&rdoc_api); - assert(ret == 1 && "Failed to retrieve RenderDoc API."); - } + nv2a_dbg_renderdoc_init(); #endif } @@ -179,7 +149,10 @@ void gl_debug_frame_terminator(void) CHECK_GL_ERROR(); #ifdef CONFIG_RENDERDOC - if (rdoc_api) { + if (nv2a_dbg_renderdoc_available()) { + + RENDERDOC_API_1_6_0 *rdoc_api = nv2a_dbg_renderdoc_get_api(); + if (rdoc_api->IsTargetControlConnected()) { if (rdoc_api->IsFrameCapturing()) { rdoc_api->EndFrameCapture(NULL, NULL); @@ -190,7 +163,7 @@ void gl_debug_frame_terminator(void) error); } } - if (renderdoc_capture_frames) { + if (renderdoc_capture_frames > 0) { rdoc_api->StartFrameCapture(NULL, NULL); GLenum error = glGetError(); if (error != GL_NO_ERROR) { @@ -203,22 +176,10 @@ void gl_debug_frame_terminator(void) } } #endif - if (!has_GL_GREMEDY_frame_terminator) { - return; + if (has_GL_GREMEDY_frame_terminator) { + glFrameTerminatorGREMEDY(); + CHECK_GL_ERROR(); } - - glFrameTerminatorGREMEDY(); - CHECK_GL_ERROR(); -} - -#ifdef CONFIG_RENDERDOC -bool nv2a_dbg_renderdoc_available(void) { - return rdoc_api != NULL; } -void nv2a_dbg_renderdoc_capture_frames(uint32_t num_frames) { - renderdoc_capture_frames = num_frames; -} -#endif - #endif // DEBUG_NV2A_GL diff --git a/hw/xbox/nv2a/pgraph/gl/debug.h b/hw/xbox/nv2a/pgraph/gl/debug.h new file mode 100644 index 00000000000..c242e1f3846 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/debug.h @@ -0,0 +1,60 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2012 espes + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GL_DEBUG_H +#define HW_XBOX_NV2A_PGRAPH_GL_DEBUG_H + +// #define DEBUG_NV2A_GL +#ifdef DEBUG_NV2A_GL + +#include +#include "gloffscreen.h" +#include "config-host.h" + +void gl_debug_initialize(void); +void gl_debug_message(bool cc, const char *fmt, ...); +void gl_debug_group_begin(const char *fmt, ...); +void gl_debug_group_end(void); +void gl_debug_label(GLenum target, GLuint name, const char *fmt, ...); +void gl_debug_frame_terminator(void); + +# define NV2A_GL_DPRINTF(cc, format, ...) \ + gl_debug_message(cc, "nv2a: " format, ## __VA_ARGS__) +# define NV2A_GL_DGROUP_BEGIN(format, ...) \ + gl_debug_group_begin("nv2a: " format, ## __VA_ARGS__) +# define NV2A_GL_DGROUP_END() \ + gl_debug_group_end() +# define NV2A_GL_DLABEL(target, name, format, ...) \ + gl_debug_label(target, name, "nv2a: { " format " }", ## __VA_ARGS__) +#define NV2A_GL_DFRAME_TERMINATOR() \ + gl_debug_frame_terminator() + +#else + +# define NV2A_GL_DPRINTF(cc, format, ...) do { \ + if (cc) NV2A_DPRINTF(format "\n", ##__VA_ARGS__ ); \ + } while (0) +# define NV2A_GL_DGROUP_BEGIN(format, ...) do { } while (0) +# define NV2A_GL_DGROUP_END() do { } while (0) +# define NV2A_GL_DLABEL(target, name, format, ...) 
do { } while (0) +# define NV2A_GL_DFRAME_TERMINATOR() do { } while (0) +#endif + +#endif diff --git a/hw/xbox/nv2a/pgraph/gl/display.c b/hw/xbox/nv2a/pgraph/gl/display.c new file mode 100644 index 00000000000..fbea7d2f0c0 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/display.c @@ -0,0 +1,438 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/pgraph/util.h" +#include "renderer.h" + +#include + +void pgraph_gl_init_display(NV2AState *d) +{ + struct PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + glo_set_current(g_nv2a_context_display); + + glGenTextures(1, &r->gl_display_buffer); + r->gl_display_buffer_internal_format = 0; + r->gl_display_buffer_width = 0; + r->gl_display_buffer_height = 0; + r->gl_display_buffer_format = 0; + r->gl_display_buffer_type = 0; + + const char *vs = + "#version 330\n" + "void main()\n" + "{\n" + " float x = -1.0 + float((gl_VertexID & 1) << 2);\n" + " float y = -1.0 + float((gl_VertexID & 2) << 1);\n" + " gl_Position = vec4(x, y, 0, 1);\n" + "}\n"; + /* FIXME: improve interlace handling, pvideo */ + + const char *fs = + "#version 330\n" + "uniform sampler2D tex;\n" + "uniform bool pvideo_enable;\n" + "uniform sampler2D pvideo_tex;\n" + "uniform vec2 pvideo_in_pos;\n" + "uniform vec4 pvideo_pos;\n" + "uniform vec3 pvideo_scale;\n" + "uniform bool pvideo_color_key_enable;\n" + "uniform vec4 pvideo_color_key;\n" + "uniform vec2 display_size;\n" + "uniform float line_offset;\n" + "layout(location = 0) out vec4 out_Color;\n" + "void main()\n" + "{\n" + " vec2 texCoord = gl_FragCoord.xy/display_size;\n" + " float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n" + " texCoord.y = 1 + rel*(texCoord.y - 1);" + " out_Color.rgba = texture(tex, texCoord);\n" + " if (pvideo_enable) {\n" + " vec2 screenCoord = gl_FragCoord.xy - 0.5;\n" + " vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n" + " bvec4 clip = bvec4(lessThan(screenCoord, output_region.xy),\n" + " greaterThan(screenCoord, output_region.zw));\n" + " if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n" + " vec2 out_xy = (screenCoord - pvideo_pos.xy) * pvideo_scale.z;\n" + " vec2 in_st = (pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n" + " 
in_st.y *= -1.0;\n" + " out_Color.rgba = texture(pvideo_tex, in_st);\n" + " }\n" + " }\n" + "}\n"; + + r->disp_rndr.prog = pgraph_gl_compile_shader(vs, fs); + r->disp_rndr.tex_loc = glGetUniformLocation(r->disp_rndr.prog, "tex"); + r->disp_rndr.pvideo_enable_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_enable"); + r->disp_rndr.pvideo_tex_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_tex"); + r->disp_rndr.pvideo_in_pos_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_in_pos"); + r->disp_rndr.pvideo_pos_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_pos"); + r->disp_rndr.pvideo_scale_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_scale"); + r->disp_rndr.pvideo_color_key_enable_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_color_key_enable"); + r->disp_rndr.pvideo_color_key_loc = glGetUniformLocation(r->disp_rndr.prog, "pvideo_color_key"); + r->disp_rndr.display_size_loc = glGetUniformLocation(r->disp_rndr.prog, "display_size"); + r->disp_rndr.line_offset_loc = glGetUniformLocation(r->disp_rndr.prog, "line_offset"); + + glGenVertexArrays(1, &r->disp_rndr.vao); + glBindVertexArray(r->disp_rndr.vao); + glGenBuffers(1, &r->disp_rndr.vbo); + glBindBuffer(GL_ARRAY_BUFFER, r->disp_rndr.vbo); + glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW); + glGenFramebuffers(1, &r->disp_rndr.fbo); + glGenTextures(1, &r->disp_rndr.pvideo_tex); + assert(glGetError() == GL_NO_ERROR); + + glo_set_current(g_nv2a_context_render); +} + +void pgraph_gl_finalize_display(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + glo_set_current(g_nv2a_context_display); + + glDeleteTextures(1, &r->gl_display_buffer); + r->gl_display_buffer = 0; + + glDeleteProgram(r->disp_rndr.prog); + r->disp_rndr.prog = 0; + + glDeleteVertexArrays(1, &r->disp_rndr.vao); + r->disp_rndr.vao = 0; + + glDeleteBuffers(1, &r->disp_rndr.vbo); + r->disp_rndr.vbo = 0; + + glDeleteFramebuffers(1, &r->disp_rndr.fbo); + r->disp_rndr.fbo = 0; + + glDeleteTextures(1, 
&r->disp_rndr.pvideo_tex); + r->disp_rndr.pvideo_tex = 0; + + glo_set_current(g_nv2a_context_render); +} + +static uint8_t *convert_texture_data__CR8YB8CB8YA8(const uint8_t *data, + unsigned int width, + unsigned int height, + unsigned int pitch) +{ + uint8_t *converted_data = (uint8_t *)g_malloc(width * height * 4); + int x, y; + for (y = 0; y < height; y++) { + const uint8_t *line = &data[y * pitch]; + const uint32_t row_offset = y * width; + for (x = 0; x < width; x++) { + uint8_t *pixel = &converted_data[(row_offset + x) * 4]; + convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]); + pixel[3] = 255; + } + } + return converted_data; +} + +static float pvideo_calculate_scale(unsigned int din_dout, + unsigned int output_size) +{ + float calculated_in = din_dout * (output_size - 1); + calculated_in = floorf(calculated_in / (1 << 20) + 0.5f); + return (calculated_in + 1.0f) / output_size; +} + +static void render_display_pvideo_overlay(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + // FIXME: This check against PVIDEO_SIZE_IN does not match HW behavior. + // Many games seem to pass this value when initializing or tearing down + // PVIDEO. On its own, this generally does not result in the overlay being + // hidden, however there are certain games (e.g., Ultimate Beach Soccer) + // that use an unknown mechanism to hide the overlay without explicitly + // stopping it. + // Since the value seems to be set to 0xFFFFFFFF only in cases where the + // content is not valid, it is probably good enough to treat it as an + // implicit stop. 
+ bool enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE) + && d->pvideo.regs[NV_PVIDEO_SIZE_IN] != 0xFFFFFFFF; + glUniform1ui(r->disp_rndr.pvideo_enable_loc, enabled); + if (!enabled) { + return; + } + + hwaddr base = d->pvideo.regs[NV_PVIDEO_BASE]; + hwaddr limit = d->pvideo.regs[NV_PVIDEO_LIMIT]; + hwaddr offset = d->pvideo.regs[NV_PVIDEO_OFFSET]; + + int in_width = + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH); + int in_height = + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT); + + int in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], + NV_PVIDEO_POINT_IN_S); + int in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN], + NV_PVIDEO_POINT_IN_T); + + int in_pitch = + GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_PITCH); + int in_color = + GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_COLOR); + + unsigned int out_width = + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_WIDTH); + unsigned int out_height = + GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_HEIGHT); + + float scale_x = 1.0f; + float scale_y = 1.0f; + unsigned int ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX]; + unsigned int dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY]; + if (ds_dx != NV_PVIDEO_DIN_DOUT_UNITY) { + scale_x = pvideo_calculate_scale(ds_dx, out_width); + } + if (dt_dy != NV_PVIDEO_DIN_DOUT_UNITY) { + scale_y = pvideo_calculate_scale(dt_dy, out_height); + } + + // On HW, setting NV_PVIDEO_SIZE_IN larger than NV_PVIDEO_SIZE_OUT results + // in them being capped to the output size, content is not scaled. This is + // particularly important as NV_PVIDEO_SIZE_IN may be set to 0xFFFFFFFF + // during initialization or teardown. 
+ if (in_width > out_width) { + in_width = floorf((float)out_width * scale_x + 0.5f); + } + if (in_height > out_height) { + in_height = floorf((float)out_height * scale_y + 0.5f); + } + + /* TODO: support other color formats */ + assert(in_color == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8); + + unsigned int out_x = + GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_X); + unsigned int out_y = + GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_Y); + + unsigned int color_key_enabled = + GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_DISPLAY); + glUniform1ui(r->disp_rndr.pvideo_color_key_enable_loc, + color_key_enabled); + + // TODO: Verify that masking off the top byte is correct. + // SeaBlade sets a color key of 0x80000000 but the texture passed into the + // shader is cleared to 0 alpha. + unsigned int color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF; + glUniform4f(r->disp_rndr.pvideo_color_key_loc, + GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0, + GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0, + GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0, + GET_MASK(color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0); + + assert(offset + in_pitch * in_height <= limit); + hwaddr end = base + offset + in_pitch * in_height; + assert(end <= memory_region_size(d->vram)); + + pgraph_apply_scaling_factor(pg, &out_x, &out_y); + pgraph_apply_scaling_factor(pg, &out_width, &out_height); + + // Translate for the GL viewport origin. 
+ out_y = MAX(r->gl_display_buffer_height - 1 - (int)(out_y + out_height), 0); + + glActiveTexture(GL_TEXTURE0 + 1); + glBindTexture(GL_TEXTURE_2D, r->disp_rndr.pvideo_tex); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + uint8_t *tex_rgba = convert_texture_data__CR8YB8CB8YA8( + d->vram_ptr + base + offset, in_width, in_height, in_pitch); + glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, in_width, in_height, 0, GL_RGBA, + GL_UNSIGNED_BYTE, tex_rgba); + g_free(tex_rgba); + glUniform1i(r->disp_rndr.pvideo_tex_loc, 1); + glUniform2f(r->disp_rndr.pvideo_in_pos_loc, in_s, in_t); + glUniform4f(r->disp_rndr.pvideo_pos_loc, + out_x, out_y, out_width, out_height); + glUniform3f(r->disp_rndr.pvideo_scale_loc, + scale_x, scale_y, 1.0f / pg->surface_scale_factor); +} + +static void render_display(NV2AState *d, SurfaceBinding *surface) +{ + struct PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + unsigned int width, height; + uint32_t pline_offset, pstart_addr, pline_compare; + d->vga.get_resolution(&d->vga, (int*)&width, (int*)&height); + d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); + int line_offset = surface->pitch / pline_offset; + + /* Adjust viewport height for interlaced mode, used only in 1080i */ + if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) { + height *= 2; + } + + pgraph_apply_scaling_factor(pg, &width, &height); + + glBindFramebuffer(GL_FRAMEBUFFER, r->disp_rndr.fbo); + glActiveTexture(GL_TEXTURE0); + glBindTexture(GL_TEXTURE_2D, r->gl_display_buffer); + bool recreate = ( + surface->fmt.gl_internal_format != r->gl_display_buffer_internal_format + || width != r->gl_display_buffer_width + || height != r->gl_display_buffer_height + || surface->fmt.gl_format != r->gl_display_buffer_format + || surface->fmt.gl_type != r->gl_display_buffer_type + ); + 
+ if (recreate) { + /* XXX: There's apparently a bug in some Intel OpenGL drivers for + * Windows that will leak this texture when its orphaned after use in + * another context, apparently regardless of which thread it's created + * or released on. + * + * Driver: 27.20.100.8729 9/11/2020 W10 x64 + * Track: https://community.intel.com/t5/Graphics/OpenGL-Windows-drivers-for-Intel-HD-630-leaking-GPU-memory-when/td-p/1274423 + */ + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + r->gl_display_buffer_internal_format = surface->fmt.gl_internal_format; + r->gl_display_buffer_width = width; + r->gl_display_buffer_height = height; + r->gl_display_buffer_format = surface->fmt.gl_format; + r->gl_display_buffer_type = surface->fmt.gl_type; + glTexImage2D(GL_TEXTURE_2D, 0, + r->gl_display_buffer_internal_format, + r->gl_display_buffer_width, + r->gl_display_buffer_height, + 0, + r->gl_display_buffer_format, + r->gl_display_buffer_type, + NULL); + } + + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, r->gl_display_buffer, 0); + GLenum DrawBuffers[1] = {GL_COLOR_ATTACHMENT0}; + glDrawBuffers(1, DrawBuffers); + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); + + glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); + glBindVertexArray(r->disp_rndr.vao); + glBindBuffer(GL_ARRAY_BUFFER, r->disp_rndr.vbo); + glUseProgram(r->disp_rndr.prog); + glProgramUniform1i(r->disp_rndr.prog, r->disp_rndr.tex_loc, 0); + glUniform2f(r->disp_rndr.display_size_loc, width, height); + glUniform1f(r->disp_rndr.line_offset_loc, line_offset); + render_display_pvideo_overlay(d); + + glViewport(0, 0, width, height); + glColorMask(true, true, true, true); + glDisable(GL_SCISSOR_TEST); + glDisable(GL_BLEND); + glDisable(GL_STENCIL_TEST); + glDisable(GL_CULL_FACE); + glDisable(GL_DEPTH_TEST); + 
glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glClearColor(0.0f, 0.0f, 0.0f, 1.0f); + glClear(GL_COLOR_BUFFER_BIT); + glDrawArrays(GL_TRIANGLES, 0, 3); + + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, 0, 0); +} + +static void gl_fence(void) +{ + GLsync fence = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + int result = glClientWaitSync(fence, GL_SYNC_FLUSH_COMMANDS_BIT, + (GLuint64)(5000000000)); + assert(result == GL_CONDITION_SATISFIED || result == GL_ALREADY_SIGNALED); + glDeleteSync(fence); +} + +void pgraph_gl_sync(NV2AState *d) +{ + uint32_t pline_offset, pstart_addr, pline_compare; + d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); + SurfaceBinding *surface = pgraph_gl_surface_get_within(d, d->pcrtc.start + pline_offset); + if (surface == NULL) { + qemu_event_set(&d->pgraph.sync_complete); + return; + } + + /* FIXME: Sanity check surface dimensions */ + + /* Wait for queued commands to complete */ + pgraph_gl_upload_surface_data(d, surface, !tcg_enabled()); + gl_fence(); + assert(glGetError() == GL_NO_ERROR); + + /* Render framebuffer in display context */ + glo_set_current(g_nv2a_context_display); + render_display(d, surface); + gl_fence(); + assert(glGetError() == GL_NO_ERROR); + + /* Switch back to original context */ + glo_set_current(g_nv2a_context_render); + + qatomic_set(&d->pgraph.sync_pending, false); + qemu_event_set(&d->pgraph.sync_complete); +} + +int pgraph_gl_get_framebuffer_surface(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + qemu_mutex_lock(&d->pfifo.lock); + // FIXME: Possible race condition with pgraph, consider lock + uint32_t pline_offset, pstart_addr, pline_compare; + d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); + SurfaceBinding *surface = pgraph_gl_surface_get_within(d, d->pcrtc.start + pline_offset); + if (surface == NULL || !surface->color) { + qemu_mutex_unlock(&d->pfifo.lock); + return 0; + } + 
+ assert(surface->color); + assert(surface->fmt.gl_attachment == GL_COLOR_ATTACHMENT0); + assert(surface->fmt.gl_format == GL_RGBA + || surface->fmt.gl_format == GL_RGB + || surface->fmt.gl_format == GL_BGR + || surface->fmt.gl_format == GL_BGRA + ); + + surface->frame_time = pg->frame_time; + qemu_event_reset(&d->pgraph.sync_complete); + qatomic_set(&pg->sync_pending, true); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&d->pgraph.sync_complete); + + return r->gl_display_buffer; +} diff --git a/hw/xbox/nv2a/pgraph/gl/draw.c b/hw/xbox/nv2a/pgraph/gl/draw.c new file mode 100644 index 00000000000..94e9beb50b0 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/draw.c @@ -0,0 +1,528 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "qemu/fast-hash.h" +#include "hw/xbox/nv2a/nv2a_int.h" +#include "debug.h" +#include "renderer.h" + +void pgraph_gl_clear_surface(NV2AState *d, uint32_t parameter) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + NV2A_DPRINTF("---------PRE CLEAR ------\n"); + pg->clearing = true; + + GLbitfield gl_mask = 0; + + bool write_color = (parameter & NV097_CLEAR_SURFACE_COLOR); + bool write_zeta = + (parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL)); + + if (write_zeta) { + GLint gl_clear_stencil; + GLfloat gl_clear_depth; + pgraph_get_clear_depth_stencil_value(pg, &gl_clear_depth, + &gl_clear_stencil); + + if (parameter & NV097_CLEAR_SURFACE_Z) { + gl_mask |= GL_DEPTH_BUFFER_BIT; + glDepthMask(GL_TRUE); + glClearDepth(gl_clear_depth); + } + if (parameter & NV097_CLEAR_SURFACE_STENCIL) { + gl_mask |= GL_STENCIL_BUFFER_BIT; + glStencilMask(0xff); + glClearStencil(gl_clear_stencil); + } + } + if (write_color) { + gl_mask |= GL_COLOR_BUFFER_BIT; + glColorMask((parameter & NV097_CLEAR_SURFACE_R) + ? GL_TRUE : GL_FALSE, + (parameter & NV097_CLEAR_SURFACE_G) + ? GL_TRUE : GL_FALSE, + (parameter & NV097_CLEAR_SURFACE_B) + ? GL_TRUE : GL_FALSE, + (parameter & NV097_CLEAR_SURFACE_A) + ? 
GL_TRUE : GL_FALSE); + + GLfloat rgba[4]; + pgraph_get_clear_color(pg, rgba); + glClearColor(rgba[0], rgba[1], rgba[2], rgba[3]); + } + + pgraph_gl_surface_update(d, true, write_color, write_zeta); + + /* FIXME: Needs confirmation */ + unsigned int xmin = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMIN); + unsigned int xmax = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX), NV_PGRAPH_CLEARRECTX_XMAX); + unsigned int ymin = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMIN); + unsigned int ymax = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY), NV_PGRAPH_CLEARRECTY_YMAX); + + NV2A_DPRINTF( + "------------------CLEAR 0x%x %d,%d - %d,%d %x---------------\n", + parameter, xmin, ymin, xmax, ymax, + d->pgraph.regs_[NV_PGRAPH_COLORCLEARVALUE]); + + unsigned int scissor_width = xmax - xmin + 1, + scissor_height = ymax - ymin + 1; + pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); + pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); + ymin = pg->surface_binding_dim.height - (ymin + scissor_height); + + NV2A_DPRINTF("Translated clear rect to %d,%d - %d,%d\n", xmin, ymin, + xmin + scissor_width - 1, ymin + scissor_height - 1); + + bool full_clear = !xmin && !ymin && + scissor_width >= pg->surface_binding_dim.width && + scissor_height >= pg->surface_binding_dim.height; + + pgraph_apply_scaling_factor(pg, &xmin, &ymin); + pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height); + + /* FIXME: Respect window clip?!?! */ + glEnable(GL_SCISSOR_TEST); + glScissor(xmin, ymin, scissor_width, scissor_height); + + /* Dither */ + /* FIXME: Maybe also disable it here? 
+ GL implementation dependent */ + if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & NV_PGRAPH_CONTROL_0_DITHERENABLE) { + glEnable(GL_DITHER); + } else { + glDisable(GL_DITHER); + } + + glClear(gl_mask); + + glDisable(GL_SCISSOR_TEST); + + pgraph_gl_set_surface_dirty(pg, write_color, write_zeta); + + if (r->color_binding) { + r->color_binding->cleared = full_clear && write_color; + } + if (r->zeta_binding) { + r->zeta_binding->cleared = full_clear && write_zeta; + } + + pg->clearing = false; +} + +void pgraph_gl_draw_begin(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + NV2A_GL_DGROUP_BEGIN("NV097_SET_BEGIN_END: 0x%x", pg->primitive_mode); + + uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0); + bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE; + bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE; + bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE; + bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE; + bool color_write = mask_alpha || mask_red || mask_green || mask_blue; + bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE; + bool stencil_test = + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE; + bool is_nop_draw = !(color_write || depth_test || stencil_test); + + pgraph_gl_surface_update(d, true, true, depth_test || stencil_test); + + if (is_nop_draw) { + return; + } + + assert(r->color_binding || r->zeta_binding); + + pgraph_gl_bind_textures(d); + pgraph_gl_bind_shaders(pg); + + glColorMask(mask_red, mask_green, mask_blue, mask_alpha); + glDepthMask(!!(control_0 & NV_PGRAPH_CONTROL_0_ZWRITEENABLE)); + glStencilMask(GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1), + NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE)); + + if (pgraph_reg_r(pg, NV_PGRAPH_BLEND) & NV_PGRAPH_BLEND_EN) { + glEnable(GL_BLEND); + uint32_t sfactor = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), + NV_PGRAPH_BLEND_SFACTOR); + uint32_t dfactor 
= GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), + NV_PGRAPH_BLEND_DFACTOR); + assert(sfactor < ARRAY_SIZE(pgraph_blend_factor_gl_map)); + assert(dfactor < ARRAY_SIZE(pgraph_blend_factor_gl_map)); + glBlendFunc(pgraph_blend_factor_gl_map[sfactor], + pgraph_blend_factor_gl_map[dfactor]); + + uint32_t equation = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), + NV_PGRAPH_BLEND_EQN); + assert(equation < ARRAY_SIZE(pgraph_blend_equation_gl_map)); + glBlendEquation(pgraph_blend_equation_gl_map[equation]); + + uint32_t blend_color = pgraph_reg_r(pg, NV_PGRAPH_BLENDCOLOR); + float gl_blend_color[4]; + pgraph_argb_pack32_to_rgba_float(blend_color, gl_blend_color); + glBlendColor(gl_blend_color[0], gl_blend_color[1], gl_blend_color[2], + gl_blend_color[3]); + } else { + glDisable(GL_BLEND); + } + + /* Face culling */ + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) + & NV_PGRAPH_SETUPRASTER_CULLENABLE) { + uint32_t cull_face = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), + NV_PGRAPH_SETUPRASTER_CULLCTRL); + assert(cull_face < ARRAY_SIZE(pgraph_cull_face_gl_map)); + glCullFace(pgraph_cull_face_gl_map[cull_face]); + glEnable(GL_CULL_FACE); + } else { + glDisable(GL_CULL_FACE); + } + + /* Clipping */ + glEnable(GL_CLIP_DISTANCE0); + glEnable(GL_CLIP_DISTANCE1); + + /* Front-face select */ + glFrontFace(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) + & NV_PGRAPH_SETUPRASTER_FRONTFACE + ? GL_CCW : GL_CW); + + /* Polygon offset */ + /* FIXME: GL implementation-specific, maybe do this in VS? 
*/ + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE) { + glEnable(GL_POLYGON_OFFSET_FILL); + } else { + glDisable(GL_POLYGON_OFFSET_FILL); + } + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE) { + glEnable(GL_POLYGON_OFFSET_LINE); + } else { + glDisable(GL_POLYGON_OFFSET_LINE); + } + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE) { + glEnable(GL_POLYGON_OFFSET_POINT); + } else { + glDisable(GL_POLYGON_OFFSET_POINT); + } + if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE | + NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE | + NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) { + uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR); + GLfloat zfactor = *(float*)&zfactor_u32; + uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS); + GLfloat zbias = *(float*)&zbias_u32; + glPolygonOffset(zfactor, zbias); + } + + /* Depth testing */ + if (depth_test) { + glEnable(GL_DEPTH_TEST); + + uint32_t depth_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), + NV_PGRAPH_CONTROL_0_ZFUNC); + assert(depth_func < ARRAY_SIZE(pgraph_depth_func_gl_map)); + glDepthFunc(pgraph_depth_func_gl_map[depth_func]); + } else { + glDisable(GL_DEPTH_TEST); + } + + if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE), + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) == + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) { + glEnable(GL_DEPTH_CLAMP); + } else { + glDisable(GL_DEPTH_CLAMP); + } + + if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), + NV_PGRAPH_CONTROL_3_SHADEMODE) == + NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT) { + glProvokingVertex(GL_FIRST_VERTEX_CONVENTION); + } + + if (stencil_test) { + glEnable(GL_STENCIL_TEST); + + uint32_t stencil_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1), + NV_PGRAPH_CONTROL_1_STENCIL_FUNC); + uint32_t stencil_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1), + 
NV_PGRAPH_CONTROL_1_STENCIL_REF); + uint32_t func_mask = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1), + NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ); + uint32_t op_fail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2), + NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL); + uint32_t op_zfail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2), + NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL); + uint32_t op_zpass = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2), + NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS); + + assert(stencil_func < ARRAY_SIZE(pgraph_stencil_func_gl_map)); + assert(op_fail < ARRAY_SIZE(pgraph_stencil_op_gl_map)); + assert(op_zfail < ARRAY_SIZE(pgraph_stencil_op_gl_map)); + assert(op_zpass < ARRAY_SIZE(pgraph_stencil_op_gl_map)); + + glStencilFunc( + pgraph_stencil_func_gl_map[stencil_func], + stencil_ref, + func_mask); + + glStencilOp( + pgraph_stencil_op_gl_map[op_fail], + pgraph_stencil_op_gl_map[op_zfail], + pgraph_stencil_op_gl_map[op_zpass]); + + } else { + glDisable(GL_STENCIL_TEST); + } + + /* Dither */ + /* FIXME: GL implementation dependent */ + if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & + NV_PGRAPH_CONTROL_0_DITHERENABLE) { + glEnable(GL_DITHER); + } else { + glDisable(GL_DITHER); + } + + glEnable(GL_PROGRAM_POINT_SIZE); + + bool anti_aliasing = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ANTIALIASING), NV_PGRAPH_ANTIALIASING_ENABLE); + + /* Edge Antialiasing */ + if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) { + glEnable(GL_LINE_SMOOTH); + } else { + glDisable(GL_LINE_SMOOTH); + } + if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) { + glEnable(GL_POLYGON_SMOOTH); + } else { + glDisable(GL_POLYGON_SMOOTH); + } + + unsigned int vp_width = pg->surface_binding_dim.width, + vp_height = pg->surface_binding_dim.height; + pgraph_apply_scaling_factor(pg, &vp_width, &vp_height); + glViewport(0, 0, vp_width, vp_height); + + /* Surface clip */ + /* FIXME: Consider moving to 
PSH w/ window clip */ + unsigned int xmin = pg->surface_shape.clip_x - pg->surface_binding_dim.clip_x, + ymin = pg->surface_shape.clip_y - pg->surface_binding_dim.clip_y; + unsigned int xmax = xmin + pg->surface_shape.clip_width - 1, + ymax = ymin + pg->surface_shape.clip_height - 1; + + unsigned int scissor_width = xmax - xmin + 1, + scissor_height = ymax - ymin + 1; + pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); + pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); + ymin = pg->surface_binding_dim.height - (ymin + scissor_height); + pgraph_apply_scaling_factor(pg, &xmin, &ymin); + pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height); + + glEnable(GL_SCISSOR_TEST); + glScissor(xmin, ymin, scissor_width, scissor_height); + + /* Visibility testing */ + if (pg->zpass_pixel_count_enable) { + r->gl_zpass_pixel_count_query_count++; + r->gl_zpass_pixel_count_queries = (GLuint*)g_realloc( + r->gl_zpass_pixel_count_queries, + sizeof(GLuint) * r->gl_zpass_pixel_count_query_count); + + GLuint gl_query; + glGenQueries(1, &gl_query); + r->gl_zpass_pixel_count_queries[ + r->gl_zpass_pixel_count_query_count - 1] = gl_query; + glBeginQuery(GL_SAMPLES_PASSED, gl_query); + } +} + +void pgraph_gl_draw_end(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0); + bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE; + bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE; + bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE; + bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE; + bool color_write = mask_alpha || mask_red || mask_green || mask_blue; + bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE; + bool stencil_test = + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE; + bool is_nop_draw = !(color_write || depth_test || stencil_test); + + if 
(is_nop_draw) { + // FIXME: Check PGRAPH register 0x880. + // HW uses bit 11 in 0x880 to enable or disable a color/zeta limit + // check that will raise an exception in the case that a draw should + // modify the color and/or zeta buffer but the target(s) are masked + // off. This check only seems to trigger during the fragment + // processing, it is legal to attempt a draw that is entirely + // clipped regardless of 0x880. See xemu#635 for context. + return; + } + + pgraph_gl_flush_draw(d); + + /* End of visibility testing */ + if (pg->zpass_pixel_count_enable) { + nv2a_profile_inc_counter(NV2A_PROF_QUERY); + glEndQuery(GL_SAMPLES_PASSED); + } + + pg->draw_time++; + if (r->color_binding && pgraph_color_write_enabled(pg)) { + r->color_binding->draw_time = pg->draw_time; + } + if (r->zeta_binding && pgraph_zeta_write_enabled(pg)) { + r->zeta_binding->draw_time = pg->draw_time; + } + + pgraph_gl_set_surface_dirty(pg, color_write, depth_test || stencil_test); + NV2A_GL_DGROUP_END(); +} + +void pgraph_gl_flush_draw(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + if (!(r->color_binding || r->zeta_binding)) { + return; + } + assert(r->shader_binding); + + if (pg->draw_arrays_length) { + NV2A_GL_DPRINTF(false, "Draw Arrays"); + nv2a_profile_inc_counter(NV2A_PROF_DRAW_ARRAYS); + assert(pg->inline_elements_length == 0); + assert(pg->inline_buffer_length == 0); + assert(pg->inline_array_length == 0); + + pgraph_gl_bind_vertex_attributes(d, pg->draw_arrays_min_start, + pg->draw_arrays_max_count - 1, + false, 0, + pg->draw_arrays_max_count - 1); + glMultiDrawArrays(r->shader_binding->gl_primitive_mode, + pg->draw_arrays_start, + pg->draw_arrays_count, + pg->draw_arrays_length); + } else if (pg->inline_elements_length) { + NV2A_GL_DPRINTF(false, "Inline Elements"); + nv2a_profile_inc_counter(NV2A_PROF_INLINE_ELEMENTS); + assert(pg->inline_buffer_length == 0); + assert(pg->inline_array_length == 0); + + uint32_t min_element = 
(uint32_t)-1; + uint32_t max_element = 0; + for (int i=0; i < pg->inline_elements_length; i++) { + max_element = MAX(pg->inline_elements[i], max_element); + min_element = MIN(pg->inline_elements[i], min_element); + } + + pgraph_gl_bind_vertex_attributes( + d, min_element, max_element, false, 0, + pg->inline_elements[pg->inline_elements_length - 1]); + + VertexKey k; + memset(&k, 0, sizeof(VertexKey)); + k.count = pg->inline_elements_length; + k.gl_type = GL_UNSIGNED_INT; + k.gl_normalize = GL_FALSE; + k.stride = sizeof(uint32_t); + uint64_t h = fast_hash((uint8_t*)pg->inline_elements, + pg->inline_elements_length * 4); + + LruNode *node = lru_lookup(&r->element_cache, h, &k); + VertexLruNode *found = container_of(node, VertexLruNode, node); + glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, found->gl_buffer); + if (!found->initialized) { + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4); + glBufferData(GL_ELEMENT_ARRAY_BUFFER, + pg->inline_elements_length * 4, + pg->inline_elements, GL_STATIC_DRAW); + found->initialized = true; + } else { + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_4_NOTDIRTY); + } + glDrawElements(r->shader_binding->gl_primitive_mode, + pg->inline_elements_length, GL_UNSIGNED_INT, + (void *)0); + } else if (pg->inline_buffer_length) { + NV2A_GL_DPRINTF(false, "Inline Buffer"); + nv2a_profile_inc_counter(NV2A_PROF_INLINE_BUFFERS); + assert(pg->inline_array_length == 0); + + if (pg->compressed_attrs) { + pg->compressed_attrs = 0; + pgraph_gl_bind_shaders(pg); + } + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attr = &pg->vertex_attributes[i]; + if (attr->inline_buffer_populated) { + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_3); + glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_buffer[i]); + glBufferData(GL_ARRAY_BUFFER, + pg->inline_buffer_length * sizeof(float) * 4, + attr->inline_buffer, GL_STREAM_DRAW); + glVertexAttribPointer(i, 4, GL_FLOAT, GL_FALSE, 0, 0); + glEnableVertexAttribArray(i); + 
attr->inline_buffer_populated = false; + memcpy(attr->inline_value, + attr->inline_buffer + (pg->inline_buffer_length - 1) * 4, + sizeof(attr->inline_value)); + } else { + glDisableVertexAttribArray(i); + glVertexAttrib4fv(i, attr->inline_value); + } + } + + glDrawArrays(r->shader_binding->gl_primitive_mode, + 0, pg->inline_buffer_length); + } else if (pg->inline_array_length) { + NV2A_GL_DPRINTF(false, "Inline Array"); + nv2a_profile_inc_counter(NV2A_PROF_INLINE_ARRAYS); + + unsigned int index_count = pgraph_gl_bind_inline_array(d); + glDrawArrays(r->shader_binding->gl_primitive_mode, + 0, index_count); + } else { + NV2A_GL_DPRINTF(true, "EMPTY NV097_SET_BEGIN_END"); + NV2A_UNCONFIRMED("EMPTY NV097_SET_BEGIN_END"); + } +} diff --git a/hw/xbox/nv2a/pgraph/gl/meson.build b/hw/xbox/nv2a/pgraph/gl/meson.build new file mode 100644 index 00000000000..ab25eacb7dd --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/meson.build @@ -0,0 +1,12 @@ +specific_ss.add([sdl, gloffscreen, files( + 'blit.c', + 'debug.c', + 'display.c', + 'draw.c', + 'renderer.c', + 'reports.c', + 'shaders.c', + 'surface.c', + 'texture.c', + 'vertex.c', + )]) diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.c b/hw/xbox/nv2a/pgraph/gl/renderer.c new file mode 100644 index 00000000000..9e22a80f6d5 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/renderer.c @@ -0,0 +1,200 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/pgraph/pgraph.h" +#include "debug.h" +#include "renderer.h" + +GloContext *g_nv2a_context_render; +GloContext *g_nv2a_context_display; + +static void early_context_init(void) +{ + g_nv2a_context_render = glo_context_create(); + g_nv2a_context_display = glo_context_create(); +} + +static void pgraph_gl_init(NV2AState *d, Error **errp) +{ + PGRAPHState *pg = &d->pgraph; + + pg->gl_renderer_state = g_malloc0(sizeof(*pg->gl_renderer_state)); + + /* fire up opengl */ + glo_set_current(g_nv2a_context_render); + +#ifdef DEBUG_NV2A_GL + gl_debug_initialize(); +#endif + + /* DXT textures */ + assert(glo_check_extension("GL_EXT_texture_compression_s3tc")); + /* Internal RGB565 texture format */ + assert(glo_check_extension("GL_ARB_ES2_compatibility")); + + pgraph_gl_init_surfaces(pg); + pgraph_gl_init_reports(d); + pgraph_gl_init_textures(d); + pgraph_gl_init_buffers(d); + pgraph_gl_init_shaders(pg); + pgraph_gl_init_display(d); + + pgraph_gl_update_entire_memory_buffer(d); + + pg->uniform_attrs = 0; + pg->swizzle_attrs = 0; +} + +static void pgraph_gl_finalize(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + + glo_set_current(g_nv2a_context_render); + + pgraph_gl_finalize_surfaces(pg); + pgraph_gl_finalize_shaders(pg); + pgraph_gl_finalize_textures(pg); + pgraph_gl_finalize_reports(pg); + pgraph_gl_finalize_buffers(pg); + pgraph_gl_finalize_display(pg); + + glo_set_current(NULL); + + g_free(pg->gl_renderer_state); + pg->gl_renderer_state = NULL; +} + +static void pgraph_gl_flip_stall(NV2AState *d) +{ + NV2A_GL_DFRAME_TERMINATOR(); + glFinish(); +} + +static void pgraph_gl_flush(NV2AState *d) +{ + pgraph_gl_surface_flush(d); + pgraph_gl_mark_textures_possibly_dirty(d, 0, memory_region_size(d->vram)); + 
pgraph_gl_update_entire_memory_buffer(d); + /* FIXME: Flush more? */ + + qatomic_set(&d->pgraph.flush_pending, false); + qemu_event_set(&d->pgraph.flush_complete); +} + +static void pgraph_gl_process_pending(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + if (qatomic_read(&r->downloads_pending) || + qatomic_read(&r->download_dirty_surfaces_pending) || + qatomic_read(&d->pgraph.sync_pending) || + qatomic_read(&d->pgraph.flush_pending) || + qatomic_read(&r->shader_cache_writeback_pending)) { + qemu_mutex_unlock(&d->pfifo.lock); + qemu_mutex_lock(&d->pgraph.lock); + if (qatomic_read(&r->downloads_pending)) { + pgraph_gl_process_pending_downloads(d); + } + if (qatomic_read(&r->download_dirty_surfaces_pending)) { + pgraph_gl_download_dirty_surfaces(d); + } + if (qatomic_read(&d->pgraph.sync_pending)) { + pgraph_gl_sync(d); + } + if (qatomic_read(&d->pgraph.flush_pending)) { + pgraph_gl_flush(d); + } + if (qatomic_read(&r->shader_cache_writeback_pending)) { + pgraph_gl_shader_write_cache_reload_list(&d->pgraph); + } + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + } +} + +static void pgraph_gl_pre_savevm_trigger(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + qatomic_set(&r->download_dirty_surfaces_pending, true); + qemu_event_reset(&r->dirty_surfaces_download_complete); +} + +static void pgraph_gl_pre_savevm_wait(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + qemu_event_wait(&r->dirty_surfaces_download_complete); +} + +static void pgraph_gl_pre_shutdown_trigger(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + qatomic_set(&r->shader_cache_writeback_pending, true); + qemu_event_reset(&r->shader_cache_writeback_complete); +} + +static void pgraph_gl_pre_shutdown_wait(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = 
pg->gl_renderer_state; + + qemu_event_wait(&r->shader_cache_writeback_complete); +} + +static PGRAPHRenderer pgraph_gl_renderer = { + .type = CONFIG_DISPLAY_RENDERER_OPENGL, + .name = "OpenGL", + .ops = { + .init = pgraph_gl_init, + .early_context_init = early_context_init, + .finalize = pgraph_gl_finalize, + .clear_report_value = pgraph_gl_clear_report_value, + .clear_surface = pgraph_gl_clear_surface, + .draw_begin = pgraph_gl_draw_begin, + .draw_end = pgraph_gl_draw_end, + .flip_stall = pgraph_gl_flip_stall, + .flush_draw = pgraph_gl_flush_draw, + .get_report = pgraph_gl_get_report, + .image_blit = pgraph_gl_image_blit, + .pre_savevm_trigger = pgraph_gl_pre_savevm_trigger, + .pre_savevm_wait = pgraph_gl_pre_savevm_wait, + .pre_shutdown_trigger = pgraph_gl_pre_shutdown_trigger, + .pre_shutdown_wait = pgraph_gl_pre_shutdown_wait, + .process_pending = pgraph_gl_process_pending, + .process_pending_reports = pgraph_gl_process_pending_reports, + .surface_update = pgraph_gl_surface_update, + .set_surface_scale_factor = pgraph_gl_set_surface_scale_factor, + .get_surface_scale_factor = pgraph_gl_get_surface_scale_factor, + .get_framebuffer_surface = pgraph_gl_get_framebuffer_surface, + } +}; + +static void __attribute__((constructor)) register_renderer(void) +{ + pgraph_renderer_register(&pgraph_gl_renderer); +} diff --git a/hw/xbox/nv2a/pgraph/gl/renderer.h b/hw/xbox/nv2a/pgraph/gl/renderer.h new file mode 100644 index 00000000000..14160b21b79 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/renderer.h @@ -0,0 +1,286 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GL_RENDERER_H +#define HW_XBOX_NV2A_PGRAPH_GL_RENDERER_H + +#include "qemu/osdep.h" +#include "qemu/thread.h" +#include "qemu/queue.h" +#include "qemu/lru.h" + +#include "hw/hw.h" + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/nv2a_regs.h" +#include "hw/xbox/nv2a/pgraph/surface.h" +#include "hw/xbox/nv2a/pgraph/texture.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +#include "gloffscreen.h" +#include "constants.h" + +typedef struct SurfaceBinding { + QTAILQ_ENTRY(SurfaceBinding) entry; + MemAccessCallback *access_cb; + + hwaddr vram_addr; + + SurfaceShape shape; + uintptr_t dma_addr; + uintptr_t dma_len; + bool color; + bool swizzle; + + unsigned int width; + unsigned int height; + unsigned int pitch; + size_t size; + + bool cleared; + int frame_time; + int draw_time; + bool draw_dirty; + bool download_pending; + bool upload_pending; + + GLuint gl_buffer; + SurfaceFormatInfo fmt; +} SurfaceBinding; + +typedef struct TextureBinding { + unsigned int refcnt; + int draw_time; + uint64_t data_hash; + unsigned int scale; + unsigned int min_filter; + unsigned int mag_filter; + unsigned int addru; + unsigned int addrv; + unsigned int addrp; + uint32_t border_color; + bool border_color_set; + GLenum gl_target; + GLuint gl_texture; +} TextureBinding; + +typedef struct ShaderBinding { + GLuint gl_program; + GLenum gl_primitive_mode; + + GLint psh_constant_loc[9][2]; + GLint alpha_ref_loc; + + GLint bump_mat_loc[NV2A_MAX_TEXTURES]; + GLint bump_scale_loc[NV2A_MAX_TEXTURES]; + GLint bump_offset_loc[NV2A_MAX_TEXTURES]; + GLint 
tex_scale_loc[NV2A_MAX_TEXTURES]; + + GLint surface_size_loc; + GLint clip_range_loc; + + GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS]; + uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; + + GLint inv_viewport_loc; + GLint ltctxa_loc[NV2A_LTCTXA_COUNT]; + GLint ltctxb_loc[NV2A_LTCTXB_COUNT]; + GLint ltc1_loc[NV2A_LTC1_COUNT]; + + GLint fog_color_loc; + GLint fog_param_loc; + GLint light_infinite_half_vector_loc[NV2A_MAX_LIGHTS]; + GLint light_infinite_direction_loc[NV2A_MAX_LIGHTS]; + GLint light_local_position_loc[NV2A_MAX_LIGHTS]; + GLint light_local_attenuation_loc[NV2A_MAX_LIGHTS]; + + GLint clip_region_loc[8]; + + GLint material_alpha_loc; +} ShaderBinding; + +typedef struct ShaderLruNode { + LruNode node; + bool cached; + void *program; + size_t program_size; + GLenum program_format; + ShaderState state; + ShaderBinding *binding; + QemuThread *save_thread; +} ShaderLruNode; + +typedef struct VertexKey { + size_t count; + size_t stride; + hwaddr addr; + + GLboolean gl_normalize; + GLuint gl_type; +} VertexKey; + +typedef struct VertexLruNode { + LruNode node; + VertexKey key; + bool initialized; + + GLuint gl_buffer; +} VertexLruNode; + +typedef struct TextureKey { + TextureShape state; + hwaddr texture_vram_offset; + hwaddr texture_length; + hwaddr palette_vram_offset; + hwaddr palette_length; +} TextureKey; + +typedef struct TextureLruNode { + LruNode node; + TextureKey key; + TextureBinding *binding; + bool possibly_dirty; +} TextureLruNode; + +typedef struct QueryReport { + QSIMPLEQ_ENTRY(QueryReport) entry; + bool clear; + uint32_t parameter; + unsigned int query_count; + GLuint *queries; +} QueryReport; + +typedef struct PGRAPHGLState { + GLuint gl_framebuffer; + GLuint gl_display_buffer; + GLint gl_display_buffer_internal_format; + GLsizei gl_display_buffer_width; + GLsizei gl_display_buffer_height; + GLenum gl_display_buffer_format; + GLenum gl_display_buffer_type; + + Lru element_cache; + VertexLruNode *element_cache_entries; + GLuint 
gl_inline_array_buffer; + GLuint gl_memory_buffer; + GLuint gl_vertex_array; + GLuint gl_inline_buffer[NV2A_VERTEXSHADER_ATTRIBUTES]; + + QTAILQ_HEAD(, SurfaceBinding) surfaces; + SurfaceBinding *color_binding, *zeta_binding; + bool downloads_pending; + QemuEvent downloads_complete; + bool download_dirty_surfaces_pending; + QemuEvent dirty_surfaces_download_complete; // common + + TextureBinding *texture_binding[NV2A_MAX_TEXTURES]; + Lru texture_cache; + TextureLruNode *texture_cache_entries; + + Lru shader_cache; + ShaderLruNode *shader_cache_entries; + ShaderBinding *shader_binding; + QemuMutex shader_cache_lock; + QemuThread shader_disk_thread; + + unsigned int zpass_pixel_count_result; + unsigned int gl_zpass_pixel_count_query_count; + GLuint *gl_zpass_pixel_count_queries; + QSIMPLEQ_HEAD(, QueryReport) report_queue; + + bool shader_cache_writeback_pending; + QemuEvent shader_cache_writeback_complete; + + struct s2t_rndr { + GLuint fbo, vao, vbo, prog; + GLuint tex_loc, surface_size_loc; + } s2t_rndr; + + struct disp_rndr { + GLuint fbo, vao, vbo, prog; + GLuint display_size_loc; + GLuint line_offset_loc; + GLuint tex_loc; + GLuint pvideo_tex; + GLint pvideo_enable_loc; + GLint pvideo_tex_loc; + GLint pvideo_in_pos_loc; + GLint pvideo_pos_loc; + GLint pvideo_scale_loc; + GLint pvideo_color_key_enable_loc; + GLint pvideo_color_key_loc; + GLint palette_loc[256]; + } disp_rndr; +} PGRAPHGLState; + +extern GloContext *g_nv2a_context_render; +extern GloContext *g_nv2a_context_display; + +unsigned int pgraph_gl_bind_inline_array(NV2AState *d); +void pgraph_gl_bind_shaders(PGRAPHState *pg); +void pgraph_gl_bind_textures(NV2AState *d); +void pgraph_gl_bind_vertex_attributes(NV2AState *d, unsigned int min_element, unsigned int max_element, bool inline_data, unsigned int inline_stride, unsigned int provoking_element); +bool pgraph_gl_check_surface_to_texture_compatibility(const SurfaceBinding *surface, const TextureShape *shape); +GLuint pgraph_gl_compile_shader(const 
char *vs_src, const char *fs_src); +void pgraph_gl_download_dirty_surfaces(NV2AState *d); +void pgraph_gl_clear_report_value(NV2AState *d); +void pgraph_gl_clear_surface(NV2AState *d, uint32_t parameter); +void pgraph_gl_draw_begin(NV2AState *d); +void pgraph_gl_draw_end(NV2AState *d); +void pgraph_gl_flush_draw(NV2AState *d); +void pgraph_gl_get_report(NV2AState *d, uint32_t parameter); +void pgraph_gl_image_blit(NV2AState *d); +void pgraph_gl_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, hwaddr size); +void pgraph_gl_process_pending_reports(NV2AState *d); +void pgraph_gl_surface_flush(NV2AState *d); +void pgraph_gl_surface_update(NV2AState *d, bool upload, bool color_write, bool zeta_write); +void pgraph_gl_sync(NV2AState *d); +void pgraph_gl_update_entire_memory_buffer(NV2AState *d); +void pgraph_gl_init_display(NV2AState *d); +void pgraph_gl_finalize_display(PGRAPHState *pg); +void pgraph_gl_init_reports(NV2AState *d); +void pgraph_gl_finalize_reports(PGRAPHState *pg); +void pgraph_gl_init_shaders(PGRAPHState *pg); +void pgraph_gl_finalize_shaders(PGRAPHState *pg); +void pgraph_gl_init_surfaces(PGRAPHState *pg); +void pgraph_gl_finalize_surfaces(PGRAPHState *pg); +void pgraph_gl_init_textures(NV2AState *d); +void pgraph_gl_finalize_textures(PGRAPHState *pg); +void pgraph_gl_init_buffers(NV2AState *d); +void pgraph_gl_finalize_buffers(PGRAPHState *pg); +void pgraph_gl_process_pending_downloads(NV2AState *d); +void pgraph_gl_reload_surface_scale_factor(PGRAPHState *pg); +void pgraph_gl_render_surface_to_texture(NV2AState *d, SurfaceBinding *surface, TextureBinding *texture, TextureShape *texture_shape, int texture_unit); +void pgraph_gl_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta); +void pgraph_gl_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface); +SurfaceBinding *pgraph_gl_surface_get(NV2AState *d, hwaddr addr); +SurfaceBinding *pgraph_gl_surface_get_within(NV2AState *d, hwaddr addr); +void 
pgraph_gl_surface_invalidate(NV2AState *d, SurfaceBinding *e); +void pgraph_gl_unbind_surface(NV2AState *d, bool color); +void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface, bool force); +void pgraph_gl_shader_cache_to_disk(ShaderLruNode *snode); +bool pgraph_gl_shader_load_from_memory(ShaderLruNode *snode); +void pgraph_gl_shader_write_cache_reload_list(PGRAPHState *pg); +void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale); +unsigned int pgraph_gl_get_surface_scale_factor(NV2AState *d); +int pgraph_gl_get_framebuffer_surface(NV2AState *d); + +#endif diff --git a/hw/xbox/nv2a/pgraph/gl/reports.c b/hw/xbox/nv2a/pgraph/gl/reports.c new file mode 100644 index 00000000000..2dea09e590e --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/reports.c @@ -0,0 +1,131 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include +#include "renderer.h" + +static void process_pending_report(NV2AState *d, QueryReport *report) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + if (report->clear) { + r->zpass_pixel_count_result = 0; + return; + } + + uint8_t type = GET_MASK(report->parameter, NV097_GET_REPORT_TYPE); + assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT); + + /* FIXME: Multisampling affects this (both: OGL and Xbox GPU), + * not sure if CLEARs also count + */ + /* FIXME: What about clipping regions etc? */ + for (int i = 0; i < report->query_count; i++) { + GLuint gl_query_result = 0; + glGetQueryObjectuiv(report->queries[i], GL_QUERY_RESULT, &gl_query_result); + gl_query_result /= pg->surface_scale_factor * pg->surface_scale_factor; + r->zpass_pixel_count_result += gl_query_result; + } + + if (report->query_count) { + glDeleteQueries(report->query_count, report->queries); + g_free(report->queries); + } + + pgraph_write_zpass_pixel_cnt_report(d, report->parameter, r->zpass_pixel_count_result); +} + +void pgraph_gl_process_pending_reports(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + QueryReport *report, *next; + + QSIMPLEQ_FOREACH_SAFE(report, &r->report_queue, entry, next) { + process_pending_report(d, report); + QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry); + g_free(report); + } +} + +void pgraph_gl_clear_report_value(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + /* FIXME: Does this have a value in parameter? Also does this (also?) modify + * the report memory block? 
+ */ + if (r->gl_zpass_pixel_count_query_count) { + glDeleteQueries(r->gl_zpass_pixel_count_query_count, + r->gl_zpass_pixel_count_queries); + r->gl_zpass_pixel_count_query_count = 0; + } + + QueryReport *report = g_malloc(sizeof(QueryReport)); + report->clear = true; + QSIMPLEQ_INSERT_TAIL(&r->report_queue, report, entry); +} + +void pgraph_gl_init_reports(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + QSIMPLEQ_INIT(&r->report_queue); +} + +void pgraph_gl_get_report(NV2AState *d, uint32_t parameter) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + QueryReport *report = g_malloc(sizeof(QueryReport)); + report->clear = false; + report->parameter = parameter; + report->query_count = r->gl_zpass_pixel_count_query_count; + report->queries = r->gl_zpass_pixel_count_queries; + QSIMPLEQ_INSERT_TAIL(&r->report_queue, report, entry); + + r->gl_zpass_pixel_count_query_count = 0; + r->gl_zpass_pixel_count_queries = NULL; +} + +void pgraph_gl_finalize_reports(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + QueryReport *report, *next; + QSIMPLEQ_FOREACH_SAFE (report, &r->report_queue, entry, next) { + if (report->query_count) { + glDeleteQueries(report->query_count, report->queries); + } + QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry); + g_free(report); + } + + if (r->gl_zpass_pixel_count_query_count) { + glDeleteQueries(r->gl_zpass_pixel_count_query_count, + r->gl_zpass_pixel_count_queries); + r->gl_zpass_pixel_count_query_count = 0; + } +} diff --git a/hw/xbox/nv2a/pgraph/gl/shaders.c b/hw/xbox/nv2a/pgraph/gl/shaders.c new file mode 100644 index 00000000000..b532d9e17bb --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/shaders.c @@ -0,0 +1,1105 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify 
it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "qemu/fast-hash.h" +#include "qemu/mstring.h" +#include + +#include "xemu-version.h" +#include "ui/xemu-settings.h" +#include "hw/xbox/nv2a/pgraph/glsl/geom.h" +#include "hw/xbox/nv2a/pgraph/glsl/vsh.h" +#include "hw/xbox/nv2a/pgraph/glsl/psh.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include "hw/xbox/nv2a/pgraph/util.h" +#include "debug.h" +#include "renderer.h" + +static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, bool binding_changed, bool vertex_program, bool fixed_function); + +static GLenum get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode) +{ + if (polygon_mode == POLY_MODE_POINT) { + return GL_POINTS; + } + + switch (primitive_mode) { + case PRIM_TYPE_POINTS: return GL_POINTS; + case PRIM_TYPE_LINES: return GL_LINES; + case PRIM_TYPE_LINE_LOOP: return GL_LINE_LOOP; + case PRIM_TYPE_LINE_STRIP: return GL_LINE_STRIP; + case PRIM_TYPE_TRIANGLES: return GL_TRIANGLES; + case PRIM_TYPE_TRIANGLE_STRIP: return GL_TRIANGLE_STRIP; + case PRIM_TYPE_TRIANGLE_FAN: return GL_TRIANGLE_FAN; + case PRIM_TYPE_QUADS: return GL_LINES_ADJACENCY; + case PRIM_TYPE_QUAD_STRIP: return GL_LINE_STRIP_ADJACENCY; + case PRIM_TYPE_POLYGON: + if (polygon_mode == POLY_MODE_LINE) { + return GL_LINE_LOOP; + } else if (polygon_mode == POLY_MODE_FILL) { + return GL_TRIANGLE_FAN; + } + + assert(!"PRIM_TYPE_POLYGON 
with invalid polygon_mode"); + return 0; + default: + assert(!"Invalid primitive_mode"); + return 0; + } +} + +static GLuint create_gl_shader(GLenum gl_shader_type, + const char *code, + const char *name) +{ + GLint compiled = 0; + + NV2A_GL_DGROUP_BEGIN("Creating new %s", name); + + NV2A_DPRINTF("compile new %s, code:\n%s\n", name, code); + + GLuint shader = glCreateShader(gl_shader_type); + glShaderSource(shader, 1, &code, 0); + glCompileShader(shader); + + /* Check it compiled */ + compiled = 0; + glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled); + if (!compiled) { + GLchar* log; + GLint log_length; + glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); + log = g_malloc(log_length * sizeof(GLchar)); + glGetShaderInfoLog(shader, log_length, NULL, log); + fprintf(stderr, "%s\n\n" "nv2a: %s compilation failed: %s\n", code, name, log); + g_free(log); + + NV2A_GL_DGROUP_END(); + abort(); + } + + NV2A_GL_DGROUP_END(); + + return shader; +} + +static void update_shader_constant_locations(ShaderBinding *binding, const ShaderState *state) +{ + int i, j; + char tmp[64]; + + /* set texture samplers */ + for (i = 0; i < NV2A_MAX_TEXTURES; i++) { + char samplerName[16]; + snprintf(samplerName, sizeof(samplerName), "texSamp%d", i); + GLint texSampLoc = glGetUniformLocation(binding->gl_program, samplerName); + if (texSampLoc >= 0) { + glUniform1i(texSampLoc, i); + } + } + + /* validate the program */ + glValidateProgram(binding->gl_program); + GLint valid = 0; + glGetProgramiv(binding->gl_program, GL_VALIDATE_STATUS, &valid); + if (!valid) { + GLchar log[1024]; + glGetProgramInfoLog(binding->gl_program, 1024, NULL, log); + fprintf(stderr, "nv2a: shader validation failed: %s\n", log); + abort(); + } + + /* lookup fragment shader uniforms */ + for (i = 0; i < 9; i++) { + for (j = 0; j < 2; j++) { + snprintf(tmp, sizeof(tmp), "c%d_%d", j, i); + binding->psh_constant_loc[i][j] = glGetUniformLocation(binding->gl_program, tmp); + } + } + binding->alpha_ref_loc = 
glGetUniformLocation(binding->gl_program, "alphaRef"); + for (i = 1; i < NV2A_MAX_TEXTURES; i++) { + snprintf(tmp, sizeof(tmp), "bumpMat%d", i); + binding->bump_mat_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + snprintf(tmp, sizeof(tmp), "bumpScale%d", i); + binding->bump_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + snprintf(tmp, sizeof(tmp), "bumpOffset%d", i); + binding->bump_offset_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + snprintf(tmp, sizeof(tmp), "texScale%d", i); + binding->tex_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + + /* lookup vertex shader uniforms */ + for(i = 0; i < NV2A_VERTEXSHADER_CONSTANTS; i++) { + snprintf(tmp, sizeof(tmp), "c[%d]", i); + binding->vsh_constant_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + binding->surface_size_loc = glGetUniformLocation(binding->gl_program, "surfaceSize"); + binding->clip_range_loc = glGetUniformLocation(binding->gl_program, "clipRange"); + binding->fog_color_loc = glGetUniformLocation(binding->gl_program, "fogColor"); + binding->fog_param_loc = glGetUniformLocation(binding->gl_program, "fogParam"); + + binding->inv_viewport_loc = glGetUniformLocation(binding->gl_program, "invViewport"); + for (i = 0; i < NV2A_LTCTXA_COUNT; i++) { + snprintf(tmp, sizeof(tmp), "ltctxa[%d]", i); + binding->ltctxa_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + for (i = 0; i < NV2A_LTCTXB_COUNT; i++) { + snprintf(tmp, sizeof(tmp), "ltctxb[%d]", i); + binding->ltctxb_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + for (i = 0; i < NV2A_LTC1_COUNT; i++) { + snprintf(tmp, sizeof(tmp), "ltc1[%d]", i); + binding->ltc1_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + for (i = 0; i < NV2A_MAX_LIGHTS; i++) { + snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i); + binding->light_infinite_half_vector_loc[i] = + 
glGetUniformLocation(binding->gl_program, tmp); + snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i); + binding->light_infinite_direction_loc[i] = + glGetUniformLocation(binding->gl_program, tmp); + + snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i); + binding->light_local_position_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i); + binding->light_local_attenuation_loc[i] = + glGetUniformLocation(binding->gl_program, tmp); + } + for (i = 0; i < 8; i++) { + snprintf(tmp, sizeof(tmp), "clipRegion[%d]", i); + binding->clip_region_loc[i] = glGetUniformLocation(binding->gl_program, tmp); + } + + if (state->fixed_function) { + binding->material_alpha_loc = + glGetUniformLocation(binding->gl_program, "material_alpha"); + } else { + binding->material_alpha_loc = -1; + } +} + +static ShaderBinding *generate_shaders(const ShaderState *state) +{ + char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL); + if (previous_numeric_locale) { + previous_numeric_locale = g_strdup(previous_numeric_locale); + } + + /* Ensure numeric values are printed with '.' 
radix, no grouping */ + setlocale(LC_NUMERIC, "C"); + GLuint program = glCreateProgram(); + + /* Create an optional geometry shader and find primitive type */ + GLenum gl_primitive_mode = + get_gl_primitive_mode(state->polygon_front_mode, state->primitive_mode); + MString* geometry_shader_code = + pgraph_gen_geom_glsl(state->polygon_front_mode, + state->polygon_back_mode, + state->primitive_mode, + state->smooth_shading, + false); + if (geometry_shader_code) { + const char* geometry_shader_code_str = + mstring_get_str(geometry_shader_code); + GLuint geometry_shader = create_gl_shader(GL_GEOMETRY_SHADER, + geometry_shader_code_str, + "geometry shader"); + glAttachShader(program, geometry_shader); + mstring_unref(geometry_shader_code); + } + + /* create the vertex shader */ + MString *vertex_shader_code = + pgraph_gen_vsh_glsl(state, geometry_shader_code != NULL); + GLuint vertex_shader = create_gl_shader(GL_VERTEX_SHADER, + mstring_get_str(vertex_shader_code), + "vertex shader"); + glAttachShader(program, vertex_shader); + mstring_unref(vertex_shader_code); + + /* generate a fragment shader from register combiners */ + MString *fragment_shader_code = pgraph_gen_psh_glsl(state->psh); + const char *fragment_shader_code_str = + mstring_get_str(fragment_shader_code); + GLuint fragment_shader = create_gl_shader(GL_FRAGMENT_SHADER, + fragment_shader_code_str, + "fragment shader"); + glAttachShader(program, fragment_shader); + mstring_unref(fragment_shader_code); + + /* link the program */ + glLinkProgram(program); + GLint linked = 0; + glGetProgramiv(program, GL_LINK_STATUS, &linked); + if(!linked) { + GLchar log[2048]; + glGetProgramInfoLog(program, 2048, NULL, log); + fprintf(stderr, "nv2a: shader linking failed: %s\n", log); + abort(); + } + + glUseProgram(program); + + ShaderBinding* ret = g_malloc0(sizeof(ShaderBinding)); + ret->gl_program = program; + ret->gl_primitive_mode = gl_primitive_mode; + + update_shader_constant_locations(ret, state); + + if 
(previous_numeric_locale) { + setlocale(LC_NUMERIC, previous_numeric_locale); + g_free(previous_numeric_locale); + } + + return ret; +} + +static const char *shader_gl_vendor = NULL; + +static void shader_create_cache_folder(void) +{ + char *shader_path = g_strdup_printf("%sshaders", xemu_settings_get_base_path()); + qemu_mkdir(shader_path); + g_free(shader_path); +} + +static char *shader_get_lru_cache_path(void) +{ + return g_strdup_printf("%s/shader_cache_list", xemu_settings_get_base_path()); +} + +static void shader_write_lru_list_entry_to_disk(Lru *lru, LruNode *node, void *opaque) +{ + FILE *lru_list_file = (FILE*) opaque; + size_t written = fwrite(&node->hash, sizeof(uint64_t), 1, lru_list_file); + if (written != 1) { + fprintf(stderr, "nv2a: Failed to write shader list entry %llx to disk\n", + (unsigned long long) node->hash); + } +} + +void pgraph_gl_shader_write_cache_reload_list(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + if (!g_config.perf.cache_shaders) { + qatomic_set(&r->shader_cache_writeback_pending, false); + qemu_event_set(&r->shader_cache_writeback_complete); + return; + } + + char *shader_lru_path = shader_get_lru_cache_path(); + qemu_thread_join(&r->shader_disk_thread); + + FILE *lru_list = qemu_fopen(shader_lru_path, "wb"); + g_free(shader_lru_path); + if (!lru_list) { + fprintf(stderr, "nv2a: Failed to open shader LRU cache for writing\n"); + return; + } + + lru_visit_active(&r->shader_cache, shader_write_lru_list_entry_to_disk, lru_list); + fclose(lru_list); + + lru_flush(&r->shader_cache); + + qatomic_set(&r->shader_cache_writeback_pending, false); + qemu_event_set(&r->shader_cache_writeback_complete); +} + +bool pgraph_gl_shader_load_from_memory(ShaderLruNode *snode) +{ + assert(glGetError() == GL_NO_ERROR); + + if (!snode->program) { + return false; + } + + GLuint gl_program = glCreateProgram(); + glProgramBinary(gl_program, snode->program_format, snode->program, snode->program_size); + GLint gl_error = 
glGetError(); + if (gl_error != GL_NO_ERROR) { + NV2A_DPRINTF("failed to load shader binary from disk: GL error code %d\n", gl_error); + glDeleteProgram(gl_program); + return false; + } + + glValidateProgram(gl_program); + GLint valid = 0; + glGetProgramiv(gl_program, GL_VALIDATE_STATUS, &valid); + if (!valid) { + GLchar log[1024]; + glGetProgramInfoLog(gl_program, 1024, NULL, log); + NV2A_DPRINTF("failed to load shader binary from disk: %s\n", log); + glDeleteProgram(gl_program); + return false; + } + + glUseProgram(gl_program); + + ShaderBinding* binding = g_malloc0(sizeof(ShaderBinding)); + binding->gl_program = gl_program; + binding->gl_primitive_mode = get_gl_primitive_mode(snode->state.polygon_front_mode, + snode->state.primitive_mode); + snode->binding = binding; + + g_free(snode->program); + snode->program = NULL; + + update_shader_constant_locations(binding, &snode->state); + + return true; +} + +static char *shader_get_bin_directory(uint64_t hash) +{ + const char *cfg_dir = xemu_settings_get_base_path(); + uint64_t bin_mask = 0xffffUL << 48; + char *shader_bin_dir = g_strdup_printf("%s/shaders/%04lx", + cfg_dir, (hash & bin_mask) >> 48); + return shader_bin_dir; +} + +static char *shader_get_binary_path(const char *shader_bin_dir, uint64_t hash) +{ + uint64_t bin_mask = 0xffffUL << 48; + return g_strdup_printf("%s/%012lx", shader_bin_dir, + hash & (~bin_mask)); +} + +static void shader_load_from_disk(PGRAPHState *pg, uint64_t hash) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + char *shader_bin_dir = shader_get_bin_directory(hash); + char *shader_path = shader_get_binary_path(shader_bin_dir, hash); + char *cached_xemu_version = NULL; + char *cached_gl_vendor = NULL; + void *program_buffer = NULL; + + uint64_t cached_xemu_version_len; + uint64_t gl_vendor_len; + GLenum program_binary_format; + ShaderState state; + size_t shader_size; + + g_free(shader_bin_dir); + + qemu_mutex_lock(&r->shader_cache_lock); + if (lru_contains_hash(&r->shader_cache, hash)) 
{ + qemu_mutex_unlock(&r->shader_cache_lock); + return; + } + qemu_mutex_unlock(&r->shader_cache_lock); + + FILE *shader_file = qemu_fopen(shader_path, "rb"); + if (!shader_file) { + goto error; + } + + size_t nread; + #define READ_OR_ERR(data, data_len) \ + do { \ + nread = fread(data, data_len, 1, shader_file); \ + if (nread != 1) { \ + fclose(shader_file); \ + goto error; \ + } \ + } while (0) + + READ_OR_ERR(&cached_xemu_version_len, sizeof(cached_xemu_version_len)); + + cached_xemu_version = g_malloc(cached_xemu_version_len +1); + READ_OR_ERR(cached_xemu_version, cached_xemu_version_len); + if (strcmp(cached_xemu_version, xemu_version) != 0) { + fclose(shader_file); + goto error; + } + + READ_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len)); + + cached_gl_vendor = g_malloc(gl_vendor_len); + READ_OR_ERR(cached_gl_vendor, gl_vendor_len); + if (strcmp(cached_gl_vendor, shader_gl_vendor) != 0) { + fclose(shader_file); + goto error; + } + + READ_OR_ERR(&program_binary_format, sizeof(program_binary_format)); + READ_OR_ERR(&state, sizeof(state)); + READ_OR_ERR(&shader_size, sizeof(shader_size)); + + program_buffer = g_malloc(shader_size); + READ_OR_ERR(program_buffer, shader_size); + + #undef READ_OR_ERR + + fclose(shader_file); + g_free(shader_path); + g_free(cached_xemu_version); + g_free(cached_gl_vendor); + + qemu_mutex_lock(&r->shader_cache_lock); + LruNode *node = lru_lookup(&r->shader_cache, hash, &state); + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + + /* If we happened to regenerate this shader already, then we may as well use the new one */ + if (snode->binding) { + qemu_mutex_unlock(&r->shader_cache_lock); + return; + } + + snode->program_format = program_binary_format; + snode->program_size = shader_size; + snode->program = program_buffer; + snode->cached = true; + qemu_mutex_unlock(&r->shader_cache_lock); + return; + +error: + /* Delete the shader so it won't be loaded again */ + qemu_unlink(shader_path); + g_free(shader_path); + 
g_free(program_buffer); + g_free(cached_xemu_version); + g_free(cached_gl_vendor); +} + +static void *shader_reload_lru_from_disk(void *arg) +{ + if (!g_config.perf.cache_shaders) { + return NULL; + } + + PGRAPHState *pg = (PGRAPHState*) arg; + char *shader_lru_path = shader_get_lru_cache_path(); + + FILE *lru_shaders_list = qemu_fopen(shader_lru_path, "rb"); + g_free(shader_lru_path); + if (!lru_shaders_list) { + return NULL; + } + + uint64_t hash; + while (fread(&hash, sizeof(uint64_t), 1, lru_shaders_list) == 1) { + shader_load_from_disk(pg, hash); + } + + return NULL; +} + +static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state) +{ + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + memcpy(&snode->state, state, sizeof(ShaderState)); + snode->cached = false; + snode->binding = NULL; + snode->program = NULL; + snode->save_thread = NULL; +} + +static void shader_cache_entry_post_evict(Lru *lru, LruNode *node) +{ + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + + if (snode->save_thread) { + qemu_thread_join(snode->save_thread); + g_free(snode->save_thread); + } + + if (snode->binding) { + glDeleteProgram(snode->binding->gl_program); + g_free(snode->binding); + } + + if (snode->program) { + g_free(snode->program); + } + + snode->cached = false; + snode->save_thread = NULL; + snode->binding = NULL; + snode->program = NULL; + memset(&snode->state, 0, sizeof(ShaderState)); +} + +static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + return memcmp(&snode->state, key, sizeof(ShaderState)); +} + +void pgraph_gl_init_shaders(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + qemu_mutex_init(&r->shader_cache_lock); + qemu_event_init(&r->shader_cache_writeback_complete, false); + + if (!shader_gl_vendor) { + shader_gl_vendor = (const char *) glGetString(GL_VENDOR); + } + + shader_create_cache_folder(); + + /* FIXME: 
Make this configurable */ + const size_t shader_cache_size = 50*1024; + lru_init(&r->shader_cache); + r->shader_cache_entries = malloc(shader_cache_size * sizeof(ShaderLruNode)); + assert(r->shader_cache_entries != NULL); + for (int i = 0; i < shader_cache_size; i++) { + lru_add_free(&r->shader_cache, &r->shader_cache_entries[i].node); + } + + r->shader_cache.init_node = shader_cache_entry_init; + r->shader_cache.compare_nodes = shader_cache_entry_compare; + r->shader_cache.post_node_evict = shader_cache_entry_post_evict; + + qemu_thread_create(&r->shader_disk_thread, "pgraph.renderer_state->shader_cache", + shader_reload_lru_from_disk, pg, QEMU_THREAD_JOINABLE); +} + +void pgraph_gl_finalize_shaders(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + // Clear out shader cache + pgraph_gl_shader_write_cache_reload_list(pg); // FIXME: also flushes, rename for clarity + free(r->shader_cache_entries); + r->shader_cache_entries = NULL; + + qemu_mutex_destroy(&r->shader_cache_lock); +} + +static void *shader_write_to_disk(void *arg) +{ + ShaderLruNode *snode = (ShaderLruNode*) arg; + + char *shader_bin = shader_get_bin_directory(snode->node.hash); + char *shader_path = shader_get_binary_path(shader_bin, snode->node.hash); + + static uint64_t gl_vendor_len; + if (gl_vendor_len == 0) { + gl_vendor_len = (uint64_t) (strlen(shader_gl_vendor) + 1); + } + + static uint64_t xemu_version_len = 0; + if (xemu_version_len == 0) { + xemu_version_len = (uint64_t) (strlen(xemu_version) + 1); + } + + qemu_mkdir(shader_bin); + g_free(shader_bin); + + FILE *shader_file = qemu_fopen(shader_path, "wb"); + if (!shader_file) { + goto error; + } + + size_t written; + #define WRITE_OR_ERR(data, data_size) \ + do { \ + written = fwrite(data, data_size, 1, shader_file); \ + if (written != 1) { \ + fclose(shader_file); \ + goto error; \ + } \ + } while (0) + + WRITE_OR_ERR(&xemu_version_len, sizeof(xemu_version_len)); + WRITE_OR_ERR(xemu_version, xemu_version_len); + + 
WRITE_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len)); + WRITE_OR_ERR(shader_gl_vendor, gl_vendor_len); + + WRITE_OR_ERR(&snode->program_format, sizeof(snode->program_format)); + WRITE_OR_ERR(&snode->state, sizeof(snode->state)); + + WRITE_OR_ERR(&snode->program_size, sizeof(snode->program_size)); + WRITE_OR_ERR(snode->program, snode->program_size); + + #undef WRITE_OR_ERR + + fclose(shader_file); + + g_free(shader_path); + g_free(snode->program); + snode->program = NULL; + + return NULL; + +error: + fprintf(stderr, "nv2a: Failed to write shader binary file to %s\n", shader_path); + qemu_unlink(shader_path); + g_free(shader_path); + g_free(snode->program); + snode->program = NULL; + return NULL; +} + +void pgraph_gl_shader_cache_to_disk(ShaderLruNode *snode) +{ + if (!snode->binding || snode->cached) { + return; + } + + GLint program_size; + glGetProgramiv(snode->binding->gl_program, GL_PROGRAM_BINARY_LENGTH, &program_size); + + if (snode->program) { + g_free(snode->program); + snode->program = NULL; + } + + /* program_size might be zero on some systems, if no binary formats are supported */ + if (program_size == 0) { + return; + } + + snode->program = g_malloc(program_size); + GLsizei program_size_copied; + glGetProgramBinary(snode->binding->gl_program, program_size, &program_size_copied, + &snode->program_format, snode->program); + assert(glGetError() == GL_NO_ERROR); + + snode->program_size = program_size_copied; + snode->cached = true; + + char name[24]; + snprintf(name, sizeof(name), "scache-%llx", (unsigned long long) snode->node.hash); + snode->save_thread = g_malloc0(sizeof(QemuThread)); + qemu_thread_create(snode->save_thread, name, shader_write_to_disk, snode, QEMU_THREAD_JOINABLE); +} + +static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, + bool binding_changed, + + // FIXME: Remove these... 
We already know it from binding.state + bool vertex_program, + bool fixed_function) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + int i, j; + + /* update combiner constants */ + for (i = 0; i < 9; i++) { + uint32_t constant[2]; + if (i == 8) { + /* final combiner */ + constant[0] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR0); + constant[1] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR1); + } else { + constant[0] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4); + constant[1] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4); + } + + for (j = 0; j < 2; j++) { + GLint loc = binding->psh_constant_loc[i][j]; + if (loc != -1) { + float value[4]; + pgraph_argb_pack32_to_rgba_float(constant[j], value); + glUniform4fv(loc, 1, value); + } + } + } + if (binding->alpha_ref_loc != -1) { + float alpha_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), + NV_PGRAPH_CONTROL_0_ALPHAREF) / 255.0; + glUniform1f(binding->alpha_ref_loc, alpha_ref); + } + + + /* For each texture stage */ + for (i = 0; i < NV2A_MAX_TEXTURES; i++) { + GLint loc; + + /* Bump luminance only during stages 1 - 3 */ + if (i > 0) { + loc = binding->bump_mat_loc[i]; + if (loc != -1) { + uint32_t m_u32[4]; + m_u32[0] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT00 + 4 * (i - 1)); + m_u32[1] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT01 + 4 * (i - 1)); + m_u32[2] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT10 + 4 * (i - 1)); + m_u32[3] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT11 + 4 * (i - 1)); + float m[4]; + m[0] = *(float*)&m_u32[0]; + m[1] = *(float*)&m_u32[1]; + m[2] = *(float*)&m_u32[2]; + m[3] = *(float*)&m_u32[3]; + glUniformMatrix2fv(loc, 1, GL_FALSE, m); + } + loc = binding->bump_scale_loc[i]; + if (loc != -1) { + uint32_t v = + pgraph_reg_r(pg, NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4); + glUniform1f(loc, *(float*)&v); + } + loc = binding->bump_offset_loc[i]; + if (loc != -1) { + uint32_t v = + pgraph_reg_r(pg, NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4); + glUniform1f(loc, *(float*)&v); + } + } + + loc = 
r->shader_binding->tex_scale_loc[i]; + if (loc != -1) { + assert(r->texture_binding[i] != NULL); + glUniform1f(loc, (float)r->texture_binding[i]->scale); + } + } + + if (binding->fog_color_loc != -1) { + uint32_t fog_color = pgraph_reg_r(pg, NV_PGRAPH_FOGCOLOR); + glUniform4f(binding->fog_color_loc, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0); + } + if (binding->fog_param_loc != -1) { + uint32_t v[2]; + v[0] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM0); + v[1] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM1); + glUniform2f(binding->fog_param_loc, *(float *)&v[0], *(float *)&v[1]); + } + + float zmax; + switch (pg->surface_shape.zeta_format) { + case NV097_SET_SURFACE_FORMAT_ZETA_Z16: + zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF; + break; + case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: + zmax = pg->surface_shape.z_format ? 
f24_max : (float)0xFFFFFF; + break; + default: + assert(0); + } + + if (fixed_function) { + /* update lighting constants */ + struct { + uint32_t* v; + bool* dirty; + GLint* locs; + size_t len; + } lighting_arrays[] = { + {&pg->ltctxa[0][0], &pg->ltctxa_dirty[0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT}, + {&pg->ltctxb[0][0], &pg->ltctxb_dirty[0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT}, + {&pg->ltc1[0][0], &pg->ltc1_dirty[0], binding->ltc1_loc, NV2A_LTC1_COUNT}, + }; + + for (i=0; ilight_infinite_half_vector_loc[i]; + if (loc != -1) { + glUniform3fv(loc, 1, pg->light_infinite_half_vector[i]); + } + loc = binding->light_infinite_direction_loc[i]; + if (loc != -1) { + glUniform3fv(loc, 1, pg->light_infinite_direction[i]); + } + + loc = binding->light_local_position_loc[i]; + if (loc != -1) { + glUniform3fv(loc, 1, pg->light_local_position[i]); + } + loc = binding->light_local_attenuation_loc[i]; + if (loc != -1) { + glUniform3fv(loc, 1, pg->light_local_attenuation[i]); + } + } + + /* estimate the viewport by assuming it matches the surface ... 
*/ + unsigned int aa_width = 1, aa_height = 1; + pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); + + float m11 = 0.5 * (pg->surface_binding_dim.width/aa_width); + float m22 = -0.5 * (pg->surface_binding_dim.height/aa_height); + float m33 = zmax; + float m41 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0]; + float m42 = *(float*)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1]; + + float invViewport[16] = { + 1.0/m11, 0, 0, 0, + 0, 1.0/m22, 0, 0, + 0, 0, 1.0/m33, 0, + -1.0+m41/m11, 1.0+m42/m22, 0, 1.0 + }; + + if (binding->inv_viewport_loc != -1) { + glUniformMatrix4fv(binding->inv_viewport_loc, + 1, GL_FALSE, &invViewport[0]); + } + } + + /* update vertex program constants */ + for (i=0; ivsh_constants_dirty[i] && !binding_changed) continue; + + GLint loc = binding->vsh_constant_loc[i]; + if ((loc != -1) && + memcmp(binding->vsh_constants[i], pg->vsh_constants[i], + sizeof(pg->vsh_constants[1]))) { + glUniform4fv(loc, 1, (const GLfloat *)pg->vsh_constants[i]); + memcpy(binding->vsh_constants[i], pg->vsh_constants[i], + sizeof(pg->vsh_constants[i])); + } + + pg->vsh_constants_dirty[i] = false; + } + + if (binding->surface_size_loc != -1) { + unsigned int aa_width = 1, aa_height = 1; + pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); + glUniform2f(binding->surface_size_loc, + pg->surface_binding_dim.width / aa_width, + pg->surface_binding_dim.height / aa_height); + } + + if (binding->clip_range_loc != -1) { + uint32_t v[2]; + v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN); + v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX); + float zclip_min = *(float*)&v[0] / zmax * 2.0 - 1.0; + float zclip_max = *(float*)&v[1] / zmax * 2.0 - 1.0; + glUniform4f(binding->clip_range_loc, 0, zmax, zclip_min, zclip_max); + } + + /* Clipping regions */ + unsigned int max_gl_width = pg->surface_binding_dim.width; + unsigned int max_gl_height = pg->surface_binding_dim.height; + pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height); + + for (i = 0; i < 
8; i++) { + uint32_t x = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPX0 + i * 4); + unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN); + unsigned int x_max = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1; + uint32_t y = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPY0 + i * 4); + unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN); + unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1; + pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min); + pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max); + + pgraph_apply_scaling_factor(pg, &x_min, &y_min); + pgraph_apply_scaling_factor(pg, &x_max, &y_max); + + /* Translate for the GL viewport origin */ + int y_min_xlat = MAX((int)max_gl_height - (int)y_max, 0); + int y_max_xlat = MIN((int)max_gl_height - (int)y_min, max_gl_height); + + glUniform4i(r->shader_binding->clip_region_loc[i], + x_min, y_min_xlat, x_max, y_max_xlat); + } + + if (binding->material_alpha_loc != -1) { + glUniform1f(binding->material_alpha_loc, pg->material_alpha); + } +} + +static bool test_shaders_dirty(PGRAPHState *pg) +{ + #define CR_1(reg) CR_x(reg, 1) + #define CR_4(reg) CR_x(reg, 4) + #define CR_8(reg) CR_x(reg, 8) + #define CF(src, name) CF_x(typeof(src), (&src), name, 1) + #define CFA(src, name) CF_x(typeof(src[0]), src, name, ARRAY_SIZE(src)) + #define CNAME(name) reg_check__ ## name + #define CX_x__define(type, name, x) static type CNAME(name)[x]; + #define CR_x__define(reg, x) CX_x__define(uint32_t, reg, x) + #define CF_x__define(type, src, name, x) CX_x__define(type, name, x) + #define CR_x__check(reg, x) \ + for (int i = 0; i < x; i++) { if (pgraph_reg_r(pg, reg+i*4) != CNAME(reg)[i]) goto dirty; } + #define CF_x__check(type, src, name, x) \ + for (int i = 0; i < x; i++) { if (src[i] != CNAME(name)[i]) goto dirty; } + #define CR_x__update(reg, x) \ + for (int i = 0; i < x; i++) { CNAME(reg)[i] = pgraph_reg_r(pg, reg+i*4); } + #define CF_x__update(type, src, name, x) \ + for (int i = 0; i < x; i++) { CNAME(name)[i] = 
src[i]; } + + #define DIRTY_REGS \ + CR_1(NV_PGRAPH_COMBINECTL) \ + CR_1(NV_PGRAPH_SHADERCTL) \ + CR_1(NV_PGRAPH_SHADOWCTL) \ + CR_1(NV_PGRAPH_COMBINESPECFOG0) \ + CR_1(NV_PGRAPH_COMBINESPECFOG1) \ + CR_1(NV_PGRAPH_CONTROL_0) \ + CR_1(NV_PGRAPH_CONTROL_3) \ + CR_1(NV_PGRAPH_CSV0_C) \ + CR_1(NV_PGRAPH_CSV0_D) \ + CR_1(NV_PGRAPH_CSV1_A) \ + CR_1(NV_PGRAPH_CSV1_B) \ + CR_1(NV_PGRAPH_SETUPRASTER) \ + CR_1(NV_PGRAPH_SHADERPROG) \ + CR_8(NV_PGRAPH_COMBINECOLORI0) \ + CR_8(NV_PGRAPH_COMBINECOLORO0) \ + CR_8(NV_PGRAPH_COMBINEALPHAI0) \ + CR_8(NV_PGRAPH_COMBINEALPHAO0) \ + CR_8(NV_PGRAPH_COMBINEFACTOR0) \ + CR_8(NV_PGRAPH_COMBINEFACTOR1) \ + CR_1(NV_PGRAPH_SHADERCLIPMODE) \ + CR_4(NV_PGRAPH_TEXCTL0_0) \ + CR_4(NV_PGRAPH_TEXFMT0) \ + CR_4(NV_PGRAPH_TEXFILTER0) \ + CR_8(NV_PGRAPH_WINDOWCLIPX0) \ + CR_8(NV_PGRAPH_WINDOWCLIPY0) \ + CF(pg->primitive_mode, primitive_mode) \ + CF(pg->surface_scale_factor, surface_scale_factor) \ + CF(pg->compressed_attrs, compressed_attrs) \ + CFA(pg->texture_matrix_enable, texture_matrix_enable) + + #define CR_x(reg, x) CR_x__define(reg, x) + #define CF_x(type, src, name, x) CF_x__define(type, src, name, x) + DIRTY_REGS + #undef CR_x + #undef CF_x + + #define CR_x(reg, x) CR_x__check(reg, x) + #define CF_x(type, src, name, x) CF_x__check(type, src, name, x) + DIRTY_REGS + #undef CR_x + #undef CF_x + return false; + +dirty: + #define CR_x(reg, x) CR_x__update(reg, x) + #define CF_x(type, src, name, x) CF_x__update(type, src, name, x) + DIRTY_REGS + #undef CR_x + #undef CF_x + return true; +} + +void pgraph_gl_bind_shaders(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + bool binding_changed = false; + if (r->shader_binding && !test_shaders_dirty(pg) && !pg->program_data_dirty) { + nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY); + goto update_constants; + } + + pg->program_data_dirty = false; + + ShaderBinding* old_binding = r->shader_binding; + + ShaderState state = pgraph_get_shader_state(pg); + assert(!state.vulkan); 
+ + NV2A_GL_DGROUP_BEGIN("%s (VP: %s FFP: %s)", __func__, + state.vertex_program ? "yes" : "no", + state.fixed_function ? "yes" : "no"); + + uint64_t shader_state_hash = fast_hash((uint8_t*) &state, sizeof(ShaderState)); + qemu_mutex_lock(&r->shader_cache_lock); + LruNode *node = lru_lookup(&r->shader_cache, shader_state_hash, &state); + ShaderLruNode *snode = container_of(node, ShaderLruNode, node); + if (snode->binding || pgraph_gl_shader_load_from_memory(snode)) { + r->shader_binding = snode->binding; + } else { + r->shader_binding = generate_shaders(&state); + nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN); + + /* cache it */ + snode->binding = r->shader_binding; + if (g_config.perf.cache_shaders) { + pgraph_gl_shader_cache_to_disk(snode); + } + } + + qemu_mutex_unlock(&r->shader_cache_lock); + + binding_changed = (r->shader_binding != old_binding); + if (binding_changed) { + nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND); + glUseProgram(r->shader_binding->gl_program); + } + + NV2A_GL_DGROUP_END(); + +update_constants: + shader_update_constants(pg, r->shader_binding, binding_changed, + state.vertex_program, state.fixed_function); +} + +GLuint pgraph_gl_compile_shader(const char *vs_src, const char *fs_src) +{ + GLint status; + char err_buf[512]; + + // Compile vertex shader + GLuint vs = glCreateShader(GL_VERTEX_SHADER); + glShaderSource(vs, 1, &vs_src, NULL); + glCompileShader(vs); + glGetShaderiv(vs, GL_COMPILE_STATUS, &status); + if (status != GL_TRUE) { + glGetShaderInfoLog(vs, sizeof(err_buf), NULL, err_buf); + err_buf[sizeof(err_buf)-1] = '\0'; + fprintf(stderr, "Vertex shader compilation failed: %s\n", err_buf); + exit(1); + } + + // Compile fragment shader + GLuint fs = glCreateShader(GL_FRAGMENT_SHADER); + glShaderSource(fs, 1, &fs_src, NULL); + glCompileShader(fs); + glGetShaderiv(fs, GL_COMPILE_STATUS, &status); + if (status != GL_TRUE) { + glGetShaderInfoLog(fs, sizeof(err_buf), NULL, err_buf); + err_buf[sizeof(err_buf)-1] = '\0'; + fprintf(stderr, 
"Fragment shader compilation failed: %s\n", err_buf); + exit(1); + } + + // Link vertex and fragment shaders + GLuint prog = glCreateProgram(); + glAttachShader(prog, vs); + glAttachShader(prog, fs); + glLinkProgram(prog); + glUseProgram(prog); + + // Flag shaders for deletion (will still be retained for lifetime of prog) + glDeleteShader(vs); + glDeleteShader(fs); + + return prog; +} diff --git a/hw/xbox/nv2a/pgraph/gl/surface.c b/hw/xbox/nv2a/pgraph/gl/surface.c new file mode 100644 index 00000000000..802a3febc0c --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/surface.c @@ -0,0 +1,1428 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "hw/xbox/nv2a/pgraph/pgraph.h" +#include "ui/xemu-settings.h" +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/pgraph/swizzle.h" +#include "debug.h" +#include "renderer.h" + +static void surface_download(NV2AState *d, SurfaceBinding *surface, bool force); +static void surface_download_to_buffer(NV2AState *d, SurfaceBinding *surface, + bool swizzle, bool flip, bool downscale, + uint8_t *pixels); +static void surface_get_dimensions(PGRAPHState *pg, unsigned int *width, unsigned int *height); + +void pgraph_gl_set_surface_scale_factor(NV2AState *d, unsigned int scale) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + g_config.display.quality.surface_scale = scale < 1 ? 1 : scale; + + qemu_mutex_lock(&d->pfifo.lock); + qatomic_set(&d->pfifo.halt, true); + qemu_mutex_unlock(&d->pfifo.lock); + + qemu_mutex_lock(&d->pgraph.lock); + qemu_event_reset(&r->dirty_surfaces_download_complete); + qatomic_set(&r->download_dirty_surfaces_pending, true); + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&r->dirty_surfaces_download_complete); + + qemu_mutex_lock(&d->pgraph.lock); + qemu_event_reset(&d->pgraph.flush_complete); + qatomic_set(&d->pgraph.flush_pending, true); + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&d->pgraph.flush_complete); + + qemu_mutex_lock(&d->pfifo.lock); + qatomic_set(&d->pfifo.halt, false); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); +} + +unsigned int pgraph_gl_get_surface_scale_factor(NV2AState *d) +{ + return d->pgraph.surface_scale_factor; +} + +void pgraph_gl_reload_surface_scale_factor(PGRAPHState *pg) +{ + int factor = g_config.display.quality.surface_scale; + pg->surface_scale_factor = factor < 1 ? 
1 : factor; +} + +// FIXME: Move to common +static bool framebuffer_dirty(PGRAPHState *pg) +{ + bool shape_changed = memcmp(&pg->surface_shape, &pg->last_surface_shape, + sizeof(SurfaceShape)) != 0; + if (!shape_changed || (!pg->surface_shape.color_format + && !pg->surface_shape.zeta_format)) { + return false; + } + return true; +} + +void pgraph_gl_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n", + color, zeta, + pgraph_color_write_enabled(pg), pgraph_zeta_write_enabled(pg)); + /* FIXME: Does this apply to CLEARs too? */ + color = color && pgraph_color_write_enabled(pg); + zeta = zeta && pgraph_zeta_write_enabled(pg); + pg->surface_color.draw_dirty |= color; + pg->surface_zeta.draw_dirty |= zeta; + + if (r->color_binding) { + r->color_binding->draw_dirty |= color; + r->color_binding->frame_time = pg->frame_time; + r->color_binding->cleared = false; + + } + + if (r->zeta_binding) { + r->zeta_binding->draw_dirty |= zeta; + r->zeta_binding->frame_time = pg->frame_time; + r->zeta_binding->cleared = false; + + } +} + +static void init_render_to_texture(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + const char *vs = + "#version 330\n" + "void main()\n" + "{\n" + " float x = -1.0 + float((gl_VertexID & 1) << 2);\n" + " float y = -1.0 + float((gl_VertexID & 2) << 1);\n" + " gl_Position = vec4(x, y, 0, 1);\n" + "}\n"; + const char *fs = + "#version 330\n" + "uniform sampler2D tex;\n" + "uniform vec2 surface_size;\n" + "layout(location = 0) out vec4 out_Color;\n" + "void main()\n" + "{\n" + " vec2 texCoord;\n" + " texCoord.x = gl_FragCoord.x;\n" + " texCoord.y = (surface_size.y - gl_FragCoord.y)\n" + " + (textureSize(tex,0).y - surface_size.y);\n" + " texCoord /= textureSize(tex,0).xy;\n" + " out_Color.rgba = texture(tex, texCoord);\n" + "}\n"; + + r->s2t_rndr.prog = pgraph_gl_compile_shader(vs, fs); + r->s2t_rndr.tex_loc = 
glGetUniformLocation(r->s2t_rndr.prog, "tex"); + r->s2t_rndr.surface_size_loc = glGetUniformLocation(r->s2t_rndr.prog, + "surface_size"); + + glGenVertexArrays(1, &r->s2t_rndr.vao); + glBindVertexArray(r->s2t_rndr.vao); + glGenBuffers(1, &r->s2t_rndr.vbo); + glBindBuffer(GL_ARRAY_BUFFER, r->s2t_rndr.vbo); + glBufferData(GL_ARRAY_BUFFER, 0, NULL, GL_STATIC_DRAW); + glGenFramebuffers(1, &r->s2t_rndr.fbo); +} + +static void finalize_render_to_texture(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + glDeleteProgram(r->s2t_rndr.prog); + r->s2t_rndr.prog = 0; + + glDeleteVertexArrays(1, &r->s2t_rndr.vao); + r->s2t_rndr.vao = 0; + + glDeleteBuffers(1, &r->s2t_rndr.vbo); + r->s2t_rndr.vbo = 0; + + glDeleteFramebuffers(1, &r->s2t_rndr.fbo); + r->s2t_rndr.fbo = 0; +} + +static bool surface_to_texture_can_fastpath(SurfaceBinding *surface, + TextureShape *shape) +{ + // FIXME: Better checks/handling on formats and surface-texture compat + + int surface_fmt = surface->shape.color_format; + int texture_fmt = shape->color_format; + + if (!surface->color) { + // FIXME: Support zeta to color + return false; + } + + switch (surface_fmt) { + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true; + case 
NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true; + default: break; + } + break; + default: break; + } + + trace_nv2a_pgraph_surface_texture_compat_failed( + surface_fmt, texture_fmt); + return false; +} + +static void render_surface_to(NV2AState *d, SurfaceBinding *surface, + int texture_unit, GLuint gl_target, + GLuint gl_texture, unsigned int width, + unsigned int height) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + glActiveTexture(GL_TEXTURE0 + texture_unit); + glBindFramebuffer(GL_FRAMEBUFFER, r->s2t_rndr.fbo); + + GLenum draw_buffers[1] = { GL_COLOR_ATTACHMENT0 }; + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gl_target, + gl_texture, 0); + glDrawBuffers(1, draw_buffers); + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); + assert(glGetError() == GL_NO_ERROR); + + float color[] = { 0.0f, 0.0f, 0.0f, 0.0f }; + glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_BORDER); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_BORDER); + glTexParameterfv(GL_TEXTURE_2D, GL_TEXTURE_BORDER_COLOR, color); + + glBindVertexArray(r->s2t_rndr.vao); + glBindBuffer(GL_ARRAY_BUFFER, r->s2t_rndr.vbo); + glUseProgram(r->s2t_rndr.prog); + glProgramUniform1i(r->s2t_rndr.prog, r->s2t_rndr.tex_loc, + texture_unit); + glProgramUniform2f(r->s2t_rndr.prog, + r->s2t_rndr.surface_size_loc, width, height); + + glViewport(0, 0, width, height); + glColorMask(true, true, true, true); + glDisable(GL_DITHER); + glDisable(GL_SCISSOR_TEST); + glDisable(GL_BLEND); + glDisable(GL_STENCIL_TEST); + glDisable(GL_CULL_FACE); + glDisable(GL_DEPTH_TEST); + glPolygonMode(GL_FRONT_AND_BACK, GL_FILL); + glClearColor(0.0f, 0.0f, 1.0f, 1.0f); + glClear(GL_COLOR_BUFFER_BIT); + glDrawArrays(GL_TRIANGLES, 0, 3); + 
+ glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, gl_target, 0, + 0); + glBindFramebuffer(GL_FRAMEBUFFER, r->gl_framebuffer); + glBindVertexArray(r->gl_vertex_array); + glBindTexture(gl_target, gl_texture); + glUseProgram( + r->shader_binding ? r->shader_binding->gl_program : 0); +} + +static void render_surface_to_texture_slow(NV2AState *d, + SurfaceBinding *surface, + TextureBinding *texture, + TextureShape *texture_shape, + int texture_unit) +{ + PGRAPHState *pg = &d->pgraph; + + const ColorFormatInfo *f = &kelvin_color_format_gl_map[texture_shape->color_format]; + assert(texture_shape->color_format < ARRAY_SIZE(kelvin_color_format_gl_map)); + nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX_FALLBACK); + + glActiveTexture(GL_TEXTURE0 + texture_unit); + glBindTexture(texture->gl_target, texture->gl_texture); + + unsigned int width = surface->width, + height = surface->height; + pgraph_apply_scaling_factor(pg, &width, &height); + + size_t bufsize = width * height * surface->fmt.bytes_per_pixel; + + uint8_t *buf = g_malloc(bufsize); + surface_download_to_buffer(d, surface, false, true, false, buf); + + width = texture_shape->width; + height = texture_shape->height; + pgraph_apply_scaling_factor(pg, &width, &height); + + glTexImage2D(texture->gl_target, 0, f->gl_internal_format, width, height, 0, + f->gl_format, f->gl_type, buf); + g_free(buf); + glBindTexture(texture->gl_target, texture->gl_texture); +} + +/* Note: This function is intended to be called before PGRAPH configures GL + * state for rendering; it will configure GL state here but only restore a + * couple of items. 
+ */ +void pgraph_gl_render_surface_to_texture(NV2AState *d, SurfaceBinding *surface, + TextureBinding *texture, + TextureShape *texture_shape, + int texture_unit) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + const ColorFormatInfo *f = + &kelvin_color_format_gl_map[texture_shape->color_format]; + assert(texture_shape->color_format < ARRAY_SIZE(kelvin_color_format_gl_map)); + + nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX); + + if (!surface_to_texture_can_fastpath(surface, texture_shape)) { + render_surface_to_texture_slow(d, surface, texture, + texture_shape, texture_unit); + return; + } + + unsigned int width = texture_shape->width, height = texture_shape->height; + pgraph_apply_scaling_factor(pg, &width, &height); + + glActiveTexture(GL_TEXTURE0 + texture_unit); + glBindTexture(texture->gl_target, texture->gl_texture); + glTexParameteri(texture->gl_target, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(texture->gl_target, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(texture->gl_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexImage2D(texture->gl_target, 0, f->gl_internal_format, width, height, 0, + f->gl_format, f->gl_type, NULL); + glBindTexture(texture->gl_target, 0); + render_surface_to(d, surface, texture_unit, texture->gl_target, + texture->gl_texture, width, height); + glBindTexture(texture->gl_target, texture->gl_texture); + glUseProgram( + r->shader_binding ? 
r->shader_binding->gl_program : 0); +} + +bool pgraph_gl_check_surface_to_texture_compatibility( + const SurfaceBinding *surface, + const TextureShape *shape) +{ + // FIXME: Better checks/handling on formats and surface-texture compat + + if ((!surface->swizzle && surface->pitch != shape->pitch) || + surface->width != shape->width || + surface->height != shape->height) { + return false; + } + + int surface_fmt = surface->shape.color_format; + int texture_fmt = shape->color_format; + + if (!surface->color) { + // FIXME: Support zeta to color + return false; + } + + if (shape->cubemap) { + // FIXME: Support rendering surface to cubemap face + return false; + } + + if (shape->levels > 1) { + // FIXME: Support rendering surface to mip levels + return false; + } + + switch (surface_fmt) { + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true; + default: break; + } + break; + default: + break; + } + + trace_nv2a_pgraph_surface_texture_compat_failed( + surface_fmt, texture_fmt); + return false; +} + +static void 
wait_for_surface_download(SurfaceBinding *e) +{ + NV2AState *d = g_nv2a; + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + if (qatomic_read(&e->draw_dirty)) { + qemu_mutex_lock(&d->pfifo.lock); + qemu_event_reset(&r->downloads_complete); + qatomic_set(&e->download_pending, true); + qatomic_set(&r->downloads_pending, true); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&r->downloads_complete); + } +} + +static void surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr, + hwaddr len, bool write) +{ + SurfaceBinding *e = opaque; + assert(addr >= e->vram_addr); + hwaddr offset = addr - e->vram_addr; + assert(offset < e->size); + + if (qatomic_read(&e->draw_dirty)) { + trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset); + wait_for_surface_download(e); + } + + if (write && !qatomic_read(&e->upload_pending)) { + trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset); + qatomic_set(&e->upload_pending, true); + } +} + +static SurfaceBinding *surface_put(NV2AState *d, hwaddr addr, + SurfaceBinding *surface_in) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + assert(pgraph_gl_surface_get(d, addr) == NULL); + + SurfaceBinding *surface, *next; + uintptr_t e_end = surface_in->vram_addr + surface_in->size - 1; + QTAILQ_FOREACH_SAFE(surface, &r->surfaces, entry, next) { + uintptr_t s_end = surface->vram_addr + surface->size - 1; + bool overlapping = !(surface->vram_addr > e_end + || surface_in->vram_addr > s_end); + if (overlapping) { + trace_nv2a_pgraph_surface_evict_overlapping( + surface->vram_addr, surface->width, surface->height, + surface->pitch); + pgraph_gl_surface_download_if_dirty(d, surface); + pgraph_gl_surface_invalidate(d, surface); + } + } + + SurfaceBinding *surface_out = g_malloc(sizeof(SurfaceBinding)); + assert(surface_out != NULL); + *surface_out = *surface_in; + + if (tcg_enabled()) { + qemu_mutex_unlock(&d->pgraph.lock); + 
qemu_mutex_lock_iothread(); + mem_access_callback_insert(qemu_get_cpu(0), + d->vram, surface_out->vram_addr, surface_out->size, + &surface_out->access_cb, &surface_access_callback, + surface_out); + qemu_mutex_unlock_iothread(); + qemu_mutex_lock(&d->pgraph.lock); + } + + QTAILQ_INSERT_TAIL(&r->surfaces, surface_out, entry); + + return surface_out; +} + +SurfaceBinding *pgraph_gl_surface_get(NV2AState *d, hwaddr addr) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + SurfaceBinding *surface; + QTAILQ_FOREACH (surface, &r->surfaces, entry) { + if (surface->vram_addr == addr) { + return surface; + } + } + + return NULL; +} + +SurfaceBinding *pgraph_gl_surface_get_within(NV2AState *d, hwaddr addr) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + SurfaceBinding *surface; + QTAILQ_FOREACH (surface, &r->surfaces, entry) { + if (addr >= surface->vram_addr && + addr < (surface->vram_addr + surface->size)) { + return surface; + } + } + + return NULL; +} + +void pgraph_gl_surface_invalidate(NV2AState *d, SurfaceBinding *surface) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + trace_nv2a_pgraph_surface_invalidated(surface->vram_addr); + + if (surface == r->color_binding) { + assert(d->pgraph.surface_color.buffer_dirty); + pgraph_gl_unbind_surface(d, true); + } + if (surface == r->zeta_binding) { + assert(d->pgraph.surface_zeta.buffer_dirty); + pgraph_gl_unbind_surface(d, false); + } + + if (tcg_enabled()) { + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock_iothread(); + mem_access_callback_remove_by_ref(qemu_get_cpu(0), surface->access_cb); + qemu_mutex_unlock_iothread(); + qemu_mutex_lock(&d->pgraph.lock); + } + + glDeleteTextures(1, &surface->gl_buffer); + + QTAILQ_REMOVE(&r->surfaces, surface, entry); + g_free(surface); +} + +static void surface_evict_old(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + const int 
surface_age_limit = 5; + + SurfaceBinding *s, *next; + QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { + int last_used = d->pgraph.frame_time - s->frame_time; + if (last_used >= surface_age_limit) { + trace_nv2a_pgraph_surface_evict_reason("old", s->vram_addr); + pgraph_gl_surface_download_if_dirty(d, s); + pgraph_gl_surface_invalidate(d, s); + } + } +} + +static bool check_surface_compatibility(SurfaceBinding *s1, SurfaceBinding *s2, + bool strict) +{ + bool format_compatible = + (s1->color == s2->color) && + (s1->fmt.gl_attachment == s2->fmt.gl_attachment) && + (s1->fmt.gl_internal_format == s2->fmt.gl_internal_format) && + (s1->pitch == s2->pitch) && + (s1->shape.clip_x <= s2->shape.clip_x) && + (s1->shape.clip_y <= s2->shape.clip_y); + if (!format_compatible) { + return false; + } + + if (!strict) { + return (s1->width >= s2->width) && (s1->height >= s2->height); + } else { + return (s1->width == s2->width) && (s1->height == s2->height); + } +} + +void pgraph_gl_surface_download_if_dirty(NV2AState *d, + SurfaceBinding *surface) +{ + if (surface->draw_dirty) { + surface_download(d, surface, true); + } +} + +static void bind_current_surface(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + if (r->color_binding) { + glFramebufferTexture2D(GL_FRAMEBUFFER, r->color_binding->fmt.gl_attachment, + GL_TEXTURE_2D, r->color_binding->gl_buffer, 0); + } + + if (r->zeta_binding) { + glFramebufferTexture2D(GL_FRAMEBUFFER, r->zeta_binding->fmt.gl_attachment, + GL_TEXTURE_2D, r->zeta_binding->gl_buffer, 0); + } + + if (r->color_binding || r->zeta_binding) { + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == + GL_FRAMEBUFFER_COMPLETE); + } +} + +static void surface_copy_shrink_row(uint8_t *out, uint8_t *in, + unsigned int width, + unsigned int bytes_per_pixel, + unsigned int factor) +{ + if (bytes_per_pixel == 4) { + for (unsigned int x = 0; x < width; x++) { + *(uint32_t *)out = *(uint32_t *)in; + out += 4; + in += 4 * 
factor; + } + } else if (bytes_per_pixel == 2) { + for (unsigned int x = 0; x < width; x++) { + *(uint16_t *)out = *(uint16_t *)in; + out += 2; + in += 2 * factor; + } + } else { + for (unsigned int x = 0; x < width; x++) { + memcpy(out, in, bytes_per_pixel); + out += bytes_per_pixel; + in += bytes_per_pixel * factor; + } + } +} + +static void surface_download_to_buffer(NV2AState *d, SurfaceBinding *surface, + bool swizzle, bool flip, bool downscale, + uint8_t *pixels) +{ + PGRAPHState *pg = &d->pgraph; + + swizzle &= surface->swizzle; + downscale &= (pg->surface_scale_factor != 1); + + trace_nv2a_pgraph_surface_download( + surface->color ? "COLOR" : "ZETA", + surface->swizzle ? "sz" : "lin", surface->vram_addr, + surface->width, surface->height, surface->pitch, + surface->fmt.bytes_per_pixel); + + /* Bind destination surface to framebuffer */ + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment, + GL_TEXTURE_2D, surface->gl_buffer, 0); + + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE); + + /* Read surface into memory */ + uint8_t *gl_read_buf = pixels; + + uint8_t *swizzle_buf = pixels; + if (swizzle) { + /* FIXME: Allocate big buffer up front and re-alloc if necessary. 
+ * FIXME: Consider swizzle in shader + */ + assert(pg->surface_scale_factor == 1 || downscale); + swizzle_buf = (uint8_t *)g_malloc(surface->size); + gl_read_buf = swizzle_buf; + } + + if (downscale) { + pg->scale_buf = (uint8_t *)g_realloc( + pg->scale_buf, pg->surface_scale_factor * pg->surface_scale_factor * + surface->size); + gl_read_buf = pg->scale_buf; + } + + glo_readpixels( + surface->fmt.gl_format, surface->fmt.gl_type, surface->fmt.bytes_per_pixel, + pg->surface_scale_factor * surface->pitch, + pg->surface_scale_factor * surface->width, + pg->surface_scale_factor * surface->height, flip, gl_read_buf); + + /* FIXME: Replace this with a hw accelerated version */ + if (downscale) { + assert(surface->pitch >= (surface->width * surface->fmt.bytes_per_pixel)); + uint8_t *out = swizzle_buf, *in = pg->scale_buf; + for (unsigned int y = 0; y < surface->height; y++) { + surface_copy_shrink_row(out, in, surface->width, + surface->fmt.bytes_per_pixel, + pg->surface_scale_factor); + in += surface->pitch * pg->surface_scale_factor * + pg->surface_scale_factor; + out += surface->pitch; + } + } + + if (swizzle) { + swizzle_rect(swizzle_buf, surface->width, surface->height, pixels, + surface->pitch, surface->fmt.bytes_per_pixel); + g_free(swizzle_buf); + } + + /* Re-bind original framebuffer target */ + glFramebufferTexture2D(GL_FRAMEBUFFER, surface->fmt.gl_attachment, + GL_TEXTURE_2D, 0, 0); + bind_current_surface(d); +} + +static void surface_download(NV2AState *d, SurfaceBinding *surface, bool force) +{ + if (!(surface->download_pending || force)) { + return; + } + + /* FIXME: Respect write enable at last TOU? 
*/ + + nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD); + + surface_download_to_buffer(d, surface, true, true, true, + d->vram_ptr + surface->vram_addr); + + memory_region_set_client_dirty(d->vram, surface->vram_addr, + surface->pitch * surface->height, + DIRTY_MEMORY_VGA); + memory_region_set_client_dirty(d->vram, surface->vram_addr, + surface->pitch * surface->height, + DIRTY_MEMORY_NV2A_TEX); + + surface->download_pending = false; + surface->draw_dirty = false; +} + +void pgraph_gl_process_pending_downloads(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + SurfaceBinding *surface; + QTAILQ_FOREACH(surface, &r->surfaces, entry) { + surface_download(d, surface, false); + } + + qatomic_set(&r->downloads_pending, false); + qemu_event_set(&r->downloads_complete); +} + +void pgraph_gl_download_dirty_surfaces(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + SurfaceBinding *surface; + QTAILQ_FOREACH(surface, &r->surfaces, entry) { + pgraph_gl_surface_download_if_dirty(d, surface); + } + + qatomic_set(&r->download_dirty_surfaces_pending, false); + qemu_event_set(&r->dirty_surfaces_download_complete); +} + +static void surface_copy_expand_row(uint8_t *out, uint8_t *in, + unsigned int width, + unsigned int bytes_per_pixel, + unsigned int factor) +{ + if (bytes_per_pixel == 4) { + for (unsigned int x = 0; x < width; x++) { + for (unsigned int i = 0; i < factor; i++) { + *(uint32_t *)out = *(uint32_t *)in; + out += bytes_per_pixel; + } + in += bytes_per_pixel; + } + } else if (bytes_per_pixel == 2) { + for (unsigned int x = 0; x < width; x++) { + for (unsigned int i = 0; i < factor; i++) { + *(uint16_t *)out = *(uint16_t *)in; + out += bytes_per_pixel; + } + in += bytes_per_pixel; + } + } else { + for (unsigned int x = 0; x < width; x++) { + for (unsigned int i = 0; i < factor; i++) { + memcpy(out, in, bytes_per_pixel); + out += bytes_per_pixel; + } + in += bytes_per_pixel; + } + 
} +} + +static void surface_copy_expand(uint8_t *out, uint8_t *in, unsigned int width, + unsigned int height, + unsigned int bytes_per_pixel, + unsigned int factor) +{ + size_t out_pitch = width * bytes_per_pixel * factor; + + for (unsigned int y = 0; y < height; y++) { + surface_copy_expand_row(out, in, width, bytes_per_pixel, factor); + uint8_t *row_in = out; + for (unsigned int i = 1; i < factor; i++) { + out += out_pitch; + memcpy(out, row_in, out_pitch); + } + in += width * bytes_per_pixel; + out += out_pitch; + } +} + +void pgraph_gl_upload_surface_data(NV2AState *d, SurfaceBinding *surface, + bool force) +{ + if (!(surface->upload_pending || force)) { + return; + } + + nv2a_profile_inc_counter(NV2A_PROF_SURF_UPLOAD); + + trace_nv2a_pgraph_surface_upload( + surface->color ? "COLOR" : "ZETA", + surface->swizzle ? "sz" : "lin", surface->vram_addr, + surface->width, surface->height, surface->pitch, + surface->fmt.bytes_per_pixel); + + PGRAPHState *pg = &d->pgraph; + + surface->upload_pending = false; + surface->draw_time = pg->draw_time; + + // FIXME: Don't query GL for texture binding + GLint last_texture_binding; + glGetIntegerv(GL_TEXTURE_BINDING_2D, &last_texture_binding); + + // FIXME: Replace with FBO to not disturb current state + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, + 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_ATTACHMENT, GL_TEXTURE_2D, + 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, 0, 0); + + uint8_t *data = d->vram_ptr; + uint8_t *buf = data + surface->vram_addr; + + if (surface->swizzle) { + buf = (uint8_t*)g_malloc(surface->size); + unswizzle_rect(data + surface->vram_addr, + surface->width, surface->height, + buf, + surface->pitch, + surface->fmt.bytes_per_pixel); + } + + /* FIXME: Replace this flip/scaling */ + + // This is VRAM so we can't do this inplace! 
+ uint8_t *flipped_buf = (uint8_t *)g_malloc( + surface->height * surface->width * surface->fmt.bytes_per_pixel); + unsigned int irow; + for (irow = 0; irow < surface->height; irow++) { + memcpy(&flipped_buf[surface->width * (surface->height - irow - 1) + * surface->fmt.bytes_per_pixel], + &buf[surface->pitch * irow], + surface->width * surface->fmt.bytes_per_pixel); + } + + uint8_t *gl_read_buf = flipped_buf; + unsigned int width = surface->width, height = surface->height; + + if (pg->surface_scale_factor > 1) { + pgraph_apply_scaling_factor(pg, &width, &height); + pg->scale_buf = (uint8_t *)g_realloc( + pg->scale_buf, width * height * surface->fmt.bytes_per_pixel); + gl_read_buf = pg->scale_buf; + uint8_t *out = gl_read_buf, *in = flipped_buf; + surface_copy_expand(out, in, surface->width, surface->height, + surface->fmt.bytes_per_pixel, + d->pgraph.surface_scale_factor); + } + + int prev_unpack_alignment; + glGetIntegerv(GL_UNPACK_ALIGNMENT, &prev_unpack_alignment); + if (unlikely((width * surface->fmt.bytes_per_pixel) % 4 != 0)) { + glPixelStorei(GL_UNPACK_ALIGNMENT, 1); + } else { + glPixelStorei(GL_UNPACK_ALIGNMENT, 4); + } + + glBindTexture(GL_TEXTURE_2D, surface->gl_buffer); + glTexImage2D(GL_TEXTURE_2D, 0, surface->fmt.gl_internal_format, width, + height, 0, surface->fmt.gl_format, surface->fmt.gl_type, + gl_read_buf); + glPixelStorei(GL_UNPACK_ALIGNMENT, prev_unpack_alignment); + g_free(flipped_buf); + if (surface->swizzle) { + g_free(buf); + } + + // Rebind previous framebuffer binding + glBindTexture(GL_TEXTURE_2D, last_texture_binding); + + bind_current_surface(d); +} + +static void compare_surfaces(SurfaceBinding *s1, SurfaceBinding *s2) +{ + #define DO_CMP(fld) \ + if (s1->fld != s2->fld) \ + trace_nv2a_pgraph_surface_compare_mismatch( \ + #fld, (long int)s1->fld, (long int)s2->fld); + DO_CMP(shape.clip_x) + DO_CMP(shape.clip_width) + DO_CMP(shape.clip_y) + DO_CMP(shape.clip_height) + DO_CMP(gl_buffer) + DO_CMP(fmt.bytes_per_pixel) + 
DO_CMP(fmt.gl_attachment) + DO_CMP(fmt.gl_internal_format) + DO_CMP(fmt.gl_format) + DO_CMP(fmt.gl_type) + DO_CMP(color) + DO_CMP(swizzle) + DO_CMP(vram_addr) + DO_CMP(width) + DO_CMP(height) + DO_CMP(pitch) + DO_CMP(size) + DO_CMP(dma_addr) + DO_CMP(dma_len) + DO_CMP(frame_time) + DO_CMP(draw_time) + #undef DO_CMP +} + +static void populate_surface_binding_entry_sized(NV2AState *d, bool color, + unsigned int width, + unsigned int height, + SurfaceBinding *entry) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + Surface *surface; + hwaddr dma_address; + SurfaceFormatInfo fmt; + + if (color) { + surface = &pg->surface_color; + dma_address = pg->dma_color; + assert(pg->surface_shape.color_format != 0); + assert(pg->surface_shape.color_format < + ARRAY_SIZE(kelvin_surface_color_format_gl_map)); + fmt = kelvin_surface_color_format_gl_map[pg->surface_shape.color_format]; + if (fmt.bytes_per_pixel == 0) { + fprintf(stderr, "nv2a: unimplemented color surface format 0x%x\n", + pg->surface_shape.color_format); + abort(); + } + } else { + surface = &pg->surface_zeta; + dma_address = pg->dma_zeta; + assert(pg->surface_shape.zeta_format != 0); + assert(pg->surface_shape.zeta_format < + ARRAY_SIZE(kelvin_surface_zeta_float_format_gl_map)); + const SurfaceFormatInfo *map = + pg->surface_shape.z_format ? kelvin_surface_zeta_float_format_gl_map : + kelvin_surface_zeta_fixed_format_gl_map; + fmt = map[pg->surface_shape.zeta_format]; + } + + DMAObject dma = nv_dma_load(d, dma_address); + /* There's a bunch of bugs that could cause us to hit this function + * at the wrong time and get a invalid dma object. + * Check that it's sane. 
*/ + assert(dma.dma_class == NV_DMA_IN_MEMORY_CLASS); + // assert(dma.address + surface->offset != 0); + assert(surface->offset <= dma.limit); + assert(surface->offset + surface->pitch * height <= dma.limit + 1); + assert(surface->pitch % fmt.bytes_per_pixel == 0); + assert((dma.address & ~0x07FFFFFF) == 0); + + entry->shape = (color || !r->color_binding) ? pg->surface_shape : + r->color_binding->shape; + entry->gl_buffer = 0; + entry->fmt = fmt; + entry->color = color; + entry->swizzle = + (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); + entry->vram_addr = dma.address + surface->offset; + entry->width = width; + entry->height = height; + entry->pitch = surface->pitch; + entry->size = height * MAX(surface->pitch, width * fmt.bytes_per_pixel); + entry->upload_pending = true; + entry->download_pending = false; + entry->draw_dirty = false; + entry->dma_addr = dma.address; + entry->dma_len = dma.limit; + entry->frame_time = pg->frame_time; + entry->draw_time = pg->draw_time; + entry->cleared = false; +} + +static void populate_surface_binding_entry(NV2AState *d, bool color, + SurfaceBinding *entry) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + unsigned int width, height; + + if (color || !r->color_binding) { + surface_get_dimensions(pg, &width, &height); + pgraph_apply_anti_aliasing_factor(pg, &width, &height); + + /* Since we determine surface dimensions based on the clipping + * rectangle, make sure to include the surface offset as well. 
+ */ + if (pg->surface_type != NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) { + width += pg->surface_shape.clip_x; + height += pg->surface_shape.clip_y; + } + } else { + width = r->color_binding->width; + height = r->color_binding->height; + } + + populate_surface_binding_entry_sized(d, color, width, height, entry); +} + +static void update_surface_part(NV2AState *d, bool upload, bool color) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + SurfaceBinding entry; + populate_surface_binding_entry(d, color, &entry); + + Surface *surface = color ? &pg->surface_color : &pg->surface_zeta; + + bool mem_dirty = !tcg_enabled() && memory_region_test_and_clear_dirty( + d->vram, entry.vram_addr, entry.size, + DIRTY_MEMORY_NV2A); + + if (upload && (surface->buffer_dirty || mem_dirty)) { + pgraph_gl_unbind_surface(d, color); + + SurfaceBinding *found = pgraph_gl_surface_get(d, entry.vram_addr); + if (found != NULL) { + /* FIXME: Support same color/zeta surface target? In the mean time, + * if the surface we just found is currently bound, just unbind it. + */ + SurfaceBinding *other = (color ? r->zeta_binding + : r->color_binding); + if (found == other) { + NV2A_UNIMPLEMENTED("Same color & zeta surface offset"); + pgraph_gl_unbind_surface(d, !color); + } + } + + trace_nv2a_pgraph_surface_target( + color ? "COLOR" : "ZETA", entry.vram_addr, + entry.swizzle ? "sz" : "ln", + pg->surface_shape.anti_aliasing, + pg->surface_shape.clip_x, + pg->surface_shape.clip_width, pg->surface_shape.clip_y, + pg->surface_shape.clip_height); + + bool should_create = true; + + if (found != NULL) { + bool is_compatible = + check_surface_compatibility(found, &entry, false); + +#define TRACE_ARGS found->vram_addr, found->width, found->height, \ + found->swizzle ? 
"sz" : "ln", \ + found->shape.anti_aliasing, found->shape.clip_x, \ + found->shape.clip_width, found->shape.clip_y, \ + found->shape.clip_height, found->pitch + if (found->color) { + trace_nv2a_pgraph_surface_match_color(TRACE_ARGS); + } else { + trace_nv2a_pgraph_surface_match_zeta(TRACE_ARGS); + } +#undef TRACE_ARGS + + assert(!(entry.swizzle && pg->clearing)); + + if (found->swizzle != entry.swizzle) { + /* Clears should only be done on linear surfaces. Avoid + * synchronization by allowing (1) a surface marked swizzled to + * be cleared under the assumption the entire surface is + * destined to be cleared and (2) a fully cleared linear surface + * to be marked swizzled. Strictly match size to avoid + * pathological cases. + */ + is_compatible &= (pg->clearing || found->cleared) && + check_surface_compatibility(found, &entry, true); + if (is_compatible) { + trace_nv2a_pgraph_surface_migrate_type( + entry.swizzle ? "swizzled" : "linear"); + } + } + + if (is_compatible && color && + !check_surface_compatibility(found, &entry, true)) { + SurfaceBinding zeta_entry; + populate_surface_binding_entry_sized( + d, !color, found->width, found->height, &zeta_entry); + hwaddr color_end = found->vram_addr + found->size; + hwaddr zeta_end = zeta_entry.vram_addr + zeta_entry.size; + is_compatible &= found->vram_addr >= zeta_end || + zeta_entry.vram_addr >= color_end; + } + + if (is_compatible && !color && r->color_binding) { + is_compatible &= (found->width == r->color_binding->width) && + (found->height == r->color_binding->height); + } + + if (is_compatible) { + /* FIXME: Refactor */ + pg->surface_binding_dim.width = found->width; + pg->surface_binding_dim.clip_x = found->shape.clip_x; + pg->surface_binding_dim.clip_width = found->shape.clip_width; + pg->surface_binding_dim.height = found->height; + pg->surface_binding_dim.clip_y = found->shape.clip_y; + pg->surface_binding_dim.clip_height = found->shape.clip_height; + found->upload_pending |= mem_dirty; + 
pg->surface_zeta.buffer_dirty |= color; + should_create = false; + } else { + trace_nv2a_pgraph_surface_evict_reason( + "incompatible", found->vram_addr); + compare_surfaces(found, &entry); + pgraph_gl_surface_download_if_dirty(d, found); + pgraph_gl_surface_invalidate(d, found); + } + } + + if (should_create) { + glGenTextures(1, &entry.gl_buffer); + glBindTexture(GL_TEXTURE_2D, entry.gl_buffer); + NV2A_GL_DLABEL(GL_TEXTURE, entry.gl_buffer, + "%s format: %0X, width: %d, height: %d " + "(addr %" HWADDR_PRIx ")", + color ? "color" : "zeta", + color ? pg->surface_shape.color_format + : pg->surface_shape.zeta_format, + entry.width, entry.height, surface->offset); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_BASE_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAX_LEVEL, 0); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + unsigned int width = entry.width, height = entry.height; + pgraph_apply_scaling_factor(pg, &width, &height); + glTexImage2D(GL_TEXTURE_2D, 0, entry.fmt.gl_internal_format, width, + height, 0, entry.fmt.gl_format, entry.fmt.gl_type, + NULL); + found = surface_put(d, entry.vram_addr, &entry); + + /* FIXME: Refactor */ + pg->surface_binding_dim.width = entry.width; + pg->surface_binding_dim.clip_x = entry.shape.clip_x; + pg->surface_binding_dim.clip_width = entry.shape.clip_width; + pg->surface_binding_dim.height = entry.height; + pg->surface_binding_dim.clip_y = entry.shape.clip_y; + pg->surface_binding_dim.clip_height = entry.shape.clip_height; + + if (color && r->zeta_binding && (r->zeta_binding->width != entry.width || r->zeta_binding->height != entry.height)) { + pg->surface_zeta.buffer_dirty = true; + } + } + +#define TRACE_ARGS found->vram_addr, found->width, found->height, \ + found->swizzle ? 
"sz" : "ln", found->shape.anti_aliasing, \ + found->shape.clip_x, found->shape.clip_width, \ + found->shape.clip_y, found->shape.clip_height, found->pitch + + if (color) { + if (should_create) { + trace_nv2a_pgraph_surface_create_color(TRACE_ARGS); + } else { + trace_nv2a_pgraph_surface_hit_color(TRACE_ARGS); + } + + r->color_binding = found; + } else { + if (should_create) { + trace_nv2a_pgraph_surface_create_zeta(TRACE_ARGS); + } else { + trace_nv2a_pgraph_surface_hit_zeta(TRACE_ARGS); + } + r->zeta_binding = found; + } +#undef TRACE_ARGS + + glFramebufferTexture2D(GL_FRAMEBUFFER, entry.fmt.gl_attachment, + GL_TEXTURE_2D, found->gl_buffer, 0); + assert(glCheckFramebufferStatus(GL_FRAMEBUFFER) == + GL_FRAMEBUFFER_COMPLETE); + + surface->buffer_dirty = false; + } + + if (!upload && surface->draw_dirty) { + if (!tcg_enabled()) { + /* FIXME: Cannot monitor for reads/writes; flush now */ + surface_download(d, + color ? r->color_binding : + r->zeta_binding, + true); + } + + surface->write_enabled_cache = false; + surface->draw_dirty = false; + } +} + +void pgraph_gl_unbind_surface(NV2AState *d, bool color) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + if (color) { + if (r->color_binding) { + glFramebufferTexture2D(GL_FRAMEBUFFER, + GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, 0, 0); + r->color_binding = NULL; + } + } else { + if (r->zeta_binding) { + glFramebufferTexture2D(GL_FRAMEBUFFER, + GL_DEPTH_ATTACHMENT, + GL_TEXTURE_2D, 0, 0); + glFramebufferTexture2D(GL_FRAMEBUFFER, + GL_DEPTH_STENCIL_ATTACHMENT, + GL_TEXTURE_2D, 0, 0); + r->zeta_binding = NULL; + } + } +} + +void pgraph_gl_surface_update(NV2AState *d, bool upload, bool color_write, + bool zeta_write) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + pg->surface_shape.z_format = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), + NV_PGRAPH_SETUPRASTER_Z_FORMAT); + + color_write = color_write && + (pg->clearing || pgraph_color_write_enabled(pg)); 
+ zeta_write = zeta_write && (pg->clearing || pgraph_zeta_write_enabled(pg)); + + if (upload) { + bool fb_dirty = framebuffer_dirty(pg); + if (fb_dirty) { + memcpy(&pg->last_surface_shape, &pg->surface_shape, + sizeof(SurfaceShape)); + pg->surface_color.buffer_dirty = true; + pg->surface_zeta.buffer_dirty = true; + } + + if (pg->surface_color.buffer_dirty) { + pgraph_gl_unbind_surface(d, true); + } + + if (color_write) { + update_surface_part(d, true, true); + } + + if (pg->surface_zeta.buffer_dirty) { + pgraph_gl_unbind_surface(d, false); + } + + if (zeta_write) { + update_surface_part(d, true, false); + } + } else { + if ((color_write || pg->surface_color.write_enabled_cache) + && pg->surface_color.draw_dirty) { + update_surface_part(d, false, true); + } + if ((zeta_write || pg->surface_zeta.write_enabled_cache) + && pg->surface_zeta.draw_dirty) { + update_surface_part(d, false, false); + } + } + + if (upload) { + pg->draw_time++; + } + + bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); + + if (r->color_binding) { + r->color_binding->frame_time = pg->frame_time; + if (upload) { + pgraph_gl_upload_surface_data(d, r->color_binding, false); + r->color_binding->draw_time = pg->draw_time; + r->color_binding->swizzle = swizzle; + } + } + + if (r->zeta_binding) { + r->zeta_binding->frame_time = pg->frame_time; + if (upload) { + pgraph_gl_upload_surface_data(d, r->zeta_binding, false); + r->zeta_binding->draw_time = pg->draw_time; + r->zeta_binding->swizzle = swizzle; + } + } + + // Sanity check color and zeta dimensions match + if (r->color_binding && r->zeta_binding) { + assert((r->color_binding->width == r->zeta_binding->width) + && (r->color_binding->height == r->zeta_binding->height)); + } + + surface_evict_old(d); +} + +// FIXME: Move to common +static void surface_get_dimensions(PGRAPHState *pg, unsigned int *width, + unsigned int *height) +{ + bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); + if (swizzle) { + 
*width = 1 << pg->surface_shape.log_width; + *height = 1 << pg->surface_shape.log_height; + } else { + *width = pg->surface_shape.clip_width; + *height = pg->surface_shape.clip_height; + } +} + +void pgraph_gl_init_surfaces(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + pgraph_gl_reload_surface_scale_factor(pg); + glGenFramebuffers(1, &r->gl_framebuffer); + glBindFramebuffer(GL_FRAMEBUFFER, r->gl_framebuffer); + QTAILQ_INIT(&r->surfaces); + r->downloads_pending = false; + qemu_event_init(&r->downloads_complete, false); + qemu_event_init(&r->dirty_surfaces_download_complete, false); + + init_render_to_texture(pg); +} + +static void flush_surfaces(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + /* Clear last surface shape to force recreation of buffers at next draw */ + pg->surface_color.draw_dirty = false; + pg->surface_zeta.draw_dirty = false; + memset(&pg->last_surface_shape, 0, sizeof(pg->last_surface_shape)); + pgraph_gl_unbind_surface(d, true); + pgraph_gl_unbind_surface(d, false); + + SurfaceBinding *s, *next; + QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { + // FIXME: We should download all surfaces to ram, but need to + // investigate corruption issue + // pgraph_gl_surface_download_if_dirty(d, s); + pgraph_gl_surface_invalidate(d, s); + } +} + +void pgraph_gl_finalize_surfaces(PGRAPHState *pg) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + PGRAPHGLState *r = pg->gl_renderer_state; + + flush_surfaces(d); + glDeleteFramebuffers(1, &r->gl_framebuffer); + r->gl_framebuffer = 0; + + finalize_render_to_texture(pg); +} + +void pgraph_gl_surface_flush(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + bool update_surface = (r->color_binding || r->zeta_binding); + + flush_surfaces(d); + + pgraph_gl_reload_surface_scale_factor(pg); + + if (update_surface) { + pgraph_gl_surface_update(d, true, true, true); + } +} diff --git 
a/hw/xbox/nv2a/pgraph/gl/texture.c b/hw/xbox/nv2a/pgraph/gl/texture.c new file mode 100644 index 00000000000..b951b7e1e0c --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/texture.c @@ -0,0 +1,812 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/fast-hash.h" +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/pgraph/swizzle.h" +#include "hw/xbox/nv2a/pgraph/s3tc.h" +#include "hw/xbox/nv2a/pgraph/texture.h" +#include "debug.h" +#include "renderer.h" + +static TextureBinding* generate_texture(const TextureShape s, const uint8_t *texture_data, const uint8_t *palette_data); +static void texture_binding_destroy(gpointer data); + +struct pgraph_texture_possibly_dirty_struct { + hwaddr addr, end; +}; + +static void mark_textures_possibly_dirty_visitor(Lru *lru, LruNode *node, void *opaque) +{ + struct pgraph_texture_possibly_dirty_struct *test = + (struct pgraph_texture_possibly_dirty_struct *)opaque; + + struct TextureLruNode *tnode = container_of(node, TextureLruNode, node); + if (tnode->binding == NULL || tnode->possibly_dirty) { + return; + } + + uintptr_t k_tex_addr = tnode->key.texture_vram_offset; + uintptr_t k_tex_end = k_tex_addr + tnode->key.texture_length - 1; + bool overlapping = 
!(test->addr > k_tex_end || k_tex_addr > test->end); + + if (tnode->key.palette_length > 0) { + uintptr_t k_pal_addr = tnode->key.palette_vram_offset; + uintptr_t k_pal_end = k_pal_addr + tnode->key.palette_length - 1; + overlapping |= !(test->addr > k_pal_end || k_pal_addr > test->end); + } + + tnode->possibly_dirty |= overlapping; +} + +void pgraph_gl_mark_textures_possibly_dirty(NV2AState *d, + hwaddr addr, hwaddr size) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + hwaddr end = TARGET_PAGE_ALIGN(addr + size) - 1; + addr &= TARGET_PAGE_MASK; + assert(end <= memory_region_size(d->vram)); + + struct pgraph_texture_possibly_dirty_struct test = { + .addr = addr, + .end = end, + }; + + lru_visit_active(&r->texture_cache, + mark_textures_possibly_dirty_visitor, + &test); +} + +static bool check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size) +{ + hwaddr end = TARGET_PAGE_ALIGN(addr + size); + addr &= TARGET_PAGE_MASK; + assert(end < memory_region_size(d->vram)); + return memory_region_test_and_clear_dirty(d->vram, addr, end - addr, + DIRTY_MEMORY_NV2A_TEX); +} + +// Check if any of the pages spanned by the a texture are dirty. 
+static bool check_texture_possibly_dirty(NV2AState *d, + hwaddr texture_vram_offset, + unsigned int length, + hwaddr palette_vram_offset, + unsigned int palette_length) +{ + bool possibly_dirty = false; + if (check_texture_dirty(d, texture_vram_offset, length)) { + possibly_dirty = true; + pgraph_gl_mark_textures_possibly_dirty(d, texture_vram_offset, length); + } + if (palette_length && check_texture_dirty(d, palette_vram_offset, + palette_length)) { + possibly_dirty = true; + pgraph_gl_mark_textures_possibly_dirty(d, palette_vram_offset, + palette_length); + } + return possibly_dirty; +} + +static void apply_texture_parameters(TextureBinding *binding, + const BasicColorFormatInfo *f, + unsigned int dimensionality, + unsigned int filter, + unsigned int address, + bool is_bordered, + uint32_t border_color) +{ + unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN); + unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG); + unsigned int addru = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU); + unsigned int addrv = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRV); + unsigned int addrp = GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRP); + + if (f->linear) { + /* somtimes games try to set mipmap min filters on linear textures. + * this could indicate a bug... 
*/ + switch (min_filter) { + case NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD: + case NV_PGRAPH_TEXFILTER0_MIN_BOX_TENT_LOD: + min_filter = NV_PGRAPH_TEXFILTER0_MIN_BOX_LOD0; + break; + case NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD: + case NV_PGRAPH_TEXFILTER0_MIN_TENT_TENT_LOD: + min_filter = NV_PGRAPH_TEXFILTER0_MIN_TENT_LOD0; + break; + } + } + + if (min_filter != binding->min_filter) { + glTexParameteri(binding->gl_target, GL_TEXTURE_MIN_FILTER, + pgraph_texture_min_filter_gl_map[min_filter]); + binding->min_filter = min_filter; + } + if (mag_filter != binding->mag_filter) { + glTexParameteri(binding->gl_target, GL_TEXTURE_MAG_FILTER, + pgraph_texture_mag_filter_gl_map[mag_filter]); + binding->mag_filter = mag_filter; + } + + /* Texture wrapping */ + assert(addru < ARRAY_SIZE(pgraph_texture_addr_gl_map)); + if (addru != binding->addru) { + glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_S, + pgraph_texture_addr_gl_map[addru]); + binding->addru = addru; + } + bool needs_border_color = binding->addru == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER; + if (dimensionality > 1) { + if (addrv != binding->addrv) { + assert(addrv < ARRAY_SIZE(pgraph_texture_addr_gl_map)); + glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_T, + pgraph_texture_addr_gl_map[addrv]); + binding->addrv = addrv; + } + needs_border_color = needs_border_color || binding->addrv == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER; + } + if (dimensionality > 2) { + if (addrp != binding->addrp) { + assert(addrp < ARRAY_SIZE(pgraph_texture_addr_gl_map)); + glTexParameteri(binding->gl_target, GL_TEXTURE_WRAP_R, + pgraph_texture_addr_gl_map[addrp]); + binding->addrp = addrp; + } + needs_border_color = needs_border_color || binding->addrp == NV_PGRAPH_TEXADDRESS0_ADDRU_BORDER; + } + + if (!is_bordered && needs_border_color) { + if (!binding->border_color_set || binding->border_color != border_color) { + /* FIXME: Color channels might be wrong order */ + GLfloat gl_border_color[4]; + 
pgraph_argb_pack32_to_rgba_float(border_color, gl_border_color); + glTexParameterfv(binding->gl_target, GL_TEXTURE_BORDER_COLOR, + gl_border_color); + + binding->border_color_set = true; + binding->border_color = border_color; + } + } +} + +void pgraph_gl_bind_textures(NV2AState *d) +{ + int i; + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + NV2A_GL_DGROUP_BEGIN("%s", __func__); + + for (i=0; ivram)); + assert((palette_vram_offset + palette_length) + < memory_region_size(d->vram)); + bool is_indexed = (state.color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8); + bool possibly_dirty = false; + bool possibly_dirty_checked = false; + + SurfaceBinding *surface = pgraph_gl_surface_get(d, texture_vram_offset); + TextureBinding *tbind = r->texture_binding[i]; + if (!pg->texture_dirty[i] && tbind) { + bool reusable = false; + if (surface && tbind->draw_time == surface->draw_time) { + reusable = true; + } else if (!surface) { + possibly_dirty = check_texture_possibly_dirty( + d, + texture_vram_offset, + length, + palette_vram_offset, + is_indexed ? 
palette_length : 0); + possibly_dirty_checked = true; + reusable = !possibly_dirty; + } + + if (reusable) { + glBindTexture(r->texture_binding[i]->gl_target, + r->texture_binding[i]->gl_texture); + apply_texture_parameters(r->texture_binding[i], + &kelvin_color_format_info_map[state.color_format], + state.dimensionality, + filter, + address, + state.border, + border_color); + continue; + } + } + + /* + * Check active surfaces to see if this texture was a render target + */ + bool surf_to_tex = false; + if (surface != NULL) { + surf_to_tex = pgraph_gl_check_surface_to_texture_compatibility( + surface, &state); + + if (surf_to_tex && surface->upload_pending) { + pgraph_gl_upload_surface_data(d, surface, false); + } + } + + if (!surf_to_tex) { + // FIXME: Restructure to support rendering surfaces to cubemap faces + + // Writeback any surfaces which this texture may index + hwaddr tex_vram_end = texture_vram_offset + length - 1; + QTAILQ_FOREACH(surface, &r->surfaces, entry) { + hwaddr surf_vram_end = surface->vram_addr + surface->size - 1; + bool overlapping = !(surface->vram_addr >= tex_vram_end + || texture_vram_offset >= surf_vram_end); + if (overlapping) { + pgraph_gl_surface_download_if_dirty(d, surface); + } + } + } + + TextureKey key; + memset(&key, 0, sizeof(TextureKey)); + key.state = state; + key.texture_vram_offset = texture_vram_offset; + key.texture_length = length; + if (is_indexed) { + key.palette_vram_offset = palette_vram_offset; + key.palette_length = palette_length; + } + + // Search for existing texture binding in cache + uint64_t tex_binding_hash = fast_hash((uint8_t*)&key, sizeof(key)); + LruNode *found = lru_lookup(&r->texture_cache, + tex_binding_hash, &key); + TextureLruNode *key_out = container_of(found, TextureLruNode, node); + possibly_dirty |= (key_out->binding == NULL) || key_out->possibly_dirty; + + if (!surf_to_tex && !possibly_dirty_checked) { + possibly_dirty |= check_texture_possibly_dirty( + d, + texture_vram_offset, + length, + 
palette_vram_offset, + is_indexed ? palette_length : 0); + } + + // Calculate hash of texture data, if necessary + void *texture_data = (char*)d->vram_ptr + texture_vram_offset; + void *palette_data = (char*)d->vram_ptr + palette_vram_offset; + + uint64_t tex_data_hash = 0; + if (!surf_to_tex && possibly_dirty) { + tex_data_hash = fast_hash(texture_data, length); + if (is_indexed) { + tex_data_hash ^= fast_hash(palette_data, palette_length); + } + } + + // Free existing binding, if texture data has changed + bool must_destroy = (key_out->binding != NULL) + && possibly_dirty + && (key_out->binding->data_hash != tex_data_hash); + if (must_destroy) { + texture_binding_destroy(key_out->binding); + key_out->binding = NULL; + } + + if (key_out->binding == NULL) { + // Must create the texture + key_out->binding = generate_texture(state, texture_data, palette_data); + key_out->binding->data_hash = tex_data_hash; + key_out->binding->scale = 1; + } else { + // Saved an upload! Reuse existing texture in graphics memory. 
+ glBindTexture(key_out->binding->gl_target, + key_out->binding->gl_texture); + } + + key_out->possibly_dirty = false; + TextureBinding *binding = key_out->binding; + binding->refcnt++; + + if (surf_to_tex && binding->draw_time < surface->draw_time) { + + trace_nv2a_pgraph_surface_render_to_texture( + surface->vram_addr, surface->width, surface->height); + pgraph_gl_render_surface_to_texture(d, surface, binding, &state, i); + binding->draw_time = surface->draw_time; + binding->scale = pg->surface_scale_factor; + } + + apply_texture_parameters(binding, + &kelvin_color_format_info_map[state.color_format], + state.dimensionality, + filter, + address, + state.border, + border_color); + + if (r->texture_binding[i]) { + if (r->texture_binding[i]->gl_target != binding->gl_target) { + glBindTexture(r->texture_binding[i]->gl_target, 0); + } + texture_binding_destroy(r->texture_binding[i]); + } + r->texture_binding[i] = binding; + pg->texture_dirty[i] = false; + } + NV2A_GL_DGROUP_END(); +} + +static enum S3TC_DECOMPRESS_FORMAT +gl_internal_format_to_s3tc_enum(GLint gl_internal_format) +{ + switch (gl_internal_format) { + case GL_COMPRESSED_RGBA_S3TC_DXT1_EXT: + return S3TC_DECOMPRESS_FORMAT_DXT1; + case GL_COMPRESSED_RGBA_S3TC_DXT3_EXT: + return S3TC_DECOMPRESS_FORMAT_DXT3; + case GL_COMPRESSED_RGBA_S3TC_DXT5_EXT: + return S3TC_DECOMPRESS_FORMAT_DXT5; + default: + assert(!"Invalid format"); + } +} + +static void upload_gl_texture(GLenum gl_target, + const TextureShape s, + const uint8_t *texture_data, + const uint8_t *palette_data) +{ + ColorFormatInfo f = kelvin_color_format_gl_map[s.color_format]; + nv2a_profile_inc_counter(NV2A_PROF_TEX_UPLOAD); + + unsigned int adjusted_width = s.width; + unsigned int adjusted_height = s.height; + unsigned int adjusted_pitch = s.pitch; + unsigned int adjusted_depth = s.depth; + if (!f.linear && s.border) { + adjusted_width = MAX(16, adjusted_width * 2); + adjusted_height = MAX(16, adjusted_height * 2); + adjusted_pitch = adjusted_width 
* (s.pitch / s.width); + adjusted_depth = MAX(16, s.depth * 2); + } + + switch(gl_target) { + case GL_TEXTURE_1D: + assert(false); + break; + case GL_TEXTURE_2D: + if (f.linear) { + /* Can't handle strides unaligned to pixels */ + assert(s.pitch % f.bytes_per_pixel == 0); + + uint8_t *converted = pgraph_convert_texture_data( + s, texture_data, palette_data, adjusted_width, adjusted_height, 1, + adjusted_pitch, 0, NULL); + glPixelStorei(GL_UNPACK_ROW_LENGTH, + converted ? 0 : adjusted_pitch / f.bytes_per_pixel); + glTexImage2D(GL_TEXTURE_2D, 0, f.gl_internal_format, + adjusted_width, adjusted_height, 0, + f.gl_format, f.gl_type, + converted ? converted : texture_data); + + if (converted) { + g_free(converted); + } + + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + break; + } + /* fallthru */ + case GL_TEXTURE_CUBE_MAP_POSITIVE_X: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_X: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Y: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Y: + case GL_TEXTURE_CUBE_MAP_POSITIVE_Z: + case GL_TEXTURE_CUBE_MAP_NEGATIVE_Z: { + + unsigned int width = adjusted_width, height = adjusted_height; + + int level; + for (level = 0; level < s.levels; level++) { + width = MAX(width, 1); + height = MAX(height, 1); + + if (f.gl_format == 0) { /* compressed */ + // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression#virtual-size-versus-physical-size + unsigned int block_size = + f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT ? 
+ 8 : 16; + unsigned int physical_width = (width + 3) & ~3, + physical_height = (height + 3) & ~3; + if (physical_width != width) { + glPixelStorei(GL_UNPACK_ROW_LENGTH, physical_width); + } + uint8_t *converted = s3tc_decompress_2d( + gl_internal_format_to_s3tc_enum(f.gl_internal_format), + texture_data, physical_width, physical_height); + unsigned int tex_width = width; + unsigned int tex_height = height; + + if (s.cubemap && adjusted_width != s.width) { + // FIXME: Consider preserving the border. + // There does not seem to be a way to reference the border + // texels in a cubemap, so they are discarded. + glPixelStorei(GL_UNPACK_SKIP_PIXELS, 4); + glPixelStorei(GL_UNPACK_SKIP_ROWS, 4); + tex_width = s.width; + tex_height = s.height; + if (physical_width == width) { + glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); + } + } + + glTexImage2D(gl_target, level, GL_RGBA, tex_width, tex_height, 0, + GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, converted); + g_free(converted); + if (physical_width != width) { + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + } + if (s.cubemap && adjusted_width != s.width) { + glPixelStorei(GL_UNPACK_SKIP_PIXELS, 0); + glPixelStorei(GL_UNPACK_SKIP_ROWS, 0); + if (physical_width == width) { + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + } + } + texture_data += + physical_width / 4 * physical_height / 4 * block_size; + } else { + unsigned int pitch = width * f.bytes_per_pixel; + uint8_t *unswizzled = (uint8_t*)g_malloc(height * pitch); + unswizzle_rect(texture_data, width, height, + unswizzled, pitch, f.bytes_per_pixel); + uint8_t *converted = pgraph_convert_texture_data( + s, unswizzled, palette_data, width, height, 1, pitch, 0, + NULL); + uint8_t *pixel_data = converted ? converted : unswizzled; + unsigned int tex_width = width; + unsigned int tex_height = height; + + if (s.cubemap && adjusted_width != s.width) { + // FIXME: Consider preserving the border. 
+ // There does not seem to be a way to reference the border + // texels in a cubemap, so they are discarded. + glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); + tex_width = s.width; + tex_height = s.height; + pixel_data += 4 * f.bytes_per_pixel + 4 * pitch; + } + + glTexImage2D(gl_target, level, f.gl_internal_format, tex_width, + tex_height, 0, f.gl_format, f.gl_type, + pixel_data); + if (s.cubemap && s.border) { + glPixelStorei(GL_UNPACK_ROW_LENGTH, 0); + } + if (converted) { + g_free(converted); + } + g_free(unswizzled); + + texture_data += width * height * f.bytes_per_pixel; + } + + width /= 2; + height /= 2; + } + + break; + } + case GL_TEXTURE_3D: { + + unsigned int width = adjusted_width; + unsigned int height = adjusted_height; + unsigned int depth = adjusted_depth; + + assert(f.linear == false); + + int level; + for (level = 0; level < s.levels; level++) { + if (f.gl_format == 0) { /* compressed */ + assert(width % 4 == 0 && height % 4 == 0 && + "Compressed 3D texture virtual size"); + width = MAX(width, 4); + height = MAX(height, 4); + depth = MAX(depth, 1); + + unsigned int block_size; + if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { + block_size = 8; + } else { + block_size = 16; + } + + size_t texture_size = width/4 * height/4 * depth * block_size; + + uint8_t *converted = s3tc_decompress_3d( + gl_internal_format_to_s3tc_enum(f.gl_internal_format), + texture_data, width, height, depth); + + glTexImage3D(gl_target, level, GL_RGBA8, + width, height, depth, 0, + GL_RGBA, GL_UNSIGNED_INT_8_8_8_8_REV, + converted); + + g_free(converted); + + texture_data += texture_size; + } else { + width = MAX(width, 1); + height = MAX(height, 1); + depth = MAX(depth, 1); + + unsigned int row_pitch = width * f.bytes_per_pixel; + unsigned int slice_pitch = row_pitch * height; + uint8_t *unswizzled = (uint8_t*)g_malloc(slice_pitch * depth); + unswizzle_box(texture_data, width, height, depth, unswizzled, + row_pitch, slice_pitch, f.bytes_per_pixel); + + 
uint8_t *converted = pgraph_convert_texture_data( + s, unswizzled, palette_data, width, height, depth, + row_pitch, slice_pitch, NULL); + + glTexImage3D(gl_target, level, f.gl_internal_format, + width, height, depth, 0, + f.gl_format, f.gl_type, + converted ? converted : unswizzled); + + if (converted) { + g_free(converted); + } + g_free(unswizzled); + + texture_data += width * height * depth * f.bytes_per_pixel; + } + + width /= 2; + height /= 2; + depth /= 2; + } + break; + } + default: + assert(false); + break; + } +} + +static TextureBinding* generate_texture(const TextureShape s, + const uint8_t *texture_data, + const uint8_t *palette_data) +{ + ColorFormatInfo f = kelvin_color_format_gl_map[s.color_format]; + + /* Create a new opengl texture */ + GLuint gl_texture; + glGenTextures(1, &gl_texture); + + GLenum gl_target; + if (s.cubemap) { + assert(f.linear == false); + assert(s.dimensionality == 2); + gl_target = GL_TEXTURE_CUBE_MAP; + } else { + if (f.linear) { + gl_target = GL_TEXTURE_2D; + assert(s.dimensionality == 2); + } else { + switch(s.dimensionality) { + case 1: gl_target = GL_TEXTURE_1D; break; + case 2: gl_target = GL_TEXTURE_2D; break; + case 3: gl_target = GL_TEXTURE_3D; break; + default: + assert(false); + break; + } + } + } + + glBindTexture(gl_target, gl_texture); + + NV2A_GL_DLABEL(GL_TEXTURE, gl_texture, + "offset: 0x%08lx, format: 0x%02X%s, %d dimensions%s, " + "width: %d, height: %d, depth: %d", + texture_data - g_nv2a->vram_ptr, + s.color_format, f.linear ? "" : " (SZ)", + s.dimensionality, s.cubemap ? 
" (Cubemap)" : "", + s.width, s.height, s.depth); + + if (gl_target == GL_TEXTURE_CUBE_MAP) { + + ColorFormatInfo f = kelvin_color_format_gl_map[s.color_format]; + unsigned int block_size; + if (f.gl_internal_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { + block_size = 8; + } else { + block_size = 16; + } + + size_t length = 0; + unsigned int w = s.width; + unsigned int h = s.height; + if (!f.linear && s.border) { + w = MAX(16, w * 2); + h = MAX(16, h * 2); + } + + int level; + for (level = 0; level < s.levels; level++) { + if (f.gl_format == 0) { + length += w/4 * h/4 * block_size; + } else { + length += w * h * f.bytes_per_pixel; + } + + w /= 2; + h /= 2; + } + + length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1); + + upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_X, + s, texture_data + 0 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_X, + s, texture_data + 1 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Y, + s, texture_data + 2 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Y, + s, texture_data + 3 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_POSITIVE_Z, + s, texture_data + 4 * length, palette_data); + upload_gl_texture(GL_TEXTURE_CUBE_MAP_NEGATIVE_Z, + s, texture_data + 5 * length, palette_data); + } else { + upload_gl_texture(gl_target, s, texture_data, palette_data); + } + + /* Linear textures don't support mipmapping */ + if (!f.linear) { + glTexParameteri(gl_target, GL_TEXTURE_BASE_LEVEL, + s.min_mipmap_level); + glTexParameteri(gl_target, GL_TEXTURE_MAX_LEVEL, + s.levels - 1); + } + + if (f.gl_swizzle_mask[0] != 0 || f.gl_swizzle_mask[1] != 0 + || f.gl_swizzle_mask[2] != 0 || f.gl_swizzle_mask[3] != 0) { + glTexParameteriv(gl_target, GL_TEXTURE_SWIZZLE_RGBA, + (const GLint *)f.gl_swizzle_mask); + } + + TextureBinding* ret = (TextureBinding *)g_malloc(sizeof(TextureBinding)); + ret->gl_target = gl_target; + 
ret->gl_texture = gl_texture; + ret->refcnt = 1; + ret->draw_time = 0; + ret->data_hash = 0; + ret->min_filter = 0xFFFFFFFF; + ret->mag_filter = 0xFFFFFFFF; + ret->addru = 0xFFFFFFFF; + ret->addrv = 0xFFFFFFFF; + ret->addrp = 0xFFFFFFFF; + ret->border_color_set = false; + return ret; +} + +static void texture_binding_destroy(gpointer data) +{ + TextureBinding *binding = (TextureBinding *)data; + assert(binding->refcnt > 0); + binding->refcnt--; + if (binding->refcnt == 0) { + glDeleteTextures(1, &binding->gl_texture); + g_free(binding); + } +} + +/* functions for texture LRU cache */ +static void texture_cache_entry_init(Lru *lru, LruNode *node, void *key) +{ + TextureLruNode *tnode = container_of(node, TextureLruNode, node); + memcpy(&tnode->key, key, sizeof(TextureKey)); + + tnode->binding = NULL; + tnode->possibly_dirty = false; +} + +static void texture_cache_entry_post_evict(Lru *lru, LruNode *node) +{ + TextureLruNode *tnode = container_of(node, TextureLruNode, node); + if (tnode->binding) { + texture_binding_destroy(tnode->binding); + tnode->binding = NULL; + tnode->possibly_dirty = false; + } +} + +static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + TextureLruNode *tnode = container_of(node, TextureLruNode, node); + return memcmp(&tnode->key, key, sizeof(TextureKey)); +} + +void pgraph_gl_init_textures(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + const size_t texture_cache_size = 512; + lru_init(&r->texture_cache); + r->texture_cache_entries = malloc(texture_cache_size * sizeof(TextureLruNode)); + assert(r->texture_cache_entries != NULL); + for (int i = 0; i < texture_cache_size; i++) { + lru_add_free(&r->texture_cache, &r->texture_cache_entries[i].node); + } + + r->texture_cache.init_node = texture_cache_entry_init; + r->texture_cache.compare_nodes = texture_cache_entry_compare; + r->texture_cache.post_node_evict = texture_cache_entry_post_evict; +} + +void 
pgraph_gl_finalize_textures(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + r->texture_binding[i] = NULL; + } + + lru_flush(&r->texture_cache); + free(r->texture_cache_entries); + + r->texture_cache_entries = NULL; +} diff --git a/hw/xbox/nv2a/pgraph/gl/vertex.c b/hw/xbox/nv2a/pgraph/gl/vertex.c new file mode 100644 index 00000000000..c2eccdb128b --- /dev/null +++ b/hw/xbox/nv2a/pgraph/gl/vertex.c @@ -0,0 +1,311 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/nv2a_regs.h" +#include +#include "debug.h" +#include "renderer.h" + +static void update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size, + bool quick) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer); + + hwaddr end = TARGET_PAGE_ALIGN(addr + size); + addr &= TARGET_PAGE_MASK; + assert(end < memory_region_size(d->vram)); + + static hwaddr last_addr, last_end; + if (quick && (addr >= last_addr) && (end <= last_end)) { + return; + } + last_addr = addr; + last_end = end; + + size = end - addr; + if (memory_region_test_and_clear_dirty(d->vram, addr, size, + DIRTY_MEMORY_NV2A)) { + glBufferSubData(GL_ARRAY_BUFFER, addr, size, + d->vram_ptr + addr); + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_1); + } +} + +void pgraph_gl_update_entire_memory_buffer(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer); + glBufferSubData(GL_ARRAY_BUFFER, 0, memory_region_size(d->vram), d->vram_ptr); +} + +void pgraph_gl_bind_vertex_attributes(NV2AState *d, unsigned int min_element, + unsigned int max_element, bool inline_data, + unsigned int inline_stride, + unsigned int provoking_element) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + bool updated_memory_buffer = false; + unsigned int num_elements = max_element - min_element + 1; + + if (inline_data) { + NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)", + __func__, num_elements, inline_stride); + } else { + NV2A_GL_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements); + } + + pg->compressed_attrs = 0; + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attr = &pg->vertex_attributes[i]; + + if (!attr->count) { + glDisableVertexAttribArray(i); + glVertexAttrib4fv(i, attr->inline_value); + continue; + } + + NV2A_DPRINTF("vertex data 
array format=%d, count=%d, stride=%d\n", + attr->format, attr->count, attr->stride); + + GLint gl_count = attr->count; + GLenum gl_type; + GLboolean gl_normalize; + bool needs_conversion = false; + + switch (attr->format) { + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: + gl_type = GL_UNSIGNED_BYTE; + gl_normalize = GL_TRUE; + // http://www.opengl.org/registry/specs/ARB/vertex_array_bgra.txt + gl_count = GL_BGRA; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: + gl_type = GL_UNSIGNED_BYTE; + gl_normalize = GL_TRUE; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: + gl_type = GL_SHORT; + gl_normalize = GL_TRUE; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: + gl_type = GL_FLOAT; + gl_normalize = GL_FALSE; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: + gl_type = GL_SHORT; + gl_normalize = GL_FALSE; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: + /* 3 signed, normalized components packed in 32-bits. (11,11,10) */ + gl_type = GL_INT; + assert(attr->count == 1); + needs_conversion = true; + break; + default: + fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format); + assert(false); + break; + } + + nv2a_profile_inc_counter(NV2A_PROF_ATTR_BIND); + hwaddr attrib_data_addr; + size_t stride; + + if (needs_conversion) { + pg->compressed_attrs |= (1 << i); + } + + hwaddr start = 0; + if (inline_data) { + glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_array_buffer); + attrib_data_addr = attr->inline_array_offset; + stride = inline_stride; + } else { + hwaddr dma_len; + uint8_t *attr_data = (uint8_t *)nv_dma_map( + d, attr->dma_select ? 
pg->dma_vertex_b : pg->dma_vertex_a, + &dma_len); + assert(attr->offset < dma_len); + attrib_data_addr = attr_data + attr->offset - d->vram_ptr; + stride = attr->stride; + start = attrib_data_addr + min_element * stride; + update_memory_buffer(d, start, num_elements * stride, + updated_memory_buffer); + updated_memory_buffer = true; + } + + uint32_t provoking_element_index = provoking_element - min_element; + size_t element_size = attr->size * attr->count; + assert(element_size <= sizeof(attr->inline_value)); + const uint8_t *last_entry; + + if (inline_data) { + last_entry = (uint8_t*)pg->inline_array + attr->inline_array_offset; + } else { + last_entry = d->vram_ptr + start; + } + if (!stride) { + // Stride of 0 indicates that only the first element should be + // used. + pgraph_update_inline_value(attr, last_entry); + glDisableVertexAttribArray(i); + glVertexAttrib4fv(i, attr->inline_value); + continue; + } + + if (needs_conversion) { + glVertexAttribIPointer(i, gl_count, gl_type, stride, + (void *)attrib_data_addr); + } else { + glVertexAttribPointer(i, gl_count, gl_type, gl_normalize, stride, + (void *)attrib_data_addr); + } + + glEnableVertexAttribArray(i); + last_entry += stride * provoking_element_index; + pgraph_update_inline_value(attr, last_entry); + } + + NV2A_GL_DGROUP_END(); +} + +unsigned int pgraph_gl_bind_inline_array(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + unsigned int offset = 0; + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attr = &pg->vertex_attributes[i]; + if (attr->count == 0) { + continue; + } + + /* FIXME: Double check */ + offset = ROUND_UP(offset, attr->size); + attr->inline_array_offset = offset; + NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n", + i, attr->size, attr->count); + offset += attr->size * attr->count; + offset = ROUND_UP(offset, attr->size); + } + + unsigned int vertex_size = offset; + unsigned int index_count = 
pg->inline_array_length*4 / vertex_size; + + NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count); + + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_2); + glBindBuffer(GL_ARRAY_BUFFER, r->gl_inline_array_buffer); + glBufferData(GL_ARRAY_BUFFER, NV2A_MAX_BATCH_LENGTH * sizeof(uint32_t), + NULL, GL_STREAM_DRAW); + glBufferSubData(GL_ARRAY_BUFFER, 0, index_count * vertex_size, pg->inline_array); + pgraph_gl_bind_vertex_attributes(d, 0, index_count-1, true, vertex_size, + index_count-1); + + return index_count; +} + +static void vertex_cache_entry_init(Lru *lru, LruNode *node, void *key) +{ + VertexLruNode *vnode = container_of(node, VertexLruNode, node); + memcpy(&vnode->key, key, sizeof(struct VertexKey)); + vnode->initialized = false; +} + +static bool vertex_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + VertexLruNode *vnode = container_of(node, VertexLruNode, node); + return memcmp(&vnode->key, key, sizeof(VertexKey)); +} + +static const size_t element_cache_size = 50*1024; + +void pgraph_gl_init_buffers(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHGLState *r = pg->gl_renderer_state; + + lru_init(&r->element_cache); + r->element_cache_entries = g_malloc_n(element_cache_size, sizeof(VertexLruNode)); + assert(r->element_cache_entries != NULL); + GLuint element_cache_buffers[element_cache_size]; + glGenBuffers(element_cache_size, element_cache_buffers); + for (int i = 0; i < element_cache_size; i++) { + r->element_cache_entries[i].gl_buffer = element_cache_buffers[i]; + lru_add_free(&r->element_cache, &r->element_cache_entries[i].node); + } + + r->element_cache.init_node = vertex_cache_entry_init; + r->element_cache.compare_nodes = vertex_cache_entry_compare; + + GLint max_vertex_attributes; + glGetIntegerv(GL_MAX_VERTEX_ATTRIBS, &max_vertex_attributes); + assert(max_vertex_attributes >= NV2A_VERTEXSHADER_ATTRIBUTES); + + glGenBuffers(NV2A_VERTEXSHADER_ATTRIBUTES, r->gl_inline_buffer); + glGenBuffers(1, 
&r->gl_inline_array_buffer); + + glGenBuffers(1, &r->gl_memory_buffer); + glBindBuffer(GL_ARRAY_BUFFER, r->gl_memory_buffer); + glBufferData(GL_ARRAY_BUFFER, memory_region_size(d->vram), + NULL, GL_DYNAMIC_DRAW); + + glGenVertexArrays(1, &r->gl_vertex_array); + glBindVertexArray(r->gl_vertex_array); + + assert(glGetError() == GL_NO_ERROR); +} + +void pgraph_gl_finalize_buffers(PGRAPHState *pg) +{ + PGRAPHGLState *r = pg->gl_renderer_state; + + GLuint element_cache_buffers[element_cache_size]; + for (int i = 0; i < element_cache_size; i++) { + element_cache_buffers[i] = r->element_cache_entries[i].gl_buffer; + } + glDeleteBuffers(element_cache_size, element_cache_buffers); + lru_flush(&r->element_cache); + + g_free(r->element_cache_entries); + r->element_cache_entries = NULL; + + glDeleteBuffers(NV2A_VERTEXSHADER_ATTRIBUTES, r->gl_inline_buffer); + memset(r->gl_inline_buffer, 0, sizeof(r->gl_inline_buffer)); + + glDeleteBuffers(1, &r->gl_inline_array_buffer); + r->gl_inline_array_buffer = 0; + + glDeleteBuffers(1, &r->gl_memory_buffer); + r->gl_memory_buffer = 0; + + glDeleteVertexArrays(1, &r->gl_vertex_array); + r->gl_vertex_array = 0; +} \ No newline at end of file diff --git a/hw/xbox/nv2a/pgraph/glsl/common.c b/hw/xbox/nv2a/pgraph/glsl/common.c new file mode 100644 index 00000000000..7059880373d --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/common.c @@ -0,0 +1,58 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + + +#include "common.h" + + +MString *pgraph_get_glsl_vtx_header(MString *out, bool location, bool smooth, bool in, bool prefix, bool array) +{ + const char *flat_s = "flat"; + const char *noperspective_s = "noperspective"; + const char *qualifier_s = smooth ? noperspective_s : flat_s; + const char *qualifiers[11] = { + noperspective_s, flat_s, qualifier_s, qualifier_s, + qualifier_s, qualifier_s, noperspective_s, noperspective_s, + noperspective_s, noperspective_s, noperspective_s + }; + + const char *in_out_s = in ? "in" : "out"; + + const char *float_s = "float"; + const char *vec4_s = "vec4"; + const char *types[11] = { float_s, float_s, vec4_s, vec4_s, vec4_s, vec4_s, + float_s, vec4_s, vec4_s, vec4_s, vec4_s }; + + const char *prefix_s = prefix ? "v_" : ""; + const char *names[11] = { + "vtx_inv_w", "vtx_inv_w_flat", "vtxD0", "vtxD1", "vtxB0", "vtxB1", + "vtxFog", "vtxT0", "vtxT1", "vtxT2", "vtxT3", + }; + const char *suffix_s = array ? 
"[]" : ""; + + for (int i = 0; i < 11; i++) { + if (location) { + mstring_append_fmt(out, "layout(location = %d) ", i); + } + mstring_append_fmt(out, "%s %s %s %s%s%s;\n", + qualifiers[i], in_out_s, types[i], prefix_s, names[i], suffix_s); + } + + return out; +} diff --git a/hw/xbox/nv2a/pgraph/glsl/common.h b/hw/xbox/nv2a/pgraph/glsl/common.h new file mode 100644 index 00000000000..6820a1dcb19 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/common.h @@ -0,0 +1,38 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_NV2A_SHADERS_COMMON_H +#define HW_NV2A_SHADERS_COMMON_H + +#include "qemu/mstring.h" +#include + +#define GLSL_C(idx) "c[" stringify(idx) "]" +#define GLSL_LTCTXA(idx) "ltctxa[" stringify(idx) "]" + +#define GLSL_C_MAT4(idx) \ + "mat4(" GLSL_C(idx) ", " GLSL_C(idx+1) ", " \ + GLSL_C(idx+2) ", " GLSL_C(idx+3) ")" + +#define GLSL_DEFINE(a, b) "#define " stringify(a) " " b "\n" + +MString *pgraph_get_glsl_vtx_header(MString *out, bool location, bool smooth, bool in, bool prefix, bool array); + +#endif diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.c b/hw/xbox/nv2a/pgraph/glsl/geom.c new file mode 100644 index 00000000000..0e738f02806 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/geom.c @@ -0,0 +1,228 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include "common.h" +#include "geom.h" + +MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode, + enum ShaderPolygonMode polygon_back_mode, + enum ShaderPrimitiveMode primitive_mode, + bool smooth_shading, + bool vulkan) +{ + /* FIXME: Missing support for 2-sided-poly mode */ + assert(polygon_front_mode == polygon_back_mode); + enum ShaderPolygonMode polygon_mode = polygon_front_mode; + + /* POINT mode shouldn't require any special work */ + if (polygon_mode == POLY_MODE_POINT) { + return NULL; + } + + /* Handle LINE and FILL mode */ + const char *layout_in = NULL; + const char *layout_out = NULL; + const char *body = NULL; + switch (primitive_mode) { + case PRIM_TYPE_POINTS: return NULL; + case PRIM_TYPE_LINES: return NULL; + case PRIM_TYPE_LINE_LOOP: return NULL; + case PRIM_TYPE_LINE_STRIP: return NULL; + case PRIM_TYPE_TRIANGLES: + if (polygon_mode == POLY_MODE_FILL) { return NULL; } + assert(polygon_mode == POLY_MODE_LINE); + layout_in = "layout(triangles) in;\n"; + layout_out = "layout(line_strip, max_vertices = 4) out;\n"; + body = " emit_vertex(0, 0);\n" + " emit_vertex(1, 0);\n" + " emit_vertex(2, 0);\n" + " emit_vertex(0, 0);\n" + " EndPrimitive();\n"; + break; + case PRIM_TYPE_TRIANGLE_STRIP: + if (polygon_mode == POLY_MODE_FILL) { return NULL; } + assert(polygon_mode == POLY_MODE_LINE); + layout_in = "layout(triangles) in;\n"; + layout_out = "layout(line_strip, max_vertices = 4) out;\n"; + /* Imagine a quad made of a tristrip, the comments tell you which + * vertex we are using */ + body = " if ((gl_PrimitiveIDIn & 1) == 0) {\n" + " if (gl_PrimitiveIDIn == 0) {\n" + " emit_vertex(0, 0);\n" /* bottom right */ + " }\n" + " emit_vertex(1, 0);\n" /* top right */ + " emit_vertex(2, 0);\n" /* bottom left */ + " emit_vertex(0, 0);\n" /* bottom right */ + " } else {\n" + " emit_vertex(2, 0);\n" /* bottom left */ + " emit_vertex(1, 0);\n" /* top left */ + " emit_vertex(0, 0);\n" /* top right */ 
+ " }\n" + " EndPrimitive();\n"; + break; + case PRIM_TYPE_TRIANGLE_FAN: + if (polygon_mode == POLY_MODE_FILL) { return NULL; } + assert(polygon_mode == POLY_MODE_LINE); + layout_in = "layout(triangles) in;\n"; + layout_out = "layout(line_strip, max_vertices = 4) out;\n"; + body = " if (gl_PrimitiveIDIn == 0) {\n" + " emit_vertex(0, 0);\n" + " }\n" + " emit_vertex(1, 0);\n" + " emit_vertex(2, 0);\n" + " emit_vertex(0, 0);\n" + " EndPrimitive();\n"; + break; + case PRIM_TYPE_QUADS: + layout_in = "layout(lines_adjacency) in;\n"; + if (polygon_mode == POLY_MODE_LINE) { + layout_out = "layout(line_strip, max_vertices = 5) out;\n"; + body = " emit_vertex(0, 3);\n" + " emit_vertex(1, 3);\n" + " emit_vertex(2, 3);\n" + " emit_vertex(3, 3);\n" + " emit_vertex(0, 3);\n" + " EndPrimitive();\n"; + } else if (polygon_mode == POLY_MODE_FILL) { + layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; + body = " emit_vertex(3, 3);\n" + " emit_vertex(0, 3);\n" + " emit_vertex(2, 3);\n" + " emit_vertex(1, 3);\n" + " EndPrimitive();\n"; + } else { + assert(false); + return NULL; + } + break; + case PRIM_TYPE_QUAD_STRIP: + layout_in = "layout(lines_adjacency) in;\n"; + if (polygon_mode == POLY_MODE_LINE) { + layout_out = "layout(line_strip, max_vertices = 5) out;\n"; + body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" + " if (gl_PrimitiveIDIn == 0) {\n" + " emit_vertex(0, 3);\n" + " }\n" + " emit_vertex(1, 3);\n" + " emit_vertex(3, 3);\n" + " emit_vertex(2, 3);\n" + " emit_vertex(0, 3);\n" + " EndPrimitive();\n"; + } else if (polygon_mode == POLY_MODE_FILL) { + layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; + body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" + " emit_vertex(0, 3);\n" + " emit_vertex(1, 3);\n" + " emit_vertex(2, 3);\n" + " emit_vertex(3, 3);\n" + " EndPrimitive();\n"; + } else { + assert(false); + return NULL; + } + break; + case PRIM_TYPE_POLYGON: + if (polygon_mode == POLY_MODE_LINE) { + return NULL; + } + if (polygon_mode == 
POLY_MODE_FILL) { + if (smooth_shading) { + return NULL; + } + layout_in = "layout(triangles) in;\n"; + layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; + body = " emit_vertex(0, 2);\n" + " emit_vertex(1, 2);\n" + " emit_vertex(2, 2);\n" + " EndPrimitive();\n"; + } else { + assert(false); + return NULL; + } + break; + + default: + assert(false); + return NULL; + } + + /* generate a geometry shader to support deprecated primitive types */ + assert(layout_in); + assert(layout_out); + assert(body); + MString *s = mstring_new(); + mstring_append_fmt(s, "#version %d\n\n", vulkan ? 450 : 400); + mstring_append(s, layout_in); + mstring_append(s, layout_out); + mstring_append(s, "\n"); + pgraph_get_glsl_vtx_header(s, vulkan, smooth_shading, true, true, true); + pgraph_get_glsl_vtx_header(s, vulkan, smooth_shading, false, false, false); + + if (smooth_shading) { + mstring_append(s, + "void emit_vertex(int index, int _unused) {\n" + " gl_Position = gl_in[index].gl_Position;\n" + " gl_PointSize = gl_in[index].gl_PointSize;\n" + // " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n" + // " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n" + " vtx_inv_w = v_vtx_inv_w[index];\n" + " vtx_inv_w_flat = v_vtx_inv_w[index];\n" + " vtxD0 = v_vtxD0[index];\n" + " vtxD1 = v_vtxD1[index];\n" + " vtxB0 = v_vtxB0[index];\n" + " vtxB1 = v_vtxB1[index];\n" + " vtxFog = v_vtxFog[index];\n" + " vtxT0 = v_vtxT0[index];\n" + " vtxT1 = v_vtxT1[index];\n" + " vtxT2 = v_vtxT2[index];\n" + " vtxT3 = v_vtxT3[index];\n" + " EmitVertex();\n" + "}\n"); + } else { + mstring_append(s, + "void emit_vertex(int index, int provoking_index) {\n" + " gl_Position = gl_in[index].gl_Position;\n" + " gl_PointSize = gl_in[index].gl_PointSize;\n" + // " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n" + // " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n" + " vtx_inv_w = v_vtx_inv_w[index];\n" + " vtx_inv_w_flat = v_vtx_inv_w[provoking_index];\n" + " vtxD0 = 
v_vtxD0[provoking_index];\n" + " vtxD1 = v_vtxD1[provoking_index];\n" + " vtxB0 = v_vtxB0[provoking_index];\n" + " vtxB1 = v_vtxB1[provoking_index];\n" + " vtxFog = v_vtxFog[index];\n" + " vtxT0 = v_vtxT0[index];\n" + " vtxT1 = v_vtxT1[index];\n" + " vtxT2 = v_vtxT2[index];\n" + " vtxT3 = v_vtxT3[index];\n" + " EmitVertex();\n" + "}\n"); + } + + mstring_append(s, "\n" + "void main() {\n"); + mstring_append(s, body); + mstring_append(s, "}\n"); + + return s; +} diff --git a/hw/xbox/nv2a/pgraph/glsl/geom.h b/hw/xbox/nv2a/pgraph/glsl/geom.h new file mode 100644 index 00000000000..9ca605be71b --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/geom.h @@ -0,0 +1,34 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_GEOM_H +#define HW_XBOX_NV2A_PGRAPH_GLSL_GEOM_H + +#include "qemu/mstring.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +MString *pgraph_gen_geom_glsl(enum ShaderPolygonMode polygon_front_mode, + enum ShaderPolygonMode polygon_back_mode, + enum ShaderPrimitiveMode primitive_mode, + bool smooth_shading, + bool vulkan); + +#endif diff --git a/hw/xbox/nv2a/pgraph/glsl/meson.build b/hw/xbox/nv2a/pgraph/glsl/meson.build new file mode 100644 index 00000000000..82df3f7edee --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/meson.build @@ -0,0 +1,8 @@ +specific_ss.add([files( + 'common.c', + 'geom.c', + 'psh.c', + 'vsh.c', + 'vsh-ff.c', + 'vsh-prog.c', + )]) diff --git a/hw/xbox/nv2a/psh.c b/hw/xbox/nv2a/pgraph/glsl/psh.c similarity index 88% rename from hw/xbox/nv2a/psh.c rename to hw/xbox/nv2a/pgraph/glsl/psh.c index ca9bffe79d4..295cc9a1f7c 100644 --- a/hw/xbox/nv2a/psh.c +++ b/hw/xbox/nv2a/pgraph/glsl/psh.c @@ -3,7 +3,7 @@ * * Copyright (c) 2013 espes * Copyright (c) 2015 Jannik Vogel - * Copyright (c) 2020-2021 Matt Borgerson + * Copyright (c) 2020-2024 Matt Borgerson * * Based on: * Cxbx, PixelShader.cpp @@ -34,9 +34,9 @@ #include #include -#include "qapi/qmp/qstring.h" - -#include "shaders_common.h" +#include "common.h" +#include "hw/xbox/nv2a/debug.h" +#include "hw/xbox/nv2a/pgraph/psh.h" #include "psh.h" /* @@ -562,20 +562,25 @@ static void add_final_stage_code(struct PixelShader *ps, struct FCInputInfo fina ps->varE = ps->varF = NULL; } -static const char sampler2D[] = "sampler2D"; -static const char sampler3D[] = "sampler3D"; -static const char samplerCube[] = "samplerCube"; -static const char sampler2DRect[] = "sampler2DRect"; - -static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *state, int i) +static const char *get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *state, int i) { + const char *sampler2D = "sampler2D"; + const char *sampler3D = "sampler3D"; + const char *samplerCube = 
"samplerCube"; + int dim = state->dim_tex[i]; + + // FIXME: Cleanup switch (mode) { default: case PS_TEXTUREMODES_NONE: return NULL; case PS_TEXTUREMODES_PROJECT2D: - return state->rect_tex[i] ? sampler2DRect : sampler2D; + assert(state->dim_tex[i] == 2); + if (state->tex_x8y24[i] && state->vulkan) { + return "usampler2D"; + } + return sampler2D; case PS_TEXTUREMODES_BUMPENVMAP: case PS_TEXTUREMODES_BUMPENVMAP_LUM: @@ -584,14 +589,18 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s fprintf(stderr, "Shadow map support not implemented for mode %d\n", mode); assert(!"Shadow map support not implemented for this mode"); } - return state->rect_tex[i] ? sampler2DRect : sampler2D; + assert(state->dim_tex[i] == 2); + return sampler2D; case PS_TEXTUREMODES_PROJECT3D: case PS_TEXTUREMODES_DOT_STR_3D: + if (state->tex_x8y24[i] && state->vulkan) { + return "usampler2D"; + } if (state->shadow_map[i]) { - return state->rect_tex[i] ? sampler2DRect : sampler2D; + return sampler2D; } - return sampler3D; + return dim == 2 ? 
sampler2D : sampler3D; case PS_TEXTUREMODES_CUBEMAP: case PS_TEXTUREMODES_DOT_RFLCT_DIFF: @@ -601,6 +610,7 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s fprintf(stderr, "Shadow map support not implemented for mode %d\n", mode); assert(!"Shadow map support not implemented for this mode"); } + assert(state->dim_tex[i] == 2); return samplerCube; case PS_TEXTUREMODES_DPNDNT_AR: @@ -609,6 +619,7 @@ static const char* get_sampler_type(enum PS_TEXTUREMODES mode, const PshState *s fprintf(stderr, "Shadow map support not implemented for mode %d\n", mode); assert(!"Shadow map support not implemented for this mode"); } + assert(state->dim_tex[i] == 2); return sampler2D; } } @@ -634,13 +645,25 @@ static void psh_append_shadowmap(const struct PixelShader *ps, int i, bool compa return; } - mstring_append_fmt(vars, - "pT%d.xy *= texScale%d;\n" - "vec4 t%d_depth = textureProj(texSamp%d, pT%d.xyw);\n", - i, i, i, i, i); + g_autofree gchar *normalize_tex_coords = g_strdup_printf("norm%d", i); + const char *tex_remap = ps->state.rect_tex[i] ? normalize_tex_coords : ""; const char *comparison = shadow_comparison_map[ps->state.shadow_depth_func]; + bool extract_msb_24b = ps->state.tex_x8y24[i] && ps->state.vulkan; + + mstring_append_fmt( + vars, "%svec4 t%d_depth%s = textureProj(texSamp%d, %s(pT%d.xyw));\n", + extract_msb_24b ? "u" : "", i, extract_msb_24b ? "_raw" : "", i, + tex_remap, i); + + if (extract_msb_24b) { + mstring_append_fmt(vars, + "vec4 t%d_depth = vec4(float(t%d_depth_raw.x >> 8) " + "/ 0xFFFFFF, 1.0, 0.0, 0.0);\n", + i, i); + } + // Depth.y != 0 indicates 24 bit; depth.z != 0 indicates float. 
if (compare_z) { mstring_append_fmt( @@ -685,18 +708,61 @@ static void apply_border_adjustment(const struct PixelShader *ps, MString *vars, var_name, var_name, i, ps->state.border_inv_real_size[i][0], ps->state.border_inv_real_size[i][1], ps->state.border_inv_real_size[i][2]); } +static void apply_convolution_filter(const struct PixelShader *ps, MString *vars, int tex) +{ + // FIXME: Quincunx + + g_autofree gchar *normalize_tex_coords = g_strdup_printf("norm%d", tex); + const char *tex_remap = ps->state.rect_tex[tex] ? normalize_tex_coords : ""; + + mstring_append_fmt(vars, + "vec4 t%d = vec4(0.0);\n" + "for (int i = 0; i < 9; i++) {\n" + " vec3 texCoordDelta = vec3(convolution3x3[i], 0);\n" + " texCoordDelta.xy /= textureSize(texSamp%d, 0);\n" + " t%d += textureProj(texSamp%d, %s(pT%d.xyw) + texCoordDelta) * gaussian3x3[i];\n" + "}\n", tex, tex, tex, tex, tex_remap, tex); +} + static MString* psh_convert(struct PixelShader *ps) { int i; + const char *u = ps->state.vulkan ? "" : "uniform "; // FIXME: Remove + MString *preflight = mstring_new(); - mstring_append(preflight, ps->state.smooth_shading ? 
- STRUCT_VERTEX_DATA_IN_SMOOTH : - STRUCT_VERTEX_DATA_IN_FLAT); - mstring_append(preflight, "\n"); - mstring_append(preflight, "out vec4 fragColor;\n"); - mstring_append(preflight, "\n"); - mstring_append(preflight, "uniform vec4 fogColor;\n"); + pgraph_get_glsl_vtx_header(preflight, ps->state.vulkan, + ps->state.smooth_shading, true, false, false); + + if (ps->state.vulkan) { + mstring_append_fmt(preflight, + "layout(location = 0) out vec4 fragColor;\n" + "layout(binding = %d, std140) uniform PshUniforms {\n", PSH_UBO_BINDING); + } else { + mstring_append_fmt(preflight, + "layout(location = 0) out vec4 fragColor;\n"); + } + + mstring_append_fmt(preflight, "%sfloat alphaRef;\n" + "%svec4 fogColor;\n" + "%sivec4 clipRegion[8];\n", + u, u, u); + for (int i = 0; i < 4; i++) { + mstring_append_fmt(preflight, "%smat2 bumpMat%d;\n" + "%sfloat bumpScale%d;\n" + "%sfloat bumpOffset%d;\n" + "%sfloat texScale%d;\n", + u, i, u, i, u, i, u, i); + } + for (int i = 0; i < 9; i++) { + for (int j = 0; j < 2; j++) { + mstring_append_fmt(preflight, "%svec4 c%d_%d;\n", u, j, i); + } + } + + if (ps->state.vulkan) { + mstring_append(preflight, "};\n"); + } const char *dotmap_funcs[] = { "dotmap_zero_to_one", @@ -766,22 +832,12 @@ static MString* psh_convert(struct PixelShader *ps) " vec2(-1.0,-1.0),vec2(0.0,-1.0),vec2(1.0,-1.0),\n" " vec2(-1.0, 0.0),vec2(0.0, 0.0),vec2(1.0, 0.0),\n" " vec2(-1.0, 1.0),vec2(0.0, 1.0),vec2(1.0, 1.0));\n" - "vec4 gaussianFilter2DRectProj(sampler2DRect sampler, vec3 texCoord) {\n" - " vec4 sum = vec4(0.0);\n" - " for (int i = 0; i < 9; i++) {\n" - " sum += gaussian3x3[i]*textureProj(sampler,\n" - " texCoord + vec3(convolution3x3[i], 0.0));\n" - " }\n" - " return sum;\n" - "}\n" ); /* Window Clipping */ MString *clip = mstring_new(); - mstring_append(preflight, "uniform ivec4 clipRegion[8];\n"); - mstring_append_fmt(clip, "/* Window-clip (%s) */\n", - ps->state.window_clip_exclusive ? 
- "Exclusive" : "Inclusive"); + mstring_append_fmt(clip, "/* Window-clip (%slusive) */\n", + ps->state.window_clip_exclusive ? "Exc" : "Inc"); if (!ps->state.window_clip_exclusive) { mstring_append(clip, "bool clipContained = false;\n"); } @@ -841,6 +897,9 @@ static MString* psh_convert(struct PixelShader *ps) const char *sampler_type = get_sampler_type(ps->tex_modes[i], &ps->state, i); + g_autofree gchar *normalize_tex_coords = g_strdup_printf("norm%d", i); + const char *tex_remap = ps->state.rect_tex[i] ? normalize_tex_coords : ""; + assert(ps->dot_map[i] < 8); const char *dotmap_func = dotmap_funcs[ps->dot_map[i]]; if (ps->dot_map[i] > 3) { @@ -856,23 +915,14 @@ static MString* psh_convert(struct PixelShader *ps) if (ps->state.shadow_map[i]) { psh_append_shadowmap(ps, i, false, vars); } else { - const char *lookup = "textureProj"; - if ((ps->state.conv_tex[i] == CONVOLUTION_FILTER_GAUSSIAN) - || (ps->state.conv_tex[i] == CONVOLUTION_FILTER_QUINCUNX)) { - /* FIXME: Quincunx looks better than Linear and costs less than - * Gaussian, but Gaussian should be plenty fast so use it for - * now. 
- */ - if (ps->state.rect_tex[i]) { - lookup = "gaussianFilter2DRectProj"; - } else { - NV2A_UNIMPLEMENTED("Convolution for 2D textures"); - } - } apply_border_adjustment(ps, vars, i, "pT%d"); - mstring_append_fmt(vars, "pT%d.xy = texScale%d * pT%d.xy;\n", i, i, i); - mstring_append_fmt(vars, "vec4 t%d = %s(texSamp%d, pT%d.xyw);\n", - i, lookup, i, i); + if (((ps->state.conv_tex[i] == CONVOLUTION_FILTER_GAUSSIAN) || + (ps->state.conv_tex[i] == CONVOLUTION_FILTER_QUINCUNX))) { + apply_convolution_filter(ps, vars, i); + } else { + mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, %s(pT%d.xyw));\n", + i, i, tex_remap, i); + } } break; } @@ -881,8 +931,8 @@ static MString* psh_convert(struct PixelShader *ps) psh_append_shadowmap(ps, i, true, vars); } else { apply_border_adjustment(ps, vars, i, "pT%d"); - mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, pT%d.xyzw);\n", - i, i, i); + mstring_append_fmt(vars, "vec4 t%d = textureProj(texSamp%d, %s(pT%d.xyzw));\n", + i, i, tex_remap, i); } break; case PS_TEXTUREMODES_CUBEMAP: @@ -906,7 +956,6 @@ static MString* psh_convert(struct PixelShader *ps) } case PS_TEXTUREMODES_BUMPENVMAP: assert(i >= 1); - mstring_append_fmt(preflight, "uniform mat2 bumpMat%d;\n", i); if (ps->state.snorm_tex[ps->input_tex[i]]) { /* Input color channels already signed (FIXME: May not always want signed textures in this case) */ @@ -920,14 +969,11 @@ static MString* psh_convert(struct PixelShader *ps) mstring_append_fmt(vars, "dsdt%d = bumpMat%d * dsdt%d;\n", i, i, i, i); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, texScale%d * (pT%d.xy + dsdt%d));\n", - i, i, i, i, i); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(pT%d.xy + dsdt%d));\n", + i, i, tex_remap, i, i); break; case PS_TEXTUREMODES_BUMPENVMAP_LUM: assert(i >= 1); - mstring_append_fmt(preflight, "uniform float bumpScale%d;\n", i); - mstring_append_fmt(preflight, "uniform float bumpOffset%d;\n", i); - mstring_append_fmt(preflight, "uniform 
mat2 bumpMat%d;\n", i); if (ps->state.snorm_tex[ps->input_tex[i]]) { /* Input color channels already signed (FIXME: May not always want signed textures in this case) */ @@ -941,8 +987,8 @@ static MString* psh_convert(struct PixelShader *ps) mstring_append_fmt(vars, "dsdtl%d.st = bumpMat%d * dsdtl%d.st;\n", i, i, i, i); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, texScale%d * (pT%d.xy + dsdtl%d.st));\n", - i, i, i, i, i); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(pT%d.xy + dsdtl%d.st));\n", + i, i, tex_remap, i, i); mstring_append_fmt(vars, "t%d = t%d * (bumpScale%d * dsdtl%d.p + bumpOffset%d);\n", i, i, i, i, i); break; @@ -961,8 +1007,8 @@ static MString* psh_convert(struct PixelShader *ps) i, i, dotmap_func, ps->input_tex[i], i, i-1, i); apply_border_adjustment(ps, vars, i, "dotST%d"); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, texScale%d * dotST%d);\n", - i, i, i, i); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(dotST%d));\n", + i, i, tex_remap, i); break; case PS_TEXTUREMODES_DOT_ZW: assert(i >= 2); @@ -1030,16 +1076,16 @@ static MString* psh_convert(struct PixelShader *ps) assert(!ps->state.rect_tex[i]); mstring_append_fmt(vars, "vec2 t%dAR = t%d.ar;\n", i, ps->input_tex[i]); apply_border_adjustment(ps, vars, i, "t%dAR"); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, t%dAR);\n", - i, i, i); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(t%dAR));\n", + i, i, tex_remap, i); break; case PS_TEXTUREMODES_DPNDNT_GB: assert(i >= 1); assert(!ps->state.rect_tex[i]); mstring_append_fmt(vars, "vec2 t%dGB = t%d.gb;\n", i, ps->input_tex[i]); apply_border_adjustment(ps, vars, i, "t%dGB"); - mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, t%dGB);\n", - i, i, i); + mstring_append_fmt(vars, "vec4 t%d = texture(texSamp%d, %s(t%dGB));\n", + i, i, tex_remap, i); break; case PS_TEXTUREMODES_DOTPRODUCT: assert(i == 1 || i == 2); @@ -1060,8 +1106,10 @@ static MString* psh_convert(struct 
PixelShader *ps) break; } - mstring_append_fmt(preflight, "uniform float texScale%d;\n", i); if (sampler_type != NULL) { + if (ps->state.vulkan) { + mstring_append_fmt(preflight, "layout(binding = %d) ", PSH_TEX_BINDING + i); + } mstring_append_fmt(preflight, "uniform %s texSamp%d;\n", sampler_type, i); /* As this means a texture fetch does happen, do alphakill */ @@ -1069,6 +1117,24 @@ static MString* psh_convert(struct PixelShader *ps) mstring_append_fmt(vars, "if (t%d.a == 0.0) { discard; };\n", i); } + + if (ps->state.rect_tex[i]) { + mstring_append_fmt(preflight, + "vec2 norm%d(vec2 coord) {\n" + " return coord / (textureSize(texSamp%d, 0) / texScale%d);\n" + "}\n", + i, i, i); + mstring_append_fmt(preflight, + "vec3 norm%d(vec3 coord) {\n" + " return vec3(norm%d(coord.xy), coord.z);\n" + "}\n", + i, i); + mstring_append_fmt(preflight, + "vec4 norm%d(vec4 coord) {\n" + " return vec4(norm%d(coord.xy), 0, coord.w);\n" + "}\n", + i, i); + } } } @@ -1091,7 +1157,6 @@ static MString* psh_convert(struct PixelShader *ps) } if (ps->state.alpha_test && ps->state.alpha_func != ALPHA_FUNC_ALWAYS) { - mstring_append_fmt(preflight, "uniform float alphaRef;\n"); if (ps->state.alpha_func == ALPHA_FUNC_NEVER) { mstring_append(ps->code, "discard;\n"); } else { @@ -1112,12 +1177,8 @@ static MString* psh_convert(struct PixelShader *ps) } } - for (i = 0; i < ps->num_const_refs; i++) { - mstring_append_fmt(preflight, "uniform vec4 %s;\n", ps->const_refs[i]); - } - for (i = 0; i < ps->num_var_refs; i++) { - mstring_append_fmt(vars, "vec4 %s;\n", ps->var_refs[i]); + mstring_append_fmt(vars, "vec4 %s = vec4(0);\n", ps->var_refs[i]); if (strcmp(ps->var_refs[i], "r0") == 0) { if (ps->tex_modes[0] != PS_TEXTUREMODES_NONE) { mstring_append(vars, "r0.a = t0.a;\n"); @@ -1128,7 +1189,7 @@ static MString* psh_convert(struct PixelShader *ps) } MString *final = mstring_new(); - mstring_append(final, "#version 330\n\n"); + mstring_append_fmt(final, "#version %d\n\n", ps->state.vulkan ? 
450 : 400); mstring_append(final, mstring_get_str(preflight)); mstring_append(final, "void main() {\n"); mstring_append(final, mstring_get_str(clip)); @@ -1175,7 +1236,7 @@ static void parse_combiner_output(uint32_t value, struct OutputInfo *out) out->cd_alphablue = flags & 0x40; } -MString *psh_translate(const PshState state) +MString *pgraph_gen_psh_glsl(const PshState state) { int i; struct PixelShader ps; diff --git a/hw/xbox/nv2a/pgraph/glsl/psh.h b/hw/xbox/nv2a/pgraph/glsl/psh.h new file mode 100644 index 00000000000..1ae0b0db7ed --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/psh.h @@ -0,0 +1,41 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2013 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * Based on: + * Cxbx, PixelShader.cpp + * Copyright (c) 2004 Aaron Robinson + * Kingofc + * Xeon, XBD3DPixelShader.cpp + * Copyright (c) 2003 _SF_ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_PSH_H +#define HW_XBOX_NV2A_PGRAPH_GLSL_PSH_H + +#include "qemu/mstring.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +// FIXME: Move to struct +#define PSH_UBO_BINDING 1 +#define PSH_TEX_BINDING 2 + +MString *pgraph_gen_psh_glsl(const PshState state); + +#endif diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c new file mode 100644 index 00000000000..59749003cda --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.c @@ -0,0 +1,497 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include "common.h" +#include "vsh-ff.h" + +static void append_skinning_code(MString* str, bool mix, + unsigned int count, const char* type, + const char* output, const char* input, + const char* matrix, const char* swizzle); + +void pgraph_gen_vsh_ff_glsl(const ShaderState *state, MString *header, + MString *body, MString *uniforms) +{ + int i, j; + const char *u = state->vulkan ? 
"" : "uniform "; // FIXME: Remove + + /* generate vertex shader mimicking fixed function */ + mstring_append(header, +"#define position v0\n" +"#define weight v1\n" +"#define normal v2.xyz\n" +"#define diffuse v3\n" +"#define specular v4\n" +"#define fogCoord v5.x\n" +"#define pointSize v6\n" +"#define backDiffuse v7\n" +"#define backSpecular v8\n" +"#define texture0 v9\n" +"#define texture1 v10\n" +"#define texture2 v11\n" +"#define texture3 v12\n" +"#define reserved1 v13\n" +"#define reserved2 v14\n" +"#define reserved3 v15\n" +"\n"); + mstring_append_fmt(uniforms, +"%svec4 ltctxa[" stringify(NV2A_LTCTXA_COUNT) "];\n" +"%svec4 ltctxb[" stringify(NV2A_LTCTXB_COUNT) "];\n" +"%svec4 ltc1[" stringify(NV2A_LTC1_COUNT) "];\n", u, u, u +); + mstring_append(header, +"\n" +GLSL_DEFINE(projectionMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_PMAT0)) +GLSL_DEFINE(compositeMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_CMAT0)) +"\n" +GLSL_DEFINE(texPlaneS0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 0)) +GLSL_DEFINE(texPlaneT0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 1)) +GLSL_DEFINE(texPlaneR0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 2)) +GLSL_DEFINE(texPlaneQ0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 3)) +"\n" +GLSL_DEFINE(texPlaneS1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 0)) +GLSL_DEFINE(texPlaneT1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 1)) +GLSL_DEFINE(texPlaneR1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 2)) +GLSL_DEFINE(texPlaneQ1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 3)) +"\n" +GLSL_DEFINE(texPlaneS2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 0)) +GLSL_DEFINE(texPlaneT2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 1)) +GLSL_DEFINE(texPlaneR2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 2)) +GLSL_DEFINE(texPlaneQ2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 3)) +"\n" +GLSL_DEFINE(texPlaneS3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 0)) +GLSL_DEFINE(texPlaneT3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 1)) +GLSL_DEFINE(texPlaneR3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 2)) +GLSL_DEFINE(texPlaneQ3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 3)) +"\n" +GLSL_DEFINE(modelViewMat0, 
GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT0)) +GLSL_DEFINE(modelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT1)) +GLSL_DEFINE(modelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT2)) +GLSL_DEFINE(modelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT3)) +"\n" +GLSL_DEFINE(invModelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT0)) +GLSL_DEFINE(invModelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT1)) +GLSL_DEFINE(invModelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT2)) +GLSL_DEFINE(invModelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT3)) +"\n" +GLSL_DEFINE(eyePosition, GLSL_C(NV_IGRAPH_XF_XFCTX_EYEP)) +"\n" +"#define lightAmbientColor(i) " + "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_AMB) " + (i)*6].xyz\n" +"#define lightDiffuseColor(i) " + "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_DIF) " + (i)*6].xyz\n" +"#define lightSpecularColor(i) " + "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_SPC) " + (i)*6].xyz\n" +"\n" +"#define lightSpotFalloff(i) " + "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_K) " + (i)*2].xyz\n" +"#define lightSpotDirection(i) " + "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_SPT) " + (i)*2]\n" +"\n" +"#define lightLocalRange(i) " + "ltc1[" stringify(NV_IGRAPH_XF_LTC1_r0) " + (i)].x\n" +"\n" +GLSL_DEFINE(sceneAmbientColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_FR_AMB) ".xyz") +GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz") +"\n" +); + mstring_append_fmt(uniforms, +"%smat4 invViewport;\n", u); + + /* Skinning */ + unsigned int count; + bool mix; + switch (state->skinning) { + case SKINNING_OFF: + mix = false; count = 0; break; + case SKINNING_1WEIGHTS: + mix = true; count = 2; break; + case SKINNING_2WEIGHTS2MATRICES: + mix = false; count = 2; break; + case SKINNING_2WEIGHTS: + mix = true; count = 3; break; + case SKINNING_3WEIGHTS3MATRICES: + mix = false; count = 3; break; + case SKINNING_3WEIGHTS: + mix = true; count = 4; break; + case SKINNING_4WEIGHTS4MATRICES: + mix = false; count = 4; break; + default: + assert(false); + break; + } + 
mstring_append_fmt(body, "/* Skinning mode %d */\n", + state->skinning); + + append_skinning_code(body, mix, count, "vec4", + "tPosition", "position", + "modelViewMat", "xyzw"); + append_skinning_code(body, mix, count, "vec3", + "tNormal", "vec4(normal, 0.0)", + "invModelViewMat", "xyz"); + + /* Normalization */ + if (state->normalization) { + mstring_append(body, "tNormal = normalize(tNormal);\n"); + } + + /* Texgen */ + for (i = 0; i < NV2A_MAX_TEXTURES; i++) { + mstring_append_fmt(body, "/* Texgen for stage %d */\n", + i); + /* Set each component individually */ + /* FIXME: could be nicer if some channels share the same texgen */ + for (j = 0; j < 4; j++) { + /* TODO: TexGen View Model missing! */ + char c = "xyzw"[j]; + char cSuffix = "STRQ"[j]; + switch (state->texgen[i][j]) { + case TEXGEN_DISABLE: + mstring_append_fmt(body, "oT%d.%c = texture%d.%c;\n", + i, c, i, c); + break; + case TEXGEN_EYE_LINEAR: + mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, tPosition);\n", + i, c, cSuffix, i); + break; + case TEXGEN_OBJECT_LINEAR: + mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, position);\n", + i, c, cSuffix, i); + break; + case TEXGEN_SPHERE_MAP: + assert(j < 2); /* Channels S,T only! */ + mstring_append(body, "{\n"); + /* FIXME: u, r and m only have to be calculated once */ + mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n"); + //FIXME: tNormal before or after normalization? Always normalize? + mstring_append(body, " vec3 r = reflect(u, tNormal);\n"); + + /* FIXME: This would consume 1 division fewer and *might* be + * faster than length: + * // [z=1/(2*x) => z=1/x*0.5] + * vec3 ro = r + vec3(0.0, 0.0, 1.0); + * float m = inversesqrt(dot(ro,ro))*0.5; + */ + + mstring_append(body, " float invM = 1.0 / (2.0 * length(r + vec3(0.0, 0.0, 1.0)));\n"); + mstring_append_fmt(body, " oT%d.%c = r.%c * invM + 0.5;\n", + i, c, c); + mstring_append(body, "}\n"); + break; + case TEXGEN_REFLECTION_MAP: + assert(j < 3); /* Channels S,T,R only! 
*/ + mstring_append(body, "{\n"); + /* FIXME: u and r only have to be calculated once, can share the one from SPHERE_MAP */ + mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n"); + mstring_append(body, " vec3 r = reflect(u, tNormal);\n"); + mstring_append_fmt(body, " oT%d.%c = r.%c;\n", + i, c, c); + mstring_append(body, "}\n"); + break; + case TEXGEN_NORMAL_MAP: + assert(j < 3); /* Channels S,T,R only! */ + mstring_append_fmt(body, "oT%d.%c = tNormal.%c;\n", + i, c, c); + break; + default: + assert(false); + break; + } + } + } + + /* Apply texture matrices */ + for (i = 0; i < NV2A_MAX_TEXTURES; i++) { + if (state->texture_matrix_enable[i]) { + mstring_append_fmt(body, + "oT%d = oT%d * texMat%d;\n", + i, i, i); + } + } + + /* Lighting */ + if (state->lighting) { + + //FIXME: Do 2 passes if we want 2 sided-lighting? + + static char alpha_source_diffuse[] = "diffuse.a"; + static char alpha_source_specular[] = "specular.a"; + static char alpha_source_material[] = "material_alpha"; + const char *alpha_source = alpha_source_diffuse; + if (state->diffuse_src == MATERIAL_COLOR_SRC_MATERIAL) { + mstring_append_fmt(uniforms, "%sfloat material_alpha;\n", u); + alpha_source = alpha_source_material; + } else if (state->diffuse_src == MATERIAL_COLOR_SRC_SPECULAR) { + alpha_source = alpha_source_specular; + } + + if (state->ambient_src == MATERIAL_COLOR_SRC_MATERIAL) { + mstring_append_fmt(body, "oD0 = vec4(sceneAmbientColor, %s);\n", alpha_source); + } else if (state->ambient_src == MATERIAL_COLOR_SRC_DIFFUSE) { + mstring_append_fmt(body, "oD0 = vec4(diffuse.rgb, %s);\n", alpha_source); + } else if (state->ambient_src == MATERIAL_COLOR_SRC_SPECULAR) { + mstring_append_fmt(body, "oD0 = vec4(specular.rgb, %s);\n", alpha_source); + } + + mstring_append(body, "oD0.rgb *= materialEmissionColor.rgb;\n"); + if (state->emission_src == MATERIAL_COLOR_SRC_MATERIAL) { + mstring_append(body, "oD0.rgb += sceneAmbientColor;\n"); + } else if (state->emission_src == 
MATERIAL_COLOR_SRC_DIFFUSE) { + mstring_append(body, "oD0.rgb += diffuse.rgb;\n"); + } else if (state->emission_src == MATERIAL_COLOR_SRC_SPECULAR) { + mstring_append(body, "oD0.rgb += specular.rgb;\n"); + } + + mstring_append(body, "oD1 = vec4(0.0, 0.0, 0.0, specular.a);\n"); + + for (i = 0; i < NV2A_MAX_LIGHTS; i++) { + if (state->light[i] == LIGHT_OFF) { + continue; + } + + /* FIXME: It seems that we only have to handle the surface colors if + * they are not part of the material [= vertex colors]. + * If they are material the cpu will premultiply light + * colors + */ + + mstring_append_fmt(body, "/* Light %d */ {\n", i); + + if (state->light[i] == LIGHT_LOCAL + || state->light[i] == LIGHT_SPOT) { + + mstring_append_fmt(uniforms, + "%svec3 lightLocalPosition%d;\n" + "%svec3 lightLocalAttenuation%d;\n", + u, i, u, i); + mstring_append_fmt(body, + " vec3 VP = lightLocalPosition%d - tPosition.xyz/tPosition.w;\n" + " float d = length(VP);\n" +//FIXME: if (d > lightLocalRange) { .. don't process this light .. } /* inclusive?! */ - what about directional lights? 
+ " VP = normalize(VP);\n" + " float attenuation = 1.0 / (lightLocalAttenuation%d.x\n" + " + lightLocalAttenuation%d.y * d\n" + " + lightLocalAttenuation%d.z * d * d);\n" + " vec3 halfVector = normalize(VP + eyePosition.xyz / eyePosition.w);\n" /* FIXME: Not sure if eyePosition is correct */ + " float nDotVP = max(0.0, dot(tNormal, VP));\n" + " float nDotHV = max(0.0, dot(tNormal, halfVector));\n", + i, i, i, i); + + } + + switch(state->light[i]) { + case LIGHT_INFINITE: + + /* lightLocalRange will be 1e+30 here */ + + mstring_append_fmt(uniforms, + "%svec3 lightInfiniteHalfVector%d;\n" + "%svec3 lightInfiniteDirection%d;\n", + u, i, u, i); + mstring_append_fmt(body, + " float attenuation = 1.0;\n" + " float nDotVP = max(0.0, dot(tNormal, normalize(vec3(lightInfiniteDirection%d))));\n" + " float nDotHV = max(0.0, dot(tNormal, vec3(lightInfiniteHalfVector%d)));\n", + i, i); + + /* FIXME: Do specular */ + + /* FIXME: tBackDiffuse */ + + break; + case LIGHT_LOCAL: + /* Everything done already */ + break; + case LIGHT_SPOT: + /* https://docs.microsoft.com/en-us/windows/win32/direct3d9/attenuation-and-spotlight-factor#spotlight-factor */ + mstring_append_fmt(body, + " vec4 spotDir = lightSpotDirection(%d);\n" + " float invScale = 1/length(spotDir.xyz);\n" + " float cosHalfPhi = -invScale*spotDir.w;\n" + " float cosHalfTheta = invScale + cosHalfPhi;\n" + " float spotDirDotVP = dot(spotDir.xyz, VP);\n" + " float rho = invScale*spotDirDotVP;\n" + " if (rho > cosHalfTheta) {\n" + " } else if (rho <= cosHalfPhi) {\n" + " attenuation = 0.0;\n" + " } else {\n" + " attenuation *= spotDirDotVP + spotDir.w;\n" /* FIXME: lightSpotFalloff */ + " }\n", + i); + break; + default: + assert(false); + break; + } + + mstring_append_fmt(body, + " float pf;\n" + " if (nDotVP == 0.0) {\n" + " pf = 0.0;\n" + " } else {\n" + " pf = pow(nDotHV, /* specular(l, m, n, l1, m1, n1) */ 0.001);\n" + " }\n" + " vec3 lightAmbient = lightAmbientColor(%d) * attenuation;\n" + " vec3 lightDiffuse = 
lightDiffuseColor(%d) * attenuation * nDotVP;\n" + " vec3 lightSpecular = lightSpecularColor(%d) * pf;\n", + i, i, i); + + mstring_append(body, + " oD0.xyz += lightAmbient;\n"); + + switch (state->diffuse_src) { + case MATERIAL_COLOR_SRC_MATERIAL: + mstring_append(body, + " oD0.xyz += lightDiffuse;\n"); + break; + case MATERIAL_COLOR_SRC_DIFFUSE: + mstring_append(body, + " oD0.xyz += diffuse.xyz * lightDiffuse;\n"); + break; + case MATERIAL_COLOR_SRC_SPECULAR: + mstring_append(body, + " oD0.xyz += specular.xyz * lightDiffuse;\n"); + break; + } + + mstring_append(body, + " oD1.xyz += specular.xyz * lightSpecular;\n"); + + mstring_append(body, "}\n"); + } + } else { + mstring_append(body, " oD0 = diffuse;\n"); + mstring_append(body, " oD1 = specular;\n"); + } + mstring_append(body, " oB0 = backDiffuse;\n"); + mstring_append(body, " oB1 = backSpecular;\n"); + + /* Fog */ + if (state->fog_enable) { + + /* From: https://www.opengl.org/registry/specs/NV/fog_distance.txt */ + switch(state->foggen) { + case FOGGEN_SPEC_ALPHA: + /* FIXME: Do we have to clamp here? 
*/ + mstring_append(body, " float fogDistance = clamp(specular.a, 0.0, 1.0);\n"); + break; + case FOGGEN_RADIAL: + mstring_append(body, " float fogDistance = length(tPosition.xyz);\n"); + break; + case FOGGEN_PLANAR: + case FOGGEN_ABS_PLANAR: + mstring_append(body, " float fogDistance = dot(fogPlane.xyz, tPosition.xyz) + fogPlane.w;\n"); + if (state->foggen == FOGGEN_ABS_PLANAR) { + mstring_append(body, " fogDistance = abs(fogDistance);\n"); + } + break; + case FOGGEN_FOG_X: + mstring_append(body, " float fogDistance = fogCoord;\n"); + break; + default: + assert(false); + break; + } + + } + + /* If skinning is off the composite matrix already includes the MV matrix */ + if (state->skinning == SKINNING_OFF) { + mstring_append(body, " tPosition = position;\n"); + } + + mstring_append(body, + " oPos = invViewport * (tPosition * compositeMat);\n" + ); + + if (state->vulkan) { + mstring_append(body, " oPos.y *= -1;\n"); + } else { + mstring_append(body, " oPos.z = oPos.z * 2.0 - oPos.w;\n"); + } + + /* FIXME: Testing */ + if (state->point_params_enable) { + mstring_append_fmt( + body, + " float d_e = length(position * modelViewMat0);\n" + " oPts.x = 1/sqrt(%f + %f*d_e + %f*d_e*d_e) + %f;\n", + state->point_params[0], state->point_params[1], state->point_params[2], + state->point_params[6]); + mstring_append_fmt(body, " oPts.x = min(oPts.x*%f + %f, 64.0) * %d;\n", + state->point_params[3], state->point_params[7], + state->surface_scale_factor); + } else { + mstring_append_fmt(body, " oPts.x = %f * %d;\n", state->point_size, + state->surface_scale_factor); + } + + mstring_append(body, + " if (oPos.w == 0.0 || isinf(oPos.w)) {\n" + " vtx_inv_w = 1.0;\n" + " } else {\n" + " vtx_inv_w = 1.0 / oPos.w;\n" + " }\n" + " vtx_inv_w_flat = vtx_inv_w;\n"); +} + +static void append_skinning_code(MString* str, bool mix, + unsigned int count, const char* type, + const char* output, const char* input, + const char* matrix, const char* swizzle) +{ + if (count == 0) { + 
mstring_append_fmt(str, "%s %s = (%s * %s0).%s;\n", + type, output, input, matrix, swizzle); + } else { + mstring_append_fmt(str, "%s %s = %s(0.0);\n", type, output, type); + if (mix) { + /* Generated final weight (like GL_WEIGHT_SUM_UNITY_ARB) */ + mstring_append(str, "{\n" + " float weight_i;\n" + " float weight_n = 1.0;\n"); + int i; + for (i = 0; i < count; i++) { + if (i < (count - 1)) { + char c = "xyzw"[i]; + mstring_append_fmt(str, " weight_i = weight.%c;\n" + " weight_n -= weight_i;\n", + c); + } else { + mstring_append(str, " weight_i = weight_n;\n"); + } + mstring_append_fmt(str, " %s += (%s * %s%d).%s * weight_i;\n", + output, input, matrix, i, swizzle); + } + mstring_append(str, "}\n"); + } else { + /* Individual weights */ + int i; + for (i = 0; i < count; i++) { + char c = "xyzw"[i]; + mstring_append_fmt(str, "%s += (%s * %s%d).%s * weight.%c;\n", + output, input, matrix, i, swizzle, c); + } + } + } +} diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-ff.h b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.h new file mode 100644 index 00000000000..949bf542520 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-ff.h @@ -0,0 +1,31 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_FF_H +#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_FF_H + +#include "qemu/mstring.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +void pgraph_gen_vsh_ff_glsl(const ShaderState *state, MString *header, + MString *body, MString *uniforms); + +#endif diff --git a/hw/xbox/nv2a/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c similarity index 97% rename from hw/xbox/nv2a/vsh.c rename to hw/xbox/nv2a/pgraph/glsl/vsh-prog.c index 0e4cf314bc2..7bebed71e85 100644 --- a/hw/xbox/nv2a/vsh.c +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.c @@ -1,5 +1,5 @@ /* - * QEMU Geforce NV2A vertex shader translation + * Geforce NV2A PGRAPH GLSL Shader Generator * * Copyright (c) 2014 Jannik Vogel * Copyright (c) 2012 espes @@ -32,8 +32,9 @@ #include #include -#include "shaders_common.h" -#include "vsh.h" +#include "hw/xbox/nv2a/pgraph/vsh.h" +#include "common.h" +#include "vsh-prog.h" #define VSH_D3DSCM_CORRECTION 96 @@ -794,10 +795,11 @@ static const char* vsh_header = " return t;\n" "}\n"; -void vsh_translate(uint16_t version, +void pgraph_gen_vsh_prog_glsl(uint16_t version, const uint32_t *tokens, unsigned int length, bool z_perspective, + bool vulkan, MString *header, MString *body) { @@ -843,14 +845,30 @@ void vsh_translate(uint16_t version, * TODO: the pixel-center co-ordinate differences should handled */ " oPos.x = 2.0 * (oPos.x - surfaceSize.x * 0.5) / surfaceSize.x;\n" - " oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) / surfaceSize.y;\n" - ); + ); + + if (vulkan) { + mstring_append(body, + " oPos.y = 2.0 * oPos.y / surfaceSize.y - 1.0;\n"); + } else { + mstring_append(body, " oPos.y = -2.0 * (oPos.y - surfaceSize.y * 0.5) " + "/ surfaceSize.y;\n"); + } + if (z_perspective) { mstring_append(body, " oPos.z = oPos.w;\n"); } + + mstring_append(body, + " if (clipRange.y != clipRange.x) {\n"); + if (vulkan) { + mstring_append(body, " oPos.z /= clipRange.y;\n"); + } else { + mstring_append(body, + " oPos.z = (oPos.z - clipRange.x)/(0.5*(clipRange.y " + "- 
clipRange.x)) - 1;\n"); + } mstring_append(body, - " if (clipRange.y != clipRange.x) {\n" - " oPos.z = (oPos.z - clipRange.x)/(0.5*(clipRange.y - clipRange.x)) - 1;\n" " }\n" /* Correct for the perspective divide */ diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h new file mode 100644 index 00000000000..84d8141c5e5 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/vsh-prog.h @@ -0,0 +1,35 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2014 Jannik Vogel + * Copyright (c) 2012 espes + * + * Based on: + * Cxbx, VertexShader.cpp + * Copyright (c) 2004 Aaron Robinson + * Kingofc + * Dxbx, uPushBuffer.pas + * Copyright (c) 2007 Shadow_tj, PatrickvL + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 or + * (at your option) version 3 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_PROG_H +#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_PROG_H + +void pgraph_gen_vsh_prog_glsl(uint16_t version, const uint32_t *tokens, + unsigned int length, bool z_perspective, + bool vulkan, MString *header, MString *body); + +#endif diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.c b/hw/xbox/nv2a/pgraph/glsl/vsh.c new file mode 100644 index 00000000000..a60fbe265dd --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/vsh.c @@ -0,0 +1,289 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include "common.h" +#include "vsh.h" +#include "vsh-ff.h" +#include "vsh-prog.h" +#include + +MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs) +{ + int i; + MString *output = mstring_new(); + mstring_append_fmt(output, "#version %d\n\n", state->vulkan ? 450 : 400); + + MString *header = mstring_from_str(""); + + MString *uniforms = mstring_from_str(""); + + const char *u = state->vulkan ? 
"" : "uniform "; // FIXME: Remove + + mstring_append_fmt(uniforms, + "%svec4 clipRange;\n" + "%svec2 surfaceSize;\n" + "%svec4 c[" stringify(NV2A_VERTEXSHADER_CONSTANTS) "];\n" + "%svec2 fogParam;\n", + u, u, u, u + ); + + mstring_append(header, + GLSL_DEFINE(fogPlane, GLSL_C(NV_IGRAPH_XF_XFCTX_FOG)) + GLSL_DEFINE(texMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T0MAT)) + GLSL_DEFINE(texMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T1MAT)) + GLSL_DEFINE(texMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T2MAT)) + GLSL_DEFINE(texMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T3MAT)) + + "\n" + "vec4 oPos = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oD0 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oD1 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oB0 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oB1 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oPts = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oFog = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oT0 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oT1 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oT2 = vec4(0.0,0.0,0.0,1.0);\n" + "vec4 oT3 = vec4(0.0,0.0,0.0,1.0);\n" + "\n" + "vec4 decompress_11_11_10(int cmp) {\n" + " float x = float(bitfieldExtract(cmp, 0, 11)) / 1023.0;\n" + " float y = float(bitfieldExtract(cmp, 11, 11)) / 1023.0;\n" + " float z = float(bitfieldExtract(cmp, 22, 10)) / 511.0;\n" + " return vec4(x, y, z, 1);\n" + "}\n"); + + pgraph_get_glsl_vtx_header(header, state->vulkan, state->smooth_shading, + false, prefix_outputs, false); + + if (prefix_outputs) { + mstring_append(header, + "#define vtx_inv_w v_vtx_inv_w\n" + "#define vtx_inv_w_flat v_vtx_inv_w_flat\n" + "#define vtxD0 v_vtxD0\n" + "#define vtxD1 v_vtxD1\n" + "#define vtxB0 v_vtxB0\n" + "#define vtxB1 v_vtxB1\n" + "#define vtxFog v_vtxFog\n" + "#define vtxT0 v_vtxT0\n" + "#define vtxT1 v_vtxT1\n" + "#define vtxT2 v_vtxT2\n" + "#define vtxT3 v_vtxT3\n" + ); + } + mstring_append(header, "\n"); + + int num_uniform_attrs = 0; + + for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + bool is_uniform = state->uniform_attrs & (1 << i); + bool is_swizzled = 
state->swizzle_attrs & (1 << i); + bool is_compressed = state->compressed_attrs & (1 << i); + + assert(!(is_uniform && is_compressed)); + assert(!(is_uniform && is_swizzled)); + + if (is_uniform) { + mstring_append_fmt(header, "vec4 v%d = inlineValue[%d];\n", i, + num_uniform_attrs); + num_uniform_attrs += 1; + } else { + if (state->compressed_attrs & (1 << i)) { + mstring_append_fmt(header, + "layout(location = %d) in int v%d_cmp;\n", i, i); + } else if (state->swizzle_attrs & (1 << i)) { + mstring_append_fmt(header, "layout(location = %d) in vec4 v%d_sw;\n", + i, i); + } else { + mstring_append_fmt(header, "layout(location = %d) in vec4 v%d;\n", + i, i); + } + } + } + mstring_append(header, "\n"); + + MString *body = mstring_from_str("void main() {\n"); + + for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + if (state->compressed_attrs & (1 << i)) { + mstring_append_fmt( + body, "vec4 v%d = decompress_11_11_10(v%d_cmp);\n", i, i); + } + + if (state->swizzle_attrs & (1 << i)) { + mstring_append_fmt(body, "vec4 v%d = v%d_sw.bgra;\n", i, i); + } + + } + + if (state->fixed_function) { + pgraph_gen_vsh_ff_glsl(state, header, body, uniforms); + } else if (state->vertex_program) { + pgraph_gen_vsh_prog_glsl(VSH_VERSION_XVS, + (uint32_t *)state->program_data, + state->program_length, state->z_perspective, + state->vulkan, header, body); + } else { + assert(false); + } + + + /* Fog */ + + if (state->fog_enable) { + + if (state->vertex_program) { + /* FIXME: Does foggen do something here? Let's do some tracking.. + * + * "RollerCoaster Tycoon" has + * state->vertex_program = true; state->foggen == FOGGEN_PLANAR + * but expects oFog.x as fogdistance?! Writes oFog.xyzw = v0.z + */ + mstring_append(body, " float fogDistance = oFog.x;\n"); + } + + /* FIXME: Do this per pixel? 
*/ + + switch (state->fog_mode) { + case FOG_MODE_LINEAR: + case FOG_MODE_LINEAR_ABS: + + /* f = (end - d) / (end - start) + * fogParam.y = -1 / (end - start) + * fogParam.x = 1 - end * fogParam.y; + */ + + mstring_append(body, + " if (isinf(fogDistance)) {\n" + " fogDistance = 0.0;\n" + " }\n" + ); + mstring_append(body, " float fogFactor = fogParam.x + fogDistance * fogParam.y;\n"); + mstring_append(body, " fogFactor -= 1.0;\n"); + break; + case FOG_MODE_EXP: + mstring_append(body, + " if (isinf(fogDistance)) {\n" + " fogDistance = 0.0;\n" + " }\n" + ); + /* fallthru */ + case FOG_MODE_EXP_ABS: + + /* f = 1 / (e^(d * density)) + * fogParam.y = -density / (2 * ln(256)) + * fogParam.x = 1.5 + */ + + mstring_append(body, " float fogFactor = fogParam.x + exp2(fogDistance * fogParam.y * 16.0);\n"); + mstring_append(body, " fogFactor -= 1.5;\n"); + break; + case FOG_MODE_EXP2: + case FOG_MODE_EXP2_ABS: + + /* f = 1 / (e^((d * density)^2)) + * fogParam.y = -density / (2 * sqrt(ln(256))) + * fogParam.x = 1.5 + */ + + mstring_append(body, " float fogFactor = fogParam.x + exp2(-fogDistance * fogDistance * fogParam.y * fogParam.y * 32.0);\n"); + mstring_append(body, " fogFactor -= 1.5;\n"); + break; + default: + assert(false); + break; + } + /* Calculate absolute for the modes which need it */ + switch (state->fog_mode) { + case FOG_MODE_LINEAR_ABS: + case FOG_MODE_EXP_ABS: + case FOG_MODE_EXP2_ABS: + mstring_append(body, " fogFactor = abs(fogFactor);\n"); + break; + default: + break; + } + + mstring_append(body, " oFog.xyzw = vec4(fogFactor);\n"); + } else { + /* FIXME: Is the fog still calculated / passed somehow?! + */ + mstring_append(body, " oFog.xyzw = vec4(1.0);\n"); + } + + /* Set outputs */ + const char *shade_model_mult = state->smooth_shading ? 
"vtx_inv_w" : "vtx_inv_w_flat"; + mstring_append_fmt(body, "\n" + " vtxD0 = clamp(oD0, 0.0, 1.0) * %s;\n" + " vtxD1 = clamp(oD1, 0.0, 1.0) * %s;\n" + " vtxB0 = clamp(oB0, 0.0, 1.0) * %s;\n" + " vtxB1 = clamp(oB1, 0.0, 1.0) * %s;\n" + " vtxFog = oFog.x * vtx_inv_w;\n" + " vtxT0 = oT0 * vtx_inv_w;\n" + " vtxT1 = oT1 * vtx_inv_w;\n" + " vtxT2 = oT2 * vtx_inv_w;\n" + " vtxT3 = oT3 * vtx_inv_w;\n" + " gl_Position = oPos;\n" + " gl_PointSize = oPts.x;\n" + // " gl_ClipDistance[0] = oPos.z - oPos.w*clipRange.z;\n" // Near + // " gl_ClipDistance[1] = oPos.w*clipRange.w - oPos.z;\n" // Far + "\n" + "}\n", + shade_model_mult, + shade_model_mult, + shade_model_mult, + shade_model_mult); + + /* Return combined header + source */ + if (state->vulkan) { + // FIXME: Optimize uniforms + if (num_uniform_attrs > 0) { + if (state->use_push_constants_for_uniform_attrs) { + mstring_append_fmt(output, + "layout(push_constant) uniform PushConstants {\n" + " vec4 inlineValue[%d];\n" + "};\n\n", num_uniform_attrs); + } else { + mstring_append_fmt(uniforms, " vec4 inlineValue[%d];\n", + num_uniform_attrs); + } + } + mstring_append_fmt( + output, + "layout(binding = %d, std140) uniform VshUniforms {\n" + "%s" + "};\n\n", + VSH_UBO_BINDING, mstring_get_str(uniforms)); + } else { + mstring_append( + output, mstring_get_str(uniforms)); + } + + mstring_append(output, mstring_get_str(header)); + mstring_unref(header); + + mstring_append(output, mstring_get_str(body)); + mstring_unref(body); + return output; +} diff --git a/hw/xbox/nv2a/pgraph/glsl/vsh.h b/hw/xbox/nv2a/pgraph/glsl/vsh.h new file mode 100644 index 00000000000..584e1997e38 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/glsl/vsh.h @@ -0,0 +1,33 @@ +/* + * Geforce NV2A PGRAPH GLSL Shader Generator + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * 
License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_GLSL_VSH_H +#define HW_XBOX_NV2A_PGRAPH_GLSL_VSH_H + +#include "qemu/mstring.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +// FIXME: Move to struct +#define VSH_UBO_BINDING 0 + +MString *pgraph_gen_vsh_glsl(const ShaderState *state, bool prefix_outputs); + +#endif diff --git a/hw/xbox/nv2a/pgraph/meson.build b/hw/xbox/nv2a/pgraph/meson.build new file mode 100644 index 00000000000..5b8bc181c3d --- /dev/null +++ b/hw/xbox/nv2a/pgraph/meson.build @@ -0,0 +1,19 @@ +specific_ss.add(files( + 'pgraph.c', + 'profile.c', + 'rdi.c', + 's3tc.c', + 'shaders.c', + 'swizzle.c', + 'texture.c', + 'vertex.c', + )) +if have_renderdoc + specific_ss.add(files('debug_renderdoc.c')) +endif +subdir('thirdparty') +subdir('null') +subdir('gl') +subdir('glsl') +subdir('vk') +specific_ss.add(nv2a_vsh_cpu) diff --git a/hw/xbox/nv2a/pgraph_methods.h b/hw/xbox/nv2a/pgraph/methods.h similarity index 100% rename from hw/xbox/nv2a/pgraph_methods.h rename to hw/xbox/nv2a/pgraph/methods.h diff --git a/hw/xbox/nv2a/pgraph/null/meson.build b/hw/xbox/nv2a/pgraph/null/meson.build new file mode 100644 index 00000000000..e2731a13d92 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/null/meson.build @@ -0,0 +1,3 @@ +specific_ss.add([sdl, files( + 'renderer.c', + )]) diff --git a/hw/xbox/nv2a/pgraph/null/renderer.c b/hw/xbox/nv2a/pgraph/null/renderer.c new file mode 100644 index 00000000000..8b34efc5d17 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/null/renderer.c @@ -0,0 +1,146 @@ 
+/* + * Geforce NV2A PGRAPH Null Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "qemu/thread.h" +#include "hw/hw.h" +#include "hw/xbox/nv2a/nv2a_int.h" + +static void pgraph_null_sync(NV2AState *d) +{ + qatomic_set(&d->pgraph.sync_pending, false); + qemu_event_set(&d->pgraph.sync_complete); +} + +static void pgraph_null_flush(NV2AState *d) +{ + qatomic_set(&d->pgraph.flush_pending, false); + qemu_event_set(&d->pgraph.flush_complete); +} + +static void pgraph_null_process_pending(NV2AState *d) +{ + if ( + qatomic_read(&d->pgraph.sync_pending) || + qatomic_read(&d->pgraph.flush_pending) + ) { + qemu_mutex_unlock(&d->pfifo.lock); + qemu_mutex_lock(&d->pgraph.lock); + if (qatomic_read(&d->pgraph.sync_pending)) { + pgraph_null_sync(d); + } + if (qatomic_read(&d->pgraph.flush_pending)) { + pgraph_null_flush(d); + } + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + } +} + +static void pgraph_null_clear_report_value(NV2AState *d) +{ +} + +static void pgraph_null_clear_surface(NV2AState *d, uint32_t parameter) +{ +} + +static void pgraph_null_draw_begin(NV2AState *d) +{ +} + +static void pgraph_null_draw_end(NV2AState *d) +{ +} + +static void pgraph_null_flip_stall(NV2AState *d) +{ +} + +static void pgraph_null_flush_draw(NV2AState *d) +{ +} + +static void 
pgraph_null_get_report(NV2AState *d, uint32_t parameter) +{ + pgraph_write_zpass_pixel_cnt_report(d, parameter, 0); +} + +static void pgraph_null_image_blit(NV2AState *d) +{ +} + +static void pgraph_null_pre_savevm_trigger(NV2AState *d) +{ +} + +static void pgraph_null_pre_savevm_wait(NV2AState *d) +{ +} + +static void pgraph_null_pre_shutdown_trigger(NV2AState *d) +{ +} + +static void pgraph_null_pre_shutdown_wait(NV2AState *d) +{ +} + +static void pgraph_null_process_pending_reports(NV2AState *d) +{ +} + +static void pgraph_null_surface_update(NV2AState *d, bool upload, + bool color_write, bool zeta_write) +{ +} + +static void pgraph_null_init(NV2AState *d, Error **errp) +{ + PGRAPHState *pg = &d->pgraph; + pg->null_renderer_state = NULL; +} + +static PGRAPHRenderer pgraph_null_renderer = { + .type = CONFIG_DISPLAY_RENDERER_NULL, + .name = "Null", + .ops = { + .init = pgraph_null_init, + .clear_report_value = pgraph_null_clear_report_value, + .clear_surface = pgraph_null_clear_surface, + .draw_begin = pgraph_null_draw_begin, + .draw_end = pgraph_null_draw_end, + .flip_stall = pgraph_null_flip_stall, + .flush_draw = pgraph_null_flush_draw, + .get_report = pgraph_null_get_report, + .image_blit = pgraph_null_image_blit, + .pre_savevm_trigger = pgraph_null_pre_savevm_trigger, + .pre_savevm_wait = pgraph_null_pre_savevm_wait, + .pre_shutdown_trigger = pgraph_null_pre_shutdown_trigger, + .pre_shutdown_wait = pgraph_null_pre_shutdown_wait, + .process_pending = pgraph_null_process_pending, + .process_pending_reports = pgraph_null_process_pending_reports, + .surface_update = pgraph_null_surface_update, + } +}; + +static void __attribute__((constructor)) register_renderer(void) +{ + pgraph_renderer_register(&pgraph_null_renderer); +} diff --git a/hw/xbox/nv2a/pgraph/pgraph.c b/hw/xbox/nv2a/pgraph/pgraph.c new file mode 100644 index 00000000000..534daa3c0fe --- /dev/null +++ b/hw/xbox/nv2a/pgraph/pgraph.c @@ -0,0 +1,3025 @@ +/* + * QEMU Geforce NV2A implementation + * + * 
Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "../nv2a_int.h" +#include "ui/xemu-notifications.h" +#include "ui/xemu-settings.h" +#include "util.h" +#include "swizzle.h" +#include "nv2a_vsh_emulator.h" + +#define PG_GET_MASK(reg, mask) GET_MASK(pgraph_reg_r(pg, reg), mask) +#define PG_SET_MASK(reg, mask, value) \ + do { \ + uint32_t rv = pgraph_reg_r(pg, reg); \ + SET_MASK(rv, mask, value); \ + pgraph_reg_w(pg, reg, rv); \ + } while (0) + + +NV2AState *g_nv2a; + +uint64_t pgraph_read(void *opaque, hwaddr addr, unsigned int size) +{ + NV2AState *d = (NV2AState *)opaque; + PGRAPHState *pg = &d->pgraph; + + qemu_mutex_lock(&pg->lock); + + uint64_t r = 0; + switch (addr) { + case NV_PGRAPH_INTR: + r = pg->pending_interrupts; + break; + case NV_PGRAPH_INTR_EN: + r = pg->enabled_interrupts; + break; + case NV_PGRAPH_RDI_DATA: { + unsigned int select = PG_GET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_SELECT); + unsigned int address = PG_GET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_ADDRESS); + + r = pgraph_rdi_read(pg, select, address); + + /* FIXME: Overflow into select? 
*/ + assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS, + NV_PGRAPH_RDI_INDEX_ADDRESS)); + PG_SET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1); + break; + } + default: + r = pgraph_reg_r(pg, addr); + break; + } + + qemu_mutex_unlock(&pg->lock); + + nv2a_reg_log_read(NV_PGRAPH, addr, size, r); + return r; +} + +void pgraph_write(void *opaque, hwaddr addr, uint64_t val, unsigned int size) +{ + NV2AState *d = (NV2AState *)opaque; + PGRAPHState *pg = &d->pgraph; + + nv2a_reg_log_write(NV_PGRAPH, addr, size, val); + + qemu_mutex_lock(&d->pfifo.lock); // FIXME: Factor out fifo lock here + qemu_mutex_lock(&pg->lock); + + switch (addr) { + case NV_PGRAPH_INTR: + pg->pending_interrupts &= ~val; + + if (!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR)) { + pg->waiting_for_nop = false; + } + if (!(pg->pending_interrupts & NV_PGRAPH_INTR_CONTEXT_SWITCH)) { + pg->waiting_for_context_switch = false; + } + pfifo_kick(d); + break; + case NV_PGRAPH_INTR_EN: + pg->enabled_interrupts = val; + break; + case NV_PGRAPH_INCREMENT: + if (val & NV_PGRAPH_INCREMENT_READ_3D) { + PG_SET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_READ_3D, + (PG_GET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_READ_3D)+1) + % PG_GET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_MODULO_3D) ); + nv2a_profile_increment(); + pfifo_kick(d); + } + break; + case NV_PGRAPH_RDI_DATA: { + unsigned int select = PG_GET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_SELECT); + unsigned int address = PG_GET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_ADDRESS); + + pgraph_rdi_write(pg, select, address, val); + + /* FIXME: Overflow into select? 
*/ + assert(address < GET_MASK(NV_PGRAPH_RDI_INDEX_ADDRESS, + NV_PGRAPH_RDI_INDEX_ADDRESS)); + PG_SET_MASK(NV_PGRAPH_RDI_INDEX, + NV_PGRAPH_RDI_INDEX_ADDRESS, address + 1); + break; + } + case NV_PGRAPH_CHANNEL_CTX_TRIGGER: { + hwaddr context_address = + PG_GET_MASK(NV_PGRAPH_CHANNEL_CTX_POINTER, + NV_PGRAPH_CHANNEL_CTX_POINTER_INST) << 4; + + if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_READ_IN) { +#ifdef DEBUG_NV2A + unsigned pgraph_channel_id = + PG_GET_MASK(NV_PGRAPH_CTX_USER, NV_PGRAPH_CTX_USER_CHID); +#endif + NV2A_DPRINTF("PGRAPH: read channel %d context from %" HWADDR_PRIx "\n", + pgraph_channel_id, context_address); + + assert(context_address < memory_region_size(&d->ramin)); + + uint8_t *context_ptr = d->ramin_ptr + context_address; + uint32_t context_user = ldl_le_p((uint32_t*)context_ptr); + + NV2A_DPRINTF(" - CTX_USER = 0x%x\n", context_user); + + pgraph_reg_w(pg, NV_PGRAPH_CTX_USER, context_user); + // pgraph_set_context_user(d, context_user); + } + if (val & NV_PGRAPH_CHANNEL_CTX_TRIGGER_WRITE_OUT) { + /* do stuff ... 
*/ + } + + break; + } + default: + pgraph_reg_w(pg, addr, val); + break; + } + + // events + switch (addr) { + case NV_PGRAPH_FIFO: + pfifo_kick(d); + break; + } + + qemu_mutex_unlock(&pg->lock); + qemu_mutex_unlock(&d->pfifo.lock); +} + +void pgraph_context_switch(NV2AState *d, unsigned int channel_id) +{ + PGRAPHState *pg = &d->pgraph; + + bool channel_valid = + pgraph_reg_r(pg, NV_PGRAPH_CTX_CONTROL) & NV_PGRAPH_CTX_CONTROL_CHID; + unsigned pgraph_channel_id = + PG_GET_MASK(NV_PGRAPH_CTX_USER, NV_PGRAPH_CTX_USER_CHID); + + bool valid = channel_valid && pgraph_channel_id == channel_id; + if (!valid) { + PG_SET_MASK(NV_PGRAPH_TRAPPED_ADDR, + NV_PGRAPH_TRAPPED_ADDR_CHID, channel_id); + + NV2A_DPRINTF("pgraph switching to ch %d\n", channel_id); + + /* TODO: hardware context switching */ + assert(!PG_GET_MASK(NV_PGRAPH_DEBUG_3, + NV_PGRAPH_DEBUG_3_HW_CONTEXT_SWITCH)); + + pg->waiting_for_context_switch = true; + qemu_mutex_unlock(&pg->lock); + qemu_mutex_lock_iothread(); + pg->pending_interrupts |= NV_PGRAPH_INTR_CONTEXT_SWITCH; + nv2a_update_irq(d); + qemu_mutex_unlock_iothread(); + qemu_mutex_lock(&pg->lock); + } +} + +static const PGRAPHRenderer *renderers[CONFIG_DISPLAY_RENDERER__COUNT]; + +void pgraph_renderer_register(const PGRAPHRenderer *renderer) +{ + assert(renderer->type < CONFIG_DISPLAY_RENDERER__COUNT); + renderers[renderer->type] = renderer; +} + +void pgraph_init(NV2AState *d) +{ + g_nv2a = d; + + PGRAPHState *pg = &d->pgraph; + qemu_mutex_init(&pg->lock); + qemu_mutex_init(&pg->renderer_lock); + qemu_event_init(&pg->sync_complete, false); + qemu_event_init(&pg->flush_complete, false); + qemu_cond_init(&pg->framebuffer_released); + + pg->frame_time = 0; + pg->draw_time = 0; + + pg->material_alpha = 0.0f; + PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_SHADEMODE, + NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH); + pg->primitive_mode = PRIM_TYPE_INVALID; + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attribute = 
&pg->vertex_attributes[i]; + attribute->inline_buffer = (float*)g_malloc(NV2A_MAX_BATCH_LENGTH + * sizeof(float) * 4); + attribute->inline_buffer_populated = false; + } + + pgraph_clear_dirty_reg_map(pg); +} + +void pgraph_clear_dirty_reg_map(PGRAPHState *pg) +{ + memset(pg->regs_dirty, 0, sizeof(pg->regs_dirty)); +} + +static CONFIG_DISPLAY_RENDERER get_default_renderer(void) +{ +#ifdef CONFIG_OPENGL + if (renderers[CONFIG_DISPLAY_RENDERER_OPENGL]) { + return CONFIG_DISPLAY_RENDERER_OPENGL; + } +#endif +#ifdef CONFIG_VULKAN + if (renderers[CONFIG_DISPLAY_RENDERER_VULKAN]) { + return CONFIG_DISPLAY_RENDERER_VULKAN; + } +#endif + fprintf(stderr, "Warning: No available renderer\n"); + return CONFIG_DISPLAY_RENDERER_NULL; +} + +void nv2a_context_init(void) +{ + if (!renderers[g_config.display.renderer]) { + g_config.display.renderer = get_default_renderer(); + fprintf(stderr, + "Warning: Configured renderer unavailable. Switching to %s.\n", + renderers[g_config.display.renderer]->name); + } + + // FIXME: We need a mechanism for renderer to initialize new GL contexts + // on the main thread at run time. For now, just let them all create + // what they need. 
+ for (int i = 0; i < ARRAY_SIZE(renderers); i++) { + const PGRAPHRenderer *r = renderers[i]; + if (!r) { + continue; + } + if (r->ops.early_context_init) { + r->ops.early_context_init(); + } + } +} + +static bool attempt_renderer_init(PGRAPHState *pg) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + + pg->renderer = renderers[g_config.display.renderer]; + if (!pg->renderer) { + xemu_queue_error_message("Configured renderer not available"); + return false; + } + + Error *local_err = NULL; + if (pg->renderer->ops.init) { + pg->renderer->ops.init(d, &local_err); + } + if (local_err) { + const char *msg = error_get_pretty(local_err); + xemu_queue_error_message(msg); + error_free(local_err); + local_err = NULL; + return false; + } + + return true; +} + +static void init_renderer(PGRAPHState *pg) +{ + if (attempt_renderer_init(pg)) { + return; // Success + } + + CONFIG_DISPLAY_RENDERER default_renderer = get_default_renderer(); + if (default_renderer != g_config.display.renderer) { + g_config.display.renderer = default_renderer; + if (attempt_renderer_init(pg)) { + g_autofree gchar *msg = g_strdup_printf( + "Switched to default renderer: %s", pg->renderer->name); + xemu_queue_notification(msg); + return; + } + } + + // FIXME: Try others + + fprintf(stderr, "Fatal error: cannot initialize renderer\n"); + exit(1); +} + +void pgraph_init_thread(NV2AState *d) +{ + init_renderer(&d->pgraph); +} + +void pgraph_destroy(PGRAPHState *pg) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + + if (pg->renderer->ops.finalize) { + pg->renderer->ops.finalize(d); + } + + qemu_mutex_destroy(&pg->lock); +} + +int nv2a_get_framebuffer_surface(void) +{ + NV2AState *d = g_nv2a; + PGRAPHState *pg = &d->pgraph; + int s = 0; + + qemu_mutex_lock(&pg->renderer_lock); + assert(!pg->framebuffer_in_use); + pg->framebuffer_in_use = true; + if (pg->renderer->ops.get_framebuffer_surface) { + s = pg->renderer->ops.get_framebuffer_surface(d); + } + qemu_mutex_unlock(&pg->renderer_lock); 
+ + return s; +} + +void nv2a_release_framebuffer_surface(void) +{ + NV2AState *d = g_nv2a; + PGRAPHState *pg = &d->pgraph; + qemu_mutex_lock(&pg->renderer_lock); + pg->framebuffer_in_use = false; + qemu_cond_broadcast(&pg->framebuffer_released); + qemu_mutex_unlock(&pg->renderer_lock); +} + +void nv2a_set_surface_scale_factor(unsigned int scale) +{ + NV2AState *d = g_nv2a; + + qemu_mutex_unlock_iothread(); + qemu_mutex_lock(&d->pgraph.renderer_lock); + if (d->pgraph.renderer->ops.set_surface_scale_factor) { + d->pgraph.renderer->ops.set_surface_scale_factor(d, scale); + } + qemu_mutex_unlock(&d->pgraph.renderer_lock); + qemu_mutex_lock_iothread(); +} + +unsigned int nv2a_get_surface_scale_factor(void) +{ + NV2AState *d = g_nv2a; + int s = 1; + + qemu_mutex_unlock_iothread(); + qemu_mutex_lock(&d->pgraph.renderer_lock); + if (d->pgraph.renderer->ops.get_surface_scale_factor) { + s = d->pgraph.renderer->ops.get_surface_scale_factor(d); + } + qemu_mutex_unlock(&d->pgraph.renderer_lock); + qemu_mutex_lock_iothread(); + + return s; +} + +#define METHOD_ADDR(gclass, name) \ + gclass ## _ ## name +#define METHOD_ADDR_TO_INDEX(x) ((x)>>2) +#define METHOD_NAME_STR(gclass, name) \ + tostring(gclass ## _ ## name) +#define METHOD_FUNC_NAME(gclass, name) \ + pgraph_ ## gclass ## _ ## name ## _handler +#define METHOD_HANDLER_ARG_DECL \ + NV2AState *d, PGRAPHState *pg, \ + unsigned int subchannel, unsigned int method, \ + uint32_t parameter, uint32_t *parameters, \ + size_t num_words_available, size_t *num_words_consumed, bool inc +#define METHOD_HANDLER_ARGS \ + d, pg, subchannel, method, parameter, parameters, \ + num_words_available, num_words_consumed, inc +#define DEF_METHOD_PROTO(gclass, name) \ + static void METHOD_FUNC_NAME(gclass, name)(METHOD_HANDLER_ARG_DECL) + +#define DEF_METHOD(gclass, name) \ + DEF_METHOD_PROTO(gclass, name); +#define DEF_METHOD_RANGE(gclass, name, range) \ + DEF_METHOD_PROTO(gclass, name); +#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, 
stride) /* Drop */ +#define DEF_METHOD_CASE_4(gclass, name, stride) \ + DEF_METHOD_PROTO(gclass, name); +#include "methods.h" +#undef DEF_METHOD +#undef DEF_METHOD_RANGE +#undef DEF_METHOD_CASE_4_OFFSET +#undef DEF_METHOD_CASE_4 + +typedef void (*MethodFunc)(METHOD_HANDLER_ARG_DECL); +static const struct { + uint32_t base; + const char *name; + MethodFunc handler; +} pgraph_kelvin_methods[0x800] = { +#define DEF_METHOD(gclass, name) \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name))] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, +#define DEF_METHOD_RANGE(gclass, name, range) \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name)) \ + ... METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + 4*range - 1)] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, +#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset)] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride)] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride * 2)] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, \ + [METHOD_ADDR_TO_INDEX(METHOD_ADDR(gclass, name) + offset + stride * 3)] = \ + { \ + METHOD_ADDR(gclass, name), \ + METHOD_NAME_STR(gclass, name), \ + METHOD_FUNC_NAME(gclass, name), \ + }, +#define DEF_METHOD_CASE_4(gclass, name, stride) \ + DEF_METHOD_CASE_4_OFFSET(gclass, name, 0, stride) +#include "methods.h" +#undef DEF_METHOD +#undef DEF_METHOD_RANGE +#undef DEF_METHOD_CASE_4_OFFSET +#undef DEF_METHOD_CASE_4 +}; + +#define METHOD_RANGE_END_NAME(gclass, name) \ + 
pgraph_ ## gclass ## _ ## name ## __END +#define DEF_METHOD(gclass, name) \ + static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ + METHOD_ADDR(gclass, name) + 4; +#define DEF_METHOD_RANGE(gclass, name, range) \ + static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ + METHOD_ADDR(gclass, name) + 4*range; +#define DEF_METHOD_CASE_4_OFFSET(gclass, name, offset, stride) /* drop */ +#define DEF_METHOD_CASE_4(gclass, name, stride) \ + static const size_t METHOD_RANGE_END_NAME(gclass, name) = \ + METHOD_ADDR(gclass, name) + 4*stride; +#include "methods.h" +#undef DEF_METHOD +#undef DEF_METHOD_RANGE +#undef DEF_METHOD_CASE_4_OFFSET +#undef DEF_METHOD_CASE_4 + +static void pgraph_method_log(unsigned int subchannel, + unsigned int graphics_class, + unsigned int method, uint32_t parameter) +{ + const char *method_name = "?"; + static unsigned int last = 0; + static unsigned int count = 0; + + if (last == NV097_ARRAY_ELEMENT16 && method != last) { + method_name = "NV097_ARRAY_ELEMENT16"; + trace_nv2a_pgraph_method_abbrev(subchannel, graphics_class, last, + method_name, count); + } + + if (method != NV097_ARRAY_ELEMENT16) { + uint32_t base = method; + switch (graphics_class) { + case NV_KELVIN_PRIMITIVE: { + int idx = METHOD_ADDR_TO_INDEX(method); + if (idx < ARRAY_SIZE(pgraph_kelvin_methods) && + pgraph_kelvin_methods[idx].handler) { + method_name = pgraph_kelvin_methods[idx].name; + base = pgraph_kelvin_methods[idx].base; + } + break; + } + default: + break; + } + + uint32_t offset = method - base; + trace_nv2a_pgraph_method(subchannel, graphics_class, method, + method_name, offset, parameter); + } + + if (method == last) { + count++; + } else { + count = 0; + } + last = method; +} + +static void pgraph_method_inc(MethodFunc handler, uint32_t end, + METHOD_HANDLER_ARG_DECL) +{ + if (!inc) { + handler(METHOD_HANDLER_ARGS); + return; + } + size_t count = MIN(num_words_available, (end - method) / 4); + for (size_t i = 0; i < count; i++) { + parameter = 
ldl_le_p(parameters + i); + if (i) { + pgraph_method_log(subchannel, NV_KELVIN_PRIMITIVE, method, + parameter); + } + handler(METHOD_HANDLER_ARGS); + method += 4; + } + *num_words_consumed = count; +} + +static void pgraph_method_non_inc(MethodFunc handler, METHOD_HANDLER_ARG_DECL) +{ + if (inc) { + handler(METHOD_HANDLER_ARGS); + return; + } + + for (size_t i = 0; i < num_words_available; i++) { + parameter = ldl_le_p(parameters + i); + if (i) { + pgraph_method_log(subchannel, NV_KELVIN_PRIMITIVE, method, + parameter); + } + handler(METHOD_HANDLER_ARGS); + } + *num_words_consumed = num_words_available; +} + +#define METHOD_FUNC_NAME_INT(gclass, name) METHOD_FUNC_NAME(gclass, name##_int) +#define DEF_METHOD_INT(gclass, name) DEF_METHOD(gclass, name##_int) +#define DEF_METHOD(gclass, name) DEF_METHOD_PROTO(gclass, name) + +#define DEF_METHOD_INC(gclass, name) \ + DEF_METHOD_INT(gclass, name); \ + DEF_METHOD(gclass, name) \ + { \ + pgraph_method_inc(METHOD_FUNC_NAME_INT(gclass, name), \ + METHOD_RANGE_END_NAME(gclass, name), \ + METHOD_HANDLER_ARGS); \ + } \ + DEF_METHOD_INT(gclass, name) + +#define DEF_METHOD_NON_INC(gclass, name) \ + DEF_METHOD_INT(gclass, name); \ + DEF_METHOD(gclass, name) \ + { \ + pgraph_method_non_inc(METHOD_FUNC_NAME_INT(gclass, name), \ + METHOD_HANDLER_ARGS); \ + } \ + DEF_METHOD_INT(gclass, name) + +int pgraph_method(NV2AState *d, unsigned int subchannel, + unsigned int method, uint32_t parameter, + uint32_t *parameters, size_t num_words_available, + size_t max_lookahead_words, bool inc) +{ + int num_processed = 1; + + PGRAPHState *pg = &d->pgraph; + + bool channel_valid = + PG_GET_MASK(NV_PGRAPH_CTX_CONTROL, NV_PGRAPH_CTX_CONTROL_CHID); + assert(channel_valid); + + ContextSurfaces2DState *context_surfaces_2d = &pg->context_surfaces_2d; + ImageBlitState *image_blit = &pg->image_blit; + BetaState *beta = &pg->beta; + + assert(subchannel < 8); + + if (method == NV_SET_OBJECT) { + assert(parameter < memory_region_size(&d->ramin)); + uint8_t 
*obj_ptr = d->ramin_ptr + parameter; + + uint32_t ctx_1 = ldl_le_p((uint32_t*)obj_ptr); + uint32_t ctx_2 = ldl_le_p((uint32_t*)(obj_ptr+4)); + uint32_t ctx_3 = ldl_le_p((uint32_t*)(obj_ptr+8)); + uint32_t ctx_4 = ldl_le_p((uint32_t*)(obj_ptr+12)); + uint32_t ctx_5 = parameter; + + pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE1 + subchannel * 4, ctx_1); + pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE2 + subchannel * 4, ctx_2); + pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE3 + subchannel * 4, ctx_3); + pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE4 + subchannel * 4, ctx_4); + pgraph_reg_w(pg, NV_PGRAPH_CTX_CACHE5 + subchannel * 4, ctx_5); + } + + // is this right? + pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH1, + pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE1 + subchannel * 4)); + pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH2, + pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE2 + subchannel * 4)); + pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH3, + pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE3 + subchannel * 4)); + pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH4, + pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE4 + subchannel * 4)); + pgraph_reg_w(pg, NV_PGRAPH_CTX_SWITCH5, + pgraph_reg_r(pg, NV_PGRAPH_CTX_CACHE5 + subchannel * 4)); + + uint32_t graphics_class = PG_GET_MASK(NV_PGRAPH_CTX_SWITCH1, + NV_PGRAPH_CTX_SWITCH1_GRCLASS); + + pgraph_method_log(subchannel, graphics_class, method, parameter); + + if (subchannel != 0) { + // catches context switching issues on xbox d3d + assert(graphics_class != 0x97); + } + + /* ugly switch for now */ + switch (graphics_class) { + case NV_BETA: { + switch (method) { + case NV012_SET_OBJECT: + beta->object_instance = parameter; + break; + case NV012_SET_BETA: + if (parameter & 0x80000000) { + beta->beta = 0; + } else { + // The parameter is a signed fixed-point number with a sign bit + // and 31 fractional bits. Note that negative values are clamped + // to 0, and only 8 fractional bits are actually implemented in + // hardware. 
+ beta->beta = parameter & 0x7f800000; + } + break; + default: + goto unhandled; + } + break; + } + case NV_CONTEXT_PATTERN: { + switch (method) { + case NV044_SET_MONOCHROME_COLOR0: + pgraph_reg_w(pg, NV_PGRAPH_PATT_COLOR0, parameter); + break; + default: + goto unhandled; + } + break; + } + case NV_CONTEXT_SURFACES_2D: { + switch (method) { + case NV062_SET_OBJECT: + context_surfaces_2d->object_instance = parameter; + break; + case NV062_SET_CONTEXT_DMA_IMAGE_SOURCE: + context_surfaces_2d->dma_image_source = parameter; + break; + case NV062_SET_CONTEXT_DMA_IMAGE_DESTIN: + context_surfaces_2d->dma_image_dest = parameter; + break; + case NV062_SET_COLOR_FORMAT: + context_surfaces_2d->color_format = parameter; + break; + case NV062_SET_PITCH: + context_surfaces_2d->source_pitch = parameter & 0xFFFF; + context_surfaces_2d->dest_pitch = parameter >> 16; + break; + case NV062_SET_OFFSET_SOURCE: + context_surfaces_2d->source_offset = parameter & 0x07FFFFFF; + break; + case NV062_SET_OFFSET_DESTIN: + context_surfaces_2d->dest_offset = parameter & 0x07FFFFFF; + break; + default: + goto unhandled; + } + break; + } + case NV_IMAGE_BLIT: { + switch (method) { + case NV09F_SET_OBJECT: + image_blit->object_instance = parameter; + break; + case NV09F_SET_CONTEXT_SURFACES: + image_blit->context_surfaces = parameter; + break; + case NV09F_SET_OPERATION: + image_blit->operation = parameter; + break; + case NV09F_CONTROL_POINT_IN: + image_blit->in_x = parameter & 0xFFFF; + image_blit->in_y = parameter >> 16; + break; + case NV09F_CONTROL_POINT_OUT: + image_blit->out_x = parameter & 0xFFFF; + image_blit->out_y = parameter >> 16; + break; + case NV09F_SIZE: + image_blit->width = parameter & 0xFFFF; + image_blit->height = parameter >> 16; + + if (image_blit->width && image_blit->height) { + d->pgraph.renderer->ops.image_blit(d); + } + break; + default: + goto unhandled; + } + break; + } + case NV_KELVIN_PRIMITIVE: { + MethodFunc handler = + 
pgraph_kelvin_methods[METHOD_ADDR_TO_INDEX(method)].handler; + if (handler == NULL) { + goto unhandled; + } + size_t num_words_consumed = 1; + handler(d, pg, subchannel, method, parameter, parameters, + num_words_available, &num_words_consumed, inc); + + /* Squash repeated BEGIN,DRAW_ARRAYS,END */ + #define LAM(i, mthd) ((parameters[i*2+1] & 0x31fff) == (mthd)) + #define LAP(i, prm) (parameters[i*2+2] == (prm)) + #define LAMP(i, mthd, prm) (LAM(i, mthd) && LAP(i, prm)) + + if (method == NV097_DRAW_ARRAYS && (max_lookahead_words >= 7) && + pg->inline_elements_length == 0 && + pg->draw_arrays_length < + (ARRAY_SIZE(pg->draw_arrays_start) - 1) && + LAMP(0, NV097_SET_BEGIN_END, NV097_SET_BEGIN_END_OP_END) && + LAMP(1, NV097_SET_BEGIN_END, pg->primitive_mode) && + LAM(2, NV097_DRAW_ARRAYS)) { + num_words_consumed += 4; + pg->draw_arrays_prevent_connect = true; + } + + #undef LAM + #undef LAP + #undef LAMP + + num_processed = num_words_consumed; + break; + } + default: + goto unhandled; + } + + return num_processed; + +unhandled: + trace_nv2a_pgraph_method_unhandled(subchannel, graphics_class, + method, parameter); + return num_processed; +} + +DEF_METHOD(NV097, SET_OBJECT) +{ + pg->kelvin.object_instance = parameter; +} + +DEF_METHOD(NV097, NO_OPERATION) +{ + /* The bios uses nop as a software method call - + * it seems to expect a notify interrupt if the parameter isn't 0. + * According to a nouveau guy it should still be a nop regardless + * of the parameter. It's possible a debug register enables this, + * but nothing obvious sticks out. Weird. 
+ */ + if (parameter == 0) { + return; + } + + unsigned channel_id = + PG_GET_MASK(NV_PGRAPH_CTX_USER, NV_PGRAPH_CTX_USER_CHID); + + assert(!(pg->pending_interrupts & NV_PGRAPH_INTR_ERROR)); + + PG_SET_MASK(NV_PGRAPH_TRAPPED_ADDR, NV_PGRAPH_TRAPPED_ADDR_CHID, + channel_id); + PG_SET_MASK(NV_PGRAPH_TRAPPED_ADDR, NV_PGRAPH_TRAPPED_ADDR_SUBCH, + subchannel); + PG_SET_MASK(NV_PGRAPH_TRAPPED_ADDR, NV_PGRAPH_TRAPPED_ADDR_MTHD, + method); + pgraph_reg_w(pg, NV_PGRAPH_TRAPPED_DATA_LOW, parameter); + pgraph_reg_w(pg, NV_PGRAPH_NSOURCE, + NV_PGRAPH_NSOURCE_NOTIFICATION); /* TODO: check this */ + pg->pending_interrupts |= NV_PGRAPH_INTR_ERROR; + pg->waiting_for_nop = true; + + qemu_mutex_unlock(&pg->lock); + qemu_mutex_lock_iothread(); + nv2a_update_irq(d); + qemu_mutex_unlock_iothread(); + qemu_mutex_lock(&pg->lock); +} + +DEF_METHOD(NV097, WAIT_FOR_IDLE) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); +} + +DEF_METHOD(NV097, SET_FLIP_READ) +{ + PG_SET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_READ_3D, + parameter); +} + +DEF_METHOD(NV097, SET_FLIP_WRITE) +{ + PG_SET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_WRITE_3D, + parameter); +} + +DEF_METHOD(NV097, SET_FLIP_MODULO) +{ + PG_SET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_MODULO_3D, + parameter); +} + +DEF_METHOD(NV097, FLIP_INCREMENT_WRITE) +{ + uint32_t old = + PG_GET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_WRITE_3D); + + PG_SET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_WRITE_3D, + (PG_GET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_WRITE_3D)+1) + % PG_GET_MASK(NV_PGRAPH_SURFACE, + NV_PGRAPH_SURFACE_MODULO_3D) ); + + uint32_t new = + PG_GET_MASK(NV_PGRAPH_SURFACE, NV_PGRAPH_SURFACE_WRITE_3D); + + trace_nv2a_pgraph_flip_increment_write(old, new); + pg->frame_time++; +} + +DEF_METHOD(NV097, FLIP_STALL) +{ + trace_nv2a_pgraph_flip_stall(); + d->pgraph.renderer->ops.surface_update(d, false, true, true); + d->pgraph.renderer->ops.flip_stall(d); + nv2a_profile_flip_stall(); + pg->waiting_for_flip = 
true; +} + +// TODO: these should be loading the dma objects from ramin here? + +DEF_METHOD(NV097, SET_CONTEXT_DMA_NOTIFIES) +{ + pg->dma_notifies = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_A) +{ + pg->dma_a = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_B) +{ + pg->dma_b = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_STATE) +{ + pg->dma_state = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_COLOR) +{ + /* try to get any straggling draws in before the surface's changed :/ */ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + pg->dma_color = parameter; + pg->surface_color.buffer_dirty = true; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_ZETA) +{ + pg->dma_zeta = parameter; + pg->surface_zeta.buffer_dirty = true; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_VERTEX_A) +{ + pg->dma_vertex_a = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_VERTEX_B) +{ + pg->dma_vertex_b = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_SEMAPHORE) +{ + pg->dma_semaphore = parameter; +} + +DEF_METHOD(NV097, SET_CONTEXT_DMA_REPORT) +{ + d->pgraph.renderer->ops.process_pending_reports(d); + + pg->dma_report = parameter; +} + +DEF_METHOD(NV097, SET_SURFACE_CLIP_HORIZONTAL) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + pg->surface_shape.clip_x = + GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_X); + pg->surface_shape.clip_width = + GET_MASK(parameter, NV097_SET_SURFACE_CLIP_HORIZONTAL_WIDTH); +} + +DEF_METHOD(NV097, SET_SURFACE_CLIP_VERTICAL) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + pg->surface_shape.clip_y = + GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_Y); + pg->surface_shape.clip_height = + GET_MASK(parameter, NV097_SET_SURFACE_CLIP_VERTICAL_HEIGHT); +} + +DEF_METHOD(NV097, SET_SURFACE_FORMAT) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + pg->surface_shape.color_format = + GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_COLOR); + pg->surface_shape.zeta_format 
= + GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ZETA); + pg->surface_shape.anti_aliasing = + GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_ANTI_ALIASING); + pg->surface_shape.log_width = + GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_WIDTH); + pg->surface_shape.log_height = + GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_HEIGHT); + + int surface_type = GET_MASK(parameter, NV097_SET_SURFACE_FORMAT_TYPE); + if (surface_type != pg->surface_type) { + pg->surface_type = surface_type; + pg->surface_color.buffer_dirty = true; + pg->surface_zeta.buffer_dirty = true; + } +} + +DEF_METHOD(NV097, SET_SURFACE_PITCH) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + unsigned int color_pitch = GET_MASK(parameter, NV097_SET_SURFACE_PITCH_COLOR); + unsigned int zeta_pitch = GET_MASK(parameter, NV097_SET_SURFACE_PITCH_ZETA); + + pg->surface_color.buffer_dirty |= (pg->surface_color.pitch != color_pitch); + pg->surface_color.pitch = color_pitch; + + pg->surface_zeta.buffer_dirty |= (pg->surface_zeta.pitch != zeta_pitch); + pg->surface_zeta.pitch = zeta_pitch; +} + +DEF_METHOD(NV097, SET_SURFACE_COLOR_OFFSET) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + pg->surface_color.buffer_dirty |= (pg->surface_color.offset != parameter); + pg->surface_color.offset = parameter; +} + +DEF_METHOD(NV097, SET_SURFACE_ZETA_OFFSET) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + pg->surface_zeta.buffer_dirty |= (pg->surface_zeta.offset != parameter); + pg->surface_zeta.offset = parameter; +} + +DEF_METHOD_INC(NV097, SET_COMBINER_ALPHA_ICW) +{ + int slot = (method - NV097_SET_COMBINER_ALPHA_ICW) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINEALPHAI0 + slot * 4, parameter); +} + +DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW0) +{ + pgraph_reg_w(pg, NV_PGRAPH_COMBINESPECFOG0, parameter); +} + +DEF_METHOD(NV097, SET_COMBINER_SPECULAR_FOG_CW1) +{ + pgraph_reg_w(pg, NV_PGRAPH_COMBINESPECFOG1, parameter); +} + +DEF_METHOD(NV097, SET_TEXTURE_ADDRESS) +{ 
+ int slot = (method - NV097_SET_TEXTURE_ADDRESS) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXADDRESS0 + slot * 4, parameter); +} + +DEF_METHOD(NV097, SET_CONTROL0) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + bool stencil_write_enable = + parameter & NV097_SET_CONTROL0_STENCIL_WRITE_ENABLE; + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE, + stencil_write_enable); + + uint32_t z_format = GET_MASK(parameter, NV097_SET_CONTROL0_Z_FORMAT); + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_Z_FORMAT, z_format); + + bool z_perspective = + parameter & NV097_SET_CONTROL0_Z_PERSPECTIVE_ENABLE; + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE, + z_perspective); +} + +DEF_METHOD(NV097, SET_COLOR_MATERIAL) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_EMISSION, + (parameter >> 0) & 3); + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_AMBIENT, + (parameter >> 2) & 3); + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_DIFFUSE, + (parameter >> 4) & 3); + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_SPECULAR, + (parameter >> 6) & 3); +} + +DEF_METHOD(NV097, SET_FOG_MODE) +{ + /* FIXME: There is also NV_PGRAPH_CSV0_D_FOG_MODE */ + unsigned int mode; + switch (parameter) { + case NV097_SET_FOG_MODE_V_LINEAR: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR; break; + case NV097_SET_FOG_MODE_V_EXP: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP; break; + case NV097_SET_FOG_MODE_V_EXP2: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2; break; + case NV097_SET_FOG_MODE_V_EXP_ABS: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP_ABS; break; + case NV097_SET_FOG_MODE_V_EXP2_ABS: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_EXP2_ABS; break; + case NV097_SET_FOG_MODE_V_LINEAR_ABS: + mode = NV_PGRAPH_CONTROL_3_FOG_MODE_LINEAR_ABS; break; + default: + assert(false); + break; + } + PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_FOG_MODE, + mode); +} + +DEF_METHOD(NV097, SET_FOG_GEN_MODE) +{ + unsigned int mode; + switch 
(parameter) { + case NV097_SET_FOG_GEN_MODE_V_SPEC_ALPHA: + mode = NV_PGRAPH_CSV0_D_FOGGENMODE_SPEC_ALPHA; break; + case NV097_SET_FOG_GEN_MODE_V_RADIAL: + mode = NV_PGRAPH_CSV0_D_FOGGENMODE_RADIAL; break; + case NV097_SET_FOG_GEN_MODE_V_PLANAR: + mode = NV_PGRAPH_CSV0_D_FOGGENMODE_PLANAR; break; + case NV097_SET_FOG_GEN_MODE_V_ABS_PLANAR: + mode = NV_PGRAPH_CSV0_D_FOGGENMODE_ABS_PLANAR; break; + case NV097_SET_FOG_GEN_MODE_V_FOG_X: + mode = NV_PGRAPH_CSV0_D_FOGGENMODE_FOG_X; break; + default: + assert(false); + break; + } + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_FOGGENMODE, mode); +} + +DEF_METHOD(NV097, SET_FOG_ENABLE) +{ + /* + FIXME: There is also: + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_FOGENABLE, + parameter); + */ + PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_FOGENABLE, + parameter); +} + +DEF_METHOD(NV097, SET_FOG_COLOR) +{ + /* PGRAPH channels are ARGB, parameter channels are ABGR */ + uint8_t red = GET_MASK(parameter, NV097_SET_FOG_COLOR_RED); + uint8_t green = GET_MASK(parameter, NV097_SET_FOG_COLOR_GREEN); + uint8_t blue = GET_MASK(parameter, NV097_SET_FOG_COLOR_BLUE); + uint8_t alpha = GET_MASK(parameter, NV097_SET_FOG_COLOR_ALPHA); + PG_SET_MASK(NV_PGRAPH_FOGCOLOR, NV_PGRAPH_FOGCOLOR_RED, red); + PG_SET_MASK(NV_PGRAPH_FOGCOLOR, NV_PGRAPH_FOGCOLOR_GREEN, green); + PG_SET_MASK(NV_PGRAPH_FOGCOLOR, NV_PGRAPH_FOGCOLOR_BLUE, blue); + PG_SET_MASK(NV_PGRAPH_FOGCOLOR, NV_PGRAPH_FOGCOLOR_ALPHA, alpha); +} + +DEF_METHOD(NV097, SET_WINDOW_CLIP_TYPE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE, parameter); +} + +DEF_METHOD_INC(NV097, SET_WINDOW_CLIP_HORIZONTAL) +{ + int slot = (method - NV097_SET_WINDOW_CLIP_HORIZONTAL) / 4; + for (; slot < 8; ++slot) { + pgraph_reg_w(pg, NV_PGRAPH_WINDOWCLIPX0 + slot * 4, parameter); + } +} + +DEF_METHOD_INC(NV097, SET_WINDOW_CLIP_VERTICAL) +{ + int slot = (method - NV097_SET_WINDOW_CLIP_VERTICAL) / 4; + for (; slot < 8; ++slot) { + pgraph_reg_w(pg, 
NV_PGRAPH_WINDOWCLIPY0 + slot * 4, parameter); + } +} + +DEF_METHOD(NV097, SET_ALPHA_TEST_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_ALPHATESTENABLE, parameter); +} + +DEF_METHOD(NV097, SET_BLEND_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_EN, parameter); +} + +DEF_METHOD(NV097, SET_CULL_FACE_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_CULLENABLE, + parameter); +} + +DEF_METHOD(NV097, SET_DEPTH_TEST_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, NV_PGRAPH_CONTROL_0_ZENABLE, + parameter); +} + +DEF_METHOD(NV097, SET_DITHER_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_DITHERENABLE, parameter); +} + +DEF_METHOD(NV097, SET_LIGHTING_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_LIGHTING, + parameter); +} + +DEF_METHOD(NV097, SET_POINT_PARAMS_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_POINTPARAMSENABLE, + parameter); + PG_SET_MASK(NV_PGRAPH_CONTROL_3, + NV_PGRAPH_CONTROL_3_POINTPARAMSENABLE, parameter); +} + +DEF_METHOD(NV097, SET_POINT_SMOOTH_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE, parameter); +} + +DEF_METHOD(NV097, SET_LINE_SMOOTH_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE, parameter); +} + +DEF_METHOD(NV097, SET_POLY_SMOOTH_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE, parameter); +} + +DEF_METHOD(NV097, SET_SKIN_MODE) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_SKIN, + parameter); +} + +DEF_METHOD(NV097, SET_STENCIL_TEST_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE, parameter); +} + +DEF_METHOD(NV097, SET_POLY_OFFSET_POINT_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE, parameter); +} + +DEF_METHOD(NV097, SET_POLY_OFFSET_LINE_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE, 
parameter); +} + +DEF_METHOD(NV097, SET_POLY_OFFSET_FILL_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE, parameter); +} + +DEF_METHOD(NV097, SET_ALPHA_FUNC) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_ALPHAFUNC, parameter & 0xF); +} + +DEF_METHOD(NV097, SET_ALPHA_REF) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_ALPHAREF, parameter); +} + +DEF_METHOD(NV097, SET_BLEND_FUNC_SFACTOR) +{ + unsigned int factor; + switch (parameter) { + case NV097_SET_BLEND_FUNC_SFACTOR_V_ZERO: + factor = NV_PGRAPH_BLEND_SFACTOR_ZERO; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_SRC_COLOR; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_COLOR; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_SRC_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_SRC_ALPHA; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_DST_ALPHA; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_ALPHA; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_DST_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_DST_COLOR; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_DST_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_DST_COLOR; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_SRC_ALPHA_SATURATE: + factor = NV_PGRAPH_BLEND_SFACTOR_SRC_ALPHA_SATURATE; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_COLOR; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_COLOR: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_COLOR; break; + case 
NV097_SET_BLEND_FUNC_SFACTOR_V_CONSTANT_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_CONSTANT_ALPHA; break; + case NV097_SET_BLEND_FUNC_SFACTOR_V_ONE_MINUS_CONSTANT_ALPHA: + factor = NV_PGRAPH_BLEND_SFACTOR_ONE_MINUS_CONSTANT_ALPHA; break; + default: + NV2A_DPRINTF("Unknown blend source factor: 0x%08x\n", parameter); + return; /* discard */ + } + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_SFACTOR, factor); +} + +DEF_METHOD(NV097, SET_BLEND_FUNC_DFACTOR) +{ + unsigned int factor; + switch (parameter) { + case NV097_SET_BLEND_FUNC_DFACTOR_V_ZERO: + factor = NV_PGRAPH_BLEND_DFACTOR_ZERO; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_SRC_COLOR; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_COLOR; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_SRC_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_SRC_ALPHA; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_DST_ALPHA; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_ALPHA; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_DST_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_DST_COLOR; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_DST_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_DST_COLOR; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_SRC_ALPHA_SATURATE: + factor = NV_PGRAPH_BLEND_DFACTOR_SRC_ALPHA_SATURATE; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_COLOR; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_COLOR: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_COLOR; break; + case 
NV097_SET_BLEND_FUNC_DFACTOR_V_CONSTANT_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_CONSTANT_ALPHA; break; + case NV097_SET_BLEND_FUNC_DFACTOR_V_ONE_MINUS_CONSTANT_ALPHA: + factor = NV_PGRAPH_BLEND_DFACTOR_ONE_MINUS_CONSTANT_ALPHA; break; + default: + NV2A_DPRINTF("Unknown blend destination factor: 0x%08x\n", parameter); + return; /* discard */ + } + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_DFACTOR, factor); +} + +DEF_METHOD(NV097, SET_BLEND_COLOR) +{ + pgraph_reg_w(pg, NV_PGRAPH_BLENDCOLOR, parameter); +} + +DEF_METHOD(NV097, SET_BLEND_EQUATION) +{ + unsigned int equation; + switch (parameter) { + case NV097_SET_BLEND_EQUATION_V_FUNC_SUBTRACT: + equation = 0; break; + case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT: + equation = 1; break; + case NV097_SET_BLEND_EQUATION_V_FUNC_ADD: + equation = 2; break; + case NV097_SET_BLEND_EQUATION_V_MIN: + equation = 3; break; + case NV097_SET_BLEND_EQUATION_V_MAX: + equation = 4; break; + case NV097_SET_BLEND_EQUATION_V_FUNC_REVERSE_SUBTRACT_SIGNED: + equation = 5; break; + case NV097_SET_BLEND_EQUATION_V_FUNC_ADD_SIGNED: + equation = 6; break; + default: + NV2A_DPRINTF("Unknown blend equation: 0x%08x\n", parameter); + return; /* discard */ + } + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_EQN, equation); +} + +DEF_METHOD(NV097, SET_DEPTH_FUNC) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_0, NV_PGRAPH_CONTROL_0_ZFUNC, + parameter & 0xF); +} + +DEF_METHOD(NV097, SET_COLOR_MASK) +{ + pg->surface_color.write_enabled_cache |= pgraph_color_write_enabled(pg); + + bool alpha = parameter & NV097_SET_COLOR_MASK_ALPHA_WRITE_ENABLE; + bool red = parameter & NV097_SET_COLOR_MASK_RED_WRITE_ENABLE; + bool green = parameter & NV097_SET_COLOR_MASK_GREEN_WRITE_ENABLE; + bool blue = parameter & NV097_SET_COLOR_MASK_BLUE_WRITE_ENABLE; + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE, alpha); + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE, red); + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + 
NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE, green); + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE, blue); +} + +DEF_METHOD(NV097, SET_DEPTH_MASK) +{ + pg->surface_zeta.write_enabled_cache |= pgraph_zeta_write_enabled(pg); + + PG_SET_MASK(NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_0_ZWRITEENABLE, parameter); +} + +DEF_METHOD(NV097, SET_STENCIL_MASK) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE, parameter); +} + +DEF_METHOD(NV097, SET_STENCIL_FUNC) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_1_STENCIL_FUNC, parameter & 0xF); +} + +DEF_METHOD(NV097, SET_STENCIL_FUNC_REF) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_1_STENCIL_REF, parameter); +} + +DEF_METHOD(NV097, SET_STENCIL_FUNC_MASK) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_1, + NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ, parameter); +} + +static unsigned int kelvin_map_stencil_op(uint32_t parameter) +{ + unsigned int op; + switch (parameter) { + case NV097_SET_STENCIL_OP_V_KEEP: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_KEEP; break; + case NV097_SET_STENCIL_OP_V_ZERO: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_ZERO; break; + case NV097_SET_STENCIL_OP_V_REPLACE: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_REPLACE; break; + case NV097_SET_STENCIL_OP_V_INCRSAT: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCRSAT; break; + case NV097_SET_STENCIL_OP_V_DECRSAT: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECRSAT; break; + case NV097_SET_STENCIL_OP_V_INVERT: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INVERT; break; + case NV097_SET_STENCIL_OP_V_INCR: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_INCR; break; + case NV097_SET_STENCIL_OP_V_DECR: + op = NV_PGRAPH_CONTROL_2_STENCIL_OP_V_DECR; break; + default: + assert(false); + break; + } + return op; +} + +DEF_METHOD(NV097, SET_STENCIL_OP_FAIL) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_2, + NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL, + kelvin_map_stencil_op(parameter)); +} + +DEF_METHOD(NV097, SET_STENCIL_OP_ZFAIL) +{ + 
PG_SET_MASK(NV_PGRAPH_CONTROL_2, + NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL, + kelvin_map_stencil_op(parameter)); +} + +DEF_METHOD(NV097, SET_STENCIL_OP_ZPASS) +{ + PG_SET_MASK(NV_PGRAPH_CONTROL_2, + NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS, + kelvin_map_stencil_op(parameter)); +} + +DEF_METHOD(NV097, SET_SHADE_MODE) +{ + switch (parameter) { + case NV097_SET_SHADE_MODE_V_FLAT: + PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_SHADEMODE, + NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT); + break; + case NV097_SET_SHADE_MODE_V_SMOOTH: + PG_SET_MASK(NV_PGRAPH_CONTROL_3, NV_PGRAPH_CONTROL_3_SHADEMODE, + NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH); + break; + default: + /* Discard */ + break; + } +} + +DEF_METHOD(NV097, SET_POLYGON_OFFSET_SCALE_FACTOR) +{ + pgraph_reg_w(pg, NV_PGRAPH_ZOFFSETFACTOR, parameter); +} + +DEF_METHOD(NV097, SET_POLYGON_OFFSET_BIAS) +{ + pgraph_reg_w(pg, NV_PGRAPH_ZOFFSETBIAS, parameter); +} + +static unsigned int kelvin_map_polygon_mode(uint32_t parameter) +{ + unsigned int mode; + switch (parameter) { + case NV097_SET_FRONT_POLYGON_MODE_V_POINT: + mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_POINT; break; + case NV097_SET_FRONT_POLYGON_MODE_V_LINE: + mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_LINE; break; + case NV097_SET_FRONT_POLYGON_MODE_V_FILL: + mode = NV_PGRAPH_SETUPRASTER_FRONTFACEMODE_FILL; break; + default: + assert(false); + break; + } + return mode; +} + +DEF_METHOD(NV097, SET_FRONT_POLYGON_MODE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_FRONTFACEMODE, + kelvin_map_polygon_mode(parameter)); +} + +DEF_METHOD(NV097, SET_BACK_POLYGON_MODE) +{ + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SETUPRASTER_BACKFACEMODE, + kelvin_map_polygon_mode(parameter)); +} + +DEF_METHOD(NV097, SET_CLIP_MIN) +{ + pgraph_reg_w(pg, NV_PGRAPH_ZCLIPMIN, parameter); +} + +DEF_METHOD(NV097, SET_CLIP_MAX) +{ + pgraph_reg_w(pg, NV_PGRAPH_ZCLIPMAX, parameter); +} + +DEF_METHOD(NV097, SET_CULL_FACE) +{ + unsigned int face; + switch (parameter) { + case 
NV097_SET_CULL_FACE_V_FRONT: + face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT; break; + case NV097_SET_CULL_FACE_V_BACK: + face = NV_PGRAPH_SETUPRASTER_CULLCTRL_BACK; break; + case NV097_SET_CULL_FACE_V_FRONT_AND_BACK: + face = NV_PGRAPH_SETUPRASTER_CULLCTRL_FRONT_AND_BACK; break; + default: + assert(false); + break; + } + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, NV_PGRAPH_SETUPRASTER_CULLCTRL, face); +} + +DEF_METHOD(NV097, SET_FRONT_FACE) +{ + bool ccw; + switch (parameter) { + case NV097_SET_FRONT_FACE_V_CW: + ccw = false; break; + case NV097_SET_FRONT_FACE_V_CCW: + ccw = true; break; + default: + NV2A_DPRINTF("Unknown front face: 0x%08x\n", parameter); + return; /* discard */ + } + PG_SET_MASK(NV_PGRAPH_SETUPRASTER, NV_PGRAPH_SETUPRASTER_FRONTFACE, + ccw ? 1 : 0); +} + +DEF_METHOD(NV097, SET_NORMALIZATION_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_C, NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE, + parameter); +} + +DEF_METHOD_INC(NV097, SET_MATERIAL_EMISSION) +{ + int slot = (method - NV097_SET_MATERIAL_EMISSION) / 4; + // FIXME: Verify NV_IGRAPH_XF_LTCTXA_CM_COL is correct + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_CM_COL][slot] = parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_CM_COL] = true; +} + +DEF_METHOD(NV097, SET_MATERIAL_ALPHA) +{ + pg->material_alpha = *(float*)¶meter; +} + +DEF_METHOD(NV097, SET_LIGHT_ENABLE_MASK) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_LIGHTS, parameter); +} + +static unsigned int kelvin_map_texgen(uint32_t parameter, unsigned int channel) +{ + assert(channel < 4); + unsigned int texgen; + switch (parameter) { + case NV097_SET_TEXGEN_S_DISABLE: + texgen = NV_PGRAPH_CSV1_A_T0_S_DISABLE; break; + case NV097_SET_TEXGEN_S_EYE_LINEAR: + texgen = NV_PGRAPH_CSV1_A_T0_S_EYE_LINEAR; break; + case NV097_SET_TEXGEN_S_OBJECT_LINEAR: + texgen = NV_PGRAPH_CSV1_A_T0_S_OBJECT_LINEAR; break; + case NV097_SET_TEXGEN_S_SPHERE_MAP: + assert(channel < 2); + texgen = NV_PGRAPH_CSV1_A_T0_S_SPHERE_MAP; break; + case NV097_SET_TEXGEN_S_REFLECTION_MAP: + assert(channel < 
3); + texgen = NV_PGRAPH_CSV1_A_T0_S_REFLECTION_MAP; break; + case NV097_SET_TEXGEN_S_NORMAL_MAP: + assert(channel < 3); + texgen = NV_PGRAPH_CSV1_A_T0_S_NORMAL_MAP; break; + default: + assert(false); + break; + } + return texgen; +} + +DEF_METHOD(NV097, SET_TEXGEN_S) +{ + int slot = (method - NV097_SET_TEXGEN_S) / 16; + unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A + : NV_PGRAPH_CSV1_B; + unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_S + : NV_PGRAPH_CSV1_A_T0_S; + PG_SET_MASK(reg, mask, kelvin_map_texgen(parameter, 0)); +} + +DEF_METHOD(NV097, SET_TEXGEN_T) +{ + int slot = (method - NV097_SET_TEXGEN_T) / 16; + unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A + : NV_PGRAPH_CSV1_B; + unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_T + : NV_PGRAPH_CSV1_A_T0_T; + PG_SET_MASK(reg, mask, kelvin_map_texgen(parameter, 1)); +} + +DEF_METHOD(NV097, SET_TEXGEN_R) +{ + int slot = (method - NV097_SET_TEXGEN_R) / 16; + unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A + : NV_PGRAPH_CSV1_B; + unsigned int mask = (slot % 2) ? NV_PGRAPH_CSV1_A_T1_R + : NV_PGRAPH_CSV1_A_T0_R; + PG_SET_MASK(reg, mask, kelvin_map_texgen(parameter, 2)); +} + +DEF_METHOD(NV097, SET_TEXGEN_Q) +{ + int slot = (method - NV097_SET_TEXGEN_Q) / 16; + unsigned int reg = (slot < 2) ? NV_PGRAPH_CSV1_A + : NV_PGRAPH_CSV1_B; + unsigned int mask = (slot % 2) ? 
NV_PGRAPH_CSV1_A_T1_Q + : NV_PGRAPH_CSV1_A_T0_Q; + PG_SET_MASK(reg, mask, kelvin_map_texgen(parameter, 3)); +} + +DEF_METHOD_INC(NV097, SET_TEXTURE_MATRIX_ENABLE) +{ + int slot = (method - NV097_SET_TEXTURE_MATRIX_ENABLE) / 4; + pg->texture_matrix_enable[slot] = parameter; +} + +DEF_METHOD(NV097, SET_POINT_SIZE) +{ + PG_SET_MASK(NV_PGRAPH_POINTSIZE, NV097_SET_POINT_SIZE_V, parameter); +} + +DEF_METHOD_INC(NV097, SET_PROJECTION_MATRIX) +{ + int slot = (method - NV097_SET_PROJECTION_MATRIX) / 4; + // pg->projection_matrix[slot] = *(float*)¶meter; + unsigned int row = NV_IGRAPH_XF_XFCTX_PMAT0 + slot/4; + pg->vsh_constants[row][slot%4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD_INC(NV097, SET_MODEL_VIEW_MATRIX) +{ + int slot = (method - NV097_SET_MODEL_VIEW_MATRIX) / 4; + unsigned int matnum = slot / 16; + unsigned int entry = slot % 16; + unsigned int row = NV_IGRAPH_XF_XFCTX_MMAT0 + matnum*8 + entry/4; + pg->vsh_constants[row][entry % 4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD_INC(NV097, SET_INVERSE_MODEL_VIEW_MATRIX) +{ + int slot = (method - NV097_SET_INVERSE_MODEL_VIEW_MATRIX) / 4; + unsigned int matnum = slot / 16; + unsigned int entry = slot % 16; + unsigned int row = NV_IGRAPH_XF_XFCTX_IMMAT0 + matnum*8 + entry/4; + pg->vsh_constants[row][entry % 4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD_INC(NV097, SET_COMPOSITE_MATRIX) +{ + int slot = (method - NV097_SET_COMPOSITE_MATRIX) / 4; + unsigned int row = NV_IGRAPH_XF_XFCTX_CMAT0 + slot/4; + pg->vsh_constants[row][slot%4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD_INC(NV097, SET_TEXTURE_MATRIX) +{ + int slot = (method - NV097_SET_TEXTURE_MATRIX) / 4; + unsigned int tex = slot / 16; + unsigned int entry = slot % 16; + unsigned int row = NV_IGRAPH_XF_XFCTX_T0MAT + tex*8 + entry/4; + pg->vsh_constants[row][entry%4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD_INC(NV097, SET_FOG_PARAMS) +{ 
+ int slot = (method - NV097_SET_FOG_PARAMS) / 4; + if (slot < 2) { + pgraph_reg_w(pg, NV_PGRAPH_FOGPARAM0 + slot*4, parameter); + } else { + /* FIXME: No idea where slot = 2 is */ + } + + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FOG_K][slot] = parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FOG_K] = true; +} + +/* Handles NV097_SET_TEXGEN_PLANE_S,T,R,Q */ +DEF_METHOD_INC(NV097, SET_TEXGEN_PLANE_S) +{ + int slot = (method - NV097_SET_TEXGEN_PLANE_S) / 4; + unsigned int tex = slot / 16; + unsigned int entry = slot % 16; + unsigned int row = NV_IGRAPH_XF_XFCTX_TG0MAT + tex*8 + entry/4; + pg->vsh_constants[row][entry%4] = parameter; + pg->vsh_constants_dirty[row] = true; +} + +DEF_METHOD(NV097, SET_TEXGEN_VIEW_MODEL) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_TEXGEN_REF, + parameter); +} + +DEF_METHOD_INC(NV097, SET_FOG_PLANE) +{ + int slot = (method - NV097_SET_FOG_PLANE) / 4; + pg->vsh_constants[NV_IGRAPH_XF_XFCTX_FOG][slot] = parameter; + pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_FOG] = true; +} + +DEF_METHOD_INC(NV097, SET_SCENE_AMBIENT_COLOR) +{ + int slot = (method - NV097_SET_SCENE_AMBIENT_COLOR) / 4; + // ?? + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_FR_AMB][slot] = parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_FR_AMB] = true; +} + +DEF_METHOD_INC(NV097, SET_VIEWPORT_OFFSET) +{ + int slot = (method - NV097_SET_VIEWPORT_OFFSET) / 4; + pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][slot] = parameter; + pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPOFF] = true; +} + +DEF_METHOD_INC(NV097, SET_POINT_PARAMS) +{ + int slot = (method - NV097_SET_POINT_PARAMS) / 4; + pg->point_params[slot] = *(float *)¶meter; /* FIXME: Where? 
*/ +} + +DEF_METHOD_INC(NV097, SET_EYE_POSITION) +{ + int slot = (method - NV097_SET_EYE_POSITION) / 4; + pg->vsh_constants[NV_IGRAPH_XF_XFCTX_EYEP][slot] = parameter; + pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_EYEP] = true; +} + +DEF_METHOD_INC(NV097, SET_COMBINER_FACTOR0) +{ + int slot = (method - NV097_SET_COMBINER_FACTOR0) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINEFACTOR0 + slot*4, parameter); +} + +DEF_METHOD_INC(NV097, SET_COMBINER_FACTOR1) +{ + int slot = (method - NV097_SET_COMBINER_FACTOR1) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINEFACTOR1 + slot*4, parameter); +} + +DEF_METHOD_INC(NV097, SET_COMBINER_ALPHA_OCW) +{ + int slot = (method - NV097_SET_COMBINER_ALPHA_OCW) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINEALPHAO0 + slot*4, parameter); +} + +DEF_METHOD_INC(NV097, SET_COMBINER_COLOR_ICW) +{ + int slot = (method - NV097_SET_COMBINER_COLOR_ICW) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINECOLORI0 + slot*4, parameter); +} + +DEF_METHOD_INC(NV097, SET_VIEWPORT_SCALE) +{ + int slot = (method - NV097_SET_VIEWPORT_SCALE) / 4; + pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPSCL][slot] = parameter; + pg->vsh_constants_dirty[NV_IGRAPH_XF_XFCTX_VPSCL] = true; +} + +DEF_METHOD_INC(NV097, SET_TRANSFORM_PROGRAM) +{ + int slot = (method - NV097_SET_TRANSFORM_PROGRAM) / 4; + + int program_load = PG_GET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR); + + assert(program_load < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); + pg->program_data[program_load][slot%4] = parameter; + pg->program_data_dirty = true; + + if (slot % 4 == 3) { + PG_SET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, program_load+1); + } +} + +DEF_METHOD_INC(NV097, SET_TRANSFORM_CONSTANT) +{ + int slot = (method - NV097_SET_TRANSFORM_CONSTANT) / 4; + int const_load = PG_GET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR); + + assert(const_load < NV2A_VERTEXSHADER_CONSTANTS); + // VertexShaderConstant *constant = &pg->constants[const_load]; + 
pg->vsh_constants_dirty[const_load] |= + (parameter != pg->vsh_constants[const_load][slot%4]); + pg->vsh_constants[const_load][slot%4] = parameter; + + if (slot % 4 == 3) { + PG_SET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, const_load+1); + } +} + +DEF_METHOD_INC(NV097, SET_VERTEX3F) +{ + int slot = (method - NV097_SET_VERTEX3F) / 4; + VertexAttribute *attribute = + &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION]; + pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION); + attribute->inline_value[slot] = *(float*)¶meter; + attribute->inline_value[3] = 1.0f; + if (slot == 2) { + pgraph_finish_inline_buffer_vertex(pg); + } +} + +/* Handles NV097_SET_BACK_LIGHT_* */ +DEF_METHOD_INC(NV097, SET_BACK_LIGHT_AMBIENT_COLOR) +{ + int slot = (method - NV097_SET_BACK_LIGHT_AMBIENT_COLOR) / 4; + unsigned int part = NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4 + slot % 16; + slot /= 16; /* [Light index] */ + assert(slot < 8); + switch(part * 4) { + case NV097_SET_BACK_LIGHT_AMBIENT_COLOR ... + NV097_SET_BACK_LIGHT_AMBIENT_COLOR + 8: + part -= NV097_SET_BACK_LIGHT_AMBIENT_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BAMB + slot*6] = true; + break; + case NV097_SET_BACK_LIGHT_DIFFUSE_COLOR ... + NV097_SET_BACK_LIGHT_DIFFUSE_COLOR + 8: + part -= NV097_SET_BACK_LIGHT_DIFFUSE_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BDIF + slot*6] = true; + break; + case NV097_SET_BACK_LIGHT_SPECULAR_COLOR ... 
+ NV097_SET_BACK_LIGHT_SPECULAR_COLOR + 8: + part -= NV097_SET_BACK_LIGHT_SPECULAR_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_BSPC + slot*6] = true; + break; + default: + assert(false); + break; + } +} + +/* Handles all the light source props except for NV097_SET_BACK_LIGHT_* */ +DEF_METHOD_INC(NV097, SET_LIGHT_AMBIENT_COLOR) +{ + int slot = (method - NV097_SET_LIGHT_AMBIENT_COLOR) / 4; + unsigned int part = NV097_SET_LIGHT_AMBIENT_COLOR / 4 + slot % 32; + slot /= 32; /* [Light index] */ + assert(slot < 8); + switch(part * 4) { + case NV097_SET_LIGHT_AMBIENT_COLOR ... + NV097_SET_LIGHT_AMBIENT_COLOR + 8: + part -= NV097_SET_LIGHT_AMBIENT_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_AMB + slot*6] = true; + break; + case NV097_SET_LIGHT_DIFFUSE_COLOR ... + NV097_SET_LIGHT_DIFFUSE_COLOR + 8: + part -= NV097_SET_LIGHT_DIFFUSE_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_DIF + slot*6] = true; + break; + case NV097_SET_LIGHT_SPECULAR_COLOR ... + NV097_SET_LIGHT_SPECULAR_COLOR + 8: + part -= NV097_SET_LIGHT_SPECULAR_COLOR / 4; + pg->ltctxb[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6][part] = parameter; + pg->ltctxb_dirty[NV_IGRAPH_XF_LTCTXB_L0_SPC + slot*6] = true; + break; + case NV097_SET_LIGHT_LOCAL_RANGE: + pg->ltc1[NV_IGRAPH_XF_LTC1_r0 + slot][0] = parameter; + pg->ltc1_dirty[NV_IGRAPH_XF_LTC1_r0 + slot] = true; + break; + case NV097_SET_LIGHT_INFINITE_HALF_VECTOR ... + NV097_SET_LIGHT_INFINITE_HALF_VECTOR + 8: + part -= NV097_SET_LIGHT_INFINITE_HALF_VECTOR / 4; + pg->light_infinite_half_vector[slot][part] = *(float*)¶meter; + break; + case NV097_SET_LIGHT_INFINITE_DIRECTION ... 
+ NV097_SET_LIGHT_INFINITE_DIRECTION + 8: + part -= NV097_SET_LIGHT_INFINITE_DIRECTION / 4; + pg->light_infinite_direction[slot][part] = *(float*)¶meter; + break; + case NV097_SET_LIGHT_SPOT_FALLOFF ... + NV097_SET_LIGHT_SPOT_FALLOFF + 8: + part -= NV097_SET_LIGHT_SPOT_FALLOFF / 4; + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2][part] = parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_K + slot*2] = true; + break; + case NV097_SET_LIGHT_SPOT_DIRECTION ... + NV097_SET_LIGHT_SPOT_DIRECTION + 12: + part -= NV097_SET_LIGHT_SPOT_DIRECTION / 4; + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2][part] = parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_L0_SPT + slot*2] = true; + break; + case NV097_SET_LIGHT_LOCAL_POSITION ... + NV097_SET_LIGHT_LOCAL_POSITION + 8: + part -= NV097_SET_LIGHT_LOCAL_POSITION / 4; + pg->light_local_position[slot][part] = *(float*)¶meter; + break; + case NV097_SET_LIGHT_LOCAL_ATTENUATION ... + NV097_SET_LIGHT_LOCAL_ATTENUATION + 8: + part -= NV097_SET_LIGHT_LOCAL_ATTENUATION / 4; + pg->light_local_attenuation[slot][part] = *(float*)¶meter; + break; + default: + assert(false); + break; + } +} + +DEF_METHOD_INC(NV097, SET_VERTEX4F) +{ + int slot = (method - NV097_SET_VERTEX4F) / 4; + VertexAttribute *attribute = + &pg->vertex_attributes[NV2A_VERTEX_ATTR_POSITION]; + pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_POSITION); + attribute->inline_value[slot] = *(float*)¶meter; + if (slot == 3) { + pgraph_finish_inline_buffer_vertex(pg); + } +} + +DEF_METHOD_INC(NV097, SET_NORMAL3S) +{ + int slot = (method - NV097_SET_NORMAL3S) / 4; + unsigned int part = slot % 2; + VertexAttribute *attribute = + &pg->vertex_attributes[NV2A_VERTEX_ATTR_NORMAL]; + pgraph_allocate_inline_buffer_vertices(pg, NV2A_VERTEX_ATTR_NORMAL); + int16_t val = parameter & 0xFFFF; + attribute->inline_value[part * 2 + 0] = MAX(-1.0f, (float)val / 32767.0f); + val = parameter >> 16; + attribute->inline_value[part * 2 + 1] = MAX(-1.0f, (float)val / 32767.0f); +} + 
+#define SET_VERTEX_ATTRIBUTE_4S(command, attr_index) \ + do { \ + int slot = (method - (command)) / 4; \ + unsigned int part = slot % 2; \ + VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ + attribute->inline_value[part * 2 + 0] = \ + (float)(int16_t)(parameter & 0xFFFF); \ + attribute->inline_value[part * 2 + 1] = \ + (float)(int16_t)(parameter >> 16); \ + } while (0) + +DEF_METHOD_INC(NV097, SET_TEXCOORD0_4S) +{ + SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD0_4S, NV2A_VERTEX_ATTR_TEXTURE0); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD1_4S) +{ + SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD1_4S, NV2A_VERTEX_ATTR_TEXTURE1); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD2_4S) +{ + SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD2_4S, NV2A_VERTEX_ATTR_TEXTURE2); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD3_4S) +{ + SET_VERTEX_ATTRIBUTE_4S(NV097_SET_TEXCOORD3_4S, NV2A_VERTEX_ATTR_TEXTURE3); +} + +#undef SET_VERTEX_ATTRIBUTE_4S + +#define SET_VERTEX_ATRIBUTE_TEX_2S(attr_index) \ + do { \ + VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ + attribute->inline_value[0] = (float)(int16_t)(parameter & 0xFFFF); \ + attribute->inline_value[1] = (float)(int16_t)(parameter >> 16); \ + attribute->inline_value[2] = 0.0f; \ + attribute->inline_value[3] = 1.0f; \ + } while (0) + +DEF_METHOD_INC(NV097, SET_TEXCOORD0_2S) +{ + SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE0); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD1_2S) +{ + SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE1); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD2_2S) +{ + SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE2); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD3_2S) +{ + SET_VERTEX_ATRIBUTE_TEX_2S(NV2A_VERTEX_ATTR_TEXTURE3); +} + +#undef SET_VERTEX_ATRIBUTE_TEX_2S + +#define SET_VERTEX_COLOR_3F(command, attr_index) \ + do { \ + int slot = (method - (command)) / 4; \ + 
VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ + attribute->inline_value[slot] = *(float*)¶meter; \ + attribute->inline_value[3] = 1.0f; \ + } while (0) + +DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR3F) +{ + SET_VERTEX_COLOR_3F(NV097_SET_DIFFUSE_COLOR3F, NV2A_VERTEX_ATTR_DIFFUSE); +} + +DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR3F) +{ + SET_VERTEX_COLOR_3F(NV097_SET_SPECULAR_COLOR3F, NV2A_VERTEX_ATTR_SPECULAR); +} + +#undef SET_VERTEX_COLOR_3F + +#define SET_VERTEX_ATTRIBUTE_F(command, attr_index) \ + do { \ + int slot = (method - (command)) / 4; \ + VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ + attribute->inline_value[slot] = *(float*)¶meter; \ + } while (0) + +DEF_METHOD_INC(NV097, SET_NORMAL3F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_NORMAL3F, NV2A_VERTEX_ATTR_NORMAL); +} + +DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_DIFFUSE_COLOR4F, NV2A_VERTEX_ATTR_DIFFUSE); +} + +DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_SPECULAR_COLOR4F, + NV2A_VERTEX_ATTR_SPECULAR); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD0_4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD0_4F, NV2A_VERTEX_ATTR_TEXTURE0); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD1_4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD1_4F, NV2A_VERTEX_ATTR_TEXTURE1); +} + + +DEF_METHOD_INC(NV097, SET_TEXCOORD2_4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD2_4F, NV2A_VERTEX_ATTR_TEXTURE2); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD3_4F) +{ + SET_VERTEX_ATTRIBUTE_F(NV097_SET_TEXCOORD3_4F, NV2A_VERTEX_ATTR_TEXTURE3); +} + +#undef SET_VERTEX_ATTRIBUTE_F + +#define SET_VERTEX_ATRIBUTE_TEX_2F(command, attr_index) \ + do { \ + int slot = (method - (command)) / 4; \ + VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ 
+ attribute->inline_value[slot] = *(float*)¶meter; \ + attribute->inline_value[2] = 0.0f; \ + attribute->inline_value[3] = 1.0f; \ + } while (0) + +DEF_METHOD_INC(NV097, SET_TEXCOORD0_2F) +{ + SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD0_2F, + NV2A_VERTEX_ATTR_TEXTURE0); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD1_2F) +{ + SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD1_2F, + NV2A_VERTEX_ATTR_TEXTURE1); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD2_2F) +{ + SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD2_2F, + NV2A_VERTEX_ATTR_TEXTURE2); +} + +DEF_METHOD_INC(NV097, SET_TEXCOORD3_2F) +{ + SET_VERTEX_ATRIBUTE_TEX_2F(NV097_SET_TEXCOORD3_2F, + NV2A_VERTEX_ATTR_TEXTURE3); +} + +#undef SET_VERTEX_ATRIBUTE_TEX_2F + +#define SET_VERTEX_ATTRIBUTE_4UB(command, attr_index) \ + do { \ + VertexAttribute *attribute = &pg->vertex_attributes[(attr_index)]; \ + pgraph_allocate_inline_buffer_vertices(pg, (attr_index)); \ + attribute->inline_value[0] = (parameter & 0xFF) / 255.0f; \ + attribute->inline_value[1] = ((parameter >> 8) & 0xFF) / 255.0f; \ + attribute->inline_value[2] = ((parameter >> 16) & 0xFF) / 255.0f; \ + attribute->inline_value[3] = ((parameter >> 24) & 0xFF) / 255.0f; \ + } while (0) + +DEF_METHOD_INC(NV097, SET_DIFFUSE_COLOR4UB) +{ + SET_VERTEX_ATTRIBUTE_4UB(NV097_SET_DIFFUSE_COLOR4UB, + NV2A_VERTEX_ATTR_DIFFUSE); +} + +DEF_METHOD_INC(NV097, SET_SPECULAR_COLOR4UB) +{ + SET_VERTEX_ATTRIBUTE_4UB(NV097_SET_SPECULAR_COLOR4UB, + NV2A_VERTEX_ATTR_SPECULAR); +} + +#undef SET_VERTEX_ATTRIBUTE_4UB + +DEF_METHOD_INC(NV097, SET_VERTEX_DATA_ARRAY_FORMAT) +{ + int slot = (method - NV097_SET_VERTEX_DATA_ARRAY_FORMAT) / 4; + VertexAttribute *attr = &pg->vertex_attributes[slot]; + attr->format = GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE); + attr->count = GET_MASK(parameter, NV097_SET_VERTEX_DATA_ARRAY_FORMAT_SIZE); + attr->stride = GET_MASK(parameter, + NV097_SET_VERTEX_DATA_ARRAY_FORMAT_STRIDE); + + NV2A_DPRINTF("vertex data array format=%d, count=%d, stride=%d\n", + 
attr->format, attr->count, attr->stride); + + switch (attr->format) { + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: + attr->size = 1; + assert(attr->count == 4); + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: + attr->size = 1; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: + attr->size = 2; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: + attr->size = 4; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: + attr->size = 2; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: + /* 3 signed, normalized components packed in 32-bits. (11,11,10) */ + attr->size = 4; + assert(attr->count == 1); + break; + default: + fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format); + assert(false); + break; + } + + if (attr->format == NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP) { + pg->compressed_attrs |= (1 << slot); + } else { + pg->compressed_attrs &= ~(1 << slot); + } +} + +DEF_METHOD_INC(NV097, SET_VERTEX_DATA_ARRAY_OFFSET) +{ + int slot = (method - NV097_SET_VERTEX_DATA_ARRAY_OFFSET) / 4; + + pg->vertex_attributes[slot].dma_select = parameter & 0x80000000; + pg->vertex_attributes[slot].offset = parameter & 0x7fffffff; +} + +DEF_METHOD(NV097, SET_LOGIC_OP_ENABLE) +{ + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_LOGICOP_ENABLE, + parameter); +} + +DEF_METHOD(NV097, SET_LOGIC_OP) +{ + PG_SET_MASK(NV_PGRAPH_BLEND, NV_PGRAPH_BLEND_LOGICOP, + parameter & 0xF); +} + +DEF_METHOD(NV097, CLEAR_REPORT_VALUE) +{ + d->pgraph.renderer->ops.clear_report_value(d); +} + +DEF_METHOD(NV097, SET_ZPASS_PIXEL_COUNT_ENABLE) +{ + pg->zpass_pixel_count_enable = parameter; +} + +DEF_METHOD(NV097, GET_REPORT) +{ + uint8_t type = GET_MASK(parameter, NV097_GET_REPORT_TYPE); + assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT); + + d->pgraph.renderer->ops.get_report(d, parameter); +} + +DEF_METHOD_INC(NV097, SET_EYE_DIRECTION) +{ + int slot = (method - NV097_SET_EYE_DIRECTION) / 4; + pg->ltctxa[NV_IGRAPH_XF_LTCTXA_EYED][slot] 
= parameter; + pg->ltctxa_dirty[NV_IGRAPH_XF_LTCTXA_EYED] = true; +} + +DEF_METHOD(NV097, SET_BEGIN_END) +{ + if (parameter == NV097_SET_BEGIN_END_OP_END) { + if (pg->primitive_mode == PRIM_TYPE_INVALID) { + NV2A_DPRINTF("End without Begin!\n"); + } + nv2a_profile_inc_counter(NV2A_PROF_BEGIN_ENDS); + d->pgraph.renderer->ops.draw_end(d); + pgraph_reset_inline_buffers(pg); + pg->primitive_mode = PRIM_TYPE_INVALID; + } else { + if (pg->primitive_mode != PRIM_TYPE_INVALID) { + NV2A_DPRINTF("Begin without End!\n"); + } + assert(parameter <= NV097_SET_BEGIN_END_OP_POLYGON); + pg->primitive_mode = parameter; + pgraph_reset_inline_buffers(pg); + d->pgraph.renderer->ops.draw_begin(d); + } +} + +DEF_METHOD(NV097, SET_TEXTURE_OFFSET) +{ + int slot = (method - NV097_SET_TEXTURE_OFFSET) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXOFFSET0 + slot * 4, parameter); + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_FORMAT) +{ + int slot = (method - NV097_SET_TEXTURE_FORMAT) / 64; + + bool dma_select = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CONTEXT_DMA) == 2; + bool cubemap = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_CUBEMAP_ENABLE); + unsigned int border_source = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BORDER_SOURCE); + unsigned int dimensionality = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_DIMENSIONALITY); + unsigned int color_format = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_COLOR); + unsigned int levels = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_MIPMAP_LEVELS); + unsigned int log_width = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_U); + unsigned int log_height = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_V); + unsigned int log_depth = + GET_MASK(parameter, NV097_SET_TEXTURE_FORMAT_BASE_SIZE_P); + + unsigned int reg = NV_PGRAPH_TEXFMT0 + slot * 4; + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_CONTEXT_DMA, dma_select); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE, cubemap); + PG_SET_MASK(reg, 
NV_PGRAPH_TEXFMT0_BORDER_SOURCE, border_source); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_DIMENSIONALITY, dimensionality); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_COLOR, color_format); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS, levels); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_U, log_width); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_V, log_height); + PG_SET_MASK(reg, NV_PGRAPH_TEXFMT0_BASE_SIZE_P, log_depth); + + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_CONTROL0) +{ + int slot = (method - NV097_SET_TEXTURE_CONTROL0) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXCTL0_0 + slot*4, parameter); + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_CONTROL1) +{ + int slot = (method - NV097_SET_TEXTURE_CONTROL1) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXCTL1_0 + slot*4, parameter); + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_FILTER) +{ + int slot = (method - NV097_SET_TEXTURE_FILTER) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXFILTER0 + slot * 4, parameter); + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_IMAGE_RECT) +{ + int slot = (method - NV097_SET_TEXTURE_IMAGE_RECT) / 64; + pgraph_reg_w(pg, NV_PGRAPH_TEXIMAGERECT0 + slot * 4, parameter); + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_PALETTE) +{ + int slot = (method - NV097_SET_TEXTURE_PALETTE) / 64; + + bool dma_select = + GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_CONTEXT_DMA) == 1; + unsigned int length = + GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_LENGTH); + unsigned int offset = + GET_MASK(parameter, NV097_SET_TEXTURE_PALETTE_OFFSET); + + unsigned int reg = NV_PGRAPH_TEXPALETTE0 + slot * 4; + PG_SET_MASK(reg, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA, dma_select); + PG_SET_MASK(reg, NV_PGRAPH_TEXPALETTE0_LENGTH, length); + PG_SET_MASK(reg, NV_PGRAPH_TEXPALETTE0_OFFSET, offset); + + pg->texture_dirty[slot] = true; +} + +DEF_METHOD(NV097, SET_TEXTURE_BORDER_COLOR) +{ + int slot = (method - 
NV097_SET_TEXTURE_BORDER_COLOR) / 64; + pgraph_reg_w(pg, NV_PGRAPH_BORDERCOLOR0 + slot * 4, parameter); +} + +DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_MAT) +{ + int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_MAT) / 4; + if (slot < 16) { + /* discard */ + return; + } + + slot -= 16; + const int swizzle[4] = { NV_PGRAPH_BUMPMAT00, NV_PGRAPH_BUMPMAT01, + NV_PGRAPH_BUMPMAT11, NV_PGRAPH_BUMPMAT10 }; + pgraph_reg_w(pg, swizzle[slot % 4] + slot / 4, parameter); +} + +DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_SCALE) +{ + int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_SCALE) / 64; + if (slot == 0) { + /* discard */ + return; + } + + slot--; + pgraph_reg_w(pg, NV_PGRAPH_BUMPSCALE1 + slot * 4, parameter); +} + +DEF_METHOD(NV097, SET_TEXTURE_SET_BUMP_ENV_OFFSET) +{ + int slot = (method - NV097_SET_TEXTURE_SET_BUMP_ENV_OFFSET) / 64; + if (slot == 0) { + /* discard */ + return; + } + + slot--; + pgraph_reg_w(pg, NV_PGRAPH_BUMPOFFSET1 + slot * 4, parameter); +} + +static void pgraph_expand_draw_arrays(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + uint32_t start = pg->draw_arrays_start[pg->draw_arrays_length - 1]; + uint32_t count = pg->draw_arrays_count[pg->draw_arrays_length - 1]; + + /* Render any previously squashed DRAW_ARRAYS calls. This case would be + * triggered if a set of BEGIN+DA+END triplets is followed by the + * BEGIN+DA+ARRAY_ELEMENT+... chain that caused this expansion. 
*/ + if (pg->draw_arrays_length > 1) { + d->pgraph.renderer->ops.flush_draw(d); + pgraph_reset_inline_buffers(pg); + } + assert((pg->inline_elements_length + count) < NV2A_MAX_BATCH_LENGTH); + for (unsigned int i = 0; i < count; i++) { + pg->inline_elements[pg->inline_elements_length++] = start + i; + } + + pgraph_reset_draw_arrays(pg); +} + +void pgraph_check_within_begin_end_block(PGRAPHState *pg) +{ + if (pg->primitive_mode == PRIM_TYPE_INVALID) { + NV2A_DPRINTF("Vertex data being sent outside of begin/end block!\n"); + } +} + +DEF_METHOD_NON_INC(NV097, ARRAY_ELEMENT16) +{ + pgraph_check_within_begin_end_block(pg); + + if (pg->draw_arrays_length) { + pgraph_expand_draw_arrays(d); + } + + assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH); + pg->inline_elements[pg->inline_elements_length++] = parameter & 0xFFFF; + pg->inline_elements[pg->inline_elements_length++] = parameter >> 16; +} + +DEF_METHOD_NON_INC(NV097, ARRAY_ELEMENT32) +{ + pgraph_check_within_begin_end_block(pg); + + if (pg->draw_arrays_length) { + pgraph_expand_draw_arrays(d); + } + + assert(pg->inline_elements_length < NV2A_MAX_BATCH_LENGTH); + pg->inline_elements[pg->inline_elements_length++] = parameter; +} + +DEF_METHOD(NV097, DRAW_ARRAYS) +{ + pgraph_check_within_begin_end_block(pg); + + int32_t start = GET_MASK(parameter, NV097_DRAW_ARRAYS_START_INDEX); + int32_t count = GET_MASK(parameter, NV097_DRAW_ARRAYS_COUNT) + 1; + + if (pg->inline_elements_length) { + /* FIXME: Determine HW behavior for overflow case. 
*/ + assert((pg->inline_elements_length + count) < NV2A_MAX_BATCH_LENGTH); + assert(!pg->draw_arrays_prevent_connect); + + for (unsigned int i = 0; i < count; i++) { + pg->inline_elements[pg->inline_elements_length++] = start + i; + } + return; + } + + pg->draw_arrays_min_start = MIN(pg->draw_arrays_min_start, start); + pg->draw_arrays_max_count = MAX(pg->draw_arrays_max_count, start + count); + + assert(pg->draw_arrays_length < ARRAY_SIZE(pg->draw_arrays_start)); + + /* Attempt to connect contiguous primitives */ + if (!pg->draw_arrays_prevent_connect && pg->draw_arrays_length > 0) { + unsigned int last_start = + pg->draw_arrays_start[pg->draw_arrays_length - 1]; + int32_t *last_count = + &pg->draw_arrays_count[pg->draw_arrays_length - 1]; + if (start == (last_start + *last_count)) { + *last_count += count; + return; + } + } + + pg->draw_arrays_start[pg->draw_arrays_length] = start; + pg->draw_arrays_count[pg->draw_arrays_length] = count; + pg->draw_arrays_length++; + pg->draw_arrays_prevent_connect = false; +} + +DEF_METHOD_NON_INC(NV097, INLINE_ARRAY) +{ + pgraph_check_within_begin_end_block(pg); + assert(pg->inline_array_length < NV2A_MAX_BATCH_LENGTH); + pg->inline_array[pg->inline_array_length++] = parameter; +} + +DEF_METHOD_INC(NV097, SET_EYE_VECTOR) +{ + int slot = (method - NV097_SET_EYE_VECTOR) / 4; + pgraph_reg_w(pg, NV_PGRAPH_EYEVEC0 + slot * 4, parameter); +} + +DEF_METHOD_INC(NV097, SET_VERTEX_DATA2F_M) +{ + int slot = (method - NV097_SET_VERTEX_DATA2F_M) / 4; + unsigned int part = slot % 2; + slot /= 2; + VertexAttribute *attribute = &pg->vertex_attributes[slot]; + pgraph_allocate_inline_buffer_vertices(pg, slot); + attribute->inline_value[part] = *(float*)¶meter; + /* FIXME: Should these really be set to 0.0 and 1.0 ? Conditions? 
*/ + attribute->inline_value[2] = 0.0; + attribute->inline_value[3] = 1.0; + if ((slot == 0) && (part == 1)) { + pgraph_finish_inline_buffer_vertex(pg); + } +} + +DEF_METHOD_INC(NV097, SET_VERTEX_DATA4F_M) +{ + int slot = (method - NV097_SET_VERTEX_DATA4F_M) / 4; + unsigned int part = slot % 4; + slot /= 4; + VertexAttribute *attribute = &pg->vertex_attributes[slot]; + pgraph_allocate_inline_buffer_vertices(pg, slot); + attribute->inline_value[part] = *(float*)¶meter; + if ((slot == 0) && (part == 3)) { + pgraph_finish_inline_buffer_vertex(pg); + } +} + +DEF_METHOD_INC(NV097, SET_VERTEX_DATA2S) +{ + int slot = (method - NV097_SET_VERTEX_DATA2S) / 4; + VertexAttribute *attribute = &pg->vertex_attributes[slot]; + pgraph_allocate_inline_buffer_vertices(pg, slot); + attribute->inline_value[0] = (float)(int16_t)(parameter & 0xFFFF); + attribute->inline_value[1] = (float)(int16_t)(parameter >> 16); + attribute->inline_value[2] = 0.0; + attribute->inline_value[3] = 1.0; + if (slot == 0) { + pgraph_finish_inline_buffer_vertex(pg); + } +} + +DEF_METHOD_INC(NV097, SET_VERTEX_DATA4UB) +{ + int slot = (method - NV097_SET_VERTEX_DATA4UB) / 4; + VertexAttribute *attribute = &pg->vertex_attributes[slot]; + pgraph_allocate_inline_buffer_vertices(pg, slot); + attribute->inline_value[0] = (parameter & 0xFF) / 255.0; + attribute->inline_value[1] = ((parameter >> 8) & 0xFF) / 255.0; + attribute->inline_value[2] = ((parameter >> 16) & 0xFF) / 255.0; + attribute->inline_value[3] = ((parameter >> 24) & 0xFF) / 255.0; + if (slot == 0) { + pgraph_finish_inline_buffer_vertex(pg); + } +} + +DEF_METHOD_INC(NV097, SET_VERTEX_DATA4S_M) +{ + int slot = (method - NV097_SET_VERTEX_DATA4S_M) / 4; + unsigned int part = slot % 2; + slot /= 2; + VertexAttribute *attribute = &pg->vertex_attributes[slot]; + pgraph_allocate_inline_buffer_vertices(pg, slot); + + attribute->inline_value[part * 2 + 0] = (float)(int16_t)(parameter & 0xFFFF); + attribute->inline_value[part * 2 + 1] = 
(float)(int16_t)(parameter >> 16); + if ((slot == 0) && (part == 1)) { + pgraph_finish_inline_buffer_vertex(pg); + } +} + +DEF_METHOD(NV097, SET_SEMAPHORE_OFFSET) +{ + pgraph_reg_w(pg, NV_PGRAPH_SEMAPHOREOFFSET, parameter); +} + +DEF_METHOD(NV097, BACK_END_WRITE_SEMAPHORE_RELEASE) +{ + d->pgraph.renderer->ops.surface_update(d, false, true, true); + + //qemu_mutex_unlock(&d->pgraph.lock); + //qemu_mutex_lock_iothread(); + + uint32_t semaphore_offset = pgraph_reg_r(pg, NV_PGRAPH_SEMAPHOREOFFSET); + + hwaddr semaphore_dma_len; + uint8_t *semaphore_data = (uint8_t*)nv_dma_map(d, pg->dma_semaphore, + &semaphore_dma_len); + assert(semaphore_offset < semaphore_dma_len); + semaphore_data += semaphore_offset; + + stl_le_p((uint32_t*)semaphore_data, parameter); + + //qemu_mutex_lock(&d->pgraph.lock); + //qemu_mutex_unlock_iothread(); +} + +DEF_METHOD(NV097, SET_ZMIN_MAX_CONTROL) +{ + switch (GET_MASK(parameter, NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN)) { + case NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN_CULL: + PG_SET_MASK(NV_PGRAPH_ZCOMPRESSOCCLUDE, + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN, + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CULL); + break; + case NV097_SET_ZMIN_MAX_CONTROL_ZCLAMP_EN_CLAMP: + PG_SET_MASK(NV_PGRAPH_ZCOMPRESSOCCLUDE, + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN, + NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP); + break; + default: + /* FIXME: Should raise NV_PGRAPH_NSOURCE_DATA_ERROR_PENDING */ + assert(!"Invalid zclamp value"); + break; + } +} + +DEF_METHOD(NV097, SET_ANTI_ALIASING_CONTROL) +{ + PG_SET_MASK(NV_PGRAPH_ANTIALIASING, NV_PGRAPH_ANTIALIASING_ENABLE, + GET_MASK(parameter, NV097_SET_ANTI_ALIASING_CONTROL_ENABLE)); + // FIXME: Handle the remaining bits (observed values 0xFFFF0000, 0xFFFF0001) +} + +DEF_METHOD(NV097, SET_ZSTENCIL_CLEAR_VALUE) +{ + pgraph_reg_w(pg, NV_PGRAPH_ZSTENCILCLEARVALUE, parameter); +} + +DEF_METHOD(NV097, SET_COLOR_CLEAR_VALUE) +{ + pgraph_reg_w(pg, NV_PGRAPH_COLORCLEARVALUE, parameter); +} + +DEF_METHOD(NV097, CLEAR_SURFACE) +{ + 
d->pgraph.renderer->ops.clear_surface(d, parameter); +} + +DEF_METHOD(NV097, SET_CLEAR_RECT_HORIZONTAL) +{ + pgraph_reg_w(pg, NV_PGRAPH_CLEARRECTX, parameter); +} + +DEF_METHOD(NV097, SET_CLEAR_RECT_VERTICAL) +{ + pgraph_reg_w(pg, NV_PGRAPH_CLEARRECTY, parameter); +} + +DEF_METHOD_INC(NV097, SET_SPECULAR_FOG_FACTOR) +{ + int slot = (method - NV097_SET_SPECULAR_FOG_FACTOR) / 4; + pgraph_reg_w(pg, NV_PGRAPH_SPECFOGFACTOR0 + slot*4, parameter); +} + +DEF_METHOD(NV097, SET_SHADER_CLIP_PLANE_MODE) +{ + pgraph_reg_w(pg, NV_PGRAPH_SHADERCLIPMODE, parameter); +} + +DEF_METHOD_INC(NV097, SET_COMBINER_COLOR_OCW) +{ + int slot = (method - NV097_SET_COMBINER_COLOR_OCW) / 4; + pgraph_reg_w(pg, NV_PGRAPH_COMBINECOLORO0 + slot*4, parameter); +} + +DEF_METHOD(NV097, SET_COMBINER_CONTROL) +{ + pgraph_reg_w(pg, NV_PGRAPH_COMBINECTL, parameter); +} + +DEF_METHOD(NV097, SET_SHADOW_ZSLOPE_THRESHOLD) +{ + pgraph_reg_w(pg, NV_PGRAPH_SHADOWZSLOPETHRESHOLD, parameter); + assert(parameter == 0x7F800000); /* FIXME: Unimplemented */ +} + +DEF_METHOD(NV097, SET_SHADOW_DEPTH_FUNC) +{ + PG_SET_MASK(NV_PGRAPH_SHADOWCTL, NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC, + parameter); +} + +DEF_METHOD(NV097, SET_SHADER_STAGE_PROGRAM) +{ + pgraph_reg_w(pg, NV_PGRAPH_SHADERPROG, parameter); +} + +DEF_METHOD(NV097, SET_DOT_RGBMAPPING) +{ + PG_SET_MASK(NV_PGRAPH_SHADERCTL, 0xFFF, + GET_MASK(parameter, 0xFFF)); +} + +DEF_METHOD(NV097, SET_SHADER_OTHER_STAGE_INPUT) +{ + PG_SET_MASK(NV_PGRAPH_SHADERCTL, 0xFFFF000, + GET_MASK(parameter, 0xFFFF000)); +} + +DEF_METHOD_INC(NV097, SET_TRANSFORM_DATA) +{ + int slot = (method - NV097_SET_TRANSFORM_DATA) / 4; + pg->vertex_state_shader_v0[slot] = parameter; +} + +DEF_METHOD(NV097, LAUNCH_TRANSFORM_PROGRAM) +{ + unsigned int program_start = parameter; + assert(program_start < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); + Nv2aVshProgram program; + Nv2aVshParseResult result = nv2a_vsh_parse_program( + &program, + pg->program_data[program_start], + NV2A_MAX_TRANSFORM_PROGRAM_LENGTH - 
program_start); + assert(result == NV2AVPR_SUCCESS); + + Nv2aVshCPUXVSSExecutionState state_linkage; + Nv2aVshExecutionState state = nv2a_vsh_emu_initialize_xss_execution_state( + &state_linkage, (float*)pg->vsh_constants); + memcpy(state_linkage.input_regs, pg->vertex_state_shader_v0, sizeof(pg->vertex_state_shader_v0)); + + nv2a_vsh_emu_execute_track_context_writes(&state, &program, pg->vsh_constants_dirty); + + nv2a_vsh_program_destroy(&program); +} + +DEF_METHOD(NV097, SET_TRANSFORM_EXECUTION_MODE) +{ + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_MODE, + GET_MASK(parameter, + NV097_SET_TRANSFORM_EXECUTION_MODE_MODE)); + PG_SET_MASK(NV_PGRAPH_CSV0_D, NV_PGRAPH_CSV0_D_RANGE_MODE, + GET_MASK(parameter, + NV097_SET_TRANSFORM_EXECUTION_MODE_RANGE_MODE)); +} + +DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_CXT_WRITE_EN) +{ + pg->enable_vertex_program_write = parameter; +} + +DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_LOAD) +{ + assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); + PG_SET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_PROG_LD_PTR, parameter); +} + +DEF_METHOD(NV097, SET_TRANSFORM_PROGRAM_START) +{ + assert(parameter < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH); + PG_SET_MASK(NV_PGRAPH_CSV0_C, + NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START, parameter); +} + +DEF_METHOD(NV097, SET_TRANSFORM_CONSTANT_LOAD) +{ + assert(parameter < NV2A_VERTEXSHADER_CONSTANTS); + PG_SET_MASK(NV_PGRAPH_CHEOPS_OFFSET, + NV_PGRAPH_CHEOPS_OFFSET_CONST_LD_PTR, parameter); +} + +void pgraph_get_clear_color(PGRAPHState *pg, float rgba[4]) +{ + uint32_t clear_color = pgraph_reg_r(pg, NV_PGRAPH_COLORCLEARVALUE); + + float *r = &rgba[0], *g = &rgba[1], *b = &rgba[2], *a = &rgba[3]; + + /* Handle RGB */ + switch(pg->surface_shape.color_format) { + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_O1R5G5B5: + *r = ((clear_color >> 10) & 0x1F) / 31.0f; + *g = ((clear_color >> 5) & 0x1F) / 31.0f; + *b = (clear_color & 0x1F) / 31.0f; + 
break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: + *r = ((clear_color >> 11) & 0x1F) / 31.0f; + *g = ((clear_color >> 5) & 0x3F) / 63.0f; + *b = (clear_color & 0x1F) / 31.0f; + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_O8R8G8B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: + *r = ((clear_color >> 16) & 0xFF) / 255.0f; + *g = ((clear_color >> 8) & 0xFF) / 255.0f; + *b = (clear_color & 0xFF) / 255.0f; + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8: + /* Xbox D3D doesn't support clearing those */ + default: + *r = 1.0f; + *g = 0.0f; + *b = 1.0f; + fprintf(stderr, "CLEAR_SURFACE for color_format 0x%x unsupported", + pg->surface_shape.color_format); + assert(false); + break; + } + + /* Handle alpha */ + switch(pg->surface_shape.color_format) { + /* FIXME: CLEAR_SURFACE seems to work like memset, so maybe we + * also have to clear non-alpha bits with alpha value? + * As GL doesn't own those pixels we'd have to do this on + * our own in xbox memory. + */ + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_Z1A7R8G8B8: + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1A7R8G8B8_O1A7R8G8B8: + *a = ((clear_color >> 24) & 0x7F) / 127.0f; + assert(false); /* Untested */ + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: + *a = ((clear_color >> 24) & 0xFF) / 255.0f; + break; + default: + *a = 1.0f; + break; + } +} + +void pgraph_get_clear_depth_stencil_value(PGRAPHState *pg, float *depth, + int *stencil) +{ + uint32_t clear_zstencil = + pgraph_reg_r(pg, NV_PGRAPH_ZSTENCILCLEARVALUE); + *stencil = 0; + *depth = 1.0; + + switch (pg->surface_shape.zeta_format) { + case NV097_SET_SURFACE_FORMAT_ZETA_Z16: { + uint16_t z = clear_zstencil & 0xFFFF; + /* FIXME: Remove bit for stencil clear? 
*/ + if (pg->surface_shape.z_format) { + *depth = convert_f16_to_float(z) / f16_max; + } else { + *depth = z / (float)0xFFFF; + } + break; + } + case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: { + *stencil = clear_zstencil & 0xFF; + uint32_t z = clear_zstencil >> 8; + if (pg->surface_shape.z_format) { + *depth = convert_f24_to_float(z) / f24_max; + } else { + *depth = z / (float)0xFFFFFF; + } + break; + } + default: + fprintf(stderr, "Unknown zeta surface format: 0x%x\n", + pg->surface_shape.zeta_format); + assert(false); + break; + } +} + +void pgraph_write_zpass_pixel_cnt_report(NV2AState *d, uint32_t parameter, + uint32_t result) +{ + PGRAPHState *pg = &d->pgraph; + + uint64_t timestamp = 0x0011223344556677; /* FIXME: Update timestamp?! */ + uint32_t done = 0; // FIXME: Check + + hwaddr report_dma_len; + uint8_t *report_data = + (uint8_t *)nv_dma_map(d, pg->dma_report, &report_dma_len); + + hwaddr offset = GET_MASK(parameter, NV097_GET_REPORT_OFFSET); + assert(offset < report_dma_len); + report_data += offset; + + stq_le_p((uint64_t *)&report_data[0], timestamp); + stl_le_p((uint32_t *)&report_data[8], result); + stl_le_p((uint32_t *)&report_data[12], done); + + NV2A_DPRINTF("Report result %d @%" HWADDR_PRIx, result, offset); +} + +void pgraph_process_pending(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + pg->renderer->ops.process_pending(d); + + if (g_config.display.renderer != pg->renderer->type) { + qemu_mutex_lock(&d->pgraph.renderer_lock); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_mutex_lock(&d->pgraph.lock); + + if (pg->renderer) { + qemu_event_reset(&pg->flush_complete); + pg->flush_pending = true; + + qemu_mutex_lock(&d->pfifo.lock); + qemu_mutex_unlock(&d->pgraph.lock); + + if (pg->renderer->ops.process_pending) { + pg->renderer->ops.process_pending(d); + } + + qemu_mutex_unlock(&d->pfifo.lock); + qemu_mutex_lock(&d->pgraph.lock); + while (pg->framebuffer_in_use) { + qemu_cond_wait(&d->pgraph.framebuffer_released, &d->pgraph.renderer_lock); + } + + if 
(pg->renderer->ops.finalize) { + pg->renderer->ops.finalize(d); + } + } + + init_renderer(pg); + + qemu_mutex_unlock(&d->pgraph.renderer_lock); + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + } +} + +void pgraph_process_pending_reports(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + pg->renderer->ops.process_pending_reports(d); +} + +void pgraph_pre_savevm_trigger(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + pg->renderer->ops.pre_savevm_trigger(d); +} + +void pgraph_pre_savevm_wait(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + pg->renderer->ops.pre_savevm_wait(d); +} + +void pgraph_pre_shutdown_trigger(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + pg->renderer->ops.pre_shutdown_trigger(d); +} + +void pgraph_pre_shutdown_wait(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + pg->renderer->ops.pre_shutdown_wait(d); +} + diff --git a/hw/xbox/nv2a/pgraph/pgraph.h b/hw/xbox/nv2a/pgraph/pgraph.h new file mode 100644 index 00000000000..64b671e71da --- /dev/null +++ b/hw/xbox/nv2a/pgraph/pgraph.h @@ -0,0 +1,396 @@ +/* + * QEMU Geforce NV2A PGRAPH internal definitions + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_H +#define HW_XBOX_NV2A_PGRAPH_H + +#include "xemu-config.h" +#include "qemu/osdep.h" +#include "qemu/bitmap.h" +#include "qemu/units.h" +#include "qemu/thread.h" +#include "cpu.h" + +#include "shaders.h" +#include "surface.h" +#include "util.h" + +typedef struct NV2AState NV2AState; +typedef struct PGRAPHNullState PGRAPHNullState; +typedef struct PGRAPHGLState PGRAPHGLState; +typedef struct PGRAPHVkState PGRAPHVkState; + +typedef struct VertexAttribute { + bool dma_select; + hwaddr offset; + + /* inline arrays are packed in order? + * Need to pass the offset to converted attributes */ + unsigned int inline_array_offset; + + float inline_value[4]; + + unsigned int format; + unsigned int size; /* size of the data type */ + unsigned int count; /* number of components */ + uint32_t stride; + + bool needs_conversion; + + float *inline_buffer; + bool inline_buffer_populated; +} VertexAttribute; + +typedef struct Surface { + bool draw_dirty; + bool buffer_dirty; + bool write_enabled_cache; + unsigned int pitch; + + hwaddr offset; +} Surface; + +typedef struct KelvinState { + hwaddr object_instance; +} KelvinState; + +typedef struct ContextSurfaces2DState { + hwaddr object_instance; + hwaddr dma_image_source; + hwaddr dma_image_dest; + unsigned int color_format; + unsigned int source_pitch, dest_pitch; + hwaddr source_offset, dest_offset; +} ContextSurfaces2DState; + +typedef struct ImageBlitState { + hwaddr object_instance; + hwaddr context_surfaces; + unsigned int operation; + unsigned int in_x, in_y; + unsigned int out_x, out_y; + unsigned int width, height; +} ImageBlitState; + +typedef struct BetaState { + hwaddr object_instance; + uint32_t beta; +} BetaState; + +typedef struct PGRAPHRenderer { + CONFIG_DISPLAY_RENDERER type; + const char *name; + struct { + void (*early_context_init)(void); + void (*init)(NV2AState *d, Error **errp); + void (*finalize)(NV2AState *d); + void (*clear_report_value)(NV2AState *d); + void 
(*clear_surface)(NV2AState *d, uint32_t parameter); + void (*draw_begin)(NV2AState *d); + void (*draw_end)(NV2AState *d); + void (*flip_stall)(NV2AState *d); + void (*flush_draw)(NV2AState *d); + void (*get_report)(NV2AState *d, uint32_t parameter); + void (*image_blit)(NV2AState *d); + void (*pre_savevm_trigger)(NV2AState *d); + void (*pre_savevm_wait)(NV2AState *d); + void (*pre_shutdown_trigger)(NV2AState *d); + void (*pre_shutdown_wait)(NV2AState *d); + void (*process_pending)(NV2AState *d); + void (*process_pending_reports)(NV2AState *d); + void (*surface_flush)(NV2AState *d); + void (*surface_update)(NV2AState *d, bool upload, bool color_write, bool zeta_write); + void (*set_surface_scale_factor)(NV2AState *d, unsigned int scale); + unsigned int (*get_surface_scale_factor)(NV2AState *d); + int (*get_framebuffer_surface)(NV2AState *d); + } ops; +} PGRAPHRenderer; + +typedef struct PGRAPHState { + QemuMutex lock; + QemuMutex renderer_lock; + + uint32_t pending_interrupts; + uint32_t enabled_interrupts; + + int frame_time; + int draw_time; + + /* subchannels state we're not sure the location of... */ + ContextSurfaces2DState context_surfaces_2d; + ImageBlitState image_blit; + KelvinState kelvin; + BetaState beta; + + hwaddr dma_color, dma_zeta; + Surface surface_color, surface_zeta; + unsigned int surface_type; + SurfaceShape surface_shape; + SurfaceShape last_surface_shape; + + struct { + int clip_x; + int clip_width; + int clip_y; + int clip_height; + int width; + int height; + } surface_binding_dim; // FIXME: Refactor + + hwaddr dma_a, dma_b; + bool texture_dirty[NV2A_MAX_TEXTURES]; + + bool texture_matrix_enable[NV2A_MAX_TEXTURES]; + + hwaddr dma_state; + hwaddr dma_notifies; + hwaddr dma_semaphore; + + hwaddr dma_report; + hwaddr report_offset; + bool zpass_pixel_count_enable; + + hwaddr dma_vertex_a, dma_vertex_b; + + uint32_t primitive_mode; + + bool enable_vertex_program_write; // FIXME: Not used anywhere??? 
+ + uint32_t vertex_state_shader_v0[4]; + uint32_t program_data[NV2A_MAX_TRANSFORM_PROGRAM_LENGTH][VSH_TOKEN_SIZE]; + bool program_data_dirty; + + uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; + bool vsh_constants_dirty[NV2A_VERTEXSHADER_CONSTANTS]; + + /* lighting constant arrays */ + uint32_t ltctxa[NV2A_LTCTXA_COUNT][4]; + bool ltctxa_dirty[NV2A_LTCTXA_COUNT]; + uint32_t ltctxb[NV2A_LTCTXB_COUNT][4]; + bool ltctxb_dirty[NV2A_LTCTXB_COUNT]; + uint32_t ltc1[NV2A_LTC1_COUNT][4]; + bool ltc1_dirty[NV2A_LTC1_COUNT]; + + float material_alpha; + + // should figure out where these are in lighting context + float light_infinite_half_vector[NV2A_MAX_LIGHTS][3]; + float light_infinite_direction[NV2A_MAX_LIGHTS][3]; + float light_local_position[NV2A_MAX_LIGHTS][3]; + float light_local_attenuation[NV2A_MAX_LIGHTS][3]; + + float point_params[8]; + + VertexAttribute vertex_attributes[NV2A_VERTEXSHADER_ATTRIBUTES]; + uint16_t compressed_attrs; + uint16_t uniform_attrs; + uint16_t swizzle_attrs; + + unsigned int inline_array_length; + uint32_t inline_array[NV2A_MAX_BATCH_LENGTH]; + + unsigned int inline_elements_length; + uint32_t inline_elements[NV2A_MAX_BATCH_LENGTH]; + + unsigned int inline_buffer_length; + + unsigned int draw_arrays_length; + unsigned int draw_arrays_min_start; + unsigned int draw_arrays_max_count; + /* FIXME: Unknown size, possibly endless, 1250 will do for now */ + /* Keep in sync with size used in nv2a.c */ + int32_t draw_arrays_start[1250]; + int32_t draw_arrays_count[1250]; + bool draw_arrays_prevent_connect; + + uint32_t regs_[0x2000]; + DECLARE_BITMAP(regs_dirty, 0x2000 / sizeof(uint32_t)); + + bool clearing; // FIXME: Internal + bool waiting_for_nop; + bool waiting_for_flip; + bool waiting_for_context_switch; + + bool flush_pending; + QemuEvent flush_complete; + + bool sync_pending; + QemuEvent sync_complete; + + bool framebuffer_in_use; + QemuCond framebuffer_released; + + unsigned int surface_scale_factor; + uint8_t *scale_buf; + + const 
PGRAPHRenderer *renderer; + union { + PGRAPHNullState *null_renderer_state; + PGRAPHGLState *gl_renderer_state; + PGRAPHVkState *vk_renderer_state; + }; +} PGRAPHState; + +void pgraph_init(NV2AState *d); +void pgraph_init_thread(NV2AState *d); +void pgraph_destroy(PGRAPHState *pg); +void pgraph_context_switch(NV2AState *d, unsigned int channel_id); +void pgraph_process_pending(NV2AState *d); +void pgraph_process_pending_reports(NV2AState *d); +void pgraph_pre_savevm_trigger(NV2AState *d); +void pgraph_pre_savevm_wait(NV2AState *d); +void pgraph_pre_shutdown_trigger(NV2AState *d); +void pgraph_pre_shutdown_wait(NV2AState *d); + +int pgraph_method(NV2AState *d, unsigned int subchannel, unsigned int method, + uint32_t parameter, uint32_t *parameters, + size_t num_words_available, size_t max_lookahead_words, + bool inc); +void pgraph_check_within_begin_end_block(PGRAPHState *pg); + +void *pfifo_thread(void *arg); +void pfifo_kick(NV2AState *d); + +void pgraph_renderer_register(const PGRAPHRenderer *renderer); + +// FIXME: Move from here + +extern NV2AState *g_nv2a; + +// FIXME: Add new function pgraph_is_texture_sampler_active() + +static inline uint32_t pgraph_reg_r(PGRAPHState *pg, unsigned int r) +{ + assert(r % 4 == 0); + return pg->regs_[r]; +} + +static inline void pgraph_reg_w(PGRAPHState *pg, unsigned int r, uint32_t v) +{ + assert(r % 4 == 0); + if (pg->regs_[r] != v) { + bitmap_set(pg->regs_dirty, r / sizeof(uint32_t), 1); + } + pg->regs_[r] = v; +} + +void pgraph_clear_dirty_reg_map(PGRAPHState *pg); + +static inline bool pgraph_is_reg_dirty(PGRAPHState *pg, unsigned int reg) +{ + return test_bit(reg / sizeof(uint32_t), pg->regs_dirty); +} + +static inline bool pgraph_is_texture_stage_active(PGRAPHState *pg, unsigned int stage) +{ + assert(stage < NV2A_MAX_TEXTURES); + uint32_t mode = (pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG) >> (stage * 5)) & 0x1F; + return mode != 0 && mode != 4;// && mode != 0x11 && mode != 0x0a && mode != 0x09 && mode != 5; +} + +static 
inline bool pgraph_is_texture_enabled(PGRAPHState *pg, int texture_idx) +{ + uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + texture_idx*4); + return // pgraph_is_texture_stage_active(pg, texture_idx) && + GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_ENABLE); +} + +static inline bool pgraph_is_texture_format_compressed(PGRAPHState *pg, int color_format) +{ + return color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5 || + color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8 || + color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8; +} + +static inline bool pgraph_color_write_enabled(PGRAPHState *pg) +{ + return pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & ( + NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE + | NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE + | NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE + | NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE); +} + +static inline bool pgraph_zeta_write_enabled(PGRAPHState *pg) +{ + return pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & ( + NV_PGRAPH_CONTROL_0_ZWRITEENABLE + | NV_PGRAPH_CONTROL_0_STENCIL_WRITE_ENABLE); +} + +static inline void pgraph_apply_anti_aliasing_factor(PGRAPHState *pg, + unsigned int *width, + unsigned int *height) +{ + switch (pg->surface_shape.anti_aliasing) { + case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_1: + break; + case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_CENTER_CORNER_2: + if (width) { *width *= 2; } + break; + case NV097_SET_SURFACE_FORMAT_ANTI_ALIASING_SQUARE_OFFSET_4: + if (width) { *width *= 2; } + if (height) { *height *= 2; } + break; + default: + assert(false); + break; + } +} + +static inline void pgraph_apply_scaling_factor(PGRAPHState *pg, + unsigned int *width, + unsigned int *height) +{ + *width *= pg->surface_scale_factor; + *height *= pg->surface_scale_factor; +} + +void pgraph_get_clear_color(PGRAPHState *pg, float rgba[4]); +void pgraph_get_clear_depth_stencil_value(PGRAPHState *pg, float *depth, int *stencil); + +/* Vertex */ +void 
pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr); +void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg); +void pgraph_reset_inline_buffers(PGRAPHState *pg); +void pgraph_reset_draw_arrays(PGRAPHState *pg); +void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data); +void pgraph_get_inline_values(PGRAPHState *pg, uint16_t attrs, + float values[NV2A_VERTEXSHADER_ATTRIBUTES][4], + int *count); + +/* RDI */ +uint32_t pgraph_rdi_read(PGRAPHState *pg, unsigned int select, + unsigned int address); +void pgraph_rdi_write(PGRAPHState *pg, unsigned int select, + unsigned int address, uint32_t val); + +static inline void pgraph_argb_pack32_to_rgba_float(uint32_t argb, float *rgba) +{ + rgba[0] = ((argb >> 16) & 0xFF) / 255.0f; /* red */ + rgba[1] = ((argb >> 8) & 0xFF) / 255.0f; /* green */ + rgba[2] = (argb & 0xFF) / 255.0f; /* blue */ + rgba[3] = ((argb >> 24) & 0xFF) / 255.0f; /* alpha */ +} + +void pgraph_write_zpass_pixel_cnt_report(NV2AState *d, uint32_t parameter, uint32_t result); + +#endif diff --git a/hw/xbox/nv2a/pgraph/profile.c b/hw/xbox/nv2a/pgraph/profile.c new file mode 100644 index 00000000000..69a1b5bfbd1 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/profile.c @@ -0,0 +1,74 @@ +/* + * QEMU Geforce NV2A profiling helpers + * + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "../nv2a_int.h" + +NV2AStats g_nv2a_stats; + +void nv2a_profile_increment(void) +{ + int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME); + const int64_t fps_update_interval = 250000; + g_nv2a_stats.last_flip_time = now; + + static int64_t frame_count = 0; + frame_count++; + + static int64_t ts = 0; + int64_t delta = now - ts; + if (delta >= fps_update_interval) { + g_nv2a_stats.increment_fps = frame_count * 1000000 / delta; + ts = now; + frame_count = 0; + } +} + +void nv2a_profile_flip_stall(void) +{ + int64_t now = qemu_clock_get_us(QEMU_CLOCK_REALTIME); + int64_t render_time = (now-g_nv2a_stats.last_flip_time)/1000; + + g_nv2a_stats.frame_working.mspf = render_time; + g_nv2a_stats.frame_history[g_nv2a_stats.frame_ptr] = + g_nv2a_stats.frame_working; + g_nv2a_stats.frame_ptr = + (g_nv2a_stats.frame_ptr + 1) % NV2A_PROF_NUM_FRAMES; + g_nv2a_stats.frame_count++; + memset(&g_nv2a_stats.frame_working, 0, sizeof(g_nv2a_stats.frame_working)); +} + +const char *nv2a_profile_get_counter_name(unsigned int cnt) +{ + const char *default_names[NV2A_PROF__COUNT] = { + #define _X(x) stringify(x), + NV2A_PROF_COUNTERS_XMAC + #undef _X + }; + + assert(cnt < NV2A_PROF__COUNT); + return default_names[cnt] + 10; /* 'NV2A_PROF_' */ +} + +int nv2a_profile_get_counter_value(unsigned int cnt) +{ + assert(cnt < NV2A_PROF__COUNT); + unsigned int idx = (g_nv2a_stats.frame_ptr + NV2A_PROF_NUM_FRAMES - 1) % + NV2A_PROF_NUM_FRAMES; + return g_nv2a_stats.frame_history[idx].counters[cnt]; +} diff --git a/hw/xbox/nv2a/psh.h b/hw/xbox/nv2a/pgraph/psh.h similarity index 95% rename from hw/xbox/nv2a/psh.h rename to hw/xbox/nv2a/pgraph/psh.h index 65ef4e43a20..13660457078 100644 --- a/hw/xbox/nv2a/psh.h +++ b/hw/xbox/nv2a/pgraph/psh.h @@ -20,7 +20,8 @@ #ifndef HW_NV2A_PSH_H #define HW_NV2A_PSH_H -#include "shaders_common.h" +#include +#include enum PshAlphaFunc { ALPHA_FUNC_NEVER, @@ -51,6 +52,8 @@ enum ConvolutionFilter { }; typedef struct PshState { + bool vulkan; + /* fragment 
shader - register combiner stuff */ uint32_t combiner_control; uint32_t shader_stage_program; @@ -67,6 +70,8 @@ typedef struct PshState { bool compare_mode[4][4]; bool alphakill[4]; enum ConvolutionFilter conv_tex[4]; + bool tex_x8y24[4]; + int dim_tex[4]; float border_logical_size[4][3]; float border_inv_real_size[4][3]; @@ -82,6 +87,4 @@ typedef struct PshState { bool smooth_shading; } PshState; -MString *psh_translate(const PshState state); - #endif diff --git a/hw/xbox/nv2a/pgraph/rdi.c b/hw/xbox/nv2a/pgraph/rdi.c new file mode 100644 index 00000000000..297c7a67c0c --- /dev/null +++ b/hw/xbox/nv2a/pgraph/rdi.c @@ -0,0 +1,60 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "../nv2a_int.h" + +uint32_t pgraph_rdi_read(PGRAPHState *pg, unsigned int select, + unsigned int address) +{ + uint32_t r = 0; + switch(select) { + case RDI_INDEX_VTX_CONSTANTS0: + case RDI_INDEX_VTX_CONSTANTS1: + assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS); + r = pg->vsh_constants[address / 4][3 - address % 4]; + break; + default: + fprintf(stderr, "nv2a: unknown rdi read select 0x%x address 0x%x\n", + select, address); + assert(false); + break; + } + return r; +} + +void pgraph_rdi_write(PGRAPHState *pg, unsigned int select, + unsigned int address, uint32_t val) +{ + switch(select) { + case RDI_INDEX_VTX_CONSTANTS0: + case RDI_INDEX_VTX_CONSTANTS1: + assert(false); /* Untested */ + assert((address / 4) < NV2A_VERTEXSHADER_CONSTANTS); + pg->vsh_constants_dirty[address / 4] |= + (val != pg->vsh_constants[address / 4][3 - address % 4]); + pg->vsh_constants[address / 4][3 - address % 4] = val; + break; + default: + NV2A_DPRINTF("unknown rdi write select 0x%x, address 0x%x, val 0x%08x\n", + select, address, val); + break; + } +} diff --git a/hw/xbox/nv2a/s3tc.c b/hw/xbox/nv2a/pgraph/s3tc.c similarity index 71% rename from hw/xbox/nv2a/s3tc.c rename to hw/xbox/nv2a/pgraph/s3tc.c index 454cc43aee4..affd058e667 100644 --- a/hw/xbox/nv2a/s3tc.c +++ b/hw/xbox/nv2a/pgraph/s3tc.c @@ -1,5 +1,5 @@ /* - * QEMU texture decompression routines + * S3TC Texture Decompression * * Copyright (c) 2020 Wilhelm Kovatch * @@ -25,13 +25,9 @@ #include "qemu/osdep.h" #include "s3tc.h" -static inline void decode_bc1_colors(uint16_t c0, - uint16_t c1, - uint8_t r[4], - uint8_t g[4], - uint8_t b[4], - uint8_t a[16], - bool transparent) +static void decode_bc1_colors(uint16_t c0, uint16_t c1, uint8_t r[4], + uint8_t g[4], uint8_t b[4], uint8_t a[16], + bool transparent) { r[0] = ((c0 & 0xF800) >> 8) * 0xFF / 0xF8, g[0] = ((c0 & 0x07E0) >> 3) * 0xFF / 0xFC, @@ -66,15 +62,10 @@ static inline void decode_bc1_colors(uint16_t c0, } } -static inline void 
write_block_to_texture(uint8_t *converted_data, - uint32_t indices, - int i, int j, int width, - int z_pos_factor, - uint8_t r[4], - uint8_t g[4], - uint8_t b[4], - uint8_t a[16], - bool separate_alpha) +static void write_block_to_texture(uint8_t *converted_data, uint32_t indices, + int i, int j, int width, int z_pos_factor, + uint8_t r[4], uint8_t g[4], uint8_t b[4], + uint8_t a[16], bool separate_alpha) { int x0 = i * 4, y0 = j * 4; @@ -89,16 +80,18 @@ static inline void write_block_to_texture(uint8_t *converted_data, int xy_index = y_index + x - x0; uint8_t index = (indices >> 2 * xy_index) & 0x03; uint8_t alpha_index = separate_alpha ? xy_index : index; - uint32_t color = (r[index] << 24) | (g[index] << 16) | (b[index] << 8) | a[alpha_index]; - *(uint32_t*)(converted_data + (z_plus_y_pos_factor + x) * 4) = color; + uint8_t *p = converted_data + (z_plus_y_pos_factor + x) * 4; + *p++ = r[index]; + *p++ = g[index]; + *p++ = b[index]; + *p++ = a[alpha_index]; } } } -static inline void decompress_dxt1_block(const uint8_t block_data[8], - uint8_t *converted_data, - int i, int j, int width, - int z_pos_factor) +static void decompress_dxt1_block(const uint8_t block_data[8], + uint8_t *converted_data, int i, int j, + int width, int z_pos_factor) { uint16_t c0 = ((uint16_t*)block_data)[0], c1 = ((uint16_t*)block_data)[1]; @@ -111,10 +104,9 @@ static inline void decompress_dxt1_block(const uint8_t block_data[8], r, g, b, a, false); } -static inline void decompress_dxt3_block(const uint8_t block_data[16], - uint8_t *converted_data, - int i, int j, int width, - int z_pos_factor) +static void decompress_dxt3_block(const uint8_t block_data[16], + uint8_t *converted_data, int i, int j, + int width, int z_pos_factor) { uint16_t c0 = ((uint16_t*)block_data)[4], c1 = ((uint16_t*)block_data)[5]; @@ -132,10 +124,9 @@ static inline void decompress_dxt3_block(const uint8_t block_data[16], r, g, b, a, true); } -static inline void decompress_dxt5_block(const uint8_t block_data[16], - 
uint8_t *converted_data, - int i, int j, int width, - int z_pos_factor) +static void decompress_dxt5_block(const uint8_t block_data[16], + uint8_t *converted_data, int i, int j, + int width, int z_pos_factor) { uint16_t c0 = ((uint16_t*)block_data)[4], c1 = ((uint16_t*)block_data)[5]; @@ -173,11 +164,9 @@ static inline void decompress_dxt5_block(const uint8_t block_data[16], r, g, b, a, true); } -uint8_t *decompress_3d_texture_data(GLint color_format, - const uint8_t *data, - unsigned int width, - unsigned int height, - unsigned int depth) +uint8_t *s3tc_decompress_3d(enum S3TC_DECOMPRESS_FORMAT color_format, + const uint8_t *data, unsigned int width, + unsigned int height, unsigned int depth) { assert((width > 0) && (width % 4 == 0)); assert((height > 0) && (height % 4 == 0)); @@ -196,13 +185,13 @@ uint8_t *decompress_3d_texture_data(GLint color_format, int sub_block_index = block_index * block_depth + slice; int z_pos_factor = (k * block_depth + slice) * width * height; - if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { + if (color_format == S3TC_DECOMPRESS_FORMAT_DXT1) { decompress_dxt1_block(data + 8 * sub_block_index, converted_data, i, j, width, z_pos_factor); - } else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT3_EXT) { + } else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT3) { decompress_dxt3_block(data + 16 * sub_block_index, converted_data, i, j, width, z_pos_factor); - } else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT5_EXT) { + } else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT5) { decompress_dxt5_block(data + 16 * sub_block_index, converted_data, i, j, width, z_pos_factor); } else { @@ -216,8 +205,9 @@ uint8_t *decompress_3d_texture_data(GLint color_format, return converted_data; } -uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data, - unsigned int width, unsigned int height) +uint8_t *s3tc_decompress_2d(enum S3TC_DECOMPRESS_FORMAT color_format, + const uint8_t *data, unsigned int width, + unsigned int height) 
{ assert((width > 0) && (width % 4 == 0)); assert((height > 0) && (height % 4 == 0)); @@ -226,13 +216,13 @@ uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data, for (int j = 0; j < num_blocks_y; j++) { for (int i = 0; i < num_blocks_x; i++) { int block_index = j * num_blocks_x + i; - if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT1_EXT) { + if (color_format == S3TC_DECOMPRESS_FORMAT_DXT1) { decompress_dxt1_block(data + 8 * block_index, converted_data, i, j, width, 0); - } else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT3_EXT) { + } else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT3) { decompress_dxt3_block(data + 16 * block_index, converted_data, i, j, width, 0); - } else if (color_format == GL_COMPRESSED_RGBA_S3TC_DXT5_EXT) { + } else if (color_format == S3TC_DECOMPRESS_FORMAT_DXT5) { decompress_dxt5_block(data + 16 * block_index, converted_data, i, j, width, 0); } else { diff --git a/hw/xbox/nv2a/s3tc.h b/hw/xbox/nv2a/pgraph/s3tc.h similarity index 63% rename from hw/xbox/nv2a/s3tc.h rename to hw/xbox/nv2a/pgraph/s3tc.h index 87dad0d3c4f..6a10074e748 100644 --- a/hw/xbox/nv2a/s3tc.h +++ b/hw/xbox/nv2a/pgraph/s3tc.h @@ -1,5 +1,5 @@ /* - * QEMU texture decompression routines + * S3TC Texture Decompression * * Copyright (c) 2020 Wilhelm Kovatch * @@ -22,18 +22,23 @@ * THE SOFTWARE. 
*/ -#ifndef S3TC_H -#define S3TC_H +#ifndef HW_XBOX_NV2A_PGRAPH_S3TC_H +#define HW_XBOX_NV2A_PGRAPH_S3TC_H -#include "gl/gloffscreen.h" +#include -uint8_t *decompress_3d_texture_data(GLint color_format, - const uint8_t *data, - unsigned int width, - unsigned int height, - unsigned int depth); +enum S3TC_DECOMPRESS_FORMAT { + S3TC_DECOMPRESS_FORMAT_DXT1, + S3TC_DECOMPRESS_FORMAT_DXT3, + S3TC_DECOMPRESS_FORMAT_DXT5, +}; -uint8_t *decompress_2d_texture_data(GLint color_format, const uint8_t *data, - unsigned int width, unsigned int height); +uint8_t *s3tc_decompress_3d(enum S3TC_DECOMPRESS_FORMAT color_format, + const uint8_t *data, unsigned int width, + unsigned int height, unsigned int depth); + +uint8_t *s3tc_decompress_2d(enum S3TC_DECOMPRESS_FORMAT color_format, + const uint8_t *data, unsigned int width, + unsigned int height); #endif diff --git a/hw/xbox/nv2a/pgraph/shaders.c b/hw/xbox/nv2a/pgraph/shaders.c new file mode 100644 index 00000000000..8d2c77a535b --- /dev/null +++ b/hw/xbox/nv2a/pgraph/shaders.c @@ -0,0 +1,295 @@ +/* + * Geforce NV2A PGRAPH OpenGL Renderer + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2020-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/debug.h" +#include "texture.h" +#include "pgraph.h" +#include "shaders.h" + +ShaderState pgraph_get_shader_state(PGRAPHState *pg) +{ + bool vertex_program = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), + NV_PGRAPH_CSV0_D_MODE) == 2; + + bool fixed_function = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), + NV_PGRAPH_CSV0_D_MODE) == 0; + + int program_start = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), + NV_PGRAPH_CSV0_C_CHEOPS_PROGRAM_START); + + pg->program_data_dirty = false; + + ShaderState state; + + // We will hash it, so make sure any padding is zeroed + memset(&state, 0, sizeof(ShaderState)); + + state.surface_scale_factor = pg->surface_scale_factor; + + state.compressed_attrs = pg->compressed_attrs; + state.uniform_attrs = pg->uniform_attrs; + state.swizzle_attrs = pg->swizzle_attrs; + + /* register combiner stuff */ + state.psh.window_clip_exclusive = + pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_WINDOWCLIPTYPE; + state.psh.combiner_control = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL); + state.psh.shader_stage_program = pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG); + state.psh.other_stage_input = pgraph_reg_r(pg, NV_PGRAPH_SHADERCTL); + state.psh.final_inputs_0 = pgraph_reg_r(pg, NV_PGRAPH_COMBINESPECFOG0); + state.psh.final_inputs_1 = pgraph_reg_r(pg, NV_PGRAPH_COMBINESPECFOG1); + + state.psh.alpha_test = + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & NV_PGRAPH_CONTROL_0_ALPHATESTENABLE; + state.psh.alpha_func = (enum PshAlphaFunc)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), NV_PGRAPH_CONTROL_0_ALPHAFUNC); + + state.psh.point_sprite = pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & + NV_PGRAPH_SETUPRASTER_POINTSMOOTHENABLE; + + state.psh.shadow_depth_func = (enum PshShadowDepthFunc)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_SHADOWCTL), NV_PGRAPH_SHADOWCTL_SHADOW_ZFUNC); + + state.fixed_function = fixed_function; + + /* fixed function stuff */ + if (fixed_function) { + state.skinning = (enum 
VshSkinning)GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), + NV_PGRAPH_CSV0_D_SKIN); + state.lighting = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_LIGHTING); + state.normalization = + pgraph_reg_r(pg, NV_PGRAPH_CSV0_C) & NV_PGRAPH_CSV0_C_NORMALIZATION_ENABLE; + + /* color material */ + state.emission_src = (enum MaterialColorSource)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_EMISSION); + state.ambient_src = (enum MaterialColorSource)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_AMBIENT); + state.diffuse_src = (enum MaterialColorSource)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_DIFFUSE); + state.specular_src = (enum MaterialColorSource)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CSV0_C), NV_PGRAPH_CSV0_C_SPECULAR); + } + + /* vertex program stuff */ + state.vertex_program = vertex_program, + state.z_perspective = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) & + NV_PGRAPH_CONTROL_0_Z_PERSPECTIVE_ENABLE; + + state.point_params_enable = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), + NV_PGRAPH_CSV0_D_POINTPARAMSENABLE); + state.point_size = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_POINTSIZE), NV097_SET_POINT_SIZE_V) / 8.0f; + if (state.point_params_enable) { + for (int i = 0; i < 8; i++) { + state.point_params[i] = pg->point_params[i]; + } + } + + /* geometry shader stuff */ + state.primitive_mode = (enum ShaderPrimitiveMode)pg->primitive_mode; + state.polygon_front_mode = (enum ShaderPolygonMode)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), NV_PGRAPH_SETUPRASTER_FRONTFACEMODE); + state.polygon_back_mode = (enum ShaderPolygonMode)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), NV_PGRAPH_SETUPRASTER_BACKFACEMODE); + + state.smooth_shading = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), + NV_PGRAPH_CONTROL_3_SHADEMODE) == + NV_PGRAPH_CONTROL_3_SHADEMODE_SMOOTH; + state.psh.smooth_shading = state.smooth_shading; + + state.program_length = 0; + + if (vertex_program) { + // copy in vertex program 
tokens + for (int i = program_start; i < NV2A_MAX_TRANSFORM_PROGRAM_LENGTH; + i++) { + uint32_t *cur_token = (uint32_t *)&pg->program_data[i]; + memcpy(&state.program_data[state.program_length], cur_token, + VSH_TOKEN_SIZE * sizeof(uint32_t)); + state.program_length++; + + if (vsh_get_field(cur_token, FLD_FINAL)) { + break; + } + } + } + + /* Texgen */ + for (int i = 0; i < 4; i++) { + unsigned int reg = (i < 2) ? NV_PGRAPH_CSV1_A : NV_PGRAPH_CSV1_B; + for (int j = 0; j < 4; j++) { + unsigned int masks[] = { + (i % 2) ? NV_PGRAPH_CSV1_A_T1_S : NV_PGRAPH_CSV1_A_T0_S, + (i % 2) ? NV_PGRAPH_CSV1_A_T1_T : NV_PGRAPH_CSV1_A_T0_T, + (i % 2) ? NV_PGRAPH_CSV1_A_T1_R : NV_PGRAPH_CSV1_A_T0_R, + (i % 2) ? NV_PGRAPH_CSV1_A_T1_Q : NV_PGRAPH_CSV1_A_T0_Q + }; + state.texgen[i][j] = + (enum VshTexgen)GET_MASK(pgraph_reg_r(pg, reg), masks[j]); + } + } + + /* Fog */ + state.fog_enable = + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3) & NV_PGRAPH_CONTROL_3_FOGENABLE; + if (state.fog_enable) { + /*FIXME: Use CSV0_D? */ + state.fog_mode = (enum VshFogMode)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3), NV_PGRAPH_CONTROL_3_FOG_MODE); + state.foggen = (enum VshFoggen)GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), + NV_PGRAPH_CSV0_D_FOGGENMODE); + } else { + /* FIXME: Do we still pass the fogmode? 
*/ + state.fog_mode = (enum VshFogMode)0; + state.foggen = (enum VshFoggen)0; + } + + /* Texture matrices */ + for (int i = 0; i < 4; i++) { + state.texture_matrix_enable[i] = pg->texture_matrix_enable[i]; + } + + /* Lighting */ + if (state.lighting) { + for (int i = 0; i < NV2A_MAX_LIGHTS; i++) { + state.light[i] = (enum VshLight)GET_MASK( + pgraph_reg_r(pg, NV_PGRAPH_CSV0_D), NV_PGRAPH_CSV0_D_LIGHT0 << (i * 2)); + } + } + + /* Copy content of enabled combiner stages */ + int num_stages = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL) & 0xFF; + for (int i = 0; i < num_stages; i++) { + state.psh.rgb_inputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINECOLORI0 + i * 4); + state.psh.rgb_outputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINECOLORO0 + i * 4); + state.psh.alpha_inputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEALPHAI0 + i * 4); + state.psh.alpha_outputs[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEALPHAO0 + i * 4); + // constant_0[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4); + // constant_1[i] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4); + } + + for (int i = 0; i < 4; i++) { + for (int j = 0; j < 4; j++) { + state.psh.compare_mode[i][j] = + (pgraph_reg_r(pg, NV_PGRAPH_SHADERCLIPMODE) >> (4 * i + j)) & 1; + } + + uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + i * 4); + bool enabled = pgraph_is_texture_stage_active(pg, i) && + (ctl_0 & NV_PGRAPH_TEXCTL0_0_ENABLE); + if (!enabled) { + continue; + } + + state.psh.alphakill[i] = ctl_0 & NV_PGRAPH_TEXCTL0_0_ALPHAKILLEN; + + uint32_t tex_fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i * 4); + state.psh.dim_tex[i] = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_DIMENSIONALITY); + + unsigned int color_format = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_COLOR); + BasicColorFormatInfo f = kelvin_color_format_info_map[color_format]; + state.psh.rect_tex[i] = f.linear; + state.psh.tex_x8y24[i] = color_format == NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED || + color_format == 
NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT; + + uint32_t border_source = + GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BORDER_SOURCE); + bool cubemap = GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE); + state.psh.border_logical_size[i][0] = 0.0f; + state.psh.border_logical_size[i][1] = 0.0f; + state.psh.border_logical_size[i][2] = 0.0f; + if (border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR) { + if (!f.linear && !cubemap) { + // The actual texture will be (at least) double the reported + // size and shifted by a 4 texel border but texture coordinates + // will still be relative to the reported size. + unsigned int reported_width = + 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U); + unsigned int reported_height = + 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V); + unsigned int reported_depth = + 1 << GET_MASK(tex_fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P); + + state.psh.border_logical_size[i][0] = reported_width; + state.psh.border_logical_size[i][1] = reported_height; + state.psh.border_logical_size[i][2] = reported_depth; + + if (reported_width < 8) { + state.psh.border_inv_real_size[i][0] = 0.0625f; + } else { + state.psh.border_inv_real_size[i][0] = + 1.0f / (reported_width * 2.0f); + } + if (reported_height < 8) { + state.psh.border_inv_real_size[i][1] = 0.0625f; + } else { + state.psh.border_inv_real_size[i][1] = + 1.0f / (reported_height * 2.0f); + } + if (reported_depth < 8) { + state.psh.border_inv_real_size[i][2] = 0.0625f; + } else { + state.psh.border_inv_real_size[i][2] = + 1.0f / (reported_depth * 2.0f); + } + } else { + NV2A_UNIMPLEMENTED( + "Border source texture with linear %d cubemap %d", f.linear, + cubemap); + } + } + + /* Keep track of whether texture data has been loaded as signed + * normalized integers or not. This dictates whether or not we will need + * to re-map in fragment shader for certain texture modes (e.g. + * bumpenvmap). 
+ * + * FIXME: When signed texture data is loaded as unsigned and remapped in + * fragment shader, there may be interpolation artifacts. Fix this to + * support signed textures more appropriately. + */ +#if 0 // FIXME + state.psh.snorm_tex[i] = (f.gl_internal_format == GL_RGB8_SNORM) + || (f.gl_internal_format == GL_RG8_SNORM); +#endif + state.psh.shadow_map[i] = f.depth; + + uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + i * 4); + unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN); + enum ConvolutionFilter kernel = CONVOLUTION_FILTER_DISABLED; + /* FIXME: We do not distinguish between min and mag when + * performing convolution. Just use it if specified for min (common AA + * case). + */ + if (min_filter == NV_PGRAPH_TEXFILTER0_MIN_CONVOLUTION_2D_LOD0) { + int k = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL); + assert(k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_QUINCUNX || + k == NV_PGRAPH_TEXFILTER0_CONVOLUTION_KERNEL_GAUSSIAN_3); + kernel = (enum ConvolutionFilter)k; + } + + state.psh.conv_tex[i] = kernel; + } + + return state; +} diff --git a/hw/xbox/nv2a/shaders.h b/hw/xbox/nv2a/pgraph/shaders.h similarity index 56% rename from hw/xbox/nv2a/shaders.h rename to hw/xbox/nv2a/pgraph/shaders.h index 0362da10993..71febe2e2f8 100644 --- a/hw/xbox/nv2a/shaders.h +++ b/hw/xbox/nv2a/pgraph/shaders.h @@ -18,17 +18,14 @@ * License along with this library; if not, see . 
*/ -#ifndef HW_NV2A_SHADERS_H -#define HW_NV2A_SHADERS_H +#ifndef HW_XBOX_NV2A_PGRAPH_SHADERS_H +#define HW_XBOX_NV2A_PGRAPH_SHADERS_H -#include "qemu/thread.h" -#include "qapi/qmp/qstring.h" -#include "gl/gloffscreen.h" +#include +#include "hw/xbox/nv2a/nv2a_regs.h" -#include "nv2a_regs.h" #include "vsh.h" #include "psh.h" -#include "lru.h" enum ShaderPrimitiveMode { PRIM_TYPE_INVALID, @@ -57,10 +54,14 @@ enum MaterialColorSource { }; typedef struct ShaderState { + bool vulkan; + bool use_push_constants_for_uniform_attrs; unsigned int surface_scale_factor; PshState psh; uint16_t compressed_attrs; + uint16_t uniform_attrs; + uint16_t swizzle_attrs; bool texture_matrix_enable[4]; enum VshTexgen texgen[4][4]; @@ -101,61 +102,8 @@ typedef struct ShaderState { bool smooth_shading; } ShaderState; -typedef struct ShaderBinding { - GLuint gl_program; - GLenum gl_primitive_mode; - - GLint psh_constant_loc[9][2]; - GLint alpha_ref_loc; - - GLint bump_mat_loc[NV2A_MAX_TEXTURES]; - GLint bump_scale_loc[NV2A_MAX_TEXTURES]; - GLint bump_offset_loc[NV2A_MAX_TEXTURES]; - GLint tex_scale_loc[NV2A_MAX_TEXTURES]; - - GLint surface_size_loc; - GLint clip_range_loc; - - GLint vsh_constant_loc[NV2A_VERTEXSHADER_CONSTANTS]; - uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; - - GLint inv_viewport_loc; - GLint ltctxa_loc[NV2A_LTCTXA_COUNT]; - GLint ltctxb_loc[NV2A_LTCTXB_COUNT]; - GLint ltc1_loc[NV2A_LTC1_COUNT]; - - GLint fog_color_loc; - GLint fog_param_loc[2]; - GLint light_infinite_half_vector_loc[NV2A_MAX_LIGHTS]; - GLint light_infinite_direction_loc[NV2A_MAX_LIGHTS]; - GLint light_local_position_loc[NV2A_MAX_LIGHTS]; - GLint light_local_attenuation_loc[NV2A_MAX_LIGHTS]; - - GLint clip_region_loc[8]; - - GLint material_alpha_loc; -} ShaderBinding; - -typedef struct ShaderLruNode { - LruNode node; - bool cached; - void *program; - size_t program_size; - GLenum program_format; - ShaderState state; - ShaderBinding *binding; - QemuThread *save_thread; -} ShaderLruNode; - typedef 
struct PGRAPHState PGRAPHState; -GLenum get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode); -void update_shader_constant_locations(ShaderBinding *binding, const ShaderState *state); -ShaderBinding *generate_shaders(const ShaderState *state); - -void shader_cache_init(PGRAPHState *pg); -void shader_write_cache_reload_list(PGRAPHState *pg); -bool shader_load_from_memory(ShaderLruNode *snode); -void shader_cache_to_disk(ShaderLruNode *snode); +ShaderState pgraph_get_shader_state(PGRAPHState *pg); #endif diff --git a/hw/xbox/nv2a/pgraph/surface.h b/hw/xbox/nv2a/pgraph/surface.h new file mode 100644 index 00000000000..d51bc04ea4f --- /dev/null +++ b/hw/xbox/nv2a/pgraph/surface.h @@ -0,0 +1,35 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_SURFACE_H +#define HW_XBOX_NV2A_PGRAPH_SURFACE_H + +typedef struct SurfaceShape { + unsigned int z_format; + unsigned int color_format; + unsigned int zeta_format; + unsigned int log_width, log_height; + unsigned int clip_x, clip_y; + unsigned int clip_width, clip_height; + unsigned int anti_aliasing; +} SurfaceShape; + +#endif diff --git a/hw/xbox/nv2a/swizzle.c b/hw/xbox/nv2a/pgraph/swizzle.c similarity index 100% rename from hw/xbox/nv2a/swizzle.c rename to hw/xbox/nv2a/pgraph/swizzle.c diff --git a/hw/xbox/nv2a/swizzle.h b/hw/xbox/nv2a/pgraph/swizzle.h similarity index 94% rename from hw/xbox/nv2a/swizzle.h rename to hw/xbox/nv2a/pgraph/swizzle.h index 21889b39cf2..78ff0740a44 100644 --- a/hw/xbox/nv2a/swizzle.h +++ b/hw/xbox/nv2a/pgraph/swizzle.h @@ -18,8 +18,10 @@ * License along with this library; if not, see . */ -#ifndef HW_XBOX_SWIZZLE_H -#define HW_XBOX_SWIZZLE_H +#ifndef HW_XBOX_NV2A_PGRAPH_SWIZZLE_H +#define HW_XBOX_NV2A_PGRAPH_SWIZZLE_H + +#include void swizzle_box( const uint8_t *src_buf, diff --git a/hw/xbox/nv2a/pgraph/texture.c b/hw/xbox/nv2a/pgraph/texture.c new file mode 100644 index 00000000000..e5350ea8d48 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/texture.c @@ -0,0 +1,405 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "texture.h" +#include "util.h" + +const BasicColorFormatInfo kelvin_color_format_info_map[66] = { + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = { 1, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = { 1, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = { 4, false }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = { 1, false }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = { 4, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = { 1, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = { 1, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = { 1, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = { 4, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = { 1, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = { 2, true }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = { 2, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = { 2, false }, + 
[NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = { 2, false }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = { 2, true }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = { 2, false, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = { 4, true, + true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = { 4, true, + true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = { 2, true, + true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = { 2, true, + true }, + + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = { 2, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = { 4, false }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = { 4, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = { 4, true }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = { 4, true }, +}; + +hwaddr pgraph_get_texture_phys_addr(PGRAPHState *pg, int texture_idx) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + int i = texture_idx; + + uint32_t fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i*4); + unsigned int dma_select = + GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CONTEXT_DMA); + + hwaddr offset = pgraph_reg_r(pg, NV_PGRAPH_TEXOFFSET0 + i*4); + + hwaddr dma_len; + uint8_t *texture_data; + if (dma_select) { + texture_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &dma_len); + } else { + texture_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &dma_len); + } + assert(offset < dma_len); + texture_data += offset; + + return texture_data - d->vram_ptr; +} + +hwaddr pgraph_get_texture_palette_phys_addr_length(PGRAPHState *pg, int texture_idx, size_t *length) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + int i = texture_idx; + + uint32_t palette = pgraph_reg_r(pg, NV_PGRAPH_TEXPALETTE0 + i*4); + 
bool palette_dma_select = + GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_CONTEXT_DMA); + unsigned int palette_length_index = + GET_MASK(palette, NV_PGRAPH_TEXPALETTE0_LENGTH); + unsigned int palette_offset = + palette & NV_PGRAPH_TEXPALETTE0_OFFSET; + + unsigned int palette_length = 0; + switch (palette_length_index) { + case NV_PGRAPH_TEXPALETTE0_LENGTH_256: palette_length = 256; break; + case NV_PGRAPH_TEXPALETTE0_LENGTH_128: palette_length = 128; break; + case NV_PGRAPH_TEXPALETTE0_LENGTH_64: palette_length = 64; break; + case NV_PGRAPH_TEXPALETTE0_LENGTH_32: palette_length = 32; break; + default: assert(false); break; + } + if (length) { + *length = palette_length; + } + + hwaddr palette_dma_len; + uint8_t *palette_data; + if (palette_dma_select) { + palette_data = (uint8_t*)nv_dma_map(d, pg->dma_b, &palette_dma_len); + } else { + palette_data = (uint8_t*)nv_dma_map(d, pg->dma_a, &palette_dma_len); + } + assert(palette_offset < palette_dma_len); + palette_data += palette_offset; + + return palette_data - d->vram_ptr; +} + +size_t pgraph_get_texture_length(PGRAPHState *pg, TextureShape *shape) +{ + BasicColorFormatInfo f = kelvin_color_format_info_map[shape->color_format]; + size_t length = 0; + + if (f.linear) { + assert(shape->cubemap == false); + assert(shape->dimensionality == 2); + length = shape->height * shape->pitch; + } else { + if (shape->dimensionality >= 2) { + unsigned int w = shape->width, h = shape->height; + int level; + if (!pgraph_is_texture_format_compressed(pg, shape->color_format)) { + for (level = 0; level < shape->levels; level++) { + w = MAX(w, 1); + h = MAX(h, 1); + length += w * h * f.bytes_per_pixel; + w /= 2; + h /= 2; + } + } else { + /* Compressed textures are a bit different */ + unsigned int block_size = + shape->color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5 ? 
+ 8 : 16; + for (level = 0; level < shape->levels; level++) { + w = MAX(w, 1); + h = MAX(h, 1); + unsigned int phys_w = (w + 3) & ~3, + phys_h = (h + 3) & ~3; + length += phys_w/4 * phys_h/4 * block_size; + w /= 2; + h /= 2; + } + } + if (shape->cubemap) { + assert(shape->dimensionality == 2); + length = (length + NV2A_CUBEMAP_FACE_ALIGNMENT - 1) & ~(NV2A_CUBEMAP_FACE_ALIGNMENT - 1); + length *= 6; + } + if (shape->dimensionality >= 3) { + length *= shape->depth; + } + } + } + + return length; +} + +TextureShape pgraph_get_texture_shape(PGRAPHState *pg, int texture_idx) +{ + int i = texture_idx; + + uint32_t ctl_0 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL0_0 + i*4); + uint32_t ctl_1 = pgraph_reg_r(pg, NV_PGRAPH_TEXCTL1_0 + i*4); + uint32_t fmt = pgraph_reg_r(pg, NV_PGRAPH_TEXFMT0 + i*4); + +#if DEBUG_NV2A + uint32_t filter = pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + i*4); + uint32_t address = pgraph_reg_r(pg, NV_PGRAPH_TEXADDRESS0 + i*4); +#endif + + unsigned int min_mipmap_level = + GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MIN_LOD_CLAMP); + unsigned int max_mipmap_level = + GET_MASK(ctl_0, NV_PGRAPH_TEXCTL0_0_MAX_LOD_CLAMP); + + unsigned int pitch = + GET_MASK(ctl_1, NV_PGRAPH_TEXCTL1_0_IMAGE_PITCH); + + bool cubemap = + GET_MASK(fmt, NV_PGRAPH_TEXFMT0_CUBEMAPENABLE); + unsigned int dimensionality = + GET_MASK(fmt, NV_PGRAPH_TEXFMT0_DIMENSIONALITY); + + int tex_mode = (pgraph_reg_r(pg, NV_PGRAPH_SHADERPROG) >> (texture_idx * 5)) & 0x1F; + if (tex_mode == 0x02) { + assert(pgraph_is_texture_enabled(pg, texture_idx)); + // assert(state.dimensionality == 3); + + // OVERRIDE + // dimensionality = 3; + } + + unsigned int color_format = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_COLOR); + unsigned int levels = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_MIPMAP_LEVELS); + unsigned int log_width = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_U); + unsigned int log_height = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_V); + unsigned int log_depth = GET_MASK(fmt, NV_PGRAPH_TEXFMT0_BASE_SIZE_P); + + unsigned int 
rect_width = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_TEXIMAGERECT0 + i*4), + NV_PGRAPH_TEXIMAGERECT0_WIDTH); + unsigned int rect_height = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_TEXIMAGERECT0 + i*4), + NV_PGRAPH_TEXIMAGERECT0_HEIGHT); +#ifdef DEBUG_NV2A + unsigned int lod_bias = + GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIPMAP_LOD_BIAS); +#endif + unsigned int border_source = GET_MASK(fmt, + NV_PGRAPH_TEXFMT0_BORDER_SOURCE); + + NV2A_DPRINTF(" texture %d is format 0x%x, " + "off 0x%" HWADDR_PRIx " (r %d, %d or %d, %d, %d; %d%s)," + " filter %x %x, levels %d-%d %d bias %d\n", + i, color_format, address, + rect_width, rect_height, + 1 << log_width, 1 << log_height, 1 << log_depth, + pitch, + cubemap ? "; cubemap" : "", + GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN), + GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG), + min_mipmap_level, max_mipmap_level, levels, + lod_bias); + + assert(color_format < ARRAY_SIZE(kelvin_color_format_info_map)); + BasicColorFormatInfo f = kelvin_color_format_info_map[color_format]; + if (f.bytes_per_pixel == 0) { + fprintf(stderr, "nv2a: unimplemented texture color format 0x%x\n", + color_format); + abort(); + } + + unsigned int width, height, depth; + if (f.linear) { + assert(dimensionality == 2); + width = rect_width; + height = rect_height; + depth = 1; + } else { + width = 1 << log_width; + height = 1 << log_height; + depth = 1 << log_depth; + pitch = 0; + + levels = MIN(levels, max_mipmap_level + 1); + + /* Discard mipmap levels that would be smaller than 1x1. + * FIXME: Is this actually needed? + * + * >> Level 0: 32 x 4 + * Level 1: 16 x 2 + * Level 2: 8 x 1 + * Level 3: 4 x 1 + * Level 4: 2 x 1 + * Level 5: 1 x 1 + */ + levels = MIN(levels, MAX(log_width, log_height) + 1); + assert(levels > 0); + + if (dimensionality == 3) { + /* FIXME: What about 3D mipmaps? */ + if (log_width < 2 || log_height < 2) { + /* Base level is smaller than 4x4... 
*/ + levels = 1; + } else { + levels = MIN(levels, MIN(log_width, log_height) - 1); + } + } + min_mipmap_level = MIN(levels-1, min_mipmap_level); + max_mipmap_level = MIN(levels-1, max_mipmap_level); + } + + TextureShape shape; + + // We will hash it, so make sure any padding is zero + memset(&shape, 0, sizeof(shape)); + + shape.cubemap = cubemap; + shape.dimensionality = dimensionality; + shape.color_format = color_format; + shape.levels = levels; + shape.width = width; + shape.height = height; + shape.depth = depth; + shape.min_mipmap_level = min_mipmap_level; + shape.max_mipmap_level = max_mipmap_level; + shape.pitch = pitch; + shape.border = border_source != NV_PGRAPH_TEXFMT0_BORDER_SOURCE_COLOR; + return shape; +} + +uint8_t *pgraph_convert_texture_data(const TextureShape s, const uint8_t *data, + const uint8_t *palette_data, + unsigned int width, unsigned int height, + unsigned int depth, unsigned int row_pitch, + unsigned int slice_pitch, + size_t *converted_size) +{ + size_t size = 0; + uint8_t *converted_data; + + if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8) { + size = width * height * depth * 4; + converted_data = g_malloc(size); + const uint8_t *src = data; + uint32_t *dst = (uint32_t *)converted_data; + for (int z = 0; z < depth; z++) { + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + uint8_t index = src[y * row_pitch + x]; + uint32_t color = *(uint32_t *)(palette_data + index * 4); + *dst++ = color; + } + } + src += slice_pitch; + } + } else if (s.color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8 || + s.color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8) { + // TODO: Investigate whether a non-1 depth is possible. + // Generally the hardware asserts when attempting to use volumetric + // textures in linear formats. 
+ assert(depth == 1); /* FIXME */ + // FIXME: only valid if control0 register allows for colorspace + // conversion + size = width * height * 4; + converted_data = g_malloc(size); + uint8_t *pixel = converted_data; + for (int y = 0; y < height; y++) { + const uint8_t *line = &data[y * row_pitch * depth]; + for (int x = 0; x < width; x++, pixel += 4) { + if (s.color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8) { + convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], + &pixel[2]); + } else { + convert_uyvy_to_rgb(line, x, &pixel[0], &pixel[1], + &pixel[2]); + } + pixel[3] = 255; + } + } + } else if (s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5) { + assert(depth == 1); /* FIXME */ + size = width * height * 3; + converted_data = g_malloc(size); + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) { + uint16_t rgb655 = *(uint16_t *)(data + y * row_pitch + x * 2); + int8_t *pixel = (int8_t *)&converted_data[(y * width + x) * 3]; + /* Maps 5 bit G and B signed value range to 8 bit + * signed values. R is probably unsigned. 
+ */ + rgb655 ^= (1 << 9) | (1 << 4); + pixel[0] = ((rgb655 & 0xFC00) >> 10) * 0x7F / 0x3F; + pixel[1] = ((rgb655 & 0x03E0) >> 5) * 0xFF / 0x1F - 0x80; + pixel[2] = (rgb655 & 0x001F) * 0xFF / 0x1F - 0x80; + } + } + } else { + return NULL; + } + + if (converted_size) { + *converted_size = size; + } + return converted_data; +} diff --git a/hw/xbox/nv2a/pgraph/texture.h b/hw/xbox/nv2a/pgraph/texture.h new file mode 100644 index 00000000000..4c9818ca3cc --- /dev/null +++ b/hw/xbox/nv2a/pgraph/texture.h @@ -0,0 +1,67 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#ifndef HW_XBOX_NV2A_PGRAPH_TEXTURE_H +#define HW_XBOX_NV2A_PGRAPH_TEXTURE_H + +#include "qemu/osdep.h" +#include "cpu.h" + +#include +#include + +#include "hw/xbox/nv2a/nv2a_regs.h" + +typedef struct PGRAPHState PGRAPHState; + +typedef struct TextureShape { + bool cubemap; + unsigned int dimensionality; + unsigned int color_format; + unsigned int levels; + unsigned int width, height, depth; + bool border; + + unsigned int min_mipmap_level, max_mipmap_level; + unsigned int pitch; +} TextureShape; + +typedef struct BasicColorFormatInfo { + unsigned int bytes_per_pixel; + bool linear; + bool depth; +} BasicColorFormatInfo; + +extern const BasicColorFormatInfo kelvin_color_format_info_map[66]; + +uint8_t *pgraph_convert_texture_data(const TextureShape s, const uint8_t *data, + const uint8_t *palette_data, + unsigned int width, unsigned int height, + unsigned int depth, unsigned int row_pitch, + unsigned int slice_pitch, + size_t *converted_size); + +hwaddr pgraph_get_texture_phys_addr(PGRAPHState *pg, int texture_idx); +hwaddr pgraph_get_texture_palette_phys_addr_length(PGRAPHState *pg, int texture_idx, size_t *length); +TextureShape pgraph_get_texture_shape(PGRAPHState *pg, int texture_idx); +size_t pgraph_get_texture_length(PGRAPHState *pg, TextureShape *shape); + +#endif diff --git a/hw/xbox/nv2a/gl/gloffscreen_common.c b/hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/common.c similarity index 100% rename from hw/xbox/nv2a/gl/gloffscreen_common.c rename to hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/common.c diff --git a/hw/xbox/nv2a/gl/gloffscreen.h b/hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/gloffscreen.h similarity index 100% rename from hw/xbox/nv2a/gl/gloffscreen.h rename to hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/gloffscreen.h diff --git a/hw/xbox/nv2a/gl/gloffscreen_sdl.c b/hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/sdl.c similarity index 98% rename from hw/xbox/nv2a/gl/gloffscreen_sdl.c rename to hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/sdl.c index 
2221067ddd4..277694cc509 100644 --- a/hw/xbox/nv2a/gl/gloffscreen_sdl.c +++ b/hw/xbox/nv2a/pgraph/thirdparty/gloffscreen/sdl.c @@ -1,7 +1,7 @@ /* * Offscreen OpenGL abstraction layer -- SDL based * - * Copyright (c) 2018-2021 Matt Borgerson + * Copyright (c) 2018-2024 Matt Borgerson * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal diff --git a/hw/xbox/nv2a/thirdparty/meson.build b/hw/xbox/nv2a/pgraph/thirdparty/meson.build similarity index 62% rename from hw/xbox/nv2a/thirdparty/meson.build rename to hw/xbox/nv2a/pgraph/thirdparty/meson.build index ec4068a77c6..d0139f17639 100644 --- a/hw/xbox/nv2a/thirdparty/meson.build +++ b/hw/xbox/nv2a/pgraph/thirdparty/meson.build @@ -10,3 +10,9 @@ libnv2a_vsh_cpu = static_library('nv2a_vsh_cpu', include_directories: ['.', 'nv2a_vsh_cpu/src']) nv2a_vsh_cpu = declare_dependency(link_with: libnv2a_vsh_cpu, include_directories: ['nv2a_vsh_cpu/src']) + +libgloffscreen = static_library('libgloffscreen', + sources: files('gloffscreen/common.c', 'gloffscreen/sdl.c'), + dependencies: sdl) +gloffscreen = declare_dependency(link_with: libgloffscreen, + include_directories: ['gloffscreen']) diff --git a/hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu b/hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu similarity index 100% rename from hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu rename to hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu diff --git a/hw/xbox/nv2a/pgraph/util.h b/hw/xbox/nv2a/pgraph/util.h new file mode 100644 index 00000000000..c8a28d3c0d8 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/util.h @@ -0,0 +1,86 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; 
either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_UTIL_H +#define HW_XBOX_NV2A_PGRAPH_UTIL_H + +static const float f16_max = 511.9375f; +static const float f24_max = 1.0E30; + +/* 16 bit to [0.0, F16_MAX = 511.9375] */ +static inline +float convert_f16_to_float(uint16_t f16) { + if (f16 == 0x0000) { return 0.0; } + uint32_t i = (f16 << 11) + 0x3C000000; + return *(float*)&i; +} + +/* 24 bit to [0.0, F24_MAX] */ +static inline +float convert_f24_to_float(uint32_t f24) { + assert(!(f24 >> 24)); + f24 &= 0xFFFFFF; + if (f24 == 0x000000) { return 0.0; } + uint32_t i = f24 << 7; + return *(float*)&i; +} + +static inline +uint8_t cliptobyte(int x) +{ + return (uint8_t)((x < 0) ? 0 : ((x > 255) ? 
255 : x)); +} + +static inline +void convert_yuy2_to_rgb(const uint8_t *line, unsigned int ix, + uint8_t *r, uint8_t *g, uint8_t* b) { + int c, d, e; + c = (int)line[ix * 2] - 16; + if (ix % 2) { + d = (int)line[ix * 2 - 1] - 128; + e = (int)line[ix * 2 + 1] - 128; + } else { + d = (int)line[ix * 2 + 1] - 128; + e = (int)line[ix * 2 + 3] - 128; + } + *r = cliptobyte((298 * c + 409 * e + 128) >> 8); + *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8); + *b = cliptobyte((298 * c + 516 * d + 128) >> 8); +} + +static inline +void convert_uyvy_to_rgb(const uint8_t *line, unsigned int ix, + uint8_t *r, uint8_t *g, uint8_t* b) { + int c, d, e; + c = (int)line[ix * 2 + 1] - 16; + if (ix % 2) { + d = (int)line[ix * 2 - 2] - 128; + e = (int)line[ix * 2 + 0] - 128; + } else { + d = (int)line[ix * 2 + 0] - 128; + e = (int)line[ix * 2 + 2] - 128; + } + *r = cliptobyte((298 * c + 409 * e + 128) >> 8); + *g = cliptobyte((298 * c - 100 * d - 208 * e + 128) >> 8); + *b = cliptobyte((298 * c + 516 * d + 128) >> 8); +} + +#endif diff --git a/hw/xbox/nv2a/pgraph/vertex.c b/hw/xbox/nv2a/pgraph/vertex.c new file mode 100644 index 00000000000..31076896e7e --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vertex.c @@ -0,0 +1,151 @@ +/* + * QEMU Geforce NV2A implementation + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/xbox/nv2a/nv2a_int.h" + +void pgraph_update_inline_value(VertexAttribute *attr, const uint8_t *data) +{ + assert(attr->count <= 4); + attr->inline_value[0] = 0.0f; + attr->inline_value[1] = 0.0f; + attr->inline_value[2] = 0.0f; + attr->inline_value[3] = 1.0f; + + switch (attr->format) { + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: + for (uint32_t i = 0; i < attr->count; ++i) { + attr->inline_value[i] = (float)data[i] / 255.0f; + } + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: { + const int16_t *val = (const int16_t *) data; + for (uint32_t i = 0; i < attr->count; ++i, ++val) { + attr->inline_value[i] = MAX(-1.0f, (float) *val / 32767.0f); + } + break; + } + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: + memcpy(attr->inline_value, data, attr->size * attr->count); + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: { + const int16_t *val = (const int16_t *) data; + for (uint32_t i = 0; i < attr->count; ++i, ++val) { + attr->inline_value[i] = (float)*val; + } + break; + } + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: { + /* 3 signed, normalized components packed in 32-bits. 
(11,11,10) */ + const int32_t val = *(const int32_t *)data; + int32_t x = val & 0x7FF; + if (x & 0x400) { + x |= 0xFFFFF800; + } + int32_t y = (val >> 11) & 0x7FF; + if (y & 0x400) { + y |= 0xFFFFF800; + } + int32_t z = (val >> 22) & 0x7FF; + if (z & 0x200) { + z |= 0xFFFFFC00; + } + + attr->inline_value[0] = MAX(-1.0f, (float)x / 1023.0f); + attr->inline_value[1] = MAX(-1.0f, (float)y / 1023.0f); + attr->inline_value[2] = MAX(-1.0f, (float)z / 511.0f); + break; + } + default: + fprintf(stderr, "Unknown vertex attribute type: for format 0x%x\n", + attr->format); + assert(!"Unsupported attribute type"); + break; + } +} + +void pgraph_get_inline_values(PGRAPHState *pg, uint16_t attrs, + float values[NV2A_VERTEXSHADER_ATTRIBUTES][4], + int *count) +{ + int num_attributes = 0; + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + if (attrs & (1 << i)) { + memcpy(values[num_attributes], + pg->vertex_attributes[i].inline_value, 4 * sizeof(float)); + num_attributes += 1; + } + } + + if (count) { + *count = num_attributes; + } +} + + +void pgraph_allocate_inline_buffer_vertices(PGRAPHState *pg, unsigned int attr) +{ + VertexAttribute *attribute = &pg->vertex_attributes[attr]; + + if (attribute->inline_buffer_populated || pg->inline_buffer_length == 0) { + return; + } + + /* Now upload the previous attribute value */ + attribute->inline_buffer_populated = true; + for (int i = 0; i < pg->inline_buffer_length; i++) { + memcpy(&attribute->inline_buffer[i * 4], attribute->inline_value, + sizeof(float) * 4); + } +} + +void pgraph_finish_inline_buffer_vertex(PGRAPHState *pg) +{ + pgraph_check_within_begin_end_block(pg); + assert(pg->inline_buffer_length < NV2A_MAX_BATCH_LENGTH); + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attribute = &pg->vertex_attributes[i]; + if (attribute->inline_buffer_populated) { + memcpy(&attribute->inline_buffer[pg->inline_buffer_length * 4], + attribute->inline_value, sizeof(float) * 4); + } + } + + 
pg->inline_buffer_length++; +} + +void pgraph_reset_inline_buffers(PGRAPHState *pg) +{ + pg->inline_elements_length = 0; + pg->inline_array_length = 0; + pg->inline_buffer_length = 0; + pgraph_reset_draw_arrays(pg); +} + +void pgraph_reset_draw_arrays(PGRAPHState *pg) +{ + pg->draw_arrays_length = 0; + pg->draw_arrays_min_start = -1; + pg->draw_arrays_max_count = 0; + pg->draw_arrays_prevent_connect = false; +} diff --git a/hw/xbox/nv2a/pgraph/vk/blit.c b/hw/xbox/nv2a/pgraph/vk/blit.c new file mode 100644 index 00000000000..e4529a3c586 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/blit.c @@ -0,0 +1,177 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * Based on GL implementation: + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "renderer.h" + +void pgraph_vk_image_blit(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + ContextSurfaces2DState *context_surfaces = &pg->context_surfaces_2d; + ImageBlitState *image_blit = &pg->image_blit; + BetaState *beta = &pg->beta; + + pgraph_vk_surface_update(d, false, true, true); + + assert(context_surfaces->object_instance == image_blit->context_surfaces); + + unsigned int bytes_per_pixel; + switch (context_surfaces->color_format) { + case NV062_SET_COLOR_FORMAT_LE_Y8: + bytes_per_pixel = 1; + break; + case NV062_SET_COLOR_FORMAT_LE_R5G6B5: + bytes_per_pixel = 2; + break; + case NV062_SET_COLOR_FORMAT_LE_A8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: + case NV062_SET_COLOR_FORMAT_LE_Y32: + bytes_per_pixel = 4; + break; + default: + fprintf(stderr, "Unknown blit surface format: 0x%x\n", + context_surfaces->color_format); + assert(false); + break; + } + + hwaddr source_dma_len, dest_dma_len; + + uint8_t *source = (uint8_t *)nv_dma_map( + d, context_surfaces->dma_image_source, &source_dma_len); + assert(context_surfaces->source_offset < source_dma_len); + source += context_surfaces->source_offset; + + uint8_t *dest = (uint8_t *)nv_dma_map(d, context_surfaces->dma_image_dest, + &dest_dma_len); + assert(context_surfaces->dest_offset < dest_dma_len); + dest += context_surfaces->dest_offset; + + hwaddr source_addr = source - d->vram_ptr; + hwaddr dest_addr = dest - d->vram_ptr; + + SurfaceBinding *surf_src = pgraph_vk_surface_get(d, source_addr); + if (surf_src) { + pgraph_vk_surface_download_if_dirty(d, surf_src); + } + + SurfaceBinding *surf_dest = pgraph_vk_surface_get(d, dest_addr); + if (surf_dest) { + if (image_blit->height < surf_dest->height || + image_blit->width < surf_dest->width) { + pgraph_vk_surface_download_if_dirty(d, surf_dest); + } else { + // The blit will completely replace the surface so any pending + // download should be 
discarded. + surf_dest->download_pending = false; + surf_dest->draw_dirty = false; + } + surf_dest->upload_pending = true; + pg->draw_time++; + } + + hwaddr source_offset = image_blit->in_y * context_surfaces->source_pitch + + image_blit->in_x * bytes_per_pixel; + hwaddr dest_offset = image_blit->out_y * context_surfaces->dest_pitch + + image_blit->out_x * bytes_per_pixel; + + hwaddr source_size = + (image_blit->height - 1) * context_surfaces->source_pitch + + image_blit->width * bytes_per_pixel; + hwaddr dest_size = (image_blit->height - 1) * context_surfaces->dest_pitch + + image_blit->width * bytes_per_pixel; + + /* FIXME: What does hardware do in this case? */ + assert(source_addr + source_offset + source_size <= + memory_region_size(d->vram)); + assert(dest_addr + dest_offset + dest_size <= memory_region_size(d->vram)); + + uint8_t *source_row = source + source_offset; + uint8_t *dest_row = dest + dest_offset; + + if (image_blit->operation == NV09F_SET_OPERATION_SRCCOPY) { + // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_SRCCOPY"); + for (unsigned int y = 0; y < image_blit->height; y++) { + memmove(dest_row, source_row, image_blit->width * bytes_per_pixel); + source_row += context_surfaces->source_pitch; + dest_row += context_surfaces->dest_pitch; + } + } else if (image_blit->operation == NV09F_SET_OPERATION_BLEND_AND) { + // NV2A_GL_DPRINTF(false, "NV09F_SET_OPERATION_BLEND_AND"); + uint32_t max_beta_mult = 0x7f80; + uint32_t beta_mult = beta->beta >> 16; + uint32_t inv_beta_mult = max_beta_mult - beta_mult; + for (unsigned int y = 0; y < image_blit->height; y++) { + for (unsigned int x = 0; x < image_blit->width; x++) { + for (unsigned int ch = 0; ch < 3; ch++) { + uint32_t a = source_row[x * 4 + ch] * beta_mult; + uint32_t b = dest_row[x * 4 + ch] * inv_beta_mult; + dest_row[x * 4 + ch] = (a + b) / max_beta_mult; + } + } + source_row += context_surfaces->source_pitch; + dest_row += context_surfaces->dest_pitch; + } + } else { + fprintf(stderr, "Unknown blit 
operation: 0x%x\n", + image_blit->operation); + assert(false && "Unknown blit operation"); + } + + NV2A_DPRINTF(" - 0x%tx -> 0x%tx\n", source_addr, dest_addr); + + bool needs_alpha_patching; + uint8_t alpha_override; + switch (context_surfaces->color_format) { + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8: + needs_alpha_patching = true; + alpha_override = 0xff; + break; + case NV062_SET_COLOR_FORMAT_LE_X8R8G8B8_Z8R8G8B8: + needs_alpha_patching = true; + alpha_override = 0; + break; + default: + needs_alpha_patching = false; + alpha_override = 0; + } + + if (needs_alpha_patching) { + dest_row = dest + dest_offset; + for (unsigned int y = 0; y < image_blit->height; y++) { + for (unsigned int x = 0; x < image_blit->width; x++) { + dest_row[x * 4 + 3] = alpha_override; + } + dest_row += context_surfaces->dest_pitch; + } + } + + dest_addr += dest_offset; + memory_region_set_client_dirty(d->vram, dest_addr, dest_size, + DIRTY_MEMORY_VGA); + memory_region_set_client_dirty(d->vram, dest_addr, dest_size, + DIRTY_MEMORY_NV2A_TEX); +} diff --git a/hw/xbox/nv2a/pgraph/vk/buffer.c b/hw/xbox/nv2a/pgraph/vk/buffer.c new file mode 100644 index 00000000000..93458b254ac --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/buffer.c @@ -0,0 +1,208 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "renderer.h" +#include + +static void create_buffer(PGRAPHState *pg, StorageBuffer *buffer) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkBufferCreateInfo buffer_create_info = { + .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, + .size = buffer->buffer_size, + .usage = buffer->usage, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + VK_CHECK(vmaCreateBuffer(r->allocator, &buffer_create_info, + &buffer->alloc_info, &buffer->buffer, + &buffer->allocation, NULL)); +} + +static void destroy_buffer(PGRAPHState *pg, StorageBuffer *buffer) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vmaDestroyBuffer(r->allocator, buffer->buffer, buffer->allocation); + buffer->buffer = VK_NULL_HANDLE; + buffer->allocation = VK_NULL_HANDLE; +} + +void pgraph_vk_init_buffers(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + // FIXME: Profile buffer sizes + + VmaAllocationCreateInfo host_alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST, + .flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT | + VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT, + }; + VmaAllocationCreateInfo device_alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + .flags = VMA_ALLOCATION_CREATE_DEDICATED_MEMORY_BIT + }; + + r->storage_buffers[BUFFER_STAGING_DST] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT, + .buffer_size = 4096 * 4096 * 4, + }; + + r->storage_buffers[BUFFER_STAGING_SRC] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + .buffer_size = r->storage_buffers[BUFFER_STAGING_DST].buffer_size, + }; + + r->storage_buffers[BUFFER_COMPUTE_DST] = (StorageBuffer){ + .alloc_info = device_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + .buffer_size = (1024 * 10) * (1024 * 10) * 8, + }; + + r->storage_buffers[BUFFER_COMPUTE_SRC] = (StorageBuffer){ 
+ .alloc_info = device_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT | + VK_BUFFER_USAGE_STORAGE_BUFFER_BIT, + .buffer_size = r->storage_buffers[BUFFER_COMPUTE_DST].buffer_size, + }; + + r->storage_buffers[BUFFER_INDEX] = (StorageBuffer){ + .alloc_info = device_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_INDEX_BUFFER_BIT, + .buffer_size = sizeof(pg->inline_elements) * 100, + }; + + r->storage_buffers[BUFFER_INDEX_STAGING] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + .buffer_size = r->storage_buffers[BUFFER_INDEX].buffer_size, + }; + + // FIXME: Don't assume that we can render with host mapped buffer + r->storage_buffers[BUFFER_VERTEX_RAM] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .buffer_size = memory_region_size(d->vram), + }; + + r->bitmap_size = memory_region_size(d->vram) / 4096; + r->uploaded_bitmap = bitmap_new(r->bitmap_size); + bitmap_clear(r->uploaded_bitmap, 0, r->bitmap_size); + + r->storage_buffers[BUFFER_VERTEX_INLINE] = (StorageBuffer){ + .alloc_info = device_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_VERTEX_BUFFER_BIT, + .buffer_size = NV2A_VERTEXSHADER_ATTRIBUTES * NV2A_MAX_BATCH_LENGTH * + 4 * sizeof(float) * 10, + }; + + r->storage_buffers[BUFFER_VERTEX_INLINE_STAGING] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + .buffer_size = r->storage_buffers[BUFFER_VERTEX_INLINE].buffer_size, + }; + + r->storage_buffers[BUFFER_UNIFORM] = (StorageBuffer){ + .alloc_info = device_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT | + VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT, + .buffer_size = 8 * 1024 * 1024, + }; + + r->storage_buffers[BUFFER_UNIFORM_STAGING] = (StorageBuffer){ + .alloc_info = host_alloc_create_info, + .usage = VK_BUFFER_USAGE_TRANSFER_SRC_BIT, + .buffer_size = 
r->storage_buffers[BUFFER_UNIFORM].buffer_size, + }; + + for (int i = 0; i < BUFFER_COUNT; i++) { + create_buffer(pg, &r->storage_buffers[i]); + } + + // FIXME: Add fallback path for device using host mapped memory + + int buffers_to_map[] = { BUFFER_VERTEX_RAM, + BUFFER_INDEX_STAGING, + BUFFER_VERTEX_INLINE_STAGING, + BUFFER_UNIFORM_STAGING }; + + for (int i = 0; i < ARRAY_SIZE(buffers_to_map); i++) { + VK_CHECK(vmaMapMemory( + r->allocator, r->storage_buffers[buffers_to_map[i]].allocation, + (void **)&r->storage_buffers[buffers_to_map[i]].mapped)); + } +} + +void pgraph_vk_finalize_buffers(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + for (int i = 0; i < BUFFER_COUNT; i++) { + if (r->storage_buffers[i].mapped) { + vmaUnmapMemory(r->allocator, r->storage_buffers[i].allocation); + } + destroy_buffer(pg, &r->storage_buffers[i]); + } + + g_free(r->uploaded_bitmap); + r->uploaded_bitmap = NULL; +} + +bool pgraph_vk_buffer_has_space_for(PGRAPHState *pg, int index, + VkDeviceSize size, + VkDeviceAddress alignment) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + StorageBuffer *b = &r->storage_buffers[index]; + return (ROUND_UP(b->buffer_offset, alignment) + size) <= b->buffer_size; +} + +VkDeviceSize pgraph_vk_append_to_buffer(PGRAPHState *pg, int index, void **data, + VkDeviceSize *sizes, size_t count, + VkDeviceAddress alignment) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDeviceSize total_size = 0; + for (int i = 0; i < count; i++) { + total_size += sizes[i]; + } + assert(pgraph_vk_buffer_has_space_for(pg, index, total_size, alignment)); + + StorageBuffer *b = &r->storage_buffers[index]; + VkDeviceSize starting_offset = ROUND_UP(b->buffer_offset, alignment); + + assert(b->mapped); + + for (int i = 0; i < count; i++) { + b->buffer_offset = ROUND_UP(b->buffer_offset, alignment); + memcpy(b->mapped + b->buffer_offset, data[i], sizes[i]); + b->buffer_offset += sizes[i]; + } + + return starting_offset; +} diff 
--git a/hw/xbox/nv2a/pgraph/vk/command.c b/hw/xbox/nv2a/pgraph/vk/command.c new file mode 100644 index 00000000000..0e9fc9a2ee1 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/command.c @@ -0,0 +1,119 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "renderer.h" + +static void create_command_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + QueueFamilyIndices indices = + pgraph_vk_find_queue_families(r->physical_device); + + VkCommandPoolCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, + .flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, + .queueFamilyIndex = indices.queue_family, + }; + VK_CHECK( + vkCreateCommandPool(r->device, &create_info, NULL, &r->command_pool)); +} + +static void destroy_command_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyCommandPool(r->device, r->command_pool, NULL); +} + +static void create_command_buffers(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkCommandBufferAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, + .commandPool = r->command_pool, + .level = VK_COMMAND_BUFFER_LEVEL_PRIMARY, + .commandBufferCount = ARRAY_SIZE(r->command_buffers), + }; + VK_CHECK( + 
vkAllocateCommandBuffers(r->device, &alloc_info, r->command_buffers)); + + r->command_buffer = r->command_buffers[0]; + r->aux_command_buffer = r->command_buffers[1]; +} + +static void destroy_command_buffers(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkFreeCommandBuffers(r->device, r->command_pool, + ARRAY_SIZE(r->command_buffers), r->command_buffers); + + r->command_buffer = VK_NULL_HANDLE; + r->aux_command_buffer = VK_NULL_HANDLE; +} + +VkCommandBuffer pgraph_vk_begin_single_time_commands(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(!r->in_aux_command_buffer); + r->in_aux_command_buffer = true; + + VkCommandBufferBeginInfo begin_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + VK_CHECK(vkBeginCommandBuffer(r->aux_command_buffer, &begin_info)); + + return r->aux_command_buffer; +} + +void pgraph_vk_end_single_time_commands(PGRAPHState *pg, VkCommandBuffer cmd) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(r->in_aux_command_buffer); + + VK_CHECK(vkEndCommandBuffer(cmd)); + + VkSubmitInfo submit_info = { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &cmd, + }; + VK_CHECK(vkQueueSubmit(r->queue, 1, &submit_info, VK_NULL_HANDLE)); + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_AUX); + VK_CHECK(vkQueueWaitIdle(r->queue)); + + r->in_aux_command_buffer = false; +} + +void pgraph_vk_init_command_buffers(PGRAPHState *pg) +{ + create_command_pool(pg); + create_command_buffers(pg); +} + +void pgraph_vk_finalize_command_buffers(PGRAPHState *pg) +{ + destroy_command_buffers(pg); + destroy_command_pool(pg); +} \ No newline at end of file diff --git a/hw/xbox/nv2a/pgraph/vk/constants.h b/hw/xbox/nv2a/pgraph/vk/constants.h new file mode 100644 index 00000000000..9ae8ba6dd4c --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/constants.h @@ -0,0 +1,418 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer 
+ * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_VK_CONSTANTS_H +#define HW_XBOX_NV2A_PGRAPH_VK_CONSTANTS_H + +#include "hw/xbox/nv2a/nv2a_regs.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include + +static const VkFilter pgraph_texture_min_filter_vk_map[] = { + 0, + VK_FILTER_NEAREST, + VK_FILTER_LINEAR, + VK_FILTER_NEAREST, + VK_FILTER_LINEAR, + VK_FILTER_NEAREST, + VK_FILTER_LINEAR, + VK_FILTER_LINEAR, +}; + +static const VkFilter pgraph_texture_mag_filter_vk_map[] = { + 0, + VK_FILTER_NEAREST, + VK_FILTER_LINEAR, + 0, + VK_FILTER_LINEAR /* TODO: Convolution filter... 
*/ +}; + +static const VkSamplerAddressMode pgraph_texture_addr_vk_map[] = { + 0, + VK_SAMPLER_ADDRESS_MODE_REPEAT, + VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER, + VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, /* Approximate GL_CLAMP */ +}; + +static const VkBlendFactor pgraph_blend_factor_vk_map[] = { + VK_BLEND_FACTOR_ZERO, + VK_BLEND_FACTOR_ONE, + VK_BLEND_FACTOR_SRC_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR, + VK_BLEND_FACTOR_SRC_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + VK_BLEND_FACTOR_DST_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA, + VK_BLEND_FACTOR_DST_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR, + VK_BLEND_FACTOR_SRC_ALPHA_SATURATE, + 0, + VK_BLEND_FACTOR_CONSTANT_COLOR, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, + VK_BLEND_FACTOR_CONSTANT_ALPHA, + VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, +}; + +static const VkBlendOp pgraph_blend_equation_vk_map[] = { + VK_BLEND_OP_SUBTRACT, + VK_BLEND_OP_REVERSE_SUBTRACT, + VK_BLEND_OP_ADD, + VK_BLEND_OP_MIN, + VK_BLEND_OP_MAX, + VK_BLEND_OP_REVERSE_SUBTRACT, + VK_BLEND_OP_ADD, +}; + +/* FIXME +static const GLenum pgraph_blend_logicop_map[] = { + GL_CLEAR, + GL_AND, + GL_AND_REVERSE, + GL_COPY, + GL_AND_INVERTED, + GL_NOOP, + GL_XOR, + GL_OR, + GL_NOR, + GL_EQUIV, + GL_INVERT, + GL_OR_REVERSE, + GL_COPY_INVERTED, + GL_OR_INVERTED, + GL_NAND, + GL_SET, +}; +*/ + +static const VkCullModeFlags pgraph_cull_face_vk_map[] = { + 0, + VK_CULL_MODE_FRONT_BIT, + VK_CULL_MODE_BACK_BIT, + VK_CULL_MODE_FRONT_AND_BACK, +}; + +static const VkCompareOp pgraph_depth_func_vk_map[] = { + VK_COMPARE_OP_NEVER, + VK_COMPARE_OP_LESS, + VK_COMPARE_OP_EQUAL, + VK_COMPARE_OP_LESS_OR_EQUAL, + VK_COMPARE_OP_GREATER, + VK_COMPARE_OP_NOT_EQUAL, + VK_COMPARE_OP_GREATER_OR_EQUAL, + VK_COMPARE_OP_ALWAYS, +}; + +static const VkCompareOp pgraph_stencil_func_vk_map[] = { + VK_COMPARE_OP_NEVER, + VK_COMPARE_OP_LESS, + VK_COMPARE_OP_EQUAL, + VK_COMPARE_OP_LESS_OR_EQUAL, + 
VK_COMPARE_OP_GREATER, + VK_COMPARE_OP_NOT_EQUAL, + VK_COMPARE_OP_GREATER_OR_EQUAL, + VK_COMPARE_OP_ALWAYS, +}; + +static const VkStencilOp pgraph_stencil_op_vk_map[] = { + 0, + VK_STENCIL_OP_KEEP, + VK_STENCIL_OP_ZERO, + VK_STENCIL_OP_REPLACE, + VK_STENCIL_OP_INCREMENT_AND_CLAMP, + VK_STENCIL_OP_DECREMENT_AND_CLAMP, + VK_STENCIL_OP_INVERT, + VK_STENCIL_OP_INCREMENT_AND_WRAP, + VK_STENCIL_OP_DECREMENT_AND_WRAP, +}; + +static const VkPolygonMode pgraph_polygon_mode_vk_map[] = { + [POLY_MODE_FILL] = VK_POLYGON_MODE_FILL, + [POLY_MODE_POINT] = VK_POLYGON_MODE_POINT, + [POLY_MODE_LINE] = VK_POLYGON_MODE_LINE, +}; + +typedef struct VkColorFormatInfo { + VkFormat vk_format; + VkComponentMapping component_map; +} VkColorFormatInfo; + +static const VkColorFormatInfo kelvin_color_format_vk_map[66] = { + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_Y8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_AY8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A1R5G5B5] = { + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X1R5G5B5] = { + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A4R4G4B4] = { + VK_FORMAT_A4R4G4B4_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5] = { + VK_FORMAT_R5G6B5_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8] = { + VK_FORMAT_B8G8R8A8_UNORM, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8] = { + VK_FORMAT_B8G8R8A8_UNORM, + { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8] = { + 
VK_FORMAT_B8G8R8A8_UNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5] = { + VK_FORMAT_R8G8B8A8_UNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8] = { + VK_FORMAT_R8G8B8A8_UNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8] = { + VK_FORMAT_R8G8B8A8_UNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A1R5G5B5] = { + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5] = { + VK_FORMAT_R5G6B5_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8] = { + VK_FORMAT_B8G8R8A8_UNORM, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_G8B8] = { + VK_FORMAT_R8G8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8Y8] = { + VK_FORMAT_R8G8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_AY8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5] = { + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + { VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A4R4G4B4] = { + VK_FORMAT_A4R4G4B4_UNORM_PACK16, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8] = { + VK_FORMAT_B8G8R8A8_UNORM, + { VK_COMPONENT_SWIZZLE_IDENTITY, 
VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_ONE }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8] = { + VK_FORMAT_R8_UNORM, + { VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_R } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8Y8] = { + VK_FORMAT_R8G8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R6G5B5] = { + VK_FORMAT_R8G8B8_SNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_G8B8] = { + VK_FORMAT_R8G8_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8B8] = { + VK_FORMAT_R8G8_UNORM, + { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_CR8YB8CB8YA8] = { + VK_FORMAT_R8G8B8A8_UNORM, // Converted + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LC_IMAGE_YB8CR8YA8CB8] = { + VK_FORMAT_R8G8B8A8_UNORM, // Converted + }, + + /* Additional information is passed to the pixel shader via the swizzle: + * RED: The depth value. 
+ * GREEN: 0 for 16-bit, 1 for 24 bit + * BLUE: 0 for fixed, 1 for float + */ + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_DEPTH_Y16_FIXED] = { + VK_FORMAT_R16_UNORM, // FIXME + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FIXED] = { + // FIXME + // {GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}}, + VK_FORMAT_R32_UINT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_X8_Y24_FLOAT] = { + // FIXME + // {GL_DEPTH_COMPONENT, GL_DEPTH_STENCIL, GL_UNSIGNED_INT_24_8, {GL_RED, GL_ONE, GL_ZERO, GL_ZERO}}, + VK_FORMAT_R32_UINT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FIXED] = { + VK_FORMAT_R16_UNORM, // FIXME + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_DEPTH_Y16_FLOAT] = { + VK_FORMAT_R16_SFLOAT, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ZERO }, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_Y16] = { + VK_FORMAT_R16_UNORM, + { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_ONE } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8B8G8R8] = { + VK_FORMAT_R8G8B8A8_UNORM, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_B8G8R8A8] = { + VK_FORMAT_R8G8B8A8_UNORM, + { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R8G8B8A8] = { + VK_FORMAT_R8G8B8A8_UNORM, + { VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R } + }, + 
[NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8] = { + VK_FORMAT_R8G8B8A8_UNORM, + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_B8G8R8A8] = { + VK_FORMAT_R8G8B8A8_UNORM, + { VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_R } + }, + [NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8] = { + VK_FORMAT_R8G8B8A8_UNORM, + { VK_COMPONENT_SWIZZLE_A, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_R } + }, +}; + +typedef struct BasicSurfaceFormatInfo { + unsigned int bytes_per_pixel; +} BasicSurfaceFormatInfo; + +typedef struct SurfaceFormatInfo { + unsigned int host_bytes_per_pixel; + VkFormat vk_format; + VkImageUsageFlags usage; + VkImageAspectFlags aspect; +} SurfaceFormatInfo; + +static const BasicSurfaceFormatInfo kelvin_surface_color_format_map[] = { + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] = { 2 }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] = { 2 }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] = { 4 }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] = { 4 }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] = { 1 }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] = { 2 }, +}; + +static const SurfaceFormatInfo kelvin_surface_color_format_vk_map[] = { + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5] = + { + // FIXME: Force alpha to zero + 2, + VK_FORMAT_A1R5G5B5_UNORM_PACK16, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5] = + { + 2, + VK_FORMAT_R5G6B5_UNORM_PACK16, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8] = + { + // FIXME: Force alpha to zero + 4, + VK_FORMAT_B8G8R8A8_UNORM, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8] = + { + 4, + VK_FORMAT_B8G8R8A8_UNORM, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, + 
[NV097_SET_SURFACE_FORMAT_COLOR_LE_B8] = + { + // FIXME: Map channel color + 1, + VK_FORMAT_R8_UNORM, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, + [NV097_SET_SURFACE_FORMAT_COLOR_LE_G8B8] = + { + // FIXME: Map channel color + 2, + VK_FORMAT_R8G8_UNORM, + VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_COLOR_BIT, + }, +}; + +static const BasicSurfaceFormatInfo kelvin_surface_zeta_format_map[] = { + [NV097_SET_SURFACE_FORMAT_ZETA_Z16] = { 2 }, + [NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = { 4 }, +}; + +// FIXME: Actually support stored float format + +static const SurfaceFormatInfo zeta_d16 = { + 2, + VK_FORMAT_D16_UNORM, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_DEPTH_BIT, +}; + +static const SurfaceFormatInfo zeta_d32_sfloat_s8_uint = { + 8, + VK_FORMAT_D32_SFLOAT_S8_UINT, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, +}; + +static const SurfaceFormatInfo zeta_d24_unorm_s8_uint = { + 4, + VK_FORMAT_D24_UNORM_S8_UINT, + VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT, + VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, +}; + +#endif diff --git a/hw/xbox/nv2a/pgraph/vk/debug.c b/hw/xbox/nv2a/pgraph/vk/debug.c new file mode 100644 index 00000000000..5c31c9f1194 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/debug.c @@ -0,0 +1,121 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "renderer.h" +#include "debug.h" + +#ifndef _WIN32 +#include +#endif + +#ifdef CONFIG_RENDERDOC +#pragma GCC diagnostic ignored "-Wstrict-prototypes" +#include "thirdparty/renderdoc_app.h" +#endif + +int nv2a_vk_dgroup_indent = 0; + +void pgraph_vk_debug_init(void) +{ +#ifdef CONFIG_RENDERDOC + nv2a_dbg_renderdoc_init(); +#endif +} + +void pgraph_vk_debug_frame_terminator(void) +{ +#ifdef CONFIG_RENDERDOC + if (nv2a_dbg_renderdoc_available()) { + RENDERDOC_API_1_6_0 *rdoc_api = nv2a_dbg_renderdoc_get_api(); + + PGRAPHVkState *r = g_nv2a->pgraph.vk_renderer_state; + if (rdoc_api->IsTargetControlConnected()) { + if (rdoc_api->IsFrameCapturing()) { + rdoc_api->EndFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(r->instance), 0); + } + if (renderdoc_capture_frames > 0) { + rdoc_api->StartFrameCapture(RENDERDOC_DEVICEPOINTER_FROM_VKINSTANCE(r->instance), 0); + --renderdoc_capture_frames; + } + } + } +#endif +} + +void pgraph_vk_insert_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd, + float color[4], const char *format, ...) +{ + if (!r->debug_utils_extension_enabled) { + return; + } + + char *buf = NULL; + + va_list args; + va_start(args, format); + int err = vasprintf(&buf, format, args); + assert(err >= 0); + va_end(args); + + VkDebugUtilsLabelEXT label_info = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pLabelName = buf, + }; + memcpy(label_info.color, color, 4 * sizeof(float)); + vkCmdInsertDebugUtilsLabelEXT(cmd, &label_info); + free(buf); +} + +void pgraph_vk_begin_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd, + float color[4], const char *format, ...) 
+{ + if (!r->debug_utils_extension_enabled) { + return; + } + + char *buf = NULL; + + va_list args; + va_start(args, format); + int err = vasprintf(&buf, format, args); + assert(err >= 0); + va_end(args); + + VkDebugUtilsLabelEXT label_info = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT, + .pLabelName = buf, + }; + memcpy(label_info.color, color, 4 * sizeof(float)); + vkCmdBeginDebugUtilsLabelEXT(cmd, &label_info); + free(buf); + + r->debug_depth += 1; + assert(r->debug_depth < 10 && "Missing pgraph_vk_debug_marker_end?"); +} + +void pgraph_vk_end_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd) +{ + if (!r->debug_utils_extension_enabled) { + return; + } + + vkCmdEndDebugUtilsLabelEXT(cmd); + assert(r->debug_depth > 0); + r->debug_depth -= 1; +} diff --git a/hw/xbox/nv2a/pgraph/vk/debug.h b/hw/xbox/nv2a/pgraph/vk/debug.h new file mode 100644 index 00000000000..62cd63e592e --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/debug.h @@ -0,0 +1,61 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_VK_DEBUG_H +#define HW_XBOX_NV2A_PGRAPH_VK_DEBUG_H + +#define DEBUG_VK 0 + +extern int nv2a_vk_dgroup_indent; + +#define NV2A_VK_XDPRINTF(x, fmt, ...) 
\ + do { \ + if (x) { \ + for (int i = 0; i < nv2a_vk_dgroup_indent; i++) \ + fprintf(stderr, " "); \ + fprintf(stderr, fmt "\n", ##__VA_ARGS__); \ + } \ + } while (0) + +#define NV2A_VK_DPRINTF(fmt, ...) NV2A_VK_XDPRINTF(DEBUG_VK, fmt, ##__VA_ARGS__) + +#define NV2A_VK_DGROUP_BEGIN(fmt, ...) \ + do { \ + NV2A_VK_XDPRINTF(DEBUG_VK, fmt, ##__VA_ARGS__); \ + nv2a_vk_dgroup_indent++; \ + } while (0) + +#define NV2A_VK_DGROUP_END(...) \ + do { \ + nv2a_vk_dgroup_indent--; \ + assert(nv2a_vk_dgroup_indent >= 0); \ + } while (0) + +#define VK_CHECK(x) \ + do { \ + VkResult vk_result = (x); \ + if (vk_result != VK_SUCCESS) { \ + fprintf(stderr, "vk_result = %d\n", vk_result); \ + } \ + assert(vk_result == VK_SUCCESS && "vk check failed"); \ + } while (0) + +void pgraph_vk_debug_frame_terminator(void); + +#endif diff --git a/hw/xbox/nv2a/pgraph/vk/display.c b/hw/xbox/nv2a/pgraph/vk/display.c new file mode 100644 index 00000000000..030ab2dea85 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/display.c @@ -0,0 +1,1090 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "renderer.h" +#include + +static uint8_t *convert_texture_data__CR8YB8CB8YA8(uint8_t *data_out, + const uint8_t *data_in, + unsigned int width, + unsigned int height, + unsigned int pitch) +{ + int x, y; + for (y = 0; y < height; y++) { + const uint8_t *line = &data_in[y * pitch]; + const uint32_t row_offset = y * width; + for (x = 0; x < width; x++) { + uint8_t *pixel = &data_out[(row_offset + x) * 4]; + convert_yuy2_to_rgb(line, x, &pixel[0], &pixel[1], &pixel[2]); + pixel[3] = 255; + } + } + return data_out; +} + +static float pvideo_calculate_scale(unsigned int din_dout, + unsigned int output_size) +{ + float calculated_in = din_dout * (output_size - 1); + calculated_in = floorf(calculated_in / (1 << 20) + 0.5f); + return (calculated_in + 1.0f) / output_size; +} + +static void destroy_pvideo_image(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + PGRAPHVkDisplayState *d = &r->display; + + if (d->pvideo.sampler != VK_NULL_HANDLE) { + vkDestroySampler(r->device, d->pvideo.sampler, NULL); + d->pvideo.sampler = VK_NULL_HANDLE; + } + + if (d->pvideo.image_view != VK_NULL_HANDLE) { + vkDestroyImageView(r->device, d->pvideo.image_view, NULL); + d->pvideo.image_view = VK_NULL_HANDLE; + } + + if (d->pvideo.image != VK_NULL_HANDLE) { + vmaDestroyImage(r->allocator, d->pvideo.image, d->pvideo.allocation); + d->pvideo.image = VK_NULL_HANDLE; + d->pvideo.allocation = VK_NULL_HANDLE; + } +} + +static void create_pvideo_image(PGRAPHState *pg, int width, int height) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + PGRAPHVkDisplayState *d = &r->display; + + if (d->pvideo.image == VK_NULL_HANDLE || d->pvideo.width != width || + d->pvideo.height != height) { + destroy_pvideo_image(pg); + } + + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .extent.width = width, + .extent.height = height, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .format = 
VK_FORMAT_R8G8B8A8_UNORM, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + .samples = VK_SAMPLE_COUNT_1_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .flags = 0, + }; + VmaAllocationCreateInfo alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + }; + VK_CHECK(vmaCreateImage(r->allocator, &image_create_info, + &alloc_create_info, &d->pvideo.image, + &d->pvideo.allocation, NULL)); + + VkImageViewCreateInfo image_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = d->pvideo.image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.baseMipLevel = 0, + .subresourceRange.levelCount = image_create_info.mipLevels, + .subresourceRange.baseArrayLayer = 0, + .subresourceRange.layerCount = image_create_info.arrayLayers, + }; + VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL, + &d->pvideo.image_view)); + + VkSamplerCreateInfo sampler_create_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = VK_FILTER_LINEAR, + .minFilter = VK_FILTER_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .borderColor = VK_BORDER_COLOR_INT_OPAQUE_WHITE, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + }; + VK_CHECK(vkCreateSampler(r->device, &sampler_create_info, NULL, + &d->pvideo.sampler)); +} + +static void upload_pvideo_image(PGRAPHState *pg, PvideoState state) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + PGRAPHVkState *r = pg->vk_renderer_state; + PGRAPHVkDisplayState *disp = &r->display; + + create_pvideo_image(pg, state.in_width, state.in_height); + + // FIXME: Dirty tracking. We don't necessarily need to upload so much. 
+ + // Copy texture data to mapped device buffer + uint8_t *mapped_memory_ptr; + + VK_CHECK(vmaMapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation, + (void *)&mapped_memory_ptr)); + + convert_texture_data__CR8YB8CB8YA8( + mapped_memory_ptr, d->vram_ptr + state.base + state.offset, + state.in_width, state.in_height, state.pitch); + + vmaFlushAllocation(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation, 0, + VK_WHOLE_SIZE); + + vmaUnmapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation); + + // FIXME: Merge with display renderer command buffer + + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + + VkBufferMemoryBarrier host_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_HOST_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_STAGING_SRC].buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_HOST_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &host_barrier, 0, NULL); + + pgraph_vk_transition_image_layout( + pg, cmd, disp->pvideo.image, VK_FORMAT_R8_UNORM, + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + VkBufferImageCopy region = { + .bufferOffset = 0, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.mipLevel = 0, + .imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){ 0, 0, 0 }, + .imageExtent = (VkExtent3D){ state.in_width, state.in_height, 1 }, + }; + vkCmdCopyBufferToImage(cmd, r->storage_buffers[BUFFER_STAGING_SRC].buffer, + disp->pvideo.image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); + + pgraph_vk_transition_image_layout(pg, cmd, disp->pvideo.image, + VK_FORMAT_R8G8B8A8_UNORM, + 
/*
 * Fullscreen-triangle fragment shader for presenting the framebuffer:
 *  - samples the scanned-out surface (binding 0) with a vertical flip for
 *    GL-origin compatibility and a line_offset correction,
 *  - optionally composites the PVIDEO overlay (binding 1) inside
 *    pvideo_pos, gated by the color-key test when enabled.
 * Push-constant member names must match the uniform_index() lookups in
 * update_uniforms().
 */
static const char *display_frag_glsl =
    "#version 450\n"
    "layout(binding = 0) uniform sampler2D tex;\n"
    "layout(binding = 1) uniform sampler2D pvideo_tex;\n"
    "layout(push_constant, std430) uniform PushConstants {\n"
    " float line_offset;\n"
    " vec2 display_size;\n"
    " bool pvideo_enable;\n"
    " vec2 pvideo_in_pos;\n"
    " vec4 pvideo_pos;\n"
    " vec4 pvideo_scale;\n"
    " bool pvideo_color_key_enable;\n"
    " vec4 pvideo_color_key;\n"
    "};\n"
    "layout(location = 0) out vec4 out_Color;\n"
    "void main()\n"
    "{\n"
    " vec2 tex_coord = gl_FragCoord.xy/display_size;\n"
    " float rel = display_size.y/textureSize(tex, 0).y/line_offset;\n"
    " tex_coord.y = 1 + rel*(tex_coord.y - 1);"
    " tex_coord.y = 1 - tex_coord.y;\n" // GL compat
    " out_Color.rgba = texture(tex, tex_coord);\n"
    " if (pvideo_enable) {\n"
    " vec2 screen_coord = vec2(gl_FragCoord.x, display_size.y - gl_FragCoord.y) * pvideo_scale.z;\n"
    " vec4 output_region = vec4(pvideo_pos.xy, pvideo_pos.xy + pvideo_pos.zw);\n"
    " bvec4 clip = bvec4(lessThan(screen_coord, output_region.xy),\n"
    " greaterThan(screen_coord, output_region.zw));\n"
    " if (!any(clip) && (!pvideo_color_key_enable || out_Color.rgba == pvideo_color_key)) {\n"
    " vec2 out_xy = screen_coord - pvideo_pos.xy;\n"
    " vec2 in_st = (pvideo_in_pos + out_xy * pvideo_scale.xy) / textureSize(pvideo_tex, 0);\n"
    " out_Color.rgba = texture(pvideo_tex, in_st);\n"
    " }\n"
    " }\n"
    "}\n";

/*
 * Create the descriptor pool for the display pass: one set holding two
 * combined image samplers (display surface + PVIDEO overlay).
 * FREE_DESCRIPTOR_SET_BIT is required because the set is allocated once and
 * freed implicitly with the pool at shutdown.
 */
static void create_descriptor_pool(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VkDescriptorPoolSize pool_sizes = {
        .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
        .descriptorCount = 2, // binding 0: display tex, binding 1: pvideo tex
    };

    VkDescriptorPoolCreateInfo pool_info = {
        .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
        .poolSizeCount = 1,
        .pPoolSizes = &pool_sizes,
        .maxSets = 1,
        .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT,
    };
    VK_CHECK(vkCreateDescriptorPool(r->device, &pool_info, NULL,
                                    &r->display.descriptor_pool));
}
VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + }; + VK_CHECK(vkCreateDescriptorPool(r->device, &pool_info, NULL, + &r->display.descriptor_pool)); +} + +static void destroy_descriptor_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyDescriptorPool(r->device, r->display.descriptor_pool, NULL); + r->display.descriptor_pool = VK_NULL_HANDLE; +} + +static void create_descriptor_set_layout(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDescriptorSetLayoutBinding bindings[2]; + + for (int i = 0; i < ARRAY_SIZE(bindings); i++) { + bindings[i] = (VkDescriptorSetLayoutBinding){ + .binding = i, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }; + } + VkDescriptorSetLayoutCreateInfo layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = ARRAY_SIZE(bindings), + .pBindings = bindings, + }; + VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL, + &r->display.descriptor_set_layout)); +} + +static void destroy_descriptor_set_layout(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyDescriptorSetLayout(r->device, r->display.descriptor_set_layout, + NULL); + r->display.descriptor_set_layout = VK_NULL_HANDLE; +} + +static void create_descriptor_sets(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDescriptorSetLayout layout = r->display.descriptor_set_layout; + + VkDescriptorSetAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = r->display.descriptor_pool, + .descriptorSetCount = 1, + .pSetLayouts = &layout, + }; + VK_CHECK(vkAllocateDescriptorSets(r->device, &alloc_info, + &r->display.descriptor_set)); +} + +static void create_render_pass(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkAttachmentDescription attachment; + + VkAttachmentReference 
/*
 * Create the single-subpass render pass used to draw the scaled display
 * output into the RGBA8 display image. The attachment stays in
 * COLOR_ATTACHMENT_OPTIMAL across the pass; loadOp is DONT_CARE because the
 * fullscreen draw overwrites every pixel.
 */
static void create_render_pass(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    VkAttachmentDescription attachment;

    VkAttachmentReference color_reference;
    attachment = (VkAttachmentDescription){
        .format = VK_FORMAT_R8G8B8A8_UNORM,
        .samples = VK_SAMPLE_COUNT_1_BIT,
        .loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
        .storeOp = VK_ATTACHMENT_STORE_OP_STORE,
        .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE,
        .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE,
        .initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
        .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
    };
    color_reference = (VkAttachmentReference){
        0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
    };

    /* External dependency: order this pass's color writes after any prior
     * color-attachment output (e.g. the surface render that produced the
     * source image). */
    VkSubpassDependency dependency = {
        .srcSubpass = VK_SUBPASS_EXTERNAL,
    };

    dependency.srcStageMask |=
        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    dependency.dstStageMask |=
        VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
    dependency.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;

    VkSubpassDescription subpass = {
        .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
        .colorAttachmentCount = 1,
        .pColorAttachments = &color_reference,
    };

    VkRenderPassCreateInfo renderpass_create_info = {
        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
        .attachmentCount = 1,
        .pAttachments = &attachment,
        .subpassCount = 1,
        .pSubpasses = &subpass,
        .dependencyCount = 1,
        .pDependencies = &dependency,
    };
    VK_CHECK(vkCreateRenderPass(r->device, &renderpass_create_info, NULL,
                                &r->display.render_pass));
}

/* Destroy the display render pass. */
static void destroy_render_pass(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    vkDestroyRenderPass(r->device, r->display.render_pass, NULL);
    r->display.render_pass = VK_NULL_HANDLE;
}
/*
 * Build the graphics pipeline for the display pass: the shared fullscreen
 * quad vertex shader (r->quad_vert_module) plus display_frag_glsl compiled
 * to SPIR-V at runtime. Viewport and scissor are dynamic; push constants
 * carry all fragment parameters (see update_uniforms()).
 */
static void create_display_pipeline(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    r->display.display_frag =
        pgraph_vk_create_shader_module_from_glsl(
            r, VK_SHADER_STAGE_FRAGMENT_BIT, display_frag_glsl);

    VkPipelineShaderStageCreateInfo shader_stages[] = {
        (VkPipelineShaderStageCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .stage = VK_SHADER_STAGE_VERTEX_BIT,
            .module = r->quad_vert_module->module,
            .pName = "main",
        },
        (VkPipelineShaderStageCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
            .module = r->display.display_frag->module,
            .pName = "main",
        },
    };

    // No vertex buffers: the vertex shader synthesizes the fullscreen quad.
    VkPipelineVertexInputStateCreateInfo vertex_input = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
    };

    VkPipelineInputAssemblyStateCreateInfo input_assembly = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
        .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
        .primitiveRestartEnable = VK_FALSE,
    };

    VkPipelineViewportStateCreateInfo viewport_state = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
        .viewportCount = 1,
        .scissorCount = 1,
    };

    VkPipelineRasterizationStateCreateInfo rasterizer = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
        .depthClampEnable = VK_FALSE,
        .rasterizerDiscardEnable = VK_FALSE,
        .polygonMode = VK_POLYGON_MODE_FILL,
        .lineWidth = 1.0f,
        .cullMode = VK_CULL_MODE_BACK_BIT,
        .frontFace = VK_FRONT_FACE_CLOCKWISE,
        .depthBiasEnable = VK_FALSE,
    };

    VkPipelineMultisampleStateCreateInfo multisampling = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
        .sampleShadingEnable = VK_FALSE,
        .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
    };

    VkPipelineDepthStencilStateCreateInfo depth_stencil = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
        .depthTestEnable = VK_FALSE,
        .depthCompareOp = VK_COMPARE_OP_ALWAYS,
        .depthBoundsTestEnable = VK_FALSE,
    };

    VkPipelineColorBlendAttachmentState color_blend_attachment = {
        .colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
                          VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
        .blendEnable = VK_FALSE,
    };

    VkPipelineColorBlendStateCreateInfo color_blending = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
        .logicOpEnable = VK_FALSE,
        .logicOp = VK_LOGIC_OP_COPY,
        .attachmentCount = 1,
        .pAttachments = &color_blend_attachment,
    };

    VkDynamicState dynamic_states[] = { VK_DYNAMIC_STATE_VIEWPORT,
                                        VK_DYNAMIC_STATE_SCISSOR };
    VkPipelineDynamicStateCreateInfo dynamic_state = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
        .dynamicStateCount = 2,
        .pDynamicStates = dynamic_states,
    };

    VkPushConstantRange push_constant_range = {
        .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
        .offset = 0,
        // Sized from the reflected push-constant layout of the shader.
        .size = r->display.display_frag->push_constants.total_size,
    };

    VkPipelineLayoutCreateInfo pipeline_layout_info = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .setLayoutCount = 1,
        .pSetLayouts = &r->display.descriptor_set_layout,
        .pushConstantRangeCount = 1,
        .pPushConstantRanges = &push_constant_range,
    };
    VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
                                    &r->display.pipeline_layout));

    VkGraphicsPipelineCreateInfo pipeline_info = {
        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
        .stageCount = ARRAY_SIZE(shader_stages),
        .pStages = shader_stages,
        .pVertexInputState = &vertex_input,
        .pInputAssemblyState = &input_assembly,
        .pViewportState = &viewport_state,
        .pRasterizationState = &rasterizer,
        .pMultisampleState = &multisampling,
        // NOTE(review): the display render pass has no depth/stencil
        // attachment, so pDepthStencilState should be ignored either way;
        // the dependence on r->zeta_binding looks copied from the surface
        // pipeline — confirm intent.
        .pDepthStencilState = r->zeta_binding ? &depth_stencil : NULL,
        .pColorBlendState = &color_blending,
        .pDynamicState = &dynamic_state,
        .layout = r->display.pipeline_layout,
        .renderPass = r->display.render_pass,
        .subpass = 0,
        .basePipelineHandle = VK_NULL_HANDLE,
    };
    VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1,
                                       &pipeline_info, NULL,
                                       &r->display.pipeline));
}
&depth_stencil : NULL, + .pColorBlendState = &color_blending, + .pDynamicState = &dynamic_state, + .layout = r->display.pipeline_layout, + .renderPass = r->display.render_pass, + .subpass = 0, + .basePipelineHandle = VK_NULL_HANDLE, + }; + VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1, + &pipeline_info, NULL, + &r->display.pipeline)); +} + +static void destroy_display_pipeline(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyPipeline(r->device, r->display.pipeline, NULL); + r->display.pipeline = VK_NULL_HANDLE; + + vkDestroyPipelineLayout(r->device, r->display.pipeline_layout, NULL); + r->display.pipeline_layout = VK_NULL_HANDLE; + + pgraph_vk_destroy_shader_module(r, r->display.display_frag); + r->display.display_frag = NULL; +} + +static void create_frame_buffer(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkFramebufferCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, + .renderPass = r->display.render_pass, + .attachmentCount = 1, + .pAttachments = &r->display.image_view, + .width = r->display.width, + .height = r->display.height, + .layers = 1, + }; + VK_CHECK(vkCreateFramebuffer(r->device, &create_info, NULL, + &r->display.framebuffer)); +} + +static void destroy_frame_buffer(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + vkDestroyFramebuffer(r->device, r->display.framebuffer, NULL); + r->display.framebuffer = NULL; +} + +static void destroy_current_display_image(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + PGRAPHVkDisplayState *d = &r->display; + + if (d->image == VK_NULL_HANDLE) { + return; + } + + destroy_frame_buffer(pg); + +#if HAVE_EXTERNAL_MEMORY + glDeleteTextures(1, &d->gl_texture_id); + d->gl_texture_id = 0; + + glDeleteMemoryObjectsEXT(1, &d->gl_memory_obj); + d->gl_memory_obj = 0; + +#ifdef WIN32 + CloseHandle(d->handle); + d->handle = 0; +#endif +#endif + + vkDestroyImageView(r->device, d->image_view, 
NULL); + d->image_view = VK_NULL_HANDLE; + + vkDestroyImage(r->device, d->image, NULL); + d->image = VK_NULL_HANDLE; + + vkFreeMemory(r->device, d->memory, NULL); + d->memory = VK_NULL_HANDLE; + + d->draw_time = 0; +} + +// FIXME: We may need to use two images. One for actually rendering display, +// and another for GL in the correct tiling mode + +static void create_display_image(PGRAPHState *pg, int width, int height) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + PGRAPHVkDisplayState *d = &r->display; + + if (r->display.image != VK_NULL_HANDLE) { + destroy_current_display_image(pg); + } + + const GLint gl_internal_format = GL_RGBA8; + bool use_optimal_tiling = true; + +#if HAVE_EXTERNAL_MEMORY + GLint num_tiling_types; + glGetInternalformativ(GL_TEXTURE_2D, gl_internal_format, + GL_NUM_TILING_TYPES_EXT, 1, &num_tiling_types); + // XXX: Apparently on AMD GL_OPTIMAL_TILING_EXT is reported to be + // supported, but doesn't work? On nVidia, GL_LINEAR_TILING_EXT may not + // be supported so we must use optimal. Default to optimal unless + // linear is explicitly specified... + GLint tiling_types[num_tiling_types]; + glGetInternalformativ(GL_TEXTURE_2D, gl_internal_format, + GL_TILING_TYPES_EXT, num_tiling_types, tiling_types); + for (int i = 0; i < num_tiling_types; i++) { + if (tiling_types[i] == GL_LINEAR_TILING_EXT) { + use_optimal_tiling = false; + break; + } + } +#endif + + // Create image + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .extent.width = width, + .extent.height = height, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .format = VK_FORMAT_R8G8B8A8_UNORM, + .tiling = use_optimal_tiling ? 
VK_IMAGE_TILING_OPTIMAL : VK_IMAGE_TILING_LINEAR, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, + .samples = VK_SAMPLE_COUNT_1_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + + VkExternalMemoryImageCreateInfo external_memory_image_create_info = { + .sType = VK_STRUCTURE_TYPE_EXTERNAL_MEMORY_IMAGE_CREATE_INFO, + .handleTypes = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT_KHR, + }; + image_create_info.pNext = &external_memory_image_create_info; + + VK_CHECK(vkCreateImage(r->device, &image_create_info, NULL, &d->image)); + + // Allocate and bind image memory + VkMemoryRequirements memory_requirements; + vkGetImageMemoryRequirements(r->device, d->image, &memory_requirements); + + VkMemoryAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO, + .allocationSize = memory_requirements.size, + .memoryTypeIndex = + pgraph_vk_get_memory_type(pg, memory_requirements.memoryTypeBits, + VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT), + }; + + VkExportMemoryAllocateInfo export_memory_alloc_info = { + .sType = VK_STRUCTURE_TYPE_EXPORT_MEMORY_ALLOCATE_INFO, + .handleTypes = +#ifdef WIN32 + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR +#else + VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT +#endif + , + }; + alloc_info.pNext = &export_memory_alloc_info; + + VK_CHECK(vkAllocateMemory(r->device, &alloc_info, NULL, &d->memory)); + + vkBindImageMemory(r->device, d->image, d->memory, 0); + + // Create Image View + VkImageViewCreateInfo image_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = d->image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = image_create_info.format, + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }; + VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL, + &d->image_view)); + +#if HAVE_EXTERNAL_MEMORY + +#ifdef WIN32 + + 
VkMemoryGetWin32HandleInfoKHR handle_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_WIN32_HANDLE_INFO_KHR, + .memory = d->memory, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32_BIT_KHR + }; + VK_CHECK(vkGetMemoryWin32HandleKHR(r->device, &handle_info, &d->handle)); + + glCreateMemoryObjectsEXT(1, &d->gl_memory_obj); + glImportMemoryWin32HandleEXT(d->gl_memory_obj, memory_requirements.size, GL_HANDLE_TYPE_OPAQUE_WIN32_EXT, d->handle); + assert(glGetError() == GL_NO_ERROR); + +#else + + VkMemoryGetFdInfoKHR fd_info = { + .sType = VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR, + .memory = d->memory, + .handleType = VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT, + }; + VK_CHECK(vkGetMemoryFdKHR(r->device, &fd_info, &d->fd)); + + glCreateMemoryObjectsEXT(1, &d->gl_memory_obj); + glImportMemoryFdEXT(d->gl_memory_obj, memory_requirements.size, + GL_HANDLE_TYPE_OPAQUE_FD_EXT, d->fd); + assert(glIsMemoryObjectEXT(d->gl_memory_obj)); + assert(glGetError() == GL_NO_ERROR); + +#endif // WIN32 + + glGenTextures(1, &d->gl_texture_id); + glBindTexture(GL_TEXTURE_2D, d->gl_texture_id); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_TILING_EXT, + use_optimal_tiling ? 
GL_OPTIMAL_TILING_EXT : + GL_LINEAR_TILING_EXT); + glTexStorageMem2DEXT(GL_TEXTURE_2D, 1, gl_internal_format, + image_create_info.extent.width, + image_create_info.extent.height, d->gl_memory_obj, 0); + assert(glGetError() == GL_NO_ERROR); + +#endif // HAVE_EXTERNAL_MEMORY + + d->width = image_create_info.extent.width; + d->height = image_create_info.extent.height; + + create_frame_buffer(pg); +} + +static void update_descriptor_set(PGRAPHState *pg, SurfaceBinding *surface) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDescriptorImageInfo image_infos[2]; + VkWriteDescriptorSet descriptor_writes[2]; + + // Display surface + image_infos[0] = (VkDescriptorImageInfo){ + .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .imageView = surface->image_view, + .sampler = r->display.sampler, + }; + descriptor_writes[0] = (VkWriteDescriptorSet){ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = r->display.descriptor_set, + .dstBinding = 0, + .dstArrayElement = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .pImageInfo = &image_infos[0], + }; + + // FIXME: PVIDEO Overlay + if (r->display.pvideo.state.enabled) { + assert(r->display.pvideo.image_view != VK_NULL_HANDLE); + assert(r->display.pvideo.sampler != VK_NULL_HANDLE); + image_infos[1] = (VkDescriptorImageInfo){ + .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .imageView = r->display.pvideo.image_view, + .sampler = r->display.pvideo.sampler, + }; + } else { + image_infos[1] = (VkDescriptorImageInfo){ + .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .imageView = r->dummy_texture.image_view, + .sampler = r->dummy_texture.sampler, + }; + } + descriptor_writes[1] = (VkWriteDescriptorSet){ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = r->display.descriptor_set, + .dstBinding = 1, + .dstArrayElement = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .pImageInfo = 
/*
 * Snapshot the PVIDEO overlay configuration from the NV2A PVIDEO register
 * block into a PvideoState struct. Only decodes further fields when the
 * overlay is enabled; asserts that the decoded region stays within the
 * programmed limit and within VRAM.
 */
static PvideoState get_pvideo_state(PGRAPHState *pg)
{
    NV2AState *d = container_of(pg, NV2AState, pgraph);
    PvideoState state;

    // FIXME: This check against PVIDEO_SIZE_IN does not match HW behavior.
    // Many games seem to pass this value when initializing or tearing down
    // PVIDEO. On its own, this generally does not result in the overlay being
    // hidden, however there are certain games (e.g., Ultimate Beach Soccer)
    // that use an unknown mechanism to hide the overlay without explicitly
    // stopping it.
    // Since the value seems to be set to 0xFFFFFFFF only in cases where the
    // content is not valid, it is probably good enough to treat it as an
    // implicit stop.
    state.enabled = (d->pvideo.regs[NV_PVIDEO_BUFFER] & NV_PVIDEO_BUFFER_0_USE)
        && d->pvideo.regs[NV_PVIDEO_SIZE_IN] != 0xFFFFFFFF;
    if (!state.enabled) {
        // Remaining fields are left undefined; callers must check .enabled.
        return state;
    }

    state.base = d->pvideo.regs[NV_PVIDEO_BASE];
    state.limit = d->pvideo.regs[NV_PVIDEO_LIMIT];
    state.offset = d->pvideo.regs[NV_PVIDEO_OFFSET];

    state.pitch =
        GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_PITCH);
    state.format =
        GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_COLOR);

    /* TODO: support other color formats */
    assert(state.format == NV_PVIDEO_FORMAT_COLOR_LE_CR8YB8CB8YA8);

    state.in_width =
        GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_WIDTH);
    state.in_height =
        GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_IN], NV_PVIDEO_SIZE_IN_HEIGHT);

    state.out_width =
        GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_WIDTH);
    state.out_height =
        GET_MASK(d->pvideo.regs[NV_PVIDEO_SIZE_OUT], NV_PVIDEO_SIZE_OUT_HEIGHT);

    // Source start position within the input image (texel coordinates).
    state.in_s = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
                          NV_PVIDEO_POINT_IN_S);
    state.in_t = GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_IN],
                          NV_PVIDEO_POINT_IN_T);

    // DS_DX/DT_DY hold fixed-point input-per-output deltas; UNITY means 1:1.
    uint32_t ds_dx = d->pvideo.regs[NV_PVIDEO_DS_DX];
    uint32_t dt_dy = d->pvideo.regs[NV_PVIDEO_DT_DY];
    state.scale_x = ds_dx == NV_PVIDEO_DIN_DOUT_UNITY ?
                        1.0f :
                        pvideo_calculate_scale(ds_dx, state.out_width);
    state.scale_y = dt_dy == NV_PVIDEO_DIN_DOUT_UNITY ?
                        1.0f :
                        pvideo_calculate_scale(dt_dy, state.out_height);

    // On HW, setting NV_PVIDEO_SIZE_IN larger than NV_PVIDEO_SIZE_OUT results
    // in them being capped to the output size, content is not scaled. This is
    // particularly important as NV_PVIDEO_SIZE_IN may be set to 0xFFFFFFFF
    // during initialization or teardown.
    if (state.in_width > state.out_width) {
        state.in_width = floorf((float)state.out_width * state.scale_x + 0.5f);
    }
    if (state.in_height > state.out_height) {
        state.in_height = floorf((float)state.out_height * state.scale_y + 0.5f);
    }

    state.out_x =
        GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_X);
    state.out_y =
        GET_MASK(d->pvideo.regs[NV_PVIDEO_POINT_OUT], NV_PVIDEO_POINT_OUT_Y);

    state.color_key_enabled =
        GET_MASK(d->pvideo.regs[NV_PVIDEO_FORMAT], NV_PVIDEO_FORMAT_DISPLAY);

    // TODO: Verify that masking off the top byte is correct.
    // SeaBlade sets a color key of 0x80000000 but the texture passed into the
    // shader is cleared to 0 alpha.
    state.color_key = d->pvideo.regs[NV_PVIDEO_COLOR_KEY] & 0xFFFFFF;

    assert(state.offset + state.pitch * state.in_height <= state.limit);
    hwaddr end = state.base + state.offset + state.pitch * state.in_height;
    assert(end <= memory_region_size(d->vram));

    return state;
}
/*
 * Populate the display fragment shader's push-constant block (see
 * display_frag_glsl) from the current display and PVIDEO state. The values
 * are staged into the ShaderUniformLayout's allocation; they reach the GPU
 * via vkCmdPushConstants in render_display().
 */
static void update_uniforms(PGRAPHState *pg, SurfaceBinding *surface)
{
    NV2AState *d = container_of(pg, NV2AState, pgraph);
    PGRAPHVkState *r = pg->vk_renderer_state;
    ShaderUniformLayout *l = &r->display.display_frag->push_constants;

    int display_size_loc = uniform_index(l, "display_size"); // FIXME: Cache
    uniform2f(l, display_size_loc, r->display.width, r->display.height);

    uint32_t pline_offset, pstart_addr, pline_compare;
    d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);
    // NOTE(review): assumes pline_offset (VGA line pitch) is nonzero here —
    // division below would trap otherwise; confirm against the VGA model.
    int line_offset = surface->pitch / pline_offset;
    int line_offset_loc = uniform_index(l, "line_offset");
    uniform1f(l, line_offset_loc, line_offset);

    PvideoState *pvideo = &r->display.pvideo.state;
    uniform1i(l, uniform_index(l, "pvideo_enable"), pvideo->enabled);
    if (pvideo->enabled) {
        uniform1i(l, uniform_index(l, "pvideo_color_key_enable"),
                  pvideo->color_key_enabled);
        // Color key is packed 8:8:8:8; normalize each channel to [0,1].
        uniform4f(
            l, uniform_index(l, "pvideo_color_key"),
            GET_MASK(pvideo->color_key, NV_PVIDEO_COLOR_KEY_RED) / 255.0,
            GET_MASK(pvideo->color_key, NV_PVIDEO_COLOR_KEY_GREEN) / 255.0,
            GET_MASK(pvideo->color_key, NV_PVIDEO_COLOR_KEY_BLUE) / 255.0,
            GET_MASK(pvideo->color_key, NV_PVIDEO_COLOR_KEY_ALPHA) / 255.0);
        uniform2f(l, uniform_index(l, "pvideo_in_pos"), pvideo->in_s,
                  pvideo->in_t);
        uniform4f(l, uniform_index(l, "pvideo_pos"), pvideo->out_x,
                  pvideo->out_y, pvideo->out_width, pvideo->out_height);
        // .z carries the inverse surface scale so the shader can map
        // scaled-up framebuffer coordinates back to guest pixels.
        uniform4f(l, uniform_index(l, "pvideo_scale"), pvideo->scale_x,
                  pvideo->scale_y, 1.0f / pg->surface_scale_factor, 1.0);
    }
}
pg->vk_renderer_state; + PGRAPHVkDisplayState *disp = &r->display; + + if (r->in_command_buffer && + surface->draw_time >= r->command_buffer_start_time) { + pgraph_vk_finish(pg, VK_FINISH_REASON_PRESENTING); + } + + pgraph_vk_upload_surface_data(d, surface, !tcg_enabled()); + + disp->pvideo.state = get_pvideo_state(pg); + if (disp->pvideo.state.enabled) { + upload_pvideo_image(pg, disp->pvideo.state); + } + + update_uniforms(pg, surface); + update_descriptor_set(pg, surface); + + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + pgraph_vk_begin_debug_marker(r, cmd, RGBA_YELLOW, + "Display Surface %08"HWADDR_PRIx); + + pgraph_vk_transition_image_layout(pg, cmd, surface->image, + surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + pgraph_vk_transition_image_layout( + pg, cmd, disp->image, VK_FORMAT_R8G8B8A8_UNORM, + VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + VkRenderPassBeginInfo render_pass_begin_info = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = disp->render_pass, + .framebuffer = disp->framebuffer, + .renderArea.extent.width = disp->width, + .renderArea.extent.height = disp->height, + }; + vkCmdBeginRenderPass(cmd, &render_pass_begin_info, + VK_SUBPASS_CONTENTS_INLINE); + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + disp->pipeline); + + vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, + disp->pipeline_layout, 0, 1, &disp->descriptor_set, + 0, NULL); + + VkViewport viewport = { + .width = disp->width, + .height = disp->height, + .minDepth = 0.0, + .maxDepth = 1.0, + }; + vkCmdSetViewport(cmd, 0, 1, &viewport); + + VkRect2D scissor = { + .extent.width = disp->width, + .extent.height = disp->height, + }; + vkCmdSetScissor(cmd, 0, 1, &scissor); + + vkCmdPushConstants(cmd, disp->pipeline_layout, VK_SHADER_STAGE_FRAGMENT_BIT, + 0, disp->display_frag->push_constants.total_size, + 
disp->display_frag->push_constants.allocation); + + vkCmdDraw(cmd, 3, 1, 0, 0); + + vkCmdEndRenderPass(cmd); + +#if 0 + VkImageCopy region = { + .srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .srcSubresource.layerCount = 1, + .dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .dstSubresource.layerCount = 1, + .extent.width = surface->width, + .extent.height = surface->height, + .extent.depth = 1, + }; + pgraph_apply_scaling_factor(pg, ®ion.extent.width, + ®ion.extent.height); + + vkCmdCopyImage(cmd, surface->image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, disp->image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, ®ion); +#endif + + pgraph_vk_transition_image_layout(pg, cmd, surface->image, + surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL); + + pgraph_vk_transition_image_layout(pg, cmd, disp->image, + VK_FORMAT_R8G8B8_UNORM, + VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + pgraph_vk_end_debug_marker(r, cmd); + pgraph_vk_end_single_time_commands(pg, cmd); + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_5); + + disp->draw_time = surface->draw_time; +} + +static void create_surface_sampler(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkSamplerCreateInfo sampler_create_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = VK_FILTER_NEAREST, + .minFilter = VK_FILTER_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .anisotropyEnable = VK_FALSE, + .borderColor = VK_BORDER_COLOR_INT_OPAQUE_WHITE, + .unnormalizedCoordinates = VK_FALSE, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + }; + + VK_CHECK(vkCreateSampler(r->device, &sampler_create_info, NULL, + &r->display.sampler)); +} + +static void destroy_surface_sampler(PGRAPHState 
/*
 * Initialize all display-pass resources except the display image itself,
 * which is (re)created lazily in pgraph_vk_render_display() once the output
 * resolution is known.
 */
void pgraph_vk_init_display(PGRAPHState *pg)
{
    create_descriptor_pool(pg);
    create_descriptor_set_layout(pg);
    create_descriptor_sets(pg);
    create_render_pass(pg);
    create_display_pipeline(pg);
    create_surface_sampler(pg);
}

/* Tear down all display-pass resources in reverse order of creation. */
void pgraph_vk_finalize_display(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    destroy_pvideo_image(pg);

    if (r->display.image != VK_NULL_HANDLE) {
        destroy_current_display_image(pg);
    }

    destroy_surface_sampler(pg);
    destroy_display_pipeline(pg);
    destroy_render_pass(pg);
    destroy_descriptor_set_layout(pg);
    destroy_descriptor_pool(pg);
}

/*
 * Render the current scanout into the display image: locate the color
 * surface the CRTC is scanning from, size the display image to the VGA
 * resolution (doubled when interlaced, scaled by the surface scale factor),
 * then run the display pass. No-op if no color surface covers the scanout
 * address.
 */
void pgraph_vk_render_display(PGRAPHState *pg)
{
    NV2AState *d = container_of(pg, NV2AState, pgraph);
    PGRAPHVkState *r = pg->vk_renderer_state;

    uint32_t pline_offset, pstart_addr, pline_compare;
    d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare);

    SurfaceBinding *surface =
        pgraph_vk_surface_get_within(d, d->pcrtc.start + pline_offset);
    if (surface == NULL || !surface->color) {
        return;
    }

    unsigned int width = 0, height = 0;
    d->vga.get_resolution(&d->vga, (int *)&width, (int *)&height);

    /* Adjust viewport height for interlaced mode, used only in 1080i */
    if (d->vga.cr[NV_PRMCIO_INTERLACE_MODE] != NV_PRMCIO_INTERLACE_MODE_DISABLED) {
        height *= 2;
    }

    pgraph_apply_scaling_factor(pg, &width, &height);

    PGRAPHVkDisplayState *disp = &r->display;
    if (!disp->image || disp->width != width || disp->height != height) {
        create_display_image(pg, width, height);
    }

    render_display(pg, surface);
}
(c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "qemu/fast-hash.h" +#include "renderer.h" + +void pgraph_vk_draw_begin(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + + NV2A_VK_DPRINTF("NV097_SET_BEGIN_END: 0x%x", d->pgraph.primitive_mode); + + uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0); + bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE; + bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE; + bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE; + bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE; + bool color_write = mask_alpha || mask_red || mask_green || mask_blue; + bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE; + bool stencil_test = + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE; + bool is_nop_draw = !(color_write || depth_test || stencil_test); + + pgraph_vk_surface_update(d, true, true, depth_test || stencil_test); + + if (is_nop_draw) { + NV2A_VK_DPRINTF("nop!"); + return; + } +} + +static VkPrimitiveTopology get_primitive_topology(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + int polygon_mode = r->shader_binding->state.polygon_front_mode; + int primitive_mode = r->shader_binding->state.primitive_mode; + + if (polygon_mode == POLY_MODE_POINT) { + 
return VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + } + + // FIXME: Replace with LUT + switch (primitive_mode) { + case PRIM_TYPE_POINTS: + return VK_PRIMITIVE_TOPOLOGY_POINT_LIST; + case PRIM_TYPE_LINES: + return VK_PRIMITIVE_TOPOLOGY_LINE_LIST; + case PRIM_TYPE_LINE_LOOP: + // FIXME: line strips, except that the first and last vertices are also used as a line + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; + case PRIM_TYPE_LINE_STRIP: + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; + case PRIM_TYPE_TRIANGLES: + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST; + case PRIM_TYPE_TRIANGLE_STRIP: + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP; + case PRIM_TYPE_TRIANGLE_FAN: + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; + case PRIM_TYPE_QUADS: + return VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY; + case PRIM_TYPE_QUAD_STRIP: + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY; + case PRIM_TYPE_POLYGON: + if (polygon_mode == POLY_MODE_LINE) { + return VK_PRIMITIVE_TOPOLOGY_LINE_STRIP; // FIXME + } else if (polygon_mode == POLY_MODE_FILL) { + return VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN; + } + assert(!"PRIM_TYPE_POLYGON with invalid polygon_mode"); + return 0; + default: + assert(!"Invalid primitive_mode"); + return 0; + } +} + +static void pipeline_cache_entry_init(Lru *lru, LruNode *node, void *state) +{ + PipelineBinding *snode = container_of(node, PipelineBinding, node); + snode->layout = VK_NULL_HANDLE; + snode->pipeline = VK_NULL_HANDLE; + snode->draw_time = 0; +} + +static void pipeline_cache_entry_post_evict(Lru *lru, LruNode *node) +{ + PGRAPHVkState *r = container_of(lru, PGRAPHVkState, pipeline_cache); + PipelineBinding *snode = container_of(node, PipelineBinding, node); + + assert((!r->in_command_buffer || + snode->draw_time < r->command_buffer_start_time) && + "Pipeline evicted while in use!"); + + vkDestroyPipeline(r->device, snode->pipeline, NULL); + snode->pipeline = VK_NULL_HANDLE; + + vkDestroyPipelineLayout(r->device, snode->layout, NULL); + snode->layout = VK_NULL_HANDLE; 
+} + +static bool pipeline_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + PipelineBinding *snode = container_of(node, PipelineBinding, node); + return memcmp(&snode->key, key, sizeof(PipelineKey)); +} + +static void init_pipeline_cache(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkPipelineCacheCreateInfo cache_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO, + .flags = 0, + .initialDataSize = 0, + .pInitialData = NULL, + .pNext = NULL, + }; + VK_CHECK(vkCreatePipelineCache(r->device, &cache_info, NULL, + &r->vk_pipeline_cache)); + + const size_t pipeline_cache_size = 2048; + lru_init(&r->pipeline_cache); + r->pipeline_cache_entries = + g_malloc_n(pipeline_cache_size, sizeof(PipelineBinding)); + assert(r->pipeline_cache_entries != NULL); + for (int i = 0; i < pipeline_cache_size; i++) { + lru_add_free(&r->pipeline_cache, &r->pipeline_cache_entries[i].node); + } + + r->pipeline_cache.init_node = pipeline_cache_entry_init; + r->pipeline_cache.compare_nodes = pipeline_cache_entry_compare; + r->pipeline_cache.post_node_evict = pipeline_cache_entry_post_evict; +} + +static void finalize_pipeline_cache(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + lru_flush(&r->pipeline_cache); + g_free(r->pipeline_cache_entries); + r->pipeline_cache_entries = NULL; + + vkDestroyPipelineCache(r->device, r->vk_pipeline_cache, NULL); +} + +static char const *const quad_glsl = + "#version 450\n" + "void main()\n" + "{\n" + " float x = -1.0 + float((gl_VertexIndex & 1) << 2);\n" + " float y = -1.0 + float((gl_VertexIndex & 2) << 1);\n" + " gl_Position = vec4(x, y, 0, 1);\n" + "}\n"; + +static char const *const solid_frag_glsl = + "#version 450\n" + "layout(location = 0) out vec4 fragColor;\n" + "void main()\n" + "{\n" + " fragColor = vec4(1.0);" + "}\n"; + +static void init_clear_shaders(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + r->quad_vert_module = pgraph_vk_create_shader_module_from_glsl( + 
r, VK_SHADER_STAGE_VERTEX_BIT, quad_glsl); + r->solid_frag_module = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_FRAGMENT_BIT, solid_frag_glsl); +} + +static void finalize_clear_shaders(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + pgraph_vk_destroy_shader_module(r, r->quad_vert_module); + pgraph_vk_destroy_shader_module(r, r->solid_frag_module); +} + +static void init_render_passes(PGRAPHVkState *r) +{ + r->render_passes = g_array_new(false, false, sizeof(RenderPass)); +} + +static void finalize_render_passes(PGRAPHVkState *r) +{ + for (int i = 0; i < r->render_passes->len; i++) { + RenderPass *p = &g_array_index(r->render_passes, RenderPass, i); + vkDestroyRenderPass(r->device, p->render_pass, NULL); + } + g_array_free(r->render_passes, true); + r->render_passes = NULL; +} + +void pgraph_vk_init_pipelines(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + init_pipeline_cache(pg); + init_clear_shaders(pg); + init_render_passes(r); + + VkSemaphoreCreateInfo semaphore_info = { + .sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO + }; + VK_CHECK(vkCreateSemaphore(r->device, &semaphore_info, NULL, + &r->command_buffer_semaphore)); + + VkFenceCreateInfo fence_info = { + .sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + }; + VK_CHECK( + vkCreateFence(r->device, &fence_info, NULL, &r->command_buffer_fence)); +} + +void pgraph_vk_finalize_pipelines(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + finalize_clear_shaders(pg); + finalize_pipeline_cache(pg); + finalize_render_passes(r); + + vkDestroyFence(r->device, r->command_buffer_fence, NULL); + vkDestroySemaphore(r->device, r->command_buffer_semaphore, NULL); +} + +static void init_render_pass_state(PGRAPHState *pg, RenderPassState *state) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + state->color_format = r->color_binding ? + r->color_binding->host_fmt.vk_format : + VK_FORMAT_UNDEFINED; + state->zeta_format = r->zeta_binding ? 
r->zeta_binding->host_fmt.vk_format : + VK_FORMAT_UNDEFINED; +} + +static VkRenderPass create_render_pass(PGRAPHVkState *r, RenderPassState *state) +{ + NV2A_VK_DPRINTF("Creating render pass"); + + VkAttachmentDescription attachments[2]; + int num_attachments = 0; + + bool color = state->color_format != VK_FORMAT_UNDEFINED; + bool zeta = state->zeta_format != VK_FORMAT_UNDEFINED; + + VkAttachmentReference color_reference; + if (color) { + attachments[num_attachments] = (VkAttachmentDescription){ + .format = state->color_format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE, + .initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + .finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, + }; + color_reference = (VkAttachmentReference){ + num_attachments, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL + }; + num_attachments++; + } + + VkAttachmentReference depth_reference; + if (zeta) { + attachments[num_attachments] = (VkAttachmentDescription){ + .format = state->zeta_format, + .samples = VK_SAMPLE_COUNT_1_BIT, + .loadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .storeOp = VK_ATTACHMENT_STORE_OP_STORE, + .stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD, + .stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE, + .initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + .finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + }; + depth_reference = (VkAttachmentReference){ + num_attachments, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + }; + num_attachments++; + } + + VkSubpassDependency dependency = { + .srcSubpass = VK_SUBPASS_EXTERNAL, + }; + + if (color) { + dependency.srcStageMask |= + VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.srcAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + dependency.dstStageMask |= + 
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + dependency.dstAccessMask |= VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | + VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + } + + if (zeta) { + dependency.srcStageMask |= + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + dependency.srcAccessMask |= + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + dependency.dstStageMask |= + VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | + VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + dependency.dstAccessMask |= + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | + VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + } + + VkSubpassDescription subpass = { + .pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS, + .colorAttachmentCount = color ? 1 : 0, + .pColorAttachments = color ? &color_reference : NULL, + .pDepthStencilAttachment = zeta ? &depth_reference : NULL, + }; + + VkRenderPassCreateInfo renderpass_create_info = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, + .attachmentCount = num_attachments, + .pAttachments = attachments, + .subpassCount = 1, + .pSubpasses = &subpass, + .dependencyCount = 1, + .pDependencies = &dependency, + }; + VkRenderPass render_pass; + VK_CHECK(vkCreateRenderPass(r->device, &renderpass_create_info, NULL, + &render_pass)); + return render_pass; +} + +static VkRenderPass add_new_render_pass(PGRAPHVkState *r, RenderPassState *state) +{ + RenderPass new_pass; + memcpy(&new_pass.state, state, sizeof(*state)); + new_pass.render_pass = create_render_pass(r, state); + g_array_append_vals(r->render_passes, &new_pass, 1); + return new_pass.render_pass; +} + +static VkRenderPass get_render_pass(PGRAPHVkState *r, RenderPassState *state) +{ + for (int i = 0; i < r->render_passes->len; i++) { + RenderPass *p = &g_array_index(r->render_passes, RenderPass, i); + if (!memcmp(&p->state, state, sizeof(*state))) { + return p->render_pass; + } + } + return add_new_render_pass(r, state); +} + 
/* Create a framebuffer for the current color/zeta bindings. If the in-flight
 * framebuffer array is full, the renderer is flushed first. */
static void create_frame_buffer(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    NV2A_VK_DPRINTF("Creating framebuffer");

    assert(r->color_binding || r->zeta_binding);

    if (r->framebuffer_index >= ARRAY_SIZE(r->framebuffers)) {
        pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE);
    }

    VkImageView attachments[2];
    int attachment_count = 0;

    if (r->color_binding) {
        attachments[attachment_count++] = r->color_binding->image_view;
    }
    if (r->zeta_binding) {
        attachments[attachment_count++] = r->zeta_binding->image_view;
    }

    /* Dimensions come from whichever binding exists (GNU ?: elvis operator). */
    SurfaceBinding *binding = r->color_binding ? : r->zeta_binding;

    VkFramebufferCreateInfo create_info = {
        .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
        .renderPass = r->render_pass,
        .attachmentCount = attachment_count,
        .pAttachments = attachments,
        .width = binding->width,
        .height = binding->height,
        .layers = 1,
    };
    pgraph_apply_scaling_factor(pg, &create_info.width, &create_info.height);
    VK_CHECK(vkCreateFramebuffer(r->device, &create_info, NULL,
                                 &r->framebuffers[r->framebuffer_index++]));
}

static void destroy_framebuffers(PGRAPHState *pg)
{
    NV2A_VK_DPRINTF("Destroying framebuffer");
    PGRAPHVkState *r = pg->vk_renderer_state;

    for (int i = 0; i < r->framebuffer_index; i++) {
        vkDestroyFramebuffer(r->device, r->framebuffers[i], NULL);
        r->framebuffers[i] = VK_NULL_HANDLE;
    }
    r->framebuffer_index = 0;
}

/* Build (or fetch from the LRU cache) the pipeline that implements
 * NV097_CLEAR_SURFACE: a full-screen triangle whose color writes are masked
 * to the requested channels and whose output is routed through constant-color
 * blending, so the clear color is supplied via dynamic blend constants. */
static void create_clear_pipeline(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    NV2A_VK_DGROUP_BEGIN("Creating clear pipeline");

    PipelineKey key;
    memset(&key, 0, sizeof(key));
    key.clear = true;
    init_render_pass_state(pg, &key.render_pass_state);

    /* The clear parameter (channel/Z/stencil selection) is the only register
     * that differentiates clear pipelines. */
    key.regs[0] = r->clear_parameter;

    uint64_t hash = fast_hash((void *)&key, sizeof(key));
    LruNode *node = lru_lookup(&r->pipeline_cache, hash, &key);
    PipelineBinding *snode = container_of(node, PipelineBinding, node);

    if (snode->pipeline != VK_NULL_HANDLE) {
        NV2A_VK_DPRINTF("Cache hit");
        r->pipeline_binding_changed = r->pipeline_binding != snode;
        r->pipeline_binding = snode;
        NV2A_VK_DGROUP_END();
        return;
    }

    NV2A_VK_DPRINTF("Cache miss");
    nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_GEN);
    memcpy(&snode->key, &key, sizeof(key));

    bool clear_any_color_channels =
        r->clear_parameter & NV097_CLEAR_SURFACE_COLOR;
    bool clear_all_color_channels =
        (r->clear_parameter & NV097_CLEAR_SURFACE_COLOR) ==
        (NV097_CLEAR_SURFACE_R | NV097_CLEAR_SURFACE_G | NV097_CLEAR_SURFACE_B |
         NV097_CLEAR_SURFACE_A);
    bool partial_color_clear =
        clear_any_color_channels && !clear_all_color_channels;

    int num_active_shader_stages = 0;
    VkPipelineShaderStageCreateInfo shader_stages[2];
    shader_stages[num_active_shader_stages++] =
        (VkPipelineShaderStageCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .stage = VK_SHADER_STAGE_VERTEX_BIT,
            .module = r->quad_vert_module->module,
            .pName = "main",
        };
    /* A fragment stage is only needed for partial color clears, where the
     * white output is blended with the constant clear color per-channel. */
    if (partial_color_clear) {
        shader_stages[num_active_shader_stages++] =
            (VkPipelineShaderStageCreateInfo){
                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
                .module = r->solid_frag_module->module,
                .pName = "main",
            };
    }

    /* No vertex buffers: positions come from gl_VertexIndex. */
    VkPipelineVertexInputStateCreateInfo vertex_input = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
    };

    VkPipelineInputAssemblyStateCreateInfo input_assembly = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
        .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
        .primitiveRestartEnable = VK_FALSE,
    };

    VkPipelineViewportStateCreateInfo viewport_state = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
        .viewportCount = 1,
        .scissorCount = 1,
    };

    VkPipelineRasterizationStateCreateInfo rasterizer = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
        .depthClampEnable = VK_FALSE,
        .rasterizerDiscardEnable = VK_FALSE,
        .polygonMode = VK_POLYGON_MODE_FILL,
        .lineWidth = 1.0f,
        .cullMode = VK_CULL_MODE_BACK_BIT,
        .frontFace = VK_FRONT_FACE_CLOCKWISE,
        .depthBiasEnable = VK_FALSE,
    };

    VkPipelineMultisampleStateCreateInfo multisampling = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
        .sampleShadingEnable = VK_FALSE,
        .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
    };

    /* Depth always passes; writes happen only when Z is being cleared. */
    VkPipelineDepthStencilStateCreateInfo depth_stencil = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
        .depthTestEnable = VK_TRUE,
        .depthWriteEnable =
            (r->clear_parameter & NV097_CLEAR_SURFACE_Z) ? VK_TRUE : VK_FALSE,
        .depthCompareOp = VK_COMPARE_OP_ALWAYS,
        .depthBoundsTestEnable = VK_FALSE,
    };

    if (r->clear_parameter & NV097_CLEAR_SURFACE_STENCIL) {
        depth_stencil.stencilTestEnable = VK_TRUE;
        depth_stencil.front.failOp = VK_STENCIL_OP_REPLACE;
        depth_stencil.front.passOp = VK_STENCIL_OP_REPLACE;
        depth_stencil.front.depthFailOp = VK_STENCIL_OP_REPLACE;
        depth_stencil.front.compareOp = VK_COMPARE_OP_ALWAYS;
        depth_stencil.front.compareMask = 0xff;
        depth_stencil.front.writeMask = 0xff;
        depth_stencil.front.reference = 0xff;
        depth_stencil.back = depth_stencil.front;
    }

    VkColorComponentFlags write_mask = 0;
    if (r->clear_parameter & NV097_CLEAR_SURFACE_R)
        write_mask |= VK_COLOR_COMPONENT_R_BIT;
    if (r->clear_parameter & NV097_CLEAR_SURFACE_G)
        write_mask |= VK_COLOR_COMPONENT_G_BIT;
    if (r->clear_parameter & NV097_CLEAR_SURFACE_B)
        write_mask |= VK_COLOR_COMPONENT_B_BIT;
    if (r->clear_parameter & NV097_CLEAR_SURFACE_A)
        write_mask |= VK_COLOR_COMPONENT_A_BIT;

    /* out = 1.0 * CONSTANT + 0 * dst, i.e. the blend constant (= clear color)
     * lands in the enabled channels. */
    VkPipelineColorBlendAttachmentState color_blend_attachment = {
        .colorWriteMask = write_mask,
        .blendEnable = VK_TRUE,
        .colorBlendOp = VK_BLEND_OP_ADD,
        .dstColorBlendFactor = VK_BLEND_FACTOR_ZERO,
        .srcColorBlendFactor = VK_BLEND_FACTOR_CONSTANT_COLOR,
        .alphaBlendOp = VK_BLEND_OP_ADD,
        .dstAlphaBlendFactor = VK_BLEND_FACTOR_ZERO,
        .srcAlphaBlendFactor = VK_BLEND_FACTOR_CONSTANT_ALPHA,
    };

    VkPipelineColorBlendStateCreateInfo color_blending = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
        .logicOpEnable = VK_FALSE,
        .logicOp = VK_LOGIC_OP_COPY,
        .attachmentCount = r->color_binding ? 1 : 0,
        .pAttachments = r->color_binding ? &color_blend_attachment : NULL,
    };

    /* Blend constants are only dynamic when blending is actually used. */
    VkDynamicState dynamic_states[] = { VK_DYNAMIC_STATE_VIEWPORT,
                                        VK_DYNAMIC_STATE_SCISSOR,
                                        VK_DYNAMIC_STATE_BLEND_CONSTANTS };
    VkPipelineDynamicStateCreateInfo dynamic_state = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
        .dynamicStateCount = partial_color_clear ? 3 : 2,
        .pDynamicStates = dynamic_states,
    };

    /* Clear needs no descriptors or push constants. */
    VkPipelineLayoutCreateInfo pipeline_layout_info = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
    };

    VkPipelineLayout layout;
    VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
                                    &layout));

    VkGraphicsPipelineCreateInfo pipeline_info = {
        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
        .stageCount = num_active_shader_stages,
        .pStages = shader_stages,
        .pVertexInputState = &vertex_input,
        .pInputAssemblyState = &input_assembly,
        .pViewportState = &viewport_state,
        .pRasterizationState = &rasterizer,
        .pMultisampleState = &multisampling,
        .pDepthStencilState = r->zeta_binding ? &depth_stencil : NULL,
        .pColorBlendState = &color_blending,
        .pDynamicState = &dynamic_state,
        .layout = layout,
        .renderPass = get_render_pass(r, &key.render_pass_state),
        .subpass = 0,
        .basePipelineHandle = VK_NULL_HANDLE,
    };

    VkPipeline pipeline;
    VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1,
                                       &pipeline_info, NULL, &pipeline));

    snode->pipeline = pipeline;
    snode->layout = layout;
    snode->render_pass = pipeline_info.renderPass;
    snode->draw_time = pg->draw_time;

    r->pipeline_binding = snode;
    r->pipeline_binding_changed = true;

    NV2A_VK_DGROUP_END();
}

/* True when the current attachment formats differ from those baked into the
 * bound pipeline's key. */
static bool check_render_pass_dirty(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    assert(r->pipeline_binding);

    RenderPassState state;
    init_render_pass_state(pg, &state);

    return memcmp(&state, &r->pipeline_binding->key.render_pass_state,
                  sizeof(state)) != 0;
}

// Quickly check for any state changes that would require more analysis
static bool check_pipeline_dirty(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    if (!r->pipeline_binding || r->shader_bindings_changed ||
        r->texture_bindings_changed || check_render_pass_dirty(pg)) {
        return true;
    }

    /* Registers that feed pipeline state; must mirror init_pipeline_key. */
    const unsigned int regs[] = {
        NV_PGRAPH_BLEND,        NV_PGRAPH_BLENDCOLOR,
        NV_PGRAPH_CONTROL_0,    NV_PGRAPH_CONTROL_1,
        NV_PGRAPH_CONTROL_2,    NV_PGRAPH_CONTROL_3,
        NV_PGRAPH_SETUPRASTER,  NV_PGRAPH_ZCOMPRESSOCCLUDE,
        NV_PGRAPH_ZOFFSETBIAS,  NV_PGRAPH_ZOFFSETFACTOR,
    };

    for (int i = 0; i < ARRAY_SIZE(regs); i++) {
        if (pgraph_is_reg_dirty(pg, regs[i])) {
            return true;
        }
    }

    // FIXME: Use dirty bits instead
    if (memcmp(r->vertex_attribute_descriptions,
               r->pipeline_binding->key.attribute_descriptions,
               r->num_active_vertex_attribute_descriptions *
                   sizeof(r->vertex_attribute_descriptions[0])) ||
        memcmp(r->vertex_binding_descriptions,
               r->pipeline_binding->key.binding_descriptions,
               r->num_active_vertex_binding_descriptions *
                   sizeof(r->vertex_binding_descriptions[0]))) {
        return true;
    }

    nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_NOTDIRTY);

    return false;
}

/* Assemble the full cache key for a draw pipeline: render pass formats,
 * shader state, vertex input layout, and the relevant PGRAPH registers. */
static void init_pipeline_key(PGRAPHState *pg, PipelineKey *key)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    memset(key, 0, sizeof(*key));
    init_render_pass_state(pg, &key->render_pass_state);
    memcpy(&key->shader_state, &r->shader_binding->state, sizeof(ShaderState));
    memcpy(key->binding_descriptions, r->vertex_binding_descriptions,
           sizeof(key->binding_descriptions[0]) *
               r->num_active_vertex_binding_descriptions);
    memcpy(key->attribute_descriptions, r->vertex_attribute_descriptions,
           sizeof(key->attribute_descriptions[0]) *
               r->num_active_vertex_attribute_descriptions);

    // FIXME: Register masking
    // FIXME: Use more dynamic state updates
    const int regs[] = {
        NV_PGRAPH_BLEND,        NV_PGRAPH_BLENDCOLOR,
        NV_PGRAPH_CONTROL_0,    NV_PGRAPH_CONTROL_1,
        NV_PGRAPH_CONTROL_2,    NV_PGRAPH_CONTROL_3,
        NV_PGRAPH_SETUPRASTER,  NV_PGRAPH_ZCOMPRESSOCCLUDE,
        NV_PGRAPH_ZOFFSETBIAS,  NV_PGRAPH_ZOFFSETFACTOR,
    };
    assert(ARRAY_SIZE(regs) == ARRAY_SIZE(key->regs));
    for (int i = 0; i < ARRAY_SIZE(regs); i++) {
        key->regs[i] = pgraph_reg_r(pg, regs[i]);
    }
}

/* Bind textures/shaders, then build or fetch the graphics pipeline matching
 * the full current PGRAPH state. Sets r->pipeline_binding(+_changed). */
static void create_pipeline(PGRAPHState *pg)
{
    NV2A_VK_DGROUP_BEGIN("Creating pipeline");

    NV2AState *d = container_of(pg, NV2AState, pgraph);
    PGRAPHVkState *r = pg->vk_renderer_state;

    pgraph_vk_bind_textures(d);
    pgraph_vk_bind_shaders(pg);

    // FIXME: If nothing was dirty, don't even try creating the key or hashing.
    //        Just use the same pipeline.
    bool pipeline_dirty = check_pipeline_dirty(pg);

    pgraph_clear_dirty_reg_map(pg);
    // FIXME: We could clear less

    if (r->pipeline_binding && !pipeline_dirty) {
        NV2A_VK_DPRINTF("Cache hit");
        NV2A_VK_DGROUP_END();
        return;
    }

    PipelineKey key;
    init_pipeline_key(pg, &key);
    uint64_t hash = fast_hash((void *)&key, sizeof(key));

    LruNode *node = lru_lookup(&r->pipeline_cache, hash, &key);
    PipelineBinding *snode = container_of(node, PipelineBinding, node);
    if (snode->pipeline != VK_NULL_HANDLE) {
        NV2A_VK_DPRINTF("Cache hit");
        r->pipeline_binding_changed = r->pipeline_binding != snode;
        r->pipeline_binding = snode;
        NV2A_VK_DGROUP_END();
        return;
    }

    NV2A_VK_DPRINTF("Cache miss");
    nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_GEN);

    memcpy(&snode->key, &key, sizeof(key));

    uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0);
    bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE;
    bool depth_write = !!(control_0 & NV_PGRAPH_CONTROL_0_ZWRITEENABLE);
    bool stencil_test =
        pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE;

    int num_active_shader_stages = 0;
    VkPipelineShaderStageCreateInfo shader_stages[3];

    shader_stages[num_active_shader_stages++] =
        (VkPipelineShaderStageCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .stage = VK_SHADER_STAGE_VERTEX_BIT,
            .module = r->shader_binding->vertex->module,
            .pName = "main",
        };
    /* Geometry stage is optional (used e.g. for quad expansion). */
    if (r->shader_binding->geometry) {
        shader_stages[num_active_shader_stages++] =
            (VkPipelineShaderStageCreateInfo){
                .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
                .stage = VK_SHADER_STAGE_GEOMETRY_BIT,
                .module = r->shader_binding->geometry->module,
                .pName = "main",
            };
    }
    shader_stages[num_active_shader_stages++] =
        (VkPipelineShaderStageCreateInfo){
            .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
            .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
            .module = r->shader_binding->fragment->module,
            .pName = "main",
        };

    VkPipelineVertexInputStateCreateInfo vertex_input = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
        .vertexBindingDescriptionCount =
            r->num_active_vertex_binding_descriptions,
        .pVertexBindingDescriptions = r->vertex_binding_descriptions,
        .vertexAttributeDescriptionCount =
            r->num_active_vertex_attribute_descriptions,
        .pVertexAttributeDescriptions = r->vertex_attribute_descriptions,
    };

    VkPipelineInputAssemblyStateCreateInfo input_assembly = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
        .topology = get_primitive_topology(pg),
        .primitiveRestartEnable = VK_FALSE,
    };

    VkPipelineViewportStateCreateInfo viewport_state = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
        .viewportCount = 1,
        .scissorCount = 1,
    };

    void *rasterizer_next_struct = NULL;

    /* NV2A flat shading takes the color from the first vertex; map that to
     * VK_EXT_provoking_vertex when the extension is available. */
    VkPipelineRasterizationProvokingVertexStateCreateInfoEXT provoking_state;

    if (r->provoking_vertex_extension_enabled) {
        VkProvokingVertexModeEXT provoking_mode =
            GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_3),
                     NV_PGRAPH_CONTROL_3_SHADEMODE) ==
                    NV_PGRAPH_CONTROL_3_SHADEMODE_FLAT ?
                VK_PROVOKING_VERTEX_MODE_FIRST_VERTEX_EXT :
                VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;

        provoking_state =
            (VkPipelineRasterizationProvokingVertexStateCreateInfoEXT){
                .sType =
                    VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT,
                .provokingVertexMode = provoking_mode,
            };
        rasterizer_next_struct = &provoking_state;
    } else {
        // FIXME: Handle in shader?
    }

    VkPipelineRasterizationStateCreateInfo rasterizer = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
        .depthClampEnable = VK_FALSE,
        .rasterizerDiscardEnable = VK_FALSE,
        .polygonMode = pgraph_polygon_mode_vk_map[r->shader_binding->state
                                                      .polygon_front_mode],
        .lineWidth = 1.0f,
        .frontFace = (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
                      NV_PGRAPH_SETUPRASTER_FRONTFACE) ?
                         VK_FRONT_FACE_COUNTER_CLOCKWISE :
                         VK_FRONT_FACE_CLOCKWISE,
        .depthBiasEnable = VK_FALSE,
        .pNext = rasterizer_next_struct,
    };

    if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) & NV_PGRAPH_SETUPRASTER_CULLENABLE) {
        uint32_t cull_face = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER),
                                      NV_PGRAPH_SETUPRASTER_CULLCTRL);
        assert(cull_face < ARRAY_SIZE(pgraph_cull_face_vk_map));
        rasterizer.cullMode = pgraph_cull_face_vk_map[cull_face];
    } else {
        rasterizer.cullMode = VK_CULL_MODE_NONE;
    }

    VkPipelineMultisampleStateCreateInfo multisampling = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
        .sampleShadingEnable = VK_FALSE,
        .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
    };

    VkPipelineDepthStencilStateCreateInfo depth_stencil = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
        .depthWriteEnable = depth_write ? VK_TRUE : VK_FALSE,
    };

    if (depth_test) {
        depth_stencil.depthTestEnable = VK_TRUE;
        uint32_t depth_func =
            GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), NV_PGRAPH_CONTROL_0_ZFUNC);
        assert(depth_func < ARRAY_SIZE(pgraph_depth_func_vk_map));
        depth_stencil.depthCompareOp = pgraph_depth_func_vk_map[depth_func];
    }

    if (stencil_test) {
        depth_stencil.stencilTestEnable = VK_TRUE;
        uint32_t stencil_func = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
                                         NV_PGRAPH_CONTROL_1_STENCIL_FUNC);
        uint32_t stencil_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
                                        NV_PGRAPH_CONTROL_1_STENCIL_REF);
        uint32_t mask_read = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
                                      NV_PGRAPH_CONTROL_1_STENCIL_MASK_READ);
        uint32_t mask_write = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1),
                                       NV_PGRAPH_CONTROL_1_STENCIL_MASK_WRITE);
        uint32_t op_fail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
                                    NV_PGRAPH_CONTROL_2_STENCIL_OP_FAIL);
        uint32_t op_zfail = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
                                     NV_PGRAPH_CONTROL_2_STENCIL_OP_ZFAIL);
        uint32_t op_zpass = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_2),
                                     NV_PGRAPH_CONTROL_2_STENCIL_OP_ZPASS);

        assert(stencil_func < ARRAY_SIZE(pgraph_stencil_func_vk_map));
        assert(op_fail < ARRAY_SIZE(pgraph_stencil_op_vk_map));
        assert(op_zfail < ARRAY_SIZE(pgraph_stencil_op_vk_map));
        assert(op_zpass < ARRAY_SIZE(pgraph_stencil_op_vk_map));

        /* NV2A stencil state is not two-sided; mirror front onto back. */
        depth_stencil.front.failOp = pgraph_stencil_op_vk_map[op_fail];
        depth_stencil.front.passOp = pgraph_stencil_op_vk_map[op_zpass];
        depth_stencil.front.depthFailOp = pgraph_stencil_op_vk_map[op_zfail];
        depth_stencil.front.compareOp =
            pgraph_stencil_func_vk_map[stencil_func];
        depth_stencil.front.compareMask = mask_read;
        depth_stencil.front.writeMask = mask_write;
        depth_stencil.front.reference = stencil_ref;
        depth_stencil.back = depth_stencil.front;
    }

    VkColorComponentFlags write_mask = 0;
    if (control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE)
        write_mask |= VK_COLOR_COMPONENT_R_BIT;
    if (control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE)
        write_mask |= VK_COLOR_COMPONENT_G_BIT;
    if (control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE)
        write_mask |= VK_COLOR_COMPONENT_B_BIT;
    if (control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE)
        write_mask |= VK_COLOR_COMPONENT_A_BIT;

    VkPipelineColorBlendAttachmentState color_blend_attachment = {
        .colorWriteMask = write_mask,
    };

    float blend_constant[4] = { 0, 0, 0, 0 };

    if (pgraph_reg_r(pg, NV_PGRAPH_BLEND) & NV_PGRAPH_BLEND_EN) {
        color_blend_attachment.blendEnable = VK_TRUE;

        uint32_t sfactor =
            GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), NV_PGRAPH_BLEND_SFACTOR);
        uint32_t dfactor =
            GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), NV_PGRAPH_BLEND_DFACTOR);
        assert(sfactor < ARRAY_SIZE(pgraph_blend_factor_vk_map));
        assert(dfactor < ARRAY_SIZE(pgraph_blend_factor_vk_map));
        color_blend_attachment.srcColorBlendFactor =
            pgraph_blend_factor_vk_map[sfactor];
        color_blend_attachment.dstColorBlendFactor =
            pgraph_blend_factor_vk_map[dfactor];
        color_blend_attachment.srcAlphaBlendFactor =
            pgraph_blend_factor_vk_map[sfactor];
        color_blend_attachment.dstAlphaBlendFactor =
            pgraph_blend_factor_vk_map[dfactor];

        uint32_t equation =
            GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_BLEND), NV_PGRAPH_BLEND_EQN);
        assert(equation < ARRAY_SIZE(pgraph_blend_equation_vk_map));

        color_blend_attachment.colorBlendOp =
            pgraph_blend_equation_vk_map[equation];
        color_blend_attachment.alphaBlendOp =
            pgraph_blend_equation_vk_map[equation];

        uint32_t blend_color = pgraph_reg_r(pg, NV_PGRAPH_BLENDCOLOR);
        pgraph_argb_pack32_to_rgba_float(blend_color, blend_constant);
    }

    VkPipelineColorBlendStateCreateInfo color_blending = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
        .logicOpEnable = VK_FALSE,
        .logicOp = VK_LOGIC_OP_COPY,
        .attachmentCount = r->color_binding ? 1 : 0,
        .pAttachments = r->color_binding ? &color_blend_attachment : NULL,
        .blendConstants[0] = blend_constant[0],
        .blendConstants[1] = blend_constant[1],
        .blendConstants[2] = blend_constant[2],
        .blendConstants[3] = blend_constant[3],
    };

    VkDynamicState dynamic_states[2] = { VK_DYNAMIC_STATE_VIEWPORT,
                                         VK_DYNAMIC_STATE_SCISSOR };

    VkPipelineDynamicStateCreateInfo dynamic_state = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
        .dynamicStateCount = ARRAY_SIZE(dynamic_states),
        .pDynamicStates = dynamic_states,
    };

    // /* Clipping */
    // glEnable(GL_CLIP_DISTANCE0);
    // glEnable(GL_CLIP_DISTANCE1);

    // /* Polygon offset */
    // /* FIXME: GL implementation-specific, maybe do this in VS? */
    // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
    //         NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE)
    // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
    //         NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE)
    // if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
    //         NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)
    if (pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
        (NV_PGRAPH_SETUPRASTER_POFFSETFILLENABLE |
         NV_PGRAPH_SETUPRASTER_POFFSETLINEENABLE |
         NV_PGRAPH_SETUPRASTER_POFFSETPOINTENABLE)) {
        /* NOTE(review): pointer type-pun u32 -> float violates strict
         * aliasing; consider memcpy or a union for the reinterpretation. */
        uint32_t zfactor_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETFACTOR);
        float zfactor = *(float *)&zfactor_u32;
        uint32_t zbias_u32 = pgraph_reg_r(pg, NV_PGRAPH_ZOFFSETBIAS);
        float zbias = *(float *)&zbias_u32;
        rasterizer.depthBiasEnable = VK_TRUE;
        rasterizer.depthBiasSlopeFactor = zfactor;
        rasterizer.depthBiasConstantFactor = zbias;
    }

    if (GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ZCOMPRESSOCCLUDE),
                 NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN) ==
        NV_PGRAPH_ZCOMPRESSOCCLUDE_ZCLAMP_EN_CLAMP) {
        rasterizer.depthClampEnable = VK_TRUE;
    }

    // FIXME: Dither
    // if (pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0) &
    //         NV_PGRAPH_CONTROL_0_DITHERENABLE))
    // FIXME: point size
    // FIXME: Edge Antialiasing
    // bool anti_aliasing = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_ANTIALIASING),
    //                               NV_PGRAPH_ANTIALIASING_ENABLE);
    // if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
    //         NV_PGRAPH_SETUPRASTER_LINESMOOTHENABLE) {
    //     FIXME: VK_EXT_line_rasterization
    // }

    // if (!anti_aliasing && pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER) &
    //         NV_PGRAPH_SETUPRASTER_POLYSMOOTHENABLE) {
    //     FIXME: No direct analog. Just do it with MSAA.
    // }


    VkPipelineLayoutCreateInfo pipeline_layout_info = {
        .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
        .setLayoutCount = 1,
        .pSetLayouts = &r->descriptor_set_layout,
    };

    /* Uniform-valued vertex attributes may be delivered via push constants
     * instead of buffers; size the range to the number of such attributes. */
    VkPushConstantRange push_constant_range;
    if (r->shader_binding->state.use_push_constants_for_uniform_attrs) {
        int num_uniform_attributes =
            __builtin_popcount(r->shader_binding->state.uniform_attrs);
        if (num_uniform_attributes) {
            push_constant_range = (VkPushConstantRange){
                .stageFlags = VK_SHADER_STAGE_VERTEX_BIT,
                .offset = 0,
                // FIXME: Minimize push constants
                .size = num_uniform_attributes * 4 * sizeof(float),
            };
            pipeline_layout_info.pushConstantRangeCount = 1;
            pipeline_layout_info.pPushConstantRanges = &push_constant_range;
        }
    }

    VkPipelineLayout layout;
    VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL,
                                    &layout));

    VkGraphicsPipelineCreateInfo pipeline_create_info = {
        .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
        .stageCount = num_active_shader_stages,
        .pStages = shader_stages,
        .pVertexInputState = &vertex_input,
        .pInputAssemblyState = &input_assembly,
        .pViewportState = &viewport_state,
        .pRasterizationState = &rasterizer,
        .pMultisampleState = &multisampling,
        .pDepthStencilState = r->zeta_binding ? &depth_stencil : NULL,
        .pColorBlendState = &color_blending,
        .pDynamicState = &dynamic_state,
        .layout = layout,
        .renderPass = get_render_pass(r, &key.render_pass_state),
        .subpass = 0,
        .basePipelineHandle = VK_NULL_HANDLE,
    };
    VkPipeline pipeline;
    VK_CHECK(vkCreateGraphicsPipelines(r->device, r->vk_pipeline_cache, 1,
                                       &pipeline_create_info, NULL, &pipeline));

    snode->pipeline = pipeline;
    snode->layout = layout;
    snode->render_pass = pipeline_create_info.renderPass;
    snode->draw_time = pg->draw_time;

    r->pipeline_binding = snode;
    r->pipeline_binding_changed = true;

    NV2A_VK_DGROUP_END();
}

/* Upload uniform-valued vertex attribute data as push constants, when the
 * bound shader was compiled to consume them that way. */
static void push_vertex_attr_values(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;

    if (!r->shader_binding->state.use_push_constants_for_uniform_attrs) {
        return;
    }

    // FIXME: Partial updates

    float values[NV2A_VERTEXSHADER_ATTRIBUTES][4];
    int num_uniform_attrs = 0;

    pgraph_get_inline_values(pg, r->shader_binding->state.uniform_attrs, values,
                             &num_uniform_attrs);

    if (num_uniform_attrs > 0) {
        vkCmdPushConstants(r->command_buffer, r->pipeline_binding->layout,
                           VK_SHADER_STAGE_VERTEX_BIT, 0,
                           num_uniform_attrs * 4 * sizeof(float),
                           &values);
    }
}

/* Bind the most recently written descriptor set for the current pipeline. */
static void bind_descriptor_sets(PGRAPHState *pg)
{
    PGRAPHVkState *r = pg->vk_renderer_state;
    assert(r->descriptor_set_index >= 1);

    vkCmdBindDescriptorSets(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS,
                            r->pipeline_binding->layout, 0, 1,
                            &r->descriptor_sets[r->descriptor_set_index - 1], 0,
                            NULL);
}

static void begin_query(PGRAPHVkState *r)
{
    assert(r->in_command_buffer);
    assert(!r->in_render_pass);
    assert(!r->query_in_flight);

    // FIXME: We should handle this. Make the query buffer bigger, but at least
    //        flush current queries.
+ assert(r->num_queries_in_flight < r->max_queries_in_flight); + + nv2a_profile_inc_counter(NV2A_PROF_QUERY); + vkCmdResetQueryPool(r->command_buffer, r->query_pool, + r->num_queries_in_flight, 1); + vkCmdBeginQuery(r->command_buffer, r->query_pool, r->num_queries_in_flight, + VK_QUERY_CONTROL_PRECISE_BIT); + + r->query_in_flight = true; + r->new_query_needed = false; + r->num_queries_in_flight++; +} + +static void end_query(PGRAPHVkState *r) +{ + assert(r->in_command_buffer); + assert(!r->in_render_pass); + assert(r->query_in_flight); + + vkCmdEndQuery(r->command_buffer, r->query_pool, + r->num_queries_in_flight - 1); + r->query_in_flight = false; +} + +static void sync_staging_buffer(PGRAPHState *pg, VkCommandBuffer cmd, + int index_src, int index_dst) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + StorageBuffer *b_src = &r->storage_buffers[index_src]; + StorageBuffer *b_dst = &r->storage_buffers[index_dst]; + + if (!b_src->buffer_offset) { + return; + } + + VkBufferCopy copy_region = { .size = b_src->buffer_offset }; + vkCmdCopyBuffer(cmd, b_src->buffer, b_dst->buffer, 1, &copy_region); + + VkAccessFlags dst_access_mask; + VkPipelineStageFlags dst_stage_mask; + + switch (index_dst) { + case BUFFER_INDEX: + dst_access_mask = VK_ACCESS_INDEX_READ_BIT; + dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + break; + case BUFFER_VERTEX_INLINE: + dst_access_mask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT; + dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_INPUT_BIT; + break; + case BUFFER_UNIFORM: + dst_access_mask = VK_ACCESS_UNIFORM_READ_BIT; + dst_stage_mask = VK_PIPELINE_STAGE_VERTEX_SHADER_BIT; + break; + default: + assert(0); + break; + } + + VkBufferMemoryBarrier barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = dst_access_mask, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = b_dst->buffer, + .size = b_src->buffer_offset + }; + 
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, 0, + 0, NULL, 1, &barrier, 0, NULL); + + b_src->buffer_offset = 0; +} + +static void flush_memory_buffer(PGRAPHState *pg, VkCommandBuffer cmd) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VK_CHECK(vmaFlushAllocation( + r->allocator, r->storage_buffers[BUFFER_VERTEX_RAM].allocation, 0, + VK_WHOLE_SIZE)); + + VkBufferMemoryBarrier barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_HOST_WRITE_BIT, + .dstAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_VERTEX_RAM].buffer, + .offset = 0, + .size = VK_WHOLE_SIZE, + }; + + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_HOST_BIT, + VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, 0, NULL, 1, + &barrier, 0, NULL); +} + +static void begin_render_pass(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(r->in_command_buffer); + assert(!r->in_render_pass); + + nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_RENDERPASSES); + + unsigned int vp_width = pg->surface_binding_dim.width, + vp_height = pg->surface_binding_dim.height; + pgraph_apply_scaling_factor(pg, &vp_width, &vp_height); + + assert(r->framebuffer_index > 0); + + VkRenderPassBeginInfo render_pass_begin_info = { + .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, + .renderPass = r->render_pass, + .framebuffer = r->framebuffers[r->framebuffer_index - 1], + .renderArea.extent.width = vp_width, + .renderArea.extent.height = vp_height, + .clearValueCount = 0, + .pClearValues = NULL, + }; + vkCmdBeginRenderPass(r->command_buffer, &render_pass_begin_info, + VK_SUBPASS_CONTENTS_INLINE); + r->in_render_pass = true; + +} + +static void end_render_pass(PGRAPHVkState *r) +{ + if (r->in_render_pass) { + vkCmdEndRenderPass(r->command_buffer); + r->in_render_pass = false; + } +} + +const enum NV2A_PROF_COUNTERS_ENUM 
finish_reason_to_counter_enum[] = { + [VK_FINISH_REASON_VERTEX_BUFFER_DIRTY] = NV2A_PROF_FINISH_VERTEX_BUFFER_DIRTY, + [VK_FINISH_REASON_SURFACE_CREATE] = NV2A_PROF_FINISH_SURFACE_CREATE, + [VK_FINISH_REASON_SURFACE_DOWN] = NV2A_PROF_FINISH_SURFACE_DOWN, + [VK_FINISH_REASON_NEED_BUFFER_SPACE] = NV2A_PROF_FINISH_NEED_BUFFER_SPACE, + [VK_FINISH_REASON_FRAMEBUFFER_DIRTY] = NV2A_PROF_FINISH_FRAMEBUFFER_DIRTY, + [VK_FINISH_REASON_PRESENTING] = NV2A_PROF_FINISH_PRESENTING, + [VK_FINISH_REASON_FLIP_STALL] = NV2A_PROF_FINISH_FLIP_STALL, + [VK_FINISH_REASON_FLUSH] = NV2A_PROF_FINISH_FLUSH, + [VK_FINISH_REASON_STALLED] = NV2A_PROF_FINISH_STALLED, +}; + +void pgraph_vk_finish(PGRAPHState *pg, FinishReason finish_reason) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(!r->in_draw); + assert(r->debug_depth == 0); + + if (r->in_command_buffer) { + nv2a_profile_inc_counter(finish_reason_to_counter_enum[finish_reason]); + + if (r->in_render_pass) { + end_render_pass(r); + } + if (r->query_in_flight) { + end_query(r); + } + VK_CHECK(vkEndCommandBuffer(r->command_buffer)); + + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); // FIXME: Cleanup + sync_staging_buffer(pg, cmd, BUFFER_INDEX_STAGING, BUFFER_INDEX); + sync_staging_buffer(pg, cmd, BUFFER_VERTEX_INLINE_STAGING, + BUFFER_VERTEX_INLINE); + sync_staging_buffer(pg, cmd, BUFFER_UNIFORM_STAGING, BUFFER_UNIFORM); + bitmap_clear(r->uploaded_bitmap, 0, r->bitmap_size); + flush_memory_buffer(pg, cmd); + VK_CHECK(vkEndCommandBuffer(r->aux_command_buffer)); + r->in_aux_command_buffer = false; + + VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + VkSubmitInfo submit_infos[] = { + { + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = &r->aux_command_buffer, + .signalSemaphoreCount = 1, + .pSignalSemaphores = &r->command_buffer_semaphore, + }, + { + + .sType = VK_STRUCTURE_TYPE_SUBMIT_INFO, + .commandBufferCount = 1, + .pCommandBuffers = 
&r->command_buffer, + .waitSemaphoreCount = 1, + .pWaitSemaphores = &r->command_buffer_semaphore, + .pWaitDstStageMask = &wait_stage, + } + }; + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT); + vkResetFences(r->device, 1, &r->command_buffer_fence); + VK_CHECK(vkQueueSubmit(r->queue, ARRAY_SIZE(submit_infos), submit_infos, + r->command_buffer_fence)); + r->submit_count += 1; + + bool check_budget = false; + + // Periodically check memory budget + const int max_num_submits_before_budget_update = 5; + if (finish_reason == VK_FINISH_REASON_FLIP_STALL || + (r->submit_count - r->allocator_last_submit_index) > + max_num_submits_before_budget_update) { + + // VMA queries budget via vmaSetCurrentFrameIndex + vmaSetCurrentFrameIndex(r->allocator, r->submit_count); + r->allocator_last_submit_index = r->submit_count; + check_budget = true; + } + + VK_CHECK(vkWaitForFences(r->device, 1, &r->command_buffer_fence, + VK_TRUE, UINT64_MAX)); + + r->descriptor_set_index = 0; + r->in_command_buffer = false; + destroy_framebuffers(pg); + + if (check_budget) { + pgraph_vk_check_memory_budget(pg); + } + } + + NV2AState *d = container_of(pg, NV2AState, pgraph); + pgraph_vk_process_pending_reports_internal(d); + + pgraph_vk_compute_finish_complete(r); +} + +void pgraph_vk_begin_command_buffer(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + assert(!r->in_command_buffer); + + VkCommandBufferBeginInfo command_buffer_begin_info = { + .sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO, + .flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT, + }; + VK_CHECK(vkBeginCommandBuffer(r->command_buffer, + &command_buffer_begin_info)); + r->command_buffer_start_time = pg->draw_time; + r->in_command_buffer = true; +} + +// FIXME: Refactor below + +void pgraph_vk_ensure_command_buffer(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + if (!r->in_command_buffer) { + pgraph_vk_begin_command_buffer(pg); + } +} + +void pgraph_vk_ensure_not_in_render_pass(PGRAPHState 
*pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + end_render_pass(r); + if (r->query_in_flight) { + end_query(r); + } +} + +VkCommandBuffer pgraph_vk_begin_nondraw_commands(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + pgraph_vk_ensure_command_buffer(pg); + pgraph_vk_ensure_not_in_render_pass(pg); + return r->command_buffer; +} + +void pgraph_vk_end_nondraw_commands(PGRAPHState *pg, VkCommandBuffer cmd) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + assert(cmd == r->command_buffer); +} + +// FIXME: Add more metrics for determining command buffer 'fullness' and +// conservatively flush. Unfortunately there doesn't appear to be a good +// way to determine what the actual maximum capacity of a command buffer +// is, but we are obviously not supposed to endlessly append to one command +// buffer. For other reasons though (like descriptor set amount, surface +// changes, etc) we do flush often. + +static void begin_pre_draw(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(r->color_binding || r->zeta_binding); + assert(!r->color_binding || r->color_binding->initialized); + assert(!r->zeta_binding || r->zeta_binding->initialized); + + if (pg->clearing) { + create_clear_pipeline(pg); + } else { + create_pipeline(pg); + } + + bool render_pass_dirty = r->pipeline_binding->render_pass != r->render_pass; + + if (r->framebuffer_dirty || render_pass_dirty) { + pgraph_vk_ensure_not_in_render_pass(pg); + } + if (render_pass_dirty) { + r->render_pass = r->pipeline_binding->render_pass; + } + if (r->framebuffer_dirty) { + create_frame_buffer(pg); + r->framebuffer_dirty = false; + } + if (!pg->clearing) { + pgraph_vk_update_descriptor_sets(pg); + } + if (r->framebuffer_index == 0) { + create_frame_buffer(pg); + } + + pgraph_vk_ensure_command_buffer(pg); +} + +static void begin_draw(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(r->in_command_buffer); + + // Visibility testing + if (!pg->clearing && 
pg->zpass_pixel_count_enable) { + if (r->new_query_needed && r->query_in_flight) { + end_render_pass(r); + end_query(r); + } + if (!r->query_in_flight) { + end_render_pass(r); + begin_query(r); + } + } else if (r->query_in_flight) { + end_render_pass(r); + end_query(r); + } + + if (pg->clearing) { + end_render_pass(r); + } + + bool must_bind_pipeline = r->pipeline_binding_changed; + + if (!r->in_render_pass) { + begin_render_pass(pg); + must_bind_pipeline = true; + } + + if (must_bind_pipeline) { + nv2a_profile_inc_counter(NV2A_PROF_PIPELINE_BIND); + vkCmdBindPipeline(r->command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, + r->pipeline_binding->pipeline); + r->pipeline_binding->draw_time = pg->draw_time; + + unsigned int vp_width = pg->surface_binding_dim.width, + vp_height = pg->surface_binding_dim.height; + pgraph_apply_scaling_factor(pg, &vp_width, &vp_height); + + VkViewport viewport = { + .width = vp_width, + .height = vp_height, + .minDepth = 0.0, + .maxDepth = 1.0, + }; + vkCmdSetViewport(r->command_buffer, 0, 1, &viewport); + + /* Surface clip */ + /* FIXME: Consider moving to PSH w/ window clip */ + unsigned int xmin = pg->surface_shape.clip_x - + pg->surface_binding_dim.clip_x, + ymin = pg->surface_shape.clip_y - + pg->surface_binding_dim.clip_y; + + unsigned int xmax = xmin + pg->surface_shape.clip_width - 1, + ymax = ymin + pg->surface_shape.clip_height - 1; + + unsigned int scissor_width = xmax - xmin + 1, + scissor_height = ymax - ymin + 1; + + pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); + pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); + + pgraph_apply_scaling_factor(pg, &xmin, &ymin); + pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height); + + VkRect2D scissor = { + .offset.x = xmin, + .offset.y = ymin, + .extent.width = scissor_width, + .extent.height = scissor_height, + }; + vkCmdSetScissor(r->command_buffer, 0, 1, &scissor); + } + + if (!pg->clearing) { + bind_descriptor_sets(pg); + 
push_vertex_attr_values(pg); + } + + r->in_draw = true; +} + +static void end_draw(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(r->in_command_buffer); + assert(r->in_render_pass); + + if (pg->clearing) { + end_render_pass(r); + } + + r->in_draw = false; +} + +void pgraph_vk_draw_end(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + uint32_t control_0 = pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0); + bool mask_alpha = control_0 & NV_PGRAPH_CONTROL_0_ALPHA_WRITE_ENABLE; + bool mask_red = control_0 & NV_PGRAPH_CONTROL_0_RED_WRITE_ENABLE; + bool mask_green = control_0 & NV_PGRAPH_CONTROL_0_GREEN_WRITE_ENABLE; + bool mask_blue = control_0 & NV_PGRAPH_CONTROL_0_BLUE_WRITE_ENABLE; + bool color_write = mask_alpha || mask_red || mask_green || mask_blue; + bool depth_test = control_0 & NV_PGRAPH_CONTROL_0_ZENABLE; + bool stencil_test = + pgraph_reg_r(pg, NV_PGRAPH_CONTROL_1) & NV_PGRAPH_CONTROL_1_STENCIL_TEST_ENABLE; + bool is_nop_draw = !(color_write || depth_test || stencil_test); + + if (is_nop_draw) { + // FIXME: Check PGRAPH register 0x880. + // HW uses bit 11 in 0x880 to enable or disable a color/zeta limit + // check that will raise an exception in the case that a draw should + // modify the color and/or zeta buffer but the target(s) are masked + // off. This check only seems to trigger during the fragment + // processing, it is legal to attempt a draw that is entirely + // clipped regardless of 0x880. See xemu#635 for context. 
+ NV2A_VK_DPRINTF("nop draw!\n"); + return; + } + + pgraph_vk_flush_draw(d); + + pg->draw_time++; + if (r->color_binding && pgraph_color_write_enabled(pg)) { + r->color_binding->draw_time = pg->draw_time; + } + if (r->zeta_binding && pgraph_zeta_write_enabled(pg)) { + r->zeta_binding->draw_time = pg->draw_time; + } + + pgraph_vk_set_surface_dirty(pg, color_write, depth_test || stencil_test); +} + +static int compare_memory_sync_requirement_by_addr(const void *p1, + const void *p2) +{ + const MemorySyncRequirement *l = p1, *r = p2; + if (l->addr < r->addr) + return -1; + if (l->addr > r->addr) + return 1; + return 0; +} + +static void sync_vertex_ram_buffer(PGRAPHState *pg) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + PGRAPHVkState *r = pg->vk_renderer_state; + + if (r->num_vertex_ram_buffer_syncs == 0) { + return; + } + + // Align sync requirements to page boundaries + NV2A_VK_DGROUP_BEGIN("Sync vertex RAM buffer"); + + for (int i = 0; i < r->num_vertex_ram_buffer_syncs; i++) { + NV2A_VK_DPRINTF("Need to sync vertex memory @%" HWADDR_PRIx + ", %" HWADDR_PRIx " bytes", + r->vertex_ram_buffer_syncs[i].addr, + r->vertex_ram_buffer_syncs[i].size); + + hwaddr start_addr = + r->vertex_ram_buffer_syncs[i].addr & TARGET_PAGE_MASK; + hwaddr end_addr = r->vertex_ram_buffer_syncs[i].addr + + r->vertex_ram_buffer_syncs[i].size; + end_addr = ROUND_UP(end_addr, TARGET_PAGE_SIZE); + + NV2A_VK_DPRINTF("- %d: %08" HWADDR_PRIx " %zd bytes" + " -> %08" HWADDR_PRIx " %zd bytes", i, + r->vertex_ram_buffer_syncs[i].addr, + r->vertex_ram_buffer_syncs[i].size, start_addr, + end_addr - start_addr); + + r->vertex_ram_buffer_syncs[i].addr = start_addr; + r->vertex_ram_buffer_syncs[i].size = end_addr - start_addr; + } + + // Sort the requirements in increasing order of addresses + qsort(r->vertex_ram_buffer_syncs, r->num_vertex_ram_buffer_syncs, + sizeof(MemorySyncRequirement), + compare_memory_sync_requirement_by_addr); + + // Merge overlapping/adjacent requests to minimize 
number of tests + MemorySyncRequirement merged[16]; + int num_syncs = 1; + + merged[0] = r->vertex_ram_buffer_syncs[0]; + + for (int i = 1; i < r->num_vertex_ram_buffer_syncs; i++) { + MemorySyncRequirement *p = &merged[num_syncs - 1]; + MemorySyncRequirement *t = &r->vertex_ram_buffer_syncs[i]; + + if (t->addr <= (p->addr + p->size)) { + // Merge with previous + hwaddr p_end_addr = p->addr + p->size; + hwaddr t_end_addr = t->addr + t->size; + hwaddr new_end_addr = MAX(p_end_addr, t_end_addr); + p->size = new_end_addr - p->addr; + } else { + merged[num_syncs++] = *t; + } + } + + if (num_syncs < r->num_vertex_ram_buffer_syncs) { + NV2A_VK_DPRINTF("Reduced to %d sync checks", num_syncs); + } + + for (int i = 0; i < num_syncs; i++) { + hwaddr addr = merged[i].addr; + VkDeviceSize size = merged[i].size; + + NV2A_VK_DPRINTF("- %d: %08"HWADDR_PRIx" %zd bytes", i, addr, size); + + if (memory_region_test_and_clear_dirty(d->vram, addr, size, + DIRTY_MEMORY_NV2A)) { + NV2A_VK_DPRINTF("Memory dirty. Synchronizing..."); + pgraph_vk_update_vertex_ram_buffer(pg, addr, d->vram_ptr + addr, + size); + } + } + + r->num_vertex_ram_buffer_syncs = 0; + + NV2A_VK_DGROUP_END(); +} + +void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + nv2a_profile_inc_counter(NV2A_PROF_CLEAR); + + bool write_color = (parameter & NV097_CLEAR_SURFACE_COLOR); + bool write_zeta = + (parameter & (NV097_CLEAR_SURFACE_Z | NV097_CLEAR_SURFACE_STENCIL)); + + pg->clearing = true; + + // FIXME: If doing a full surface clear, mark the surface for full clear + // and we can just do the clear as part of the surface load. 
+ pgraph_vk_surface_update(d, true, write_color, write_zeta); + + SurfaceBinding *binding = r->color_binding ?: r->zeta_binding; + if (!binding) { + /* Nothing bound to clear */ + pg->clearing = false; + return; + } + + r->clear_parameter = parameter; + + uint32_t clearrectx = pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTX); + uint32_t clearrecty = pgraph_reg_r(pg, NV_PGRAPH_CLEARRECTY); + + int xmin = GET_MASK(clearrectx, NV_PGRAPH_CLEARRECTX_XMIN); + int xmax = GET_MASK(clearrectx, NV_PGRAPH_CLEARRECTX_XMAX); + int ymin = GET_MASK(clearrecty, NV_PGRAPH_CLEARRECTY_YMIN); + int ymax = GET_MASK(clearrecty, NV_PGRAPH_CLEARRECTY_YMAX); + + NV2A_VK_DGROUP_BEGIN("CLEAR min=(%d,%d) max=(%d,%d)%s%s", xmin, ymin, xmax, + ymax, write_color ? " color" : "", + write_zeta ? " zeta" : ""); + + begin_pre_draw(pg); + pgraph_vk_begin_debug_marker(r, r->command_buffer, + RGBA_BLUE, "Clear %08" HWADDR_PRIx, + binding->vram_addr); + begin_draw(pg); + + // FIXME: What does hardware do when min <= max? + xmin = MIN(xmin, binding->width - 1); + ymin = MIN(ymin, binding->height - 1); + xmax = MIN(xmax, binding->width - 1); + ymax = MIN(ymax, binding->height - 1); + + int scissor_width = MAX(0, xmax - xmin + 1), + scissor_height = MAX(0, ymax - ymin + 1); + + pgraph_apply_anti_aliasing_factor(pg, &xmin, &ymin); + pgraph_apply_anti_aliasing_factor(pg, &scissor_width, &scissor_height); + + pgraph_apply_scaling_factor(pg, &xmin, &ymin); + pgraph_apply_scaling_factor(pg, &scissor_width, &scissor_height); + + VkClearRect clear_rect = { + .rect = { + .offset = { .x = xmin, .y = ymin }, + .extent = { .width = scissor_width, .height = scissor_height }, + }, + .baseArrayLayer = 0, + .layerCount = 1, + }; + + int num_attachments = 0; + VkClearAttachment attachments[2]; + + if (write_color && r->color_binding) { + const bool clear_all_color_channels = + (parameter & NV097_CLEAR_SURFACE_COLOR) == + (NV097_CLEAR_SURFACE_R | NV097_CLEAR_SURFACE_G | + NV097_CLEAR_SURFACE_B | NV097_CLEAR_SURFACE_A); + + if 
(clear_all_color_channels) { + attachments[num_attachments] = (VkClearAttachment){ + .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .colorAttachment = 0, + }; + pgraph_get_clear_color( + pg, attachments[num_attachments].clearValue.color.float32); + num_attachments++; + } else { + float blend_constants[4]; + pgraph_get_clear_color(pg, blend_constants); + vkCmdSetScissor(r->command_buffer, 0, 1, &clear_rect.rect); + vkCmdSetBlendConstants(r->command_buffer, blend_constants); + vkCmdDraw(r->command_buffer, 3, 1, 0, 0); + } + } + + if (write_zeta && r->zeta_binding) { + int stencil_value = 0; + float depth_value = 1.0; + pgraph_get_clear_depth_stencil_value(pg, &depth_value, &stencil_value); + + VkImageAspectFlags aspect = 0; + if (parameter & NV097_CLEAR_SURFACE_Z) { + aspect |= VK_IMAGE_ASPECT_DEPTH_BIT; + } + if ((parameter & NV097_CLEAR_SURFACE_STENCIL) && + (r->zeta_binding->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT)) { + aspect |= VK_IMAGE_ASPECT_STENCIL_BIT; + } + + attachments[num_attachments++] = (VkClearAttachment){ + .aspectMask = aspect, + .clearValue.depthStencil.depth = depth_value, + .clearValue.depthStencil.stencil = stencil_value, + }; + } + + if (num_attachments) { + vkCmdClearAttachments(r->command_buffer, num_attachments, attachments, + 1, &clear_rect); + } + end_draw(pg); + pgraph_vk_end_debug_marker(r, r->command_buffer); + + pg->clearing = false; + + pgraph_vk_set_surface_dirty(pg, write_color, write_zeta); + + NV2A_VK_DGROUP_END(); +} + +#if 0 +static void pgraph_vk_debug_attrs(NV2AState *d) +{ + for (int vertex_idx = 0; vertex_idx < pg->draw_arrays_count[i]; vertex_idx++) { + NV2A_VK_DGROUP_BEGIN("Vertex %d+%d", pg->draw_arrays_start[i], vertex_idx); + for (int attr_idx = 0; attr_idx < NV2A_VERTEXSHADER_ATTRIBUTES; attr_idx++) { + VertexAttribute *attr = &pg->vertex_attributes[attr_idx]; + if (attr->count) { + char *p = (char *)d->vram_ptr + r->attribute_offsets[attr_idx] + (pg->draw_arrays_start[i] + vertex_idx) * attr->stride; + 
NV2A_VK_DGROUP_BEGIN("Attribute %d data at %tx", attr_idx, (ptrdiff_t)(p - (char*)d->vram_ptr)); + for (int count_idx = 0; count_idx < attr->count; count_idx++) { + switch (attr->format) { + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: + NV2A_VK_DPRINTF("[%d] %f", count_idx, *(float*)p); + p += sizeof(float); + break; + default: + assert(0); + break; + } + } + NV2A_VK_DGROUP_END(); + } + } + NV2A_VK_DGROUP_END(); + } +} +#endif + +static void bind_vertex_buffer(PGRAPHState *pg, uint16_t inline_map, + VkDeviceSize offset) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + if (r->num_active_vertex_binding_descriptions == 0) { + return; + } + + VkBuffer buffers[NV2A_VERTEXSHADER_ATTRIBUTES]; + VkDeviceSize offsets[NV2A_VERTEXSHADER_ATTRIBUTES]; + + for (int i = 0; i < r->num_active_vertex_binding_descriptions; i++) { + int attr_idx = r->vertex_attribute_descriptions[i].location; + int buffer_idx = (inline_map & (1 << attr_idx)) ? BUFFER_VERTEX_INLINE : + BUFFER_VERTEX_RAM; + buffers[i] = r->storage_buffers[buffer_idx].buffer; + offsets[i] = offset + r->vertex_attribute_offsets[attr_idx]; + } + + vkCmdBindVertexBuffers(r->command_buffer, 0, + r->num_active_vertex_binding_descriptions, buffers, + offsets); +} + +static void bind_inline_vertex_buffer(PGRAPHState *pg, VkDeviceSize offset) +{ + bind_vertex_buffer(pg, 0xffff, offset); +} + +void pgraph_vk_set_surface_dirty(PGRAPHState *pg, bool color, bool zeta) +{ + NV2A_DPRINTF("pgraph_set_surface_dirty(%d, %d) -- %d %d\n", color, zeta, + pgraph_color_write_enabled(pg), pgraph_zeta_write_enabled(pg)); + + PGRAPHVkState *r = pg->vk_renderer_state; + + /* FIXME: Does this apply to CLEARs too? 
*/ + color = color && pgraph_color_write_enabled(pg); + zeta = zeta && pgraph_zeta_write_enabled(pg); + pg->surface_color.draw_dirty |= color; + pg->surface_zeta.draw_dirty |= zeta; + + if (r->color_binding) { + r->color_binding->draw_dirty |= color; + r->color_binding->frame_time = pg->frame_time; + r->color_binding->cleared = false; + } + + if (r->zeta_binding) { + r->zeta_binding->draw_dirty |= zeta; + r->zeta_binding->frame_time = pg->frame_time; + r->zeta_binding->cleared = false; + } +} + +static bool ensure_buffer_space(PGRAPHState *pg, int index, VkDeviceSize size) +{ + if (!pgraph_vk_buffer_has_space_for(pg, index, size, 1)) { + pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE); + return true; + } + + return false; +} + +static void get_size_and_count_for_format(VkFormat fmt, size_t *size, size_t *count) +{ + static const struct { + size_t size; + size_t count; + } table[] = { + [VK_FORMAT_R8_UNORM] = { 1, 1 }, + [VK_FORMAT_R8G8_UNORM] = { 1, 2 }, + [VK_FORMAT_R8G8B8_UNORM] = { 1, 3 }, + [VK_FORMAT_R8G8B8A8_UNORM] = { 1, 4 }, + [VK_FORMAT_R16_SNORM] = { 2, 1 }, + [VK_FORMAT_R16G16_SNORM] = { 2, 2 }, + [VK_FORMAT_R16G16B16_SNORM] = { 2, 3 }, + [VK_FORMAT_R16G16B16A16_SNORM] = { 2, 4 }, + [VK_FORMAT_R16_SSCALED] = { 2, 1 }, + [VK_FORMAT_R16G16_SSCALED] = { 2, 2 }, + [VK_FORMAT_R16G16B16_SSCALED] = { 2, 3 }, + [VK_FORMAT_R16G16B16A16_SSCALED] = { 2, 4 }, + [VK_FORMAT_R32_SFLOAT] = { 4, 1 }, + [VK_FORMAT_R32G32_SFLOAT] = { 4, 2 }, + [VK_FORMAT_R32G32B32_SFLOAT] = { 4, 3 }, + [VK_FORMAT_R32G32B32A32_SFLOAT] = { 4, 4 }, + [VK_FORMAT_R32_SINT] = { 4, 1 }, + }; + + assert(fmt < ARRAY_SIZE(table)); + assert(table[fmt].size); + + *size = table[fmt].size; + *count = table[fmt].count; +} + +typedef struct VertexBufferRemap { + uint16_t attributes; + size_t buffer_space_required; + struct { + VkDeviceAddress offset; + VkDeviceSize stride; + } map[NV2A_VERTEXSHADER_ATTRIBUTES]; +} VertexBufferRemap; + +static VertexBufferRemap 
remap_unaligned_attributes(PGRAPHState *pg, + uint32_t num_vertices) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VertexBufferRemap remap = {0}; + + VkDeviceAddress output_offset = 0; + + for (int attr_id = 0; attr_id < NV2A_VERTEXSHADER_ATTRIBUTES; attr_id++) { + int desc_loc = r->vertex_attribute_to_description_location[attr_id]; + if (desc_loc < 0) { + continue; + } + + VkVertexInputBindingDescription *desc = + &r->vertex_binding_descriptions[desc_loc]; + VkVertexInputAttributeDescription *attr = + &r->vertex_attribute_descriptions[desc_loc]; + + size_t element_size, element_count; + get_size_and_count_for_format(attr->format, &element_size, &element_count); + + bool offset_valid = + (r->vertex_attribute_offsets[attr_id] % element_size == 0); + bool stride_valid = (desc->stride % element_size == 0); + + if (offset_valid && stride_valid) { + continue; + } + + remap.attributes |= 1 << attr_id; + remap.map[attr_id].offset = ROUND_UP(output_offset, element_size); + remap.map[attr_id].stride = element_size * element_count; + + // fprintf(stderr, + // "attr %02d remapped: " + // "%08" HWADDR_PRIx "->%08" HWADDR_PRIx " " + // "stride=%d->%zd\n", + // attr_id, r->vertex_attribute_offsets[attr_id], + // remap.map[attr_id].offset, desc->stride, + // remap.map[attr_id].stride); + + output_offset = + remap.map[attr_id].offset + remap.map[attr_id].stride * num_vertices; + } + + remap.buffer_space_required = output_offset; + return remap; +} + +static void copy_remapped_attributes_to_inline_buffer(PGRAPHState *pg, + VertexBufferRemap remap, + uint32_t start_vertex, + uint32_t num_vertices) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + PGRAPHVkState *r = pg->vk_renderer_state; + StorageBuffer *buffer = &r->storage_buffers[BUFFER_VERTEX_INLINE_STAGING]; + + r->vertex_buffer_inline = remap.attributes; + + if (!remap.attributes) { + return; + } + + VkDeviceSize starting_offset = ROUND_UP(buffer->buffer_offset, 16); + size_t total_space_required = + 
(starting_offset - buffer->buffer_offset) + remap.buffer_space_required; + ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING, total_space_required); + assert(pgraph_vk_buffer_has_space_for(pg, BUFFER_VERTEX_INLINE_STAGING, + total_space_required, 1)); + + buffer->buffer_offset = starting_offset; // Aligned + + // FIXME: SIMD memcpy + // FIXME: Caching + // FIXME: Account for only what is drawn + assert(start_vertex == 0); + assert(buffer->mapped); + + // Copy vertex data + for (int attr_id = 0; attr_id < NV2A_VERTEXSHADER_ATTRIBUTES; attr_id++) { + if (!(remap.attributes & (1 << attr_id))) { + continue; + } + + int bind_desc_loc = + r->vertex_attribute_to_description_location[attr_id]; + assert(bind_desc_loc >= 0); + + VkVertexInputBindingDescription *bind_desc = + &r->vertex_binding_descriptions[bind_desc_loc]; + + VkDeviceSize attr_buffer_offset = + buffer->buffer_offset + remap.map[attr_id].offset; + + uint8_t *out_ptr = buffer->mapped + attr_buffer_offset; + uint8_t *in_ptr = d->vram_ptr + r->vertex_attribute_offsets[attr_id]; + + for (int vertex_id = 0; vertex_id < num_vertices; vertex_id++) { + memcpy(out_ptr, in_ptr, remap.map[attr_id].stride); + out_ptr += remap.map[attr_id].stride; + in_ptr += bind_desc->stride; + } + + r->vertex_attribute_offsets[attr_id] = attr_buffer_offset; + bind_desc->stride = remap.map[attr_id].stride; + } + + buffer->buffer_offset += remap.buffer_space_required; +} + +void pgraph_vk_flush_draw(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + if (!(r->color_binding || r->zeta_binding)) { + NV2A_VK_DPRINTF("No binding present!!!\n"); + return; + } + + r->num_vertex_ram_buffer_syncs = 0; + + if (pg->draw_arrays_length) { + NV2A_VK_DGROUP_BEGIN("Draw Arrays"); + nv2a_profile_inc_counter(NV2A_PROF_DRAW_ARRAYS); + + assert(pg->inline_elements_length == 0); + assert(pg->inline_buffer_length == 0); + assert(pg->inline_array_length == 0); + + pgraph_vk_bind_vertex_attributes(d, 
pg->draw_arrays_min_start, + pg->draw_arrays_max_count - 1, false, + 0, pg->draw_arrays_max_count - 1); + uint32_t min_element = INT_MAX; + uint32_t max_element = 0; + for (int i = 0; i < pg->draw_arrays_length; i++) { + min_element = MIN(pg->draw_arrays_start[i], min_element); + max_element = MAX(max_element, pg->draw_arrays_start[i] + pg->draw_arrays_count[i]); + } + sync_vertex_ram_buffer(pg); + VertexBufferRemap remap = remap_unaligned_attributes(pg, max_element); + copy_remapped_attributes_to_inline_buffer(pg, remap, 0, max_element); + + begin_pre_draw(pg); + pgraph_vk_begin_debug_marker(r, r->command_buffer, RGBA_BLUE, + "Draw Arrays"); + begin_draw(pg); + bind_vertex_buffer(pg, remap.attributes, 0); + for (int i = 0; i < pg->draw_arrays_length; i++) { + uint32_t start = pg->draw_arrays_start[i], + count = pg->draw_arrays_count[i]; + NV2A_VK_DPRINTF("- [%d] Start:%d Count:%d", i, start, count); + vkCmdDraw(r->command_buffer, count, 1, start, 0); + } + end_draw(pg); + pgraph_vk_end_debug_marker(r, r->command_buffer); + + NV2A_VK_DGROUP_END(); + } else if (pg->inline_elements_length) { + NV2A_VK_DGROUP_BEGIN("Inline Elements"); + assert(pg->inline_buffer_length == 0); + assert(pg->inline_array_length == 0); + + nv2a_profile_inc_counter(NV2A_PROF_INLINE_ELEMENTS); + + size_t index_data_size = + pg->inline_elements_length * sizeof(pg->inline_elements[0]); + + ensure_buffer_space(pg, BUFFER_INDEX_STAGING, index_data_size); + + uint32_t min_element = (uint32_t)-1; + uint32_t max_element = 0; + for (int i = 0; i < pg->inline_elements_length; i++) { + max_element = MAX(pg->inline_elements[i], max_element); + min_element = MIN(pg->inline_elements[i], min_element); + } + pgraph_vk_bind_vertex_attributes( + d, min_element, max_element, false, 0, + pg->inline_elements[pg->inline_elements_length - 1]); + sync_vertex_ram_buffer(pg); + VertexBufferRemap remap = remap_unaligned_attributes(pg, max_element + 1); + copy_remapped_attributes_to_inline_buffer(pg, remap, 0, 
max_element + 1); + + begin_pre_draw(pg); + VkDeviceSize buffer_offset = pgraph_vk_update_index_buffer( + pg, pg->inline_elements, index_data_size); + pgraph_vk_begin_debug_marker(r, r->command_buffer, RGBA_BLUE, + "Inline Elements"); + begin_draw(pg); + bind_vertex_buffer(pg, remap.attributes, 0); + vkCmdBindIndexBuffer(r->command_buffer, + r->storage_buffers[BUFFER_INDEX].buffer, + buffer_offset, VK_INDEX_TYPE_UINT32); + vkCmdDrawIndexed(r->command_buffer, pg->inline_elements_length, 1, 0, 0, + 0); + end_draw(pg); + pgraph_vk_end_debug_marker(r, r->command_buffer); + + NV2A_VK_DGROUP_END(); + } else if (pg->inline_buffer_length) { + NV2A_VK_DGROUP_BEGIN("Inline Buffer"); + nv2a_profile_inc_counter(NV2A_PROF_INLINE_BUFFERS); + assert(pg->inline_array_length == 0); + + size_t vertex_data_size = pg->inline_buffer_length * sizeof(float) * 4; + void *data[NV2A_VERTEXSHADER_ATTRIBUTES]; + size_t sizes[NV2A_VERTEXSHADER_ATTRIBUTES]; + size_t offset = 0; + + pgraph_vk_bind_vertex_attributes_inline(d); + for (int i = 0; i < r->num_active_vertex_attribute_descriptions; i++) { + int attr_index = r->vertex_attribute_descriptions[i].location; + + VertexAttribute *attr = &pg->vertex_attributes[attr_index]; + r->vertex_attribute_offsets[attr_index] = offset; + + data[i] = attr->inline_buffer; + sizes[i] = vertex_data_size; + + attr->inline_buffer_populated = false; + offset += vertex_data_size; + } + ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING, offset); + + begin_pre_draw(pg); + VkDeviceSize buffer_offset = pgraph_vk_update_vertex_inline_buffer( + pg, data, sizes, r->num_active_vertex_attribute_descriptions); + pgraph_vk_begin_debug_marker(r, r->command_buffer, RGBA_BLUE, + "Inline Buffer"); + begin_draw(pg); + bind_inline_vertex_buffer(pg, buffer_offset); + vkCmdDraw(r->command_buffer, pg->inline_buffer_length, 1, 0, 0); + end_draw(pg); + pgraph_vk_end_debug_marker(r, r->command_buffer); + + NV2A_VK_DGROUP_END(); + } else if (pg->inline_array_length) { + 
NV2A_VK_DGROUP_BEGIN("Inline Array"); + nv2a_profile_inc_counter(NV2A_PROF_INLINE_ARRAYS); + + VkDeviceSize inline_array_data_size = pg->inline_array_length * 4; + ensure_buffer_space(pg, BUFFER_VERTEX_INLINE_STAGING, + inline_array_data_size); + + unsigned int offset = 0; + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attr = &pg->vertex_attributes[i]; + if (attr->count == 0) { + continue; + } + + /* FIXME: Double check */ + offset = ROUND_UP(offset, attr->size); + attr->inline_array_offset = offset; + NV2A_DPRINTF("bind inline attribute %d size=%d, count=%d\n", i, + attr->size, attr->count); + offset += attr->size * attr->count; + offset = ROUND_UP(offset, attr->size); + } + + unsigned int vertex_size = offset; + unsigned int index_count = pg->inline_array_length * 4 / vertex_size; + + NV2A_DPRINTF("draw inline array %d, %d\n", vertex_size, index_count); + pgraph_vk_bind_vertex_attributes(d, 0, index_count - 1, true, + vertex_size, index_count - 1); + + begin_pre_draw(pg); + void *inline_array_data = pg->inline_array; + VkDeviceSize buffer_offset = pgraph_vk_update_vertex_inline_buffer( + pg, &inline_array_data, &inline_array_data_size, 1); + pgraph_vk_begin_debug_marker(r, r->command_buffer, RGBA_BLUE, + "Inline Array"); + begin_draw(pg); + bind_inline_vertex_buffer(pg, buffer_offset); + vkCmdDraw(r->command_buffer, index_count, 1, 0, 0); + end_draw(pg); + pgraph_vk_end_debug_marker(r, r->command_buffer); + NV2A_VK_DGROUP_END(); + } else { + NV2A_VK_DPRINTF("EMPTY NV097_SET_BEGIN_END"); + NV2A_UNCONFIRMED("EMPTY NV097_SET_BEGIN_END"); + } +} diff --git a/hw/xbox/nv2a/pgraph/vk/glsl.c b/hw/xbox/nv2a/pgraph/vk/glsl.c new file mode 100644 index 00000000000..d3ae05a34be --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/glsl.c @@ -0,0 +1,389 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser 
General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "ui/xemu-settings.h" +#include "renderer.h" + +#include +#include +#include + +static const glslang_resource_t + resource_limits = { .max_lights = 32, + .max_clip_planes = 6, + .max_texture_units = 32, + .max_texture_coords = 32, + .max_vertex_attribs = 64, + .max_vertex_uniform_components = 4096, + .max_varying_floats = 64, + .max_vertex_texture_image_units = 32, + .max_combined_texture_image_units = 80, + .max_texture_image_units = 32, + .max_fragment_uniform_components = 4096, + .max_draw_buffers = 32, + .max_vertex_uniform_vectors = 128, + .max_varying_vectors = 8, + .max_fragment_uniform_vectors = 16, + .max_vertex_output_vectors = 16, + .max_fragment_input_vectors = 15, + .min_program_texel_offset = -8, + .max_program_texel_offset = 7, + .max_clip_distances = 8, + .max_compute_work_group_count_x = 65535, + .max_compute_work_group_count_y = 65535, + .max_compute_work_group_count_z = 65535, + .max_compute_work_group_size_x = 1024, + .max_compute_work_group_size_y = 1024, + .max_compute_work_group_size_z = 64, + .max_compute_uniform_components = 1024, + .max_compute_texture_image_units = 16, + .max_compute_image_uniforms = 8, + .max_compute_atomic_counters = 8, + .max_compute_atomic_counter_buffers = 1, + .max_varying_components = 60, + .max_vertex_output_components = 64, + .max_geometry_input_components = 64, + .max_geometry_output_components = 128, + .max_fragment_input_components = 128, + .max_image_units = 8, + 
.max_combined_image_units_and_fragment_outputs = 8, + .max_combined_shader_output_resources = 8, + .max_image_samples = 0, + .max_vertex_image_uniforms = 0, + .max_tess_control_image_uniforms = 0, + .max_tess_evaluation_image_uniforms = 0, + .max_geometry_image_uniforms = 0, + .max_fragment_image_uniforms = 8, + .max_combined_image_uniforms = 8, + .max_geometry_texture_image_units = 16, + .max_geometry_output_vertices = 256, + .max_geometry_total_output_components = 1024, + .max_geometry_uniform_components = 1024, + .max_geometry_varying_components = 64, + .max_tess_control_input_components = 128, + .max_tess_control_output_components = 128, + .max_tess_control_texture_image_units = 16, + .max_tess_control_uniform_components = 1024, + .max_tess_control_total_output_components = 4096, + .max_tess_evaluation_input_components = 128, + .max_tess_evaluation_output_components = 128, + .max_tess_evaluation_texture_image_units = 16, + .max_tess_evaluation_uniform_components = 1024, + .max_tess_patch_components = 120, + .max_patch_vertices = 32, + .max_tess_gen_level = 64, + .max_viewports = 16, + .max_vertex_atomic_counters = 0, + .max_tess_control_atomic_counters = 0, + .max_tess_evaluation_atomic_counters = 0, + .max_geometry_atomic_counters = 0, + .max_fragment_atomic_counters = 8, + .max_combined_atomic_counters = 8, + .max_atomic_counter_bindings = 1, + .max_vertex_atomic_counter_buffers = 0, + .max_tess_control_atomic_counter_buffers = 0, + .max_tess_evaluation_atomic_counter_buffers = 0, + .max_geometry_atomic_counter_buffers = 0, + .max_fragment_atomic_counter_buffers = 1, + .max_combined_atomic_counter_buffers = 1, + .max_atomic_counter_buffer_size = 16384, + .max_transform_feedback_buffers = 4, + .max_transform_feedback_interleaved_components = 64, + .max_cull_distances = 8, + .max_combined_clip_and_cull_distances = 8, + .max_samples = 4, + .max_mesh_output_vertices_nv = 256, + .max_mesh_output_primitives_nv = 512, + .max_mesh_work_group_size_x_nv = 32, + 
.max_mesh_work_group_size_y_nv = 1, + .max_mesh_work_group_size_z_nv = 1, + .max_task_work_group_size_x_nv = 32, + .max_task_work_group_size_y_nv = 1, + .max_task_work_group_size_z_nv = 1, + .max_mesh_view_count_nv = 4, + .maxDualSourceDrawBuffersEXT = 1, + .limits = { + .non_inductive_for_loops = 1, + .while_loops = 1, + .do_while_loops = 1, + .general_uniform_indexing = 1, + .general_attribute_matrix_vector_indexing = 1, + .general_varying_indexing = 1, + .general_sampler_indexing = 1, + .general_variable_indexing = 1, + .general_constant_matrix_vector_indexing = 1, + } }; + +void pgraph_vk_init_glsl_compiler(void) +{ + glslang_initialize_process(); +} + +void pgraph_vk_finalize_glsl_compiler(void) +{ + glslang_finalize_process(); +} + +GByteArray *pgraph_vk_compile_glsl_to_spv(glslang_stage_t stage, + const char *glsl_source) +{ + const glslang_input_t input = { + .language = GLSLANG_SOURCE_GLSL, + .stage = stage, + .client = GLSLANG_CLIENT_VULKAN, + .client_version = GLSLANG_TARGET_VULKAN_1_3, + .target_language = GLSLANG_TARGET_SPV, + .target_language_version = GLSLANG_TARGET_SPV_1_6, + .code = glsl_source, + .default_version = 460, + .default_profile = GLSLANG_NO_PROFILE, + .force_default_version_and_profile = false, + .forward_compatible = false, + .messages = GLSLANG_MSG_DEFAULT_BIT, + .resource = &resource_limits, + }; + + glslang_shader_t *shader = glslang_shader_create(&input); + + if (!glslang_shader_preprocess(shader, &input)) { + fprintf(stderr, + "GLSL preprocessing failed\n" + "[INFO]: %s\n" + "[DEBUG]: %s\n" + "%s\n", + glslang_shader_get_info_log(shader), + glslang_shader_get_info_debug_log(shader), input.code); + assert(!"glslang preprocess failed"); + glslang_shader_delete(shader); + return NULL; + } + + if (!glslang_shader_parse(shader, &input)) { + fprintf(stderr, + "GLSL parsing failed\n" + "[INFO]: %s\n" + "[DEBUG]: %s\n" + "%s\n", + glslang_shader_get_info_log(shader), + glslang_shader_get_info_debug_log(shader), + 
glslang_shader_get_preprocessed_code(shader)); + assert(!"glslang parse failed"); + glslang_shader_delete(shader); + return NULL; + } + + glslang_program_t *program = glslang_program_create(); + glslang_program_add_shader(program, shader); + + if (!glslang_program_link(program, GLSLANG_MSG_SPV_RULES_BIT | + GLSLANG_MSG_VULKAN_RULES_BIT)) { + fprintf(stderr, + "GLSL linking failed\n" + "[INFO]: %s\n" + "[DEBUG]: %s\n", + glslang_program_get_info_log(program), + glslang_program_get_info_debug_log(program)); + assert(!"glslang link failed"); + glslang_program_delete(program); + glslang_shader_delete(shader); + return NULL; + } + + glslang_spv_options_t spv_options = { + .validate = true, + }; + + if (g_config.display.vulkan.debug_shaders) { + spv_options.disable_optimizer = true; + spv_options.generate_debug_info = true; + spv_options.emit_nonsemantic_shader_debug_info = true; + spv_options.emit_nonsemantic_shader_debug_source = true; + + // XXX: Note emit_nonsemantic_shader_debug_source actually does nothing + // as of 2024.07.25. To actually get glsl source embedded in spv, we + // must do the following... 
+ // + // ref: https://github.com/KhronosGroup/glslang/issues/3252 + glslang_program_add_source_text(program, input.stage, input.code, + strlen(input.code)); + } + glslang_program_SPIRV_generate_with_options(program, stage, &spv_options); + + const char *spirv_messages = glslang_program_SPIRV_get_messages(program); + if (spirv_messages) { + printf("%s\b", spirv_messages); + } + + size_t num_program_bytes = + glslang_program_SPIRV_get_size(program) * sizeof(uint32_t); + + guint8 *data = g_malloc(num_program_bytes); + glslang_program_SPIRV_get(program, (unsigned int *)data); + + glslang_program_delete(program); + glslang_shader_delete(shader); + + return g_byte_array_new_take(data, num_program_bytes); +} + +VkShaderModule pgraph_vk_create_shader_module_from_spv(PGRAPHVkState *r, GByteArray *spv) +{ + VkShaderModuleCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO, + .codeSize = spv->len, + .pCode = (uint32_t *)spv->data, + }; + VkShaderModule module; + VK_CHECK( + vkCreateShaderModule(r->device, &create_info, NULL, &module)); + return module; +} + +static void block_to_uniforms(const SpvReflectBlockVariable *block, ShaderUniformLayout *layout) +{ + assert(!layout->uniforms); + + layout->num_uniforms = block->member_count; + layout->uniforms = g_malloc0_n(block->member_count, sizeof(ShaderUniform)); + layout->total_size = block->size; + layout->allocation = g_malloc0(block->size); + + for (uint32_t k = 0; k < block->member_count; ++k) { + const SpvReflectBlockVariable *member = &block->members[k]; + + assert(member->array.dims_count < 2); + + layout->uniforms[k] = (ShaderUniform){ + .name = strdup(member->name), + .offset = member->offset, + .dim_v = MAX(1, member->numeric.vector.component_count), + .dim_a = MAX(member->array.dims_count ? 
member->array.dims[0] : 1, member->numeric.matrix.column_count), + .stride = MAX(member->array.stride, member->numeric.matrix.stride), + }; + + // fprintf(stderr, "<%s offset=%zd dim_v=%zd dim_a=%zd stride=%zd>\n", + // layout->uniforms[k].name, + // layout->uniforms[k].offset, + // layout->uniforms[k].dim_v, + // layout->uniforms[k].dim_a, + // layout->uniforms[k].stride + // ); + } + // fprintf(stderr, "--\n"); +} + +static void init_layout_from_spv(ShaderModuleInfo *info) +{ + SpvReflectResult result = spvReflectCreateShaderModule( + info->spirv->len, info->spirv->data, &info->reflect_module); + assert(result == SPV_REFLECT_RESULT_SUCCESS && + "Failed to create SPIR-V shader module"); + + uint32_t descriptor_set_count = 0; + result = spvReflectEnumerateDescriptorSets(&info->reflect_module, + &descriptor_set_count, NULL); + assert(result == SPV_REFLECT_RESULT_SUCCESS && + "Failed to enumerate descriptor sets"); + + info->descriptor_sets = + g_malloc_n(descriptor_set_count, sizeof(SpvReflectDescriptorSet *)); + result = spvReflectEnumerateDescriptorSets( + &info->reflect_module, &descriptor_set_count, info->descriptor_sets); + assert(result == SPV_REFLECT_RESULT_SUCCESS && + "Failed to enumerate descriptor sets"); + + info->uniforms.num_uniforms = 0; + info->uniforms.uniforms = NULL; + + for (uint32_t i = 0; i < descriptor_set_count; ++i) { + const SpvReflectDescriptorSet *descriptor_set = + info->descriptor_sets[i]; + for (uint32_t j = 0; j < descriptor_set->binding_count; ++j) { + const SpvReflectDescriptorBinding *binding = + descriptor_set->bindings[j]; + if (binding->descriptor_type != + SPV_REFLECT_DESCRIPTOR_TYPE_UNIFORM_BUFFER) { + continue; + } + + const SpvReflectBlockVariable *block = &binding->block; + block_to_uniforms(block, &info->uniforms); + } + } + + info->push_constants.num_uniforms = 0; + info->push_constants.uniforms = NULL; + assert(info->reflect_module.push_constant_block_count < 2); + if (info->reflect_module.push_constant_block_count) { + 
block_to_uniforms(&info->reflect_module.push_constant_blocks[0], + &info->push_constants); + } +} + +static glslang_stage_t vk_shader_stage_to_glslang_stage(VkShaderStageFlagBits stage) +{ + switch (stage) { + case VK_SHADER_STAGE_GEOMETRY_BIT: + return GLSLANG_STAGE_GEOMETRY; + case VK_SHADER_STAGE_VERTEX_BIT: + return GLSLANG_STAGE_VERTEX; + case VK_SHADER_STAGE_FRAGMENT_BIT: + return GLSLANG_STAGE_FRAGMENT; + case VK_SHADER_STAGE_COMPUTE_BIT: + return GLSLANG_STAGE_COMPUTE; + default: + assert(0); + } +} + +ShaderModuleInfo *pgraph_vk_create_shader_module_from_glsl( + PGRAPHVkState *r, VkShaderStageFlagBits stage, const char *glsl) +{ + ShaderModuleInfo *info = g_malloc0(sizeof(*info)); + info->glsl = strdup(glsl); + info->spirv = pgraph_vk_compile_glsl_to_spv( + vk_shader_stage_to_glslang_stage(stage), glsl); + info->module = pgraph_vk_create_shader_module_from_spv(r, info->spirv); + init_layout_from_spv(info); + return info; +} + +static void finalize_uniform_layout(ShaderUniformLayout *layout) +{ + for (int i = 0; i < layout->num_uniforms; i++) { + free((void*)layout->uniforms[i].name); + } + if (layout->uniforms) { + g_free(layout->uniforms); + } +} + +void pgraph_vk_destroy_shader_module(PGRAPHVkState *r, ShaderModuleInfo *info) +{ + if (info->glsl) { + free(info->glsl); + } + finalize_uniform_layout(&info->uniforms); + finalize_uniform_layout(&info->push_constants); + free(info->descriptor_sets); + spvReflectDestroyShaderModule(&info->reflect_module); + vkDestroyShaderModule(r->device, info->module, NULL); + g_byte_array_unref(info->spirv); + g_free(info); +} diff --git a/hw/xbox/nv2a/pgraph/vk/glsl.h b/hw/xbox/nv2a/pgraph/vk/glsl.h new file mode 100644 index 00000000000..3f6ccd9b3a6 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/glsl.h @@ -0,0 +1,205 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser 
General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_VK_GLSL_H +#define HW_XBOX_NV2A_PGRAPH_VK_GLSL_H + +#include "qemu/osdep.h" +#include +#include +#include + +typedef struct ShaderUniform { + const char *name; + size_t dim_v; + size_t dim_a; + size_t align; + size_t stride; + size_t offset; +} ShaderUniform; + +typedef struct ShaderUniformLayout { + ShaderUniform *uniforms; + size_t num_uniforms; + size_t total_size; + void *allocation; +} ShaderUniformLayout; + +static inline void uniform_std140(ShaderUniformLayout *layout) +{ + size_t offset = 0; + + for (int i = 0; i < layout->num_uniforms; i++) { + ShaderUniform *u = &layout->uniforms[i]; + size_t size = sizeof(float); // float or int + size_t align = size; + size_t stride = 0; + + size *= u->dim_v; + align *= u->dim_v == 3 ? 4 : u->dim_v; + + // If an array, each element is padded to vec4. 
+ if (u->dim_a > 1) { + align = 4 * sizeof(float); + stride = align; + size = u->dim_a * align; + } else { + align = size; + stride = 0; + } + + offset = ROUND_UP(offset, align); + + u->align = align; + u->offset = offset; + u->stride = stride; + + offset += size; + } + + layout->total_size = offset; + assert(layout->total_size); +} + +static inline void uniform_std430(ShaderUniformLayout *layout) +{ + size_t offset = 0; + + for (int i = 0; i < layout->num_uniforms; i++) { + ShaderUniform *u = &layout->uniforms[i]; + size_t size = sizeof(float); // float or int + size *= u->dim_v; + size_t align = size; + size *= u->dim_a; + + offset = ROUND_UP(offset, align); + + u->align = align; + u->offset = offset; + u->stride = u->dim_a > 1 ? (size * u->dim_v) : 0; + + offset += size; + } + + layout->total_size = offset; + assert(layout->total_size); +} + +static inline int uniform_index(ShaderUniformLayout *layout, const char *name) +{ + for (int i = 0; i < layout->num_uniforms; i++) { + if (!strcmp(layout->uniforms[i].name, name)) { + return i + 1; + } + } + + return -1; +} + +static inline +void *uniform_ptr(ShaderUniformLayout *layout, int idx) +{ + assert(idx > 0 && "invalid uniform index"); + + return (char *)layout->allocation + layout->uniforms[idx - 1].offset; +} + +static inline +void uniform_copy(ShaderUniformLayout *layout, int idx, void *values, size_t value_size, size_t count) +{ + assert(idx > 0 && "invalid uniform index"); + + ShaderUniform *u = &layout->uniforms[idx - 1]; + const size_t element_size = value_size * u->dim_v; + + size_t bytes_remaining = value_size * count; + char *p_out = uniform_ptr(layout, idx); + char *p_max = p_out + layout->total_size; + char *p_in = (char *)values; + + int index = 0; + while (bytes_remaining) { + assert(p_out < p_max); + assert(index < u->dim_a); + memcpy(p_out, p_in, element_size); + bytes_remaining -= element_size; + p_out += u->stride; + p_in += element_size; + index += 1; + } +} + +static inline +void 
uniform1fv(ShaderUniformLayout *layout, int idx, size_t count, float *values) +{ + uniform_copy(layout, idx, values, sizeof(float), count); +} + +static inline +void uniform1f(ShaderUniformLayout *layout, int idx, float value) +{ + uniform1fv(layout, idx, 1, &value); +} + +static inline +void uniform2f(ShaderUniformLayout *layout, int idx, float v0, float v1) +{ + float values[] = { v0, v1 }; + uniform1fv(layout, idx, 2, values); +} + +static inline +void uniform4f(ShaderUniformLayout *layout, int idx, float v0, float v1, float v2, float v3) +{ + float values[] = { v0, v1, v2, v3 }; + uniform1fv(layout, idx, 4, values); +} + +static inline +void uniformMatrix2fv(ShaderUniformLayout *layout, int idx, float *values) +{ + uniform1fv(layout, idx, 4, values); +} + +static inline +void uniformMatrix4fv(ShaderUniformLayout *layout, int idx, float *values) +{ + uniform1fv(layout, idx, 4 * 4, values); +} + +static inline +void uniform1iv(ShaderUniformLayout *layout, int idx, size_t count, int32_t *values) +{ + uniform_copy(layout, idx, values, sizeof(int32_t), count); +} + +static inline +void uniform1i(ShaderUniformLayout *layout, int idx, int32_t value) +{ + uniform1iv(layout, idx, 1, &value); +} + +static inline +void uniform4i(ShaderUniformLayout *layout, int idx, int v0, int v1, int v2, int v3) +{ + int values[] = { v0, v1, v2, v3 }; + uniform1iv(layout, idx, 4, values); +} + +#endif diff --git a/hw/xbox/nv2a/pgraph/vk/image.c b/hw/xbox/nv2a/pgraph/vk/image.c new file mode 100644 index 00000000000..de8e4d30da7 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/image.c @@ -0,0 +1,209 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "renderer.h" + +static bool check_format_has_depth_component(VkFormat format) +{ + return format == VK_FORMAT_D32_SFLOAT_S8_UINT || + format == VK_FORMAT_D24_UNORM_S8_UINT || + format == VK_FORMAT_D16_UNORM; +} + +static bool check_format_has_stencil_component(VkFormat format) +{ + return format == VK_FORMAT_D32_SFLOAT_S8_UINT || + format == VK_FORMAT_D24_UNORM_S8_UINT; +} + +void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd, + VkImage image, VkFormat format, + VkImageLayout oldLayout, + VkImageLayout newLayout) +{ + VkImageMemoryBarrier barrier = { + .sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER, + .oldLayout = oldLayout, + .newLayout = newLayout, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = image, + .subresourceRange.baseMipLevel = 0, + .subresourceRange.levelCount = VK_REMAINING_MIP_LEVELS, + .subresourceRange.baseArrayLayer = 0, + .subresourceRange.layerCount = VK_REMAINING_ARRAY_LAYERS, + }; + + if (check_format_has_depth_component(format)) { + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT; + + if (check_format_has_stencil_component(format)) { + barrier.subresourceRange.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT; + } + } else { + barrier.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT; + } + + VkPipelineStageFlags sourceStage; + VkPipelineStageFlags destinationStage; + + // Undefined -> Dst + if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = 
VK_ACCESS_TRANSFER_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Undefined -> Color + } else if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && + newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + + // Undefined -> Depth + } else if (oldLayout == VK_IMAGE_LAYOUT_UNDEFINED && + newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = 0; + barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT; + destinationStage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + + // Dst -> Shader Read + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + + // Dst -> Color + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + + // Dst -> Depth + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | 
VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + + // Dst -> Src + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Shader Read -> Dst + } else if (oldLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Shader Read -> Color + } else if (oldLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_SHADER_READ_BIT; + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + + // Color -> Src + } else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + sourceStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Color -> Dst + } else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + 
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Color -> Shader Read + } else if (oldLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT; + sourceStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + destinationStage = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT; + + // Depth -> Src + } else if (oldLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + + sourceStage = VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Depth -> Dst + } else if (oldLayout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + // Src -> Color + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT; + + // Src -> Depth + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL && + newLayout == 
VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT; + + // Src -> Dst + } else if (oldLayout == VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL && + newLayout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + barrier.srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT; + barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT; + sourceStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + destinationStage = VK_PIPELINE_STAGE_TRANSFER_BIT; + + } else { + assert(!"unsupported layout transition!"); + } + + vkCmdPipelineBarrier(cmd, sourceStage, destinationStage, 0, 0, + NULL, 0, NULL, 1, &barrier); +} diff --git a/hw/xbox/nv2a/pgraph/vk/instance.c b/hw/xbox/nv2a/pgraph/vk/instance.c new file mode 100644 index 00000000000..9df440930c0 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/instance.c @@ -0,0 +1,742 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "qemu/osdep.h" +#include "ui/xemu-settings.h" +#include "renderer.h" +#include "xemu-version.h" + +#include +#include +#include + +#include + +#define VkExtensionPropertiesArray GArray +#define StringArray GArray + +static bool enable_validation = false; + +static char const *const validation_layers[] = { + "VK_LAYER_KHRONOS_validation", +}; + +static char const *const required_instance_extensions[] = { + VK_KHR_GET_PHYSICAL_DEVICE_PROPERTIES_2_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_CAPABILITIES_EXTENSION_NAME, + VK_KHR_EXTERNAL_MEMORY_CAPABILITIES_EXTENSION_NAME, +}; + +static char const *const required_device_extensions[] = { + VK_KHR_EXTERNAL_SEMAPHORE_EXTENSION_NAME, + VK_KHR_EXTERNAL_MEMORY_EXTENSION_NAME, +#ifdef WIN32 + VK_KHR_EXTERNAL_MEMORY_WIN32_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_WIN32_EXTENSION_NAME, +#else + VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, + VK_KHR_EXTERNAL_SEMAPHORE_FD_EXTENSION_NAME, +#endif +}; + +static VKAPI_ATTR VkBool32 VKAPI_CALL debugCallback( + VkDebugUtilsMessageSeverityFlagBitsEXT messageSeverity, + VkDebugUtilsMessageTypeFlagsEXT messageType, + const VkDebugUtilsMessengerCallbackDataEXT *pCallbackData, void *pUserData) +{ + fprintf(stderr, "[vk] %s\n", pCallbackData->pMessage); + + if ((messageType & VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT) && + (messageSeverity & (VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT))) { + assert(!g_config.display.vulkan.assert_on_validation_msg); + } + return VK_FALSE; +} + +static bool check_validation_layer_support(void) +{ + uint32_t num_available_layers; + vkEnumerateInstanceLayerProperties(&num_available_layers, NULL); + + g_autofree VkLayerProperties *available_layers = + g_malloc_n(num_available_layers, sizeof(VkLayerProperties)); + vkEnumerateInstanceLayerProperties(&num_available_layers, available_layers); + + for (int i = 0; i < ARRAY_SIZE(validation_layers); i++) { + bool found = false; + for (int j = 
0; j < num_available_layers; j++) { + if (!strcmp(validation_layers[i], available_layers[j].layerName)) { + found = true; + break; + } + } + if (!found) { + fprintf(stderr, "desired validation layer not found: %s\n", + validation_layers[i]); + return false; + } + } + + return true; +} + +static void create_window(PGRAPHVkState *r, Error **errp) +{ + r->window = SDL_CreateWindow( + "SDL Offscreen Window", SDL_WINDOWPOS_CENTERED, SDL_WINDOWPOS_CENTERED, + 640, 480, SDL_WINDOW_VULKAN | SDL_WINDOW_HIDDEN); + + if (r->window == NULL) { + error_setg(errp, "SDL_CreateWindow failed: %s", SDL_GetError()); + } +} + +static void destroy_window(PGRAPHVkState *r) +{ + if (r->window) { + SDL_DestroyWindow(r->window); + r->window = NULL; + } +} + +static VkExtensionPropertiesArray * +get_available_instance_extensions(PGRAPHState *pg) +{ + uint32_t num_extensions = 0; + + VK_CHECK( + vkEnumerateInstanceExtensionProperties(NULL, &num_extensions, NULL)); + + VkExtensionPropertiesArray *extensions = g_array_sized_new( + FALSE, FALSE, sizeof(VkExtensionProperties), num_extensions); + + g_array_set_size(extensions, num_extensions); + VK_CHECK(vkEnumerateInstanceExtensionProperties( + NULL, &num_extensions, (VkExtensionProperties *)extensions->data)); + + return extensions; +} + +static bool +is_extension_available(VkExtensionPropertiesArray *available_extensions, + const char *extension_name) +{ + for (int i = 0; i < available_extensions->len; i++) { + VkExtensionProperties *e = + &g_array_index(available_extensions, VkExtensionProperties, i); + if (!strcmp(e->extensionName, extension_name)) { + return true; + } + } + + return false; +} + +static StringArray *get_required_instance_extension_names(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + // Add instance extensions SDL lists as required + unsigned int sdl_count = 0; + SDL_Vulkan_GetInstanceExtensions((SDL_Window *)r->window, &sdl_count, NULL); + + StringArray *extensions = + g_array_sized_new(FALSE, FALSE, 
sizeof(char *), + sdl_count + ARRAY_SIZE(required_instance_extensions)); + + if (sdl_count) { + g_array_set_size(extensions, sdl_count); + SDL_Vulkan_GetInstanceExtensions((SDL_Window *)r->window, &sdl_count, + (const char **)extensions->data); + } + + // Add additional required extensions + g_array_append_vals(extensions, required_instance_extensions, + ARRAY_SIZE(required_instance_extensions)); + + return extensions; +} + +static bool +add_extension_if_available(VkExtensionPropertiesArray *available_extensions, + StringArray *enabled_extension_names, + const char *desired_extension_name) +{ + if (is_extension_available(available_extensions, desired_extension_name)) { + g_array_append_val(enabled_extension_names, desired_extension_name); + return true; + } + + fprintf(stderr, "Warning: extension not available: %s\n", + desired_extension_name); + return false; +} + +static void +add_optional_instance_extension_names(PGRAPHState *pg, + VkExtensionPropertiesArray *available_extensions, + StringArray *enabled_extension_names) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + r->debug_utils_extension_enabled = + g_config.display.vulkan.validation_layers && + add_extension_if_available(available_extensions, enabled_extension_names, + VK_EXT_DEBUG_UTILS_EXTENSION_NAME); +} + +static bool create_instance(PGRAPHState *pg, Error **errp) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + VkResult result; + + create_window(r, errp); + if (*errp) { + return false; + } + + result = volkInitialize(); + if (result != VK_SUCCESS) { + error_setg(errp, "volkInitialize failed"); + destroy_window(r); + return false; + } + + VkApplicationInfo app_info = { + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .pApplicationName = "xemu", + .applicationVersion = VK_MAKE_VERSION( + xemu_version_major, xemu_version_minor, xemu_version_patch), + .pEngineName = "No Engine", + .engineVersion = VK_MAKE_VERSION(1, 0, 0), + .apiVersion = VK_API_VERSION_1_3, + }; + + 
g_autoptr(VkExtensionPropertiesArray) available_extensions = + get_available_instance_extensions(pg); + + g_autoptr(StringArray) enabled_extension_names = + get_required_instance_extension_names(pg); + + bool all_required_extensions_available = true; + for (int i = 0; i < enabled_extension_names->len; i++) { + const char *required_extension = + g_array_index(enabled_extension_names, const char *, i); + if (!is_extension_available(available_extensions, required_extension)) { + fprintf(stderr, + "Error: Required instance extension not available: %s\n", + required_extension); + all_required_extensions_available = false; + } + } + + if (!all_required_extensions_available) { + error_setg(errp, "Required instance extensions not available"); + goto error; + } + + add_optional_instance_extension_names(pg, available_extensions, + enabled_extension_names); + + fprintf(stderr, "Enabled instance extensions:\n"); + for (int i = 0; i < enabled_extension_names->len; i++) { + fprintf(stderr, "- %s\n", + g_array_index(enabled_extension_names, char *, i)); + } + + VkInstanceCreateInfo create_info = { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pApplicationInfo = &app_info, + .enabledExtensionCount = enabled_extension_names->len, + .ppEnabledExtensionNames = + &g_array_index(enabled_extension_names, const char *, 0), + }; + + enable_validation = g_config.display.vulkan.validation_layers; + + VkValidationFeatureEnableEXT enables[] = { + VK_VALIDATION_FEATURE_ENABLE_SYNCHRONIZATION_VALIDATION_EXT, + // VK_VALIDATION_FEATURE_ENABLE_BEST_PRACTICES_EXT, + }; + + VkValidationFeaturesEXT validationFeatures = { + .sType = VK_STRUCTURE_TYPE_VALIDATION_FEATURES_EXT, + .enabledValidationFeatureCount = ARRAY_SIZE(enables), + .pEnabledValidationFeatures = enables, + }; + + if (enable_validation) { + if (check_validation_layer_support()) { + fprintf(stderr, "Warning: Validation layers enabled. 
Expect " + "performance impact.\n"); + create_info.enabledLayerCount = ARRAY_SIZE(validation_layers); + create_info.ppEnabledLayerNames = validation_layers; + create_info.pNext = &validationFeatures; + } else { + fprintf(stderr, "Warning: validation layers not available\n"); + enable_validation = false; + } + } + + result = vkCreateInstance(&create_info, NULL, &r->instance); + if (result != VK_SUCCESS) { + error_setg(errp, "Failed to create instance (%d)", result); + return false; + } + + volkLoadInstance(r->instance); + + if (r->debug_utils_extension_enabled) { + VkDebugUtilsMessengerCreateInfoEXT messenger_info = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_MESSENGER_CREATE_INFO_EXT, + .messageSeverity = VK_DEBUG_UTILS_MESSAGE_SEVERITY_VERBOSE_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_WARNING_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_SEVERITY_ERROR_BIT_EXT, + .messageType = VK_DEBUG_UTILS_MESSAGE_TYPE_GENERAL_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_VALIDATION_BIT_EXT | + VK_DEBUG_UTILS_MESSAGE_TYPE_PERFORMANCE_BIT_EXT, + .pfnUserCallback = debugCallback, + }; + VK_CHECK(vkCreateDebugUtilsMessengerEXT(r->instance, &messenger_info, + NULL, &r->debug_messenger)); + } + + return true; + +error: + volkFinalize(); + destroy_window(r); + return false; +} + +static bool is_queue_family_indicies_complete(QueueFamilyIndices indices) +{ + return indices.queue_family >= 0; +} + +QueueFamilyIndices pgraph_vk_find_queue_families(VkPhysicalDevice device) +{ + QueueFamilyIndices indices = { + .queue_family = -1, + }; + + uint32_t num_queue_families = 0; + vkGetPhysicalDeviceQueueFamilyProperties(device, &num_queue_families, NULL); + + g_autofree VkQueueFamilyProperties *queue_families = + g_malloc_n(num_queue_families, sizeof(VkQueueFamilyProperties)); + vkGetPhysicalDeviceQueueFamilyProperties(device, &num_queue_families, + queue_families); + + for (int i = 0; i < num_queue_families; i++) { + VkQueueFamilyProperties queueFamily = queue_families[i]; + // FIXME: Support independent 
graphics, compute queues + int required_flags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT; + if ((queueFamily.queueFlags & required_flags) == required_flags) { + indices.queue_family = i; + } + if (is_queue_family_indicies_complete(indices)) { + break; + } + } + + return indices; +} + +static VkExtensionPropertiesArray * +get_available_device_extensions(VkPhysicalDevice device) +{ + uint32_t num_extensions = 0; + + VK_CHECK(vkEnumerateDeviceExtensionProperties(device, NULL, &num_extensions, + NULL)); + + VkExtensionPropertiesArray *extensions = g_array_sized_new( + FALSE, FALSE, sizeof(VkExtensionProperties), num_extensions); + + g_array_set_size(extensions, num_extensions); + VK_CHECK(vkEnumerateDeviceExtensionProperties( + device, NULL, &num_extensions, + (VkExtensionProperties *)extensions->data)); + + return extensions; +} + +static StringArray *get_required_device_extension_names(void) +{ + StringArray *extensions = + g_array_sized_new(FALSE, FALSE, sizeof(char *), + ARRAY_SIZE(required_device_extensions)); + + g_array_append_vals(extensions, required_device_extensions, + ARRAY_SIZE(required_device_extensions)); + + return extensions; +} + +static void add_optional_device_extension_names( + PGRAPHState *pg, VkExtensionPropertiesArray *available_extensions, + StringArray *enabled_extension_names) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + r->custom_border_color_extension_enabled = + add_extension_if_available(available_extensions, enabled_extension_names, + VK_EXT_CUSTOM_BORDER_COLOR_EXTENSION_NAME); + + r->provoking_vertex_extension_enabled = + add_extension_if_available(available_extensions, enabled_extension_names, + VK_EXT_PROVOKING_VERTEX_EXTENSION_NAME); + + r->memory_budget_extension_enabled = add_extension_if_available( + available_extensions, enabled_extension_names, + VK_EXT_MEMORY_BUDGET_EXTENSION_NAME); +} + +static bool check_device_support_required_extensions(VkPhysicalDevice device) +{ + g_autoptr(VkExtensionPropertiesArray) 
available_extensions = + get_available_device_extensions(device); + + for (int i = 0; i < ARRAY_SIZE(required_device_extensions); i++) { + if (!is_extension_available(available_extensions, + required_device_extensions[i])) { + fprintf(stderr, "required device extension not found: %s\n", + required_device_extensions[i]); + return false; + } + } + + return true; +} + +static bool is_device_compatible(VkPhysicalDevice device) +{ + QueueFamilyIndices indices = pgraph_vk_find_queue_families(device); + + return is_queue_family_indicies_complete(indices) && + check_device_support_required_extensions(device); + // FIXME: Check formats + // FIXME: Check vram +} + +static bool select_physical_device(PGRAPHState *pg, Error **errp) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + VkResult result; + + uint32_t num_physical_devices = 0; + + result = + vkEnumeratePhysicalDevices(r->instance, &num_physical_devices, NULL); + if (result != VK_SUCCESS || num_physical_devices == 0) { + error_setg(errp, "Failed to find GPUs with Vulkan support"); + return false; + } + + g_autofree VkPhysicalDevice *devices = + g_malloc_n(num_physical_devices, sizeof(VkPhysicalDevice)); + vkEnumeratePhysicalDevices(r->instance, &num_physical_devices, devices); + + fprintf(stderr, "Available physical devices:\n"); + for (int i = 0; i < num_physical_devices; i++) { + vkGetPhysicalDeviceProperties(devices[i], &r->device_props); + fprintf(stderr, "- %s\n", r->device_props.deviceName); + } + + // FIXME: Store preferred device + + r->physical_device = VK_NULL_HANDLE; + for (int i = 0; i < num_physical_devices; i++) { + if (is_device_compatible(devices[i])) { + r->physical_device = devices[i]; + break; + } + } + if (r->physical_device == VK_NULL_HANDLE) { + error_setg(errp, "Failed to find a suitable GPU"); + return false; + } + + vkGetPhysicalDeviceProperties(r->physical_device, &r->device_props); + fprintf(stderr, + "Selected physical device: %s\n" + "- Vendor: %x, Device: %x\n" + "- Driver Version: 
%d.%d.%d\n", + r->device_props.deviceName, + r->device_props.vendorID, + r->device_props.deviceID, + VK_VERSION_MAJOR(r->device_props.driverVersion), + VK_VERSION_MINOR(r->device_props.driverVersion), + VK_VERSION_PATCH(r->device_props.driverVersion)); + + return true; +} + +static bool create_logical_device(PGRAPHState *pg, Error **errp) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + VkResult result; + + QueueFamilyIndices indices = + pgraph_vk_find_queue_families(r->physical_device); + + g_autoptr(VkExtensionPropertiesArray) available_extensions = + get_available_device_extensions(r->physical_device); + + g_autoptr(StringArray) enabled_extension_names = + get_required_device_extension_names(); + + add_optional_device_extension_names(pg, available_extensions, + enabled_extension_names); + + fprintf(stderr, "Enabled device extensions:\n"); + for (int i = 0; i < enabled_extension_names->len; i++) { + fprintf(stderr, "- %s\n", + g_array_index(enabled_extension_names, char *, i)); + } + + float queuePriority = 1.0f; + + VkDeviceQueueCreateInfo queue_create_info = { + .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO, + .queueFamilyIndex = indices.queue_family, + .queueCount = 1, + .pQueuePriorities = &queuePriority, + }; + + // Ensure device supports required features + VkPhysicalDeviceFeatures available_features, enabled_features; + vkGetPhysicalDeviceFeatures(r->physical_device, &available_features); + memset(&enabled_features, 0, sizeof(enabled_features)); + + struct { + const char *name; + VkBool32 available, *enabled; + } required_features[] = { + #define F(n) { #n, available_features.n, &enabled_features.n } + F(shaderClipDistance), + F(geometryShader), + F(shaderTessellationAndGeometryPointSize), + F(depthClamp), + F(occlusionQueryPrecise), + #undef F + }; + + bool all_features_available = true; + for (int i = 0; i < ARRAY_SIZE(required_features); i++) { + if (required_features[i].available != VK_TRUE) { + fprintf(stderr, + "Error: Device does not support 
required feature %s\n", + required_features[i].name); + all_features_available = false; + } + *required_features[i].enabled = VK_TRUE; + } + + if (!all_features_available) { + error_setg(errp, "Device does not support required features"); + return false; + } + + void *next_struct = NULL; + + VkPhysicalDeviceProvokingVertexFeaturesEXT provoking_vertex_features; + if (r->provoking_vertex_extension_enabled) { + provoking_vertex_features = (VkPhysicalDeviceProvokingVertexFeaturesEXT){ + .sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT, + .provokingVertexLast = VK_TRUE, + .pNext = next_struct, + }; + next_struct = &provoking_vertex_features; + } + + VkPhysicalDeviceCustomBorderColorFeaturesEXT custom_border_features; + if (r->custom_border_color_extension_enabled) { + custom_border_features = (VkPhysicalDeviceCustomBorderColorFeaturesEXT){ + .sType = + VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT, + .customBorderColors = VK_TRUE, + .pNext = next_struct, + }; + next_struct = &custom_border_features; + } + + VkDeviceCreateInfo device_create_info = { + .sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO, + .queueCreateInfoCount = 1, + .pQueueCreateInfos = &queue_create_info, + .pEnabledFeatures = &enabled_features, + .enabledExtensionCount = enabled_extension_names->len, + .ppEnabledExtensionNames = + &g_array_index(enabled_extension_names, const char *, 0), + .pNext = next_struct, + }; + + if (enable_validation) { + device_create_info.enabledLayerCount = ARRAY_SIZE(validation_layers); + device_create_info.ppEnabledLayerNames = validation_layers; + } + + result = vkCreateDevice(r->physical_device, &device_create_info, NULL, + &r->device); + if (result != VK_SUCCESS) { + error_setg(errp, "Failed to create logical device (%d)", result); + return false; + } + + vkGetDeviceQueue(r->device, indices.queue_family, 0, &r->queue); + return true; +} + +uint32_t pgraph_vk_get_memory_type(PGRAPHState *pg, uint32_t type_bits, + 
VkMemoryPropertyFlags properties) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkPhysicalDeviceMemoryProperties prop; + vkGetPhysicalDeviceMemoryProperties(r->physical_device, &prop); + for (uint32_t i = 0; i < prop.memoryTypeCount; i++) { + if ((prop.memoryTypes[i].propertyFlags & properties) == properties && + type_bits & (1 << i)) { + return i; + } + } + return 0xFFFFFFFF; // Unable to find memoryType +} + +static bool init_allocator(PGRAPHState *pg, Error **errp) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + VkResult result; + + VmaVulkanFunctions vulkanFunctions = { + /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. + .vkGetInstanceProcAddr = vkGetInstanceProcAddr, + /// Required when using VMA_DYNAMIC_VULKAN_FUNCTIONS. + .vkGetDeviceProcAddr = vkGetDeviceProcAddr, + .vkGetPhysicalDeviceProperties = vkGetPhysicalDeviceProperties, + .vkGetPhysicalDeviceMemoryProperties = vkGetPhysicalDeviceMemoryProperties, + .vkAllocateMemory = vkAllocateMemory, + .vkFreeMemory = vkFreeMemory, + .vkMapMemory = vkMapMemory, + .vkUnmapMemory = vkUnmapMemory, + .vkFlushMappedMemoryRanges = vkFlushMappedMemoryRanges, + .vkInvalidateMappedMemoryRanges = vkInvalidateMappedMemoryRanges, + .vkBindBufferMemory = vkBindBufferMemory, + .vkBindImageMemory = vkBindImageMemory, + .vkGetBufferMemoryRequirements = vkGetBufferMemoryRequirements, + .vkGetImageMemoryRequirements = vkGetImageMemoryRequirements, + .vkCreateBuffer = vkCreateBuffer, + .vkDestroyBuffer = vkDestroyBuffer, + .vkCreateImage = vkCreateImage, + .vkDestroyImage = vkDestroyImage, + .vkCmdCopyBuffer = vkCmdCopyBuffer, + #if VMA_DEDICATED_ALLOCATION || VMA_VULKAN_VERSION >= 1001000 + /// Fetch "vkGetBufferMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetBufferMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension. 
+ .vkGetBufferMemoryRequirements2KHR = vkGetBufferMemoryRequirements2, + /// Fetch "vkGetImageMemoryRequirements2" on Vulkan >= 1.1, fetch "vkGetImageMemoryRequirements2KHR" when using VK_KHR_dedicated_allocation extension. + .vkGetImageMemoryRequirements2KHR = vkGetImageMemoryRequirements2, + #endif + #if VMA_BIND_MEMORY2 || VMA_VULKAN_VERSION >= 1001000 + /// Fetch "vkBindBufferMemory2" on Vulkan >= 1.1, fetch "vkBindBufferMemory2KHR" when using VK_KHR_bind_memory2 extension. + .vkBindBufferMemory2KHR = vkBindBufferMemory2, + /// Fetch "vkBindImageMemory2" on Vulkan >= 1.1, fetch "vkBindImageMemory2KHR" when using VK_KHR_bind_memory2 extension. + .vkBindImageMemory2KHR = vkBindImageMemory2, + #endif + #if VMA_MEMORY_BUDGET || VMA_VULKAN_VERSION >= 1001000 + /// Fetch from "vkGetPhysicalDeviceMemoryProperties2" on Vulkan >= 1.1, but you can also fetch it from "vkGetPhysicalDeviceMemoryProperties2KHR" if you enabled extension VK_KHR_get_physical_device_properties2. + .vkGetPhysicalDeviceMemoryProperties2KHR = vkGetPhysicalDeviceMemoryProperties2KHR, + #endif + #if VMA_KHR_MAINTENANCE4 || VMA_VULKAN_VERSION >= 1003000 + /// Fetch from "vkGetDeviceBufferMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceBufferMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. + .vkGetDeviceBufferMemoryRequirements = vkGetDeviceBufferMemoryRequirements, + /// Fetch from "vkGetDeviceImageMemoryRequirements" on Vulkan >= 1.3, but you can also fetch it from "vkGetDeviceImageMemoryRequirementsKHR" if you enabled extension VK_KHR_maintenance4. + .vkGetDeviceImageMemoryRequirements = vkGetDeviceImageMemoryRequirements, + #endif + }; + + VmaAllocatorCreateInfo create_info = { + .flags = (r->memory_budget_extension_enabled ? 
+ VMA_ALLOCATOR_CREATE_EXT_MEMORY_BUDGET_BIT : + 0), + .vulkanApiVersion = VK_API_VERSION_1_3, + .instance = r->instance, + .physicalDevice = r->physical_device, + .device = r->device, + .pVulkanFunctions = &vulkanFunctions, + }; + + result = vmaCreateAllocator(&create_info, &r->allocator); + if (result != VK_SUCCESS) { + error_setg(errp, "vmaCreateAllocator failed"); + return false; + } + + return true; +} + +void pgraph_vk_init_instance(PGRAPHState *pg, Error **errp) +{ + if (create_instance(pg, errp) && + select_physical_device(pg, errp) && + create_logical_device(pg, errp) && + init_allocator(pg, errp)) { + return; + } + + pgraph_vk_finalize_instance(pg); + + const char *msg = "Failed to initialize Vulkan renderer"; + if (*errp) { + error_prepend(errp, "%s: ", msg); + } else { + error_setg(errp, "%s", msg); + } +} + +void pgraph_vk_finalize_instance(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + if (r->allocator != VK_NULL_HANDLE) { + vmaDestroyAllocator(r->allocator); + r->allocator = VK_NULL_HANDLE; + } + + if (r->device != VK_NULL_HANDLE) { + vkDestroyDevice(r->device, NULL); + r->device = VK_NULL_HANDLE; + } + + if (r->debug_messenger != VK_NULL_HANDLE) { + vkDestroyDebugUtilsMessengerEXT(r->instance, r->debug_messenger, NULL); + r->debug_messenger = VK_NULL_HANDLE; + } + + if (r->instance != VK_NULL_HANDLE) { + vkDestroyInstance(r->instance, NULL); + r->instance = VK_NULL_HANDLE; + } + + volkFinalize(); + destroy_window(r); +} diff --git a/hw/xbox/nv2a/pgraph/vk/meson.build b/hw/xbox/nv2a/pgraph/vk/meson.build new file mode 100644 index 00000000000..24c2474cb97 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/meson.build @@ -0,0 +1,24 @@ +if vulkan.found() + +specific_ss.add([sdl, volk, libglslang, vma, vulkan, spirv_reflect, gloffscreen, + files( + 'blit.c', + 'buffer.c', + 'command.c', + 'debug.c', + 'display.c', + 'draw.c', + 'glsl.c', + 'image.c', + 'instance.c', + 'renderer.c', + 'reports.c', + 'shaders.c', + 'surface-compute.c', + 
'surface.c', + 'texture.c', + 'vertex.c', + ) + ]) + +endif diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.c b/hw/xbox/nv2a/pgraph/vk/renderer.c new file mode 100644 index 00000000000..272b5f6ae5c --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/renderer.c @@ -0,0 +1,272 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "renderer.h" + +#include "gloffscreen.h" + +#if HAVE_EXTERNAL_MEMORY +static GloContext *g_gl_context; +#endif + +static void early_context_init(void) +{ +#if HAVE_EXTERNAL_MEMORY + g_gl_context = glo_context_create(); +#endif +} + +static void pgraph_vk_init(NV2AState *d, Error **errp) +{ + PGRAPHState *pg = &d->pgraph; + + pg->vk_renderer_state = (PGRAPHVkState *)g_malloc0(sizeof(PGRAPHVkState)); + +#if HAVE_EXTERNAL_MEMORY + glo_set_current(g_gl_context); +#endif + + pgraph_vk_debug_init(); + + pgraph_vk_init_instance(pg, errp); + if (*errp) { + return; + } + + pgraph_vk_init_command_buffers(pg); + pgraph_vk_init_buffers(d); + pgraph_vk_init_surfaces(pg); + pgraph_vk_init_shaders(pg); + pgraph_vk_init_pipelines(pg); + pgraph_vk_init_textures(pg); + pgraph_vk_init_reports(pg); + pgraph_vk_init_compute(pg); + pgraph_vk_init_display(pg); + + pgraph_vk_update_vertex_ram_buffer(&d->pgraph, 0, d->vram_ptr, + memory_region_size(d->vram)); +} + +static void pgraph_vk_finalize(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + + pgraph_vk_finalize_display(pg); + pgraph_vk_finalize_compute(pg); + pgraph_vk_finalize_reports(pg); + pgraph_vk_finalize_textures(pg); + pgraph_vk_finalize_pipelines(pg); + pgraph_vk_finalize_shaders(pg); + pgraph_vk_finalize_surfaces(pg); + pgraph_vk_finalize_buffers(d); + pgraph_vk_finalize_command_buffers(pg); + pgraph_vk_finalize_instance(pg); + + g_free(pg->vk_renderer_state); + pg->vk_renderer_state = NULL; +} + +static void pgraph_vk_flush(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + + pgraph_vk_finish(pg, VK_FINISH_REASON_FLUSH); + pgraph_vk_surface_flush(d); + pgraph_vk_mark_textures_possibly_dirty(d, 0, memory_region_size(d->vram)); + pgraph_vk_update_vertex_ram_buffer(&d->pgraph, 0, d->vram_ptr, + memory_region_size(d->vram)); + for (int i = 0; i < 4; i++) { + pg->texture_dirty[i] = true; + } + + /* FIXME: Flush more? 
*/ + + qatomic_set(&d->pgraph.flush_pending, false); + qemu_event_set(&d->pgraph.flush_complete); +} + +static void pgraph_vk_sync(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + pgraph_vk_render_display(pg); + + qatomic_set(&d->pgraph.sync_pending, false); + qemu_event_set(&d->pgraph.sync_complete); +} + +static void pgraph_vk_process_pending(NV2AState *d) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + if (qatomic_read(&r->downloads_pending) || + qatomic_read(&r->download_dirty_surfaces_pending) || + qatomic_read(&d->pgraph.sync_pending) || + qatomic_read(&d->pgraph.flush_pending) + ) { + qemu_mutex_unlock(&d->pfifo.lock); + qemu_mutex_lock(&d->pgraph.lock); + if (qatomic_read(&r->downloads_pending)) { + pgraph_vk_process_pending_downloads(d); + } + if (qatomic_read(&r->download_dirty_surfaces_pending)) { + pgraph_vk_download_dirty_surfaces(d); + } + if (qatomic_read(&d->pgraph.sync_pending)) { + pgraph_vk_sync(d); + } + if (qatomic_read(&d->pgraph.flush_pending)) { + pgraph_vk_flush(d); + } + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + } +} + +static void pgraph_vk_flip_stall(NV2AState *d) +{ + pgraph_vk_finish(&d->pgraph, VK_FINISH_REASON_FLIP_STALL); + pgraph_vk_debug_frame_terminator(); +} + +static void pgraph_vk_pre_savevm_trigger(NV2AState *d) +{ + qatomic_set(&d->pgraph.vk_renderer_state->download_dirty_surfaces_pending, true); + qemu_event_reset(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete); +} + +static void pgraph_vk_pre_savevm_wait(NV2AState *d) +{ + qemu_event_wait(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete); +} + +static void pgraph_vk_pre_shutdown_trigger(NV2AState *d) +{ + // qatomic_set(&d->pgraph.vk_renderer_state->shader_cache_writeback_pending, true); + // qemu_event_reset(&d->pgraph.vk_renderer_state->shader_cache_writeback_complete); +} + +static void pgraph_vk_pre_shutdown_wait(NV2AState *d) +{ + // 
qemu_event_wait(&d->pgraph.vk_renderer_state->shader_cache_writeback_complete); +} + +static int pgraph_vk_get_framebuffer_surface(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + qemu_mutex_lock(&d->pfifo.lock); + // FIXME: Possible race condition with pgraph, consider lock + uint32_t pline_offset, pstart_addr, pline_compare; + d->vga.get_offsets(&d->vga, &pline_offset, &pstart_addr, &pline_compare); + SurfaceBinding *surface = pgraph_vk_surface_get_within(d, d->pcrtc.start + pline_offset); + if (surface == NULL || !surface->color) { + qemu_mutex_unlock(&d->pfifo.lock); + return 0; + } + + assert(surface->color); + + surface->frame_time = pg->frame_time; + +#if HAVE_EXTERNAL_MEMORY + qemu_event_reset(&d->pgraph.sync_complete); + qatomic_set(&pg->sync_pending, true); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&d->pgraph.sync_complete); + return r->display.gl_texture_id; +#else + qemu_mutex_unlock(&d->pfifo.lock); + pgraph_vk_wait_for_surface_download(surface); + return 0; +#endif +} + +static PGRAPHRenderer pgraph_vk_renderer = { + .type = CONFIG_DISPLAY_RENDERER_VULKAN, + .name = "Vulkan", + .ops = { + .init = pgraph_vk_init, + .early_context_init = early_context_init, + .finalize = pgraph_vk_finalize, + .clear_report_value = pgraph_vk_clear_report_value, + .clear_surface = pgraph_vk_clear_surface, + .draw_begin = pgraph_vk_draw_begin, + .draw_end = pgraph_vk_draw_end, + .flip_stall = pgraph_vk_flip_stall, + .flush_draw = pgraph_vk_flush_draw, + .get_report = pgraph_vk_get_report, + .image_blit = pgraph_vk_image_blit, + .pre_savevm_trigger = pgraph_vk_pre_savevm_trigger, + .pre_savevm_wait = pgraph_vk_pre_savevm_wait, + .pre_shutdown_trigger = pgraph_vk_pre_shutdown_trigger, + .pre_shutdown_wait = pgraph_vk_pre_shutdown_wait, + .process_pending = pgraph_vk_process_pending, + .process_pending_reports = pgraph_vk_process_pending_reports, + .surface_update = pgraph_vk_surface_update, + 
.set_surface_scale_factor = pgraph_vk_set_surface_scale_factor, + .get_surface_scale_factor = pgraph_vk_get_surface_scale_factor, + .get_framebuffer_surface = pgraph_vk_get_framebuffer_surface, + } +}; + +static void __attribute__((constructor)) register_renderer(void) +{ + pgraph_renderer_register(&pgraph_vk_renderer); +} + +void pgraph_vk_check_memory_budget(PGRAPHState *pg) +{ +#if 0 // FIXME + PGRAPHVkState *r = pg->vk_renderer_state; + + VkPhysicalDeviceMemoryProperties const *props; + vmaGetMemoryProperties(r->allocator, &props); + + g_autofree VmaBudget *budgets = g_malloc_n(props->memoryHeapCount, sizeof(VmaBudget)); + vmaGetHeapBudgets(r->allocator, budgets); + + const float budget_threshold = 0.8; + bool near_budget = false; + + for (int i = 0; i < props->memoryHeapCount; i++) { + VmaBudget *b = &budgets[i]; + float use_to_budget_ratio = + (double)b->statistics.allocationBytes / (double)b->budget; + NV2A_VK_DPRINTF("Heap %d: used %lu/%lu MiB (%.2f%%)", i, + b->statistics.allocationBytes / (1024 * 1024), + b->budget / (1024 * 1024), use_to_budget_ratio * 100); + near_budget |= use_to_budget_ratio > budget_threshold; + } + + // If any heaps are near budget, free up some resources + if (near_budget) { + pgraph_vk_trim_texture_cache(pg); + } +#endif + +#if 0 + char *s; + vmaBuildStatsString(r->allocator, &s, VK_TRUE); + puts(s); + vmaFreeStatsString(r->allocator, s); +#endif +} diff --git a/hw/xbox/nv2a/pgraph/vk/renderer.h b/hw/xbox/nv2a/pgraph/vk/renderer.h new file mode 100644 index 00000000000..250b92f6953 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/renderer.h @@ -0,0 +1,592 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#ifndef HW_XBOX_NV2A_PGRAPH_VK_RENDERER_H +#define HW_XBOX_NV2A_PGRAPH_VK_RENDERER_H + +#include "qemu/osdep.h" +#include "qemu/thread.h" +#include "qemu/queue.h" +#include "qemu/lru.h" +#include "hw/hw.h" +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/nv2a_regs.h" +#include "hw/xbox/nv2a/pgraph/surface.h" +#include "hw/xbox/nv2a/pgraph/texture.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" + +#include +#include +#include +#include +#include + +#include "debug.h" +#include "constants.h" +#include "glsl.h" + +#define HAVE_EXTERNAL_MEMORY 1 + +typedef struct QueueFamilyIndices { + int queue_family; +} QueueFamilyIndices; + +typedef struct MemorySyncRequirement { + hwaddr addr, size; +} MemorySyncRequirement; + +typedef struct RenderPassState { + VkFormat color_format; + VkFormat zeta_format; +} RenderPassState; + +typedef struct RenderPass { + RenderPassState state; + VkRenderPass render_pass; +} RenderPass; + +typedef struct PipelineKey { + bool clear; + RenderPassState render_pass_state; + ShaderState shader_state; + uint32_t regs[10]; + VkVertexInputBindingDescription binding_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; + VkVertexInputAttributeDescription attribute_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; +} PipelineKey; + +typedef struct PipelineBinding { + LruNode node; + PipelineKey key; + VkPipelineLayout layout; + VkPipeline pipeline; + VkRenderPass render_pass; + unsigned int draw_time; +} PipelineBinding; + +enum Buffer { + BUFFER_STAGING_DST, + BUFFER_STAGING_SRC, + BUFFER_COMPUTE_DST, + BUFFER_COMPUTE_SRC, + BUFFER_INDEX, + BUFFER_INDEX_STAGING, + 
BUFFER_VERTEX_RAM, + BUFFER_VERTEX_INLINE, + BUFFER_VERTEX_INLINE_STAGING, + BUFFER_UNIFORM, + BUFFER_UNIFORM_STAGING, + BUFFER_COUNT +}; + +typedef struct StorageBuffer { + VkBuffer buffer; + VkBufferUsageFlags usage; + VmaAllocationCreateInfo alloc_info; + VmaAllocation allocation; + VkMemoryPropertyFlags properties; + size_t buffer_offset; + size_t buffer_size; + uint8_t *mapped; +} StorageBuffer; + +typedef struct SurfaceBinding { + QTAILQ_ENTRY(SurfaceBinding) entry; + MemAccessCallback *access_cb; + + hwaddr vram_addr; + + SurfaceShape shape; + uintptr_t dma_addr; + uintptr_t dma_len; + bool color; + bool swizzle; + + unsigned int width; + unsigned int height; + unsigned int pitch; + size_t size; + + bool cleared; + int frame_time; + int draw_time; + bool draw_dirty; + bool download_pending; + bool upload_pending; + + BasicSurfaceFormatInfo fmt; + SurfaceFormatInfo host_fmt; + + VkImage image; + VkImageView image_view; + VmaAllocation allocation; + + // Used for scaling + VkImage image_scratch; + VkImageLayout image_scratch_current_layout; + VmaAllocation allocation_scratch; + + bool initialized; +} SurfaceBinding; + +typedef struct ShaderModuleInfo { + char *glsl; + GByteArray *spirv; + VkShaderModule module; + SpvReflectShaderModule reflect_module; + SpvReflectDescriptorSet **descriptor_sets; + ShaderUniformLayout uniforms; + ShaderUniformLayout push_constants; +} ShaderModuleInfo; + +typedef struct ShaderBinding { + LruNode node; + bool initialized; + + ShaderState state; + ShaderModuleInfo *geometry; + ShaderModuleInfo *vertex; + ShaderModuleInfo *fragment; + + int psh_constant_loc[9][2]; + int alpha_ref_loc; + + int bump_mat_loc[NV2A_MAX_TEXTURES]; + int bump_scale_loc[NV2A_MAX_TEXTURES]; + int bump_offset_loc[NV2A_MAX_TEXTURES]; + int tex_scale_loc[NV2A_MAX_TEXTURES]; + + int surface_size_loc; + int clip_range_loc; + + int vsh_constant_loc; + uint32_t vsh_constants[NV2A_VERTEXSHADER_CONSTANTS][4]; + + int inv_viewport_loc; + int ltctxa_loc; + int 
ltctxb_loc; + int ltc1_loc; + + int fog_color_loc; + int fog_param_loc; + int light_infinite_half_vector_loc[NV2A_MAX_LIGHTS]; + int light_infinite_direction_loc[NV2A_MAX_LIGHTS]; + int light_local_position_loc[NV2A_MAX_LIGHTS]; + int light_local_attenuation_loc[NV2A_MAX_LIGHTS]; + + int clip_region_loc; + + int material_alpha_loc; + + int uniform_attrs_loc; +} ShaderBinding; + +typedef struct TextureKey { + TextureShape state; + hwaddr texture_vram_offset; + hwaddr texture_length; + hwaddr palette_vram_offset; + hwaddr palette_length; + float scale; + uint32_t filter; + uint32_t address; + uint32_t border_color; +} TextureKey; + +typedef struct TextureBinding { + LruNode node; + TextureKey key; + VkImage image; + VkImageLayout current_layout; + VkImageView image_view; + VmaAllocation allocation; + VkSampler sampler; + bool possibly_dirty; + uint64_t hash; + unsigned int draw_time; + uint32_t submit_time; +} TextureBinding; + +typedef struct QueryReport { + QSIMPLEQ_ENTRY(QueryReport) entry; + bool clear; + uint32_t parameter; + unsigned int query_count; +} QueryReport; + +typedef struct PvideoState { + bool enabled; + hwaddr base; + hwaddr limit; + hwaddr offset; + + int pitch; + int format; + + int in_width; + int in_height; + int out_width; + int out_height; + + int in_s; + int in_t; + int out_x; + int out_y; + + float scale_x; + float scale_y; + + bool color_key_enabled; + uint32_t color_key; +} PvideoState; + +typedef struct PGRAPHVkDisplayState { + ShaderModuleInfo *display_frag; + + VkDescriptorPool descriptor_pool; + VkDescriptorSetLayout descriptor_set_layout; + VkDescriptorSet descriptor_set; + + VkPipelineLayout pipeline_layout; + VkPipeline pipeline; + + VkRenderPass render_pass; + VkFramebuffer framebuffer; + + VkImage image; + VkImageView image_view; + VkDeviceMemory memory; + VkSampler sampler; + + struct { + PvideoState state; + int width, height; + VkImage image; + VkImageView image_view; + VmaAllocation allocation; + VkSampler sampler; + } pvideo; 
+ + int width, height; + int draw_time; + + // OpenGL Interop +#ifdef WIN32 + HANDLE handle; +#else + int fd; +#endif + GLuint gl_memory_obj; + GLuint gl_texture_id; +} PGRAPHVkDisplayState; + +typedef struct ComputePipelineKey { + VkFormat host_fmt; + bool pack; + int workgroup_size; +} ComputePipelineKey; + +typedef struct ComputePipeline { + LruNode node; + ComputePipelineKey key; + VkPipeline pipeline; +} ComputePipeline; + +typedef struct PGRAPHVkComputeState { + VkDescriptorPool descriptor_pool; + VkDescriptorSetLayout descriptor_set_layout; + VkDescriptorSet descriptor_sets[1024]; + int descriptor_set_index; + VkPipelineLayout pipeline_layout; + Lru pipeline_cache; + ComputePipeline *pipeline_cache_entries; +} PGRAPHVkComputeState; + +typedef struct PGRAPHVkState { + void *window; + VkInstance instance; + VkDebugUtilsMessengerEXT debug_messenger; + int debug_depth; + + bool debug_utils_extension_enabled; + bool custom_border_color_extension_enabled; + bool provoking_vertex_extension_enabled; + bool memory_budget_extension_enabled; + + VkPhysicalDevice physical_device; + VkPhysicalDeviceProperties device_props; + VkDevice device; + VmaAllocator allocator; + uint32_t allocator_last_submit_index; + + VkQueue queue; + VkCommandPool command_pool; + VkCommandBuffer command_buffers[2]; + + VkCommandBuffer command_buffer; + VkSemaphore command_buffer_semaphore; + VkFence command_buffer_fence; + unsigned int command_buffer_start_time; + bool in_command_buffer; + uint32_t submit_count; + + VkCommandBuffer aux_command_buffer; + bool in_aux_command_buffer; + + VkFramebuffer framebuffers[50]; + int framebuffer_index; + bool framebuffer_dirty; + + VkRenderPass render_pass; + GArray *render_passes; // RenderPass + bool in_render_pass; + bool in_draw; + + Lru pipeline_cache; + VkPipelineCache vk_pipeline_cache; + PipelineBinding *pipeline_cache_entries; + PipelineBinding *pipeline_binding; + bool pipeline_binding_changed; + + VkDescriptorPool descriptor_pool; + 
VkDescriptorSetLayout descriptor_set_layout; + VkDescriptorSet descriptor_sets[1024]; + int descriptor_set_index; + + StorageBuffer storage_buffers[BUFFER_COUNT]; + + MemorySyncRequirement vertex_ram_buffer_syncs[NV2A_VERTEXSHADER_ATTRIBUTES]; + size_t num_vertex_ram_buffer_syncs; + unsigned long *uploaded_bitmap; + size_t bitmap_size; + + VkVertexInputAttributeDescription vertex_attribute_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; + int vertex_attribute_to_description_location[NV2A_VERTEXSHADER_ATTRIBUTES]; + int num_active_vertex_attribute_descriptions; + + VkVertexInputBindingDescription vertex_binding_descriptions[NV2A_VERTEXSHADER_ATTRIBUTES]; + int num_active_vertex_binding_descriptions; + hwaddr vertex_attribute_offsets[NV2A_VERTEXSHADER_ATTRIBUTES]; + uint16_t vertex_buffer_inline; + + QTAILQ_HEAD(, SurfaceBinding) surfaces; + QTAILQ_HEAD(, SurfaceBinding) invalid_surfaces; + SurfaceBinding *color_binding, *zeta_binding; + bool downloads_pending; + QemuEvent downloads_complete; + bool download_dirty_surfaces_pending; + QemuEvent dirty_surfaces_download_complete; // common + + Lru texture_cache; + TextureBinding *texture_cache_entries; + TextureBinding *texture_bindings[NV2A_MAX_TEXTURES]; + TextureBinding dummy_texture; + bool texture_bindings_changed; + VkFormatProperties *texture_format_properties; + + Lru shader_cache; + ShaderBinding *shader_cache_entries; + ShaderBinding *shader_binding; + ShaderModuleInfo *quad_vert_module, *solid_frag_module; + bool shader_bindings_changed; + + // FIXME: Merge these into a structure + uint64_t uniform_buffer_hashes[2]; + size_t uniform_buffer_offsets[2]; + bool uniforms_changed; + + VkQueryPool query_pool; + int max_queries_in_flight; // FIXME: Move out to constant + int num_queries_in_flight; + bool new_query_needed; + bool query_in_flight; + uint32_t zpass_pixel_count_result; + QSIMPLEQ_HEAD(, QueryReport) report_queue; // FIXME: Statically allocate + + SurfaceFormatInfo kelvin_surface_zeta_vk_map[3]; + + uint32_t 
clear_parameter; + + PGRAPHVkDisplayState display; + PGRAPHVkComputeState compute; +} PGRAPHVkState; + +// renderer.c +void pgraph_vk_check_memory_budget(PGRAPHState *pg); + +// debug.c +#define RGBA_RED (float[4]){1,0,0,1} +#define RGBA_YELLOW (float[4]){1,1,0,1} +#define RGBA_GREEN (float[4]){0,1,0,1} +#define RGBA_BLUE (float[4]){0,0,1,1} +#define RGBA_PINK (float[4]){1,0,1,1} +#define RGBA_DEFAULT (float[4]){0,0,0,0} + +void pgraph_vk_debug_init(void); +void pgraph_vk_insert_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd, + float color[4], const char *format, ...) __attribute__ ((format (printf, 4, 5))); +void pgraph_vk_begin_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd, + float color[4], const char *format, ...) __attribute__ ((format (printf, 4, 5))); +void pgraph_vk_end_debug_marker(PGRAPHVkState *r, VkCommandBuffer cmd); + +// instance.c +void pgraph_vk_init_instance(PGRAPHState *pg, Error **errp); +void pgraph_vk_finalize_instance(PGRAPHState *pg); +QueueFamilyIndices pgraph_vk_find_queue_families(VkPhysicalDevice device); +uint32_t pgraph_vk_get_memory_type(PGRAPHState *pg, uint32_t type_bits, + VkMemoryPropertyFlags properties); + +// glsl.c +void pgraph_vk_init_glsl_compiler(void); +void pgraph_vk_finalize_glsl_compiler(void); +GByteArray *pgraph_vk_compile_glsl_to_spv(glslang_stage_t stage, + const char *glsl_source); +VkShaderModule pgraph_vk_create_shader_module_from_spv(PGRAPHVkState *r, + GByteArray *spv); +ShaderModuleInfo *pgraph_vk_create_shader_module_from_glsl( + PGRAPHVkState *r, VkShaderStageFlagBits stage, const char *glsl); +void pgraph_vk_destroy_shader_module(PGRAPHVkState *r, ShaderModuleInfo *info); + +// buffer.c +void pgraph_vk_init_buffers(NV2AState *d); +void pgraph_vk_finalize_buffers(NV2AState *d); +bool pgraph_vk_buffer_has_space_for(PGRAPHState *pg, int index, + VkDeviceSize size, + VkDeviceAddress alignment); +VkDeviceSize pgraph_vk_append_to_buffer(PGRAPHState *pg, int index, void **data, + VkDeviceSize *sizes, 
size_t count, + VkDeviceAddress alignment); + +// command.c +void pgraph_vk_init_command_buffers(PGRAPHState *pg); +void pgraph_vk_finalize_command_buffers(PGRAPHState *pg); +VkCommandBuffer pgraph_vk_begin_single_time_commands(PGRAPHState *pg); +void pgraph_vk_end_single_time_commands(PGRAPHState *pg, VkCommandBuffer cmd); + +// image.c +void pgraph_vk_transition_image_layout(PGRAPHState *pg, VkCommandBuffer cmd, + VkImage image, VkFormat format, + VkImageLayout oldLayout, + VkImageLayout newLayout); + +// vertex.c +void pgraph_vk_bind_vertex_attributes(NV2AState *d, unsigned int min_element, + unsigned int max_element, + bool inline_data, + unsigned int inline_stride, + unsigned int provoking_element); +void pgraph_vk_bind_vertex_attributes_inline(NV2AState *d); +void pgraph_vk_update_vertex_ram_buffer(PGRAPHState *pg, hwaddr offset, void *data, + VkDeviceSize size); +VkDeviceSize pgraph_vk_update_index_buffer(PGRAPHState *pg, void *data, + VkDeviceSize size); +VkDeviceSize pgraph_vk_update_vertex_inline_buffer(PGRAPHState *pg, void **data, + VkDeviceSize *sizes, + size_t count); + +// surface.c +void pgraph_vk_init_surfaces(PGRAPHState *pg); +void pgraph_vk_finalize_surfaces(PGRAPHState *pg); +void pgraph_vk_surface_flush(NV2AState *d); +void pgraph_vk_process_pending_downloads(NV2AState *d); +void pgraph_vk_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface); +SurfaceBinding *pgraph_vk_surface_get_within(NV2AState *d, hwaddr addr); +void pgraph_vk_wait_for_surface_download(SurfaceBinding *e); +void pgraph_vk_download_dirty_surfaces(NV2AState *d); +void pgraph_vk_download_surfaces_in_range_if_dirty(PGRAPHState *pg, hwaddr start, hwaddr size); +void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, + bool force); +void pgraph_vk_surface_update(NV2AState *d, bool upload, bool color_write, + bool zeta_write); +SurfaceBinding *pgraph_vk_surface_get(NV2AState *d, hwaddr addr); +void pgraph_vk_set_surface_dirty(PGRAPHState *pg, bool 
color, bool zeta); +void pgraph_vk_set_surface_scale_factor(NV2AState *d, unsigned int scale); +unsigned int pgraph_vk_get_surface_scale_factor(NV2AState *d); +void pgraph_vk_reload_surface_scale_factor(PGRAPHState *pg); + +// surface-compute.c +void pgraph_vk_init_compute(PGRAPHState *pg); +bool pgraph_vk_compute_needs_finish(PGRAPHVkState *r); +void pgraph_vk_compute_finish_complete(PGRAPHVkState *r); +void pgraph_vk_finalize_compute(PGRAPHState *pg); +void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, + VkCommandBuffer cmd, VkBuffer src, + VkBuffer dst, bool downscale); +void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, + VkCommandBuffer cmd, VkBuffer src, + VkBuffer dst); + +// display.c +void pgraph_vk_init_display(PGRAPHState *pg); +void pgraph_vk_finalize_display(PGRAPHState *pg); +void pgraph_vk_render_display(PGRAPHState *pg); + +// texture.c +void pgraph_vk_init_textures(PGRAPHState *pg); +void pgraph_vk_finalize_textures(PGRAPHState *pg); +void pgraph_vk_bind_textures(NV2AState *d); +void pgraph_vk_mark_textures_possibly_dirty(NV2AState *d, hwaddr addr, + hwaddr size); +void pgraph_vk_trim_texture_cache(PGRAPHState *pg); + +// shaders.c +void pgraph_vk_init_shaders(PGRAPHState *pg); +void pgraph_vk_finalize_shaders(PGRAPHState *pg); +void pgraph_vk_update_descriptor_sets(PGRAPHState *pg); +void pgraph_vk_bind_shaders(PGRAPHState *pg); +void pgraph_vk_update_shader_uniforms(PGRAPHState *pg); + +// reports.c +void pgraph_vk_init_reports(PGRAPHState *pg); +void pgraph_vk_finalize_reports(PGRAPHState *pg); +void pgraph_vk_clear_report_value(NV2AState *d); +void pgraph_vk_get_report(NV2AState *d, uint32_t parameter); +void pgraph_vk_process_pending_reports(NV2AState *d); +void pgraph_vk_process_pending_reports_internal(NV2AState *d); + +typedef enum FinishReason { + VK_FINISH_REASON_VERTEX_BUFFER_DIRTY, + VK_FINISH_REASON_SURFACE_CREATE, + VK_FINISH_REASON_SURFACE_DOWN, + VK_FINISH_REASON_NEED_BUFFER_SPACE, 
+ VK_FINISH_REASON_FRAMEBUFFER_DIRTY, + VK_FINISH_REASON_PRESENTING, + VK_FINISH_REASON_FLIP_STALL, + VK_FINISH_REASON_FLUSH, + VK_FINISH_REASON_STALLED, +} FinishReason; + +// draw.c +void pgraph_vk_init_pipelines(PGRAPHState *pg); +void pgraph_vk_finalize_pipelines(PGRAPHState *pg); +void pgraph_vk_clear_surface(NV2AState *d, uint32_t parameter); +void pgraph_vk_draw_begin(NV2AState *d); +void pgraph_vk_draw_end(NV2AState *d); +void pgraph_vk_finish(PGRAPHState *pg, FinishReason why); +void pgraph_vk_flush_draw(NV2AState *d); +void pgraph_vk_begin_command_buffer(PGRAPHState *pg); +void pgraph_vk_ensure_command_buffer(PGRAPHState *pg); +void pgraph_vk_ensure_not_in_render_pass(PGRAPHState *pg); + +VkCommandBuffer pgraph_vk_begin_nondraw_commands(PGRAPHState *pg); +void pgraph_vk_end_nondraw_commands(PGRAPHState *pg, VkCommandBuffer cmd); + +// blit.c +void pgraph_vk_image_blit(NV2AState *d); + +#endif diff --git a/hw/xbox/nv2a/pgraph/vk/reports.c b/hw/xbox/nv2a/pgraph/vk/reports.c new file mode 100644 index 00000000000..b6570523722 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/reports.c @@ -0,0 +1,159 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "renderer.h" + +void pgraph_vk_init_reports(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + QSIMPLEQ_INIT(&r->report_queue); + r->num_queries_in_flight = 0; + r->max_queries_in_flight = 1024; + r->new_query_needed = false; + r->query_in_flight = false; + r->zpass_pixel_count_result = 0; + + VkQueryPoolCreateInfo pool_create_info = (VkQueryPoolCreateInfo){ + .sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO, + .queryType = VK_QUERY_TYPE_OCCLUSION, + .queryCount = r->max_queries_in_flight, + }; + VK_CHECK( + vkCreateQueryPool(r->device, &pool_create_info, NULL, &r->query_pool)); +} + +void pgraph_vk_finalize_reports(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + QueryReport *report; + while ((report = QSIMPLEQ_FIRST(&r->report_queue)) != NULL) { + QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry); + g_free(report); + } + + vkDestroyQueryPool(r->device, r->query_pool, NULL); +} + +void pgraph_vk_clear_report_value(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + QueryReport *report = g_malloc(sizeof(QueryReport)); // FIXME: Pre-allocate + report->clear = true; + report->parameter = 0; + report->query_count = r->num_queries_in_flight; + QSIMPLEQ_INSERT_TAIL(&r->report_queue, report, entry); + + r->new_query_needed = true; +} + +void pgraph_vk_get_report(NV2AState *d, uint32_t parameter) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + uint8_t type = GET_MASK(parameter, NV097_GET_REPORT_TYPE); + assert(type == NV097_GET_REPORT_TYPE_ZPASS_PIXEL_CNT); + + QueryReport *report = g_malloc(sizeof(QueryReport)); // FIXME: Pre-allocate + report->clear = false; + report->parameter = parameter; + report->query_count = r->num_queries_in_flight; + QSIMPLEQ_INSERT_TAIL(&r->report_queue, report, entry); + + r->new_query_needed = true; +} + +void pgraph_vk_process_pending_reports_internal(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + 
PGRAPHVkState *r = pg->vk_renderer_state; + + NV2A_VK_DGROUP_BEGIN("Processing queries"); + + assert(!r->in_command_buffer); + + // Fetch all query results + g_autofree uint64_t *query_results = NULL; + + if (r->num_queries_in_flight > 0) { + size_t size_of_results = r->num_queries_in_flight * sizeof(uint64_t); + query_results = g_malloc_n(r->num_queries_in_flight, + sizeof(uint64_t)); // FIXME: Pre-allocate + VkResult result; + do { + result = vkGetQueryPoolResults( + r->device, r->query_pool, 0, r->num_queries_in_flight, + size_of_results, query_results, sizeof(uint64_t), + VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + } while (result == VK_NOT_READY); + } + + // Write out queries + int num_results_counted = 0; + const int result_divisor = + pg->surface_scale_factor * pg->surface_scale_factor; + + QueryReport *report; + while ((report = QSIMPLEQ_FIRST(&r->report_queue)) != NULL) { + assert(report->query_count >= num_results_counted); + assert(report->query_count <= r->num_queries_in_flight); + + while (num_results_counted < report->query_count) { + r->zpass_pixel_count_result += + query_results[num_results_counted++]; + } + + if (report->clear) { + NV2A_VK_DPRINTF("Cleared"); + r->zpass_pixel_count_result = 0; + } else { + pgraph_write_zpass_pixel_cnt_report( + d, report->parameter, + r->zpass_pixel_count_result / result_divisor); + } + + QSIMPLEQ_REMOVE_HEAD(&r->report_queue, entry); + g_free(report); + } + + // Add remaining results + while (num_results_counted < r->num_queries_in_flight) { + r->zpass_pixel_count_result += query_results[num_results_counted++]; + } + + r->num_queries_in_flight = 0; + NV2A_VK_DGROUP_END(); +} + +void pgraph_vk_process_pending_reports(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + uint32_t *dma_get = &d->pfifo.regs[NV_PFIFO_CACHE1_DMA_GET]; + uint32_t *dma_put = &d->pfifo.regs[NV_PFIFO_CACHE1_DMA_PUT]; + + if (*dma_get == *dma_put && r->in_command_buffer) { + 
pgraph_vk_finish(pg, VK_FINISH_REASON_STALLED); + } +} diff --git a/hw/xbox/nv2a/pgraph/vk/shaders.c b/hw/xbox/nv2a/pgraph/vk/shaders.c new file mode 100644 index 00000000000..b1a9410bc73 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/shaders.c @@ -0,0 +1,834 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * Based on GL implementation: + * + * Copyright (c) 2015 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . 
+ */ + +#include "qemu/osdep.h" +#include "hw/xbox/nv2a/pgraph/shaders.h" +#include "hw/xbox/nv2a/pgraph/util.h" +#include "hw/xbox/nv2a/pgraph/glsl/geom.h" +#include "hw/xbox/nv2a/pgraph/glsl/vsh.h" +#include "hw/xbox/nv2a/pgraph/glsl/psh.h" +#include "qemu/fast-hash.h" +#include "qemu/mstring.h" +#include "renderer.h" +#include + +const size_t MAX_UNIFORM_ATTR_VALUES_SIZE = NV2A_VERTEXSHADER_ATTRIBUTES * 4 * sizeof(float); + +static void create_descriptor_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + size_t num_sets = ARRAY_SIZE(r->descriptor_sets); + + VkDescriptorPoolSize pool_sizes[] = { + { + .type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 2 * num_sets, + }, + { + .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = NV2A_MAX_TEXTURES * num_sets, + } + }; + + VkDescriptorPoolCreateInfo pool_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .poolSizeCount = ARRAY_SIZE(pool_sizes), + .pPoolSizes = pool_sizes, + .maxSets = ARRAY_SIZE(r->descriptor_sets), + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + }; + VK_CHECK(vkCreateDescriptorPool(r->device, &pool_info, NULL, + &r->descriptor_pool)); +} + +static void destroy_descriptor_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyDescriptorPool(r->device, r->descriptor_pool, NULL); + r->descriptor_pool = VK_NULL_HANDLE; +} + +static void create_descriptor_set_layout(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDescriptorSetLayoutBinding bindings[2 + NV2A_MAX_TEXTURES]; + + bindings[0] = (VkDescriptorSetLayoutBinding){ + .binding = VSH_UBO_BINDING, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stageFlags = VK_SHADER_STAGE_VERTEX_BIT, + }; + bindings[1] = (VkDescriptorSetLayoutBinding){ + .binding = PSH_UBO_BINDING, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .stageFlags = 
VK_SHADER_STAGE_FRAGMENT_BIT, + }; + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + bindings[2 + i] = (VkDescriptorSetLayoutBinding){ + .binding = PSH_TEX_BINDING + i, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT, + }; + } + VkDescriptorSetLayoutCreateInfo layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = ARRAY_SIZE(bindings), + .pBindings = bindings, + }; + VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL, + &r->descriptor_set_layout)); +} + +static void destroy_descriptor_set_layout(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyDescriptorSetLayout(r->device, r->descriptor_set_layout, NULL); + r->descriptor_set_layout = VK_NULL_HANDLE; +} + +static void create_descriptor_sets(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDescriptorSetLayout layouts[ARRAY_SIZE(r->descriptor_sets)]; + for (int i = 0; i < ARRAY_SIZE(layouts); i++) { + layouts[i] = r->descriptor_set_layout; + } + + VkDescriptorSetAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = r->descriptor_pool, + .descriptorSetCount = ARRAY_SIZE(r->descriptor_sets), + .pSetLayouts = layouts, + }; + VK_CHECK( + vkAllocateDescriptorSets(r->device, &alloc_info, r->descriptor_sets)); +} + +static void destroy_descriptor_sets(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkFreeDescriptorSets(r->device, r->descriptor_pool, + ARRAY_SIZE(r->descriptor_sets), r->descriptor_sets); + for (int i = 0; i < ARRAY_SIZE(r->descriptor_sets); i++) { + r->descriptor_sets[i] = VK_NULL_HANDLE; + } +} + +void pgraph_vk_update_descriptor_sets(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + bool need_uniform_write = + r->uniforms_changed || + !r->storage_buffers[BUFFER_UNIFORM_STAGING].buffer_offset; + + if (!(r->shader_bindings_changed 
|| r->texture_bindings_changed || + (r->descriptor_set_index == 0) || need_uniform_write)) { + return; // Nothing changed + } + + ShaderBinding *binding = r->shader_binding; + ShaderUniformLayout *layouts[] = { &binding->vertex->uniforms, + &binding->fragment->uniforms }; + VkDeviceSize ubo_buffer_total_size = 0; + for (int i = 0; i < ARRAY_SIZE(layouts); i++) { + ubo_buffer_total_size += layouts[i]->total_size; + } + bool need_ubo_staging_buffer_reset = + r->uniforms_changed && + !pgraph_vk_buffer_has_space_for(pg, BUFFER_UNIFORM_STAGING, + ubo_buffer_total_size, + r->device_props.limits.minUniformBufferOffsetAlignment); + + bool need_descriptor_write_reset = + (r->descriptor_set_index >= ARRAY_SIZE(r->descriptor_sets)); + + if (need_descriptor_write_reset || need_ubo_staging_buffer_reset) { + pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE); + need_uniform_write = true; + } + + VkWriteDescriptorSet descriptor_writes[2 + NV2A_MAX_TEXTURES]; + + assert(r->descriptor_set_index < ARRAY_SIZE(r->descriptor_sets)); + + if (need_uniform_write) { + for (int i = 0; i < ARRAY_SIZE(layouts); i++) { + void *data = layouts[i]->allocation; + VkDeviceSize size = layouts[i]->total_size; + r->uniform_buffer_offsets[i] = pgraph_vk_append_to_buffer( + pg, BUFFER_UNIFORM_STAGING, &data, &size, 1, + r->device_props.limits.minUniformBufferOffsetAlignment); + } + + r->uniforms_changed = false; + } + + VkDescriptorBufferInfo ubo_buffer_infos[2]; + for (int i = 0; i < ARRAY_SIZE(layouts); i++) { + ubo_buffer_infos[i] = (VkDescriptorBufferInfo){ + .buffer = r->storage_buffers[BUFFER_UNIFORM].buffer, + .offset = r->uniform_buffer_offsets[i], + .range = layouts[i]->total_size, + }; + descriptor_writes[i] = (VkWriteDescriptorSet){ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = r->descriptor_sets[r->descriptor_set_index], + .dstBinding = i == 0 ? 
VSH_UBO_BINDING : PSH_UBO_BINDING, + .dstArrayElement = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + .descriptorCount = 1, + .pBufferInfo = &ubo_buffer_infos[i], + }; + } + + VkDescriptorImageInfo image_infos[NV2A_MAX_TEXTURES]; + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + image_infos[i] = (VkDescriptorImageInfo){ + .imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .imageView = r->texture_bindings[i]->image_view, + .sampler = r->texture_bindings[i]->sampler, + }; + descriptor_writes[2 + i] = (VkWriteDescriptorSet){ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = r->descriptor_sets[r->descriptor_set_index], + .dstBinding = PSH_TEX_BINDING + i, + .dstArrayElement = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, + .descriptorCount = 1, + .pImageInfo = &image_infos[i], + }; + } + + vkUpdateDescriptorSets(r->device, 6, descriptor_writes, 0, NULL); + + r->descriptor_set_index++; +} + +static void update_shader_constant_locations(ShaderBinding *binding) +{ + int i, j; + char tmp[64]; + + /* lookup fragment shader uniforms */ + for (i = 0; i < 9; i++) { + for (j = 0; j < 2; j++) { + snprintf(tmp, sizeof(tmp), "c%d_%d", j, i); + binding->psh_constant_loc[i][j] = + uniform_index(&binding->fragment->uniforms, tmp); + } + } + binding->alpha_ref_loc = + uniform_index(&binding->fragment->uniforms, "alphaRef"); + binding->fog_color_loc = + uniform_index(&binding->fragment->uniforms, "fogColor"); + for (i = 1; i < NV2A_MAX_TEXTURES; i++) { + snprintf(tmp, sizeof(tmp), "bumpMat%d", i); + binding->bump_mat_loc[i] = + uniform_index(&binding->fragment->uniforms, tmp); + snprintf(tmp, sizeof(tmp), "bumpScale%d", i); + binding->bump_scale_loc[i] = + uniform_index(&binding->fragment->uniforms, tmp); + snprintf(tmp, sizeof(tmp), "bumpOffset%d", i); + binding->bump_offset_loc[i] = + uniform_index(&binding->fragment->uniforms, tmp); + } + + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + snprintf(tmp, sizeof(tmp), "texScale%d", 
i); + binding->tex_scale_loc[i] = + uniform_index(&binding->fragment->uniforms, tmp); + } + + /* lookup vertex shader uniforms */ + binding->vsh_constant_loc = uniform_index(&binding->vertex->uniforms, "c"); + binding->surface_size_loc = + uniform_index(&binding->vertex->uniforms, "surfaceSize"); + binding->clip_range_loc = + uniform_index(&binding->vertex->uniforms, "clipRange"); + binding->fog_param_loc = + uniform_index(&binding->vertex->uniforms, "fogParam"); + + binding->inv_viewport_loc = + uniform_index(&binding->vertex->uniforms, "invViewport"); + binding->ltctxa_loc = uniform_index(&binding->vertex->uniforms, "ltctxa"); + binding->ltctxb_loc = uniform_index(&binding->vertex->uniforms, "ltctxb"); + binding->ltc1_loc = uniform_index(&binding->vertex->uniforms, "ltc1"); + + for (i = 0; i < NV2A_MAX_LIGHTS; i++) { + snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i); + binding->light_infinite_half_vector_loc[i] = + uniform_index(&binding->vertex->uniforms, tmp); + snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i); + binding->light_infinite_direction_loc[i] = + uniform_index(&binding->vertex->uniforms, tmp); + + snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i); + binding->light_local_position_loc[i] = + uniform_index(&binding->vertex->uniforms, tmp); + snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i); + binding->light_local_attenuation_loc[i] = + uniform_index(&binding->vertex->uniforms, tmp); + } + + binding->clip_region_loc = + uniform_index(&binding->fragment->uniforms, "clipRegion"); + + binding->material_alpha_loc = + uniform_index(&binding->vertex->uniforms, "material_alpha"); + + binding->uniform_attrs_loc = + uniform_index(&binding->vertex->uniforms, "inlineValue"); +} + +static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state) +{ + ShaderBinding *snode = container_of(node, ShaderBinding, node); + memcpy(&snode->state, state, sizeof(ShaderState)); + snode->initialized = false; +} + +static void 
shader_cache_entry_post_evict(Lru *lru, LruNode *node) +{ + PGRAPHVkState *r = container_of(lru, PGRAPHVkState, shader_cache); + ShaderBinding *snode = container_of(node, ShaderBinding, node); + + ShaderModuleInfo *modules[] = { + snode->geometry, + snode->vertex, + snode->fragment, + }; + for (int i = 0; i < ARRAY_SIZE(modules); i++) { + if (modules[i]) { + pgraph_vk_destroy_shader_module(r, modules[i]); + } + } + + snode->initialized = false; +} + +static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + ShaderBinding *snode = container_of(node, ShaderBinding, node); + return memcmp(&snode->state, key, sizeof(ShaderState)); +} + +static void shader_cache_init(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + const size_t shader_cache_size = 1024; + lru_init(&r->shader_cache); + r->shader_cache_entries = g_malloc_n(shader_cache_size, sizeof(ShaderBinding)); + assert(r->shader_cache_entries != NULL); + for (int i = 0; i < shader_cache_size; i++) { + lru_add_free(&r->shader_cache, &r->shader_cache_entries[i].node); + } + r->shader_cache.init_node = shader_cache_entry_init; + r->shader_cache.compare_nodes = shader_cache_entry_compare; + r->shader_cache.post_node_evict = shader_cache_entry_post_evict; +} + +static void shader_cache_finalize(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + lru_flush(&r->shader_cache); + g_free(r->shader_cache_entries); + r->shader_cache_entries = NULL; +} + +static ShaderBinding *gen_shaders(PGRAPHState *pg, ShaderState *state) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + uint64_t hash = fast_hash((void *)state, sizeof(*state)); + LruNode *node = lru_lookup(&r->shader_cache, hash, state); + ShaderBinding *snode = container_of(node, ShaderBinding, node); + + NV2A_VK_DPRINTF("shader state hash: %016lx, %p", hash, snode); + + if (!snode->initialized) { + NV2A_VK_DPRINTF("cache miss"); + nv2a_profile_inc_counter(NV2A_PROF_SHADER_GEN); + + char *previous_numeric_locale = 
setlocale(LC_NUMERIC, NULL); + if (previous_numeric_locale) { + previous_numeric_locale = g_strdup(previous_numeric_locale); + } + + /* Ensure numeric values are printed with '.' radix, no grouping */ + setlocale(LC_NUMERIC, "C"); + + MString *geometry_shader_code = pgraph_gen_geom_glsl( + state->polygon_front_mode, state->polygon_back_mode, + state->primitive_mode, state->smooth_shading, true); + if (geometry_shader_code) { + NV2A_VK_DPRINTF("geometry shader: \n%s", + mstring_get_str(geometry_shader_code)); + snode->geometry = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_GEOMETRY_BIT, + mstring_get_str(geometry_shader_code)); + mstring_unref(geometry_shader_code); + } else { + snode->geometry = NULL; + } + + MString *vertex_shader_code = + pgraph_gen_vsh_glsl(state, geometry_shader_code != NULL); + NV2A_VK_DPRINTF("vertex shader: \n%s", + mstring_get_str(vertex_shader_code)); + snode->vertex = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_VERTEX_BIT, + mstring_get_str(vertex_shader_code)); + mstring_unref(vertex_shader_code); + + MString *fragment_shader_code = pgraph_gen_psh_glsl(state->psh); + NV2A_VK_DPRINTF("fragment shader: \n%s", + mstring_get_str(fragment_shader_code)); + snode->fragment = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_FRAGMENT_BIT, + mstring_get_str(fragment_shader_code)); + mstring_unref(fragment_shader_code); + + if (previous_numeric_locale) { + setlocale(LC_NUMERIC, previous_numeric_locale); + g_free(previous_numeric_locale); + } + + update_shader_constant_locations(snode); + + snode->initialized = true; + } + + return snode; +} + +static void update_uniform_attr_values(PGRAPHState *pg, ShaderBinding *binding) +{ + float values[NV2A_VERTEXSHADER_ATTRIBUTES][4]; + int num_uniform_attrs = 0; + + pgraph_get_inline_values(pg, binding->state.uniform_attrs, values, + &num_uniform_attrs); + + if (num_uniform_attrs > 0) { + uniform1fv(&binding->vertex->uniforms, binding->uniform_attrs_loc, + 
num_uniform_attrs * 4, &values[0][0]); + } +} + +// FIXME: Move to common +static void shader_update_constants(PGRAPHState *pg, ShaderBinding *binding, + bool binding_changed, bool vertex_program, + bool fixed_function) +{ + ShaderState *state = &binding->state; + int i, j; + + /* update combiner constants */ + for (i = 0; i < 9; i++) { + uint32_t constant[2]; + if (i == 8) { + /* final combiner */ + constant[0] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR0); + constant[1] = pgraph_reg_r(pg, NV_PGRAPH_SPECFOGFACTOR1); + } else { + constant[0] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR0 + i * 4); + constant[1] = pgraph_reg_r(pg, NV_PGRAPH_COMBINEFACTOR1 + i * 4); + } + + for (j = 0; j < 2; j++) { + GLint loc = binding->psh_constant_loc[i][j]; + if (loc != -1) { + float value[4]; + pgraph_argb_pack32_to_rgba_float(constant[j], value); + uniform1fv(&binding->fragment->uniforms, loc, 4, value); + } + } + } + if (binding->alpha_ref_loc != -1) { + float alpha_ref = GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_CONTROL_0), + NV_PGRAPH_CONTROL_0_ALPHAREF) / + 255.0; + uniform1f(&binding->fragment->uniforms, binding->alpha_ref_loc, + alpha_ref); + } + + + /* For each texture stage */ + for (i = 0; i < NV2A_MAX_TEXTURES; i++) { + int loc; + + /* Bump luminance only during stages 1 - 3 */ + if (i > 0) { + loc = binding->bump_mat_loc[i]; + if (loc != -1) { + uint32_t m_u32[4]; + m_u32[0] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT00 + 4 * (i - 1)); + m_u32[1] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT01 + 4 * (i - 1)); + m_u32[2] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT10 + 4 * (i - 1)); + m_u32[3] = pgraph_reg_r(pg, NV_PGRAPH_BUMPMAT11 + 4 * (i - 1)); + float m[4]; + m[0] = *(float*)&m_u32[0]; + m[1] = *(float*)&m_u32[1]; + m[2] = *(float*)&m_u32[2]; + m[3] = *(float*)&m_u32[3]; + uniformMatrix2fv(&binding->fragment->uniforms, loc, m); + } + loc = binding->bump_scale_loc[i]; + if (loc != -1) { + uint32_t v = + pgraph_reg_r(pg, NV_PGRAPH_BUMPSCALE1 + (i - 1) * 4); + uniform1f(&binding->fragment->uniforms, 
loc, + *(float *)&v); + } + loc = binding->bump_offset_loc[i]; + if (loc != -1) { + uint32_t v = + pgraph_reg_r(pg, NV_PGRAPH_BUMPOFFSET1 + (i - 1) * 4); + uniform1f(&binding->fragment->uniforms, loc, + *(float *)&v); + } + } + + loc = binding->tex_scale_loc[i]; + if (loc != -1) { + assert(pg->vk_renderer_state->texture_bindings[i] != NULL); + float scale = pg->vk_renderer_state->texture_bindings[i]->key.scale; + BasicColorFormatInfo f_basic = kelvin_color_format_info_map[pg->vk_renderer_state->texture_bindings[i]->key.state.color_format]; + if (!f_basic.linear) { + scale = 1.0; + } + uniform1f(&binding->fragment->uniforms, loc, scale); + } + } + + if (binding->fog_color_loc != -1) { + uint32_t fog_color = pgraph_reg_r(pg, NV_PGRAPH_FOGCOLOR); + uniform4f(&binding->fragment->uniforms, binding->fog_color_loc, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_RED) / 255.0, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_GREEN) / 255.0, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_BLUE) / 255.0, + GET_MASK(fog_color, NV_PGRAPH_FOGCOLOR_ALPHA) / 255.0); + } + if (binding->fog_param_loc != -1) { + uint32_t v[2]; + v[0] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM0); + v[1] = pgraph_reg_r(pg, NV_PGRAPH_FOGPARAM1); + uniform2f(&binding->vertex->uniforms, + binding->fog_param_loc, *(float *)&v[0], + *(float *)&v[1]); + } + + float zmax; + switch (pg->surface_shape.zeta_format) { + case NV097_SET_SURFACE_FORMAT_ZETA_Z16: + zmax = pg->surface_shape.z_format ? f16_max : (float)0xFFFF; + break; + case NV097_SET_SURFACE_FORMAT_ZETA_Z24S8: + zmax = pg->surface_shape.z_format ? 
f24_max : (float)0xFFFFFF; + break; + default: + assert(0); + } + + if (fixed_function) { + /* update lighting constants */ + struct { + uint32_t *v; + int locs; + size_t len; + } lighting_arrays[] = { + { &pg->ltctxa[0][0], binding->ltctxa_loc, NV2A_LTCTXA_COUNT }, + { &pg->ltctxb[0][0], binding->ltctxb_loc, NV2A_LTCTXB_COUNT }, + { &pg->ltc1[0][0], binding->ltc1_loc, NV2A_LTC1_COUNT }, + }; + + for (i = 0; i < ARRAY_SIZE(lighting_arrays); i++) { + uniform1iv( + &binding->vertex->uniforms, lighting_arrays[i].locs, + lighting_arrays[i].len * 4, (void *)lighting_arrays[i].v); + } + + for (i = 0; i < NV2A_MAX_LIGHTS; i++) { + int loc = binding->light_infinite_half_vector_loc[i]; + if (loc != -1) { + uniform1fv(&binding->vertex->uniforms, loc, 3, + pg->light_infinite_half_vector[i]); + } + loc = binding->light_infinite_direction_loc[i]; + if (loc != -1) { + uniform1fv(&binding->vertex->uniforms, loc, 3, + pg->light_infinite_direction[i]); + } + + loc = binding->light_local_position_loc[i]; + if (loc != -1) { + uniform1fv(&binding->vertex->uniforms, loc, 3, + pg->light_local_position[i]); + } + loc = binding->light_local_attenuation_loc[i]; + if (loc != -1) { + uniform1fv(&binding->vertex->uniforms, loc, 3, + pg->light_local_attenuation[i]); + } + } + + /* estimate the viewport by assuming it matches the surface ... 
*/ + unsigned int aa_width = 1, aa_height = 1; + pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); + + float m11 = 0.5 * (pg->surface_binding_dim.width / aa_width); + float m22 = -0.5 * (pg->surface_binding_dim.height / aa_height); + float m33 = zmax; + float m41 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][0]; + float m42 = *(float *)&pg->vsh_constants[NV_IGRAPH_XF_XFCTX_VPOFF][1]; + + float invViewport[16] = { + 1.0 / m11, 0, 0, 0, 0, 1.0 / m22, 0, + 0, 0, 0, 1.0 / m33, 0, -1.0 + m41 / m11, 1.0 + m42 / m22, + 0, 1.0 + }; + + if (binding->inv_viewport_loc != -1) { + uniformMatrix4fv(&binding->vertex->uniforms, + binding->inv_viewport_loc, &invViewport[0]); + } + } + + /* update vertex program constants */ + uniform1iv(&binding->vertex->uniforms, binding->vsh_constant_loc, + NV2A_VERTEXSHADER_CONSTANTS * 4, (void *)pg->vsh_constants); + + if (binding->surface_size_loc != -1) { + unsigned int aa_width = 1, aa_height = 1; + pgraph_apply_anti_aliasing_factor(pg, &aa_width, &aa_height); + uniform2f(&binding->vertex->uniforms, binding->surface_size_loc, + pg->surface_binding_dim.width / aa_width, + pg->surface_binding_dim.height / aa_height); + } + + if (binding->clip_range_loc != -1) { + uint32_t v[2]; + v[0] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMIN); + v[1] = pgraph_reg_r(pg, NV_PGRAPH_ZCLIPMAX); + float zclip_min = *(float *)&v[0] / zmax * 2.0 - 1.0; + float zclip_max = *(float *)&v[1] / zmax * 2.0 - 1.0; + uniform4f(&binding->vertex->uniforms, binding->clip_range_loc, 0, + zmax, zclip_min, zclip_max); + } + + /* Clipping regions */ + unsigned int max_gl_width = pg->surface_binding_dim.width; + unsigned int max_gl_height = pg->surface_binding_dim.height; + pgraph_apply_scaling_factor(pg, &max_gl_width, &max_gl_height); + + uint32_t clip_regions[8][4]; + + for (i = 0; i < 8; i++) { + uint32_t x = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPX0 + i * 4); + unsigned int x_min = GET_MASK(x, NV_PGRAPH_WINDOWCLIPX0_XMIN); + unsigned int x_max = GET_MASK(x, 
NV_PGRAPH_WINDOWCLIPX0_XMAX) + 1; + uint32_t y = pgraph_reg_r(pg, NV_PGRAPH_WINDOWCLIPY0 + i * 4); + unsigned int y_min = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMIN); + unsigned int y_max = GET_MASK(y, NV_PGRAPH_WINDOWCLIPY0_YMAX) + 1; + pgraph_apply_anti_aliasing_factor(pg, &x_min, &y_min); + pgraph_apply_anti_aliasing_factor(pg, &x_max, &y_max); + + pgraph_apply_scaling_factor(pg, &x_min, &y_min); + pgraph_apply_scaling_factor(pg, &x_max, &y_max); + + clip_regions[i][0] = x_min; + clip_regions[i][1] = y_min; + clip_regions[i][2] = x_max; + clip_regions[i][3] = y_max; + } + uniform1iv(&binding->fragment->uniforms, binding->clip_region_loc, + 8 * 4, (void *)clip_regions); + + if (binding->material_alpha_loc != -1) { + uniform1f(&binding->vertex->uniforms, binding->material_alpha_loc, + pg->material_alpha); + } + + if (!state->use_push_constants_for_uniform_attrs && state->uniform_attrs) { + update_uniform_attr_values(pg, binding); + } +} + +// Quickly check PGRAPH state to see if any registers have changed that +// necessitate a full shader state inspection. 
+static bool check_shaders_dirty(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + if (!r->shader_binding) { + return true; + } + if (pg->program_data_dirty) { + return true; + } + + int num_stages = pgraph_reg_r(pg, NV_PGRAPH_COMBINECTL) & 0xFF; + for (int i = 0; i < num_stages; i++) { + if (pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAI0 + i * 4) || + pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINEALPHAO0 + i * 4) || + pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORI0 + i * 4) || + pgraph_is_reg_dirty(pg, NV_PGRAPH_COMBINECOLORO0 + i * 4)) { + return true; + } + } + + unsigned int regs[] = { + NV_PGRAPH_COMBINECTL, + NV_PGRAPH_COMBINESPECFOG0, + NV_PGRAPH_COMBINESPECFOG1, + NV_PGRAPH_CONTROL_0, + NV_PGRAPH_CONTROL_3, + NV_PGRAPH_CSV0_C, + NV_PGRAPH_CSV0_D, + NV_PGRAPH_CSV1_A, + NV_PGRAPH_CSV1_B, + NV_PGRAPH_POINTSIZE, + NV_PGRAPH_SETUPRASTER, + NV_PGRAPH_SHADERCLIPMODE, + NV_PGRAPH_SHADERCTL, + NV_PGRAPH_SHADERPROG, + NV_PGRAPH_SHADOWCTL, + }; + for (int i = 0; i < ARRAY_SIZE(regs); i++) { + if (pgraph_is_reg_dirty(pg, regs[i])) { + return true; + } + } + + ShaderState *state = &r->shader_binding->state; + if (pg->uniform_attrs != state->uniform_attrs || + pg->swizzle_attrs != state->swizzle_attrs || + pg->compressed_attrs != state->compressed_attrs || + pg->primitive_mode != state->primitive_mode || + pg->surface_scale_factor != state->surface_scale_factor) { + return true; + } + + // Textures + for (int i = 0; i < 4; i++) { + if (pg->texture_matrix_enable[i] != pg->vk_renderer_state->shader_binding->state.texture_matrix_enable[i] || + pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXCTL0_0 + i * 4) || + pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFILTER0 + i * 4) || + pgraph_is_reg_dirty(pg, NV_PGRAPH_TEXFMT0 + i * 4)) { + return true; + } + } + + nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND_NOTDIRTY); + + return false; +} + +void pgraph_vk_bind_shaders(PGRAPHState *pg) +{ + NV2A_VK_DGROUP_BEGIN("%s", __func__); + + PGRAPHVkState *r = pg->vk_renderer_state; + + 
r->shader_bindings_changed = false; + + if (check_shaders_dirty(pg)) { + ShaderState new_state; + memset(&new_state, 0, sizeof(ShaderState)); + new_state = pgraph_get_shader_state(pg); + new_state.vulkan = true; + new_state.psh.vulkan = true; + new_state.use_push_constants_for_uniform_attrs = + (r->device_props.limits.maxPushConstantsSize >= + MAX_UNIFORM_ATTR_VALUES_SIZE); + + if (!r->shader_binding || memcmp(&r->shader_binding->state, &new_state, sizeof(ShaderState))) { + r->shader_binding = gen_shaders(pg, &new_state); + r->shader_bindings_changed = true; + } + } + + // FIXME: Use dirty bits + pgraph_vk_update_shader_uniforms(pg); + + NV2A_VK_DGROUP_END(); +} + +void pgraph_vk_update_shader_uniforms(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + NV2A_VK_DGROUP_BEGIN("%s", __func__); + nv2a_profile_inc_counter(NV2A_PROF_SHADER_BIND); + + assert(r->shader_binding); + ShaderBinding *binding = r->shader_binding; + ShaderUniformLayout *layouts[] = { &binding->vertex->uniforms, + &binding->fragment->uniforms }; + shader_update_constants(pg, r->shader_binding, true, + r->shader_binding->state.vertex_program, + r->shader_binding->state.fixed_function); + + for (int i = 0; i < ARRAY_SIZE(layouts); i++) { + uint64_t hash = fast_hash(layouts[i]->allocation, layouts[i]->total_size); + r->uniforms_changed |= (hash != r->uniform_buffer_hashes[i]); + r->uniform_buffer_hashes[i] = hash; + } + + nv2a_profile_inc_counter(r->uniforms_changed ? 
+ NV2A_PROF_SHADER_UBO_DIRTY : + NV2A_PROF_SHADER_UBO_NOTDIRTY); + + NV2A_VK_DGROUP_END(); +} + +void pgraph_vk_init_shaders(PGRAPHState *pg) +{ + pgraph_vk_init_glsl_compiler(); + create_descriptor_pool(pg); + create_descriptor_set_layout(pg); + create_descriptor_sets(pg); + shader_cache_init(pg); +} + +void pgraph_vk_finalize_shaders(PGRAPHState *pg) +{ + shader_cache_finalize(pg); + destroy_descriptor_sets(pg); + destroy_descriptor_set_layout(pg); + destroy_descriptor_pool(pg); + pgraph_vk_finalize_glsl_compiler(); +} diff --git a/hw/xbox/nv2a/pgraph/vk/surface-compute.c b/hw/xbox/nv2a/pgraph/vk/surface-compute.c new file mode 100644 index 00000000000..155eaa2e854 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/surface-compute.c @@ -0,0 +1,608 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/xbox/nv2a/pgraph/pgraph.h" +#include "qemu/fast-hash.h" +#include "qemu/lru.h" +#include "renderer.h" +#include + +// TODO: Swizzle/Unswizzle +// TODO: Float depth format (low priority, but would be better for accuracy) + +// FIXME: Below pipeline creation assumes identical 3 buffer setup. For +// swizzle shader we will need more flexibility. 
+ +const char *pack_d24_unorm_s8_uint_to_z24s8_glsl = + "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n" + "layout(set = 0, binding = 0) buffer DepthIn { uint depth_in[]; };\n" + "layout(set = 0, binding = 1) buffer StencilIn { uint stencil_in[]; };\n" + "layout(set = 0, binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n" + "uint get_input_idx(uint idx_out) {\n" + " uint scale = width_in / width_out;\n" + " uint y = (idx_out / width_out) * scale;\n" + " uint x = (idx_out % width_out) * scale;\n" + " return y * width_in + x;\n" + "}\n" + "void main() {\n" + " uint idx_out = gl_GlobalInvocationID.x;\n" + " uint idx_in = get_input_idx(idx_out);\n" + " uint depth_value = depth_in[idx_in];\n" + " uint stencil_value = (stencil_in[idx_in / 4] >> ((idx_in % 4) * 8)) & 0xff;\n" + " depth_stencil_out[idx_out] = depth_value << 8 | stencil_value;\n" + "}\n"; + +const char *unpack_z24s8_to_d24_unorm_s8_uint_glsl = + "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n" + "layout(set = 0, binding = 0) buffer DepthOut { uint depth_out[]; };\n" + "layout(set = 0, binding = 1) buffer StencilOut { uint stencil_out[]; };\n" + "layout(set = 0, binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n" + "uint get_input_idx(uint idx_out) {\n" + " uint scale = width_out / width_in;\n" + " uint y = (idx_out / width_out) / scale;\n" + " uint x = (idx_out % width_out) / scale;\n" + " return y * width_in + x;\n" + "}\n" + "void main() {\n" + " uint idx_out = gl_GlobalInvocationID.x;\n" + " uint idx_in = get_input_idx(idx_out);\n" + " depth_out[idx_out] = depth_stencil_in[idx_in] >> 8;\n" + " if (idx_out % 4 == 0) {\n" + " uint stencil_value = 0;\n" + " for (int i = 0; i < 4; i++) {\n" // Include next 3 pixels + " uint v = depth_stencil_in[get_input_idx(idx_out + i)] & 0xff;\n" + " stencil_value |= v << (i * 8);\n" + " }\n" + " stencil_out[idx_out / 4] = stencil_value;\n" + " }\n" + "}\n"; + +const char 
*pack_d32_sfloat_s8_uint_to_z24s8_glsl = + "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n" + "layout(set = 0, binding = 0) buffer DepthIn { float depth_in[]; };\n" + "layout(set = 0, binding = 1) buffer StencilIn { uint stencil_in[]; };\n" + "layout(set = 0, binding = 2) buffer DepthStencilOut { uint depth_stencil_out[]; };\n" + "uint get_input_idx(uint idx_out) {\n" + " uint scale = width_in / width_out;\n" + " uint y = (idx_out / width_out) * scale;\n" + " uint x = (idx_out % width_out) * scale;\n" + " return y * width_in + x;\n" + "}\n" + "void main() {\n" + " uint idx_out = gl_GlobalInvocationID.x;\n" + " uint idx_in = get_input_idx(idx_out);\n" + " uint depth_value = int(depth_in[idx_in] * float(0xffffff));\n" + " uint stencil_value = (stencil_in[idx_in / 4] >> ((idx_in % 4) * 8)) & 0xff;\n" + " depth_stencil_out[idx_out] = depth_value << 8 | stencil_value;\n" + "}\n"; + +const char *unpack_z24s8_to_d32_sfloat_s8_uint_glsl = + "layout(push_constant) uniform PushConstants { uint width_in, width_out; };\n" + "layout(set = 0, binding = 0) buffer DepthOut { float depth_out[]; };\n" + "layout(set = 0, binding = 1) buffer StencilOut { uint stencil_out[]; };\n" + "layout(set = 0, binding = 2) buffer DepthStencilIn { uint depth_stencil_in[]; };\n" + "uint get_input_idx(uint idx_out) {\n" + " uint scale = width_out / width_in;\n" + " uint y = (idx_out / width_out) / scale;\n" + " uint x = (idx_out % width_out) / scale;\n" + " return y * width_in + x;\n" + "}\n" + "void main() {\n" + " uint idx_out = gl_GlobalInvocationID.x;\n" + " uint idx_in = get_input_idx(idx_out);\n" + " depth_out[idx_out] = float(depth_stencil_in[idx_in] >> 8) / float(0xffffff);\n" + " if (idx_out % 4 == 0) {\n" + " uint stencil_value = 0;\n" + " for (int i = 0; i < 4; i++) {\n" // Include next 3 pixels + " uint v = depth_stencil_in[get_input_idx(idx_out + i)] & 0xff;\n" + " stencil_value |= v << (i * 8);\n" + " }\n" + " stencil_out[idx_out / 4] = stencil_value;\n" + 
" }\n" + "}\n"; + +static gchar *get_compute_shader_glsl(VkFormat host_fmt, bool pack, + int workgroup_size) +{ + const char *template; + + switch (host_fmt) { + case VK_FORMAT_D24_UNORM_S8_UINT: + template = pack ? pack_d24_unorm_s8_uint_to_z24s8_glsl : + unpack_z24s8_to_d24_unorm_s8_uint_glsl; + break; + case VK_FORMAT_D32_SFLOAT_S8_UINT: + template = pack ? pack_d32_sfloat_s8_uint_to_z24s8_glsl : + unpack_z24s8_to_d32_sfloat_s8_uint_glsl; + break; + default: + assert(!"Unsupported host fmt"); + break; + } + assert(template); + + gchar *glsl = g_strdup_printf( + "#version 450\n" + "layout(local_size_x = %d, local_size_y = 1, local_size_z = 1) in;\n" + "%s", workgroup_size, template); + assert(glsl); + + return glsl; +} + +static void create_descriptor_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDescriptorPoolSize pool_sizes[] = { + { + .type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 3 * ARRAY_SIZE(r->compute.descriptor_sets), + }, + }; + + VkDescriptorPoolCreateInfo pool_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO, + .poolSizeCount = ARRAY_SIZE(pool_sizes), + .pPoolSizes = pool_sizes, + .maxSets = ARRAY_SIZE(r->compute.descriptor_sets), + .flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, + }; + VK_CHECK(vkCreateDescriptorPool(r->device, &pool_info, NULL, + &r->compute.descriptor_pool)); +} + +static void destroy_descriptor_pool(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyDescriptorPool(r->device, r->compute.descriptor_pool, NULL); + r->compute.descriptor_pool = VK_NULL_HANDLE; +} + +static void create_descriptor_set_layout(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + const int num_buffers = 3; + + VkDescriptorSetLayoutBinding bindings[num_buffers]; + for (int i = 0; i < num_buffers; i++) { + bindings[i] = (VkDescriptorSetLayoutBinding){ + .binding = i, + .descriptorCount = 1, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 
+ .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + }; + } + VkDescriptorSetLayoutCreateInfo layout_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO, + .bindingCount = ARRAY_SIZE(bindings), + .pBindings = bindings, + }; + VK_CHECK(vkCreateDescriptorSetLayout(r->device, &layout_info, NULL, + &r->compute.descriptor_set_layout)); +} + +static void destroy_descriptor_set_layout(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkDestroyDescriptorSetLayout(r->device, r->compute.descriptor_set_layout, + NULL); + r->compute.descriptor_set_layout = VK_NULL_HANDLE; +} + +static void create_descriptor_sets(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkDescriptorSetLayout layouts[ARRAY_SIZE(r->compute.descriptor_sets)]; + for (int i = 0; i < ARRAY_SIZE(layouts); i++) { + layouts[i] = r->compute.descriptor_set_layout; + } + VkDescriptorSetAllocateInfo alloc_info = { + .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, + .descriptorPool = r->compute.descriptor_pool, + .descriptorSetCount = ARRAY_SIZE(r->compute.descriptor_sets), + .pSetLayouts = layouts, + }; + VK_CHECK(vkAllocateDescriptorSets(r->device, &alloc_info, + r->compute.descriptor_sets)); +} + +static void destroy_descriptor_sets(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + vkFreeDescriptorSets(r->device, r->compute.descriptor_pool, + ARRAY_SIZE(r->compute.descriptor_sets), + r->compute.descriptor_sets); + for (int i = 0; i < ARRAY_SIZE(r->compute.descriptor_sets); i++) { + r->compute.descriptor_sets[i] = VK_NULL_HANDLE; + } +} + +static void create_compute_pipeline_layout(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkPushConstantRange push_constant_range = { + .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT, + .size = 2 * sizeof(uint32_t), + }; + VkPipelineLayoutCreateInfo pipeline_layout_info = { + .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, + .setLayoutCount = 1, + .pSetLayouts = 
&r->compute.descriptor_set_layout, + .pushConstantRangeCount = 1, + .pPushConstantRanges = &push_constant_range, + }; + VK_CHECK(vkCreatePipelineLayout(r->device, &pipeline_layout_info, NULL, + &r->compute.pipeline_layout)); +} + +static void destroy_compute_pipeline_layout(PGRAPHVkState *r) +{ + vkDestroyPipelineLayout(r->device, r->compute.pipeline_layout, NULL); + r->compute.pipeline_layout = VK_NULL_HANDLE; +} + +static VkPipeline create_compute_pipeline(PGRAPHVkState *r, const char *glsl) +{ + ShaderModuleInfo *module = pgraph_vk_create_shader_module_from_glsl( + r, VK_SHADER_STAGE_COMPUTE_BIT, glsl); + + VkComputePipelineCreateInfo pipeline_info = { + .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, + .layout = r->compute.pipeline_layout, + .stage = + (VkPipelineShaderStageCreateInfo){ + .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, + .stage = VK_SHADER_STAGE_COMPUTE_BIT, + .pName = "main", + .module = module->module, + }, + }; + VkPipeline pipeline; + VK_CHECK(vkCreateComputePipelines(r->device, r->vk_pipeline_cache, 1, + &pipeline_info, NULL, + &pipeline)); + + pgraph_vk_destroy_shader_module(r, module); + + return pipeline; +} + +static void update_descriptor_sets(PGRAPHState *pg, + VkDescriptorBufferInfo *buffers, int count) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(count == 3); + VkWriteDescriptorSet descriptor_writes[3]; + + assert(r->compute.descriptor_set_index < + ARRAY_SIZE(r->compute.descriptor_sets)); + + for (int i = 0; i < count; i++) { + descriptor_writes[i] = (VkWriteDescriptorSet){ + .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET, + .dstSet = + r->compute.descriptor_sets[r->compute.descriptor_set_index], + .dstBinding = i, + .dstArrayElement = 0, + .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + .descriptorCount = 1, + .pBufferInfo = &buffers[i], + }; + } + vkUpdateDescriptorSets(r->device, count, descriptor_writes, 0, NULL); + + r->compute.descriptor_set_index += 1; +} + +bool 
pgraph_vk_compute_needs_finish(PGRAPHVkState *r) +{ + bool need_descriptor_write_reset = (r->compute.descriptor_set_index >= + ARRAY_SIZE(r->compute.descriptor_sets)); + + return need_descriptor_write_reset; +} + +void pgraph_vk_compute_finish_complete(PGRAPHVkState *r) +{ + r->compute.descriptor_set_index = 0; +} + +static int get_workgroup_size_for_output_units(PGRAPHVkState *r, int output_units) +{ + int group_size = 1024; + + // FIXME: Smarter workgroup size calculation could factor in multiple + // submissions. For now we will just pick the highest number that + // evenly divides output_units. + + while (group_size > 1) { + if (group_size > r->device_props.limits.maxComputeWorkGroupSize[0]) { + continue; + } + if (output_units % group_size == 0) { + break; + } + group_size /= 2; + } + + return group_size; +} + +static ComputePipeline *get_compute_pipeline(PGRAPHVkState *r, VkFormat host_fmt, bool pack, int output_units) +{ + int workgroup_size = get_workgroup_size_for_output_units(r, output_units); + + ComputePipelineKey key; + memset(&key, 0, sizeof(key)); + + key.host_fmt = host_fmt; + key.pack = pack; + key.workgroup_size = workgroup_size; + + LruNode *node = lru_lookup(&r->compute.pipeline_cache, + fast_hash((void *)&key, sizeof(key)), &key); + ComputePipeline *pipeline = container_of(node, ComputePipeline, node); + + assert(pipeline); + + return pipeline; +} + +// +// Pack depth+stencil into NV097_SET_SURFACE_FORMAT_ZETA_Z24S8 +// formatted buffer with depth in bits 31-8 and stencil in bits 7-0. 
+// +void pgraph_vk_pack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, + VkCommandBuffer cmd, VkBuffer src, + VkBuffer dst, bool downscale) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + unsigned int input_width = surface->width, input_height = surface->height; + pgraph_apply_scaling_factor(pg, &input_width, &input_height); + + unsigned int output_width = surface->width, output_height = surface->height; + if (!downscale) { + pgraph_apply_scaling_factor(pg, &output_width, &output_height); + } + + size_t depth_bytes_per_pixel = 4; + size_t depth_size = input_width * input_height * depth_bytes_per_pixel; + + size_t stencil_bytes_per_pixel = 1; + size_t stencil_size = input_width * input_height * stencil_bytes_per_pixel; + + size_t output_bytes_per_pixel = 4; + size_t output_size = output_width * output_height * output_bytes_per_pixel; + + VkDescriptorBufferInfo buffers[] = { + { + .buffer = src, + .offset = 0, + .range = depth_size, + }, + { + .buffer = src, + .offset = ROUND_UP( + depth_size, + r->device_props.limits.minStorageBufferOffsetAlignment), + .range = stencil_size, + }, + { + .buffer = dst, + .offset = 0, + .range = output_size, + }, + }; + + update_descriptor_sets(pg, buffers, ARRAY_SIZE(buffers)); + + size_t output_size_in_units = output_width * output_height; + ComputePipeline *pipeline = get_compute_pipeline( + r, surface->host_fmt.vk_format, true, output_size_in_units); + + size_t workgroup_size_in_units = pipeline->key.workgroup_size; + assert(output_size_in_units % workgroup_size_in_units == 0); + size_t group_count = output_size_in_units / workgroup_size_in_units; + + assert(r->device_props.limits.maxComputeWorkGroupSize[0] >= workgroup_size_in_units); + assert(r->device_props.limits.maxComputeWorkGroupCount[0] >= group_count); + + // FIXME: Smarter workgroup scaling + + pgraph_vk_begin_debug_marker(r, cmd, RGBA_PINK, __func__); + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline); + vkCmdBindDescriptorSets( + 
cmd, VK_PIPELINE_BIND_POINT_COMPUTE, r->compute.pipeline_layout, 0, 1, + &r->compute.descriptor_sets[r->compute.descriptor_set_index - 1], 0, + NULL); + + uint32_t push_constants[2] = { input_width, output_width }; + assert(sizeof(push_constants) == 8); + vkCmdPushConstants(cmd, r->compute.pipeline_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), + push_constants); + + // FIXME: Check max group count + + vkCmdDispatch(cmd, group_count, 1, 1); + pgraph_vk_end_debug_marker(r, cmd); +} + +void pgraph_vk_unpack_depth_stencil(PGRAPHState *pg, SurfaceBinding *surface, + VkCommandBuffer cmd, VkBuffer src, + VkBuffer dst) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + unsigned int input_width = surface->width, input_height = surface->height; + + unsigned int output_width = surface->width, output_height = surface->height; + pgraph_apply_scaling_factor(pg, &output_width, &output_height); + + size_t depth_bytes_per_pixel = 4; + size_t depth_size = output_width * output_height * depth_bytes_per_pixel; + + size_t stencil_bytes_per_pixel = 1; + size_t stencil_size = output_width * output_height * stencil_bytes_per_pixel; + + size_t input_bytes_per_pixel = 4; + size_t input_size = input_width * input_height * input_bytes_per_pixel; + + VkDescriptorBufferInfo buffers[] = { + { + .buffer = dst, + .offset = 0, + .range = depth_size, + }, + { + .buffer = dst, + .offset = ROUND_UP( + depth_size, + r->device_props.limits.minStorageBufferOffsetAlignment), + .range = stencil_size, + }, + { + .buffer = src, + .offset = 0, + .range = input_size, + }, + }; + update_descriptor_sets(pg, buffers, ARRAY_SIZE(buffers)); + + size_t output_size_in_units = output_width * output_height; + ComputePipeline *pipeline = get_compute_pipeline( + r, surface->host_fmt.vk_format, false, output_size_in_units); + + size_t workgroup_size_in_units = pipeline->key.workgroup_size; + assert(output_size_in_units % workgroup_size_in_units == 0); + size_t group_count = output_size_in_units / 
workgroup_size_in_units; + + assert(r->device_props.limits.maxComputeWorkGroupSize[0] >= workgroup_size_in_units); + assert(r->device_props.limits.maxComputeWorkGroupCount[0] >= group_count); + + // FIXME: Smarter workgroup scaling + + pgraph_vk_begin_debug_marker(r, cmd, RGBA_PINK, __func__); + vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_COMPUTE, pipeline->pipeline); + vkCmdBindDescriptorSets( + cmd, VK_PIPELINE_BIND_POINT_COMPUTE, r->compute.pipeline_layout, 0, 1, + &r->compute.descriptor_sets[r->compute.descriptor_set_index - 1], 0, + NULL); + + assert(output_width >= input_width); + uint32_t push_constants[2] = { input_width, output_width }; + assert(sizeof(push_constants) == 8); + vkCmdPushConstants(cmd, r->compute.pipeline_layout, + VK_SHADER_STAGE_COMPUTE_BIT, 0, sizeof(push_constants), + push_constants); + vkCmdDispatch(cmd, group_count, 1, 1); + pgraph_vk_end_debug_marker(r, cmd); +} + +static void pipeline_cache_entry_init(Lru *lru, LruNode *node, void *state) +{ + PGRAPHVkState *r = container_of(lru, PGRAPHVkState, compute.pipeline_cache); + ComputePipeline *snode = container_of(node, ComputePipeline, node); + + memcpy(&snode->key, state, sizeof(snode->key)); + + if (snode->key.workgroup_size == 1) { + fprintf(stderr, + "Warning: Needed compute shader with workgroup size = 1\n"); + } + + gchar *glsl = get_compute_shader_glsl( + snode->key.host_fmt, snode->key.pack, snode->key.workgroup_size); + assert(glsl); + snode->pipeline = create_compute_pipeline(r, glsl); + g_free(glsl); +} + +static void pipeline_cache_release_node_resources(PGRAPHVkState *r, ComputePipeline *snode) +{ + vkDestroyPipeline(r->device, snode->pipeline, NULL); + snode->pipeline = VK_NULL_HANDLE; +} + +static void pipeline_cache_entry_post_evict(Lru *lru, LruNode *node) +{ + PGRAPHVkState *r = container_of(lru, PGRAPHVkState, compute.pipeline_cache); + ComputePipeline *snode = container_of(node, ComputePipeline, node); + pipeline_cache_release_node_resources(r, snode); +} + +static 
bool pipeline_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + ComputePipeline *snode = container_of(node, ComputePipeline, node); + return memcmp(&snode->key, key, sizeof(ComputePipelineKey)); +} + +static void pipeline_cache_init(PGRAPHVkState *r) +{ + const size_t pipeline_cache_size = 100; // FIXME: Trim + lru_init(&r->compute.pipeline_cache); + r->compute.pipeline_cache_entries = g_malloc_n(pipeline_cache_size, sizeof(ComputePipeline)); + assert(r->compute.pipeline_cache_entries != NULL); + for (int i = 0; i < pipeline_cache_size; i++) { + lru_add_free(&r->compute.pipeline_cache, &r->compute.pipeline_cache_entries[i].node); + } + r->compute.pipeline_cache.init_node = pipeline_cache_entry_init; + r->compute.pipeline_cache.compare_nodes = pipeline_cache_entry_compare; + r->compute.pipeline_cache.post_node_evict = pipeline_cache_entry_post_evict; +} + +static void pipeline_cache_finalize(PGRAPHVkState *r) +{ + lru_flush(&r->compute.pipeline_cache); + g_free(r->compute.pipeline_cache_entries); + r->compute.pipeline_cache_entries = NULL; +} + +void pgraph_vk_init_compute(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + create_descriptor_pool(pg); + create_descriptor_set_layout(pg); + create_descriptor_sets(pg); + create_compute_pipeline_layout(pg); + pipeline_cache_init(r); +} + +void pgraph_vk_finalize_compute(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(!r->in_command_buffer); + + pipeline_cache_finalize(r); + destroy_compute_pipeline_layout(r); + destroy_descriptor_sets(pg); + destroy_descriptor_set_layout(pg); + destroy_descriptor_pool(pg); +} diff --git a/hw/xbox/nv2a/pgraph/vk/surface.c b/hw/xbox/nv2a/pgraph/vk/surface.c new file mode 100644 index 00000000000..f7f68bb0d4d --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/surface.c @@ -0,0 +1,1726 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * Based on GL implementation: + * + * Copyright (c) 2012 espes + * 
Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "hw/xbox/nv2a/nv2a_int.h" +#include "hw/xbox/nv2a/pgraph/swizzle.h" +#include "qemu/compiler.h" +#include "ui/xemu-settings.h" +#include "renderer.h" + +const int num_invalid_surfaces_to_keep = 10; // FIXME: Make automatic +const int max_surface_frame_time_delta = 5; + +void pgraph_vk_set_surface_scale_factor(NV2AState *d, unsigned int scale) +{ + g_config.display.quality.surface_scale = scale < 1 ? 
1 : scale; + + qemu_mutex_lock(&d->pfifo.lock); + qatomic_set(&d->pfifo.halt, true); + qemu_mutex_unlock(&d->pfifo.lock); + + // FIXME: It's just flush + qemu_mutex_lock(&d->pgraph.lock); + qemu_event_reset(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete); + qatomic_set(&d->pgraph.vk_renderer_state->download_dirty_surfaces_pending, true); + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&d->pgraph.vk_renderer_state->dirty_surfaces_download_complete); + + qemu_mutex_lock(&d->pgraph.lock); + qemu_event_reset(&d->pgraph.flush_complete); + qatomic_set(&d->pgraph.flush_pending, true); + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock(&d->pfifo.lock); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&d->pgraph.flush_complete); + + qemu_mutex_lock(&d->pfifo.lock); + qatomic_set(&d->pfifo.halt, false); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); +} + +unsigned int pgraph_vk_get_surface_scale_factor(NV2AState *d) +{ + return d->pgraph.surface_scale_factor; // FIXME: Move internal to renderer +} + +void pgraph_vk_reload_surface_scale_factor(PGRAPHState *pg) +{ + int factor = g_config.display.quality.surface_scale; + pg->surface_scale_factor = MAX(factor, 1); +} + +// FIXME: Move to common +static void get_surface_dimensions(PGRAPHState const *pg, unsigned int *width, + unsigned int *height) +{ + bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); + if (swizzle) { + *width = 1 << pg->surface_shape.log_width; + *height = 1 << pg->surface_shape.log_height; + } else { + *width = pg->surface_shape.clip_width; + *height = pg->surface_shape.clip_height; + } +} + +// FIXME: Move to common +static bool framebuffer_dirty(PGRAPHState const *pg) +{ + bool shape_changed = memcmp(&pg->surface_shape, &pg->last_surface_shape, + sizeof(SurfaceShape)) != 0; + if (!shape_changed || (!pg->surface_shape.color_format + && 
!pg->surface_shape.zeta_format)) { + return false; + } + return true; +} + +static void memcpy_image(void *dst, void const *src, int dst_stride, + int src_stride, int height) +{ + if (dst_stride == src_stride) { + memcpy(dst, src, dst_stride * height); + return; + } + + uint8_t *dst_ptr = (uint8_t *)dst; + uint8_t const *src_ptr = (uint8_t *)src; + + size_t copy_stride = MIN(src_stride, dst_stride); + + for (int i = 0; i < height; i++) { + memcpy(dst_ptr, src_ptr, copy_stride); + dst_ptr += dst_stride; + src_ptr += src_stride; + } +} + +void pgraph_vk_download_surfaces_in_range_if_dirty(PGRAPHState *pg, hwaddr start, hwaddr size) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + SurfaceBinding *surface; + + hwaddr end = start + size - 1; + + QTAILQ_FOREACH(surface, &r->surfaces, entry) { + hwaddr surf_end = surface->vram_addr + surface->size - 1; + bool overlapping = !(surface->vram_addr >= end || start >= surf_end); + if (overlapping) { + pgraph_vk_surface_download_if_dirty( + container_of(pg, NV2AState, pgraph), surface); + } + } +} + +static void download_surface_to_buffer(NV2AState *d, SurfaceBinding *surface, + uint8_t *pixels) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + nv2a_profile_inc_counter(NV2A_PROF_SURF_DOWNLOAD); + + bool use_compute_to_convert_depth_stencil_format = + surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || + surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; + + bool no_conversion_necessary = + surface->color || use_compute_to_convert_depth_stencil_format || + surface->host_fmt.vk_format == VK_FORMAT_D16_UNORM; + + assert(no_conversion_necessary); + + bool compute_needs_finish = (use_compute_to_convert_depth_stencil_format && + pgraph_vk_compute_needs_finish(r)); + + if (r->in_command_buffer && + surface->draw_time >= r->command_buffer_start_time) { + pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_DOWN); + } else if (compute_needs_finish) { + pgraph_vk_finish(pg, 
VK_FINISH_REASON_NEED_BUFFER_SPACE); + } + + bool downscale = (pg->surface_scale_factor != 1); + + trace_nv2a_pgraph_surface_download( + surface->color ? "COLOR" : "ZETA", + surface->swizzle ? "sz" : "lin", surface->vram_addr, + surface->width, surface->height, surface->pitch, + surface->fmt.bytes_per_pixel); + + // Read surface into memory + uint8_t *gl_read_buf = pixels; + + uint8_t *swizzle_buf = pixels; + if (surface->swizzle) { + // FIXME: Swizzle in shader + assert(pg->surface_scale_factor == 1 || downscale); + swizzle_buf = (uint8_t *)g_malloc(surface->size); + gl_read_buf = swizzle_buf; + } + + unsigned int scaled_width = surface->width, + scaled_height = surface->height; + pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height); + + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + pgraph_vk_begin_debug_marker(r, cmd, RGBA_RED, __func__); + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + int num_copy_regions = 1; + VkBufferImageCopy copy_regions[2]; + copy_regions[0] = (VkBufferImageCopy){ + .imageSubresource.aspectMask = surface->color ? 
+ VK_IMAGE_ASPECT_COLOR_BIT : + VK_IMAGE_ASPECT_DEPTH_BIT, + .imageSubresource.layerCount = 1, + }; + + VkImage surface_image_loc; + if (downscale && !use_compute_to_convert_depth_stencil_format) { + copy_regions[0].imageExtent = + (VkExtent3D){ surface->width, surface->height, 1 }; + + if (surface->image_scratch_current_layout != + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + pgraph_vk_transition_image_layout( + pg, cmd, surface->image_scratch, surface->host_fmt.vk_format, + surface->image_scratch_current_layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + surface->image_scratch_current_layout = + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + } + + VkImageBlit blit_region = { + .srcSubresource.aspectMask = surface->host_fmt.aspect, + .srcSubresource.mipLevel = 0, + .srcSubresource.baseArrayLayer = 0, + .srcSubresource.layerCount = 1, + .srcOffsets[0] = (VkOffset3D){0, 0, 0}, + .srcOffsets[1] = (VkOffset3D){scaled_width, scaled_height, 1}, + + .dstSubresource.aspectMask = surface->host_fmt.aspect, + .dstSubresource.mipLevel = 0, + .dstSubresource.baseArrayLayer = 0, + .dstSubresource.layerCount = 1, + .dstOffsets[0] = (VkOffset3D){0, 0, 0}, + .dstOffsets[1] = (VkOffset3D){surface->width, surface->height, 1}, + }; + + vkCmdBlitImage(cmd, surface->image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + surface->image_scratch, + surface->image_scratch_current_layout, 1, &blit_region, + surface->color ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST); + + pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch, + surface->host_fmt.vk_format, + surface->image_scratch_current_layout, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + surface->image_scratch_current_layout = + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + surface_image_loc = surface->image_scratch; + } else { + copy_regions[0].imageExtent = + (VkExtent3D){ scaled_width, scaled_height, 1 }; + surface_image_loc = surface->image; + } + + if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) { + size_t depth_size = scaled_width * scaled_height * 4; + copy_regions[num_copy_regions++] = (VkBufferImageCopy){ + .bufferOffset = ROUND_UP( + depth_size, + r->device_props.limits.minStorageBufferOffsetAlignment), + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT, + .imageSubresource.layerCount = 1, + .imageExtent = (VkExtent3D){ scaled_width, scaled_height, 1 }, + }; + } + + // + // Copy image to staging buffer, or to compute_dst if we need to pack it + // + + size_t downloaded_image_size = surface->host_fmt.host_bytes_per_pixel * + surface->width * surface->height; + assert((downloaded_image_size) <= + r->storage_buffers[BUFFER_STAGING_DST].buffer_size); + + int copy_buffer_idx = use_compute_to_convert_depth_stencil_format ? 
+ BUFFER_COMPUTE_DST : + BUFFER_STAGING_DST; + VkBuffer copy_buffer = r->storage_buffers[copy_buffer_idx].buffer; + + VkBufferMemoryBarrier pre_copy_dst_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = copy_buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &pre_copy_dst_barrier, 0, NULL); + + vkCmdCopyImageToBuffer(cmd, surface_image_loc, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, copy_buffer, + num_copy_regions, copy_regions); + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + // FIXME: Verify output of depth stencil conversion + // FIXME: Track current layout and only transition when required + + if (use_compute_to_convert_depth_stencil_format) { + size_t bytes_per_pixel = 4; + size_t packed_size = + downscale ? 
(surface->width * surface->height * bytes_per_pixel) : + (scaled_width * scaled_height * bytes_per_pixel); + + // + // Pack the depth-stencil image into compute_src buffer + // + + VkBufferMemoryBarrier pre_compute_src_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = copy_buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, + 1, &pre_compute_src_barrier, 0, NULL); + + VkBuffer pack_buffer = r->storage_buffers[BUFFER_COMPUTE_SRC].buffer; + + VkBufferMemoryBarrier pre_compute_dst_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = pack_buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, + 1, &pre_compute_dst_barrier, 0, NULL); + + pgraph_vk_pack_depth_stencil(pg, surface, cmd, copy_buffer, pack_buffer, + downscale); + + VkBufferMemoryBarrier post_compute_src_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_SHADER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = copy_buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_compute_src_barrier, 0, NULL); + + VkBufferMemoryBarrier post_compute_dst_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = 
VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = pack_buffer, + .size = packed_size + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_compute_dst_barrier, 0, NULL); + + // + // Copy packed image over to staging buffer for host download + // + + copy_buffer = r->storage_buffers[BUFFER_STAGING_DST].buffer; + + VkBufferMemoryBarrier pre_copy_dst_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = copy_buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &pre_copy_dst_barrier, 0, NULL); + + VkBufferCopy buffer_copy_region = { + .size = packed_size, + }; + vkCmdCopyBuffer(cmd, pack_buffer, copy_buffer, 1, &buffer_copy_region); + + VkBufferMemoryBarrier post_copy_src_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = pack_buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_copy_src_barrier, 0, NULL); + } + + // + // Download image data to host + // + + VkBufferMemoryBarrier post_copy_dst_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_HOST_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = 
VK_QUEUE_FAMILY_IGNORED, + .buffer = copy_buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_HOST_BIT, 0, 0, NULL, 1, + &post_copy_dst_barrier, 0, NULL); + + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_1); + pgraph_vk_end_debug_marker(r, cmd); + pgraph_vk_end_single_time_commands(pg, cmd); + + void *mapped_memory_ptr = NULL; + VK_CHECK(vmaMapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_DST].allocation, + &mapped_memory_ptr)); + + vmaInvalidateAllocation(r->allocator, + r->storage_buffers[BUFFER_STAGING_DST].allocation, + 0, VK_WHOLE_SIZE); + + memcpy_image(gl_read_buf, mapped_memory_ptr, surface->pitch, + surface->width * surface->fmt.bytes_per_pixel, + surface->height); + + vmaUnmapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_DST].allocation); + + if (surface->swizzle) { + // FIXME: Swizzle in shader + swizzle_rect(swizzle_buf, surface->width, surface->height, pixels, + surface->pitch, surface->fmt.bytes_per_pixel); + nv2a_profile_inc_counter(NV2A_PROF_SURF_SWIZZLE); + g_free(swizzle_buf); + } +} + +static void download_surface(NV2AState *d, SurfaceBinding *surface, bool force) +{ + if (!(surface->download_pending || force)) { + return; + } + + // FIXME: Respect write enable at last TOU? 
+ + download_surface_to_buffer(d, surface, d->vram_ptr + surface->vram_addr); + + memory_region_set_client_dirty(d->vram, surface->vram_addr, + surface->pitch * surface->height, + DIRTY_MEMORY_VGA); + memory_region_set_client_dirty(d->vram, surface->vram_addr, + surface->pitch * surface->height, + DIRTY_MEMORY_NV2A_TEX); + + surface->download_pending = false; + surface->draw_dirty = false; +} + +void pgraph_vk_wait_for_surface_download(SurfaceBinding *surface) +{ + NV2AState *d = g_nv2a; + + if (qatomic_read(&surface->draw_dirty)) { + qemu_mutex_lock(&d->pfifo.lock); + qemu_event_reset(&d->pgraph.vk_renderer_state->downloads_complete); + qatomic_set(&surface->download_pending, true); + qatomic_set(&d->pgraph.vk_renderer_state->downloads_pending, true); + pfifo_kick(d); + qemu_mutex_unlock(&d->pfifo.lock); + qemu_event_wait(&d->pgraph.vk_renderer_state->downloads_complete); + } +} + +void pgraph_vk_process_pending_downloads(NV2AState *d) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + SurfaceBinding *surface; + + QTAILQ_FOREACH(surface, &r->surfaces, entry) { + download_surface(d, surface, false); + } + + qatomic_set(&r->downloads_pending, false); + qemu_event_set(&r->downloads_complete); +} + +void pgraph_vk_download_dirty_surfaces(NV2AState *d) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + SurfaceBinding *surface; + QTAILQ_FOREACH(surface, &r->surfaces, entry) { + pgraph_vk_surface_download_if_dirty(d, surface); + } + + qatomic_set(&r->download_dirty_surfaces_pending, false); + qemu_event_set(&r->dirty_surfaces_download_complete); +} + +static void surface_access_callback(void *opaque, MemoryRegion *mr, hwaddr addr, + hwaddr len, bool write) +{ + SurfaceBinding *e = opaque; + assert(addr >= e->vram_addr); + hwaddr offset = addr - e->vram_addr; + assert(offset < e->size); + + if (qatomic_read(&e->draw_dirty)) { + trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset); + pgraph_vk_wait_for_surface_download(e); + } + + if (write && 
!qatomic_read(&e->upload_pending)) { + trace_nv2a_pgraph_surface_cpu_access(e->vram_addr, offset); + qatomic_set(&e->upload_pending, true); + } +} + +static void register_cpu_access_callback(NV2AState *d, SurfaceBinding *surface) +{ + if (tcg_enabled()) { + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock_iothread(); + mem_access_callback_insert(qemu_get_cpu(0), + d->vram, surface->vram_addr, surface->size, + &surface->access_cb, &surface_access_callback, + surface); + qemu_mutex_unlock_iothread(); + qemu_mutex_lock(&d->pgraph.lock); + } +} + +static void unregister_cpu_access_callback(NV2AState *d, + SurfaceBinding const *surface) +{ + if (tcg_enabled()) { + qemu_mutex_unlock(&d->pgraph.lock); + qemu_mutex_lock_iothread(); + mem_access_callback_remove_by_ref(qemu_get_cpu(0), surface->access_cb); + qemu_mutex_unlock_iothread(); + qemu_mutex_lock(&d->pgraph.lock); + } +} + +static void bind_surface(PGRAPHVkState *r, SurfaceBinding *surface) +{ + if (surface->color) { + r->color_binding = surface; + } else { + r->zeta_binding = surface; + } + + r->framebuffer_dirty = true; +} + +static void unbind_surface(NV2AState *d, bool color) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + if (color) { + if (r->color_binding) { + r->color_binding = NULL; + r->framebuffer_dirty = true; + } + } else { + if (r->zeta_binding) { + r->zeta_binding = NULL; + r->framebuffer_dirty = true; + } + } +} + +static void invalidate_surface(NV2AState *d, SurfaceBinding *surface) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + trace_nv2a_pgraph_surface_invalidated(surface->vram_addr); + + // FIXME: We may be reading from the surface in the current command buffer! + // Add a detection to handle it. For now, finish to be safe. 
+ pgraph_vk_finish(&d->pgraph, VK_FINISH_REASON_SURFACE_DOWN); + + assert((!r->in_command_buffer || + surface->draw_time < r->command_buffer_start_time) && + "Surface evicted while in use!"); + + if (surface == r->color_binding) { + assert(d->pgraph.surface_color.buffer_dirty); + unbind_surface(d, true); + } + if (surface == r->zeta_binding) { + assert(d->pgraph.surface_zeta.buffer_dirty); + unbind_surface(d, false); + } + + unregister_cpu_access_callback(d, surface); + + QTAILQ_REMOVE(&r->surfaces, surface, entry); + QTAILQ_INSERT_HEAD(&r->invalid_surfaces, surface, entry); +} + +static void invalidate_overlapping_surfaces(NV2AState *d, + SurfaceBinding const *surface) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + uintptr_t e_end = surface->vram_addr + surface->size - 1; + + SurfaceBinding *s, *next; + QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { + uintptr_t s_end = s->vram_addr + s->size - 1; + bool overlapping = + !(s->vram_addr > e_end || surface->vram_addr > s_end); + if (overlapping) { + trace_nv2a_pgraph_surface_evict_overlapping( + s->vram_addr, s->width, s->height, + s->pitch); + pgraph_vk_surface_download_if_dirty(d, s); + invalidate_surface(d, s); + } + } +} + +static void surface_put(NV2AState *d, SurfaceBinding *surface) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + assert(pgraph_vk_surface_get(d, surface->vram_addr) == NULL); + + invalidate_overlapping_surfaces(d, surface); + register_cpu_access_callback(d, surface); + + QTAILQ_INSERT_HEAD(&r->surfaces, surface, entry); +} + +SurfaceBinding *pgraph_vk_surface_get(NV2AState *d, hwaddr addr) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + SurfaceBinding *surface; + QTAILQ_FOREACH (surface, &r->surfaces, entry) { + if (surface->vram_addr == addr) { + return surface; + } + } + + return NULL; +} + +SurfaceBinding *pgraph_vk_surface_get_within(NV2AState *d, hwaddr addr) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + SurfaceBinding *surface; + QTAILQ_FOREACH 
(surface, &r->surfaces, entry) { + if (addr >= surface->vram_addr && + addr < (surface->vram_addr + surface->size)) { + return surface; + } + } + + return NULL; +} + +static void set_surface_label(PGRAPHState *pg, SurfaceBinding const *surface) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + g_autofree gchar *label = g_strdup_printf( + "Surface %" HWADDR_PRIx "h fmt:%s,%02xh %dx%d aa:%d", + surface->vram_addr, surface->color ? "Color" : "Zeta", + surface->color ? surface->shape.color_format : + surface->shape.zeta_format, + surface->width, surface->height, pg->surface_shape.anti_aliasing); + + VkDebugUtilsObjectNameInfoEXT name_info = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, + .objectType = VK_OBJECT_TYPE_IMAGE, + .objectHandle = (uint64_t)surface->image, + .pObjectName = label, + }; + + if (r->debug_utils_extension_enabled) { + vkSetDebugUtilsObjectNameEXT(r->device, &name_info); + } + vmaSetAllocationName(r->allocator, surface->allocation, label); + + if (surface->image_scratch) { + g_autofree gchar *label_scratch = + g_strdup_printf("%s (scratch)", label); + name_info.objectHandle = (uint64_t)surface->image_scratch; + name_info.pObjectName = label_scratch; + if (r->debug_utils_extension_enabled) { + vkSetDebugUtilsObjectNameEXT(r->device, &name_info); + } + vmaSetAllocationName(r->allocator, surface->allocation_scratch, + label_scratch); + } +} + +static void create_surface_image(PGRAPHState *pg, SurfaceBinding *surface) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + unsigned int width = surface->width, height = surface->height; + pgraph_apply_scaling_factor(pg, &width, &height); + + assert(!surface->image); + assert(!surface->image_scratch); + + NV2A_VK_DPRINTF( + "Creating new surface image width=%d height=%d @ %08" HWADDR_PRIx, + width, height, surface->vram_addr); + + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .extent.width = width, + .extent.height 
= height, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .format = surface->host_fmt.vk_format, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = VK_IMAGE_USAGE_SAMPLED_BIT | + VK_IMAGE_USAGE_TRANSFER_DST_BIT | + VK_IMAGE_USAGE_TRANSFER_SRC_BIT | surface->host_fmt.usage, + .samples = VK_SAMPLE_COUNT_1_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + }; + + VmaAllocationCreateInfo alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + }; + + VK_CHECK(vmaCreateImage(r->allocator, &image_create_info, + &alloc_create_info, &surface->image, + &surface->allocation, NULL)); + + VK_CHECK(vmaCreateImage(r->allocator, &image_create_info, + &alloc_create_info, &surface->image_scratch, + &surface->allocation_scratch, NULL)); + surface->image_scratch_current_layout = VK_IMAGE_LAYOUT_UNDEFINED; + + VkImageViewCreateInfo image_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = surface->image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = surface->host_fmt.vk_format, + .subresourceRange.aspectMask = surface->host_fmt.aspect, + .subresourceRange.levelCount = 1, + .subresourceRange.layerCount = 1, + }; + VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL, + &surface->image_view)); + + // FIXME: Go right into main command buffer + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + pgraph_vk_begin_debug_marker(r, cmd, RGBA_RED, __func__); + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_UNDEFINED, + surface->color ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_3); + pgraph_vk_end_debug_marker(r, cmd); + pgraph_vk_end_single_time_commands(pg, cmd); + nv2a_profile_inc_counter(NV2A_PROF_SURF_CREATE); +} + +static void migrate_surface_image(SurfaceBinding *dst, SurfaceBinding *src) +{ + dst->image = src->image; + dst->image_view = src->image_view; + dst->allocation = src->allocation; + dst->image_scratch = src->image_scratch; + dst->image_scratch_current_layout = src->image_scratch_current_layout; + dst->allocation_scratch = src->allocation_scratch; + + src->image = VK_NULL_HANDLE; + src->image_view = VK_NULL_HANDLE; + src->allocation = VK_NULL_HANDLE; + src->image_scratch = VK_NULL_HANDLE; + src->image_scratch_current_layout = VK_IMAGE_LAYOUT_UNDEFINED; + src->allocation_scratch = VK_NULL_HANDLE; +} + +static void destroy_surface_image(PGRAPHVkState *r, SurfaceBinding *surface) +{ + vkDestroyImageView(r->device, surface->image_view, NULL); + surface->image_view = VK_NULL_HANDLE; + + vmaDestroyImage(r->allocator, surface->image, surface->allocation); + surface->image = VK_NULL_HANDLE; + surface->allocation = VK_NULL_HANDLE; + + vmaDestroyImage(r->allocator, surface->image_scratch, + surface->allocation_scratch); + surface->image_scratch = VK_NULL_HANDLE; + surface->allocation_scratch = VK_NULL_HANDLE; +} + +static bool check_invalid_surface_is_compatibile(SurfaceBinding *surface, + SurfaceBinding *target) +{ + return surface->host_fmt.vk_format == target->host_fmt.vk_format && + surface->width == target->width && + surface->height == target->height && + surface->host_fmt.usage == target->host_fmt.usage; +} + +static SurfaceBinding * +get_any_compatible_invalid_surface(PGRAPHVkState *r, SurfaceBinding *target) +{ + SurfaceBinding *surface, *next; + QTAILQ_FOREACH_SAFE(surface, &r->invalid_surfaces, entry, next) { + if (check_invalid_surface_is_compatibile(surface, target)) { + 
QTAILQ_REMOVE(&r->invalid_surfaces, surface, entry); + return surface; + } + } + + return NULL; +} + +static void prune_invalid_surfaces(PGRAPHVkState *r, int keep) +{ + int num_surfaces = 0; + + SurfaceBinding *surface, *next; + QTAILQ_FOREACH_SAFE(surface, &r->invalid_surfaces, entry, next) { + num_surfaces += 1; + if (num_surfaces > keep) { + QTAILQ_REMOVE(&r->invalid_surfaces, surface, entry); + destroy_surface_image(r, surface); + g_free(surface); + } + } +} + +static void expire_old_surfaces(NV2AState *d) +{ + PGRAPHVkState *r = d->pgraph.vk_renderer_state; + + SurfaceBinding *s, *next; + QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { + int last_used = d->pgraph.frame_time - s->frame_time; + if (last_used >= max_surface_frame_time_delta) { + trace_nv2a_pgraph_surface_evict_reason("old", s->vram_addr); + pgraph_vk_surface_download_if_dirty(d, s); + invalidate_surface(d, s); + } + } +} + +static bool check_surface_compatibility(SurfaceBinding const *s1, + SurfaceBinding const *s2, bool strict) +{ + bool format_compatible = + (s1->color == s2->color) && + (s1->host_fmt.vk_format == s2->host_fmt.vk_format) && + (s1->pitch == s2->pitch) && + (s1->shape.clip_x <= s2->shape.clip_x) && + (s1->shape.clip_y <= s2->shape.clip_y); + if (!format_compatible) { + return false; + } + + if (!strict) { + return (s1->width >= s2->width) && (s1->height >= s2->height); + } else { + return (s1->width == s2->width) && (s1->height == s2->height); + } +} + +void pgraph_vk_surface_download_if_dirty(NV2AState *d, SurfaceBinding *surface) +{ + if (surface->draw_dirty) { + download_surface(d, surface, true); + } +} + +void pgraph_vk_upload_surface_data(NV2AState *d, SurfaceBinding *surface, + bool force) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + if (!(surface->upload_pending || force)) { + return; + } + + nv2a_profile_inc_counter(NV2A_PROF_SURF_UPLOAD); + + pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_CREATE); // FIXME: SURFACE_UP + + 
trace_nv2a_pgraph_surface_upload( + surface->color ? "COLOR" : "ZETA", + surface->swizzle ? "sz" : "lin", surface->vram_addr, + surface->width, surface->height, surface->pitch, + surface->fmt.bytes_per_pixel); + + surface->upload_pending = false; + surface->draw_time = pg->draw_time; + + uint8_t *data = d->vram_ptr; + uint8_t *buf = data + surface->vram_addr; + + g_autofree uint8_t *swizzle_buf = NULL; + uint8_t *gl_read_buf = NULL; + + if (surface->swizzle) { + swizzle_buf = (uint8_t*)g_malloc(surface->size); + gl_read_buf = swizzle_buf; + unswizzle_rect(data + surface->vram_addr, + surface->width, surface->height, + swizzle_buf, + surface->pitch, + surface->fmt.bytes_per_pixel); + nv2a_profile_inc_counter(NV2A_PROF_SURF_SWIZZLE); + } else { + gl_read_buf = buf; + } + + // + // Upload image data from host to staging buffer + // + + StorageBuffer *copy_buffer = &r->storage_buffers[BUFFER_STAGING_SRC]; + size_t uploaded_image_size = surface->height * surface->width * + surface->fmt.bytes_per_pixel; + assert(uploaded_image_size <= copy_buffer->buffer_size); + + void *mapped_memory_ptr = NULL; + VK_CHECK(vmaMapMemory(r->allocator, copy_buffer->allocation, + &mapped_memory_ptr)); + + bool use_compute_to_convert_depth_stencil_format = + surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || + surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; + + bool no_conversion_necessary = + surface->color || surface->host_fmt.vk_format == VK_FORMAT_D16_UNORM || + use_compute_to_convert_depth_stencil_format; + assert(no_conversion_necessary); + + memcpy_image(mapped_memory_ptr, gl_read_buf, + surface->width * surface->fmt.bytes_per_pixel, surface->pitch, + surface->height); + + vmaFlushAllocation(r->allocator, copy_buffer->allocation, 0, VK_WHOLE_SIZE); + vmaUnmapMemory(r->allocator, copy_buffer->allocation); + + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + pgraph_vk_begin_debug_marker(r, cmd, RGBA_RED, __func__); + + VkBufferMemoryBarrier 
host_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_HOST_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = copy_buffer->buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_HOST_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &host_barrier, 0, NULL); + + // Set up image copy regions (which may be modified by compute unpack) + + VkBufferImageCopy regions[2]; + int num_regions = 0; + + regions[num_regions++] = (VkBufferImageCopy){ + .imageSubresource.aspectMask = surface->color ? + VK_IMAGE_ASPECT_COLOR_BIT : + VK_IMAGE_ASPECT_DEPTH_BIT, + .imageSubresource.layerCount = 1, + .imageExtent = (VkExtent3D){ surface->width, surface->height, 1 }, + }; + + if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) { + regions[num_regions++] = (VkBufferImageCopy){ + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT, + .imageSubresource.layerCount = 1, + .imageExtent = (VkExtent3D){ surface->width, surface->height, 1 }, + }; + } + + + unsigned int scaled_width = surface->width, scaled_height = surface->height; + pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height); + + if (use_compute_to_convert_depth_stencil_format) { + + // + // Copy packed image buffer to compute_dst for unpacking + // + + size_t packed_size = uploaded_image_size; + VkBufferCopy buffer_copy_region = { + .size = packed_size, + }; + vkCmdCopyBuffer(cmd, copy_buffer->buffer, + r->storage_buffers[BUFFER_COMPUTE_DST].buffer, 1, + &buffer_copy_region); + + size_t num_pixels = scaled_width * scaled_height; + size_t unpacked_depth_image_size = num_pixels * 4; + size_t unpacked_stencil_image_size = num_pixels; + size_t unpacked_size = + unpacked_depth_image_size + unpacked_stencil_image_size; + + VkBufferMemoryBarrier post_copy_src_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, 
+ .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = copy_buffer->buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_copy_src_barrier, 0, NULL); + + // + // Unpack depth-stencil image into compute_src + // + + VkBufferMemoryBarrier pre_unpack_src_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_SHADER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, + 1, &pre_unpack_src_barrier, 0, NULL); + + StorageBuffer *unpack_buffer = &r->storage_buffers[BUFFER_COMPUTE_SRC]; + + VkBufferMemoryBarrier pre_unpack_dst_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = unpack_buffer->buffer, + .size = unpacked_size + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, 1, + &pre_unpack_dst_barrier, 0, NULL); + + pgraph_vk_unpack_depth_stencil( + pg, surface, cmd, r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + unpack_buffer->buffer); + + VkBufferMemoryBarrier post_unpack_src_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_SHADER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex 
= VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_unpack_src_barrier, 0, NULL); + + VkBufferMemoryBarrier post_unpack_dst_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = unpack_buffer->buffer, + .size = unpacked_size + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_unpack_dst_barrier, 0, NULL); + + // Already scaled during compute. Adjust copy regions. + regions[0].imageExtent = (VkExtent3D){ scaled_width, scaled_height, 1 }; + regions[1].imageExtent = regions[0].imageExtent; + regions[1].bufferOffset = + ROUND_UP(unpacked_depth_image_size, + r->device_props.limits.minStorageBufferOffsetAlignment); + + copy_buffer = unpack_buffer; + } + + // + // Copy image data from buffer to staging image + // + + if (surface->image_scratch_current_layout != + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) { + pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch, + surface->host_fmt.vk_format, + surface->image_scratch_current_layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + surface->image_scratch_current_layout = + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + } + + vkCmdCopyBufferToImage(cmd, copy_buffer->buffer, surface->image_scratch, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, num_regions, + regions); + + VkBufferMemoryBarrier post_copy_src_buffer_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = 
VK_QUEUE_FAMILY_IGNORED, + .buffer = copy_buffer->buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_copy_src_buffer_barrier, 0, NULL); + + // + // Copy staging image to final image + // + + pgraph_vk_transition_image_layout(pg, cmd, surface->image_scratch, + surface->host_fmt.vk_format, + surface->image_scratch_current_layout, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + surface->image_scratch_current_layout = + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL; + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + bool upscale = pg->surface_scale_factor > 1 && + !use_compute_to_convert_depth_stencil_format; + + if (upscale) { + unsigned int scaled_width = surface->width, + scaled_height = surface->height; + pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height); + + VkImageBlit blitRegion = { + .srcSubresource.aspectMask = surface->host_fmt.aspect, + .srcSubresource.mipLevel = 0, + .srcSubresource.baseArrayLayer = 0, + .srcSubresource.layerCount = 1, + .srcOffsets[0] = (VkOffset3D){0, 0, 0}, + .srcOffsets[1] = (VkOffset3D){surface->width, surface->height, 1}, + + .dstSubresource.aspectMask = surface->host_fmt.aspect, + .dstSubresource.mipLevel = 0, + .dstSubresource.baseArrayLayer = 0, + .dstSubresource.layerCount = 1, + .dstOffsets[0] = (VkOffset3D){0, 0, 0}, + .dstOffsets[1] = (VkOffset3D){scaled_width, scaled_height, 1}, + }; + + vkCmdBlitImage(cmd, surface->image_scratch, + surface->image_scratch_current_layout, surface->image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, &blitRegion, + surface->color ? 
VK_FILTER_LINEAR : VK_FILTER_NEAREST); + } else { + // Note: We should be able to vkCmdCopyBufferToImage directly into + // surface->image, but there is an apparent AMD Windows driver + // synchronization bug we'll hit when doing this. For this reason, + // always use a staging image. + + for (int i = 0; i < num_regions; i++) { + VkImageAspectFlags aspect = regions[i].imageSubresource.aspectMask; + VkImageCopy copy_region = { + .srcSubresource.aspectMask = aspect, + .srcSubresource.layerCount = 1, + .dstSubresource.aspectMask = aspect, + .dstSubresource.layerCount = 1, + .extent = regions[i].imageExtent, + }; + vkCmdCopyImage(cmd, surface->image_scratch, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, surface->image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, + ©_region); + } + } + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_2); + pgraph_vk_end_debug_marker(r, cmd); + pgraph_vk_end_single_time_commands(pg, cmd); + + surface->initialized = true; +} + +static void compare_surfaces(SurfaceBinding const *a, SurfaceBinding const *b) +{ + #define DO_CMP(fld) \ + if (a->fld != b->fld) \ + trace_nv2a_pgraph_surface_compare_mismatch( \ + #fld, (long int)a->fld, (long int)b->fld); + DO_CMP(shape.clip_x) + DO_CMP(shape.clip_width) + DO_CMP(shape.clip_y) + DO_CMP(shape.clip_height) + DO_CMP(fmt.bytes_per_pixel) + DO_CMP(host_fmt.vk_format) + DO_CMP(color) + DO_CMP(swizzle) + DO_CMP(vram_addr) + DO_CMP(width) + DO_CMP(height) + DO_CMP(pitch) + DO_CMP(size) + DO_CMP(dma_addr) + DO_CMP(dma_len) + DO_CMP(frame_time) + DO_CMP(draw_time) + #undef DO_CMP +} + +static void populate_surface_binding_target_sized(NV2AState *d, bool color, + unsigned int width, + unsigned int height, + SurfaceBinding *target) +{ + PGRAPHState *pg = &d->pgraph; + 
PGRAPHVkState *r = pg->vk_renderer_state; + + Surface *surface; + hwaddr dma_address; + BasicSurfaceFormatInfo fmt; + SurfaceFormatInfo host_fmt; + + if (color) { + surface = &pg->surface_color; + dma_address = pg->dma_color; + assert(pg->surface_shape.color_format != 0); + assert(pg->surface_shape.color_format < + ARRAY_SIZE(kelvin_surface_color_format_vk_map)); + fmt = kelvin_surface_color_format_map[pg->surface_shape.color_format]; + host_fmt = kelvin_surface_color_format_vk_map[pg->surface_shape.color_format]; + if (host_fmt.host_bytes_per_pixel == 0) { + fprintf(stderr, "nv2a: unimplemented color surface format 0x%x\n", + pg->surface_shape.color_format); + abort(); + } + } else { + surface = &pg->surface_zeta; + dma_address = pg->dma_zeta; + assert(pg->surface_shape.zeta_format != 0); + assert(pg->surface_shape.zeta_format < + ARRAY_SIZE(r->kelvin_surface_zeta_vk_map)); + fmt = kelvin_surface_zeta_format_map[pg->surface_shape.zeta_format]; + host_fmt = r->kelvin_surface_zeta_vk_map[pg->surface_shape.zeta_format]; + // FIXME: Support float 16,24b float format surface + } + + DMAObject dma = nv_dma_load(d, dma_address); + // There's a bunch of bugs that could cause us to hit this function + // at the wrong time and get a invalid dma object. + // Check that it's sane. + assert(dma.dma_class == NV_DMA_IN_MEMORY_CLASS); + // assert(dma.address + surface->offset != 0); + assert(surface->offset <= dma.limit); + assert(surface->offset + surface->pitch * height <= dma.limit + 1); + assert(surface->pitch % fmt.bytes_per_pixel == 0); + assert((dma.address & ~0x07FFFFFF) == 0); + + target->shape = (color || !r->color_binding) ? 
pg->surface_shape : + r->color_binding->shape; + target->fmt = fmt; + target->host_fmt = host_fmt; + target->color = color; + target->swizzle = + (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); + target->vram_addr = dma.address + surface->offset; + target->width = width; + target->height = height; + target->pitch = surface->pitch; + target->size = height * MAX(surface->pitch, width * fmt.bytes_per_pixel); + target->upload_pending = true; + target->download_pending = false; + target->draw_dirty = false; + target->dma_addr = dma.address; + target->dma_len = dma.limit; + target->frame_time = pg->frame_time; + target->draw_time = pg->draw_time; + target->cleared = false; + + target->initialized = false; +} + +static void populate_surface_binding_target(NV2AState *d, bool color, + SurfaceBinding *target) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + unsigned int width, height; + + if (color || !r->color_binding) { + get_surface_dimensions(pg, &width, &height); + pgraph_apply_anti_aliasing_factor(pg, &width, &height); + + // Since we determine surface dimensions based on the clipping + // rectangle, make sure to include the surface offset as well. + if (pg->surface_type != NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE) { + width += pg->surface_shape.clip_x; + height += pg->surface_shape.clip_y; + } + } else { + width = r->color_binding->width; + height = r->color_binding->height; + } + + populate_surface_binding_target_sized(d, color, width, height, target); +} + +static void update_surface_part(NV2AState *d, bool upload, bool color) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + SurfaceBinding target; + memset(&target, 0, sizeof(target)); + populate_surface_binding_target(d, color, &target); + + Surface *pg_surface = color ? 
&pg->surface_color : &pg->surface_zeta; + + bool mem_dirty = !tcg_enabled() && memory_region_test_and_clear_dirty( + d->vram, target.vram_addr, + target.size, DIRTY_MEMORY_NV2A); + + SurfaceBinding *current_binding = color ? r->color_binding + : r->zeta_binding; + + if (!current_binding || + (upload && (pg_surface->buffer_dirty || mem_dirty))) { + // FIXME: We don't need to be so aggressive flushing the command list + // pgraph_vk_finish(pg, VK_FINISH_REASON_SURFACE_CREATE); + pgraph_vk_ensure_not_in_render_pass(pg); + + unbind_surface(d, color); + + SurfaceBinding *surface = pgraph_vk_surface_get(d, target.vram_addr); + if (surface != NULL) { + // FIXME: Support same color/zeta surface target? In the mean time, + // if the surface we just found is currently bound, just unbind it. + SurfaceBinding *other = (color ? r->zeta_binding + : r->color_binding); + if (surface == other) { + NV2A_UNIMPLEMENTED("Same color & zeta surface offset"); + unbind_surface(d, !color); + } + } + + trace_nv2a_pgraph_surface_target( + color ? "COLOR" : "ZETA", target.vram_addr, + target.swizzle ? "sz" : "ln", + pg->surface_shape.anti_aliasing, + pg->surface_shape.clip_x, + pg->surface_shape.clip_width, pg->surface_shape.clip_y, + pg->surface_shape.clip_height); + + bool should_create = true; + + if (surface != NULL) { + bool is_compatible = + check_surface_compatibility(surface, &target, false); + + void (*trace_fn)(uint32_t addr, uint32_t width, uint32_t height, + const char *layout, uint32_t anti_aliasing, + uint32_t clip_x, uint32_t clip_width, + uint32_t clip_y, uint32_t clip_height, + uint32_t pitch) = + surface->color ? trace_nv2a_pgraph_surface_match_color : + trace_nv2a_pgraph_surface_match_zeta; + + trace_fn(surface->vram_addr, surface->width, surface->height, + surface->swizzle ? 
"sz" : "ln", surface->shape.anti_aliasing, + surface->shape.clip_x, surface->shape.clip_width, + surface->shape.clip_y, surface->shape.clip_height, + surface->pitch); + + assert(!(target.swizzle && pg->clearing)); + +#if 0 + if (surface->swizzle != target.swizzle) { + // Clears should only be done on linear surfaces. Avoid + // synchronization by allowing (1) a surface marked swizzled to + // be cleared under the assumption the entire surface is + // destined to be cleared and (2) a fully cleared linear surface + // to be marked swizzled. Strictly match size to avoid + // pathological cases. + is_compatible &= (pg->clearing || surface->cleared) && + check_surface_compatibility(surface, &target, true); + if (is_compatible) { + trace_nv2a_pgraph_surface_migrate_type( + target.swizzle ? "swizzled" : "linear"); + } + } +#endif + + if (is_compatible && color && + !check_surface_compatibility(surface, &target, true)) { + SurfaceBinding zeta_entry; + populate_surface_binding_target_sized( + d, !color, surface->width, surface->height, &zeta_entry); + hwaddr color_end = surface->vram_addr + surface->size; + hwaddr zeta_end = zeta_entry.vram_addr + zeta_entry.size; + is_compatible &= surface->vram_addr >= zeta_end || + zeta_entry.vram_addr >= color_end; + } + + if (is_compatible && !color && r->color_binding) { + is_compatible &= (surface->width == r->color_binding->width) && + (surface->height == r->color_binding->height); + } + + if (is_compatible) { + // FIXME: Refactor + pg->surface_binding_dim.width = surface->width; + pg->surface_binding_dim.clip_x = surface->shape.clip_x; + pg->surface_binding_dim.clip_width = surface->shape.clip_width; + pg->surface_binding_dim.height = surface->height; + pg->surface_binding_dim.clip_y = surface->shape.clip_y; + pg->surface_binding_dim.clip_height = surface->shape.clip_height; + surface->upload_pending |= mem_dirty; + pg->surface_zeta.buffer_dirty |= color; + should_create = false; + } else { + trace_nv2a_pgraph_surface_evict_reason( 
+ "incompatible", surface->vram_addr); + compare_surfaces(surface, &target); + pgraph_vk_surface_download_if_dirty(d, surface); + invalidate_surface(d, surface); + } + } + + if (should_create) { + surface = get_any_compatible_invalid_surface(r, &target); + if (surface) { + migrate_surface_image(&target, surface); + } else { + surface = g_malloc(sizeof(SurfaceBinding)); + create_surface_image(pg, &target); + } + + *surface = target; + set_surface_label(pg, surface); + surface_put(d, surface); + + // FIXME: Refactor + pg->surface_binding_dim.width = target.width; + pg->surface_binding_dim.clip_x = target.shape.clip_x; + pg->surface_binding_dim.clip_width = target.shape.clip_width; + pg->surface_binding_dim.height = target.height; + pg->surface_binding_dim.clip_y = target.shape.clip_y; + pg->surface_binding_dim.clip_height = target.shape.clip_height; + + if (color && r->zeta_binding && + (r->zeta_binding->width != target.width || + r->zeta_binding->height != target.height)) { + pg->surface_zeta.buffer_dirty = true; + } + } + + void (*trace_fn)(uint32_t addr, uint32_t width, uint32_t height, + const char *layout, uint32_t anti_aliasing, + uint32_t clip_x, uint32_t clip_width, uint32_t clip_y, + uint32_t clip_height, uint32_t pitch) = + color ? (should_create ? trace_nv2a_pgraph_surface_create_color : + trace_nv2a_pgraph_surface_hit_color) : + (should_create ? trace_nv2a_pgraph_surface_create_zeta : + trace_nv2a_pgraph_surface_hit_zeta); + trace_fn(surface->vram_addr, surface->width, surface->height, + surface->swizzle ? "sz" : "ln", surface->shape.anti_aliasing, + surface->shape.clip_x, surface->shape.clip_width, + surface->shape.clip_y, surface->shape.clip_height, surface->pitch); + + bind_surface(r, surface); + pg_surface->buffer_dirty = false; + } + + if (!upload && pg_surface->draw_dirty) { + if (!tcg_enabled()) { + // FIXME: Cannot monitor for reads/writes; flush now + download_surface(d, color ? 
r->color_binding : r->zeta_binding, + true); + } + + pg_surface->write_enabled_cache = false; + pg_surface->draw_dirty = false; + } +} + +// FIXME: Move to common? +void pgraph_vk_surface_update(NV2AState *d, bool upload, bool color_write, + bool zeta_write) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + pg->surface_shape.z_format = + GET_MASK(pgraph_reg_r(pg, NV_PGRAPH_SETUPRASTER), + NV_PGRAPH_SETUPRASTER_Z_FORMAT); + + color_write = color_write && + (pg->clearing || pgraph_color_write_enabled(pg)); + zeta_write = zeta_write && (pg->clearing || pgraph_zeta_write_enabled(pg)); + + if (upload) { + bool fb_dirty = framebuffer_dirty(pg); + if (fb_dirty) { + memcpy(&pg->last_surface_shape, &pg->surface_shape, + sizeof(SurfaceShape)); + pg->surface_color.buffer_dirty = true; + pg->surface_zeta.buffer_dirty = true; + } + + if (pg->surface_color.buffer_dirty) { + unbind_surface(d, true); + } + + if (color_write) { + update_surface_part(d, true, true); + } + + if (pg->surface_zeta.buffer_dirty) { + unbind_surface(d, false); + } + + if (zeta_write) { + update_surface_part(d, true, false); + } + } else { + if ((color_write || pg->surface_color.write_enabled_cache) + && pg->surface_color.draw_dirty) { + update_surface_part(d, false, true); + } + if ((zeta_write || pg->surface_zeta.write_enabled_cache) + && pg->surface_zeta.draw_dirty) { + update_surface_part(d, false, false); + } + } + + if (upload) { + pg->draw_time++; + } + + bool swizzle = (pg->surface_type == NV097_SET_SURFACE_FORMAT_TYPE_SWIZZLE); + + if (r->color_binding) { + r->color_binding->frame_time = pg->frame_time; + if (upload) { + pgraph_vk_upload_surface_data(d, r->color_binding, false); + r->color_binding->draw_time = pg->draw_time; + r->color_binding->swizzle = swizzle; + } + } + + if (r->zeta_binding) { + r->zeta_binding->frame_time = pg->frame_time; + if (upload) { + pgraph_vk_upload_surface_data(d, r->zeta_binding, false); + r->zeta_binding->draw_time = pg->draw_time; 
+ r->zeta_binding->swizzle = swizzle; + } + } + + // Sanity check color and zeta dimensions match + if (r->color_binding && r->zeta_binding) { + assert(r->color_binding->width == r->zeta_binding->width); + assert(r->color_binding->height == r->zeta_binding->height); + } + + expire_old_surfaces(d); + prune_invalid_surfaces(r, num_invalid_surfaces_to_keep); +} + +static bool check_format_and_usage_supported(PGRAPHVkState *r, VkFormat format, + VkImageUsageFlags usage) +{ + VkPhysicalDeviceImageFormatInfo2 pdif2 = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_FORMAT_INFO_2, + .format = format, + .type = VK_IMAGE_TYPE_2D, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .usage = usage, + }; + VkImageFormatProperties2 props = { + .sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_PROPERTIES_2, + }; + VkResult result = vkGetPhysicalDeviceImageFormatProperties2( + r->physical_device, &pdif2, &props); + return result == VK_SUCCESS; +} + +static bool check_surface_internal_formats_supported( + PGRAPHVkState *r, const SurfaceFormatInfo *fmts, size_t count) +{ + bool all_supported = true; + for (int i = 0; i < count; i++) { + const SurfaceFormatInfo *f = &fmts[i]; + if (f->host_bytes_per_pixel) { + all_supported &= + check_format_and_usage_supported(r, f->vk_format, f->usage); + } + } + return all_supported; +} + +void pgraph_vk_init_surfaces(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + // Make sure all surface format types are supported. We don't expect issue + // with these, and therefore have no fallback mechanism. + bool color_formats_supported = check_surface_internal_formats_supported( + r, kelvin_surface_color_format_vk_map, + ARRAY_SIZE(kelvin_surface_color_format_vk_map)); + assert(color_formats_supported); + + // Check if the device supports preferred VK_FORMAT_D24_UNORM_S8_UINT + // format, fall back to D32_SFLOAT_S8_UINT otherwise. 
+ r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z16] = zeta_d16; + if (check_surface_internal_formats_supported(r, &zeta_d24_unorm_s8_uint, + 1)) { + r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = + zeta_d24_unorm_s8_uint; + } else if (check_surface_internal_formats_supported( + r, &zeta_d32_sfloat_s8_uint, 1)) { + r->kelvin_surface_zeta_vk_map[NV097_SET_SURFACE_FORMAT_ZETA_Z24S8] = + zeta_d32_sfloat_s8_uint; + } else { + assert(!"No suitable depth-stencil format supported"); + } + + QTAILQ_INIT(&r->surfaces); + QTAILQ_INIT(&r->invalid_surfaces); + + r->downloads_pending = false; + qemu_event_init(&r->downloads_complete, false); + qemu_event_init(&r->dirty_surfaces_download_complete, false); + + r->color_binding = NULL; + r->zeta_binding = NULL; + r->framebuffer_dirty = true; + + pgraph_vk_reload_surface_scale_factor(pg); // FIXME: Move internal +} + +void pgraph_vk_finalize_surfaces(PGRAPHState *pg) +{ + pgraph_vk_surface_flush(container_of(pg, NV2AState, pgraph)); +} + +void pgraph_vk_surface_flush(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + // Clear last surface shape to force recreation of buffers at next draw + pg->surface_color.draw_dirty = false; + pg->surface_zeta.draw_dirty = false; + memset(&pg->last_surface_shape, 0, sizeof(pg->last_surface_shape)); + unbind_surface(d, true); + unbind_surface(d, false); + + SurfaceBinding *s, *next; + QTAILQ_FOREACH_SAFE(s, &r->surfaces, entry, next) { + // FIXME: We should download all surfaces to ram, but need to + // investigate corruption issue + pgraph_vk_surface_download_if_dirty(d, s); + invalidate_surface(d, s); + } + prune_invalid_surfaces(r, 0); + + pgraph_vk_reload_surface_scale_factor(pg); +} diff --git a/hw/xbox/nv2a/pgraph/vk/texture.c b/hw/xbox/nv2a/pgraph/vk/texture.c new file mode 100644 index 00000000000..c5d3cf7fe2f --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/texture.c @@ -0,0 +1,1569 @@ +/* + * Geforce NV2A PGRAPH 
Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * Based on GL implementation: + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "qemu/osdep.h" +#include "hw/xbox/nv2a/pgraph/s3tc.h" +#include "hw/xbox/nv2a/pgraph/swizzle.h" +#include "qemu/fast-hash.h" +#include "qemu/lru.h" +#include "renderer.h" + +static void texture_cache_release_node_resources(PGRAPHVkState *r, TextureBinding *snode); + +static const VkImageType dimensionality_to_vk_image_type[] = { + 0, + VK_IMAGE_TYPE_1D, + VK_IMAGE_TYPE_2D, + VK_IMAGE_TYPE_3D, +}; +static const VkImageViewType dimensionality_to_vk_image_view_type[] = { + 0, + VK_IMAGE_VIEW_TYPE_1D, + VK_IMAGE_VIEW_TYPE_2D, + VK_IMAGE_VIEW_TYPE_3D, +}; + +static VkSamplerAddressMode lookup_texture_address_mode(int idx) +{ + assert(0 < idx && idx < ARRAY_SIZE(pgraph_texture_addr_vk_map)); + return pgraph_texture_addr_vk_map[idx]; +} + +// FIXME: Move to common +// FIXME: We can shrink the size of this structure +// FIXME: Use simple allocator +typedef struct TextureLevel { + unsigned int width, height, depth; + hwaddr vram_addr; + void *decoded_data; + size_t decoded_size; +} TextureLevel; + +typedef struct TextureLayer { + TextureLevel levels[16]; +} TextureLayer; + +typedef struct TextureLayout { + 
TextureLayer layers[6]; +} TextureLayout; + +// FIXME: Move to common +static enum S3TC_DECOMPRESS_FORMAT kelvin_format_to_s3tc_format(int color_format) +{ + switch (color_format) { + case NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5: + return S3TC_DECOMPRESS_FORMAT_DXT1; + case NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT23_A8R8G8B8: + return S3TC_DECOMPRESS_FORMAT_DXT3; + case NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT45_A8R8G8B8: + return S3TC_DECOMPRESS_FORMAT_DXT5; + default: + assert(false); + } +} + +// FIXME: Move to common +static void memcpy_image(void *dst, void *src, int min_stride, int dst_stride, int src_stride, int height) +{ + uint8_t *dst_ptr = (uint8_t *)dst; + uint8_t *src_ptr = (uint8_t *)src; + + for (int i = 0; i < height; i++) { + memcpy(dst_ptr, src_ptr, min_stride); + src_ptr += src_stride; + dst_ptr += dst_stride; + } +} + +// FIXME: Move to common +static size_t get_cubemap_layer_size(PGRAPHState *pg, TextureShape s) +{ + BasicColorFormatInfo f = kelvin_color_format_info_map[s.color_format]; + bool is_compressed = + pgraph_is_texture_format_compressed(pg, s.color_format); + unsigned int block_size; + + unsigned int w = s.width, h = s.height; + size_t length = 0; + + if (!f.linear && s.border) { + w = MAX(16, w * 2); + h = MAX(16, h * 2); + } + + if (is_compressed) { + block_size = + s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5 ? 
+ 8 : + 16; + } + + for (int level = 0; level < s.levels; level++) { + if (is_compressed) { + length += w / 4 * h / 4 * block_size; + } else { + length += w * h * f.bytes_per_pixel; + } + + w /= 2; + h /= 2; + } + + return ROUND_UP(length, NV2A_CUBEMAP_FACE_ALIGNMENT); +} + +// FIXME: Move to common +// FIXME: More refactoring +// FIXME: Possible parallelization of decoding +// FIXME: Bounds checking +static TextureLayout *get_texture_layout(PGRAPHState *pg, int texture_idx) +{ + NV2AState *d = container_of(pg, NV2AState, pgraph); + TextureShape s = pgraph_get_texture_shape(pg, texture_idx); + BasicColorFormatInfo f = kelvin_color_format_info_map[s.color_format]; + + NV2A_VK_DGROUP_BEGIN("Texture %d: cubemap=%d, dimensionality=%d, color_format=0x%x, levels=%d, width=%d, height=%d, depth=%d border=%d, min_mipmap_level=%d, max_mipmap_level=%d, pitch=%d", + texture_idx, + s.cubemap, + s.dimensionality, + s.color_format, + s.levels, + s.width, + s.height, + s.depth, + s.border, + s.min_mipmap_level, + s.max_mipmap_level, + s.pitch + ); + + // Sanity checks on below assumptions + if (f.linear) { + assert(s.dimensionality == 2); + } + if (s.cubemap) { + assert(s.dimensionality == 2); + assert(!f.linear); + } + assert(s.dimensionality > 1); + + const hwaddr texture_vram_offset = pgraph_get_texture_phys_addr(pg, texture_idx); + void *texture_data_ptr = (char *)d->vram_ptr + texture_vram_offset; + + size_t texture_palette_data_size; + const hwaddr texture_palette_vram_offset = + pgraph_get_texture_palette_phys_addr_length(pg, texture_idx, + &texture_palette_data_size); + void *palette_data_ptr = (char *)d->vram_ptr + texture_palette_vram_offset; + + unsigned int adjusted_width = s.width, adjusted_height = s.height, + adjusted_pitch = s.pitch, adjusted_depth = s.depth; + + if (!f.linear && s.border) { + adjusted_width = MAX(16, adjusted_width * 2); + adjusted_height = MAX(16, adjusted_height * 2); + adjusted_pitch = adjusted_width * (s.pitch / s.width); + adjusted_depth = 
MAX(16, s.depth * 2); + } + + TextureLayout *layout = g_malloc0(sizeof(TextureLayout)); + + if (f.linear) { + assert(s.pitch % f.bytes_per_pixel == 0 && "Can't handle strides unaligned to pixels"); + + size_t converted_size; + uint8_t *converted = pgraph_convert_texture_data( + s, texture_data_ptr, palette_data_ptr, adjusted_width, + adjusted_height, 1, adjusted_pitch, 0, &converted_size); + + if (!converted) { + int dst_stride = adjusted_width * f.bytes_per_pixel; + assert(adjusted_width <= s.width); + converted_size = dst_stride * adjusted_height; + converted = g_malloc(converted_size); + memcpy_image(converted, texture_data_ptr, adjusted_width * f.bytes_per_pixel, dst_stride, + adjusted_pitch, adjusted_height); + } + + assert(s.levels == 1); + layout->layers[0].levels[0] = (TextureLevel){ + .width = adjusted_width, + .height = adjusted_height, + .depth = 1, + .decoded_size = converted_size, + .decoded_data = converted, + }; + + NV2A_VK_DGROUP_END(); + return layout; + } + + bool is_compressed = pgraph_is_texture_format_compressed(pg, s.color_format); + size_t block_size = 0; + if (is_compressed) { + bool is_dxt1 = + s.color_format == NV097_SET_TEXTURE_FORMAT_COLOR_L_DXT1_A1R5G5B5; + block_size = is_dxt1 ? 8 : 16; + } + + if (s.dimensionality == 2) { + hwaddr layer_size = s.cubemap ? get_cubemap_layer_size(pg, s) : 0; + const int num_layers = s.cubemap ? 
6 : 1; + for (int layer = 0; layer < num_layers; layer++) { + unsigned int width = adjusted_width, height = adjusted_height; + texture_data_ptr = (char *)d->vram_ptr + texture_vram_offset + + layer * layer_size; + + for (int level = 0; level < s.levels; level++) { + NV2A_VK_DPRINTF("Layer %d Level %d @ %x", layer, level, (int)((char*)texture_data_ptr - (char*)d->vram_ptr)); + + width = MAX(width, 1); + height = MAX(height, 1); + if (is_compressed) { + // https://docs.microsoft.com/en-us/windows/win32/direct3d10/d3d10-graphics-programming-guide-resources-block-compression#virtual-size-versus-physical-size + unsigned int tex_width = width, tex_height = height; + unsigned int physical_width = (width + 3) & ~3, + physical_height = (height + 3) & ~3; + // if (physical_width != width) { + // glPixelStorei(GL_UNPACK_ROW_LENGTH, physical_width); + // } + + size_t converted_size = width * height * 4; + uint8_t *converted = s3tc_decompress_2d( + kelvin_format_to_s3tc_format(s.color_format), + texture_data_ptr, physical_width, physical_height); + assert(converted); + + if (s.cubemap && adjusted_width != s.width) { + // FIXME: Consider preserving the border. + // There does not seem to be a way to reference the border + // texels in a cubemap, so they are discarded. 
+ + // glPixelStorei(GL_UNPACK_SKIP_PIXELS, 4); + // glPixelStorei(GL_UNPACK_SKIP_ROWS, 4); + tex_width = s.width; + tex_height = s.height; + // if (physical_width == width) { + // glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); + // } + + // FIXME: Crop by 4 pixels on each side + } + + layout->layers[layer].levels[level] = (TextureLevel){ + .width = tex_width, + .height = tex_height, + .depth = 1, + .decoded_size = converted_size, + .decoded_data = converted, + }; + + texture_data_ptr += + physical_width / 4 * physical_height / 4 * block_size; + } else { + unsigned int pitch = width * f.bytes_per_pixel; + unsigned int tex_width = width, tex_height = height; + + size_t converted_size = height * pitch; + uint8_t *unswizzled = (uint8_t*)g_malloc(height * pitch); + unswizzle_rect(texture_data_ptr, width, height, + unswizzled, pitch, f.bytes_per_pixel); + + uint8_t *converted = pgraph_convert_texture_data( + s, unswizzled, palette_data_ptr, width, height, 1, + pitch, 0, &converted_size); + + if (converted) { + g_free(unswizzled); + } else { + converted = unswizzled; + } + + if (s.cubemap && adjusted_width != s.width) { + // FIXME: Consider preserving the border. + // There does not seem to be a way to reference the border + // texels in a cubemap, so they are discarded. 
+ // glPixelStorei(GL_UNPACK_ROW_LENGTH, adjusted_width); + tex_width = s.width; + tex_height = s.height; + // pixel_data += 4 * f.bytes_per_pixel + 4 * pitch; + + // FIXME: Crop by 4 pixels on each side + } + + layout->layers[layer].levels[level] = (TextureLevel){ + .width = tex_width, + .height = tex_height, + .depth = 1, + .decoded_size = converted_size, + .decoded_data = converted, + }; + + texture_data_ptr += width * height * f.bytes_per_pixel; + } + + width /= 2; + height /= 2; + } + } + } else if (s.dimensionality == 3) { + assert(!f.linear); + unsigned int width = adjusted_width, height = adjusted_height, + depth = adjusted_depth; + + for (int level = 0; level < s.levels; level++) { + if (is_compressed) { + assert(width % 4 == 0 && height % 4 == 0 && + "Compressed 3D texture virtual size"); + + width = MAX(width, 4); + height = MAX(height, 4); + depth = MAX(depth, 1); + + size_t converted_size = width * height * depth * 4; + uint8_t *converted = s3tc_decompress_3d( + kelvin_format_to_s3tc_format(s.color_format), + texture_data_ptr, width, height, depth); + assert(converted); + + layout->layers[0].levels[level] = (TextureLevel){ + .width = width, + .height = height, + .depth = depth, + .decoded_size = converted_size, + .decoded_data = converted, + }; + + texture_data_ptr += width / 4 * height / 4 * depth * block_size; + } else { + width = MAX(width, 1); + height = MAX(height, 1); + depth = MAX(depth, 1); + + unsigned int row_pitch = width * f.bytes_per_pixel; + unsigned int slice_pitch = row_pitch * height; + + size_t unswizzled_size = slice_pitch * depth; + uint8_t *unswizzled = g_malloc(unswizzled_size); + unswizzle_box(texture_data_ptr, width, height, depth, + unswizzled, row_pitch, slice_pitch, + f.bytes_per_pixel); + + size_t converted_size; + uint8_t *converted = pgraph_convert_texture_data( + s, unswizzled, palette_data_ptr, width, height, depth, + row_pitch, slice_pitch, &converted_size); + + if (converted) { + g_free(unswizzled); + } else { + 
converted = unswizzled; + converted_size = unswizzled_size; + } + + layout->layers[0].levels[level] = (TextureLevel){ + .width = width, + .height = height, + .depth = depth, + .decoded_size = converted_size, + .decoded_data = converted, + }; + + texture_data_ptr += width * height * depth * f.bytes_per_pixel; + } + + width /= 2; + height /= 2; + depth /= 2; + } + } + + NV2A_VK_DGROUP_END(); + return layout; +} + +struct pgraph_texture_possibly_dirty_struct { + hwaddr addr, end; +}; + +static void mark_textures_possibly_dirty_visitor(Lru *lru, LruNode *node, void *opaque) +{ + struct pgraph_texture_possibly_dirty_struct *test = opaque; + + TextureBinding *tnode = container_of(node, TextureBinding, node); + if (tnode->possibly_dirty) { + return; + } + + uintptr_t k_tex_addr = tnode->key.texture_vram_offset; + uintptr_t k_tex_end = k_tex_addr + tnode->key.texture_length - 1; + bool overlapping = !(test->addr > k_tex_end || k_tex_addr > test->end); + + if (tnode->key.palette_length > 0) { + uintptr_t k_pal_addr = tnode->key.palette_vram_offset; + uintptr_t k_pal_end = k_pal_addr + tnode->key.palette_length - 1; + overlapping |= !(test->addr > k_pal_end || k_pal_addr > test->end); + } + + tnode->possibly_dirty |= overlapping; +} + +void pgraph_vk_mark_textures_possibly_dirty(NV2AState *d, + hwaddr addr, hwaddr size) +{ + hwaddr end = TARGET_PAGE_ALIGN(addr + size) - 1; + addr &= TARGET_PAGE_MASK; + assert(end <= memory_region_size(d->vram)); + + struct pgraph_texture_possibly_dirty_struct test = { + .addr = addr, + .end = end, + }; + + lru_visit_active(&d->pgraph.vk_renderer_state->texture_cache, + mark_textures_possibly_dirty_visitor, + &test); +} + +static bool check_texture_dirty(NV2AState *d, hwaddr addr, hwaddr size) +{ + hwaddr end = TARGET_PAGE_ALIGN(addr + size); + addr &= TARGET_PAGE_MASK; + assert(end < memory_region_size(d->vram)); + return memory_region_test_and_clear_dirty(d->vram, addr, end - addr, + DIRTY_MEMORY_NV2A_TEX); +} + +// Check if any of the 
pages spanned by the a texture are dirty. +static bool check_texture_possibly_dirty(NV2AState *d, + hwaddr texture_vram_offset, + unsigned int length, + hwaddr palette_vram_offset, + unsigned int palette_length) +{ + bool possibly_dirty = false; + if (check_texture_dirty(d, texture_vram_offset, length)) { + possibly_dirty = true; + pgraph_vk_mark_textures_possibly_dirty(d, texture_vram_offset, length); + } + if (palette_length && check_texture_dirty(d, palette_vram_offset, + palette_length)) { + possibly_dirty = true; + pgraph_vk_mark_textures_possibly_dirty(d, palette_vram_offset, + palette_length); + } + return possibly_dirty; +} + +// FIXME: Make sure we update sampler when data matches. Should we add filtering +// options to the textureshape? +static void upload_texture_image(PGRAPHState *pg, int texture_idx, + TextureBinding *binding) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + TextureShape *state = &binding->key.state; + VkColorFormatInfo vkf = kelvin_color_format_vk_map[state->color_format]; + + nv2a_profile_inc_counter(NV2A_PROF_TEX_UPLOAD); + + g_autofree TextureLayout *layout = get_texture_layout(pg, texture_idx); + const int num_layers = state->cubemap ? 
6 : 1; + + // Calculate decoded texture data size + size_t texture_data_size = 0; + for (int layer_idx = 0; layer_idx < num_layers; layer_idx++) { + TextureLayer *layer = &layout->layers[layer_idx]; + for (int level_idx = 0; level_idx < state->levels; level_idx++) { + size_t size = layer->levels[level_idx].decoded_size; + assert(size); + texture_data_size += size; + } + } + + assert(texture_data_size <= + r->storage_buffers[BUFFER_STAGING_SRC].buffer_size); + + // Copy texture data to mapped device buffer + uint8_t *mapped_memory_ptr; + + VK_CHECK(vmaMapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation, + (void *)&mapped_memory_ptr)); + + int num_regions = num_layers * state->levels; + g_autofree VkBufferImageCopy *regions = + g_malloc0_n(num_regions, sizeof(VkBufferImageCopy)); + + VkBufferImageCopy *region = regions; + VkDeviceSize buffer_offset = 0; + + for (int layer_idx = 0; layer_idx < num_layers; layer_idx++) { + TextureLayer *layer = &layout->layers[layer_idx]; + NV2A_VK_DPRINTF("Layer %d", layer_idx); + for (int level_idx = 0; level_idx < state->levels; level_idx++) { + TextureLevel *level = &layer->levels[level_idx]; + NV2A_VK_DPRINTF(" - Level %d, w=%d h=%d d=%d @ %08" HWADDR_PRIx, + level_idx, level->width, level->height, + level->depth, buffer_offset); + memcpy(mapped_memory_ptr + buffer_offset, level->decoded_data, + level->decoded_size); + *region = (VkBufferImageCopy){ + .bufferOffset = buffer_offset, + .bufferRowLength = 0, // Tightly packed + .bufferImageHeight = 0, + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.mipLevel = level_idx, + .imageSubresource.baseArrayLayer = layer_idx, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){ 0, 0, 0 }, + .imageExtent = + (VkExtent3D){ level->width, level->height, level->depth }, + }; + buffer_offset += level->decoded_size; + region++; + } + } + assert(buffer_offset <= r->storage_buffers[BUFFER_STAGING_SRC].buffer_size); + + 
vmaFlushAllocation(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation, 0, + VK_WHOLE_SIZE); + + vmaUnmapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation); + + // FIXME: Use nondraw. Need to fill and copy tex buffer at once + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + pgraph_vk_begin_debug_marker(r, cmd, RGBA_GREEN, __func__); + + VkBufferMemoryBarrier host_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_HOST_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_STAGING_SRC].buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_HOST_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &host_barrier, 0, NULL); + + pgraph_vk_transition_image_layout(pg, cmd, binding->image, vkf.vk_format, + binding->current_layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + binding->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + vkCmdCopyBufferToImage(cmd, r->storage_buffers[BUFFER_STAGING_SRC].buffer, + binding->image, binding->current_layout, + num_regions, regions); + + pgraph_vk_transition_image_layout(pg, cmd, binding->image, vkf.vk_format, + binding->current_layout, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + binding->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + nv2a_profile_inc_counter(NV2A_PROF_QUEUE_SUBMIT_4); + pgraph_vk_end_debug_marker(r, cmd); + pgraph_vk_end_single_time_commands(pg, cmd); + + // Release decoded texture data + for (int layer_idx = 0; layer_idx < num_layers; layer_idx++) { + TextureLayer *layer = &layout->layers[layer_idx]; + for (int level_idx = 0; level_idx < state->levels; level_idx++) { + g_free(layer->levels[level_idx].decoded_data); + } + } +} + +static void copy_zeta_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surface, + 
TextureBinding *texture) +{ + assert(!surface->color); + + PGRAPHVkState *r = pg->vk_renderer_state; + TextureShape *state = &texture->key.state; + VkColorFormatInfo vkf = kelvin_color_format_vk_map[state->color_format]; + + bool use_compute_to_convert_depth_stencil = + surface->host_fmt.vk_format == VK_FORMAT_D24_UNORM_S8_UINT || + surface->host_fmt.vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT; + + bool compute_needs_finish = use_compute_to_convert_depth_stencil && + pgraph_vk_compute_needs_finish(r); + if (compute_needs_finish) { + pgraph_vk_finish(pg, VK_FINISH_REASON_NEED_BUFFER_SPACE); + } + + nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX); + + trace_nv2a_pgraph_surface_render_to_texture( + surface->vram_addr, surface->width, surface->height); + + VkCommandBuffer cmd = pgraph_vk_begin_nondraw_commands(pg); + pgraph_vk_begin_debug_marker(r, cmd, RGBA_GREEN, __func__); + + unsigned int scaled_width = surface->width, + scaled_height = surface->height; + pgraph_apply_scaling_factor(pg, &scaled_width, &scaled_height); + + size_t copied_image_size = + scaled_width * scaled_height * surface->host_fmt.host_bytes_per_pixel; + size_t stencil_buffer_offset = 0; + size_t stencil_buffer_size = 0; + + int num_regions = 0; + VkBufferImageCopy regions[2]; + regions[num_regions++] = (VkBufferImageCopy){ + .bufferOffset = 0, + .bufferRowLength = 0, // Tightly packed + .bufferImageHeight = 0, // Tightly packed + .imageSubresource.aspectMask = surface->color ? 
VK_IMAGE_ASPECT_COLOR_BIT : VK_IMAGE_ASPECT_DEPTH_BIT, + .imageSubresource.mipLevel = 0, + .imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){0, 0, 0}, + .imageExtent = (VkExtent3D){scaled_width, scaled_height, 1}, + }; + + if (surface->host_fmt.aspect & VK_IMAGE_ASPECT_STENCIL_BIT) { + stencil_buffer_offset = + ROUND_UP(scaled_width * scaled_height * 4, + r->device_props.limits.minStorageBufferOffsetAlignment); + stencil_buffer_size = scaled_width * scaled_height; + copied_image_size += stencil_buffer_size; + + regions[num_regions++] = (VkBufferImageCopy){ + .bufferOffset = stencil_buffer_offset, + .bufferRowLength = 0, // Tightly packed + .bufferImageHeight = 0, // Tightly packed + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_STENCIL_BIT, + .imageSubresource.mipLevel = 0, + .imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){0, 0, 0}, + .imageExtent = (VkExtent3D){scaled_width, scaled_height, 1}, + }; + } + assert(use_compute_to_convert_depth_stencil && "Unimplemented"); + + StorageBuffer *dst_storage_buffer = &r->storage_buffers[BUFFER_COMPUTE_DST]; + assert(dst_storage_buffer->buffer_size >= copied_image_size); + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + vkCmdCopyImageToBuffer( + cmd, surface->image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + dst_storage_buffer->buffer, + num_regions, regions); + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + size_t packed_image_size = scaled_width * scaled_height * 4; + + VkBufferMemoryBarrier pre_pack_src_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .dstAccessMask = 
VK_ACCESS_SHADER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, + 1, &pre_pack_src_barrier, 0, NULL); + + VkBufferMemoryBarrier pre_pack_dst_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0, 0, NULL, + 1, &pre_pack_dst_barrier, 0, NULL); + + pgraph_vk_pack_depth_stencil( + pg, surface, cmd, + r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, false); + + VkBufferMemoryBarrier post_pack_src_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_SHADER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_DST].buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_pack_src_barrier, 0, NULL); + + VkBufferMemoryBarrier post_pack_dst_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, + .size = packed_image_size + }; + 
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_pack_dst_barrier, 0, NULL); + + pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format, + texture->current_layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + texture->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + regions[0] = (VkBufferImageCopy){ + .bufferOffset = 0, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.mipLevel = 0, + .imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){ 0, 0, 0 }, + .imageExtent = (VkExtent3D){ scaled_width, scaled_height, 1 }, + }; + vkCmdCopyBufferToImage( + cmd, r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, texture->image, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1, regions); + + VkBufferMemoryBarrier post_copy_src_barrier = { + .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER, + .srcAccessMask = VK_ACCESS_TRANSFER_READ_BIT, + .dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .buffer = r->storage_buffers[BUFFER_COMPUTE_SRC].buffer, + .size = VK_WHOLE_SIZE + }; + vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, + VK_PIPELINE_STAGE_TRANSFER_BIT, 0, 0, NULL, 1, + &post_copy_src_barrier, 0, NULL); + + pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format, + texture->current_layout, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + texture->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + pgraph_vk_end_debug_marker(r, cmd); + pgraph_vk_end_nondraw_commands(pg, cmd); + + texture->draw_time = surface->draw_time; +} + +// FIXME: Should be able to skip the copy and sample the original surface image +static void copy_surface_to_texture(PGRAPHState *pg, SurfaceBinding *surface, + TextureBinding *texture) +{ + if 
(!surface->color) { + copy_zeta_surface_to_texture(pg, surface, texture); + return; + } + + PGRAPHVkState *r = pg->vk_renderer_state; + TextureShape *state = &texture->key.state; + VkColorFormatInfo vkf = kelvin_color_format_vk_map[state->color_format]; + + nv2a_profile_inc_counter(NV2A_PROF_SURF_TO_TEX); + + trace_nv2a_pgraph_surface_render_to_texture( + surface->vram_addr, surface->width, surface->height); + + VkCommandBuffer cmd = pgraph_vk_begin_nondraw_commands(pg); + pgraph_vk_begin_debug_marker(r, cmd, RGBA_GREEN, __func__); + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + surface->color ? VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); + + pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format, + texture->current_layout, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + texture->current_layout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL; + + VkImageCopy region = { + .srcSubresource.aspectMask = surface->host_fmt.aspect, + .srcSubresource.layerCount = 1, + .dstSubresource.aspectMask = surface->host_fmt.aspect, + .dstSubresource.layerCount = 1, + .extent.width = surface->width, + .extent.height = surface->height, + .extent.depth = 1, + }; + pgraph_apply_scaling_factor(pg, ®ion.extent.width, + ®ion.extent.height); + vkCmdCopyImage(cmd, surface->image, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, texture->image, + texture->current_layout, 1, ®ion); + + pgraph_vk_transition_image_layout( + pg, cmd, surface->image, surface->host_fmt.vk_format, + VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, + surface->color ? 
VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL : + VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL); + + pgraph_vk_transition_image_layout(pg, cmd, texture->image, vkf.vk_format, + texture->current_layout, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + texture->current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL; + + pgraph_vk_end_debug_marker(r, cmd); + pgraph_vk_end_nondraw_commands(pg, cmd); + + texture->draw_time = surface->draw_time; +} + +static bool check_surface_to_texture_compatiblity(const SurfaceBinding *surface, + const TextureShape *shape) +{ + // FIXME: Better checks/handling on formats and surface-texture compat + + if ((!surface->swizzle && surface->pitch != shape->pitch) || + surface->width != shape->width || + surface->height != shape->height) { + return false; + } + + int surface_fmt = surface->shape.color_format; + int texture_fmt = shape->color_format; + + if (!surface->color) { + if (surface->shape.zeta_format == NV097_SET_SURFACE_FORMAT_ZETA_Z24S8) { + return true; + } + return false; + } + + if (shape->cubemap) { + // FIXME: Support rendering surface to cubemap face + return false; + } + + if (shape->levels > 1) { + // FIXME: Support rendering surface to mip levels + return false; + } + + switch (surface_fmt) { + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X1R5G5B5_Z1R5G5B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X1R5G5B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_R5G6B5: switch (texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R5G6B5: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_R5G6B5: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_X8R8G8B8_Z8R8G8B8: switch(texture_fmt) { + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_X8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_X8R8G8B8: return true; + default: break; + } + break; + case NV097_SET_SURFACE_FORMAT_COLOR_LE_A8R8G8B8: switch (texture_fmt) { + 
case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8B8G8R8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_R8G8B8A8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_LU_IMAGE_A8R8G8B8: return true; + case NV097_SET_TEXTURE_FORMAT_COLOR_SZ_A8R8G8B8: return true; + default: break; + } + break; + default: + break; + } + + trace_nv2a_pgraph_surface_texture_compat_failed( + surface_fmt, texture_fmt); + return false; +} + +static void create_dummy_texture(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = VK_IMAGE_TYPE_2D, + .extent.width = 16, + .extent.height = 16, + .extent.depth = 1, + .mipLevels = 1, + .arrayLayers = 1, + .format = VK_FORMAT_R8_UNORM, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + .samples = VK_SAMPLE_COUNT_1_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .flags = 0, + }; + + VmaAllocationCreateInfo alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + }; + + VkImage texture_image; + VmaAllocation texture_allocation; + + VK_CHECK(vmaCreateImage(r->allocator, &image_create_info, + &alloc_create_info, &texture_image, + &texture_allocation, NULL)); + + VkImageViewCreateInfo image_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = texture_image, + .viewType = VK_IMAGE_VIEW_TYPE_2D, + .format = VK_FORMAT_R8_UNORM, + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.baseMipLevel = 0, + .subresourceRange.levelCount = image_create_info.mipLevels, + .subresourceRange.baseArrayLayer = 0, + .subresourceRange.layerCount = image_create_info.arrayLayers, + .components = (VkComponentMapping){ VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R, + VK_COMPONENT_SWIZZLE_R }, + }; + VkImageView texture_image_view; + 
VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL, + &texture_image_view)); + + VkSamplerCreateInfo sampler_create_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = VK_FILTER_NEAREST, + .minFilter = VK_FILTER_NEAREST, + .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT, + .anisotropyEnable = VK_FALSE, + .borderColor = VK_BORDER_COLOR_INT_OPAQUE_WHITE, + .unnormalizedCoordinates = VK_FALSE, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST, + }; + + VkSampler texture_sampler; + VK_CHECK(vkCreateSampler(r->device, &sampler_create_info, NULL, + &texture_sampler)); + + // Copy texture data to mapped device buffer + uint8_t *mapped_memory_ptr; + size_t texture_data_size = + image_create_info.extent.width * image_create_info.extent.height; + + VK_CHECK(vmaMapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation, + (void *)&mapped_memory_ptr)); + memset(mapped_memory_ptr, 0xff, texture_data_size); + + vmaFlushAllocation(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation, 0, + VK_WHOLE_SIZE); + + vmaUnmapMemory(r->allocator, + r->storage_buffers[BUFFER_STAGING_SRC].allocation); + + VkCommandBuffer cmd = pgraph_vk_begin_single_time_commands(pg); + pgraph_vk_begin_debug_marker(r, cmd, RGBA_GREEN, __func__); + + pgraph_vk_transition_image_layout( + pg, cmd, texture_image, VK_FORMAT_R8_UNORM, VK_IMAGE_LAYOUT_UNDEFINED, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL); + + VkBufferImageCopy region = { + .bufferOffset = 0, + .bufferRowLength = 0, + .bufferImageHeight = 0, + .imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .imageSubresource.mipLevel = 0, + .imageSubresource.baseArrayLayer = 0, + .imageSubresource.layerCount = 1, + .imageOffset = (VkOffset3D){ 0, 0, 0 }, + .imageExtent = (VkExtent3D){ image_create_info.extent.width, + 
image_create_info.extent.height, 1 }, + }; + vkCmdCopyBufferToImage(cmd, r->storage_buffers[BUFFER_STAGING_SRC].buffer, + texture_image, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + 1, ®ion); + + pgraph_vk_transition_image_layout(pg, cmd, texture_image, + VK_FORMAT_R8_UNORM, + VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, + VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL); + + pgraph_vk_end_debug_marker(r, cmd); + pgraph_vk_end_single_time_commands(pg, cmd); + + r->dummy_texture = (TextureBinding){ + .key.scale = 1.0, + .image = texture_image, + .current_layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, + .allocation = texture_allocation, + .image_view = texture_image_view, + .sampler = texture_sampler, + }; +} + +static void destroy_dummy_texture(PGRAPHVkState *r) +{ + texture_cache_release_node_resources(r, &r->dummy_texture); +} + +static void set_texture_label(PGRAPHState *pg, TextureBinding *texture) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + g_autofree gchar *label = g_strdup_printf( + "Texture %" HWADDR_PRIx "h fmt:%02xh %dx%dx%d lvls:%d", + texture->key.texture_vram_offset, texture->key.state.color_format, + texture->key.state.width, texture->key.state.height, + texture->key.state.depth, texture->key.state.levels); + + VkDebugUtilsObjectNameInfoEXT name_info = { + .sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT, + .objectType = VK_OBJECT_TYPE_IMAGE, + .objectHandle = (uint64_t)texture->image, + .pObjectName = label, + }; + + if (r->debug_utils_extension_enabled) { + vkSetDebugUtilsObjectNameEXT(r->device, &name_info); + } + vmaSetAllocationName(r->allocator, texture->allocation, label); +} + +static bool is_linear_filter_supported_for_format(PGRAPHVkState *r, + int kelvin_format) +{ + return r->texture_format_properties[kelvin_format].optimalTilingFeatures & + VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT; +} + +static void create_texture(PGRAPHState *pg, int texture_idx) +{ + NV2A_VK_DGROUP_BEGIN("Creating texture %d", texture_idx); + + NV2AState *d = 
container_of(pg, NV2AState, pgraph); + PGRAPHVkState *r = pg->vk_renderer_state; + TextureShape state = pgraph_get_texture_shape(pg, texture_idx); // FIXME: Check for pad issues + BasicColorFormatInfo f_basic = kelvin_color_format_info_map[state.color_format]; + + const hwaddr texture_vram_offset = pgraph_get_texture_phys_addr(pg, texture_idx); + size_t texture_palette_data_size; + const hwaddr texture_palette_vram_offset = + pgraph_get_texture_palette_phys_addr_length(pg, texture_idx, + &texture_palette_data_size); + + size_t texture_length = pgraph_get_texture_length(pg, &state); + + uint32_t filter = + pgraph_reg_r(pg, NV_PGRAPH_TEXFILTER0 + texture_idx * 4); + uint32_t address = + pgraph_reg_r(pg, NV_PGRAPH_TEXADDRESS0 + texture_idx * 4); + uint32_t border_color_pack32 = + pgraph_reg_r(pg, NV_PGRAPH_BORDERCOLOR0 + texture_idx * 4); + + TextureKey key; + memset(&key, 0, sizeof(key)); + key.state = state; + key.texture_vram_offset = texture_vram_offset; + key.texture_length = texture_length; + key.palette_vram_offset = texture_palette_vram_offset; + key.palette_length = texture_palette_data_size; + key.scale = 1; + + // FIXME: Separate sampler from texture + key.filter = filter; + key.address = address; + key.border_color = border_color_pack32; + + bool is_indexed = (state.color_format == + NV097_SET_TEXTURE_FORMAT_COLOR_SZ_I8_A8R8G8B8); + + bool possibly_dirty = false; + bool possibly_dirty_checked = false; + bool surface_to_texture = false; + + // Check active surfaces to see if this texture was a render target + SurfaceBinding *surface = pgraph_vk_surface_get(d, texture_vram_offset); + if (surface && state.levels == 1) { + surface_to_texture = + check_surface_to_texture_compatiblity(surface, &state); + + if (surface_to_texture && surface->upload_pending) { + pgraph_vk_upload_surface_data(d, surface, false); + } + } + + if (!surface_to_texture) { + // FIXME: Restructure to support rendering surfaces to cubemap faces + + // Writeback any surfaces which this 
texture may index + pgraph_vk_download_surfaces_in_range_if_dirty( + pg, texture_vram_offset, texture_length); + } + + if (surface_to_texture && pg->surface_scale_factor > 1) { + key.scale = pg->surface_scale_factor; + } + + uint64_t key_hash = fast_hash((void*)&key, sizeof(key)); + LruNode *node = lru_lookup(&r->texture_cache, key_hash, &key); + TextureBinding *snode = container_of(node, TextureBinding, node); + bool binding_found = snode->image != VK_NULL_HANDLE; + + if (binding_found) { + NV2A_VK_DPRINTF("Cache hit"); + r->texture_bindings[texture_idx] = snode; + possibly_dirty |= snode->possibly_dirty; + } else { + possibly_dirty = true; + } + + if (!surface_to_texture && !possibly_dirty_checked) { + possibly_dirty |= check_texture_possibly_dirty( + d, texture_vram_offset, texture_length, texture_palette_vram_offset, + texture_palette_data_size); + } + + // Calculate hash of texture data, if necessary + void *texture_data = (char*)d->vram_ptr + texture_vram_offset; + void *palette_data = (char*)d->vram_ptr + texture_palette_vram_offset; + + uint64_t content_hash = 0; + if (!surface_to_texture && possibly_dirty) { + content_hash = fast_hash(texture_data, texture_length); + if (is_indexed) { + content_hash ^= fast_hash(palette_data, texture_palette_data_size); + } + } + + if (binding_found) { + if (surface_to_texture) { + // FIXME: Add draw time tracking + if (surface->draw_time != snode->draw_time) { + copy_surface_to_texture(pg, surface, snode); + } + } else { + if (possibly_dirty && content_hash != snode->hash) { + upload_texture_image(pg, texture_idx, snode); + snode->hash = content_hash; + } + } + + NV2A_VK_DGROUP_END(); + return; + } + + NV2A_VK_DPRINTF("Cache miss"); + + memcpy(&snode->key, &key, sizeof(key)); + snode->current_layout = VK_IMAGE_LAYOUT_UNDEFINED; + snode->possibly_dirty = false; + snode->hash = content_hash; + + VkColorFormatInfo vkf = kelvin_color_format_vk_map[state.color_format]; + assert(vkf.vk_format != 0); + assert(0 < 
state.dimensionality); + assert(state.dimensionality < ARRAY_SIZE(dimensionality_to_vk_image_type)); + assert(state.dimensionality < + ARRAY_SIZE(dimensionality_to_vk_image_view_type)); + + VkImageCreateInfo image_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, + .imageType = dimensionality_to_vk_image_type[state.dimensionality], + .extent.width = state.width, // FIXME: Use adjusted size? + .extent.height = state.height, + .extent.depth = state.depth, + .mipLevels = f_basic.linear ? 1 : state.levels, + .arrayLayers = state.cubemap ? 6 : 1, + .format = vkf.vk_format, + .tiling = VK_IMAGE_TILING_OPTIMAL, + .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED, + .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_SAMPLED_BIT, + .samples = VK_SAMPLE_COUNT_1_BIT, + .sharingMode = VK_SHARING_MODE_EXCLUSIVE, + .flags = (state.cubemap ? VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT : 0), + }; + + if (surface_to_texture) { + pgraph_apply_scaling_factor(pg, &image_create_info.extent.width, + &image_create_info.extent.height); + } + + VmaAllocationCreateInfo alloc_create_info = { + .usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE, + }; + + VK_CHECK(vmaCreateImage(r->allocator, &image_create_info, + &alloc_create_info, &snode->image, + &snode->allocation, NULL)); + + VkImageViewCreateInfo image_view_create_info = { + .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, + .image = snode->image, + .viewType = state.cubemap ? 
+ VK_IMAGE_VIEW_TYPE_CUBE : + dimensionality_to_vk_image_view_type[state.dimensionality], + .format = vkf.vk_format, + .subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT, + .subresourceRange.baseMipLevel = 0, + .subresourceRange.levelCount = image_create_info.mipLevels, + .subresourceRange.baseArrayLayer = 0, + .subresourceRange.layerCount = image_create_info.arrayLayers, + .components = vkf.component_map, + }; + + VK_CHECK(vkCreateImageView(r->device, &image_view_create_info, NULL, + &snode->image_view)); + + + void *sampler_next_struct = NULL; + + VkSamplerCustomBorderColorCreateInfoEXT custom_border_color_create_info; + VkBorderColor vk_border_color; + + bool is_integer_type = vkf.vk_format == VK_FORMAT_R32_UINT; + + if (r->custom_border_color_extension_enabled) { + vk_border_color = is_integer_type ? VK_BORDER_COLOR_INT_CUSTOM_EXT : + VK_BORDER_COLOR_FLOAT_CUSTOM_EXT; + custom_border_color_create_info = + (VkSamplerCustomBorderColorCreateInfoEXT){ + .sType = + VK_STRUCTURE_TYPE_SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT, + .format = image_view_create_info.format, + .pNext = sampler_next_struct + }; + if (is_integer_type) { + float rgba[4]; + pgraph_argb_pack32_to_rgba_float(border_color_pack32, rgba); + for (int i = 0; i < 4; i++) { + custom_border_color_create_info.customBorderColor.uint32[i] = + (uint32_t)((double)rgba[i] * (double)0xffffffff); + } + } else { + pgraph_argb_pack32_to_rgba_float( + border_color_pack32, + custom_border_color_create_info.customBorderColor.float32); + } + sampler_next_struct = &custom_border_color_create_info; + } else { + // FIXME: Handle custom color in shader + if (is_integer_type) { + vk_border_color = VK_BORDER_COLOR_INT_TRANSPARENT_BLACK; + } else if (border_color_pack32 == 0x00000000) { + vk_border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK; + } else if (border_color_pack32 == 0xff000000) { + vk_border_color = VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK; + } else { + vk_border_color = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE; 
+ } + } + + if (filter & NV_PGRAPH_TEXFILTER0_ASIGNED) + NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_ASIGNED"); + if (filter & NV_PGRAPH_TEXFILTER0_RSIGNED) + NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_RSIGNED"); + if (filter & NV_PGRAPH_TEXFILTER0_GSIGNED) + NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_GSIGNED"); + if (filter & NV_PGRAPH_TEXFILTER0_BSIGNED) + NV2A_UNIMPLEMENTED("NV_PGRAPH_TEXFILTER0_BSIGNED"); + + VkFilter vk_min_filter, vk_mag_filter; + unsigned int mag_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MAG); + assert(mag_filter < ARRAY_SIZE(pgraph_texture_mag_filter_vk_map)); + + unsigned int min_filter = GET_MASK(filter, NV_PGRAPH_TEXFILTER0_MIN); + assert(min_filter < ARRAY_SIZE(pgraph_texture_min_filter_vk_map)); + + if (is_linear_filter_supported_for_format(r, state.color_format)) { + vk_mag_filter = pgraph_texture_min_filter_vk_map[mag_filter]; + vk_min_filter = pgraph_texture_min_filter_vk_map[min_filter]; + } else { + vk_mag_filter = vk_min_filter = VK_FILTER_NEAREST; + } + + bool mipmap_en = + !f_basic.linear && + !(min_filter == NV_PGRAPH_TEXFILTER0_MIN_BOX_LOD0 || + min_filter == NV_PGRAPH_TEXFILTER0_MIN_TENT_LOD0 || + min_filter == NV_PGRAPH_TEXFILTER0_MIN_CONVOLUTION_2D_LOD0); + + bool mipmap_nearest = + f_basic.linear || image_create_info.mipLevels == 1 || + min_filter == NV_PGRAPH_TEXFILTER0_MIN_BOX_NEARESTLOD || + min_filter == NV_PGRAPH_TEXFILTER0_MIN_TENT_NEARESTLOD; + + VkSamplerCreateInfo sampler_create_info = { + .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, + .magFilter = vk_mag_filter, + .minFilter = vk_min_filter, + .addressModeU = lookup_texture_address_mode( + GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRU)), + .addressModeV = lookup_texture_address_mode( + GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRV)), + .addressModeW = lookup_texture_address_mode( + GET_MASK(address, NV_PGRAPH_TEXADDRESS0_ADDRP)), + .anisotropyEnable = VK_FALSE, + // .anisotropyEnable = VK_TRUE, + // .maxAnisotropy = properties.limits.maxSamplerAnisotropy, + 
.borderColor = vk_border_color, + .compareEnable = VK_FALSE, + .compareOp = VK_COMPARE_OP_ALWAYS, + .mipmapMode = mipmap_nearest ? VK_SAMPLER_MIPMAP_MODE_NEAREST : + VK_SAMPLER_MIPMAP_MODE_LINEAR, + .minLod = mipmap_en ? MIN(state.min_mipmap_level, state.levels - 1) : 0.0, + .maxLod = mipmap_en ? MIN(state.max_mipmap_level, state.levels - 1) : 0.0, + .mipLodBias = 0.0, + .pNext = sampler_next_struct, + }; + + VK_CHECK(vkCreateSampler(r->device, &sampler_create_info, NULL, + &snode->sampler)); + + set_texture_label(pg, snode); + + r->texture_bindings[texture_idx] = snode; + + if (surface_to_texture) { + copy_surface_to_texture(pg, surface, snode); + } else { + upload_texture_image(pg, texture_idx, snode); + snode->draw_time = 0; + } + + NV2A_VK_DGROUP_END(); +} + +static bool check_textures_dirty(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + if (!r->texture_bindings[i] || pg->texture_dirty[i]) { + return true; + } + } + return false; +} + +static void update_timestamps(PGRAPHVkState *r) +{ + for (int i = 0; i < ARRAY_SIZE(r->texture_bindings); i++) { + if (r->texture_bindings[i]) { + r->texture_bindings[i]->submit_time = r->submit_count; + } + } +} + +void pgraph_vk_bind_textures(NV2AState *d) +{ + NV2A_VK_DGROUP_BEGIN("%s", __func__); + + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + // FIXME: Check for modifications on bind fastpath (CPU hook) + // FIXME: Mark textures that are sourced from surfaces so we can track them + + r->texture_bindings_changed = false; + + if (!check_textures_dirty(pg)) { + NV2A_VK_DPRINTF("Not dirty"); + NV2A_VK_DGROUP_END(); + update_timestamps(r); + return; + } + + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + if (!pgraph_is_texture_enabled(pg, i)) { + r->texture_bindings[i] = &r->dummy_texture; + continue; + } + + create_texture(pg, i); + + pg->texture_dirty[i] = false; // FIXME: Move to renderer? 
+ } + + r->texture_bindings_changed = true; + update_timestamps(r); + NV2A_VK_DGROUP_END(); +} + +static void texture_cache_entry_init(Lru *lru, LruNode *node, void *state) +{ + TextureBinding *snode = container_of(node, TextureBinding, node); + + snode->image = VK_NULL_HANDLE; + snode->allocation = VK_NULL_HANDLE; + snode->image_view = VK_NULL_HANDLE; + snode->sampler = VK_NULL_HANDLE; +} + +static void texture_cache_release_node_resources(PGRAPHVkState *r, TextureBinding *snode) +{ + vkDestroySampler(r->device, snode->sampler, NULL); + snode->sampler = VK_NULL_HANDLE; + + vkDestroyImageView(r->device, snode->image_view, NULL); + snode->image_view = VK_NULL_HANDLE; + + vmaDestroyImage(r->allocator, snode->image, snode->allocation); + snode->image = VK_NULL_HANDLE; + snode->allocation = VK_NULL_HANDLE; +} + +static bool texture_cache_entry_pre_evict(Lru *lru, LruNode *node) +{ + PGRAPHVkState *r = container_of(lru, PGRAPHVkState, texture_cache); + TextureBinding *snode = container_of(node, TextureBinding, node); + + // FIXME: Simplify. 
We don't really need to check bindings + + + // Currently bound + for (int i = 0; i < ARRAY_SIZE(r->texture_bindings); i++) { + if (r->texture_bindings[i] == snode) { + return false; + } + } + + // Used in command buffer + if (r->in_command_buffer && snode->submit_time == r->submit_count) { + return false; + } + + return true; +} + +static void texture_cache_entry_post_evict(Lru *lru, LruNode *node) +{ + PGRAPHVkState *r = container_of(lru, PGRAPHVkState, texture_cache); + TextureBinding *snode = container_of(node, TextureBinding, node); + texture_cache_release_node_resources(r, snode); +} + +static bool texture_cache_entry_compare(Lru *lru, LruNode *node, void *key) +{ + TextureBinding *snode = container_of(node, TextureBinding, node); + return memcmp(&snode->key, key, sizeof(TextureKey)); +} + +static void texture_cache_init(PGRAPHVkState *r) +{ + const size_t texture_cache_size = 1024; + lru_init(&r->texture_cache); + r->texture_cache_entries = g_malloc_n(texture_cache_size, sizeof(TextureBinding)); + assert(r->texture_cache_entries != NULL); + for (int i = 0; i < texture_cache_size; i++) { + lru_add_free(&r->texture_cache, &r->texture_cache_entries[i].node); + } + r->texture_cache.init_node = texture_cache_entry_init; + r->texture_cache.compare_nodes = texture_cache_entry_compare; + r->texture_cache.pre_node_evict = texture_cache_entry_pre_evict; + r->texture_cache.post_node_evict = texture_cache_entry_post_evict; +} + +static void texture_cache_finalize(PGRAPHVkState *r) +{ + lru_flush(&r->texture_cache); + g_free(r->texture_cache_entries); + r->texture_cache_entries = NULL; +} + +void pgraph_vk_trim_texture_cache(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + // FIXME: Allow specifying some amount to trim by + + int num_to_evict = r->texture_cache.num_used / 4; + int num_evicted = 0; + + while (num_to_evict-- && lru_try_evict_one(&r->texture_cache)) { + num_evicted += 1; + } + + NV2A_VK_DPRINTF("Evicted %d textures, %d remain", 
num_evicted, r->texture_cache.num_used); +} + +void pgraph_vk_init_textures(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + texture_cache_init(r); + create_dummy_texture(pg); + + r->texture_format_properties = g_malloc0_n( + ARRAY_SIZE(kelvin_color_format_vk_map), sizeof(VkFormatProperties)); + for (int i = 0; i < ARRAY_SIZE(kelvin_color_format_vk_map); i++) { + vkGetPhysicalDeviceFormatProperties( + r->physical_device, kelvin_color_format_vk_map[i].vk_format, + &r->texture_format_properties[i]); + } +} + +void pgraph_vk_finalize_textures(PGRAPHState *pg) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(!r->in_command_buffer); + + for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { + r->texture_bindings[i] = NULL; + } + + destroy_dummy_texture(r); + texture_cache_finalize(r); + + assert(r->texture_cache.num_used == 0); + + g_free(r->texture_format_properties); + r->texture_format_properties = NULL; +} diff --git a/hw/xbox/nv2a/pgraph/vk/vertex.c b/hw/xbox/nv2a/pgraph/vk/vertex.c new file mode 100644 index 00000000000..af13bd67b49 --- /dev/null +++ b/hw/xbox/nv2a/pgraph/vk/vertex.c @@ -0,0 +1,314 @@ +/* + * Geforce NV2A PGRAPH Vulkan Renderer + * + * Copyright (c) 2024 Matt Borgerson + * + * Based on GL implementation: + * + * Copyright (c) 2012 espes + * Copyright (c) 2015 Jannik Vogel + * Copyright (c) 2018-2024 Matt Borgerson + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include "renderer.h" + +VkDeviceSize pgraph_vk_update_index_buffer(PGRAPHState *pg, void *data, + VkDeviceSize size) +{ + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_2); + return pgraph_vk_append_to_buffer(pg, BUFFER_INDEX_STAGING, &data, &size, 1, + 1); +} + +VkDeviceSize pgraph_vk_update_vertex_inline_buffer(PGRAPHState *pg, void **data, + VkDeviceSize *sizes, + size_t count) +{ + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_3); + return pgraph_vk_append_to_buffer(pg, BUFFER_VERTEX_INLINE_STAGING, data, + sizes, count, 1); +} + +void pgraph_vk_update_vertex_ram_buffer(PGRAPHState *pg, hwaddr offset, + void *data, VkDeviceSize size) +{ + PGRAPHVkState *r = pg->vk_renderer_state; + + pgraph_vk_download_surfaces_in_range_if_dirty(pg, offset, size); + + size_t offset_bit = offset / 4096; + size_t nbits = size / 4096; + if (find_next_bit(r->uploaded_bitmap, nbits, offset_bit) < nbits) { + // Vertex data changed while building the draw list. Finish drawing + // before updating RAM buffer. 
+ pgraph_vk_finish(pg, VK_FINISH_REASON_VERTEX_BUFFER_DIRTY); + } + + nv2a_profile_inc_counter(NV2A_PROF_GEOM_BUFFER_UPDATE_1); + memcpy(r->storage_buffers[BUFFER_VERTEX_RAM].mapped + offset, data, size); + + bitmap_set(r->uploaded_bitmap, offset_bit, nbits); +} + +static void update_memory_buffer(NV2AState *d, hwaddr addr, hwaddr size) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + assert(r->num_vertex_ram_buffer_syncs < + ARRAY_SIZE(r->vertex_ram_buffer_syncs)); + r->vertex_ram_buffer_syncs[r->num_vertex_ram_buffer_syncs++] = + (MemorySyncRequirement){ .addr = addr, .size = size }; +} + +static const VkFormat float_to_count[] = { + VK_FORMAT_R32_SFLOAT, + VK_FORMAT_R32G32_SFLOAT, + VK_FORMAT_R32G32B32_SFLOAT, + VK_FORMAT_R32G32B32A32_SFLOAT, +}; + +static const VkFormat ub_to_count[] = { + VK_FORMAT_R8_UNORM, + VK_FORMAT_R8G8_UNORM, + VK_FORMAT_R8G8B8_UNORM, + VK_FORMAT_R8G8B8A8_UNORM, +}; + +static const VkFormat s1_to_count[] = { + VK_FORMAT_R16_SNORM, + VK_FORMAT_R16G16_SNORM, + VK_FORMAT_R16G16B16_SNORM, + VK_FORMAT_R16G16B16A16_SNORM, +}; + +static const VkFormat s32k_to_count[] = { + VK_FORMAT_R16_SSCALED, + VK_FORMAT_R16G16_SSCALED, + VK_FORMAT_R16G16B16_SSCALED, + VK_FORMAT_R16G16B16A16_SSCALED, +}; + +static char const * const vertex_data_array_format_to_str[] = { + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D] = "UB_D3D", + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL] = "UB_OGL", + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1] = "S1", + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F] = "F", + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K] = "S32K", + [NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP] = "CMP", +}; + +void pgraph_vk_bind_vertex_attributes(NV2AState *d, unsigned int min_element, + unsigned int max_element, + bool inline_data, + unsigned int inline_stride, + unsigned int provoking_element) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + unsigned int num_elements = 
max_element - min_element + 1; + + if (inline_data) { + NV2A_VK_DGROUP_BEGIN("%s (num_elements: %d inline stride: %d)", + __func__, num_elements, inline_stride); + } else { + NV2A_VK_DGROUP_BEGIN("%s (num_elements: %d)", __func__, num_elements); + } + + pg->compressed_attrs = 0; + pg->uniform_attrs = 0; + pg->swizzle_attrs = 0; + + r->num_active_vertex_attribute_descriptions = 0; + r->num_active_vertex_binding_descriptions = 0; + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attr = &pg->vertex_attributes[i]; + NV2A_VK_DGROUP_BEGIN("[attr %02d] format=%s, count=%d, stride=%d", i, + vertex_data_array_format_to_str[attr->format], + attr->count, attr->stride); + r->vertex_attribute_to_description_location[i] = -1; + if (!attr->count) { + pg->uniform_attrs |= 1 << i; + NV2A_VK_DPRINTF("inline_value = {%f, %f, %f, %f}", + attr->inline_value[0], attr->inline_value[1], + attr->inline_value[2], attr->inline_value[3]); + NV2A_VK_DGROUP_END(); + continue; + } + + VkFormat vk_format; + bool needs_conversion = false; + bool d3d_swizzle = false; + + switch (attr->format) { + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_D3D: + assert(attr->count == 4); + d3d_swizzle = true; + /* fallthru */ + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_UB_OGL: + assert(attr->count <= ARRAY_SIZE(ub_to_count)); + vk_format = ub_to_count[attr->count - 1]; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S1: + assert(attr->count <= ARRAY_SIZE(s1_to_count)); + vk_format = s1_to_count[attr->count - 1]; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_F: + assert(attr->count <= ARRAY_SIZE(float_to_count)); + vk_format = float_to_count[attr->count - 1]; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_S32K: + assert(attr->count <= ARRAY_SIZE(s32k_to_count)); + vk_format = s32k_to_count[attr->count - 1]; + break; + case NV097_SET_VERTEX_DATA_ARRAY_FORMAT_TYPE_CMP: + vk_format = + VK_FORMAT_R32_SINT; // VK_FORMAT_B10G11R11_UFLOAT_PACK32 ?? 
+ /* 3 signed, normalized components packed in 32-bits. (11,11,10) */ + assert(attr->count == 1); + needs_conversion = true; + break; + default: + fprintf(stderr, "Unknown vertex type: 0x%x\n", attr->format); + assert(false); + break; + } + + nv2a_profile_inc_counter(NV2A_PROF_ATTR_BIND); + hwaddr attrib_data_addr; + size_t stride; + + if (needs_conversion) { + pg->compressed_attrs |= (1 << i); + } + if (d3d_swizzle) { + pg->swizzle_attrs |= (1 << i); + } + + hwaddr start = 0; + if (inline_data) { + attrib_data_addr = attr->inline_array_offset; + stride = inline_stride; + } else { + hwaddr dma_len; + uint8_t *attr_data = (uint8_t *)nv_dma_map( + d, attr->dma_select ? pg->dma_vertex_b : pg->dma_vertex_a, + &dma_len); + assert(attr->offset < dma_len); + attrib_data_addr = attr_data + attr->offset - d->vram_ptr; + stride = attr->stride; + start = attrib_data_addr + min_element * stride; + update_memory_buffer(d, start, num_elements * stride); + } + + uint32_t provoking_element_index = provoking_element - min_element; + size_t element_size = attr->size * attr->count; + assert(element_size <= sizeof(attr->inline_value)); + const uint8_t *last_entry; + + if (inline_data) { + last_entry = + (uint8_t *)pg->inline_array + attr->inline_array_offset; + } else { + last_entry = d->vram_ptr + start; + } + if (!stride) { + // Stride of 0 indicates that only the first element should be + // used. 
+ pg->uniform_attrs |= 1 << i; + pgraph_update_inline_value(attr, last_entry); + NV2A_VK_DPRINTF("inline_value = {%f, %f, %f, %f}", + attr->inline_value[0], attr->inline_value[1], + attr->inline_value[2], attr->inline_value[3]); + NV2A_VK_DGROUP_END(); + continue; + } + + NV2A_VK_DPRINTF("offset = %08" HWADDR_PRIx, attrib_data_addr); + last_entry += stride * provoking_element_index; + pgraph_update_inline_value(attr, last_entry); + + r->vertex_attribute_to_description_location[i] = + r->num_active_vertex_binding_descriptions; + + r->vertex_binding_descriptions + [r->num_active_vertex_binding_descriptions++] = + (VkVertexInputBindingDescription){ + .binding = r->vertex_attribute_to_description_location[i], + .stride = stride, + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }; + + r->vertex_attribute_descriptions + [r->num_active_vertex_attribute_descriptions++] = + (VkVertexInputAttributeDescription){ + .binding = r->vertex_attribute_to_description_location[i], + .location = i, + .format = vk_format, + }; + + r->vertex_attribute_offsets[i] = attrib_data_addr; + + NV2A_VK_DGROUP_END(); + } + + NV2A_VK_DGROUP_END(); +} + +void pgraph_vk_bind_vertex_attributes_inline(NV2AState *d) +{ + PGRAPHState *pg = &d->pgraph; + PGRAPHVkState *r = pg->vk_renderer_state; + + pg->compressed_attrs = 0; + pg->uniform_attrs = 0; + pg->swizzle_attrs = 0; + + r->num_active_vertex_attribute_descriptions = 0; + r->num_active_vertex_binding_descriptions = 0; + + for (int i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { + VertexAttribute *attr = &pg->vertex_attributes[i]; + if (attr->inline_buffer_populated) { + r->vertex_attribute_to_description_location[i] = + r->num_active_vertex_binding_descriptions; + r->vertex_binding_descriptions + [r->num_active_vertex_binding_descriptions++] = + (VkVertexInputBindingDescription){ + .binding = + r->vertex_attribute_to_description_location[i], + .stride = 4 * sizeof(float), + .inputRate = VK_VERTEX_INPUT_RATE_VERTEX, + }; + 
r->vertex_attribute_descriptions + [r->num_active_vertex_attribute_descriptions++] = + (VkVertexInputAttributeDescription){ + .binding = + r->vertex_attribute_to_description_location[i], + .location = i, + .format = VK_FORMAT_R32G32B32A32_SFLOAT, + }; + memcpy(attr->inline_value, + attr->inline_buffer + (pg->inline_buffer_length - 1) * 4, + sizeof(attr->inline_value)); + } else { + r->vertex_attribute_to_description_location[i] = -1; + pg->uniform_attrs |= 1 << i; + } + } +} diff --git a/hw/xbox/nv2a/vsh.h b/hw/xbox/nv2a/pgraph/vsh.h similarity index 92% rename from hw/xbox/nv2a/vsh.h rename to hw/xbox/nv2a/pgraph/vsh.h index 18ef4bb5f20..405b6c9aa68 100644 --- a/hw/xbox/nv2a/vsh.h +++ b/hw/xbox/nv2a/pgraph/vsh.h @@ -21,7 +21,7 @@ #define HW_NV2A_VSH_H #include -#include "shaders_common.h" +#include "qemu/mstring.h" enum VshLight { LIGHT_OFF, @@ -130,11 +130,4 @@ typedef enum { uint8_t vsh_get_field(const uint32_t *shader_token, VshFieldName field_name); -void vsh_translate(uint16_t version, - const uint32_t *tokens, - unsigned int length, - bool z_perspective, - MString *header, MString *body); - - #endif diff --git a/hw/xbox/nv2a/shaders.c b/hw/xbox/nv2a/shaders.c deleted file mode 100644 index cafe326e93e..00000000000 --- a/hw/xbox/nv2a/shaders.c +++ /dev/null @@ -1,1599 +0,0 @@ -/* - * QEMU Geforce NV2A shader generator - * - * Copyright (c) 2015 espes - * Copyright (c) 2015 Jannik Vogel - * Copyright (c) 2020-2021 Matt Borgerson - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - */ - -#include "qemu/osdep.h" -#include - -#include "shaders_common.h" -#include "shaders.h" -#include "nv2a_int.h" -#include "ui/xemu-settings.h" -#include "xemu-version.h" - -void mstring_append_fmt(MString *qstring, const char *fmt, ...) -{ - va_list ap; - va_start(ap, fmt); - mstring_append_va(qstring, fmt, ap); - va_end(ap); -} - -MString *mstring_from_fmt(const char *fmt, ...) -{ - MString *ret = mstring_new(); - va_list ap; - va_start(ap, fmt); - mstring_append_va(ret, fmt, ap); - va_end(ap); - - return ret; -} - -void mstring_append_va(MString *qstring, const char *fmt, va_list va) -{ - char scratch[256]; - - va_list ap; - va_copy(ap, va); - const int len = vsnprintf(scratch, sizeof(scratch), fmt, ap); - va_end(ap); - - if (len == 0) { - return; - } else if (len < sizeof(scratch)) { - mstring_append(qstring, scratch); - return; - } - - /* overflowed out scratch buffer, alloc and try again */ - char *buf = g_malloc(len + 1); - va_copy(ap, va); - vsnprintf(buf, len + 1, fmt, ap); - va_end(ap); - - mstring_append(qstring, buf); - g_free(buf); -} - -GLenum get_gl_primitive_mode(enum ShaderPolygonMode polygon_mode, enum ShaderPrimitiveMode primitive_mode) -{ - if (polygon_mode == POLY_MODE_POINT) { - return GL_POINTS; - } - - switch (primitive_mode) { - case PRIM_TYPE_POINTS: return GL_POINTS; - case PRIM_TYPE_LINES: return GL_LINES; - case PRIM_TYPE_LINE_LOOP: return GL_LINE_LOOP; - case PRIM_TYPE_LINE_STRIP: return GL_LINE_STRIP; - case PRIM_TYPE_TRIANGLES: return GL_TRIANGLES; - case PRIM_TYPE_TRIANGLE_STRIP: return GL_TRIANGLE_STRIP; - case PRIM_TYPE_TRIANGLE_FAN: return GL_TRIANGLE_FAN; - case PRIM_TYPE_QUADS: return GL_LINES_ADJACENCY; - case PRIM_TYPE_QUAD_STRIP: return GL_LINE_STRIP_ADJACENCY; - case PRIM_TYPE_POLYGON: - if (polygon_mode == POLY_MODE_LINE) { - return GL_LINE_LOOP; - } else if (polygon_mode == POLY_MODE_FILL) { - 
return GL_TRIANGLE_FAN; - } - - assert(!"PRIM_TYPE_POLYGON with invalid polygon_mode"); - return 0; - default: - assert(!"Invalid primitive_mode"); - return 0; - } -} - -static MString* generate_geometry_shader( - enum ShaderPolygonMode polygon_front_mode, - enum ShaderPolygonMode polygon_back_mode, - enum ShaderPrimitiveMode primitive_mode, - GLenum *gl_primitive_mode, - bool smooth_shading) -{ - /* FIXME: Missing support for 2-sided-poly mode */ - assert(polygon_front_mode == polygon_back_mode); - enum ShaderPolygonMode polygon_mode = polygon_front_mode; - - *gl_primitive_mode = get_gl_primitive_mode(polygon_mode, primitive_mode); - - /* POINT mode shouldn't require any special work */ - if (polygon_mode == POLY_MODE_POINT) { - return NULL; - } - - /* Handle LINE and FILL mode */ - const char *layout_in = NULL; - const char *layout_out = NULL; - const char *body = NULL; - switch (primitive_mode) { - case PRIM_TYPE_POINTS: return NULL; - case PRIM_TYPE_LINES: return NULL; - case PRIM_TYPE_LINE_LOOP: return NULL; - case PRIM_TYPE_LINE_STRIP: return NULL; - case PRIM_TYPE_TRIANGLES: - if (polygon_mode == POLY_MODE_FILL) { return NULL; } - assert(polygon_mode == POLY_MODE_LINE); - layout_in = "layout(triangles) in;\n"; - layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - body = " emit_vertex(0, 0);\n" - " emit_vertex(1, 0);\n" - " emit_vertex(2, 0);\n" - " emit_vertex(0, 0);\n" - " EndPrimitive();\n"; - break; - case PRIM_TYPE_TRIANGLE_STRIP: - if (polygon_mode == POLY_MODE_FILL) { return NULL; } - assert(polygon_mode == POLY_MODE_LINE); - layout_in = "layout(triangles) in;\n"; - layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - /* Imagine a quad made of a tristrip, the comments tell you which - * vertex we are using */ - body = " if ((gl_PrimitiveIDIn & 1) == 0) {\n" - " if (gl_PrimitiveIDIn == 0) {\n" - " emit_vertex(0, 0);\n" /* bottom right */ - " }\n" - " emit_vertex(1, 0);\n" /* top right */ - " emit_vertex(2, 0);\n" /* bottom left */ - " 
emit_vertex(0, 0);\n" /* bottom right */ - " } else {\n" - " emit_vertex(2, 0);\n" /* bottom left */ - " emit_vertex(1, 0);\n" /* top left */ - " emit_vertex(0, 0);\n" /* top right */ - " }\n" - " EndPrimitive();\n"; - break; - case PRIM_TYPE_TRIANGLE_FAN: - if (polygon_mode == POLY_MODE_FILL) { return NULL; } - assert(polygon_mode == POLY_MODE_LINE); - layout_in = "layout(triangles) in;\n"; - layout_out = "layout(line_strip, max_vertices = 4) out;\n"; - body = " if (gl_PrimitiveIDIn == 0) {\n" - " emit_vertex(0, 0);\n" - " }\n" - " emit_vertex(1, 0);\n" - " emit_vertex(2, 0);\n" - " emit_vertex(0, 0);\n" - " EndPrimitive();\n"; - break; - case PRIM_TYPE_QUADS: - layout_in = "layout(lines_adjacency) in;\n"; - if (polygon_mode == POLY_MODE_LINE) { - layout_out = "layout(line_strip, max_vertices = 5) out;\n"; - body = " emit_vertex(0, 3);\n" - " emit_vertex(1, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(3, 3);\n" - " emit_vertex(0, 3);\n" - " EndPrimitive();\n"; - } else if (polygon_mode == POLY_MODE_FILL) { - layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; - body = " emit_vertex(3, 3);\n" - " emit_vertex(0, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(1, 3);\n" - " EndPrimitive();\n"; - } else { - assert(false); - return NULL; - } - break; - case PRIM_TYPE_QUAD_STRIP: - layout_in = "layout(lines_adjacency) in;\n"; - if (polygon_mode == POLY_MODE_LINE) { - layout_out = "layout(line_strip, max_vertices = 5) out;\n"; - body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" - " if (gl_PrimitiveIDIn == 0) {\n" - " emit_vertex(0, 3);\n" - " }\n" - " emit_vertex(1, 3);\n" - " emit_vertex(3, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(0, 3);\n" - " EndPrimitive();\n"; - } else if (polygon_mode == POLY_MODE_FILL) { - layout_out = "layout(triangle_strip, max_vertices = 4) out;\n"; - body = " if ((gl_PrimitiveIDIn & 1) != 0) { return; }\n" - " emit_vertex(0, 3);\n" - " emit_vertex(1, 3);\n" - " emit_vertex(2, 3);\n" - " emit_vertex(3, 3);\n" - " 
EndPrimitive();\n"; - } else { - assert(false); - return NULL; - } - break; - case PRIM_TYPE_POLYGON: - if (polygon_mode == POLY_MODE_LINE) { - return NULL; - } - if (polygon_mode == POLY_MODE_FILL) { - if (smooth_shading) { - return NULL; - } - layout_in = "layout(triangles) in;\n"; - layout_out = "layout(triangle_strip, max_vertices = 3) out;\n"; - body = " emit_vertex(0, 2);\n" - " emit_vertex(1, 2);\n" - " emit_vertex(2, 2);\n" - " EndPrimitive();\n"; - } else { - assert(false); - return NULL; - } - break; - - default: - assert(false); - return NULL; - } - - /* generate a geometry shader to support deprecated primitive types */ - assert(layout_in); - assert(layout_out); - assert(body); - MString* s = mstring_from_str("#version 330\n" - "\n"); - mstring_append(s, layout_in); - mstring_append(s, layout_out); - mstring_append(s, "\n"); - if (smooth_shading) { - mstring_append(s, - STRUCT_V_VERTEX_DATA_IN_ARRAY_SMOOTH - "\n" - STRUCT_VERTEX_DATA_OUT_SMOOTH - "\n" - "void emit_vertex(int index, int _unused) {\n" - " gl_Position = gl_in[index].gl_Position;\n" - " gl_PointSize = gl_in[index].gl_PointSize;\n" - " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n" - " gl_ClipDistance[1] = gl_in[index].gl_ClipDistance[1];\n" - " vtx_inv_w = v_vtx_inv_w[index];\n" - " vtx_inv_w_flat = v_vtx_inv_w[index];\n" - " vtxD0 = v_vtxD0[index];\n" - " vtxD1 = v_vtxD1[index];\n" - " vtxB0 = v_vtxB0[index];\n" - " vtxB1 = v_vtxB1[index];\n" - " vtxFog = v_vtxFog[index];\n" - " vtxT0 = v_vtxT0[index];\n" - " vtxT1 = v_vtxT1[index];\n" - " vtxT2 = v_vtxT2[index];\n" - " vtxT3 = v_vtxT3[index];\n" - " EmitVertex();\n" - "}\n"); - } else { - mstring_append(s, - STRUCT_V_VERTEX_DATA_IN_ARRAY_FLAT - "\n" - STRUCT_VERTEX_DATA_OUT_FLAT - "\n" - "void emit_vertex(int index, int provoking_index) {\n" - " gl_Position = gl_in[index].gl_Position;\n" - " gl_PointSize = gl_in[index].gl_PointSize;\n" - " gl_ClipDistance[0] = gl_in[index].gl_ClipDistance[0];\n" - " gl_ClipDistance[1] = 
gl_in[index].gl_ClipDistance[1];\n" - " vtx_inv_w = v_vtx_inv_w[index];\n" - " vtx_inv_w_flat = v_vtx_inv_w[provoking_index];\n" - " vtxD0 = v_vtxD0[provoking_index];\n" - " vtxD1 = v_vtxD1[provoking_index];\n" - " vtxB0 = v_vtxB0[provoking_index];\n" - " vtxB1 = v_vtxB1[provoking_index];\n" - " vtxFog = v_vtxFog[index];\n" - " vtxT0 = v_vtxT0[index];\n" - " vtxT1 = v_vtxT1[index];\n" - " vtxT2 = v_vtxT2[index];\n" - " vtxT3 = v_vtxT3[index];\n" - " EmitVertex();\n" - "}\n"); - } - - mstring_append(s, "\n" - "void main() {\n"); - mstring_append(s, body); - mstring_append(s, "}\n"); - - return s; -} - -static void append_skinning_code(MString* str, bool mix, - unsigned int count, const char* type, - const char* output, const char* input, - const char* matrix, const char* swizzle) -{ - if (count == 0) { - mstring_append_fmt(str, "%s %s = (%s * %s0).%s;\n", - type, output, input, matrix, swizzle); - } else { - mstring_append_fmt(str, "%s %s = %s(0.0);\n", type, output, type); - if (mix) { - /* Generated final weight (like GL_WEIGHT_SUM_UNITY_ARB) */ - mstring_append(str, "{\n" - " float weight_i;\n" - " float weight_n = 1.0;\n"); - int i; - for (i = 0; i < count; i++) { - if (i < (count - 1)) { - char c = "xyzw"[i]; - mstring_append_fmt(str, " weight_i = weight.%c;\n" - " weight_n -= weight_i;\n", - c); - } else { - mstring_append(str, " weight_i = weight_n;\n"); - } - mstring_append_fmt(str, " %s += (%s * %s%d).%s * weight_i;\n", - output, input, matrix, i, swizzle); - } - mstring_append(str, "}\n"); - } else { - /* Individual weights */ - int i; - for (i = 0; i < count; i++) { - char c = "xyzw"[i]; - mstring_append_fmt(str, "%s += (%s * %s%d).%s * weight.%c;\n", - output, input, matrix, i, swizzle, c); - } - } - } -} - -#define GLSL_C(idx) "c[" stringify(idx) "]" -#define GLSL_LTCTXA(idx) "ltctxa[" stringify(idx) "]" - -#define GLSL_C_MAT4(idx) \ - "mat4(" GLSL_C(idx) ", " GLSL_C(idx+1) ", " \ - GLSL_C(idx+2) ", " GLSL_C(idx+3) ")" - -#define GLSL_DEFINE(a, b) 
"#define " stringify(a) " " b "\n" - -static void generate_fixed_function(const ShaderState *state, - MString *header, MString *body) -{ - int i, j; - - /* generate vertex shader mimicking fixed function */ - mstring_append(header, -"#define position v0\n" -"#define weight v1\n" -"#define normal v2.xyz\n" -"#define diffuse v3\n" -"#define specular v4\n" -"#define fogCoord v5.x\n" -"#define pointSize v6\n" -"#define backDiffuse v7\n" -"#define backSpecular v8\n" -"#define texture0 v9\n" -"#define texture1 v10\n" -"#define texture2 v11\n" -"#define texture3 v12\n" -"#define reserved1 v13\n" -"#define reserved2 v14\n" -"#define reserved3 v15\n" -"\n" -"uniform vec4 ltctxa[" stringify(NV2A_LTCTXA_COUNT) "];\n" -"uniform vec4 ltctxb[" stringify(NV2A_LTCTXB_COUNT) "];\n" -"uniform vec4 ltc1[" stringify(NV2A_LTC1_COUNT) "];\n" -"\n" -GLSL_DEFINE(projectionMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_PMAT0)) -GLSL_DEFINE(compositeMat, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_CMAT0)) -"\n" -GLSL_DEFINE(texPlaneS0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 0)) -GLSL_DEFINE(texPlaneT0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 1)) -GLSL_DEFINE(texPlaneR0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 2)) -GLSL_DEFINE(texPlaneQ0, GLSL_C(NV_IGRAPH_XF_XFCTX_TG0MAT + 3)) -"\n" -GLSL_DEFINE(texPlaneS1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 0)) -GLSL_DEFINE(texPlaneT1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 1)) -GLSL_DEFINE(texPlaneR1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 2)) -GLSL_DEFINE(texPlaneQ1, GLSL_C(NV_IGRAPH_XF_XFCTX_TG1MAT + 3)) -"\n" -GLSL_DEFINE(texPlaneS2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 0)) -GLSL_DEFINE(texPlaneT2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 1)) -GLSL_DEFINE(texPlaneR2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 2)) -GLSL_DEFINE(texPlaneQ2, GLSL_C(NV_IGRAPH_XF_XFCTX_TG2MAT + 3)) -"\n" -GLSL_DEFINE(texPlaneS3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 0)) -GLSL_DEFINE(texPlaneT3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 1)) -GLSL_DEFINE(texPlaneR3, GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 2)) -GLSL_DEFINE(texPlaneQ3, 
GLSL_C(NV_IGRAPH_XF_XFCTX_TG3MAT + 3)) -"\n" -GLSL_DEFINE(modelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT0)) -GLSL_DEFINE(modelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT1)) -GLSL_DEFINE(modelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT2)) -GLSL_DEFINE(modelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_MMAT3)) -"\n" -GLSL_DEFINE(invModelViewMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT0)) -GLSL_DEFINE(invModelViewMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT1)) -GLSL_DEFINE(invModelViewMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT2)) -GLSL_DEFINE(invModelViewMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_IMMAT3)) -"\n" -GLSL_DEFINE(eyePosition, GLSL_C(NV_IGRAPH_XF_XFCTX_EYEP)) -"\n" -"#define lightAmbientColor(i) " - "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_AMB) " + (i)*6].xyz\n" -"#define lightDiffuseColor(i) " - "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_DIF) " + (i)*6].xyz\n" -"#define lightSpecularColor(i) " - "ltctxb[" stringify(NV_IGRAPH_XF_LTCTXB_L0_SPC) " + (i)*6].xyz\n" -"\n" -"#define lightSpotFalloff(i) " - "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_K) " + (i)*2].xyz\n" -"#define lightSpotDirection(i) " - "ltctxa[" stringify(NV_IGRAPH_XF_LTCTXA_L0_SPT) " + (i)*2]\n" -"\n" -"#define lightLocalRange(i) " - "ltc1[" stringify(NV_IGRAPH_XF_LTC1_r0) " + (i)].x\n" -"\n" -GLSL_DEFINE(sceneAmbientColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_FR_AMB) ".xyz") -GLSL_DEFINE(materialEmissionColor, GLSL_LTCTXA(NV_IGRAPH_XF_LTCTXA_CM_COL) ".xyz") -"\n" -"uniform mat4 invViewport;\n" -"\n"); - - /* Skinning */ - unsigned int count; - bool mix; - switch (state->skinning) { - case SKINNING_OFF: - mix = false; count = 0; break; - case SKINNING_1WEIGHTS: - mix = true; count = 2; break; - case SKINNING_2WEIGHTS2MATRICES: - mix = false; count = 2; break; - case SKINNING_2WEIGHTS: - mix = true; count = 3; break; - case SKINNING_3WEIGHTS3MATRICES: - mix = false; count = 3; break; - case SKINNING_3WEIGHTS: - mix = true; count = 4; break; - case SKINNING_4WEIGHTS4MATRICES: - mix = false; count = 4; 
break; - default: - assert(false); - break; - } - mstring_append_fmt(body, "/* Skinning mode %d */\n", - state->skinning); - - append_skinning_code(body, mix, count, "vec4", - "tPosition", "position", - "modelViewMat", "xyzw"); - append_skinning_code(body, mix, count, "vec3", - "tNormal", "vec4(normal, 0.0)", - "invModelViewMat", "xyz"); - - /* Normalization */ - if (state->normalization) { - mstring_append(body, "tNormal = normalize(tNormal);\n"); - } - - /* Texgen */ - for (i = 0; i < NV2A_MAX_TEXTURES; i++) { - mstring_append_fmt(body, "/* Texgen for stage %d */\n", - i); - /* Set each component individually */ - /* FIXME: could be nicer if some channels share the same texgen */ - for (j = 0; j < 4; j++) { - /* TODO: TexGen View Model missing! */ - char c = "xyzw"[j]; - char cSuffix = "STRQ"[j]; - switch (state->texgen[i][j]) { - case TEXGEN_DISABLE: - mstring_append_fmt(body, "oT%d.%c = texture%d.%c;\n", - i, c, i, c); - break; - case TEXGEN_EYE_LINEAR: - mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, tPosition);\n", - i, c, cSuffix, i); - break; - case TEXGEN_OBJECT_LINEAR: - mstring_append_fmt(body, "oT%d.%c = dot(texPlane%c%d, position);\n", - i, c, cSuffix, i); - break; - case TEXGEN_SPHERE_MAP: - assert(j < 2); /* Channels S,T only! */ - mstring_append(body, "{\n"); - /* FIXME: u, r and m only have to be calculated once */ - mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n"); - //FIXME: tNormal before or after normalization? Always normalize? 
- mstring_append(body, " vec3 r = reflect(u, tNormal);\n"); - - /* FIXME: This would consume 1 division fewer and *might* be - * faster than length: - * // [z=1/(2*x) => z=1/x*0.5] - * vec3 ro = r + vec3(0.0, 0.0, 1.0); - * float m = inversesqrt(dot(ro,ro))*0.5; - */ - - mstring_append(body, " float invM = 1.0 / (2.0 * length(r + vec3(0.0, 0.0, 1.0)));\n"); - mstring_append_fmt(body, " oT%d.%c = r.%c * invM + 0.5;\n", - i, c, c); - mstring_append(body, "}\n"); - break; - case TEXGEN_REFLECTION_MAP: - assert(j < 3); /* Channels S,T,R only! */ - mstring_append(body, "{\n"); - /* FIXME: u and r only have to be calculated once, can share the one from SPHERE_MAP */ - mstring_append(body, " vec3 u = normalize(tPosition.xyz);\n"); - mstring_append(body, " vec3 r = reflect(u, tNormal);\n"); - mstring_append_fmt(body, " oT%d.%c = r.%c;\n", - i, c, c); - mstring_append(body, "}\n"); - break; - case TEXGEN_NORMAL_MAP: - assert(j < 3); /* Channels S,T,R only! */ - mstring_append_fmt(body, "oT%d.%c = tNormal.%c;\n", - i, c, c); - break; - default: - assert(false); - break; - } - } - } - - /* Apply texture matrices */ - for (i = 0; i < NV2A_MAX_TEXTURES; i++) { - if (state->texture_matrix_enable[i]) { - mstring_append_fmt(body, - "oT%d = oT%d * texMat%d;\n", - i, i, i); - } - } - - /* Lighting */ - if (state->lighting) { - - //FIXME: Do 2 passes if we want 2 sided-lighting? 
- - static char alpha_source_diffuse[] = "diffuse.a"; - static char alpha_source_specular[] = "specular.a"; - static char alpha_source_material[] = "material_alpha"; - const char *alpha_source = alpha_source_diffuse; - if (state->diffuse_src == MATERIAL_COLOR_SRC_MATERIAL) { - mstring_append(header, "uniform float material_alpha;\n"); - alpha_source = alpha_source_material; - } else if (state->diffuse_src == MATERIAL_COLOR_SRC_SPECULAR) { - alpha_source = alpha_source_specular; - } - - if (state->ambient_src == MATERIAL_COLOR_SRC_MATERIAL) { - mstring_append_fmt(body, "oD0 = vec4(sceneAmbientColor, %s);\n", alpha_source); - } else if (state->ambient_src == MATERIAL_COLOR_SRC_DIFFUSE) { - mstring_append_fmt(body, "oD0 = vec4(diffuse.rgb, %s);\n", alpha_source); - } else if (state->ambient_src == MATERIAL_COLOR_SRC_SPECULAR) { - mstring_append_fmt(body, "oD0 = vec4(specular.rgb, %s);\n", alpha_source); - } - - mstring_append(body, "oD0.rgb *= materialEmissionColor.rgb;\n"); - if (state->emission_src == MATERIAL_COLOR_SRC_MATERIAL) { - mstring_append(body, "oD0.rgb += sceneAmbientColor;\n"); - } else if (state->emission_src == MATERIAL_COLOR_SRC_DIFFUSE) { - mstring_append(body, "oD0.rgb += diffuse.rgb;\n"); - } else if (state->emission_src == MATERIAL_COLOR_SRC_SPECULAR) { - mstring_append(body, "oD0.rgb += specular.rgb;\n"); - } - - mstring_append(body, "oD1 = vec4(0.0, 0.0, 0.0, specular.a);\n"); - - for (i = 0; i < NV2A_MAX_LIGHTS; i++) { - if (state->light[i] == LIGHT_OFF) { - continue; - } - - /* FIXME: It seems that we only have to handle the surface colors if - * they are not part of the material [= vertex colors]. 
- * If they are material the cpu will premultiply light - * colors - */ - - mstring_append_fmt(body, "/* Light %d */ {\n", i); - - if (state->light[i] == LIGHT_LOCAL - || state->light[i] == LIGHT_SPOT) { - - mstring_append_fmt(header, - "uniform vec3 lightLocalPosition%d;\n" - "uniform vec3 lightLocalAttenuation%d;\n", - i, i); - mstring_append_fmt(body, - " vec3 VP = lightLocalPosition%d - tPosition.xyz/tPosition.w;\n" - " float d = length(VP);\n" -//FIXME: if (d > lightLocalRange) { .. don't process this light .. } /* inclusive?! */ - what about directional lights? - " VP = normalize(VP);\n" - " float attenuation = 1.0 / (lightLocalAttenuation%d.x\n" - " + lightLocalAttenuation%d.y * d\n" - " + lightLocalAttenuation%d.z * d * d);\n" - " vec3 halfVector = normalize(VP + eyePosition.xyz / eyePosition.w);\n" /* FIXME: Not sure if eyePosition is correct */ - " float nDotVP = max(0.0, dot(tNormal, VP));\n" - " float nDotHV = max(0.0, dot(tNormal, halfVector));\n", - i, i, i, i); - - } - - switch(state->light[i]) { - case LIGHT_INFINITE: - - /* lightLocalRange will be 1e+30 here */ - - mstring_append_fmt(header, - "uniform vec3 lightInfiniteHalfVector%d;\n" - "uniform vec3 lightInfiniteDirection%d;\n", - i, i); - mstring_append_fmt(body, - " float attenuation = 1.0;\n" - " float nDotVP = max(0.0, dot(tNormal, normalize(vec3(lightInfiniteDirection%d))));\n" - " float nDotHV = max(0.0, dot(tNormal, vec3(lightInfiniteHalfVector%d)));\n", - i, i); - - /* FIXME: Do specular */ - - /* FIXME: tBackDiffuse */ - - break; - case LIGHT_LOCAL: - /* Everything done already */ - break; - case LIGHT_SPOT: - /* https://docs.microsoft.com/en-us/windows/win32/direct3d9/attenuation-and-spotlight-factor#spotlight-factor */ - mstring_append_fmt(body, - " vec4 spotDir = lightSpotDirection(%d);\n" - " float invScale = 1/length(spotDir.xyz);\n" - " float cosHalfPhi = -invScale*spotDir.w;\n" - " float cosHalfTheta = invScale + cosHalfPhi;\n" - " float spotDirDotVP = dot(spotDir.xyz, VP);\n" - 
" float rho = invScale*spotDirDotVP;\n" - " if (rho > cosHalfTheta) {\n" - " } else if (rho <= cosHalfPhi) {\n" - " attenuation = 0.0;\n" - " } else {\n" - " attenuation *= spotDirDotVP + spotDir.w;\n" /* FIXME: lightSpotFalloff */ - " }\n", - i); - break; - default: - assert(false); - break; - } - - mstring_append_fmt(body, - " float pf;\n" - " if (nDotVP == 0.0) {\n" - " pf = 0.0;\n" - " } else {\n" - " pf = pow(nDotHV, /* specular(l, m, n, l1, m1, n1) */ 0.001);\n" - " }\n" - " vec3 lightAmbient = lightAmbientColor(%d) * attenuation;\n" - " vec3 lightDiffuse = lightDiffuseColor(%d) * attenuation * nDotVP;\n" - " vec3 lightSpecular = lightSpecularColor(%d) * pf;\n", - i, i, i); - - mstring_append(body, - " oD0.xyz += lightAmbient;\n"); - - switch (state->diffuse_src) { - case MATERIAL_COLOR_SRC_MATERIAL: - mstring_append(body, - " oD0.xyz += lightDiffuse;\n"); - break; - case MATERIAL_COLOR_SRC_DIFFUSE: - mstring_append(body, - " oD0.xyz += diffuse.xyz * lightDiffuse;\n"); - break; - case MATERIAL_COLOR_SRC_SPECULAR: - mstring_append(body, - " oD0.xyz += specular.xyz * lightDiffuse;\n"); - break; - } - - mstring_append(body, - " oD1.xyz += specular.xyz * lightSpecular;\n"); - - mstring_append(body, "}\n"); - } - } else { - mstring_append(body, " oD0 = diffuse;\n"); - mstring_append(body, " oD1 = specular;\n"); - } - mstring_append(body, " oB0 = backDiffuse;\n"); - mstring_append(body, " oB1 = backSpecular;\n"); - - /* Fog */ - if (state->fog_enable) { - - /* From: https://www.opengl.org/registry/specs/NV/fog_distance.txt */ - switch(state->foggen) { - case FOGGEN_SPEC_ALPHA: - /* FIXME: Do we have to clamp here? 
*/ - mstring_append(body, " float fogDistance = clamp(specular.a, 0.0, 1.0);\n"); - break; - case FOGGEN_RADIAL: - mstring_append(body, " float fogDistance = length(tPosition.xyz);\n"); - break; - case FOGGEN_PLANAR: - case FOGGEN_ABS_PLANAR: - mstring_append(body, " float fogDistance = dot(fogPlane.xyz, tPosition.xyz) + fogPlane.w;\n"); - if (state->foggen == FOGGEN_ABS_PLANAR) { - mstring_append(body, " fogDistance = abs(fogDistance);\n"); - } - break; - case FOGGEN_FOG_X: - mstring_append(body, " float fogDistance = fogCoord;\n"); - break; - default: - assert(false); - break; - } - - } - - /* If skinning is off the composite matrix already includes the MV matrix */ - if (state->skinning == SKINNING_OFF) { - mstring_append(body, " tPosition = position;\n"); - } - - mstring_append(body, - " oPos = invViewport * (tPosition * compositeMat);\n" - " oPos.z = oPos.z * 2.0 - oPos.w;\n"); - - /* FIXME: Testing */ - if (state->point_params_enable) { - mstring_append_fmt( - body, - " float d_e = length(position * modelViewMat0);\n" - " oPts.x = 1/sqrt(%f + %f*d_e + %f*d_e*d_e) + %f;\n", - state->point_params[0], state->point_params[1], state->point_params[2], - state->point_params[6]); - mstring_append_fmt(body, " oPts.x = min(oPts.x*%f + %f, 64.0) * %d;\n", - state->point_params[3], state->point_params[7], - state->surface_scale_factor); - } else { - mstring_append_fmt(body, " oPts.x = %f * %d;\n", state->point_size, - state->surface_scale_factor); - } - - mstring_append(body, - " if (oPos.w == 0.0 || isinf(oPos.w)) {\n" - " vtx_inv_w = 1.0;\n" - " } else {\n" - " vtx_inv_w = 1.0 / oPos.w;\n" - " }\n" - " vtx_inv_w_flat = vtx_inv_w;\n"); -} - -static MString *generate_vertex_shader(const ShaderState *state, - bool prefix_outputs) -{ - int i; - MString *header = mstring_from_str( -"#version 400\n" -"\n" -"uniform vec4 clipRange;\n" -"uniform vec2 surfaceSize;\n" -"\n" -/* All constants in 1 array declaration */ -"uniform vec4 c[" stringify(NV2A_VERTEXSHADER_CONSTANTS) 
"];\n" -"\n" -"uniform vec4 fogColor;\n" -"uniform float fogParam[2];\n" -"\n" - -GLSL_DEFINE(fogPlane, GLSL_C(NV_IGRAPH_XF_XFCTX_FOG)) -GLSL_DEFINE(texMat0, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T0MAT)) -GLSL_DEFINE(texMat1, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T1MAT)) -GLSL_DEFINE(texMat2, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T2MAT)) -GLSL_DEFINE(texMat3, GLSL_C_MAT4(NV_IGRAPH_XF_XFCTX_T3MAT)) - -"\n" -"vec4 oPos = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oD0 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oD1 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oB0 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oB1 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oPts = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oFog = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oT0 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oT1 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oT2 = vec4(0.0,0.0,0.0,1.0);\n" -"vec4 oT3 = vec4(0.0,0.0,0.0,1.0);\n" -"\n" -"vec4 decompress_11_11_10(int cmp) {\n" -" float x = float(bitfieldExtract(cmp, 0, 11)) / 1023.0;\n" -" float y = float(bitfieldExtract(cmp, 11, 11)) / 1023.0;\n" -" float z = float(bitfieldExtract(cmp, 22, 10)) / 511.0;\n" -" return vec4(x, y, z, 1);\n" -"}\n"); - if (prefix_outputs) { - mstring_append(header, state->smooth_shading ? - STRUCT_V_VERTEX_DATA_OUT_SMOOTH : - STRUCT_V_VERTEX_DATA_OUT_FLAT); - mstring_append(header, - "#define vtx_inv_w v_vtx_inv_w\n" - "#define vtx_inv_w_flat v_vtx_inv_w_flat\n" - "#define vtxD0 v_vtxD0\n" - "#define vtxD1 v_vtxD1\n" - "#define vtxB0 v_vtxB0\n" - "#define vtxB1 v_vtxB1\n" - "#define vtxFog v_vtxFog\n" - "#define vtxT0 v_vtxT0\n" - "#define vtxT1 v_vtxT1\n" - "#define vtxT2 v_vtxT2\n" - "#define vtxT3 v_vtxT3\n" - ); - } else { - mstring_append(header, state->smooth_shading ? 
- STRUCT_VERTEX_DATA_OUT_SMOOTH : - STRUCT_VERTEX_DATA_OUT_FLAT); - } - mstring_append(header, "\n"); - for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - if (state->compressed_attrs & (1 << i)) { - mstring_append_fmt(header, - "layout(location = %d) in int v%d_cmp;\n", i, i); - } else { - mstring_append_fmt(header, "layout(location = %d) in vec4 v%d;\n", - i, i); - } - } - mstring_append(header, "\n"); - - MString *body = mstring_from_str("void main() {\n"); - - for (i = 0; i < NV2A_VERTEXSHADER_ATTRIBUTES; i++) { - if (state->compressed_attrs & (1 << i)) { - mstring_append_fmt( - body, "vec4 v%d = decompress_11_11_10(v%d_cmp);\n", i, i); - } - } - - if (state->fixed_function) { - generate_fixed_function(state, header, body); - } else if (state->vertex_program) { - vsh_translate(VSH_VERSION_XVS, - (uint32_t*)state->program_data, - state->program_length, - state->z_perspective, - header, body); - } else { - assert(false); - } - - - /* Fog */ - - if (state->fog_enable) { - - if (state->vertex_program) { - /* FIXME: Does foggen do something here? Let's do some tracking.. - * - * "RollerCoaster Tycoon" has - * state->vertex_program = true; state->foggen == FOGGEN_PLANAR - * but expects oFog.x as fogdistance?! Writes oFog.xyzw = v0.z - */ - mstring_append(body, " float fogDistance = oFog.x;\n"); - } - - /* FIXME: Do this per pixel? 
*/ - - switch (state->fog_mode) { - case FOG_MODE_LINEAR: - case FOG_MODE_LINEAR_ABS: - - /* f = (end - d) / (end - start) - * fogParam[1] = -1 / (end - start) - * fogParam[0] = 1 - end * fogParam[1]; - */ - - mstring_append(body, - " if (isinf(fogDistance)) {\n" - " fogDistance = 0.0;\n" - " }\n" - ); - mstring_append(body, " float fogFactor = fogParam[0] + fogDistance * fogParam[1];\n"); - mstring_append(body, " fogFactor -= 1.0;\n"); - break; - case FOG_MODE_EXP: - mstring_append(body, - " if (isinf(fogDistance)) {\n" - " fogDistance = 0.0;\n" - " }\n" - ); - /* fallthru */ - case FOG_MODE_EXP_ABS: - - /* f = 1 / (e^(d * density)) - * fogParam[1] = -density / (2 * ln(256)) - * fogParam[0] = 1.5 - */ - - mstring_append(body, " float fogFactor = fogParam[0] + exp2(fogDistance * fogParam[1] * 16.0);\n"); - mstring_append(body, " fogFactor -= 1.5;\n"); - break; - case FOG_MODE_EXP2: - case FOG_MODE_EXP2_ABS: - - /* f = 1 / (e^((d * density)^2)) - * fogParam[1] = -density / (2 * sqrt(ln(256))) - * fogParam[0] = 1.5 - */ - - mstring_append(body, " float fogFactor = fogParam[0] + exp2(-fogDistance * fogDistance * fogParam[1] * fogParam[1] * 32.0);\n"); - mstring_append(body, " fogFactor -= 1.5;\n"); - break; - default: - assert(false); - break; - } - /* Calculate absolute for the modes which need it */ - switch (state->fog_mode) { - case FOG_MODE_LINEAR_ABS: - case FOG_MODE_EXP_ABS: - case FOG_MODE_EXP2_ABS: - mstring_append(body, " fogFactor = abs(fogFactor);\n"); - break; - default: - break; - } - - mstring_append(body, " oFog.xyzw = vec4(fogFactor);\n"); - } else { - /* FIXME: Is the fog still calculated / passed somehow?! - */ - mstring_append(body, " oFog.xyzw = vec4(1.0);\n"); - } - - /* Set outputs */ - const char *shade_model_mult = state->smooth_shading ? 
"vtx_inv_w" : "vtx_inv_w_flat"; - mstring_append_fmt(body, "\n" - " vtxD0 = clamp(oD0, 0.0, 1.0) * %s;\n" - " vtxD1 = clamp(oD1, 0.0, 1.0) * %s;\n" - " vtxB0 = clamp(oB0, 0.0, 1.0) * %s;\n" - " vtxB1 = clamp(oB1, 0.0, 1.0) * %s;\n" - " vtxFog = oFog.x * vtx_inv_w;\n" - " vtxT0 = oT0 * vtx_inv_w;\n" - " vtxT1 = oT1 * vtx_inv_w;\n" - " vtxT2 = oT2 * vtx_inv_w;\n" - " vtxT3 = oT3 * vtx_inv_w;\n" - " gl_Position = oPos;\n" - " gl_PointSize = oPts.x;\n" - " gl_ClipDistance[0] = oPos.z - oPos.w*clipRange.z;\n" // Near - " gl_ClipDistance[1] = oPos.w*clipRange.w - oPos.z;\n" // Far - "\n" - "}\n", - shade_model_mult, - shade_model_mult, - shade_model_mult, - shade_model_mult); - - - /* Return combined header + source */ - mstring_append(header, mstring_get_str(body)); - mstring_unref(body); - return header; - -} - -static GLuint create_gl_shader(GLenum gl_shader_type, - const char *code, - const char *name) -{ - GLint compiled = 0; - - NV2A_GL_DGROUP_BEGIN("Creating new %s", name); - - NV2A_DPRINTF("compile new %s, code:\n%s\n", name, code); - - GLuint shader = glCreateShader(gl_shader_type); - glShaderSource(shader, 1, &code, 0); - glCompileShader(shader); - - /* Check it compiled */ - compiled = 0; - glGetShaderiv(shader, GL_COMPILE_STATUS, &compiled); - if (!compiled) { - GLchar* log; - GLint log_length; - glGetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); - log = g_malloc(log_length * sizeof(GLchar)); - glGetShaderInfoLog(shader, log_length, NULL, log); - fprintf(stderr, "%s\n\n" "nv2a: %s compilation failed: %s\n", code, name, log); - g_free(log); - - NV2A_GL_DGROUP_END(); - abort(); - } - - NV2A_GL_DGROUP_END(); - - return shader; -} - -void update_shader_constant_locations(ShaderBinding *binding, const ShaderState *state) -{ - int i, j; - char tmp[64]; - - /* set texture samplers */ - for (i = 0; i < NV2A_MAX_TEXTURES; i++) { - char samplerName[16]; - snprintf(samplerName, sizeof(samplerName), "texSamp%d", i); - GLint texSampLoc = 
glGetUniformLocation(binding->gl_program, samplerName); - if (texSampLoc >= 0) { - glUniform1i(texSampLoc, i); - } - } - - /* validate the program */ - glValidateProgram(binding->gl_program); - GLint valid = 0; - glGetProgramiv(binding->gl_program, GL_VALIDATE_STATUS, &valid); - if (!valid) { - GLchar log[1024]; - glGetProgramInfoLog(binding->gl_program, 1024, NULL, log); - fprintf(stderr, "nv2a: shader validation failed: %s\n", log); - abort(); - } - - /* lookup fragment shader uniforms */ - for (i = 0; i < 9; i++) { - for (j = 0; j < 2; j++) { - snprintf(tmp, sizeof(tmp), "c%d_%d", j, i); - binding->psh_constant_loc[i][j] = glGetUniformLocation(binding->gl_program, tmp); - } - } - binding->alpha_ref_loc = glGetUniformLocation(binding->gl_program, "alphaRef"); - for (i = 1; i < NV2A_MAX_TEXTURES; i++) { - snprintf(tmp, sizeof(tmp), "bumpMat%d", i); - binding->bump_mat_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - snprintf(tmp, sizeof(tmp), "bumpScale%d", i); - binding->bump_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - snprintf(tmp, sizeof(tmp), "bumpOffset%d", i); - binding->bump_offset_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - - for (int i = 0; i < NV2A_MAX_TEXTURES; i++) { - snprintf(tmp, sizeof(tmp), "texScale%d", i); - binding->tex_scale_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - - /* lookup vertex shader uniforms */ - for(i = 0; i < NV2A_VERTEXSHADER_CONSTANTS; i++) { - snprintf(tmp, sizeof(tmp), "c[%d]", i); - binding->vsh_constant_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - binding->surface_size_loc = glGetUniformLocation(binding->gl_program, "surfaceSize"); - binding->clip_range_loc = glGetUniformLocation(binding->gl_program, "clipRange"); - binding->fog_color_loc = glGetUniformLocation(binding->gl_program, "fogColor"); - binding->fog_param_loc[0] = glGetUniformLocation(binding->gl_program, "fogParam[0]"); - binding->fog_param_loc[1] = 
glGetUniformLocation(binding->gl_program, "fogParam[1]"); - - binding->inv_viewport_loc = glGetUniformLocation(binding->gl_program, "invViewport"); - for (i = 0; i < NV2A_LTCTXA_COUNT; i++) { - snprintf(tmp, sizeof(tmp), "ltctxa[%d]", i); - binding->ltctxa_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - for (i = 0; i < NV2A_LTCTXB_COUNT; i++) { - snprintf(tmp, sizeof(tmp), "ltctxb[%d]", i); - binding->ltctxb_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - for (i = 0; i < NV2A_LTC1_COUNT; i++) { - snprintf(tmp, sizeof(tmp), "ltc1[%d]", i); - binding->ltc1_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - for (i = 0; i < NV2A_MAX_LIGHTS; i++) { - snprintf(tmp, sizeof(tmp), "lightInfiniteHalfVector%d", i); - binding->light_infinite_half_vector_loc[i] = - glGetUniformLocation(binding->gl_program, tmp); - snprintf(tmp, sizeof(tmp), "lightInfiniteDirection%d", i); - binding->light_infinite_direction_loc[i] = - glGetUniformLocation(binding->gl_program, tmp); - - snprintf(tmp, sizeof(tmp), "lightLocalPosition%d", i); - binding->light_local_position_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - snprintf(tmp, sizeof(tmp), "lightLocalAttenuation%d", i); - binding->light_local_attenuation_loc[i] = - glGetUniformLocation(binding->gl_program, tmp); - } - for (i = 0; i < 8; i++) { - snprintf(tmp, sizeof(tmp), "clipRegion[%d]", i); - binding->clip_region_loc[i] = glGetUniformLocation(binding->gl_program, tmp); - } - - if (state->fixed_function) { - binding->material_alpha_loc = - glGetUniformLocation(binding->gl_program, "material_alpha"); - } else { - binding->material_alpha_loc = -1; - } -} - -ShaderBinding *generate_shaders(const ShaderState *state) -{ - char *previous_numeric_locale = setlocale(LC_NUMERIC, NULL); - if (previous_numeric_locale) { - previous_numeric_locale = g_strdup(previous_numeric_locale); - } - - /* Ensure numeric values are printed with '.' 
radix, no grouping */ - setlocale(LC_NUMERIC, "C"); - GLuint program = glCreateProgram(); - - /* Create an optional geometry shader and find primitive type */ - GLenum gl_primitive_mode; - MString* geometry_shader_code = - generate_geometry_shader(state->polygon_front_mode, - state->polygon_back_mode, - state->primitive_mode, - &gl_primitive_mode, - state->smooth_shading); - if (geometry_shader_code) { - const char* geometry_shader_code_str = - mstring_get_str(geometry_shader_code); - GLuint geometry_shader = create_gl_shader(GL_GEOMETRY_SHADER, - geometry_shader_code_str, - "geometry shader"); - glAttachShader(program, geometry_shader); - mstring_unref(geometry_shader_code); - } - - /* create the vertex shader */ - MString *vertex_shader_code = - generate_vertex_shader(state, geometry_shader_code != NULL); - GLuint vertex_shader = create_gl_shader(GL_VERTEX_SHADER, - mstring_get_str(vertex_shader_code), - "vertex shader"); - glAttachShader(program, vertex_shader); - mstring_unref(vertex_shader_code); - - /* generate a fragment shader from register combiners */ - MString *fragment_shader_code = psh_translate(state->psh); - const char *fragment_shader_code_str = - mstring_get_str(fragment_shader_code); - GLuint fragment_shader = create_gl_shader(GL_FRAGMENT_SHADER, - fragment_shader_code_str, - "fragment shader"); - glAttachShader(program, fragment_shader); - mstring_unref(fragment_shader_code); - - /* link the program */ - glLinkProgram(program); - GLint linked = 0; - glGetProgramiv(program, GL_LINK_STATUS, &linked); - if(!linked) { - GLchar log[2048]; - glGetProgramInfoLog(program, 2048, NULL, log); - fprintf(stderr, "nv2a: shader linking failed: %s\n", log); - abort(); - } - - glUseProgram(program); - - ShaderBinding* ret = g_malloc0(sizeof(ShaderBinding)); - ret->gl_program = program; - ret->gl_primitive_mode = gl_primitive_mode; - - update_shader_constant_locations(ret, state); - - if (previous_numeric_locale) { - setlocale(LC_NUMERIC, previous_numeric_locale); 
- g_free(previous_numeric_locale); - } - - return ret; -} - -static const char *shader_gl_vendor = NULL; - -static void shader_create_cache_folder(void) -{ - char *shader_path = g_strdup_printf("%sshaders", xemu_settings_get_base_path()); - qemu_mkdir(shader_path); - g_free(shader_path); -} - -static char *shader_get_lru_cache_path(void) -{ - return g_strdup_printf("%s/shader_cache_list", xemu_settings_get_base_path()); -} - -static void shader_write_lru_list_entry_to_disk(Lru *lru, LruNode *node, void *opaque) -{ - FILE *lru_list_file = (FILE*) opaque; - size_t written = fwrite(&node->hash, sizeof(uint64_t), 1, lru_list_file); - if (written != 1) { - fprintf(stderr, "nv2a: Failed to write shader list entry %llx to disk\n", - (unsigned long long) node->hash); - } -} - -void shader_write_cache_reload_list(PGRAPHState *pg) -{ - if (!g_config.perf.cache_shaders) { - qatomic_set(&pg->shader_cache_writeback_pending, false); - qemu_event_set(&pg->shader_cache_writeback_complete); - return; - } - - char *shader_lru_path = shader_get_lru_cache_path(); - qemu_thread_join(&pg->shader_disk_thread); - - FILE *lru_list = qemu_fopen(shader_lru_path, "wb"); - g_free(shader_lru_path); - if (!lru_list) { - fprintf(stderr, "nv2a: Failed to open shader LRU cache for writing\n"); - return; - } - - lru_visit_active(&pg->shader_cache, shader_write_lru_list_entry_to_disk, lru_list); - fclose(lru_list); - - lru_flush(&pg->shader_cache); - - qatomic_set(&pg->shader_cache_writeback_pending, false); - qemu_event_set(&pg->shader_cache_writeback_complete); -} - -bool shader_load_from_memory(ShaderLruNode *snode) -{ - assert(glGetError() == GL_NO_ERROR); - - if (!snode->program) { - return false; - } - - GLuint gl_program = glCreateProgram(); - glProgramBinary(gl_program, snode->program_format, snode->program, snode->program_size); - GLint gl_error = glGetError(); - if (gl_error != GL_NO_ERROR) { - NV2A_DPRINTF("failed to load shader binary from disk: GL error code %d\n", gl_error); - 
glDeleteProgram(gl_program); - return false; - } - - glValidateProgram(gl_program); - GLint valid = 0; - glGetProgramiv(gl_program, GL_VALIDATE_STATUS, &valid); - if (!valid) { - GLchar log[1024]; - glGetProgramInfoLog(gl_program, 1024, NULL, log); - NV2A_DPRINTF("failed to load shader binary from disk: %s\n", log); - glDeleteProgram(gl_program); - return false; - } - - glUseProgram(gl_program); - - ShaderBinding* binding = g_malloc0(sizeof(ShaderBinding)); - binding->gl_program = gl_program; - binding->gl_primitive_mode = get_gl_primitive_mode(snode->state.polygon_front_mode, - snode->state.primitive_mode); - snode->binding = binding; - - g_free(snode->program); - snode->program = NULL; - - update_shader_constant_locations(binding, &snode->state); - - return true; -} - -static char *shader_get_bin_directory(uint64_t hash) -{ - const char *cfg_dir = xemu_settings_get_base_path(); - uint64_t bin_mask = 0xffffUL << 48; - char *shader_bin_dir = g_strdup_printf("%s/shaders/%04lx", - cfg_dir, (hash & bin_mask) >> 48); - return shader_bin_dir; -} - -static char *shader_get_binary_path(const char *shader_bin_dir, uint64_t hash) -{ - uint64_t bin_mask = 0xffffUL << 48; - return g_strdup_printf("%s/%012lx", shader_bin_dir, - hash & (~bin_mask)); -} - -static void shader_load_from_disk(PGRAPHState *pg, uint64_t hash) -{ - char *shader_bin_dir = shader_get_bin_directory(hash); - char *shader_path = shader_get_binary_path(shader_bin_dir, hash); - char *cached_xemu_version = NULL; - char *cached_gl_vendor = NULL; - void *program_buffer = NULL; - - uint64_t cached_xemu_version_len; - uint64_t gl_vendor_len; - GLenum program_binary_format; - ShaderState state; - size_t shader_size; - - g_free(shader_bin_dir); - - qemu_mutex_lock(&pg->shader_cache_lock); - if (lru_contains_hash(&pg->shader_cache, hash)) { - qemu_mutex_unlock(&pg->shader_cache_lock); - return; - } - qemu_mutex_unlock(&pg->shader_cache_lock); - - FILE *shader_file = qemu_fopen(shader_path, "rb"); - if (!shader_file) 
{ - goto error; - } - - size_t nread; - #define READ_OR_ERR(data, data_len) \ - do { \ - nread = fread(data, data_len, 1, shader_file); \ - if (nread != 1) { \ - fclose(shader_file); \ - goto error; \ - } \ - } while (0) - - READ_OR_ERR(&cached_xemu_version_len, sizeof(cached_xemu_version_len)); - - cached_xemu_version = g_malloc(cached_xemu_version_len +1); - READ_OR_ERR(cached_xemu_version, cached_xemu_version_len); - if (strcmp(cached_xemu_version, xemu_version) != 0) { - fclose(shader_file); - goto error; - } - - READ_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len)); - - cached_gl_vendor = g_malloc(gl_vendor_len); - READ_OR_ERR(cached_gl_vendor, gl_vendor_len); - if (strcmp(cached_gl_vendor, shader_gl_vendor) != 0) { - fclose(shader_file); - goto error; - } - - READ_OR_ERR(&program_binary_format, sizeof(program_binary_format)); - READ_OR_ERR(&state, sizeof(state)); - READ_OR_ERR(&shader_size, sizeof(shader_size)); - - program_buffer = g_malloc(shader_size); - READ_OR_ERR(program_buffer, shader_size); - - #undef READ_OR_ERR - - fclose(shader_file); - g_free(shader_path); - g_free(cached_xemu_version); - g_free(cached_gl_vendor); - - qemu_mutex_lock(&pg->shader_cache_lock); - LruNode *node = lru_lookup(&pg->shader_cache, hash, &state); - ShaderLruNode *snode = container_of(node, ShaderLruNode, node); - - /* If we happened to regenerate this shader already, then we may as well use the new one */ - if (snode->binding) { - qemu_mutex_unlock(&pg->shader_cache_lock); - return; - } - - snode->program_format = program_binary_format; - snode->program_size = shader_size; - snode->program = program_buffer; - snode->cached = true; - qemu_mutex_unlock(&pg->shader_cache_lock); - return; - -error: - /* Delete the shader so it won't be loaded again */ - qemu_unlink(shader_path); - g_free(shader_path); - g_free(program_buffer); - g_free(cached_xemu_version); - g_free(cached_gl_vendor); -} - -static void *shader_reload_lru_from_disk(void *arg) -{ - if (!g_config.perf.cache_shaders) 
{ - return NULL; - } - - PGRAPHState *pg = (PGRAPHState*) arg; - char *shader_lru_path = shader_get_lru_cache_path(); - - FILE *lru_shaders_list = qemu_fopen(shader_lru_path, "rb"); - g_free(shader_lru_path); - if (!lru_shaders_list) { - return NULL; - } - - uint64_t hash; - while (fread(&hash, sizeof(uint64_t), 1, lru_shaders_list) == 1) { - shader_load_from_disk(pg, hash); - } - - return NULL; -} - -static void shader_cache_entry_init(Lru *lru, LruNode *node, void *state) -{ - ShaderLruNode *snode = container_of(node, ShaderLruNode, node); - memcpy(&snode->state, state, sizeof(ShaderState)); - snode->cached = false; - snode->binding = NULL; - snode->program = NULL; - snode->save_thread = NULL; -} - -static void shader_cache_entry_post_evict(Lru *lru, LruNode *node) -{ - ShaderLruNode *snode = container_of(node, ShaderLruNode, node); - - if (snode->save_thread) { - qemu_thread_join(snode->save_thread); - g_free(snode->save_thread); - } - - if (snode->binding) { - glDeleteProgram(snode->binding->gl_program); - g_free(snode->binding); - } - - if (snode->program) { - g_free(snode->program); - } - - snode->cached = false; - snode->save_thread = NULL; - snode->binding = NULL; - snode->program = NULL; - memset(&snode->state, 0, sizeof(ShaderState)); -} - -static bool shader_cache_entry_compare(Lru *lru, LruNode *node, void *key) -{ - ShaderLruNode *snode = container_of(node, ShaderLruNode, node); - return memcmp(&snode->state, key, sizeof(ShaderState)); -} - -void shader_cache_init(PGRAPHState *pg) -{ - if (!shader_gl_vendor) { - shader_gl_vendor = (const char *) glGetString(GL_VENDOR); - } - - shader_create_cache_folder(); - - /* FIXME: Make this configurable */ - const size_t shader_cache_size = 50*1024; - lru_init(&pg->shader_cache); - pg->shader_cache_entries = malloc(shader_cache_size * sizeof(ShaderLruNode)); - assert(pg->shader_cache_entries != NULL); - for (int i = 0; i < shader_cache_size; i++) { - lru_add_free(&pg->shader_cache, 
&pg->shader_cache_entries[i].node); - } - - pg->shader_cache.init_node = shader_cache_entry_init; - pg->shader_cache.compare_nodes = shader_cache_entry_compare; - pg->shader_cache.post_node_evict = shader_cache_entry_post_evict; - - qemu_thread_create(&pg->shader_disk_thread, "pgraph.shader_cache", - shader_reload_lru_from_disk, pg, QEMU_THREAD_JOINABLE); -} - -static void *shader_write_to_disk(void *arg) -{ - ShaderLruNode *snode = (ShaderLruNode*) arg; - - char *shader_bin = shader_get_bin_directory(snode->node.hash); - char *shader_path = shader_get_binary_path(shader_bin, snode->node.hash); - - static uint64_t gl_vendor_len; - if (gl_vendor_len == 0) { - gl_vendor_len = (uint64_t) (strlen(shader_gl_vendor) + 1); - } - - static uint64_t xemu_version_len = 0; - if (xemu_version_len == 0) { - xemu_version_len = (uint64_t) (strlen(xemu_version) + 1); - } - - qemu_mkdir(shader_bin); - g_free(shader_bin); - - FILE *shader_file = qemu_fopen(shader_path, "wb"); - if (!shader_file) { - goto error; - } - - size_t written; - #define WRITE_OR_ERR(data, data_size) \ - do { \ - written = fwrite(data, data_size, 1, shader_file); \ - if (written != 1) { \ - fclose(shader_file); \ - goto error; \ - } \ - } while (0) - - WRITE_OR_ERR(&xemu_version_len, sizeof(xemu_version_len)); - WRITE_OR_ERR(xemu_version, xemu_version_len); - - WRITE_OR_ERR(&gl_vendor_len, sizeof(gl_vendor_len)); - WRITE_OR_ERR(shader_gl_vendor, gl_vendor_len); - - WRITE_OR_ERR(&snode->program_format, sizeof(snode->program_format)); - WRITE_OR_ERR(&snode->state, sizeof(snode->state)); - - WRITE_OR_ERR(&snode->program_size, sizeof(snode->program_size)); - WRITE_OR_ERR(snode->program, snode->program_size); - - #undef WRITE_OR_ERR - - fclose(shader_file); - - g_free(shader_path); - g_free(snode->program); - snode->program = NULL; - - return NULL; - -error: - fprintf(stderr, "nv2a: Failed to write shader binary file to %s\n", shader_path); - qemu_unlink(shader_path); - g_free(shader_path); - 
g_free(snode->program); - snode->program = NULL; - return NULL; -} - -void shader_cache_to_disk(ShaderLruNode *snode) -{ - if (!snode->binding || snode->cached) { - return; - } - - GLint program_size; - glGetProgramiv(snode->binding->gl_program, GL_PROGRAM_BINARY_LENGTH, &program_size); - - if (snode->program) { - g_free(snode->program); - snode->program = NULL; - } - - /* program_size might be zero on some systems, if no binary formats are supported */ - if (program_size == 0) { - return; - } - - snode->program = g_malloc(program_size); - GLsizei program_size_copied; - glGetProgramBinary(snode->binding->gl_program, program_size, &program_size_copied, - &snode->program_format, snode->program); - assert(glGetError() == GL_NO_ERROR); - - snode->program_size = program_size_copied; - snode->cached = true; - - char name[24]; - snprintf(name, sizeof(name), "scache-%llx", (unsigned long long) snode->node.hash); - snode->save_thread = g_malloc0(sizeof(QemuThread)); - qemu_thread_create(snode->save_thread, name, shader_write_to_disk, snode, QEMU_THREAD_JOINABLE); -} diff --git a/hw/xbox/nv2a/shaders_common.h b/hw/xbox/nv2a/shaders_common.h deleted file mode 100644 index ae2ba9f14d3..00000000000 --- a/hw/xbox/nv2a/shaders_common.h +++ /dev/null @@ -1,125 +0,0 @@ -/* - * QEMU Geforce NV2A shader common definitions - * - * Copyright (c) 2015 espes - * Copyright (c) 2015 Jannik Vogel - * - * This library is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2 of the License, or (at your option) any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. 
- * - * You should have received a copy of the GNU Lesser General Public - * License along with this library; if not, see . - */ - -#ifndef HW_NV2A_SHADERS_COMMON_H -#define HW_NV2A_SHADERS_COMMON_H - -#include "debug.h" - -#define DEF_VERTEX_DATA(qualifier, in_out, prefix, suffix) \ - "noperspective " in_out " float " prefix "vtx_inv_w" suffix ";\n" \ - "flat " in_out " float " prefix "vtx_inv_w_flat" suffix ";\n" \ - qualifier " " in_out " vec4 " prefix "vtxD0" suffix ";\n" \ - qualifier " " in_out " vec4 " prefix "vtxD1" suffix ";\n" \ - qualifier " " in_out " vec4 " prefix "vtxB0" suffix ";\n" \ - qualifier " " in_out " vec4 " prefix "vtxB1" suffix ";\n" \ - "noperspective " in_out " float " prefix "vtxFog" suffix ";\n" \ - "noperspective " in_out " vec4 " prefix "vtxT0" suffix ";\n" \ - "noperspective " in_out " vec4 " prefix "vtxT1" suffix ";\n" \ - "noperspective " in_out " vec4 " prefix "vtxT2" suffix ";\n" \ - "noperspective " in_out " vec4 " prefix "vtxT3" suffix ";\n" - -#define STRUCT_VERTEX_DATA_OUT_SMOOTH DEF_VERTEX_DATA("noperspective", "out", "", "") -#define STRUCT_VERTEX_DATA_IN_SMOOTH DEF_VERTEX_DATA("noperspective", "in", "", "") -#define STRUCT_V_VERTEX_DATA_OUT_SMOOTH DEF_VERTEX_DATA("noperspective", "out", "v_", "") -#define STRUCT_V_VERTEX_DATA_IN_ARRAY_SMOOTH DEF_VERTEX_DATA("noperspective", "in", "v_", "[]") - -#define STRUCT_VERTEX_DATA_OUT_FLAT DEF_VERTEX_DATA("flat", "out", "", "") -#define STRUCT_VERTEX_DATA_IN_FLAT DEF_VERTEX_DATA("flat", "in", "", "") -#define STRUCT_V_VERTEX_DATA_OUT_FLAT DEF_VERTEX_DATA("flat", "out", "v_", "") -#define STRUCT_V_VERTEX_DATA_IN_ARRAY_FLAT DEF_VERTEX_DATA("flat", "in", "v_", "[]") - -typedef struct { - int ref; - gchar *string; -} MString; - -void mstring_append_fmt(MString *mstring, const char *fmt, ...); -MString *mstring_from_fmt(const char *fmt, ...); -void mstring_append_va(MString *mstring, const char *fmt, va_list va); - -static inline -void mstring_ref(MString *mstr) -{ - mstr->ref++; -} - 
-static inline -void mstring_unref(MString *mstr) -{ - mstr->ref--; - if (!mstr->ref) { - g_free(mstr->string); - g_free(mstr); - } -} - -static inline -void mstring_append(MString *mstr, const char *str) -{ - gchar *n = g_strconcat(mstr->string, str, NULL); - g_free(mstr->string); - mstr->string = n; -} - -static inline -void mstring_append_chr(MString *mstr, char chr) -{ - mstring_append_fmt(mstr, "%c", chr); -} - -static inline -void mstring_append_int(MString *mstr, int val) -{ - mstring_append_fmt(mstr, "%" PRId64, val); -} - -static inline -MString *mstring_new(void) -{ - MString *mstr = g_malloc(sizeof(MString)); - mstr->ref = 1; - mstr->string = g_strdup(""); - return mstr; -} - -static inline -MString *mstring_from_str(const char *str) -{ - MString *mstr = g_malloc(sizeof(MString)); - mstr->ref = 1; - mstr->string = g_strdup(str); - return mstr; -} - -static inline -const gchar *mstring_get_str(MString *mstr) -{ - return mstr->string; -} - -static inline -size_t mstring_get_length(MString *mstr) -{ - return strlen(mstr->string); -} - - -#endif diff --git a/hw/xbox/nv2a/lru.h b/include/qemu/lru.h similarity index 87% rename from hw/xbox/nv2a/lru.h rename to include/qemu/lru.h index c0dca7ec5d2..b5882702827 100644 --- a/hw/xbox/nv2a/lru.h +++ b/include/qemu/lru.h @@ -1,7 +1,7 @@ /* * LRU object list * - * Copyright (c) 2021 Matt Borgerson + * Copyright (c) 2021-2024 Matt Borgerson * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -42,6 +42,8 @@ typedef struct Lru Lru; struct Lru { QTAILQ_HEAD(, LruNode) global; QTAILQ_HEAD(, LruNode) bins[LRU_NUM_BINS]; + int num_used; + int num_free; /* Initialize a node. 
*/ void (*init_node)(Lru *lru, LruNode *node, void *key); @@ -67,6 +69,8 @@ void lru_init(Lru *lru) lru->compare_nodes = NULL; lru->pre_node_evict = NULL; lru->post_node_evict = NULL; + lru->num_free = 0; + lru->num_used = 0; } static inline @@ -74,6 +78,7 @@ void lru_add_free(Lru *lru, LruNode *node) { node->next_bin.tqe_circ.tql_prev = NULL; QTAILQ_INSERT_TAIL(&lru->global, node, next_global); + lru->num_free += 1; } static inline @@ -106,29 +111,51 @@ void lru_evict_node(Lru *lru, LruNode *node) if (lru->post_node_evict) { lru->post_node_evict(lru, node); } + + lru->num_used -= 1; + lru->num_free += 1; } static inline -LruNode *lru_evict_one(Lru *lru) +LruNode *lru_try_evict_one(Lru *lru) { LruNode *found; QTAILQ_FOREACH_REVERSE(found, &lru->global, next_global) { - bool can_evict = true; - if (lru_is_node_in_use(lru, found) && lru->pre_node_evict) { - can_evict = lru->pre_node_evict(lru, found); - } - if (can_evict) { - break; + if (lru_is_node_in_use(lru, found) + && (!lru->pre_node_evict || lru->pre_node_evict(lru, found))) { + lru_evict_node(lru, found); + return found; } } + return NULL; +} + +static inline +LruNode *lru_evict_one(Lru *lru) +{ + LruNode *found = lru_try_evict_one(lru); + assert(found != NULL); /* No evictable node! 
*/ - lru_evict_node(lru, found); return found; } +static inline +LruNode *lru_get_one_free(Lru *lru) +{ + LruNode *found; + + QTAILQ_FOREACH_REVERSE(found, &lru->global, next_global) { + if (!lru_is_node_in_use(lru, found)) { + return found; + } + } + + return lru_evict_one(lru); +} + static inline bool lru_contains_hash(Lru *lru, uint64_t hash) { @@ -160,12 +187,15 @@ LruNode *lru_lookup(Lru *lru, uint64_t hash, void *key) if (found) { QTAILQ_REMOVE(&lru->bins[bin], found, next_bin); } else { - found = lru_evict_one(lru); + found = lru_get_one_free(lru); found->hash = hash; if (lru->init_node) { lru->init_node(lru, found, key); } assert(found->hash == hash); + + lru->num_used += 1; + lru->num_free -= 1; } QTAILQ_REMOVE(&lru->global, found, next_global); diff --git a/include/qemu/mstring.h b/include/qemu/mstring.h new file mode 100644 index 00000000000..567fd4cdf38 --- /dev/null +++ b/include/qemu/mstring.h @@ -0,0 +1,82 @@ +#ifndef MSTRING_H +#define MSTRING_H + +#include "qemu/osdep.h" +#include + +typedef struct { + int ref; + gchar *string; +} MString; + +void mstring_append_fmt(MString *mstring, const char *fmt, ...); +MString *mstring_from_fmt(const char *fmt, ...); +void mstring_append_va(MString *mstring, const char *fmt, va_list va); + +static inline +void mstring_ref(MString *mstr) +{ + mstr->ref++; +} + +static inline +void mstring_unref(MString *mstr) +{ + mstr->ref--; + if (!mstr->ref) { + g_free(mstr->string); + g_free(mstr); + } +} + +static inline +void mstring_append(MString *mstr, const char *str) +{ + gchar *n = g_strconcat(mstr->string, str, NULL); + g_free(mstr->string); + mstr->string = n; +} + +static inline +void mstring_append_chr(MString *mstr, char chr) +{ + mstring_append_fmt(mstr, "%c", chr); +} + +static inline +void mstring_append_int(MString *mstr, int val) +{ + mstring_append_fmt(mstr, "%" PRId64, val); +} + +static inline +MString *mstring_new(void) +{ + MString *mstr = g_malloc(sizeof(MString)); + mstr->ref = 1; + mstr->string = 
g_strdup(""); + return mstr; +} + +static inline +MString *mstring_from_str(const char *str) +{ + MString *mstr = g_malloc(sizeof(MString)); + mstr->ref = 1; + mstr->string = g_strdup(str); + return mstr; +} + +static inline +const gchar *mstring_get_str(MString *mstr) +{ + return mstr->string; +} + +static inline +size_t mstring_get_length(MString *mstr) +{ + return strlen(mstr->string); +} + +#endif diff --git a/licenses/SPIRV-Reflect.license.txt b/licenses/SPIRV-Reflect.license.txt new file mode 100644 index 00000000000..261eeb9e9f8 --- /dev/null +++ b/licenses/SPIRV-Reflect.license.txt @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/licenses/SPIRV-Tools.license.txt b/licenses/SPIRV-Tools.license.txt new file mode 100644 index 00000000000..d6456956733 --- /dev/null +++ b/licenses/SPIRV-Tools.license.txt @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. 
+ + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." 
+ + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/licenses/VulkanMemoryAllocator.license.txt b/licenses/VulkanMemoryAllocator.license.txt new file mode 100644 index 00000000000..b9fff388f1b --- /dev/null +++ b/licenses/VulkanMemoryAllocator.license.txt @@ -0,0 +1,19 @@ +Copyright (c) 2017-2024 Advanced Micro Devices, Inc. All rights reserved. + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
diff --git a/licenses/glslang.license.txt b/licenses/glslang.license.txt new file mode 100644 index 00000000000..054e68a4614 --- /dev/null +++ b/licenses/glslang.license.txt @@ -0,0 +1,1016 @@ +Here, glslang proper means core GLSL parsing, HLSL parsing, and SPIR-V code +generation. Glslang proper requires use of a number of licenses, one that covers +preprocessing and others that covers non-preprocessing. + +Bison was removed long ago. You can build glslang from the source grammar, +using tools of your choice, without using bison or any bison files. + +Other parts, outside of glslang proper, include: + +- gl_types.h, only needed for OpenGL-like reflection, and can be left out of + a parse and codegen project. See it for its license. + +- update_glslang_sources.py, which is not part of the project proper and does + not need to be used. + +- the SPIR-V "remapper", which is optional, but has the same license as + glslang proper + +- Google tests and SPIR-V tools, and anything in the external subdirectory + are external and optional; see them for their respective licenses. + +-------------------------------------------------------------------------------- + +The core of glslang-proper, minus the preprocessor is licenced as follows: + +-------------------------------------------------------------------------------- +3-Clause BSD License +-------------------------------------------------------------------------------- + +// +// Copyright (C) 2015-2018 Google, Inc. +// Copyright (C) +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// +// Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// +// Redistributions in binary form must reproduce the above +// copyright notice, this list of conditions and the following +// disclaimer in the documentation and/or other materials provided +// with the distribution. +// +// Neither the name of 3Dlabs Inc. Ltd. nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +// COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, +// BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +// LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN +// ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +// POSSIBILITY OF SUCH DAMAGE. +// + + +-------------------------------------------------------------------------------- +2-Clause BSD License +-------------------------------------------------------------------------------- + +Copyright 2020 The Khronos Group Inc + +Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +-------------------------------------------------------------------------------- +The MIT License +-------------------------------------------------------------------------------- + +Copyright 2020 The Khronos Group Inc + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + +-------------------------------------------------------------------------------- +APACHE LICENSE, VERSION 2.0 +-------------------------------------------------------------------------------- + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. 
+ + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. 
You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. 
+ + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. 
In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. + + +-------------------------------------------------------------------------------- +GPL 3 with special bison exception +-------------------------------------------------------------------------------- + + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. 
Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. 
If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. 
Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. + + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. 
+ + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. 
Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. + + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. 
+ + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. + + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. 
+ + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. 
If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. 
+ + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. 
+ + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. + + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the 
material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. + + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. 
+ + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. 
If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. + + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. 
+ + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. + + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + +Bison Exception + +As a special exception, you may create a larger work that contains part or all +of the Bison parser skeleton and distribute that work under terms of your +choice, so long as that work isn't itself a parser generator using the skeleton +or a modified version thereof as a parser skeleton. Alternatively, if you +modify or redistribute the parser skeleton itself, you may (at your option) +remove this special exception, which will cause the skeleton and the resulting +Bison output files to be licensed under the GNU General Public License without +this special exception. + +This special exception was added by the Free Software Foundation in version +2.2 of Bison. 
+ + END OF TERMS AND CONDITIONS + +-------------------------------------------------------------------------------- +================================================================================ +-------------------------------------------------------------------------------- + +The preprocessor has the core licenses stated above, plus additional licences: + +/****************************************************************************\ +Copyright (c) 2002, NVIDIA Corporation. + +NVIDIA Corporation("NVIDIA") supplies this software to you in +consideration of your agreement to the following terms, and your use, +installation, modification or redistribution of this NVIDIA software +constitutes acceptance of these terms. If you do not agree with these +terms, please do not use, install, modify or redistribute this NVIDIA +software. + +In consideration of your agreement to abide by the following terms, and +subject to these terms, NVIDIA grants you a personal, non-exclusive +license, under NVIDIA's copyrights in this original NVIDIA software (the +"NVIDIA Software"), to use, reproduce, modify and redistribute the +NVIDIA Software, with or without modifications, in source and/or binary +forms; provided that if you redistribute the NVIDIA Software, you must +retain the copyright notice of NVIDIA, this notice and the following +text and disclaimers in all such redistributions of the NVIDIA Software. +Neither the name, trademarks, service marks nor logos of NVIDIA +Corporation may be used to endorse or promote products derived from the +NVIDIA Software without specific prior written permission from NVIDIA. +Except as expressly stated in this notice, no other rights or licenses +express or implied, are granted by NVIDIA herein, including but not +limited to any patent rights that may be infringed by your derivative +works or by other works in which the NVIDIA Software may be +incorporated. No hardware is licensed hereunder. 
+ +THE NVIDIA SOFTWARE IS BEING PROVIDED ON AN "AS IS" BASIS, WITHOUT +WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR IMPLIED, +INCLUDING WITHOUT LIMITATION, WARRANTIES OR CONDITIONS OF TITLE, +NON-INFRINGEMENT, MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE, OR +ITS USE AND OPERATION EITHER ALONE OR IN COMBINATION WITH OTHER +PRODUCTS. + +IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, +INCIDENTAL, EXEMPLARY, CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED +TO, LOST PROFITS; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF +USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) OR ARISING IN ANY WAY +OUT OF THE USE, REPRODUCTION, MODIFICATION AND/OR DISTRIBUTION OF THE +NVIDIA SOFTWARE, HOWEVER CAUSED AND WHETHER UNDER THEORY OF CONTRACT, +TORT (INCLUDING NEGLIGENCE), STRICT LIABILITY OR OTHERWISE, EVEN IF +NVIDIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +\****************************************************************************/ + +/* +** Copyright (c) 2014-2016 The Khronos Group Inc. +** +** Permission is hereby granted, free of charge, to any person obtaining a copy +** of this software and/or associated documentation files (the "Materials"), +** to deal in the Materials without restriction, including without limitation +** the rights to use, copy, modify, merge, publish, distribute, sublicense, +** and/or sell copies of the Materials, and to permit persons to whom the +** Materials are furnished to do so, subject to the following conditions: +** +** The above copyright notice and this permission notice shall be included in +** all copies or substantial portions of the Materials. +** +** MODIFICATIONS TO THIS FILE MAY MEAN IT NO LONGER ACCURATELY REFLECTS KHRONOS +** STANDARDS. 
THE UNMODIFIED, NORMATIVE VERSIONS OF KHRONOS SPECIFICATIONS AND +** HEADER INFORMATION ARE LOCATED AT https://www.khronos.org/registry/ +** +** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +** OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +** FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL +** THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +** LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING +** FROM,OUT OF OR IN CONNECTION WITH THE MATERIALS OR THE USE OR OTHER DEALINGS +** IN THE MATERIALS. +*/ diff --git a/licenses/volk.license.txt b/licenses/volk.license.txt new file mode 100644 index 00000000000..5a717f26780 --- /dev/null +++ b/licenses/volk.license.txt @@ -0,0 +1,19 @@ +Copyright (c) 2018-2024 Arseny Kapoulkine + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/meson.build b/meson.build index a6fd6fa5a3b..a94c30d1687 100644 --- a/meson.build +++ b/meson.build @@ -1173,6 +1173,41 @@ if not get_option('opengl').auto() or have_system or have_vhost_user_gpu link_args: config_host['EPOXY_LIBS'].split() + opengl_libs) endif +vulkan = not_found +libglslang = not_found + +if targetos == 'windows' + vulkan = declare_dependency(compile_args: ['-DVK_USE_PLATFORM_WIN32_KHR']) + libglslang = declare_dependency(link_args: [ + '-lglslang', + '-lMachineIndependent', + '-lGenericCodeGen', + '-lSPIRV', + '-lSPIRV-Tools', + '-lSPIRV-Tools-opt' + ]) +elif targetos == 'linux' + vulkan = dependency('vulkan') +endif + +if vulkan.found() and not libglslang.found() + cmake = import('cmake') + # FIXME: Get spirv-tools to enable opt. + glslang_opts = cmake.subproject_options() + glslang_opts.add_cmake_defines({'ENABLE_OPT': false}) + glslang_subpro = cmake.subproject('glslang', options: glslang_opts) + libglslang = declare_dependency(link_with: [ + glslang_subpro.target('glslang'), + glslang_subpro.target('MachineIndependent'), + glslang_subpro.target('GenericCodeGen'), + glslang_subpro.target('SPIRV'), + ], include_directories: ['subprojects' / 'glslang'] + ) +endif + +subdir('thirdparty') + + gbm = not_found if (have_system or have_tools) and (virgl.found() or opengl.found()) gbm = dependency('gbm', method: 'pkg-config', required: false, @@ -1924,6 +1959,7 @@ config_host_data.set('CONFIG_LINUX_IO_URING', linux_io_uring.found()) config_host_data.set('CONFIG_LIBPMEM', libpmem.found()) config_host_data.set('CONFIG_NUMA', numa.found()) config_host_data.set('CONFIG_OPENGL', opengl.found()) +config_host_data.set('CONFIG_VULKAN', vulkan.found()) config_host_data.set('CONFIG_PROFILER', get_option('profiler')) config_host_data.set('CONFIG_RBD', rbd.found()) config_host_data.set('CONFIG_RDMA', rdma.found()) @@ -4047,6 +4083,7 @@ summary_info += {'U2F support': u2f} summary_info += {'libusb': libusb} summary_info += {'usb net redir': usbredir} 
summary_info += {'OpenGL support (epoxy)': opengl} +summary_info += {'Vulkan support': vulkan} summary_info += {'GBM': gbm} summary_info += {'libiscsi support': libiscsi} summary_info += {'libnfs support': libnfs} diff --git a/scripts/archive-source.sh b/scripts/archive-source.sh index 0496ebeb6c7..e20751e50ef 100755 --- a/scripts/archive-source.sh +++ b/scripts/archive-source.sh @@ -28,8 +28,12 @@ sub_file="${sub_tdir}/submodule.tar" # different to the host OS. submodules="dtc meson ui/keycodemapdb" submodules="$submodules tests/fp/berkeley-softfloat-3 tests/fp/berkeley-testfloat-3" -submodules="$submodules ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig" # xemu extras -submodules="$submodules hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu" + +# xemu extras +submodules="$submodules ui/thirdparty/imgui ui/thirdparty/implot ui/thirdparty/httplib util/xxHash tomlplusplus genconfig" +submodules="$submodules hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu" +submodules="$submodules thirdparty/volk thirdparty/VulkanMemoryAllocator thirdparty/SPIRV-Reflect" + sub_deinit="" function cleanup() { diff --git a/scripts/gen-license.py b/scripts/gen-license.py index b71d4ecd56a..88733871ad7 100755 --- a/scripts/gen-license.py +++ b/scripts/gen-license.py @@ -228,7 +228,25 @@ def head(self): Lib('nv2a_vsh_cpu', 'https://github.com/abaire/nv2a_vsh_cpu', unlicense, 'https://raw.githubusercontent.com/abaire/nv2a_vsh_cpu/main/LICENSE', ships_static=all_platforms, - submodule=Submodule('hw/xbox/nv2a/thirdparty/nv2a_vsh_cpu') + submodule=Submodule('hw/xbox/nv2a/pgraph/thirdparty/nv2a_vsh_cpu') + ), + +Lib('volk', 'https://github.com/zeux/volk', + mit, 'https://raw.githubusercontent.com/zeux/volk/master/LICENSE.md', + ships_static=all_platforms, + submodule=Submodule('thirdparty/volk') + ), + +Lib('VulkanMemoryAllocator', 'https://github.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator', + mit, 
'https://raw.githubusercontent.com/GPUOpen-LibrariesAndSDKs/VulkanMemoryAllocator/master/LICENSE.txt', + ships_static=all_platforms, + submodule=Submodule('thirdparty/VulkanMemoryAllocator') + ), + +Lib('SPIRV-Reflect', 'https://github.com/KhronosGroup/SPIRV-Reflect', + apache2, 'https://raw.githubusercontent.com/KhronosGroup/SPIRV-Reflect/main/LICENSE', + ships_static=all_platforms, + submodule=Submodule('thirdparty/SPIRV-Reflect') ), # @@ -344,6 +362,18 @@ def head(self): ships_static={windows}, platform={windows}, version='2.1.0' ), + +Lib('glslang', 'https://github.com/KhronosGroup/glslang', + bsd_3clause, 'https://raw.githubusercontent.com/KhronosGroup/glslang/main/LICENSE.txt', + ships_static={windows}, platform={windows}, + version='14.3.0' + ), + +Lib('SPIRV-Tools', 'https://github.com/KhronosGroup/SPIRV-Tools', + apache2, 'https://raw.githubusercontent.com/KhronosGroup/SPIRV-Tools/main/LICENSE', + ships_static={windows}, platform={windows}, + pkgconfig=PkgConfig('SPIRV-Tools') + ), ] def gen_license(): diff --git a/subprojects/glslang.wrap b/subprojects/glslang.wrap new file mode 100644 index 00000000000..7e6fe8414ef --- /dev/null +++ b/subprojects/glslang.wrap @@ -0,0 +1,4 @@ +[wrap-git] +url=https://github.com/KhronosGroup/glslang +revision=vulkan-sdk-1.3.283.0 +depth=1 diff --git a/thirdparty/SPIRV-Reflect b/thirdparty/SPIRV-Reflect new file mode 160000 index 00000000000..1d674a82d7e --- /dev/null +++ b/thirdparty/SPIRV-Reflect @@ -0,0 +1 @@ +Subproject commit 1d674a82d7e102ed0c02e64e036827db9e8b1a71 diff --git a/thirdparty/VulkanMemoryAllocator b/thirdparty/VulkanMemoryAllocator new file mode 160000 index 00000000000..009ecd192c1 --- /dev/null +++ b/thirdparty/VulkanMemoryAllocator @@ -0,0 +1 @@ +Subproject commit 009ecd192c1289c7529bff248a16cfe896254816 diff --git a/thirdparty/meson.build b/thirdparty/meson.build new file mode 100644 index 00000000000..8bd29db98ef --- /dev/null +++ b/thirdparty/meson.build @@ -0,0 +1,26 @@ +if vulkan.found() + 
+libvolk = static_library('volk', sources: 'volk/volk.c', c_args: ['-DVK_NO_PROTOTYPES'], dependencies: vulkan) +volk = declare_dependency(compile_args: ['-DVK_NO_PROTOTYPES'], include_directories: 'volk', link_with: libvolk, dependencies: vulkan) + +debug_vma = false + +vma_defns = [ + '-DVMA_STATIC_VULKAN_FUNCTIONS=0', + '-DVMA_DYNAMIC_VULKAN_FUNCTIONS=0', + ] + +if debug_vma + vma_defns += [ + '-DVMA_DEBUG_MARGIN=16', + '-DVMA_DEBUG_DETECT_CORRUPTION=1', + '-DVMA_DEBUG_MIN_BUFFER_IMAGE_GRANULARITY=256', + ] +endif +libvma = static_library('vma', sources: 'vma.cc', cpp_args: vma_defns, include_directories: 'VulkanMemoryAllocator/include', dependencies: [vulkan, volk]) +vma = declare_dependency(compile_args: vma_defns, include_directories: 'VulkanMemoryAllocator/include', link_with: libvma) + +libspirv_reflect = static_library('spirv_reflect', sources: 'SPIRV-Reflect/spirv_reflect.c', dependencies: vulkan) +spirv_reflect = declare_dependency(include_directories: 'SPIRV-Reflect', link_with: libspirv_reflect, dependencies: vulkan) + +endif diff --git a/thirdparty/renderdoc_app.h b/thirdparty/renderdoc_app.h index 7ee24b69eed..c01e05932e2 100644 --- a/thirdparty/renderdoc_app.h +++ b/thirdparty/renderdoc_app.h @@ -1,7 +1,7 @@ /****************************************************************************** * The MIT License (MIT) * - * Copyright (c) 2019-2022 Baldur Karlsson + * Copyright (c) 2019-2024 Baldur Karlsson * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal @@ -35,7 +35,7 @@ #if defined(WIN32) || defined(__WIN32__) || defined(_WIN32) || defined(_MSC_VER) #define RENDERDOC_CC __cdecl -#elif defined(__linux__) +#elif defined(__linux__) || defined(__FreeBSD__) #define RENDERDOC_CC #elif defined(__APPLE__) #define RENDERDOC_CC @@ -72,7 +72,8 @@ extern "C" { // RenderDoc capture options // -typedef enum RENDERDOC_CaptureOption { +typedef enum 
RENDERDOC_CaptureOption +{ // Allow the application to enable vsync // // Default - enabled @@ -214,6 +215,19 @@ typedef enum RENDERDOC_CaptureOption { // necessary as directed by a RenderDoc developer. eRENDERDOC_Option_AllowUnsupportedVendorExtensions = 12, + // Define a soft memory limit which some APIs may aim to keep overhead under where + // possible. Anything above this limit will where possible be saved directly to disk during + // capture. + // This will cause increased disk space use (which may cause a capture to fail if disk space is + // exhausted) as well as slower capture times. + // + // Not all memory allocations may be deferred like this so it is not a guarantee of a memory + // limit. + // + // Units are in MBs, suggested values would range from 200MB to 1000MB. + // + // Default - 0 Megabytes + eRENDERDOC_Option_SoftMemoryLimit = 13, } RENDERDOC_CaptureOption; // Sets an option that controls how RenderDoc behaves on capture. @@ -233,7 +247,8 @@ typedef uint32_t(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionU32)(RENDERDOC_Capture // If the option is invalid, -FLT_MAX is returned typedef float(RENDERDOC_CC *pRENDERDOC_GetCaptureOptionF32)(RENDERDOC_CaptureOption opt); -typedef enum RENDERDOC_InputButton { +typedef enum RENDERDOC_InputButton +{ // '0' - '9' matches ASCII values eRENDERDOC_Key_0 = 0x30, eRENDERDOC_Key_1 = 0x31, @@ -321,7 +336,8 @@ typedef void(RENDERDOC_CC *pRENDERDOC_SetFocusToggleKeys)(RENDERDOC_InputButton // If keys is NULL or num is 0, captures keys will be disabled typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureKeys)(RENDERDOC_InputButton *keys, int num); -typedef enum RENDERDOC_OverlayBits { +typedef enum RENDERDOC_OverlayBits +{ // This single bit controls whether the overlay is enabled or disabled globally eRENDERDOC_Overlay_Enabled = 0x1, @@ -452,6 +468,15 @@ typedef uint32_t(RENDERDOC_CC *pRENDERDOC_LaunchReplayUI)(uint32_t connectTarget // ignored and the others will be filled out. 
typedef void(RENDERDOC_CC *pRENDERDOC_GetAPIVersion)(int *major, int *minor, int *patch); +// Requests that the replay UI show itself (if hidden or not the current top window). This can be +// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle +// showing the UI after making a capture. +// +// This will return 1 if the request was successfully passed on, though it's not guaranteed that +// the UI will be on top in all cases depending on OS rules. It will return 0 if there is no current +// target control connection to make such a request, or if there was another error +typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)(); + ////////////////////////////////////////////////////////////////////////// // Capturing functions // @@ -525,14 +550,15 @@ typedef uint32_t(RENDERDOC_CC *pRENDERDOC_EndFrameCapture)(RENDERDOC_DevicePoint typedef uint32_t(RENDERDOC_CC *pRENDERDOC_DiscardFrameCapture)(RENDERDOC_DevicePointer device, RENDERDOC_WindowHandle wndHandle); -// Requests that the replay UI show itself (if hidden or not the current top window). This can be -// used in conjunction with IsTargetControlConnected and LaunchReplayUI to intelligently handle -// showing the UI after making a capture. +// Only valid to be called between a call to StartFrameCapture and EndFrameCapture. Gives a custom +// title to the capture produced which will be displayed in the UI. // -// This will return 1 if the request was successfully passed on, though it's not guaranteed that -// the UI will be on top in all cases depending on OS rules. It will return 0 if there is no current -// target control connection to make such a request, or if there was another error -typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)(); +// If multiple captures are ongoing, this title will be applied to the first capture to end after +// this call. The second capture to end will have no title, unless this function is called again. 
+// +// Calling this function has no effect if no capture is currently running, and if it is called +// multiple times only the last title will be used. +typedef void(RENDERDOC_CC *pRENDERDOC_SetCaptureTitle)(const char *title); ////////////////////////////////////////////////////////////////////////////////////////////////// // RenderDoc API versions @@ -547,7 +573,8 @@ typedef uint32_t(RENDERDOC_CC *pRENDERDOC_ShowReplayUI)(); // Note that this means the API returned can be higher than the one you might have requested. // e.g. if you are running against a newer RenderDoc that supports 1.0.1, it will be returned // instead of 1.0.0. You can check this with the GetAPIVersion entry point -typedef enum RENDERDOC_Version { +typedef enum RENDERDOC_Version +{ eRENDERDOC_API_Version_1_0_0 = 10000, // RENDERDOC_API_1_0_0 = 1 00 00 eRENDERDOC_API_Version_1_0_1 = 10001, // RENDERDOC_API_1_0_1 = 1 00 01 eRENDERDOC_API_Version_1_0_2 = 10002, // RENDERDOC_API_1_0_2 = 1 00 02 @@ -560,6 +587,7 @@ typedef enum RENDERDOC_Version { eRENDERDOC_API_Version_1_4_1 = 10401, // RENDERDOC_API_1_4_1 = 1 04 01 eRENDERDOC_API_Version_1_4_2 = 10402, // RENDERDOC_API_1_4_2 = 1 04 02 eRENDERDOC_API_Version_1_5_0 = 10500, // RENDERDOC_API_1_5_0 = 1 05 00 + eRENDERDOC_API_Version_1_6_0 = 10600, // RENDERDOC_API_1_6_0 = 1 06 00 } RENDERDOC_Version; // API version changelog: @@ -588,8 +616,10 @@ typedef enum RENDERDOC_Version { // 1.4.1 - Refactor: Renamed Shutdown to RemoveHooks to better clarify what is happening // 1.4.2 - Refactor: Renamed 'draws' to 'actions' in callstack capture option. 
// 1.5.0 - Added feature: ShowReplayUI() to request that the replay UI show itself if connected +// 1.6.0 - Added feature: SetCaptureTitle() which can be used to set a title for a +// capture made with StartFrameCapture() or EndFrameCapture() -typedef struct RENDERDOC_API_1_5_0 +typedef struct RENDERDOC_API_1_6_0 { pRENDERDOC_GetAPIVersion GetAPIVersion; @@ -664,19 +694,23 @@ typedef struct RENDERDOC_API_1_5_0 // new function in 1.5.0 pRENDERDOC_ShowReplayUI ShowReplayUI; -} RENDERDOC_API_1_5_0; - -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_0_0; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_0_1; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_0_2; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_1_0; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_1_1; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_1_2; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_2_0; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_3_0; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_4_0; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_4_1; -typedef RENDERDOC_API_1_5_0 RENDERDOC_API_1_4_2; + + // new function in 1.6.0 + pRENDERDOC_SetCaptureTitle SetCaptureTitle; +} RENDERDOC_API_1_6_0; + +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_0_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_1_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_2_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_3_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_0; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_1; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_4_2; +typedef RENDERDOC_API_1_6_0 RENDERDOC_API_1_5_0; ////////////////////////////////////////////////////////////////////////////////////////////////// // RenderDoc API entry point diff --git a/thirdparty/vma.cc b/thirdparty/vma.cc new file mode 100644 index 00000000000..a2023d33b25 --- /dev/null 
+++ b/thirdparty/vma.cc @@ -0,0 +1,2 @@ +#define VMA_IMPLEMENTATION +#include "vk_mem_alloc.h" diff --git a/thirdparty/volk b/thirdparty/volk new file mode 160000 index 00000000000..466085407d5 --- /dev/null +++ b/thirdparty/volk @@ -0,0 +1 @@ +Subproject commit 466085407d5d2f50583fd663c1d65f93a7709d3e diff --git a/ui/meson.build b/ui/meson.build index 18bb7c97c18..75b82df9270 100644 --- a/ui/meson.build +++ b/ui/meson.build @@ -40,10 +40,6 @@ xemu_cocoa = dependency('appleframeworks', modules: 'Cocoa') xemu_ss.add(xemu_cocoa) endif -if 'CONFIG_LINUX' in config_host -xemu_ss.add(gtk) -endif - xemu_ss.add(when: 'CONFIG_LINUX', if_true: [gtk, files('xemu-os-utils-linux.c')]) xemu_ss.add(when: 'CONFIG_WIN32', if_true: files('xemu-os-utils-windows.c')) xemu_ss.add(when: 'CONFIG_DARWIN', if_true: files('xemu-os-utils-macos.m')) diff --git a/ui/xemu.c b/ui/xemu.c index b06e19eb1c7..933d301a6fe 100644 --- a/ui/xemu.c +++ b/ui/xemu.c @@ -426,6 +426,7 @@ static void handle_keydown(SDL_Event *ev) { int win; struct sdl2_console *scon = get_scon_from_window(ev->key.windowID); + if (scon == NULL) return; int gui_key_modifier_pressed = get_mod_state(); int gui_keysym = 0; @@ -484,6 +485,7 @@ static void handle_keydown(SDL_Event *ev) static void handle_keyup(SDL_Event *ev) { struct sdl2_console *scon = get_scon_from_window(ev->key.windowID); + if (!scon) return; scon->ignore_hotkeys = false; sdl2_process_key(scon, &ev->key); @@ -944,7 +946,7 @@ static void sdl2_display_very_early_init(DisplayOptions *o) fprintf(stderr, "GL_SHADING_LANGUAGE_VERSION: %s\n", glGetString(GL_SHADING_LANGUAGE_VERSION)); // Initialize offscreen rendering context now - nv2a_gl_context_init(); + nv2a_context_init(); SDL_GL_MakeCurrent(NULL, NULL); // FIXME: atexit(sdl_cleanup); @@ -1207,6 +1209,7 @@ void sdl2_gl_refresh(DisplayChangeListener *dcl) qemu_mutex_unlock_main_loop(); glFinish(); + nv2a_release_framebuffer_surface(); SDL_GL_SwapWindow(scon->real_window); /* VGA update (see note above) + vblank 
*/ diff --git a/ui/xui/main-menu.cc b/ui/xui/main-menu.cc index 75b88cafb6e..a9a6c6ec85d 100644 --- a/ui/xui/main-menu.cc +++ b/ui/xui/main-menu.cc @@ -449,7 +449,15 @@ void MainMenuInputView::Draw() void MainMenuDisplayView::Draw() { - SectionTitle("Quality"); + SectionTitle("Renderer"); + ChevronCombo("Backend", &g_config.display.renderer, + "Null\0" + "OpenGL\0" +#ifdef CONFIG_VULKAN + "Vulkan\0" +#endif + , + "Select desired renderer implementation"); int rendering_scale = nv2a_get_surface_scale_factor() - 1; if (ChevronCombo("Internal resolution scale", &rendering_scale, "1x\0" diff --git a/ui/xui/main.cc b/ui/xui/main.cc index fd38aa4e7bb..069a6282f9b 100644 --- a/ui/xui/main.cc +++ b/ui/xui/main.cc @@ -216,7 +216,7 @@ void xemu_hud_render(void) ImGui::NewFrame(); ProcessKeyboardShortcuts(); -#if defined(DEBUG_NV2A_GL) && defined(CONFIG_RENDERDOC) +#if defined(CONFIG_RENDERDOC) if (g_capture_renderdoc_frame) { nv2a_dbg_renderdoc_capture_frames(1); g_capture_renderdoc_frame = false; diff --git a/ui/xui/menubar.cc b/ui/xui/menubar.cc index 2d1f48c6045..f0b6c1d5c27 100644 --- a/ui/xui/menubar.cc +++ b/ui/xui/menubar.cc @@ -71,8 +71,8 @@ void ProcessKeyboardShortcuts(void) ActionScreenshot(); } -#if defined(DEBUG_NV2A_GL) && defined(CONFIG_RENDERDOC) - if (ImGui::IsKeyPressed(ImGuiKey_F10)) { +#ifdef CONFIG_RENDERDOC + if (ImGui::IsKeyPressed(ImGuiKey_F10) && nv2a_dbg_renderdoc_available()) { nv2a_dbg_renderdoc_capture_frames(1); } #endif @@ -168,6 +168,15 @@ void ShowMainMenu() g_config.display.ui.scale = ui_scale_idx; } } + + ImGui::Combo("Backend", &g_config.display.renderer, + "Null\0" + "OpenGL\0" +#ifdef CONFIG_VULKAN + "Vulkan\0" +#endif + ); + int rendering_scale = nv2a_get_surface_scale_factor() - 1; if (ImGui::Combo("Int. 
Resolution Scale", &rendering_scale, "1x\0" @@ -203,7 +212,7 @@ void ShowMainMenu() ImGui::MenuItem("Monitor", "~", &monitor_window.is_open); ImGui::MenuItem("Audio", NULL, &apu_window.m_is_open); ImGui::MenuItem("Video", NULL, &video_window.m_is_open); -#if defined(DEBUG_NV2A_GL) && defined(CONFIG_RENDERDOC) +#ifdef CONFIG_RENDERDOC if (nv2a_dbg_renderdoc_available()) { ImGui::MenuItem("RenderDoc: Capture", NULL, &g_capture_renderdoc_frame); } diff --git a/util/meson.build b/util/meson.build index 4269ef4e38f..72ef1db2b57 100644 --- a/util/meson.build +++ b/util/meson.build @@ -59,6 +59,7 @@ util_ss.add(files('int128.c')) util_ss.add(files('memalign.c')) util_ss.add(when: 'CONFIG_WIN32', if_true: files('miniz/miniz.c')) util_ss.add(files('fast-hash.c')) +util_ss.add(files('mstring.c')) if have_user util_ss.add(files('selfmap.c')) diff --git a/util/mstring.c b/util/mstring.c new file mode 100644 index 00000000000..6cd0af7335e --- /dev/null +++ b/util/mstring.c @@ -0,0 +1,49 @@ +#include "qemu/osdep.h" +#include "qemu/mstring.h" + +#include + +void mstring_append_fmt(MString *qstring, const char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + mstring_append_va(qstring, fmt, ap); + va_end(ap); +} + +MString *mstring_from_fmt(const char *fmt, ...) 
+{ + MString *ret = mstring_new(); + va_list ap; + va_start(ap, fmt); + mstring_append_va(ret, fmt, ap); + va_end(ap); + + return ret; +} + +void mstring_append_va(MString *qstring, const char *fmt, va_list va) +{ + char scratch[256]; + + va_list ap; + va_copy(ap, va); + const int len = vsnprintf(scratch, sizeof(scratch), fmt, ap); + va_end(ap); + + if (len == 0) { + return; + } else if (len < sizeof(scratch)) { + mstring_append(qstring, scratch); + return; + } + + /* overflowed out scratch buffer, alloc and try again */ + char *buf = g_malloc(len + 1); + va_copy(ap, va); + vsnprintf(buf, len + 1, fmt, ap); + va_end(ap); + + mstring_append(qstring, buf); + g_free(buf); +} diff --git a/xemu-version.c b/xemu-version.c index 523d9557601..f2e7a958e9e 100644 --- a/xemu-version.c +++ b/xemu-version.c @@ -1,5 +1,8 @@ #include "xemu-version-macro.h" +const int xemu_version_major = XEMU_VERSION_MAJOR; +const int xemu_version_minor = XEMU_VERSION_MINOR; +const int xemu_version_patch = XEMU_VERSION_PATCH; const char *xemu_version = XEMU_VERSION; const char *xemu_branch = XEMU_BRANCH;; const char *xemu_commit = XEMU_COMMIT; diff --git a/xemu-version.h b/xemu-version.h index 484af8a9deb..a1fe27fccbc 100644 --- a/xemu-version.h +++ b/xemu-version.h @@ -1,6 +1,9 @@ #ifndef XEMU_VERSION_H #define XEMU_VERSION_H +extern const int xemu_version_major; +extern const int xemu_version_minor; +extern const int xemu_version_patch; extern const char *xemu_version; extern const char *xemu_branch; extern const char *xemu_commit;